
    Zi                    (
   d dl mZ d dlZd dlZd dlZd dlZd dlmZ d dlm	Z	 d dl
Z
d dlZd dlmZmZmZ d dlmZmZmZ d dlmZ d dlmZmZmZmZ d d	lmZ d d
lmZmZmZ 	 d dl m!Z! n# e"$ r dZ!Y nw xY wdZ#dZ$dZ%dZ&dZ'dZ(dZ)dZ*dZ+dZ,i Z-d  e.e          D             Z/ G d de          Z0 G d de          Z1 G d de          Z2 G d de          Z3ej        j4         e
j5        d          ej        j6         e
j5        d           ej        j7         e
j5        d!          ej        j8         e
j5        d"          ej        j9        eej        j:        eej        j;        eiZ<ej        j6         e
j=        d e
j>        #           e
j=        d$e
j>        #          fej        j4         e
j=        d%e
j?        #           e
j=        d&e
j?        #          fej        j8         e
j=        d e
j@        #           e
j=        d'e
j@        #          fej        j7         e
j=        d(e
jA        #           e
j=        d)e
jA        #          fej        j;         e
j=        d e#           e
j=        d*e#          fej        j:         e
j=        d+e#           e
j=        d,e#          fiZBej        j4         e
j=        d-e
j?        #           e
j=        d&e
j?        #          fej        j7         e
j=        d.e
jA        #           e
j=        d)e
jA        #          fiZCej        j6         e
j=        d e
j>        #           e
j=        d&e
j>        #          fej        j4         e
j=        d/e
j?        #           e
j=        d0e
j?        #          fej        j8         e
j=        d e
j@        #           e
j=        d)e
j@        #          fej        j7         e
j=        d1e
jA        #           e
j=        d2e
jA        #          fej        j;         e
j=        d e#           e
j=        d,e#          fej        j:         e
j=        d3e#           e
j=        d4e#          fiZDd5d6d7ZEdd8ZFdd:ZGd; ZH	 	 	 	 dddIZI	 dddKZJ	 	 dddTZKddUZLddVZMddZZNdd^ZO G d_ d`          ZP G da db          ZQ G dc dd          ZRde ZSdf ZTdg ZUdh ZVddmZWdn ZXddpZYddrZZddtZ[ddvZ\ddyZ]ddzZ^dd{Z_dd|Z`dd}ZaddZbddZcddZdddZeddZfddZgddZhddZiddZjddZkdS )    )annotationsN)Enum)Path)float8_e4m3fnint4uint4)
ModelProtoTensorProtoexternal_data_helper)onnx_pb)
make_graph
make_model	make_nodemake_tensor_value_info)ReferenceEvaluator)GraphOptimizationLevelInferenceSessionSessionOptions)to_array_extendedzonnx.quantizez0.1.0ai.onnxzcom.microsoftQuantizeLinear_QuantizeLinear_InputDequantizeLinear_DequantizeLinear_Output
_quantizedl        c                    i | ]@}t          t          t          |          t                    *t          t          |          |AS  )
isinstancegetattrr
   int).0ks     a/root/projects/butler/venv/lib64/python3.11/site-packages/onnxruntime/quantization/quant_utils.py
<dictcomp>r$   -   sA    qqqq
SZ[fhiSjSjloHpHpqQ''qqq    c                  2    e Zd ZdZdZd Zed             ZdS )QuantizationModer      c                    | j         S Nnameselfs    r#   __str__zQuantizationMode.__str__8   
    yr%   c                V    	 t           |          S # t          $ r t                      w xY wr*   )r'   KeyError
ValueError)modes    r#   from_stringzQuantizationMode.from_string;   s7    	#D)) 	 	 	,,	    (N)__name__
__module____qualname__
IntegerOps
QLinearOpsr/   staticmethodr5   r   r%   r#   r'   r'   4   sH        JJ     \  r%   r'   c                  2    e Zd ZdZdZd Zed             ZdS )QuantizedValueTyper   r(   c                    | j         S r*   r+   r-   s    r#   r/   zQuantizedValueType.__str__G   r0   r%   c                V    	 t           |          S # t          $ r t                      w xY wr*   )r>   r2   r3   )vs    r#   r5   zQuantizedValueType.from_stringJ   s7    	%a(( 	 	 	,,	r6   N)r7   r8   r9   InputInitializerr/   r<   r5   r   r%   r#   r>   r>   C   sH        EK     \  r%   r>   c                  \    e Zd ZdZdZdZdZdZdZdZ	d Z
ed	             Zed
             ZdS )	QuantTyper   r(                  c                    | j         S r*   r+   r-   s    r#   r/   zQuantType.__str__[   r0   r%   c                V    	 t           |          S # t          $ r t                      w xY wr*   )rE   r2   r3   )ts    r#   r5   zQuantType.from_string^   s6    	Q< 	 	 	,,	r6   c                   | t           j        k    rt          j        S | t           j        k    rt          j        S | t           j        k    rt          j        S | t           j        k    rt          j	        S | t           j
        k    rt          j        S | t           j        k    rt          j        S | t           j        k    rt          j        S t!          d| d          )NzUnexpected value qtype=.)rE   QInt8r
   INT8QUInt8UINT8QUInt16UINT16QInt16INT16QFLOAT8E4M3FNFLOAT8E4M3FNQUInt4UINT4QInt4INT4r3   r-   s    r#   tensor_typezQuantType.tensor_typee   s    9?""##9###$$9$$$%%9###$$9***++9###$$9?""##<4<<<===r%   N)r7   r8   r9   rP   rR   rX   rV   rT   r\   rZ   r/   r<   r5   propertyr^   r   r%   r#   rE   rE   R   s|        EFMFGEF     \ > > X> > >r%   rE   c                  2    e Zd ZdZdZd Zed             ZdS )QuantFormatr   r(   c                    | j         S r*   r+   r-   s    r#   r/   zQuantFormat.__str__|   r0   r%   c                V    	 t           |          S # t          $ r t                      w xY wr*   )ra   r2   r3   )formats    r#   r5   zQuantFormat.from_string   s7    	v&& 	 	 	,,	r6   N)r7   r8   r9   	QOperatorQDQr/   r<   r5   r   r%   r#   ra   ra   x   sH        I
C     \  r%   ra   int8uint8int16uint16dtype   i   i  i i     i   iii@   i i @  rG   zero_point_indexc                T   g }t          |          D ]\  }}t          j        t          |          t          j                  r(|                    t          j        |                     nEt          |t          j                  r|                    |           nt          d| d|           || k    rI|d         }|j
        t          j        k    s|j
        t          j        k    rt          d|j
                   t          |          dk    rt          |          n|d         S )Nzarg z is not an array: rs   zzero_point cannot be r(   r   )	enumeratenumpy
issubdtypetypenumberappendarrayr   ndarray	TypeErrorrl   float32float16lentuple)ru   argsnew_argsiarA   s         r#   _check_typer      s   H$ 
C 
C1DGGU\22 	=OOEKNN++++5=)) 	=OOA;1;;;;<<<   Aw%-''17em+C+C A A ABBB!(mma//5???Xa[@r%   c                   | t           v sJ d|  d            | t          j        j        t          j        j        t          j        j        t          j        j        fv rF|dk    rt          d|d          |j        t          j
        k    rt          j        }n:|j        t          j        k    rt          j        }nt          d|j         d          t          t!          t#          dg dgt$          j                            d| g dg          	          t#          d
g ddg          gdt+          d|d           t+          d|d           gt+          d| d           g                    }t-          |          }t/          |                    d ||d          d                   S t           |          }	t3          | dd          \  }
}|t5          |
|          n|
}|t7          ||          n|}t          j        |                    t          j
                  |z                                  |z             }t          j        ||||           t/          |                    |	                    S )NUnexpected data type > requested. Only INT8, UINT8, INT16, and UINT16 are supported.r   z2zero_point is expected to be null for float 8 not rO   zUnexpected dtype Constant
zero_point)valuer   )Xscaler   Yqur   r   )r   r   F)reduce_range	symmetric)out) ONNX_TYPE_TO_NP_TYPE
onnx_protor
   rY   FLOAT8E4M3FNUZ
FLOAT8E5M2FLOAT8E5M2FNUZNotImplementedErrorrl   rx   r   FLOATr   FLOAT16r3   r   r   r   onnxhelpermake_tensorr   r   r   runget_qmin_qmax_for_qTypemaxminasarrayastyperoundclip)qTypearrr   r   lowhigh	onnx_type
onnx_modelrefrl   qminqmaxcliplowcliphigharr_fp32s                  r#   quantize_nparrayr      sv   ((((eeee )(( +-)-	   ??%&j[e&j&j&jkkk9%%#)IIY%-''#+II====>>>"Bdk>U>UVbdikmpqor>s>s   .0L0L0LseTT	 *3	4@@*7ItDD (UD99: 
 

  !,,3774sU)C)CDDQGHHH %U+,URWXXX
d$'O#dC...&*&63tT???D=#**U]";";e"C!J!J!L!Lz!YZZ
8WhH====8??511222r%   Fc           	        |dk    s|dk     rt          d| d|           t          j        | t          j        d| j                            } t          j        |t          j        d|j                            }|,t          || t          j        || j                  z             }|r?t          j        t          j        |           t          j        |                    }| } |
 }||k    sJ d|  d|             t          j        || z
  t          j	                  }t          j        |t          j	                  t          j        |t          j	                  z
  }t          j        ||z            }	|	dk    s
J d            |	t          j
        |j                  j        k     r7t          j        d	|j                  }	t          j        d|j                  }
n|rRt          j        t          j        ||z   t          j        d
t          j	                  z            |j                  }
n3t          j        t          j        || |	z  z
            |j                  }
|	                    |j                  }	|
|	gS )a  Calculate the scale s and zero point z for the quantization relation
    r = s(q-z), where r are the original values and q are the corresponding
    quantized values.

    r and z are calculated such that every value within [rmin,rmax] has an
    approximate representation within [qmin,qmax]. In addition, qmin <= z <=
    qmax is enforced. If the symmetric flag is set to True, the interval
    [rmin,rmax] is symmetrized to [-absmax, +absmax], where
    absmax = max(abs(rmin), abs(rmax)).

    :parameter rmin: minimum value of r
    :parameter rmax: maximum value of r
    :parameter qmin: minimum value representable by the target quantization data type
    :parameter qmax: maximum value representable by the target quantization data type
    :parameter symmetric: True if the floating-point range should be made symmetric. Defaults to False.
    :parameter min_real_range: Minimum floating-point range (i.e., rmax - rmin) to enforce. Defaults to None.
    :return: zero and scale [z, s]

    r   Bqmin and qmax must meet requirement: qmin <= 0 <= qmax while qmin:, qmmax:rk   Nzqmin=z > qmax=zscale issue      ?g       @)r3   rx   minimumr}   rl   maximumr   r   absfloat64finfotinyr   r   )rminrmaxr   r   r   min_real_rangeabsmaxdrdqr   r   s              r#   compute_scale_zpr      sQ   ( axx4!88r^brrlprrsss
 =u{1DJ???@@D=u{1DJ???@@D !4nDJ O O OOPP uy	$@@ww4<<<555t55<<<	TD[	6	6	6B	T	/	/	/%+d%-2X2X2X	XBKR  EA:::}:::u{4:&&+++Ctz222[$*555

 
	Y TD[EK5=,Q,Q,QQRRZ^Zd  JJ U[u1D%E%ETZXXXJTZ((r%   c                   d}| t           vrz| t          j        k    rLddlm} |}d t          d          D             }t          j        d |D             t          j                  }nt          d|  d	          |t           | <   n| t          j        k    rddlm} |}|t          d
|  d          t          j        t           |                    }t          j        d|          }t          j        ||z  |j                  }||gS )ar  Calculate the scale s for a float8 type (E4M3FN).
    The function assumes the coefficient distribution and the float 8
    distribution are similar to two gaussian laws.

    :return: zero and scale [z, s]

    More details in notebook `quantization_fp8.ipynb
    <https://github.com/microsoft/onnxruntime/blob/main/docs/python/notebooks/quantization_fp8.ipynb>`_.
    Nr   )r   c                ,    g | ]}t          |          S r   )float)r!   r   s     r#   
<listcomp>z+compute_scale_zp_float8.<locals>.<listcomp><  s    777q%((777r%      c                b    g | ],}t          j        |          t          j        |          *|-S r   )rx   isnanisinf)r!   fs     r#   r   z+compute_scale_zp_float8.<locals>.<listcomp>>  s3    TTTqek!nnTU[QR^^TTTTr%   rk   zQuantization to element_type=z not implemented.zUnexpected element_type rO   )FLOAT8_DISTRIBUTIONSr
   rY   	ml_dtypesr   rangerx   r}   r   r3   r   stdrl   )	element_typer   zp_dtyper   
all_valuesvaluesstd_f8zeror   s	            r#   compute_scale_zp_float8r   ,  s>    H///;333//////$H77E#JJ777J[TTJTTT\a\i  FF \\\\\]]]-3\**	1	1	1++++++ B<BBBCCCY+L9::F;q)))DKfCI666E%=r%   datanumpy.ndarray
quant_typeonnx.TensorProto.DataTyper   boolr   r   float | Nonermin_overridermax_overridereturn#tuple[numpy.ndarray, numpy.ndarray]c                   t          | t          j                  s t          dt	          |            d          ||}n%t          |           r|                                 nd}||}n%t          |           r|                                 nd}t          j        || j	                  }t          j        || j	                  }t          j        d| j	                  }	|t          j        k    rJ|rt          d          t          j        |           }
t          ||
          \  }}	t          ||	d	          S |t          j        t          j        t          j        t          j        t          j        t          j        fv rit-          |||
          \  }}t          |           rt/          ||||||          \  }}	nt          j        d|j	                  }t          ||	d	          S t1          d| d          )a  
    Returns the zero_point and scale for the given data.

    :param data: The data for which to compute quantization parameters.
    :param quant_type: The quantization data type.
    :param symmetric: whether symmetric quantization is used or not.
    :parameter reduce_range: True if the quantization range should be reduced. Defaults to False.
    :parameter min_real_range: Minimum floating-point range (i.e., rmax - rmin) to enforce. Defaults to None.
    :parameter rmin_override: The value of rmin to use if not None. Otherwise, uses min(data).
    :parameter rmax_override: The value of rmax to use if not None. Otherwise, uses max(data).
    :return: zero point and scale
    z%Weight must be given as an array not rO   Ng        rk   r   z1Unsupported option reduce_range=True for float 8.r   rt   r   z Unexpected value for quant_type=)r   rx   r~   r   rz   r   r   r   r}   rl   r
   rY   RuntimeErrorr   r   r   rQ   rS   rW   rU   r]   r[   r   r   r3   )r   r   r   r   r   r   r   r   r   r   r   r   r   r   s                 r#   compute_data_quant_paramsr   P  s   * dEM** OMT

MMMNNN  YY/txxzzzC  YY/txxzzzC;t4:...D;t4:...DK4:...E[--- 	TRSSSioo3JDD
E:uqAAAA   -ZQZ[[[
dt99 	: 0tT4Tb c cJQdj999J:uqAAAA
E
EEE
F
FFr%   2tuple[numpy.ndarray, numpy.ndarray, numpy.ndarray]c                   t          | ||||||          \  }}|t          j        k    rt          || ||          }	t	          |	                    t          j                                                  dz  dk              rxt          j	        |           }
t          d|
                                 d|
                                 d|	                                 d|	                                 d	          |||	fS |t          j        t          j        t          j        t          j        t          j        t          j        fv rt          || ||          }	|||	fS t'          d| d          )al  
    :param data: data to quantize
    :param qType: data type to quantize to.
    :param symmetric: whether symmetric quantization is used or not.
    :parameter reduce_range: True if the quantization range should be reduced. Defaults to False.
    :parameter min_real_range: Minimum floating-point range (i.e., rmax - rmin) to enforce. Defaults to None.
    :parameter rmin_override: The value of rmin to use if not None. Otherwise, uses min(data).
    :parameter rmax_override: The value of rmax to use if not None. Otherwise, uses max(data).
    :return: minimum, maximum, zero point, scale, and quantized weights

    To pack weights, we compute a linear transformation

    - when data `type == uint8` mode, from `[rmin, rmax]` -> :math:`[0, 2^{b-1}]` and
    - when data `type == int8`, from `[-m , m]` -> :math:`[-(2^{b-1}-1), 2^{b-1}-1]` where
        `m = max(abs(rmin), abs(rmax))`

    and add necessary intermediate nodes to transform quantized weight to full weight using the equation

    :math:`r = S(q-z)`, where

    - *r*: real original value
    - *q*: quantized value
    - *S*: scale
    - *z*: zero point
    rn   z+One of the quantized value is NaN data in [z, z], quantized_data in [z].zUnexpected value for qType=rO   )r   r
   rY   r   anyviewrx   rh   ravelr   r   r   r   rQ   rS   rW   rU   r]   r[   r3   )r   r   r   r   r   r   r   r   r   quantized_datanp_datas              r#   quantize_datar     s   8 2 J ((()%ujII##EK0066883>3FGG 	mD))GWgkkmm W Ww{{}} W W&4&8&8&:&:W W>L>P>P>R>RW W W   5.00   *%ujII5.00
;5;;;
<
<<r%   weightonnx.TensorProtor   r   axis
int | Nonequant_weight_name
str | Nonec                   t          |           }d}|%t          ||                                ||          }n|j        |         }t	          |j                  }	d|	|<   g }
t          |          D ]}|                    ||          }||         }||         }t          ||                                ||          }|
                    t          j	        |          
                    |	                     t          j        |
|          }|r|n| j         t           }|t          j        j        k    rAt          j                    }||_        |j                            | j                   ||_        |                                                                                                |_        t0          t1          |          }|j        |j        k    s*|                                |                                k    rrt3          d|j         d|                                dd          d|                                dd          d| j         dt5          |          dd	          d
          n|t          j        j        t          j        j        fv r|j        t<          t>          fvrt3          d| d          tA          tC          |                                                    }t          j"        #                    ||| j        |d          }nmt          j"        $                    |          }t          j	        ||          
                    | j                  }t          j%        &                    ||          }|S )aG  
    Returns a quantized version of the given ONNX initializer.

    :param weight: The ONNX initializer to quantize.
    :param quant_type: The final quantized data type.
    :param zero_point: The zero-point value to use for quantization.
    :param scale: The scale value to use for quantization.
    :param axis: The quantization axis if quantizing per-channel. Defaults to None.
    :param quant_weight_name: The name of the quantized initializer.
                              If not specified, the quantized name is generated.
    :return: The quantized ONNX initializer.
    Nr(   zThe initializer of shape z! could not be created, expecting 
   z, got z and shape=z
raw=   rO   zQuantized weights for z. must be 8-bit before packing as 4-bit values.T)rawrk   )'tensor_proto_to_arrayr   r   shapelistr   taker|   rx   r   reshapeconcatenater,   TENSOR_NAME_QUANT_SUFFIXr   r
   rY   	data_typedimsextendflattencopytobytesraw_datar   r   strr]   r[   rl   r   r   bytespack_bytes_to_4bitr   r   tensor_dtype_to_np_dtypenumpy_helper
from_array)r   r   r   r   r   r   weight_dataq_weight_datachannel_countchannel_dimsquantized_channel_data_listr   channel_datachannel_scalechannel_zero_pointquantized_channel_dataq_weight_nameq_weight_initializercheckpacked_dataquant_np_dtypes                        r#   quantize_onnx_initializerr    s   ( (//K*.M|([5F5F5H5H%Q[\\#)$/K-..T&(#}%% 	l 	lA&++At44L!!HM!+A%5L..00-AS& &" (..u}=S/T/T/\/\]i/j/jkkkk)*EtLL):j%%6;@jPh@j@jMT%222#/11)3&!((555$1!(5(=(=(?(?(D(D(F(F(N(N(P(P%( &&:;;E{k///5==??mF[F[F]F]3]3]"@0A @ @$,,..ss3@ @;@==??3B3;O@ @\b\h@ @ !566tt<@ @ @  
 
(-t/?/EF	F	FtUm33uuuuvvv .}/D/D/F/FGGHH  ${66}jRXR]_jpt6uu==jIIm>JJJRRSYS^__#0;;M=YYr%   c                   | t           j        j        k    rt          d          d}|rt                              |           }n3|r| t          v rt          |          }nt                              |           }|st          d|  d          |\  }}|dk    s|dk     r&t          d| d| d|j	         d	| d
| d|            |S )z
    Return qmin and qmax, the minimum and maximum value representable by the given qType
    :parameter qType: onnx.onnx_pb.TensorProto.UINT8 or onnx.onnx_pb.TensorProto.UINT8
    :return: qmin, qmax
    z;This function is not implemented for float 8 as not needed.Nr   r   r   r   r   z, dtype=z, reduce_range=z, symmetric=z, qType=)
r   r
   rY   r   ONNX_INT_TYPE_REDUCED_RANGEgetONNX_INT_TYPE_SYMMETRIC_RANGEONNX_INT_TYPE_RANGEr3   rl   )r   r   r   qranger   r   s         r#   r   r     s#    
&333!"_```F 0,0077	 0u ===.u5$((// xvvvvwwwJD$axx4!8844 4"&4 404
4 4KW4 4"4 4,14 4
 
 	
 Mr%   c                6    t          | ||          \  }}||z
  S )z
    Helper function to get the quantization range for a type.
        parameter qType: quantization type.
        return: quantization range.
    r   )r   )r   r   r   r   r   s        r#   get_qrange_for_qTyper%  :  s&     )	RRRJD$$;r%   r    ranktuple[bool, int]c                <    | dk     r| |z   n| }|dk    o||k     }||fS )z
    Helper function that tries to return a normalized axis in the range [0, rank - 1].
    :parameter axis: The axis to normalize.
    :parameter rank: The tensor rank (number of dimensions).
    :return (is_valid, axis_norm)
    r   r   )r   r&  	axis_normis_valids       r#   normalize_axisr+  D  s7      $axxtTIA~2)d"2HYr%   src_8bitr
  	bytearrayc                "   t          |           }|dk    rt                      S |dz   dz  }t          |          }d}d}||dz
  k     r3| |dz            dz  dz  | |         dz  z  ||<   |dz  }|dz  }||dz
  k     3||k     r| |         dz  ||<   |S )aB  
    Copies a source array of 8-bit values into a destination bytearray of packed 4-bit values.
    Assumes that the source values are already in the appropriate int4 range.
    :parameter src_8bit: The 8-bit element values to pack.
    :return A bytearray with every two 8-bit src elements packed into a single byte.
    r   r(   rF   ro   rH   )r   r-  )r,  	num_elemsdst_sizedstsrc_idst_is         r#   r  r  P  s     HIA~~{{A!#H
H

CEE )a-

	*S0Q68E?S;PQE


 )a-


 ye_s*E
Jr%   c                       e Zd ZdZg g dfdZdS )QuantizedInitializerzJ
    Represents a linearly quantized weight input from ONNX operators
    Nc
                    || _         || _        || _        || _        || _        || _        || _        || _        |	| _        d S r*   )	r,   initializerrminsrmaxszero_pointsscalesr   r   r   )
r.   r,   r7  r8  r9  r:  r;  r   r   r   s
             r#   __init__zQuantizedInitializer.__init__s  sJ     	&

&	,			r%   r7   r8   r9   __doc__r<  r   r%   r#   r5  r5  n  s=               r%   r5  c                  "    e Zd ZdZ	 	 	 	 ddZdS )QuantizedValuezI
    Represents a linearly quantized value (input\output\intializer)
    Nc
                    || _         || _        || _        || _        || _        || _        || _        || _        |	| _        d S r*   )	original_nameq_name
scale_namezp_name
value_typer   	node_type
node_qtype
scale_type)
r.   r,   new_quantized_namerD  zero_point_namequantized_value_typer   rG  rH  rI  s
             r#   r<  zQuantizedValue.__init__  sH     "($&.	"$$r%   )NNNNr=  r   r%   r#   r@  r@    s@          % % % % % %r%   r@  c                      e Zd ZdZd ZdS )BiasToQuantizez+
    Represents a bias to be quantized
    c                0    || _         || _        || _        d S r*   )	bias_name
input_nameweight_name)r.   rP  rQ  rR  s       r#   r<  zBiasToQuantize.__init__  s    "$&r%   Nr=  r   r%   r#   rN  rN    s-         ' ' ' ' 'r%   rN  c                   | j         dk    rt          d| j         d          | j         dk    r| j        }n| j         dk    r| j        }n| j         dk    r| j        }n| j         dk    r| j        }n| j         dk    r| j        }n| j         d	k    r| j        }nl| j         d
k    r| j	        }nY| j         dk    r| j
        }nF| j         dk    r| j        }n3| j         dk    r| j        }n t          d| j         d| j          d          | j        |iS )z
    Convert attribute to kwarg format for use with onnx.helper.make_node.
        :parameter attribute: attribute in AttributeProto format.
        :return: attribute in {key: value} format.
    r   z
attribute z does not have type specified.r(   rF   rG   rH   rI   rJ   rp      	   r   z has unsupported type rO   )rz   r3   r,   r   r   srM   gfloatsintsstringstensorsgraphs)	attributer   s     r#   attribute_to_kwargr^    sB    ~TinTTTUUU ~	1			1			1			1			1		 	1			1		!	1		!	2		 ]in]]IN]]]^^^NE""r%   c                Z      fd|D             }t          |          dk    r|d         ndS )z
    Helper function to find item by name in a list.
        parameter item_name: name of the item.
        parameter item_list: list of items.
        return: item if found. None otherwise.
    c                *    g | ]}|j         k    |S r   r+   )r!   item	item_names     r#   r   z find_by_name.<locals>.<listcomp>  s%    BBBd49	+A+AT+A+A+Ar%   r   N)r   )rb  	item_listitemss   `  r#   find_by_namere    s;     CBBBiBBBE5zzA~~5884/r%   c                d    d}t          t          |                    D ]}||         | k    r|}|S )zC
    Helper function to return index of an item in a node list
    rs   )r   r   )	elem_name	elem_listelem_idxr   s       r#   get_elem_indexrj    s@     H3y>>""  Q<9$$HOr%   c                H    t           j                            d| |g|          S )z
    Helper function to create a Mul node.
        parameter inputs: list of input names.
        parameter output: output name.
        parameter name: name of the node.
        return: Mul node in NodeProto format.
    Mul)r   r   r   )inputsoutputr,   s      r#   get_mul_nodero    s"     ;  $???r%   filenamer   
identifierr	  c                V    | j                             | j        |z   | j        z             S )zp
    Helper function to generate a identifiable filepath by concatenating the given identifier as a suffix.
    )parentjoinpathstemsuffix)rp  rq  s     r#   generate_identified_filenamerw    s(     ?##HMJ$>$PQQQr%   c                   dd l }dd lm} dd l} |j        |j                   t          d           t          |            t          d           t          |           |                    | |d           |                    d           |	                    d           |
                    d	           |                                 d S )
Nr   )	thresholdz
Histogram:zHistogram Edges:T)fillzTensor valueCountszTensor value V.S. Counts)sysmatplotlib.pyplotpyplotrx   set_printoptionsmaxsizeprintstairsxlabelylabeltitleshow)hist
hist_edgesr|  pltrx   s        r#   
apply_plotr    s    JJJ######LLLES[1111	,	$KKK	
	*JJtZdJ+++JJ~JJxII()))HHJJJJJr%   rO   c           	     
   ddl ddl}ddlddlmc mc m} ddlmc mc m} ddl	m
mm t          j        d|              G fddj                  }                    | |          }t#          t$          j                            |d          d	          5 }|                    |           ddd           n# 1 swxY w Y                       d          }|                    d
          }	g }
t1          |                                           D ]:}| |         }|                                }t7          |                    d|                                                    t7          |                    d|                                                    g}t=          t?          |                    }|	                     |          }|	                     |          }|!                    |	           |"                    |	|           |#                    |	|           |$                    |	          }|
%                    |           <|&                    |	tO          |
                     |
D ]}|	(                    |           |	)                                }|*                    |	           |+                    |	|           |,                    |	          }|	-                    |           |	.                                }t#          t$          j                            |d          d          5 }|                    |           ddd           n# 1 swxY w Y   t$          j/                            dd          dv r|j        0                    |d          }|1                                }te          |          D ]c}|3                    |          }t          j        |4                                           t          j        |5                                           dt#          t$          j                            |d          d	          5 }t1          |                                           D ]}| |         }|                                }t7          |                    d|                                                    t7          |                    d|                                                    g}|dz   t=          t?          |                    z   }|                    |           |                    d           	 ddd           dS # 1 swxY w Y   dS )z>
    Helper function to write calibration table to files.
    r   N)CalibrationMethod
TensorDataTensorsDatazcalibration cache: c                  "    e Zd Z fdZdS )*write_calibration_table.<locals>.MyEncoderc                \   t          |f          r|                                S t          |j                  r*|                                t	          |j                  ddS t          |          r|j        j        t	          |          dS j        	                    | |          S )Nznumpy.array)r   rl   CLS)r  r   )
r   to_dictr~   tolistr	  rl   	__class__r7   JSONEncoderdefault)r.   objr  r  r  jsonnps     r#   r  z2write_calibration_table.<locals>.MyEncoder.default$  s    #
K899 %{{}}$#rz** ] #

s39~~m\\\#011 J"}5CIII#++D#666r%   N)r7   r8   r9   r  )r  r  r  r  r  s   r#   	MyEncoderr  #  sB        	7 	7 	7 	7 	7 	7 	7 	7 	7 	7 	7r%   r  )clszcalibration.jsonwi   highestlowestzcalibration.flatbufferswbQUANTIZATION_DEBUG0)r(   1zcalibration.cache 
)6r  flatbuffersrx   5onnxruntime.quantization.CalTableFlatBuffers.KeyValuequantizationCalTableFlatBuffersKeyValue5onnxruntime.quantization.CalTableFlatBuffers.TrtTableTrtTable"onnxruntime.quantization.calibrater  r  r  logginginfor  dumpsopenospathjoinwriter}   Buildersortedkeysr  r   r   ra  r	  r   CreateStringKeyValueStartKeyValueAddKeyKeyValueAddValueKeyValueEndr|   TrtTableStartDictVectorr   PrependUOffsetTRelative	EndVectorTrtTableStartTrtTableAddDictTrtTableEndFinishOutputenvironGetRootAsTrtTable
DictLengthr   DictKeyValue)calibration_cachedirr  r  r  r  	json_datafiler   builderkey_value_listkeyr   d_valuesrX  r   flat_key
flat_value	key_value	main_dict	cal_tablebufdict_lenr   r  r  r  r  r  s                           @@@@@r#   write_calibration_tabler    s!   
 KKKLLLLLLLLLLLLLLLLLLLLLLLL]]]]]]]]]]L:'8::;;;7 7 7 7 7 7 7 7 7 7 7D$ 7 7 7 

,)
<<I	bgll3 233S	9	9 T

9               88A;;D!!$''GN',,..// ) )"3'>>##(,,y$//446677(,,x..335566
 CKK  '',,))%00
w'''222!!':666((11	i(((($$Wc..A.ABBB# 3 3	''	2222!!##I7###Wi000$$W--INN9
..

C	bgll3 9::D	A	A T

3               
z~~*C00H<<%77Q??	''))x 	, 	,A!q))IL)))L**++++ 
bgll3 344c	:	: 
d+002233 		 		C&s+F~~''Hhll9d3388::;;hll8T227799::F #ICKK 0 00EJJuJJt		
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
s7   )CCCM))M-0M-&C7U++U/2U/-C6?c                   | dk                         t          j                  }| dk                         t          j                  }|                                }| j        |z
  }|sdS |t          |          z  t          |          z  }|dk     sJ d| d| d|             |                      t          j                  }|||z  | |z  z   z  }|dk                                    dk    sJ |S )a~  Given a discrete distribution (may have not been normalized to 1),
    smooth it by replacing zeros with eps multiplied by a scaling factor
    and taking the corresponding amount off the non-zero values.
    Ref: http://web.engr.illinois.edu/~hanj/cs412/bk3/KL-divergence.pdf
         https://github.com//apache/incubator-mxnet/blob/master/python/mxnet/contrib/quantization.py
    r   Nr   zn_zeros=z, n_nonzeros=z, eps1=)r   rx   r   sumsizer   )pepsis_zerosis_nonzerosn_zeros
n_nonzeroseps1r  s           r#   smooth_distributionr  o  s     Qu}--H6//%-00KllnnG'!J tw%
"3"33D#:::Q'QQ
QQ4QQ:::88EM""DC(Nte{222DAI??!!!!Kr%   
model_pathc                    t          j        |                                 d          }t          d |j        j        D                       S )NF)load_external_datac              3  >   K   | ]}t          j        |          V  d S r*   )r   uses_external_data)r!   
intializers     r#   	<genexpr>z*model_has_external_data.<locals>.<genexpr>  s.      mmz#6zBBmmmmmmr%   )r   loadas_posixr   graphr7  )r  models     r#   model_has_external_datar    sF    Ij))++FFFEmmUZU`Ulmmmmmmr%   opt_model_pathc                    t                      }|                                |_        t          j        |_        i }dg|d<   t          |                                 |fddgi|}dS )z
        Generate model that applies graph optimization (constant folding, etc.)
        parameter model_path: path to the original onnx model
        parameter opt_model_path: path to the optimized onnx model
    :return: optimized onnx model
    ConstantSharingdisabled_optimizers	providersCPUExecutionProviderN)r   r  optimized_model_filepathr   ORT_ENABLE_BASICgraph_optimization_levelr   )r  r  sess_optionkwargs_s        r#   optimize_modelr     sr     !""K+9+B+B+D+DK(+A+RK(F%6$7F !,,..jjH^G_jcijjAAAr%   r  r	   c                    ddi}| j         r+| j         D ]#}|                    |j        |j        i           $t          j                            | |           dS )z>Tag the model that it went through quantization pre-processingonnx.quant.pre_processonnxruntime.quantNmetadata_propsupdater  r   r   r   set_model_props)r  r  props      r#   add_pre_process_metadatar	    sh    .0CDN :( 	: 	:D!!48TZ"89999K~66666r%   c                Z    | j         r#| j         D ]}|j        dk    r|j        dk    r dS dS )zCCheck the model whether it went through quantization pre-processingr  r  TFr  r  r   )r  r  s     r#   model_has_pre_process_metadatar    sG     ( 	 	Dx333
FY8Y8Ytt5r%   c                    ddi}| j         r+| j         D ]#}|                    |j        |j        i           $t          j                            | |           d S )N
onnx.inferr  r  )r  r  r  s      r#   add_infer_metadatar    sh    "$78N 4% 	4 	4A!!15!'"23333K~66666r%   c                Z    | j         r#| j         D ]}|j        dk    r|j        dk    r dS dS )Nr  r  TFr  )r  r  s     r#   model_has_infer_metadatar    sF     % 	 	Au$$4G)G)Gtt5r%   c                    d | j         D             }t          |          dk    rt          d          |d         j        }|S )Nc                6    g | ]}|j         r|j         d k    |S )r   )domain)r!   opsets     r#   r   z%get_opset_version.<locals>.<listcomp>  s.    mmm5<mSXS_clSlSleSlSlSlr%   r(   z$Failed to find proper ai.onnx domainr   )opset_importr   r3   version)r  ai_onnx_domainopset_versions      r#   get_opset_versionr    sM    mm);mmmN
>a?@@@"1%-Mr%   weight_typec                   t          |           }|}t          |d|          }|dk     r0|t          j        j        k    rt          j        d| d           d}n?|dk    rt          j        d| d           n |dk     rt          j        d| d           d}||k    r/t          j                            | |          } t          |           } | S )	Nr^      z$The original model opset version is z, which does not support quantization to float 8. Please update the model to opset >= 19. Automatically update the model to opset 19. Please verify the quantized model.r   ze, which does not support node fusions. Please update the model to opset >= 11 for better performance.z, which does not support quantization. Please update the model to opset >= 11. Automatically update the model to opset 11. Please verify the quantized model.   )
r  r   r   r
   rY   r  warningversion_converterconvert_version&save_and_reload_model_with_shape_infer)r  r  r  target_opset_versionweight_quant_types        r#   update_opset_versionr%    s-   %e,,M(]KHHr/43C3PPP1= 1 1 1	
 	
 	

  "	"		M= M M M	
 	
 	
 	

 
		1= 1 1 1	
 	
 	

  "},,&66u>RSS 7u==Lr%   c                ,   t          | d          }t          j                            t	          |           t	          |                     t          j        |                                          }t          |           |                                 |S )Nz	-inferred)	rw  r   shape_inferenceinfer_shapes_pathr	  r  r  r  unlink)r  inferred_model_pathr  s      r#   load_model_with_shape_inferr+    s{    6z;OO**3z??C@S<T<TUUUI)224455Eu   Lr%   c                <   t          j        d          5 }t          j        |           }t	          |                              d          }t          j        ||                                d           t          |          cd d d            S # 1 swxY w Y   d S )Nz
ort.quant.)prefixz
model.onnxT)save_as_external_data)
tempfileTemporaryDirectoryr  deepcopyr   rt  r   
save_modelr  r+  )r  quant_tmp_dir
model_copyr  s       r#   r"  r"    s    		$L	9	9	9 7]]5))
-((11,??

J$7$7$9$9QUVVVV*:66	7 7 7 7 7 7 7 7 7 7 7 7 7 7 7 7 7 7s   A.BBBr7  r
   c                    | j         t          j        j        t          j        j        fv rt
          j                            |           S t          d| j	         dt          | j                             )Nz&Only float type is supported. Weights z is )r  r   r
   r   r   r   r  to_arrayr3   r,   type_to_name)r7  s    r#   r   r     si    !7!=z?U?] ^^^ ))+666
l1All|T_TiGjll  r%   tensor_namec                    | dz   S )N_QuantizeLinearr   r8  s    r#   add_quant_suffixr<    s    ***r%   c                    | t           z   S r*   )QUANT_INPUT_SUFFIXr;  s    r#   add_quant_input_suffixr?  
  s    +++r%   c                    | dz   S )N_QuantizeLinear_Outputr   r;  s    r#   add_quant_output_suffixrB    s    111r%   c                    | dz   S )N_DequantizeLinearr   r;  s    r#   add_dequant_suffixrE    s    ,,,r%   c                    | dz   S )N_DequantizeLinear_Inputr   r;  s    r#   add_dequant_input_suffixrH    s    222r%   c                    | t           z   S r*   )DEQUANT_OUTPUT_SUFFIXr;  s    r#   add_dequant_output_suffixrK    s    ...r%   )NN)FN)FNNN)r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   )r   r   )r   r   r   r   r   r   r   r   r   r   r   r   r   r   )FF)r   r    r&  r    r   r'  )r,  r
  r   r-  )rp  r   rq  r	  r   r   )rO   )r  )r  r   )r  r   r  r   )r  r	   )r  r	   r   r   )r  r	   r   r    )r  r	   r  rE   r   r	   )r  r   r   r	   )r  r	   r   r	   )r7  r
   r   r   )r8  r	  r   r	  )r   r	  )l
__future__r   r  r  r  r/  enumr   pathlibr   rx   r   r   r   r   r   r	   r
   r   r   r   onnx.helperr   r   r   r   onnx.referencer   onnxruntimer   r   r   onnx.reference.op_runr   ImportError__producer____version__onnx_domain	ms_domainQUANT_OP_NAMEr>  DEQUANT_OP_NAMErJ  r  MODEL_SIZE_THRESHOLDr   r  r7  r'   r>   rE   ra   rQ   rl   rS   rW   rU   rY   r]   r[   r   r}   rh   rg   rj   ri   r"  r!  r  r   r   r   r   r   r   r  r   r%  r+  r  r5  r@  rN  r^  re  rj  ro  rw  r  r  r  r  r   r	  r  r  r  r  r%  r+  r"  r   r<  r?  rB  rE  rH  rK  r   r%   r#   <module>r[     s	   # " " " " "   				                0 0 0 0 0 0 0 0 0 0 > > > > > > > > > > & & & & & & Q Q Q Q Q Q Q Q Q Q Q Q - - - - - - P P P P P P P P P P7777777   
 	 , $2 ' !  qqCC4D4Dqqq    t          #> #> #> #> #> #> #> #>L    $     V!4!4 +%+g"6"6 +%+g"6"6!;5;x#8#8' %   ;5;q#D#D#DkekRU]b]hFiFiFi"j+%+d%*"E"E"E{u{SV^c^hGiGiGi!j!KEK$F$F$FTYafamHnHnHn#o ;5;vU[#I#I#I;5;W\didoKpKpKp"q ;5;q#>#>#>BV[@\@\@\"]+%+b"="="={u{1TX?Y?Y?Y!Z  +%+d%*"E"E"E{u{SV^c^hGiGiGi!j ;5;vU[#I#I#I;5;W\didoKpKpKp"q!   ;5;q#D#D#DkekRU]b]hFiFiFi"j+%+c"D"D"DkekRT\a\fFgFgFg!h!KEK$F$F$FTYafamHnHnHn#o ;5;vU[#I#I#I;5;W\didoKpKpKp"q ;5;q#>#>#>AUZ@[@[@["\+%+b"="="={u{1TX?Y?Y?Y!Z  )+ A A A A A 13 13 13 13h< < < <~! ! !P #'"&"&;G ;G ;G ;G ;G~ hl:= := := := :=D $(L  L  L  L  L ^   @   	 	 	 	   <       >% % % % % % % %8' ' ' ' ' ' ' '"# "# "#J0 0 0  @ @ @R R R R  $Y Y Y Yx   2n n n n
k k k k 7 7 7 7   7 7 7 7      ! ! ! !H   7 7 7 7   + + + +, , , ,2 2 2 2- - - -3 3 3 3/ / / / / /s   "A) )A32A3