from __future__ import annotations

import copy
import logging
import os
import tempfile
from enum import Enum
from pathlib import Path

import numpy
import onnx
from onnx import ModelProto, TensorProto, external_data_helper
from onnx import onnx_pb as onnx_proto
from onnx.helper import make_graph, make_model, make_node, make_tensor_value_info
from onnx.reference import ReferenceEvaluator

from onnxruntime import GraphOptimizationLevel, InferenceSession, SessionOptions

try:
    from onnx.reference.custom_element_types import float8e4m3fn
except ImportError:
    float8e4m3fn = None

try:
    from onnx.reference.custom_element_types import int4, uint4
except ImportError:
    int4 = None
    uint4 = None

try:
    from onnx.reference.op_run import to_array_extended
except ImportError:
    # Older onnx releases do not expose to_array_extended.
    to_array_extended = None

__producer__ = "onnx.quantize"
__version__ = "0.1.0"
onnx_domain = "ai.onnx"
ms_domain = "com.microsoft"
QUANT_OP_NAME = "QuantizeLinear"
QUANT_INPUT_SUFFIX = "_QuantizeLinear_Input"
DEQUANT_OP_NAME = "DequantizeLinear"
DEQUANT_OUTPUT_SUFFIX = "_DequantizeLinear_Output"
TENSOR_NAME_QUANT_SUFFIX = "_quantized"
MODEL_SIZE_THRESHOLD = 2147483648  # Models at or above 2GB must use external data.

FLOAT8_DISTRIBUTIONS = {}

type_to_name = {getattr(TensorProto, k): k for k in dir(TensorProto) if isinstance(getattr(TensorProto, k), int)}

# Numpy dtypes used to hold the quantized values of each ONNX element type.
ONNX_TYPE_TO_NP_TYPE = {
    onnx_proto.TensorProto.INT8: numpy.dtype("int8"),
    onnx_proto.TensorProto.UINT8: numpy.dtype("uint8"),
    onnx_proto.TensorProto.INT16: numpy.dtype("int16"),
    onnx_proto.TensorProto.UINT16: numpy.dtype("uint16"),
    onnx_proto.TensorProto.FLOAT8E4M3FN: float8e4m3fn,
    onnx_proto.TensorProto.INT4: int4,
    onnx_proto.TensorProto.UINT4: uint4,
}

# Full representable range (qmin, qmax) for each quantized integer type.
ONNX_INT_TYPE_RANGE = {
    onnx_proto.TensorProto.UINT8: (numpy.array(0, dtype=numpy.uint8), numpy.array(255, dtype=numpy.uint8)),
    onnx_proto.TensorProto.INT8: (numpy.array(-128, dtype=numpy.int8), numpy.array(127, dtype=numpy.int8)),
    onnx_proto.TensorProto.UINT16: (numpy.array(0, dtype=numpy.uint16), numpy.array(65535, dtype=numpy.uint16)),
    onnx_proto.TensorProto.INT16: (numpy.array(-32768, dtype=numpy.int16), numpy.array(32767, dtype=numpy.int16)),
    onnx_proto.TensorProto.UINT4: (numpy.array(0, dtype=uint4), numpy.array(15, dtype=uint4)),
    onnx_proto.TensorProto.INT4: (numpy.array(-8, dtype=int4), numpy.array(7, dtype=int4)),
}

# Symmetric ranges: the extreme negative value is dropped so zero maps exactly to the midpoint.
ONNX_INT_TYPE_SYMMETRIC_RANGE = {
    onnx_proto.TensorProto.UINT8: (numpy.array(0, dtype=numpy.uint8), numpy.array(254, dtype=numpy.uint8)),
    onnx_proto.TensorProto.INT8: (numpy.array(-127, dtype=numpy.int8), numpy.array(127, dtype=numpy.int8)),
    onnx_proto.TensorProto.UINT16: (numpy.array(0, dtype=numpy.uint16), numpy.array(65534, dtype=numpy.uint16)),
    onnx_proto.TensorProto.INT16: (numpy.array(-32767, dtype=numpy.int16), numpy.array(32767, dtype=numpy.int16)),
}

# Reduced (half) ranges used when reduce_range=True.
ONNX_INT_TYPE_REDUCED_RANGE = {
    onnx_proto.TensorProto.UINT8: (numpy.array(0, dtype=numpy.uint8), numpy.array(127, dtype=numpy.uint8)),
    onnx_proto.TensorProto.INT8: (numpy.array(-64, dtype=numpy.int8), numpy.array(64, dtype=numpy.int8)),
    onnx_proto.TensorProto.UINT16: (numpy.array(0, dtype=numpy.uint16), numpy.array(32767, dtype=numpy.uint16)),
    onnx_proto.TensorProto.INT16: (numpy.array(-16384, dtype=numpy.int16), numpy.array(16384, dtype=numpy.int16)),
    onnx_proto.TensorProto.UINT4: (numpy.array(0, dtype=uint4), numpy.array(7, dtype=uint4)),
    onnx_proto.TensorProto.INT4: (numpy.array(-4, dtype=int4), numpy.array(3, dtype=int4)),
}


class QuantizationMode(Enum):
    IntegerOps = 0
    QLinearOps = 1

    def __str__(self):
        return self.name

    @staticmethod
    def from_string(mode):
        try:
            return QuantizationMode[mode]
        except KeyError:
            raise ValueError()


class QuantizedValueType(Enum):
    Input = 0
    Initializer = 1

    def __str__(self):
        return self.name

    @staticmethod
    def from_string(v):
        try:
            return QuantizedValueType[v]
        except KeyError:
            raise ValueError()


class QuantType(Enum):
    QInt8 = 0
    QUInt8 = 1
    QFLOAT8E4M3FN = 2
    QInt16 = 3
    QUInt16 = 4
    QInt4 = 5
    QUInt4 = 6

    def __str__(self):
        return self.name

    @staticmethod
    def from_string(t):
        try:
            return QuantType[t]
        except KeyError:
            raise ValueError()

    @property
    def tensor_type(self):
        if self == QuantType.QInt8:
            return TensorProto.INT8
        if self == QuantType.QUInt8:
            return TensorProto.UINT8
        if self == QuantType.QUInt16:
            return TensorProto.UINT16
        if self == QuantType.QInt16:
            return TensorProto.INT16
        if self == QuantType.QFLOAT8E4M3FN:
            return TensorProto.FLOAT8E4M3FN
        if self == QuantType.QUInt4:
            return TensorProto.UINT4
        if self == QuantType.QInt4:
            return TensorProto.INT4
        raise ValueError(f"Unexpected value qtype={self!r}.")


class QuantFormat(Enum):
    QOperator = 0
    QDQ = 1

    def __str__(self):
        return self.name

    @staticmethod
    def from_string(format):
        try:
            return QuantFormat[format]
        except KeyError:
            raise ValueError()


def _check_type(*args, zero_point_index=-1):
    new_args = []
    for i, a in enumerate(args):
        if numpy.issubdtype(type(a), numpy.number):
            new_args.append(numpy.array(a))
        elif isinstance(a, numpy.ndarray):
            new_args.append(a)
        else:
            raise TypeError(f"arg {i} is not an array: {a}")
        if i == zero_point_index:
            v = new_args[-1]
            if v.dtype == numpy.float32 or v.dtype == numpy.float16:
                raise TypeError(f"zero_point cannot be {v.dtype}")
    return tuple(new_args) if len(new_args) > 1 else new_args[0]


def quantize_nparray(qType, arr, scale, zero_point, low=None, high=None):
    assert (
        qType in ONNX_TYPE_TO_NP_TYPE
    ), f"Unexpected data type {qType} requested. Only INT8, UINT8, INT16, and UINT16 are supported."

    if qType in (
        onnx_proto.TensorProto.FLOAT8E4M3FN,
        onnx_proto.TensorProto.FLOAT8E4M3FNUZ,
        onnx_proto.TensorProto.FLOAT8E5M2,
        onnx_proto.TensorProto.FLOAT8E5M2FNUZ,
    ):
        if zero_point != 0:
            raise NotImplementedError(f"zero_point is expected to be null for float 8 not {zero_point!r}.")
        if arr.dtype == numpy.float32:
            onnx_type = TensorProto.FLOAT
        elif arr.dtype == numpy.float16:
            onnx_type = TensorProto.FLOAT16
        else:
            raise ValueError(f"Unexpected dtype {arr.dtype}.")
        # Quantize through a small QuantizeLinear graph evaluated with the ONNX
        # reference implementation so saturation follows the float8 specification.
        onnx_model = make_model(
            make_graph(
                [
                    make_node(
                        "Constant", [], ["zero_point"], value=onnx.helper.make_tensor("zero_point", qType, [], [0.0])
                    ),
                    make_node("QuantizeLinear", ["X", "scale", "zero_point"], ["Y"]),
                ],
                "qu",
                [
                    make_tensor_value_info("X", onnx_type, None),
                    make_tensor_value_info("scale", onnx_type, None),
                ],
                [make_tensor_value_info("Y", qType, None)],
            )
        )
        ref = ReferenceEvaluator(onnx_model)
        return _check_type(ref.run(None, {"X": arr, "scale": scale})[0])

    dtype = ONNX_TYPE_TO_NP_TYPE[qType]
    (qmin, qmax) = get_qmin_qmax_for_qType(qType, reduce_range=False, symmetric=False)

    cliplow = max(qmin, low) if low is not None else qmin
    cliphigh = min(qmax, high) if high is not None else qmax
    arr_fp32 = numpy.asarray((arr.astype(numpy.float32) / scale).round() + zero_point)
    numpy.clip(arr_fp32, cliplow, cliphigh, out=arr_fp32)
    return _check_type(arr_fp32.astype(dtype))


def compute_scale_zp(rmin, rmax, qmin, qmax, symmetric=False, min_real_range=None):
    """Calculate the scale s and zero point z for the quantization relation
    r = s(q-z), where r are the original values and q are the corresponding
    quantized values.

    r and z are calculated such that every value within [rmin,rmax] has an
    approximate representation within [qmin,qmax]. In addition, qmin <= z <=
    qmax is enforced. If the symmetric flag is set to True, the interval
    [rmin,rmax] is symmetrized to [-absmax, +absmax], where
    absmax = max(abs(rmin), abs(rmax)).

    :parameter rmin: minimum value of r
    :parameter rmax: maximum value of r
    :parameter qmin: minimum value representable by the target quantization data type
    :parameter qmax: maximum value representable by the target quantization data type
    :parameter symmetric: True if the floating-point range should be made symmetric. Defaults to False.
    :parameter min_real_range: Minimum floating-point range (i.e., rmax - rmin) to enforce. Defaults to None.
    :return: zero and scale [z, s]
    """
    if qmin > 0 or qmax < 0:
        raise ValueError(f"qmin and qmax must meet requirement: qmin <= 0 <= qmax while qmin:{qmin}, qmmax:{qmax}")

    # Adjust rmin and rmax so that 0 is included in the range. This guarantees
    # that the zero point is representable (qmin <= z <= qmax).
    rmin = numpy.minimum(rmin, numpy.array(0, dtype=rmin.dtype))
    rmax = numpy.maximum(rmax, numpy.array(0, dtype=rmax.dtype))

    # Enforce a minimum floating-point range if requested.
    if min_real_range is not None:
        rmax = numpy.maximum(rmax, rmin + numpy.asarray(min_real_range, dtype=rmin.dtype))

    if symmetric:
        absmax = numpy.maximum(numpy.abs(rmin), numpy.abs(rmax))
        rmin = -absmax
        rmax = +absmax

    assert qmin <= qmax, f"qmin={rmin} > qmax={rmax}"
    dr = numpy.array(rmax - rmin, dtype=numpy.float64)
    dq = numpy.array(qmax, dtype=numpy.float64) - numpy.array(qmin, dtype=numpy.float64)
    scale = numpy.array(dr / dq)
    assert scale >= 0, "scale issue"

    if scale < numpy.finfo(rmax.dtype).tiny:
        # The range is degenerate; fall back to scale 1 and zero point 0.
        scale = numpy.array(1.0, dtype=rmax.dtype)
        zero_point = numpy.array(0, dtype=qmin.dtype)
    else:
        if symmetric:
            zero_point = numpy.array(numpy.round((qmin + qmax) / 2.0), dtype=qmin.dtype)
        else:
            zero_point = numpy.array(numpy.round(qmin - rmin / scale), dtype=qmin.dtype)
        scale = scale.astype(rmax.dtype)

    return [zero_point, scale]
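

# Worked example for compute_scale_zp (illustrative values, not part of the original module):
# quantizing the range [-1.5, 2.0] to uint8 (qmin=0, qmax=255) asymmetrically gives
#   scale = (2.0 - (-1.5)) / (255 - 0) ~= 0.01372
#   zero_point = round(0 - (-1.5) / 0.01372) ~= 109
# so r ~= scale * (q - zero_point) reproduces the original range.
#
#   zp, scale = compute_scale_zp(
#       numpy.array(-1.5, dtype=numpy.float32),
#       numpy.array(2.0, dtype=numpy.float32),
#       numpy.array(0, dtype=numpy.uint8),
#       numpy.array(255, dtype=numpy.uint8),
#       symmetric=False,
#   )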


def compute_scale_zp_float8(element_type, std):
    """Calculate the scale s for a float8 type (E4M3FN).
    The function assumes the coefficient distribution and the float 8
    distribution are similar to two gaussian laws.

    :return: zero and scale [z, s]

    More details in notebook `quantization_fp8.ipynb
    <https://github.com/microsoft/onnxruntime/blob/main/docs/python/notebooks/quantization_fp8.ipynb>`_.
    """
    zp_dtype = None
    if element_type not in FLOAT8_DISTRIBUTIONS:
        if element_type == TensorProto.FLOAT8E4M3FN:
            from onnx.numpy_helper import float8e4m3_to_float32
            from onnx.reference.custom_element_types import float8e4m3fn

            zp_dtype = float8e4m3fn
            all_values = [float8e4m3_to_float32(i) for i in range(256)]
            values = numpy.array(
                [f for f in all_values if not numpy.isnan(f) and not numpy.isinf(f)], dtype=numpy.float32
            )
        else:
            raise ValueError(f"Quantization to element_type={element_type} not implemented.")
        FLOAT8_DISTRIBUTIONS[element_type] = values
    elif element_type == TensorProto.FLOAT8E4M3FN:
        from onnx.reference.custom_element_types import float8e4m3fn

        zp_dtype = float8e4m3fn

    if zp_dtype is None:
        raise TypeError(f"Unexpected element_type {element_type}.")
    std_f8 = numpy.std(FLOAT8_DISTRIBUTIONS[element_type])
    zero = numpy.array(0, dtype=zp_dtype)
    scale = numpy.array(std / std_f8, dtype=std.dtype)
    return [zero, scale]
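

# Sketch of the idea behind compute_scale_zp_float8 (illustrative, not part of the original module):
# the data's standard deviation is matched to the standard deviation of the finite
# float8e4m3fn grid, so
#   scale = std(data) / std(all finite float8e4m3fn values)
# and the zero point is always 0 for float8 quantization.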


def compute_data_quant_params(
    data: numpy.ndarray,
    quant_type: onnx.TensorProto.DataType,
    symmetric: bool,
    reduce_range: bool = False,
    min_real_range: float | None = None,
    rmin_override: float | None = None,
    rmax_override: float | None = None,
) -> tuple[numpy.ndarray, numpy.ndarray]:
    """
    Returns the zero_point and scale for the given data.

    :param data: The data for which to compute quantization parameters.
    :param quant_type: The quantization data type.
    :param symmetric: whether symmetric quantization is used or not.
    :parameter reduce_range: True if the quantization range should be reduced. Defaults to False.
    :parameter min_real_range: Minimum floating-point range (i.e., rmax - rmin) to enforce. Defaults to None.
    :parameter rmin_override: The value of rmin to use if not None. Otherwise, uses min(data).
    :parameter rmax_override: The value of rmax to use if not None. Otherwise, uses max(data).
    :return: zero point and scale
    """
    if not isinstance(data, numpy.ndarray):
        raise TypeError(f"Weight must be given as an array not {type(data)}.")

    if rmin_override is not None:
        rmin = rmin_override
    else:
        rmin = data.min() if len(data) else 0.0

    if rmax_override is not None:
        rmax = rmax_override
    else:
        rmax = data.max() if len(data) else 0.0

    rmin = numpy.array(rmin, dtype=data.dtype)
    rmax = numpy.array(rmax, dtype=data.dtype)
    scale = numpy.array(0.0, dtype=data.dtype)

    if quant_type == TensorProto.FLOAT8E4M3FN:
        if reduce_range:
            raise RuntimeError("Unsupported option reduce_range=True for float 8.")
        std = numpy.std(data)
        zero_point, scale = compute_scale_zp_float8(quant_type, std)
        return _check_type(zero_point, scale, zero_point_index=0)

    if quant_type in (
        TensorProto.INT8,
        TensorProto.UINT8,
        TensorProto.INT16,
        TensorProto.UINT16,
        TensorProto.INT4,
        TensorProto.UINT4,
    ):
        qmin, qmax = get_qmin_qmax_for_qType(quant_type, reduce_range, symmetric=symmetric)
        if len(data):
            zero_point, scale = compute_scale_zp(rmin, rmax, qmin, qmax, symmetric, min_real_range)
        else:
            zero_point = numpy.array(0, dtype=qmin.dtype)
        return _check_type(zero_point, scale, zero_point_index=0)

    raise ValueError(f"Unexpected value for quant_type={quant_type}.")
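

# Minimal usage sketch for compute_data_quant_params (illustrative values):
#
#   data = numpy.array([-0.5, 0.0, 1.25, 3.0], dtype=numpy.float32)
#   zero_point, scale = compute_data_quant_params(data, TensorProto.UINT8, symmetric=False)
#   # zero_point is a uint8 scalar array and scale a float32 scalar array, suitable for
#   # the scale/zero_point inputs of QuantizeLinear/DequantizeLinear.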


def quantize_data(
    data: numpy.ndarray,
    qType: onnx.TensorProto.DataType,
    symmetric: bool,
    reduce_range: bool = False,
    min_real_range: float | None = None,
    rmin_override: float | None = None,
    rmax_override: float | None = None,
) -> tuple[numpy.ndarray, numpy.ndarray, numpy.ndarray]:
    """
    :param data: data to quantize
    :param qType: data type to quantize to.
    :param symmetric: whether symmetric quantization is used or not.
    :parameter reduce_range: True if the quantization range should be reduced. Defaults to False.
    :parameter min_real_range: Minimum floating-point range (i.e., rmax - rmin) to enforce. Defaults to None.
    :parameter rmin_override: The value of rmin to use if not None. Otherwise, uses min(data).
    :parameter rmax_override: The value of rmax to use if not None. Otherwise, uses max(data).
    :return: zero point, scale, and quantized weights

    To pack weights, we compute a linear transformation

    - when data `type == uint8` mode, from `[rmin, rmax]` -> :math:`[0, 2^{b-1}]` and
    - when data `type == int8`, from `[-m , m]` -> :math:`[-(2^{b-1}-1), 2^{b-1}-1]` where
        `m = max(abs(rmin), abs(rmax))`

    and add necessary intermediate nodes to transform quantized weight to full weight using the equation

    :math:`r = S(q-z)`, where

    - *r*: real original value
    - *q*: quantized value
    - *S*: scale
    - *z*: zero point
    """
    zero_point, scale = compute_data_quant_params(
        data,
        qType,
        symmetric,
        reduce_range,
        min_real_range,
        rmin_override,
        rmax_override,
    )

    if qType == TensorProto.FLOAT8E4M3FN:
        quantized_data = quantize_nparray(qType, data, scale, zero_point)
        if any((quantized_data.astype(numpy.uint8).ravel() & 127) == 127):
            np_data = numpy.asarray(data)
            raise RuntimeError(
                f"One of the quantized value is NaN data in [{np_data.min()}, {np_data.max()}], "
                f"quantized_data in [{quantized_data.min()}, {quantized_data.max()}]."
            )
        return zero_point, scale, quantized_data

    if qType in (
        TensorProto.INT8,
        TensorProto.UINT8,
        TensorProto.INT16,
        TensorProto.UINT16,
        TensorProto.INT4,
        TensorProto.UINT4,
    ):
        quantized_data = quantize_nparray(qType, data, scale, zero_point)
        return zero_point, scale, quantized_data

    raise ValueError(f"Unexpected value for qType={qType}.")
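

# Illustrative end-to-end example for quantize_data (not part of the original module):
#
#   weights = numpy.array([-0.02, 0.0, 0.01, 0.05], dtype=numpy.float32)
#   zero_point, scale, q_weights = quantize_data(weights, TensorProto.INT8, symmetric=True)
#   # q_weights holds int8 values; the weights are recovered approximately as
#   # scale * (q_weights.astype(numpy.float32) - zero_point)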


def quantize_onnx_initializer(
    weight: onnx.TensorProto,
    quant_type: onnx.TensorProto.DataType,
    zero_point: numpy.ndarray,
    scale: numpy.ndarray,
    axis: int | None = None,
    quant_weight_name: str | None = None,
) -> onnx.TensorProto:
    """
    Returns a quantized version of the given ONNX initializer.

    :param weight: The ONNX initializer to quantize.
    :param quant_type: The final quantized data type.
    :param zero_point: The zero-point value to use for quantization.
    :param scale: The scale value to use for quantization.
    :param axis: The quantization axis if quantizing per-channel. Defaults to None.
    :param quant_weight_name: The name of the quantized initializer.
                              If not specified, the quantized name is generated.
    :return: The quantized ONNX initializer.
    """
    weight_data = tensor_proto_to_array(weight)
    q_weight_data = None

    if axis is None:
        # Per-tensor quantization.
        q_weight_data = quantize_nparray(quant_type, weight_data.flatten(), scale, zero_point)
    else:
        # Per-channel quantization: quantize each slice along `axis` with its own scale/zero point.
        channel_count = weight_data.shape[axis]
        channel_dims = list(weight_data.shape)
        channel_dims[axis] = 1
        quantized_channel_data_list = []

        for i in range(channel_count):
            channel_data = weight_data.take(i, axis)
            channel_scale = scale[i]
            channel_zero_point = zero_point[i]
            quantized_channel_data = quantize_nparray(quant_type, channel_data, channel_scale, channel_zero_point)
            quantized_channel_data_list.append(numpy.asarray(quantized_channel_data).reshape(channel_dims))

        q_weight_data = numpy.concatenate(quantized_channel_data_list, axis)

    q_weight_name = quant_weight_name if quant_weight_name else f"{weight.name}{TENSOR_NAME_QUANT_SUFFIX}"

    if quant_type == onnx_proto.TensorProto.FLOAT8E4M3FN:
        q_weight_initializer = onnx_proto.TensorProto()
        q_weight_initializer.data_type = quant_type
        q_weight_initializer.dims.extend(weight.dims)
        q_weight_initializer.name = q_weight_name
        # flatten() is required so per-channel quantized data is serialized contiguously.
        q_weight_initializer.raw_data = q_weight_data.flatten().copy().tobytes()
        if to_array_extended is not None:
            # Round-trip check: the serialized tensor must decode back to the quantized data.
            check = to_array_extended(q_weight_initializer)
            if check.shape != weight_data.shape or check.tobytes() != q_weight_data.tobytes():
                raise RuntimeError(
                    f"The initializer of shape {weight_data.shape} could not be created, expecting "
                    f"{q_weight_data.tobytes()[:10]}, got {check.shape} and {check.tobytes()[:10]} "
                    f"and shape={weight.dims}\nraw={str(q_weight_initializer)[:200]}."
                )
    elif quant_type in (onnx_proto.TensorProto.INT4, onnx_proto.TensorProto.UINT4):
        if q_weight_data.dtype not in (numpy.int8, numpy.uint8):
            raise RuntimeError(f"Quantized weights for {q_weight_name} must be 8-bit before packing as 4-bit values.")
        # Pack every two 8-bit elements into a single byte before creating the tensor.
        packed_data = bytes(pack_bytes_to_4bit(q_weight_data.tobytes()))
        q_weight_initializer = onnx.helper.make_tensor(q_weight_name, quant_type, weight.dims, packed_data, raw=True)
    else:
        quant_np_dtype = onnx.helper.tensor_dtype_to_np_dtype(quant_type)
        q_weight_data = numpy.asarray(q_weight_data, dtype=quant_np_dtype).reshape(weight.dims)
        q_weight_initializer = onnx.numpy_helper.from_array(q_weight_data, q_weight_name)

    return q_weight_initializer
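

# Typical call site for quantize_onnx_initializer (illustrative): quantize a float
# initializer per-tensor and keep the result next to its scale/zero-point initializers.
#
#   zp, scale, _ = quantize_data(tensor_proto_to_array(weight), TensorProto.INT8, symmetric=True)
#   q_init = quantize_onnx_initializer(weight, TensorProto.INT8, zp, scale)
#   # q_init.name == weight.name + "_quantized" unless quant_weight_name is given.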


def get_qmin_qmax_for_qType(qType, reduce_range=False, symmetric=False):
    """
    Return qmin and qmax, the minimum and maximum value representable by the given qType
    :parameter qType: onnx.onnx_pb.TensorProto.UINT8 or onnx.onnx_pb.TensorProto.INT8
    :return: qmin, qmax
    """
    if qType == onnx_proto.TensorProto.FLOAT8E4M3FN:
        raise NotImplementedError("This function is not implemented for float 8 as not needed.")

    qrange = None
    if reduce_range:
        qrange = ONNX_INT_TYPE_REDUCED_RANGE.get(qType)
    elif symmetric and qType in ONNX_INT_TYPE_SYMMETRIC_RANGE:
        qrange = ONNX_INT_TYPE_SYMMETRIC_RANGE[qType]
    else:
        qrange = ONNX_INT_TYPE_RANGE.get(qType)

    if not qrange:
        raise ValueError(f"Unexpected data type {qType} requested. Only INT8, UINT8, INT16, and UINT16 are supported.")

    qmin, qmax = qrange
    if qmin > 0 or qmax < 0:
        raise ValueError(
            f"qmin and qmax must meet requirement: qmin <= 0 <= qmax while qmin:{qmin}, qmmax:{qmax}, "
            f"dtype={qmin.dtype}, reduce_range={reduce_range}, symmetric={symmetric}, qType={qType}"
        )
    return qrange
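

# Examples of the ranges returned (values come from the dictionaries defined above):
#   get_qmin_qmax_for_qType(TensorProto.INT8)                     -> (-128, 127)
#   get_qmin_qmax_for_qType(TensorProto.INT8, symmetric=True)     -> (-127, 127)
#   get_qmin_qmax_for_qType(TensorProto.INT8, reduce_range=True)  -> (-64, 64)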


def get_qrange_for_qType(qType, reduce_range=False, symmetric=False):
    """
    Helper function to get the quantization range for a type.
        parameter qType: quantization type.
        return: quantization range.
    """
    qmin, qmax = get_qmin_qmax_for_qType(qType, reduce_range, symmetric=symmetric)
    return qmax - qmin


def normalize_axis(axis: int, rank: int) -> tuple[bool, int]:
    """
    Helper function that tries to return a normalized axis in the range [0, rank - 1].
    :parameter axis: The axis to normalize.
    :parameter rank: The tensor rank (number of dimensions).
    :return (is_valid, axis_norm)
    """
    axis_norm = axis + rank if axis < 0 else axis
    is_valid = axis_norm >= 0 and axis_norm < rank
    return is_valid, axis_norm
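

# Example for normalize_axis: for a rank-4 tensor, axis -1 normalizes to 3 and axis 5 is invalid.
#   normalize_axis(-1, 4)  -> (True, 3)
#   normalize_axis(5, 4)   -> (False, 5)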


def pack_bytes_to_4bit(src_8bit: bytes) -> bytearray:
    """
    Copies a source array of 8-bit values into a destination bytearray of packed 4-bit values.
    Assumes that the source values are already in the appropriate int4 range.
    :parameter src_8bit: The 8-bit element values to pack.
    :return A bytearray with every two 8-bit src elements packed into a single byte.
    """
    num_elems = len(src_8bit)
    if num_elems == 0:
        return bytearray()

    dst_size = (num_elems + 1) // 2  # e.g., 5 8-bit elements pack into 3 bytes
    dst = bytearray(dst_size)

    src_i = 0
    dst_i = 0

    # Pack two 8-bit elements into one byte per iteration.
    while src_i < num_elems - 1:
        dst[dst_i] = ((src_8bit[src_i + 1] & 0xF) << 4) | (src_8bit[src_i] & 0xF)
        dst_i += 1
        src_i += 2

    if src_i < num_elems:
        # Odd number of elements: the last nibble is stored on its own.
        dst[dst_i] = src_8bit[src_i] & 0xF

    return dst
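

# Packing example for pack_bytes_to_4bit (illustrative): the int4 values [1, 2, 3] are packed as
#   byte 0 = (2 << 4) | 1 = 0x21, byte 1 = 3 = 0x03
#   bytes(pack_bytes_to_4bit(bytes([1, 2, 3]))) == b"\x21\x03"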


class QuantizedInitializer:
    """
    Represents a linearly quantized weight input from ONNX operators
    """

    def __init__(
        self,
        name,
        initializer,
        rmins,
        rmaxs,
        zero_points,
        scales,
        data=[],
        quantized_data=[],
        axis=None,
    ):
        self.name = name
        self.initializer = initializer  # TensorProto initializer in the ONNX graph
        self.rmins = rmins  # list of minimum range per axis
        self.rmaxs = rmaxs  # list of maximum range per axis
        self.zero_points = zero_points  # zero point per axis (scalar if no axis)
        self.scales = scales  # scale per axis (scalar if no axis)
        self.data = data  # original data from the initializer
        self.quantized_data = quantized_data  # quantized (packed) data
        self.axis = axis  # dimension used for per-channel quantization, if any


class QuantizedValue:
    """
    Represents a linearly quantized value (input/output/initializer)
    """

    def __init__(
        self,
        name,
        new_quantized_name,
        scale_name,
        zero_point_name,
        quantized_value_type,
        axis=None,
        node_type=None,
        node_qtype=None,
        scale_type=None,
    ):
        self.original_name = name
        self.q_name = new_quantized_name
        self.scale_name = scale_name
        self.zp_name = zero_point_name
        self.value_type = quantized_value_type
        self.axis = axis
        self.node_type = node_type
        self.node_qtype = node_qtype
        self.scale_type = scale_type


class BiasToQuantize:
    """
    Represents a bias to be quantized
    """

    def __init__(self, bias_name, input_name, weight_name):
        self.bias_name = bias_name
        self.input_name = input_name
        self.weight_name = weight_name


def attribute_to_kwarg(attribute):
    """
    Convert attribute to kwarg format for use with onnx.helper.make_node.
        :parameter attribute: attribute in AttributeProto format.
        :return: attribute in {key: value} format.
    """
    if attribute.type == 0:
        raise ValueError(f"attribute {attribute.name} does not have type specified.")

    # Attribute type numbers follow onnx.AttributeProto.AttributeType.
    if attribute.type == 1:
        value = attribute.f
    elif attribute.type == 2:
        value = attribute.i
    elif attribute.type == 3:
        value = attribute.s
    elif attribute.type == 4:
        value = attribute.t
    elif attribute.type == 5:
        value = attribute.g
    elif attribute.type == 6:
        value = attribute.floats
    elif attribute.type == 7:
        value = attribute.ints
    elif attribute.type == 8:
        value = attribute.strings
    elif attribute.type == 9:
        value = attribute.tensors
    elif attribute.type == 10:
        value = attribute.graphs
    else:
        raise ValueError(f"attribute {attribute.name} has unsupported type {attribute.type}.")

    return {attribute.name: value}
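

# Example for attribute_to_kwarg (illustrative): copying attributes when rebuilding a node.
#
#   node = onnx.helper.make_node("Gemm", ["A", "B"], ["Y"], alpha=0.5)
#   kwargs = {}
#   for attr in node.attribute:
#       kwargs.update(attribute_to_kwarg(attr))
#   # kwargs == {"alpha": 0.5}, ready to pass to onnx.helper.make_node again.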


def find_by_name(item_name, item_list):
    """
    Helper function to find item by name in a list.
        parameter item_name: name of the item.
        parameter item_list: list of items.
        return: item if found. None otherwise.
    """
    items = [item for item in item_list if item.name == item_name]
    return items[0] if len(items) > 0 else None


def get_elem_index(elem_name, elem_list):
    """
    Helper function to return index of an item in a node list
    """
    elem_idx = -1
    for i in range(len(elem_list)):
        if elem_list[i] == elem_name:
            elem_idx = i
    return elem_idx


def get_mul_node(inputs, output, name):
    """
    Helper function to create a Mul node.
        parameter inputs: list of input names.
        parameter output: output name.
        parameter name: name of the node.
        return: Mul node in NodeProto format.
    """
    return onnx.helper.make_node("Mul", inputs, [output], name)


def generate_identified_filename(filename: Path, identifier: str) -> Path:
    """
    Helper function to generate an identifiable filepath by concatenating the given identifier as a suffix.
    """
    return filename.parent.joinpath(filename.stem + identifier + filename.suffix)


def apply_plot(hist, hist_edges):
    import sys

    import matplotlib.pyplot as plt
    import numpy

    numpy.set_printoptions(threshold=sys.maxsize)
    print("Histogram:")
    print(hist)
    print("Histogram Edges:")
    print(hist_edges)
    plt.stairs(hist, hist_edges, fill=True)
    plt.xlabel("Tensor value")
    plt.ylabel("Counts")
    plt.title("Tensor value V.S. Counts")
    plt.show()


def write_calibration_table(calibration_cache, dir="."):
    """
    Helper function to write calibration table to files.
    """
    import json

    import flatbuffers
    import numpy as np

    import onnxruntime.quantization.CalTableFlatBuffers.KeyValue as KeyValue
    import onnxruntime.quantization.CalTableFlatBuffers.TrtTable as TrtTable
    from onnxruntime.quantization.calibrate import CalibrationMethod, TensorData, TensorsData

    logging.info(f"calibration cache: {calibration_cache}")

    class MyEncoder(json.JSONEncoder):
        def default(self, obj):
            if isinstance(obj, (TensorData, TensorsData)):
                return obj.to_dict()
            if isinstance(obj, np.ndarray):
                return {"data": obj.tolist(), "dtype": str(obj.dtype), "CLS": "numpy.array"}
            if isinstance(obj, CalibrationMethod):
                return {"CLS": obj.__class__.__name__, "value": str(obj)}
            return json.JSONEncoder.default(self, obj)

    json_data = json.dumps(calibration_cache, cls=MyEncoder)

    with open(os.path.join(dir, "calibration.json"), "w") as file:
        file.write(json_data)

    # Serialize the table with FlatBuffers.
    builder = flatbuffers.Builder(1024)
    key_value_list = []
    for key in sorted(calibration_cache.keys()):
        value = calibration_cache[key]
        d_values = value.to_dict()
        floats = [float(d_values["highest"]), float(d_values["lowest"])]
        value = str(max(floats))

        flat_key = builder.CreateString(key)
        flat_value = builder.CreateString(value)

        KeyValue.KeyValueStart(builder)
        KeyValue.KeyValueAddKey(builder, flat_key)
        KeyValue.KeyValueAddValue(builder, flat_value)
        key_value = KeyValue.KeyValueEnd(builder)
        key_value_list.append(key_value)

    TrtTable.TrtTableStartDictVector(builder, len(key_value_list))
    for key_value in key_value_list:
        builder.PrependUOffsetTRelative(key_value)
    main_dict = builder.EndVector()

    TrtTable.TrtTableStart(builder)
    TrtTable.TrtTableAddDict(builder, main_dict)
    cal_table = TrtTable.TrtTableEnd(builder)
    builder.Finish(cal_table)
    buf = builder.Output()

    with open(os.path.join(dir, "calibration.flatbuffers"), "wb") as file:
        file.write(buf)

    # Deserialize and dump the table when QUANTIZATION_DEBUG is enabled.
    if os.environ.get("QUANTIZATION_DEBUG", 0) in (1, "1"):
        cal_table = TrtTable.TrtTable.GetRootAsTrtTable(buf)
        dict_len = cal_table.DictLength()
        for i in range(dict_len):
            key_value = cal_table.Dict(i)
            print(key_value.Key())
            print(key_value.Value())

    # Plain-text cache: one "name value" pair per line.
    with open(os.path.join(dir, "calibration.cache"), "w") as file:
        for key in sorted(calibration_cache.keys()):
            value = calibration_cache[key]
            d_values = value.to_dict()
            floats = [float(d_values["highest"]), float(d_values["lowest"])]
            s = key + " " + str(max(floats))
            file.write(s)
            file.write("\n")
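

# Illustrative input for write_calibration_table, assuming TensorData entries from
# onnxruntime.quantization.calibrate that expose "lowest"/"highest" in to_dict():
#
#   calibration_cache = {"conv1_output": some_tensor_data}
#   write_calibration_table(calibration_cache, dir="./calib")
#   # Produces calibration.json, calibration.flatbuffers and calibration.cache in ./calib.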


def smooth_distribution(p, eps=0.0001):
    """Given a discrete distribution (may have not been normalized to 1),
    smooth it by replacing zeros with eps multiplied by a scaling factor
    and taking the corresponding amount off the non-zero values.
    Ref: http://web.engr.illinois.edu/~hanj/cs412/bk3/KL-divergence.pdf
         https://github.com/apache/incubator-mxnet/blob/master/python/mxnet/contrib/quantization.py
    """
    is_zeros = (p == 0).astype(numpy.float32)
    is_nonzeros = (p != 0).astype(numpy.float32)
    n_zeros = is_zeros.sum()
    n_nonzeros = p.size - n_zeros

    if not n_nonzeros:
        # The distribution is malformed: every entry is 0.
        return None
    eps1 = eps * float(n_zeros) / float(n_nonzeros)
    assert eps1 < 1.0, f"n_zeros={n_zeros}, n_nonzeros={n_nonzeros}, eps1={eps1}"

    hist = p.astype(numpy.float32)
    hist += eps * is_zeros + (-eps1) * is_nonzeros
    assert (hist <= 0).sum() == 0

    return hist
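

# Small example for smooth_distribution (illustrative): a histogram with one empty bin keeps
# its mass but becomes strictly positive, which is required before computing KL divergence.
#
#   smooth_distribution(numpy.array([2.0, 0.0, 2.0], dtype=numpy.float32))
#   # -> array([1.99995, 0.0001, 1.99995], dtype=float32)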


def model_has_external_data(model_path: Path):
    model = onnx.load(model_path.as_posix(), load_external_data=False)
    return any(external_data_helper.uses_external_data(intializer) for intializer in model.graph.initializer)


def optimize_model(model_path: Path, opt_model_path: Path):
    """
    Generate model that applies graph optimization (constant folding, etc.)
        parameter model_path: path to the original onnx model
        parameter opt_model_path: path to the optimized onnx model
    :return: optimized onnx model
    """
    sess_option = SessionOptions()
    sess_option.optimized_model_filepath = opt_model_path.as_posix()
    sess_option.graph_optimization_level = GraphOptimizationLevel.ORT_ENABLE_BASIC
    kwargs = {"disabled_optimizers": ["ConstantSharing"]}
    _ = InferenceSession(model_path.as_posix(), sess_option, providers=["CPUExecutionProvider"], **kwargs)


def add_pre_process_metadata(model: ModelProto):
    """Tag the model that it went through quantization pre-processing"""
    metadata_props = {"onnx.quant.pre_process": "onnxruntime.quant"}
    if model.metadata_props:
        for prop in model.metadata_props:
            metadata_props.update({prop.key: prop.value})
    onnx.helper.set_model_props(model, metadata_props)


def model_has_pre_process_metadata(model: ModelProto) -> bool:
    """Check the model whether it went through quantization pre-processing"""
    if model.metadata_props:
        for prop in model.metadata_props:
            if prop.key == "onnx.quant.pre_process" and prop.value == "onnxruntime.quant":
                return True
    return False


def add_infer_metadata(model: ModelProto):
    metadata_props = {"onnx.infer": "onnxruntime.quant"}
    if model.metadata_props:
        for prop in model.metadata_props:
            metadata_props.update({prop.key: prop.value})
    onnx.helper.set_model_props(model, metadata_props)


def model_has_infer_metadata(model: ModelProto) -> bool:
    if model.metadata_props:
        for prop in model.metadata_props:
            if prop.key == "onnx.infer" and prop.value == "onnxruntime.quant":
                return True
    return False


def load_model_with_shape_infer(model_path: Path) -> ModelProto:
    inferred_model_path = generate_identified_filename(model_path, "-inferred")
    onnx.shape_inference.infer_shapes_path(str(model_path), str(inferred_model_path))
    model = onnx.load(inferred_model_path.as_posix())
    add_infer_metadata(model)
    inferred_model_path.unlink()
    return model


def save_and_reload_model_with_shape_infer(model: ModelProto) -> ModelProto:
    with tempfile.TemporaryDirectory(prefix="ort.quant.") as quant_tmp_dir:
        model_copy = copy.deepcopy(model)
        model_path = Path(quant_tmp_dir).joinpath("model.onnx")
        onnx.save_model(model_copy, model_path.as_posix(), save_as_external_data=True)
        return load_model_with_shape_infer(model_path)


def tensor_proto_to_array(initializer: TensorProto) -> numpy.ndarray:
    if initializer.data_type in (onnx_proto.TensorProto.FLOAT, onnx_proto.TensorProto.FLOAT16):
        return onnx.numpy_helper.to_array(initializer)
    raise ValueError(
        f"Only float type is supported. Weights {initializer.name} is {type_to_name[initializer.data_type]}."
    )


def add_quant_suffix(tensor_name: str) -> str:
    return tensor_name + "_QuantizeLinear"


def add_quant_input_suffix(tensor_name: str) -> str:
    return tensor_name + QUANT_INPUT_SUFFIX


def add_quant_output_suffix(tensor_name: str) -> str:
    return tensor_name + "_QuantizeLinear_Output"


def add_dequant_suffix(tensor_name: str) -> str:
    return tensor_name + "_DequantizeLinear"


def add_dequant_input_suffix(tensor_name: str) -> str:
    return tensor_name + "_DequantizeLinear_Input"


def add_dequant_output_suffix(tensor_name: str) -> str:
    return tensor_name + DEQUANT_OUTPUT_SUFFIX