o
    vi                     @  s  d dl mZ d dlZd dlZd dlZd dlZd dlmZ d dlm	Z	 d dl
Z
d dlZd dlmZmZmZ d dlmZ d dlmZmZmZmZ d dlmZ d d	lmZmZmZ zd d
lmZ W n eyi   dZY nw z
d dlmZm Z  W n ey   dZdZ Y nw zd dl!m"Z" W n ey   dZ"Y nw dZ#dZ$dZ%dZ&dZ'dZ(dZ)dZ*dZ+dZ,i Z-dd e.eD Z/G dd deZ0G dd deZ1G dd deZ2G dd  d eZ3ejj4e
5d!ejj6e
5d"ejj7e
5d#ejj8e
5d$ejj9eejj:eejj;e iZ<ejj6e
j=d e
j>d%e
j=d&e
j>d%fejj4e
j=d'e
j?d%e
j=d(e
j?d%fejj8e
j=d e
j@d%e
j=d)e
j@d%fejj7e
j=d*e
jAd%e
j=d+e
jAd%fejj;e
j=d e d%e
j=d,e d%fejj:e
j=d-ed%e
j=d.ed%fiZBejj6e
j=d e
j>d%e
j=d/e
j>d%fejj4e
j=d0e
j?d%e
j=d(e
j?d%fejj8e
j=d e
j@d%e
j=d1e
j@d%fejj7e
j=d2e
jAd%e
j=d+e
jAd%fiZCejj6e
j=d e
j>d%e
j=d(e
j>d%fejj4e
j=d3e
j?d%e
j=d4e
j?d%fejj8e
j=d e
j@d%e
j=d+e
j@d%fejj7e
j=d5e
jAd%e
j=d6e
jAd%fejj;e
j=d ed%e
j=d.ed%fejj:e
j=d7ed%e
j=d8ed%fiZDd9d:d;d<ZEdd=d>ZFdd@dAZGdBdC ZH	?			dddQdRZI	dddTdUZJ		ddd^d_ZKdd`daZLddbdcZMddgdhZNddldmZOG dndo doZPG dpdq dqZQG drds dsZRdtdu ZSdvdw ZTdxdy ZUdzd{ ZVdddZWdd ZXdddZYdddZZdddZ[dddZ\dddZ]dddZ^dddZ_dddZ`dddZadddZbdddZcdddZddddZedddZfdddZgdddZhdddZidddZjdddZkdS )    )annotationsN)Enum)Path)
ModelProtoTensorProtoexternal_data_helper)onnx_pb)
make_graph
make_model	make_nodemake_tensor_value_info)ReferenceEvaluator)GraphOptimizationLevelInferenceSessionSessionOptionsfloat8e4m3fn)int4uint4)to_array_extendedzonnx.quantizez0.1.0ai.onnxzcom.microsoftQuantizeLinear_QuantizeLinear_InputDequantizeLinear_DequantizeLinear_Output
_quantizedl        c                 C  s(   i | ]}t tt|trtt||qS  )
isinstancegetattrr   int).0kr   r   c/home/air/biblejyuku/back/venv/lib/python3.10/site-packages/onnxruntime/quantization/quant_utils.py
<dictcomp>9   s   ( r#   c                   @  (   e Zd ZdZdZdd Zedd ZdS )QuantizationModer      c                 C     | j S Nnameselfr   r   r"   __str__D      zQuantizationMode.__str__c                 C      zt |  W S  ty   t w r(   )r%   KeyError
ValueError)moder   r   r"   from_stringG   
   
zQuantizationMode.from_stringN)__name__
__module____qualname__
IntegerOps
QLinearOpsr-   staticmethodr3   r   r   r   r"   r%   @       r%   c                   @  r$   )QuantizedValueTyper   r&   c                 C  r'   r(   r)   r+   r   r   r"   r-   S   r.   zQuantizedValueType.__str__c                 C  r/   r(   )r<   r0   r1   )vr   r   r"   r3   V   r4   zQuantizedValueType.from_stringN)r5   r6   r7   InputInitializerr-   r:   r3   r   r   r   r"   r<   O   r;   r<   c                   @  sH   e Zd ZdZdZdZdZdZdZdZ	dd	 Z
ed
d Zedd ZdS )	QuantTyper   r&                  c                 C  r'   r(   r)   r+   r   r   r"   r-   g   r.   zQuantType.__str__c                 C  r/   r(   )r@   r0   r1   )tr   r   r"   r3   j   r4   zQuantType.from_stringc                 C  s   | t jkrtjS | t jkrtjS | t jkrtjS | t jkr tj	S | t j
kr(tjS | t jkr0tjS | t jkr8tjS td| d)NzUnexpected value qtype=.)r@   QInt8r   INT8QUInt8UINT8QUInt16UINT16QInt16INT16QFLOAT8E4M3FNFLOAT8E4M3FNQUInt4UINT4QInt4INT4r1   r+   r   r   r"   tensor_typeq   s   






zQuantType.tensor_typeN)r5   r6   r7   rH   rJ   rP   rN   rL   rT   rR   r-   r:   r3   propertyrV   r   r   r   r"   r@   ^   s    
r@   c                   @  r$   )QuantFormatr   r&   c                 C  r'   r(   r)   r+   r   r   r"   r-      r.   zQuantFormat.__str__c                 C  r/   r(   )rX   r0   r1   )formatr   r   r"   r3      r4   zQuantFormat.from_stringN)r5   r6   r7   	QOperatorQDQr-   r:   r3   r   r   r   r"   rX      r;   rX   int8uint8int16uint16dtype   i   i  i i     i      ii  ii@   i i @  rB   zero_point_indexc                 G  s   g }t |D ]H\}}tt|tjr|t| nt|tjr(|| n
t	d| d| || krN|d }|j
tjksF|j
tjkrNt	d|j
 qt|dkrYt|S |d S )Nzarg z is not an array: ri   zzero_point cannot be r&   r   )	enumeratenumpy
issubdtypetypenumberappendarrayr   ndarray	TypeErrorra   float32float16lentuple)rk   argsnew_argsiar=   r   r   r"   _check_type   s   r}   c                 C  s  | t v sJ d|  d| tjjtjjtjjtjjfv r|dkr(td|d|jt	j
kr2tj}n|jt	jkr<tj}n	td|j dtttdg dgtjd| g dgd	td
g ddggdtd|d td|d gtd| d g}t|}t|d ||dd S t |  }	t| ddd\}
}|d urt|
|n|
}|d urt||n|}t	|t	j
|  | }t	j||||d t||	S )NUnexpected data type > requested. Only INT8, UINT8, INT16, and UINT16 are supported.r   z2zero_point is expected to be null for float 8 not rG   zUnexpected dtype Constant
zero_point)valuer   )Xscaler   Yqur   r   )r   r   F)reduce_range	symmetric)out) ONNX_TYPE_TO_NP_TYPE
onnx_protor   rQ   FLOAT8E4M3FNUZ
FLOAT8E5M2FLOAT8E5M2FNUZNotImplementedErrorra   rm   ru   FLOATrv   FLOAT16r1   r
   r	   r   onnxhelpermake_tensorr   r   r}   runget_qmin_qmax_for_qTypemaxminasarrayastyperoundclip)qTypearrr   r   lowhigh	onnx_type
onnx_modelrefra   qminqmaxcliplowcliphigharr_fp32r   r   r"   quantize_nparray   sN   



r   Fc                 C  s  |dks|dk rt d| d| t| tjd| jd} t|tjd|jd}|dur;t|| tj|| jd }|rOtt| t|}| } |
 }||ks]J d|  d| tj||  tj	d}tj|tj	dtj|tj	d }t|| }	|	dksJ d|	t
|jjk rtjd	|jd}	tjd|jd}
|
|	gS |rtjt|| tjd
tj	d |jd}
ntjt|| |	  |jd}
|	|j}	|
|	gS )a  Calculate the scale s and zero point z for the quantization relation
    r = s(q-z), where r are the original values and q are the corresponding
    quantized values.

    r and z are calculated such that every value within [rmin,rmax] has an
    approximate representation within [qmin,qmax]. In addition, qmin <= z <=
    qmax is enforced. If the symmetric flag is set to True, the interval
    [rmin,rmax] is symmetrized to [-absmax, +absmax], where
    absmax = max(abs(rmin), abs(rmax)).

    :parameter rmin: minimum value of r
    :parameter rmax: maximum value of r
    :parameter qmin: minimum value representable by the target quantization data type
    :parameter qmax: maximum value representable by the target quantization data type
    :parameter symmetric: True if the floating-point range should be made symmetric. Defaults to False.
    :parameter min_real_range: Minimum floating-point range (i.e., rmax - rmin) to enforce. Defaults to None.
    :return: zero and scale [z, s]

    r   Bqmin and qmax must meet requirement: qmin <= 0 <= qmax while qmin:, qmmax:r`   Nzqmin=z > qmax=zscale issue      ?g       @)r1   rm   minimumrr   ra   maximumr   r   absfloat64finfotinyr   r   )rminrmaxr   r   r   min_real_rangeabsmaxdrdqr   r   r   r   r"   compute_scale_zp   s4     r   c           	        s   d}| t vr?| tjkr2ddlm  ddlm} |} fddtdD }tj	dd |D tj
d	}ntd
|  d|t | < n| tjkrLddlm} |}|du rXtd|  dtt |  }tj	d|d	}tj	|| |jd	}||gS )ar  Calculate the scale s for a float8 type (E4M3FN).
    The function assumes the coefficient distribution and the float 8
    distribution are similar to two gaussian laws.

    :return: zero and scale [z, s]

    More details in notebook `quantization_fp8.ipynb
    <https://github.com/microsoft/onnxruntime/blob/main/docs/python/notebooks/quantization_fp8.ipynb>`_.
    Nr   float8e4m3_to_float32r   c                   s   g | ]} |qS r   r   )r    r{   r   r   r"   
<listcomp>K  s    z+compute_scale_zp_float8.<locals>.<listcomp>   c                 S  s$   g | ]}t |st |s|qS r   )rm   isnanisinf)r    fr   r   r"   r   M  s   $ r`   zQuantization to element_type=z not implemented.zUnexpected element_type rG   )FLOAT8_DISTRIBUTIONSr   rQ   onnx.numpy_helperr   #onnx.reference.custom_element_typesr   rangerm   rr   ru   r1   rt   stdra   )	element_typer   zp_dtyper   
all_valuesvaluesstd_f8zeror   r   r   r"   compute_scale_zp_float8:  s*   



r   datanumpy.ndarray
quant_typeonnx.TensorProto.DataTyper   boolr   r   float | Nonermin_overridermax_overridereturn#tuple[numpy.ndarray, numpy.ndarray]c                 C  sP  t | tjstdt|  d|dur|}n
t| r|  nd}|dur(|}n
t| r0|  nd}tj|| j	d}tj|| j	d}tjd| j	d}	|t
jkrh|rUtdt| }
t||
\}}	t||	dd	S |t
jt
jt
jt
jt
jt
jfv rt|||d
\}}t| rt||||||\}}	ntjd|j	d}t||	dd	S td| d)a  
    Returns the zero_point and scale for the given data.

    :param data: The data for which to compute quantization parameters.
    :param quant_type: The quantization data type.
    :param symmetric: whether symmetric quantization is used or not.
    :parameter reduce_range: True if the quantization range should be reduced. Defaults to False.
    :parameter min_real_range: Minimum floating-point range (i.e., rmax - rmin) to enforce. Defaults to None.
    :parameter rmin_override: The value of rmin to use if not None. Otherwise, uses min(data).
    :parameter rmax_override: The value of rmax to use if not None. Otherwise, uses max(data).
    :return: zero point and scale
    z%Weight must be given as an array not rG   Ng        r`   r   z1Unsupported option reduce_range=True for float 8.r   rj   r   z Unexpected value for quant_type=)r   rm   rs   rt   ro   rw   r   r   rr   ra   r   rQ   RuntimeErrorr   r   r}   rI   rK   rO   rM   rU   rS   r   r   r1   )r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r"   compute_data_quant_params_  s>   

r   2tuple[numpy.ndarray, numpy.ndarray, numpy.ndarray]c              
   C  s   t | ||||||\}}|tjkrIt|| ||}	t|	tj d@ dkrDt	| }
t
d|
  d|
  d|	  d|	  d	|||	fS |tjtjtjtjtjtjfv ret|| ||}	|||	fS td| d)al  
    :param data: data to quantize
    :param qType: data type to quantize to.
    :param symmetric: whether symmetric quantization is used or not.
    :parameter reduce_range: True if the quantization range should be reduced. Defaults to False.
    :parameter min_real_range: Minimum floating-point range (i.e., rmax - rmin) to enforce. Defaults to None.
    :parameter rmin_override: The value of rmin to use if not None. Otherwise, uses min(data).
    :parameter rmax_override: The value of rmax to use if not None. Otherwise, uses max(data).
    :return: minimum, maximum, zero point, scale, and quantized weights

    To pack weights, we compute a linear transformation

    - when data `type == uint8` mode, from `[rmin, rmax]` -> :math:`[0, 2^{b-1}]` and
    - when data `type == int8`, from `[-m , m]` -> :math:`[-(2^{b-1}-1), 2^{b-1}-1]` where
        `m = max(abs(rmin), abs(rmax))`

    and add necessary intermediate nodes to transform quantized weight to full weight using the equation

    :math:`r = S(q-z)`, where

    - *r*: real original value
    - *q*: quantized value
    - *S*: scale
    - *z*: zero point
    rc   z+One of the quantized value is NaN data in [z, z], quantized_data in [z].zUnexpected value for qType=rG   )r   r   rQ   r   anyr   rm   r]   ravelr   r   r   r   rI   rK   rO   rM   rU   rS   r1   )r   r   r   r   r   r   r   r   r   quantized_datanp_datar   r   r"   quantize_data  s@   
	


r   weightonnx.TensorProtor   r   axis
int | Nonequant_weight_name
str | Nonec                 C  s  t | }d}|du rt|| ||}n?|j| }t|j}	d|	|< g }
t|D ]$}|||}|| }|| }t|| ||}|
t	|
|	 q(t|
|}|rW|n| j t }|tjjkrt }||_|j| j ||_|   |_tdurt|}|j|jks| | krtd|j d| dd  d| dd  d| j dt|dd	  d
|S |tjjtjjfv r|jtjtjfvrtd| dt t!| }tj"j#||| j|dd}|S tj"$|}tj	||d
| j}tj%&||}|S )aG  
    Returns a quantized version of the given ONNX initializer.

    :param weight: The ONNX initializer to quantize.
    :param quant_type: The final quantized data type.
    :param zero_point: The zero-point value to use for quantization.
    :param scale: The scale value to use for quantization.
    :param axis: The quantization axis if quantizing per-channel. Defaults to None.
    :param quant_weight_name: The name of the quantized initializer.
                              If not specified, the quantized name is generated.
    :return: The quantized ONNX initializer.
    Nr&   zThe initializer of shape z! could not be created, expecting 
   z, got z and shape=z
raw=   rG   zQuantized weights for z. must be 8-bit before packing as 4-bit values.T)rawr`   )'tensor_proto_to_arrayr   r   shapelistr   takerq   rm   r   reshapeconcatenater*   TENSOR_NAME_QUANT_SUFFIXr   r   rQ   	data_typedimsextendflattencopytobytesraw_datar   r   strrU   rS   ra   r\   r]   bytespack_bytes_to_4bitr   r   tensor_dtype_to_np_dtypenumpy_helper
from_array)r   r   r   r   r   r   weight_dataq_weight_datachannel_countchannel_dimsquantized_channel_data_listr{   channel_datachannel_scalechannel_zero_pointquantized_channel_dataq_weight_nameq_weight_initializercheckpacked_dataquant_np_dtyper   r   r"   quantize_onnx_initializer  sb   


r  c                 C  s   | t jjkr
tdd}|rt| }n|r| tv rt|  }nt| }|s.td|  d|\}}|dks:|dk rQtd| d| d|j	 d	| d
| d|  |S )z
    Return qmin and qmax, the minimum and maximum value representable by the given qType
    :parameter qType: onnx.onnx_pb.TensorProto.UINT8 or onnx.onnx_pb.TensorProto.UINT8
    :return: qmin, qmax
    z;This function is not implemented for float 8 as not needed.Nr~   r   r   r   r   z, dtype=z, reduce_range=z, symmetric=z, qType=)
r   r   rQ   r   ONNX_INT_TYPE_REDUCED_RANGEgetONNX_INT_TYPE_SYMMETRIC_RANGEONNX_INT_TYPE_RANGEr1   ra   )r   r   r   qranger   r   r   r   r"   r   )  s8   

r   c                 C  s   t | ||d\}}|| S )z
    Helper function to get the quantization range for a type.
        parameter qType: quantization type.
        return: quantization range.
    r   )r   )r   r   r   r   r   r   r   r"   get_qrange_for_qTypeI  s   r  r   ranktuple[bool, int]c                 C  s,   | dk r| | n| }|dko||k }||fS )z
    Helper function that tries to return a normalized axis in the range [0, rank - 1].
    :parameter axis: The axis to normalize.
    :parameter rank: The tensor rank (number of dimensions).
    :return (is_valid, axis_norm)
    r   r   )r   r  	axis_normis_validr   r   r"   normalize_axisS  s   r  src_8bitr   	bytearrayc                 C  s   t | }|dkrt S |d d }t|}d}d}||d k r?| |d  d@ d> | | d@ B ||< |d7 }|d7 }||d k s||k rK| | d@ ||< |S )aB  
    Copies a source array of 8-bit values into a destination bytearray of packed 4-bit values.
    Assumes that the source values are already in the appropriate int4 range.
    :parameter src_8bit: The 8-bit element values to pack.
    :return A bytearray with every two 8-bit src elements packed into a single byte.
    r   r&   rA   rd   rC   )rw   r  )r  	num_elemsdst_sizedstsrc_idst_ir   r   r"   r   _  s   $r   c                   @  s    e Zd ZdZg g dfddZdS )QuantizedInitializerzJ
    Represents a linearly quantized weight input from ONNX operators
    Nc
           
      C  :   || _ || _|| _|| _|| _|| _|| _|| _|	| _d S r(   )	r*   initializerrminsrmaxszero_pointsscalesr   r   r   )
r,   r*   r   r!  r"  r#  r$  r   r   r   r   r   r"   __init__  s   
zQuantizedInitializer.__init__r5   r6   r7   __doc__r%  r   r   r   r"   r  }  s    r  c                   @  s"   e Zd ZdZ				dddZdS )QuantizedValuezI
    Represents a linearly quantized value (input\output\intializer)
    Nc
           
      C  r  r(   )	original_nameq_name
scale_namezp_name
value_typer   	node_type
node_qtype
scale_type)
r,   r*   new_quantized_namer+  zero_point_namequantized_value_typer   r.  r/  r0  r   r   r"   r%    s   
zQuantizedValue.__init__)NNNNr&  r   r   r   r"   r(    s    r(  c                   @  s   e Zd ZdZdd ZdS )BiasToQuantizez+
    Represents a bias to be quantized
    c                 C  s   || _ || _|| _d S r(   )	bias_name
input_nameweight_name)r,   r5  r6  r7  r   r   r"   r%    s   
zBiasToQuantize.__init__Nr&  r   r   r   r"   r4    s    r4  c                 C  s   | j dkrtd| j d| j dkr| j}n^| j dkr | j}nU| j dkr)| j}nL| j dkr2| j}nC| j dkr;| j}n:| j d	krD| j}n1| j d
krM| j	}n(| j dkrV| j
}n| j dkr_| j}n| j dkrh| j}ntd| j d| j  d| j|iS )z
    Convert attribute to kwarg format for use with onnx.helper.make_node.
        :parameter attribute: attribute in AttributeProto format.
        :return: attribute in {key: value} format.
    r   z
attribute z does not have type specified.r&   rA   rB   rC   rD   rE   re      	   r   z has unsupported type rG   )ro   r1   r*   r   r{   srF   gfloatsintsstringstensorsgraphs)	attributer   r   r   r"   attribute_to_kwarg  s0   











rB  c                   s*    fdd|D }t |dkr|d S dS )z
    Helper function to find item by name in a list.
        parameter item_name: name of the item.
        parameter item_list: list of items.
        return: item if found. None otherwise.
    c                   s   g | ]	}|j  kr|qS r   r)   )r    item	item_namer   r"   r     s    z find_by_name.<locals>.<listcomp>r   N)rw   )rE  	item_listitemsr   rD  r"   find_by_name  s   rH  c                 C  s*   d}t t|D ]
}|| | kr|}q|S )zC
    Helper function to return index of an item in a node list
    ri   )r   rw   )	elem_name	elem_listelem_idxr{   r   r   r"   get_elem_index  s   rL  c                 C  s   t jd| |g|S )z
    Helper function to create a Mul node.
        parameter inputs: list of input names.
        parameter output: output name.
        parameter name: name of the node.
        return: Mul node in NodeProto format.
    Mul)r   r   r   )inputsoutputr*   r   r   r"   get_mul_node  s   rP  filenamer   
identifierr   c                 C  s   | j | j| | j S )zp
    Helper function to generate a identifiable filepath by concatenating the given identifier as a suffix.
    )parentjoinpathstemsuffix)rQ  rR  r   r   r"   generate_identified_filename	  s   rW  c                 C  s   dd l }dd lm} dd l}|j|jd td t|  td t| |j| |dd |d |	d |
d	 |  d S )
Nr   )	thresholdz
Histogram:zHistogram Edges:T)fillzTensor valueCountszTensor value V.S. Counts)sysmatplotlib.pyplotpyplotrm   set_printoptionsmaxsizeprintstairsxlabelylabeltitleshow)hist
hist_edgesr[  pltrm   r   r   r"   
apply_plot  s   


ri  rG   c                   s<  ddl ddl}ddlddlm  m  m} ddlm  m  m} ddl	m
 mm td|   G  fdddj}j| |d}ttj|dd	}|| W d   n1 sfw   Y  d}|d
}	g }
t|  D ]I}| | }| }t|d| t|d| g}tt|}|	 |}|	 |}|!|	 |"|	| |#|	| |$|	}|
%| q}|&|	t'|
 |
D ]}|	(| q|	) }|*|	 |+|	| |,|	}|	-| |	. }ttj|dd}|| W d   n	1 sw   Y  tj/dddv rF|j0|d}|1 }t2|D ]}|3|}t|4  t|5  q/ttj|dd	C}t|  D ]3}| | }| }t|d| t|d| g}|d tt| }|| |d qWW d   dS 1 sw   Y  dS )z>
    Helper function to write calibration table to files.
    r   N)CalibrationMethod
TensorDataTensorsDatazcalibration cache: c                      s    e Zd Z fddZdS )z*write_calibration_table.<locals>.MyEncoderc                   sb   t |fr| S t |jr| t|jddS t | r*|jjt|dS j	| |S )Nznumpy.array)r   ra   CLS)rm  r   )
r   to_dictrs   tolistr   ra   	__class__r5   JSONEncoderdefault)r,   objrj  rk  rl  jsonnpr   r"   rr  3  s   
z2write_calibration_table.<locals>.MyEncoder.defaultN)r5   r6   r7   rr  r   rt  r   r"   	MyEncoder2  s    rw  )clszcalibration.jsonwi   highestlowestzcalibration.flatbufferswbQUANTIZATION_DEBUG0)r&   1zcalibration.cache 
)6ru  flatbuffersrm   5onnxruntime.quantization.CalTableFlatBuffers.KeyValuequantizationCalTableFlatBuffersKeyValue5onnxruntime.quantization.CalTableFlatBuffers.TrtTableTrtTable"onnxruntime.quantization.calibraterj  rk  rl  logginginforq  dumpsopenospathjoinwriterr   Buildersortedkeysrn  floatr  rC  r   r   CreateStringKeyValueStartKeyValueAddKeyKeyValueAddValueKeyValueEndrq   TrtTableStartDictVectorrw   PrependUOffsetTRelative	EndVectorTrtTableStartTrtTableAddDictTrtTableEndFinishOutputenvironGetRootAsTrtTable
DictLengthr   DictKeyValue)calibration_cachedirr  r  r  rw  	json_datafiler   builderkey_value_listkeyr   d_valuesr<  r   flat_key
flat_value	key_value	main_dict	cal_tablebufdict_lenr{   r   rt  r"   write_calibration_table"  sx   











$r  -C6?c                 C  s   | dk tj}| dk tj}| }| j| }|sdS |t| t| }|dk s8J d| d| d| |  tj}||| | |  7 }|dk dksSJ |S )a~  Given a discrete distribution (may have not been normalized to 1),
    smooth it by replacing zeros with eps multiplied by a scaling factor
    and taking the corresponding amount off the non-zero values.
    Ref: http://web.engr.illinois.edu/~hanj/cs412/bk3/KL-divergence.pdf
         https://github.com//apache/incubator-mxnet/blob/master/python/mxnet/contrib/quantization.py
    r   Nr   zn_zeros=z, n_nonzeros=z, eps1=)r   rm   ru   sumsizer  )pepsis_zerosis_nonzerosn_zeros
n_nonzeroseps1rf  r   r   r"   smooth_distribution~  s   
"r  
model_pathc                 C  s(   t j|  dd}tdd |jjD S )NF)load_external_datac                 s  s    | ]}t |V  qd S r(   )r   uses_external_data)r    
intializerr   r   r"   	<genexpr>  s    z*model_has_external_data.<locals>.<genexpr>)r   loadas_posixr   graphr   )r  modelr   r   r"   model_has_external_data  s   r  opt_model_pathc                 C  sF   t  }| |_tj|_i }dg|d< t|  |fddgi|}dS )z
        Generate model that applies graph optimization (constant folding, etc.)
        parameter model_path: path to the original onnx model
        parameter opt_model_path: path to the optimized onnx model
    :return: optimized onnx model
    ConstantSharingdisabled_optimizers	providersCPUExecutionProviderN)r   r  optimized_model_filepathr   ORT_ENABLE_BASICgraph_optimization_levelr   )r  r  sess_optionkwargs_r   r   r"   optimize_model  s   

 r  r  r   c                 C  s>   ddi}| j r| j D ]}||j|ji q
tj| | dS )z>Tag the model that it went through quantization pre-processingonnx.quant.pre_processonnxruntime.quantNmetadata_propsupdater  r   r   r   set_model_props)r  r  propr   r   r"   add_pre_process_metadata  s
   
r  c                 C  0   | j r| j D ]}|jdkr|jdkr dS qdS )zCCheck the model whether it went through quantization pre-processingr  r  TFr  r  r   )r  r  r   r   r"   model_has_pre_process_metadata  s   
r  c                 C  s>   ddi}| j r| j D ]}||j|ji q
tj| | d S )N
onnx.inferr  r  )r  r  r  r   r   r"   add_infer_metadata  s
   
r  c                 C  r  )Nr  r  TFr  )r  r  r   r   r"   model_has_infer_metadata  s   
r  c                 C  s2   dd | j D }t|dkrtd|d j}|S )Nc                 S  s    g | ]}|j r|j d kr|qS )r   )domain)r    opsetr   r   r"   r     s     z%get_opset_version.<locals>.<listcomp>r&   z$Failed to find proper ai.onnx domainr   )opset_importrw   r1   version)r  ai_onnx_domainopset_versionr   r   r"   get_opset_version  s
   
r  weight_typec                 C  s   t | }|}t|d|}|dk r"|tjjkr"td| d d}n|dkr0td| d n|dk r?td| d d}||krNtj| |} t	| } | S )	NrV      z$The original model opset version is z, which does not support quantization to float 8. Please update the model to opset >= 19. Automatically update the model to opset 19. Please verify the quantized model.r   ze, which does not support node fusions. Please update the model to opset >= 11 for better performance.z, which does not support quantization. Please update the model to opset >= 11. Automatically update the model to opset 11. Please verify the quantized model.   )
r  r   r   r   rQ   r  warningversion_converterconvert_version&save_and_reload_model_with_shape_infer)r  r  r  target_opset_versionweight_quant_typer   r   r"   update_opset_version  s*   


r  c                 C  sB   t | d}tjt| t| t| }t| |  |S )Nz	-inferred)	rW  r   shape_inferenceinfer_shapes_pathr   r  r  r  unlink)r  inferred_model_pathr  r   r   r"   load_model_with_shape_infer  s   
r  c                 C  sd   t jdd"}t| }t|d}tj|| dd t	|W  d    S 1 s+w   Y  d S )Nz
ort.quant.)prefixz
model.onnxT)save_as_external_data)
tempfileTemporaryDirectoryr   deepcopyr   rT  r   
save_modelr  r  )r  quant_tmp_dir
model_copyr  r   r   r"   r    s   
$r  r   r   c                 C  s>   | j tjjtjjfv rtj| S td| j	 dt
| j   )Nz&Only float type is supported. Weights z is )r   r   r   r   r   r   r   to_arrayr1   r*   type_to_name)r   r   r   r"   r     s
   r   tensor_namec                 C     | d S )N_QuantizeLinearr   r  r   r   r"   add_quant_suffix     r  c                 C     | t  S r(   )QUANT_INPUT_SUFFIXr  r   r   r"   add_quant_input_suffix  r  r  c                 C  r	  )N_QuantizeLinear_Outputr   r  r   r   r"   add_quant_output_suffix  r  r  c                 C  r	  )N_DequantizeLinearr   r  r   r   r"   add_dequant_suffix!  r  r  c                 C  r	  )N_DequantizeLinear_Inputr   r  r   r   r"   add_dequant_input_suffix%  r  r  c                 C  r  r(   )DEQUANT_OUTPUT_SUFFIXr  r   r   r"   add_dequant_output_suffix)  r  r  )NN)FN)FNNN)r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   )r   r   )r   r   r   r   r   r   r   r   r   r   r   r   r   r   )FF)r   r   r  r   r   r  )r  r   r   r  )rQ  r   rR  r   r   r   )rG   )r  )r  r   )r  r   r  r   )r  r   )r  r   r   r   )r  r   r   r   )r  r   r  r@   r   r   )r  r   r   r   )r  r   r   r   )r   r   r   r   )r  r   r   r   )r   r   )l
__future__r   r   r  r  r   enumr   pathlibr   rm   r   r   r   r   r   r   onnx.helperr	   r
   r   r   onnx.referencer   onnxruntimer   r   r   r   r   ImportErrorr   r   onnx.reference.op_runr   __producer____version__onnx_domain	ms_domainQUANT_OP_NAMEr  DEQUANT_OP_NAMEr  r   MODEL_SIZE_THRESHOLDr   r  r  r%   r<   r@   rX   rI   ra   rK   rO   rM   rQ   rU   rS   r   rr   r]   r\   r_   r^   r  r  r  r}   r   r   r   r   r   r  r   r  r  r   r  r(  r4  rB  rH  rL  rP  rW  ri  r  r  r  r  r  r  r  r  r  r  r  r  r   r  r  r  r  r  r  r   r   r   r"   <module>   s   &$$$$  $
$$$$$$$  


4?)?B
O
 


%


\



	
	


	
$
	

	



