o
    %Thj                     @   s   d dl Z d dlmZ d dlZd dlZd dlZzd dlmZ W n e	y)   dZY nw ddl
mZ ddlmZ ddlmZmZmZmZmZmZmZmZmZmZmZmZ ddlmZ G d	d
 d
ZG dd dZdS )    N)Any)to_array_extended   )
TensorData)	ONNXModel)DEQUANT_OP_NAMEONNX_TYPE_TO_NP_TYPEQUANT_OP_NAMETENSOR_NAME_QUANT_SUFFIXfind_by_namemodel_has_infer_metadatanormalize_axispack_bytes_to_4bitquantize_dataquantize_nparray&save_and_reload_model_with_shape_infertensor_proto_to_array)TensorQuantOverridesHelperc                   @   sL   e Zd Zdeeef fddZdddZdd Zd	d
 Z	dd Z
dd ZdS )QuantizationParamsdatac                 K   s   i | _ | D ]d\}}t|tstdt| d|d|dkr7t|tttjfs7tdt| d|d|dkrNt|tsN|d urNtdt| d|dkrf|j	tj
tjfvrftd|j	 d||| j |< qd S )	NzKeys must be strings not z for k=.axisz1Values must be numpy arrays, int, float, str not z'Axis value must be an int or None, not scalez5scale must a float32 or float16 numpy element but is )r   items
isinstancestr	TypeErrortypeintnpndarraydtypefloat32float16
ValueError)selfr   kv r(   h/home/air/segue/gemini/back/venv/lib/python3.10/site-packages/onnxruntime/quantization/base_quantizer.py__init__'   s   
zQuantizationParams.__init__Nc                 C   s   | j ||S N)r   get)r%   keydefault_valuer(   r(   r)   r,   4      zQuantizationParams.getc                 c   s    | j E d H  d S r+   r   r%   r(   r(   r)   __iter__7   s   zQuantizationParams.__iter__c                 C   s
   | j | S r+   r0   )r%   r-   r(   r(   r)   __getitem__:      
zQuantizationParams.__getitem__c                 C   s   || j |< d S r+   r0   )r%   r-   valuer(   r(   r)   __setitem__=   r/   zQuantizationParams.__setitem__c                 C   s
   t | jS r+   )lenr   r1   r(   r(   r)   __len__@   r4   zQuantizationParams.__len__r+   )__name__
__module____qualname__dictr   r   r*   r,   r2   r3   r6   r8   r(   r(   r(   r)   r   &   s    
r   c                   @   s   e Zd Z	dddZdejjdefddZdd	 Z	d
d Z
dd Zdd Zdd Zdd Zd ddZd!ddZ		d"ddZdd ZdS )#BaseQuantizerNc                 C   s  t |st|}dd |jjD | _| jdd |jjD  | jdd |jjD  t|| _	|| _
|| _|
r;|
ni | _d| jv oG| jd | _d | _d| jv oU| jd | _| jdd | _| jdd	| _| jd
| _t|d|| _t|d|| _	 |d urtdd | D rtddd | D  d|| _|| _|| _|	| _|  | _t | jdi | _!dd | j	" D | _#| j!$| j#| j% |\}}|st&|| j!' | _(d S )Nc                 S      i | ]}|j |qS r(   name).0vir(   r(   r)   
<dictcomp>T       z*BaseQuantizer.__init__.<locals>.<dictcomp>c                 S   r>   r(   r?   )rA   otr(   r(   r)   rC   U   rD   c                 S   r>   r(   r?   )rA   itr(   r(   r)   rC   V   rD   EnableSubgraphForceQuantizeNoInputCheckWeightSymmetricActivationSymmetricFMinimumRealRangetensor_typec                 s   s    | ]	}t |t V  qd S r+   )r   r   )rA   tr(   r(   r)   	<genexpr>y   s    z)BaseQuantizer.__init__.<locals>.<genexpr>z(tensors_range contains unexpected types c                 S   s   h | ]}t |qS r(   )r   )rA   r'   r(   r(   r)   	<setcomp>{   rD   z)BaseQuantizer.__init__.<locals>.<setcomp>z, not TensorData.TensorQuantOverridesc                 S   r>   r(   r?   )rA   initzerr(   r(   r)   rC      rD   ))r   r   graph
value_infovalue_infosupdateoutputinputr   modelper_channelreduce_rangeextra_optionsenable_subgraph_quantizationparentforce_quantize_no_input_checkr,   _is_weight_symmetricis_activation_symmetricmin_real_rangegetattractivation_qTypeweight_qTypeanyvaluesr   tensors_rangenodes_to_quantizenodes_to_excludeop_types_to_quantizecheck_opset_versionopset_versionr   tensor_quant_overridesinitializerinitializersis_validkeysr$   get_quant_typestensor_quant_override_qtypes)r%   rX   rY   rZ   rd   rc   rg   rh   ri   rj   r[   overrides_validoverrides_errr(   r(   r)   r*   E   sJ   

zBaseQuantizer.__init__weight_quant_typereturnc                 C   s0   | j d ur| j S |tjjtjjtjjtjjfv S r+   )r_   onnxTensorProtoINT4INT8INT16FLOAT8E4M3FN)r%   rv   r(   r(   r)   is_weight_symmetric   s   
z!BaseQuantizer.is_weight_symmetricc                 C   s   t r+   )NotImplementedErrorr1   r(   r(   r)   quantize_model   s   zBaseQuantizer.quantize_modelc                 C   s   t || j }|d uS r+   )r   rX   rn   )r%   
input_namern   r(   r(   r)   is_input_a_initializer   s   z$BaseQuantizer.is_input_a_initializerc                 C   s   | j S r+   )rY   r1   r(   r(   r)   is_per_channel   s   zBaseQuantizer.is_per_channelc                 C   sN   t || j }|d ur|jtjjtjjfv S | jr| j	d u r!dS | j	
|S )NF)r   rX   rn   	data_typerx   ry   FLOATFLOAT16r\   r]   is_valid_quantize_weight)r%   weight_nameweightr(   r(   r)   r      s   z&BaseQuantizer.is_valid_quantize_weightc                 C   sh   | j d urt| j dkr|j| j vrdS |j| jvrdS |jttfv r%dS | jd ur2|j| jv r2dS dS )Nr   FT)rh   r7   r@   op_typerj   r   r	   ri   )r%   noder(   r(   r)   should_quantize_node   s   
z"BaseQuantizer.should_quantize_nodec                 C   s  dd | j j jD }t|dkrtd|d j}|dkr(td| d dS |dk rOtd| d	 | j j j|d  | j j jt	j
d
dg d}|dk r| jt	jjkrtd| d | j j j|d  | j j jt	j
d
dg d| j j _d}|S )Nc                 S   s    g | ]}|j r|j d kr|qS )zai.onnx)domain)rA   opsetr(   r(   r)   
<listcomp>   s    z5BaseQuantizer.check_opset_version.<locals>.<listcomp>r   z$Failed to find proper ai.onnx domainr   
   z$The original model opset version is ze, which does not support node fusions. Please update the model to opset >= 11 for better performance.z, which does not support quantization. Please update the model to opset >= 11. Updating the model automatically to opset 11. Please verify the quantized model.       z, which does not support quantization to float 8. Please update the model to opset >= 19. Updating the model automatically to opset 19. Please verify the quantized model.	   )rX   opset_importr7   r$   versionloggingwarningremoveextendrx   helpermake_opsetidrd   ry   r}   
ir_version)r%   ai_onnx_domainrl   r(   r(   r)   rk      s6   




z!BaseQuantizer.check_opset_version      ?c                 C   s  t || j }t|}|t }| jtjjkr`t	
|}|jt	jkr'tjj}	n|jt	jkr2tjj}	n	td|j d|t	j}
t	jdg|
jd}|d}tj|
|}| j|g d}nz|| | }t	j
|t	jdt	j
|t	jd }
|
 }
t	t	t	jj}t	t	t	jj}t	|
|k st	|
|krtd| d t	 |
||t	j}
t	j
|
t	jd|j!}tj||}| j|g t	j
||jdd}d	}| j}	|d
 }tj||}| j|g | jtjjkr| j}ntjj"}|d }| jtjjkrtj#$|| jdgdg}n$|j%dkr.t	j&|j't	jdd}tj||}n
tj#$||g dg}| j|g ||||||	fS )z]
        Quantized the bias. Zero Point == 0 and Scale == Input_Scale * Weight_Scale
        zEOnly float16 or float32 are supported with float 8 but bias dtype is r   r   r!   CastzQuantized bias `z<` exceeds the range of a int32. The bias scale is too small.DequantizeLinear_scale_zero_point        r   )(r   rX   rn   r   r
   rd   rx   ry   r}   r   asarrayr!   r#   r   r"   r   r   astypearrayreshapenumpy_helper
from_arrayinitializer_extendfloat64roundiinfoint32minmaxre   r   r   clipdimsINT32r   make_tensorsizezerosshape)r%   	bias_nameinput_scaleweight_scalebetabias_initializer	bias_dataquantized_bias_namer   
node_qtypequantized_data
bias_scalebias_scale_datapacked_bias_initializer	node_type	int32_min	int32_maxbias_np_dataquantized_bias_scale_namepacked_bias_scale_initializerrL   quantized_bias_zp_namepacked_bias_zp_initializerbias_zp_datar(   r(   r)   quantize_bias_static_impl   sj   



 
z'BaseQuantizer.quantize_bias_static_implFc                 C   s~  |j t }|j d }|j d }t|}| jj|j i d}	d|	v r%|	d j}d|	v r{d|	v r{tj|	d t| d}
t|	d }t	||
 ||
}t|
tjsWJ dt|
 |
jtjkrc|
jtjkskJ d	|
j t|tjszJ dt| na|| jkr| |n| j}t|
 ||	d
||	d| jo|| j|	d|	dd\}
}}t|
tjsJ dt|
 |
jtjkr|
jtjksJ d	|
j t|tjsJ dt| |j}tj||g |d }tj||g |
d }| j||g |s| jtj j!krkt  }| j|_|j"#|j" ||_ |
 $ % |_&t'durjt'|}|j(|j(ksB|% |% krjt)d|j( d|% dd  d|% dd  d|j( dt*|dd  dnH|tj j+tj j,fv r|jtj-tj.fvrt)d| dt/t0|% }tjj|||j"|dd}ntj1|tj2|d|j"}tj34||}| j|g |||fS )a  
        :param weight: TensorProto initializer
        :param qType: type to quantize to
        :param keep_float_weight: Whether to quantize the weight. In some cases, we only want to qunatize scale and zero point.
                                  If keep_float_weight is False, quantize the weight, or don't quantize the weight.
        :return: quantized weight name, zero point name, scale name
        r   r   default_val
quant_typer   
zero_pointr   Unexpected type Unexpected dtype 	symmetricrZ   rminrmaxrZ   ra   rmin_overridermax_override)r   NzThe initializer of shape z! could not be created, expecting r   z, got z and shape=z
raw=   r   Quantized weights for . must be 8-bit before packing as 4-bit values.Traw)5r@   r
   r   rm   get_per_tensor_overridesrL   r   r   r   r   flattenr   r    r   r!   r"   r#   rd   r~   r`   r   r,   rZ   ra   r   rx   r   r   r   tolistrX   r   ry   r}   r   r   copytobytesraw_datar   r   RuntimeErrorr   rz   UINT4int8uint8bytesr   r   tensor_dtype_to_np_dtyper   r   )r%   r   qTyperZ   keep_float_weightq_weight_namezp_name
scale_nameweight_dataquant_overridesr   r   q_weight_datar   scale_dtypescale_initializerzero_initializerq_weight_initializercheckpacked_datar(   r(   r)   quantize_initializer_impl6  s   
	



 



 


z'BaseQuantizer.quantize_initializer_implTc           &      C   s  t || j }|d u rtd|t|}t|j}t||\}	}
|	s0td| d| d| |
}|j| }| jj	|d|igd}t|}|dkrY||krYtd| d	| d
t|d d |\}}|rj||kr|td| d| d|d d  dd|d v r|d d j
}|d d| |}|d d| jo|}g }g }g }t|j}t|}d||< t|D ]}|||}||k r|nd}|| }d|v r5d|v r5tj|d t| d}t|d }t|| ||}t|tjsJ dt| |jtjkr|jtjksJ d|j t|tjs$J dt| t|tjs4J dt| n]t| |||| j|d|dd\}}}t|tjs\J dt| |jtjkrj|jtjksrJ d|j t|tjsJ dt| t|tjsJ dt| || || |t| | qt!||}|t" }|d }|d } |j#| g}!t$j%&| |j'|!t(|) }"t$j%&|||!t(|) }#| j*|"|#g |s=|t$j+j,t$j+j-fv r |jtj.tj/fvrt0d| dt1t2|3 }$t$j%j&||||$dd }%| j*|%g ntj|t$j%4|d |j#}t$j56||}%| j*|%g ||| fS )!Nz{} is not an initializerzWeight z# has a per-channel axis with value z  that is out-of-bounds for rank r   r   r   z.Per-channel tensor quantization overrides for z must have either 1 or z& elements in the list of dictionaries.r   z"Tensor quantization overrides for z& specify an unexpected axis. Expected z
, but got r   r   r   rZ   r   r   r   r   r   r   r   r   r   r   r   r   Tr   )7r   rX   rn   r$   r   r7   r   r   rm   get_per_channel_overridesrL   r,   r~   rZ   listrangetaker   r   r   r   r   r   r    r   r!   r"   r#   r   ra   appendr   r   concatenater
   r   rx   r   r   r   hstackr   r   ry   rz   r   r   r   r   r   r   r   r   r   r   )&r%   r   rd   channel_axisrZ   r   rn   weightsweights_rankis_axis_valid	axis_normchannel_countquant_overrides_for_channelsnum_channel_overridesis_axis_override_validaxis_overrider   zero_point_list
scale_listquantized_per_channel_data_listweights_shapereshape_dimsiper_channel_datachannel_override_indexchannel_quant_overridesr   r   quantized_per_channel_dataquantized_weightsr   r   r   zero_scale_shaper   r   r   r   r(   r(   r)    quantize_weight_per_channel_impl  s   	






  

 





z.BaseQuantizer.quantize_weight_per_channel_implc                 C   s   | j d u rd S | j D ]p}|jdv r_| |sqt| j |jd  dkr(q|jd | j vs8|jd | j vr9q| j |jd  }t	|t
sVtdt| d|jd d|| j |jd < q|jdkr|| |sjqt
tdtd	d
| j |jd < qd S )N)ClipRelur   r   r   z for r   Softmaxr   r   )lowesthighest)rg   rX   nodesr   r   r7   input_name_to_nodesrW   rV   r   r   r   r   r   r"   )r%   r   tdr(   r(   r)   adjust_tensor_ranges  s(   


 
 

$z"BaseQuantizer.adjust_tensor_rangesr+   )r   )FF)TF)r9   r:   r;   r*   rx   ry   DataTypeboolr~   r   r   r   r   r   rk   r   r   r  r#  r(   r(   r(   r)   r=   D   s"    
K

#
T`
 r=   )r   typingr   numpyr   rx   onnx.numpy_helperonnx.reference.op_runr   ImportError	calibrater   
onnx_modelr   quant_utilsr   r   r	   r
   r   r   r   r   r   r   r   r   rm   r   r   r=   r(   r(   r(   r)   <module>   s    8