o
    vi2d                     @   s   d dl Z d dlmZ d dlZd dlZd dlZzd dlmZ W n e	y)   dZY nw ddl
mZ ddlmZ ddlmZmZmZmZmZmZmZmZmZmZmZmZmZ ddlmZ G d	d
 d
ZG dd dZdS )    N)Any)to_array_extended   )
TensorData)	ONNXModel)DEQUANT_OP_NAMEONNX_TYPE_TO_NP_TYPEQUANT_OP_NAMETENSOR_NAME_QUANT_SUFFIXfind_by_nameget_opset_versionmodel_has_infer_metadatanormalize_axispack_bytes_to_4bitquantize_dataquantize_nparray&save_and_reload_model_with_shape_infertensor_proto_to_array)TensorQuantOverridesHelperc                   @   sL   e Zd Zdeeef fddZdddZdd Zd	d
 Z	dd Z
dd ZdS )QuantizationParamsdatac                 K   s   i | _ | D ]e\}}t|tstdt| d|d|dkr8t|tttjt	fs8tdt| d|d|dkrOt|tsO|d urOtdt| d|dkrg|j
tjtjfvrgtd|j
 d||| j |< qd S )	NzKeys must be strings not z for k=.axisz1Values must be numpy arrays, int, float, str not z'Axis value must be an int or None, not scalez5scale must a float32 or float16 numpy element but is )r   items
isinstancestr	TypeErrortypeintnpndarrayfloatdtypefloat32float16
ValueError)selfr   kv r*   f/home/air/biblejyuku/back/venv/lib/python3.10/site-packages/onnxruntime/quantization/base_quantizer.py__init__(   s   
zQuantizationParams.__init__Nc                 C   s   | j ||S N)r   get)r'   keydefault_valuer*   r*   r+   r.   5      zQuantizationParams.getc                 c   s    | j E d H  d S r-   r   r'   r*   r*   r+   __iter__8   s   zQuantizationParams.__iter__c                 C   s
   | j | S r-   r2   )r'   r/   r*   r*   r+   __getitem__;      
zQuantizationParams.__getitem__c                 C   s   || j |< d S r-   r2   )r'   r/   valuer*   r*   r+   __setitem__>   r1   zQuantizationParams.__setitem__c                 C   s
   t | jS r-   )lenr   r3   r*   r*   r+   __len__A   r6   zQuantizationParams.__len__r-   )__name__
__module____qualname__dictr   r   r,   r.   r4   r5   r8   r:   r*   r*   r*   r+   r   '   s    
r   c                   @   s   e Zd Z	dddZdejjdefddZdd	 Z	d
d Z
dd Zdd Zdd ZdddZdddZ		d ddZdd ZdS )!BaseQuantizerNc                 C   s  t |st|}dd |jjD | _| jdd |jjD  | jdd |jjD  t|| _	t
|| _|| _|| _|
r@|
ni | _d| jv oL| jd | _d | _d| jv oZ| jd | _| jdd | _| jdd	| _| jd
| _t|d|| _t|d|| _	 |d urtdd | D rtddd | D  d|| _|| _|| _|	| _t | jdi | _!dd | j	" D | _#| j!$| j#| j% |\}}|st&|| j!' | _(d S )Nc                 S      i | ]}|j |qS r*   name).0vir*   r*   r+   
<dictcomp>U       z*BaseQuantizer.__init__.<locals>.<dictcomp>c                 S   r@   r*   rA   )rC   otr*   r*   r+   rE   V   rF   c                 S   r@   r*   rA   )rC   itr*   r*   r+   rE   W   rF   EnableSubgraphForceQuantizeNoInputCheckWeightSymmetricActivationSymmetricFMinimumRealRangetensor_typec                 s   s    | ]	}t |t V  qd S r-   )r   r   )rC   tr*   r*   r+   	<genexpr>{   s    z)BaseQuantizer.__init__.<locals>.<genexpr>z(tensors_range contains unexpected types c                 S   s   h | ]}t |qS r*   )r   )rC   r)   r*   r*   r+   	<setcomp>}   rF   z)BaseQuantizer.__init__.<locals>.<setcomp>z, not TensorData.TensorQuantOverridesc                 S   r@   r*   rA   )rC   initzerr*   r*   r+   rE      rF   ))r   r   graph
value_infovalue_infosupdateoutputinputr   modelr   opset_versionper_channelreduce_rangeextra_optionsenable_subgraph_quantizationparentforce_quantize_no_input_checkr.   _is_weight_symmetricis_activation_symmetricmin_real_rangegetattractivation_qTypeweight_qTypeanyvaluesr   tensors_rangenodes_to_quantizenodes_to_excludeop_types_to_quantizer   tensor_quant_overridesinitializerinitializersis_validkeysr&   get_quant_typestensor_quant_override_qtypes)r'   rZ   r\   r]   rg   rf   rj   rk   rl   rm   r^   overrides_validoverrides_errr*   r*   r+   r,   F   sJ   

zBaseQuantizer.__init__weight_quant_typereturnc                 C   s0   | j d ur| j S |tjjtjjtjjtjjfv S r-   )rb   onnxTensorProtoINT4INT8INT16FLOAT8E4M3FN)r'   rw   r*   r*   r+   is_weight_symmetric   s   
z!BaseQuantizer.is_weight_symmetricc                 C   s   t r-   )NotImplementedErrorr3   r*   r*   r+   quantize_model   s   zBaseQuantizer.quantize_modelc                 C   s   t || j }|d uS r-   )r   rZ   ro   )r'   
input_namero   r*   r*   r+   is_input_a_initializer   s   z$BaseQuantizer.is_input_a_initializerc                 C   s   | j S r-   )r\   r3   r*   r*   r+   is_per_channel   s   zBaseQuantizer.is_per_channelc                 C   sN   t || j }|d ur|jtjjtjjfv S | jr| j	d u r!dS | j	
|S )NF)r   rZ   ro   	data_typery   rz   FLOATFLOAT16r_   r`   is_valid_quantize_weight)r'   weight_nameweightr*   r*   r+   r      s   z&BaseQuantizer.is_valid_quantize_weightc                 C   sh   | j d urt| j dkr|j| j vrdS |j| jvrdS |jttfv r%dS | jd ur2|j| jv r2dS dS )Nr   FT)rk   r9   rB   op_typerm   r   r	   rl   )r'   noder*   r*   r+   should_quantize_node   s   
z"BaseQuantizer.should_quantize_node      ?c                 C   s  t || j }t|}|t }| jtjjkr`t	
|}|jt	jkr'tjj}	n|jt	jkr2tjj}	n	td|j d|t	j}
t	jdg|
jd}|d}tj|
|}| j|g d}nz|| | }t	j
|t	jdt	j
|t	jd }
|
 }
t	t	t	jj}t	t	t	jj}t	|
|k st	|
|krtd| d t	 |
||t	j}
t	j
|
t	jd|j!}tj||}| j|g t	j
||jdd}d	}| j}	|d
 }tj||}| j|g | jtjjkr| j}ntjj"}|d }| jtjjkrtj#$|| jdgdg}n$|j%dkr.t	j&|j't	jdd}tj||}n
tj#$||g dg}| j|g ||||||	fS )z]
        Quantized the bias. Zero Point == 0 and Scale == Input_Scale * Weight_Scale
        zEOnly float16 or float32 are supported with float 8 but bias dtype is r   r   r#   CastzQuantized bias `z<` exceeds the range of a int32. The bias scale is too small.DequantizeLinear_scale_zero_point        r   )(r   rZ   ro   r   r
   rg   ry   rz   r~   r    asarrayr#   r%   r   r$   r   r   astypearrayreshapenumpy_helper
from_arrayinitializer_extendfloat64roundiinfoint32minmaxrh   loggingwarningclipdimsINT32helpermake_tensorsizezerosshape)r'   	bias_nameinput_scaleweight_scalebetabias_initializer	bias_dataquantized_bias_namer   
node_qtypequantized_data
bias_scalebias_scale_datapacked_bias_initializer	node_type	int32_min	int32_maxbias_np_dataquantized_bias_scale_namepacked_bias_scale_initializerrN   quantized_bias_zp_namepacked_bias_zp_initializerbias_zp_datar*   r*   r+   quantize_bias_static_impl   sj   



 
z'BaseQuantizer.quantize_bias_static_implFc                 C   s~  |j t }|j d }|j d }t|}| jj|j i d}	d|	v r%|	d j}d|	v r{d|	v r{tj|	d t| d}
t|	d }t	||
 ||
}t|
tjsWJ dt|
 |
jtjkrc|
jtjkskJ d	|
j t|tjszJ dt| na|| jkr| |n| j}t|
 ||	d
||	d| jo|| j|	d|	dd\}
}}t|
tjsJ dt|
 |
jtjkr|
jtjksJ d	|
j t|tjsJ dt| |j}tj||g |d }tj||g |
d }| j||g |s| jtj j!krkt  }| j|_|j"#|j" ||_ |
 $ % |_&t'durjt'|}|j(|j(ksB|% |% krjt)d|j( d|% dd  d|% dd  d|j( dt*|dd  dnH|tj j+tj j,fv r|jtj-tj.fvrt)d| dt/t0|% }tjj|||j"|dd}ntj1|tj2|d|j"}tj34||}| j|g |||fS )a  
        :param weight: TensorProto initializer
        :param qType: type to quantize to
        :param keep_float_weight: Whether to quantize the weight. In some cases, we only want to qunatize scale and zero point.
                                  If keep_float_weight is False, quantize the weight, or don't quantize the weight.
        :return: quantized weight name, zero point name, scale name
        r   r   default_val
quant_typer   
zero_pointr   Unexpected type Unexpected dtype 	symmetricr]   rminrmaxr]   rd   rmin_overridermax_override)r   NzThe initializer of shape z! could not be created, expecting 
   z, got z and shape=z
raw=   r   Quantized weights for . must be 8-bit before packing as 4-bit values.Traw)5rB   r
   r   rn   get_per_tensor_overridesrN   r    r   r   r   flattenr   r!   r   r#   r$   r%   rg   r   rc   r   r.   r]   rd   r   ry   r   r   r   tolistrZ   r   rz   r~   r   extendcopytobytesraw_datar   r   RuntimeErrorr   r{   UINT4int8uint8bytesr   r   tensor_dtype_to_np_dtyper   r   )r'   r   qTyper]   keep_float_weightq_weight_namezp_name
scale_nameweight_dataquant_overridesr   r   q_weight_datar   scale_dtypescale_initializerzero_initializerq_weight_initializercheckpacked_datar*   r*   r+   quantize_initializer_impl  s   
	



 



 


z'BaseQuantizer.quantize_initializer_implTc           &      C   s  t || j }|d u rtd|t|}t|j}t||\}	}
|	s0td| d| d| |
}|j| }| jj	|d|igd}t|}|dkrY||krYtd| d	| d
t|d d |\}}|rj||kr|td| d| d|d d  dd|d v r|d d j
}|d d| |}|d d| jo|}g }g }g }t|j}t|}d||< t|D ]}|||}||k r|nd}|| }d|v r5d|v r5tj|d t| d}t|d }t|| ||}t|tjsJ dt| |jtjkr|jtjksJ d|j t|tjs$J dt| t|tjs4J dt| n]t| |||| j|d|dd\}}}t|tjs\J dt| |jtjkrj|jtjksrJ d|j t|tjsJ dt| t|tjsJ dt| || || |t| | qt!||}|t" }|d }|d } |j#| g}!t$j%&| |j'|!t(|) }"t$j%&|||!t(|) }#| j*|"|#g |s=|t$j+j,t$j+j-fv r |jtj.tj/fvrt0d| dt1t2|3 }$t$j%j&||||$dd }%| j*|%g ntj|t$j%4|d |j#}t$j56||}%| j*|%g ||| fS )!Nz{} is not an initializerzWeight z# has a per-channel axis with value z  that is out-of-bounds for rank r   r   r   z.Per-channel tensor quantization overrides for z must have either 1 or z& elements in the list of dictionaries.r   z"Tensor quantization overrides for z& specify an unexpected axis. Expected z
, but got r   r   r   r]   r   r   r   r   r   r   r   r   r   r   r   r   Tr   )7r   rZ   ro   r&   r   r9   r   r   rn   get_per_channel_overridesrN   r.   r   r]   listrangetaker    r   r   r   r   r   r!   r   r#   r$   r%   r   rd   appendr   r   concatenater
   r   ry   r   r   r   hstackr   r   rz   r{   r   r   r   r   r   r   r   r   r   r   )&r'   r   rg   channel_axisr]   r   ro   weightsweights_rankis_axis_valid	axis_normchannel_countquant_overrides_for_channelsnum_channel_overridesis_axis_override_validaxis_overrider   zero_point_list
scale_listquantized_per_channel_data_listweights_shapereshape_dimsiper_channel_datachannel_override_indexchannel_quant_overridesr   r   quantized_per_channel_dataquantized_weightsr   r   r   zero_scale_shaper   r   r   r   r*   r*   r+    quantize_weight_per_channel_impln  s   	






  

 





z.BaseQuantizer.quantize_weight_per_channel_implc                 C   s   | j d u rd S | j D ]p}|jdv r_| |sqt| j |jd  dkr(q|jd | j vs8|jd | j vr9q| j |jd  }t	|t
sVtdt| d|jd d|| j |jd < q|jdkr|| |sjqt
tdtd	d
| j |jd < qd S )N)ClipRelur   r   r   z for r   Softmaxr   r   )lowesthighest)rj   rZ   nodesr   r   r9   input_name_to_nodesrY   rX   r   r   r   r   r    r$   )r'   r   tdr*   r*   r+   adjust_tensor_ranges  s(   


 
 

$z"BaseQuantizer.adjust_tensor_rangesr-   )r   )FF)TF)r;   r<   r=   r,   ry   rz   DataTypeboolr   r   r   r   r   r   r   r   r  r  r*   r*   r*   r+   r?   E   s     
J


T`
 r?   ) r   typingr   numpyr    ry   onnx.numpy_helperonnx.reference.op_runr   ImportError	calibrater   
onnx_modelr   quant_utilsr   r   r	   r
   r   r   r   r   r   r   r   r   r   rn   r   r   r?   r*   r*   r*   r+   <module>   s    <