o
    3IhL                      @   s   d dl Z d dlZd dlZd dlmZ ddlmZmZmZm	Z	m
Z
 ddlmZ ddlmZ G dd	 d	eZ	 G d
d deZ	 G dd deZG dd deZdS )    N)onnx_pb   )TENSOR_NAME_QUANT_SUFFIXQuantizedValueQuantizedValueTypefind_by_nameget_mul_node   )QuantOperatorBase)QDQOperatorBasec                       $   e Zd Z fddZdd Z  ZS )	QOpMatMulc                       t  || d S Nsuper__init__selfonnx_quantizer	onnx_node	__class__ i/home/air/sanwanet/gpt-api/venv/lib/python3.10/site-packages/onnxruntime/quantization/operators/matmul.pyr         zQOpMatMul.__init__c                 C   s   | j | jstd| jj d dS | j | jjd s5| j | jjd s5td| jj d dS | j j	rU| j 
| jjd sUtd| j j d| jj d dS d	S )
NzIgnore MatMul ]Fr	   r   z&Ignore MatMul due to non float inputs z%Ignore MatMul due to non constant B: [T)	quantizershould_quantize_nodenodeloggingdebugnameis_float_tensorinputinfoq_matmul_const_b_onlyfind_initializer_in_pathgraph_scope)r   r   r   r   should_quantize   s    zQOpMatMul.should_quantize)__name__
__module____qualname__r   r*   __classcell__r   r   r   r   r          r   c                       r   )MatMulIntegerc                    r   r   r   r   r   r   r   r   )   r   zMatMulInteger.__init__c                 C   s  | j }|jdks
J | j|dg\}}}}| jj|dgddd\}}}}	|| || || ||	 |jd d }
|jrH|jd nd}tj	
d	|| |
g|}|| |
d
 }| jj|jd dd}tj	j
d|
g|g|
d |d}|| t|dksJ |r|d n|d d |d  d }t|| jj}|d u rt||d |}|| |jd }d}|r|d }|t||g|jd | | j j|7  _d S )NMatMulr   r	   Treduce_rangeop_level_per_channel_output_quantized_quant r0   _cast_output)	mandatoryCast_cast)tor   _scales_mul__mulz:0_output_scale_mul)r    op_typer   quantize_activationquantize_weightextendoutputr#   onnxhelper	make_nodeappendget_tensor_typelenr   	new_nodesr   )r   r    quantized_input_nameszero_point_namesscale_namesnodesquantized_input_names_weightzero_point_names_weightscale_names_weightnodes_weightmatmul_integer_outputmatmul_integer_namematmul_integer_nodecast_op_outputotype	cast_nodescales_mul_opscales_mul_nodescales_mul_op_outputoutput_scale_mul_opr   r   r   quantize,   st   








zMatMulInteger.quantizer+   r,   r-   r   r_   r.   r   r   r   r   r0   (   r/   r0   c                       s(   e Zd Z fddZ fddZ  ZS )QLinearMatMulc                    r   r   r   r   r   r   r   r   |   r   zQLinearMatMul.__init__c                    s  | j }|jdks
J | j|dg\}}}}| jj|dgddd\}}}}	|| || || ||	 | j|jd \}
}}}}|
rM|d u rRt 	 S |jd t
 }|jra|jd nd}g }||d  ||d  ||d  ||d  ||d  ||d  || || | jjtjjtjjtjjtjjhv rdnd}tjjd	||g||d
}|| t|jd |||tj}|| jj|jd < | j j|7  _d S )Nr1   r   r	   Tr2   r6   r7   zcom.microsoftra   )domain)r    rA   r   rB   rC   rD   _get_quantization_paramsrE   r   r_   r   r#   rI   weight_qType
onnx_protoTensorProtoFLOAT8E4M3FNFLOAT8E4M3FNUZ
FLOAT8E5M2FLOAT8E5M2FNUZrF   rG   rH   r   r   Inputquantized_value_maprL   )r   r    rM   rN   rO   rP   rQ   rR   rS   rT   
data_foundoutput_scale_nameoutput_zp_namer>   qlinear_matmul_outputqlinear_matmul_nameqlinear_matmul_inputsrb   qlinear_matmul_nodeq_outputr   r   r   r_      s   







zQLinearMatMul.quantizer`   r   r   r   r   ra   {   s    ra   c                       r   )	QDQMatMulc                    r   r   r   r   r   r   r   r      r   zQDQMatMul.__init__c                 C   s   | j }|jdks
J | jr|j}nt|j|j}|D ].}t|| jj	
 rC| jj|d|jd\}}|r<| j|| q| j| q| j| qd S )Nr1   r	   )default_axisrA   )r    rA   disable_qdq_for_node_outputr%   	itertoolschainrE   r   r   modelinitializeris_tensor_per_channel"quantize_weight_tensor_per_channelquantize_weight_tensorquantize_activation_tensor)r   r    nodes_to_iteratetensor_nameis_per_channelchannel_axisr   r   r   r_      s   
zQDQMatMul.quantizer`   r   r   r   r   ru      r/   ru   )rx   r!   rF   r   re   quant_utilsr   r   r   r   r   base_operatorr
   qdq_base_operatorr   r   r0   ra   ru   r   r   r   r   <module>   s    NU