o
    3Ih                     @   s   d dl Z d dlZd dlZddlmZmZmZmZm	Z	m
Z
mZ ddlmZ ddlmZ ddlmZ dd	 Zd
d Zdd ZG dd deZG dd deZdS )    N   )TENSOR_NAME_QUANT_SUFFIXQuantizedValueQuantizedValueTypeattribute_to_kwargfind_by_nameget_mul_node	ms_domain   )QuantOperatorBase)	QOpMatMul)QDQOperatorBasec                 C   s0   dd | j D }t|rtj|d dkS dS )Nc                 S      g | ]	}|j d kr|qS )transBname.0attr r   g/home/air/sanwanet/gpt-api/venv/lib/python3.10/site-packages/onnxruntime/quantization/operators/gemm.py
<listcomp>       z#is_B_transposed.<locals>.<listcomp>r   F	attributelenonnxhelperget_attribute_value)	gemm_nodetransB_attributer   r   r   is_B_transposed   s   r!   c                 C   s,   dd | j D }t|rtj|d S dS )Nc                 S   r   betar   r   r   r   r   r      r   zget_beta.<locals>.<listcomp>r         ?r   r   beta_attributer   r   r   get_beta   s   r'   c                 C   s&   dd | j D }t|rd|d _dS )Nc                 S   r   r"   r   r   r   r   r   r   %   r   z$set_default_beta.<locals>.<listcomp>r$   r   )r   r   fr%   r   r   r   set_default_beta$   s   
r)   c                       s(   e Zd Z fddZ fddZ  ZS )QLinearGemmc                       t  || d S Nsuper__init__selfonnx_quantizer	onnx_node	__class__r   r   r/   -      zQLinearGemm.__init__c              	      s  | j }|jdks
J | j|jd \}}}}}| j|jd r[| j r[| j|dg\}}}}	| j	|jd | jj
t|rBdnd}
||
d  ||
d  ||
d  n0| j|dg\}}}}	| jj|dg| jjd\}}}}|| || || |	| |r|d u rt  S d}t|jdkr| j|jd st  S | j|jd |jd |jd t| j }|jd t }|jr|jd nd}i }|jD ]}|jd	kr|t| qt|d
< g }tdD ]}||| || || g q||||g tjjd||g|fi |}|	| t|jd |||t j!|j| jj
d}|| jj"|jd < | j j#|	7  _#d S )NGemmr   r
   r   )reduce_range    _quantr#   domainQGemm)	node_type
node_qtype)$nodeop_type	quantizer_get_quantization_paramsoutputis_input_a_initializerinputis_per_channelquantize_activationquantize_weight_per_channelweight_qTyper!   appendquantize_weightr8   extendr.   quantizer   quantize_bias_staticr'   r   r   r   updater   r	   ranger   r   	make_noder   r   Inputquantized_value_map	new_nodes)r1   r@   
data_foundoutput_scale_nameoutput_zp_name_quantized_input_nameszero_point_namesscale_namesnodesquant_weight_tuplequantized_input_names_weightzero_point_names_weightscale_names_weightnodes_weightquantized_bias_nameqgemm_output
qgemm_namekwargsr   qgemm_inputsi
qgemm_nodeq_outputr4   r   r   rN   0   s   





 


	zQLinearGemm.quantize__name__
__module____qualname__r/   rN   __classcell__r   r   r4   r   r*   ,   s    r*   c                       s$   e Zd Z fddZdd Z  ZS )QDQGemmc                    r+   r,   r-   r0   r4   r   r   r/      r6   zQDQGemm.__init__c                 C   s
  | j }|jdks
J | j|jd  | js| j|jd  | jj|jd t|r,dndd\}}|r?| j	|jd | n	| j
|jd  t|jdkr| j|jd rv| j|j|jd |jd |jd t| j  t| j  d S td| j j d d S d S )	Nr7   r   r
   )default_axisr:   r   zBias of Gemm node 'zC' is not constant. Please exclude this node for better performance.)r@   rA   rB   quantize_activation_tensorrF   disable_qdq_for_node_outputrD   is_tensor_per_channelr!   "quantize_weight_tensor_per_channelquantize_weight_tensorr   rE   quantize_bias_tensorr   r'   r)   loggingwarning)r1   r@   is_weight_per_channelweight_axisr   r   r   rN      s*   
$zQDQGemm.quantizerk   r   r   r4   r   rp      s    rp   )rx   numpynpr   quant_utilsr   r   r   r   r   r   r	   base_operatorr   matmulr   qdq_base_operatorr   r!   r'   r)   r*   rp   r   r   r   r   <module>   s    $	c