o
    vi
                     @   sF   d dl Z d dl mZ ddlmZmZ ddlmZ 	 G dd deZdS )	    N)onnx_pb   )attribute_to_kwarg	ms_domain   )QuantOperatorBasec                       s0   e Zd Z fddZdd Z fddZ  ZS )AttentionQuantc                    s   t  || d S N)super__init__)selfonnx_quantizer	onnx_node	__class__ k/home/air/biblejyuku/back/venv/lib/python3.10/site-packages/onnxruntime/quantization/operators/attention.pyr      s   zAttentionQuant.__init__c                 C   s   | j | jS r	   )	quantizershould_quantize_nodenode)r   r   r   r   should_quantize   s   zAttentionQuant.should_quantizec                    s  | j }|jdks
J |jD ]}|jdkrt    S q| j|dg\}}}}| jj|dgddd\}}}	}
|	| |	| |	|	 |	|
 |du rTt  S |jsYdn|jd	 }g }|	| |	|j
d
 g |	| |	t|j
dkr|j
d ndg |	| |	t|j
dkr|j
d ndg i }|jD ]	}|t| qt|d< tjjd||j|fi |}|| | j j|7  _dS )z
        parameter node: Attention node.
        parameter new_nodes_list: List of new nodes created before processing this node.
        return: a list of nodes in topological order that represents quantized Attention node.
        	Attentionqkv_hidden_sizesr   r   T)reduce_rangeop_level_per_channelN _quantr         domain
QAttention)r   op_type	attributenamer
   quantizer   quantize_activationquantize_weightextendinputlenupdater   r   onnxhelper	make_nodeoutputappend	new_nodes)r   r   attrquantized_input_nameszero_point_namesscale_namesnodesquantized_input_names_weightzero_point_names_weightscale_names_weightnodes_weightqattention_nameinputskwargsr"   qattention_noder   r   r   r$      sN   








$
$

zAttentionQuant.quantize)__name__
__module____qualname__r   r   r$   __classcell__r   r   r   r   r      s    r   )	r+   r   
onnx_protoquant_utilsr   r   base_operatorr   r   r   r   r   r   <module>   s    