o
    ²3Ih
  ã                   @   sF   d dl Z d dl mZ ddlmZmZ ddlmZ 	 G dd„ deƒZdS )	é    N)Úonnx_pbé   )Úattribute_to_kwargÚ	ms_domainé   )ÚQuantOperatorBasec                       s0   e Zd Z‡ fdd„Zdd„ Z‡ fdd„Z‡  ZS )ÚAttentionQuantc                    s   t ƒ  ||¡ d S ©N)ÚsuperÚ__init__)ÚselfÚonnx_quantizerÚ	onnx_node©Ú	__class__© úl/home/air/sanwanet/gpt-api/venv/lib/python3.10/site-packages/onnxruntime/quantization/operators/attention.pyr      s   zAttentionQuant.__init__c                 C   s   | j  | j¡S r	   )Ú	quantizerÚshould_quantize_nodeÚnode)r   r   r   r   Úshould_quantize   s   zAttentionQuant.should_quantizec                    s˜  | j }|jdks
J ‚|jD ]}|jdkrtƒ  ¡   S q| j |dg¡\}}}}| jj|dgddd\}}}	}
| 	|¡ | 	|¡ | 	|	¡ | 	|
¡ |du rTtƒ  ¡ S |jsYdn|jd	 }g }| 	|¡ | 	|j
d
 g¡ | 	|¡ | 	t|j
ƒdkr|j
d ndg¡ | 	|¡ | 	t|j
ƒdkr˜|j
d ndg¡ i }|jD ]	}| t|ƒ¡ q¡t|d< tjjd||j|fi |¤Ž}| |¡ | j j|7  _dS )zé
        parameter node: Attention node.
        parameter new_nodes_list: List of new nodes created before processing this node.
        return: a list of nodes in topological order that represents quantized Attention node.
        Ú	AttentionÚqkv_hidden_sizesr   r   T)Úreduce_rangeÚop_level_per_channelNÚ Ú_quantr   é   é   ÚdomainÚ
QAttention)r   Úop_typeÚ	attributeÚnamer
   Úquantizer   Úquantize_activationÚquantize_weightÚextendÚinputÚlenÚupdater   r   ÚonnxÚhelperÚ	make_nodeÚoutputÚappendÚ	new_nodes)r   r   ÚattrÚquantized_input_namesÚzero_point_namesÚscale_namesÚnodesÚquantized_input_names_weightÚzero_point_names_weightÚscale_names_weightÚnodes_weightÚqattention_nameÚinputsÚkwargsr"   Úqattention_noder   r   r   r$      sN   

ÿûû






$
$

zAttentionQuant.quantize)Ú__name__Ú
__module__Ú__qualname__r   r   r$   Ú__classcell__r   r   r   r   r      s    r   )	r+   r   Ú
onnx_protoÚquant_utilsr   r   Úbase_operatorr   r   r   r   r   r   Ú<module>   s    