from argparse import ArgumentParser
from enum import Enum


class AttentionMaskFormat:
    # 1D mask index (end position of each sequence); requires right-side padding.
    MaskIndexEnd = 0

    # 1D mask index carrying both end and start positions.
    MaskIndexEndAndStart = 1

    # Raw 2D attention mask.
    AttentionMask = 2

    # No attention mask.
    NoMask = 3


class AttentionOpType(Enum):
    Attention = "Attention"
    MultiHeadAttention = "MultiHeadAttention"
    GroupQueryAttention = "GroupQueryAttention"
    PagedAttention = "PagedAttention"

    def __str__(self):
        return self.value

    def __hash__(self):
        return hash(self.value)

    def __eq__(self, other):
        return other.value == self.value
class FusionOptions:
    """Options of fusion in graph optimization"""

    def __init__(self, model_type):
        self.enable_gelu = True
        self.enable_layer_norm = True
        self.enable_attention = True
        self.enable_rotary_embeddings = True

        self.use_multi_head_attention = False
        self.disable_multi_head_attention_bias = False

        self.enable_skip_layer_norm = True
        self.enable_embed_layer_norm = True
        self.enable_bias_skip_layer_norm = True
        self.enable_bias_gelu = True
        self.enable_gelu_approximation = False
        self.enable_qordered_matmul = True

        self.enable_shape_inference = True
        self.enable_gemm_fast_gelu = False
        self.group_norm_channels_last = True

        if model_type == "clip":
            self.enable_embed_layer_norm = False

        # Default to a raw 2D attention mask. BERT uses a 1D mask index (requires
        # right-side padding) to activate fused attention; ViT uses no mask.
        self.attention_mask_format = AttentionMaskFormat.AttentionMask
        if model_type == "bert":
            self.attention_mask_format = AttentionMaskFormat.MaskIndexEnd
        elif model_type == "vit":
            self.attention_mask_format = AttentionMaskFormat.NoMask

        self.attention_op_type = None

        # Fusions that only apply to stable-diffusion style models.
        if model_type in ("unet", "vae", "clip"):
            self.enable_nhwc_conv = True
            self.enable_group_norm = True
            self.enable_skip_group_norm = True
            self.enable_bias_splitgelu = True
            self.enable_packed_qkv = True
            self.enable_packed_kv = True
            self.enable_bias_add = True

    def use_raw_attention_mask(self, use_raw_mask=True):
        if use_raw_mask:
            self.attention_mask_format = AttentionMaskFormat.AttentionMask
        else:
            self.attention_mask_format = AttentionMaskFormat.MaskIndexEnd

    def disable_attention_mask(self):
        self.attention_mask_format = AttentionMaskFormat.NoMask

    def set_attention_op_type(self, attn_op_type: AttentionOpType):
        self.attention_op_type = attn_op_type

    @staticmethod
    def parse(args):
        options = FusionOptions(args.model_type)

        # Flags named disable_<x> turn off the matching enable_<x> option.
        for name in (
            "gelu",
            "layer_norm",
            "rotary_embeddings",
            "attention",
            "skip_layer_norm",
            "embed_layer_norm",
            "bias_skip_layer_norm",
            "bias_gelu",
            "shape_inference",
        ):
            if getattr(args, "disable_" + name):
                setattr(options, "enable_" + name, False)

        if args.use_multi_head_attention:
            options.use_multi_head_attention = True
        if args.enable_gelu_approximation:
            options.enable_gelu_approximation = True
        if args.enable_gemm_fast_gelu:
            options.enable_gemm_fast_gelu = True

        if args.use_mask_index:
            options.use_raw_attention_mask(False)
        if args.use_raw_attention_mask:
            options.use_raw_attention_mask(True)
        if args.no_attention_mask:
            options.disable_attention_mask()

        if args.model_type in ("unet", "vae", "clip"):
            if args.use_group_norm_channels_first:
                options.group_norm_channels_last = False
            for name in (
                "nhwc_conv",
                "group_norm",
                "skip_group_norm",
                "bias_splitgelu",
                "packed_qkv",
                "packed_kv",
                "bias_add",
            ):
                if getattr(args, "disable_" + name):
                    setattr(options, "enable_" + name, False)

        return options

    @staticmethod
    def add_arguments(parser: ArgumentParser):
        # All switches are optional store_true flags that default to False.
        flags = [
            ("--disable_attention", "disable Attention fusion"),
            ("--disable_skip_layer_norm", "disable SkipLayerNormalization fusion"),
            ("--disable_embed_layer_norm", "disable EmbedLayerNormalization fusion"),
            ("--disable_bias_skip_layer_norm", "disable Add Bias and SkipLayerNormalization fusion"),
            ("--disable_bias_gelu", "disable Add Bias and Gelu/FastGelu fusion"),
            ("--disable_layer_norm", "disable LayerNormalization fusion"),
            ("--disable_gelu", "disable Gelu fusion"),
            ("--enable_gelu_approximation", "enable Gelu/BiasGelu to FastGelu conversion"),
            ("--disable_shape_inference", "disable symbolic shape inference"),
            ("--enable_gemm_fast_gelu", "enable GemmfastGelu fusion"),
            (
                "--use_mask_index",
                "use mask index to activate fused attention to speed up. It requires right-side padding!",
            ),
            (
                "--use_raw_attention_mask",
                "use raw attention mask. Use this option if your input is not right-side padding. "
                "This might deactivate fused attention and get worse performance.",
            ),
            ("--no_attention_mask", "no attention mask. Only works for model_type=bert"),
            (
                "--use_multi_head_attention",
                "Use MultiHeadAttention instead of Attention operator for testing purpose. "
                "Note that MultiHeadAttention might be slower than Attention when qkv are not packed.",
            ),
            ("--disable_group_norm", "not fuse GroupNorm. Only works for model_type=unet or vae"),
            (
                "--disable_skip_group_norm",
                "not fuse Add + GroupNorm to SkipGroupNorm. Only works for model_type=unet or vae",
            ),
            (
                "--disable_packed_kv",
                "not use packed kv for cross attention in MultiHeadAttention. Only works for model_type=unet",
            ),
            (
                "--disable_packed_qkv",
                "not use packed qkv for self attention in MultiHeadAttention. Only works for model_type=unet",
            ),
            ("--disable_bias_add", "not fuse BiasAdd. Only works for model_type=unet"),
            ("--disable_bias_splitgelu", "not fuse BiasSplitGelu. Only works for model_type=unet"),
            ("--disable_nhwc_conv", "Do not use NhwcConv. Only works for model_type=unet or vae"),
            (
                "--use_group_norm_channels_first",
                "Use channels_first (NCHW) instead of channels_last (NHWC) for GroupNorm. "
                "Only works for model_type=unet or vae",
            ),
            ("--disable_rotary_embeddings", "Do not fuse rotary embeddings into RotaryEmbedding op"),
        ]
        for flag, help_text in flags:
            parser.add_argument(flag, required=False, action="store_true", help=help_text)
            parser.set_defaults(**{flag.lstrip("-"): False})
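# Minimal usage sketch: register the fusion flags on an ArgumentParser and turn the
# parsed namespace into a FusionOptions instance via FusionOptions.parse(). The
# --model_type flag below is added here only for illustration; parse() assumes the
# namespace carries a model_type attribute alongside the flags added above.
if __name__ == "__main__":
    demo_parser = ArgumentParser(description="FusionOptions demo")
    demo_parser.add_argument("--model_type", default="bert")
    FusionOptions.add_arguments(demo_parser)
    demo_args = demo_parser.parse_args()

    demo_options = FusionOptions.parse(demo_args)
    print("attention fusion enabled:", demo_options.enable_attention)
    print("attention mask format:", demo_options.attention_mask_format)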