o
    ²3IhE  ã                   @   sL   d dl mZ d dlmZ d dlmZ d dlmZ eeƒZ	G dd„ deƒZ
dS )é    )Ú	getLogger)ÚFusion)Úhelper)Ú	OnnxModelc                       s”   e Zd Zdef‡ fdd„Zdedefdd„Zded	B fd
d„Zdededed	B fdd„Z	dededed	B fdd„Z
dededed	B fdd„Z‡  ZS )ÚFusionFastGeluÚmodelc                    s   t ƒ  |dd¡ d S )NÚFastGeluÚTanh)ÚsuperÚ__init__)Úselfr   ©Ú	__class__© úh/home/air/sanwanet/gpt-api/venv/lib/python3.10/site-packages/onnxruntime/transformers/fusion_fastgelu.pyr      s   zFusionFastGelu.__init__Úinput_name_to_nodesÚoutput_name_to_nodec                 C   sL   |   |||¡r	d S |  |||¡rd S |  |||¡rd S |  |||¡r$d S d S )N)Úfuse_1Úfuse_2Úfuse_3Úfuse_4)r   Ú	tanh_noder   r   r   r   r   Úfuse   s   ÿzFusionFastGelu.fuseÚreturnNc                 C   sŠ  |j d |vr	dS ||j d  }t|ƒdks|d jdkrdS |d }| j |d¡s,dS |j d |vr5dS ||j d  }t|ƒdksI|d jdkrKdS |d }| j |dd|¡}|du r^dS | j |d¡}|dk rkdS |j|dkrsdnd }	| j ||dkr€dnd|¡}
| j |dd|¡}|du r“dS | jj|dd	d
}|dk r¢dS | j |d|dkr­dnd|¡}|du r·dS | jj|dd||
rÃ|
gng d}|du rÍdS | jj|dd	d
}|dk rÜdS | j |d|dkrçdnd|¡}|du rñdS | j |d¡súdS |jd |	krdS ||||||||g}| j 	||j d g||¡sdS | j
 |¡ tjd|	g|j | j d¡d}d|_| j |¡ | j| j|j< dS )aj  
        Fuse Gelu with tanh into one node:
              +---------------------------+
              |                           |
              |                           v
            [root] --> Pow --> Mul -----> Add  --> Mul --> Tanh --> Add --> Mul
              |       (Y=3)   (B=0.0447...)       (B=0.7978...)    (B=1)     ^
              |                                                              |
              +------> Mul(B=0.5)--------------------------------------------+
        Note that constant input for Add and Mul could be first or second input: like either A=0.5 or B=0.5 is fine.
        r   Né   ÚAddç      ð?ÚMulç      à?ç+ö—Ý“‡é?ç-Cëâ6?©Údelta©Úexcludeç¥,Cëâ¦?ÚPowç      @r   ©ÚinputsÚoutputsÚnameúcom.microsoftT)ÚoutputÚlenÚop_typer   Úhas_constant_inputÚmatch_parentÚfind_constant_inputÚinputÚ
get_parentÚis_safe_to_fuse_nodesÚnodes_to_removeÚextendr   Ú	make_nodeÚcreate_node_nameÚdomainÚnodes_to_addÚappendÚthis_graph_nameÚnode_name_to_graph_namer+   )r   r   r   r   ÚchildrenÚadd_after_tanhÚmul_after_tanhÚmul_halfÚiÚ
root_inputÚ	root_nodeÚmul_before_tanhÚadd_before_tanhÚmul_after_powÚpowÚsubgraph_nodesÚ
fused_noder   r   r   r      sš   ûø

ü
üzFusionFastGelu.fuse_1c                 C   sÐ  |j d |vr	dS ||j d  }t|ƒdks|d jdkrdS |d }| j |d¡s,dS |j d |vr5dS ||j d  }t|ƒdksI|d jdkrKdS |d }| j |d¡}|dk r\dS |j d |vredS ||j d  }t|ƒdksy|d jdkr{dS |d }| j ||jd |j d krdnd|¡}	| j |dd|¡}
|
du r¢dS | jj|
dd	d
}|dk r±dS | j |
d|dkr¼dnd|¡}|du rÆdS | jj|dd||	rÒ|	gng d}|du rÜdS | jj|dd	d
}|dk rëdS | j |d|dkrödnd|¡}|du rdS | j |d¡sdS |j|jd |j d krdnd }|jd |kr'dS |||||
|||g}| j 	||j d g||¡sAdS | j
 |¡ tjd|g|j | j d¡d}d|_| j |¡ | j| j|j< dS )aÐ  
        This pattern is from Tensorflow model.
        Fuse Gelu with tanh into one node:
              +---------------------------+
              |                           |
              |                           v
            [root] --> Pow --> Mul -----> Add  --> Mul --> Tanh --> Add --> Mul(B=0.5)-->Mul-->
              |       (Y=3)   (B=0.0447...)       (B=0.7978...)    (B=1)                  ^
              |                                                                           |
              +---------------------------------------------------------------------------+
        Note that constant input for Add and Mul could be first or second input: like either A=0.5 or B=0.5 is fine.
        r   Nr   r   r   r   r   r   r    r!   r#   r%   r&   r'   r   r(   r,   T)r-   r.   r/   r   r0   r2   r4   r3   r1   r5   r6   r7   r   r8   r9   r:   r;   r<   r=   r>   r+   )r   r   r   r   r?   r@   rB   rC   Úmul_after_mul_halfrE   rF   rG   rH   rI   rD   rJ   rK   r   r   r   r   ‹   s¨   ýû
$ø

ü
üzFusionFastGelu.fuse_2c              	   C   sØ  |j d |vr	dS ||j d  }t|ƒdks|d jdkrdS |d }| j |d¡s,dS |j d |vr5dS ||j d  }t|ƒdksI|d jdkrKdS |d }| j |dd|¡}|du r^dS | j |d¡}|dk rkdS |j|dkrsdnd }	| j |dd|¡}
|
du r…dS | j |
dd|¡}|du r”dS | j |d¡}|dk r¡dS | j |
dd|¡}|du r°dS | jj|dd	d
}|dk r¿dS |j|dkrÇdnd |	krÎdS | j |d|dkrÙdnd|¡}|du rãdS |jd |	kríd}n|jd |	kr÷d}ndS | j |d||¡}|du r	dS | jj|dd	d
}|dk rdS |j|dkr"dnd |	kr*dS ||||
|||||g	}| j ||j d g||¡sEdS | j	 
|¡ tjd|	g|j | j d¡d}d|_| j |¡ | j| j|j< dS )až  
        OpenAI's gelu implementation, also used in Megatron:
           Gelu(x) = x * 0.5 * (1.0 + torch.tanh(0.79788456 * x * (1.0 + 0.044715 * x * x)))

        Fuse subgraph into a FastGelu node:
            +------------ Mul (B=0.79788456) -------------------+
            |                                                   |
            +-------------------------------+                   |
            |                               |                   |
            |                               v                   v
          [root] --> Mul (B=0.044715) --> Mul --> Add(B=1) --> Mul --> Tanh --> Add(B=1) --> Mul-->
            |                                                                                 ^
            |                                                                                 |
            +-----------> Mul (B=0.5) --------------------------------------------------------+
        r   Nr   r   r   r   r   r   r    r!   r%   r   r(   r,   T)r-   r.   r/   r   r0   r1   r2   r3   r5   r6   r7   r   r8   r9   r:   r;   r<   r=   r>   r+   )r   r   r   r   r?   r@   Úmul_lastrB   rC   rD   rF   Úadd_1ÚjÚmul_7978ÚkÚmul_before_add_1ÚanotherÚmul_0447ÚmrJ   rK   r   r   r   r   ÿ   s¤   

÷
ü
üzFusionFastGelu.fuse_3c              	   C   s  |j d |vr	dS ||j d  }t|ƒdks|d jdkrdS |d }| j |d¡s,dS |j d |vr5dS ||j d  }t|ƒdksI|d jdkrKdS |d }|j d |vrXdS ||j d  }t|ƒdksl|d jdkrndS |d }| j |d¡s{dS |j|jd |j d kr‰dnd }| j |dd|¡}	|	du r›dS | jj|	dd	d
}
|
dk rªdS | j |	d|
dkrµdnd|¡}|du r¿dS |jd |krÉd}n|jd |krÓd}ndS | j |d||¡}|du rädS | jj|dd	d
}|dk ródS | j |d|dkrþdnd|¡}|du r	dS |jd |krd}n|jd |krd}ndS | j |d||¡}|du r1dS |jd |ksA|jd |krCdS |||||	||||g	}| j ||j d g||¡s^dS | j	 
|¡ tjd|g|j | j d¡d}d|_| j |¡ | j| j|j< |  d¡ dS )aR  
        PyTorch's gelu implementation with tanh approximation:
           Gelu(x) = 0.5 * x * (1 + torch.tanh(0.7978845834732056 * (x + 0.044714998453855515 * x * x * x)))

        Fuse Gelu with tanh into one node:
              +-----------------+------------------+
              |                 |                  |
              |                 v                  v
            [root] ==> Mul --> Mul --> Mul -----> Add  --> Mul --> Tanh --> Add -----> Mul --> Mul -->
              |                       (A=0.0447)          (A=0.7978)        (A=1)       ^     (A=0.5)
              |                                                                         |
              +-------------------------------------------------------------------------+
        Note that constant input for Add and Mul could be first or second input.
        r   Nr   r   r   r   r   r   g{®Gáz„?r!   r%   r   r(   r,   T)r-   r.   r/   r   r0   r3   r1   r2   r5   r6   r7   r   r8   r9   r:   r;   r<   r=   r>   r+   Úincrease_counter)r   r   r   r   r?   r@   rA   rB   rD   rF   rQ   rG   rS   rH   rU   Ú	mul_cubedÚmul_squaredrJ   rK   r   r   r   r   s  sª   "

 ÷
ü
ü
zFusionFastGelu.fuse_4)Ú__name__Ú
__module__Ú__qualname__r   r   Údictr   Úboolr   r   r   r   Ú__classcell__r   r   r   r   r      s    lt"tr   N)Úloggingr   Úfusion_baser   Úonnxr   Ú
onnx_modelr   rY   Úloggerr   r   r   r   r   Ú<module>   s   