o
    ²3Ih<2  ã                   @   s|   d dl mZ d dlZd dlmZmZ d dlmZmZmZm	Z	 d dlm
Z d dlmZ eeƒZG dd„ dƒZG d	d
„ d
ƒZdS )é    )Ú	getLoggerN)Úarray_equalÚndarray)Ú	NodeProtoÚTensorProtoÚhelperÚnumpy_helper)Úonnx_pb)Ú	OnnxModelc                
   @   s.  e Zd Zdefdd„Zdedeeef fdd„Zd1defd	d
„Z				d2dede
dedB dedB fdd„Zdefdd„Zdefdd„Zedd„ ƒZed3defdd„ƒZdededB fdd„Zed4defdd„ƒZed ejfd!d"„ƒZed5dedefd$d%„ƒZd&e
fd'd(„Zd)d*„ Zd+d,„ Zd-d.„ Zd/d0„ ZdS )6ÚFusionUtilsÚmodelc                 C   s
   || _ d S ©N)r   )Úselfr   © r   úe/home/air/sanwanet/gpt-api/venv/lib/python3.10/site-packages/onnxruntime/transformers/fusion_utils.pyÚ__init__   s   
zFusionUtils.__init__Ú
input_nameÚreturnc                 C   sn   | j  |¡}|d ur&|jjjtjkr&|  |¡\}}t 	d|› d¡ d|fS t 	d|› d|d u› ¡ d|fS )NzCasted graph input z	 to int32TzDid not cast graph input z to int32: found F)
r   Úfind_graph_inputÚtypeÚtensor_typeÚ	elem_typer   ÚINT32Úcast_input_to_int32ÚloggerÚdebug)r   r   Úgraph_inputÚcast_outputÚ	cast_noder   r   r   Úcast_graph_input_to_int32   s   z%FusionUtils.cast_graph_input_to_int32Úint32c                 C   sf   |d | }|dkrt tjƒ}n|dkrt tjƒ}n|dkr$t tjƒ}ntdƒ‚|  |||¡}||fS )NÚ_r    Úfloat32Úfloat16z"Invalid target_type: {target_type})Úintr   r   ÚFLOATÚFLOAT16Ú
ValueErrorÚadd_cast_node)r   r   Útarget_typeÚoutput_nameÚto_typer   r   r   r   Ú
cast_input   s   zFusionUtils.cast_inputNr+   r*   Ú
graph_namec           	      C   s”   |d u r|d|›  }|g}|d u r| j  ¡ }||v r,|| }|r,|jdkr,|jd g}tjd||gd}|j t d|¡g¡ | j j	||d |S )NÚ	_cast_to_ÚCastr   )ÚinputsÚoutputsÚto)r-   )
r   Úoutput_name_to_nodeÚop_typeÚinputr   Ú	make_nodeÚ	attributeÚextendÚmake_attributeÚadd_node)	r   r   r+   r*   r3   r-   r0   Úparent_noder   r   r   r   r(   .   s   
zFusionUtils.add_cast_nodec                 C   s   |   |d¡S )Nr    )r,   )r   r   r   r   r   r   I   s   zFusionUtils.cast_input_to_int32c                 C   s„   | j  ¡ }|| }|D ]4}|jdkr?d}|jD ]}|jdkr*|jttjƒkr*d} nq|r?|j	d }| j  
|¡ | j  ||¡ qd S )Nr/   Fr2   Tr   )r   Úinput_name_to_nodesr4   r7   ÚnameÚir$   r   r   ÚoutputÚremove_nodeÚreplace_input_of_all_nodes)r   r   r<   ÚnodesÚnodeÚis_int32Úattr*   r   r   r   Úremove_cast_int32L   s    


€
€özFusionUtils.remove_cast_int32c                 C   s|   d}| j | |v r%| || j |  v r%|| j |   | ¡ t|| j |  ƒ}|| j |< ||v r7||  | ¡ |S | g||< |S )Nr   )r5   ÚremoveÚlenÚappend)rC   r>   Únew_input_namer<   Úold_input_referencer   r   r   Úupdate_node_input[   s    

þzFusionUtils.update_node_inputr   c           
      C   s<   |j | }|j | }t ||||¡}|dko|  |¡ }	|	S )a  
        Before:
              (input)-->parent-->node-->(output)
        After:
              (input)-->parent-->
                |
                +----->node-->(output)

        This function returns a flag whether the parent node can be removed.
        r   )r5   r   rL   Úfind_graph_output)
r   rC   r;   r<   Únode_input_indexÚparent_input_indexÚold_input_namerJ   rK   Úparent_can_be_removedr   r   r   Úskip_parentk   s
   

zFusionUtils.skip_parentrC   c                 C   sV   |j dv sJ ‚t|jƒdkr| j |jd ¡S d }|jD ]}|jdkr(t |¡}q|S )N)ÚSqueezeÚ	Unsqueezeé   Úaxes)	r4   rH   r5   r   Úget_constant_valuer7   r=   r   Úget_attribute_value)r   rC   rV   Úattrr   r   r   Úget_squeeze_or_unsqueeze_axes   s   


€z)FusionUtils.get_squeeze_or_unsqueeze_axesÚattribute_namec                 C   sR   |}| j D ]}|j|krt |¡}qt|tƒr%t|ttfƒo$t||ddS ||kS )a¦  Verify that a node has expected value for an attribute.

        Args:
            node (NodeProto): a node to check
            attribute_name (str): name of attribute
            expected_value (Any): expected value of the attribute
            default_value (Any, optional): default value if the attribute does not exist. Defaults to None.

        Returns:
            bool: whether the check is passed or not
        F©Ú	equal_nan)r7   r=   r   rX   Ú
isinstanceÚlistr   r   )rC   r[   Úexpected_valueÚdefault_valueÚvaluerY   r   r   r   Úcheck_node_attributeŽ   s   


€
z FusionUtils.check_node_attributeÚtensorc                 C   sˆ   t | tjƒstdt| ƒ› ƒ‚t| jƒdks| jtjjkr!tdƒ‚| j	r@t
 t
j| j	dd| j¡}t
 |ddg¡}| ¡ | _	| S tdƒ‚)	z¶Transpose a 2-D INT8 TensorProto
        Args:
            tensor (TensorProto): tensor to be transposed
        Returns:
            tensor (TensorProto): transposed tensor
        z3Expected input type is an ONNX TensorProto but got é   z'Only INT8 2-D tensors can be transposedÚint8)ÚdtyperU   r   zonly raw buffer supported)r^   Ú
onnx_protor   r'   r   rH   ÚdimsÚ	data_typeÚINT8Úraw_dataÚnumpyÚreshapeÚ
frombufferÚ	transposeÚtobytes)rd   Ú
int32_dataÚint32_transposed_datar   r   r   Útranspose_2d_int8_tensor¥   s   
þz$FusionUtils.transpose_2d_int8_tensorTc                 C   s²   | j dvrt d| j › ¡ | | jd ¡}|du rdS |jdkp,|jdko,|jd dk}|r3|s3dS t| jƒdkr<dS | | jd ¡}|j|jkrLdS |du rRdS t 	|dk¡S )	a  Verify if a provided QuantizeLinear (Q) / DequantizeLinear (DQ) node is a good candidate for fusion.
           It is a good candidate for fusion if:
           (1) The Q/DQ node is for per-tensor quantization if allow_per_tensor_quantization_only is `True`
           (2) The Q/DQ node should have constant scale
           (3) The Q/DQ node should have a zero point of 0
        Args:
            node (NodeProto): a Q/DQ node to check
        Returns:
            bool: whether the check is passed or not
        >   ÚQuantizeLinearÚDequantizeLinearz+Provided node is not a Q/DQ node. Op Type: rU   NFr   re   T)
r4   r   r   rW   r5   ÚndimÚshaperH   rm   Úall)rC   r   Ú"allow_per_tensor_quantization_onlyÚscaleÚscale_has_single_elementÚ
zero_pointr   r   r   Úcheck_qdq_node_for_fusion½   s    
"z%FusionUtils.check_qdq_node_for_fusionÚinput_indexc                 C   sR   t |jƒ|ks	J ‚| j |j| ¡}t|tƒr%t|ttfƒo$t||ddS ||kS )a7  Verify that a node has expected input value

        Args:
            node (NodeProto): a node to check
            input_index (int): index of its input to be verified
            expected_value (Any): expected value of the input

        Returns:
            bool: whether the check is passed or not
        Fr\   )rH   r5   r   rW   r^   r_   r   r   )r   rC   r   r`   rb   r   r   r   Úcheck_node_input_valueè   s
   
z"FusionUtils.check_node_input_valuec                 C   sˆ   g }| j  ¡ }| j  ¡ D ] }|jdkr,|jd |vr,| j  |jd |jd ¡ | |¡ q|rB| j  |¡ t	 
dt|ƒ› d¡ dS dS )z>Remove Identity nodes, except those right before graph output.ÚIdentityr   zRemoved z Identity nodesN)r   Úget_graphs_output_namesrB   r4   r?   rA   r5   rI   Úremove_nodesr   ÚinforH   )r   Únodes_to_removeÚgraph_output_namesrC   r   r   r   Úremove_identity_nodesü   s   


€þz!FusionUtils.remove_identity_nodesc                 C   ó   | j  ¡  d S r   )r   Úremove_cascaded_cast_nodes©r   r   r   r   r‰   
  ó   z&FusionUtils.remove_cascaded_cast_nodesc                 C   rˆ   r   )r   Úremove_useless_cast_nodesrŠ   r   r   r   rŒ     r‹   z%FusionUtils.remove_useless_cast_nodesc                 C   s@  | j jdd}|du rdS g }| j  ¡ D ]0}|jdkrD| |jd ¡}| |jd ¡}|rD|rD||krDt d|j	› d|› ¡ | 
|¡ q|rœt| j  ¡ ƒ}t| j  ¡ ƒ}|D ]F}tt|jƒ|@ ƒrˆtt|jƒ|@ ƒs‡t| j  ¡ |jd  ƒdkr‡| j  |jd |jd ¡ nqW| j  |jd |jd ¡ | j  |¡ qWdS dS )	ziRemove reshape node that is not needed based on symbolic shape inference: input and output has same shapeT)ÚupdateNÚReshaper   zRemove reshape node z* since its input shape is same as output: rU   )r   Úinfer_runtime_shaperB   r4   Úget_edge_shaper5   r?   r   r„   r=   rI   ÚsetÚget_graphs_input_namesr‚   ÚboolrH   r<   Úreplace_output_of_all_nodesrA   r@   )r   Úshape_inferr…   rC   Úinput_shapeÚoutput_shapeÚgraph_input_namesr†   r   r   r   Úremove_useless_reshape_nodes  s8   
ÿ
€ÿòz(FusionUtils.remove_useless_reshape_nodes)r    )NNN)r   r   r   )T)Ú__name__Ú
__module__Ú__qualname__r
   r   ÚstrÚtupler“   r   r,   r$   r(   r   rF   ÚstaticmethodrL   rR   r   r   rZ   rc   rh   r   rt   r~   r€   r‡   r‰   rŒ   r™   r   r   r   r   r      sD    
úþýü
ú
*r   c                   @   s(   e Zd Zeddededefdd„ƒZdS )	ÚNumpyHelperFrd   Ú
fill_zerosr   c                 C   s$   |rt | jt | j¡dS t | ¡S )N)rx   rg   )r   ri   r   Útensor_dtype_to_np_dtyperj   r   Úto_array)rd   r¡   r   r   r   r£   3  s   
þ
zNumpyHelper.to_arrayN)F)rš   r›   rœ   rŸ   r   r“   r   r£   r   r   r   r   r    2  s    r    )Úloggingr   rm   r   r   Úonnxr   r   r   r   r	   rh   Ú
onnx_modelr
   rš   r   r   r    r   r   r   r   Ú<module>   s     $