o
    3Ih<2                     @   s|   d dl mZ d dlZd dlmZmZ d dlmZmZmZm	Z	 d dlm
Z d dlmZ eeZG dd dZG d	d
 d
ZdS )    )	getLoggerN)array_equalndarray)	NodeProtoTensorProtohelpernumpy_helper)onnx_pb)	OnnxModelc                
   @   s.  e Zd ZdefddZdedeeef fddZd1defd	d
Z				d2dede
dedB dedB fddZdefddZdefddZedd Zed3defddZdededB fddZed4defddZed ejfd!d"Zed5dedefd$d%Zd&e
fd'd(Zd)d* Zd+d, Zd-d. Zd/d0 ZdS )6FusionUtilsmodelc                 C   s
   || _ d S N)r   )selfr    r   e/home/air/sanwanet/gpt-api/venv/lib/python3.10/site-packages/onnxruntime/transformers/fusion_utils.py__init__   s   
zFusionUtils.__init__
input_namereturnc                 C   sn   | j |}|d ur&|jjjtjkr&| |\}}t	d| d d|fS t	d| d|d u  d|fS )NzCasted graph input z	 to int32TzDid not cast graph input z to int32: found F)
r   find_graph_inputtypetensor_type	elem_typer   INT32cast_input_to_int32loggerdebug)r   r   graph_inputcast_output	cast_noder   r   r   cast_graph_input_to_int32   s   z%FusionUtils.cast_graph_input_to_int32int32c                 C   sf   |d | }|dkrt tj}n|dkrt tj}n|dkr$t tj}ntd| |||}||fS )N_r    float32float16z"Invalid target_type: {target_type})intr   r   FLOATFLOAT16
ValueErroradd_cast_node)r   r   target_typeoutput_nameto_typer   r   r   r   
cast_input   s   zFusionUtils.cast_inputNr+   r*   
graph_namec           	      C   s   |d u r|d|  }|g}|d u r| j  }||v r,|| }|r,|jdkr,|jd g}tjd||gd}|jtd|g | j j	||d |S )N	_cast_to_Castr   )inputsoutputsto)r-   )
r   output_name_to_nodeop_typeinputr   	make_node	attributeextendmake_attributeadd_node)	r   r   r+   r*   r3   r-   r0   parent_noder   r   r   r   r(   .   s   
zFusionUtils.add_cast_nodec                 C   s   |  |dS )Nr    )r,   )r   r   r   r   r   r   I   s   zFusionUtils.cast_input_to_int32c                 C   s   | j  }|| }|D ]4}|jdkr?d}|jD ]}|jdkr*|jttjkr*d} nq|r?|j	d }| j 
| | j || qd S )Nr/   Fr2   Tr   )r   input_name_to_nodesr4   r7   nameir$   r   r   outputremove_nodereplace_input_of_all_nodes)r   r   r<   nodesnodeis_int32attr*   r   r   r   remove_cast_int32L   s    



zFusionUtils.remove_cast_int32c                 C   s|   d}| j | |v r%| || j |  v r%|| j |  |  t|| j |  }|| j |< ||v r7|| |  |S | g||< |S )Nr   )r5   removelenappend)rC   r>   new_input_namer<   old_input_referencer   r   r   update_node_input[   s    

zFusionUtils.update_node_inputr   c           
      C   s<   |j | }|j | }t||||}|dko| | }	|	S )a  
        Before:
              (input)-->parent-->node-->(output)
        After:
              (input)-->parent-->
                |
                +----->node-->(output)

        This function returns a flag whether the parent node can be removed.
        r   )r5   r   rL   find_graph_output)
r   rC   r;   r<   node_input_indexparent_input_indexold_input_namerJ   rK   parent_can_be_removedr   r   r   skip_parentk   s
   

zFusionUtils.skip_parentrC   c                 C   sV   |j dv sJ t|jdkr| j|jd S d }|jD ]}|jdkr(t|}q|S )N)Squeeze	Unsqueeze   axes)	r4   rH   r5   r   get_constant_valuer7   r=   r   get_attribute_value)r   rC   rV   attrr   r   r   get_squeeze_or_unsqueeze_axes   s   


z)FusionUtils.get_squeeze_or_unsqueeze_axesattribute_namec                 C   sR   |}| j D ]}|j|krt|}qt|tr%t|ttfo$t||ddS ||kS )a  Verify that a node has expected value for an attribute.

        Args:
            node (NodeProto): a node to check
            attribute_name (str): name of attribute
            expected_value (Any): expected value of the attribute
            default_value (Any, optional): default value if the attribute does not exist. Defaults to None.

        Returns:
            bool: whether the check is passed or not
        F	equal_nan)r7   r=   r   rX   
isinstancelistr   r   )rC   r[   expected_valuedefault_valuevaluerY   r   r   r   check_node_attribute   s   



z FusionUtils.check_node_attributetensorc                 C   s   t | tjstdt|  t| jdks| jtjjkr!td| j	r@t
t
j| j	dd| j}t
|ddg}| | _	| S td)	zTranspose a 2-D INT8 TensorProto
        Args:
            tensor (TensorProto): tensor to be transposed
        Returns:
            tensor (TensorProto): transposed tensor
        z3Expected input type is an ONNX TensorProto but got    z'Only INT8 2-D tensors can be transposedint8)dtyperU   r   zonly raw buffer supported)r^   
onnx_protor   r'   r   rH   dims	data_typeINT8raw_datanumpyreshape
frombuffer	transposetobytes)rd   
int32_dataint32_transposed_datar   r   r   transpose_2d_int8_tensor   s   
z$FusionUtils.transpose_2d_int8_tensorTc                 C   s   | j dvrtd| j   || jd }|du rdS |jdkp,|jdko,|jd dk}|r3|s3dS t| jdkr<dS || jd }|j|jkrLdS |du rRdS t	|dkS )	a  Verify if a provided QuantizeLinear (Q) / DequantizeLinear (DQ) node is a good candidate for fusion.
           It is a good candidate for fusion if:
           (1) The Q/DQ node is for per-tensor quantization if allow_per_tensor_quantization_only is `True`
           (2) The Q/DQ node should have constant scale
           (3) The Q/DQ node should have a zero point of 0
        Args:
            node (NodeProto): a Q/DQ node to check
        Returns:
            bool: whether the check is passed or not
        >   QuantizeLinearDequantizeLinearz+Provided node is not a Q/DQ node. Op Type: rU   NFr   re   T)
r4   r   r   rW   r5   ndimshaperH   rm   all)rC   r   "allow_per_tensor_quantization_onlyscalescale_has_single_element
zero_pointr   r   r   check_qdq_node_for_fusion   s    
"z%FusionUtils.check_qdq_node_for_fusioninput_indexc                 C   sR   t |j|ks	J | j|j| }t|tr%t|ttfo$t||ddS ||kS )a7  Verify that a node has expected input value

        Args:
            node (NodeProto): a node to check
            input_index (int): index of its input to be verified
            expected_value (Any): expected value of the input

        Returns:
            bool: whether the check is passed or not
        Fr\   )rH   r5   r   rW   r^   r_   r   r   )r   rC   r   r`   rb   r   r   r   check_node_input_value   s
   
z"FusionUtils.check_node_input_valuec                 C   s   g }| j  }| j  D ] }|jdkr,|jd |vr,| j |jd |jd  || q|rB| j | t	
dt| d dS dS )z>Remove Identity nodes, except those right before graph output.Identityr   zRemoved z Identity nodesN)r   get_graphs_output_namesrB   r4   r?   rA   r5   rI   remove_nodesr   inforH   )r   nodes_to_removegraph_output_namesrC   r   r   r   remove_identity_nodes   s   


z!FusionUtils.remove_identity_nodesc                 C      | j   d S r   )r   remove_cascaded_cast_nodesr   r   r   r   r   
     z&FusionUtils.remove_cascaded_cast_nodesc                 C   r   r   )r   remove_useless_cast_nodesr   r   r   r   r     r   z%FusionUtils.remove_useless_cast_nodesc                 C   s@  | j jdd}|du rdS g }| j  D ]0}|jdkrD||jd }||jd }|rD|rD||krDtd|j	 d|  |
| q|rt| j  }t| j  }|D ]F}tt|j|@ rtt|j|@ st| j  |jd  dkr| j |jd |jd  nqW| j |jd |jd  | j | qWdS dS )	ziRemove reshape node that is not needed based on symbolic shape inference: input and output has same shapeT)updateNReshaper   zRemove reshape node z* since its input shape is same as output: rU   )r   infer_runtime_shaperB   r4   get_edge_shaper5   r?   r   r   r=   rI   setget_graphs_input_namesr   boolrH   r<   replace_output_of_all_nodesrA   r@   )r   shape_inferr   rC   input_shapeoutput_shapegraph_input_namesr   r   r   r   remove_useless_reshape_nodes  s8   

z(FusionUtils.remove_useless_reshape_nodes)r    )NNN)r   r   r   )T)__name__
__module____qualname__r
   r   strtupler   r   r,   r$   r(   r   rF   staticmethodrL   rR   r   r   rZ   rc   rh   r   rt   r~   r   r   r   r   r   r   r   r   r   r      sD    


*r   c                   @   s(   e Zd ZeddededefddZdS )	NumpyHelperFrd   
fill_zerosr   c                 C   s$   |rt | jt| jdS t| S )N)rx   rg   )r   ri   r   tensor_dtype_to_np_dtyperj   r   to_array)rd   r   r   r   r   r   3  s   

zNumpyHelper.to_arrayN)F)r   r   r   r   r   r   r   r   r   r   r   r   r   2  s    r   )loggingr   rm   r   r   onnxr   r   r   r   r	   rh   
onnx_modelr
   r   r   r   r   r   r   r   r   <module>   s     $