o
    3Ih&                     @   s   d dl Zd dlZd dlmZ ddlmZmZmZm	Z	m
Z
mZ ddlmZ ddlmZ G dd	 d	eZG d
d deZG dd deZdS )    N)onnx_pb   )TENSOR_NAME_QUANT_SUFFIXQuantizedValueQuantizedValueTypeattribute_to_kwargfind_by_nameget_mul_node   )QuantOperatorBase)QDQOperatorBasec                       s,   e Zd Z fddZdd Zdd Z  ZS )ConvIntegerc                       t  || d S Nsuper__init__selfonnx_quantizer	onnx_node	__class__ g/home/air/sanwanet/gpt-api/venv/lib/python3.10/site-packages/onnxruntime/quantization/operators/conv.pyr         zConvInteger.__init__c                 C   s   | j }| jj}t|jd | }|du r td|jd  d|jd }|jd }|d }|d }	tj	t
|jtjd	}
d
|
d< tj|tjjt
|jg|
}|| tjd||g|	g}|| tjd||	g|g|d }|| dS )a  
        Given a node, this function handles bias add by adding a "reshape" node on bias and an "add" node
            parameter nodes: new nodes would be appended into nodes
            parameter node: current node (Conv)
            parameter scaled_output: output of quant conv without bias
            parameter output: output of Conv
            parameter bias_name: bias of Conv
            return: the name of output
        r
   Nz	Expected z to be an initializerr   r   _bias_reshape_shape_bias_reshape_output)dtypeReshapeAdd	_bias_add)node	quantizermodelr   inputinitializer
ValueErroroutputnponeslendimsint64onnxhelpermake_tensor
onnx_protoTensorProtoINT64add_initializer	make_nodeappend)r   nodesscaled_outputr#   r%   weightr)   reshape_input_datareshape_input_shapereshape_outputshape
init_shapereshape_nodeadd_noder   r   r   add_bias   s&   




zConvInteger.add_biasc                 C   s  | j }|jdks
J | j|dg\}}}}| jj|dg| jjd\}}}}	|| || || ||	 |jd d }
|jrI|jd nd}i }|j	D ]	}|
t| qPtjjd|| |
g|fi |}|| | jj|jd d	d
}|
d }tjjd|
g|g|
d |d}|| t|dksJ |r|d }n|d d |d  d }t|| jj}|d u rt||d |}|| |jd }t|jdk}|s|jd n|jd d }|r|d nd}|t||g|| |r| || | j j|7  _d S )NConvr   r
   reduce_range_output_quantized_quant r   T)	mandatory_cast_outputCast_cast)tor   _scales_mul__mulz:0   quant_scaled_output_output_scale_mul)r#   op_typer$   quantize_activationquantize_weightrE   extendr)   name	attributeupdater   r/   r0   r6   r7   get_tensor_typer,   r   	new_nodesr	   r&   rB   )r   r#   quantized_input_nameszero_point_namesscale_namesr8   quantized_input_names_weightzero_point_names_weightscale_names_weightnodes_weightconv_integer_outputconv_integer_namekwargsrY   conv_integer_node	onnx_typecast_op_output	cast_nodescales_mul_opscales_mul_nodescales_mul_op_outputhas_biasscaled_output_nameoutput_scale_mul_opr   r   r   quantize:   sz   









zConvInteger.quantize)__name__
__module____qualname__r   rB   rq   __classcell__r   r   r   r   r      s    %r   c                       s(   e Zd Z fddZ fddZ  ZS )QLinearConvc                    r   r   r   r   r   r   r   r      r   zQLinearConv.__init__c                    s  | j }|jdks
J | j|jd \}}}}}| j|jd rU| j rU| j|dg\}}}}	| j	|jd t
jjd}
||
d  ||
d  ||
d  n0| j|dg\}}}}	| jj|dg| jjd\}}}}|| || || |	| |r|d u rt  S d}d}t|jdkr| jjt
jjkrtd	| j|jd |jd |jd }d
}|jd t }|jr|jd nd}i }|jD ]	}|t| qg }||d  ||d  ||d  ||d  ||d  ||d  || || |r|| tjjd||g|fi |}|	| t |jd |||t!j"}|| jj#|jd < | j j$|	7  _$d S )NrC   r   r
   r   rD   rH   FrQ   z@Quantization to FLOAT8E4M3FN for operator Conv is not supported.TrG   rv   )%r#   rT   r$   _get_quantization_paramsr)   is_input_a_initializerr&   is_per_channelrU   quantize_weight_per_channelr2   r3   INT8r7   rV   rE   rW   r   rq   r,   weight_qTypeFLOAT8E4M3FNRuntimeErrorquantize_bias_staticr   rX   rY   rZ   r   r/   r0   r6   r   r   Inputquantized_value_mapr\   )r   r#   
data_foundoutput_scale_nameoutput_zp_namerO   r]   r^   r_   r8   quant_weight_tupler`   ra   rb   rc   quantized_bias_namebias_presentqlinear_conv_outputqlinear_conv_namerf   rY   qlinear_conv_inputsqlinear_conv_nodeq_outputr   r   r   rq      s   




"





zQLinearConv.quantizerr   rs   rt   r   rq   ru   r   r   r   r   rv      s    rv   c                       s$   e Zd Z fddZdd Z  ZS )QDQConvc                    r   r   r   r   r   r   r   r      r   zQDQConv.__init__c                 C   s   | j }|jdks|jdksJ | j|jd  | js$| j|jd  | jj|jd |jdkr2dndd\}}|rE| j|jd | n	| j	|jd  t
|jdkrj| j|j|jd |jd |jd  d S d S )NrC   ConvTransposer   r
   )default_axisrQ   r   )r#   rT   r$   quantize_activation_tensorr&   disable_qdq_for_node_outputr)   is_tensor_per_channel"quantize_weight_tensor_per_channelquantize_weight_tensorr,   quantize_bias_tensorrX   )r   r#   is_weight_per_channelweight_axisr   r   r   rq      s   
*zQDQConv.quantizer   r   r   r   r   r      s    r   )numpyr*   r/   r   r2   quant_utilsr   r   r   r   r   r	   base_operatorr   qdq_base_operatorr   r   rv   r   r   r   r   r   <module>   s     xf