o
    3Ih                     @   s`   d dl Z d dlZd dlZd dlZd dlmZ e eZdd Z	dd Z
dd ZG d	d
 d
ZdS )    N)Conv1Dc                 C   s<   | j j\}}tj||}| j jj |j _| jj|j_|S )N)	weightshapetorchnnLineardataT
contiguousbias)modulein_sizeout_sizelinear r   h/home/air/sanwanet/gpt-api/venv/lib/python3.10/site-packages/onnxruntime/transformers/quantize_helper.py_conv1d_to_linear   s
   r   c                 C   sN   t d t| jD ]}| j| }t|tr t|}|| j|< q
t| q
dS )zsin-place
    This is for Dynamic Quantization, as Conv1D is not recognized by PyTorch, convert it to nn.Linear
    zreplace Conv1D with LinearN)loggerdebuglist_modules
isinstancer   r   conv1d_to_linear)modelnamer   r   r   r   r   r      s   



r   c                 C   s.   t |  d tjdd }td |S )Nztemp.p   )r   save
state_dictospathgetsizeremove)r   sizer   r   r   _get_size_of_pytorch_model'   s   
r#   c                   @   s,   e Zd ZeejfddZedddZdS )QuantizeHelperc                 C   sL   t |  tjj| tjjh|d}tdt|   tdt|  |S )z{
        Usage: model = quantize_model(model)

        TODO: mix of in-place and return, but results are different
        )dtypez'Size of full precision Torch model(MB):z"Size of quantized Torch model(MB):)	r   r   quantizationquantize_dynamicr   r   r   infor#   )r   r%   quantized_modelr   r   r   quantize_torch_model/   s
   z#QuantizeHelper.quantize_torch_modelFc                 C   s   ddl m} ddlm} ||jjddd tdtj	
| d   || ||dtjjid	 td
|  tdtj	
|d   d S )Nr   )Path)r'   T)parentsexist_okz&Size of full precision ONNX model(MB):r   DefaultTensorType)use_external_data_formatextra_optionszquantized model saved to:z!Size of quantized ONNX model(MB):)pathlibr+   onnxruntime.quantizationr'   parentmkdirr   r(   r   r   r    onnxTensorProtoFLOAT)onnx_model_pathquantized_model_pathr/   r+   r'   r   r   r   quantize_onnx_model<   s   
 z"QuantizeHelper.quantize_onnx_modelN)F)__name__
__module____qualname__staticmethodr   qint8r*   r:   r   r   r   r   r$   .   s
    r$   )loggingr   r5   r   transformers.modeling_utilsr   	getLoggerr;   r   r   r   r#   r$   r   r   r   r   <module>   s   
