o
    %Th                     @   s  d dl Z d dlZd dlZd dlZd dlZd dlmZ d dlmZ d dl	m
Z
 d dlZd dlZd dlmZmZmZmZ d dlZddlmZmZmZ dejd	ejd
ejfddZ		 d0dejd	ejdedB ded
ejf
ddZG dd dZG dd dZG dd deZG dd de jdZ G dd dZ!G dd de!Z"G dd de!Z#G d d! d!e#Z$G d"d# d#e#Z%G d$d% d%e#Z&G d&d' d'e jdZ'G d(d) d)e'Z(dd*ej)d+di fd,e*e
B d-ee* dB fd.d/Z+dS )1    N)Sequence)Enum)Path)
ModelProtoTensorProtohelpernumpy_helper   )
apply_plotload_model_with_shape_infersmooth_distributionpkqkreturnc                 C   s|   t j| j| jd}| dd t | dd |dd   |dd< | dk|dk@ }d||< | dk|dk@ }t j|| < |S )z
    See https://docs.scipy.org/doc/scipy/reference/generated/scipy.special.rel_entr.html#scipy.special.rel_entr.
    Python implementation.
    dtypeNr   )npemptyshaper   loginf)r   r   resc2c1 r   c/home/air/segue/gemini/back/venv/lib/python3.10/site-packages/onnxruntime/quantization/calibrate.pyrel_entr   s   2r   baseaxisc                 C   s   |du s|dksJ d|dusJ dt | t j} d|  t j| |dd } t |t j}t | |\} }d| t j||dd }t| |}t j||d}|dur]|t | }|| jS )	z
    Simplifeied version of entropy.
    Source: https://docs.scipy.org/doc/scipy/reference/generated/scipy.stats.entropy.html.
    This avoids taking a dependency on scipy just for this function.
    Nr   z0base={base} must be a positive number or `None`.z
qk is None      ?T)r   keepdimsr   )	r   asarrayastypefloat32sumbroadcast_arraysr   r   r   )r   r   r   r   vecsr   r   r   entropy'   s   
r)   c                   @   sL   e Zd Zeg dZeg dZdd Zedd Zedd Z	d	d
 Z
dS )
TensorData)avgstdlowesthighesthist
hist_edgesbins)r+   r,   r-   r.   r0   c                 K   s   t | | _| D ]E\}}|tjvr td|dtj d|tjv rJt|ds6tdt	| d||j
tjtjfvrJtd|j
 d|t| || qd S )NzUnexpected value z not in .r   Unexpected type z for k=zUnexpected dtype )listkeys_attrsitemsr*   _allowed
ValueError_floatshasattrtyper   r   float16r$   setattr)selfkwargskvr   r   r   __init__G   s   


zTensorData.__init__c                 C   4   t | dr
t | dstdt|  d| j| jfS )Nr-   r.   z0Attributes 'lowest' and/or 'highest' missing in r2   )r;   AttributeErrordirr-   r.   r?   r   r   r   range_valueS      zTensorData.range_valuec                 C   rD   )Nr+   r,   z)Attributes 'avg' and/or 'std' missing in r2   )r;   rE   rF   r+   r,   rG   r   r   r   avg_stdY   rI   zTensorData.avg_stdc                    s$    fdd j D } jj|d< |S )Nc                    s   i | ]}|t  |qS r   )getattr).0rA   rG   r   r   
<dictcomp>a   s    z&TensorData.to_dict.<locals>.<dictcomp>CLS)r6   	__class____name__r?   datar   rG   r   to_dict_   s   zTensorData.to_dictN)rP   
__module____qualname__	frozensetr8   r:   rC   propertyrH   rJ   rS   r   r   r   r   r*   C   s    

r*   c                   @   sf   e Zd ZdeeeeB f fddZdd Zdd Z	dd	 Z
d
d Zdd Zdd Zdd Zdd ZdS )TensorsDatarR   c              	   C   s   || _ i | _| D ]p\}}t|tstdt| dt|trf|tj	kr;t
|dkr;t|d |d d| j|< q
t
|dkrUt|d |d |d |d d	| j|< q
td
|ddt
| d| dt|tsutdt| d|| j|< q
d S )NzKeys must be strings not r2      r   r	   r-   r.         )r-   r.   r/   r1   zUnexpected tuple for rz	, it has z elements: zValues must be TensorData not )calibration_methodrR   r7   
isinstancestr	TypeErrorr<   tupleCalibrationMethodMinMaxlenr*   )r?   r^   rR   rA   rB   r   r   r   rC   g   s"   

&"
zTensorsData.__init__c                 c   s    | j E d H  d S NrR   rG   r   r   r   __iter__y   s   zTensorsData.__iter__c                 C   s
   || j v S rf   rg   r?   keyr   r   r   __contains__|      
zTensorsData.__contains__c                 C   s
   | j | S rf   rg   ri   r   r   r   __getitem__   rl   zTensorsData.__getitem__c                 C   s(   || j vrtd|d|| j |< d S )Nz)Only an existing tensor can be modified, z is not.)rR   RuntimeError)r?   rj   valuer   r   r   __setitem__   s   
zTensorsData.__setitem__c                 C   
   | j  S rf   )rR   r5   rG   r   r   r   r5      rl   zTensorsData.keysc                 C   rq   rf   )rR   valuesrG   r   r   r   rr      rl   zTensorsData.valuesc                 C   rq   rf   )rR   r7   rG   r   r   r   r7      rl   zTensorsData.itemsc                 C   s   | j j| j| jd}|S )N)rN   rR   r^   )rO   rP   rR   r^   rQ   r   r   r   rS      s
   zTensorsData.to_dictN)rP   rT   rU   dictr`   r*   rb   rC   rh   rk   rm   rp   r5   rr   r7   rS   r   r   r   r   rX   f   s    rX   c                   @   s   e Zd ZdZdZdZdZdS )rc   r   r	   rY   r\   N)rP   rT   rU   rd   Entropy
PercentileDistributionr   r   r   r   rc      s
    rc   c                   @   sV   e Zd Zedd ZejdefddZdd Z	dd	 Z
d
d ZdedefddZdS )CalibrationDataReaderc                 C   s   t |dr
t|jptS )Nget_next)r;   callablerx   NotImplemented)clssubclassr   r   r   __subclasshook__   s   z&CalibrationDataReader.__subclasshook__r   c                 C      t )z9generate the input data dict for ONNXinferenceSession runNotImplementedErrorrG   r   r   r   rx      s   zCalibrationDataReader.get_nextc                 C   s   | S rf   r   rG   r   r   r   rh         zCalibrationDataReader.__iter__c                 C   s   |   }|d u r
t|S rf   )rx   StopIteration)r?   resultr   r   r   __next__   s   zCalibrationDataReader.__next__c                 C   r~   rf   r   rG   r   r   r   __len__   r   zCalibrationDataReader.__len__start_index	end_indexc                 C   r~   rf   r   )r?   r   r   r   r   r   	set_range   r   zCalibrationDataReader.set_rangeN)rP   rT   rU   classmethodr}   abcabstractmethodrs   rx   rh   r   r   intr   r   r   r   r   rw      s    
rw   )	metaclassc                   @   s   e Zd Z					ddeeB dee dB fddZdgfd	d
Zdd Zde	fddZ
dd Zdd ZdefddZdefddZdS )CalibraterBaseNaugmented_model.onnxF
model_pathop_types_to_calibratec                 C   sn   t |trtt|| _nt |trt|| _ntd|| _|| _|| _|| _	|| _
d| _d| _dg| _dS )a  
        :param model_path: ONNX model to calibrate. It should be a model file path
        :param op_types_to_calibrate: operator types to calibrate. By default, calibrate all the float32/float16 tensors.
        :param augmented_model_path: save augmented model to this path.
        :param symmetric: make range of tensor symmetric (central point is 0).
        :param use_external_data_format: use external data format to store model which size is >= 2Gb.
        :param per_channel: whether to compute ranges per each channel.
        z model_path should be model path.NCPUExecutionProvider)r_   r`   r   r   modelr9   r   augmented_model_path	symmetricuse_external_data_formatper_channelaugment_modelinfer_sessionexecution_providers)r?   r   r   r   r   r   r   r   r   r   rC      s   

zCalibraterBase.__init__r   c                 C   s   || _ |   dS )zz
        reset the execution providers to execute the collect_data. It triggers to re-creating inference session.
        N)r   create_inference_session)r?   r   r   r   r   set_execution_providers   s   z&CalibraterBase.set_execution_providersc                 C   s,   t  }t jj|_t j| j|| jd| _dS )z9
        create an OnnxRuntime InferenceSession.
        )sess_options	providersN)	onnxruntimeSessionOptionsGraphOptimizationLevelORT_DISABLE_ALLgraph_optimization_levelInferenceSessionr   r   r   )r?   r   r   r   r   r      s   
z'CalibraterBase.create_inference_sessionr   c           	      C   s   dd |j jD }|dd |j jD  |dd |j jD  dd |j jD }t }tjtj	h}|j j
D ]4}| jrB|j| jv rkt|j|jD ] }||v rj|| }|jdrj|jjj|v rj||vrj|| qJq7||fS )z
        select input/output tensors of candidate nodes to calibrate.
        returns:
            tensors (set): set of tensor name.
            value_infos (dict): tensor name to value info.
        c                 S      i | ]}|j |qS r   namerL   vir   r   r   rM          z>CalibraterBase.select_tensors_to_calibrate.<locals>.<dictcomp>c                 S   r   r   r   )rL   otr   r   r   rM      r   c                 S   r   r   r   )rL   itr   r   r   rM      r   c                 S      h | ]}|j qS r   r   )rL   initr   r   r   	<setcomp>       z=CalibraterBase.select_tensors_to_calibrate.<locals>.<setcomp>tensor_type)graph
value_infoupdateoutputinputinitializersetr   FLOATFLOAT16noder   op_type	itertoolschainr<   HasFieldr   	elem_typeadd)	r?   r   value_infosr   tensors_to_calibratetensor_type_to_calibrater   tensor_namer   r   r   r   select_tensors_to_calibrate   s$   

z*CalibraterBase.select_tensors_to_calibratec                 C      | j S )zP
        return: augmented onnx model. Call after calling augment_graph
        )r   rG   r   r   r   get_augment_model  s   z CalibraterBase.get_augment_modelc                 C   r~   )z
        abstract method: augment the input model to prepare for collecting data. It will:
            1. augment the model to be able to collect desired statistics data
            2. save augmented model to augmented_model_paths
        r   rG   r   r   r   augment_graph  s   zCalibraterBase.augment_graphdata_readerc                 C   r~   )z
        abstract method: collect the tensors that will be used for range computation. It can be called multiple times.
        r   )r?   r   r   r   r   collect_data     zCalibraterBase.collect_datar   c                 C   r~   )ze
        abstract method: compute data based on the calibration method stored in TensorsData
        r   rG   r   r   r   compute_data"  r   zCalibraterBase.compute_data)Nr   FFF)rP   rT   rU   r`   r   r   rC   r   r   r   r   r   r   rw   r   rX   r   r   r   r   r   r      s$    

"r   c                       sx   e Zd Z								ddeeB dee dB f fddZd	d
 Zdd Zde	fddZ
dd ZdefddZ  ZS )MinMaxCalibraterNr   F{Gz?r   r   c
           
         s|   t  j||||||	d g | _d| _t| jjj| _dd | jjjD | _	|| _
|r6|dk s2|dkr6td|| _|| _dS )aw  
        :param model_path: ONNX model to calibrate. It is a model path
        :param op_types_to_calibrate: operator types to calibrate. By default, calibrate all the float32/float16 tensors.
        :param augmented_model_path: save augmented model to this path.
        :param symmetric: make range of tensor symmetric (central point is 0).
        :param use_external_data_format: use external data format to store model which size is >= 2Gb
        :param moving_average: compute the moving average of the minimum and maximum values instead of the global minimum and maximum.
        :param averaging_constant: constant smoothing factor to use when computing the moving average.
        :param max_intermediate_outputs: maximum number of intermediate outputs before an intermediate range is computed.
        :param per_channel: whether to compute ranges per each channel.
        )r   r   r   r   r   Nc                 S   r   r   r   rL   r   r   r   r   r   L  r   z,MinMaxCalibrater.__init__.<locals>.<setcomp>r   r	   z;Invalid averaging constant, which should not be < 0 or > 1.)superrC   intermediate_outputscalibrate_tensors_rangere   r   r   r   num_model_outputsmodel_original_outputsmoving_averager9   averaging_constantmax_intermediate_outputs)
r?   r   r   r   r   r   r   r   r   r   rO   r   r   rC   *  s"   
zMinMaxCalibrater.__init__c                    s    j\}}tt ttjdgtj	d}jj
j| dd   fdd}|D ]}||d ||d q1tjjjjd	 d
S )z
        Adds ReduceMin and ReduceMax nodes to all quantization_candidates op type nodes in
        model and ensures their outputs are stored as part of the graph output
        :return: augmented ONNX model
        r   c                 S   s6   |j D ]}tj| |jr|j  S qtd|  d)Nz&Model does not contain a version for 'z'.)opset_importonnxdefshasdomainversionrn   )r   r   r   r   r   r   get_op_version^  s
   

z6MinMaxCalibrater.augment_graph.<locals>.get_op_versionc                    s  d}| d | }|d }t jj|| g|g||d}t jjd|g|g|d}dd jjjD }|d	d jjjD  |d
d jjjD  | |v rV||  j	j
j}ntd| djrt||  j	j
jj}	dgtd|	}
 |jdk r|jtd|
 n tt }ttj|
tjd|}|j| jjj| jjj||g jjjt ||d g d S )Nr	   __Reshape)r    r   Reshape)inputsoutputsr   c                 S   r   r   r   r   r   r   r   rM   w  r   zNMinMaxCalibrater.augment_graph.<locals>.add_reduce_min_max.<locals>.<dictcomp>c                 S   r   r   r   )rL   or   r   r   rM   x  r   c                 S   r   r   r   rL   ir   r   r   rM   y  r   z'Unable to guess tensor type for tensor zE, running shape inference before quantization may resolve this issue.r   rY      axesr   )!r   r   	make_noder   r   r   r   r   r   r<   r   r   r9   r   re   r   dimrange	attributeappendmake_attributer`   uuiduuid4r   
from_arrayr   arrayint64r   r   extendmake_tensor_value_info)r   reduce_op_namer    reduce_outputintermediate_outputreduce_nodereshape_noder   	onnx_typetensor_rankreduced_axesreduce_axes_namereduce_axesr   reshape_shape_namer?   r   r   add_reduce_min_maxd  s>   
 z:MinMaxCalibrater.augment_graph.<locals>.add_reduce_min_max	ReduceMin	ReduceMaxsave_as_external_dataN)r   r   r`   r   r   r   r   r   r   r   r   r   r   r   saver   r   )r?   tensorsr   reshape_shaper  tensorr   r  r   r   S  s   .

zMinMaxCalibrater.augment_graphc                 C   
   g | _ d S rf   r   rG   r   r   r   clear_collected_data  rl   z%MinMaxCalibrater.clear_collected_datar   c                 C   s   	 |  }|sn| j| jd | | jd ur$t| j| jkr$|   qt| jdkr5| jd u r5t	d| 
 }t|tsHtdt| d|   d S )NTr   No data is collected.z+compute_data must return a TensorsData not r2   )rx   r   r   r   runr   re   r  r   r9   r   r_   rX   ra   r<   )r?   r   r   tr   r   r   r     s   

zMinMaxCalibrater.collect_datac                 C   s   |s|S |  D ]o\}}t|tr|jd }|jd }n|\}}t|| tr6|| jd }|| jd }n|| \}}| jrR|| j||   }	|| j||   }
n
t||}	t||}
t|tsht|| trqt|	|
d||< q|	|
f||< q|S )Nr   r	   rZ   )r7   r_   r*   rH   r   r   minmax)r?   	old_range	new_rangerj   ro   old_minold_maxnew_minnew_max	min_value	max_valuer   r   r   merge_range  s(   



zMinMaxCalibrater.merge_ranger   c                    s  t jdkr
jS fddtt jd D fddjD }i |D ]}| D ]\}}|g | q.q(jd   fddtdt  dD }fdd	D }g }tdt  dD ]X}jrt	j
| |  dd
}	t	j
| |d   dd
}
nt	j| |  dd
}	t	j| |d   dd
}
jrt	jt	|	t	|
gdd
}|| |f qg||	|
f qgttjtt||dd}jrۈj|_jS |_jS )z
        Compute the min-max range of tensor
        :return: dictionary mapping: {added node names: (ReduceMin, ReduceMax) pairs }
        r   c                    s   g | ]
} j  | jqS r   )r   get_outputsr   r   rG   r   r   
<listcomp>  s    z1MinMaxCalibrater.compute_data.<locals>.<listcomp>c                       g | ]}t t |d dqS Fstrictrs   ziprL   r   output_namesr   r   r         Nc                    s   g | ]} |  d d qS )r   r   )
rpartitionr   )added_output_namesr   r   r     s    rY   c                    s    i | ]}|j vr| | qS r   )r   r   )merged_output_dictr?   r   r   rM     s    z1MinMaxCalibrater.compute_data.<locals>.<dictcomp>r!   r	   Fr#  )re   r   r   r   r7   
setdefaultr   r   r   r   nanmeannanminnanmaxr   absrX   rc   rd   rs   r&  r  )r?   output_dicts_listdrA   rB   calibrate_tensor_namesmerged_added_output_dictpairsr   min_value_arraymax_value_arraymax_absolute_valuenew_calibrate_tensors_ranger   )r,  r-  r)  r?   r   r     sJ    

zMinMaxCalibrater.compute_data)Nr   FFFr   NF)rP   rT   rU   r`   r   r   rC   r   r  rw   r   r  rX   r   __classcell__r   r   r   r   r   )  s&    
)I!r   c                       sr   e Zd Z									dd	eeB d
ee dB f fddZdd Zdd Zde	fddZ
defddZ  ZS )HistogramCalibraterNr   F
percentile      -X@samer   r   c                    sv   t  j|||||d g | _d| _t| jjj| _dd | jjjD | _	d| _
|| _|| _|| _|	| _d| _|
| _dS )a=  
        :param model_path: ONNX model to calibrate. It is a model path.
        :param op_types_to_calibrate: operator types to calibrate. By default, calibrate all the float32/float16 tensors.
        :param augmented_model_path: save augmented model to this path.
        :param use_external_data_format: use external data format to store model which size is >= 2Gb
        :param method: A string. One of ['entropy', 'percentile'].
        :param symmetric: make range of tensor symmetric (central point is 0).
        :param num_bins: number of bins to create a new histogram for collecting tensor values.
        :param num_quantized_bins: number of quantized bins. Default 128.
        :param percentile: A float number between [0, 100]. Default 99.99.
        :param scenario: see :class:`DistributionCalibrater`
        )r   r   r   r   Nc                 S   r   r   r   r   r   r   r   r   .  r   z/HistogramCalibrater.__init__.<locals>.<setcomp>)r   rC   r   r   re   r   r   r   r   r   	collectormethodnum_binsnum_quantized_binsr>  r   scenario)r?   r   r   r   r   rD  r   rE  rF  r>  rG  r   r   r   rC     s$   
zHistogramCalibrater.__init__c                 C   sV   |  | j\| _}| jD ]}|| jvr| jjj||  qtj| j| j	| j
d dS )z
        make all quantization_candidates op type nodes as part of the graph output.
        :return: augmented ONNX model
        r  N)r   r   r   r   r   r   r   r   r
  r   r   )r?   r   r  r   r   r   r   7  s   


z!HistogramCalibrater.augment_graphc                 C   r  rf   r  rG   r   r   r   r  G  rl   z(HistogramCalibrater.clear_collected_datar   c                    sB  dd j  D }dd j  D 	 | }|sn-j d|}g }t|D ]\}}| |v r<|t| q)|| q)j| qt	jdkrTt
dfd	djD }i  |D ]}	|	 D ]\}
} |
g | qhqb fd
d D }jstjjjjjjd_j|   dS )zy
        Entropy Calibrator collects operators' tensors as well as generates tensor histogram for each operator.
        c                 S   r   r   r   rL   node_argr   r   r   r   N  r   z3HistogramCalibrater.collect_data.<locals>.<setcomp>c                 S   s   g | ]}|j qS r   r   rH  r   r   r   r   O  r   z4HistogramCalibrater.collect_data.<locals>.<listcomp>TNr   r  c                    r!  r"  r%  r'  r(  r   r   r   e  r*  c                    s    i | ]}|j v r| | qS r   )r   r   )merged_dictr?   r   r   rM   o  s     z4HistogramCalibrater.collect_data.<locals>.<dictcomp>)rD  r   rE  rF  r>  rG  )r   
get_inputsr  rx   r  	enumerater   copyr   re   r9   r7   r.  rC  HistogramCollectorrD  r   rE  rF  r>  rG  collectr  )r?   r   input_names_setr   r   fixed_outputsoutput_indexr   r3  r4  rA   rB   clean_merged_dictr   )rJ  r)  r?   r   r   J  sH   
z HistogramCalibrater.collect_datar   c                 C   sh   | j stdt| trtj}nt| trtj}nt| tr"tj	}n
t
dt|  dt|| j  S )z
        Compute the min-max range of tensor
        :return: dictionary mapping: {tensor name: (min value, max value)}
        z9No collector created and can't generate calibration data.zUnknown calibrater z". This method must be overwritten.)rC  r9   r_   EntropyCalibraterrc   rt   PercentileCalibraterru   DistributionCalibraterrv   ra   r<   rX   compute_collection_result)r?   calr   r   r   r   ~  s   


z HistogramCalibrater.compute_data)	Nr   Fr>  Fr?  r@  rA  rB  )rP   rT   rU   r`   r   r   rC   r   r  rw   r   rX   r   r<  r   r   r   r   r=  
  s&    
,4r=  c                       sB   e Zd Z							d
deeB dee dB f fdd	Z  ZS )rT  Nr   Fr)   r?  r   r   c	           	   
          t  j||||||||d dS )a  
        :param model_path: ONNX model to calibrate. It is a model path
        :param op_types_to_calibrate: operator types to calibrate. By default, calibrate all the float32/float16 tensors.
        :param augmented_model_path: save augmented model to this path.
        :param use_external_data_format: use external data format to store model which size is >= 2Gb
        :param method: A string. One of ['entropy', 'percentile', 'distribution'].
        :param symmetric: make range of tensor symmetric (central point is 0).
        :param num_bins: number of bins to create a new histogram for collecting tensor values.
        :param num_quantized_bins: number of quantized bins. Default 128.
        )rD  r   rE  rF  Nr   rC   )	r?   r   r   r   r   rD  r   rE  rF  r   r   r   rC        
zEntropyCalibrater.__init__)Nr   Fr)   Fr?  r?  rP   rT   rU   r`   r   r   rC   r<  r   r   r   r   rT        
rT  c                       sB   e Zd Z							ddeeB dee dB f fd	d
Z  ZS )rU  Nr   Fr>  r@  rA  r   r   c	           	   
      rY  )a  
        :param model_path: ONNX model to calibrate. It is a model path
        :param op_types_to_calibrate: operator types to calibrate. By default, calibrate all the float32/float16 tensors.
        :param augmented_model_path: save augmented model to this path.
        :param use_external_data_format: use external data format to store model which size is >= 2Gb
        :param method: A string. One of ['entropy', 'percentile', 'distribution'].
        :param symmetric: make range of tensor symmetric (central point is 0).
        :param num_quantized_bins: number of quantized bins. Default 128.
        :param percentile: A float number between [0, 100]. Default 99.99.
        )rD  r   rE  r>  NrZ  )	r?   r   r   r   r   rD  r   rE  r>  r   r   r   rC     r[  zPercentileCalibrater.__init__)Nr   Fr>  Fr@  rA  r\  r   r   r   r   rU    r]  rU  c                       s@   e Zd Z						ddeeB dee dB f fd	d
Z  ZS )rV  Nr   Fdistributionr?  rB  r   r   c              	      s   t  j|||||||d dS )a  
        :param model_path: ONNX model to calibrate. It is a model path
        :param op_types_to_calibrate: operator types to calibrate. By default, calibrate all the float32/float16 tensors.
        :param augmented_model_path: save augmented model to this path.
        :param use_external_data_format: use external data format to store model which size is >= 2Gb
        :param method: A string. One of ['entropy', 'percentile', 'distribution'].
        :param symmetric: make range of tensor symmetric (central point is 0).
        :param num_bins: number of bins to create a new histogram for collecting tensor values.
        :param scenario: for float 8 only, if `scenario="same"`,
            the algorithm weights and float 8 follow the same distribution,
            if `scenario="p3"`, it assumes the weights follow
            a gaussian law and float 8 ~ X^3 where X is a gaussian law
        )rD  rE  rG  NrZ  )r?   r   r   r   r   rD  rE  rG  r   r   r   rC     s   
zDistributionCalibrater.__init__)Nr   Fr^  r?  rB  r\  r   r   r   r   rV    s    
rV  c                   @   s,   e Zd ZdZejdd Zejdd ZdS )CalibrationDataCollectorzL
    Base class for collecting data for calibration-based quantization.
    c                 C   r~   )z
        Generate informative data based on given data.
            name_to_arr : dict
                tensor name to NDArray data
        r   r?   name_to_arrr   r   r   rO    s   z CalibrationDataCollector.collectc                 C   r~   )z?
        Get the optimal result among collection data.
        r   rG   r   r   r   rW    s   z2CalibrationDataCollector.compute_collection_resultN)rP   rT   rU   __doc__r   r   rO  rW  r   r   r   r   r_    s    
r_  c                   @   sv   e Zd ZdZdd Zdd Zdd Zdd	 Zd
d Zdd Z	dd Z
dd Zdd ZedddZdd Zdd ZdS )rN  a`  
    Collecting histogram for each tensor. Percentile and Entropy method are supported.

    ref: https://github.com//apache/incubator-mxnet/blob/master/python/mxnet/contrib/quantization.py
    ref: https://docs.nvidia.com/deeplearning/tensorrt/pytorch-quantization-toolkit/docs/_modules/
                 pytorch_quantization/calib/histogram.html
    c                 C   s.   i | _ || _|| _|| _|| _|| _|| _d S rf   )histogram_dictrD  r   rE  rF  r>  rG  )r?   rD  r   rE  rF  r>  rG  r   r   r   rC     s   
zHistogramCollector.__init__c                 C   r   rf   )rc  rG   r   r   r   get_histogram_dict   s   z%HistogramCollector.get_histogram_dictc                 C   sH   t d | jdv r| |S | jdkr | jr| |S | |S td)Nz/Collecting tensor data and making histogram ...>   r)   r^  r>  DOnly 'entropy', 'percentile' or 'distribution' methods are supported)printrD  collect_valuer   collect_absolute_valuer9   r`  r   r   r   rO  #  s   




zHistogramCollector.collectc                 C   sr  |  D ]1\}}t|trB|D ]}t|tjs$J dt| d|qdd |D }t|dks<J d| d|t|}nt|tjsTtdt| d||}|	 }|j
dkrjt|}t|}ntjd|jd}tjd|jd}t|}|| jvrtj|| jd	\}	}
|
|j}
|jtjksJ d
|	|
||f| j|< q| j| }|d }|d }t|dsJ dt| t|dsJ dt| |d }|d }t|}||d kr|d |d  }t|d | || |}t||f}tj||d	\}	}
|
|j}
|	dt|  |7  < |jtjks'J d
|	|
t||t||f| j|< qdS )z5
        Collect histogram on absolute value
        r3   z for tensor=c                 S   r   r   r   )rL   ar   r   r   r   :  r   z<HistogramCollector.collect_absolute_value.<locals>.<setcomp>r	   z6The calibration expects only one element type but got r   r   )r1   zMonly float32 or float16 is supported, every constant must be explicitly typedrY   r\   r   z'old_min should be a numpy array but is r   N)r7   r_   r4   r   ndarrayr<   re   r"   r9   flattensizer0  r1  r   r   absoluterc  	histogramrE  r#   float64r;   arangehstackr  r  )r?   ra  r  data_arrarrdtypesdata_arr_npr  r  r/   r0   old_histogramr  r  old_histold_hist_edges	temp_amaxwidthnew_bin_edgesr   r   r   rh  2  s\   
&





 z)HistogramCollector.collect_absolute_valuec           
      C   s   |  D ]k\}}t|}| }|jdkr!t|}t|}ntjd|jd}tjd|jd}tjt	t
|t
||jd}|| jv rW| j| }| |||||| j|< qtj|| j| |fd\}}	||	|||f| j|< qdS )z1
        Collect histogram on real value
        r   r   r   N)r7   r   r"   rk  rl  r0  r1  r   r   r  r2  rc  merge_histogramrn  rE  )
r?   ra  r  rr  r  r  	thresholdrv  r/   r0   r   r   r   rg  l  s,   





z HistogramCollector.collect_valuec                 C   s  |\}}}}	}
||
kr)t j|t||
 |
fd\}}|| |t||t|	||
fS |
dkrAt j|t|| |fd\}}||7 }n9t|}d|
 | }t||
 | d }|d|  }|| |
 }t j||| |fd\}}||||   |7  < ||t||t|	||fS )Nr|  r   rY   r	   )r   rn  re   r  r  r   )r?   rv  rr  r  r  new_thresholdrw  rx  r  r  old_thresholdnew_histr   r/   r0   old_num_bins
old_stridehalf_increased_binsnew_num_binsr   r   r   r}    s2   
z"HistogramCollector.merge_histogramc                 C   sl   | j r
t| j dkrtdtd| jd | jdkr |  S | jdkr)|  S | jdkr2|  S td)	Nr   z=Histogram has not been collected. Please run collect() first.z0Finding optimal threshold for each tensor using z algorithm ...r)   r>  r^  re  )rc  re   r9   rf  rD  compute_entropycompute_percentilecompute_distributionrG   r   r   r   rW    s   


z,HistogramCollector.compute_collection_resultc                 C   s  | j dk s
| j dkrtd| j}| j }i }tdt|  td| j  tdd|  d| d	 | D ]\}}|d }|d
 }| }t	|| }	| j
rrt|	|d }
tj||
 |jd tj||
 |jdf||< n*d| d }t|	d| }
t|	|}tj|| |jdtj||
 |jdf||< |d }|d }|| d |k r||| d
 f||< || d
 |kr|| d |f||< g || |d d R ||< tjdddv rt|| q8|S )Nr   d   z<Invalid percentile. Must be in range 0 <= percentile <= 100.Number of tensors : Number of histogram bins : zPercentile : (g      Y@,)r	   r   g      i@r   rY   r\   QUANTIZATION_DEBUGr	   1)r>  r9   rc  rf  re   rE  r7   r%   r   cumsumr   searchsortedr   r   osenvirongetr
   )r?   rc  r>  thresholds_dictr  rn  r/   r0   totalcdf	idx_rightpercent_to_cut_one_sideidx_leftr  r  r   r   r   r    sF   

z%HistogramCollector.compute_percentilec                 C   s   | j }| j}i }tdt|  td| j d td| j  | D ]-\}}| ||}|||< g ||d d R ||< tj	dddv rSt
|d |d	  q&|S )
Nr  r  z: (The number may increase depends on the data it collects)zNumber of quantized bins : rY   r  r   r  r	   )rc  rF  rf  re   rE  r7   get_entropy_thresholdr  r  r  r
   )r?   rc  rF  r  r  rn  optimal_thresholdr   r   r   r    s   z"HistogramCollector.compute_entropyr	   c                 C   s  |dkrt d| d|d d |dd   d }|dkrJ| |  |   }| |d   |   |d  d }tj||jdtj||jdfS t||krt|d dkr| ||   |   }| || | d   |   d }tj||jdtj||jdfS t|| }d|t|< d|t|< t|| | }| |  |   }| |d   |   |d  d }tj||jdtj||jdfS )	Nr   zpower=z <= 0 is invalid.r   r	   g      ?rY   r   )	r9   r%   r   r   r   r   r2  isnanisinf)r/   r0   powerrr   r+   r,   factr   r   r   _avg_std  s$   $ $ $ zHistogramCollector._avg_stdc           	   	   C   s:  | j dk r	td| j}i }tdt|  td| j   td| jd | D ]n\}}|d }|d }|jtj	ks@J | jd	krP| j
||dd
\}}n| jdkr`| j
||dd
\}}ntd|jtj	kslJ |jtj	kstJ |jtj	ks|J t||||| | d||< tjdddv rt|| q,|S )Ni   z3Invalid num_bins. Must be in range 512 <= num_bins.r  r  zScenario : r  r   r	   rB  )r  p3gUUUUUU?z,Invalid scenario. Must be in {'same', 'p3'}.)r+   r,   r/   r0   r-   r.   r  r  )rE  r9   rc  rf  re   rG  r7   r   r   ro  r  r*   r  r  r  r  r  r
   )	r?   rc  r  r  rn  r/   r0   avg_coefstd_coefr   r   r   r    s>   





z'HistogramCollector.compute_distributionc                    s  |d }|d }|j }|d }|d }|d j t|| d } fddt|j D }	t||d dD ]}
||
 }t||
 d |}|| || f|	|
| < t||| }| }t|d| }t||d }|d  |7  < |d  |7  < |dk	tj
}tj|tj
d}|j | }t|D ]}|| }|| }t||| ||< q|d  t||| d 7  < tj|j tj
d}t|D ] }|| }|| }t||| }|dkr|| | |||< qt|}t|}|du s|du rtjtj d}n
tjt|| d}|||
| < q5t|}|	| }|d }|d	 }|d |k r2||d f}|d |kr?|d |f}t|d d
sIJ t|d d
sSJ |S )aF  Given a dataset, find the optimal threshold for quantizing it.
        The reference distribution is `q`, and the candidate distribution is `p`.
        `q` is a truncated version of the original distribution.
        Ref: http://on-demand.gputechconf.com/gtc/2017/presentation/s7310-8-bit-inference-with-tensorrt.pdf
        r   r	   rY   c                    s(   g | ]}t jd  dt jd  dfqS )r   r   )r   r   r   r   r   r   r   I  s   ( z<HistogramCollector.get_entropy_threshold.<locals>.<listcomp>Nr   r   r\   r   )rl  r   r   zerosr   r  rM  deepcopyr%   r#   r   r   r   r   r)   argminr;   )r?   rn  rF  r/   r0   rE  zero_bin_indexnum_half_quantized_binkl_divergence
thresholdsr   r   r   sliced_distributionpleft_outliers_countright_outliers_countnonzerosquantized_binsnum_merged_binsindexstartendqnormdivmin_kl_divergence_idxr  r  r  r   r   r   r  ;  sf   

 
z(HistogramCollector.get_entropy_thresholdN)r	   )rP   rT   rU   rb  rC   rd  rO  rh  rg  r}  rW  r  r  staticmethodr  r  r  r   r   r   r   rN    s    	:  .(rN  r   Fr   r   c                 C   sX  d }|t jkr3|dd}|dd}	|dd}
|dd }|dd}t| |||||	|
||d	}na|t jkrV|d	d
}|dd
}|dd}t| ||||||d}n>|t jkry|d	d}|dd}|dd}t| ||||||d}n|t jkr|d	d}|dd}t	| |||||d}|r|
  |r||_|  |S td| )Nr   Fr   r   r   r   r   )r   r   r   r   r   r   rE  r?  rF  )r   r   rE  rF  r@  r>  rA  T)r   r   rE  r>  rG  rB  )r   rE  rG  zUnsupported calibration method )rc   rd   r  r   rt   rT  ru   rU  rv   rV  r   r   r   r9   )r   r   r   calibrate_methodr   r   extra_options
calibratorr   r   r   r   r   rE  rF  r>  rG  r   r   r   create_calibrator  s|   	


	

	r  )Nr   ),r   rM  r   r  r   collections.abcr   enumr   pathlibr   numpyr   r   r   r   r   r   r   quant_utilsr
   r   r   rj  r   floatr   r)   r*   rX   rc   ABCMetarw   r   r   r=  rT  rU  rV  r_  rN  rd   r`   r  r   r   r   r   <module>   sn   
#4n b ""#   
