o
    3Ih3                     @   s  d dl Z d dlZd dlZd dlZd dlmZ 	 dddZdd Zdd Zd	d
 Z	dd Z
			dddZdd Zdd Zdd Zedkr}e Zede d dlmZ eej ejsfejsaJ deeZnejZd dlmZ eeeZeD ]Zee qvdS dS )    N)TensorProtoc                 C   s  t  }|jdddtdd |jdddtdd |jd	d
dtddd |jdddtddd |jddtddd |jddtddd |jddtddd |jddtddd |jddtddd |jddtd d d |jd!dtd d"d |jd#dtd d$d |jd%dd&g d'd(d) |jd*d+dd,d-d. |jdd/ |jd0dtd1d2d |jd3dd,d4d. |jdd5 |jd6dd,d7d. |jdd8 |jd9d:dd,d; |jdd< || S )=Nz-iz--inputFz2Set the input file for reading the profile results)requiredtypehelpz-mz--modelzIonnx model path to run profiling. Required when --input is not specified.z-bz--batch_size   zbatch size of input)r   r   defaultr   z-sz--sequence_length    zsequence length of inputz--past_sequence_lengthzpast sequence length for gpt2z--global_lengthz&number of global tokens for longformerz	--samplesi  z\number of samples to test. Set it large enough to reduce the variance of performance result.z--thresholdg{Gz?zfThreshold of run time ratio among all nodes. Nodes with larger ratio will show in top expensive nodes.z--thread_numznumber of threads to usez--input_ids_namez"input name for input IDs, for bertz--segment_ids_namez$input name for segment IDs, for bertz--input_mask_namez'input name for attention mask, for bertz--dummy_inputsr   )bertgpt2
longformerr   zEType of model inputs. The default will create dummy inputs with ones.)r   r   choicesr   z-gz	--use_gpu
store_truezuse GPU)r   actionr   )use_gpuz
--providercudazExecution provider to usez--basic_optimizationz_Enable only basic graph optimizations. By default, all optimizations are enabled in OnnxRuntime)basic_optimizationz--kernel_time_onlyz.Only include the kernel time and no fence time)kernel_time_onlyz-vz	--verbose)r   r   )verbose)argparseArgumentParseradd_argumentstrintfloatset_defaults
parse_args)argvparser r   a/home/air/sanwanet/gpt-api/venv/lib/python3.10/site-packages/onnxruntime/transformers/profiler.pyparse_arguments   s   		
r!   c                 C   sD   ddl m} || ||| |dd}|D ]}|d |}	q| }
|
S )Nr   )create_onnxruntime_sessionT)enable_all_optimizationnum_threadsenable_profiling)benchmark_helperr"   runend_profiling)onnx_model_pathr   providerr   
thread_num
all_inputsr"   sessioninputs_profile_filer   r   r    run_profile   s   	r1   c                 C   s&   t | dtkrt| | dS d S )Nvalue)r   
WhichOneofr   getattr)dimr   r   r    get_dim_from_type_proto   s   &r6   c                 C   s   dd | j jjD S )Nc                 S   s   g | ]}t |qS r   )r6   ).0dr   r   r    
<listcomp>   s    z-get_shape_from_type_proto.<locals>.<listcomp>)tensor_typeshaper5   )
type_protor   r   r    get_shape_from_type_proto   s   r=   c                    s  i  |   D ]m}t|j}g }t|D ]\}}t|tr!|| qt|dkr+ dS t|dkr7|||d < t|dkrC|||d < |jjj	}	|	t
jt
jt
jfv sTJ |	t
jkr\tjn
|	t
jkrdtjntj}
tj||
d}| |j< q fddt|D }|S )a  Create dummy inputs for ONNX model.

    Args:
        onnx_model (OnnxModel): ONNX model
        batch_size (int): batch size
        sequence_length (int): sequence length
        samples (int): number of samples

    Returns:
        List[Dict]: list of inputs
       Nr   r   dtypec                       g | ]} qS r   r   r7   r/   dummy_inputsr   r    r9          z'create_dummy_inputs.<locals>.<listcomp>)'get_graph_inputs_excluding_initializersr=   r   	enumerate
isinstancer   appendlenr:   	elem_typer   FLOATINT32INT64numpyfloat32int64int32onesnamerange)
onnx_model
batch_sizesequence_lengthsamplesgraph_inputr;   symbol_dimsir5   rK   	data_typedatar,   r   rC   r    create_dummy_inputs   s0   




r_   c                 C   sB   ddl m}m} || |||\}	}
}||||dd|	|
|dd	}|S )a-  Create dummy inputs for BERT model.

    Args:
        onnx_model (OnnxModel): ONNX model
        batch_size (int): batch size
        sequence_length (int): sequence length
        samples (int): number of samples
        input_ids_name (str, optional): Name of graph input for input IDs. Defaults to None.
        segment_ids_name (str, optional): Name of graph input for segment IDs. Defaults to None.
        input_mask_name (str, optional): Name of graph input for attention mask. Defaults to None.

    Returns:
        List[Dict]: list of inputs
    r   )find_bert_inputsgenerate_test_data{   F)
test_casesseedr   	input_idssegment_ids
input_maskrandom_mask_length)bert_test_datar`   ra   )rV   rW   rX   rY   input_ids_namesegment_ids_nameinput_mask_namer`   ra   re   rf   rg   r,   r   r   r    create_bert_inputs   s   rm   c                    s   ||||| d}i  |   D ]V}t|j}t|D ]\}}	t|	tr4|	|vr.td|	 ||	 ||< q|jjj}
|
t	j
t	jt	jfv sFJ |
t	j
krNtjn
|
t	jkrVtjntj}tj||d}| |j< q fddt|D }|S )a  Create dummy inputs for GPT-2 model.

    Args:
        onnx_model (OnnxModel): ONNX model
        batch_size (int): batch size
        sequence_length (int): sequence length
        past_sequence_length (int): past sequence length
        samples (int): number of samples

    Raises:
        RuntimeError: symbolic is not supported. Use the tool convert_to_onnx.py to export ONNX model instead.

    Returns:
        List[Dict]: list of inputs
    )rW   seq_lenpast_seq_lentotal_seq_lensymbol is not supported: r?   c                    rA   r   r   rB   rC   r   r    r9   7  rE   z&create_gpt2_inputs.<locals>.<listcomp>)rF   r=   r   rG   rH   r   RuntimeErrorr:   rK   r   rL   rM   rN   rO   rP   rQ   rR   rS   rT   rU   )rV   rW   rX   past_sequence_lengthrY   symbolsrZ   r;   r\   r5   rK   r]   r^   r,   r   rC   r    create_gpt2_inputs  s0   



ru   c                    s  ||d}i  |   D ]m}t|j}t|D ]\}}	t|	tr0|	|vr*td|	 ||	 ||< q|jjj}
|
t	j
t	jt	jfv sBJ |
t	j
krJtjn
|
t	jkrRtjntj}d|jv rltj||d}d|ddd|f< ntj||d}| |j< q fddt|D }|S )	a  Create dummy inputs for Longformer model.

    Args:
        onnx_model (OnnxModel): ONNX model
        batch_size (int): batch size
        sequence_length (int): sequence length
        global_length (int): number of global tokens
        samples (int): number of samples

    Raises:
        RuntimeError: symbolic is not supported. Use the tool convert_longformer_to_onnx.py to export ONNX model instead.

    Returns:
        List[Dict]: list of inputs
    )rW   rX   rq   globalr?   r   Nc                    rA   r   r   rB   rC   r   r    r9   f  rE   z,create_longformer_inputs.<locals>.<listcomp>)rF   r=   r   rG   rH   r   rr   r:   rK   r   rL   rM   rN   rO   rP   rQ   rR   rT   zerosrS   rU   )rV   rW   rX   global_lengthrY   rt   rZ   r;   r\   r5   rK   r]   r^   r,   r   rC   r    create_longformer_inputs;  s.   





ry   c                 C   s  | j dkr| j ntjdd}dtjvrt|tjd< ddlm} ddlm	} ||| j
}d }| jdkrEt|| j| j| j| j| j| j}n.| jdkrWt|| j| j| j| j}n| jd	krit|| j| j| j| j}n
t|| j| j| j}t| j
| j| j| j| j |}|S )
Nr   F)logicalOMP_NUM_THREADS)load)	OnnxModelr
   r   r   )r+   psutil	cpu_countosenvironr   onnxr|   rV   r}   modelrD   rm   rW   rX   rY   rj   rk   rl   ru   rs   ry   rx   r_   r1   r   r*   r   )argsr$   r|   r}   rV   r,   r0   r   r   r    r'   j  sV   


	
	r'   __main__	Arguments)setup_loggerzMrequires either --model to run profiling or --input to read profiling results)process_results)N)NNN)r   r   rO   r~   r   r   r!   r1   r6   r=   r_   rm   ru   ry   r'   __name__	argumentsprintr&   r   r   inputr   r0   profile_result_processorr   resultsliner   r   r   r    <module>   sD    
	 /
)0/6




