
    ZiW                     4   d dl Z d dlZd dlZd dlZd dlZd dlZd dlZd dlmZm	Z	 d dl
mZ d dlmZ d dlmZ d dlmZ d dlmZ d dlZd dlZd dlZd dlmZ d dlZ ej        e          Z G d	 d
e          Z G d de          Z G d d          Zdej        iZddddddi fdZ d*dZ!d+dZ"d Z#d Z$d Z%d Z&d,dZ'ej(        d fdZ)d Z*d-dZ+de,e-e.ef                  dz  fd Z/ G d! d"e          Z0 G d# d$e0          Z1 G d% d&e0          Z2d.d(Z3d) Z4dS )/    N)ABCabstractmethod)ThreadPoolExecutor)datetime)Enum)sleep)Any)versionc                   $    e Zd ZdZdZdZdZd ZdS )	Precisionfp32fp16int8int4c                     | j         S Nvalueselfs    f/root/projects/butler/venv/lib64/python3.11/site-packages/onnxruntime/transformers/benchmark_helper.py__str__zPrecision.__str__%   
    z    N)__name__
__module____qualname__FLOAT32FLOAT16INT8INT4r    r   r   r   r      s7        GGDD    r   r   c                        e Zd ZdZdZdZd ZdS )OptimizerInfono_optby_ort	by_scriptc                     | j         S r   r   r   s    r   r   zOptimizerInfo.__str__0   r   r   N)r   r   r   NOOPTBYORTBYSCRIPTr   r"   r   r   r$   r$   )   s4         EEH    r   r$   c                        e Zd Zd Zd Zd ZdS )ConfigModifierc                     || _         d S r   
num_layers)r   r0   s     r   __init__zConfigModifier.__init__5   s    $r   c                    | j         d S t          |d          r.| j         |_        t                              d| j                     t          |d          r.| j         |_        t                              d| j                     t          |d          r0| j         |_        t                              d| j                     d S d S )Nnum_hidden_layersz6Modifying pytorch model's number of hidden layers to: encoder_layersz7Modifying pytorch model's number of encoder layers to: zdecoder_layers z7Modifying pytorch model's number of decoder layers to: )r0   hasattrr3   loggerinfor4   decoder_layers)r   configs     r   modifyzConfigModifier.modify8   s    ?"F6.// 	d'+F$KKbQUQ`bbccc6+,, 	e$(OF!KKcRVRaccddd6,-- 	e$(OF!KKcRVRaccddddd	e 	er   c                     | j         S r   r/   r   s    r   get_layer_numzConfigModifier.get_layer_numE   s
    r   N)r   r   r   r1   r:   r<   r"   r   r   r-   r-   4   sD        % % %e e e    r   r-   float32TFc	                    t          j                    }	|rt           j        j        |	_        nt           j        j        |	_        |rd|	_        |dk    r)||	_        t          	                    d|	j                    |rd|	_
        nd|	_
        |t          j                    v r|g}
nE|r@|dk    rddg}
n8|dk    rd	dg}
n-|d
k    s|ddg}
n |dk    rg d}
nt          d|           dg}
rfd|
D             }
|r|	                    dd           d }	 t          j        | |	|
          }n0# t          $ r# t                              d|  d|
            Y nw xY w|S )NTr   z%Session option: intra_op_num_threads=   dmlDmlExecutionProviderCPUExecutionProvidermigraphxMIGraphXExecutionProvidercudaCUDAExecutionProvidertensorrt)TensorrtExecutionProviderrG   rC   z)The execution provider is not supported: c                 0    g | ]}|v r
||         fn|S r"   r"   ).0nameprovider_optionss     r   
<listcomp>z.create_onnxruntime_session.<locals>.<listcomp>   s6    ppp^bt?O7O7Od,T233UYpppr   z(mlas.enable_gemm_fastmath_arm64_bfloat161)	providerszFailed to create session for z with providers=)onnxruntimeSessionOptionsGraphOptimizationLevelORT_ENABLE_ALLgraph_optimization_levelORT_ENABLE_BASICenable_profilingintra_op_num_threadsr6   debuglog_severity_levelget_available_providersRuntimeErroradd_session_config_entryInferenceSession	Exception	exception)onnx_model_pathuse_gpuproviderenable_all_optimizationnum_threadsrW   verbose(enable_mlas_gemm_fastmath_arm64_bfloat16rM   sess_optionsrP   sessions           `   r   create_onnxruntime_sessionrj   O   s    -//L d0;0R0a--0;0R0c- -(,%Q,7)`\=^``aaa ,*+''*+';68888J			 -u/1GHII##+&II 8#302HIII##  II U8UUVVV+,	 qppppfoppp	/ _--.XZ]^^^Gg.Xabbb g g geeeZceefffffg Ns   D. .*EEc                     | r"t          j        dt           j                   d S t          j        dt           j                   t          j        d                              t           j                   d S )Nz8[%(filename)s:%(lineno)s - %(funcName)20s()] %(message)s)formatlevelz%(message)stransformers)loggingbasicConfigDEBUGINFO	getLoggersetLevelWARNING)rf   s    r   setup_loggerrv      sx     DM-	
 	
 	
 	
 	
 	

 	=EEEE.))227?CCCCCr   c                    | r3t           j                            |           st          j        |            |r3t           j                            |          st          j        |           |re|dk    r dt	          j                    v s
J d            n?t          t	          j                                                  ddg          r
J d            t          	                    dt          j                    t          	                    dt          j                    t          	                    d	t          j                    t          j        t          j                  t          j        d
          k    sJ t          j        t          j                  t          j        d          k    sJ t          j        t          j                  t          j        d
          k    sJ d S )NrA   rB   zBPlease install onnxruntime-directml package to test GPU inference.rG   rE   zSPlease install onnxruntime-gpu package, or install migraphx, to test GPU inference.zPyTorch Version:zTransformers Version:zOnnxRuntime Version:z1.10.0z4.12.0)ospathexistsmakedirsrQ   r[   set
isdisjointr6   r7   torch__version__rn   r
   parse)	cache_dir
output_dirrb   rc   s       r   prepare_environmentr      s    	22 
I  "'..44  
J 	eu)[-P-R-RRRRT SRRR
 ;>@@AALL(*EF  e ede e e KK65#466777
KKB(@BBCCC
KK@{'>@@AAA =*++w}X/F/FFFFF=122gmH6M6MMMMM=011W]85L5LLLLLLLr   c                    t          |           t          t          |                     z  dz  }t          j        | t          j                  dz  }|d|z  z  }t          |           |dt          j        | d          dz  dt          j        | d          dz  dt          j        | d          dz  d|d|ddS )Ng     @@)dtypez.2fZ   _   c   )
test_timeslatency_variancelatency_90_percentilelatency_95_percentilelatency_99_percentileaverage_latency_msQPS)sumfloatlennumpyvarfloat64
percentile)latency_list
batch_size
latency_msr   
throughputs        r   get_latency_resultr      s    \""U3|+<+<%=%==FJyU]CCCfLv
23J ,''/55$)$4\2$F$F$O!U!U$)$4\2$F$F$O!U!U$)$4\2$F$F$O!U!U!+11""  r   c                 (   t          |ddd          5 }g d}t          j        ||          }|                                 | D ]}|                    |           	 d d d            n# 1 swxY w Y   t
                              d|            d S )Na asciimodenewlineencoding)enginer
   rP   device	precision	optimizer
io_binding
model_nameinputsthreadsr   sequence_lengthcustom_layer_numr   r   r   r   r   r   r   r   
fieldnamesz&Detail results are saved to csv file: )opencsv
DictWriterwriteheaderwriterowr6   r7   )resultscsv_filenamecsv_filecolumn_names
csv_writerresults         r   output_detailsr      s    	lb7	C	C	C (x
 
 
0 ^HFFF
    	( 	(F''''	(7( ( ( ( ( ( ( ( ( ( ( ( ( ( (< KKGGGHHHHHs   A	A**A.1A.c                 "   t          |ddd          5 }g dg }|j        D ]L}|j        dgk    r|                    d|            '|j        D ]}|                    d| d|            Mt	          j        ||z             }|                                 |j        D ]4}d	D ]-}	|j        D ]!}
d
D ]}|j	        D ]}i }| D ]}|d         |k    r|d         |	k    r|d         |
k    r|d         |k    r|d         |k    rfd|
                                D             }|sD|                    |           |                    t                              |d                     nD ]}||         ||         k    sJ |d         }|d         }|r|d         |d| d| <   |d         |d| <   |r|                    |           #/6	 d d d            n# 1 swxY w Y   t                              d|            d S )Nr   r   r   r   )r   r   r   r   r
   rP   r   r   r   r   r   b_sr   )         )TFr   r   r   r   r   r   c                 $    i | ]\  }}|v 	||S r"   r"   )rK   kvheader_namess      r   
<dictcomp>z"output_summary.<locals>.<dictcomp>  s+    .d.d.d1RSWcRcRcq!RcRcRcr   r   r   r   z'Summary results are saved to csv file: )r   batch_sizessequence_lengthsappendr   r   r   modelsenginesre   itemsupdatedictfromkeysr   r6   r7   )r   r   argsr   
data_namesr   r   r   r   input_countengine_namer   r   rowr   headersr   r   sr   s                      @r   output_summaryr      s(   	lb7	C	C	C 49x
 
 
 
* 	K 	KJ$,,!!"2j"2"23333'+'< K KO%%&I*&I&I&I&IJJJJK ^H
9RSSS
   + 	9 	9J( 9 9#'< 9 9K&7 9 9
'+'7 9 9G"$C*1 T T$*<$8J$F$F(.x(8K(G(G(.x(8K(G(G(.|(<
(J(J(.y(9W(D(D.d.d.d.d.d.d.dG+. %H(+

7(;(;(;(+

4==R3P3P(Q(Q(Q(Q1= )H )HA36q6WQZ3G3G3G3G3G(.|(<A(./@(AA'( %T<BCW<XLLLQLL(9(97=>R7SGGG" 9 * 3 3C 8 8 819999	9149 49 49 49 49 49 49 49 49 49 49 49 49 49 49l KKH,HHIIIIIs   GG''G+.G+c           
         t          |ddd          5 }ddddgt          t          t          |                                                                                               }t          j        ||	          }|                                 | D ]{}t          t          j                              | |         d<   t          j        | |         d<   t          j        | |         d<   || |         d<   |                    | |                    |	 d d d            n# 1 swxY w Y   t                               d
|            d S )Nr   r   r   r   model_filenamer   rn   r~   r   z(Fusion statistics is saved to csv file: )r   listnextitervalueskeysr   r   r   strr   nowrn   r   r~   r   r6   r7   )model_fusion_statisticsr   r   r   r   keys         r   output_fusion_statisticsr   &  s   	lb7	C	C	C >x	

 $t3::<<==>>CCEEFF
 ^HFFF
   * 	> 	>C7:8<>>7J7J#C(4;G;S#C(8494E#C(1=@#C()9: 7 <====	>> > > > > > > > > > > > > > >  KKI<IIJJJJJs   C<DD!$D!c                     i }t          j         fdd|           t          j         fdd|          }|                    |           |                    ddi           |                    t          ||                     |S )Nc                  0                         d            S r   run
ort_inputsort_sessions   r   <lambda>zinference_ort.<locals>.<lambda><  s    +//$
;; r   r   numberrepeatc                  0                         d            S r   r   r   s   r   r   zinference_ort.<locals>.<lambda>=  s    z)J)J r   r   F)timeitr   r   r   )r   r   result_templaterepeat_timesr   warm_up_repeatr   r   s   ``      r   inference_ortr   :  s    F
M;;;;;An]]]]=!J!J!J!J!JST]ijjjL
MM/"""
MM<'(((
MM$\:>>???Mr   c           
          i }                                  |D ]}t          j        ||                                       |	          }t                              t          ||         j                  |
          }                    ||j	        j
        d||j        |                                           t          |          dk    rt          |||	           t          |          D ]\\  }}                    |||         j	        j
        dt"          j        ||         j        ||                                                    ]t'          j         fdd|           t'          j         fdd|          }|                    |           |                    ddi           |                    t-          ||                     |S )Nr   c                  .                                    S r   run_with_iobindingr   r   s   r   r   z/inference_ort_with_io_binding.<locals>.<lambda>q      ..z:: r   r   r   c                  .                                    S r   r   r   s   r   r   z/inference_ort_with_io_binding.<locals>.<lambda>w  r   r   r   T)r   r~   
from_numpytoIO_BINDING_DATA_TYPE_MAPgetr   r   
bind_inputr   typeshapedata_ptrr   allocateOutputBuffers	enumeratebind_outputr   r=   r   r   r   r   )r   r   r   r   ort_output_namesort_outputsoutput_buffersoutput_buffer_max_sizesr   r   	data_typer   r   rL   np_input
input_typeiort_output_namer   r   s   `                  @r   inference_ort_with_io_bindingr  D  s    F ''))J 

 

#Jt$45588@@-11#j6F6L2M2MyYY
O N	
 	
 	
 	
 >an.EvNNN'(899 
 
?1$)MN 1&&((	
 	
 	
 	
 M:::::    =:::::  L
 MM/"""
MM<&'''
MM$\:>>???Mr   c                 x    |D ]6}|                      t          j        |t          j        |                     7d S )N)r   r   )r   r~   emptyr=   )r	  r
  r   r  s       r   r  r    sK     % R Rek!5=PPPQQQQR Rr   {   c                    t          j        |            t          j                             |            t          j        |            t          j                            |            t          j                            |            dS )z5Set random seed manually to get deterministic resultsN)randomseedr   r~   manual_seedrF   manual_seed_all)r  s    r   set_random_seedr    sk    
K	Ld	d	J4   	Jt$$$$$r   returnc            	         ddl m} m}m}m}m}m}m} 	  |             g } |            }t          |t                    sd S t          |          D ]j}	 | ||	                    }
t          |
t                    r d S |                    |	 | ||	                    |
j        |
j        |
j        d           k |             |S # | $ r}t!          d|           Y d }~d S d }~ww xY w)Nr   	NVMLErrornvmlDeviceGetCountnvmlDeviceGetHandleByIndexnvmlDeviceGetMemoryInfonvmlDeviceGetNamenvmlInitnvmlShutdown)idrL   totalfreeused-Error fetching GPU information using nvml: %s)py3nvml.py3nvmlr  r  r  r   r!  r"  r#  
isinstanceintranger   r   r%  r&  r'  print)r  r  r  r   r!  r"  r#  r   device_countr  r7   errors               r   get_gpu_infor0    s                    


))++,,, 	4|$$ 	 	A**+E+Ea+H+HIID$$$ ttMM--.H.H.K.KLL!Z I I     	   =uEEEttttts$   +C ;C >AC C'C""C'c                   Z    e Zd ZddZd Zedeeee	f                  dz  fd            Z
dS )MemoryMonitorTc                     || _         d S r   )keep_measuring)r   r4  s     r   r1   zMemoryMonitor.__init__  s    ,r   c                     dd l }d}	 t          ||                    t          j                                                              j        dz            }t          d           | j        snf|S )Nr   T   {Gzt?)	psutilmaxProcessrx   getpidmemory_inforssr   r4  )r   r8  	max_usages      r   measure_cpu_usagezMemoryMonitor.measure_cpu_usage  sq    		Iv~~bikk'B'B'N'N'P'P'TW^'^__I%LLL& 		
 r   r  Nc                     t                      r   )NotImplementedErrorr   s    r   measure_gpu_usagezMemoryMonitor.measure_gpu_usage  s    !###r   T)r   r   r   r1   r?  r   r   r   r   r	   rB  r"   r   r   r2  r2    sp        - - - -	 	 	 $4S#X#7$#> $ $ $ ^$ $ $r   r2  c                   N     e Zd Zd fd	Zdeeeef                  dz  fdZ xZ	S )CudaMemoryMonitorTc                 J    t                                          |           d S r   )superr1   )r   r4  	__class__s     r   r1   zCudaMemoryMonitor.__init__  s!    (((((r   r  Nc                   
 ddl m}m}mm}mm}m} g g 
	  |              |            }t          |t                    st                              d|            d S d t          |          D             fdt          |          D             
	 t          |          D ]l} | |                    }t          |t                    r t                              d|             d S t          |         |j        dz            |<   mt!          d	           | j        sn |             
fd
t          |          D             S # |$ r&}	t                              d|	           Y d }	~	d S d }	~	ww xY w)Nr   r  z*nvmlDeviceGetCount result is not integer: c                     g | ]}d S r   r"   rK   r  s     r   rN   z7CudaMemoryMonitor.measure_gpu_usage.<locals>.<listcomp>  s    <<<1Q<<<r   c                 8    g | ]}  |                    S r"   r"   )rK   r  r  r!  s     r   rN   z7CudaMemoryMonitor.measure_gpu_usage.<locals>.<listcomp>  s1    fffQ))*D*DQ*G*GHHfffr   Tz%nvmlDeviceGetMemoryInfo returns str: r6  r7  c                 4    g | ]}||         |         d S )	device_idrL   max_used_MBr"   rK   r  gpu_namemax_gpu_usages     r   rN   z7CudaMemoryMonitor.measure_gpu_usage.<locals>.<listcomp>  sD        	 "#$QK#0#3   r   r(  )r)  r  r  r  r   r!  r"  r#  r*  r+  r6   r/  r,  r   r9  r'  r   r4  )r   r  r  r   r"  r#  r.  r  r7   r/  rS  rT  r  r!  s             @@@@r   rB  z#CudaMemoryMonitor.measure_gpu_usage  s6   	
 	
 	
 	
 	
 	
 	
 	
 	
 	
 	
 	
 	
 	
 	
 	
 	
 	
 	HJJJ--//LlC00 X,XXYYYt<<l(;(;<<<MfffffRWXdReRefffH	|,, R RA223M3Ma3P3PQQD!$,, $%ST%S%STTT#tt'*=+;TY=P'Q'QM!$$e* 	 LNNN     |,,     	 	 	LLH%PPP44444	s&   AE $BE 4AE E?E::E?rC  )
r   r   r   r1   r   r   r   r	   rB  __classcell__rH  s   @r   rE  rE    sj        ) ) ) ) ) )+4S#X#7$#> + + + + + + + +r   rE  c                   ,     e Zd Zd fd	Zd Zd Z xZS )RocmMemoryMonitorTc                 `   t                                          |           d}t          j                            |          r-|t
          j        vrt
          j                            |           	 dd l}|| _        | j                                         d S # t          $ r d | _        Y d S w xY w)Nz/opt/rocm/libexec/rocm_smir   )
rG  r1   rx   ry   rz   sysr   rocm_smiinitializeRsmiImportError)r   r4  rocm_smi_pathr[  rH  s       r   r1   zRocmMemoryMonitor.__init__  s    (((47>>-(( 	/CH,,...	!OOO$DMM((***** 	! 	! 	! DMMMM	!s   2$B B-,B-c                 b    | j         dS | j                             |d          d         dz  dz  S )Nr>   VRAMr   i   )r[  
getMemInfo)r   devs     r   get_used_memoryz!RocmMemoryMonitor.get_used_memory  s6    = 2}''V44Q7$>EEr   c                    | j         d S | j         &t          | j                                                   nd}d t          |          D             d t          |          D             	 t          |          D ].}t	          |         |                     |                    |<   /t          j        d           | j        sn[fdt          |          D             S )Nr   c                     g | ]}d S rK  r"   rL  s     r   rN   z7RocmMemoryMonitor.measure_gpu_usage.<locals>.<listcomp>  s    888q888r   c                     g | ]}d | S )GPUr"   rL  s     r   rN   z7RocmMemoryMonitor.measure_gpu_usage.<locals>.<listcomp>  s    ;;;!I!II;;;r   Tr7  c                 4    g | ]}||         |         d S rO  r"   rR  s     r   rN   z7RocmMemoryMonitor.measure_gpu_usage.<locals>.<listcomp>"  sD     
 
 
 	  ,Q/ 
 
 
r   )	r[  r   listDevicesr,  r9  rc  timer   r4  )r   r.  r  rS  rT  s      @@r   rB  z#RocmMemoryMonitor.measure_gpu_usage  s   = 4;?=;Ts4=4466777Z[88E,$7$7888;;u\':':;;;	<(( R R#&}Q'79M9Ma9P9P#Q#Qa  Ju& 	
 
 
 
 
 <((
 
 
 	
r   rC  )r   r   r   r1   rc  rB  rU  rV  s   @r   rX  rX    s^        ! ! ! ! ! !F F F

 
 
 
 
 
 
r   rX  rF   c                 l   d }|dk    rt           }nt          } |d          }| r||}n|                                }|d S ||S t                      5 } |            }|                    |j                  }	 |                    |          }	|	                                }
d|_        |                                }n # d|_        |                                }w xY w|	 d d d            d S t                              d| d|            t          |          dk    rt          |          dk    rpt          |          t          |          k    rPd}t          |          D ]0\  }}|d         }||         d         }||z
  }t          ||          }1|cd d d            S d d d            n# 1 swxY w Y   d S ||}n|                                }||S t                      5 } |            }|                    |j                  }	 |                    |          }	|	                                }
d|_        |                                }n # d|_        |                                }w xY wt                              d|d	d
|d	d           ||z
  cd d d            S # 1 swxY w Y   d S )NrocmFzGPU memory usage: before=z  peak=r   r   rQ  zCPU memory usage: before=z.1fz
 MB, peak=z MB)rX  rE  rB  r   submitr   r4  r6   r7   r   r  r9  r?  )is_gpufuncmonitor_typestart_memorymemory_monitor_typemonitormemory_before_testexecutor
mem_thread	fn_thread_r>  max_usedr  memory_beforebeforeafterr'  s                     r   measure_memoryr}  ,  s   v//!!%((G "#!-!(!:!:!<!<%4<%%!! 	 X))++G!)BCCJ0$OOD11	$$&&).&&--//		 */&&--//	//// 	  	  	  	  	  	  	  	  KKZ4FZZyZZ[[[%&&!++I!0C0CL^H_H_cfgpcqcqHqHq(12D(E(E 3 3$A}*=9F%aL7E 6>D"8T22HH-	  	  	  	  	  	  	  	  	  	  	  	  	  	  	  	  	  	  	  	  	  	  	 . t )$6688|!!			 .%%''__W%>??
	, --I  ""A%*G""))++II &+G""))++I++++d0BdddR[ddddeee--. . . . . . . . . . . . . . . . . .s[   %F2;)C $F2 CF20B)F22F69F6*%J))I9J)I22*J))J-0J-c                  l    g d} d}| D ]*}t          j        |          }||r|dz  }|| d| z  }+|S )N)ORT_DISABLE_FUSED_ATTENTION!ORT_ENABLE_FUSED_CAUSAL_ATTENTION!ORT_DISABLE_FUSED_CROSS_ATTENTIONORT_DISABLE_TRT_FLASH_ATTENTION&ORT_DISABLE_MEMORY_EFFICIENT_ATTENTIONORT_TRANSFORMER_OPTIONSORT_CUDA_GEMM_OPTIONSr   ,=)rx   getenv)	env_namesenvrL   r   s       r   get_ort_environment_variablesr  p  sn      I C ! !	$= 	3JC$     Jr   rC  r   rK  )r  )rF   N)5r   ro   rx   r  rZ  rj  r   abcr   r   concurrent.futuresr   r   enumr   r   typingr	   r   r~   rn   	packagingr
   rQ   rs   r   r6   r   r$   r-   r=   r   rj   rv   r   r   r   r   r   r   longlongr  r  r  r   r   r   r0  r2  rE  rX  r}  r  r"   r   r   <module>r     ss   


  				  



   # # # # # # # # 1 1 1 1 1 1                                        		8	$	$           D          , u}   -2A A A AHD D D DM M M M8   I I ID7J 7J 7JtK K K(   * n: : : :zR R R% % % %#d4S>*T1 # # # #L$ $ $ $ $C $ $ $(/ / / / / / / /d(
 (
 (
 (
 (
 (
 (
 (
VA. A. A. A.H    r   