
    bi                        d dl mZ d dlZd dlmZmZ d dlmZ  ej        e	          Z
erd dlmZmZmZ 	 d dlmZ n# e$ r Y nw xY w	 	 	 dddZdS )    )annotationsN)TYPE_CHECKINGLiteral)save_or_push_to_hub_modelCrossEncoderSentenceTransformerSparseEncoder)OptimizationConfigFmodel2SentenceTransformer | SparseEncoder | CrossEncoderoptimization_config4OptimizationConfig | Literal['O1', 'O2', 'O3', 'O4']model_name_or_pathstrpush_to_hubbool	create_prfile_suffix
str | NonereturnNonec                ^   ddl m}m}m} 	 ddlm}	m}
 ddlm} n# t          $ r t          d          w xY wt          | |          o?t          |           o0t          | d         d          ot          | d         j        |	          }t          | |          o?t          |           o0t          | d         d          ot          | d         j        |	          }t          | |          ot          | j        |	          }|s|s|st          d          |s|r| d         j        }n| j        }|
                    |          t          t"                    r4|j        vrt          d          p t'          |                      d
t)          fdd|||d| 	  	         d	S )a  
    Export an optimized ONNX model from a SentenceTransformer, SparseEncoder, or CrossEncoder model.

    The O1-O4 optimization levels are defined by Optimum and are documented here:
    https://huggingface.co/docs/optimum/main/en/onnxruntime/usage_guides/optimization

    The optimization levels are:

    - O1: basic general optimizations.
    - O2: basic and extended general optimizations, transformers-specific fusions.
    - O3: same as O2 with GELU approximation.
    - O4: same as O3 with mixed precision (fp16, GPU-only)

    See the following pages for more information & benchmarks:

    - `Sentence Transformer > Usage > Speeding up Inference <https://sbert.net/docs/sentence_transformer/usage/efficiency.html>`_
    - `Cross Encoder > Usage > Speeding up Inference <https://sbert.net/docs/cross_encoder/usage/efficiency.html>`_

    Args:
        model (SentenceTransformer | SparseEncoder | CrossEncoder): The SentenceTransformer, SparseEncoder,
            or CrossEncoder model to be optimized. Must be loaded with `backend="onnx"`.
        optimization_config (OptimizationConfig | Literal["O1", "O2", "O3", "O4"]): The optimization configuration or level.
        model_name_or_path (str): The path or Hugging Face Hub repository name where the optimized model will be saved.
        push_to_hub (bool, optional): Whether to push the optimized model to the Hugging Face Hub. Defaults to False.
        create_pr (bool, optional): Whether to create a pull request when pushing to the Hugging Face Hub. Defaults to False.
        file_suffix (str | None, optional): The suffix to add to the optimized model file name. Defaults to None.

    Raises:
        ImportError: If the required packages `optimum` and `onnxruntime` are not installed.
        ValueError: If the provided model is not a valid SentenceTransformer, SparseEncoder, or CrossEncoder model loaded with `backend="onnx"`.
        ValueError: If the provided optimization_config is not valid.

    Returns:
        None
    r   r   )ORTModelORTOptimizer)AutoOptimizationConfigzPlease install Optimum and ONNX Runtime to use this function. You can install them with pip: `pip install sentence-transformers[onnx]` or `pip install sentence-transformers[onnx-gpu]`
auto_modelz}The model must be a Transformer-based SentenceTransformer, SparseEncoder, or CrossEncoder model loaded with `backend="onnx"`.z\optimization_config must be an OptimizationConfig instance or one of 'O1', 'O2', 'O3', 'O4'.N	optimizedc                4                         |           S )N)r   )optimize)save_dirr   r   	optimizers    a/root/projects/butler/venv/lib/python3.11/site-packages/sentence_transformers/backend/optimize.py<lambda>z-export_optimized_onnx_model.<locals>.<lambda>o   s    ););<OQYgr);)s)s     export_optimized_onnx_modelonnx)	export_functionexport_function_nameconfigr   r   r   r   backendr   )sentence_transformersr   r	   r
   optimum.onnxruntimer   r   !optimum.onnxruntime.configurationr   ImportError
isinstancelenhasattrr   r   
ValueErrorfrom_pretrainedr   _LEVELSgetattrr   )r   r   r   r   r   r   r   r	   r
   r   r   r   viable_st_modelviable_se_modelviable_ce_model	ort_modelr"   s    `   `          @r#   r&   r&      sq   V WVVVVVVVVV
>>>>>>>>LLLLLLL 
 
 
?
 
 	

 	5-.. 	6JJ	6E!Hl++	6 uQx*H55	  	5-(( 	6JJ	6E!Hl++	6 uQx*H55	  !55[*U[RZ:[:[O 
 
/ 
 L
 
 	
  */ *#Ah1		#k	,,Y77I%s++ U&<&DDDn   "8%8Rg&<>QRRTT!ssssss:"-
 
 
 
 
 
s    8)FFN)r   r   r   r   r   r   r   r   r   r   r   r   r   r   )
__future__r   loggingtypingr   r   #sentence_transformers.backend.utilsr   	getLogger__name__loggerr,   r   r	   r
   r.   r   r/   r&    r%   r#   <module>rC      s    " " " " " "  ) ) ) ) ) ) ) ) I I I I I I		8	$	$ VVVVVVVVVVHHHHHHH    "e e e e e e es   = AA