
    bi                         d dl Z d dlmZ d dlmZ ddlmZ erddlmZ ddl	m
Z
mZmZmZ dd	lmZmZ  e            rd dlZ ej        e          Z G d
 de          ZdS )    N)TYPE_CHECKING)version   )HfQuantizer   )PreTrainedModel)is_gptqmodel_availableis_optimum_availableis_torch_availablelogging)
GPTQConfigQuantizationConfigMixinc                   r     e Zd ZdZdZdef fdZd Zdd	Zd
 Z	ddZ
ddZedefd            Zd Z xZS )GptqHfQuantizerz
    Quantizer of the GPTQ method - for GPTQ the quantizer support calibration of the model through
    the GPT-QModel package (Python import name `gptqmodel`). Quantization is done under the hood for users if they
    load a non-prequantized model.
    Fquantization_configc                      t                      j        |fi | t                      st          d          ddlm} |                    | j                                                  | _	        d S )NGLoading a GPTQ quantized model requires optimum (`pip install optimum`)r   )GPTQQuantizer)
super__init__r
   ImportErroroptimum.gptqr   	from_dictr   to_dict_optimumoptimum_quantizer)selfr   kwargsr   	__class__s       a/root/projects/butler/venv/lib/python3.11/site-packages/transformers/quantizers/quantizer_gptq.pyr   zGptqHfQuantizer.__init__,   s~    ,77777#%% 	ighhh......!.!8!89Q9a9a9c9c!d!d    c                 N   t                      st          d          t                      }|s-t          j                                        st          d          t                      st          d          t                      rt          j        t          j
                            d                    t          j        d          k     sGt          j        t          j
                            d                    t          j        d          k     rt          d          d S d S )	Nr   z2GPU is required to quantize or run quantize model.zTLoading a GPTQ quantized model requires gptqmodel (`pip install gptqmodel`) library.	gptqmodelz1.4.3optimum1.23.99zJThe gptqmodel version should be >= 1.4.3, optimum version should >= 1.24.0)r
   r   r	   torchcudais_availableRuntimeErrorr   parse	importlibmetadata)r   argsr   gptq_supports_cpus       r   validate_environmentz$GptqHfQuantizer.validate_environment5   s
   #%% 	ighhh244  	l)@)@)B)B 	lSTTT')) 	ltuuu#%% 	lM),44[AABBW]SZE[E[[[}Y/77	BBCCgmT]F^F^^^jkkk		l 	l^^r    dtypetorch.dtypereturnc                 Z    |t           j        k    rt                              d           |S )NzLWe suggest you to set `dtype=torch.float16` for better efficiency with GPTQ.)r%   float16loggerinfo)r   r/   s     r   update_dtypezGptqHfQuantizer.update_dtypeD   s'    EM!!KKfgggr    c                 6    |dt          j        d          i}|S )N cpu)r%   device)r   
device_maps     r   update_device_mapz!GptqHfQuantizer.update_device_mapI   s"    el5112Jr    modelr   c                 B   |j         j        dk    rt          d          | j        rxt	          j        t          j                            d                    t	          j        d          k    r| j        	                    |          }d S  | j        j	        |fi |}d S d S )N	input_idsz%We can only quantize pure text model.r#   r$   )
r   main_input_namer(   pre_quantizedr   r)   r*   r+   r   convert_modelr   r=   r   s      r   $_process_model_before_weight_loadingz4GptqHfQuantizer._process_model_before_weight_loadingN   s    ?*k99FGGG 	N}Y/77	BBCCw}U^G_G___.<<UCC<.<UMMfMM	N 	Nr    c                 :   | j         r| j                            |          }d S | j        j        |j        | j        _        | j                            || j        j                   t          j        | j        	                                          |j
        _        d S )N)rA   r   post_init_modelr   	tokenizername_or_pathquantize_modelr   r   to_dictconfigrC   s      r   #_process_model_after_weight_loadingz3GptqHfQuantizer._process_model_after_weight_loadingY   s     	f*::5AAEEE'195:5G(2"11%9Q9[\\\/9/CDDZDbDbDdDd/e/eEL,,,r    c                     dS NT r   s    r   is_trainablezGptqHfQuantizer.is_trainablec   s    tr    c                     dS rN   rO   rP   s    r   is_serializablezGptqHfQuantizer.is_serializableg   s    tr    )r/   r0   r1   r0   )r=   r   )__name__
__module____qualname____doc__requires_calibrationr   r   r.   r6   r<   rD   rL   propertyboolrQ   rS   __classcell__)r   s   @r   r   r   #   s          !e,C e e e e e el l l   
  
	N 	N 	N 	Nf f f f d    X      r    r   )r*   typingr   	packagingr   baser   modeling_utilsr   utilsr	   r
   r   r   utils.quantization_configr   r   r%   
get_loggerrT   r4   r   rO   r    r   <module>rc      s                                1000000 ] ] ] ] ] ] ] ] ] ] ] ] K K K K K K K K  LLL		H	%	%E E E E Ek E E E E Er    