
    bi                         d dl mZ ddlmZ ddlmZ ddlmZ erddlm	Z	 ddl
mZmZmZmZmZ dd	lmZ  e            rd d
lZ ej        e          Z G d de          Zd
S )    )TYPE_CHECKING   )tqdm   )HfQuantizer)get_module_from_name)PreTrainedModel)is_accelerate_availableis_flute_availableis_hadamard_availableis_torch_availablelogging)QuantizationConfigMixinNc                        e Zd ZdZdZdef fdZd Zdd	Z	 	 ddZ	ddZ
edefd            Zd Zd
ddedefdZd Z xZS )HiggsHfQuantizerz
    Quantizer of the HIGGS method. Enables the loading of prequantized models and in-flight quantization of full-precision models.
    Fquantization_configc                 <     t                      j        |fi | d S )N)super__init__)selfr   kwargs	__class__s      b/root/projects/butler/venv/lib/python3.11/site-packages/transformers/quantizers/quantizer_higgs.pyr   zHiggsHfQuantizer.__init__)   s)    ,7777777    c                    t           j                                        st          d          t	                      st          d          t                      st          d          t                      st          d          |t          d          t          |t                    r;d|                                v sd|                                v rt          d          d S d S )	NzNHIGGS quantization is only supported on GPU. Please use a different quantizer.zHUsing `higgs` quantization requires Accelerate: `pip install accelerate`zLUsing `higgs` quantization requires FLUTE: `pip install flute-kernel>=0.3.0`zbUsing `higgs` quantization requires fast_hadamard_transform: `pip install fast_hadamard_transform`zwYou are attempting to load a HIGGS model without setting device_map. Please set device_map comprised of 'cuda' devices.cpudiskzYou are attempting to load a HIGGS model with a device_map that contains a CPU or disk device. This is not supported. Please remove the CPU or disk device from the device_map.)torchcudais_availableNotImplementedErrorr
   ImportErrorr   r   
ValueError
isinstancedictvalues)r   
device_mapr   s      r   validate_environmentz%HiggsHfQuantizer.validate_environment,   s   z&&(( 	x%&vwww&(( 	jhiii!## 	nlmmm$&& 	t   F   
D)) 	
))++++v9J9J9L9L/L/L h  	 	/L/Lr   dtypetorch.dtypereturnc                 l    |t           j        k    r#|t           j        k    rt          d| d          |S )NzInvalid `dtype` zS. HIGGS quantization only supports `dtype=torch.float16` or `dtype=torch.bfloat16`.)r   float16bfloat16r#   )r   r)   s     r   update_dtypezHiggsHfQuantizer.update_dtypeG   sB    EM!!eu~&=&=}5}}}   r   modelr	   c                     ddl m} |                     || j        j        |j                  | _         ||| j        | j                   d S )Nr   )replace_with_higgs_linear)r   modules_to_not_convert)integrationsr2   get_modules_to_not_convertr   r3   _keep_in_fp32_modules)r   r0   r   r2   s       r   $_process_model_before_weight_loadingz5HiggsHfQuantizer._process_model_before_weight_loadingq   ss    
 	=<<<<<&*&E&E4+BED_'
 '
# 	"! $ 8#'#>	
 	
 	
 	
 	
 	
r   c                 j  
 ddl m}m} ddlm} ddlm
 i }
fd|                                D             }t          |	                                dd	          D ]\  }}	|	j
        j        |vr# ||	j
        j        
          ||	j
        j        <   ||	j
        j                 |	_        |                    | j        j        |                   |	_         ||	j
        j        |	j        j        |	j                  \  |	j
        _        |	_        |	j                                        | j        j        |<   d S )Nr   )TuneMetaDatamaybe_tune_and_repack)make_workspace_streamkr   HiggsLinearc                 <    i | ]\  }}t          |          ||S  )r$   ).0namemoduler=   s      r   
<dictcomp>zHHiggsHfQuantizer._process_model_after_weight_loading.<locals>.<dictcomp>   s1    sss,$S]^dfqSrSrsvsssr   zRepacking HIGGS modulesF)descleave)device)weightscalesmetadata)
flute.tuner9   r:   flute.utilsr;   r4   r=   named_modulesr   itemsrG   rF   	workspace	from_dictr   tune_metadatadatarH   to_dict)r   r0   r   r9   r:   r;   flute_workspacesflute_modulesrA   rB   r=   s             @r   #_process_model_after_weight_loadingz4HiggsHfQuantizer._process_model_after_weight_loading   su   BBBBBBBB666666......ssss%:M:M:O:Osss !4!4!6!6=V^cddd 	Z 	ZLD& }#+;;;9O9OW]WdWk9l9l9l !56/0DEF $0#9#9$:R:`ae:f#g#gF 7L7L})})-8 8 84FM 4
 <B;O;W;W;Y;YD$2488	Z 	Zr   c                     dS )NFr?   r   s    r   is_trainablezHiggsHfQuantizer.is_trainable   s    ur   c                     dS )NTr?   rW   s    r   is_serializablez HiggsHfQuantizer.is_serializable   s    tr   
param_namec                 h    ddl m} t          ||          \  }}t          ||          r|dk    rdS dS )Nr   r<   rG   TF)r4   r=   r   r$   )r   r0   r[   r   r=   rB   tensor_names          r   param_needs_quantizationz)HiggsHfQuantizer.param_needs_quantization   sN    ......25*EEfk** 	{h/F/F45r   c                 (    ddl m}  ||          }|S )Nr   )dequantize_higgs)r4   r`   )r   r0   r`   s      r   _dequantizezHiggsHfQuantizer._dequantize   s)    333333  ''r   )r)   r*   r+   r*   )r0   r	   )__name__
__module____qualname____doc__requires_calibrationr   r   r(   r/   r7   rU   propertyboolrX   rZ   strr^   ra   __classcell__)r   s   @r   r   r   "   s         !8,C 8 8 8 8 8 8  6   T
 
 
 
 
"Z Z Z Z2 d    X  .? S _c          r   r   )typingr   utils.loggingr   baser   quantizers_utilsr   modeling_utilsr	   utilsr
   r   r   r   r   utils.quantization_configr   r   
get_loggerrb   loggerr   r?   r   r   <module>rt      s   !                             2 2 2 2 2 2  1000000 s s s s s s s s s s s s s s ? ? ? ? ? ?  LLL		H	%	%N N N N N{ N N N N Nr   