§
    ‚b¦i`  ã                   ó¾   — d dl Zd dlmZ d dlmZ ddlmZ erddlm	Z	 ddl
mZmZmZmZ dd	lmZ  e¦   «         rd dlZ ej        e¦  «        Z G d
„ de¦  «        ZdS )é    N)ÚTYPE_CHECKING)Úversioné   )ÚHfQuantizeré   )ÚPreTrainedModel)Úis_accelerate_availableÚis_gptqmodel_availableÚis_torch_availableÚlogging)Ú
AwqBackendc                   ó\   ‡ — e Zd ZdZdZˆ fd„Zd„ Zd„ Zdd„Zd	„ Z	d
„ Z
ed„ ¦   «         Zˆ xZS )ÚAwqQuantizerzu
    4-bit quantization for Activation-aware Weight Quantization(AWQ) (https://huggingface.co/papers/2306.00978)
    Tc                 ó<   •—  t          ¦   «         j        |fi |¤Ž d S )N)ÚsuperÚ__init__)ÚselfÚquantization_configÚkwargsÚ	__class__s      €ú`/root/projects/butler/venv/lib/python3.11/site-packages/transformers/quantizers/quantizer_awq.pyr   zAwqQuantizer.__init__+   s)   ø€ Ø‰ŒÔÐ,Ð7Ð7°Ð7Ð7Ð7Ð7Ð7ó    c                 óz   — t          ¦   «         st          d¦  «        ‚t          ¦   «         st          d¦  «        ‚d S )NzaLoading an AWQ quantized model requires gptqmodel. Please install it with `pip install gptqmodel`zMLoading an AWQ quantized model requires accelerate (`pip install accelerate`))r
   ÚImportErrorr	   )r   r   s     r   Úvalidate_environmentz!AwqQuantizer.validate_environment.   sQ   € Ý%Ñ'Ô'ð 	ÝØsñô ð õ 'Ñ(Ô(ð 	oÝÐmÑnÔnÐnð	oð 	or   c                 ó¸  — |t           j        k    rct           j                             ¦   «         st           j                             ¦   «         r't
                               d¦  «         t           j        }nf|t           j        k    rVt           j                             ¦   «         st           j                             ¦   «         rt
                               d¦  «         |S )Nz[`torch.bfloat16` is not supported for AWQ CUDA/XPU kernels yet. Casting to `torch.float16`.zWWe suggest you to set `dtype=torch.float16` for better efficiency on CUDA/XPU with AWQ.)ÚtorchÚbfloat16ÚcudaÚis_availableÚxpuÚloggerÚwarningÚfloat16)r   Údtypes     r   Úupdate_dtypezAwqQuantizer.update_dtype7   s¤   € Ø•E”NÒ"Ð"­¬
×(?Ò(?Ñ(AÔ(AÐ"ÅUÄY×E[ÒE[ÑE]ÔE]Ð"ÝNŠNØmñô ð õ ”MˆEˆEØ•e”mÒ#Ð#­¬×)@Ò)@Ñ)BÔ)BÐ#ÅeÄi×F\ÒF\ÑF^ÔF^Ð#ÝNŠNÐtÑuÔuÐuØˆr   Úmodelr   c                 óô   — ddl m}m} |                      || j        j        |j        d¬¦  «        | _         ||| j        | j        |                     d¦  «        ¬¦  «        } |||j        j	        ¦  «        }d S )Nr   )Úreplace_quantization_scalesÚreplace_with_awq_linearT)Úadd_default_skipsÚ
device_map)r   Úmodules_to_not_convertr,   )
Úintegrationsr)   r*   Úget_modules_to_not_convertr   r-   Ú_keep_in_fp32_modulesÚgetÚconfigÚ
model_type)r   r'   r   r)   r*   s        r   Ú$_process_model_before_weight_loadingz1AwqQuantizer._process_model_before_weight_loadingA   sŸ   € ØWÐWÐWÐWÐWÐWÐWÐWà&*×&EÒ&EØ4Ô+ÔBÀEÔD_Ðswð 'Fñ '
ô '
ˆÔ#ð (Ð'ØØ $Ô 8Ø#'Ô#>Ø—z’z ,Ñ/Ô/ð	
ñ 
ô 
ˆð ,Ð+¨E°5´<Ô3JÑKÔKˆˆˆr   c                 ó@   — ddl m}  ||| j        j        ¬¦  «         d S )Nr   )Úhf_gptqmodel_post_init)Úuse_act_order)Úgptqmodel.utils.modelr6   r   Údesc_act)r   r'   r   r6   s       r   Ú#_process_model_after_weight_loadingz0AwqQuantizer._process_model_after_weight_loadingQ   s6   € Ø@Ð@Ð@Ð@Ð@Ð@àÐ˜u°DÔ4LÔ4UÐVÑVÔVÐVÐVÐVr   c                 ó†   — | j         j        t          j        t          j        fv rt
                               d¦  «         dS dS )Nz7You cannot save an AWQ model that uses Exllama backend!FT)r   Úbackendr   Ú
EXLLAMA_V1Ú
EXLLAMA_V2r"   r#   ©r   s    r   Úis_serializablezAwqQuantizer.is_serializableV   s;   € ØÔ#Ô+µ
Ô0EÅzÔG\Ð/]Ð]Ð]ÝNŠNÐTÑUÔUÐUØ5àˆtr   c                 ó   — t          j        t          j                              d¦  «        ¦  «        t          j        d¦  «        k    S )NÚ	gptqmodelz5.0.0)r   ÚparseÚ	importlibÚmetadatar?   s    r   Úis_trainablezAwqQuantizer.is_trainable]   s3   € åŒ}YÔ/×7Ò7¸ÑDÔDÑEÔEÍÌÐW^ÑI_ÔI_Ò_Ð_r   )r'   r   )Ú__name__Ú
__module__Ú__qualname__Ú__doc__Úrequires_calibrationr   r   r&   r4   r:   r@   ÚpropertyrF   Ú__classcell__)r   s   @r   r   r   #   sÆ   ø€ € € € € ðð ð
  Ðð8ð 8ð 8ð 8ð 8ðoð oð oðð ð ðLð Lð Lð Lð Wð Wð Wð
ð ð ð ð`ð `ñ „Xð`ð `ð `ð `ð `r   r   )Úimportlib.metadatarD   Útypingr   Ú	packagingr   Úbaser   Úmodeling_utilsr   Úutilsr	   r
   r   r   Úutils.quantization_configr   r   Ú
get_loggerrG   r"   r   © r   r   ú<module>rW      s  ðð Ð Ð Ð Ø  Ð  Ð  Ð  Ð  Ð  à Ð Ð Ð Ð Ð à Ð Ð Ð Ð Ð ð ð 1Ø0Ð0Ð0Ð0Ð0Ð0à `Ð `Ð `Ð `Ð `Ð `Ð `Ð `Ð `Ð `Ð `Ð `Ø 2Ð 2Ð 2Ð 2Ð 2Ð 2ð ÐÑÔð Ø€L€L€Là	ˆÔ	˜HÑ	%Ô	%€ð<`ð <`ð <`ð <`ð <`;ñ <`ô <`ð <`ð <`ð <`r   