
    bi                         d dl mZ ddlmZ ddlmZ erddlmZ ddlm	Z	m
Z
mZmZ ddlmZ  e            rd d	lZ ej        e          Z G d
 de          Zd	S )    )TYPE_CHECKING   )HfQuantizer)get_module_from_name   )PreTrainedModel)is_accelerate_availableis_optimum_quanto_availableis_torch_availablelogging)QuantoConfigNc                        e Zd ZdZdZdef fdZd Zddded	e	fd
Z
deeeez  f         d	eeeez  f         fdZdddeddd	ef fdZddZed	e	fd            Zd Zd Z xZS )QuantoHfQuantizerz*
    Quantizer for the quanto library
    Fquantization_configc                      t                      j        |fi | ddddd}|                    | j        j        d           | _        d S )Nr   g      ?g      ?)int8float8int4int2)super__init__getr   weightsquantized_param_size)selfr   kwargsmap_to_param_size	__class__s       c/root/projects/butler/venv/lib/python3.11/site-packages/transformers/quantizers/quantizer_quanto.pyr   zQuantoHfQuantizer.__init__-   sc    ,77777	
 
 %6$9$9$:R:Z\`$a$a!!!    c                    t                      st          d          t                      st          d          |                    d          }t	          |t
                    rNt          |          dk    rd|                                v sd|                                v rt          d          | j	        j
        t          d          d S )	NzhLoading an optimum-quanto quantized model requires optimum-quanto library (`pip install optimum-quanto`)z`Loading an optimum-quanto quantized model requires accelerate library (`pip install accelerate`)
device_mapr   cpudiskzYou are attempting to load an model with a device_map that contains a CPU or disk device.This is not supported with quanto when the model is quantized on the fly. Please remove the CPU or disk device from the device_map.zWe don't support quantizing the activations with transformers library.Use quanto library for more complex use cases such as activations quantization, calibration and quantization aware training.)r
   ImportErrorr	   r   
isinstancedictlenvalues
ValueErrorr   activations)r   argsr   r"   s       r   validate_environmentz&QuantoHfQuantizer.validate_environment7   s    *,, 	z   '(( 	r   ZZ--
j$'' 	:""u
0A0A0C0C'C'CvQ[QbQbQdQdGdGd P  
 #/;O   <;r    modelr   
param_namereturnc                 p    ddl m} t          ||          \  }}t          ||          rd|v r|j         S dS )Nr   )QModuleMixinweightF)optimum.quantor2   r   r&   frozen)r   r.   r/   r   r2   moduletensor_names          r   param_needs_quantizationz*QuantoHfQuantizer.param_needs_quantizationN   sT    //////25*EEfl++ 	K0G0G}$$5r    
max_memoryc                 B    d |                                 D             }|S )Nc                      i | ]\  }}||d z  S )g? ).0keyvals      r   
<dictcomp>z7QuantoHfQuantizer.adjust_max_memory.<locals>.<dictcomp>Z   s"    III(#sc3:IIIr    )items)r   r9   s     r   adjust_max_memoryz#QuantoHfQuantizer.adjust_max_memoryY   s'    IIj6F6F6H6HIII
r    paramztorch.Tensorc                     |                      ||          r| j        | j        S t                                          |||          S )z4Return the element size (in bytes) for `param_name`.)r8   r   r   param_element_size)r   r.   r/   rC   r   s       r   rE   z$QuantoHfQuantizer.param_element_size]   sH    ((
;; 	-@Y@e,,ww))%UCCCr    c                     ddl m} |                     || j        j        |j                  | _         ||| j        | j                  }d S )Nr   )replace_with_quanto_layers)modules_to_not_convertr   )integrationsrG   get_modules_to_not_convertr   rH   _keep_in_fp32_modules)r   r.   r   rG   s       r   $_process_model_before_weight_loadingz6QuantoHfQuantizer._process_model_before_weight_loadingd   sg    ======&*&E&E4+BED_'
 '
# +*$*E[_[s
 
 
r    c                     dS )NTr<   r   s    r   is_trainablezQuantoHfQuantizer.is_trainableo   s    tr    c                     dS )NFr<   rN   s    r   is_serializablez!QuantoHfQuantizer.is_serializables   s    ur    c                 $    ddl m}  ||           S )Nr   )QuantoQuantize)integrations.quantorS   )r   rS   s     r   get_quantize_opsz"QuantoHfQuantizer.get_quantize_opsv   s$    888888~d###r    )r.   r   )__name__
__module____qualname____doc__requires_calibrationr   r   r-   strboolr8   r'   intrB   floatrE   rL   propertyrO   rQ   rU   __classcell__)r   s   @r   r   r   &   st         !bL b b b b b b  .	.? 	S 	_c 	 	 	 	DcCi,@ T#sUXy.EY    D(9 Ds DSa Dfk D D D D D D	
 	
 	
 	
 d    X  $ $ $ $ $ $ $r    r   )typingr   baser   quantizers_utilsr   modeling_utilsr   utilsr	   r
   r   r   utils.quantization_configr   torch
get_loggerrV   loggerr   r<   r    r   <module>rj      s   !                 2 2 2 2 2 2  1000000            5 4 4 4 4 4  LLL		H	%	%S$ S$ S$ S$ S$ S$ S$ S$ S$ S$r    