
    bi/                        d dl Zd dlZd dlZd dlZd dlmZ d dlmZ d dl	m
Z
mZ  e
            rddlmZ ddlmZ  e            rn ej        ej                            d                    Z ej        ej                            d                     ej        d	          k    rd d
lmZ d dlmZ  ej        e          Zdededz  fdZd Zd Z G d de          Z G d de          ZdS )    N)version)logging)is_torch_availableis_torchao_available   )ConversionOps)get_module_from_nametorchao0.15.0)unflatten_tensor_state_dict)is_metadata_torchaoconfig_namereturnc                     |                                  } t          j        d|           }|r|                    d          S dS )z
    Extract the size digit from strings like "4weight", "8weight".
    Returns the digit as an integer if found, otherwise None.
    z
(\d)weight   N)lowerresearchgroup)r   	str_matchs     \/root/projects/butler/venv/lib/python3.11/site-packages/transformers/integrations/torchao.pyfuzzy_match_sizer   *   sE    
 ##%%K	-55I "q!!!4    c                     ddl m} ddlm} t	          | |          r$| j        j         d|                                  dS t	          | |          r,| j        j         d| j         dt          | j	                   dS d S )Nr   )AffineQuantizedTensor)LinearActivationQuantizedTensor()z(activation=	, weight=)
torchao.dtypesr   7torchao.quantization.linear_activation_quantized_tensorr   
isinstance	__class____name___quantization_typeinput_quant_funcoriginal_weight_tensor)weightr   r   s      r   r%   r%   9   s    444444gggggg&/00 M"+LLf.G.G.I.ILLLL&9:: P"+  P  P9P  P  P[mnt  oL  \M  \M  P  P  P  	PP Pr   c                     t          | j                  }|)d| j        j        d          d| j        j        d          dS d| j        j        d          d| j        j        d          d| S )Nzin_features=r   z, out_features=r   z, weight=Noner   )r%   r(   shape)selfr(   s     r   _linear_extra_reprr,   D   sx    ,,F~fdk/2ff4;CTUVCWffffjdk/2jj4;CTUVCWjjbhjjjr   c                       e Zd Zd Z	 	 	 ddeeej        f         dej        j	        dz  dedz  deeej        f         fdZ
dS )	TorchAoQuantizec                     || _         d S Nhf_quantizerr+   r2   s     r   __init__zTorchAoQuantize.__init__M       (r   N
input_dictmodelfull_layer_namer   c                 

   ddl m} t          |                                          d         \  }}t	          |t
                    r|d         n|}t          ||          \  }	}
t          j        	                    ||j
                  |	j        |
<   |                                }t          |	          t          |          k    }| j        j        j        }|r,|r*t#          |j                            d          dd           | j        j                                        t+          j        d          k    rdd	l m} | j        j                                        }t	          ||          r|                    d
d          \  }}d }||j        v r-|                    d          r
J d            |j        |         }n||j        v r-|                    d          r
J d            |j        |         }n|j        D ]p}|                    d          st;          j        |dd          |          r|j        |         } nHt;          j        |dd          |          r|j        |         } nq|j                            dd           }||dk    rQ|r|r|	j         !                                } ||	|d            |"                    |           d|	_#        |r|rd|ini S  |||i          } ||	|d            |"                    |           d|	_#        i S ||iS nz| j        j                                        t+          j        d          k    rEddl m$} | j        j                                        }t	          ||          r|                    d
d          \  }}d }||j        v r-|                    d          r
J d            |j        |         }ng|j        D ]D}|                    d          st;          j        |dd          |          r|j        |         } nE|j                            dd           }|R|r|r|	j         !                                } ||	|d            |"                    |           d|	_#        |r|rd|ini S ||iS |r|r|	j         !                                } ||	| j        j                                                   |"                    |           d|	_#        |r|rd|ini S )Nr   )	quantize_)requires_gradT)decodertie_word_embeddingsFr   )FqnToConfig.r   zre:zHparam fqn should not start with`re:`, which is used for specifying regexzImodule fqn should not start with`re:`, which is used for specifying regex   _defaultr(   c                     dS NT xfqns     r   <lambda>z)TorchAoQuantize.convert.<locals>.<lambda>   s    T r   zlm_head.weight)	filter_fnz0.12.0)ModuleFqnToConfigc                     dS rC   rD   rE   s     r   rH   z)TorchAoQuantize.convert.<locals>.<lambda>   s    $ r   )%torchao.quantizationr:   tupleitemsr"   listr	   torchnn	Parameterr;   _parametersget_input_embeddingsidr2   quantization_configuntie_embedding_weightssetattrconfigget_text_config_get_ao_versionr   Versionr>   get_apply_tensor_subclassrsplitfqn_to_config
startswithmodule_fqn_to_configr   	fullmatchgetr(   clonediscard_is_hf_initializedrJ   )r+   r6   r7   r8   missing_keyskwargsr:   _valuemoduletensor_nameinput_embedis_embedding_paramrW   r>   rY   
module_fqntop_level_param_namecmaybe_module_fqn_patternlm_headcustom_param_fqn_configrJ   s                          r   convertzTorchAoQuantize.convertP   s    	322222))++,,Q/5&ud33>a25/JJ*/(*<*<URWRe*<*f*f;' 0022ZZ2k??:"&"3"G"_" 	^'9 	^EL000>>@UW\]]] 0@@BBgoV^F_F___888888&:TTVVF&+.. .03B3I3I#q3Q3Q0
0"f&:::)44U;;  b   3ODAA6#777)44U;;  c   3J?AA 5;4H N N07BB5II 	"$\*B122*FXX " & ;<T UA!E\*B122*F
SS " & ;<T UA!E"
 #7;;JMM=+x77- <2I <&,m&9&9&;&;G!	&!.A.ACCC$,,_===481>PtUlt 0'::rtt 3>+?SUV>W2X2X/!	&*ATRRRR$,,_===481!	'//].0d 2BBDDX`HaHaaa>>>>>>&:TTVVF&"344 0 / 6 6sA > >
A!<<<)44U;;  c   3J?AA4:4O N N07BB5II "$\*B122*F
SS " & ;<T UA!E"
 #7;;JMM=) 8.E 8"(-"5"5"7"7Ifa3F3FGGGG ((99904F-:LpQhp,g66npp'// 	,"9 	,m))++G	&$+?YY[[\\\_---$(!.@dE\d '**bddr   )NNN)r$   
__module____qualname__r4   dictstrrP   TensorrQ   Moduleru   rD   r   r   r.   r.   L   s        ) ) ) )-&*ye yeel*+ye x%ye t	ye 
c5<	 ye ye ye ye ye yer   r.   c                       e Zd Zd Z	 	 	 	 d	deeej        f         dee         dz  dej	        j
        dz  dedz  deeej        f         f
dZdS )
TorchAoDeserializec                     || _         d S r0   r1   r3   s     r   r4   zTorchAoDeserialize.__init__   r5   r   Nr6   source_patternsr7   r8   r   c           
         t          |                                          d         |v}i }d                    |                    d          dd                   }	|r3t	          |d         t                     r|d         d         }
nx|d         }
no|                                D ]Z}t          ||                   dk    r)t          d| dt          ||                    d	          ||         d         ||	 d| <   [|r||
iS t          j        d
          t          k    rt          | j        j                  st          d          t          || j        j                  \  }}|rJ ||         }t          ||          \  }}t	          |t          j        j                  rt%          j        t(          |          |_        ||iS )a&  
        Consolidates tensor subclass components before reconstructing the object

        For example:
            input_dict: {
                "_weight_qdata": torch.Tensor,
                "_weight_scale": torch.Tensor,
            }
            full_layer_name: "model.layers.0.self_attn.k_proj.weight"

            Given this, we reconstruct a Float8Tensor instance using the qdata and scale
            and return it as a dictionary with the full_layer_name as the key and the recovered
            Float8Tensor instance as the value.
        r   r?   Nr(   r   zExpected a single tensor for z	 but got z tensors insteadr   zOTo use `safetensors` serialization, you should have `torchao>=0.15.0` installed)rO   keysjoinsplitr"   len
ValueErrorr   parseTORCHAO_VERSIONr   r2   metadatar   r	   rP   rQ   Lineartypes
MethodTyper,   
extra_repr)r+   r6   r   r7   r8   rg   rh   is_unsafe_serialization
param_data
layer_namer(   suffixunflattened_state_dictleftover_state_dict	new_paramrk   ri   s                    r   ru   zTorchAoDeserialize.convert   s   . #'z'8'8"9"9!"<O"S
XXo33C88"=>>
" 	M*X.55 .#H-a0#H-$//++ M Mz&)**a//$rrrZX^M_I`I`rrr   8B&7I!7L
j3363344 # 	p#V,,-))_<<ATUYUfUoApAp<nooo6Q)27
 7
3 3 '&&&*?;	(@@	feho.. 	M % 01CV L LF++r   )NNNN)r$   rv   rw   r4   rx   ry   rP   rz   rO   rQ   r{   ru   rD   r   r   r}   r}      s        ) ) ) -1(,&*:, :,el*+:, cT):, x%	:,
 t:, 
c5<	 :, :, :, :, :, :,r   r}   ) importlib.metadata	importlibr   r   rP   	packagingr   transformers.utilsr   transformers.utils.import_utilsr   r   core_model_loadingr   quantizers.quantizers_utilsr	   r   r   r   1torchao.prototype.safetensors.safetensors_supportr   /torchao.prototype.safetensors.safetensors_utilsr   
get_loggerr$   loggerry   r   r%   r,   r.   r}   rD   r   r   <module>r      s	       				         & & & & & & T T T T T T T T  3222222 > > > > > >  X#gmI$6$>$>y$I$IJJOw}Y'//	::;;}w}X?V?VVV	
 	
 	
 	
 	
 	
 	XWWWWW		H	%	%# #*    P P Pk k k}e }e }e }e }em }e }e }e@>, >, >, >, >, >, >, >, >, >,r   