
    bi                        d dl mZ d dlmZ d dlmZmZ ddlmZ ddl	m
Z
mZ  ej        e          Z e            r
d dlZd dlmZ 	 	 	 	 	 dddZ G d de          Z G d de          ZdS )    )annotations)Any)is_torch_availablelogging   )ConversionOps)get_module_from_nameshould_convert_moduleNcuda:0Fmodeltorch.nn.Modulemodules_to_not_convertlist[str] | Nonequant_configdict | Nonecompute_dtypetorch.dtypedevicestrpre_quantizedboolreturnc           
        ddl m} |g }t          |                                           D ]\  }}t	          |t
          j                  s t          ||          s1|                    d          \  }	}
}|	r| 	                    |	          n| } ||s|j
        nd|s|j        nd|s	|j        dund|||d          }t          |||           | S )a  
    Replace nn.Linear modules with empty SINQLinear modules.

    Args:
        model: The model to modify
        modules_to_not_convert: List of module names to skip
        quant_config: SINQ quantization config dict (None for pre-quantized models)
        compute_dtype: Computation dtype for the quantized layers
        device: Device string for the quantized layers
        pre_quantized: Whether loading a pre-quantized checkpoint

    Returns:
        The modified model with SINQLinear modules
    r   )
SINQLinearN.FT)in_featuresout_featuresbiasr   r   r   use_unpack_kernel)sinq.sinqlinear_hfr   listnamed_modules
isinstancennLinearr
   
rpartitionget_submoduler   r   r   setattr)r   r   r   r   r   r   r   	full_namemoduleparent_path_
child_nameparent
sinq_layers                 Y/root/projects/butler/venv/lib/python3.11/site-packages/transformers/integrations/sinq.pyreplace_with_sinq_linearr1       s    , .-----%!#!%"5"5"7"788 0 0	6&"),, 	$Y0FGG 	%.%9%9#%>%>"Q
5@K$$[111eZ2?I**T4AK,,t2?J&+T))U%'"
 
 

 	
J////L    c                  (    e Zd ZdZd Z	 	 	 dddZdS )SinqQuantizea'  
    Param-level ConversionOp for SINQ (from FP weights).

    At load time, for each `Linear.weight` that should be quantized:
      - The SINQLinear module already exists (created in _process_model_before_weight_loading)
      - We just call quantize() on it with the loaded weight tensor
    c                    || _         d S Nhf_quantizerselfr8   s     r0   __init__zSinqQuantize.__init__\       (r2   N
input_dictdict[str, Any]r   torch.nn.Module | Nonefull_layer_name
str | Noner   dict[str, torch.Tensor]c                2   t          t          |                                                    \  }}t          |t                    r|d         n|}t          ||          \  }	}
|	                    |           ||                    |           d|	_        i S )Nr   T)	nextiteritemsr#   r!   r	   quantizediscard_is_hf_initialized)r:   r=   r   r@   missing_keyskwargsr,   valuesweight_tensorr*   tensor_names              r0   convertzSinqQuantize.convert_   s     j..001122	6%/%=%=Iq		625/JJ&&&#  111$(!	r2   )NNNr=   r>   r   r?   r@   rA   r   rB   __name__
__module____qualname____doc__r;   rO    r2   r0   r4   r4   S   sQ         ) ) ) )-&*      r2   r4   c                  &    e Zd ZdZd Z	 	 dddZdS )SinqDeserializea0  
    ConversionOp for loading *pre-quantized* SINQ checkpoints.

    Checkpoint layout (what `SINQLinear.state_dict` produces) is, per module:
        <prefix>.W_q
        <prefix>.bias
        <prefix>.meta

    WeightConverter in the quantizer is configured so that:
      - we group ".W_q", ".meta", ".bias" as input_dict
      - conceptually treat them as belonging to "<prefix>.weight"
      - and call this SinqDeserialize.convert to load the state into the existing SINQLinear.

    The returned dict is {} because we load directly into the module.
    c                    || _         d S r6   r7   r9   s     r0   r;   zSinqDeserialize.__init__   r<   r2   Nr=   r>   r   r?   r@   rA   r   rB   c                .   t          |                                          D ]%\  }}t          |t                     r|d         ||<   &|                    d          }|                    d          }|                    d          }	||Ot	          t          |                                                    }t          |t                     r|d         }||iS t          ||          \  }
}||d}|	|	|d<   |
                    |           d|
_	        i S )Nr   z.W_qz.metaz.bias)W_qmetar   T)
r!   rF   r#   getrD   rE   rL   r	   load_state_dictrI   )r:   r=   r   r@   rK   kvr[   r\   r   r*   r,   states                r0   rO   zSinqDeserialize.convert   s)    ))++,, 	% 	%DAq!T"" % !!
1nnV$$~~g&&~~g&& ;$,T*++--..//A!T"" aD#Q''(@@	 
 
  E&Mu%%%$(!	r2   )NNrP   rQ   rV   r2   r0   rX   rX   v   sN          ) ) ) )-&*	# # # # # # #r2   rX   )NNNr   F)r   r   r   r   r   r   r   r   r   r   r   r   r   r   )
__future__r   typingr   transformers.utilsr   r   core_model_loadingr   quantizers.quantizers_utilsr	   r
   
get_loggerrR   loggertorchtorch.nnr$   r1   r4   rX   rV   r2   r0   <module>rk      s7   # " " " " "       : : : : : : : : . . . . . . U U U U U U U U 
	H	%	% LLL
 04 $!%0 0 0 0 0f         =      F7 7 7 7 7m 7 7 7 7 7r2   