
    bi&c                        d dl mZmZ  e            r
ddlZddlmZ ddlmZ d dlmZ d dl	m
Z
mZ  ej        e          Zg dZed	             Z G d
 de          Z G d de          Z G d de          Zd Zd Zej        dddej        dedej        fdZej        dddej        dedej        fdZ G d dej                  Zd Zd Zd Z d Z!d Z"d  Z#d#d!e$e%         dz  fd"Z&dS )$   )is_torch_availablelogging    N)nn)contextmanager)ConversionOps)get_module_from_nameshould_convert_module)g        g      ?g      ?g      ?g       @g      @g      @g      @g       g      g      g      g       g      g      g      c              #     K   t                      rdd l}t          | |j                  r| j        } n%t          | t
                    r |j        |           } t          | dd           }|dk    r8|j                            |           5  d V  	 d d d            d S # 1 swxY w Y   |dk    rHt          |d          r8|j	                            |           5  d V  	 d d d            d S # 1 swxY w Y   d V  d S )Nr   typecudaxpu)
r   torch
isinstanceTensordevicestrgetattrr   hasattrr   )devr   dev_types      Z/root/projects/butler/venv/lib/python3.11/site-packages/transformers/integrations/mxfp4.py	on_devicer   1   s      c5<(( 	$*CCS!! 	$%,s##C3--v""3''                  u!6!6!!#&&                  
EEEEEs$   BB"BC))C-0C-c                       e Zd Zd Z	 	 	 d	deeej        f         dej        j	        dz  de
e         dz  dedz  deeej        f         f
dZdS )
Mxfp4Quantizec                     || _         d S Nhf_quantizerselfr   s     r   __init__zMxfp4Quantize.__init__H       (    N
input_dictmodelmissing_keysfull_layer_namereturnc                 J   t          |                                          d         \  }}t          |t                    r|d         n|}t	          ||          \  }}t          j        |j                  5  t          |t                    rt          |	                    dd          t                    \  }	}
t          j        j        t          j        j        t          j        j        }}}t          |	|
t                    \  }	}
d|v rdnd}||j        v r|j        |= t#          |||	           t#          || d ||
 | |                                           |                    |            d	|_        i cd d d            S 	 d d d            d S # 1 swxY w Y   d S )
Nr   gate_up_proj	down_proj_precision_configrhs_dataweight_scaleflex_ctxT)tupleitemsr   listr	   r   r   Mxfp4GptOssExpertsquantize_to_mxfp4	transposetriton_kernels_hub
matmul_ogsPrecisionConfigFlexCtx
InFlexDataswizzle_mxfp4_parameterssetattrdiscard_is_hf_initialized)r!   r%   r&   r'   r(   kwargs_valuemoduletriton_weight_tensorr3   r=   r>   r?   projs                  r   convertzMxfp4Quantize.convertK   s%    ))++,,Q/5&ud33>a(@@	\%,'' 	 	&"455 5FuWY[]G^G^`r5s5s2$l&1A&19&1< +5
 6C(,8J6 62$l *8?)J)J~~P[6---*40&:;;;...#OYcYcYeYeHfHfHfggg   $$%9:::,0)9	 	 	 	 	 	 	 		 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	s   6DFFFNNN__name__
__module____qualname__r"   dictr   r   r   r   Moduler7   rK    r$   r   r   r   G   s        ) ) ) )-)-&*) )el*+) x%) 3i$&	)
 t) 
c5<	 ) ) ) ) ) )r$   r   c                       e Zd Zd Z	 	 	 ddeeej        f         dej        j	        dz  dedz  deeej        f         fdZ
dS )	Mxfp4Dequantizec                     || _         d S r   r   r    s     r   r"   zMxfp4Dequantize.__init__x   r#   r$   Nr%   r&   r(   r)   c                 J   d|                                 v r2t          |d         t                    r|d         d         }n|d         }d|                                 v r2t          |d         t                    r|d         d         }n|d         }t          ||          }||iS )N_blocksr   _scales)keysr   r7   dequantize_convertops)	r!   r%   r&   r(   r'   rE   blocksscalesdequantizeds	            r   rK   zMxfp4Dequantize.convert{   s     
))))*Y/66 /#I.q1#I.
))))*Y/66 /#I.q1#I. ,FF;;--r$   rL   )rN   rO   rP   r"   rQ   r   r   r   r   rR   rK   rS   r$   r   rU   rU   w   s        ) ) ) )-&*. .el*+. x%. t	. 
c5<	 . . . . . .r$   rU   c                       e Zd Zd Z	 	 	 d	deeej        f         dej        j	        dz  dedz  de
e         dz  deeej        f         f
dZdS )
Mxfp4Deserializec                     || _         d S r   r   r    s     r   r"   zMxfp4Deserialize.__init__   r#   r$   Nr%   r&   r(   r'   r)   c                    i }d|                                 v r8t          |d         t                    r|d         d         |d<   n|d         |d<   d|                                 v r8t          |d         t                    r|d         d         |d<   n|d         |d<   t          ||          \  }}d|v rdnd}	t	          |d         |d         ||	|d         j        t                     |                    |            d|_        i S )NrX   r   rY   r-   r.   T)	rZ   r   r7   r	   swizzle_mxfp4_convertopsr   r;   rC   rD   )
r!   r%   r&   r(   r'   rE   
param_datarH   rF   rJ   s
             r   rK   zMxfp4Deserialize.convert   s3    

))))*Y/66 >(29(=a(@
9%%(29(=
9%
))))*Y/66 >(29(=a(@
9%%(29(=
9% )@@	!/?!B!B~~ y!y!y!(	
 	
 	
 	1222$(! 	r$   rL   rM   rS   r$   r   r`   r`      s        ) ) ) )-&*)-$ $el*+$ x%$ t	$
 3i$&$ 
c5<	 $ $ $ $ $ $r$   r`   c                     |j         j        j        } ||                     t          j                  t          j        d          \  } }| |fS )N   )axis)numerics_detailsmxfpdowncast_to_mxfp_torchtor   bfloat16uint8)wr;   rj   w_scales       r   r9   r9      sG    /@E\''U^(<(<ekPQRRRJAwg:r$   c                    |j         j        |j         j        |j         j        }}}|j        j        }|j        j        j        }|                    d          \  }}	 | || |          |fi |	}  | ||          |          }| |fS )zE
    Changes the layout of the tensors depending on the hardware
    rf   )mx_axisdtype)tensorFP4convert_layoutwrap_torch_tensortensor_detailslayoutStridedLayout"make_default_matmul_mxfp4_w_layout)
rn   ro   r;   ru   rv   rw   ry   rz   value_layoutvalue_layout_optss
             r   r@   r@      s    
 	!%!0!3 +C
  .5F&5<JM&,&O&OXY&O&Z&Z#L#((#666ZZHYZZAn..w77GGGg:r$   i   rs   rows_per_chunkrs   r   r)   c                P   ddl }|                     t          j                  } |                    t          j                  dz
  }| j        dd         |j        k    s"J d| j        dd         d|j                    t          j        t          || j                  }| j        ^ }}}|	                    |          |z  }	| 
                    |	|          } |
                    |	d          }t          j        |	|d	z  || j                  }
t          d|	|          D ]}t          ||z   |	          }| ||         }|||         }|
||         }|d
z                      t          j                  }||         |ddddd	f<   ~|dz	                      t          j                  }||         |ddddd	f<   ~t          j        |||           ~~~  |
j
        g |||d	z  R  j        g |||z  d	z  R  }
|
                    dd	                                          S )w
    Convert the mxfp4 weights again, dequantizing and makes them compatible with the forward
    pass of GPT_OSS.
    r   N   r+   zblocks.shape[:-1]=z does not match scales.shape=)rs   r   rf   r         )out)mathrk   r   rm   int32shapert   
FP4_VALUESr   prodreshapeemptyrangeminintldexpviewr:   
contiguous)r\   r]   rs   r   r   lutprefix_shapeGB
rows_totalr   r0r1blkexpsubidx_loidx_his                     r   _convert_moe_packed_tensorsr      sI    KKKYYu{##FYYu{##c)F<,,,.dcrc1B.d.dU[Ua.d.d,,,
,zv}
E
E
EC ,\1a<((1,J^^J**F^^J**F
+j!a%uV]
K
K
KCAz>22  n$j11RUmRUm"R%j *++6{AAAqt!tG (uy))6{AAAqt!tG 	C#&&&&cc
3+#+
.|
.Q
.A
.
.
.
3
M\
M1q519
M
M
MC==A))+++r$   c                    	 t          | |||          S # t          j        $ r@ |                     d          } |                    d          }t          | |||          cY S w xY w)r   r~   cpu)r   r   OutOfMemoryErrorrk   )r\   r]   rs   r   s       r   convert_moe_packed_tensorsr     s    g*66Weffff ! g g g5!!5!!*66Weffffffgs    AA$#A$c                   B     e Zd Z fdZdej        dej        fdZ xZS )r8   c           	      h   t                                                       |j        | _        |j        | _        |j        | _        t          j        t          j	        | j        d| j        z  | j        dz  dt          j
                  d          | _        t          j        t          j	        | j        d| j        z  t          j                  d          | _        t          j        t          j	        | j        | j        | j        dz  dft          j
                  d          | _        t          j        t          j	        | j        | j        t          j                  d          | _        d| _        t#          |dd	          | _        d | _        d | _        t#          |dd	          | _        d S )
Nr          rr   Frequires_gradgZd;?swiglu_limitg      @)superr"   num_local_expertsnum_expertsintermediate_sizehidden_sizer   	Parameterr   zerosrm   r-   float32gate_up_proj_biasr.   down_proj_biasalphar   limitgate_up_proj_precision_configdown_proj_precision_config)r!   config	__class__s     r   r"   zMxfp4GptOssExperts.__init__)  s   !3!'!9!-LK(!d.D*DdFVZ\F\^`hmhsttt
 
 

 "$K(!d.D*DEMZZZjo"
 "
 "
 K)4+;T=SWY=Y[]^fkfqrrr
 
 

 !lK($*:%-PPP`e
 
 
 
V^S99
-1**.'V^S99


r$   hidden_statesr)   c                 2   t           j        j        t           j        j        t           j        j        }}}t           j        j        }t          |j                  5   | |d|d          | j        | j	        fd          }	 ||| j
        | j                            t          j                  ||| j        d |	          }
 ||
| j        | j                            t          j                  ||| j        |j                  }d d d            n# 1 swxY w Y   |S )Nswiglu)r   r   r   )gather_indxprecision_configgammasfused_activation)scatter_indxr   r   )r;   r<   FnSpecsFusedActivationr   	swiglu_fnr   r   r   r   r-   r   rk   r   r   r   r.   r   r   	gate_scal)r!   r   routing_data
gather_idxscatter_idxr   r   r<   r   actintermediate_cache1intermediate_cache3s               r   forwardzMxfp4GptOssExperts.forwardG  sb   )1)9)4 #-
 '-7	}+,, 	 	!/''(I?Q"R"RUYU_aeakTlnoppC",*!&))%-88&!%!C!$	# 	# 	# #-*##&&u}55(!%!@#-# # #	 	 	 	 	 	 	 	 	 	 	 	 	 	 	. #"s   B'DDD)rN   rO   rP   r"   r   r   r   __classcell__)r   s   @r   r8   r8   (  s`        : : : : :<#U\ #]b]i # # # # # # # #r$   r8   c                 T   dd l }t          j        j        t          j        j        t          j        j        t          j        j        f\  }}}}t          | j                  5  t          j
                                        }t          |j                            dd                    }d}	| j        d         }
| j        d         }||z  }||z  }|dz   |z  }|
|z  }d } || |          \  }}t          j        |d          }t          j        |d          \  }}t          j        |d|          }|                    d          }t          j        |||dz
            ||         }|                    d                              t          j                  }d	}t          j        ||k     ||          }t          j        |d
                              t          j                  }t          j        |                              t          j                  }t          j        ||k     ||	          }t          j        ||k    ||	          }t          j        ||	k    |	|          }||         }t          j        ||         |	k    |	|          } ||                                |                                          } ||                                |                                          } ||||          }|}d d d            n# 1 swxY w Y    ||||||          ||fS )Nr   
LOCAL_RANK0r+   rf   c                     t          j        |  dd          d d d |f         }|                                }t          j        | |d          }||                                fS )Nrf   T)dimstabler   )r   argsortlongtake_along_dimr   )valsktk_indxtk_vals       r   topkz routing_torch_dist.<locals>.topk  sb    mTEq>>>qqq"1"uEGllnnG)$Q???F7;;==((r$   r   )binsmaxi  T)r   )src_indxdst_indx)osr;   routing
GatherIndxRoutingDataScatterIndxcompute_expt_data_torchr   r   r   distributedget_world_sizer   environgetr   softmaxsortgatherr   histcr   rk   r   wherer   )logitsn_expts_actr   r   r   r   r   
world_sizerankreplace_valuen_tokensn_expts_totn_local_expertslocal_expert_startlocal_expert_endn_gates_padr   	expt_scal	expt_indxsort_indiceshistvar	topk_indx	gate_indxr   r   r   	expt_datahit_expertss                                r   routing_torch_distr  k  sF    III 	"-".".":	EAJ[*A 
6=	!	! 3" 3"&5577
2:>>,4455<?l1o%3!O3 1H7,	) 	) 	)  $tFK88	9M)444	"'*YA">">">	<LA|<<	 %%b))	{9;K!OLLLM_`pMpqNN2&&))%+66	 K	,> >YOO	M)D999<<U[II	M),,//<<	K	,< <iWW	K 2i ?MZZ	K	] :M9UU	i(	K	) 4 E}V_``	 !j)--//IMMOOTTT"{IMMOOimmooVVV++D/;OO	!g3" 3" 3" 3" 3" 3" 3" 3" 3" 3" 3" 3" 3" 3" 3"h ;y$iPPR]_kkks    J!LLLc                 |   dd l m} |                                r,|                                rt	          | d          rt
          }nt          j        j        }|j        d         }|	                    d| j
        j                  }t          j                            || j
        j        | j
        j                  }t#          |j                  5   ||| j
        j                  \  }}}d d d            n# 1 swxY w Y   |                     ||||          }	|		                    |d| j
        j                  }	|	|fS )Nr   
_is_hookedr+   )r   )torch.distributedr   is_availableis_initializedr   r  r;   r   r   r   router
hidden_dimr   
functionallinearweightbiasr   r   top_kexperts)
r!   r   distr   
batch_sizerouter_logitsr   r   r   
routed_outs
             r   mlp_forwardr    s   $$$$$$ 5t2244 5|9T9T 5$$,4$Q'J!))"dk.DEEMM((8JDKL\]]M	='	(	( Z Z07t{GX0Y0Y-j+Z Z Z Z Z Z Z Z Z Z Z Z Z Z Z m\:S^__J##JDK4JKKJ}$$s   C55C9<C9c                    ddl m} |                    d          }|                    d          }|                    d          }	|                    d          }
|                    d          }|                    d          }d	D ]}||v r| ||||||	|
||          }| d
}| d}t          | |                    dd          d         |           t          | |          rt          | |          rt          t          | |          t          | |                    }t          | |t          j	        
                    |                    |                               t          | |           t          | |           d S )Nr   shard_and_distribute_moduler&   empty_paramcasting_dtypeto_contiguousr   device_mesh)r-   r.   rX   rY   .rf   )integrations.tensor_parallelr  r   rB   rsplitr   r   r   r   r   r   rk   delattr)rH   
param_nameparam_valuetarget_devicedq_param_namerE   r  r&   r  r  r  r   r  rJ   blocks_attrscales_attrr^   s                    r   
dequantizer)    s   JJJJJJJJwE**]++KJJ//MJJ//M::fD**]++K- - -:&99!!!	 	 "***K!***KFJ--c155a8+FFFv{++ -0L0L -89U9UW^_egrWsWstteh&8&89V9V&W&WXXX,,,,,,)- -r$   c                 `    t          | |          }t          j                            |          S r   )r   r   r   r   )r\   r]   r^   s      r   r[   r[     s'    ,VV<<K8k***r$   c                    |j         j        |j         j        |j         j        }}}ddlm}	 |                    d          }
|                    d          }|                    d          }|                    d          }|                    d          }|                    d          }d	|v r4|                    d
          d                             d          d         }d|v r4|                    d
          d                             d          d         }| |	|
|||||||           nJt          | |	                    d
d          d         t          j                            |d                     | d}| d}t          | |          }t          | |          }|j        j        dk    r&|j        j        dk    r|                    d          }|dk    r |                    || j        dz  d          }n|                    |d| j        dz            }t          |d|          dk    r:t'          t          d          r#t          j                                        j        nd}|                    |                                          }|                    |                                          }t1          |          5  t3          |                    dd          |                    dd          |          \  }}ddd           n# 1 swxY w Y   |dk    r*t          j        || j        | j        dz  g          |_        n&t          j        || j        | j        g          |_        t          | ||           t          | | d || | |                                           t=          | |           t=          | |           ~dS dS dS )q
    This transforms the weights obtained using `convert_gpt_oss.py` to load them into `Mxfp4GptOssExperts`.
    r   r  r&   r  r  r  r   r  r\   r  r+   rX   r   r]   rY   Nrf   Fr   metar-   r   r   acceleratorr   r,   r/   r0   r2   )r<   r=   r>   r?   r   r  r   splitrB   r!  r   r   r   r   r   r   sizer   r   r   r.  current_acceleratorrk   r   r   r@   r:   Sizer   r   r"  )rH   r#  r$  r%  r;   rE   r=   r>   r?   r  r&   r  r  r  r   r  rJ   r'  r(  r\   r]   local_expertsrI   r3   s                           r   load_and_swizzle_mxfp4r4    s@   
 	%5%-%0 )WO
 KJJJJJJJwE**]++KJJ//MJJ//M::fD**]++K:$$R(..y99!<:$$R(..y99!<##;ZW[]h	
 	
 	
 	
 	
))#q11!4eh6H6Hdi6H6j6jkkk"""K"""KV[))FV[))F}V##(:f(D(DA>!!^^M63Ka3OQSTTFF^^M2v7OST7TUUF=&-88EAALSTY[hLiLiuE-AACCHHouM=))4466=))4466}%% 	 	1>  R((&*:*:2r*B*BDV2 2. ,	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 >!!).]FDVX^XpstXt4u)v)v &&).]FD\^d^p4q)r)r & 	2333&&&OQ[Q[Q]Q]@^@^@^___	
 	
 	
 	$$$$$$FFA $#(D(Ds   
=LLLc                    |j         j        |j         j        |j         j        }}}|                     d          }	t          |d|          dk    r:t          t          d          r#t          j        	                                j
        nd}|                     |                                          } |                    |                                          }|dk    r |                     |	|j        dz  d          } n|                     |	d|j        dz            } t          |d|          dk    rd}t          |          5  t!          |                     d	d          |                    d	d          |          \  }
}d
d
d
           n# 1 swxY w Y   |dk    r*t          j        |	|j        |j        dz  g          |
_        n&t          j        |	|j        |j        g          |
_        ||j        v r|j        |= t-          |||
           t-          || d || | |                                           d
S )r,  r   r   r   r.  r   r-   r   r+   r,   Nr/   r0   r2   )r<   r=   r>   r?   r0  r   r   r   r.  r1  r   rk   r   r   r   r   r@   r:   r2  r   r   rA   rB   )r\   r]   rH   rJ   r%  r;   r=   r>   r?   r3  rI   r3   s               r   rc   rc   ,  s   
 	%5%-%0 )WO KKNNM}fm44==HOPUWdHeHeq)==??DDkqYY}%%0022FYY}%%0022F~v/G!/KRPPr63Kq3PQQ}fm44==	=	!	! 
 
-:R$$f&6&6r2&>&>@R.
 .
*l
 
 
 
 
 
 
 
 
 
 
 
 
 
 

 ~%*Z@RTZTlopTp0q%r%r""%*Z@XZ`Zl0m%n%n" v!!!t$FD.///"""\GGZZ\\<Z<Z<Z[[[    s   =FFFmodules_to_not_convertc                    |j         r| S ddlm}  |d          ad}|                                 D ]\  }}t          ||          s|j        j        dk    r]|j         sVt          j	        d          5  | 
                    |t          | j                             d}ddd           n# 1 swxY w Y   |j        j        d	k    r#|j         sd
dlm}  |t          |          |_        |st"                              d           | S )aD  
    Public method that replaces the expert layers of the given model with mxfp4 quantized layers.

    Args:
        model (`torch.nn.Module`):
            The model to convert, can be any `torch.nn.Module` instance.
        quantization_config (`Mxfp4Config`, defaults to `None`):
            The quantization config object that contains the quantization parameters.
        modules_to_not_convert (`list`, *optional*, defaults to `None`):
            A list of modules to not convert. If a module name is in the list (e.g. `lm_head`), it will not be
            converted.
    rf   )
get_kernelz(kernels-community/gpt-oss-triton-kernelsFGptOssExpertsr-  TN	GptOssMLPr   )
MethodTypezYou are loading your model using mixed-precision FP4 quantization but no linear modules were found in your model. Please double check your model architecture, or submit an issue on github if you think this is a bug.)r)  hub_kernelsr8  r;   named_modulesr
   r   rN   r   r   set_submoduler8   r   typesr;  r  r   loggerwarning)r&   quantization_configr6  r8  has_been_replacedmodule_namerH   r;  s           r   replace_with_mxfp4_linearrE  [  s    % '''''' $$NOO$2244 
= 
=V$[2HII 	$77@S@^7f%% ) )##K1CEL1Q1QRRR$(!) ) ) ) ) ) ) ) ) ) ) ) ) ) ) $33<O<Z3(((((('ZV<<FN 
	
 	
 	
 Ls   2+B))B-	0B-	)NN)'utilsr   r   r   r   
contextlibr   core_model_loadingr   quantizers.quantizers_utilsr	   r
   
get_loggerrN   r@  r   r   r   rU   r`   r9   r@   rl   rs   r   r   r   r   rR   r8   r  r  r)  r[   r4  rc   r7   r   rE  rS   r$   r   <module>rK     s   0 / / / / / / /  LLL % % % % % % . . . . . . U U U U U U U U 
	H	%	%  
( 
 
 
*- - - - -M - - -`. . . . .m . . .8( ( ( ( (} ( ( (X    0 &3, 3, 3, ;	3,
 3, \3, 3, 3, 3,t &g g g ;	g
 g \g g g g2># ># ># ># ># ># ># >#FAl Al AlH% % %(- - -B+ + +
@ @ @F, , ,^) )W[\_W`cgWg ) ) ) ) ) )r$   