
    biu	                         d dl Z ddlmZ de j        dede j        fdZ	 	 dd	e j        j        d
e j        de j        de j        de j        dz  dededz  de	e j        df         fdZ
dS )    N   )PagedAttentionCachehidden_statesn_repreturnc                     | j         \  }}}}|dk    r| S | dddddddddf                             |||||          } |                     |||z  ||          S )z
    This is the equivalent of torch.repeat_interleave(x, dim=1, repeats=n_rep). The hidden states go from (batch,
    num_key_value_heads, seqlen, head_dim) to (batch, num_attention_heads, seqlen, head_dim)
       N)shapeexpandreshape)r   r   batchnum_key_value_headsslenhead_dims         _/root/projects/butler/venv/lib/python3.11/site-packages/transformers/integrations/sdpa_paged.py	repeat_kvr      s    
 2?1D.Ehzz!!!!QQQaaa"23::5BUW\^bdlmmM  (;e(CT8TTT            modulequerykeyvalueattention_maskdropoutscalingc           	         |                     dd           }||                    ||| j        |d         |d                   \  }}|                    dd                              d          }|                    dd                              d          }t          | d          r*t          || j                  }t          || j                  }|}	|                                }|                                }|                                }t          j
        j                            ||||	||d	          }
|
                    dd
                                          }
|
d fS )Ncache
read_indexwrite_index)
key_statesvalue_states	layer_idxr   r   r   r	   num_key_value_groupsF)	attn_mask	dropout_pscale	is_causalr   )popupdater"   	transpose	unsqueezehasattrr   r#   
contiguoustorchnn
functionalscaled_dot_product_attention)r   r   r   r   r   r   r   kwargsr   causal_maskattn_outputs              r   sdpa_attention_paged_forwardr5      so    )/

7D(A(AE\\&l+}- " 
 

U mmAq!!++A..1%%//22 v-.. >V899%!<== !K E
..

CE(%BB C 	 	K ''1--88::Kr   )r   N)r.   $generation.continuous_batching.cacher   Tensorintr   r/   Modulefloattupler5    r   r   <module>r=      s     F F F F F F	UU\ 	U# 	U%, 	U 	U 	U 	U$  0 0HO0<0 
0 <	0
 L4'0 0 T\0 5<0 0 0 0 0 0r   