
    bi                        d dl Z ddlmZmZmZ ddlmZ  ej        e          Z	 edd          Z
 edd          Z e            Z e            Zd	e j        d
ede j        fdZde j        dz  de j        defdZ	 	 	 dde j        j        de j        de j        de j        de j        dz  dededz  dedz  dee j        df         fdZdS )    N   )is_torch_npu_availableis_torch_xpu_availablelogging)is_torch_greater_or_equalz2.5T)
accept_devz2.8hidden_statesn_repreturnc                     | j         \  }}}}|dk    r| S | dddddddddf                             |||||          } |                     |||z  ||          S )z
    This is the equivalent of torch.repeat_interleave(x, dim=1, repeats=n_rep). The hidden states go from (batch,
    num_key_value_heads, seqlen, head_dim) to (batch, num_attention_heads, seqlen, head_dim)
       N)shapeexpandreshape)r	   r
   batchnum_key_value_headsslenhead_dims         c/root/projects/butler/venv/lib/python3.11/site-packages/transformers/integrations/sdpa_attention.py	repeat_kvr      s    
 2?1D.Ehzz!!!!QQQaaa"23::5BUW\^bdlmmM  (;e(CT8TTT    attention_maskkeyc                 4    t           rt          S t          o| d u S )N)_is_torch_xpu_available#_is_torch_greater_or_equal_than_2_8#_is_torch_greater_or_equal_than_2_5)r   r   s     r   use_gqa_in_sdpar      s"      322.I>T3IIr           modulequeryvaluedropoutscaling	is_causalc                 J   |                     dd          rt                              d           i }	t          | d          r?t	          ||          s+t          || j                  }t          || j                  }nddi}	||nt          | dd          }|j        d         d	k    o|d u o|}t          j
                                        r.t          |t          j                  r|                                }t          rU|S|j        t          j        k    r>t          j        |                                                              |j                  }t          j        j        j        |||f||||d
|	}
|
                    d	d                                          }
|
d fS )Noutput_attentionsFz`sdpa` attention does not support `output_attentions=True`. Please set your attention to `eager` if you want any of these features.num_key_value_groups
enable_gqaTr%   r   r   )	attn_mask	dropout_pscaler%   )getloggerwarning_oncehasattrr   r   r(   getattrr   torchjit
is_tracing
isinstanceTensoritem_is_torch_npu_availabledtypeboollogical_nottodevicenn
functionalscaled_dot_product_attention	transpose
contiguous)r    r!   r   r"   r   r#   r$   r%   kwargssdpa_kwargsattn_outputs              r   sdpa_attention_forwardrF   (   s    zz%u-- 
W	
 	
 	
 Kv-.. /~s33 	/C!<==CeV%@AAEE'.K '2		UY8Z8ZI A"K~'=K)I y %*Y"E"E %NN$$	
  W%.*>%**L*L".~/B/B/D/DEEHHVVN(%B	 !	 	 	 	K ''1--88::Kr   )r   NN)r2   utilsr   r   r   utils.import_utilsr   
get_logger__name__r.   r   r   r   r8   r6   intr   r:   r   r>   ModulefloattuplerF    r   r   <module>rP      s    K K K K K K K K K K : : : : : : 
	H	%	% '@&?RV&W&W&W #&?&?RV&W&W&W #0022 0022 	UU\ 	U# 	U%, 	U 	U 	U 	U	JEL4$7 	Jel 	Jt 	J 	J 	J 	J$  !@ @HO@<@ 
@ <	@
 L4'@ @ T\@ d{@ 5<@ @ @ @ @ @r   