
    bi:1                    N   d Z ddlmZ ddlZddlmZ ddlmZ ddlm	Z	 ddl
mZ dd	lmZmZ erdd
lmZ ddlmZ i Ze ed           G d d                                  Z G d d          Z edd          Zd,dZd-d"Zd.d/d'Z ej                    Zd0d(Zd.d)d*d+ZdS )1z
Contains the logic for automatic additional output capture with our forward decorators.
This mostly describe the hooks used and the logic to make capture thread/context safe.
    )annotationsN)
ContextVar)	dataclasswraps)TYPE_CHECKING   )is_torchdynamo_compilingrequires)nn   PreTrainedModel)torch)backendsc                  H    e Zd ZU dZded<   dZded<   dZded	<   dZded
<   dS )OutputRecordera  
    Configuration for recording outputs from a model via hooks.

    Attributes:
        target_class (Type): The class (e.g., nn.Module) to which the hook will be attached.
        index (Optional[int]): If the output is a tuple/list, optionally record only at a specific index.
        layer_name (Optional[str]): Name of the submodule to target (if needed), e.g., "transformer.layer.3.attn".
        class_name (Optional[str]): Name of the class to which the hook will be attached. Could be the suffix of class name in some cases.
    ztype[nn.Module]target_classr   intindexN
str | None
layer_name
class_name)__name__
__module____qualname____doc____annotations__r   r   r        ^/root/projects/butler/venv/lib/python3.11/site-packages/transformers/utils/output_capturing.pyr   r   '   sY           "!!!ENNNN!J!!!!!J!!!!!!r    r   c                  *    e Zd ZdZd Zd Zd Zd ZdS )CompileableContextVara  
    Convenience wrapper around a ContextVar for usage with `torch.compile`.
    This behaves exactly as a `ContextVar`, except when compilation is triggered in which case it behaves as a simple
    global variable. This is useful as `torch.compile` cannot trace the `get` method of `ContextVar`. This however means
    that the access to the underlying variable is not thread-safe when compilation is triggered.
    c                N    t          ||          | _        || _        d| _        d S )NdefaultF)r   context_var
global_var	compiling)selfnamer&   s      r!   __init__zCompileableContextVar.__init__B   s)    %dG<<<!r    c                    | j         r| j        S t                      rd| _        | j        S | j                                        S NT)r)   r(   r
   is_compilingr'   get)r*   s    r!   r0   zCompileableContextVar.getG   sI    > 	.?" ()) .$(!&'++---r    c                r    t                      r|| _        d| _        d S | j                            |          S r.   )r
   r(   r)   r'   set)r*   values     r!   r2   zCompileableContextVar.setS   s:    #%% 	/#DO!DN4#''...r    c                h    | j         rd | _        d| _         d S | j                            |           d S )NF)r)   r(   r'   reset)r*   tokens     r!   r5   zCompileableContextVar.reset[   s;    > 	*"DO"DNNN""5)))))r    N)r   r   r   r   r,   r0   r2   r5   r   r    r!   r#   r#   :   sZ           

. 
. 
./ / /* * * * *r    r#   output_collectorr%   module	nn.Modulekeystrr   r   returnNonec                @    fd}|                      |           dS )zaInstall the forward hook needed to capture the output described by `key` and `index` in `module`.c                   t                                           }||                                vrd S dk    r:t          |                   dk    r!|                             |d                    t          |t                    s|                             |           d S |         #|                             |                    d S d S )Nhidden_statesr   )_active_collectorr0   keyslenappend
isinstancetuple)r8   argsoutputcollected_outputsr   r:   s       r!   output_capturing_hookz;install_output_capuring_hook.<locals>.output_capturing_hookj   s    -1133$3D3I3I3K3K(K(KF/!!c*;C*@&A&AQ&F&Fc"))$q'222&%(( 	9c"))&11111E]&c"))&-88888 '&r    N)register_forward_hook)r8   r:   r   rJ   s    `` r!   install_output_capuring_hookrL   g   s?    9 9 9 9 9 9   !677777r    parent_modulemodule_namecapture_tasks list[tuple[str, OutputRecorder]]c                   ddl m} |                                 D ]B\  }}t          ||          st	          || d| |           ,t          || d|            C|D ]i\  }}|j        t          | |j                  s!|j        A|                    |j                  r'|j	        
|j	        |vrSt          | ||j                   jdS )a  
    Recursively install all output capturing hooks on all submodules of `parent_module`.
    Note that we need to use this recursive approach instead of simply iterating over all modules, because we want
    to respect the `capture_tasks` of all individual submodels (`PreTrainedModel` instances) in the graph. That is, once
    we reach a submodel in the graph, its children should use this submodel's `capture_tasks`, but other parts of the graph
    should not.
    r   r   .)prefixN)modeling_utilsr   named_childrenrE   recursively_install_hooks"install_all_output_capturing_hooksr   r   endswithr   rL   r   )rM   rN   rO   r   r+   r8   r:   specss           r!   rV   rV   {   s     100000 &4466 W Wf&/22 	W%f.E.Et.E.E}UUUU /v>U>Ut>U>UVVVVV $ J J
U*z-I[/\/\*([-A-A%BR-S-S(+0@0S0S(U[IIIJ Jr    modelr   rS   r   c                    t                               t          | j                            pi }g }|                                D ]\  }}t          |t                    s|g}|D ]z}t          |t                    sLd|v rdnd}t          |t                    sdn|}t          |t                    s|nd}	t          |	||          }|                    ||f           {||nd}t          | ||           d| _
        dS )z
    Install the output recording hooks on all the modules in `model`. Tis will take care of correctly dispatching
    the `_can_record_outputs` property of each individual submodels in case of composite models.
    r@   r   r	   N)r   r   r    T)_CAN_RECORD_REGISTRYr0   r;   	__class__itemsrE   listr   rD   rV   !_output_capturing_hooks_installed)
rZ   rS   capture_flagsrO   r:   layer_specsrY   r   r   r   s
             r!   rW   rW      s.    ),,S-A-ABBHbMM)//11 	/ 	/[+t,, 	(&-K  	/ 	/Ee^44 f,33)3E3)?)?JTTU
,6uc,B,BLuu&LZdeee  #u....	/ )VVrFeV];;;.2E+++r    c                    t          | dd          rdS t          5  t          | dd          r	 ddd           dS t          |            ddd           dS # 1 swxY w Y   dS )z
    Check if the model already has output capturing hooks installed, and install them if it is not already the
    case.
    Note that this is thread-safe, in case 2 (or more) threads want to install them concurrently.
    ra   FN)getattr_hook_installation_lockrW   )rZ   s    r!   maybe_install_capturing_hooksrg      s     u95AA 	  2 2 5=uEE 		2 2 2 2 2 2 2 2 	+51112 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2s   AAAAT)tie_last_hidden_statesc               ,    fd}|  ||           S |S )a  
    Decorator to intercept specific layer outputs through hooks. The hooks are installed only once and lazily,
    the first time output capture is requested with the `output_xxx` kwargs/config.
    The implementation is fully context/thread safe, except when using `torch.compile`, as dynamo is unable to trace
    through `ContextVar` methods.

    Args:
        tie_last_hidden_states (`bool`, *optional*, defaults to `True`):
            Whether to overwrite `out.hidden_states[-1]` with the `out.last_hidden_state`.
            This is true for all language models and should be toggled off only if
            `out.hidden_states[-1]` has to be the hidden state before last layer norm, which
            is needed for some vision models (e.g. CLIP, SigLIP)
    c                @     t                      fd            }|S )Nc                                         dt           j        dd                    }t                              t           j                            pi } fd|D             }d|v r-                    dt           j        dd                    |d<   d|v r-                    dt           j        dd                    |d	<   d
 |                                D             }t          |          dk    rt                      t                              |          }	  
 g|R i }t                              |           n# t                              |           w xY w|D ]_}	|	dk    rsnt          |d          r4||	         d d         ||	<   ||	                             |j                   nCt          |d          r3||	         d d         ||	<   ||	                             |j                   t#          ||	                   ||	<   |	dk    rt%          ||	         t&                    r`t          ||	                   dk    rGt#          ||	         dd d                   ||	<   t#          ||	         dd d                   |d|	z   <   -t#          ||	                   ||	<   Gt#          ||	                   ||	<   a|du r|                                }|S )Nreturn_dictTc                x    i | ]6}d |                      d | t          j        d | d                    7S )output_F)r0   re   config).0kkwargsr*   s     r!   
<dictcomp>zHcapture_outputs.<locals>.wrapped_fn.<locals>.wrapper.<locals>.<dictcomp>   s[        !vzz-A--mXYmm]b9c9cdd  r    cross_attentionsoutput_attentionsFoutput_cross_attentionsmask_decoder_attentionsoutput_mask_decoder_attentionsc                F    i | ]\  }}||                     d d          g S )rn   r\   )replace)rp   rq   vs      r!   rs   zHcapture_outputs.<locals>.wrapped_fn.<locals>.wrapper.<locals>.<dictcomp>   s2     g g g$!Qef g9b!9!92 g g gr    r   r@   vision_hidden_stateslast_hidden_state
attentionsr   r	   cross_)popre   ro   r]   r0   r;   r^   r_   rC   rg   rA   r2   r5   hasattrrD   r|   r~   rF   rE   r`   to_tuple)r*   rG   rr   rl   capturable_flagsrecordable_keysrI   output_tokenoutputsr:   funcrh   s   ` `       r!   wrapperz4capture_outputs.<locals>.wrapped_fn.<locals>.wrapper   s]    !**]GDKX\4]4]^^K  477DN8K8KLLRPR    )  O
 "%555=CZZ'>QSX)Y)Y> > 9: ),<<<DJJJ'>QSX)Y)YE E @A !h g/J_J_JaJa g g g$%%))-d333,001BCCL6$t5d555f55 "''5555!''5555 ) A A/))1 Q *@AA Q1B31G1L)#.)#.55g6RSSSS *=>> Q1B31G1L)#.)#.55g6OPPP#():3)?#@#@GCLLL((!"23"7>> E3GWX[G\C]C]abCbCb',->s-CADqD-I'J'J278I#8NqtRSt8T2U2U3//',->s-C'D'D#():3)?#@#@GCLLe##!**,,Ns   (E E+r   )r   r   rh   s   ` r!   
wrapped_fnz#capture_outputs.<locals>.wrapped_fn   s:    	t=	 =	 =	 =	 =	 
=	~ r    r   )r   rh   r   s    ` r!   capture_outputsr      s@    A A A A AF z$r    )r8   r9   r:   r;   r   r   r<   r=   )rM   r9   rN   r;   rO   rP   r<   r=   )N)rZ   r   rS   r   r<   r=   )rZ   r   r<   r=   )r   
__future__r   	threadingcontextvarsr   dataclassesr   	functoolsr   typingr   import_utilsr
   r   r   r   rT   r   r]   r   r#   rA   rL   rV   rW   Lockrf   rg   r   r   r    r!   <module>r      s   
 # " " " " "     " " " " " " ! ! ! ! ! !                   < < < < < < < <  1000000   	:" " " " " " "  ""&* &* &* &* &* &* &* &*T *)*<dKKK 8 8 8 8(J J J J@3 3 3 3 3: )).** 2 2 2 2&T T T T T T T Tr    