
    bi,                     >   d dl mZ d dlZd dlmZ ddlmZ ddlmZm	Z	m
Z
mZ ddlmZ ddlmZ ddlmZmZmZmZ  ej        e          Z G d	 d
ej                  Ze G d d                      Ze G d d                      Ze G d d                      ZdS )    )partialN   )Cache)BaseModelOutputWithPastQuestionAnsweringModelOutput SequenceClassifierOutputWithPastTokenClassifierOutput)	AutoModel)Unpack)TransformersKwargsauto_docstringcan_return_tupleloggingc                   &     e Zd ZdZdZ fdZ xZS )GradientCheckpointingLayera  Base class for layers with gradient checkpointing.

    This class enables gradient checkpointing functionality for a layer. By default, gradient checkpointing is disabled
    (`gradient_checkpointing = False`). When `model.set_gradient_checkpointing()` is called, gradient checkpointing is
    enabled by setting `gradient_checkpointing = True` and assigning a checkpointing function to `_gradient_checkpointing_func`.

    Important:

        When using gradient checkpointing with `use_reentrant=True`, inputs that require gradients (e.g. hidden states)
        must be passed as positional arguments (`*args`) rather than keyword arguments to properly propagate gradients.

        Example:

            ```python
            >>> # Correct - hidden_states passed as positional arg
            >>> out = self.layer(hidden_states, attention_mask=attention_mask)

            >>> # Incorrect - hidden_states passed as keyword arg
            >>> out = self.layer(hidden_states=hidden_states, attention_mask=attention_mask)
            ```
    Fc                    | j         r| j        rd}| j        j        }d| d}d|v r|d         rd|d<   |dz  }d}d|v r|d         d |d<   |dz  }d}d	|v r|d	         d |d	<   |d
z  }d}d|v r|d         d |d<   |dz  }d}|r2|                    d          dz   }t
                              |            | j        t          t                      j
        fi |g|R  S  t                      j
        |i |S )NFz7Caching is incompatible with gradient checkpointing in z	. Setting	use_cachez `use_cache=False`,Tpast_key_valuez `past_key_value=None`,past_key_valuesz `past_key_values=None`,
layer_pastz `layer_past=None`,,.)gradient_checkpointingtraining	__class____name__rstriploggerwarning_once_gradient_checkpointing_funcr   super__call__)selfargskwargsdo_warn
layer_namemessager   s         W/root/projects/butler/venv/lib/python3.11/site-packages/transformers/modeling_layers.pyr"   z#GradientCheckpointingLayer.__call__;   s   &  	a4=  	aG0JePZeeeGf$$)<$&+{#00  6))f5E.F.R+/'(44 F**v6G/H/T,0()55v%%&*>*J'+|$00  -!..--3##G,,,444WUWW=M5X5XQW5X5X`[_````uww0000    )r   
__module____qualname____doc__r   r"   __classcell__r   s   @r)   r   r   "   sJ         , #"1 "1 "1 "1 "1 "1 "1 "1 "1r*   r   c                        e Zd ZdZ fdZee	 	 	 	 	 	 	 ddej        dz  dej	        dz  dej        dz  de
dz  dej        dz  d	ej        dz  d
edz  dee         defd                        Z xZS ) GenericForSequenceClassificationmodelc                 &   t                                          |           |j        | _        t          | | j        t          j        |                     t          j        |j	        | j        d          | _
        |                                  d S )NF)bias)r!   __init__
num_labelssetattrbase_model_prefixr
   from_confignnLinearhidden_sizescore	post_initr#   configr   s     r)   r5   z)GenericForSequenceClassification.__init__d   sz        +d,i.CF.K.KLLLYv14?OOO
 	r*   N	input_idsattention_maskposition_idsr   inputs_embedslabelsr   r%   returnc           	      8    t          | | j                  |f|||||d|}	|	j        }
|                     |
          }||j        d         }n|j        d         }| j        j        |dk    rt          d          | j        j        d}n|}|| j        j        k                        |j	        t          j                  }t          j        |j        d         |j	        t          j                  }||z                      d          }n)d}t                              | j        j         d           |t          j        ||j	                  |f         }d }||                     |||| j        	          }t'          |||	j        |	j        |	j        
          S )NrB   rC   r   rD   r   r   r   z=Cannot handle batch sizes > 1 if no padding token is defined.)devicedtypez will not detect padding tokens in `inputs_embeds`. Results may be unexpected if using padding tokens in conjunction with `inputs_embeds.`)rJ   )logitsrE   pooled_logitsr@   )lossrL   r   hidden_states
attentions)getattrr8   last_hidden_stater=   shaper@   pad_token_id
ValueErrortorJ   torchint32arangeargmaxr   r   r   r   loss_functionr   r   rO   rP   )r#   rA   rB   rC   r   rD   rE   r   r%   transformer_outputsrO   rL   
batch_sizelast_non_pad_tokennon_pad_masktoken_indicesrM   rN   s                     r)   forwardz(GenericForSequenceClassification.forwardn   s    8]wtTE[7\7\8
)%+'8
 8
 8
 8
 ,=M** "+JJ&,Q/J;#+
a\]]];#+!#"%)AAEEfmUZU`aaL!L)<V]Z_ZefffM"/,">!F!Fr!J!J!#>* Z Z Z  
 u|Jv}MMMOaab%%VFR_hlhs%ttD/ /?-;*5
 
 
 	
r*   NNNNNNN)r   r+   r,   r8   r5   r   r   rW   
LongTensorTensorr   FloatTensorboolr   r   r   ra   r.   r/   s   @r)   r1   r1   `   s             .2.204(,26*.!%8
 8
#d*8
 t+8
 &-	8

 8
 (4/8
  4'8
 $;8
 +,8
 
*8
 8
 8
 ^ 8
 8
 8
 8
 8
r*   r1   c                        e Zd ZdZ fdZd Zd Zee	 	 	 	 	 	 	 dde	j
        dz  de	j        dz  de	j
        dz  d	edz  d
e	j        dz  de	j
        dz  de	j
        dz  dee         defd                        Z xZS )GenericForQuestionAnsweringr2   c                     t                                          |           t          | | j        t	          j        |                     t          j        |j        d          | _	        | 
                                 d S )N   )r!   r5   r7   r8   r
   r9   r:   r;   r<   
qa_outputsr>   r?   s     r)   r5   z$GenericForQuestionAnswering.__init__   si       d,i.CF.K.KLLL)F$6:: 	r*   c                 6    t          | | j                  j        S NrQ   r8   embed_tokens)r#   s    r)   get_input_embeddingsz0GenericForQuestionAnswering.get_input_embeddings   s    tT344AAr*   c                 :    |t          | | j                  _        d S rm   rn   )r#   values     r)   set_input_embeddingsz0GenericForQuestionAnswering.set_input_embeddings   s    =Bd,--:::r*   NrA   rB   rC   r   rD   start_positionsend_positionsr%   rF   c                     t          | | j                  |f||||d|}	|	j        }
|                     |
          }|                    dd          \  }}|                    d                                          }|                    d                                          }d }|| | j        ||||fi |}t          ||||	j	        |	j
                  S )N)rB   rC   r   rD   r   rI   )dim)rN   start_logits
end_logitsrO   rP   )rQ   r8   rR   rk   splitsqueeze
contiguousr[   r   rO   rP   )r#   rA   rB   rC   r   rD   rt   ru   r%   outputssequence_outputrL   rx   ry   rN   s                  r)   ra   z#GenericForQuestionAnswering.forward   s    ,Q749O+P+P,
)%+',
 ,
 ,
 ,
 "311#)<<r<#:#: j#++B//::<<''++6688
&=+D%4%lJQ^iibhiiD+%!!/)
 
 
 	
r*   rb   )r   r+   r,   r8   r5   rp   rs   r   r   rW   rc   rd   r   re   r   r   r   ra   r.   r/   s   @r)   rh   rh      s:           B B BC C C  .2.204(,263715%
 %
#d*%
 t+%
 &-	%

 %
 (4/%
 )D0%
 '$.%
 +,%
 
&%
 %
 %
 ^ %
 %
 %
 %
 %
r*   rh   c                        e Zd ZdZ fdZee	 	 	 	 	 	 	 ddej        dz  dej	        dz  dej        dz  de
dz  dej        dz  d	ej        dz  d
edz  dee         defd                        Z xZS )GenericForTokenClassificationr2   c                    t                                          |           |j        | _        t          | | j        t          j        |                     t          |dd           |j        }nt          |dd           |j	        }nd}t          j        |          | _        t          j        |j        |j                  | _        |                                  d S )Nclassifier_dropouthidden_dropoutg?)r!   r5   r6   r7   r8   r
   r9   rQ   r   r   r:   Dropoutdropoutr;   r<   r=   r>   )r#   r@   r   r   s      r)   r5   z&GenericForTokenClassification.__init__   s        +d,i.CF.K.KLLL6/66B!'!:V-t44@!'!6!$z"455Yv163DEE
 	r*   NrA   rB   rC   r   rD   rE   r   r%   rF   c           	      "    t          | | j                  |f|||||d|}	|	j        }
|                     |
          }
|                     |
          }d }||                     ||| j                  }t          |||	j        |	j	                  S )NrH   )rN   rL   rO   rP   )
rQ   r8   rR   r   r=   r[   r@   r	   rO   rP   )r#   rA   rB   rC   r   rD   rE   r   r%   r}   r~   rL   rN   s                r)   ra   z%GenericForTokenClassification.forward   s     ,Q749O+P+P,
)%+',
 ,
 ,
 ,
 "3,,77O,,%%ffdkBBD$!/)	
 
 
 	
r*   rb   )r   r+   r,   r8   r5   r   r   rW   rc   rd   r   re   rf   r   r   r	   ra   r.   r/   s   @r)   r   r      s           "  .2.204(,26*.!%!
 !
#d*!
 t+!
 &-	!

 !
 (4/!
  4'!
 $;!
 +,!
 
!
 !
 !
 ^ !
 !
 !
 !
 !
r*   r   )	functoolsr   rW   torch.nnr:   cache_utilsr   modeling_outputsr   r   r   r	   models.autor
   processing_utilsr   utilsr   r   r   r   
get_loggerr   r   Moduler   r1   rh   r    r*   r)   <module>r      s                                 # " " " " " $ $ $ $ $ $ P P P P P P P P P P P P 
	H	%	%;1 ;1 ;1 ;1 ;1 ;1 ;1 ;1| G
 G
 G
 G
 G
 G
 G
 G
T 9
 9
 9
 9
 9
 9
 9
 9
x 7
 7
 7
 7
 7
 7
 7
 7
 7
 7
r*   