
    bik"                        d dl mZ d dlmZ d dlZd dlmc mZ d dlm	Z	mZ d dl
mZ d dlmZ  G d dej                  ZdS )	    )annotations)IterableN)Tensornn)util)SentenceTransformerc                  R     e Zd Z	 	 	 	 dd fdZddZddZedd            Z xZS )MegaBatchMarginLoss皙?333333?T2   modelr   positive_marginfloatnegative_marginuse_mini_batched_versionboolmini_batch_sizeintreturnNonec                    t                                                       || _        || _        || _        || _        |r| j        n| j        | _        dS )a  
        Given a large batch (like 500 or more examples) of (anchor_i, positive_i) pairs, find for each pair in the batch
        the hardest negative, i.e. find j != i such that cos_sim(anchor_i, positive_j) is maximal. Then create from this a
        triplet (anchor_i, positive_i, positive_j) where positive_j serves as the negative for this triplet.

        Then train as with the triplet loss.

        Args:
            model: SentenceTransformerModel
            positive_margin: Positive margin, cos(anchor, positive)
                should be > positive_margin
            negative_margin: Negative margin, cos(anchor, negative)
                should be < negative_margin
            use_mini_batched_version: As large batch sizes require a lot
                of memory, we can use a mini-batched version. We break
                down the large batch into smaller batches with fewer
                examples.
            mini_batch_size: Size for the mini-batches. Should be a
                divisor for the batch size in your data loader.

        References:
            - This loss function was inspired by the ParaNMT paper: https://www.aclweb.org/anthology/P18-1042/

        Requirements:
            1. (anchor, positive) pairs
            2. Large batches (500 or more examples)

        Inputs:
            +---------------------------------------+--------+
            | Texts                                 | Labels |
            +=======================================+========+
            | (anchor, positive) pairs              | none   |
            +---------------------------------------+--------+

        Recommendations:
            - Use ``BatchSamplers.NO_DUPLICATES`` (:class:`docs <sentence_transformers.training_args.BatchSamplers>`) to
              ensure that no in-batch negatives are duplicates of the anchor or positive samples.

        Example:
            ::

                from sentence_transformers import SentenceTransformer, SentenceTransformerTrainingArguments, SentenceTransformerTrainer, losses
                from datasets import Dataset

                train_batch_size = 250
                train_mini_batch_size = 32

                model = SentenceTransformer('all-MiniLM-L6-v2')
                train_dataset = Dataset.from_dict({
                    "anchor": [f"This is sentence number {i}" for i in range(500)],
                    "positive": [f"This is sentence number {i}" for i in range(1, 501)],
                })
                loss = losses.MegaBatchMarginLoss(model=model, mini_batch_size=train_mini_batch_size)

                args = SentenceTransformerTrainingArguments(
                    output_dir="output",
                    per_device_train_batch_size=train_batch_size,
                )
                trainer = SentenceTransformerTrainer(
                    model=model,
                    args=args,
                    train_dataset=train_dataset,
                    loss=loss,
                )
                trainer.train()
        N)	super__init__r   r   r   r   forward_mini_batchedforward_non_mini_batchedforward)selfr   r   r   r   r   	__class__s         k/root/projects/butler/venv/lib/python3.11/site-packages/sentence_transformers/losses/MegaBatchMarginLoss.pyr   zMegaBatchMarginLoss.__init__   sW    T 	
...4Lot00RVRo    sentence_featuresIterable[dict[str, Tensor]]labelsr   c                h   |\  t                                                    }t          t          t	                                                 }g }t          j                    5  | j                                         t          d|| j
                  D ]m| j
        z   fd                                D             }|                    |                     |          d                                                    n| j                                         d d d            n# 1 swxY w Y   t          j        |d          }t          j        t          |          t          |          |j                  }t          dt          |          | j
                  D ]| j
        z   |                     fd|D                       d         }d |D             }	t          j                    5  t%          j        ||          }
|
d|         z  z
  }t          j        |d	          \  }}d d d            n# 1 swxY w Y   |D ].}|D ])}|	|                             |         |                    */|D ]}t          j        |	|                   |	|<    |                     fd
|D                       d         }|                     |	          d         }|j        |j        k    sJ |j        |j        k    sJ t/          j        ||          }t/          j        ||          }t/          j        | j        |z
            t/          j        || j        z
            z   }|                                }t          |
          k     r|                                 |S )Nr   c                ,    i | ]\  }}||         S  r'   ).0kvend_idx	start_idxs      r    
<dictcomp>z<MegaBatchMarginLoss.forward_mini_batched.<locals>.<dictcomp>i   s)    #Y#Y#Y1Aq7):';#Y#Y#Yr!   sentence_embeddingdim)devicec                2    i | ]}||                  S r'   r'   )r(   keyanchorr+   r,   s     r    r-   z<MegaBatchMarginLoss.forward_mini_batched.<locals>.<dictcomp>s   s)    $b$b$bSS&+i6G*H$b$b$br!   c                    i | ]}|g S r'   r'   )r(   r3   s     r    r-   z<MegaBatchMarginLoss.forward_mini_batched.<locals>.<dictcomp>y   s    %G%G%G#c2%G%G%Gr!         c                2    i | ]}||                  S r'   r'   )r(   r3   r+   positiver,   s     r    r-   z<MegaBatchMarginLoss.forward_mini_batched.<locals>.<dictcomp>   s*    &f&f&fQTsHSM)G:K,L&f&f&fr!   )listkeyslennextitertorchno_gradr   evalranger   itemsappenddetachtraincateyer1   r   pytorch_cos_simmaxstackshapeFcosine_similarityrelur   r   meanbackward)r   r"   r$   feature_names
batch_sizeall_positive_embinput_mini_batchdiagonal_matrix
anchor_embhard_negative_features
cos_scoresnegative_scoresnegatives_maxnegatives_idshard_negative_idr3   positive_embnegative_emb
pos_cosine
neg_cosinelossesr4   r+   r9   r,   s                        @@@@r    r   z(MegaBatchMarginLoss.forward_mini_batched_   sk   ,V[[]]++$tH~~"6"6788
]__ 	 	JOO"1j$2FGG e e	#d&::#Y#Y#Y#Y#YHXHX#Y#Y#Y  ''

3C(D(DEY(Z(a(a(c(cddddJ	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 !9%51===)C(8$9$93?O;P;PYiYpqqq q#&6"7"79MNN (	" (	"I$"66G$b$b$b$b$b$bTa$b$b$bcc$J &H%G%G%G%G" Q Q!1*>NOO
_Yw5F%G!GG   05ya/P/P/P,}Q Q Q Q Q Q Q Q Q Q Q Q Q Q Q %2 X X ( X XC*3/66x}EU7VWWWWX % W W.3k:PQT:U.V.V&s++  ::&f&f&f&f&f&fXe&f&f&fgg$L  ::&<==>RSL#|'99999#|'99999 ,ZFFJ,ZFFJVD0:=>>
UYUiHiAjAjjF[[]]F Z((!!!s%   /B7D22D69D6?II	I	c                     fd|D             }|\  }}t          j        ||          }t          j        |          }|dt          j        |j        d|j        iz  z
  }t          j        |d          \  }	}
t          j	         j
        |z
            t          j	        |	 j        z
            z   }|                                S )Nc                F    g | ]}                     |          d          S )r.   )r   )r(   sentence_featurer   s     r    
<listcomp>z@MegaBatchMarginLoss.forward_non_mini_batched.<locals>.<listcomp>   s-    mmmGW

+,,-ABmmmr!   r6   r1   r7   r/   )r   rI   r?   diagonalrH   rL   r1   rJ   rM   rO   r   r   rP   )r   r"   r$   repsembeddings_aembeddings_brY   positive_scoresrZ   r[   _rb   s   `           r    r   z,MegaBatchMarginLoss.forward_non_mini_batched   s    mmmm[lmmm%)"l),EE
.44$	:+FJ4EFFF
 !9_!<<<q,>??!&Y]YmImBnBnn{{}}r!   strc                    dS )Na  
@inproceedings{wieting-gimpel-2018-paranmt,
    title = "{P}ara{NMT}-50{M}: Pushing the Limits of Paraphrastic Sentence Embeddings with Millions of Machine Translations",
    author = "Wieting, John and Gimpel, Kevin",
    editor = "Gurevych, Iryna and Miyao, Yusuke",
    booktitle = "Proceedings of the 56th Annual Meeting of the Association for Computational Linguistics (Volume 1: Long Papers)",
    month = jul,
    year = "2018",
    address = "Melbourne, Australia",
    publisher = "Association for Computational Linguistics",
    url = "https://aclanthology.org/P18-1042",
    doi = "10.18653/v1/P18-1042",
    pages = "451--462",
}
r'   )r   s    r    citationzMegaBatchMarginLoss.citation   s     r!   )r   r   Tr   )r   r   r   r   r   r   r   r   r   r   r   r   )r"   r#   r$   r   r   r   )r   rm   )	__name__
__module____qualname__r   r   r   propertyro   __classcell__)r   s   @r    r
   r
      s         "%!$)-!Op Op Op Op Op Op Opb< < < <~       X    r!   r
   )
__future__r   collections.abcr   r?   torch.nn.functionalr   
functionalrM   r   sentence_transformersr   )sentence_transformers.SentenceTransformerr   Moduler
   r'   r!   r    <module>r|      s    " " " " " " $ $ $ $ $ $                   & & & & & & I I I I I In n n n n") n n n n nr!   