
    bi=                    p    d Z ddlmZ ddlZddlZddlZddlmZ  G d d          Z G d d	e          Z	dS )
a@  
This file contains deprecated code that can only be used with the old `model.fit`-style Sentence Transformers v2.X training.
It exists for backwards compatibility with the `model.old_fit` method, but will be removed in a future version.

Nowadays, with Sentence Transformers v3+, it is recommended to use the `SentenceTransformerTrainer` class to train models.
See https://www.sbert.net/docs/sentence_transformer/training_overview.html for more information.

Instead, you should create a `datasets` `Dataset` for training: https://huggingface.co/docs/datasets/create_dataset
    )annotationsN   )InputExamplec                  <    e Zd ZdZddddej        dddfdZdd	Zd
S )STSDataReadera1  Reads in the STS dataset. Each line contains two sentences (s1_col_idx, s2_col_idx) and one label (score_col_idx)

    Default values expects a tab separated file with the first & second column the sentence pair and third column the score (0...1). Default config normalizes scores from 0...5 to 0...1
    r   r      	T   c
                    || _         || _        || _        || _        || _        || _        || _        || _        |	| _        d S )N)	dataset_folderscore_col_idx
s1_col_idx
s2_col_idx	delimiterquotingnormalize_scores	min_score	max_score)
selfr   r   r   r   r   r   r   r   r   s
             f/root/projects/butler/venv/lib/python3.11/site-packages/sentence_transformers/readers/STSDataReader.py__init__zSTSDataReader.__init__   sJ     -*$$" 0""    c           
        t           j                            | j        |          }|                    d          rt          j        |dd          nt          |d          5 }t          j        || j	        | j
                  }g }t          |          D ]\  }}t          || j                           }	| j        r|	| j        z
  | j        | j        z
  z  }	|| j                 }
|| j                 }|                    t)          |t+          |          z   |
|g|	                     |dk    rt-          |          |k    r nd	d	d	           n# 1 swxY w Y   |S )
zJfilename specified which data split to use (train.csv, dev.csv, test.csv).z.gzrtutf8)encodingzutf-8)r   r   )guidtextslabelr   N)ospathjoinr   endswithgzipopencsvreaderr   r   	enumeratefloatr   r   r   r   r   r   appendr   strlen)r   filenamemax_examplesfilepathfIndataexamplesidrowscores1s2s               r   get_exampleszSTSDataReader.get_examples0   s   7<< 3X>>   ''2DIhv6666h111	 :cT^T\RRRDH$T?? 
 
Cc$"4566( Y"T^38WXE))(SWW2DRQSH\a b b bccc!##H(E(EE#	 	 	 	 	 	 	 	 	 	 	 	 	 	 	& s   #C EEEN)r   )__name__
__module____qualname____doc__r&   
QUOTE_NONEr   r8    r   r   r   r      sb          # # # #,     r   r   c                  >     e Zd ZdZddddej        dddf fd	Z xZS )	STSBenchmarkDataReaderzReader especially for the STS benchmark dataset. There, the sentences are in column 5 and 6, the score is in column 4.
    Scores are normalized from 0...5 to 0...1
    r
         r	   Tr   c
                \    t                                          |||||||||		  	         d S )N)	r   r   r   r   r   r   r   r   r   )superr   )r   r   r   r   r   r   r   r   r   r   	__class__s             r   r   zSTSBenchmarkDataReader.__init__N   sK     	)!!'- 	 
	
 
	
 
	
 
	
 
	
r   )r9   r:   r;   r<   r&   r=   r   __classcell__)rE   s   @r   r@   r@   I   sc          
 
 
 
 
 
 
 
 
 
r   r@   )
r<   
__future__r   r&   r$   r     r   r   r@   r>   r   r   <module>rI      s     # " " " " " 



  				      2 2 2 2 2 2 2 2j
 
 
 
 
] 
 
 
 
 
r   