
    Zi                         d Z ddlmZ ddlmZmZ e G d d                      Zdee         deee	ef                  fdZ
d	eee	ef                  dee         fd
ZdS )zEvaluation results utilities for the `.eval_results/*.yaml` format.

See https://huggingface.co/docs/hub/eval-results for more details.
Specifications are available at https://github.com/huggingface/hub-docs/blob/main/eval_results.yaml.
    )	dataclass)AnyOptionalc                   
   e Zd ZU dZeed<   eed<   eed<   dZee         ed<   dZ	ee         ed<   dZ
ee         ed<   dZee         ed	<   dZee         ed
<   dZee         ed<   dZee         ed<   dZee         ed<   ddZdS )EvalResultEntrya
  
    Evaluation result entry for the `.eval_results/*.yaml` format.

    Represents evaluation scores stored in model repos that automatically appear on
    the model page and the benchmark dataset's leaderboard.

    For the legacy `model-index` format in `README.md`, use [`EvalResult`] instead.

    See https://huggingface.co/docs/hub/eval-results for more details.

    Args:
        dataset_id (`str`):
            Benchmark dataset ID from the Hub. Example: "cais/hle", "Idavidrein/gpqa".
        task_id (`str`):
            Task identifier within the benchmark. Example: "gpqa_diamond".
        value (`Any`):
            The metric value. Example: 20.90.
        dataset_revision (`str`, *optional*):
            Git SHA of the benchmark dataset.
        verify_token (`str`, *optional*):
            A signature that can be used to prove that evaluation is provably auditable and reproducible.
        date (`str`, *optional*):
            When the evaluation was run (ISO-8601 datetime). Defaults to git commit time.
        source_url (`str`, *optional*):
            Link to the evaluation source (e.g., https://huggingface.co/spaces/SaylorTwift/smollm3-mmlu-pro). Required if `source_name`, `source_user`, or `source_org` is provided.
        source_name (`str`, *optional*):
            Display name for the source. Example: "Eval Logs".
        source_user (`str`, *optional*):
            HF user name for attribution. Example: "celinah".
        source_org (`str`, *optional*):
            HF org name for attribution. Example: "cais".
        notes (`str`, *optional*):
            Details about the evaluation setup. Example: "tools", "no-tools", "chain-of-thought".

    Example:
        ```python
        >>> from huggingface_hub import EvalResultEntry
        >>> # Minimal example with required fields only
        >>> result = EvalResultEntry(
        ...     dataset_id="Idavidrein/gpqa",
        ...     task_id="gpqa_diamond",
        ...     value=0.412,
        ... )
        >>> # Full example with all fields
        >>> result = EvalResultEntry(
        ...     dataset_id="cais/hle",
        ...     task_id="default",
        ...     value=20.90,
        ...     dataset_revision="5503434ddd753f426f4b38109466949a1217c2bb",
        ...     verify_token="eyJhbGciOiJIUzI1NiIsInR5cCI6IkpXVCJ9...",
        ...     date="2025-01-15T10:30:00Z",
        ...     source_url="https://huggingface.co/datasets/cais/hle",
        ...     source_name="CAIS HLE",
        ...     source_org="cais",
        ...     notes="no-tools",
        ... )

        ```
    
dataset_idtask_idvalueNdataset_revisionverify_tokendate
source_urlsource_namesource_user
source_orgnotesreturnc                 `    | j         | j        | j        | j        t	          d          d S d S )NzaIf `source_name`, `source_user`, or `source_org` is provided, `source_url` must also be provided.)r   r   r   r   
ValueError)selfs    X/root/projects/butler/venv/lib/python3.11/site-packages/huggingface_hub/_eval_results.py__post_init__zEvalResultEntry.__post_init__U   sI    (D,<,HDOLgo%s   &% MhLg    )r   N)__name__
__module____qualname____doc__str__annotations__r   r   r   r   r   r   r   r   r   r   r    r   r   r   r      s         : :x OOOLLLJJJ&*hsm***"&L(3-&&&D(3- $J$$$!%K#%%%!%K#%%% $J$$$E8C=     r   r   entriesr   c                    g }| D ]}|j         |j        d}|j        
|j        |d<   ||j        d}|j        
|j        |d<   |j        
|j        |d<   |j        Ad|j        i}|j        
|j        |d<   |j        
|j        |d	<   |j	        
|j	        |d
<   ||d<   |j
        
|j
        |d<   |                    |           |S )a  Convert a list of [`EvalResultEntry`] objects to a YAML-serializable list of dicts.

    This produces the format expected in `.eval_results/*.yaml` files.

    Args:
        entries (`list[EvalResultEntry]`):
            List of evaluation result entries to serialize.

    Returns:
        `list[dict[str, Any]]`: A list of dictionaries ready to be dumped to YAML.

    Example:
        ```python
        >>> from huggingface_hub import EvalResultEntry, eval_result_entries_to_yaml
        >>> entries = [
        ...     EvalResultEntry(dataset_id="cais/hle", task_id="default", value=20.90),
        ...     EvalResultEntry(dataset_id="Idavidrein/gpqa", task_id="gpqa_diamond", value=0.412),
        ... ]
        >>> yaml_data = eval_result_entries_to_yaml(entries)
        >>> yaml_data[0]
        {'dataset': {'id': 'cais/hle', 'task_id': 'default'}, 'value': 20.9}

        ```

        To upload eval results to the Hub:
        ```python
        >>> import yaml
        >>> from huggingface_hub import upload_file, EvalResultEntry, eval_result_entries_to_yaml
        >>> entries = [
        ...     EvalResultEntry(dataset_id="cais/hle", task_id="default", value=20.90),
        ... ]
        >>> yaml_content = yaml.dump(eval_result_entries_to_yaml(entries))
        >>> upload_file(
        ...     path_or_fileobj=yaml_content.encode(),
        ...     path_in_repo=".eval_results/hle.yaml",
        ...     repo_id="your-username/your-model",
        ... )

        ```
    )idr	   Nrevision)datasetr
   verifyTokenr   urlnameuserorgsourcer   )r   r	   r   r
   r   r   r   r   r   r   r   append)r!   resultentryr%   datar+   s         r   eval_result_entries_to_yamlr0   ^   s   R F  ).)9em"T"T!-"'"8GJ+2U[II)"'"4D:! :DL'&+U-=%>F ,!&!2v ,!&!2v+ % 0u#DN;"!KDMdMr   r/   c                 n   g }| D ].}|                     d|          }|                     di           }|                     di           }t          |d         |d         |d         |                     d          |                     d          |                     d	          |r|                     d
          nd|r|                     d          nd|r|                     d          nd|r|                     d          nd|                     d                    }|                    |           0|S )a  Parse a list of dicts into [`EvalResultEntry`] objects.

    This parses the `.eval_results/*.yaml` format. For the legacy `model-index` format,
    use [`model_index_to_eval_results`] instead.

    Args:
        data (`list[dict[str, Any]]`):
            A list of dictionaries (e.g., parsed from YAML or API response).

    Returns:
        `list[EvalResultEntry]`: A list of evaluation result entry objects.

    Example:
        ```python
        >>> from huggingface_hub import parse_eval_result_entries
        >>> data = [
        ...     {"dataset": {"id": "cais/hle", "task_id": "default"}, "value": 20.90},
        ...     {"dataset": {"id": "Idavidrein/gpqa", "task_id": "gpqa_diamond"}, "value": 0.412},
        ... ]
        >>> entries = parse_eval_result_entries(data)
        >>> entries[0].dataset_id
        'cais/hle'
        >>> entries[0].value
        20.9

        ```
    r/   r%   r+   r#   r
   r	   r$   r&   r   r'   Nr(   r)   r*   r   )r   r
   r	   r   r   r   r   r   r   r   r   )getr   r,   )r/   r!   item
entry_datar%   r+   r.   s          r   parse_eval_result_entriesr5      s8   8 G  XXfd++
..B//"--t}W%I&$[[44#66'',2<vzz%(((.4>

6***$.4>

6***$,2<vzz%(((..))
 
 
 	uNr   N)r   dataclassesr   typingr   r   r   listdictr   r0   r5   r    r   r   <module>r:      s     " ! ! ! ! !                 O O O O O O O OdCo)> C4SRUXCW C C C CL/Dc3h$8 /T/=R / / / / / /r   