"""
Doc utilities: Utilities related to documentation
"""

import functools
import inspect
import re
import textwrap
import types
from collections import OrderedDict


def get_docstring_indentation_level(func):
    """Return the indentation level of the start of the docstring of a class or function (or method)."""
    # We assume classes are always defined in the global scope
    if inspect.isclass(func):
        return 4
    source = inspect.getsource(func)
    first_line = source.splitlines()[0]
    function_def_level = len(first_line) - len(first_line.lstrip())
    return 4 + function_def_level


def add_start_docstrings(*docstr):
    def docstring_decorator(fn):
        fn.__doc__ = "".join(docstr) + (fn.__doc__ if fn.__doc__ is not None else "")
        return fn

    return docstring_decorator


def add_start_docstrings_to_model_forward(*docstr):
    def docstring_decorator(fn):
        class_name = f"[`{fn.__qualname__.split('.')[0]}`]"
        intro = f"""    The {class_name} forward method, overrides the `__call__` special method.

    <Tip>

    Although the recipe for forward pass needs to be defined within this function, one should call the [`Module`]
    instance afterwards instead of this since the former takes care of running the pre and post processing steps while
    the latter silently ignores them.

    </Tip>
"""

        correct_indentation = get_docstring_indentation_level(fn)
        current_doc = fn.__doc__ if fn.__doc__ is not None else ""
        try:
            first_non_empty = next(line for line in current_doc.splitlines() if line.strip() != "")
            doc_indentation = len(first_non_empty) - len(first_non_empty.lstrip())
        except StopIteration:
            doc_indentation = correct_indentation
        docs = docstr
        # The added pieces are indented one level deeper than the function: re-indent them to match
        if doc_indentation == 4 + correct_indentation:
            docs = [textwrap.indent(textwrap.dedent(doc), " " * correct_indentation) for doc in docstr]
            intro = textwrap.indent(textwrap.dedent(intro), " " * correct_indentation)

        docstring = "".join(docs) + current_doc
        fn.__doc__ = intro + docstring
        return fn

    return docstring_decorator


def add_end_docstrings(*docstr):
    def docstring_decorator(fn):
        fn.__doc__ = (fn.__doc__ if fn.__doc__ is not None else "") + "".join(docstr)
        return fn

    return docstring_decorator

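# Usage sketch for the decorators above (illustrative only; `FooBarModel` and INTRO are
# hypothetical stand-ins, not names from this library):
#
#     INTRO = r"""
#     The bare FooBar Model transformer outputting raw hidden-states.
#     """
#
#     @add_start_docstrings(INTRO)
#     class FooBarModel:
#         """Existing class docstring."""
#
# `add_start_docstrings` prepends INTRO to the existing docstring, `add_end_docstrings`
# appends it instead, and `add_start_docstrings_to_model_forward` additionally injects
# the "forward method overrides `__call__`" tip defined above, re-indented to the level
# returned by `get_docstring_indentation_level`.
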

PT_RETURN_INTRODUCTION = r"""
    Returns:
        [`{full_output_type}`] or `tuple(torch.FloatTensor)`: A [`{full_output_type}`] or a tuple of
        `torch.FloatTensor` (if `return_dict=False` is passed or when `config.return_dict=False`) comprising various
        elements depending on the configuration ([`{config_class}`]) and inputs.

"""


def _get_indent(t):
    """Returns the indentation in the first line of t"""
    search = re.search(r"^(\s*)\S", t)
    return "" if search is None else search.groups()[0]


def _convert_output_args_doc(output_args_doc):
    """Convert output_args_doc to display properly."""
    # Split output_args_doc in blocks argument/description
    indent = _get_indent(output_args_doc)
    blocks = []
    current_block = ""
    for line in output_args_doc.split("\n"):
        # If the indent is the same as the beginning, the line is the name of a new arg.
        if _get_indent(line) == indent:
            if len(current_block) > 0:
                blocks.append(current_block[:-1])
            current_block = f"{line}\n"
        else:
            # Otherwise it's part of the description of the current arg.
            # We need to remove 2 spaces from the indentation.
            current_block += f"{line[2:]}\n"
    blocks.append(current_block[:-1])

    # Format each block for proper rendering
    for i in range(len(blocks)):
        blocks[i] = re.sub(r"^(\s+)(\S+)(\s+)", r"\1- **\2**\3", blocks[i])
        blocks[i] = re.sub(r":\s*\n\s*(\S)", r" -- \1", blocks[i])

    return "\n".join(blocks)


def _prepare_output_docstrings(output_type, config_class, min_indent=None, add_intro=True):
    """
    Prepares the return part of the docstring using `output_type`.
    """
    output_docstring = output_type.__doc__
    params_docstring = None

    if output_docstring is not None:
        # Remove the head of the docstring to keep the list of args only
        lines = output_docstring.split("\n")
        i = 0
        while i < len(lines) and re.search(r"^\s*(Args|Parameters):\s*$", lines[i]) is None:
            i += 1
        if i < len(lines):
            params_docstring = "\n".join(lines[(i + 1) :])
            params_docstring = _convert_output_args_doc(params_docstring)
        elif add_intro:
            raise ValueError(
                f"No `Args` or `Parameters` section is found in the docstring of `{output_type.__name__}`. Make sure "
                "it has docstring and contain either `Args` or `Parameters`."
            )

    # Add the return introduction
    if add_intro:
        full_output_type = f"{output_type.__module__}.{output_type.__name__}"
        intro = PT_RETURN_INTRODUCTION.format(full_output_type=full_output_type, config_class=config_class)
    else:
        full_output_type = str(output_type)
        intro = f"\nReturns:\n    `{full_output_type}`:\n"

    result = intro
    if params_docstring is not None:
        result += params_docstring

    # Apply the minimum indent if necessary
    if min_indent is not None:
        lines = result.split("\n")
        # Find the indent of the first nonempty line
        i = 0
        while len(lines[i]) == 0:
            i += 1
        indent = len(_get_indent(lines[i]))
        # If it is too small, add indentation to all nonempty lines
        if indent < min_indent:
            to_add = " " * (min_indent - indent)
            lines = [(f"{to_add}{line}" if len(line) > 0 else line) for line in lines]
            result = "\n".join(lines)

    return result


FAKE_MODEL_DISCLAIMER = """
    <Tip warning={true}>

    This example uses a random model as the real ones are all very big. To get proper results, you should use
    {real_checkpoint} instead of {fake_checkpoint}. If you get out-of-memory when loading that checkpoint, you can try
    adding `device_map="auto"` in the `from_pretrained` call.

    </Tip>
"""

PT_TOKEN_CLASSIFICATION_SAMPLE = r"""
    Example:

    ```python
    >>> from transformers import AutoTokenizer, {model_class}
    >>> import torch

    >>> tokenizer = AutoTokenizer.from_pretrained("{checkpoint}")
    >>> model = {model_class}.from_pretrained("{checkpoint}")

    >>> inputs = tokenizer(
    ...     "HuggingFace is a company based in Paris and New York", add_special_tokens=False, return_tensors="pt"
    ... )

    >>> with torch.no_grad():
    ...     logits = model(**inputs).logits

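    >>> # logits has shape (batch_size, sequence_length, num_labels); the argmax over the last axis picks a label per token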
    >>> predicted_token_class_ids = logits.argmax(-1)

    >>> # Note that tokens are classified rather than input words which means that
    >>> # there might be more predicted token classes than words.
    >>> # Multiple token classes might account for the same word
    >>> predicted_tokens_classes = [model.config.id2label[t.item()] for t in predicted_token_class_ids[0]]
    >>> predicted_tokens_classes
    {expected_output}

    >>> labels = predicted_token_class_ids
    >>> loss = model(**inputs, labels=labels).loss
    >>> round(loss.item(), 2)
    {expected_loss}
    ```
"""

PT_QUESTION_ANSWERING_SAMPLE = r"""
    Example:

    ```python
    >>> from transformers import AutoTokenizer, {model_class}
    >>> import torch

    >>> tokenizer = AutoTokenizer.from_pretrained("{checkpoint}")
    >>> model = {model_class}.from_pretrained("{checkpoint}")

    >>> question, text = "Who was Jim Henson?", "Jim Henson was a nice puppet"

    >>> inputs = tokenizer(question, text, return_tensors="pt")
    >>> with torch.no_grad():
    ...     outputs = model(**inputs)

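    >>> # the QA head scores every token as a candidate answer start and answer end; the two argmaxes delimit the span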
    >>> answer_start_index = outputs.start_logits.argmax()
    >>> answer_end_index = outputs.end_logits.argmax()

    >>> predict_answer_tokens = inputs.input_ids[0, answer_start_index : answer_end_index + 1]
    >>> tokenizer.decode(predict_answer_tokens, skip_special_tokens=True)
    {expected_output}

    >>> # target is "nice puppet"
    >>> target_start_index = torch.tensor([{qa_target_start_index}])
    >>> target_end_index = torch.tensor([{qa_target_end_index}])

    >>> outputs = model(**inputs, start_positions=target_start_index, end_positions=target_end_index)
    >>> loss = outputs.loss
    >>> round(loss.item(), 2)
    {expected_loss}
    ```
"""

PT_SEQUENCE_CLASSIFICATION_SAMPLE = r"""
    Example of single-label classification:

    ```python
    >>> import torch
    >>> from transformers import AutoTokenizer, {model_class}

    >>> tokenizer = AutoTokenizer.from_pretrained("{checkpoint}")
    >>> model = {model_class}.from_pretrained("{checkpoint}")

    >>> inputs = tokenizer("Hello, my dog is cute", return_tensors="pt")

    >>> with torch.no_grad():
    ...     logits = model(**inputs).logits

    >>> predicted_class_id = logits.argmax().item()
    >>> model.config.id2label[predicted_class_id]
    {expected_output}

    >>> # To train a model on `num_labels` classes, you can pass `num_labels=num_labels` to `.from_pretrained(...)`
    >>> num_labels = len(model.config.id2label)
    >>> model = {model_class}.from_pretrained("{checkpoint}", num_labels=num_labels)

    >>> labels = torch.tensor([1])
    >>> loss = model(**inputs, labels=labels).loss
    >>> round(loss.item(), 2)
    {expected_loss}
    ```

    Example of multi-label classification:

    ```python
    >>> import torch
    >>> from transformers import AutoTokenizer, {model_class}

    >>> tokenizer = AutoTokenizer.from_pretrained("{checkpoint}")
    >>> model = {model_class}.from_pretrained("{checkpoint}", problem_type="multi_label_classification")

    >>> inputs = tokenizer("Hello, my dog is cute", return_tensors="pt")

    >>> with torch.no_grad():
    ...     logits = model(**inputs).logits

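    >>> # multi-label classification applies an independent sigmoid per class, so every class scoring above 0.5 is kept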
    >>> predicted_class_ids = torch.arange(0, logits.shape[-1])[torch.sigmoid(logits).squeeze(dim=0) > 0.5]

    >>> # To train a model on `num_labels` classes, you can pass `num_labels=num_labels` to `.from_pretrained(...)`
    >>> num_labels = len(model.config.id2label)
    >>> model = {model_class}.from_pretrained(
    ...     "{checkpoint}", num_labels=num_labels, problem_type="multi_label_classification"
    ... )

    >>> labels = torch.sum(
    ...     torch.nn.functional.one_hot(predicted_class_ids[None, :].clone(), num_classes=num_labels), dim=1
    ... ).to(torch.float)
    >>> loss = model(**inputs, labels=labels).loss
    ```
"""

PT_MASKED_LM_SAMPLE = r"""
    Example:

    ```python
    >>> from transformers import AutoTokenizer, {model_class}
    >>> import torch

    >>> tokenizer = AutoTokenizer.from_pretrained("{checkpoint}")
    >>> model = {model_class}.from_pretrained("{checkpoint}")

    >>> inputs = tokenizer("The capital of France is {mask}.", return_tensors="pt")

    >>> with torch.no_grad():
    ...     logits = model(**inputs).logits

    >>> # retrieve index of {mask}
    >>> mask_token_index = (inputs.input_ids == tokenizer.mask_token_id)[0].nonzero(as_tuple=True)[0]

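    >>> # read the logits at the masked position only and keep the highest-scoring vocabulary id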
    >>> predicted_token_id = logits[0, mask_token_index].argmax(axis=-1)
    >>> tokenizer.decode(predicted_token_id)
    {expected_output}

    >>> labels = tokenizer("The capital of France is Paris.", return_tensors="pt")["input_ids"]
    >>> # mask labels of non-{mask} tokens
    >>> labels = torch.where(inputs.input_ids == tokenizer.mask_token_id, labels, -100)

    >>> outputs = model(**inputs, labels=labels)
    >>> round(outputs.loss.item(), 2)
    {expected_loss}
    ```
"""

PT_BASE_MODEL_SAMPLE = r"""
    Example:

    ```python
    >>> from transformers import AutoTokenizer, {model_class}
    >>> import torch

    >>> tokenizer = AutoTokenizer.from_pretrained("{checkpoint}")
    >>> model = {model_class}.from_pretrained("{checkpoint}")

    >>> inputs = tokenizer("Hello, my dog is cute", return_tensors="pt")
    >>> outputs = model(**inputs)
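    >>> # the bare model returns hidden states of shape (batch_size, sequence_length, hidden_size), with no task head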

    >>> last_hidden_states = outputs.last_hidden_state
    ```
"""

PT_MULTIPLE_CHOICE_SAMPLE = r"""
    Example:

    ```python
    >>> from transformers import AutoTokenizer, {model_class}
    >>> import torch

    >>> tokenizer = AutoTokenizer.from_pretrained("{checkpoint}")
    >>> model = {model_class}.from_pretrained("{checkpoint}")

    >>> prompt = "In Italy, pizza served in formal settings, such as at a restaurant, is presented unsliced."
    >>> choice0 = "It is eaten with a fork and a knife."
    >>> choice1 = "It is eaten while held in the hand."
    >>> labels = torch.tensor(0).unsqueeze(0)  # choice0 is correct (according to Wikipedia ;)), batch size 1

    >>> encoding = tokenizer([prompt, prompt], [choice0, choice1], return_tensors="pt", padding=True)
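    >>> # the prompt is repeated so that each candidate choice is encoded as its own (prompt, choice) pair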
    >>> outputs = model(**{{k: v.unsqueeze(0) for k, v in encoding.items()}}, labels=labels)  # batch size is 1

    >>> # the linear classifier still needs to be trained
    >>> loss = outputs.loss
    >>> logits = outputs.logits
    ```
"""

PT_CAUSAL_LM_SAMPLE = r"""
    Example:

    ```python
    >>> import torch
    >>> from transformers import AutoTokenizer, {model_class}

    >>> tokenizer = AutoTokenizer.from_pretrained("{checkpoint}")
    >>> model = {model_class}.from_pretrained("{checkpoint}")

    >>> inputs = tokenizer("Hello, my dog is cute", return_tensors="pt")
    >>> outputs = model(**inputs, labels=inputs["input_ids"])
    >>> loss = outputs.loss
    >>> logits = outputs.logits
    ```
"""

PT_SPEECH_BASE_MODEL_SAMPLE = r"""
    Example:

    ```python
    >>> from transformers import AutoProcessor, {model_class}
    >>> import torch
    >>> from datasets import load_dataset

    >>> dataset = load_dataset("hf-internal-testing/librispeech_asr_demo", "clean", split="validation")
    >>> dataset = dataset.sort("id")
    >>> sampling_rate = dataset.features["audio"].sampling_rate

    >>> processor = AutoProcessor.from_pretrained("{checkpoint}")
    >>> model = {model_class}.from_pretrained("{checkpoint}")

    >>> # audio file is decoded on the fly
    >>> inputs = processor(dataset[0]["audio"]["array"], sampling_rate=sampling_rate, return_tensors="pt")
    >>> with torch.no_grad():
    ...     outputs = model(**inputs)

    >>> last_hidden_states = outputs.last_hidden_state
    >>> list(last_hidden_states.shape)
    {expected_output}
    ```
"""

PT_SPEECH_CTC_SAMPLE = r"""
    Example:

    ```python
    >>> from transformers import AutoProcessor, {model_class}
    >>> from datasets import load_dataset
    >>> import torch

    >>> dataset = load_dataset("hf-internal-testing/librispeech_asr_demo", "clean", split="validation")
    >>> dataset = dataset.sort("id")
    >>> sampling_rate = dataset.features["audio"].sampling_rate

    >>> processor = AutoProcessor.from_pretrained("{checkpoint}")
    >>> model = {model_class}.from_pretrained("{checkpoint}")

    >>> # audio file is decoded on the fly
    >>> inputs = processor(dataset[0]["audio"]["array"], sampling_rate=sampling_rate, return_tensors="pt")
    >>> with torch.no_grad():
    ...     logits = model(**inputs).logits
    >>> predicted_ids = torch.argmax(logits, dim=-1)
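    >>> # greedy CTC decoding: batch_decode collapses repeated ids and removes the blank token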

    >>> # transcribe speech
    >>> transcription = processor.batch_decode(predicted_ids)
    >>> transcription[0]
    {expected_output}

    >>> inputs["labels"] = processor(text=dataset[0]["text"], return_tensors="pt").input_ids

    >>> # compute loss
    >>> loss = model(**inputs).loss
    >>> round(loss.item(), 2)
    {expected_loss}
    ```
"""

PT_SPEECH_SEQ_CLASS_SAMPLE = r"""
    Example:

    ```python
    >>> from transformers import AutoFeatureExtractor, {model_class}
    >>> from datasets import load_dataset
    >>> import torch

    >>> dataset = load_dataset("hf-internal-testing/librispeech_asr_demo", "clean", split="validation")
    >>> dataset = dataset.sort("id")
    >>> sampling_rate = dataset.features["audio"].sampling_rate

    >>> feature_extractor = AutoFeatureExtractor.from_pretrained("{checkpoint}")
    >>> model = {model_class}.from_pretrained("{checkpoint}")

    >>> # audio file is decoded on the fly
    >>> inputs = feature_extractor(dataset[0]["audio"]["array"], sampling_rate=sampling_rate, return_tensors="pt")

    >>> with torch.no_grad():
    ...     logits = model(**inputs).logits

    >>> predicted_class_ids = torch.argmax(logits, dim=-1).item()
    >>> predicted_label = model.config.id2label[predicted_class_ids]
    >>> predicted_label
    {expected_output}

    >>> # compute loss - target_label is e.g. "down"
    >>> target_label = model.config.id2label[0]
    >>> inputs["labels"] = torch.tensor([model.config.label2id[target_label]])
    >>> loss = model(**inputs).loss
    >>> round(loss.item(), 2)
    {expected_loss}
    ```
"""

PT_SPEECH_FRAME_CLASS_SAMPLE = r"""
    Example:

    ```python
    >>> from transformers import AutoFeatureExtractor, {model_class}
    >>> from datasets import load_dataset
    >>> import torch

    >>> dataset = load_dataset("hf-internal-testing/librispeech_asr_demo", "clean", split="validation")
    >>> dataset = dataset.sort("id")
    >>> sampling_rate = dataset.features["audio"].sampling_rate

    >>> feature_extractor = AutoFeatureExtractor.from_pretrained("{checkpoint}")
    >>> model = {model_class}.from_pretrained("{checkpoint}")

    >>> # audio file is decoded on the fly
    >>> inputs = feature_extractor(dataset[0]["audio"]["array"], return_tensors="pt", sampling_rate=sampling_rate)
    >>> with torch.no_grad():
    ...     logits = model(**inputs).logits

    >>> probabilities = torch.sigmoid(logits[0])
    >>> # labels is a one-hot array of shape (num_frames, num_speakers)
    >>> labels = (probabilities > 0.5).long()
    >>> labels[0].tolist()
    {expected_output}
    ```
"""

PT_SPEECH_XVECTOR_SAMPLE = r"""
    Example:

    ```python
    >>> from transformers import AutoFeatureExtractor, {model_class}
    >>> from datasets import load_dataset
    >>> import torch

    >>> dataset = load_dataset("hf-internal-testing/librispeech_asr_demo", "clean", split="validation")
    >>> dataset = dataset.sort("id")
    >>> sampling_rate = dataset.features["audio"].sampling_rate

    >>> feature_extractor = AutoFeatureExtractor.from_pretrained("{checkpoint}")
    >>> model = {model_class}.from_pretrained("{checkpoint}")

    >>> # audio file is decoded on the fly
    >>> inputs = feature_extractor(
    ...     [d["array"] for d in dataset[:2]["audio"]], sampling_rate=sampling_rate, return_tensors="pt", padding=True
    ... )
    >>> with torch.no_grad():
    ...     embeddings = model(**inputs).embeddings

    >>> embeddings = torch.nn.functional.normalize(embeddings, dim=-1).cpu()
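    >>> # L2-normalization puts all embeddings on the unit sphere, so the similarity depends only on direction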

    >>> # the resulting embeddings can be used for cosine similarity-based retrieval
    >>> cosine_sim = torch.nn.CosineSimilarity(dim=-1)
    >>> similarity = cosine_sim(embeddings[0], embeddings[1])
    >>> threshold = 0.7  # the optimal threshold is dataset-dependent
    >>> if similarity < threshold:
    ...     print("Speakers are not the same!")
    >>> round(similarity.item(), 2)
    {expected_output}
    ```
"""

PT_VISION_BASE_MODEL_SAMPLE = r"""
    Example:

    ```python
    >>> from transformers import AutoImageProcessor, {model_class}
    >>> import torch
    >>> from datasets import load_dataset

    >>> dataset = load_dataset("huggingface/cats-image")
    >>> image = dataset["test"]["image"][0]

    >>> image_processor = AutoImageProcessor.from_pretrained("{checkpoint}")
    >>> model = {model_class}.from_pretrained("{checkpoint}")

    >>> inputs = image_processor(image, return_tensors="pt")

    >>> with torch.no_grad():
    ...     outputs = model(**inputs)

    >>> last_hidden_states = outputs.last_hidden_state
    >>> list(last_hidden_states.shape)
    {expected_output}
    ```
"""

PT_VISION_SEQ_CLASS_SAMPLE = r"""
    Example:

    ```python
    >>> from transformers import AutoImageProcessor, {model_class}
    >>> import torch
    >>> from datasets import load_dataset

    >>> dataset = load_dataset("huggingface/cats-image")
    >>> image = dataset["test"]["image"][0]

    >>> image_processor = AutoImageProcessor.from_pretrained("{checkpoint}")
    >>> model = {model_class}.from_pretrained("{checkpoint}")

    >>> inputs = image_processor(image, return_tensors="pt")

    >>> with torch.no_grad():
    ...     logits = model(**inputs).logits

    >>> # model predicts one of the 1000 ImageNet classes
    >>> predicted_label = logits.argmax(-1).item()
    >>> print(model.config.id2label[predicted_label])
    {expected_output}
    ```
"""

PT_SAMPLE_DOCSTRINGS = {
    "SequenceClassification": PT_SEQUENCE_CLASSIFICATION_SAMPLE,
    "QuestionAnswering": PT_QUESTION_ANSWERING_SAMPLE,
    "TokenClassification": PT_TOKEN_CLASSIFICATION_SAMPLE,
    "MultipleChoice": PT_MULTIPLE_CHOICE_SAMPLE,
    "MaskedLM": PT_MASKED_LM_SAMPLE,
    "LMHead": PT_CAUSAL_LM_SAMPLE,
    "BaseModel": PT_BASE_MODEL_SAMPLE,
    "SpeechBaseModel": PT_SPEECH_BASE_MODEL_SAMPLE,
    "CTC": PT_SPEECH_CTC_SAMPLE,
    "AudioClassification": PT_SPEECH_SEQ_CLASS_SAMPLE,
    "AudioFrameClassification": PT_SPEECH_FRAME_CLASS_SAMPLE,
    "AudioXVector": PT_SPEECH_XVECTOR_SAMPLE,
    "VisionBaseModel": PT_VISION_BASE_MODEL_SAMPLE,
    "ImageClassification": PT_VISION_SEQ_CLASS_SAMPLE,
}

TEXT_TO_AUDIO_SPECTROGRAM_SAMPLE = r"""
    Example:

    ```python
    >>> from transformers import AutoProcessor, {model_class}, SpeechT5HifiGan
    >>> import torch

    >>> model = {model_class}.from_pretrained("{checkpoint}")

    >>> processor = AutoProcessor.from_pretrained("{checkpoint}")
    >>> vocoder = SpeechT5HifiGan.from_pretrained("microsoft/speecht5_hifigan")
    >>> inputs = processor(text="Hello, my dog is cute", return_tensors="pt")

    >>> # generate speech; speaker_embeddings is a (1, 512) xvector describing the target voice
    >>> # (a zero vector is only a placeholder; real usage loads a speaker embedding from an xvector dataset)
    >>> speaker_embeddings = torch.zeros((1, 512))
    >>> speech = model.generate(inputs["input_ids"], speaker_embeddings=speaker_embeddings, vocoder=vocoder)
    ```
"""

TEXT_TO_AUDIO_WAVEFORM_SAMPLE = r"""
    Example:

    ```python
    >>> from transformers import AutoProcessor, {model_class}

    >>> model = {model_class}.from_pretrained("{checkpoint}")

    >>> processor = AutoProcessor.from_pretrained("{checkpoint}")
    >>> inputs = processor(text="Hello, my dog is cute", return_tensors="pt")

    >>> # generate speech
    >>> speech = model(inputs["input_ids"])
    ```
"""

AUDIO_FRAME_CLASSIFICATION_SAMPLE = PT_SPEECH_FRAME_CLASS_SAMPLE
AUDIO_XVECTOR_SAMPLE = PT_SPEECH_XVECTOR_SAMPLE

DEPTH_ESTIMATION_SAMPLE = r"""
    Example:

    ```python
    >>> from transformers import AutoImageProcessor, {model_class}
    >>> import torch
    >>> from PIL import Image
    >>> import httpx
    >>> from io import BytesIO

    >>> url = "http://images.cocodataset.org/val2017/000000039769.jpg"
    >>> with httpx.stream("GET", url) as response:
    ...     image = Image.open(BytesIO(response.read())).convert("RGB")

    >>> processor = AutoImageProcessor.from_pretrained("{checkpoint}")
    >>> model = {model_class}.from_pretrained("{checkpoint}")

    >>> device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    >>> model.to(device)

    >>> # prepare image for the model
    >>> inputs = processor(images=image, return_tensors="pt").to(device)

    >>> with torch.no_grad():
    ...     outputs = model(**inputs)

    >>> # interpolate to original size
    >>> post_processed_output = processor.post_process_depth_estimation(
    ...     outputs, [(image.height, image.width)],
    ... )
    >>> predicted_depth = post_processed_output[0]["predicted_depth"]
    ```
"""

# Tasks without a dedicated code sample yet share the empty placeholder below.
VIDEO_CLASSIFICATION_SAMPLE = r"""
    Example:

    ```python
    ```
"""

ZERO_SHOT_OBJECT_DETECTION_SAMPLE = VIDEO_CLASSIFICATION_SAMPLE
IMAGE_TO_IMAGE_SAMPLE = VIDEO_CLASSIFICATION_SAMPLE
IMAGE_FEATURE_EXTRACTION_SAMPLE = VIDEO_CLASSIFICATION_SAMPLE
DOCUMENT_QUESTION_ANSWERING_SAMPLE = VIDEO_CLASSIFICATION_SAMPLE
NEXT_SENTENCE_PREDICTION_SAMPLE = VIDEO_CLASSIFICATION_SAMPLE
MULTIPLE_CHOICE_SAMPLE = PT_MULTIPLE_CHOICE_SAMPLE
PRETRAINING_SAMPLE = VIDEO_CLASSIFICATION_SAMPLE
MASK_GENERATION_SAMPLE = VIDEO_CLASSIFICATION_SAMPLE
VISUAL_QUESTION_ANSWERING_SAMPLE = VIDEO_CLASSIFICATION_SAMPLE
TEXT_GENERATION_SAMPLE = VIDEO_CLASSIFICATION_SAMPLE
IMAGE_CLASSIFICATION_SAMPLE = PT_VISION_SEQ_CLASS_SAMPLE
IMAGE_SEGMENTATION_SAMPLE = VIDEO_CLASSIFICATION_SAMPLE
FILL_MASK_SAMPLE = VIDEO_CLASSIFICATION_SAMPLE
OBJECT_DETECTION_SAMPLE = VIDEO_CLASSIFICATION_SAMPLE
QUESTION_ANSWERING_SAMPLE = PT_QUESTION_ANSWERING_SAMPLE
TEXT_CLASSIFICATION_SAMPLE = PT_SEQUENCE_CLASSIFICATION_SAMPLE
TABLE_QUESTION_ANSWERING_SAMPLE = VIDEO_CLASSIFICATION_SAMPLE
TOKEN_CLASSIFICATION_SAMPLE = PT_TOKEN_CLASSIFICATION_SAMPLE
AUDIO_CLASSIFICATION_SAMPLE = PT_SPEECH_SEQ_CLASS_SAMPLE
AUTOMATIC_SPEECH_RECOGNITION_SAMPLE = PT_SPEECH_CTC_SAMPLE
ZERO_SHOT_IMAGE_CLASSIFICATION_SAMPLE = VIDEO_CLASSIFICATION_SAMPLE

IMAGE_TEXT_TO_TEXT_GENERATION_SAMPLE = r"""
    Example:

    ```python
    >>> from PIL import Image
    >>> from transformers import AutoProcessor, {model_class}

    >>> model = {model_class}.from_pretrained("{checkpoint}")
    >>> processor = AutoProcessor.from_pretrained("{checkpoint}")

    >>> messages = [
    ...     {{
    ...         "role": "user", "content": [
    ...             {{"type": "image", "url": "https://huggingface.co/datasets/huggingface/documentation-images/resolve/main/pipeline-cat-chonk.jpeg"}},
    ...             {{"type": "text", "text": "Where is the cat standing?"}},
    ...         ]
    ...     }},
    ... ]

    >>> inputs = processor.apply_chat_template(
    ...     messages,
    ...     tokenize=True,
    ...     return_dict=True,
    ...     return_tensors="pt",
    ...     add_generation_prompt=True
    ... )
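    >>> # add_generation_prompt=True appends the assistant turn header so the model starts generating the reply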
    >>> # Generate
    >>> generate_ids = model.generate(**inputs)
    >>> processor.batch_decode(generate_ids, skip_special_tokens=True)[0]
    ```
"""

PIPELINE_TASKS_TO_SAMPLE_DOCSTRINGS = OrderedDict(
    [
        ("text-to-audio-spectrogram", TEXT_TO_AUDIO_SPECTROGRAM_SAMPLE),
        ("text-to-audio-waveform", TEXT_TO_AUDIO_WAVEFORM_SAMPLE),
        ("automatic-speech-recognition", AUTOMATIC_SPEECH_RECOGNITION_SAMPLE),
        ("audio-frame-classification", AUDIO_FRAME_CLASSIFICATION_SAMPLE),
        ("audio-classification", AUDIO_CLASSIFICATION_SAMPLE),
        ("audio-xvector", AUDIO_XVECTOR_SAMPLE),
        ("image-text-to-text", IMAGE_TEXT_TO_TEXT_GENERATION_SAMPLE),
        ("visual-question-answering", VISUAL_QUESTION_ANSWERING_SAMPLE),
        ("depth-estimation", DEPTH_ESTIMATION_SAMPLE),
        ("video-classification", VIDEO_CLASSIFICATION_SAMPLE),
        ("zero-shot-image-classification", ZERO_SHOT_IMAGE_CLASSIFICATION_SAMPLE),
        ("image-classification", IMAGE_CLASSIFICATION_SAMPLE),
        ("zero-shot-object-detection", ZERO_SHOT_OBJECT_DETECTION_SAMPLE),
        ("object-detection", OBJECT_DETECTION_SAMPLE),
        ("image-segmentation", IMAGE_SEGMENTATION_SAMPLE),
        ("image-to-image", IMAGE_TO_IMAGE_SAMPLE),
        ("image-feature-extraction", IMAGE_FEATURE_EXTRACTION_SAMPLE),
        ("text-generation", TEXT_GENERATION_SAMPLE),
        ("table-question-answering", TABLE_QUESTION_ANSWERING_SAMPLE),
        ("document-question-answering", DOCUMENT_QUESTION_ANSWERING_SAMPLE),
        ("question-answering", QUESTION_ANSWERING_SAMPLE),
        ("next-sentence-prediction", NEXT_SENTENCE_PREDICTION_SAMPLE),
        ("multiple-choice", MULTIPLE_CHOICE_SAMPLE),
        ("text-classification", TEXT_CLASSIFICATION_SAMPLE),
        ("token-classification", TOKEN_CLASSIFICATION_SAMPLE),
        ("fill-mask", FILL_MASK_SAMPLE),
        ("mask-generation", MASK_GENERATION_SAMPLE),
        ("pretraining", PRETRAINING_SAMPLE),
    ]
)

MODELS_TO_PIPELINE = OrderedDict(
    [
        ("MODEL_FOR_TEXT_TO_SPECTROGRAM_MAPPING_NAMES", "text-to-audio-spectrogram"),
        ("MODEL_FOR_TEXT_TO_WAVEFORM_MAPPING_NAMES", "text-to-audio-waveform"),
        ("MODEL_FOR_SPEECH_SEQ_2_SEQ_MAPPING_NAMES", "automatic-speech-recognition"),
        ("MODEL_FOR_CTC_MAPPING_NAMES", "automatic-speech-recognition"),
        ("MODEL_FOR_AUDIO_FRAME_CLASSIFICATION_MAPPING_NAMES", "audio-frame-classification"),
        ("MODEL_FOR_AUDIO_CLASSIFICATION_MAPPING_NAMES", "audio-classification"),
        ("MODEL_FOR_AUDIO_XVECTOR_MAPPING_NAMES", "audio-xvector"),
        ("MODEL_FOR_IMAGE_TEXT_TO_TEXT_MAPPING_NAMES", "image-text-to-text"),
        ("MODEL_FOR_VISUAL_QUESTION_ANSWERING_MAPPING_NAMES", "visual-question-answering"),
        ("MODEL_FOR_DEPTH_ESTIMATION_MAPPING_NAMES", "depth-estimation"),
        ("MODEL_FOR_VIDEO_CLASSIFICATION_MAPPING_NAMES", "video-classification"),
        ("MODEL_FOR_ZERO_SHOT_IMAGE_CLASSIFICATION_MAPPING_NAMES", "zero-shot-image-classification"),
        ("MODEL_FOR_IMAGE_CLASSIFICATION_MAPPING_NAMES", "image-classification"),
        ("MODEL_FOR_ZERO_SHOT_OBJECT_DETECTION_MAPPING_NAMES", "zero-shot-object-detection"),
        ("MODEL_FOR_OBJECT_DETECTION_MAPPING_NAMES", "object-detection"),
        ("MODEL_FOR_IMAGE_SEGMENTATION_MAPPING_NAMES", "image-segmentation"),
        ("MODEL_FOR_IMAGE_TO_IMAGE_MAPPING_NAMES", "image-to-image"),
        ("MODEL_FOR_IMAGE_MAPPING_NAMES", "image-feature-extraction"),
        ("MODEL_FOR_CAUSAL_LM_MAPPING_NAMES", "text-generation"),
        ("MODEL_FOR_TABLE_QUESTION_ANSWERING_MAPPING_NAMES", "table-question-answering"),
        ("MODEL_FOR_DOCUMENT_QUESTION_ANSWERING_MAPPING_NAMES", "document-question-answering"),
        ("MODEL_FOR_QUESTION_ANSWERING_MAPPING_NAMES", "question-answering"),
        ("MODEL_FOR_NEXT_SENTENCE_PREDICTION_MAPPING_NAMES", "next-sentence-prediction"),
        ("MODEL_FOR_MULTIPLE_CHOICE_MAPPING_NAMES", "multiple-choice"),
        ("MODEL_FOR_SEQUENCE_CLASSIFICATION_MAPPING_NAMES", "text-classification"),
        ("MODEL_FOR_TOKEN_CLASSIFICATION_MAPPING_NAMES", "token-classification"),
        ("MODEL_FOR_MASKED_LM_MAPPING_NAMES", "fill-mask"),
        ("MODEL_FOR_MASK_GENERATION_MAPPING_NAMES", "mask-generation"),
        ("MODEL_FOR_PRETRAINING_MAPPING_NAMES", "pretraining"),
    ]
)


def filter_outputs_from_example(docstring, **kwargs):
    """
    Removes the lines testing an output with the doctest syntax in a code sample when it's set to `None`.
    """
    for key, value in kwargs.items():
        if value is not None:
            continue

        doc_key = "{" + key + "}"
        docstring = re.sub(rf"\n([^\n]+)\n\s+{doc_key}\n", "\n", docstring)

    return docstring


def add_code_sample_docstrings(
    *docstr,
    processor_class=None,
    checkpoint=None,
    output_type=None,
    config_class=None,
    mask="[MASK]",
    qa_target_start_index=14,
    qa_target_end_index=15,
    model_cls=None,
    modality=None,
    expected_output=None,
    expected_loss=None,
    real_checkpoint=None,
    revision=None,
):
    def docstring_decorator(fn):
        # model_class defaults to the class owning the decorated method unless `model_cls` overrides it
        model_class = fn.__qualname__.split(".")[0] if model_cls is None else model_cls
        sample_docstrings = PT_SAMPLE_DOCSTRINGS

        # Put all kwargs for the docstrings in a dict to be used with `.format(**doc_kwargs)`. Note that a string
        # might be formatted with non-existing keys, which is fine.
        doc_kwargs = {
            "model_class": model_class,
            "processor_class": processor_class,
            "checkpoint": checkpoint,
            "mask": mask,
            "qa_target_start_index": qa_target_start_index,
            "qa_target_end_index": qa_target_end_index,
            "expected_output": expected_output,
            "expected_loss": expected_loss,
            "real_checkpoint": real_checkpoint,
            "fake_checkpoint": checkpoint,
            "true": "{true}",  # For <Tip warning={true}> syntax that conflicts with formatting.
        }

        if "SequenceClassification" in model_class and modality == "audio":
            code_sample = sample_docstrings["AudioClassification"]
        elif "SequenceClassification" in model_class:
            code_sample = sample_docstrings["SequenceClassification"]
        elif "QuestionAnswering" in model_class:
            code_sample = sample_docstrings["QuestionAnswering"]
        elif "TokenClassification" in model_class:
            code_sample = sample_docstrings["TokenClassification"]
        elif "MultipleChoice" in model_class:
            code_sample = sample_docstrings["MultipleChoice"]
        elif "MaskedLM" in model_class or model_class in ["FlaubertWithLMHeadModel", "XLMWithLMHeadModel"]:
            code_sample = sample_docstrings["MaskedLM"]
        elif "LMHead" in model_class or "CausalLM" in model_class:
            code_sample = sample_docstrings["LMHead"]
        elif "CTC" in model_class:
            code_sample = sample_docstrings["CTC"]
        elif "AudioFrameClassification" in model_class:
            code_sample = sample_docstrings["AudioFrameClassification"]
        elif "XVector" in model_class and modality == "audio":
            code_sample = sample_docstrings["AudioXVector"]
        elif "Model" in model_class and modality == "audio":
            code_sample = sample_docstrings["SpeechBaseModel"]
        elif "Model" in model_class and modality == "vision":
            code_sample = sample_docstrings["VisionBaseModel"]
        elif "Model" in model_class or "Encoder" in model_class:
            code_sample = sample_docstrings["BaseModel"]
        elif "ImageClassification" in model_class:
            code_sample = sample_docstrings["ImageClassification"]
        else:
            raise ValueError(f"Docstring can't be built for model {model_class}")

        code_sample = filter_outputs_from_example(
            code_sample, expected_output=expected_output, expected_loss=expected_loss
        )
        if real_checkpoint is not None:
            code_sample = FAKE_MODEL_DISCLAIMER + code_sample
        func_doc = (fn.__doc__ or "") + "".join(docstr)
        output_doc = "" if output_type is None else _prepare_output_docstrings(output_type, config_class)
        built_doc = code_sample.format(**doc_kwargs)
        if revision is not None:
            if re.match(r"^refs/pr/\\d+", revision):
                raise ValueError(
                    f"The provided revision '{revision}' is incorrect. It should point to"
                    " a pull request reference on the hub like 'refs/pr/6'"
                )
            built_doc = built_doc.replace(
                f'from_pretrained("{checkpoint}")', f'from_pretrained("{checkpoint}", revision="{revision}")'
            )
        fn.__doc__ = func_doc + output_doc + built_doc
        return fn

    return docstring_decorator


def replace_return_docstrings(output_type=None, config_class=None):
    def docstring_decorator(fn):
        func_doc = fn.__doc__
        lines = func_doc.split("\n")
        i = 0
        while i < len(lines) and re.search(r"^\s*Returns?:\s*$", lines[i]) is None:
            i += 1
        if i < len(lines):
            indent = len(_get_indent(lines[i]))
            lines[i] = _prepare_output_docstrings(output_type, config_class, min_indent=indent)
            func_doc = "\n".join(lines)
        else:
            raise ValueError(
                f"The function {fn} should have an empty 'Return:' or 'Returns:' in its docstring as placeholder, "
                f"current docstring is:\n{func_doc}"
            )
        fn.__doc__ = func_doc
        return fn

    return docstring_decorator


def copy_func(f):
    """Returns a copy of a function f."""
    # Based on http://stackoverflow.com/a/6528148/190597 (Glenn Maynard)
    g = types.FunctionType(f.__code__, f.__globals__, name=f.__name__, argdefs=f.__defaults__, closure=f.__closure__)
    g = functools.update_wrapper(g, f)
    g.__kwdefaults__ = f.__kwdefaults__
    return g