
from typing import TYPE_CHECKING, Any, Union, overload

from ..utils import add_end_docstrings, is_torch_available, is_vision_available, logging, requires_backends
from .base import Pipeline, build_pipeline_init_args


if is_vision_available():
    from ..image_utils import load_image


if is_torch_available():
    import torch

    from ..models.auto.modeling_auto import (
        MODEL_FOR_OBJECT_DETECTION_MAPPING_NAMES,
        MODEL_FOR_TOKEN_CLASSIFICATION_MAPPING_NAMES,
    )

if TYPE_CHECKING:
    from PIL import Image

logger = logging.get_logger(__name__)


@add_end_docstrings(build_pipeline_init_args(has_image_processor=True))
class ObjectDetectionPipeline(Pipeline):
    """
    Object detection pipeline using any `AutoModelForObjectDetection`. This pipeline predicts bounding boxes of objects
    and their classes.

    Example:

    ```python
    >>> from transformers import pipeline

    >>> detector = pipeline(model="facebook/detr-resnet-50")
    >>> detector("https://huggingface.co/datasets/Narsil/image_dummy/raw/main/parrots.png")
    [{'score': 0.997, 'label': 'bird', 'box': {'xmin': 69, 'ymin': 171, 'xmax': 396, 'ymax': 507}}, {'score': 0.999, 'label': 'bird', 'box': {'xmin': 398, 'ymin': 105, 'xmax': 767, 'ymax': 507}}]

    >>> # x, y are expressed relative to the top left corner.
    ```

    Learn more about the basics of using a pipeline in the [pipeline tutorial](../pipeline_tutorial)

    This object detection pipeline can currently be loaded from [`pipeline`] using the following task identifier:
    `"object-detection"`.

    See the list of available models on [huggingface.co/models](https://huggingface.co/models?filter=object-detection).
    """

    _load_processor = False
    _load_image_processor = True
    _load_feature_extractor = False
    _load_tokenizer = None

    def __init__(self, *args, **kwargs):
        super().__init__(*args, **kwargs)
        requires_backends(self, "vision")

        # Object detection accepts both plain detection models and LayoutLM-style
        # token-classification models (which classify OCR'd words along with their boxes).
        mapping = MODEL_FOR_OBJECT_DETECTION_MAPPING_NAMES.copy()
        mapping.update(MODEL_FOR_TOKEN_CLASSIFICATION_MAPPING_NAMES)
        self.check_model_type(mapping)

    def _sanitize_parameters(self, **kwargs):
        preprocess_params = {}
        if "timeout" in kwargs:
            preprocess_params["timeout"] = kwargs["timeout"]
        postprocess_kwargs = {}
        if "threshold" in kwargs:
            postprocess_kwargs["threshold"] = kwargs["threshold"]
        return preprocess_params, {}, postprocess_kwargs

    @overload
    def __call__(self, image: Union[str, "Image.Image"], *args: Any, **kwargs: Any) -> list[dict[str, Any]]: ...

    @overload
    def __call__(
        self, image: list[str] | list["Image.Image"], *args: Any, **kwargs: Any
    ) -> list[list[dict[str, Any]]]: ...

    def __call__(self, *args, **kwargs) -> list[dict[str, Any]] | list[list[dict[str, Any]]]:
        """
        Detect objects (bounding boxes & classes) in the image(s) passed as inputs.

        Args:
            inputs (`str`, `list[str]`, `PIL.Image` or `list[PIL.Image]`):
                The pipeline handles three types of images:

                - A string containing an HTTP(S) link pointing to an image
                - A string containing a local path to an image
                - An image loaded in PIL directly

                The pipeline accepts either a single image or a batch of images. Images in a batch must all be in the
                same format: all as HTTP(S) links, all as local paths, or all as PIL images.
            threshold (`float`, *optional*, defaults to 0.5):
                The probability necessary to make a prediction.
            timeout (`float`, *optional*, defaults to None):
                The maximum time in seconds to wait for fetching images from the web. If None, no timeout is set and
                the call may block forever.

        Return:
            A list of dictionaries or a list of lists of dictionaries containing the result. If the input is a single
            image, this returns a list of dictionaries; if the input is a list of several images, it returns a list of
            lists of dictionaries, one per image.

            The dictionaries contain the following keys:

            - **label** (`str`) -- The class label identified by the model.
            - **score** (`float`) -- The score attributed by the model for that label.
            - **box** (`list[dict[str, int]]`) -- The bounding box of the detected object in the image's original size.
        """
        # `images` was the historical kwarg name; map it onto `inputs` for backward compatibility.
        if "images" in kwargs and "inputs" not in kwargs:
            kwargs["inputs"] = kwargs.pop("images")
        return super().__call__(*args, **kwargs)

    def preprocess(self, image, timeout=None):
        image = load_image(image, timeout=timeout)
        target_size = torch.IntTensor([[image.height, image.width]])
        inputs = self.image_processor(images=[image], return_tensors="pt")
        inputs = inputs.to(self.dtype)
        if self.tokenizer is not None:
            inputs = self.tokenizer(text=inputs["words"], boxes=inputs["boxes"], return_tensors="pt")
        inputs["target_size"] = target_size
        return inputs

    def _forward(self, model_inputs):
        target_size = model_inputs.pop("target_size")
        outputs = self.model(**model_inputs)
        model_outputs = outputs.__class__({"target_size": target_size, **outputs})
        if self.tokenizer is not None:
            model_outputs["bbox"] = model_inputs["bbox"]
        return model_outputs

    def postprocess(self, model_outputs, threshold=0.5):
        target_size = model_outputs["target_size"]
        if self.tokenizer is not None:
            # This is a LayoutLM-style token-classification model: the OCR got the boxes
            # and the model classified the words. Box coordinates are normalized to 0-1000.
            height, width = target_size[0].tolist()

            def unnormalize(bbox):
                return self._get_bounding_box(
                    torch.Tensor(
                        [
                            (width * bbox[0] / 1000),
                            (height * bbox[1] / 1000),
                            (width * bbox[2] / 1000),
                            (height * bbox[3] / 1000),
                        ]
                    )
                )

            scores, classes = model_outputs["logits"].squeeze(0).softmax(dim=-1).max(dim=-1)
            labels = [self.model.config.id2label[prediction] for prediction in classes.tolist()]
            boxes = [unnormalize(bbox) for bbox in model_outputs["bbox"].squeeze(0)]
            keys = ["score", "label", "box"]
            annotation = [dict(zip(keys, vals)) for vals in zip(scores.tolist(), labels, boxes) if vals[0] > threshold]
        else:
            # This is a regular object detection model: delegate thresholding and box
            # rescaling to the image processor.
            raw_annotations = self.image_processor.post_process_object_detection(model_outputs, threshold, target_size)
            raw_annotation = raw_annotations[0]
            scores = raw_annotation["scores"]
            labels = raw_annotation["labels"]
            boxes = raw_annotation["boxes"]
            raw_annotation["scores"] = scores.tolist()
            raw_annotation["labels"] = [self.model.config.id2label[label.item()] for label in labels]
            raw_annotation["boxes"] = [self._get_bounding_box(box) for box in boxes]

            # {"scores": [...], "labels": [...], "boxes": [...]} --> [{"score": x, "label": y, "box": z}, ...]
            keys = ["score", "label", "box"]
            annotation = [
                dict(zip(keys, vals))
                for vals in zip(raw_annotation["scores"], raw_annotation["labels"], raw_annotation["boxes"])
            ]

        return annotation

    def _get_bounding_box(self, box: "torch.Tensor") -> dict[str, int]:
        """
        Turns list [xmin, ymin, xmax, ymax] into dict { "xmin": xmin, ... }

        Args:
            box (`torch.Tensor`): Tensor containing the coordinates in corners format.

        Returns:
            bbox (`dict[str, int]`): Dict containing the coordinates in corners format.
        """
        xmin, ymin, xmax, ymax = box.int().tolist()
        bbox = {
            "xmin": xmin,
            "ymin": ymin,
            "xmax": xmax,
            "ymax": ymax,
        }
        return bbox
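
# Illustrative usage sketch (not part of the library API). A minimal example of
# driving this pipeline end to end, assuming network access to download the
# `facebook/detr-resnet-50` checkpoint and the sample image URL referenced in
# the class docstring above.
if __name__ == "__main__":
    from transformers import pipeline

    detector = pipeline(task="object-detection", model="facebook/detr-resnet-50")
    url = "https://huggingface.co/datasets/Narsil/image_dummy/raw/main/parrots.png"
    # `threshold` drops low-confidence detections; `timeout` bounds the image fetch.
    for detection in detector(url, threshold=0.9, timeout=30.0):
        box = detection["box"]
        print(f"{detection['label']}: {detection['score']:.3f} at {box}")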