
    bi	                    d    d dl mZ d dlZd dlmZ d dlmZ  ej        e	          Z
ddd
ZddZdS )    )annotationsN)loggingFtensortorch.Tensor	with_gradboolreturnc                    t          j                    rt          j                    r|r%t          j        j                                       }n[t          j                    } fdt          |          D             }t          j        |            t          j	                    } ||<   t          j
        |d          S d}t                              |            S )ap  
    Gathers a tensor from each distributed rank into a list. Always retains gradients for the local rank's tensor,
    and optionally retains gradients for the gathered tensors if `with_grad` is True.

    Args:
        tensor (torch.Tensor): The tensor to gather from each rank.
        with_grad (bool, optional): If True, the local rank's tensor retains its gradients. Defaults to False.

    Returns:
        torch.Tensor: A tensor containing the gathered tensors from all ranks, concatenated along the first dimension.
        If torch.distributed is not available or not initialized, returns the original tensor.
    c                8    g | ]}t          j                  S  )torch
zeros_like).0_r   s     a/root/projects/butler/venv/lib/python3.11/site-packages/sentence_transformers/util/distributed.py
<listcomp>zall_gather.<locals>.<listcomp>   s$    TTTQ 0 8 8TTT    r   )dimzTrying to gather while torch.distributed is not available or has not been initialized, returning the original (local) tensor. This is expected if you are only using one GPU; consider not using gathering to remove this warning.)distis_availableis_initializedr   distributednn
all_gatherget_world_sizerangeget_rankcatloggerwarning_once)r   r   gathered_tensors
world_size
local_rankwarnings   `     r   r   r      s      2t244 2 	2$03>>vFF,..JTTTT%
BSBSTTT O,f555 J+1Z(y)q1111	S 
    Mr   c                $    t          | d          S )a  
    Gathers a tensor from each distributed rank into a list, retaining gradients for the local rank's tensor.

    Args:
        tensor (torch.Tensor): The tensor to gather from each rank.

    Returns:
        torch.Tensor: A tensor containing the gathered tensors from all ranks, concatenated along the first dimension.
        If torch.distributed is not available or not initialized, returns the original tensor.
    T)r   )r   )r   s    r   all_gather_with_gradr&   2   s     f----r   )F)r   r   r   r   r	   r   )r   r   r	   r   )
__future__r   r   torch.distributedr   r   transformers.utilsr   
get_logger__name__r   r   r&   r   r   r   <module>r,      s    " " " " " "              & & & & & & 
	H	%	%$ $ $ $ $N. . . . . .r   