
    pbi[F                     R   d dl Z d dlZd dlmZ d dlmZ d dlZd dlmZ d dl	m
Z
 d dlmZmZmZ g dZ e j        e          Z	 dZ G d	 d
          Z G d de          Z e ej        d          ej                  Zd Z G d d          Z G d d          Zde
dedee
         fdZdej        dededeej                 fdZ d Z!	 	 d"de"edf         de#e$ef         dz  dede"edf         dz  de#e$ef         dz  de"ee"         ee#         f         fd Z%dee         fd!Z&dS )#    N)Sequence)Anymap_aggregate)	BlockMask)tree_flattentree_maptree_unflatten)TensorChunkSpecsplit_args_kwargs_into_chunksmerge_chunksFc                       e Zd ZdZd ZdS )_CustomReducera$  
    Custom reducer class that can be used to specify a custom operation that
    reduces losses of multiple microbatches into one value.

    Example:
    >>> # xdoctest: +SKIP
    >>> sum_reducer = _CustomReducer(
    >>>     torch.tensor(0.0),
    >>>     lambda a, b: a + b
    >>> )
    c                 "    || _         || _        d S N)
init_value	reduce_fn)selfr   r   s      d/root/projects/butler/venv/lib64/python3.11/site-packages/torch/distributed/pipelining/microbatch.py__init__z_CustomReducer.__init__+   s    $"    N)__name__
__module____qualname____doc__r    r   r   r   r      s-        
 
# # # # #r   r   c                       e Zd ZdS )_LossReducerNr   r   r   r   r   r   r   r   0           Dr   r   g        c                       e Zd ZU dZd Zeed<   d Zd Ze	de
edf         fd            Ze	deeef         fd	            Zd
S )r   z2
    Class used to specify chunking of inputs
    c                     || _         d S r   	split_dim)r   r$   s     r   r   zTensorChunkSpec.__init__@   s    "r   r$   c                 J    | j         j         d| j         j         d| j         dS )N.())	__class__r   r   r$   r   s    r   __repr__zTensorChunkSpec.__repr__E   s/    ~(VV4>+BVVT^VVV	
r   c                     d| j          dS )NzTensorChunkSpec(r(   r#   r*   s    r   __str__zTensorChunkSpec.__str__J   s    3$.3333r   
chunk_dims.c                 (    t          | d           }|S )a  
        A helper for creating a tuple of `TensorChunkSpec` from a tuple of chunk
        dimensions (int's).
        Example:
            >>> # xdoctest: +SKIP
            >>> # There are three positional arguments to the model, and
            >>> # we are chunking them along dimension 0, 0 and 1, respectively
            >>> args_chunk_spec = TensorChunkSpec.from_tuple((0, 0, 1))
        c                      t          |           S r   r   dims    r   <lambda>z,TensorChunkSpec.from_tuple.<locals>.<lambda>\       ,, r   r   )r.   args_chunk_specs     r   
from_tuplezTensorChunkSpec.from_tupleM   s$     (,,
 
 r   c                 (    t          | d           }|S )a\  
        A helper for creating a dictionary of `TensorChunkSpec` from a
        dictionary of chunk dimensions (int's).
        Example:
            >>> # xdoctest: +SKIP
            >>> # Chunk dimension 0 for the "id" argument, 1 for the "mask" argument
            >>> kwargs_chunk_spec = TensorChunkSpec.from_dict({"id": 0, "mask": 1})
        c                      t          |           S r   r1   r2   s    r   r4   z+TensorChunkSpec.from_dict.<locals>.<lambda>n   r5   r   r   )r.   kwargs_chunk_specs     r   	from_dictzTensorChunkSpec.from_dict`   s%     *,,
 
 ! r   N)r   r   r   r   r   int__annotations__r+   r-   staticmethodtupler7   dictstrr;   r   r   r   r   r   ;   s          # # # NNN
 
 

4 4 4 #s(O   \$ !cN! ! ! \! ! !r   r   c                       e Zd ZdS )
_ReplicateNr   r   r   r   rC   rC   t   r    r   rC   
block_mask
num_chunksreturnc                      j                             d          dk    r g|z  S  j                             d          |k    s
J d            d}t          j         j         ||          }t          j         j        ||          } j        t          j         j        ||          ndg|z  } j        t          j         j        ||          ndg|z  }g }d}t          |          D ]~}	 fd}
|                    t          j
        ||	         ||	         ||	         ||	          j         |
|           j                             |||	                             d          z  }|S )a	  Given a block mask, split the block mask along the batch dimension (dim0).

    Args:
        block_mask: Block mask to split
        num_chunks: Number of chunks to split the block mask into

    Returns:
        chunk_block_masks: List of chunked block masks
    r      z;Block mask has fewer batch size than the number of chunks. Nc                       fd}|S )Nc                 d    t          j        |           }                    | |z   |||          S r   )torch	full_likemask_mod)bhq_idxkv_idxb_offsetrD   idxs        r   batch_offset_mask_modzI_split_block_mask.<locals>.create_mask_mod.<locals>.batch_offset_mask_mod   s2     ?1c22!**1x<E6JJJr   r   )rS   rT   rD   s   ` r   create_mask_modz*_split_block_mask.<locals>.create_mask_mod   s0    K K K K K K )(r   )kv_num_blocks
kv_indicesfull_kv_num_blocksfull_kv_indices
BLOCK_SIZErM   seq_lengths)rV   sizerK   tensor_splitrW   rX   rY   rangeappendr   from_kv_blocksrZ   r[   )rD   rE   	batch_dimkv_num_blocks_chunkskv_indices_chunksfull_kv_num_blocks_chunksfull_kv_indices_chunkschunk_block_masksbatch_offset	chunk_idxrU   s   `          r   _split_block_maskri   x   s    $$Q''1,,|j((#((++z999E :99 I - *i  *:+@*iXX (4 	:8*iPPPVj   %1 	:5z9MMMVj   L:&& @ @		) 	) 	) 	) 	) 	  $29=,Y7#<Y#G 6y A%0(66&2  
	
 
	
 
	
 	,Y7<<Q???r   tensorspecc                    |                      |j                  |k    s&J d|                      |j                   d            t          j        | ||j                  }t          s|S g }d}|D ]}t          j        |           }||                     |j                  z   }t          ddd          g|j        z  }	t          ||          |	|j        <   |||	<   |                    |           ||                     |j                  z  }|S )zGiven a tensor, and a chunking spec, split the tensor.
    Args:

        tensor: Tensor to split
        spec: Chunking spec
        num_chunks: Number of chunks to split the tensor into

    Returns:
        chunk_tensors: List of chunked tensors
    zTensor size z is smaller than num_chunksr   N)	r\   r$   rK   r]   _debug_mask_minibatches
zeros_likeslicendimr_   )
rj   rk   rE   chunk_tensorsexpanded_chunkssplit_dim_idxchunk_tensornew_val	upper_idxslice_indicess
             r   _split_tensorrx      s!     ;;t~&&*444Ov{{4>22OOO 544 &vz4>JJM" OM% 
; 
;"6**!L$5$5dn$E$EE	tT4001GL@(-mY(G(Gdn%!-w'''**4>:::r   c                 T   | sd t          |          D             S t          |           t          |          k    sNJ dt          |                                            dt          |                                                       |J t	          | d           \  }t	          |d           \  }}g }t          ||d	          D ]R\  }}|t          u st          |t                    r|                    |           :t          |t          j
                  rEt          |t                    sJ |                    |                    |j                             t          |t                    rt          |t                    sJ |j        d
k    s
J d            |j                            d
          dk    r|                    |           |                    |j                            d
                     >t!          d| d| d          t#          g ||R  }	d t          |	          D             }
t          ||d	          D ]\  }}g }|t          u st          |t                    r|g|	z  }nht          |t          j
                  rt%          |||	          }n<t          |t                    rt'          ||	          }nt!          d| d| d          t          |
|d	          D ]\  }}|                    |           fd|
D             S )aW  
    Given a dictionary of args, and a dictionary of chunking specs, shard the
    args according to the chunking specs.

    Args:
        args_dict: Dictionary of args
        args_chunk_spec: Dictionary of chunking specs
        num_chunks: Number of chunks to shard the args into

    Returns:
        args_split: List of sharded args
    c                     g | ]}i S r   r   .0_s     r   
<listcomp>z'_shard_dict_of_args.<locals>.<listcomp>   s    ...q...r   zargs_dict.keys() = z args_chunk_spec.keys() = Nc                 ,    t          | t                    S r   
isinstancer   xs    r   r4   z%_shard_dict_of_args.<locals>.<lambda>   s    Z9%=%= r   is_leafc                 ,    t          | t                    S r   r   r   s    r   r4   z%_shard_dict_of_args.<locals>.<lambda>   s    :a+C+C r   Tstrictr   z#BlockMask only supports split_dim=0rH   zUnsupported chunk spec: z and value: z combination.c                     g | ]}g S r   r   r{   s     r   r~   z'_shard_dict_of_args.<locals>.<listcomp>  s    $J$J$JAR$J$J$Jr   c                 0    g | ]}t          |          S r   )r
   )r|   _flat_split_result	tree_specs     r   r~   z'_shard_dict_of_args.<locals>.<listcomp>.  s4        	)955  r   )r^   lenlistkeysr   ziprC   r   r_   rK   Tensorr   r\   r$   r   rV   
ValueErrorminrx   ri   )	args_dictr6   rE   valueschunk_specsr}   split_sizesvrk   result_num_chunksflat_split_resultsv_splitsr   _v_splitr   s                 @r   _shard_dict_of_argsr      s   $  /..E*--....y>>S11111	Cd9>>#3#344 	C 	C$()=)=)?)?$@$@	C 	C 211 &&&$==  FI "!C!C  NK
 Kv{4888  4 :D*!=!=z****5<(( 	dO44444qvvdn5566669%% 	dO44444>Q&&&(M&&&##A&&!++"":....""1?#7#7#:#:;;;;M4MMQMMM   5[5*555$J$J7H1I1I$J$J$Jv{4888 0 04"$:D*!=!=s..HH5<(( 	$Q.?@@HH9%% 	(,=>>HHM4MMQMMM   -0-
 -
 -
 	0 	0( %%h////	0
   "4   r   args.kwargschunksr6   r:   c                    |i }d }|t          || d           }|t          ||d           }t          t          t          |                     t          t          |                    |          }t	          |          }t          |||          }t	          |          |k     rTt	          |          }t          t          t          |                     t          t          |                    |          }t	          |          t	          |          k    r/t          dt	          |           dt	          |                     d |D             }	|	|fS )	a  
    Given a sequence of args and kwargs, split them into a number of chunks
    according to  their respective chunking specs.

    Args:
        args: Tuple of args
        kwargs: Dict of kwargs
        chunks: Number of chunks to split the args and kwargs into
        args_chunk_spec: chunking specs for args, in same shape as args
        kwargs_chunk_spec: chunking specs for kwargs, in same shape as kwargs

    Returns:
        args_split: List of sharded args
        kwargs_split: List of sharded kwargs
    Nc                     t          | t          j        t          z            rt	          t
                    S t                      S r   )r   rK   r   r   r   DEFAULT_CHUNK_DIMrC   r   s    r   default_specz3split_args_kwargs_into_chunks.<locals>.default_specq  s4    a	122 	 "#4555<<r   c                 ,    t          | t                    S r   r   r   s    r   r4   z/split_args_kwargs_into_chunks.<locals>.<lambda>y  s    *Q	2J2J r   r   c                 ,    t          | t                    S r   r   r   s    r   r4   z/split_args_kwargs_into_chunks.<locals>.<lambda>~  s    Jq)4L4L r   z;args and kwargs are split into different number of chunks: z, c           
      z    g | ]7t          fd t          t                              D                       8S )c              3   (   K   | ]}|         V  d S r   r   )r|   i
chunk_argss     r   	<genexpr>z;split_args_kwargs_into_chunks.<locals>.<listcomp>.<genexpr>  s'      <<jm<<<<<<r   )r?   r^   r   )r|   r   s    @r   r~   z1split_args_kwargs_into_chunks.<locals>.<listcomp>  sT        	<<<<U3z??%;%;<<<<<  r   )r	   r   r@   	enumerater   RuntimeError)
r   r   r   r6   r:   r   args_split_dictreal_num_chunkskwargs_split
args_splits
             r   r   r   4  s   p ~      "$(J(J
 
 
  $&*L*L
 
 
 *Yt__Y''(( O
 /**O& L <?** l++-4!!?++,,
 
 ?s<0000;?##; ;'*<'8'8; ;
 
 	

 )  J
 |##r   c                 b   |t          |          \  }}n=t          | d                   \  }}t          t                    gt          |          z  }g | D ]^}t          |          \  }}t          |          t          |          k    rt	          d| d|                               |           _g }t          |          D ]V\  }	t          |	t                    rofdt          t                              D             }
t          r|
d         j
        }|
dd         D ]}|j
        |k    sJ t          j        t          j        |ddit          |
          |	j        	          }g }d}t          |
          t          |          k    sJ t          |
|d
          D ]s\  }}||                    |	j                  z   }t#          ddd          g|j        z  }t#          ||          ||	j        <   ||         }|                    |           |}tn|
}|                    t          j        ||	j                             t          |	t(                    r_|	j        }t          t                              D ]$}|	                    ||                            }%|                    |           d                  }t          dt                              D ]}|                  |k    sJ |                    |           Xt/          ||          S )z
    Given a list of chunks, merge them into a single value according to
    the chunk spec.

    Args:
        chunks: list of chunks
        chunk_spec: Chunking spec for the chunks

    Returns:
        value: Merged value
    Nr   zChunk z did not match chunk spec c                 ,    g | ]}|                  S r   r   )r|   rh   arg_idxchunks_flatteneds     r   r~   z merge_chunks.<locals>.<listcomp>  s3        !+G4  r   rH   devicemeta)sectionsr3   Tr   r2   )r   r   r   r   r   r_   r   r   r^   rm   shaperK   r]   emptyr$   r   r\   ro   rp   catr   r   r   r
   )r   
chunk_specspec_flattenedflatten_specchunk0_flatchunkchunk_flattenedr}   args_flattenedargpartial_valuesoverall_shapevalmeta_chunksvalues_to_catchunk_start_idxpartial_value
meta_chunkchunk_end_idxrw   slicedreduced_valrh   valuer   r   s                           @@r   r   r     s   Z '3J'?'?$ %1$;$;!\)*;<<=K@P@PP  1 1)%003~#6#666SeSSzSSTTT0000
 N!.11 2) 2)c?++ 1	)    !&s+;'<'<!=!=  N
 ' / .q 1 7)!""- 6 6C955555#0K>v>> 00   !#"#>**c+.>.>>>>>14"K2 2 2 
4 
4-M: %4joocm6T6T$TM%*4t%<%<$=@R$RM38-3X3XM#-0*=9F!((000&3OO
4 !/!!%)Ms}"M"M"MNNNN^,, 	).K"3'7#8#899  	!mm!1)!<W!E  !!+....$Q'0E"1c*:&;&;<< E E	'	27;uDDDDD!!%(((( .,777r   )NN)'loggingoperatorcollections.abcr   typingr   rK   torch.fx.noder   !torch.nn.attention.flex_attentionr   torch.utils._pytreer   r	   r
   __all__	getLoggerr   loggerrm   r   r   rj   addsum_reducerr   r   rC   r<   r   ri   r   rx   r   r?   r@   rA   r   r   r   r   r   <module>r      s     $ $ $ $ $ $        ' ' ' ' ' ' 7 7 7 7 7 7 F F F F F F F F F F   
	8	$	$
   # # # # # # # #$	 	 	 	 	> 	 	 	 l<5<,,hl;;  5! 5! 5! 5! 5! 5! 5! 5!r	 	 	 	 	 	 	 	=== 
)_= = = =@&L&
& & el	& & & &RP P Pn ;?;?p$ p$
S/p$cNT!p$ p$ ?C/047	p$
 C01D8p$ 4;T
"#p$ p$ p$ p$fy8Iy8 y8 y8 y8 y8 y8r   