
    bi                     J    d dl mZ d dlmZ d dlmZ d dlmZmZ dede	fdZ
dS )	    )Path)Any)TikTokenConverter)TIKTOKEN_VOCAB_FILETOKENIZER_FILEencoding
output_dirc                    t          |          }|                    d           |dz  t          z  }|t          z  }|j                            dd           t          |                                          }t          |                                          }	 ddlm} ddl	m
} t          | t
                    r ||           }  || j        |           nW# t          $ rJ}t          |          }	d|	                                v rt          d	          |t          d
          |d}~ww xY wt!          || j        | j                                                  }
|
                    |           dS )a  
    Converts given `tiktoken` encoding to `PretrainedTokenizerFast` and saves the configuration of converted tokenizer
    on disk.

    Args:
        encoding (`str` or `tiktoken.Encoding`):
            Tokenizer from `tiktoken` library. If `encoding` is `str`, the tokenizer will be loaded with
            `tiktoken.get_encoding(encoding)`.
        output_dir (`str`):
            Save path for converted tokenizer configuration file.
    T)exist_oktiktoken)parentsr   r   )get_encoding)dump_tiktoken_bpeblobfilezY`blobfile` is required to save a `tiktoken` file. Install it with `pip install blobfile`.zY`tiktoken` is required to save a `tiktoken` file. Install it with `pip install tiktoken`.N)
vocab_filepatternextra_special_tokens)r   mkdirr   r   parentstrabsoluter   r   tiktoken.loadr   
isinstance_mergeable_ranksImportErrorlower
ValueErrorr   _pat_str_special_tokens	convertedsave)r   r	   	save_filetokenizer_filesave_file_absoluteoutput_file_absoluter   r   e	error_msg	tokenizers              ]/root/projects/butler/venv/lib/python3.11/site-packages/transformers/integrations/tiktoken.pyconvert_tiktoken_to_fastr*      s    j!!Jd###Z'*==I.0N 4$777Y//1122~668899))))))333333h$$ 	.#|H--H(35GHHHH   FF	****k  g
 
	 "%x/@W_Wo  ikk  NN'(((((s   =C 
D.$AD))D.N)pathlibr   typingr   #transformers.convert_slow_tokenizerr   *transformers.tokenization_utils_tokenizersr   r   r   r*        r)   <module>r1      s                A A A A A A Z Z Z Z Z Z Z Z-)s -) -) -) -) -) -) -)r0   