
    Sܶi                         d dl Z d dlZd dlZi dddddddd	d
dddddddddddddddddddddddZd%d efd!Zd efd"Z G d# d$          ZdS )&    Nu   œoeu   ŒOE   øo   ØO   æae   ÆAE   ßssu   ẞSSu   đdu   ĐD   ð   Ð   þth   Þu   łlu   ŁL sc                 l    d                     fdt          j        d|           D                       S )z
    Replace any other markers, symbols, and punctuations with a space,
    and drop any diacritics (category 'Mn' and some manual mappings)
    r   c              3      K   | ]Z}|v r|nO|t           v rt           |         n9t          j        |          d k    rdnt          j        |          d         dv rdn|V  [dS )Mnr   r   MSP N)ADDITIONAL_DIACRITICSunicodedatacategory).0ckeeps     \/root/projects/openclaw-proxy/venv/lib/python3.11/site-packages/whisper/normalizers/basic.py	<genexpr>z0remove_symbols_and_diacritics.<locals>.<genexpr>    s          Dyy A --- &a(( #+A..$66 B + 4Q 7 7 :e C C         NFKDjoinr!   	normalize)r   r%   s    `r&   remove_symbols_and_diacriticsr-      sP    
 77     &vq11     r(   c                 f    d                     d t          j        d|           D                       S )z[
    Replace any other markers, symbols, punctuations with a space, keeping diacritics
    r   c              3   V   K   | ]$}t          j        |          d          dv rdn|V  %dS )r   r   r   N)r!   r"   )r#   r$   s     r&   r'   z!remove_symbols.<locals>.<genexpr>6   sT         #A&&q)U22     r(   NFKCr*   )r   s    r&   remove_symbolsr1   2   sC     77  &vq11     r(   c                   ,    e Zd ZddedefdZdefdZdS )	BasicTextNormalizerFremove_diacriticssplit_lettersc                 >    |rt           nt          | _        || _        d S )N)r-   r1   cleanr5   )selfr4   r5   s      r&   __init__zBasicTextNormalizer.__init__=   s'    ->R))N 	
 +r(   r   c                 t   |                                 }t          j        dd|          }t          j        dd|          }|                     |                                           }| j        r3d                    t          j        d|t          j                            }t          j        dd|          }|S )Nz[<\[][^>\]]*[>\]]r   z\(([^)]+?)\)r   z\Xz\s+)	lowerresubr7   r5   r+   regexfindallU)r8   r   s     r&   __call__zBasicTextNormalizer.__call__C   s    GGIIF'Q//F?B**JJqMM!! 	;ua99::AFC
 
 r(   N)FF)__name__
__module____qualname__boolr9   strrA    r(   r&   r3   r3   <   sV        + +$ +t + + + +#      r(   r3   )r   )r<   r!   r>   r    rF   r-   r1   r3   rG   r(   r&   <module>rH      sT   				     $$ 	# 	#	
 	$ 	$ 	$ 
4 	# 	# 	# 	# 	$ 	$ 	#  	#! ( S    .c             r(   