
    ?i/              
       `   U d Z ddlZddlZddlZddlZddlZddlmZ ddlm	Z	 ddl
mZ ddlZddlZddlmZmZmZ ddlmZmZ  ej        ej        d	            ej        d
          Z e	d          pdZdZdZdZdZ e	d          pdZdZe e!d<    G d de"e          Z# G d de          Z$ G d de          Z% G d de          Z& G d de          Z'da(ej)        dz  e!d<   dej)        fdZ*d e+de"fd!Z,d"e"d#e"de"fd$Z-d"e"de+fd%Z.d&ed'e"d(e'd#e"ddf
d)Z/d&ed*e+d(e'ddfd+Z0d&ed(e'd#e"ddfd,Z1 ed
d-.          Z2e23                    d/          d&eddfd0            Z4e25                    d1          de6e"e"f         fd2            Z7e8d3k    rddl9Z9 e9j:        d4d5d6d78           dS dS )9uT   openclaw-proxy — async WebSocket gateway between XiaoZhi ESP32 and OpenClaw agent.    N)Enum)which)Literal)FastAPI	WebSocketWebSocketDisconnect)	BaseModelValidationErrorz1%(asctime)s [%(levelname)s] %(name)s: %(message)s)levelformatzopenclaw-proxyopenclawmainx   basezzh-CN-XiaoxiaoNeuralffmpegi   MAX_AUDIO_BUFFER_BYTESc                       e Zd ZdZdZdZdZdS )HardwareActionz+Actions reported by XiaoZhi ESP32 hardware.key_downkey_up	wake_wordN)__name__
__module____qualname____doc__KEY_DOWNKEY_UP	WAKE_WORD     %/root/projects/openclaw-proxy/main.pyr   r   '   s#        55HFIIIr    r   c                   (    e Zd ZU dZeed<   eed<   dS )HardwareEventz"Incoming JSON frame from hardware.typeactionN)r   r   r   r   str__annotations__r   r   r    r!   r#   r#   /   s-         ,,
IIIr    r#   c                   ,    e Zd ZU dZdZeed<   eed<   dS )StateNotificationzEOutgoing state frame pushed to hardware (e.g. breathing LED trigger).stater$   statusNr   r   r   r   r$   r&   r'   r   r    r!   r)   r)   6   s/         OOD#KKKKKr    r)   c                   ,    e Zd ZU dZdZeed<   eed<   dS )	TextReplyz.Outgoing text reply carrying the LLM response.
text_replyr$   textNr,   r   r    r!   r.   r.   =   s/         88D#
IIIIIr    r.   c                   p    e Zd ZU dZddiZdZed         ed<   g Ze	e
         ed<   defd	Zde
fd
ZddZdS )SessionStatez9Per-connection state machine for press-to-talk buffering.arbitrary_types_allowedTidle)r4   	listening
processingr+   audio_chunksreturnc                 >    t          d | j        D                       S )z*Return total buffered audio size in bytes.c              3   4   K   | ]}t          |          V  d S )N)len).0cs     r!   	<genexpr>z+SessionState.buffer_size.<locals>.<genexpr>N   s(      55a3q66555555r    )sumr7   selfs    r!   buffer_sizezSessionState.buffer_sizeL   s"    554#4555555r    c                 l    d                     | j                  }| j                                         |S )z)Join and clear all buffered audio chunks.r    )joinr7   clear)rA   audios     r!   drain_audiozSessionState.drain_audioP   s0    *++!!!r    Nc                 F    d| _         | j                                         dS )z&Reset state to idle with empty buffer.r4   N)r+   r7   rE   r@   s    r!   resetzSessionState.resetV   s$    !!!!!r    r8   N)r   r   r   r   model_configr+   r   r'   r7   listbytesintrB   rG   rI   r   r    r!   r2   r2   D   s         CC-t4L9?FG56??? "L$u+"""6S 6 6 6 6U    " " " " " "r    r2   _whisper_modelr8   c                      t           9t                              dt                     t	          j        t                    a t           S )z3Return the Whisper model, loading it on first call.Nu   Loading Whisper '%s' model…)rO   loggerinfoWHISPER_MODEL_SIZEwhisper
load_modelr   r    r!   _get_whisper_modelrV   a   s7     35GHHH +,>??r    audio_bytesc                   K   t                               dt          |                      t                      }t	          j        dd          5 }|                    |            |                                 t          j	        |j
        |j        d           d{V }ddd           n# 1 swxY w Y   |d                                         }t                               d	|           |S )
zConvert Opus audio to text via Whisper.

    Args:
        audio_bytes: Raw Opus-encoded audio from ESP32.

    Returns:
        Transcribed text string.
    z$STT: received %d bytes of Opus audioz.opusT)suffixdeletezh)languageNr0   u   STT: transcribed → %s)rQ   rR   r;   rV   tempfileNamedTemporaryFilewriteflushasyncio	to_thread
transcribenamestrip)rW   modelfresultr0   s        r!   process_sttri   m   s)      KK6K8H8HIII  E		$GD	A	A	A 
Q					(aft
 
 
 
 
 
 
 
 

 
 
 
 
 
 
 
 
 
 
 
 
 
 
 &>!!D
KK)4000Ks   AB,,B03B0r0   
session_idc                 8  K   t                               d|            t          j        t          ddt
          d|d| ddt          t                    t          j        j	        t          j        j	                   d	{V }|
                                 d	{V \  }}|j        d
k    r[|                    d                                          }t                               d|j        |           t          d|           t!          j        |                                          }|                    di                               dg           }|st          d          |d
                             dd          }t                               d|           |S )aF  Send text to OpenClaw agent via CLI and return its reply.

    Args:
        text: User utterance transcribed by STT.
        session_id: Per-connection session ID for multi-turn conversation.

    Returns:
        Agent reply string.

    Raises:
        RuntimeError: If the CLI call fails or returns unexpected output.
    u   LLM: sending to OpenClaw → %sagentz--agentz--session-idz-mz--jsonz	--timeout)stdoutstderrNr   replaceerrorszOpenClaw CLI failed (rc=%d): %szOpenClaw CLI error: rh   payloadsz OpenClaw returned empty payloadsr0    u   LLM: reply ← %s)rQ   rR   ra   create_subprocess_execOPENCLAW_CLIOPENCLAW_AGENT_IDr&   OPENCLAW_TIMEOUT
subprocessPIPEcommunicate
returncodedecodere   errorRuntimeErrorjsonloadsget)	r0   rj   procrm   rn   err_msgdatarr   replys	            r!   call_openclawr      s      KK14888/g$
dS)**!&!&	 	 	 	 	 	 	 	 	D  ++--------NFF!--y-1177996QQQ;';;<<<:fmmoo&&Dxx"%%))*b99H ?=>>>QKOOFB''E
KK#U+++Lr    c                 @  K   t                               d|            t          j        | t                    }g }|                                2 3 d{V }|d         dk    r|                    |d                    /6 d                    |          }t          j	        t          dd	d
ddddddddddt          j        j        t          j        j        t          j        j                   d{V }|                    |           d{V \  }}|j        dk    r[|                    d                                          }t                               d|j        |           t%          d|           t                               dt'          |                     |S )zConvert text to Opus audio via edge-tts and ffmpeg.

    Args:
        text: Text to synthesize.

    Returns:
        Opus-encoded audio bytes.

    Raises:
        RuntimeError: If ffmpeg conversion fails.
    u   TTS: synthesising → %s)r0   voiceNr$   rF   r   r    z-izpipe:0z-c:alibopusz-b:a24kz-ar16000z-ac1z-fopuszpipe:1)stdinrm   rn   )inputr   ro   rp   zffmpeg failed (rc=%d): %szffmpeg TTS conversion error: z%TTS: generated %d bytes of Opus audio)rQ   rR   edge_ttsCommunicateEDGE_TTS_VOICEstreamappendrD   ra   rt   
FFMPEG_BINrx   ry   rz   r{   r|   re   r}   r~   r;   )	r0   rz   
mp3_chunkschunk	mp3_bytesr   
opus_bytesrn   r   s	            r!   process_ttsr      s      KK*D111 &DGGGK J"))++ - - - - - - -e=G##eFm,,, , $$I /D(	65wsfh %!&!&        D  $//i/@@@@@@@@J!--y-1177990$/7KKKD7DDEEE
KK7ZIIIs   A>wsrawr*   c                   K   	 t          j        |          }n1# t           j        $ r t                              d|           Y dS w xY w	 t          d
i |}nE# t          $ r8}t                              d|                                           Y d}~dS d}~ww xY wt                              d|j	        j
                   |j	        t          j        t          j        fv r|j        dk    r3t                              d           |j                                         d|_        t#          d          }|                     |                                           d{V  dS |j	        t          j        k    rM|j        dk    r"t                              d|j                   dS d	|_        t+          | ||           d{V  dS dS )zParse and route an incoming JSON text frame.

    Args:
        ws: Active WebSocket connection.
        raw: Raw text payload received from client.
        state: Per-connection session state.
        session_id: Per-connection session ID.
    z Malformed JSON (ignored): %.120sNzInvalid hardware event: %szHardware event: %sr5   u0   Re-key_down while listening — resetting bufferr+   u    key_up in '%s' state — ignoredr6   r   )r   r   JSONDecodeErrorrQ   warningr#   r
   rq   rR   r%   valuer   r   r   r+   r7   rE   r)   	send_textmodel_dump_jsonr   handle_pipeline)r   r   r*   rj   payloadeventexcacks           r!   handle_text_framer      s     *S//   93???((((   3SZZ\\BBB KK$el&8999|/1IJJJ<;&&KKJKKK$$&&&"{333ll3..0011111111111	.	.	.<;&&NN=u|LLLF#b%44444444444 
/	.s'    *AAA 
B"-BBr   c                   K   |j         dk    r"t                              d|j                    dS |                                t	          |          z   t
          k    r/t                              d|                                           dS |j                            |           t                              dt	          |          |                                           dS )zBuffer an audio binary frame if in listening state.

    Args:
        ws: Active WebSocket connection.
        data: Raw Opus audio bytes.
        state: Per-connection session state.
    r5   u&   Binary frame in '%s' state — droppedNu/   Audio buffer full (%d bytes) — dropping framezBuffered %d bytes (total: %d))	r+   rQ   r   rB   r;   r   r7   r   rR   )r   r   r*   s      r!   handle_binary_framer     s       |{""?NNNSYY&)???H%J[J[J]J]^^^	d###
KK/TE<M<M<O<OPPPPPr    c                    K   |                                 }|s0t                              d           |                                 dS 	 t	          |           d{V }                     t          d                                                     d{V  d	 fd}t          j	         |                      }	 t          ||           d{V }|                                 n# |                                 w xY w                     t          |                                                     d{V  t          |           d{V }t                              dt          |                                          |           d{V  |                                 dS # |                                 w xY w)
u   Drain buffered audio and run STT → LLM → TTS pipeline.

    Args:
        ws: Active WebSocket connection.
        state: Per-connection session state.
        session_id: Per-connection session ID.
    u.   key_up with empty buffer — skipping pipelineNthinkingr   r8   c                     K   	 t          j        d           d{V  	                      t          d                                                     d{V  n# t
          $ r Y dS w xY wi)z=Send thinking heartbeat every 3 seconds while LLM is working.T   Nr   r   )ra   sleepr   r)   r   	Exception)r   s   r!   	heartbeatz"handle_pipeline.<locals>.heartbeat0  s      mA&&&&&&&&&,,)<<<LLNN          !   EEs   ;A 
A*)A*)r0   z#Pushing %d bytes of audio to clientrJ   )rG   rQ   r   rI   ri   r   r)   r   ra   create_taskr   cancelr.   r   rR   r;   
send_bytes)	r   r*   rj   rF   r0   r   heartbeat_taskr   	tts_audios	   `        r!   r   r     s      E GHHH  '''''''' ll,J???OOQQRRRRRRRRR		 		 		 		 		 		 !,YY[[99	$'j99999999E!!####N!!####ll9%000@@BBCCCCCCCCC &e,,,,,,,,	93y>>JJJmmI&&&&&&&&&s&   A2F+ >C) F+ )C??BF+ +Gz0.1.0)titleversionz/chatc                   K   |                                   d{V  | j        r| j        j         d| j        j         nd}dt	          j                    j        dd          }t                      }t          	                    d||           	 	 | 
                                 d{V }|d         d	k    rn_|                    d
          }|                    d          }|t          | |||           d{V  n|t          | ||           d{V  n# t          $ r Y nw xY wt          	                    d||           dS # t          	                    d||           w xY w)z3Main WebSocket endpoint for XiaoZhi ESP32 hardware.N:unknownzxiaozhi-   z!Client connected: %s (session=%s)Tr$   zwebsocket.disconnectr0   rM   z$Client disconnected: %s (session=%s))acceptclienthostportuuiduuid4hexr2   rQ   rR   receiver   r   r   r   )r   peerrj   r*   message	text_data	byte_datas          r!   websocket_chatr   P  s      ))++359Kbin//ry~///)D2DJLL,RaR022JNNE
KK3T:FFFN	@JJLL((((((Gv"888F++IG,,I$'IujIIIIIIIIII&)"i?????????	@      	:D*MMMMM:D*MMMMs+   BD E 
D$!E #D$$E E#z/healthc                     K   ddiS )zLiveness probe.r+   okr   r   r    r!   healthr   m  s       dr    __main__zmain:appz0.0.0.0i@  T)r   r   reload);r   ra   r   loggingr]   r   enumr   shutilr   typingr   r   rT   fastapir   r   r   pydanticr	   r
   basicConfigINFO	getLoggerrQ   ru   rv   rw   rS   r   r   r   rN   r'   r&   r   r#   r)   r.   r2   rO   WhisperrV   rM   ri   r   r   r   r   r   app	websocketr   r   dictr   r   uvicornrunr   r    r!   <module>r      s;   Z Z Z                          ; ; ; ; ; ; ; ; ; ; / / / / / / / /  
,>    
	+	,	, uZ  .J   'U8__(
.  . . .    S$       I       	       	   " " " " "9 " " "4 *.$& - - -GO    5 S    ,'c 's 's ' ' ' 'T(C (E ( ( ( (\&5&5&5$0&5>A&5	&5 &5 &5 &5RQ) Q5 Q QRV Q Q Q Q(00&0470	0 0 0 0j g$g666 wNY N4 N N N N8 d38n     zNNNGK
dCCCCCC r    