
    9i6n                         d Z ddlZddlZddlmZ ddlmZ ddlmZ ddl	m
Z
  e
e          Zg dZg dZg d	Zg d
Zg dZg dZdZdZ ed          ZdZdZdZ G d d          ZdS )u   Zhihu Playwright Engine — authentication and answer publishing.

Handles:
- Persistent session via zhihu_state.json (cookies)
- QR-code login flow with Slack notification + 2-minute polling
- Answer publishing via headless Chromium + playwright-stealth
    N)Path)Optional)	WebClient)setup_logger)z.SignFlow-qrcode imgz.LoginQrcode imgzcanvas.SignFlow-qrcodeCanvasz.SignFlow-qrcode canvas)u)   div.SignFlow-tab:has-text('扫码登录')u"   li.SignFlow-tab:has-text('扫码')u   span:has-text('扫码登录'))z.AppHeader-profileAvatarz;div[data-za-detail-view-path-module='AppHeader'] img.Avatarz.TopstoryTabs)u   button:has-text('写回答')u   a:has-text('写回答')u   button:has-text('编辑回答')u   a:has-text('编辑回答')u%   [role='button']:has-text('写回答')u(   [role='button']:has-text('编辑回答')z0[data-za-detail-view-element-name='WriteAnswer']z3[data-za-element-name='QuestionDetail_WriteAnswer']z%.QuestionDetailPageHeaderRight buttonz".QuestionDetail-buttonGroup buttonz[class*='WriteAnswer'])z,.DraftEditor-content[contenteditable='true']z".RichText [contenteditable='true']z3[contenteditable='true'].public-DraftEditor-content)u   button:has-text('发布回答')u%   .AnswerForm button:has-text('发布')u%   .AnswerForm button:has-text('提交')u   button:has-text('发布')u   button:has-text('提交')zbutton.SubmitAnswerz%[data-za-element-name='SubmitAnswer']zhttps://www.zhihu.com/signinzhttps://www.zhihu.com/z/tmp/zhihu_qr.pngx         c            	           e Zd ZdZ	 ddededee         ddfdZde	fdZ
d	ed
edefdZde	fdZddZde	fdZde	fdZddZdee         fdZdededdfdZde	fdZddZdeddfdZdefdZdeddfdZdS )ZhihuPlaywrightEnginezPlaywright-powered Zhihu auth and answer publisher.

    Example:
        engine = ZhihuPlaywrightEngine(slack_client, channel_id)
        if engine.ensure_logged_in():
            engine.publish_answer(question_url, answer_text)
    Nslack_clientnotify_channel
state_filereturnc                     || _         || _        |p6t          t                                                    j        d         dz  dz  | _        | j        j                            dd           dS )aC  Initialize the engine.

        Args:
            slack_client: Authenticated Slack WebClient for QR upload.
            notify_channel: Slack channel/DM ID to send QR images to.
            state_file: Override path for session cookie persistence.
                Defaults to <project_root>/data/zhihu_state.json.
           datazzhihu_state.jsonT)parentsexist_okN)	r   r   r   __file__resolver   r   parentmkdir)selfr   r   r   s       @/root/projects/butler/slack_bot/zhihu/zhihu_playwright_engine.py__init__zZhihuPlaywrightEngine.__init___   sq     ),$ 
NN""$$,Q/&8;MM 	 	$$TD$AAAAA    c                    ddl m} ddlm}  |            5 }|j                            d          }|                    dddd	
          }|                                } |                                |           | 	                    |          rl| 
                    |          r<t                              d           |                                 	 ddd           dS t                              d           nt                              d           |                     ||          }|                                 |cddd           S # 1 swxY w Y   dS )zVerify session validity; run QR login flow if cookie is expired.

        Returns:
            True if a valid authenticated session exists after this call.
        r   sync_playwrightStealthTheadless      widthheightoMozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/122.0.0.0 Safari/537.36viewport
user_agentz-Zhihu: session valid (loaded from state file)Nz/Zhihu: saved session expired, starting QR loginz'Zhihu: no state file, starting QR login)playwright.sync_apir   playwright_stealthr!   chromiumlaunchnew_contextnew_pageapply_stealth_sync_load_state_is_logged_inloggerinfoclose_qr_login_flow)r   r   r!   pbrowsercontextpagesuccesss           r   ensure_logged_inz&ZhihuPlaywrightEngine.ensure_logged_inv   s    	877777......_ 	!j'''66G))#'3775 *  G ##%%DGII((... (( G%%d++  KK OPPPMMOOO%	 	 	 	 	 	 	 	& MNNNNEFFF))'488GMMOOO3	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	s   C E$A EEEquestion_urlanswer_textc                    ddl m} ddlm}  |            5 }|j                            d          }|                    dddd	
          }	 |                     |          st          d          |	                                } |            
                    |           ddl}	|	                    d|          }
|
st          d|          |	                    dd|
                    d                                        d          }t                               d|            |                    t&          dd           |                    d           |                    |dd           |                    d           d|j        v s	d|j        v rt          d          |                    d          }|rt          d|           |                     |           |                     ||           |                     |          }|                     |           ||                                 cddd           S # |                                 w xY w# 1 swxY w Y   dS )at  Navigate to a Zhihu question page and publish an answer.

        Args:
            question_url: Full URL of the target Zhihu question.
            answer_text: Plain-text answer body to publish.

        Returns:
            URL of the successfully published answer.

        Raises:
            RuntimeError: If session is not loaded, or DOM interaction fails.
        r   r   r    Tr"   r$   r%   r&   r)   r*   z=No session loaded. Call ensure_logged_in() before publishing.Nzhttps?://\S+zInvalid question URL: z[>\'")\].,;]+$ /zNavigating to question: domcontentloaded:  
wait_untiltimeout  i0u    signinloginuu   知乎 session 已失效，导航时跳转至登录页。请重新点击「确认发布」重新触发扫码登录。uC  () => {
                        const t = document.title || '';
                        const b = document.body ? document.body.textContent : '';
                        return t.includes('404') || b.includes('你访问的页面不存在')
                            || b.includes('页面不存在');
                    }uB   问题页面返回 404，可能该问题已删除或 URL 有误：)r-   r   r.   r!   r/   r0   r1   r4   RuntimeErrorr2   r3   researchsubgrouprstripr6   r7   gotoZHIHU_HOME_URLwait_for_timeouturlevaluate_click_write_answer_fill_answer_submit_answer_save_stater8   )r   r@   rA   r   r!   r:   r;   r<   r=   _re_mis_404
answer_urls                r   publish_answerz$ZhihuPlaywrightEngine.publish_answer   s%    	877777......_ C	 !j'''66G))#'3775 *  G9 ''00 &W   ''))		,,T222 !   ZZ>> R&'P'P'PQQQ"ww'8"bhhqkkJJQQRUVVE|EEFFF
 		.5GQV	WWW%%d+++		,3Eu	UUU%%d+++ tx''7dh+>+>&W      &k]ikk   ((...!!$444!0066
   )))! GC	  C	  C	  C	  C	  C	  C	  C	 F GC	  C	  C	  C	  C	  C	  C	  C	  C	  C	 s*   6I$GI*I$I!!I$$I(+I(c                    | j                                         sdS 	 t          j        | j                             d                    }|                    dg           }|sdS |                    |           t                              dt          |           d| j                     dS # t          $ r(}t                              d|            Y d	}~dS d	}~ww xY w)
zRestore saved cookies into a browser context.

        Args:
            context: Playwright BrowserContext to inject cookies into.

        Returns:
            True if state file existed and cookies were loaded.
        Futf-8encodingcookieszLoaded z cookies from TzState load failed: N)r   existsjsonloads	read_textgetadd_cookiesr6   debuglen	Exceptionwarning)r   r<   r   rf   es        r   r4   z!ZhihuPlaywrightEngine._load_state   s     %%'' 	5
	:do777IIJJDhhy"--G u(((LLP3w<<PPtPPQQQ4 	 	 	NN44455555555	s   AB- $AB- -
C7CCc                 (   |                                 t          j                    d}| j                            t	          j        |dd          d           t                              dt          |d                    d	| j                    d
S )zPersist current browser cookies to the state file.

        Args:
            context: Playwright BrowserContext after successful authentication.
        )rf   saved_atFr	   )ensure_asciiindentrc   rd   zSaved rf   z cookies to N)	rf   timer   
write_textrh   dumpsr6   r7   rn   )r   r<   states      r   r\   z!ZhihuPlaywrightEngine._save_state  s     ((	
 
 	""Ju5;;; 	# 	
 	
 	
 	QSy!122QQQQRRRRRr   c                 N   	 |                     t          dd           t          D ]I}	 |                    |d           t                              d|             dS # t          $ r Y Fw xY wn4# t          $ r'}t                              d|            Y d	}~nd	}~ww xY wd
S )zNavigate to Zhihu home and verify login-specific elements appear.

        Args:
            page: Playwright Page with cookies loaded.

        Returns:
            True if logged-in UI elements are found.
        rE   rF   rG     rI   zLogin confirmed via selector: TzLogin check navigation failed: NF)rT   rU   _LOGIN_CHECK_SELECTORSwait_for_selectorr6   rm   ro   )r   r=   selrq   s       r   r5   z#ZhihuPlaywrightEngine._is_logged_in  s    
	@IIn1CUISSS-  **3*===LL!G#!G!GHHH44    H  	@ 	@ 	@LL>1>>????????	@us:   &A1 4A A1  
A-*A1 ,A--A1 1
B";BB"c                    |                     t          dd           |                     |           t          dt          dz             D ]}t
                              d| dt                      |                     |          }||                     d| d	            d
S | 	                    ||           | 
                    |          r-|                     |           |                     d            dS |t          k     rEt
                              d           |                    d           |                     |           |                     dt           d           d
S )a  Execute the full QR login loop (up to _MAX_QR_ATTEMPTS refreshes).

        Navigates to sign-in, switches to QR tab, screenshots the code,
        uploads to Slack, then polls for login success.

        Args:
            context: Playwright BrowserContext (for saving state on success).
            page: Active Playwright Page.

        Returns:
            True if login succeeded before all attempts exhausted.
        rE   rF   rG      zQR login attempt rD   Nu&   ❌ 无法截取知乎二维码（第 u    次），放弃登录。Fu,   ✅ 知乎登录成功，Cookie 已保存。Tz0QR timed out, refreshing page for new QR code...)rH   u,   ❌ 知乎二维码扫码超时（已重试 u>    次）。
请稍后重新发起 `mode zhihu-hunter` 流程。)rT   ZHIHU_SIGNIN_URL_switch_to_qr_tabrange_MAX_QR_ATTEMPTSr6   r7   _capture_qr_notify_slack_send_qr_to_slack_poll_for_loginr\   reload)r   r<   r=   attemptqr_paths        r   r9   z$ZhihuPlaywrightEngine._qr_login_flow2  s    			"/A5	QQQt$$$Q 01 455 	- 	-GKKHGHH6FHHIII&&t,,G""#nG#n#n#nooouu""7G444##D))   )))""#QRRRtt )))NOOO'9:::&&t,,,B;K B B B	
 	
 	
 ur   c                 .   t           D ]}	 |                    |          j        }|                    d          rI|                                 |                    d           t                              d|             dS }# t          $ r Y w xY wdS )zClick the QR-code tab on the login page if it exists.

        Args:
            page: Page showing the Zhihu sign-in form.
        rJ   r|   r%   zSwitched to QR tab via: N)	_QR_TAB_SELECTORSlocatorfirst
is_visibleclickrV   r6   rm   ro   )r   r=   r   tabs       r   r   z'ZhihuPlaywrightEngine._switch_to_qr_tab]  s     % 		 		Cll3''->>$>// IIKKK))#...LL!AC!A!ABBBFF	
    		 		s   A6B
BBc                 D   t           D ]}	 |                    |d           |                    |          j        }|                    t          t                               t                              d| dt                      t          c S # t          $ r Y w xY wt          
                    d           	 |                    t          t                    d           t          S # t          $ r(}t                              d	|            Y d
}~d
S d
}~ww xY w)aL  Screenshot the QR code element and save it to a temp file.

        Tries specific element selectors first; falls back to a full-page
        screenshot if none match.

        Args:
            page: Page currently showing the QR code.

        Returns:
            Path to the saved PNG file, or None on complete failure.
        @  r|   )pathzQR captured via selector 'u   ' → zAElement QR selectors failed; falling back to full-page screenshotFr   	full_pagez"Full-page screenshot also failed: N)_QR_IMAGE_SELECTORSr~   r   r   
screenshotstr_QR_TMP_PATHr6   r7   ro   rp   error)r   r=   r   elemrq   s        r   r   z!ZhihuPlaywrightEngine._capture_qrn  s5    ' 	 	C&&sD&999||C((.S%6%6777RRRLRRSSS####    	Z[[[	OO\!2!2eODDD 	 	 	LLAaAABBB44444	s*   BB
BB=/C- -
D7DDr   r   c           	      l   d| dt            d}	 |                    d          5 }| j                            | j        |dd| d|           d	d	d	           n# 1 swxY w Y   t
                              d
           d	S # t          $ r(}t
                              d|            Y d	}~d	S d	}~ww xY w)zUpload the QR code PNG to Slack with instructions.

        Args:
            qr_path: Local path to the QR code image.
            attempt: Current attempt number shown in the message.
        u@   🔑 *请用知乎 App 扫描以下二维码完成登录*（第 rD   uE    次）
⏱ 二维码有效期约 2 分钟，超时将自动刷新。rbzzhihu_qr.pngu   知乎登录二维码（第 u    次）)channelfilefilenametitleinitial_commentNzQR image uploaded to SlackzSlack QR upload failed: )	r   openr   files_upload_v2r   r6   r7   ro   r   )r   r   r   captionfrq   s         r   r   z'ZhihuPlaywrightEngine._send_qr_to_slack  s=   Mw M MYi M M M 		9d## q!11 /+IIII$+ 2                  KK455555 	9 	9 	9LL7A77888888888	9s:   B )AB AB !A"B 
B3B..B3c                    t          j                    t          z   }t          j                    |k     r	 |                    d t          dz             t
                              d|j                    dS # t          $ rD t          |t          j                    z
            }t
          
                    d| d           Y nw xY wt          j                    |k     dS )	a.  Poll until the page leaves the sign-in URL or timeout expires.

        Checks every _POLL_INTERVAL_S seconds for up to _LOGIN_TIMEOUT_S total.

        Args:
            page: Page that started at ZHIHU_SIGNIN_URL.

        Returns:
            True if login was detected before the deadline.
        c                     d| v od| vS )Nz	zhihu.comrL    )rW   s    r   <lambda>z7ZhihuPlaywrightEngine._poll_for_login.<locals>.<lambda>  s    s 2 Jxs7J r   i  r|   u"   Login confirmed — redirected to TzWaiting for QR scan... zs leftF)rv   	monotonic_LOGIN_TIMEOUT_Swait_for_url_POLL_INTERVAL_Sr6   r7   rW   ro   intrm   )r   r=   deadline	remainings       r   r   z%ZhihuPlaywrightEngine._poll_for_login  s     >##&66n))
J!!JJ,t3 "    KKKLLLt J J J4>+;+; ;<<	HyHHHIIIIIJ n)) us   AA8 8ACCc                 6   |                     d           |                    d           t          D ]}	 |                    |          j        }|                    dd           |                                 |                    d           t                              d|             dS # t          $ r Y w xY w|                     d	          }t          
                    d
|dd                     |                     d          }|r5|                    d           t                              d| d           dS 	 ddl}t          |                    d                    }|                    t!          |          d           | j                            | j        t!          |          dd           |                    d           n4# t          $ r'}t          
                    d|            Y d}~nd}~ww xY wt+          d          )u   Find and click the 写回答 button on a question page.

        Args:
            page: Page showing a Zhihu question.

        Raises:
            RuntimeError: If no write-answer button is found by any strategy.
        zwindow.scrollBy(0, 400)r%   visiblerK   ry   rI   i  u   Clicked '写回答' via: Na  () => {
                const els = document.querySelectorAll('button, a, [role="button"]');
                return Array.from(els)
                    .map(e => e.textContent.trim())
                    .filter(t => t.length > 0 && t.length < 30);
            }u.   '写回答' not found. Buttons/links on page:    u'  () => {
                const TARGETS = new Set(['写回答','回答问题','回答','发起回答','编辑回答']);
                const ZW_RE = /[​‌‍﻿ ]/g;
                const walker = document.createTreeWalker(
                    document.body, NodeFilter.SHOW_TEXT
                );
                let node;
                while ((node = walker.nextNode())) {
                    const cleaned = node.textContent.replace(ZW_RE, '').trim();
                    if (TARGETS.has(cleaned)) {
                        const el = node.parentElement;
                        const btn = el.closest('button, a, [role="button"]') || el;
                        btn.click();
                        return cleaned;
                    }
                }
                return null;
            }u2   Clicked '写回答' via text-node fallback (text='z')r   z.png)suffixFr   zzhihu_debug.pngu   🔍 发布调试截图：找不到「写回答」按钮，请确认页面状态（是否有弹窗/登录墙/按钮文字变了）。)r   r   r   r   T)
missing_okz Debug screenshot upload failed: u~   Could not find '写回答' button. Zhihu DOM may have changed — update _WRITE_ANSWER_SELECTORS in zhihu_playwright_engine.py)rX   rV   _WRITE_ANSWER_SELECTORSr   r   wait_forr   r6   rm   ro   rp   r7   tempfiler   mktempr   r   r   r   r   unlinkrN   )	r   r=   r   btn	all_textsclickedr   tmp_ss_ss_errs	            r   rY   z)ZhihuPlaywrightEngine._click_write_answer  sg    	/000c"""* 		 		Cll3''-9d;;;		%%d+++>>>???    MM
 
	 	X	RUSURUXXYYY --
 
(  	!!$'''KKXWXXXYYYF	IOOO(///8899FOOVO>>>--+[[*a .    MMTM**** 	I 	I 	INNGgGGHHHHHHHH	I K
 
 	
s+   A7B//
B<;B<BG 
H	"HH	textc                    d}t           D ]b}	                     |d                               |          j        }t                              d|             n# t          $ r Y _w xY w|t          d          |                                 	                    d           dt          ffd}                    d	|           	                    d
            |            t          |          dz  k    r-t                              dt          |           d           dS t                              d                               d|           	                    d
            |            t          |          dz  k    r-t                              dt          |           d           dS t                              d           |                                 j                            d           j                            d           j                            |d           	                    d           t                              dt          |           d           dS )u  Input answer text into Zhihu's Draft.js rich-text editor.

        Tries three strategies in order, verifying content after each attempt:
          1. execCommand('insertText') — most reliable for contenteditable
          2. Synthetic ClipboardEvent paste
          3. keyboard.type() — slow but universally compatible

        Args:
            page: Page with the answer editor open.
            text: Plain-text answer to insert.
        Nr   r|   zFound editor via: zTCould not find answer editor. Update _EDITOR_SELECTORS in zhihu_playwright_engine.pyi  r   c                  .                          d          S )z1Return current character count inside the editor.az  () => {
                    const ed = document.querySelector(
                        '.DraftEditor-content[contenteditable="true"],'
                        + '.RichText [contenteditable="true"],'
                        + '[contenteditable="true"].public-DraftEditor-content'
                    );
                    return ed ? ed.textContent.length : 0;
                })rX   )r=   s   r   _content_lengthz;ZhihuPlaywrightEngine._fill_answer.<locals>._content_length8  s    ==	 	 	r   a  (text) => {
                const ed = document.querySelector(
                    '.DraftEditor-content[contenteditable="true"],'
                    + '.RichText [contenteditable="true"],'
                    + '[contenteditable="true"].public-DraftEditor-content'
                );
                if (!ed) return false;
                ed.focus();
                document.execCommand('selectAll');
                document.execCommand('insertText', false, text);
                return true;
            }iX  r	   zFilled z! chars via execCommand insertTextz:execCommand insertText insufficient; trying ClipboardEventab  (text) => {
                const ed = document.querySelector(
                    '.DraftEditor-content[contenteditable="true"],'
                    + '.RichText [contenteditable="true"],'
                    + '[contenteditable="true"].public-DraftEditor-content'
                );
                if (!ed) return;
                ed.focus();
                const dt = new DataTransfer();
                dt.setData('text/plain', text);
                ed.dispatchEvent(new ClipboardEvent('paste', {
                    bubbles: true, cancelable: true, clipboardData: dt
                }));
            }z chars via ClipboardEventzAClipboardEvent also insufficient; falling back to keyboard.type()z	Control+aDelete   )delayz chars via keyboard.type())_EDITOR_SELECTORSr~   r   r   r6   rm   ro   rN   r   rV   r   rX   rn   r7   rp   keyboardpresstype)r   r=   r   editorr   r   s    `    r   rZ   z"ZhihuPlaywrightEngine._fill_answer  s    $ 	 	C&&sD&999c**07#77888    >I  
 	c"""	 	 	 	 	 	 	 	 	
 	
 	
 	c"""?D		Q..KKN#d))NNNOOOF 	STTT 	
 	
 	
" 	c"""?D		Q..KKF#d))FFFGGGF 	Z[[[K(((H%%%4q)))c"""Cc$iiCCCDDDDDs   AA
A+*A+c                    t           D ]}	 |                    |          j        }|                    dd           |                                 t
                              d|            |                    d           |                    d          }|r!t
                              d|            |c S t
                              d|j	                    |j	        c S # t          $ r Y w xY wt          d	          )
u	  Click the publish button and return the answer URL.

        After clicking 发布, waits for the page to update then extracts the
        answer URL (``/question/<id>/answer/<id>``) from the DOM.  Falls back
        to the question URL if no answer link is found.

        Args:
            page: Page with the answer text already filled in.

        Returns:
            URL of the published answer, or question URL as fallback.

        Raises:
            RuntimeError: If the publish button cannot be found.
        r   i  r   zClicked publish via: r{   aH  () => {
                        // Look for a canonical answer link on the page
                        const links = Array.from(
                            document.querySelectorAll('a[href*="/answer/"]')
                        );
                        // Prefer links that point to the current question
                        const sorted = links
                            .map(a => a.href)
                            .filter(h => /\/question\/\d+\/answer\/\d+/.test(h));
                        return sorted.length ? sorted[sorted.length - 1] : null;
                    }zAnswer published: z!Answer submitted (URL fallback): zVCould not find publish button. Update _PUBLISH_SELECTORS in zhihu_playwright_engine.py)_PUBLISH_SELECTORSr   r   r   r   r6   r7   rV   rX   rW   ro   rN   )r   r=   r   r   r`   s        r   r[   z$ZhihuPlaywrightEngine._submit_answer}  s-     & 	 	Cll3''-9d;;;		9C99:::%%d+++ "]]
 
  &KK AZ A ABBB%%%% JJJKKKx    F
 
 	
s   B,C%:(C%%
C21C2messagec                     	 | j                             | j        |           dS # t          $ r(}t                              d|            Y d}~dS d}~ww xY w)zSend a plain text message to the notify channel.

        Args:
            message: Message text (supports Slack mrkdwn formatting).
        )r   r   zSlack notify failed: N)r   chat_postMessager   ro   r6   r   )r   r   rq   s      r   r   z#ZhihuPlaywrightEngine._notify_slack  s    	6..+ /       	6 	6 	6LL444555555555	6s   !% 
AAA)N)r   N)__name__
__module____qualname____doc__r   r   r   r   r   boolr?   ra   r4   r\   r5   r9   r   r   r   r   r   rY   rZ   r[   r   r   r   r   r   r   V   s$         &*	B BB B TN	B
 
B B B B."$ " " " "HS 3 S S S S S  S  S  S nd    .S S S S$T    0)t ) ) ) )V   "8D>    @9 9 9 9 9 9 90t    :S
 S
 S
 S
jcEs cEt cE cE cE cEJ4
c 4
 4
 4
 4
p6S 6T 6 6 6 6 6 6r   r   )r   rh   rv   pathlibr   typingr   	slack_sdkr   health.utils.logging_configr   r   r6   r   r   r}   r   r   r   r   rU   r   r   r   r   r   r   r   r   <module>r      sQ                        4 4 4 4 4 4	h		                   2 +D,--   k	6 k	6 k	6 k	6 k	6 k	6 k	6 k	6 k	6 k	6r   