
    iq                        U d Z ddlZddlZddlZddlmZmZmZ ddlmZ ddl	m
Z
mZ ddlmZmZ ddlmZ ddlmZ dd	lmZ dd
lmZ ddlmZ  ee          Zg dZe
e         ed<   dZdddddddZe eef         ed<   dZ!dZ"dZ#dZ$ ej%        d          Z& G d de          Z' G d de          Z( G d d          Z)dS ) u{  Zhihu Hunter — vault scanning, question discovery, and answer drafting.

Workflow:
    1. scan_and_hunt()  →  scans recent vault files, extracts keywords,
                           searches Tavily for matching Zhihu questions.
    2. draft_answer()   →  uses existing RAG + ZhihuGenerator to produce
                           a Zhihu-style answer draft ready for review.
    N)datetimetimezone	timedelta)Path)ListOptional)	BaseModelField)setup_logger)	GeminiLLM)ZhihuGenerator)ObsidianIndexer)ChromaVectorStore)notesArticleauto_report/deep-diveCLAW/deepfact	Clippings
WATCH_DIRSr   r   r   r   r   )r   article	clippingsclawdeepfactz	deep-diveDIR_SHORTCUTS      
      zzhihu\.com/question/\d+c                       e Zd ZU dZeed<   eed<   dZee         ed<    ee	          Z
ee         ed<    ed           Zeed	<   dZee         ed
<   dZee         ed<   dZee         ed<   dS )ZhihuQuestionz2A Zhihu question candidate surfaced by the hunter.titleurlNsnippetdefault_factorykeywordsc                  d    t          j        t          j                                                  S Ntzr   nowr   utc	isoformat     5/root/projects/butler/slack_bot/zhihu/zhihu_hunter.py<lambda>zZhihuQuestion.<lambda>E   !     = = = G G I I r0   found_atdescriptionsource_fileoutline)__name__
__module____qualname____doc__str__annotations__r#   r   r
   listr&   r   r4   r5   r6   r7   r/   r0   r1   r    r    =   s         <<JJJ	HHH!GXc]!!!%555Hd3i555EII  Hc    "&K#%%%!%K#%%%!GXc]!!!!!r0   r    c                       e Zd ZU dZeed<   eed<    ed           Zeed<    ee	          Z
ee         ed<   dZee         ed	<   dS )
AnswerDraftz/A generated answer draft awaiting human review.questioncontentc                  d    t          j        t          j                                                  S r(   r+   r/   r0   r1   r2   zAnswerDraft.<lambda>R   r3   r0   r$   generated_atrag_sourcesN
vault_file)r8   r9   r:   r;   r    r=   r<   r
   rD   r>   rE   r   rF   r   r/   r0   r1   r@   r@   L   s         99LLLII  L#    #U4888Kc888 $J$$$$$r0   r@   c            
       $   e Zd ZdZdedededdfdZ	 	 	 d)d
ede	e
e                  dede
e         fdZdede
e         fdZdede
e         fdZdede
e         fdZdedeee	e         f         fdZdedefdZd*dededefdZ	 	 d+d
ede	e
e                  dede
e         fdZ	 d,dede	e
e                  de
e         fdZ	 	 d-de
e         d ed!edefd"Z	 d*d#edede
e         fd$Zd%e
e         de
e         fd&Zd'ede
e         fd(ZdS ).ZhihuHunterzDiscovers Zhihu questions from vault content and drafts answers.

    Example:
        hunter = ZhihuHunter(vault_path, vector_store, indexer)
        questions = hunter.scan_and_hunt(since_days=3)
        draft = hunter.draft_answer(questions[0])
    
vault_pathvector_storeindexerreturnNc                 V    || _         || _        || _        t                      | _        dS )a  Initialize the hunter.

        Args:
            vault_path: Root path of the Obsidian vault.
            vector_store: ChromaDB vector store for semantic retrieval.
            indexer: ObsidianIndexer for keyword-based search and writing samples.
        N)rI   rJ   rK   r   llm)selfrI   rJ   rK   s       r1   __init__zZhihuHunter.__init__c   s)     %(;;r0       
since_daysdirs
topic_hintc                    d|pd |rd| dndz   }t                               d| d           |                     |||          }|st                               d	           g S t                               d
|            |                     |          }t                               dt	          |           d           |S )a  Scan vault files and surface matching Zhihu questions.

        Args:
            since_days: Age threshold for non-Article directories.
            dirs: Restrict scan to these vault dirs (relative paths). None = all WATCH_DIRS.
            topic_hint: Optional focus hint injected into keyword extraction prompt.

        Returns:
            List of ZhihuQuestion candidates (up to _MAX_QUESTIONS_TOTAL).
        zdirs=allz	, topic=''rR   zZhihuHunter: scanning vault ())rT   rU   z&No keywords extracted from vault fileszExtracted keywords: Found z Zhihu questions)loggerinfo_scan_vault_for_keywords_find_questionslen)rO   rS   rT   rU   	scan_descr&   	questionss          r1   scan_and_huntzZhihuHunter.scan_and_huntw   s      ,DME++J/^/H:/H/H/H/H\^_	@I@@@AAA00$S]0^^ 	KK@AAAI5855666((22	=S^^===>>>r0   rel_pathc                 J   | j         |z  }|                                st          d|           |                    dd          }t	          j        dd|t          j                                                  }t          	                    d| d	t          |           d
           |                     |dd                   }|st          	                    d           g S t          	                    d|            |                     |          }|D ]	}||_        
|S )a  Extract keywords from one vault file and find matching Zhihu questions.

        The returned questions carry ``source_file`` set to ``rel_path`` so that
        ``draft_answer()`` will ground the generated answer in that article.

        Args:
            rel_path: Path relative to vault_path (e.g. "Article/8-xxx-Revised.md").

        Returns:
            List of ZhihuQuestion candidates with source_file populated.

        Raises:
            FileNotFoundError: If the file does not exist under the vault root.
        u   文件不存在：utf-8ignoreencodingerrors^---\n.*?\n---\nrR   flagszscan_single_file:  ( chars)N  z&No keywords extracted from single filezKeywords from file: )rI   existsFileNotFoundError	read_textresubDOTALLstripr[   r\   r_   _extract_keywords_with_llmr^   r6   )rO   rc   	file_pathtextr&   ra   qs          r1   scan_single_filezZhihuHunter.scan_single_file   s.    Oh.	!! 	F#$D$D$DEEE""GH"EEv)2t29EEEKKMMGGGSYYGGGHHH224;?? 	KK@AAAI5855666((22	 	% 	%A$AMMr0   patternc           	         t          | j                            |          d d          }|st          d|           d |D             }t                              d| dt          |           d|            g }t          |          d	k    rd
nd}|D ]}	 |                    dd          }t          j	        dd|t          j
                                                  }|                    d|j         d|d|                     w# t          $ r*}t                              d| d|            Y d}~d}~ww xY wd                    |          }	|                     |	dd                   }
|
st                              d           g S t                              d|
            |                     |
          }t'          |d                             | j                            }|D ]	}||_        
|S )aX  Find vault files matching a glob pattern and hunt Zhihu questions.

        All matched files are combined for keyword extraction, so the questions
        reflect the full set of content.  The most-recently-modified matched file
        is stored as ``source_file`` to ground the answer draft.

        Args:
            pattern: Glob pattern relative to vault_path
                     (e.g. "Article/*openclaw*-Revised.md").

        Returns:
            List of ZhihuQuestion candidates with source_file set.

        Raises:
            FileNotFoundError: If no files match the pattern.
        c                 4    |                                  j        S N)statst_mtime)ps    r1   r2   z-ZhihuHunter.scan_glob_files.<locals>.<lambda>   s    !&&((+ r0   T)keyreverseu   没有文件匹配：c                     g | ]	}|j         
S r/   )name).0r   s     r1   
<listcomp>z/ZhihuHunter.scan_glob_files.<locals>.<listcomp>   s    )))A)))r0   zscan_glob_files: 'u   ' → z files:    i     re   rf   rg   rj   rR   rk   []
NCould not read : 

---

@  z-No keywords extracted from glob-matched fileszKeywords from glob: r   )sortedrI   globrq   r[   r\   r_   rr   rs   rt   ru   rv   appendr   OSErrordebugjoinrw   r^   r<   relative_tor6   )rO   r|   matchednamespartsper_file_budgetfpry   ecombinedr&   ra   primaryrz   s                 r1   scan_glob_fileszZhihuHunter.scan_glob_files   sM   " O  ))++
 
 

  	G#$EG$E$EFFF)))))UUUGUUeUUVVV "%g,,!"3"3$$ 	: 	:B:||WX|FFv12t29MMMSSUUEEET2B?2B-CEEFFFF : : :8r88Q8899999999: !%%e,,228ETE?CC 	KKGHHHI5855666((22	 gaj,,T_==>> 	$ 	$A#AMMs   A3D
E D==Er"   c           
         t          j        dd|                              d                              d          d                             d          d                             d          }|                     |          \  }}t                              d|d	|d
t          |pd           d           t          |||          gS )ak  Create a question card directly from a Zhihu question URL.

        Used when the user pastes a URL they were invited to answer.
        Fetches the page title and description so the card displays readable info.

        Args:
            url: A zhihu.com/question/<id> URL.

        Returns:
            Single-element list containing the ZhihuQuestion.
        [>\'")\].,;]+$rR   <?r   #/zDirect URL hunt: u    → title=z, desc=z chars)r!   r"   r5   )
rs   rt   lstripsplitrstrip_fetch_question_infor[   r\   r_   r    )rO   r"   	clean_urlr!   r5   s        r1   hunt_direct_urlzZhihuHunter.hunt_direct_url   s     F,b#66==cBBHHMMaPVVWZ[[\]^eefijj	!66yAA{n	nnnnPST_TecePfPfnnnoooEykRRRSSr0   c                 N   t          t                                                    j        d         dz  dz  }	 ddlm} ddlm} ddl} |            5 }|j	        
                    d	          }|                    d
          }|                                rV|                    |                    d                    }	|	                    dg           }
|
r|                    |
           |                                } |                                |           |                    |dd           |                    d           d}|                    d          }|r&|                                                                }|sC|                                pd}|                    d          d                                         }t5          j        dd|          }d}|                    d          }|rv|                                                                }|                    dd                                          }t5          j        dd|                                          }|sh|                    d          }|rQ|                    d          pd                                }t5          j        dd|                                          }|rd|v rd}|r|dd         }nd}|                                 |r|dk    r||fcddd           S ddd           n# 1 swxY w Y   n7# t>          $ r*}t@          !                    d| d |            Y d}~nd}~ww xY w|"                    d!                              d!          d"         }d#| dfS )$a  Fetch the question title and description via Playwright.

        Falls back to a URL-derived label if Playwright is unavailable or fails.

        Args:
            url: Cleaned question URL.

        Returns:
            Tuple of (title, description). Description may be None.
           datazzhihu_state.jsonr   )sync_playwright)StealthNT)headlesszoMozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/122.0.0.0 Safari/537.36)
user_agentre   )rh   cookiesdomcontentloadedi:  )
wait_untiltimeouti  rR   zh1.QuestionHeader-titleu	    - 知乎u   ^\(\d+ 条消息\)\s*zdiv.QuestionRichTextu   ​u   …?\s*显示全部\s*$zmeta[property="og:description"]rB   u   …$u   知乎，中文互联网u   知乎z%Playwright question fetch failed for r   r   u   知乎问题 #)#r   __file__resolveparentsplaywright.sync_apir   playwright_stealthr   jsonchromiumlaunchnew_contextrp   loadsrr   getadd_cookiesnew_pageapply_stealth_syncgotowait_for_timeoutquery_selector
inner_textrv   r!   r   rs   rt   replaceget_attributeclose	Exceptionr[   r   r   )rO   r"   
state_filer   r   _jsonr   browsercontextr   r   pager!   title_elr5   desc_elog_elr   qids                      r1   r   z ZhihuHunter._fetch_question_info  sK    (^^++--5a86ADVV
?	M;;;;;;222222     "" 8.a*++T+::!--9 .   $$&& 5 ;;z';';W';'M'MNND"hhy"55G 5++G444''))		,,T222		#*<e	LLL%%d+++ ../HII :$//117799E @ JJLL.BE!KK44Q7==??E7UCC #--.DEE ^")"4"4"6"6"<"<">">K"-"5"5h"C"C"I"I"K"KK"$&)CR"U"U"["["]"]K" O //0QRRE O',':':9'E'E'K&R&R&T&T&(fWb+&F&F&L&L&N&N '#=#L#L"&K '"-ete"4KK"&K .Uh.. +-q8. 8. 8. 8. 8. 8. 8. 8. 8. 8. 8. 8. 8. 8. 8. 8. 8. 8. 8. 8. 8. 8. 8.r  	M 	M 	MLLKKKKKLLLLLLLL	M jjoo##C((,%%%t++sB   L; KL/L; #L; /L33L; 6L37L; ;
M/ M**M/queryc                    t           j                            d          }|sdS 	 ddlm}  ||          }|                    |dddd	g
          }|                    dg           }|sdS d |D             }d                    |          S # t          $ r(}t          	                    d|            Y d}~dS d}~ww xY w)a  Search the web for recent articles/news about the query topic.

        Results are injected into the answer prompt so it reads like it
        comes from someone who's actively following the latest developments.

        Args:
            query: Topic or question title to search.

        Returns:
            Formatted string of recent snippets, or empty string on failure.
        TAVILY_API_KEYrR   r   TavilyClientapi_keybasicr   Z   	zhihu.com)r   search_depthmax_resultsdaysexclude_domainsresultsc                     g | ]N}d |d          d|d                              d          d          d|                    dd          d	d
          OS )z- r!   rm   r"   r      z): rB   rR   N   )r   r   )r   rs     r1   r   z9ZhihuHunter._fetch_recent_web_context.<locals>.<listcomp>n  st        [QwZZZ1U8>>##6#6q#9ZZaeeIr>R>RSWTWSW>XZZ  r0   
z!Recent web context fetch failed: N)
osenvironr   tavilyr   searchr   r   r[   r   )	rO   r   r   r   clientr   itemsr   r   s	            r1   _fetch_recent_web_contextz%ZhihuHunter._fetch_recent_web_contextR  s    *..!122 	2	++++++!\'222Fmm$!, $  G KK	2..E r   E 99U### 	 	 	LL@Q@@AAA22222	s   AB , B 
B?B::B?rA   feedbackc           	         t                               d|j         |rd|dd          dndz              g }| j                            |j        d          }|D ].}|                    t          |j                  j                   /| j	        
                    |j        d	
          }|D ]Y}|                    d          d                             d                                          }|r|                    |           Zd}	|j        r| j        |j        z  }
	 |
                    dd          }t!          j        dd|t           j                                                  }|dd         }	|                    dt          |j                  j                   t                               d|j         dt)          |	           d           n<# t*          $ r/}t                               d|j         d|            Y d}~nd}~ww xY wt/          | j	        | j                  }|j        }|j        r|d|j         z  }|j        r|d|j         z  }|j        r7|d|j         z  }t                               d|j        dd                    |	r|d|	 z  }|                     |j                  }|r3|d| z  }t                               d t)          |           d           |r|d!| z  }|                    |g "          \  }}t;          ||t=          t>                               |                    #          S )$u  Generate a Zhihu-style answer draft for a question.

        Delegates to the existing ZhihuGenerator (reuses prompt + style guide).
        The RAG context is enriched by both semantic (ChromaDB) and
        keyword-based (ObsidianIndexer) retrieval.

        Args:
            question: ZhihuQuestion to answer.
            feedback: Optional revision instructions from the user (e.g. "更口语化一些").

        Returns:
            AnswerDraft with generated content and source list.
        zDrafting answer for: z [feedback: N(   ]rR   r   )top_k   )limitr   r   z# re   rf   rg   rj   rk   i  zLoaded primary article: rm   rn   zCould not read source file r   u   

问题详细描述：u    | 背景：u   

用户提供的回答思路（必须严格按照这个逻辑框架展开，从知识库中找到对应的内容来支撑每个论点，不要自由发挥框架结构）：
zInjected user outline: <   ur   

主要参考文章（请基于此文的观点和论据来回答，可直接引用其中的分析和结论）：
u   

近期相关资讯（最近90天内的动态，用来补充最新工具/发布/讨论，让回答更有时效感，自然地融入即可，不要刻意罗列）：
zInjected recent web context (u   

用户修改意见：)history)rA   rB   rE   )!r[   r\   r!   rJ   search_knowledger   r   source_pathr   rK   r   r   rv   r6   rI   rr   rs   rt   ru   insertr_   r   warningr   r5   r#   r7   r   chatr@   r>   dictfromkeys)rO   rA   r   rE   semantic_resultsr   keyword_resultsnote
first_lineprimary_articler   rawr   	generatorprompt_inputrecent_contextrB   _s                     r1   draft_answerzZhihuHunter.draft_answerw  s    	<HN<<ck@s@_xX[Y[X[}@_@_@_@_qstuuu "$,==hnTU=VV! 	9 	9AtAM2278888,--hnA-FF# 	/ 	/DD))!,22488>>@@J /"":...  		Z8#77BZllGHlEEf0"cKKKQQSS"%ete*""1d8+?&@&@&EFFFlx7KllsSbOcOclllmmmm Z Z ZXX=QXXUVXXYYYYYYYYZ #4<1BCC	~ 	ON8LNNNL 	>=8+;===L  	MHu}  vFH HL KKK(2B3B32GKKLLL 	TBQT TL 77GG 	Vq`nq qL KKTN8K8KTTTUUU 	CBBBBL^^L"^==
T]];7788
 
 
 	
s   !B5G 
H!%HHc                 &   t          j        t          j                  t	          |          z
  }|                     ||          }|sg S |pt          }|t          gk    rdnd}|                     ||          }| 	                    ||          S )am  Collect vault files and extract topic keywords via LLM.

        Args:
            since_days: Age threshold for non-Article directories.
            dirs: Dirs to scan; None = all WATCH_DIRS.
            topic_hint: Optional focus instruction for keyword extraction.

        Returns:
            Deduplicated list of topic keywords (up to _MAX_KEYWORDS).
        r)   )r   )rT   r     )max_chars_per_file)rU   )
r   r,   r   r-   r   _collect_recent_filesr   _ARTICLE_DIR_read_files_summaryrw   )	rO   rS   rT   rU   cutofffiles	scan_dirsr   r   s	            r1   r]   z$ZhihuHunter._scan_vault_for_keywords  s      ...
1K1K1KK**6*== 	I &J	!*|n!<!<###++Eo+VV..xJ.OOOr0   r  c                 T   ||nt           }g }|D ]h}| j        |z  }|                                st                              d|            ?g }|                    d          D ]}	 |                                j        }	|t          k    r't          j
        |	t          j                  }
|
|k     rN|j                                                            d          }|                    |	||f           # t"          $ r Y w xY w|t          k    r|                    d            t&          }n|                    d            t(          }|                    d	 |d|         D                        jt                              d
t-          |           d| d           |S )av  Walk vault directories and return candidate files.

        Article directory:
          - No time filter (always included regardless of cutoff)
          - Revised files sorted first, then by mtime descending
          - Up to _MAX_FILES_ARTICLE slots
        Other directories:
          - Only files modified after cutoff
          - Sorted by mtime descending
          - Up to _MAX_FILES_PER_DIR slots

        Args:
            cutoff: Freshness threshold for non-Article directories.
            dirs: Directories to scan (relative paths). None = all WATCH_DIRS.

        Returns:
            List of Path objects.
        NzWatch dir not found, skipping: z*.mdr)   z-revised.mdc                 ,    | d         rdnd| d          fS )Nr   r   r/   ts    r1   r2   z3ZhihuHunter._collect_recent_files.<locals>.<lambda>  s    AaD/?qqa!A$.G r0   )r   c                     | d          S )Nr   r/   r  s    r1   r2   z3ZhihuHunter._collect_recent_files.<locals>.<lambda>  s    qte r0   c              3   "   K   | ]
\  }}}|V  d S r   r/   )r   r  r   s      r1   	<genexpr>z4ZhihuHunter._collect_recent_files.<locals>.<genexpr>!  s(      EE71aQEEEEEEr0   z
Collected z files from vault (dirs=rY   )r   rI   is_dirr[   r   rglobr   r   r  r   fromtimestampr   r-   r   lowerendswithr   r   sort_MAX_FILES_ARTICLE_MAX_FILES_PER_DIRextendr_   )rO   r  rT   r  	collectedrel_dir	watch_dir
candidatesmd_filemtimefile_dt
is_revised	max_filess                r1   r  z!ZhihuHunter._collect_recent_files  s   . !,DD*	 "	  	F 	FG'1I##%% JyJJKKK :<J$??622 
 
	#LLNN3E,.."*"88<"P"P"P"V++$!(!3!3!5!5!>!>}!M!MJ%%uj'&BCCCC   H ,&&$G$GHHH.		OO444.	EEj).DEEEEEEEV#i..VV)VVVWWWs   )A
C94AC99
DDr   r  r  	max_charsr  c                    g }|}|D ]}|dk    r n	 |                     dd          }t          j        dd|t          j                                                  }|dt          ||                   }|                    d	|j         d
|            |t          |          z  }# t          $ r*}	t                              d| d|	            Y d}	~	d}	~	ww xY wd                    |          S )a  Read and concatenate file contents up to a character budget.

        Each file is capped at max_chars_per_file so that no single large file
        exhausts the total budget and crowds out other directories.

        Args:
            files: List of markdown file paths.
            max_chars: Total character budget across all files.
            max_chars_per_file: Per-file cap to ensure directory diversity.

        Returns:
            Concatenated content string with file-name headers.
        r   re   rf   rg   rj   rR   rk   Nr   r   r   r   r   )rr   rs   rt   ru   rv   minr   r   r_   r   r[   r   r   )
rO   r  r0  r  r   	remainingfry   chunkr   s
             r1   r  zZhihuHunter._read_files_summary&  s%   & 	 	9 	9AA~~9{{GH{EEv12t29MMMSSUU@c"4i@@@A333E33444SZZ'		 9 9 97q77A77888888889 !!%(((s   BB((
C2 CCcombined_textc                 p   |rd| dnd}dt            d| d|dd          d	}	 | j                            |g           \  }}d
 |                                                                D             }|dt                    S # t
          $ r)}t                              d|            g cY d}~S d}~ww xY w)u,  Use LLM to distill topic keywords from aggregated vault content.

        Args:
            combined_text: Concatenated recent vault content.
            topic_hint: Optional focus instruction (e.g. "agent开发").

        Returns:
            List of keyword strings (up to _MAX_KEYWORDS).
        u   
- 重点关注与「u9   」相关的话题，优先提取这个方向的关键词rR   u   你是一位知乎内容运营专家。请从以下笔记内容中提炼出适合在知乎搜索问题的关键词。

要求：
- 输出 u@   个以内的关键词，每行一个，无序号无符号
- 关键词要在知乎上有真实讨论热度，即普通中国用户会关心并提问的话题领域
- 避免过于偏门的专有名词（如具体项目代号、小众产品名），要用它所属的上一级概念
  例：「OpenClaw」→「AI智能体安全」；「BadJs」→「微信监控机制」；「Moon Monitor」→「健康监测设备」
- 不要太泛（如"AI""技术""健康"），也不要太窄（如只有一个工具代码名称）
- 优先选择最近讨论最多、最有深度的话题uA   
- 只输出关键词列表，不要解释

=== 笔记内容 ===
Nro   r   c                     g | ]Q}|                                 |                                                     d           =|                                 RS )r   )rv   
startswith)r   lines     r1   r   z:ZhihuHunter._extract_keywords_with_llm.<locals>.<listcomp>m  s^       ::<< )-

(?(?(D(D

  r0   zKeyword extraction failed: )_MAX_KEYWORDSrN   generate_responserv   
splitlinesr   r[   error)	rO   r6  rU   topic_instructionpromptresponser  r&   r   s	            r1   rw   z&ZhihuHunter._extract_keywords_with_llmK  s     "jZjjjj! 	   :K  uu  
	(44VR@@KHa $NN,,7799  H
 N]N++ 	 	 	LL:q::;;;IIIIII	s   AB 
B5B0*B50B5r&   c                 d   ddl m} t          j                            d          }|st          d           ||          }t                      }g }|D ]}	 |                     ||          }|D ]C}	|	                    dd          }
t          	                    |
          s4t          j	        d|
          }|sLt          j        d	d|                    d                                        d
          d                             d          d                             d          }t          j	        d|          r||v r|                    |           |                    t#          |	                    d|          ||	                    dd          dd         pd|g                     Ec# t$          $ r+}t&                              d| d|            Y d}~d}~ww xY wt&                              dt-          |           dt.           d           |dt.                   S )a  Search Tavily for Zhihu questions matching each keyword.

        Collects candidates from ALL keywords before truncating so that later
        keywords (which can cover different facets) are not silently dropped.

        Args:
            keywords: List of topic keywords to search.

        Returns:
            Deduplicated ZhihuQuestion list (up to _MAX_QUESTIONS_TOTAL).
        r   r   r   zTAVILY_API_KEY not setr   r"   rR   zhttps?://\S+r   r   r   r   z/answer/|/comment/r!   rB   N   )r!   r"   r#   r&   z"Tavily search failed for keyword 'z': rZ   z Zhihu questions (capped at rY   )r   r   r   r   r   RuntimeErrorset_tavily_search_MIN_QUESTION_URL_REr   rs   rt   groupr   r   addr   r    r   r[   r   r\   r_   _MAX_QUESTIONS_TOTAL)rO   r&   r   r   r   	seen_urlsra   kwr   itemr"   _mr   r   s                 r1   r^   zZhihuHunter._find_questionsy  sw    	('''''*..!122 	97888g...!ee	)+	 	P 	PBP--fb99#  D((5"--C/66s;; ! ?C88B !  "'8"bhhqkk J J P PQT U UVW X ^ ^_b c cde f m mnq r rIy!6	BB !  I-- MM),,,$$%"&((7B"7"7 )$(HHY$;$;DSD$A$IT&(T	     .  P P PNBNN1NNOOOOOOOOP 	`S^^``I]```aaa...//s   EF88
G- G((G-keywordc                     d| }t                               d|            |                    |dddgd          }|                    dg           S )	a  Run a single Tavily search scoped to zhihu.com/question.

        Args:
            client: Authenticated TavilyClient instance.
            keyword: Search keyword.

        Returns:
            List of result dicts from Tavily (url, title, content, score).
        zsite:zhihu.com/question zTavily search: r   r   r   im  )r   r   r   include_domainsr   r   )r[   r   r   r   )rO   r   rO  r   rA  s        r1   rF  zZhihuHunter._tavily_search  sk     5744.u..///== (M ! 
 
 ||Ir***r0   )rQ   NrR   )rR   )NrR   r   )r   r  )r8   r9   r:   r;   r   r   r   rP   intr   r   r<   r    rb   r{   r   r   tupler   r   r@   r  r]   r   r  r  rw   r^   r  rF  r/   r0   r1   rH   rH   Z   s@         ( !	
 
   , $(	  tCy! 	
 
m	   :" "m1D " " " "H6s 6tM/B 6 6 6 6pT3 T4+> T T T T"O, O,c8C=6H0I O, O, O, O,b#s #s # # # #JT
 T
] T
c T
; T
 T
 T
 T
v %)	P PP tCy!P 	P
 
cP P P P> %)9 99 tCy!9 
d	9 9 9 9| "%	#) #)Dz#) #)  	#)
 
#) #) #) #)P * ** * 
c	* * * *\40S	 40d=6I 40 40 40 40l+c +d4j + + + + + +r0   rH   )*r;   r   rs   requestsr   r   r   pathlibr   typingr   r   pydanticr	   r
   health.utils.logging_configr   slack_bot.llm.geminir   slack_bot.obsidian.generatorsr   slack_bot.obsidian.indexerr   slack_bot.obsidian.vector_storer   r8   r[   r   r<   r=   r  r   r  r%  r$  r;  rJ  compilerG  r    r@   rH   r/   r0   r1   <module>r^     s     
			 				  2 2 2 2 2 2 2 2 2 2       ! ! ! ! ! ! ! ! % % % % % % % % 4 4 4 4 4 4 * * * * * * 8 8 8 8 8 8 6 6 6 6 6 6 = = = = = =	h		
  
DI     !!)! !tCH~       !rz"<== 
" " " " "I " " "	% 	% 	% 	% 	%) 	% 	% 	%h	+ h	+ h	+ h	+ h	+ h	+ h	+ h	+ h	+ h	+r0   