Coverage for slack_bot / obsidian / generators.py: 0%

170 statements  

« prev     ^ index     » next       coverage.py v7.13.4, created at 2026-03-02 17:44 +0800

1import os 

2from typing import List, Dict, Tuple, Optional 

3from slack_bot.llm.gemini import GeminiLLM 

4from slack_bot.obsidian.indexer import ObsidianIndexer 

5from slack_bot.tools.web import WebSearchTool 

6from health.utils.logging_config import setup_logger 

7from health.utils.time_utils import get_current_time_str 

8 

# Module-level logger shared by every generator in this file.
logger = setup_logger(__name__)


# Base system instruction for all generators below.  The {current_time}
# placeholder is filled in by BaseGenerator.__init__ via str.format().
OBSIDIAN_SYSTEM_PROMPT = """You are Butler (Obsidian Edition), an intelligent knowledge assistant connected to the user's second brain.
Current Time: {current_time}

Your Role:
You are NOT a health assistant. You are a Knowledge Partner designed to help the user think, write, and communicate. You draw directly from the user's local Obsidian notes, values, and methodology.

Core Capabilities:
1. Writing: You mimic the user's unique writing style (as defined in `writing_style.md`).
2. Communication: You draft high-EQ, logically rigorous replies for professional contexts (as defined in `REPLY-SAMPLE.md`).
3. Decision: You act as a "Devil's Advocate" and strategic advisor using the user's decision frameworks (GPA, IPO).

Guidelines:
- **Style Alignment**: Strictly adhere to the tone and sentence structures found in the provided samples.
- **Data Source**: Rely primarily on the provided context (RAG) and loaded markdown files.
- **Identity**: You are pragmatic, rational, and value "technological optimism" and "intellectual honesty".
"""

28 

class BaseGenerator:
    """Common plumbing for all Obsidian generators.

    Owns the LLM client (seeded with the shared system prompt), the vault
    indexer, and file access relative to the workspace root.
    """

    def __init__(self, indexer: ObsidianIndexer):
        formatted_prompt = OBSIDIAN_SYSTEM_PROMPT.format(current_time=get_current_time_str())
        self.llm = GeminiLLM(system_instruction=formatted_prompt)
        self.indexer = indexer
        # Three dirname() hops: .../slack_bot/obsidian/generators.py -> workspace root.
        self.workspace_root = os.path.dirname(os.path.dirname(os.path.dirname(__file__)))

    def _read_file(self, filename: str) -> str:
        """Read a UTF-8 text file relative to the workspace root.

        Returns the file contents, or "" on any failure (missing file,
        permissions, decode error) — callers treat missing guides as
        best-effort context, not a hard error.
        """
        path = os.path.join(self.workspace_root, filename)
        try:
            with open(path, "r", encoding="utf-8") as f:
                return f.read()
        except Exception as e:
            # Bug fix: log the actual path instead of the literal "(unknown)",
            # so the log identifies which guide file failed to load.
            logger.error(f"Failed to read {path}: {e}")
            return ""

    def chat(self, user_input: str, history: List[Dict]) -> Tuple[str, List[Dict]]:
        """
        Processes chat input.
        If history is empty, treats input as a Topic/Query and builds a Rich Prompt.
        If history exists, treats input as follow-up instruction.
        Returns (response_text, updated_history)
        """
        raise NotImplementedError

54 

class WritingAssistant(BaseGenerator):
    """Ghostwrites articles in the user's own voice, seeded from vault notes."""

    def chat(self, user_input: str, history: List[Dict]) -> Tuple[str, List[Dict]]:
        if history:
            # Follow-up round: the input is a plain revision instruction.
            logger.info(f"Follow-up instruction: {user_input}")
            prompt = user_input
            recorded_message = user_input
        else:
            # First round: the input is a topic, optionally "Topic | Context".
            topic, _, tail = user_input.partition("|")
            topic = topic.strip()
            extra_context = tail.strip()

            # Gather style references and vault material for the rich prompt.
            style_text = "\n\n".join(self.indexer.get_writing_samples(count=3))
            rag_text = "\n\n".join(self.indexer.search(topic, limit=5))
            writing_style_guide = self._read_file("writing_style.md")
            methodology = self._read_file("methodology.md")

            prompt = f"""You are a ghostwriter for the user. Your goal is to write an article on the TOPIC provided.

=== YOUR IDENTITY & METHODOLOGY ===
{methodology}

=== YOUR WRITING STYLE GUIDE ===
{writing_style_guide}

=== YOUR WRITING SAMPLES (MIMIC THIS TONE) ===
{style_text}

=== RELEVANT NOTES FROM OBSIDIAN VAULT ===
{rag_text}

=== TASK ===
Topic: {topic}
Extra Context: {extra_context}

Write the article in Chinese (unless the topic implies English). 
Adhere strictly to the "Identity" and "Style Guide".
"""
            logger.info(f"Generating article for topic: {topic}")
            # The full rich prompt becomes the recorded user turn.
            recorded_message = prompt

        response, _ = self.llm.generate_response(prompt, history)

        # Never mutate the caller's history; hand back an extended copy.
        updated_history = history + [
            {"role": "user", "content": recorded_message},
            {"role": "assistant", "content": response},
        ]
        return response, updated_history

120 

class ReplyGenerator(BaseGenerator):
    """Drafts professional, high-EQ replies to difficult workplace situations."""

    def chat(self, user_input: str, history: List[Dict]) -> Tuple[str, List[Dict]]:
        if history:
            # Follow-up round: the input is a refinement instruction.
            logger.info(f"Follow-up reply instruction: {user_input}")
            prompt = user_input
            recorded_message = user_input
        else:
            # First round: the input describes the situation to reply to.
            static_samples = self._read_file("REPLY-SAMPLE.md")
            dynamic_samples_text = "\n\n".join(self.indexer.get_reply_samples(count=3))
            methodology = self._read_file("methodology.md")

            prompt = f"""You are an experienced Product/R&D Manager. 
Your task is to draft a reply to a challenging situation.

=== CORE PHILOSOPHY ===
{methodology}

=== REPLY GUIDELINES ===
1. Logic: Be clear and structured (e.g., numbered lists).
2. Responsibility: Don't take unnecessary blame, but don't shirk core duties. Explain objective causes.
3. Emotion: Be empathetic but professional.
4. Conciseness: No fluff.

=== REFERENCE SAMPLES (FEW-SHOT) ===
{static_samples}

{dynamic_samples_text}

=== THE SITUATION ===
{user_input}

Draft a reply. If the context implies a specific role (PM, Dev Lead), assume that role.
"""
            logger.info(f"Generating reply for query: {user_input}")
            recorded_message = prompt

        response, _ = self.llm.generate_response(prompt, history)

        # Extend a copy of the history with this exchange.
        updated_history = history + [
            {"role": "user", "content": recorded_message},
            {"role": "assistant", "content": response},
        ]
        return response, updated_history

171 

class DecisionSupport(BaseGenerator):
    """Acts as a devil's-advocate advisor using the user's decision frameworks."""

    def chat(self, user_input: str, history: List[Dict]) -> Tuple[str, List[Dict]]:
        if history:
            # Follow-up round: plain instruction, no prompt scaffolding.
            logger.info(f"Follow-up decision instruction: {user_input}")
            prompt = user_input
            recorded_message = user_input
        else:
            # First round: load frameworks and pull related notes via RAG.
            decision_guide = self._read_file("decision.md")
            methodology = self._read_file("methodology.md")
            rag_text = "\n\n".join(self.indexer.search(user_input, limit=3))

            prompt = f"""You are a Decision Support Assistant.
Your goal is to help the user think through a complex decision.

=== DECISION FRAMEWORK ===
{decision_guide}

=== CORE PHILOSOPHY ===
{methodology}

=== RELEVANT CONTEXT FROM NOTES ===
{rag_text}

=== THE DECISION / ISSUE ===
{user_input}

=== TASK ===
1. Challenge the premise: Is this the right problem to solve?
2. Apply the Framework: Use GPA (Goal, Priority, Alternatives) and IPO (Information, People, Objective reasoning) models.
3. Pre-mortem: What is the worst that could happen?
4. Provide a recommendation or a set of questions to clarify.
"""
            logger.info(f"Analyzing decision: {user_input}")
            recorded_message = prompt

        response, _ = self.llm.generate_response(prompt, history)

        # Extend a copy of the history with this exchange.
        updated_history = history + [
            {"role": "user", "content": recorded_message},
            {"role": "assistant", "content": response},
        ]
        return response, updated_history

220 

class SearchAnalyzer(BaseGenerator):
    """Answers questions from local Obsidian notes, with web search as a fallback tool.

    Runs a single-round ReAct flow: one LLM pass that may request tool calls,
    tool execution, then one follow-up pass to compose the cited answer.
    """

    # Function-calling schemas advertised to the LLM on every turn.
    # Hoisted to a class attribute — they are constant, so there is no reason
    # to rebuild the structure on every chat() call.
    _TOOLS = [
        {
            "type": "function",
            "function": {
                "name": "search_web",
                "description": "Search the public web using DuckDuckGo. Use this when the internal notes are insufficient or when you need up-to-date external information.",
                "parameters": {
                    "type": "object",
                    "properties": {
                        "query": {
                            "type": "string",
                            "description": "The search query"
                        }
                    },
                    "required": ["query"]
                }
            }
        },
        {
            "type": "function",
            "function": {
                "name": "list_recent_files",
                "description": "List files in the Obsidian vault that have been modified recently. Use this when the user asks for 'recent updates', 'what's new', or changes in the last X days.",
                "parameters": {
                    "type": "object",
                    "properties": {
                        "days": {
                            "type": "integer",
                            "description": "Number of days to check (default 5)",
                            "default": 5
                        }
                    },
                    "required": []
                }
            }
        }
    ]

    def chat(self, user_input: str, history: List[Dict]) -> Tuple[str, List[Dict]]:
        """Answer the question with source citations; returns (response, updated_history)."""
        is_first_turn = (len(history) == 0)

        if is_first_turn:
            # Seed the first turn with RAG results from the local vault.
            rag_notes = self.indexer.search(user_input, limit=5)
            rag_text = "\n\n".join(rag_notes)

            logger.info(f"SearchAnalyzer retrieved {len(rag_notes)} local notes.")
            for note in rag_notes:
                # First line of each note doubles as its source label in debug logs.
                first_line = note.split('\n')[0]
                logger.debug(f" - Retrieved: {first_line}")

            prompt = f"""You are an Analyst using Butler (Obsidian Edition).

=== TASK ===
Answer the user's question. 
CRITICAL: You MUST explicitly cite your sources in the output.

=== STRATEGY ===
1. first: Check "RELEVANT LOCAL NOTES" provided below.
2. second: If the notes answer the question, answer directly.
3. third: If the notes are missing info, ambiguous, or outdated, USE THE `search_web` TOOL to find external info.

=== OUTPUT FORMAT ===
- Begin with a summary.
- When stating facts, append the source in brackets, e.g., "The server failed [Source: XLSmart 2025-12-25.md]" or "Market share is 20% [Source: Web Search]".
- If data comes from BOTH, mention both.

=== RELEVANT LOCAL NOTES ===
{rag_text}

=== USER QUESTION ===
{user_input}
"""
            actual_user_message = prompt
        else:
            actual_user_message = user_input

        # Work on a copy so the caller's history is not mutated mid-turn.
        exec_history = history.copy()
        exec_history.append({"role": "user", "content": actual_user_message})

        logger.info(f"Analyzer thinking on: {user_input}")

        # Pass 1: the model either answers directly or requests tool calls.
        # (The original wrapped this in `for i in range(2)`, but every branch
        # returned inside the first iteration, so the loop was dead structure.)
        response_text, tool_calls = self.llm.generate_response(
            message="",  # message already lives in exec_history
            context=exec_history,
            tools=self._TOOLS
        )

        if tool_calls:
            logger.info(f"LLM requested tool calls: {tool_calls}")

            # Record the request so the follow-up pass sees what was asked for.
            exec_history.append({
                "role": "assistant",
                "content": f"[Tool Call Request: {tool_calls}]"
            })

            for tc in tool_calls:
                if tc["name"] == "search_web":
                    query = tc["args"].get("query")
                    search_result = WebSearchTool.search_web(query)
                    # 'user' role serves as a system-injection channel for tool
                    # output; a dedicated 'function' role would be cleaner if
                    # the LLM wrapper supported it fully.
                    exec_history.append({
                        "role": "user",
                        "content": f"--- TOOL OUTPUT (search_web) ---\n{search_result}"
                    })
                elif tc["name"] == "list_recent_files":
                    days = tc["args"].get("days", 5)
                    recent_list = self.indexer.get_recent_files(days=days)
                    exec_history.append({
                        "role": "user",
                        "content": f"--- TOOL OUTPUT (list_recent_files) ---\n{recent_list}"
                    })

            # Pass 2: compose the final answer from the tool outputs.
            try:
                response_text, _ = self.llm.generate_response(
                    message="",
                    context=exec_history,
                    tools=self._TOOLS
                )
            except Exception as e:
                # Best-effort: fall back to the pass-1 text rather than failing the turn.
                logger.error(f"Error in second ReAct loop: {e}")

        # Bug fix: record the assistant's final answer in the returned history.
        # The original returned exec_history without it, so follow-up turns
        # lost the answer — inconsistent with every sibling generator and the
        # chat() contract ("Returns (response_text, updated_history)").
        exec_history.append({"role": "assistant", "content": response_text})
        return response_text, exec_history

367 

368 

class DeAIReviser(BaseGenerator):
    """Remove AI tone from articles - text-in, text-out mode."""

    def chat(self, user_input: str, history: List[Dict]) -> Tuple[str, List[Dict]]:
        if history:
            # Refinement round: the input is an instruction, not an article.
            logger.info(f"DeAI Reviser: Follow-up instruction - {user_input}")
            prompt = user_input
            recorded_message = user_input
        else:
            # First round: the input IS the AI-generated article to revise.
            # Load the revision guide and the user's authentic samples.
            powell_revise_guide = self._read_file("POWELL_REVISE.md")
            style_text = "\n\n".join(self.indexer.get_writing_samples(count=3))

            prompt = f"""You are an expert editor specializing in removing AI-generated tone from articles.

Your task is to revise the article provided by the user to make it sound more human and authentic.

=== REVISION GUIDE ===
{powell_revise_guide}

=== USER'S AUTHENTIC WRITING SAMPLES (for style reference) ===
{style_text}

=== ARTICLE TO REVISE ===
{user_input}

=== TASK ===
1. Analyze the article and identify AI characteristics (过渡词堆积, 排比对仗, 空洞修饰词等)
2. Rewrite the article following the POWELL_REVISE.md principles
3. Preserve:
   - Original meaning and key information
   - Technical terms and proper nouns
   - Code blocks, lists, and formatting
   - Original language (Chinese/English)
4. Output the revised article directly (no meta-commentary like "Here's the revised version...")

CRITICAL: Output ONLY the revised article text. Do NOT add explanations before or after.
"""
            logger.info(f"DeAI Reviser: Processing article ({len(user_input)} chars)")
            recorded_message = prompt

        response, _ = self.llm.generate_response(prompt, history)

        # Extend a copy of the history with this exchange.
        updated_history = history + [
            {"role": "user", "content": recorded_message},
            {"role": "assistant", "content": response},
        ]
        return response, updated_history

429 

430 

class ZhihuGenerator(BaseGenerator):
    """Generate Zhihu-style answers based on style guide and user's knowledge base."""

    def chat(self, user_input: str, history: List[Dict]) -> Tuple[str, List[Dict]]:
        first_turn = not history

        if first_turn:
            # First round; expected input format: "Question | Core Ideas".
            question, _, tail = user_input.partition("|")
            question = question.strip()
            core_ideas = tail.strip()

            # Style guide defines HOW to write; it is not source material.
            zhihu_style_guide = self._read_file("zhihu-style.md")
            # RAG over the vault supplies the actual content material.
            rag_text = "\n\n".join(self.indexer.search(question, limit=5))
            # Methodology keeps the answering identity consistent.
            methodology = self._read_file("methodology.md")

            prompt = f"""You are answering a Zhihu question. Your goal is to write an authentic, experience-driven answer.

=== YOUR IDENTITY & METHODOLOGY ===
{methodology}

=== ZHIHU STYLE GUIDE (FOLLOW THESE WRITING PRINCIPLES) ===
{zhihu_style_guide}

CRITICAL: The style guide above defines HOW to write (tone, structure, language patterns).
Do NOT treat zhihu-sample content as factual material. It's ONLY for style reference.

=== RELEVANT KNOWLEDGE FROM YOUR OBSIDIAN VAULT (USE AS ACTUAL MATERIAL) ===
{rag_text}

=== THE QUESTION ===
{question}

=== YOUR CORE IDEAS ===
{core_ideas}

=== TASK ===
1. Answer the question based on YOUR knowledge (from Obsidian vault) and core ideas
2. Follow the Zhihu style guide strictly for tone and structure
3. Share real experiences, examples, and insights from the vault material
4. Write in Chinese (unless question implies English)
5. Use first person ("我", "我们") frequently
6. Be professional yet approachable, technical yet relatable
7. Adhere to the "七个避免事项" (avoid AI tone, avoid empty buzzwords, etc.)

OUTPUT FORMAT:
- If the answer is over 300 characters, append this at the end:

---

**更多关于这些问题的思考可以在公众号搜索账户: AI Manifest**

Begin writing the answer:
"""
            logger.info(f"Generating Zhihu answer for: {question[:50]}...")
            recorded_message = prompt
        else:
            # Refinement round: the input is a plain instruction.
            logger.info(f"Follow-up Zhihu instruction: {user_input}")
            prompt = user_input
            recorded_message = user_input

        response, _ = self.llm.generate_response(prompt, history)

        # Post-processing: on a long first-turn answer, append the promo
        # signature if the model omitted it.
        if first_turn and len(response) > 300 and "AI Manifest" not in response:
            response += "\n\n---\n\n**更多关于这些问题的思考可以在公众号搜索账户: AI Manifest**"

        # Extend a copy of the history with this exchange.
        updated_history = history + [
            {"role": "user", "content": recorded_message},
            {"role": "assistant", "content": response},
        ]
        return response, updated_history