Coverage for slack_bot / obsidian / generators.py: 0%

170 statements  

« prev     ^ index     » next       coverage.py v7.13.4, created at 2026-03-02 17:44 +0800

1import os 

2from typing import List, Dict, Tuple, Optional 

3from slack_bot.llm.gemini import GeminiLLM 

4from slack_bot.obsidian.indexer import ObsidianIndexer 

5from slack_bot.tools.web import WebSearchTool 

6from health.utils.logging_config import setup_logger 

7from health.utils.time_utils import get_current_time_str 

8 

# Module-level logger shared by every generator in this file.
logger = setup_logger(__name__)


# Base system instruction for all generators below.  The {current_time}
# placeholder is filled in by BaseGenerator.__init__ via str.format().
OBSIDIAN_SYSTEM_PROMPT = """You are Butler (Obsidian Edition), an intelligent knowledge assistant connected to the user's second brain.
Current Time: {current_time}

Your Role:
You are NOT a health assistant. You are a Knowledge Partner designed to help the user think, write, and communicate. You draw directly from the user's local Obsidian notes, values, and methodology.

Core Capabilities:
1. Writing: You mimic the user's unique writing style (as defined in `writing_style.md`).
2. Communication: You draft high-EQ, logically rigorous replies for professional contexts (as defined in `REPLY-SAMPLE.md`).
3. Decision: You act as a "Devil's Advocate" and strategic advisor using the user's decision frameworks (GPA, IPO).

Guidelines:
- **Style Alignment**: Strictly adhere to the tone and sentence structures found in the provided samples.
- **Data Source**: Rely primarily on the provided context (RAG) and loaded markdown files.
- **Identity**: You are pragmatic, rational, and value "technological optimism" and "intellectual honesty".
"""

28 

class BaseGenerator:
    """Common plumbing for all Obsidian generators.

    Owns the LLM client (seeded with the shared system prompt), the vault
    indexer, and file access relative to the workspace root.
    """

    def __init__(self, indexer: ObsidianIndexer):
        formatted_prompt = OBSIDIAN_SYSTEM_PROMPT.format(current_time=get_current_time_str())
        self.llm = GeminiLLM(system_instruction=formatted_prompt)
        self.indexer = indexer
        # Three dirname() hops: .../slack_bot/obsidian/generators.py -> workspace root.
        self.workspace_root = os.path.dirname(os.path.dirname(os.path.dirname(__file__)))

    def _read_file(self, filename: str) -> str:
        """Read a UTF-8 text file relative to the workspace root.

        Returns the file contents, or "" on any failure (missing file,
        permissions, decode error) — callers treat missing guides as
        best-effort context, not a hard error.
        """
        path = os.path.join(self.workspace_root, filename)
        try:
            with open(path, "r", encoding="utf-8") as f:
                return f.read()
        except Exception as e:
            # Bug fix: log the actual path instead of the literal "(unknown)",
            # so the log identifies which guide file failed to load.
            logger.error(f"Failed to read {path}: {e}")
            return ""

    def chat(self, user_input: str, history: List[Dict]) -> Tuple[str, List[Dict]]:
        """
        Processes chat input.
        If history is empty, treats input as a Topic/Query and builds a Rich Prompt.
        If history exists, treats input as follow-up instruction.
        Returns (response_text, updated_history)
        """
        raise NotImplementedError

54 

class WritingAssistant(BaseGenerator):
    """Ghostwrites articles in the user's own voice, seeded from vault notes."""

    def chat(self, user_input: str, history: List[Dict]) -> Tuple[str, List[Dict]]:
        if history:
            # Follow-up round: the input is a plain revision instruction.
            logger.info(f"Follow-up instruction: {user_input}")
            prompt = user_input
            recorded_message = user_input
        else:
            # First round: the input is a topic, optionally "Topic | Context".
            topic, _, tail = user_input.partition("|")
            topic = topic.strip()
            extra_context = tail.strip()

            # Gather style references and vault material for the rich prompt.
            style_text = "\n\n".join(self.indexer.get_writing_samples(count=3))
            rag_text = "\n\n".join(self.indexer.search(topic, limit=5))
            writing_style_guide = self._read_file("writing_style.md")
            methodology = self._read_file("methodology.md")

            prompt = f"""You are a ghostwriter for the user. Your goal is to write an article on the TOPIC provided.

=== YOUR IDENTITY & METHODOLOGY ===
{methodology}

=== YOUR WRITING STYLE GUIDE ===
{writing_style_guide}

=== YOUR WRITING SAMPLES (MIMIC THIS TONE) ===
{style_text}

=== RELEVANT NOTES FROM OBSIDIAN VAULT ===
{rag_text}

=== TASK ===
Topic: {topic}
Extra Context: {extra_context}

Write the article in Chinese (unless the topic implies English). 
Adhere strictly to the "Identity" and "Style Guide".
"""
            logger.info(f"Generating article for topic: {topic}")
            # The full rich prompt becomes the recorded user turn.
            recorded_message = prompt

        response, _ = self.llm.generate_response(prompt, history)

        # Never mutate the caller's history; hand back an extended copy.
        updated_history = history + [
            {"role": "user", "content": recorded_message},
            {"role": "assistant", "content": response},
        ]
        return response, updated_history

120 

class ReplyGenerator(BaseGenerator):
    """Drafts professional, high-EQ replies to difficult workplace situations."""

    def chat(self, user_input: str, history: List[Dict]) -> Tuple[str, List[Dict]]:
        if history:
            # Follow-up round: the input is a refinement instruction.
            logger.info(f"Follow-up reply instruction: {user_input}")
            prompt = user_input
            recorded_message = user_input
        else:
            # First round: the input describes the situation to reply to.
            static_samples = self._read_file("REPLY-SAMPLE.md")
            dynamic_samples_text = "\n\n".join(self.indexer.get_reply_samples(count=3))
            methodology = self._read_file("methodology.md")

            prompt = f"""You are an experienced Product/R&D Manager. 
Your task is to draft a reply to a challenging situation.

=== CORE PHILOSOPHY ===
{methodology}

=== REPLY GUIDELINES ===
1. Logic: Be clear and structured (e.g., numbered lists).
2. Responsibility: Don't take unnecessary blame, but don't shirk core duties. Explain objective causes.
3. Emotion: Be empathetic but professional.
4. Conciseness: No fluff.

=== REFERENCE SAMPLES (FEW-SHOT) ===
{static_samples}

{dynamic_samples_text}

=== THE SITUATION ===
{user_input}

Draft a reply. If the context implies a specific role (PM, Dev Lead), assume that role.
"""
            logger.info(f"Generating reply for query: {user_input}")
            recorded_message = prompt

        response, _ = self.llm.generate_response(prompt, history)

        # Extend a copy of the history with this exchange.
        updated_history = history + [
            {"role": "user", "content": recorded_message},
            {"role": "assistant", "content": response},
        ]
        return response, updated_history

171 

class DecisionSupport(BaseGenerator):
    """Acts as a devil's-advocate advisor using the user's decision frameworks."""

    def chat(self, user_input: str, history: List[Dict]) -> Tuple[str, List[Dict]]:
        if history:
            # Follow-up round: plain instruction, no prompt scaffolding.
            logger.info(f"Follow-up decision instruction: {user_input}")
            prompt = user_input
            recorded_message = user_input
        else:
            # First round: load frameworks and pull related notes via RAG.
            decision_guide = self._read_file("decision.md")
            methodology = self._read_file("methodology.md")
            rag_text = "\n\n".join(self.indexer.search(user_input, limit=3))

            prompt = f"""You are a Decision Support Assistant.
Your goal is to help the user think through a complex decision.

=== DECISION FRAMEWORK ===
{decision_guide}

=== CORE PHILOSOPHY ===
{methodology}

=== RELEVANT CONTEXT FROM NOTES ===
{rag_text}

=== THE DECISION / ISSUE ===
{user_input}

=== TASK ===
1. Challenge the premise: Is this the right problem to solve?
2. Apply the Framework: Use GPA (Goal, Priority, Alternatives) and IPO (Information, People, Objective reasoning) models.
3. Pre-mortem: What is the worst that could happen?
4. Provide a recommendation or a set of questions to clarify.
"""
            logger.info(f"Analyzing decision: {user_input}")
            recorded_message = prompt

        response, _ = self.llm.generate_response(prompt, history)

        # Extend a copy of the history with this exchange.
        updated_history = history + [
            {"role": "user", "content": recorded_message},
            {"role": "assistant", "content": response},
        ]
        return response, updated_history

220 

class SearchAnalyzer(BaseGenerator):
    """Answers questions from local Obsidian notes, with web search as a fallback tool.

    Runs a single-round ReAct flow: one LLM pass that may request tool calls,
    tool execution, then one follow-up pass to compose the cited answer.
    """

    # Function-calling schemas advertised to the LLM on every turn.
    # Hoisted to a class attribute — they are constant, so there is no reason
    # to rebuild the structure on every chat() call.
    _TOOLS = [
        {
            "type": "function",
            "function": {
                "name": "search_web",
                "description": "Search the public web using DuckDuckGo. Use this when the internal notes are insufficient or when you need up-to-date external information.",
                "parameters": {
                    "type": "object",
                    "properties": {
                        "query": {
                            "type": "string",
                            "description": "The search query"
                        }
                    },
                    "required": ["query"]
                }
            }
        },
        {
            "type": "function",
            "function": {
                "name": "list_recent_files",
                "description": "List files in the Obsidian vault that have been modified recently. Use this when the user asks for 'recent updates', 'what's new', or changes in the last X days.",
                "parameters": {
                    "type": "object",
                    "properties": {
                        "days": {
                            "type": "integer",
                            "description": "Number of days to check (default 5)",
                            "default": 5
                        }
                    },
                    "required": []
                }
            }
        }
    ]

    def chat(self, user_input: str, history: List[Dict]) -> Tuple[str, List[Dict]]:
        """Answer the question with source citations; returns (response, updated_history)."""
        is_first_turn = (len(history) == 0)

        if is_first_turn:
            # Seed the first turn with RAG results from the local vault.
            rag_notes = self.indexer.search(user_input, limit=5)
            rag_text = "\n\n".join(rag_notes)

            logger.info(f"SearchAnalyzer retrieved {len(rag_notes)} local notes.")
            for note in rag_notes:
                # First line of each note doubles as its source label in debug logs.
                first_line = note.split('\n')[0]
                logger.debug(f" - Retrieved: {first_line}")

            prompt = f"""You are an Analyst using Butler (Obsidian Edition).

=== TASK ===
Answer the user's question. 
CRITICAL: You MUST explicitly cite your sources in the output.

=== STRATEGY ===
1. first: Check "RELEVANT LOCAL NOTES" provided below.
2. second: If the notes answer the question, answer directly.
3. third: If the notes are missing info, ambiguous, or outdated, USE THE `search_web` TOOL to find external info.

=== OUTPUT FORMAT ===
- Begin with a summary.
- When stating facts, append the source in brackets, e.g., "The server failed [Source: XLSmart 2025-12-25.md]" or "Market share is 20% [Source: Web Search]".
- If data comes from BOTH, mention both.

=== RELEVANT LOCAL NOTES ===
{rag_text}

=== USER QUESTION ===
{user_input}
"""
            actual_user_message = prompt
        else:
            actual_user_message = user_input

        # Work on a copy so the caller's history is not mutated mid-turn.
        exec_history = history.copy()
        exec_history.append({"role": "user", "content": actual_user_message})

        logger.info(f"Analyzer thinking on: {user_input}")

        # Pass 1: the model either answers directly or requests tool calls.
        # (The original wrapped this in `for i in range(2)`, but every branch
        # returned inside the first iteration, so the loop was dead structure.)
        response_text, tool_calls = self.llm.generate_response(
            message="",  # message already lives in exec_history
            context=exec_history,
            tools=self._TOOLS
        )

        if tool_calls:
            logger.info(f"LLM requested tool calls: {tool_calls}")

            # Record the request so the follow-up pass sees what was asked for.
            exec_history.append({
                "role": "assistant",
                "content": f"[Tool Call Request: {tool_calls}]"
            })

            for tc in tool_calls:
                if tc["name"] == "search_web":
                    query = tc["args"].get("query")
                    search_result = WebSearchTool.search_web(query)
                    # 'user' role serves as a system-injection channel for tool
                    # output; a dedicated 'function' role would be cleaner if
                    # the LLM wrapper supported it fully.
                    exec_history.append({
                        "role": "user",
                        "content": f"--- TOOL OUTPUT (search_web) ---\n{search_result}"
                    })
                elif tc["name"] == "list_recent_files":
                    days = tc["args"].get("days", 5)
                    recent_list = self.indexer.get_recent_files(days=days)
                    exec_history.append({
                        "role": "user",
                        "content": f"--- TOOL OUTPUT (list_recent_files) ---\n{recent_list}"
                    })

            # Pass 2: compose the final answer from the tool outputs.
            try:
                response_text, _ = self.llm.generate_response(
                    message="",
                    context=exec_history,
                    tools=self._TOOLS
                )
            except Exception as e:
                # Best-effort: fall back to the pass-1 text rather than failing the turn.
                logger.error(f"Error in second ReAct loop: {e}")

        # Bug fix: record the assistant's final answer in the returned history.
        # The original returned exec_history without it, so follow-up turns
        # lost the answer — inconsistent with every sibling generator and the
        # chat() contract ("Returns (response_text, updated_history)").
        exec_history.append({"role": "assistant", "content": response_text})
        return response_text, exec_history

367 

368 

class DeAIReviser(BaseGenerator):
    """Remove AI tone from articles - text-in, text-out mode."""

    def chat(self, user_input: str, history: List[Dict]) -> Tuple[str, List[Dict]]:
        if history:
            # Refinement round: the input is an instruction, not an article.
            logger.info(f"DeAI Reviser: Follow-up instruction - {user_input}")
            prompt = user_input
            recorded_message = user_input
        else:
            # First round: the input IS the AI-generated article to revise.
            # Load the revision guide and the user's authentic samples.
            powell_revise_guide = self._read_file("POWELL_REVISE.md")
            style_text = "\n\n".join(self.indexer.get_writing_samples(count=3))

            prompt = f"""You are an expert editor specializing in removing AI-generated tone from articles.

Your task is to revise the article provided by the user to make it sound more human and authentic.

=== REVISION GUIDE ===
{powell_revise_guide}

=== USER'S AUTHENTIC WRITING SAMPLES (for style reference) ===
{style_text}

=== ARTICLE TO REVISE ===
{user_input}

=== TASK ===
1. Analyze the article and identify AI characteristics (过渡词堆积, 排比对仗, 空洞修饰词等)
2. Rewrite the article following the POWELL_REVISE.md principles
3. Preserve:
   - Original meaning and key information
   - Technical terms and proper nouns
   - Code blocks, lists, and formatting
   - Original language (Chinese/English)
4. Output the revised article directly (no meta-commentary like "Here's the revised version...")

CRITICAL: Output ONLY the revised article text. Do NOT add explanations before or after.
"""
            logger.info(f"DeAI Reviser: Processing article ({len(user_input)} chars)")
            recorded_message = prompt

        response, _ = self.llm.generate_response(prompt, history)

        # Extend a copy of the history with this exchange.
        updated_history = history + [
            {"role": "user", "content": recorded_message},
            {"role": "assistant", "content": response},
        ]
        return response, updated_history

429 

430 

class ZhihuGenerator(BaseGenerator):
    """Generate Zhihu-style answers based on style guide and user's knowledge base."""

    def chat(self, user_input: str, history: List[Dict]) -> Tuple[str, List[Dict]]:
        first_turn = not history

        if first_turn:
            # First round; expected input format: "Question | Core Ideas".
            question, _, tail = user_input.partition("|")
            question = question.strip()
            core_ideas = tail.strip()

            # Style guide defines HOW to write; it is not source material.
            zhihu_style_guide = self._read_file("zhihu-style.md")
            # RAG over the vault supplies the actual content material.
            rag_text = "\n\n".join(self.indexer.search(question, limit=5))
            # Methodology keeps the answering identity consistent.
            methodology = self._read_file("methodology.md")

            prompt = f"""You are answering a Zhihu question. Your goal is to write an authentic, experience-driven answer.

=== YOUR IDENTITY & METHODOLOGY ===
{methodology}

=== ZHIHU STYLE GUIDE (FOLLOW THESE WRITING PRINCIPLES) ===
{zhihu_style_guide}

CRITICAL: The style guide above defines HOW to write (tone, structure, language patterns).
Do NOT treat zhihu-sample content as factual material. It's ONLY for style reference.

=== RELEVANT KNOWLEDGE FROM YOUR OBSIDIAN VAULT (USE AS ACTUAL MATERIAL) ===
{rag_text}

=== THE QUESTION ===
{question}

=== YOUR CORE IDEAS ===
{core_ideas}

=== TASK ===
1. Answer the question based on YOUR knowledge (from Obsidian vault) and core ideas
2. Follow the Zhihu style guide strictly for tone and structure
3. Share real experiences, examples, and insights from the vault material
4. Write in Chinese (unless question implies English)
5. Use first person ("我", "我们") frequently
6. Be professional yet approachable, technical yet relatable
7. Adhere to the "七个避免事项" (avoid AI tone, avoid empty buzzwords, etc.)

OUTPUT FORMAT:
- If the answer is over 300 characters, append this at the end:

---

**更多关于这些问题的思考可以在公众号搜索账户: AI Manifest**

Begin writing the answer:
"""
            logger.info(f"Generating Zhihu answer for: {question[:50]}...")
            recorded_message = prompt
        else:
            # Refinement round: the input is a plain instruction.
            logger.info(f"Follow-up Zhihu instruction: {user_input}")
            prompt = user_input
            recorded_message = user_input

        response, _ = self.llm.generate_response(prompt, history)

        # Post-processing: on a long first-turn answer, append the promo
        # signature if the model omitted it.
        if first_turn and len(response) > 300 and "AI Manifest" not in response:
            response += "\n\n---\n\n**更多关于这些问题的思考可以在公众号搜索账户: AI Manifest**"

        # Extend a copy of the history with this exchange.
        updated_history = history + [
            {"role": "user", "content": recorded_message},
            {"role": "assistant", "content": response},
        ]
        return response, updated_history