import os
from pathlib import Path
from typing import List, Dict, Tuple, Optional, TYPE_CHECKING
from slack_bot.llm.gemini import GeminiLLM
from slack_bot.obsidian.indexer import ObsidianIndexer
from slack_bot.tools.web import WebSearchTool
from health.utils.logging_config import setup_logger
from health.utils.time_utils import get_current_time_str

if TYPE_CHECKING:
    from slack_bot.obsidian.vector_store import ChromaVectorStore

# Module-level logger shared by every generator class in this module.
logger = setup_logger(__name__)


# System instruction shared by every generator. The single {current_time}
# placeholder is substituted once, via str.format, in BaseGenerator.__init__.
OBSIDIAN_SYSTEM_PROMPT = """You are Butler (Obsidian Edition), an intelligent knowledge assistant connected to the user's second brain.
Current Time: {current_time}

Your Role:
You are NOT a health assistant. You are a Knowledge Partner designed to help the user think, write, and communicate. You draw directly from the user's local Obsidian notes, values, and methodology.

Core Capabilities:
1. Writing: You mimic the user's unique writing style (as defined in `writing_style.md`).
2. Communication: You draft high-EQ, logically rigorous replies for professional contexts (as defined in `REPLY-SAMPLE.md`).
3. Decision: You act as a "Devil's Advocate" and strategic advisor using the user's decision frameworks (GPA, IPO).

Guidelines:
- **Style Alignment**: Strictly adhere to the tone and sentence structures found in the provided samples.
- **Data Source**: Rely primarily on the provided context (RAG) and loaded markdown files.
- **Identity**: You are pragmatic, rational, and value "technological optimism" and "intellectual honesty".
"""

class BaseGenerator:
    """Shared base for all Obsidian-backed assistants.

    Builds the Gemini LLM client with the time-stamped system prompt and
    holds the note indexer plus an optional vector store. Subclasses
    implement ``chat(user_input, history)``.
    """

    def __init__(
        self,
        indexer: ObsidianIndexer,
        vector_store: Optional["ChromaVectorStore"] = None,
    ):
        # Stamp the current time into the system prompt once, at construction.
        formatted_prompt = OBSIDIAN_SYSTEM_PROMPT.format(current_time=get_current_time_str())
        self.llm = GeminiLLM(system_instruction=formatted_prompt)
        self.indexer = indexer
        self.vector_store = vector_store
        # Three directory levels above this module; assumed to be the project
        # root where guide files (writing_style.md, methodology.md, ...) live
        # — TODO confirm against the actual package layout.
        self.workspace_root = os.path.dirname(os.path.dirname(os.path.dirname(__file__)))

    def _read_file(self, filename: str) -> str:
        """Read a UTF-8 text file from the workspace root.

        Args:
            filename: Path relative to ``self.workspace_root``.

        Returns:
            The file contents, or "" if the file cannot be read. Read
            failures are logged but never raised: every caller treats
            these guide files as optional prompt context.
        """
        path = os.path.join(self.workspace_root, filename)
        try:
            with open(path, "r", encoding="utf-8") as f:
                return f.read()
        except (OSError, UnicodeDecodeError) as e:
            # Bug fix: this log line previously printed the literal text
            # "(unknown)" instead of naming the file that failed.
            logger.error(f"Failed to read {path}: {e}")
            return ""

    def _semantic_search(self, query: str, top_k: int = 3) -> str:
        """Run semantic search against the vector store and format results.

        Args:
            query: Search query text.
            top_k: Maximum number of chunks to retrieve.

        Returns:
            Formatted string block, or empty string if there are no results,
            no vector store is configured, or the search fails.
        """
        if self.vector_store is None:
            return ""
        try:
            results = self.vector_store.search_knowledge(query, top_k=top_k)
        except Exception as e:
            # Best-effort: semantic context is optional, never fatal.
            logger.warning(f"_semantic_search failed: {e}")
            return ""
        if not results:
            return ""
        lines = ["=== SEMANTIC KNOWLEDGE FROM VAULT ==="]
        for r in results:
            lines.append(f"[{Path(r.source_path).name} / {r.header_hierarchy}]")
            # Truncate individual chunks to avoid prompt bloat (~500 tokens ≈ 2000 chars)
            lines.append(r.text[:2000])
            lines.append("")
        return "\n".join(lines)

class WritingAssistant(BaseGenerator):
    """Ghostwrites articles in the user's voice from Obsidian context.

    First turn: the input is treated as "Topic" or "Topic | ExtraContext";
    a rich prompt is built from style guides, writing samples and RAG
    notes, a draft is generated, and a second "de-AI tone" editing pass
    rewrites the draft. Follow-up turns: the instruction is forwarded
    unchanged and no second pass runs.
    """

    def chat(self, user_input: str, history: List[Dict]) -> Tuple[str, List[Dict]]:
        """Generate (first turn) or refine (later turns) an article.

        Args:
            user_input: "Topic" or "Topic | Context" on the first turn;
                afterwards a free-form refinement instruction.
            history: Prior conversation turns; empty list on the first turn.

        Returns:
            Tuple of (response text, history extended with this exchange).
        """
        is_first_turn = (len(history) == 0)

        if is_first_turn:
            # === First Turn: Build Rich Prompt ===
            # user_input is treated as "Topic"

            # Naive splitting for context if formatted as "Topic | Context"
            parts = user_input.split("|", 1)
            topic = parts[0].strip()
            extra_context = parts[1].strip() if len(parts) > 1 else ""

            # 1. RAG & Samples
            style_samples = self.indexer.get_writing_samples(count=3)
            style_text = "\n\n".join(style_samples)

            rag_notes = self.indexer.search(topic, limit=5)
            rag_text = "\n\n".join(rag_notes)

            # Guide files are optional; _read_file returns "" when missing.
            writing_style_guide = self._read_file("writing_style.md")
            methodology = self._read_file("methodology.md")
            powell_revise_guide = self._read_file("POWELL_REVISE.md")

            semantic_context = self._semantic_search(topic, top_k=3)

            prompt = f"""You are a ghostwriter for the user. Your goal is to write an article on the TOPIC provided.

            === YOUR IDENTITY & METHODOLOGY ===
            {methodology}

            === YOUR WRITING STYLE GUIDE ===
            {writing_style_guide}

            === ANTI-AI-TONE GUIDE (APPLY WHILE WRITING, NOT JUST AT THE END) ===
            {powell_revise_guide}

            === YOUR WRITING SAMPLES (MIMIC THIS TONE) ===
            {style_text}

            === RELEVANT NOTES FROM OBSIDIAN VAULT ===
            {rag_text}

            {semantic_context}

            === TASK ===
            Topic: {topic}
            Extra Context: {extra_context}

            Write the article in Chinese (unless the topic implies English).
            Adhere strictly to the "Identity" and "Style Guide".
            """

            logger.info(f"Generating article for topic: {topic}")
            actual_user_message = prompt # We inject the big prompt

        else:
            # === Follow-up Turn ===
            # user_input is just instructions like "Make it shorter"
            logger.info(f"Follow-up instruction: {user_input}")
            actual_user_message = user_input
            prompt = user_input

        # Call LLM — first pass: content generation
        draft, _ = self.llm.generate_response(prompt, history)

        # === Second pass: de-AI review (first turn only) ===
        if is_first_turn:
            # The review prompt below is a large verbatim editing rubric
            # (vocabulary/sentence-pattern kill lists) sent to the LLM.
            review_prompt = f"""You are a strict editor. A draft article has been written. Your ONLY job is to remove AI tone. Do NOT change facts or add new content.

=== TONE TARGET (最高优先级) ===
目标语感：像一个懂行的人在跟朋友聊天时随口说的。不端着，也不刻意耍帅。
两种AI味都要杀：
- 正式AI味：过渡词堆砌、排比对仗、空洞修饰、鸡汤结尾
- 油腻AI味：刻意装随意（"折腾去吧"）、强行造金句（"搞坏了不心疼，跑通了就是资产"）、给普通概念起酷名字（"隔离舱"、"暴力碰撞"、"最干净的契约"）、用一句俏皮话收尾装潇洒
正确的语感：平铺直叙，该说什么说什么，说完就停。不需要让读者觉得你很酷。

=== VOCABULARY KILL LIST (逐词扫描，命中即删或换大白话) ===
1. 夸张修饰: 极度脆弱、极其脆弱、毁灭性的、灾难级的、压倒性的、远超想象、不止一个量级、颠覆性的、革命性的、折磨人、天翻地覆、风险极高、瞬间崩断、精疲力竭、真金白银、本质上、其实是、所谓的
2. 大词空转: 范式转移、范式突变、开发范式、数字化容器、数字雇员、温和池架构、非确定性、阅后即焚、毫秒级就绪、毫秒级响应、赋能、底层逻辑、仪式性努力、深刻变革、暴力碰撞、最干净的契约、服务化的潜力、可扩展的执行信用、终极归宿、壁垒、分水岭、沉淀（动词修饰数据/经验时）、云端办公室、命门
3. 鸡汤/口号: 正途、值得跨越、安置灵魂、掌控感、才是王道、苦活累活交给、未来已来、危机就是转机、从玩具变成生产力、把环境变成了资产、安全底线、这才是成熟的做法、才是X的关键、不是建议是必须
4. 概念包装: 裸奔（=没防护）、修路/飙车（=基础工作/高级用法）、提款机（=攻击目标）、隔离舱（=容器）、数字雇员（=脚本）、执行信用（=资源）、云端办公室（=云服务器）、牢笼（=限制）、独立房间（=隔离环境）
5. 油腻收尾: "X去吧"式潇洒收尾、"搞坏了不心疼，跑通了就是Y"式金句、俏皮话总结全文、"把X关在Y里...把Y放在Z上"式排比收尾
6. 伪口语: 说白了、说实话、折腾这玩意儿

=== SENTENCE PATTERN KILL LIST (命中即改写) ===
1. 防御性写法："很多人觉得X，实际上Y"、"有人X，有人已经Y了"、"一部分人在X，另一部分人在Y" → 直接说判断，删掉铺垫
2. 对比式定义（最高优先级，必须逐句扫描）：
   触发词："不是……而是……"、"本质上是"、"其实是"、"所谓的"、"从X变成了Y"、"别把X当Y"、"才是……的区别/关键/核心"
   改写方法：删掉否定/对比框架，直接陈述后半句。具体示例：
   "不是怎么写代码，而是写成什么样才对" → "怎么写代码已经不够用了。更要命的是写成什么样才对"
   "本质上是给AI装导航" → "就是给AI装导航"
   "所谓的技术门槛，从熟练度变成了判断力" → "技术门槛变了。以前拼熟练度，现在拼判断力"
   "其实是经典软件工程的复兴" → "老一套方法论反而活过来了"
   "让AI从聊天框变成运行程序" → "AI不再只是聊天框，它能全天候盯着任务跑"
   "这才是它和ChatGPT的区别" → "ChatGPT你关了浏览器它就停了。这个一直在跑"
   "但这其实是底层模型的能力" → "但那是底层模型的能力"
3. 换皮夸张：用另一个夸张词替换夸张词不算修好 → 换成具体后果
4. 概念包装：给普通事物起酷名字 → 用最朴素的说法
5. 油腻金句：短句对仗、押韵、刻意工整的收尾句 → 删掉或拆成普通句
6. 比喻堆砌：全篇最多1个比喻，多余全删换直说
7. 鸡汤/口号结尾：删除励志或耍帅收尾，给具体建议或直接停
8. 首尾呼应：禁止结尾重复开头
9. 编号分段：线性论述删除序号改自然段落；并列对比可保留
10. 强行总结：删除"综上"/"一句话"式收尾
11. 关键词重复：同一个修辞性词汇全篇只能出现一次
12. 过渡词换皮："第一个要说的是"="首先"，"另一个问题是"="其次" → 直接删掉过渡，开门见山
13. 伪分类二分法："一部分人X，另一部分人Y"、"修路派/飙车派" → 删除分类框架，直接说事实

=== DRAFT TO REVIEW ===
{draft}

=== OUTPUT ===
Return ONLY the corrected text. No explanation, no meta-commentary.
"""
            logger.info("Running second-pass de-AI review on writing draft...")
            # Fresh (empty) history so the editor pass only sees the draft.
            response, _ = self.llm.generate_response(review_prompt, [])
        else:
            response = draft

        # Update History
        new_history = history.copy()
        new_history.append({"role": "user", "content": actual_user_message})
        new_history.append({"role": "assistant", "content": response})

        return response, new_history

class ReplyGenerator(BaseGenerator):
    """Drafts professional, high-EQ replies for challenging work situations."""

    def chat(self, user_input: str, history: List[Dict]) -> Tuple[str, List[Dict]]:
        """Draft a reply (first turn) or refine it (follow-up turns).

        Returns:
            Tuple of (reply text, history extended with this exchange).
        """
        if not history:
            # First turn: assemble the few-shot prompt from guide files,
            # indexed reply samples and semantic vault context.
            few_shot_static = self._read_file("REPLY-SAMPLE.md")
            few_shot_dynamic = "\n\n".join(self.indexer.get_reply_samples(count=3))
            core_philosophy = self._read_file("methodology.md")
            vault_context = self._semantic_search(user_input, top_k=3)

            prompt = f"""You are an experienced Product/R&D Manager.
            Your task is to draft a reply to a challenging situation.

            === CORE PHILOSOPHY ===
            {core_philosophy}

            === REPLY GUIDELINES ===
            1. Logic: Be clear and structured (e.g., numbered lists).
            2. Responsibility: Don't take unnecessary blame, but don't shirk core duties. Explain objective causes.
            3. Emotion: Be empathetic but professional.
            4. Conciseness: No fluff.

            === REFERENCE SAMPLES (FEW-SHOT) ===
            {few_shot_static}

            {few_shot_dynamic}

            {vault_context}

            === THE SITUATION ===
            {user_input}

            Draft a reply. If the context implies a specific role (PM, Dev Lead), assume that role.
            """
            logger.info(f"Generating reply for query: {user_input}")
            recorded_message = prompt
        else:
            # Follow-up turn: pass the refinement instruction through untouched.
            logger.info(f"Follow-up reply instruction: {user_input}")
            recorded_message = user_input
            prompt = user_input

        response, _ = self.llm.generate_response(prompt, history)

        # Extend a copy of the history with this exchange; caller's list untouched.
        updated_history = history + [
            {"role": "user", "content": recorded_message},
            {"role": "assistant", "content": response},
        ]
        return response, updated_history

class DecisionSupport(BaseGenerator):
    """Challenges and structures decisions using the user's GPA/IPO frameworks."""

    def chat(self, user_input: str, history: List[Dict]) -> Tuple[str, List[Dict]]:
        """Analyze a decision (first turn) or refine the analysis (follow-ups).

        Returns:
            Tuple of (analysis text, history extended with this exchange).
        """
        if not history:
            # First turn: combine the decision framework, core philosophy
            # and RAG notes into a single analysis prompt.
            framework = self._read_file("decision.md")
            philosophy = self._read_file("methodology.md")
            notes_context = "\n\n".join(self.indexer.search(user_input, limit=3))

            prompt = f"""You are a Decision Support Assistant.
            Your goal is to help the user think through a complex decision.

            === DECISION FRAMEWORK ===
            {framework}

            === CORE PHILOSOPHY ===
            {philosophy}

            === RELEVANT CONTEXT FROM NOTES ===
            {notes_context}

            === THE DECISION / ISSUE ===
            {user_input}

            === TASK ===
            1. Challenge the premise: Is this the right problem to solve?
            2. Apply the Framework: Use GPA (Goal, Priority, Alternatives) and IPO (Information, People, Objective reasoning) models.
            3. Pre-mortem: What is the worst that could happen?
            4. Provide a recommendation or a set of questions to clarify.
            """

            logger.info(f"Analyzing decision: {user_input}")
            recorded_message = prompt
        else:
            # Follow-up turn: forward the instruction as-is.
            logger.info(f"Follow-up decision instruction: {user_input}")
            recorded_message = user_input
            prompt = user_input

        response, _ = self.llm.generate_response(prompt, history)

        # Extend a copy of the history with this exchange; caller's list untouched.
        updated_history = history + [
            {"role": "user", "content": recorded_message},
            {"role": "assistant", "content": response},
        ]
        return response, updated_history

class SearchAnalyzer(BaseGenerator):
    """Answers questions from local notes, with web-search / recent-file tools.

    Single-step ReAct: one LLM call that may request tools, tool execution,
    then one follow-up call to produce the final answer from tool outputs.
    """

    def chat(self, user_input: str, history: List[Dict]) -> Tuple[str, List[Dict]]:
        """Answer ``user_input`` using local RAG context and optional tools.

        Returns:
            Tuple of (final answer text, updated conversation history
            including this turn's user message, any tool exchange, and
            the final assistant answer).
        """
        # Tool schema advertised to the LLM (OpenAI-style function specs).
        tools = [
            {
                "type": "function",
                "function": {
                    "name": "search_web",
                    "description": "Search the public web using DuckDuckGo. Use this when the internal notes are insufficient or when you need up-to-date external information.",
                    "parameters": {
                        "type": "object",
                        "properties": {
                            "query": {
                                "type": "string",
                                "description": "The search query"
                            }
                        },
                        "required": ["query"]
                    }
                }
            },
            {
                "type": "function",
                "function": {
                    "name": "list_recent_files",
                    "description": "List files in the Obsidian vault that have been modified recently. Use this when the user asks for 'recent updates', 'what's new', or changes in the last X days.",
                    "parameters": {
                        "type": "object",
                        "properties": {
                            "days": {
                                "type": "integer",
                                "description": "Number of days to check (default 5)",
                                "default": 5
                            }
                        },
                        "required": []
                    }
                }
            }
        ]

        is_first_turn = (len(history) == 0)

        # Current Turn Message
        if is_first_turn:
            rag_notes = self.indexer.search(user_input, limit=5)
            rag_text = "\n\n".join(rag_notes)

            logger.info(f"SearchAnalyzer retrieved {len(rag_notes)} local notes.")
            for note in rag_notes:
                # Extract source line for logging
                first_line = note.split('\n')[0]
                logger.debug(f" - Retrieved: {first_line}")

            prompt = f"""You are an Analyst using Butler (Obsidian Edition).

            === TASK ===
            Answer the user's question.
            CRITICAL: You MUST explicitly cite your sources in the output.

            === STRATEGY ===
            1. first: Check "RELEVANT LOCAL NOTES" provided below.
            2. second: If the notes answer the question, answer directly.
            3. third: If the notes are missing info, ambiguous, or outdated, USE THE `search_web` TOOL to find external info.

            === OUTPUT FORMAT ===
            - Begin with a summary.
            - When stating facts, append the source in brackets, e.g., "The server failed [Source: XLSmart 2025-12-25.md]" or "Market share is 20% [Source: Web Search]".
            - If data comes from BOTH, mention both.

            === RELEVANT LOCAL NOTES ===
            {rag_text}

            === USER QUESTION ===
            {user_input}
            """
            actual_user_message = prompt
        else:
            actual_user_message = user_input

        # Work on a copy so the caller's history is never mutated in place.
        exec_history = history.copy()
        exec_history.append({"role": "user", "content": actual_user_message})

        logger.info(f"Analyzer thinking on: {user_input}")

        # First pass: the LLM either answers directly or requests tools.
        # (The previous `for i in range(2)` loop was dead code — every branch
        # returned on the first iteration — so it is written sequentially.)
        response_text, tool_calls = self.llm.generate_response(
            message="",  # Message is already in history
            context=exec_history,
            tools=tools
        )

        if tool_calls:
            # LLM wants to use tools
            logger.info(f"LLM requested tool calls: {tool_calls}")

            # Append the assistant's request (with tool calls) to history
            exec_history.append({
                "role": "assistant",
                "content": f"[Tool Call Request: {tool_calls}]"
            })

            for tc in tool_calls:
                if tc["name"] == "search_web":
                    # Bug fix: default to "" so a malformed call never
                    # passes None to the search tool.
                    query = tc["args"].get("query", "")
                    search_result = WebSearchTool.search_web(query)

                    # Feed result back
                    exec_history.append({
                        "role": "user", # Using 'function' role would be better if our LLM class supports it fully, but 'user' works as system injection
                        "content": f"--- TOOL OUTPUT (search_web) ---\n{search_result}"
                    })

                elif tc["name"] == "list_recent_files":
                    days = tc["args"].get("days", 5)
                    # Call indexer method
                    recent_list = self.indexer.get_recent_files(days=days)

                    exec_history.append({
                        "role": "user",
                        "content": f"--- TOOL OUTPUT (list_recent_files) ---\n{recent_list}"
                    })

            # Second pass: final answer grounded in the tool outputs. On
            # failure, fall back to the first-pass text instead of crashing.
            try:
                response_text, _ = self.llm.generate_response(
                    message="",
                    context=exec_history,
                    tools=tools
                )
            except Exception as e:
                logger.error(f"Error in second ReAct loop: {e}")

        # Bug fix: record the final assistant answer in the returned history.
        # Previously it was omitted, so follow-up turns lost the model's
        # answer — every sibling generator appends it.
        exec_history.append({"role": "assistant", "content": response_text})
        return response_text, exec_history


class DeAIReviser(BaseGenerator):
    """Remove AI tone from articles - text-in, text-out mode."""

    def chat(self, user_input: str, history: List[Dict]) -> Tuple[str, List[Dict]]:
        """Revise a pasted article (first turn) or apply refinements (follow-ups).

        Returns:
            Tuple of (revised text, history extended with this exchange).
        """
        if not history:
            # First turn: the input IS the article; wrap it with the
            # POWELL_REVISE guide and the user's authentic writing samples.
            revise_guide = self._read_file("POWELL_REVISE.md")
            samples_text = "\n\n".join(self.indexer.get_writing_samples(count=3))

            prompt = f"""You are an expert editor specializing in removing AI-generated tone from articles.

Your task is to revise the article provided by the user to make it sound more human and authentic.

=== REVISION GUIDE ===
{revise_guide}

=== USER'S AUTHENTIC WRITING SAMPLES (for style reference) ===
{samples_text}

=== ARTICLE TO REVISE ===
{user_input}

=== TASK ===
1. Analyze the article and identify AI characteristics (过渡词堆积, 排比对仗, 空洞修饰词等)
2. Rewrite the article following the POWELL_REVISE.md principles
3. Preserve:
   - Original meaning and key information
   - Technical terms and proper nouns
   - Code blocks, lists, and formatting
   - Original language (Chinese/English)
4. Output the revised article directly (no meta-commentary like "Here's the revised version...")

CRITICAL: Output ONLY the revised article text. Do NOT add explanations before or after.
"""

            logger.info(f"DeAI Reviser: Processing article ({len(user_input)} chars)")
            recorded_message = prompt
        else:
            # Follow-up turn: forward the refinement instruction unchanged.
            logger.info(f"DeAI Reviser: Follow-up instruction - {user_input}")
            recorded_message = user_input
            prompt = user_input

        response, _ = self.llm.generate_response(prompt, history)

        # Extend a copy of the history with this exchange; caller's list untouched.
        updated_history = history + [
            {"role": "user", "content": recorded_message},
            {"role": "assistant", "content": response},
        ]
        return response, updated_history


class ZhihuGenerator(BaseGenerator):
    """Generate Zhihu-style answers based on style guide and user's knowledge base."""

    def chat(self, user_input: str, history: List[Dict]) -> Tuple[str, List[Dict]]:
        """Answer a Zhihu question (new-question turns) or refine a draft.

        New-question turns build a rich prompt ("Question | Core Ideas"
        format), generate a draft with an EMPTY LLM history, then run a
        second rule-enforcement pass. Refinement turns forward the
        instruction with the existing history and skip the second pass.

        Args:
            user_input: Question spec or refinement instruction.
            history: Prior conversation turns.

        Returns:
            Tuple of (answer text, history extended with this exchange).
        """
        is_first_turn = (len(history) == 0)
        built_rich_prompt = False  # tracks whether we ran the full question->draft pipeline

        # Pre-load rule files (needed for second-pass review)
        zhihu_bot_style = self._read_file("zhihu-bot-style.md")

        # Treat as a new question if:
        # - history is empty (genuine first turn), OR
        # - user input contains "|" (explicit "Question | Core Ideas" format), OR
        # - user input starts with known question triggers like "回答这个问题", OR
        # - user input contains a Zhihu question URL (pasting a new question link)
        is_new_question = (
            is_first_turn
            or "|" in user_input
            or user_input.strip().startswith("回答这个问题")
            or "zhihu.com/question/" in user_input
        )

        if is_new_question:
            # === First Turn: Build Rich Prompt ===
            # Expected input format: "Question | Core Ideas"
            parts = user_input.split("|", 1)
            question = parts[0].strip()
            core_ideas = parts[1].strip() if len(parts) > 1 else ""

            # 1. Load Style Guide (zhihu-style.md serves as writing standard, NOT as material)
            zhihu_style_guide = self._read_file("zhihu-style.md")
            # Bot-specific overrides (plain-text output, no bracket explanations, etc.)
            if zhihu_bot_style:
                zhihu_style_guide = zhihu_style_guide + "\n\n" + zhihu_bot_style

            # 2. RAG from Obsidian vault for actual content material
            rag_notes = self.indexer.search(question, limit=5)
            rag_text = "\n\n".join(rag_notes)

            # 3. Load methodology
            methodology = self._read_file("methodology.md")

            semantic_context = self._semantic_search(question, top_k=3)

            prompt = f"""You are answering a Zhihu question. Your goal is to write an authentic, experience-driven answer that sounds like a real human, not AI-generated.

=== YOUR IDENTITY & METHODOLOGY ===
{methodology}

=== ZHIHU STYLE GUIDE (FOLLOW THESE WRITING PRINCIPLES) ===
{zhihu_style_guide}

CRITICAL: The style guide above defines HOW to write (tone, structure, language patterns).
Do NOT treat zhihu-sample content as factual material. It's ONLY for style reference.

=== RELEVANT KNOWLEDGE FROM YOUR OBSIDIAN VAULT (USE AS ACTUAL MATERIAL) ===
{rag_text}

{semantic_context}

=== THE QUESTION ===
{question}

=== YOUR CORE IDEAS ===
{core_ideas}

=== TASK ===
1. Answer the question based on YOUR knowledge (from Obsidian vault) and core ideas
2. Write in Chinese (unless question implies English)
3. Be direct and opinionated — attach concrete evaluation to every fact (not feature listing)
4. If recent web context is provided, weave it in naturally
5. STRICT: Never add bracket explanations. Write "大语言模型" not "大语言模型（LLM）"
6. No self-promotion, no identity reveal, no past project references
7. No poetry, quotes, or "升华" endings. Stop when the point is made.
8. LENGTH: 500–1000 Chinese characters. Cut a point, not quality.

=== ANTI-AI CORE RULES (apply while writing) ===
- 语感目标：像懂行的人随手打字，不端着，不耍帅
- 禁止：排比对仗、过渡词堆砌（首先/其次/最后）、空洞修饰（深入/全面/显著）
- 禁止：比喻超过1个、概念包装（给普通事物起酷名字）、鸡汤结尾
- 禁止：防御性写法（很多人觉得X但其实Y）、对比式定义（不是X是Y、本质上是、其实是、所谓的、从X变成了Y、才是……的区别/关键）
- 陈述事实时必须附带主观判断，不要纯客观罗列

Begin writing the answer:
"""

            logger.info(f"Generating Zhihu answer for: {question[:50]}...")
            actual_user_message = prompt
            built_rich_prompt = True

        else:
            # === Follow-up Turn: User gives refinement instructions (e.g. "make it shorter") ===
            logger.info(f"Follow-up Zhihu instruction: {user_input}")
            actual_user_message = user_input
            prompt = user_input

        # Call LLM — first pass: content generation
        # For new questions, use empty history to avoid contamination from previous conversations
        llm_history = [] if is_new_question else history
        draft, _ = self.llm.generate_response(prompt, llm_history)

        # === Second pass: rule enforcement ===
        # Runs whenever a full draft was generated (built_rich_prompt=True),
        # regardless of whether history was empty or not.
        # Skipped on follow-up refinement turns (e.g. "make it shorter").
        run_review = built_rich_prompt
        if run_review:
            review_prompt = f"""You are a strict editor. A draft Zhihu answer has been written. Your ONLY job is to remove AI tone. Do NOT change facts or add new content.

=== TONE TARGET (最高优先级) ===
目标语感：像一个懂行的人在知乎随手打字回答。不端着，也不刻意耍帅。
两种AI味都要杀：
- 正式AI味：过渡词堆砌、排比对仗、空洞修饰、鸡汤结尾
- 油腻AI味：刻意装随意（"折腾去吧"）、强行造金句（"搞坏了不心疼，跑通了就是资产"）、给普通概念起酷名字（"隔离舱"、"暴力碰撞"、"最干净的契约"）、用一句俏皮话收尾装潇洒
正确的语感：平铺直叙，该说什么说什么，说完就停。不需要让读者觉得你很酷。

=== VOCABULARY KILL LIST (逐词扫描，命中即删或换大白话) ===
1. 夸张修饰: 极度脆弱、极其脆弱、毁灭性的、灾难级的、压倒性的、远超想象、不止一个量级、颠覆性的、革命性的、折磨人、天翻地覆、风险极高、瞬间崩断、精疲力竭、真金白银、本质上、其实是、所谓的
2. 大词空转: 范式转移、范式突变、开发范式、数字化容器、数字雇员、温和池架构、非确定性、阅后即焚、毫秒级就绪、毫秒级响应、赋能、底层逻辑、仪式性努力、深刻变革、暴力碰撞、最干净的契约、服务化的潜力、不可逆的浪潮、时代大背景、可扩展的执行信用、终极归宿、壁垒、分水岭、沉淀（动词修饰数据/经验时）、云端办公室、命门
3. 鸡汤/口号: 正途、值得跨越、安置灵魂、掌控感、才是王道、苦活累活交给、未来已来、危机就是转机、挑战与机遇并存、从玩具变成生产力、把环境变成了资产、安全底线、这才是成熟的做法、才是X的关键、不是建议是必须
4. 概念包装: 裸奔（=没防护）、修路/飙车（=基础工作/高级用法）、提款机（=攻击目标）、隔离舱（=容器）、数字雇员（=脚本）、执行信用（=资源）、云端办公室（=云服务器）、牢笼（=限制）、独立房间（=隔离环境）
5. 油腻收尾: "X去吧"式潇洒收尾、"搞坏了不心疼，跑通了就是Y"式金句、俏皮话总结全文、"把X关在Y里...把Y放在Z上"式排比收尾
6. 伪口语: 说白了、说实话、折腾这玩意儿

=== SENTENCE PATTERN KILL LIST (命中即改写) ===
1. 防御性写法："很多人觉得X，实际上Y"、"有人X，有人已经Y了"、"一部分人在X，另一部分人在Y" → 直接说判断，删掉铺垫
2. 对比式定义（最高优先级，必须逐句扫描）：
   触发词："不是……而是……"、"本质上是"、"其实是"、"所谓的"、"从X变成了Y"、"别把X当Y"、"才是……的区别/关键/核心"
   改写方法：删掉否定/对比框架，直接陈述后半句。具体示例：
   "不是怎么写代码，而是写成什么样才对" → "怎么写代码已经不够用了。更要命的是写成什么样才对"
   "本质上是给AI装导航" → "就是给AI装导航"
   "所谓的技术门槛，从熟练度变成了判断力" → "技术门槛变了。以前拼熟练度，现在拼判断力"
   "其实是经典软件工程的复兴" → "老一套方法论反而活过来了"
   "让AI从聊天框变成运行程序" → "AI不再只是聊天框，它能全天候盯着任务跑"
   "这才是它和ChatGPT的区别" → "ChatGPT你关了浏览器它就停了。这个一直在跑"
   "但这其实是底层模型的能力" → "但那是底层模型的能力"
3. 换皮夸张：用另一个夸张词替换夸张词不算修好 → 换成具体后果
4. 概念包装：给普通事物起酷名字 → 用最朴素的说法
5. 油腻金句：短句对仗、押韵、刻意工整的收尾句 → 删掉或拆成普通句
6. 比喻堆砌：全篇最多1个比喻，多余全删换直说
7. 鸡汤/口号结尾：删除励志或耍帅收尾，给具体建议或直接停
8. 首尾呼应：禁止结尾重复开头
9. 编号分段：线性论述删除序号改自然段落；并列对比可保留
10. 强行总结：删除"综上"/"一句话"式收尾
11. 关键词重复：同一个修辞性词汇全篇只能出现一次
12. 过渡词换皮："第一个要说的是"="首先"，"另一个问题是"="其次" → 直接删掉过渡，开门见山
13. 伪分类二分法："一部分人X，另一部分人Y"、"修路派/飙车派" → 删除分类框架，直接说事实

=== DRAFT TO REVIEW ===
{draft}

=== OUTPUT ===
Return ONLY the corrected Chinese text. No explanation, no meta-commentary, no English.
"""
            logger.info("Running second-pass rule enforcement on Zhihu draft...")
            # Fresh (empty) history so the editor pass only sees the draft.
            response, _ = self.llm.generate_response(review_prompt, [])
        else:
            response = draft

        # Update History
        new_history = history.copy()
        new_history.append({"role": "user", "content": actual_user_message})
        new_history.append({"role": "assistant", "content": response})

        return response, new_history