"""Zhihu Playwright Engine — authentication and answer publishing.

Handles:
- Persistent session via zhihu_state.json (cookies)
- QR-code login flow with Slack notification + 2-minute polling
- Answer publishing via headless Chromium + playwright-stealth
"""

import json
import time
from pathlib import Path
from typing import Optional

from slack_sdk import WebClient

from health.utils.logging_config import setup_logger

logger = setup_logger(__name__)


# ── Selectors ─────────────────────────────────────────────────────────────────
# Zhihu may change its DOM, so every click/wait operation iterates over a list
# of candidate selectors and tries each one in order before giving up.

# Candidate locations of the QR code on the sign-in page. Tried in order by
# ZhihuPlaywrightEngine._capture_qr(); the first one that appears is screenshotted.
_QR_IMAGE_SELECTORS = [
    ".SignFlow-qrcode img",
    ".LoginQrcode img",
    "canvas.SignFlow-qrcodeCanvas",      # newer Zhihu: QR drawn on canvas
    ".SignFlow-qrcode canvas",
]

# Candidate selectors for the "scan QR code to log in" tab on the sign-in form.
# Used by ZhihuPlaywrightEngine._switch_to_qr_tab().
_QR_TAB_SELECTORS = [
    "div.SignFlow-tab:has-text('扫码登录')",
    "li.SignFlow-tab:has-text('扫码')",
    "span:has-text('扫码登录')",
]

# Elements whose presence on the home page is treated as proof of an
# authenticated session. Used by ZhihuPlaywrightEngine._is_logged_in().
_LOGIN_CHECK_SELECTORS = [
    ".AppHeader-profileAvatar",
    "div[data-za-detail-view-path-module='AppHeader'] img.Avatar",
    ".TopstoryTabs",                     # home page tabs visible only when logged in
]

# Candidate selectors for the "write answer" / "edit answer" button on a
# question page, ordered from text-based matches to broad class-based matches.
# Used by ZhihuPlaywrightEngine._click_write_answer().
_WRITE_ANSWER_SELECTORS = [
    "button:has-text('写回答')",
    "a:has-text('写回答')",
    "button:has-text('编辑回答')",
    "a:has-text('编辑回答')",
    "[role='button']:has-text('写回答')",
    "[role='button']:has-text('编辑回答')",
    "[data-za-detail-view-element-name='WriteAnswer']",
    "[data-za-element-name='QuestionDetail_WriteAnswer']",
    ".QuestionDetailPageHeaderRight button",
    ".QuestionDetail-buttonGroup button",
    "[class*='WriteAnswer']",
]

# Candidate selectors for the contenteditable answer editor.
# Used by ZhihuPlaywrightEngine._fill_answer(); the JavaScript snippets in that
# method hard-code an equivalent selector list, so keep the two in sync.
_EDITOR_SELECTORS = [
    ".DraftEditor-content[contenteditable='true']",
    ".RichText [contenteditable='true']",
    "[contenteditable='true'].public-DraftEditor-content",
]

# Candidate selectors for the publish/submit button, tried in order by
# ZhihuPlaywrightEngine._submit_answer().
_PUBLISH_SELECTORS = [
    "button:has-text('发布回答')",           # edit existing answer
    ".AnswerForm button:has-text('发布')",   # new answer, scoped to form
    ".AnswerForm button:has-text('提交')",
    "button:has-text('发布')",
    "button:has-text('提交')",
    "button.SubmitAnswer",
    "[data-za-element-name='SubmitAnswer']",
]


# ── Constants ─────────────────────────────────────────────────────────────────

# Entry points used for the login flow and for the logged-in check / warm-up.
ZHIHU_SIGNIN_URL = "https://www.zhihu.com/signin"
ZHIHU_HOME_URL   = "https://www.zhihu.com/"

# Fixed location the QR screenshot is written to before it is uploaded to Slack.
_QR_TMP_PATH      = Path("/tmp/zhihu_qr.png")
_LOGIN_TIMEOUT_S  = 120   # total seconds to wait for a scan, per QR attempt
_POLL_INTERVAL_S  = 5     # per-iteration wait_for_url timeout while polling for login
_MAX_QR_ATTEMPTS  = 2     # total QR codes presented (initial + refreshes) before giving up


class ZhihuPlaywrightEngine:
    """Playwright-powered Zhihu auth and answer publisher.

    Example:
        engine = ZhihuPlaywrightEngine(slack_client, channel_id)
        if engine.ensure_logged_in():
            engine.publish_answer(question_url, answer_text)
    """

    def __init__(
        self,
        slack_client: WebClient,
        notify_channel: str,
        state_file: Optional[Path] = None,
    ) -> None:
        """Initialize the engine.

        Args:
            slack_client: Authenticated Slack WebClient for QR upload.
            notify_channel: Slack channel/DM ID to send QR images to.
            state_file: Override path for session cookie persistence.
                Defaults to <project_root>/data/zhihu_state.json.
        """
        self.slack_client = slack_client
        self.notify_channel = notify_channel
        self.state_file = state_file or (
            Path(__file__).resolve().parents[3] / "data" / "zhihu_state.json"
        )
        self.state_file.parent.mkdir(parents=True, exist_ok=True)

    # ── Public API ─────────────────────────────────────────────────────────────

    def ensure_logged_in(self) -> bool:
        """Verify session validity; run QR login flow if cookie is expired.

        Launches a headless Chromium, restores the saved cookies (if any) and
        checks the home page for logged-in UI. When there is no usable
        session, the QR-code login flow is started instead.

        Returns:
            True if a valid authenticated session exists after this call.

        Raises:
            Exception: Errors from launching the browser or from the initial
                navigation of the QR flow are not caught here and propagate.
        """
        from playwright.sync_api import sync_playwright
        from playwright_stealth import Stealth

        with sync_playwright() as p:
            browser = p.chromium.launch(headless=True)
            # try/finally so Chromium is closed on every exit path, including
            # exceptions raised by navigation or the QR flow (mirrors
            # publish_answer()).
            try:
                context = browser.new_context(
                    viewport={"width": 1280, "height": 800},
                    user_agent=(
                        "Mozilla/5.0 (Windows NT 10.0; Win64; x64) "
                        "AppleWebKit/537.36 (KHTML, like Gecko) "
                        "Chrome/122.0.0.0 Safari/537.36"
                    ),
                )
                page = context.new_page()
                Stealth().apply_stealth_sync(page)

                # ── Try saved session first ──
                if self._load_state(context):
                    if self._is_logged_in(page):
                        logger.info("Zhihu: session valid (loaded from state file)")
                        return True
                    logger.info("Zhihu: saved session expired, starting QR login")
                else:
                    logger.info("Zhihu: no state file, starting QR login")

                return self._qr_login_flow(context, page)
            finally:
                browser.close()

    def publish_answer(self, question_url: str, answer_text: str) -> str:
        """Navigate to a Zhihu question page and publish an answer.

        Restores the saved session into a fresh headless Chromium context,
        visits the home page and then the question page, and drives the
        click-write / fill / submit helpers.

        Args:
            question_url: URL of the target Zhihu question. The first
                ``http(s)://`` run of non-whitespace characters is extracted
                and trailing punctuation and slashes are stripped.
            answer_text: Plain-text answer body to publish.

        Returns:
            The URL returned by ``_submit_answer()``: the published answer's
            URL when one is found on the page, otherwise the current page URL.

        Raises:
            RuntimeError: If no session is loaded, the URL is invalid, the
                page redirects to sign-in or looks like a 404, or a required
                page element cannot be found. Playwright navigation errors
                are not caught and propagate unchanged.
        """
        from playwright.sync_api import sync_playwright
        from playwright_stealth import Stealth

        with sync_playwright() as p:
            browser = p.chromium.launch(headless=True)
            context = browser.new_context(
                viewport={"width": 1280, "height": 800},
                user_agent=(
                    "Mozilla/5.0 (Windows NT 10.0; Win64; x64) "
                    "AppleWebKit/537.36 (KHTML, like Gecko) "
                    "Chrome/122.0.0.0 Safari/537.36"
                ),
            )
            try:
                if not self._load_state(context):
                    raise RuntimeError(
                        "No session loaded. Call ensure_logged_in() before publishing."
                    )

                page = context.new_page()
                Stealth().apply_stealth_sync(page)

                # Extract the bare URL: anything before the scheme (BOM,
                # invisible characters, surrounding text) is dropped.
                import re as _re
                _m = _re.search(r'https?://\S+', question_url)
                if not _m:
                    raise RuntimeError(f"Invalid question URL: {question_url!r}")
                # Strip trailing HTML/Slack/markdown junk chars (>, ", ', ), ], etc.)
                question_url = _re.sub(r'[>\'")\].,;]+$', '', _m.group(0)).rstrip("/")

                logger.info(f"Navigating to question: {question_url}")

                # Visit home page first to warm up the session before going to the question.
                # Zhihu's anti-bot layer returns 404 when a headless browser jumps straight
                # to a question URL without prior navigation history.
                page.goto(ZHIHU_HOME_URL, wait_until="domcontentloaded", timeout=15000)
                page.wait_for_timeout(2000)

                page.goto(question_url, wait_until="domcontentloaded", timeout=30000)
                page.wait_for_timeout(3000)   # let React finish rendering

                # Detect redirect to login or 404 early
                if "signin" in page.url or "login" in page.url:
                    raise RuntimeError(
                        "知乎 session 已失效，导航时跳转至登录页。"
                        "请重新点击「确认发布」重新触发扫码登录。"
                    )
                is_404 = page.evaluate(
                    """() => {
                        const t = document.title || '';
                        const b = document.body ? document.body.textContent : '';
                        return t.includes('404') || b.includes('你访问的页面不存在')
                            || b.includes('页面不存在');
                    }"""
                )
                if is_404:
                    raise RuntimeError(
                        f"问题页面返回 404，可能该问题已删除或 URL 有误：{question_url}"
                    )

                self._click_write_answer(page)
                self._fill_answer(page, answer_text)
                answer_url = self._submit_answer(page)

                # Refresh state (cookies may have been renewed mid-session).
                # Best-effort only: the answer has already been submitted, so a
                # failed write must not be reported as a publish failure (a
                # caller retry would post a duplicate answer).
                try:
                    self._save_state(context)
                except Exception as e:
                    logger.warning(f"Could not refresh saved session after publish: {e}")
                return answer_url

            finally:
                browser.close()

    # ── Session Persistence ────────────────────────────────────────────────────

    def _load_state(self, context) -> bool:
        """Restore saved cookies into a browser context.

        Args:
            context: Playwright BrowserContext to inject cookies into.

        Returns:
            True if state file existed and cookies were loaded.
        """
        if not self.state_file.exists():
            return False
        try:
            data = json.loads(self.state_file.read_text(encoding="utf-8"))
            cookies = data.get("cookies", [])
            if not cookies:
                return False
            context.add_cookies(cookies)
            logger.debug(f"Loaded {len(cookies)} cookies from {self.state_file}")
            return True
        except Exception as e:
            logger.warning(f"State load failed: {e}")
            return False

    def _save_state(self, context) -> None:
        """Persist current browser cookies to the state file.

        The JSON payload (``cookies`` plus a ``saved_at`` Unix timestamp) is
        written to a sibling temporary file and then renamed over the state
        file, so an interrupted write cannot leave a truncated state file
        behind.

        Args:
            context: Playwright BrowserContext after successful authentication.

        Raises:
            OSError: If the temporary file cannot be written or renamed.
        """
        state = {
            "cookies": context.cookies(),
            "saved_at": time.time(),
        }
        payload = json.dumps(state, ensure_ascii=False, indent=2)

        # Same directory as the target so the rename stays on one filesystem.
        tmp_path = self.state_file.with_name(self.state_file.name + ".tmp")
        try:
            tmp_path.write_text(payload, encoding="utf-8")
            tmp_path.replace(self.state_file)
        finally:
            # No-op after a successful rename; removes the leftover on failure.
            tmp_path.unlink(missing_ok=True)

        logger.info(f"Saved {len(state['cookies'])} cookies to {self.state_file}")

    # ── Login Check ────────────────────────────────────────────────────────────

    def _is_logged_in(self, page) -> bool:
        """Open the Zhihu home page and look for logged-in-only UI.

        Args:
            page: Playwright Page whose context already carries the cookies.

        Returns:
            True as soon as any selector in ``_LOGIN_CHECK_SELECTORS`` appears
            (each is given 4 seconds); False if navigation fails or none
            of them shows up.
        """
        try:
            page.goto(ZHIHU_HOME_URL, wait_until="domcontentloaded", timeout=15000)
        except Exception as e:
            logger.debug(f"Login check navigation failed: {e}")
            return False

        for marker in _LOGIN_CHECK_SELECTORS:
            try:
                page.wait_for_selector(marker, timeout=4000)
                logger.debug(f"Login confirmed via selector: {marker}")
                return True
            except Exception:
                # This marker did not appear in time; try the next candidate.
                pass
        return False

    # ── QR Code Login Flow ─────────────────────────────────────────────────────

    def _qr_login_flow(self, context, page) -> bool:
        """Run the QR login loop, presenting at most ``_MAX_QR_ATTEMPTS`` codes.

        Opens the sign-in page and selects the QR tab. Each attempt then
        captures the code, uploads it to Slack and polls for a successful
        login; between attempts the page is reloaded to obtain a fresh code.

        Args:
            context: Playwright BrowserContext whose cookies are saved on success.
            page: Active Playwright Page.

        Returns:
            True if login was detected during one of the attempts; False if
            the QR code could not be captured or every attempt timed out.
        """
        page.goto(ZHIHU_SIGNIN_URL, wait_until="domcontentloaded", timeout=15000)
        self._switch_to_qr_tab(page)

        attempt = 0
        while attempt < _MAX_QR_ATTEMPTS:
            attempt += 1
            logger.info(f"QR login attempt {attempt}/{_MAX_QR_ATTEMPTS}")

            qr_path = self._capture_qr(page)
            if qr_path is None:
                # Without an image there is nothing for the user to scan.
                self._notify_slack(f"❌ 无法截取知乎二维码（第 {attempt} 次），放弃登录。")
                return False

            self._send_qr_to_slack(qr_path, attempt)

            logged_in = self._poll_for_login(page)
            if logged_in:
                self._save_state(context)
                self._notify_slack("✅ 知乎登录成功，Cookie 已保存。")
                return True

            if attempt == _MAX_QR_ATTEMPTS:
                # Last attempt timed out; no point fetching another code.
                break

            logger.info("QR timed out, refreshing page for new QR code...")
            page.reload(wait_until="domcontentloaded")
            self._switch_to_qr_tab(page)

        self._notify_slack(
            f"❌ 知乎二维码扫码超时（已重试 {_MAX_QR_ATTEMPTS} 次）。\n"
            "请稍后重新发起 `mode zhihu-hunter` 流程。"
        )
        return False

    def _switch_to_qr_tab(self, page) -> None:
        """Click the QR-code tab on the login page if it exists.

        Each candidate in ``_QR_TAB_SELECTORS`` gets up to 2 seconds to become
        visible. The first one that does is clicked; if none appears the
        method returns without raising.

        Args:
            page: Page showing the Zhihu sign-in form.
        """
        for sel in _QR_TAB_SELECTORS:
            try:
                tab = page.locator(sel).first
                # Locator.is_visible() returns immediately and ignores its
                # timeout argument, so it misses a tab that renders shortly
                # after "domcontentloaded". wait_for() actually waits and
                # raises on timeout, which moves us to the next candidate.
                tab.wait_for(state="visible", timeout=2000)
                tab.click()
                page.wait_for_timeout(800)
                logger.debug(f"Switched to QR tab via: {sel}")
                return
            except Exception:
                continue

    def _capture_qr(self, page) -> Optional[Path]:
        """Save a screenshot of the QR code to ``_QR_TMP_PATH``.

        Element-level screenshots are attempted for every selector in
        ``_QR_IMAGE_SELECTORS`` (8 seconds each). If none works, a screenshot
        of the current viewport is taken instead.

        Args:
            page: Page currently showing the QR code.

        Returns:
            ``_QR_TMP_PATH`` once an image has been written, or None when even
            the viewport screenshot fails.
        """
        target = str(_QR_TMP_PATH)

        # Preferred: crop to the QR element itself.
        for sel in _QR_IMAGE_SELECTORS:
            try:
                page.wait_for_selector(sel, timeout=8000)
                page.locator(sel).first.screenshot(path=target)
                logger.info(f"QR captured via selector '{sel}' → {_QR_TMP_PATH}")
            except Exception:
                continue
            else:
                return _QR_TMP_PATH

        # Last resort: the user gets the whole visible login page.
        logger.warning("Element QR selectors failed; falling back to full-page screenshot")
        try:
            page.screenshot(path=target, full_page=False)
        except Exception as e:
            logger.error(f"Full-page screenshot also failed: {e}")
            return None
        return _QR_TMP_PATH

    def _send_qr_to_slack(self, qr_path: Path, attempt: int) -> None:
        """Post the QR code image to Slack together with scan instructions.

        Upload failures are logged and otherwise ignored, so the login flow
        keeps polling even if the image did not reach Slack.

        Args:
            qr_path: Local path to the QR code image.
            attempt: Current attempt number, shown in the title and caption.
        """
        caption_lines = (
            f"🔑 *请用知乎 App 扫描以下二维码完成登录*（第 {attempt}/{_MAX_QR_ATTEMPTS} 次）",
            f"⏱ 二维码有效期约 2 分钟，超时将自动刷新。",
        )
        caption = "\n".join(caption_lines)
        title = f"知乎登录二维码（第 {attempt} 次）"

        try:
            with qr_path.open("rb") as image:
                self.slack_client.files_upload_v2(
                    channel=self.notify_channel,
                    file=image,
                    filename="zhihu_qr.png",
                    title=title,
                    initial_comment=caption,
                )
            logger.info("QR image uploaded to Slack")
        except Exception as e:
            logger.error(f"Slack QR upload failed: {e}")

    def _poll_for_login(self, page) -> bool:
        """Wait for the page to leave the sign-in URL, up to a deadline.

        Repeatedly calls ``page.wait_for_url`` with a ``_POLL_INTERVAL_S``
        timeout until either a zhihu.com URL without "signin" is reached or
        ``_LOGIN_TIMEOUT_S`` seconds have elapsed.

        Args:
            page: Page that started at ZHIHU_SIGNIN_URL.

        Returns:
            True if the redirect was observed before the deadline, else False.
        """
        def _left_signin(url):
            return "zhihu.com" in url and "signin" not in url

        stop_at = time.monotonic() + _LOGIN_TIMEOUT_S
        while True:
            if time.monotonic() >= stop_at:
                return False
            try:
                # Raises when the predicate is not satisfied within the interval.
                page.wait_for_url(_left_signin, timeout=_POLL_INTERVAL_S * 1000)
                logger.info(f"Login confirmed — redirected to {page.url}")
                return True
            except Exception:
                remaining = int(stop_at - time.monotonic())
                logger.debug(f"Waiting for QR scan... {remaining}s left")

    # ── Publishing Helpers ─────────────────────────────────────────────────────

    def _click_write_answer(self, page) -> None:
        """Find and click the 写回答 button on a question page.

        Strategies, in order:
          1. Each selector in ``_WRITE_ANSWER_SELECTORS`` (3 seconds each).
          2. A JavaScript text-node walk that matches known button labels
             after stripping zero-width characters.
        If both fail, a debug screenshot is uploaded to Slack (best-effort)
        and a RuntimeError is raised.

        Args:
            page: Page showing a Zhihu question.

        Raises:
            RuntimeError: If no write-answer button is found by any strategy.
        """
        # Scroll to trigger lazy-loaded elements
        page.evaluate("window.scrollBy(0, 400)")
        page.wait_for_timeout(800)

        for sel in _WRITE_ANSWER_SELECTORS:
            try:
                btn = page.locator(sel).first
                btn.wait_for(state="visible", timeout=3000)
                btn.click()
                page.wait_for_timeout(1500)
                logger.debug(f"Clicked '写回答' via: {sel}")
                return
            except Exception:
                continue

        # Diagnostic: log every short button/link text to help debug DOM changes
        all_texts = page.evaluate(
            """() => {
                const els = document.querySelectorAll('button, a, [role="button"]');
                return Array.from(els)
                    .map(e => e.textContent.trim())
                    .filter(t => t.length > 0 && t.length < 30);
            }"""
        )
        logger.warning(f"'写回答' not found. Buttons/links on page: {all_texts[:30]}")

        # JS fallback v2: use TreeWalker but strip zero-width chars before comparing
        # (Zhihu wraps button text with \u200b which JS .trim() does NOT remove)
        clicked = page.evaluate(
            """() => {
                const TARGETS = new Set(['写回答','回答问题','回答','发起回答','编辑回答']);
                const ZW_RE = /[\u200b\u200c\u200d\ufeff\u00a0]/g;
                const walker = document.createTreeWalker(
                    document.body, NodeFilter.SHOW_TEXT
                );
                let node;
                while ((node = walker.nextNode())) {
                    const cleaned = node.textContent.replace(ZW_RE, '').trim();
                    if (TARGETS.has(cleaned)) {
                        const el = node.parentElement;
                        const btn = el.closest('button, a, [role="button"]') || el;
                        btn.click();
                        return cleaned;
                    }
                }
                return null;
            }"""
        )
        if clicked:
            page.wait_for_timeout(1500)
            logger.info(f"Clicked '写回答' via text-node fallback (text='{clicked}')")
            return

        # Upload a screenshot to Slack so we can see what the page looks like.
        # TemporaryDirectory replaces the deprecated, race-prone
        # tempfile.mktemp() and removes the file even when the screenshot or
        # the upload raises.
        try:
            import tempfile
            with tempfile.TemporaryDirectory() as tmp_dir:
                tmp_ss = Path(tmp_dir) / "zhihu_debug.png"
                page.screenshot(path=str(tmp_ss), full_page=False)
                self.slack_client.files_upload_v2(
                    channel=self.notify_channel,
                    file=str(tmp_ss),
                    filename="zhihu_debug.png",
                    initial_comment=(
                        "🔍 发布调试截图：找不到「写回答」按钮，"
                        "请确认页面状态（是否有弹窗/登录墙/按钮文字变了）。"
                    ),
                )
        except Exception as _ss_err:
            logger.warning(f"Debug screenshot upload failed: {_ss_err}")

        raise RuntimeError(
            "Could not find '写回答' button. Zhihu DOM may have changed — "
            "update _WRITE_ANSWER_SELECTORS in zhihu_playwright_engine.py"
        )

    def _fill_answer(self, page, text: str) -> None:
        """Put the answer text into Zhihu's contenteditable editor.

        Three insertion strategies are attempted in order:
          1. ``document.execCommand('insertText')`` after selecting all.
          2. A synthetic ``paste`` ClipboardEvent carrying the text.
          3. ``page.keyboard.type()`` after clearing the editor.
        Strategies 1 and 2 count as successful when the editor's text length
        reaches at least half of ``len(text)``; strategy 3 is not verified.

        Args:
            page: Page with the answer editor open.
            text: Plain-text answer to insert.

        Raises:
            RuntimeError: If no selector in ``_EDITOR_SELECTORS`` matches.
        """
        # Find the contenteditable node; the first selector that appears wins.
        editor = None
        for candidate in _EDITOR_SELECTORS:
            try:
                page.wait_for_selector(candidate, timeout=8000)
                editor = page.locator(candidate).first
                logger.debug(f"Found editor via: {candidate}")
                break
            except Exception:
                pass

        if editor is None:
            raise RuntimeError(
                "Could not find answer editor. "
                "Update _EDITOR_SELECTORS in zhihu_playwright_engine.py"
            )

        editor.click()
        page.wait_for_timeout(500)

        # An insertion is accepted once the editor holds this many characters.
        min_chars = len(text) // 2

        def _inserted_enough() -> bool:
            """Report whether the editor text has reached ``min_chars``."""
            current = page.evaluate(
                """() => {
                    const ed = document.querySelector(
                        '.DraftEditor-content[contenteditable="true"],'
                        + '.RichText [contenteditable="true"],'
                        + '[contenteditable="true"].public-DraftEditor-content'
                    );
                    return ed ? ed.textContent.length : 0;
                }"""
            )
            return current >= min_chars

        # Attempt 1: replace the selection through execCommand('insertText').
        page.evaluate(
            """(text) => {
                const ed = document.querySelector(
                    '.DraftEditor-content[contenteditable="true"],'
                    + '.RichText [contenteditable="true"],'
                    + '[contenteditable="true"].public-DraftEditor-content'
                );
                if (!ed) return false;
                ed.focus();
                document.execCommand('selectAll');
                document.execCommand('insertText', false, text);
                return true;
            }""",
            text,
        )
        page.wait_for_timeout(600)
        if _inserted_enough():
            logger.info(f"Filled {len(text)} chars via execCommand insertText")
            return

        # Attempt 2: dispatch a synthetic paste event carrying the text.
        logger.warning("execCommand insertText insufficient; trying ClipboardEvent")
        page.evaluate(
            """(text) => {
                const ed = document.querySelector(
                    '.DraftEditor-content[contenteditable="true"],'
                    + '.RichText [contenteditable="true"],'
                    + '[contenteditable="true"].public-DraftEditor-content'
                );
                if (!ed) return;
                ed.focus();
                const dt = new DataTransfer();
                dt.setData('text/plain', text);
                ed.dispatchEvent(new ClipboardEvent('paste', {
                    bubbles: true, cancelable: true, clipboardData: dt
                }));
            }""",
            text,
        )
        page.wait_for_timeout(600)
        if _inserted_enough():
            logger.info(f"Filled {len(text)} chars via ClipboardEvent")
            return

        # Attempt 3: clear the editor and type the text key by key.
        logger.warning("ClipboardEvent also insufficient; falling back to keyboard.type()")
        editor.click()
        for key in ("Control+a", "Delete"):
            page.keyboard.press(key)
        page.keyboard.type(text, delay=8)
        page.wait_for_timeout(500)
        logger.info(f"Filled {len(text)} chars via keyboard.type()")

    def _submit_answer(self, page) -> str:
        """Click the publish button and return the answer URL.

        Tries each selector in ``_PUBLISH_SELECTORS`` until one can be
        clicked. After the click it waits 4 seconds and looks for links
        matching ``/question/<id>/answer/<id>`` in the DOM, returning the
        last one found. Falls back to the current page URL if no such link
        exists or the lookup itself fails.

        Only locating and clicking the button is retried across selectors.
        Once a click has succeeded, later errors never trigger a second
        click on another candidate, so the answer cannot be submitted twice.

        Args:
            page: Page with the answer text already filled in.

        Returns:
            URL of the published answer, or the current page URL as fallback.

        Raises:
            RuntimeError: If the publish button cannot be found.
        """
        for sel in _PUBLISH_SELECTORS:
            try:
                btn = page.locator(sel).first
                btn.wait_for(state="visible", timeout=5000)
                btn.click()
            except Exception:
                continue

            logger.info(f"Clicked publish via: {sel}")

            try:
                # Zhihu keeps making background requests; don't wait for networkidle.
                page.wait_for_timeout(4000)

                # Try to extract the actual answer URL from the page
                answer_url = page.evaluate(
                    """() => {
                        // Look for a canonical answer link on the page
                        const links = Array.from(
                            document.querySelectorAll('a[href*="/answer/"]')
                        );
                        // Prefer links that point to the current question
                        const sorted = links
                            .map(a => a.href)
                            .filter(h => /\\/question\\/\\d+\\/answer\\/\\d+/.test(h));
                        return sorted.length ? sorted[sorted.length - 1] : null;
                    }"""
                )
            except Exception as e:
                # E.g. the page navigated while evaluating. The submit click
                # already happened, so report success with the fallback URL.
                logger.warning(f"Could not read answer URL after publishing: {e}")
                answer_url = None

            if answer_url:
                logger.info(f"Answer published: {answer_url}")
                return answer_url

            # Fallback: return the current page URL
            logger.info(f"Answer submitted (URL fallback): {page.url}")
            return page.url

        raise RuntimeError(
            "Could not find publish button. "
            "Update _PUBLISH_SELECTORS in zhihu_playwright_engine.py"
        )

    # ── Utility ────────────────────────────────────────────────────────────────

    def _notify_slack(self, message: str) -> None:
        """Send a plain text message to the notify channel.

        Args:
            message: Message text (supports Slack mrkdwn formatting).
        """
        try:
            self.slack_client.chat_postMessage(
                channel=self.notify_channel,
                text=message,
            )
        except Exception as e:
            logger.error(f"Slack notify failed: {e}")
