"""Intelligent context compression for multi-agent meetings.

Prevents whiteboard explosion by compressing scratchpad content
while preserving recent entries in full and keeping full transcripts.
"""

from __future__ import annotations

import logging
import re
from typing import TYPE_CHECKING

from src.agent import format_scratchpad_summary
from src.models import MeetingState, ScratchpadEntry

if TYPE_CHECKING:
    from src.llm_client import LLMClient

logger = logging.getLogger(__name__)

# Token thresholds
_CONTEXT_SUMMARY_THRESHOLD = 3000  # tokens — summarize background context
_RECENT_ENTRIES_TO_KEEP = 2
_TRUNCATE_CHARS = 200  # chars per entry in Level 1

_SUMMARIZE_SCRATCHPAD_PROMPT = (
    "Summarize the following meeting discussion into a structured briefing.\n"
    "Preserve: key arguments, disagreements, decisions, open questions.\n"
    "Remove: rhetorical flourishes, examples used for illustration, repetition.\n"
    "Output in bullet points, max 800 tokens."
)

_SUMMARIZE_CONTEXT_PROMPT = (
    "Summarize the following background material into a structured briefing.\n"
    "Preserve: key facts, requirements, constraints, goals.\n"
    "Remove: verbose explanations, examples, repetition.\n"
    "Output in bullet points, max 1500 tokens."
)


def estimate_tokens(text: str) -> int:
    """Roughly count tokens in mixed Chinese/English text.

    Dependency-free heuristic: CJK ideographs average ~1.5 characters
    per token, while English/ASCII text averages ~4 characters per token.

    Args:
        text: Input text to estimate.

    Returns:
        Approximate token count.
    """
    if not text:
        return 0

    # CJK Unified Ideographs (U+4E00-U+9FFF) and Extension A (U+3400-U+4DBF).
    cjk_count = sum(
        1
        for ch in text
        if "\u4e00" <= ch <= "\u9fff" or "\u3400" <= ch <= "\u4dbf"
    )
    ascii_like_count = len(text) - cjk_count

    return int(cjk_count / 1.5 + ascii_like_count / 4.0)


class ContextManager:
    """Keep meeting context within a token budget to avoid whiteboard explosion.

    Compression escalates through three levels:
    - Level 0: everything fits — return the raw text
    - Level 1: clip older entries to a fixed length, keep the newest in full
    - Level 2: replace older entries with an LLM-written summary

    Args:
        budget_tokens: Token ceiling for the scratchpad portion.
    """

    def __init__(self, budget_tokens: int = 6000) -> None:
        self.budget_tokens = budget_tokens

    def compact_scratchpad(
        self,
        entries: list[ScratchpadEntry],
        client: LLMClient,
    ) -> str:
        """Compress scratchpad entries until they fit the budget.

        Progressive disclosure: return everything when it fits (Level 0);
        otherwise clip the older entries and keep the most recent ones
        verbatim (Level 1); if that still overflows, have the LLM
        summarize the older entries (Level 2).

        Args:
            entries: All scratchpad entries.
            client: LLM client for Level 2 summarization.

        Returns:
            Compressed scratchpad text fitting within budget.
        """
        if not entries:
            return "No discussion yet."

        raw_text = format_scratchpad_summary(entries)
        raw_tokens = estimate_tokens(raw_text)

        # Level 0: already within budget — nothing to do.
        if raw_tokens <= self.budget_tokens:
            logger.debug("Level 0: scratchpad fits (%d tokens)", raw_tokens)
            return raw_text

        # Decide how many tail entries stay verbatim. With only a couple of
        # entries total, keep just the last one so there is still something
        # older to compress.
        total = len(entries)
        if total > _RECENT_ENTRIES_TO_KEEP:
            keep = _RECENT_ENTRIES_TO_KEEP
        elif total > 1:
            keep = 1
        else:
            keep = 0

        if keep > 0:
            older, newest = entries[:-keep], entries[-keep:]
        else:
            older, newest = entries, []

        tail_text = format_scratchpad_summary(newest) if newest else ""
        tail_tokens = estimate_tokens(tail_text)

        # Tokens left over for the older entries; never negative.
        head_budget = max(0, self.budget_tokens - tail_tokens)

        # Level 1: clip each older entry to a fixed character length.
        clipped_lines = [
            f"[{entry.agent_name}]: "
            + (
                entry.content
                if len(entry.content) <= _TRUNCATE_CHARS
                else entry.content[:_TRUNCATE_CHARS] + "..."
            )
            for entry in older
        ]
        clipped_text = "\n".join(clipped_lines)
        clipped_tokens = estimate_tokens(clipped_text)

        if clipped_lines and head_budget > 0 and clipped_tokens <= head_budget:
            logger.debug("Level 1: truncated early entries (%d tokens)", clipped_tokens + tail_tokens)
            if tail_text:
                return clipped_text + "\n" + tail_text
            return clipped_text

        # Level 2: ask the LLM to condense the older entries.
        logger.debug("Level 2: LLM summarization")
        if not older:
            # Defensive fallback: summarize everything at once.
            digest = client.chat(
                system=_SUMMARIZE_SCRATCHPAD_PROMPT,
                messages=[{"role": "user", "content": raw_text}],
            )
            return f"[Discussion Summary]\n{digest}"

        digest = client.chat(
            system=_SUMMARIZE_SCRATCHPAD_PROMPT,
            messages=[{"role": "user", "content": format_scratchpad_summary(older)}],
        )
        if tail_text:
            return f"[Earlier Discussion Summary]\n{digest}\n\n[Recent Entries]\n{tail_text}"
        return f"[Discussion Summary]\n{digest}"

    def build_pm_context(
        self,
        state: MeetingState,
        client: LLMClient,
    ) -> str:
        """Assemble the compressed context shown to the PM agent.

        The first round (round 0) embeds the full background context;
        later rounds substitute the stored summary when one exists.

        Args:
            state: Current meeting state.
            client: LLM client for compression.

        Returns:
            Formatted string for PM's user message.
        """
        sections: list[str] = [f"## Meeting Topic\n\n{state.topic}\n"]

        if state.context:
            # Full context on round 0 or while no summary has been produced.
            if state.current_round == 0 or not state.context_summary:
                sections.append(f"## Background Context\n\n{state.context}\n")
            else:
                sections.append(f"## Background Context (Summary)\n\n{state.context_summary}\n")

        board = self.compact_scratchpad(state.scratchpad, client)
        sections.append(f"## Current Whiteboard (Round {state.current_round})\n\n{board}")

        return "\n".join(sections)

    def build_agent_context(
        self,
        state: MeetingState,
        target_agent: str,
        pm_prompt: str,
        client: LLMClient,
    ) -> str:
        """Assemble the compressed context for one participating agent.

        Agents receive the background summary (falling back to the raw
        context) plus the compressed scratchpad and the PM's question.

        Args:
            state: Current meeting state.
            target_agent: Name of the agent receiving this context.
            pm_prompt: The PM's specific question for this agent.
            client: LLM client for compression.

        Returns:
            Formatted string for the agent's scratchpad_summary parameter.
        """
        sections: list[str] = []

        # Prefer the condensed background when one exists.
        if state.context_summary:
            sections.append(f"[Background]\n{state.context_summary}\n")
        elif state.context:
            sections.append(f"[Background]\n{state.context}\n")

        sections.append(self.compact_scratchpad(state.scratchpad, client))

        if pm_prompt:
            sections.append(f"\n[PM's Question]\n{pm_prompt}")

        return "\n".join(sections)

    def summarize_context(
        self,
        context: str,
        client: LLMClient,
    ) -> str:
        """Condense long background context via the LLM.

        Short context (at or under the threshold) is returned unchanged.

        Args:
            context: Original background context.
            client: LLM client for summarization.

        Returns:
            Original context if short, LLM summary if long.
        """
        if not context:
            return ""

        if estimate_tokens(context) <= _CONTEXT_SUMMARY_THRESHOLD:
            return context

        return client.chat(
            system=_SUMMARIZE_CONTEXT_PROMPT,
            messages=[{"role": "user", "content": context}],
        )