"""Deterministic context collection for meeting inputs."""

from __future__ import annotations

import hashlib
from pathlib import Path

from core.models import ContextBundle, ContextDocument, ContextSource, MeetingInput


DEFAULT_EXCLUDED_DIRS = {".git", "node_modules", "venv", ".venv", "__pycache__"}


def build_context_bundle(meeting_input: MeetingInput, meeting_file_path: Path) -> ContextBundle:
    """Gather the meeting's declared context sources into one bundle.

    Paths are resolved relative to the meeting file's directory. A "file"
    source contributes at most one document; any other source type is
    treated as a directory and expanded to its matching files. Files that
    cannot be used (symlinks, unreadable, binary) are recorded in the
    bundle's ``skipped_paths`` rather than raising.
    """
    root = meeting_file_path.resolve().parent
    collected: list[ContextDocument] = []
    skipped: list[dict[str, str]] = []

    for src in meeting_input.context_sources:
        target = root / src.path
        # A file source yields a single candidate; a directory source is
        # expanded (globbed, filtered, sorted, capped) into many.
        if src.type == "file":
            candidates = [target]
        else:
            candidates = _iter_directory_files(target, src, root)

        for candidate in candidates:
            doc = _load_document(candidate, src, root, skipped)
            if doc is not None:
                collected.append(doc)

    return ContextBundle(
        summary=f"Collected {len(collected)} context documents",
        documents=collected,
        skipped_paths=skipped,
    )


def _iter_directory_files(directory: Path, source: ContextSource, base_dir: Path) -> list[Path]:
    matches: set[Path] = set()
    for pattern in source.include:
        matches.update(directory.glob(pattern))

    filtered: list[Path] = []
    for path in matches:
        if not path.is_file():
            continue
        relative = path.relative_to(base_dir)
        if any(part in DEFAULT_EXCLUDED_DIRS for part in relative.parts):
            continue
        if any(path.match(pattern) for pattern in source.exclude):
            continue
        filtered.append(path)

    filtered.sort(key=lambda item: str(item.relative_to(base_dir)))
    return filtered[: source.max_files]


def _load_document(
    path: Path,
    source: ContextSource,
    base_dir: Path,
    skipped_paths: list[dict[str, str]],
) -> ContextDocument | None:
    """Read one file into a ``ContextDocument``, or record why it was skipped.

    Skip reasons, checked in order: the path is a symlink; it cannot be
    stat'ed; its mode grants no read permission at all; it cannot be read;
    or its contents contain a NUL byte (binary heuristic). Every skip is
    appended to *skipped_paths* and ``None`` is returned.
    """
    # Refuse symlinks outright so collected context cannot escape the tree.
    if path.is_symlink():
        _record_skip(skipped_paths, path, base_dir, "symlink")
        return None

    try:
        mode = path.stat().st_mode
    except OSError:
        _record_skip(skipped_paths, path, base_dir, "unreadable")
        return None

    # No read bit set for owner, group, or other -> treat as unreadable.
    if not (mode & 0o444):
        _record_skip(skipped_paths, path, base_dir, "unreadable")
        return None

    try:
        raw = path.read_bytes()
    except OSError:
        _record_skip(skipped_paths, path, base_dir, "unreadable")
        return None

    # A NUL byte is a cheap, reliable signal of binary content.
    if raw.find(b"\x00") != -1:
        _record_skip(skipped_paths, path, base_dir, "binary")
        return None

    text = raw.decode("utf-8", errors="replace")
    excerpt = text[: source.max_chars_per_file]
    rel_path = str(path.relative_to(base_dir))
    return ContextDocument(
        document_id=_document_id(rel_path),
        source_path=rel_path,
        source_type="file" if source.type == "file" else "directory_file",
        purpose=source.purpose,
        excerpt=excerpt,
        char_count=len(excerpt),
    )


def _record_skip(
    skipped_paths: list[dict[str, str]],
    path: Path,
    base_dir: Path,
    reason: str,
) -> None:
    skipped_paths.append({"path": str(path.relative_to(base_dir)), "reason": reason})


def _document_id(relative_path: str) -> str:
    safe_path = relative_path.replace("/", "__")
    digest = hashlib.sha1(relative_path.encode("utf-8")).hexdigest()[:8]
    return f"{safe_path}__{digest}"
