"""Tests for HackerNews, Reddit, and GitHub collectors."""
from datetime import datetime
from unittest.mock import MagicMock, patch

from pytest_httpx import HTTPXMock

from ai_usecases_explorer.collectors.github import GitHubCollector
from ai_usecases_explorer.collectors.hackernews import HackerNewsCollector
from ai_usecases_explorer.collectors.reddit import RedditCollector

# ---------------------------------------------------------------------------
# HackerNews Collector
# ---------------------------------------------------------------------------

# Canned Algolia (HN Search API) payload: one story above the typical
# min-points threshold used in the tests (points=45) and one below it
# (points=3), so point filtering can be exercised.
HN_RESPONSE = {
    "hits": [
        {
            "objectID": "12345",
            "title": "Show HN: I built a Claude Code workflow for local DB queries",
            "url": "https://example.com/claude-db",
            "author": "hnuser1",
            "points": 45,
            "created_at": "2026-02-26T10:00:00.000Z",
            "story_text": "I connected Claude Code directly to my local Postgres instance...",
            "num_comments": 12,
        },
        {
            "objectID": "12346",
            "title": "Low points post",
            "url": "https://example.com/low",
            "author": "hnuser2",
            "points": 3,
            "created_at": "2026-02-26T09:00:00.000Z",
            "story_text": "Not much here.",
            "num_comments": 1,
        },
    ],
    "nbHits": 2,
}

# Ask-HN style payload: "url" is None, so the collector must fall back to a
# news.ycombinator.com item link (see test_fallback_url_for_ask_hn).
HN_RESPONSE_NO_URL = {
    "hits": [
        {
            "objectID": "99999",
            "title": "Ask HN: Best LLM workflow tools?",
            "url": None,
            "author": "asker",
            "points": 50,
            "created_at": "2026-02-26T08:00:00.000Z",
            "story_text": "I want to automate my dev workflow...",
            "num_comments": 5,
        }
    ],
    "nbHits": 1,
}


# Payload for keyword queries that should contribute no items.
HN_EMPTY = {"hits": [], "nbHits": 0}
# The collector issues one HTTP request per keyword, so the tests must queue
# exactly this many mocked responses.
_HN_KEYWORD_COUNT = 12  # must match len(_KEYWORDS) in hackernews.py


def _mock_hn(httpx_mock: HTTPXMock, first_response: dict) -> None:
    """Queue first_response for the first keyword query and HN_EMPTY for the rest."""
    payloads = [first_response] + [HN_EMPTY] * (_HN_KEYWORD_COUNT - 1)
    for payload in payloads:
        httpx_mock.add_response(json=payload)


class TestHackerNewsCollector:
    """Behavioural tests for HackerNewsCollector against a mocked Algolia API."""

    def test_fetch_returns_list_of_raw_items(self, httpx_mock: HTTPXMock) -> None:
        """fetch() always returns a list, never a generator or None."""
        _mock_hn(httpx_mock, HN_RESPONSE)
        collector = HackerNewsCollector(min_points=10)
        items = collector.fetch()
        assert isinstance(items, list)

    def test_filters_by_min_points(self, httpx_mock: HTTPXMock) -> None:
        """Stories below min_points (the 3-point hit) are dropped."""
        _mock_hn(httpx_mock, HN_RESPONSE)
        collector = HackerNewsCollector(min_points=10)
        items = collector.fetch()
        assert len(items) == 1
        assert items[0].author == "hnuser1"

    def test_item_has_required_fields(self, httpx_mock: HTTPXMock) -> None:
        """Each item carries title, URL, author, platform tag and a parsed datetime."""
        _mock_hn(httpx_mock, HN_RESPONSE)
        collector = HackerNewsCollector(min_points=10)
        items = collector.fetch()
        item = items[0]
        assert item.title == "Show HN: I built a Claude Code workflow for local DB queries"
        assert item.source_url == "https://example.com/claude-db"
        assert item.author == "hnuser1"
        assert item.source_platform == "hackernews"
        assert isinstance(item.published_at, datetime)

    def test_fallback_url_for_ask_hn(self, httpx_mock: HTTPXMock) -> None:
        """Stories with url=None (Ask HN) fall back to the HN item page."""
        _mock_hn(httpx_mock, HN_RESPONSE_NO_URL)
        collector = HackerNewsCollector(min_points=10)
        items = collector.fetch()
        assert len(items) == 1
        assert "news.ycombinator.com" in items[0].source_url

    def test_empty_response_returns_empty_list(self, httpx_mock: HTTPXMock) -> None:
        """All-empty keyword responses produce an empty list, not an error."""
        for _ in range(_HN_KEYWORD_COUNT):
            httpx_mock.add_response(json=HN_EMPTY)
        collector = HackerNewsCollector(min_points=10)
        items = collector.fetch()
        assert items == []

    def test_raw_content_includes_title_and_text(self, httpx_mock: HTTPXMock) -> None:
        """raw_content combines the story title and its story_text."""
        _mock_hn(httpx_mock, HN_RESPONSE)
        collector = HackerNewsCollector(min_points=10)
        items = collector.fetch()
        assert "Show HN" in items[0].raw_content
        assert "Postgres" in items[0].raw_content

    def test_deduplicates_across_queries(self, httpx_mock: HTTPXMock) -> None:
        """A story appearing in multiple keyword results is returned only once."""
        for _ in range(_HN_KEYWORD_COUNT):
            httpx_mock.add_response(json=HN_RESPONSE)
        collector = HackerNewsCollector(min_points=10)
        items = collector.fetch()
        # Exactly one item must survive: the low-points hit is filtered out and
        # the remaining hit is identical in every keyword response. Asserting
        # the count (not just URL uniqueness) keeps this test from passing
        # vacuously when fetch() returns an empty list.
        assert len(items) == 1
        ids = [i.source_url for i in items]
        assert len(ids) == len(set(ids))


# ---------------------------------------------------------------------------
# Reddit Collector (Exa.ai neural search, subreddit-based)
# ---------------------------------------------------------------------------


def _make_exa_result(
    url: str = "https://reddit.com/r/ClaudeCode/comments/abc123/title/",
    title: str = "How I automated my daily reports with Claude",
    text: str = "I built a Python script that calls Claude every morning...",
    published_date: str = "2026-02-26T10:00:00Z",
    author: str = "redditor1",
) -> MagicMock:
    result = MagicMock()
    result.url = url
    result.title = title
    result.text = text
    result.published_date = published_date
    result.author = author
    return result


def _make_exa_response(results: list[MagicMock]) -> MagicMock:
    response = MagicMock()
    response.results = results
    return response


class TestRedditCollector:
    """Behavioural tests for RedditCollector with a mocked Exa client."""

    def test_returns_empty_when_no_api_key(self) -> None:
        """An empty API key disables the collector instead of raising."""
        collector = RedditCollector(subreddits=["ClaudeCode"], api_key="")
        items = collector.fetch()
        assert items == []

    def test_fetches_and_returns_items(self) -> None:
        """A single Exa result becomes a single collected item."""
        mock_response = _make_exa_response([_make_exa_result()])
        mock_exa = MagicMock()
        mock_exa.search_and_contents.return_value = mock_response

        with patch("ai_usecases_explorer.collectors.reddit.Exa", return_value=mock_exa):
            collector = RedditCollector(subreddits=["ClaudeCode"], api_key="test-key")
            items = collector.fetch()

        assert len(items) == 1
        assert items[0].title == "How I automated my daily reports with Claude"
        assert items[0].source_platform == "reddit"
        assert "reddit.com" in items[0].source_url

    def test_item_has_required_fields(self) -> None:
        """Each item carries title, platform tag, author and a parsed datetime."""
        mock_response = _make_exa_response([_make_exa_result()])
        mock_exa = MagicMock()
        mock_exa.search_and_contents.return_value = mock_response

        with patch("ai_usecases_explorer.collectors.reddit.Exa", return_value=mock_exa):
            collector = RedditCollector(subreddits=["ClaudeCode"], api_key="test-key")
            items = collector.fetch()

        item = items[0]
        assert item.title == "How I automated my daily reports with Claude"
        assert item.source_platform == "reddit"
        assert item.author == "redditor1"
        assert isinstance(item.published_at, datetime)

    def test_deduplicates_within_subreddit(self) -> None:
        """The same URL appearing twice in one result set is collected once."""
        same_result = _make_exa_result()
        mock_exa = MagicMock()
        mock_exa.search_and_contents.return_value = _make_exa_response([same_result, same_result])

        with patch("ai_usecases_explorer.collectors.reddit.Exa", return_value=mock_exa):
            collector = RedditCollector(subreddits=["ClaudeCode"], api_key="test-key")
            items = collector.fetch()

        # Exactly one item must survive the duplicate pair. Asserting the count
        # (not just URL uniqueness) keeps this test from passing vacuously when
        # fetch() returns an empty list.
        assert len(items) == 1
        urls = [i.source_url for i in items]
        assert len(urls) == len(set(urls))

    def test_multiple_subreddits_aggregate_results(self) -> None:
        """One Exa query per subreddit; distinct results are aggregated."""
        result_a = _make_exa_result(url="https://reddit.com/r/vibecoding/comments/aaa/")
        result_b = _make_exa_result(url="https://reddit.com/r/singularity/comments/bbb/")
        mock_exa = MagicMock()
        mock_exa.search_and_contents.side_effect = [
            _make_exa_response([result_a]),
            _make_exa_response([result_b]),
        ]

        with patch("ai_usecases_explorer.collectors.reddit.Exa", return_value=mock_exa):
            collector = RedditCollector(subreddits=["vibecoding", "singularity"], api_key="test-key")
            items = collector.fetch()

        assert len(items) == 2

    def test_accepts_results_from_any_reddit_subreddit(self) -> None:
        # Exa may return URLs from subreddits not in the target list — accept them
        # (path-based domain filtering is unsupported by Exa; we accept any reddit.com URL)
        other_sub_result = _make_exa_result(url="https://reddit.com/r/LocalLLaMA/comments/xyz/")
        mock_exa = MagicMock()
        mock_exa.search_and_contents.return_value = _make_exa_response([other_sub_result])

        with patch("ai_usecases_explorer.collectors.reddit.Exa", return_value=mock_exa):
            collector = RedditCollector(subreddits=["ClaudeCode"], api_key="test-key")
            items = collector.fetch()

        assert len(items) == 1

    def test_empty_results_returns_empty_list(self) -> None:
        """No Exa results means an empty list, not an error."""
        mock_exa = MagicMock()
        mock_exa.search_and_contents.return_value = _make_exa_response([])

        with patch("ai_usecases_explorer.collectors.reddit.Exa", return_value=mock_exa):
            collector = RedditCollector(subreddits=["ClaudeCode"], api_key="test-key")
            items = collector.fetch()

        assert items == []

    def test_gracefully_handles_exa_error(self) -> None:
        """An Exa client error is swallowed and yields an empty list."""
        mock_exa = MagicMock()
        mock_exa.search_and_contents.side_effect = RuntimeError("network error")

        with patch("ai_usecases_explorer.collectors.reddit.Exa", return_value=mock_exa):
            collector = RedditCollector(subreddits=["ClaudeCode"], api_key="test-key")
            items = collector.fetch()

        assert items == []

    def test_raw_content_includes_title_and_text(self) -> None:
        """raw_content combines the post title and its body text."""
        mock_response = _make_exa_response([_make_exa_result()])
        mock_exa = MagicMock()
        mock_exa.search_and_contents.return_value = mock_response

        with patch("ai_usecases_explorer.collectors.reddit.Exa", return_value=mock_exa):
            collector = RedditCollector(subreddits=["ClaudeCode"], api_key="test-key")
            items = collector.fetch()

        assert "automated" in items[0].raw_content
        assert "Python" in items[0].raw_content

    def test_passes_reddit_domain_to_exa(self) -> None:
        # include_domains must be ["reddit.com"] — Exa does not support path-based filtering.
        # start_published_date must NOT be set — Exa has no published_date for Reddit posts.
        mock_exa = MagicMock()
        mock_exa.search_and_contents.return_value = _make_exa_response([])

        with patch("ai_usecases_explorer.collectors.reddit.Exa", return_value=mock_exa):
            collector = RedditCollector(subreddits=["ClaudeCode"], api_key="test-key")
            collector.fetch()

        call_kwargs = mock_exa.search_and_contents.call_args
        assert call_kwargs.kwargs["include_domains"] == ["reddit.com"]
        assert "start_published_date" not in call_kwargs.kwargs
        assert call_kwargs.kwargs["text"] is True


# ---------------------------------------------------------------------------
# GitHub Collector
# ---------------------------------------------------------------------------

# Canned GitHub repository-search payload: one repo above the typical
# min_stars threshold used in the tests (42 stars) and one below it (2 stars),
# so star filtering can be exercised.
GITHUB_SEARCH_RESPONSE = {
    "total_count": 2,
    "incomplete_results": False,
    "items": [
        {
            "id": 111,
            "name": "claude-local-rag",
            "full_name": "user1/claude-local-rag",
            "html_url": "https://github.com/user1/claude-local-rag",
            "description": "RAG pipeline connecting Claude to local PDF files",
            "topics": ["claude", "rag", "llm", "python"],
            "stargazers_count": 42,
            "language": "Python",
            "pushed_at": "2026-02-26T12:00:00Z",
            "created_at": "2026-02-25T08:00:00Z",
            "owner": {"login": "user1"},
        },
        {
            "id": 222,
            "name": "low-star-repo",
            "full_name": "user2/low-star-repo",
            "html_url": "https://github.com/user2/low-star-repo",
            "description": "Just a test",
            "topics": ["llm"],
            "stargazers_count": 2,
            "language": "JavaScript",
            "pushed_at": "2026-02-26T10:00:00Z",
            "created_at": "2026-02-26T09:00:00Z",
            "owner": {"login": "user2"},
        },
    ],
}


# Payload for topic queries that should contribute no repositories.
GITHUB_EMPTY = {"total_count": 0, "incomplete_results": False, "items": []}
# The collector issues one HTTP request per topic, so the tests must queue
# exactly this many mocked responses.
_GITHUB_TOPIC_COUNT = 8  # must match len(_TOPICS) in github.py


def _mock_github(httpx_mock: HTTPXMock, first_response: dict) -> None:
    """Queue first_response for the first topic query and empty payloads for the rest."""
    for index in range(_GITHUB_TOPIC_COUNT):
        httpx_mock.add_response(json=first_response if index == 0 else GITHUB_EMPTY)


class TestGitHubCollector:
    """Behavioural tests for GitHubCollector against a mocked search API."""

    def test_fetch_returns_list(self, httpx_mock: HTTPXMock) -> None:
        """fetch() always returns a list, never a generator or None."""
        _mock_github(httpx_mock, GITHUB_SEARCH_RESPONSE)
        collector = GitHubCollector(min_stars=10)
        items = collector.fetch()
        assert isinstance(items, list)

    def test_filters_by_min_stars(self, httpx_mock: HTTPXMock) -> None:
        """Repos below min_stars (the 2-star repo) are dropped."""
        _mock_github(httpx_mock, GITHUB_SEARCH_RESPONSE)
        collector = GitHubCollector(min_stars=10)
        items = collector.fetch()
        assert len(items) == 1
        assert items[0].author == "user1"

    def test_item_has_required_fields(self, httpx_mock: HTTPXMock) -> None:
        """Each item carries title, URL, platform tag and a parsed datetime."""
        _mock_github(httpx_mock, GITHUB_SEARCH_RESPONSE)
        collector = GitHubCollector(min_stars=10)
        items = collector.fetch()
        item = items[0]
        assert item.title == "user1/claude-local-rag"
        assert item.source_url == "https://github.com/user1/claude-local-rag"
        assert item.source_platform == "github"
        assert isinstance(item.published_at, datetime)

    def test_raw_content_includes_description_and_topics(self, httpx_mock: HTTPXMock) -> None:
        """raw_content combines the repo description and its topic list."""
        _mock_github(httpx_mock, GITHUB_SEARCH_RESPONSE)
        collector = GitHubCollector(min_stars=10)
        items = collector.fetch()
        assert "RAG pipeline" in items[0].raw_content
        assert "claude" in items[0].raw_content

    def test_empty_response_returns_empty_list(self, httpx_mock: HTTPXMock) -> None:
        """All-empty topic responses produce an empty list, not an error."""
        for _ in range(_GITHUB_TOPIC_COUNT):
            httpx_mock.add_response(json=GITHUB_EMPTY)
        collector = GitHubCollector(min_stars=10)
        items = collector.fetch()
        assert items == []

    def test_deduplicates_across_topics(self, httpx_mock: HTTPXMock) -> None:
        """A repo appearing under multiple topic queries is returned only once."""
        for _ in range(_GITHUB_TOPIC_COUNT):
            httpx_mock.add_response(json=GITHUB_SEARCH_RESPONSE)
        collector = GitHubCollector(min_stars=10)
        items = collector.fetch()
        # Exactly one repo must survive: the low-star repo is filtered out and
        # the remaining repo is identical in every topic response. Asserting
        # the count (not just URL uniqueness) keeps this test from passing
        # vacuously when fetch() returns an empty list.
        assert len(items) == 1
        urls = [i.source_url for i in items]
        assert len(urls) == len(set(urls))

    def test_uses_auth_header_when_token_provided(self, httpx_mock: HTTPXMock) -> None:
        """A provided token is sent in the Authorization header of each request."""
        _mock_github(httpx_mock, GITHUB_SEARCH_RESPONSE)
        collector = GitHubCollector(min_stars=10, token="ghp_test123")
        items = collector.fetch()
        assert isinstance(items, list)
        # Previously this test never inspected the outgoing requests, so it
        # asserted nothing about the auth header. Check the token is actually
        # sent; the scheme prefix ("token" vs "Bearer") is the collector's
        # choice, so only the token value is matched.
        requests = httpx_mock.get_requests()
        assert requests
        for request in requests:
            assert "ghp_test123" in request.headers.get("Authorization", "")
