#!/usr/bin/env python3
"""
Query & Analysis Test Scenarios - Based on real user questions from logs.

This test suite covers common query patterns users ask the health bot.
It uses real Gemini LLM and tools but isolated test data directory.

Usage:
    # Run all scenarios
    python tests/test_query_scenarios.py

    # Run specific scenario
    python tests/test_query_scenarios.py --scenario sleep_analysis

    # Use test data directory
    export DATA_DIR="/tmp/butler_test_query"
    python tests/test_query_scenarios.py
"""

import os
import sys
from datetime import date, timedelta
from pathlib import Path
from typing import Any, Dict, List, Optional, Tuple

# Make the repository root importable so this script can be run directly
# (e.g. `python tests/test_query_scenarios.py`) without installing the package.
sys.path.append(os.getcwd())

from dotenv import load_dotenv
from slack_bot.llm.gemini import GeminiLLM
from slack_bot.tools.registry import TOOLS_SCHEMA, TOOL_FUNCTIONS
from health.utils.logging_config import setup_logger

logger = setup_logger(__name__)
# Load GEMINI_MODEL / GEMINI_BASE_URL / DATA_DIR etc. from a local .env file.
load_dotenv()


class QueryTestScenario:
    """A single LLM query test scenario.

    Pairs a natural-language user query with the set of tool names the
    LLM is expected to invoke. ``run`` sends the query to a live
    ``GeminiLLM`` instance, executes any tools the model requested, and
    reports PASS/PARTIAL/FAIL on stdout.
    """

    def __init__(
        self,
        name: str,
        user_query: str,
        expected_tools: List[str],
        description: str = "",
        context: Optional[List[Dict[str, str]]] = None
    ):
        """
        Args:
            name: Unique scenario identifier (usable with --scenario).
            user_query: The natural-language question sent to the LLM.
            expected_tools: Tool names that must ALL appear among the
                LLM's tool calls for the scenario to pass.
            description: Human-readable summary shown in test output.
            context: Optional prior conversation turns; defaults to [].
        """
        self.name = name
        self.user_query = user_query
        self.expected_tools = expected_tools
        self.description = description
        # `or []` gives each instance its own list; never share a mutable default.
        self.context = context or []

    def run(self, llm: "GeminiLLM") -> Tuple[bool, str, List[Dict]]:
        """
        Run the test scenario against the given LLM.

        Returns:
            (success, response_text, tool_calls) where success is True only
            if every expected tool appears among the tools actually called.
            On any exception, returns (False, str(exception), []).
        """
        print(f"\n{'='*80}")
        print(f"🧪 Scenario: {self.name}")
        print(f"📝 Description: {self.description}")
        print(f"💬 User Query: {self.user_query}")
        print(f"🔧 Expected Tools: {', '.join(self.expected_tools)}")
        print('='*80)

        try:
            response, tool_calls = llm.generate_response(
                message=self.user_query,
                context=self.context,
                tools=TOOLS_SCHEMA
            )

            # Success requires every expected tool to be called (order-insensitive).
            called_tools = [tc['name'] for tc in (tool_calls or [])]
            success = all(tool in called_tools for tool in self.expected_tools)

            print(f"\n🛠️  Tools Called: {called_tools if called_tools else 'None'}")

            # Actually execute the requested tools so the response flow is realistic.
            if tool_calls:
                for i, tc in enumerate(tool_calls, 1):
                    print(f"\n  [{i}] Executing: {tc['name']}({tc['args']})")

                    if tc['name'] in TOOL_FUNCTIONS:
                        try:
                            # Stringify once, then truncate long output so logs stay readable.
                            result_str = str(TOOL_FUNCTIONS[tc['name']](**tc['args']))
                            result_preview = result_str[:200] + "..." if len(result_str) > 200 else result_str
                            print(f"      ✅ Result: {result_preview}")
                        except Exception as e:
                            # A single failing tool should not abort the scenario.
                            print(f"      ❌ Error: {e}")

            print(f"\n🤖 Gemini Response:\n{response}\n")

            if success:
                print(f"✅ Test PASSED - All expected tools called")
            else:
                print(f"⚠️  Test PARTIAL - Expected {self.expected_tools}, got {called_tools}")

            return success, response, tool_calls or []

        except Exception as e:
            # Broad catch is deliberate: one failing LLM call must not stop the suite.
            print(f"❌ Test FAILED with exception: {e}")
            return False, str(e), []


# ============================================================================
# Test Scenario Definitions (Based on real user questions from logs)
# ============================================================================

# Each scenario pairs a real user query (taken from production logs) with the
# tool call(s) the LLM is expected to make. Grouped by query category; run the
# full list via run_all_scenarios() or one entry via --scenario <name>.
SCENARIOS = [
    # --- Category 1: Single Day Queries ---
    QueryTestScenario(
        name="daily_summary",
        user_query="今天的健康数据怎么样？",
        expected_tools=["get_daily_detailed_stats"],
        description="Basic daily summary query - most common use case"
    ),

    QueryTestScenario(
        name="yesterday_sleep_analysis",
        user_query="分析昨晚睡眠数据",
        expected_tools=["get_daily_detailed_stats"],
        description="Sleep analysis for previous night"
    ),

    QueryTestScenario(
        name="todays_workout_analysis",
        user_query="今早椭圆机运动请深入分析",
        expected_tools=["get_activity_history"],
        description="Today's workout analysis with activity details"
    ),

    # --- Category 2: Time Range Queries ---
    QueryTestScenario(
        name="two_week_sleep_trend",
        user_query="分析下最近两周的睡眠情况",
        expected_tools=["get_metric_history"],
        description="Two-week sleep trend analysis"
    ),

    QueryTestScenario(
        name="yearly_trend",
        user_query="过去一年的历史趋势呢",
        expected_tools=["get_aggregated_analysis"],
        description="Yearly health metrics trend"
    ),

    QueryTestScenario(
        name="monthly_rhr_trend",
        user_query="最近一个月的静息心率变化",
        expected_tools=["get_metric_history"],
        description="Monthly resting heart rate trend"
    ),

    QueryTestScenario(
        name="hrv_90_days",
        user_query="最近3个月的HRV趋势",
        expected_tools=["get_aggregated_analysis"],
        description="3-month HRV trend with statistics"
    ),

    # --- Category 3: Multi-metric Queries ---
    QueryTestScenario(
        name="sleep_and_workout",
        user_query="深入分析一下昨晚的睡眠和今早的椭圆机",
        expected_tools=["get_daily_detailed_stats", "get_activity_history"],
        description="Combined sleep + workout analysis"
    ),

    QueryTestScenario(
        name="comprehensive_today",
        user_query="帮我查询今天的健康数据和运动记录",
        expected_tools=["get_daily_detailed_stats", "get_activity_history"],
        description="Comprehensive daily query"
    ),

    # --- Category 4: Causal Analysis ---
    QueryTestScenario(
        name="carb_impact_analysis",
        user_query="确实好像没有碳水容易胸闷，不知道是不是真的这个原因，请帮我分析一下底层原因",
        expected_tools=["get_manual_history", "analyze_driver"],
        description="Analyze impact of carb restriction on symptoms"
    ),

    QueryTestScenario(
        name="alcohol_sleep_correlation",
        user_query="喝酒对我的睡眠有什么影响？",
        expected_tools=["analyze_driver"],
        description="Alcohol impact on sleep quality"
    ),

    QueryTestScenario(
        name="supplement_effectiveness",
        user_query="镁补充剂对我的睡眠有帮助吗？",
        expected_tools=["analyze_driver"],
        description="Supplement effectiveness analysis"
    ),

    # --- Category 5: Advanced Analytics ---
    QueryTestScenario(
        name="recovery_insights",
        user_query="最近30天的恢复指标怎么样？",
        expected_tools=["get_health_insights"],
        description="Recovery metrics and correlations"
    ),

    QueryTestScenario(
        name="fitness_trends",
        user_query="我的健身趋势如何，有进步吗？",
        expected_tools=["get_health_insights"],
        description="Fitness progression analysis"
    ),

    # --- Category 6: Manual Logs Query ---
    QueryTestScenario(
        name="diet_history",
        user_query="最近一周我都吃了什么？",
        expected_tools=["get_manual_history"],
        description="Diet log history query"
    ),

    QueryTestScenario(
        name="alcohol_consumption",
        user_query="过去两周的饮酒记录",
        expected_tools=["get_manual_history"],
        description="Alcohol consumption history"
    ),

    QueryTestScenario(
        name="all_logs_week",
        user_query="上周的所有手动记录",
        expected_tools=["get_manual_history"],
        description="All manual logs for last week"
    ),

    # --- Category 7: Sync Operations ---
    QueryTestScenario(
        name="sync_then_query",
        user_query="先同步Garmin数据，然后查询今天的数据",
        expected_tools=["sync_garmin", "get_daily_detailed_stats"],
        description="Sync before querying to ensure fresh data"
    ),
]


# ============================================================================
# Test Runner
# ============================================================================

def run_all_scenarios(scenarios: List[QueryTestScenario]) -> None:
    """Execute every scenario in order against one shared LLM client,
    then print a PASS/PARTIAL summary table."""

    divider = "=" * 80
    print("\n" + divider)
    print("🤖 Butler Query & Analysis Test Suite")
    print(divider)
    print(f"📊 Total Scenarios: {len(scenarios)}")
    print(f"🔧 Model: {os.getenv('GEMINI_MODEL', 'default')}")
    print(f"📂 Data Dir: {os.getenv('DATA_DIR', 'default')}")
    print(f"🌐 Proxy: {os.getenv('GEMINI_BASE_URL', 'default')}")
    print(divider)

    # One shared LLM client for the whole run.
    llm = GeminiLLM()

    results = []
    for scenario in scenarios:
        ok, _response, calls = scenario.run(llm)
        results.append({
            'name': scenario.name,
            'success': ok,
            'tools_called': [call['name'] for call in calls]
        })

    print("\n" + divider)
    print("📊 Test Summary")
    print(divider)

    # Print one line per scenario while tallying the pass count.
    passed = 0
    for entry in results:
        if entry['success']:
            passed += 1
        status = "✅ PASS" if entry['success'] else "⚠️  PARTIAL"
        print(f"{status}  {entry['name']:<30} → {', '.join(entry['tools_called']) or 'No tools'}")

    total = len(results)
    print(divider)
    print(f"✅ Passed: {passed}/{total}")
    print(f"⚠️  Partial: {total - passed}/{total}")
    print(divider)


def run_specific_scenario(scenario_name: str) -> None:
    """Look up a single scenario by name and run it.

    Exits with status 1 (after listing valid names) when no scenario
    matches ``scenario_name``.
    """
    # First scenario whose name matches, or None if there isn't one.
    found = next((s for s in SCENARIOS if s.name == scenario_name), None)

    if found is None:
        print(f"❌ Scenario '{scenario_name}' not found.")
        print(f"Available scenarios: {', '.join(s.name for s in SCENARIOS)}")
        sys.exit(1)

    found.run(GeminiLLM())


# ============================================================================
# Main Entry Point
# ============================================================================

if __name__ == "__main__":
    import argparse

    # CLI: no flags runs everything; --list is informational only.
    parser = argparse.ArgumentParser(description="Butler Query Test Scenarios")
    parser.add_argument('--scenario', type=str, help='Run specific scenario by name')
    parser.add_argument('--list', action='store_true', help='List all available scenarios')
    args = parser.parse_args()

    if args.list:
        # Print the scenario catalogue and stop; no LLM calls are made.
        print("\n📋 Available Test Scenarios:\n")
        for idx, item in enumerate(SCENARIOS, 1):
            print(f"{idx:2}. {item.name:<30} - {item.description}")
        print()
        sys.exit(0)

    if args.scenario:
        run_specific_scenario(args.scenario)
    else:
        run_all_scenarios(SCENARIOS)
