#!/usr/bin/env python3
"""
生成 AI 应用场景每日简报（使用 LLM 翻译）
"""
import sqlite3
from datetime import datetime, timedelta
from pathlib import Path
import json
import subprocess
from collections import Counter

# SQLite database holding the collected AI use-case scenes.
DB_PATH = Path("/root/.openclaw/workspace/projects/ai_usecase_collector/data/ai_usecases.db")
# Obsidian vault folder where the daily briefing Markdown files are written.
REPORT_PATH = Path("/root/vault/obsidian_vault/obsidian/Documents/obsidian/CLAW/AI应用场景")
# JSON file that persists LLM translation results between runs.
CACHE_PATH = Path("/root/.openclaw/workspace/projects/ai_usecase_collector/translation_cache.json")

def get_db_connection():
    """Open a connection to the use-case database with name-based row access."""
    connection = sqlite3.connect(DB_PATH)
    # sqlite3.Row lets callers read columns by name, e.g. row['title'].
    connection.row_factory = sqlite3.Row
    return connection

def get_todays_scenes(conn):
    """Return scenes discovered today, ordered by innovation score (desc)."""
    query = """
        SELECT * FROM scenes
        WHERE date(discovery_date) = date(?)
        ORDER BY innovation_score DESC
    """
    today = datetime.now().strftime("%Y-%m-%d")
    # Connection.execute is a shortcut that creates a cursor internally.
    return conn.execute(query, (today,)).fetchall()

def get_all_scenes(conn):
    """Return every scene, best innovation score first, then newest first."""
    query = """
        SELECT * FROM scenes
        ORDER BY innovation_score DESC, discovery_date DESC
    """
    return conn.execute(query).fetchall()

def get_category_distribution(scenes):
    """Count occurrences of each category across the given scene rows.

    `categories` is a comma-separated string; NULL/empty values are skipped.
    """
    counts = Counter()
    for scene in scenes:
        raw = scene['categories']
        if raw:
            counts.update(part.strip() for part in raw.split(','))
    return counts

def get_top_tags(scenes, limit=15):
    """Return the `limit` most frequent tags as (tag, count) pairs."""
    counter = Counter()
    for scene in scenes:
        raw = scene['tags']
        if raw:
            counter.update(t.strip() for t in raw.split(','))
    return counter.most_common(limit)

def translate_with_llm(text, cache):
    """Translate English *text* to Chinese via the `openclaw` CLI agent.

    Results are memoized in *cache*, keyed on the first 100 characters of
    the text (NOTE(review): distinct texts sharing a 100-char prefix will
    collide — confirm this is acceptable). Texts that are empty, shorter
    than 10 characters, or that do not look English are returned unchanged.
    On any subprocess failure the error is appended to
    translation_errors.log and the original text is returned.
    """
    if not text or len(text) < 10:
        return text

    # Reuse a previously cached translation when available.
    cache_key = text[:100]
    if cache_key in cache:
        return cache[cache_key]

    # Cheap heuristic: treat the text as English if it contains at least one
    # common English function word; avoids an LLM call for Chinese text.
    text_lower = text.lower()
    common_english_words = [
        'this', 'that', 'with', 'from', 'have', 'will', 'can',
        'for', 'are', 'been', 'their', 'your', 'more', 'other'
    ]
    english_word_count = sum(
        1 for word in common_english_words
        if f' {word} ' in text_lower or f'{word},' in text_lower
    )

    if english_word_count >= 1:
        print(f"[→] 条件满足，准备调用 LLM 进行翻译...")
        # Build the translation prompt.
        prompt = f"Translate the following English text to Chinese. Keep technical terms in English if needed. Return only the Chinese translation, no explanations:\n\n{text}"
        print(f"[i] 生成的 Prompt (前100字符): {prompt[:100]}...")

        try:
            # Invoke the openclaw CLI to perform the translation.
            result = subprocess.run(
                ['openclaw', 'agent', '--message', prompt, '--session-id', 'main'],
                capture_output=True,
                text=True,
                timeout=45
            )
            if result.returncode == 0 and result.stdout.strip():
                translation = result.stdout.strip()
                # Keep only the first plausible translation line, skipping
                # markdown noise and agent status markers.
                lines = translation.split('\n')
                for line in lines:
                    cleaned_line = line.strip()
                    if cleaned_line and not cleaned_line.startswith(('*', '#', 'Overall', '---', '✅')) and 'NO_REPLY' not in cleaned_line:
                        cache[cache_key] = cleaned_line
                        return cleaned_line
        except subprocess.TimeoutExpired:
            # Log timeout errors to a file instead of stdout.
            with open("/root/.openclaw/workspace/projects/ai_usecase_collector/translation_errors.log", "a") as f:
                f.write(f"{datetime.now()}: Timeout translating: {text[:50]}...\n")
        except Exception as e:
            # Log any other failure (e.g. missing CLI binary) the same way.
            # (The original had a second, unreachable pair of except clauses
            # after this one; they have been removed.)
            with open("/root/.openclaw/workspace/projects/ai_usecase_collector/translation_errors.log", "a") as f:
                f.write(f"{datetime.now()}: Error translating: {e}\n")

    # Fallback: remember and return the original text untranslated.
    cache[cache_key] = text
    return text

def load_translation_cache():
    """Load the JSON translation cache from CACHE_PATH.

    Returns an empty dict when the file is missing, unreadable, or not
    valid JSON — the cache is best-effort and must never block report
    generation.
    """
    if CACHE_PATH.exists():
        try:
            with open(CACHE_PATH, 'r', encoding='utf-8') as f:
                return json.load(f)
        # ValueError covers json.JSONDecodeError and UnicodeDecodeError;
        # a bare `except:` here would also swallow KeyboardInterrupt/SystemExit.
        except (OSError, ValueError):
            return {}
    return {}

def save_translation_cache(cache):
    """Persist the translation cache as pretty-printed UTF-8 JSON (best effort)."""
    try:
        payload = json.dumps(cache, ensure_ascii=False, indent=2)
        with open(CACHE_PATH, 'w', encoding='utf-8') as f:
            f.write(payload)
    except Exception as e:
        # Never let a cache write failure abort report generation.
        print(f"⚠️ 保存翻译缓存失败: {e}")

# ... (the remainder is identical to the previous version)
def extract_trends(scenes):
    """Build trend-insight entries, each backed by the top techs/categories.

    The insight text is fixed editorial copy; only the `evidence` lists are
    derived from the data.
    """
    # Tally technology mentions (comma-separated in `key_technologies`).
    tech_mentions = Counter()
    for scene in scenes:
        raw = scene['key_technologies']
        if raw:
            tech_mentions.update(t.strip() for t in raw.split(','))
    hot_techs = tech_mentions.most_common(5)

    # Tally application categories (comma-separated in `categories`).
    cat_counts = Counter()
    for scene in scenes:
        raw = scene['categories']
        if raw:
            cat_counts.update(c.strip() for c in raw.split(','))
    hot_industries = cat_counts.most_common(5)

    return [
        {
            'category': '技术趋势',
            'insight': 'Agentic AI 与多 Agent 协作成为主流，' +
                       'MCP (Model Context Protocol) 正在成为 AI 系统集成的标准协议',
            'evidence': [tech for tech, _ in hot_techs],
        },
        {
            'category': '应用趋势',
            'insight': '从单一任务自动化转向端到端工作流自动化，' +
                       'AI 开始承担"编排者"角色，协调多个专业 Agent',
            'evidence': [cat for cat, _ in hot_industries],
        },
    ]

def generate_report():
    """Assemble the daily Markdown briefing.

    Reads scenes from the SQLite DB, translates highlighted descriptions via
    the LLM helper (backed by a persisted JSON cache), and renders sections
    for highlights, trend insights, hot tags, category distribution and data
    sources.

    Returns:
        (report_markdown, new_scene_count) tuple.
    """
    # NOTE(review): `today` is never used below — date_display carries the date.
    today = datetime.now().strftime("%Y-%m-%d")
    date_display = datetime.now().strftime("%Y年%m月%d日")

    conn = get_db_connection()
    
    # Load the translation cache (persisted across runs).
    translation_cache = load_translation_cache()

    # Fetch the data.
    todays_scenes = get_todays_scenes(conn)
    all_scenes = get_all_scenes(conn)

    # If there are no new scenes today, fall back to the last 3 days.
    if not todays_scenes:
        three_days_ago = (datetime.now() - timedelta(days=3)).isoformat()
        cursor = conn.cursor()
        cursor.execute("""
            SELECT * FROM scenes
            WHERE discovery_date > ?
            ORDER BY innovation_score DESC, discovery_date DESC
        """, (three_days_ago,))
        todays_scenes = cursor.fetchall()
        report_type = "最近 3 天"
    else:
        report_type = "今日"

    # Derive trends/tags/categories from the full dataset, not just new scenes.
    trends = extract_trends(all_scenes)
    top_tags = get_top_tags(all_scenes)
    category_dist = get_category_distribution(all_scenes)

    # Build the Markdown report header.
    report = f"""# AI 应用场景每日简报

**日期**: {date_display}
**数据统计**: {len(all_scenes)} 个场景 | 新增 {len(todays_scenes)} 个 ({report_type})

---

## 📌 今日/新增重点场景

"""

    # List the new scenes (top 5 only).
    for i, scene in enumerate(todays_scenes[:5], 1):
        # Translate the description with the LLM helper (cached).
        description_zh = translate_with_llm(scene['description'], translation_cache)
        
        report += f"### {i}. {scene['title']}\n\n"
        report += f"**来源**: {scene['source_platform']}\n\n"
        
        # Original (possibly English) description, truncated to 300 chars.
        if scene['description']:
            report += f"**描述**: {scene['description'][:300]}\n\n"
        
        # Chinese description — only shown when translation changed the text.
        if description_zh != scene['description']:
            report += f"**中文说明**: {description_zh}\n\n"
        
        if scene['categories']:
            report += f"**领域**: {scene['categories']}\n\n"
        if scene['tags']:
            report += f"**标签**: {scene['tags']}\n\n"
        if scene['key_technologies']:
            report += f"**技术**: {scene['key_technologies']}\n\n"
        
        # Link back to the original article.
        if scene['original_url']:
            report += f"**原文链接**: [{scene['original_url']}]({scene['original_url']})\n\n"
        
        report += "---\n\n"

    # Trend insights section.
    report += "## 🔍 趋势洞察\n\n"
    for trend in trends:
        report += f"### {trend['category']}\n\n"
        report += f"**核心观点**: {trend['insight']}\n\n"
        report += f"**佐证**: {', '.join(trend['evidence'][:5])}\n\n"
        report += "---\n\n"

    # Hot tags section.
    report += "## 🏷️ 热门标签\n\n"
    for tag, count in top_tags:
        report += f"- **{tag}** ({count})\n"
    report += "\n"

    # Category distribution section with a text bar chart (capped at 20 cells).
    report += "## 📊 应用领域分布\n\n"
    for cat, count in category_dist.most_common(10):
        bar = "█" * min(count, 20)
        report += f"- **{cat}**: {bar} {count}\n"
    report += "\n"

    # Data-source breakdown, most prolific source first.
    report += "## 📚 数据来源\n\n"
    sources = {}
    for scene in all_scenes:
        source = scene['source_platform']
        sources[source] = sources.get(source, 0) + 1

    for source, count in sorted(sources.items(), key=lambda x: x[1], reverse=True):
        report += f"- {source}: {count} 个场景\n"

    report += f"\n---\n\n"
    report += f"*本报告由 AI 自动生成于 {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}*\n"

    # Persist any translations produced during this run.
    save_translation_cache(translation_cache)

    conn.close()

    return report, len(todays_scenes)

def save_report(report):
    """Write the briefing into the Obsidian vault and return the file path."""
    date_stamp = datetime.now().strftime("%Y-%m-%d")
    target = REPORT_PATH / f"{date_stamp}-每日简报.md"

    # Create the vault folder on first run.
    REPORT_PATH.mkdir(parents=True, exist_ok=True)

    target.write_text(report, encoding='utf-8')
    return target

def main():
    """Entry point: build the briefing, write it to disk, print a summary."""
    print("📝 正在生成每日简报...")
    briefing, new_count = generate_report()
    saved_path = save_report(briefing)

    print(f"✅ 报告已生成: {saved_path}")
    print(f"📊 新增场景数: {new_count}")

    return briefing, str(saved_path), new_count

if __name__ == "__main__":
    main()
