#!/usr/bin/env python3
"""
AI 场景收集器 - 从搜索结果中提取有价值的应用场景
"""

import sqlite3
import json
from datetime import datetime
from urllib.parse import urlparse

# 数据库路径
DB_PATH = "/root/.openclaw/workspace/projects/ai_usecase_collector/data/ai_usecases.db"

# 今日收集的场景数据
SCENES = [
    {
        "title": "X (Twitter) Research Agent - 社交媒体趋势分析",
        "description": "AI 代理可以深入分析推文，识别领域内的流行趋势，将发现记录到 Google Sheets，甚至为用户在 Google Docs 中起草帖子。这代理可以直接连接到 X 平台，自动化社交媒体内容创作和趋势分析。",
        "original_url": "https://www.gumloop.com/blog/ai-agent-use-cases",
        "source_platform": "Gumloop",
        "categories": "社交媒体,内容创作,市场研究",
        "tags": "Twitter,趋势分析,内容自动化",
        "key_technologies": "LangChain,AutoGen",
        "innovation_score": 8,
        "industry": "Marketing"
    },
    {
        "title": "CrewAI - 多代理协作工作流",
        "description": "专为协作式多代理工作流设计的框架 - 非常适合基于团队的自动化。允许创建多个专业化代理协同工作，每个代理负责特定任务，实现复杂任务的自动化处理。",
        "original_url": "https://ai.plainenglish.io/the-15-best-ai-agent-builders-in-2026-tools-features-use-cases-cc2079c680af",
        "source_platform": "Plain English AI",
        "categories": "多代理系统,工作流自动化,团队协作",
        "tags": "CrewAI,多代理,协作自动化",
        "key_technologies": "CrewAI,LangChain",
        "innovation_score": 9,
        "industry": "Enterprise"
    },
    {
        "title": "客户服务自动化 - 票据自动处理",
        "description": "2026年验证的用例：客户服务（自主票据解决、退款、升级）。AI 代理可以自主处理客户支持工单，自动识别问题类型，提供解决方案或升级到人工支持，显著提升响应效率。",
        "original_url": "https://joget.com/ai-agent-adoption-in-2026-what-the-analysts-data-shows/",
        "source_platform": "Joget",
        "categories": "客户服务,自动化,支持系统",
        "tags": "客户支持,票据处理,退款自动化",
        "key_technologies": "NLP,对话系统,工作流自动化",
        "innovation_score": 8,
        "industry": "Customer Service"
    },
    {
        "title": "财务运营自动化 - 发票匹配与审计",
        "description": "财务和运营用例：发票匹配、费用审计、预测。AI 代理可以自动处理财务文档，匹配发票与采购订单，审计异常支出，并提供财务预测分析。",
        "original_url": "https://joget.com/ai-agent-adoption-in-2026-what-the-analysts-data-shows/",
        "source_platform": "Joget",
        "categories": "财务,运营,审计",
        "tags": "发票处理,费用审计,财务预测",
        "key_technologies": "OCR,NLP,机器学习",
        "innovation_score": 9,
        "industry": "Finance"
    },
    {
        "title": "安全与合规 - 威胁检测与策略执行",
        "description": "安全和合规用例：威胁检测、策略执行、异常检测。AI 代理可以实时监控安全事件，自动检测潜在威胁，执行安全策略，识别异常行为模式。",
        "original_url": "https://joget.com/ai-agent-adoption-in-2026-what-the-analysts-data-shows/",
        "source_platform": "Joget",
        "categories": "安全,合规,监控",
        "tags": "威胁检测,安全策略,异常检测",
        "key_technologies": "异常检测,实时监控,规则引擎",
        "innovation_score": 9,
        "industry": "Security"
    },
    {
        "title": "Model Context Protocol (MCP) - 数字装配线",
        "description": "2026年商业价值增长的'数字装配线'：人工引导的多步骤工作流，多个代理运行从头到尾的流程。MCP 标准允许代理无缝连接到各种数据源，实现复杂的企业级自动化。",
        "original_url": "https://www.gappsgroup.com/blog/ai-agent-trends-2026-from-chatbots-to-autonomous-business-ecosystems",
        "source_platform": "Gapps Group",
        "categories": "企业集成,工作流编排,数据连接",
        "tags": "MCP,企业集成,多代理编排",
        "key_technologies": "MCP,API集成,工作流编排",
        "innovation_score": 10,
        "industry": "Enterprise"
    },
    {
        "title": "自改进 AI 代理系统",
        "description": "2026年将是自改进 Agentic AI 系统增加研究和实际实施的一年。这些系统不仅执行任务，还将自主学习和改进。它们会评估自己的表现，识别改进领域，并自主优化其方法。",
        "original_url": "https://cloudsecurityalliance.org/blog/2026/01/16/my-top-10-predictions-for-agentic-ai-in-2026",
        "source_platform": "Cloud Security Alliance",
        "categories": "AI研究,自学习,元学习",
        "tags": "自改进,元AI,自主学习",
        "key_technologies": "元学习,强化学习,自我评估",
        "innovation_score": 10,
        "industry": "AI Research"
    },
    {
        "title": "医疗保健 - AI 语音记录与医生减负",
        "description": "在医疗保健领域，AI 语音记录正在减少医生职业倦怠，每年为每个医疗机构节省超过 100 万美元。AI 代理自动记录医患对话，生成结构化病历，减少行政工作负担。",
        "original_url": "https://www.druidai.com/blog/ai-trends-in-2026",
        "source_platform": "Druid AI",
        "categories": "医疗保健,语音技术,效率提升",
        "tags": "医疗AI,语音转写,病历自动化",
        "key_technologies": "语音识别,NLP,文档生成",
        "innovation_score": 9,
        "industry": "Healthcare"
    },
    {
        "title": "金融 - 实时欺诈检测与自动审计",
        "description": "在金融领域，代理通过实时欺诈检测和自动化审计来保护交易安全。AI 代理可以实时分析交易模式，识别可疑活动，自动触发警报或阻止交易，并进行持续的合规审计。",
        "original_url": "https://www.druidai.com/blog/ai-trends-in-2026",
        "source_platform": "Druid AI",
        "categories": "金融安全,欺诈检测,合规审计",
        "tags": "欺诈检测,实时监控,合规自动化",
        "key_technologies": "异常检测,实时分析,规则引擎",
        "innovation_score": 9,
        "industry": "Finance"
    },
    {
        "title": "Agentic AI 管理物流和生产",
        "description": "2026年，Agentic AI 将端到端地管理物流和生产。AI 代理将实时重新路由库存，加急运输，分配维护资源，并根据需求动态调整生产。企业采用 Agentic 系统可以显著提高运营效率。",
        "original_url": "https://www.forbes.com/sites/markminevich/2025/12/31/agentic-ai-takes-over-11-shocking-2026-predictions",
        "source_platform": "Forbes",
        "categories": "物流,生产管理,供应链",
        "tags": "物流自动化,生产优化,供应链管理",
        "key_technologies": "优化算法,实时调度,IoT集成",
        "innovation_score": 10,
        "industry": "Logistics & Manufacturing"
    },
    {
        "title": "医疗保健 - 12个真实世界 AI 代理用例",
        "description": "医疗保健正面临来自各个方面的巨大压力。提供者面临人员短缺和患者需求上升。AI 代理在医疗保健中解决提供者、患者和核心业务工作流程中的日常问题。包括患者分诊、预约安排、临床决策支持等。",
        "original_url": "https://www.kore.ai/blog/ai-agents-in-healthcare-12-real-world-use-cases-2026",
        "source_platform": "Kore.ai",
        "categories": "医疗保健,患者护理,运营效率",
        "tags": "医疗AI,患者分诊,预约自动化",
        "key_technologies": "NLP,临床决策支持,工作流自动化",
        "innovation_score": 9,
        "industry": "Healthcare"
    },
    {
        "title": "制造业 - 自主维护调度",
        "description": "在2026年，制造业的顶级表现者将部署和扩展 Agentic AI，用于自主维护调度和供应链编排，与那些陷入'试点炼狱'的企业形成对比。AI 代理根据设备数据预测维护需求，自动安排维护，最小化停机时间。",
        "original_url": "https://www.dataiku.com/stories/blog/manufacturing-ai-trends-2026",
        "source_platform": "Dataiku",
        "categories": "制造业,预测性维护,供应链",
        "tags": "维护自动化,预测性维护,制造优化",
        "key_technologies": "预测模型,IoT,调度算法",
        "innovation_score": 9,
        "industry": "Manufacturing"
    },
    {
        "title": "治理导向的 Agentic AI - 可衡量的 ROI",
        "description": "整个行业的 AI 部署正在从实验性试点转向集成、自主代理，在严格治理控制下运行核心高价值工作流程，并且需要展示价值。企业重视治理和信任，将自动化策略建立在可观察性和人类监督的基础上。",
        "original_url": "https://www.snowflake.com/en/blog/ai-in-healthcare/",
        "source_platform": "Snowflake",
        "categories": "AI治理,企业AI,ROI优化",
        "tags": "AI治理,可观察性,监督学习",
        "key_technologies": "治理框架,可观察性工具,监控",
        "innovation_score": 8,
        "industry": "Enterprise"
    },
    {
        "title": "编排成为连接组织 - 解决 AI 规模化问题",
        "description": "2026年是编排被广泛认可为解决规模化问题并使 AI 有用的连接组织的一年。这包括 Agentic 编排的不断增长的作用，即智能代理在工作流程中协调决策和行动，而不是作为独立的工具行动。",
        "original_url": "https://www.redwood.com/article/ai-automation-trends/",
        "source_platform": "Redwood",
        "categories": "AI编排,工作流管理,规模化",
        "tags": "工作流编排,规模化AI,协调代理",
        "key_technologies": "编排引擎,工作流管理,API编排",
        "innovation_score": 9,
        "industry": "Enterprise"
    },
    {
        "title": "10-20-70 规则 - AI 成功部署策略",
        "description": "如此量级的变革需要在许多方面进行关键调整。成功的 AI 创新者遵循 10-20-70 规则，即公司应将其努力的 10% 用于算法，20% 用于技术和数据，其余 70% 用于人员和流程。这一强调对于确保 AI 技术真正创造商业价值至关重要。",
        "original_url": "https://www.bcg.com/publications/2026/how-ai-agents-will-transform-health-care",
        "source_platform": "BCG",
        "categories": "AI战略,变革管理,部署最佳实践",
        "tags": "AI部署,变革管理,组织变革",
        "key_technologies": "组织设计,流程优化,培训",
        "innovation_score": 8,
        "industry": "General"
    },
    {
        "title": "云成本优化 - 实时可见性与控制",
        "description": "实时云成本可见性和控制以及大规模持续云支出优化等能力使企业能够减少浪费并将消费与需求对齐。对于许多组织来说，以成本为中心的 Agentic AI 计划成为更广泛自动化计划的基础。",
        "original_url": "https://www.cloudkeeper.com/insights/blog/top-agentic-ai-trends-watch-2026-how-ai-agents-are-redefining-enterprise-automation",
        "source_platform": "Cloudkeeper",
        "categories": "云管理,成本优化,资源优化",
        "tags": "云成本优化,资源管理,FinOpsOps",
        "key_technologies": "云API,成本分析,优化算法",
        "innovation_score": 8,
        "industry": "Cloud/IT"
    },
    {
        "title": "数据基础现代化 - Agent Ready 数据",
        "description": "专注于使数据就绪代理。投资于元数据、本体的实时访问层，确保您的代理不会基于过时或无上下文的信息产生幻觉。2026年是从线性、人为流程转向智能、网络化、自适应自动化的一年。",
        "original_url": "https://naviant.com/blog/ai-agentic-automation-trends/",
        "source_platform": "Naviant",
        "categories": "数据管理,数据架构,数据治理",
        "tags": "数据准备,元数据,本体,实时访问",
        "key_technologies": "数据平台,元数据管理,本体工程",
        "innovation_score": 8,
        "industry": "Data/IT"
    }
]

def generate_id(url):
    """从URL生成唯一ID"""
    parsed = urlparse(url)
    # 使用域名和路径生成ID
    clean_id = parsed.netloc.replace('.', '_') + parsed.path.replace('/', '_')
    return clean_id.replace('-', '_').rstrip('_')

def insert_scenes():
    """将场景插入数据库"""
    conn = sqlite3.connect(DB_PATH)
    cursor = conn.cursor()

    new_count = 0
    duplicate_count = 0
    skipped_urls = []

    for scene in SCENES:
        scene_id = generate_id(scene['original_url'])

        # 检查是否已存在
        cursor.execute("SELECT id FROM scenes WHERE original_url = ?", (scene['original_url'],))
        existing = cursor.fetchone()

        if existing:
            duplicate_count += 1
            skipped_urls.append(scene['title'])
            continue

        # 插入新场景
        try:
            cursor.execute("""
                INSERT INTO scenes (
                    id, title, description, original_url, source_platform,
                    categories, tags, key_technologies, innovation_score,
                    is_new, discovery_date
                ) VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, 1, ?)
            """, (
                scene_id,
                scene['title'],
                scene['description'],
                scene['original_url'],
                scene['source_platform'],
                scene['categories'],
                scene['tags'],
                scene['key_technologies'],
                scene['innovation_score'],
                datetime.now().strftime('%Y-%m-%d %H:%M:%S')
            ))
            new_count += 1
            print(f"✓ 新增: {scene['title']}")
        except sqlite3.IntegrityError as e:
            # 处理唯一约束冲突
            cursor.execute("SELECT id FROM scenes WHERE original_url = ?", (scene['original_url'],))
            if cursor.fetchone():
                duplicate_count += 1
                skipped_urls.append(scene['title'])
                print(f"⊘ 已存在: {scene['title']}")
            else:
                print(f"✗ 错误: {scene['title']} - {e}")

    # 记录收集日志
    cursor.execute("""
        INSERT INTO collection_log (source, items_found, items_new, status)
        VALUES (?, ?, ?, ?)
    """, ('tavily-search', len(SCENES), new_count, 'completed'))

    conn.commit()
    conn.close()

    print(f"\n收集完成:")
    print(f"  总处理: {len(SCENES)} 条")
    print(f"  新增: {new_count} 条")
    print(f"  已存在: {duplicate_count} 条")

    if skipped_urls:
        print(f"\n已存在的场景:")
        for title in skipped_urls[:5]:
            print(f"  - {title}")
        if len(skipped_urls) > 5:
            print(f"  ... 还有 {len(skipped_urls) - 5} 条")

    return new_count, duplicate_count

if __name__ == "__main__":
    insert_scenes()