from core.task_executor import TaskExecutor, TaskStatus
from typing import Dict, Any
import time
import json
import re

# Executor for analysing Chinese text (validation, preprocessing, summary, keywords, report)
class TextAnalysisExecutor(TaskExecutor):
    """Task executor that runs a five-step, LLM-backed analysis of Chinese text.

    The steps are declared in ``self.task_steps``. ``execute_step`` prepares
    the input for one step, sends it to ``self.llm_executor`` and appends the
    returned output to ``self.execution_path``.

    NOTE(review): ``task_input``, ``execution_path``, ``llm_executor``,
    ``logger``, ``CHECKPOINT_INTERVAL`` and ``create_checkpoint`` are not
    defined in this class; presumably they are provided by ``TaskExecutor``
    -- confirm against the base class.
    """

    def __init__(self):
        """Initialise the executor and declare the ordered analysis steps."""
        super().__init__(llm_model="deepseek-chat")
        # Each step carries an id, a display name, the inputs it needs and the
        # instruction text handed to the LLM. This class itself only reads
        # "name" and "instruction"; "required_info" is presumably consumed by
        # the base class -- TODO confirm.
        self.task_steps = [
            {
                "id": "input_validation",
                "name": "Validate Input",
                "required_info": ["text"],
                "instruction": "Validate if the input text is not empty and contains valid Chinese characters. Check for proper UTF-8 encoding."
            },
            {
                "id": "text_preprocessing",
                "name": "Preprocess Text",
                "required_info": ["text"],
                "instruction": "Clean and preprocess the text by: 1) Normalizing Chinese punctuation, 2) Removing unnecessary whitespace while preserving sentence structure, 3) Standardizing traditional/simplified characters if needed."
            },
            {
                "id": "generate_summary",
                "name": "Generate Summary",
                "required_info": ["preprocessed_text"],
                "instruction": "Generate a concise summary in Chinese that captures the main points. Maintain the original language style and terminology."
            },
            {
                "id": "extract_keywords",
                "name": "Extract Keywords",
                "required_info": ["preprocessed_text"],
                "instruction": "Extract the most important Chinese keywords and key phrases from the text. Include both technical terms and contextual phrases."
            },
            {
                "id": "final_analysis",
                "name": "Final Analysis",
                "required_info": ["summary", "keywords"],
                "instruction": "Combine the summary and keywords into a comprehensive analysis report in Chinese. Structure the report with clear sections for summary, key points, and insights."
            }
        ]

    def validate_input(self, input_data: Dict[str, Any]) -> bool:
        """Return True when ``input_data["text"]`` is a non-blank string.

        Args:
            input_data: Task input mapping; must contain a ``"text"`` entry.

        Returns:
            False if the key is missing, the value is not a ``str``, or the
            string is empty after stripping whitespace; True otherwise.
        """
        if "text" not in input_data:
            return False
        text = input_data.get("text", "")
        return isinstance(text, str) and len(text.strip()) > 0

    def _previous_output(self, step_id: str) -> Any:
        """Return the recorded result of the first executed step ``step_id``, or ``{}``."""
        return next(
            (entry["result"] for entry in self.execution_path if entry["step_id"] == step_id),
            {}
        )

    def _build_step_input(self, step_id: str) -> Dict[str, Any]:
        """Assemble the input mapping passed to the LLM for ``step_id``.

        Unknown step ids yield an empty mapping.

        Raises:
            ValueError: for ``input_validation`` when the text cannot be
                round-tripped through UTF-8 (e.g. it contains lone surrogates).
        """
        if step_id == "input_validation":
            text = self.task_input.get("text", "")
            try:
                # Round-trip through UTF-8 to detect unencodable characters.
                text.encode('utf-8').decode('utf-8')
            except UnicodeError as err:
                # Raise instead of returning an error dict: the dict was truthy
                # and made a failed validation look like success to callers.
                raise ValueError(
                    "Invalid text encoding. Please ensure the text is properly encoded in UTF-8."
                ) from err
            return {"text": text}

        if step_id == "text_preprocessing":
            text = self.task_input.get("text", "")
            # Collapse every whitespace run (newlines included) into one space.
            text = re.sub(r'\s+', ' ', text).strip()
            # Map full-width comma, period and colon to their ASCII forms
            # (a deliberately simple normalisation).
            text = text.replace('，', ',').replace('。', '.').replace('：', ':')
            return {"text": text}

        if step_id in ("generate_summary", "extract_keywords"):
            # Prefer the preprocessing output; fall back to the raw input text.
            preprocessed = self._previous_output("text_preprocessing")
            return {"text": preprocessed.get("preprocessed_text", self.task_input.get("text", ""))}

        if step_id == "final_analysis":
            summary_result = self._previous_output("generate_summary")
            keywords_result = self._previous_output("extract_keywords")
            return {
                "summary": summary_result.get("summary", ""),
                "keywords": keywords_result.get("keywords", [])
            }

        return {}

    async def execute_step(self, step_id: str, step_data: Dict[str, Any]) -> bool:
        """Execute one analysis step through the LLM executor.

        Args:
            step_id: Identifier of the step (one of the ids in ``task_steps``).
            step_data: Step definition; ``"name"`` is required and
                ``"instruction"`` is optional.

        Returns:
            True when the LLM reports success and the output has been appended
            to ``self.execution_path``. False when anything raises; the error
            is logged and the current step is marked as failed.
        """
        try:
            # Publish the step as in progress before doing any work.
            self.current_step = {
                "id": step_id,
                "name": step_data["name"],
                "status": TaskStatus.IN_PROGRESS.value,
                "progress": 0
            }

            instruction = step_data.get("instruction", "")
            step_input = self._build_step_input(step_id)

            # Delegate the actual work to the LLM executor.
            step_result = await self.llm_executor.execute_step(
                step_instruction=instruction,
                step_input=step_input
            )

            if not step_result.get("success", False):
                raise Exception(f"Step failed: {step_result.get('error', 'Unknown error')}")

            # Record the output so later steps can look it up by step id.
            self.execution_path.append({
                "step_id": step_id,
                "result": step_result.get("output", {})
            })

            self.current_step["status"] = TaskStatus.COMPLETED.value
            self.current_step["progress"] = 100

            # Checkpoint every CHECKPOINT_INTERVAL recorded steps.
            if len(self.execution_path) % self.CHECKPOINT_INTERVAL == 0:
                self.create_checkpoint()

            return True

        except Exception as e:
            # Any failure (input preparation, LLM call, bookkeeping) ends here.
            self.logger.error(f"Step {step_id} failed: {str(e)}")
            self.current_step["status"] = TaskStatus.FAILED.value
            return False

def main():
    """Run the Chinese text-analysis example and print each step's result.

    Builds a ``TextAnalysisExecutor``, runs its ``execute`` coroutine on a
    sample transcript, then prints the result of every entry found under
    ``"execution_path"`` in the returned mapping.
    """
    import asyncio
    import sys

    # Switch console output to UTF-8 in place. Re-wrapping ``sys.stdout.buffer``
    # crashed when stdout had no ``buffer`` attribute (e.g. a replaced stream)
    # and installed a wrapper that was no longer line-buffered.
    reconfigure = getattr(sys.stdout, "reconfigure", None)
    if callable(reconfigure):
        reconfigure(encoding='utf-8')

    # Create the executor instance.
    executor = TextAnalysisExecutor()

    # Sample Chinese text.
    sample_text = """
        从 ChatGPT 到 Devin：AI 编程的四个发展阶段与范式转变 Koji：我们再聊一聊 AI 编程。编程领域今年取得了非常令人兴奋的进展。雨森一直有很强的框架归纳和总结能力。前不久你跟我分享过你提炼出来的 AI 编程发展四段论，要不要在播客里和大家分享一下？ 雨森：这其实是和很多朋友一起探讨得出的结果，是大家智慧的结晶。AI 编程从 ChatGPT 出现到现在也就两年出头的时间，但已经经历了四个阶段。 第一个阶段是让 AI 直接写代码，典型代表是早期的 ChatGPT、Claude。我们给它一个需求，比如「帮我写个贪吃蛇」，它就给出一段代码。在这个过程中，它既不知道我为什么要写贪吃蛇，也不知道代码运行情况如何。可能要我去本地编译运行后发现报错，再把错误告诉它，它才能给出调试后的结果。这时的 AI 完全就像一个只能通过邮件交流的笔友，是简单的问答模式。 第二阶段是以 GitHub Copilot 为代表，AI 开始拥有上下文，它可以把整个组织的代码库作为 context。这样 AI 就获得了大量新的背景信息。但这时用户还是需要手动把代码贴到 IDE 里面进行调试。我觉得这是 2.0 阶段，就是我们让 AI 拥有了 codebase 作为上下文。 2024 年一个非常大的进步是以 Cursor 为代表的编程 Copilot 的出现。它的核心理念是预测用户未来要写什么代码。根据你的代码库以及刚才写的代码，它预测你接下来要写什么代码、创建什么文件、做什么操作。这里面对于生成代码的质量和数量，以及文件的创建和修改都有很大提升。后来 Windsurf 还加入了对命令行操作的自动化，这样 AI 就能很好地使用我的电脑。原来的 AI 是在一张纸上写代码，我把代码抄走运行；现在 AI 可以在我的电脑上创建文件、执行命令行操作，进入到「我为你写」的阶段。 当我们觉得这已经很令人兴奋时，Devin 的出现带来了几个重要突破：首先，它可以异步工作。Cursor、Windsurf 这些工具虽然一步操作做的事情比较多，但仍然需要持续的注意力，即「我说一步它做一步」。而 Devin 可以持续工作，把用户的注意力释放出来。这是因为它多了一个 Planner，可以规划任务。 其次，它可以通过虚拟机执行更多操作，做更多调试工作。比如你写个网站，它可以自己用虚拟机去访问这个网站，检查前端后端的业务逻辑是否正确，并且可以随时打断和调整。大家用 Cursor 或者 ChatGPT 都知道，你无法在它输出的中间做调整，必须等它输出完后才能修改。但 Devin 就像真人一样，你可以在它完成任务时给出新指令，它会把这个结合到已有的 Planner 里调整计划。这就从「为你写」进化到了「为你做」。 总结一下这四个阶段：第一阶段是让 AI 写代码，代表是 ChatGPT；第二阶段是 AI 开放代码库，代表是 GitHub Copilot；第三阶段是 AI 可以自动写代码并执行，代表是 Cursor 和 Windsurf；第四阶段是 AI 虚拟员工，Devin 开创了一个很好的先例。
    """

    # Prepare the task input.
    task_input = {
        "text": sample_text
    }

    # Run the task to completion on a fresh event loop.
    result = asyncio.run(executor.execute(task_input))

    # Report header.
    print("\n 文本分析结果:")
    print("=" * 50)

    # Print the result of every recorded step.
    for step in result.get("execution_path", []):
        step_id = step["step_id"]
        result_data = step["result"]

        print(f"\n {step_id.upper()}:")
        print("-" * 30)

        if step_id == "input_validation":
            print(" 输入验证完成")
        elif step_id == "text_preprocessing":
            print(" 文本预处理完成")
            if "preprocessed_text" in result_data:
                print("\n处理后的文本:")
                print(result_data["preprocessed_text"])
        elif step_id == "generate_summary":
            print("\n 文本摘要:")
            if "summary" in result_data:
                print(result_data["summary"])
        elif step_id == "extract_keywords":
            print("\n 关键词:")
            if "keywords" in result_data:
                keywords = result_data["keywords"]
                if isinstance(keywords, list):
                    # Coerce items to str: the list comes from LLM output and
                    # str.join raises TypeError on non-string elements.
                    print("、".join(str(keyword) for keyword in keywords))
                else:
                    print(keywords)
        elif step_id == "final_analysis":
            print("\n 最终分析:")
            if "analysis" in result_data:
                print(result_data["analysis"])

# Run the example only when this file is executed as a script, not when imported.
if __name__ == "__main__":
    main()