From 5330de49fc901f354ff0e6e616a8523a6cd00cce Mon Sep 17 00:00:00 2001
From: zhukang <274546966@qq.com>
Date: Wed, 15 Jan 2025 22:08:38 +0800
Subject: [PATCH] test: add clipboard content tests

- Add test suite for clipboard content handling
- Add test cases for different content types
- Add test data module
- Improve clipboard operation reliability
- Add better HTML content extraction
---
 project/llmclipboard/llmclipboard/app.py      |  35 ++++-
 .../llmclipboard/tests/__init__.py            |   1 +
 .../llmclipboard/tests/test_clipboard.py      | 127 ++++++++++++++++++
 .../llmclipboard/tests/test_data.py           |  66 +++++++++
 4 files changed, 222 insertions(+), 7 deletions(-)
 create mode 100644 project/llmclipboard/llmclipboard/tests/__init__.py
 create mode 100644 project/llmclipboard/llmclipboard/tests/test_clipboard.py
 create mode 100644 project/llmclipboard/llmclipboard/tests/test_data.py
diff --git a/project/llmclipboard/llmclipboard/app.py b/project/llmclipboard/llmclipboard/app.py
index 86f9e55..ed2e212 100644
--- a/project/llmclipboard/llmclipboard/app.py
+++ b/project/llmclipboard/llmclipboard/app.py
@@ -82,9 +82,10 @@ class TextCaptureService:
             except Exception as e:
                 self.logger.debug(f"获取Unicode文本失败: {e}")
 
-            # 如果Unicode获取失败，尝试获取HTML格式
+            # 尝试获取HTML格式
             try:
-                html_content = win32clipboard.GetClipboardData(win32con.CF_HTML)
+                CF_HTML = win32clipboard.RegisterClipboardFormat("HTML Format")
+                html_content = win32clipboard.GetClipboardData(CF_HTML)
                 self.logger.info("获取HTML格式内容")
                 if html_content:
                     # 解析HTML格式的内容
@@ -97,10 +98,28 @@ class TextCaptureService:
                     h.ignore_tables = False
                     
                     # 从HTML字符串中提取实际的HTML内容
-                    start = html_content.find('<html>')
-                    end = html_content.find('</html>')
-                    if start != -1 and end != -1:
-                        html_content = html_content[start:end+7]
+                    try:
+                        if isinstance(html_content, bytes):
+                            html_content = html_content.decode('utf-8')
+                        
+                        # 查找HTML内容的开始和结束
+                        start = html_content.find('<html>')
+                        if start == -1:
+                            start = html_content.find('<!--StartFragment-->')
+                            if start != -1:
+                                start = html_content.find('<', start + 20)
+                        
+                        end = html_content.find('</html>')
+                        if end == -1:
+                            end = html_content.find('<!--EndFragment-->')
+                        
+                        if start != -1:
+                            if end != -1:
+                                html_content = html_content[start:end]
+                            else:
+                                html_content = html_content[start:]
+                    except Exception as e:
+                        self.logger.debug(f"HTML内容提取失败: {e}")
                     
                     content = h.handle(html_content).strip()
                     if content and self.is_valid_content(content):
@@ -113,7 +132,9 @@ class TextCaptureService:
                 text_content = win32clipboard.GetClipboardData(win32con.CF_TEXT)
                 self.logger.info("获取普通文本格式内容")
                 if text_content:
-                    content = text_content.decode('gbk')
+                    if isinstance(text_content, bytes):
+                        text_content = text_content.decode('gbk', errors='ignore')
+                    content = text_content
                     if self.is_valid_content(content):
                         return content
             except Exception as e:
diff --git a/project/llmclipboard/llmclipboard/tests/__init__.py b/project/llmclipboard/llmclipboard/tests/__init__.py
new file mode 100644
index 0000000..4540de9
--- /dev/null
+++ b/project/llmclipboard/llmclipboard/tests/__init__.py
@@ -0,0 +1 @@
+"""Test package."""
diff --git a/project/llmclipboard/llmclipboard/tests/test_clipboard.py b/project/llmclipboard/llmclipboard/tests/test_clipboard.py
new file mode 100644
index 0000000..8278737
--- /dev/null
+++ b/project/llmclipboard/llmclipboard/tests/test_clipboard.py
@@ -0,0 +1,127 @@
+"""剪贴板测试模块"""
+import unittest
+import win32clipboard
+import win32con
+import time
+from ..app import TextCaptureService
+from . import test_data
+
+# Clipboard format id registered under the name "HTML Format"
+CF_HTML = win32clipboard.RegisterClipboardFormat("HTML Format")
+
+class TestClipboard(unittest.TestCase):
+    """Round-trip tests that write to the Windows clipboard and read it back."""
+
+    def setUp(self):
+        """Create a fresh TextCaptureService for each test."""
+        self.service = TextCaptureService()
+
+    def set_clipboard_text(self, text, format_type=win32con.CF_UNICODETEXT):
+        """Put text on the clipboard as format_type; retry up to 3 times, then raise."""
+        max_retries = 3
+        for retry in range(max_retries):
+            try:
+                # Best-effort close in case the clipboard was left open; failure is expected
+                try:
+                    win32clipboard.CloseClipboard()
+                except Exception:
+                    pass
+
+                time.sleep(0.1 * (retry + 1))  # wait a little longer on each attempt
+
+                # Open the clipboard and discard whatever it currently holds
+                win32clipboard.OpenClipboard()
+                win32clipboard.EmptyClipboard()
+
+                # Coerce the payload to the Python type each format is written with
+                if format_type == CF_HTML:
+                    # "HTML Format" payload goes in as UTF-8 bytes
+                    if isinstance(text, str):
+                        text = text.encode('utf-8')
+                elif format_type == win32con.CF_UNICODETEXT:
+                    # Unicode text goes in as str
+                    if isinstance(text, bytes):
+                        text = text.decode('utf-8')
+                elif format_type == win32con.CF_TEXT:
+                    # Plain text goes in as GBK-encoded bytes
+                    if isinstance(text, str):
+                        text = text.encode('gbk')
+
+                # Publish the data, then release the clipboard
+                win32clipboard.SetClipboardData(format_type, text)
+                win32clipboard.CloseClipboard()
+                time.sleep(0.1)  # give the clipboard operation time to complete
+                return
+            except Exception as e:
+                print(f"剪贴板操作失败 (尝试 {retry + 1}/{max_retries}): {e}")
+                time.sleep(0.5)
+                try:
+                    win32clipboard.CloseClipboard()
+                except Exception:
+                    pass
+                if retry == max_retries - 1:
+                    raise RuntimeError(f"设置剪贴板内容失败: {e}") from e
+
+    def test_plain_text(self):
+        """Plain multi-line text is returned unchanged (modulo surrounding whitespace)."""
+        self.set_clipboard_text(test_data.PLAIN_TEXT)
+        content = self.service.get_clipboard_content()
+        self.assertIsNotNone(content)
+        self.assertEqual(content.strip(), test_data.PLAIN_TEXT.strip())
+
+    def test_markdown_text(self):
+        """Markdown text is returned with its top-level heading intact."""
+        self.set_clipboard_text(test_data.MARKDOWN_TEXT)
+        content = self.service.get_clipboard_content()
+        self.assertIsNotNone(content)
+        self.assertIn('# 这是一个Markdown文档', content)
+
+    def test_html_text(self):
+        """HTML clipboard data yields content containing the document heading text."""
+        # Build an "HTML Format" payload; the header offsets are zero placeholders
+        html = test_data.HTML_TEXT
+        header = (
+            "Version:0.9\r\n"
+            "StartHTML:00000000\r\n"
+            "EndHTML:00000000\r\n"
+            "StartFragment:00000000\r\n"
+            "EndFragment:00000000\r\n"
+        )
+        html_data = header + html
+        self.set_clipboard_text(html_data, CF_HTML)
+        content = self.service.get_clipboard_content()
+        self.assertIsNotNone(content)
+        self.assertIn('HTML测试文档', content)
+
+    def test_empty_excalidraw(self):
+        """An Excalidraw payload with no elements yields None."""
+        self.set_clipboard_text(test_data.EMPTY_EXCALIDRAW)
+        content = self.service.get_clipboard_content()
+        self.assertIsNone(content)
+
+    def test_valid_excalidraw(self):
+        """An Excalidraw payload with at least one element yields content."""
+        self.set_clipboard_text(test_data.VALID_EXCALIDRAW)
+        content = self.service.get_clipboard_content()
+        self.assertIsNotNone(content)
+
+    def test_invalid_json(self):
+        """Malformed JSON text still yields content rather than None."""
+        self.set_clipboard_text(test_data.INVALID_JSON)
+        content = self.service.get_clipboard_content()
+        self.assertIsNotNone(content)
+
+    def test_empty_content(self):
+        """Whitespace-only text yields None."""
+        self.set_clipboard_text(test_data.EMPTY_CONTENT)
+        content = self.service.get_clipboard_content()
+        self.assertIsNone(content)
+
+    def test_short_content(self):
+        """Very short text yields None."""
+        self.set_clipboard_text(test_data.SHORT_CONTENT)
+        content = self.service.get_clipboard_content()
+        self.assertIsNone(content)
+
+if __name__ == '__main__':  # run via "python -m": the relative imports above fail in a plain script run
+    unittest.main()
diff --git a/project/llmclipboard/llmclipboard/tests/test_data.py b/project/llmclipboard/llmclipboard/tests/test_data.py
new file mode 100644
index 0000000..927104e
--- /dev/null
+++ b/project/llmclipboard/llmclipboard/tests/test_data.py
@@ -0,0 +1,66 @@
+"""Test data module."""
+
+# Plain multi-line text
+PLAIN_TEXT = """这是一个测试文本
+它包含多行内容
+用于测试文本处理功能"""
+
+# Markdown-formatted text
+MARKDOWN_TEXT = """# 这是一个Markdown文档
+
+## 简介
+这是一个用于测试的Markdown文档。
+
+### 特点
+- 包含标题
+- 包含列表
+- 包含格式化文本
+
+> 这是一个引用
+"""
+
+# HTML document
+HTML_TEXT = """<html>
+<body>
+<h1>HTML测试文档</h1>
+<p>这是一个<strong>HTML</strong>格式的文档，用于测试<em>格式转换</em>功能。</p>
+<ul>
+    <li>列表项1</li>
+    <li>列表项2</li>
+</ul>
+</body>
+</html>"""
+
+# Excalidraw clipboard payload with an empty "elements" list
+EMPTY_EXCALIDRAW = """{
+    "type": "excalidraw/clipboard",
+    "elements": [],
+    "files": {}
+}"""
+
+# Excalidraw clipboard payload containing one rectangle element
+VALID_EXCALIDRAW = """{
+    "type": "excalidraw/clipboard",
+    "elements": [
+        {
+            "type": "rectangle",
+            "x": 100,
+            "y": 100,
+            "width": 200,
+            "height": 100
+        }
+    ],
+    "files": {}
+}"""
+
+# Malformed JSON (unterminated string, missing comma)
+INVALID_JSON = """{
+    "type": "invalid
+    "data": []
+}"""
+
+# Whitespace-only content
+EMPTY_CONTENT = "   \n   \t   \n"
+
+# Content that is too short
+SHORT_CONTENT = "ab"