From 5330de49fc901f354ff0e6e616a8523a6cd00cce Mon Sep 17 00:00:00 2001 From: zhukang <274546966@qq.com> Date: Wed, 15 Jan 2025 22:08:38 +0800 Subject: [PATCH] test: add clipboard content tests - Add test suite for clipboard content handling - Add test cases for different content types - Add test data module - Improve clipboard operation reliability - Add better HTML content extraction --- project/llmclipboard/llmclipboard/app.py | 35 ++++- .../llmclipboard/tests/__init__.py | 1 + .../llmclipboard/tests/test_clipboard.py | 127 ++++++++++++++++++ .../llmclipboard/tests/test_data.py | 66 +++++++++ 4 files changed, 222 insertions(+), 7 deletions(-) create mode 100644 project/llmclipboard/llmclipboard/tests/__init__.py create mode 100644 project/llmclipboard/llmclipboard/tests/test_clipboard.py create mode 100644 project/llmclipboard/llmclipboard/tests/test_data.py diff --git a/project/llmclipboard/llmclipboard/app.py b/project/llmclipboard/llmclipboard/app.py index 86f9e55..ed2e212 100644 --- a/project/llmclipboard/llmclipboard/app.py +++ b/project/llmclipboard/llmclipboard/app.py @@ -82,9 +82,10 @@ class TextCaptureService: except Exception as e: self.logger.debug(f"获取Unicode文本失败: {e}") - # 如果Unicode获取失败,尝试获取HTML格式 + # 尝试获取HTML格式 try: - html_content = win32clipboard.GetClipboardData(win32con.CF_HTML) + CF_HTML = win32clipboard.RegisterClipboardFormat("HTML Format") + html_content = win32clipboard.GetClipboardData(CF_HTML) self.logger.info("获取HTML格式内容") if html_content: # 解析HTML格式的内容 @@ -97,10 +98,28 @@ class TextCaptureService: h.ignore_tables = False # 从HTML字符串中提取实际的HTML内容 - start = html_content.find('') - end = html_content.find('') - if start != -1 and end != -1: - html_content = html_content[start:end+7] + try: + if isinstance(html_content, bytes): + html_content = html_content.decode('utf-8') + + # 查找HTML内容的开始和结束 + start = html_content.find('') + if start == -1: + start = html_content.find('') + if start != -1: + start = html_content.find('<', start + 20) + + end = html_content.find('') + if end == -1: + end = html_content.find('') + + if start != -1: + if end != -1: + html_content = html_content[start:end] + else: + html_content = html_content[start:] + except Exception as e: + self.logger.debug(f"HTML内容提取失败: {e}") content = h.handle(html_content).strip() if content and self.is_valid_content(content): @@ -113,7 +132,9 @@ class TextCaptureService: text_content = win32clipboard.GetClipboardData(win32con.CF_TEXT) self.logger.info("获取普通文本格式内容") if text_content: - content = text_content.decode('gbk') + if isinstance(text_content, bytes): + text_content = text_content.decode('gbk', errors='ignore') + content = text_content if self.is_valid_content(content): return content except Exception as e: diff --git a/project/llmclipboard/llmclipboard/tests/__init__.py b/project/llmclipboard/llmclipboard/tests/__init__.py new file mode 100644 index 0000000..4540de9 --- /dev/null +++ b/project/llmclipboard/llmclipboard/tests/__init__.py @@ -0,0 +1 @@ +"""测试包""" diff --git a/project/llmclipboard/llmclipboard/tests/test_clipboard.py b/project/llmclipboard/llmclipboard/tests/test_clipboard.py new file mode 100644 index 0000000..8278737 --- /dev/null +++ b/project/llmclipboard/llmclipboard/tests/test_clipboard.py @@ -0,0 +1,127 @@ +"""剪贴板测试模块""" +import unittest +import win32clipboard +import win32con +import time +from ..app import TextCaptureService +from . import test_data + +# HTML格式的标识符 +CF_HTML = win32clipboard.RegisterClipboardFormat("HTML Format") + +class TestClipboard(unittest.TestCase): + """剪贴板测试类""" + + def setUp(self): + """测试准备""" + self.service = TextCaptureService() + + def set_clipboard_text(self, text, format_type=win32con.CF_UNICODETEXT): + """设置剪贴板内容""" + max_retries = 3 + for retry in range(max_retries): + try: + # 确保剪贴板已关闭 + try: + win32clipboard.CloseClipboard() + except: + pass + + time.sleep(0.1 * (retry + 1)) # 递增等待时间 + + # 打开剪贴板 + win32clipboard.OpenClipboard() + win32clipboard.EmptyClipboard() + + # 根据不同格式处理数据 + if format_type == CF_HTML: + # 处理HTML格式 + if isinstance(text, str): + text = text.encode('utf-8') + elif format_type == win32con.CF_UNICODETEXT: + # 处理Unicode文本 + if isinstance(text, bytes): + text = text.decode('utf-8') + elif format_type == win32con.CF_TEXT: + # 处理普通文本 + if isinstance(text, str): + text = text.encode('gbk') + + # 设置剪贴板数据 + win32clipboard.SetClipboardData(format_type, text) + win32clipboard.CloseClipboard() + time.sleep(0.1) # 等待剪贴板操作完成 + return + except Exception as e: + print(f"剪贴板操作失败 (尝试 {retry + 1}/{max_retries}): {e}") + time.sleep(0.5) + try: + win32clipboard.CloseClipboard() + except: + pass + if retry == max_retries - 1: + raise Exception(f"设置剪贴板内容失败: {e}") + + def test_plain_text(self): + """测试普通文本处理""" + self.set_clipboard_text(test_data.PLAIN_TEXT) + content = self.service.get_clipboard_content() + self.assertIsNotNone(content) + self.assertEqual(content.strip(), test_data.PLAIN_TEXT.strip()) + + def test_markdown_text(self): + """测试Markdown文本处理""" + self.set_clipboard_text(test_data.MARKDOWN_TEXT) + content = self.service.get_clipboard_content() + self.assertIsNotNone(content) + self.assertTrue('# 这是一个Markdown文档' in content) + + def test_html_text(self): + """测试HTML文本处理""" + # 构造CF_HTML格式的数据 + html = test_data.HTML_TEXT + header = ( + "Version:0.9\r\n" + "StartHTML:00000000\r\n" + "EndHTML:00000000\r\n" + "StartFragment:00000000\r\n" + "EndFragment:00000000\r\n" + ) + html_data = header + html + self.set_clipboard_text(html_data, CF_HTML) + content = self.service.get_clipboard_content() + self.assertIsNotNone(content) + self.assertTrue('HTML测试文档' in content) + + def test_empty_excalidraw(self): + """测试空的Excalidraw内容""" + self.set_clipboard_text(test_data.EMPTY_EXCALIDRAW) + content = self.service.get_clipboard_content() + self.assertIsNone(content) + + def test_valid_excalidraw(self): + """测试有效的Excalidraw内容""" + self.set_clipboard_text(test_data.VALID_EXCALIDRAW) + content = self.service.get_clipboard_content() + self.assertIsNotNone(content) + + def test_invalid_json(self): + """测试无效的JSON内容""" + self.set_clipboard_text(test_data.INVALID_JSON) + content = self.service.get_clipboard_content() + self.assertIsNotNone(content) + + def test_empty_content(self): + """测试空白内容""" + self.set_clipboard_text(test_data.EMPTY_CONTENT) + content = self.service.get_clipboard_content() + self.assertIsNone(content) + + def test_short_content(self): + """测试过短内容""" + self.set_clipboard_text(test_data.SHORT_CONTENT) + content = self.service.get_clipboard_content() + self.assertIsNone(content) + +if __name__ == '__main__': + unittest.main() diff --git a/project/llmclipboard/llmclipboard/tests/test_data.py b/project/llmclipboard/llmclipboard/tests/test_data.py new file mode 100644 index 0000000..927104e --- /dev/null +++ b/project/llmclipboard/llmclipboard/tests/test_data.py @@ -0,0 +1,66 @@ +"""测试数据模块""" + +# 普通文本内容 +PLAIN_TEXT = """这是一个测试文本 +它包含多行内容 +用于测试文本处理功能""" + +# Markdown格式内容 +MARKDOWN_TEXT = """# 这是一个Markdown文档 + +## 简介 +这是一个用于测试的Markdown文档。 + +### 特点 +- 包含标题 +- 包含列表 +- 包含格式化文本 + +> 这是一个引用 +""" + +# HTML格式内容 +HTML_TEXT = """ + +

HTML测试文档

+

这是一个HTML格式的文档,用于测试格式转换功能。

+ + +""" + +# 空的Excalidraw内容 +EMPTY_EXCALIDRAW = """{ + "type": "excalidraw/clipboard", + "elements": [], + "files": {} +}""" + +# 包含内容的Excalidraw +VALID_EXCALIDRAW = """{ + "type": "excalidraw/clipboard", + "elements": [ + { + "type": "rectangle", + "x": 100, + "y": 100, + "width": 200, + "height": 100 + } + ], + "files": {} +}""" + +# 无效的JSON内容 +INVALID_JSON = """{ + "type": "invalid + "data": [] +}""" + +# 空白内容 +EMPTY_CONTENT = " \n \t \n" + +# 过短内容 +SHORT_CONTENT = "ab"