test: add clipboard content tests
- Add test suite for clipboard content handling
- Add test cases for different content types
- Add test data module
- Improve clipboard operation reliability
- Add better HTML content extraction
This commit is contained in:
parent
bcd15f141f
commit
5330de49fc
@ -82,9 +82,10 @@ class TextCaptureService:
|
||||
except Exception as e:
|
||||
self.logger.debug(f"获取Unicode文本失败: {e}")
|
||||
|
||||
# 如果Unicode获取失败,尝试获取HTML格式
|
||||
# 尝试获取HTML格式
|
||||
try:
|
||||
html_content = win32clipboard.GetClipboardData(win32con.CF_HTML)
|
||||
CF_HTML = win32clipboard.RegisterClipboardFormat("HTML Format")
|
||||
html_content = win32clipboard.GetClipboardData(CF_HTML)
|
||||
self.logger.info("获取HTML格式内容")
|
||||
if html_content:
|
||||
# 解析HTML格式的内容
|
||||
@ -97,10 +98,28 @@ class TextCaptureService:
|
||||
h.ignore_tables = False
|
||||
|
||||
# 从HTML字符串中提取实际的HTML内容
|
||||
start = html_content.find('<html>')
|
||||
end = html_content.find('</html>')
|
||||
if start != -1 and end != -1:
|
||||
html_content = html_content[start:end+7]
|
||||
try:
|
||||
if isinstance(html_content, bytes):
|
||||
html_content = html_content.decode('utf-8')
|
||||
|
||||
# 查找HTML内容的开始和结束
|
||||
start = html_content.find('<html>')
|
||||
if start == -1:
|
||||
start = html_content.find('<!--StartFragment-->')
|
||||
if start != -1:
|
||||
start = html_content.find('<', start + 20)
|
||||
|
||||
end = html_content.find('</html>')
|
||||
if end == -1:
|
||||
end = html_content.find('<!--EndFragment-->')
|
||||
|
||||
if start != -1:
|
||||
if end != -1:
|
||||
html_content = html_content[start:end]
|
||||
else:
|
||||
html_content = html_content[start:]
|
||||
except Exception as e:
|
||||
self.logger.debug(f"HTML内容提取失败: {e}")
|
||||
|
||||
content = h.handle(html_content).strip()
|
||||
if content and self.is_valid_content(content):
|
||||
@ -113,7 +132,9 @@ class TextCaptureService:
|
||||
text_content = win32clipboard.GetClipboardData(win32con.CF_TEXT)
|
||||
self.logger.info("获取普通文本格式内容")
|
||||
if text_content:
|
||||
content = text_content.decode('gbk')
|
||||
if isinstance(text_content, bytes):
|
||||
text_content = text_content.decode('gbk', errors='ignore')
|
||||
content = text_content
|
||||
if self.is_valid_content(content):
|
||||
return content
|
||||
except Exception as e:
|
||||
|
||||
1
project/llmclipboard/llmclipboard/tests/__init__.py
Normal file
1
project/llmclipboard/llmclipboard/tests/__init__.py
Normal file
@ -0,0 +1 @@
|
||||
"""测试包"""
|
||||
127
project/llmclipboard/llmclipboard/tests/test_clipboard.py
Normal file
127
project/llmclipboard/llmclipboard/tests/test_clipboard.py
Normal file
@ -0,0 +1,127 @@
|
||||
"""剪贴板测试模块"""
|
||||
import unittest
|
||||
import win32clipboard
|
||||
import win32con
|
||||
import time
|
||||
from ..app import TextCaptureService
|
||||
from . import test_data
|
||||
|
||||
# Clipboard format identifier for HTML, obtained by registering the
# "HTML Format" name with the clipboard API.
|
||||
CF_HTML = win32clipboard.RegisterClipboardFormat("HTML Format")
|
||||
|
||||
class TestClipboard(unittest.TestCase):
    """Clipboard round-trip tests for ``TextCaptureService``.

    Each test writes a payload to the system clipboard and then checks what
    ``TextCaptureService.get_clipboard_content`` returns for it.
    """

    def setUp(self):
        """Create a fresh service instance for every test."""
        self.service = TextCaptureService()

    @staticmethod
    def _close_clipboard_quietly():
        """Close the clipboard, ignoring any failure (best effort)."""
        try:
            win32clipboard.CloseClipboard()
        except Exception:
            # Deliberately ignored: the call is only a precaution, and it is
            # expected to fail when the clipboard is not currently open.
            pass

    @staticmethod
    def _build_cf_html(html):
        """Wrap *html* in a CF_HTML header with real byte offsets.

        Args:
            html: The HTML document as ``str``.

        Returns:
            UTF-8 ``bytes`` consisting of the CF_HTML description header
            followed by the document. ``StartHTML``/``EndHTML`` span the
            whole document. ``StartFragment``/``EndFragment`` span the text
            between the ``<!--StartFragment-->`` and ``<!--EndFragment-->``
            markers when both are present, otherwise the whole document.
        """
        template = (
            "Version:0.9\r\n"
            "StartHTML:{:08d}\r\n"
            "EndHTML:{:08d}\r\n"
            "StartFragment:{:08d}\r\n"
            "EndFragment:{:08d}\r\n"
        )
        payload = html.encode('utf-8')
        # The fields are fixed width, so the header length does not depend
        # on the offset values that are filled in afterwards.
        header_len = len(template.format(0, 0, 0, 0).encode('ascii'))
        start_html = header_len
        end_html = header_len + len(payload)

        start_marker = b'<!--StartFragment-->'
        end_marker = b'<!--EndFragment-->'
        marker_start = payload.find(start_marker)
        marker_end = payload.find(end_marker)
        if marker_start != -1 and marker_end != -1:
            start_fragment = header_len + marker_start + len(start_marker)
            end_fragment = header_len + marker_end
        else:
            start_fragment, end_fragment = start_html, end_html

        header = template.format(
            start_html, end_html, start_fragment, end_fragment
        )
        return header.encode('ascii') + payload

    def set_clipboard_text(self, text, format_type=win32con.CF_UNICODETEXT):
        """Put *text* on the clipboard in the given format, with retries.

        Args:
            text: ``str`` or ``bytes`` payload. It is converted to the type
                the format is given here: UTF-8 bytes for ``CF_HTML``,
                ``str`` for ``CF_UNICODETEXT`` and GBK bytes for ``CF_TEXT``.
                Other formats receive the value unchanged.
            format_type: Clipboard format identifier to store the data under.

        Raises:
            RuntimeError: If every attempt fails. The last underlying error
                is attached as ``__cause__``.
        """
        max_retries = 3
        for retry in range(max_retries):
            # Make sure no earlier attempt left the clipboard open.
            self._close_clipboard_quietly()
            time.sleep(0.1 * (retry + 1))  # back off a little more each try

            try:
                win32clipboard.OpenClipboard()
                try:
                    win32clipboard.EmptyClipboard()

                    if format_type == CF_HTML:
                        if isinstance(text, str):
                            text = text.encode('utf-8')
                    elif format_type == win32con.CF_UNICODETEXT:
                        if isinstance(text, bytes):
                            text = text.decode('utf-8')
                    elif format_type == win32con.CF_TEXT:
                        if isinstance(text, str):
                            text = text.encode('gbk')

                    win32clipboard.SetClipboardData(format_type, text)
                finally:
                    # Always release the clipboard once it has been opened.
                    win32clipboard.CloseClipboard()
            except Exception as e:
                print(f"剪贴板操作失败 (尝试 {retry + 1}/{max_retries}): {e}")
                if retry == max_retries - 1:
                    raise RuntimeError(f"设置剪贴板内容失败: {e}") from e
                time.sleep(0.5)
            else:
                time.sleep(0.1)  # give the clipboard time to settle
                return

    def test_plain_text(self):
        """Plain text is returned unchanged (ignoring outer whitespace)."""
        self.set_clipboard_text(test_data.PLAIN_TEXT)
        content = self.service.get_clipboard_content()
        self.assertIsNotNone(content)
        self.assertEqual(content.strip(), test_data.PLAIN_TEXT.strip())

    def test_markdown_text(self):
        """Markdown text keeps its heading line."""
        self.set_clipboard_text(test_data.MARKDOWN_TEXT)
        content = self.service.get_clipboard_content()
        self.assertIsNotNone(content)
        self.assertIn('# 这是一个Markdown文档', content)

    def test_html_text(self):
        """HTML clipboard data is converted and keeps the heading text."""
        html_data = self._build_cf_html(test_data.HTML_TEXT)
        self.set_clipboard_text(html_data, CF_HTML)
        content = self.service.get_clipboard_content()
        self.assertIsNotNone(content)
        self.assertIn('HTML测试文档', content)

    def test_empty_excalidraw(self):
        """An Excalidraw payload without elements yields no content."""
        self.set_clipboard_text(test_data.EMPTY_EXCALIDRAW)
        content = self.service.get_clipboard_content()
        self.assertIsNone(content)

    def test_valid_excalidraw(self):
        """An Excalidraw payload with elements yields content."""
        self.set_clipboard_text(test_data.VALID_EXCALIDRAW)
        content = self.service.get_clipboard_content()
        self.assertIsNotNone(content)

    def test_invalid_json(self):
        """Malformed JSON still yields content."""
        self.set_clipboard_text(test_data.INVALID_JSON)
        content = self.service.get_clipboard_content()
        self.assertIsNotNone(content)

    def test_empty_content(self):
        """Whitespace-only content yields no content."""
        self.set_clipboard_text(test_data.EMPTY_CONTENT)
        content = self.service.get_clipboard_content()
        self.assertIsNone(content)

    def test_short_content(self):
        """Too-short content yields no content."""
        self.set_clipboard_text(test_data.SHORT_CONTENT)
        content = self.service.get_clipboard_content()
        self.assertIsNone(content)
|
||||
|
||||
if __name__ == "__main__":
    # Run the test suite when this module is executed directly.
    unittest.main()
|
||||
66
project/llmclipboard/llmclipboard/tests/test_data.py
Normal file
66
project/llmclipboard/llmclipboard/tests/test_data.py
Normal file
@ -0,0 +1,66 @@
|
||||
"""测试数据模块"""
|
||||
|
||||
# Plain multi-line text.
PLAIN_TEXT = """这是一个测试文本
它包含多行内容
用于测试文本处理功能"""

# Markdown document with headings, a list and a block quote.
MARKDOWN_TEXT = """# 这是一个Markdown文档

## 简介
这是一个用于测试的Markdown文档。

### 特点
- 包含标题
- 包含列表
- 包含格式化文本

> 这是一个引用
"""

# Complete HTML document.
HTML_TEXT = """<html>
<body>
<h1>HTML测试文档</h1>
<p>这是一个<strong>HTML</strong>格式的文档,用于测试<em>格式转换</em>功能。</p>
<ul>
<li>列表项1</li>
<li>列表项2</li>
</ul>
</body>
</html>"""

# Excalidraw clipboard payload with an empty element list.
EMPTY_EXCALIDRAW = """{
"type": "excalidraw/clipboard",
"elements": [],
"files": {}
}"""

# Excalidraw clipboard payload containing one rectangle element.
VALID_EXCALIDRAW = """{
"type": "excalidraw/clipboard",
"elements": [
{
"type": "rectangle",
"x": 100,
"y": 100,
"width": 200,
"height": 100
}
],
"files": {}
}"""

# Malformed JSON (unterminated string value).
INVALID_JSON = """{
"type": "invalid
"data": []
}"""

# Whitespace-only content.
EMPTY_CONTENT = " \n \t \n"

# Very short content.
SHORT_CONTENT = "ab"
|
||||
Loading…
Reference in New Issue
Block a user