test: add clipboard content tests

- Add test suite for clipboard content handling
- Add test cases for different content types
- Add test data module
- Improve clipboard operation reliability
- Add better HTML content extraction
This commit is contained in:
zhukang 2025-01-15 22:08:38 +08:00
parent bcd15f141f
commit 5330de49fc
4 changed files with 222 additions and 7 deletions

View File

@ -82,9 +82,10 @@ class TextCaptureService:
except Exception as e:
self.logger.debug(f"获取Unicode文本失败: {e}")
# 如果Unicode获取失败尝试获取HTML格式
# 尝试获取HTML格式
try:
html_content = win32clipboard.GetClipboardData(win32con.CF_HTML)
CF_HTML = win32clipboard.RegisterClipboardFormat("HTML Format")
html_content = win32clipboard.GetClipboardData(CF_HTML)
self.logger.info("获取HTML格式内容")
if html_content:
# 解析HTML格式的内容
@ -97,10 +98,28 @@ class TextCaptureService:
h.ignore_tables = False
# 从HTML字符串中提取实际的HTML内容
start = html_content.find('<html>')
end = html_content.find('</html>')
if start != -1 and end != -1:
html_content = html_content[start:end+7]
try:
if isinstance(html_content, bytes):
html_content = html_content.decode('utf-8')
# 查找HTML内容的开始和结束
start = html_content.find('<html>')
if start == -1:
start = html_content.find('<!--StartFragment-->')
if start != -1:
start = html_content.find('<', start + 20)
end = html_content.find('</html>')
if end == -1:
end = html_content.find('<!--EndFragment-->')
if start != -1:
if end != -1:
html_content = html_content[start:end]
else:
html_content = html_content[start:]
except Exception as e:
self.logger.debug(f"HTML内容提取失败: {e}")
content = h.handle(html_content).strip()
if content and self.is_valid_content(content):
@ -113,7 +132,9 @@ class TextCaptureService:
text_content = win32clipboard.GetClipboardData(win32con.CF_TEXT)
self.logger.info("获取普通文本格式内容")
if text_content:
content = text_content.decode('gbk')
if isinstance(text_content, bytes):
text_content = text_content.decode('gbk', errors='ignore')
content = text_content
if self.is_valid_content(content):
return content
except Exception as e:

View File

@ -0,0 +1 @@
"""测试包"""

View File

@ -0,0 +1,127 @@
"""剪贴板测试模块"""
import unittest
import win32clipboard
import win32con
import time
from ..app import TextCaptureService
from . import test_data
# Clipboard format identifier for HTML data, obtained by registering the
# format name "HTML Format" with the system.
CF_HTML = win32clipboard.RegisterClipboardFormat("HTML Format")
class TestClipboard(unittest.TestCase):
    """Clipboard tests for ``TextCaptureService.get_clipboard_content``.

    Every test puts a payload from ``test_data`` on the system clipboard
    through ``win32clipboard`` and then checks what the service reads back.
    """

    def setUp(self):
        """Create a fresh service instance for each test."""
        self.service = TextCaptureService()

    @staticmethod
    def _close_clipboard_quietly():
        """Close the clipboard, ignoring the error raised when it is not open."""
        try:
            win32clipboard.CloseClipboard()
        except Exception:
            # Best effort only: this is called without knowing whether the
            # clipboard is currently open.
            pass

    @staticmethod
    def _build_cf_html(html):
        """Wrap *html* in an HTML Clipboard Format descriptor.

        The header carries byte offsets, measured from the start of the
        payload, to the HTML document and to the fragment. The whole
        document is declared as the fragment here.

        Args:
            html: The HTML document as ``str``.

        Returns:
            The UTF-8 encoded payload (header followed by the document).
        """
        header_template = (
            "Version:0.9\r\n"
            "StartHTML:{start_html:08d}\r\n"
            "EndHTML:{end_html:08d}\r\n"
            "StartFragment:{start_fragment:08d}\r\n"
            "EndFragment:{end_fragment:08d}\r\n"
        )
        body = html.encode('utf-8')
        # The offsets are zero-padded to a fixed width, so the header length
        # can be measured with placeholder values before the real ones are
        # filled in.
        placeholder = header_template.format(
            start_html=0, end_html=0, start_fragment=0, end_fragment=0
        )
        start = len(placeholder.encode('ascii'))
        end = start + len(body)
        header = header_template.format(
            start_html=start,
            end_html=end,
            start_fragment=start,
            end_fragment=end,
        )
        return header.encode('ascii') + body

    def set_clipboard_text(self, text, format_type=win32con.CF_UNICODETEXT):
        """Place *text* on the clipboard in the given format, with retries.

        Args:
            text: Payload as ``str`` or ``bytes``. It is converted to the
                representation used for the format: UTF-8 bytes for
                ``CF_HTML``, ``str`` for ``CF_UNICODETEXT`` and GBK bytes
                for ``CF_TEXT``. Other formats are passed through as-is.
            format_type: Clipboard format identifier.

        Raises:
            RuntimeError: If the clipboard could not be set after all
                attempts; the last underlying error is chained as the cause.
        """
        # Convert once up front: the conversion does not depend on the
        # clipboard state, so retrying it would be pointless.
        if format_type == CF_HTML:
            if isinstance(text, str):
                text = text.encode('utf-8')
        elif format_type == win32con.CF_UNICODETEXT:
            if isinstance(text, bytes):
                text = text.decode('utf-8')
        elif format_type == win32con.CF_TEXT:
            if isinstance(text, str):
                text = text.encode('gbk')

        max_retries = 3
        last_error = None
        for retry in range(max_retries):
            # Make sure a handle left open by an earlier attempt is released.
            self._close_clipboard_quietly()
            time.sleep(0.1 * (retry + 1))  # back off a little more each time
            try:
                win32clipboard.OpenClipboard()
                try:
                    win32clipboard.EmptyClipboard()
                    win32clipboard.SetClipboardData(format_type, text)
                finally:
                    # Always release the clipboard once it has been opened.
                    win32clipboard.CloseClipboard()
            except Exception as e:
                print(f"剪贴板操作失败 (尝试 {retry + 1}/{max_retries}): {e}")
                last_error = e
                if retry < max_retries - 1:
                    time.sleep(0.5)
                continue
            time.sleep(0.1)  # give the clipboard operation time to settle
            return
        raise RuntimeError(f"设置剪贴板内容失败: {last_error}") from last_error

    def test_plain_text(self):
        """Plain text is returned unchanged (ignoring surrounding whitespace)."""
        self.set_clipboard_text(test_data.PLAIN_TEXT)
        content = self.service.get_clipboard_content()
        self.assertIsNotNone(content)
        self.assertEqual(content.strip(), test_data.PLAIN_TEXT.strip())

    def test_markdown_text(self):
        """Markdown text is returned and still contains its top-level heading."""
        self.set_clipboard_text(test_data.MARKDOWN_TEXT)
        content = self.service.get_clipboard_content()
        self.assertIsNotNone(content)
        self.assertIn('# 这是一个Markdown文档', content)

    def test_html_text(self):
        """HTML clipboard data is returned and contains the heading text."""
        # Build a CF_HTML payload whose header offsets match the data.
        html_data = self._build_cf_html(test_data.HTML_TEXT)
        self.set_clipboard_text(html_data, CF_HTML)
        content = self.service.get_clipboard_content()
        self.assertIsNotNone(content)
        self.assertIn('HTML测试文档', content)

    def test_empty_excalidraw(self):
        """An Excalidraw payload without elements yields ``None``."""
        self.set_clipboard_text(test_data.EMPTY_EXCALIDRAW)
        content = self.service.get_clipboard_content()
        self.assertIsNone(content)

    def test_valid_excalidraw(self):
        """An Excalidraw payload with elements yields content."""
        self.set_clipboard_text(test_data.VALID_EXCALIDRAW)
        content = self.service.get_clipboard_content()
        self.assertIsNotNone(content)

    def test_invalid_json(self):
        """Malformed JSON text still yields content."""
        self.set_clipboard_text(test_data.INVALID_JSON)
        content = self.service.get_clipboard_content()
        self.assertIsNotNone(content)

    def test_empty_content(self):
        """Whitespace-only content yields ``None``."""
        self.set_clipboard_text(test_data.EMPTY_CONTENT)
        content = self.service.get_clipboard_content()
        self.assertIsNone(content)

    def test_short_content(self):
        """Content that is too short yields ``None``."""
        self.set_clipboard_text(test_data.SHORT_CONTENT)
        content = self.service.get_clipboard_content()
        self.assertIsNone(content)
# Run the test suite when this module is executed as a script.
if __name__ == '__main__':
    unittest.main()

View File

@ -0,0 +1,66 @@
"""测试数据模块"""
# Plain multi-line text content
PLAIN_TEXT = """这是一个测试文本
它包含多行内容
用于测试文本处理功能"""
# Markdown-formatted content
MARKDOWN_TEXT = """# 这是一个Markdown文档
## 简介
这是一个用于测试的Markdown文档
### 特点
- 包含标题
- 包含列表
- 包含格式化文本
> 这是一个引用
"""
# HTML-formatted content
HTML_TEXT = """<html>
<body>
<h1>HTML测试文档</h1>
<p>这是一个<strong>HTML</strong>格式的文档用于测试<em>格式转换</em>功能</p>
<ul>
<li>列表项1</li>
<li>列表项2</li>
</ul>
</body>
</html>"""
# Excalidraw clipboard payload with an empty "elements" list
EMPTY_EXCALIDRAW = """{
"type": "excalidraw/clipboard",
"elements": [],
"files": {}
}"""
# Excalidraw clipboard payload containing one rectangle element
VALID_EXCALIDRAW = """{
"type": "excalidraw/clipboard",
"elements": [
{
"type": "rectangle",
"x": 100,
"y": 100,
"width": 200,
"height": 100
}
],
"files": {}
}"""
# Malformed JSON content (the "type" value is an unterminated string)
INVALID_JSON = """{
"type": "invalid
"data": []
}"""
# Whitespace-only content
EMPTY_CONTENT = " \n \t \n"
# Content that is too short
SHORT_CONTENT = "ab"