feat: Add GitHub repository classifier with OpenAI integration

This commit is contained in:
zhukang (aider) 2025-02-10 00:35:28 +08:00
parent f1d4198fbb
commit a69b20ac45
2 changed files with 79 additions and 0 deletions

76
repo_classifier.py Normal file
View File

@ -0,0 +1,76 @@
from github import Github
import openai
import os
from dotenv import load_dotenv
# Initialize the API clients
def init_clients():
    """Load credentials from the environment and build the GitHub client.

    Calls ``load_dotenv()`` first, then reads ``OPENAI_API_KEY`` into
    ``openai.api_key`` and ``GITHUB_TOKEN`` into a new ``Github`` client.

    Returns:
        The ``Github`` client constructed from ``GITHUB_TOKEN``.
    """
    load_dotenv()
    # The OpenAI key is stored on the module; only the GitHub client is returned.
    openai.api_key = os.getenv("OPENAI_API_KEY")
    return Github(os.getenv("GITHUB_TOKEN"))
# Fetch repository metadata
def get_repos(github_client):
    """Collect basic metadata for each repository of the client's user.

    Args:
        github_client: Object exposing ``get_user().get_repos()``; each
            yielded repository must provide ``name``, ``description``,
            ``language``, ``html_url`` and ``get_readme()``.

    Returns:
        A list of dicts with the keys ``name``, ``description``,
        ``language``, ``readme_snippet`` and ``url``. ``readme_snippet``
        holds the first 1000 characters of the decoded README, or ``""``
        when the README cannot be fetched or decoded.
    """
    repos = []
    for repo in github_client.get_user().get_repos():
        try:
            readme = repo.get_readme().decoded_content.decode()[:1000]
        except Exception:
            # Best effort: a missing or undecodable README must not abort the
            # scan. Catching ``Exception`` (instead of a bare ``except``)
            # lets KeyboardInterrupt and SystemExit propagate.
            readme = ""
        repos.append({
            "name": repo.name,
            "description": repo.description,
            "language": repo.language,
            "readme_snippet": readme,
            "url": repo.html_url,
        })
    return repos
# Smart classification (OpenAI implementation)
def classify_repo(repo_data):
    """Ask the OpenAI chat model to pick a category for one repository.

    The prompt (in Chinese) lists the repository's name, description,
    language and README snippet, offers six category options and asks the
    model to return only the category name.

    Args:
        repo_data: Mapping with the keys ``name``, ``description``,
            ``language`` and ``readme_snippet``.

    Returns:
        The model's reply with surrounding whitespace stripped, or ``""``
        if the reply carries no text content.
    """
    prompt = f"""
根据以下仓库信息判断项目类型:
名称: {repo_data['name']}
描述: {repo_data['description']}
语言: {repo_data['language']}
README片段: {repo_data['readme_snippet']}
可选分类选项:
- Web 开发
- 数据科学
- DevOps 工具
- 机器学习
- 教育学习
- 其他
返回最匹配的分类标签(只需返回分类名称):
"""
    messages = [{"role": "user", "content": prompt}]
    # openai>=1.0 removed ``openai.ChatCompletion`` in favour of
    # ``openai.chat.completions``; requirements.txt does not pin a version,
    # so support both and prefer the current interface when it exists.
    chat_api = getattr(openai, "chat", None)
    if chat_api is not None:
        response = chat_api.completions.create(
            model="gpt-3.5-turbo",
            messages=messages,
        )
    else:
        response = openai.ChatCompletion.create(
            model="gpt-3.5-turbo",
            messages=messages,
        )
    # The message content may be None in the newer client; treat it as empty.
    content = response.choices[0].message.content
    return (content or "").strip()
# Main flow
def main():
    """Classify every fetched repository, then print ``name: category`` lines.

    All repositories are classified before anything is printed, so the
    output appears only after the last classification call has returned.
    """
    client = init_clients()
    labelled = []
    for entry in get_repos(client):
        # The category is stored on the same dict that get_repos produced.
        entry["category"] = classify_repo(entry)
        labelled.append(entry)
    # Print the results
    for entry in labelled:
        print(f"{entry['name']}: {entry['category']}")
# Run the classifier only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == "__main__":
    main()

3
requirements.txt Normal file
View File

@ -0,0 +1,3 @@
PyGithub
openai
python-dotenv