AtomStorm/repo_classifier.py

77 lines
1.9 KiB
Python

from github import Github
import openai
import os
from dotenv import load_dotenv
# Initialize the API clients
def init_clients():
    """Load credentials from the environment and build the GitHub client.

    Calls ``load_dotenv()`` first, then reads ``GITHUB_TOKEN`` and
    ``OPENAI_API_KEY`` via ``os.getenv``. As a side effect the OpenAI key is
    stored on the ``openai`` module (``openai.api_key``). An unset variable
    yields ``None``, which is passed through unchanged.

    Returns:
        The ``Github`` client constructed from the ``GITHUB_TOKEN`` value.
    """
    load_dotenv()
    openai.api_key = os.getenv("OPENAI_API_KEY")
    return Github(os.getenv("GITHUB_TOKEN"))
# Fetch repository metadata
def get_repos(github_client, readme_limit=1000):
    """Collect basic metadata for each repository of the client's user.

    Args:
        github_client: Object exposing ``get_user().get_repos()``. Every
            repository it yields must provide ``name``, ``description``,
            ``language``, ``html_url`` and ``get_readme()``.
        readme_limit: Maximum number of characters of the decoded README to
            keep in ``readme_snippet``. Defaults to 1000.

    Returns:
        A list with one dict per repository, holding the keys ``name``,
        ``description``, ``language``, ``readme_snippet`` and ``url``.
        ``readme_snippet`` is ``""`` when the README cannot be fetched or
        decoded.
    """
    repos = []
    for repo in github_client.get_user().get_repos():
        try:
            readme = repo.get_readme().decoded_content.decode()[:readme_limit]
        except Exception:
            # Best effort: a repository whose README is missing or cannot be
            # decoded is still listed. Catch `Exception` rather than using a
            # bare `except` so KeyboardInterrupt / SystemExit still propagate.
            readme = ""
        repos.append({
            "name": repo.name,
            "description": repo.description,
            "language": repo.language,
            "readme_snippet": readme,
            "url": repo.html_url,
        })
    return repos
# Smart classification (OpenAI implementation)
def classify_repo(repo_data):
    """Ask the OpenAI chat model to pick a category for one repository.

    Args:
        repo_data: Mapping with the keys ``name``, ``description``,
            ``language`` and ``readme_snippet``; their values are
            interpolated into the prompt as-is.

    Returns:
        The content of the first choice in the model's reply, with
        surrounding whitespace stripped. The reply is not checked against
        the category list given in the prompt.
    """
    prompt = f"""
根据以下仓库信息判断项目类型:
名称: {repo_data['name']}
描述: {repo_data['description']}
语言: {repo_data['language']}
README片段: {repo_data['readme_snippet']}
可选分类选项:
- Web 开发
- 数据科学
- DevOps 工具
- 机器学习
- 教育学习
- 其他
返回最匹配的分类标签(只需返回分类名称):
"""
    conversation = [{"role": "user", "content": prompt}]
    # NOTE(review): `openai.ChatCompletion` looks like the pre-1.0 SDK
    # interface — confirm the installed `openai` version still provides it.
    completion = openai.ChatCompletion.create(
        model="gpt-3.5-turbo",
        messages=conversation,
    )
    first_choice = completion.choices[0]
    return first_choice.message.content.strip()
# Main workflow
def main():
    """Classify every repository and print one ``name: category`` line each.

    Builds the clients with ``init_clients``, fetches the repository dicts
    with ``get_repos``, stores the label returned by ``classify_repo`` under
    the ``category`` key of each dict, and prints the results once every
    repository has been classified.
    """
    classified_repos = []
    for entry in get_repos(init_clients()):
        entry["category"] = classify_repo(entry)
        classified_repos.append(entry)

    # Print the results
    for entry in classified_repos:
        print(f"{entry['name']}: {entry['category']}")


# Run the workflow only when the file is executed as a script.
if __name__ == "__main__":
    main()