"""Classify the current user's GitHub repositories with an OpenAI chat model.

Third-party requirements (as listed in requirements.txt): PyGithub, openai,
python-dotenv.

Credentials are read from the environment, optionally populated from a
``.env`` file: ``GITHUB_TOKEN`` and ``OPENAI_API_KEY``.
"""

import logging
import os
import textwrap

import openai
from dotenv import load_dotenv
from github import Github

logger = logging.getLogger(__name__)

# Labels offered to the model. The last one ("other") is also the fallback
# used when the model's reply cannot be mapped onto any label.
CATEGORIES = ("Web 开发", "数据科学", "DevOps 工具", "机器学习", "教育学习", "其他")
FALLBACK_CATEGORY = CATEGORIES[-1]

# Default number of README characters forwarded to the model.
README_SNIPPET_CHARS = 1000
CHAT_MODEL = "gpt-3.5-turbo"

# Shown in the prompt instead of the literal text "None" for missing metadata.
_MISSING_TEXT = "无"

# The template is dedented once here (not after formatting) so that README
# text starting at column 0 cannot defeat the dedent.
PROMPT_TEMPLATE = textwrap.dedent(
    """\
    根据以下仓库信息判断项目类型:
    名称: {name}
    描述: {description}
    语言: {language}
    README片段: {readme_snippet}

    可选分类选项:
    {options}

    返回最匹配的分类标签(只需返回分类名称):
    """
)


def init_clients():
    """Load credentials and return an authenticated GitHub client.

    Reads ``GITHUB_TOKEN`` and ``OPENAI_API_KEY`` from the environment (after
    loading a ``.env`` file if one exists) and stores the OpenAI key on the
    ``openai`` module as a side effect.

    Returns:
        A ``github.Github`` client built from ``GITHUB_TOKEN``.

    Raises:
        RuntimeError: If either environment variable is missing or empty.
    """
    load_dotenv()
    credentials = {
        "GITHUB_TOKEN": os.getenv("GITHUB_TOKEN"),
        "OPENAI_API_KEY": os.getenv("OPENAI_API_KEY"),
    }
    # Fail fast: without these, the first API call would fail later with a
    # much less obvious authentication error.
    missing = [name for name, value in credentials.items() if not value]
    if missing:
        raise RuntimeError(
            "Missing required environment variable(s): " + ", ".join(missing)
        )

    openai.api_key = credentials["OPENAI_API_KEY"]
    return Github(credentials["GITHUB_TOKEN"])


def _read_readme_snippet(repo, max_chars):
    """Return up to ``max_chars`` characters of the repo's README, or ""."""
    try:
        raw = repo.get_readme().decoded_content
    except Exception as exc:
        # Deliberately best-effort: a repository without a (readable) README
        # must not abort the listing. Unlike a bare ``except:``, this still
        # lets KeyboardInterrupt and SystemExit propagate.
        logger.debug("No README snippet for %s: %s", repo.name, exc)
        return ""
    # Replace undecodable bytes rather than discarding the whole README.
    return raw.decode("utf-8", errors="replace")[:max_chars]


def get_repos(github_client, readme_chars=README_SNIPPET_CHARS):
    """Collect metadata for every repository returned for the current user.

    Args:
        github_client: Client returned by ``init_clients``.
        readme_chars: Maximum number of README characters kept per repository.

    Returns:
        A list of dicts with the keys ``name``, ``description``, ``language``,
        ``readme_snippet`` and ``url``. ``description`` and ``language`` are
        passed through unchanged and may be ``None``.
    """
    return [
        {
            "name": repo.name,
            "description": repo.description,
            "language": repo.language,
            "readme_snippet": _read_readme_snippet(repo, readme_chars),
            "url": repo.html_url,
        }
        for repo in github_client.get_user().get_repos()
    ]


def _build_prompt(repo_data):
    """Render the classification prompt for one repository dict."""
    return PROMPT_TEMPLATE.format(
        name=repo_data["name"],
        description=repo_data.get("description") or _MISSING_TEXT,
        language=repo_data.get("language") or _MISSING_TEXT,
        readme_snippet=repo_data.get("readme_snippet") or _MISSING_TEXT,
        options="\n".join(f"- {label}" for label in CATEGORIES),
    )


def _create_chat_completion(**request):
    """Send a chat completion request with whichever openai API is installed."""
    # openai>=1.0 removed ``openai.ChatCompletion`` (accessing it raises) and
    # exposes ``openai.chat.completions`` on the module-level client instead.
    # The ``OpenAI`` class only exists in the 1.x line, so it serves as the
    # version probe.
    if hasattr(openai, "OpenAI"):
        return openai.chat.completions.create(**request)
    return openai.ChatCompletion.create(**request)


def _normalize_category(answer):
    """Map the model's free-form reply onto one of ``CATEGORIES``.

    Matching ignores whitespace and letter case so that replies such as
    "Web开发" or "分类: 机器学习" still resolve. Anything unrecognised,
    including an empty or ``None`` reply, becomes ``FALLBACK_CATEGORY``.
    """
    if not answer:
        return FALLBACK_CATEGORY
    compact_answer = "".join(answer.split()).casefold()
    for label in CATEGORIES:
        if "".join(label.split()).casefold() in compact_answer:
            return label
    return FALLBACK_CATEGORY


def classify_repo(repo_data):
    """Ask the chat model which category best fits a repository.

    Args:
        repo_data: A dict shaped like the entries returned by ``get_repos``.
            ``name`` is required; the other prompt fields may be missing or
            ``None``.

    Returns:
        One of the labels in ``CATEGORIES``.
    """
    response = _create_chat_completion(
        model=CHAT_MODEL,
        messages=[{"role": "user", "content": _build_prompt(repo_data)}],
    )
    return _normalize_category(response.choices[0].message.content)


def main():
    """Classify every repository, then print one ``name: category`` line each."""
    github_client = init_clients()

    classified_repos = []
    for repo in get_repos(github_client):
        repo["category"] = classify_repo(repo)
        classified_repos.append(repo)

    # Print only after all repositories are classified, so that a failure
    # part-way through does not leave a partial listing on stdout.
    for repo in classified_repos:
        print(f"{repo['name']}: {repo['category']}")


if __name__ == "__main__":
    main()