{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Collecting litellmNote: you may need to restart the kernel to use updated packages.\n",
      "\n",
      "  Using cached litellm-1.59.8-py3-none-any.whl.metadata (36 kB)\n",
      "Collecting aiohttp (from litellm)\n",
      "  Using cached aiohttp-3.11.11-cp311-cp311-win_amd64.whl.metadata (8.0 kB)\n",
      "Requirement already satisfied: click in k:\\atomstorm\\.conda\\lib\\site-packages (from litellm) (8.1.8)\n",
      "Collecting httpx<0.28.0,>=0.23.0 (from litellm)\n",
      "  Using cached httpx-0.27.2-py3-none-any.whl.metadata (7.1 kB)\n",
      "Requirement already satisfied: importlib-metadata>=6.8.0 in k:\\atomstorm\\.conda\\lib\\site-packages (from litellm) (8.6.1)\n",
      "Collecting jinja2<4.0.0,>=3.1.2 (from litellm)\n",
      "  Using cached jinja2-3.1.5-py3-none-any.whl.metadata (2.6 kB)\n",
      "Collecting jsonschema<5.0.0,>=4.22.0 (from litellm)\n",
      "  Using cached jsonschema-4.23.0-py3-none-any.whl.metadata (7.9 kB)\n",
      "Collecting openai>=1.55.3 (from litellm)\n",
      "  Using cached openai-1.60.1-py3-none-any.whl.metadata (27 kB)\n",
      "Requirement already satisfied: pydantic<3.0.0,>=2.0.0 in k:\\atomstorm\\.conda\\lib\\site-packages (from litellm) (2.10.6)\n",
      "Requirement already satisfied: python-dotenv>=0.2.0 in k:\\atomstorm\\.conda\\lib\\site-packages (from litellm) (1.0.1)\n",
      "Collecting tiktoken>=0.7.0 (from litellm)\n",
      "  Using cached tiktoken-0.8.0-cp311-cp311-win_amd64.whl.metadata (6.8 kB)\n",
      "Collecting tokenizers (from litellm)\n",
      "  Using cached tokenizers-0.21.0-cp39-abi3-win_amd64.whl.metadata (6.9 kB)\n",
      "Collecting anyio (from httpx<0.28.0,>=0.23.0->litellm)\n",
      "  Using cached anyio-4.8.0-py3-none-any.whl.metadata (4.6 kB)\n",
      "Requirement already satisfied: certifi in k:\\atomstorm\\.conda\\lib\\site-packages (from httpx<0.28.0,>=0.23.0->litellm) (2024.12.14)\n",
      "Collecting httpcore==1.* (from httpx<0.28.0,>=0.23.0->litellm)\n",
      "  Using cached httpcore-1.0.7-py3-none-any.whl.metadata (21 kB)\n",
      "Requirement already satisfied: idna in k:\\atomstorm\\.conda\\lib\\site-packages (from httpx<0.28.0,>=0.23.0->litellm) (3.10)\n",
      "Requirement already satisfied: sniffio in k:\\atomstorm\\.conda\\lib\\site-packages (from httpx<0.28.0,>=0.23.0->litellm) (1.3.1)\n",
      "Requirement already satisfied: h11<0.15,>=0.13 in k:\\atomstorm\\.conda\\lib\\site-packages (from httpcore==1.*->httpx<0.28.0,>=0.23.0->litellm) (0.14.0)\n",
      "Requirement already satisfied: zipp>=3.20 in k:\\atomstorm\\.conda\\lib\\site-packages (from importlib-metadata>=6.8.0->litellm) (3.21.0)\n",
      "Requirement already satisfied: MarkupSafe>=2.0 in k:\\atomstorm\\.conda\\lib\\site-packages (from jinja2<4.0.0,>=3.1.2->litellm) (3.0.2)\n",
      "Requirement already satisfied: attrs>=22.2.0 in k:\\atomstorm\\.conda\\lib\\site-packages (from jsonschema<5.0.0,>=4.22.0->litellm) (25.1.0)\n",
      "Collecting jsonschema-specifications>=2023.03.6 (from jsonschema<5.0.0,>=4.22.0->litellm)\n",
      "  Using cached jsonschema_specifications-2024.10.1-py3-none-any.whl.metadata (3.0 kB)\n",
      "Requirement already satisfied: referencing>=0.28.4 in k:\\atomstorm\\.conda\\lib\\site-packages (from jsonschema<5.0.0,>=4.22.0->litellm) (0.36.2)\n",
      "Requirement already satisfied: rpds-py>=0.7.1 in k:\\atomstorm\\.conda\\lib\\site-packages (from jsonschema<5.0.0,>=4.22.0->litellm) (0.22.3)\n",
      "Requirement already satisfied: distro<2,>=1.7.0 in k:\\atomstorm\\.conda\\lib\\site-packages (from openai>=1.55.3->litellm) (1.9.0)\n",
      "Requirement already satisfied: jiter<1,>=0.4.0 in k:\\atomstorm\\.conda\\lib\\site-packages (from openai>=1.55.3->litellm) (0.8.2)\n",
      "Requirement already satisfied: tqdm>4 in k:\\atomstorm\\.conda\\lib\\site-packages (from openai>=1.55.3->litellm) (4.67.1)\n",
      "Requirement already satisfied: typing-extensions<5,>=4.11 in k:\\atomstorm\\.conda\\lib\\site-packages (from openai>=1.55.3->litellm) (4.12.2)\n",
      "Requirement already satisfied: annotated-types>=0.6.0 in k:\\atomstorm\\.conda\\lib\\site-packages (from pydantic<3.0.0,>=2.0.0->litellm) (0.7.0)\n",
      "Requirement already satisfied: pydantic-core==2.27.2 in k:\\atomstorm\\.conda\\lib\\site-packages (from pydantic<3.0.0,>=2.0.0->litellm) (2.27.2)\n",
      "Requirement already satisfied: regex>=2022.1.18 in k:\\atomstorm\\.conda\\lib\\site-packages (from tiktoken>=0.7.0->litellm) (2024.11.6)\n",
      "Requirement already satisfied: requests>=2.26.0 in k:\\atomstorm\\.conda\\lib\\site-packages (from tiktoken>=0.7.0->litellm) (2.32.3)\n",
      "Requirement already satisfied: aiohappyeyeballs>=2.3.0 in k:\\atomstorm\\.conda\\lib\\site-packages (from aiohttp->litellm) (2.4.4)\n",
      "Collecting aiosignal>=1.1.2 (from aiohttp->litellm)\n",
      "  Using cached aiosignal-1.3.2-py2.py3-none-any.whl.metadata (3.8 kB)\n",
      "Requirement already satisfied: frozenlist>=1.1.1 in k:\\atomstorm\\.conda\\lib\\site-packages (from aiohttp->litellm) (1.5.0)\n",
      "Requirement already satisfied: multidict<7.0,>=4.5 in k:\\atomstorm\\.conda\\lib\\site-packages (from aiohttp->litellm) (6.1.0)\n",
      "Requirement already satisfied: propcache>=0.2.0 in k:\\atomstorm\\.conda\\lib\\site-packages (from aiohttp->litellm) (0.2.1)\n",
      "Requirement already satisfied: yarl<2.0,>=1.17.0 in k:\\atomstorm\\.conda\\lib\\site-packages (from aiohttp->litellm) (1.18.3)\n",
      "Requirement already satisfied: colorama in k:\\atomstorm\\.conda\\lib\\site-packages (from click->litellm) (0.4.6)\n",
      "Collecting huggingface-hub<1.0,>=0.16.4 (from tokenizers->litellm)\n",
      "  Using cached huggingface_hub-0.27.1-py3-none-any.whl.metadata (13 kB)\n",
      "Requirement already satisfied: filelock in k:\\atomstorm\\.conda\\lib\\site-packages (from huggingface-hub<1.0,>=0.16.4->tokenizers->litellm) (3.17.0)\n",
      "Requirement already satisfied: fsspec>=2023.5.0 in k:\\atomstorm\\.conda\\lib\\site-packages (from huggingface-hub<1.0,>=0.16.4->tokenizers->litellm) (2024.12.0)\n",
      "Requirement already satisfied: packaging>=20.9 in k:\\atomstorm\\.conda\\lib\\site-packages (from huggingface-hub<1.0,>=0.16.4->tokenizers->litellm) (24.2)\n",
      "Requirement already satisfied: pyyaml>=5.1 in k:\\atomstorm\\.conda\\lib\\site-packages (from huggingface-hub<1.0,>=0.16.4->tokenizers->litellm) (6.0.2)\n",
      "Requirement already satisfied: charset-normalizer<4,>=2 in k:\\atomstorm\\.conda\\lib\\site-packages (from requests>=2.26.0->tiktoken>=0.7.0->litellm) (3.4.1)\n",
      "Requirement already satisfied: urllib3<3,>=1.21.1 in k:\\atomstorm\\.conda\\lib\\site-packages (from requests>=2.26.0->tiktoken>=0.7.0->litellm) (2.3.0)\n",
      "Using cached litellm-1.59.8-py3-none-any.whl (6.7 MB)\n",
      "Using cached httpx-0.27.2-py3-none-any.whl (76 kB)\n",
      "Using cached httpcore-1.0.7-py3-none-any.whl (78 kB)\n",
      "Using cached jinja2-3.1.5-py3-none-any.whl (134 kB)\n",
      "Using cached jsonschema-4.23.0-py3-none-any.whl (88 kB)\n",
      "Using cached openai-1.60.1-py3-none-any.whl (456 kB)\n",
      "Using cached tiktoken-0.8.0-cp311-cp311-win_amd64.whl (884 kB)\n",
      "Using cached aiohttp-3.11.11-cp311-cp311-win_amd64.whl (442 kB)\n",
      "Using cached tokenizers-0.21.0-cp39-abi3-win_amd64.whl (2.4 MB)\n",
      "Using cached aiosignal-1.3.2-py2.py3-none-any.whl (7.6 kB)\n",
      "Using cached anyio-4.8.0-py3-none-any.whl (96 kB)\n",
      "Using cached huggingface_hub-0.27.1-py3-none-any.whl (450 kB)\n",
      "Using cached jsonschema_specifications-2024.10.1-py3-none-any.whl (18 kB)\n",
      "Installing collected packages: jinja2, httpcore, anyio, aiosignal, tiktoken, jsonschema-specifications, huggingface-hub, httpx, aiohttp, tokenizers, openai, jsonschema, litellm\n",
      "Successfully installed aiohttp-3.11.11 aiosignal-1.3.2 anyio-4.8.0 httpcore-1.0.7 httpx-0.27.2 huggingface-hub-0.27.1 jinja2-3.1.5 jsonschema-4.23.0 jsonschema-specifications-2024.10.1 litellm-1.59.8 openai-1.60.1 tiktoken-0.8.0 tokenizers-0.21.0\n"
     ]
    }
   ],
   "source": [
    "pip install litellm"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# 方式一、Openai兼容接口，只能使用openai官方模型名"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Hello! I'm just a computer program, so I don't have feelings, but I'm here and ready to help you with anything you need. How can I assist you today?\n"
     ]
    }
   ],
   "source": [
    "from litellm import completion\n",
    "import litellm\n",
    "\n",
    "# 配置第三方 API\n",
    "litellm.api_base = \"https://yunwu.ai/v1\"  # 替换为你的第三方 API 地址\n",
    "litellm.api_key = \"sk-lXFW7Bl1ruw2qmHu287e979847354601A07fE2D85a567bD7\"  # 替换为你的 API 密钥\n",
    "\n",
    "# 调用 API\n",
    "response = litellm.completion(\n",
    "    model=\"openai/gpt-4o\",  # 替换为你的模型名称\n",
    "    messages=[\n",
    "        {\"role\": \"system\", \"content\": \"You are a helpful assistant.\"},\n",
    "        {\"role\": \"user\", \"content\": \"Hello, how are you?\"}\n",
    "    ]\n",
    ")\n",
    "\n",
    "# 输出响应\n",
    "print(response.choices[0].message.content)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# 方式二、Openai兼容接口，可以使用三方模型"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "I am Qwen, a large language model developed by Alibaba Cloud. I'm designed to be helpful and versatile in providing information and assistance across various topics. How can I assist you today?\n"
     ]
    }
   ],
   "source": [
    "import litellm\n",
    "import os\n",
    "\n",
    "import openai\n",
    "client = openai.OpenAI(\n",
    "    api_key=\"sk-Dbdn0mZlCnnHCLBj70C0202697F640B0AcC29a15Eb16EdCd\",             # pass litellm proxy key, if you're using virtual keys\n",
    "    base_url=\"http://llm.towards-agi.cn/v1/\" # litellm-proxy-base url\n",
    ")\n",
    "\n",
    "response = client.chat.completions.create(\n",
    "    model=\"qwen2.5:14b\",\n",
    "    messages = [\n",
    "        {\n",
    "            \"role\": \"user\",\n",
    "            \"content\": \"what llm are you\"\n",
    "        }\n",
    "    ],\n",
    ")\n",
    "# 输出响应\n",
    "print(response.choices[0].message.content)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Azure 接口"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {},
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "g:\\AtomStorm\\.conda\\Lib\\site-packages\\pydantic\\_internal\\_config.py:345: UserWarning: Valid config keys have changed in V2:\n",
      "* 'fields' has been removed\n",
      "  warnings.warn(message, UserWarning)\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Good morning! How can I assist you today?\n"
     ]
    }
   ],
   "source": [
    "import litellm\n",
    "\n",
    "# azure call\n",
    "response = litellm.completion(\n",
    "    model = \"azure/gpt-4o\",             # model = azure/<your deployment name> \n",
    "    api_base = \"https://agents-lingda.openai.azure.com/\",                                      # azure api base\n",
    "    api_version = \"2023-05-15\",                                   # azure api version\n",
    "    api_key = \"474a48443ed14770be99cc49130b1438\",                                       # azure api key\n",
    "    messages = [{\"role\": \"user\", \"content\": \"good morning\"}],\n",
    ")\n",
    "# 输出响应\n",
    "print(response.choices[0].message.content)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# 方式三、OpenAI库使用openai兼容接口"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "llm Time init: 0.299158000000034s\n",
      "llm Time to first chunk: 0.636306999999988s\n",
      "我是来自阿里云的超大规模语言模型，\n",
      "我叫通义千问。作为一个AI助手，\n",
      "我可以生成各种类型的文本、回答问题和提供帮助。\n",
      "我会不断学习和进步，努力为用户提供更好的服务体验。\n",
      "如果您有任何问题或需要帮助，\n",
      "llm Time to last chunk: 1.3753963999999996s\n"
     ]
    }
   ],
   "source": [
    "import time\n",
    "start = time.perf_counter()\n",
    "from openai import OpenAI\n",
    "client = OpenAI(\n",
    "    # 如果您没有配置环境变量，请在此处用您的API Key进行替换\n",
    "    api_key=\"sk-Dbdn0mZlCnnHCLBj70C0202697F640B0AcC29a15Eb16EdCd\",\n",
    "    # 填写DashScope SDK的base_url\n",
    "    base_url=\"http://llm.towards-agi.cn/v1\",\n",
    ")\n",
    "end = time.perf_counter()\n",
    "print(f\"llm Time init: {end-start}s\")\n",
    "completion = client.chat.completions.create(\n",
    "    model=\"qwen2.5:14b\",\n",
    "    messages=[{'role': 'system', 'content': 'You are a helpful assistant.'},\n",
    "                {'role': 'user', 'content': \"介绍下你自己\"}],\n",
    "    stream=True,\n",
    "    # 通过以下设置，在流式输出的最后一行展示token使用信息\n",
    "    stream_options={\"include_usage\": True}\n",
    ")\n",
    "result=\"\"\n",
    "first = True\n",
    "for chunk in completion:\n",
    "    if len(chunk.choices)>0:\n",
    "        #print(chunk.choices[0].delta.content)\n",
    "        if first:\n",
    "            end = time.perf_counter()\n",
    "            print(f\"llm Time to first chunk: {end-start}s\")\n",
    "            first = False\n",
    "        msg = chunk.choices[0].delta.content\n",
    "        lastpos=0\n",
    "        #msglist = re.split('[,.!;:，。！?]',msg)\n",
    "        for i, char in enumerate(msg):\n",
    "            if char in \",.!;:，。！？：；\" :\n",
    "                result = result+msg[lastpos:i+1]\n",
    "                lastpos = i+1\n",
    "                if len(result)>10:\n",
    "                    print(result)\n",
    "                    result=\"\"\n",
    "        result = result+msg[lastpos:]\n",
    "end = time.perf_counter()\n",
    "print(f\"llm Time to last chunk: {end-start}s\")\n"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Ollama+litellm"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Collecting async_generator\n",
      "  Downloading async_generator-1.10-py3-none-any.whl.metadata (4.9 kB)\n",
      "Downloading async_generator-1.10-py3-none-any.whl (18 kB)\n",
      "Installing collected packages: async_generator\n",
      "Successfully installed async_generator-1.10\n",
      "Note: you may need to restart the kernel to use updated packages.\n"
     ]
    }
   ],
   "source": [
    "pip install async_generator"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 23,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "I'm a language model designed to assist with providing information, answering questions, and facilitating conversations based on text input. How can I help you today?\n"
     ]
    }
   ],
   "source": [
    "from litellm import completion\n",
    "\n",
    "response = completion(\n",
    "    model=\"ollama/phi4:latest\", \n",
    "    messages=[{ \"content\": \"respond in 20 words. who are you?\",\"role\": \"user\"}], \n",
    "    api_base=\"http://localhost:11434\",\n",
    "    stream=True\n",
    ")\n",
    "result = ''\n",
    "for chunk in response:\n",
    "    if chunk['choices'][0]['delta']['content']:\n",
    "        result += chunk['choices'][0]['delta']['content']\n",
    "print(result)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# DeepSeek+litellm\n",
    "https://docs.litellm.ai/docs/providers/deepseek"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "\n"
     ]
    }
   ],
   "source": [
    "from litellm import completion\n",
    "import os\n",
    "\n",
    "os.environ['DEEPSEEK_API_KEY'] = \"sk-652f44592f6d4d19bad104c54f5fbf4a\"\n",
    "response = completion(\n",
    "    model=\"deepseek/deepseek-chat\", \n",
    "    messages=[\n",
    "       {\"role\": \"user\", \"content\": \"hello from litellm\"}\n",
    "   ],\n",
    "    stream=True\n",
    ")\n",
    "\n",
    "result = ''\n",
    "for chunk in response:\n",
    "    if chunk.choices[0].delta.content is None: continue\n",
    "    result += chunk.choices[0].delta.content\n",
    "print(result)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 33,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Okay, the user asked for a joke. Let me think of a good one. Maybe something light-hearted and not offensive. Animals are usually a safe topic. How about a cat and a computer? People relate to both.\n",
      "\n",
      "Why did the cat sit on the computer? Hmm, the punchline could be a play on words. \"To keep an eye on the mouse!\" Yeah, that works. Mouse has a double meaning here—computer mouse and the animal. Cats chase mice, so it makes sense. Let me check if that's not too overused. I think it's common but still effective. Should I add an emoji? Maybe a cat and mouse emoji to make it friendlier. Alright, that should do it.\n",
      "Sure! Here's a light-hearted one for you:  \n",
      "\n",
      "Why did the cat sit on the computer?  \n",
      "…*To keep an eye on the mouse!* 🐱🖱️  \n",
      "\n",
      "(Let me know if you need more—I’ve got a *paws-itive* attitude about jokes!)\n"
     ]
    }
   ],
   "source": [
    "from litellm import completion\n",
    "import os\n",
    "\n",
    "os.environ['DEEPSEEK_API_KEY'] = \"sk-652f44592f6d4d19bad104c54f5fbf4a\"\n",
    "resp = completion(\n",
    "    model=\"deepseek/deepseek-reasoner\",\n",
    "    messages=[{\"role\": \"user\", \"content\": \"Tell me a joke.\"}],\n",
    ")\n",
    "print(\n",
    "    resp.choices[0].message.provider_specific_fields[\"reasoning_content\"]\n",
    ")\n",
    "print(resp.choices[0].message.content)"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.11.11"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}