diff --git a/021_caikit_tutorial.ipynb b/021_caikit_tutorial.ipynb new file mode 100644 index 0000000..7dd145d --- /dev/null +++ b/021_caikit_tutorial.ipynb @@ -0,0 +1,549 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Getting Started with Sentiment Analysis Pipeline in caikit" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Setup" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# %pip install caikit transformers requests\n", + "# Install mamba using https://mamba.readthedocs.io/en/latest/mamba-installation.html#mamba-install\n", + "# mamba install pytorch cpuonly -c pytorch\n", + "# mamba install grpcio" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# %%bash \n", + "# pip install fastcore\n", + "# pip install caikit[runtime-grpc] -qqq\n", + "# pip install caikit[runtime-http] -qqq" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Python 3.9.18\n" + ] + } + ], + "source": [ + "!python --version" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Outline" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "- Data Module\n", + "- Module\n", + "- config\n", + "- Runtime\n", + "- Client" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Text Classification Example" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "from fastcore.all import *\n", + "import warnings; warnings.filterwarnings('ignore')" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Requirements" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + 
"output_type": "stream", + "text": [ + "Overwriting requirements-caikit.txt\n" + ] + } + ], + "source": [ + "%%writefile requirements-caikit.txt\n", + "\n", + "caikit[runtime-grpc, runtime-http]\n", + "\n", + "# Only needed for HuggingFace\n", + "scipy\n", + "# torch\n", + "# transformers~=4.27.2\n", + "\n", + "# For http client\n", + "requests" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Data Module" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "Path('./text_sentiment/data_model').mkdir(exist_ok=True, parents=True)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Overwriting ./text_sentiment/data_model/classification.py\n" + ] + } + ], + "source": [ + "%%writefile ./text_sentiment/data_model/classification.py\n", + "\n", + "from typing import List\n", + "from caikit.core import DataObjectBase\n", + "\n", + "from caikit.core.data_model import dataobject\n", + "\n", + "# A DataObject is a data model class that is backed by a @dataclass. 
\n", + "@dataobject(package=\"text_sentiment.data_model\")\n", + "class ClassInfo(DataObjectBase):\n", + " class_name: str\n", + " conf: float\n", + "@dataobject(package=\"text_sentiment.data_model\")\n", + "class ClassificationPrediction(DataObjectBase):\n", + " classes: List[ClassInfo]" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Overwriting ./text_sentiment/data_model/__init__.py\n" + ] + } + ], + "source": [ + "%%writefile ./text_sentiment/data_model/__init__.py\n", + "\n", + "from .classification import ClassificationPrediction" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Runtime Model" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "Path('./text_sentiment/runtime_model').mkdir(exist_ok=True, parents=True)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Overwriting ./text_sentiment/runtime_model/hf_module.py\n" + ] + } + ], + "source": [ + "%%writefile ./text_sentiment/runtime_model/hf_module.py\n", + "\n", + "from caikit.core import ModuleBase, ModuleLoader, ModuleSaver, TaskBase, task, module\n", + "from text_sentiment.data_model.classification import ClassificationPrediction, ClassInfo\n", + "from transformers import pipeline\n", + "\n", + "@task(required_parameters={\"text_input\": str},output_type=ClassificationPrediction)\n", + "class HFSentimentTask(TaskBase): pass # defines input args and output type for task\n", + "\n", + "@module('8f72161-c0e4-49b0-8fd0-7587b3017a35', 'HFSentimentModule', '0.0.1', HFSentimentTask)\n", + "class HFSentimentModule(ModuleBase): # inherits from ModuleBase and wraps the sentiment analysis pipeline from HF\n", + " def __init__(self, model_path) -> None:\n", + " super().__init__()\n", + " loader 
= ModuleLoader(model_path) # loads the model from the path\n",
+ "        config = loader.config # gets the config from the model\n",
+ "        model = pipeline(model=config.hf_artifact_path, task='sentiment-analysis')\n",
+ "        self.sentiment_pipeline = model # sets the pipeline as an attribute of the module\n",
+ "\n",
+ "    # Runs the HF pipeline on a one-element batch and wraps every result in a ClassInfo\n",
+ "    # (label -> class_name, score -> conf); returns them as a ClassificationPrediction.\n",
+ "    def run(self, text_input: str) -> ClassificationPrediction:\n",
+ "        raw_results = self.sentiment_pipeline([text_input])\n",
+ "        class_info = [\n",
+ "            ClassInfo(class_name=result['label'], conf=result['score'])\n",
+ "            for result in raw_results\n",
+ "        ]\n",
+ "        return ClassificationPrediction(classes=class_info)\n",
+ "\n",
+ "    # Alternate constructor: builds the module from model_path, which defaults to a HF sentiment model name.\n",
+ "    # NOTE(review): __init__ passes model_path to ModuleLoader -- confirm a HF model name is accepted there.\n",
+ "    @classmethod\n",
+ "    def bootstrap(cls, model_path='distilbert-base-uncased-finetuned-sst-2-english'):\n",
+ "        return cls(model_path=model_path)\n",
+ "\n",
+ "    # Saves the HF pipeline into an \"hf_model\" directory under model_path and records that\n",
+ "    # relative path in the module config as hf_artifact_path (the key __init__ reads).\n",
+ "    def save(self, model_path, **kwargs):\n",
+ "        import os\n",
+ "        module_saver = ModuleSaver(self, model_path=model_path)\n",
+ "        with module_saver: # context manager for cleaning up after saving\n",
+ "            rel_path, _ = module_saver.add_dir(\"hf_model\")\n",
+ "            save_path = os.path.join(model_path, rel_path)\n",
+ "            self.sentiment_pipeline.save_pretrained(save_path)\n",
+ "            module_saver.update_config({\"hf_artifact_path\": rel_path})\n",
+ "\n",
+ "    # Alternate constructor: loads a saved HF based caikit model from model_path.\n",
+ "    @classmethod\n",
+ "    def load(cls, model_path):\n",
+ "        return cls(model_path=model_path)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "Overwriting ./text_sentiment/runtime_model/__init__.py\n"
+ ]
+ }
+ ],
+ "source": [
+ "%%writefile ./text_sentiment/runtime_model/__init__.py\n",
+ "\n",
+ "from .hf_module import HFSentimentModule"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {}, + "source": [ + "### Config" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Overwriting ./text_sentiment/config.yml\n" + ] + } + ], + "source": [ + "%%writefile ./text_sentiment/config.yml\n", + "\n", + "runtime:\n", + " library: text_sentiment" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "Path('./models/text_sentiment').mkdir(exist_ok=True, parents=True)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Overwriting ./models/text_sentiment/config.yml\n" + ] + } + ], + "source": [ + "%%writefile ./models/text_sentiment/config.yml\n", + "\n", + "module_id: 8f72161-c0e4-49b0-8fd0-7587b3017a35\n", + "name: HFSentimentModule\n", + "version: 0.0.1" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Runtime\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# Kill the process using a particular port\n", + "# !lsof -ti tcp:8086 | xargs kill -9" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# !lsof -ti tcp:8086" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Overwriting start_runtime.py\n" + ] + } + ], + "source": [ + "%%writefile start_runtime.py\n", + "\n", + "from os import path\n", + "import sys\n", + "import alog\n", + "from caikit.runtime.__main__ import main\n", + "import caikit\n", + "\n", + "if __name__ == \"__main__\":\n", + " models_directory = path.abspath(path.join(path.dirname(__file__), \"models\"))\n", + " # models_directory = path.abspath(path.join(path.dirname('.'), 
\"models\"))\n", + " caikit.config.configure(config_dict=dict(\n", + " merge_strategy=\"merge\", runtime=dict(\n", + " local_models_dir=models_directory, library=\"text_sentiment\", grpc=dict(enabled=True), http=dict(enabled=True)\n", + " )\n", + " ))\n", + " sys.path.append(path.abspath(path.join(path.dirname(__file__), \"../\")))\n", + " alog.configure(default_level=\"debug\")\n", + " main()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Overwriting ./text_sentiment/__init__.py\n" + ] + } + ], + "source": [ + "%%writefile ./text_sentiment/__init__.py\n", + "\n", + "from os import path\n", + "from . import data_model, runtime_model\n", + "import caikit\n", + "\n", + "CONFIG_PATH = path.realpath(path.join(path.dirname(__file__), \"config.yml\"))\n", + "caikit.configure(CONFIG_PATH)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Client" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Overwriting ./client.py\n" + ] + } + ], + "source": [ + "%%writefile ./client.py\n", + "\n", + "from caikit.config.config import get_config # interacts with config.yml\n", + "from caikit.runtime import get_inference_request # return the inference request DataModel for the Module or Task Class \n", + "from caikit.runtime.service_factory import ServicePackageFactory\n", + "from text_sentiment.runtime_model.hf_module import HFSentimentModule\n", + "import caikit, grpc, requests, json\n", + "\n", + "if __name__ == \"__main__\":\n", + " caikit.config.configure(\n", + " config_dict=dict(merge_strategy='merge',\n", + " runtime=dict(library='text_sentiment', grpc=dict(enabled=True), http=dict(enabled=True)),)\n", + " )\n", + " inference_service = ServicePackageFactory.get_service_package(\n", + " 
ServicePackageFactory.ServiceType.INFERENCE\n",
+ "    ) # ServicePackage: A container with properties referencing everything you need to bind a concrete Servicer implementation to a protobufs Service and grpc Server\n",
+ "\n",
+ "    model_id = 'text_sentiment'\n",
+ "    # sample inputs sent through both the gRPC and the HTTP client\n",
+ "    sample_texts = ['I am not feeling well today', 'Today is a nice sunny day']\n",
+ "\n",
+ "    if get_config().runtime.grpc.enabled:\n",
+ "        # setup grpc client; the with-block closes the channel once all requests are done\n",
+ "        port = 8085\n",
+ "        with grpc.insecure_channel(f'localhost:{port}') as channel:\n",
+ "            client_stub = inference_service.stub_class(channel)\n",
+ "\n",
+ "            for text in sample_texts:\n",
+ "                request = get_inference_request(task_or_module_class=HFSentimentModule.TASK_CLASS)(text_input=text).to_proto()\n",
+ "                response = client_stub.HFSentimentTaskPredict(\n",
+ "                    request,\n",
+ "                    metadata=[('mm-model-id', model_id)],\n",
+ "                    timeout=1,\n",
+ "                )\n",
+ "                print('Text: ', text)\n",
+ "                print('Response from gRPC: ', response)\n",
+ "\n",
+ "    if get_config().runtime.http.enabled:\n",
+ "        port = 8080\n",
+ "        for text in sample_texts:\n",
+ "            payload = {\"inputs\": text}\n",
+ "            response = requests.post(\n",
+ "                f\"http://localhost:{port}/api/v1/{model_id}/task/hugging-face-sentiment\",\n",
+ "                json=payload,\n",
+ "                timeout=1,\n",
+ "            )\n",
+ "            print(\"\\nText:\", text)\n",
+ "            # printed per request so every sample text shows its own response\n",
+ "            print(\"RESPONSE from HTTP:\", json.dumps(response.json(), indent=4))"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# Install the dependencies\n",
+ "# %pip install -r requirements-caikit.txt"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# Running the caikit runtime\n",
+ "# !python start_runtime.py"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "## fin"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "from caikit.config.config import get_config\n",
+ "# get_config()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + } + }, + "nbformat": 4, + "nbformat_minor": 2 +}