From 734aca617e34491679bdbb11e302e3ab2a42f3e6 Mon Sep 17 00:00:00 2001
From: Nikhilesh Bhatnagar <tingc9@gmail.com>
Date: Mon, 31 Jul 2023 17:59:57 +0000
Subject: [PATCH] Client notebook tweaks.

---
 README.md           |  2 +-
 triton_client.ipynb | 72 +++++++++++++++++++++++++++++++++++----------
 2 files changed, 58 insertions(+), 16 deletions(-)

diff --git a/README.md b/README.md
index c6c112b..d0b504f 100644
--- a/README.md
+++ b/README.md
@@ -51,4 +51,4 @@ nvidia-docker run --gpus=all --rm --shm-size 5g --network=host --name dhruva-ssm
 ## Querying the triton server
 
 We provide a sample ipython notebook that shows how to concurrently request the client for translations.
-Prerequisites: `pip install "tritonclient[all]" tqdm numpy`
+Prerequisites: `pip install "tritonclient[all]" tqdm numpy wonderwords`
diff --git a/triton_client.ipynb b/triton_client.ipynb
index 52fecb1..ff202ae 100644
--- a/triton_client.ipynb
+++ b/triton_client.ipynb
@@ -3,13 +3,22 @@
   {
    "cell_type": "code",
    "execution_count": 1,
-   "metadata": {},
+   "metadata": {
+    "execution": {
+     "iopub.execute_input": "2023-07-31T17:54:21.337343Z",
+     "iopub.status.busy": "2023-07-31T17:54:21.336919Z",
+     "iopub.status.idle": "2023-07-31T17:54:21.495527Z",
+     "shell.execute_reply": "2023-07-31T17:54:21.494981Z",
+     "shell.execute_reply.started": "2023-07-31T17:54:21.337320Z"
+    }
+   },
    "outputs": [],
    "source": [
     "import numpy as np\n",
+    "import wonderwords\n",
     "from tqdm import tqdm\n",
-    "from random import choice\n",
     "from tritonclient.utils import *\n",
+    "from random import choice, randrange\n",
     "import tritonclient.http as httpclient\n",
     "from multiprocessing.pool import ThreadPool"
    ]
@@ -17,25 +26,51 @@
   {
    "cell_type": "code",
    "execution_count": 2,
-   "metadata": {},
+   "metadata": {
+    "execution": {
+     "iopub.execute_input": "2023-07-31T17:54:22.003332Z",
+     "iopub.status.busy": "2023-07-31T17:54:22.002684Z",
+     "iopub.status.idle": "2023-07-31T17:54:22.008703Z",
+     "shell.execute_reply": "2023-07-31T17:54:22.007697Z",
+     "shell.execute_reply.started": "2023-07-31T17:54:22.003294Z"
+    }
+   },
    "outputs": [],
    "source": [
+    "shape = [1]\n",
+    "MIN_WORDS, MAX_WORDS = 4, 20\n",
     "model_name = \"ssmt_pipeline\"\n",
-    "shape = [1]"
+    "rs = wonderwords.RandomWord()"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "* Send a random sentence to a random model on the Triton server\n",
+    "* See https://docs.nvidia.com/deeplearning/triton-inference-server/user-guide/docs/user_guide/metrics.html for metrics"
    ]
   },
   {
    "cell_type": "code",
    "execution_count": 3,
-   "metadata": {},
+   "metadata": {
+    "execution": {
+     "iopub.execute_input": "2023-07-31T17:54:23.090209Z",
+     "iopub.status.busy": "2023-07-31T17:54:23.089654Z",
+     "iopub.status.idle": "2023-07-31T17:54:23.100644Z",
+     "shell.execute_reply": "2023-07-31T17:54:23.100011Z",
+     "shell.execute_reply.started": "2023-07-31T17:54:23.090177Z"
+    }
+   },
    "outputs": [],
    "source": [
     "def task(x):\n",
-    "    lang_pair_map = list({'eng-hin': 1, 'hin-eng': 2, 'tel-eng': 4, 'hin-tel': 6, 'tel-hin': 7, 'eng-guj': 8, 'guj-eng': 9}.keys())\n",
+    "    lang_pair_map = list({'en-hi': 1, 'hi-en': 2, 'te-en': 4, 'hi-te': 6, 'te-hi': 7, 'en-gu': 8, 'gu-en': 9}.keys())\n",
     "    with httpclient.InferenceServerClient(\"localhost:8000\") as client:\n",
     "        async_responses = []\n",
     "        for i in range(10):\n",
-    "            s = 'this is a sentence.'\n",
+    "            s = ' '.join(rs.random_words(randrange(MIN_WORDS, MAX_WORDS)) + ['.']) # Use a constant sentence (e.g. 'this is a sentence.') instead if you want to hit the cache\n",
     "            source_data = np.array([[s]], dtype='object')\n",
     "            inputs = [httpclient.InferInput(\"INPUT_TEXT\", source_data.shape, np_to_triton_dtype(source_data.dtype)), httpclient.InferInput(\"INPUT_LANGUAGE_ID\", source_data.shape, np_to_triton_dtype(source_data.dtype)), httpclient.InferInput(\"OUTPUT_LANGUAGE_ID\", source_data.shape, np_to_triton_dtype(source_data.dtype))]\n",
     "            inputs[0].set_data_from_numpy(np.array([[s]], dtype='object'))\n",
@@ -51,13 +86,21 @@
   {
    "cell_type": "code",
    "execution_count": 4,
-   "metadata": {},
+   "metadata": {
+    "execution": {
+     "iopub.execute_input": "2023-07-31T17:55:24.235964Z",
+     "iopub.status.busy": "2023-07-31T17:55:24.235574Z",
+     "iopub.status.idle": "2023-07-31T17:58:30.757911Z",
+     "shell.execute_reply": "2023-07-31T17:58:30.756271Z",
+     "shell.execute_reply.started": "2023-07-31T17:55:24.235935Z"
+    }
+   },
    "outputs": [
     {
      "name": "stderr",
      "output_type": "stream",
      "text": [
-      "100%|██████████| 1000/1000 [01:49<00:00,  9.15it/s]\n"
+      "100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 1000/1000 [03:06<00:00,  5.36it/s]\n"
      ]
     }
    ],
@@ -69,9 +112,9 @@
  ],
  "metadata": {
   "kernelspec": {
-   "display_name": "model_metrics",
+   "display_name": "mt-model-deploy-dhruva",
    "language": "python",
-   "name": "model_metrics"
+   "name": "mt-model-deploy-dhruva"
   },
   "language_info": {
    "codemirror_mode": {
@@ -83,10 +126,9 @@
    "name": "python",
    "nbconvert_exporter": "python",
    "pygments_lexer": "ipython3",
-   "version": "3.11.4"
-  },
-  "orig_nbformat": 4
+   "version": "3.10.12"
+  }
  },
  "nbformat": 4,
- "nbformat_minor": 2
+ "nbformat_minor": 4
 }
-- 
GitLab