triton_client.ipynb 6.22 KB
Newer Older
Nikhilesh Bhatnagar's avatar
Nikhilesh Bhatnagar committed
1 2 3 4 5
{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 1,
Nikhilesh Bhatnagar's avatar
Nikhilesh Bhatnagar committed
6 7 8 9 10 11 12 13 14
   "metadata": {
    "execution": {
     "iopub.execute_input": "2023-07-31T17:54:21.337343Z",
     "iopub.status.busy": "2023-07-31T17:54:21.336919Z",
     "iopub.status.idle": "2023-07-31T17:54:21.495527Z",
     "shell.execute_reply": "2023-07-31T17:54:21.494981Z",
     "shell.execute_reply.started": "2023-07-31T17:54:21.337320Z"
    }
   },
Nikhilesh Bhatnagar's avatar
Nikhilesh Bhatnagar committed
15 16 17
   "outputs": [],
   "source": [
    "import numpy as np\n",
Nikhilesh Bhatnagar's avatar
Nikhilesh Bhatnagar committed
18
    "import wonderwords\n",
Nikhilesh Bhatnagar's avatar
Nikhilesh Bhatnagar committed
19 20
    "from tqdm import tqdm\n",
    "from tritonclient.utils import *\n",
Nikhilesh Bhatnagar's avatar
Nikhilesh Bhatnagar committed
21
    "from random import choice, randrange\n",
Nikhilesh Bhatnagar's avatar
Nikhilesh Bhatnagar committed
22 23 24 25 26 27 28
    "import tritonclient.http as httpclient\n",
    "from multiprocessing.pool import ThreadPool"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
Nikhilesh Bhatnagar's avatar
Nikhilesh Bhatnagar committed
29 30 31 32 33 34 35 36 37
   "metadata": {
    "execution": {
     "iopub.execute_input": "2023-07-31T17:54:22.003332Z",
     "iopub.status.busy": "2023-07-31T17:54:22.002684Z",
     "iopub.status.idle": "2023-07-31T17:54:22.008703Z",
     "shell.execute_reply": "2023-07-31T17:54:22.007697Z",
     "shell.execute_reply.started": "2023-07-31T17:54:22.003294Z"
    }
   },
Nikhilesh Bhatnagar's avatar
Nikhilesh Bhatnagar committed
38 39
   "outputs": [],
   "source": [
Nikhilesh Bhatnagar's avatar
Nikhilesh Bhatnagar committed
40 41
    "# Name of the model every inference request is addressed to\n",
    "model_name = \"ssmt_pipeline\"\n",
    "# Bounds handed to randrange() when choosing how many random words go into a sentence\n",
    "MIN_WORDS = 4\n",
    "MAX_WORDS = 20\n",
    "# NOTE(review): no visible cell reads shape - confirm before removing\n",
    "shape = [1]\n",
    "# Random word source used to build the test sentences\n",
    "rs = wonderwords.RandomWord()"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "* Hit the tritonserver with random sentences, each sent with a randomly chosen language pair\n",
    "* See https://docs.nvidia.com/deeplearning/triton-inference-server/user-guide/docs/user_guide/metrics.html for metrics"
Nikhilesh Bhatnagar's avatar
Nikhilesh Bhatnagar committed
52 53 54 55 56
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
Nikhilesh Bhatnagar's avatar
Nikhilesh Bhatnagar committed
57 58 59 60 61 62 63 64 65
   "metadata": {
    "execution": {
     "iopub.execute_input": "2023-07-31T17:54:23.090209Z",
     "iopub.status.busy": "2023-07-31T17:54:23.089654Z",
     "iopub.status.idle": "2023-07-31T17:54:23.100644Z",
     "shell.execute_reply": "2023-07-31T17:54:23.100011Z",
     "shell.execute_reply.started": "2023-07-31T17:54:23.090177Z"
    }
   },
Nikhilesh Bhatnagar's avatar
Nikhilesh Bhatnagar committed
66 67 68
   "outputs": [],
   "source": [
    "def task(x):\n",
    "    \"\"\"Send 10 asynchronous translation requests to the Triton server and wait for every response.\n",
    "\n",
    "    Each request carries a freshly generated random sentence and a randomly chosen language\n",
    "    pair, passed as the INPUT_TEXT, INPUT_LANGUAGE_ID and OUTPUT_LANGUAGE_ID inputs.\n",
    "\n",
    "    Args:\n",
    "        x: Job index supplied by the caller; only used to build unique request ids.\n",
    "\n",
    "    Returns:\n",
    "        0 once all responses have been fetched (each fetch waits with timeout=10).\n",
    "    \"\"\"\n",
    "    lang_pairs = ('en-hi', 'hi-en', 'te-en', 'hi-te', 'te-hi', 'en-gu', 'gu-en')\n",
    "    with httpclient.InferenceServerClient(\"localhost:8000\") as client:\n",
    "        async_responses = []\n",
    "        for i in range(10):\n",
    "            # Random words joined by spaces plus a final '.'. Use a constant sentence if you want to hit the cache\n",
    "            s = ' '.join(rs.random_words(randrange(MIN_WORDS, MAX_WORDS)) + ['.'])\n",
    "            src_lang, tgt_lang = (code.strip() for code in choice(lang_pairs).split('-'))\n",
    "            inputs = []\n",
    "            for name, value in ((\"INPUT_TEXT\", s), (\"INPUT_LANGUAGE_ID\", src_lang), (\"OUTPUT_LANGUAGE_ID\", tgt_lang)):\n",
    "                # Every input is a [1, 1] object array holding a single string\n",
    "                data = np.array([[value]], dtype='object')\n",
    "                infer_input = httpclient.InferInput(name, data.shape, np_to_triton_dtype(data.dtype))\n",
    "                infer_input.set_data_from_numpy(data)\n",
    "                inputs.append(infer_input)\n",
    "            outputs = [httpclient.InferRequestedOutput(\"OUTPUT_TEXT\")]\n",
    "            # Unique id per in-flight request so a response can be traced back to its job and slot\n",
    "            async_responses.append(client.async_infer(model_name, inputs, request_id=f\"{x}-{i}\", outputs=outputs))\n",
    "        # Block until every request of this burst has completed\n",
    "        for r in async_responses:\n",
    "            r.get_result(timeout=10).get_response()\n",
    "    return 0"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
Nikhilesh Bhatnagar's avatar
Nikhilesh Bhatnagar committed
89 90 91 92 93 94 95 96 97
   "metadata": {
    "execution": {
     "iopub.execute_input": "2023-07-31T17:55:24.235964Z",
     "iopub.status.busy": "2023-07-31T17:55:24.235574Z",
     "iopub.status.idle": "2023-07-31T17:58:30.757911Z",
     "shell.execute_reply": "2023-07-31T17:58:30.756271Z",
     "shell.execute_reply.started": "2023-07-31T17:55:24.235935Z"
    }
   },
Nikhilesh Bhatnagar's avatar
Nikhilesh Bhatnagar committed
98 99 100 101 102
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
103
      "100%|██████████| 1000/1000 [07:11<00:00,  2.32it/s]\n"
Nikhilesh Bhatnagar's avatar
Nikhilesh Bhatnagar committed
104 105 106 107
     ]
    }
   ],
   "source": [
108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162
    "# float32: cpu load - 5.7, ram - 10.2 G, gpu util - 100%, vram - 4.7 G, gpu wattage - 70 W\n",
    "# 100 worker threads run task() over 1000 job indices; the loop only drains results to drive the progress bar\n",
    "with ThreadPool(100) as pool:\n",
    "    for output in tqdm(\n",
    "        pool.imap_unordered(task, range(1000), chunksize=1),\n",
    "        total=1000,\n",
    "    ):\n",
    "        pass"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {},
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "100%|██████████| 1000/1000 [05:09<00:00,  3.23it/s]\n"
     ]
    }
   ],
   "source": [
    "# float16: cpu load - 5.2, ram - 10.3 G, gpu util - 99%, vram - 3.5 G, gpu wattage - 65 W\n",
    "# 100 worker threads run task() over 1000 job indices; the loop only drains results to drive the progress bar\n",
    "with ThreadPool(100) as pool:\n",
    "    for output in tqdm(\n",
    "        pool.imap_unordered(task, range(1000), chunksize=1),\n",
    "        total=1000,\n",
    "    ):\n",
    "        pass"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {},
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "100%|██████████| 1000/1000 [05:20<00:00,  3.12it/s]\n"
     ]
    }
   ],
   "source": [
    "# int8_float16: cpu load - 5.7, ram - 10.3 G, gpu util - 98%, vram - 2.5 G, gpu wattage - 61 W\n",
    "# 100 worker threads run task() over 1000 job indices; the loop only drains results to drive the progress bar\n",
    "with ThreadPool(100) as pool:\n",
    "    for output in tqdm(\n",
    "        pool.imap_unordered(task, range(1000), chunksize=1),\n",
    "        total=1000,\n",
    "    ):\n",
    "        pass"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {},
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "100%|██████████| 1000/1000 [05:09<00:00,  3.23it/s]\n"
     ]
    }
   ],
   "source": [
    "# int8: cpu load - 5.1, ram - 10.3 G, gpu util - 97%, vram - 2.5 G, gpu wattage - 60 W\n",
    "# 100 worker threads run task() over 1000 job indices; the loop only drains results to drive the progress bar\n",
    "with ThreadPool(100) as pool:\n",
    "    for output in tqdm(\n",
    "        pool.imap_unordered(task, range(1000), chunksize=1),\n",
    "        total=1000,\n",
    "    ):\n",
    "        pass"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
169
   "display_name": ".venv",
Nikhilesh Bhatnagar's avatar
Nikhilesh Bhatnagar committed
170
   "language": "python",
171
   "name": "python3"
Nikhilesh Bhatnagar's avatar
Nikhilesh Bhatnagar committed
172 173 174 175 176 177 178 179 180 181 182
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
183
   "version": "3.11.4"
Nikhilesh Bhatnagar's avatar
Nikhilesh Bhatnagar committed
184
  }
Nikhilesh Bhatnagar's avatar
Nikhilesh Bhatnagar committed
185 186
 },
 "nbformat": 4,
Nikhilesh Bhatnagar's avatar
Nikhilesh Bhatnagar committed
187
 "nbformat_minor": 4
Nikhilesh Bhatnagar's avatar
Nikhilesh Bhatnagar committed
188
}