Commit cf66dd5e authored by Nikhilesh Bhatnagar
parents 8cf79c13 081781ca
@@ -5,11 +5,13 @@
## TL;DR
This repo contains code for Python-backend, CTranslate2-based Triton models for the SSMT project.
Prerequisites: `python3.xx-venv`, `nvidia-docker`, `bash`
Quantization is disabled until qualitative testing is performed; for now, the `int8` argument does nothing.
```bash
git clone https://ssmt.iiit.ac.in/meitygit/ssmt/mt-model-deploy-dhruva.git
cd mt-model-deploy-dhruva
bash make_triton_model_repo.sh "https://ssmt.iiit.ac.in/uploads/data_mining/models.zip" "int8"
docker build -t dhruva/ssmt-model-server:1 .
nvidia-docker run --gpus=all --rm --shm-size 5g --network=host --name dhruva-ssmt-triton-server -v ./ssmt_triton_repo:/models dhruva/ssmt-model-server:1
```
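Once the container is up, the server can be sanity-checked through Triton's KServe v2 HTTP endpoint. A minimal sketch, assuming the default HTTP port 8000 (the container runs with host networking):

```python
# Minimal readiness probe; assumes Triton's default HTTP port 8000.
import requests

resp = requests.get("http://localhost:8000/v2/health/ready", timeout=5)
print("server ready" if resp.status_code == 200 else f"not ready: HTTP {resp.status_code}")
```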
@@ -18,7 +20,7 @@ nvidia-docker run --gpus=all --rm --shm-size 5g --network=host --name dhruva-ssm
* This repo contains the templates and component triton models for the SSMT project.
* Also contained is a Dockerfile to construct the triton server instance.
* Given a URL and quantization method (those supported by CTranslate2, i.e. `int8`, `int8_float16`, `int8_bfloat16`, `int16`, `float16` and `bfloat16`) it will download, quantize and construct the SSMT Triton Repository in `./ssmt_triton_repo` (`int8` is the most efficient in size and speed on NVIDIA T4).
* Dynamic batching and caching are supported and enabled by default.
* The repository folder can be mounted to the dhruva ssmt triton server on `/models` and can be queried via a client.
* Sample client code is also given as an IPython notebook; a minimal request sketch follows this list.
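A request against one of the generated models might look like the sketch below. The model name `ssmt_1_ct2` follows the repo's `ssmt_<n>_ct2` naming pattern, and the tensor names are placeholders, since the actual `config.pbtxt` is not shown in this diff:

```python
# Hypothetical client sketch; INPUT_TEXT/OUTPUT_TEXT are placeholder tensor
# names, not taken from the repo's config.pbtxt.
import numpy as np
import tritonclient.http as http_client

client = http_client.InferenceServerClient(url="localhost:8000")
text = np.array([["hello world"]], dtype="object")  # BYTES tensors use numpy object arrays
inp = http_client.InferInput("INPUT_TEXT", list(text.shape), "BYTES")
inp.set_data_from_numpy(text)
result = client.infer("ssmt_1_ct2", inputs=[inp])
print(result.as_numpy("OUTPUT_TEXT"))
```

Because dynamic batching is enabled, concurrent requests like these (the benchmark notebook drives 100 at a time via `ThreadPool`) are grouped into batches server-side.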
@@ -37,7 +39,7 @@ One can construct the triton repo like so:
```bash
git clone https://ssmt.iiit.ac.in/meitygit/ssmt/mt-model-deploy-dhruva.git
cd mt-model-deploy-dhruva
bash make_triton_model_repo.sh "https://ssmt.iiit.ac.in/uploads/data_mining/models.zip" "int8"
```
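The quantization argument maps onto CTranslate2's converter. For illustration, the Python-API equivalent of the script's conversion step with quantization actually applied might look like the following; this is a sketch, not what the shipped script currently does (it calls the converter without a quantization flag):

```python
# Sketch only: shows what an int8 conversion would look like via the
# CTranslate2 Python API; the shipped script omits quantization for now.
from ctranslate2.converters import OpenNMTPyConverter

converter = OpenNMTPyConverter("models/1.pt")           # one OpenNMT-py checkpoint
converter.convert("models/1_ct2", quantization="int8")  # weights stored as int8
```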
## Starting the triton server
...
#!/bin/bash
MODELS_URL=$1
QUANTIZATION=$2
# Download and unpack the OpenNMT-py checkpoints.
wget -O models.zip "$MODELS_URL" --no-check-certificate
unzip models.zip
# Conversion happens inside a throwaway virtualenv.
python3 -m venv ./ssmt_ct2
source ./ssmt_ct2/bin/activate
pip install -U pip wheel
pip install ctranslate2 "OpenNMT-py==1.2.0"
cd models
# Convert each OpenNMT-py checkpoint to CTranslate2 format (repeated per model;
# the intermediate repetitions are elided in this hunk).
ct2-opennmt-py-converter --model_path 1.pt --output_dir ./1_ct2
@@ -46,7 +48,7 @@ cp -r ../triton_models/ssmt_template_model_repo ssmt_9_ct2
cp -r ../models/9_ct2 ssmt_9_ct2/1/translator
# Stamp the real model name into the copied config template.
sed -i 's/model_name/ssmt_9_ct2/' ssmt_9_ct2/config.pbtxt
cd ..
deactivate
rm -rf ssmt_ct2
rm -f models.zip
rm -rf models
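The script unrolls the convert/copy/rename steps once per checkpoint. Condensed into a loop, the per-model work is roughly the following Python sketch, under the assumption of nine checkpoints named `1.pt` through `9.pt` (mirroring the `ssmt_9_ct2` lines above) and paths relative to the repo root:

```python
# Sketch of the per-model steps the shell script unrolls; the checkpoint count
# and directory layout are assumptions, not taken verbatim from the script.
import shutil
import subprocess

for i in range(1, 10):
    # Convert the OpenNMT-py checkpoint to a CTranslate2 model.
    subprocess.run(
        ["ct2-opennmt-py-converter", "--model_path", f"models/{i}.pt",
         "--output_dir", f"models/{i}_ct2"],
        check=True,
    )
    # Instantiate the Triton model from the template and drop the weights in.
    repo = f"ssmt_triton_repo/ssmt_{i}_ct2"
    shutil.copytree("triton_models/ssmt_template_model_repo", repo)
    shutil.copytree(f"models/{i}_ct2", f"{repo}/1/translator")
    # Same substitution the script performs with sed on config.pbtxt.
    cfg_path = f"{repo}/config.pbtxt"
    with open(cfg_path) as f:
        cfg = f.read().replace("model_name", f"ssmt_{i}_ct2")
    with open(cfg_path, "w") as f:
        f.write(cfg)
```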
@@ -100,21 +100,75 @@
"name": "stderr", "name": "stderr",
"output_type": "stream", "output_type": "stream",
"text": [ "text": [
"100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 1000/1000 [03:06<00:00, 5.36it/s]\n" "100%|██████████| 1000/1000 [07:11<00:00, 2.32it/s]\n"
] ]
} }
], ],
"source": [ "source": [
"with ThreadPool(100) as pool:\n", "with ThreadPool(100) as pool: # float32 cpu load - 5.7 ram - 10.2 G gpu util - 100% vram - 4.7 G gpu wattage - 70 W\n",
" for output in tqdm(pool.imap_unordered(task, range(1000), chunksize=1), total=1000): pass"
]
},
{
"cell_type": "code",
"execution_count": 4,
"metadata": {},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"100%|██████████| 1000/1000 [05:09<00:00, 3.23it/s]\n"
]
}
],
"source": [
"with ThreadPool(100) as pool: # float16 cpu load - 5.2 ram - 10.3 G gpu util - 99% vram - 3.5 G gpu wattage - 65 W\n",
" for output in tqdm(pool.imap_unordered(task, range(1000), chunksize=1), total=1000): pass"
]
},
{
"cell_type": "code",
"execution_count": 4,
"metadata": {},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"100%|██████████| 1000/1000 [05:20<00:00, 3.12it/s]\n"
]
}
],
"source": [
"with ThreadPool(100) as pool: # int8_float16 cpu load - 5.7 ram - 10.3 G gpu util - 98% vram - 2.5 G gpu wattage - 61 W\n",
" for output in tqdm(pool.imap_unordered(task, range(1000), chunksize=1), total=1000): pass"
]
},
{
"cell_type": "code",
"execution_count": 4,
"metadata": {},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"100%|██████████| 1000/1000 [05:09<00:00, 3.23it/s]\n"
]
}
],
"source": [
"with ThreadPool(100) as pool: # int8 cpu load - 5.1 ram - 10.3 G gpu util - 97% vram - 2.5 G gpu wattage - 60 W\n",
" for output in tqdm(pool.imap_unordered(task, range(1000), chunksize=1), total=1000): pass" " for output in tqdm(pool.imap_unordered(task, range(1000), chunksize=1), total=1000): pass"
]
}
],
"metadata": {
"kernelspec": {
"display_name": ".venv",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
@@ -126,7 +180,7 @@
"name": "python", "name": "python",
"nbconvert_exporter": "python", "nbconvert_exporter": "python",
"pygments_lexer": "ipython3", "pygments_lexer": "ipython3",
"version": "3.10.12" "version": "3.11.4"
} }
}, },
"nbformat": 4, "nbformat": 4,
...
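For quick reference, the timings and resource figures recorded in the benchmark cells above (1000 requests through `ThreadPool(100)`; the hardware is presumably the NVIDIA T4 referenced in the README):

| Precision | Wall time | Throughput | CPU load | RAM | GPU util | VRAM | GPU power |
|---|---|---|---|---|---|---|---|
| float32 | 07:11 | 2.32 it/s | 5.7 | 10.2 G | 100% | 4.7 G | 70 W |
| float16 | 05:09 | 3.23 it/s | 5.2 | 10.3 G | 99% | 3.5 G | 65 W |
| int8_float16 | 05:20 | 3.12 it/s | 5.7 | 10.3 G | 98% | 2.5 G | 61 W |
| int8 | 05:09 | 3.23 it/s | 5.1 | 10.3 G | 97% | 2.5 G | 60 W |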