Commit ae25953d authored by Nikhilesh Bhatnagar

Bugfixes. Temporarily removed quantization.

parent 4dddbf5b
FROM nvcr.io/nvidia/tritonserver:23.06-py3 FROM nvcr.io/nvidia/tritonserver:23.07-py3
WORKDIR /opt/tritonserver WORKDIR /opt/tritonserver
RUN apt-get update && apt-get install -y python3.10-venv RUN apt-get update && apt-get install -y python3.10-venv
ENV VIRTUAL_ENV=/opt/dhruva-mt ENV VIRTUAL_ENV=/opt/dhruva-mt
......
...@@ -7,7 +7,7 @@ ...@@ -7,7 +7,7 @@
This repo contains code for python backend CTranslate2 based triton models for the SSMT project. This repo contains code for python backend CTranslate2 based triton models for the SSMT project.
Prerequisites: `python3.xx-venv`, `nvidia-docker` Prerequisites: `python3.xx-venv`, `nvidia-docker`
```bash ```bash
git clone http://ssmt.iiit.ac.in/meitygit/ssmt/mt-model-deploy-dhruva.git git clone https://ssmt.iiit.ac.in/meitygit/ssmt/mt-model-deploy-dhruva.git
cd mt-model-deploy-dhruva cd mt-model-deploy-dhruva
sh make_triton_model_repo.sh "https://ssmt.iiit.ac.in/uploads/data_mining/models.zip" "float16" sh make_triton_model_repo.sh "https://ssmt.iiit.ac.in/uploads/data_mining/models.zip" "float16"
docker build -t dhruva/ssmt-model-server:1 . docker build -t dhruva/ssmt-model-server:1 .
...@@ -18,11 +18,11 @@ nvidia-docker run --gpus=all --rm --shm-size 5g --network=host --name dhruva-ssm ...@@ -18,11 +18,11 @@ nvidia-docker run --gpus=all --rm --shm-size 5g --network=host --name dhruva-ssm
* This repo contains the templates and component triton models for the SSMT project. * This repo contains the templates and component triton models for the SSMT project.
* Also contained is a Dockerfile to construct the triton server instance. * Also contained is a Dockerfile to construct the triton server instance.
* Given a URL and quantization method (those supported by CTranslate2 i.e. `int8`, `int8_float16`, `int8_bfloat16`, `int16`, `float16` and `bfloat16`) it will download, quantize and construct the SSMT Triton Repository in `./ssmt_triton_repo`. * Given a URL and quantization method (those supported by CTranslate2 i.e. `int8`, `int8_float16`, `int8_bfloat16`, `int16`, `float16` and `bfloat16`) it will download, quantize and construct the SSMT Triton Repository in `./ssmt_triton_repo` (disabled, will be enabled once testing is performed on representative hardware).
* Dynamic batching and caching are supported and enabled by default. * Dynamic batching and caching are supported and enabled by default.
* The repository folder can be mounted to the dhruva ssmt triton server on `/models` and can be queried via a client. * The repository folder can be mounted to the dhruva ssmt triton server on `/models` and can be queried via a client.
* Sample client code is also given as an ipython notebook. * Sample client code is also given as an ipython notebook.
* The `model.zip` package needs to contain a folder of `.pt` and `.src` files named `1` through `9` with each file corresponding to the following mapping: `{'en-hi': 1, 'hi-en': 2, 'en-te': 3, 'te-en': 4, 'hi-te': 6, 'te-hi': 7, 'en-gu': 8, 'gu-en': 9}` * The `model.zip` package needs to contain a folder of `.pt` and `.src` files named `1` through `9` with each file corresponding to the following mapping: `{'en-hi': 1, 'hi-en': 2, 'te-en': 4, 'hi-te': 6, 'te-hi': 7, 'en-gu': 8, 'gu-en': 9}`
## Architecture of the pipeline ## Architecture of the pipeline
...@@ -35,7 +35,7 @@ The pipeline consists of 4 components, executed in order: ...@@ -35,7 +35,7 @@ The pipeline consists of 4 components, executed in order:
The exact specifications of the model inputs and outputs can be looked at in the corresponding `config.pbtxt` files. The exact specifications of the model inputs and outputs can be looked at in the corresponding `config.pbtxt` files.
One can construct the triton repo like so: One can construct the triton repo like so:
```bash ```bash
git clone http://ssmt.iiit.ac.in/meitygit/ssmt/mt-model-deploy-dhruva.git git clone https://ssmt.iiit.ac.in/meitygit/ssmt/mt-model-deploy-dhruva.git
cd mt-model-deploy-dhruva cd mt-model-deploy-dhruva
sh make_triton_model_repo.sh "https://ssmt.iiit.ac.in/uploads/data_mining/models.zip" "float16" sh make_triton_model_repo.sh "https://ssmt.iiit.ac.in/uploads/data_mining/models.zip" "float16"
``` ```
......
...@@ -6,20 +6,21 @@ python3 -m venv ./ssmt_ct2 ...@@ -6,20 +6,21 @@ python3 -m venv ./ssmt_ct2
source ./ssmt_ct2/bin/activate source ./ssmt_ct2/bin/activate
pip install ctranslate2 "OpenNMT-py==1.2.0" pip install ctranslate2 "OpenNMT-py==1.2.0"
cd models cd models
ct2-opennmt-py-converter --model_path 1.pt --quantization $QUANTIZATION --output_dir ./1_ct2 ct2-opennmt-py-converter --model_path 1.pt --output_dir ./1_ct2
ct2-opennmt-py-converter --model_path 2.pt --quantization $QUANTIZATION --output_dir ./2_ct2 ct2-opennmt-py-converter --model_path 2.pt --output_dir ./2_ct2
ct2-opennmt-py-converter --model_path 3.pt --quantization $QUANTIZATION --output_dir ./3_ct2 # ct2-opennmt-py-converter --model_path 3.pt --output_dir ./3_ct2
ct2-opennmt-py-converter --model_path 4.pt --quantization $QUANTIZATION --output_dir ./4_ct2 ct2-opennmt-py-converter --model_path 4.pt --output_dir ./4_ct2
ct2-opennmt-py-converter --model_path 6.pt --quantization $QUANTIZATION --output_dir ./6_ct2 ct2-opennmt-py-converter --model_path 6.pt --output_dir ./6_ct2
ct2-opennmt-py-converter --model_path 7.pt --quantization $QUANTIZATION --output_dir ./7_ct2 ct2-opennmt-py-converter --model_path 7.pt --output_dir ./7_ct2
ct2-opennmt-py-converter --model_path 8.pt --quantization $QUANTIZATION --output_dir ./8_ct2 ct2-opennmt-py-converter --model_path 8.pt --output_dir ./8_ct2
ct2-opennmt-py-converter --model_path 9.pt --quantization $QUANTIZATION --output_dir ./9_ct2 ct2-opennmt-py-converter --model_path 9.pt --output_dir ./9_ct2
cd .. cd ..
mkdir ssmt_triton_repo mkdir ssmt_triton_repo
cd ssmt_triton_repo cd ssmt_triton_repo
cp -r ../triton_models/ssmt_pipeline . cp -r ../triton_models/ssmt_pipeline .
cp -r ../triton_models/ssmt_model_demuxer . cp -r ../triton_models/ssmt_model_demuxer .
cp -r ../triton_models/ssmt_tokenizer . cp -r ../triton_models/ssmt_tokenizer .
mkdir ssmt_tokenizer/1/bpe_src
cp -r ../models/*.src ssmt_tokenizer/1/bpe_src cp -r ../models/*.src ssmt_tokenizer/1/bpe_src
cp -r ../triton_models/ssmt_template_model_repo ssmt_1_ct2 cp -r ../triton_models/ssmt_template_model_repo ssmt_1_ct2
cp -r ../models/1_ct2 ssmt_1_ct2/1/translator cp -r ../models/1_ct2 ssmt_1_ct2/1/translator
...@@ -27,9 +28,9 @@ sed -i 's/model_name/ssmt_1_ct2/' ssmt_1_ct2/config.pbtxt ...@@ -27,9 +28,9 @@ sed -i 's/model_name/ssmt_1_ct2/' ssmt_1_ct2/config.pbtxt
cp -r ../triton_models/ssmt_template_model_repo ssmt_2_ct2 cp -r ../triton_models/ssmt_template_model_repo ssmt_2_ct2
cp -r ../models/2_ct2 ssmt_2_ct2/1/translator cp -r ../models/2_ct2 ssmt_2_ct2/1/translator
sed -i 's/model_name/ssmt_2_ct2/' ssmt_2_ct2/config.pbtxt sed -i 's/model_name/ssmt_2_ct2/' ssmt_2_ct2/config.pbtxt
cp -r ../triton_models/ssmt_template_model_repo ssmt_3_ct2 # cp -r ../triton_models/ssmt_template_model_repo ssmt_3_ct2
cp -r ../models/3_ct2 ssmt_3_ct2/1/translator # cp -r ../models/3_ct2 ssmt_3_ct2/1/translator
sed -i 's/model_name/ssmt_3_ct2/' ssmt_3_ct2/config.pbtxt # sed -i 's/model_name/ssmt_3_ct2/' ssmt_3_ct2/config.pbtxt
cp -r ../triton_models/ssmt_template_model_repo ssmt_4_ct2 cp -r ../triton_models/ssmt_template_model_repo ssmt_4_ct2
cp -r ../models/4_ct2 ssmt_4_ct2/1/translator cp -r ../models/4_ct2 ssmt_4_ct2/1/translator
sed -i 's/model_name/ssmt_4_ct2/' ssmt_4_ct2/config.pbtxt sed -i 's/model_name/ssmt_4_ct2/' ssmt_4_ct2/config.pbtxt
......
...@@ -31,7 +31,7 @@ ...@@ -31,7 +31,7 @@
"outputs": [], "outputs": [],
"source": [ "source": [
"def task(x):\n", "def task(x):\n",
" lang_pair_map = list({'eng-hin': 1, 'hin-eng': 2, 'eng-tel':3, 'tel-eng': 4, 'hin-tel': 6, 'tel-hin': 7, 'eng-guj': 8, 'guj-eng': 9}.keys())\n", " lang_pair_map = list({'eng-hin': 1, 'hin-eng': 2, 'tel-eng': 4, 'hin-tel': 6, 'tel-hin': 7, 'eng-guj': 8, 'guj-eng': 9}.keys())\n",
" with httpclient.InferenceServerClient(\"localhost:8000\") as client:\n", " with httpclient.InferenceServerClient(\"localhost:8000\") as client:\n",
" async_responses = []\n", " async_responses = []\n",
" for i in range(10):\n", " for i in range(10):\n",
......
...@@ -7,7 +7,7 @@ class TritonPythonModel: ...@@ -7,7 +7,7 @@ class TritonPythonModel:
self.model_config = json.loads(args["model_config"]) self.model_config = json.loads(args["model_config"])
target_config = pb_utils.get_output_config_by_name(self.model_config, "OUTPUT_TEXT") target_config = pb_utils.get_output_config_by_name(self.model_config, "OUTPUT_TEXT")
self.target_dtype = pb_utils.triton_string_to_numpy(target_config["data_type"]) self.target_dtype = pb_utils.triton_string_to_numpy(target_config["data_type"])
self.lang_pair_map = {'en-hi': 1, 'hi-en': 2, 'en-te': 3, 'te-en': 4, 'hi-te': 6, 'te-hi': 7, 'en-gu': 8, 'gu-en': 9} self.lang_pair_map = {'en-hi': 1, 'hi-en': 2, 'te-en': 4, 'hi-te': 6, 'te-hi': 7, 'en-gu': 8, 'gu-en': 9}
async def execute(self, requests): async def execute(self, requests):
responses = [] responses = []
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment