Commit ae25953d authored by Nikhilesh Bhatnagar's avatar Nikhilesh Bhatnagar

Bugfixes. Temporarily removed quantization.

parent 4dddbf5b
FROM nvcr.io/nvidia/tritonserver:23.06-py3
FROM nvcr.io/nvidia/tritonserver:23.07-py3
WORKDIR /opt/tritonserver
RUN apt-get update && apt-get install -y python3.10-venv
ENV VIRTUAL_ENV=/opt/dhruva-mt
......
......@@ -7,7 +7,7 @@
This repo contains code for python backend CTranslate2 based triton models for the SSMT project.
Prerequisites: `python3.xx-venv`, `nvidia-docker`
```bash
git clone http://ssmt.iiit.ac.in/meitygit/ssmt/mt-model-deploy-dhruva.git
git clone https://ssmt.iiit.ac.in/meitygit/ssmt/mt-model-deploy-dhruva.git
cd mt-model-deploy-dhruva
sh make_triton_model_repo.sh "https://ssmt.iiit.ac.in/uploads/data_mining/models.zip" "float16"
docker build -t dhruva/ssmt-model-server:1 .
......@@ -18,11 +18,11 @@ nvidia-docker run --gpus=all --rm --shm-size 5g --network=host --name dhruva-ssm
* This repo contains the templates and component triton models for the SSMT project.
* Also contained is a Dockerfile to construct the triton server instance.
* Given a URL and quantization method (those supported by CTranslate2 i.e. `int8`, `int8_float16`, `int8_bfloat16`, `int16`, `float16` and `bfloat16`) it will download, quantize and construct the SSMT Triton Repository in `./ssmt_triton_repo`.
* Given a URL and quantization method (those supported by CTranslate2 i.e. `int8`, `int8_float16`, `int8_bfloat16`, `int16`, `float16` and `bfloat16`) it will download, quantize and construct the SSMT Triton Repository in `./ssmt_triton_repo` (quantization is currently disabled and will be re-enabled once testing has been performed on representative hardware).
* Dynamic batching and caching are supported and enabled by default.
* The repository folder can be mounted to the dhruva ssmt triton server on `/models` and can be queried via a client.
* Sample client code is also given as an ipython notebook.
* The `model.zip` package needs to contain a folder of `.pt` and `.src` files named `1` through `9` with each file corresponding to the following mapping: `{'en-hi': 1, 'hi-en': 2, 'en-te': 3, 'te-en': 4, 'hi-te': 6, 'te-hi': 7, 'en-gu': 8, 'gu-en': 9}`
* The `model.zip` package needs to contain a folder of `.pt` and `.src` files named `1` through `9` with each file corresponding to the following mapping: `{'en-hi': 1, 'hi-en': 2, 'te-en': 4, 'hi-te': 6, 'te-hi': 7, 'en-gu': 8, 'gu-en': 9}`
## Architecture of the pipeline
......@@ -35,7 +35,7 @@ The pipeline consists of 4 components, executed in order:
The exact specifications of the model inputs and outputs can be looked at in the corresponding `config.pbtxt` files.
One can construct the triton repo like so:
```bash
git clone http://ssmt.iiit.ac.in/meitygit/ssmt/mt-model-deploy-dhruva.git
git clone https://ssmt.iiit.ac.in/meitygit/ssmt/mt-model-deploy-dhruva.git
cd mt-model-deploy-dhruva
sh make_triton_model_repo.sh "https://ssmt.iiit.ac.in/uploads/data_mining/models.zip" "float16"
```
......
......@@ -6,20 +6,21 @@ python3 -m venv ./ssmt_ct2
source ./ssmt_ct2/bin/activate
pip install ctranslate2 "OpenNMT-py==1.2.0"
cd models
ct2-opennmt-py-converter --model_path 1.pt --quantization $QUANTIZATION --output_dir ./1_ct2
ct2-opennmt-py-converter --model_path 2.pt --quantization $QUANTIZATION --output_dir ./2_ct2
ct2-opennmt-py-converter --model_path 3.pt --quantization $QUANTIZATION --output_dir ./3_ct2
ct2-opennmt-py-converter --model_path 4.pt --quantization $QUANTIZATION --output_dir ./4_ct2
ct2-opennmt-py-converter --model_path 6.pt --quantization $QUANTIZATION --output_dir ./6_ct2
ct2-opennmt-py-converter --model_path 7.pt --quantization $QUANTIZATION --output_dir ./7_ct2
ct2-opennmt-py-converter --model_path 8.pt --quantization $QUANTIZATION --output_dir ./8_ct2
ct2-opennmt-py-converter --model_path 9.pt --quantization $QUANTIZATION --output_dir ./9_ct2
ct2-opennmt-py-converter --model_path 1.pt --output_dir ./1_ct2
ct2-opennmt-py-converter --model_path 2.pt --output_dir ./2_ct2
# ct2-opennmt-py-converter --model_path 3.pt --output_dir ./3_ct2
ct2-opennmt-py-converter --model_path 4.pt --output_dir ./4_ct2
ct2-opennmt-py-converter --model_path 6.pt --output_dir ./6_ct2
ct2-opennmt-py-converter --model_path 7.pt --output_dir ./7_ct2
ct2-opennmt-py-converter --model_path 8.pt --output_dir ./8_ct2
ct2-opennmt-py-converter --model_path 9.pt --output_dir ./9_ct2
cd ..
mkdir ssmt_triton_repo
cd ssmt_triton_repo
cp -r ../triton_models/ssmt_pipeline .
cp -r ../triton_models/ssmt_model_demuxer .
cp -r ../triton_models/ssmt_tokenizer .
mkdir ssmt_tokenizer/1/bpe_src
cp -r ../models/*.src ssmt_tokenizer/1/bpe_src
cp -r ../triton_models/ssmt_template_model_repo ssmt_1_ct2
cp -r ../models/1_ct2 ssmt_1_ct2/1/translator
......@@ -27,9 +28,9 @@ sed -i 's/model_name/ssmt_1_ct2/' ssmt_1_ct2/config.pbtxt
cp -r ../triton_models/ssmt_template_model_repo ssmt_2_ct2
cp -r ../models/2_ct2 ssmt_2_ct2/1/translator
sed -i 's/model_name/ssmt_2_ct2/' ssmt_2_ct2/config.pbtxt
cp -r ../triton_models/ssmt_template_model_repo ssmt_3_ct2
cp -r ../models/3_ct2 ssmt_3_ct2/1/translator
sed -i 's/model_name/ssmt_3_ct2/' ssmt_3_ct2/config.pbtxt
# cp -r ../triton_models/ssmt_template_model_repo ssmt_3_ct2
# cp -r ../models/3_ct2 ssmt_3_ct2/1/translator
# sed -i 's/model_name/ssmt_3_ct2/' ssmt_3_ct2/config.pbtxt
cp -r ../triton_models/ssmt_template_model_repo ssmt_4_ct2
cp -r ../models/4_ct2 ssmt_4_ct2/1/translator
sed -i 's/model_name/ssmt_4_ct2/' ssmt_4_ct2/config.pbtxt
......
......@@ -31,7 +31,7 @@
"outputs": [],
"source": [
"def task(x):\n",
" lang_pair_map = list({'eng-hin': 1, 'hin-eng': 2, 'eng-tel':3, 'tel-eng': 4, 'hin-tel': 6, 'tel-hin': 7, 'eng-guj': 8, 'guj-eng': 9}.keys())\n",
" lang_pair_map = list({'eng-hin': 1, 'hin-eng': 2, 'tel-eng': 4, 'hin-tel': 6, 'tel-hin': 7, 'eng-guj': 8, 'guj-eng': 9}.keys())\n",
" with httpclient.InferenceServerClient(\"localhost:8000\") as client:\n",
" async_responses = []\n",
" for i in range(10):\n",
......
......@@ -7,7 +7,7 @@ class TritonPythonModel:
self.model_config = json.loads(args["model_config"])
target_config = pb_utils.get_output_config_by_name(self.model_config, "OUTPUT_TEXT")
self.target_dtype = pb_utils.triton_string_to_numpy(target_config["data_type"])
self.lang_pair_map = {'en-hi': 1, 'hi-en': 2, 'en-te': 3, 'te-en': 4, 'hi-te': 6, 'te-hi': 7, 'en-gu': 8, 'gu-en': 9}
self.lang_pair_map = {'en-hi': 1, 'hi-en': 2, 'te-en': 4, 'hi-te': 6, 'te-hi': 7, 'en-gu': 8, 'gu-en': 9}
async def execute(self, requests):
responses = []
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment