From ae25953d06209249feb12fc49fbed157a940cd46 Mon Sep 17 00:00:00 2001
From: Nikhilesh Bhatnagar
Date: Mon, 31 Jul 2023 15:03:40 +0000
Subject: [PATCH] Bugfixes. Temporarily removed quantization.

---
 Dockerfile                                  |  2 +-
 README.md                                   |  8 ++++----
 make_triton_model_repo.sh                   | 23 ++++++++++++-----------
 triton_client.ipynb                         |  2 +-
 triton_models/ssmt_model_demuxer/1/model.py |  2 +-
 5 files changed, 19 insertions(+), 18 deletions(-)

diff --git a/Dockerfile b/Dockerfile
index 04b307b..85ab3ee 100644
--- a/Dockerfile
+++ b/Dockerfile
@@ -1,4 +1,4 @@
-FROM nvcr.io/nvidia/tritonserver:23.06-py3
+FROM nvcr.io/nvidia/tritonserver:23.07-py3
 WORKDIR /opt/tritonserver
 RUN apt-get update && apt-get install -y python3.10-venv
 ENV VIRTUAL_ENV=/opt/dhruva-mt
diff --git a/README.md b/README.md
index 91f8988..c6c112b 100644
--- a/README.md
+++ b/README.md
@@ -7,7 +7,7 @@
 This repo contains code for python backend CTranslate2 based triton models for the SSMT project.
 Prerequisites: `python3.xx-venv`, `nvidia-docker`
 ```bash
-git clone http://ssmt.iiit.ac.in/meitygit/ssmt/mt-model-deploy-dhruva.git
+git clone https://ssmt.iiit.ac.in/meitygit/ssmt/mt-model-deploy-dhruva.git
 cd mt-model-deploy-dhruva
 sh make_triton_model_repo.sh "https://ssmt.iiit.ac.in/uploads/data_mining/models.zip" "float16"
 docker build -t dhruva/ssmt-model-server:1 .
@@ -18,11 +18,11 @@ nvidia-docker run --gpus=all --rm --shm-size 5g --network=host --name dhruva-ssm

 * This repo contains the templates and component triton models for the SSMT project.
 * Also contained is a Dockerfile to construct the triton server instance.
-* Given a URL and quantization method (those supported by CTranslate2 i.e. `int8`, `int8_float16`, `int8_bfloat16`, `int16`, `float16` and `bfloat16`) it will download, quantize and construct the SSMT Triton Repository in `./ssmt_triton_repo`.
+* Given a URL and quantization method (those supported by CTranslate2 i.e. `int8`, `int8_float16`, `int8_bfloat16`, `int16`, `float16` and `bfloat16`) it will download, quantize and construct the SSMT Triton Repository in `./ssmt_triton_repo` (disabled, will be enabled once testing is performed on representative hardware).
 * Dynamic batching and caching is supported and enabled by default.
 * The repository folder can me mounted to the dhruva ssmt triton server on `/models` and can be queried via a client.
 * Sample client code is also given as an ipython notebook.
-* The `model.zip` package needs to contain a folder of `.pt` and `.src` files named `1` through `9` with each file corresponding to the following mapping: `{'en-hi': 1, 'hi-en': 2, 'en-te': 3, 'te-en': 4, 'hi-te': 6, 'te-hi': 7, 'en-gu': 8, 'gu-en': 9}`
+* The `model.zip` package needs to contain a folder of `.pt` and `.src` files named `1` through `9` with each file corresponding to the following mapping: `{'en-hi': 1, 'hi-en': 2, 'te-en': 4, 'hi-te': 6, 'te-hi': 7, 'en-gu': 8, 'gu-en': 9}`

 ## Architecture of the pipeline

@@ -35,7 +35,7 @@ The pipeline consists of 4 components, executed in order:
 The exact specifications of the model inputs and outputs can be looked at in the corresponding `config.pbtxt` files.
 One can construct the triton repo like so:
 ```bash
-git clone http://ssmt.iiit.ac.in/meitygit/ssmt/mt-model-deploy-dhruva.git
+git clone https://ssmt.iiit.ac.in/meitygit/ssmt/mt-model-deploy-dhruva.git
 cd mt-model-deploy-dhruva
 sh make_triton_model_repo.sh "https://ssmt.iiit.ac.in/uploads/data_mining/models.zip" "float16"
 ```
diff --git a/make_triton_model_repo.sh b/make_triton_model_repo.sh
index 8b5a8fc..b2d6f07 100644
--- a/make_triton_model_repo.sh
+++ b/make_triton_model_repo.sh
@@ -6,20 +6,21 @@ python3 -m venv ./ssmt_ct2
 source ./ssmt_ct2/bin/activate
 pip install ctranslate2 "OpenNMT-py==1.2.0"
 cd models
-ct2-opennmt-py-converter --model_path 1.pt --quantization $QUANTIZATION --output_dir ./1_ct2
-ct2-opennmt-py-converter --model_path 2.pt --quantization $QUANTIZATION --output_dir ./2_ct2
-ct2-opennmt-py-converter --model_path 3.pt --quantization $QUANTIZATION --output_dir ./3_ct2
-ct2-opennmt-py-converter --model_path 4.pt --quantization $QUANTIZATION --output_dir ./4_ct2
-ct2-opennmt-py-converter --model_path 6.pt --quantization $QUANTIZATION --output_dir ./6_ct2
-ct2-opennmt-py-converter --model_path 7.pt --quantization $QUANTIZATION --output_dir ./7_ct2
-ct2-opennmt-py-converter --model_path 8.pt --quantization $QUANTIZATION --output_dir ./8_ct2
-ct2-opennmt-py-converter --model_path 9.pt --quantization $QUANTIZATION --output_dir ./9_ct2
+ct2-opennmt-py-converter --model_path 1.pt --output_dir ./1_ct2
+ct2-opennmt-py-converter --model_path 2.pt --output_dir ./2_ct2
+# ct2-opennmt-py-converter --model_path 3.pt --output_dir ./3_ct2
+ct2-opennmt-py-converter --model_path 4.pt --output_dir ./4_ct2
+ct2-opennmt-py-converter --model_path 6.pt --output_dir ./6_ct2
+ct2-opennmt-py-converter --model_path 7.pt --output_dir ./7_ct2
+ct2-opennmt-py-converter --model_path 8.pt --output_dir ./8_ct2
+ct2-opennmt-py-converter --model_path 9.pt --output_dir ./9_ct2
 cd ..
 mkdir ssmt_triton_repo
 cd ssmt_triton_repo
 cp -r ../triton_models/ssmt_pipeline .
 cp -r ../triton_models/ssmt_model_demuxer .
 cp -r ../triton_models/ssmt_tokenizer .
+mkdir ssmt_tokenizer/1/bpe_src
 cp -r ../models/*.src ssmt_tokenizer/1/bpe_src
 cp -r ../triton_models/ssmt_template_model_repo ssmt_1_ct2
 cp -r ../models/1_ct2 ssmt_1_ct2/1/translator
@@ -27,9 +28,9 @@ sed -i 's/model_name/ssmt_1_ct2/' ssmt_1_ct2/config.pbtxt
 cp -r ../triton_models/ssmt_template_model_repo ssmt_2_ct2
 cp -r ../models/2_ct2 ssmt_2_ct2/1/translator
 sed -i 's/model_name/ssmt_2_ct2/' ssmt_2_ct2/config.pbtxt
-cp -r ../triton_models/ssmt_template_model_repo ssmt_3_ct2
-cp -r ../models/3_ct2 ssmt_3_ct2/1/translator
-sed -i 's/model_name/ssmt_3_ct2/' ssmt_3_ct2/config.pbtxt
+# cp -r ../triton_models/ssmt_template_model_repo ssmt_3_ct2
+# cp -r ../models/3_ct2 ssmt_3_ct2/1/translator
+# sed -i 's/model_name/ssmt_3_ct2/' ssmt_3_ct2/config.pbtxt
 cp -r ../triton_models/ssmt_template_model_repo ssmt_4_ct2
 cp -r ../models/4_ct2 ssmt_4_ct2/1/translator
 sed -i 's/model_name/ssmt_4_ct2/' ssmt_4_ct2/config.pbtxt
diff --git a/triton_client.ipynb b/triton_client.ipynb
index e993bee..52fecb1 100644
--- a/triton_client.ipynb
+++ b/triton_client.ipynb
@@ -31,7 +31,7 @@
    "outputs": [],
    "source": [
     "def task(x):\n",
-    "    lang_pair_map = list({'eng-hin': 1, 'hin-eng': 2, 'eng-tel':3, 'tel-eng': 4, 'hin-tel': 6, 'tel-hin': 7, 'eng-guj': 8, 'guj-eng': 9}.keys())\n",
+    "    lang_pair_map = list({'eng-hin': 1, 'hin-eng': 2, 'tel-eng': 4, 'hin-tel': 6, 'tel-hin': 7, 'eng-guj': 8, 'guj-eng': 9}.keys())\n",
     "    with httpclient.InferenceServerClient(\"localhost:8000\") as client:\n",
     "        async_responses = []\n",
     "        for i in range(10):\n",
diff --git a/triton_models/ssmt_model_demuxer/1/model.py b/triton_models/ssmt_model_demuxer/1/model.py
index e8c6744..7b1aa82 100644
--- a/triton_models/ssmt_model_demuxer/1/model.py
+++ b/triton_models/ssmt_model_demuxer/1/model.py
@@ -7,7 +7,7 @@ class TritonPythonModel:
         self.model_config = json.loads(args["model_config"])
         target_config = pb_utils.get_output_config_by_name(self.model_config, "OUTPUT_TEXT")
         self.target_dtype = pb_utils.triton_string_to_numpy(target_config["data_type"])
-        self.lang_pair_map = {'en-hi': 1, 'hi-en': 2, 'en-te': 3, 'te-en': 4, 'hi-te': 6, 'te-hi': 7, 'en-gu': 8, 'gu-en': 9}
+        self.lang_pair_map = {'en-hi': 1, 'hi-en': 2, 'te-en': 4, 'hi-te': 6, 'te-hi': 7, 'en-gu': 8, 'gu-en': 9}

     async def execute(self, requests):
         responses = []
-- 
GitLab
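
A quick way to sanity-check a server built from this patch is a small synchronous client. The sketch below is not code from the repo: it assumes the ensemble is exposed as `ssmt_pipeline` on `localhost:8000` (the port used in `triton_client.ipynb`), that it takes two string tensors of shape `[1, 1]` named `INPUT_TEXT` and `INPUT_LANGUAGE_ID_PAIR`, and that it returns `OUTPUT_TEXT` (only the latter name appears in the demuxer's config lookup in this diff). Verify the real tensor names and shapes against `ssmt_triton_repo/ssmt_pipeline/config.pbtxt` before use.

```python
# Minimal sketch of a synchronous Triton HTTP client for the deployed pipeline.
# Assumed, not taken from this patch: model name "ssmt_pipeline", input tensor
# names "INPUT_TEXT" / "INPUT_LANGUAGE_ID_PAIR", and shape [1, 1]. Check the
# generated config.pbtxt files; only "OUTPUT_TEXT" is visible in the diff above.
import numpy as np
import tritonclient.http as httpclient


def translate(text: str, lang_pair: str, url: str = "localhost:8000") -> str:
    with httpclient.InferenceServerClient(url) as client:
        # Triton string tensors use the BYTES datatype and numpy object arrays.
        text_in = httpclient.InferInput("INPUT_TEXT", [1, 1], "BYTES")
        text_in.set_data_from_numpy(np.array([[text]], dtype="object"))
        lang_in = httpclient.InferInput("INPUT_LANGUAGE_ID_PAIR", [1, 1], "BYTES")
        lang_in.set_data_from_numpy(np.array([[lang_pair]], dtype="object"))
        result = client.infer(
            "ssmt_pipeline",
            inputs=[text_in, lang_in],
            outputs=[httpclient.InferRequestedOutput("OUTPUT_TEXT")],
        )
        # The returned elements are UTF-8 bytes; decode before returning.
        return result.as_numpy("OUTPUT_TEXT")[0, 0].decode("utf-8")


if __name__ == "__main__":
    # Direction 3 (en-te) is disabled by this patch. Note the demuxer maps
    # 'en-hi'-style codes while the sample notebook uses 'eng-hin'-style ones;
    # use whichever format the pipeline config actually expects.
    print(translate("hello world", "en-hi"))
```

This mirrors what the bundled notebook does with `async_infer`, but in a blocking form that is easier to run as a one-off smoke test after `nvidia-docker run` brings the server up.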