From 0eb35ddac51b3a9e9ca4023f7a56ce747e44b3d3 Mon Sep 17 00:00:00 2001
From: Nikhilesh Bhatnagar
Date: Mon, 4 Sep 2023 06:57:02 +0000
Subject: [PATCH] Added 11 more language pairs. Support for more than one sentence per request. Support for different language pairs in the same request.

---
 .gitignore | 1 +
 README.md | 16 ++-
 make_triton_model_repo.sh | 111 ++++++++++--------
 triton_models/demuxer/1/model.py | 9 ++
 .../config.pbtxt | 8 +-
 .../1/model.py | 13 +-
 .../config.pbtxt | 7 +-
 triton_models/model_onmt/1/model.py | 32 +++++
 triton_models/model_onmt/config.pbtxt | 32 +++++
 .../{ssmt_pipeline => nmt}/1/.gitkeep | 0
 .../{ssmt_pipeline => nmt}/config.pbtxt | 10 +-
 triton_models/ssmt_model_demuxer/1/model.py | 22 ----
 triton_models/ssmt_tokenizer/1/model.py | 28 -----
 .../1/apply_bpe.py | 0
 .../1/bpe_src/.gitkeep | 0
 triton_models/tokenizer/1/model.py | 13 ++
 .../config.pbtxt | 8 +-
 17 files changed, 175 insertions(+), 135 deletions(-)
 create mode 100644 triton_models/demuxer/1/model.py
 rename triton_models/{ssmt_model_demuxer => demuxer}/config.pbtxt (88%)
 rename triton_models/{ssmt_template_model_repo => model_ct2}/1/model.py (84%)
 rename triton_models/{ssmt_template_model_repo => model_ct2}/config.pbtxt (83%)
 create mode 100644 triton_models/model_onmt/1/model.py
 create mode 100644 triton_models/model_onmt/config.pbtxt
 rename triton_models/{ssmt_pipeline => nmt}/1/.gitkeep (100%)
 rename triton_models/{ssmt_pipeline => nmt}/config.pbtxt (91%)
 delete mode 100644 triton_models/ssmt_model_demuxer/1/model.py
 delete mode 100644 triton_models/ssmt_tokenizer/1/model.py
 rename triton_models/{ssmt_tokenizer => tokenizer}/1/apply_bpe.py (100%)
 rename triton_models/{ssmt_tokenizer => tokenizer}/1/bpe_src/.gitkeep (100%)
 create mode 100644 triton_models/tokenizer/1/model.py
 rename triton_models/{ssmt_tokenizer => tokenizer}/config.pbtxt (88%)

diff --git a/.gitignore b/.gitignore
index 7048a1d..c7d65da 100644
--- a/.gitignore
+++ b/.gitignore
@@ -1 +1,2 @@
 ssmt_triton_repo
+himangy_triton_repo
\ No newline at end of file
diff --git a/README.md b/README.md
index 9fd8eda..78caaff 100644
--- a/README.md
+++ b/README.md
@@ -11,20 +11,18 @@ Quantization disabled until qualitative testing is performed. For now, the argum
 ```bash
 git clone https://ssmt.iiit.ac.in/meitygit/ssmt/mt-model-deploy-dhruva.git
 cd mt-model-deploy-dhruva
-bash make_triton_model_repo.sh "https://ssmt.iiit.ac.in/uploads/data_mining/models.zip" "int8"
-docker build -t dhruva/ssmt-model-server:1 .
-nvidia-docker run --gpus=all --rm --shm-size 5g --network=host --name dhruva-ssmt-triton-server -v./ssmt_triton_repo:/models dhruva/ssmt-model-server:1
+bash make_triton_model_repo.sh
+docker build -t dhruva/himangy-model-server:1 .
+nvidia-docker run --gpus=all --rm --shm-size 5g --network=host --name dhruva-himangy-triton-server -v./himangy_triton_repo:/models dhruva/himangy-model-server:1
 ```
 
 ## What this repo does
 
 * This repo contains the templates and component triton models for the SSMT project.
 * Also contained is a Dockerfile to construct the triton server instance.
-* Given a URL and quantization method (those supported by CTranslate2 i.e. `int8`, `int8_float16`, `int8_bfloat16`, `int16`, `float16` and `bfloat16`) it will download, quantize and construct the SSMT Triton Repository in `./ssmt_triton_repo` (int8 is the most efficient in size and speed on NVIDIA T4).
 * Dynamic batching and caching is supported and enabled by default.
-* The repository folder can me mounted to the dhruva ssmt triton server on `/models` and can be queried via a client.
+* The repository folder can be mounted to the dhruva himangy triton server on `/models` and can be queried via a client.
 * Sample client code is also given as an ipython notebook.
-* The `model.zip` package needs to contain a folder of `.pt` and `.src` files named `1` through `9` with each file corresponding to the following mapping: `{'en-hi': 1, 'hi-en': 2, 'te-en': 4, 'hi-te': 6, 'te-hi': 7, 'en-gu': 8, 'gu-en': 9}`
 
 ## Architecture of the pipeline
 
@@ -39,15 +37,15 @@ One can construct the triton repo like so:
 ```bash
 git clone https://ssmt.iiit.ac.in/meitygit/ssmt/mt-model-deploy-dhruva.git
 cd mt-model-deploy-dhruva
-bash make_triton_model_repo.sh "https://ssmt.iiit.ac.in/uploads/data_mining/models.zip" "int8"
+bash make_triton_model_repo.sh
 ```
 
 ## Starting the triton server
 
 We customize the tritonserver image with the required python packages in a venv and enable the cache in the startup command. After the model repo has been built, one can build and run the server like so:
 ```bash
-docker build -t dhruva/ssmt-model-server:1 .
-nvidia-docker run --gpus=all --rm --shm-size 5g --network=host --name dhruva-ssmt-triton-server -v./ssmt_triton_repo:/models dhruva/ssmt-model-server:1
+docker build -t dhruva/himangy-model-server:1 .
+nvidia-docker run --gpus=all --rm --shm-size 5g --network=host --name dhruva-himangy-triton-server -v./himangy_triton_repo:/models dhruva/himangy-model-server:1
 ```
 
 ## Querying the triton server
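A minimal client sketch is worth keeping next to the README changes above; the bundled ipython notebook remains the reference client. The sketch assumes `pip install tritonclient[http]`, a server on `localhost:8000`, and that the ensemble is exposed under the `nmt` model name (the directory name introduced by this patch); the tensor names come from the configs later in this patch.

```python
# Hedged sketch of querying the translation ensemble; the shipped notebook is authoritative.
# Assumptions: tritonclient[http] installed, server at localhost:8000, ensemble named "nmt".
import numpy
import tritonclient.http as http_client

client = http_client.InferenceServerClient(url='localhost:8000')

def translate(sentences, src_lang, tgt_lang):
    inputs = []
    for name, column in (('INPUT_TEXT', sentences),
                         ('INPUT_LANGUAGE_ID', [src_lang] * len(sentences)),
                         ('OUTPUT_LANGUAGE_ID', [tgt_lang] * len(sentences))):
        tensor = http_client.InferInput(name, [len(sentences), 1], 'BYTES')  # TYPE_STRING, dims [1]
        tensor.set_data_from_numpy(numpy.array([[v] for v in column], dtype='object'))
        inputs.append(tensor)
    result = client.infer('nmt', inputs, outputs=[http_client.InferRequestedOutput('OUTPUT_TEXT')])
    return [row[0].decode('utf-8') for row in result.as_numpy('OUTPUT_TEXT')]

print(translate(['hello world'], 'en', 'hi'))
```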
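The rebuilt `make_triton_model_repo.sh` below stamps out one Triton model directory per language pair from the `model_ct2`/`model_onmt` templates: `cp -r` copies the template, the first `sed` renames the model in `config.pbtxt`, and the second `sed` bakes the pair into `model.py` by rewriting the `input_lang, output_lang` placeholder. For readers skimming the long one-liners, a hypothetical Python equivalent of that cp/sed templating:

```python
# Hypothetical re-implementation of the per-pair cp/sed templating performed by the script below.
# Paths mirror the repo layout; the placeholder strings are the ones baked into the templates.
import shutil
from pathlib import Path

def instantiate(template, src, tgt, repo=Path('himangy_triton_repo')):
    model_dir = repo / f'himangy-{src}-{tgt}'
    shutil.copytree(Path('triton_models') / template, model_dir)                      # cp -r
    config = model_dir / 'config.pbtxt'
    config.write_text(config.read_text().replace('model_name', model_dir.name))      # sed #1
    model_py = model_dir / '1' / 'model.py'
    model_py.write_text(model_py.read_text().replace("input_lang, output_lang",
                                                     f"'{src}', '{tgt}'"))           # sed #2

instantiate('model_ct2', 'en', 'hi')
```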
diff --git a/make_triton_model_repo.sh b/make_triton_model_repo.sh
index 075b4ed..8b58ed1 100644
--- a/make_triton_model_repo.sh
+++ b/make_triton_model_repo.sh
@@ -1,54 +1,71 @@
 #!/bin/bash
-MODELS_URL=$1
-QUANTIZATION=$2
-wget -O models.zip $MODELS_URL --no-check-certificate
+wget -O models.zip "https://ssmt.iiit.ac.in/uploads/data_mining/models.zip" --no-check-certificate
+wget "https://cdn.iiit.ac.in/cdn/ssmt.iiit.ac.in/uploads/data_mining/HimangY-oneMT-Models-V1.zip" --no-check-certificate
 unzip models.zip
+unzip HimangY-oneMT-Models-V1.zip
 python3 -m venv ./ssmt_ct2
 source ./ssmt_ct2/bin/activate
-pip install -U pip wheel
-pip install ctranslate2 "OpenNMT-py==1.2.0"
-cd models
-ct2-opennmt-py-converter --model_path 1.pt --output_dir ./1_ct2
-ct2-opennmt-py-converter --model_path 2.pt --output_dir ./2_ct2
-# ct2-opennmt-py-converter --model_path 3.pt --output_dir ./3_ct2
-ct2-opennmt-py-converter --model_path 4.pt --output_dir ./4_ct2
-ct2-opennmt-py-converter --model_path 6.pt --output_dir ./6_ct2
-ct2-opennmt-py-converter --model_path 7.pt --output_dir ./7_ct2
-ct2-opennmt-py-converter --model_path 8.pt --output_dir ./8_ct2
-ct2-opennmt-py-converter --model_path 9.pt --output_dir ./9_ct2
+pip install -U pip wheel; pip install ctranslate2 "OpenNMT-py==1.2.0"
+cd HimangY-oneMT-Models-V1
+ct2-opennmt-py-converter --model_path 150001.pt --output_dir en-hi; mv 150001.src en-hi.src
+ct2-opennmt-py-converter --model_path 150002.pt --output_dir hi-en; mv 150002.src hi-en.src
+ct2-opennmt-py-converter --model_path 150003.pt --output_dir en-te; mv 150003.src en-te.src
+ct2-opennmt-py-converter --model_path 150004.pt --output_dir te-en; mv 150004.src te-en.src
+ct2-opennmt-py-converter --model_path 150005.pt --output_dir hi-te; mv 150005.src hi-te.src
+ct2-opennmt-py-converter --model_path 150006.pt --output_dir te-hi; mv 150006.src te-hi.src
+ct2-opennmt-py-converter --model_path 150007.pt --output_dir hi-ur; mv 150007.src hi-ur.src
+ct2-opennmt-py-converter --model_path 150008.pt --output_dir ur-hi; mv 150008.src ur-hi.src +# ct2-opennmt-py-converter --model_path 150009.pt --output_dir hi-gu; mv 150009.src hi-gu.src +ct2-opennmt-py-converter --model_path 150010.pt --output_dir gu-hi; mv 150010.src gu-hi.src +# ct2-opennmt-py-converter --model_path 150011.pt --output_dir hi-pa; mv 150011.src hi-pa.src +ct2-opennmt-py-converter --model_path 150013.pt --output_dir hi-or; mv 150013.src hi-or.src +ct2-opennmt-py-converter --model_path 150014.pt --output_dir or-hi; mv 150014.src or-hi.src +ct2-opennmt-py-converter --model_path 150015.pt --output_dir hi-ta; mv 150015.src hi-ta.src +ct2-opennmt-py-converter --model_path 150017.pt --output_dir hi-kn; mv 150017.src hi-kn.src +ct2-opennmt-py-converter --model_path 150018.pt --output_dir kn-hi; mv 150018.src kn-hi.src +ct2-opennmt-py-converter --model_path 150019.pt --output_dir ta-te; mv 150019.src ta-te.src +ct2-opennmt-py-converter --model_path 150020.pt --output_dir te-ta; mv 150020.src te-ta.src +cd ../models +ct2-opennmt-py-converter --model_path 1.pt --output_dir en-hi; mv 1.src en-hi.src +ct2-opennmt-py-converter --model_path 2.pt --output_dir hi-en; mv 2.src hi-en.src +# ct2-opennmt-py-converter --model_path 3.pt --output_dir en-te; mv 3.src en-te.src +ct2-opennmt-py-converter --model_path 4.pt --output_dir te-en; mv 4.src te-en.src +ct2-opennmt-py-converter --model_path 6.pt --output_dir hi-te; mv 6.src hi-te.src +ct2-opennmt-py-converter --model_path 7.pt --output_dir te-hi; mv 7.src te-hi.src +ct2-opennmt-py-converter --model_path 8.pt --output_dir en-gu; mv 8.src en-gu.src +ct2-opennmt-py-converter --model_path 9.pt --output_dir gu-en; mv 9.src gu-en.src cd .. -mkdir ssmt_triton_repo -cd ssmt_triton_repo -cp -r ../triton_models/ssmt_pipeline nmt -cp -r ../triton_models/ssmt_model_demuxer . -cp -r ../triton_models/ssmt_tokenizer . -cp -r ../models/*.src ssmt_tokenizer/1/bpe_src -cp -r ../triton_models/ssmt_template_model_repo ssmt_1_ct2 -cp -r ../models/1_ct2 ssmt_1_ct2/1/translator -sed -i 's/model_name/ssmt_1_ct2/' ssmt_1_ct2/config.pbtxt -cp -r ../triton_models/ssmt_template_model_repo ssmt_2_ct2 -cp -r ../models/2_ct2 ssmt_2_ct2/1/translator -sed -i 's/model_name/ssmt_2_ct2/' ssmt_2_ct2/config.pbtxt -# cp -r ../triton_models/ssmt_template_model_repo ssmt_3_ct2 -# cp -r ../models/3_ct2 ssmt_3_ct2/1/translator -# sed -i 's/model_name/ssmt_3_ct2/' ssmt_3_ct2/config.pbtxt -cp -r ../triton_models/ssmt_template_model_repo ssmt_4_ct2 -cp -r ../models/4_ct2 ssmt_4_ct2/1/translator -sed -i 's/model_name/ssmt_4_ct2/' ssmt_4_ct2/config.pbtxt -cp -r ../triton_models/ssmt_template_model_repo ssmt_6_ct2 -cp -r ../models/6_ct2 ssmt_6_ct2/1/translator -sed -i 's/model_name/ssmt_6_ct2/' ssmt_6_ct2/config.pbtxt -cp -r ../triton_models/ssmt_template_model_repo ssmt_7_ct2 -cp -r ../models/7_ct2 ssmt_7_ct2/1/translator -sed -i 's/model_name/ssmt_7_ct2/' ssmt_7_ct2/config.pbtxt -cp -r ../triton_models/ssmt_template_model_repo ssmt_8_ct2 -cp -r ../models/8_ct2 ssmt_8_ct2/1/translator -sed -i 's/model_name/ssmt_8_ct2/' ssmt_8_ct2/config.pbtxt -cp -r ../triton_models/ssmt_template_model_repo ssmt_9_ct2 -cp -r ../models/9_ct2 ssmt_9_ct2/1/translator -sed -i 's/model_name/ssmt_9_ct2/' ssmt_9_ct2/config.pbtxt +mkdir himangy_triton_repo +cd himangy_triton_repo +cp -r ../triton_models/nmt . +cp -r ../triton_models/demuxer . +cp -r ../triton_models/tokenizer . 
+cp -r ../triton_models/model_ct2 himangy-en-hi;mv ../HimangY-oneMT-Models-V1/en-hi himangy-en-hi/1/translator;mv ../HimangY-oneMT-Models-V1/en-hi.src tokenizer/1/bpe_src;sed -i 's/model_name/himangy-en-hi/' himangy-en-hi/config.pbtxt;sed -i "s/input_lang, output_lang/'en', 'hi'/" himangy-en-hi/1/model.py +cp -r ../triton_models/model_ct2 himangy-hi-en;mv ../HimangY-oneMT-Models-V1/hi-en himangy-hi-en/1/translator;mv ../HimangY-oneMT-Models-V1/hi-en.src tokenizer/1/bpe_src;sed -i 's/model_name/himangy-hi-en/' himangy-hi-en/config.pbtxt;sed -i "s/input_lang, output_lang/'hi', 'en'/" himangy-hi-en/1/model.py +cp -r ../triton_models/model_ct2 himangy-en-te;mv ../HimangY-oneMT-Models-V1/en-te himangy-en-te/1/translator;mv ../HimangY-oneMT-Models-V1/en-te.src tokenizer/1/bpe_src;sed -i 's/model_name/himangy-en-te/' himangy-en-te/config.pbtxt;sed -i "s/input_lang, output_lang/'en', 'te'/" himangy-en-te/1/model.py +cp -r ../triton_models/model_ct2 himangy-te-en;mv ../HimangY-oneMT-Models-V1/te-en himangy-te-en/1/translator;mv ../HimangY-oneMT-Models-V1/te-en.src tokenizer/1/bpe_src;sed -i 's/model_name/himangy-te-en/' himangy-te-en/config.pbtxt;sed -i "s/input_lang, output_lang/'te', 'en'/" himangy-te-en/1/model.py +cp -r ../triton_models/model_ct2 himangy-hi-te;mv ../HimangY-oneMT-Models-V1/hi-te himangy-hi-te/1/translator;mv ../HimangY-oneMT-Models-V1/hi-te.src tokenizer/1/bpe_src;sed -i 's/model_name/himangy-hi-te/' himangy-hi-te/config.pbtxt;sed -i "s/input_lang, output_lang/'hi', 'te'/" himangy-hi-te/1/model.py +cp -r ../triton_models/model_ct2 himangy-te-hi;mv ../HimangY-oneMT-Models-V1/te-hi himangy-te-hi/1/translator;mv ../HimangY-oneMT-Models-V1/te-hi.src tokenizer/1/bpe_src;sed -i 's/model_name/himangy-te-hi/' himangy-te-hi/config.pbtxt;sed -i "s/input_lang, output_lang/'te', 'hi'/" himangy-te-hi/1/model.py +cp -r ../triton_models/model_ct2 himangy-hi-ur;mv ../HimangY-oneMT-Models-V1/hi-ur himangy-hi-ur/1/translator;mv ../HimangY-oneMT-Models-V1/hi-ur.src tokenizer/1/bpe_src;sed -i 's/model_name/himangy-hi-ur/' himangy-hi-ur/config.pbtxt;sed -i "s/input_lang, output_lang/'hi', 'ur'/" himangy-hi-ur/1/model.py +cp -r ../triton_models/model_ct2 himangy-ur-hi;mv ../HimangY-oneMT-Models-V1/ur-hi himangy-ur-hi/1/translator;mv ../HimangY-oneMT-Models-V1/ur-hi.src tokenizer/1/bpe_src;sed -i 's/model_name/himangy-ur-hi/' himangy-ur-hi/config.pbtxt;sed -i "s/input_lang, output_lang/'ur', 'hi'/" himangy-ur-hi/1/model.py +cp -r ../triton_models/model_onmt himangy-hi-gu;mv ../HimangY-oneMT-Models-V1/150009.pt himangy-hi-gu/1/translator.pt;mv ../HimangY-oneMT-Models-V1/150009.src tokenizer/1/bpe_src/hi-gu.src;sed -i 's/model_name/himangy-hi-gu/' himangy-hi-gu/config.pbtxt;sed -i "s/input_lang, output_lang/'hi', 'gu'/" himangy-hi-gu/1/model.py +cp -r ../triton_models/model_ct2 himangy-gu-hi;mv ../HimangY-oneMT-Models-V1/gu-hi himangy-gu-hi/1/translator;mv ../HimangY-oneMT-Models-V1/gu-hi.src tokenizer/1/bpe_src;sed -i 's/model_name/himangy-gu-hi/' himangy-gu-hi/config.pbtxt;sed -i "s/input_lang, output_lang/'gu', 'hi'/" himangy-gu-hi/1/model.py +cp -r ../triton_models/model_onmt himangy-hi-pa;mv ../HimangY-oneMT-Models-V1/150011.pt himangy-hi-pa/1/translator.pt;mv ../HimangY-oneMT-Models-V1/150011.src tokenizer/1/bpe_src/hi-pa.src;sed -i 's/model_name/himangy-hi-pa/' himangy-hi-pa/config.pbtxt;sed -i "s/input_lang, output_lang/'hi', 'pa'/" himangy-hi-pa/1/model.py +cp -r ../triton_models/model_ct2 himangy-hi-or;mv ../HimangY-oneMT-Models-V1/hi-or himangy-hi-or/1/translator;mv 
../HimangY-oneMT-Models-V1/hi-or.src tokenizer/1/bpe_src;sed -i 's/model_name/himangy-hi-or/' himangy-hi-or/config.pbtxt;sed -i "s/input_lang, output_lang/'hi', 'or'/" himangy-hi-or/1/model.py +cp -r ../triton_models/model_ct2 himangy-or-hi;mv ../HimangY-oneMT-Models-V1/or-hi himangy-or-hi/1/translator;mv ../HimangY-oneMT-Models-V1/or-hi.src tokenizer/1/bpe_src;sed -i 's/model_name/himangy-or-hi/' himangy-or-hi/config.pbtxt;sed -i "s/input_lang, output_lang/'or', 'hi'/" himangy-or-hi/1/model.py +# cp -r ../triton_models/model_ct2 himangy-hi-ta;mv ../HimangY-oneMT-Models-V1/hi-ta himangy-hi-ta/1/translator;mv ../HimangY-oneMT-Models-V1/hi-ta.src tokenizer/1/bpe_src;sed -i 's/model_name/himangy-hi-ta/' himangy-hi-ta/config.pbtxt;sed -i "s/input_lang, output_lang/'hi', 'ta'/" himangy-hi-ta/1/model.py +cp -r ../triton_models/model_ct2 himangy-hi-kn;mv ../HimangY-oneMT-Models-V1/hi-kn himangy-hi-kn/1/translator;mv ../HimangY-oneMT-Models-V1/hi-kn.src tokenizer/1/bpe_src;sed -i 's/model_name/himangy-hi-kn/' himangy-hi-kn/config.pbtxt;sed -i "s/input_lang, output_lang/'hi', 'kn'/" himangy-hi-kn/1/model.py +cp -r ../triton_models/model_ct2 himangy-kn-hi;mv ../HimangY-oneMT-Models-V1/kn-hi himangy-kn-hi/1/translator;mv ../HimangY-oneMT-Models-V1/kn-hi.src tokenizer/1/bpe_src;sed -i 's/model_name/himangy-kn-hi/' himangy-kn-hi/config.pbtxt;sed -i "s/input_lang, output_lang/'kn', 'hi'/" himangy-kn-hi/1/model.py +# cp -r ../triton_models/model_ct2 himangy-ta-te;mv ../HimangY-oneMT-Models-V1/ta-te himangy-ta-te/1/translator;mv ../HimangY-oneMT-Models-V1/ta-te.src tokenizer/1/bpe_src;sed -i 's/model_name/himangy-ta-te/' himangy-ta-te/config.pbtxt;sed -i "s/input_lang, output_lang/'ta', 'te'/" himangy-ta-te/1/model.py +# cp -r ../triton_models/model_ct2 himangy-te-ta;mv ../HimangY-oneMT-Models-V1/te-ta himangy-te-ta/1/translator;mv ../HimangY-oneMT-Models-V1/te-ta.src tokenizer/1/bpe_src;sed -i 's/model_name/himangy-te-ta/' himangy-te-ta/config.pbtxt;sed -i "s/input_lang, output_lang/'te', 'ta'/" himangy-te-ta/1/model.py +# cp -r ../triton_models/model_ct2 himangy-en-hi;mv ../models/en-hi himangy-en-hi/1/translator;mv ../models/en-hi.src tokenizer/1/bpe_src;sed -i 's/model_name/himangy-en-hi/' himangy-en-hi/config.pbtxt;sed -i "s/input_lang, output_lang/'en', 'hi'/" himangy-en-hi/1/model.py +# cp -r ../triton_models/model_ct2 himangy-hi-en;mv ../models/hi-en himangy-hi-en/1/translator;mv ../models/hi-en.src tokenizer/1/bpe_src;sed -i 's/model_name/himangy-hi-en/' himangy-hi-en/config.pbtxt;sed -i "s/input_lang, output_lang/'hi', 'en'/" himangy-hi-en/1/model.py +# cp -r ../triton_models/model_onnmt himangy-en-te;mv ../models/3.pt himangy-en-te/1/translator.pt;mv ../models/3.src tokenizer/1/bpe_src/en-te.src;sed -i 's/model_name/himangy-en-te/' himangy-en-te/config.pbtxt;sed -i "s/input_lang, output_lang/'en', 'te'/" himangy-en-te/1/model.py +# cp -r ../triton_models/model_ct2 himangy-te-en;mv ../models/te-en himangy-te-en/1/translator;mv ../models/te-en.src tokenizer/1/bpe_src;sed -i 's/model_name/himangy-te-en/' himangy-te-en/config.pbtxt;sed -i "s/input_lang, output_lang/'te', 'en'/" himangy-te-en/1/model.py +# cp -r ../triton_models/model_ct2 himangy-hi-te;mv ../models/hi-te himangy-hi-te/1/translator;mv ../models/hi-te.src tokenizer/1/bpe_src;sed -i 's/model_name/himangy-hi-te/' himangy-hi-te/config.pbtxt;sed -i "s/input_lang, output_lang/'hi', 'te'/" himangy-hi-te/1/model.py +# cp -r ../triton_models/model_ct2 himangy-te-hi;mv ../models/te-hi himangy-te-hi/1/translator;mv ../models/te-hi.src 
tokenizer/1/bpe_src;sed -i 's/model_name/himangy-te-hi/' himangy-te-hi/config.pbtxt;sed -i "s/input_lang, output_lang/'te', 'hi'/" himangy-te-hi/1/model.py +cp -r ../triton_models/model_ct2 himangy-en-gu;mv ../models/en-gu himangy-en-gu/1/translator;mv ../models/en-gu.src tokenizer/1/bpe_src;sed -i 's/model_name/himangy-en-gu/' himangy-en-gu/config.pbtxt;sed -i "s/input_lang, output_lang/'en', 'gu'/" himangy-en-gu/1/model.py +cp -r ../triton_models/model_ct2 himangy-gu-en;mv ../models/gu-en himangy-gu-en/1/translator;mv ../models/gu-en.src tokenizer/1/bpe_src;sed -i 's/model_name/himangy-gu-en/' himangy-gu-en/config.pbtxt;sed -i "s/input_lang, output_lang/'gu', 'en'/" himangy-gu-en/1/model.py cd .. deactivate -rm -rf ssmt_ct2 -rm -f models.zip -rm -rf models +rm -rf ssmt_ct2;rm -rf HimangY-oneMT-Models-V1;rm -rf models;rm -f models.zip;rm -f HimangY-oneMT-Models-V1.zip \ No newline at end of file diff --git a/triton_models/demuxer/1/model.py b/triton_models/demuxer/1/model.py new file mode 100644 index 0000000..8527076 --- /dev/null +++ b/triton_models/demuxer/1/model.py @@ -0,0 +1,9 @@ +import json +import numpy +import asyncio +import triton_python_backend_utils as pb_utils + +class TritonPythonModel: + def initialize(self, args): self.target_dtype = pb_utils.triton_string_to_numpy(pb_utils.get_output_config_by_name(json.loads(args['model_config']), 'OUTPUT_TEXT')['data_type']) + async def execute(self, requests): return [pb_utils.InferenceResponse(output_tensors=[pb_utils.Tensor('OUTPUT_TEXT', numpy.array([[pb_utils.get_output_tensor_by_name(result, 'OUTPUT_SENT').as_numpy()[0, 0].decode('utf-8')] for result in (await asyncio.gather(*awaits))], dtype=self.target_dtype))]) for awaits in [[pb_utils.InferenceRequest(model_name=f"himangy-{input_language_id[0].decode('utf-8')}-{output_language_id[0].decode('utf-8')}", requested_output_names=['OUTPUT_SENT'], inputs=[pb_utils.Tensor('INPUT_SENT_TOKENIZED', numpy.array([[input_text_tokenized[0].decode('utf-8')]], dtype='object'))]).async_exec() for input_text_tokenized, input_language_id, output_language_id in zip(pb_utils.get_input_tensor_by_name(request, 'INPUT_TEXT_TOKENIZED').as_numpy(), pb_utils.get_input_tensor_by_name(request, 'INPUT_LANGUAGE_ID').as_numpy(), pb_utils.get_input_tensor_by_name(request, 'OUTPUT_LANGUAGE_ID').as_numpy())] for request in requests]] + def finalize(self): pass \ No newline at end of file diff --git a/triton_models/ssmt_model_demuxer/config.pbtxt b/triton_models/demuxer/config.pbtxt similarity index 88% rename from triton_models/ssmt_model_demuxer/config.pbtxt rename to triton_models/demuxer/config.pbtxt index 1e0a3db..109a39f 100644 --- a/triton_models/ssmt_model_demuxer/config.pbtxt +++ b/triton_models/demuxer/config.pbtxt @@ -1,4 +1,4 @@ -name: "ssmt_model_demuxer" +name: "demuxer" backend: "python" max_batch_size: 4096 @@ -39,8 +39,4 @@ instance_group [ count: 1 kind: KIND_CPU } -] - -response_cache { - enable: true -} +] \ No newline at end of file diff --git a/triton_models/ssmt_template_model_repo/1/model.py b/triton_models/model_ct2/1/model.py similarity index 84% rename from triton_models/ssmt_template_model_repo/1/model.py rename to triton_models/model_ct2/1/model.py index 5d41689..170e90a 100644 --- a/triton_models/ssmt_template_model_repo/1/model.py +++ b/triton_models/model_ct2/1/model.py @@ -8,25 +8,24 @@ import triton_python_backend_utils as pb_utils class TritonPythonModel: def initialize(self, args): current_path = os.path.dirname(os.path.abspath(__file__)) + self.source_lang, self.target_lang 
= input_lang, output_lang
         self.model_config = json.loads(args["model_config"])
         self.device_id = int(json.loads(args['model_instance_device_id']))
-        target_config = pb_utils.get_output_config_by_name(self.model_config, "OUTPUT_TEXT")
+        target_config = pb_utils.get_output_config_by_name(self.model_config, "OUTPUT_SENT")
         self.target_dtype = pb_utils.triton_string_to_numpy(target_config["data_type"])
         try: self.translator = Translator(f"{os.path.join(current_path, 'translator')}", device="cuda", intra_threads=1, inter_threads=1, device_index=[self.device_id])
         except: self.translator = Translator(f"{os.path.join(current_path, 'translator')}", device="cpu", intra_threads=4)
-
     def clean_output(self, text):
         text = text.replace('@@ ', '')
+        text = text.replace('\u200c', '')
         if text.startswith('<to-gu> '): text = text[8:]
         if text.endswith(' <to-gu>'): text = text[:-8]
         return text
-
     def execute(self, requests):
-        source_list = [pb_utils.get_input_tensor_by_name(request, "INPUT_TEXT_TOKENIZED") for request in requests]
+        source_list = [pb_utils.get_input_tensor_by_name(request, "INPUT_SENT_TOKENIZED") for request in requests]
         bsize_list = [source.as_numpy().shape[0] for source in source_list]
         src_sentences = [s[0].decode('utf-8').strip().split(' ') for source in source_list for s in source.as_numpy()]
         tgt_sentences = [self.clean_output(' '.join(result.hypotheses[0])) for result in self.translator.translate_iterable(src_sentences, max_batch_size=128, max_input_length=100, max_decoding_length=100)]
-        responses = [pb_utils.InferenceResponse(output_tensors=[pb_utils.Tensor("OUTPUT_TEXT", numpy.array([[s]for s in islice(tgt_sentences, bsize)], dtype='object').astype(self.target_dtype))]) for bsize in bsize_list]
+        responses = [pb_utils.InferenceResponse(output_tensors=[pb_utils.Tensor("OUTPUT_SENT", numpy.array([[s] for s in islice(tgt_iter, bsize)], dtype='object').astype(self.target_dtype))]) for tgt_iter in [iter(tgt_sentences)] for bsize in bsize_list]
         return responses
-
-    def finalize(self): self.translator.unload_model()
+    def finalize(self): self.translator.unload_model()
\ No newline at end of file
diff --git a/triton_models/ssmt_template_model_repo/config.pbtxt b/triton_models/model_ct2/config.pbtxt
similarity index 83%
rename from triton_models/ssmt_template_model_repo/config.pbtxt
rename to triton_models/model_ct2/config.pbtxt
index 0127f90..82502f2 100644
--- a/triton_models/ssmt_template_model_repo/config.pbtxt
+++ b/triton_models/model_ct2/config.pbtxt
@@ -4,14 +4,15 @@ max_batch_size: 512
 
 input [
   {
-    name: "INPUT_TEXT_TOKENIZED"
+    name: "INPUT_SENT_TOKENIZED"
     data_type: TYPE_STRING
     dims: [ 1 ]
   }
 ]
+
 output [
   {
-    name: "OUTPUT_TEXT"
+    name: "OUTPUT_SENT"
     data_type: TYPE_STRING
     dims: [ 1 ]
   }
@@ -28,4 +29,4 @@ instance_group [
 
 response_cache {
   enable: true
-}
+}
\ No newline at end of file
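The `model_ct2` backend above flattens every sentence from all concurrently batched Triton requests into one `translate_iterable` call, then hands each request back exactly its own rows. The re-slicing only works when `islice` draws from a single shared iterator over the results; a self-contained sketch of that gather→translate→re-slice pattern, with a stand-in for the CTranslate2 call:

```python
# Sketch of the gather->translate->re-slice pattern used by model_ct2/model_onmt above.
# fake_translate stands in for ctranslate2.Translator.translate_iterable.
from itertools import islice

def fake_translate(token_batches):
    return [' '.join(tokens).upper() for tokens in token_batches]

def execute(requests):  # requests: one list of pre-tokenized sentences per Triton request
    bsize_list = [len(request) for request in requests]
    flat = [sentence.split(' ') for request in requests for sentence in request]
    translated = iter(fake_translate(flat))  # shared iterator: each islice() call advances it
    return [list(islice(translated, bsize)) for bsize in bsize_list]

print(execute([['hello world'], ['a b', 'c d']]))  # [['HELLO WORLD'], ['A B', 'C D']]
```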
diff --git a/triton_models/model_onmt/1/model.py b/triton_models/model_onmt/1/model.py
new file mode 100644
index 0000000..d52b00d
--- /dev/null
+++ b/triton_models/model_onmt/1/model.py
@@ -0,0 +1,32 @@
+import os
+import json
+import numpy
+from itertools import islice
+from argparse import Namespace
+import triton_python_backend_utils as pb_utils
+from onmt.translate.translator import build_translator
+
+class TritonPythonModel:
+    def initialize(self, args):
+        current_path = os.path.dirname(os.path.abspath(__file__))
+        self.source_lang, self.target_lang = input_lang, output_lang
+        self.model_config = json.loads(args["model_config"])
+        self.device_id = int(json.loads(args['model_instance_device_id']))
+        target_config = pb_utils.get_output_config_by_name(self.model_config, "OUTPUT_SENT")
+        self.target_dtype = pb_utils.triton_string_to_numpy(target_config["data_type"])
+        try: self.translator = build_translator(Namespace(tgt_prefix=False, alpha=0.0, batch_type='sents', beam_size=5, beta=-0.0, block_ngram_repeat=0, coverage_penalty='none', data_type='text', dump_beam='', fp32=True, gpu=self.device_id, ignore_when_blocking=[], length_penalty='none', max_length=100, max_sent_length=None, min_length=0, models=[f"{os.path.join(current_path, 'translator.pt')}"], n_best=1, output='/dev/null', phrase_table='', random_sampling_temp=1.0, random_sampling_topk=1, ratio=-0.0, replace_unk=False, report_align=False, report_time=False, seed=829, stepwise_penalty=False, tgt=None, verbose=False), report_score=False)
+        except: self.translator = build_translator(Namespace(tgt_prefix=False, alpha=0.0, batch_type='sents', beam_size=5, beta=-0.0, block_ngram_repeat=0, coverage_penalty='none', data_type='text', dump_beam='', fp32=True, gpu=-1, ignore_when_blocking=[], length_penalty='none', max_length=100, max_sent_length=None, min_length=0, models=[f"{os.path.join(current_path, 'translator.pt')}"], n_best=1, output='/dev/null', phrase_table='', random_sampling_temp=1.0, random_sampling_topk=1, ratio=-0.0, replace_unk=False, report_align=False, report_time=False, seed=829, stepwise_penalty=False, tgt=None, verbose=False), report_score=False)
+    def clean_output(self, text):
+        text = text.replace('@@ ', '')
+        text = text.replace('\u200c', '')
+        if text.startswith('<to-gu> '): text = text[8:]
+        if text.endswith(' <to-gu>'): text = text[:-8]
+        return text
+    def execute(self, requests):
+        source_list = [pb_utils.get_input_tensor_by_name(request, "INPUT_SENT_TOKENIZED") for request in requests]
+        bsize_list = [source.as_numpy().shape[0] for source in source_list]
+        src_sentences = [s[0].decode('utf-8').strip().split(' ') for source in source_list for s in source.as_numpy()]
+        tgt_sentences = [self.clean_output(result[0]) for result in self.translator.translate(src_sentences, batch_size=128)[1]]
+        responses = [pb_utils.InferenceResponse(output_tensors=[pb_utils.Tensor("OUTPUT_SENT", numpy.array([[s] for s in islice(tgt_iter, bsize)], dtype='object').astype(self.target_dtype))]) for tgt_iter in [iter(tgt_sentences)] for bsize in bsize_list]
+        return responses
+    def finalize(self): del self.translator
\ No newline at end of file
diff --git a/triton_models/model_onmt/config.pbtxt b/triton_models/model_onmt/config.pbtxt
new file mode 100644
index 0000000..82502f2
--- /dev/null
+++ b/triton_models/model_onmt/config.pbtxt
@@ -0,0 +1,32 @@
+name: "model_name"
+backend: "python"
+max_batch_size: 512
+
+input [
+  {
+    name: "INPUT_SENT_TOKENIZED"
+    data_type: TYPE_STRING
+    dims: [ 1 ]
+  }
+]
+
+output [
+  {
+    name: "OUTPUT_SENT"
+    data_type: TYPE_STRING
+    dims: [ 1 ]
+  }
+]
+
+dynamic_batching {}
+
+instance_group [
+  {
+    count: 1
+    kind: KIND_GPU
+  }
+]
+
+response_cache {
+  enable: true
+}
\ No newline at end of file
diff --git a/triton_models/ssmt_pipeline/1/.gitkeep b/triton_models/nmt/1/.gitkeep
similarity index 100%
rename from triton_models/ssmt_pipeline/1/.gitkeep
rename to triton_models/nmt/1/.gitkeep
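`model_onmt` above drives OpenNMT-py 1.2.0 directly for the checkpoints that CTranslate2 could not convert; the two `build_translator` calls differ only in the `gpu` field (CUDA device id vs. `-1` for CPU). A readability-only sketch that factors the shared options out of those one-liners, with field values copied from the calls above:

```python
# Readability sketch only: same Namespace fields and values as the build_translator
# calls in model_onmt/1/model.py above, parameterized over checkpoint path and device.
from argparse import Namespace

def onmt_options(model_path, gpu):  # gpu: CUDA device id, or -1 for CPU
    return Namespace(
        models=[model_path], gpu=gpu, fp32=True,              # checkpoint + device
        beam_size=5, n_best=1, max_length=100, min_length=0,  # beam search budget
        alpha=0.0, beta=-0.0, ratio=-0.0,
        length_penalty='none', coverage_penalty='none',
        random_sampling_temp=1.0, random_sampling_topk=1, seed=829,
        block_ngram_repeat=0, ignore_when_blocking=[], stepwise_penalty=False,
        replace_unk=False, phrase_table='', data_type='text', batch_type='sents',
        tgt=None, tgt_prefix=False, max_sent_length=None,
        report_align=False, report_time=False, dump_beam='',
        verbose=False, output='/dev/null')
```

With such a helper, the try/except above would reduce to `build_translator(onmt_options(path, self.device_id), report_score=False)`, falling back to `gpu=-1` on CPU-only hosts.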
"ssmt_tokenizer" + model_name: "tokenizer" model_version: 1 input_map { key: "INPUT_TEXT" @@ -55,7 +55,7 @@ ensemble_scheduling { } }, { - model_name: "ssmt_model_demuxer" + model_name: "demuxer" model_version: 1 input_map { key: "INPUT_TEXT_TOKENIZED" @@ -75,8 +75,4 @@ ensemble_scheduling { } } ] -} - -response_cache { - enable: true -} +} \ No newline at end of file diff --git a/triton_models/ssmt_model_demuxer/1/model.py b/triton_models/ssmt_model_demuxer/1/model.py deleted file mode 100644 index 7b1aa82..0000000 --- a/triton_models/ssmt_model_demuxer/1/model.py +++ /dev/null @@ -1,22 +0,0 @@ -import json -import asyncio -import triton_python_backend_utils as pb_utils - -class TritonPythonModel: - def initialize(self, args): - self.model_config = json.loads(args["model_config"]) - target_config = pb_utils.get_output_config_by_name(self.model_config, "OUTPUT_TEXT") - self.target_dtype = pb_utils.triton_string_to_numpy(target_config["data_type"]) - self.lang_pair_map = {'en-hi': 1, 'hi-en': 2, 'te-en': 4, 'hi-te': 6, 'te-hi': 7, 'en-gu': 8, 'gu-en': 9} - - async def execute(self, requests): - responses = [] - infer_response_awaits = [] - for request in requests: - language_pair = f"{pb_utils.get_input_tensor_by_name(request, 'INPUT_LANGUAGE_ID').as_numpy()[0, 0].decode('utf-8')}-{pb_utils.get_input_tensor_by_name(request, 'OUTPUT_LANGUAGE_ID').as_numpy()[0, 0].decode('utf-8')}" - inference_request = pb_utils.InferenceRequest(model_name=f'ssmt_{self.lang_pair_map[language_pair]}_ct2', requested_output_names=['OUTPUT_TEXT'], inputs=[pb_utils.get_input_tensor_by_name(request, 'INPUT_TEXT_TOKENIZED')]) - infer_response_awaits.append(inference_request.async_exec()) - responses = await asyncio.gather(*infer_response_awaits) - return responses - - def finalize(self): pass diff --git a/triton_models/ssmt_tokenizer/1/model.py b/triton_models/ssmt_tokenizer/1/model.py deleted file mode 100644 index 06d9cf8..0000000 --- a/triton_models/ssmt_tokenizer/1/model.py +++ /dev/null @@ -1,28 +0,0 @@ -import os -import json -import numpy -from .apply_bpe import BPE -from ilstokenizer import tokenizer -import triton_python_backend_utils as pb_utils - -class TritonPythonModel: - def initialize(self, args): - current_path = os.path.dirname(os.path.abspath(__file__)) - self.model_config = json.loads(args["model_config"]) - target_config = pb_utils.get_output_config_by_name(self.model_config, "INPUT_TEXT_TOKENIZED") - self.target_dtype = pb_utils.triton_string_to_numpy(target_config["data_type"]) - self.lang_pair_map = {'en-hi': 1, 'hi-en': 2, 'en-te': 3, 'te-en': 4, 'hi-te': 6, 'te-hi': 7, 'en-gu': 8, 'gu-en': 9} - self.bpes = {lang_pair: BPE(open(os.path.join(current_path, f'bpe_src/{model_id}.src'), encoding='utf-8')) for lang_pair, model_id in self.lang_pair_map.items()} - - def tokenize_and_segment(self, input_text, source_lang, target_lang): - tokenized_text = tokenizer.tokenize(input_text) - if source_lang == 'en' and target_lang == 'gu': tokenized_text = f' {tokenized_text} ' - return self.bpes[f'{source_lang}-{target_lang}'].segment(tokenized_text).strip() - - def execute(self, requests): - source_gen = ((pb_utils.get_input_tensor_by_name(request, "INPUT_TEXT"), pb_utils.get_input_tensor_by_name(request, "INPUT_LANGUAGE_ID"), pb_utils.get_input_tensor_by_name(request, "OUTPUT_LANGUAGE_ID")) for request in requests) - tokenized_gen = ((self.tokenize_and_segment(input_text[0].decode('utf-8'), input_language_id[0].decode('utf-8'), output_language_id[0].decode('utf-8')) for input_text, input_language_id, 
diff --git a/triton_models/ssmt_tokenizer/1/apply_bpe.py b/triton_models/tokenizer/1/apply_bpe.py
similarity index 100%
rename from triton_models/ssmt_tokenizer/1/apply_bpe.py
rename to triton_models/tokenizer/1/apply_bpe.py
diff --git a/triton_models/ssmt_tokenizer/1/bpe_src/.gitkeep b/triton_models/tokenizer/1/bpe_src/.gitkeep
similarity index 100%
rename from triton_models/ssmt_tokenizer/1/bpe_src/.gitkeep
rename to triton_models/tokenizer/1/bpe_src/.gitkeep
diff --git a/triton_models/tokenizer/1/model.py b/triton_models/tokenizer/1/model.py
new file mode 100644
index 0000000..4dad844
--- /dev/null
+++ b/triton_models/tokenizer/1/model.py
@@ -0,0 +1,13 @@
+import os
+import json
+import numpy
+from glob import iglob
+from .apply_bpe import BPE
+from ilstokenizer import tokenizer
+import triton_python_backend_utils as pb_utils
+
+class TritonPythonModel:
+    def initialize(self, args): self.target_dtype, self.bpes = pb_utils.triton_string_to_numpy(pb_utils.get_output_config_by_name(json.loads(args["model_config"]), "INPUT_TEXT_TOKENIZED")["data_type"]), {fname.rsplit('/', maxsplit=1)[-1][:-len('.src')]: BPE(open(fname, 'r', encoding='utf-8')) for fname in iglob(f'{os.path.dirname(os.path.abspath(__file__))}/bpe_src/*.src')}
+    def preprocess_text(self, text, source_lang, target_lang): return f"<to-gu> {text} <to-gu>" if source_lang == 'en' and target_lang == 'gu' else text
+    def execute(self, requests): return [pb_utils.InferenceResponse(output_tensors=[pb_utils.Tensor("INPUT_TEXT_TOKENIZED", numpy.array([[tokenized_sent] for tokenized_sent in tokenized_sents], dtype=self.target_dtype))]) for tokenized_sents in ((self.bpes[f"{input_language_id[0].decode('utf-8')}-{output_language_id[0].decode('utf-8')}"].segment(self.preprocess_text(tokenizer.tokenize(input_text[0].decode('utf-8').lower()), input_language_id[0].decode('utf-8'), output_language_id[0].decode('utf-8'))).strip() for input_text, input_language_id, output_language_id in zip(input_texts.as_numpy(), input_language_ids.as_numpy(), output_language_ids.as_numpy())) for input_texts, input_language_ids, output_language_ids in ((pb_utils.get_input_tensor_by_name(request, "INPUT_TEXT"), pb_utils.get_input_tensor_by_name(request, "INPUT_LANGUAGE_ID"), pb_utils.get_input_tensor_by_name(request, "OUTPUT_LANGUAGE_ID")) for request in requests))]
+    def finalize(self): pass
\ No newline at end of file
diff --git a/triton_models/ssmt_tokenizer/config.pbtxt b/triton_models/tokenizer/config.pbtxt
similarity index 88%
rename from triton_models/ssmt_tokenizer/config.pbtxt
rename to triton_models/tokenizer/config.pbtxt
index 051e2e4..5cfe82a 100644
--- a/triton_models/ssmt_tokenizer/config.pbtxt
+++ b/triton_models/tokenizer/config.pbtxt
@@ -1,4 +1,4 @@
-name: "ssmt_tokenizer"
+name: "tokenizer"
 backend: "python"
 max_batch_size: 4096
 
@@ -39,8 +39,4 @@ instance_group [
     count: 8
     kind: KIND_CPU
   }
-]
-
-response_cache {
-  enable: true
-}
+]
\ No newline at end of file
-- 
GitLab
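A closing note on the new tokenizer above: instead of the deleted version's hard-coded pair-to-file-id map, it discovers BPE codes at startup by globbing `bpe_src/*.src` and keys them by filename (`en-hi.src` becomes key `en-hi`), so supporting a new pair is just dropping in another codes file; `BPE` is the subword-nmt implementation vendored in `apply_bpe.py`. A sketch of that discovery step:

```python
# Sketch of the filename-keyed BPE table built in tokenizer/1/model.py above:
# bpe_src/en-hi.src -> key 'en-hi'. Prints {} if run outside a populated repo.
import os
from glob import iglob

def discover_bpe_codes(model_dir):
    return {os.path.basename(path)[:-len('.src')]: path
            for path in iglob(os.path.join(model_dir, 'bpe_src', '*.src'))}

print(discover_bpe_codes('triton_models/tokenizer/1'))
```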