diff --git a/Dockerfile b/Dockerfile index 85ab3ee006c67afb7a60a43676d2eb6388217847..cab6282ba28e22f4b0251e2fa7b9336edfef3188 100644 --- a/Dockerfile +++ b/Dockerfile @@ -5,5 +5,7 @@ ENV VIRTUAL_ENV=/opt/dhruva-mt RUN python3 -m venv $VIRTUAL_ENV ENV PATH="$VIRTUAL_ENV/bin:$PATH" RUN pip install -U ctranslate2 OpenNMT-py==1.2.0 git+https://github.com/vmujadia/tokenizer.git -CMD ["tritonserver", "--model-repository=/models", "--cache-config=local,size=1048576"] -EXPOSE 8000 +CMD ["tritonserver", "--model-repository=/models", "--http-port=8010", "--grpc-port=8020", "--metrics-port=8030", "--cache-config=local,size=1048576"] +EXPOSE 8010 +EXPOSE 8020 +EXPOSE 8030 diff --git a/make_ct2_models.sh b/make_ct2_models.sh new file mode 100644 index 0000000000000000000000000000000000000000..68d85573ae8379d837a9c04d9e8c64cfe413767c --- /dev/null +++ b/make_ct2_models.sh @@ -0,0 +1,65 @@ +#!/bin/bash +python3 -m venv ./ssmt_ct2 +source ./ssmt_ct2/bin/activate +pip install -U pip wheel; pip install ctranslate2 "OpenNMT-py==1.2.0" +unzip ~/projects/himangy_models/models.zip;mv models himangy-v0.1;cd himangy-v0.1 +ct2-opennmt-py-converter --model_path 1.pt --quantization "int8" --output_dir en-hi;mv 1.src en-hi.src;rm 1.pt +ct2-opennmt-py-converter --model_path 2.pt --quantization "int8" --output_dir hi-en;mv 2.src hi-en.src;rm 2.pt +mv 3.pt en-te.pt;mv 3.src en-te.src +ct2-opennmt-py-converter --model_path 4.pt --quantization "int8" --output_dir te-en;mv 4.src te-en.src;rm 4.pt +ct2-opennmt-py-converter --model_path 6.pt --quantization "int8" --output_dir hi-te;mv 6.src hi-te.src;rm 6.pt +ct2-opennmt-py-converter --model_path 7.pt --quantization "int8" --output_dir te-hi;mv 7.src te-hi.src;rm 7.pt +ct2-opennmt-py-converter --model_path 8.pt --quantization "int8" --output_dir en-gu;mv 8.src en-gu.src;rm 8.pt +ct2-opennmt-py-converter --model_path 9.pt --quantization "int8" --output_dir gu-en;mv 9.src gu-en.src;rm 9.pt +cd .. +unzip ~/projects/himangy_models/HimangY-oneMT-Models-V1.zip;mv HimangY-oneMT-Models-V1 himangy-v1.0;cd himangy-v1.0 +ct2-opennmt-py-converter --model_path 150001.pt --quantization "int8" --output_dir en-hi;mv 150001.src en-hi.src;rm 150001.pt +ct2-opennmt-py-converter --model_path 150002.pt --quantization "int8" --output_dir hi-en;mv 150002.src hi-en.src;rm 150002.pt +ct2-opennmt-py-converter --model_path 150003.pt --quantization "int8" --output_dir en-te;mv 150003.src en-te.src;rm 150003.pt +ct2-opennmt-py-converter --model_path 150004.pt --quantization "int8" --output_dir te-en;mv 150004.src te-en.src;rm 150004.pt +ct2-opennmt-py-converter --model_path 150005.pt --quantization "int8" --output_dir hi-te;mv 150005.src hi-te.src;rm 150005.pt +ct2-opennmt-py-converter --model_path 150006.pt --quantization "int8" --output_dir te-hi;mv 150006.src te-hi.src;rm 150006.pt +ct2-opennmt-py-converter --model_path 150007.pt --quantization "int8" --output_dir hi-ur;mv 150007.src hi-ur.src;rm 150007.pt +ct2-opennmt-py-converter --model_path 150008.pt --quantization "int8" --output_dir ur-hi;mv 150008.src ur-hi.src;rm 150008.pt +mv 150009.pt hi-gu.pt;mv 150009.src hi-gu.src +ct2-opennmt-py-converter --model_path 150010.pt --quantization "int8" --output_dir gu-hi;mv 150010.src gu-hi.src;rm 150010.pt +mv 150011.pt hi-pa.pt;mv 150011.src hi-pa.src +ct2-opennmt-py-converter --model_path 150013.pt --quantization "int8" --output_dir hi-or;mv 150013.src hi-or.src;rm 150013.pt +ct2-opennmt-py-converter --model_path 150014.pt --quantization "int8" --output_dir or-hi;mv 150014.src or-hi.src;rm 150014.pt +ct2-opennmt-py-converter --model_path 150015.pt --quantization "int8" --output_dir hi-ta;mv 150015.src hi-ta.src;rm 150015.pt +ct2-opennmt-py-converter --model_path 150017.pt --quantization "int8" --output_dir hi-kn;mv 150017.src hi-kn.src;rm 150017.pt +ct2-opennmt-py-converter --model_path 150018.pt --quantization "int8" --output_dir kn-hi;mv 150018.src kn-hi.src;rm 150018.pt +ct2-opennmt-py-converter --model_path 150019.pt --quantization "int8" --output_dir ta-te;mv 150019.src ta-te.src;rm 150019.pt +ct2-opennmt-py-converter --model_path 150020.pt --quantization "int8" --output_dir te-ta;mv 150020.src te-ta.src;rm 150020.pt +cd .. +unzip ~/projects/himangy_models/v2.5-Himangy.zip -d himangy-v2.5;cd himangy-v2.5 +ct2-opennmt-py-converter --model_path en-hi.pt --quantization "int8" --output_dir en-hi;rm en-hi.pt +ct2-opennmt-py-converter --model_path en-te.pt --quantization "int8" --output_dir en-te;rm en-te.pt +ct2-opennmt-py-converter --model_path hi-en.pt --quantization "int8" --output_dir hi-en;rm hi-en.pt +ct2-opennmt-py-converter --model_path te-en.pt --quantization "int8" --output_dir te-en;rm te-en.pt +cd .. +mkdir himangy-ct2 +mv himangy-v2.5/en-hi himangy-ct2;mv himangy-v2.5/en-hi.src himangy-ct2 +mv himangy-v2.5/hi-en himangy-ct2;mv himangy-v2.5/hi-en.src himangy-ct2 +mv himangy-v2.5/en-te himangy-ct2;mv himangy-v2.5/en-te.src himangy-ct2 +mv himangy-v2.5/te-en himangy-ct2;mv himangy-v2.5/te-en.src himangy-ct2 +mv himangy-v1.0/hi-te himangy-ct2;mv himangy-v1.0/hi-te.src himangy-ct2 +mv himangy-v1.0/te-hi himangy-ct2;mv himangy-v1.0/te-hi.src himangy-ct2 +mv himangy-v1.0/hi-ur himangy-ct2;mv himangy-v1.0/hi-ur.src himangy-ct2 +mv himangy-v1.0/ur-hi himangy-ct2;mv himangy-v1.0/ur-hi.src himangy-ct2 +mv himangy-v1.0/hi-gu.pt himangy-ct2;mv himangy-v1.0/hi-gu.src himangy-ct2 +mv himangy-v1.0/gu-hi himangy-ct2;mv himangy-v1.0/gu-hi.src himangy-ct2 +mv himangy-v1.0/hi-pa.pt himangy-ct2;mv himangy-v1.0/hi-pa.src himangy-ct2 +mv himangy-v1.0/hi-or himangy-ct2;mv himangy-v1.0/hi-or.src himangy-ct2 +mv himangy-v1.0/or-hi himangy-ct2;mv himangy-v1.0/or-hi.src himangy-ct2 +mv himangy-v1.0/hi-ta himangy-ct2;mv himangy-v1.0/hi-ta.src himangy-ct2 +mv himangy-v1.0/hi-kn himangy-ct2;mv himangy-v1.0/hi-kn.src himangy-ct2 +mv himangy-v1.0/kn-hi himangy-ct2;mv himangy-v1.0/kn-hi.src himangy-ct2 +mv himangy-v1.0/ta-te himangy-ct2;mv himangy-v1.0/ta-te.src himangy-ct2 +mv himangy-v1.0/te-ta himangy-ct2;mv himangy-v1.0/te-ta.src himangy-ct2 +mv himangy-v0.1/en-gu himangy-ct2;mv himangy-v0.1/en-gu.src himangy-ct2 +mv himangy-v0.1/gu-en himangy-ct2;mv himangy-v0.1/gu-en.src himangy-ct2 +rm -rf himangy-v0.1 himangy-v1.0 himangy-v2.5 +zip -9 -r himangy-ct2.zip himangy-ct2 +rm -rf himangy-ct2 +deactivate;rm -rf ssmt_ct2 diff --git a/triton_models/demuxer/1/model.py b/triton_models/demuxer/1/model.py index e73b06232e9d73903aed0ef19947992b3c6c92d0..bb047aab5340b1854025a0be330acb8399366c54 100644 --- a/triton_models/demuxer/1/model.py +++ b/triton_models/demuxer/1/model.py @@ -25,7 +25,6 @@ class TritonPythonModel: result, "OUTPUT_SENT" ) .as_numpy()[0, 0] - .decode("utf-8") ] for result in (await asyncio.gather(*awaits)) ], @@ -37,14 +36,14 @@ class TritonPythonModel: for awaits in [ [ pb_utils.InferenceRequest( - model_name=f"himangy-{input_language_id[0].decode('utf-8')}-{output_language_id[0].decode('utf-8')}", + model_name=f"himangy-{input_language_id[0].decode('utf-8').split('_', maxsplit=1)[0]}-{output_language_id[0].decode('utf-8').split('_', maxsplit=1)[0]}", requested_output_names=["OUTPUT_SENT"], inputs=[ pb_utils.Tensor( "INPUT_SENT_TOKENIZED", numpy.array( - [[input_text_tokenized[0].decode("utf-8")]], - dtype="object", + [[input_text_tokenized[0]]], + dtype=self.target_dtype, ), ) ], diff --git a/triton_models/model_ct2/1/model.py b/triton_models/model_ct2/1/model.py index 09f9b3a96847d48d3f81c83acb2e4f14747d86a2..06ef312e38b06987be26671eddb58badf3656145 100644 --- a/triton_models/model_ct2/1/model.py +++ b/triton_models/model_ct2/1/model.py @@ -34,6 +34,7 @@ class TritonPythonModel: def clean_output(self, text): text = text.replace("@@ ", "") text = text.replace("\u200c", "") + text = text.replace(" ?", "?").replace(" !", "!").replace(" .", ".").replace(" ,", ",") if text.startswith(" "): text = text[8:] if text.endswith(" "): @@ -68,8 +69,8 @@ class TritonPythonModel: pb_utils.Tensor( "OUTPUT_SENT", numpy.array( - [[s] for s in islice(tgt_sentences, bsize)], dtype="object" - ).astype(self.target_dtype), + [[s.encode('utf-8')] for s in islice(tgt_sentences, bsize)], dtype=self.target_dtype + ), ) ] ) diff --git a/triton_models/model_onmt/1/model.py b/triton_models/model_onmt/1/model.py index 862b65ea397f9efe7136948626d8dc4e5d35e02c..b38a9cf759209904af338d82aff8d2592d17050b 100644 --- a/triton_models/model_onmt/1/model.py +++ b/triton_models/model_onmt/1/model.py @@ -93,6 +93,7 @@ class TritonPythonModel: def clean_output(self, text): text = text.replace("@@ ", "") text = text.replace("\u200c", "") + text = text.replace(" ?", "?").replace(" !", "!").replace(" .", ".").replace(" ,", ",") if text.startswith(" "): text = text[8:] if text.endswith(" "): @@ -120,8 +121,8 @@ class TritonPythonModel: pb_utils.Tensor( "OUTPUT_SENT", numpy.array( - [[s] for s in islice(tgt_sentences, bsize)], dtype="object" - ).astype(self.target_dtype), + [[s] for s in islice(tgt_sentences, bsize)], dtype=self.target_dtype + ), ) ] ) diff --git a/triton_models/tokenizer/1/model.py b/triton_models/tokenizer/1/model.py index c64a9ff0f6a8a1ee7ff0af3281224de2b5e797fe..3880a2eb1e07943ce9e2ab2056b28dfe6e309c2d 100644 --- a/triton_models/tokenizer/1/model.py +++ b/triton_models/tokenizer/1/model.py @@ -45,13 +45,13 @@ class TritonPythonModel: for tokenized_sents in ( ( self.bpes[ - f"{input_language_id[0].decode('utf-8')}-{output_language_id[0].decode('utf-8')}" + f"{input_language_id[0].decode('utf-8').split('_', maxsplit=1)[0]}-{output_language_id[0].decode('utf-8').split('_', maxsplit=1)[0]}" ] .segment( self.preprocess_text( tokenizer.tokenize(input_text[0].decode("utf-8").lower()), - input_language_id[0].decode("utf-8"), - output_language_id[0].decode("utf-8"), + input_language_id[0].decode("utf-8").split('_', maxsplit=1)[0], + output_language_id[0].decode("utf-8").split('_', maxsplit=1)[0], ) ) .strip()