Skip to content
GitLab
Projects
Groups
Snippets
Help
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Sign in / Register
Toggle navigation
M
mt-model-deploy-dhruva
Project overview
Project overview
Details
Activity
Releases
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Issues
0
Issues
0
List
Boards
Labels
Service Desk
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Operations
Operations
Environments
Packages & Registries
Packages & Registries
Package Registry
Analytics
Analytics
CI / CD
Repository
Value Stream
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
ssmt
mt-model-deploy-dhruva
Commits
e7e3fdf4
Commit
e7e3fdf4
authored
Sep 18, 2023
by
Nikhilesh Bhatnagar
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
Fixes for Urdu and punctuation
parent
a3a78224
Changes
6
Hide whitespace changes
Inline
Side-by-side
Showing
6 changed files
with
81 additions
and
13 deletions
+81
-13
Dockerfile
Dockerfile
+4
-2
make_ct2_models.sh
make_ct2_models.sh
+65
-0
triton_models/demuxer/1/model.py
triton_models/demuxer/1/model.py
+3
-4
triton_models/model_ct2/1/model.py
triton_models/model_ct2/1/model.py
+3
-2
triton_models/model_onmt/1/model.py
triton_models/model_onmt/1/model.py
+3
-2
triton_models/tokenizer/1/model.py
triton_models/tokenizer/1/model.py
+3
-3
No files found.
Dockerfile
View file @
e7e3fdf4
...
...
@@ -5,5 +5,7 @@ ENV VIRTUAL_ENV=/opt/dhruva-mt
RUN
python3
-m
venv
$VIRTUAL_ENV
ENV
PATH="$VIRTUAL_ENV/bin:$PATH"
RUN
pip
install
-U
ctranslate2 OpenNMT-py
==
1.2.0 git+https://github.com/vmujadia/tokenizer.git
CMD
["tritonserver", "--model-repository=/models", "--cache-config=local,size=1048576"]
EXPOSE
8000
CMD
["tritonserver", "--model-repository=/models", "--http-port=8010", "--grpc-port=8020", "--metrics-port=8030", "--cache-config=local,size=1048576"]
EXPOSE
8010
EXPOSE
8020
EXPOSE
8030
make_ct2_models.sh
0 → 100644
View file @
e7e3fdf4
#!/bin/bash
#
# Build himangy-ct2.zip: convert the HimangY OpenNMT-py checkpoints to
# int8-quantized CTranslate2 models and bundle one model per language pair.
#
# Inputs (read from $MODELS_DIR, default ~/projects/himangy_models):
#   models.zip                   -> unpacked as himangy-v0.1
#   HimangY-oneMT-Models-V1.zip  -> unpacked as himangy-v1.0
#   v2.5-Himangy.zip             -> unpacked as himangy-v2.5
#
# Output (in the current directory):
#   himangy-ct2.zip
#
# A throw-away virtualenv (./ssmt_ct2) is created for the converter and
# removed at the end, together with all intermediate directories.

# Abort on the first failing command. Without this, a failed conversion was
# still followed by `rm <checkpoint>.pt` (losing the only copy of the model),
# and a failed `cd` made the following rm/mv commands run in the wrong
# directory. `-u` is deliberately not enabled because virtualenv
# activate/deactivate scripts may reference unset variables.
set -eo pipefail

# Directory holding the source archives; override via the environment.
MODELS_DIR="${MODELS_DIR:-$HOME/projects/himangy_models}"
# Staging directory that becomes the content of the final archive.
OUT_DIR="himangy-ct2"

# convert_model STEM PAIR
# Convert STEM.pt into the CTranslate2 model directory PAIR (int8), rename
# STEM.src to PAIR.src when the names differ, then delete the checkpoint.
# The checkpoint is only deleted if the conversion succeeded (set -e).
convert_model() {
    local stem="$1" pair="$2"
    ct2-opennmt-py-converter --model_path "${stem}.pt" --quantization "int8" --output_dir "${pair}"
    if [ "${stem}" != "${pair}" ]; then
        mv "${stem}.src" "${pair}.src"
    fi
    rm "${stem}.pt"
}

# keep_checkpoint STEM PAIR
# Rename STEM.pt / STEM.src to PAIR.pt / PAIR.src without converting.
keep_checkpoint() {
    local stem="$1" pair="$2"
    mv "${stem}.pt" "${pair}.pt"
    mv "${stem}.src" "${pair}.src"
}

# convert_all SPEC...
# Run convert_model for every "STEM:PAIR" spec given.
convert_all() {
    local spec
    for spec in "$@"; do
        convert_model "${spec%%:*}" "${spec#*:}"
    done
}

# collect_ct2 RELEASE_DIR PAIR...
# Move each converted model directory and its .src file into $OUT_DIR.
collect_ct2() {
    local release="$1" pair
    shift
    for pair in "$@"; do
        mv "${release}/${pair}" "${OUT_DIR}"
        mv "${release}/${pair}.src" "${OUT_DIR}"
    done
}

# collect_pt RELEASE_DIR PAIR...
# Move each unconverted checkpoint (PAIR.pt) and its .src file into $OUT_DIR.
collect_pt() {
    local release="$1" pair
    shift
    for pair in "$@"; do
        mv "${release}/${pair}.pt" "${OUT_DIR}"
        mv "${release}/${pair}.src" "${OUT_DIR}"
    done
}

# --- Converter environment ---------------------------------------------------
python3 -m venv ./ssmt_ct2
source ./ssmt_ct2/bin/activate
pip install -U pip wheel
pip install ctranslate2 "OpenNMT-py==1.2.0"

# --- himangy-v0.1 ------------------------------------------------------------
# NOTE(review): only en-gu and gu-en from this release end up in the archive;
# the other pairs are converted and then discarded with the directory. Kept
# as-is to preserve the original behaviour.
unzip "${MODELS_DIR}/models.zip"
mv models himangy-v0.1
cd himangy-v0.1
convert_all 1:en-hi 2:hi-en 4:te-en 6:hi-te 7:te-hi 8:en-gu 9:gu-en
keep_checkpoint 3 en-te
cd ..

# --- himangy-v1.0 ------------------------------------------------------------
unzip "${MODELS_DIR}/HimangY-oneMT-Models-V1.zip"
mv HimangY-oneMT-Models-V1 himangy-v1.0
cd himangy-v1.0
convert_all \
    150001:en-hi 150002:hi-en 150003:en-te 150004:te-en \
    150005:hi-te 150006:te-hi 150007:hi-ur 150008:ur-hi \
    150010:gu-hi 150013:hi-or 150014:or-hi 150015:hi-ta \
    150017:hi-kn 150018:kn-hi 150019:ta-te 150020:te-ta
# hi-gu and hi-pa are shipped as raw OpenNMT-py checkpoints (not converted).
keep_checkpoint 150009 hi-gu
keep_checkpoint 150011 hi-pa
cd ..

# --- himangy-v2.5 ------------------------------------------------------------
# Checkpoints in this release are already named by language pair, so no
# .src rename is needed (convert_model skips it when STEM == PAIR).
unzip "${MODELS_DIR}/v2.5-Himangy.zip" -d himangy-v2.5
cd himangy-v2.5
convert_all en-hi:en-hi en-te:en-te hi-en:hi-en te-en:te-en
cd ..

# --- Assemble the bundle -----------------------------------------------------
mkdir -p "${OUT_DIR}"
collect_ct2 himangy-v2.5 en-hi hi-en en-te te-en
collect_ct2 himangy-v1.0 \
    hi-te te-hi hi-ur ur-hi gu-hi hi-or or-hi hi-ta hi-kn kn-hi ta-te te-ta
collect_pt himangy-v1.0 hi-gu hi-pa
collect_ct2 himangy-v0.1 en-gu gu-en

rm -rf himangy-v0.1 himangy-v1.0 himangy-v2.5
zip -9 -r himangy-ct2.zip "${OUT_DIR}"
rm -rf "${OUT_DIR}"

# --- Clean up the converter environment --------------------------------------
deactivate
rm -rf ssmt_ct2
triton_models/demuxer/1/model.py
View file @
e7e3fdf4
...
...
@@ -25,7 +25,6 @@ class TritonPythonModel:
result
,
"OUTPUT_SENT"
)
.
as_numpy
()[
0
,
0
]
.
decode
(
"utf-8"
)
]
for
result
in
(
await
asyncio
.
gather
(
*
awaits
))
],
...
...
@@ -37,14 +36,14 @@ class TritonPythonModel:
for
awaits
in
[
[
pb_utils
.
InferenceRequest
(
model_name
=
f"himangy-
{
input_language_id
[
0
].
decode
(
'utf-8'
)
}
-
{
output_language_id
[
0
].
decode
(
'utf-8'
)
}
"
,
model_name
=
f"himangy-
{
input_language_id
[
0
].
decode
(
'utf-8'
)
.
split
(
'_'
,
maxsplit
=
1
)[
0
]
}
-
{
output_language_id
[
0
].
decode
(
'utf-8'
).
split
(
'_'
,
maxsplit
=
1
)[
0
]
}
"
,
requested_output_names
=
[
"OUTPUT_SENT"
],
inputs
=
[
pb_utils
.
Tensor
(
"INPUT_SENT_TOKENIZED"
,
numpy
.
array
(
[[
input_text_tokenized
[
0
]
.
decode
(
"utf-8"
)
]],
dtype
=
"object"
,
[[
input_text_tokenized
[
0
]]],
dtype
=
self
.
target_dtype
,
),
)
],
...
...
triton_models/model_ct2/1/model.py
View file @
e7e3fdf4
...
...
@@ -34,6 +34,7 @@ class TritonPythonModel:
def
clean_output
(
self
,
text
):
text
=
text
.
replace
(
"@@ "
,
""
)
text
=
text
.
replace
(
"
\u200c
"
,
""
)
text
=
text
.
replace
(
" ?"
,
"?"
).
replace
(
" !"
,
"!"
).
replace
(
" ."
,
"."
).
replace
(
" ,"
,
","
)
if
text
.
startswith
(
"<to-gu> "
):
text
=
text
[
8
:]
if
text
.
endswith
(
" <to-gu>"
):
...
...
@@ -68,8 +69,8 @@ class TritonPythonModel:
pb_utils
.
Tensor
(
"OUTPUT_SENT"
,
numpy
.
array
(
[[
s
]
for
s
in
islice
(
tgt_sentences
,
bsize
)],
dtype
=
"object"
)
.
astype
(
self
.
target_dtype
)
,
[[
s
.
encode
(
'utf-8'
)]
for
s
in
islice
(
tgt_sentences
,
bsize
)],
dtype
=
self
.
target_dtype
),
)
]
)
...
...
triton_models/model_onmt/1/model.py
View file @
e7e3fdf4
...
...
@@ -93,6 +93,7 @@ class TritonPythonModel:
def
clean_output
(
self
,
text
):
text
=
text
.
replace
(
"@@ "
,
""
)
text
=
text
.
replace
(
"
\u200c
"
,
""
)
text
=
text
.
replace
(
" ?"
,
"?"
).
replace
(
" !"
,
"!"
).
replace
(
" ."
,
"."
).
replace
(
" ,"
,
","
)
if
text
.
startswith
(
"<to-gu> "
):
text
=
text
[
8
:]
if
text
.
endswith
(
" <to-gu>"
):
...
...
@@ -120,8 +121,8 @@ class TritonPythonModel:
pb_utils
.
Tensor
(
"OUTPUT_SENT"
,
numpy
.
array
(
[[
s
]
for
s
in
islice
(
tgt_sentences
,
bsize
)],
dtype
=
"object"
)
.
astype
(
self
.
target_dtype
)
,
[[
s
]
for
s
in
islice
(
tgt_sentences
,
bsize
)],
dtype
=
self
.
target_dtype
),
)
]
)
...
...
triton_models/tokenizer/1/model.py
View file @
e7e3fdf4
...
...
@@ -45,13 +45,13 @@ class TritonPythonModel:
for
tokenized_sents
in
(
(
self
.
bpes
[
f"
{
input_language_id
[
0
].
decode
(
'utf-8'
)
}
-
{
output_language_id
[
0
].
decode
(
'utf-8'
)
}
"
f"
{
input_language_id
[
0
].
decode
(
'utf-8'
)
.
split
(
'_'
,
maxsplit
=
1
)[
0
]
}
-
{
output_language_id
[
0
].
decode
(
'utf-8'
).
split
(
'_'
,
maxsplit
=
1
)[
0
]
}
"
]
.
segment
(
self
.
preprocess_text
(
tokenizer
.
tokenize
(
input_text
[
0
].
decode
(
"utf-8"
).
lower
()),
input_language_id
[
0
].
decode
(
"utf-8"
),
output_language_id
[
0
].
decode
(
"utf-8"
),
input_language_id
[
0
].
decode
(
"utf-8"
)
.
split
(
'_'
,
maxsplit
=
1
)[
0
]
,
output_language_id
[
0
].
decode
(
"utf-8"
)
.
split
(
'_'
,
maxsplit
=
1
)[
0
]
,
)
)
.
strip
()
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment