ssmt / mt-model-deploy-dhruva · commit d0049da2

    Formatting pass.

Authored Sep 04, 2023 by Nikhilesh Bhatnagar · parent f61cdc30

Showing 11 changed files with 437 additions and 113 deletions (+437 −113).
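All eleven diffs below are mechanical: single-quoted string literals become double-quoted, long one-line expressions are rewrapped with trailing commas, and files that ended without a newline gain one. The result matches the style of an opinionated formatter such as Black, though the commit does not record which tool (if any) was run; no behavior is intended to change.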
.gitignore                               +0   −1
triton_models/demuxer/1/model.py         +63  −3
triton_models/demuxer/config.pbtxt       +1   −1
triton_models/model_ct2/1/model.py       +61  −13
triton_models/model_ct2/config.pbtxt     +1   −1
triton_models/model_onmt/1/model.py      +114 −13
triton_models/model_onmt/config.pbtxt    +1   −1
triton_models/nmt/config.pbtxt           +1   −1
triton_models/tokenizer/1/apply_bpe.py   +125 −74
triton_models/tokenizer/1/model.py       +69  −4
triton_models/tokenizer/config.pbtxt     +1   −1
.gitignore  (+0 −1)

    ssmt_triton_repo
    himangy_triton_repo
    (no newline at end of file)
triton_models/demuxer/1/model.py  (+63 −3)

@@ -3,7 +3,67 @@ import numpy

The logic is untouched; the one-line, single-quoted comprehensions were requoted and reflowed. The formatted version:

    import asyncio
    import triton_python_backend_utils as pb_utils


    class TritonPythonModel:
        def initialize(self, args):
            self.target_dtype = pb_utils.triton_string_to_numpy(
                pb_utils.get_output_config_by_name(
                    json.loads(args["model_config"]), "OUTPUT_TEXT"
                )["data_type"]
            )

        async def execute(self, requests):
            return [
                pb_utils.InferenceResponse(
                    output_tensors=[
                        pb_utils.Tensor(
                            "OUTPUT_TEXT",
                            numpy.array(
                                [
                                    [
                                        pb_utils.get_output_tensor_by_name(result, "OUTPUT_SENT")
                                        .as_numpy()[0, 0]
                                        .decode("utf-8")
                                    ]
                                    for result in (await asyncio.gather(*awaits))
                                ],
                                dtype=self.target_dtype,
                            ),
                        )
                    ]
                )
                for awaits in [
                    [
                        pb_utils.InferenceRequest(
                            model_name=f"himangy-{input_language_id[0].decode('utf-8')}-{output_language_id[0].decode('utf-8')}",
                            requested_output_names=["OUTPUT_SENT"],
                            inputs=[
                                pb_utils.Tensor(
                                    "INPUT_SENT_TOKENIZED",
                                    numpy.array(
                                        [[input_text_tokenized[0].decode("utf-8")]],
                                        dtype="object",
                                    ),
                                )
                            ],
                        ).async_exec()
                        for input_text_tokenized, input_language_id, output_language_id in zip(
                            pb_utils.get_input_tensor_by_name(request, "INPUT_TEXT_TOKENIZED").as_numpy(),
                            pb_utils.get_input_tensor_by_name(request, "INPUT_LANGUAGE_ID").as_numpy(),
                            pb_utils.get_input_tensor_by_name(request, "OUTPUT_LANGUAGE_ID").as_numpy(),
                        )
                    ]
                    for request in requests
                ]
            ]

        def finalize(self):
            pass
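For readers unfamiliar with Triton's BLS (Business Logic Scripting) API, the nested comprehension above fans each sentence of each request out to a per-language-pair model and awaits all the calls at once. A loop-based sketch of the same control flow, unrolled for readability (not part of the commit):

    # Sketch only: the committed execute() expresses exactly this as one comprehension.
    async def execute(self, requests):
        responses = []
        for request in requests:
            # One in-flight BLS call per sentence, routed by language pair.
            futures = [
                pb_utils.InferenceRequest(
                    model_name=f"himangy-{src[0].decode('utf-8')}-{tgt[0].decode('utf-8')}",
                    requested_output_names=["OUTPUT_SENT"],
                    inputs=[
                        pb_utils.Tensor(
                            "INPUT_SENT_TOKENIZED",
                            numpy.array([[sent[0].decode("utf-8")]], dtype="object"),
                        )
                    ],
                ).async_exec()
                for sent, src, tgt in zip(
                    pb_utils.get_input_tensor_by_name(request, "INPUT_TEXT_TOKENIZED").as_numpy(),
                    pb_utils.get_input_tensor_by_name(request, "INPUT_LANGUAGE_ID").as_numpy(),
                    pb_utils.get_input_tensor_by_name(request, "OUTPUT_LANGUAGE_ID").as_numpy(),
                )
            ]
            # gather() preserves submission order, so outputs line up with inputs.
            results = await asyncio.gather(*futures)
            texts = [
                [pb_utils.get_output_tensor_by_name(r, "OUTPUT_SENT").as_numpy()[0, 0].decode("utf-8")]
                for r in results
            ]
            responses.append(
                pb_utils.InferenceResponse(
                    output_tensors=[
                        pb_utils.Tensor("OUTPUT_TEXT", numpy.array(texts, dtype=self.target_dtype))
                    ]
                )
            )
        return responses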
triton_models/demuxer/config.pbtxt  (+1 −1)

@@ -39,4 +39,4 @@ instance_group [

Whitespace only: the file previously ended without a trailing newline after the closing bracket; one was added.

    count: 1
    kind: KIND_CPU
    }
    ]
triton_models/model_ct2/1/model.py  (+61 −13)

@@ -5,27 +5,75 @@ from itertools import islice

Same formatting-only change. Note that input_lang and output_lang are free names in this file; they appear to be substituted in when a concrete model repository is generated. The formatted version:

    from ctranslate2 import Translator
    import triton_python_backend_utils as pb_utils


    class TritonPythonModel:
        def initialize(self, args):
            current_path = os.path.dirname(os.path.abspath(__file__))
            self.source_lang, self.target_lang = input_lang, output_lang
            self.model_config = json.loads(args["model_config"])
            self.device_id = int(json.loads(args["model_instance_device_id"]))
            target_config = pb_utils.get_output_config_by_name(self.model_config, "OUTPUT_SENT")
            self.target_dtype = pb_utils.triton_string_to_numpy(target_config["data_type"])
            try:
                self.translator = Translator(
                    f"{os.path.join(current_path, 'translator')}",
                    device="cuda",
                    intra_threads=1,
                    inter_threads=1,
                    device_index=[self.device_id],
                )
            except:
                self.translator = Translator(
                    f"{os.path.join(current_path, 'translator')}",
                    device="cpu",
                    intra_threads=4,
                )

        def clean_output(self, text):
            text = text.replace("@@ ", "")
            text = text.replace("\u200c", "")
            if text.startswith("<to-gu> "):
                text = text[8:]
            if text.endswith(" <to-gu>"):
                text = text[:-8]
            return text

        def execute(self, requests):
            source_list = [
                pb_utils.get_input_tensor_by_name(request, "INPUT_SENT_TOKENIZED")
                for request in requests
            ]
            bsize_list = [source.as_numpy().shape[0] for source in source_list]
            src_sentences = [
                s[0].decode("utf-8").strip().split(" ")
                for source in source_list
                for s in source.as_numpy()
            ]
            tgt_sentences = [
                self.clean_output(" ".join(result.hypotheses[0]))
                for result in self.translator.translate_iterable(
                    src_sentences,
                    max_batch_size=128,
                    max_input_length=100,
                    max_decoding_length=100,
                )
            ]
            responses = [
                pb_utils.InferenceResponse(
                    output_tensors=[
                        pb_utils.Tensor(
                            "OUTPUT_SENT",
                            numpy.array(
                                [[s] for s in islice(tgt_sentences, bsize)], dtype="object"
                            ).astype(self.target_dtype),
                        )
                    ]
                )
                for bsize in bsize_list
            ]
            return responses

        def finalize(self):
            self.translator.unload_model()
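One thing worth flagging in execute(): tgt_sentences is a list, and islice over a list restarts from index 0 on every call, so the per-request slices only come out right when a single request is in the batch; wrapping the list in iter() would make consecutive slices consume the stream in order. A plain-data demonstration of the difference (sketch, not part of the commit):

    from itertools import islice

    bsize_list = [2, 3]              # batch sizes of two incoming requests
    tgt = ["a", "b", "c", "d", "e"]  # flat list of translated sentences

    [list(islice(tgt, b)) for b in bsize_list]       # [['a', 'b'], ['a', 'b', 'c']] - overlapping
    stream = iter(tgt)
    [list(islice(stream, b)) for b in bsize_list]    # [['a', 'b'], ['c', 'd', 'e']] - partitioned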
triton_models/model_ct2/config.pbtxt  (+1 −1)

@@ -29,4 +29,4 @@ instance_group [

Trailing newline added after the closing brace:

    response_cache {
      enable: true
    }
triton_models/model_onmt/1/model.py  (+114 −13)

@@ -6,27 +6,128 @@ from argparse import Namespace

Formatting-only again; the +114/−13 count is inflated because the two near-identical build_translator calls are now wrapped one keyword per line. As in model_ct2, input_lang and output_lang are free names, apparently filled in when the model repository is generated. The formatted version:

    import triton_python_backend_utils as pb_utils
    from onmt.translate.translator import build_translator


    class TritonPythonModel:
        def initialize(self, args):
            current_path = os.path.dirname(os.path.abspath(__file__))
            self.source_lang, self.target_lang = input_lang, output_lang
            self.model_config = json.loads(args["model_config"])
            self.device_id = int(json.loads(args["model_instance_device_id"]))
            target_config = pb_utils.get_output_config_by_name(self.model_config, "OUTPUT_SENT")
            self.target_dtype = pb_utils.triton_string_to_numpy(target_config["data_type"])
            try:
                self.translator = build_translator(
                    Namespace(
                        tgt_prefix=False,
                        alpha=0.0,
                        batch_type="sents",
                        beam_size=5,
                        beta=-0.0,
                        block_ngram_repeat=0,
                        coverage_penalty="none",
                        data_type="text",
                        dump_beam="",
                        fp32=True,
                        gpu=self.device_id,
                        ignore_when_blocking=[],
                        length_penalty="none",
                        max_length=100,
                        max_sent_length=None,
                        min_length=0,
                        models=[f"{os.path.join(current_path, 'translator.pt')}"],
                        n_best=1,
                        output="/dev/null",
                        phrase_table="",
                        random_sampling_temp=1.0,
                        random_sampling_topk=1,
                        ratio=-0.0,
                        replace_unk=False,
                        report_align=False,
                        report_time=False,
                        seed=829,
                        stepwise_penalty=False,
                        tgt=None,
                        verbose=False,
                    ),
                    report_score=False,
                )
            except:
                # Identical Namespace except gpu=-1, i.e. fall back to CPU decoding.
                self.translator = build_translator(
                    Namespace(
                        tgt_prefix=False,
                        alpha=0.0,
                        batch_type="sents",
                        beam_size=5,
                        beta=-0.0,
                        block_ngram_repeat=0,
                        coverage_penalty="none",
                        data_type="text",
                        dump_beam="",
                        fp32=True,
                        gpu=-1,
                        ignore_when_blocking=[],
                        length_penalty="none",
                        max_length=100,
                        max_sent_length=None,
                        min_length=0,
                        models=[f"{os.path.join(current_path, 'translator.pt')}"],
                        n_best=1,
                        output="/dev/null",
                        phrase_table="",
                        random_sampling_temp=1.0,
                        random_sampling_topk=1,
                        ratio=-0.0,
                        replace_unk=False,
                        report_align=False,
                        report_time=False,
                        seed=829,
                        stepwise_penalty=False,
                        tgt=None,
                        verbose=False,
                    ),
                    report_score=False,
                )

        def clean_output(self, text):
            text = text.replace("@@ ", "")
            text = text.replace("\u200c", "")
            if text.startswith("<to-gu> "):
                text = text[8:]
            if text.endswith(" <to-gu>"):
                text = text[:-8]
            return text

        def execute(self, requests):
            source_list = [
                pb_utils.get_input_tensor_by_name(request, "INPUT_SENT_TOKENIZED")
                for request in requests
            ]
            bsize_list = [source.as_numpy().shape[0] for source in source_list]
            src_sentences = [
                s[0].decode("utf-8").strip().split(" ")
                for source in source_list
                for s in source.as_numpy()
            ]
            tgt_sentences = [
                self.clean_output(result[0])
                for result in self.translator.translate(src_sentences, batch_size=128)[1]
            ]
            responses = [
                pb_utils.InferenceResponse(
                    output_tensors=[
                        pb_utils.Tensor(
                            "OUTPUT_SENT",
                            numpy.array(
                                [[s] for s in islice(tgt_sentences, bsize)], dtype="object"
                            ).astype(self.target_dtype),
                        )
                    ]
                )
                for bsize in bsize_list
            ]
            return responses

        def finalize(self):
            del self.translator
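Most of this file's added lines come from the try/except spanning two fully wrapped, near-identical build_translator calls whose only difference is gpu=self.device_id versus gpu=-1. A possible follow-up (a sketch, not what the commit does) is to hoist the options into a helper:

    # Inside initialize(), replacing the duplicated try/except bodies:
    def _make_translator(current_path, gpu):
        # All options as in the committed code; only `gpu` varies between branches.
        return build_translator(
            Namespace(
                tgt_prefix=False, alpha=0.0, batch_type="sents", beam_size=5, beta=-0.0,
                block_ngram_repeat=0, coverage_penalty="none", data_type="text",
                dump_beam="", fp32=True, gpu=gpu, ignore_when_blocking=[],
                length_penalty="none", max_length=100, max_sent_length=None, min_length=0,
                models=[os.path.join(current_path, "translator.pt")], n_best=1,
                output="/dev/null", phrase_table="", random_sampling_temp=1.0,
                random_sampling_topk=1, ratio=-0.0, replace_unk=False,
                report_align=False, report_time=False, seed=829,
                stepwise_penalty=False, tgt=None, verbose=False,
            ),
            report_score=False,
        )

    try:
        self.translator = _make_translator(current_path, gpu=self.device_id)
    except Exception:
        self.translator = _make_translator(current_path, gpu=-1)  # CPU fallback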
triton_models/model_onmt/config.pbtxt  (+1 −1)

@@ -29,4 +29,4 @@ instance_group [

Same trailing-newline fix:

    response_cache {
      enable: true
    }
triton_models/nmt/config.pbtxt  (+1 −1)

@@ -75,4 +75,4 @@ ensemble_scheduling {

Same trailing-newline fix at the end of the ensemble definition:

    }
    }
    ]
    }
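nmt is the ensemble that chains the tokenizer, demuxer, and translation models; this commit only touches its trailing newline. A minimal client sketch, assuming tritonclient[http] is installed, the server is reachable on localhost:8000, and an en→hi model is deployed (all assumptions, not stated by the commit):

    import numpy as np
    import tritonclient.http as http_client

    client = http_client.InferenceServerClient(url="localhost:8000")

    def bytes_input(name, values):
        # Triton BYTES tensors are numpy object arrays of utf-8 bytes, shape [batch, 1].
        arr = np.array([[v.encode("utf-8")] for v in values], dtype="object")
        tensor = http_client.InferInput(name, arr.shape, "BYTES")
        tensor.set_data_from_numpy(arr)
        return tensor

    result = client.infer(
        "nmt",
        inputs=[
            bytes_input("INPUT_TEXT", ["hello world"]),
            bytes_input("INPUT_LANGUAGE_ID", ["en"]),
            bytes_input("OUTPUT_LANGUAGE_ID", ["hi"]),
        ],
        outputs=[http_client.InferRequestedOutput("OUTPUT_TEXT")],
    )
    print(result.as_numpy("OUTPUT_TEXT")[0, 0].decode("utf-8"))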
triton_models/tokenizer/1/apply_bpe.py  (+125 −74)

The whole file was requoted and reflowed; a few over-long docstrings were joined onto one line and two commented-out debug writes gained a space after the '#'. Old and new bodies are shown merged below (the removed lines differed only in quoting and whitespace). Hunk by hunk, the formatted result:

@@ -25,18 +25,21 @@ from collections import defaultdict

    # hack for python2/3 compatibility
    from io import open

    argparse.open = open


    class BPE(object):
        def __init__(self, codes, separator="@@", vocab=None, glossaries=None):
            # check version information
            firstline = codes.readline()
            if firstline.startswith("#version:"):
                self.version = tuple(
                    [int(x) for x in re.sub(r"(\.0+)*$", "", firstline.split()[-1]).split(".")]
                )
            else:
                self.version = (0, 1)
            codes.seek(0)

@@ -45,10 +48,12 @@ class BPE(object):

            # some hacking to deal with duplicates (only consider first instance)
            self.bpe_codes = dict(
                [(code, i) for (i, code) in reversed(list(enumerate(self.bpe_codes)))]
            )
            self.bpe_codes_reverse = dict(
                [(pair[0] + pair[1], pair) for pair, i in self.bpe_codes.items()]
            )
            self.separator = separator

@@ -62,63 +67,99 @@ class BPE(object):

            """segment single sentence (whitespace-tokenized string) with BPE encoding"""
            output = []
            for word in sentence.split():
                new_word = [
                    out
                    for segment in self._isolate_glossaries(word)
                    for out in encode(
                        segment,
                        self.bpe_codes,
                        self.bpe_codes_reverse,
                        self.vocab,
                        self.separator,
                        self.version,
                        self.cache,
                        self.glossaries,
                    )
                ]

                for item in new_word[:-1]:
                    output.append(item + self.separator)
                output.append(new_word[-1])

            return " ".join(output)

        def _isolate_glossaries(self, word):
            word_segments = [word]
            for gloss in self.glossaries:
                word_segments = [
                    out_segments
                    for segment in word_segments
                    for out_segments in isolate_glossary(segment, gloss)
                ]
            return word_segments


    def create_parser():
        parser = argparse.ArgumentParser(
            formatter_class=argparse.RawDescriptionHelpFormatter,
            description="learn BPE-based word segmentation",
        )

        parser.add_argument(
            "--input", "-i",
            type=argparse.FileType("r"), default=sys.stdin, metavar="PATH",
            help="Input file (default: standard input).",
        )
        parser.add_argument(
            "--codes", "-c",
            type=argparse.FileType("r"), metavar="PATH", required=True,
            help="File with BPE codes (created by learn_bpe.py).",
        )
        parser.add_argument(
            "--output", "-o",
            type=argparse.FileType("w"), default=sys.stdout, metavar="PATH",
            help="Output file (default: standard output)",
        )
        parser.add_argument(
            "--separator", "-s",
            type=str, default="@@", metavar="STR",
            help="Separator between non-final subword units (default: '%(default)s'))",
        )
        parser.add_argument(
            "--vocabulary",
            type=argparse.FileType("r"), default=None, metavar="PATH",
            help="Vocabulary file (built with get_vocab.py). If provided, this script reverts any merge operations that produce an OOV.",
        )
        parser.add_argument(
            "--vocabulary-threshold",
            type=int, default=None, metavar="INT",
            help="Vocabulary threshold. If vocabulary is provided, any word with frequency < threshold will be treated as OOV",
        )
        parser.add_argument(
            "--glossaries",
            type=str, nargs="+", default=None, metavar="STR",
            help="Glossaries. The strings provided in glossaries will not be affected"
            + "by the BPE (i.e. they will neither be broken into subwords, nor concatenated with other subwords",
        )

        return parser

@@ -136,9 +177,17 @@ def get_pairs(word):

        return pairs


    def encode(
        orig,
        bpe_codes,
        bpe_codes_reverse,
        vocab,
        separator,
        version,
        cache,
        glossaries=None,
    ):
        """Encode word based on list of BPE merge operations, which are applied consecutively"""
        if orig in cache:
            return cache[orig]

@@ -148,9 +197,9 @@ def encode(orig, bpe_codes, bpe_codes_reverse, vocab, separator, version, cache,

            return (orig,)

        if version == (0, 1):
            word = tuple(orig) + ("</w>",)
        elif version == (0, 2):
            # more consistent handling of word-final segments
            word = tuple(orig[:-1]) + (orig[-1] + "</w>",)
        else:
            raise NotImplementedError

@@ -160,7 +209,7 @@ def encode(orig, bpe_codes, bpe_codes_reverse, vocab, separator, version, cache,

            return orig

        while True:
            bigram = min(pairs, key=lambda pair: bpe_codes.get(pair, float("inf")))
            if bigram not in bpe_codes:
                break
            first, second = bigram

@@ -189,10 +238,10 @@ def encode(orig, bpe_codes, bpe_codes_reverse, vocab, separator, version, cache,

            pairs = get_pairs(word)

        # don't print end-of-word symbols
        if word[-1] == "</w>":
            word = word[:-1]
        elif word[-1].endswith("</w>"):
            word = word[:-1] + (word[-1].replace("</w>", ""),)

        if vocab:
            word = check_vocab_and_split(word, bpe_codes_reverse, vocab, separator)

@@ -207,12 +256,12 @@ def recursive_split(segment, bpe_codes, vocab, separator, final=False):

        try:
            if final:
                left, right = bpe_codes[segment + "</w>"]
                right = right[:-4]
            else:
                left, right = bpe_codes[segment]
        except:
            # sys.stderr.write('cannot split {0} further.\n'.format(segment))
            yield segment
            return

@@ -239,7 +288,7 @@ def check_vocab_and_split(orig, bpe_codes, vocab, separator):

            if segment + separator in vocab:
                out.append(segment)
            else:
                # sys.stderr.write('OOV: {0}\n'.format(segment))
                for item in recursive_split(segment, bpe_codes, vocab, separator, False):
                    out.append(item)

@@ -247,7 +296,7 @@ def check_vocab_and_split(orig, bpe_codes, vocab, separator):

            if segment in vocab:
                out.append(segment)
            else:
                # sys.stderr.write('OOV: {0}\n'.format(segment))
                for item in recursive_split(segment, bpe_codes, vocab, separator, True):
                    out.append(item)

@@ -255,8 +304,7 @@ def check_vocab_and_split(orig, bpe_codes, vocab, separator):

    def read_vocabulary(vocab_file, threshold):
        """read vocabulary file produced by get_vocab.py, and filter according to frequency threshold."""
        vocabulary = set()

@@ -273,7 +321,7 @@ def isolate_glossary(word, glossary):

        """
        Isolate a glossary present inside a word.

        Returns a list of subwords. In which all 'glossary' glossaries are isolated

        For example, if 'USA' is the glossary and '1934USABUSA' the word, the return value is:
            ['1934', 'USA', 'B', 'USA']

    (only trailing whitespace changed on the "Returns a list of subwords" line)

@@ -282,39 +330,42 @@ def isolate_glossary(word, glossary):

            return [word]
        else:
            splits = word.split(glossary)
            segments = [
                segment.strip()
                for split in splits[:-1]
                for segment in [split, glossary]
                if segment != ""
            ]
            return segments + [splits[-1].strip()] if splits[-1] != "" else segments


    if __name__ == "__main__":
        # python 2/3 compatibility
        if sys.version_info < (3, 0):
            sys.stderr = codecs.getwriter("UTF-8")(sys.stderr)
            sys.stdout = codecs.getwriter("UTF-8")(sys.stdout)
            sys.stdin = codecs.getreader("UTF-8")(sys.stdin)
        else:
            sys.stdin = io.TextIOWrapper(sys.stdin.buffer, encoding="utf-8")
            sys.stderr = io.TextIOWrapper(sys.stderr.buffer, encoding="utf-8")
            sys.stdout = io.TextIOWrapper(
                sys.stdout.buffer, encoding="utf-8", write_through=True, line_buffering=True
            )

        parser = create_parser()
        args = parser.parse_args()

        # read/write files as UTF-8
        args.codes = codecs.open(args.codes.name, encoding="utf-8")
        if args.input.name != "<stdin>":
            args.input = codecs.open(args.input.name, encoding="utf-8")
        if args.output.name != "<stdout>":
            args.output = codecs.open(args.output.name, "w", encoding="utf-8")
        if args.vocabulary:
            args.vocabulary = codecs.open(args.vocabulary.name, encoding="utf-8")

        if args.vocabulary:
            vocabulary = read_vocabulary(args.vocabulary, args.vocabulary_threshold)
        else:
            vocabulary = None

@@ -322,4 +373,4 @@ if __name__ == '__main__':

        for line in args.input:
            args.output.write(bpe.segment(line).strip())
            args.output.write("\n")
triton_models/tokenizer/1/model.py  (+69 −4)

@@ -6,8 +6,73 @@ from .apply_bpe import BPE

Formatting only; initialize() still builds the output dtype and the per-language-pair BPE table in a single tuple assignment. The formatted version:

    from ilstokenizer import tokenizer
    import triton_python_backend_utils as pb_utils


    class TritonPythonModel:
        def initialize(self, args):
            self.target_dtype, self.bpes = pb_utils.triton_string_to_numpy(
                pb_utils.get_output_config_by_name(
                    json.loads(args["model_config"]), "INPUT_TEXT_TOKENIZED"
                )["data_type"]
            ), {
                fname.rsplit("/", maxsplit=1)[-1][: -len(".src")]: BPE(
                    open(fname, "r", encoding="utf-8")
                )
                for fname in iglob(
                    f"{os.path.dirname(os.path.abspath(__file__))}/bpe_src/*.src"
                )
            }

        def preprocess_text(self, text, source_lang, target_lang):
            return (
                f"<to-gu> {text} <to-gu>"
                if source_lang == "en" and target_lang == "gu"
                else text
            )

        def execute(self, requests):
            return [
                pb_utils.InferenceResponse(
                    output_tensors=[
                        pb_utils.Tensor(
                            "INPUT_TEXT_TOKENIZED",
                            numpy.array(
                                [[tokenized_sent] for tokenized_sent in tokenized_sents],
                                dtype=self.target_dtype,
                            ),
                        )
                    ]
                )
                for tokenized_sents in (
                    (
                        self.bpes[
                            f"{input_language_id[0].decode('utf-8')}-{output_language_id[0].decode('utf-8')}"
                        ]
                        .segment(
                            self.preprocess_text(
                                tokenizer.tokenize(input_text[0].decode("utf-8").lower()),
                                input_language_id[0].decode("utf-8"),
                                output_language_id[0].decode("utf-8"),
                            )
                        )
                        .strip()
                        for input_text, input_language_id, output_language_id in zip(
                            input_texts.as_numpy(),
                            input_language_ids.as_numpy(),
                            output_language_ids.as_numpy(),
                        )
                    )
                    for input_texts, input_language_ids, output_language_ids in (
                        (
                            pb_utils.get_input_tensor_by_name(request, "INPUT_TEXT"),
                            pb_utils.get_input_tensor_by_name(request, "INPUT_LANGUAGE_ID"),
                            pb_utils.get_input_tensor_by_name(request, "OUTPUT_LANGUAGE_ID"),
                        )
                        for request in requests
                    )
                )
            ]

        def finalize(self):
            pass
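Reading the comprehension inside out, each sentence goes through lowercasing → ilstokenizer tokenization → optional <to-gu> tagging (en→gu only) → BPE segmentation for its language pair. The same flow as a tiny helper (a sketch, not part of the commit; names mirror the model above):

    def tokenize_one(self, text, src, tgt):
        tokenized = tokenizer.tokenize(text.lower())        # ilstokenizer word tokenization
        tagged = self.preprocess_text(tokenized, src, tgt)  # wraps en->gu input in <to-gu> tags
        return self.bpes[f"{src}-{tgt}"].segment(tagged).strip()  # per-pair subword split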
triton_models/tokenizer/config.pbtxt  (+1 −1)

@@ -39,4 +39,4 @@ instance_group [

Same trailing-newline fix:

    count: 8
    kind: KIND_CPU
    }
    ]