import asyncio
import json

import numpy
import triton_python_backend_utils as pb_utils


class TritonPythonModel:
    """BLS orchestrator that fans each batched request out to per-language-pair
    translation models named ``himangy-<src>-<tgt>`` and repacks the results.

    For every row of the batch it issues one async inference request against the
    model selected by that row's language-ID pair, gathers all downstream
    results, and returns the translated sentences as the OUTPUT_TEXT tensor.
    """

    def initialize(self, args):
        """Cache the numpy dtype Triton configured for the OUTPUT_TEXT output.

        Args:
            args: Triton-provided dict; ``args["model_config"]`` is the JSON
                model configuration string.
        """
        model_config = json.loads(args["model_config"])
        output_config = pb_utils.get_output_config_by_name(model_config, "OUTPUT_TEXT")
        self.target_dtype = pb_utils.triton_string_to_numpy(output_config["data_type"])

    @staticmethod
    def _downstream_model_name(src_lang_id, tgt_lang_id):
        """Build the per-pair model name from two language-ID tensor rows.

        Each ID row holds bytes like ``b"hi_IN"``; only the part before the
        first underscore (the bare language code) goes into the model name.
        """
        src = src_lang_id[0].decode("utf-8").split("_", maxsplit=1)[0]
        tgt = tgt_lang_id[0].decode("utf-8").split("_", maxsplit=1)[0]
        return f"himangy-{src}-{tgt}"

    def _issue_translation_requests(self, request):
        """Create one async BLS request per row of the batched inputs.

        Returns a list of awaitables (one per text/src/tgt row) WITHOUT
        awaiting them, so callers can overlap requests across the whole batch.
        """
        texts = pb_utils.get_input_tensor_by_name(
            request, "INPUT_TEXT_TOKENIZED"
        ).as_numpy()
        src_ids = pb_utils.get_input_tensor_by_name(
            request, "INPUT_LANGUAGE_ID"
        ).as_numpy()
        tgt_ids = pb_utils.get_input_tensor_by_name(
            request, "OUTPUT_LANGUAGE_ID"
        ).as_numpy()
        awaitables = []
        for text_row, src_id, tgt_id in zip(texts, src_ids, tgt_ids):
            infer_request = pb_utils.InferenceRequest(
                model_name=self._downstream_model_name(src_id, tgt_id),
                requested_output_names=["OUTPUT_SENT"],
                inputs=[
                    pb_utils.Tensor(
                        "INPUT_SENT_TOKENIZED",
                        # NOTE(review): reuses OUTPUT_TEXT's dtype for the
                        # downstream INPUT; presumably both are string/BYTES
                        # tensors — confirm against the downstream config.
                        numpy.array([[text_row[0]]], dtype=self.target_dtype),
                    )
                ],
            )
            awaitables.append(infer_request.async_exec())
        return awaitables

    def _pack_response(self, results):
        """Collect each result's OUTPUT_SENT scalar into one OUTPUT_TEXT tensor."""
        translated = [
            [pb_utils.get_output_tensor_by_name(result, "OUTPUT_SENT").as_numpy()[0, 0]]
            for result in results
        ]
        return pb_utils.InferenceResponse(
            output_tensors=[
                pb_utils.Tensor(
                    "OUTPUT_TEXT",
                    numpy.array(translated, dtype=self.target_dtype),
                )
            ]
        )

    async def execute(self, requests):
        """Handle a batch of Triton requests; returns one response per request.

        All downstream BLS requests are issued up front (matching the original
        all-at-once dispatch so cross-request concurrency is preserved), then
        gathered and packed in request order.
        """
        pending = [self._issue_translation_requests(request) for request in requests]
        responses = []
        for awaitables in pending:
            results = await asyncio.gather(*awaitables)
            responses.append(self._pack_response(results))
        return responses

    def finalize(self):
        """No per-model teardown is required."""
        pass