In [1]:
import numpy as np
from tqdm import tqdm
from random import choice
from tritonclient.utils import *
import tritonclient.http as httpclient
from multiprocessing.pool import ThreadPool

In [2]:
# Name of the Triton model that every inference request in this notebook targets.
model_name = 'ssmt_pipeline'
# NOTE(review): `shape` is not read by any cell visible here -- confirm it is
# still needed before removing it.
shape = [1]

In [3]:
def _string_input(name, value):
    """Build an ``InferInput`` called ``name`` holding ``value`` as a 1x1 object array."""
    data = np.array([[value]], dtype='object')
    tensor = httpclient.InferInput(name, data.shape, np_to_triton_dtype(data.dtype))
    tensor.set_data_from_numpy(data)
    return tensor


def task(x, url="localhost:8000", n_requests=10, timeout=10):
    """Fire a burst of async translation requests at Triton and wait for every reply.

    Each request carries the same fixed sentence together with a randomly chosen
    source/target language pair, and asks for the ``OUTPUT_TEXT`` output of the
    model named by the module-level ``model_name``.

    Args:
        x: Work-item index supplied by the pool. It is only used to make the
            request IDs unique across workers.
        url: ``host:port`` of the Triton HTTP endpoint.
        n_requests: Number of requests issued over the single client connection
            before waiting on any of them.
        timeout: Value forwarded to ``get_result(timeout=...)`` for each
            outstanding request.

    Returns:
        Always ``0``; the responses are fetched but discarded.

    Raises:
        Whatever the Triton client raises on a failed or timed-out request
        (presumably ``InferenceServerException`` -- confirm against the
        installed tritonclient version).
    """
    lang_pairs = ('eng-hin', 'hin-eng', 'tel-eng', 'hin-tel', 'tel-hin', 'eng-guj', 'guj-eng')
    sentence = 'this is a sentence.'
    # The requested outputs are identical for every request, so build them once.
    outputs = [httpclient.InferRequestedOutput("OUTPUT_TEXT")]

    with httpclient.InferenceServerClient(url) as client:
        pending = []
        for i in range(n_requests):
            src_lang, tgt_lang = (part.strip() for part in choice(lang_pairs).split('-'))
            inputs = [
                _string_input("INPUT_TEXT", sentence),
                _string_input("INPUT_LANGUAGE_ID", src_lang),
                _string_input("OUTPUT_LANGUAGE_ID", tgt_lang),
            ]
            # A distinct ID per request (work item + position in the burst) lets
            # replies and server-side logs be matched to the request that caused
            # them; previously every request was sent with the ID "1".
            pending.append(
                client.async_infer(model_name, inputs, request_id=f"{x}-{i}", outputs=outputs)
            )
        # Collect the replies while the client is still open.
        for request in pending:
            request.get_result(timeout=timeout).get_response()
    return 0

In [4]:
# Run 1000 invocations of `task` on a pool of 100 worker threads, showing a
# progress bar that advances as each invocation completes (in any order).
with ThreadPool(100) as pool:
    completions = pool.imap_unordered(task, range(1000), chunksize=1)
    # The return values are not used; iterating only drives the progress bar.
    for output in tqdm(completions, total=1000):
        pass

100%|██████████| 1000/1000 [01:49<00:00,  9.15it/s]
