Commit 52a974e2 authored by Nikhilesh Bhatnagar's avatar Nikhilesh Bhatnagar

Initial implementation.

parents
Pipeline #28 canceled with stages
wavs
ort_models
Fastspeech2_HS
\ No newline at end of file
# Runtime image: NVIDIA Triton Inference Server, tag pinned to the 23.10 NGC release.
FROM nvcr.io/nvidia/tritonserver:23.10-py3
# Serve models from /models (mounted at `docker run` time — see README) and
# enable the local response cache sized at 1048576 bytes (1 MiB).
# Exec-form CMD so tritonserver runs as PID 1 and receives SIGTERM on stop.
CMD ["tritonserver", "--model-repository=/models", "--cache-config=local,size=1048576"]
\ No newline at end of file
# IIT Madras TTS Triton repo for dhruva
* Make sure to have `nvidia-docker` installed.
* `bash make_triton_repo.sh`
* It will create the Triton model repository at `triton_model_repo`.
* You may change the tritonserver container tag depending on your driver version.
* Check the notebooks for environment details. We haven't provided the exported yml file because it pulls and patches the latest espnet git, for that, look at the scripts mentioned in the notebooks.
* Build the image `docker build -t dhruva/iitmtts-model-server:1 .`
* Run the container `nvidia-docker run --gpus=all --rm --shm-size 5g --network=host --name dhruva-iitmtts-triton-server -v ./triton_model_repo:/models dhruva/iitmtts-model-server:1`
### TODO:
* Batched inference unsupported
* Followed by ensemble-fication
\ No newline at end of file
# Build-environment image used by make_triton_repo.sh to run the ONNX export
# and repo-assembly steps as the invoking host user (so bind-mounted output
# files are not owned by root).
FROM nvcr.io/nvidia/tritonserver:23.10-py3

# Host UID/GID are passed in via --build-arg by make_triton_repo.sh.
ARG UID=1000
ARG GID=1000

# Create the builder group/user only if they do not already exist.
# NOTE: the original used `&>/dev/null`, a bashism that /bin/sh (dash) parses
# as backgrounding plus `>/dev/null`, which broke the guard; use POSIX
# redirection instead. Parentheses fix `&&`/`||` precedence so useradd is the
# fallback for a failed `id` lookup only, not for a failed groupadd.
RUN groupadd --system --force --gid ${GID} builder \
    && (id -u ${UID} >/dev/null 2>&1 || useradd --system --gid ${GID} --uid ${UID} builder)

# certifi refresh for TLS trust inside the container; no pip cache in the layer.
RUN pip install --no-cache-dir -U certifi

# Toolchain for building the Triton model repo. update+install in one layer
# (avoids stale apt cache), skip recommends, and drop the apt lists afterwards
# to keep the image small.
RUN DEBIAN_FRONTEND=noninteractive apt-get update \
    && apt-get install -y --no-install-recommends \
        build-essential \
        ca-certificates \
        cmake \
        libgoogle-perftools-dev \
        locales \
        pkg-config \
        rapidjson-dev \
        unzip \
    && locale-gen en_US.UTF-8 \
    && update-locale LANG=en_US.UTF-8 \
    && rm -rf /var/lib/apt/lists/*

# Drop root for all subsequent build work.
USER ${UID}
WORKDIR /home/builder
#!/bin/bash
# Builds the Triton model repository end to end:
#   1. clone + patch the upstream Fastspeech2_HS sources (git-lfs checkpoints),
#   2. build the env-builder image as the current host user,
#   3. export the models to ONNX inside the container,
#   4. assemble the repo under ./triton_model_repo.
# Fail fast: without -e the original script would continue into docker builds
# and cp even after a failed clone or patch application.
set -euo pipefail

git clone --recursive https://github.com/smtiitm/Fastspeech2_HS.git
(cd Fastspeech2_HS && git lfs fetch --all && git lfs pull && git apply ../patches/fastspeech2.patch)

# UID/GID forwarded so files written to the bind mounts stay host-owned.
docker build --build-arg="UID=$(id -u)" --build-arg="GID=$(id -g)" -t dhruva/iitm-tts-envbuilder . -f envbuilder.dockerfile

# -p: idempotent on re-runs instead of aborting under set -e.
mkdir -p onnx_models
nvidia-docker run --gpus=all -it --rm --name iitm-tts-dhruva-builder -v ./Fastspeech2_HS:/Fastspeech2_HS -v ./onnx_models:/onnx_models -v ./notebooks:/notebooks -v ./patches:/patches -v ./scripts:/scripts dhruva/iitm-tts-envbuilder bash /scripts/perform_onnx_conversion.sh

mkdir -p triton_model_repo
nvidia-docker run --gpus=all -it --rm --name iitm-tts-dhruva-builder -v ./patches:/patches -v ./triton_models/tts:/model -v ./triton_model_repo:/model_repo dhruva/iitm-tts-envbuilder bash /model/envbuilder.sh

# Assemble the final model directory: python backend entrypoint deps, model
# config, and the exported ONNX artifacts + phone dictionaries.
cp Fastspeech2_HS/text_preprocess_for_inference.py triton_model_repo/tts/1
cp -r triton_models/tts/config.pbtxt triton_models/tts/1 triton_model_repo/tts
cp -r onnx_models/* Fastspeech2_HS/phone_dict Fastspeech2_HS/multilingualcharmap.json triton_model_repo/tts/1
\ No newline at end of file
{
"cells": [
{
"cell_type": "code",
"execution_count": 1,
"metadata": {
"execution": {
"iopub.execute_input": "2023-10-30T05:49:48.963582Z",
"iopub.status.busy": "2023-10-30T05:49:48.962849Z",
"iopub.status.idle": "2023-10-30T05:50:11.833090Z",
"shell.execute_reply": "2023-10-30T05:50:11.832195Z"
}
},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"/home/builder/conda/envs/text2phone/lib/python3.10/site-packages/tqdm/auto.py:21: TqdmWarning: IProgress not found. Please update jupyter and ipywidgets. See https://ipywidgets.readthedocs.io/en/stable/user_install.html\n",
" from .autonotebook import tqdm as notebook_tqdm\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"Loading G2P model... Done!\n",
"Loading G2P model... Done!\n",
"Loading G2P model... Done!\n",
"Loading G2P model... Done!\n",
"Loading G2P model... Done!\n",
"Loading G2P model... Done!\n",
"Loading G2P model... Done!\n",
"Loading G2P model... Done!\n",
"Loading G2P model... Done!\n",
"Loading G2P model... Done!\n",
"Loading G2P model... Done!\n"
]
}
],
"source": [
"FASTSPEECH2_HOME = '/Fastspeech2_HS'\n",
"ONNX_MODELS_HOME = '/onnx_models'\n",
"import os\n",
"import json\n",
"import yaml\n",
"import torch\n",
"import shutil\n",
"!mkdir -p $ONNX_MODELS_HOME/vocoders\n",
"from sys import path\n",
"os.chdir(FASTSPEECH2_HOME)\n",
"path.append(\"hifigan\")\n",
"from env import AttrDict\n",
"from pathlib import Path\n",
"from models import Generator\n",
"from espnet_onnx.export import TTSModelExport\n",
"from espnet2.bin.tts_inference import Text2Speech\n",
"from text_preprocess_for_inference import TTSDurAlignPreprocessor, CharTextPreprocessor, TTSPreprocessor"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Saving FastSpeech2 and HifiGAN to ONNX"
]
},
{
"cell_type": "code",
"execution_count": 2,
"metadata": {
"execution": {
"iopub.execute_input": "2023-10-30T05:50:11.836793Z",
"iopub.status.busy": "2023-10-30T05:50:11.836609Z",
"iopub.status.idle": "2023-10-30T05:52:53.727897Z",
"shell.execute_reply": "2023-10-30T05:52:53.726516Z"
}
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Removing weight norm...\n",
"saving fastspeech2 onnx for male, assamese...\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"/home/builder/conda/envs/text2phone/lib/python3.10/site-packages/torch/onnx/_internal/jit_utils.py:306: UserWarning: Constant folding - Only steps=1 can be constant folded for opset >= 10 onnx::Slice op. Constant folding not applied. (Triggered internally at ../torch/csrc/jit/passes/onnx/constant_fold.cpp:179.)\n",
" _C._jit_pass_onnx_node_shape_type_inference(node, params_dict, opset_version)\n",
"/home/builder/conda/envs/text2phone/lib/python3.10/site-packages/torch/onnx/utils.py:689: UserWarning: Constant folding - Only steps=1 can be constant folded for opset >= 10 onnx::Slice op. Constant folding not applied. (Triggered internally at ../torch/csrc/jit/passes/onnx/constant_fold.cpp:179.)\n",
" _C._jit_pass_onnx_graph_shape_type_inference(\n",
"/home/builder/conda/envs/text2phone/lib/python3.10/site-packages/torch/onnx/utils.py:1186: UserWarning: Constant folding - Only steps=1 can be constant folded for opset >= 10 onnx::Slice op. Constant folding not applied. (Triggered internally at ../torch/csrc/jit/passes/onnx/constant_fold.cpp:179.)\n",
" _C._jit_pass_onnx_graph_shape_type_inference(\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"============= Diagnostic Run torch.onnx.export version 2.0.1+cu117 =============\n",
"verbose: False, log level: Level.ERROR\n",
"======================= 0 NONE 0 NOTE 0 WARNING 0 ERROR ========================\n",
"\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"WARNING:root:Please consider to run pre-processing before quantization. Refer to example: https://github.com/microsoft/onnxruntime-inference-examples/blob/main/quantization/image_classification/cpu/ReadMe.md \n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"Ignore MatMul due to non constant B: /[/encoder/encoders/encoders.0/self_attn/MatMul]\n",
"Ignore MatMul due to non constant B: /[/encoder/encoders/encoders.0/self_attn/MatMul_1]\n",
"Ignore MatMul due to non constant B: /[/encoder/encoders/encoders.1/self_attn/MatMul]\n",
"Ignore MatMul due to non constant B: /[/encoder/encoders/encoders.1/self_attn/MatMul_1]\n",
"Ignore MatMul due to non constant B: /[/encoder/encoders/encoders.2/self_attn/MatMul]\n",
"Ignore MatMul due to non constant B: /[/encoder/encoders/encoders.2/self_attn/MatMul_1]\n",
"Ignore MatMul due to non constant B: /[/encoder/encoders/encoders.3/self_attn/MatMul]\n",
"Ignore MatMul due to non constant B: /[/encoder/encoders/encoders.3/self_attn/MatMul_1]\n",
"Ignore MatMul due to non constant B: /[/decoder/encoders/encoders.0/self_attn/MatMul]\n",
"Ignore MatMul due to non constant B: /[/decoder/encoders/encoders.0/self_attn/MatMul_1]\n",
"Ignore MatMul due to non constant B: /[/decoder/encoders/encoders.1/self_attn/MatMul]\n",
"Ignore MatMul due to non constant B: /[/decoder/encoders/encoders.1/self_attn/MatMul_1]\n",
"Ignore MatMul due to non constant B: /[/decoder/encoders/encoders.2/self_attn/MatMul]\n",
"Ignore MatMul due to non constant B: /[/decoder/encoders/encoders.2/self_attn/MatMul_1]\n",
"Ignore MatMul due to non constant B: /[/decoder/encoders/encoders.3/self_attn/MatMul]\n",
"Ignore MatMul due to non constant B: /[/decoder/encoders/encoders.3/self_attn/MatMul_1]\n",
"saving vocoder onnx for male-aryan...\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"/home/builder/conda/envs/text2phone/lib/python3.10/site-packages/torch/onnx/utils.py:2033: UserWarning: No names were found for specified dynamic axes of provided input.Automatically generated names will be applied to each dynamic axes of input input\n",
" warnings.warn(\n",
"/home/builder/conda/envs/text2phone/lib/python3.10/site-packages/torch/onnx/utils.py:2033: UserWarning: No names were found for specified dynamic axes of provided input.Automatically generated names will be applied to each dynamic axes of input output\n",
" warnings.warn(\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"============= Diagnostic Run torch.onnx.export version 2.0.1+cu117 =============\n",
"verbose: False, log level: Level.ERROR\n",
"======================= 0 NONE 0 NOTE 0 WARNING 0 ERROR ========================\n",
"\n",
"Removing weight norm...\n",
"saving fastspeech2 onnx for male, bengali...\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"/home/builder/conda/envs/text2phone/lib/python3.10/site-packages/torch/onnx/_internal/jit_utils.py:306: UserWarning: Constant folding - Only steps=1 can be constant folded for opset >= 10 onnx::Slice op. Constant folding not applied. (Triggered internally at ../torch/csrc/jit/passes/onnx/constant_fold.cpp:179.)\n",
" _C._jit_pass_onnx_node_shape_type_inference(node, params_dict, opset_version)\n",
"/home/builder/conda/envs/text2phone/lib/python3.10/site-packages/torch/onnx/utils.py:689: UserWarning: Constant folding - Only steps=1 can be constant folded for opset >= 10 onnx::Slice op. Constant folding not applied. (Triggered internally at ../torch/csrc/jit/passes/onnx/constant_fold.cpp:179.)\n",
" _C._jit_pass_onnx_graph_shape_type_inference(\n",
"/home/builder/conda/envs/text2phone/lib/python3.10/site-packages/torch/onnx/utils.py:1186: UserWarning: Constant folding - Only steps=1 can be constant folded for opset >= 10 onnx::Slice op. Constant folding not applied. (Triggered internally at ../torch/csrc/jit/passes/onnx/constant_fold.cpp:179.)\n",
" _C._jit_pass_onnx_graph_shape_type_inference(\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"============= Diagnostic Run torch.onnx.export version 2.0.1+cu117 =============\n",
"verbose: False, log level: Level.ERROR\n",
"======================= 0 NONE 0 NOTE 0 WARNING 0 ERROR ========================\n",
"\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"WARNING:root:Please consider to run pre-processing before quantization. Refer to example: https://github.com/microsoft/onnxruntime-inference-examples/blob/main/quantization/image_classification/cpu/ReadMe.md \n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"Ignore MatMul due to non constant B: /[/encoder/encoders/encoders.0/self_attn/MatMul]\n",
"Ignore MatMul due to non constant B: /[/encoder/encoders/encoders.0/self_attn/MatMul_1]\n",
"Ignore MatMul due to non constant B: /[/encoder/encoders/encoders.1/self_attn/MatMul]\n",
"Ignore MatMul due to non constant B: /[/encoder/encoders/encoders.1/self_attn/MatMul_1]\n",
"Ignore MatMul due to non constant B: /[/encoder/encoders/encoders.2/self_attn/MatMul]\n",
"Ignore MatMul due to non constant B: /[/encoder/encoders/encoders.2/self_attn/MatMul_1]\n",
"Ignore MatMul due to non constant B: /[/encoder/encoders/encoders.3/self_attn/MatMul]\n",
"Ignore MatMul due to non constant B: /[/encoder/encoders/encoders.3/self_attn/MatMul_1]\n",
"Ignore MatMul due to non constant B: /[/decoder/encoders/encoders.0/self_attn/MatMul]\n",
"Ignore MatMul due to non constant B: /[/decoder/encoders/encoders.0/self_attn/MatMul_1]\n",
"Ignore MatMul due to non constant B: /[/decoder/encoders/encoders.1/self_attn/MatMul]\n",
"Ignore MatMul due to non constant B: /[/decoder/encoders/encoders.1/self_attn/MatMul_1]\n",
"Ignore MatMul due to non constant B: /[/decoder/encoders/encoders.2/self_attn/MatMul]\n",
"Ignore MatMul due to non constant B: /[/decoder/encoders/encoders.2/self_attn/MatMul_1]\n",
"Ignore MatMul due to non constant B: /[/decoder/encoders/encoders.3/self_attn/MatMul]\n",
"Ignore MatMul due to non constant B: /[/decoder/encoders/encoders.3/self_attn/MatMul_1]\n",
"Removing weight norm...\n",
"saving fastspeech2 onnx for male, bodo...\n",
"Removing weight norm...\n",
"saving fastspeech2 onnx for male, english...\n",
"============= Diagnostic Run torch.onnx.export version 2.0.1+cu117 =============\n",
"verbose: False, log level: Level.ERROR\n",
"======================= 0 NONE 0 NOTE 0 WARNING 0 ERROR ========================\n",
"\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"WARNING:root:Please consider to run pre-processing before quantization. Refer to example: https://github.com/microsoft/onnxruntime-inference-examples/blob/main/quantization/image_classification/cpu/ReadMe.md \n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"Ignore MatMul due to non constant B: /[/encoder/encoders/encoders.0/self_attn/MatMul]\n",
"Ignore MatMul due to non constant B: /[/encoder/encoders/encoders.0/self_attn/MatMul_1]\n",
"Ignore MatMul due to non constant B: /[/encoder/encoders/encoders.1/self_attn/MatMul]\n",
"Ignore MatMul due to non constant B: /[/encoder/encoders/encoders.1/self_attn/MatMul_1]\n",
"Ignore MatMul due to non constant B: /[/encoder/encoders/encoders.2/self_attn/MatMul]\n",
"Ignore MatMul due to non constant B: /[/encoder/encoders/encoders.2/self_attn/MatMul_1]\n",
"Ignore MatMul due to non constant B: /[/encoder/encoders/encoders.3/self_attn/MatMul]\n",
"Ignore MatMul due to non constant B: /[/encoder/encoders/encoders.3/self_attn/MatMul_1]\n",
"Ignore MatMul due to non constant B: /[/decoder/encoders/encoders.0/self_attn/MatMul]\n",
"Ignore MatMul due to non constant B: /[/decoder/encoders/encoders.0/self_attn/MatMul_1]\n",
"Ignore MatMul due to non constant B: /[/decoder/encoders/encoders.1/self_attn/MatMul]\n",
"Ignore MatMul due to non constant B: /[/decoder/encoders/encoders.1/self_attn/MatMul_1]\n",
"Ignore MatMul due to non constant B: /[/decoder/encoders/encoders.2/self_attn/MatMul]\n",
"Ignore MatMul due to non constant B: /[/decoder/encoders/encoders.2/self_attn/MatMul_1]\n",
"Ignore MatMul due to non constant B: /[/decoder/encoders/encoders.3/self_attn/MatMul]\n",
"Ignore MatMul due to non constant B: /[/decoder/encoders/encoders.3/self_attn/MatMul_1]\n",
"Removing weight norm...\n",
"saving fastspeech2 onnx for male, gujarati...\n",
"============= Diagnostic Run torch.onnx.export version 2.0.1+cu117 =============\n",
"verbose: False, log level: Level.ERROR\n",
"======================= 0 NONE 0 NOTE 0 WARNING 0 ERROR ========================\n",
"\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"WARNING:root:Please consider to run pre-processing before quantization. Refer to example: https://github.com/microsoft/onnxruntime-inference-examples/blob/main/quantization/image_classification/cpu/ReadMe.md \n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"Ignore MatMul due to non constant B: /[/encoder/encoders/encoders.0/self_attn/MatMul]\n",
"Ignore MatMul due to non constant B: /[/encoder/encoders/encoders.0/self_attn/MatMul_1]\n",
"Ignore MatMul due to non constant B: /[/encoder/encoders/encoders.1/self_attn/MatMul]\n",
"Ignore MatMul due to non constant B: /[/encoder/encoders/encoders.1/self_attn/MatMul_1]\n",
"Ignore MatMul due to non constant B: /[/encoder/encoders/encoders.2/self_attn/MatMul]\n",
"Ignore MatMul due to non constant B: /[/encoder/encoders/encoders.2/self_attn/MatMul_1]\n",
"Ignore MatMul due to non constant B: /[/encoder/encoders/encoders.3/self_attn/MatMul]\n",
"Ignore MatMul due to non constant B: /[/encoder/encoders/encoders.3/self_attn/MatMul_1]\n",
"Ignore MatMul due to non constant B: /[/decoder/encoders/encoders.0/self_attn/MatMul]\n",
"Ignore MatMul due to non constant B: /[/decoder/encoders/encoders.0/self_attn/MatMul_1]\n",
"Ignore MatMul due to non constant B: /[/decoder/encoders/encoders.1/self_attn/MatMul]\n",
"Ignore MatMul due to non constant B: /[/decoder/encoders/encoders.1/self_attn/MatMul_1]\n",
"Ignore MatMul due to non constant B: /[/decoder/encoders/encoders.2/self_attn/MatMul]\n",
"Ignore MatMul due to non constant B: /[/decoder/encoders/encoders.2/self_attn/MatMul_1]\n",
"Ignore MatMul due to non constant B: /[/decoder/encoders/encoders.3/self_attn/MatMul]\n",
"Ignore MatMul due to non constant B: /[/decoder/encoders/encoders.3/self_attn/MatMul_1]\n",
"Removing weight norm...\n",
"saving fastspeech2 onnx for male, hindi...\n",
"============= Diagnostic Run torch.onnx.export version 2.0.1+cu117 =============\n",
"verbose: False, log level: Level.ERROR\n",
"======================= 0 NONE 0 NOTE 0 WARNING 0 ERROR ========================\n",
"\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"WARNING:root:Please consider to run pre-processing before quantization. Refer to example: https://github.com/microsoft/onnxruntime-inference-examples/blob/main/quantization/image_classification/cpu/ReadMe.md \n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"Ignore MatMul due to non constant B: /[/encoder/encoders/encoders.0/self_attn/MatMul]\n",
"Ignore MatMul due to non constant B: /[/encoder/encoders/encoders.0/self_attn/MatMul_1]\n",
"Ignore MatMul due to non constant B: /[/encoder/encoders/encoders.1/self_attn/MatMul]\n",
"Ignore MatMul due to non constant B: /[/encoder/encoders/encoders.1/self_attn/MatMul_1]\n",
"Ignore MatMul due to non constant B: /[/encoder/encoders/encoders.2/self_attn/MatMul]\n",
"Ignore MatMul due to non constant B: /[/encoder/encoders/encoders.2/self_attn/MatMul_1]\n",
"Ignore MatMul due to non constant B: /[/encoder/encoders/encoders.3/self_attn/MatMul]\n",
"Ignore MatMul due to non constant B: /[/encoder/encoders/encoders.3/self_attn/MatMul_1]\n",
"Ignore MatMul due to non constant B: /[/decoder/encoders/encoders.0/self_attn/MatMul]\n",
"Ignore MatMul due to non constant B: /[/decoder/encoders/encoders.0/self_attn/MatMul_1]\n",
"Ignore MatMul due to non constant B: /[/decoder/encoders/encoders.1/self_attn/MatMul]\n",
"Ignore MatMul due to non constant B: /[/decoder/encoders/encoders.1/self_attn/MatMul_1]\n",
"Ignore MatMul due to non constant B: /[/decoder/encoders/encoders.2/self_attn/MatMul]\n",
"Ignore MatMul due to non constant B: /[/decoder/encoders/encoders.2/self_attn/MatMul_1]\n",
"Ignore MatMul due to non constant B: /[/decoder/encoders/encoders.3/self_attn/MatMul]\n",
"Ignore MatMul due to non constant B: /[/decoder/encoders/encoders.3/self_attn/MatMul_1]\n",
"Removing weight norm...\n",
"saving fastspeech2 onnx for male, kannada...\n",
"============= Diagnostic Run torch.onnx.export version 2.0.1+cu117 =============\n",
"verbose: False, log level: Level.ERROR\n",
"======================= 0 NONE 0 NOTE 0 WARNING 0 ERROR ========================\n",
"\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"WARNING:root:Please consider to run pre-processing before quantization. Refer to example: https://github.com/microsoft/onnxruntime-inference-examples/blob/main/quantization/image_classification/cpu/ReadMe.md \n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"Ignore MatMul due to non constant B: /[/encoder/encoders/encoders.0/self_attn/MatMul]\n",
"Ignore MatMul due to non constant B: /[/encoder/encoders/encoders.0/self_attn/MatMul_1]\n",
"Ignore MatMul due to non constant B: /[/encoder/encoders/encoders.1/self_attn/MatMul]\n",
"Ignore MatMul due to non constant B: /[/encoder/encoders/encoders.1/self_attn/MatMul_1]\n",
"Ignore MatMul due to non constant B: /[/encoder/encoders/encoders.2/self_attn/MatMul]\n",
"Ignore MatMul due to non constant B: /[/encoder/encoders/encoders.2/self_attn/MatMul_1]\n",
"Ignore MatMul due to non constant B: /[/encoder/encoders/encoders.3/self_attn/MatMul]\n",
"Ignore MatMul due to non constant B: /[/encoder/encoders/encoders.3/self_attn/MatMul_1]\n",
"Ignore MatMul due to non constant B: /[/decoder/encoders/encoders.0/self_attn/MatMul]\n",
"Ignore MatMul due to non constant B: /[/decoder/encoders/encoders.0/self_attn/MatMul_1]\n",
"Ignore MatMul due to non constant B: /[/decoder/encoders/encoders.1/self_attn/MatMul]\n",
"Ignore MatMul due to non constant B: /[/decoder/encoders/encoders.1/self_attn/MatMul_1]\n",
"Ignore MatMul due to non constant B: /[/decoder/encoders/encoders.2/self_attn/MatMul]\n",
"Ignore MatMul due to non constant B: /[/decoder/encoders/encoders.2/self_attn/MatMul_1]\n",
"Ignore MatMul due to non constant B: /[/decoder/encoders/encoders.3/self_attn/MatMul]\n",
"Ignore MatMul due to non constant B: /[/decoder/encoders/encoders.3/self_attn/MatMul_1]\n",
"saving vocoder onnx for male-dravidian...\n",
"============= Diagnostic Run torch.onnx.export version 2.0.1+cu117 =============\n",
"verbose: False, log level: Level.ERROR\n",
"======================= 0 NONE 0 NOTE 0 WARNING 0 ERROR ========================\n",
"\n",
"Removing weight norm...\n",
"saving fastspeech2 onnx for male, malayalam...\n",
"============= Diagnostic Run torch.onnx.export version 2.0.1+cu117 =============\n",
"verbose: False, log level: Level.ERROR\n",
"======================= 0 NONE 0 NOTE 0 WARNING 0 ERROR ========================\n",
"\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"WARNING:root:Please consider to run pre-processing before quantization. Refer to example: https://github.com/microsoft/onnxruntime-inference-examples/blob/main/quantization/image_classification/cpu/ReadMe.md \n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"Ignore MatMul due to non constant B: /[/encoder/encoders/encoders.0/self_attn/MatMul]\n",
"Ignore MatMul due to non constant B: /[/encoder/encoders/encoders.0/self_attn/MatMul_1]\n",
"Ignore MatMul due to non constant B: /[/encoder/encoders/encoders.1/self_attn/MatMul]\n",
"Ignore MatMul due to non constant B: /[/encoder/encoders/encoders.1/self_attn/MatMul_1]\n",
"Ignore MatMul due to non constant B: /[/encoder/encoders/encoders.2/self_attn/MatMul]\n",
"Ignore MatMul due to non constant B: /[/encoder/encoders/encoders.2/self_attn/MatMul_1]\n",
"Ignore MatMul due to non constant B: /[/encoder/encoders/encoders.3/self_attn/MatMul]\n",
"Ignore MatMul due to non constant B: /[/encoder/encoders/encoders.3/self_attn/MatMul_1]\n",
"Ignore MatMul due to non constant B: /[/decoder/encoders/encoders.0/self_attn/MatMul]\n",
"Ignore MatMul due to non constant B: /[/decoder/encoders/encoders.0/self_attn/MatMul_1]\n",
"Ignore MatMul due to non constant B: /[/decoder/encoders/encoders.1/self_attn/MatMul]\n",
"Ignore MatMul due to non constant B: /[/decoder/encoders/encoders.1/self_attn/MatMul_1]\n",
"Ignore MatMul due to non constant B: /[/decoder/encoders/encoders.2/self_attn/MatMul]\n",
"Ignore MatMul due to non constant B: /[/decoder/encoders/encoders.2/self_attn/MatMul_1]\n",
"Ignore MatMul due to non constant B: /[/decoder/encoders/encoders.3/self_attn/MatMul]\n",
"Ignore MatMul due to non constant B: /[/decoder/encoders/encoders.3/self_attn/MatMul_1]\n",
"Removing weight norm...\n",
"saving fastspeech2 onnx for male, manipuri...\n",
"============= Diagnostic Run torch.onnx.export version 2.0.1+cu117 =============\n",
"verbose: False, log level: Level.ERROR\n",
"======================= 0 NONE 0 NOTE 0 WARNING 0 ERROR ========================\n",
"\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"WARNING:root:Please consider to run pre-processing before quantization. Refer to example: https://github.com/microsoft/onnxruntime-inference-examples/blob/main/quantization/image_classification/cpu/ReadMe.md \n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"Ignore MatMul due to non constant B: /[/encoder/encoders/encoders.0/self_attn/MatMul]\n",
"Ignore MatMul due to non constant B: /[/encoder/encoders/encoders.0/self_attn/MatMul_1]\n",
"Ignore MatMul due to non constant B: /[/encoder/encoders/encoders.1/self_attn/MatMul]\n",
"Ignore MatMul due to non constant B: /[/encoder/encoders/encoders.1/self_attn/MatMul_1]\n",
"Ignore MatMul due to non constant B: /[/encoder/encoders/encoders.2/self_attn/MatMul]\n",
"Ignore MatMul due to non constant B: /[/encoder/encoders/encoders.2/self_attn/MatMul_1]\n",
"Ignore MatMul due to non constant B: /[/encoder/encoders/encoders.3/self_attn/MatMul]\n",
"Ignore MatMul due to non constant B: /[/encoder/encoders/encoders.3/self_attn/MatMul_1]\n",
"Ignore MatMul due to non constant B: /[/decoder/encoders/encoders.0/self_attn/MatMul]\n",
"Ignore MatMul due to non constant B: /[/decoder/encoders/encoders.0/self_attn/MatMul_1]\n",
"Ignore MatMul due to non constant B: /[/decoder/encoders/encoders.1/self_attn/MatMul]\n",
"Ignore MatMul due to non constant B: /[/decoder/encoders/encoders.1/self_attn/MatMul_1]\n",
"Ignore MatMul due to non constant B: /[/decoder/encoders/encoders.2/self_attn/MatMul]\n",
"Ignore MatMul due to non constant B: /[/decoder/encoders/encoders.2/self_attn/MatMul_1]\n",
"Ignore MatMul due to non constant B: /[/decoder/encoders/encoders.3/self_attn/MatMul]\n",
"Ignore MatMul due to non constant B: /[/decoder/encoders/encoders.3/self_attn/MatMul_1]\n",
"Removing weight norm...\n",
"saving fastspeech2 onnx for male, marathi...\n",
"============= Diagnostic Run torch.onnx.export version 2.0.1+cu117 =============\n",
"verbose: False, log level: Level.ERROR\n",
"======================= 0 NONE 0 NOTE 0 WARNING 0 ERROR ========================\n",
"\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"WARNING:root:Please consider to run pre-processing before quantization. Refer to example: https://github.com/microsoft/onnxruntime-inference-examples/blob/main/quantization/image_classification/cpu/ReadMe.md \n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"Ignore MatMul due to non constant B: /[/encoder/encoders/encoders.0/self_attn/MatMul]\n",
"Ignore MatMul due to non constant B: /[/encoder/encoders/encoders.0/self_attn/MatMul_1]\n",
"Ignore MatMul due to non constant B: /[/encoder/encoders/encoders.1/self_attn/MatMul]\n",
"Ignore MatMul due to non constant B: /[/encoder/encoders/encoders.1/self_attn/MatMul_1]\n",
"Ignore MatMul due to non constant B: /[/encoder/encoders/encoders.2/self_attn/MatMul]\n",
"Ignore MatMul due to non constant B: /[/encoder/encoders/encoders.2/self_attn/MatMul_1]\n",
"Ignore MatMul due to non constant B: /[/encoder/encoders/encoders.3/self_attn/MatMul]\n",
"Ignore MatMul due to non constant B: /[/encoder/encoders/encoders.3/self_attn/MatMul_1]\n",
"Ignore MatMul due to non constant B: /[/decoder/encoders/encoders.0/self_attn/MatMul]\n",
"Ignore MatMul due to non constant B: /[/decoder/encoders/encoders.0/self_attn/MatMul_1]\n",
"Ignore MatMul due to non constant B: /[/decoder/encoders/encoders.1/self_attn/MatMul]\n",
"Ignore MatMul due to non constant B: /[/decoder/encoders/encoders.1/self_attn/MatMul_1]\n",
"Ignore MatMul due to non constant B: /[/decoder/encoders/encoders.2/self_attn/MatMul]\n",
"Ignore MatMul due to non constant B: /[/decoder/encoders/encoders.2/self_attn/MatMul_1]\n",
"Ignore MatMul due to non constant B: /[/decoder/encoders/encoders.3/self_attn/MatMul]\n",
"Ignore MatMul due to non constant B: /[/decoder/encoders/encoders.3/self_attn/MatMul_1]\n",
"Removing weight norm...\n",
"saving fastspeech2 onnx for male, odia...\n",
"============= Diagnostic Run torch.onnx.export version 2.0.1+cu117 =============\n",
"verbose: False, log level: Level.ERROR\n",
"======================= 0 NONE 0 NOTE 0 WARNING 0 ERROR ========================\n",
"\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"WARNING:root:Please consider to run pre-processing before quantization. Refer to example: https://github.com/microsoft/onnxruntime-inference-examples/blob/main/quantization/image_classification/cpu/ReadMe.md \n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"Ignore MatMul due to non constant B: /[/encoder/encoders/encoders.0/self_attn/MatMul]\n",
"Ignore MatMul due to non constant B: /[/encoder/encoders/encoders.0/self_attn/MatMul_1]\n",
"Ignore MatMul due to non constant B: /[/encoder/encoders/encoders.1/self_attn/MatMul]\n",
"Ignore MatMul due to non constant B: /[/encoder/encoders/encoders.1/self_attn/MatMul_1]\n",
"Ignore MatMul due to non constant B: /[/encoder/encoders/encoders.2/self_attn/MatMul]\n",
"Ignore MatMul due to non constant B: /[/encoder/encoders/encoders.2/self_attn/MatMul_1]\n",
"Ignore MatMul due to non constant B: /[/encoder/encoders/encoders.3/self_attn/MatMul]\n",
"Ignore MatMul due to non constant B: /[/encoder/encoders/encoders.3/self_attn/MatMul_1]\n",
"Ignore MatMul due to non constant B: /[/decoder/encoders/encoders.0/self_attn/MatMul]\n",
"Ignore MatMul due to non constant B: /[/decoder/encoders/encoders.0/self_attn/MatMul_1]\n",
"Ignore MatMul due to non constant B: /[/decoder/encoders/encoders.1/self_attn/MatMul]\n",
"Ignore MatMul due to non constant B: /[/decoder/encoders/encoders.1/self_attn/MatMul_1]\n",
"Ignore MatMul due to non constant B: /[/decoder/encoders/encoders.2/self_attn/MatMul]\n",
"Ignore MatMul due to non constant B: /[/decoder/encoders/encoders.2/self_attn/MatMul_1]\n",
"Ignore MatMul due to non constant B: /[/decoder/encoders/encoders.3/self_attn/MatMul]\n",
"Ignore MatMul due to non constant B: /[/decoder/encoders/encoders.3/self_attn/MatMul_1]\n",
"Removing weight norm...\n",
"saving fastspeech2 onnx for male, punjabi...\n",
"============= Diagnostic Run torch.onnx.export version 2.0.1+cu117 =============\n",
"verbose: False, log level: Level.ERROR\n",
"======================= 0 NONE 0 NOTE 0 WARNING 0 ERROR ========================\n",
"\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"WARNING:root:Please consider to run pre-processing before quantization. Refer to example: https://github.com/microsoft/onnxruntime-inference-examples/blob/main/quantization/image_classification/cpu/ReadMe.md \n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"Ignore MatMul due to non constant B: /[/encoder/encoders/encoders.0/self_attn/MatMul]\n",
"Ignore MatMul due to non constant B: /[/encoder/encoders/encoders.0/self_attn/MatMul_1]\n",
"Ignore MatMul due to non constant B: /[/encoder/encoders/encoders.1/self_attn/MatMul]\n",
"Ignore MatMul due to non constant B: /[/encoder/encoders/encoders.1/self_attn/MatMul_1]\n",
"Ignore MatMul due to non constant B: /[/encoder/encoders/encoders.2/self_attn/MatMul]\n",
"Ignore MatMul due to non constant B: /[/encoder/encoders/encoders.2/self_attn/MatMul_1]\n",
"Ignore MatMul due to non constant B: /[/encoder/encoders/encoders.3/self_attn/MatMul]\n",
"Ignore MatMul due to non constant B: /[/encoder/encoders/encoders.3/self_attn/MatMul_1]\n",
"Ignore MatMul due to non constant B: /[/decoder/encoders/encoders.0/self_attn/MatMul]\n",
"Ignore MatMul due to non constant B: /[/decoder/encoders/encoders.0/self_attn/MatMul_1]\n",
"Ignore MatMul due to non constant B: /[/decoder/encoders/encoders.1/self_attn/MatMul]\n",
"Ignore MatMul due to non constant B: /[/decoder/encoders/encoders.1/self_attn/MatMul_1]\n",
"Ignore MatMul due to non constant B: /[/decoder/encoders/encoders.2/self_attn/MatMul]\n",
"Ignore MatMul due to non constant B: /[/decoder/encoders/encoders.2/self_attn/MatMul_1]\n",
"Ignore MatMul due to non constant B: /[/decoder/encoders/encoders.3/self_attn/MatMul]\n",
"Ignore MatMul due to non constant B: /[/decoder/encoders/encoders.3/self_attn/MatMul_1]\n",
"Removing weight norm...\n",
"saving fastspeech2 onnx for male, rajasthani...\n",
"============= Diagnostic Run torch.onnx.export version 2.0.1+cu117 =============\n",
"verbose: False, log level: Level.ERROR\n",
"======================= 0 NONE 0 NOTE 0 WARNING 0 ERROR ========================\n",
"\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"WARNING:root:Please consider to run pre-processing before quantization. Refer to example: https://github.com/microsoft/onnxruntime-inference-examples/blob/main/quantization/image_classification/cpu/ReadMe.md \n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"Ignore MatMul due to non constant B: /[/encoder/encoders/encoders.0/self_attn/MatMul]\n",
"Ignore MatMul due to non constant B: /[/encoder/encoders/encoders.0/self_attn/MatMul_1]\n",
"Ignore MatMul due to non constant B: /[/encoder/encoders/encoders.1/self_attn/MatMul]\n",
"Ignore MatMul due to non constant B: /[/encoder/encoders/encoders.1/self_attn/MatMul_1]\n",
"Ignore MatMul due to non constant B: /[/encoder/encoders/encoders.2/self_attn/MatMul]\n",
"Ignore MatMul due to non constant B: /[/encoder/encoders/encoders.2/self_attn/MatMul_1]\n",
"Ignore MatMul due to non constant B: /[/encoder/encoders/encoders.3/self_attn/MatMul]\n",
"Ignore MatMul due to non constant B: /[/encoder/encoders/encoders.3/self_attn/MatMul_1]\n",
"Ignore MatMul due to non constant B: /[/decoder/encoders/encoders.0/self_attn/MatMul]\n",
"Ignore MatMul due to non constant B: /[/decoder/encoders/encoders.0/self_attn/MatMul_1]\n",
"Ignore MatMul due to non constant B: /[/decoder/encoders/encoders.1/self_attn/MatMul]\n",
"Ignore MatMul due to non constant B: /[/decoder/encoders/encoders.1/self_attn/MatMul_1]\n",
"Ignore MatMul due to non constant B: /[/decoder/encoders/encoders.2/self_attn/MatMul]\n",
"Ignore MatMul due to non constant B: /[/decoder/encoders/encoders.2/self_attn/MatMul_1]\n",
"Ignore MatMul due to non constant B: /[/decoder/encoders/encoders.3/self_attn/MatMul]\n",
"Ignore MatMul due to non constant B: /[/decoder/encoders/encoders.3/self_attn/MatMul_1]\n",
"Removing weight norm...\n",
"saving fastspeech2 onnx for male, tamil...\n",
"============= Diagnostic Run torch.onnx.export version 2.0.1+cu117 =============\n",
"verbose: False, log level: Level.ERROR\n",
"======================= 0 NONE 0 NOTE 0 WARNING 0 ERROR ========================\n",
"\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"WARNING:root:Please consider to run pre-processing before quantization. Refer to example: https://github.com/microsoft/onnxruntime-inference-examples/blob/main/quantization/image_classification/cpu/ReadMe.md \n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"Ignore MatMul due to non constant B: /[/encoder/encoders/encoders.0/self_attn/MatMul]\n",
"Ignore MatMul due to non constant B: /[/encoder/encoders/encoders.0/self_attn/MatMul_1]\n",
"Ignore MatMul due to non constant B: /[/encoder/encoders/encoders.1/self_attn/MatMul]\n",
"Ignore MatMul due to non constant B: /[/encoder/encoders/encoders.1/self_attn/MatMul_1]\n",
"Ignore MatMul due to non constant B: /[/encoder/encoders/encoders.2/self_attn/MatMul]\n",
"Ignore MatMul due to non constant B: /[/encoder/encoders/encoders.2/self_attn/MatMul_1]\n",
"Ignore MatMul due to non constant B: /[/encoder/encoders/encoders.3/self_attn/MatMul]\n",
"Ignore MatMul due to non constant B: /[/encoder/encoders/encoders.3/self_attn/MatMul_1]\n",
"Ignore MatMul due to non constant B: /[/decoder/encoders/encoders.0/self_attn/MatMul]\n",
"Ignore MatMul due to non constant B: /[/decoder/encoders/encoders.0/self_attn/MatMul_1]\n",
"Ignore MatMul due to non constant B: /[/decoder/encoders/encoders.1/self_attn/MatMul]\n",
"Ignore MatMul due to non constant B: /[/decoder/encoders/encoders.1/self_attn/MatMul_1]\n",
"Ignore MatMul due to non constant B: /[/decoder/encoders/encoders.2/self_attn/MatMul]\n",
"Ignore MatMul due to non constant B: /[/decoder/encoders/encoders.2/self_attn/MatMul_1]\n",
"Ignore MatMul due to non constant B: /[/decoder/encoders/encoders.3/self_attn/MatMul]\n",
"Ignore MatMul due to non constant B: /[/decoder/encoders/encoders.3/self_attn/MatMul_1]\n",
"Removing weight norm...\n",
"saving fastspeech2 onnx for male, telugu...\n",
"============= Diagnostic Run torch.onnx.export version 2.0.1+cu117 =============\n",
"verbose: False, log level: Level.ERROR\n",
"======================= 0 NONE 0 NOTE 0 WARNING 0 ERROR ========================\n",
"\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"WARNING:root:Please consider to run pre-processing before quantization. Refer to example: https://github.com/microsoft/onnxruntime-inference-examples/blob/main/quantization/image_classification/cpu/ReadMe.md \n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"Ignore MatMul due to non constant B: /[/encoder/encoders/encoders.0/self_attn/MatMul]\n",
"Ignore MatMul due to non constant B: /[/encoder/encoders/encoders.0/self_attn/MatMul_1]\n",
"Ignore MatMul due to non constant B: /[/encoder/encoders/encoders.1/self_attn/MatMul]\n",
"Ignore MatMul due to non constant B: /[/encoder/encoders/encoders.1/self_attn/MatMul_1]\n",
"Ignore MatMul due to non constant B: /[/encoder/encoders/encoders.2/self_attn/MatMul]\n",
"Ignore MatMul due to non constant B: /[/encoder/encoders/encoders.2/self_attn/MatMul_1]\n",
"Ignore MatMul due to non constant B: /[/encoder/encoders/encoders.3/self_attn/MatMul]\n",
"Ignore MatMul due to non constant B: /[/encoder/encoders/encoders.3/self_attn/MatMul_1]\n",
"Ignore MatMul due to non constant B: /[/decoder/encoders/encoders.0/self_attn/MatMul]\n",
"Ignore MatMul due to non constant B: /[/decoder/encoders/encoders.0/self_attn/MatMul_1]\n",
"Ignore MatMul due to non constant B: /[/decoder/encoders/encoders.1/self_attn/MatMul]\n",
"Ignore MatMul due to non constant B: /[/decoder/encoders/encoders.1/self_attn/MatMul_1]\n",
"Ignore MatMul due to non constant B: /[/decoder/encoders/encoders.2/self_attn/MatMul]\n",
"Ignore MatMul due to non constant B: /[/decoder/encoders/encoders.2/self_attn/MatMul_1]\n",
"Ignore MatMul due to non constant B: /[/decoder/encoders/encoders.3/self_attn/MatMul]\n",
"Ignore MatMul due to non constant B: /[/decoder/encoders/encoders.3/self_attn/MatMul_1]\n",
"Removing weight norm...\n",
"saving fastspeech2 onnx for male, urdu...\n",
"============= Diagnostic Run torch.onnx.export version 2.0.1+cu117 =============\n",
"verbose: False, log level: Level.ERROR\n",
"======================= 0 NONE 0 NOTE 0 WARNING 0 ERROR ========================\n",
"\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"WARNING:root:Please consider to run pre-processing before quantization. Refer to example: https://github.com/microsoft/onnxruntime-inference-examples/blob/main/quantization/image_classification/cpu/ReadMe.md \n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"Ignore MatMul due to non constant B: /[/encoder/encoders/encoders.0/self_attn/MatMul]\n",
"Ignore MatMul due to non constant B: /[/encoder/encoders/encoders.0/self_attn/MatMul_1]\n",
"Ignore MatMul due to non constant B: /[/encoder/encoders/encoders.1/self_attn/MatMul]\n",
"Ignore MatMul due to non constant B: /[/encoder/encoders/encoders.1/self_attn/MatMul_1]\n",
"Ignore MatMul due to non constant B: /[/encoder/encoders/encoders.2/self_attn/MatMul]\n",
"Ignore MatMul due to non constant B: /[/encoder/encoders/encoders.2/self_attn/MatMul_1]\n",
"Ignore MatMul due to non constant B: /[/encoder/encoders/encoders.3/self_attn/MatMul]\n",
"Ignore MatMul due to non constant B: /[/encoder/encoders/encoders.3/self_attn/MatMul_1]\n",
"Ignore MatMul due to non constant B: /[/decoder/encoders/encoders.0/self_attn/MatMul]\n",
"Ignore MatMul due to non constant B: /[/decoder/encoders/encoders.0/self_attn/MatMul_1]\n",
"Ignore MatMul due to non constant B: /[/decoder/encoders/encoders.1/self_attn/MatMul]\n",
"Ignore MatMul due to non constant B: /[/decoder/encoders/encoders.1/self_attn/MatMul_1]\n",
"Ignore MatMul due to non constant B: /[/decoder/encoders/encoders.2/self_attn/MatMul]\n",
"Ignore MatMul due to non constant B: /[/decoder/encoders/encoders.2/self_attn/MatMul_1]\n",
"Ignore MatMul due to non constant B: /[/decoder/encoders/encoders.3/self_attn/MatMul]\n",
"Ignore MatMul due to non constant B: /[/decoder/encoders/encoders.3/self_attn/MatMul_1]\n",
"Removing weight norm...\n",
"saving fastspeech2 onnx for female, assamese...\n",
"============= Diagnostic Run torch.onnx.export version 2.0.1+cu117 =============\n",
"verbose: False, log level: Level.ERROR\n",
"======================= 0 NONE 0 NOTE 0 WARNING 0 ERROR ========================\n",
"\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"WARNING:root:Please consider to run pre-processing before quantization. Refer to example: https://github.com/microsoft/onnxruntime-inference-examples/blob/main/quantization/image_classification/cpu/ReadMe.md \n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"Ignore MatMul due to non constant B: /[/encoder/encoders/encoders.0/self_attn/MatMul]\n",
"Ignore MatMul due to non constant B: /[/encoder/encoders/encoders.0/self_attn/MatMul_1]\n",
"Ignore MatMul due to non constant B: /[/encoder/encoders/encoders.1/self_attn/MatMul]\n",
"Ignore MatMul due to non constant B: /[/encoder/encoders/encoders.1/self_attn/MatMul_1]\n",
"Ignore MatMul due to non constant B: /[/encoder/encoders/encoders.2/self_attn/MatMul]\n",
"Ignore MatMul due to non constant B: /[/encoder/encoders/encoders.2/self_attn/MatMul_1]\n",
"Ignore MatMul due to non constant B: /[/encoder/encoders/encoders.3/self_attn/MatMul]\n",
"Ignore MatMul due to non constant B: /[/encoder/encoders/encoders.3/self_attn/MatMul_1]\n",
"Ignore MatMul due to non constant B: /[/decoder/encoders/encoders.0/self_attn/MatMul]\n",
"Ignore MatMul due to non constant B: /[/decoder/encoders/encoders.0/self_attn/MatMul_1]\n",
"Ignore MatMul due to non constant B: /[/decoder/encoders/encoders.1/self_attn/MatMul]\n",
"Ignore MatMul due to non constant B: /[/decoder/encoders/encoders.1/self_attn/MatMul_1]\n",
"Ignore MatMul due to non constant B: /[/decoder/encoders/encoders.2/self_attn/MatMul]\n",
"Ignore MatMul due to non constant B: /[/decoder/encoders/encoders.2/self_attn/MatMul_1]\n",
"Ignore MatMul due to non constant B: /[/decoder/encoders/encoders.3/self_attn/MatMul]\n",
"Ignore MatMul due to non constant B: /[/decoder/encoders/encoders.3/self_attn/MatMul_1]\n",
"saving vocoder onnx for female-aryan...\n",
"============= Diagnostic Run torch.onnx.export version 2.0.1+cu117 =============\n",
"verbose: False, log level: Level.ERROR\n",
"======================= 0 NONE 0 NOTE 0 WARNING 0 ERROR ========================\n",
"\n",
"Removing weight norm...\n",
"saving fastspeech2 onnx for female, bengali...\n",
"============= Diagnostic Run torch.onnx.export version 2.0.1+cu117 =============\n",
"verbose: False, log level: Level.ERROR\n",
"======================= 0 NONE 0 NOTE 0 WARNING 0 ERROR ========================\n",
"\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"WARNING:root:Please consider to run pre-processing before quantization. Refer to example: https://github.com/microsoft/onnxruntime-inference-examples/blob/main/quantization/image_classification/cpu/ReadMe.md \n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"Ignore MatMul due to non constant B: /[/encoder/encoders/encoders.0/self_attn/MatMul]\n",
"Ignore MatMul due to non constant B: /[/encoder/encoders/encoders.0/self_attn/MatMul_1]\n",
"Ignore MatMul due to non constant B: /[/encoder/encoders/encoders.1/self_attn/MatMul]\n",
"Ignore MatMul due to non constant B: /[/encoder/encoders/encoders.1/self_attn/MatMul_1]\n",
"Ignore MatMul due to non constant B: /[/encoder/encoders/encoders.2/self_attn/MatMul]\n",
"Ignore MatMul due to non constant B: /[/encoder/encoders/encoders.2/self_attn/MatMul_1]\n",
"Ignore MatMul due to non constant B: /[/encoder/encoders/encoders.3/self_attn/MatMul]\n",
"Ignore MatMul due to non constant B: /[/encoder/encoders/encoders.3/self_attn/MatMul_1]\n",
"Ignore MatMul due to non constant B: /[/decoder/encoders/encoders.0/self_attn/MatMul]\n",
"Ignore MatMul due to non constant B: /[/decoder/encoders/encoders.0/self_attn/MatMul_1]\n",
"Ignore MatMul due to non constant B: /[/decoder/encoders/encoders.1/self_attn/MatMul]\n",
"Ignore MatMul due to non constant B: /[/decoder/encoders/encoders.1/self_attn/MatMul_1]\n",
"Ignore MatMul due to non constant B: /[/decoder/encoders/encoders.2/self_attn/MatMul]\n",
"Ignore MatMul due to non constant B: /[/decoder/encoders/encoders.2/self_attn/MatMul_1]\n",
"Ignore MatMul due to non constant B: /[/decoder/encoders/encoders.3/self_attn/MatMul]\n",
"Ignore MatMul due to non constant B: /[/decoder/encoders/encoders.3/self_attn/MatMul_1]\n",
"Removing weight norm...\n",
"saving fastspeech2 onnx for female, bodo...\n",
"============= Diagnostic Run torch.onnx.export version 2.0.1+cu117 =============\n",
"verbose: False, log level: Level.ERROR\n",
"======================= 0 NONE 0 NOTE 0 WARNING 0 ERROR ========================\n",
"\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"WARNING:root:Please consider to run pre-processing before quantization. Refer to example: https://github.com/microsoft/onnxruntime-inference-examples/blob/main/quantization/image_classification/cpu/ReadMe.md \n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"Ignore MatMul due to non constant B: /[/encoder/encoders/encoders.0/self_attn/MatMul]\n",
"Ignore MatMul due to non constant B: /[/encoder/encoders/encoders.0/self_attn/MatMul_1]\n",
"Ignore MatMul due to non constant B: /[/encoder/encoders/encoders.1/self_attn/MatMul]\n",
"Ignore MatMul due to non constant B: /[/encoder/encoders/encoders.1/self_attn/MatMul_1]\n",
"Ignore MatMul due to non constant B: /[/encoder/encoders/encoders.2/self_attn/MatMul]\n",
"Ignore MatMul due to non constant B: /[/encoder/encoders/encoders.2/self_attn/MatMul_1]\n",
"Ignore MatMul due to non constant B: /[/encoder/encoders/encoders.3/self_attn/MatMul]\n",
"Ignore MatMul due to non constant B: /[/encoder/encoders/encoders.3/self_attn/MatMul_1]\n",
"Ignore MatMul due to non constant B: /[/decoder/encoders/encoders.0/self_attn/MatMul]\n",
"Ignore MatMul due to non constant B: /[/decoder/encoders/encoders.0/self_attn/MatMul_1]\n",
"Ignore MatMul due to non constant B: /[/decoder/encoders/encoders.1/self_attn/MatMul]\n",
"Ignore MatMul due to non constant B: /[/decoder/encoders/encoders.1/self_attn/MatMul_1]\n",
"Ignore MatMul due to non constant B: /[/decoder/encoders/encoders.2/self_attn/MatMul]\n",
"Ignore MatMul due to non constant B: /[/decoder/encoders/encoders.2/self_attn/MatMul_1]\n",
"Ignore MatMul due to non constant B: /[/decoder/encoders/encoders.3/self_attn/MatMul]\n",
"Ignore MatMul due to non constant B: /[/decoder/encoders/encoders.3/self_attn/MatMul_1]\n",
"Removing weight norm...\n",
"saving fastspeech2 onnx for female, english...\n",
"============= Diagnostic Run torch.onnx.export version 2.0.1+cu117 =============\n",
"verbose: False, log level: Level.ERROR\n",
"======================= 0 NONE 0 NOTE 0 WARNING 0 ERROR ========================\n",
"\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"WARNING:root:Please consider to run pre-processing before quantization. Refer to example: https://github.com/microsoft/onnxruntime-inference-examples/blob/main/quantization/image_classification/cpu/ReadMe.md \n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"Ignore MatMul due to non constant B: /[/encoder/encoders/encoders.0/self_attn/MatMul]\n",
"Ignore MatMul due to non constant B: /[/encoder/encoders/encoders.0/self_attn/MatMul_1]\n",
"Ignore MatMul due to non constant B: /[/encoder/encoders/encoders.1/self_attn/MatMul]\n",
"Ignore MatMul due to non constant B: /[/encoder/encoders/encoders.1/self_attn/MatMul_1]\n",
"Ignore MatMul due to non constant B: /[/encoder/encoders/encoders.2/self_attn/MatMul]\n",
"Ignore MatMul due to non constant B: /[/encoder/encoders/encoders.2/self_attn/MatMul_1]\n",
"Ignore MatMul due to non constant B: /[/encoder/encoders/encoders.3/self_attn/MatMul]\n",
"Ignore MatMul due to non constant B: /[/encoder/encoders/encoders.3/self_attn/MatMul_1]\n",
"Ignore MatMul due to non constant B: /[/decoder/encoders/encoders.0/self_attn/MatMul]\n",
"Ignore MatMul due to non constant B: /[/decoder/encoders/encoders.0/self_attn/MatMul_1]\n",
"Ignore MatMul due to non constant B: /[/decoder/encoders/encoders.1/self_attn/MatMul]\n",
"Ignore MatMul due to non constant B: /[/decoder/encoders/encoders.1/self_attn/MatMul_1]\n",
"Ignore MatMul due to non constant B: /[/decoder/encoders/encoders.2/self_attn/MatMul]\n",
"Ignore MatMul due to non constant B: /[/decoder/encoders/encoders.2/self_attn/MatMul_1]\n",
"Ignore MatMul due to non constant B: /[/decoder/encoders/encoders.3/self_attn/MatMul]\n",
"Ignore MatMul due to non constant B: /[/decoder/encoders/encoders.3/self_attn/MatMul_1]\n",
"Removing weight norm...\n",
"saving fastspeech2 onnx for female, gujarati...\n",
"============= Diagnostic Run torch.onnx.export version 2.0.1+cu117 =============\n",
"verbose: False, log level: Level.ERROR\n",
"======================= 0 NONE 0 NOTE 0 WARNING 0 ERROR ========================\n",
"\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"WARNING:root:Please consider to run pre-processing before quantization. Refer to example: https://github.com/microsoft/onnxruntime-inference-examples/blob/main/quantization/image_classification/cpu/ReadMe.md \n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"Ignore MatMul due to non constant B: /[/encoder/encoders/encoders.0/self_attn/MatMul]\n",
"Ignore MatMul due to non constant B: /[/encoder/encoders/encoders.0/self_attn/MatMul_1]\n",
"Ignore MatMul due to non constant B: /[/encoder/encoders/encoders.1/self_attn/MatMul]\n",
"Ignore MatMul due to non constant B: /[/encoder/encoders/encoders.1/self_attn/MatMul_1]\n",
"Ignore MatMul due to non constant B: /[/encoder/encoders/encoders.2/self_attn/MatMul]\n",
"Ignore MatMul due to non constant B: /[/encoder/encoders/encoders.2/self_attn/MatMul_1]\n",
"Ignore MatMul due to non constant B: /[/encoder/encoders/encoders.3/self_attn/MatMul]\n",
"Ignore MatMul due to non constant B: /[/encoder/encoders/encoders.3/self_attn/MatMul_1]\n",
"Ignore MatMul due to non constant B: /[/decoder/encoders/encoders.0/self_attn/MatMul]\n",
"Ignore MatMul due to non constant B: /[/decoder/encoders/encoders.0/self_attn/MatMul_1]\n",
"Ignore MatMul due to non constant B: /[/decoder/encoders/encoders.1/self_attn/MatMul]\n",
"Ignore MatMul due to non constant B: /[/decoder/encoders/encoders.1/self_attn/MatMul_1]\n",
"Ignore MatMul due to non constant B: /[/decoder/encoders/encoders.2/self_attn/MatMul]\n",
"Ignore MatMul due to non constant B: /[/decoder/encoders/encoders.2/self_attn/MatMul_1]\n",
"Ignore MatMul due to non constant B: /[/decoder/encoders/encoders.3/self_attn/MatMul]\n",
"Ignore MatMul due to non constant B: /[/decoder/encoders/encoders.3/self_attn/MatMul_1]\n",
"Removing weight norm...\n",
"saving fastspeech2 onnx for female, hindi...\n",
"============= Diagnostic Run torch.onnx.export version 2.0.1+cu117 =============\n",
"verbose: False, log level: Level.ERROR\n",
"======================= 0 NONE 0 NOTE 0 WARNING 0 ERROR ========================\n",
"\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"WARNING:root:Please consider to run pre-processing before quantization. Refer to example: https://github.com/microsoft/onnxruntime-inference-examples/blob/main/quantization/image_classification/cpu/ReadMe.md \n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"Ignore MatMul due to non constant B: /[/encoder/encoders/encoders.0/self_attn/MatMul]\n",
"Ignore MatMul due to non constant B: /[/encoder/encoders/encoders.0/self_attn/MatMul_1]\n",
"Ignore MatMul due to non constant B: /[/encoder/encoders/encoders.1/self_attn/MatMul]\n",
"Ignore MatMul due to non constant B: /[/encoder/encoders/encoders.1/self_attn/MatMul_1]\n",
"Ignore MatMul due to non constant B: /[/encoder/encoders/encoders.2/self_attn/MatMul]\n",
"Ignore MatMul due to non constant B: /[/encoder/encoders/encoders.2/self_attn/MatMul_1]\n",
"Ignore MatMul due to non constant B: /[/encoder/encoders/encoders.3/self_attn/MatMul]\n",
"Ignore MatMul due to non constant B: /[/encoder/encoders/encoders.3/self_attn/MatMul_1]\n",
"Ignore MatMul due to non constant B: /[/decoder/encoders/encoders.0/self_attn/MatMul]\n",
"Ignore MatMul due to non constant B: /[/decoder/encoders/encoders.0/self_attn/MatMul_1]\n",
"Ignore MatMul due to non constant B: /[/decoder/encoders/encoders.1/self_attn/MatMul]\n",
"Ignore MatMul due to non constant B: /[/decoder/encoders/encoders.1/self_attn/MatMul_1]\n",
"Ignore MatMul due to non constant B: /[/decoder/encoders/encoders.2/self_attn/MatMul]\n",
"Ignore MatMul due to non constant B: /[/decoder/encoders/encoders.2/self_attn/MatMul_1]\n",
"Ignore MatMul due to non constant B: /[/decoder/encoders/encoders.3/self_attn/MatMul]\n",
"Ignore MatMul due to non constant B: /[/decoder/encoders/encoders.3/self_attn/MatMul_1]\n",
"Removing weight norm...\n",
"saving fastspeech2 onnx for female, kannada...\n",
"============= Diagnostic Run torch.onnx.export version 2.0.1+cu117 =============\n",
"verbose: False, log level: Level.ERROR\n",
"======================= 0 NONE 0 NOTE 0 WARNING 0 ERROR ========================\n",
"\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"WARNING:root:Please consider to run pre-processing before quantization. Refer to example: https://github.com/microsoft/onnxruntime-inference-examples/blob/main/quantization/image_classification/cpu/ReadMe.md \n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"Ignore MatMul due to non constant B: /[/encoder/encoders/encoders.0/self_attn/MatMul]\n",
"Ignore MatMul due to non constant B: /[/encoder/encoders/encoders.0/self_attn/MatMul_1]\n",
"Ignore MatMul due to non constant B: /[/encoder/encoders/encoders.1/self_attn/MatMul]\n",
"Ignore MatMul due to non constant B: /[/encoder/encoders/encoders.1/self_attn/MatMul_1]\n",
"Ignore MatMul due to non constant B: /[/encoder/encoders/encoders.2/self_attn/MatMul]\n",
"Ignore MatMul due to non constant B: /[/encoder/encoders/encoders.2/self_attn/MatMul_1]\n",
"Ignore MatMul due to non constant B: /[/encoder/encoders/encoders.3/self_attn/MatMul]\n",
"Ignore MatMul due to non constant B: /[/encoder/encoders/encoders.3/self_attn/MatMul_1]\n",
"Ignore MatMul due to non constant B: /[/decoder/encoders/encoders.0/self_attn/MatMul]\n",
"Ignore MatMul due to non constant B: /[/decoder/encoders/encoders.0/self_attn/MatMul_1]\n",
"Ignore MatMul due to non constant B: /[/decoder/encoders/encoders.1/self_attn/MatMul]\n",
"Ignore MatMul due to non constant B: /[/decoder/encoders/encoders.1/self_attn/MatMul_1]\n",
"Ignore MatMul due to non constant B: /[/decoder/encoders/encoders.2/self_attn/MatMul]\n",
"Ignore MatMul due to non constant B: /[/decoder/encoders/encoders.2/self_attn/MatMul_1]\n",
"Ignore MatMul due to non constant B: /[/decoder/encoders/encoders.3/self_attn/MatMul]\n",
"Ignore MatMul due to non constant B: /[/decoder/encoders/encoders.3/self_attn/MatMul_1]\n",
"saving vocoder onnx for female-dravidian...\n",
"============= Diagnostic Run torch.onnx.export version 2.0.1+cu117 =============\n",
"verbose: False, log level: Level.ERROR\n",
"======================= 0 NONE 0 NOTE 0 WARNING 0 ERROR ========================\n",
"\n",
"Removing weight norm...\n",
"saving fastspeech2 onnx for female, malayalam...\n",
"============= Diagnostic Run torch.onnx.export version 2.0.1+cu117 =============\n",
"verbose: False, log level: Level.ERROR\n",
"======================= 0 NONE 0 NOTE 0 WARNING 0 ERROR ========================\n",
"\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"WARNING:root:Please consider to run pre-processing before quantization. Refer to example: https://github.com/microsoft/onnxruntime-inference-examples/blob/main/quantization/image_classification/cpu/ReadMe.md \n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"Ignore MatMul due to non constant B: /[/encoder/encoders/encoders.0/self_attn/MatMul]\n",
"Ignore MatMul due to non constant B: /[/encoder/encoders/encoders.0/self_attn/MatMul_1]\n",
"Ignore MatMul due to non constant B: /[/encoder/encoders/encoders.1/self_attn/MatMul]\n",
"Ignore MatMul due to non constant B: /[/encoder/encoders/encoders.1/self_attn/MatMul_1]\n",
"Ignore MatMul due to non constant B: /[/encoder/encoders/encoders.2/self_attn/MatMul]\n",
"Ignore MatMul due to non constant B: /[/encoder/encoders/encoders.2/self_attn/MatMul_1]\n",
"Ignore MatMul due to non constant B: /[/encoder/encoders/encoders.3/self_attn/MatMul]\n",
"Ignore MatMul due to non constant B: /[/encoder/encoders/encoders.3/self_attn/MatMul_1]\n",
"Ignore MatMul due to non constant B: /[/decoder/encoders/encoders.0/self_attn/MatMul]\n",
"Ignore MatMul due to non constant B: /[/decoder/encoders/encoders.0/self_attn/MatMul_1]\n",
"Ignore MatMul due to non constant B: /[/decoder/encoders/encoders.1/self_attn/MatMul]\n",
"Ignore MatMul due to non constant B: /[/decoder/encoders/encoders.1/self_attn/MatMul_1]\n",
"Ignore MatMul due to non constant B: /[/decoder/encoders/encoders.2/self_attn/MatMul]\n",
"Ignore MatMul due to non constant B: /[/decoder/encoders/encoders.2/self_attn/MatMul_1]\n",
"Ignore MatMul due to non constant B: /[/decoder/encoders/encoders.3/self_attn/MatMul]\n",
"Ignore MatMul due to non constant B: /[/decoder/encoders/encoders.3/self_attn/MatMul_1]\n",
"Removing weight norm...\n",
"saving fastspeech2 onnx for female, manipuri...\n",
"============= Diagnostic Run torch.onnx.export version 2.0.1+cu117 =============\n",
"verbose: False, log level: Level.ERROR\n",
"======================= 0 NONE 0 NOTE 0 WARNING 0 ERROR ========================\n",
"\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"WARNING:root:Please consider to run pre-processing before quantization. Refer to example: https://github.com/microsoft/onnxruntime-inference-examples/blob/main/quantization/image_classification/cpu/ReadMe.md \n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"Ignore MatMul due to non constant B: /[/encoder/encoders/encoders.0/self_attn/MatMul]\n",
"Ignore MatMul due to non constant B: /[/encoder/encoders/encoders.0/self_attn/MatMul_1]\n",
"Ignore MatMul due to non constant B: /[/encoder/encoders/encoders.1/self_attn/MatMul]\n",
"Ignore MatMul due to non constant B: /[/encoder/encoders/encoders.1/self_attn/MatMul_1]\n",
"Ignore MatMul due to non constant B: /[/encoder/encoders/encoders.2/self_attn/MatMul]\n",
"Ignore MatMul due to non constant B: /[/encoder/encoders/encoders.2/self_attn/MatMul_1]\n",
"Ignore MatMul due to non constant B: /[/encoder/encoders/encoders.3/self_attn/MatMul]\n",
"Ignore MatMul due to non constant B: /[/encoder/encoders/encoders.3/self_attn/MatMul_1]\n",
"Ignore MatMul due to non constant B: /[/decoder/encoders/encoders.0/self_attn/MatMul]\n",
"Ignore MatMul due to non constant B: /[/decoder/encoders/encoders.0/self_attn/MatMul_1]\n",
"Ignore MatMul due to non constant B: /[/decoder/encoders/encoders.1/self_attn/MatMul]\n",
"Ignore MatMul due to non constant B: /[/decoder/encoders/encoders.1/self_attn/MatMul_1]\n",
"Ignore MatMul due to non constant B: /[/decoder/encoders/encoders.2/self_attn/MatMul]\n",
"Ignore MatMul due to non constant B: /[/decoder/encoders/encoders.2/self_attn/MatMul_1]\n",
"Ignore MatMul due to non constant B: /[/decoder/encoders/encoders.3/self_attn/MatMul]\n",
"Ignore MatMul due to non constant B: /[/decoder/encoders/encoders.3/self_attn/MatMul_1]\n",
"Removing weight norm...\n",
"saving fastspeech2 onnx for female, marathi...\n",
"============= Diagnostic Run torch.onnx.export version 2.0.1+cu117 =============\n",
"verbose: False, log level: Level.ERROR\n",
"======================= 0 NONE 0 NOTE 0 WARNING 0 ERROR ========================\n",
"\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"WARNING:root:Please consider to run pre-processing before quantization. Refer to example: https://github.com/microsoft/onnxruntime-inference-examples/blob/main/quantization/image_classification/cpu/ReadMe.md \n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"Ignore MatMul due to non constant B: /[/encoder/encoders/encoders.0/self_attn/MatMul]\n",
"Ignore MatMul due to non constant B: /[/encoder/encoders/encoders.0/self_attn/MatMul_1]\n",
"Ignore MatMul due to non constant B: /[/encoder/encoders/encoders.1/self_attn/MatMul]\n",
"Ignore MatMul due to non constant B: /[/encoder/encoders/encoders.1/self_attn/MatMul_1]\n",
"Ignore MatMul due to non constant B: /[/encoder/encoders/encoders.2/self_attn/MatMul]\n",
"Ignore MatMul due to non constant B: /[/encoder/encoders/encoders.2/self_attn/MatMul_1]\n",
"Ignore MatMul due to non constant B: /[/encoder/encoders/encoders.3/self_attn/MatMul]\n",
"Ignore MatMul due to non constant B: /[/encoder/encoders/encoders.3/self_attn/MatMul_1]\n",
"Ignore MatMul due to non constant B: /[/decoder/encoders/encoders.0/self_attn/MatMul]\n",
"Ignore MatMul due to non constant B: /[/decoder/encoders/encoders.0/self_attn/MatMul_1]\n",
"Ignore MatMul due to non constant B: /[/decoder/encoders/encoders.1/self_attn/MatMul]\n",
"Ignore MatMul due to non constant B: /[/decoder/encoders/encoders.1/self_attn/MatMul_1]\n",
"Ignore MatMul due to non constant B: /[/decoder/encoders/encoders.2/self_attn/MatMul]\n",
"Ignore MatMul due to non constant B: /[/decoder/encoders/encoders.2/self_attn/MatMul_1]\n",
"Ignore MatMul due to non constant B: /[/decoder/encoders/encoders.3/self_attn/MatMul]\n",
"Ignore MatMul due to non constant B: /[/decoder/encoders/encoders.3/self_attn/MatMul_1]\n",
"Removing weight norm...\n",
"saving fastspeech2 onnx for female, odia...\n",
"============= Diagnostic Run torch.onnx.export version 2.0.1+cu117 =============\n",
"verbose: False, log level: Level.ERROR\n",
"======================= 0 NONE 0 NOTE 0 WARNING 0 ERROR ========================\n",
"\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"WARNING:root:Please consider to run pre-processing before quantization. Refer to example: https://github.com/microsoft/onnxruntime-inference-examples/blob/main/quantization/image_classification/cpu/ReadMe.md \n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"Ignore MatMul due to non constant B: /[/encoder/encoders/encoders.0/self_attn/MatMul]\n",
"Ignore MatMul due to non constant B: /[/encoder/encoders/encoders.0/self_attn/MatMul_1]\n",
"Ignore MatMul due to non constant B: /[/encoder/encoders/encoders.1/self_attn/MatMul]\n",
"Ignore MatMul due to non constant B: /[/encoder/encoders/encoders.1/self_attn/MatMul_1]\n",
"Ignore MatMul due to non constant B: /[/encoder/encoders/encoders.2/self_attn/MatMul]\n",
"Ignore MatMul due to non constant B: /[/encoder/encoders/encoders.2/self_attn/MatMul_1]\n",
"Ignore MatMul due to non constant B: /[/encoder/encoders/encoders.3/self_attn/MatMul]\n",
"Ignore MatMul due to non constant B: /[/encoder/encoders/encoders.3/self_attn/MatMul_1]\n",
"Ignore MatMul due to non constant B: /[/decoder/encoders/encoders.0/self_attn/MatMul]\n",
"Ignore MatMul due to non constant B: /[/decoder/encoders/encoders.0/self_attn/MatMul_1]\n",
"Ignore MatMul due to non constant B: /[/decoder/encoders/encoders.1/self_attn/MatMul]\n",
"Ignore MatMul due to non constant B: /[/decoder/encoders/encoders.1/self_attn/MatMul_1]\n",
"Ignore MatMul due to non constant B: /[/decoder/encoders/encoders.2/self_attn/MatMul]\n",
"Ignore MatMul due to non constant B: /[/decoder/encoders/encoders.2/self_attn/MatMul_1]\n",
"Ignore MatMul due to non constant B: /[/decoder/encoders/encoders.3/self_attn/MatMul]\n",
"Ignore MatMul due to non constant B: /[/decoder/encoders/encoders.3/self_attn/MatMul_1]\n",
"Removing weight norm...\n",
"saving fastspeech2 onnx for female, punjabi...\n",
"============= Diagnostic Run torch.onnx.export version 2.0.1+cu117 =============\n",
"verbose: False, log level: Level.ERROR\n",
"======================= 0 NONE 0 NOTE 0 WARNING 0 ERROR ========================\n",
"\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"WARNING:root:Please consider to run pre-processing before quantization. Refer to example: https://github.com/microsoft/onnxruntime-inference-examples/blob/main/quantization/image_classification/cpu/ReadMe.md \n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"Ignore MatMul due to non constant B: /[/encoder/encoders/encoders.0/self_attn/MatMul]\n",
"Ignore MatMul due to non constant B: /[/encoder/encoders/encoders.0/self_attn/MatMul_1]\n",
"Ignore MatMul due to non constant B: /[/encoder/encoders/encoders.1/self_attn/MatMul]\n",
"Ignore MatMul due to non constant B: /[/encoder/encoders/encoders.1/self_attn/MatMul_1]\n",
"Ignore MatMul due to non constant B: /[/encoder/encoders/encoders.2/self_attn/MatMul]\n",
"Ignore MatMul due to non constant B: /[/encoder/encoders/encoders.2/self_attn/MatMul_1]\n",
"Ignore MatMul due to non constant B: /[/encoder/encoders/encoders.3/self_attn/MatMul]\n",
"Ignore MatMul due to non constant B: /[/encoder/encoders/encoders.3/self_attn/MatMul_1]\n",
"Ignore MatMul due to non constant B: /[/decoder/encoders/encoders.0/self_attn/MatMul]\n",
"Ignore MatMul due to non constant B: /[/decoder/encoders/encoders.0/self_attn/MatMul_1]\n",
"Ignore MatMul due to non constant B: /[/decoder/encoders/encoders.1/self_attn/MatMul]\n",
"Ignore MatMul due to non constant B: /[/decoder/encoders/encoders.1/self_attn/MatMul_1]\n",
"Ignore MatMul due to non constant B: /[/decoder/encoders/encoders.2/self_attn/MatMul]\n",
"Ignore MatMul due to non constant B: /[/decoder/encoders/encoders.2/self_attn/MatMul_1]\n",
"Ignore MatMul due to non constant B: /[/decoder/encoders/encoders.3/self_attn/MatMul]\n",
"Ignore MatMul due to non constant B: /[/decoder/encoders/encoders.3/self_attn/MatMul_1]\n",
"Removing weight norm...\n",
"saving fastspeech2 onnx for female, rajasthani...\n",
"============= Diagnostic Run torch.onnx.export version 2.0.1+cu117 =============\n",
"verbose: False, log level: Level.ERROR\n",
"======================= 0 NONE 0 NOTE 0 WARNING 0 ERROR ========================\n",
"\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"WARNING:root:Please consider to run pre-processing before quantization. Refer to example: https://github.com/microsoft/onnxruntime-inference-examples/blob/main/quantization/image_classification/cpu/ReadMe.md \n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"Ignore MatMul due to non constant B: /[/encoder/encoders/encoders.0/self_attn/MatMul]\n",
"Ignore MatMul due to non constant B: /[/encoder/encoders/encoders.0/self_attn/MatMul_1]\n",
"Ignore MatMul due to non constant B: /[/encoder/encoders/encoders.1/self_attn/MatMul]\n",
"Ignore MatMul due to non constant B: /[/encoder/encoders/encoders.1/self_attn/MatMul_1]\n",
"Ignore MatMul due to non constant B: /[/encoder/encoders/encoders.2/self_attn/MatMul]\n",
"Ignore MatMul due to non constant B: /[/encoder/encoders/encoders.2/self_attn/MatMul_1]\n",
"Ignore MatMul due to non constant B: /[/encoder/encoders/encoders.3/self_attn/MatMul]\n",
"Ignore MatMul due to non constant B: /[/encoder/encoders/encoders.3/self_attn/MatMul_1]\n",
"Ignore MatMul due to non constant B: /[/decoder/encoders/encoders.0/self_attn/MatMul]\n",
"Ignore MatMul due to non constant B: /[/decoder/encoders/encoders.0/self_attn/MatMul_1]\n",
"Ignore MatMul due to non constant B: /[/decoder/encoders/encoders.1/self_attn/MatMul]\n",
"Ignore MatMul due to non constant B: /[/decoder/encoders/encoders.1/self_attn/MatMul_1]\n",
"Ignore MatMul due to non constant B: /[/decoder/encoders/encoders.2/self_attn/MatMul]\n",
"Ignore MatMul due to non constant B: /[/decoder/encoders/encoders.2/self_attn/MatMul_1]\n",
"Ignore MatMul due to non constant B: /[/decoder/encoders/encoders.3/self_attn/MatMul]\n",
"Ignore MatMul due to non constant B: /[/decoder/encoders/encoders.3/self_attn/MatMul_1]\n",
"Removing weight norm...\n",
"saving fastspeech2 onnx for female, tamil...\n",
"============= Diagnostic Run torch.onnx.export version 2.0.1+cu117 =============\n",
"verbose: False, log level: Level.ERROR\n",
"======================= 0 NONE 0 NOTE 0 WARNING 0 ERROR ========================\n",
"\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"WARNING:root:Please consider to run pre-processing before quantization. Refer to example: https://github.com/microsoft/onnxruntime-inference-examples/blob/main/quantization/image_classification/cpu/ReadMe.md \n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"Ignore MatMul due to non constant B: /[/encoder/encoders/encoders.0/self_attn/MatMul]\n",
"Ignore MatMul due to non constant B: /[/encoder/encoders/encoders.0/self_attn/MatMul_1]\n",
"Ignore MatMul due to non constant B: /[/encoder/encoders/encoders.1/self_attn/MatMul]\n",
"Ignore MatMul due to non constant B: /[/encoder/encoders/encoders.1/self_attn/MatMul_1]\n",
"Ignore MatMul due to non constant B: /[/encoder/encoders/encoders.2/self_attn/MatMul]\n",
"Ignore MatMul due to non constant B: /[/encoder/encoders/encoders.2/self_attn/MatMul_1]\n",
"Ignore MatMul due to non constant B: /[/encoder/encoders/encoders.3/self_attn/MatMul]\n",
"Ignore MatMul due to non constant B: /[/encoder/encoders/encoders.3/self_attn/MatMul_1]\n",
"Ignore MatMul due to non constant B: /[/decoder/encoders/encoders.0/self_attn/MatMul]\n",
"Ignore MatMul due to non constant B: /[/decoder/encoders/encoders.0/self_attn/MatMul_1]\n",
"Ignore MatMul due to non constant B: /[/decoder/encoders/encoders.1/self_attn/MatMul]\n",
"Ignore MatMul due to non constant B: /[/decoder/encoders/encoders.1/self_attn/MatMul_1]\n",
"Ignore MatMul due to non constant B: /[/decoder/encoders/encoders.2/self_attn/MatMul]\n",
"Ignore MatMul due to non constant B: /[/decoder/encoders/encoders.2/self_attn/MatMul_1]\n",
"Ignore MatMul due to non constant B: /[/decoder/encoders/encoders.3/self_attn/MatMul]\n",
"Ignore MatMul due to non constant B: /[/decoder/encoders/encoders.3/self_attn/MatMul_1]\n",
"Removing weight norm...\n",
"saving fastspeech2 onnx for female, telugu...\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"/home/builder/conda/envs/text2phone/lib/python3.10/site-packages/torch/onnx/_internal/jit_utils.py:306: UserWarning: Constant folding - Only steps=1 can be constant folded for opset >= 10 onnx::Slice op. Constant folding not applied. (Triggered internally at ../torch/csrc/jit/passes/onnx/constant_fold.cpp:179.)\n",
" _C._jit_pass_onnx_node_shape_type_inference(node, params_dict, opset_version)\n",
"/home/builder/conda/envs/text2phone/lib/python3.10/site-packages/torch/onnx/utils.py:689: UserWarning: Constant folding - Only steps=1 can be constant folded for opset >= 10 onnx::Slice op. Constant folding not applied. (Triggered internally at ../torch/csrc/jit/passes/onnx/constant_fold.cpp:179.)\n",
" _C._jit_pass_onnx_graph_shape_type_inference(\n",
"/home/builder/conda/envs/text2phone/lib/python3.10/site-packages/torch/onnx/utils.py:1186: UserWarning: Constant folding - Only steps=1 can be constant folded for opset >= 10 onnx::Slice op. Constant folding not applied. (Triggered internally at ../torch/csrc/jit/passes/onnx/constant_fold.cpp:179.)\n",
" _C._jit_pass_onnx_graph_shape_type_inference(\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"============= Diagnostic Run torch.onnx.export version 2.0.1+cu117 =============\n",
"verbose: False, log level: Level.ERROR\n",
"======================= 0 NONE 0 NOTE 0 WARNING 0 ERROR ========================\n",
"\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"WARNING:root:Please consider to run pre-processing before quantization. Refer to example: https://github.com/microsoft/onnxruntime-inference-examples/blob/main/quantization/image_classification/cpu/ReadMe.md \n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"Ignore MatMul due to non constant B: /[/encoder/encoders/encoders.0/self_attn/MatMul]\n",
"Ignore MatMul due to non constant B: /[/encoder/encoders/encoders.0/self_attn/MatMul_1]\n",
"Ignore MatMul due to non constant B: /[/encoder/encoders/encoders.1/self_attn/MatMul]\n",
"Ignore MatMul due to non constant B: /[/encoder/encoders/encoders.1/self_attn/MatMul_1]\n",
"Ignore MatMul due to non constant B: /[/encoder/encoders/encoders.2/self_attn/MatMul]\n",
"Ignore MatMul due to non constant B: /[/encoder/encoders/encoders.2/self_attn/MatMul_1]\n",
"Ignore MatMul due to non constant B: /[/encoder/encoders/encoders.3/self_attn/MatMul]\n",
"Ignore MatMul due to non constant B: /[/encoder/encoders/encoders.3/self_attn/MatMul_1]\n",
"Ignore MatMul due to non constant B: /[/decoder/encoders/encoders.0/self_attn/MatMul]\n",
"Ignore MatMul due to non constant B: /[/decoder/encoders/encoders.0/self_attn/MatMul_1]\n",
"Ignore MatMul due to non constant B: /[/decoder/encoders/encoders.1/self_attn/MatMul]\n",
"Ignore MatMul due to non constant B: /[/decoder/encoders/encoders.1/self_attn/MatMul_1]\n",
"Ignore MatMul due to non constant B: /[/decoder/encoders/encoders.2/self_attn/MatMul]\n",
"Ignore MatMul due to non constant B: /[/decoder/encoders/encoders.2/self_attn/MatMul_1]\n",
"Ignore MatMul due to non constant B: /[/decoder/encoders/encoders.3/self_attn/MatMul]\n",
"Ignore MatMul due to non constant B: /[/decoder/encoders/encoders.3/self_attn/MatMul_1]\n",
"Removing weight norm...\n",
"saving fastspeech2 onnx for female, urdu...\n",
"============= Diagnostic Run torch.onnx.export version 2.0.1+cu117 =============\n",
"verbose: False, log level: Level.ERROR\n",
"======================= 0 NONE 0 NOTE 0 WARNING 0 ERROR ========================\n",
"\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"WARNING:root:Please consider to run pre-processing before quantization. Refer to example: https://github.com/microsoft/onnxruntime-inference-examples/blob/main/quantization/image_classification/cpu/ReadMe.md \n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"Ignore MatMul due to non constant B: /[/encoder/encoders/encoders.0/self_attn/MatMul]\n",
"Ignore MatMul due to non constant B: /[/encoder/encoders/encoders.0/self_attn/MatMul_1]\n",
"Ignore MatMul due to non constant B: /[/encoder/encoders/encoders.1/self_attn/MatMul]\n",
"Ignore MatMul due to non constant B: /[/encoder/encoders/encoders.1/self_attn/MatMul_1]\n",
"Ignore MatMul due to non constant B: /[/encoder/encoders/encoders.2/self_attn/MatMul]\n",
"Ignore MatMul due to non constant B: /[/encoder/encoders/encoders.2/self_attn/MatMul_1]\n",
"Ignore MatMul due to non constant B: /[/encoder/encoders/encoders.3/self_attn/MatMul]\n",
"Ignore MatMul due to non constant B: /[/encoder/encoders/encoders.3/self_attn/MatMul_1]\n",
"Ignore MatMul due to non constant B: /[/decoder/encoders/encoders.0/self_attn/MatMul]\n",
"Ignore MatMul due to non constant B: /[/decoder/encoders/encoders.0/self_attn/MatMul_1]\n",
"Ignore MatMul due to non constant B: /[/decoder/encoders/encoders.1/self_attn/MatMul]\n",
"Ignore MatMul due to non constant B: /[/decoder/encoders/encoders.1/self_attn/MatMul_1]\n",
"Ignore MatMul due to non constant B: /[/decoder/encoders/encoders.2/self_attn/MatMul]\n",
"Ignore MatMul due to non constant B: /[/decoder/encoders/encoders.2/self_attn/MatMul_1]\n",
"Ignore MatMul due to non constant B: /[/decoder/encoders/encoders.3/self_attn/MatMul]\n",
"Ignore MatMul due to non constant B: /[/decoder/encoders/encoders.3/self_attn/MatMul_1]\n"
]
}
],
"source": [
"def convert_fastspeech2_model(language, gender):\n",
" model = load_fastspeech2_model_pytorch(language, gender, 'cpu')\n",
" os.chdir(ONNX_MODELS_HOME);TTSModelExport('text2phone').export(model, f'{language}-{gender}-ort', quantize=True);os.chdir(FASTSPEECH2_HOME)\n",
" return model\n",
"def load_fastspeech2_model_pytorch(language, gender, device):\n",
" with open(f\"{language}/{gender}/model/config.yaml\", \"r\") as file: config = yaml.safe_load(file)\n",
" current_working_directory, feat, pitch, energy = os.getcwd(), \"model/feats_stats.npz\", \"model/pitch_stats.npz\", \"model/energy_stats.npz\"\n",
" feat_path, pitch_path, energy_path = os.path.join(current_working_directory, language, gender, feat), os.path.join(current_working_directory, language, gender, pitch), os.path.join(current_working_directory ,language, gender, energy)\n",
" config[\"normalize_conf\"][\"stats_file\"], config[\"pitch_normalize_conf\"][\"stats_file\"], config[\"energy_normalize_conf\"][\"stats_file\"] = feat_path, pitch_path, energy_path\n",
" with open(f\"{language}/{gender}/model/config.yaml\", \"w\") as file: yaml.dump(config, file)\n",
" tts_model, tts_config = f\"{language}/{gender}/model/model.pth\", f\"{language}/{gender}/model/config.yaml\"\n",
" return Text2Speech(train_config=tts_config, model_file=tts_model, device=device)\n",
"def load_hifigan_vocoder_pytorch(gender, family, device):\n",
" vocoder_config, vocoder_generator = f\"vocoder/{gender}/{family}/hifigan/config.json\", f\"vocoder/{gender}/{family}/hifigan/generator\"\n",
" with open(vocoder_config, 'r') as f: h = AttrDict(json.load(f))\n",
" torch.manual_seed(h.seed)\n",
" generator, state_dict_g = Generator(h).to(torch.device(device)), torch.load(vocoder_generator, torch.device(device))\n",
" generator.load_state_dict(state_dict_g['generator'])\n",
" generator.eval()\n",
" generator.remove_weight_norm()\n",
" return generator\n",
"for gender in ['male', 'female']:\n",
" for language, family in [('assamese', 'aryan'), ('bengali', 'aryan'), ('bodo', 'aryan'), ('english', 'aryan'), ('gujarati', 'aryan'), ('hindi', 'aryan'), ('kannada', 'dravidian'), ('malayalam', 'dravidian'), ('manipuri', 'aryan'), ('marathi', 'aryan'), ('odia', 'aryan'), ('punjabi', 'aryan'), ('rajasthani', 'aryan'), ('tamil', 'dravidian'), ('telugu', 'dravidian'), ('urdu', 'aryan')]:\n",
" vocoder = load_hifigan_vocoder_pytorch(gender, family, 'cpu')\n",
" print(f'saving fastspeech2 onnx for {gender}, {language}...')\n",
" if language == \"urdu\" or language == \"punjabi\": preprocessor = CharTextPreprocessor()\n",
" elif language == \"english\": preprocessor = TTSPreprocessor()\n",
" else: preprocessor = TTSDurAlignPreprocessor()\n",
" sample_text = \" \".join(preprocessor.preprocess(\"this is a sentence\", language, gender)[0])\n",
" try:\n",
" model_out = (convert_fastspeech2_model(language, gender)(sample_text)[\"feat_gen_denorm\"].T.unsqueeze(0) * 2.3262).to('cpu')\n",
" if not os.path.exists(f'{ONNX_MODELS_HOME}/vocoders/{gender}-{family}-vocoder.onnx'):\n",
" print(f'saving vocoder onnx for {gender}-{family}...')\n",
" torch.onnx.export(vocoder, model_out, f'{ONNX_MODELS_HOME}/vocoders/{gender}-{family}-vocoder.onnx', input_names=['input'], output_names=['output'], dynamic_axes={'input': [0, 2], 'output': [0]})\n",
" except FileNotFoundError: pass\n",
"for f in Path('~/.cache/espnet_onnx').rglob('full'): shutil.rmtree(f)\n",
"!mv ~/.cache/espnet_onnx/tag_config.yaml $ONNX_MODELS_HOME/text2phone"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3 (ipykernel)",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.11.6"
}
},
"nbformat": 4,
"nbformat_minor": 2
}
{
"cells": [
{
"cell_type": "code",
"execution_count": 1,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Loading G2P model... Done!\n",
"Loading G2P model... Done!\n",
"Loading G2P model... Done!\n",
"Loading G2P model... Done!\n",
"Loading G2P model... Done!\n",
"Loading G2P model... Done!\n",
"Loading G2P model... Done!\n",
"Loading G2P model... Done!\n",
"Loading G2P model... Done!\n",
"Loading G2P model... Done!\n",
"Loading G2P model... Done!\n"
]
}
],
"source": [
"import os\n",
"import json\n",
"import yaml\n",
"import numpy\n",
"import torch\n",
"!mkdir -p ../wavs\n",
"import onnxruntime\n",
"from sys import path\n",
"from tqdm import tqdm\n",
"SAMPLING_RATE = 22050\n",
"os.chdir('../Fastspeech2_HS')\n",
"path.append(\"hifigan\")\n",
"from env import AttrDict\n",
"from models import Generator\n",
"from IPython.display import Audio\n",
"from scipy.io.wavfile import write\n",
"from meldataset import MAX_WAV_VALUE\n",
"from espnet_onnx.export import TTSModelExport\n",
"from espnet2.bin.tts_inference import Text2Speech\n",
"from espnet_onnx import Text2Speech as Text2SpeechInference\n",
"from text_preprocess_for_inference import TTSDurAlignPreprocessor, CharTextPreprocessor, TTSPreprocessor"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Original Inference\n",
    "* uses the environment defined in the Fastspeech2_HS repo"
]
},
{
"cell_type": "code",
"execution_count": 2,
"metadata": {},
"outputs": [],
"source": [
"def load_hifigan_vocoder(language, gender, family, device):\n",
" vocoder_config = f\"vocoder/{gender}/{family}/hifigan/config.json\"\n",
" vocoder_generator = f\"vocoder/{gender}/{family}/hifigan/generator\"\n",
" with open(vocoder_config, 'r') as f: json_config = json.load(f)\n",
" h = AttrDict(json_config)\n",
" torch.manual_seed(h.seed)\n",
" device = torch.device(device)\n",
" generator = Generator(h).to(device)\n",
" state_dict_g = torch.load(vocoder_generator, device)\n",
" generator.load_state_dict(state_dict_g['generator'])\n",
" generator.eval()\n",
" generator.remove_weight_norm()\n",
" return generator\n",
"def load_fastspeech2_model(language, gender, device):\n",
" with open(f\"{language}/{gender}/model/config.yaml\", \"r\") as file: config = yaml.safe_load(file)\n",
" current_working_directory = os.getcwd()\n",
" feat = \"model/feats_stats.npz\"\n",
" pitch = \"model/pitch_stats.npz\"\n",
" energy = \"model/energy_stats.npz\"\n",
" feat_path = os.path.join(current_working_directory, language, gender, feat)\n",
" pitch_path = os.path.join(current_working_directory, language, gender, pitch)\n",
" energy_path = os.path.join(current_working_directory, language, gender, energy)\n",
" config[\"normalize_conf\"][\"stats_file\"] = feat_path\n",
" config[\"pitch_normalize_conf\"][\"stats_file\"] = pitch_path\n",
" config[\"energy_normalize_conf\"][\"stats_file\"] = energy_path\n",
" with open(f\"{language}/{gender}/model/config.yaml\", \"w\") as file: yaml.dump(config, file)\n",
" tts_model = f\"{language}/{gender}/model/model.pth\"\n",
" tts_config = f\"{language}/{gender}/model/config.yaml\"\n",
" return Text2Speech(train_config=tts_config, model_file=tts_model, device=device)\n",
"def text_synthesis(language, gender, sample_text, vocoder, MAX_WAV_VALUE, device):\n",
" with torch.no_grad():\n",
" model = load_fastspeech2_model(language, gender, device)\n",
" out = model(sample_text, decode_conf={\"alpha\": 1})\n",
" x = out[\"feat_gen_denorm\"].T.unsqueeze(0) * 2.3262\n",
" x = x.to(device)\n",
" y_g_hat = vocoder(x)\n",
" audio = y_g_hat.squeeze()\n",
" audio = audio * MAX_WAV_VALUE\n",
" audio = audio.cpu().numpy().astype('int16')\n",
" return audio\n",
"def text2speech(language, gender, family, sample_text, device):\n",
" vocoder = load_hifigan_vocoder(language, gender, family, device)\n",
" if language == \"urdu\" or language == \"punjabi\": preprocessor = CharTextPreprocessor()\n",
" elif language == \"english\": preprocessor = TTSPreprocessor()\n",
" else: preprocessor = TTSDurAlignPreprocessor()\n",
" preprocessed_text, phrases = preprocessor.preprocess(sample_text, language, gender)\n",
" preprocessed_text = \" \".join(preprocessed_text)\n",
" audio = text_synthesis(language, gender, preprocessed_text, vocoder, MAX_WAV_VALUE, device)\n",
" output_file = f\"../wavs/{language}_{gender}-{family}_orig_output.wav\"\n",
" write(output_file, SAMPLING_RATE, audio)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Original Inference Results"
]
},
{
"cell_type": "code",
"execution_count": 3,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Removing weight norm...\n",
"length: 26624 array: [945 894 605 ... 10 12 30]\n",
"abssum: 42271788 min: -16131 max: 10878\n"
]
}
],
"source": [
"audio_orig = text_synthesis('english', 'male', 'this is a sentence', load_hifigan_vocoder('english', 'male', 'aryan', 'cpu'), MAX_WAV_VALUE, 'cpu')\n",
"print('length:', len(audio_orig), 'array:', audio_orig)\n",
"print('abssum:', numpy.abs(audio_orig).sum(), 'min:', audio_orig.min(), 'max:', audio_orig.max())"
]
},
{
"cell_type": "code",
"execution_count": 4,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Removing weight norm...\n",
"length: 26624 array: [945 894 605 ... 10 12 30]\n",
"abssum: 42271796 min: -16131 max: 10878\n"
]
}
],
"source": [
"audio_orig = text_synthesis('english', 'male', 'this is a sentence', load_hifigan_vocoder('english', 'male', 'aryan', 'cuda'), MAX_WAV_VALUE, 'cuda')\n",
"print('length:', len(audio_orig), 'array:', audio_orig)\n",
"print('abssum:', numpy.abs(audio_orig).sum(), 'min:', audio_orig.min(), 'max:', audio_orig.max())"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
    "Latest PyTorch Inference Results\n",
    "* uses a similar environment to the one defined in scripts/perform_onnx_conversion"
]
},
{
"cell_type": "code",
"execution_count": 3,
"metadata": {},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"/home/nikhilesh/miniforge3/envs/iitm-tts-latest-pytorch/lib/python3.10/site-packages/torch/nn/utils/weight_norm.py:30: UserWarning: torch.nn.utils.weight_norm is deprecated in favor of torch.nn.utils.parametrizations.weight_norm.\n",
" warnings.warn(\"torch.nn.utils.weight_norm is deprecated in favor of torch.nn.utils.parametrizations.weight_norm.\")\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"Removing weight norm...\n",
"length: 26624 array: [945 894 605 ... 10 12 30]\n",
"abssum: 42271783 min: -16131 max: 10878\n"
]
}
],
"source": [
"audio_orig = text_synthesis('english', 'male', 'this is a sentence', load_hifigan_vocoder('english', 'male', 'aryan', 'cpu'), MAX_WAV_VALUE, 'cpu')\n",
"print('length:', len(audio_orig), 'array:', audio_orig)\n",
"print('abssum:', numpy.abs(audio_orig).sum(), 'min:', audio_orig.min(), 'max:', audio_orig.max())"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"audio_orig = text_synthesis('english', 'male', 'this is a sentence', load_hifigan_vocoder('english', 'male', 'aryan', 'cuda'), MAX_WAV_VALUE, 'cuda')\n",
"print('length:', len(audio_orig), 'array:', audio_orig)\n",
"print('abssum:', numpy.abs(audio_orig).sum(), 'min:', audio_orig.min(), 'max:', audio_orig.max())"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"ORT Conversion"
]
},
{
"cell_type": "code",
"execution_count": 4,
"metadata": {},
"outputs": [],
"source": [
"!mkdir -p ../ort_models\n",
"def convert_to_ort(language, gender, family):\n",
" vocoder = load_hifigan_vocoder(language, gender, family, 'cpu')\n",
" model = load_fastspeech2_model(language, gender, 'cpu')\n",
" if language == \"urdu\" or language == \"punjabi\": preprocessor = CharTextPreprocessor()\n",
" elif language == \"english\": preprocessor = TTSPreprocessor()\n",
" else: preprocessor = TTSDurAlignPreprocessor()\n",
" preprocessed_text, phrases = preprocessor.preprocess('this is a sentence', language, gender)\n",
" preprocessed_text = \" \".join(preprocessed_text)\n",
" exporter = TTSModelExport()\n",
" exporter.export(model, f'{language}-{gender}-ort', quantize=False)\n",
" out = model(preprocessed_text, decode_conf={\"alpha\": 1})\n",
" x = out[\"feat_gen_denorm\"].T.unsqueeze(0) * 2.3262\n",
" torch.onnx.export(vocoder, x, f'../ort_models/vocoders/{gender}-{family}-vocoder.onnx', input_names=['input'], output_names=['output'], dynamic_axes={'input': [0, 2], 'output': [0]})"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"convert_to_ort('english', 'male', 'aryan')"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"ORT Inference\n",
    "* uses the environment defined in triton_models/tts/envbuilder.sh\n",
"* you can delete the ort_models folder"
]
},
{
"cell_type": "code",
"execution_count": 2,
"metadata": {},
"outputs": [],
"source": [
"def load_hifigan_vocoder(language, gender, family, device): return onnxruntime.InferenceSession(f\"../ort_models/vocoders/{gender}-{family}-vocoder.onnx\", providers=['CPUExecutionProvider' if device == 'cpu' else 'CUDAExecutionProvider'])\n",
"def load_fastspeech2_model(language, gender, device): return Text2SpeechInference(f'{language}-{gender}-ort', providers=['CPUExecutionProvider' if device == 'cpu' else 'CUDAExecutionProvider'])\n",
"def text_synthesis(language, gender, sample_text, vocoder, MAX_WAV_VALUE, device):\n",
" model = load_fastspeech2_model(language, gender, device)\n",
" x = numpy.expand_dims(model.postprocess(model.tts_model(model.preprocess.token_id_converter.tokens2ids(model.preprocess.tokenizer.text2tokens(sample_text)))['feat_gen']).T, axis=0) * 2.3262\n",
" y_g_hat = vocoder.run(None, {'input': x})[0]\n",
" audio = y_g_hat.squeeze()\n",
" audio = audio * MAX_WAV_VALUE\n",
" audio = audio.astype('int16')\n",
" return audio\n",
"def text2speech(language, gender, family, sample_text, device):\n",
" vocoder = load_hifigan_vocoder(language, gender, family, device)\n",
" if language == \"urdu\" or language == \"punjabi\": preprocessor = CharTextPreprocessor()\n",
" elif language == \"english\": preprocessor = TTSPreprocessor()\n",
" else: preprocessor = TTSDurAlignPreprocessor()\n",
" preprocessed_text, phrases = preprocessor.preprocess(sample_text, language, gender)\n",
" preprocessed_text = \" \".join(preprocessed_text)\n",
" audio = text_synthesis(language, gender, preprocessed_text, vocoder, MAX_WAV_VALUE, device)\n",
" output_file = f\"../wavs/{language}_{gender}-{family}_ort_output.wav\"\n",
" write(output_file, SAMPLING_RATE, audio)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"ORT Inference Results"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"audio_orig = text_synthesis('english', 'male', 'this', load_hifigan_vocoder('english', 'male', 'aryan', 'cpu'), MAX_WAV_VALUE, 'cpu')\n",
"print('length:', len(audio_orig), 'array:', audio_orig)\n",
"print('abssum:', numpy.abs(audio_orig).sum(), 'min:', audio_orig.min(), 'max:', audio_orig.max())"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "iitm-tts",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.11.6"
},
"orig_nbformat": 4
},
"nbformat": 4,
"nbformat_minor": 2
}
This source diff could not be displayed because it is too large. You can view the blob instead.
diff --git a/espnet_onnx/tts/abs_tts_model.py b/espnet_onnx/tts/abs_tts_model.py
index 591947a..9de64a5 100644
--- a/espnet_onnx/tts/abs_tts_model.py
+++ b/espnet_onnx/tts/abs_tts_model.py
@@ -86,20 +86,20 @@ class AbsTTSModel(AbsModel):
self._build_normalizer()
self._build_vocoder(providers, use_quantized)
- def _check_ort_version(self, providers: List[str]):
+ def _check_ort_version(self, providers: List):
# check cpu
if (
onnxruntime.get_device() == "CPU"
and "CPUExecutionProvider" not in providers
- ):
- raise RuntimeError(
- "If you want to use GPU, then follow `How to use GPU on espnet_onnx` chapter in readme to install onnxruntime-gpu."
- )
+ ): pass
+# raise RuntimeError(
+# "If you want to use GPU, then follow `How to use GPU on espnet_onnx` chapter in readme to install onnxruntime-gpu."
+# )
# check GPU
- if onnxruntime.get_device() == "GPU" and providers == ["CPUExecutionProvider"]:
- warnings.warn(
- "Inference will be executed on the CPU. Please provide gpu providers. Read `How to use GPU on espnet_onnx` in readme in detail."
- )
+ if onnxruntime.get_device() == "GPU" and providers == ["CPUExecutionProvider"]: pass
+# warnings.warn(
+# "Inference will be executed on the CPU. Please provide gpu providers. Read `How to use GPU on espnet_onnx` in readme in detail."
+# )
- logging.info(f'Providers [{" ,".join(providers)}] detected.')
+# logging.info(f'Providers [{" ,".join(providers)}] detected.')
diff --git a/espnet_onnx/tts/tts_model.py b/espnet_onnx/tts/tts_model.py
index 78023f5..de4ebba 100644
--- a/espnet_onnx/tts/tts_model.py
+++ b/espnet_onnx/tts/tts_model.py
@@ -14,7 +14,7 @@ class Text2Speech(AbsTTSModel):
self,
tag_name: str = None,
model_dir: Union[Path, str] = None,
- providers: List[str] = ["CPUExecutionProvider"],
+ providers: List = ["CPUExecutionProvider"],
use_quantized: bool = False,
):
assert check_argument_types()
diff --git a/espnet_onnx/utils/abs_model.py b/espnet_onnx/utils/abs_model.py
index 1270468..4aa63c6 100644
--- a/espnet_onnx/utils/abs_model.py
+++ b/espnet_onnx/utils/abs_model.py
@@ -46,23 +46,23 @@ class AbsModel(ABC):
def _build_model(self, providers, use_quantized):
raise NotImplementedError
- def _check_ort_version(self, providers: List[str]):
+ def _check_ort_version(self, providers: List):
# check cpu
if (
onnxruntime.get_device() == "CPU"
and "CPUExecutionProvider" not in providers
- ):
- raise RuntimeError(
- "If you want to use GPU, then follow `How to use GPU on espnet_onnx` chapter in readme to install onnxruntime-gpu."
- )
+ ): pass
+# raise RuntimeError(
+# "If you want to use GPU, then follow `How to use GPU on espnet_onnx` chapter in readme to install onnxruntime-gpu."
+# )
# check GPU
- if onnxruntime.get_device() == "GPU" and providers == ["CPUExecutionProvider"]:
- warnings.warn(
- "Inference will be executed on the CPU. Please provide gpu providers. Read `How to use GPU on espnet_onnx` in readme in detail."
- )
+ if onnxruntime.get_device() == "GPU" and providers == ["CPUExecutionProvider"]: pass
+# warnings.warn(
+# "Inference will be executed on the CPU. Please provide gpu providers. Read `How to use GPU on espnet_onnx` in readme in detail."
+# )
- logging.info(f'Providers [{" ,".join(providers)}] detected.')
+# logging.info(f'Providers [{" ,".join(providers)}] detected.')
class AbsExportModel(ABC):
diff --git a/setup.py b/setup.py
index 483b062..ee37d37 100644
--- a/setup.py
+++ b/setup.py
@@ -4,9 +4,9 @@ requirements = {
"install": [
"setuptools>=38.5.1",
"librosa>=0.8.0",
- "onnxruntime",
+ "onnxruntime-gpu",
"sentencepiece>=0.1.91,!=0.1.92",
- "typeguard==2.13.0",
+ "typeguard==2.13.3",
"PyYAML>=5.1.2",
"g2p-en",
"jamo==0.4.1", # For kss
diff --git a/text_preprocess_for_inference.py b/text_preprocess_for_inference.py
index ccca511..2191ebb 100644
--- a/text_preprocess_for_inference.py
+++ b/text_preprocess_for_inference.py
@@ -3,6 +3,8 @@ TTS Preprocessing
Developed by Arun Kumar A(CS20S013) - November 2022
Code Changes by Utkarsh - 2023
'''
+import locale
+locale.setlocale(locale.LC_ALL, 'C.UTF-8')
import os
import re
import json
@@ -40,14 +42,14 @@ def add_to_dictionary(dict_to_add, dict_file):
df_temp = pd.read_csv(temp_dict_file, delimiter=" ", header=None, dtype=str)
if len(df_temp) > len(df_orig):
os.rename(temp_dict_file, dict_file)
- print(f"{len(dict_to_add)} new words appended to Dictionary: {dict_file}")
+ # print(f"{len(dict_to_add)} new words appended to Dictionary: {dict_file}")
except:
print(traceback.format_exc())
else:
# create a new dictionary
with open(dict_file, "a") as f:
f.write(append_string)
- print(f"New Dictionary: {dict_file} created with {len(dict_to_add)} words")
+ # print(f"New Dictionary: {dict_file} created with {len(dict_to_add)} words")
class TextCleaner:
@@ -104,7 +106,7 @@ class Phonifier:
except Exception as e:
print(traceback.format_exc())
- print("Phone dictionary loaded for the following languages:", list(self.phone_dictionary.keys()))
+ # print("Phone dictionary loaded for the following languages:", list(self.phone_dictionary.keys()))
self.g2p = G2p()
print('Loading G2P model... Done!')
@@ -315,7 +317,7 @@ class Phonifier:
#print('INSIDE IF CONDITION OF ADDING WORDS')
else:
non_dict_words = words
- print(f"word not in dict: {non_dict_words}")
+ # print(f"word not in dict: {non_dict_words}")
if len(non_dict_words) > 0:
# unified parser has to be run for the non dictionary words
@@ -335,7 +337,7 @@ class Phonifier:
phn_out_dict[non_dict_words[i]] = self.en_g2p(non_dict_words[i])
# Create a string representation of the dictionary
data_str = "\n".join([f"{key}\t{value}" for key, value in phn_out_dict.items()])
- print(f"data_str: {data_str}")
+ # print(f"data_str: {data_str}")
with open(out_dict_file, "w") as f:
f.write(data_str)
else:
@@ -358,7 +360,7 @@ class Phonifier:
for original_word, formatted_word in zip(non_dict_words, replaced_output_list):
line = f"{original_word}\t{formatted_word}\n"
file.write(line)
- print(line, end='')
+ # print(line, end='')
try:
@@ -415,8 +417,8 @@ class Phonifier:
non_dict_words = words
if len(non_dict_words) > 0:
- print(len(non_dict_words))
- print(non_dict_words)
+ # print(len(non_dict_words))
+ # print(non_dict_words)
# unified parser has to be run for the non dictionary words
os.makedirs("tmp", exist_ok=True)
timestamp = str(time.time())
@@ -434,7 +436,7 @@ class Phonifier:
phn_out_dict[non_dict_words[i]] = self.en_g2p(non_dict_words[i])
# Create a string representation of the dictionary
data_str = "\n".join([f"{key}\t{value}" for key, value in phn_out_dict.items()])
- print(f"data_str: {data_str}")
+ # print(f"data_str: {data_str}")
with open(out_dict_file, "w") as f:
f.write(data_str)
else:
@@ -454,12 +456,12 @@ class Phonifier:
for original_word, formatted_word in zip(non_dict_words, replaced_output_list):
line = f"{original_word}\t{formatted_word}\n"
file.write(line)
- print(line, end='')
+ # print(line, end='')
try:
df = pd.read_csv(out_dict_file, delimiter="\t", header=None, dtype=str)
new_dict = df.dropna().set_index(0).to_dict('dict')[1]
- print(new_dict)
+ # print(new_dict)
if language not in self.phone_dictionary:
self.phone_dictionary[language] = new_dict
else:
@@ -656,7 +658,7 @@ class TextNormalizer:
text = re.sub(str(digit), ' '+num_to_word(digit, self.keydict[language])+' ', text)
return self.__post_cleaning(text)
else:
- print(f"No num-to-char for the given language {language}.")
+ # print(f"No num-to-char for the given language {language}.")
return self.__post_cleaning(text)
def num2text_list(self, text, language):
@@ -671,7 +673,7 @@ class TextNormalizer:
output_text.append(line)
return self.__post_cleaning_list(output_text)
else:
- print(f"No num-to-char for the given language {language}.")
+ # print(f"No num-to-char for the given language {language}.")
return self.__post_cleaning_list(text)
def normalize(self, text, language):
@@ -758,9 +760,9 @@ class TTSDurAlignPreprocessor:
def preprocess(self, text, language, gender):
# text = text.strip()
- print(text)
+ # print(text)
text = self.text_cleaner.clean(text)
- print("cleaned text", text)
+ # print("cleaned text", text)
# text = self.text_normalizer.insert_space(text)
text = self.text_normalizer.num2text(text, language)
# print(text)
@@ -769,9 +771,9 @@ class TTSDurAlignPreprocessor:
phrasified_text = TextPhrasifier.phrasify(text)
#print("phrased",phrasified_text)
phonified_text = self.phonifier.phonify(phrasified_text, language, gender)
- print("phonetext",phonified_text)
+ # print("phonetext",phonified_text)
phonified_text = self.post_processor.textProcesor(phonified_text)
- print(phonified_text)
+ # print(phonified_text)
return phonified_text, phrasified_text
class TTSDurAlignPreprocessor_VTT:
@@ -854,9 +856,9 @@ class TTSPreprocessor:
text = self.text_normalizer.normalize(text, language)
phrasified_text = TextPhrasifier.phrasify(text)
phonified_text = self.phonifier.phonify(phrasified_text, language, gender)
- print(phonified_text)
+ # print(phonified_text)
phonified_text = self.post_processor.textProcesorForEnglish(phonified_text)
- print(phonified_text)
+ # print(phonified_text)
return phonified_text, phrasified_text
class TTSPreprocessor_VTT:
#!/bin/bash
# Builds the throwaway "text2phone" conda environment used to export the
# FastSpeech2 / espnet models to ONNX, then executes the export notebook.
# Expected to run inside the envbuilder container (see envbuilder.dockerfile):
# /patches and /notebooks are bind-mounted, $HOME is /home/builder.

# Install Miniforge (conda + mamba) non-interactively into ${HOME}/conda.
wget https://github.com/conda-forge/miniforge/releases/latest/download/Miniforge3-Linux-x86_64.sh
sh Miniforge3-Linux-x86_64.sh -b -p ${HOME}/conda
rm Miniforge3-Linux-x86_64.sh
# Make `conda activate` / `mamba` available in this non-login shell.
source "${HOME}/conda/etc/profile.d/conda.sh"
source "${HOME}/conda/etc/profile.d/mamba.sh"
conda create -y --name text2phone python=3.10 --no-default-packages
conda activate text2phone
# Build a patched espnet_onnx wheel (the patch adds GPU-provider support,
# see the diff in patches/).
git clone --recursive https://github.com/espnet/espnet_onnx.git
cd espnet_onnx && git apply /patches/espnet_onnx.patch && python setup.py bdist_wheel && cd ..
# torch is pinned to 2.0.1; the espnet_onnx wheel is the one just built above.
pip install -U nbconvert ipykernel onnx torch==2.0.1 torchaudio indic-num2words espnet_model_zoo espnet_onnx/dist/espnet_onnx-0.2.0-py3-none-any.whl espnet
# Pre-seed NLTK data (cmudict + POS tagger) so the export notebook does not
# need network access through nltk.download().
mkdir -p /home/builder/nltk_data/corpora && wget --directory-prefix=/home/builder/nltk_data/corpora "https://raw.githubusercontent.com/nltk/nltk_data/gh-pages/packages/corpora/cmudict.zip" && unzip /home/builder/nltk_data/corpora/cmudict.zip -d /home/builder/nltk_data/corpora
mkdir -p /home/builder/nltk_data/taggers && wget --directory-prefix=/home/builder/nltk_data/taggers https://raw.githubusercontent.com/nltk/nltk_data/gh-pages/packages/taggers/averaged_perceptron_tagger.zip && unzip /home/builder/nltk_data/taggers/averaged_perceptron_tagger.zip -d /home/builder/nltk_data/taggers
# The wheel is installed; the source checkout is no longer needed.
rm -rf espnet_onnx
# Run the export notebook in place; espnet_onnx caches models under ~/.cache.
mkdir -p ~/.cache/espnet_onnx && jupyter nbconvert --inplace --to notebook --execute /notebooks/create_onnx.ipynb
\ No newline at end of file
import os
# NLTK data (cmudict, POS tagger) is unpacked next to this model file by the
# repo-build script, so point NLTK at the model directory.
os.environ["NLTK_DATA"] = "."
# Triton launches the stub from its own CWD; chdir so the relative
# "vocoders/..." and "text2phone/..." model paths below resolve.
os.chdir(os.path.dirname(__file__))
import json
import numpy
import onnxruntime
# NOTE(review): `path` looks unused in this file — confirm before removing.
from sys import path
# Scale factor from float [-1, 1] audio to int16 PCM.
MAX_WAV_VALUE = 32768.0
# Output sample rate of the vocoder, in Hz.
SAMPLING_RATE = 22050
from random import choice
from itertools import product
from espnet_onnx import Text2Speech
import triton_python_backend_utils as pb_utils
from text_preprocess_for_inference import (
    TTSDurAlignPreprocessor,
    CharTextPreprocessor,
    TTSPreprocessor,
)
# Maps the wire-format language id (request "INPUT_LANGUAGE_ID") to a
# (language name, language family) pair. The language name selects the text
# preprocessor and acoustic model; the family selects the shared vocoder
# ("aryan" vs "dravidian").
# NOTE(review): "english" and "manipuri" are binned under "aryan" so they
# reuse the aryan vocoder — confirm this grouping is intentional.
LANGMAP = {
    "as": ("assamese", "aryan"),
    "bn": ("bengali", "aryan"),
    "brx": ("bodo", "aryan"),
    "en": ("english", "aryan"),
    "gu": ("gujarati", "aryan"),
    "hi": ("hindi", "aryan"),
    "kn": ("kannada", "dravidian"),
    "ml": ("malayalam", "dravidian"),
    "mni": ("manipuri", "aryan"),
    "mr": ("marathi", "aryan"),
    "or": ("odia", "aryan"),
    "pa": ("punjabi", "aryan"),
    "rj": ("rajasthani", "aryan"),
    "ta": ("tamil", "dravidian"),
    "te": ("telugu", "dravidian"),
    "ur": ("urdu", "aryan"),
}
class TritonPythonModel:
    """Triton Python-backend model: IITM FastSpeech2 text-to-mel + HiFi-GAN
    vocoder ONNX inference for Indian-language TTS.

    Inputs per request: INPUT_TEXT, INPUT_SPEAKER_ID, INPUT_LANGUAGE_ID
    (all TYPE_STRING). Output: OUTPUT_GENERATED_AUDIO — raw int16 PCM bytes
    at SAMPLING_RATE.
    """

    def initialize(self, args):
        """Load text preprocessors, acoustic models and vocoders onto the
        GPU assigned to this model instance.

        Args:
            args: Triton-provided dict with "model_instance_device_id" and
                the JSON "model_config".
        """
        self.device_id = int(json.loads(args["model_instance_device_id"]))
        # Numpy dtype of OUTPUT_GENERATED_AUDIO, taken from config.pbtxt.
        self.target_dtype = pb_utils.triton_string_to_numpy(
            pb_utils.get_output_config_by_name(
                json.loads(args["model_config"]), "OUTPUT_GENERATED_AUDIO"
            )["data_type"]
        )
        self.tts_preprocessor = TTSPreprocessor()
        self.char_text_preprocessor = CharTextPreprocessor()
        self.tts_dur_align_preprocessor = TTSDurAlignPreprocessor()
        # Urdu/Punjabi use character-level preprocessing, English the phoneme
        # preprocessor, everything else duration-aligned preprocessing.
        self.preprocessors = {}
        for lang, _ in LANGMAP.values():
            if lang in ("urdu", "punjabi"):
                self.preprocessors[lang] = self.char_text_preprocessor
            elif lang == "english":
                self.preprocessors[lang] = self.tts_preprocessor
            else:
                self.preprocessors[lang] = self.tts_dur_align_preprocessor
        # Only male voices are exported at present. Loading stays best-effort
        # (a language may ship without a model directory), but we catch
        # Exception rather than a bare `except:` so KeyboardInterrupt/
        # SystemExit still propagate.
        self.models = {}
        for (language, _), gender in product(LANGMAP.values(), ("male",)):
            try:
                self.models[(language, gender)] = self.load_fastspeech2_model(
                    language,
                    gender,
                    "cuda",  # fixed: was f"cuda", an f-string with no placeholder
                )
            except Exception:
                pass
        self.vocoders = {
            (gender, family): self.load_vocoder(gender, family, "cuda")
            for gender, family in product(("male",), ("aryan", "dravidian"))
        }

    def load_vocoder(self, gender, family, device):
        """Create an ONNX Runtime session for the (gender, family) vocoder.

        Args:
            gender: "male"/"female" voice bank.
            family: "aryan" or "dravidian" language family.
            device: "cpu" for CPUExecutionProvider, anything else selects
                CUDA on self.device_id.
        """
        return onnxruntime.InferenceSession(
            f"vocoders/{gender}-{family}-vocoder.onnx",
            providers=[
                "CPUExecutionProvider"
                if device == "cpu"
                else ("CUDAExecutionProvider", {"device_id": self.device_id})
            ],
        )

    def load_fastspeech2_model(self, language, gender, device):
        """Load the quantized espnet_onnx FastSpeech2 model for one voice.

        Model directories follow the text2phone/<language>-<gender>-ort
        layout produced by the export notebook.
        """
        model = Text2Speech(
            providers=[
                "CPUExecutionProvider"
                if device == "cpu"
                else ("CUDAExecutionProvider", {"device_id": self.device_id})
            ],
            model_dir=f"text2phone/{language}-{gender}-ort",
            use_quantized=True,
        )
        return model

    def determine_gender(self, name):
        """Normalize a raw speaker id to "male"/"female".

        Unrecognized ids get a random gender (request-level best effort).
        """
        if name.lower() in ("m", "male"):
            return "male"
        elif name.lower() in ("f", "fem", "female"):
            return "female"
        else:
            return choice(["male", "female"])

    def synthesize_audio(self, text, lang_id, speaker_id):
        """Run the full TTS pipeline for one (text, language, speaker) triple.

        Args:
            text, lang_id, speaker_id: 1-element byte-string arrays from the
                request tensors.
        Returns:
            int16 numpy array of PCM samples at SAMPLING_RATE.
        """
        (language, family), gender = LANGMAP[
            lang_id[0].decode("utf-8")
        ], self.determine_gender(speaker_id[0].decode("utf-8"))
        # Only male voices are loaded in initialize(); fall back instead of
        # raising KeyError when "female" is requested or randomly chosen.
        if (language, gender) not in self.models:
            gender = "male"
        preprocessor = self.preprocessors[language]
        # fixed: was decode("utf=8") — only worked via codec-name normalization
        preprocessed_text = " ".join(
            preprocessor.preprocess(text[0].decode("utf-8"), language, gender)[0]
        )
        model, vocoder = (
            self.models[(language, gender)],
            self.vocoders[(gender, family)],
        )
        # text -> token ids -> mel features; transpose to (mel_bins, frames)
        # and add a batch axis for the vocoder.
        # NOTE(review): 2.3262 is an empirical gain factor — confirm origin.
        x = (
            numpy.expand_dims(
                model.postprocess(
                    model.tts_model(
                        model.preprocess.token_id_converter.tokens2ids(
                            model.preprocess.tokenizer.text2tokens(preprocessed_text)
                        )
                    )["feat_gen"]
                ).T,
                axis=0,
            )
            * 2.3262
        )
        y_g_hat = vocoder.run(None, {"input": x})[0]
        audio = y_g_hat.squeeze() * MAX_WAV_VALUE
        return audio.astype("int16")

    def execute(self, requests):
        """Handle a batch of Triton requests; one response per request.

        Each response packs one serialized int16 waveform (as bytes) per
        batch row of the request.
        """
        responses = []
        for request in requests:
            texts = pb_utils.get_input_tensor_by_name(request, "INPUT_TEXT").as_numpy()
            speaker_ids = pb_utils.get_input_tensor_by_name(
                request, "INPUT_SPEAKER_ID"
            ).as_numpy()
            language_ids = pb_utils.get_input_tensor_by_name(
                request, "INPUT_LANGUAGE_ID"
            ).as_numpy()
            audio_rows = [
                [self.synthesize_audio(text, language_id, speaker_id).tobytes()]
                for text, speaker_id, language_id in zip(
                    texts, speaker_ids, language_ids
                )
            ]
            responses.append(
                pb_utils.InferenceResponse(
                    output_tensors=[
                        pb_utils.Tensor(
                            "OUTPUT_GENERATED_AUDIO",
                            numpy.array(audio_rows, dtype=self.target_dtype),
                        )
                    ]
                )
            )
        return responses

    def finalize(self):
        """Nothing to release explicitly; ORT sessions are GC'd."""
        pass
# Triton model configuration for the "tts" Python-backend model (model.py).
name: "tts"
backend: "python"
max_batch_size: 64
# Repeated `input` blocks accumulate in textproto; all three are 1-element
# string tensors per batch row.
input [
{
    name: "INPUT_TEXT"
    data_type: TYPE_STRING
    dims: [ 1 ]
}
]
input [
{
    name: "INPUT_SPEAKER_ID"
    data_type: TYPE_STRING
    dims: [ 1 ]
}
]
input [
{
    name: "INPUT_LANGUAGE_ID"
    data_type: TYPE_STRING
    dims: [ 1 ]
}
]
# Raw int16 PCM bytes of the synthesized waveform (model.py serializes the
# audio with .tobytes()), carried as a string tensor.
output [
{
    name: "OUTPUT_GENERATED_AUDIO"
    data_type: TYPE_STRING
    dims: [ 1 ]
}
]
# Enable the dynamic batcher with default settings.
dynamic_batching {}
instance_group [
{
    count: 1
    kind: KIND_GPU
}
]
# Conda-packed execution environment built by the pack script; unpacked by
# Triton from the model directory at load time.
parameters: {
  key: "EXECUTION_ENV_PATH",
  value: {string_value: "$$TRITON_MODEL_DIRECTORY/tts.tar.gz"}
}
\ No newline at end of file
#!/bin/bash
# Builds the runtime "tts" conda environment, conda-packs it into tts.tar.gz
# (referenced by EXECUTION_ENV_PATH in config.pbtxt), pre-seeds NLTK data
# into the model version directory, and compiles the matching Triton
# python-backend stub. Expects /patches and /model_repo to be mounted.

# Install Miniforge (conda + mamba) non-interactively into ${HOME}/conda.
wget https://github.com/conda-forge/miniforge/releases/latest/download/Miniforge3-Linux-x86_64.sh
sh Miniforge3-Linux-x86_64.sh -b -p ${HOME}/conda
rm Miniforge3-Linux-x86_64.sh
source "${HOME}/conda/etc/profile.d/conda.sh"
source "${HOME}/conda/etc/profile.d/mamba.sh"
conda install -y conda-pack
# Keep user site-packages out of the packed environment.
export PYTHONNOUSERSITE=True
conda create -y --name tts python=3.11 --no-default-packages
conda activate tts
# CUDA 11.8 runtime libraries needed by onnxruntime-gpu inside the pack.
mamba install -c "nvidia/label/cuda-11.8.0" libcublas libcufft cuda-cudart -y
# Build the patched espnet_onnx wheel (GPU-provider patch from /patches).
git clone --recursive https://github.com/espnet/espnet_onnx.git
cd espnet_onnx && git apply /patches/espnet_onnx.patch && python setup.py bdist_wheel && cd ..
pip install -U numpy pandas nltk indic-num2words g2p_en "espnet_onnx/dist/espnet_onnx-0.2.0-py3-none-any.whl"
# conda-pack requires the target env to be inactive.
conda deactivate
conda pack -n tts
conda activate tts
# Pre-seed NLTK data next to model.py (it sets NLTK_DATA=".").
mkdir -p /model_repo/tts/1/corpora && wget --directory-prefix=/model_repo/tts/1/corpora "https://raw.githubusercontent.com/nltk/nltk_data/gh-pages/packages/corpora/cmudict.zip" && unzip /model_repo/tts/1/corpora/cmudict.zip -d /model_repo/tts/1/corpora
mkdir -p /model_repo/tts/1/taggers && wget --directory-prefix=/model_repo/tts/1/taggers https://raw.githubusercontent.com/nltk/nltk_data/gh-pages/packages/taggers/averaged_perceptron_tagger.zip && unzip /model_repo/tts/1/taggers/averaged_perceptron_tagger.zip -d /model_repo/tts/1/taggers
# Build the python-backend stub matching the Python 3.11 env; the branch
# (r23.10) must match the tritonserver image tag in the Dockerfile.
git clone https://github.com/triton-inference-server/python_backend -b r23.10
cd python_backend && mkdir build && cd build
cmake -DTRITON_ENABLE_GPU=ON -DTRITON_BACKEND_REPO_TAG=r23.10 -DTRITON_COMMON_REPO_TAG=r23.10 -DTRITON_CORE_REPO_TAG=r23.10 -DCMAKE_INSTALL_PREFIX:PATH=`pwd`/install ..
make -j$(nproc) triton-python-backend-stub && cd ../..
# Ship the packed env and the stub into the model directory.
mv tts.tar.gz python_backend/build/triton_python_backend_stub /model_repo/tts
\ No newline at end of file
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment