diff --git a/text_preprocess_for_inference.py b/text_preprocess_for_inference.py
index ccca511..2191ebb 100644
--- a/text_preprocess_for_inference.py
+++ b/text_preprocess_for_inference.py
@@ -3,6 +3,8 @@ TTS Preprocessing
 Developed by Arun Kumar A(CS20S013) - November 2022
 Code Changes by Utkarsh - 2023
 '''
+import locale
+locale.setlocale(locale.LC_ALL, 'C.UTF-8')
 import os
 import re
 import json
@@ -40,14 +42,14 @@ def add_to_dictionary(dict_to_add, dict_file):
             df_temp = pd.read_csv(temp_dict_file, delimiter=" ", header=None, dtype=str)
             if len(df_temp) > len(df_orig):
                 os.rename(temp_dict_file, dict_file)
-                print(f"{len(dict_to_add)} new words appended to Dictionary: {dict_file}")
+                # print(f"{len(dict_to_add)} new words appended to Dictionary: {dict_file}")
         except:
             print(traceback.format_exc())
     else:
         # create a new dictionary
         with open(dict_file, "a") as f:
             f.write(append_string)
-        print(f"New Dictionary: {dict_file} created with {len(dict_to_add)} words")
+        # print(f"New Dictionary: {dict_file} created with {len(dict_to_add)} words")
 
 
 class TextCleaner:
@@ -104,7 +106,7 @@ class Phonifier:
             except Exception as e:
                 print(traceback.format_exc())
 
-        print("Phone dictionary loaded for the following languages:", list(self.phone_dictionary.keys()))
+        # print("Phone dictionary loaded for the following languages:", list(self.phone_dictionary.keys()))
 
         self.g2p = G2p()
         print('Loading G2P model... Done!')
@@ -315,7 +317,7 @@ class Phonifier:
                     #print('INSIDE IF CONDITION OF ADDING WORDS')
         else:
             non_dict_words = words
-        print(f"word not in dict: {non_dict_words}")
+        # print(f"word not in dict: {non_dict_words}")
 
         if len(non_dict_words) > 0:
             # unified parser has to be run for the non dictionary words
@@ -335,7 +337,7 @@ class Phonifier:
                     phn_out_dict[non_dict_words[i]] = self.en_g2p(non_dict_words[i])
                 # Create a string representation of the dictionary
                 data_str = "\n".join([f"{key}\t{value}" for key, value in phn_out_dict.items()])
-                print(f"data_str: {data_str}")
+                # print(f"data_str: {data_str}")
                 with open(out_dict_file, "w") as f:
                     f.write(data_str)
             else:
@@ -358,7 +360,7 @@ class Phonifier:
                     for original_word, formatted_word in zip(non_dict_words, replaced_output_list):
                         line = f"{original_word}\t{formatted_word}\n"
                         file.write(line)
-                        print(line, end='') 
+                        # print(line, end='') 
                   
 
             try:
@@ -415,8 +417,8 @@ class Phonifier:
             non_dict_words = words
 
         if len(non_dict_words) > 0:
-            print(len(non_dict_words))
-            print(non_dict_words)
+            # print(len(non_dict_words))
+            # print(non_dict_words)
             # unified parser has to be run for the non dictionary words
             os.makedirs("tmp", exist_ok=True)
             timestamp = str(time.time())
@@ -434,7 +436,7 @@ class Phonifier:
                     phn_out_dict[non_dict_words[i]] = self.en_g2p(non_dict_words[i])
                 # Create a string representation of the dictionary
                 data_str = "\n".join([f"{key}\t{value}" for key, value in phn_out_dict.items()])
-                print(f"data_str: {data_str}")
+                # print(f"data_str: {data_str}")
                 with open(out_dict_file, "w") as f:
                     f.write(data_str)
             else:
@@ -454,12 +456,12 @@ class Phonifier:
                     for original_word, formatted_word in zip(non_dict_words, replaced_output_list):
                         line = f"{original_word}\t{formatted_word}\n"
                         file.write(line)
-                        print(line, end='') 
+                        # print(line, end='') 
         
             try:
                 df = pd.read_csv(out_dict_file, delimiter="\t", header=None, dtype=str)
                 new_dict = df.dropna().set_index(0).to_dict('dict')[1]
-                print(new_dict)
+                # print(new_dict)
                 if language not in self.phone_dictionary:
                     self.phone_dictionary[language] = new_dict
                 else:
@@ -656,7 +658,7 @@ class TextNormalizer:
                     text = re.sub(str(digit), ' '+num_to_word(digit, self.keydict[language])+' ', text)
             return self.__post_cleaning(text)
         else:
-            print(f"No num-to-char for the given language {language}.")
+            # print(f"No num-to-char for the given language {language}.")
             return self.__post_cleaning(text)
 
     def num2text_list(self, text, language):
@@ -671,7 +673,7 @@ class TextNormalizer:
                 output_text.append(line)
             return self.__post_cleaning_list(output_text)
         else:
-            print(f"No num-to-char for the given language {language}.")
+            # print(f"No num-to-char for the given language {language}.")
             return self.__post_cleaning_list(text)
 
     def normalize(self, text, language):
@@ -758,9 +760,9 @@ class TTSDurAlignPreprocessor:
 
     def preprocess(self, text, language, gender):
         # text = text.strip()
-        print(text)
+        # print(text)
         text = self.text_cleaner.clean(text)
-        print("cleaned text", text)
+        # print("cleaned text", text)
         # text = self.text_normalizer.insert_space(text)
         text = self.text_normalizer.num2text(text, language)
         # print(text)
@@ -769,9 +771,9 @@ class TTSDurAlignPreprocessor:
         phrasified_text = TextPhrasifier.phrasify(text)
         #print("phrased",phrasified_text)
         phonified_text = self.phonifier.phonify(phrasified_text, language, gender)
-        print("phonetext",phonified_text)
+        # print("phonetext",phonified_text)
         phonified_text = self.post_processor.textProcesor(phonified_text)
-        print(phonified_text)
+        # print(phonified_text)
         return phonified_text, phrasified_text
 
 class TTSDurAlignPreprocessor_VTT:
@@ -854,9 +856,9 @@ class TTSPreprocessor:
         text = self.text_normalizer.normalize(text, language)
         phrasified_text = TextPhrasifier.phrasify(text)
         phonified_text = self.phonifier.phonify(phrasified_text, language, gender)
-        print(phonified_text)
+        # print(phonified_text)
         phonified_text = self.post_processor.textProcesorForEnglish(phonified_text)
-        print(phonified_text)
+        # print(phonified_text)
         return phonified_text, phrasified_text
 
 class TTSPreprocessor_VTT: