# get_translation_iiith_telugu_hindi.py
# Committed by Vandan Mujadia
import sys
import codecs
import requests
import json
from tqdm import tqdm
import glob

# HTTP endpoint that call_mt posts its translation requests to.
url = 'http://ssmt.iiit.ac.in/onemt'

# The request body and the expected response are both JSON, so the same
# media type is used for Content-Type and Accept.
headers = dict.fromkeys(('Content-Type', 'Accept'), 'application/json')

def call_mt(text, lang='eng', Tlang='hin', timeout=60):
    """Translate ``text`` by posting it to the service at ``url``.

    Args:
        text: Text to translate.
        lang: Language code sent as the ``lang`` field of the request.
        Tlang: Language code sent as the ``Tlang`` field of the request.
        timeout: Seconds to wait for the server, passed to
            ``requests.post``. ``None`` waits indefinitely, which was the
            behaviour before this parameter existed.

    Returns:
        The value stored under ``data`` in the JSON response.

    Raises:
        requests.exceptions.HTTPError: the server answered with a 4xx/5xx
            status.
        requests.exceptions.Timeout: no answer arrived within ``timeout``.
        KeyError: the JSON response has no ``data`` field.
    """
    payload = {'text': text, 'lang': lang, 'Tlang': Tlang, "mode": "BaseLine - V5"}
    # Without a timeout a stalled connection would block the whole run.
    r = requests.post(url, headers=headers, json=payload, timeout=timeout)
    # Report HTTP failures as such instead of as a confusing JSON/KeyError
    # raised while parsing an error page.
    r.raise_for_status()
    return r.json()['data']

# Two positional arguments are required; exit with a usage message rather
# than a bare IndexError when either one is missing.
if len(sys.argv) < 3:
    sys.exit('usage: {} SOURCE_FILE OUTPUT_FILE'.format(sys.argv[0]))

source_file = sys.argv[1]  # input text file, read line by line
output_file = sys.argv[2]  # file the translated lines are written to


def read_file(path, isref=False):
    """Read a UTF-8 text file and return its cleaned lines.

    Each line is stripped of surrounding whitespace and then has every
    underscore, backtick, double quote and single quote removed.

    Args:
        path: Path of the file to read.
        isref: Unused; kept so existing callers keep working.

    Returns:
        A list with one cleaned string per line of the file.
    """
    # One translate() pass deletes all four characters; the result is the
    # same as the four chained replace() calls it stands in for.
    unwanted = str.maketrans('', '', '_`"\'')
    # codecs.open is kept so lines are split exactly as before; the with
    # block closes the handle, which was previously left open.
    with codecs.open(path, encoding="utf8") as handle:
        return [line.strip().translate(unwanted) for line in handle]

# Read and clean the source lines, then translate them one by one
# ('tel' -> 'hin'). Newlines inside a translation are flattened to spaces so
# that every input line yields exactly one output line.
_data_source = read_file(source_file)
_data_sys = [call_mt(text, 'tel', 'hin').replace('\n', ' ') for text in tqdm(_data_source)]

# Write with an explicit UTF-8 encoding, since the platform default may not be
# able to encode the translated text. The with block closes the file even if
# a write fails.
with open(output_file, 'w', encoding='utf8') as _f:
    for line in _data_sys:
        _f.write(line + '\n')
        print(line)