#!/usr/bin/env python
# -*- coding: utf-8 -*-
"""
Created by priyank.

@author: priyank
"""

import json
import requests
from SocketServer import ThreadingMixIn
import threading
import codecs
import re
import cgi
from BaseHTTPServer import HTTPServer, BaseHTTPRequestHandler
from optparse import OptionParser
from urlparse import urlparse, parse_qs
import os
import sys
from argparse import ArgumentParser

# Configure the command-line parser. Both options are declared required, so
# argparse itself aborts with a usage message when either one is omitted.
parser = ArgumentParser()
parser.add_argument('-c', '--serverConfigFile', help='server configuration file (with path)', required=True)
parser.add_argument('-i', '--inputFile', help='inputFile (with path)', required=True)
args = parser.parse_args()

# Paths taken from the command line: the JSON server configuration and the
# text file to process.
serverFile = args.serverConfigFile
inputFile = args.inputFile

#function to get sentences from SSF
def sentenceCollector(inputString):
    """Split SSF text into one string per sentence block.

    Blank lines are dropped and trailing whitespace is removed from every
    line. Lines are accumulated until a line starting with ``</Sentence`` is
    seen; at that point the accumulated lines (each terminated by a newline)
    are emitted as one list item and accumulation starts over. Lines that
    follow the last closing tag are discarded.

    :param inputString: SSF text, possibly containing several sentences.
    :return: list of sentence blocks, in input order; empty when the input
        contains no closing ``</Sentence`` line.
    """
    # Adjacent sentences may arrive glued together on a single line; put the
    # opening tag of the next sentence on its own line before splitting.
    if "Sentence><Sentence" in inputString:
        inputString = inputString.replace('Sentence><Sentence', 'Sentence>\n<Sentence')

    sentList = []
    currentLines = []
    for line in inputString.strip().split("\n"):
        line = line.rstrip()
        if not line:
            continue
        currentLines.append(line + "\n")
        if line.startswith('</Sentence'):
            # Closing tag: the buffered lines form one complete sentence.
            sentList.append("".join(currentLines))
            currentLines = []
    return sentList

# Function to get the output of the last module (wordgenerator)
def wordgenCollector(inputString):
    """Join the second tab-separated column of SSF rows into one string.

    Rows are skipped when they are empty, have fewer than two tab-separated
    columns, start with a ``<Sentence `` / ``</Sentence`` tag, or carry a
    chunk bracket (``((`` or ``))``) in the second column.

    NOTE(review): the branch bodies were missing in the previous revision;
    skipping tags and brackets while keeping every other row is the
    reconstructed intent -- confirm against the wordgenerator output format.

    :param inputString: SSF text produced by a pipeline module.
    :return: the kept second-column values, each followed by a single space
        (so a non-empty result ends with a trailing space).
    """
    words = []
    for line in inputString.strip().split("\n"):
        line = line.rstrip()
        linearray = line.split("\t")
        if not line or len(linearray) < 2:
            continue
        if line.startswith(('<Sentence ', '</Sentence')):
            continue
        if linearray[1] in ('((', '))'):
            continue
        words.append(linearray[1] + " ")
    return "".join(words)

# Stop immediately when either input path is missing; continuing would only
# fail later with a less helpful traceback from open().
if not os.path.isfile(serverFile):
    print(" serverFile file %s does not exist." % serverFile)
    sys.exit(1)
if not os.path.isfile(inputFile):
    print(" inputFile file %s does not exist." % inputFile)
    sys.exit(1)

# Load the server details (a JSON object) from the configuration file.
server_details = {}
with open(serverFile) as server_file:
    server_details = json.load(server_file)

# The configuration is expected to carry the translation endpoint under the
# 'pan' key; a missing key raises KeyError here.
translationURL = server_details['pan']

# Read the whole input as UTF-8 text, one list item per line, and make sure
# the handle is closed afterwards.
with codecs.open(inputFile, "rb", "utf-8") as f:
    lines = f.readlines()

# Derive the tokenizer URL from the configured translation URL by overwriting
# its second-to-last path segment with '1'.
# NOTE(review): the meaning of the individual URL segments is not visible
# here -- confirm against the server's URL scheme.
tokenizerURLArray = translationURL.split("/")
tokenizerURLArray[-2] = '1'
# Built after the overwrite above, so it sees the patched segment if that
# segment happens to sit at index 5 or 6. Raises IndexError when the URL has
# fewer than seven "/"-separated parts.
modulesURL = "/".join([
    tokenizerURLArray[0],
    tokenizerURLArray[1],
    tokenizerURLArray[2],
    tokenizerURLArray[5],
    tokenizerURLArray[6],
    "modules",
])
tokenizerURL = "/".join(tokenizerURLArray)

# Re-split the original URL and overwrite its third-to-last segment with '2'
# to obtain the URL used for the per-sentence translation requests.
tokenizerURLArray = translationURL.split("/")
tokenizerURLArray[-3] = '2'
translationURL = "/".join(tokenizerURLArray)

myheaders = {"Content-type": "application/x-www-form-urlencoded; charset=UTF-8"}
# Setting a scheme to None tells requests not to use a proxy for it.
proxies = {
    "http": None,
}

# Ask the server for its list of pipeline modules.
# NOTE(review): the call was truncated in the previous revision; POST to
# modulesURL is reconstructed from the surviving arguments -- confirm the
# HTTP method the server expects.
res = requests.post(modulesURL, proxies=proxies, headers=myheaders)

lastModule = ''
secondLastModule = ''
# getting last modules
if res is not None:
    # The response body is parsed as a JSON list; its last two entries name
    # the modules whose output is reported.
    modulesList = json.loads(res.text)
    lastModule = modulesList[-1]
    secondLastModule = modulesList[-2]
else:
    print("Null response from server")
    # Nothing below can work without the module list.
    sys.exit(1)

response_data = {}
response_data['language'] = 'pan'
response_data['text'] = lines
output = ""
wxoutput = ""

# processing sentence in each line by calling MT
# Processing paras: one line is considered as a para
iii = 0
intermediatearray = []
mystr = ""

# Keys under which the server response carries the output of the last two
# modules: "<module name>-<1-based position in modulesList>". They do not
# change between requests, so compute them once.
lastModuleKey = lastModule + "-" + str(modulesList.index(lastModule) + 1)
secondLastModuleKey = secondLastModule + "-" + str(modulesList.index(secondLastModule) + 1)

for line in lines:
    line = line.strip()
    if not line:
        # Blank lines are neither sent to the server nor counted as paras.
        continue

    # calling tokenizer on line
    # NOTE(review): the request calls in this loop were truncated in the
    # previous revision; POST to tokenizerURL / translationURL is
    # reconstructed from the surviving arguments -- confirm.
    dataToSend = {"data": line.encode('utf-8')}
    res = requests.post(tokenizerURL, proxies=proxies, headers=myheaders, data=dataToSend)
    tokenOut = json.loads(res.text)
    sentences = sentenceCollector(tokenOut['tokenizer-1'])
    jjj = 0
    tempdict = {}
    mystr = mystr + "paraid:" + str(iii + 1) + "\n" + line + "\n"

    for sentence in sentences:
        dataToSend = {"data": sentence.strip().encode('utf-8')}
        res = requests.post(translationURL, proxies=proxies, headers=myheaders, data=dataToSend)
        completeOut = json.loads(res.text)
        lastmoduleOutput = completeOut[lastModuleKey]
        secondlastmoduleOutput = completeOut[secondLastModuleKey]
        finalOutput = lastmoduleOutput
        output = output + finalOutput + " \n\n"
        wxoutput = wxoutput + secondlastmoduleOutput + " \n\n"

        # NOTE(review): the whole paragraph text (line), not the individual
        # sentence, is echoed under each sentid -- confirm this is intended.
        mystr = mystr + "sentid:" + str(jjj + 1) + "\n" + line + "\n"
        mystr = mystr + lastmoduleOutput + "\n"
        jjj = jjj + 1

    iii = iii + 1
    output = output + " \n\n"
    wxoutput = wxoutput + " \n\n"
    mystr = mystr + "---------------------------------------------------------\n"


print(mystr)