prepareinput.py 545 Bytes
Newer Older
priyank's avatar
priyank committed
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22
from glob import glob
import os, codecs

def collect_input_files(root="tests/"):
    """Return the paths of all ``*.in`` files found under *root*.

    Every directory visited by ``os.walk(root)`` is globbed for ``*.in``.
    The result follows the walk/glob order, which is not sorted.
    """
    return [
        path
        for dirpath, _dirs, _names in os.walk(root)
        for path in glob(os.path.join(dirpath, '*.in'))
    ]


def read_lines(path):
    """Return the lines of the UTF-8 encoded file at *path* as text."""
    # The context manager closes the handle even if decoding fails midway.
    with codecs.open(path, "rb", "utf-8") as handle:
        return handle.readlines()


def filter_tabbed_lines(lines):
    """Return the stripped lines that have at least two tab-separated fields.

    Each line is stripped of surrounding whitespace first, so leading or
    trailing tabs do not count as field separators.
    """
    kept = []
    for raw in lines:
        text = raw.strip()
        # A stripped line splits into >= 2 tab-separated fields exactly when
        # it contains a tab; this also rules out empty lines.
        if "\t" in text:
            kept.append(text)
    return kept


def wrap_sentence(lines):
    """Join *lines* (one per row) inside a single ``<Sentence id="1">`` element."""
    body = "".join(text + "\n" for text in lines)
    return '<Sentence id="1">\n' + body + '</Sentence>'


# Script body: merge every qualifying line from tests/**/*.in into one file.
files = collect_input_files("tests/")

kept_lines = []
for fl in files:
    kept_lines.extend(filter_tabbed_lines(read_lines(fl)))

largeinput = wrap_sentence(kept_lines)

# The wrapped text starts with '<' and ends with '>', so it needs no stripping.
with codecs.open("largeinput.txt", "wb", "utf-8") as f:
    f.write(largeinput)