#!/usr/bin/env python
# -*- coding: utf-8 -*-
"""Convert a plain-text file into unannotated SSF sentences.

Each non-blank line of the input file is treated as one sentence, split on
whitespace, and emitted as a ``<Sentence id="N">`` block with one
``index<TAB>token<TAB>unk`` row per token.

Usage: script.py INPUT_FILE  (the result is written to stdout as UTF-8)

Created by
@author: priyank
"""
import os
import sys
import codecs


def tokenizer(text, ind):
    """Tokenize the text only on whitespace and wrap it as an SSF sentence.

    text -- the sentence to split (``str.split()`` with no arguments).
    ind  -- zero-based sentence index; the emitted ``id`` is ``ind + 1``.

    Returns the sentence block as a single newline-joined string, without a
    trailing newline. Every token is tagged ``unk``.
    """
    # Plain concatenation (not str.format) so that a byte-string template is
    # never asked to encode a unicode token on Python 2.
    rows = [
        str(number) + '\t' + token + '\tunk'
        for number, token in enumerate(text.split(), 1)
    ]
    block = ['<Sentence id="' + str(ind + 1) + '">'] + rows + ['</Sentence>']
    return '\n'.join(block)


def _sentences_to_ssf(lines):
    """Return the SSF text for every non-blank entry in *lines*."""
    sentences = []
    for raw in lines:
        stripped = raw.strip()
        if not stripped:
            continue
        # NOTE(review): sentence ids advance only for non-blank lines, so ids
        # are consecutive -- confirm this matches the original intent.
        sentences.append(tokenizer(stripped, len(sentences)) + "\n")
    return "".join(sentences)


def _write_utf8(text):
    """Write *text* to stdout as UTF-8 bytes on both Python 2 and Python 3."""
    # Python 3 text streams expose the underlying byte stream as ``.buffer``;
    # Python 2's sys.stdout accepts byte strings directly.
    stream = getattr(sys.stdout, "buffer", sys.stdout)
    stream.write(text.encode("utf-8"))
    stream.flush()


def main(argv=None):
    """Read the file named by the first argument and print it as SSF."""
    args = sys.argv if argv is None else argv
    if len(args) < 2:
        sys.stderr.write("usage: " + os.path.basename(args[0] if args else "tokenizer") + " INPUT_FILE\n")
        return 2

    # ``with`` guarantees the handle is closed even if decoding fails.
    with codecs.open(args[1], "rb", "utf-8") as handle:
        lines = handle.readlines()

    # The trailing "\n" reproduces the newline that ``print`` used to append.
    _write_utf8(_sentences_to_ssf(lines) + "\n")
    return 0


if __name__ == "__main__":
    sys.exit(main())