'''
Created on Sep 28, 2013

@author: priyank

Command-line filter that rewrites the feature column of a tab-separated
input file and prints the result to standard output.

Usage: -i/--input INPUT_FILE -b/--bag VERB_BAG_FILE

For every input line with exactly four tab-separated columns, the fourth
column is split on '|' into feature structures:

* one feature structure: the line is printed unchanged;
* exactly two, matching the verb pattern checked in
  ``_is_swappable_verb_pair``: the two structures are printed in swapped
  order;
* anything else: if the third column is 'VM' or 'VAUX', the fourth column is
  looked up in the verb bag (keyed by the second column) and replaced when an
  entry is found; otherwise the line is printed unchanged.

Lines that do not have exactly four columns are printed unchanged (with
trailing whitespace removed).
'''
import sys
import os
from MyParser import MyParser


def _parse_af(feature):
    """Return the comma-separated values of a feature structure's 2nd token.

    The first and last characters are dropped, the remainder is split on
    single spaces, and the second token is split on '=' to obtain the value
    that is then split on ','.  Presumably the input looks like
    ``<fs af='root,cat,gen,num,per,case,vib,tam'>`` -- TODO confirm against
    real data.  Element 0 of the result keeps the opening quote character.

    Raises IndexError when the text has no second token or no '='.
    """
    return feature[1:-1].split(' ')[1].split('=')[1].split(',')


def searchinbag(ftr, bagdata, root):
    """Search the bag data for an entry of `root` that matches feature `ftr`.

    Bag entries are expected in the form ``root:f0,f1,f2,f3,f4,f5``.  An
    entry matches when its root equals `root`, its first five values equal
    the first five values of `ftr` (the first one compared without its
    leading quote character), and its sixth value equals the *seventh* value
    of `ftr`.

    NOTE(review): comparing bag index 5 with feature index 6 is kept from the
    original code -- confirm that the skipped feature field is intentional.
    This function is not called anywhere in this script.

    Returns True on the first matching entry, False otherwise.
    Raises IndexError on malformed `ftr` or malformed matching bag entries.
    """
    if not bagdata:
        return False
    # The feature does not change between bag entries, so parse it only once.
    ftr_data = _parse_af(ftr.strip())
    for entry in bagdata:
        if entry.split(':')[0].strip() != root:
            continue
        bg_ftr = entry.split(':')[1].strip().split(',')
        if (bg_ftr[0] == ftr_data[0][1:]
                and bg_ftr[1] == ftr_data[1]
                and bg_ftr[2] == ftr_data[2]
                and bg_ftr[3] == ftr_data[3]
                and bg_ftr[4] == ftr_data[4]
                and bg_ftr[5] == ftr_data[6]):
            return True
    return False


def searchinBagWithStructure(oldftr, bagdata, root):
    """Look up the replacement feature string for `oldftr` of `root`.

    Each bag entry is a tab-separated line; column 1 holds the root, column 2
    the feature string to match and column 3 its replacement (column 0 is not
    used here).

    Returns a tuple ``(found, newftr)``: ``(True, replacement)`` for the first
    entry whose root equals `root` and whose column 2 equals the stripped
    `oldftr`, otherwise ``(False, '')``.
    """
    wanted = oldftr.strip()
    for entry in bagdata:
        fields = entry.strip().split('\t')
        if len(fields) < 4:
            # Blank or truncated bag lines cannot provide a replacement;
            # skip them instead of failing with an IndexError.
            continue
        if fields[1] == root and fields[2] == wanted:
            return True, fields[3].strip()
    return False, ''


def _is_swappable_verb_pair(features):
    """Tell whether the two feature structures should be printed swapped.

    The first structure must have 'v' at index 1, '0' at index 6 and 'any' at
    indexes 2, 3 and 4.  The second one is checked with the condition of the
    original code, parenthesised here exactly as Python evaluates it.

    A feature structure that cannot be parsed, or that has too few values,
    yields False so that the caller falls back to the bag lookup.
    """
    try:
        first = _parse_af(features[0])
        if first[1] != 'v':
            return False
        second = _parse_af(features[1])
        if not (first[6] == '0' and first[2] == 'any'
                and first[3] == 'any' and first[4] == 'any'):
            return False
        # NOTE(review): 'and' binds tighter than 'or', so this is
        # (verb and masculine) OR (sg and any and yA).  The intent may have
        # been a single conjunction -- confirm before changing it.
        return ((second[1] == 'v' and second[2] == 'm')
                or (second[3] == 'sg' and second[4] == 'any'
                    and second[6] == 'yA'))
    except IndexError:
        return False


def _rewrite_from_bag(line, bagdata):
    """Return `line` with column 3 replaced from the bag when applicable.

    The lookup is attempted only when the third column is 'VM' or 'VAUX'.
    Without a bag match the line is returned unchanged.  Trailing whitespace
    is removed in both cases.
    """
    raw_fields = line.split('\t')
    root = raw_fields[1].strip()
    found = False
    newftr = ''
    if raw_fields[2].strip() in ('VM', 'VAUX'):
        found, newftr = searchinBagWithStructure(
            line.strip().split('\t')[3], bagdata, root)
    if not found:
        return line.rstrip()
    raw_fields[3] = newftr
    return '\t'.join(raw_fields).rstrip()


parser = MyParser()
parser.add_argument('-i', '--input', help='Input file (with path)',
                    required=True)
parser.add_argument('-b', '--bag', help='verb bag file (with path)',
                    required=True)
args = parser.parse_args()

inputfile = args.input
bagfile = args.bag

# NOTE(review): a missing file is reported on stdout and the script exits
# with status 0; both are kept as they were because callers may rely on them,
# but a non-zero status on stderr would be the conventional choice.
if not os.path.isfile(inputfile):
    print(" Input file " + inputfile + " does not exist.")
    sys.exit(0)

if not os.path.isfile(bagfile):
    print("verb Bag file " + bagfile + " does not exist.")
    sys.exit(0)

# Read both files completely; 'with' closes them even if reading fails.
with open(inputfile) as f:
    inputdata = f.readlines()

with open(bagfile) as f:
    bagdata = [x.strip() for x in f.readlines()]

for line in inputdata:
    columns = line.strip().split('\t')
    if len(columns) != 4:
        # Not a four-column token line: pass it through.
        print(line.rstrip())
        continue

    features = columns[3].split('|')
    if len(features) == 1:
        print(line.rstrip())
    elif len(features) == 2 and _is_swappable_verb_pair(features):
        # The first three columns come from the unstripped line, as before.
        raw_fields = line.split('\t')
        print(raw_fields[0] + '\t' + raw_fields[1] + '\t' + raw_fields[2]
              + '\t' + '|'.join(reversed(features)))
    else:
        # Two features that do not match the swap pattern, or more than two.
        print(_rewrite_from_bag(line, bagdata))