# verb-reorder.py
'''
Created on Sep 28, 2013

@author: priyank
'''
import sys
import os
from MyParser import MyParser

# Command-line interface: both the token file and the verb bag file are
# mandatory arguments.
parser = MyParser()
parser.add_argument('-i', '--input', help='Input file (with path)', required=True)
parser.add_argument('-b', '--bag', help='verb bag file (with path)', required=True)
args = parser.parse_args()

inputfile = args.input
bagfile = args.bag

# Fail fast when either file is missing.  The message goes to stderr so it
# cannot be mistaken for processed output (which is printed on stdout), and
# the exit status is non-zero so calling scripts can detect the failure.
if not os.path.isfile(inputfile):
    sys.stderr.write(" Input file %s does not exist.\n" % inputfile)
    sys.exit(1)

if not os.path.isfile(bagfile):
    sys.stderr.write("verb Bag file %s does not exist.\n" % bagfile)
    sys.exit(1)

# Read the input file; the lines keep their trailing newline.
with open(inputfile) as f:
    inputdata = f.readlines()

# Read the bag file; every line is stripped of surrounding whitespace.
with open(bagfile) as f:
    bagdata = [x.strip() for x in f]

def searchinbag(ftr, bagdata, root):
    """Search the bag file data for an entry of *root* that matches *ftr*.

    *ftr* is stripped, its first and last characters are dropped, the
    second space-separated token is taken, and the text following that
    token's first '=' is split on commas (presumably *ftr* looks like
    ``<fs af='v0,v1,...'>`` -- confirm against the callers).  The first
    character of the first value is dropped before comparing.

    Each bag entry is read as ``key:values``.  An entry matches when its
    stripped key equals *root* and its comma-separated values 0..5 equal
    the feature values 0, 1, 2, 3, 4 and 6 respectively (feature value 5
    is not compared).

    Returns True on the first matching entry, otherwise False.  An empty
    *bagdata* returns False without parsing *ftr*.  Raises IndexError when
    *ftr*, or an entry whose key matched, is shorter than expected.
    """
    if not bagdata:
        # Nothing to search; the feature string is never parsed in this case.
        return False

    attribute = ftr.strip()[1:-1].split(' ')[1].split('=')[1]
    values = attribute.split(',')
    # Same as `values`, except the first value loses its leading character.
    wanted = [values[0][1:]] + values[1:]
    # (bag index, feature index) pairs, compared in this order.
    index_pairs = ((0, 0), (1, 1), (2, 2), (3, 3), (4, 4), (5, 6))

    for entry in bagdata:
        parts = entry.split(':')
        if parts[0].strip() != root:
            continue
        bag_values = parts[1].strip().split(',')
        if all(bag_values[b] == wanted[f] for b, f in index_pairs):
            return True
    return False


def searchinBagWithStructure(oldftr, bagdata, root):
    """Look up the replacement feature string for *root* and *oldftr*.

    Each bag entry is stripped and split on tabs.  An entry matches when
    its column 1 equals *root* and its column 2 equals the stripped
    *oldftr*; the stripped column 3 of the first matching entry is the
    replacement.  Column 0 is not inspected.

    Entries with fewer than four tab-separated columns (for example a
    blank line in the bag file) are skipped instead of raising IndexError.

    Returns a ``(found, newftr)`` tuple: ``(True, replacement)`` for the
    first matching entry, ``(False, '')`` when nothing matches.
    """
    wanted = oldftr.strip()
    for entry in bagdata:
        columns = entry.strip().split('\t')
        if len(columns) < 4:
            # Blank or truncated entry: it cannot hold a replacement.
            continue
        if columns[1] == root and columns[2] == wanted:
            return True, columns[3].strip()
    return False, ''


def _af_values(feature):
    """Return the comma-separated values carried by one feature string.

    The first and last characters of *feature* are dropped, the second
    space-separated token is taken, and the text after that token's first
    '=' is split on commas (presumably the input looks like
    ``<fs af='v0,v1,...'>`` -- confirm against the data).  Raises
    IndexError when the string does not have that shape.
    """
    return feature[1:-1].split(' ')[1].split('=')[1].split(',')


def _is_swappable_pair(features):
    """Tell whether the two entries of *features* must be printed swapped.

    The first feature must have value 1 == 'v', value 6 == '0' and values
    2, 3 and 4 == 'any'; the second feature is then checked with the mixed
    and/or condition below.  The second feature is parsed as soon as the
    first one has value 1 == 'v', so a malformed second feature raises
    IndexError at that point.
    """
    first = _af_values(features[0])
    if first[1] != 'v':
        return False
    second = _af_values(features[1])
    if not (first[6] == '0' and first[2] == 'any'
            and first[3] == 'any' and first[4] == 'any'):
        return False
    # NOTE(review): `and` binds tighter than `or`, so the original condition
    # groups as (A and B) or (C and D and E).  The parentheses below only
    # make that existing grouping explicit; confirm whether a plain
    # conjunction of all five tests was intended.
    return ((second[1] == 'v' and second[2] == 'm')
            or (second[3] == 'sg' and second[4] == 'any' and second[6] == 'yA'))


def _bag_replacement(line, feature_column, bagdata):
    """Return *line* with its feature column replaced from the bag, or None.

    The lookup is only attempted when column 2 of *line* is 'VM' or 'VAUX';
    column 1 is passed to the bag search as the root.  None means the line
    must be printed unchanged.
    """
    raw = line.split('\t')
    root = raw[1].strip()
    if raw[2].strip() not in ('VM', 'VAUX'):
        return None
    found, newftr = searchinBagWithStructure(feature_column, bagdata, root)
    if not found:
        return None
    raw[3] = newftr
    return '\t'.join(raw).rstrip()


# Process the input line by line.  Lines that do not have exactly four
# tab-separated columns, and lines with a single feature in column 3, are
# echoed unchanged (minus trailing whitespace).
for line in inputdata:
    columns = line.strip().split('\t')
    if len(columns) != 4:
        print(line.rstrip())
        continue

    # Column 3 holds one or more features separated by '|'.  str.split never
    # returns an empty list, so there is always at least one feature.
    features = columns[3].split('|')
    if len(features) == 1:
        print(line.rstrip())
        continue

    if len(features) == 2 and _is_swappable_pair(features):
        # Print the two features in reverse order.  The leading columns come
        # from the unstripped line, exactly as they appear in the input.
        raw = line.split('\t')
        print(raw[0] + '\t' + raw[1] + '\t' + raw[2] + '\t' + '|'.join(reversed(features)))
        continue

    # Every other multi-feature line: use the bag replacement if there is
    # one, otherwise echo the line.
    replaced = _bag_replacement(line, columns[3], bagdata)
    print(line.rstrip() if replaced is None else replaced)