use strict;
use warnings;

# Token post-processor.
#
# Reads lines from the files named on the command line (or STDIN).  On each
# line, everything after the first tab is taken as the token text; the field
# before the tab is discarded and every output line is emitted with a
# literal leading "1".  The token text is normalised and, when a known
# suffix is glued to the preceding number/word, the suffix is split off and
# printed as a second line tagged "unk".
#
# Output per input line:
#     1<TAB><token text>
#     1<TAB><suffix><TAB>unk        (only when a suffix was split off)
#
# Rewrites that were present but disabled in the earlier version of this
# script, and are still disabled: OY -> A (after a non-"s"), tsa$ -> tsu,
# sa -> s, uz -> u, and a separate "vAle"/"vAlI" rule after "...ne".

# Suffixes split off when they directly follow a run of digits ("12veM").
my @NUMERIC_SUFFIXES = qw(veM vIM vAM vAz);

# Consonant+vowel suffixes split off a run of digits.  The suffix must be
# immediately followed by a tab, i.e. it has to end the first field of the
# token text; each entry is [ consonant, pattern capturing (digits, vowel) ].
my @NUMERIC_VOWEL_RULES = (
    [ 'r', qr/([0-9]+)r([AIe])\t/ ],
    [ 'W', qr/([0-9]+)W([AIe])\t/ ],
    [ 'T', qr/([0-9]+)T([AIe])\t/ ],
    [ 'v', qr/([0-9]+)v([AI])\t/ ],
);

# Normalise one token text and split off a glued suffix, if any.
#
# Takes the token text (everything after the first tab of an input line).
# Returns a two-element list: the rewritten token text, and the suffix that
# was split off ('' when nothing was split).  Whenever a split happens the
# returned token text is "<stem><TAB>unk", replacing whatever followed the
# stem in the input.
sub _split_token {
    my ($word) = @_;
    my $suffix = '';

    # Plain textual clean-ups, applied in this fixed order.
    # NOTE(review): the non-ASCII literal below may be encoding residue; it
    # is kept exactly as found.
    $word =~ s/Zaè//g;
    $word =~ s/UM/Uz/g;
    $word =~ s/ki \. mI \./kimI/g;
    $word =~ s/ki \.mI \./kimI/g;

    # Tokens starting with a digit lose every comma ("1,000" -> "1000").
    $word =~ s/,//g if $word =~ /^[0-9]/;

    $word =~ s/dZ/d/g;

    # <digits><fixed suffix>  ->  digits + suffix.
    for my $fixed (@NUMERIC_SUFFIXES) {
        if ($word =~ /([0-9]+)\Q$fixed\E/) {
            $word   = "$1\tunk";
            $suffix = $fixed;
        }
    }

    # <digits><consonant><vowel><TAB>  ->  digits + consonant.vowel.
    # The same pattern both guards and captures, so a token that merely
    # starts like a suffix (e.g. "12rest") is left untouched instead of
    # being truncated using captures from a failed match.
    for my $rule (@NUMERIC_VOWEL_RULES) {
        my ($consonant, $pattern) = @{$rule};
        if ($word =~ $pattern) {
            $suffix = $consonant . $2;
            $word   = "$1\tunk";
        }
    }

    # <letters>vAl<A|I|e>: the "...ne" form is tried first, then any stem.
    # NOTE(review): [A-z] also matches the punctuation between 'Z' and 'a'
    # ([ \ ] ^ _ `); kept as found -- confirm whether [A-Za-z] was intended.
    if (   $word =~ /([A-z]+ne)vAl([AIe])/
        || $word =~ /([A-z]+)vAl([AIe])/)
    {
        $suffix = "vAl$2";
        $word   = "$1\tunk";
    }

    # <letters>ne + vAloM.
    if ($word =~ /([A-z]+ne)vAloM/) {
        $word   = "$1\tunk";
        $suffix = 'vAloM';
    }

    return ($word, $suffix);
}

while (my $line = <>) {
    chomp $line;

    # Token text is everything after the first tab.  A line without a tab
    # yields an empty token rather than undefined or stale capture values,
    # so one output line is still produced for it.
    my $text = $line =~ /\t(.*)/ ? $1 : '';

    my ($word, $word1) = _split_token($text);

    print "1\t$word\n";
    print "1\t$word1\tunk\n" if $word1 ne '';
}