## Deals with spelling variation & simple SANDHI for TELUGU --TEL-TAM MT##
# Oct, 2008, PARAMESHKRISHNA
# Then adopted for Telugu-Hindi by gurao 1May2012
#
# Filter: reads lines from the files named on the command line (or STDIN),
# takes everything after the first TAB of each line as the "word" field,
# strips a few punctuation/zero-width characters, and then tries a fixed,
# ordered list of suffix rules.  When a rule fires, the word is rewritten as
# "<stem><stem suffix>\tunk" and a second token "<prefix><tail>" is recorded,
# where <tail> is whatever followed the matched suffix in the word field.
# Output is "1\t<word>" and, if a second token was produced, "1\t<second>".
#
# NOTE(review): the output index is always the literal 1; the number in the
# first input column is not echoed.  This matches the original behaviour --
# confirm that downstream tools renumber the tokens.

use strict;
use warnings;

# Ordered sandhi rules.  Each row is
#   [ trigger, splitter, stem suffix, second-token prefix ]
#
# * trigger   - the rule fires only if the current word matches this.
#               Every trigger requires a TAB right after the suffix.
# * splitter  - two captures: the stem and the text after the suffix.  The
#               leading (.*) is greedy, so the LAST occurrence of the suffix
#               in the field is the one that gets split.
# * stem suffix         - appended to the stem before "\tunk".
# * second-token prefix - prepended to the tail; undef means the rule does
#                         not touch the second token at all.
#
# Rules are NOT mutually exclusive: they are tried in order against the
# current (possibly already rewritten) word, and a later rule that fires
# overwrites the second token set by an earlier one.
#
# Letter classes were written [A-z] in the original, which also admits the
# ASCII punctuation between 'Z' and 'a' ([ \ ] ^ _ `); they are [A-Za-z] here.
my @SANDHI_RULES = (
    [ qr/[A-Za-z][vy]+AgixeV\t/,   qr/(.*)[vy]+AgixeV(.*)/,   '',       'AgixeV' ],
    [ qr/[A-Za-z]x+AgixeV\t/,      qr/(.*)x+AgixeV(.*)/,      'xa',     'AgixeV' ],
    [ qr/[A-Za-z][vy]+AgiveV\t/,   qr/(.*)[vy]+AgiveV(.*)/,   '',       'AgiveV' ],
    # disabled in the original:
    # [ qr/[A-Za-z][vy]+Agiruva\t/, qr/(.*)[vy]+Agiruva(.*)/,  '',       'Agiruva' ],
    [ qr/wweVMba\t/,               qr/(.*)wweVMba(.*)/,       'wwu',    'eVMba' ],      # mAdiwweVMba
    [ qr/yukwa\t/,                 qr/(.*)yukwa(.*)/,         '',       'yukwa' ],      # PAyibarayukwa
    # Fixed: the original split with (.*)([nw]+)eVMba(.*) and then used $2
    # (the consonant) as the tail, which also dropped the consonant from the
    # stem.  The stem now keeps the n/w, like the [n]+eVMxu rule below.
    [ qr/[nw]+eVMba\t/,            qr/(.*)eVMba(.*)/,         'u',      'eVMba' ],      # hoyiweVMba
    [ qr/yenalla\t/,               qr/(.*)yenalla(.*)/,       '',       'alla' ],       # kammiyenalla
    [ qr/igiMwalU\t/,              qr/(.*)igiMwalU(.*)/,      '',       'giMwalU' ],    # savAligiMwalU
    [ qr/veVMba\t/,                qr/(.*)veVMba(.*)/,        '',       'eVMba' ],
    [ qr/wweVMxu\t/,               qr/(.*)wweVMxu(.*)/,       'wwu',    'eVMxu' ],      # mAdiwweVMxu
    [ qr/n+eVMxu\t/,               qr/(.*)eVMxu(.*)/,         'u',      'eVMxu' ],
    # Fixed: the original used $2 (the y/v glide) instead of the text after
    # the suffix when building the second token.
    [ qr/[yv]+eVMxu\t/,            qr/(.*)[yv]+eVMxu(.*)/,    '',       'eVMxu' ],
    [ qr/AreVMxu\t/,               qr/(.*)AreVMxu(.*)/,       'AreV',   'eVMxu' ],      # ittixxAreVMxu
    [ qr/iveVlla\t/,               qr/(.*)iveVlla(.*)/,       'ivu',    'eVlla' ],      # iveVlla
    # NOTE(review): this trigger starts with non-ASCII bytes exactly as found
    # in the original source.  It looks like encoding damage, so the rule
    # presumably never fires on ASCII input -- confirm the intended pattern.
    [ qr/ংোxalleVlla\t/,            qr/(.*)xalleVlla(.*)/,     'xalli',  'eVlla' ],
    [ qr/eVllara\t/,               qr/(.*)eVllara(.*)/,       'a',      'eVllara' ],
    [ qr/oVmmeV\t/,                qr/(.*)oVmmeV(.*)/,        'U',      'oVmmeV' ],     # yAvAgalAxaroVmmeV
    [ qr/ixxavu\t/,                qr/(.*)ixxavu(.*)/,        'u',      'ixxavu' ],     # maragagalYuixxavu
    [ qr/[yv]+illaxeV\t/,          qr/(.*)[yv]+illaxeV(.*)/,  '',       'illaxeV' ],    # AyXavillaxeV
    [ qr/[yv]+illa\t/,             qr/(.*)[yv]+illa(.*)/,     '',       'illa' ],
    [ qr/[tvy]+ixeV\t/,            qr/(.*)ixeV(.*)/,          'u',      'ixeV' ],
    [ qr/allixeV\t/,               qr/(.*)allixeV(.*)/,       'alli',   'ixeV' ],
    [ qr/galYiveV\t/,              qr/(.*)galYiveV(.*)/,      'galYu',  'iveV' ],
    [ qr/vAguwwaxeV\t/,            qr/(.*)vAguwwaxeV(.*)/,    '',       'AguwwaxeV' ],
    # disabled in the original:
    # [ qr/lAguwwaveV\t/,          qr/(.*)lAguwwaveV(.*)/,    'lu',     'AguwwaveV' ],
    [ qr/yAguvuxilla\t/,           qr/(.*)yAguvuxilla(.*)/,   '',       'Aguvuxilla' ],
    [ qr/wwiruwwAreV\t/,           qr/(.*)wwiruwwAreV(.*)/,   'wwA',    'iruwwAreV' ],  # baruwwiruwwAreV (yash & shanta 07012013)
    [ qr/vAgiruwwaxeV\t/,          qr/(.*)vAgiruwwaxeV(.*)/,  '',       'AgiruwwaxeV' ],
    # disabled in the original:
    # [ qr/vAxareV\t/,             qr/(.*)vAxareV(.*)/,       '',       'AxareV' ],     # ulleKavAxareV (yash 29082013)
    # [ qr/Ayiwu\t/,               qr/(.*)Ayiwu(.*)/,         'a',      'Ayiwu' ],
    [ qr/xallAyiwu\t/,             qr/(.*)xallAyiwu(.*)/,     'xalli',  'Ayiwu' ],
    [ qr/allaxeVyU\t/,             qr/(.*)allaxeVyU(.*)/,     'u',      'allaxeV' ],
    [ qr/alAyiwu\t/,               qr/(.*)alAyiwu(.*)/,       'u',      'Ayiwu' ],
    [ qr/raMxu\t/,                 qr/(.*)raMxu(.*)/,         '',       'raMxu' ],
    # NOTE(review): trigger starts with a non-ASCII byte sequence as found in
    # the original source, and the second token is "raMxu" although the
    # suffix matched is "yara".  Kept verbatim; looks like a damaged or
    # unfinished rule -- confirm.
    [ qr/াyara\t/,                  qr/(.*)yara(.*)/,          '',       'raMxu' ],
    [ qr/[A-Za-z][nryx]+oVMxigeV\t/, qr/(.*)oVMxigeV(.*)/,    'a',      'oVMxigeV' ],
    # NOTE(review): this rule only truncates the word; it emits no second
    # token, so "Agi" and the remaining columns are dropped.  Kept as in the
    # original -- confirm whether 'Agi' was meant to be emitted.
    [ qr/vAgi\t/,                  qr/(.*)vAgi(.*)/,          '',       undef ],
    [ qr/ilYixu\t/,                qr/(.*)ilYixu(.*)/,        'eV',     'ilYixu' ],     # keVlYagilYixu (yash & shanta 08012013)
    [ qr/virisikoVlYlYabahuxu\t/,  qr/(.*)virisikoVlYlYabahuxu(.*)/, '', 'irisikoVlYlYabahuxu' ],
    [ qr/iruwwaxeV\t/,             qr/(.*)iruwwaxeV(.*)/,     'u',      'iruwwaxeV' ],
    [ qr/iruwwareV\t/,             qr/(.*)iruwwareV(.*)/,     'i',      'iruwwareV' ],
    [ qr/iruvuxu\t/,               qr/(.*)iruvuxu(.*)/,       'i',      'iruvuxu' ],
    [ qr/xalliruva\t/,             qr/(.*)xalliruva(.*)/,     'xalli',  'iruva' ],
    [ qr/galYAgixxAreV\t/,         qr/(.*)galYAgixxAreV(.*)/, 'galYu',  'AgixxAreV' ],
    [ qr/[A-Za-z][vy]+iruva\t/,    qr/(.*)[vy]+iruva(.*)/,    '',       'iruva' ],
    [ qr/koVMdiruva\t/,            qr/(.*)koVMdiruva(.*)/,    'koVMdu', 'iruva' ],      # kattikoVMdu + iruva (yash)
    [ qr/[A-Za-z]y+oVlYageV\t/,    qr/(.*)y+oVlYageV(.*)/,    '',       'oVlYageV' ],
    # disabled in the original:
    # [ qr/vallaxeV\t/,            qr/(.*)vallaxeV(.*)/,      '',       'allaxeV' ],    # mAwravallaxeV (yash & shanta 11022013)
);

while (my $line = <>) {
    chomp $line;

    # Character clean-up.
    # NOTE(review): the \x{....} code points above 0xFF can only match if the
    # input has been decoded to characters (e.g. via -C or an I/O layer);
    # nothing in this script sets that up -- confirm how it is invoked.
    $line =~ s/[\x{2018}\x{2019}]/'/g;    # curly single quotes -> ASCII quote
    # Zero-width space / non-joiner / joiner, U+202C, BOM, and carriage return.
    $line =~ s/[\x{200B}\x{200C}\x{200D}\x{202C}\x{FEFF}\x{0D}]//g;
    # Literal kept byte-for-byte from the original source.
    $line =~ s/á//g;

    # The word field is everything after the first TAB.  A line without a TAB
    # gives an empty word instead of reusing undefined or stale captures.
    my $word  = $line =~ /[0-9]*\t(.*)/ ? $1 : '';
    my $word1 = '';    # second token produced by a sandhi split, if any

    for my $rule (@SANDHI_RULES) {
        my ($trigger, $splitter, $stem_suffix, $tail_prefix) = @{$rule};
        next unless $word =~ $trigger;

        my ($stem, $tail) = $word =~ $splitter
            or next;

        $word = $stem . $stem_suffix . "\tunk";
        $word1 = $tail_prefix . $tail if defined $tail_prefix;
    }

    # The original also had a branch printing a third token ($word2), but
    # nothing ever assigned it, so that branch could not run and is omitted.
    print "1\t$word\n";
    print "1\t$word1\n" if $word1 ne '';
}