#!/usr/bin/perl
#
# Transliteration driver.
#
# Reads SSF or plain-text lines from STDIN and prints the converted lines on
# STDOUT.  Words are handed to the per-language converter scripts loaded with
# require below; those scripts exchange data through the files "word",
# "out_word" and "outword" in the current directory.
#
# In SSF mode only values marked with a leading "@" are converted (token
# column plus the lex, vib, head and name attributes of each feature
# structure).  In text mode every whitespace-separated word is considered.
# A word is converted only when findlang() reports the language given with
# --slang.

use strict;
use warnings;

#Reading command line arguments
use Getopt::Long "GetOptions";

$ENV{"LC_ALL"} = "C";

# Kept as package globals rather than lexicals.
# NOTE(review): the required converter scripts may read these -- confirm
# before turning them into "my" variables.
our ($help, $mode, $path, $stype, $slang, $tlang);

GetOptions(
    "help!"   => \$help,
    'mode=s'  => \$mode,
    'path=s'  => \$path,
    'stype=s' => \$stype,
    'slang=s' => \$slang,
    'tlang=s' => \$tlang,
);

# Anything left in @ARGV was not recognised: list all of it, then stop.
if (@ARGV) {
    print "Unprocessed by Getopt::Long\n" if $ARGV[0];
    print "$_\n" for @ARGV;
    exit(0);
}

if ($help) {
    print "transliteration-3.4\n(27th Jan 2009 last modified on 8th December 2008)\n\n";
    print "usage : ./run-transliteration.pl --path=/home/transliteration-3.4 --stype=ssf --slang=hin --tlang=tel \n";
    print "or ./run-transliteration.pl --path=/home/transliteration-3.4 --stype=text --slang=hin --tlang=tel \n";
    exit(0);
}

#Checking for each command line arguments
my @required = (
    [ $path,  "Please Specify the Path as defined in --help\n" ],
    [ $stype, "Please Specify the Source Language type in ssf/text as defined in --help\n" ],
    [ $slang, "Please Specify the Language as defined in --help\n" ],
    [ $tlang, "Please Specify the Language as defined in --help\n" ],
);
for my $check (@required) {
    my ($value, $message) = @$check;
    next if defined $value and $value ne "";
    print $message;
    exit(0);
}

my $bin = $path . "/bin";

#Requiring Mapping modules of source language
require "$bin/$slang/utf2wx.pl";
require "$bin/$slang/wx2utf.pl";

#Requiring API
require "$path/API/feature_filter.pl";

#Requiring Mapping module of target language
require "$path/trans.pl";

while (my $line = <STDIN>) {
    chomp($line);

    if ($stype eq "ssf" or $stype eq "SSF") {
        convert_ssf_line($line);
    }
    elsif ($stype eq "text" or $stype eq "TEXT") {
        convert_text_line($line);
    }
}

# Converts one SSF line (num, token, pos and feature structure separated by
# tabs) and prints the result.
sub convert_ssf_line {
    my ($line) = @_;

    my ($num, $lex, $pos, $fs) = split(/\t/, $line);
    for ($num, $lex, $pos, $fs) {
        $_ = "" unless defined $_;    # short lines print empty columns
    }

    #TKN Field Conversion
    my $orig_lex = $lex;
    my $marked   = 0;
    if ($lex =~ /^\@/) {
        $lex    = field_of($lex, '@', 1);
        $marked = 1;
    }

    # Chunk brackets are never converted.
    my $is_bracket = ($lex eq "((" or $lex eq "))");
    my $lex_out = "";
    if (!$is_bracket) {
        $lex_out = $marked ? "@" . transliterate($lex) : $lex;
    }

    if ($fs ne "") {
        my $newfs = convert_feature_structures($fs, $line);

        # A line containing a bracket anywhere keeps its original token.
        if ($line =~ /\(\(/ or $line =~ /\)\)/) {
            $orig_lex =~ s/\@\./\./g;
            print $num, "\t", $orig_lex, "\t", $pos, "\t", $newfs, "\n";
        }
        else {
            $lex_out =~ s/\@\./\./g;
            print $num, "\t", $lex_out, "\t", $pos, "\t", $newfs, "\n";
        }
    }
    elsif (!$is_bracket) {
        print $num, "\t", $lex_out, "\t", $pos, "\t", $fs, "\n";
    }
    else {
        print $line . "\n";
    }
    return;
}

# Converts the lex, vib, head and name attributes of every "|"-separated
# feature structure in $fs and returns the rebuilt feature-structure string.
sub convert_feature_structures {
    my ($fs, $line) = @_;

    my @fss       = split(/\|/, $fs);
    my $remaining = @fss;
    my @string    = ("");
    my $i         = 0;

    for my $af (@fss) {
        #Extraction of each fields using API
        my $FSreference = read_FS($af, $line);
        my @lex_root    = get_values("lex",  $FSreference);
        my @fs_vib      = get_values("vib",  $FSreference);
        my @head_root   = get_values("head", $FSreference);
        my @name_root   = get_values("name", $FSreference);

        #Lexical field conversion
        for my $field (@lex_root) {
            $string[$i] = set_fs_attr("lex", convert_fs_value($field), $FSreference, $af);
        }

        #Vibhakthi field conversion ("-@" prefix is also accepted here)
        for my $field (@fs_vib) {
            $string[$i] = set_fs_attr("vib", convert_fs_value($field, dash => 1), $FSreference, $af);
        }

        #HEAD conversion (value may be wrapped in double quotes)
        for my $field (@head_root) {
            $string[$i] = set_fs_attr("head", convert_fs_value($field, quotes => 1), $FSreference, $af);
        }

        #NAME conversion (value may be wrapped in double quotes)
        for my $field (@name_root) {
            $string[$i] = set_fs_attr("name", convert_fs_value($field, quotes => 1), $FSreference, $af);
        }

        $i++;
    }

    #Making New FS
    # A "|" follows every piece except the one that brings the count of
    # input structures down to zero.
    my $newfs = "";
    for my $piece (@string) {
        $newfs .= defined $piece ? $piece : "";
        $newfs .= "|" if --$remaining;
    }
    return $newfs;
}

# Stores $value as the only value of attribute $attr through the API and
# returns the feature structure rendered by make_string().
sub set_fs_attr {
    my ($attr, $value, $FSreference, $af) = @_;

    my @values = ($value);
    update_attr_val($attr, \@values, $FSreference, $af);
    return make_string($FSreference, $af);
}

# Converts one attribute value.  Values without a leading "@" marker are
# returned unchanged.  Options: dash => 1 also accepts a "-@" prefix,
# quotes => 1 also accepts a value wrapped in double quotes.
sub convert_fs_value {
    my ($value, %allow) = @_;
    return $value unless defined $value;

    my $quoted = 0;
    if ($allow{quotes} and $value =~ /^\"\@.*\"/) {
        $value =~ s/\"//g;
        $quoted = 1;
    }

    my $prefix;
    if ($value =~ /^\@/) {
        $prefix = "@";
    }
    elsif ($allow{dash} and $value =~ /^-\@/) {
        $prefix = "-@";
    }
    else {
        return $value;
    }

    #Reattaching symbols
    my $converted = $prefix . transliterate(field_of($value, '@', 1));
    $converted = "\"" . $converted . "\"" if $quoted;
    $converted =~ s/\@\./\./g;
    return $converted;
}

# Converts one line of plain text word by word and prints it.  Output is
# written piece by piece, in the same order as the words are processed.
sub convert_text_line {
    my ($line) = @_;

    for my $lex (split(/\s/, $line)) {
        my $closer = "";    # closing symbol to print after the word

        #Symbols extraction
        if ($lex =~ /^\@/) {
            $lex = field_of($lex, '@', 1);
            print "\@";
        }
        elsif ($lex =~ /^\'/) {
            $closer = "\'" if $lex =~ /^\'.*\'/;
            $lex = field_of($lex, "'", 1);
            print "\'";
        }
        elsif ($lex =~ /^.*\'/) {
            $lex    = field_of($lex, "'", 0);
            $closer = "\'";
        }
        elsif ($lex =~ /^\`/) {
            $closer = "\`" if $lex =~ /^\`.*\`/;
            $lex = field_of($lex, '`', 1);
            print "\`";
        }
        elsif ($lex =~ /^.*\`/) {
            $lex    = field_of($lex, '`', 0);
            $closer = "\`";
        }
        elsif ($lex =~ /^\(/) {
            $lex = field_of($lex, '(', 1);
            print "\(";
            if ($lex =~ /^.*\)/) {
                $lex    = field_of($lex, ')', 0);
                $closer = "\)";
            }
        }
        elsif ($lex =~ /^.*\)/) {
            $lex    = field_of($lex, ')', 0);
            $closer = "\)";
        }
        elsif ($lex =~ /^\"/) {
            $closer = "\"" if $lex =~ /^\".*\"/;
            $lex = field_of($lex, '"', 1);
            print "\"";
        }
        elsif ($lex =~ /^.*\"/) {
            $lex    = field_of($lex, '"', 0);
            $closer = "\"";
        }

        # The language is identified before "|" is rewritten to ".".
        my $srlang = source_language($lex);
        if ($lex =~ /^\|/ and ($slang eq "hin" or $slang eq "mar")) {
            $lex =~ s/\|/\./g;
        }

        #Conversion happens only when the input is in source language specified by user
        my $lex_out = $srlang =~ /$slang/ ? convert_word($lex) : $lex;

        #Reattaching symbols
        print $lex_out . $closer . " ";
    }
    print "\n";
    return;
}

# Returns field number $index (0-based) of $text split on the literal
# delimiter $delim, or "" when there is no such field.
sub field_of {
    my ($text, $delim, $index) = @_;

    my @parts = split(/\Q$delim\E/, $text);
    return defined $parts[$index] ? $parts[$index] : "";
}

# Returns the language code of $word as reported by findlang(); when the
# intended source language is Marathi, "hin" is reported as "mar".
sub source_language {
    my ($word) = @_;

    my $srlang = findlang($word);
    $srlang = "mar" if $slang eq "mar" and $srlang eq "hin";
    return $srlang;
}

# Returns $word converted to the target language when it is in the source
# language given by the user, otherwise $word unchanged.
sub transliterate {
    my ($word) = @_;

    return source_language($word) =~ /$slang/ ? convert_word($word) : $word;
}

# Runs $word through the converter scripts and returns the first line of
# their output.  The converters take file names, so the word is passed in
# the file "word"; the temporary files are removed afterwards.
sub convert_word {
    my ($word) = @_;

    write_word_file("word", $word);

    my $converted;
    if ($slang eq "urd" and $tlang eq "hin") {
        #Urdu to Hindi conversion
        require "$bin/$slang/urd2hin.pl";
        urd2hin($path, "word", "out_word");
        $converted = read_first_line("out_word");
        unlink("word", "out_word");
    }
    elsif ($slang eq "hin" and $tlang eq "urd") {
        #Hindi to Urdu conversion
        require "$bin/$tlang/hin2urd.pl";
        hin2urd($path, "word", "out_word");
        $converted = read_first_line("out_word");
        unlink("word", "out_word");
    }
    else {
        #UTF -> WX conversion function call of source language
        utf2wx($path, "word", "out_word");
        $converted = read_first_line("out_word");

        #Bengla v->b mapping
        $converted =~ s/v/b/g if $tlang eq "ben";

        #WX -> UTF conversion function call of target language
        write_word_file("word", $converted);
        trans($path, $tlang, "word", "outword");
        $converted = read_first_line("outword");
        unlink("word", "out_word", "outword");
    }
    return $converted;
}

# Writes $text followed by a newline to $file.  The text is written by Perl
# itself, never through a shell, so no character in it is interpreted.
sub write_word_file {
    my ($file, $text) = @_;

    my $fh;
    unless (open($fh, '>', $file)) {
        warn "Cannot write $file: $!\n";
        return;
    }
    print {$fh} $text, "\n";
    close($fh) or warn "Cannot close $file: $!\n";
    return;
}

# Returns the first line of $file without its newline, or "" when the file
# cannot be read or is empty.
sub read_first_line {
    my ($file) = @_;

    my $fh;
    unless (open($fh, '<', $file)) {
        warn "Cannot read $file: $!\n";
        return "";
    }
    my $first = <$fh>;
    close($fh);

    $first = "" unless defined $first;
    chomp($first);
    return $first;
}

#Module to find the source language
#Language is determined by the unicode value of first letter(ignoring numbers and _) of the word
# Takes the word as UTF-8 encoded bytes and returns one of "hin", "ben",
# "pan", "tam", "tel", "kan", "mal", "urd" or, for everything else, "eng".
sub findlang {
    my ($word) = @_;
    $word = "" unless defined $word;

    # Decode in memory; on malformed UTF-8 the bytes are left as they are
    # and the word falls through to "eng".
    utf8::decode($word);

    if ($word =~ /([0-9]+\_)(.*)/) {
        $word = $2;
    }
    my $val = length($word) ? ord($word) : 0;

    # [first code point, last code point, language]
    my @blocks = (
        [ 2304, 2431, "hin" ],
        [ 2432, 2559, "ben" ],
        [ 2560, 2687, "pan" ],    # whole Gurmukhi block, U+0A00..U+0A7F
        [ 2944, 3071, "tam" ],
        [ 3072, 3199, "tel" ],
        [ 3200, 3327, "kan" ],
        [ 3328, 3455, "mal" ],
        [ 1536, 1791, "urd" ],
    );
    for my $block (@blocks) {
        my ($first, $last, $lang) = @$block;
        return $lang if $val >= $first and $val <= $last;
    }
    return "eng";
}