# ILMT::TEL::HIN::WX2UTF -- pipeline stage that converts WX-notation text to
# UTF-8 Indic script; the underlying converter also has a UTF -> WX direction.
#
# process(%args)
#   Reads $args{data} and $args{src_lang}.  UTF-8-encodes the data in place,
#   fixes the conversion parameters to lang = src_lang, src = 'wx',
#   tgt = 'utf', type = 'ssf', calls convert_notation(%args), UTF-8-decodes
#   the returned string and returns it.
#
# convert_notation(%par)
#   Reads $par{data}, $par{type}, $par{lang}, $par{src} and $par{tgt}
#   (type/src/tgt are compared case-insensitively) and returns the converted
#   text accumulated in $result.
#     * src 'wx', tgt 'utf', type 'ssf': every input line is split on tabs
#       into number, token, POS tag and feature structure.  Tokens matching
#       /^\^?\@.*/ are kept as they are; all others go through wx2utf().  A
#       non-empty feature structure is split on '|'; in each part the "lex"
#       and "vib" values are converted with wx2utf(), except that values
#       starting with "eng~" (and "lex" values whose "cat" is "punc") are
#       not converted and only lose the "eng~" prefix.  The parts are
#       rebuilt with make_string(), rejoined with '|', and any
#       af=',,,,,,,' is removed.  Lines containing "((" or "))" keep their
#       second column unconverted.
#     * src 'wx', tgt 'utf', type 'text': every line is passed to wx2utf().
#     * src 'utf', tgt 'wx': the data is split on newlines and processed
#       with utf2wx(); tokens containing ASCII letters are prefixed with
#       "eng~" instead of being converted.
#     * Any other type, or any other src/tgt pair, prints a message to
#       STDOUT and terminates the whole program with exit(0).
#   wx2utf/utf2wx and read_FS/get_values/update_attr_val/make_string are not
#   defined in this file -- presumably they are exported by the IndicCC and
#   SSFAPI modules loaded below; TODO confirm.
#
# NOTE(review): this copy of the file is damaged and cannot compile as-is.
#   - All code sits on three physical lines.  A Perl '#' comment runs to the
#     end of the line, so the first '#' on each line hides everything after
#     it from the parser.
#   - The loops written as "while($line=)" / "while(my $line=)" have lost
#     their readline operand -- presumably <INFILE>, the in-memory handle
#     opened on \$data just above them; confirm against version control.
#   - In the utf -> wx branch the text between "if ($af !~ /^\^" and
#     ") { #chomp ($line);" is missing (the regex is unterminated and the
#     rest of the SSF handling plus the start of the text handling are gone).
#   Restore the file from version control instead of reconstructing it.
#
# NOTE(review): further points visible in the code, to address on restore:
#   - There is no "use strict" / "use warnings"; $line, $num, $tkn, $pos,
#     $fs, $lex, $lex_out, @fss, @string, $newfs, ... are package globals.
#   - "$args{'data'} = $args{data};" in process() assigns a value to itself.
#   - '@string = ""' creates a one-element array holding the empty string.
#   - "delete @lex_root[0..$#lex_root]" acts on the package array @lex_root,
#     not on the "my @lex_root" declared inside the foreach loop.
#   - In the visible code $lex is only assigned inside the "((" / "))"
#     branch for lines that have a feature structure, yet it is tested for
#     lines without one, so that test sees a stale or undefined value.
#   - binmode STDOUT is called repeatedly although the result is returned,
#     not printed.
#   - The bareword handle INFILE is opened but never closed in the visible
#     code.
package ILMT::TEL::HIN::WX2UTF; use Dir::Self; use Data::Dumper; my $cwd = __DIR__; use IPC::Run qw(run); use File::Temp qw/ tempfile /; use File::Slurp qw( slurp ); use ILMT::TEL::HIN::IndicCC; use ILMT::TEL::HIN::SSFAPI::feature_filter; use ILMT::TEL::HIN::SSFAPI::shakti_tree_api; sub process { my %args = @_; utf8::encode($args{data}); $args{'lang'} = $args{'src_lang'}; $args{'src'} = 'wx'; $args{'tgt'} = 'utf'; $args{'type'} = 'ssf'; $args{'data'} = $args{data}; my $wxtoutf1 = convert_notation(%args); utf8::decode($wxtoutf1); return $wxtoutf1; }; sub convert_notation { my %par = @_; my $data = $par{'data'}; my $type = $par{'type'}; my $lang = $par{'lang'}; my $src = $par{'src'}; my $tgt = $par{'tgt'}; my $result = ""; if( lc($src) eq "wx" and lc($tgt) eq "utf") { #open(STDIN,"<:utf8"); open INFILE, '<', \$data or die $!; if( lc($type) eq "ssf" ) { #while($line=) while($line=) { chomp ($line); ($num, $tkn, $pos, $fs) = split(/\t/,$line); if ($tkn !~ /^\^?\@.*/) { $lex_out = &wx2utf($tkn, $lang); } else { $lex_out = $tkn; } if($fs ne "") { @fss = split(/\|/, $fs); my $len = @fss; @string = ""; $newfs = ""; my $i=0; foreach $af (@fss) { my $FSreference = &read_FS($af, $line); my @lex_root = &get_values("lex", $FSreference); my @cat_root = &get_values("cat", $FSreference); my @fs_vib = &get_values("vib", $FSreference); my @fs_head = &get_values("head", $FSreference); my @fs_name = &get_values("name", $FSreference); foreach $field (@lex_root) { # if lcat is punc then don't convert it if ($cat_root[0] ne "punc" and $lex_root[0] !~ /^\^?eng\~.*/) { $val_out = &wx2utf($field, $lang); } else { $field =~ s/eng~//; $val_out = $field; } my @lex_arr=(); push @lex_arr,$val_out; &update_attr_val("lex", \@lex_arr,$FSreference,$af); $string[$i]=&make_string($FSreference,$af); } foreach $field1 (@fs_vib) { if ($field1 !~ /^\^?eng\~.*/) { $vib_out = &wx2utf($field1, $lang); } else { $field1 =~ s/eng~//; $vib_out = $field1; } my @fs_vib_arr=(); push @fs_vib_arr,$vib_out; 
&update_attr_val("vib", \@fs_vib_arr,$FSreference,$af); $string[$i]=&make_string($FSreference,$af); } $i++; } foreach $string (@string) { if(--$len) { $newfs=$newfs.$string."|"; } else { $newfs=$newfs.$string; } } $newfs =~s /af=',,,,,,,'//g; delete @string[0..$#string]; delete @lex_root[0..$#lex_root]; delete @fss[0..$#fss]; if($line =~ /\(\(/ or $line =~ /\)\)/) { ($num,$lex,$pos,$fs) = split(/\t/,$line); binmode STDOUT, ":utf8"; #binmode OUTFILE, ":utf8"; $result .= $num."\t".$lex."\t".$pos."\t".$newfs."\n"; } else { binmode STDOUT, ":utf8"; $result .= $num."\t".$lex_out."\t".$pos."\t".$newfs."\n"; } } # end if fs ne "" else { # try to understand this else block if($lex ne "((" and $lex ne "))") { binmode STDOUT, ":utf8"; $result .= $num."\t".$lex_out."\t".$pos."\t".$fs."\n"; } else { $result .= $line."\n"; } } } # end while loop } # if format SSF end #elsif(($type eq "TEXT") or ($type eq "text")) elsif( lc($type) eq "text" ) { while(my $line=) { #chomp ($line); my $text_out = wx2utf($line, $lang); binmode STDOUT, ":utf8"; $result .= $text_out; #print "under construction\n"; } } # type text end else { #print "type mismatch\n"; print "format type $type is NOT supported.\n"; print "Try $0 --help for more information\n"; exit(0); } } # end if statement src=wx tgt=utf elsif( lc($src) eq "utf" and lc($tgt) eq "wx") { #open(STDIN,"<:utf8"); #open INFILE, '<', \$data or die $!; my @temparray = split('\n', $data); if( lc($type) eq "ssf" ) { #print "ssf conversion start\n"; #while(my $line=) while(my $line=shift(@temparray)) { chomp ($line); ($num, $tkn, $pos, $fs) = split(/\t/,$line); #$lex_out = &utf2wx($tkn, $lang); if ($tkn !~ /[a-zA-Z]/ ){ $lex_out = &utf2wx($tkn, $lang); } else { $lex_out = "eng~".$tkn; } if($fs ne "") { @fss = split(/\|/, $fs); my $len = @fss; @string = ""; $newfs = ""; my $i=0; foreach $af (@fss) { if ($af !~ /^\^) { #chomp ($line); #print "before convert call $line\n"; my $text_out = utf2wx($line, $lang); binmode STDOUT, ":utf8"; $result .= 
$text_out; #print "under construction\n"; } } # end format type text else { print "format type $type is NOT supported.\n"; print "Try $0 --help for more information\n"; exit(0); } } # end elsif statement src=utf tgt=wx else { print "$0: source and target encoding mismatch\n"; print "Try $0 --help for more information\n"; exit(0); } return $result; }; 1;