package ILMT::PAN::HIN::WX2UTF;

# Converts text between WX notation and UTF-8 Indic script, either as plain
# text or as SSF (tab-separated "num, lex, pos, fs" lines).
#
# The actual transliteration is delegated to wx2utf()/utf2wx(); feature
# structures are read and rewritten with read_FS(), get_values(),
# update_attr_val() and make_string().  All of these are expected to be
# imported from the modules loaded below.

use strict;
use warnings;

use Dir::Self;
use Data::Dumper;
use ILMT::PAN::HIN::WX2UTF::IndicCC;
use ILMT::PAN::HIN::SSFAPI::feature_filter;
use ILMT::PAN::HIN::SSFAPI::shakti_tree_api;

# Values starting with an optional '^' followed by '@' are left untouched
# when converting lex/vib attributes from WX to UTF.
my $AT_PREFIX = qr/^\^?\@/;

# process(%args)
#
# Pipeline entry point: converts SSF data from WX to UTF.
#
# Parameters (named):
#   data     - SSF text; it is UTF-8 encoded in place before conversion.
#   tgt_lang - language passed on to the converter as 'lang'.
# Any 'src', 'tgt', 'type' and 'lang' keys supplied by the caller are
# overwritten ('wx', 'utf', 'ssf' and tgt_lang respectively).
#
# Returns the converted SSF text after utf8::decode().
sub process {
    my %args = @_;

    utf8::encode($args{'data'});
    $args{'lang'} = $args{'tgt_lang'};
    $args{'src'}  = 'wx';
    $args{'tgt'}  = 'utf';
    $args{'type'} = 'ssf';

    my $output = convert_notation(%args);
    utf8::decode($output);

    return $output;
}

# convert_notation(%par)
#
# Parameters (named):
#   data - input text.
#   type - 'ssf' or 'text' (case-insensitive).
#   lang - language handed to wx2utf()/utf2wx().
#   src  - 'wx' or 'utf' (case-insensitive).
#   tgt  - 'utf' or 'wx' (case-insensitive); must be the opposite of src.
#
# Returns the converted text as one string.
#
# NOTE(review): an unsupported src/tgt pair or type prints a message to
# STDOUT and terminates the process with exit(0).  That is unusual for a
# library routine but is kept unchanged for backward compatibility.
sub convert_notation {
    my %par = @_;

    my $data = $par{'data'};
    my $lang = $par{'lang'};
    my $type = _or_empty($par{'type'});
    my $src  = lc(_or_empty($par{'src'}));
    my $tgt  = lc(_or_empty($par{'tgt'}));
    my $kind = lc($type);

    my $convert;
    my $from_utf;
    if ($src eq 'wx' and $tgt eq 'utf') {
        $convert  = \&wx2utf;
        $from_utf = 0;
    }
    elsif ($src eq 'utf' and $tgt eq 'wx') {
        $convert  = \&utf2wx;
        $from_utf = 1;
    }
    else {
        print "$0: source and target encoding mismatch\n";
        print "Try $0 --help for more information\n";
        exit(0);
    }

    if ($kind ne 'ssf' and $kind ne 'text') {
        print "format type $type is NOT supported.\n";
        print "Try $0 --help for more information\n";
        exit(0);
    }

    # The original code switched STDOUT to :utf8 while converting; callers
    # that print the result may depend on it, so it is kept (once per call
    # instead of once per line).
    binmode STDOUT, ":utf8";

    my @lines = _input_lines($data, $from_utf);

    if ($kind eq 'ssf') {
        # The '@' guards on lex/vib only exist in the WX -> UTF direction.
        return _convert_ssf(\@lines, $convert, $lang, !$from_utf);
    }

    return _convert_text(\@lines, $convert, $lang);
}

# Returns $value, or the empty string when it is undefined.
sub _or_empty {
    my ($value) = @_;

    return defined $value ? $value : '';
}

# Splits $data into lines, each keeping its line terminator.  For UTF input
# the data is read through an in-memory handle with the :utf8 layer, as the
# original utf -> wx branch did; WX input is split as-is.
sub _input_lines {
    my ($data, $decode_utf8) = @_;

    return () unless defined $data and length $data;

    if (!$decode_utf8) {
        my @lines = split /^/, $data;
        return @lines;
    }

    open my $in, '<:utf8', \$data
        or die "cannot open in-memory input: $!";
    my @lines = <$in>;
    close $in;

    return @lines;
}

# Converts every line as free text; line terminators are passed to the
# converter untouched, and its output is concatenated.
sub _convert_text {
    my ($lines, $convert, $lang) = @_;

    my $result = '';
    for my $line (@{$lines}) {
        $result .= _or_empty($convert->($line, $lang));
    }

    return $result;
}

# Converts SSF lines.  The lex column and the lex/vib/head/name attributes
# of the fs column are converted; lines without a convertible lex column
# are copied through unchanged.
#
# A plain "for" over the lines is used on purpose: blank lines must not end
# the loop (testing the line's truth value would stop at "" or "0").
sub _convert_ssf {
    my ($lines, $convert, $lang, $skip_at_values) = @_;

    my $result = '';
    for my $raw_line (@{$lines}) {
        my $line = $raw_line;
        chomp $line;

        my ($num, $lex, $pos, $fs) = split /\t/, $line;
        my $lex_out = defined $lex ? $convert->($lex, $lang) : undef;

        if (defined $fs and length $fs) {
            my $new_fs
                = _convert_fs($fs, $line, $convert, $lang, $skip_at_values);

            # Chunk boundary lines keep their lex column verbatim.
            my $is_bracket = ($line =~ /\(\(/ || $line =~ /\)\)/);
            my $word = $is_bracket ? $lex : $lex_out;

            $result .= join("\t", map { _or_empty($_) }
                                  $num, $word, $pos, $new_fs) . "\n";
        }
        elsif (defined $lex and length $lex
               and $lex ne "((" and $lex ne "))") {
            $result .= join("\t", map { _or_empty($_) }
                                  $num, $lex_out, $pos, $fs) . "\n";
        }
        else {
            $result .= $line . "\n";
        }
    }

    return $result;
}

# Converts the '|'-separated feature-structure alternatives in $fs and
# returns them joined by '|' again.  An alternative in which no attribute
# was converted is kept as it was instead of being replaced by an empty
# string.
sub _convert_fs {
    my ($fs, $line, $convert, $lang, $skip_at_values) = @_;

    my @rendered_alternatives;
    for my $af (split /\|/, $fs) {
        my $fs_ref = read_FS($af, $line);

        # All values are fetched before the first update, as before.
        my @lex_values  = get_values("lex",  $fs_ref);
        my @cat_values  = get_values("cat",  $fs_ref);
        my @vib_values  = get_values("vib",  $fs_ref);
        my @head_values = get_values("head", $fs_ref);
        my @name_values = get_values("name", $fs_ref);

        my $rendered;

        # The lex guard looks at the first cat/lex value only, so it is
        # evaluated once rather than per value.
        my $convert_lex = _or_empty($cat_values[0]) ne "punc";
        if ($convert_lex and $skip_at_values
            and _or_empty($lex_values[0]) =~ $AT_PREFIX) {
            $convert_lex = 0;
        }
        if ($convert_lex) {
            for my $value (@lex_values) {
                $rendered = _convert_attr("lex", $value, $fs_ref, $af,
                                          $convert, $lang);
            }
        }

        for my $value (@vib_values) {
            next if $skip_at_values and _or_empty($value) =~ $AT_PREFIX;
            $rendered = _convert_attr("vib", $value, $fs_ref, $af,
                                      $convert, $lang);
        }

        for my $value (@head_values) {
            $rendered = _convert_attr("head", $value, $fs_ref, $af,
                                      $convert, $lang);
        }

        for my $value (@name_values) {
            $rendered = _convert_attr("name", $value, $fs_ref, $af,
                                      $convert, $lang);
        }

        push @rendered_alternatives, defined $rendered ? $rendered : $af;
    }

    return join("|", @rendered_alternatives);
}

# Converts one attribute value, stores it back into the feature structure
# and returns the structure rendered by make_string().
sub _convert_attr {
    my ($attr, $value, $fs_ref, $af, $convert, $lang) = @_;

    my @new_values = ($convert->($value, $lang));
    update_attr_val($attr, \@new_values, $fs_ref, $af);

    return make_string($fs_ref, $af);
}

1;