package ILMT::PAN::HIN::VibhaktiSplitter;

use strict;
use warnings;

# Chunk features (vib, vpos, rule-table entries) are routinely missing, and
# the logic below relies on undef / "" behaving as the empty string and as 0.
# Keep those two warning categories quiet rather than guarding every access.
no warnings qw(uninitialized numeric);

use Dir::Self;
use Data::Dumper;
use ILMT::PAN::HIN::SSFAPI::feature_filter;
use ILMT::PAN::HIN::SSFAPI::shakti_tree_api;

my $functionfile = __DIR__ . "/VibhaktiSplitter/data/functionwords_hin.txt";

# Maps the first whitespace-separated field of each rule line to the second.
my %rule_hash = ();

# Chunk labels whose nodes are examined, in the order they are visited.
my @CHUNK_TAGS = qw(NP JJP RBP VGF VNN VGINF VGNF VGNN);

# preprocess()
#
# Loads the function-word table from $functionfile into %rule_hash.
# Blank lines are skipped; every other line is split on whitespace and its
# first two fields become key and value.
# Dies if the file cannot be opened.
sub preprocess {
    open(my $rule_fh, '<', $functionfile)
        or die "Couldn't open $functionfile for reading: $!\n";

    while (my $rule = <$rule_fh>) {
        next if $rule =~ /^\s*$/;
        chomp($rule);
        my ($fea, $array_in) = split(/\s+/, $rule);
        $rule_hash{$fea} = $array_in;
    }

    close($rule_fh);
    return;
}

# _collect_chunk_nodes($sent)
#
# Returns the concatenated results of get_nodes(3, TAG, $sent) for every tag
# in @CHUNK_TAGS, in tag order.
sub _collect_chunk_nodes {
    my ($sent) = @_;

    my @nodes;
    for my $tag (@CHUNK_TAGS) {
        push @nodes, get_nodes(3, $tag, $sent);
    }
    return @nodes;
}

# _absolute_position($entry, $node)
#
# Strips all ASCII letters from a vpos entry (e.g. "vib1" -> "1") and adds
# the remaining number to $node. An empty or missing entry counts as 0.
sub _absolute_position {
    my ($entry, $node) = @_;

    (my $offset = $entry) =~ s/[a-zA-Z]//g;
    return $offset + $node;
}

# _split_vibhakti($node, $sent)
#
# Reads the "vib" and "vpos" values from field 4 of $node, splits both on
# "_" and handles each vib word in turn:
#   * if its vpos entry is a vib/tam marker (or it is the first word and no
#     marker exists at all), the word is written into the "vib" attribute of
#     the node at the computed position;
#   * otherwise the word is added with add_leaf() at the computed position,
#     tagged NEG, PSP, NST or VAUX.
sub _split_vibhakti {
    my ($node, $sent) = @_;

    my $val_fs      = get_field($node, 4, $sent);
    my $FSreference = read_FS($val_fs, $sent);
    my @tams        = get_values("vib",  $FSreference, $sent);
    my @vposs       = get_values("vpos", $FSreference, $sent);

    my $vpos = $vposs[0];
    $vpos =~ s/"//g;
    $vpos =~ s/RP_//g;
    $vpos =~ s/RP$//g;

    my @new_words = split(/_/, $tams[0]);
    my @new_posis = split(/_/, $vpos);

    # Keep only the first vib/tam marker: for "vib_4_tam" there should not be
    # two of them. Exactly one entry is dropped per duplicate; the original
    # splice(@new_posis, $p1, $p1) also threw away the entries that followed.
    #
    # NOTE(review): the original loop was additionally guarded by
    # "$num_posis != $new_words", where $new_words was never assigned, so the
    # guard was always true. It probably meant $num_words (only de-duplicate
    # while the counts differ). The unconditional behaviour is kept here;
    # confirm the intent before changing it.
    my $marker_seen = 0;
    @new_posis = grep { !/vib|tam/ || !$marker_seen++ } @new_posis;

    my $has_marker = grep { /vib|tam/ } @new_posis;

    for my $idx (0 .. $#new_words) {
        my $new_word = $new_words[$idx];
        my $new_posi = $new_posis[$idx];    # undef once positions run out

        if (   $new_posi =~ /tam/
            or $new_posi =~ /vib/
            or ($idx == 0 and !$has_marker))
        {
            # Store the word as the "vib" value of the addressed node.
            my $target        = _absolute_position($new_posi, $node);
            my @per_chunk_arr = ($new_word);

            my $target_fs   = get_field($target, 4, $sent);
            my $FSreference1 = read_FS($target_fs, $sent);
            update_attr_val("vib", \@per_chunk_arr, $FSreference1, $sent);
            my $string = make_string($FSreference1, $sent);
            modify_field($target, 4, $string, $sent);
        }
        else {
            # No position given: place the word after the chunk's current
            # children.
            if ($new_posi eq "") {
                my (@childs_here) = get_children($node, $sent);
                $new_posi = @childs_here + 1;
            }

            my $target = _absolute_position($new_posi, $node);

            # Only the part before "+" is looked up in the rule table.
            my ($root) = split(/\+/, $new_word);

            my $pos1 =
                  $new_posi =~ /NEG/         ? "NEG"
                : $rule_hash{$root} =~ /PSP/ ? "PSP"
                : $rule_hash{$root} =~ /NST/ ? "NST"
                :                              "VAUX";

            add_leaf($target, 0, $new_word, $pos1, "", $sent);
        }
    }
    return;
}

# process(data => $text)
#
# Entry point. Takes named arguments and uses the value of "data" as the
# input story. The text is UTF-8 encoded, loaded with read_story(), every
# body / paragraph / sentence is walked, and each chunk node of the tags in
# @CHUNK_TAGS is passed through _split_vibhakti(). The story is then printed
# with printstory() into an in-memory buffer, which is UTF-8 decoded and
# returned.
# Dies if the in-memory output handle cannot be opened.
sub process {
    my %par   = @_;
    my $input = $par{'data'};

    utf8::encode($input);
    read_story(\$input);

    my $body;
    my $numBody = get_bodycount();

    for my $bodyNum (1 .. $numBody) {
        $body = get_body($bodyNum, $body);
        my ($numPara) = get_paracount($body);

        for my $i1 (1 .. $numPara) {
            my $para = get_para($i1);
            my ($numSent) = get_sentcount($para);

            for my $j1 (1 .. $numSent) {
                my ($sent) = get_sent($para, $j1);

                my @all_children = _collect_chunk_nodes($sent);
                my $num          = @all_children;

                for (my $i = 0; $i < $num; $i++) {
                    # The node list is re-queried on every iteration, as the
                    # original did - presumably because add_leaf() changes
                    # node positions within the sentence. Do not hoist.
                    @all_children = _collect_chunk_nodes($sent);
                    $num          = @all_children;

                    _split_vibhakti($all_children[$i], $sent);
                }
            }
        }
    }

    # Capture printstory()'s output in memory, then put back whichever
    # handle was selected before.
    my $result = '';
    open(my $out_fh, '>', \$result) or die $!;
    my $previous_fh = select($out_fh);
    printstory();
    select($previous_fh);
    close($out_fh);

    utf8::decode($result);
    return $result;
}

preprocess();

1;