#!/usr/bin/persl #3 functions fill_fs, parse and load sub fill_fs { my $word=@_[0]; my $pos=@_[1]; my $fs=@_[2]; my %fs_hash=(); my $root,$lcat,$num,$gend,$pers,$tam,$case,$cm; my $spec,$emph,$dubi,$interj,$conj,$hon,$agr_gen,$agr_num,$agr_per,$suff; if($word ne "") { $fs_hash{"word"}=$word; } if($pos ne "") { $fs_hash{"pos"}=$pos; } if($fs eq "") { return(\%fs_hash); } if($fs=~/root=\s*"\s*([^\s\"]+)/) { #print "HELLO-ROOT-$1-\n"; $root=$1; if($1 eq "") {$root="NULL";} $fs_hash{"root"}=$root; } if($fs=~/lcat=\s*"\s*([^\s\"]+)/) { $lcat=$1; if($1 eq "") {$lcat="NULL";} $fs_hash{"lcat"}=$lcat; } if($fs=~/gend=\s*"\s*([^\s\"]*)/) { $gend=$1; if($1 eq "") {$gend="NULL";} $fs_hash{"gend"}=$gend; } if($fs=~/num=\s*"\s*([^\s\"]*)/) { $num=$1; if($1 eq "") {$num="NULL";} $fs_hash{"num"}=$num; } if($fs=~/pers=\s*"\s*([^\s\"]*)/) { $pers=$1; if($1 eq "") {$pers="NULL";} $fs_hash{"pers"}=$pers; } if($fs=~/tam=\s*"\s*([^\s\"]*)/) { $tam=$1; if($1 eq "") {$tam="NULL";} $fs_hash{"tam"}=$tam; } if($fs=~/cm=\s*"\s*([^\s\"]*)/) { $cm=$1; if($1 eq "") {$cm="NULL";} $fs_hash{"cm"}=$cm; } if($fs=~/case=\s*"\s*([^\s\"]*)/) { $case=$1; if($1 eq "") {$case="NULL";} $fs_hash{"case"}=$case; } if($fs=~/spec=\s*"\s*([^\s\"]*)/) { $spec=$1; if($1 eq "") {$spec="NULL";} $fs_hash{"spec"}=$spec; } if($fs=~/emph=\s*"\s*([^\s\"]*)/) { $emph=$1; if($1 eq "") {$emph="NULL";} $fs_hash{"emph"}=$emph; } if($fs=~/dubi=\s*"\s*([^\s\"]*)/) { $dubi=$1; if($1 eq "") {$dubi="NULL";} $fs_hash{"dubi"}=$dubi; } if($fs=~/interj=\s*"\s*([^\s\"]*)/) { $interj=$1; if($1 eq "") {$interj="NULL";} $fs_hash{"interj"}=$interj; } if($fs=~/conj=\s*"\s*([^\s\"]*)/) { $conj=$1; if($1 eq "") {$conj="NULL";} $fs_hash{"hon"}=$conj; } if($fs=~/agr_gen=\s*"\s*([^\s\"]*)/) { $agr_gen=$1; if($1 eq "") {$agr_gen="NULL";} $fs_hash{"agr_gen"}=$agr_gen; } if($fs=~/agr_num=\s*"\s*([^\s\"]*)/) { $agr_num=$1; if($1 eq "") {$agr_num="NULL";} $fs_hash{"agr_num"}=$agr_num; } if($fs=~/agr_per=\s*"\s*([^\s\"]*)/) { $agr_per=$1; if($1 eq "") {$agr_per="NULL";} $fs_hash{"agr_per"}=$agr_per; } if($fs=~/suff=\s*"\s*([^\s\"]*)/) { $suff=$1; if($1 eq "") {$suff="NULL";} $fs_hash{"suff"}=$suff; } return(\%fs_hash); } sub parse { my $rule_no=@_[0]; my $lhs=@_[1]; my $rhs=@_[2]; my $ref_Tree=@_[3]; @_RULE_TREE_=@$ref_Tree; my @_LHS_=(); my @_RHS_=(); print ERR "PARSING AND LOADING THE RULE----$rule_no\n"; print ERR "LHS PARSING AND LOADING-----------\n"; my @array=split(/\s+/,$lhs); # print ERR "check--1st element--",@array[0],"\n"; print ERR "Array size $#array\n"; my $i=0; while ($i<=$#array) { my $x=@array[$i]; # print ERR "-->x--",$x,"\n"; my @fs_array=(); if($x=~/([A-Z]+)\~?([0-9]+)?(\<[^\>]*\>)?(\(\()?(.*)?(\)\))?/) { my $chunk=$1; my $token_id=$2; print ERR "Chunk--$chunk~$token_id\n"; my $words_fs=$5; my $head_fs=$3; my %head_hash=(); my $head_val=0; if($words_fs=~/^\{/) { my @array_words=split(/\}/,$words_fs); foreach my $y (@array_words) { # print ERR "-->y--",$y,"\n"; if($y=~/\{([\$\.\_a-zA-Z0-9]+)?\%?([a-zA-Z]+)?(\<[^\>]*\>)?/) { # print ERR "1-$1\n"; # print ERR "2-$2\n"; # print ERR "3-$3\n"; $word=$1; $pos=$2; $fs=$3; if($fs ne "" or $word ne "" or $pos ne "") { my $ref_fs_hash=&fill_fs($word,$pos,$fs); my %fs_hash=%$ref_fs_hash; # foreach (keys(%fs_hash)) # { # print $_,"--",$fs_hash{$_},"\n"; # } push(@fs_array,$ref_fs_hash); } } } } # print ERR "$words_fs-->HEADhash\n"; if($head_fs ne "") { $head_val=1; my $ref_head_hash=&fill_fs("","",$words_fs); %head_hash=%$ref_head_hash; # foreach (keys(%head_hash)) # { # print ERR $_,"--",$head_hash{$_},"\n"; # } } # delete @array_words[0..$#array_words]; print ERR "headvalue=$head_val\n"; my $_LHS_1={"chunk"=>"$chunk","token_id"=>"$token_id","fs"=>\@fs_array,"head_val"=>$head_val,"head_fs"=>\%head_hash}; push(@_LHS_,$_LHS_1); } $i++; } #RHS PARSING AND LOADING my @array_rhs=split(/\s+/,$rhs); print ERR "RHS PARSING---------------------------------\n"; print ERR "RHS:",$rhs,"\n"; # print ERR "array size $#array_rhs\n"; my $i=0; while ($i<=$#array_rhs) { my $x=@array_rhs[$i]; # print ERR "-->x--",$x,"\n"; if($x=~/(\+)?([A-Z]+)\~?([0-9]+)?(\<[^\<]*\>)?(\(\()?(.*)?(\)\))?/) { my $add_id=0; if($1 eq "+") { $add_id=1; } my $chunk=$2; my $token_id=$3; print ERR "RHS:CHUNK_TAG-$chunk~$token_id\n"; my $words_fs=$6; my $head_fs=$4; my @rhs_fs_array=(); my %head_hash=(); my $head_val=0; my @drop_childs; ##List of children to be deleted my @add_childs; ##List of children to be deleted my $count_child=1; if($words_fs=~/^\{/ or $word_fs=~/^+/) { my @array_rhs_words=split(/\}/,$words_fs); foreach my $y (@array_rhs_words) { #print "yout-$y-\n"; if($y eq "{") { print ERR "drop yes\n"; push(@drop_childs,$count_child); # if($fs ne "" or $word ne "" or $pos ne "") # { # my $ref_rhs_fs_hash=&fill_fs($word,$pos,$fs); # my %rhs_fs_hash=%$ref_rhs_fs_hash; # foreach (keys(%rhs_fs_hash)) # { # print ERR "hash--->",$_,"--",$rhs_fs_hash{$_},"\n"; # } # push(@rhs_fs_array,$ref_rhs_fs_hash); # print ERR "LEX--",$rhs_fs_array->[0]->{"root"},"\n"; # } } if($y=~/(\+)?\{([\$\.\_a-zA-Z0-9\^]+)?\%?([a-zA-Z]+)?(\<[^\>]*\>)?/) { # print ERR "1-$1\n"; # print ERR "2-$2\n"; # print ERR "3-$3\n"; my $add_child=$1; # print "y-$y-\n"; if($add_child eq "+") { # print "Add child\n"; push(@add_childs,$count_child); } my $word=$2; my $pos=$3; my $fs=$4; if($fs ne "" or $word ne "" or $pos ne "") { my $ref_rhs_fs_hash=&fill_fs($word,$pos,$fs); my %rhs_fs_hash=%$ref_rhs_fs_hash; # foreach (keys(%rhs_fs_hash)) # { # print ERR "hash--->",$_,"--",$rhs_fs_hash{$_},"\n"; # } push(@rhs_fs_array,$ref_rhs_fs_hash); # print ERR "LEX--",$rhs_fs_array->[0]->{"root"},"\n"; } } $count_child++; } } if($head_fs ne "") { $head_val=1; # print "head-fs-$head_fs-\n"; my $ref_head_hash=&fill_fs("","",$head_fs); %head_hash=%$ref_head_hash; # foreach (keys(%head_hash)) # { # print $_,"--",$head_hash{$_},"\n"; # } } # delete @array_rhs_words[0..$#array_rhs_words]; # print ERR "chunk--$chunk\n"; # print ERR "addid--$add_id\n"; my $_RHS_1={"chunk"=>"$chunk", "token_id"=>"$token_id", "fs"=>\@rhs_fs_array, "add"=>"$add_id", "add_val"=>$#array_rhs_words+2, "drop_childs"=>\@drop_childs, "add_childs"=>\@add_childs, "head_val"=>"$head_val", "head_fs"=>\%head_hash}; push(@_RHS_,$_RHS_1); } $i++; } # print ERR "Lhs 1st element->",$_LHS_[0]->{"chunk"},"\n"; # print ERR "Lhs 2nd element->",$_LHS_[1]->{"chunk"},"\n"; # print ERR "Rhs 1st element->",$_RHS_[0]->{"chunk"},"\n"; # print ERR "Rhs 2nd element->",$_RHS_[1]->{"chunk"},"\n"; #print "RULE No--$rule_no\n"; my @_RULE_NO_; push(@_RULE_NO_,$rule_no); my @RULE=(); push(@RULE,\@_LHS_); push(@RULE,\@_RHS_); push(@RULE,\@_RULE_NO_); push(@_RULE_TREE_,\@RULE); $total_rules=@_RULE_TREE_; print ERR "Number of Rules: $total_rules\n"; $total_rules--; for($lind=0;$_RULE_TREE_[$total_rules]->[0]->[$lind]->{"chunk"} ne "";$lind++) { my $match=0; for($rind=0;$_RULE_TREE_[$total_rules]->[1]->[$rind]->{"chunk"} ne "";$rind++) { $left_chunk=$_RULE_TREE_[$total_rules]->[0]->[$lind]->{"chunk"}."~".$_RULE_TREE_[$total_rules]->[0]->[$lind]->{"token_id"}; $right_chunk=$_RULE_TREE_[$total_rules]->[1]->[$rind]->{"chunk"}."~".$_RULE_TREE_[$total_rules]->[1]->[$rind]->{"token_id"}; #print "LEFT&RIGHT-$left_chunk--$right_chunk\n"; if($left_chunk eq $right_chunk) { $match=1; next; } } if($match==0) { print ERR "DROPING THE NODE $left_chunk\n"; $_RULE_TREE_[$total_rules]->[0]->[$lind]->{"drop"}=1; } } # print $_RULE_TREE_[1]->[0]->[0]->{"token_id"},"\n"; # print ERR "tree->",$_RULE_TREE_[2]->[0]->[0]->{"chunk"}," "; # print ERR "tree->",$_RULE_TREE_[3]->[0]->[0]->{"chunk"}," "; # print ERR "rhs tree->",$_RULE_TREE_[0]->[2]->[1]->{"chunk"}," "; # print $_RULE_TREE_[0]->[2]->[1]->{"token_id"},"\n"; # print ERR "rhs PTR->",$_RULE_TREE_[0]->[1]->[1],"\n"; # print ERR "LEX--",$_RULE_TREE_[0]->[1]->[1]->{"fs"}->[0]->{"root"},"\n"; # print ERR "TAM--",$_RULE_TREE_[0]->[1]->[1]->{"fs"}->[0]->{"tam"},"\n"; # print ERR "RHS END---------------------------------\n"; # delete @array[0..$#array]; return (\@_RULE_TREE_); } sub load { my $rule_file=@_[0]; my $ref_RULE_TREE_=@_[1]; open(fp,$rule_file); while() { # print "Rule Lines-",$_,"\n"; if($_=~/^\#/ or $_=~/^\s*\n/) { next; } if($_=~/([a-zA-Z0-9\~\(\)\{\}\"\$\_\<\>\s]+)/) { # print $1,"\n"; } if($_=~/^\s*R\s*([0-9][0-9]*)\s*:\s*([a-zA-Z0-9\%\~\=\(\)\{\}\,\"\$\_\.\@\<\>\s]+)=>\s*([a-zA-Z0-9\^\%\~\=\+\(\)\{\}\"\$\,\_\.\@\<\>\s]+)\s*\n/) { my $rule_no=$1; my $LHS=$2; my $RHS=$3; print ERR "RULE:",$_,"\n"; my $ref_RULE_TREE_=&parse($rule_no,$LHS,$RHS,$ref_RULE_TREE_); } } return($ref_RULE_TREE_); } 1;