Commit 5568b960 authored by priyank's avatar priyank

kan shallow parser first commit

parents
#!/usr/bin/env python
# -*- coding: utf-8 -*-
'''
Created by
@author: priyank
'''
import json
import requests
from SocketServer import ThreadingMixIn
import threading
import codecs
import re
import cgi
from BaseHTTPServer import HTTPServer, BaseHTTPRequestHandler
from optparse import OptionParser
from urlparse import urlparse, parse_qs
import os
import sys
from argparse import ArgumentParser
# Command-line interface: both arguments are mandatory.
#   -c / --serverConfigFile : JSON file mapping language code -> pipeline URL
#   -i / --inputFile        : UTF-8 text, one paragraph per line
parser=ArgumentParser()
parser.add_argument('-c', '--serverConfigFile', help='server configuration file (with path)', required=True)
parser.add_argument('-i', '--inputFile', help='inputFile (with path)', required=True)
args = parser.parse_args()
# Paths from the command line; existence is validated further down.
serverFile = args.serverConfigFile
inputFile = args.inputFile
#function to get sentences from SSF
def sentenceCollector(inputString):
    """Split an SSF document string into per-sentence strings.

    Each returned element contains one ``<Sentence ...> ... </Sentence>``
    span as newline-terminated lines, in document order.  Lines that
    precede a closing tag (including any stray leading lines) are folded
    into the sentence that closes them; trailing lines after the last
    closing tag are dropped, as in the original implementation.
    """
    # Ensure back-to-back sentence tags end up on separate lines so the
    # newline split sees one tag per line.
    if "Sentence><Sentence" in inputString:
        inputString = inputString.replace('Sentence><Sentence', 'Sentence>\n<Sentence')
    sentList = []
    current = []  # lines of the sentence being accumulated (join is O(n))
    for line in inputString.strip().split("\n"):
        line = line.rstrip()
        if not line:
            continue
        current.append(line + "\n")
        # The closing tag flushes the accumulated sentence.
        if line.startswith('</Sentence'):
            sentList.append("".join(current))
            current = []
    return sentList
# Function to get output of lats module(wordgenerator)
def wordgenCollector(inputString):
    """Extract the token column (second tab field) from SSF lines.

    Sentence tags and the chunk brackets ``((`` / ``))`` are skipped.
    Returns the tokens space-joined, with a trailing space whenever at
    least one token was found (empty string otherwise).
    """
    tokens = []
    for raw in inputString.strip().split("\n"):
        raw = raw.rstrip()
        fields = raw.split("\t")
        # Only lines with at least two tab-separated fields carry tokens.
        if not raw or len(fields) < 2:
            continue
        if raw.startswith(('<Sentence ', '</Sentence')):
            continue
        if fields[1] in ('((', '))'):
            continue
        tokens.append(fields[1])
    return "".join(tok + " " for tok in tokens)
if not os.path.isfile(serverFile):
print " serverFile file", serverFile ,"does not exist."
sys.exit(0);
if not os.path.isfile(inputFile):
print " inputFile file", inputFile ,"does not exist."
sys.exit(0);
server_details = {}
#getting server details
with open(serverFile) as server_file:
server_details = json.load(server_file)
translationURL = server_details['kan']
f = codecs.open(inputFile, "rb", "utf-8")
lines = f.readlines()
f.close()
tokenizerURLArray = translationURL.split("/")
tokenizerURLArray[-2] = '1'
modulesURL = tokenizerURLArray[0] + "/" + tokenizerURLArray[1] + "/" + tokenizerURLArray[2] + "/" + tokenizerURLArray[5] + "/" + tokenizerURLArray[6] + "/modules"
tokenizerURL = "/".join(tokenizerURLArray)
tokenizerURLArray = translationURL.split("/")
tokenizerURLArray[-3] = '2'
translationURL = "/".join(tokenizerURLArray)
myheaders = {"Content-type": "application/x-www-form-urlencoded; charset=UTF-8"}
proxies = {
"http" :None,
"https":None
}
res = requests.post(modulesURL, proxies=proxies, headers=myheaders)
lastModule = ''
secondLastModule = ''
# getting last modules
if res is not None:
modulesList = json.loads(res.text)
lastModule = modulesList[-1]
secondLastModule = modulesList[-2]
else:
print "Null response from server"
sys.exit(0)
response_data = {}
response_data['language'] = 'kan'
response_data['text'] = lines
output= ""
wxoutput = ""
# processing sentence in each line by calling MT
# Processing paras: one line is considered as a para
iii = 0
intermediatearray = []
mystr = ""
for line in lines:
line = line.strip()
if line :
# calling tokenizer on line
dataToSend = {"data":line.strip().encode('utf-8')}
res = requests.post(tokenizerURL, proxies=proxies, headers=myheaders, data=dataToSend)
tokenOut = json.loads(res.text)
sentences = sentenceCollector(tokenOut['tokenizer-1'])
jjj = 0
tempdict = {}
mystr = mystr + "paraid:" + str((iii + 1)) + "\n" + line + "\n"
for sentence in sentences:
dataToSend = {"data":sentence.strip().encode('utf-8').strip()}
res = requests.post(translationURL, proxies=proxies, headers=myheaders, data=dataToSend)
completeOut = json.loads(res.text)
lastmoduleOutput = completeOut[lastModule+"-"+str((modulesList.index(lastModule))+1)]
secondlastmoduleOutput = completeOut[secondLastModule+"-"+str((modulesList.index(secondLastModule))+1)]
finalOutput = lastmoduleOutput
output = output + finalOutput + " \n\n"
wxoutput = wxoutput + secondlastmoduleOutput + " \n\n"
mystr = mystr + "sentid:" + str((jjj + 1)) + "\n" + line + "\n"
mystr = mystr + lastmoduleOutput + "\n"
jjj = jjj + 1
iii = iii + 1
output = output + " \n\n"
wxoutput = wxoutput + " \n\n"
mystr = mystr + "---------------------------------------------------------\n"
print mystr
# ILMT-API
An API for querying ILMT systems.
## For Installation of dependencies related to Sampark refer to dependencies.txt
## For Installation of perl related dependencies run script install.sh with following command:
```
sh install.sh
```
## Steps to install MT modules run:
```
cd ilmt-api-kan-shallowparser/
sh setup.sh
```
## Steps to execute API
```
unset PERL5LIB; . ./setup-env.sh;
perl api.pl prefork -l http://*:8585 -w 1
```
This starts the API server and leaves it listening for requests.
## Steps to test API:
a) On browser run following URL:
```
http://<YOUR_IP>:8585/kan/hin/translate?data="ಇಂಡಿಯಾದ ವ್ಯವಸ್ಥಾಪಕ ನಿರ್ದೇ"&pretty=true
```
b) here replace <YOUR_IP> with your IP address.
c) For testing API with curl run the following:
```
curl --noproxy '*' http://localhost:8585/kan/hin/translate --data data="ಇಂಡಿಯಾದ ವ್ಯವಸ್ಥಾಪಕ ನಿರ್ದೇ"
```
d) For MT with UI run following command:
```
http://<YOUR_IP>:8585/
```
e) Now you would see proper translation panels.
## To execute sentences using API:
Please update the host IP in server.json from 10.2.63.52 to your IP, then run the following command:
$python GetShallowParserOutput.py -c server.json -i input.txt
## To check which ports are allotted to different services, run following command:
```
find ./ | grep 'run/.*_'
```
#!/usr/bin/env perl
use Dir::Self;
use strict;
use warnings;
use Data::Dumper;
use Mojolicious::Lite;
use lib __DIR__ . "/lib";
use ILMT::Translator qw(get_translator get_langpairs);
use ILMT::KAN::HIN;
plugin qw(Mojolicious::Plugin::ForkCall);
# POST/GET /:src/:tgt/translate
# Runs the full pipeline for the language pair in a forked worker (via
# Mojolicious::Plugin::ForkCall) so long-running pipelines do not block
# the event loop.  Input text arrives in the "data" (or legacy "input")
# parameter; with "pretty" set, a human-readable page is rendered
# instead of JSON.
any '/:src/:tgt/translate' => sub {
my $c = shift->render_later;
$c->inactivity_timeout(3600);
my %args = %{$c->req->params->to_hash};
$args{'src_lang'} = $c->param('src');
$args{'tgt_lang'} = $c->param('tgt');
# Accept either "data" or "input"; keep both keys populated.
$args{'data'} = $args{'input'} = $args{'data'} // $args{'input'};
$c->fork_call(
sub {
# Runs in the forked child: execute the whole pipeline.
my (%args) = @_;
my $translator = get_translator(uc($c->param('src')), uc($c->param('tgt')));
return $translator->translate(%args);
},
[%args],
sub {
# Back in the parent: render the per-module result hash.
my ($c, $final_result) = @_;
if (exists $args{"pretty"}) {
my $final_string = join "\n", map { "$_:\n$final_result->{$_}" } keys %$final_result;
$c->render(template => 'pretty', result => $final_string);
} else {
$c->render(json => $final_result);
}
}
);
};
# POST/GET /:src/:tgt/:start/:end
# Like /translate, but runs only pipeline modules :start..:end (1-based
# positions in the module sequence) via partial_p, in a forked worker.
any '/:src/:tgt/:start/:end' => sub {
my $c = shift->render_later;
$c->inactivity_timeout(3600);
my %args = %{$c->req->params->to_hash};
$args{'src_lang'} = $c->param('src');
$args{'tgt_lang'} = $c->param('tgt');
# Accept either "data" or "input"; keep both keys populated.
$args{'data'} = $args{'input'} = $args{'data'} // $args{'input'};
$c->fork_call(
sub {
# Runs in the forked child: execute the requested module range.
my (%args) = @_;
my $translator = get_translator(uc($c->param('src')), uc($c->param('tgt')));
return $translator->partial_p($c->param('start'), $c->param('end'), %args);
},
[%args],
sub {
# Back in the parent: render the per-module result hash.
my ($c, $final_result) = @_;
if (exists $args{"pretty"}) {
my $final_string = join "\n", map { "$_:\n$final_result->{$_}" } keys %$final_result;
$c->render(template => 'pretty', result => $final_string);
} else {
$c->render(json => $final_result);
}
}
);
};
# POST/GET /partialtranslate/new/:src/:tgt/:start/:end/partial
# Synchronous (non-forked) variant of the partial-pipeline route: runs
# modules :start..:end in-process and renders the result.  Blocks the
# event loop for the duration of the pipeline run.
any '/partialtranslate/new/:src/:tgt/:start/:end/partial' => sub {
print "inside partialtranslate/new ............";
my $c = shift;
my %args = %{$c->req->params->to_hash};
$args{'src_lang'} = $c->param('src');
$args{'tgt_lang'} = $c->param('tgt');
# Accept either "data" or "input"; keep both keys populated.
$args{'data'} = $args{'input'} = $args{'data'} // $args{'input'};
my $translator = get_translator(uc($c->param('src')), uc($c->param('tgt')));
my $final_result = $translator->partial_p($c->param('start'), $c->param('end'), %args);
#$c->render(json => $final_result);
if (exists $args{"pretty"}) {
my $final_string = join "\n", map { "$_:\n$final_result->{$_}" } keys %$final_result;
$c->render(template => 'pretty', result => $final_string);
} else {
$c->render(json => $final_result);
}
};
# POST/GET /mytranslate/new/api/:src/:tgt/mytranslate
# Synchronous (non-forked) variant of /translate: runs the full pipeline
# in-process and renders the result.  Blocks the event loop while the
# pipeline runs.
any '/mytranslate/new/api/:src/:tgt/mytranslate' => sub {
print "inside mytranslate ............";
my $c = shift;
my %args = %{$c->req->params->to_hash};
$args{'src_lang'} = $c->param('src');
$args{'tgt_lang'} = $c->param('tgt');
# Accept either "data" or "input"; keep both keys populated.
$args{'data'} = $args{'input'} = $args{'data'} // $args{'input'};
my $translator = get_translator(uc($c->param('src')), uc($c->param('tgt')));
my $final_result = $translator->translate(%args);
#$c->render(json => $final_result);
if (exists $args{"pretty"}) {
my $final_string = join "\n", map { "$_:\n$final_result->{$_}" } keys %$final_result;
$c->render(template => 'pretty', result => $final_string);
} else {
$c->render(json => $final_result);
}
};
# GET/POST /:src/:tgt/ -- plain-text count of the pipeline modules
# registered for the requested language pair.
any '/:src/:tgt/' => sub {
    my $ctrl = shift;
    my $pipeline = get_translator(uc($ctrl->param('src')), uc($ctrl->param('tgt')));
    $ctrl->render(text => scalar @{ $pipeline->{seq} });
};
# GET/POST /:src/:tgt/modules -- JSON list of the pipeline module names
# (lowercased) for the requested language pair.
any '/:src/:tgt/modules' => sub {
    my $ctrl = shift;
    my $pipeline = get_translator(uc($ctrl->param('src')), uc($ctrl->param('tgt')));
    my @module_names = map { lc($_) } @{ $pipeline->{seq} };
    $ctrl->render(json => \@module_names);
};
# GET/POST /langpairs -- JSON map of registered source -> target language
# pairs; also dumps the dispatch table to the server log.
any '/langpairs' => sub {
    my $ctrl = shift;
    my %pairs = get_langpairs();
    print Dumper(\%pairs);
    $ctrl->render(json => \%pairs);
};
# GET / -- serve the bundled static translation UI.
get '/' => sub {
    my $ctrl = shift;
    $ctrl->reply->static('index.html');
};
# Start the Mojolicious application; the template for the "pretty" view
# lives in the __DATA__ section below.
app->start;
__DATA__
@@ pretty.html.ep
<pre><%= $result %></pre>
Dependencies:
+ jdk8
Follow below steps to install jdk:-
a) Download JDK 8 from the Oracle website (Linux 64-bit tar archive)
b) cp downloaded package to '/usr/local'
c) tar -xvf jdk-<version>-linux-x64.tar.gz
d) vim /etc/profile
e) export JAVA_HOME="/usr/local/jdk<version>"
f) export PATH=$PATH:$JAVA_HOME/bin
g) source /etc/profile
h) java -version
+ CRF++ 0.51+
Follow below steps to install CRF++:-
$ cd CRF++-0.51
$./configure
$make
$make install
Check that it is installed properly:
crf_test --version
Note:-
if 'libcrfpp.so.0' is not found after the CRF installation, then use the command below
ln -s /usr/local/lib/libcrfpp.so.0 /usr/lib/libcrfpp.so.0
+ gcc
+ gdbm
+ libgdbm-dev
+ glib-2.0
+ libglib2.0-dev
For ubuntu use following command:
sudo apt-get install libgdbm-dev libglib2.0-dev g++
# apache ant installation
- wget https://downloads.apache.org/ant/binaries/apache-ant-1.10.6-bin.tar.gz
- sudo cp apache-ant-1.10.6-bin.tar.gz /usr/local/
- cd /usr/local/
- sudo su
- tar -xvf apache-ant-1.10.6-bin.tar.gz
- # exit from root by typing exit
- vim .bashrc
- Add following lines at the bottom of file:
export ANT_HOME=/usr/local/<ANT DIRECTORY
export PATH=${PATH}:${ANT_HOME}/bin
- save the file
- $source .bashrc
- LOGOUT and LOGIN
ಮಗರ್ ನದಿಯ ವಾಸ್ತವಿಕ ಶಾಂತ ಸ್ವರೂಪ ಅದರ ವಾಸ್ತವಿಕ ಸುಂದರತೆ.
ಇದು ಚಿಕ್ಕದಾದ ಸುಂದರ ಹಳ್ಳಿ,ಇಲ್ಲಿಂದ ಹಲವಾರು ಗ್ಲೇಷಿಯರ್ಗಳ ಯಾತ್ರೆ ಮಾಡಬಹುದು.
ಮೂರ್ತಿಯ ಹಿಂದಿನ ಜಾಗದಲ್ಲಿ ತೋರಣವಿದೆ,ಅಲ್ಲಿ ಹತ್ತು ಅವತಾರಗಳ ಲೀಲೆ ಚಿತ್ರಿಸಲಾಗಿದೆ.
ಈ ಮೂರ್ತಿಗೆ ನಾಲ್ಕು ಮುಖ ಮತ್ತು ನಾಲ್ಕು ಕೈಗಳಿವೆ.
sudo apt-get install curl
curl -L http://cpanmin.us | perl - --sudo App::cpanminus
sudo cpanm Data::Dumper
sudo cpanm Dir::Self
sudo cpanm Mojolicious::Lite
sudo cpanm Module::Runtime
sudo cpanm Module::Pluggable
sudo cpanm Mojolicious::Plugin::ForkCall
sudo cpanm IPC::Run
sudo cpanm Module::Pluggable
sudo apt-get install python-argparse requests
package ILMT::KAN::HIN;
use strict;
use warnings;
use Data::Dumper;
use ILMT::Translator;
# Ordered module pipeline for the KAN -> HIN shallow parser.  Each name
# corresponds to an ILMT::KAN::HIN::<Module> package providing process().
my @seq = (
"Tokenizer",
"UTF2WX",
"Morph",
"POSTagger",
"Chunker",
"Prune",
"GuessMorph",
"PickOneMorph",
"Repair",
"ComputeHead",
"ComputeVibhakti",
"WX2UTF"
);
# Register this language pair in ILMT::Translator's dispatch table.
my $langpair_obj = new_translator ILMT::Translator("KAN", "HIN", \@seq);
package ILMT::Translator;
use strict;
use warnings;
use Dir::Self;
use Data::Dumper;
use Exporter qw(import);
use Module::Pluggable::Object;
use Module::Runtime qw(use_module);
our @EXPORT_OK = qw(get_translator get_langpairs);
my %translator_table;
# new_translator(CLASS, SRC, TGT, \@seq)
# Constructor: loads every plugin package under ILMT::<SRC>::<TGT>::*,
# stores the ordered module sequence, and registers the object in the
# global dispatch table so get_translator() can find it.
sub new_translator {
my $class = shift;
my $self = {
src => shift,
tgt => shift,
};
my $search_path = "ILMT::$self->{src}::$self->{tgt}";
# Load every direct child package of the language-pair namespace.
@{$self->{plugins}} = map use_module($_),
grep /^${search_path}::[^:]+$/,
Module::Pluggable::Object->new(search_path => $search_path)->plugins;
$self->{seq} = shift;
bless $self, $class;
# Register this module as a translator service
$translator_table{$self->{src}}{$self->{tgt}} = $self;
return $self;
}
# Look up the translator object registered for a (source, target) pair;
# returns undef when the pair was never registered.
sub get_translator {
    my ($source, $target) = @_;
    return $translator_table{$source}{$target};
}
# Return a hash mapping each lowercased source language to an arrayref of
# its lowercased target languages; also dumps the table to STDOUT.
sub get_langpairs {
print Dumper(\%translator_table);
return map +(lc $_ => [ map lc, keys %{$translator_table{$_}} ]), keys %translator_table;
}
# translate($self, %args)
# Run the full module pipeline over %args, feeding each stage's output
# into the next via $args{data}.  Returns a hashref keyed by
# "<module>-<1-based position>" (lowercased) with every stage's output.
#
# BUG FIX: the original iterated "foreach my $index (0 .. $#dispatch_seq)"
# and then did $dispatch_seq[$index ++].  Post-incrementing the foreach
# variable modifies the read-only alias of the constant range and dies
# with "Modification of a read-only value attempted".  Iterate 1-based
# instead, matching partial_p(), which yields the same identifiers.
sub translate {
    my ($self, %args) = @_;
    my @identifiers;
    my %final_result;
    my @dispatch_seq = @{$self->{seq}};
    foreach my $index (1 .. scalar @dispatch_seq) {
        my $module = $dispatch_seq[$index - 1];
        my $identifier = lc("${module}-$index");
        push @identifiers, $identifier;
        my $package = "ILMT::$self->{src}::$self->{tgt}::$module";
        # Each module package exposes process(%args) returning its output.
        $args{$identifier} = $package->can('process')->(%args);
        $args{'data'} = $args{$identifier};
    }
    @final_result{@identifiers} = @args{@identifiers};
    return \%final_result;
}
# partial_p($self, $start, $end, %args)
# Run only pipeline modules $start..$end (1-based positions within
# $self->{seq}), chaining each stage's output into $args{data}.  Returns
# a hashref keyed by "<module>-<position>" (lowercased).
sub partial_p {
my ($self, $start, $end, %args) = @_;
my $result = "";
my @dispatch_seq = @{$self->{seq}};
my @identifiers;
my %final_result;
foreach my $index ($start .. $end) {
my $module = $dispatch_seq[$index - 1];
my $identifier = lc("${module}-$index");
push @identifiers, $identifier;
#print "module ## $module\n";
my $package = "ILMT::$self->{src}::$self->{tgt}::$module";
# Each module package exposes process(%args) returning its output.
$args{$identifier} = $package->can('process')->(%args);
$args{'data'} = $args{$identifier};
#print Dumper(\$args{$identifier});
}
@final_result{@identifiers} = @args{@identifiers};
return \%final_result;
}
1;
package ILMT::KAN::HIN::Chunker;
use strict;
use warnings;
use Dir::Self;
use Data::Dumper;
use IPC::Run qw(run);
# Chunker sub-pipeline: SSF -> TnT-style columns -> CRF labelling -> SSF.
my @dispatch_seq = (
"ssf2tnt",
"crf_test",
"bio2ssf",
);
# process(%args): chunk the POS-tagged SSF in $args{data} and return SSF
# with chunk brackets added.  Encodes to bytes for the external CRF tool
# and decodes the result back to a character string afterwards.
sub process {
my %args = @_;
utf8::encode($args{"data"});
foreach my $submodule (@dispatch_seq) {
$args{'data'} = __PACKAGE__->can($submodule)->(%args);
}
utf8::decode($args{"data"});
return $args{"data"};
}
# ssf2tnt(%par): flatten SSF into TnT-style tab-separated columns
# (word, POS tag, feature structure), one token per line, with a blank
# line at every sentence boundary.  Chunk bracket lines "((" / "))" and
# tag lines are dropped; runs of spaces inside fields become "___" so
# each token stays a single whitespace-free column (undone by bio2ssf).
sub ssf2tnt {
my %par = @_;
my $data = $par{'data'};
my $result = "";
open my $fh, '<', \$data or die $!;
while (my $line = <$fh>)
{
chomp($line);
if($line=~/<\/S/)
{
$result .= "\n";
next;
}
if($line =~ /^\s*$/) # if the line is all space characters
{
$result .= "\n";
next;
}
$line=~s/[ ]+/___/g;
my ($att1,$att2,$att3,$att4) = split (/[\t]+/, $line);
if($att1 =~ /\<.*/ || $att2 eq "((" || $att2 eq "))") #unwanted lines
{
next;
}
else
{
$result .= "$att2\t$att3\t$att4\n";
}
}
return $result;
}
# bio2ssf(%par): convert BIO-labelled column output (presumably CRF++
# appends the label as the 4th whitespace-separated column -- confirm)
# back into SSF, wrapping B-/I- runs in chunk brackets "((" ... "))" and
# numbering sentences and chunks.  Inconsistent I- labels are collected
# into $error (currently never reported) but do not abort conversion.
sub bio2ssf {
my %par = @_;
my $data = $par{'data'};
my $result = "";
open my $fh, '<', \$data or die $!;
my $line = "";
my $startFlag = 1; # true until the first word of a sentence is emitted
my $wno = 1; # word number within the current chunk
my $prevCTag = ""; # chunk tag of the previous word ("O" = outside)
my $error = "";
my $lno = 0;
my $sno = 1; # sentence number
my $cno=0; # chunk number
while($line = <$fh>)
{
$lno ++;
if($line =~ /^\s*$/)
{ # blank line = sentence boundary: close the last chunk and sentence
$result .= "\t))\t\t\n";
$result .= "</Sentence>\n\n";
$startFlag = 1;
$wno = 1;
$prevCTag = "";
$sno ++;
next;
}
if($startFlag == 1)
{
$result .= "<Sentence id=\"$sno\">\n";
}
chomp($line);
my @cols = split(/\s+/,$line);
if($cols[3] =~ /^B-(\w+)/)
{
my $ctag = $1;
# close the previous chunk before opening a new one
if($prevCTag ne "O" && $startFlag == 0)
{
$result .= "\t))\t\t\n";
$wno++;
}
$cno++;
$result .= "$cno\t((\t$ctag\t\n";
$wno=1;
$prevCTag = $ctag;
}
elsif($cols[3] =~ /^O/)
{
if($prevCTag ne "O" && $startFlag == 0)
{
$result .= "\t))\t\t\n";
$wno++;
}
$prevCTag = "O";
}
if($cols[3] =~ /I-(\w+)/ )
{ # check for inconsistencies .. does not form a chunk if there are inconsistencies
my $ctag = $1;
if($ctag ne $prevCTag)
{
$error =$error . "Inconsistency of Chunk tag in I-$ctag at Line no:$lno : There is no B-$ctag to the prev. word\n";
}
}
# restore the spaces that ssf2tnt replaced with "___"
$cols[2]=~s/___/ /g;
$result .= "$cno.$wno\t$cols[0]\t$cols[1]\t$cols[2]\n";
$wno ++;
$startFlag = 0;
}
return $result;
}
# crf_test(%par): run the external CRF++ labeller over the TnT-format
# data using the bundled Kannada chunker model; returns the tool's
# stdout (input columns plus a predicted label column).
sub crf_test {
my %par = @_;
my $data = $par{'data'};
my $result = "";
run ["/usr/local/bin/crf_test", "-m", __DIR__ . "/Chunker/models/chunker_kan_264K.model"], \$data, \$result;
return $result;
}
[submodule "API"]
path = API
url = https://gitlab.com/ilmt/ILMT-TEL-HIN-SSFAPI.git
package ILMT::KAN::HIN::ComputeHead;
use strict;
use warnings;
use Dir::Self;
use Data::Dumper;
use ILMT::KAN::HIN::SSFAPI::feature_filter;
use ILMT::KAN::HIN::SSFAPI::shakti_tree_api;
use ILMT::KAN::HIN::ComputeHead::make_chunk_name;
use ILMT::KAN::HIN::ComputeHead::copy_np_head;
use ILMT::KAN::HIN::ComputeHead::copy_vg_head;
# process(%args): compute chunk heads for every sentence of the SSF
# document in $args{data}.  Walks body -> paragraphs -> sentences with
# the shakti tree API, names the chunks, then copies NP-style and
# VG-style heads onto the chunk feature structures.  Returns the
# re-printed story captured via an in-memory filehandle.
sub process {
my %args = @_;
my $input = $args{'data'};
read_story(\$input);
my $numBody = get_bodycount();
my $result;
my $body;
for(my($bodyNum)=1;$bodyNum<=$numBody;$bodyNum++)
{
$body = &get_body($bodyNum,$body);
# Count the number of Paragraphs in the story
my($numPara) = &get_paracount($body);
#print STDERR "Paras : $numPara\n";
# Iterate through paragraphs in the story
for(my($i)=1;$i<=$numPara;$i++)
{
my($para);
# Read Paragraph
$para = &get_para($i);
# Count the number of sentences in this paragraph
my($numSent) = &get_sentcount($para);
# print STDERR "\n $i no.of sent $numSent";
#print STDERR "Para Number $i, Num Sentences $numSent\n";
#print $numSent."\n";
# Iterate through sentences in the paragraph
for(my($j)=1;$j<=$numSent;$j++)
{
#print " ... Processing sent $j\n";
# Read the sentence which is in SSF format
my($sent) = &get_sent($para,$j);
#print STDERR "$sent";
# print "check--\n";
# &print_tree($sent);
# Get the nodes of the sentence (words in our case)
#Copy NP head
# &AddID($sent);
&make_chunk_name($sent);
&copy_np_head($sent);
#Copy NP VG head
&copy_vg_head($sent);
}
}
}
# printstory() writes the modified story to the selected handle, which
# here is an in-memory file backed by $result.
open OUTFILE, '>', \$result or die $!;
select(OUTFILE);
printstory();
select(STDOUT);
return $result;
}
1;
package ILMT::KAN::HIN::ComputeHead::copy_np_head;
use Exporter qw(import);
use ILMT::KAN::HIN::ComputeHead::get_head_np;
our @EXPORT = qw(copy_np_head);
# For the details please see get_head.pl
# Copy the head feature onto every non-verb chunk type in $sent.
# The head-selection rules themselves live in get_head_np.pm.
sub copy_np_head
{
    my ($sent, $vibh_home) = @_;    # $vibh_home is accepted but unused
    &copy_head_np($_, $sent) for qw(NP JJP CCP RBP BLK NEGP);
} #End of Sub
1;
package ILMT::KAN::HIN::ComputeHead::copy_vg_head;
use Exporter qw(import);
use ILMT::KAN::HIN::SSFAPI::feature_filter;
use ILMT::KAN::HIN::SSFAPI::shakti_tree_api;
use ILMT::KAN::HIN::ComputeHead::get_head_vg;
our @EXPORT = qw(copy_vg_head);
#for details please check get_head.pl
# Copy the head feature onto every verb-group chunk type in $sent.
# The head-selection rules themselves live in get_head_vg.pm.
sub copy_vg_head
{
    my ($sent) = @_;
    &copy_head_vg($_, $sent) for qw(VGF VGNF VGINF VGNN NULL__VGNN NULL__VGF NULL__VGNF);
}
1;
package ILMT::KAN::HIN::ComputeHead::get_head_np;
use Exporter qw(import);
use ILMT::KAN::HIN::SSFAPI::feature_filter;
use ILMT::KAN::HIN::SSFAPI::shakti_tree_api;
our @EXPORT = qw(copy_head_np);
# copy_head_np($pos_tag, $sent)
# For every chunk of category $pos_tag in $sent, select a head word and
# write a head=<name> attribute into the chunk's feature structure.
# Children are scanned right-to-left for the first word whose POS tag
# matches the category's prefix ($match); if none matches, the last
# child is used, walking left past trailing PSP/PRP tokens.
# NOTE(review): this file runs without "use strict" -- $match, $i, $j,
# $word, $id, $att_val, $new_fs etc. are package globals, and statement
# order matters; left byte-identical for that reason.
sub copy_head_np
{
my ($pos_tag)=$_[0]; #array which contains all the POS tags
my ($sent)=$_[1]; #array in which each line of input is stored
my %hash=();
# Map the chunk category to the POS-tag prefix its head must match.
if($pos_tag =~ /^NP/)
{
$match = "NN"; #Modified in version 1.4
#For NST
}
if($pos_tag =~ /^V/ )
{
$match = "V";
}
if($pos_tag =~ /^JJP/ )
{
$match = "J";
}
if($pos_tag =~ /^CCP/ )
{
$match = "CC";
}
if($pos_tag =~ /^RBP/ )
{
$match = "RB";
}
my @np_nodes = &get_nodes(3,$pos_tag,$sent);#gives the nodes at which each pos_tag tag matches(index of chunk start)
for($i=$#np_nodes;$i>=0;$i--)
{
my (@childs)=&get_children($np_nodes[$i],$sent);#gives the nodes(index) at which childs(words in a chunk) are found
$j = $#childs;
while($j >= 0)
{
#$f1=node id in decreasing order
#$f2=tokens(words) in dec order
#$f3=word tags
#$f4=feature structure
# print "$childs[$j]"."\n"; "--"."@sent"."\n";
my($f0,$f1,$f2,$f3,$f4)=&get_fields($childs[$j],$sent);
$word=$f2;
# print "--".$f4,"---\n";
# Escape slashes / angle brackets so read_FS sees a clean <fs ...>.
$f4=~s/\//&sl/;
my ($x,$f4)=split(/</,$f4);
my ($f4,$x)=split(/>/,$f4);
$f4=~s/</&angO/;
$f4=~s/>/&angC/;
$f4="<".$f4.">";
# print "3 start head>>".$f4."<<\n";
my $fs_ref = &read_FS($f4);
# print "3 end head\n";
my @name_val = &get_values("name", $fs_ref);
#print "$word"."\n";
if($f3 eq "PRP") ##to make sure that the pronouns are identified correctly
{
$f3 = "NN";
}
if($f3 eq "WQ") ##to make sure that the pronouns are identified correctly
{
$f3 = "NN";
}
if($f3=~/^$match/)
{
# Matching word found: it becomes the chunk head.
if($hash{$f2} eq "")
{
$hash{$word}=1;
}
elsif($hash{$f2} ne "")
{
$hash{$word}=$hash{$word}+1;
}
$id=$hash{$word};
my ($x,$y)=split(/>/,$f4);
$x =~ s/ name=[^ >]+//;
if($id==1)
{
$att_val="$word";
}
elsif($id!=1)
{
$att_val="$word"."_"."$id";
}
#$new_fs = $x." head=\"$name_val[0]\">";
$new_fs = $x." head=$name_val[0]>";
#my $new_head_fs=$x." name=\"$att_val\">";
#&modify_field($childs[$j],4,$new_head_fs,$sent);
last;
}
elsif($j == 0)
{
# No POS match anywhere in the chunk: fall back to the last child,
# skipping trailing postpositions/pronouns.
my($f0,$f1,$f2,$f3,$f4)=&get_fields($childs[$#childs],$sent);
#-----------------modifications to handle PRP and PSP case------------------
$change=$#childs;
$f4=~s/\//&sl/;
my ($x,$f4)=split(/</,$f4);
my ($f4,$x)=split(/>/,$f4);
$f4=~s/</&angO/;
$f4=~s/>/&angC/;
$f4="<".$f4.">";
while(1)
{
if($f3 eq "PSP" or $f3 eq "PRP")
{
$change=$change-1;
if($childs[$change] eq "") ##Modifications per Version 1.3
{ ##To handle NP chunks with single PSP
$change=$change+1; ##
last; ##
}
($f0,$f1,$f2,$f3,$f4)=&get_fields($childs[$change],$sent);
}
else
{
last;
}
}
$new_fs = $f4;
$word=$f2;
my $fs_ref = &read_FS($f4);
my @name_val = &get_values("name", $fs_ref);
if($hash{$f2} eq "")
{
$hash{$word}=1;
}
elsif($hash{$f2} ne "")
{
$hash{$word}=$hash{$word}+1;
}
$id=$hash{$word};
#--------------------------------------------------------------------------------
my ($x,$y)=split(/>/,$f4);
$x =~ s/ name=[^ >]+//;
if($id==1)
{
$att_val="$word";
}
elsif($id!=1)
{
$att_val="$word"."_"."$id";
}
#$new_fs = $x." head=\"$name_val[0]\">";
$new_fs = $x." head=$name_val[0]>";
#my $new_head_fs=$x." name=\"$att_val\">";
#&modify_field($childs[$change],4,$new_head_fs,$sent);
}
$j--;
}
# Write the selected head back onto the chunk node's feature structure,
# merging with any existing attributes.
($f0,$f1,$f2,$f3,$f4) = &get_fields($np_nodes[$i],$sent);
if($f4 eq '')
{
##print "1check ---$new_fs\n";
&modify_field($np_nodes[$i],4,$new_fs,$sent);
($f0,$f1,$f2,$f3,$f4) = &get_fields($np_nodes[$i],$sent);
$fs_ptr = &read_FS($f4,$sent);
#print "---x--$x\n";
#&add_attr_val("name",$head_att_val,$fs_ptr,$sent);
($f0,$f1,$f2,$f3,$f4) = &get_fields($np_nodes[$i],$sent);
#print "2check ---$f4\n";
}
else
{
$fs_ptr = &read_FS($f4,$sent);
$new_fs_ptr = &read_FS($new_fs,$sent);
&merge($fs_ptr,$new_fs_ptr,$sent);
$fs_string = &make_string($fs_ptr);
&modify_field($np_nodes[$i],4,$fs_string,$sent);
($f0,$f1,$f2,$f3,$f4) = &get_fields($np_nodes[$i],$sent);
$fs_ptr = &read_FS($f4,$sent);
#&add_attr_val("name",$head_att_val,$fs_ptr,$sent);
#&modify_field($np_nodes[$i], 4, $head_att_val,$sent);
}
}
#print "hiii--\n"
#&print_tree();
#print "hiii\n";
}
1;
package ILMT::KAN::HIN::ComputeHead::get_head_vg;
use Exporter qw(import);
use ILMT::KAN::HIN::SSFAPI::feature_filter;
use ILMT::KAN::HIN::SSFAPI::shakti_tree_api;
our @EXPORT = qw(copy_head_vg);
# copy_head_vg($pos_tag, $sent)
# Verb-group analogue of copy_head_np: for every chunk of category
# $pos_tag, select a head word and write a head=<name> attribute into
# the chunk's feature structure.  Unlike copy_head_np, children are
# scanned LEFT-to-right, and the fallback (no POS match) is simply the
# last child with no PSP/PRP skipping.
# NOTE(review): runs without "use strict"; the same package globals as
# copy_head_np are reused, so the code is left byte-identical.
sub copy_head_vg
{
my($pos_tag) = $_[0]; #array which contains all the POS tags
my($sent) = $_[1]; #array in which each line of input is stored
my %hash=();
# Map the chunk category to the POS-tag prefix its head must match.
if($pos_tag =~ /^NP/)
{
$match = "N";
}
if($pos_tag =~ /^V/ )
{
$match = "V";
}
if($pos_tag =~ /^JJP/ )
{
$match = "J";
}
if($pos_tag =~ /^CCP/ )
{
$match = "CC";
}
if($pos_tag =~ /^RBP/ )
{
$match = "RB";
}
@np_nodes = &get_nodes(3,$pos_tag,$sent);
for($i=$#np_nodes; $i>=0; $i--)
{
my(@childs) = &get_children($np_nodes[$i],$sent);
$j = 0;
while($j <= $#childs)
{
#$f1=node id in decreasing order
#$f2=tokens(words) in dec order
#$f3=word tags
#$f4=feature structure
my($f0,$f1,$f2,$f3,$f4) = &get_fields($childs[$j],$sent);
$word=$f2;
# Escape slashes / angle brackets so read_FS sees a clean <fs ...>.
$f4=~s/\//&sl/;
my ($x,$f4)=split(/</,$f4);
my ($f4,$x)=split(/>/,$f4);
$f4=~s/</&angO/;
$f4=~s/>/&angC/;
$f4="<".$f4.">";
if($f3 =~ /^$match/)
{
# Matching word found: it becomes the chunk head.
$new_fs = $f4;
my $fs_ref = &read_FS($f4); #feature structure is sent to function where all the categories are dealt
my @name_val = &get_values("name", $fs_ref);
if($hash{$f2} eq "")
{
$hash{$word}=1;
}
elsif($hash{$f2} ne "")
{
$hash{$word}=$hash{$word}+1;
}
$id=$hash{$word};
my ($x,$y)=split(/>/,$f4);
$x =~ s/ name=[^ >]+//;
if($id==1)
{
$att_val="$word";
}
elsif($id!=1)
{
$att_val="$word"."_"."$id";
}
#$new_fs = $x." head=\"$name_val[0]\">";
$new_fs = $x." head=$name_val[0]>";
#my $new_head_fs=$x." name=\"$att_val\">";
#&modify_field($childs[$j],4,$new_fs,$sent);
last;
}
elsif($j == 0)
{
# Provisional head: the last child (overwritten if a later child
# matches the POS prefix).
my($f0,$f1,$f2,$f3,$f4) = &get_fields($childs[$#childs],$sent);
$word=$f2;
$f4=~s/\//&sl/;
my ($x,$f4)=split(/</,$f4);
my ($f4,$x)=split(/>/,$f4);
$f4=~s/</&angO/;
$f4=~s/>/&angC/;
$f4="<".$f4.">";
my $fs_ref = &read_FS($f4);
my @name_val = &get_values("name", $fs_ref);
if($hash{$f2} eq "")
{
$hash{$word}=1;
}
elsif($hash{$f2} ne "")
{
$hash{$word}=$hash{$word}+1;
}
$id=$hash{$word};
my ($x,$y)=split(/>/,$f4);
$x =~ s/ name=[^ >]+//;
if($id==1)
{
$att_val="$word";
}
elsif($id!=1)
{
$att_val="$word"."_"."$id";
}
#$new_fs = $x." head=\"$name_val[0]\">";
$new_fs = $x." head=$name_val[0]>";
#my $new_head_fs=$x." name=\"$att_val\">";
#&modify_field($childs[$#childs],4,$new_fs,$sent);
}
$j++;
}
# Write the selected head back onto the chunk node's feature structure,
# merging with any existing attributes.
($f0,$f1,$f2,$f3,$f4) = &get_fields($np_nodes[$i],$sent);
if($f4 eq '')
{
&modify_field($np_nodes[$i],4,$new_fs,$sent);
}
else
{
$fs_ptr = &read_FS($f4,$sent);
$new_fs_ptr = &read_FS($new_fs,$sent);
&merge($fs_ptr,$new_fs_ptr,$sent);
$fs_string = &make_string($fs_ptr,$sent);
&modify_field($np_nodes[$i],4,$fs_string,$sent);
}
}
}
1;
package ILMT::KAN::HIN::ComputeHead::make_chunk_name;
use Exporter qw(import);
use ILMT::KAN::HIN::SSFAPI::feature_filter;
use ILMT::KAN::HIN::SSFAPI::shakti_tree_api;
our @EXPORT = qw(make_chunk_name);
#use strict;
# make_chunk_name($sent)
# Assigns unique "name" attributes to every chunk and token of the
# sentence (base name for the first occurrence, name2, name3, ... for
# repeats) and rewrites drel / dmrel / reftype / coref references so
# they keep pointing at the renamed chunks.  Several add_attr_val /
# modify_field calls are commented out, so parts of the chunk renaming
# are currently disabled and only attribute deletion takes effect there.
# NOTE(review): runs without "use strict".  The first quote-stripping
# check reads $old_drel[0] before that array is ever populated -- it
# likely was meant to be $old_value_name[0]; left as-is.
sub make_chunk_name()
{
my($i, @leaves, $new_fs, @tree, $line, $string, $file, @lines, @string2, $string_ref1, $string1, $string_name);
$input = $_[0];
my %hash_index;
my %hash_chunk;
my @final_tree;
#&read_story($input);
my @tree = &get_children(0, $input);
my $ssf_string = &get_field($tree[0], 3, $input);
# Descend past a wrapping SSF node, if present, to the chunk list.
if($ssf_string eq "SSF")
{
@final_tree = &get_children(1, $input);
}
else
{
@final_tree = @tree;
}
my $k, $index=0, $count=0, $index_chunk=0;
@tree = &get_children($s,$input);
# Pass 1: normalise every leaf's feature structure string.
foreach $i(@final_tree)
{
$string = &get_field($i, 4,$input);
@leaves = &get_children($i,$input);
my $string_fs = &read_FS($string, $input);
foreach $m(@leaves)
{
$string1 = &get_field($m, 4,$input);
$string_fs1 = &read_FS($string1, $input);
$new_fs = &make_string($string_fs1, $input);
&modify_field($m, 4, $new_fs, $input);
}
}
# Pass 2: compute a unique name per chunk and retarget references.
foreach $i(@final_tree)
{
my $count_chunk=0;
$index_chunk++;
$string = &get_field($i, 4, $input);
$string_fs = &read_FS($string, $input);
my @old_value_name = &get_values("name", $string_fs, $input);
#print @old_value_name,"\n";
if($old_value_name[0]=~/\'/ or $old_drel[0]=~/\"/)
{
$old_value_name[0]=~s/\'//g;
$old_value_name[0]=~s/\"//g;
}
my @chunk = &get_field($i, 3, $input);
# Count earlier chunks with the same tag to number duplicates.
for ($ite1=1; $ite1<$index_chunk; $ite1++)
{
my $actual_chunk_name = $hash_chunk{$ite1};
my @chunk_name_split = split(/__/, $actual_chunk_name);
if($chunk_name_split[0] eq $chunk[0])
{
$count_chunk++;
}
}
my @chunk1;
if($count_chunk == 0)
{
$hash_chunk{$index_chunk} = "$chunk[0]"."__1";
$chunk1[0] = $chunk[0];
}
else
{
$new_count_chunk = $count_chunk+1;
$chunk1[0] = "$chunk[0]"."$new_count_chunk";
$hash_chunk{$index_chunk} = "$chunk[0]"."__$new_count_chunk";
}
# Retarget any drel/dmrel/reftype/coref that referenced the old name.
foreach $m_drel(@final_tree)
{
my $string_child = &get_field($m_drel, 4, $input);
my $string_fs_child = &read_FS($string_child, $input);
my @old_drel = &get_values("drel", $string_fs_child, $input);
my @old_dmrel = &get_values("dmrel", $string_fs_child, $input);
my @old_reftype = &get_values("reftype", $string_fs_child, $input);
my @old_coref = &get_values("coref", $string_fs_child, $input);
#my @old_attr = &get_attributes($string_fs_child, $input);
if($old_drel[0]=~/\'/ or $old_drel[0]=~/\"/)
{
$old_drel[0]=~s/\'//g;
$old_drel[0]=~s/\"//g;
}
if($old_dmrel[0]=~/\'/ or $old_dmrel[0]=~/\"/)
{
$old_dmrel[0]=~s/\'//g;
$old_dmrel[0]=~s/\"//g;
}
if($old_reftype[0]=~/\'/ or $old_reftype[0]=~/\"/)
{
$old_reftype[0]=~s/\'//g;
$old_reftype[0]=~s/\"//g;
}
if($old_coref[0]=~/\'/ or $old_coref[0]=~/\"/)
{
$old_coref[0]=~s/\'//g;
$old_coref[0]=~s/\"//g;
}
my @old_drel_name = split(/:/, $old_drel[0]);
my @old_dmrel_name = split(/:/, $old_dmrel[0]);
my @old_reftype_name = split(/:/, $old_reftype[0]);
my @old_coref_name = split(/:/, $old_coref[0]);
if(($old_drel_name[1] eq $old_value_name[0]) && ($old_drel_name[1] ne ""))
{
my @new_drel;
$new_drel[0] = "$old_drel_name[0]:$chunk1[0]";
&del_attr_val("drel", $string_fs_child, $input);
# &add_attr_val("drel", \@new_drel, $string_fs_child, $input);
}
if(($old_dmrel_name[1] eq $old_value_name[0]) && ($old_dmrel_name[1] ne ""))
{
my @new_dmrel;
$new_dmrel[0] = "$old_dmrel_name[0]:$chunk1[0]";
&del_attr_val("dmrel", $string_fs_child, $input);
# &add_attr_val("dmrel", \@new_dmrel, $string_fs_child, $input);
}
if(($old_reftype_name[1] eq $old_value_name[0]) && ($old_reftype_name[1] ne ""))
{
my @new_reftype;
$new_reftype[0] = "$old_reftype_name[0]:$chunk1[0]";
&del_attr_val("reftype", $string_fs_child, $input);
# &add_attr_val("reftype", \@new_reftype, $string_fs_child, $input);
}
if(($old_coref_name[0] eq $old_value_name[0]) && ($old_coref_name[0] ne ""))
{
my @new_coref;
$new_coref[0] = $chunk1[0];
&del_attr_val("coref", $string_fs_child, $input);
# &add_attr_val("coref", \@new_coref, $string_fs_child, $input);
}
# my $name_attribute_chunk = &make_string($string_fs_child, $input);
# &modify_field($m_drel, 4, $name_attribute_chunk, $input);
}
&del_attr_val("name", $string_fs, $input);
# &add_attr_val("name", \@chunk1, $string_fs, $input);
# my $name_fs_chunk = &make_string($string_fs, $input);
# &modify_field($i, 4, $name_fs_chunk, $input);
my $string1 = &get_field($i, 4, $input);
my $attr = &read_FS($string1, $input);
#my @attribute_array = &get_attributes($attr, $input);
#$count=@attribute_array;
#print $count, "\n";
}
# Pass 3: give every token (leaf) a unique name the same way.
foreach $i(@final_tree)
{
$string = &get_field($i, 4, $input);
@leaves = &get_children($i, $input);
foreach $m(@leaves)
{
$count=0;
$index++;
$string2 = &get_field($m, 4, $input);
$string_fs2 = &read_FS($string2, $input);
my @token = &get_field($m, 2, $input);
for ($ite=1; $ite<$index; $ite++)
{
my $actual_name = $hash_index{$ite};
my @name_split = split(/__/, $actual_name);
if($name_split[0] eq $token[0])
{
$count++;
}
}
if($count == 0)
{
my @token1;
$token1[0] = $token[0];
&del_attr_val("name", $string_fs2, $input);
&add_attr_val("name", \@token1, $string_fs2, $input);
my $name_fs = &make_string($string_fs2, $input);
&modify_field($m, 4, $name_fs,$input);
$hash_index{$index} = "$token[0]"."__1";
}
else
{
$new_count = $count+1;
my @new_token = "$token[0]"."$new_count";
&del_attr_val("name", $string_fs2, $input);
&add_attr_val("name", \@new_token, $string_fs2,$input);
my $name_fs = &make_string($string_fs2,$input);
&modify_field($m, 4, $name_fs, $input);
$hash_index{$index} = "$token[0]"."__$new_count";
}
}
}
}
1;
[submodule "API"]
path = API
url = https://gitlab.com/ilmt/ILMT-TEL-HIN-SSFAPI.git
package ILMT::KAN::HIN::ComputeVibhakti;
#use strict;
#use warnings;
use Dir::Self;
use Data::Dumper;
use ILMT::KAN::HIN::SSFAPI::feature_filter;
use ILMT::KAN::HIN::SSFAPI::shakti_tree_api;
use ILMT::KAN::HIN::ComputeVibhakti::ComputeTAM;
# process(%par): compute vibhakti (case-marker) and TAM information for
# every sentence of the SSF document in $par{data}.  Iterates bodies ->
# paragraphs -> sentences, calling ComputeVibhakti() and ComputeTAM()
# on each, then returns the re-printed story captured via an in-memory
# filehandle (same pattern as ComputeHead::process).
sub process {
my %par = @_;
my $input = $par{'data'};
read_story(\$input);
my $keep = $par{'keep'};
my $body;
my $numbody = get_bodycount();
for(my($bodynum)=1;$bodynum<=$numbody;$bodynum++)
{
$body = get_body($bodynum,$body);
# count the number of paragraphs in the story
my($numpara) = get_paracount($body);
#print stderr "paras : $numpara\n";
# iterate through paragraphs in the story
for(my($i)=1;$i<=$numpara;$i++)
{
my($para);
# read paragraph
$para = get_para($i);
# count the number of sentences in this paragraph
my($numsent) = get_sentcount($para);
# iterate through sentences in the paragraph
for(my($j)=1;$j<=$numsent;$j++)
{
# read the sentence which is in ssf format
my($sent) = get_sent($para,$j);
#copy vibhakti info
ComputeVibhakti($sent, $keep);
#compute tam
ComputeTAM($sent, $keep);
}
}
}
open OUTFILE, '>', \$result or die $!;
select(OUTFILE);
printstory();
select(STDOUT);
return $result;
}
#the module prunes multiple feature structure (NN, NNP, PRP at present), it also removes the parsarg node in the NP and adds it to its noun fs.
#$&compute_vibhakti;
# ComputeVibhakti($sent, $vibh_home)
# For every NP and RBP chunk of sentence $sent: collects the vibhakti
# (postposition) material of the chunk into the chunk head's "vib"
# attribute, records the leaf positions in a new vpos="..." attribute on
# the chunk's feature structure, and finally deletes the PSP/NST leaves
# that carried the vibhakti.
# NOTE(review): $position, $nhead, $vibh_chunk, $node, $NP_child, $num
# and @remove are package globals (strict is disabled in this package);
# @remove in particular is shared with the file scope — confirm no other
# sub depends on its contents.
sub ComputeVibhakti
{
my $sent=@_[0];
my $vibh_home = @_[1];
#my $delete; #keeps count of all the deleted node, helps in locating node obtained before deletion.
#get all the noun nodes in the tree, the noun will be case marked '1' if a vibhakti is present, else case is '0'
#my @all_leaves = &get_leaves();
#&read(@_[0]);
my @all_children_NP =&get_nodes(3,"NP",$sent); #gets all the NP nodes
my @all_children_RBP =&get_nodes(3,"RBP",$sent); #gets all the RBP nodes
my @all_children = (@all_children_NP , @all_children_RBP); #contains all the NP and RBP nodes
# NOTE(review): "my @all_children" is declared twice; the second
# declaration shadows the first within the same scope (a warning under
# "use warnings", harmless here but worth cleaning up).
my @all_children = sort { $a <=> $b } @all_children;
foreach $node(@all_children)
{
my @node_leaves=&get_leaves_child($node,$sent); #gets leaf nodes of NP or RBP node
$position="";
$nhead=0;
$f4=&get_field($node,4,$sent); # gets feature structure
my $string_fs = &read_FS($f4, $sent);
#gets head and vibhakti values
my @head_value = &get_values("head", $string_fs, $sent);
my @vibh_value=&get_values("vib", $string_fs, $sent);
$vibh_chunk=$vibh_value[0];
#iterates through each leaf node and gets postag, word, fs
foreach $NP_child(@node_leaves)
{
my $pos = &get_field($NP_child,3,$sent);
my $word = &get_field($NP_child,2,$sent);
my $fs = &get_field($NP_child,4,$sent);
my $str_fs=&read_FS($fs,$sent);
my @name_value=&get_values("name",$str_fs,$sent);
# a noun leaf marks this chunk as noun-headed ($nhead) so that a later
# PSP/NST leaf is treated as its vibhakti
if($pos eq "NN" or $pos eq "NNP" or $pos eq "PRP")
{
$nhead=1;
$flag=0;
$prev_RB=0;
$flag_NN=1
}
if($pos eq "RB")
{
$flag=1;
$prev_RB = 1;
$flag_NN=0
}
# the leaf whose name matches the chunk's head attribute: record its
# offset as "vibN" in the position string
if($head_value[0] eq $name_value[0])
{
$num=$NP_child-$node; #gives position of the leaf with respect to the node
# modifies the value of vpos(position) in a chunk
if($position ne "")
{$position=$position."_"."vib$num";}
if($position eq "")
{$position="vib$num";}
}
#Adds the RP vibhakti to vpos
if($pos eq "RP")
{
if($position ne "")
{
$position=$position."_"."RP";
}
else
{
next;
}
}
# NOTE(review): "or" binds looser than "and", so this parses as
# PSP or (NST and nhead) — i.e. the nhead guard applies only to NST.
# Confirm whether that precedence is intended.
if($pos eq "PSP" or $pos eq "NST" and $nhead==1)
{
#Adds position of vibhakti in vpos(position) value
if($position ne "")
{
$num=$NP_child-$node;
$position=$position."_".$num;
}
else
{
$position=$NP_child-$node;
}
my $val_fs=&get_field($NP_child, 4,$sent);
$FSreference = &read_FS($val_fs,$sent); #reads feature structure of the leaf
my @cur_vibhakti = &get_values("lex",$FSreference); #fetches the lexical value of vibhakti
my @cur_vib_vib = &get_values("vib",$FSreference);
#adds the lexical value of vibhakti to vibh_chunk
if($vibh_chunk ne "")
{
$vibh_chunk=$vibh_chunk . "_" . $cur_vibhakti[0];
}
else
{
$vibh_chunk="0_".$cur_vibhakti[0];
}
# remember this vibhakti leaf so it can be deleted afterwards
push(@remove,$NP_child);
}
}
# if any vibhakti was collected, write it back onto the chunk node's fs
if($vibh_chunk)
{
my @vibh_chunk_arr=();
push @vibh_chunk_arr,$vibh_chunk; #pushes the value of vibh_chunk in vibh_chunk_arr
my $head_node=&get_field($node,4,$sent);
my $FSreference1 = &read_FS($head_node,$sent); #gets FS value
&update_attr_val("vib", \@vibh_chunk_arr,$FSreference1,$sent); #updates value of attribute vib
# Modifies the value of fs by adding new attribute vpos that will be in output.
my $string=&make_string($FSreference1,$sent);
my ($x,$y)=split(/>/,$string);
my $new_head_fs=$x." vpos=\"$position\">";
&modify_field($node,4,$new_head_fs,$sent);
undef $head_word;
undef $new_string;
}
}
#Deletes the leaves containing vibhakti.
# $delete compensates for the index shift caused by earlier deletions
$delete=0;
foreach (@remove)
{
&delete_node($_-$delete,$sent);
$delete++;
}
# empty @remove for the next sentence (elements become undef)
delete @remove[0..$#remove];
}
1;
package ILMT::KAN::HIN::ComputeVibhakti::ComputeTAM;
use ILMT::KAN::HIN::SSFAPI::feature_filter;
use ILMT::KAN::HIN::SSFAPI::shakti_tree_api;
use Exporter qw(import);
our @EXPORT = qw(ComputeTAM);
# ComputeTAM($sent, $keep)
# For every verb-group chunk (VGF/VGINF/VGNF/VGNN) of sentence $sent:
# concatenates the TAM of the main verb with the root(+vib) of each
# auxiliary/PSP/NST leaf into the chunk's "vib" attribute, copies
# gen/num/per from the last auxiliaries onto the chunk head, adds a
# vpos="..." attribute with the leaf offsets, and deletes the consumed
# auxiliary leaves.
# NOTE(review): like ComputeVibhakti, this relies on package globals
# ($leaf, $leaf_tag, $fs, $fs_array, @tam, @lex, $num, @tam_new, ...)
# because strict is commented out.
sub ComputeTAM
{
my $sent=@_[0];
my $keep=$_[1];
my @uns_VG_nodes = &get_nodes(3,"VGF",$sent); #get all the VG nodes
my @VGINF_nodes = &get_nodes(3,"VGINF",$sent); #get all the VG nodes
my @VGNF_nodes = &get_nodes(3,"VGNF",$sent); #get all the VG nodes
my @VGNN_nodes = &get_nodes(3,"VGNN",$sent); #get all the VG nodes
#push VGINF,VGNF,VGNN nodes to uns_VG_nodes. Thus we have single list containing all the VG nodes.
foreach (@VGINF_nodes)
{
push(@uns_VG_nodes,$_);
}
foreach (@VGNF_nodes)
{
push(@uns_VG_nodes,$_);
}
foreach (@VGNN_nodes)
{
push(@uns_VG_nodes,$_);
}
my @remove;
my @VG_nodes = sort {$a <=> $b}(@uns_VG_nodes); #sorting list in ascending order
foreach $node (@VG_nodes)
{
my @leaves = &get_leaves_child($node,$sent); #gets all the leaves of the the VG node
my $parent = $node;
my $head = 0;
my $final_tam_aux = "";
my $neg = "";
$fs_array_head = "";
$verb_leaf_present = 0;
my $flag=0;
my @final_tam;
my @_leaf;
my $position="";
$f4=&get_field($node,4,$sent); #gets fs of the node
my $string_fs = &read_FS($f4, $sent);
my @head_value = &get_values("head", $string_fs, $sent);#gets head value of the node
#checks for verb leaf, if present sets verb_leaf_present=1
foreach $leaf (@leaves)
{
$leaf_tag = &get_field($leaf, 3,$sent);#gets postag of leaf
if($leaf_tag =~ /^V/)
{
$verb_leaf_present = 1;
}
}
foreach $leaf (@leaves)
{
$leaf_tag = &get_field($leaf, 3,$sent);#gets postag of leaf
$leaf_lex = &get_field($leaf, 2,$sent);#gets lexical item of leaf
# first verb leaf in the chunk is treated as the main verb
if($leaf_tag =~/^V/ and $head == 0)
{
$head = 1;
$node_head = $leaf;
$fs = &get_field($leaf, 4,$sent);#gets feature structure
$fs_array = &read_FS($fs,$sent);
$fs_array_head = $fs_array;
@tam = &get_values("vib", $fs_array,$sent);
my @name_value= &get_values("name",$fs_array,$sent); #gets value of name attribute
if($head_value[0] eq $name_value[0])
{
$num=$leaf-$node; #gives position of the leaf with respect to the nodei
# modifies the value of vpos(position) in a chunk
if($position ne "")
{$position=$position."_"."tam$num";}
if($position eq "")
{$position="tam$num";}
}
#storing tam values
if($tam[0] ne "")
{
if($final_tam_aux ne "")
{
$final_tam_aux = $final_tam_aux."_".$tam[0];
}
else
{
$final_tam_aux = $tam[0];
}
#store all the tam of all interpretation in $final_tam
}
else
{
if($final_tam_aux ne "")
{
$final_tam_aux = $final_tam_aux."_".0;
}
else
{
$final_tam_aux = 0;
}
}
}
elsif($leaf_tag=~/^VAUX/ or $leaf_tag=~/PSP/ or $leaf_tag=~/NST/) #identifying whether a vibhakti or not.
{
$flag=1;
#modifying the value of vpos(position)
if($position ne "")
{
$num=$leaf-$node;
$position=$position."_".$num;
}
else
{
$position=$leaf-$node;
}
my $word1=&get_field($leaf,2,$sent); #gets the word(lex)
#print "LEAF TAG--$leaf_tag--$word1\n";
push(@remove,$leaf);
$fs = &get_field($leaf, 4,$sent);
$fs_array = &read_FS($fs,$sent);
@tam = &get_values("vib", $fs_array); #gets value of vib attribute
@lex = &get_values("lex", $fs_array); #gets lexical item(root)
push(@_leaf,$leaf);
my $root = $lex[0];
my $tam_t = "";
#line 137 to 162 modifies tam feature of fs.
if($tam[0] ne "" and $tam[0] ne "`" and $tam[0] ne "0" and $tam[0] ne $root)
{
$tam_t = $tam[0];
if($final_tam_aux ne "")
{
$final_tam_aux = $final_tam_aux."_".$root."+".$tam_t;
}
else
{
$final_tam_aux = $root."+".$tam_t;
}
}
else
{
$tam_t = "0";
if($final_tam_aux ne "")
{
$final_tam_aux = $final_tam_aux."_".$root;
}
else
{
$final_tam_aux = $root;
}
}
}
elsif($leaf_tag eq 'NEG' and $verb_leaf_present == 1)
{
# NOTE(review): the "=cut ... =cut" pair below is a POD hack that
# comments out the whole NEG-handling body — the branch matches but
# does nothing at runtime. Confirm whether NEG handling is meant to
# be disabled.
=cut
if($position ne "")
{
$num=$leaf-$node;
$position=$position."_"."NEG$num";
}
if($position eq "")
{
$num=$leaf-$node;
$position="NEG$num";
}
$neg = &get_field($leaf, 2,$sent);
push(@remove,$leaf);
$flag=1;
$fs = &get_field($leaf, 4,$sent);
$fs_array = &read_FS($fs,$sent);
@tam = &get_values("vib", $fs_array);
@lex = &get_values("lex", $fs_array);
push(@_leaf,$leaf);
my $root = $lex[0];
my $tam_t = "";
if($tam[0] ne "" and $tam[0] ne "`" and $tam[0] ne "0" and $tam[0] ne $root)
{
$tam_t = $tam[0];
if($final_tam_aux ne "")
{
$final_tam_aux = $final_tam_aux."_".$root."+".$tam_t;
}
else
{
$final_tam_aux = $root."+".$tam_t;
}
}
else
{
$tam_t = "0";
if($final_tam_aux ne "")
{
$final_tam_aux = $final_tam_aux."_".$root;
}
else
{
$final_tam_aux = $root;
}
}
=cut
}
}
# agreement features: take num/per from the last auxiliary and gen
# from the one before it (when at least two auxiliaries were seen)
$fs_head = &get_field($parent, 4,$sent);
$fs_head_array = &read_FS($fs_head,$sent);
my @num,@gen,@per;
#print "-->",$#_leaf,"\n";
if($#_leaf>0)
{
my $fs1 = &get_field($_leaf[-1], 4,$sent);
my $fs2 = &get_field($_leaf[-2], 4,$sent);
$fs_array1=&read_FS($fs1,$sent);
$fs_array2=&read_FS($fs2,$sent);
@num = &get_values("num", $fs_array1,$sent);
@per = &get_values("per", $fs_array1,$sent);
@gen = &get_values("gen", $fs_array2,$sent);
}
if($#_leaf==0)
{
my $fs1 = &get_field($_leaf[-1], 4,$sent);
my $pos1 = &get_field($_leaf[-1], 3,$sent);
if($pos1 eq "VAUX" or $pos1 eq "PSP")
{
$fs_array1=&read_FS($fs1,$sent);
@num = &get_values("num", $fs_array1,$sent);
@per = &get_values("per", $fs_array1,$sent);
}
}
# write the concatenated TAM string into the chunk head's vib attribute
$tam_new[0] = $final_tam_aux;
&update_attr_val_2("vib", \@tam_new, $fs_head_array->[0],$sent);
#print "@num[0]--@per[0]--@gen[0]\n";
if(@gen[0] ne "")
{
&update_attr_val_2("gen", \@gen, $fs_head_array->[0],$sent);
}
if(@num[0] ne "")
{
&update_attr_val_2("num", \@num, $fs_head_array->[0],$sent);
}
if(@per[0] ne "")
{
&update_attr_val_2("per", \@per, $fs_head_array->[0],$sent);
}
# only annotate vpos when the chunk has a verb and at least one
# auxiliary/vibhakti leaf was consumed
if($verb_leaf_present == 1 and $flag==1)
{
$string_head = &make_string($fs_head_array,$sent);
my ($x,$y)=split(/>/,$string_head);
my $new_head_fs=$x." vpos=\"$position\">";
&modify_field($parent, 4, $new_head_fs,$sent);
}
# NOTE(review): these deletes use $#remove as the upper bound for
# @num/@per/@gen — presumably $#num etc. were intended; confirm.
delete @num[0..$#remove];
delete @per[0..$#remove];
delete @gen[0..$#remove];
}
# delete the consumed auxiliary leaves, compensating for index shift
my @sort_remove=sort{$a <=> $b} @remove;
my $delete=0;
foreach (@sort_remove)
{
&delete_node($_-$delete,$sent);
$delete++;
}
delete @remove[0..$#remove];
delete @sort_remove[0..$#remove];
#print "after vib comp--\n";
#&print_tree();
}
1;
package ILMT::KAN::HIN::GuessMorph;
#use strict;
#use warnings;
use Dir::Self;
use Data::Dumper;
use IPC::Run qw(run);
my $cwd = __DIR__;
use File::Temp qw/ tempfile /;
# process(data => $text) -> $text
# Post-processes morph-analyzer output, one TSV line at a time
# (id, token, pos, feature-structure). Rewrites FRAGP tags to BLK and,
# for tokens with multiple "|"-separated analyses, drops the first
# analysis when its suffix pair is (0_o, lo), otherwise keeps the first
# two analyses.
sub process {
my %args = @_;
utf8::encode($args{"data"});
open INFILE, '<', \$args{"data"} or die $!;
my $result = "";
while(my $line=<INFILE>) {
chomp($line);
my ($id,$token,$pos,$fs)=split(/\t+/,$line);
chomp($fs);
if($pos eq "FRAGP"){
$pos=~s/FRAGP/BLK/g;
}
# multiple analyses are separated by '|'
if($fs=~/\|/)
{
# strip the "<fs af='" / "'>" wrappers around each analysis
$fs=~s/<fs af=\'|\'>//g;
if(($fs) && ($fs=~/\|/))
{
# NOTE(review): $fs1..$fs4 and the per-field variables are package
# globals (no "my"); if this inner branch is not entered they keep
# their values from the previous line — confirm that stale reuse in
# $fst1/$fst2 below is intended.
($fs1,$fs2,$fs3,$fs4)=split(/\|/,$fs);
($root1,$lcat1,$g1,$n1,$p1,$c1,$tam1,$suff1)=split(/,/,$fs1);
($root2,$lcat2,$g2,$n2,$p2,$c2,$tam2,$suff2)=split(/,/,$fs2);
# drop the first analysis for the (0_o, lo) suffix combination
if(($suff1 eq "0_o")&&($suff2 eq "lo"))
{
$fs1 ="" ;
}
}
$fst1= "$id\t$token\t$pos\t<fs af='$fs2'>";
$fst2= "$id\t$token\t$pos\t<fs af='$fs1'>|<fs af='$fs2'>";
if($fs1 eq "") {
$result = $result . "$fst1\n";
}
else {
$result = $result . "$fst2\n";
}
}
else {
# single analysis: pass the line through (with any FRAGP->BLK rewrite)
$result = $result . "$id\t$token\t$pos\t$fs\n";
}
}
utf8::decode($result);
return $result;
};
1
# Object files
*.o
*.ko
*.obj
*.elf
# Precompiled Headers
*.gch
*.pch
# Libraries
*.lib
*.a
*.la
*.lo
# Shared objects (inc. Windows DLLs)
*.dll
*.so
*.so.*
*.dylib
# Executables
*.exe
*.out
*.app
*.i*86
*.x86_64
*.hex
# Debug files
*.dSYM/
# Files Generated on Compilation
lib/ILMT/TEL/HIN/Morph/analyzer/data/adaM_suff.c
lib/ILMT/TEL/HIN/Morph/analyzer/data/adj_nEna.c
lib/ILMT/TEL/HIN/Morph/analyzer/data/adj_ni.c
lib/ILMT/TEL/HIN/Morph/analyzer/data/adj_pAti.c
lib/ILMT/TEL/HIN/Morph/analyzer/data/adj_xi.c
lib/ILMT/TEL/HIN/Morph/analyzer/data/adv.c
lib/ILMT/TEL/HIN/Morph/analyzer/data/adv_xi.c
lib/ILMT/TEL/HIN/Morph/analyzer/data/ani_suff.c
lib/ILMT/TEL/HIN/Morph/analyzer/data/aux_info.c
lib/ILMT/TEL/HIN/Morph/analyzer/data/avy.c
lib/ILMT/TEL/HIN/Morph/analyzer/data/c_data/derived_const.c
lib/ILMT/TEL/HIN/Morph/analyzer/data/c_data/derived_const.h
lib/ILMT/TEL/HIN/Morph/analyzer/data/c_data/info.h
lib/ILMT/TEL/HIN/Morph/analyzer/data/const.c
lib/ILMT/TEL/HIN/Morph/analyzer/data/const.h
lib/ILMT/TEL/HIN/Morph/analyzer/data/derived_const.h
lib/ILMT/TEL/HIN/Morph/analyzer/data/dict_tel
lib/ILMT/TEL/HIN/Morph/analyzer/data/e_suff.c
lib/ILMT/TEL/HIN/Morph/analyzer/data/feature_value.c
lib/ILMT/TEL/HIN/Morph/analyzer/data/gala.c
lib/ILMT/TEL/HIN/Morph/analyzer/data/ina_suff.c
lib/ILMT/TEL/HIN/Morph/analyzer/data/noun_Ena.c
lib/ILMT/TEL/HIN/Morph/analyzer/data/noun_lAMti.c
lib/ILMT/TEL/HIN/Morph/analyzer/data/noun_nom.c
lib/ILMT/TEL/HIN/Morph/analyzer/data/noun_pron.c
lib/ILMT/TEL/HIN/Morph/analyzer/data/num_gaMta_suff.c
lib/ILMT/TEL/HIN/Morph/analyzer/data/num_suff.c
lib/ILMT/TEL/HIN/Morph/analyzer/data/pdgm_offset_info
lib/ILMT/TEL/HIN/Morph/analyzer/data/shell/feature_value
lib/ILMT/TEL/HIN/Morph/analyzer/data/shell/offset
lib/ILMT/TEL/HIN/Morph/analyzer/data/shell/suff_info
lib/ILMT/TEL/HIN/Morph/analyzer/data/suff
lib/ILMT/TEL/HIN/Morph/analyzer/data/suff_info.c
lib/ILMT/TEL/HIN/Morph/analyzer/data/uword
lib/ILMT/TEL/HIN/Morph/analyzer/data/wanaM_suff.c
lib/ILMT/TEL/HIN/Morph/analyzer/data/wunna_suff.c
lib/ILMT/TEL/HIN/Morph/analyzer/data/yoVkka.c
# Logfile
morph.log
package ILMT::KAN::HIN::Morph;
use strict;
use warnings;
use Dir::Self;
use Data::Dumper;
use IO::Socket::INET;
use IPC::Run qw(run);
my $cwd = __DIR__;
use File::Slurp;
use File::Temp qw/ tempfile /;
use ILMT::KAN::HIN::Morph::remove_zwj;
use ILMT::KAN::HIN::Morph::word_split;
use ILMT::KAN::HIN::Morph::cat_change;
# Configuration for the external morph-analyzer daemon (TCP server mode).
# NOTE(review): the binary and dictionary names say "tel" (Telugu) inside
# the KAN (Kannada) package, and morph_analyzer() below shells out to
# "Morph/analyser/..." rather than "Morph/analyzer/..." — confirm which
# paths are the live ones.
my %daemons = (
"morph" => {
"path" => __DIR__ . "/Morph/analyzer/morph_tel.exe",
"args" => "--pdgmfilepath " . __DIR__ . "/Morph/analyzer/data/ " .
"--uwordpath " . __DIR__ . "/Morph/analyzer/data/dict_tel " .
"--dictfilepath " . __DIR__ . "/Morph/analyzer/data/dict " .
"-ULDWH --tcpserver",
"port" => "61003"
}
);
# Pipeline stages executed, in order, by process() below; each name is a
# sub in this package taking (data => $text, ...) and returning new text.
my @dispatch_seq = (
"remove_sentence_tag",
"remove_zwj",
"word_split",
"morph_analyzer",
"add_sentence_tag"
);
# Wrap the payload in a single SSF sentence:
# prepends <Sentence id="1"> and appends </Sentence>.
# Takes a flat hash (data => $text); returns the wrapped text.
sub add_sentence_tag {
    my %args = @_;
    my $text = $args{'data'};
    open my $in, '<', \$text or die $!;
    my @pieces = ("<Sentence id=\"1\">\n");
    while (defined(my $row = <$in>)) {
        push @pieces, $row;
    }
    push @pieces, "</Sentence>\n";
    close $in;
    return join '', @pieces;
}
# morph_analyzer(data => $text) -> $text
# Runs the external morphological analyzer binary over the input:
# writes the data to a temp file, invokes morph.out with its dictionary
# and paradigm paths, reads the analyzer's output file back, and cleans
# up both temp files.
# NOTE(review): this shells out to "Morph/analyser/..." while %daemons
# above points at "Morph/analyzer/..." — confirm the intended spelling.
sub morph_analyzer {
my %args = @_;
# input temp file for the analyzer
my ($fh3, $filename3) = tempfile("rXXXX", DIR => "/tmp", SUFFIX => ".tmp");
binmode($fh3, ":utf8");
print $fh3 $args{data};
close $fh3;
# pre-created output temp file (analyzer writes into it)
my ($fh, $filename) = tempfile("f1XXXX", DIR => "/tmp", SUFFIX => ".tmp");
close $fh;
qx{$cwd/Morph/analyser/morph.out --logfilepath morph.log --pdgmfilepath $cwd/Morph/analyser/data/ --uwordpath $cwd/Morph/analyser/data/dict_final --dictfilepath $cwd/Morph/analyser/data/dict/ -ULDWH --inputfile $filename3 --outputfile $filename };
my $morphout = read_file($filename);
unlink $filename or die "Couldn't delete temp file! $filename";
unlink $filename3 or die "Couldn't delete temp file! $filename3";
return $morphout;
}
# Run the module pipeline over the input text: each stage named in
# @dispatch_seq is resolved to a sub in this package and fed the output
# of the previous stage via the 'data' key. Input/output are character
# strings; bytes are used internally.
sub process {
    my %state = @_;
    utf8::encode($state{'data'});
    for my $stage (@dispatch_seq) {
        my $handler = __PACKAGE__->can($stage);
        $state{'data'} = $handler->(%state);
    }
    utf8::decode($state{'data'});
    return $state{'data'};
}
# Drop SSF tag lines (any line beginning with '<') and keep the rest.
# Takes a flat hash (data => $text); returns the filtered text.
sub remove_sentence_tag {
    my %args = @_;
    my $text = $args{'data'};
    open my $in, '<', \$text or die $!;
    my $kept = '';
    while (defined(my $row = <$in>)) {
        next if $row =~ /^</;
        $kept .= $row;
    }
    close $in;
    return $kept;
}
/**
* File Name : Avy_srch.c
*/
/** Function : Avy_srch
* Avy_srch(morph)
* This function checks whether the given word is Avy or not.
* If it is true it returns the numeric value.
 * It takes morph of char type as argument, where morph is the input word.
* Return :int , returns a numeric value indicating as a numeral or a special
* character
*/
#include <string.h>
#include <stdio.h>
#include <ctype.h>
#include "defn.h"
#include "morph_logger.h"
extern char *program_name;
extern FILE *log_file;
extern char *log_messg;
#define FUNCTION "Avy_srch()"
extern int sizeof_ind; /* table size */
extern char indword[][AVYWORDSIZE]; /* table start address */
/**
* Prototypes of the functions in this file with a file scope
*/
/*
 * AVY_srch(morph) - classify a word against the avyaya (indeclinable) table.
 * Returns -3 if the word contains no alphabetic character (treated as a
 * numeral / special token), -1 if it is found in the indword[] table,
 * -2 otherwise.
 * NOTE(review): the scan uses "loop1 <= morph_size", so it also tests the
 * terminating NUL — harmless (isalpha('\0') is false) but one iteration
 * more than needed. Also isalpha() on a plain char is implementation-
 * defined for bytes > 127; confirm the input is plain ASCII (WX notation).
 */
int AVY_srch(morph)
char morph[Morphsize]; /* word */
{
int loop1, size_avy_struct; /* size of one avy table entry */
char *avy_ptr, dummy_avy[AVYWORDSIZE]; /* pointer of avy , dummy avy */
int morph_size; /* word length */
int ISNUMBER; /* flag: no alphabetic char seen */
PRINT_LOG(log_file,
" checking whether the given word is Avy or not.\n");
size_avy_struct = sizeof(dummy_avy);
/* to check if given word is nonalphabet */
morph_size = strlen(morph);
ISNUMBER = 1;
/* any alphabetic character clears the numeral flag */
for (loop1 = 0; loop1 <= morph_size; ++loop1)
if (isalpha(morph[loop1])) { /* if morph is alpha numeric */
ISNUMBER = 0;
break;
}
if (ISNUMBER) /* if ISNUMBER is true */
return (-3); /* numeral */
else {
/*** searching in avy file ***/
/* my_bsearch returns a pointer to the key's location in the table
if present, else NULL */
avy_ptr =
(char *) my_bsearch(morph, (char *) indword, sizeof_ind,
size_avy_struct, strcmp);
sprintf(log_messg, "INFO: AVY has identifeid as %s", avy_ptr);
PRINT_LOG(log_file,
" checking whether the given word is Avy or not.\n");
if (avy_ptr != NULL) { /* searched in AVY file */
/* free(avy_ptr); */
return (-1);
} else {
return (-2);
} /* Neither a numeral nor an avy */
}
}
1 bAyi unk
1 oVNagixareV unk
1 byAktIriyAgalYu unk
1 XAlYi unk
1 wIvra unk
1 goVlYisuwwaveV unk
1 . unk
#ifndef C_API_V2_H
#define C_API_V2_H
#include "functions.h"
#include "core_functions.h"
#include "ssf_functions.h"
#include "fs_functions.h"
#endif
1 (( NP <fs af='child,n,m,p,3,0,,' head=1>
1.1 children NNS <fs af='child,n,m,p,3,0,,'>
))
2 (( VG <fs af='watch,v,m,s,3,0,,' aspect=PROG head=2>
2.1 are VBP <fs af='be,v,m,s,3,0,,' head=2>
2.2 watching VBG <fs af='watch,v,m,s,3,0,,' aspect=PROG>
))
3 (( PP <fs af='watch,,,,3,0,gvs,reddy' aspect=PRoG head=2>|<fs af='watch,,,,3,0,dfmd,reddy' aspect=PROG head=2>
3.1 in IN <fs af='in,v,m,s,3,0,,' aspect=PROG head=3>
3.2 (( NP <fs af='the,v,m,s,3,0,,' aspect=PROG head=4>
3.2.1 the DT <fs af='house,v,m,s,3,0,,' aspect=PROG head=3>
3.2.2 house NN <fs af='the,v,m,s,3,0,,' aspect=hgfd>
))
))
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment