Commit d5648ce8 authored by priyank's avatar priyank

pan shallow parser first commit

parents
#!/usr/bin/env python
# -*- coding: utf-8 -*-
'''
Created by
@author: priyank
'''
import json
import requests
from SocketServer import ThreadingMixIn
import threading
import codecs
import re
import cgi
from BaseHTTPServer import HTTPServer, BaseHTTPRequestHandler
from optparse import OptionParser
from urlparse import urlparse, parse_qs
import os
import sys
from argparse import ArgumentParser
#configuring commandline parser and check if the all command line parameters are valid
parser=ArgumentParser()
parser.add_argument('-c', '--serverConfigFile', help='server configuration file (with path)', required=True)
parser.add_argument('-i', '--inputFile', help='inputFile (with path)', required=True)
args = parser.parse_args()
#getting command line config files and check if files exist
serverFile = args.serverConfigFile
inputFile = args.inputFile
#function to get sentences from SSF
def sentenceCollector(inputString):
    """Split an SSF document into a list of per-sentence strings.

    Each returned element is one "<Sentence ...> ... </Sentence>" block with
    every kept line newline-terminated.  Lines outside a sentence that appear
    before a closing tag are carried into that sentence's block.
    """
    # Ensure back-to-back sentence tags are newline-separated before splitting.
    if "Sentence><Sentence" in inputString:
        inputString = inputString.replace('Sentence><Sentence', 'Sentence>\n<Sentence')
    sentences = []
    buffered = []
    for raw in inputString.strip().split("\n"):
        raw = raw.rstrip()
        if not raw:
            continue
        buffered.append(raw + "\n")
        # A closing tag flushes the accumulated block as one sentence.
        if raw.startswith('</Sentence'):
            sentences.append("".join(buffered))
            buffered = []
    return sentences
# Function to get output of lats module(wordgenerator)
def wordgenCollector(inputString):
    """Extract surface words (second tab field) from word-generator output.

    Skips <Sentence>/</Sentence> tag lines and SSF chunk brackets "((" / "))",
    and ignores lines with fewer than two tab-separated fields.  Returns the
    words joined by single spaces, with a trailing space when non-empty.
    """
    words = []
    for raw in inputString.strip().split("\n"):
        raw = raw.rstrip()
        fields = raw.split("\t")
        if not raw or len(fields) < 2:
            continue
        if raw.startswith('<Sentence ') or raw.startswith('</Sentence'):
            continue
        if fields[1] in ('((', '))'):
            continue
        words.append(fields[1])
    # Original implementation appended "word " per token; join reproduces that.
    return "".join(word + " " for word in words)
# ---------------------------------------------------------------------------
# Main driver: validate the input files, derive the tokenizer / translation /
# modules endpoints from the configured service URL, then run every paragraph
# of the input through the shallow-parser pipeline and print a report.
# (Indentation reconstructed; prints use %-formatting so the script behaves
# identically under Python 2 and prints sanely under Python 3.)
# ---------------------------------------------------------------------------
if not os.path.isfile(serverFile):
    print(" serverFile file %s does not exist." % serverFile)
    sys.exit(0)
if not os.path.isfile(inputFile):
    print(" inputFile file %s does not exist." % inputFile)
    sys.exit(0)

# Server registry: JSON object mapping language code -> service URL.
with open(serverFile) as server_file:
    server_details = json.load(server_file)
translationURL = server_details['pan']

# Read the whole UTF-8 input; one line is treated as one paragraph.
with codecs.open(inputFile, "rb", "utf-8") as input_handle:
    lines = input_handle.readlines()

# Derive sibling endpoints from the configured URL:
#   second-to-last path element '1'  -> tokenizer-only endpoint
#   elements 0-2 and 5-6 + /modules  -> module-listing endpoint
#   third-to-last path element '2'   -> full-translation endpoint
tokenizerURLArray = translationURL.split("/")
tokenizerURLArray[-2] = '1'
modulesURL = tokenizerURLArray[0] + "/" + tokenizerURLArray[1] + "/" + tokenizerURLArray[2] + "/" + tokenizerURLArray[5] + "/" + tokenizerURLArray[6] + "/modules"
tokenizerURL = "/".join(tokenizerURLArray)
tokenizerURLArray = translationURL.split("/")
tokenizerURLArray[-3] = '2'
translationURL = "/".join(tokenizerURLArray)

myheaders = {"Content-type": "application/x-www-form-urlencoded; charset=UTF-8"}
# Explicitly bypass any configured HTTP(S) proxy for these local services.
proxies = {
    "http": None,
    "https": None,
}

# Ask the server for the pipeline module list; the last module names the key
# of the final output, the second-to-last the intermediate (WX) output.
res = requests.post(modulesURL, proxies=proxies, headers=myheaders)
lastModule = ''
secondLastModule = ''
if res is not None:
    modulesList = json.loads(res.text)
    lastModule = modulesList[-1]
    secondLastModule = modulesList[-2]
else:
    print("Null response from server")
    sys.exit(0)

output = ""
wxoutput = ""
# Process each paragraph: tokenize it into SSF sentences, then run the full
# pipeline on every sentence and accumulate a human-readable report.
iii = 0
mystr = ""
for line in lines:
    line = line.strip()
    if line:
        # Tokenize the paragraph into SSF sentences.
        dataToSend = {"data": line.strip().encode('utf-8')}
        res = requests.post(tokenizerURL, proxies=proxies, headers=myheaders, data=dataToSend)
        tokenOut = json.loads(res.text)
        sentences = sentenceCollector(tokenOut['tokenizer-1'])
        jjj = 0
        mystr = mystr + "paraid:" + str((iii + 1)) + "\n" + line + "\n"
        for sentence in sentences:
            # Full pipeline call for one sentence; output keys look like
            # "<module>-<1-based position in modulesList>".
            dataToSend = {"data": sentence.strip().encode('utf-8').strip()}
            res = requests.post(translationURL, proxies=proxies, headers=myheaders, data=dataToSend)
            completeOut = json.loads(res.text)
            lastmoduleOutput = completeOut[lastModule + "-" + str((modulesList.index(lastModule)) + 1)]
            secondlastmoduleOutput = completeOut[secondLastModule + "-" + str((modulesList.index(secondLastModule)) + 1)]
            output = output + lastmoduleOutput + " \n\n"
            wxoutput = wxoutput + secondlastmoduleOutput + " \n\n"
            mystr = mystr + "sentid:" + str((jjj + 1)) + "\n" + line + "\n"
            mystr = mystr + lastmoduleOutput + "\n"
            jjj = jjj + 1
        iii = iii + 1
        output = output + " \n\n"
        wxoutput = wxoutput + " \n\n"
        mystr = mystr + "---------------------------------------------------------\n"
print(mystr)
# ILMT-API
An API for querying ILMT Systems
## For Installation of dependencies related to Sampark refer to dependencies.txt
## For Installation of perl related dependencies run script install.sh with following command:
```
sh install.sh
```
## Steps to install Shallowparser MT modules run:
```
cd ilmt-api-pan-shallowparser/
sh setup.sh
```
## Steps to execute API
```
unset PERL5LIB; . ./setup-env.sh;
perl api.pl prefork -l http://*:8686 -w 1
```
It will make the API in listening form.
## Steps to test API:
a) On browser run following URL:
```
http://<YOUR_IP>:8686/pan/hin/translate?data="ਭਾਰਤੀ ਸੰਸਕ੍ਰਿਤੀ ਵਿੱਚ ਰਾਸ਼ੀਆਂ ਦਾ ਆਪਣਾ ਮਹੱਤਵ ਹੈ। ."&pretty=true
```
b) here replace <YOUR_IP> with your IP address.
c) For testing API with curl run the following:
```
curl --noproxy '*' http://localhost:8686/pan/hin/translate --data data="ਭਾਰਤੀ ਸੰਸਕ੍ਰਿਤੀ ਵਿੱਚ ਰਾਸ਼ੀਆਂ ਦਾ ਆਪਣਾ ਮਹੱਤਵ ਹੈ। ."
```
d) For Shallowparser with UI run following command:
```
http://<YOUR_IP>:8686/
```
e) Now you would see proper translation panels.
## To execute sentences using API:
please update the host IP in server.json from 10.2.63.52 to your IP and run the following command:
```
$python GetShallowParserOutput.py -c server.json -i input.txt
```
## Shallowparser of Punjabi Language
#!/usr/bin/env perl
use Dir::Self;
use strict;
use warnings;
use Data::Dumper;
use Mojolicious::Lite;
use lib __DIR__ . "/lib";
use ILMT::Translator qw(get_translator get_langpairs);
use ILMT::PAN::HIN;
plugin qw(Mojolicious::Plugin::ForkCall);
# Translation endpoint (/:src/:tgt/translate).
# Runs the whole MT pipeline for the requested language pair.  The heavy work
# happens in a forked child (Mojolicious::Plugin::ForkCall) so the event loop
# stays responsive; render_later + a one-hour inactivity timeout keep the
# client connection open while the child runs.
any '/:src/:tgt/translate' => sub {
my $c = shift->render_later;
$c->inactivity_timeout(3600);
# Copy request parameters; 'data' and 'input' are synonyms — whichever was
# supplied is mirrored into the other.
my %args = %{$c->req->params->to_hash};
$args{'src_lang'} = $c->param('src');
$args{'tgt_lang'} = $c->param('tgt');
$args{'data'} = $args{'input'} = $args{'data'} // $args{'input'};
$c->fork_call(
# Child: look up the registered translator and run the full pipeline.
sub {
my (%args) = @_;
my $translator = get_translator(uc($c->param('src')), uc($c->param('tgt')));
return $translator->translate(%args);
},
[%args],
# Parent callback: render per-module results as pretty text or raw JSON.
sub {
my ($c, $final_result) = @_;
if (exists $args{"pretty"}) {
my $final_string = join "\n", map { "$_:\n$final_result->{$_}" } keys %$final_result;
$c->render(template => 'pretty', result => $final_string);
} else {
$c->render(json => $final_result);
}
}
);
};
# Partial-pipeline endpoint (/:src/:tgt/:start/:end).
# Identical to the translate route but runs only modules :start..:end
# (1-based, inclusive) via partial_p, again inside a forked child.
any '/:src/:tgt/:start/:end' => sub {
my $c = shift->render_later;
$c->inactivity_timeout(3600);
# 'data' and 'input' are synonyms; mirror whichever was supplied.
my %args = %{$c->req->params->to_hash};
$args{'src_lang'} = $c->param('src');
$args{'tgt_lang'} = $c->param('tgt');
$args{'data'} = $args{'input'} = $args{'data'} // $args{'input'};
$c->fork_call(
# Child: run only the requested slice of the pipeline.
sub {
my (%args) = @_;
my $translator = get_translator(uc($c->param('src')), uc($c->param('tgt')));
return $translator->partial_p($c->param('start'), $c->param('end'), %args);
},
[%args],
# Parent callback: render as pretty text or raw JSON.
sub {
my ($c, $final_result) = @_;
if (exists $args{"pretty"}) {
my $final_string = join "\n", map { "$_:\n$final_result->{$_}" } keys %$final_result;
$c->render(template => 'pretty', result => $final_string);
} else {
$c->render(json => $final_result);
}
}
);
};
# Synchronous partial-pipeline endpoint: runs modules :start..:end in-process
# (no fork) and renders the result immediately.
any '/partialtranslate/new/:src/:tgt/:start/:end/partial' => sub {
    print "inside partialtranslate/new ............";
    my $c = shift;
    my %params = %{ $c->req->params->to_hash };
    @params{qw(src_lang tgt_lang)} = ($c->param('src'), $c->param('tgt'));
    # 'data' and 'input' are synonyms; mirror whichever was supplied.
    $params{'data'} = $params{'input'} = $params{'data'} // $params{'input'};
    my $engine = get_translator(uc $c->param('src'), uc $c->param('tgt'));
    my $out = $engine->partial_p($c->param('start'), $c->param('end'), %params);
    unless (exists $params{"pretty"}) {
        $c->render(json => $out);
        return;
    }
    my $text = join "\n", map { "$_:\n$out->{$_}" } keys %$out;
    $c->render(template => 'pretty', result => $text);
};
# Synchronous full-pipeline endpoint: translates in-process (no fork) and
# renders the result immediately.
any '/mytranslate/new/api/:src/:tgt/mytranslate' => sub {
    print "inside mytranslate ............";
    my $c = shift;
    my %params = %{ $c->req->params->to_hash };
    @params{qw(src_lang tgt_lang)} = ($c->param('src'), $c->param('tgt'));
    # 'data' and 'input' are synonyms; mirror whichever was supplied.
    $params{'data'} = $params{'input'} = $params{'data'} // $params{'input'};
    my $engine = get_translator(uc $c->param('src'), uc $c->param('tgt'));
    my $out = $engine->translate(%params);
    unless (exists $params{"pretty"}) {
        $c->render(json => $out);
        return;
    }
    my $text = join "\n", map { "$_:\n$out->{$_}" } keys %$out;
    $c->render(template => 'pretty', result => $text);
};
# Report the number of pipeline modules for a language pair as plain text.
any '/:src/:tgt/' => sub {
    my $c = shift;
    my $engine = get_translator(uc($c->param('src')), uc($c->param('tgt')));
    my $module_count = scalar @{ $engine->{seq} };
    $c->render(text => $module_count);
};
# List the pipeline module names (lower-cased) for a language pair as JSON.
any '/:src/:tgt/modules' => sub {
    my $c = shift;
    my $engine = get_translator(uc($c->param('src')), uc($c->param('tgt')));
    my @names;
    push @names, lc($_) for @{ $engine->{seq} };
    $c->render(json => \@names);
};
# Expose every registered language pair as JSON (also Dumper-printed to
# stdout for debugging, as in the original).
any '/langpairs' => sub {
    my $c = shift;
    my $pairs = { get_langpairs() };
    print Dumper($pairs);
    $c->render(json => $pairs);
};
# Serve the bundled single-page UI for GET /.
get '/' => sub {
my $c = shift;
$c->reply->static('index.html');
};
# Start the Mojolicious application (mode comes from the command line,
# e.g. "prefork -l http://*:8686" as described in the README).
app->start;
__DATA__
@@ pretty.html.ep
<pre><%= $result %></pre>
Dependencies:
+ jdk8
Follow below steps to install jdk:-
a) Download Jdk-8 from oracle website for linux 64 bit tar
b) cp downloaded package to '/usr/local'
c) tar -xvf jdk-<version>-linux-x64.tar.gz
d) vim /etc/profile
e) export JAVA_HOME="/usr/local/jdk<version>"
f) export PATH=$PATH:$JAVA_HOME/bin
g) source /etc/profile
h) java -version
+ CRF++ 0.51+
Follow below steps to install CRF++:-
$ cd CRF++-0.51
$./configure
$make
$make install
check if it is installed properly or not :-
crf_test --version
Note:-
if 'libcrfpp.so.0' is not found after the CRF installation, then use the command below
ln -s /usr/local/lib/libcrfpp.so.0 /usr/lib/libcrfpp.so.0
+ gcc
+ gdbm
+ libgdbm-dev
+ glib-2.0
+ libglib2.0-dev
For ubuntu use following command:
sudo apt-get install libgdbm-dev libglib2.0-dev g++
# apache ant installation
- wget https://downloads.apache.org/ant/binaries/apache-ant-1.10.6-bin.tar.gz
- sudo cp apache-ant-1.10.6-bin.tar.gz /usr/local/
- cd /usr/local/
- sudo su
- tar -xvf apache-ant-1.10.6-bin.tar.gz
- # exit from root by typing exit
- vim .bashrc
- Add following lines at the bottom of file:
export ANT_HOME=/usr/local/<ANT DIRECTORY
export PATH=${PATH}:${ANT_HOME}/bin
- save the file
- $source .bashrc
ਭਾਰਤੀ ਸੰਸਕ੍ਰਿਤੀ ਵਿੱਚ ਰਾਸ਼ੀਆਂ ਦਾ ਆਪਣਾ ਮਹੱਤਵ ਹੈ।
ਰਾਸ਼ੀਆਂ ਨਾ ਸਿਰਫ਼ ਭਵਿੱਖ ਦੀ ਸੂਚਨਾ ਦਿੰਦੀਆਂ ਹਨ, ਬਲਕਿ ਵਿਅਕਤੀਤਵ ਦਾ ਸ਼ੀਸ਼ਾ ਵੀ ਹੁੰਦੀਆਂ ਹਨ।
ਤਾਂ ਦੇਖੋ ਕੀ ਕਹਿੰਦੀ ਹੈ ਤੁਹਾਡੀ ਗਰਲਫ੍ਰੈਂਡ ਦੀ ਰਾਸ਼ੀ।
ਕਿੰਨਾ ਗੁੱਸਾ ਕਰਦੀ ਹੈ ਤੁਹਾਡੀ ਵੈਲੇਨਟਾਈਨ।
ਜੇਕਰ ਤੁਹਾਡੀ ਗਰਲਫ੍ਰੈਂਡ ਦੀ ਰਾਸ਼ੀ ਇਹ ਹੈ ਤਾਂ ਤੁਸੀਂ ਸੁਪਰ ਲੱਕੀ ਹੋ।
ਇਸ ਰਾਸ਼ੀ ਵਾਲੀਆਂ ਕੁੜੀਆਂ ਮੇਮਣੇ ਦੀ ਤਰ੍ਹਾਂ ਸਿੱਧੀਆਂ-ਸਾਧੀਆਂ ਹੁੰਦੀਆਂ ਹਨ।
ਆਮ ਤੌਰ 'ਤੇ ਇਹਨਾਂ ਨੂੰ ਗੁੱਸਾ ਨਹੀਂ ਆਉਂਦਾ ਹੈ।
ਇਸ ਰਾਸ਼ੀ ਦੀ ਮੁਟਿਆਰਾਂ ਜਿੰਨੀਆਂ ਮਿਹਨਤੀ ਹੁੰਦੀਆਂ ਹਨ, ਓਨੀਆਂ ਹੀ ਗੁੱਸੇ ਵਾਲੀਆਂ ਵੀ ਹੁੰਦੀਆਂ ਹਨ।
ਪਰ ਡੌਂਟ ਵਰੀ, ਇਹਨਾਂ ਨੂੰ ਆਪਣੇ ਗੁੱਸੇ ਤੇ ਕਾਬੂ ਰੱਖਣਾ ਵੀ ਆਉਂਦਾ ਹੈ।
# Install system and Perl dependencies for the ILMT shallow-parser API.
sudo apt-get install curl
# Bootstrap cpanminus, then install the Perl modules the API uses.
curl -L http://cpanmin.us | perl - --sudo App::cpanminus
sudo cpanm Data::Dumper
sudo cpanm Dir::Self
sudo cpanm Mojolicious::Lite
sudo cpanm Module::Runtime
sudo cpanm Module::Pluggable
sudo cpanm Mojolicious::Plugin::ForkCall
sudo cpanm IPC::Run
# Python client dependencies.  apt packages are named python-argparse and
# python-requests -- plain "requests" is not an apt package name.
sudo apt-get install python-argparse python-requests
# ILMT::PAN::HIN -- declares and registers the Punjabi->Hindi shallow-parser
# pipeline with ILMT::Translator.
package ILMT::PAN::HIN;
use strict;
use warnings;
use Data::Dumper;
use ILMT::Translator;
# Ordered module sequence for the PAN->HIN pipeline.  A module's 1-based
# position here is the suffix used in result identifiers (e.g. "chunker-4").
my @seq = (
"Tokenizer",
"Morph",
"POSTagger",
"Chunker",
"Prune",
"GuessMorph",
"PickOneMorph",
"ComputeHead",
"ComputeVibhakti",
"WX2UTF"
);
# Construct and register the translator (new_translator stores it in
# ILMT::Translator's dispatch table).  NOTE(review): there is no trailing
# "1;" — the module relies on this assignment returning a true value.
my $langpair_obj = new_translator ILMT::Translator("PAN", "HIN", \@seq);
package ILMT::Translator;
use strict;
use warnings;
use Dir::Self;
use Data::Dumper;
use Exporter qw(import);
use Module::Pluggable::Object;
use Module::Runtime qw(use_module);
our @EXPORT_OK = qw(get_translator get_langpairs);
my %translator_table;
# Constructor: build a translator for a (src, tgt) language pair and register
# it in the package-wide %translator_table.
# Positional args: class name, source code, target code, arrayref of module
# names (the pipeline sequence).  Returns the blessed translator object.
sub new_translator {
my $class = shift;
my $self = {
src => shift,
tgt => shift,
};
# Discover and load every ILMT::<SRC>::<TGT>::<Module> plugin on disk.
my $search_path = "ILMT::$self->{src}::$self->{tgt}";
@{$self->{plugins}} = map use_module($_),
grep /^${search_path}::[^:]+$/,
Module::Pluggable::Object->new(search_path => $search_path)->plugins;
$self->{seq} = shift;
bless $self, $class;
# Register this module as a translator service
$translator_table{$self->{src}}{$self->{tgt}} = $self;
return $self;
}
# Look up the translator registered for a (src, tgt) language pair.
# Returns undef when no such pair has been registered.
sub get_translator {
    my ($source, $target) = @_;
    return $translator_table{$source}{$target};
}
# Return a flat list of lower-cased language pairs, e.g. (pan => ['hin']).
# NOTE(review): the Dumper call is debug output to STDOUT; kept as-is.
sub get_langpairs {
print Dumper(\%translator_table);
return map +(lc $_ => [ map lc, keys %{$translator_table{$_}} ]), keys %translator_table;
}
# Run the full module pipeline over %args.
# Each module's output is stored under the key "<module>-<1-based position>"
# and is also fed forward as the next module's 'data' input.  Returns a
# hashref containing every per-module output.
sub translate {
my ($self, %args) = @_;
my $result = "";
my @identifiers;
my %final_result;
my @dispatch_seq = @{$self->{seq}};
foreach my $index (0 .. $#dispatch_seq) {
# Post-increment trick: $module is fetched at the 0-based $index, after
# which $index holds the 1-based position used in the identifier below.
my $module = $dispatch_seq[$index ++];
my $identifier = lc("${module}-$index");
push @identifiers, $identifier;
my $package = "ILMT::$self->{src}::$self->{tgt}::$module";
$args{$identifier} = $package->can('process')->(%args);
$args{'data'} = $args{$identifier};
}
# Copy only the per-module outputs (not src_lang/tgt_lang/data) out of %args.
@final_result{@identifiers} = @args{@identifiers};
return \%final_result;
}
# Run a contiguous slice of the pipeline: modules $start..$end (1-based,
# inclusive).  Same identifier scheme and data-forwarding as translate().
sub partial_p {
my ($self, $start, $end, %args) = @_;
my $result = "";
my @dispatch_seq = @{$self->{seq}};
my @identifiers;
my %final_result;
foreach my $index ($start .. $end) {
# $index is 1-based here, so subtract one for the array lookup.
my $module = $dispatch_seq[$index - 1];
my $identifier = lc("${module}-$index");
push @identifiers, $identifier;
print "module ## $module\n";
my $package = "ILMT::$self->{src}::$self->{tgt}::$module";
$args{$identifier} = $package->can('process')->(%args);
$args{'data'} = $args{$identifier};
}
@final_result{@identifiers} = @args{@identifiers};
return \%final_result;
}
1;
use strict;
use warnings;
use Data::Dumper;
use Graph::Directed;
use JSON;
use List::Util qw(reduce);
use Mojolicious::Lite;
use Mojo::Redis2;
use lib "./lib";
use ILMT::PAN::HIN::Chunker;
# Hypnotoad deployment config: listen on all interfaces, port 80.
app->config(hypnotoad => {listen => ['http://*:80']});
# This pipeline node's identity, and in-memory job state keyed by job id.
my $modulename = "ilmt.pan.hin.chunker";
my %database = ();
# Shared Redis connection (host "redis", presumably a docker-compose service
# name — TODO confirm) used to publish finished jobs.
helper redis => sub {
state $r = Mojo::Redis2->new(url => "redis://redis:6379");
};
# Adapt the collected pipeline inputs for the Chunker module.
# With exactly one upstream input its value becomes 'data' ((%{$hash})[1] is
# the value of the lone key/value pair); with several inputs the trailing
# "_<modid>" suffix is stripped from each key instead.
sub process {
my $hash = $_[0];
my %newhash;
if (keys %{$hash} == 1) {
%newhash = (data => (%{$hash})[1]);
} else {
@newhash{ map { s/_[^_]*$//r } keys %{$hash} } = values %{$hash};
}
return ILMT::PAN::HIN::Chunker::process(%newhash);
}
# Send a JSON-encoded {Error => ...} payload with HTTP status 400.
sub genError {
    my ($c, $error) = @_;
    $c->render(json => to_json({Error => $error}), status => 400);
}
# Build a Graph::Directed instance from an adjacency-list hashref of the
# form { from => [to, ...], ... }.
sub genDAGGraph {
    my ($adjacency) = @_;
    my $graph = Graph::Directed->new();
    while (my ($src, $targets) = each %$adjacency) {
        $graph->add_edge($src, $_) for @$targets;
    }
    return $graph;
}
# Pipeline node endpoint.  Receives a job (modid, jobid, data, edges),
# collects inputs from all upstream modules, runs the chunker once every
# expected input has arrived, then forwards the enriched job to each
# downstream module (or publishes completion to Redis at the DAG's end).
post '/pipeline' => sub {
my $c = shift;
my $ilmt_json = decode_json($c->req->body);
# Each missing field renders a 400 and aborts the handler.
my $ilmt_modid = $ilmt_json->{modid} || genError($c, "No ModuleID Specified!") && return;
my $ilmt_jobid = $ilmt_json->{jobid} || genError($c, "No JobID Specified!") && return;
my $ilmt_data = $ilmt_json->{data} || genError($c, "No Data Specified!") && return;
my $ilmt_dag = genDAGGraph($ilmt_json->{edges});
genError($c, "Edges not specified!") && return if (!$ilmt_dag);
my $ilmt_module = $modulename . '_' . $ilmt_modid;
# Upstream module names: sources of every edge pointing at this node.
my @ilmt_inputs = map {@$_[0]} $ilmt_dag->edges_to($ilmt_module);
if (!$database{$ilmt_jobid}) {
$database{$ilmt_jobid} = {};
$database{"data_$ilmt_jobid"} = {};
}
# Record whichever upstream outputs this request carried (key sans modid).
foreach (@ilmt_inputs) {
my $input_module = $_ =~ s/_[^_]*$//r;
$database{$ilmt_jobid}{$input_module} = $ilmt_data->{$_} if $ilmt_data->{$_};
}
%{$database{"data_$ilmt_jobid"}} = (%{$database{"data_$ilmt_jobid"}}, %{$ilmt_data});
# Only run once every expected upstream input has been collected.
if (@ilmt_inputs == keys %{$database{$ilmt_jobid}}) {
$c->render(json => "{Response: 'Processing...'}", status => 202);
my $ilmt_output = process($database{$ilmt_jobid});
$ilmt_data->{$ilmt_module} = $ilmt_output;
%{$ilmt_data} = (%{$ilmt_data}, %{$database{"data_$ilmt_jobid"}});
# Downstream module names: targets of every edge leaving this node.
my @ilmt_next = map {@$_[1]} $ilmt_dag->edges_from($ilmt_module);
if (@ilmt_next) {
foreach (@ilmt_next) {
# Split "<host>_<modid>" into its parts for the forwarded request.
my @module_info = split(/_([^_]+)$/, $_);
my $next_module = $module_info[0];
$ilmt_json->{modid} = $module_info[1];
$c->ua->post("http://$next_module/pipeline" => json
=> from_json(encode_json($ilmt_json), {utf8 => 1}) => sub {
my ($ua, $tx) = @_;
my $msg = $tx->error ? $tx->error->{message} : $tx->res->body;
$c->app->log->debug("[$ilmt_jobid]: $msg\n");
});
}
} else {
# End of the DAG: announce completion on the job's Redis channel.
$c->redis->publish($ilmt_jobid => encode_json($ilmt_json));
}
delete $database{$ilmt_jobid};
} else {
$c->render(json => "{Response: 'Waiting for more inputs...'}", status => 202);
}
};
app->start;
# ILMT::PAN::HIN::Chunker -- chunker stage: SSF -> TNT -> CRF++ -> SSF.
package ILMT::PAN::HIN::Chunker;
use strict;
use warnings;
use Dir::Self;
use Data::Dumper;
use IPC::Run qw(run);
# Sub-stages run in order by process(); each consumes and produces 'data'.
my @dispatch_seq = (
"ssf2tnt",
"crf_test",
"bio2ssf",
);
# Run all sub-stages over $args{data}.  The data is byte-encoded before the
# external CRF++ call and decoded back to a character string afterwards.
sub process {
my %args = @_;
utf8::encode($args{"data"});
foreach my $submodule (@dispatch_seq) {
$args{'data'} = __PACKAGE__->can($submodule)->(%args);
}
utf8::decode($args{"data"});
return $args{"data"};
}
# Convert SSF token lines into TNT-style "token<TAB>tag<TAB>fs" rows.
# Sentence ends (</Sentence...> or all-whitespace lines) become empty lines;
# SSF structural rows (tags in column 1, chunk brackets "((" / "))") are
# dropped.  Runs of spaces inside a line are protected as "___".
sub ssf2tnt {
    my %par = @_;
    my $buffer = $par{'data'};
    my $converted = "";
    open my $in, '<', \$buffer or die $!;
    while (my $row = <$in>) {
        chomp($row);
        # Sentence close tags and blank rows both emit a sentence separator.
        if ($row =~ /<\/S/ or $row =~ /^\s*$/) {
            $converted .= "\n";
            next;
        }
        $row =~ s/[ ]+/___/g;
        my ($addr, $token, $tag, $fs) = split(/[\t]+/, $row);
        # Skip structural rows; keep only real token lines.
        next if $addr =~ /\</ or $token eq "((" or $token eq "))";
        $converted .= "$token\t$tag\t$fs\n";
    }
    return $converted;
}
# Convert CRF++ BIO output back into SSF chunk structure.
# Input lines carry 4 whitespace-separated columns (token, POS, fs, BIO tag);
# blank lines separate sentences.  Chunk inconsistencies (I-x with no
# preceding B-x) are collected in $error — NOTE(review): $error is never
# returned or reported.
sub bio2ssf {
my %par = @_;
my $data = $par{'data'};
my $result = "";
open my $fh, '<', \$data or die $!;
my $line = "";
# State: $startFlag marks "next token starts a sentence"; $wno is the word
# number inside the current chunk; $cno is the chunk counter (note: never
# reset between sentences); $prevCTag tracks the open chunk ("O" = none).
my $startFlag = 1;
my $wno = 1;
my $prevCTag = "";
my $error = "";
my $lno = 0;
my $sno = 1;
my $cno=0;
while($line = <$fh>)
{
$lno ++;
if($line =~ /^\s*$/)
{ # start of a sentence
$result .= "\t))\t\t\n";
$result .= "</Sentence>\n\n";
$startFlag = 1;
$wno = 1;
$prevCTag = "";
$sno ++;
next;
}
if($startFlag == 1)
{
$result .= "<Sentence id=\"$sno\">\n";
}
chomp($line);
my @cols = split(/\s+/,$line);
# B-<tag>: close any open chunk, then open a new one.
if($cols[3] =~ /^B-(\w+)/)
{
my $ctag = $1;
if($prevCTag ne "O" && $startFlag == 0)
{
$result .= "\t))\t\t\n";
$wno++;
}
$cno++;
$result .= "$cno\t((\t$ctag\t\n";
$wno=1;
$prevCTag = $ctag;
}
# O: close any open chunk; the token stays outside chunk brackets.
elsif($cols[3] =~ /^O/)
{
if($prevCTag ne "O" && $startFlag == 0)
{
$result .= "\t))\t\t\n";
$wno++;
}
$prevCTag = "O";
}
if($cols[3] =~ /I-(\w+)/ )
{ # check for inconsistencies .. does not form a chunk if there r inconsistencies
my $ctag = $1;
if($ctag ne $prevCTag)
{
$error =$error . "Inconsistency of Chunk tag in I-$ctag at Line no:$lno : There is no B-$ctag to the prev. word\n";
}
}
# Restore the spaces that ssf2tnt protected as "___", then emit the token.
$cols[2]=~s/___/ /g;
$result .= "$cno.$wno\t$cols[0]\t$cols[1]\t$cols[2]\n";
$wno ++;
$startFlag = 0;
}
return $result;
}
# Run the external CRF++ tagger over the TNT-formatted data and return its
# raw BIO-tagged output.  The model path is resolved relative to this
# module's directory; crf_test must be installed at /usr/local/bin.
sub crf_test {
my %par = @_;
my $data = $par{'data'};
my $result = "";
run ["/usr/local/bin/crf_test", "-m", __DIR__ . "/Chunker/models/chunker_pan.model"], \$data, \$result;
return $result;
}
[submodule "API"]
path = API
url = https://gitlab.com/ilmt/ILMT-PAN-HIN-SSFAPI.git
use strict;
use warnings;
use Data::Dumper;
use Graph::Directed;
use JSON;
use List::Util qw(reduce);
use Mojolicious::Lite;
use Mojo::Redis2;
use lib qw(lib API/lib);
use ILMT::PAN::HIN::ComputeHead;
# This pipeline node's identity, and in-memory job state keyed by job id.
my $modulename = "ilmt.pan.hin.computehead";
my %database = ();
# Shared Redis connection (host "redis", presumably a docker-compose service
# name — TODO confirm) used to publish finished jobs.
helper redis => sub {
state $r = Mojo::Redis2->new(url => "redis://redis:6379");
};
# Adapt the collected pipeline inputs for the ComputeHead module: a single
# upstream value becomes 'data'; otherwise the trailing "_<modid>" suffix is
# stripped from each input key.
sub process {
    my ($inputs) = @_;
    my %prepared;
    if (1 == keys %{$inputs}) {
        %prepared = (data => (%{$inputs})[1]);
    } else {
        @prepared{ map { s/_[^_]*$//r } keys %{$inputs} } = values %{$inputs};
    }
    return ILMT::PAN::HIN::ComputeHead::process(%prepared);
}
# Reply with HTTP 400 and a JSON-encoded {Error => ...} body.
sub genError {
    my $ctx = shift;
    my $message = shift;
    $ctx->render(json => to_json({Error => $message}), status => 400);
}
# Turn an adjacency-list hashref ({from => [to, ...]}) into Graph::Directed.
sub genDAGGraph {
    my %adjacency = %{$_[0]};
    my $dag = Graph::Directed->new();
    for my $source (keys %adjacency) {
        $dag->add_edge($source, $_) for @{$adjacency{$source}};
    }
    return $dag;
}
# Pipeline node endpoint (same protocol as the chunker node).  Collects
# inputs from upstream modules, runs ComputeHead once all expected inputs
# have arrived, then forwards the enriched job downstream or publishes
# completion to Redis.
post '/pipeline' => sub {
my $c = shift;
my $ilmt_json = decode_json($c->req->body);
# Each missing field renders a 400 and aborts the handler.
my $ilmt_modid = $ilmt_json->{modid} || genError($c, "No ModuleID Specified!") && return;
my $ilmt_jobid = $ilmt_json->{jobid} || genError($c, "No JobID Specified!") && return;
my $ilmt_data = $ilmt_json->{data} || genError($c, "No Data Specified!") && return;
my $ilmt_dag = genDAGGraph($ilmt_json->{edges});
genError($c, "Edges not specified!") && return if (!$ilmt_dag);
my $ilmt_module = $modulename . '_' . $ilmt_modid;
# Upstream module names: sources of every edge pointing at this node.
my @ilmt_inputs = map {@$_[0]} $ilmt_dag->edges_to($ilmt_module);
if (!$database{$ilmt_jobid}) {
$database{$ilmt_jobid} = {};
$database{"data_$ilmt_jobid"} = {};
}
# Record whichever upstream outputs this request carried (key sans modid).
foreach (@ilmt_inputs) {
my $input_module = $_ =~ s/_[^_]*$//r;
$database{$ilmt_jobid}{$input_module} = $ilmt_data->{$_} if $ilmt_data->{$_};
}
%{$database{"data_$ilmt_jobid"}} = (%{$database{"data_$ilmt_jobid"}}, %{$ilmt_data});
# Only run once every expected upstream input has been collected.
if (@ilmt_inputs == keys %{$database{$ilmt_jobid}}) {
$c->render(json => "{Response: 'Processing...'}", status => 202);
my $ilmt_output = process($database{$ilmt_jobid});
$ilmt_data->{$ilmt_module} = $ilmt_output;
%{$ilmt_data} = (%{$ilmt_data}, %{$database{"data_$ilmt_jobid"}});
# Downstream module names: targets of every edge leaving this node.
my @ilmt_next = map {@$_[1]} $ilmt_dag->edges_from($ilmt_module);
if (@ilmt_next) {
foreach (@ilmt_next) {
my @module_info = split(/_([^_]+)$/, $_);
my $next_module = $module_info[0];
$ilmt_json->{modid} = $module_info[1];
$c->ua->post("http://$next_module/pipeline" => json
=> from_json(encode_json($ilmt_json), {utf8 => 1}) => sub {
my ($ua, $tx) = @_;
my $msg = $tx->error ? $tx->error->{message} : $tx->res->body;
$c->app->log->debug("[$ilmt_jobid]: $msg\n");
});
}
} else {
# End of the DAG: announce completion on the job's Redis channel.
$c->redis->publish($ilmt_jobid => encode_json($ilmt_json));
}
delete $database{$ilmt_jobid};
} else {
$c->render(json => "{Response: 'Waiting for more inputs...'}", status => 202);
}
};
app->start;
# ILMT::PAN::HIN::ComputeHead -- walks every sentence of an SSF story and
# annotates each chunk with its head word (nominal chunks via copy_np_head,
# verbal chunks via copy_vg_head).
package ILMT::PAN::HIN::ComputeHead;
use strict;
use warnings;
use Dir::Self;
use Data::Dumper;
use ILMT::PAN::HIN::SSFAPI::feature_filter;
use ILMT::PAN::HIN::SSFAPI::shakti_tree_api;
use ILMT::PAN::HIN::ComputeHead::make_chunk_name;
use ILMT::PAN::HIN::ComputeHead::copy_np_head;
use ILMT::PAN::HIN::ComputeHead::copy_vg_head;
# Pipeline entry point: takes 'data' (SSF text), returns the annotated SSF.
sub process {
my %args = @_;
my $input = $args{'data'};
utf8::encode($input);
# Load the story into the shakti_tree_api's global tree structures.
read_story(\$input);
my $numBody = get_bodycount();
my $result;
my $body;
for(my($bodyNum)=1;$bodyNum<=$numBody;$bodyNum++)
{
$body = &get_body($bodyNum,$body);
# Count the number of Paragraphs in the story
my($numPara) = &get_paracount($body);
#print STDERR "Paras : $numPara\n";
# Iterate through paragraphs in the story
for(my($i)=1;$i<=$numPara;$i++)
{
my($para);
# Read Paragraph
$para = &get_para($i);
# Count the number of sentences in this paragraph
my($numSent) = &get_sentcount($para);
# print STDERR "\n $i no.of sent $numSent";
#print STDERR "Para Number $i, Num Sentences $numSent\n";
#print $numSent."\n";
# Iterate through sentences in the paragraph
for(my($j)=1;$j<=$numSent;$j++)
{
#print " ... Processing sent $j\n";
# Read the sentence which is in SSF format
my($sent) = &get_sent($para,$j);
#print STDERR "$sent";
# print "check--\n";
# &print_tree($sent);
# Get the nodes of the sentence (words in our case)
#Copy NP head
# &AddID($sent);
&make_chunk_name($sent);
&copy_np_head($sent);
#Copy NP VG head
&copy_vg_head($sent);
}
}
}
# printstory() writes to the selected handle; capture it into $result via
# an in-memory file, then restore STDOUT.
open OUTFILE, '>', \$result or die $!;
select(OUTFILE);
printstory();
select(STDOUT);
utf8::decode($result);
return $result;
}
1;
# Annotate every nominal-style chunk of a sentence with its head word by
# delegating to copy_head_np for each chunk tag.
package ILMT::PAN::HIN::ComputeHead::copy_np_head;
use Exporter qw(import);
use ILMT::PAN::HIN::ComputeHead::get_head_np;
our @EXPORT = qw(copy_np_head);
# For the details please see get_head.pl
sub copy_np_head
{
    # $_[0], not the one-element slice @_[0]: same value, but avoids the
    # "scalar value better written as $_[0]" warning under use warnings.
    my $sent = $_[0];
    &copy_head_np("NP", $sent);
    &copy_head_np("JJP", $sent);
    &copy_head_np("CCP", $sent);
    &copy_head_np("RBP", $sent);
    &copy_head_np("BLK", $sent);
    &copy_head_np("NEGP", $sent);
    #&print_tree();
} #End of Sub
1;
# Annotate every verbal chunk of a sentence with its head word by delegating
# to copy_head_vg for each verb-group chunk tag.
package ILMT::PAN::HIN::ComputeHead::copy_vg_head;
use Exporter qw(import);
use ILMT::PAN::HIN::SSFAPI::feature_filter;
use ILMT::PAN::HIN::SSFAPI::shakti_tree_api;
use ILMT::PAN::HIN::ComputeHead::get_head_vg;
our @EXPORT = qw(copy_vg_head);
#for details please check get_head.pl
sub copy_vg_head
{
    # $_[0], not the one-element slice @_[0]: same value, but warning-clean.
    my $sent = $_[0];
    &copy_head_vg("VGF", $sent);
    &copy_head_vg("VGNF", $sent);
    &copy_head_vg("VGINF", $sent);
    &copy_head_vg("VGNN", $sent);
}
1;
# ILMT::PAN::HIN::ComputeHead::get_head_np -- computes the head word of
# nominal-style chunks and writes it into the chunk's feature structure as
# head='<name>'.  Relies on package globals ($match, $i, $j, ... — this file
# does not "use strict") and the shakti_tree_api accessors.
package ILMT::PAN::HIN::ComputeHead::get_head_np;
use Exporter qw(import);
use ILMT::PAN::HIN::SSFAPI::feature_filter;
use ILMT::PAN::HIN::SSFAPI::shakti_tree_api;
our @EXPORT = qw(copy_head_np);
# Args: chunk tag (NP/JJP/CCP/RBP/BLK/NEGP), sentence handle, and an unused
# third arg ($vibh_home).  Modifies the sentence tree in place.
sub copy_head_np
{
my ($pos_tag)=$_[0];
my ($sent)=$_[1];
my $vibh_home = $_[2];
my %hash=();
# Map the chunk tag to the POS-tag prefix that identifies its head.
if($pos_tag =~ /^NP/)
{
$match = "NN"; #Modified in version 1.4
#For NST
}
if($pos_tag =~ /^V/ )
{
$match = "V";
}
if($pos_tag =~ /^JJP/ )
{
$match = "J";
}
if($pos_tag =~ /^CCP/ )
{
$match = "CC";
}
if($pos_tag =~ /^RBP/ )
{
$match = "RB";
}
# Walk all chunks with this tag, last to first.
my @np_nodes = &get_nodes(3,$pos_tag,$sent);
for($i=$#np_nodes;$i>=0;$i--)
{
my (@childs)=&get_children($np_nodes[$i],$sent);
# Scan the chunk's children right-to-left for the first matching POS tag.
$j = $#childs;
while($j >= 0)
{
my($f0,$f1,$f2,$f3,$f4)=&get_fields($childs[$j],$sent);
$word=$f2;
my $fs_ref = &read_FS($f4);
my @name_val = &get_values("name", $fs_ref);
if($f3 eq "PRP") ##to make sure that the pronouns are identified correctly
{
$f3 = "NN";
}
if($f3=~/^$match/)
{
# Matching child found: build head='<name>' from its feature structure.
if($hash{$f2} eq "")
{
$hash{$word}=1;
}
elsif($hash{$f2} ne "")
{
$hash{$word}=$hash{$word}+1;
}
$id=$hash{$word};
my ($x,$y)=split(/>/,$f4);
$x =~ s/ name=[^ >]+//;
if($id==1)
{
$att_val="$word";
}
elsif($id!=1)
{
$att_val="$word"."_"."$id";
}
$new_fs = $x." head=\'$name_val[0]\'>";
#$new_fs = $x." head=$name_val[0]>";
#my $new_head_fs=$x." name=\"$att_val\">";
#&modify_field($childs[$j],4,$new_head_fs,$sent);
last;
}
elsif($j == 0)
{
# Fallback: no child matched — take the last child, stepping left past
# PSP/PRP function words.
my($f0,$f1,$f2,$f3,$f4)=&get_fields($childs[$#childs],$sent);
#-----------------modifications to handle PRP and PSP case------------------
$change=$#childs;
while(1)
{
if($f3 eq "PSP" or $f3 eq "PRP")
{
$change=$change-1;
if($childs[$change] eq "") ##Modifications per Version 1.3
{ ##To handle NP chunks with single PSP
$change=$change+1; ##
last; ##
}
($f0,$f1,$f2,$f3,$f4)=&get_fields($childs[$change],$sent);
}
else
{
last;
}
}
$new_fs = $f4;
$word=$f2;
my $fs_ref = &read_FS($f4);
my @name_val = &get_values("name", $fs_ref);
if($hash{$f2} eq "")
{
$hash{$word}=1;
}
elsif($hash{$f2} ne "")
{
$hash{$word}=$hash{$word}+1;
}
$id=$hash{$word};
#--------------------------------------------------------------------------------
my ($x,$y)=split(/>/,$f4);
$x =~ s/ name=[^ >]+//;
if($id==1)
{
$att_val="$word";
}
elsif($id!=1)
{
$att_val="$word"."_"."$id";
}
$new_fs = $x." head=\'$name_val[0]\'>";
#$new_fs = $x." head=$name_val[0]>";
#my $new_head_fs=$x." name=\"$att_val\">";
#&modify_field($childs[$change],4,$new_head_fs,$sent);
}
$j--;
}
# Write the computed head FS onto the chunk node, merging with any
# existing feature structure.
($f0,$f1,$f2,$f3,$f4) = &get_fields($np_nodes[$i],$sent);
if($f4 eq '')
{
##print "1check ---$new_fs\n";
&modify_field($np_nodes[$i],4,$new_fs,$sent);
($f0,$f1,$f2,$f3,$f4) = &get_fields($np_nodes[$i],$sent);
$fs_ptr = &read_FS($f4,$sent);
#print "---x--$x\n";
#&add_attr_val("name",$head_att_val,$fs_ptr,$sent);
($f0,$f1,$f2,$f3,$f4) = &get_fields($np_nodes[$i],$sent);
#print "2check ---$f4\n";
}
else
{
$fs_ptr = &read_FS($f4,$sent);
$new_fs_ptr = &read_FS($new_fs,$sent);
&merge($fs_ptr,$new_fs_ptr,$sent);
$fs_string = &make_string($fs_ptr);
&modify_field($np_nodes[$i],4,$fs_string,$sent);
($f0,$f1,$f2,$f3,$f4) = &get_fields($np_nodes[$i],$sent);
$fs_ptr = &read_FS($f4,$sent);
#&add_attr_val("name",$head_att_val,$fs_ptr,$sent);
#&modify_field($np_nodes[$i], 4, $head_att_val,$sent);
}
}
#print "hiii--\n"
#&print_tree();
#print "hiii\n";
}
1;
# ILMT::PAN::HIN::ComputeHead::get_head_vg -- computes the head word of
# verb-group chunks and writes it into the chunk's feature structure as
# head='<name>'.  Like get_head_np this relies on package globals (no
# "use strict") but scans children left-to-right.
package ILMT::PAN::HIN::ComputeHead::get_head_vg;
use Exporter qw(import);
use ILMT::PAN::HIN::SSFAPI::feature_filter;
use ILMT::PAN::HIN::SSFAPI::shakti_tree_api;
our @EXPORT = qw(copy_head_vg);
#&AddID($ARGV[0]);
# Args: chunk tag (VGF/VGNF/VGINF/VGNN), sentence handle.  Modifies the
# sentence tree in place.
sub copy_head_vg
{
my($pos_tag) = $_[0];
my($sent) = $_[1];
my %hash=();
# Map the chunk tag to the POS-tag prefix that identifies its head.
if($pos_tag =~ /^NP/)
{
$match = "N";
}
if($pos_tag =~ /^V/ )
{
$match = "V";
}
if($pos_tag =~ /^JJP/ )
{
$match = "J";
}
if($pos_tag =~ /^CCP/ )
{
$match = "CC";
}
if($pos_tag =~ /^RBP/ )
{
$match = "RB";
}
# Walk all chunks with this tag, last to first.
@np_nodes = &get_nodes(3,$pos_tag,$sent);
for($i=$#np_nodes; $i>=0; $i--)
{
my(@childs) = &get_children($np_nodes[$i],$sent);
# Scan the chunk's children left-to-right for the first matching POS tag.
$j = 0;
while($j <= $#childs)
{
my($f0,$f1,$f2,$f3,$f4) = &get_fields($childs[$j],$sent);
$word=$f2;
if($f3 =~ /^$match/)
{
# Matching child found: build head='<name>' from its feature structure.
$new_fs = $f4;
my $fs_ref = &read_FS($f4);
my @name_val = &get_values("name", $fs_ref);
if($hash{$f2} eq "")
{
$hash{$word}=1;
}
elsif($hash{$f2} ne "")
{
$hash{$word}=$hash{$word}+1;
}
$id=$hash{$word};
my ($x,$y)=split(/>/,$f4);
$x =~ s/ name=[^ >]+//;
if($id==1)
{
$att_val="$word";
}
elsif($id!=1)
{
$att_val="$word"."_"."$id";
}
$new_fs = $x." head=\'$name_val[0]\'>";
#$new_fs = $x." head=$name_val[0]>";
#my $new_head_fs=$x." name=\"$att_val\">";
#&modify_field($childs[$j],4,$new_fs,$sent);
last;
}
elsif($j == 0)
{
# Fallback candidate: the chunk's last child (may be overridden if a
# later iteration finds a real match).
my($f0,$f1,$f2,$f3,$f4) = &get_fields($childs[$#childs],$sent);
$word=$f2;
my $fs_ref = &read_FS($f4);
my @name_val = &get_values("name", $fs_ref);
if($hash{$f2} eq "")
{
$hash{$word}=1;
}
elsif($hash{$f2} ne "")
{
$hash{$word}=$hash{$word}+1;
}
$id=$hash{$word};
my ($x,$y)=split(/>/,$f4);
$x =~ s/ name=[^ >]+//;
if($id==1)
{
$att_val="$word";
}
elsif($id!=1)
{
$att_val="$word"."_"."$id";
}
$new_fs = $x." head=\'$name_val[0]\'>";
#$new_fs = $x." head=$name_val[0]>";
#my $new_head_fs=$x." name=\"$att_val\">";
#&modify_field($childs[$#childs],4,$new_fs,$sent);
}
$j++;
}
# Write the computed head FS onto the chunk node, merging with any
# existing feature structure.
($f0,$f1,$f2,$f3,$f4) = &get_fields($np_nodes[$i],$sent);
if($f4 eq '')
{
&modify_field($np_nodes[$i],4,$new_fs,$sent);
}
else
{
$fs_ptr = &read_FS($f4,$sent);
$new_fs_ptr = &read_FS($new_fs,$sent);
&merge($fs_ptr,$new_fs_ptr,$sent);
$fs_string = &make_string($fs_ptr,$sent);
&modify_field($np_nodes[$i],4,$fs_string,$sent);
}
}
}
1;
package ILMT::PAN::HIN::ComputeHead::make_chunk_name;
use Exporter qw(import);
use ILMT::PAN::HIN::SSFAPI::feature_filter;
use ILMT::PAN::HIN::SSFAPI::shakti_tree_api;
our @EXPORT = qw(make_chunk_name);
#use strict;
sub make_chunk_name()
{
my($i, @leaves, $new_fs, @tree, $line, $string, $file, @lines, @string2, $string_ref1, $string1, $string_name);
$input = $_[0];
my %hash_index;
my %hash_chunk;
my @final_tree;
#&read_story($input);
my @tree = &get_children(0, $input);
my $ssf_string = &get_field($tree[0], 3, $input);
if($ssf_string eq "SSF")
{
@final_tree = &get_children(1, $input);
}
else
{
@final_tree = @tree;
}
my $k, $index=0, $count=0, $index_chunk=0;
@tree = &get_children($s,$input);
foreach $i(@final_tree)
{
$string = &get_field($i, 4,$input);
@leaves = &get_children($i,$input);
my $string_fs = &read_FS($string, $input);
foreach $m(@leaves)
{
$string1 = &get_field($m, 4,$input);
$string_fs1 = &read_FS($string1, $input);
$new_fs = &make_string($string_fs1, $input);
&modify_field($m, 4, $new_fs, $input);
}
}
foreach $i(@final_tree)
{
my $count_chunk=0;
$index_chunk++;
$string = &get_field($i, 4, $input);
$string_fs = &read_FS($string, $input);
my @old_value_name = &get_values("name", $string_fs, $input);
#print @old_value_name,"\n";
if($old_value_name[0]=~/\'/ or $old_drel[0]=~/\"/)
{
$old_value_name[0]=~s/\'//g;
$old_value_name[0]=~s/\"//g;
}
my @chunk = &get_field($i, 3, $input);
for ($ite1=1; $ite1<$index_chunk; $ite1++)
{
my $actual_chunk_name = $hash_chunk{$ite1};
my @chunk_name_split = split(/__/, $actual_chunk_name);
if($chunk_name_split[0] eq $chunk[0])
{
$count_chunk++;
}
}
my @chunk1;
if($count_chunk == 0)
{
$hash_chunk{$index_chunk} = "$chunk[0]"."__1";
$chunk1[0] = $chunk[0];
}
else
{
$new_count_chunk = $count_chunk+1;
$chunk1[0] = "$chunk[0]"."$new_count_chunk";
$hash_chunk{$index_chunk} = "$chunk[0]"."__$new_count_chunk";
}
foreach $m_drel(@final_tree)
{
my $string_child = &get_field($m_drel, 4, $input);
my $string_fs_child = &read_FS($string_child, $input);
my @old_drel = &get_values("drel", $string_fs_child, $input);
my @old_dmrel = &get_values("dmrel", $string_fs_child, $input);
my @old_reftype = &get_values("reftype", $string_fs_child, $input);
my @old_coref = &get_values("coref", $string_fs_child, $input);
#my @old_attr = &get_attributes($string_fs_child, $input);
if($old_drel[0]=~/\'/ or $old_drel[0]=~/\"/)
{
$old_drel[0]=~s/\'//g;
$old_drel[0]=~s/\"//g;
}
if($old_dmrel[0]=~/\'/ or $old_dmrel[0]=~/\"/)
{
$old_dmrel[0]=~s/\'//g;
$old_dmrel[0]=~s/\"//g;
}
if($old_reftype[0]=~/\'/ or $old_reftype[0]=~/\"/)
{
$old_reftype[0]=~s/\'//g;
$old_reftype[0]=~s/\"//g;
}
if($old_coref[0]=~/\'/ or $old_coref[0]=~/\"/)
{
$old_coref[0]=~s/\'//g;
$old_coref[0]=~s/\"//g;
}
my @old_drel_name = split(/:/, $old_drel[0]);
my @old_dmrel_name = split(/:/, $old_dmrel[0]);
my @old_reftype_name = split(/:/, $old_reftype[0]);
my @old_coref_name = split(/:/, $old_coref[0]);
if(($old_drel_name[1] eq $old_value_name[0]) && ($old_drel_name[1] ne ""))
{
my @new_drel;
$new_drel[0] = "$old_drel_name[0]:$chunk1[0]";
&del_attr_val("drel", $string_fs_child, $input);
# &add_attr_val("drel", \@new_drel, $string_fs_child, $input);
}
if(($old_dmrel_name[1] eq $old_value_name[0]) && ($old_dmrel_name[1] ne ""))
{
my @new_dmrel;
$new_dmrel[0] = "$old_dmrel_name[0]:$chunk1[0]";
&del_attr_val("dmrel", $string_fs_child, $input);
# &add_attr_val("dmrel", \@new_dmrel, $string_fs_child, $input);
}
if(($old_reftype_name[1] eq $old_value_name[0]) && ($old_reftype_name[1] ne ""))
{
my @new_reftype;
$new_reftype[0] = "$old_reftype_name[0]:$chunk1[0]";
&del_attr_val("reftype", $string_fs_child, $input);
# &add_attr_val("reftype", \@new_reftype, $string_fs_child, $input);
}
if(($old_coref_name[0] eq $old_value_name[0]) && ($old_coref_name[0] ne ""))
{
my @new_coref;
$new_coref[0] = $chunk1[0];
&del_attr_val("coref", $string_fs_child, $input);
# &add_attr_val("coref", \@new_coref, $string_fs_child, $input);
}
# my $name_attribute_chunk = &make_string($string_fs_child, $input);
# &modify_field($m_drel, 4, $name_attribute_chunk, $input);
}
&del_attr_val("name", $string_fs, $input);
# &add_attr_val("name", \@chunk1, $string_fs, $input);
# my $name_fs_chunk = &make_string($string_fs, $input);
# &modify_field($i, 4, $name_fs_chunk, $input);
my $string1 = &get_field($i, 4, $input);
my $attr = &read_FS($string1, $input);
#my @attribute_array = &get_attributes($attr, $input);
#$count=@attribute_array;
#print $count, "\n";
}
foreach $i(@final_tree)
{
$string = &get_field($i, 4, $input);
@leaves = &get_children($i, $input);
foreach $m(@leaves)
{
$count=0;
$index++;
$string2 = &get_field($m, 4, $input);
$string_fs2 = &read_FS($string2, $input);
my @token = &get_field($m, 2, $input);
for ($ite=1; $ite<$index; $ite++)
{
my $actual_name = $hash_index{$ite};
my @name_split = split(/__/, $actual_name);
if($name_split[0] eq $token[0])
{
$count++;
}
}
if($count == 0)
{
my @token1;
$token1[0] = $token[0];
&del_attr_val("name", $string_fs2, $input);
&add_attr_val("name", \@token1, $string_fs2, $input);
my $name_fs = &make_string($string_fs2, $input);
&modify_field($m, 4, $name_fs,$input);
$hash_index{$index} = "$token[0]"."__1";
}
else
{
$new_count = $count+1;
my @new_token = "$token[0]"."$new_count";
&del_attr_val("name", $string_fs2, $input);
&add_attr_val("name", \@new_token, $string_fs2,$input);
my $name_fs = &make_string($string_fs2,$input);
&modify_field($m, 4, $name_fs, $input);
$hash_index{$index} = "$token[0]"."__$new_count";
}
}
}
}
1;
[submodule "API"]
path = API
url = https://gitlab.com/ilmt/ILMT-PAN-HIN-SSFAPI.git
use strict;
use warnings;
use Data::Dumper;
use Graph::Directed;
use JSON;
use List::Util qw(reduce);
use Mojolicious::Lite;
use Mojo::Redis2;
use lib qw(lib API/lib);
use ILMT::PAN::HIN::GuessMorph;
# Canonical name of this module; combined with the module id to form this
# service's node name in the pipeline DAG (see the /pipeline route).
my $modulename = "ilmt.pan.hin.guessmorph";
# In-memory per-job buffers: $database{$jobid} holds inputs received so far,
# $database{"data_$jobid"} accumulates the raw data payload across requests.
# NOTE(review): process-local state — assumes a single worker process; confirm
# the deployment does not prefork.
my %database = ();
# Shared Redis connection, created lazily on first use.
# "redis" is presumably the Docker-network hostname of the Redis container —
# TODO confirm against the deployment config.
helper redis => sub {
state $r = Mojo::Redis2->new(url => "redis://redis:6379");
};
# Dispatch the buffered inputs for one job to the GuessMorph module.
# With exactly one input, it is passed through under the generic "data" key;
# with several, each key has its trailing "_&lt;modid&gt;" suffix stripped so the
# module sees plain upstream-module names.
sub process {
    my ($hash) = @_;
    my %args;
    my @input_keys = keys %{$hash};
    if (@input_keys == 1) {
        %args = (data => (values %{$hash})[0]);
    } else {
        for my $key (@input_keys) {
            (my $stripped = $key) =~ s/_[^_]*$//;
            $args{$stripped} = $hash->{$key};
        }
    }
    return ILMT::PAN::HIN::GuessMorph::process(%args);
}
# Render a 400 response whose JSON body is {"Error": "<message>"}.
# Returns the (true) result of render(), so callers can use the
# `genError(...) && return` idiom.
sub genError {
    my $c = shift;
    my $error = shift;
    # FIX: the `json` renderer serializes the structure itself; wrapping it in
    # to_json() double-encoded the body, sending a JSON *string literal*
    # containing JSON instead of a JSON object.
    $c->render(json => {Error => $error}, status => 400);
}
# Build a Graph::Directed object from an adjacency-list hashref of the form
# { from_node => [to_node, ...], ... } describing the pipeline edges.
sub genDAGGraph {
    my ($edge_map) = @_;
    my $graph = Graph::Directed->new();
    while (my ($src, $targets) = each %{$edge_map}) {
        $graph->add_edge($src, $_) for @{$targets};
    }
    return $graph;
}
# POST /pipeline: one step of the distributed pipeline protocol.
# Buffers inputs from upstream modules for a job; once every expected input
# has arrived, runs this module and forwards the merged payload to the
# downstream modules, or publishes the final result on Redis when this node
# has no successors in the DAG.
post '/pipeline' => sub {
my $c = shift;
# Request body: {modid, jobid, data => {...}, edges => {from => [to,...]}}.
my $ilmt_json = decode_json($c->req->body);
my $ilmt_modid = $ilmt_json->{modid} || genError($c, "No ModuleID Specified!") && return;
my $ilmt_jobid = $ilmt_json->{jobid} || genError($c, "No JobID Specified!") && return;
my $ilmt_data = $ilmt_json->{data} || genError($c, "No Data Specified!") && return;
my $ilmt_dag = genDAGGraph($ilmt_json->{edges});
genError($c, "Edges not specified!") && return if (!$ilmt_dag);
# This service's node name in the DAG: "<module name>_<module id>".
my $ilmt_module = $modulename . '_' . $ilmt_modid;
# Upstream nodes whose output must arrive before this module can run.
my @ilmt_inputs = map {@$_[0]} $ilmt_dag->edges_to($ilmt_module);
# First message seen for this job: initialize its buffers.
if (!$database{$ilmt_jobid}) {
$database{$ilmt_jobid} = {};
$database{"data_$ilmt_jobid"} = {};
}
# Record any inputs present in this request, keyed by the upstream module
# name with its trailing "_<modid>" suffix stripped.
foreach (@ilmt_inputs) {
my $input_module = $_ =~ s/_[^_]*$//r;
$database{$ilmt_jobid}{$input_module} = $ilmt_data->{$_} if $ilmt_data->{$_};
}
# Merge this request's raw payload into the job-wide accumulator.
%{$database{"data_$ilmt_jobid"}} = (%{$database{"data_$ilmt_jobid"}}, %{$ilmt_data});
# Run only once every expected upstream input has been received.
if (@ilmt_inputs == keys %{$database{$ilmt_jobid}}) {
$c->render(json => "{Response: 'Processing...'}", status => 202);
my $ilmt_output = process($database{$ilmt_jobid});
$ilmt_data->{$ilmt_module} = $ilmt_output;
%{$ilmt_data} = (%{$ilmt_data}, %{$database{"data_$ilmt_jobid"}});
# Downstream nodes to forward the augmented payload to.
my @ilmt_next = map {@$_[1]} $ilmt_dag->edges_from($ilmt_module);
if (@ilmt_next) {
foreach (@ilmt_next) {
# Split "host_modid" into the target host and its module id.
my @module_info = split(/_([^_]+)$/, $_);
my $next_module = $module_info[0];
$ilmt_json->{modid} = $module_info[1];
# Async fire-and-forget POST to the next module; the encode/decode
# round-trip presumably forces UTF-8 re-encoding of the payload —
# TODO confirm intent.
$c->ua->post("http://$next_module/pipeline" => json
=> from_json(encode_json($ilmt_json), {utf8 => 1}) => sub {
my ($ua, $tx) = @_;
my $msg = $tx->error ? $tx->error->{message} : $tx->res->body;
$c->app->log->debug("[$ilmt_jobid]: $msg\n");
});
}
} else {
# Sink node: publish the final result on the job's Redis channel.
$c->redis->publish($ilmt_jobid => encode_json($ilmt_json));
}
# This node is done with the job; drop its buffered inputs.
delete $database{$ilmt_jobid};
} else {
$c->render(json => "{Response: 'Waiting for more inputs...'}", status => 202);
}
};
# Hand control to Mojolicious::Lite's command dispatcher (starts the server).
app->start;
# ILMT::PAN::HIN::GuessMorph — thin client around the GuessMorph Java daemon.
# Starts the daemon on module load and exposes process() to send it data over
# a local TCP socket.
package ILMT::PAN::HIN::GuessMorph;
use strict;
use warnings;
use Dir::Self;
use Data::Dumper;
use MIME::Base64;
# FIX: call_daemon() constructs IO::Socket::INET, but the module was never
# loaded, so the constructor call died at runtime.
use IO::Socket::INET;

my $cwd = __DIR__;

# Daemons managed by this module: launch command pieces and the TCP port each
# daemon listens on.
my %daemons = (
    "guessmorph" => {
        "path" => "java",
        "args" => "-jar $cwd/GuessMorph/GuessMorph.jar -u $cwd/GuessMorph -d",
        "port" => "21003"
    }
);

# Base64-encode the UTF-8 bytes of $args{data}, send them to the guessmorph
# daemon, and return the daemon's response decoded back to a character string.
sub process {
    my %args = @_;
    utf8::encode($args{data});
    $args{data} = encode_base64($args{data}, "");
    my $result = call_daemon("guessmorph", $args{data});
    utf8::decode($result);
    return $result;
}

# Start each named daemon in the background, using an exclusive flock on a
# per-daemon run file so at most one instance per port is launched.
sub run_daemons {
    my @daemon_names = @_;
    foreach my $daemon_name (@daemon_names) {
        my %daemon = %{$daemons{$daemon_name}};
        # NOTE(review): the command is interpolated into a shell string. Safe
        # today because %daemons is module-internal and constant, but never
        # feed it externally-derived values.
        my $cmd = "$daemon{path} $daemon{args} $daemon{port} &";
        my $runfile = __DIR__ . "/run/${daemon_name}_$daemon{port}";
        system("flock -e -w 0.01 $runfile -c '$cmd'") == 0
            or warn "[" . __PACKAGE__ . "]: Port $daemon{port} maybe unavailable! $?\n";
    }
}

# Send one line of input to the named daemon on 127.0.0.1 and read its
# response until EOF. Dies if the connection cannot be established.
sub call_daemon {
    my ($daemon_name, $input) = @_;
    my $port = $daemons{$daemon_name}{port};
    # Direct method call instead of indirect-object syntax; the unused
    # $client_socket variable was dropped.
    my $socket = IO::Socket::INET->new(
        PeerHost => '127.0.0.1',
        PeerPort => $port,
        Proto => 'tcp',
    ) or die "ERROR in Socket Creation : $!\n";
    $socket->send("$input\n");
    my $result = "";
    while (my $line = $socket->getline) {
        $result .= $line;
    }
    $socket->close();
    return $result;
}

# Launch the daemon as soon as the module is loaded.
run_daemons(("guessmorph"));
1;
========================
BUILD OUTPUT DESCRIPTION
========================
When you build a Java application project that has a main class, the IDE
automatically copies all of the JAR
files on the project's classpath to your project's dist/lib folder. The IDE
also adds each of the JAR files to the Class-Path element in the application
JAR file's manifest file (MANIFEST.MF).
To run the project from the command line, go to the dist folder and
type the following:
java -jar "SSFAPI.jar"
To distribute this project, zip up the dist folder (including the lib folder)
and distribute the ZIP file.
Notes:
* If two JAR files on the project's classpath have the same name, only the first
JAR file is copied to the lib folder.
* If the classpath contains a folder of classes or resources, none of the
classpath elements are copied to the dist folder.
* If a library on the project's classpath also has a Class-Path element
specified in the manifest, the content of the Class-Path element has to be on
the project's runtime path.
* To set a main class in a standard Java project, right-click the project node
in the Projects window and choose Properties. Then click Run and enter the
class name in the Main Class field. Alternatively, you can manually type the
class name in the manifest Main-Class element.
#Encoding codes
UTF-8 utf8
UTF-16 utf16
ISO-8859-1 iso-8859-1
ISCII iscii
ISFOCM isfocm
ISFOCB isfocb
Phonetic phonetic
Typewriter-Remington typ-remington
Eenadu eenadu
Vaartha vaartha
Nafees Naskh nafees-naskh
#Feature structure properties
#root [a-zA\-\_]+
root .+
cat (n|v|adj|adv|det)
gend [m|f]
num [s|p]
pers (1|2|3)
case [0-9]+
vibh .*
tam .*
# Mandatory attributes
basicName af
# All of the below have to be single characters
nodeStart <
nodeEnd >
defAttribSeparator ,
attribSeparator /
fsOR |
attribOR |
attribEquate =
Column Names::ColumnName DataType DefaultValue EnumValues
Feature Enum ne ne||drel||name
Value Enum k1 NEP||NED||NEO||NEA||NEB||NET||NEL||NETI||NEN||NEM||NETE||k1||k2||k3||k4||k5||k7s||k7p||vmod||nmod
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment