Commit 9e424ff9 authored by priyank

hin- shallow parser first commit
#!/usr/bin/env python
# -*- coding: utf-8 -*-
'''
Created by
@author: priyank
'''
import codecs
import json
import os
import sys
import requests
from argparse import ArgumentParser
# configure the command-line parser and validate the required arguments
parser = ArgumentParser()
parser.add_argument('-c', '--serverConfigFile', help='server configuration file (with path)', required=True)
parser.add_argument('-i', '--inputFile', help='inputFile (with path)', required=True)
args = parser.parse_args()
#getting command line config files and check if files exist
serverFile = args.serverConfigFile
inputFile = args.inputFile
#function to get sentences from SSF
def sentenceCollector(inputString):
    if "Sentence><Sentence" in inputString:
        inputString = inputString.replace('Sentence><Sentence', 'Sentence>\n<Sentence')
    inArray = inputString.strip().split("\n")
    sentList = []
    tempString = ""
    for line in inArray:
        line = line.rstrip()
        if line:
            if line.startswith('<Sentence '):
                tempString = tempString + line + "\n"
            elif line.startswith('</Sentence'):
                tempString = tempString + line + "\n"
                sentList.append(tempString)
                tempString = ""
            else:
                tempString = tempString + line + "\n"
    return sentList
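# Illustrative usage (added sketch, not part of the original script): given a
# tokenizer response holding two SSF sentences, sentenceCollector returns one
# string per <Sentence> ... </Sentence> block, e.g.
#   ssf = '<Sentence id="1">\n1\traama\tNNP\n</Sentence><Sentence id="2">\n1\tsIwA\tNNP\n</Sentence>'
#   sentenceCollector(ssf)  # -> two strings, one per sentence block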
# Function to get the output of the last module (word generator)
def wordgenCollector(inputString):
    inArray = inputString.strip().split("\n")
    tempString = ""
    for line in inArray:
        line = line.rstrip()
        linearray = line.split("\t")
        if line and len(linearray) >= 2:
            if line.startswith('<Sentence '):
                continue
            elif line.startswith('</Sentence'):
                continue
            elif linearray[1] == '((' or linearray[1] == '))':
                continue
            else:
                tempString = tempString + linearray[1] + " "
    return tempString
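# Illustrative usage (added sketch): wordgenCollector keeps only the token
# column (second tab-separated field) and skips sentence tags and chunk
# brackets, e.g. for a one-word SSF sentence it returns 'raama '.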
if not os.path.isfile(serverFile):
    print "server config file", serverFile, "does not exist."
    sys.exit(1)
if not os.path.isfile(inputFile):
    print "input file", inputFile, "does not exist."
    sys.exit(1)
server_details = {}
#getting server details
with open(serverFile) as server_file:
server_details = json.load(server_file)
translationURL = server_details['hin']
f = codecs.open(inputFile, "rb", "utf-8")
lines = f.readlines()
f.close()
tokenizerURLArray = translationURL.split("/")
tokenizerURLArray[-2] = '1'
modulesURL = tokenizerURLArray[0] + "/" + tokenizerURLArray[1] + "/" + tokenizerURLArray[2] + "/" + tokenizerURLArray[5] + "/" + tokenizerURLArray[6] + "/modules"
tokenizerURL = "/".join(tokenizerURLArray)
tokenizerURLArray = translationURL.split("/")
tokenizerURLArray[-3] = '2'
translationURL = "/".join(tokenizerURLArray)
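# Note (added sketch, not part of the original script): assuming server.json
# maps "hin" to the full partial-pipeline endpoint, e.g.
#   http://<host>:8080/partialtranslate/new/hin/pan/1/11/partial
# the rewrites above yield:
#   modulesURL     -> http://<host>:8080/hin/pan/modules              (module list)
#   tokenizerURL   -> .../partialtranslate/new/hin/pan/1/1/partial    (tokenizer only)
#   translationURL -> .../partialtranslate/new/hin/pan/2/11/partial   (remaining modules)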
myheaders = {"Content-type": "application/x-www-form-urlencoded; charset=UTF-8"}
proxies = {
"http" :None,
"https":None
}
res = requests.post(modulesURL, proxies=proxies, headers=myheaders)
lastModule = ''
secondLastModule = ''
# getting last modules
if res is not None:
    modulesList = json.loads(res.text)
    lastModule = modulesList[-1]
    secondLastModule = modulesList[-2]
else:
    print "Null response from server"
    sys.exit(1)
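# For this HIN shallow-parser deployment the module sequence is expected to end
# with ComputeVibhakti and WX2UTF, so lastModule/secondLastModule typically
# resolve to the UTF-8 SSF output ("wx2utf-11") and the WX-notation SSF output
# ("computevibhakti-10"). This is an assumption based on the server's module list.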
response_data = {}
response_data['language'] = 'hin'
response_data['text'] = lines
output= ""
wxoutput = ""
# processing sentence in each line by calling MT
# Processing paras: one line is considered as a para
iii = 0
intermediatearray = []
mystr = ""
for line in lines:
    line = line.strip()
    if line:
        # calling tokenizer on line
        dataToSend = {"data": line.strip().encode('utf-8')}
        res = requests.post(tokenizerURL, proxies=proxies, headers=myheaders, data=dataToSend)
        tokenOut = json.loads(res.text)
        sentences = sentenceCollector(tokenOut['tokenizer-1'])
        jjj = 0
        tempdict = {}
        mystr = mystr + "paraid:" + str(iii + 1) + "\n" + line + "\n"
        for sentence in sentences:
            dataToSend = {"data": sentence.strip().encode('utf-8').strip()}
            res = requests.post(translationURL, proxies=proxies, headers=myheaders, data=dataToSend)
            completeOut = json.loads(res.text)
            lastmoduleOutput = completeOut[lastModule + "-" + str(modulesList.index(lastModule) + 1)]
            secondlastmoduleOutput = completeOut[secondLastModule + "-" + str(modulesList.index(secondLastModule) + 1)]
            finalOutput = lastmoduleOutput
            output = output + finalOutput + " \n\n"
            wxoutput = wxoutput + secondlastmoduleOutput + " \n\n"
            mystr = mystr + "sentid:" + str(jjj + 1) + "\n" + line + "\n"
            mystr = mystr + lastmoduleOutput + "\n"
            jjj = jjj + 1
        iii = iii + 1
        output = output + " \n\n"
        wxoutput = wxoutput + " \n\n"
        mystr = mystr + "---------------------------------------------------------\n"
print mystr
# ILMT-API
An API for querying ILMT systems.
## For installation of dependencies related to Sampark, refer to dependencies.txt
## For installation of Perl-related dependencies, run install.sh with the following command:
```
sh install.sh
```
## To install the Shallowparser MT modules, run:
```
cd ilmt-api-hin-shallowparser/
sh setup.sh
```
## Steps to execute the API
```
unset PERL5LIB; . ./setup-env.sh;
perl api.pl prefork -l http://*:8080 -w 1
```
This starts the API, which then listens for requests on port 8080.
## Steps to test the API:
a) Open the following URL in a browser:
```
http://<YOUR_IP>:8080/hin/pan/translate?data="उस वक्त की कुछ उत्कृष्ट चीजों की छाप आज भी इस शहर पर देखी जा सकती है।"&pretty=true
```
b) Replace <YOUR_IP> with your IP address.
c) To test the API with curl, run the following:
```
curl --noproxy '*' http://localhost:8080/hin/pan/translate --data data="उस वक्त की कुछ उत्कृष्ट चीजों की छाप आज भी इस शहर पर देखी जा सकती है।"
```
d) To use the Shallowparser UI, open the following URL in a browser:
```
http://<YOUR_IP>:8080/
```
e) You should now see the translation panels.
## To process sentences using the API:
Update the host IP in server.json from 10.2.63.52 to your own IP, then run the following command:
$ python GetShallowParserOutput.py -c server.json -i input.txt
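server.json maps the language code to the pipeline endpoint that GetShallowParserOutput.py rewrites into the modules, tokenizer-only and remaining-modules URLs. The exact value depends on your deployment; based on how the script splits and rewrites the URL, it likely looks something like the sketch below (host, port and module range are assumptions):
```
{
    "hin": "http://<YOUR_IP>:8080/partialtranslate/new/hin/pan/1/11/partial"
}
```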
## To check which ports are allotted to the different services, run the following command:
```
find ./ | grep 'run/.*_'
```
#!/usr/bin/env perl
use Dir::Self;
use strict;
use warnings;
use Data::Dumper;
use Mojolicious::Lite;
use lib __DIR__ . "/lib";
use ILMT::Translator qw(get_translator get_langpairs);
use ILMT::HIN::PAN;
plugin qw(Mojolicious::Plugin::ForkCall);
any '/:src/:tgt/translate' => sub {
    my $c = shift->render_later;
    $c->inactivity_timeout(3600);
    my %args = %{$c->req->params->to_hash};
    $args{'src_lang'} = $c->param('src');
    $args{'tgt_lang'} = $c->param('tgt');
    $args{'data'} = $args{'input'} = $args{'data'} // $args{'input'};
    $c->fork_call(
        sub {
            my (%args) = @_;
            my $translator = get_translator(uc($c->param('src')), uc($c->param('tgt')));
            return $translator->translate(%args);
        },
        [%args],
        sub {
            my ($c, $final_result) = @_;
            if (exists $args{"pretty"}) {
                my $final_string = join "\n", map { "$_:\n$final_result->{$_}" } keys %$final_result;
                $c->render(template => 'pretty', result => $final_string);
            } else {
                $c->render(json => $final_result);
            }
        }
    );
};
any '/:src/:tgt/:start/:end' => sub {
    my $c = shift->render_later;
    $c->inactivity_timeout(3600);
    my %args = %{$c->req->params->to_hash};
    $args{'src_lang'} = $c->param('src');
    $args{'tgt_lang'} = $c->param('tgt');
    $args{'data'} = $args{'input'} = $args{'data'} // $args{'input'};
    $c->fork_call(
        sub {
            my (%args) = @_;
            my $translator = get_translator(uc($c->param('src')), uc($c->param('tgt')));
            return $translator->partial_p($c->param('start'), $c->param('end'), %args);
        },
        [%args],
        sub {
            my ($c, $final_result) = @_;
            if (exists $args{"pretty"}) {
                my $final_string = join "\n", map { "$_:\n$final_result->{$_}" } keys %$final_result;
                $c->render(template => 'pretty', result => $final_string);
            } else {
                $c->render(json => $final_result);
            }
        }
    );
};
any '/partialtranslate/new/:src/:tgt/:start/:end/partial' => sub {
    print "inside partialtranslate/new ............";
    my $c = shift;
    my %args = %{$c->req->params->to_hash};
    $args{'src_lang'} = $c->param('src');
    $args{'tgt_lang'} = $c->param('tgt');
    $args{'data'} = $args{'input'} = $args{'data'} // $args{'input'};
    my $translator = get_translator(uc($c->param('src')), uc($c->param('tgt')));
    my $final_result = $translator->partial_p($c->param('start'), $c->param('end'), %args);
    #$c->render(json => $final_result);
    if (exists $args{"pretty"}) {
        my $final_string = join "\n", map { "$_:\n$final_result->{$_}" } keys %$final_result;
        $c->render(template => 'pretty', result => $final_string);
    } else {
        $c->render(json => $final_result);
    }
};
any '/mytranslate/new/api/:src/:tgt/mytranslate' => sub {
    print "inside mytranslate ............";
    my $c = shift;
    my %args = %{$c->req->params->to_hash};
    $args{'src_lang'} = $c->param('src');
    $args{'tgt_lang'} = $c->param('tgt');
    $args{'data'} = $args{'input'} = $args{'data'} // $args{'input'};
    my $translator = get_translator(uc($c->param('src')), uc($c->param('tgt')));
    my $final_result = $translator->translate(%args);
    #$c->render(json => $final_result);
    if (exists $args{"pretty"}) {
        my $final_string = join "\n", map { "$_:\n$final_result->{$_}" } keys %$final_result;
        $c->render(template => 'pretty', result => $final_string);
    } else {
        $c->render(json => $final_result);
    }
};
any '/:src/:tgt/' => sub {
    my $c = shift;
    my $translator = get_translator(uc($c->param('src')), uc($c->param('tgt')));
    $c->render(text => scalar @{$translator->{seq}});
};
any '/:src/:tgt/modules' => sub {
    my $c = shift;
    my $translator = get_translator(uc($c->param('src')), uc($c->param('tgt')));
    my @modules = map { lc($_) } @{$translator->{seq}};
    $c->render(json => \@modules);
};
any '/langpairs' => sub {
    my $c = shift;
    my %langpairs = get_langpairs();
    print Dumper(\%langpairs);
    $c->render(json => \%langpairs);
};
get '/' => sub {
    my $c = shift;
    $c->reply->static('index.html');
};
app->start;
__DATA__
@@ pretty.html.ep
<pre><%= $result %></pre>
Dependencies:
+ jdk8
Follow the steps below to install the JDK:
a) Download JDK 8 (Linux 64-bit tar archive) from the Oracle website
b) Copy the downloaded package to '/usr/local'
c) tar -xvf jdk-<version>-linux-x64.tar.gz
d) vim /etc/profile
e) export JAVA_HOME="/usr/local/jdk<version>"
f) export PATH=$PATH:$JAVA_HOME/bin
g) source /etc/profile
h) java -version
+ CRF++ 0.51+
Follow the steps below to install CRF++:
$ cd CRF++-0.51
$ ./configure
$ make
$ make install
Check that it is installed properly:
crf_test --version
Note:
If 'libcrfpp.so.0' is not found after the CRF++ installation, then use the command below:
ln -s /usr/local/lib/libcrfpp.so.0 /usr/lib/libcrfpp.so.0
+ gcc
+ gdbm
+ libgdbm-dev
+ glib-2.0
+ libglib2.0-dev
On Ubuntu, use the following command to install them:
sudo apt-get install libgdbm-dev libglib2.0-dev g++
# Apache Ant installation
- wget https://downloads.apache.org/ant/manual/apache-ant-1.10.6-manual.tar.gz
- sudo cp apache-ant-1.10.6-manual.tar.gz /usr/local/
- cd /usr/local/
- sudo su
- tar -xvf apache-ant-1.10.6-manual.tar.gz
- # exit from root by typing exit
- vim .bashrc
- Add the following lines at the bottom of the file:
export ANT_HOME=/usr/local/<ANT DIRECTORY>
export PATH=${PATH}:${ANT_HOME}/bin
- Save the file
- $ source .bashrc
देश के टूरिजम में राजस्थान एक अहम जगह रखता है। यहां आने वाले इसके हर नजारे पर फिदा हो जाते हैं।
वैसे, कुछ जगहें ऐसी भी हैं, जहां आकर टूरिस्ट उसी के रंग में रंग जाते हैं। खूबसूरत नजारों व लोकेशंस वाली राजस्थान की ऐसी ही एक जगह है उदयपुर।
इसे राजस्थान का सबसे ज्यादा रोमैंटिक शहर मानने के साथ 'पूरब का वेनिस' भी कहा जाता है।
इस शहर की स्थापना मेवाड़ के महाराणा उदय सिंह ने 1599 में की थी।
उस वक्त की कुछ उत्कृष्ट चीजों की छाप आज भी इस शहर पर देखी जा सकती है।
sudo apt-get install curl
curl -L http://cpanmin.us | perl - --sudo App::cpanminus
sudo cpanm Data::Dumper
sudo cpanm Dir::Self
sudo cpanm Mojolicious::Lite
sudo cpanm Module::Runtime
sudo cpanm Module::Pluggable
sudo cpanm Mojolicious::Plugin::ForkCall
sudo cpanm IPC::Run
sudo cpanm Module::Pluggable
sudo apt-get install python-argparse python-requests
package ILMT::HIN::PAN;
use strict;
use warnings;
use Data::Dumper;
use ILMT::Translator;
my @seq = (
    "Tokenizer",
    "UTF2WX",
    "Morph",
    "POSTagger",
    "Chunker",
    "Prune",
    "GuessMorph",
    "PickOneMorph",
    "ComputeHead",
    "ComputeVibhakti",
    "WX2UTF"
);
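# The eleven modules above run in this order; the /:src/:tgt/modules route
# returns their lowercased names ("tokenizer" .. "wx2utf"), and their 1-based
# positions are what the :start/:end parameters of the partial routes select.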
my $langpair_obj = new_translator ILMT::Translator("HIN", "PAN", \@seq);
package ILMT::Translator;
use strict;
use warnings;
use Dir::Self;
use Data::Dumper;
use Exporter qw(import);
use Module::Pluggable::Object;
use Module::Runtime qw(use_module);
our @EXPORT_OK = qw(get_translator get_langpairs);
my %translator_table;
sub new_translator {
    my $class = shift;
    my $self = {
        src => shift,
        tgt => shift,
    };
    my $search_path = "ILMT::$self->{src}::$self->{tgt}";
    @{$self->{plugins}} = map use_module($_),
        grep /^${search_path}::[^:]+$/,
        Module::Pluggable::Object->new(search_path => $search_path)->plugins;
    $self->{seq} = shift;
    bless $self, $class;
    # Register this module as a translator service
    $translator_table{$self->{src}}{$self->{tgt}} = $self;
    return $self;
}
sub get_translator {
    my ($src, $tgt) = @_;
    return $translator_table{$src}{$tgt};
}
sub get_langpairs {
    print Dumper(\%translator_table);
    return map +(lc $_ => [ map lc, keys %{$translator_table{$_}} ]), keys %translator_table;
}
sub translate {
    my ($self, %args) = @_;
    my $result = "";
    my @identifiers;
    my %final_result;
    my @dispatch_seq = @{$self->{seq}};
    foreach my $index (0 .. $#dispatch_seq) {
        my $module = $dispatch_seq[$index];
        # identifiers are 1-based, e.g. "tokenizer-1"
        my $identifier = lc("${module}-" . ($index + 1));
        push @identifiers, $identifier;
        my $package = "ILMT::$self->{src}::$self->{tgt}::$module";
        $args{$identifier} = $package->can('process')->(%args);
        $args{'data'} = $args{$identifier};
    }
    @final_result{@identifiers} = @args{@identifiers};
    return \%final_result;
}
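# translate() returns a hashref keyed by lowercased "<module>-<position>"
# identifiers (e.g. "tokenizer-1" ... "wx2utf-11" for the HIN pipeline), the
# same shape that clients such as GetShallowParserOutput.py index into.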
sub partial_p {
    my ($self, $start, $end, %args) = @_;
    my $result = "";
    my @dispatch_seq = @{$self->{seq}};
    my @identifiers;
    my %final_result;
    foreach my $index ($start .. $end) {
        my $module = $dispatch_seq[$index - 1];
        my $identifier = lc("${module}-$index");
        push @identifiers, $identifier;
        print "module ## $module\n";
        my $package = "ILMT::$self->{src}::$self->{tgt}::$module";
        $args{$identifier} = $package->can('process')->(%args);
        $args{'data'} = $args{$identifier};
    }
    @final_result{@identifiers} = @args{@identifiers};
    return \%final_result;
}
1;
use strict;
use warnings;
use Data::Dumper;
use Graph::Directed;
use JSON;
use List::Util qw(reduce);
use Mojolicious::Lite;
use Mojo::Redis2;
use lib "./lib";
use ILMT::HIN::PAN::Chunker;
my $modulename = "ilmt.hin.pan.chunker";
my %database = ();
helper redis => sub {
state $r = Mojo::Redis2->new(url => "redis://redis:6379");
};
sub process {
my $hash = $_[0];
my %newhash;
if (keys %{$hash} == 1) {
%newhash = (data => (%{$hash})[1]);
} else {
@newhash{ map { s/_[^_]*$//r } keys %{$hash} } = values %{$hash};
}
return ILMT::HIN::PAN::Chunker::process(%newhash);
}
sub genError {
my $c = shift;
my $error = shift;
$c->render(json => to_json({Error => $error}), status => 400);
}
sub genDAGGraph {
my %edges = %{$_[0]};
my $g = Graph::Directed->new();
foreach my $from (keys %edges) {
foreach my $to (@{$edges{$from}}) {
$g->add_edge($from, $to);
}
}
return $g;
}
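# Request body expected by /pipeline (a sketch inferred from the handler below;
# the module names and field values are illustrative, not taken from a real job):
#   {
#     "jobid": "<job id>",
#     "modid": "<instance id appended to this module's name>",
#     "edges": { "ilmt.hin.pan.postagger_1": ["ilmt.hin.pan.chunker_1"], ... },
#     "data":  { "ilmt.hin.pan.postagger_1": "<SSF output of that module>", ... }
#   }
# Once data for every incoming edge has arrived, the chunker runs and either
# POSTs the result to the next module(s) or publishes it on the Redis channel.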
post '/pipeline' => sub {
my $c = shift;
my $ilmt_json = decode_json($c->req->body);
my $ilmt_modid = $ilmt_json->{modid} || genError($c, "No ModuleID Specified!") && return;
my $ilmt_jobid = $ilmt_json->{jobid} || genError($c, "No JobID Specified!") && return;
my $ilmt_data = $ilmt_json->{data} || genError($c, "No Data Specified!") && return;
my $ilmt_dag = genDAGGraph($ilmt_json->{edges});
genError($c, "Edges not specified!") && return if (!$ilmt_dag);
my $ilmt_module = $modulename . '_' . $ilmt_modid;
my @ilmt_inputs = map {@$_[0]} $ilmt_dag->edges_to($ilmt_module);
if (!$database{$ilmt_jobid}) {
$database{$ilmt_jobid} = {};
$database{"data_$ilmt_jobid"} = {};
}
foreach (@ilmt_inputs) {
my $input_module = $_ =~ s/_[^_]*$//r;
$database{$ilmt_jobid}{$input_module} = $ilmt_data->{$_} if $ilmt_data->{$_};
}
%{$database{"data_$ilmt_jobid"}} = (%{$database{"data_$ilmt_jobid"}}, %{$ilmt_data});
if (@ilmt_inputs == keys %{$database{$ilmt_jobid}}) {
$c->render(json => "{Response: 'Processing...'}", status => 202);
my $ilmt_output = process($database{$ilmt_jobid});
$ilmt_data->{$ilmt_module} = $ilmt_output;
%{$ilmt_data} = (%{$ilmt_data}, %{$database{"data_$ilmt_jobid"}});
my @ilmt_next = map {@$_[1]} $ilmt_dag->edges_from($ilmt_module);
if (@ilmt_next) {
foreach (@ilmt_next) {
my @module_info = split(/_([^_]+)$/, $_);
my $next_module = $module_info[0];
$ilmt_json->{modid} = $module_info[1];
$c->ua->post("http://$next_module/pipeline" => json
=> from_json(encode_json($ilmt_json), {utf8 => 1}) => sub {
my ($ua, $tx) = @_;
my $msg = $tx->error ? $tx->error->{message} : $tx->res->body;
$c->app->log->debug("[$ilmt_jobid]: $msg\n");
});
}
} else {
$c->redis->publish($ilmt_jobid => encode_json($ilmt_json));
}
delete $database{$ilmt_jobid};
} else {
$c->render(json => "{Response: 'Waiting for more inputs...'}", status => 202);
}
};
app->start;
package ILMT::HIN::PAN::Chunker;
use strict;
use warnings;
use Dir::Self;
use Data::Dumper;
use IPC::Run qw(run);
my @dispatch_seq = (
    "ssf2tnt",
    "crf_test",
    "bio2ssf",
);
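# process() pipes the SSF input through the three stages above: ssf2tnt
# flattens each token to "word\tPOS\tfeatures" lines, crf_test appends a BIO
# chunk-label column using the trained CRF model, and bio2ssf folds the
# labelled tokens back into SSF with ((/)) chunk brackets.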
sub process {
    my %args = @_;
    foreach my $submodule (@dispatch_seq) {
        $args{'data'} = __PACKAGE__->can($submodule)->(%args);
    }
    return $args{"data"};
}
sub ssf2tnt {
    my %par = @_;
    my $data = $par{'data'};
    my $result = "";
    open my $fh, '<', \$data or die $!;
    while (my $line = <$fh>)
    {
        chomp($line);
        if ($line =~ /<\/S/)
        {
            $result .= "\n";
            next;
        }
        if ($line =~ /^\s*$/)   # the line contains only whitespace characters
        {
            $result .= "\n";
            next;
        }
        $line =~ s/[ ]+/___/g;
        my ($att1, $att2, $att3, $att4) = split(/[\t]+/, $line);
        if ($att1 =~ /\<.*/ || $att2 eq "((" || $att2 eq "))")   # unwanted lines
        {
            next;
        }
        else
        {
            $result .= "$att2\t$att3\t$att4\n";
        }
    }
    return $result;
}
sub bio2ssf {
    my %par = @_;
    my $data = $par{'data'};
    my $result = "";
    open my $fh, '<', \$data or die $!;
    my $line = "";
    my $startFlag = 1;
    my $wno = 1;
    my $prevCTag = "";
    my $error = "";
    my $lno = 0;
    my $sno = 1;
    my $cno = 0;
    while ($line = <$fh>)
    {
        $lno++;
        if ($line =~ /^\s*$/)
        {   # a blank line marks the end of the current sentence
            $result .= "\t))\t\t\n";
            $result .= "</Sentence>\n\n";
            $startFlag = 1;
            $wno = 1;
            $prevCTag = "";
            $sno++;
            next;
        }
        if ($startFlag == 1)
        {
            $result .= "<Sentence id=\"$sno\">\n";
        }
        chomp($line);
        my @cols = split(/\s+/, $line);
        if ($cols[3] =~ /^B-(\w+)/)
        {
            my $ctag = $1;
            if ($prevCTag ne "O" && $startFlag == 0)
            {
                $result .= "\t))\t\t\n";
                $wno++;
            }
            $cno++;
            $result .= "$cno\t((\t$ctag\t\n";
            $wno = 1;
            $prevCTag = $ctag;
        }
        elsif ($cols[3] =~ /^O/)
        {
            if ($prevCTag ne "O" && $startFlag == 0)
            {
                $result .= "\t))\t\t\n";
                $wno++;
            }
            $prevCTag = "O";
        }
        if ($cols[3] =~ /I-(\w+)/)
        {   # check for inconsistencies; do not form a chunk if there are inconsistencies
            my $ctag = $1;
            if ($ctag ne $prevCTag)
            {
                $error = $error . "Inconsistency of Chunk tag in I-$ctag at Line no:$lno : There is no B-$ctag to the prev. word\n";
            }
        }
        $cols[2] =~ s/___/ /g;
        $result .= "$cno.$wno\t$cols[0]\t$cols[1]\t$cols[2]\n";
        $wno++;
        $startFlag = 0;
    }
    return $result;
}
sub crf_test {
    my %par = @_;
    my $data = $par{'data'};
    my $result = "";
    run ["/usr/local/bin/crf_test", "-m", __DIR__ . "/Chunker/models/300k_hin_chunker.model"], \$data, \$result;
    return $result;
}
[submodule "API"]
path = API
url = https://gitlab.com/ilmt/ILMT-HIN-PAN-SSFAPI.git
use strict;
use warnings;
use Data::Dumper;
use Graph::Directed;
use JSON;
use List::Util qw(reduce);
use Mojolicious::Lite;
use Mojo::Redis2;
use lib qw(lib API/lib);
use ILMT::HIN::PAN::ComputeHead;
my $modulename = "ilmt.hin.pan.computehead";
my %database = ();
helper redis => sub {
state $r = Mojo::Redis2->new(url => "redis://redis:6379");
};
sub process {
my $hash = $_[0];
my %newhash;
if (keys %{$hash} == 1) {
%newhash = (data => (%{$hash})[1]);
} else {
@newhash{ map { s/_[^_]*$//r } keys %{$hash} } = values %{$hash};
}
return ILMT::HIN::PAN::ComputeHead::process(%newhash);
}
sub genError {
my $c = shift;
my $error = shift;
$c->render(json => to_json({Error => $error}), status => 400);
}
sub genDAGGraph {
my %edges = %{$_[0]};
my $g = Graph::Directed->new();
foreach my $from (keys %edges) {
foreach my $to (@{$edges{$from}}) {
$g->add_edge($from, $to);
}
}
return $g;
}
post '/pipeline' => sub {
my $c = shift;
my $ilmt_json = decode_json($c->req->body);
my $ilmt_modid = $ilmt_json->{modid} || genError($c, "No ModuleID Specified!") && return;
my $ilmt_jobid = $ilmt_json->{jobid} || genError($c, "No JobID Specified!") && return;
my $ilmt_data = $ilmt_json->{data} || genError($c, "No Data Specified!") && return;
my $ilmt_dag = genDAGGraph($ilmt_json->{edges});
genError($c, "Edges not specified!") && return if (!$ilmt_dag);
my $ilmt_module = $modulename . '_' . $ilmt_modid;
my @ilmt_inputs = map {@$_[0]} $ilmt_dag->edges_to($ilmt_module);
if (!$database{$ilmt_jobid}) {
$database{$ilmt_jobid} = {};
$database{"data_$ilmt_jobid"} = {};
}
foreach (@ilmt_inputs) {
my $input_module = $_ =~ s/_[^_]*$//r;
$database{$ilmt_jobid}{$input_module} = $ilmt_data->{$_} if $ilmt_data->{$_};
}
%{$database{"data_$ilmt_jobid"}} = (%{$database{"data_$ilmt_jobid"}}, %{$ilmt_data});
if (@ilmt_inputs == keys %{$database{$ilmt_jobid}}) {
$c->render(json => "{Response: 'Processing...'}", status => 202);
my $ilmt_output = process($database{$ilmt_jobid});
$ilmt_data->{$ilmt_module} = $ilmt_output;
%{$ilmt_data} = (%{$ilmt_data}, %{$database{"data_$ilmt_jobid"}});
my @ilmt_next = map {@$_[1]} $ilmt_dag->edges_from($ilmt_module);
if (@ilmt_next) {
foreach (@ilmt_next) {
my @module_info = split(/_([^_]+)$/, $_);
my $next_module = $module_info[0];
$ilmt_json->{modid} = $module_info[1];
$c->ua->post("http://$next_module/pipeline" => json
=> from_json(encode_json($ilmt_json), {utf8 => 1}) => sub {
my ($ua, $tx) = @_;
my $msg = $tx->error ? $tx->error->{message} : $tx->res->body;
$c->app->log->debug("[$ilmt_jobid]: $msg\n");
});
}
} else {
$c->redis->publish($ilmt_jobid => encode_json($ilmt_json));
}
delete $database{$ilmt_jobid};
} else {
$c->render(json => "{Response: 'Waiting for more inputs...'}", status => 202);
}
};
app->start;
[submodule "API"]
path = API
url = https://gitlab.com/ilmt/ILMT-HIN-PAN-SSFAPI.git
[submodule "API"]
path = API
url = https://gitlab.com/ilmt/ILMT-HIN-PAN-SSFAPI.git
use strict;
use warnings;
use Data::Dumper;
use Graph::Directed;
use JSON;
use List::Util qw(reduce);
use Mojolicious::Lite;
use Mojo::Redis2;
use lib qw(lib API/lib);
use ILMT::HIN::PAN::DefaultFeatures;
my $modulename = "ilmt.hin.pan.defaultfeatures";
my %database = ();
helper redis => sub {
state $r = Mojo::Redis2->new(url => "redis://redis:6379");
};
sub process {
my $hash = $_[0];
my %newhash;
if (keys %{$hash} == 1) {
%newhash = (data => (%{$hash})[1]);
} else {
@newhash{ map { s/_[^_]*$//r } keys %{$hash} } = values %{$hash};
}
return ILMT::HIN::PAN::DefaultFeatures::process(%newhash);
}
sub genError {
my $c = shift;
my $error = shift;
$c->render(json => to_json({Error => $error}), status => 400);
}
sub genDAGGraph {
my %edges = %{$_[0]};
my $g = Graph::Directed->new();
foreach my $from (keys %edges) {
foreach my $to (@{$edges{$from}}) {
$g->add_edge($from, $to);
}
}
return $g;
}
post '/pipeline' => sub {
my $c = shift;
my $ilmt_json = decode_json($c->req->body);
my $ilmt_modid = $ilmt_json->{modid} || genError($c, "No ModuleID Specified!") && return;
my $ilmt_jobid = $ilmt_json->{jobid} || genError($c, "No JobID Specified!") && return;
my $ilmt_data = $ilmt_json->{data} || genError($c, "No Data Specified!") && return;
my $ilmt_dag = genDAGGraph($ilmt_json->{edges});
genError($c, "Edges not specified!") && return if (!$ilmt_dag);
my $ilmt_module = $modulename . '_' . $ilmt_modid;
my @ilmt_inputs = map {@$_[0]} $ilmt_dag->edges_to($ilmt_module);
if (!$database{$ilmt_jobid}) {
$database{$ilmt_jobid} = {};
$database{"data_$ilmt_jobid"} = {};
}
foreach (@ilmt_inputs) {
my $input_module = $_ =~ s/_[^_]*$//r;
$database{$ilmt_jobid}{$input_module} = $ilmt_data->{$_} if $ilmt_data->{$_};
}
%{$database{"data_$ilmt_jobid"}} = (%{$database{"data_$ilmt_jobid"}}, %{$ilmt_data});
if (@ilmt_inputs == keys %{$database{$ilmt_jobid}}) {
$c->render(json => "{Response: 'Processing...'}", status => 202);
my $ilmt_output = process($database{$ilmt_jobid});
$ilmt_data->{$ilmt_module} = $ilmt_output;
%{$ilmt_data} = (%{$ilmt_data}, %{$database{"data_$ilmt_jobid"}});
my @ilmt_next = map {@$_[1]} $ilmt_dag->edges_from($ilmt_module);
if (@ilmt_next) {
foreach (@ilmt_next) {
my @module_info = split(/_([^_]+)$/, $_);
my $next_module = $module_info[0];
$ilmt_json->{modid} = $module_info[1];
$c->ua->post("http://$next_module/pipeline" => json
=> from_json(encode_json($ilmt_json), {utf8 => 1}) => sub {
my ($ua, $tx) = @_;
my $msg = $tx->error ? $tx->error->{message} : $tx->res->body;
$c->app->log->debug("[$ilmt_jobid]: $msg\n");
});
}
} else {
$c->redis->publish($ilmt_jobid => encode_json($ilmt_json));
}
delete $database{$ilmt_jobid};
} else {
$c->render(json => "{Response: 'Waiting for more inputs...'}", status => 202);
}
};
app->start;
package ILMT::HIN::PAN::DefaultFeatures;
#use strict;
#use warnings;
use Dir::Self;
use Data::Dumper;
use ILMT::HIN::PAN::SSFAPI::feature_filter;
use ILMT::HIN::PAN::SSFAPI::shakti_tree_api;
my $ruleFile = __DIR__ . "/DefaultFeatures/rule.rl";
my %rule_hash = ();
sub preprocess {
open(RULE,$ruleFile);
@rules = <RULE>;
%rule_hash = ();
foreach $rule (@rules)
{
if($rule !~ /^\s*$/)
{
chomp($rule);
($fea,$array_in) = split(/\s+/,$rule);
$rule_hash{ $fea } = $array_in;
}
}
}
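# preprocess() reads __DIR__/DefaultFeatures/rule.rl; judging from the split
# above, each non-empty line appears to hold a feature name and its default
# value separated by whitespace (e.g. "gen m" -- illustrative, not taken from
# the actual rule file).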
sub process {
my %par = @_;
my $input = $par{'data'};
utf8::encode($input);
read_story(\$input);
my $result;
my $numBody = &get_bodycount();
my $body;
for(my($bodyNum)=1;$bodyNum<=$numBody;$bodyNum++)
{
$body = &get_body($bodyNum,$body);
my($numPara) = &get_paracount($body);
for(my($i1)=1;$i1<=$numPara;$i1++)
{
my($para);
$para = &get_para($i1);
my($numSent) = &get_sentcount($para);
for(my($j1)=1;$j1<=$numSent;$j1++)
{
my($sent) = &get_sent($para,$j1);
@nodes=&get_leaves($sent);
foreach $node (@nodes)
{
$fsSt = &get_field($node,4,$sent);
$fs_array = &read_FS($fsSt,$sent);
@lex = &get_values("lex", $fs_array,$sent);
@cat = &get_values("cat", $fs_array,$sent);
@tam = &get_values("vib", $fs_array,$sent);
@gen = &get_values("gen", $fs_array,$sent);
@num = &get_values("num", $fs_array,$sent);
@per = &get_values("per", $fs_array,$sent);
if($cat[0] eq "")
{
my @per_chunk_arr=();
push @per_chunk_arr,$rule_hash{"cat"};
my $head_node=&get_field($node,4,$sent);
my $FSreference1 = &read_FS($head_node,$sent);
&update_attr_val("cat", \@per_chunk_arr,$FSreference1,$sent);
my $string=&make_string($FSreference1,$sent);
&modify_field($node,4,$string,$sent);
}
if($tam[0] eq "")
{
my @per_chunk_arr=();
push @per_chunk_arr,$rule_hash{"tam"};
my $head_node=&get_field($node,4,$sent);
my $FSreference1 = &read_FS($head_node,$sent);
&update_attr_val("tam", \@per_chunk_arr,$FSreference1,$sent);
my $string=&make_string($FSreference1,$sent);
&modify_field($node,4,$string,$sent);
}
if($gen[0] eq "")
{
my @per_chunk_arr=();
push @per_chunk_arr,$rule_hash{"gen"};
my $head_node=&get_field($node,4,$sent);
my $FSreference1 = &read_FS($head_node,$sent);
&update_attr_val("gen", \@per_chunk_arr,$FSreference1,$sent);
my $string=&make_string($FSreference1,$sent);
&modify_field($node,4,$string,$sent);
}
if($num[0] eq "")
{
my @per_chunk_arr=();
push @per_chunk_arr,$rule_hash{"num"};
my $head_node=&get_field($node,4,$sent);
my $FSreference1 = &read_FS($head_node,$sent);
&update_attr_val("num", \@per_chunk_arr,$FSreference1,$sent);
my $string=&make_string($FSreference1,$sent);
&modify_field($node,4,$string,$sent);
}
if($per[0] eq "")
{
my @per_chunk_arr=();
push @per_chunk_arr,$rule_hash{"per"};
my $head_node=&get_field($node,4,$sent);
my $FSreference1 = &read_FS($head_node,$sent);
&update_attr_val("per", \@per_chunk_arr,$FSreference1,$sent);
my $string=&make_string($FSreference1,$sent);
&modify_field($node,4,$string,$sent);
}
}
}
}
}
open OUTFILE, '>', \$result or die $!;
select(OUTFILE);
printstory();
select(STDOUT);
utf8::decode($result);
return $result;
}
preprocess();
1;
Hindi GUESS MORPH
-----------------------------
*Version 1.1
Some new rules are implemented.
Inter-chunk guessing is implemented.
*Version 1.0
Basic version implemented.
#Encoding codes
UTF-8 utf8
UTF-16 utf16
ISO-8859-1 iso-8859-1
ISCII iscii
ISFOCM isfocm
ISFOCB isfocb
Phonetic phonetic
Typewriter-Remington typ-remington
Eenadu eenadu
Vaartha vaartha
Nafees Naskh nafees-naskh
Column Names::ColumnName DataType DefaultValue EnumValues
Feature Enum ne ne||drel||name
Value Enum k1 NEP||NED||NEO||NEA||NEB||NET||NEL||NETI||NEN||NEM||NETE||k1||k2||k3||k4||k5||k7s||k7p||vmod||nmod