Tokenizer.pm 1.71 KB
Newer Older
priyank's avatar
priyank committed
1 2 3 4 5
package ILMT::TEL::HIN::Tokenizer;
use strict;
use warnings;
use Dir::Self;
use Data::Dumper;
priyank's avatar
priyank committed
6 7 8 9
use IPC::Run qw(run);
use List::UtilsBy qw(max_by);
use File::Temp qw/ tempfile /;
use File::Slurp qw( slurp );
priyank's avatar
priyank committed
10

priyank's avatar
priyank committed
11
# Directory containing this module file (via Dir::Self); process() uses it to
# locate the tokenize.py script that sits next to this module.
my $cwd = __DIR__;
priyank's avatar
priyank committed
12 13 14 15 16 17 18 19 20 21 22
# Registry of helper daemons, keyed by daemon name. Each entry holds the
# executable to launch ("path"), its command-line flags ("args") and the TCP
# port ("port") that is appended after the flags when launching and used as
# the peer port when connecting.
my %daemons = (
    tokenizer => {
        path => 'ind-tokz',
        args => '--l tel --s --daemonize --port',
        port => '61001',
    },
);

# Tokenize text by handing it to the tokenize.py script that lives next to
# this module.
#
# Arguments (named):
#   data => character string to tokenize (undef is treated as empty input).
#
# Returns the script's standard output, decoded from UTF-8.
#
# Dies if the temporary input file cannot be written, closed or deleted, or
# if launching the script throws. A non-zero exit status from the script is
# reported with warn and whatever output was captured is still returned.
sub process {
    my %args = @_;

    # The temp file handle has no encoding layer, so convert the character
    # string to UTF-8 bytes before writing it out.
    my $input = defined $args{data} ? $args{data} : '';
    utf8::encode($input);

    my ($fh2, $filename2) = tempfile("tokenizer_inputXXXX", DIR => "/tmp", SUFFIX => ".tmp");

    # Run the write + tokenizer steps inside eval so that the temp file is
    # removed even when one of them throws; the error is re-raised below.
    my $token_out = '';
    my $ok = eval {
        print {$fh2} $input
            or die "Couldn't write temp file $filename2: $!\n";
        close($fh2)
            or die "Couldn't close temp file $filename2: $!\n";

        # tokenize.py receives the input file name as its only argument and
        # its stdout is captured into $token_out.
        run(["python", "$cwd/tokenize.py", $filename2], ">", \$token_out)
            or warn "[" . __PACKAGE__ . "]: tokenize.py exited with status $?\n";
        1;
    };
    my $err = $@;

    my $deleted = unlink $filename2;
    die $err unless $ok;
    die "Couldn't delete temp file! $filename2" unless $deleted;

    # The captured output is raw bytes; turn it back into a character string.
    $token_out = '' unless defined $token_out;
    utf8::decode($token_out);
    return $token_out;
}

# Launch each named daemon from %daemons in the background.
#
# Arguments: a list of daemon names (keys of %daemons).
# Returns: nothing.
#
# Each launch is wrapped in flock(1) on a per-daemon file under the module's
# "run" directory, with a 0.01 second wait for the exclusive lock. A failed
# launch (non-zero status from flock) and an unknown daemon name are both
# reported with warn; neither aborts the loop.
sub run_daemons {
    my @daemon_names = @_;
    foreach my $daemon_name (@daemon_names) {
        my $daemon = $daemons{$daemon_name};
        unless ($daemon) {
            warn "[" . __PACKAGE__ . "]: Unknown daemon '$daemon_name', skipping\n";
            next;
        }

        # The trailing "&" backgrounds the daemon inside the shell that
        # flock -c spawns, so system() returns without waiting for it.
        my $cmd = "$daemon->{path} $daemon->{args} $daemon->{port} &";
        my $runfile = __DIR__ . "/run/${daemon_name}_$daemon->{port}";

        # List-form system() skips the outer shell, so $runfile and $cmd reach
        # flock as single arguments even if they contain spaces or quotes.
        system("flock", "-e", "-w", "0.01", $runfile, "-c", $cmd) == 0
            or warn "[" . __PACKAGE__ . "]: Port $daemon->{port} maybe unavailable! $?\n";
    }
    return;
}

# Send one request to a running daemon over TCP and return its full reply.
#
# Arguments:
#   $daemon_name - key of %daemons identifying which daemon to contact.
#   $input       - request text; a newline is appended before sending.
#
# Returns everything read from the connection until the peer closes it, as
# a single string (empty if nothing was received).
#
# Dies if the daemon name is unknown, the connection cannot be established,
# or the request cannot be sent.
sub call_daemon {
    my ($daemon_name, $input) = @_;

    # Loaded here because the module's import list does not pull it in.
    require IO::Socket::INET;

    # Check with exists first so an unknown name does not autovivify an
    # empty entry in %daemons.
    exists $daemons{$daemon_name}
        or die "ERROR in Socket Creation : unknown daemon '$daemon_name'\n";
    my $port = $daemons{$daemon_name}{port};

    my $socket = IO::Socket::INET->new(
        PeerHost => '127.0.0.1',
        PeerPort => $port,
        Proto => 'tcp',
    ) or die "ERROR in Socket Creation : $!\n";

    unless (defined $socket->send("$input\n")) {
        my $send_error = $!;
        $socket->close();
        die "ERROR in Socket Send : $send_error\n";
    }

    # Read line by line until getline returns undef (EOF or read error).
    my $result = "";
    while (my $line = $socket->getline) {
        $result .= $line;
    }
    $socket->close();
    return $result;
}

priyank's avatar
priyank committed
66
#run_daemons(("tokenizer"));
priyank's avatar
priyank committed
67 68

1;