Tokenizer.pm 1.71 KB
Newer Older
priyank's avatar
priyank committed
1 2 3 4 5 6
package ILMT::URD::HIN::Tokenizer;
use strict;
use warnings;
use Dir::Self;
use Data::Dumper;

priyank's avatar
priyank committed
7 8 9 10 11 12 13
use IPC::Run qw(run);
use List::UtilsBy qw(max_by);
use File::Temp qw/ tempfile /;
use File::Slurp qw( slurp );

# Directory containing this module file (via Dir::Self's __DIR__);
# process() looks for tokenize.py in this directory.
my $cwd = __DIR__;

priyank's avatar
priyank committed
14 15 16 17 18 19 20 21 22 23 24
# Registry of helper daemons, keyed by daemon name. Each entry gives the
# executable to launch ("path"), its command-line arguments ("args") and the
# TCP port ("port") that is appended to those arguments at launch and used
# when connecting to the daemon.
my %daemons = (
    tokenizer => {
        path => 'ind-tokz',
        args => '--l urd --s --daemonize --port',
        port => '31001',
    },
);

# Tokenize text by handing it to the tokenize.py script located in this
# module's directory ($cwd).
#
# Arguments (named):
#   data => text to tokenize; it is UTF-8 encoded before being written out.
#
# Returns whatever tokenize.py printed on stdout, UTF-8 decoded.
#
# Dies if the temporary input file cannot be written, closed or removed, and
# re-throws any exception raised while launching the script. A non-zero exit
# status from the script only produces a warning, so the caller still
# receives whatever output was captured.
sub process {
    my %args = @_;

    # %args holds copies of the caller's values, so encoding in place does
    # not touch the caller's string.
    utf8::encode($args{data});

    # tokenize.py is given a file name, so the input goes through a temp file.
    my ($fh2, $filename2) = tempfile("tokenizer_inputXXXX", DIR => "/tmp", SUFFIX => ".tmp");

    my $token_out;
    my $ok = eval {
        print {$fh2} $args{"data"}
            or die "Couldn't write temp file $filename2: $!";
        close($fh2)
            or die "Couldn't close temp file $filename2: $!";

        # run() is false when the child exits non-zero; report it instead of
        # silently returning empty or partial output.
        run(["python", "$cwd/tokenize.py", $filename2], ">", \$token_out)
            or warn "[" . __PACKAGE__ . "]: tokenize.py exited with status $?\n";
        1;
    };
    my $error = $@;

    # Remove the temp file on every path, including the failures above, so a
    # crashed run does not leave files behind in /tmp.
    my $removed = unlink $filename2;
    die $error unless $ok;
    $removed or die "Couldn't delete temp file! $filename2";

    utf8::decode($token_out);
    return $token_out;
}

# Launch the named daemons from %daemons in the background.
#
# Arguments: a list of daemon names (keys of %daemons).
#
# Each daemon command is "<path> <args> <port> &" and is started through
# flock(1), which takes an exclusive lock on a per-daemon file under this
# module's run/ directory and gives up after 0.01s if it cannot get the lock.
# A failed launch only warns. Dies when a name has no entry in %daemons.
# NOTE(review): the run/ directory is not created here; presumably it ships
# with the module. Confirm.
sub run_daemons {
    my @daemon_names = @_;
    foreach my $daemon_name (@daemon_names) {
        # Fail with a clear message rather than an opaque "undefined value
        # as a HASH reference" error.
        my $daemon = $daemons{$daemon_name}
            or die "[" . __PACKAGE__ . "]: Unknown daemon '$daemon_name'\n";

        my $cmd = "$daemon->{path} $daemon->{args} $daemon->{port} &";
        my $runfile = __DIR__ . "/run/${daemon_name}_$daemon->{port}";

        # List-form system() skips Perl's own shell, so a module directory
        # containing spaces or quotes cannot break the command line. flock
        # still hands $cmd to a shell through -c, which is what backgrounds
        # the daemon.
        system("flock", "-e", "-w", "0.01", $runfile, "-c", $cmd) == 0
            or warn "[" . __PACKAGE__ . "]: Port $daemon->{port} maybe unavailable! $?\n";
    }
}

# Send one request to a running daemon over TCP and return its full reply.
#
# Arguments:
#   $daemon_name - key of %daemons; its "port" entry selects the TCP port.
#   $input       - request text; a trailing newline is appended before sending.
#
# Returns everything read from the socket until end-of-file, as one string.
# Dies if the daemon name is unknown, the connection cannot be made, or the
# send fails.
# NOTE(review): $input is sent as-is; presumably callers pass bytes rather
# than wide characters. Confirm.
sub call_daemon {
    my ($daemon_name, $input) = @_;

    # Nothing visible in this file loads IO::Socket::INET, so load it here.
    require IO::Socket::INET;

    # Check with exists() first: reading $daemons{$name}{port} directly would
    # autovivify an empty entry for an unknown name.
    exists $daemons{$daemon_name}
        or die "[" . __PACKAGE__ . "]: Unknown daemon '$daemon_name'\n";
    my $port = $daemons{$daemon_name}{port};

    my $socket = IO::Socket::INET->new(
        PeerHost => '127.0.0.1',
        PeerPort => $port,
        Proto    => 'tcp',
    ) or die "ERROR in Socket Creation : $!\n";

    defined($socket->send("$input\n"))
        or die "ERROR in Socket Send : $!\n";

    my $result = "";
    # Test definedness explicitly so a final line of "0" without a newline
    # is not mistaken for end-of-file.
    while (defined(my $line = $socket->getline)) {
        $result .= $line;
    }
    $socket->close();
    return $result;
}

priyank's avatar
priyank committed
68
#run_daemons(("tokenizer"));
priyank's avatar
priyank committed
69 70

1;