# Tokenizer.pm
package ILMT::KAN::HIN::Tokenizer;
use strict;
use warnings;

use Data::Dumper;
use Dir::Self;
use File::Slurp qw( slurp );
use File::Temp qw/ tempfile /;
use IO::Socket::INET;
use IPC::Run qw(run);
use List::UtilsBy qw(max_by);

# Directory containing this module; process() looks for tokenize.py here.
my $cwd = __DIR__;

# Settings for the helper daemons, keyed by daemon name.  Each entry holds
# the executable ("path"), its command-line arguments ("args") and the TCP
# port ("port"), which is appended to the arguments when the daemon is
# started and used as the peer port when it is contacted.
my %daemons = (
    tokenizer => {
        path => 'ind-tokz',
        args => '--l kan --s --daemonize --port',
        port => '8111',
    },
);

# Tokenize text by running the tokenize.py script that sits next to this
# module.
#
# Named arguments:
#   data - the text to tokenize.  It is UTF-8 encoded and written to a
#          temporary file under /tmp, whose name is passed to the script.
#          An undefined value is treated as the empty string.
#
# Returns the script's standard output, decoded from UTF-8.
#
# Dies if the temporary file cannot be written, closed or removed, or if
# IPC::Run raises an error while running the script.  A false result from
# run() only produces a warning, and whatever output was captured is still
# returned.
sub process {
    my %args = @_;

    # Work on a private copy so the encoding step never touches caller data.
    my $input = defined $args{data} ? $args{data} : '';
    utf8::encode($input);

    my ($fh, $filename) = tempfile("tokenizer_inputXXXX", DIR => "/tmp", SUFFIX => ".tmp");

    # Everything that can fail while the temp file exists runs inside eval,
    # so the file is removed below even when one of these steps dies.
    my $token_out = '';
    my $ok = eval {
        print {$fh} $input or die "Couldn't write temp file! $filename: $!\n";
        close($fh)         or die "Couldn't close temp file! $filename: $!\n";

        run ["python", "$cwd/tokenize.py", $filename], ">", \$token_out
            or warn "[" . __PACKAGE__ . "]: tokenize.py exited with status $?\n";
        1;
    };
    my $error = $@;

    my $removed      = unlink $filename;
    my $unlink_error = $!;

    # Report the original failure first; a cleanup failure is secondary.
    die $error unless $ok;
    die "Couldn't delete temp file! $filename: $unlink_error" unless $removed;

    utf8::decode($token_out);
    return $token_out;
}

# Start each named daemon in the background, guarded by a lock file.
#
# Arguments: a list of daemon names; each must be a key of %daemons.
#
# For every daemon the command "<path> <args> <port> &" is handed to
# flock(1), which takes an exclusive lock on run/<name>_<port> beneath this
# module's directory and gives up after 0.01 seconds if it cannot get it.
# A non-zero status from flock only produces a warning.
#
# Dies if a name has no entry in %daemons.
sub run_daemons {
    my @daemon_names = @_;
    foreach my $daemon_name (@daemon_names) {
        # Fail with a clear message instead of dereferencing undef.
        my $daemon = $daemons{$daemon_name}
            or die "[" . __PACKAGE__ . "]: Unknown daemon '$daemon_name'\n";

        my $cmd     = "$daemon->{path} $daemon->{args} $daemon->{port} &";
        my $runfile = __DIR__ . "/run/${daemon_name}_$daemon->{port}";

        # List-form system() skips the outer shell, so a module directory
        # containing spaces or quotes cannot corrupt the lock-file argument.
        # flock still passes $cmd to a shell through its -c option.
        system("flock", "-e", "-w", "0.01", $runfile, "-c", $cmd) == 0
            or warn "[" . __PACKAGE__ . "]: Port $daemon->{port} maybe unavailable! $?\n";
    }
}

# Send one request to a running daemon over TCP and collect its reply.
#
# Arguments:
#   $daemon_name - key of %daemons; selects the port to connect to
#   $input       - request text; a newline is appended before sending
#
# Connects to 127.0.0.1 on the daemon's configured port, sends the request
# and reads lines until the peer closes the connection.
#
# Returns everything read from the socket as a single string.
#
# Dies if the daemon name is not configured, the connection cannot be
# established, or the request cannot be sent.
sub call_daemon {
    my ($daemon_name, $input) = @_;

    # exists() avoids autovivifying an empty entry for an unknown name.
    my $label = defined $daemon_name ? $daemon_name : '(undef)';
    die "[" . __PACKAGE__ . "]: Unknown daemon '$label'\n"
        unless defined $daemon_name && exists $daemons{$daemon_name};
    my $port = $daemons{$daemon_name}{port};

    # IO::Socket::INET reports constructor failures in $@; fall back to $!.
    my $socket = IO::Socket::INET->new(
        PeerHost => '127.0.0.1',
        PeerPort => $port,
        Proto    => 'tcp',
    ) or die "ERROR in Socket Creation : " . ($@ || $!) . "\n";

    defined $socket->send("$input\n")
        or die "ERROR in Socket Send : $!\n";

    # Test definedness explicitly: a method call gets no implicit defined(),
    # so a final unterminated line of "0" would otherwise end the loop early.
    my $result = "";
    while (defined(my $line = $socket->getline)) {
        $result .= $line;
    }
    $socket->close();
    return $result;
}

# Starting the tokenizer daemon at load time is disabled; uncomment to enable.
#run_daemons(("tokenizer"));

1;