Prune.pm 1.58 KB
Newer Older
priyank's avatar
priyank committed
1 2 3 4 5 6 7 8 9 10 11 12
package ILMT::URD::HIN::Prune;
use strict;
use warnings;
use Dir::Self;
use Data::Dumper;
use ILMT::URD::HIN::Prune::prune_on_pos;
use ILMT::URD::HIN::Prune::prune_on_case;
use ILMT::URD::HIN::SSFAPI::feature_filter;
use ILMT::URD::HIN::SSFAPI::shakti_tree_api;

# Run both pruning passes over every sentence of a story and return the
# resulting story text.
#
# Arguments (named, passed as a flat key/value list):
#   data - the story text to process. It is UTF-8 encoded in place on the
#          local copy before being handed to read_story().
#
# Returns:
#   Whatever printstory() emits after pruning, captured into a string and
#   passed through utf8::decode().
#
# Dies if the in-memory output buffer cannot be opened or closed, and
# re-throws any exception raised by printstory().
sub process {
    my %par = @_;

    # %par is a copy of the argument list, so encoding here does not touch
    # the caller's variable.
    # NOTE(review): presumably the SSF API expects UTF-8 octets rather than
    # Perl character strings - confirm against read_story().
    utf8::encode($par{'data'});
    my $input = $par{'data'};

    my $db_file = __DIR__ . "/Prune/mapping.dat";
    read_story(\$input);

    my $body;
    my $numBody = get_bodycount();
    for (my $bodyNum = 1; $bodyNum <= $numBody; $bodyNum++) {
        # The previous body is passed back in, exactly as the original did.
        $body = get_body($bodyNum, $body);

        # Count the number of paragraphs in this body
        my ($numPara) = get_paracount($body);

        # Iterate through the paragraphs
        for (my $i = 1; $i <= $numPara; $i++) {
            # Read paragraph
            my $para = get_para($i);

            # Count the number of sentences in this paragraph
            my ($numSent) = get_sentcount($para);

            # Iterate through the sentences in the paragraph
            for (my $j = 1; $j <= $numSent; $j++) {
                # Read the sentence, then apply the part-of-speech pass
                # (driven by mapping.dat) followed by the case pass.
                my ($sent) = get_sent($para, $j);
                prune_on_pos($db_file, $sent);
                prune_on_case($sent);
            }
        }
    }

    # Capture printstory()'s output in memory. A lexical handle avoids the
    # shared package-global OUTFILE, and the previously selected handle is
    # remembered so it can be restored instead of forcing STDOUT.
    my $result;
    open my $out, '>', \$result
        or die "Cannot open in-memory output buffer: $!";
    my $previous = select($out);

    # Restore the default output handle even if printstory() throws, so an
    # error here cannot leave later prints redirected into the buffer.
    my $ok  = eval { printstory(); 1 };
    my $err = $@;
    select($previous);
    die $err unless $ok;

    close $out or die "Cannot close in-memory output buffer: $!";

    utf8::decode($result);
    return $result;
}

1;