prepare_lm.sh 690 Bytes
Newer Older
Nikhilesh Bhatnagar's avatar
Nikhilesh Bhatnagar committed
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35
#!/usr/bin/env bash
#
# Build an LM-specific lang directory: copy an existing lang directory and
# compile an ARPA language model into G.fst inside the copy.
#
# Usage: prepare_lm.sh [options] <arpa-lm> <data-dir>
#
#   <arpa-lm>   ARPA language model; a name ending in ".gz" is decompressed
#               on the fly with gunzip.
#   <data-dir>  Base directory for the defaults: the source lang directory
#               defaults to <data-dir>/lang and the output directory to
#               <data-dir>/lang_test.
#
# Exits with status 1 on bad arguments, a missing lang directory, or a failure
# while copying, compiling G.fst or validating the resulting directory.

# Defaults; left empty so the fallbacks derived from <data-dir> apply below.
# NOTE(review): these are presumably overridable on the command line as
# --langdir / --lmdir via parse_options.sh -- confirm against that script.
langdir=""
lmdir=""

. ./cmd.sh
. ./path.sh
. parse_options.sh

# Make the gunzip | arpa2fst pipeline fail when gunzip fails, not only when
# arpa2fst does. Enabled after the sourced files so they are unaffected.
set -o pipefail

if [[ $# -lt 1 || -z "$1" ]]; then
  echo "Usage: $0 [options] <arpa-lm> <data-dir>" >&2
  exit 1
fi

arpa_lm=$1
data=${2:-}

# <data-dir> is only needed to derive a default; without it the defaults would
# silently resolve to /lang and /lang_test.
if [[ -z "$data" && ( -z "$langdir" || -z "$lmdir" ) ]]; then
  echo "$0: <data-dir> is required unless both langdir and lmdir are set" >&2
  exit 1
fi

if [[ -z "$langdir" ]]; then
  langdir=$data/lang
fi
if [[ -z "$lmdir" ]]; then
  lmdir=$data/lang_test
fi

if [[ ! -d "$langdir" ]]; then
  echo "$langdir not found. run local/prepare_lang.sh first" >&2
  exit 1
fi

# Start the output directory as a copy of the source lang directory.
mkdir -p "$lmdir" || exit 1
cp -r "$langdir"/* "$lmdir" || exit 1

# Compile the ARPA LM into G.fst using the copied word symbol table.
if [[ "$arpa_lm" == *.gz ]]; then
  gunzip -c "$arpa_lm" \
    | arpa2fst --disambig-symbol=#0 \
        --read-symbol-table="$lmdir/words.txt" - "$lmdir/G.fst" || exit 1
else
  arpa2fst --disambig-symbol=#0 \
    --read-symbol-table="$lmdir/words.txt" "$arpa_lm" "$lmdir/G.fst" || exit 1
fi

# Exit status intentionally ignored, as in the original script.
# NOTE(review): presumably run only for its diagnostic output -- confirm.
fstisstochastic "$lmdir/G.fst" || true

utils/validate_lang.pl "$lmdir" || exit 1

echo "done preparing lm ($lmdir)"