add-self-loop-simple.cc 2.8 KB
Newer Older
Nikhilesh Bhatnagar's avatar
Nikhilesh Bhatnagar committed
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94
/*
 * Copyright (c) Facebook, Inc. and its affiliates.
 *
 * This source code is licensed under the MIT license found in the
 * LICENSE file in the root directory of this source tree.
 */

#include <iostream>
#include "fstext/fstext-lib.h" // @manual
#include "util/common-utils.h" // @manual

/*
 * This program is to modify a FST without self-loop by:
 *   for each incoming arc with non-eps input symbol, add a self-loop arc
 *   with that non-eps symbol as input and eps as output.
 *
 * This is to make sure the resultant FST can do deduplication for repeated
 * symbols, which is very common in acoustic model
 *
 */
namespace {
int32 AddSelfLoopsSimple(fst::StdVectorFst* fst) {
  typedef fst::MutableArcIterator<fst::StdVectorFst> IterType;

  int32 num_states_before = fst->NumStates();
  fst::MakePrecedingInputSymbolsSame(false, fst);
  int32 num_states_after = fst->NumStates();
  KALDI_LOG << "There are " << num_states_before
            << " states in the original FST; "
            << " after MakePrecedingInputSymbolsSame, there are "
            << num_states_after << " states " << std::endl;

  auto weight_one = fst::StdArc::Weight::One();

  int32 num_arc_added = 0;

  fst::StdArc self_loop_arc;
  self_loop_arc.weight = weight_one;

  int32 num_states = fst->NumStates();
  std::vector<std::set<int32>> incoming_non_eps_label_per_state(num_states);

  for (int32 state = 0; state < num_states; state++) {
    for (IterType aiter(fst, state); !aiter.Done(); aiter.Next()) {
      fst::StdArc arc(aiter.Value());
      if (arc.ilabel != 0) {
        incoming_non_eps_label_per_state[arc.nextstate].insert(arc.ilabel);
      }
    }
  }

  for (int32 state = 0; state < num_states; state++) {
    if (!incoming_non_eps_label_per_state[state].empty()) {
      auto& ilabel_set = incoming_non_eps_label_per_state[state];
      for (auto it = ilabel_set.begin(); it != ilabel_set.end(); it++) {
        self_loop_arc.ilabel = *it;
        self_loop_arc.olabel = 0;
        self_loop_arc.nextstate = state;
        fst->AddArc(state, self_loop_arc);
        num_arc_added++;
      }
    }
  }
  return num_arc_added;
}

void print_usage() {
  std::cout << "add-self-loop-simple usage:\n"
               "\tadd-self-loop-simple <in-fst> <out-fst> \n";
}
} // namespace

int main(int argc, char** argv) {
  if (argc != 3) {
    print_usage();
    exit(1);
  }

  auto input = argv[1];
  auto output = argv[2];

  auto fst = fst::ReadFstKaldi(input);
  auto num_states = fst->NumStates();
  KALDI_LOG << "Loading FST from " << input << " with " << num_states
            << " states." << std::endl;

  int32 num_arc_added = AddSelfLoopsSimple(fst);
  KALDI_LOG << "Adding " << num_arc_added << " self-loop arcs " << std::endl;

  fst::WriteFstKaldi(*fst, std::string(output));
  KALDI_LOG << "Writing FST to " << output << std::endl;

  delete fst;
}