Commit e7a17bb1 authored by pruthwik mishra

Update tokenizer_for_indian_languages_on_files.py

parent 6e4b830f
...@@ -137,7 +137,7 @@ def main(): ...@@ -137,7 +137,7 @@ def main():
os.makedirs(args.out) os.makedirs(args.out)
if args.lang in ['hi', 'or', 'mn', 'as', 'bn', 'pa']: if args.lang in ['hi', 'or', 'mn', 'as', 'bn', 'pa']:
lang = 0 lang = 0
elif args.lang == 'ur': elif args.lang in ['ur', 'ks']:
lang = 1 lang = 1
elif args.lang in ['en', 'gu', 'mr', 'ml', 'kn', 'te', 'ta']: elif args.lang in ['en', 'gu', 'mr', 'ml', 'kn', 'te', 'ta']:
lang = 2 lang = 2
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment