wordcut-engine 1.2.2

Word segmentation/breaking library
Documentation
1
2
3
4
5
6
7
8
9
10
#!/bin/bash
#
# Build data/mixed-wordlist.txt by merging the word-list files under data/
# into one byte-wise sorted, de-duplicated list.
#
# Usage: run from anywhere; paths are resolved relative to this script's
# parent directory. Takes no arguments.
# Exit status: non-zero if an input file is missing/unreadable or if sorting
# fails.

set -euo pipefail

# Export the locale so that sort really runs in the C locale. A plain
# assignment only reaches child processes when the variable is already
# exported by the caller, which made the output order depend on the caller's
# environment.
export LANG=C
export LC_ALL=C

ROOT=$(realpath "$(dirname "${BASH_SOURCE[0]}")/..")
readonly ROOT

readonly OUTPUT="$ROOT/data/mixed-wordlist.txt"

# Input word lists, kept in the same order as they were originally listed.
readonly -a INPUTS=(
  "$ROOT/data/laowords.txt"
  "$ROOT/data/thai2words.txt"
  "$ROOT/data/words_th.txt"
  "$ROOT/data/cjwordlist.txt"
  "$ROOT/data/khmerdict.txt"
  "$ROOT/data/myanmar-dict.txt"
  "$ROOT/data/thai.txt"
)

# Check every input before touching the output, so a missing file does not
# leave a truncated or partial word list behind.
for input in "${INPUTS[@]}"; do
  if [[ ! -r "$input" ]]; then
    printf 'error: cannot read input word list: %s\n' "$input" >&2
    exit 1
  fi
done

# sort -u replaces "cat | sort | uniq". Passing the files as arguments also
# keeps the last line of a file that lacks a trailing newline from being
# glued to the first line of the next file, which plain cat would do.
sort -u -- "${INPUTS[@]}" > "$OUTPUT"