mod init;
use clap::Parser;
use std::io;
use std::io::BufRead;
use std::path::{Path, PathBuf};
use wordcut_engine::replacer;
// Languages selectable with `--lang`. For each of them `main` asks the `init` module for a
// default dictionary path and default cluster-rule path when none is given explicitly.
// NOTE: plain `//` comments are used on purpose; clap's derive macros turn `///` doc
// comments into user-visible help text.
#[derive(clap::ValueEnum, Clone, Debug)]
enum Lang {
Lao,
Khmer,
Myanmar,
// `main` treats a missing `--lang` the same as `Thai`.
Thai,
}
// Command-line arguments of the segmenter. Every option is optional; `main` supplies
// the fallbacks described below.
// NOTE: plain `//` comments are used on purpose; clap's derive macros turn `///` doc
// comments into user-visible help text.
#[derive(Parser, Debug)]
#[clap(author, version, about, long_about = None)]
struct Args {
// Path of the dictionary to load. When absent, `main` uses the `init` default for `lang`.
#[clap(short, long, value_parser)]
dict_path: Option<String>,
// Path of the cluster rules to load. When absent, `main` uses the `init` default for
// `lang`, which may itself be `None` (then no cluster rules are used).
#[clap(short, long, value_parser)]
cluster_rules_path: Option<String>,
// Text passed to `put_delimiters` for marking word boundaries (`-s`).
// `main` falls back to "|" when absent.
#[clap(short = 's', long, value_parser)]
word_delimiter: Option<String>,
// Path of the replace rules applied to each line before segmentation (`-r`). When absent,
// `main` only looks up a default for Thai / unspecified language; other languages get none.
#[clap(short = 'r', long, value_parser)]
replace_rules_path: Option<String>,
// Language whose default resources are used when the paths above are not given.
#[clap(short, long, value_parser, value_enum)]
lang: Option<Lang>,
}
/// Builds the path of a data file from `base_path`.
///
/// Unless the `onedir` feature is enabled, the result is rooted at the `data`
/// directory under the crate's manifest directory (fixed at compile time);
/// with `onedir` enabled, `base_path` is returned as-is.
///
/// As with [`PathBuf::push`], an absolute `base_path` replaces the root entirely.
pub fn join_data_path(base_path: &str) -> PathBuf {
    let root = if cfg!(feature = "onedir") {
        PathBuf::new()
    } else {
        PathBuf::from(env!("CARGO_MANIFEST_DIR")).join("data")
    };
    root.join(base_path)
}
fn main() {
let args = Args::parse();
let lang = args.lang;
let dict_path = match args.dict_path.as_ref() {
Some(dict_path) => Path::new(dict_path),
None => match lang {
Some(Lang::Lao) => init::lao_path(),
Some(Lang::Khmer) => init::khmer_dict_path(),
Some(Lang::Myanmar) => init::myanmar_dict_path(),
Some(Lang::Thai) | None => init::default_path(),
},
};
let word_delim = match args.word_delimiter.as_ref().map(|delim| delim.as_str()) {
Some(word_delim) => word_delim,
None => "|",
};
let dict = init::load_dict(dict_path).unwrap();
let cluster_rule_path = if let Some(cluster_rules_path) = args.cluster_rules_path {
Some(cluster_rules_path.to_string())
} else {
match lang {
Some(Lang::Lao) => init::lao_clusters_path(),
Some(Lang::Khmer) => init::khmer_clusters_path(),
Some(Lang::Myanmar) => init::myanmar_clusters_path(),
Some(Lang::Thai) | None => init::thai_cluster_path(),
}
};
let wordcut = match cluster_rule_path {
Some(cluster_rule_path) => {
let cluster_re =
init::wordcut_engine::load_cluster_rules(Path::new(&cluster_rule_path)).unwrap();
init::Wordcut::new_with_cluster_re(dict, cluster_re)
}
None => init::Wordcut::new(dict),
};
let replace_rules_path = if let Some(replace_rules_path) = args.replace_rules_path {
Some(replace_rules_path)
} else {
match lang {
Some(Lang::Thai) | None => init::thai_replace_rules_path(),
_ => None,
}
};
let replace_rules = if let Some(replace_rules_path) = replace_rules_path {
replacer::load_imm_rules(&replace_rules_path).expect("Load replace rules")
} else {
vec![]
};
for line_opt in io::stdin().lock().lines() {
let cleaned_line = match line_opt {
Ok(line) => line.trim_end_matches('\n').to_string(),
Err(e) => panic!("Cannot read line {}", e),
};
let mod_line = replacer::replace(&replace_rules, &cleaned_line);
let segmented_string = wordcut.put_delimiters(&mod_line, word_delim);
println!("{}", segmented_string);
}
}