pub(crate) mod candidate;
pub(crate) mod composite_key;
pub(crate) mod dictentry;
mod dictionary_candidate;
mod dictionary_parser;
pub mod empty_dict;
pub(crate) mod file_dictionary;
pub mod static_dict;
pub mod user_dictionary;
use crate::error::CskkError;
use crate::form_changer::numeric_form_changer::{
numeric_to_daiji_as_number, numeric_to_kanji_each, numeric_to_simple_kanji_as_number,
numeric_to_thousand_separator, numeric_to_zenkaku,
};
pub(crate) use candidate::Candidate;
pub(crate) use composite_key::CompositeKey;
use dictentry::DictEntry;
pub(in crate::dictionary) use dictionary_candidate::DictionaryCandidate;
use empty_dict::EmptyDictionary;
use log::*;
use regex::Regex;
use static_dict::StaticFileDict;
use std::sync::{Arc, Mutex};
use user_dictionary::UserDictionary;
/// The concrete dictionary implementations that a [`CskkDictionary`] can hold.
///
/// The free functions in this module match on this enum to forward each
/// operation to the wrapped dictionary.
#[derive(Debug)]
pub(crate) enum CskkDictionaryType {
/// Wraps a [`StaticFileDict`].
StaticFile(StaticFileDict),
/// Wraps a [`UserDictionary`].
UserFile(UserDictionary),
/// Wraps an [`EmptyDictionary`].
EmptyDict(EmptyDictionary),
}
/// Public dictionary handle: one [`CskkDictionaryType`] stored behind a [`Mutex`].
///
/// The functions in this module receive it as `Arc<CskkDictionary>` and lock the
/// mutex before every access to the wrapped dictionary.
#[derive(Debug)]
pub struct CskkDictionary {
// Every read or write of the wrapped dictionary goes through this lock.
pub(crate) mutex: Mutex<CskkDictionaryType>,
}
impl CskkDictionary {
    /// Puts an already-built dictionary behind a fresh mutex.
    fn new(dictionary: CskkDictionaryType) -> Self {
        let mutex = Mutex::new(dictionary);
        Self { mutex }
    }

    /// Builds a dictionary backed by a [`StaticFileDict`].
    ///
    /// `file_path` and `encode` are handed to `StaticFileDict::new` unchanged.
    ///
    /// # Errors
    ///
    /// Propagates any error returned by `StaticFileDict::new`.
    pub fn new_static_dict(file_path: &str, encode: &str) -> Result<CskkDictionary, CskkError> {
        let inner = CskkDictionaryType::StaticFile(StaticFileDict::new(file_path, encode)?);
        Ok(Self::new(inner))
    }

    /// Builds a dictionary backed by a [`UserDictionary`].
    ///
    /// `file_path` and `encode` are handed to `UserDictionary::new` unchanged.
    ///
    /// # Errors
    ///
    /// Propagates any error returned by `UserDictionary::new`.
    pub fn new_user_dict(file_path: &str, encode: &str) -> Result<CskkDictionary, CskkError> {
        let inner = CskkDictionaryType::UserFile(UserDictionary::new(file_path, encode)?);
        Ok(Self::new(inner))
    }

    /// Builds a dictionary backed by a default [`EmptyDictionary`].
    ///
    /// This constructor always returns `Ok`; the `Result` keeps the signature
    /// parallel to the other constructors.
    pub fn new_empty_dict() -> Result<CskkDictionary, CskkError> {
        let inner = CskkDictionaryType::EmptyDict(EmptyDictionary::default());
        Ok(Self::new(inner))
    }
}
/// Forwards a confirmed `candidate` for `composite_key` to the wrapped
/// dictionary's `select_candidate` and returns its result unchanged.
///
/// # Panics
///
/// Panics if the dictionary mutex is poisoned.
pub(crate) fn confirm_candidate(
    dictionary: &mut Arc<CskkDictionary>,
    composite_key: &CompositeKey,
    candidate: &Candidate,
) -> Result<bool, CskkError> {
    debug!("confirm: {:?}", candidate);
    // Hold the lock in a named guard for the whole dispatch.
    let mut guard = dictionary.mutex.lock().unwrap();
    match &mut *guard {
        CskkDictionaryType::StaticFile(dict) => dict.select_candidate(composite_key, candidate),
        CskkDictionaryType::UserFile(dict) => dict.select_candidate(composite_key, candidate),
        CskkDictionaryType::EmptyDict(dict) => dict.select_candidate(composite_key, candidate),
    }
}
/// Forwards a purge request for `candidate` under `composite_key` to the wrapped
/// dictionary's `purge_candidate` and returns its result unchanged.
///
/// # Panics
///
/// Panics if the dictionary mutex is poisoned.
pub(crate) fn purge_candidate(
    dictionary: &mut Arc<CskkDictionary>,
    composite_key: &CompositeKey,
    candidate: &Candidate,
) -> Result<bool, CskkError> {
    // Hold the lock in a named guard for the whole dispatch.
    let mut guard = dictionary.mutex.lock().unwrap();
    match &mut *guard {
        CskkDictionaryType::StaticFile(dict) => dict.purge_candidate(composite_key, candidate),
        CskkDictionaryType::UserFile(dict) => dict.purge_candidate(composite_key, candidate),
        CskkDictionaryType::EmptyDict(dict) => dict.purge_candidate(composite_key, candidate),
    }
}
/// Collects every candidate for `composite_key` across `dictionaries`.
///
/// This is the entry point for an ordinary lookup: it delegates to
/// `get_all_candidates_inner` with the numeric re-lookup flag turned off.
pub(crate) fn get_all_candidates(
    dictionaries: &[Arc<CskkDictionary>],
    composite_key: &CompositeKey,
) -> Vec<Candidate> {
    let is_numeric_re_lookup = false;
    get_all_candidates_inner(dictionaries, composite_key, is_numeric_re_lookup)
}
// Matches each run of one or more digits (`\d+`). Shared by the helpers in this
// module that locate the numbers inside a reading.
lazy_static! {
static ref NUM_REGEX: Regex = Regex::new(r"\d+").unwrap();
}
/// Looks `composite_key` up in every dictionary and returns the candidates in
/// dictionary order, with duplicates (by `kouho_text`) removed.
///
/// When `is_numeric_re_lookup` is `false`, digit runs in the key are first
/// replaced with `#` (see `to_composite_to_numeric_dict_key`) and every hit is
/// expanded through `replace_numeric_match`. When it is `true`, the key is used
/// as given and the hits are converted to `Candidate`s without numeric expansion.
fn get_all_candidates_inner(
    dictionaries: &[Arc<CskkDictionary>],
    composite_key: &CompositeKey,
    is_numeric_re_lookup: bool,
) -> Vec<Candidate> {
    let (composite_key, matched_numbers) = if is_numeric_re_lookup {
        (composite_key.to_owned(), Vec::new())
    } else {
        to_composite_to_numeric_dict_key(composite_key)
    };

    let ordered_candidates = get_candidates_in_order(dictionaries, &composite_key);
    if ordered_candidates.is_empty() {
        return Vec::new();
    }

    // Build the set of survivors: sort, then drop consecutive entries sharing a
    // `kouho_text`. The reverse / dedup / reverse dance makes `dedup_by` keep the
    // entry that sorts last within each run instead of the first one.
    let mut deduped_candidates = ordered_candidates.clone();
    deduped_candidates.sort_unstable();
    deduped_candidates.reverse();
    deduped_candidates.dedup_by(|a, b| a.kouho_text == b.kouho_text);
    deduped_candidates.reverse();

    // Walk the hits in their original order and emit each one that is still among
    // the survivors, consuming the survivor so it cannot be emitted again.
    let mut result = Vec::new();
    for candidate in ordered_candidates {
        let mut matched_index: Option<usize> = None;
        for (pos, deduped) in deduped_candidates.iter().enumerate() {
            if *deduped == candidate {
                let converted = Candidate::from_dictionary_candidate(&composite_key, deduped);
                if is_numeric_re_lookup {
                    result.push(converted);
                } else {
                    let mut expanded =
                        replace_numeric_match(&converted, &matched_numbers, dictionaries);
                    result.append(&mut expanded);
                }
                matched_index = Some(pos);
            }
        }
        if let Some(pos) = matched_index {
            deduped_candidates.remove(pos);
        }
    }
    result
}
/// Gathers the raw dictionary candidates for `composite_key`, visiting the
/// dictionaries in slice order.
///
/// For each dictionary that has an entry for the key, the candidates stored
/// under the key's own okuri come first (only when the key has okuri), followed
/// by the candidates stored under `None`. No deduplication happens here.
///
/// # Panics
///
/// Panics if any dictionary mutex is poisoned.
fn get_candidates_in_order(
    dictionaries: &[Arc<CskkDictionary>],
    composite_key: &CompositeKey,
) -> Vec<DictionaryCandidate> {
    let mut result = Vec::new();
    for cskkdict in dictionaries {
        // The guard must outlive `found`, which borrows from the locked dictionary.
        let guard = cskkdict.mutex.lock().unwrap();
        let found = match &*guard {
            CskkDictionaryType::StaticFile(dict) => dict.lookup(composite_key),
            CskkDictionaryType::UserFile(dict) => dict.lookup(composite_key),
            CskkDictionaryType::EmptyDict(dict) => dict.lookup(composite_key),
        };
        if let Some(dict_entry) = found {
            if composite_key.has_okuri() {
                if let Some(strict) = dict_entry.get_candidates(composite_key.get_okuri()) {
                    result.extend(strict.to_owned());
                }
            }
            if let Some(non_strict) = dict_entry.get_candidates(&None) {
                result.extend(non_strict.to_owned());
            }
        }
    }
    result
}
/// Converts a key into its numeric dictionary form.
///
/// Every digit run matched by `NUM_REGEX` in the key's reading is replaced with
/// a single `#`. Returns the rewritten key (okuri carried over unchanged)
/// together with the digit runs that were replaced, in order of appearance.
pub(crate) fn to_composite_to_numeric_dict_key(
    to_composite: &CompositeKey,
) -> (CompositeKey, Vec<String>) {
    let reading = to_composite.get_to_composite();
    let matched_numbers: Vec<String> = NUM_REGEX
        .find_iter(reading)
        .map(|numeric_match| numeric_match.as_str().to_owned())
        .collect();
    // "#" contains no `$`, so it is inserted literally for every match.
    let dict_key = NUM_REGEX.replace_all(reading, "#").into_owned();
    let numeric_key = CompositeKey::new(&dict_key, to_composite.get_okuri().to_owned());
    (numeric_key, matched_numbers)
}
/// Counts the separate digit runs (matches of `NUM_REGEX`) in `to_composite`.
pub(crate) fn numeric_string_count(to_composite: &str) -> usize {
    let digit_runs = NUM_REGEX.find_iter(to_composite);
    digit_runs.count()
}
/// Counts the numeric markers in a dictionary entry, i.e. occurrences of `#`
/// directly followed by one of `0`, `1`, `2`, `3`, `4`, `5` or `8`.
pub(crate) fn numeric_entry_count(entry: &str) -> usize {
    lazy_static! {
        static ref NUM_ENTRY_REGEX: Regex = Regex::new(r"#[0123458]").unwrap();
    }
    let markers = NUM_ENTRY_REGEX.find_iter(entry);
    markers.count()
}
/// Expands the numeric markers of `candidate` into concrete output texts.
///
/// `replace_numeric_string` is run on the candidate's `kouho_text`; each
/// resulting text yields one clone of `candidate` whose `output` is that text.
fn replace_numeric_match(
    candidate: &Candidate,
    matched_numbers: &[String],
    dictionaries: &[Arc<CskkDictionary>],
) -> Vec<Candidate> {
    replace_numeric_string(&candidate.kouho_text, matched_numbers, dictionaries)
        .into_iter()
        .map(|output_text| {
            let mut expanded = candidate.clone();
            expanded.output = output_text;
            expanded
        })
        .collect()
}
/// Expands the numeric markers (`#0`, `#1`, `#2`, `#3`, `#4`, `#5`, `#8`) found
/// in `kouho_text`, using the numbers captured from the reading.
///
/// The n-th marker (in order of appearance) is replaced using `numbers[n]`:
///
/// * `#0` - the number as typed
/// * `#1` - `numeric_to_zenkaku`
/// * `#2` - `numeric_to_kanji_each`
/// * `#3` - `numeric_to_simple_kanji_as_number`
/// * `#4` - every candidate found by looking the number itself up in
///   `dictionaries` (one output per candidate; none if nothing is found)
/// * `#5` - `numeric_to_daiji_as_number`, once with `false` and once with `true`
///   (two outputs per input text)
/// * `#8` - `numeric_to_thousand_separator`
///
/// Returns every resulting text. If `kouho_text` holds more markers than there
/// are `numbers`, the surplus markers are left in the text as they are rather
/// than panicking on an out-of-range index.
pub(crate) fn replace_numeric_string(
    kouho_text: &str,
    numbers: &[String],
    dictionaries: &[Arc<CskkDictionary>],
) -> Vec<String> {
    lazy_static! {
        static ref NUMERIC_ENTRY_REGEX: Regex = Regex::new(r"#[0123458]").unwrap();
    }

    // Replaces the first occurrence of `marker` in each text with `replacement`.
    fn replace_first_in_each(texts: &[String], marker: &str, replacement: &str) -> Vec<String> {
        texts
            .iter()
            .map(|text| text.replacen(marker, replacement, 1))
            .collect()
    }

    let mut current_output_texts = vec![kouho_text.to_string()];
    for (n, entry_match) in NUMERIC_ENTRY_REGEX.find_iter(kouho_text).enumerate() {
        // Once a `#4` lookup produced nothing there is no text left to rewrite.
        if current_output_texts.is_empty() {
            break;
        }
        // More markers than numbers: keep the remaining markers untouched.
        let number = match numbers.get(n) {
            Some(number) => number,
            None => break,
        };
        let marker = entry_match.as_str();
        current_output_texts = match marker {
            "#0" => replace_first_in_each(&current_output_texts, marker, number),
            "#1" => {
                replace_first_in_each(&current_output_texts, marker, &numeric_to_zenkaku(number))
            }
            "#2" => replace_first_in_each(
                &current_output_texts,
                marker,
                &numeric_to_kanji_each(number),
            ),
            "#3" => replace_first_in_each(
                &current_output_texts,
                marker,
                &numeric_to_simple_kanji_as_number(number),
            ),
            "#4" => {
                // Re-lookup: the number itself is used as a dictionary key.
                let numeric_lookup_results = get_all_candidates_inner(
                    dictionaries,
                    &CompositeKey::new(number, None),
                    true,
                );
                let mut replaced_output_texts =
                    Vec::with_capacity(current_output_texts.len() * numeric_lookup_results.len());
                for output_text in &current_output_texts {
                    for numeric_lookup in &numeric_lookup_results {
                        replaced_output_texts.push(output_text.replacen(
                            marker,
                            &numeric_lookup.kouho_text,
                            1,
                        ));
                    }
                }
                replaced_output_texts
            }
            "#5" => {
                // Both variants are emitted, the `false` one first.
                let daiji_false = numeric_to_daiji_as_number(number, false);
                let daiji_true = numeric_to_daiji_as_number(number, true);
                let mut replaced_output_texts =
                    Vec::with_capacity(current_output_texts.len() * 2);
                for output_text in &current_output_texts {
                    replaced_output_texts.push(output_text.replacen(marker, &daiji_false, 1));
                    replaced_output_texts.push(output_text.replacen(marker, &daiji_true, 1));
                }
                replaced_output_texts
            }
            "#8" => replace_first_in_each(
                &current_output_texts,
                marker,
                &numeric_to_thousand_separator(number),
            ),
            // The regex only yields the markers handled above.
            _ => continue,
        };
    }
    current_output_texts
}
/// Returns the candidate at position `selection_pointer` in the full candidate
/// list for `composite_key`, or `None` when the list is shorter than that.
#[allow(dead_code)]
pub(crate) fn get_nth_candidate(
    dictionaries: &[Arc<CskkDictionary>],
    composite_key: &CompositeKey,
    selection_pointer: usize,
) -> Option<Candidate> {
    get_all_candidates(dictionaries, composite_key)
        .into_iter()
        .nth(selection_pointer)
}
/// Common interface of the dictionary implementations in this module.
///
/// Only [`Dictionary::lookup`] is required. Every other method has a default
/// body, so an implementor overrides just the operations it supports.
pub(crate) trait Dictionary {
/// Returns the entry stored for `composite_key`, or `None` when there is none.
fn lookup(&self, composite_key: &CompositeKey) -> Option<&DictEntry>;
/// Whether this dictionary is read-only. Defaults to `true`.
fn is_read_only(&self) -> bool {
true
}
/// Saves the dictionary. The default does nothing and returns `Ok(false)`.
// NOTE(review): the bool presumably reports whether anything was written — confirm with implementors.
fn save_dictionary(&mut self) -> Result<bool, CskkError> {
Ok(false)
}
/// Called when `_candidate` is chosen for `_composite_key`.
/// The default ignores both arguments and returns `Ok(false)`.
fn select_candidate(
&mut self,
_composite_key: &CompositeKey,
_candidate: &Candidate,
) -> Result<bool, CskkError> {
Ok(false)
}
/// Called to purge `_candidate` from `_composite_key`.
/// The default ignores both arguments and returns `Ok(false)`.
fn purge_candidate(
&mut self,
_composite_key: &CompositeKey,
_candidate: &Candidate,
) -> Result<bool, CskkError> {
Ok(false)
}
/// Reloads the dictionary. The default does nothing and returns `Ok(())`.
fn reload(&mut self) -> Result<(), CskkError> {
Ok(())
}
}
#[cfg(test)]
mod test {
    use super::*;

    /// Each digit run counts once, however many digits it contains.
    #[test]
    fn test_numeric_string_count() {
        let cases = [
            ("123つぶ", 1),
            ("1にち1かい", 2),
            ("1じつせんしゅう", 1),
        ];
        for (input, expected) in cases.iter() {
            assert_eq!(numeric_string_count(input), *expected, "input: {}", input);
        }
    }
}