1pub(crate) mod candidate;
2pub(crate) mod composite_key;
3pub(crate) mod dictentry;
4mod dictionary_candidate;
5mod dictionary_parser;
6pub mod empty_dict;
7pub(crate) mod file_dictionary;
8mod lru_ordered_map;
9pub mod static_dict;
10pub mod user_dictionary;
11
12use crate::dictionary::dictionary_candidate::{CompletionCandidate, DictionaryEntry};
13use crate::error::CskkError;
14use crate::form_changer::numeric_form_changer::{
15 numeric_to_daiji_as_number, numeric_to_kanji_each, numeric_to_simple_kanji_as_number,
16 numeric_to_thousand_separator, numeric_to_zenkaku,
17};
18pub(crate) use candidate::Candidate;
19pub(crate) use composite_key::CompositeKey;
20use dictentry::DictEntry;
21pub(in crate::dictionary) use dictionary_candidate::DictionaryCandidate;
22use empty_dict::EmptyDictionary;
23use log::*;
24use regex::Regex;
25use static_dict::StaticFileDict;
26use std::sync::{Arc, Mutex};
27use user_dictionary::UserDictionary;
28
/// The concrete dictionary implementations that a [`CskkDictionary`] can wrap.
///
/// Functions in this module match on the variant to dispatch to the wrapped dictionary.
#[derive(Debug)]
pub(crate) enum CskkDictionaryType {
    /// Wraps a [`StaticFileDict`].
    StaticFile(StaticFileDict),
    /// Wraps a [`UserDictionary`].
    UserFile(UserDictionary),
    /// Wraps an [`EmptyDictionary`].
    EmptyDict(EmptyDictionary),
}
36
/// A dictionary guarded by a mutex, together with a flag saying whether it takes part in
/// completion.
#[derive(Debug)]
pub struct CskkDictionary {
    /// When `false`, `get_all_complete_inner` skips this dictionary.
    is_completable: bool,
    /// Guards the wrapped dictionary; the functions in this module lock it before every
    /// lookup or update.
    pub(crate) mutex: Mutex<CskkDictionaryType>,
}
43
44impl CskkDictionary {
45 fn new(dictionary: CskkDictionaryType, is_completable: bool) -> Self {
46 Self {
47 is_completable,
48 mutex: Mutex::new(dictionary),
49 }
50 }
51
52 pub fn new_static_dict(
56 file_path: &str,
57 encode: &str,
58 is_completable: bool,
59 ) -> Result<CskkDictionary, CskkError> {
60 let dictionary = StaticFileDict::new(file_path, encode)?;
61 Ok(CskkDictionary::new(
62 CskkDictionaryType::StaticFile(dictionary),
63 is_completable,
64 ))
65 }
66
67 pub fn new_user_dict(
71 file_path: &str,
72 encode: &str,
73 is_completable: bool,
74 ) -> Result<CskkDictionary, CskkError> {
75 let dictionary = UserDictionary::new(file_path, encode)?;
76 Ok(CskkDictionary::new(
77 CskkDictionaryType::UserFile(dictionary),
78 is_completable,
79 ))
80 }
81
82 pub fn new_empty_dict() -> Result<CskkDictionary, CskkError> {
85 Ok(CskkDictionary::new(
86 CskkDictionaryType::EmptyDict(EmptyDictionary::default()),
87 false,
88 ))
89 }
90}
91
92pub(crate) fn confirm_candidate(
96 dictionary: &Arc<CskkDictionary>,
97 candidate: &Candidate,
98) -> Result<bool, CskkError> {
99 debug!("confirm: {:?}", candidate);
100 #[allow(clippy::significant_drop_in_scrutinee)]
102 match *dictionary.mutex.lock().unwrap() {
103 CskkDictionaryType::StaticFile(ref mut dict) => dict.select_candidate(candidate),
104 CskkDictionaryType::UserFile(ref mut dict) => dict.select_candidate(candidate),
105 CskkDictionaryType::EmptyDict(ref mut dict) => dict.select_candidate(candidate),
106 }
107}
108
109pub(crate) fn purge_candidate(
113 dictionary: &Arc<CskkDictionary>,
114 composite_key: &CompositeKey,
115 candidate: &Candidate,
116) -> Result<bool, CskkError> {
117 #[allow(clippy::significant_drop_in_scrutinee)]
119 match *dictionary.mutex.lock().unwrap() {
120 CskkDictionaryType::StaticFile(ref mut dict) => {
121 dict.purge_candidate(composite_key, candidate)
122 }
123 CskkDictionaryType::UserFile(ref mut dict) => {
124 dict.purge_candidate(composite_key, candidate)
125 }
126 CskkDictionaryType::EmptyDict(ref mut dict) => {
127 dict.purge_candidate(composite_key, candidate)
128 }
129 }
130}
131
/// Returns every candidate for `composite_key` found in `dictionaries`.
///
/// Delegates to `get_all_candidates_inner` with `is_numeric_re_lookup` set to `false`, so the
/// fallback lookup that replaces digit runs in the key with `#` is enabled.
pub(crate) fn get_all_candidates(
    dictionaries: &[Arc<CskkDictionary>],
    composite_key: &CompositeKey,
) -> Vec<Candidate> {
    get_all_candidates_inner(dictionaries, composite_key, false)
}
139
140pub(crate) fn get_all_complete(
146 dictionaries: &[Arc<CskkDictionary>],
147 composite_key: &CompositeKey,
148) -> Vec<Candidate> {
149 let dict_candidates = get_all_complete_inner(dictionaries, composite_key);
150 let deduped_completion_candidate = dedup_candidates(dict_candidates);
151
152 deduped_completion_candidate
153 .into_iter()
154 .map(|x| Candidate::from_completion_candidate(&x))
155 .collect()
156}
157fn get_all_complete_inner(
165 dictionaries: &[Arc<CskkDictionary>],
166 composite_key: &CompositeKey,
167) -> Vec<CompletionCandidate> {
168 let mut result = Vec::new();
169
170 for cskkdict in dictionaries.iter() {
171 if cskkdict.is_completable {
172 let lock = cskkdict.mutex.lock().unwrap();
173 let dict_entries = match &*lock {
174 CskkDictionaryType::StaticFile(dict) => dict.complete(composite_key),
175 CskkDictionaryType::UserFile(dict) => dict.complete(composite_key),
176 CskkDictionaryType::EmptyDict(dict) => dict.complete(composite_key),
177 };
178 for dict_entry in dict_entries {
179 let candidates = dict_entry.get_candidates(composite_key.get_okuri());
180
181 if let Some(candidates) = candidates {
182 result.extend(candidates.iter().map(|x| {
184 CompletionCandidate::from_dictionary_candidate(
185 &dict_entry.midashi,
186 composite_key.get_okuri(),
187 x,
188 )
189 }));
190 }
191 }
192 }
193 }
194
195 result
196}
197
198fn get_all_candidates_inner(
203 dictionaries: &[Arc<CskkDictionary>],
204 composite_key: &CompositeKey,
205 is_numeric_re_lookup: bool,
206) -> Vec<Candidate> {
207 let mut matched_numbers: Vec<String>;
208
209 let exact_match_candidates = get_candidates_in_order(dictionaries, &composite_key);
210 let exact_match_candidates = dedup_candidates(exact_match_candidates);
211 let mut all_candidates: Vec<Candidate> = exact_match_candidates
212 .into_iter()
213 .map(|dictionary_candidate| {
214 Candidate::from_dictionary_candidate(&composite_key, &dictionary_candidate)
215 })
216 .collect();
217
218 if !is_numeric_re_lookup {
219 let replaced_key;
220 (replaced_key, matched_numbers) = to_composite_to_numeric_dict_key(&composite_key);
221 if replaced_key != *composite_key {
222 let numeric_replace_match_candidates =
223 get_candidates_in_order(dictionaries, &replaced_key);
224 let numeric_replace_match_candidates =
225 dedup_candidates(numeric_replace_match_candidates);
226 let mut numeric_replace_match_candidates: Vec<Candidate> =
227 numeric_replace_match_candidates
228 .into_iter()
229 .map(|dictionary_candidate| {
230 Candidate::from_dictionary_candidate(&replaced_key, &dictionary_candidate)
231 })
232 .flat_map(|candidate| {
233 replace_numeric_match(&candidate, &matched_numbers, dictionaries)
234 })
235 .collect();
236 all_candidates.append(&mut numeric_replace_match_candidates);
237 }
238 }
239
240 all_candidates
241}
242
243fn dedup_candidates<T>(dictionary_candidates: Vec<T>) -> Vec<T>
247where
248 T: DictionaryEntry + Ord + Clone,
249{
250 let mut deduped_candidates = vec![];
251 let mut ordered_candidates = vec![];
252
253 deduped_candidates.extend(dictionary_candidates.to_owned());
254 ordered_candidates.extend(dictionary_candidates);
255
256 if deduped_candidates.is_empty() {
257 return vec![];
258 }
259 deduped_candidates.sort_unstable();
260 deduped_candidates.reverse();
262 deduped_candidates.dedup_by(|a, b| a.get_kouho_text() == b.get_kouho_text());
263 deduped_candidates.reverse();
265
266 let mut result = vec![];
267 for candidate in ordered_candidates {
268 let mut matched_index = usize::MAX;
269 for (pos, deduped) in deduped_candidates.iter().enumerate() {
270 if (*deduped).eq(&candidate) {
271 result.push(deduped.to_owned());
272 matched_index = pos;
273 }
274 }
275 if matched_index < usize::MAX {
276 deduped_candidates.remove(matched_index);
277 }
278 }
279
280 result
281}
282
283fn get_candidates_in_order(
291 dictionaries: &[Arc<CskkDictionary>],
292 composite_key: &CompositeKey,
293) -> Vec<DictionaryCandidate> {
294 let mut result = Vec::new();
295
296 for cskkdict in dictionaries.iter() {
297 let lock = cskkdict.mutex.lock().unwrap();
298 if let Some(dict_entry) = match &*lock {
299 CskkDictionaryType::StaticFile(dict) => dict.lookup(composite_key),
300 CskkDictionaryType::UserFile(dict) => dict.lookup(composite_key),
301 CskkDictionaryType::EmptyDict(dict) => dict.lookup(composite_key),
302 } {
303 let strict_okuri_cands = if composite_key.has_okuri() {
304 dict_entry.get_candidates(composite_key.get_okuri())
305 } else {
306 None
307 };
308 if let Some(candidates) = strict_okuri_cands {
309 result.extend(candidates.to_owned());
310 }
311
312 let non_strict_okuri_cands = dict_entry.get_candidates(&None);
313 if let Some(candidates) = non_strict_okuri_cands {
314 result.extend(candidates.to_owned());
315 }
316 }
317 }
318
319 result
320}
321
lazy_static! {
    // Matches each run of one or more digits; shared by the numeric key helpers below.
    static ref NUM_REGEX: Regex = Regex::new(r"\d+").unwrap();
}
325pub(crate) fn to_composite_to_numeric_dict_key(
331 to_composite: &CompositeKey,
332) -> (CompositeKey, Vec<String>) {
333 let mut dict_key = to_composite.get_to_composite().to_owned();
334 let mut matched_numbers = vec![];
335 for numeric_match in NUM_REGEX.find_iter(to_composite.get_to_composite()) {
336 let new_dict_key = dict_key.replacen(numeric_match.as_str(), "#", 1);
337 dict_key = new_dict_key;
338 matched_numbers.push(numeric_match.as_str().to_owned());
339 }
340 (
341 CompositeKey::new(&dict_key, to_composite.get_okuri().to_owned()),
342 matched_numbers,
343 )
344}
345
/// Counts the digit runs in `to_composite`.
///
/// Consecutive digits count once, e.g. `"123つぶ"` has one run and `"1にち1かい"` has two.
pub(crate) fn numeric_string_count(to_composite: &str) -> usize {
    NUM_REGEX.find_iter(to_composite).count()
}
357
/// Counts the numeric placeholders in `entry`.
///
/// A placeholder is `#` followed by one of the digits `0`, `1`, `2`, `3`, `4`, `5` or `8`.
pub(crate) fn numeric_entry_count(entry: &str) -> usize {
    lazy_static! {
        // Same placeholder pattern that `replace_numeric_string` expands.
        static ref NUM_ENTRY_REGEX: Regex = Regex::new(r"#[0123458]").unwrap();
    }
    NUM_ENTRY_REGEX.find_iter(entry).count()
}
372
373fn replace_numeric_match(
375 candidate: &Candidate,
376 matched_numbers: &[String],
377 dictionaries: &[Arc<CskkDictionary>],
378) -> Vec<Candidate> {
379 let output_text_list =
380 replace_numeric_string(&candidate.kouho_text, matched_numbers, dictionaries);
381
382 let mut result = vec![];
383 for output_text in output_text_list {
384 let mut new_candidate = candidate.clone();
385 new_candidate.output = output_text;
386 result.push(new_candidate)
387 }
388 result
389}
390
391pub(crate) fn replace_numeric_string(
393 kouho_text: &str,
394 numbers: &[String],
395 dictionaries: &[Arc<CskkDictionary>],
396) -> Vec<String> {
397 lazy_static! {
398 static ref NUMERIC_ENTRY_REGEX: Regex = Regex::new(r"#[0123458]").unwrap();
399 }
400 let mut current_output_texts = vec![kouho_text.to_string()];
401 for (n, entry_match) in NUMERIC_ENTRY_REGEX.find_iter(kouho_text).enumerate() {
402 if n < numbers.len() {
403 match entry_match.as_str() {
404 "#0" => {
405 let mut replaced_output_texts = vec![];
406 for output_text in ¤t_output_texts {
407 replaced_output_texts.push(output_text.replacen("#0", &numbers[n], 1));
408 }
409 current_output_texts = replaced_output_texts;
410 }
411 "#1" => {
412 let mut replaced_output_texts = vec![];
413 for kouho_text in ¤t_output_texts {
414 replaced_output_texts.push(kouho_text.replacen(
415 "#1",
416 &numeric_to_zenkaku(&numbers[n]),
417 1,
418 ));
419 }
420 current_output_texts = replaced_output_texts;
421 }
422 "#2" => {
423 let mut replaced_output_texts = vec![];
424 for kouho_text in ¤t_output_texts {
425 replaced_output_texts.push(kouho_text.replacen(
426 "#2",
427 &numeric_to_kanji_each(&numbers[n]),
428 1,
429 ));
430 }
431 current_output_texts = replaced_output_texts;
432 }
433 "#3" => {
434 let mut replaced_output_texts = vec![];
435 for output_text in ¤t_output_texts {
436 replaced_output_texts.push(output_text.replacen(
437 "#3",
438 &numeric_to_simple_kanji_as_number(&numbers[n]),
439 1,
440 ));
441 }
442 current_output_texts = replaced_output_texts;
443 }
444 "#4" => {
445 let mut replaced_output_texts = vec![];
446 let numeric_lookup_results = get_all_candidates_inner(
447 dictionaries,
448 &CompositeKey::new(&numbers[n], None),
449 true,
450 );
451 for kouho_text in ¤t_output_texts {
452 for numeric_lookup in &numeric_lookup_results {
453 replaced_output_texts.push(kouho_text.replacen(
454 "#4",
455 &numeric_lookup.kouho_text,
456 1,
457 ));
458 }
459 }
460 current_output_texts = replaced_output_texts;
461 }
462 "#5" => {
463 let mut replaced_output_texts = vec![];
464 for kouho_text in ¤t_output_texts {
465 replaced_output_texts.push(kouho_text.replacen(
466 "#5",
467 &numeric_to_daiji_as_number(&numbers[n], false),
468 1,
469 ));
470 replaced_output_texts.push(kouho_text.replacen(
471 "#5",
472 &numeric_to_daiji_as_number(&numbers[n], true),
473 1,
474 ));
475 }
476 current_output_texts = replaced_output_texts;
477 }
478 "#8" => {
479 let mut replaced_output_texts = vec![];
480 for kouho_text in ¤t_output_texts {
481 replaced_output_texts.push(kouho_text.replacen(
482 "#8",
483 &numeric_to_thousand_separator(&numbers[n]),
484 1,
485 ));
486 }
487 current_output_texts = replaced_output_texts;
488 }
489 _ => {}
490 }
491 }
492 }
493 current_output_texts
494}
495
496#[allow(dead_code)]
501pub(crate) fn get_nth_candidate(
502 dictionaries: &[Arc<CskkDictionary>],
503 composite_key: &CompositeKey,
504 selection_pointer: usize,
505) -> Option<Candidate> {
506 let candidates = get_all_candidates(dictionaries, composite_key);
507 candidates.get(selection_pointer).cloned()
508}
509
/// Operations shared by the dictionary implementations in this module.
///
/// Only `lookup` and `complete` must be implemented. Every other method has a default body
/// that changes nothing and reports `false` / `Ok(())`.
pub(crate) trait Dictionary {
    /// Returns the entry registered for `composite_key`, or `None` when there is none.
    fn lookup(&self, composite_key: &CompositeKey) -> Option<&DictEntry>;

    /// Whether this dictionary is read only. Defaults to `true`.
    fn is_read_only(&self) -> bool {
        true
    }
    /// Returns an iterator over the entries that complete `midashi_head`.
    ///
    /// NOTE(review): presumably these are the entries whose midashi starts with the given
    /// key; the matching rule is defined by each implementor, so confirm there.
    fn complete<'a>(
        &'a self,
        midashi_head: &'a CompositeKey,
    ) -> Box<dyn Iterator<Item = &'a DictEntry> + 'a>;
    /// Saves the dictionary. The default does nothing and returns `Ok(false)`.
    fn save_dictionary(&mut self) -> Result<bool, CskkError> {
        Ok(false)
    }

    /// Records that `_candidate` was selected. The default does nothing and returns
    /// `Ok(false)`.
    fn select_candidate(&mut self, _candidate: &Candidate) -> Result<bool, CskkError> {
        Ok(false)
    }
    /// Removes `_candidate` registered under `_composite_key`. The default does nothing and
    /// returns `Ok(false)`.
    fn purge_candidate(
        &mut self,
        _composite_key: &CompositeKey,
        _candidate: &Candidate,
    ) -> Result<bool, CskkError> {
        Ok(false)
    }

    /// Reloads the dictionary. The default does nothing and returns `Ok(())`.
    fn reload(&mut self) -> Result<(), CskkError> {
        Ok(())
    }
}
557
#[cfg(test)]
mod test {
    use super::*;

    /// Consecutive digits count as one numeric string; separated digits count separately.
    #[test]
    fn test_numeric_string_count() {
        assert_eq!(numeric_string_count("123つぶ"), 1);
        assert_eq!(numeric_string_count("1にち1かい"), 2);
        assert_eq!(numeric_string_count("1じつせんしゅう"), 1);
    }

    /// A plain key is found in a static dictionary read from the test fixture.
    #[test]
    fn get_all_candidates_basic() {
        let test_dictionary =
            CskkDictionary::new_static_dict("tests/data/dictionaries/SKK-JISYO.S", "euc-jp", false)
                .unwrap();
        let dictionaries = vec![Arc::new(test_dictionary)];
        let key = CompositeKey::new("あい", None);
        let result = get_all_candidates(&dictionaries, &key);

        assert_eq!(result[0].kouho_text, "愛");
    }

    /// A key with digits is looked up under its `#` form and the placeholders in the
    /// candidate are filled with the typed numbers.
    #[test]
    fn get_all_candidates_numeric_match() {
        let test_dictionary = CskkDictionary::new_static_dict(
            "tests/data/dictionaries/number_jisyo.dat",
            "utf-8",
            false,
        )
        .unwrap();
        let dictionaries = vec![Arc::new(test_dictionary)];
        let key = CompositeKey::new("5/1", None);
        let result = get_all_candidates(&dictionaries, &key);

        assert_eq!(result[0].kouho_text, "#0月#0日");
        assert_eq!(result[0].midashi, "#/#");
        assert_eq!(result[0].output, "5月1日");
    }

    /// Checks the first three candidates returned for a key that contains a digit.
    #[test]
    fn get_all_candidates_numeric_exact_match() {
        let test_dictionary =
            CskkDictionary::new_static_dict("tests/data/dictionaries/maruichi.dat", "utf-8", false)
                .unwrap();
        let dictionaries = vec![Arc::new(test_dictionary)];
        let key = CompositeKey::new("まる1", None);
        let result = get_all_candidates(&dictionaries, &key);

        assert_eq!(result[0].kouho_text, "①");
        assert_eq!(result[1].kouho_text, "❶");
        assert_eq!(result[2].kouho_text, "⓵");
    }
}