harper_core/spell/
merged_dictionary.rs1use std::borrow::Cow;
2use std::hash::{BuildHasher, Hasher};
3use std::sync::Arc;
4
5use foldhash::quality::FixedState;
6use itertools::Itertools;
7
8use super::{FstDictionary, WordId};
9use super::{FuzzyMatchResult, dictionary::Dictionary};
10use crate::{CharString, DictWordMetadata};
11
12#[derive(Clone)]
18pub struct MergedDictionary {
19 children: Vec<Arc<dyn Dictionary>>,
20 hasher_builder: FixedState,
21 child_hashes: Vec<u64>,
22}
23
24impl MergedDictionary {
25 pub fn new() -> Self {
26 Self {
27 children: Vec::new(),
28 hasher_builder: FixedState::default(),
29 child_hashes: Vec::new(),
30 }
31 }
32
33 pub fn add_dictionary(&mut self, dictionary: Arc<dyn Dictionary>) {
34 self.child_hashes.push(self.hash_dictionary(&dictionary));
35 self.children.push(dictionary);
36 }
37
38 fn hash_dictionary(&self, dictionary: &Arc<dyn Dictionary>) -> u64 {
39 if Arc::ptr_eq(
41 dictionary,
42 &(FstDictionary::curated() as Arc<dyn Dictionary>),
43 ) {
44 return 1;
45 }
46
47 let mut hasher = self.hasher_builder.build_hasher();
48
49 dictionary
50 .words_iter()
51 .for_each(|w| w.iter().for_each(|c| hasher.write_u32(*c as u32)));
52
53 hasher.finish()
54 }
55}
56
57impl PartialEq for MergedDictionary {
58 fn eq(&self, other: &Self) -> bool {
59 self.child_hashes == other.child_hashes
60 }
61}
62
63impl Default for MergedDictionary {
64 fn default() -> Self {
65 Self::new()
66 }
67}
68
69impl Dictionary for MergedDictionary {
70 fn get_correct_capitalization_of(&self, word: &[char]) -> Option<&'_ [char]> {
71 for child in &self.children {
72 if let Some(word) = child.get_correct_capitalization_of(word) {
73 return Some(word);
74 }
75 }
76 None
77 }
78
79 fn contains_word(&self, word: &[char]) -> bool {
80 for child in &self.children {
81 if child.contains_word(word) {
82 return true;
83 }
84 }
85 false
86 }
87
88 fn contains_exact_word(&self, word: &[char]) -> bool {
89 for child in &self.children {
90 if child.contains_exact_word(word) {
91 return true;
92 }
93 }
94 false
95 }
96
97 fn get_word_metadata(&self, word: &[char]) -> Option<Cow<'_, DictWordMetadata>> {
98 self.children
99 .iter()
100 .filter_map(|d| d.get_word_metadata(word))
101 .reduce(|acc, md| Cow::Owned(acc.or(&md)))
102 }
103
104 fn words_iter(&self) -> Box<dyn Iterator<Item = &'_ [char]> + Send + '_> {
105 Box::new(self.children.iter().flat_map(|c| c.words_iter()))
106 }
107
108 fn contains_word_str(&self, word: &str) -> bool {
109 let chars: CharString = word.chars().collect();
110 self.contains_word(&chars)
111 }
112
113 fn contains_exact_word_str(&self, word: &str) -> bool {
114 let chars: CharString = word.chars().collect();
115 self.contains_word(&chars)
116 }
117
118 fn get_word_metadata_str(&self, word: &str) -> Option<Cow<'_, DictWordMetadata>> {
119 let chars: CharString = word.chars().collect();
120 self.get_word_metadata(&chars)
121 }
122
123 fn fuzzy_match(
124 &'_ self,
125 word: &[char],
126 max_distance: u8,
127 max_results: usize,
128 ) -> Vec<FuzzyMatchResult<'_>> {
129 self.children
130 .iter()
131 .flat_map(|d| d.fuzzy_match(word, max_distance, max_results))
132 .sorted_by_key(|r| r.word)
133 .dedup_by(|a, b| a.word == b.word)
134 .sorted_by_key(|r| r.edit_distance)
135 .take(max_results)
136 .collect()
137 }
138
139 fn fuzzy_match_str(
140 &'_ self,
141 word: &str,
142 max_distance: u8,
143 max_results: usize,
144 ) -> Vec<FuzzyMatchResult<'_>> {
145 self.children
146 .iter()
147 .flat_map(|d| d.fuzzy_match_str(word, max_distance, max_results))
148 .sorted_by_key(|r| r.word)
149 .dedup_by(|a, b| a.word == b.word)
150 .sorted_by_key(|r| r.edit_distance)
151 .take(max_results)
152 .collect()
153 }
154
155 fn word_count(&self) -> usize {
156 self.children.iter().map(|d| d.word_count()).sum()
157 }
158
159 fn get_word_from_id(&self, id: &WordId) -> Option<&[char]> {
160 self.children
161 .iter()
162 .find_map(|dict| dict.get_word_from_id(id))
163 }
164
165 fn find_words_with_prefix(&self, prefix: &[char]) -> Vec<Cow<'_, [char]>> {
166 self.children
167 .iter()
168 .flat_map(|dict| dict.find_words_with_prefix(prefix))
169 .sorted()
170 .dedup()
171 .collect()
172 }
173
174 fn find_words_with_common_prefix(&self, word: &[char]) -> Vec<Cow<'_, [char]>> {
175 self.children
176 .iter()
177 .flat_map(|dict| dict.find_words_with_common_prefix(word))
178 .sorted()
179 .dedup()
180 .collect()
181 }
182}