harper_core/spell/
merged_dictionary.rs1use std::borrow::Cow;
2use std::hash::{BuildHasher, Hasher};
3use std::sync::Arc;
4
5use foldhash::quality::FixedState;
6use itertools::Itertools;
7
8use super::{FstDictionary, FuzzyMatchResult, WordId, dictionary::Dictionary};
9use crate::{CharString, DictWordMetadata};
10
11#[derive(Clone)]
17pub struct MergedDictionary {
18 children: Vec<Arc<dyn Dictionary>>,
19 hasher_builder: FixedState,
20 child_hashes: Vec<u64>,
21}
22
23impl MergedDictionary {
24 pub fn new() -> Self {
25 Self {
26 children: Vec::new(),
27 hasher_builder: FixedState::default(),
28 child_hashes: Vec::new(),
29 }
30 }
31
32 pub fn add_dictionary(&mut self, dictionary: Arc<dyn Dictionary>) {
33 self.child_hashes.push(self.hash_dictionary(&dictionary));
34 self.children.push(dictionary);
35 }
36
37 fn hash_dictionary(&self, dictionary: &Arc<dyn Dictionary>) -> u64 {
38 if Arc::ptr_eq(
40 dictionary,
41 &(FstDictionary::curated() as Arc<dyn Dictionary>),
42 ) {
43 return 1;
44 }
45
46 let mut hasher = self.hasher_builder.build_hasher();
47
48 dictionary
49 .words_iter()
50 .for_each(|w| w.iter().for_each(|c| hasher.write_u32(*c as u32)));
51
52 hasher.finish()
53 }
54}
55
56impl PartialEq for MergedDictionary {
57 fn eq(&self, other: &Self) -> bool {
58 self.child_hashes == other.child_hashes
59 }
60}
61
62impl Default for MergedDictionary {
63 fn default() -> Self {
64 Self::new()
65 }
66}
67
68impl Dictionary for MergedDictionary {
69 fn get_correct_capitalization_of(&self, word: &[char]) -> Option<&'_ [char]> {
70 for child in &self.children {
71 if let Some(word) = child.get_correct_capitalization_of(word) {
72 return Some(word);
73 }
74 }
75 None
76 }
77
78 fn contains_word(&self, word: &[char]) -> bool {
79 for child in &self.children {
80 if child.contains_word(word) {
81 return true;
82 }
83 }
84 false
85 }
86
87 fn contains_exact_word(&self, word: &[char]) -> bool {
88 for child in &self.children {
89 if child.contains_exact_word(word) {
90 return true;
91 }
92 }
93 false
94 }
95
96 fn get_word_metadata(&self, word: &[char]) -> Option<Cow<'_, DictWordMetadata>> {
97 let mut meta_iter = self
98 .children
99 .iter()
100 .filter_map(|d| d.get_word_metadata(word));
101
102 let first = meta_iter.next()?;
103
104 if let Some(second) = meta_iter.next() {
106 let mut first = first.into_owned();
108 first.merge(&second);
109 meta_iter.for_each(|additional_md| {
110 first.merge(&additional_md);
111 });
112
113 Some(Cow::Owned(first))
114 } else {
115 Some(first)
117 }
118 }
119
120 fn words_iter(&self) -> Box<dyn Iterator<Item = &'_ [char]> + Send + '_> {
121 Box::new(self.children.iter().flat_map(|c| c.words_iter()))
122 }
123
124 fn contains_word_str(&self, word: &str) -> bool {
125 let chars: CharString = word.chars().collect();
126 self.contains_word(&chars)
127 }
128
129 fn contains_exact_word_str(&self, word: &str) -> bool {
130 let chars: CharString = word.chars().collect();
131 self.contains_exact_word(&chars)
132 }
133
134 fn get_word_metadata_str(&self, word: &str) -> Option<Cow<'_, DictWordMetadata>> {
135 let chars: CharString = word.chars().collect();
136 self.get_word_metadata(&chars)
137 }
138
139 fn fuzzy_match(
140 &'_ self,
141 word: &[char],
142 max_distance: u8,
143 max_results: usize,
144 ) -> Vec<FuzzyMatchResult<'_>> {
145 self.children
146 .iter()
147 .flat_map(|d| d.fuzzy_match(word, max_distance, max_results))
148 .sorted_by_key(|r| r.word)
149 .dedup_by(|a, b| a.word == b.word)
150 .sorted_by_key(|r| r.edit_distance)
151 .take(max_results)
152 .collect()
153 }
154
155 fn fuzzy_match_str(
156 &'_ self,
157 word: &str,
158 max_distance: u8,
159 max_results: usize,
160 ) -> Vec<FuzzyMatchResult<'_>> {
161 self.children
162 .iter()
163 .flat_map(|d| d.fuzzy_match_str(word, max_distance, max_results))
164 .sorted_by_key(|r| r.word)
165 .dedup_by(|a, b| a.word == b.word)
166 .sorted_by_key(|r| r.edit_distance)
167 .take(max_results)
168 .collect()
169 }
170
171 fn word_count(&self) -> usize {
172 self.children.iter().map(|d| d.word_count()).sum()
173 }
174
175 fn get_word_from_id(&self, id: &WordId) -> Option<&[char]> {
176 self.children
177 .iter()
178 .find_map(|dict| dict.get_word_from_id(id))
179 }
180
181 fn find_words_with_prefix(&self, prefix: &[char]) -> Vec<Cow<'_, [char]>> {
182 self.children
183 .iter()
184 .flat_map(|dict| dict.find_words_with_prefix(prefix))
185 .sorted()
186 .dedup()
187 .collect()
188 }
189
190 fn find_words_with_common_prefix(&self, word: &[char]) -> Vec<Cow<'_, [char]>> {
191 self.children
192 .iter()
193 .flat_map(|dict| dict.find_words_with_common_prefix(word))
194 .sorted()
195 .dedup()
196 .collect()
197 }
198}
199
#[cfg(test)]
mod tests {
    use std::sync::Arc;

    use crate::DictWordMetadata;
    use crate::spell::{Dictionary, MergedDictionary, MutableDictionary};

    /// A merged dictionary wrapping a single child that holds "Foo" must
    /// accept both casings for the plain lookup but only the stored casing
    /// for the exact lookups, through both the slice and `&str` entry points.
    #[test]
    fn merged_contains_exact_word_str_is_case_sensitive() {
        let mut child = MutableDictionary::new();
        child.append_word_str("Foo", DictWordMetadata::default());

        let mut merged = MergedDictionary::new();
        merged.add_dictionary(Arc::new(child));

        // Plain lookup: either casing is accepted.
        assert!(merged.contains_word_str("Foo"));
        assert!(merged.contains_word_str("foo"));

        // Exact lookup on a char slice: only the stored casing matches.
        assert!(merged.contains_exact_word(&['F', 'o', 'o']));
        assert!(!merged.contains_exact_word(&['f', 'o', 'o']));

        // Exact lookup on a string slice behaves the same way.
        assert!(merged.contains_exact_word_str("Foo"));
        assert!(!merged.contains_exact_word_str("foo"));
    }
}