harper_core/spell/
merged_dictionary.rs1use std::borrow::Cow;
2use std::hash::{BuildHasher, Hasher};
3use std::sync::Arc;
4
5use foldhash::quality::FixedState;
6use itertools::Itertools;
7
8use super::{FstDictionary, WordId};
9use super::{FuzzyMatchResult, dictionary::Dictionary};
10use crate::{CharString, DictWordMetadata};
11
12#[derive(Clone)]
18pub struct MergedDictionary {
19 children: Vec<Arc<dyn Dictionary>>,
20 hasher_builder: FixedState,
21 child_hashes: Vec<u64>,
22}
23
24impl MergedDictionary {
25 pub fn new() -> Self {
26 Self {
27 children: Vec::new(),
28 hasher_builder: FixedState::default(),
29 child_hashes: Vec::new(),
30 }
31 }
32
33 pub fn add_dictionary(&mut self, dictionary: Arc<dyn Dictionary>) {
34 self.child_hashes.push(self.hash_dictionary(&dictionary));
35 self.children.push(dictionary);
36 }
37
38 fn hash_dictionary(&self, dictionary: &Arc<dyn Dictionary>) -> u64 {
39 if Arc::ptr_eq(
41 dictionary,
42 &(FstDictionary::curated() as Arc<dyn Dictionary>),
43 ) {
44 return 1;
45 }
46
47 let mut hasher = self.hasher_builder.build_hasher();
48
49 dictionary
50 .words_iter()
51 .for_each(|w| w.iter().for_each(|c| hasher.write_u32(*c as u32)));
52
53 hasher.finish()
54 }
55}
56
57impl PartialEq for MergedDictionary {
58 fn eq(&self, other: &Self) -> bool {
59 self.child_hashes == other.child_hashes
60 }
61}
62
63impl Default for MergedDictionary {
64 fn default() -> Self {
65 Self::new()
66 }
67}
68
69impl Dictionary for MergedDictionary {
70 fn get_correct_capitalization_of(&self, word: &[char]) -> Option<&'_ [char]> {
71 for child in &self.children {
72 if let Some(word) = child.get_correct_capitalization_of(word) {
73 return Some(word);
74 }
75 }
76 None
77 }
78
79 fn contains_word(&self, word: &[char]) -> bool {
80 for child in &self.children {
81 if child.contains_word(word) {
82 return true;
83 }
84 }
85 false
86 }
87
88 fn contains_exact_word(&self, word: &[char]) -> bool {
89 for child in &self.children {
90 if child.contains_exact_word(word) {
91 return true;
92 }
93 }
94 false
95 }
96
97 fn get_word_metadata(&self, word: &[char]) -> Option<Cow<'_, DictWordMetadata>> {
98 let mut meta_iter = self
99 .children
100 .iter()
101 .filter_map(|d| d.get_word_metadata(word));
102
103 let first = meta_iter.next()?;
104
105 if let Some(second) = meta_iter.next() {
107 let mut first = first.into_owned();
109 first.merge(&second);
110 meta_iter.for_each(|additional_md| {
111 first.merge(&additional_md);
112 });
113
114 Some(Cow::Owned(first))
115 } else {
116 Some(first)
118 }
119 }
120
121 fn words_iter(&self) -> Box<dyn Iterator<Item = &'_ [char]> + Send + '_> {
122 Box::new(self.children.iter().flat_map(|c| c.words_iter()))
123 }
124
125 fn contains_word_str(&self, word: &str) -> bool {
126 let chars: CharString = word.chars().collect();
127 self.contains_word(&chars)
128 }
129
130 fn contains_exact_word_str(&self, word: &str) -> bool {
131 let chars: CharString = word.chars().collect();
132 self.contains_word(&chars)
133 }
134
135 fn get_word_metadata_str(&self, word: &str) -> Option<Cow<'_, DictWordMetadata>> {
136 let chars: CharString = word.chars().collect();
137 self.get_word_metadata(&chars)
138 }
139
140 fn fuzzy_match(
141 &'_ self,
142 word: &[char],
143 max_distance: u8,
144 max_results: usize,
145 ) -> Vec<FuzzyMatchResult<'_>> {
146 self.children
147 .iter()
148 .flat_map(|d| d.fuzzy_match(word, max_distance, max_results))
149 .sorted_by_key(|r| r.word)
150 .dedup_by(|a, b| a.word == b.word)
151 .sorted_by_key(|r| r.edit_distance)
152 .take(max_results)
153 .collect()
154 }
155
156 fn fuzzy_match_str(
157 &'_ self,
158 word: &str,
159 max_distance: u8,
160 max_results: usize,
161 ) -> Vec<FuzzyMatchResult<'_>> {
162 self.children
163 .iter()
164 .flat_map(|d| d.fuzzy_match_str(word, max_distance, max_results))
165 .sorted_by_key(|r| r.word)
166 .dedup_by(|a, b| a.word == b.word)
167 .sorted_by_key(|r| r.edit_distance)
168 .take(max_results)
169 .collect()
170 }
171
172 fn word_count(&self) -> usize {
173 self.children.iter().map(|d| d.word_count()).sum()
174 }
175
176 fn get_word_from_id(&self, id: &WordId) -> Option<&[char]> {
177 self.children
178 .iter()
179 .find_map(|dict| dict.get_word_from_id(id))
180 }
181
182 fn find_words_with_prefix(&self, prefix: &[char]) -> Vec<Cow<'_, [char]>> {
183 self.children
184 .iter()
185 .flat_map(|dict| dict.find_words_with_prefix(prefix))
186 .sorted()
187 .dedup()
188 .collect()
189 }
190
191 fn find_words_with_common_prefix(&self, word: &[char]) -> Vec<Cow<'_, [char]>> {
192 self.children
193 .iter()
194 .flat_map(|dict| dict.find_words_with_common_prefix(word))
195 .sorted()
196 .dedup()
197 .collect()
198 }
199}