harper_core/spell/
merged_dictionary.rs1use std::hash::{BuildHasher, Hasher};
2use std::sync::Arc;
3
4use foldhash::quality::FixedState;
5use itertools::Itertools;
6
7use super::{FstDictionary, WordId};
8use super::{FuzzyMatchResult, dictionary::Dictionary};
9use crate::{CharString, WordMetadata};
10
11#[derive(Clone)]
17pub struct MergedDictionary {
18 children: Vec<Arc<dyn Dictionary>>,
19 hasher_builder: FixedState,
20 child_hashes: Vec<u64>,
21}
22
23impl MergedDictionary {
24 pub fn new() -> Self {
25 Self {
26 children: Vec::new(),
27 hasher_builder: FixedState::default(),
28 child_hashes: Vec::new(),
29 }
30 }
31
32 pub fn add_dictionary(&mut self, dictionary: Arc<dyn Dictionary>) {
33 self.child_hashes.push(self.hash_dictionary(&dictionary));
34 self.children.push(dictionary);
35 }
36
37 fn hash_dictionary(&self, dictionary: &Arc<dyn Dictionary>) -> u64 {
38 if Arc::ptr_eq(
40 dictionary,
41 &(FstDictionary::curated() as Arc<dyn Dictionary>),
42 ) {
43 return 1;
44 }
45
46 let mut hasher = self.hasher_builder.build_hasher();
47
48 dictionary
49 .words_iter()
50 .for_each(|w| w.iter().for_each(|c| hasher.write_u32(*c as u32)));
51
52 hasher.finish()
53 }
54}
55
56impl PartialEq for MergedDictionary {
57 fn eq(&self, other: &Self) -> bool {
58 self.child_hashes == other.child_hashes
59 }
60}
61
62impl Default for MergedDictionary {
63 fn default() -> Self {
64 Self::new()
65 }
66}
67
68impl Dictionary for MergedDictionary {
69 fn get_correct_capitalization_of(&self, word: &[char]) -> Option<&'_ [char]> {
70 for child in &self.children {
71 if let Some(word) = child.get_correct_capitalization_of(word) {
72 return Some(word);
73 }
74 }
75 None
76 }
77
78 fn contains_word(&self, word: &[char]) -> bool {
79 for child in &self.children {
80 if child.contains_word(word) {
81 return true;
82 }
83 }
84 false
85 }
86
87 fn contains_exact_word(&self, word: &[char]) -> bool {
88 for child in &self.children {
89 if child.contains_exact_word(word) {
90 return true;
91 }
92 }
93 false
94 }
95
96 fn get_word_metadata(&self, word: &[char]) -> Option<&WordMetadata> {
97 for child in &self.children {
98 if let Some(found_item) = child.get_word_metadata(word) {
99 return Some(found_item);
100 }
101 }
102
103 None
104 }
105
106 fn words_iter(&self) -> Box<dyn Iterator<Item = &'_ [char]> + Send + '_> {
107 Box::new(self.children.iter().flat_map(|c| c.words_iter()))
108 }
109
110 fn contains_word_str(&self, word: &str) -> bool {
111 let chars: CharString = word.chars().collect();
112 self.contains_word(&chars)
113 }
114
115 fn contains_exact_word_str(&self, word: &str) -> bool {
116 let chars: CharString = word.chars().collect();
117 self.contains_word(&chars)
118 }
119
120 fn get_word_metadata_str(&self, word: &str) -> Option<&WordMetadata> {
121 let chars: CharString = word.chars().collect();
122 self.get_word_metadata(&chars)
123 }
124
125 fn fuzzy_match(
126 &self,
127 word: &[char],
128 max_distance: u8,
129 max_results: usize,
130 ) -> Vec<FuzzyMatchResult> {
131 self.children
132 .iter()
133 .flat_map(|d| d.fuzzy_match(word, max_distance, max_results))
134 .sorted_by_key(|r| r.edit_distance)
135 .take(max_results)
136 .collect()
137 }
138
139 fn fuzzy_match_str(
140 &self,
141 word: &str,
142 max_distance: u8,
143 max_results: usize,
144 ) -> Vec<FuzzyMatchResult> {
145 self.children
146 .iter()
147 .flat_map(|d| d.fuzzy_match_str(word, max_distance, max_results))
148 .sorted_by_key(|r| r.edit_distance)
149 .take(max_results)
150 .collect()
151 }
152
153 fn word_count(&self) -> usize {
154 self.children.iter().map(|d| d.word_count()).sum()
155 }
156
157 fn get_word_from_id(&self, id: &WordId) -> Option<&[char]> {
158 self.children
159 .iter()
160 .find_map(|dict| dict.get_word_from_id(id))
161 }
162}