Skip to main content

harper_core/spell/
merged_dictionary.rs

1use std::borrow::Cow;
2use std::hash::{BuildHasher, Hasher};
3use std::sync::Arc;
4
5use foldhash::quality::FixedState;
6use itertools::Itertools;
7
8use super::{FstDictionary, WordId};
9use super::{FuzzyMatchResult, dictionary::Dictionary};
10use crate::{CharString, DictWordMetadata};
11
12/// A simple wrapper over [`Dictionary`] that allows
13/// one to merge multiple dictionaries without copying.
14///
15/// In cases where more than one dictionary contains a word, data in the first
16/// dictionary inserted will be returned.
17#[derive(Clone)]
18pub struct MergedDictionary {
19    children: Vec<Arc<dyn Dictionary>>,
20    hasher_builder: FixedState,
21    child_hashes: Vec<u64>,
22}
23
24impl MergedDictionary {
25    pub fn new() -> Self {
26        Self {
27            children: Vec::new(),
28            hasher_builder: FixedState::default(),
29            child_hashes: Vec::new(),
30        }
31    }
32
33    pub fn add_dictionary(&mut self, dictionary: Arc<dyn Dictionary>) {
34        self.child_hashes.push(self.hash_dictionary(&dictionary));
35        self.children.push(dictionary);
36    }
37
38    fn hash_dictionary(&self, dictionary: &Arc<dyn Dictionary>) -> u64 {
39        // Hashing the curated dictionary isn't super helpful and takes a long time.
40        if Arc::ptr_eq(
41            dictionary,
42            &(FstDictionary::curated() as Arc<dyn Dictionary>),
43        ) {
44            return 1;
45        }
46
47        let mut hasher = self.hasher_builder.build_hasher();
48
49        dictionary
50            .words_iter()
51            .for_each(|w| w.iter().for_each(|c| hasher.write_u32(*c as u32)));
52
53        hasher.finish()
54    }
55}
56
57impl PartialEq for MergedDictionary {
58    fn eq(&self, other: &Self) -> bool {
59        self.child_hashes == other.child_hashes
60    }
61}
62
63impl Default for MergedDictionary {
64    fn default() -> Self {
65        Self::new()
66    }
67}
68
69impl Dictionary for MergedDictionary {
70    fn get_correct_capitalization_of(&self, word: &[char]) -> Option<&'_ [char]> {
71        for child in &self.children {
72            if let Some(word) = child.get_correct_capitalization_of(word) {
73                return Some(word);
74            }
75        }
76        None
77    }
78
79    fn contains_word(&self, word: &[char]) -> bool {
80        for child in &self.children {
81            if child.contains_word(word) {
82                return true;
83            }
84        }
85        false
86    }
87
88    fn contains_exact_word(&self, word: &[char]) -> bool {
89        for child in &self.children {
90            if child.contains_exact_word(word) {
91                return true;
92            }
93        }
94        false
95    }
96
97    fn get_word_metadata(&self, word: &[char]) -> Option<Cow<'_, DictWordMetadata>> {
98        let mut meta_iter = self
99            .children
100            .iter()
101            .filter_map(|d| d.get_word_metadata(word));
102
103        let first = meta_iter.next()?;
104
105        // Check if multiple entries were found for the word.
106        if let Some(second) = meta_iter.next() {
107            // If so, merge them.
108            let mut first = first.into_owned();
109            first.merge(&second);
110            meta_iter.for_each(|additional_md| {
111                first.merge(&additional_md);
112            });
113
114            Some(Cow::Owned(first))
115        } else {
116            // If not, return the sole found entry.
117            Some(first)
118        }
119    }
120
121    fn words_iter(&self) -> Box<dyn Iterator<Item = &'_ [char]> + Send + '_> {
122        Box::new(self.children.iter().flat_map(|c| c.words_iter()))
123    }
124
125    fn contains_word_str(&self, word: &str) -> bool {
126        let chars: CharString = word.chars().collect();
127        self.contains_word(&chars)
128    }
129
130    fn contains_exact_word_str(&self, word: &str) -> bool {
131        let chars: CharString = word.chars().collect();
132        self.contains_word(&chars)
133    }
134
135    fn get_word_metadata_str(&self, word: &str) -> Option<Cow<'_, DictWordMetadata>> {
136        let chars: CharString = word.chars().collect();
137        self.get_word_metadata(&chars)
138    }
139
140    fn fuzzy_match(
141        &'_ self,
142        word: &[char],
143        max_distance: u8,
144        max_results: usize,
145    ) -> Vec<FuzzyMatchResult<'_>> {
146        self.children
147            .iter()
148            .flat_map(|d| d.fuzzy_match(word, max_distance, max_results))
149            .sorted_by_key(|r| r.word)
150            .dedup_by(|a, b| a.word == b.word)
151            .sorted_by_key(|r| r.edit_distance)
152            .take(max_results)
153            .collect()
154    }
155
156    fn fuzzy_match_str(
157        &'_ self,
158        word: &str,
159        max_distance: u8,
160        max_results: usize,
161    ) -> Vec<FuzzyMatchResult<'_>> {
162        self.children
163            .iter()
164            .flat_map(|d| d.fuzzy_match_str(word, max_distance, max_results))
165            .sorted_by_key(|r| r.word)
166            .dedup_by(|a, b| a.word == b.word)
167            .sorted_by_key(|r| r.edit_distance)
168            .take(max_results)
169            .collect()
170    }
171
172    fn word_count(&self) -> usize {
173        self.children.iter().map(|d| d.word_count()).sum()
174    }
175
176    fn get_word_from_id(&self, id: &WordId) -> Option<&[char]> {
177        self.children
178            .iter()
179            .find_map(|dict| dict.get_word_from_id(id))
180    }
181
182    fn find_words_with_prefix(&self, prefix: &[char]) -> Vec<Cow<'_, [char]>> {
183        self.children
184            .iter()
185            .flat_map(|dict| dict.find_words_with_prefix(prefix))
186            .sorted()
187            .dedup()
188            .collect()
189    }
190
191    fn find_words_with_common_prefix(&self, word: &[char]) -> Vec<Cow<'_, [char]>> {
192        self.children
193            .iter()
194            .flat_map(|dict| dict.find_words_with_common_prefix(word))
195            .sorted()
196            .dedup()
197            .collect()
198    }
199}