harper_core/spell/
merged_dictionary.rs

1use std::hash::{BuildHasher, Hasher};
2use std::sync::Arc;
3
4use foldhash::quality::FixedState;
5use itertools::Itertools;
6
7use super::{FstDictionary, WordId};
8use super::{FuzzyMatchResult, dictionary::Dictionary};
9use crate::{CharString, WordMetadata};
10
11/// A simple wrapper over [`Dictionary`] that allows
12/// one to merge multiple dictionaries without copying.
13///
14/// In cases where more than one dictionary contains a word, data in the first
15/// dictionary inserted will be returned.
16#[derive(Clone)]
17pub struct MergedDictionary {
18    children: Vec<Arc<dyn Dictionary>>,
19    hasher_builder: FixedState,
20    child_hashes: Vec<u64>,
21}
22
23impl MergedDictionary {
24    pub fn new() -> Self {
25        Self {
26            children: Vec::new(),
27            hasher_builder: FixedState::default(),
28            child_hashes: Vec::new(),
29        }
30    }
31
32    pub fn add_dictionary(&mut self, dictionary: Arc<dyn Dictionary>) {
33        self.child_hashes.push(self.hash_dictionary(&dictionary));
34        self.children.push(dictionary);
35    }
36
37    fn hash_dictionary(&self, dictionary: &Arc<dyn Dictionary>) -> u64 {
38        // Hashing the curated dictionary isn't super helpful and takes a long time.
39        if Arc::ptr_eq(
40            dictionary,
41            &(FstDictionary::curated() as Arc<dyn Dictionary>),
42        ) {
43            return 1;
44        }
45
46        let mut hasher = self.hasher_builder.build_hasher();
47
48        dictionary
49            .words_iter()
50            .for_each(|w| w.iter().for_each(|c| hasher.write_u32(*c as u32)));
51
52        hasher.finish()
53    }
54}
55
56impl PartialEq for MergedDictionary {
57    fn eq(&self, other: &Self) -> bool {
58        self.child_hashes == other.child_hashes
59    }
60}
61
62impl Default for MergedDictionary {
63    fn default() -> Self {
64        Self::new()
65    }
66}
67
68impl Dictionary for MergedDictionary {
69    fn get_correct_capitalization_of(&self, word: &[char]) -> Option<&'_ [char]> {
70        for child in &self.children {
71            if let Some(word) = child.get_correct_capitalization_of(word) {
72                return Some(word);
73            }
74        }
75        None
76    }
77
78    fn contains_word(&self, word: &[char]) -> bool {
79        for child in &self.children {
80            if child.contains_word(word) {
81                return true;
82            }
83        }
84        false
85    }
86
87    fn contains_exact_word(&self, word: &[char]) -> bool {
88        for child in &self.children {
89            if child.contains_exact_word(word) {
90                return true;
91            }
92        }
93        false
94    }
95
96    fn get_word_metadata(&self, word: &[char]) -> Option<&WordMetadata> {
97        for child in &self.children {
98            if let Some(found_item) = child.get_word_metadata(word) {
99                return Some(found_item);
100            }
101        }
102
103        None
104    }
105
106    fn words_iter(&self) -> Box<dyn Iterator<Item = &'_ [char]> + Send + '_> {
107        Box::new(self.children.iter().flat_map(|c| c.words_iter()))
108    }
109
110    fn contains_word_str(&self, word: &str) -> bool {
111        let chars: CharString = word.chars().collect();
112        self.contains_word(&chars)
113    }
114
115    fn contains_exact_word_str(&self, word: &str) -> bool {
116        let chars: CharString = word.chars().collect();
117        self.contains_word(&chars)
118    }
119
120    fn get_word_metadata_str(&self, word: &str) -> Option<&WordMetadata> {
121        let chars: CharString = word.chars().collect();
122        self.get_word_metadata(&chars)
123    }
124
125    fn fuzzy_match(
126        &self,
127        word: &[char],
128        max_distance: u8,
129        max_results: usize,
130    ) -> Vec<FuzzyMatchResult> {
131        self.children
132            .iter()
133            .flat_map(|d| d.fuzzy_match(word, max_distance, max_results))
134            .sorted_by_key(|r| r.edit_distance)
135            .take(max_results)
136            .collect()
137    }
138
139    fn fuzzy_match_str(
140        &self,
141        word: &str,
142        max_distance: u8,
143        max_results: usize,
144    ) -> Vec<FuzzyMatchResult> {
145        self.children
146            .iter()
147            .flat_map(|d| d.fuzzy_match_str(word, max_distance, max_results))
148            .sorted_by_key(|r| r.edit_distance)
149            .take(max_results)
150            .collect()
151    }
152
153    fn word_count(&self) -> usize {
154        self.children.iter().map(|d| d.word_count()).sum()
155    }
156
157    fn get_word_from_id(&self, id: &WordId) -> Option<&[char]> {
158        self.children
159            .iter()
160            .find_map(|dict| dict.get_word_from_id(id))
161    }
162}