harper_core/spell/
merged_dictionary.rs

1use std::borrow::Cow;
2use std::hash::{BuildHasher, Hasher};
3use std::sync::Arc;
4
5use foldhash::quality::FixedState;
6use itertools::Itertools;
7
8use super::{FstDictionary, WordId};
9use super::{FuzzyMatchResult, dictionary::Dictionary};
10use crate::{CharString, DictWordMetadata};
11
/// A simple wrapper over [`Dictionary`] that allows
/// one to merge multiple dictionaries without copying.
///
/// Children are held behind [`Arc`] and consulted in the order they were
/// added. For first-match lookups (capitalization, word-by-id) the first
/// child that knows the word supplies the answer. Word metadata is the
/// exception: the entries from every child that knows the word are folded
/// together with `or`, starting from the earliest child.
#[derive(Clone)]
pub struct MergedDictionary {
    /// The wrapped dictionaries, in insertion order.
    children: Vec<Arc<dyn Dictionary>>,
    /// Builds the hashers used to fingerprint each child when it is added.
    hasher_builder: FixedState,
    /// One fingerprint per entry in `children`, pushed in the same order.
    /// These are what the `PartialEq` implementation compares.
    child_hashes: Vec<u64>,
}
23
24impl MergedDictionary {
25    pub fn new() -> Self {
26        Self {
27            children: Vec::new(),
28            hasher_builder: FixedState::default(),
29            child_hashes: Vec::new(),
30        }
31    }
32
33    pub fn add_dictionary(&mut self, dictionary: Arc<dyn Dictionary>) {
34        self.child_hashes.push(self.hash_dictionary(&dictionary));
35        self.children.push(dictionary);
36    }
37
38    fn hash_dictionary(&self, dictionary: &Arc<dyn Dictionary>) -> u64 {
39        // Hashing the curated dictionary isn't super helpful and takes a long time.
40        if Arc::ptr_eq(
41            dictionary,
42            &(FstDictionary::curated() as Arc<dyn Dictionary>),
43        ) {
44            return 1;
45        }
46
47        let mut hasher = self.hasher_builder.build_hasher();
48
49        dictionary
50            .words_iter()
51            .for_each(|w| w.iter().for_each(|c| hasher.write_u32(*c as u32)));
52
53        hasher.finish()
54    }
55}
56
57impl PartialEq for MergedDictionary {
58    fn eq(&self, other: &Self) -> bool {
59        self.child_hashes == other.child_hashes
60    }
61}
62
63impl Default for MergedDictionary {
64    fn default() -> Self {
65        Self::new()
66    }
67}
68
69impl Dictionary for MergedDictionary {
70    fn get_correct_capitalization_of(&self, word: &[char]) -> Option<&'_ [char]> {
71        for child in &self.children {
72            if let Some(word) = child.get_correct_capitalization_of(word) {
73                return Some(word);
74            }
75        }
76        None
77    }
78
79    fn contains_word(&self, word: &[char]) -> bool {
80        for child in &self.children {
81            if child.contains_word(word) {
82                return true;
83            }
84        }
85        false
86    }
87
88    fn contains_exact_word(&self, word: &[char]) -> bool {
89        for child in &self.children {
90            if child.contains_exact_word(word) {
91                return true;
92            }
93        }
94        false
95    }
96
97    fn get_word_metadata(&self, word: &[char]) -> Option<Cow<'_, DictWordMetadata>> {
98        self.children
99            .iter()
100            .filter_map(|d| d.get_word_metadata(word))
101            .reduce(|acc, md| Cow::Owned(acc.or(&md)))
102    }
103
104    fn words_iter(&self) -> Box<dyn Iterator<Item = &'_ [char]> + Send + '_> {
105        Box::new(self.children.iter().flat_map(|c| c.words_iter()))
106    }
107
108    fn contains_word_str(&self, word: &str) -> bool {
109        let chars: CharString = word.chars().collect();
110        self.contains_word(&chars)
111    }
112
113    fn contains_exact_word_str(&self, word: &str) -> bool {
114        let chars: CharString = word.chars().collect();
115        self.contains_word(&chars)
116    }
117
118    fn get_word_metadata_str(&self, word: &str) -> Option<Cow<'_, DictWordMetadata>> {
119        let chars: CharString = word.chars().collect();
120        self.get_word_metadata(&chars)
121    }
122
123    fn fuzzy_match(
124        &'_ self,
125        word: &[char],
126        max_distance: u8,
127        max_results: usize,
128    ) -> Vec<FuzzyMatchResult<'_>> {
129        self.children
130            .iter()
131            .flat_map(|d| d.fuzzy_match(word, max_distance, max_results))
132            .sorted_by_key(|r| r.word)
133            .dedup_by(|a, b| a.word == b.word)
134            .sorted_by_key(|r| r.edit_distance)
135            .take(max_results)
136            .collect()
137    }
138
139    fn fuzzy_match_str(
140        &'_ self,
141        word: &str,
142        max_distance: u8,
143        max_results: usize,
144    ) -> Vec<FuzzyMatchResult<'_>> {
145        self.children
146            .iter()
147            .flat_map(|d| d.fuzzy_match_str(word, max_distance, max_results))
148            .sorted_by_key(|r| r.word)
149            .dedup_by(|a, b| a.word == b.word)
150            .sorted_by_key(|r| r.edit_distance)
151            .take(max_results)
152            .collect()
153    }
154
155    fn word_count(&self) -> usize {
156        self.children.iter().map(|d| d.word_count()).sum()
157    }
158
159    fn get_word_from_id(&self, id: &WordId) -> Option<&[char]> {
160        self.children
161            .iter()
162            .find_map(|dict| dict.get_word_from_id(id))
163    }
164
165    fn find_words_with_prefix(&self, prefix: &[char]) -> Vec<Cow<'_, [char]>> {
166        self.children
167            .iter()
168            .flat_map(|dict| dict.find_words_with_prefix(prefix))
169            .sorted()
170            .dedup()
171            .collect()
172    }
173
174    fn find_words_with_common_prefix(&self, word: &[char]) -> Vec<Cow<'_, [char]>> {
175        self.children
176            .iter()
177            .flat_map(|dict| dict.find_words_with_common_prefix(word))
178            .sorted()
179            .dedup()
180            .collect()
181    }
182}