json_surf/
fuzzy.rs

1use std::path::PathBuf;
2use std::convert::{From, TryFrom};
3
4
5use symspell;
6use symspell::{AsciiStringStrategy, SymSpell, Verbosity};
7use crate::prelude::*;
8
9/// One word fuzzier e.g. name city country
10pub struct FuzzyWord {
11    corpus: Option<Vec<FuzzyConfig>>,
12    engine: SymSpell<AsciiStringStrategy>,
13}
14
15impl FuzzyWord {
16    pub fn new(corpus: Option<Vec<FuzzyConfig>>) -> Self {
17        let mut engine: SymSpell<AsciiStringStrategy> = SymSpell::default();
18        if let None = corpus {
19            return Self {
20                corpus,
21                engine,
22            };
23        }
24
25        let corpus = corpus.unwrap();
26        if corpus.is_empty() {
27            let corpus: Option<Vec<FuzzyConfig>> = None;
28            return Self {
29                corpus,
30                engine,
31            };
32        }
33
34        for config in &corpus {
35            engine.load_dictionary(
36                &config.corpus.as_path().to_string_lossy().to_string(),
37                config.term_index,
38                config.count_index,
39                &config.separator,
40            );
41        };
42        let corpus = Some(corpus);
43        Self {
44            corpus,
45            engine,
46        }
47    }
48    pub fn corpus(&self) -> Option<&Vec<FuzzyConfig>> {
49        self.corpus.as_ref()
50    }
51    pub fn lookup(&self, correct: &str) -> Option<Vec<String>> {
52        let suggestions = self.engine.lookup(correct, Verbosity::Top, 2);
53        if suggestions.is_empty() {
54            return None;
55        };
56        let mut result = Vec::<String>::with_capacity(suggestions.len());
57        for suggestion in suggestions {
58            result.push(suggestion.term);
59        };
60        Some(result)
61    }
62}
63
64/// By default bootstrap from default config
65impl Default for FuzzyWord {
66    fn default() -> Self {
67        let config = FuzzyConfig::default();
68        let corpus = Some(vec![config]);
69        FuzzyWord::new(corpus)
70    }
71}
72
73/// Try to bootstrap from dir or path
74impl TryFrom<&str> for FuzzyWord {
75    type Error = IndexError;
76    fn try_from(path: &str) -> Result<Self, Self::Error> {
77        let corpus = PathBuf::from(path);
78        let paths = if corpus.is_dir() {
79            ls(path)?
80        } else {
81            vec![PathBuf::from(path)]
82        };
83        let mut config = Vec::<FuzzyConfig>::with_capacity(paths.len());
84        for path in paths {
85            config.push(FuzzyConfig::from(path));
86        };
87        let corpus = Some(config);
88        Ok(Self::new(
89            corpus,
90        ))
91    }
92}
93
94
/// Describes one dictionary file of word/frequency-count pairs.
/// This goes for bootstrapping symspell.
/// * `corpus` - path of the dictionary file.
/// * `term_index` - The column position of the word.
/// * `count_index` - The column position of the frequency count.
/// * `separator` - Separator between word and frequency count.
#[derive(Clone, Debug, PartialEq, Eq, Hash)]
pub struct FuzzyConfig {
    corpus: PathBuf,
    term_index: i64,
    count_index: i64,
    separator: String,
}

impl FuzzyConfig {
    /// Creates a configuration from its four parts; the values are stored
    /// exactly as given, without validation.
    pub fn new(corpus: PathBuf, term_index: i64, count_index: i64, separator: String) -> Self {
        Self {
            corpus,
            term_index,
            count_index,
            separator,
        }
    }
}
120
121/// By default load names from corpus directory
122impl Default for FuzzyConfig {
123    fn default() -> Self {
124        let path = "corpus/frequency_names.txt";
125        FuzzyConfig::from(path)
126    }
127}
128
129impl From<&str> for FuzzyConfig {
130    fn from(path: &str) -> Self {
131        let corpus = PathBuf::from(path);
132        FuzzyConfig::from(corpus)
133    }
134}
135
136impl From<PathBuf> for FuzzyConfig {
137    fn from(corpus: PathBuf) -> Self {
138        let term_index = 0i64;
139        let count_index = 1i64;
140        let separator = " ".to_string();
141        Self::new(
142            corpus,
143            term_index,
144            count_index,
145            separator,
146        )
147    }
148}
149
150
#[cfg(test)]
mod tests {
    use super::*;
    use std::fs::{create_dir, remove_dir_all, File};
    use std::path::Path;

    /// `FuzzyConfig::default()` equals a configuration spelled out field by field.
    #[test]
    fn validate_default_fuzzy_config() {
        let expected = FuzzyConfig::new(
            PathBuf::from("corpus/frequency_names.txt"),
            0i64,
            1i64,
            " ".to_string(),
        );
        assert_eq!(FuzzyConfig::default(), expected);
    }

    /// The default matcher remembers the single default configuration.
    #[test]
    fn validate_default_fuzzy() {
        let expected = vec![FuzzyConfig::default()];
        let computed = FuzzyWord::default();
        assert_eq!(computed.corpus(), Some(&expected));
    }

    /// The default dictionary corrects a misspelt name.
    #[test]
    fn validate_default_fuzzy_word() {
        let fuzz = FuzzyWord::default();
        let suggestions = fuzz.lookup("surav");
        assert_eq!(suggestions, Some(vec!["saurav".to_string()]));
    }

    /// A matcher built without any configuration reports no corpus.
    #[test]
    fn validate_fuzzy_word() {
        let word = FuzzyWord::new(None);
        assert_eq!(word.corpus(), None);
    }

    /// Building a matcher from an empty dictionary file must not fail.
    #[test]
    fn test_for_empty_corpus() {
        let home = Path::new(".test_for_empty_corpus");
        // A previously aborted run may have left the directory behind; start
        // from a clean slate instead of failing on the leftover.
        let _ = remove_dir_all(home);
        assert!(create_dir(home).is_ok());

        let file = home.join("foo.txt");
        // The handle is a temporary, so the file is closed right after creation.
        assert!(File::create(&file).is_ok());

        let config = FuzzyConfig::new(file, 1, 1, "".to_string());
        let word = FuzzyWord::new(Some(vec![config.clone()]));

        // Clean up before asserting so a failure does not leak the directory.
        let _ = remove_dir_all(home);
        assert_eq!(word.corpus(), Some(&vec![config]));
    }
}
217}