Skip to main content

symspell/
string_strategy.rs

1#[cfg(not(target_arch = "wasm32"))]
2use unidecode::unidecode;
3
/// Abstraction over how a string is measured, indexed and cut into pieces.
///
/// All positions (`index`, `start`, `end`, `i`) are expressed in the
/// implementor's own unit; each implementor decides what that unit is
/// (for example bytes or `char`s).
pub trait StringStrategy
where
    Self: Clone + Default,
{
    /// Creates a new strategy instance.
    fn new() -> Self;

    /// Returns the owned form of `s` that this strategy works on.
    fn prepare(&self, s: &str) -> String;

    /// Returns the length of `s` in the strategy's unit.
    fn len(&self, s: &str) -> usize;

    /// Returns a copy of `s` without the element at position `index`.
    fn remove(&self, s: &str, index: usize) -> String;

    /// Returns a copy of the half-open range `start..end` of `s`.
    fn slice(&self, s: &str, start: usize, end: usize) -> String;

    /// Returns a copy of `s` from position `start` to its end.
    fn suffix(&self, s: &str, start: usize) -> String;

    /// Returns the character at position `i`, or `None` when there is none.
    fn at(&self, s: &str, i: isize) -> Option<char>;
}
13
/// String strategy whose `prepare` transliterates the input with `unidecode`
/// and whose remaining operations index the string by byte.
///
/// Compiled only for targets other than `wasm32`.
#[cfg(not(target_arch = "wasm32"))]
#[derive(Clone, Default)]
#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))]
pub struct AsciiStringStrategy {}
19
20#[cfg(not(target_arch = "wasm32"))]
21impl StringStrategy for AsciiStringStrategy {
22    fn new() -> Self {
23        Self {}
24    }
25
26    fn prepare(&self, s: &str) -> String {
27        unidecode(s)
28    }
29
30    fn len(&self, s: &str) -> usize {
31        s.len()
32    }
33
34    fn remove(&self, s: &str, index: usize) -> String {
35        let mut x = s.to_string();
36        x.remove(index);
37        x
38    }
39
40    fn slice(&self, s: &str, start: usize, end: usize) -> String {
41        s[start..end].to_string()
42    }
43
44    fn suffix(&self, s: &str, start: usize) -> String {
45        self.slice(s, start, s.len())
46    }
47
48    fn at(&self, s: &str, i: isize) -> Option<char> {
49        if i < 0 || i >= s.len() as isize {
50            return None;
51        }
52
53        Some(s.as_bytes()[i as usize] as char)
54    }
55}
56
57// backward compatibility on typo
58pub type UnicodeiStringStrategy = UnicodeStringStrategy;
59
/// String strategy that keeps the input unchanged in `prepare` and counts
/// every position in `char`s rather than bytes.
#[derive(Clone, Default)]
#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))]
pub struct UnicodeStringStrategy {}
64
65impl StringStrategy for UnicodeStringStrategy {
66    fn new() -> Self {
67        Self {}
68    }
69
70    fn prepare(&self, s: &str) -> String {
71        s.to_string()
72    }
73
74    fn len(&self, s: &str) -> usize {
75        s.chars().count()
76    }
77
78    fn remove(&self, s: &str, index: usize) -> String {
79        s.chars()
80            .enumerate()
81            .filter(|(ii, _)| ii != &index)
82            .map(|(_, ch)| ch)
83            .collect()
84    }
85
86    fn slice(&self, s: &str, start: usize, end: usize) -> String {
87        s.chars().skip(start).take(end - start).collect()
88    }
89
90    fn suffix(&self, s: &str, start: usize) -> String {
91        s.chars().skip(start).collect::<String>()
92    }
93
94    fn at(&self, s: &str, i: isize) -> Option<char> {
95        if i < 0 {
96            return None;
97        }
98
99        s.chars().nth(i as usize)
100    }
101}
102
// Unit tests for both strategies; built only for test runs on non-wasm32 targets.
#[cfg(all(test, not(target_arch = "wasm32")))]
mod tests {
    use super::*;

    // --- AsciiStringStrategy -------------------------------------------------

    #[test]
    fn prepare() {
        let ascii = AsciiStringStrategy::new();
        assert_eq!(ascii.prepare("čičina"), "cicina");
    }

    #[test]
    fn ascii_slice_prefix() {
        let ascii = AsciiStringStrategy::new();
        assert_eq!(ascii.slice("daleko", 0, 3), "dal");
    }

    #[test]
    fn ascii_slice_suffix() {
        let ascii = AsciiStringStrategy::new();
        assert_eq!(ascii.slice("daleko", 3, 6), "eko");
    }

    #[test]
    fn ascii_remove() {
        let ascii = AsciiStringStrategy::new();
        assert_eq!(ascii.remove("daleko", 2), "daeko");
    }

    #[test]
    fn ascii_at_negative() {
        let ascii = AsciiStringStrategy::new();
        assert_eq!(ascii.at("daleko", -2), None);
    }

    #[test]
    fn ascii_at_correct() {
        let ascii = AsciiStringStrategy::new();
        assert_eq!(ascii.at("daleko", 3), Some('e'));
    }

    #[test]
    fn ascii_at_over_limit() {
        let ascii = AsciiStringStrategy::new();
        assert_eq!(ascii.at("daleko", 6), None);
    }

    // --- UnicodeStringStrategy (and its legacy alias) ------------------------

    #[test]
    fn unicodei_strategy() {
        // The misspelled alias must still resolve to a working strategy.
        let legacy = UnicodeiStringStrategy::new();
        assert_eq!(legacy.prepare("ciccio"), "ciccio");
    }

    #[test]
    fn unicode_len_multibyte() {
        let unicode = UnicodeStringStrategy::new();
        for &(input, expected) in &[("héllo", 5), ("日本語", 3)] {
            assert_eq!(unicode.len(input), expected);
        }
    }

    #[test]
    fn unicode_slice_multibyte() {
        let unicode = UnicodeStringStrategy::new();
        for &(input, start, end, expected) in &[("héllo", 0, 3, "hél"), ("日本語", 1, 3, "本語")] {
            assert_eq!(unicode.slice(input, start, end), expected);
        }
    }

    #[test]
    fn unicode_remove_multibyte() {
        let unicode = UnicodeStringStrategy::new();
        for &(input, index, expected) in &[("héllo", 1, "hllo"), ("日本語", 0, "本語")] {
            assert_eq!(unicode.remove(input, index), expected);
        }
    }

    #[test]
    fn unicode_at_multibyte() {
        let unicode = UnicodeStringStrategy::new();
        let cases = [
            ("héllo", 1, Some('é')),
            ("日本語", 2, Some('語')),
            ("日本語", 3, None),
            ("日本語", -1, None),
        ];
        for &(input, position, expected) in &cases {
            assert_eq!(unicode.at(input, position), expected);
        }
    }

    #[test]
    fn unicode_suffix_multibyte() {
        let unicode = UnicodeStringStrategy::new();
        for &(input, start, expected) in &[("héllo", 2, "llo"), ("日本語", 1, "本語")] {
            assert_eq!(unicode.suffix(input, start), expected);
        }
    }
}
179}