symspell/
string_strategy.rs1#[cfg(not(target_arch = "wasm32"))]
2use unidecode::unidecode;
3
/// Abstraction over how strings are normalised, measured and cut apart.
///
/// The implementations in this file interpret every position argument
/// (`index`, `start`, `end`, `i`) in the same unit that
/// [`StringStrategy::len`] reports: bytes for the ASCII strategy, `char`s
/// for the Unicode strategy.
pub trait StringStrategy: Clone + Default {
    /// Creates a new strategy value.
    fn new() -> Self;

    /// Returns an owned, strategy-specific normalisation of `s`.
    fn prepare(&self, s: &str) -> String;

    /// Returns the length of `s` in the strategy's own unit.
    fn len(&self, s: &str) -> usize;

    /// Returns a copy of `s` with the element at position `index` removed.
    fn remove(&self, s: &str, index: usize) -> String;

    /// Returns the part of `s` from position `start` (inclusive) to `end`
    /// (exclusive) as an owned string.
    fn slice(&self, s: &str, start: usize, end: usize) -> String;

    /// Returns the part of `s` from position `start` to its end.
    fn suffix(&self, s: &str, start: usize) -> String;

    /// Returns the character at position `i`, if there is one.
    ///
    /// `i` is signed; the implementations in this file answer `None` for a
    /// negative or past-the-end position.
    fn at(&self, s: &str, i: isize) -> Option<char>;
}
13
/// Field-less marker type that selects the ASCII string strategy.
///
/// Not compiled for `wasm32` targets. When the `serde` feature is enabled
/// the type additionally derives `Serialize` and `Deserialize`.
#[cfg(not(target_arch = "wasm32"))]
#[derive(Clone, Debug, Default)]
#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))]
pub struct AsciiStringStrategy {}
19
20#[cfg(not(target_arch = "wasm32"))]
21impl StringStrategy for AsciiStringStrategy {
22 fn new() -> Self {
23 Self {}
24 }
25
26 fn prepare(&self, s: &str) -> String {
27 unidecode(s)
28 }
29
30 fn len(&self, s: &str) -> usize {
31 s.len()
32 }
33
34 fn remove(&self, s: &str, index: usize) -> String {
35 let mut x = s.to_string();
36 x.remove(index);
37 x
38 }
39
40 fn slice(&self, s: &str, start: usize, end: usize) -> String {
41 s[start..end].to_string()
42 }
43
44 fn suffix(&self, s: &str, start: usize) -> String {
45 self.slice(s, start, s.len())
46 }
47
48 fn at(&self, s: &str, i: isize) -> Option<char> {
49 if i < 0 || i >= s.len() as isize {
50 return None;
51 }
52
53 Some(s.as_bytes()[i as usize] as char)
54 }
55}
56
57pub type UnicodeiStringStrategy = UnicodeStringStrategy;
59
/// Field-less marker type that selects the Unicode string strategy.
///
/// When the `serde` feature is enabled the type additionally derives
/// `Serialize` and `Deserialize`.
#[derive(Clone, Debug, Default)]
#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))]
pub struct UnicodeStringStrategy {}
64
65impl StringStrategy for UnicodeStringStrategy {
66 fn new() -> Self {
67 Self {}
68 }
69
70 fn prepare(&self, s: &str) -> String {
71 s.to_string()
72 }
73
74 fn len(&self, s: &str) -> usize {
75 s.chars().count()
76 }
77
78 fn remove(&self, s: &str, index: usize) -> String {
79 s.chars()
80 .enumerate()
81 .filter(|(ii, _)| ii != &index)
82 .map(|(_, ch)| ch)
83 .collect()
84 }
85
86 fn slice(&self, s: &str, start: usize, end: usize) -> String {
87 s.chars().skip(start).take(end - start).collect()
88 }
89
90 fn suffix(&self, s: &str, start: usize) -> String {
91 s.chars().skip(start).collect::<String>()
92 }
93
94 fn at(&self, s: &str, i: isize) -> Option<char> {
95 if i < 0 {
96 return None;
97 }
98
99 s.chars().nth(i as usize)
100 }
101}
102
// Unit tests for both strategies. The whole module is gated off on `wasm32`
// because `AsciiStringStrategy` is not compiled for that target.
#[cfg(not(target_arch = "wasm32"))]
#[cfg(test)]
mod tests {
    use super::*;

    // ---- AsciiStringStrategy ------------------------------------------

    #[test]
    fn prepare() {
        assert_eq!(AsciiStringStrategy::new().prepare("čičina"), "cicina");
    }

    #[test]
    fn ascii_len() {
        assert_eq!(AsciiStringStrategy::new().len("daleko"), 6);
        assert_eq!(AsciiStringStrategy::new().len(""), 0);
    }

    #[test]
    fn ascii_slice_prefix() {
        assert_eq!(AsciiStringStrategy::new().slice("daleko", 0, 3), "dal");
    }

    #[test]
    fn ascii_slice_suffix() {
        assert_eq!(AsciiStringStrategy::new().slice("daleko", 3, 6), "eko");
    }

    #[test]
    fn ascii_suffix() {
        assert_eq!(AsciiStringStrategy::new().suffix("daleko", 3), "eko");
        assert_eq!(AsciiStringStrategy::new().suffix("daleko", 6), "");
    }

    #[test]
    fn ascii_remove() {
        assert_eq!(AsciiStringStrategy::new().remove("daleko", 2), "daeko");
    }

    #[test]
    fn ascii_at_negative() {
        assert_eq!(AsciiStringStrategy::new().at("daleko", -2), None);
    }

    #[test]
    fn ascii_at_correct() {
        assert_eq!(AsciiStringStrategy::new().at("daleko", 3), Some('e'));
    }

    #[test]
    fn ascii_at_over_limit() {
        assert_eq!(AsciiStringStrategy::new().at("daleko", 6), None);
    }

    // ---- UnicodeStringStrategy (and its alias) ------------------------

    #[test]
    fn unicodei_strategy() {
        assert_eq!(UnicodeiStringStrategy::new().prepare("ciccio"), "ciccio");
    }

    #[test]
    fn unicode_prepare_keeps_multibyte_text() {
        assert_eq!(UnicodeStringStrategy::new().prepare("héllo"), "héllo");
    }

    #[test]
    fn unicode_len_multibyte() {
        assert_eq!(UnicodeStringStrategy::new().len("héllo"), 5);
        assert_eq!(UnicodeStringStrategy::new().len("日本語"), 3);
    }

    #[test]
    fn unicode_slice_multibyte() {
        assert_eq!(UnicodeStringStrategy::new().slice("héllo", 0, 3), "hél");
        assert_eq!(UnicodeStringStrategy::new().slice("日本語", 1, 3), "本語");
    }

    #[test]
    fn unicode_remove_multibyte() {
        assert_eq!(UnicodeStringStrategy::new().remove("héllo", 1), "hllo");
        assert_eq!(UnicodeStringStrategy::new().remove("日本語", 0), "本語");
    }

    #[test]
    fn unicode_at_multibyte() {
        assert_eq!(UnicodeStringStrategy::new().at("héllo", 1), Some('é'));
        assert_eq!(UnicodeStringStrategy::new().at("日本語", 2), Some('語'));
        assert_eq!(UnicodeStringStrategy::new().at("日本語", 3), None);
        assert_eq!(UnicodeStringStrategy::new().at("日本語", -1), None);
    }

    #[test]
    fn unicode_suffix_multibyte() {
        assert_eq!(UnicodeStringStrategy::new().suffix("héllo", 2), "llo");
        assert_eq!(UnicodeStringStrategy::new().suffix("日本語", 1), "本語");
    }
}