simple_string_patterns/
alphanumeric.rs1use std::str::FromStr;
2use crate::{utils::add_sanitized_numeric_string, CharType, MatchOccurrences, ToSegments};
3
/// Checks whether a string looks like a plain decimal number.
pub trait IsNumeric {
    /// Returns `true` when the value consists of an optional leading `-`,
    /// ASCII digits and at most one `.` that is not the final character.
    ///
    /// At least one digit is required, so `""`, `"-"` and `"."` are rejected.
    /// No range check against any particular numeric type is performed.
    fn is_numeric(&self) -> bool;
}

impl IsNumeric for str {
    fn is_numeric(&self) -> bool {
        // Only a single minus sign in the very first position is allowed.
        let unsigned = self.strip_prefix('-').unwrap_or(self);
        let mut seen_digit = false;
        let mut seen_point = false;
        let mut chars = unsigned.chars().peekable();
        while let Some(c) = chars.next() {
            match c {
                '0'..='9' => seen_digit = true,
                // One decimal point is allowed, but never as the last character.
                '.' if !seen_point && chars.peek().is_some() => seen_point = true,
                _ => return false,
            }
        }
        // Rejects the empty string and a lone "-", neither of which holds a number.
        seen_digit
    }
}
55
56
57pub trait StripCharacters<'a> where Self:ToSegments {
59
60 fn strip_non_alphanum(&self) -> String;
63
64 fn strip_non_digits(&self) -> String;
66
67 fn strip_spaces(&self) -> String {
69 self.strip_by_type(CharType::Spaces)
70 }
71
72 fn strip_by_type(&self, ct: CharType<'a>) -> String;
74
75 fn strip_by_types(&self, cts: &[CharType<'a>]) -> String;
77
78 fn filter_by_type(&self, ct: CharType<'a>) -> String;
80
81 fn filter_by_types(&self, cts: &[CharType<'a>]) -> String;
83
84 fn to_numeric_strings(&self) -> Vec<String> {
86 self.to_numeric_strings_conditional(false)
87 }
88
89 fn to_numeric_strings_euro(&self) -> Vec<String> {
91 self.to_numeric_strings_conditional(true)
92 }
93
94 fn to_numeric_strings_conditional(&self, enforce_comma_separator: bool) -> Vec<String>;
95
96 fn to_numbers_conditional<T: FromStr>(&self, enforce_comma_separator: bool) -> Vec<T>;
102
103 fn to_numbers<T: FromStr>(&self) -> Vec<T> {
105 self.to_numbers_conditional::<T>(false)
106 }
107
108 fn to_numbers_euro<T: FromStr>(&self) -> Vec<T> {
110 self.to_numbers_conditional::<T>(true)
111 }
112
113 fn split_to_numbers<T: FromStr + Copy>(&self, pattern: &str) -> Vec<T> {
116 self.to_segments(pattern).into_iter().filter_map(|part| part.to_first_number::<T>()).collect::<Vec<T>>()
117 }
118
119 fn correct_numeric_string(&self, enforce_comma_separator: bool) -> String;
123
124 fn to_first_number<T: FromStr + Copy>(&self) -> Option<T> {
126 if let Some(number) = self.to_numbers::<T>().first() {
127 Some(*number)
128 } else {
129 None
130 }
131 }
132
133 fn to_first_number_euro<T: FromStr + Copy>(&self) -> Option<T> {
136 if let Some(number) = self.to_numbers_euro::<T>().first() {
137 Some(*number)
138 } else {
139 None
140 }
141 }
142
143 fn strip_non_numeric(&self) -> String {
146 self.to_numeric_strings().join(" ")
147 }
148
149}
150
151
152impl<'a> StripCharacters<'a> for str {
153
154 fn strip_non_alphanum(&self) -> String {
157 self.chars().into_iter().filter(|c| c.is_alphanumeric()).collect::<String>()
158 }
159
160 fn strip_non_digits(&self) -> String {
164 self.chars().into_iter().filter(|c| c.is_digit(10)).collect::<String>()
165 }
166
167 fn strip_by_type(&self, ct: CharType<'a>) -> String {
169 self.chars().into_iter().filter(|c| ct.is_in_range(c) == false).collect::<String>()
170 }
171
172 fn strip_by_types(&self, cts: &[CharType<'a>]) -> String {
174 self.chars().into_iter().filter(|c| cts.iter().any(|ct| ct.is_in_range(c)) == false).collect::<String>()
175 }
176
177 fn filter_by_type(&self, ct: CharType<'a>) -> String {
179 self.chars().into_iter().filter(|c| ct.is_in_range(c)).collect::<String>()
180 }
181
182 fn filter_by_types(&self, cts: &[CharType<'a>]) -> String {
184 self.chars().into_iter().filter(|c| cts.iter().any(|ct| ct.is_in_range(c))).collect::<String>()
185 }
186
187 fn correct_numeric_string(&self, enforce_comma_separator: bool) -> String {
191 let commas = self.find_matched_indices(",");
192 let last_comma_index = commas.last().unwrap_or(&0).to_owned();
193 let points = self.find_matched_indices(".");
194 let last_point_index = points.last().unwrap_or(&0).to_owned();
195 let num_commas = commas.len();
196 if points.len() > 1 || (last_comma_index > last_point_index && num_commas <= 1) || (enforce_comma_separator && num_commas <= 1) {
197 if num_commas < 1 {
198 self.replace(".", "")
199 } else {
200 let (main, dec_part) = self.to_start_end(",");
201 [main.replace(".", ""), dec_part].join(".")
202 }
203 } else {
204 self.replace(",", "")
205 }
206 }
207
208 fn to_numeric_strings_conditional(&self, enforce_comma_separator: bool) -> Vec<String> {
210 let mut prev_char = ' ';
211 let mut seq_num = 0;
212 let mut num_string = String::new();
213 let mut output: Vec<String> = Vec::new();
214 let last_index = self.chars().count().checked_sub(1).unwrap_or(0);
215 let mut index: usize = 0;
216 let mut prev_is_separator = false;
217 for component in self.chars() {
218 let mut is_end = index == last_index;
219 let is_digit = component.is_digit(10);
220 if prev_is_separator && !is_digit {
223 let num_str_len = num_string.len();
224 if num_str_len > 1 {
225 num_string = (&num_string[0..num_str_len - 1]).to_string();
227 is_end = true;
228 seq_num = num_string.len();
229 }
230 }
231 if is_digit {
232 if prev_char == '-' {
233 num_string.push(prev_char);
234 }
235 num_string.push(component);
236 seq_num += 1;
237 prev_is_separator = false;
238 } else if prev_char.is_digit(10) {
239 match component {
240 '.' | '․' | ',' => {
241 if index == last_index {
243 is_end = true;
244 } else {
245 if component == ',' {
246 num_string.push(',');
247 } else {
248 num_string.push('.');
249 }
250 seq_num = 0;
252 }
253 prev_is_separator = true;
254 },
255 _ => {
256 is_end = true;
257 }
258 }
259 } else {
260 is_end = true;
261 prev_is_separator = false;
262 }
263 if is_end {
264 if seq_num > 0 {
265 add_sanitized_numeric_string(&mut output, &num_string.correct_numeric_string(enforce_comma_separator));
266 num_string = String::new();
268 seq_num = 0;
270 }
271 }
272 prev_char = component;
273 index += 1;
274 }
275 output
276 }
277
278 fn to_numbers_conditional<T: FromStr>(&self, enforce_comma_separator: bool) -> Vec<T> {
280 self.to_numeric_strings_conditional(enforce_comma_separator).into_iter()
281 .map(|s| s.parse::<T>())
282 .filter_map(|s| s.ok())
283 .collect()
284 }
285
286}
287
288
/// Quick checks for the presence of whole character groups in a string.
pub trait CharGroupMatch {
    /// `true` if at least one character is an ASCII digit (`0`-`9`).
    fn has_digits(&self) -> bool;

    /// `true` if at least one character is a digit in the given `radix`.
    /// The radix is passed to `char::is_digit`, which panics for a radix above 36.
    fn has_digits_radix(&self, radix: u8) -> bool;

    /// `true` if at least one character passes `char::is_alphanumeric`.
    fn has_alphanumeric(&self) -> bool;

    /// `true` if at least one character passes `char::is_alphabetic`.
    fn has_alphabetic(&self) -> bool;

    /// `true` if no character is a non-ASCII-digit; an empty string yields `true`.
    fn is_digits_only(&self) -> bool;

    /// `true` if no character falls outside the digits of the given `radix`;
    /// an empty string yields `true`.
    fn is_digits_only_radix(&self, radix: u8) -> bool;
}

impl CharGroupMatch for str {
    fn has_digits(&self) -> bool {
        self.contains(|ch: char| ch.is_ascii_digit())
    }

    fn has_digits_radix(&self, radix: u8) -> bool {
        let base = u32::from(radix);
        self.contains(|ch: char| ch.is_digit(base))
    }

    fn has_alphanumeric(&self) -> bool {
        self.contains(char::is_alphanumeric)
    }

    fn has_alphabetic(&self) -> bool {
        self.contains(char::is_alphabetic)
    }

    fn is_digits_only(&self) -> bool {
        // "All are digits" expressed as "none is a non-digit".
        !self.contains(|ch: char| !ch.is_ascii_digit())
    }

    fn is_digits_only_radix(&self, radix: u8) -> bool {
        let base = u32::from(radix);
        !self.contains(|ch: char| !ch.is_digit(base))
    }
}