1use crate::utils::splitter::rules::{Outcome, SplitRule};
2
/// Turns an expression string into a sequence of string tokens.
pub trait Splitter {
    /// Splits `expression` into its tokens, returned in the order they were produced.
    fn split_into_tokens(&self, expression: &str) -> Vec<String>;
}
8
/// Selects how `DefaultSplitterBuilder::build` treats whitespace characters.
///
/// `Debug` is derived so the option can be logged and used inside
/// `assert_eq!`/`assert_ne!`.
#[derive(Debug, Copy, Clone, Eq, PartialEq)]
pub enum SplitWhitespaceOption {
    /// No whitespace rule is added to the splitter.
    None,
    /// A whitespace-skipping rule is appended after all other rules.
    Remove,
}
17
/// A [`Splitter`] that tokenizes an expression by applying an ordered list of
/// [`SplitRule`]s to each character it reads.
pub struct DefaultSplitter<'a> {
    /// Rules in priority order: for each character they are tried from first to last.
    rules: Vec<Box<dyn SplitRule + 'a>>,
}
31
32impl<'a> DefaultSplitter<'a> {
33 #[inline]
34 pub fn new(kind: SplitWhitespaceOption) -> DefaultSplitter<'a> {
35 DefaultSplitterBuilder::default()
36 .rule(rules::SplitNumeric)
37 .rule(rules::SplitIdentifier)
38 .rule(rules::SplitOperator)
39 .whitespace(kind)
40 .build()
41 }
42
43 pub fn with_numeric_rule<F: 'a>(rule: F) -> Self
44 where
45 F: SplitRule + 'a,
46 {
47 DefaultSplitterBuilder::default()
48 .rule(rule)
49 .rule(rules::SplitIdentifier)
50 .rule(rules::SplitOperator)
51 .whitespace(SplitWhitespaceOption::Remove)
52 .build()
53 }
54
55 pub fn with_identifier_rule<F: 'a>(rule: F) -> Self
56 where
57 F: SplitRule + 'a,
58 {
59 DefaultSplitterBuilder::default()
60 .rule(rules::SplitNumeric)
61 .rule(rule)
62 .rule(rules::SplitIdentifier)
63 .rule(rules::SplitOperator)
64 .whitespace(SplitWhitespaceOption::Remove)
65 .build()
66 }
67
68 #[inline]
69 pub fn builder() -> DefaultSplitterBuilder<'a> {
70 DefaultSplitterBuilder::default()
71 }
72
73 #[inline]
74 pub fn rules(&self) -> &[Box<dyn SplitRule + 'a>] {
75 self.rules.as_slice()
76 }
77}
78
79impl Splitter for DefaultSplitter<'_> {
80 fn split_into_tokens(&self, expression: &str) -> Vec<String> {
81 let mut tokens = Vec::new();
82 let mut iterator = expression.chars().peekable();
83
84 while let Some(c) = iterator.peek().cloned() {
85 iterator.next();
86
87 let mut next = false;
88
89 for rule in &self.rules {
90 match rule.split(c, &mut iterator) {
91 Outcome::Data(s) => {
92 tokens.push(s);
93 next = true;
94 break;
95 }
96 Outcome::Continue => {
97 continue;
98 }
99 Outcome::Skip => {
100 next = true;
101 break;
102 }
103 }
104 }
105
106 if !next {
107 tokens.push(c.to_string());
108 }
109 }
110
111 tokens
112 }
113}
114
115impl Default for DefaultSplitter<'_> {
116 fn default() -> Self {
117 DefaultSplitter::new(SplitWhitespaceOption::Remove)
118 }
119}
120
/// Builder that collects rules and a whitespace option for a [`DefaultSplitter`].
pub struct DefaultSplitterBuilder<'a> {
    /// Rules collected so far, in the order they will be tried.
    rules: Vec<Box<dyn SplitRule + 'a>>,
    /// Whitespace handling requested through `whitespace`; `None` until it is called.
    whitespace_option: Option<SplitWhitespaceOption>,
}
125
126impl<'a> DefaultSplitterBuilder<'a> {
127 pub fn new() -> Self {
128 DefaultSplitterBuilder {
129 rules: Vec::new(),
130 whitespace_option: None,
131 }
132 }
133
134 pub fn insert_rule<F: 'a>(mut self, index: usize, rule: F) -> Self
135 where
136 F: SplitRule + 'a,
137 {
138 self.rules.insert(index, Box::new(rule));
139 self
140 }
141
142 pub fn rule<F: 'a>(mut self, rule: F) -> Self
143 where
144 F: SplitRule + 'a,
145 {
146 self.rules.push(Box::new(rule));
147 self
148 }
149
150 pub fn whitespace(mut self, option: SplitWhitespaceOption) -> Self {
151 self.whitespace_option = Some(option);
152 self
153 }
154
155 pub fn build(self) -> DefaultSplitter<'a> {
156 let DefaultSplitterBuilder {
157 rules,
158 whitespace_option,
159 ..
160 } = self;
161
162 let mut rules = rules;
163 let whitespace_option = whitespace_option.unwrap_or(SplitWhitespaceOption::None);
164
165 match whitespace_option {
166 SplitWhitespaceOption::None => {}
167 SplitWhitespaceOption::Remove => rules.push(Box::new(rules::SkipWhitespace)),
168 };
169
170 DefaultSplitter { rules }
171 }
172}
173
174impl Default for DefaultSplitterBuilder<'_> {
175 fn default() -> Self {
176 DefaultSplitterBuilder::new()
177 }
178}
179
/// Building blocks used by the splitter: the rule trait, its outcome type and the
/// built-in rules.
pub mod rules {
    use std::collections::HashSet;
    use std::iter::Peekable;
    use std::str::Chars;

    /// Characters recognised as operators by [`SplitOperator`] and, unless customised,
    /// by [`SplitWithOperators`]. Kept in one place so both rules always agree.
    const DEFAULT_OPERATORS: [char; 26] = [
        '~', '`', '!', '@', '#', '$', '%', '^', '&', '*', '-', '+', '_', ':', ';', '"', '\'', '|',
        '\\', '?', '.', '<', '>', '/', '=', ',',
    ];

    /// Result of offering a character to a [`SplitRule`].
    #[derive(Debug, Clone, PartialEq, Eq)]
    pub enum Outcome {
        /// The rule produced a complete token.
        Data(String),
        /// The rule does not apply to the character; another rule may be tried.
        Continue,
        /// The rule handled the character without producing a token.
        Skip,
    }

    /// A single tokenization rule.
    pub trait SplitRule {
        /// Examines `c`, the character just taken from the input, and may consume
        /// further characters from `rest` to complete a token.
        fn split(&self, c: char, rest: &mut Peekable<Chars>) -> Outcome;
    }

    /// Groups an ASCII letter or `_` followed by ASCII letters, digits and `_`
    /// into one token.
    #[derive(Debug, Clone, Copy, Default)]
    pub struct SplitIdentifier;

    impl SplitRule for SplitIdentifier {
        fn split(&self, c: char, rest: &mut Peekable<Chars>) -> Outcome {
            if !(c.is_ascii_alphabetic() || c == '_') {
                return Outcome::Continue;
            }

            let mut identifier = String::from(c);
            while let Some(next) = rest.next_if(|ch| ch.is_ascii_alphanumeric() || *ch == '_') {
                identifier.push(next);
            }

            Outcome::Data(identifier)
        }
    }

    /// Groups an ASCII digit followed by digits and at most one `.` into one token.
    #[derive(Debug, Clone, Copy, Default)]
    pub struct SplitNumeric;

    impl SplitRule for SplitNumeric {
        fn split(&self, c: char, rest: &mut Peekable<Chars>) -> Outcome {
            if !c.is_ascii_digit() {
                return Outcome::Continue;
            }

            let mut number = String::from(c);
            let mut has_decimal_point = false;

            // A second '.' must be rejected by the predicate itself: `next_if` consumes
            // whatever the predicate accepts, so accepting it and then bailing out would
            // silently drop the character from the input.
            while let Some(next) =
                rest.next_if(move |&ch| ch.is_ascii_digit() || (ch == '.' && !has_decimal_point))
            {
                has_decimal_point |= next == '.';
                number.push(next);
            }

            Outcome::Data(number)
        }
    }

    /// Groups a run of consecutive operator characters into one token.
    #[derive(Debug, Clone, Copy, Default)]
    pub struct SplitOperator;

    impl SplitRule for SplitOperator {
        fn split(&self, c: char, rest: &mut Peekable<Chars>) -> Outcome {
            fn is_operator(c: &char) -> bool {
                DEFAULT_OPERATORS.contains(c)
            }

            if !is_operator(&c) {
                return Outcome::Continue;
            }

            let mut operator = String::from(c);
            while let Some(next) = rest.next_if(is_operator) {
                operator.push(next);
            }

            Outcome::Data(operator)
        }
    }

    /// Builder for a [`SplitWithOperators`] rule with a customised operator set.
    #[derive(Debug, Clone)]
    pub struct SplitWithOperatorsBuilder {
        operators: HashSet<char>,
    }

    impl SplitWithOperatorsBuilder {
        /// Creates a builder pre-filled with the same operator characters that
        /// [`SplitOperator`] recognises.
        pub fn new() -> Self {
            SplitWithOperatorsBuilder {
                operators: HashSet::from(DEFAULT_OPERATORS),
            }
        }

        /// Creates a builder with no operator characters.
        pub fn empty() -> Self {
            SplitWithOperatorsBuilder {
                operators: HashSet::new(),
            }
        }

        /// Adds `operator` to the set.
        ///
        /// This method borrows the builder and returns that borrow, so it cannot be
        /// chained directly into the by-value `except` or `build` methods.
        pub fn add_operator(&mut self, operator: char) -> &mut Self {
            self.operators.insert(operator);
            self
        }

        /// Removes `operator` from the set; does nothing when it is not present.
        pub fn except(mut self, operator: char) -> Self {
            self.operators.remove(&operator);
            self
        }

        /// Finishes the builder and returns the rule.
        pub fn build(self) -> SplitWithOperators {
            SplitWithOperators {
                operators: self.operators,
            }
        }
    }

    impl Default for SplitWithOperatorsBuilder {
        /// Same as [`SplitWithOperatorsBuilder::new`].
        fn default() -> Self {
            Self::new()
        }
    }

    /// Groups a run of consecutive characters from a configurable operator set into
    /// one token.
    #[derive(Debug, Clone)]
    pub struct SplitWithOperators {
        operators: HashSet<char>,
    }

    impl SplitWithOperators {
        /// Creates the rule with the default operator set.
        pub fn new() -> Self {
            SplitWithOperatorsBuilder::new().build()
        }

        /// Returns a builder pre-filled with the default operator set.
        pub fn builder() -> SplitWithOperatorsBuilder {
            SplitWithOperatorsBuilder::new()
        }

        /// Returns `true` when `c` belongs to this rule's operator set.
        pub fn is_valid(&self, c: &char) -> bool {
            self.operators.contains(c)
        }
    }

    impl Default for SplitWithOperators {
        /// Same as [`SplitWithOperators::new`].
        fn default() -> Self {
            Self::new()
        }
    }

    impl SplitRule for SplitWithOperators {
        fn split(&self, c: char, rest: &mut Peekable<Chars>) -> Outcome {
            if !self.is_valid(&c) {
                return Outcome::Continue;
            }

            let mut operator = String::from(c);
            while let Some(next) = rest.next_if(|ch| self.is_valid(ch)) {
                operator.push(next);
            }

            Outcome::Data(operator)
        }
    }

    /// Answers [`Outcome::Skip`] for whitespace characters and [`Outcome::Continue`]
    /// for everything else.
    #[derive(Debug, Clone, Copy, Default)]
    pub struct SkipWhitespace;

    impl SplitRule for SkipWhitespace {
        fn split(&self, c: char, _: &mut Peekable<Chars>) -> Outcome {
            if c.is_whitespace() {
                Outcome::Skip
            } else {
                Outcome::Continue
            }
        }
    }

    /// Groups `b` followed by digits into one token when the first digit is `0` or `1`.
    #[cfg(feature = "binary")]
    #[derive(Debug, Clone, Copy, Default)]
    pub struct SplitBinary;

    #[cfg(feature = "binary")]
    impl SplitRule for SplitBinary {
        fn split(&self, c: char, rest: &mut Peekable<Chars>) -> Outcome {
            // Only a 'b' immediately followed by a binary digit starts a literal.
            let starts_literal = c == 'b' && matches!(rest.peek(), Some('0') | Some('1'));
            if !starts_literal {
                return Outcome::Continue;
            }

            let mut literal = String::from(c);

            // NOTE(review): after the first binary digit any ASCII digit (0-9) is
            // consumed, so "b102" yields the single token "b102". Presumably this lets
            // a later stage reject the malformed literal - confirm before narrowing.
            while let Some(digit) = rest.next_if(|ch| ch.is_ascii_digit()) {
                literal.push(digit);
            }

            Outcome::Data(literal)
        }
    }
}
415
#[cfg(test)]
mod tests {
    use super::DefaultSplitter;
    use super::{SplitWhitespaceOption, Splitter};

    /// Checks the default splitter against a table of expressions, then checks that
    /// whitespace is kept as tokens when no whitespace rule is installed.
    #[test]
    fn split_into_tokens() {
        // (expression, expected tokens) for the whitespace-removing default splitter.
        let cases: &[(&str, &[&str])] = &[
            (
                "10 + -2 * Sin(45)",
                &["10", "+", "-", "2", "*", "Sin", "(", "45", ")"],
            ),
            (
                "10 + (-3) * 0.25",
                &["10", "+", "(", "-", "3", ")", "*", "0.25"],
            ),
            (
                "(x+y)-2^10",
                &["(", "x", "+", "y", ")", "-", "2", "^", "10"],
            ),
            (
                "Log2(25) * PI - 2",
                &["Log2", "(", "25", ")", "*", "PI", "-", "2"],
            ),
            ("2PI + 10", &["2", "PI", "+", "10"]),
            ("x = 10", &["x", "=", "10"]),
            ("256 >> 3", &["256", ">>", "3"]),
        ];

        let splitter = DefaultSplitter::default();
        for &(expression, expected) in cases {
            assert_eq!(
                expected.to_vec(),
                splitter.split_into_tokens(expression),
                "unexpected tokens for {:?}",
                expression
            );
        }

        // Without a whitespace rule every space falls through as its own token.
        let keep_whitespace = DefaultSplitter::new(SplitWhitespaceOption::None);
        assert_eq!(
            ["5", " ", "*", " ", "2"].to_vec(),
            keep_whitespace.split_into_tokens("5 * 2")
        );
    }
}
459}