mathypad_core/expression/
chumsky_parser.rs

1//! New chumsky-based parser implementation for mathematical expressions
2
3use super::tokens::Token;
4use crate::units::parse_unit;
5use chumsky::prelude::*;
6
7/// Parse a mathematical expression using chumsky
8pub fn parse_expression_chumsky(input: &str) -> Result<Vec<Token>, String> {
9    // Create a simple parser that directly parses from string to tokens
10    let parser = create_token_parser();
11
12    match parser.parse(input).into_result() {
13        Ok(tokens) => {
14            // Validate parentheses are balanced
15            let mut paren_count = 0;
16            for token in &tokens {
17                match token {
18                    Token::LeftParen => paren_count += 1,
19                    Token::RightParen => {
20                        paren_count -= 1;
21                        if paren_count < 0 {
22                            return Err("Unmatched closing parenthesis".to_string());
23                        }
24                    }
25                    _ => {}
26                }
27            }
28            if paren_count != 0 {
29                return Err("Unmatched opening parenthesis".to_string());
30            }
31
32            // Validate no consecutive operators (except minus for negation)
33            for i in 0..tokens.len().saturating_sub(1) {
34                let current = &tokens[i];
35                let next = &tokens[i + 1];
36
37                let is_current_op = matches!(
38                    current,
39                    Token::Plus | Token::Minus | Token::Multiply | Token::Divide | Token::Power
40                );
41                let is_next_op = matches!(
42                    next,
43                    Token::Plus | Token::Minus | Token::Multiply | Token::Divide | Token::Power
44                );
45
46                if is_current_op && is_next_op {
47                    // Allow minus after operators for negation, but not other combinations
48                    if !matches!(next, Token::Minus) {
49                        return Err("Invalid consecutive operators".to_string());
50                    }
51                }
52            }
53
54            Ok(tokens)
55        }
56        Err(errs) => {
57            let error_msg = errs
58                .into_iter()
59                .map(|e| format!("{:?}", e))
60                .collect::<Vec<_>>()
61                .join(", ");
62            Err(error_msg)
63        }
64    }
65}
66
67/// Create the main token parser
68fn create_token_parser<'a>() -> impl Parser<'a, &'a str, Vec<Token>, extra::Err<Rich<'a, char>>> {
69    // Parser for numerical suffixes like "k" for thousands
70    let number_suffix = choice((just('k').to(1_000.0), just('K').to(1_000.0)));
71
72    // Parser for numbers (integers and decimals with optional commas and suffixes)
73    let number = choice((
74        // Numbers with commas (like 1,000 or 1,234.56)
75        text::digits(10)
76            .then(just(',').then(text::digits(10)).repeated())
77            .then(just('.').then(text::digits(10)).or_not())
78            .to_slice(),
79        // Regular numbers without commas
80        text::int(10)
81            .then(just('.').then(text::digits(10)).or_not())
82            .to_slice(),
83    ))
84    .then(number_suffix.or_not())
85    .map(|(s, suffix_opt): (&str, Option<f64>)| {
86        let cleaned = s.replace(",", "");
87        let base_value = cleaned.parse::<f64>().unwrap_or(0.0);
88        if let Some(multiplier) = suffix_opt {
89            base_value * multiplier
90        } else {
91            base_value
92        }
93    });
94
95    // Parser for identifiers (words, but not compound with slashes - those are handled separately)
96    let identifier = text::ascii::ident().map(|s: &str| s.to_string());
97
98    // Parser for the percent symbol
99    let percent_symbol = just('%').map(|_| "%".to_string());
100
101    // Parser for currency symbols
102    let currency_symbol = choice((
103        just('$').to("$"),
104        just('€').to("€"),
105        just('£').to("£"),
106        just('¥').to("¥"),
107        just('₹').to("₹"),
108        just('₩').to("₩"),
109    ))
110    .map(|s: &str| s.to_string());
111
112    // Parser for compound identifiers (like "GiB/s") - only for valid units
113    let compound_identifier = text::ascii::ident()
114        .then(
115            just('/')
116                .padded() // Allow spaces around the slash
117                .then(text::ascii::ident()),
118        )
119        .try_map(|(base, (_, suffix)): (&str, (char, &str)), span| {
120            let compound = format!("{}/{}", base, suffix);
121            // Only allow compound identifiers if they form a valid unit
122            if parse_unit(&compound).is_some() {
123                Ok(compound)
124            } else {
125                Err(Rich::custom(
126                    span,
127                    "Invalid compound identifier - not a valid unit",
128                ))
129            }
130        });
131
132    // Parser for currency rate units (like "$/year", "€/month") - currency symbol followed by /time
133    let currency_rate = currency_symbol
134        .then(just('/'))
135        .then(text::ascii::ident())
136        .try_map(
137            |((currency_str, _), time_str): ((String, char), &str), span| {
138                let compound = format!("{}/{}", currency_str, time_str);
139                // Only allow if it forms a valid rate unit
140                if parse_unit(&compound).is_some() {
141                    Ok(compound)
142                } else {
143                    Err(Rich::custom(span, "Invalid currency rate unit"))
144                }
145            },
146        );
147
148    // Parser for line references (like "line1", "line2", etc.)
149    let line_ref = just("line")
150        .then(text::int(10))
151        .map(|(_, num_str): (_, &str)| {
152            if let Ok(line_num) = num_str.parse::<usize>() {
153                if line_num > 0 {
154                    Token::LineReference(line_num - 1)
155                } else {
156                    Token::LineReference(0)
157                }
158            } else {
159                Token::LineReference(0)
160            }
161        });
162
163    // Parser for keywords
164    let keyword = choice((
165        text::keyword("to").to(Token::To),
166        text::keyword("in").to(Token::In),
167        text::keyword("of").to(Token::Of),
168    ));
169
170    // Parser for operators (including assignment)
171    let operator = choice((
172        just('+').to(Token::Plus),
173        just('-').to(Token::Minus),
174        just('*').to(Token::Multiply),
175        just('/').to(Token::Divide),
176        just('^').to(Token::Power),
177        just('(').to(Token::LeftParen),
178        just(')').to(Token::RightParen),
179        just('=').to(Token::Assign),
180    ));
181
182    // Combined unit parser (tries currency rates first, then compound units, then simple identifiers, then percent, then currency)
183    let unit_identifier = choice((
184        currency_rate, // Must come first to match $/year before $ is parsed separately
185        compound_identifier,
186        identifier,
187        percent_symbol,
188        currency_symbol,
189    ));
190
191    // Parser for numbers with optional units
192    let number_with_unit = number
193        .then(
194            just(' ')
195                .repeated()
196                .then(unit_identifier)
197                .try_map(|(_, unit_str): ((), String), span| {
198                    // Don't treat keywords as units in this context
199                    if unit_str == "to" || unit_str == "in" || unit_str == "of" {
200                        Err(Rich::custom(span, "Keywords are not units"))
201                    } else if let Some(unit) = parse_unit(&unit_str) {
202                        Ok(unit)
203                    } else {
204                        Err(Rich::custom(span, format!("Unknown unit: {}", unit_str)))
205                    }
206                })
207                .or_not(),
208        )
209        .map(|(num, unit_opt)| {
210            if let Some(unit) = unit_opt {
211                Token::NumberWithUnit(num, unit)
212            } else {
213                Token::Number(num)
214            }
215        });
216
217    // Parser for currency rate amounts (like "$5/hr", "€10/day")
218    #[allow(clippy::type_complexity)]
219    let currency_rate_amount = currency_symbol
220        .then(just(' ').repeated()) // Optional spaces
221        .then(number)
222        .then(just('/'))
223        .then(text::ascii::ident())
224        .try_map(|parsed: ((((String, ()), f64), char), &str), span| {
225            let ((((currency_str, _), amount), _), time_str) = parsed;
226            let compound = format!("{}/{}", currency_str, time_str);
227            // Only allow if it forms a valid rate unit
228            if let Some(unit) = parse_unit(&compound) {
229                Ok(Token::NumberWithUnit(amount, unit))
230            } else {
231                Err(Rich::custom(span, "Invalid currency rate unit"))
232            }
233        });
234
235    // Parser for currency amounts (currency symbol followed by number)
236    let currency_amount = currency_symbol
237        .then(just(' ').repeated()) // Optional spaces
238        .then(number)
239        .map(|((currency_str, _), amount)| {
240            if let Some(unit) = parse_unit(&currency_str) {
241                Token::NumberWithUnit(amount, unit)
242            } else {
243                Token::Number(amount) // Fallback, should not happen
244            }
245        });
246
247    // Parser for standalone units (for conversions like "to KiB")
248    let standalone_unit = unit_identifier.try_map(|word: String, span| {
249        if let Some(unit) = parse_unit(&word) {
250            Ok(Token::NumberWithUnit(1.0, unit))
251        } else {
252            // Don't fail - let it be handled as a variable instead
253            Err(Rich::custom(span, "Not a unit"))
254        }
255    });
256
257    // Parser for function calls (known function names followed by '(')
258    let function = identifier
259        .then_ignore(just(' ').repeated())
260        .then_ignore(just('(').rewind())
261        .try_map(|name: String, span| match name.to_lowercase().as_str() {
262            "sqrt" => Ok(Token::Function(name)),
263            "sum_above" => Ok(Token::Function(name)),
264            _ => Err(Rich::custom(span, "Unknown function")),
265        });
266
267    // Parser for variables (catch-all for any identifier not handled above)
268    let variable = identifier.map(|word: String| Token::Variable(word));
269
270    // Main token parser - try each option in order (most specific first)
271    let token = choice((
272        line_ref,             // Must come first to catch "line1" before "line" is treated as unit
273        keyword,              // "to" and "in" keywords
274        currency_rate_amount, // Currency rate amounts like "$5/hr" (must come before currency_amount)
275        currency_amount, // Currency symbols followed by numbers (must come before number_with_unit)
276        number_with_unit, // Numbers with optional units
277        operator,        // Mathematical operators
278        function,        // Function calls (must come before variable)
279        standalone_unit, // Standalone units for conversions
280        variable,        // Variables (identifiers that aren't units/keywords/line refs)
281    ));
282
283    // Parser for punctuation/separators to skip
284    let punctuation = choice((
285        just(':'),
286        just(';'),
287        just(','),
288        just('!'),
289        just('?'),
290        just('.'), // Keep it simple - decimal points in numbers are handled in number parser
291        just('"'),
292        just('\''),
293        just('`'),
294        just('|'),
295        just('&'),
296        just('#'),
297        just('@'),
298        just('~'),
299        just('['),
300        just(']'),
301        just('{'),
302        just('}'),
303        just('<'),
304        just('>'),
305    ));
306
307    // Combined parser that tries tokens first, then skips punctuation
308    let element = choice((token.map(Some), punctuation.to(None)));
309
310    // Parse elements separated by whitespace, filter out None (punctuation)
311    element
312        .padded()
313        .repeated()
314        .collect::<Vec<_>>()
315        .map(|elements| elements.into_iter().flatten().collect())
316        .then_ignore(end())
317}
318
319#[cfg(test)]
320mod tests {
321    use super::*;
322    use crate::units::Unit;
323
324    #[test]
325    fn test_number_parsing() {
326        let result = parse_expression_chumsky("42");
327        assert!(result.is_ok(), "Parsing failed: {:?}", result);
328        let tokens = result.unwrap();
329        assert_eq!(tokens.len(), 1);
330        assert!(matches!(tokens[0], Token::Number(42.0)));
331    }
332
333    #[test]
334    fn test_number_with_unit() {
335        let result = parse_expression_chumsky("5 GiB");
336        assert!(result.is_ok(), "Parsing failed: {:?}", result);
337        let tokens = result.unwrap();
338        assert_eq!(tokens.len(), 1);
339        assert!(matches!(tokens[0], Token::NumberWithUnit(5.0, Unit::GiB)));
340    }
341
342    #[test]
343    fn test_simple_arithmetic() {
344        let result = parse_expression_chumsky("2 + 3");
345        assert!(result.is_ok(), "Parsing failed: {:?}", result);
346        let tokens = result.unwrap();
347        assert_eq!(tokens.len(), 3);
348        assert!(matches!(tokens[0], Token::Number(2.0)));
349        assert!(matches!(tokens[1], Token::Plus));
350        assert!(matches!(tokens[2], Token::Number(3.0)));
351    }
352
353    #[test]
354    fn test_line_reference() {
355        let result = parse_expression_chumsky("line1 + 4");
356        assert!(result.is_ok(), "Parsing failed: {:?}", result);
357        let tokens = result.unwrap();
358        assert_eq!(tokens.len(), 3);
359        assert!(matches!(tokens[0], Token::LineReference(0)));
360        assert!(matches!(tokens[1], Token::Plus));
361        assert!(matches!(tokens[2], Token::Number(4.0)));
362    }
363
364    #[test]
365    fn test_complex_expressions() {
366        let result = parse_expression_chumsky("line1 * 2 GiB + 500 MiB to KiB");
367        assert!(result.is_ok(), "Parsing failed: {:?}", result);
368        let tokens = result.unwrap();
369        assert_eq!(tokens.len(), 7);
370        assert!(matches!(tokens[0], Token::LineReference(0)));
371        assert!(matches!(tokens[1], Token::Multiply));
372        assert!(matches!(tokens[2], Token::NumberWithUnit(2.0, Unit::GiB)));
373        assert!(matches!(tokens[3], Token::Plus));
374        assert!(matches!(tokens[4], Token::NumberWithUnit(500.0, Unit::MiB)));
375        assert!(matches!(tokens[5], Token::To));
376        assert!(matches!(tokens[6], Token::NumberWithUnit(1.0, Unit::KiB)));
377    }
378
379    #[test]
380    fn test_parentheses() {
381        let result = parse_expression_chumsky("(5 + 3) * 2");
382        assert!(result.is_ok(), "Parsing failed: {:?}", result);
383        let tokens = result.unwrap();
384        assert_eq!(tokens.len(), 7);
385        assert!(matches!(tokens[0], Token::LeftParen));
386        assert!(matches!(tokens[1], Token::Number(5.0)));
387        assert!(matches!(tokens[2], Token::Plus));
388        assert!(matches!(tokens[3], Token::Number(3.0)));
389        assert!(matches!(tokens[4], Token::RightParen));
390        assert!(matches!(tokens[5], Token::Multiply));
391        assert!(matches!(tokens[6], Token::Number(2.0)));
392    }
393
394    #[test]
395    fn test_conversion() {
396        let result = parse_expression_chumsky("1 GiB to KiB");
397        assert!(result.is_ok(), "Parsing failed: {:?}", result);
398        let tokens = result.unwrap();
399        assert_eq!(tokens.len(), 3);
400        assert!(matches!(tokens[0], Token::NumberWithUnit(1.0, Unit::GiB)));
401        assert!(matches!(tokens[1], Token::To));
402        assert!(matches!(tokens[2], Token::NumberWithUnit(1.0, Unit::KiB)));
403    }
404
405    #[test]
406    fn test_in_keyword() {
407        let result = parse_expression_chumsky("24 MiB * 32 in KiB");
408        assert!(result.is_ok(), "Parsing failed: {:?}", result);
409        let tokens = result.unwrap();
410        assert_eq!(tokens.len(), 5);
411        assert!(matches!(tokens[0], Token::NumberWithUnit(24.0, Unit::MiB)));
412        assert!(matches!(tokens[1], Token::Multiply));
413        assert!(matches!(tokens[2], Token::Number(32.0)));
414        assert!(matches!(tokens[3], Token::In));
415        assert!(matches!(tokens[4], Token::NumberWithUnit(1.0, Unit::KiB)));
416    }
417
418    #[test]
419    fn test_time_rate_multiplication() {
420        let result = parse_expression_chumsky("1 hour * 10 GiB/s");
421        println!("Tokens for '1 hour * 10 GiB/s': {:?}", result);
422        assert!(result.is_ok(), "Parsing failed: {:?}", result);
423        let tokens = result.unwrap();
424        // Should parse as: NumberWithUnit(1.0, Hour), Multiply, NumberWithUnit(10.0, RateUnit(GiB, Second))
425        assert_eq!(tokens.len(), 3);
426        assert!(matches!(tokens[0], Token::NumberWithUnit(1.0, _)));
427        assert!(matches!(tokens[1], Token::Multiply));
428        assert!(matches!(tokens[2], Token::NumberWithUnit(10.0, _)));
429    }
430
431    #[test]
432    fn test_comma_separated_numbers() {
433        let result = parse_expression_chumsky("1,000 GiB");
434        assert!(result.is_ok(), "Parsing failed: {:?}", result);
435        let tokens = result.unwrap();
436        assert_eq!(tokens.len(), 1);
437        assert!(matches!(
438            tokens[0],
439            Token::NumberWithUnit(1000.0, Unit::GiB)
440        ));
441
442        let result = parse_expression_chumsky("1,234.56 MB");
443        assert!(result.is_ok(), "Parsing failed: {:?}", result);
444        let tokens = result.unwrap();
445        assert_eq!(tokens.len(), 1);
446        assert!(matches!(
447            tokens[0],
448            Token::NumberWithUnit(1234.56, Unit::MB)
449        ));
450
451        let result = parse_expression_chumsky("1,000,000 bytes");
452        assert!(result.is_ok(), "Parsing failed: {:?}", result);
453        let tokens = result.unwrap();
454        assert_eq!(tokens.len(), 1);
455        assert!(matches!(
456            tokens[0],
457            Token::NumberWithUnit(1000000.0, Unit::Byte)
458        ));
459    }
460
461    #[test]
462    fn test_numbers_without_spaces() {
463        // Test basic numbers without spaces
464        let result = parse_expression_chumsky("5GiB");
465        assert!(result.is_ok(), "Parsing '5GiB' failed: {:?}", result);
466        let tokens = result.unwrap();
467        assert_eq!(tokens.len(), 1);
468        assert!(matches!(tokens[0], Token::NumberWithUnit(5.0, Unit::GiB)));
469
470        let result = parse_expression_chumsky("100MB");
471        assert!(result.is_ok(), "Parsing '100MB' failed: {:?}", result);
472        let tokens = result.unwrap();
473        assert_eq!(tokens.len(), 1);
474        assert!(matches!(tokens[0], Token::NumberWithUnit(100.0, Unit::MB)));
475
476        // Test decimal numbers without spaces
477        let result = parse_expression_chumsky("2.5TiB");
478        assert!(result.is_ok(), "Parsing '2.5TiB' failed: {:?}", result);
479        let tokens = result.unwrap();
480        assert_eq!(tokens.len(), 1);
481        assert!(matches!(tokens[0], Token::NumberWithUnit(2.5, Unit::TiB)));
482
483        // Test comma numbers without spaces
484        let result = parse_expression_chumsky("1,000GiB");
485        assert!(result.is_ok(), "Parsing '1,000GiB' failed: {:?}", result);
486        let tokens = result.unwrap();
487        assert_eq!(tokens.len(), 1);
488        assert!(matches!(
489            tokens[0],
490            Token::NumberWithUnit(1000.0, Unit::GiB)
491        ));
492
493        // Test compound units without spaces
494        let result = parse_expression_chumsky("10GiB/s");
495        assert!(result.is_ok(), "Parsing '10GiB/s' failed: {:?}", result);
496        let tokens = result.unwrap();
497        assert_eq!(tokens.len(), 1);
498        assert!(matches!(
499            tokens[0],
500            Token::NumberWithUnit(10.0, Unit::RateUnit(_, _))
501        ));
502        if let Token::NumberWithUnit(_, Unit::RateUnit(ref unit1, ref unit2)) = tokens[0] {
503            assert_eq!(**unit1, Unit::GiB);
504            assert_eq!(**unit2, Unit::Second);
505        }
506
507        // Test expressions with multiple units without spaces
508        let result = parse_expression_chumsky("1,000GiB + 512MiB");
509        assert!(
510            result.is_ok(),
511            "Parsing '1,000GiB + 512MiB' failed: {:?}",
512            result
513        );
514        let tokens = result.unwrap();
515        assert_eq!(tokens.len(), 3);
516        assert!(matches!(
517            tokens[0],
518            Token::NumberWithUnit(1000.0, Unit::GiB)
519        ));
520        assert!(matches!(tokens[1], Token::Plus));
521        assert!(matches!(tokens[2], Token::NumberWithUnit(512.0, Unit::MiB)));
522    }
523
524    #[test]
525    fn test_edge_case_numbers() {
526        // Test zero
527        let result = parse_expression_chumsky("0");
528        assert!(result.is_ok(), "Parsing '0' failed: {:?}", result);
529        let tokens = result.unwrap();
530        assert_eq!(tokens.len(), 1);
531        assert!(matches!(tokens[0], Token::Number(0.0)));
532
533        // Test zero with unit
534        let result = parse_expression_chumsky("0 GiB");
535        assert!(result.is_ok(), "Parsing '0 GiB' failed: {:?}", result);
536        let tokens = result.unwrap();
537        assert_eq!(tokens.len(), 1);
538        assert!(matches!(tokens[0], Token::NumberWithUnit(0.0, Unit::GiB)));
539
540        // Test decimal starting with zero
541        let result = parse_expression_chumsky("0.5 MB");
542        assert!(result.is_ok(), "Parsing '0.5 MB' failed: {:?}", result);
543        let tokens = result.unwrap();
544        assert_eq!(tokens.len(), 1);
545        assert!(matches!(tokens[0], Token::NumberWithUnit(0.5, Unit::MB)));
546
547        // Test very large number
548        let result = parse_expression_chumsky("999,999,999.99 TB");
549        assert!(result.is_ok(), "Parsing large number failed: {:?}", result);
550        let tokens = result.unwrap();
551        assert_eq!(tokens.len(), 1);
552        assert!(matches!(
553            tokens[0],
554            Token::NumberWithUnit(999999999.99, Unit::TB)
555        ));
556
557        // Test very small decimal
558        let result = parse_expression_chumsky("0.000001 seconds");
559        assert!(result.is_ok(), "Parsing small decimal failed: {:?}", result);
560        let tokens = result.unwrap();
561        assert_eq!(tokens.len(), 1);
562        assert!(matches!(
563            tokens[0],
564            Token::NumberWithUnit(0.000001, Unit::Second)
565        ));
566    }
567
568    #[test]
569    fn test_all_operators() {
570        // Test all mathematical operators
571        let result = parse_expression_chumsky("1 + 2 - 3 * 4 / 5");
572        assert!(result.is_ok(), "Parsing all operators failed: {:?}", result);
573        let tokens = result.unwrap();
574        assert_eq!(tokens.len(), 9);
575        assert!(matches!(tokens[0], Token::Number(1.0)));
576        assert!(matches!(tokens[1], Token::Plus));
577        assert!(matches!(tokens[2], Token::Number(2.0)));
578        assert!(matches!(tokens[3], Token::Minus));
579        assert!(matches!(tokens[4], Token::Number(3.0)));
580        assert!(matches!(tokens[5], Token::Multiply));
581        assert!(matches!(tokens[6], Token::Number(4.0)));
582        assert!(matches!(tokens[7], Token::Divide));
583        assert!(matches!(tokens[8], Token::Number(5.0)));
584    }
585
586    #[test]
587    fn test_nested_parentheses() {
588        let result = parse_expression_chumsky("((1 + 2) * (3 - 4)) / 5");
589        assert!(
590            result.is_ok(),
591            "Parsing nested parentheses failed: {:?}",
592            result
593        );
594        let tokens = result.unwrap();
595        assert_eq!(tokens.len(), 15);
596        assert!(matches!(tokens[0], Token::LeftParen));
597        assert!(matches!(tokens[1], Token::LeftParen));
598        assert!(matches!(tokens[2], Token::Number(1.0)));
599        assert!(matches!(tokens[13], Token::Divide));
600        assert!(matches!(tokens[14], Token::Number(5.0)));
601    }
602
603    #[test]
604    fn test_multiple_line_references() {
605        let result = parse_expression_chumsky("line1 + line2 * line10");
606        assert!(
607            result.is_ok(),
608            "Parsing multiple line refs failed: {:?}",
609            result
610        );
611        let tokens = result.unwrap();
612        assert_eq!(tokens.len(), 5);
613        assert!(matches!(tokens[0], Token::LineReference(0)));
614        assert!(matches!(tokens[1], Token::Plus));
615        assert!(matches!(tokens[2], Token::LineReference(1)));
616        assert!(matches!(tokens[3], Token::Multiply));
617        assert!(matches!(tokens[4], Token::LineReference(9)));
618    }
619
620    #[test]
621    fn test_all_unit_types() {
622        // Test data units
623        let result = parse_expression_chumsky("1 B + 2 KB + 3 MB + 4 GB + 5 TB + 6 PB + 7 EB");
624        assert!(result.is_ok(), "Parsing data units failed: {:?}", result);
625
626        // Test binary data units
627        let result = parse_expression_chumsky("1 KiB + 2 MiB + 3 GiB + 4 TiB + 5 PiB + 6 EiB");
628        assert!(
629            result.is_ok(),
630            "Parsing binary data units failed: {:?}",
631            result
632        );
633
634        // Test time units
635        let result = parse_expression_chumsky("1 ns + 2 us + 3 ms + 4 s + 5 min + 6 h + 7 day");
636        assert!(result.is_ok(), "Parsing time units failed: {:?}", result);
637
638        // Test rate units
639        let result = parse_expression_chumsky("1 B/s + 2 KB/s + 3 GiB/s");
640        assert!(result.is_ok(), "Parsing rate units failed: {:?}", result);
641
642        // Test QPS units
643        let result = parse_expression_chumsky("1 QPS + 2 QPM + 3 QPH + 4 req/s");
644        assert!(result.is_ok(), "Parsing QPS units failed: {:?}", result);
645
646        // Test bit units
647        let result = parse_expression_chumsky("1 bit + 2 Kb + 3 Mb + 4 Gb");
648        assert!(result.is_ok(), "Parsing bit units failed: {:?}", result);
649    }
650
651    #[test]
652    fn test_keyword_combinations() {
653        // Test both conversion keywords
654        let result = parse_expression_chumsky("1 GiB to MB in KiB");
655        assert!(result.is_ok(), "Parsing keywords failed: {:?}", result);
656        let tokens = result.unwrap();
657        assert_eq!(tokens.len(), 5);
658        assert!(matches!(tokens[1], Token::To));
659        assert!(matches!(tokens[3], Token::In));
660
661        // Test keywords with line references
662        let result = parse_expression_chumsky("line1 to GiB");
663        assert!(
664            result.is_ok(),
665            "Parsing line ref + keyword failed: {:?}",
666            result
667        );
668
669        // Test keywords with complex expressions
670        let result = parse_expression_chumsky("(1 GiB + 512 MiB) * 2 to TB");
671        assert!(
672            result.is_ok(),
673            "Parsing complex + keyword failed: {:?}",
674            result
675        );
676    }
677
678    #[test]
679    fn test_whitespace_variations() {
680        // Test extra spaces
681        let result = parse_expression_chumsky("  1   +   2   ");
682        assert!(result.is_ok(), "Parsing extra spaces failed: {:?}", result);
683        let tokens = result.unwrap();
684        assert_eq!(tokens.len(), 3);
685
686        // Test tabs and mixed whitespace
687        let result = parse_expression_chumsky("1\t+\t2");
688        assert!(result.is_ok(), "Parsing tabs failed: {:?}", result);
689
690        // Test no spaces around operators
691        let result = parse_expression_chumsky("1+2*3");
692        assert!(result.is_ok(), "Parsing no spaces failed: {:?}", result);
693        let tokens = result.unwrap();
694        assert_eq!(tokens.len(), 5);
695    }
696
697    #[test]
698    fn test_exponentiation_parsing() {
699        // Test basic exponentiation
700        let result = parse_expression_chumsky("2^3");
701        assert!(result.is_ok(), "Parsing '2^3' failed: {:?}", result);
702        let tokens = result.unwrap();
703        assert_eq!(tokens.len(), 3);
704        assert!(matches!(tokens[0], Token::Number(2.0)));
705        assert!(matches!(tokens[1], Token::Power));
706        assert!(matches!(tokens[2], Token::Number(3.0)));
707
708        // Test with spaces
709        let result = parse_expression_chumsky("2 ^ 3");
710        assert!(
711            result.is_ok(),
712            "Parsing '2 ^ 3' with spaces failed: {:?}",
713            result
714        );
715        let tokens = result.unwrap();
716        assert_eq!(tokens.len(), 3);
717
718        // Test chained exponentiation
719        let result = parse_expression_chumsky("2^3^2");
720        assert!(result.is_ok(), "Parsing '2^3^2' failed: {:?}", result);
721        let tokens = result.unwrap();
722        assert_eq!(tokens.len(), 5);
723    }
724
725    #[test]
726    fn test_function_parsing() {
727        // Test sqrt function
728        let result = parse_expression_chumsky("sqrt(4)");
729        assert!(result.is_ok(), "Parsing 'sqrt(4)' failed: {:?}", result);
730        let tokens = result.unwrap();
731        assert_eq!(tokens.len(), 4);
732        assert!(matches!(tokens[0], Token::Function(ref name) if name == "sqrt"));
733        assert!(matches!(tokens[1], Token::LeftParen));
734        assert!(matches!(tokens[2], Token::Number(4.0)));
735        assert!(matches!(tokens[3], Token::RightParen));
736
737        // Test function with spaces
738        let result = parse_expression_chumsky("sqrt (9)");
739        assert!(
740            result.is_ok(),
741            "Parsing 'sqrt (9)' with space failed: {:?}",
742            result
743        );
744        let tokens = result.unwrap();
745        assert_eq!(tokens.len(), 4);
746        assert!(matches!(tokens[0], Token::Function(ref name) if name == "sqrt"));
747
748        // Test function in expression
749        let result = parse_expression_chumsky("2 + sqrt(16)");
750        assert!(
751            result.is_ok(),
752            "Parsing '2 + sqrt(16)' failed: {:?}",
753            result
754        );
755        let tokens = result.unwrap();
756        assert_eq!(tokens.len(), 6); // 2, +, sqrt, (, 16, )
757    }
758
759    #[test]
760    fn test_compound_units_with_spaces() {
761        // Test compound units with spaces around slash
762        let result = parse_expression_chumsky("100 MB / s");
763        assert!(
764            result.is_ok(),
765            "Parsing 'MB / s' with spaces failed: {:?}",
766            result
767        );
768        let tokens = result.unwrap();
769        assert_eq!(tokens.len(), 1);
770        assert!(matches!(
771            tokens[0],
772            Token::NumberWithUnit(100.0, Unit::RateUnit(_, _))
773        ));
774        if let Token::NumberWithUnit(_, Unit::RateUnit(ref unit1, ref unit2)) = tokens[0] {
775            assert_eq!(**unit1, Unit::MB);
776            assert_eq!(**unit2, Unit::Second);
777        }
778
779        // Test compound units without spaces (should still work)
780        let result = parse_expression_chumsky("100 MB/s");
781        assert!(
782            result.is_ok(),
783            "Parsing 'MB/s' without spaces failed: {:?}",
784            result
785        );
786        let tokens = result.unwrap();
787        assert_eq!(tokens.len(), 1);
788        assert!(matches!(
789            tokens[0],
790            Token::NumberWithUnit(100.0, Unit::RateUnit(_, _))
791        ));
792        if let Token::NumberWithUnit(_, Unit::RateUnit(ref unit1, ref unit2)) = tokens[0] {
793            assert_eq!(**unit1, Unit::MB);
794            assert_eq!(**unit2, Unit::Second);
795        }
796
797        // Test conversion with compound units with spaces
798        let result = parse_expression_chumsky("25 QPS to req / min");
799        assert!(
800            result.is_ok(),
801            "Parsing QPS conversion with spaces failed: {:?}",
802            result
803        );
804        let tokens = result.unwrap();
805        assert_eq!(tokens.len(), 3);
806        assert!(matches!(
807            tokens[0],
808            Token::NumberWithUnit(25.0, Unit::RateUnit(_, _))
809        ));
810        if let Token::NumberWithUnit(_, Unit::RateUnit(ref unit1, ref unit2)) = tokens[0] {
811            assert_eq!(**unit1, Unit::Query);
812            assert_eq!(**unit2, Unit::Second);
813        }
814        assert!(matches!(tokens[1], Token::To));
815        assert!(matches!(
816            tokens[2],
817            Token::NumberWithUnit(1.0, Unit::RateUnit(_, _))
818        ));
819        if let Token::NumberWithUnit(_, Unit::RateUnit(ref unit1, ref unit2)) = tokens[2] {
820            assert_eq!(**unit1, Unit::Request);
821            assert_eq!(**unit2, Unit::Minute);
822        }
823
824        // Test various request rate units with spaces
825        let result = parse_expression_chumsky("50 req / s + 30 requests / min");
826        assert!(
827            result.is_ok(),
828            "Parsing request rates with spaces failed: {:?}",
829            result
830        );
831        let tokens = result.unwrap();
832        assert_eq!(tokens.len(), 3);
833        assert!(matches!(
834            tokens[0],
835            Token::NumberWithUnit(50.0, Unit::RateUnit(_, _))
836        ));
837        if let Token::NumberWithUnit(_, Unit::RateUnit(ref unit1, ref unit2)) = tokens[0] {
838            assert_eq!(**unit1, Unit::Request);
839            assert_eq!(**unit2, Unit::Second);
840        }
841        assert!(matches!(tokens[1], Token::Plus));
842        assert!(matches!(
843            tokens[2],
844            Token::NumberWithUnit(30.0, Unit::RateUnit(_, _))
845        ));
846        if let Token::NumberWithUnit(_, Unit::RateUnit(ref unit1, ref unit2)) = tokens[2] {
847            assert_eq!(**unit1, Unit::Request);
848            assert_eq!(**unit2, Unit::Minute);
849        }
850    }
851
852    #[test]
853    fn test_error_cases() {
854        // Test invalid unit - now that we have variables, this parses as Number + Variable
855        // The error happens at evaluation time, not parse time
856        let result = parse_expression_chumsky("1 invalidunit");
857        assert!(result.is_ok(), "Should parse as number + variable");
858
859        // Test invalid line reference
860        let result = parse_expression_chumsky("line0");
861        assert!(
862            result.is_ok(),
863            "line0 should be valid (0-indexed internally)"
864        );
865
866        // Note: "1 +" might actually parse as just "1" in chumsky due to how we handle it
867        // The incomplete operator is handled during evaluation, not parsing
868
869        // Note: The chumsky parser might be more lenient with some syntax errors
870        // depending on how the combinators are set up
871
872        let result = parse_expression_chumsky("1 + 2)");
873        assert!(result.is_err(), "Should fail on unmatched parentheses");
874
875        // Test double operators
876        let result = parse_expression_chumsky("1 ++ 2");
877        assert!(result.is_err(), "Should fail on double operators");
878
879        // Test that malformed decimals are now parsed as separate tokens
880        let result = parse_expression_chumsky("1.2.3");
881        assert!(result.is_ok(), "Should parse as separate tokens: 1.2 and 3");
882        let tokens = result.unwrap();
883        assert_eq!(tokens.len(), 2); // Should be [Number(1.2), Number(3)]
884    }
885
886    #[test]
887    fn test_case_sensitivity() {
888        // Test case variations of units
889        let result = parse_expression_chumsky("1 gib + 2 GIB + 3 GiB");
890        assert!(result.is_ok(), "Case sensitivity test failed: {:?}", result);
891
892        // Test case variations of keywords (note: keywords are case-sensitive in chumsky parser)
893        let result = parse_expression_chumsky("1 GiB to mb");
894        assert!(result.is_ok(), "Keyword case test failed: {:?}", result);
895
896        let result = parse_expression_chumsky("1 GiB in kb");
897        assert!(result.is_ok(), "Keyword case test failed: {:?}", result);
898    }
899
900    #[test]
901    fn test_complex_real_world_expressions() {
902        // Test realistic data center calculation
903        let result = parse_expression_chumsky("(50PB + 10EB) / 1000 to TB/s");
904        assert!(
905            result.is_ok(),
906            "Complex data center calc failed: {:?}",
907            result
908        );
909
910        // Test realistic QPS calculation
911        let result = parse_expression_chumsky("(100QPS + 50req/s) * 1hour to queries");
912        assert!(result.is_ok(), "Complex QPS calc failed: {:?}", result);
913
914        // Test mixed unit types in realistic scenario
915        let result = parse_expression_chumsky("1000GiB / 10min + 500MB/s * 2h");
916        assert!(result.is_ok(), "Mixed unit calc failed: {:?}", result);
917
918        // Test line references in complex expression
919        let result = parse_expression_chumsky("(line1 + line2) * 2.5 to GiB/s");
920        assert!(result.is_ok(), "Complex line ref calc failed: {:?}", result);
921    }
922
923    #[test]
924    fn test_k_suffix_parsing() {
925        // Test basic 'k' suffix
926        let result = parse_expression_chumsky("50k");
927        assert!(result.is_ok(), "Failed to parse '50k': {:?}", result);
928        let tokens = result.unwrap();
929        assert_eq!(tokens.len(), 1);
930        if let Token::Number(val) = &tokens[0] {
931            assert_eq!(*val, 50000.0);
932        } else {
933            panic!("Expected Number token, got {:?}", tokens[0]);
934        }
935
936        // Test uppercase 'K' suffix
937        let result = parse_expression_chumsky("25K");
938        assert!(result.is_ok(), "Failed to parse '25K': {:?}", result);
939        let tokens = result.unwrap();
940        assert_eq!(tokens.len(), 1);
941        if let Token::Number(val) = &tokens[0] {
942            assert_eq!(*val, 25000.0);
943        } else {
944            panic!("Expected Number token, got {:?}", tokens[0]);
945        }
946
947        // Test decimal with 'k' suffix
948        let result = parse_expression_chumsky("3.5k");
949        assert!(result.is_ok(), "Failed to parse '3.5k': {:?}", result);
950        let tokens = result.unwrap();
951        assert_eq!(tokens.len(), 1);
952        if let Token::Number(val) = &tokens[0] {
953            assert_eq!(*val, 3500.0);
954        } else {
955            panic!("Expected Number token, got {:?}", tokens[0]);
956        }
957    }
958
959    #[test]
960    fn test_k_suffix_with_currency() {
961        // Test currency with 'k' suffix
962        let result = parse_expression_chumsky("$50k");
963        assert!(result.is_ok(), "Failed to parse '$50k': {:?}", result);
964        let tokens = result.unwrap();
965        assert_eq!(tokens.len(), 1);
966        if let Token::NumberWithUnit(val, unit) = &tokens[0] {
967            assert_eq!(*val, 50000.0);
968            assert_eq!(*unit, Unit::USD);
969        } else {
970            panic!("Expected NumberWithUnit token, got {:?}", tokens[0]);
971        }
972
973        // Test different currencies with 'k' suffix
974        let result = parse_expression_chumsky("€100K");
975        assert!(result.is_ok(), "Failed to parse '€100K': {:?}", result);
976        let tokens = result.unwrap();
977        assert_eq!(tokens.len(), 1);
978        if let Token::NumberWithUnit(val, unit) = &tokens[0] {
979            assert_eq!(*val, 100000.0);
980            assert_eq!(*unit, Unit::EUR);
981        } else {
982            panic!("Expected NumberWithUnit token, got {:?}", tokens[0]);
983        }
984    }
985
986    #[test]
987    fn test_k_suffix_with_arithmetic() {
988        // Test arithmetic with 'k' suffix numbers
989        let result = parse_expression_chumsky("50k + 25K");
990        assert!(result.is_ok(), "Failed to parse '50k + 25K': {:?}", result);
991        let tokens = result.unwrap();
992        assert_eq!(tokens.len(), 3); // Number, Operator, Number
993        if let Token::Number(val1) = &tokens[0] {
994            assert_eq!(*val1, 50000.0);
995        }
996        if let Token::Number(val2) = &tokens[2] {
997            assert_eq!(*val2, 25000.0);
998        }
999
1000        // Test with units
1001        let result = parse_expression_chumsky("100k MB");
1002        assert!(result.is_ok(), "Failed to parse '100k MB': {:?}", result);
1003        let tokens = result.unwrap();
1004        assert_eq!(tokens.len(), 1);
1005        if let Token::NumberWithUnit(val, unit) = &tokens[0] {
1006            assert_eq!(*val, 100000.0);
1007            assert_eq!(*unit, Unit::MB);
1008        } else {
1009            panic!("Expected NumberWithUnit token, got {:?}", tokens[0]);
1010        }
1011    }
1012
1013    #[test]
1014    fn test_sum_above_function_parsing() {
1015        // Test basic sum_above() parsing
1016        let result = parse_expression_chumsky("sum_above()");
1017        assert!(
1018            result.is_ok(),
1019            "Failed to parse 'sum_above()': {:?}",
1020            result
1021        );
1022        let tokens = result.unwrap();
1023        assert_eq!(tokens.len(), 3); // Function, LeftParen, RightParen
1024        if let Token::Function(func_name) = &tokens[0] {
1025            assert_eq!(func_name, "sum_above");
1026        } else {
1027            panic!("Expected Function token, got {:?}", tokens[0]);
1028        }
1029        assert!(matches!(tokens[1], Token::LeftParen));
1030        assert!(matches!(tokens[2], Token::RightParen));
1031
1032        // Test sum_above() with arithmetic
1033        let result = parse_expression_chumsky("sum_above() + 100");
1034        assert!(
1035            result.is_ok(),
1036            "Failed to parse 'sum_above() + 100': {:?}",
1037            result
1038        );
1039        let tokens = result.unwrap();
1040        assert_eq!(tokens.len(), 5); // Function, LeftParen, RightParen, Plus, Number
1041        if let Token::Function(func_name) = &tokens[0] {
1042            assert_eq!(func_name, "sum_above");
1043        } else {
1044            panic!("Expected Function token, got {:?}", tokens[0]);
1045        }
1046
1047        // Test case insensitivity
1048        let result = parse_expression_chumsky("SUM_ABOVE()");
1049        assert!(
1050            result.is_ok(),
1051            "Failed to parse 'SUM_ABOVE()': {:?}",
1052            result
1053        );
1054        let tokens = result.unwrap();
1055        if let Token::Function(func_name) = &tokens[0] {
1056            assert_eq!(func_name, "SUM_ABOVE");
1057        } else {
1058            panic!("Expected Function token, got {:?}", tokens[0]);
1059        }
1060    }
1061}