Skip to main content

lemma/parsing/
literals.rs

1use super::Rule;
2use crate::error::LemmaError;
3use crate::parsing::ast::Span;
4use crate::parsing::ast::*;
5use crate::Source;
6
7use chrono::{Datelike, Timelike};
8use pest::iterators::Pair;
9use rust_decimal::Decimal;
10use std::str::FromStr;
11use std::sync::Arc;
12
13pub(crate) fn parse_literal(
14    pair: Pair<Rule>,
15    attribute: &str,
16    doc_name: &str,
17    source_text: Arc<str>,
18) -> Result<Value, LemmaError> {
19    match pair.as_rule() {
20        Rule::number_literal => {
21            parse_number_literal(pair, attribute, doc_name, source_text.clone())
22        }
23        Rule::number_unit_literal => {
24            let (n, u) = parse_number_unit_literal(pair, attribute, doc_name, source_text.clone())?;
25            Ok(Value::Scale(n, u))
26        }
27        Rule::text_literal => parse_string_literal(pair),
28        Rule::boolean_literal => {
29            parse_boolean_literal(pair, attribute, doc_name, source_text.clone())
30        }
31        Rule::percent_literal => {
32            parse_percent_literal(pair, attribute, doc_name, source_text.clone())
33        }
34        Rule::permille_literal => {
35            parse_permille_literal(pair, attribute, doc_name, source_text.clone())
36        }
37        Rule::date_time_literal => {
38            parse_datetime_literal(pair, attribute, doc_name, source_text.clone())
39        }
40        Rule::time_literal => parse_time_literal(pair, attribute, doc_name, source_text.clone()),
41        Rule::duration_literal => {
42            let s = pair.as_str();
43            let source = Source::new(
44                attribute,
45                Span::from_pest_span(pair.as_span()),
46                doc_name,
47                source_text.clone(),
48            );
49            parse_duration_from_string(s, &source)
50        }
51        _ => Err(LemmaError::engine(
52            format!("Unsupported literal type: {:?}", pair.as_rule()),
53            Some(Source::new(
54                attribute,
55                Span::from_pest_span(pair.as_span()),
56                doc_name,
57                source_text.clone(),
58            )),
59            None::<String>,
60        )),
61    }
62}
63
64fn parse_number_literal(
65    pair: Pair<Rule>,
66    attribute: &str,
67    doc_name: &str,
68    source_text: Arc<str>,
69) -> Result<Value, LemmaError> {
70    let pair_str = pair.as_str();
71    let span = Span::from_pest_span(pair.as_span());
72    let mut inner = pair.into_inner();
73
74    let number = match inner.next() {
75        Some(inner_pair) => match inner_pair.as_rule() {
76            Rule::scientific_number => {
77                parse_scientific_number(inner_pair, attribute, doc_name, source_text.clone())?
78            }
79            Rule::decimal_number => {
80                let inner_span = Span::from_pest_span(inner_pair.as_span());
81                parse_decimal_number(
82                    inner_pair.as_str(),
83                    inner_span,
84                    attribute,
85                    doc_name,
86                    source_text.clone(),
87                )?
88            }
89            _ => {
90                return Err(LemmaError::engine(
91                    "Unexpected number literal structure",
92                    Some(Source::new(attribute, span, doc_name, source_text.clone())),
93                    None::<String>,
94                ));
95            }
96        },
97        None => parse_decimal_number(
98            pair_str,
99            span.clone(),
100            attribute,
101            doc_name,
102            source_text.clone(),
103        )?,
104    };
105
106    Ok(Value::Number(number))
107}
108
109fn parse_string_literal(pair: Pair<Rule>) -> Result<Value, LemmaError> {
110    let content = pair.as_str();
111    let unquoted = &content[1..content.len() - 1];
112    Ok(Value::Text(unquoted.to_string()))
113}
114
115fn parse_boolean_literal(
116    pair: Pair<Rule>,
117    attribute: &str,
118    doc_name: &str,
119    source_text: Arc<str>,
120) -> Result<Value, LemmaError> {
121    use crate::BooleanValue;
122
123    let boolean_value = match pair.as_str() {
124        "true" => BooleanValue::True,
125        "false" => BooleanValue::False,
126        "yes" => BooleanValue::Yes,
127        "no" => BooleanValue::No,
128        "accept" => BooleanValue::Accept,
129        "reject" => BooleanValue::Reject,
130        _ => {
131            let span = Span::from_pest_span(pair.as_span());
132            return Err(LemmaError::engine(
133                format!("Invalid boolean: '{}'\n             Expected one of: true, false, yes, no, accept, reject", pair.as_str()),
134                Some(Source::new(attribute, span, doc_name, source_text.clone())),
135                None::<String>,
136            ));
137        }
138    };
139
140    Ok(Value::Boolean(boolean_value))
141}
142
143fn parse_percent_literal(
144    pair: Pair<Rule>,
145    attribute: &str,
146    doc_name: &str,
147    source_text: Arc<str>,
148) -> Result<Value, LemmaError> {
149    let pair_span = Span::from_pest_span(pair.as_span());
150    for inner_pair in pair.into_inner() {
151        if inner_pair.as_rule() == Rule::number_literal {
152            let inner_span = Span::from_pest_span(inner_pair.as_span());
153            let percentage_value =
154                parse_number_literal(inner_pair, attribute, doc_name, source_text.clone())?;
155            match &percentage_value {
156                Value::Number(n) => {
157                    // Convert percent (50) to ratio (0.50) for storage
158                    use rust_decimal::Decimal;
159                    let ratio_value = *n / Decimal::from(100);
160                    return Ok(Value::Ratio(ratio_value, Some("percent".to_string())));
161                }
162                _ => {
163                    return Err(LemmaError::engine(
164                        "Expected number in percent literal",
165                        Some(Source::new(
166                            attribute,
167                            inner_span,
168                            doc_name,
169                            source_text.clone(),
170                        )),
171                        None::<String>,
172                    ));
173                }
174            }
175        }
176    }
177    Err(LemmaError::engine(
178        "Invalid percent literal: missing number",
179        Some(Source::new(
180            attribute,
181            pair_span,
182            doc_name,
183            source_text.clone(),
184        )),
185        None::<String>,
186    ))
187}
188
189fn parse_permille_literal(
190    pair: Pair<Rule>,
191    attribute: &str,
192    doc_name: &str,
193    source_text: Arc<str>,
194) -> Result<Value, LemmaError> {
195    let pair_span = Span::from_pest_span(pair.as_span());
196    for inner_pair in pair.into_inner() {
197        if inner_pair.as_rule() == Rule::number_literal {
198            let inner_span = Span::from_pest_span(inner_pair.as_span());
199            let permille_value =
200                parse_number_literal(inner_pair, attribute, doc_name, source_text.clone())?;
201            match &permille_value {
202                Value::Number(n) => {
203                    // Convert permille (5) to ratio (0.005) for storage
204                    use rust_decimal::Decimal;
205                    let ratio_value = *n / Decimal::from(1000);
206                    return Ok(Value::Ratio(ratio_value, Some("permille".to_string())));
207                }
208                _ => {
209                    return Err(LemmaError::engine(
210                        "Expected number in permille literal",
211                        Some(Source::new(
212                            attribute,
213                            inner_span,
214                            doc_name,
215                            source_text.clone(),
216                        )),
217                        None::<String>,
218                    ));
219                }
220            }
221        }
222    }
223    Err(LemmaError::engine(
224        "Invalid permille literal: missing number",
225        Some(Source::new(
226            attribute,
227            pair_span,
228            doc_name,
229            source_text.clone(),
230        )),
231        None::<String>,
232    ))
233}
234
235/// Parse a duration string (e.g. "10 hours", "120 hours") into Value::Duration.
236/// Single implementation for both Lemma source (via parse_literal) and runtime fact values.
237pub(crate) fn parse_duration_from_string(
238    value_str: &str,
239    source: &Source,
240) -> Result<Value, LemmaError> {
241    let trimmed = value_str.trim();
242    let mut parts: Vec<&str> = trimmed.split_whitespace().collect();
243    if parts.len() < 2 {
244        return Err(LemmaError::engine(
245            format!(
246                "Invalid duration: '{}'. Expected format: <number> <unit> (e.g. 10 hours, 2 weeks)",
247                value_str
248            ),
249            Some(source.clone()),
250            None::<String>,
251        ));
252    }
253    let unit_str = parts.pop().unwrap();
254    let number_str = parts.join(" ").replace(['_', ','], "");
255    let n = Decimal::from_str(&number_str).map_err(|_| {
256        LemmaError::engine(
257            format!("Invalid duration number: '{}'", number_str),
258            Some(source.clone()),
259            None::<String>,
260        )
261    })?;
262    let unit_lower = unit_str.to_lowercase();
263    let unit = match unit_lower.as_str() {
264        "year" | "years" => DurationUnit::Year,
265        "month" | "months" => DurationUnit::Month,
266        "week" | "weeks" => DurationUnit::Week,
267        "day" | "days" => DurationUnit::Day,
268        "hour" | "hours" => DurationUnit::Hour,
269        "minute" | "minutes" => DurationUnit::Minute,
270        "second" | "seconds" => DurationUnit::Second,
271        "millisecond" | "milliseconds" => DurationUnit::Millisecond,
272        "microsecond" | "microseconds" => DurationUnit::Microsecond,
273        _ => {
274            return Err(LemmaError::engine(
275                format!(
276                    "Unknown duration unit: '{}'. Expected one of: years, months, weeks, days, hours, minutes, seconds, milliseconds, microseconds",
277                    unit_str
278                ),
279                Some(source.clone()),
280                None::<String>,
281            ));
282        }
283    };
284    Ok(Value::Duration(n, unit))
285}
286
287/// Parse a "number unit" string (e.g. "1 eur", "50 percent", "500 permille") into `(number, unit_name)`.
288/// Does not validate the unit against any type; use `ScaleUnits::get()` or `RatioUnits::get()` for that.
289/// Single canonical implementation used by both AST (Pest) and runtime string parsing for scale and ratio.
290pub(crate) fn parse_number_unit_string(s: &str) -> Result<(Decimal, String), String> {
291    let trimmed = s.trim();
292    let mut parts = trimmed.split_whitespace();
293    let number_part = parts.next().ok_or_else(|| {
294        if trimmed.is_empty() {
295            "Scale value cannot be empty. Use a number followed by a unit (e.g. '10 eur')."
296                .to_string()
297        } else {
298            format!(
299                "Invalid scale value: '{}'. Scale value must be a number followed by a unit (e.g. '10 eur').",
300                s
301            )
302        }
303    })?;
304    let unit_part = parts.next().ok_or_else(|| {
305        format!(
306            "Scale value must include a unit (e.g. '{} eur').",
307            number_part
308        )
309    })?;
310    let clean = number_part.replace(['_', ','], "");
311    let n = Decimal::from_str(&clean).map_err(|_| format!("Invalid scale: '{}'", s))?;
312    Ok((n, unit_part.to_string()))
313}
314
315/// Parse a number+unit literal from AST (e.g. fact value "1 eur" in source).
316/// Uses the same logic as `parse_scale_number_unit_string`; only the source (pair.as_str()) comes from Pest.
317pub(crate) fn parse_number_unit_literal(
318    pair: Pair<Rule>,
319    attribute: &str,
320    doc_name: &str,
321    source_text: Arc<str>,
322) -> Result<(Decimal, String), LemmaError> {
323    let s = pair.as_str();
324    let span = Span::from_pest_span(pair.as_span());
325    parse_number_unit_string(s).map_err(|msg| {
326        LemmaError::engine(
327            msg,
328            Some(Source::new(attribute, span, doc_name, source_text.clone())),
329            None::<String>,
330        )
331    })
332}
333
334fn parse_datetime_literal(
335    pair: Pair<Rule>,
336    attribute: &str,
337    doc_name: &str,
338    source_text: Arc<str>,
339) -> Result<Value, LemmaError> {
340    let datetime_str = pair.as_str();
341
342    if let Ok(dt) = datetime_str.parse::<chrono::DateTime<chrono::FixedOffset>>() {
343        let offset = dt.offset().local_minus_utc();
344        return Ok(Value::Date(DateTimeValue {
345            year: dt.year(),
346            month: dt.month(),
347            day: dt.day(),
348            hour: dt.hour(),
349            minute: dt.minute(),
350            second: dt.second(),
351            timezone: Some(TimezoneValue {
352                offset_hours: (offset / 3600) as i8,
353                offset_minutes: ((offset % 3600) / 60) as u8,
354            }),
355        }));
356    }
357
358    if let Ok(dt) = datetime_str.parse::<chrono::NaiveDateTime>() {
359        return Ok(Value::Date(DateTimeValue {
360            year: dt.year(),
361            month: dt.month(),
362            day: dt.day(),
363            hour: dt.hour(),
364            minute: dt.minute(),
365            second: dt.second(),
366            timezone: None,
367        }));
368    }
369
370    if let Ok(d) = datetime_str.parse::<chrono::NaiveDate>() {
371        return Ok(Value::Date(DateTimeValue {
372            year: d.year(),
373            month: d.month(),
374            day: d.day(),
375            hour: 0,
376            minute: 0,
377            second: 0,
378            timezone: None,
379        }));
380    }
381
382    Err(LemmaError::engine(
383        format!("Invalid date/time format: '{}'\n         Expected one of:\n         - Date: YYYY-MM-DD (e.g., 2024-01-15)\n         - DateTime: YYYY-MM-DDTHH:MM:SS (e.g., 2024-01-15T14:30:00)\n         - With timezone: YYYY-MM-DDTHH:MM:SSZ or +HH:MM (e.g., 2024-01-15T14:30:00Z)\n         Note: Month must be 1-12, day must be valid for the month (no Feb 30), hours 0-23, minutes/seconds 0-59", datetime_str),
384        Some(Source::new(
385            attribute,
386            Span::from_pest_span(pair.as_span()),
387            doc_name,
388            source_text.clone(),
389        )),
390        None::<String>,
391    ))
392}
393
394fn parse_time_literal(
395    pair: Pair<Rule>,
396    attribute: &str,
397    doc_name: &str,
398    source_text: Arc<str>,
399) -> Result<Value, LemmaError> {
400    let time_str = pair.as_str();
401
402    if let Ok(t) = time_str.parse::<chrono::DateTime<chrono::FixedOffset>>() {
403        let offset = t.offset().local_minus_utc();
404        return Ok(Value::Time(TimeValue {
405            hour: t.hour() as u8,
406            minute: t.minute() as u8,
407            second: t.second() as u8,
408            timezone: Some(TimezoneValue {
409                offset_hours: (offset / 3600) as i8,
410                offset_minutes: ((offset % 3600) / 60) as u8,
411            }),
412        }));
413    }
414
415    if let Ok(t) = time_str.parse::<chrono::NaiveTime>() {
416        return Ok(Value::Time(TimeValue {
417            hour: t.hour() as u8,
418            minute: t.minute() as u8,
419            second: t.second() as u8,
420            timezone: None,
421        }));
422    }
423
424    Err(LemmaError::engine(
425        format!("Invalid time format: '{}'\n         Expected: HH:MM or HH:MM:SS (e.g., 14:30 or 14:30:00)\n         With timezone: HH:MM:SSZ or +HH:MM (e.g., 14:30:00Z or 14:30:00+01:00)\n         Note: Hours must be 0-23, minutes and seconds must be 0-59", time_str),
426        Some(Source::new(
427            attribute,
428            Span::from_pest_span(pair.as_span()),
429            doc_name,
430            source_text.clone(),
431        )),
432        None::<String>,
433    ))
434}
435
/// Largest absolute power-of-ten exponent accepted in scientific notation.
///
/// `decimal_pow10` rejects anything beyond this bound, and the error messages
/// in this file describe it as the "~10^28" / "max 28 decimal places" limit.
const MAX_DECIMAL_EXPONENT: i32 = 28;
437
438fn parse_scientific_number(
439    pair: Pair<Rule>,
440    attribute: &str,
441    doc_name: &str,
442    source_text: Arc<str>,
443) -> Result<Decimal, LemmaError> {
444    let span = Span::from_pest_span(pair.as_span());
445    let mut inner = pair.into_inner();
446
447    let mantissa_pair = inner.next().ok_or_else(|| {
448        LemmaError::engine(
449            "Missing mantissa in scientific notation",
450            Some(Source::new(
451                attribute,
452                span.clone(),
453                doc_name,
454                source_text.clone(),
455            )),
456            None::<String>,
457        )
458    })?;
459    let exponent_pair = inner.next().ok_or_else(|| {
460        LemmaError::engine(
461            "Missing exponent in scientific notation",
462            Some(Source::new(
463                attribute,
464                span.clone(),
465                doc_name,
466                source_text.clone(),
467            )),
468            None::<String>,
469        )
470    })?;
471
472    let mantissa = parse_decimal_number(
473        mantissa_pair.as_str(),
474        Span::from_pest_span(mantissa_pair.as_span()),
475        attribute,
476        doc_name,
477        source_text.clone(),
478    )?;
479    let exponent_span = Span::from_pest_span(exponent_pair.as_span());
480    let exponent: i32 = exponent_pair.as_str().parse().map_err(|_| {
481        LemmaError::engine(
482            format!(
483                "Invalid exponent: '{}'\n             Expected an integer between -{} and +{}",
484                exponent_pair.as_str(),
485                MAX_DECIMAL_EXPONENT,
486                MAX_DECIMAL_EXPONENT
487            ),
488            Some(Source::new(
489                attribute,
490                exponent_span.clone(),
491                doc_name,
492                source_text.clone(),
493            )),
494            None::<String>,
495        )
496    })?;
497
498    let power_of_ten = decimal_pow10(exponent).ok_or_else(|| {
499        LemmaError::engine(
500            format!("Exponent {} is out of range\n             Maximum supported exponent is ±{} (values up to ~10^28)", exponent, MAX_DECIMAL_EXPONENT),
501            Some(Source::new(
502                attribute,
503                exponent_span,
504                doc_name,
505                source_text.clone(),
506            )),
507            None::<String>,
508        )
509    })?;
510
511    if exponent >= 0 {
512        mantissa.checked_mul(power_of_ten).ok_or_else(|| {
513            LemmaError::engine(
514                format!(
515                    "Number overflow: result of {}e{} exceeds maximum value (~10^28)",
516                    mantissa, exponent
517                ),
518                Some(Source::new(
519                    attribute,
520                    span.clone(),
521                    doc_name,
522                    source_text.clone(),
523                )),
524                None::<String>,
525            )
526        })
527    } else {
528        mantissa.checked_div(power_of_ten).ok_or_else(|| {
529            LemmaError::engine(
530                format!(
531                    "Precision error: result of {}e{} has too many decimal places (max 28)",
532                    mantissa, exponent
533                ),
534                Some(Source::new(attribute, span, doc_name, source_text.clone())),
535                None::<String>,
536            )
537        })
538    }
539}
540
541fn decimal_pow10(exp: i32) -> Option<Decimal> {
542    let abs_exp = exp.abs();
543    if abs_exp > MAX_DECIMAL_EXPONENT {
544        return None;
545    }
546
547    let mut result = Decimal::ONE;
548    let ten = Decimal::from(10);
549
550    for _ in 0..abs_exp {
551        result = result.checked_mul(ten)?;
552    }
553
554    Some(result)
555}
556
557fn parse_decimal_number(
558    number_str: &str,
559    span: Span,
560    attribute: &str,
561    doc_name: &str,
562    source_text: Arc<str>,
563) -> Result<Decimal, LemmaError> {
564    let clean_number = number_str.replace(['_', ','], "");
565    Decimal::from_str(&clean_number).map_err(|_| {
566        LemmaError::engine(
567            format!("Invalid number: '{}'\n             Expected a valid decimal number (e.g., 42, 3.14, 1_000_000, 1,000,000)\n             Note: Use underscores or commas as thousand separators if needed", number_str),
568            Some(Source::new(attribute, span, doc_name, source_text)),
569            None::<String>,
570        )
571    })
572}
573
574// ============================================================================
575// String parsing helpers (for type constraint parsing)
576// ============================================================================
577
578/// Parse a date string into a DateTimeValue (for type constraint parsing)
579pub fn parse_date_string(s: &str) -> Result<DateTimeValue, String> {
580    use chrono::{Datelike, Timelike};
581
582    if let Ok(dt) = s.parse::<chrono::DateTime<chrono::FixedOffset>>() {
583        let offset = dt.offset().local_minus_utc();
584        return Ok(DateTimeValue {
585            year: dt.year(),
586            month: dt.month(),
587            day: dt.day(),
588            hour: dt.hour(),
589            minute: dt.minute(),
590            second: dt.second(),
591            timezone: Some(TimezoneValue {
592                offset_hours: (offset / 3600) as i8,
593                offset_minutes: ((offset % 3600) / 60) as u8,
594            }),
595        });
596    }
597
598    if let Ok(dt) = s.parse::<chrono::NaiveDateTime>() {
599        return Ok(DateTimeValue {
600            year: dt.year(),
601            month: dt.month(),
602            day: dt.day(),
603            hour: dt.hour(),
604            minute: dt.minute(),
605            second: dt.second(),
606            timezone: None,
607        });
608    }
609
610    if let Ok(d) = s.parse::<chrono::NaiveDate>() {
611        return Ok(DateTimeValue {
612            year: d.year(),
613            month: d.month(),
614            day: d.day(),
615            hour: 0,
616            minute: 0,
617            second: 0,
618            timezone: None,
619        });
620    }
621
622    Err(format!("Invalid date format: '{}'", s))
623}
624
625/// Parse a time string into a TimeValue (for type constraint parsing)
626pub fn parse_time_string(s: &str) -> Result<TimeValue, String> {
627    use chrono::Timelike;
628
629    if let Ok(t) = s.parse::<chrono::DateTime<chrono::FixedOffset>>() {
630        let offset = t.offset().local_minus_utc();
631        return Ok(TimeValue {
632            hour: t.hour() as u8,
633            minute: t.minute() as u8,
634            second: t.second() as u8,
635            timezone: Some(TimezoneValue {
636                offset_hours: (offset / 3600) as i8,
637                offset_minutes: ((offset % 3600) / 60) as u8,
638            }),
639        });
640    }
641
642    if let Ok(t) = s.parse::<chrono::NaiveTime>() {
643        return Ok(TimeValue {
644            hour: t.hour() as u8,
645            minute: t.minute() as u8,
646            second: t.second() as u8,
647            timezone: None,
648        });
649    }
650
651    Err(format!("Invalid time format: '{}'", s))
652}
653
654// ============================================================================
655// Tests
656// ============================================================================
657
#[cfg(test)]
mod tests {
    use crate::parsing::ast::{ExpressionKind, Value};
    use crate::parsing::parse;
    use crate::ResourceLimits;
    use rust_decimal::Decimal;
    use std::str::FromStr;

    /// Parses `input` and asserts that the first rule of the first document is
    /// a ratio literal of 0.005 tagged with the "permille" unit.
    fn assert_first_rule_is_five_permille(input: &str) {
        let docs = parse(input, "test.lemma", &ResourceLimits::default()).unwrap();
        let rule = &docs[0].rules[0];
        match &rule.expression.kind {
            ExpressionKind::Literal(Value::Ratio(n, Some(unit))) => {
                assert_eq!(*n, Decimal::from_str("0.005").unwrap());
                assert_eq!(unit, "permille");
            }
            other => panic!("Expected Ratio permille literal, got {:?}", other),
        }
    }

    #[test]
    fn parse_rejects_percent_literal_with_trailing_digits() {
        // Guard against tokenization bugs around percent literals.
        // The grammar comment says '%' must be directly followed by a non-digit or EOI.
        let input = r#"doc test
fact x = 10%5"#;
        let outcome = parse(input, "test.lemma", &ResourceLimits::default());
        assert!(
            outcome.is_err(),
            "Percent literals like `10%5` must be rejected"
        );
    }

    #[test]
    fn parse_permille_double_percent_syntax() {
        // `%%` is the symbolic spelling of permille.
        assert_first_rule_is_five_permille("doc test\nrule x = 5%%");
    }

    #[test]
    fn parse_permille_word_syntax() {
        // The word `permille` must produce the same value as `%%`.
        assert_first_rule_is_five_permille("doc test\nrule x = 5 permille");
    }

    #[test]
    fn parse_rejects_permille_literal_with_trailing_digits() {
        let input = "doc test\nfact x = 10%%5";
        let outcome = parse(input, "test.lemma", &ResourceLimits::default());
        assert!(
            outcome.is_err(),
            "Permille literals like `10%%5` must be rejected"
        );
    }
}