lemma/parser/
literals.rs

1use crate::error::LemmaError;
2use crate::parser::Rule;
3use crate::semantic::*;
4
5use chrono::{Datelike, Timelike};
6use pest::iterators::Pair;
7use regex;
8use rust_decimal::Decimal;
9use std::str::FromStr;
10
11pub(crate) fn parse_literal(pair: Pair<Rule>) -> Result<LiteralValue, LemmaError> {
12    match pair.as_rule() {
13        Rule::number_literal => parse_number_literal(pair),
14        Rule::string_literal => parse_string_literal(pair),
15        Rule::boolean_literal => parse_boolean_literal(pair),
16        Rule::percentage_literal => parse_percentage_literal(pair),
17        Rule::regex_literal => parse_regex_literal(pair),
18        Rule::date_time_literal => parse_datetime_literal(pair),
19        Rule::time_literal => parse_time_literal(pair),
20        Rule::unit_literal => parse_unit_literal(pair),
21        _ => Err(LemmaError::Engine(format!(
22            "Unsupported literal type: {:?}",
23            pair.as_rule()
24        ))),
25    }
26}
27
28fn parse_number_literal(pair: Pair<Rule>) -> Result<LiteralValue, LemmaError> {
29    let pair_str = pair.as_str();
30    let mut inner = pair.into_inner();
31
32    let number = match inner.next() {
33        Some(inner_pair) => match inner_pair.as_rule() {
34            Rule::scientific_number => parse_scientific_number(inner_pair)?,
35            Rule::decimal_number => parse_decimal_number(inner_pair.as_str())?,
36            _ => {
37                return Err(LemmaError::Engine(
38                    "Unexpected number literal structure".to_string(),
39                ))
40            }
41        },
42        None => parse_decimal_number(pair_str)?,
43    };
44
45    Ok(LiteralValue::Number(number))
46}
47
48fn parse_string_literal(pair: Pair<Rule>) -> Result<LiteralValue, LemmaError> {
49    let content = pair.as_str();
50    let unquoted = &content[1..content.len() - 1];
51    Ok(LiteralValue::Text(unquoted.to_string()))
52}
53
54/// Parse boolean literals.
55/// Accepts: true, false, yes, no, accept, reject (case-sensitive)
56fn parse_boolean_literal(pair: Pair<Rule>) -> Result<LiteralValue, LemmaError> {
57    let boolean = match pair.as_str() {
58        "true" | "yes" | "accept" => true,
59        "false" | "no" | "reject" => false,
60        _ => {
61            return Err(LemmaError::Engine(format!(
62                "Invalid boolean: '{}'\n\
63             Expected one of: true, false, yes, no, accept, reject",
64                pair.as_str()
65            )))
66        }
67    };
68    Ok(LiteralValue::Boolean(boolean))
69}
70
71fn parse_percentage_literal(pair: Pair<Rule>) -> Result<LiteralValue, LemmaError> {
72    for inner_pair in pair.into_inner() {
73        if inner_pair.as_rule() == Rule::number_literal {
74            let percentage = parse_number_literal(inner_pair)?;
75            match percentage {
76                LiteralValue::Number(n) => return Ok(LiteralValue::Percentage(n)),
77                _ => {
78                    return Err(LemmaError::Engine(
79                        "Expected number in percentage literal".to_string(),
80                    ))
81                }
82            }
83        }
84    }
85    Err(LemmaError::Engine(
86        "Invalid percentage literal: missing number".to_string(),
87    ))
88}
89
90/// Parse regex literals enclosed in forward slashes (e.g., /pattern/)
91/// Validates that the pattern is a valid regular expression
92fn parse_regex_literal(pair: Pair<Rule>) -> Result<LiteralValue, LemmaError> {
93    let regex_str = pair.as_str().to_string();
94    let mut pattern_parts = Vec::new();
95    for inner_pair in pair.into_inner() {
96        if inner_pair.as_rule() == Rule::regex_char {
97            pattern_parts.push(inner_pair.as_str());
98        }
99    }
100    let pattern = pattern_parts.join("");
101    match regex::Regex::new(&pattern) {
102        Ok(_) => Ok(LiteralValue::Regex(regex_str)),
103        Err(e) => Err(LemmaError::Engine(format!(
104            "Invalid regex pattern in '{}': {}\n\
105             Note: Use /pattern/ syntax, escape forward slashes as \\/",
106            regex_str, e
107        ))),
108    }
109}
110
111// Complex Literals
112
113fn parse_unit_literal(pair: Pair<Rule>) -> Result<LiteralValue, LemmaError> {
114    let mut number = None;
115    let mut unit_str = None;
116
117    for inner_pair in pair.into_inner() {
118        match inner_pair.as_rule() {
119            Rule::number_literal => {
120                let lit = parse_number_literal(inner_pair)?;
121                match lit {
122                    LiteralValue::Number(n) => number = Some(n),
123                    _ => {
124                        return Err(LemmaError::Engine(
125                            "Expected number in unit literal".to_string(),
126                        ))
127                    }
128                }
129            }
130            Rule::unit_word => {
131                unit_str = Some(inner_pair.as_str());
132            }
133            _ => {}
134        }
135    }
136
137    let value =
138        number.ok_or_else(|| LemmaError::Engine("Missing number in unit literal".to_string()))?;
139    let unit =
140        unit_str.ok_or_else(|| LemmaError::Engine("Missing unit in unit literal".to_string()))?;
141
142    // Resolve the unit string to a LiteralValue
143    super::units::resolve_unit(value, unit)
144}
145
146/// Parse date/time literals with comprehensive error messages.
147/// Supports formats:
148/// - Date only: YYYY-MM-DD (e.g., 2024-01-15)
149/// - DateTime: YYYY-MM-DDTHH:MM:SS (e.g., 2024-01-15T14:30:00)
150/// - With timezone: YYYY-MM-DDTHH:MM:SSZ or YYYY-MM-DDTHH:MM:SS+HH:MM
151fn parse_datetime_literal(pair: Pair<Rule>) -> Result<LiteralValue, LemmaError> {
152    let datetime_str = pair.as_str();
153
154    // Try datetime with timezone first
155    if let Ok(dt) = datetime_str.parse::<chrono::DateTime<chrono::FixedOffset>>() {
156        let offset = dt.offset().local_minus_utc();
157        return Ok(LiteralValue::Date(DateTimeValue {
158            year: dt.year(),
159            month: dt.month(),
160            day: dt.day(),
161            hour: dt.hour(),
162            minute: dt.minute(),
163            second: dt.second(),
164            timezone: Some(TimezoneValue {
165                offset_hours: (offset / 3600) as i8,
166                offset_minutes: ((offset % 3600) / 60) as u8,
167            }),
168        }));
169    }
170
171    // Try datetime without timezone
172    if let Ok(dt) = datetime_str.parse::<chrono::NaiveDateTime>() {
173        return Ok(LiteralValue::Date(DateTimeValue {
174            year: dt.year(),
175            month: dt.month(),
176            day: dt.day(),
177            hour: dt.hour(),
178            minute: dt.minute(),
179            second: dt.second(),
180            timezone: None,
181        }));
182    }
183
184    // Try date only
185    if let Ok(d) = datetime_str.parse::<chrono::NaiveDate>() {
186        return Ok(LiteralValue::Date(DateTimeValue {
187            year: d.year(),
188            month: d.month(),
189            day: d.day(),
190            hour: 0,
191            minute: 0,
192            second: 0,
193            timezone: None,
194        }));
195    }
196
197    // Provide helpful error message
198    Err(LemmaError::Engine(format!(
199        "Invalid date/time format: '{}'\n\
200         Expected one of:\n\
201         - Date: YYYY-MM-DD (e.g., 2024-01-15)\n\
202         - DateTime: YYYY-MM-DDTHH:MM:SS (e.g., 2024-01-15T14:30:00)\n\
203         - With timezone: YYYY-MM-DDTHH:MM:SSZ or +HH:MM (e.g., 2024-01-15T14:30:00Z)\n\
204         Note: Month must be 1-12, day must be valid for the month (no Feb 30), hours 0-23, minutes/seconds 0-59",
205        datetime_str
206    )))
207}
208
209/// Parse time literals with comprehensive error messages.
210/// Supports formats:
211/// - Time: HH:MM or HH:MM:SS (e.g., 14:30 or 14:30:00)
212/// - With timezone: HH:MM:SSZ or HH:MM:SS+HH:MM
213fn parse_time_literal(pair: Pair<Rule>) -> Result<LiteralValue, LemmaError> {
214    let time_str = pair.as_str();
215
216    // Try time with timezone first
217    if let Ok(t) = time_str.parse::<chrono::DateTime<chrono::FixedOffset>>() {
218        let offset = t.offset().local_minus_utc();
219        return Ok(LiteralValue::Time(TimeValue {
220            hour: t.hour() as u8,
221            minute: t.minute() as u8,
222            second: t.second() as u8,
223            timezone: Some(TimezoneValue {
224                offset_hours: (offset / 3600) as i8,
225                offset_minutes: ((offset % 3600) / 60) as u8,
226            }),
227        }));
228    }
229
230    // Try time without timezone
231    if let Ok(t) = time_str.parse::<chrono::NaiveTime>() {
232        return Ok(LiteralValue::Time(TimeValue {
233            hour: t.hour() as u8,
234            minute: t.minute() as u8,
235            second: t.second() as u8,
236            timezone: None,
237        }));
238    }
239
240    // Provide helpful error message
241    Err(LemmaError::Engine(format!(
242        "Invalid time format: '{}'\n\
243         Expected: HH:MM or HH:MM:SS (e.g., 14:30 or 14:30:00)\n\
244         With timezone: HH:MM:SSZ or +HH:MM (e.g., 14:30:00Z or 14:30:00+01:00)\n\
245         Note: Hours must be 0-23, minutes and seconds must be 0-59",
246        time_str
247    )))
248}
249
250// rust_decimal limits: max value ~10^28 (fits in 96 bits), max scale 28 decimal places
251// This means we can safely handle exponents from -28 to +28
252const MAX_DECIMAL_EXPONENT: i32 = 28;
253
254/// Parse scientific notation numbers (e.g., 1.23e+5, 5.67E-3, 1e10).
255/// Converts mantissa * 10^exponent to a Decimal value.
256fn parse_scientific_number(pair: Pair<Rule>) -> Result<Decimal, LemmaError> {
257    let mut inner = pair.into_inner();
258
259    let mantissa_pair = inner
260        .next()
261        .ok_or_else(|| LemmaError::Engine("Missing mantissa in scientific notation".to_string()))?;
262    let exponent_pair = inner
263        .next()
264        .ok_or_else(|| LemmaError::Engine("Missing exponent in scientific notation".to_string()))?;
265
266    let mantissa = parse_decimal_number(mantissa_pair.as_str())?;
267    let exponent: i32 = exponent_pair.as_str().parse().map_err(|_| {
268        LemmaError::Engine(format!(
269            "Invalid exponent: '{}'\n\
270             Expected an integer between -{} and +{}",
271            exponent_pair.as_str(),
272            MAX_DECIMAL_EXPONENT,
273            MAX_DECIMAL_EXPONENT
274        ))
275    })?;
276
277    let power_of_ten = decimal_pow10(exponent).ok_or_else(|| {
278        LemmaError::Engine(format!(
279            "Exponent {} is out of range\n\
280             Maximum supported exponent is ±{} (values up to ~10^28)",
281            exponent, MAX_DECIMAL_EXPONENT
282        ))
283    })?;
284
285    // For positive exponents, multiply (1e3 = 1000)
286    // For negative exponents, divide (1e-3 = 0.001)
287    if exponent >= 0 {
288        mantissa.checked_mul(power_of_ten).ok_or_else(|| {
289            LemmaError::Engine(format!(
290                "Number overflow: result of {}e{} exceeds maximum value (~10^28)",
291                mantissa, exponent
292            ))
293        })
294    } else {
295        mantissa.checked_div(power_of_ten).ok_or_else(|| {
296            LemmaError::Engine(format!(
297                "Precision error: result of {}e{} has too many decimal places (max 28)",
298                mantissa, exponent
299            ))
300        })
301    }
302}
303
304/// Calculate 10^exp as a Decimal value
305/// Returns None if the exponent exceeds Decimal's precision limits
306fn decimal_pow10(exp: i32) -> Option<Decimal> {
307    let abs_exp = exp.abs();
308    if abs_exp > MAX_DECIMAL_EXPONENT {
309        return None;
310    }
311
312    let mut result = Decimal::ONE;
313    let ten = Decimal::from(10);
314
315    for _ in 0..abs_exp {
316        result = result.checked_mul(ten)?;
317    }
318
319    Some(result)
320}
321
322/// Parse a decimal number, supporting underscores as digit separators.
323/// Examples: 42, 3.14, 1_000_000, -5.5
324fn parse_decimal_number(number_str: &str) -> Result<Decimal, LemmaError> {
325    let clean_number = number_str.replace('_', "");
326    Decimal::from_str(&clean_number).map_err(|_| {
327        LemmaError::Engine(format!(
328            "Invalid number: '{}'\n\
329             Expected a valid decimal number (e.g., 42, 3.14, 1_000_000)\n\
330             Note: Use underscores as thousand separators if needed",
331            number_str
332        ))
333    })
334}