datex_core/parser/
utils.rs

1use crate::parser::lexer::{IntegerWithVariant, Token};
2use crate::values::core_values::decimal::Decimal;
3use crate::values::core_values::decimal::typed_decimal::{
4    DecimalTypeVariant, TypedDecimal,
5};
6use crate::values::core_values::error::NumberParseError;
7use crate::values::core_values::integer::typed_integer::TypedInteger;
8use core::iter::Peekable;
9use core::str::{Chars, FromStr};
10use datex_core::values::core_values::integer::Integer;
11use datex_core::values::core_values::integer::typed_integer::IntegerTypeVariant;
12
13pub enum IntegerOrDecimal {
14    Integer(Integer),
15    Decimal(Decimal),
16    TypedInteger(TypedInteger),
17    TypedDecimal(TypedDecimal),
18}
19
20pub enum IntegerOrTypedInteger {
21    Integer(Integer),
22    TypedInteger(TypedInteger),
23}
24
25/// Parses an integer literal with an integer part, an optional exponent part, and an optional variant suffix.
26/// Returns either an Integer or a Decimal value.
27pub fn parse_integer_literal(
28    lit: String,
29) -> Result<IntegerOrDecimal, NumberParseError> {
30    // first consume all digits for the integer part, skipping underscores
31    let mut chars = lit.chars().peekable();
32    let integer_part = consume_digits_with_underscores(&mut chars);
33
34    // check for exponent part
35    let mut exponent_part = String::new();
36    if let Some(&c) = chars.peek() {
37        if c == 'e' || c == 'E' {
38            chars.next();
39            // optional + or -
40            if let Some(&c2) = chars.peek() {
41                if c2 == '-' {
42                    exponent_part.push(c2);
43                    chars.next();
44                } else if c2 == '+' {
45                    chars.next();
46                }
47            }
48            // consume all digits for exponent part
49            exponent_part += &consume_digits_with_underscores(&mut chars);
50        }
51    }
52
53    // the rest is the variant suffix, if any
54    let variant_part: String = chars.collect();
55
56    // integer only if no exponent part
57    if exponent_part.is_empty() {
58        // no variant and no exponent -> plain integer
59        if variant_part.is_empty() {
60            Integer::from_string(&integer_part).map(IntegerOrDecimal::Integer)
61        }
62        // variant -> distinguish between integer and decimal type variants
63        else {
64            // try to get integer type variant from variant part
65            if let Ok(integer_variant) =
66                IntegerTypeVariant::from_str(&variant_part)
67            {
68                TypedInteger::from_string_with_variant(
69                    &integer_part,
70                    integer_variant,
71                )
72                .map(IntegerOrDecimal::TypedInteger)
73            }
74            // otherwise try to parse as typed decimal
75            else if let Ok(decimal_variant) =
76                DecimalTypeVariant::from_str(&variant_part)
77            {
78                TypedDecimal::from_string_and_variant(
79                    &integer_part,
80                    decimal_variant,
81                )
82                .map(IntegerOrDecimal::TypedDecimal)
83            } else {
84                // should not happen if valid string literal is passed in
85                unreachable!()
86            }
87        }
88    }
89    // decimal if exponent part is present
90    else {
91        let full_number = format!("{}e{}", integer_part, exponent_part);
92        // no variant -> plain decimal with exponent
93        if variant_part.is_empty() {
94            Decimal::from_string(&full_number).map(IntegerOrDecimal::Decimal)
95        }
96        // decimal variant -> typed decimal with exponent
97        else if let Ok(decimal_variant) =
98            DecimalTypeVariant::from_str(&variant_part)
99        {
100            TypedDecimal::from_string_and_variant(&full_number, decimal_variant)
101                .map(IntegerOrDecimal::TypedDecimal)
102        }
103        // otherwise invalid variant for decimal with exponent
104        else {
105            Err(NumberParseError::InvalidFormat)
106        }
107    }
108}
109
/// Advances `chars` past a leading run of ASCII decimal digits and underscores and
/// returns the digits that were seen, with the underscores left out.
///
/// The first character that is neither a digit nor an underscore is not consumed.
fn consume_digits_with_underscores(chars: &mut Peekable<Chars>) -> String {
    let mut digits = String::new();
    while let Some(ch) =
        chars.next_if(|ch| ch.is_ascii_digit() || *ch == '_')
    {
        // underscores are only separators and never part of the value
        if ch != '_' {
            digits.push(ch);
        }
    }
    digits
}
125
126pub fn parse_integer_with_variant(
127    integer_with_variant: IntegerWithVariant,
128    token: Token,
129) -> Result<IntegerOrTypedInteger, NumberParseError> {
130    let radix = match token {
131        Token::BinaryIntegerLiteral(_) => 2,
132        Token::OctalIntegerLiteral(_) => 8,
133        Token::HexadecimalIntegerLiteral(_) => 16,
134        _ => unreachable!(),
135    };
136    match integer_with_variant.variant {
137        Some(var) => TypedInteger::from_string_radix_with_variant(
138            &integer_with_variant.value[2..],
139            radix,
140            var,
141        )
142        .map(IntegerOrTypedInteger::TypedInteger),
143        None => {
144            Integer::from_string_radix(&integer_with_variant.value[2..], radix)
145                .map(IntegerOrTypedInteger::Integer)
146        }
147    }
148}
149
/// Takes a quoted text literal, e.g. `"Hello, world!"`, `'Hello, world!'` or `"x\""`,
/// and returns the unescaped content, e.g. `Hello, world!` or `x"`.
///
/// The first and the last character of `text` (the quotes) are dropped; input shorter
/// than two characters yields an empty string. The remaining text is processed in a
/// single left-to-right pass, so the output of one escape is never re-interpreted as
/// the start of another one (`\\n` is a backslash followed by `n`, not a newline).
///
/// Recognised escapes: `\"`, `\'`, `\n`, `\r`, `\t`, `\b`, `\f`, `\\` and JSON-style
/// `\uXXXX` sequences including surrogate pairs. Any other backslash sequence, and any
/// malformed `\u` sequence, is copied through unchanged.
pub fn unescape_text(text: &str) -> String {
    // Drop the surrounding quotes character-wise so that short or non-ASCII input
    // cannot cause a slicing panic.
    let mut delimited = text.chars();
    delimited.next();
    delimited.next_back();
    let mut rest = delimited.as_str();

    let mut unescaped = String::with_capacity(rest.len());
    while let Some(pos) = rest.find('\\') {
        unescaped.push_str(&rest[..pos]);
        rest = &rest[pos..];

        if rest.starts_with("\\u") {
            // Hand the whole run of adjacent `\uXXXX` groups to the decoder at once so
            // that a high and a low surrogate are combined.
            let run_len = unicode_escape_run_len(rest);
            unescaped.push_str(&decode_json_unicode_escapes(&rest[..run_len]));
            rest = &rest[run_len..];
            continue;
        }

        let mut after_backslash = rest[1..].chars();
        match after_backslash.next() {
            Some('"') => unescaped.push('"'),
            Some('\'') => unescaped.push('\''),
            Some('n') => unescaped.push('\n'),
            Some('r') => unescaped.push('\r'),
            Some('t') => unescaped.push('\t'),
            Some('b') => unescaped.push('\x08'), // backspace
            Some('f') => unescaped.push('\x0C'), // form feed
            Some('\\') => unescaped.push('\\'),
            // TODO #156 remove all other backslashes before any other character
            Some(other) => {
                unescaped.push('\\');
                unescaped.push(other);
            }
            // a lone backslash at the very end is kept as is
            None => unescaped.push('\\'),
        }
        rest = after_backslash.as_str();
    }
    unescaped.push_str(rest);
    unescaped
}

/// Returns the byte length of the run of adjacent `\u` escapes at the start of `s`.
///
/// Each group is `\u` followed by up to four ASCII hex digits; the run ends after the
/// first group with fewer than four digits. Returns 0 if `s` does not start with `\u`.
/// Only ASCII bytes are counted, so the result is always a valid `str` boundary.
fn unicode_escape_run_len(s: &str) -> usize {
    let bytes = s.as_bytes();
    let mut len = 0;
    while bytes[len..].starts_with(b"\\u") {
        let digits = bytes[len + 2..]
            .iter()
            .take(4)
            .take_while(|b| b.is_ascii_hexdigit())
            .count();
        len += 2 + digits;
        if digits < 4 {
            break;
        }
    }
    len
}

/// Parses a single `\uXXXX` escape at the very start of `s` into its UTF-16 code unit.
///
/// Returns `None` unless `s` starts with `\u` followed by exactly four ASCII hex digits.
fn parse_unicode_code_unit(s: &str) -> Option<u16> {
    let digits = s.strip_prefix("\\u")?.get(..4)?;
    // `from_str_radix` alone would also accept a leading `+`
    if !digits.bytes().all(|b| b.is_ascii_hexdigit()) {
        return None;
    }
    u16::from_str_radix(digits, 16).ok()
}

// TODO #352: double check if this works correctly for all edge cases
/// Decodes JSON-style unicode escape sequences, including surrogate pairs.
///
/// A high surrogate that is directly followed by a low surrogate is combined into one
/// character. Everything that cannot be decoded (truncated or non-hex escapes, unpaired
/// surrogates) is copied to the output exactly as written, and no input is dropped.
fn decode_json_unicode_escapes(input: &str) -> String {
    let mut output = String::with_capacity(input.len());
    let mut rest = input;

    while let Some(pos) = rest.find("\\u") {
        output.push_str(&rest[..pos]);
        rest = &rest[pos..];

        // number of bytes of `rest` handled in this iteration
        let consumed = match parse_unicode_code_unit(rest) {
            // High surrogate: only meaningful together with a following low surrogate
            Some(high @ 0xD800..=0xDBFF) => {
                let paired = match parse_unicode_code_unit(&rest[6..]) {
                    Some(low @ 0xDC00..=0xDFFF) => char::from_u32(
                        0x10000
                            + ((u32::from(high) - 0xD800) << 10)
                            + (u32::from(low) - 0xDC00),
                    ),
                    _ => None,
                };
                match paired {
                    Some(c) => {
                        output.push(c);
                        12
                    }
                    // Unpaired high surrogate: keep it verbatim and let the next
                    // iteration look at whatever follows.
                    None => {
                        output.push_str(&rest[..6]);
                        6
                    }
                }
            }
            // Any other code unit: a scalar value, or a lone low surrogate (kept verbatim)
            Some(unit) => {
                match char::from_u32(u32::from(unit)) {
                    Some(c) => output.push(c),
                    None => output.push_str(&rest[..6]),
                }
                6
            }
            // Not a well-formed escape: keep the `\u` and continue right after it
            None => {
                output.push_str("\\u");
                2
            }
        };
        rest = &rest[consumed..];
    }

    output.push_str(rest);
    output
}