Skip to main content

chipi_core/
format_parser.rs

//! Character-level parser for format string internals.
//!
//! Parses format strings like `"b{lk ? l}{aa ? a} {li:#x}"` into
//! a sequence of [`FormatPiece`] values.
5
6use crate::error::{Error, ErrorKind, Span};
7use crate::types::*;
8
9/// Parse a format string into a sequence of format pieces.
10///
11/// The input should be the raw content between quotes (without the quotes).
12pub fn parse_format_string(input: &str, span: &Span) -> Result<Vec<FormatPiece>, Error> {
13    let mut pieces = Vec::new();
14    let chars: Vec<char> = input.chars().collect();
15    let mut pos = 0;
16    let mut literal = String::new();
17
18    while pos < chars.len() {
19        match chars[pos] {
20            '\\' if pos + 1 < chars.len() => match chars[pos + 1] {
21                '{' | '}' | '?' | ':' | '\\' => {
22                    literal.push(chars[pos + 1]);
23                    pos += 2;
24                }
25                _ => {
26                    literal.push('\\');
27                    literal.push(chars[pos + 1]);
28                    pos += 2;
29                }
30            },
31            '{' => {
32                if !literal.is_empty() {
33                    pieces.push(FormatPiece::Literal(std::mem::take(&mut literal)));
34                }
35                pos += 1;
36                let piece = parse_expr_block(&chars, &mut pos, span)?;
37                pieces.push(piece);
38            }
39            _ => {
40                literal.push(chars[pos]);
41                pos += 1;
42            }
43        }
44    }
45
46    if !literal.is_empty() {
47        pieces.push(FormatPiece::Literal(literal));
48    }
49
50    Ok(pieces)
51}
52
53/// Parse the content inside `{...}` and return a FormatPiece::FieldRef.
54fn parse_expr_block(chars: &[char], pos: &mut usize, span: &Span) -> Result<FormatPiece, Error> {
55    skip_ws(chars, pos);
56
57    // Collect all content until the closing `}`
58    let content_start = *pos;
59    let mut depth = 1;
60    while *pos < chars.len() {
61        match chars[*pos] {
62            '{' => depth += 1,
63            '}' => {
64                depth -= 1;
65                if depth == 0 {
66                    break;
67                }
68            }
69            '\\' => {
70                *pos += 1; // skip next
71            }
72            _ => {}
73        }
74        *pos += 1;
75    }
76
77    if depth != 0 {
78        return Err(Error::new(
79            ErrorKind::InvalidFormatString("unclosed '{'".to_string()),
80            span.clone(),
81        ));
82    }
83
84    let content: String = chars[content_start..*pos].iter().collect();
85    *pos += 1; // skip closing '}'
86
87    // Check for ternary (has `?` not inside parentheses)
88    if let Some(ternary_pos) = find_ternary_question(&content) {
89        let field = content[..ternary_pos].trim().to_string();
90        let rest = &content[ternary_pos + 1..];
91
92        // Check for `:` separator for else branch
93        if let Some(colon_pos) = find_ternary_colon(rest) {
94            let if_nonzero = rest[..colon_pos].trim().to_string();
95            let if_zero = rest[colon_pos + 1..].trim().to_string();
96            return Ok(FormatPiece::FieldRef {
97                expr: FormatExpr::Ternary {
98                    field,
99                    if_nonzero,
100                    if_zero: Some(if_zero),
101                },
102                spec: None,
103            });
104        } else {
105            let if_nonzero = rest.trim().to_string();
106            return Ok(FormatPiece::FieldRef {
107                expr: FormatExpr::Ternary {
108                    field,
109                    if_nonzero,
110                    if_zero: None,
111                },
112                spec: None,
113            });
114        }
115    }
116
117    // Split off format specifier: last `:` not inside parentheses
118    let (expr_str, spec) = split_format_spec(&content);
119
120    let expr = parse_expression(expr_str.trim(), span)?;
121
122    Ok(FormatPiece::FieldRef {
123        expr,
124        spec: spec.map(|s| s.to_string()),
125    })
126}
127
128/// Parse an expression string (field, arithmetic, map call, builtin call, int literal).
129fn parse_expression(input: &str, span: &Span) -> Result<FormatExpr, Error> {
130    let input = input.trim();
131    if input.is_empty() {
132        return Err(Error::new(
133            ErrorKind::InvalidFormatString("empty expression".to_string()),
134            span.clone(),
135        ));
136    }
137
138    // Try arithmetic: look for +, -, * at the top level (not inside parens)
139    // Lowest precedence first: +, -
140    if let Some(op_pos) = find_top_level_op(input, &['+', '-']) {
141        let left = input[..op_pos].trim();
142        let op_char = input.as_bytes()[op_pos] as char;
143        let right = input[op_pos + 1..].trim();
144        let op = match op_char {
145            '+' => ArithOp::Add,
146            '-' => ArithOp::Sub,
147            _ => unreachable!(),
148        };
149        return Ok(FormatExpr::Arithmetic {
150            left: Box::new(parse_expression(left, span)?),
151            op,
152            right: Box::new(parse_expression(right, span)?),
153        });
154    }
155
156    // Next precedence: *, /, %
157    if let Some(op_pos) = find_top_level_op(input, &['*', '/', '%']) {
158        let left = input[..op_pos].trim();
159        let op_char = input.as_bytes()[op_pos] as char;
160        let right = input[op_pos + 1..].trim();
161        let op = match op_char {
162            '*' => ArithOp::Mul,
163            '/' => ArithOp::Div,
164            '%' => ArithOp::Mod,
165            _ => unreachable!(),
166        };
167        return Ok(FormatExpr::Arithmetic {
168            left: Box::new(parse_expression(left, span)?),
169            op,
170            right: Box::new(parse_expression(right, span)?),
171        });
172    }
173
174    // Check for function call: identifier(args)
175    if let Some(paren_pos) = input.find('(') {
176        if input.ends_with(')') {
177            let func_name = input[..paren_pos].trim();
178            let args_str = &input[paren_pos + 1..input.len() - 1];
179            let args = parse_arg_list(args_str, span)?;
180
181            // Check if it's a builtin
182            match func_name {
183                "rotate_right" => {
184                    return Ok(FormatExpr::BuiltinCall {
185                        func: BuiltinFunc::RotateRight,
186                        args,
187                    });
188                }
189                "rotate_left" => {
190                    return Ok(FormatExpr::BuiltinCall {
191                        func: BuiltinFunc::RotateLeft,
192                        args,
193                    });
194                }
195                _ => {
196                    return Ok(FormatExpr::MapCall {
197                        map_name: func_name.to_string(),
198                        args,
199                    });
200                }
201            }
202        }
203    }
204
205    // Unary negation: -expr becomes 0 - expr
206    if let Some(rest) = input.strip_prefix('-') {
207        let rest = rest.trim();
208        if !rest.is_empty() {
209            return Ok(FormatExpr::Arithmetic {
210                left: Box::new(FormatExpr::IntLiteral(0)),
211                op: ArithOp::Sub,
212                right: Box::new(parse_expression(rest, span)?),
213            });
214        }
215    }
216
217    // Integer literal
218    if let Some(val) = try_parse_int(input) {
219        return Ok(FormatExpr::IntLiteral(val));
220    }
221
222    // Dotted access: field.fragment (sub-decoder access)
223    if let Some(dot_pos) = input.find('.') {
224        let field = &input[..dot_pos];
225        let fragment = &input[dot_pos + 1..];
226        if is_valid_identifier(field) && is_valid_identifier(fragment) {
227            return Ok(FormatExpr::SubDecoderAccess {
228                field: field.to_string(),
229                fragment: fragment.to_string(),
230            });
231        }
232    }
233
234    // Must be a field reference
235    if is_valid_identifier(input) {
236        return Ok(FormatExpr::Field(input.to_string()));
237    }
238
239    Err(Error::new(
240        ErrorKind::InvalidFormatString(format!("invalid expression '{}'", input)),
241        span.clone(),
242    ))
243}
244
245/// Parse a comma-separated list of argument expressions.
246fn parse_arg_list(input: &str, span: &Span) -> Result<Vec<FormatExpr>, Error> {
247    if input.trim().is_empty() {
248        return Ok(Vec::new());
249    }
250
251    let mut args = Vec::new();
252    let mut depth = 0;
253    let mut start = 0;
254
255    for (i, ch) in input.char_indices() {
256        match ch {
257            '(' => depth += 1,
258            ')' => depth -= 1,
259            ',' if depth == 0 => {
260                let arg = input[start..i].trim();
261                args.push(parse_expression(arg, span)?);
262                start = i + 1;
263            }
264            _ => {}
265        }
266    }
267
268    let last = input[start..].trim();
269    if !last.is_empty() {
270        args.push(parse_expression(last, span)?);
271    }
272
273    Ok(args)
274}
275
/// Find the position of `?` for ternary, not inside parentheses and not escaped.
///
/// Returns the byte offset of the first `?` that sits at parenthesis depth zero and is not
/// preceded by a backslash, or `None` when there is no such character. A backslash hides
/// the single character that follows it, matching the escape handling of
/// `find_ternary_colon`.
fn find_ternary_question(s: &str) -> Option<usize> {
    let mut depth = 0;
    let mut chars = s.char_indices();
    while let Some((i, ch)) = chars.next() {
        match ch {
            '(' => depth += 1,
            ')' => depth -= 1,
            '\\' => {
                // Consume the escaped character so an escaped `?` is never matched.
                chars.next();
            }
            '?' if depth == 0 => return Some(i),
            _ => {}
        }
    }
    None
}
290
/// Find the position of `:` for ternary else branch (not inside parens, not escaped).
///
/// Returns the byte offset of the first `:` at parenthesis depth zero. A backslash
/// consumes the character after it, so `\:` is never reported.
fn find_ternary_colon(s: &str) -> Option<usize> {
    let mut nesting = 0;
    let mut cursor = s.char_indices();

    while let Some((offset, c)) = cursor.next() {
        match c {
            '\\' => {
                // Drop the escaped character, whatever it is.
                cursor.next();
            }
            '(' => nesting += 1,
            ')' => nesting -= 1,
            ':' if nesting == 0 => return Some(offset),
            _ => {}
        }
    }
    None
}
319
320/// Split off format specifier from expression.
321/// Returns (expression, optional_spec).
322fn split_format_spec(content: &str) -> (&str, Option<&str>) {
323    // Find the last `:` not inside parentheses
324    let mut depth = 0;
325    let mut last_colon = None;
326
327    for (i, ch) in content.char_indices() {
328        match ch {
329            '(' => depth += 1,
330            ')' => depth -= 1,
331            ':' if depth == 0 => last_colon = Some(i),
332            _ => {}
333        }
334    }
335
336    if let Some(colon_pos) = last_colon {
337        let spec = &content[colon_pos + 1..];
338        // Validate it looks like a format spec (starts with format chars, not an identifier)
339        if is_format_spec(spec) {
340            return (&content[..colon_pos], Some(spec));
341        }
342    }
343
344    (content, None)
345}
346
/// Check if a string looks like a Rust format specifier.
///
/// Only the first character is inspected: it must be `#`, an ASCII digit, or one of
/// `x`, `X`, `o`, `b`, `?`, `e`, `E` (e.g. `#x`, `04x`, `b`). An empty string is not a
/// spec.
fn is_format_spec(s: &str) -> bool {
    match s.chars().next() {
        Some(lead) => {
            lead.is_ascii_digit() || matches!(lead, '#' | 'x' | 'X' | 'o' | 'b' | '?' | 'e' | 'E')
        }
        None => false,
    }
}
365
/// Find the last top-level binary operator (not inside parens) from `ops`.
///
/// The string is scanned left to right and the byte offset of the *last* match is
/// returned, so a caller that splits there gets left-associative chains
/// (`a - b - c` splits into `a - b` and `c`).
///
/// An operator is not reported when:
/// * it is the very first character (it cannot have a left operand), or
/// * it is a `+` / `-` in sign position, i.e. the previous non-whitespace character is
///   missing or is itself one of `+ - * / %`. This lets `a * -b` and `a - -b` split at
///   the real binary operator instead of at the sign.
///
/// Returns `None` when no qualifying operator exists.
fn find_top_level_op(s: &str, ops: &[char]) -> Option<usize> {
    let mut depth = 0;
    let mut last = None;
    // Most recent non-whitespace character seen, used to spot sign position.
    let mut prev_significant: Option<char> = None;

    for (byte_pos, ch) in s.char_indices() {
        match ch {
            '(' => depth += 1,
            ')' => depth -= 1,
            c if depth == 0 && ops.contains(&c) => {
                let in_sign_position = match prev_significant {
                    None => true,
                    Some(p) => matches!(p, '+' | '-' | '*' | '/' | '%'),
                };
                let is_unary_sign = matches!(c, '+' | '-') && in_sign_position;
                if byte_pos > 0 && !is_unary_sign {
                    last = Some(byte_pos);
                }
            }
            _ => {}
        }
        if !ch.is_whitespace() {
            prev_significant = Some(ch);
        }
    }

    last
}
396
/// Try to read `s` as an integer literal.
///
/// Accepts hexadecimal with a `0x` / `0X` prefix, otherwise decimal as understood by
/// `i64`'s `FromStr`. Returns `None` when `s` is not a literal or does not fit in `i64`.
///
/// A sign directly after the hex prefix (`0x-5`, `0x+5`) is rejected: `from_str_radix`
/// would accept it, but it is not a well-formed hex literal.
fn try_parse_int(s: &str) -> Option<i64> {
    match s.strip_prefix("0x").or_else(|| s.strip_prefix("0X")) {
        Some(hex) if hex.starts_with(&['+', '-'][..]) => None,
        Some(hex) => i64::from_str_radix(hex, 16).ok(),
        None => s.parse::<i64>().ok(),
    }
}
404
/// Returns `true` when `s` is a non-empty ASCII identifier: a letter or `_` followed by
/// any number of ASCII letters, digits or `_`.
fn is_valid_identifier(s: &str) -> bool {
    let mut rest = s.chars();
    match rest.next() {
        Some(head) if head == '_' || head.is_ascii_alphabetic() => {
            rest.all(|c| c == '_' || c.is_ascii_alphanumeric())
        }
        _ => false,
    }
}
416
/// Advance `*pos` past any run of ASCII whitespace in `chars`.
///
/// Leaves `*pos` untouched when it already points at a non-whitespace character or lies
/// at or beyond the end of the slice.
fn skip_ws(chars: &[char], pos: &mut usize) {
    while chars.get(*pos).map_or(false, |c| c.is_ascii_whitespace()) {
        *pos += 1;
    }
}
422
#[cfg(test)]
mod tests {
    use super::*;

    /// Dummy span attached to every parse call in these tests.
    fn test_span() -> Span {
        Span::new("test", 1, 1, 0)
    }

    /// Parses `input`, panicking if the parser reports an error.
    fn parse(input: &str) -> Vec<FormatPiece> {
        parse_format_string(input, &test_span()).unwrap()
    }

    /// Parses `input`, asserts it yields exactly one piece, and returns that piece.
    fn parse_single(input: &str) -> FormatPiece {
        let mut pieces = parse(input);
        assert_eq!(pieces.len(), 1);
        pieces.remove(0)
    }

    /// Unwraps a literal piece into its text.
    fn expect_literal(piece: FormatPiece) -> String {
        match piece {
            FormatPiece::Literal(text) => text,
            _ => panic!("expected literal"),
        }
    }

    /// Unwraps a field-reference piece into its expression and format spec.
    fn expect_field_ref(piece: FormatPiece) -> (FormatExpr, Option<String>) {
        match piece {
            FormatPiece::FieldRef { expr, spec } => (expr, spec),
            _ => panic!("expected field ref"),
        }
    }

    #[test]
    fn test_plain_literal() {
        let text = expect_literal(parse_single("hello"));
        assert_eq!(text, "hello");
    }

    #[test]
    fn test_simple_field_ref() {
        let pieces = parse("val={field}");
        assert_eq!(pieces.len(), 2);

        let mut pieces = pieces.into_iter();
        assert_eq!(expect_literal(pieces.next().unwrap()), "val=");

        let (expr, spec) = expect_field_ref(pieces.next().unwrap());
        assert!(matches!(&expr, FormatExpr::Field(f) if f == "field"));
        assert!(spec.is_none());
    }

    #[test]
    fn test_field_with_format_spec() {
        let (expr, spec) = expect_field_ref(parse_single("{field:#x}"));
        assert!(matches!(&expr, FormatExpr::Field(f) if f == "field"));
        assert_eq!(spec.as_deref(), Some("#x"));
    }

    #[test]
    fn test_ternary_no_else() {
        let (expr, _) = expect_field_ref(parse_single("{lk ? l}"));
        match expr {
            FormatExpr::Ternary {
                field,
                if_nonzero,
                if_zero,
            } => {
                assert_eq!(field, "lk");
                assert_eq!(if_nonzero, "l");
                assert!(if_zero.is_none());
            }
            _ => panic!("expected ternary"),
        }
    }

    #[test]
    fn test_ternary_with_else() {
        let (expr, _) = expect_field_ref(parse_single("{aa ? a : b}"));
        match expr {
            FormatExpr::Ternary {
                field,
                if_nonzero,
                if_zero,
            } => {
                assert_eq!(field, "aa");
                assert_eq!(if_nonzero, "a");
                assert_eq!(if_zero.as_deref(), Some("b"));
            }
            _ => panic!("expected ternary"),
        }
    }

    #[test]
    fn test_arithmetic() {
        let (expr, _) = expect_field_ref(parse_single("{a + b * 4}"));
        match expr {
            // The loosest-binding operator must end up at the root.
            FormatExpr::Arithmetic { op, .. } => assert_eq!(op, ArithOp::Add),
            _ => panic!("expected arithmetic"),
        }
    }

    #[test]
    fn test_map_call() {
        let (expr, _) = expect_field_ref(parse_single("{spr_name(spr)}"));
        match expr {
            FormatExpr::MapCall { map_name, args } => {
                assert_eq!(map_name, "spr_name");
                assert_eq!(args.len(), 1);
            }
            _ => panic!("expected map call"),
        }
    }

    #[test]
    fn test_builtin_call() {
        let (expr, _) = expect_field_ref(parse_single("{rotate_right(val, amt)}"));
        match expr {
            FormatExpr::BuiltinCall { func, args } => {
                assert_eq!(func, BuiltinFunc::RotateRight);
                assert_eq!(args.len(), 2);
            }
            _ => panic!("expected builtin call"),
        }
    }

    #[test]
    fn test_escaped_braces() {
        let text = expect_literal(parse_single("a\\{b\\}c"));
        assert_eq!(text, "a{b}c");
    }

    #[test]
    fn test_int_literal() {
        let (expr, _) = expect_field_ref(parse_single("{42}"));
        assert!(matches!(expr, FormatExpr::IntLiteral(42)));
    }

    #[test]
    fn test_mixed() {
        // literal, ternary, ternary, literal, field-with-spec
        assert_eq!(parse("b{lk ? l}{aa ? a} {li:#x}").len(), 5);
    }
}
587}