Skip to main content

ron_schema/ron/
parser.rs

1/*************************
2 * Author: Bradley Hunter
3 */
4
5use crate::span::{Position, Span, Spanned};
6use crate::error::{RonErrorKind, RonParseError};
7use super::{RonValue, RonStruct};
8
/// Byte-oriented cursor over a RON source string.
///
/// Tracks the current byte offset together with the 1-based line and column so
/// that parsed values and errors can be tagged with a [`Span`].
#[derive(Debug)]
struct Parser<'a> {
    /// The full source text; sliced by byte offset to recover identifier and number text.
    source: &'a str,
    /// The same source viewed as bytes (`source.as_bytes()`), used for lookahead.
    bytes: &'a [u8],
    /// Index into `bytes` of the next unread byte.
    offset: usize,
    /// Line of the next unread byte; starts at 1 and grows by one per `\n` consumed.
    line: usize,
    /// Column of the next unread byte; starts at 1, counts bytes, and resets to 1 after `\n`.
    column: usize,
}
17
18impl<'a> Parser<'a> {
19    fn new(source: &'a str) -> Self {
20        Self { source, bytes: source.as_bytes(), offset: 0, line: 1, column: 1 }
21    }
22
23    fn position(&self) -> Position {
24        Position { offset: self.offset, line: self.line, column: self.column }
25    }
26
27    fn peek(&self) -> Option<u8> {
28        self.bytes.get(self.offset).copied()
29    }
30
31    fn advance(&mut self) {
32        if let Some(byte) = self.peek() {
33            if byte == b'\n'{
34                self.column = 1;
35                self.line += 1;
36            } else {
37                self.column += 1;
38            }
39            self.offset += 1;
40        } 
41    }
42
43    fn skip_whitespace(&mut self) {
44        loop {
45            match self.peek() {
46                Some(b' ' | b'\t' | b'\n' | b'\r') => self.advance(),
47                Some(b'/') if self.bytes.get(self.offset + 1) == Some(&b'/') => {
48                    while self.peek().is_some_and(|b| b != b'\n') {
49                        self.advance();
50                    }
51                }
52                _ => break,
53            }
54        }
55    }
56
57    fn expect_char(&mut self, expected: u8) -> Result<(), RonParseError> {
58        let start = self.position();
59        match self.peek() {
60            Some(b) if b == expected => {
61                self.advance();
62                Ok(())
63            },
64            Some(b) => {
65                self.advance();
66                let end = self.position();
67                Err(RonParseError { 
68                    span: Span { 
69                        start, 
70                        end 
71                    }, 
72                    kind: RonErrorKind::UnexpectedToken { 
73                        expected: format!("'{}'", expected as char), 
74                        found: format!("'{}'", b as char) 
75                    } 
76                })
77            },
78            None => {
79                Err(RonParseError { 
80                    span: Span { 
81                        start, 
82                        end: start 
83                    }, 
84                    kind: RonErrorKind::UnexpectedToken { 
85                        expected: format!("'{}'", expected as char), 
86                        found: "end of input".to_string() 
87                    } 
88                })
89            }
90        }
91    }
92
93    fn parse_identifier(&mut self) -> Result<Spanned<String>, RonParseError> {
94        let start = self.position();
95
96        // Check for valid identifier start
97        match self.peek() {
98            Some(b) if b.is_ascii_alphabetic() || b == b'_' => {},
99            Some(b) => {
100                self.advance();
101                let end = self.position();
102                return Err(RonParseError {
103                    span: Span { start, end },
104                    kind: RonErrorKind::UnexpectedToken {
105                        expected: "identifier".to_string(),
106                        found: format!("'{}'", b as char),
107                    },
108                });
109            },
110            None => {
111                return Err(RonParseError {
112                    span: Span { start, end: start },
113                    kind: RonErrorKind::UnexpectedToken {
114                        expected: "identifier".to_string(),
115                        found: "end of input".to_string(),
116                    },
117                });
118            },
119        }
120
121        // Consume all identifier continuation characters
122        while self.peek().is_some_and(|b| b.is_ascii_alphanumeric() || b == b'_') {
123            self.advance();
124        }
125
126        // Slice out the identifier text
127        let end = self.position();
128        Ok(Spanned {
129            value: self.source[start.offset..end.offset].to_string(),
130            span: Span { start, end },
131        })
132    }
133
134    #[allow(clippy::too_many_lines)]
135    fn parse_value(&mut self) -> Result<Spanned<RonValue>, RonParseError> {
136        self.skip_whitespace();
137        let start = self.position();
138
139        match self.peek() {
140            Some(b'"') => {
141                self.advance(); // skip opening quote
142                let mut content = String::new();
143                loop {
144                    match self.peek() {
145                        Some(b'"') => {
146                            self.advance(); // skip closing quote
147                            break;
148                        }
149                        // b'\\' is a single backslash byte — Rust escapes it in source code.
150                        // We detect RON escape sequences (like \n, \t, \") by first matching
151                        // the backslash, then checking the next character to decide what to emit.
152                        Some(b'\\') => {
153                            self.advance(); // skip the backslash
154                            match self.peek() {
155                                Some(b'n') => { content.push('\n'); self.advance(); }
156                                Some(b't') => { content.push('\t'); self.advance(); }
157                                Some(b'\\') => { content.push('\\'); self.advance(); }
158                                Some(b'"') => { content.push('"'); self.advance(); }
159                                Some(b) => { content.push(b as char); self.advance(); }
160                                None => {
161                                    return Err(RonParseError {
162                                        span: Span { start, end: self.position() },
163                                        kind: RonErrorKind::UnterminatedString,
164                                    });
165                                }
166                            }
167                        }
168                        Some(b) => {
169                            content.push(b as char);
170                            self.advance();
171                        }
172                        None => {
173                            return Err(RonParseError {
174                                span: Span { start, end: self.position() },
175                                kind: RonErrorKind::UnterminatedString,
176                            });
177                        }
178                    }
179                }
180                let end = self.position();
181                Ok(Spanned {
182                    value: RonValue::String(content),
183                    span: Span { start, end },
184                })
185            },
186            Some(b) if b.is_ascii_digit() || b == b'-' => {
187                if b == b'-' {
188                    self.advance();
189                }
190
191                let mut has_dot = false;
192        
193                loop {
194                    match self.peek() {
195                        Some(b) if b.is_ascii_digit() => {self.advance();},
196                        Some(b'.') if !has_dot => {
197                            has_dot = true;
198                            self.advance();
199                        },
200                        Some(_) | None => {break;}
201                    }
202                }
203
204                let end = self.position();
205                let number_str = &self.source[start.offset..end.offset];
206                if has_dot {
207                    let num_float = number_str.parse::<f64>();
208                    if let Ok(num) = num_float {
209                        Ok(Spanned {
210                            value: RonValue::Float(num),
211                            span: Span { start, end },
212                        })
213                    } else {
214                        Err(RonParseError { 
215                            span: Span { start, end }, 
216                            kind: RonErrorKind::InvalidNumber { text: number_str.to_string() } 
217                        })
218                    }
219                } else {
220                    let num_int = number_str.parse::<i64>();
221                    if let Ok(num) = num_int {
222                        Ok(Spanned {
223                            value: RonValue::Integer(num),
224                            span: Span { start, end },
225                        })
226                    } else {
227                        Err(RonParseError { 
228                            span: Span { start, end }, 
229                            kind: RonErrorKind::InvalidNumber { text: number_str.to_string() } 
230                        })
231                    }
232                }
233            },
234            Some(b) if b.is_ascii_alphabetic() => {
235                let identifier = self.parse_identifier()?;
236                let word = identifier.value.as_str();
237                let identifier_span = identifier.span;
238                match word {
239                    "true" => {
240                        Ok(Spanned { value: RonValue::Bool(true), span: identifier_span })
241                    },
242                    "false" => {
243                        Ok(Spanned { value: RonValue::Bool(false), span: identifier_span })
244                    }
245                    "None" => {
246                        Ok(Spanned { value: RonValue::Option(None), span: identifier_span })
247                    }
248                    "Some" => {
249                        self.skip_whitespace();
250                        self.expect_char(b'(')?;
251                        let inner = self.parse_value()?;
252                        self.expect_char(b')')?;
253                        Ok(Spanned { 
254                            value: RonValue::Option(Some(Box::new(inner))), 
255                            span: Span { start, end: self.position() } 
256                        })
257                    }
258                    _ => {
259                        Ok(Spanned { 
260                            value: RonValue::Identifier(word.to_string()), 
261                            span: identifier_span 
262                        })
263                    }
264                }
265            },
266            Some(b'[') => {
267                self.advance();
268                let mut elements = Vec::new();
269                loop {
270                    self.skip_whitespace();
271                    if let Some(b']') = self.peek() {
272                        break;
273                    }
274                    let value = self.parse_value()?;
275                    elements.push(value);
276                    self.skip_whitespace();
277                    if let Some(b',') = self.peek() {
278                        self.advance();
279                    }
280                }
281                self.expect_char(b']')?;
282                Ok(Spanned { 
283                    value: RonValue::List(elements), 
284                    span: Span { start, end: self.position() } 
285                })
286            },
287            Some(b'(') => {
288                self.advance();
289                let mut fields: Vec<(Spanned<String>, Spanned<RonValue>)> = Vec::new();
290                loop {
291                    self.skip_whitespace();
292                    if let Some(b')') = self.peek() {
293                        break;
294                    }
295                    let field = self.parse_identifier()?;
296                    self.skip_whitespace();
297                    self.expect_char(b':')?;
298                    self.skip_whitespace();
299                    let value = self.parse_value()?;
300                    fields.push((field, value));
301                    self.skip_whitespace();
302                    match self.peek() {
303                        Some(b',') => self.advance(),
304                        Some(_) => {}
305                        None => {
306                            return Err(RonParseError { 
307                                span: Span { start, end: self.position() } , 
308                                kind: RonErrorKind::UnexpectedToken { 
309                                    expected: "character".to_string(), 
310                                    found: "end of file".to_string() } 
311                                });
312                        }
313                    }
314                }
315                let close_span_start = self.position();
316                self.expect_char(b')')?;
317                let close_span = Span{ start: close_span_start, end: self.position() };
318                Ok(Spanned { 
319                    value: RonValue::Struct(RonStruct { fields, close_span }), 
320                    span: Span { start, end: self.position() } 
321                })
322            }
323            Some(b) => {
324                self.advance();
325                let end = self.position();
326                Err(RonParseError { 
327                    span: Span { start, end }, 
328                    kind: RonErrorKind::UnexpectedToken { 
329                        expected: "value".to_string(), 
330                        found: format!("{}", b as char) 
331                    } 
332                })
333            },
334            None => {
335                Err(RonParseError { 
336                    span: Span { start, end: start }, 
337                    kind: RonErrorKind::UnexpectedToken { 
338                        expected: "value".to_string(), 
339                        found: "end of file".to_string() 
340                    } 
341                })
342            }
343        }
344    }
345}
346
347/// Parses a RON data source string into a spanned value tree.
348///
349/// # Errors
350///
351/// Returns a [`RonParseError`] if the source contains syntax errors.
352pub fn parse_ron(source: &str) -> Result<Spanned<RonValue>, RonParseError> {
353    let mut parser = Parser::new(source);
354    parser.parse_value()
355}
356
#[cfg(test)]
mod tests {
    use super::*;

    /// Builds a parser over `source`.
    fn parser(source: &str) -> Parser<'_> {
        Parser::new(source)
    }

    /// Parses one value from `source`, panicking if parsing fails.
    fn parse_ok(source: &str) -> Spanned<RonValue> {
        parser(source).parse_value().unwrap()
    }

    /// Parses one value from `source`, panicking if parsing succeeds.
    fn parse_err(source: &str) -> RonParseError {
        parser(source).parse_value().unwrap_err()
    }

    // --------------------------------------------------------
    // Strings
    // --------------------------------------------------------

    /// A plain quoted string yields its contents.
    #[test]
    fn string_simple() {
        assert_eq!(parse_ok("\"hello\"").value, RonValue::String("hello".to_string()));
    }

    /// Two adjacent quotes yield the empty string.
    #[test]
    fn string_empty() {
        assert_eq!(parse_ok("\"\"").value, RonValue::String(String::new()));
    }

    /// Spaces inside the quotes are preserved.
    #[test]
    fn string_with_spaces() {
        assert_eq!(
            parse_ok("\"Ashborn Hound\"").value,
            RonValue::String("Ashborn Hound".to_string())
        );
    }

    /// `\"` inside a string is a literal double quote.
    #[test]
    fn string_escaped_quote() {
        assert_eq!(
            parse_ok("\"say \\\"hi\\\"\"").value,
            RonValue::String("say \"hi\"".to_string())
        );
    }

    /// `\\` inside a string is a single backslash.
    #[test]
    fn string_escaped_backslash() {
        assert_eq!(parse_ok("\"a\\\\b\"").value, RonValue::String("a\\b".to_string()));
    }

    /// `\n` inside a string is a newline.
    #[test]
    fn string_escaped_newline() {
        assert_eq!(
            parse_ok("\"line1\\nline2\"").value,
            RonValue::String("line1\nline2".to_string())
        );
    }

    /// `\t` inside a string is a tab.
    #[test]
    fn string_escaped_tab() {
        assert_eq!(
            parse_ok("\"col1\\tcol2\"").value,
            RonValue::String("col1\tcol2".to_string())
        );
    }

    /// A string without a closing quote is rejected.
    #[test]
    fn string_unterminated() {
        assert_eq!(parse_err("\"hello").kind, RonErrorKind::UnterminatedString);
    }

    // --------------------------------------------------------
    // Integers
    // --------------------------------------------------------

    /// A positive integer literal.
    #[test]
    fn integer_positive() {
        assert_eq!(parse_ok("42").value, RonValue::Integer(42));
    }

    /// The literal zero.
    #[test]
    fn integer_zero() {
        assert_eq!(parse_ok("0").value, RonValue::Integer(0));
    }

    /// A negative integer literal.
    #[test]
    fn integer_negative() {
        assert_eq!(parse_ok("-7").value, RonValue::Integer(-7));
    }

    // --------------------------------------------------------
    // Floats
    // --------------------------------------------------------

    /// A simple float literal.
    #[test]
    fn float_simple() {
        assert_eq!(parse_ok("3.14").value, RonValue::Float(3.14));
    }

    /// A negative float literal.
    #[test]
    fn float_negative() {
        assert_eq!(parse_ok("-0.5").value, RonValue::Float(-0.5));
    }

    /// `1.0` stays a float and is not collapsed to an integer.
    #[test]
    fn float_one_point_zero() {
        assert_eq!(parse_ok("1.0").value, RonValue::Float(1.0));
    }

    // --------------------------------------------------------
    // Booleans
    // --------------------------------------------------------

    /// `true` becomes `Bool(true)`.
    #[test]
    fn bool_true() {
        assert_eq!(parse_ok("true").value, RonValue::Bool(true));
    }

    /// `false` becomes `Bool(false)`.
    #[test]
    fn bool_false() {
        assert_eq!(parse_ok("false").value, RonValue::Bool(false));
    }

    // --------------------------------------------------------
    // Options
    // --------------------------------------------------------

    /// `None` becomes `Option(None)`.
    #[test]
    fn option_none() {
        assert_eq!(parse_ok("None").value, RonValue::Option(None));
    }

    /// `Some(5)` wraps an integer.
    #[test]
    fn option_some_integer() {
        let v = parse_ok("Some(5)");
        let RonValue::Option(Some(inner)) = &v.value else {
            panic!("expected Option(Some(...))");
        };
        assert_eq!(inner.value, RonValue::Integer(5));
    }

    /// `Some("hi")` wraps a string.
    #[test]
    fn option_some_string() {
        let v = parse_ok("Some(\"hi\")");
        let RonValue::Option(Some(inner)) = &v.value else {
            panic!("expected Option(Some(...))");
        };
        assert_eq!(inner.value, RonValue::String("hi".to_string()));
    }

    // --------------------------------------------------------
    // Identifiers
    // --------------------------------------------------------

    /// A bare word that is not a keyword is an identifier.
    #[test]
    fn identifier_bare() {
        assert_eq!(
            parse_ok("Creature").value,
            RonValue::Identifier("Creature".to_string())
        );
    }

    /// A second bare word, for good measure.
    #[test]
    fn identifier_another() {
        assert_eq!(
            parse_ok("Sentinels").value,
            RonValue::Identifier("Sentinels".to_string())
        );
    }

    // --------------------------------------------------------
    // Lists
    // --------------------------------------------------------

    /// `[]` is a list with no elements.
    #[test]
    fn list_empty() {
        let v = parse_ok("[]");
        let RonValue::List(elems) = &v.value else {
            panic!("expected List");
        };
        assert!(elems.is_empty());
    }

    /// A one-element list keeps its element.
    #[test]
    fn list_single_element() {
        let v = parse_ok("[Creature]");
        let RonValue::List(elems) = &v.value else {
            panic!("expected List");
        };
        assert_eq!(elems.len(), 1);
        assert_eq!(elems[0].value, RonValue::Identifier("Creature".to_string()));
    }

    /// Comma-separated elements are all collected.
    #[test]
    fn list_multiple_elements() {
        let v = parse_ok("[Creature, Trap, Artifact]");
        let RonValue::List(elems) = &v.value else {
            panic!("expected List");
        };
        assert_eq!(elems.len(), 3);
    }

    /// A comma after the last element is accepted.
    #[test]
    fn list_trailing_comma() {
        let v = parse_ok("[Creature, Trap,]");
        let RonValue::List(elems) = &v.value else {
            panic!("expected List");
        };
        assert_eq!(elems.len(), 2);
    }

    /// Lists may hold string values.
    #[test]
    fn list_of_strings() {
        let v = parse_ok("[\"Vigilance\", \"Haste\"]");
        let RonValue::List(elems) = &v.value else {
            panic!("expected List");
        };
        assert_eq!(elems.len(), 2);
        assert_eq!(elems[0].value, RonValue::String("Vigilance".to_string()));
        assert_eq!(elems[1].value, RonValue::String("Haste".to_string()));
    }

    // --------------------------------------------------------
    // Structs
    // --------------------------------------------------------

    /// `()` is a struct with no fields.
    #[test]
    fn struct_empty() {
        let v = parse_ok("()");
        let RonValue::Struct(s) = &v.value else {
            panic!("expected Struct");
        };
        assert!(s.fields.is_empty());
    }

    /// A single `name: value` pair is captured.
    #[test]
    fn struct_single_field() {
        let v = parse_ok("(name: \"Ashborn Hound\")");
        let RonValue::Struct(s) = &v.value else {
            panic!("expected Struct");
        };
        assert_eq!(s.fields.len(), 1);
        assert_eq!(s.fields[0].0.value, "name");
        assert_eq!(s.fields[0].1.value, RonValue::String("Ashborn Hound".to_string()));
    }

    /// Several comma-separated fields are all captured.
    #[test]
    fn struct_multiple_fields() {
        let v = parse_ok("(name: \"foo\", age: 5)");
        let RonValue::Struct(s) = &v.value else {
            panic!("expected Struct");
        };
        assert_eq!(s.fields.len(), 2);
    }

    /// A comma after the last field is accepted.
    #[test]
    fn struct_trailing_comma() {
        let v = parse_ok("(name: \"foo\",)");
        let RonValue::Struct(s) = &v.value else {
            panic!("expected Struct");
        };
        assert_eq!(s.fields.len(), 1);
    }

    /// `close_span` covers exactly the closing parenthesis.
    #[test]
    fn struct_close_span_captured() {
        let v = parse_ok("(x: 1)");
        let RonValue::Struct(s) = &v.value else {
            panic!("expected Struct");
        };
        assert_eq!(s.close_span.start.offset, 5);
        assert_eq!(s.close_span.end.offset, 6);
    }

    /// A struct may appear as a field value of another struct.
    #[test]
    fn struct_nested() {
        let v = parse_ok("(cost: (generic: 2, sigil: 1))");
        let RonValue::Struct(outer) = &v.value else {
            panic!("expected Struct");
        };
        assert_eq!(outer.fields.len(), 1);
        assert_eq!(outer.fields[0].0.value, "cost");
        let RonValue::Struct(inner) = &outer.fields[0].1.value else {
            panic!("expected nested Struct");
        };
        assert_eq!(inner.fields.len(), 2);
    }

    // --------------------------------------------------------
    // Whitespace and comments
    // --------------------------------------------------------

    /// Spaces before the value are ignored.
    #[test]
    fn whitespace_leading() {
        assert_eq!(parse_ok("  42").value, RonValue::Integer(42));
    }

    /// A `//` comment line before the value is ignored.
    #[test]
    fn comment_before_value() {
        assert_eq!(parse_ok("// comment\n42").value, RonValue::Integer(42));
    }

    // --------------------------------------------------------
    // Span accuracy
    // --------------------------------------------------------

    /// The span begins at the value, not at the leading whitespace.
    #[test]
    fn span_starts_after_whitespace() {
        assert_eq!(parse_ok("  42").span.start.offset, 2);
    }

    /// The span of a string includes both quotes.
    #[test]
    fn span_covers_string() {
        let v = parse_ok("\"hello\"");
        assert_eq!(v.span.start.offset, 0);
        assert_eq!(v.span.end.offset, 7);
    }

    // --------------------------------------------------------
    // Error cases
    // --------------------------------------------------------

    /// Empty input reports that the end of file was found.
    #[test]
    fn error_empty_input() {
        match parse_err("").kind {
            RonErrorKind::UnexpectedToken { found, .. } => {
                assert_eq!(found, "end of file");
            }
            other => panic!("expected UnexpectedToken, got {:?}", other),
        }
    }

    /// A character that cannot start a value is rejected.
    #[test]
    fn error_unexpected_char() {
        assert!(parser("@").parse_value().is_err());
    }

    // --------------------------------------------------------
    // parse_ron() integration
    // --------------------------------------------------------

    /// A complete card-like struct parses end to end through the public entry point.
    #[test]
    fn ron_full_struct() {
        let source = r#"(
            name: "Ashborn Hound",
            card_types: [Creature],
            legendary: false,
            power: Some(1),
            toughness: None,
            keywords: [],
            flavor_text: "placeholder",
        )"#;
        let v = parse_ron(source).unwrap();
        let RonValue::Struct(s) = &v.value else {
            panic!("expected Struct");
        };
        assert_eq!(s.fields.len(), 7);
        assert_eq!(s.fields[0].0.value, "name");
        assert_eq!(s.fields[0].1.value, RonValue::String("Ashborn Hound".to_string()));
    }
}