Skip to main content

ron_schema/schema/
parser.rs

1/*************************
2 * Author: Bradley Hunter
3 */
4
5use crate::span::{Position, Span, Spanned};
6use crate::error::{SchemaParseError, SchemaErrorKind};
7use super::{SchemaType, FieldDef, StructDef, EnumDef, HashSet, Schema, HashMap};
8
/// Byte-oriented cursor over a schema source string.
///
/// Holds the text being parsed together with the current read position,
/// expressed both as a byte offset and as a line/column pair.
#[derive(Debug)]
struct Parser<'a> {
    /// The complete source text; identifier text is sliced out of this.
    source: &'a str,
    /// The same text viewed as raw bytes, used for single-byte lookahead.
    bytes: &'a [u8],
    /// Index into `bytes` of the next unread byte.
    offset: usize,
    /// Line number of the next unread byte.
    line: usize,
    /// Column number of the next unread byte.
    column: usize,
}
17
18impl<'a> Parser<'a> {
19    fn new(source: &'a str) -> Self {
20        Self { source, bytes: source.as_bytes(), offset: 0, line: 1, column: 1 }
21    }
22
23    fn position(&self) -> Position {
24        Position { offset: self.offset, line: self.line, column: self.column }
25    }
26
27    fn peek(&self) -> Option<u8> {
28        self.bytes.get(self.offset).copied()
29    }
30
31    fn advance(&mut self) {
32        if let Some(byte) = self.peek() {
33            if byte == b'\n'{
34                self.column = 1;
35                self.line += 1;
36            } else {
37                self.column += 1;
38            }
39            self.offset += 1;
40        } 
41    }
42
43    fn skip_whitespace(&mut self) {
44        loop {
45            match self.peek() {
46                Some(b' ' | b'\t' | b'\n' | b'\r') => self.advance(),
47                Some(b'/') if self.bytes.get(self.offset + 1) == Some(&b'/') => {
48                    while self.peek().is_some_and(|b| b != b'\n') {
49                        self.advance();
50                    }
51                }
52                _ => break,
53            }
54        }
55    }
56
57    fn expect_char(&mut self, expected: u8) -> Result<(), SchemaParseError> {
58        let start = self.position();
59        match self.peek() {
60            Some(b) if b == expected => {
61                self.advance();
62                Ok(())
63            },
64            Some(b) => {
65                self.advance();
66                let end = self.position();
67                Err(SchemaParseError { 
68                    span: Span { 
69                        start, 
70                        end 
71                    }, 
72                    kind: SchemaErrorKind::UnexpectedToken { 
73                        expected: format!("'{}'", expected as char), 
74                        found: format!("'{}'", b as char) 
75                    } 
76                })
77            },
78            None => {
79                Err(SchemaParseError { 
80                    span: Span { 
81                        start, 
82                        end: start 
83                    }, 
84                    kind: SchemaErrorKind::UnexpectedToken { 
85                        expected: format!("'{}'", expected as char), 
86                        found: "end of input".to_string() 
87                    } 
88                })
89            }
90        }
91    }
92
93    fn parse_identifier(&mut self) -> Result<Spanned<String>, SchemaParseError> {
94        let start = self.position();
95
96        // Check for valid identifier start
97        match self.peek() {
98            Some(b) if b.is_ascii_alphabetic() || b == b'_' => {},
99            Some(b) => {
100                self.advance();
101                let end = self.position();
102                return Err(SchemaParseError {
103                    span: Span { start, end },
104                    kind: SchemaErrorKind::UnexpectedToken {
105                        expected: "identifier".to_string(),
106                        found: format!("'{}'", b as char),
107                    },
108                });
109            },
110            None => {
111                return Err(SchemaParseError {
112                    span: Span { start, end: start },
113                    kind: SchemaErrorKind::UnexpectedToken {
114                        expected: "identifier".to_string(),
115                        found: "end of input".to_string(),
116                    },
117                });
118            },
119        }
120
121        // Consume all identifier continuation characters
122        while self.peek().is_some_and(|b| b.is_ascii_alphanumeric() || b == b'_') {
123            self.advance();
124        }
125
126        // Slice out the identifier text
127        let end = self.position();
128        Ok(Spanned {
129            value: self.source[start.offset..end.offset].to_string(),
130            span: Span { start, end },
131        })
132    }
133
134    #[allow(clippy::too_many_lines)]
135    fn parse_type(&mut self) -> Result<Spanned<SchemaType>, SchemaParseError> {
136        self.skip_whitespace();
137        let start = self.position();
138
139        match self.peek() {
140            Some(b'[') => {
141                // List: consume '[', parse inner type, expect ']'
142                self.advance();
143                self.skip_whitespace();
144                let inner = self.parse_type()?;
145                self.skip_whitespace();
146                self.expect_char(b']')?;
147                let end = self.position();
148                Ok(Spanned {
149                    value: SchemaType::List(Box::new(inner.value)),
150                    span: Span { start, end },
151                })
152            }
153            Some(b'{') => {
154                // Map: consume '{', parse key type, expect ':', parse value type, expect '}'
155                self.advance();
156                self.skip_whitespace();
157                let key_type = self.parse_type()?;
158                // Validate key type is String, Integer, or EnumRef
159                match &key_type.value {
160                    SchemaType::String | SchemaType::Integer | SchemaType::EnumRef(_) => {}
161                    _ => {
162                        return Err(SchemaParseError {
163                            span: key_type.span,
164                            kind: SchemaErrorKind::InvalidMapKeyType {
165                                found: format!("{:?}", key_type.value),
166                            },
167                        });
168                    }
169                }
170                self.skip_whitespace();
171                self.expect_char(b':')?;
172                self.skip_whitespace();
173                let value_type = self.parse_type()?;
174                self.skip_whitespace();
175                self.expect_char(b'}')?;
176                let end = self.position();
177                Ok(Spanned {
178                    value: SchemaType::Map(Box::new(key_type.value), Box::new(value_type.value)),
179                    span: Span { start, end },
180                })
181            }
182            Some(b'(') => {
183                // Disambiguate struct vs tuple:
184                // Save position, consume '(', skip whitespace.
185                // If ')' → empty struct. If identifier followed by ':' → struct.
186                // Otherwise → tuple (comma-separated types).
187                let saved = (self.offset, self.line, self.column);
188                self.advance(); // consume '('
189                self.skip_whitespace();
190
191                let is_struct = if self.peek() == Some(b')') {
192                    true // empty parens → treat as empty struct
193                } else {
194                    // Try to determine if this is name: Type (struct) or Type, Type (tuple)
195                    let probe_pos = (self.offset, self.line, self.column);
196                    let is_field = if let Ok(_id) = self.parse_identifier() {
197                        self.skip_whitespace();
198                        
199                        self.peek() == Some(b':')
200                    } else {
201                        false
202                    };
203                    // Rewind to after '('
204                    self.offset = probe_pos.0;
205                    self.line = probe_pos.1;
206                    self.column = probe_pos.2;
207                    is_field
208                };
209
210                // Rewind to before '(' and parse as struct or tuple
211                self.offset = saved.0;
212                self.line = saved.1;
213                self.column = saved.2;
214
215                if is_struct {
216                    let struct_def = self.parse_struct()?;
217                    let end = self.position();
218                    Ok(Spanned {
219                        value: SchemaType::Struct(struct_def),
220                        span: Span { start, end },
221                    })
222                } else {
223                    let types = self.parse_tuple_type()?;
224                    let end = self.position();
225                    Ok(Spanned {
226                        value: SchemaType::Tuple(types),
227                        span: Span { start, end },
228                    })
229                }
230            }
231            Some(b) if b.is_ascii_alphabetic() => {
232                // Identifier: could be primitive, Option, or EnumRef
233                let id = self.parse_identifier()?;
234                match id.value.as_str() {
235                    "String" => Ok(Spanned { value: SchemaType::String, span: id.span }),
236                    "Integer" => Ok(Spanned { value: SchemaType::Integer, span: id.span }),
237                    "Float" => Ok(Spanned { value: SchemaType::Float, span: id.span }),
238                    "Bool" => Ok(Spanned { value: SchemaType::Bool, span: id.span }),
239                    "Option" => {
240                        // expect '(', parse inner type, expect ')'
241                        self.skip_whitespace();
242                        self.expect_char(b'(')?;
243                        self.skip_whitespace();
244                        let inner = self.parse_type()?;
245                        self.skip_whitespace();
246                        self.expect_char(b')')?;
247                        let end = self.position();
248                        Ok(Spanned {
249                            value: SchemaType::Option(Box::new(inner.value)),
250                            span: Span { start, end },
251                        })
252                    }
253                    _ => Ok(Spanned { value: SchemaType::EnumRef(id.value), span: id.span }),
254                }
255            }
256            Some(b) => {
257                // Error: unexpected character
258                self.advance();
259                let end = self.position();
260                Err(SchemaParseError {
261                    span: Span { start, end },
262                    kind: SchemaErrorKind::UnexpectedToken {
263                        expected: "type".to_string(),
264                        found: format!("'{}'", b as char),
265                    },
266                })
267            }
268            None => {
269                Err(SchemaParseError {
270                    span: Span { start, end: start },
271                    kind: SchemaErrorKind::UnexpectedToken {
272                        expected: "type".to_string(),
273                        found: "end of input".to_string(),
274                    },
275                })
276            }
277        }
278    }
279
280    fn parse_field(&mut self) -> Result<FieldDef, SchemaParseError> {
281        self.skip_whitespace();
282        let name = self.parse_identifier()?;
283        self.skip_whitespace();
284        self.expect_char(b':')?;
285        self.skip_whitespace();
286        let type_ = self.parse_type()?;
287        Ok(FieldDef{
288            name,
289            type_
290        })
291    }
292
293    fn parse_struct(&mut self) -> Result<StructDef, SchemaParseError> {
294        self.skip_whitespace();
295        self.expect_char(b'(')?;
296        let mut fields: Vec<FieldDef> = Vec::new();
297        loop {
298            self.skip_whitespace();
299            if let Some(byte) = self.peek() {
300                if byte == b')' {
301                    break ;
302                } 
303                let field = self.parse_field()?;
304                fields.push(field);
305                self.skip_whitespace();
306                if self.peek() == Some(b',') {
307                    self.advance();
308                }
309            } else {
310                return Err(SchemaParseError {
311                    span: Span { start: self.position(), end: self.position() },
312                    kind: SchemaErrorKind::UnexpectedToken { expected: ")".to_string(), found: "end of file".to_string() }
313                });
314            }
315        }
316        self.expect_char(b')')?;
317        Ok(StructDef { fields })
318    }
319
320    /// Parses `(Type, Type, ...)` as a tuple type.
321    fn parse_tuple_type(&mut self) -> Result<Vec<SchemaType>, SchemaParseError> {
322        self.skip_whitespace();
323        self.expect_char(b'(')?;
324        let mut types = Vec::new();
325        loop {
326            self.skip_whitespace();
327            if self.peek() == Some(b')') {
328                break;
329            }
330            let t = self.parse_type()?;
331            types.push(t.value);
332            self.skip_whitespace();
333            if self.peek() == Some(b',') {
334                self.advance();
335            }
336        }
337        self.expect_char(b')')?;
338        Ok(types)
339    }
340
341    fn parse_enum_def(&mut self) -> Result<EnumDef, SchemaParseError> {
342        self.skip_whitespace();
343        let keyword = self.parse_identifier()?;
344        if keyword.value != "enum" {
345            return Err(SchemaParseError {
346                span: keyword.span,
347                kind: SchemaErrorKind::UnexpectedToken {
348                    expected: "\"enum\"".to_string(),
349                    found: keyword.value,
350                },
351            });
352        }
353        self.skip_whitespace();
354        let name = self.parse_identifier()?;
355        self.skip_whitespace();
356        self.expect_char(b'{')?;
357        let mut variants = HashMap::new();
358        loop {
359            self.skip_whitespace();
360            if let Some(byte) = self.peek() {
361                if byte == b'}' {
362                    break;
363                }
364                let variant = self.parse_identifier()?;
365                // Check for associated data: Variant(Type)
366                self.skip_whitespace();
367                let data_type = if self.peek() == Some(b'(') {
368                    self.advance(); // consume '('
369                    self.skip_whitespace();
370                    let t = self.parse_type()?;
371                    self.skip_whitespace();
372                    self.expect_char(b')')?;
373                    Some(t.value)
374                } else {
375                    None
376                };
377                variants.insert(variant.value, data_type);
378                self.skip_whitespace();
379                if self.peek() == Some(b',') {
380                    self.advance();
381                }
382            } else {
383                return Err(SchemaParseError {
384                    span: Span { start: self.position(), end: self.position() },
385                    kind: SchemaErrorKind::UnexpectedToken { expected: "}".to_string(), found: "end of file".to_string() }
386                });
387            }
388        }
389
390        self.expect_char(b'}')?;
391        Ok(EnumDef { name: name.value, variants })
392    }
393
394    /// Parses `type Name = <type>` — assumes the "type" keyword has already been confirmed.
395    fn parse_alias_def(&mut self) -> Result<(String, Spanned<SchemaType>), SchemaParseError> {
396        self.skip_whitespace();
397        self.parse_identifier()?; // consume "type" keyword
398        self.skip_whitespace();
399        let name = self.parse_identifier()?;
400        self.skip_whitespace();
401        self.expect_char(b'=')?;
402        self.skip_whitespace();
403        let type_ = self.parse_type()?;
404        Ok((name.value, type_))
405    }
406}
407
408/// Parses a `.ronschema` source string into a [`Schema`].
409///
410/// # Errors
411///
412/// Returns a [`SchemaParseError`] if the source contains syntax errors,
413/// duplicate definitions, or unresolved enum references.
414pub fn parse_schema(source: &str) -> Result<Schema, SchemaParseError> {
415    let mut parser = Parser::new(source);
416    parser.skip_whitespace();
417
418    let mut root = if parser.peek() == Some(b'(') {
419        parser.parse_struct()?
420    } else {
421        StructDef { fields: Vec::new() }
422    };
423
424    let mut enums: HashMap<String, EnumDef> = HashMap::new();
425    let mut aliases: HashMap<String, Spanned<SchemaType>> = HashMap::new();
426
427    loop {
428        parser.skip_whitespace();
429        if parser.peek().is_none() {
430            break;
431        }
432
433        // Peek ahead to determine if this is "enum" or "type"
434        let start = parser.position();
435        let keyword = parser.parse_identifier()?;
436
437        match keyword.value.as_str() {
438            "enum" => {
439                // Rewind — parse_enum_def expects to consume "enum" itself
440                parser.offset = start.offset;
441                parser.line = start.line;
442                parser.column = start.column;
443
444                let enum_def = parser.parse_enum_def()?;
445                if let Some(old) = enums.insert(enum_def.name.clone(), enum_def) {
446                    return Err(SchemaParseError {
447                        span: Span { start: parser.position(), end: parser.position() },
448                        kind: SchemaErrorKind::DuplicateEnum { name: old.name },
449                    });
450                }
451            }
452            "type" => {
453                // Rewind — parse_alias_def expects to consume "type" itself
454                parser.offset = start.offset;
455                parser.line = start.line;
456                parser.column = start.column;
457
458                let (name, type_) = parser.parse_alias_def()?;
459                if aliases.contains_key(&name) {
460                    return Err(SchemaParseError {
461                        span: type_.span,
462                        kind: SchemaErrorKind::DuplicateAlias { name },
463                    });
464                }
465                aliases.insert(name, type_);
466            }
467            other => {
468                return Err(SchemaParseError {
469                    span: keyword.span,
470                    kind: SchemaErrorKind::UnexpectedToken {
471                        expected: "\"enum\" or \"type\"".to_string(),
472                        found: other.to_string(),
473                    },
474                });
475            }
476        }
477    }
478
479    // Reclassify EnumRefs that are actually aliases — in the root struct and in alias definitions.
480    // Collect alias names into a set to avoid borrow conflicts when mutating alias values.
481    let alias_names: HashSet<String> = aliases.keys().cloned().collect();
482    reclassify_refs_in_struct_by_name(&mut root, &alias_names);
483    for spanned_type in aliases.values_mut() {
484        reclassify_refs_in_type_by_name(&mut spanned_type.value, &alias_names);
485    }
486
487    // Verify all refs resolve to a known enum or alias
488    verify_refs(&root, &enums, &aliases)?;
489
490    // Check for recursive aliases
491    verify_no_recursive_aliases(&aliases)?;
492
493    Ok(Schema { root, enums, aliases })
494}
495
496/// Reclassifies `EnumRef` names that are actually type aliases into `AliasRef`.
497/// Mutates the struct in place.
498fn reclassify_refs_in_struct_by_name(
499    struct_def: &mut StructDef,
500    alias_names: &HashSet<String>,
501) {
502    for field in &mut struct_def.fields {
503        reclassify_refs_in_type_by_name(&mut field.type_.value, alias_names);
504    }
505}
506
507fn reclassify_refs_in_type_by_name(
508    schema_type: &mut SchemaType,
509    alias_names: &HashSet<String>,
510) {
511    match schema_type {
512        SchemaType::EnumRef(name) if alias_names.contains(name.as_str()) => {
513            *schema_type = SchemaType::AliasRef(name.clone());
514        }
515        SchemaType::Option(inner) | SchemaType::List(inner) => {
516            reclassify_refs_in_type_by_name(inner, alias_names);
517        }
518        SchemaType::Map(key, value) => {
519            reclassify_refs_in_type_by_name(key, alias_names);
520            reclassify_refs_in_type_by_name(value, alias_names);
521        }
522        SchemaType::Tuple(types) => {
523            for t in types {
524                reclassify_refs_in_type_by_name(t, alias_names);
525            }
526        }
527        SchemaType::Struct(struct_def) => {
528            reclassify_refs_in_struct_by_name(struct_def, alias_names);
529        }
530        _ => {}
531    }
532}
533
534/// Verifies all `EnumRef` names resolve to a defined enum.
535/// (`AliasRefs` have already been reclassified, so any remaining `EnumRef` must be an actual enum.)
536fn verify_refs(
537    struct_def: &StructDef,
538    enums: &HashMap<String, EnumDef>,
539    aliases: &HashMap<String, Spanned<SchemaType>>,
540) -> Result<(), SchemaParseError> {
541    for field in &struct_def.fields {
542        check_type_refs(&field.type_.value, field.type_.span, enums, aliases)?;
543    }
544    Ok(())
545}
546
547fn check_type_refs(
548    schema_type: &SchemaType,
549    span: Span,
550    enums: &HashMap<String, EnumDef>,
551    aliases: &HashMap<String, Spanned<SchemaType>>,
552) -> Result<(), SchemaParseError> {
553    match schema_type {
554        SchemaType::EnumRef(name) => {
555            if !enums.contains_key(name) {
556                return Err(SchemaParseError {
557                    span,
558                    kind: SchemaErrorKind::UnresolvedType { name: name.clone() },
559                });
560            }
561        }
562        SchemaType::AliasRef(name) => {
563            if !aliases.contains_key(name) {
564                return Err(SchemaParseError {
565                    span,
566                    kind: SchemaErrorKind::UnresolvedType { name: name.clone() },
567                });
568            }
569        }
570        SchemaType::Option(inner) | SchemaType::List(inner) => {
571            check_type_refs(inner, span, enums, aliases)?;
572        }
573        SchemaType::Map(key, value) => {
574            check_type_refs(key, span, enums, aliases)?;
575            check_type_refs(value, span, enums, aliases)?;
576        }
577        SchemaType::Tuple(types) => {
578            for t in types {
579                check_type_refs(t, span, enums, aliases)?;
580            }
581        }
582        SchemaType::Struct(struct_def) => {
583            verify_refs(struct_def, enums, aliases)?;
584        }
585        _ => {}
586    }
587    Ok(())
588}
589
590/// Detects recursive type aliases — an alias that references itself directly or indirectly.
591fn verify_no_recursive_aliases(
592    aliases: &HashMap<String, Spanned<SchemaType>>,
593) -> Result<(), SchemaParseError> {
594    for (name, spanned_type) in aliases {
595        let mut visited = HashSet::new();
596        visited.insert(name.as_str());
597        if let Some(cycle_name) = find_alias_cycle(&spanned_type.value, aliases, &mut visited) {
598            return Err(SchemaParseError {
599                span: spanned_type.span,
600                kind: SchemaErrorKind::RecursiveAlias { name: cycle_name },
601            });
602        }
603    }
604    Ok(())
605}
606
607fn find_alias_cycle<'a>(
608    schema_type: &'a SchemaType,
609    aliases: &'a HashMap<String, Spanned<SchemaType>>,
610    visited: &mut HashSet<&'a str>,
611) -> Option<String> {
612    match schema_type {
613        SchemaType::AliasRef(name) => {
614            if visited.contains(name.as_str()) {
615                return Some(name.clone());
616            }
617            visited.insert(name.as_str());
618            if let Some(target) = aliases.get(name) {
619                return find_alias_cycle(&target.value, aliases, visited);
620            }
621            None
622        }
623        SchemaType::Option(inner) | SchemaType::List(inner) => {
624            find_alias_cycle(inner, aliases, visited)
625        }
626        SchemaType::Map(key, value) => {
627            if let Some(cycle) = find_alias_cycle(key, aliases, visited) {
628                return Some(cycle);
629            }
630            find_alias_cycle(value, aliases, visited)
631        }
632        SchemaType::Tuple(types) => {
633            for t in types {
634                if let Some(cycle) = find_alias_cycle(t, aliases, visited) {
635                    return Some(cycle);
636                }
637            }
638            None
639        }
640        SchemaType::Struct(struct_def) => {
641            for field in &struct_def.fields {
642                if let Some(cycle) = find_alias_cycle(&field.type_.value, aliases, visited) {
643                    return Some(cycle);
644                }
645            }
646            None
647        }
648        _ => None,
649    }
650}
651
652#[cfg(test)]
653mod tests {
654    use super::*;
655
656    // ========================================================
657    // Helper: constructs a Parser for direct method testing
658    // ========================================================
659
660    fn parser(source: &str) -> Parser<'_> {
661        Parser::new(source)
662    }
663
664    // ========================================================
665    // peek() tests
666    // ========================================================
667
668    // Returns the current byte without advancing.
669    #[test]
670    fn peek_returns_current_byte() {
671        let p = parser("abc");
672        assert_eq!(p.peek(), Some(b'a'));
673    }
674
675    // Returns None when at end of input.
676    #[test]
677    fn peek_returns_none_at_end() {
678        let p = parser("");
679        assert_eq!(p.peek(), None);
680    }
681
682    // ========================================================
683    // advance() tests
684    // ========================================================
685
686    // Moves to the next byte and increments column.
687    #[test]
688    fn advance_increments_offset_and_column() {
689        let mut p = parser("ab");
690        p.advance();
691        assert_eq!(p.offset, 1);
692        assert_eq!(p.column, 2);
693        assert_eq!(p.peek(), Some(b'b'));
694    }
695
696    // Newline resets column to 1 and increments line.
697    #[test]
698    fn advance_past_newline_increments_line() {
699        let mut p = parser("a\nb");
700        p.advance(); // past 'a'
701        p.advance(); // past '\n'
702        assert_eq!(p.line, 2);
703        assert_eq!(p.column, 1);
704    }
705
706    // Advancing at end of input is a no-op.
707    #[test]
708    fn advance_at_end_is_noop() {
709        let mut p = parser("");
710        p.advance();
711        assert_eq!(p.offset, 0);
712    }
713
714    // ========================================================
715    // position() tests
716    // ========================================================
717
718    // Initial position is offset 0, line 1, column 1.
719    #[test]
720    fn position_initial_state() {
721        let p = parser("abc");
722        let pos = p.position();
723        assert_eq!(pos.offset, 0);
724        assert_eq!(pos.line, 1);
725        assert_eq!(pos.column, 1);
726    }
727
728    // Position tracks correctly after advancing.
729    #[test]
730    fn position_after_advance() {
731        let mut p = parser("ab\nc");
732        p.advance(); // 'a'
733        p.advance(); // 'b'
734        p.advance(); // '\n'
735        let pos = p.position();
736        assert_eq!(pos.offset, 3);
737        assert_eq!(pos.line, 2);
738        assert_eq!(pos.column, 1);
739    }
740
741    // ========================================================
742    // skip_whitespace() tests
743    // ========================================================
744
745    // Skips spaces, tabs, and newlines.
746    #[test]
747    fn skip_whitespace_skips_spaces_tabs_newlines() {
748        let mut p = parser("  \t\nabc");
749        p.skip_whitespace();
750        assert_eq!(p.peek(), Some(b'a'));
751    }
752
753    // Skips line comments.
754    #[test]
755    fn skip_whitespace_skips_line_comment() {
756        let mut p = parser("// comment\nabc");
757        p.skip_whitespace();
758        assert_eq!(p.peek(), Some(b'a'));
759    }
760
761    // Skips whitespace after a comment.
762    #[test]
763    fn skip_whitespace_skips_comment_then_whitespace() {
764        let mut p = parser("// comment\n  abc");
765        p.skip_whitespace();
766        assert_eq!(p.peek(), Some(b'a'));
767    }
768
769    // Does nothing when already on a non-whitespace character.
770    #[test]
771    fn skip_whitespace_noop_on_nonwhitespace() {
772        let mut p = parser("abc");
773        p.skip_whitespace();
774        assert_eq!(p.offset, 0);
775    }
776
777    // ========================================================
778    // expect_char() tests
779    // ========================================================
780
781    // Consumes the expected character and returns Ok.
782    #[test]
783    fn expect_char_consumes_matching_byte() {
784        let mut p = parser("(abc");
785        assert!(p.expect_char(b'(').is_ok());
786        assert_eq!(p.peek(), Some(b'a'));
787    }
788
789    // Returns error when character doesn't match.
790    #[test]
791    fn expect_char_error_on_mismatch() {
792        let mut p = parser("abc");
793        let err = p.expect_char(b'(').unwrap_err();
794        assert!(matches!(err.kind, SchemaErrorKind::UnexpectedToken { .. }));
795    }
796
797    // Returns error at end of input.
798    #[test]
799    fn expect_char_error_at_end_of_input() {
800        let mut p = parser("");
801        let err = p.expect_char(b'(').unwrap_err();
802        match err.kind {
803            SchemaErrorKind::UnexpectedToken { found, .. } => {
804                assert_eq!(found, "end of input");
805            }
806            other => panic!("expected UnexpectedToken, got {:?}", other),
807        }
808    }
809
810    // ========================================================
811    // parse_identifier() tests
812    // ========================================================
813
814    // Reads a simple alphabetic identifier.
815    #[test]
816    fn parse_identifier_reads_alpha() {
817        let mut p = parser("name:");
818        let id = p.parse_identifier().unwrap();
819        assert_eq!(id.value, "name");
820    }
821
822    // Reads an identifier with underscores.
823    #[test]
824    fn parse_identifier_reads_snake_case() {
825        let mut p = parser("field_name:");
826        let id = p.parse_identifier().unwrap();
827        assert_eq!(id.value, "field_name");
828    }
829
830    // Reads an identifier with digits.
831    #[test]
832    fn parse_identifier_reads_alphanumeric() {
833        let mut p = parser("cost2:");
834        let id = p.parse_identifier().unwrap();
835        assert_eq!(id.value, "cost2");
836    }
837
838    // Reads a PascalCase identifier (for types/enums).
839    #[test]
840    fn parse_identifier_reads_pascal_case() {
841        let mut p = parser("CardType ");
842        let id = p.parse_identifier().unwrap();
843        assert_eq!(id.value, "CardType");
844    }
845
846    // Stops at non-identifier characters.
847    #[test]
848    fn parse_identifier_stops_at_delimiter() {
849        let mut p = parser("name: String");
850        let id = p.parse_identifier().unwrap();
851        assert_eq!(id.value, "name");
852        assert_eq!(p.peek(), Some(b':'));
853    }
854
855    // Records correct span for the identifier.
856    #[test]
857    fn parse_identifier_span_is_correct() {
858        let mut p = parser("name:");
859        let id = p.parse_identifier().unwrap();
860        assert_eq!(id.span.start.offset, 0);
861        assert_eq!(id.span.end.offset, 4);
862    }
863
864    // Error when starting with a digit.
865    #[test]
866    fn parse_identifier_error_on_digit_start() {
867        let mut p = parser("42abc");
868        assert!(p.parse_identifier().is_err());
869    }
870
871    // Error at end of input.
872    #[test]
873    fn parse_identifier_error_at_end_of_input() {
874        let mut p = parser("");
875        assert!(p.parse_identifier().is_err());
876    }
877
878    // ========================================================
879    // parse_type() tests
880    // ========================================================
881
882    // Parses "String" as SchemaType::String.
883    #[test]
884    fn parse_type_string() {
885        let mut p = parser("String");
886        let t = p.parse_type().unwrap();
887        assert_eq!(t.value, SchemaType::String);
888    }
889
890    // Parses "Integer" as SchemaType::Integer.
891    #[test]
892    fn parse_type_integer() {
893        let mut p = parser("Integer");
894        let t = p.parse_type().unwrap();
895        assert_eq!(t.value, SchemaType::Integer);
896    }
897
898    // Parses "Float" as SchemaType::Float.
899    #[test]
900    fn parse_type_float() {
901        let mut p = parser("Float");
902        let t = p.parse_type().unwrap();
903        assert_eq!(t.value, SchemaType::Float);
904    }
905
906    // Parses "Bool" as SchemaType::Bool.
907    #[test]
908    fn parse_type_bool() {
909        let mut p = parser("Bool");
910        let t = p.parse_type().unwrap();
911        assert_eq!(t.value, SchemaType::Bool);
912    }
913
914    // Parses "[String]" as a List wrapping String.
915    #[test]
916    fn parse_type_list() {
917        let mut p = parser("[String]");
918        let t = p.parse_type().unwrap();
919        assert_eq!(t.value, SchemaType::List(Box::new(SchemaType::String)));
920    }
921
922    // Parses "Option(Integer)" as an Option wrapping Integer.
923    #[test]
924    fn parse_type_option() {
925        let mut p = parser("Option(Integer)");
926        let t = p.parse_type().unwrap();
927        assert_eq!(t.value, SchemaType::Option(Box::new(SchemaType::Integer)));
928    }
929
930    // Parses an unknown PascalCase name as an EnumRef.
931    #[test]
932    fn parse_type_enum_ref() {
933        let mut p = parser("Faction");
934        let t = p.parse_type().unwrap();
935        assert_eq!(t.value, SchemaType::EnumRef("Faction".to_string()));
936    }
937
938    // Parses nested composites: [Option(String)].
939    #[test]
940    fn parse_type_nested_list_of_option() {
941        let mut p = parser("[Option(String)]");
942        let t = p.parse_type().unwrap();
943        assert_eq!(
944            t.value,
945            SchemaType::List(Box::new(SchemaType::Option(Box::new(SchemaType::String))))
946        );
947    }
948
949    // Parses an inline struct type.
950    #[test]
951    fn parse_type_inline_struct() {
952        let mut p = parser("(\n  x: Integer,\n)");
953        let t = p.parse_type().unwrap();
954        if let SchemaType::Struct(s) = &t.value {
955            assert_eq!(s.fields.len(), 1);
956            assert_eq!(s.fields[0].name.value, "x");
957        } else {
958            panic!("expected SchemaType::Struct");
959        }
960    }
961
962    // Error on unexpected token in type position.
963    #[test]
964    fn parse_type_error_on_unexpected_token() {
965        let mut p = parser("42");
966        let err = p.parse_type().unwrap_err();
967        match err.kind {
968            SchemaErrorKind::UnexpectedToken { expected, .. } => {
969                assert_eq!(expected, "type");
970            }
971            other => panic!("expected UnexpectedToken, got {:?}", other),
972        }
973    }
974
975    // ========================================================
976    // parse_field() tests
977    // ========================================================
978
979    // Parses "name: String" into a FieldDef.
980    #[test]
981    fn parse_field_name_and_type() {
982        let mut p = parser("name: String,");
983        let f = p.parse_field().unwrap();
984        assert_eq!(f.name.value, "name");
985        assert_eq!(f.type_.value, SchemaType::String);
986    }
987
988    // Error when colon is missing.
989    #[test]
990    fn parse_field_error_missing_colon() {
991        let mut p = parser("name String");
992        let err = p.parse_field().unwrap_err();
993        assert!(matches!(err.kind, SchemaErrorKind::UnexpectedToken { .. }));
994    }
995
996    // ========================================================
997    // parse_struct() tests
998    // ========================================================
999
1000    // Parses an empty struct.
1001    #[test]
1002    fn parse_struct_empty() {
1003        let mut p = parser("()");
1004        let s = p.parse_struct().unwrap();
1005        assert!(s.fields.is_empty());
1006    }
1007
1008    // Parses a struct with one field.
1009    #[test]
1010    fn parse_struct_single_field() {
1011        let mut p = parser("(\n  name: String,\n)");
1012        let s = p.parse_struct().unwrap();
1013        assert_eq!(s.fields.len(), 1);
1014        assert_eq!(s.fields[0].name.value, "name");
1015    }
1016
1017    // Parses a struct with multiple fields.
1018    #[test]
1019    fn parse_struct_multiple_fields() {
1020        let mut p = parser("(\n  a: String,\n  b: Integer,\n)");
1021        let s = p.parse_struct().unwrap();
1022        assert_eq!(s.fields.len(), 2);
1023    }
1024
1025    // Struct without trailing comma is valid.
1026    #[test]
1027    fn parse_struct_no_trailing_comma() {
1028        let mut p = parser("(\n  name: String\n)");
1029        let s = p.parse_struct().unwrap();
1030        assert_eq!(s.fields.len(), 1);
1031    }
1032
1033    // Error on unclosed struct.
1034    #[test]
1035    fn parse_struct_error_on_unclosed() {
1036        let mut p = parser("(\n  name: String,\n");
1037        assert!(p.parse_struct().is_err());
1038    }
1039
1040    // ========================================================
1041    // parse_enum_def() tests
1042    // ========================================================
1043
1044    // Parses a simple enum definition.
1045    #[test]
1046    fn parse_enum_def_simple() {
1047        let mut p = parser("enum Dir { North, South }");
1048        let e = p.parse_enum_def().unwrap();
1049        assert_eq!(e.name, "Dir");
1050        assert_eq!(e.variants.len(), 2);
1051        assert!(e.variants.contains_key("North"));
1052        assert!(e.variants.contains_key("South"));
1053    }
1054
1055    // Trailing comma in variant list is allowed.
1056    #[test]
1057    fn parse_enum_def_trailing_comma() {
1058        let mut p = parser("enum Dir { North, South, }");
1059        let e = p.parse_enum_def().unwrap();
1060        assert_eq!(e.variants.len(), 2);
1061    }
1062
1063    // Single variant enum is valid.
1064    #[test]
1065    fn parse_enum_def_single_variant() {
1066        let mut p = parser("enum Single { Only }");
1067        let e = p.parse_enum_def().unwrap();
1068        assert_eq!(e.variants.len(), 1);
1069    }
1070
1071    // Error when keyword is not "enum".
1072    #[test]
1073    fn parse_enum_def_error_wrong_keyword() {
1074        let mut p = parser("struct Dir { North }");
1075        let err = p.parse_enum_def().unwrap_err();
1076        assert!(matches!(err.kind, SchemaErrorKind::UnexpectedToken { .. }));
1077    }
1078
1079    // Error on unclosed enum.
1080    #[test]
1081    fn parse_enum_def_error_on_unclosed() {
1082        let mut p = parser("enum Dir { North, South");
1083        assert!(p.parse_enum_def().is_err());
1084    }
1085
1086    // ========================================================
1087    // parse_schema() integration tests
1088    // ========================================================
1089
1090    // Empty input produces an empty schema.
1091    #[test]
1092    fn schema_empty_input() {
1093        let schema = parse_schema("").unwrap();
1094        assert!(schema.root.fields.is_empty());
1095    }
1096
1097    // Empty input produces no enums.
1098    #[test]
1099    fn schema_empty_input_no_enums() {
1100        let schema = parse_schema("").unwrap();
1101        assert!(schema.enums.is_empty());
1102    }
1103
1104    // Root struct with enum ref resolves when enum is defined.
1105    #[test]
1106    fn schema_enum_ref_resolves() {
1107        let source = "(\n  faction: Faction,\n)\nenum Faction { Sentinels, Reavers }";
1108        let schema = parse_schema(source).unwrap();
1109        assert_eq!(schema.root.fields[0].type_.value, SchemaType::EnumRef("Faction".to_string()));
1110    }
1111
1112    // Multiple enum definitions are all stored.
1113    #[test]
1114    fn schema_multiple_enums_stored() {
1115        let source = "enum A { X }\nenum B { Y }";
1116        let schema = parse_schema(source).unwrap();
1117        assert_eq!(schema.enums.len(), 2);
1118    }
1119
1120    // Comments before root struct are ignored.
1121    #[test]
1122    fn schema_comments_before_root() {
1123        let source = "// comment\n(\n  name: String,\n)";
1124        let schema = parse_schema(source).unwrap();
1125        assert_eq!(schema.root.fields.len(), 1);
1126    }
1127
1128    // Inline comment after field is ignored.
1129    #[test]
1130    fn schema_inline_comment_after_field() {
1131        let source = "(\n  name: String, // a name\n)";
1132        let schema = parse_schema(source).unwrap();
1133        assert_eq!(schema.root.fields[0].name.value, "name");
1134    }
1135
1136    // Unresolved type ref is an error.
1137    #[test]
1138    fn schema_unresolved_type_ref() {
1139        let err = parse_schema("(\n  f: Faction,\n)").unwrap_err();
1140        assert_eq!(err.kind, SchemaErrorKind::UnresolvedType { name: "Faction".to_string() });
1141    }
1142
1143    // Unresolved type ref inside Option is an error.
1144    #[test]
1145    fn schema_unresolved_type_ref_in_option() {
1146        let err = parse_schema("(\n  t: Option(Timing),\n)").unwrap_err();
1147        assert_eq!(err.kind, SchemaErrorKind::UnresolvedType { name: "Timing".to_string() });
1148    }
1149
1150    // Unresolved type ref inside List is an error.
1151    #[test]
1152    fn schema_unresolved_type_ref_in_list() {
1153        let err = parse_schema("(\n  t: [CardType],\n)").unwrap_err();
1154        assert_eq!(err.kind, SchemaErrorKind::UnresolvedType { name: "CardType".to_string() });
1155    }
1156
1157    // Duplicate enum name is an error.
1158    #[test]
1159    fn schema_duplicate_enum_name() {
1160        let err = parse_schema("enum A { X }\nenum A { Y }").unwrap_err();
1161        assert_eq!(err.kind, SchemaErrorKind::DuplicateEnum { name: "A".to_string() });
1162    }
1163
1164    // ========================================================
1165    // Type alias tests — parsing
1166    // ========================================================
1167
1168    // Basic type alias is stored in schema.aliases.
1169    #[test]
1170    fn alias_stored_in_schema() {
1171        let source = "(\n  cost: Cost,\n)\ntype Cost = (generic: Integer,)";
1172        let schema = parse_schema(source).unwrap();
1173        assert!(schema.aliases.contains_key("Cost"));
1174    }
1175
1176    // Alias field is reclassified from EnumRef to AliasRef.
1177    #[test]
1178    fn alias_ref_reclassified() {
1179        let source = "(\n  cost: Cost,\n)\ntype Cost = (generic: Integer,)";
1180        let schema = parse_schema(source).unwrap();
1181        assert_eq!(schema.root.fields[0].type_.value, SchemaType::AliasRef("Cost".to_string()));
1182    }
1183
1184    // Alias to a primitive type.
1185    #[test]
1186    fn alias_to_primitive() {
1187        let source = "(\n  name: Name,\n)\ntype Name = String";
1188        let schema = parse_schema(source).unwrap();
1189        assert_eq!(schema.aliases["Name"].value, SchemaType::String);
1190    }
1191
1192    // Alias to a list type.
1193    #[test]
1194    fn alias_to_list() {
1195        let source = "(\n  tags: Tags,\n)\ntype Tags = [String]";
1196        let schema = parse_schema(source).unwrap();
1197        assert_eq!(schema.aliases["Tags"].value, SchemaType::List(Box::new(SchemaType::String)));
1198    }
1199
1200    // Alias to an option type.
1201    #[test]
1202    fn alias_to_option() {
1203        let source = "(\n  power: Power,\n)\ntype Power = Option(Integer)";
1204        let schema = parse_schema(source).unwrap();
1205        assert_eq!(schema.aliases["Power"].value, SchemaType::Option(Box::new(SchemaType::Integer)));
1206    }
1207
1208    // Alias inside a list field is reclassified.
1209    #[test]
1210    fn alias_ref_inside_list_reclassified() {
1211        let source = "(\n  costs: [Cost],\n)\ntype Cost = (generic: Integer,)";
1212        let schema = parse_schema(source).unwrap();
1213        assert_eq!(
1214            schema.root.fields[0].type_.value,
1215            SchemaType::List(Box::new(SchemaType::AliasRef("Cost".to_string())))
1216        );
1217    }
1218
1219    // Alias inside an option field is reclassified.
1220    #[test]
1221    fn alias_ref_inside_option_reclassified() {
1222        let source = "(\n  cost: Option(Cost),\n)\ntype Cost = (generic: Integer,)";
1223        let schema = parse_schema(source).unwrap();
1224        assert_eq!(
1225            schema.root.fields[0].type_.value,
1226            SchemaType::Option(Box::new(SchemaType::AliasRef("Cost".to_string())))
1227        );
1228    }
1229
1230    // Enums and aliases can coexist.
1231    #[test]
1232    fn alias_and_enum_coexist() {
1233        let source = "(\n  cost: Cost,\n  kind: Kind,\n)\ntype Cost = (generic: Integer,)\nenum Kind { A, B }";
1234        let schema = parse_schema(source).unwrap();
1235        assert!(schema.aliases.contains_key("Cost"));
1236        assert!(schema.enums.contains_key("Kind"));
1237    }
1238
1239    // ========================================================
1240    // Type alias tests — error cases
1241    // ========================================================
1242
1243    // Duplicate alias name is an error.
1244    #[test]
1245    fn alias_duplicate_name() {
1246        let source = "type A = String\ntype A = Integer";
1247        let err = parse_schema(source).unwrap_err();
1248        assert_eq!(err.kind, SchemaErrorKind::DuplicateAlias { name: "A".to_string() });
1249    }
1250
1251    // Recursive alias is an error.
1252    #[test]
1253    fn alias_recursive_direct() {
1254        let source = "(\n  x: Foo,\n)\ntype Foo = Option(Foo)";
1255        let err = parse_schema(source).unwrap_err();
1256        assert_eq!(err.kind, SchemaErrorKind::RecursiveAlias { name: "Foo".to_string() });
1257    }
1258
1259    // Indirect recursive alias is an error.
1260    #[test]
1261    fn alias_recursive_indirect() {
1262        let source = "(\n  x: Foo,\n)\ntype Foo = Option(Bar)\ntype Bar = [Foo]";
1263        let err = parse_schema(source).unwrap_err();
1264        assert!(matches!(err.kind, SchemaErrorKind::RecursiveAlias { .. }));
1265    }
1266
1267    // ========================================================
1268    // Map type tests — parsing
1269    // ========================================================
1270
1271    // Parses a map type with String keys and Integer values.
1272    #[test]
1273    fn parse_type_map_string_to_integer() {
1274        let mut p = parser("{String: Integer}");
1275        let t = p.parse_type().unwrap();
1276        assert_eq!(
1277            t.value,
1278            SchemaType::Map(Box::new(SchemaType::String), Box::new(SchemaType::Integer))
1279        );
1280    }
1281
1282    // Parses a map type with Integer keys.
1283    #[test]
1284    fn parse_type_map_integer_keys() {
1285        let mut p = parser("{Integer: String}");
1286        let t = p.parse_type().unwrap();
1287        assert_eq!(
1288            t.value,
1289            SchemaType::Map(Box::new(SchemaType::Integer), Box::new(SchemaType::String))
1290        );
1291    }
1292
1293    // Map type field in a schema.
1294    #[test]
1295    fn schema_map_field() {
1296        let source = "(\n  attrs: {String: Integer},\n)";
1297        let schema = parse_schema(source).unwrap();
1298        assert_eq!(
1299            schema.root.fields[0].type_.value,
1300            SchemaType::Map(Box::new(SchemaType::String), Box::new(SchemaType::Integer))
1301        );
1302    }
1303
1304    // Map with enum key type is allowed.
1305    #[test]
1306    fn schema_map_enum_key() {
1307        let source = "(\n  scores: {Stat: Integer},\n)\nenum Stat { Str, Dex, Con }";
1308        let schema = parse_schema(source).unwrap();
1309        assert_eq!(
1310            schema.root.fields[0].type_.value,
1311            SchemaType::Map(Box::new(SchemaType::EnumRef("Stat".to_string())), Box::new(SchemaType::Integer))
1312        );
1313    }
1314
1315    // Map with Float key type is rejected.
1316    #[test]
1317    fn schema_map_float_key_rejected() {
1318        let source = "(\n  bad: {Float: String},\n)";
1319        let err = parse_schema(source).unwrap_err();
1320        assert!(matches!(err.kind, SchemaErrorKind::InvalidMapKeyType { .. }));
1321    }
1322
1323    // Map with Bool key type is rejected.
1324    #[test]
1325    fn schema_map_bool_key_rejected() {
1326        let source = "(\n  bad: {Bool: String},\n)";
1327        let err = parse_schema(source).unwrap_err();
1328        assert!(matches!(err.kind, SchemaErrorKind::InvalidMapKeyType { .. }));
1329    }
1330
1331    // ========================================================
1332    // Tuple type tests — parsing
1333    // ========================================================
1334
1335    // Parses a tuple type with two elements.
1336    #[test]
1337    fn parse_type_tuple() {
1338        let mut p = parser("(Float, Float)");
1339        let t = p.parse_type().unwrap();
1340        assert_eq!(t.value, SchemaType::Tuple(vec![SchemaType::Float, SchemaType::Float]));
1341    }
1342
1343    // Parses a tuple type with mixed types.
1344    #[test]
1345    fn parse_type_tuple_mixed() {
1346        let mut p = parser("(String, Integer, Bool)");
1347        let t = p.parse_type().unwrap();
1348        assert_eq!(
1349            t.value,
1350            SchemaType::Tuple(vec![SchemaType::String, SchemaType::Integer, SchemaType::Bool])
1351        );
1352    }
1353
1354    // Tuple type in a schema field.
1355    #[test]
1356    fn schema_tuple_field() {
1357        let source = "(\n  pos: (Float, Float),\n)";
1358        let schema = parse_schema(source).unwrap();
1359        assert_eq!(
1360            schema.root.fields[0].type_.value,
1361            SchemaType::Tuple(vec![SchemaType::Float, SchemaType::Float])
1362        );
1363    }
1364
1365    // Inline struct still works after tuple disambiguation.
1366    #[test]
1367    fn schema_struct_still_works() {
1368        let source = "(\n  cost: (generic: Integer,),\n)";
1369        let schema = parse_schema(source).unwrap();
1370        if let SchemaType::Struct(s) = &schema.root.fields[0].type_.value {
1371            assert_eq!(s.fields[0].name.value, "generic");
1372        } else {
1373            panic!("expected Struct");
1374        }
1375    }
1376
1377    // Empty parens still parse as empty struct.
1378    #[test]
1379    fn schema_empty_parens_is_struct() {
1380        let source = "(\n  empty: (),\n)";
1381        let schema = parse_schema(source).unwrap();
1382        assert!(matches!(schema.root.fields[0].type_.value, SchemaType::Struct(_)));
1383    }
1384
1385    // ========================================================
1386    // Enum variants with data — parsing
1387    // ========================================================
1388
1389    // Parses enum with data variants.
1390    #[test]
1391    fn parse_enum_data_variant() {
1392        let source = "enum Effect { Damage(Integer), Heal(Integer), Draw }";
1393        let schema = parse_schema(source).unwrap();
1394        let effect = schema.enums.get("Effect").unwrap();
1395        assert_eq!(effect.variants.get("Damage"), Some(&Some(SchemaType::Integer)));
1396        assert_eq!(effect.variants.get("Heal"), Some(&Some(SchemaType::Integer)));
1397        assert_eq!(effect.variants.get("Draw"), Some(&None));
1398    }
1399
1400    // Enum with struct data variant.
1401    #[test]
1402    fn parse_enum_struct_data_variant() {
1403        let source = "enum Action { Move((Integer, Integer)), Wait }";
1404        let schema = parse_schema(source).unwrap();
1405        let action = schema.enums.get("Action").unwrap();
1406        assert!(matches!(action.variants.get("Move"), Some(Some(SchemaType::Tuple(_)))));
1407        assert_eq!(action.variants.get("Wait"), Some(&None));
1408    }
1409}