1use crate::span::{Position, Span, Spanned};
6use crate::error::{SchemaParseError, SchemaErrorKind};
7use super::{SchemaType, FieldDef, StructDef, EnumDef, HashSet, Schema, HashMap};
8
9#[derive(Debug)]
10struct Parser<'a> {
11 source: &'a str,
12 bytes: &'a [u8],
13 offset: usize,
14 line: usize,
15 column: usize,
16}
17
18impl<'a> Parser<'a> {
19 fn new(source: &'a str) -> Self {
20 Self { source, bytes: source.as_bytes(), offset: 0, line: 1, column: 1 }
21 }
22
23 fn position(&self) -> Position {
24 Position { offset: self.offset, line: self.line, column: self.column }
25 }
26
27 fn peek(&self) -> Option<u8> {
28 self.bytes.get(self.offset).copied()
29 }
30
31 fn advance(&mut self) {
32 if let Some(byte) = self.peek() {
33 if byte == b'\n'{
34 self.column = 1;
35 self.line += 1;
36 } else {
37 self.column += 1;
38 }
39 self.offset += 1;
40 }
41 }
42
43 fn skip_whitespace(&mut self) {
44 loop {
45 match self.peek() {
46 Some(b' ' | b'\t' | b'\n' | b'\r') => self.advance(),
47 Some(b'/') if self.bytes.get(self.offset + 1) == Some(&b'/') => {
48 while self.peek().is_some_and(|b| b != b'\n') {
49 self.advance();
50 }
51 }
52 _ => break,
53 }
54 }
55 }
56
57 fn expect_char(&mut self, expected: u8) -> Result<(), SchemaParseError> {
58 let start = self.position();
59 match self.peek() {
60 Some(b) if b == expected => {
61 self.advance();
62 Ok(())
63 },
64 Some(b) => {
65 self.advance();
66 let end = self.position();
67 Err(SchemaParseError {
68 span: Span {
69 start,
70 end
71 },
72 kind: SchemaErrorKind::UnexpectedToken {
73 expected: format!("'{}'", expected as char),
74 found: format!("'{}'", b as char)
75 }
76 })
77 },
78 None => {
79 Err(SchemaParseError {
80 span: Span {
81 start,
82 end: start
83 },
84 kind: SchemaErrorKind::UnexpectedToken {
85 expected: format!("'{}'", expected as char),
86 found: "end of input".to_string()
87 }
88 })
89 }
90 }
91 }
92
93 fn parse_identifier(&mut self) -> Result<Spanned<String>, SchemaParseError> {
94 let start = self.position();
95
96 match self.peek() {
98 Some(b) if b.is_ascii_alphabetic() || b == b'_' => {},
99 Some(b) => {
100 self.advance();
101 let end = self.position();
102 return Err(SchemaParseError {
103 span: Span { start, end },
104 kind: SchemaErrorKind::UnexpectedToken {
105 expected: "identifier".to_string(),
106 found: format!("'{}'", b as char),
107 },
108 });
109 },
110 None => {
111 return Err(SchemaParseError {
112 span: Span { start, end: start },
113 kind: SchemaErrorKind::UnexpectedToken {
114 expected: "identifier".to_string(),
115 found: "end of input".to_string(),
116 },
117 });
118 },
119 }
120
121 while self.peek().is_some_and(|b| b.is_ascii_alphanumeric() || b == b'_') {
123 self.advance();
124 }
125
126 let end = self.position();
128 Ok(Spanned {
129 value: self.source[start.offset..end.offset].to_string(),
130 span: Span { start, end },
131 })
132 }
133
134 fn parse_type(&mut self) -> Result<Spanned<SchemaType>, SchemaParseError> {
135 self.skip_whitespace();
136 let start = self.position();
137
138 match self.peek() {
139 Some(b'[') => {
140 self.advance();
142 self.skip_whitespace();
143 let inner = self.parse_type()?;
144 self.skip_whitespace();
145 self.expect_char(b']')?;
146 let end = self.position();
147 Ok(Spanned {
148 value: SchemaType::List(Box::new(inner.value)),
149 span: Span { start, end },
150 })
151 }
152 Some(b'(') => {
153 let struct_def = self.parse_struct()?;
154 let end = self.position();
155 Ok(Spanned {
156 value: SchemaType::Struct(struct_def),
157 span: Span { start, end },
158 })
159 }
160 Some(b) if b.is_ascii_alphabetic() => {
161 let id = self.parse_identifier()?;
163 match id.value.as_str() {
164 "String" => Ok(Spanned { value: SchemaType::String, span: id.span }),
165 "Integer" => Ok(Spanned { value: SchemaType::Integer, span: id.span }),
166 "Float" => Ok(Spanned { value: SchemaType::Float, span: id.span }),
167 "Bool" => Ok(Spanned { value: SchemaType::Bool, span: id.span }),
168 "Option" => {
169 self.skip_whitespace();
171 self.expect_char(b'(')?;
172 self.skip_whitespace();
173 let inner = self.parse_type()?;
174 self.skip_whitespace();
175 self.expect_char(b')')?;
176 let end = self.position();
177 Ok(Spanned {
178 value: SchemaType::Option(Box::new(inner.value)),
179 span: Span { start, end },
180 })
181 }
182 _ => Ok(Spanned { value: SchemaType::EnumRef(id.value), span: id.span }),
183 }
184 }
185 Some(b) => {
186 self.advance();
188 let end = self.position();
189 Err(SchemaParseError {
190 span: Span { start, end },
191 kind: SchemaErrorKind::UnexpectedToken {
192 expected: "type".to_string(),
193 found: format!("'{}'", b as char),
194 },
195 })
196 }
197 None => {
198 Err(SchemaParseError {
199 span: Span { start, end: start },
200 kind: SchemaErrorKind::UnexpectedToken {
201 expected: "type".to_string(),
202 found: "end of input".to_string(),
203 },
204 })
205 }
206 }
207 }
208
209 fn parse_field(&mut self) -> Result<FieldDef, SchemaParseError> {
210 self.skip_whitespace();
211 let name = self.parse_identifier()?;
212 self.skip_whitespace();
213 self.expect_char(b':')?;
214 self.skip_whitespace();
215 let type_ = self.parse_type()?;
216 Ok(FieldDef{
217 name,
218 type_
219 })
220 }
221
222 fn parse_struct(&mut self) -> Result<StructDef, SchemaParseError> {
223 self.skip_whitespace();
224 self.expect_char(b'(')?;
225 let mut fields: Vec<FieldDef> = Vec::new();
226 loop {
227 self.skip_whitespace();
228 if let Some(byte) = self.peek() {
229 if byte == b')' {
230 break ;
231 }
232 let field = self.parse_field()?;
233 fields.push(field);
234 self.skip_whitespace();
235 if self.peek() == Some(b',') {
236 self.advance();
237 }
238 } else {
239 return Err(SchemaParseError {
240 span: Span { start: self.position(), end: self.position() },
241 kind: SchemaErrorKind::UnexpectedToken { expected: ")".to_string(), found: "end of file".to_string() }
242 });
243 }
244 }
245 self.expect_char(b')')?;
246 Ok(StructDef { fields })
247 }
248
249 fn parse_enum_def(&mut self) -> Result<EnumDef, SchemaParseError> {
250 self.skip_whitespace();
251 let keyword = self.parse_identifier()?;
252 if keyword.value != "enum" {
253 return Err(SchemaParseError {
254 span: keyword.span,
255 kind: SchemaErrorKind::UnexpectedToken {
256 expected: "\"enum\"".to_string(),
257 found: keyword.value,
258 },
259 });
260 }
261 self.skip_whitespace();
262 let name = self.parse_identifier()?;
263 self.skip_whitespace();
264 self.expect_char(b'{')?;
265 let mut variants = HashSet::new();
266 loop {
267 self.skip_whitespace();
268 if let Some(byte) = self.peek() {
269 if byte == b'}' {
270 break ;
271 }
272 let variant = self.parse_identifier()?;
273 variants.insert(variant.value);
274 self.skip_whitespace();
275 if self.peek() == Some(b',') {
276 self.advance();
277 }
278 } else {
279 return Err(SchemaParseError {
280 span: Span { start: self.position(), end: self.position() },
281 kind: SchemaErrorKind::UnexpectedToken { expected: "}".to_string(), found: "end of file".to_string() }
282 });
283 }
284 }
285
286 self.expect_char(b'}')?;
287 Ok(EnumDef { name: name.value, variants })
288 }
289}
290
291pub fn parse_schema(source: &str) -> Result<Schema, SchemaParseError> {
298 let mut parser = Parser::new(source);
299 parser.skip_whitespace();
300
301 let root = if parser.peek() == Some(b'(') {
302 parser.parse_struct()?
303 } else {
304 StructDef { fields: Vec::new() }
305 };
306
307 let mut enums: HashMap<String, EnumDef> = HashMap::new();
308 loop {
309 parser.skip_whitespace();
310 if parser.peek().is_none() {
311 break;
312 }
313 let enum_def = parser.parse_enum_def()?;
314 if let Some(old) = enums.insert(enum_def.name.clone(), enum_def) {
315 return Err(SchemaParseError {
316 span: Span { start: parser.position(), end: parser.position() },
317 kind: SchemaErrorKind::DuplicateEnum { name: old.name },
318 });
319 }
320 }
321
322 verify_enum_refs(&root, &enums)?;
323
324 Ok(Schema { root, enums })
325}
326
327fn verify_enum_refs(
328 struct_def: &StructDef,
329 enums: &HashMap<String, EnumDef>,
330) -> Result<(), SchemaParseError> {
331 for field in &struct_def.fields {
332 verify_type_enum_refs(&field.type_, enums)?;
333 }
334 Ok(())
335}
336
337fn verify_type_enum_refs(
338 spanned_type: &Spanned<SchemaType>,
339 enums: &HashMap<String, EnumDef>,
340) -> Result<(), SchemaParseError> {
341 check_schema_type(&spanned_type.value, spanned_type.span, enums)
342}
343
344fn check_schema_type(
345 schema_type: &SchemaType,
346 span: Span,
347 enums: &HashMap<String, EnumDef>,
348) -> Result<(), SchemaParseError> {
349 match schema_type {
350 SchemaType::EnumRef(name) => {
351 if !enums.contains_key(name) {
352 return Err(SchemaParseError {
353 span,
354 kind: SchemaErrorKind::UnresolvedEnum { name: name.clone() },
355 });
356 }
357 }
358 SchemaType::Option(inner) | SchemaType::List(inner) => {
359 check_schema_type(inner, span, enums)?;
360 }
361 SchemaType::Struct(struct_def) => {
362 verify_enum_refs(struct_def, enums)?;
363 }
364 _ => {}
365 }
366 Ok(())
367}
368
#[cfg(test)]
mod tests {
    use super::*;

    /// Shorthand for a parser positioned at the start of `source`.
    fn parser(source: &str) -> Parser<'_> {
        Parser::new(source)
    }

    /// Parses `source` as a type, panicking if it does not parse.
    fn parsed_type(source: &str) -> Spanned<SchemaType> {
        parser(source).parse_type().unwrap()
    }

    /// Parses `source` as an identifier, panicking if it does not parse.
    fn parsed_identifier(source: &str) -> Spanned<String> {
        parser(source).parse_identifier().unwrap()
    }

    /// Returns the error kind `parse_schema` reports for `source`.
    fn schema_error(source: &str) -> SchemaErrorKind {
        parse_schema(source).unwrap_err().kind
    }

    /// Whether `kind` is the `UnexpectedToken` variant.
    fn is_unexpected_token(kind: &SchemaErrorKind) -> bool {
        matches!(kind, SchemaErrorKind::UnexpectedToken { .. })
    }

    /// Splits an `UnexpectedToken` into `(expected, found)`; panics on any other kind.
    fn unexpected_parts(kind: SchemaErrorKind) -> (String, String) {
        match kind {
            SchemaErrorKind::UnexpectedToken { expected, found, .. } => (expected, found),
            other => panic!("expected UnexpectedToken, got {:?}", other),
        }
    }

    // ---- peek / advance / position ----

    #[test]
    fn peek_returns_current_byte() {
        assert_eq!(parser("abc").peek(), Some(b'a'));
    }

    #[test]
    fn peek_returns_none_at_end() {
        assert_eq!(parser("").peek(), None);
    }

    #[test]
    fn advance_increments_offset_and_column() {
        let mut cursor = parser("ab");
        cursor.advance();
        assert_eq!(cursor.offset, 1);
        assert_eq!(cursor.column, 2);
        assert_eq!(cursor.peek(), Some(b'b'));
    }

    #[test]
    fn advance_past_newline_increments_line() {
        let mut cursor = parser("a\nb");
        for _ in 0..2 {
            cursor.advance();
        }
        assert_eq!(cursor.line, 2);
        assert_eq!(cursor.column, 1);
    }

    #[test]
    fn advance_at_end_is_noop() {
        let mut cursor = parser("");
        cursor.advance();
        assert_eq!(cursor.offset, 0);
    }

    #[test]
    fn position_initial_state() {
        let at = parser("abc").position();
        assert_eq!(at.offset, 0);
        assert_eq!(at.line, 1);
        assert_eq!(at.column, 1);
    }

    #[test]
    fn position_after_advance() {
        let mut cursor = parser("ab\nc");
        for _ in 0..3 {
            cursor.advance();
        }
        let at = cursor.position();
        assert_eq!(at.offset, 3);
        assert_eq!(at.line, 2);
        assert_eq!(at.column, 1);
    }

    // ---- skip_whitespace ----

    #[test]
    fn skip_whitespace_skips_spaces_tabs_newlines() {
        let mut cursor = parser(" \t\nabc");
        cursor.skip_whitespace();
        assert_eq!(cursor.peek(), Some(b'a'));
    }

    #[test]
    fn skip_whitespace_skips_line_comment() {
        let mut cursor = parser("// comment\nabc");
        cursor.skip_whitespace();
        assert_eq!(cursor.peek(), Some(b'a'));
    }

    #[test]
    fn skip_whitespace_skips_comment_then_whitespace() {
        let mut cursor = parser("// comment\n abc");
        cursor.skip_whitespace();
        assert_eq!(cursor.peek(), Some(b'a'));
    }

    #[test]
    fn skip_whitespace_noop_on_nonwhitespace() {
        let mut cursor = parser("abc");
        cursor.skip_whitespace();
        assert_eq!(cursor.offset, 0);
    }

    // ---- expect_char ----

    #[test]
    fn expect_char_consumes_matching_byte() {
        let mut cursor = parser("(abc");
        assert!(cursor.expect_char(b'(').is_ok());
        assert_eq!(cursor.peek(), Some(b'a'));
    }

    #[test]
    fn expect_char_error_on_mismatch() {
        let error = parser("abc").expect_char(b'(').unwrap_err();
        assert!(is_unexpected_token(&error.kind));
    }

    #[test]
    fn expect_char_error_at_end_of_input() {
        let error = parser("").expect_char(b'(').unwrap_err();
        let (_, found) = unexpected_parts(error.kind);
        assert_eq!(found, "end of input");
    }

    // ---- parse_identifier ----

    #[test]
    fn parse_identifier_reads_alpha() {
        assert_eq!(parsed_identifier("name:").value, "name");
    }

    #[test]
    fn parse_identifier_reads_snake_case() {
        assert_eq!(parsed_identifier("field_name:").value, "field_name");
    }

    #[test]
    fn parse_identifier_reads_alphanumeric() {
        assert_eq!(parsed_identifier("cost2:").value, "cost2");
    }

    #[test]
    fn parse_identifier_reads_pascal_case() {
        assert_eq!(parsed_identifier("CardType ").value, "CardType");
    }

    #[test]
    fn parse_identifier_stops_at_delimiter() {
        let mut cursor = parser("name: String");
        let identifier = cursor.parse_identifier().unwrap();
        assert_eq!(identifier.value, "name");
        assert_eq!(cursor.peek(), Some(b':'));
    }

    #[test]
    fn parse_identifier_span_is_correct() {
        let identifier = parsed_identifier("name:");
        assert_eq!(identifier.span.start.offset, 0);
        assert_eq!(identifier.span.end.offset, 4);
    }

    #[test]
    fn parse_identifier_error_on_digit_start() {
        assert!(parser("42abc").parse_identifier().is_err());
    }

    #[test]
    fn parse_identifier_error_at_end_of_input() {
        assert!(parser("").parse_identifier().is_err());
    }

    // ---- parse_type ----

    #[test]
    fn parse_type_string() {
        assert_eq!(parsed_type("String").value, SchemaType::String);
    }

    #[test]
    fn parse_type_integer() {
        assert_eq!(parsed_type("Integer").value, SchemaType::Integer);
    }

    #[test]
    fn parse_type_float() {
        assert_eq!(parsed_type("Float").value, SchemaType::Float);
    }

    #[test]
    fn parse_type_bool() {
        assert_eq!(parsed_type("Bool").value, SchemaType::Bool);
    }

    #[test]
    fn parse_type_list() {
        let expected = SchemaType::List(Box::new(SchemaType::String));
        assert_eq!(parsed_type("[String]").value, expected);
    }

    #[test]
    fn parse_type_option() {
        let expected = SchemaType::Option(Box::new(SchemaType::Integer));
        assert_eq!(parsed_type("Option(Integer)").value, expected);
    }

    #[test]
    fn parse_type_enum_ref() {
        let expected = SchemaType::EnumRef("Faction".to_string());
        assert_eq!(parsed_type("Faction").value, expected);
    }

    #[test]
    fn parse_type_nested_list_of_option() {
        let optional_string = SchemaType::Option(Box::new(SchemaType::String));
        let expected = SchemaType::List(Box::new(optional_string));
        assert_eq!(parsed_type("[Option(String)]").value, expected);
    }

    #[test]
    fn parse_type_inline_struct() {
        let parsed = parsed_type("(\n x: Integer,\n)");
        let SchemaType::Struct(inline) = &parsed.value else {
            panic!("expected SchemaType::Struct");
        };
        assert_eq!(inline.fields.len(), 1);
        assert_eq!(inline.fields[0].name.value, "x");
    }

    #[test]
    fn parse_type_error_on_unexpected_token() {
        let error = parser("42").parse_type().unwrap_err();
        let (expected, _) = unexpected_parts(error.kind);
        assert_eq!(expected, "type");
    }

    // ---- parse_field ----

    #[test]
    fn parse_field_name_and_type() {
        let field = parser("name: String,").parse_field().unwrap();
        assert_eq!(field.name.value, "name");
        assert_eq!(field.type_.value, SchemaType::String);
    }

    #[test]
    fn parse_field_error_missing_colon() {
        let error = parser("name String").parse_field().unwrap_err();
        assert!(is_unexpected_token(&error.kind));
    }

    // ---- parse_struct ----

    #[test]
    fn parse_struct_empty() {
        let parsed = parser("()").parse_struct().unwrap();
        assert!(parsed.fields.is_empty());
    }

    #[test]
    fn parse_struct_single_field() {
        let parsed = parser("(\n name: String,\n)").parse_struct().unwrap();
        assert_eq!(parsed.fields.len(), 1);
        assert_eq!(parsed.fields[0].name.value, "name");
    }

    #[test]
    fn parse_struct_multiple_fields() {
        let parsed = parser("(\n a: String,\n b: Integer,\n)")
            .parse_struct()
            .unwrap();
        assert_eq!(parsed.fields.len(), 2);
    }

    #[test]
    fn parse_struct_no_trailing_comma() {
        let parsed = parser("(\n name: String\n)").parse_struct().unwrap();
        assert_eq!(parsed.fields.len(), 1);
    }

    #[test]
    fn parse_struct_error_on_unclosed() {
        assert!(parser("(\n name: String,\n").parse_struct().is_err());
    }

    // ---- parse_enum_def ----

    #[test]
    fn parse_enum_def_simple() {
        let parsed = parser("enum Dir { North, South }").parse_enum_def().unwrap();
        assert_eq!(parsed.name, "Dir");
        assert_eq!(parsed.variants.len(), 2);
        for variant in ["North", "South"] {
            assert!(parsed.variants.contains(variant));
        }
    }

    #[test]
    fn parse_enum_def_trailing_comma() {
        let parsed = parser("enum Dir { North, South, }")
            .parse_enum_def()
            .unwrap();
        assert_eq!(parsed.variants.len(), 2);
    }

    #[test]
    fn parse_enum_def_single_variant() {
        let parsed = parser("enum Single { Only }").parse_enum_def().unwrap();
        assert_eq!(parsed.variants.len(), 1);
    }

    #[test]
    fn parse_enum_def_error_wrong_keyword() {
        let error = parser("struct Dir { North }").parse_enum_def().unwrap_err();
        assert!(is_unexpected_token(&error.kind));
    }

    #[test]
    fn parse_enum_def_error_on_unclosed() {
        assert!(parser("enum Dir { North, South").parse_enum_def().is_err());
    }

    // ---- parse_schema ----

    #[test]
    fn schema_empty_input() {
        let schema = parse_schema("").unwrap();
        assert!(schema.root.fields.is_empty());
    }

    #[test]
    fn schema_empty_input_no_enums() {
        let schema = parse_schema("").unwrap();
        assert!(schema.enums.is_empty());
    }

    #[test]
    fn schema_enum_ref_resolves() {
        let schema =
            parse_schema("(\n faction: Faction,\n)\nenum Faction { Sentinels, Reavers }").unwrap();
        assert_eq!(
            schema.root.fields[0].type_.value,
            SchemaType::EnumRef("Faction".to_string())
        );
    }

    #[test]
    fn schema_multiple_enums_stored() {
        let schema = parse_schema("enum A { X }\nenum B { Y }").unwrap();
        assert_eq!(schema.enums.len(), 2);
    }

    #[test]
    fn schema_comments_before_root() {
        let schema = parse_schema("// comment\n(\n name: String,\n)").unwrap();
        assert_eq!(schema.root.fields.len(), 1);
    }

    #[test]
    fn schema_inline_comment_after_field() {
        let schema = parse_schema("(\n name: String, // a name\n)").unwrap();
        assert_eq!(schema.root.fields[0].name.value, "name");
    }

    #[test]
    fn schema_unresolved_enum_ref() {
        assert_eq!(
            schema_error("(\n f: Faction,\n)"),
            SchemaErrorKind::UnresolvedEnum {
                name: "Faction".to_string()
            }
        );
    }

    #[test]
    fn schema_unresolved_enum_ref_in_option() {
        assert_eq!(
            schema_error("(\n t: Option(Timing),\n)"),
            SchemaErrorKind::UnresolvedEnum {
                name: "Timing".to_string()
            }
        );
    }

    #[test]
    fn schema_unresolved_enum_ref_in_list() {
        assert_eq!(
            schema_error("(\n t: [CardType],\n)"),
            SchemaErrorKind::UnresolvedEnum {
                name: "CardType".to_string()
            }
        );
    }

    #[test]
    fn schema_duplicate_enum_name() {
        assert_eq!(
            schema_error("enum A { X }\nenum A { Y }"),
            SchemaErrorKind::DuplicateEnum {
                name: "A".to_string()
            }
        );
    }
}