1use std::path::Path;
4use indexmap::IndexMap;
5use crate::{Error, Result, Value, Schema, Field, FieldType, Union, Variant};
6use crate::types::ObjectMap;
7use crate::lexer::{Token, TokenKind, Lexer};
8
/// Hard cap on recursive value nesting (`{`, `[`, `(`, and `:tag` chains) so
/// adversarial input fails with a parse error instead of a stack overflow.
const MAX_PARSE_DEPTH: usize = 256;

/// Recursive-descent parser over the token stream produced by the lexer.
///
/// Besides the parsed key/value data, the parser accumulates `@struct` and
/// `@union` definitions (retrievable via the `into_*` methods) and tracks
/// `@include` state for resolving and cycle-checking nested files.
pub struct Parser {
    tokens: Vec<Token>,                      // token stream being consumed
    pos: usize,                              // index of the current, not-yet-consumed token
    schemas: IndexMap<String, Schema>,       // @struct definitions, in declaration order
    unions: IndexMap<String, Union>,         // @union definitions, in declaration order
    base_path: Option<std::path::PathBuf>,   // directory for resolving relative @include paths
    include_stack: Vec<std::path::PathBuf>,  // canonicalized ancestor includes, for cycle detection
    is_root_array: bool,                     // set when the @root-array directive is seen
}
24
25impl Parser {
26 pub fn new(tokens: Vec<Token>) -> Self {
27 Self {
28 tokens,
29 pos: 0,
30 schemas: IndexMap::new(),
31 unions: IndexMap::new(),
32 base_path: None,
33 include_stack: Vec::new(),
34 is_root_array: false,
35 }
36 }
37
38 pub fn with_base_path(mut self, path: &Path) -> Self {
39 self.base_path = path.parent().map(|p| p.to_path_buf());
40 self
41 }
42
    /// Parses the token stream into the top-level key/value map.
    ///
    /// At the root level this handles:
    /// - `@struct` / `@union` definitions (stored on the parser, not the map)
    /// - `@include`, whose parsed pairs are merged into the result
    /// - `@root-array`, which only sets the `is_root_array` flag
    /// - unknown directives, which swallow at most one same-line argument
    /// - `key: value` pairs with bare-word or quoted keys
    /// - `!ref: value` pairs, stored under a `"!name"` key
    ///
    /// Any other token is silently skipped (best-effort parsing).
    pub fn parse(&mut self) -> Result<IndexMap<String, Value>> {
        let mut result = IndexMap::new();

        while !self.at_end() {
            match self.current_kind() {
                TokenKind::Directive(d) => {
                    let directive = d.clone();
                    self.advance();
                    match directive.as_str() {
                        "struct" => self.parse_struct_def()?,
                        "union" => self.parse_union_def()?,
                        "include" => {
                            // Included pairs overwrite earlier keys on conflict.
                            let included = self.parse_include()?;
                            for (k, v) in included {
                                result.insert(k, v);
                            }
                        }
                        "root-array" => {
                            self.is_root_array = true;
                        }
                        _ => {
                            // Unknown directive: if a value-starting token
                            // follows on the SAME line, treat it as the
                            // directive's argument and discard it so it is
                            // not mistaken for a top-level key.
                            let directive_line = self.tokens[self.pos - 1].line;
                            if !self.at_end()
                                && self.current().line == directive_line
                                && self.can_start_value()
                            {
                                let _ = self.parse_value(0)?;
                            }
                        }
                    }
                }
                TokenKind::Word(_) | TokenKind::String(_) => {
                    let (key, value) = self.parse_pair(0)?;
                    result.insert(key, value);
                }
                TokenKind::Ref(r) => {
                    // Top-level `!name: value` is stored as key "!name".
                    let ref_name = r.clone();
                    self.advance();
                    self.expect(TokenKind::Colon)?;
                    let value = self.parse_value(0)?;
                    result.insert(format!("!{}", ref_name), value);
                }
                TokenKind::Eof => break,
                _ => { self.advance(); }
            }
        }

        Ok(result)
    }
97
    /// Consumes the parser, returning the collected `@struct` definitions.
    pub fn into_schemas(self) -> IndexMap<String, Schema> {
        self.schemas
    }

    /// Consumes the parser, returning the collected `@union` definitions.
    pub fn into_unions(self) -> IndexMap<String, Union> {
        self.unions
    }

    /// Consumes the parser, returning both struct and union definitions at
    /// once (use this when you need both, since the `into_*` methods above
    /// each consume the parser).
    pub fn into_schemas_and_unions(self) -> (IndexMap<String, Schema>, IndexMap<String, Union>) {
        (self.schemas, self.unions)
    }

    /// True if the `@root-array` directive was seen during `parse`.
    pub fn is_root_array(&self) -> bool {
        self.is_root_array
    }
115
116 fn parse_struct_def(&mut self) -> Result<()> {
121 let name = self.expect_word()?;
122 self.expect(TokenKind::LParen)?;
123
124 let mut schema = Schema::new(&name);
125
126 while !self.check(TokenKind::RParen) {
127 let field_name = match self.current_kind() {
129 TokenKind::Word(w) => {
130 let w = w.clone();
131 self.advance();
132 w
133 }
134 _ => return Err(Error::UnexpectedToken {
135 expected: "field name".to_string(),
136 got: format!("{:?}", self.current_kind()),
137 }),
138 };
139
140 let field_type = if self.check(TokenKind::Colon) {
141 self.advance();
142 self.parse_field_type()?
143 } else {
144 FieldType::new("string")
145 };
146
147 schema.add_field(field_name, field_type);
148
149 if self.check(TokenKind::Comma) {
150 self.advance();
151 }
152 }
153
154 self.expect(TokenKind::RParen)?;
155 self.schemas.insert(name, schema);
156 Ok(())
157 }
158
159 fn parse_union_def(&mut self) -> Result<()> {
164 let name = self.expect_word()?;
165 self.expect(TokenKind::LBrace)?;
166
167 let mut union_type = Union::new(&name);
168
169 while !self.check(TokenKind::RBrace) {
170 let variant_name = self.expect_word()?;
171 self.expect(TokenKind::LParen)?;
172
173 let mut variant = Variant::new(&variant_name);
174
175 while !self.check(TokenKind::RParen) {
176 let field_name = self.expect_word()?;
177
178 let field_type = if self.check(TokenKind::Colon) {
179 self.advance();
180 self.parse_field_type()?
181 } else {
182 FieldType::new("string")
183 };
184
185 variant.fields.push(Field::new(field_name, field_type));
186
187 if self.check(TokenKind::Comma) {
188 self.advance();
189 }
190 }
191
192 self.expect(TokenKind::RParen)?;
193 union_type.add_variant(variant);
194
195 if self.check(TokenKind::Comma) {
196 self.advance();
197 }
198 }
199
200 self.expect(TokenKind::RBrace)?;
201 self.unions.insert(name, union_type);
202 Ok(())
203 }
204
    /// Handles `@include "path"`: lexes and parses the referenced file and
    /// returns its top-level pairs so the caller can merge them.
    ///
    /// Schemas/unions collected so far are passed down to the child parser
    /// (so the included file can use them), and any definitions it adds are
    /// merged back afterwards. Cycles are detected via a stack of
    /// canonicalized paths; include depth is capped at 32.
    fn parse_include(&mut self) -> Result<IndexMap<String, Value>> {
        let path_str = match self.current_kind() {
            TokenKind::String(s) => s.clone(),
            TokenKind::Word(w) => w.clone(),
            _ => return Err(Error::UnexpectedToken {
                expected: "file path".to_string(),
                got: format!("{:?}", self.current_kind()),
            }),
        };
        self.advance();

        // Relative paths resolve against the including file's directory.
        let include_path = if let Some(ref base) = self.base_path {
            base.join(&path_str)
        } else {
            std::path::PathBuf::from(&path_str)
        };

        // Best-effort canonicalization: if it fails (e.g. file does not
        // exist), fall back to the raw path so the error surfaces at read
        // time below instead of here.
        let canonical = include_path.canonicalize()
            .unwrap_or_else(|_| include_path.clone());
        if self.include_stack.contains(&canonical) {
            return Err(Error::ParseError(format!(
                "Circular include detected: {}", canonical.display()
            )));
        }
        if self.include_stack.len() >= 32 {
            return Err(Error::ParseError(
                "Include depth exceeds limit of 32".into()
            ));
        }

        let content = std::fs::read_to_string(&include_path)
            .map_err(|e| Error::ParseError(format!("Failed to include {}: {}", path_str, e)))?;

        let tokens = Lexer::new(&content).tokenize()?;
        let mut parser = Parser::new(tokens);
        if let Some(parent) = include_path.parent() {
            parser.base_path = Some(parent.to_path_buf());
        }
        // Child inherits this parser's include stack (plus the included file
        // itself) and all definitions collected so far.
        parser.include_stack = self.include_stack.clone();
        parser.include_stack.push(canonical);
        parser.schemas = self.schemas.clone();
        parser.unions = self.unions.clone();

        let data = parser.parse()?;

        // Merge definitions the included file added back into this parser;
        // on name conflict the included definition wins.
        for (name, schema) in parser.schemas {
            self.schemas.insert(name, schema);
        }
        for (name, union_type) in parser.unions {
            self.unions.insert(name, union_type);
        }

        Ok(data)
    }
269
270 fn parse_field_type(&mut self) -> Result<FieldType> {
271 let mut type_str = String::new();
272
273 if self.check(TokenKind::LBracket) {
275 self.advance();
276 self.expect(TokenKind::RBracket)?;
277 type_str.push_str("[]");
278 }
279
280 let base = self.expect_word()?;
282
283 match base.as_str() {
285 "object" | "map" | "tuple" | "ref" | "tagged" => {
286 return Err(Error::ParseError(
287 format!("'{}' is a value type and cannot be used as a schema field type", base)
288 ));
289 }
290 _ => {}
291 }
292
293 type_str.push_str(&base);
294
295 if self.check(TokenKind::Question) {
297 self.advance();
298 type_str.push('?');
299 }
300
301 Ok(FieldType::parse(&type_str))
302 }
303
304 fn parse_pair(&mut self, depth: usize) -> Result<(String, Value)> {
309 let key = match self.current_kind() {
310 TokenKind::Word(w) => w.clone(),
311 TokenKind::String(s) => s.clone(),
312 _ => return Err(Error::UnexpectedToken {
313 expected: "key".to_string(),
314 got: format!("{:?}", self.current_kind()),
315 }),
316 };
317 self.advance();
318 self.expect(TokenKind::Colon)?;
319 let value = self.parse_value(depth)?;
320 Ok((key, value))
321 }
322
    /// Parses a single value. `depth` counts recursive nesting (containers
    /// and tags) and is capped at `MAX_PARSE_DEPTH` so pathological input
    /// errors out instead of overflowing the stack.
    fn parse_value(&mut self, depth: usize) -> Result<Value> {
        if depth > MAX_PARSE_DEPTH {
            return Err(Error::ParseError("maximum parse nesting depth exceeded".into()));
        }
        match self.current_kind() {
            TokenKind::Null => { self.advance(); Ok(Value::Null) }
            TokenKind::Bool(b) => { let b = *b; self.advance(); Ok(Value::Bool(b)) }
            TokenKind::Int(i) => { let i = *i; self.advance(); Ok(Value::Int(i)) }
            TokenKind::UInt(u) => { let u = *u; self.advance(); Ok(Value::UInt(u)) }
            TokenKind::JsonNumber(s) => { let s = s.clone(); self.advance(); Ok(Value::JsonNumber(s)) }
            TokenKind::Float(f) => { let f = *f; self.advance(); Ok(Value::Float(f)) }
            TokenKind::String(s) => { let s = s.clone(); self.advance(); Ok(Value::String(s)) }
            TokenKind::Bytes(b) => { let b = b.clone(); self.advance(); Ok(Value::Bytes(b)) }
            // A bare word in value position is an unquoted string.
            TokenKind::Word(w) => { let w = w.clone(); self.advance(); Ok(Value::String(w)) }
            TokenKind::Ref(r) => { let r = r.clone(); self.advance(); Ok(Value::Ref(r)) }
            TokenKind::Timestamp(ts, tz) => { let ts = *ts; let tz = *tz; self.advance(); Ok(Value::Timestamp(ts, tz)) }
            // `:tag value` produces a tagged value; the tag must be a word.
            TokenKind::Colon => {
                self.advance(); match self.current_kind() {
                    TokenKind::Word(w) => {
                        let tag = w.clone();
                        self.advance(); let inner = self.parse_value(depth + 1)?;
                        Ok(Value::Tagged(tag, Box::new(inner)))
                    }
                    _ => Err(Error::UnexpectedToken {
                        expected: "tag name after ':'".to_string(),
                        got: format!("{:?}", self.current_kind()),
                    })
                }
            }
            // Directives in value position: @table, @map, or unknown (-> null).
            TokenKind::Directive(d) => {
                let directive = d.clone();
                self.advance();
                self.parse_directive_value(&directive, depth)
            }
            TokenKind::LBrace => self.parse_object(depth + 1),
            TokenKind::LBracket => self.parse_array(depth + 1),
            TokenKind::LParen => self.parse_tuple(depth + 1),
            _ => Err(Error::UnexpectedToken {
                expected: "value".to_string(),
                got: format!("{:?}", self.current_kind()),
            }),
        }
    }
372
373 fn parse_directive_value(&mut self, directive: &str, depth: usize) -> Result<Value> {
374 match directive {
375 "table" => self.parse_table(depth),
376 "map" => self.parse_map(depth),
377 _ => {
378 if self.can_start_value() {
380 let _ = self.parse_value(depth)?;
381 }
382 Ok(Value::Null)
383 }
384 }
385 }
386
387 fn can_start_value(&self) -> bool {
389 matches!(
390 self.current_kind(),
391 TokenKind::Null
392 | TokenKind::Bool(_)
393 | TokenKind::Int(_)
394 | TokenKind::UInt(_)
395 | TokenKind::Float(_)
396 | TokenKind::String(_)
397 | TokenKind::Bytes(_)
398 | TokenKind::Word(_)
399 | TokenKind::Ref(_)
400 | TokenKind::Timestamp(_, _)
401 | TokenKind::JsonNumber(_)
402 | TokenKind::Colon
403 | TokenKind::Directive(_)
404 | TokenKind::LBrace
405 | TokenKind::LBracket
406 | TokenKind::LParen
407 )
408 }
409
410 fn parse_map(&mut self, depth: usize) -> Result<Value> {
411 self.expect(TokenKind::LBrace)?;
412 let mut pairs = Vec::new();
413
414 while !self.check(TokenKind::RBrace) {
415 let key = match self.current_kind() {
418 TokenKind::String(s) => { let s = s.clone(); self.advance(); Value::String(s) }
419 TokenKind::Word(w) => { let w = w.clone(); self.advance(); Value::String(w) }
420 TokenKind::Int(i) => { let i = *i; self.advance(); Value::Int(i) }
421 TokenKind::UInt(u) => { let u = *u; self.advance(); Value::UInt(u) }
422 _ => return Err(Error::UnexpectedToken {
423 expected: "map key".to_string(),
424 got: format!("{:?}", self.current_kind()),
425 }),
426 };
427
428 self.expect(TokenKind::Colon)?;
429 let value = self.parse_value(depth + 1)?;
430 pairs.push((key, value));
431
432 if self.check(TokenKind::Comma) {
433 self.advance();
434 }
435 }
436
437 self.expect(TokenKind::RBrace)?;
438 Ok(Value::Map(pairs))
439 }
440
441 fn parse_table(&mut self, depth: usize) -> Result<Value> {
442 let struct_name = self.expect_word()?;
443 let schema = self.schemas
444 .get(&struct_name)
445 .ok_or_else(|| Error::UnknownStruct(struct_name.clone()))?
446 .clone();
447
448 self.expect(TokenKind::LBracket)?;
449
450 let mut rows = Vec::new();
451 while !self.check(TokenKind::RBracket) {
452 let row = self.parse_tuple_with_schema(&schema, depth + 1)?;
453 rows.push(row);
454 if self.check(TokenKind::Comma) {
455 self.advance();
456 }
457 }
458
459 self.expect(TokenKind::RBracket)?;
460 Ok(Value::Array(rows))
461 }
462
463 fn parse_tuple_with_schema(&mut self, schema: &Schema, depth: usize) -> Result<Value> {
464 self.expect(TokenKind::LParen)?;
465
466 let mut obj = ObjectMap::new();
467 for field in &schema.fields {
468 let value = self.parse_value_for_field(&field.field_type, depth)?;
469 obj.insert(field.name.clone(), value);
470 if self.check(TokenKind::Comma) {
471 self.advance();
472 }
473 }
474
475 self.expect(TokenKind::RParen)?;
476 Ok(Value::Object(obj))
477 }
478
    /// Parses one table-cell value according to the schema field type.
    ///
    /// Branch order matters:
    /// 1. `~` (null) is accepted for ANY field, even a non-nullable one —
    ///    presumably nullability is validated at a later stage (TODO confirm).
    /// 2. A `(` whose field type names a known struct parses as a nested
    ///    schema-shaped tuple; an unknown base falls through to step 4,
    ///    where `(` parses as a plain tuple.
    /// 3. Array fields parse `[...]` elementwise with the scalar base type.
    /// 4. Everything else uses generic value parsing.
    fn parse_value_for_field(&mut self, field_type: &FieldType, depth: usize) -> Result<Value> {
        if self.check(TokenKind::Null) {
            self.advance();
            return Ok(Value::Null);
        }

        if !field_type.is_array && self.check(TokenKind::LParen) {
            if let Some(schema) = self.schemas.get(&field_type.base).cloned() {
                return self.parse_tuple_with_schema(&schema, depth + 1);
            }
        }

        if field_type.is_array {
            self.expect(TokenKind::LBracket)?;
            let mut arr = Vec::new();
            // Elements use the scalar base type of the array field.
            let inner_type = FieldType::new(&field_type.base);
            while !self.check(TokenKind::RBracket) {
                arr.push(self.parse_value_for_field(&inner_type, depth + 1)?);
                if self.check(TokenKind::Comma) {
                    self.advance();
                }
            }
            self.expect(TokenKind::RBracket)?;
            return Ok(Value::Array(arr));
        }

        self.parse_value(depth)
    }
514
515 fn parse_object(&mut self, depth: usize) -> Result<Value> {
516 self.expect(TokenKind::LBrace)?;
517 let mut obj = ObjectMap::new();
518
519 while !self.check(TokenKind::RBrace) {
520 if let TokenKind::Ref(r) = self.current_kind() {
521 let key = format!("!{}", r);
522 self.advance();
523 self.expect(TokenKind::Colon)?;
524 let value = self.parse_value(depth)?;
525 obj.insert(key, value);
526 } else {
527 let (key, value) = self.parse_pair(depth)?;
528 obj.insert(key, value);
529 }
530 if self.check(TokenKind::Comma) {
531 self.advance();
532 }
533 }
534
535 self.expect(TokenKind::RBrace)?;
536 Ok(Value::Object(obj))
537 }
538
539 fn parse_array(&mut self, depth: usize) -> Result<Value> {
540 self.expect(TokenKind::LBracket)?;
541 let mut arr = Vec::new();
542
543 while !self.check(TokenKind::RBracket) {
544 arr.push(self.parse_value(depth)?);
545 if self.check(TokenKind::Comma) {
546 self.advance();
547 }
548 }
549
550 self.expect(TokenKind::RBracket)?;
551 Ok(Value::Array(arr))
552 }
553
554 fn parse_tuple(&mut self, depth: usize) -> Result<Value> {
555 self.expect(TokenKind::LParen)?;
556 let mut arr = Vec::new();
557
558 while !self.check(TokenKind::RParen) {
559 arr.push(self.parse_value(depth)?);
560 if self.check(TokenKind::Comma) {
561 self.advance();
562 }
563 }
564
565 self.expect(TokenKind::RParen)?;
566 Ok(Value::Array(arr))
567 }
568
569 fn current(&self) -> &Token {
574 self.tokens.get(self.pos).unwrap_or(&Token {
575 kind: TokenKind::Eof,
576 line: 0,
577 col: 0,
578 })
579 }
580
    /// Kind of the current token (the EOF sentinel's kind past the end).
    fn current_kind(&self) -> &TokenKind {
        &self.current().kind
    }

    /// Moves to the next token; saturates at the end of the stream so
    /// repeated calls past EOF are harmless.
    fn advance(&mut self) {
        if self.pos < self.tokens.len() {
            self.pos += 1;
        }
    }
590
    /// True if the current token is the same *variant* as `expected`.
    /// Compares enum discriminants only, so payloads are ignored — e.g.
    /// `check(TokenKind::Word(String::new()))` matches any word token.
    fn check(&self, expected: TokenKind) -> bool {
        std::mem::discriminant(self.current_kind()) == std::mem::discriminant(&expected)
    }
594
595 fn expect(&mut self, expected: TokenKind) -> Result<()> {
596 if self.check(expected.clone()) {
597 self.advance();
598 Ok(())
599 } else {
600 Err(Error::UnexpectedToken {
601 expected: format!("{:?}", expected),
602 got: format!("{:?}", self.current_kind()),
603 })
604 }
605 }
606
607 fn expect_word(&mut self) -> Result<String> {
608 match self.current_kind() {
609 TokenKind::Word(w) => {
610 let w = w.clone();
611 self.advance();
612 Ok(w)
613 }
614 _ => Err(Error::UnexpectedToken {
615 expected: "word".to_string(),
616 got: format!("{:?}", self.current_kind()),
617 }),
618 }
619 }
620
621 fn at_end(&self) -> bool {
622 matches!(self.current_kind(), TokenKind::Eof)
623 }
624}
625
626#[cfg(test)]
627mod tests {
628 use super::*;
629 use crate::lexer::Lexer;
630
    // Convenience helper: lex + parse a snippet in one call (schemas and
    // unions collected by the parser are discarded).
    fn parse(input: &str) -> Result<IndexMap<String, Value>> {
        let tokens = Lexer::new(input).tokenize()?;
        Parser::new(tokens).parse()
    }

    // Scalars at the top level: int, bare-word string, bool, and `~` (null).
    #[test]
    fn test_simple_values() {
        let data = parse("a: 1, b: hello, c: true, d: ~").unwrap();
        assert_eq!(data.get("a").unwrap().as_int(), Some(1));
        assert_eq!(data.get("b").unwrap().as_str(), Some("hello"));
        assert_eq!(data.get("c").unwrap().as_bool(), Some(true));
        assert!(data.get("d").unwrap().is_null());
    }

    // `{...}` in value position produces Value::Object.
    #[test]
    fn test_object() {
        let data = parse("obj: {x: 1, y: 2}").unwrap();
        let obj = data.get("obj").unwrap().as_object().unwrap();
        assert_eq!(obj.get("x").unwrap().as_int(), Some(1));
        assert_eq!(obj.get("y").unwrap().as_int(), Some(2));
    }

    // `[...]` in value position produces Value::Array.
    #[test]
    fn test_array() {
        let data = parse("arr: [1, 2, 3]").unwrap();
        let arr = data.get("arr").unwrap().as_array().unwrap();
        assert_eq!(arr.len(), 3);
        assert_eq!(arr[0].as_int(), Some(1));
    }
660
    // @struct definition followed by @table rows parsed against it:
    // positional tuples become objects keyed by the schema's field names.
    #[test]
    fn test_struct_and_table() {
        let input = r#"
        @struct point (x: int, y: int)
        points: @table point [
            (1, 2),
            (3, 4),
        ]
        "#;
        let tokens = Lexer::new(input).tokenize().unwrap();
        let mut parser = Parser::new(tokens);
        let data = parser.parse().unwrap();

        let points = data.get("points").unwrap().as_array().unwrap();
        assert_eq!(points.len(), 2);

        let p0 = points[0].as_object().unwrap();
        assert_eq!(p0.get("x").unwrap().as_int(), Some(1));
        assert_eq!(p0.get("y").unwrap().as_int(), Some(2));
    }

    // @union definition with multi-field, single-field, and empty variants.
    #[test]
    fn test_union_def() {
        let input = r#"
        @union Shape {
            Circle(radius: float),
            Rectangle(width: float, height: float),
            Point(),
        }
        "#;
        let tokens = Lexer::new(input).tokenize().unwrap();
        let mut parser = Parser::new(tokens);
        parser.parse().unwrap();
        let unions = parser.into_unions();
        let shape = unions.get("Shape").unwrap();
        assert_eq!(shape.variants.len(), 3);
        assert_eq!(shape.variants[0].name, "Circle");
        assert_eq!(shape.variants[0].fields.len(), 1);
        assert_eq!(shape.variants[1].name, "Rectangle");
        assert_eq!(shape.variants[1].fields.len(), 2);
        assert_eq!(shape.variants[2].name, "Point");
        assert_eq!(shape.variants[2].fields.len(), 0);
    }

    // @map with integer keys; entry order is preserved.
    #[test]
    fn test_map_value() {
        let data = parse("m: @map {1: one, 2: two}").unwrap();
        let m = data.get("m").unwrap().as_map().unwrap();
        assert_eq!(m.len(), 2);
        assert_eq!(m[0].0.as_int(), Some(1));
        assert_eq!(m[0].1.as_str(), Some("one"));
        assert_eq!(m[1].0.as_int(), Some(2));
        assert_eq!(m[1].1.as_str(), Some("two"));
    }

    // @map with quoted string keys.
    #[test]
    fn test_map_with_string_keys() {
        let data = parse(r#"m: @map {"key1": 10, "key2": 20}"#).unwrap();
        let m = data.get("m").unwrap().as_map().unwrap();
        assert_eq!(m.len(), 2);
    }

    // Empty @map is valid and yields zero entries.
    #[test]
    fn test_map_empty() {
        let data = parse("m: @map {}").unwrap();
        let m = data.get("m").unwrap().as_map().unwrap();
        assert_eq!(m.len(), 0);
    }
737
    // `!name` in value position parses as Value::Ref.
    #[test]
    fn test_ref_value() {
        let data = parse("config: !base_config").unwrap();
        assert_eq!(data.get("config").unwrap().as_ref_name(), Some("base_config"));
    }

    // `:tag value` parses as a tagged value wrapping the inner value.
    #[test]
    fn test_tagged_value() {
        let data = parse("status: :ok 200").unwrap();
        let (tag, inner) = data.get("status").unwrap().as_tagged().unwrap();
        assert_eq!(tag, "ok");
        assert_eq!(inner.as_int(), Some(200));
    }

    // The tagged payload may itself be null.
    #[test]
    fn test_tagged_null() {
        let data = parse("status: :none ~").unwrap();
        let (tag, inner) = data.get("status").unwrap().as_tagged().unwrap();
        assert_eq!(tag, "none");
        assert!(inner.is_null());
    }

    // `key::tag value` — no space between the pair colon and the tag colon.
    #[test]
    fn test_tagged_value_no_space_after_colon() {
        let data = parse("status::ok 200").unwrap();
        let (tag, inner) = data.get("status").unwrap().as_tagged().unwrap();
        assert_eq!(tag, "ok");
        assert_eq!(inner.as_int(), Some(200));
    }

    // `key:value` with no space after the colon still parses as a pair.
    #[test]
    fn test_key_value_no_space_after_colon() {
        let data = parse("name:alice\nage:30").unwrap();
        assert_eq!(data.get("name").unwrap().as_str(), Some("alice"));
        assert_eq!(data.get("age").unwrap().as_int(), Some(30));
    }

    // A bare tuple (no schema) is represented as an array.
    #[test]
    fn test_tuple_value() {
        let data = parse("point: (1, 2, 3)").unwrap();
        let arr = data.get("point").unwrap().as_array().unwrap();
        assert_eq!(arr.len(), 3);
        assert_eq!(arr[0].as_int(), Some(1));
        assert_eq!(arr[1].as_int(), Some(2));
        assert_eq!(arr[2].as_int(), Some(3));
    }

    // Objects nest.
    #[test]
    fn test_nested_object() {
        let data = parse("outer: {inner: {x: 1}}").unwrap();
        let outer = data.get("outer").unwrap().as_object().unwrap();
        let inner = outer.get("inner").unwrap().as_object().unwrap();
        assert_eq!(inner.get("x").unwrap().as_int(), Some(1));
    }

    // Arrays nest.
    #[test]
    fn test_nested_arrays() {
        let data = parse("matrix: [[1, 2], [3, 4]]").unwrap();
        let matrix = data.get("matrix").unwrap().as_array().unwrap();
        assert_eq!(matrix.len(), 2);
        let row0 = matrix[0].as_array().unwrap();
        assert_eq!(row0[0].as_int(), Some(1));
    }
811
    // `string?` marks a field nullable; `~` fills it with null in table rows.
    #[test]
    fn test_struct_with_nullable_field() {
        let input = r#"
        @struct user (name: string, email: string?)
        users: @table user [
            (alice, "a@test.com"),
            (bob, ~),
        ]
        "#;
        let tokens = Lexer::new(input).tokenize().unwrap();
        let mut parser = Parser::new(tokens);
        let data = parser.parse().unwrap();
        let schemas = parser.into_schemas();

        let schema = schemas.get("user").unwrap();
        assert!(schema.fields[1].field_type.nullable);

        let users = data.get("users").unwrap().as_array().unwrap();
        assert_eq!(users.len(), 2);
        assert!(users[1].as_object().unwrap().get("email").unwrap().is_null());
    }

    // `[]string` fields accept a bracketed element list in table rows.
    #[test]
    fn test_struct_with_array_field() {
        let input = r#"
        @struct item (name: string, tags: []string)
        items: @table item [
            (widget, [cool, useful]),
        ]
        "#;
        let tokens = Lexer::new(input).tokenize().unwrap();
        let mut parser = Parser::new(tokens);
        let data = parser.parse().unwrap();

        let items = data.get("items").unwrap().as_array().unwrap();
        let tags = items[0].as_object().unwrap().get("tags").unwrap().as_array().unwrap();
        assert_eq!(tags.len(), 2);
    }

    // @root-array only flips a flag on the parser; data still parses.
    #[test]
    fn test_root_array_directive() {
        let input = "@root-array\nroot: [1, 2, 3]";
        let tokens = Lexer::new(input).tokenize().unwrap();
        let mut parser = Parser::new(tokens);
        parser.parse().unwrap();
        assert!(parser.is_root_array());
    }

    // A top-level `!name:` key is stored under "!name".
    #[test]
    fn test_ref_key_at_top_level() {
        let input = "!defaults: {theme: dark}";
        let data = parse(input).unwrap();
        assert!(data.contains_key("!defaults"));
        let obj = data.get("!defaults").unwrap().as_object().unwrap();
        assert_eq!(obj.get("theme").unwrap().as_str(), Some("dark"));
    }

    // Quoted keys may contain spaces.
    #[test]
    fn test_string_key() {
        let data = parse(r#""my key": 42"#).unwrap();
        assert_eq!(data.get("my key").unwrap().as_int(), Some(42));
    }

    // Only checks that parsing garbage does not panic; the result itself is
    // deliberately not asserted (top-level recovery skips stray tokens).
    #[test]
    fn test_unexpected_token_error() {
        let result = parse("] invalid");
        let _ = result;
    }

    // A key with no following colon is an error.
    #[test]
    fn test_missing_colon_error() {
        let input = "key value";
        let result = parse(input);
        assert!(result.is_err());
    }

    // @table referencing an undefined struct is an error.
    #[test]
    fn test_unknown_struct_in_table() {
        let input = "data: @table nonexistent [(1, 2)]";
        let result = parse(input);
        assert!(result.is_err());
    }
917
    // Struct fields without an explicit type default to `string`.
    #[test]
    fn test_struct_field_without_type() {
        let input = r#"
        @struct simple (name, value)
        items: @table simple [
            (hello, world),
        ]
        "#;
        let tokens = Lexer::new(input).tokenize().unwrap();
        let mut parser = Parser::new(tokens);
        let data = parser.parse().unwrap();
        let schemas = parser.into_schemas();

        let schema = schemas.get("simple").unwrap();
        assert_eq!(schema.fields[0].field_type.base, "string");
        assert_eq!(schema.fields[1].field_type.base, "string");

        let items = data.get("items").unwrap().as_array().unwrap();
        assert_eq!(items[0].as_object().unwrap().get("name").unwrap().as_str(), Some("hello"));
    }

    // An unrecognized top-level directive is skipped without error.
    #[test]
    fn test_unknown_directive_ignored() {
        let data = parse("@custom_directive\nkey: value").unwrap();
        assert_eq!(data.get("key").unwrap().as_str(), Some("value"));
    }

    // An unknown directive swallows at most one argument, and only when it
    // appears on the same line as the directive.
    #[test]
    fn test_unknown_directive_consumes_same_line_argument() {
        let data = parse("@custom foo\nkey: value").unwrap();
        assert!(data.get("foo").is_none(), "foo should be consumed as directive arg, not a key");
        assert_eq!(data.get("key").unwrap().as_str(), Some("value"));

        let data = parse("@custom [1, 2, 3]\nkey: value").unwrap();
        assert_eq!(data.get("key").unwrap().as_str(), Some("value"));

        let data = parse("@custom {a: 1}\nkey: value").unwrap();
        assert_eq!(data.get("key").unwrap().as_str(), Some("value"));

        let data = parse("@custom\nkey: value").unwrap();
        assert_eq!(data.get("key").unwrap().as_str(), Some("value"));

        let data = parse("key: value\n@custom").unwrap();
        assert_eq!(data.get("key").unwrap().as_str(), Some("value"));

        let data = parse("@custom\nfoo: bar").unwrap();
        assert_eq!(data.get("foo").unwrap().as_str(), Some("bar"));
    }

    // An unknown directive in value position swallows its argument and
    // yields null; parsing resumes cleanly afterwards.
    #[test]
    fn test_unknown_directive_value_consumes_argument() {
        let data = parse("key: @unknown [1, 2, 3]\nother: 42").unwrap();
        assert!(data.get("key").unwrap().is_null(), "unknown directive value should be null");
        assert_eq!(data.get("other").unwrap().as_int(), Some(42), "next key should parse normally");

        let data = parse("key: @unknown {a: 1}\nother: ok").unwrap();
        assert!(data.get("key").unwrap().is_null());
        assert_eq!(data.get("other").unwrap().as_str(), Some("ok"));

        let data = parse("key: @unknown 42\nother: ok").unwrap();
        assert!(data.get("key").unwrap().is_null());
        assert_eq!(data.get("other").unwrap().as_str(), Some("ok"));

        // Followed by a comma: nothing can start a value, so nothing is eaten.
        let data = parse("arr: [@unknown, 1, 2]").unwrap();
        let arr = data.get("arr").unwrap().as_array().unwrap();
        assert!(arr[0].is_null());
        assert_eq!(arr[1].as_int(), Some(1));
    }

    // `!ref` keys are allowed inside objects, stored with a '!' prefix.
    #[test]
    fn test_object_with_ref_key() {
        let data = parse("obj: {!base: 1, key: 2}").unwrap();
        let obj = data.get("obj").unwrap().as_object().unwrap();
        assert!(obj.contains_key("!base"));
        assert_eq!(obj.get("!base").unwrap().as_int(), Some(1));
        assert_eq!(obj.get("key").unwrap().as_int(), Some(2));
    }

    // A struct-typed field inside a table row parses its nested tuple
    // against the referenced schema.
    #[test]
    fn test_nested_struct_in_table() {
        let input = r#"
        @struct addr (city: string, zip: string)
        @struct person (name: string, home: addr)
        people: @table person [
            (alice, (Boston, "02101")),
            (bob, (NYC, "10001")),
        ]
        "#;
        let tokens = Lexer::new(input).tokenize().unwrap();
        let mut parser = Parser::new(tokens);
        let data = parser.parse().unwrap();

        let people = data.get("people").unwrap().as_array().unwrap();
        let alice_home = people[0].as_object().unwrap().get("home").unwrap().as_object().unwrap();
        assert_eq!(alice_home.get("city").unwrap().as_str(), Some("Boston"));
    }
1042
    // A file that includes itself must be rejected via the include stack.
    // NOTE(review): uses a fixed filename in the shared temp dir — could
    // collide if run concurrently with itself; consider unique names.
    #[test]
    fn test_include_cycle_detection() {
        let dir = std::env::temp_dir();
        let file_path = dir.join("test_cycle_self.tl");
        std::fs::write(&file_path, "@include \"test_cycle_self.tl\"\nval: 1").unwrap();

        let content = std::fs::read_to_string(&file_path).unwrap();
        let tokens = Lexer::new(&content).tokenize().unwrap();
        let mut parser = Parser::new(tokens).with_base_path(&file_path);
        let result = parser.parse();
        assert!(result.is_err(), "Should detect self-referencing include");
        let err_msg = result.unwrap_err().to_string();
        assert!(err_msg.contains("Circular include"), "Error should mention circular include: {}", err_msg);

        std::fs::remove_file(&file_path).ok();
    }

    // Mutual includes (A -> B -> A) must also be detected.
    #[test]
    fn test_include_mutual_cycle_detection() {
        let dir = std::env::temp_dir();
        let file_a = dir.join("test_cycle_a.tl");
        let file_b = dir.join("test_cycle_b.tl");
        std::fs::write(&file_a, "@include \"test_cycle_b.tl\"\na_val: 1").unwrap();
        std::fs::write(&file_b, "@include \"test_cycle_a.tl\"\nb_val: 2").unwrap();

        let content = std::fs::read_to_string(&file_a).unwrap();
        let tokens = Lexer::new(&content).tokenize().unwrap();
        let mut parser = Parser::new(tokens).with_base_path(&file_a);
        let result = parser.parse();
        assert!(result.is_err(), "Should detect mutual cycle between A and B");
        let err_msg = result.unwrap_err().to_string();
        assert!(err_msg.contains("Circular include"), "Error should mention circular include: {}", err_msg);

        std::fs::remove_file(&file_a).ok();
        std::fs::remove_file(&file_b).ok();
    }

    // A fresh parser starts with an empty include stack.
    #[test]
    fn test_include_stack_propagated_to_child() {
        let parser = Parser::new(vec![]);
        assert!(parser.include_stack.is_empty(), "New parser should have empty include stack");
    }

    // b"..." hex literals decode to byte values.
    #[test]
    fn test_bytes_literal_value() {
        let data = parse(r#"payload: b"cafef00d""#).unwrap();
        let val = data.get("payload").unwrap();
        assert_eq!(val.as_bytes(), Some(&[0xca, 0xfe, 0xf0, 0x0d][..]));
    }

    // Empty bytes literal is valid.
    #[test]
    fn test_bytes_literal_empty_value() {
        let data = parse(r#"empty: b"""#).unwrap();
        let val = data.get("empty").unwrap();
        assert_eq!(val.as_bytes(), Some(&[][..]));
    }

    // Bytes literals work as array elements.
    #[test]
    fn test_bytes_literal_in_array() {
        let data = parse(r#"arr: [b"cafe", b"babe"]"#).unwrap();
        let arr = data.get("arr").unwrap().as_array().unwrap();
        assert_eq!(arr[0].as_bytes(), Some(&[0xca, 0xfe][..]));
        assert_eq!(arr[1].as_bytes(), Some(&[0xba, 0xbe][..]));
    }

    // Bytes literals work as object values.
    #[test]
    fn test_bytes_literal_in_object() {
        let data = parse(r#"obj: {data: b"ff00"}"#).unwrap();
        let obj = data.get("obj").unwrap().as_object().unwrap();
        assert_eq!(obj.get("data").unwrap().as_bytes(), Some(&[0xff, 0x00][..]));
    }

    // Nesting beyond MAX_PARSE_DEPTH must be a clean error, never a crash.
    #[test]
    fn test_fuzz_deeply_nested_arrays_no_stack_overflow() {
        let depth = 500;
        let input = format!("key: {}{}", "[".repeat(depth), "]".repeat(depth));
        let result = crate::TeaLeaf::parse(&input);
        match result {
            Err(e) => {
                let err = format!("{}", e);
                assert!(err.contains("nesting depth"), "Error should mention nesting depth: {}", err);
            }
            Ok(_) => panic!("Should fail with depth exceeded, not succeed"),
        }
    }

    // Same guard for deeply nested objects.
    #[test]
    fn test_fuzz_deeply_nested_objects_no_stack_overflow() {
        let depth = 500;
        let mut input = String::from("key: ");
        for i in 0..depth {
            input.push_str(&format!("{{k{}: ", i));
        }
        input.push_str("1");
        for _ in 0..depth {
            input.push('}');
        }
        let result = crate::TeaLeaf::parse(&input);
        assert!(result.is_err(), "Should fail with depth exceeded, not stack overflow");
    }

    // Tag chains also count toward the depth limit.
    #[test]
    fn test_fuzz_deeply_nested_tags_no_stack_overflow() {
        let depth = 500;
        let mut input = String::from("key: ");
        for i in 0..depth {
            input.push_str(&format!(":t{} ", i));
        }
        input.push_str("42");
        let result = crate::TeaLeaf::parse(&input);
        assert!(result.is_err(), "Should fail with depth exceeded, not stack overflow");
    }

    // Depths comfortably under the cap must still succeed.
    #[test]
    fn test_parse_depth_256_succeeds() {
        let depth = 200;
        let input = format!("key: {}1{}", "[".repeat(depth), "]".repeat(depth));
        let result = crate::TeaLeaf::parse(&input);
        if let Err(e) = &result {
            panic!("200 levels of nesting should be fine: {}", e);
        }
    }

    // Fuzzer-found input; only checks that parsing does not panic.
    #[test]
    fn test_fuzz_crash_e42e_full_parse_no_panic() {
        let input = "\"0B\u{10}\u{3}#\"0BP\u{07FE}-----\u{061D}\u{07FE}\u{07FE}-----\u{061D}\u{3}#\"0B\u{10}\u{3}#\"0BP\u{07FE}-----\u{061D}\u{07FE}\u{07FE}-----\u{061D}\u{07FE}";
        let _ = crate::TeaLeaf::parse(input);
    }

    // Fuzzer-found input; only checks that parsing does not panic.
    #[test]
    fn test_fuzz_crash_d038_full_parse_no_panic() {
        let input = "z\" \"-\"\t; \"\"\")\"\"\" 8] ] 02)3313312)313-333-333-3332)33-133-3-33331333302)33";
        let _ = crate::TeaLeaf::parse(input);
    }

    // Value-only types are invalid as schema field types (bare or as array
    // element type); ordinary scalar and user-struct names are accepted.
    #[test]
    fn test_reject_value_only_schema_field_types() {
        for bad_type in &["object", "map", "tuple", "ref", "tagged"] {
            let input = format!("@struct Bad (field: {})\n", bad_type);
            let result = crate::TeaLeaf::parse(&input);
            assert!(result.is_err(), "should reject '{}' as schema field type", bad_type);
            let err = format!("{}", result.err().unwrap());
            assert!(err.contains("value type"), "error for '{}' should mention 'value type': {}", bad_type, err);
        }
        let result = crate::TeaLeaf::parse("@struct Bad (field: []object)\n");
        assert!(result.is_err(), "should reject '[]object' as schema field type");

        for good_type in &["string", "int", "int8", "float", "bool", "bytes", "timestamp", "MyStruct"] {
            let input = format!("@struct Good (field: {})\n", good_type);
            assert!(crate::TeaLeaf::parse(&input).is_ok(), "'{}' should be accepted", good_type);
        }
    }
1217}