//! Parser: turns the lexer's token stream into values, schemas, and unions.

use std::path::Path;

use indexmap::IndexMap;
use crate::{Error, Result, Value, Schema, Field, FieldType, Union, Variant};
use crate::types::ObjectMap;
use crate::lexer::{Token, TokenKind, Lexer};

/// Maximum nesting depth for values; parsing fails beyond this to avoid stack overflow.
const MAX_PARSE_DEPTH: usize = 256;

pub struct Parser {
    tokens: Vec<Token>,
    pos: usize,
    schemas: IndexMap<String, Schema>,
    unions: IndexMap<String, Union>,
    base_path: Option<std::path::PathBuf>,
    /// Canonical paths of files currently being included, used to detect cycles.
    include_stack: Vec<std::path::PathBuf>,
    /// Set when the `@root-array` directive is seen at the top level.
    is_root_array: bool,
}

impl Parser {
    pub fn new(tokens: Vec<Token>) -> Self {
        Self {
            tokens,
            pos: 0,
            schemas: IndexMap::new(),
            unions: IndexMap::new(),
            base_path: None,
            include_stack: Vec::new(),
            is_root_array: false,
        }
    }

    pub fn with_base_path(mut self, path: &Path) -> Self {
        self.base_path = path.parent().map(|p| p.to_path_buf());
        self
    }

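    /// Parses the token stream into an ordered map of top-level key/value pairs,
    /// processing `@struct`, `@union`, `@include`, and `@root-array` directives
    /// as they are encountered.
    ///
    /// Minimal usage sketch (mirrors the unit tests below; not compiled as a doctest):
    ///
    /// ```ignore
    /// let tokens = Lexer::new("name: alice").tokenize().unwrap();
    /// let mut parser = Parser::new(tokens);
    /// let data = parser.parse().unwrap();
    /// assert_eq!(data.get("name").unwrap().as_str(), Some("alice"));
    /// ```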
    pub fn parse(&mut self) -> Result<IndexMap<String, Value>> {
        let mut result = IndexMap::new();

        while !self.at_end() {
            match self.current_kind() {
                TokenKind::Directive(d) => {
                    let directive = d.clone();
                    self.advance();
                    match directive.as_str() {
                        "struct" => self.parse_struct_def()?,
                        "union" => self.parse_union_def()?,
                        "include" => {
                            let included = self.parse_include()?;
                            for (k, v) in included {
                                result.insert(k, v);
                            }
                        }
                        "root-array" => {
                            self.is_root_array = true;
                        }
                        _ => {
                            // Unknown directive: ignore it, but consume a single
                            // argument value if one follows on the same line.
                            let directive_line = self.tokens[self.pos - 1].line;
                            if !self.at_end()
                                && self.current().line == directive_line
                                && self.can_start_value()
                            {
                                let _ = self.parse_value(0)?;
                            }
                        }
                    }
                }
                TokenKind::Word(_) | TokenKind::String(_) => {
                    let (key, value) = self.parse_pair(0)?;
                    result.insert(key, value);
                }
                TokenKind::Ref(r) => {
                    let ref_name = r.clone();
                    self.advance();
                    self.expect(TokenKind::Colon)?;
                    let value = self.parse_value(0)?;
                    result.insert(format!("!{}", ref_name), value);
                }
                TokenKind::Eof => break,
                _ => { self.advance(); }
            }
        }

        Ok(result)
    }

    pub fn into_schemas(self) -> IndexMap<String, Schema> {
        self.schemas
    }

    pub fn into_unions(self) -> IndexMap<String, Union> {
        self.unions
    }

    pub fn into_schemas_and_unions(self) -> (IndexMap<String, Schema>, IndexMap<String, Union>) {
        (self.schemas, self.unions)
    }

    pub fn is_root_array(&self) -> bool {
        self.is_root_array
    }

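    /// Parses a `@struct Name (field: type, ...)` definition and registers it
    /// in `self.schemas`. Fields without an explicit type default to `string`.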
    fn parse_struct_def(&mut self) -> Result<()> {
        let name = self.expect_word()?;
        self.expect(TokenKind::LParen)?;

        let mut schema = Schema::new(&name);

        while !self.check(TokenKind::RParen) {
            // Field names may be bare words or quoted strings (e.g. "@type").
            let field_name = match self.current_kind() {
                TokenKind::Word(w) => {
                    let w = w.clone();
                    self.advance();
                    w
                }
                TokenKind::String(s) => {
                    let s = s.clone();
                    self.advance();
                    s
                }
                _ => return Err(Error::UnexpectedToken {
                    expected: "field name".to_string(),
                    got: format!("{:?}", self.current_kind()),
                }),
            };

            let field_type = if self.check(TokenKind::Colon) {
                self.advance();
                self.parse_field_type()?
            } else {
                FieldType::new("string")
            };

            schema.add_field(field_name, field_type);

            if self.check(TokenKind::Comma) {
                self.advance();
            }
        }

        self.expect(TokenKind::RParen)?;
        self.schemas.insert(name, schema);
        Ok(())
    }

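    /// Parses a `@union Name { Variant(field: type, ...), ... }` definition and
    /// registers it in `self.unions`.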
    fn parse_union_def(&mut self) -> Result<()> {
        let name = self.expect_word()?;
        self.expect(TokenKind::LBrace)?;

        let mut union_type = Union::new(&name);

        while !self.check(TokenKind::RBrace) {
            let variant_name = self.expect_word()?;
            self.expect(TokenKind::LParen)?;

            let mut variant = Variant::new(&variant_name);

            while !self.check(TokenKind::RParen) {
                let field_name = self.expect_word()?;

                let field_type = if self.check(TokenKind::Colon) {
                    self.advance();
                    self.parse_field_type()?
                } else {
                    FieldType::new("string")
                };

                variant.fields.push(Field::new(field_name, field_type));

                if self.check(TokenKind::Comma) {
                    self.advance();
                }
            }

            self.expect(TokenKind::RParen)?;
            union_type.add_variant(variant);

            if self.check(TokenKind::Comma) {
                self.advance();
            }
        }

        self.expect(TokenKind::RBrace)?;
        self.unions.insert(name, union_type);
        Ok(())
    }

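    /// Handles an `@include "path"` directive: lexes and parses the referenced
    /// file with a child parser, merges its schemas and unions into this parser,
    /// and returns the included file's top-level values. Paths are resolved
    /// relative to `base_path`, cycles are rejected, and depth is capped at 32.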
    fn parse_include(&mut self) -> Result<IndexMap<String, Value>> {
        let path_str = match self.current_kind() {
            TokenKind::String(s) => s.clone(),
            TokenKind::Word(w) => w.clone(),
            _ => return Err(Error::UnexpectedToken {
                expected: "file path".to_string(),
                got: format!("{:?}", self.current_kind()),
            }),
        };
        self.advance();

        // Resolve the include path relative to the including file, if known.
        let include_path = if let Some(ref base) = self.base_path {
            base.join(&path_str)
        } else {
            std::path::PathBuf::from(&path_str)
        };

        // Canonicalize for cycle detection; fall back to the raw path on failure.
        let canonical = include_path.canonicalize()
            .unwrap_or_else(|_| include_path.clone());
        if self.include_stack.contains(&canonical) {
            return Err(Error::ParseError(format!(
                "Circular include detected: {}", canonical.display()
            )));
        }
        if self.include_stack.len() >= 32 {
            return Err(Error::ParseError(
                "Include depth exceeds limit of 32".into()
            ));
        }

        let content = std::fs::read_to_string(&include_path)
            .map_err(|e| Error::ParseError(format!("Failed to include {}: {}", path_str, e)))?;

        let tokens = Lexer::new(&content).tokenize()?;
        let mut parser = Parser::new(tokens);
        if let Some(parent) = include_path.parent() {
            parser.base_path = Some(parent.to_path_buf());
        }
        // Propagate the include stack and already-known definitions to the child parser.
        parser.include_stack = self.include_stack.clone();
        parser.include_stack.push(canonical);
        parser.schemas = self.schemas.clone();
        parser.unions = self.unions.clone();

        let data = parser.parse()?;

        // Merge definitions collected by the child back into this parser.
        for (name, schema) in parser.schemas {
            self.schemas.insert(name, schema);
        }
        for (name, union_type) in parser.unions {
            self.unions.insert(name, union_type);
        }

        Ok(data)
    }

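    /// Parses a schema field type such as `int`, `string?`, or `[]float`.
    /// Value-only types (`object`, `map`, `tuple`, `ref`, `tagged`) are rejected.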
    fn parse_field_type(&mut self) -> Result<FieldType> {
        let mut type_str = String::new();

        // Array types are written with a leading `[]`, e.g. `[]string`.
        if self.check(TokenKind::LBracket) {
            self.advance();
            self.expect(TokenKind::RBracket)?;
            type_str.push_str("[]");
        }

        let base = self.expect_word()?;

        match base.as_str() {
            "object" | "map" | "tuple" | "ref" | "tagged" => {
                return Err(Error::ParseError(
                    format!("'{}' is a value type and cannot be used as a schema field type", base)
                ));
            }
            _ => {}
        }

        type_str.push_str(&base);

        // A trailing `?` marks the field as nullable.
        if self.check(TokenKind::Question) {
            self.advance();
            type_str.push('?');
        }

        Ok(FieldType::parse(&type_str))
    }

    fn parse_pair(&mut self, depth: usize) -> Result<(String, Value)> {
        let key = match self.current_kind() {
            TokenKind::Word(w) => w.clone(),
            TokenKind::String(s) => s.clone(),
            _ => return Err(Error::UnexpectedToken {
                expected: "key".to_string(),
                got: format!("{:?}", self.current_kind()),
            }),
        };
        self.advance();
        self.expect(TokenKind::Colon)?;
        let value = self.parse_value(depth)?;
        Ok((key, value))
    }

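    /// Parses a single value: scalars, strings, bytes, refs, timestamps,
    /// tagged values (`:tag value`), directive values, objects, arrays, and
    /// tuples. `depth` tracks nesting and is bounded by `MAX_PARSE_DEPTH`.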
    fn parse_value(&mut self, depth: usize) -> Result<Value> {
        if depth > MAX_PARSE_DEPTH {
            return Err(Error::ParseError("maximum parse nesting depth exceeded".into()));
        }
        match self.current_kind() {
            TokenKind::Null => { self.advance(); Ok(Value::Null) }
            TokenKind::Bool(b) => { let b = *b; self.advance(); Ok(Value::Bool(b)) }
            TokenKind::Int(i) => { let i = *i; self.advance(); Ok(Value::Int(i)) }
            TokenKind::UInt(u) => { let u = *u; self.advance(); Ok(Value::UInt(u)) }
            TokenKind::JsonNumber(s) => { let s = s.clone(); self.advance(); Ok(Value::JsonNumber(s)) }
            TokenKind::Float(f) => { let f = *f; self.advance(); Ok(Value::Float(f)) }
            TokenKind::String(s) => { let s = s.clone(); self.advance(); Ok(Value::String(s)) }
            TokenKind::Bytes(b) => { let b = b.clone(); self.advance(); Ok(Value::Bytes(b)) }
            TokenKind::Word(w) => { let w = w.clone(); self.advance(); Ok(Value::String(w)) }
            TokenKind::Ref(r) => { let r = r.clone(); self.advance(); Ok(Value::Ref(r)) }
            TokenKind::Timestamp(ts, tz) => { let ts = *ts; let tz = *tz; self.advance(); Ok(Value::Timestamp(ts, tz)) }
            TokenKind::Colon => {
                // Tagged value: `:tag value`.
                self.advance();
                match self.current_kind() {
                    TokenKind::Word(w) => {
                        let tag = w.clone();
                        self.advance();
                        let inner = self.parse_value(depth + 1)?;
                        Ok(Value::Tagged(tag, Box::new(inner)))
                    }
                    _ => Err(Error::UnexpectedToken {
                        expected: "tag name after ':'".to_string(),
                        got: format!("{:?}", self.current_kind()),
                    })
                }
            }
            TokenKind::Directive(d) => {
                let directive = d.clone();
                self.advance();
                self.parse_directive_value(&directive, depth)
            }
            TokenKind::LBrace => self.parse_object(depth + 1),
            TokenKind::LBracket => self.parse_array(depth + 1),
            TokenKind::LParen => self.parse_tuple(depth + 1),
            _ => Err(Error::UnexpectedToken {
                expected: "value".to_string(),
                got: format!("{:?}", self.current_kind()),
            }),
        }
    }

    fn parse_directive_value(&mut self, directive: &str, depth: usize) -> Result<Value> {
        match directive {
            "table" => self.parse_table(depth),
            "map" => self.parse_map(depth),
            _ => {
                // Unknown directive in value position: consume its argument,
                // if any, and yield null.
                if self.can_start_value() {
                    let _ = self.parse_value(depth)?;
                }
                Ok(Value::Null)
            }
        }
    }

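    /// Returns true if the current token can begin a value; used to decide
    /// whether an unknown directive is followed by an argument to consume.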
    fn can_start_value(&self) -> bool {
        matches!(
            self.current_kind(),
            TokenKind::Null
                | TokenKind::Bool(_)
                | TokenKind::Int(_)
                | TokenKind::UInt(_)
                | TokenKind::Float(_)
                | TokenKind::String(_)
                | TokenKind::Bytes(_)
                | TokenKind::Word(_)
                | TokenKind::Ref(_)
                | TokenKind::Timestamp(_, _)
                | TokenKind::JsonNumber(_)
                | TokenKind::Colon
                | TokenKind::Directive(_)
                | TokenKind::LBrace
                | TokenKind::LBracket
                | TokenKind::LParen
        )
    }

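    /// Parses a `@map { key: value, ... }` literal into an ordered list of pairs.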
    fn parse_map(&mut self, depth: usize) -> Result<Value> {
        self.expect(TokenKind::LBrace)?;
        let mut pairs = Vec::new();

        while !self.check(TokenKind::RBrace) {
            // Map keys may be strings, bare words, or integers.
            let key = match self.current_kind() {
                TokenKind::String(s) => { let s = s.clone(); self.advance(); Value::String(s) }
                TokenKind::Word(w) => { let w = w.clone(); self.advance(); Value::String(w) }
                TokenKind::Int(i) => { let i = *i; self.advance(); Value::Int(i) }
                TokenKind::UInt(u) => { let u = *u; self.advance(); Value::UInt(u) }
                _ => return Err(Error::UnexpectedToken {
                    expected: "map key".to_string(),
                    got: format!("{:?}", self.current_kind()),
                }),
            };

            self.expect(TokenKind::Colon)?;
            let value = self.parse_value(depth + 1)?;
            pairs.push((key, value));

            if self.check(TokenKind::Comma) {
                self.advance();
            }
        }

        self.expect(TokenKind::RBrace)?;
        Ok(Value::Map(pairs))
    }

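    /// Parses a `@table struct_name [ (..), (..) ]` value, expanding each row
    /// tuple into an object using the named schema.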
    fn parse_table(&mut self, depth: usize) -> Result<Value> {
        let struct_name = self.expect_word()?;
        let schema = self.schemas
            .get(&struct_name)
            .ok_or_else(|| Error::UnknownStruct(struct_name.clone()))?
            .clone();

        self.expect(TokenKind::LBracket)?;

        let mut rows = Vec::new();
        while !self.check(TokenKind::RBracket) {
            let row = self.parse_tuple_with_schema(&schema, depth + 1)?;
            rows.push(row);
            if self.check(TokenKind::Comma) {
                self.advance();
            }
        }

        self.expect(TokenKind::RBracket)?;
        Ok(Value::Array(rows))
    }

    fn parse_tuple_with_schema(&mut self, schema: &Schema, depth: usize) -> Result<Value> {
        self.expect(TokenKind::LParen)?;

        let mut obj = ObjectMap::new();
        for field in &schema.fields {
            let value = self.parse_value_for_field(&field.field_type, depth)?;
            obj.insert(field.name.clone(), value);
            if self.check(TokenKind::Comma) {
                self.advance();
            }
        }

        self.expect(TokenKind::RParen)?;
        Ok(Value::Object(obj))
    }

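    /// Parses a table cell according to its declared field type: nulls pass
    /// through, fields typed as another struct accept `(...)` tuples, and
    /// array fields accept `[...]` of the element type.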
    fn parse_value_for_field(&mut self, field_type: &FieldType, depth: usize) -> Result<Value> {
        // A null literal is accepted for any field.
        if self.check(TokenKind::Null) {
            self.advance();
            return Ok(Value::Null);
        }

        // A `(...)` tuple for a field whose type names another struct is
        // parsed with that struct's schema.
        if !field_type.is_array && self.check(TokenKind::LParen) {
            if let Some(schema) = self.schemas.get(&field_type.base).cloned() {
                return self.parse_tuple_with_schema(&schema, depth + 1);
            }
        }

        // Array fields: parse each element with the base (element) type.
        if field_type.is_array {
            self.expect(TokenKind::LBracket)?;
            let mut arr = Vec::new();
            let inner_type = FieldType::new(&field_type.base);
            while !self.check(TokenKind::RBracket) {
                arr.push(self.parse_value_for_field(&inner_type, depth + 1)?);
                if self.check(TokenKind::Comma) {
                    self.advance();
                }
            }
            self.expect(TokenKind::RBracket)?;
            return Ok(Value::Array(arr));
        }

        self.parse_value(depth)
    }

    fn parse_object(&mut self, depth: usize) -> Result<Value> {
        self.expect(TokenKind::LBrace)?;
        let mut obj = ObjectMap::new();

        while !self.check(TokenKind::RBrace) {
            if let TokenKind::Ref(r) = self.current_kind() {
                let key = format!("!{}", r);
                self.advance();
                self.expect(TokenKind::Colon)?;
                let value = self.parse_value(depth)?;
                obj.insert(key, value);
            } else {
                let (key, value) = self.parse_pair(depth)?;
                obj.insert(key, value);
            }
            if self.check(TokenKind::Comma) {
                self.advance();
            }
        }

        self.expect(TokenKind::RBrace)?;
        Ok(Value::Object(obj))
    }

    fn parse_array(&mut self, depth: usize) -> Result<Value> {
        self.expect(TokenKind::LBracket)?;
        let mut arr = Vec::new();

        while !self.check(TokenKind::RBracket) {
            arr.push(self.parse_value(depth)?);
            if self.check(TokenKind::Comma) {
                self.advance();
            }
        }

        self.expect(TokenKind::RBracket)?;
        Ok(Value::Array(arr))
    }

    fn parse_tuple(&mut self, depth: usize) -> Result<Value> {
        self.expect(TokenKind::LParen)?;
        let mut arr = Vec::new();

        while !self.check(TokenKind::RParen) {
            arr.push(self.parse_value(depth)?);
            if self.check(TokenKind::Comma) {
                self.advance();
            }
        }

        self.expect(TokenKind::RParen)?;
        Ok(Value::Array(arr))
    }

    fn current(&self) -> &Token {
        // Past the end of the token list, behave as if an EOF token were present.
        self.tokens.get(self.pos).unwrap_or(&Token {
            kind: TokenKind::Eof,
            line: 0,
            col: 0,
        })
    }

    fn current_kind(&self) -> &TokenKind {
        &self.current().kind
    }

    fn advance(&mut self) {
        if self.pos < self.tokens.len() {
            self.pos += 1;
        }
    }

    fn check(&self, expected: TokenKind) -> bool {
        std::mem::discriminant(self.current_kind()) == std::mem::discriminant(&expected)
    }

    fn expect(&mut self, expected: TokenKind) -> Result<()> {
        if self.check(expected.clone()) {
            self.advance();
            Ok(())
        } else {
            Err(Error::UnexpectedToken {
                expected: format!("{:?}", expected),
                got: format!("{:?}", self.current_kind()),
            })
        }
    }

    fn expect_word(&mut self) -> Result<String> {
        match self.current_kind() {
            TokenKind::Word(w) => {
                let w = w.clone();
                self.advance();
                Ok(w)
            }
            _ => Err(Error::UnexpectedToken {
                expected: "word".to_string(),
                got: format!("{:?}", self.current_kind()),
            }),
        }
    }

    fn at_end(&self) -> bool {
        matches!(self.current_kind(), TokenKind::Eof)
    }
}

#[cfg(test)]
mod tests {
    use super::*;
    use crate::lexer::Lexer;

    fn parse(input: &str) -> Result<IndexMap<String, Value>> {
        let tokens = Lexer::new(input).tokenize()?;
        Parser::new(tokens).parse()
    }

    #[test]
    fn test_simple_values() {
        let data = parse("a: 1, b: hello, c: true, d: ~").unwrap();
        assert_eq!(data.get("a").unwrap().as_int(), Some(1));
        assert_eq!(data.get("b").unwrap().as_str(), Some("hello"));
        assert_eq!(data.get("c").unwrap().as_bool(), Some(true));
        assert!(data.get("d").unwrap().is_null());
    }

    #[test]
    fn test_object() {
        let data = parse("obj: {x: 1, y: 2}").unwrap();
        let obj = data.get("obj").unwrap().as_object().unwrap();
        assert_eq!(obj.get("x").unwrap().as_int(), Some(1));
        assert_eq!(obj.get("y").unwrap().as_int(), Some(2));
    }

    #[test]
    fn test_array() {
        let data = parse("arr: [1, 2, 3]").unwrap();
        let arr = data.get("arr").unwrap().as_array().unwrap();
        assert_eq!(arr.len(), 3);
        assert_eq!(arr[0].as_int(), Some(1));
    }

    #[test]
    fn test_struct_and_table() {
        let input = r#"
            @struct point (x: int, y: int)
            points: @table point [
                (1, 2),
                (3, 4),
            ]
        "#;
        let tokens = Lexer::new(input).tokenize().unwrap();
        let mut parser = Parser::new(tokens);
        let data = parser.parse().unwrap();

        let points = data.get("points").unwrap().as_array().unwrap();
        assert_eq!(points.len(), 2);

        let p0 = points[0].as_object().unwrap();
        assert_eq!(p0.get("x").unwrap().as_int(), Some(1));
        assert_eq!(p0.get("y").unwrap().as_int(), Some(2));
    }

    #[test]
    fn test_union_def() {
        let input = r#"
            @union Shape {
                Circle(radius: float),
                Rectangle(width: float, height: float),
                Point(),
            }
        "#;
        let tokens = Lexer::new(input).tokenize().unwrap();
        let mut parser = Parser::new(tokens);
        parser.parse().unwrap();
        let unions = parser.into_unions();
        let shape = unions.get("Shape").unwrap();
        assert_eq!(shape.variants.len(), 3);
        assert_eq!(shape.variants[0].name, "Circle");
        assert_eq!(shape.variants[0].fields.len(), 1);
        assert_eq!(shape.variants[1].name, "Rectangle");
        assert_eq!(shape.variants[1].fields.len(), 2);
        assert_eq!(shape.variants[2].name, "Point");
        assert_eq!(shape.variants[2].fields.len(), 0);
    }

    #[test]
    fn test_map_value() {
        let data = parse("m: @map {1: one, 2: two}").unwrap();
        let m = data.get("m").unwrap().as_map().unwrap();
        assert_eq!(m.len(), 2);
        assert_eq!(m[0].0.as_int(), Some(1));
        assert_eq!(m[0].1.as_str(), Some("one"));
        assert_eq!(m[1].0.as_int(), Some(2));
        assert_eq!(m[1].1.as_str(), Some("two"));
    }

    #[test]
    fn test_map_with_string_keys() {
        let data = parse(r#"m: @map {"key1": 10, "key2": 20}"#).unwrap();
        let m = data.get("m").unwrap().as_map().unwrap();
        assert_eq!(m.len(), 2);
    }

    #[test]
    fn test_map_empty() {
        let data = parse("m: @map {}").unwrap();
        let m = data.get("m").unwrap().as_map().unwrap();
        assert_eq!(m.len(), 0);
    }

    #[test]
    fn test_ref_value() {
        let data = parse("config: !base_config").unwrap();
        assert_eq!(data.get("config").unwrap().as_ref_name(), Some("base_config"));
    }

    #[test]
    fn test_tagged_value() {
        let data = parse("status: :ok 200").unwrap();
        let (tag, inner) = data.get("status").unwrap().as_tagged().unwrap();
        assert_eq!(tag, "ok");
        assert_eq!(inner.as_int(), Some(200));
    }

    #[test]
    fn test_tagged_null() {
        let data = parse("status: :none ~").unwrap();
        let (tag, inner) = data.get("status").unwrap().as_tagged().unwrap();
        assert_eq!(tag, "none");
        assert!(inner.is_null());
    }

    #[test]
    fn test_tagged_value_no_space_after_colon() {
        let data = parse("status::ok 200").unwrap();
        let (tag, inner) = data.get("status").unwrap().as_tagged().unwrap();
        assert_eq!(tag, "ok");
        assert_eq!(inner.as_int(), Some(200));
    }

    #[test]
    fn test_key_value_no_space_after_colon() {
        let data = parse("name:alice\nage:30").unwrap();
        assert_eq!(data.get("name").unwrap().as_str(), Some("alice"));
        assert_eq!(data.get("age").unwrap().as_int(), Some(30));
    }

    #[test]
    fn test_tuple_value() {
        let data = parse("point: (1, 2, 3)").unwrap();
        let arr = data.get("point").unwrap().as_array().unwrap();
        assert_eq!(arr.len(), 3);
        assert_eq!(arr[0].as_int(), Some(1));
        assert_eq!(arr[1].as_int(), Some(2));
        assert_eq!(arr[2].as_int(), Some(3));
    }

    #[test]
    fn test_nested_object() {
        let data = parse("outer: {inner: {x: 1}}").unwrap();
        let outer = data.get("outer").unwrap().as_object().unwrap();
        let inner = outer.get("inner").unwrap().as_object().unwrap();
        assert_eq!(inner.get("x").unwrap().as_int(), Some(1));
    }

    #[test]
    fn test_nested_arrays() {
        let data = parse("matrix: [[1, 2], [3, 4]]").unwrap();
        let matrix = data.get("matrix").unwrap().as_array().unwrap();
        assert_eq!(matrix.len(), 2);
        let row0 = matrix[0].as_array().unwrap();
        assert_eq!(row0[0].as_int(), Some(1));
    }

    #[test]
    fn test_struct_with_nullable_field() {
        let input = r#"
            @struct user (name: string, email: string?)
            users: @table user [
                (alice, "a@test.com"),
                (bob, ~),
            ]
        "#;
        let tokens = Lexer::new(input).tokenize().unwrap();
        let mut parser = Parser::new(tokens);
        let data = parser.parse().unwrap();
        let schemas = parser.into_schemas();

        let schema = schemas.get("user").unwrap();
        assert!(schema.fields[1].field_type.nullable);

        let users = data.get("users").unwrap().as_array().unwrap();
        assert_eq!(users.len(), 2);
        assert!(users[1].as_object().unwrap().get("email").unwrap().is_null());
    }

    #[test]
    fn test_struct_with_array_field() {
        let input = r#"
            @struct item (name: string, tags: []string)
            items: @table item [
                (widget, [cool, useful]),
            ]
        "#;
        let tokens = Lexer::new(input).tokenize().unwrap();
        let mut parser = Parser::new(tokens);
        let data = parser.parse().unwrap();

        let items = data.get("items").unwrap().as_array().unwrap();
        let tags = items[0].as_object().unwrap().get("tags").unwrap().as_array().unwrap();
        assert_eq!(tags.len(), 2);
    }

    #[test]
    fn test_root_array_directive() {
        let input = "@root-array\nroot: [1, 2, 3]";
        let tokens = Lexer::new(input).tokenize().unwrap();
        let mut parser = Parser::new(tokens);
        parser.parse().unwrap();
        assert!(parser.is_root_array());
    }

    #[test]
    fn test_ref_key_at_top_level() {
        let input = "!defaults: {theme: dark}";
        let data = parse(input).unwrap();
        assert!(data.contains_key("!defaults"));
        let obj = data.get("!defaults").unwrap().as_object().unwrap();
        assert_eq!(obj.get("theme").unwrap().as_str(), Some("dark"));
    }

    #[test]
    fn test_string_key() {
        let data = parse(r#""my key": 42"#).unwrap();
        assert_eq!(data.get("my key").unwrap().as_int(), Some(42));
    }

    #[test]
    fn test_unexpected_token_error() {
        // Stray tokens at the top level are skipped; whether or not this
        // returns an error, it must not panic.
        let result = parse("] invalid");
        let _ = result;
    }

    #[test]
    fn test_missing_colon_error() {
        let input = "key value";
        let result = parse(input);
        assert!(result.is_err());
    }

    #[test]
    fn test_unknown_struct_in_table() {
        let input = "data: @table nonexistent [(1, 2)]";
        let result = parse(input);
        assert!(result.is_err());
    }

    #[test]
    fn test_struct_field_without_type() {
        let input = r#"
            @struct simple (name, value)
            items: @table simple [
                (hello, world),
            ]
        "#;
        let tokens = Lexer::new(input).tokenize().unwrap();
        let mut parser = Parser::new(tokens);
        let data = parser.parse().unwrap();
        let schemas = parser.into_schemas();

        // Fields declared without a type default to string.
        let schema = schemas.get("simple").unwrap();
        assert_eq!(schema.fields[0].field_type.base, "string");
        assert_eq!(schema.fields[1].field_type.base, "string");

        let items = data.get("items").unwrap().as_array().unwrap();
        assert_eq!(items[0].as_object().unwrap().get("name").unwrap().as_str(), Some("hello"));
    }

    #[test]
    fn test_unknown_directive_ignored() {
        let data = parse("@custom_directive\nkey: value").unwrap();
        assert_eq!(data.get("key").unwrap().as_str(), Some("value"));
    }

    #[test]
    fn test_unknown_directive_consumes_same_line_argument() {
        // A bare word on the same line is consumed as the directive's argument.
        let data = parse("@custom foo\nkey: value").unwrap();
        assert!(data.get("foo").is_none(), "foo should be consumed as directive arg, not a key");
        assert_eq!(data.get("key").unwrap().as_str(), Some("value"));

        // Array argument.
        let data = parse("@custom [1, 2, 3]\nkey: value").unwrap();
        assert_eq!(data.get("key").unwrap().as_str(), Some("value"));

        // Object argument.
        let data = parse("@custom {a: 1}\nkey: value").unwrap();
        assert_eq!(data.get("key").unwrap().as_str(), Some("value"));

        // No argument: the next line parses normally.
        let data = parse("@custom\nkey: value").unwrap();
        assert_eq!(data.get("key").unwrap().as_str(), Some("value"));

        // Directive at the end of the input.
        let data = parse("key: value\n@custom").unwrap();
        assert_eq!(data.get("key").unwrap().as_str(), Some("value"));

        // A pair on the following line is not swallowed.
        let data = parse("@custom\nfoo: bar").unwrap();
        assert_eq!(data.get("foo").unwrap().as_str(), Some("bar"));
    }

    #[test]
    fn test_unknown_directive_value_consumes_argument() {
        // Array argument is consumed; the value itself becomes null.
        let data = parse("key: @unknown [1, 2, 3]\nother: 42").unwrap();
        assert!(data.get("key").unwrap().is_null(), "unknown directive value should be null");
        assert_eq!(data.get("other").unwrap().as_int(), Some(42), "next key should parse normally");

        // Object argument.
        let data = parse("key: @unknown {a: 1}\nother: ok").unwrap();
        assert!(data.get("key").unwrap().is_null());
        assert_eq!(data.get("other").unwrap().as_str(), Some("ok"));

        // Scalar argument.
        let data = parse("key: @unknown 42\nother: ok").unwrap();
        assert!(data.get("key").unwrap().is_null());
        assert_eq!(data.get("other").unwrap().as_str(), Some("ok"));

        // Inside an array, an unknown directive with no argument becomes null.
        let data = parse("arr: [@unknown, 1, 2]").unwrap();
        let arr = data.get("arr").unwrap().as_array().unwrap();
        assert!(arr[0].is_null());
        assert_eq!(arr[1].as_int(), Some(1));
    }

    #[test]
    fn test_object_with_ref_key() {
        let data = parse("obj: {!base: 1, key: 2}").unwrap();
        let obj = data.get("obj").unwrap().as_object().unwrap();
        assert!(obj.contains_key("!base"));
        assert_eq!(obj.get("!base").unwrap().as_int(), Some(1));
        assert_eq!(obj.get("key").unwrap().as_int(), Some(2));
    }

    #[test]
    fn test_nested_struct_in_table() {
        let input = r#"
            @struct addr (city: string, zip: string)
            @struct person (name: string, home: addr)
            people: @table person [
                (alice, (Boston, "02101")),
                (bob, (NYC, "10001")),
            ]
        "#;
        let tokens = Lexer::new(input).tokenize().unwrap();
        let mut parser = Parser::new(tokens);
        let data = parser.parse().unwrap();

        let people = data.get("people").unwrap().as_array().unwrap();
        let alice_home = people[0].as_object().unwrap().get("home").unwrap().as_object().unwrap();
        assert_eq!(alice_home.get("city").unwrap().as_str(), Some("Boston"));
    }

    #[test]
    fn test_include_cycle_detection() {
        // A file that includes itself must be rejected.
        let dir = std::env::temp_dir();
        let file_path = dir.join("test_cycle_self.tl");
        std::fs::write(&file_path, "@include \"test_cycle_self.tl\"\nval: 1").unwrap();

        let content = std::fs::read_to_string(&file_path).unwrap();
        let tokens = Lexer::new(&content).tokenize().unwrap();
        let mut parser = Parser::new(tokens).with_base_path(&file_path);
        let result = parser.parse();
        assert!(result.is_err(), "Should detect self-referencing include");
        let err_msg = result.unwrap_err().to_string();
        assert!(err_msg.contains("Circular include"), "Error should mention circular include: {}", err_msg);

        std::fs::remove_file(&file_path).ok();
    }

    #[test]
    fn test_include_mutual_cycle_detection() {
        // Two files that include each other must be rejected.
        let dir = std::env::temp_dir();
        let file_a = dir.join("test_cycle_a.tl");
        let file_b = dir.join("test_cycle_b.tl");
        std::fs::write(&file_a, "@include \"test_cycle_b.tl\"\na_val: 1").unwrap();
        std::fs::write(&file_b, "@include \"test_cycle_a.tl\"\nb_val: 2").unwrap();

        let content = std::fs::read_to_string(&file_a).unwrap();
        let tokens = Lexer::new(&content).tokenize().unwrap();
        let mut parser = Parser::new(tokens).with_base_path(&file_a);
        let result = parser.parse();
        assert!(result.is_err(), "Should detect mutual cycle between A and B");
        let err_msg = result.unwrap_err().to_string();
        assert!(err_msg.contains("Circular include"), "Error should mention circular include: {}", err_msg);

        std::fs::remove_file(&file_a).ok();
        std::fs::remove_file(&file_b).ok();
    }

    #[test]
    fn test_include_stack_propagated_to_child() {
        // A fresh parser starts with an empty include stack; propagation to
        // child parsers is exercised by the cycle-detection tests above.
        let parser = Parser::new(vec![]);
        assert!(parser.include_stack.is_empty(), "New parser should have empty include stack");
    }

    #[test]
    fn test_bytes_literal_value() {
        let data = parse(r#"payload: b"cafef00d""#).unwrap();
        let val = data.get("payload").unwrap();
        assert_eq!(val.as_bytes(), Some(&[0xca, 0xfe, 0xf0, 0x0d][..]));
    }

    #[test]
    fn test_bytes_literal_empty_value() {
        let data = parse(r#"empty: b"""#).unwrap();
        let val = data.get("empty").unwrap();
        assert_eq!(val.as_bytes(), Some(&[][..]));
    }

    #[test]
    fn test_bytes_literal_in_array() {
        let data = parse(r#"arr: [b"cafe", b"babe"]"#).unwrap();
        let arr = data.get("arr").unwrap().as_array().unwrap();
        assert_eq!(arr[0].as_bytes(), Some(&[0xca, 0xfe][..]));
        assert_eq!(arr[1].as_bytes(), Some(&[0xba, 0xbe][..]));
    }

    #[test]
    fn test_bytes_literal_in_object() {
        let data = parse(r#"obj: {data: b"ff00"}"#).unwrap();
        let obj = data.get("obj").unwrap().as_object().unwrap();
        assert_eq!(obj.get("data").unwrap().as_bytes(), Some(&[0xff, 0x00][..]));
    }

    #[test]
    fn test_fuzz_deeply_nested_arrays_no_stack_overflow() {
        // 500 levels of nesting, well past MAX_PARSE_DEPTH (256).
        let depth = 500;
        let input = format!("key: {}{}", "[".repeat(depth), "]".repeat(depth));
        let result = crate::TeaLeaf::parse(&input);
        match result {
            Err(e) => {
                let err = format!("{}", e);
                assert!(err.contains("nesting depth"), "Error should mention nesting depth: {}", err);
            }
            Ok(_) => panic!("Should fail with depth exceeded, not succeed"),
        }
    }

    #[test]
    fn test_fuzz_deeply_nested_objects_no_stack_overflow() {
        // 500 levels of nested objects must fail gracefully, not overflow the stack.
        let depth = 500;
        let mut input = String::from("key: ");
        for i in 0..depth {
            input.push_str(&format!("{{k{}: ", i));
        }
        input.push_str("1");
        for _ in 0..depth {
            input.push('}');
        }
        let result = crate::TeaLeaf::parse(&input);
        assert!(result.is_err(), "Should fail with depth exceeded, not stack overflow");
    }

    #[test]
    fn test_fuzz_deeply_nested_tags_no_stack_overflow() {
        // 500 nested tagged values must fail gracefully, not overflow the stack.
        let depth = 500;
        let mut input = String::from("key: ");
        for i in 0..depth {
            input.push_str(&format!(":t{} ", i));
        }
        input.push_str("42");
        let result = crate::TeaLeaf::parse(&input);
        assert!(result.is_err(), "Should fail with depth exceeded, not stack overflow");
    }

    #[test]
    fn test_parse_depth_256_succeeds() {
        // 200 levels is below MAX_PARSE_DEPTH (256) and must parse successfully.
        let depth = 200;
        let input = format!("key: {}1{}", "[".repeat(depth), "]".repeat(depth));
        let result = crate::TeaLeaf::parse(&input);
        if let Err(e) = &result {
            panic!("200 levels of nesting should be fine: {}", e);
        }
    }

    #[test]
    fn test_fuzz_crash_e42e_full_parse_no_panic() {
        // Regression input found by fuzzing; parsing must not panic.
        let input = "\"0B\u{10}\u{3}#\"0BP\u{07FE}-----\u{061D}\u{07FE}\u{07FE}-----\u{061D}\u{3}#\"0B\u{10}\u{3}#\"0BP\u{07FE}-----\u{061D}\u{07FE}\u{07FE}-----\u{061D}\u{07FE}";
        let _ = crate::TeaLeaf::parse(input);
    }

    #[test]
    fn test_fuzz_crash_d038_full_parse_no_panic() {
        // Regression input found by fuzzing; parsing must not panic.
        let input = "z\" \"-\"\t; \"\"\")\"\"\" 8] ] 02)3313312)313-333-333-3332)33-133-3-33331333302)33";
        let _ = crate::TeaLeaf::parse(input);
    }

    #[test]
    fn test_reject_value_only_schema_field_types() {
        // Value-only types cannot be used as schema field types.
        for bad_type in &["object", "map", "tuple", "ref", "tagged"] {
            let input = format!("@struct Bad (field: {})\n", bad_type);
            let result = crate::TeaLeaf::parse(&input);
            assert!(result.is_err(), "should reject '{}' as schema field type", bad_type);
            let err = format!("{}", result.err().unwrap());
            assert!(err.contains("value type"), "error for '{}' should mention 'value type': {}", bad_type, err);
        }

        // An array of a value-only type is rejected as well.
        let result = crate::TeaLeaf::parse("@struct Bad (field: []object)\n");
        assert!(result.is_err(), "should reject '[]object' as schema field type");

        // Scalar and user-defined struct types remain accepted.
        for good_type in &["string", "int", "int8", "float", "bool", "bytes", "timestamp", "MyStruct"] {
            let input = format!("@struct Good (field: {})\n", good_type);
            assert!(crate::TeaLeaf::parse(&input).is_ok(), "'{}' should be accepted", good_type);
        }
    }

    #[test]
    fn test_parse_struct_with_quoted_fields() {
        let input = "@struct foo(\"@type\":string, name:string)\ndata:@table foo[(A,x),(B,y)]\n";
        let doc = crate::TeaLeaf::parse(input).unwrap();
        let arr = doc.get("data").unwrap().as_array().unwrap();
        assert_eq!(arr.len(), 2);

        let first = arr[0].as_object().unwrap();
        assert_eq!(first.get("@type").unwrap().as_str(), Some("A"));
        assert_eq!(first.get("name").unwrap().as_str(), Some("x"));

        let second = arr[1].as_object().unwrap();
        assert_eq!(second.get("@type").unwrap().as_str(), Some("B"));
        assert_eq!(second.get("name").unwrap().as_str(), Some("y"));
    }
}