1use std::path::Path;
4use indexmap::IndexMap;
5use crate::{Error, Result, Value, Schema, Field, FieldType, Union, Variant};
6use crate::types::ObjectMap;
7use crate::lexer::{Token, TokenKind, Lexer};
8
/// Hard cap on value-nesting recursion depth; prevents stack overflow on
/// adversarial input (exercised by the fuzz tests below).
const MAX_PARSE_DEPTH: usize = 256;
12
/// Recursive-descent parser over a lexed token stream.
pub struct Parser {
    tokens: Vec<Token>,                     // lexed input stream
    pos: usize,                             // index of the current token
    schemas: IndexMap<String, Schema>,      // @struct definitions seen so far
    unions: IndexMap<String, Union>,        // @union definitions seen so far
    base_path: Option<std::path::PathBuf>,  // directory for resolving relative @include paths
    include_stack: Vec<std::path::PathBuf>, // canonicalized paths of in-flight includes (cycle guard)
    is_root_array: bool,                    // set when the @root-array directive is seen
}
24
25impl Parser {
26 pub fn new(tokens: Vec<Token>) -> Self {
27 Self {
28 tokens,
29 pos: 0,
30 schemas: IndexMap::new(),
31 unions: IndexMap::new(),
32 base_path: None,
33 include_stack: Vec::new(),
34 is_root_array: false,
35 }
36 }
37
38 pub fn with_base_path(mut self, path: &Path) -> Self {
39 self.base_path = path.parent().map(|p| p.to_path_buf());
40 self
41 }
42
    /// Parse the token stream into a top-level key → value map.
    ///
    /// Recognizes four shapes at the root: `@directive` forms (struct/union
    /// definitions, includes, `@root-array`, or unknown directives),
    /// `key: value` pairs, `!ref: value` pairs (stored under the key
    /// `"!ref"`), and EOF. Any other token is silently skipped.
    pub fn parse(&mut self) -> Result<IndexMap<String, Value>> {
        let mut result = IndexMap::new();

        while !self.at_end() {
            match self.current_kind() {
                TokenKind::Directive(d) => {
                    let directive = d.clone();
                    self.advance();
                    match directive.as_str() {
                        "struct" => self.parse_struct_def()?,
                        "union" => self.parse_union_def()?,
                        "include" => {
                            // Merge the included file's top-level pairs into
                            // ours; later keys overwrite earlier ones.
                            let included = self.parse_include()?;
                            for (k, v) in included {
                                result.insert(k, v);
                            }
                        }
                        "root-array" => {
                            self.is_root_array = true;
                        }
                        _ => {
                            // Unknown directive: consume (and discard) a single
                            // argument value, but only when it starts on the
                            // same source line as the directive itself — a
                            // token on the next line is a real key, not an arg.
                            let directive_line = self.tokens[self.pos - 1].line;
                            if !self.at_end()
                                && self.current().line == directive_line
                                && self.can_start_value()
                            {
                                let _ = self.parse_value(0)?;
                            }
                        }
                    }
                }
                TokenKind::Word(_) | TokenKind::String(_) => {
                    let (key, value) = self.parse_pair(0)?;
                    result.insert(key, value);
                }
                TokenKind::Ref(r) => {
                    // Top-level `!name: value` is stored under the key "!name".
                    let ref_name = r.clone();
                    self.advance();
                    self.expect(TokenKind::Colon)?;
                    let value = self.parse_value(0)?;
                    result.insert(format!("!{}", ref_name), value);
                }
                TokenKind::Eof => break,
                _ => { self.advance(); }
            }
        }

        Ok(result)
    }
97
98 pub fn into_schemas(self) -> IndexMap<String, Schema> {
99 self.schemas
100 }
101
102 pub fn into_unions(self) -> IndexMap<String, Union> {
103 self.unions
104 }
105
106 pub fn into_schemas_and_unions(self) -> (IndexMap<String, Schema>, IndexMap<String, Union>) {
108 (self.schemas, self.unions)
109 }
110
111 pub fn is_root_array(&self) -> bool {
113 self.is_root_array
114 }
115
116 fn parse_struct_def(&mut self) -> Result<()> {
121 let name = self.expect_word()?;
122 self.expect(TokenKind::LParen)?;
123
124 let mut schema = Schema::new(&name);
125
126 while !self.check(TokenKind::RParen) {
127 let field_name = match self.current_kind() {
129 TokenKind::Word(w) => {
130 let w = w.clone();
131 self.advance();
132 w
133 }
134 _ => return Err(Error::UnexpectedToken {
135 expected: "field name".to_string(),
136 got: format!("{:?}", self.current_kind()),
137 }),
138 };
139
140 let field_type = if self.check(TokenKind::Colon) {
141 self.advance();
142 self.parse_field_type()?
143 } else {
144 FieldType::new("string")
145 };
146
147 schema.add_field(field_name, field_type);
148
149 if self.check(TokenKind::Comma) {
150 self.advance();
151 }
152 }
153
154 self.expect(TokenKind::RParen)?;
155 self.schemas.insert(name, schema);
156 Ok(())
157 }
158
159 fn parse_union_def(&mut self) -> Result<()> {
164 let name = self.expect_word()?;
165 self.expect(TokenKind::LBrace)?;
166
167 let mut union_type = Union::new(&name);
168
169 while !self.check(TokenKind::RBrace) {
170 let variant_name = self.expect_word()?;
171 self.expect(TokenKind::LParen)?;
172
173 let mut variant = Variant::new(&variant_name);
174
175 while !self.check(TokenKind::RParen) {
176 let field_name = self.expect_word()?;
177
178 let field_type = if self.check(TokenKind::Colon) {
179 self.advance();
180 self.parse_field_type()?
181 } else {
182 FieldType::new("string")
183 };
184
185 variant.fields.push(Field::new(field_name, field_type));
186
187 if self.check(TokenKind::Comma) {
188 self.advance();
189 }
190 }
191
192 self.expect(TokenKind::RParen)?;
193 union_type.add_variant(variant);
194
195 if self.check(TokenKind::Comma) {
196 self.advance();
197 }
198 }
199
200 self.expect(TokenKind::RBrace)?;
201 self.unions.insert(name, union_type);
202 Ok(())
203 }
204
    /// Handle `@include "path"`: lex and parse the referenced file, merge its
    /// schema/union definitions into this parser, and return its top-level data.
    ///
    /// Cycles are detected via a stack of canonicalized paths, and include
    /// depth is capped at 32.
    fn parse_include(&mut self) -> Result<IndexMap<String, Value>> {
        let path_str = match self.current_kind() {
            TokenKind::String(s) => s.clone(),
            TokenKind::Word(w) => w.clone(),
            _ => return Err(Error::UnexpectedToken {
                expected: "file path".to_string(),
                got: format!("{:?}", self.current_kind()),
            }),
        };
        self.advance();

        // Relative paths resolve against the including file's directory.
        let include_path = if let Some(ref base) = self.base_path {
            base.join(&path_str)
        } else {
            std::path::PathBuf::from(&path_str)
        };

        // Fall back to the raw path when canonicalization fails (e.g. file
        // missing); the read below then produces the real I/O error.
        let canonical = include_path.canonicalize()
            .unwrap_or_else(|_| include_path.clone());
        if self.include_stack.contains(&canonical) {
            return Err(Error::ParseError(format!(
                "Circular include detected: {}", canonical.display()
            )));
        }
        if self.include_stack.len() >= 32 {
            return Err(Error::ParseError(
                "Include depth exceeds limit of 32".into()
            ));
        }

        let content = std::fs::read_to_string(&include_path)
            .map_err(|e| Error::ParseError(format!("Failed to include {}: {}", path_str, e)))?;

        // The child parser inherits our include stack (plus this file) so
        // nested includes can detect cycles through us, and a copy of the
        // schemas/unions defined so far so the included file can use them.
        let tokens = Lexer::new(&content).tokenize()?;
        let mut parser = Parser::new(tokens);
        if let Some(parent) = include_path.parent() {
            parser.base_path = Some(parent.to_path_buf());
        }
        parser.include_stack = self.include_stack.clone();
        parser.include_stack.push(canonical);
        parser.schemas = self.schemas.clone();
        parser.unions = self.unions.clone();

        let data = parser.parse()?;

        // Pull any definitions the included file added back into this parser.
        for (name, schema) in parser.schemas {
            self.schemas.insert(name, schema);
        }
        for (name, union_type) in parser.unions {
            self.unions.insert(name, union_type);
        }

        Ok(data)
    }
269
270 fn parse_field_type(&mut self) -> Result<FieldType> {
271 let mut type_str = String::new();
272
273 if self.check(TokenKind::LBracket) {
275 self.advance();
276 self.expect(TokenKind::RBracket)?;
277 type_str.push_str("[]");
278 }
279
280 let base = self.expect_word()?;
282
283 match base.as_str() {
285 "object" | "map" | "tuple" | "ref" | "tagged" => {
286 return Err(Error::ParseError(
287 format!("'{}' is a value type and cannot be used as a schema field type", base)
288 ));
289 }
290 _ => {}
291 }
292
293 type_str.push_str(&base);
294
295 if self.check(TokenKind::Question) {
297 self.advance();
298 type_str.push('?');
299 }
300
301 Ok(FieldType::parse(&type_str))
302 }
303
304 fn parse_pair(&mut self, depth: usize) -> Result<(String, Value)> {
309 let key = match self.current_kind() {
310 TokenKind::Word(w) => w.clone(),
311 TokenKind::String(s) => s.clone(),
312 _ => return Err(Error::UnexpectedToken {
313 expected: "key".to_string(),
314 got: format!("{:?}", self.current_kind()),
315 }),
316 };
317 self.advance();
318 self.expect(TokenKind::Colon)?;
319 let value = self.parse_value(depth)?;
320 Ok((key, value))
321 }
322
    /// Parse a single value, recursing with an explicit depth counter capped
    /// at MAX_PARSE_DEPTH to prevent stack overflow on hostile input.
    fn parse_value(&mut self, depth: usize) -> Result<Value> {
        if depth > MAX_PARSE_DEPTH {
            return Err(Error::ParseError("maximum parse nesting depth exceeded".into()));
        }
        match self.current_kind() {
            // Scalars: copy/clone the payload out of the token, then advance.
            TokenKind::Null => { self.advance(); Ok(Value::Null) }
            TokenKind::Bool(b) => { let b = *b; self.advance(); Ok(Value::Bool(b)) }
            TokenKind::Int(i) => { let i = *i; self.advance(); Ok(Value::Int(i)) }
            TokenKind::UInt(u) => { let u = *u; self.advance(); Ok(Value::UInt(u)) }
            TokenKind::JsonNumber(s) => { let s = s.clone(); self.advance(); Ok(Value::JsonNumber(s)) }
            TokenKind::Float(f) => { let f = *f; self.advance(); Ok(Value::Float(f)) }
            TokenKind::String(s) => { let s = s.clone(); self.advance(); Ok(Value::String(s)) }
            TokenKind::Bytes(b) => { let b = b.clone(); self.advance(); Ok(Value::Bytes(b)) }
            // A bare word in value position is treated as a string.
            TokenKind::Word(w) => { let w = w.clone(); self.advance(); Ok(Value::String(w)) }
            TokenKind::Ref(r) => { let r = r.clone(); self.advance(); Ok(Value::Ref(r)) }
            TokenKind::Timestamp(ts, tz) => { let ts = *ts; let tz = *tz; self.advance(); Ok(Value::Timestamp(ts, tz)) }
            // `:tag value` wraps the value that follows it.
            TokenKind::Tag(t) => {
                let tag = t.clone();
                self.advance();
                let inner = self.parse_value(depth + 1)?;
                Ok(Value::Tagged(tag, Box::new(inner)))
            }
            // `@table` / `@map` / unknown directives in value position.
            TokenKind::Directive(d) => {
                let directive = d.clone();
                self.advance();
                self.parse_directive_value(&directive, depth)
            }
            TokenKind::LBrace => self.parse_object(depth + 1),
            TokenKind::LBracket => self.parse_array(depth + 1),
            TokenKind::LParen => self.parse_tuple(depth + 1),
            _ => Err(Error::UnexpectedToken {
                expected: "value".to_string(),
                got: format!("{:?}", self.current_kind()),
            }),
        }
    }
363
364 fn parse_directive_value(&mut self, directive: &str, depth: usize) -> Result<Value> {
365 match directive {
366 "table" => self.parse_table(depth),
367 "map" => self.parse_map(depth),
368 _ => {
369 if self.can_start_value() {
371 let _ = self.parse_value(depth)?;
372 }
373 Ok(Value::Null)
374 }
375 }
376 }
377
378 fn can_start_value(&self) -> bool {
380 matches!(
381 self.current_kind(),
382 TokenKind::Null
383 | TokenKind::Bool(_)
384 | TokenKind::Int(_)
385 | TokenKind::UInt(_)
386 | TokenKind::Float(_)
387 | TokenKind::String(_)
388 | TokenKind::Bytes(_)
389 | TokenKind::Word(_)
390 | TokenKind::Ref(_)
391 | TokenKind::Timestamp(_, _)
392 | TokenKind::JsonNumber(_)
393 | TokenKind::Tag(_)
394 | TokenKind::Directive(_)
395 | TokenKind::LBrace
396 | TokenKind::LBracket
397 | TokenKind::LParen
398 )
399 }
400
401 fn parse_map(&mut self, depth: usize) -> Result<Value> {
402 self.expect(TokenKind::LBrace)?;
403 let mut pairs = Vec::new();
404
405 while !self.check(TokenKind::RBrace) {
406 let key = match self.current_kind() {
409 TokenKind::String(s) => { let s = s.clone(); self.advance(); Value::String(s) }
410 TokenKind::Word(w) => { let w = w.clone(); self.advance(); Value::String(w) }
411 TokenKind::Int(i) => { let i = *i; self.advance(); Value::Int(i) }
412 TokenKind::UInt(u) => { let u = *u; self.advance(); Value::UInt(u) }
413 _ => return Err(Error::UnexpectedToken {
414 expected: "map key".to_string(),
415 got: format!("{:?}", self.current_kind()),
416 }),
417 };
418
419 self.expect(TokenKind::Colon)?;
420 let value = self.parse_value(depth + 1)?;
421 pairs.push((key, value));
422
423 if self.check(TokenKind::Comma) {
424 self.advance();
425 }
426 }
427
428 self.expect(TokenKind::RBrace)?;
429 Ok(Value::Map(pairs))
430 }
431
432 fn parse_table(&mut self, depth: usize) -> Result<Value> {
433 let struct_name = self.expect_word()?;
434 let schema = self.schemas
435 .get(&struct_name)
436 .ok_or_else(|| Error::UnknownStruct(struct_name.clone()))?
437 .clone();
438
439 self.expect(TokenKind::LBracket)?;
440
441 let mut rows = Vec::new();
442 while !self.check(TokenKind::RBracket) {
443 let row = self.parse_tuple_with_schema(&schema, depth + 1)?;
444 rows.push(row);
445 if self.check(TokenKind::Comma) {
446 self.advance();
447 }
448 }
449
450 self.expect(TokenKind::RBracket)?;
451 Ok(Value::Array(rows))
452 }
453
454 fn parse_tuple_with_schema(&mut self, schema: &Schema, depth: usize) -> Result<Value> {
455 self.expect(TokenKind::LParen)?;
456
457 let mut obj = ObjectMap::new();
458 for field in &schema.fields {
459 let value = self.parse_value_for_field(&field.field_type, depth)?;
460 obj.insert(field.name.clone(), value);
461 if self.check(TokenKind::Comma) {
462 self.advance();
463 }
464 }
465
466 self.expect(TokenKind::RParen)?;
467 Ok(Value::Object(obj))
468 }
469
    /// Parse one value in a `@table` row for a field typed `field_type`.
    fn parse_value_for_field(&mut self, field_type: &FieldType, depth: usize) -> Result<Value> {
        // `~` is accepted for ANY field here, not just nullable ones.
        // NOTE(review): nullability does not appear to be enforced at this
        // point — confirm whether validation happens elsewhere.
        if self.check(TokenKind::Null) {
            self.advance();
            return Ok(Value::Null);
        }

        // A parenthesized value whose field type names a known struct is a
        // nested row, parsed against that struct's schema.
        if !field_type.is_array && self.check(TokenKind::LParen) {
            if let Some(schema) = self.schemas.get(&field_type.base).cloned() {
                return self.parse_tuple_with_schema(&schema, depth + 1);
            }
        }

        // `[]T` fields: a bracketed list, each element typed as bare `T`.
        if field_type.is_array {
            self.expect(TokenKind::LBracket)?;
            let mut arr = Vec::new();
            let inner_type = FieldType::new(&field_type.base);
            while !self.check(TokenKind::RBracket) {
                arr.push(self.parse_value_for_field(&inner_type, depth + 1)?);
                if self.check(TokenKind::Comma) {
                    self.advance();
                }
            }
            self.expect(TokenKind::RBracket)?;
            return Ok(Value::Array(arr));
        }

        // Everything else falls back to the generic value grammar.
        self.parse_value(depth)
    }
505
506 fn parse_object(&mut self, depth: usize) -> Result<Value> {
507 self.expect(TokenKind::LBrace)?;
508 let mut obj = ObjectMap::new();
509
510 while !self.check(TokenKind::RBrace) {
511 if let TokenKind::Ref(r) = self.current_kind() {
512 let key = format!("!{}", r);
513 self.advance();
514 self.expect(TokenKind::Colon)?;
515 let value = self.parse_value(depth)?;
516 obj.insert(key, value);
517 } else {
518 let (key, value) = self.parse_pair(depth)?;
519 obj.insert(key, value);
520 }
521 if self.check(TokenKind::Comma) {
522 self.advance();
523 }
524 }
525
526 self.expect(TokenKind::RBrace)?;
527 Ok(Value::Object(obj))
528 }
529
530 fn parse_array(&mut self, depth: usize) -> Result<Value> {
531 self.expect(TokenKind::LBracket)?;
532 let mut arr = Vec::new();
533
534 while !self.check(TokenKind::RBracket) {
535 arr.push(self.parse_value(depth)?);
536 if self.check(TokenKind::Comma) {
537 self.advance();
538 }
539 }
540
541 self.expect(TokenKind::RBracket)?;
542 Ok(Value::Array(arr))
543 }
544
545 fn parse_tuple(&mut self, depth: usize) -> Result<Value> {
546 self.expect(TokenKind::LParen)?;
547 let mut arr = Vec::new();
548
549 while !self.check(TokenKind::RParen) {
550 arr.push(self.parse_value(depth)?);
551 if self.check(TokenKind::Comma) {
552 self.advance();
553 }
554 }
555
556 self.expect(TokenKind::RParen)?;
557 Ok(Value::Array(arr))
558 }
559
    /// The current token, or a synthetic EOF token (line/col 0) once the
    /// stream is exhausted.
    fn current(&self) -> &Token {
        // The `&Token { ... }` literal consists only of constants, so rvalue
        // static promotion gives it a 'static lifetime — no allocation, and
        // the returned reference is valid. Don't refactor this into a local
        // binding, which would break the promotion.
        self.tokens.get(self.pos).unwrap_or(&Token {
            kind: TokenKind::Eof,
            line: 0,
            col: 0,
        })
    }
571
572 fn current_kind(&self) -> &TokenKind {
573 &self.current().kind
574 }
575
576 fn advance(&mut self) {
577 if self.pos < self.tokens.len() {
578 self.pos += 1;
579 }
580 }
581
582 fn check(&self, expected: TokenKind) -> bool {
583 std::mem::discriminant(self.current_kind()) == std::mem::discriminant(&expected)
584 }
585
586 fn expect(&mut self, expected: TokenKind) -> Result<()> {
587 if self.check(expected.clone()) {
588 self.advance();
589 Ok(())
590 } else {
591 Err(Error::UnexpectedToken {
592 expected: format!("{:?}", expected),
593 got: format!("{:?}", self.current_kind()),
594 })
595 }
596 }
597
598 fn expect_word(&mut self) -> Result<String> {
599 match self.current_kind() {
600 TokenKind::Word(w) => {
601 let w = w.clone();
602 self.advance();
603 Ok(w)
604 }
605 _ => Err(Error::UnexpectedToken {
606 expected: "word".to_string(),
607 got: format!("{:?}", self.current_kind()),
608 }),
609 }
610 }
611
612 fn at_end(&self) -> bool {
613 matches!(self.current_kind(), TokenKind::Eof)
614 }
615}
616
#[cfg(test)]
mod tests {
    // Unit tests for the parser: scalar/container values, schema directives,
    // includes (with cycle detection), fuzz regressions, and depth limits.
    use super::*;
    use crate::lexer::Lexer;

    // Lex + parse a source string in one step (no base path, so includes
    // resolve relative to the working directory).
    fn parse(input: &str) -> Result<IndexMap<String, Value>> {
        let tokens = Lexer::new(input).tokenize()?;
        Parser::new(tokens).parse()
    }

    // --- scalar and container values ---

    #[test]
    fn test_simple_values() {
        let data = parse("a: 1, b: hello, c: true, d: ~").unwrap();
        assert_eq!(data.get("a").unwrap().as_int(), Some(1));
        assert_eq!(data.get("b").unwrap().as_str(), Some("hello"));
        assert_eq!(data.get("c").unwrap().as_bool(), Some(true));
        assert!(data.get("d").unwrap().is_null());
    }

    #[test]
    fn test_object() {
        let data = parse("obj: {x: 1, y: 2}").unwrap();
        let obj = data.get("obj").unwrap().as_object().unwrap();
        assert_eq!(obj.get("x").unwrap().as_int(), Some(1));
        assert_eq!(obj.get("y").unwrap().as_int(), Some(2));
    }

    #[test]
    fn test_array() {
        let data = parse("arr: [1, 2, 3]").unwrap();
        let arr = data.get("arr").unwrap().as_array().unwrap();
        assert_eq!(arr.len(), 3);
        assert_eq!(arr[0].as_int(), Some(1));
    }

    // --- schema directives ---

    #[test]
    fn test_struct_and_table() {
        let input = r#"
            @struct point (x: int, y: int)
            points: @table point [
                (1, 2),
                (3, 4),
            ]
        "#;
        let tokens = Lexer::new(input).tokenize().unwrap();
        let mut parser = Parser::new(tokens);
        let data = parser.parse().unwrap();

        let points = data.get("points").unwrap().as_array().unwrap();
        assert_eq!(points.len(), 2);

        let p0 = points[0].as_object().unwrap();
        assert_eq!(p0.get("x").unwrap().as_int(), Some(1));
        assert_eq!(p0.get("y").unwrap().as_int(), Some(2));
    }

    #[test]
    fn test_union_def() {
        let input = r#"
            @union Shape {
                Circle(radius: float),
                Rectangle(width: float, height: float),
                Point(),
            }
        "#;
        let tokens = Lexer::new(input).tokenize().unwrap();
        let mut parser = Parser::new(tokens);
        parser.parse().unwrap();
        let unions = parser.into_unions();
        let shape = unions.get("Shape").unwrap();
        assert_eq!(shape.variants.len(), 3);
        assert_eq!(shape.variants[0].name, "Circle");
        assert_eq!(shape.variants[0].fields.len(), 1);
        assert_eq!(shape.variants[1].name, "Rectangle");
        assert_eq!(shape.variants[1].fields.len(), 2);
        assert_eq!(shape.variants[2].name, "Point");
        assert_eq!(shape.variants[2].fields.len(), 0);
    }

    // --- @map values ---

    #[test]
    fn test_map_value() {
        let data = parse("m: @map {1: one, 2: two}").unwrap();
        let m = data.get("m").unwrap().as_map().unwrap();
        assert_eq!(m.len(), 2);
        assert_eq!(m[0].0.as_int(), Some(1));
        assert_eq!(m[0].1.as_str(), Some("one"));
        assert_eq!(m[1].0.as_int(), Some(2));
        assert_eq!(m[1].1.as_str(), Some("two"));
    }

    #[test]
    fn test_map_with_string_keys() {
        let data = parse(r#"m: @map {"key1": 10, "key2": 20}"#).unwrap();
        let m = data.get("m").unwrap().as_map().unwrap();
        assert_eq!(m.len(), 2);
    }

    #[test]
    fn test_map_empty() {
        let data = parse("m: @map {}").unwrap();
        let m = data.get("m").unwrap().as_map().unwrap();
        assert_eq!(m.len(), 0);
    }

    // --- refs, tags, tuples, nesting ---

    #[test]
    fn test_ref_value() {
        let data = parse("config: !base_config").unwrap();
        assert_eq!(data.get("config").unwrap().as_ref_name(), Some("base_config"));
    }

    #[test]
    fn test_tagged_value() {
        let data = parse("status: :ok 200").unwrap();
        let (tag, inner) = data.get("status").unwrap().as_tagged().unwrap();
        assert_eq!(tag, "ok");
        assert_eq!(inner.as_int(), Some(200));
    }

    #[test]
    fn test_tagged_null() {
        let data = parse("status: :none ~").unwrap();
        let (tag, inner) = data.get("status").unwrap().as_tagged().unwrap();
        assert_eq!(tag, "none");
        assert!(inner.is_null());
    }

    #[test]
    fn test_tuple_value() {
        let data = parse("point: (1, 2, 3)").unwrap();
        let arr = data.get("point").unwrap().as_array().unwrap();
        assert_eq!(arr.len(), 3);
        assert_eq!(arr[0].as_int(), Some(1));
        assert_eq!(arr[1].as_int(), Some(2));
        assert_eq!(arr[2].as_int(), Some(3));
    }

    #[test]
    fn test_nested_object() {
        let data = parse("outer: {inner: {x: 1}}").unwrap();
        let outer = data.get("outer").unwrap().as_object().unwrap();
        let inner = outer.get("inner").unwrap().as_object().unwrap();
        assert_eq!(inner.get("x").unwrap().as_int(), Some(1));
    }

    #[test]
    fn test_nested_arrays() {
        let data = parse("matrix: [[1, 2], [3, 4]]").unwrap();
        let matrix = data.get("matrix").unwrap().as_array().unwrap();
        assert_eq!(matrix.len(), 2);
        let row0 = matrix[0].as_array().unwrap();
        assert_eq!(row0[0].as_int(), Some(1));
    }

    #[test]
    fn test_struct_with_nullable_field() {
        let input = r#"
            @struct user (name: string, email: string?)
            users: @table user [
                (alice, "a@test.com"),
                (bob, ~),
            ]
        "#;
        let tokens = Lexer::new(input).tokenize().unwrap();
        let mut parser = Parser::new(tokens);
        let data = parser.parse().unwrap();
        let schemas = parser.into_schemas();

        let schema = schemas.get("user").unwrap();
        assert!(schema.fields[1].field_type.nullable);

        let users = data.get("users").unwrap().as_array().unwrap();
        assert_eq!(users.len(), 2);
        assert!(users[1].as_object().unwrap().get("email").unwrap().is_null());
    }

    #[test]
    fn test_struct_with_array_field() {
        let input = r#"
            @struct item (name: string, tags: []string)
            items: @table item [
                (widget, [cool, useful]),
            ]
        "#;
        let tokens = Lexer::new(input).tokenize().unwrap();
        let mut parser = Parser::new(tokens);
        let data = parser.parse().unwrap();

        let items = data.get("items").unwrap().as_array().unwrap();
        let tags = items[0].as_object().unwrap().get("tags").unwrap().as_array().unwrap();
        assert_eq!(tags.len(), 2);
    }

    #[test]
    fn test_root_array_directive() {
        let input = "@root-array\nroot: [1, 2, 3]";
        let tokens = Lexer::new(input).tokenize().unwrap();
        let mut parser = Parser::new(tokens);
        parser.parse().unwrap();
        assert!(parser.is_root_array());
    }

    #[test]
    fn test_ref_key_at_top_level() {
        let input = "!defaults: {theme: dark}";
        let data = parse(input).unwrap();
        assert!(data.contains_key("!defaults"));
        let obj = data.get("!defaults").unwrap().as_object().unwrap();
        assert_eq!(obj.get("theme").unwrap().as_str(), Some("dark"));
    }

    #[test]
    fn test_string_key() {
        let data = parse(r#""my key": 42"#).unwrap();
        assert_eq!(data.get("my key").unwrap().as_int(), Some(42));
    }

    // --- error paths ---

    #[test]
    fn test_unexpected_token_error() {
        // Only checks that this neither panics nor hangs; a leading `]` is
        // skipped by the top-level loop, so the result may be Ok or Err.
        let result = parse("] invalid");
        let _ = result;
    }

    #[test]
    fn test_missing_colon_error() {
        let input = "key value";
        let result = parse(input);
        assert!(result.is_err());
    }

    #[test]
    fn test_unknown_struct_in_table() {
        let input = "data: @table nonexistent [(1, 2)]";
        let result = parse(input);
        assert!(result.is_err());
    }

    #[test]
    fn test_struct_field_without_type() {
        let input = r#"
            @struct simple (name, value)
            items: @table simple [
                (hello, world),
            ]
        "#;
        let tokens = Lexer::new(input).tokenize().unwrap();
        let mut parser = Parser::new(tokens);
        let data = parser.parse().unwrap();
        let schemas = parser.into_schemas();

        // Untyped fields default to string.
        let schema = schemas.get("simple").unwrap();
        assert_eq!(schema.fields[0].field_type.base, "string");
        assert_eq!(schema.fields[1].field_type.base, "string");

        let items = data.get("items").unwrap().as_array().unwrap();
        assert_eq!(items[0].as_object().unwrap().get("name").unwrap().as_str(), Some("hello"));
    }

    // --- unknown directives ---

    #[test]
    fn test_unknown_directive_ignored() {
        let data = parse("@custom_directive\nkey: value").unwrap();
        assert_eq!(data.get("key").unwrap().as_str(), Some("value"));
    }

    #[test]
    fn test_unknown_directive_consumes_same_line_argument() {
        let data = parse("@custom foo\nkey: value").unwrap();
        assert!(data.get("foo").is_none(), "foo should be consumed as directive arg, not a key");
        assert_eq!(data.get("key").unwrap().as_str(), Some("value"));

        let data = parse("@custom [1, 2, 3]\nkey: value").unwrap();
        assert_eq!(data.get("key").unwrap().as_str(), Some("value"));

        let data = parse("@custom {a: 1}\nkey: value").unwrap();
        assert_eq!(data.get("key").unwrap().as_str(), Some("value"));

        let data = parse("@custom\nkey: value").unwrap();
        assert_eq!(data.get("key").unwrap().as_str(), Some("value"));

        let data = parse("key: value\n@custom").unwrap();
        assert_eq!(data.get("key").unwrap().as_str(), Some("value"));

        let data = parse("@custom\nfoo: bar").unwrap();
        assert_eq!(data.get("foo").unwrap().as_str(), Some("bar"));
    }

    #[test]
    fn test_unknown_directive_value_consumes_argument() {
        let data = parse("key: @unknown [1, 2, 3]\nother: 42").unwrap();
        assert!(data.get("key").unwrap().is_null(), "unknown directive value should be null");
        assert_eq!(data.get("other").unwrap().as_int(), Some(42), "next key should parse normally");

        let data = parse("key: @unknown {a: 1}\nother: ok").unwrap();
        assert!(data.get("key").unwrap().is_null());
        assert_eq!(data.get("other").unwrap().as_str(), Some("ok"));

        let data = parse("key: @unknown 42\nother: ok").unwrap();
        assert!(data.get("key").unwrap().is_null());
        assert_eq!(data.get("other").unwrap().as_str(), Some("ok"));

        let data = parse("arr: [@unknown, 1, 2]").unwrap();
        let arr = data.get("arr").unwrap().as_array().unwrap();
        assert!(arr[0].is_null());
        assert_eq!(arr[1].as_int(), Some(1));
    }

    #[test]
    fn test_object_with_ref_key() {
        let data = parse("obj: {!base: 1, key: 2}").unwrap();
        let obj = data.get("obj").unwrap().as_object().unwrap();
        assert!(obj.contains_key("!base"));
        assert_eq!(obj.get("!base").unwrap().as_int(), Some(1));
        assert_eq!(obj.get("key").unwrap().as_int(), Some(2));
    }

    #[test]
    fn test_nested_struct_in_table() {
        let input = r#"
            @struct addr (city: string, zip: string)
            @struct person (name: string, home: addr)
            people: @table person [
                (alice, (Boston, "02101")),
                (bob, (NYC, "10001")),
            ]
        "#;
        let tokens = Lexer::new(input).tokenize().unwrap();
        let mut parser = Parser::new(tokens);
        let data = parser.parse().unwrap();

        let people = data.get("people").unwrap().as_array().unwrap();
        let alice_home = people[0].as_object().unwrap().get("home").unwrap().as_object().unwrap();
        assert_eq!(alice_home.get("city").unwrap().as_str(), Some("Boston"));
    }

    // --- include handling (writes real files into the temp dir) ---

    #[test]
    fn test_include_cycle_detection() {
        let dir = std::env::temp_dir();
        let file_path = dir.join("test_cycle_self.tl");
        std::fs::write(&file_path, "@include \"test_cycle_self.tl\"\nval: 1").unwrap();

        let content = std::fs::read_to_string(&file_path).unwrap();
        let tokens = Lexer::new(&content).tokenize().unwrap();
        let mut parser = Parser::new(tokens).with_base_path(&file_path);
        let result = parser.parse();
        assert!(result.is_err(), "Should detect self-referencing include");
        let err_msg = result.unwrap_err().to_string();
        assert!(err_msg.contains("Circular include"), "Error should mention circular include: {}", err_msg);

        std::fs::remove_file(&file_path).ok();
    }

    #[test]
    fn test_include_mutual_cycle_detection() {
        let dir = std::env::temp_dir();
        let file_a = dir.join("test_cycle_a.tl");
        let file_b = dir.join("test_cycle_b.tl");
        std::fs::write(&file_a, "@include \"test_cycle_b.tl\"\na_val: 1").unwrap();
        std::fs::write(&file_b, "@include \"test_cycle_a.tl\"\nb_val: 2").unwrap();

        let content = std::fs::read_to_string(&file_a).unwrap();
        let tokens = Lexer::new(&content).tokenize().unwrap();
        let mut parser = Parser::new(tokens).with_base_path(&file_a);
        let result = parser.parse();
        assert!(result.is_err(), "Should detect mutual cycle between A and B");
        let err_msg = result.unwrap_err().to_string();
        assert!(err_msg.contains("Circular include"), "Error should mention circular include: {}", err_msg);

        std::fs::remove_file(&file_a).ok();
        std::fs::remove_file(&file_b).ok();
    }

    #[test]
    fn test_include_stack_propagated_to_child() {
        let parser = Parser::new(vec![]);
        assert!(parser.include_stack.is_empty(), "New parser should have empty include stack");
    }

    // --- bytes literals ---

    #[test]
    fn test_bytes_literal_value() {
        let data = parse(r#"payload: b"cafef00d""#).unwrap();
        let val = data.get("payload").unwrap();
        assert_eq!(val.as_bytes(), Some(&[0xca, 0xfe, 0xf0, 0x0d][..]));
    }

    #[test]
    fn test_bytes_literal_empty_value() {
        let data = parse(r#"empty: b"""#).unwrap();
        let val = data.get("empty").unwrap();
        assert_eq!(val.as_bytes(), Some(&[][..]));
    }

    #[test]
    fn test_bytes_literal_in_array() {
        let data = parse(r#"arr: [b"cafe", b"babe"]"#).unwrap();
        let arr = data.get("arr").unwrap().as_array().unwrap();
        assert_eq!(arr[0].as_bytes(), Some(&[0xca, 0xfe][..]));
        assert_eq!(arr[1].as_bytes(), Some(&[0xba, 0xbe][..]));
    }

    #[test]
    fn test_bytes_literal_in_object() {
        let data = parse(r#"obj: {data: b"ff00"}"#).unwrap();
        let obj = data.get("obj").unwrap().as_object().unwrap();
        assert_eq!(obj.get("data").unwrap().as_bytes(), Some(&[0xff, 0x00][..]));
    }

    // --- depth limits and fuzz regressions ---

    #[test]
    fn test_fuzz_deeply_nested_arrays_no_stack_overflow() {
        let depth = 500;
        let input = format!("key: {}{}", "[".repeat(depth), "]".repeat(depth));
        let result = crate::TeaLeaf::parse(&input);
        match result {
            Err(e) => {
                let err = format!("{}", e);
                assert!(err.contains("nesting depth"), "Error should mention nesting depth: {}", err);
            }
            Ok(_) => panic!("Should fail with depth exceeded, not succeed"),
        }
    }

    #[test]
    fn test_fuzz_deeply_nested_objects_no_stack_overflow() {
        let depth = 500;
        let mut input = String::from("key: ");
        for i in 0..depth {
            input.push_str(&format!("{{k{}: ", i));
        }
        input.push_str("1");
        for _ in 0..depth {
            input.push('}');
        }
        let result = crate::TeaLeaf::parse(&input);
        assert!(result.is_err(), "Should fail with depth exceeded, not stack overflow");
    }

    #[test]
    fn test_fuzz_deeply_nested_tags_no_stack_overflow() {
        let depth = 500;
        let mut input = String::from("key: ");
        for i in 0..depth {
            input.push_str(&format!(":t{} ", i));
        }
        input.push_str("42");
        let result = crate::TeaLeaf::parse(&input);
        assert!(result.is_err(), "Should fail with depth exceeded, not stack overflow");
    }

    #[test]
    fn test_parse_depth_256_succeeds() {
        let depth = 200;
        let input = format!("key: {}1{}", "[".repeat(depth), "]".repeat(depth));
        let result = crate::TeaLeaf::parse(&input);
        if let Err(e) = &result {
            panic!("200 levels of nesting should be fine: {}", e);
        }
    }

    #[test]
    fn test_fuzz_crash_e42e_full_parse_no_panic() {
        let input = "\"0B\u{10}\u{3}#\"0BP\u{07FE}-----\u{061D}\u{07FE}\u{07FE}-----\u{061D}\u{3}#\"0B\u{10}\u{3}#\"0BP\u{07FE}-----\u{061D}\u{07FE}\u{07FE}-----\u{061D}\u{07FE}";
        let _ = crate::TeaLeaf::parse(input);
    }

    #[test]
    fn test_fuzz_crash_d038_full_parse_no_panic() {
        let input = "z\" \"-\"\t; \"\"\")\"\"\" 8] ] 02)3313312)313-333-333-3332)33-133-3-33331333302)33";
        let _ = crate::TeaLeaf::parse(input);
    }

    #[test]
    fn test_reject_value_only_schema_field_types() {
        for bad_type in &["object", "map", "tuple", "ref", "tagged"] {
            let input = format!("@struct Bad (field: {})\n", bad_type);
            let result = crate::TeaLeaf::parse(&input);
            assert!(result.is_err(), "should reject '{}' as schema field type", bad_type);
            let err = format!("{}", result.err().unwrap());
            assert!(err.contains("value type"), "error for '{}' should mention 'value type': {}", bad_type, err);
        }
        let result = crate::TeaLeaf::parse("@struct Bad (field: []object)\n");
        assert!(result.is_err(), "should reject '[]object' as schema field type");

        for good_type in &["string", "int", "int8", "float", "bool", "bytes", "timestamp", "MyStruct"] {
            let input = format!("@struct Good (field: {})\n", good_type);
            assert!(crate::TeaLeaf::parse(&input).is_ok(), "'{}' should be accepted", good_type);
        }
    }
}