Skip to main content

tealeaf/
parser.rs

1//! Parser for TeaLeaf text format
2
3use std::path::Path;
4use indexmap::IndexMap;
5use crate::{Error, Result, Value, Schema, Field, FieldType, Union, Variant};
6use crate::types::ObjectMap;
7use crate::lexer::{Token, TokenKind, Lexer};
8
/// Maximum recursion depth for nested parse_value calls (arrays, objects, maps, tuples, tags).
/// Matches the binary reader's MAX_DECODE_DEPTH to ensure text↔binary parity.
/// `Parser::parse_value` returns a `ParseError` once its `depth` argument exceeds this value.
const MAX_PARSE_DEPTH: usize = 256;
12
/// Parser that turns a lexed token stream into top-level key/value data,
/// collecting `@struct` and `@union` definitions along the way.
pub struct Parser {
    /// Token stream being parsed.
    tokens: Vec<Token>,
    /// Index into `tokens` of the token currently being examined.
    pos: usize,
    /// Schemas registered by `@struct` (and merged in from `@include`d files), keyed by name.
    schemas: IndexMap<String, Schema>,
    /// Unions registered by `@union` (and merged in from `@include`d files), keyed by name.
    unions: IndexMap<String, Union>,
    /// Directory that `@include` paths are joined onto; when `None` the path is used as written.
    base_path: Option<std::path::PathBuf>,
    /// Tracks included file paths for cycle detection
    include_stack: Vec<std::path::PathBuf>,
    /// Indicates the source was a root-level JSON array (set by @root-array directive)
    is_root_array: bool,
}
24
25impl Parser {
26    pub fn new(tokens: Vec<Token>) -> Self {
27        Self {
28            tokens,
29            pos: 0,
30            schemas: IndexMap::new(),
31            unions: IndexMap::new(),
32            base_path: None,
33            include_stack: Vec::new(),
34            is_root_array: false,
35        }
36    }
37
38    pub fn with_base_path(mut self, path: &Path) -> Self {
39        self.base_path = path.parent().map(|p| p.to_path_buf());
40        self
41    }
42
43    pub fn parse(&mut self) -> Result<IndexMap<String, Value>> {
44        let mut result = IndexMap::new();
45
46        while !self.at_end() {
47            match self.current_kind() {
48                TokenKind::Directive(d) => {
49                    let directive = d.clone();
50                    self.advance();
51                    match directive.as_str() {
52                        "struct" => self.parse_struct_def()?,
53                        "union" => self.parse_union_def()?,
54                        "include" => {
55                            let included = self.parse_include()?;
56                            for (k, v) in included {
57                                result.insert(k, v);
58                            }
59                        }
60                        "root-array" => {
61                            // Marks this document as representing a root-level JSON array
62                            self.is_root_array = true;
63                        }
64                        _ => {
65                            // Unknown top-level directive: silently ignored (spec §1.18).
66                            // Consume same-line argument for forward compatibility —
67                            // a future directive like @custom foo should not leave
68                            // "foo" to be misparsed as a key.
69                            let directive_line = self.tokens[self.pos - 1].line;
70                            if !self.at_end()
71                                && self.current().line == directive_line
72                                && self.can_start_value()
73                            {
74                                let _ = self.parse_value(0)?;
75                            }
76                        }
77                    }
78                }
79                TokenKind::Word(_) | TokenKind::String(_) => {
80                    let (key, value) = self.parse_pair(0)?;
81                    result.insert(key, value);
82                }
83                TokenKind::Ref(r) => {
84                    let ref_name = r.clone();
85                    self.advance();
86                    self.expect(TokenKind::Colon)?;
87                    let value = self.parse_value(0)?;
88                    result.insert(format!("!{}", ref_name), value);
89                }
90                TokenKind::Eof => break,
91                _ => { self.advance(); }
92            }
93        }
94
95        Ok(result)
96    }
97
98    pub fn into_schemas(self) -> IndexMap<String, Schema> {
99        self.schemas
100    }
101
102    pub fn into_unions(self) -> IndexMap<String, Union> {
103        self.unions
104    }
105
106    /// Consume the parser and return both schemas and unions.
107    pub fn into_schemas_and_unions(self) -> (IndexMap<String, Schema>, IndexMap<String, Union>) {
108        (self.schemas, self.unions)
109    }
110
111    /// Check if the @root-array directive was present
112    pub fn is_root_array(&self) -> bool {
113        self.is_root_array
114    }
115
116    // =========================================================================
117    // Struct Definition
118    // =========================================================================
119
120    fn parse_struct_def(&mut self) -> Result<()> {
121        let name = self.expect_word()?;
122        self.expect(TokenKind::LParen)?;
123
124        let mut schema = Schema::new(&name);
125
126        while !self.check(TokenKind::RParen) {
127            // Field names must be unquoted names per spec grammar
128            let field_name = match self.current_kind() {
129                TokenKind::Word(w) => {
130                    let w = w.clone();
131                    self.advance();
132                    w
133                }
134                _ => return Err(Error::UnexpectedToken {
135                    expected: "field name".to_string(),
136                    got: format!("{:?}", self.current_kind()),
137                }),
138            };
139
140            let field_type = if self.check(TokenKind::Colon) {
141                self.advance();
142                self.parse_field_type()?
143            } else {
144                FieldType::new("string")
145            };
146
147            schema.add_field(field_name, field_type);
148
149            if self.check(TokenKind::Comma) {
150                self.advance();
151            }
152        }
153
154        self.expect(TokenKind::RParen)?;
155        self.schemas.insert(name, schema);
156        Ok(())
157    }
158
159    // =========================================================================
160    // Union Definition
161    // =========================================================================
162
163    fn parse_union_def(&mut self) -> Result<()> {
164        let name = self.expect_word()?;
165        self.expect(TokenKind::LBrace)?;
166
167        let mut union_type = Union::new(&name);
168
169        while !self.check(TokenKind::RBrace) {
170            let variant_name = self.expect_word()?;
171            self.expect(TokenKind::LParen)?;
172
173            let mut variant = Variant::new(&variant_name);
174
175            while !self.check(TokenKind::RParen) {
176                let field_name = self.expect_word()?;
177
178                let field_type = if self.check(TokenKind::Colon) {
179                    self.advance();
180                    self.parse_field_type()?
181                } else {
182                    FieldType::new("string")
183                };
184
185                variant.fields.push(Field::new(field_name, field_type));
186
187                if self.check(TokenKind::Comma) {
188                    self.advance();
189                }
190            }
191
192            self.expect(TokenKind::RParen)?;
193            union_type.add_variant(variant);
194
195            if self.check(TokenKind::Comma) {
196                self.advance();
197            }
198        }
199
200        self.expect(TokenKind::RBrace)?;
201        self.unions.insert(name, union_type);
202        Ok(())
203    }
204
205    // =========================================================================
206    // Include Directive
207    // =========================================================================
208
209    fn parse_include(&mut self) -> Result<IndexMap<String, Value>> {
210        let path_str = match self.current_kind() {
211            TokenKind::String(s) => s.clone(),
212            TokenKind::Word(w) => w.clone(),
213            _ => return Err(Error::UnexpectedToken {
214                expected: "file path".to_string(),
215                got: format!("{:?}", self.current_kind()),
216            }),
217        };
218        self.advance();
219
220        // Resolve path relative to current file
221        let include_path = if let Some(ref base) = self.base_path {
222            base.join(&path_str)
223        } else {
224            std::path::PathBuf::from(&path_str)
225        };
226
227        // Cycle detection and depth limit
228        let canonical = include_path.canonicalize()
229            .unwrap_or_else(|_| include_path.clone());
230        if self.include_stack.contains(&canonical) {
231            return Err(Error::ParseError(format!(
232                "Circular include detected: {}", canonical.display()
233            )));
234        }
235        if self.include_stack.len() >= 32 {
236            return Err(Error::ParseError(
237                "Include depth exceeds limit of 32".into()
238            ));
239        }
240
241        // Read and parse the included file
242        let content = std::fs::read_to_string(&include_path)
243            .map_err(|e| Error::ParseError(format!("Failed to include {}: {}", path_str, e)))?;
244
245        let tokens = Lexer::new(&content).tokenize()?;
246        let mut parser = Parser::new(tokens);
247        if let Some(parent) = include_path.parent() {
248            parser.base_path = Some(parent.to_path_buf());
249        }
250        // Propagate include stack and accumulated schemas/unions to child parser
251        // so that schemas from earlier includes are available in later includes.
252        parser.include_stack = self.include_stack.clone();
253        parser.include_stack.push(canonical);
254        parser.schemas = self.schemas.clone();
255        parser.unions = self.unions.clone();
256
257        let data = parser.parse()?;
258
259        // Merge schemas and unions
260        for (name, schema) in parser.schemas {
261            self.schemas.insert(name, schema);
262        }
263        for (name, union_type) in parser.unions {
264            self.unions.insert(name, union_type);
265        }
266
267        Ok(data)
268    }
269
270    fn parse_field_type(&mut self) -> Result<FieldType> {
271        let mut type_str = String::new();
272
273        // Handle array prefix
274        if self.check(TokenKind::LBracket) {
275            self.advance();
276            self.expect(TokenKind::RBracket)?;
277            type_str.push_str("[]");
278        }
279
280        // Base type
281        let base = self.expect_word()?;
282
283        // Reject value-only types that cannot be schema field types (spec §2.1)
284        match base.as_str() {
285            "object" | "map" | "tuple" | "ref" | "tagged" => {
286                return Err(Error::ParseError(
287                    format!("'{}' is a value type and cannot be used as a schema field type", base)
288                ));
289            }
290            _ => {}
291        }
292
293        type_str.push_str(&base);
294
295        // Nullable suffix
296        if self.check(TokenKind::Question) {
297            self.advance();
298            type_str.push('?');
299        }
300
301        Ok(FieldType::parse(&type_str))
302    }
303
304    // =========================================================================
305    // Key-Value Pairs
306    // =========================================================================
307
308    fn parse_pair(&mut self, depth: usize) -> Result<(String, Value)> {
309        let key = match self.current_kind() {
310            TokenKind::Word(w) => w.clone(),
311            TokenKind::String(s) => s.clone(),
312            _ => return Err(Error::UnexpectedToken {
313                expected: "key".to_string(),
314                got: format!("{:?}", self.current_kind()),
315            }),
316        };
317        self.advance();
318        self.expect(TokenKind::Colon)?;
319        let value = self.parse_value(depth)?;
320        Ok((key, value))
321    }
322
323    // =========================================================================
324    // Values
325    // =========================================================================
326
327    fn parse_value(&mut self, depth: usize) -> Result<Value> {
328        if depth > MAX_PARSE_DEPTH {
329            return Err(Error::ParseError("maximum parse nesting depth exceeded".into()));
330        }
331        match self.current_kind() {
332            TokenKind::Null => { self.advance(); Ok(Value::Null) }
333            TokenKind::Bool(b) => { let b = *b; self.advance(); Ok(Value::Bool(b)) }
334            TokenKind::Int(i) => { let i = *i; self.advance(); Ok(Value::Int(i)) }
335            TokenKind::UInt(u) => { let u = *u; self.advance(); Ok(Value::UInt(u)) }
336            TokenKind::JsonNumber(s) => { let s = s.clone(); self.advance(); Ok(Value::JsonNumber(s)) }
337            TokenKind::Float(f) => { let f = *f; self.advance(); Ok(Value::Float(f)) }
338            TokenKind::String(s) => { let s = s.clone(); self.advance(); Ok(Value::String(s)) }
339            TokenKind::Bytes(b) => { let b = b.clone(); self.advance(); Ok(Value::Bytes(b)) }
340            TokenKind::Word(w) => { let w = w.clone(); self.advance(); Ok(Value::String(w)) }
341            TokenKind::Ref(r) => { let r = r.clone(); self.advance(); Ok(Value::Ref(r)) }
342            TokenKind::Timestamp(ts, tz) => { let ts = *ts; let tz = *tz; self.advance(); Ok(Value::Timestamp(ts, tz)) }
343            TokenKind::Tag(t) => {
344                let tag = t.clone();
345                self.advance();
346                let inner = self.parse_value(depth + 1)?;
347                Ok(Value::Tagged(tag, Box::new(inner)))
348            }
349            TokenKind::Directive(d) => {
350                let directive = d.clone();
351                self.advance();
352                self.parse_directive_value(&directive, depth)
353            }
354            TokenKind::LBrace => self.parse_object(depth + 1),
355            TokenKind::LBracket => self.parse_array(depth + 1),
356            TokenKind::LParen => self.parse_tuple(depth + 1),
357            _ => Err(Error::UnexpectedToken {
358                expected: "value".to_string(),
359                got: format!("{:?}", self.current_kind()),
360            }),
361        }
362    }
363
364    fn parse_directive_value(&mut self, directive: &str, depth: usize) -> Result<Value> {
365        match directive {
366            "table" => self.parse_table(depth),
367            "map" => self.parse_map(depth),
368            _ => {
369                // Unknown directive in value position: consume argument, return null (spec §1.18)
370                if self.can_start_value() {
371                    let _ = self.parse_value(depth)?;
372                }
373                Ok(Value::Null)
374            }
375        }
376    }
377
378    /// Returns true if the current token can begin a value expression.
379    fn can_start_value(&self) -> bool {
380        matches!(
381            self.current_kind(),
382            TokenKind::Null
383                | TokenKind::Bool(_)
384                | TokenKind::Int(_)
385                | TokenKind::UInt(_)
386                | TokenKind::Float(_)
387                | TokenKind::String(_)
388                | TokenKind::Bytes(_)
389                | TokenKind::Word(_)
390                | TokenKind::Ref(_)
391                | TokenKind::Timestamp(_, _)
392                | TokenKind::JsonNumber(_)
393                | TokenKind::Tag(_)
394                | TokenKind::Directive(_)
395                | TokenKind::LBrace
396                | TokenKind::LBracket
397                | TokenKind::LParen
398        )
399    }
400
401    fn parse_map(&mut self, depth: usize) -> Result<Value> {
402        self.expect(TokenKind::LBrace)?;
403        let mut pairs = Vec::new();
404
405        while !self.check(TokenKind::RBrace) {
406            // Parse key (string, name, or integer per spec grammar:
407            // map_key = string | name | integer)
408            let key = match self.current_kind() {
409                TokenKind::String(s) => { let s = s.clone(); self.advance(); Value::String(s) }
410                TokenKind::Word(w) => { let w = w.clone(); self.advance(); Value::String(w) }
411                TokenKind::Int(i) => { let i = *i; self.advance(); Value::Int(i) }
412                TokenKind::UInt(u) => { let u = *u; self.advance(); Value::UInt(u) }
413                _ => return Err(Error::UnexpectedToken {
414                    expected: "map key".to_string(),
415                    got: format!("{:?}", self.current_kind()),
416                }),
417            };
418
419            self.expect(TokenKind::Colon)?;
420            let value = self.parse_value(depth + 1)?;
421            pairs.push((key, value));
422
423            if self.check(TokenKind::Comma) {
424                self.advance();
425            }
426        }
427
428        self.expect(TokenKind::RBrace)?;
429        Ok(Value::Map(pairs))
430    }
431
432    fn parse_table(&mut self, depth: usize) -> Result<Value> {
433        let struct_name = self.expect_word()?;
434        let schema = self.schemas
435            .get(&struct_name)
436            .ok_or_else(|| Error::UnknownStruct(struct_name.clone()))?
437            .clone();
438
439        self.expect(TokenKind::LBracket)?;
440
441        let mut rows = Vec::new();
442        while !self.check(TokenKind::RBracket) {
443            let row = self.parse_tuple_with_schema(&schema, depth + 1)?;
444            rows.push(row);
445            if self.check(TokenKind::Comma) {
446                self.advance();
447            }
448        }
449
450        self.expect(TokenKind::RBracket)?;
451        Ok(Value::Array(rows))
452    }
453
454    fn parse_tuple_with_schema(&mut self, schema: &Schema, depth: usize) -> Result<Value> {
455        self.expect(TokenKind::LParen)?;
456
457        let mut obj = ObjectMap::new();
458        for field in &schema.fields {
459            let value = self.parse_value_for_field(&field.field_type, depth)?;
460            obj.insert(field.name.clone(), value);
461            if self.check(TokenKind::Comma) {
462                self.advance();
463            }
464        }
465
466        self.expect(TokenKind::RParen)?;
467        Ok(Value::Object(obj))
468    }
469
470    fn parse_value_for_field(&mut self, field_type: &FieldType, depth: usize) -> Result<Value> {
471        // Handle null
472        if self.check(TokenKind::Null) {
473            self.advance();
474            return Ok(Value::Null);
475        }
476
477        // Handle nested struct — schema names shadow built-in types, so check
478        // by name rather than relying on is_struct() which lacks schema context.
479        // The LParen guard disambiguates: struct tuples always start with `(`,
480        // while primitive values (int, bool, string, etc.) never do.
481        if !field_type.is_array && self.check(TokenKind::LParen) {
482            if let Some(schema) = self.schemas.get(&field_type.base).cloned() {
483                return self.parse_tuple_with_schema(&schema, depth + 1);
484            }
485        }
486
487        // Handle array
488        if field_type.is_array {
489            self.expect(TokenKind::LBracket)?;
490            let mut arr = Vec::new();
491            let inner_type = FieldType::new(&field_type.base);
492            while !self.check(TokenKind::RBracket) {
493                arr.push(self.parse_value_for_field(&inner_type, depth + 1)?);
494                if self.check(TokenKind::Comma) {
495                    self.advance();
496                }
497            }
498            self.expect(TokenKind::RBracket)?;
499            return Ok(Value::Array(arr));
500        }
501
502        // Regular value
503        self.parse_value(depth)
504    }
505
506    fn parse_object(&mut self, depth: usize) -> Result<Value> {
507        self.expect(TokenKind::LBrace)?;
508        let mut obj = ObjectMap::new();
509
510        while !self.check(TokenKind::RBrace) {
511            if let TokenKind::Ref(r) = self.current_kind() {
512                let key = format!("!{}", r);
513                self.advance();
514                self.expect(TokenKind::Colon)?;
515                let value = self.parse_value(depth)?;
516                obj.insert(key, value);
517            } else {
518                let (key, value) = self.parse_pair(depth)?;
519                obj.insert(key, value);
520            }
521            if self.check(TokenKind::Comma) {
522                self.advance();
523            }
524        }
525
526        self.expect(TokenKind::RBrace)?;
527        Ok(Value::Object(obj))
528    }
529
530    fn parse_array(&mut self, depth: usize) -> Result<Value> {
531        self.expect(TokenKind::LBracket)?;
532        let mut arr = Vec::new();
533
534        while !self.check(TokenKind::RBracket) {
535            arr.push(self.parse_value(depth)?);
536            if self.check(TokenKind::Comma) {
537                self.advance();
538            }
539        }
540
541        self.expect(TokenKind::RBracket)?;
542        Ok(Value::Array(arr))
543    }
544
545    fn parse_tuple(&mut self, depth: usize) -> Result<Value> {
546        self.expect(TokenKind::LParen)?;
547        let mut arr = Vec::new();
548
549        while !self.check(TokenKind::RParen) {
550            arr.push(self.parse_value(depth)?);
551            if self.check(TokenKind::Comma) {
552                self.advance();
553            }
554        }
555
556        self.expect(TokenKind::RParen)?;
557        Ok(Value::Array(arr))
558    }
559
560    // =========================================================================
561    // Helpers
562    // =========================================================================
563
564    fn current(&self) -> &Token {
565        self.tokens.get(self.pos).unwrap_or(&Token {
566            kind: TokenKind::Eof,
567            line: 0,
568            col: 0,
569        })
570    }
571
572    fn current_kind(&self) -> &TokenKind {
573        &self.current().kind
574    }
575
576    fn advance(&mut self) {
577        if self.pos < self.tokens.len() {
578            self.pos += 1;
579        }
580    }
581
582    fn check(&self, expected: TokenKind) -> bool {
583        std::mem::discriminant(self.current_kind()) == std::mem::discriminant(&expected)
584    }
585
586    fn expect(&mut self, expected: TokenKind) -> Result<()> {
587        if self.check(expected.clone()) {
588            self.advance();
589            Ok(())
590        } else {
591            Err(Error::UnexpectedToken {
592                expected: format!("{:?}", expected),
593                got: format!("{:?}", self.current_kind()),
594            })
595        }
596    }
597
598    fn expect_word(&mut self) -> Result<String> {
599        match self.current_kind() {
600            TokenKind::Word(w) => {
601                let w = w.clone();
602                self.advance();
603                Ok(w)
604            }
605            _ => Err(Error::UnexpectedToken {
606                expected: "word".to_string(),
607                got: format!("{:?}", self.current_kind()),
608            }),
609        }
610    }
611
612    fn at_end(&self) -> bool {
613        matches!(self.current_kind(), TokenKind::Eof)
614    }
615}
616
617#[cfg(test)]
618mod tests {
619    use super::*;
620    use crate::lexer::Lexer;
621
622    fn parse(input: &str) -> Result<IndexMap<String, Value>> {
623        let tokens = Lexer::new(input).tokenize()?;
624        Parser::new(tokens).parse()
625    }
626
627    #[test]
628    fn test_simple_values() {
629        let data = parse("a: 1, b: hello, c: true, d: ~").unwrap();
630        assert_eq!(data.get("a").unwrap().as_int(), Some(1));
631        assert_eq!(data.get("b").unwrap().as_str(), Some("hello"));
632        assert_eq!(data.get("c").unwrap().as_bool(), Some(true));
633        assert!(data.get("d").unwrap().is_null());
634    }
635
636    #[test]
637    fn test_object() {
638        let data = parse("obj: {x: 1, y: 2}").unwrap();
639        let obj = data.get("obj").unwrap().as_object().unwrap();
640        assert_eq!(obj.get("x").unwrap().as_int(), Some(1));
641        assert_eq!(obj.get("y").unwrap().as_int(), Some(2));
642    }
643
644    #[test]
645    fn test_array() {
646        let data = parse("arr: [1, 2, 3]").unwrap();
647        let arr = data.get("arr").unwrap().as_array().unwrap();
648        assert_eq!(arr.len(), 3);
649        assert_eq!(arr[0].as_int(), Some(1));
650    }
651
652    #[test]
653    fn test_struct_and_table() {
654        let input = r#"
655            @struct point (x: int, y: int)
656            points: @table point [
657                (1, 2),
658                (3, 4),
659            ]
660        "#;
661        let tokens = Lexer::new(input).tokenize().unwrap();
662        let mut parser = Parser::new(tokens);
663        let data = parser.parse().unwrap();
664
665        let points = data.get("points").unwrap().as_array().unwrap();
666        assert_eq!(points.len(), 2);
667
668        let p0 = points[0].as_object().unwrap();
669        assert_eq!(p0.get("x").unwrap().as_int(), Some(1));
670        assert_eq!(p0.get("y").unwrap().as_int(), Some(2));
671    }
672
673    // -------------------------------------------------------------------------
674    // Union parsing
675    // -------------------------------------------------------------------------
676
677    #[test]
678    fn test_union_def() {
679        let input = r#"
680            @union Shape {
681                Circle(radius: float),
682                Rectangle(width: float, height: float),
683                Point(),
684            }
685        "#;
686        let tokens = Lexer::new(input).tokenize().unwrap();
687        let mut parser = Parser::new(tokens);
688        parser.parse().unwrap();
689        let unions = parser.into_unions();
690        let shape = unions.get("Shape").unwrap();
691        assert_eq!(shape.variants.len(), 3);
692        assert_eq!(shape.variants[0].name, "Circle");
693        assert_eq!(shape.variants[0].fields.len(), 1);
694        assert_eq!(shape.variants[1].name, "Rectangle");
695        assert_eq!(shape.variants[1].fields.len(), 2);
696        assert_eq!(shape.variants[2].name, "Point");
697        assert_eq!(shape.variants[2].fields.len(), 0);
698    }
699
700    // -------------------------------------------------------------------------
701    // Map parsing
702    // -------------------------------------------------------------------------
703
704    #[test]
705    fn test_map_value() {
706        let data = parse("m: @map {1: one, 2: two}").unwrap();
707        let m = data.get("m").unwrap().as_map().unwrap();
708        assert_eq!(m.len(), 2);
709        assert_eq!(m[0].0.as_int(), Some(1));
710        assert_eq!(m[0].1.as_str(), Some("one"));
711        assert_eq!(m[1].0.as_int(), Some(2));
712        assert_eq!(m[1].1.as_str(), Some("two"));
713    }
714
715    #[test]
716    fn test_map_with_string_keys() {
717        let data = parse(r#"m: @map {"key1": 10, "key2": 20}"#).unwrap();
718        let m = data.get("m").unwrap().as_map().unwrap();
719        assert_eq!(m.len(), 2);
720    }
721
722    #[test]
723    fn test_map_empty() {
724        let data = parse("m: @map {}").unwrap();
725        let m = data.get("m").unwrap().as_map().unwrap();
726        assert_eq!(m.len(), 0);
727    }
728
729    // -------------------------------------------------------------------------
730    // Ref and Tagged values
731    // -------------------------------------------------------------------------
732
733    #[test]
734    fn test_ref_value() {
735        let data = parse("config: !base_config").unwrap();
736        assert_eq!(data.get("config").unwrap().as_ref_name(), Some("base_config"));
737    }
738
739    #[test]
740    fn test_tagged_value() {
741        let data = parse("status: :ok 200").unwrap();
742        let (tag, inner) = data.get("status").unwrap().as_tagged().unwrap();
743        assert_eq!(tag, "ok");
744        assert_eq!(inner.as_int(), Some(200));
745    }
746
747    #[test]
748    fn test_tagged_null() {
749        let data = parse("status: :none ~").unwrap();
750        let (tag, inner) = data.get("status").unwrap().as_tagged().unwrap();
751        assert_eq!(tag, "none");
752        assert!(inner.is_null());
753    }
754
755    // -------------------------------------------------------------------------
756    // Tuple and nested structures
757    // -------------------------------------------------------------------------
758
759    #[test]
760    fn test_tuple_value() {
761        let data = parse("point: (1, 2, 3)").unwrap();
762        let arr = data.get("point").unwrap().as_array().unwrap();
763        assert_eq!(arr.len(), 3);
764        assert_eq!(arr[0].as_int(), Some(1));
765        assert_eq!(arr[1].as_int(), Some(2));
766        assert_eq!(arr[2].as_int(), Some(3));
767    }
768
769    #[test]
770    fn test_nested_object() {
771        let data = parse("outer: {inner: {x: 1}}").unwrap();
772        let outer = data.get("outer").unwrap().as_object().unwrap();
773        let inner = outer.get("inner").unwrap().as_object().unwrap();
774        assert_eq!(inner.get("x").unwrap().as_int(), Some(1));
775    }
776
777    #[test]
778    fn test_nested_arrays() {
779        let data = parse("matrix: [[1, 2], [3, 4]]").unwrap();
780        let matrix = data.get("matrix").unwrap().as_array().unwrap();
781        assert_eq!(matrix.len(), 2);
782        let row0 = matrix[0].as_array().unwrap();
783        assert_eq!(row0[0].as_int(), Some(1));
784    }
785
786    // -------------------------------------------------------------------------
787    // Struct fields with types
788    // -------------------------------------------------------------------------
789
790    #[test]
791    fn test_struct_with_nullable_field() {
792        let input = r#"
793            @struct user (name: string, email: string?)
794            users: @table user [
795                (alice, "a@test.com"),
796                (bob, ~),
797            ]
798        "#;
799        let tokens = Lexer::new(input).tokenize().unwrap();
800        let mut parser = Parser::new(tokens);
801        let data = parser.parse().unwrap();
802        let schemas = parser.into_schemas();
803
804        let schema = schemas.get("user").unwrap();
805        assert!(schema.fields[1].field_type.nullable);
806
807        let users = data.get("users").unwrap().as_array().unwrap();
808        assert_eq!(users.len(), 2);
809        assert!(users[1].as_object().unwrap().get("email").unwrap().is_null());
810    }
811
812    #[test]
813    fn test_struct_with_array_field() {
814        let input = r#"
815            @struct item (name: string, tags: []string)
816            items: @table item [
817                (widget, [cool, useful]),
818            ]
819        "#;
820        let tokens = Lexer::new(input).tokenize().unwrap();
821        let mut parser = Parser::new(tokens);
822        let data = parser.parse().unwrap();
823
824        let items = data.get("items").unwrap().as_array().unwrap();
825        let tags = items[0].as_object().unwrap().get("tags").unwrap().as_array().unwrap();
826        assert_eq!(tags.len(), 2);
827    }
828
829    // -------------------------------------------------------------------------
830    // Root-array directive
831    // -------------------------------------------------------------------------
832
833    #[test]
834    fn test_root_array_directive() {
835        let input = "@root-array\nroot: [1, 2, 3]";
836        let tokens = Lexer::new(input).tokenize().unwrap();
837        let mut parser = Parser::new(tokens);
838        parser.parse().unwrap();
839        assert!(parser.is_root_array());
840    }
841
842    // -------------------------------------------------------------------------
843    // Ref key at top level
844    // -------------------------------------------------------------------------
845
846    #[test]
847    fn test_ref_key_at_top_level() {
848        let input = "!defaults: {theme: dark}";
849        let data = parse(input).unwrap();
850        assert!(data.contains_key("!defaults"));
851        let obj = data.get("!defaults").unwrap().as_object().unwrap();
852        assert_eq!(obj.get("theme").unwrap().as_str(), Some("dark"));
853    }
854
855    // -------------------------------------------------------------------------
856    // String keys
857    // -------------------------------------------------------------------------
858
859    #[test]
860    fn test_string_key() {
861        let data = parse(r#""my key": 42"#).unwrap();
862        assert_eq!(data.get("my key").unwrap().as_int(), Some(42));
863    }
864
865    // -------------------------------------------------------------------------
866    // Error cases
867    // -------------------------------------------------------------------------
868
869    #[test]
870    fn test_unexpected_token_error() {
871        let result = parse("] invalid");
872        // The parser may skip unexpected tokens or error
873        // Just ensure it doesn't panic
874        let _ = result;
875    }
876
877    #[test]
878    fn test_missing_colon_error() {
879        // A word followed by a value without colon
880        let input = "key value";
881        let result = parse(input);
882        assert!(result.is_err());
883    }
884
885    #[test]
886    fn test_unknown_struct_in_table() {
887        let input = "data: @table nonexistent [(1, 2)]";
888        let result = parse(input);
889        assert!(result.is_err());
890    }
891
892    // -------------------------------------------------------------------------
893    // Struct field type defaults
894    // -------------------------------------------------------------------------
895
896    #[test]
897    fn test_struct_field_without_type() {
898        let input = r#"
899            @struct simple (name, value)
900            items: @table simple [
901                (hello, world),
902            ]
903        "#;
904        let tokens = Lexer::new(input).tokenize().unwrap();
905        let mut parser = Parser::new(tokens);
906        let data = parser.parse().unwrap();
907        let schemas = parser.into_schemas();
908
909        // Fields without explicit type default to "string"
910        let schema = schemas.get("simple").unwrap();
911        assert_eq!(schema.fields[0].field_type.base, "string");
912        assert_eq!(schema.fields[1].field_type.base, "string");
913
914        let items = data.get("items").unwrap().as_array().unwrap();
915        assert_eq!(items[0].as_object().unwrap().get("name").unwrap().as_str(), Some("hello"));
916    }
917
918    // -------------------------------------------------------------------------
919    // Unknown directive
920    // -------------------------------------------------------------------------
921
922    #[test]
923    fn test_unknown_directive_ignored() {
924        // Directive on its own line — next line is a key-value, not an argument
925        let data = parse("@custom_directive\nkey: value").unwrap();
926        assert_eq!(data.get("key").unwrap().as_str(), Some("value"));
927    }
928
929    #[test]
930    fn test_unknown_directive_consumes_same_line_argument() {
931        // Same-line word argument: consumed, not misparsed as a key
932        let data = parse("@custom foo\nkey: value").unwrap();
933        assert!(data.get("foo").is_none(), "foo should be consumed as directive arg, not a key");
934        assert_eq!(data.get("key").unwrap().as_str(), Some("value"));
935
936        // Same-line array argument
937        let data = parse("@custom [1, 2, 3]\nkey: value").unwrap();
938        assert_eq!(data.get("key").unwrap().as_str(), Some("value"));
939
940        // Same-line object argument
941        let data = parse("@custom {a: 1}\nkey: value").unwrap();
942        assert_eq!(data.get("key").unwrap().as_str(), Some("value"));
943
944        // No argument (directive alone on line)
945        let data = parse("@custom\nkey: value").unwrap();
946        assert_eq!(data.get("key").unwrap().as_str(), Some("value"));
947
948        // No argument (directive at end of file)
949        let data = parse("key: value\n@custom").unwrap();
950        assert_eq!(data.get("key").unwrap().as_str(), Some("value"));
951
952        // Argument on next line: NOT consumed
953        let data = parse("@custom\nfoo: bar").unwrap();
954        assert_eq!(data.get("foo").unwrap().as_str(), Some("bar"));
955    }
956
957    #[test]
958    fn test_unknown_directive_value_consumes_argument() {
959        // Spec §1.18: unknown directive in value position consumes argument, returns null
960        let data = parse("key: @unknown [1, 2, 3]\nother: 42").unwrap();
961        assert!(data.get("key").unwrap().is_null(), "unknown directive value should be null");
962        assert_eq!(data.get("other").unwrap().as_int(), Some(42), "next key should parse normally");
963
964        // With object argument
965        let data = parse("key: @unknown {a: 1}\nother: ok").unwrap();
966        assert!(data.get("key").unwrap().is_null());
967        assert_eq!(data.get("other").unwrap().as_str(), Some("ok"));
968
969        // With simple argument
970        let data = parse("key: @unknown 42\nother: ok").unwrap();
971        assert!(data.get("key").unwrap().is_null());
972        assert_eq!(data.get("other").unwrap().as_str(), Some("ok"));
973
974        // Without argument (just the directive)
975        let data = parse("arr: [@unknown, 1, 2]").unwrap();
976        let arr = data.get("arr").unwrap().as_array().unwrap();
977        assert!(arr[0].is_null());
978        assert_eq!(arr[1].as_int(), Some(1));
979    }
980
981    // -------------------------------------------------------------------------
982    // Object with ref keys
983    // -------------------------------------------------------------------------
984
985    #[test]
986    fn test_object_with_ref_key() {
987        let data = parse("obj: {!base: 1, key: 2}").unwrap();
988        let obj = data.get("obj").unwrap().as_object().unwrap();
989        assert!(obj.contains_key("!base"));
990        assert_eq!(obj.get("!base").unwrap().as_int(), Some(1));
991        assert_eq!(obj.get("key").unwrap().as_int(), Some(2));
992    }
993
994    // -------------------------------------------------------------------------
995    // Nested struct in table
996    // -------------------------------------------------------------------------
997
998    #[test]
999    fn test_nested_struct_in_table() {
1000        let input = r#"
1001            @struct addr (city: string, zip: string)
1002            @struct person (name: string, home: addr)
1003            people: @table person [
1004                (alice, (Boston, "02101")),
1005                (bob, (NYC, "10001")),
1006            ]
1007        "#;
1008        let tokens = Lexer::new(input).tokenize().unwrap();
1009        let mut parser = Parser::new(tokens);
1010        let data = parser.parse().unwrap();
1011
1012        let people = data.get("people").unwrap().as_array().unwrap();
1013        let alice_home = people[0].as_object().unwrap().get("home").unwrap().as_object().unwrap();
1014        assert_eq!(alice_home.get("city").unwrap().as_str(), Some("Boston"));
1015    }
1016
1017    #[test]
1018    fn test_include_cycle_detection() {
1019        // Create a file that includes itself
1020        let dir = std::env::temp_dir();
1021        let file_path = dir.join("test_cycle_self.tl");
1022        std::fs::write(&file_path, "@include \"test_cycle_self.tl\"\nval: 1").unwrap();
1023
1024        let content = std::fs::read_to_string(&file_path).unwrap();
1025        let tokens = Lexer::new(&content).tokenize().unwrap();
1026        let mut parser = Parser::new(tokens).with_base_path(&file_path);
1027        let result = parser.parse();
1028        assert!(result.is_err(), "Should detect self-referencing include");
1029        let err_msg = result.unwrap_err().to_string();
1030        assert!(err_msg.contains("Circular include"), "Error should mention circular include: {}", err_msg);
1031
1032        std::fs::remove_file(&file_path).ok();
1033    }
1034
1035    #[test]
1036    fn test_include_mutual_cycle_detection() {
1037        // Create two files that include each other: A -> B -> A
1038        let dir = std::env::temp_dir();
1039        let file_a = dir.join("test_cycle_a.tl");
1040        let file_b = dir.join("test_cycle_b.tl");
1041        std::fs::write(&file_a, "@include \"test_cycle_b.tl\"\na_val: 1").unwrap();
1042        std::fs::write(&file_b, "@include \"test_cycle_a.tl\"\nb_val: 2").unwrap();
1043
1044        let content = std::fs::read_to_string(&file_a).unwrap();
1045        let tokens = Lexer::new(&content).tokenize().unwrap();
1046        let mut parser = Parser::new(tokens).with_base_path(&file_a);
1047        let result = parser.parse();
1048        assert!(result.is_err(), "Should detect mutual cycle between A and B");
1049        let err_msg = result.unwrap_err().to_string();
1050        assert!(err_msg.contains("Circular include"), "Error should mention circular include: {}", err_msg);
1051
1052        std::fs::remove_file(&file_a).ok();
1053        std::fs::remove_file(&file_b).ok();
1054    }
1055
1056    #[test]
1057    fn test_include_stack_propagated_to_child() {
1058        // Verify that the include_stack starts empty
1059        let parser = Parser::new(vec![]);
1060        assert!(parser.include_stack.is_empty(), "New parser should have empty include stack");
1061    }
1062
1063    // -------------------------------------------------------------------------
1064    // Bytes literal parsing
1065    // -------------------------------------------------------------------------
1066
1067    #[test]
1068    fn test_bytes_literal_value() {
1069        let data = parse(r#"payload: b"cafef00d""#).unwrap();
1070        let val = data.get("payload").unwrap();
1071        assert_eq!(val.as_bytes(), Some(&[0xca, 0xfe, 0xf0, 0x0d][..]));
1072    }
1073
1074    #[test]
1075    fn test_bytes_literal_empty_value() {
1076        let data = parse(r#"empty: b"""#).unwrap();
1077        let val = data.get("empty").unwrap();
1078        assert_eq!(val.as_bytes(), Some(&[][..]));
1079    }
1080
1081    #[test]
1082    fn test_bytes_literal_in_array() {
1083        let data = parse(r#"arr: [b"cafe", b"babe"]"#).unwrap();
1084        let arr = data.get("arr").unwrap().as_array().unwrap();
1085        assert_eq!(arr[0].as_bytes(), Some(&[0xca, 0xfe][..]));
1086        assert_eq!(arr[1].as_bytes(), Some(&[0xba, 0xbe][..]));
1087    }
1088
1089    #[test]
1090    fn test_bytes_literal_in_object() {
1091        let data = parse(r#"obj: {data: b"ff00"}"#).unwrap();
1092        let obj = data.get("obj").unwrap().as_object().unwrap();
1093        assert_eq!(obj.get("data").unwrap().as_bytes(), Some(&[0xff, 0x00][..]));
1094    }
1095
1096    // -------------------------------------------------------------------------
1097    // Fuzz regression tests (full TeaLeaf::parse path)
1098    // -------------------------------------------------------------------------
1099
1100    #[test]
1101    fn test_fuzz_deeply_nested_arrays_no_stack_overflow() {
1102        // Crafted input with 500 nested arrays — exceeds MAX_PARSE_DEPTH (256)
1103        let depth = 500;
1104        let input = format!("key: {}{}", "[".repeat(depth), "]".repeat(depth));
1105        let result = crate::TeaLeaf::parse(&input);
1106        match result {
1107            Err(e) => {
1108                let err = format!("{}", e);
1109                assert!(err.contains("nesting depth"), "Error should mention nesting depth: {}", err);
1110            }
1111            Ok(_) => panic!("Should fail with depth exceeded, not succeed"),
1112        }
1113    }
1114
1115    #[test]
1116    fn test_fuzz_deeply_nested_objects_no_stack_overflow() {
1117        // Crafted input with 500 nested objects
1118        let depth = 500;
1119        let mut input = String::from("key: ");
1120        for i in 0..depth {
1121            input.push_str(&format!("{{k{}: ", i));
1122        }
1123        input.push_str("1");
1124        for _ in 0..depth {
1125            input.push('}');
1126        }
1127        let result = crate::TeaLeaf::parse(&input);
1128        assert!(result.is_err(), "Should fail with depth exceeded, not stack overflow");
1129    }
1130
1131    #[test]
1132    fn test_fuzz_deeply_nested_tags_no_stack_overflow() {
1133        // Crafted input with 500 nested tags: :a :b :c ... value
1134        let depth = 500;
1135        let mut input = String::from("key: ");
1136        for i in 0..depth {
1137            input.push_str(&format!(":t{} ", i));
1138        }
1139        input.push_str("42");
1140        let result = crate::TeaLeaf::parse(&input);
1141        assert!(result.is_err(), "Should fail with depth exceeded, not stack overflow");
1142    }
1143
1144    #[test]
1145    fn test_parse_depth_256_succeeds() {
1146        // 200 levels of nesting should succeed (within MAX_PARSE_DEPTH=256)
1147        let depth = 200;
1148        let input = format!("key: {}1{}", "[".repeat(depth), "]".repeat(depth));
1149        let result = crate::TeaLeaf::parse(&input);
1150        if let Err(e) = &result {
1151            panic!("200 levels of nesting should be fine: {}", e);
1152        }
1153    }
1154
1155    #[test]
1156    fn test_fuzz_crash_e42e_full_parse_no_panic() {
1157        // Regression: fuzz_parse crash-e42e7ae2f5127519e7e60e87d1cbfbc2a5bf878d
1158        // Must not panic through TeaLeaf::parse (the actual fuzz path)
1159        let input = "\"0B\u{10}\u{3}#\"0BP\u{07FE}-----\u{061D}\u{07FE}\u{07FE}-----\u{061D}\u{3}#\"0B\u{10}\u{3}#\"0BP\u{07FE}-----\u{061D}\u{07FE}\u{07FE}-----\u{061D}\u{07FE}";
1160        let _ = crate::TeaLeaf::parse(input);
1161    }
1162
1163    #[test]
1164    fn test_fuzz_crash_d038_full_parse_no_panic() {
1165        // Regression: fuzz_parse crash-d0387cbd639a8db9789ab68057f3c58c7bebbfa5
1166        // Large input with repeated date-like patterns. Must not panic.
1167        let input = "z\" \"-\"\t; \"\"\")\"\"\" 8] ] 02)3313312)313-333-333-3332)33-133-3-33331333302)33";
1168        let _ = crate::TeaLeaf::parse(input);
1169    }
1170
1171    #[test]
1172    fn test_reject_value_only_schema_field_types() {
1173        // Spec §2.1: object, map, tuple, ref, tagged are value types, not schema field types
1174        for bad_type in &["object", "map", "tuple", "ref", "tagged"] {
1175            let input = format!("@struct Bad (field: {})\n", bad_type);
1176            let result = crate::TeaLeaf::parse(&input);
1177            assert!(result.is_err(), "should reject '{}' as schema field type", bad_type);
1178            let err = format!("{}", result.err().unwrap());
1179            assert!(err.contains("value type"), "error for '{}' should mention 'value type': {}", bad_type, err);
1180        }
1181        // Array of value-only type should also be rejected
1182        let result = crate::TeaLeaf::parse("@struct Bad (field: []object)\n");
1183        assert!(result.is_err(), "should reject '[]object' as schema field type");
1184
1185        // Valid types should still work
1186        for good_type in &["string", "int", "int8", "float", "bool", "bytes", "timestamp", "MyStruct"] {
1187            let input = format!("@struct Good (field: {})\n", good_type);
1188            assert!(crate::TeaLeaf::parse(&input).is_ok(), "'{}' should be accepted", good_type);
1189        }
1190    }
1191}