// tealeaf/parser.rs

//! Parser for TeaLeaf text format

use std::path::Path;
use indexmap::IndexMap;
use crate::{Error, Result, Value, Schema, Field, FieldType, Union, Variant};
use crate::types::ObjectMap;
use crate::lexer::{Token, TokenKind, Lexer};

/// Maximum recursion depth for nested parse_value calls (arrays, objects, maps, tuples, tags).
/// Matches the binary reader's MAX_DECODE_DEPTH to ensure text↔binary parity.
const MAX_PARSE_DEPTH: usize = 256;

pub struct Parser {
    tokens: Vec<Token>,
    pos: usize,
    schemas: IndexMap<String, Schema>,
    unions: IndexMap<String, Union>,
    base_path: Option<std::path::PathBuf>,
    /// Tracks included file paths for cycle detection
    include_stack: Vec<std::path::PathBuf>,
    /// Indicates the source was a root-level JSON array (set by @root-array directive)
    is_root_array: bool,
}

impl Parser {
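    /// Create a parser over an already-lexed token stream.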
    pub fn new(tokens: Vec<Token>) -> Self {
        Self {
            tokens,
            pos: 0,
            schemas: IndexMap::new(),
            unions: IndexMap::new(),
            base_path: None,
            include_stack: Vec::new(),
            is_root_array: false,
        }
    }

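    /// Record the parent directory of `path` so that `@include` paths are
    /// resolved relative to the including file.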
    pub fn with_base_path(mut self, path: &Path) -> Self {
        self.base_path = path.parent().map(|p| p.to_path_buf());
        self
    }

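    /// Parse the token stream into an ordered map of top-level key/value pairs.
    ///
    /// `@struct` and `@union` definitions are collected into the parser as a
    /// side effect; retrieve them afterwards with `into_schemas()`,
    /// `into_unions()`, or `into_schemas_and_unions()`.
    ///
    /// Typical usage, mirroring the unit tests in this module:
    ///
    /// ```ignore
    /// let tokens = Lexer::new("a: 1, b: hello").tokenize()?;
    /// let mut parser = Parser::new(tokens);
    /// let data = parser.parse()?;
    /// assert_eq!(data.get("a").unwrap().as_int(), Some(1));
    /// ```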
    pub fn parse(&mut self) -> Result<IndexMap<String, Value>> {
        let mut result = IndexMap::new();

        while !self.at_end() {
            match self.current_kind() {
                TokenKind::Directive(d) => {
                    let directive = d.clone();
                    self.advance();
                    match directive.as_str() {
                        "struct" => self.parse_struct_def()?,
                        "union" => self.parse_union_def()?,
                        "include" => {
                            let included = self.parse_include()?;
                            for (k, v) in included {
                                result.insert(k, v);
                            }
                        }
                        "root-array" => {
                            // Marks this document as representing a root-level JSON array
                            self.is_root_array = true;
                        }
                        _ => {
                            // Unknown top-level directive: silently ignored (spec §1.18).
                            // Consume same-line argument for forward compatibility —
                            // a future directive like @custom foo should not leave
                            // "foo" to be misparsed as a key.
                            let directive_line = self.tokens[self.pos - 1].line;
                            if !self.at_end()
                                && self.current().line == directive_line
                                && self.can_start_value()
                            {
                                let _ = self.parse_value(0)?;
                            }
                        }
                    }
                }
                TokenKind::Word(_) | TokenKind::String(_) => {
                    let (key, value) = self.parse_pair(0)?;
                    result.insert(key, value);
                }
                TokenKind::Ref(r) => {
                    let ref_name = r.clone();
                    self.advance();
                    self.expect(TokenKind::Colon)?;
                    let value = self.parse_value(0)?;
                    result.insert(format!("!{}", ref_name), value);
                }
                TokenKind::Eof => break,
                _ => { self.advance(); }
            }
        }

        Ok(result)
    }

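    /// Consume the parser and return the `@struct` schemas collected during parsing.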
    pub fn into_schemas(self) -> IndexMap<String, Schema> {
        self.schemas
    }

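    /// Consume the parser and return the `@union` definitions collected during parsing.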
    pub fn into_unions(self) -> IndexMap<String, Union> {
        self.unions
    }

    /// Consume the parser and return both schemas and unions.
    pub fn into_schemas_and_unions(self) -> (IndexMap<String, Schema>, IndexMap<String, Union>) {
        (self.schemas, self.unions)
    }

    /// Check if the @root-array directive was present
    pub fn is_root_array(&self) -> bool {
        self.is_root_array
    }

    // =========================================================================
    // Struct Definition
    // =========================================================================

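    /// Parse `@struct name (field: type, ...)` (the `@struct` token has already
    /// been consumed) and register the schema under `name`. Fields without an
    /// explicit type default to `string`.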
    fn parse_struct_def(&mut self) -> Result<()> {
        let name = self.expect_word()?;
        self.expect(TokenKind::LParen)?;

        let mut schema = Schema::new(&name);

        while !self.check(TokenKind::RParen) {
            // Field names must be unquoted names per spec grammar
            let field_name = match self.current_kind() {
                TokenKind::Word(w) => {
                    let w = w.clone();
                    self.advance();
                    w
                }
                _ => return Err(Error::UnexpectedToken {
                    expected: "field name".to_string(),
                    got: format!("{:?}", self.current_kind()),
                }),
            };

            let field_type = if self.check(TokenKind::Colon) {
                self.advance();
                self.parse_field_type()?
            } else {
                FieldType::new("string")
            };

            schema.add_field(field_name, field_type);

            if self.check(TokenKind::Comma) {
                self.advance();
            }
        }

        self.expect(TokenKind::RParen)?;
        self.schemas.insert(name, schema);
        Ok(())
    }

    // =========================================================================
    // Union Definition
    // =========================================================================

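    /// Parse `@union Name { Variant(field: type, ...), ... }` and register the
    /// union under `Name`.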
    fn parse_union_def(&mut self) -> Result<()> {
        let name = self.expect_word()?;
        self.expect(TokenKind::LBrace)?;

        let mut union_type = Union::new(&name);

        while !self.check(TokenKind::RBrace) {
            let variant_name = self.expect_word()?;
            self.expect(TokenKind::LParen)?;

            let mut variant = Variant::new(&variant_name);

            while !self.check(TokenKind::RParen) {
                let field_name = self.expect_word()?;

                let field_type = if self.check(TokenKind::Colon) {
                    self.advance();
                    self.parse_field_type()?
                } else {
                    FieldType::new("string")
                };

                variant.fields.push(Field::new(field_name, field_type));

                if self.check(TokenKind::Comma) {
                    self.advance();
                }
            }

            self.expect(TokenKind::RParen)?;
            union_type.add_variant(variant);

            if self.check(TokenKind::Comma) {
                self.advance();
            }
        }

        self.expect(TokenKind::RBrace)?;
        self.unions.insert(name, union_type);
        Ok(())
    }

    // =========================================================================
    // Include Directive
    // =========================================================================

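    /// Resolve, read, lex, and parse an `@include`d file, merging its schemas and
    /// unions into this parser and returning its top-level key/value pairs.
    /// Circular includes are rejected, and include depth is capped at 32.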
    fn parse_include(&mut self) -> Result<IndexMap<String, Value>> {
        let path_str = match self.current_kind() {
            TokenKind::String(s) => s.clone(),
            TokenKind::Word(w) => w.clone(),
            _ => return Err(Error::UnexpectedToken {
                expected: "file path".to_string(),
                got: format!("{:?}", self.current_kind()),
            }),
        };
        self.advance();

        // Resolve path relative to current file
        let include_path = if let Some(ref base) = self.base_path {
            base.join(&path_str)
        } else {
            std::path::PathBuf::from(&path_str)
        };

        // Cycle detection and depth limit
        let canonical = include_path.canonicalize()
            .unwrap_or_else(|_| include_path.clone());
        if self.include_stack.contains(&canonical) {
            return Err(Error::ParseError(format!(
                "Circular include detected: {}", canonical.display()
            )));
        }
        if self.include_stack.len() >= 32 {
            return Err(Error::ParseError(
                "Include depth exceeds limit of 32".into()
            ));
        }

        // Read and parse the included file
        let content = std::fs::read_to_string(&include_path)
            .map_err(|e| Error::ParseError(format!("Failed to include {}: {}", path_str, e)))?;

        let tokens = Lexer::new(&content).tokenize()?;
        let mut parser = Parser::new(tokens);
        if let Some(parent) = include_path.parent() {
            parser.base_path = Some(parent.to_path_buf());
        }
        // Propagate include stack and accumulated schemas/unions to child parser
        // so that schemas from earlier includes are available in later includes.
        parser.include_stack = self.include_stack.clone();
        parser.include_stack.push(canonical);
        parser.schemas = self.schemas.clone();
        parser.unions = self.unions.clone();

        let data = parser.parse()?;

        // Merge schemas and unions
        for (name, schema) in parser.schemas {
            self.schemas.insert(name, schema);
        }
        for (name, union_type) in parser.unions {
            self.unions.insert(name, union_type);
        }

        Ok(data)
    }

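    /// Parse a schema field type: an optional `[]` array prefix, a base type
    /// name, and an optional `?` nullable suffix. Value-only types such as
    /// `object` or `map` are rejected here.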
    fn parse_field_type(&mut self) -> Result<FieldType> {
        let mut type_str = String::new();

        // Handle array prefix
        if self.check(TokenKind::LBracket) {
            self.advance();
            self.expect(TokenKind::RBracket)?;
            type_str.push_str("[]");
        }

        // Base type
        let base = self.expect_word()?;

        // Reject value-only types that cannot be schema field types (spec §2.1)
        match base.as_str() {
            "object" | "map" | "tuple" | "ref" | "tagged" => {
                return Err(Error::ParseError(
                    format!("'{}' is a value type and cannot be used as a schema field type", base)
                ));
            }
            _ => {}
        }

        type_str.push_str(&base);

        // Nullable suffix
        if self.check(TokenKind::Question) {
            self.advance();
            type_str.push('?');
        }

        Ok(FieldType::parse(&type_str))
    }

    // =========================================================================
    // Key-Value Pairs
    // =========================================================================

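    /// Parse a `key: value` pair; the key may be a bare word or a quoted string.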
    fn parse_pair(&mut self, depth: usize) -> Result<(String, Value)> {
        let key = match self.current_kind() {
            TokenKind::Word(w) => w.clone(),
            TokenKind::String(s) => s.clone(),
            _ => return Err(Error::UnexpectedToken {
                expected: "key".to_string(),
                got: format!("{:?}", self.current_kind()),
            }),
        };
        self.advance();
        self.expect(TokenKind::Colon)?;
        let value = self.parse_value(depth)?;
        Ok((key, value))
    }

    // =========================================================================
    // Values
    // =========================================================================

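    /// Parse a single value starting at the current token. `depth` counts
    /// nesting levels and is checked against `MAX_PARSE_DEPTH`.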
    fn parse_value(&mut self, depth: usize) -> Result<Value> {
        if depth > MAX_PARSE_DEPTH {
            return Err(Error::ParseError("maximum parse nesting depth exceeded".into()));
        }
        match self.current_kind() {
            TokenKind::Null => { self.advance(); Ok(Value::Null) }
            TokenKind::Bool(b) => { let b = *b; self.advance(); Ok(Value::Bool(b)) }
            TokenKind::Int(i) => { let i = *i; self.advance(); Ok(Value::Int(i)) }
            TokenKind::UInt(u) => { let u = *u; self.advance(); Ok(Value::UInt(u)) }
            TokenKind::JsonNumber(s) => { let s = s.clone(); self.advance(); Ok(Value::JsonNumber(s)) }
            TokenKind::Float(f) => { let f = *f; self.advance(); Ok(Value::Float(f)) }
            TokenKind::String(s) => { let s = s.clone(); self.advance(); Ok(Value::String(s)) }
            TokenKind::Bytes(b) => { let b = b.clone(); self.advance(); Ok(Value::Bytes(b)) }
            TokenKind::Word(w) => { let w = w.clone(); self.advance(); Ok(Value::String(w)) }
            TokenKind::Ref(r) => { let r = r.clone(); self.advance(); Ok(Value::Ref(r)) }
            TokenKind::Timestamp(ts, tz) => { let ts = *ts; let tz = *tz; self.advance(); Ok(Value::Timestamp(ts, tz)) }
            TokenKind::Colon => {
                self.advance(); // consume ':'
                match self.current_kind() {
                    TokenKind::Word(w) => {
                        let tag = w.clone();
                        self.advance(); // consume tag name
                        let inner = self.parse_value(depth + 1)?;
                        Ok(Value::Tagged(tag, Box::new(inner)))
                    }
                    _ => Err(Error::UnexpectedToken {
                        expected: "tag name after ':'".to_string(),
                        got: format!("{:?}", self.current_kind()),
                    })
                }
            }
            TokenKind::Directive(d) => {
                let directive = d.clone();
                self.advance();
                self.parse_directive_value(&directive, depth)
            }
            TokenKind::LBrace => self.parse_object(depth + 1),
            TokenKind::LBracket => self.parse_array(depth + 1),
            TokenKind::LParen => self.parse_tuple(depth + 1),
            _ => Err(Error::UnexpectedToken {
                expected: "value".to_string(),
                got: format!("{:?}", self.current_kind()),
            }),
        }
    }

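    /// Handle a directive in value position: `@table` and `@map` are parsed,
    /// while unknown directives consume any argument and yield `Value::Null`.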
    fn parse_directive_value(&mut self, directive: &str, depth: usize) -> Result<Value> {
        match directive {
            "table" => self.parse_table(depth),
            "map" => self.parse_map(depth),
            _ => {
                // Unknown directive in value position: consume argument, return null (spec §1.18)
                if self.can_start_value() {
                    let _ = self.parse_value(depth)?;
                }
                Ok(Value::Null)
            }
        }
    }

    /// Returns true if the current token can begin a value expression.
    fn can_start_value(&self) -> bool {
        matches!(
            self.current_kind(),
            TokenKind::Null
                | TokenKind::Bool(_)
                | TokenKind::Int(_)
                | TokenKind::UInt(_)
                | TokenKind::Float(_)
                | TokenKind::String(_)
                | TokenKind::Bytes(_)
                | TokenKind::Word(_)
                | TokenKind::Ref(_)
                | TokenKind::Timestamp(_, _)
                | TokenKind::JsonNumber(_)
                | TokenKind::Colon
                | TokenKind::Directive(_)
                | TokenKind::LBrace
                | TokenKind::LBracket
                | TokenKind::LParen
        )
    }

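    /// Parse the body of `@map { key: value, ... }` into a `Value::Map`,
    /// preserving entry order. Keys may be strings, bare names, or integers.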
    fn parse_map(&mut self, depth: usize) -> Result<Value> {
        self.expect(TokenKind::LBrace)?;
        let mut pairs = Vec::new();

        while !self.check(TokenKind::RBrace) {
            // Parse key (string, name, or integer per spec grammar:
            // map_key = string | name | integer)
            let key = match self.current_kind() {
                TokenKind::String(s) => { let s = s.clone(); self.advance(); Value::String(s) }
                TokenKind::Word(w) => { let w = w.clone(); self.advance(); Value::String(w) }
                TokenKind::Int(i) => { let i = *i; self.advance(); Value::Int(i) }
                TokenKind::UInt(u) => { let u = *u; self.advance(); Value::UInt(u) }
                _ => return Err(Error::UnexpectedToken {
                    expected: "map key".to_string(),
                    got: format!("{:?}", self.current_kind()),
                }),
            };

            self.expect(TokenKind::Colon)?;
            let value = self.parse_value(depth + 1)?;
            pairs.push((key, value));

            if self.check(TokenKind::Comma) {
                self.advance();
            }
        }

        self.expect(TokenKind::RBrace)?;
        Ok(Value::Map(pairs))
    }

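    /// Parse `@table <struct> [ (...), (...) ]` into a `Value::Array` of objects
    /// shaped by the named schema.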
    fn parse_table(&mut self, depth: usize) -> Result<Value> {
        let struct_name = self.expect_word()?;
        let schema = self.schemas
            .get(&struct_name)
            .ok_or_else(|| Error::UnknownStruct(struct_name.clone()))?
            .clone();

        self.expect(TokenKind::LBracket)?;

        let mut rows = Vec::new();
        while !self.check(TokenKind::RBracket) {
            let row = self.parse_tuple_with_schema(&schema, depth + 1)?;
            rows.push(row);
            if self.check(TokenKind::Comma) {
                self.advance();
            }
        }

        self.expect(TokenKind::RBracket)?;
        Ok(Value::Array(rows))
    }

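    /// Parse one positional row `(v1, v2, ...)` against `schema`, producing a
    /// `Value::Object` keyed by the schema's field names.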
    fn parse_tuple_with_schema(&mut self, schema: &Schema, depth: usize) -> Result<Value> {
        self.expect(TokenKind::LParen)?;

        let mut obj = ObjectMap::new();
        for field in &schema.fields {
            let value = self.parse_value_for_field(&field.field_type, depth)?;
            obj.insert(field.name.clone(), value);
            if self.check(TokenKind::Comma) {
                self.advance();
            }
        }

        self.expect(TokenKind::RParen)?;
        Ok(Value::Object(obj))
    }

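    /// Parse a value according to `field_type`: `~`/null is accepted for any
    /// field, nested struct types expect a positional tuple, and `[]T` fields
    /// expect a bracketed array; everything else falls through to `parse_value`.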
    fn parse_value_for_field(&mut self, field_type: &FieldType, depth: usize) -> Result<Value> {
        // Handle null
        if self.check(TokenKind::Null) {
            self.advance();
            return Ok(Value::Null);
        }

        // Handle nested struct — schema names shadow built-in types, so check
        // by name rather than relying on is_struct() which lacks schema context.
        // The LParen guard disambiguates: struct tuples always start with `(`,
        // while primitive values (int, bool, string, etc.) never do.
        if !field_type.is_array && self.check(TokenKind::LParen) {
            if let Some(schema) = self.schemas.get(&field_type.base).cloned() {
                return self.parse_tuple_with_schema(&schema, depth + 1);
            }
        }

        // Handle array
        if field_type.is_array {
            self.expect(TokenKind::LBracket)?;
            let mut arr = Vec::new();
            let inner_type = FieldType::new(&field_type.base);
            while !self.check(TokenKind::RBracket) {
                arr.push(self.parse_value_for_field(&inner_type, depth + 1)?);
                if self.check(TokenKind::Comma) {
                    self.advance();
                }
            }
            self.expect(TokenKind::RBracket)?;
            return Ok(Value::Array(arr));
        }

        // Regular value
        self.parse_value(depth)
    }

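    /// Parse `{ key: value, ... }` into a `Value::Object`; `!name` keys are
    /// stored with their leading `!` preserved.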
    fn parse_object(&mut self, depth: usize) -> Result<Value> {
        self.expect(TokenKind::LBrace)?;
        let mut obj = ObjectMap::new();

        while !self.check(TokenKind::RBrace) {
            if let TokenKind::Ref(r) = self.current_kind() {
                let key = format!("!{}", r);
                self.advance();
                self.expect(TokenKind::Colon)?;
                let value = self.parse_value(depth)?;
                obj.insert(key, value);
            } else {
                let (key, value) = self.parse_pair(depth)?;
                obj.insert(key, value);
            }
            if self.check(TokenKind::Comma) {
                self.advance();
            }
        }

        self.expect(TokenKind::RBrace)?;
        Ok(Value::Object(obj))
    }

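    /// Parse `[ v, v, ... ]` into a `Value::Array`.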
    fn parse_array(&mut self, depth: usize) -> Result<Value> {
        self.expect(TokenKind::LBracket)?;
        let mut arr = Vec::new();

        while !self.check(TokenKind::RBracket) {
            arr.push(self.parse_value(depth)?);
            if self.check(TokenKind::Comma) {
                self.advance();
            }
        }

        self.expect(TokenKind::RBracket)?;
        Ok(Value::Array(arr))
    }

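    /// Parse a bare tuple `( v, v, ... )`; tuples are represented as `Value::Array`.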
    fn parse_tuple(&mut self, depth: usize) -> Result<Value> {
        self.expect(TokenKind::LParen)?;
        let mut arr = Vec::new();

        while !self.check(TokenKind::RParen) {
            arr.push(self.parse_value(depth)?);
            if self.check(TokenKind::Comma) {
                self.advance();
            }
        }

        self.expect(TokenKind::RParen)?;
        Ok(Value::Array(arr))
    }

    // =========================================================================
    // Helpers
    // =========================================================================

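    /// Return the current token, or a synthetic EOF token once past the end of input.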
    fn current(&self) -> &Token {
        self.tokens.get(self.pos).unwrap_or(&Token {
            kind: TokenKind::Eof,
            line: 0,
            col: 0,
        })
    }

    fn current_kind(&self) -> &TokenKind {
        &self.current().kind
    }

    fn advance(&mut self) {
        if self.pos < self.tokens.len() {
            self.pos += 1;
        }
    }

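    /// True if the current token has the same kind as `expected`, compared by
    /// discriminant (token payloads are ignored).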
    fn check(&self, expected: TokenKind) -> bool {
        std::mem::discriminant(self.current_kind()) == std::mem::discriminant(&expected)
    }

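    /// Consume the current token if its kind matches `expected`; otherwise
    /// return `Error::UnexpectedToken`.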
    fn expect(&mut self, expected: TokenKind) -> Result<()> {
        if self.check(expected.clone()) {
            self.advance();
            Ok(())
        } else {
            Err(Error::UnexpectedToken {
                expected: format!("{:?}", expected),
                got: format!("{:?}", self.current_kind()),
            })
        }
    }

    fn expect_word(&mut self) -> Result<String> {
        match self.current_kind() {
            TokenKind::Word(w) => {
                let w = w.clone();
                self.advance();
                Ok(w)
            }
            _ => Err(Error::UnexpectedToken {
                expected: "word".to_string(),
                got: format!("{:?}", self.current_kind()),
            }),
        }
    }

    fn at_end(&self) -> bool {
        matches!(self.current_kind(), TokenKind::Eof)
    }
}

#[cfg(test)]
mod tests {
    use super::*;
    use crate::lexer::Lexer;

    fn parse(input: &str) -> Result<IndexMap<String, Value>> {
        let tokens = Lexer::new(input).tokenize()?;
        Parser::new(tokens).parse()
    }

    #[test]
    fn test_simple_values() {
        let data = parse("a: 1, b: hello, c: true, d: ~").unwrap();
        assert_eq!(data.get("a").unwrap().as_int(), Some(1));
        assert_eq!(data.get("b").unwrap().as_str(), Some("hello"));
        assert_eq!(data.get("c").unwrap().as_bool(), Some(true));
        assert!(data.get("d").unwrap().is_null());
    }

    #[test]
    fn test_object() {
        let data = parse("obj: {x: 1, y: 2}").unwrap();
        let obj = data.get("obj").unwrap().as_object().unwrap();
        assert_eq!(obj.get("x").unwrap().as_int(), Some(1));
        assert_eq!(obj.get("y").unwrap().as_int(), Some(2));
    }

    #[test]
    fn test_array() {
        let data = parse("arr: [1, 2, 3]").unwrap();
        let arr = data.get("arr").unwrap().as_array().unwrap();
        assert_eq!(arr.len(), 3);
        assert_eq!(arr[0].as_int(), Some(1));
    }

    #[test]
    fn test_struct_and_table() {
        let input = r#"
            @struct point (x: int, y: int)
            points: @table point [
                (1, 2),
                (3, 4),
            ]
        "#;
        let tokens = Lexer::new(input).tokenize().unwrap();
        let mut parser = Parser::new(tokens);
        let data = parser.parse().unwrap();

        let points = data.get("points").unwrap().as_array().unwrap();
        assert_eq!(points.len(), 2);

        let p0 = points[0].as_object().unwrap();
        assert_eq!(p0.get("x").unwrap().as_int(), Some(1));
        assert_eq!(p0.get("y").unwrap().as_int(), Some(2));
    }

    // -------------------------------------------------------------------------
    // Union parsing
    // -------------------------------------------------------------------------

    #[test]
    fn test_union_def() {
        let input = r#"
            @union Shape {
                Circle(radius: float),
                Rectangle(width: float, height: float),
                Point(),
            }
        "#;
        let tokens = Lexer::new(input).tokenize().unwrap();
        let mut parser = Parser::new(tokens);
        parser.parse().unwrap();
        let unions = parser.into_unions();
        let shape = unions.get("Shape").unwrap();
        assert_eq!(shape.variants.len(), 3);
        assert_eq!(shape.variants[0].name, "Circle");
        assert_eq!(shape.variants[0].fields.len(), 1);
        assert_eq!(shape.variants[1].name, "Rectangle");
        assert_eq!(shape.variants[1].fields.len(), 2);
        assert_eq!(shape.variants[2].name, "Point");
        assert_eq!(shape.variants[2].fields.len(), 0);
    }

    // -------------------------------------------------------------------------
    // Map parsing
    // -------------------------------------------------------------------------

    #[test]
    fn test_map_value() {
        let data = parse("m: @map {1: one, 2: two}").unwrap();
        let m = data.get("m").unwrap().as_map().unwrap();
        assert_eq!(m.len(), 2);
        assert_eq!(m[0].0.as_int(), Some(1));
        assert_eq!(m[0].1.as_str(), Some("one"));
        assert_eq!(m[1].0.as_int(), Some(2));
        assert_eq!(m[1].1.as_str(), Some("two"));
    }

    #[test]
    fn test_map_with_string_keys() {
        let data = parse(r#"m: @map {"key1": 10, "key2": 20}"#).unwrap();
        let m = data.get("m").unwrap().as_map().unwrap();
        assert_eq!(m.len(), 2);
    }

    #[test]
    fn test_map_empty() {
        let data = parse("m: @map {}").unwrap();
        let m = data.get("m").unwrap().as_map().unwrap();
        assert_eq!(m.len(), 0);
    }

    // -------------------------------------------------------------------------
    // Ref and Tagged values
    // -------------------------------------------------------------------------

    #[test]
    fn test_ref_value() {
        let data = parse("config: !base_config").unwrap();
        assert_eq!(data.get("config").unwrap().as_ref_name(), Some("base_config"));
    }

    #[test]
    fn test_tagged_value() {
        let data = parse("status: :ok 200").unwrap();
        let (tag, inner) = data.get("status").unwrap().as_tagged().unwrap();
        assert_eq!(tag, "ok");
        assert_eq!(inner.as_int(), Some(200));
    }

    #[test]
    fn test_tagged_null() {
        let data = parse("status: :none ~").unwrap();
        let (tag, inner) = data.get("status").unwrap().as_tagged().unwrap();
        assert_eq!(tag, "none");
        assert!(inner.is_null());
    }

    #[test]
    fn test_tagged_value_no_space_after_colon() {
        // key::tag without spaces — works because lexer emits Colon Colon Word
        let data = parse("status::ok 200").unwrap();
        let (tag, inner) = data.get("status").unwrap().as_tagged().unwrap();
        assert_eq!(tag, "ok");
        assert_eq!(inner.as_int(), Some(200));
    }

    #[test]
    fn test_key_value_no_space_after_colon() {
        // key:value without space — works because lexer emits Word Colon Word
        let data = parse("name:alice\nage:30").unwrap();
        assert_eq!(data.get("name").unwrap().as_str(), Some("alice"));
        assert_eq!(data.get("age").unwrap().as_int(), Some(30));
    }

    // -------------------------------------------------------------------------
    // Tuple and nested structures
    // -------------------------------------------------------------------------

    #[test]
    fn test_tuple_value() {
        let data = parse("point: (1, 2, 3)").unwrap();
        let arr = data.get("point").unwrap().as_array().unwrap();
        assert_eq!(arr.len(), 3);
        assert_eq!(arr[0].as_int(), Some(1));
        assert_eq!(arr[1].as_int(), Some(2));
        assert_eq!(arr[2].as_int(), Some(3));
    }

    #[test]
    fn test_nested_object() {
        let data = parse("outer: {inner: {x: 1}}").unwrap();
        let outer = data.get("outer").unwrap().as_object().unwrap();
        let inner = outer.get("inner").unwrap().as_object().unwrap();
        assert_eq!(inner.get("x").unwrap().as_int(), Some(1));
    }

    #[test]
    fn test_nested_arrays() {
        let data = parse("matrix: [[1, 2], [3, 4]]").unwrap();
        let matrix = data.get("matrix").unwrap().as_array().unwrap();
        assert_eq!(matrix.len(), 2);
        let row0 = matrix[0].as_array().unwrap();
        assert_eq!(row0[0].as_int(), Some(1));
    }

    // -------------------------------------------------------------------------
    // Struct fields with types
    // -------------------------------------------------------------------------

    #[test]
    fn test_struct_with_nullable_field() {
        let input = r#"
            @struct user (name: string, email: string?)
            users: @table user [
                (alice, "a@test.com"),
                (bob, ~),
            ]
        "#;
        let tokens = Lexer::new(input).tokenize().unwrap();
        let mut parser = Parser::new(tokens);
        let data = parser.parse().unwrap();
        let schemas = parser.into_schemas();

        let schema = schemas.get("user").unwrap();
        assert!(schema.fields[1].field_type.nullable);

        let users = data.get("users").unwrap().as_array().unwrap();
        assert_eq!(users.len(), 2);
        assert!(users[1].as_object().unwrap().get("email").unwrap().is_null());
    }

    #[test]
    fn test_struct_with_array_field() {
        let input = r#"
            @struct item (name: string, tags: []string)
            items: @table item [
                (widget, [cool, useful]),
            ]
        "#;
        let tokens = Lexer::new(input).tokenize().unwrap();
        let mut parser = Parser::new(tokens);
        let data = parser.parse().unwrap();

        let items = data.get("items").unwrap().as_array().unwrap();
        let tags = items[0].as_object().unwrap().get("tags").unwrap().as_array().unwrap();
        assert_eq!(tags.len(), 2);
    }

    // -------------------------------------------------------------------------
    // Root-array directive
    // -------------------------------------------------------------------------

    #[test]
    fn test_root_array_directive() {
        let input = "@root-array\nroot: [1, 2, 3]";
        let tokens = Lexer::new(input).tokenize().unwrap();
        let mut parser = Parser::new(tokens);
        parser.parse().unwrap();
        assert!(parser.is_root_array());
    }

    // -------------------------------------------------------------------------
    // Ref key at top level
    // -------------------------------------------------------------------------

    #[test]
    fn test_ref_key_at_top_level() {
        let input = "!defaults: {theme: dark}";
        let data = parse(input).unwrap();
        assert!(data.contains_key("!defaults"));
        let obj = data.get("!defaults").unwrap().as_object().unwrap();
        assert_eq!(obj.get("theme").unwrap().as_str(), Some("dark"));
    }

    // -------------------------------------------------------------------------
    // String keys
    // -------------------------------------------------------------------------

    #[test]
    fn test_string_key() {
        let data = parse(r#""my key": 42"#).unwrap();
        assert_eq!(data.get("my key").unwrap().as_int(), Some(42));
    }

    // -------------------------------------------------------------------------
    // Error cases
    // -------------------------------------------------------------------------

    #[test]
    fn test_unexpected_token_error() {
        let result = parse("] invalid");
        // The parser may skip unexpected tokens or error
        // Just ensure it doesn't panic
        let _ = result;
    }

    #[test]
    fn test_missing_colon_error() {
        // A word followed by a value without colon
        let input = "key value";
        let result = parse(input);
        assert!(result.is_err());
    }

    #[test]
    fn test_unknown_struct_in_table() {
        let input = "data: @table nonexistent [(1, 2)]";
        let result = parse(input);
        assert!(result.is_err());
    }

    // -------------------------------------------------------------------------
    // Struct field type defaults
    // -------------------------------------------------------------------------

    #[test]
    fn test_struct_field_without_type() {
        let input = r#"
            @struct simple (name, value)
            items: @table simple [
                (hello, world),
            ]
        "#;
        let tokens = Lexer::new(input).tokenize().unwrap();
        let mut parser = Parser::new(tokens);
        let data = parser.parse().unwrap();
        let schemas = parser.into_schemas();

        // Fields without explicit type default to "string"
        let schema = schemas.get("simple").unwrap();
        assert_eq!(schema.fields[0].field_type.base, "string");
        assert_eq!(schema.fields[1].field_type.base, "string");

        let items = data.get("items").unwrap().as_array().unwrap();
        assert_eq!(items[0].as_object().unwrap().get("name").unwrap().as_str(), Some("hello"));
    }

    // -------------------------------------------------------------------------
    // Unknown directive
    // -------------------------------------------------------------------------

    #[test]
    fn test_unknown_directive_ignored() {
        // Directive on its own line — next line is a key-value, not an argument
        let data = parse("@custom_directive\nkey: value").unwrap();
        assert_eq!(data.get("key").unwrap().as_str(), Some("value"));
    }

    #[test]
    fn test_unknown_directive_consumes_same_line_argument() {
        // Same-line word argument: consumed, not misparsed as a key
        let data = parse("@custom foo\nkey: value").unwrap();
        assert!(data.get("foo").is_none(), "foo should be consumed as directive arg, not a key");
        assert_eq!(data.get("key").unwrap().as_str(), Some("value"));

        // Same-line array argument
        let data = parse("@custom [1, 2, 3]\nkey: value").unwrap();
        assert_eq!(data.get("key").unwrap().as_str(), Some("value"));

        // Same-line object argument
        let data = parse("@custom {a: 1}\nkey: value").unwrap();
        assert_eq!(data.get("key").unwrap().as_str(), Some("value"));

        // No argument (directive alone on line)
        let data = parse("@custom\nkey: value").unwrap();
        assert_eq!(data.get("key").unwrap().as_str(), Some("value"));

        // No argument (directive at end of file)
        let data = parse("key: value\n@custom").unwrap();
        assert_eq!(data.get("key").unwrap().as_str(), Some("value"));

        // Argument on next line: NOT consumed
        let data = parse("@custom\nfoo: bar").unwrap();
        assert_eq!(data.get("foo").unwrap().as_str(), Some("bar"));
    }

    #[test]
    fn test_unknown_directive_value_consumes_argument() {
        // Spec §1.18: unknown directive in value position consumes argument, returns null
        let data = parse("key: @unknown [1, 2, 3]\nother: 42").unwrap();
        assert!(data.get("key").unwrap().is_null(), "unknown directive value should be null");
        assert_eq!(data.get("other").unwrap().as_int(), Some(42), "next key should parse normally");

        // With object argument
        let data = parse("key: @unknown {a: 1}\nother: ok").unwrap();
        assert!(data.get("key").unwrap().is_null());
        assert_eq!(data.get("other").unwrap().as_str(), Some("ok"));

        // With simple argument
        let data = parse("key: @unknown 42\nother: ok").unwrap();
        assert!(data.get("key").unwrap().is_null());
        assert_eq!(data.get("other").unwrap().as_str(), Some("ok"));

        // Without argument (just the directive)
        let data = parse("arr: [@unknown, 1, 2]").unwrap();
        let arr = data.get("arr").unwrap().as_array().unwrap();
        assert!(arr[0].is_null());
        assert_eq!(arr[1].as_int(), Some(1));
    }

    // -------------------------------------------------------------------------
    // Object with ref keys
    // -------------------------------------------------------------------------

    #[test]
    fn test_object_with_ref_key() {
        let data = parse("obj: {!base: 1, key: 2}").unwrap();
        let obj = data.get("obj").unwrap().as_object().unwrap();
        assert!(obj.contains_key("!base"));
        assert_eq!(obj.get("!base").unwrap().as_int(), Some(1));
        assert_eq!(obj.get("key").unwrap().as_int(), Some(2));
    }

    // -------------------------------------------------------------------------
    // Nested struct in table
    // -------------------------------------------------------------------------

    #[test]
    fn test_nested_struct_in_table() {
        let input = r#"
            @struct addr (city: string, zip: string)
            @struct person (name: string, home: addr)
            people: @table person [
                (alice, (Boston, "02101")),
                (bob, (NYC, "10001")),
            ]
        "#;
        let tokens = Lexer::new(input).tokenize().unwrap();
        let mut parser = Parser::new(tokens);
        let data = parser.parse().unwrap();

        let people = data.get("people").unwrap().as_array().unwrap();
        let alice_home = people[0].as_object().unwrap().get("home").unwrap().as_object().unwrap();
        assert_eq!(alice_home.get("city").unwrap().as_str(), Some("Boston"));
    }

    #[test]
    fn test_include_cycle_detection() {
        // Create a file that includes itself
        let dir = std::env::temp_dir();
        let file_path = dir.join("test_cycle_self.tl");
        std::fs::write(&file_path, "@include \"test_cycle_self.tl\"\nval: 1").unwrap();

        let content = std::fs::read_to_string(&file_path).unwrap();
        let tokens = Lexer::new(&content).tokenize().unwrap();
        let mut parser = Parser::new(tokens).with_base_path(&file_path);
        let result = parser.parse();
        assert!(result.is_err(), "Should detect self-referencing include");
        let err_msg = result.unwrap_err().to_string();
        assert!(err_msg.contains("Circular include"), "Error should mention circular include: {}", err_msg);

        std::fs::remove_file(&file_path).ok();
    }

    #[test]
    fn test_include_mutual_cycle_detection() {
        // Create two files that include each other: A -> B -> A
        let dir = std::env::temp_dir();
        let file_a = dir.join("test_cycle_a.tl");
        let file_b = dir.join("test_cycle_b.tl");
        std::fs::write(&file_a, "@include \"test_cycle_b.tl\"\na_val: 1").unwrap();
        std::fs::write(&file_b, "@include \"test_cycle_a.tl\"\nb_val: 2").unwrap();

        let content = std::fs::read_to_string(&file_a).unwrap();
        let tokens = Lexer::new(&content).tokenize().unwrap();
        let mut parser = Parser::new(tokens).with_base_path(&file_a);
        let result = parser.parse();
        assert!(result.is_err(), "Should detect mutual cycle between A and B");
        let err_msg = result.unwrap_err().to_string();
        assert!(err_msg.contains("Circular include"), "Error should mention circular include: {}", err_msg);

        std::fs::remove_file(&file_a).ok();
        std::fs::remove_file(&file_b).ok();
    }

    #[test]
    fn test_include_stack_propagated_to_child() {
        // Minimal sanity check: a fresh parser starts with an empty include stack.
        // Propagation to child parsers is exercised indirectly by the cycle
        // detection tests above, which rely on the stack being carried across
        // @include boundaries.
        let parser = Parser::new(vec![]);
        assert!(parser.include_stack.is_empty(), "New parser should have empty include stack");
    }

    // -------------------------------------------------------------------------
    // Bytes literal parsing
    // -------------------------------------------------------------------------

    #[test]
    fn test_bytes_literal_value() {
        let data = parse(r#"payload: b"cafef00d""#).unwrap();
        let val = data.get("payload").unwrap();
        assert_eq!(val.as_bytes(), Some(&[0xca, 0xfe, 0xf0, 0x0d][..]));
    }

    #[test]
    fn test_bytes_literal_empty_value() {
        let data = parse(r#"empty: b"""#).unwrap();
        let val = data.get("empty").unwrap();
        assert_eq!(val.as_bytes(), Some(&[][..]));
    }

    #[test]
    fn test_bytes_literal_in_array() {
        let data = parse(r#"arr: [b"cafe", b"babe"]"#).unwrap();
        let arr = data.get("arr").unwrap().as_array().unwrap();
        assert_eq!(arr[0].as_bytes(), Some(&[0xca, 0xfe][..]));
        assert_eq!(arr[1].as_bytes(), Some(&[0xba, 0xbe][..]));
    }

    #[test]
    fn test_bytes_literal_in_object() {
        let data = parse(r#"obj: {data: b"ff00"}"#).unwrap();
        let obj = data.get("obj").unwrap().as_object().unwrap();
        assert_eq!(obj.get("data").unwrap().as_bytes(), Some(&[0xff, 0x00][..]));
    }

    // -------------------------------------------------------------------------
    // Fuzz regression tests (full TeaLeaf::parse path)
    // -------------------------------------------------------------------------

    #[test]
    fn test_fuzz_deeply_nested_arrays_no_stack_overflow() {
        // Crafted input with 500 nested arrays — exceeds MAX_PARSE_DEPTH (256)
        let depth = 500;
        let input = format!("key: {}{}", "[".repeat(depth), "]".repeat(depth));
        let result = crate::TeaLeaf::parse(&input);
        match result {
            Err(e) => {
                let err = format!("{}", e);
                assert!(err.contains("nesting depth"), "Error should mention nesting depth: {}", err);
            }
            Ok(_) => panic!("Should fail with depth exceeded, not succeed"),
        }
    }

    #[test]
    fn test_fuzz_deeply_nested_objects_no_stack_overflow() {
        // Crafted input with 500 nested objects
        let depth = 500;
        let mut input = String::from("key: ");
        for i in 0..depth {
            input.push_str(&format!("{{k{}: ", i));
        }
        input.push_str("1");
        for _ in 0..depth {
            input.push('}');
        }
        let result = crate::TeaLeaf::parse(&input);
        assert!(result.is_err(), "Should fail with depth exceeded, not stack overflow");
    }

    #[test]
    fn test_fuzz_deeply_nested_tags_no_stack_overflow() {
        // Crafted input with 500 nested tags: :a :b :c ... value
        let depth = 500;
        let mut input = String::from("key: ");
        for i in 0..depth {
            input.push_str(&format!(":t{} ", i));
        }
        input.push_str("42");
        let result = crate::TeaLeaf::parse(&input);
        assert!(result.is_err(), "Should fail with depth exceeded, not stack overflow");
    }

    #[test]
    fn test_parse_depth_256_succeeds() {
        // 200 levels of nesting should succeed (within MAX_PARSE_DEPTH=256)
        let depth = 200;
        let input = format!("key: {}1{}", "[".repeat(depth), "]".repeat(depth));
        let result = crate::TeaLeaf::parse(&input);
        if let Err(e) = &result {
            panic!("200 levels of nesting should be fine: {}", e);
        }
    }

    #[test]
    fn test_fuzz_crash_e42e_full_parse_no_panic() {
        // Regression: fuzz_parse crash-e42e7ae2f5127519e7e60e87d1cbfbc2a5bf878d
        // Must not panic through TeaLeaf::parse (the actual fuzz path)
        let input = "\"0B\u{10}\u{3}#\"0BP\u{07FE}-----\u{061D}\u{07FE}\u{07FE}-----\u{061D}\u{3}#\"0B\u{10}\u{3}#\"0BP\u{07FE}-----\u{061D}\u{07FE}\u{07FE}-----\u{061D}\u{07FE}";
        let _ = crate::TeaLeaf::parse(input);
    }

    #[test]
    fn test_fuzz_crash_d038_full_parse_no_panic() {
        // Regression: fuzz_parse crash-d0387cbd639a8db9789ab68057f3c58c7bebbfa5
        // Large input with repeated date-like patterns. Must not panic.
        let input = "z\" \"-\"\t; \"\"\")\"\"\" 8] ] 02)3313312)313-333-333-3332)33-133-3-33331333302)33";
        let _ = crate::TeaLeaf::parse(input);
    }

    #[test]
    fn test_reject_value_only_schema_field_types() {
        // Spec §2.1: object, map, tuple, ref, tagged are value types, not schema field types
        for bad_type in &["object", "map", "tuple", "ref", "tagged"] {
            let input = format!("@struct Bad (field: {})\n", bad_type);
            let result = crate::TeaLeaf::parse(&input);
            assert!(result.is_err(), "should reject '{}' as schema field type", bad_type);
            let err = format!("{}", result.err().unwrap());
            assert!(err.contains("value type"), "error for '{}' should mention 'value type': {}", bad_type, err);
        }
        // Array of value-only type should also be rejected
        let result = crate::TeaLeaf::parse("@struct Bad (field: []object)\n");
        assert!(result.is_err(), "should reject '[]object' as schema field type");

        // Valid types should still work
        for good_type in &["string", "int", "int8", "float", "bool", "bytes", "timestamp", "MyStruct"] {
            let input = format!("@struct Good (field: {})\n", good_type);
            assert!(crate::TeaLeaf::parse(&input).is_ok(), "'{}' should be accepted", good_type);
        }
    }
}