Skip to main content

tealeaf/
lib.rs

1//! TeaLeaf - Schema-aware data format
2//!
3//! # Example
4//!
5//! ```rust
6//! use tealeaf::{TeaLeaf, Value};
7//!
8//! let doc = TeaLeaf::parse(r#"
9//!     @struct user (id: int, name: string)
10//!     users: @table user [
11//!         (1, alice),
12//!         (2, bob),
13//!     ]
14//! "#).unwrap();
15//!
16//! let users = doc.get("users").unwrap();
17//! ```
18
19mod types;
20mod lexer;
21mod parser;
22mod writer;
23mod reader;
24pub mod convert;
25pub mod builder;
26
27pub use types::{Error, Result, TLType, FieldType, Field, Schema, Union, Variant, Value, ObjectMap, MAGIC, VERSION, VERSION_MAJOR, VERSION_MINOR, HEADER_SIZE, MAX_STRING_LENGTH, MAX_OBJECT_FIELDS, MAX_ARRAY_LENGTH};
28pub use indexmap::IndexMap;
29pub use lexer::{Lexer, Token, TokenKind};
30pub use parser::Parser;
31pub use writer::Writer;
32pub use reader::Reader;
33pub use convert::{ToTeaLeaf, FromTeaLeaf, ConvertError, ToTeaLeafExt};
34pub use builder::TeaLeafBuilder;
35
36// Re-export derive macros when the "derive" feature is enabled
37#[cfg(feature = "derive")]
38pub use tealeaf_derive::{ToTeaLeaf, FromTeaLeaf};
39
40use std::collections::HashSet;
41use std::path::Path;
42
/// A parsed TeaLeaf document
///
/// Bundles the schema and union definitions with the top-level key/value
/// data of one document.
pub struct TeaLeaf {
    /// Schema definitions; `schema()` looks entries up by name.
    pub schemas: IndexMap<String, Schema>,
    /// Union definitions; `union()` looks entries up by name.
    pub unions: IndexMap<String, Union>,
    /// Top-level values; `get()` looks entries up by key.
    pub data: IndexMap<String, Value>,
    /// Tracks if the source JSON was a root-level array (for round-trip fidelity)
    is_root_array: bool,
}
51
52impl TeaLeaf {
53    /// Create a new TeaLeaf document from data and schemas.
54    ///
55    /// This constructor is primarily for programmatic document creation.
56    /// For parsing from formats, use `parse()`, `load()`, or `from_json()`.
57    pub fn new(schemas: IndexMap<String, Schema>, data: IndexMap<String, Value>) -> Self {
58        Self {
59            schemas,
60            unions: IndexMap::new(),
61            data,
62            is_root_array: false,
63        }
64    }
65
66    /// Parse TeaLeaf text format
67    pub fn parse(input: &str) -> Result<Self> {
68        let tokens = Lexer::new(input).tokenize()?;
69        let mut parser = Parser::new(tokens);
70        let data = parser.parse()?;
71        let is_root_array = parser.is_root_array();
72        let (schemas, unions) = parser.into_schemas_and_unions();
73        Ok(Self {
74            schemas,
75            unions,
76            data,
77            is_root_array,
78        })
79    }
80
81    /// Load from text file
82    ///
83    /// Include paths are resolved relative to the loaded file's directory.
84    pub fn load<P: AsRef<Path>>(path: P) -> Result<Self> {
85        let path = path.as_ref();
86        let content = std::fs::read_to_string(path)?;
87        let tokens = Lexer::new(&content).tokenize()?;
88        let mut parser = Parser::new(tokens).with_base_path(path);
89        let data = parser.parse()?;
90        let is_root_array = parser.is_root_array();
91        let (schemas, unions) = parser.into_schemas_and_unions();
92        Ok(Self {
93            schemas,
94            unions,
95            data,
96            is_root_array,
97        })
98    }
99
100    /// Get a value by key
101    pub fn get(&self, key: &str) -> Option<&Value> {
102        self.data.get(key)
103    }
104
105    /// Get a schema by name
106    pub fn schema(&self, name: &str) -> Option<&Schema> {
107        self.schemas.get(name)
108    }
109
110    /// Get a union by name
111    pub fn union(&self, name: &str) -> Option<&Union> {
112        self.unions.get(name)
113    }
114
115    /// Compile to binary format
116    pub fn compile<P: AsRef<Path>>(&self, path: P, compress: bool) -> Result<()> {
117        let mut writer = Writer::new();
118        writer.set_root_array(self.is_root_array);
119        for (_, schema) in &self.schemas {
120            writer.add_schema(schema.clone());
121        }
122        for (_, union_def) in &self.unions {
123            writer.add_union(union_def.clone());
124        }
125        for (key, value) in &self.data {
126            let schema = self.find_schema_for_value(value, key);
127            writer.add_section(key, value, schema)?;
128        }
129        writer.write(path, compress)
130    }
131
132    fn find_schema_for_value(&self, value: &Value, key: &str) -> Option<&Schema> {
133        // Try to find a matching schema for array values
134        if let Value::Array(arr) = value {
135            if arr.is_empty() {
136                // For empty arrays, try name-based matching (singularize key → schema name)
137                let singular = singularize(key);
138                return self.schemas.values().find(|s| s.name.eq_ignore_ascii_case(&singular));
139            }
140
141            // Sample multiple elements: first, middle, last
142            let sample_indices: Vec<usize> = {
143                let mut indices = vec![0];
144                if arr.len() > 2 { indices.push(arr.len() / 2); }
145                if arr.len() > 1 { indices.push(arr.len() - 1); }
146                indices
147            };
148
149            for schema in self.schemas.values() {
150                let all_match = sample_indices.iter().all(|&i| {
151                    if let Some(Value::Object(obj)) = arr.get(i) {
152                        // All required (non-nullable) schema fields must be present
153                        schema.fields.iter().all(|f| {
154                            f.field_type.nullable || obj.contains_key(&f.name)
155                        })
156                        // All obj keys must be schema fields (no extra keys)
157                        && obj.keys().all(|k| schema.fields.iter().any(|f| f.name == *k))
158                    } else {
159                        false
160                    }
161                });
162                if all_match {
163                    return Some(schema);
164                }
165            }
166        }
167        None
168    }
169
170    /// Parse from JSON string.
171    ///
172    /// # Stability Policy
173    ///
174    /// This function follows a **"plain JSON only"** policy:
175    /// - JSON is parsed as-is with **no magic conversion**
176    /// - `{"$ref": "x"}` stays as an Object, NOT a Ref
177    /// - `{"$tag": "ok", "$value": 200}` stays as an Object, NOT a Tagged
178    /// - `"0xcafef00d"` stays as a String, NOT Bytes
179    /// - `"2024-01-15T10:30:00Z"` stays as a String, NOT a Timestamp
180    /// - `[[1, "one"], [2, "two"]]` stays as an Array, NOT a Map
181    ///
182    /// To create special TeaLeaf types, use the text format or binary API directly.
183    ///
184    /// # Number Type Inference
185    ///
186    /// - Integers that fit `i64` → `Value::Int`
187    /// - Large positive integers that fit `u64` → `Value::UInt`
188    /// - Numbers with decimals or scientific notation → `Value::Float`
189    pub fn from_json(json: &str) -> Result<Self> {
190        let json_value: serde_json::Value = serde_json::from_str(json)
191            .map_err(|e| Error::ParseError(format!("Invalid JSON: {}", e)))?;
192
193        let (data, is_root_array) = match json_value {
194            serde_json::Value::Object(obj) => {
195                let map = obj.into_iter()
196                    .map(|(k, v)| (k, json_to_tealeaf_value(v)))
197                    .collect();
198                (map, false)
199            }
200            serde_json::Value::Array(_) => {
201                // Root-level array: store under "root" key but track for round-trip
202                let mut map = IndexMap::new();
203                map.insert("root".to_string(), json_to_tealeaf_value(json_value));
204                (map, true)
205            }
206            _ => {
207                // Other primitives (string, number, bool, null) at root
208                let mut map = IndexMap::new();
209                map.insert("root".to_string(), json_to_tealeaf_value(json_value));
210                (map, false)
211            }
212        };
213
214        Ok(Self {
215            schemas: IndexMap::new(),
216            unions: IndexMap::new(),
217            data,
218            is_root_array,
219        })
220    }
221
222    /// Parse from JSON string with automatic schema inference.
223    ///
224    /// This variant analyzes the JSON structure and automatically:
225    /// - Detects arrays of uniformly-structured objects
226    /// - Infers schema names from parent keys (e.g., "products" → "product")
227    /// - Generates `@struct` definitions for uniform arrays
228    /// - Enables `@table` format output when serialized
229    ///
230    /// Use `to_tl_with_schemas()` to serialize with the inferred schemas.
231    pub fn from_json_with_schemas(json: &str) -> Result<Self> {
232        let doc = Self::from_json(json)?;
233
234        let mut inferrer = SchemaInferrer::new();
235        inferrer.infer(&doc.data);
236        let (schemas, _) = inferrer.into_schemas();
237
238        Ok(Self {
239            schemas,
240            unions: IndexMap::new(),
241            data: doc.data,
242            is_root_array: doc.is_root_array,
243        })
244    }
245
246    /// Serialize to TeaLeaf text format with schemas.
247    ///
248    /// If schemas are present (either from parsing or inference), outputs
249    /// `@struct` definitions and uses `@table` format for matching arrays.
250    ///
251    /// If this document represents a root-level JSON array (from `from_json`),
252    /// the output will include `@root-array` directive for round-trip fidelity.
253    pub fn to_tl_with_schemas(&self) -> String {
254        let mut output = String::new();
255
256        // Emit @root-array directive if this represents a root-level array
257        if self.is_root_array {
258            output.push_str("@root-array\n\n");
259        }
260
261        if self.schemas.is_empty() && self.unions.is_empty() {
262            output.push_str(&dumps(&self.data));
263        } else {
264            // Preserve insertion order from schemas/unions
265            let schema_order: Vec<String> = self.schemas.keys().cloned().collect();
266            let union_order: Vec<String> = self.unions.keys().cloned().collect();
267            output.push_str(&dumps_with_schemas(
268                &self.data, &self.schemas, &schema_order,
269                &self.unions, &union_order,
270            ));
271        }
272
273        output
274    }
275
276    /// Convert to JSON string (pretty-printed).
277    ///
278    /// # Stability Policy - TeaLeaf→JSON Fixed Representations
279    ///
280    /// Special TeaLeaf types serialize to JSON with these **stable formats**:
281    ///
282    /// | TeaLeaf Type | JSON Format                                    |
283    /// |------------|------------------------------------------------|
284    /// | Bytes      | `"0xcafef00d"` (lowercase hex with 0x prefix) |
285    /// | Timestamp  | `"2024-01-15T10:30:00.123Z"` (ISO 8601 UTC)   |
286    /// | Ref        | `{"$ref": "key_name"}`                         |
287    /// | Tagged     | `{"$tag": "tag_name", "$value": <value>}`     |
288    /// | Map        | `[[key1, val1], [key2, val2], ...]`           |
289    /// | Float NaN  | `null` (JSON has no NaN)                       |
290    /// | Float ±Inf | `null` (JSON has no Infinity)                  |
291    ///
292    /// These representations are **contractually stable** and will not change.
293    pub fn to_json(&self) -> Result<String> {
294        // If the source was a root-level array, return it directly (not wrapped in object)
295        if self.is_root_array {
296            if let Some(root_value) = self.data.get("root") {
297                return serde_json::to_string_pretty(&tealeaf_to_json_value(root_value))
298                    .map_err(|e| Error::ParseError(format!("JSON serialization failed: {}", e)));
299            }
300        }
301
302        let json_obj: serde_json::Map<String, serde_json::Value> = self.data
303            .iter()
304            .map(|(k, v)| (k.clone(), tealeaf_to_json_value(v)))
305            .collect();
306
307        serde_json::to_string_pretty(&serde_json::Value::Object(json_obj))
308            .map_err(|e| Error::ParseError(format!("JSON serialization failed: {}", e)))
309    }
310
311    /// Convert to compact JSON string (no pretty printing)
312    pub fn to_json_compact(&self) -> Result<String> {
313        // If the source was a root-level array, return it directly (not wrapped in object)
314        if self.is_root_array {
315            if let Some(root_value) = self.data.get("root") {
316                return serde_json::to_string(&tealeaf_to_json_value(root_value))
317                    .map_err(|e| Error::ParseError(format!("JSON serialization failed: {}", e)));
318            }
319        }
320
321        let json_obj: serde_json::Map<String, serde_json::Value> = self.data
322            .iter()
323            .map(|(k, v)| (k.clone(), tealeaf_to_json_value(v)))
324            .collect();
325
326        serde_json::to_string(&serde_json::Value::Object(json_obj))
327            .map_err(|e| Error::ParseError(format!("JSON serialization failed: {}", e)))
328    }
329
330    /// Set whether the document represents a root-level array.
331    pub fn set_root_array(&mut self, is_root_array: bool) {
332        self.is_root_array = is_root_array;
333    }
334
335    /// Create a TeaLeaf document from a binary Reader.
336    ///
337    /// Reads all sections from the reader and carries schemas and unions through.
338    pub fn from_reader(reader: &Reader) -> Result<Self> {
339        let mut data = IndexMap::new();
340        for key in reader.keys() {
341            data.insert(key.to_string(), reader.get(key)?);
342        }
343        let schemas: IndexMap<String, Schema> = reader.schemas.iter()
344            .map(|s| (s.name.clone(), s.clone()))
345            .collect();
346        let unions: IndexMap<String, Union> = reader.unions.iter()
347            .map(|u| (u.name.clone(), u.clone()))
348            .collect();
349        let mut doc = Self {
350            schemas,
351            unions,
352            data,
353            is_root_array: reader.is_root_array(),
354        };
355        doc.set_root_array(reader.is_root_array());
356        Ok(doc)
357    }
358
359    /// Create a TeaLeaf document from a single DTO.
360    ///
361    /// The DTO is placed under the given `key` in the document data map.
362    /// Schemas are automatically collected from the DTO type.
363    pub fn from_dto<T: convert::ToTeaLeaf>(key: &str, dto: &T) -> Self {
364        let schemas = T::collect_schemas();
365        let unions = T::collect_unions();
366        let mut data = IndexMap::new();
367        data.insert(key.to_string(), dto.to_tealeaf_value());
368        let mut doc = Self::new(schemas, data);
369        doc.unions = unions;
370        doc
371    }
372
373    /// Create a TeaLeaf document from a slice of DTOs.
374    ///
375    /// The array is placed under the given `key` and schemas are
376    /// collected from the element type.
377    pub fn from_dto_array<T: convert::ToTeaLeaf>(key: &str, items: &[T]) -> Self {
378        let schemas = T::collect_schemas();
379        let unions = T::collect_unions();
380        let mut data = IndexMap::new();
381        let arr = Value::Array(items.iter().map(|i| i.to_tealeaf_value()).collect());
382        data.insert(key.to_string(), arr);
383        let mut doc = Self::new(schemas, data);
384        doc.unions = unions;
385        doc
386    }
387
388    /// Extract a DTO from this document by key.
389    pub fn to_dto<T: convert::FromTeaLeaf>(&self, key: &str) -> Result<T> {
390        let value = self
391            .get(key)
392            .ok_or_else(|| Error::MissingField(key.to_string()))?;
393        T::from_tealeaf_value(value).map_err(|e| e.into())
394    }
395
396    /// Extract all values under a key as `Vec<T>`.
397    pub fn to_dto_vec<T: convert::FromTeaLeaf>(&self, key: &str) -> Result<Vec<T>> {
398        let value = self
399            .get(key)
400            .ok_or_else(|| Error::MissingField(key.to_string()))?;
401        let arr = value
402            .as_array()
403            .ok_or_else(|| Error::ParseError("Expected array".into()))?;
404        arr.iter()
405            .map(|v| T::from_tealeaf_value(v).map_err(|e| e.into()))
406            .collect()
407    }
408}
409
410/// Convert JSON value to TeaLeaf value (best-effort)
411fn json_to_tealeaf_value(json: serde_json::Value) -> Value {
412    match json {
413        serde_json::Value::Null => Value::Null,
414        serde_json::Value::Bool(b) => Value::Bool(b),
415        serde_json::Value::Number(n) => {
416            if let Some(i) = n.as_i64() {
417                Value::Int(i)
418            } else if let Some(u) = n.as_u64() {
419                Value::UInt(u)
420            } else {
421                let raw = n.to_string();
422                // Pure integer that doesn't fit i64/u64 → preserve exactly
423                if !raw.contains('.') && !raw.contains('e') && !raw.contains('E') {
424                    Value::JsonNumber(raw)
425                } else {
426                    match n.as_f64() {
427                        Some(f) if f.is_finite() => Value::Float(f),
428                        _ => Value::JsonNumber(raw),
429                    }
430                }
431            }
432        }
433        serde_json::Value::String(s) => Value::String(s),
434        serde_json::Value::Array(arr) => {
435            Value::Array(arr.into_iter().map(json_to_tealeaf_value).collect())
436        }
437        serde_json::Value::Object(obj) => {
438            Value::Object(
439                obj.into_iter()
440                    .map(|(k, v)| (k, json_to_tealeaf_value(v)))
441                    .collect()
442            )
443        }
444    }
445}
446
447/// Convert TeaLeaf value to JSON value
448///
449/// Type preservation:
450/// - Value::Int → JSON integer (e.g., 42)
451/// - Value::UInt → JSON integer (e.g., 18446744073709551615)
452/// - Value::Float → JSON float (e.g., 42.0)
453///
454/// Integer types are tried first during JSON import (i64, then u64) so that
455/// values within 64-bit range stay exact. Only true floats fall through to f64.
456fn tealeaf_to_json_value(tl: &Value) -> serde_json::Value {
457    match tl {
458        Value::Null => serde_json::Value::Null,
459        Value::Bool(b) => serde_json::Value::Bool(*b),
460        Value::Int(i) => serde_json::Value::Number((*i).into()),
461        Value::UInt(u) => serde_json::Value::Number((*u).into()),
462        Value::Float(f) => {
463            // Always output floats as floats - the type distinction is intentional
464            serde_json::Number::from_f64(*f)
465                .map(serde_json::Value::Number)
466                .unwrap_or(serde_json::Value::Null)
467        }
468        Value::String(s) => serde_json::Value::String(s.clone()),
469        Value::Bytes(b) => {
470            // Encode bytes as hex string with 0x prefix
471            let hex: String = b.iter().map(|byte| format!("{:02x}", byte)).collect();
472            serde_json::Value::String(format!("0x{}", hex))
473        }
474        Value::Array(arr) => {
475            serde_json::Value::Array(arr.iter().map(tealeaf_to_json_value).collect())
476        }
477        Value::Object(obj) => {
478            let map: serde_json::Map<String, serde_json::Value> = obj
479                .iter()
480                .map(|(k, v)| (k.clone(), tealeaf_to_json_value(v)))
481                .collect();
482            serde_json::Value::Object(map)
483        }
484        Value::Map(pairs) => {
485            // Convert map to array of [key, value] pairs
486            let arr: Vec<serde_json::Value> = pairs
487                .iter()
488                .map(|(k, v)| {
489                    serde_json::Value::Array(vec![
490                        tealeaf_to_json_value(k),
491                        tealeaf_to_json_value(v),
492                    ])
493                })
494                .collect();
495            serde_json::Value::Array(arr)
496        }
497        Value::Ref(r) => {
498            // Encode ref as object with special key
499            let mut obj = serde_json::Map::new();
500            obj.insert("$ref".to_string(), serde_json::Value::String(r.clone()));
501            serde_json::Value::Object(obj)
502        }
503        Value::Tagged(tag, inner) => {
504            // Encode tagged value as object
505            let mut obj = serde_json::Map::new();
506            obj.insert("$tag".to_string(), serde_json::Value::String(tag.clone()));
507            obj.insert("$value".to_string(), tealeaf_to_json_value(inner));
508            serde_json::Value::Object(obj)
509        }
510        Value::Timestamp(ts, tz) => {
511            serde_json::Value::String(format_timestamp_millis(*ts, *tz))
512        }
513        Value::JsonNumber(s) => {
514            s.parse::<serde_json::Number>()
515                .map(serde_json::Value::Number)
516                .unwrap_or_else(|_| serde_json::Value::String(s.clone()))
517        }
518    }
519}
520
/// Read a binary TeaLeaf file
///
/// Thin convenience wrapper: forwards `path` to `Reader::open` and returns
/// its result unchanged.
pub fn open<P: AsRef<Path>>(path: P) -> Result<Reader> {
    Reader::open(path)
}
525
/// Parse TeaLeaf text
///
/// Thin convenience wrapper: forwards `input` to `TeaLeaf::parse` and
/// returns its result unchanged.
pub fn parse(input: &str) -> Result<TeaLeaf> {
    TeaLeaf::parse(input)
}
530
531/// Convenience: load and get data
532pub fn loads(input: &str) -> Result<IndexMap<String, Value>> {
533    Ok(TeaLeaf::parse(input)?.data)
534}
535
/// Check if a string needs quoting when serialized to TeaLeaf format.
/// Returns true if the string could be misinterpreted as another type.
///
/// A string may be written bare only if all of the following hold:
/// - it is non-empty;
/// - it is not one of the literals `true`, `false`, `null`, `~`, `NaN`,
///   `inf`, `Infinity`;
/// - it matches the spec grammar
///   `name = (letter | "_") { letter | digit | "_" | "-" | "." }` (ASCII only).
///
/// Because the first character must be a letter or `_`, strings that start
/// with a digit, a sign, or a `0x`/`0b` prefix are always quoted.
fn needs_quoting(s: &str) -> bool {
    // Reserved words, null literal, and float literals the lexer would interpret
    if matches!(s, "true" | "false" | "null" | "~" | "NaN" | "inf" | "Infinity") {
        return true;
    }

    let mut chars = s.chars();

    // Empty strings have no bare form.
    let first = match chars.next() {
        Some(c) => c,
        None => return true,
    };

    // Must start with letter or underscore per grammar.
    if !(first.is_ascii_alphabetic() || first == '_') {
        return true;
    }

    // Whitelist approach for the rest: any character outside [a-zA-Z0-9_.-]
    // (Unicode, whitespace, punctuation, ...) requires quoting to ensure a
    // safe round-trip through the parser.
    !chars.all(|c| c.is_ascii_alphanumeric() || matches!(c, '_' | '-' | '.'))
}
584
585/// Write a key to the output, quoting if necessary for safe round-trip.
586fn write_key(out: &mut String, key: &str) {
587    if needs_quoting(key) {
588        out.push('"');
589        out.push_str(&escape_string(key));
590        out.push('"');
591    } else {
592        out.push_str(key);
593    }
594}
595
596/// Write a map key per spec grammar: `map_key = string | name | integer`.
597/// Int/UInt are written as-is. String values use `write_key` for quoting.
598/// Other value types (Null, Bool, Float, etc.) are coerced to quoted strings
599/// so that the text format always round-trips through the parser.
600fn write_map_key(out: &mut String, key: &Value) {
601    match key {
602        Value::Int(i) => out.push_str(&i.to_string()),
603        Value::UInt(u) => out.push_str(&u.to_string()),
604        Value::String(s) => write_key(out, s),
605        // Coerce non-spec key types to quoted strings for text format safety
606        Value::Null => out.push_str("\"~\""),
607        Value::Bool(b) => { out.push('"'); out.push_str(if *b { "true" } else { "false" }); out.push('"'); }
608        Value::Float(f) => { out.push('"'); out.push_str(&f.to_string()); out.push('"'); }
609        Value::JsonNumber(s) => { out.push('"'); out.push_str(s); out.push('"'); }
610        Value::Timestamp(ts, tz) => { out.push('"'); out.push_str(&format_timestamp_millis(*ts, *tz)); out.push('"'); }
611        Value::Bytes(b) => {
612            out.push_str("\"0x");
613            for byte in b { out.push_str(&format!("{:02x}", byte)); }
614            out.push('"');
615        }
616        Value::Ref(r) => { out.push('"'); out.push('!'); out.push_str(r); out.push('"'); }
617        Value::Tagged(tag, _) => { out.push('"'); out.push(':'); out.push_str(tag); out.push('"'); }
618        Value::Array(_) | Value::Object(_) | Value::Map(_) => out.push_str("\"\""),
619    }
620}
621
622pub fn dumps(data: &IndexMap<String, Value>) -> String {
623    let mut out = String::new();
624    for (key, value) in data {
625        write_key(&mut out, key);
626        out.push_str(": ");
627        write_value(&mut out, value, 0);
628        out.push('\n');
629    }
630    out
631}
632
/// Escape a string for TeaLeaf text output.
///
/// Backslash, double quote, newline, tab, carriage return, backspace and
/// form feed get their two-character escapes. Any other control character
/// is written as `\uXXXX` (lowercase hex, one escape per UTF-16 unit).
/// Everything else is copied through unchanged.
fn escape_string(s: &str) -> String {
    let mut escaped = String::with_capacity(s.len());
    for ch in s.chars() {
        let short_form = match ch {
            '\\' => Some("\\\\"),
            '"' => Some("\\\""),
            '\n' => Some("\\n"),
            '\t' => Some("\\t"),
            '\r' => Some("\\r"),
            '\u{0008}' => Some("\\b"),
            '\u{000C}' => Some("\\f"),
            _ => None,
        };

        if let Some(seq) = short_form {
            escaped.push_str(seq);
        } else if ch.is_control() {
            // Other control characters use \uXXXX
            let mut units = [0u16; 2];
            for unit in ch.encode_utf16(&mut units).iter() {
                escaped.push_str(&format!("\\u{:04x}", unit));
            }
        } else {
            escaped.push(ch);
        }
    }
    escaped
}
657
/// Format a float so the text always reads back as a float.
///
/// Non-finite values use the keywords `NaN`, `inf` and `-inf`. For finite
/// values, `f64::to_string()` is used when it already contains a decimal
/// point or exponent. An integer-looking result gets `.0` appended when it
/// has at most 15 digits; longer ones (e.g. 6.022e23, which `to_string()`
/// expands to "602200000000000000000000") are written in scientific notation
/// so they are not reparsed as an overflowing integer.
fn format_float(f: f64) -> String {
    // Non-finite values map to keywords the lexer recognizes
    if f.is_nan() {
        return String::from("NaN");
    }
    if f.is_infinite() {
        return String::from(if f > 0.0 { "inf" } else { "-inf" });
    }

    let plain = f.to_string();
    let reads_as_float = plain.contains(|c: char| matches!(c, '.' | 'e' | 'E'));
    if reads_as_float {
        return plain;
    }

    // Integer-looking text: count digits without the sign.
    let digit_count = plain.trim_start_matches('-').len();
    if digit_count > 15 {
        format!("{:e}", f)
    } else {
        plain + ".0"
    }
}
687
688fn write_value(out: &mut String, value: &Value, indent: usize) {
689    match value {
690        Value::Null => out.push('~'),
691        Value::Bool(b) => out.push_str(if *b { "true" } else { "false" }),
692        Value::Int(i) => out.push_str(&i.to_string()),
693        Value::UInt(u) => out.push_str(&u.to_string()),
694        Value::JsonNumber(s) => out.push_str(s),
695        Value::Float(f) => out.push_str(&format_float(*f)),
696        Value::String(s) => {
697            if needs_quoting(s) {
698                out.push('"');
699                out.push_str(&escape_string(s));
700                out.push('"');
701            } else {
702                out.push_str(s);
703            }
704        }
705        Value::Bytes(b) => {
706            out.push_str("b\"");
707            for byte in b {
708                out.push_str(&format!("{:02x}", byte));
709            }
710            out.push('"');
711        }
712        Value::Array(arr) => {
713            out.push('[');
714            for (i, v) in arr.iter().enumerate() {
715                if i > 0 { out.push_str(", "); }
716                write_value(out, v, indent);
717            }
718            out.push(']');
719        }
720        Value::Object(obj) => {
721            out.push('{');
722            for (i, (k, v)) in obj.iter().enumerate() {
723                if i > 0 { out.push_str(", "); }
724                write_key(out, k);
725                out.push_str(": ");
726                write_value(out, v, indent);
727            }
728            out.push('}');
729        }
730        Value::Map(pairs) => {
731            out.push_str("@map {");
732            let mut first = true;
733            for (k, v) in pairs {
734                if !first { out.push_str(", "); }
735                first = false;
736                // Map keys are restricted to string | name | integer per spec.
737                // Write Int/UInt directly; convert other types to quoted strings.
738                write_map_key(out, k);
739                out.push_str(": ");
740                write_value(out, v, indent);
741            }
742            out.push('}');
743        }
744        Value::Ref(r) => {
745            out.push('!');
746            out.push_str(r);
747        }
748        Value::Tagged(tag, inner) => {
749            out.push(':');
750            out.push_str(tag);
751            out.push(' ');
752            write_value(out, inner, indent);
753        }
754        Value::Timestamp(ts, tz) => {
755            out.push_str(&format_timestamp_millis(*ts, *tz));
756        }
757    }
758}
759
760/// Format a Unix-millis timestamp as an ISO 8601 string with timezone offset.
761/// Handles negative timestamps (pre-epoch dates) correctly using Euclidean division.
762/// Years outside [0000, 9999] are clamped to the boundary per spec (4-digit years only).
763/// When tz_offset_minutes is 0, emits 'Z' suffix. Otherwise emits +HH:MM or -HH:MM.
764fn format_timestamp_millis(ts: i64, tz_offset_minutes: i16) -> String {
765    // Clamp to representable ISO 8601 range (years 0000-9999).
766    // Year 0000-01-01T00:00:00Z = -62167219200000 ms
767    // Year 9999-12-31T23:59:59.999Z = 253402300799999 ms
768    const MIN_TS: i64 = -62_167_219_200_000;
769    const MAX_TS: i64 = 253_402_300_799_999;
770    let ts = ts.clamp(MIN_TS, MAX_TS);
771
772    // Apply timezone offset to get local time for display
773    let local_ts = ts + (tz_offset_minutes as i64) * 60_000;
774    let local_ts = local_ts.clamp(MIN_TS, MAX_TS);
775
776    let secs = local_ts.div_euclid(1000);
777    let millis = local_ts.rem_euclid(1000);
778    let days = secs.div_euclid(86400);
779    let time_secs = secs.rem_euclid(86400);
780    let hours = time_secs / 3600;
781    let mins = (time_secs % 3600) / 60;
782    let secs_rem = time_secs % 60;
783    let (year, month, day) = days_to_ymd(days);
784
785    let tz_suffix = if tz_offset_minutes == 0 {
786        "Z".to_string()
787    } else {
788        let sign = if tz_offset_minutes > 0 { '+' } else { '-' };
789        let abs = tz_offset_minutes.unsigned_abs();
790        format!("{}{:02}:{:02}", sign, abs / 60, abs % 60)
791    };
792
793    if millis > 0 {
794        format!("{:04}-{:02}-{:02}T{:02}:{:02}:{:02}.{:03}{}",
795            year, month, day, hours, mins, secs_rem, millis, tz_suffix)
796    } else {
797        format!("{:04}-{:02}-{:02}T{:02}:{:02}:{:02}{}",
798            year, month, day, hours, mins, secs_rem, tz_suffix)
799    }
800}
801
/// Translate a day count relative to 1970-01-01 into a proleptic Gregorian
/// `(year, month, day)` triple.
///
/// Uses Howard Hinnant's civil-from-days algorithm, carried out in `i64` so that
/// far pre-epoch day numbers work. Internally the year is treated as starting
/// on 1 March, which puts the leap day at the end of the year.
fn days_to_ymd(days: i64) -> (i64, u32, u32) {
    // Length of one 400-year Gregorian cycle ("era") in days.
    const DAYS_PER_ERA: i64 = 146_097;
    // Shifts the origin from 1970-01-01 to 0000-03-01.
    const EPOCH_SHIFT: i64 = 719_468;

    let shifted = days + EPOCH_SHIFT;
    // Floor division / non-negative remainder, so negative day numbers land in
    // the correct era with a day-of-era in [0, 146096].
    let era = shifted.div_euclid(DAYS_PER_ERA);
    let day_of_era = shifted.rem_euclid(DAYS_PER_ERA) as u32;

    let year_of_era =
        (day_of_era - day_of_era / 1460 + day_of_era / 36524 - day_of_era / 146096) / 365;
    let day_of_year = day_of_era - (365 * year_of_era + year_of_era / 4 - year_of_era / 100);

    // Month index counted from March (0 = March, ..., 11 = February).
    let month_index = (5 * day_of_year + 2) / 153;
    let day = day_of_year - (153 * month_index + 2) / 5 + 1;
    let month = if month_index < 10 {
        month_index + 3
    } else {
        month_index - 9
    };

    // January and February belong to the following calendar year.
    let march_based_year = i64::from(year_of_era) + era * 400;
    let year = if month <= 2 {
        march_based_year + 1
    } else {
        march_based_year
    };

    (year, month, day)
}
817
818// =============================================================================
819// Schema Inference
820// =============================================================================
821
/// Shape of a value as observed while inferring schemas from data.
#[derive(Debug, Clone, PartialEq)]
enum InferredType {
    /// A null value; carries no type information of its own.
    Null,
    /// A boolean.
    Bool,
    /// An integer (signed and unsigned values are not distinguished).
    Int,
    /// A floating-point number.
    Float,
    /// A string.
    String,
    /// An array, together with the merged type of its elements.
    Array(Box<InferredType>),
    /// An object, as `(field name, field type)` pairs in field order.
    Object(Vec<(String, InferredType)>),
    /// Incompatible types were seen; the field falls back to `any`.
    Mixed,
}
834
835impl InferredType {
836    fn merge(&self, other: &InferredType) -> InferredType {
837        if self == other {
838            return self.clone();
839        }
840        match (self, other) {
841            (InferredType::Null, t) | (t, InferredType::Null) => {
842                // Null + T = T (nullable)
843                t.clone()
844            }
845            (InferredType::Int, InferredType::Float) | (InferredType::Float, InferredType::Int) => {
846                InferredType::Float
847            }
848            (InferredType::Array(a), InferredType::Array(b)) => {
849                InferredType::Array(Box::new(a.merge(b)))
850            }
851            (InferredType::Object(a), InferredType::Object(b)) => {
852                // Merge objects: keep fields present in both, track nullability
853                let mut merged = Vec::new();
854                let b_map: IndexMap<&str, &InferredType> = b.iter().map(|(k, v)| (k.as_str(), v)).collect();
855
856                for (key, a_type) in a {
857                    if let Some(b_type) = b_map.get(key.as_str()) {
858                        merged.push((key.clone(), a_type.merge(b_type)));
859                    }
860                    // Fields only in a are dropped (not uniform)
861                }
862
863                // Check if structures are compatible (same fields)
864                if merged.len() == a.len() && merged.len() == b.len() {
865                    InferredType::Object(merged)
866                } else {
867                    InferredType::Mixed
868                }
869            }
870            _ => InferredType::Mixed,
871        }
872    }
873
874    fn to_field_type(&self, schemas: &IndexMap<String, Schema>) -> FieldType {
875        match self {
876            InferredType::Null => FieldType::new("string").nullable(),  // Unknown type, default to string
877            InferredType::Bool => FieldType::new("bool"),
878            InferredType::Int => FieldType::new("int"),
879            InferredType::Float => FieldType::new("float"),
880            InferredType::String => FieldType::new("string"),
881            InferredType::Array(inner) => {
882                let inner_type = inner.to_field_type(schemas);
883                FieldType {
884                    base: inner_type.base,
885                    nullable: inner_type.nullable,
886                    is_array: true,
887                }
888            }
889            InferredType::Object(fields) => {
890                // Check if this matches an existing schema
891                for (name, schema) in schemas {
892                    if schema.fields.len() == fields.len() {
893                        let all_match = schema.fields.iter().all(|sf| {
894                            fields.iter().any(|(k, _)| k == &sf.name)
895                        });
896                        if all_match {
897                            return FieldType::new(name.clone());
898                        }
899                    }
900                }
901                // No matching schema — use "any" (not "object", which is a
902                // value-only type rejected by the parser in schema definitions)
903                FieldType::new("any")
904            }
905            InferredType::Mixed => FieldType::new("any"),
906        }
907    }
908}
909
910fn infer_type(value: &Value) -> InferredType {
911    match value {
912        Value::Null => InferredType::Null,
913        Value::Bool(_) => InferredType::Bool,
914        Value::Int(_) | Value::UInt(_) => InferredType::Int,
915        Value::Float(_) => InferredType::Float,
916        Value::String(_) => InferredType::String,
917        Value::Array(arr) => {
918            if arr.is_empty() {
919                InferredType::Array(Box::new(InferredType::Mixed))
920            } else {
921                let mut element_type = infer_type(&arr[0]);
922                for item in arr.iter().skip(1) {
923                    element_type = element_type.merge(&infer_type(item));
924                }
925                InferredType::Array(Box::new(element_type))
926            }
927        }
928        Value::Object(obj) => {
929            let fields: Vec<(String, InferredType)> = obj
930                .iter()
931                .map(|(k, v)| (k.clone(), infer_type(v)))
932                .collect();
933            InferredType::Object(fields)
934        }
935        _ => InferredType::Mixed,
936    }
937}
938
/// Lower-case `name` and strip a plural ending using a small heuristic.
///
/// Rules, first match wins:
/// - `...ies` becomes `...y`;
/// - `...sses`, `...xes`, `...ches`, `...shes` lose the trailing `es`;
/// - a trailing `s` is dropped unless the name is a single character or ends
///   in `ss`;
/// - anything else is returned lower-cased but otherwise unchanged.
fn singularize(name: &str) -> String {
    const ES_PLURALS: [&str; 4] = ["sses", "xes", "ches", "shes"];

    let lowered = name.to_lowercase();

    if let Some(stem) = lowered.strip_suffix("ies") {
        return format!("{}y", stem);
    }
    if ES_PLURALS.iter().any(|suffix| lowered.ends_with(*suffix)) {
        return lowered[..lowered.len() - 2].to_string();
    }
    if lowered.len() > 1 && lowered.ends_with('s') && !lowered.ends_with("ss") {
        return lowered[..lowered.len() - 1].to_string();
    }
    lowered
}
952
953/// Check if array elements are objects that match a schema's structure
954fn array_matches_schema(arr: &[Value], schema: &Schema) -> bool {
955    if arr.is_empty() {
956        return false;
957    }
958
959    // Check if first element is an object
960    let first = match &arr[0] {
961        Value::Object(obj) => obj,
962        _ => return false,
963    };
964
965    // Get schema field names
966    let schema_fields: HashSet<_> = schema.fields.iter().map(|f| f.name.as_str()).collect();
967
968    // Get object keys
969    let obj_keys: HashSet<_> = first.keys().map(|k| k.as_str()).collect();
970
971    // Check if there's significant overlap (at least 50% of schema fields present)
972    let overlap = schema_fields.intersection(&obj_keys).count();
973    let required_overlap = schema_fields.len() / 2;
974
975    overlap > required_overlap || overlap == schema_fields.len()
976}
977
978/// Schema inferrer that analyzes data and generates schemas
979pub struct SchemaInferrer {
980    schemas: IndexMap<String, Schema>,
981    schema_order: Vec<String>,  // Track order for output
982}
983
984impl SchemaInferrer {
985    pub fn new() -> Self {
986        Self {
987            schemas: IndexMap::new(),
988            schema_order: Vec::new(),
989        }
990    }
991
992    /// Analyze data and infer schemas from uniform object arrays
993    pub fn infer(&mut self, data: &IndexMap<String, Value>) {
994        for (key, value) in data {
995            self.analyze_value(key, value);
996        }
997    }
998
999    fn analyze_value(&mut self, hint_name: &str, value: &Value) {
1000        if let Value::Array(arr) = value {
1001            self.analyze_array(hint_name, arr);
1002        } else if let Value::Object(obj) = value {
1003            // Recursively analyze nested objects
1004            for (k, v) in obj {
1005                self.analyze_value(k, v);
1006            }
1007        }
1008    }
1009
1010    fn analyze_array(&mut self, hint_name: &str, arr: &[Value]) {
1011        if arr.is_empty() {
1012            return;
1013        }
1014
1015        // Check if all elements are objects with the same structure
1016        let first = match &arr[0] {
1017            Value::Object(obj) => obj,
1018            _ => return,
1019        };
1020
1021        // Collect field names from first object (preserving insertion order)
1022        let field_names: Vec<String> = first.keys().cloned().collect();
1023
1024        // Skip schema inference if fields are empty, or any field name is empty
1025        // or needs quoting — such names can't round-trip through @struct definitions.
1026        if field_names.is_empty()
1027            || field_names.iter().any(|n| n.is_empty() || needs_quoting(n))
1028        {
1029            return;
1030        }
1031
1032        let field_set: std::collections::BTreeSet<&str> = first.keys().map(|k| k.as_str()).collect();
1033
1034        // Verify all objects have the same fields
1035        for item in arr.iter().skip(1) {
1036            if let Value::Object(obj) = item {
1037                let item_set: std::collections::BTreeSet<&str> = obj.keys().map(|k| k.as_str()).collect();
1038                if item_set != field_set {
1039                    return;  // Not uniform
1040                }
1041            } else {
1042                return;  // Not all objects
1043            }
1044        }
1045
1046        // Infer types for each field across all objects
1047        let mut field_types: IndexMap<String, InferredType> = IndexMap::new();
1048        let mut has_null: IndexMap<String, bool> = IndexMap::new();
1049
1050        for item in arr {
1051            if let Value::Object(obj) = item {
1052                for (key, val) in obj {
1053                    let inferred = infer_type(val);
1054                    let is_null = matches!(val, Value::Null);
1055
1056                    *has_null.entry(key.clone()).or_insert(false) |= is_null;
1057
1058                    field_types
1059                        .entry(key.clone())
1060                        .and_modify(|existing| *existing = existing.merge(&inferred))
1061                        .or_insert(inferred);
1062                }
1063            }
1064        }
1065
1066        // Generate schema name from hint
1067        let schema_name = singularize(hint_name);
1068
1069        // Skip if schema already exists
1070        if self.schemas.contains_key(&schema_name) {
1071            return;
1072        }
1073
1074        // Recursively analyze nested fields in field order (depth-first).
1075        // Single pass processes arrays and objects as encountered, matching
1076        // the derive path's field-declaration-order traversal.
1077        for field_name in &field_names {
1078            // Check the first object's value for this field
1079            if let Value::Object(first_obj) = &arr[0] {
1080                match first_obj.get(field_name) {
1081                    Some(Value::Array(nested)) => {
1082                        // Arrays are always analyzed — same-name recursion
1083                        // (e.g., nodes[].nodes[]) is safe because depth-first
1084                        // ensures the inner schema is created first.
1085                        self.analyze_array(field_name, nested);
1086                    }
1087                    Some(Value::Object(_)) => {
1088                        // Skip object fields whose singularized name collides
1089                        // with this array's schema name — prevents
1090                        // self-referencing schemas (e.g., @struct root (root: root)).
1091                        if singularize(field_name) == schema_name {
1092                            continue;
1093                        }
1094
1095                        let nested_objects: Vec<&IndexMap<String, Value>> = arr
1096                            .iter()
1097                            .filter_map(|item| {
1098                                if let Value::Object(obj) = item {
1099                                    if let Some(Value::Object(nested)) = obj.get(field_name) {
1100                                        return Some(nested);
1101                                    }
1102                                }
1103                                None
1104                            })
1105                            .collect();
1106
1107                        if !nested_objects.is_empty() {
1108                            self.analyze_nested_objects(field_name, &nested_objects);
1109                        }
1110                    }
1111                    _ => {}
1112                }
1113            }
1114        }
1115
1116        // Re-check: recursive nested analysis (both arrays and objects) may have
1117        // claimed this schema name. This happens when the same field name appears
1118        // at multiple nesting levels (e.g., "nodes" containing "nodes"). The inner
1119        // schema was created first (depth-first); preserve it to avoid overwriting
1120        // with a different structure.
1121        if self.schemas.contains_key(&schema_name) {
1122            return;
1123        }
1124
1125        // Build schema
1126        let mut schema = Schema::new(&schema_name);
1127
1128        // Use insertion order from first object
1129        for field_name in &field_names {
1130            if let Some(inferred) = field_types.get(field_name) {
1131                let mut field_type = inferred.to_field_type(&self.schemas);
1132
1133                // Mark as nullable if any null values seen
1134                if has_null.get(field_name).copied().unwrap_or(false) {
1135                    field_type.nullable = true;
1136                }
1137
1138                // Check if there's a nested schema for array fields
1139                if let Value::Object(first_obj) = &arr[0] {
1140                    if let Some(Value::Array(nested_arr)) = first_obj.get(field_name) {
1141                        let nested_schema_name = singularize(field_name);
1142                        if let Some(nested_schema) = self.schemas.get(&nested_schema_name) {
1143                            // Verify array elements are objects matching the schema structure
1144                            if array_matches_schema(nested_arr, nested_schema) {
1145                                field_type = FieldType {
1146                                    base: nested_schema_name,
1147                                    nullable: field_type.nullable,
1148                                    is_array: true,
1149                                };
1150                            }
1151                        }
1152                    }
1153                }
1154
1155                // Check if there's a nested schema for object fields
1156                // (skip self-references: field singularizing to the schema being built)
1157                let nested_schema_name = singularize(field_name);
1158                if nested_schema_name != schema_name && self.schemas.contains_key(&nested_schema_name) {
1159                    if matches!(inferred, InferredType::Object(_)) {
1160                        field_type = FieldType {
1161                            base: nested_schema_name,
1162                            nullable: field_type.nullable,
1163                            is_array: false,
1164                        };
1165                    }
1166                }
1167
1168                schema.add_field(field_name, field_type);
1169            }
1170        }
1171
1172        self.schema_order.push(schema_name.clone());
1173        self.schemas.insert(schema_name, schema);
1174    }
1175
1176    /// Analyze a collection of nested objects (from the same field across array items)
1177    /// and create a schema if they have uniform structure
1178    fn analyze_nested_objects(&mut self, field_name: &str, objects: &[&IndexMap<String, Value>]) {
1179        if objects.is_empty() {
1180            return;
1181        }
1182
1183        // Get field names from first object (preserving insertion order)
1184        let first = objects[0];
1185        let nested_field_names: Vec<String> = first.keys().cloned().collect();
1186
1187        // Skip empty objects and objects with field names that can't round-trip
1188        if nested_field_names.is_empty()
1189            || nested_field_names.iter().any(|n| n.is_empty() || needs_quoting(n))
1190        {
1191            return;
1192        }
1193
1194        let field_set: std::collections::BTreeSet<&str> = first.keys().map(|k| k.as_str()).collect();
1195
1196        // Check if all objects have the same fields
1197        for obj in objects.iter().skip(1) {
1198            let obj_set: std::collections::BTreeSet<&str> = obj.keys().map(|k| k.as_str()).collect();
1199            if obj_set != field_set {
1200                return; // Not uniform
1201            }
1202        }
1203
1204        // They're uniform - create a schema
1205        let schema_name = singularize(field_name);
1206
1207        // Skip if schema already exists
1208        if self.schemas.contains_key(&schema_name) {
1209            return;
1210        }
1211
1212        // Infer field types across all objects
1213        let mut field_types: IndexMap<String, InferredType> = IndexMap::new();
1214        let mut has_null: IndexMap<String, bool> = IndexMap::new();
1215
1216        for obj in objects {
1217            for (key, val) in *obj {
1218                let inferred = infer_type(val);
1219                let is_null = matches!(val, Value::Null);
1220
1221                *has_null.entry(key.clone()).or_insert(false) |= is_null;
1222
1223                field_types
1224                    .entry(key.clone())
1225                    .and_modify(|existing| *existing = existing.merge(&inferred))
1226                    .or_insert(inferred);
1227            }
1228        }
1229
1230        // Recursively analyze nested fields in field order (depth-first).
1231        // Single pass mirrors the derive path's field-declaration-order traversal,
1232        // so CLI and Builder API produce schemas in the same order.
1233        for nested_field in &nested_field_names {
1234            if let Some(Value::Array(nested_arr)) = objects[0].get(nested_field) {
1235                self.analyze_array(nested_field, nested_arr);
1236            } else {
1237                let deeper_objects: Vec<&IndexMap<String, Value>> = objects
1238                    .iter()
1239                    .filter_map(|obj| {
1240                        if let Some(Value::Object(nested)) = obj.get(nested_field) {
1241                            Some(nested)
1242                        } else {
1243                            None
1244                        }
1245                    })
1246                    .collect();
1247
1248                if !deeper_objects.is_empty() {
1249                    self.analyze_nested_objects(nested_field, &deeper_objects);
1250                }
1251            }
1252        }
1253
1254        // Build schema
1255        let mut schema = Schema::new(&schema_name);
1256
1257        for nested_field in &nested_field_names {
1258            if let Some(inferred) = field_types.get(nested_field) {
1259                let mut field_type = inferred.to_field_type(&self.schemas);
1260
1261                if has_null.get(nested_field).copied().unwrap_or(false) {
1262                    field_type.nullable = true;
1263                }
1264
1265                // Check if this field has a nested array schema
1266                if matches!(inferred, InferredType::Array(_)) {
1267                    if let Some(Value::Array(nested_arr)) = objects[0].get(nested_field) {
1268                        let nested_schema_name = singularize(nested_field);
1269                        if let Some(nested_schema) = self.schemas.get(&nested_schema_name) {
1270                            if array_matches_schema(nested_arr, nested_schema) {
1271                                field_type = FieldType {
1272                                    base: nested_schema_name,
1273                                    nullable: field_type.nullable,
1274                                    is_array: true,
1275                                };
1276                            }
1277                        }
1278                    }
1279                }
1280
1281                // Check if this field has a nested object schema
1282                if let Some(nested_schema) = self.schemas.get(&singularize(nested_field)) {
1283                    if matches!(inferred, InferredType::Object(_)) {
1284                        field_type = FieldType::new(nested_schema.name.clone());
1285                    }
1286                }
1287
1288                schema.add_field(nested_field, field_type);
1289            }
1290        }
1291
1292        self.schema_order.push(schema_name.clone());
1293        self.schemas.insert(schema_name, schema);
1294    }
1295
1296    pub fn into_schemas(self) -> (IndexMap<String, Schema>, Vec<String>) {
1297        (self.schemas, self.schema_order)
1298    }
1299}
1300
1301impl Default for SchemaInferrer {
1302    fn default() -> Self {
1303        Self::new()
1304    }
1305}
1306
1307/// Serialize data to TeaLeaf text format with schemas
1308pub fn dumps_with_schemas(
1309    data: &IndexMap<String, Value>,
1310    schemas: &IndexMap<String, Schema>,
1311    schema_order: &[String],
1312    unions: &IndexMap<String, Union>,
1313    union_order: &[String],
1314) -> String {
1315    let mut out = String::new();
1316    let mut has_definitions = false;
1317
1318    // Write union definitions first (before structs, since structs may reference unions)
1319    for name in union_order {
1320        if let Some(union) = unions.get(name) {
1321            out.push_str("@union ");
1322            out.push_str(&union.name);
1323            out.push_str(" {\n");
1324            for (vi, variant) in union.variants.iter().enumerate() {
1325                out.push_str("  ");
1326                out.push_str(&variant.name);
1327                out.push_str(" (");
1328                for (fi, field) in variant.fields.iter().enumerate() {
1329                    if fi > 0 {
1330                        out.push_str(", ");
1331                    }
1332                    out.push_str(&field.name);
1333                    out.push_str(": ");
1334                    out.push_str(&field.field_type.to_string());
1335                }
1336                out.push(')');
1337                if vi < union.variants.len() - 1 {
1338                    out.push(',');
1339                }
1340                out.push('\n');
1341            }
1342            out.push_str("}\n");
1343            has_definitions = true;
1344        }
1345    }
1346
1347    // Write struct schemas in order
1348    for name in schema_order {
1349        if let Some(schema) = schemas.get(name) {
1350            out.push_str("@struct ");
1351            out.push_str(&schema.name);
1352            out.push_str(" (");
1353            for (i, field) in schema.fields.iter().enumerate() {
1354                if i > 0 {
1355                    out.push_str(", ");
1356                }
1357                write_key(&mut out, &field.name);
1358                out.push_str(": ");
1359                out.push_str(&field.field_type.to_string());
1360            }
1361            out.push_str(")\n");
1362            has_definitions = true;
1363        }
1364    }
1365
1366    if has_definitions {
1367        out.push('\n');
1368    }
1369
1370    // Write data (preserves insertion order)
1371    for (key, value) in data {
1372        write_key(&mut out, key);
1373        out.push_str(": ");
1374        write_value_with_schemas(&mut out, value, schemas, Some(key), 0, None);
1375        out.push('\n');
1376    }
1377
1378    out
1379}
1380
1381/// Resolve a schema for a value by trying three strategies in order:
1382/// 1. Declared type from parent schema's field type (exact match)
1383/// 2. Singularize the field key name (works for JSON-inference schemas)
1384/// 3. Case-insensitive singularize (handles derive-macro PascalCase names)
1385fn resolve_schema<'a>(
1386    schemas: &'a IndexMap<String, Schema>,
1387    declared_type: Option<&str>,
1388    hint_name: Option<&str>,
1389) -> Option<&'a Schema> {
1390    // 1. Direct lookup by declared type from parent schema
1391    if let Some(name) = declared_type {
1392        if let Some(s) = schemas.get(name) {
1393            return Some(s);
1394        }
1395    }
1396    // 2. Singularize heuristic (existing behavior for JSON-inference schemas)
1397    if let Some(hint) = hint_name {
1398        let singular = singularize(hint);
1399        if let Some(s) = schemas.get(&singular) {
1400            return Some(s);
1401        }
1402        // 3. Case-insensitive singularize (for derive-macro PascalCase names)
1403        let singular_lower = singular.to_ascii_lowercase();
1404        for (name, schema) in schemas {
1405            if name.to_ascii_lowercase() == singular_lower {
1406                return Some(schema);
1407            }
1408        }
1409    }
1410    None
1411}
1412
1413fn write_value_with_schemas(
1414    out: &mut String,
1415    value: &Value,
1416    schemas: &IndexMap<String, Schema>,
1417    hint_name: Option<&str>,
1418    indent: usize,
1419    declared_type: Option<&str>,
1420) {
1421    match value {
1422        Value::Null => out.push('~'),
1423        Value::Bool(b) => out.push_str(if *b { "true" } else { "false" }),
1424        Value::Int(i) => out.push_str(&i.to_string()),
1425        Value::UInt(u) => out.push_str(&u.to_string()),
1426        Value::JsonNumber(s) => out.push_str(s),
1427        Value::Float(f) => out.push_str(&format_float(*f)),
1428        Value::String(s) => {
1429            if needs_quoting(s) {
1430                out.push('"');
1431                out.push_str(&escape_string(s));
1432                out.push('"');
1433            } else {
1434                out.push_str(s);
1435            }
1436        }
1437        Value::Bytes(b) => {
1438            out.push_str("b\"");
1439            for byte in b {
1440                out.push_str(&format!("{:02x}", byte));
1441            }
1442            out.push('"');
1443        }
1444        Value::Array(arr) => {
1445            // Check if this array can use @table format.
1446            // Try name-based resolution first, then structural matching as fallback.
1447            let mut schema = resolve_schema(schemas, declared_type, hint_name);
1448
1449            // Structural fallback: if name-based resolution failed, find a schema
1450            // whose fields exactly match the first element's object keys.
1451            // This handles Builder-path documents where the top-level key name
1452            // (e.g., "orders") doesn't match the schema name (e.g., "SalesOrder").
1453            if schema.is_none() {
1454                if let Some(Value::Object(first_obj)) = arr.first() {
1455                    let obj_keys: HashSet<&str> = first_obj.keys().map(|k| k.as_str()).collect();
1456                    for (_, candidate) in schemas {
1457                        let schema_fields: HashSet<&str> = candidate.fields.iter().map(|f| f.name.as_str()).collect();
1458                        if schema_fields == obj_keys {
1459                            schema = Some(candidate);
1460                            break;
1461                        }
1462                    }
1463                }
1464            }
1465
1466            if let Some(schema) = schema {
1467                // Verify the first element is an object whose fields match the schema.
1468                // A name-only lookup isn't enough — if the same field name appears at
1469                // multiple nesting levels with different shapes, the schema may belong
1470                // to a different level. Applying the wrong schema drops unmatched keys.
1471                let schema_matches = if let Some(Value::Object(first_obj)) = arr.first() {
1472                    let schema_fields: HashSet<&str> = schema.fields.iter().map(|f| f.name.as_str()).collect();
1473                    let obj_keys: HashSet<&str> = first_obj.keys().map(|k| k.as_str()).collect();
1474                    schema_fields == obj_keys
1475                } else {
1476                    false
1477                };
1478
1479                if schema_matches {
1480                    out.push_str("@table ");
1481                    out.push_str(&schema.name);
1482                    out.push_str(" [\n");
1483
1484                    let inner_indent = indent + 2;
1485                    for (i, item) in arr.iter().enumerate() {
1486                        for _ in 0..inner_indent {
1487                            out.push(' ');
1488                        }
1489                        write_tuple(out, item, schema, schemas, inner_indent);
1490                        if i < arr.len() - 1 {
1491                            out.push(',');
1492                        }
1493                        out.push('\n');
1494                    }
1495
1496                    for _ in 0..indent {
1497                        out.push(' ');
1498                    }
1499                    out.push(']');
1500                    return;
1501                }
1502            }
1503
1504            // Fall back to regular array format
1505            out.push('[');
1506            for (i, v) in arr.iter().enumerate() {
1507                if i > 0 {
1508                    out.push_str(", ");
1509                }
1510                write_value_with_schemas(out, v, schemas, None, indent, None);
1511            }
1512            out.push(']');
1513        }
1514        Value::Object(obj) => {
1515            // Find the schema for this object so we can propagate field types to children.
1516            // Try name-based resolution first, then structural matching as fallback.
1517            let mut obj_schema = resolve_schema(schemas, declared_type, hint_name);
1518
1519            if obj_schema.is_none() {
1520                let obj_keys: HashSet<&str> = obj.keys().map(|k| k.as_str()).collect();
1521                for (_, candidate) in schemas {
1522                    let schema_fields: HashSet<&str> = candidate.fields.iter().map(|f| f.name.as_str()).collect();
1523                    if schema_fields == obj_keys {
1524                        obj_schema = Some(candidate);
1525                        break;
1526                    }
1527                }
1528            }
1529
1530            out.push('{');
1531            for (i, (k, v)) in obj.iter().enumerate() {
1532                if i > 0 {
1533                    out.push_str(", ");
1534                }
1535                write_key(out, k);
1536                out.push_str(": ");
1537                // Look up this field's declared type from the parent schema
1538                let field_type = obj_schema.and_then(|s| {
1539                    s.fields.iter()
1540                        .find(|f| f.name == *k)
1541                        .map(|f| f.field_type.base.as_str())
1542                });
1543                write_value_with_schemas(out, v, schemas, Some(k), indent, field_type);
1544            }
1545            out.push('}');
1546        }
1547        Value::Map(pairs) => {
1548            out.push_str("@map {");
1549            let mut first = true;
1550            for (k, v) in pairs {
1551                if !first {
1552                    out.push_str(", ");
1553                }
1554                first = false;
1555                write_map_key(out, k);
1556                out.push_str(": ");
1557                write_value_with_schemas(out, v, schemas, None, indent, None);
1558            }
1559            out.push('}');
1560        }
1561        Value::Ref(r) => {
1562            out.push('!');
1563            out.push_str(r);
1564        }
1565        Value::Tagged(tag, inner) => {
1566            out.push(':');
1567            out.push_str(tag);
1568            out.push(' ');
1569            write_value_with_schemas(out, inner, schemas, None, indent, None);
1570        }
1571        Value::Timestamp(ts, tz) => {
1572            out.push_str(&format_timestamp_millis(*ts, *tz));
1573        }
1574    }
1575}
1576
1577fn write_tuple(
1578    out: &mut String,
1579    value: &Value,
1580    schema: &Schema,
1581    schemas: &IndexMap<String, Schema>,
1582    indent: usize,
1583) {
1584    if let Value::Object(obj) = value {
1585        out.push('(');
1586        for (i, field) in schema.fields.iter().enumerate() {
1587            if i > 0 {
1588                out.push_str(", ");
1589            }
1590            if let Some(v) = obj.get(&field.name) {
1591                let type_base = field.field_type.base.as_str();
1592                // For array fields with a known schema type, write tuples directly without @table
1593                if field.field_type.is_array {
1594                    if let Some(item_schema) = resolve_schema(schemas, Some(type_base), None) {
1595                        // The schema defines the element type - write array with tuples directly
1596                        write_schema_array(out, v, item_schema, schemas, indent);
1597                    } else {
1598                        // No schema for element type - use regular array format
1599                        write_value_with_schemas(out, v, schemas, None, indent, None);
1600                    }
1601                } else if resolve_schema(schemas, Some(type_base), None).is_some() {
1602                    // Non-array field with schema type - write as nested tuple
1603                    let nested_schema = resolve_schema(schemas, Some(type_base), None).unwrap();
1604                    write_tuple(out, v, nested_schema, schemas, indent);
1605                } else {
1606                    write_value_with_schemas(out, v, schemas, None, indent, None);
1607                }
1608            } else {
1609                out.push('~');
1610            }
1611        }
1612        out.push(')');
1613    } else {
1614        write_value_with_schemas(out, value, schemas, None, indent, None);
1615    }
1616}
1617
1618/// Write an array of schema-typed values as tuples (without @table annotation)
1619fn write_schema_array(
1620    out: &mut String,
1621    value: &Value,
1622    schema: &Schema,
1623    schemas: &IndexMap<String, Schema>,
1624    indent: usize,
1625) {
1626    if let Value::Array(arr) = value {
1627        if arr.is_empty() {
1628            out.push_str("[]");
1629            return;
1630        }
1631
1632        out.push_str("[\n");
1633        let inner_indent = indent + 2;
1634        for (i, item) in arr.iter().enumerate() {
1635            for _ in 0..inner_indent {
1636                out.push(' ');
1637            }
1638            write_tuple(out, item, schema, schemas, inner_indent);
1639            if i < arr.len() - 1 {
1640                out.push(',');
1641            }
1642            out.push('\n');
1643        }
1644        for _ in 0..indent {
1645            out.push(' ');
1646        }
1647        out.push(']');
1648    } else {
1649        // Not an array - fall back to regular value writing
1650        write_value_with_schemas(out, value, schemas, None, indent, None);
1651    }
1652}
1653
1654#[cfg(test)]
1655mod tests {
1656    use super::*;
1657
1658    #[test]
1659    fn test_serde_json_number_behavior() {
1660        // Test how serde_json handles different number formats
1661        let json_str = r#"{"int": 42, "float_whole": 42.0, "float_frac": 42.5}"#;
1662        let parsed: serde_json::Value = serde_json::from_str(json_str).unwrap();
1663
1664        if let serde_json::Value::Object(obj) = parsed {
1665            let int_num = obj.get("int").unwrap().as_number().unwrap();
1666            let float_whole = obj.get("float_whole").unwrap().as_number().unwrap();
1667            let float_frac = obj.get("float_frac").unwrap().as_number().unwrap();
1668
1669            println!("int (42): is_i64={}, is_u64={}, is_f64={}",
1670                int_num.is_i64(), int_num.is_u64(), int_num.is_f64());
1671            println!("float_whole (42.0): is_i64={}, is_u64={}, is_f64={}",
1672                float_whole.is_i64(), float_whole.is_u64(), float_whole.is_f64());
1673            println!("float_frac (42.5): is_i64={}, is_u64={}, is_f64={}",
1674                float_frac.is_i64(), float_frac.is_u64(), float_frac.is_f64());
1675
1676            // Assert expected behavior
1677            assert!(int_num.is_i64(), "42 should be parsed as i64");
1678            assert!(float_whole.is_f64(), "42.0 should be parsed as f64");
1679            assert!(float_frac.is_f64(), "42.5 should be parsed as f64");
1680        }
1681
1682        // Test how Rust formats floats
1683        println!("Rust float formatting:");
1684        println!("  42.0f64.to_string() = '{}'", 42.0f64.to_string());
1685        println!("  42.5f64.to_string() = '{}'", 42.5f64.to_string());
1686
1687        // This is the problem! Rust's to_string() drops the .0
1688        // We need to ensure floats always have a decimal point
1689    }
1690
1691    #[test]
1692    fn test_parse_simple() {
1693        let doc = TeaLeaf::parse(r#"
1694            name: alice
1695            age: 30
1696            active: true
1697        "#).unwrap();
1698        
1699        assert_eq!(doc.get("name").unwrap().as_str(), Some("alice"));
1700        assert_eq!(doc.get("age").unwrap().as_int(), Some(30));
1701        assert_eq!(doc.get("active").unwrap().as_bool(), Some(true));
1702    }
1703
1704    #[test]
1705    fn test_parse_struct() {
1706        let doc = TeaLeaf::parse(r#"
1707            @struct user (id: int, name: string, email: string?)
1708            users: @table user [
1709                (1, alice, "alice@test.com"),
1710                (2, bob, ~),
1711            ]
1712        "#).unwrap();
1713        
1714        let schema = doc.schema("user").unwrap();
1715        assert_eq!(schema.fields.len(), 3);
1716        assert!(schema.fields[2].field_type.nullable);
1717        
1718        let users = doc.get("users").unwrap().as_array().unwrap();
1719        assert_eq!(users.len(), 2);
1720    }
1721
1722    #[test]
1723    fn test_nested_struct() {
1724        let doc = TeaLeaf::parse(r#"
1725            @struct address (city: string, zip: string)
1726            @struct user (id: int, name: string, home: address)
1727            users: @table user [
1728                (1, alice, (Berlin, "10115")),
1729                (2, bob, (Paris, "75001")),
1730            ]
1731        "#).unwrap();
1732        
1733        let users = doc.get("users").unwrap().as_array().unwrap();
1734        let alice = users[0].as_object().unwrap();
1735        let home = alice.get("home").unwrap().as_object().unwrap();
1736        assert_eq!(home.get("city").unwrap().as_str(), Some("Berlin"));
1737    }
1738
1739    #[test]
1740    fn test_three_level_nesting() {
1741        let doc = TeaLeaf::parse(r#"
1742            @struct method (type: string, last4: string)
1743            @struct payment (amount: float, method: method)
1744            @struct order (id: int, payment: payment)
1745            orders: @table order [
1746                (1, (99.99, (credit, "4242"))),
1747            ]
1748        "#).unwrap();
1749        
1750        let orders = doc.get("orders").unwrap().as_array().unwrap();
1751        let order = orders[0].as_object().unwrap();
1752        let payment = order.get("payment").unwrap().as_object().unwrap();
1753        let method = payment.get("method").unwrap().as_object().unwrap();
1754        assert_eq!(method.get("type").unwrap().as_str(), Some("credit"));
1755    }
1756
1757    #[test]
1758    fn test_json_roundtrip_basic() {
1759        let json = r#"{"name":"alice","age":30,"active":true,"score":95.5}"#;
1760        let doc = TeaLeaf::from_json(json).unwrap();
1761
1762        assert_eq!(doc.get("name").unwrap().as_str(), Some("alice"));
1763        assert_eq!(doc.get("age").unwrap().as_int(), Some(30));
1764        assert_eq!(doc.get("active").unwrap().as_bool(), Some(true));
1765        assert_eq!(doc.get("score").unwrap().as_float(), Some(95.5));
1766
1767        // Round-trip back to JSON
1768        let json_out = doc.to_json().unwrap();
1769        assert!(json_out.contains("\"name\":\"alice\"") || json_out.contains("\"name\": \"alice\""));
1770    }
1771
1772    #[test]
1773    fn test_json_roundtrip_root_array() {
1774        // Root-level arrays should round-trip without wrapping
1775        let json = r#"[{"id":"0001","type":"donut","name":"Cake"},{"id":"0002","type":"donut","name":"Raised"}]"#;
1776        let doc = TeaLeaf::from_json(json).unwrap();
1777
1778        // Internally stored under "root" key
1779        let root = doc.get("root").unwrap();
1780        let arr = root.as_array().unwrap();
1781        assert_eq!(arr.len(), 2);
1782
1783        // Round-trip should produce the array directly, NOT {"root": [...]}
1784        let json_out = doc.to_json_compact().unwrap();
1785        assert!(json_out.starts_with('['), "Root array should serialize directly: {}", json_out);
1786        assert!(json_out.ends_with(']'), "Root array should end with ]: {}", json_out);
1787        assert!(!json_out.contains("\"root\""), "Should NOT wrap in root object: {}", json_out);
1788
1789        // Verify content preserved
1790        assert!(json_out.contains("\"id\":\"0001\"") || json_out.contains("\"id\": \"0001\""));
1791        assert!(json_out.contains("\"name\":\"Cake\"") || json_out.contains("\"name\": \"Cake\""));
1792    }
1793
1794    #[test]
1795    fn test_json_roundtrip_root_array_empty() {
1796        // Empty array should also round-trip correctly
1797        let json = r#"[]"#;
1798        let doc = TeaLeaf::from_json(json).unwrap();
1799
1800        let json_out = doc.to_json_compact().unwrap();
1801        assert_eq!(json_out, "[]", "Empty array should round-trip: {}", json_out);
1802    }
1803
1804    #[test]
1805    fn test_json_roundtrip_root_object_with_root_key() {
1806        // An object that happens to have a "root" key should NOT be confused
1807        let json = r#"{"root":[1,2,3],"other":"value"}"#;
1808        let doc = TeaLeaf::from_json(json).unwrap();
1809
1810        let json_out = doc.to_json_compact().unwrap();
1811        // This was a root object, so it should stay as an object
1812        assert!(json_out.starts_with('{'), "Root object should stay as object: {}", json_out);
1813        assert!(json_out.contains("\"root\""), "root key should be preserved: {}", json_out);
1814        assert!(json_out.contains("\"other\""), "other key should be preserved: {}", json_out);
1815    }
1816
1817    #[test]
1818    fn test_json_export_bytes() {
1819        // Create a document with bytes programmatically
1820        let mut entries = IndexMap::new();
1821        entries.insert("data".to_string(), Value::Bytes(vec![0xca, 0xfe, 0xf0, 0x0d]));
1822        let doc = TeaLeaf { data: entries, schemas: IndexMap::new(), unions: IndexMap::new(), is_root_array: false };
1823
1824        let json = doc.to_json().unwrap();
1825        assert!(json.contains("0xcafef00d"), "Bytes should export as hex string: {}", json);
1826    }
1827
1828    #[test]
1829    fn test_json_export_ref() {
1830        let mut entries = IndexMap::new();
1831        entries.insert("config".to_string(), Value::Ref("base_config".to_string()));
1832        let doc = TeaLeaf { data: entries, schemas: IndexMap::new(), unions: IndexMap::new(), is_root_array: false };
1833
1834        let json = doc.to_json().unwrap();
1835        assert!(json.contains("\"$ref\""), "Ref should export with $ref key: {}", json);
1836        assert!(json.contains("base_config"), "Ref name should be in output: {}", json);
1837    }
1838
1839    #[test]
1840    fn test_json_export_tagged() {
1841        let mut entries = IndexMap::new();
1842        entries.insert("status".to_string(), Value::Tagged("ok".to_string(), Box::new(Value::Int(200))));
1843        let doc = TeaLeaf { data: entries, schemas: IndexMap::new(), unions: IndexMap::new(), is_root_array: false };
1844
1845        let json = doc.to_json().unwrap();
1846        assert!(json.contains("\"$tag\""), "Tagged should export with $tag key: {}", json);
1847        assert!(json.contains("\"ok\""), "Tag name should be in output: {}", json);
1848        assert!(json.contains("\"$value\""), "Tagged should have $value key: {}", json);
1849    }
1850
1851    #[test]
1852    fn test_json_export_map() {
1853        let mut entries = IndexMap::new();
1854        entries.insert("lookup".to_string(), Value::Map(vec![
1855            (Value::Int(1), Value::String("one".to_string())),
1856            (Value::Int(2), Value::String("two".to_string())),
1857        ]));
1858        let doc = TeaLeaf { data: entries, schemas: IndexMap::new(), unions: IndexMap::new(), is_root_array: false };
1859
1860        let json = doc.to_json().unwrap();
1861        // Map exports as array of [key, value] pairs
1862        // Check that the structure contains the key and values (regardless of formatting)
1863        assert!(json.contains("\"lookup\""), "Map key should be in output: {}", json);
1864        assert!(json.contains("\"one\""), "Map values should be in output: {}", json);
1865        assert!(json.contains("\"two\""), "Map values should be in output: {}", json);
1866        // Verify it's an array structure (has nested arrays)
1867        let compact = json.replace(" ", "").replace("\n", "");
1868        assert!(compact.contains("[["), "Map should export as nested array: {}", json);
1869    }
1870
1871    #[test]
1872    fn test_json_export_timestamp() {
1873        let mut entries = IndexMap::new();
1874        // 2024-01-15T10:30:00Z = 1705315800000 ms, but let's verify with a known value
1875        // Use 0 = 1970-01-01T00:00:00Z for simplicity
1876        entries.insert("created".to_string(), Value::Timestamp(0, 0));
1877        let doc = TeaLeaf { data: entries, schemas: IndexMap::new(), unions: IndexMap::new(), is_root_array: false };
1878
1879        let json = doc.to_json().unwrap();
1880        assert!(json.contains("1970-01-01"), "Timestamp should export as ISO 8601 date: {}", json);
1881        assert!(json.contains("00:00:00"), "Timestamp time should be epoch: {}", json);
1882    }
1883
1884    #[test]
1885    fn test_json_import_limitation_ref_becomes_object() {
1886        // JSON with $ref pattern should become a plain object, NOT a Ref value
1887        let json = r#"{"config":{"$ref":"base_config"}}"#;
1888        let doc = TeaLeaf::from_json(json).unwrap();
1889
1890        let config = doc.get("config").unwrap();
1891        // This should be an Object, not a Ref
1892        assert!(config.as_object().is_some(), "JSON $ref should become Object, not Ref");
1893        assert!(config.as_ref_name().is_none(), "JSON $ref should NOT become Ref value");
1894    }
1895
1896    #[test]
1897    fn test_json_import_limitation_tagged_becomes_object() {
1898        // JSON with $tag/$value pattern should become a plain object, NOT a Tagged value
1899        let json = r#"{"status":{"$tag":"ok","$value":200}}"#;
1900        let doc = TeaLeaf::from_json(json).unwrap();
1901
1902        let status = doc.get("status").unwrap();
1903        // This should be an Object, not a Tagged
1904        assert!(status.as_object().is_some(), "JSON $tag should become Object, not Tagged");
1905        assert!(status.as_tagged().is_none(), "JSON $tag should NOT become Tagged value");
1906    }
1907
1908    #[test]
1909    fn test_json_import_limitation_timestamp_becomes_string() {
1910        // ISO 8601 strings in JSON should remain strings, NOT become Timestamp
1911        let json = r#"{"created":"2024-01-15T10:30:00Z"}"#;
1912        let doc = TeaLeaf::from_json(json).unwrap();
1913
1914        let created = doc.get("created").unwrap();
1915        // This should be a String, not a Timestamp
1916        assert!(created.as_str().is_some(), "ISO timestamp string should remain String");
1917        assert!(created.as_timestamp_millis().is_none(), "ISO timestamp should NOT become Timestamp value");
1918    }
1919
1920    // =========================================================================
1921    // JSON ↔ Binary Conversion Tests
1922    // =========================================================================
1923
1924    #[test]
1925    fn test_json_to_binary_roundtrip_primitives() {
1926        use tempfile::NamedTempFile;
1927
1928        let json = r#"{"name":"alice","age":30,"score":95.5,"active":true,"nothing":null}"#;
1929        let doc = TeaLeaf::from_json(json).unwrap();
1930
1931        // Compile to binary
1932        let temp = NamedTempFile::new().unwrap();
1933        let path = temp.path();
1934        doc.compile(path, false).unwrap();
1935
1936        // Read back
1937        let reader = Reader::open(path).unwrap();
1938        assert_eq!(reader.get("name").unwrap().as_str(), Some("alice"));
1939        assert_eq!(reader.get("age").unwrap().as_int(), Some(30));
1940        assert_eq!(reader.get("score").unwrap().as_float(), Some(95.5));
1941        assert_eq!(reader.get("active").unwrap().as_bool(), Some(true));
1942        assert!(reader.get("nothing").unwrap().is_null());
1943    }
1944
1945    #[test]
1946    fn test_json_to_binary_roundtrip_arrays() {
1947        use tempfile::NamedTempFile;
1948
1949        let json = r#"{"numbers":[1,2,3,4,5],"names":["alice","bob","charlie"]}"#;
1950        let doc = TeaLeaf::from_json(json).unwrap();
1951
1952        let temp = NamedTempFile::new().unwrap();
1953        doc.compile(temp.path(), false).unwrap();
1954
1955        let reader = Reader::open(temp.path()).unwrap();
1956
1957        let numbers = reader.get("numbers").unwrap();
1958        let arr = numbers.as_array().unwrap();
1959        assert_eq!(arr.len(), 5);
1960        assert_eq!(arr[0].as_int(), Some(1));
1961        assert_eq!(arr[4].as_int(), Some(5));
1962
1963        let names = reader.get("names").unwrap();
1964        let arr = names.as_array().unwrap();
1965        assert_eq!(arr.len(), 3);
1966        assert_eq!(arr[0].as_str(), Some("alice"));
1967    }
1968
1969    #[test]
1970    fn test_json_to_binary_roundtrip_nested_objects() {
1971        use tempfile::NamedTempFile;
1972
1973        let json = r#"{"user":{"name":"alice","profile":{"bio":"dev","settings":{"theme":"dark"}}}}"#;
1974        let doc = TeaLeaf::from_json(json).unwrap();
1975
1976        let temp = NamedTempFile::new().unwrap();
1977        doc.compile(temp.path(), false).unwrap();
1978
1979        let reader = Reader::open(temp.path()).unwrap();
1980        let user = reader.get("user").unwrap();
1981        let user_obj = user.as_object().unwrap();
1982        assert_eq!(user_obj.get("name").unwrap().as_str(), Some("alice"));
1983
1984        let profile = user_obj.get("profile").unwrap().as_object().unwrap();
1985        assert_eq!(profile.get("bio").unwrap().as_str(), Some("dev"));
1986
1987        let settings = profile.get("settings").unwrap().as_object().unwrap();
1988        assert_eq!(settings.get("theme").unwrap().as_str(), Some("dark"));
1989    }
1990
1991    #[test]
1992    fn test_json_to_binary_with_compression() {
1993        use tempfile::NamedTempFile;
1994
1995        // Create a document with repetitive data to test compression
1996        let mut entries = IndexMap::new();
1997        entries.insert("data".to_string(), Value::String("a".repeat(1000)));
1998        entries.insert("count".to_string(), Value::Int(12345));
1999        let doc = TeaLeaf { data: entries, schemas: IndexMap::new(), unions: IndexMap::new(), is_root_array: false };
2000
2001        let temp = NamedTempFile::new().unwrap();
2002        doc.compile(temp.path(), true).unwrap(); // compressed
2003
2004        let reader = Reader::open(temp.path()).unwrap();
2005        assert_eq!(reader.get("data").unwrap().as_str(), Some("a".repeat(1000).as_str()));
2006        assert_eq!(reader.get("count").unwrap().as_int(), Some(12345));
2007    }
2008
2009    #[test]
2010    fn test_tl_to_binary_preserves_ref() {
2011        use tempfile::NamedTempFile;
2012
2013        let mut entries = IndexMap::new();
2014        entries.insert("base".to_string(), Value::Object(vec![
2015            ("host".to_string(), Value::String("localhost".to_string())),
2016        ].into_iter().collect()));
2017        entries.insert("config".to_string(), Value::Ref("base".to_string()));
2018        let doc = TeaLeaf { data: entries, schemas: IndexMap::new(), unions: IndexMap::new(), is_root_array: false };
2019
2020        let temp = NamedTempFile::new().unwrap();
2021        doc.compile(temp.path(), false).unwrap();
2022
2023        let reader = Reader::open(temp.path()).unwrap();
2024        let config = reader.get("config").unwrap();
2025        assert_eq!(config.as_ref_name(), Some("base"));
2026    }
2027
2028    #[test]
2029    fn test_tl_to_binary_preserves_tagged() {
2030        use tempfile::NamedTempFile;
2031
2032        let mut entries = IndexMap::new();
2033        entries.insert("status".to_string(), Value::Tagged("ok".to_string(), Box::new(Value::Int(200))));
2034        let doc = TeaLeaf { data: entries, schemas: IndexMap::new(), unions: IndexMap::new(), is_root_array: false };
2035
2036        let temp = NamedTempFile::new().unwrap();
2037        doc.compile(temp.path(), false).unwrap();
2038
2039        let reader = Reader::open(temp.path()).unwrap();
2040        let status = reader.get("status").unwrap();
2041        let (tag, value) = status.as_tagged().unwrap();
2042        assert_eq!(tag, "ok");
2043        assert_eq!(value.as_int(), Some(200));
2044    }
2045
2046    #[test]
2047    fn test_tl_to_binary_preserves_map() {
2048        use tempfile::NamedTempFile;
2049
2050        let mut entries = IndexMap::new();
2051        entries.insert("lookup".to_string(), Value::Map(vec![
2052            (Value::Int(1), Value::String("one".to_string())),
2053            (Value::Int(2), Value::String("two".to_string())),
2054        ]));
2055        let doc = TeaLeaf { data: entries, schemas: IndexMap::new(), unions: IndexMap::new(), is_root_array: false };
2056
2057        let temp = NamedTempFile::new().unwrap();
2058        doc.compile(temp.path(), false).unwrap();
2059
2060        let reader = Reader::open(temp.path()).unwrap();
2061        let lookup = reader.get("lookup").unwrap();
2062        let map = lookup.as_map().unwrap();
2063        assert_eq!(map.len(), 2);
2064        assert_eq!(map[0].0.as_int(), Some(1));
2065        assert_eq!(map[0].1.as_str(), Some("one"));
2066    }
2067
2068    #[test]
2069    fn test_tl_to_binary_preserves_bytes() {
2070        use tempfile::NamedTempFile;
2071
2072        let mut entries = IndexMap::new();
2073        entries.insert("data".to_string(), Value::Bytes(vec![0xca, 0xfe, 0xf0, 0x0d]));
2074        let doc = TeaLeaf { data: entries, schemas: IndexMap::new(), unions: IndexMap::new(), is_root_array: false };
2075
2076        let temp = NamedTempFile::new().unwrap();
2077        doc.compile(temp.path(), false).unwrap();
2078
2079        let reader = Reader::open(temp.path()).unwrap();
2080        let data = reader.get("data").unwrap();
2081        assert_eq!(data.as_bytes(), Some(vec![0xca, 0xfe, 0xf0, 0x0d].as_slice()));
2082    }
2083
2084    #[test]
2085    fn test_tl_to_binary_preserves_timestamp() {
2086        use tempfile::NamedTempFile;
2087
2088        let mut entries = IndexMap::new();
2089        entries.insert("created".to_string(), Value::Timestamp(1705315800000, 0)); // 2024-01-15T10:30:00Z
2090        let doc = TeaLeaf { data: entries, schemas: IndexMap::new(), unions: IndexMap::new(), is_root_array: false };
2091
2092        let temp = NamedTempFile::new().unwrap();
2093        doc.compile(temp.path(), false).unwrap();
2094
2095        let reader = Reader::open(temp.path()).unwrap();
2096        let created = reader.get("created").unwrap();
2097        assert_eq!(created.as_timestamp_millis(), Some(1705315800000));
2098    }
2099
2100    #[test]
2101    fn test_json_import_limitation_hex_string_remains_string() {
2102        // Hex strings in JSON should remain strings, NOT become Bytes
2103        let json = r#"{"data":"0xcafef00d"}"#;
2104        let doc = TeaLeaf::from_json(json).unwrap();
2105
2106        let data = doc.get("data").unwrap();
2107        // This should be a String, not Bytes
2108        assert!(data.as_str().is_some(), "Hex string should remain String");
2109        assert_eq!(data.as_str(), Some("0xcafef00d"));
2110        assert!(data.as_bytes().is_none(), "Hex string should NOT become Bytes value");
2111    }
2112
2113    #[test]
2114    fn test_json_import_limitation_array_pairs_remain_array() {
2115        // JSON arrays that look like map pairs should remain arrays, NOT become Maps
2116        let json = r#"{"lookup":[[1,"one"],[2,"two"]]}"#;
2117        let doc = TeaLeaf::from_json(json).unwrap();
2118
2119        let lookup = doc.get("lookup").unwrap();
2120        // This should be an Array, not a Map
2121        assert!(lookup.as_array().is_some(), "Array of pairs should remain Array");
2122        assert!(lookup.as_map().is_none(), "Array of pairs should NOT become Map value");
2123
2124        // Verify structure
2125        let arr = lookup.as_array().unwrap();
2126        assert_eq!(arr.len(), 2);
2127        let first_pair = arr[0].as_array().unwrap();
2128        assert_eq!(first_pair[0].as_int(), Some(1));
2129        assert_eq!(first_pair[1].as_str(), Some("one"));
2130    }
2131
2132    // =========================================================================
2133    // Cross-Language Parity Test
2134    // =========================================================================
2135
2136    #[test]
2137    fn test_cross_language_parity_all_types() {
2138        // This test verifies that Rust JSON export matches expected format
2139        // for ALL special types. The same fixture is tested in .NET.
2140
2141        use tempfile::NamedTempFile;
2142
2143        // Create a document with all special types
2144        let mut data = IndexMap::new();
2145        data.insert("null_val".to_string(), Value::Null);
2146        data.insert("bool_true".to_string(), Value::Bool(true));
2147        data.insert("int_val".to_string(), Value::Int(42));
2148        data.insert("float_val".to_string(), Value::Float(3.14159));
2149        data.insert("string_val".to_string(), Value::String("hello".to_string()));
2150        data.insert("bytes_val".to_string(), Value::Bytes(vec![0xca, 0xfe]));
2151        data.insert("timestamp_val".to_string(), Value::Timestamp(0, 0));
2152        data.insert("array_val".to_string(), Value::Array(vec![Value::Int(1), Value::Int(2)]));
2153        data.insert("object_val".to_string(), Value::Object(
2154            vec![("x".to_string(), Value::Int(1))].into_iter().collect()
2155        ));
2156        data.insert("ref_val".to_string(), Value::Ref("object_val".to_string()));
2157        data.insert("tagged_val".to_string(), Value::Tagged("ok".to_string(), Box::new(Value::Int(200))));
2158        data.insert("map_val".to_string(), Value::Map(vec![
2159            (Value::Int(1), Value::String("one".to_string())),
2160        ]));
2161
2162        let doc = TeaLeaf { data, schemas: IndexMap::new(), unions: IndexMap::new(), is_root_array: false };
2163
2164        // Compile to binary and read back
2165        let temp = NamedTempFile::new().unwrap();
2166        doc.compile(temp.path(), false).unwrap();
2167        let reader = Reader::open(temp.path()).unwrap();
2168
2169        // Verify each type survives binary round-trip
2170        assert!(reader.get("null_val").unwrap().is_null());
2171        assert_eq!(reader.get("bool_true").unwrap().as_bool(), Some(true));
2172        assert_eq!(reader.get("int_val").unwrap().as_int(), Some(42));
2173        assert_eq!(reader.get("float_val").unwrap().as_float(), Some(3.14159));
2174        assert_eq!(reader.get("string_val").unwrap().as_str(), Some("hello"));
2175        assert_eq!(reader.get("bytes_val").unwrap().as_bytes(), Some(&[0xca, 0xfe][..]));
2176        assert_eq!(reader.get("timestamp_val").unwrap().as_timestamp_millis(), Some(0));
2177
2178        let arr = reader.get("array_val").unwrap();
2179        assert_eq!(arr.as_array().unwrap().len(), 2);
2180
2181        let obj = reader.get("object_val").unwrap();
2182        assert!(obj.as_object().is_some());
2183
2184        let ref_val = reader.get("ref_val").unwrap();
2185        assert_eq!(ref_val.as_ref_name(), Some("object_val"));
2186
2187        let tagged = reader.get("tagged_val").unwrap();
2188        let (tag, val) = tagged.as_tagged().unwrap();
2189        assert_eq!(tag, "ok");
2190        assert_eq!(val.as_int(), Some(200));
2191
2192        let map = reader.get("map_val").unwrap();
2193        let pairs = map.as_map().unwrap();
2194        assert_eq!(pairs.len(), 1);
2195
2196        // Verify JSON export format matches expected conventions
2197        let json = doc.to_json().unwrap();
2198
2199        // Bytes should be hex string
2200        assert!(json.contains("0xcafe"), "Bytes should export as hex: {}", json);
2201
2202        // Ref should have $ref key
2203        assert!(json.contains("\"$ref\""), "Ref should have $ref key: {}", json);
2204
2205        // Tagged should have $tag and $value
2206        assert!(json.contains("\"$tag\""), "Tagged should have $tag: {}", json);
2207        assert!(json.contains("\"$value\""), "Tagged should have $value: {}", json);
2208
2209        // Map should be array of pairs (nested arrays)
2210        let compact = json.replace(" ", "").replace("\n", "");
2211        assert!(compact.contains("[["), "Map should export as array of pairs: {}", json);
2212
2213        // Timestamp should be ISO 8601
2214        assert!(json.contains("1970-01-01"), "Timestamp should be ISO 8601: {}", json);
2215    }
2216
2217    // =========================================================================
2218    // JSON Conversion Contract Tests
2219    // =========================================================================
2220    // These tests lock down the exact JSON↔TeaLeaf conversion behavior.
2221    // STABILITY POLICY:
2222    // - Plain JSON roundtrip: MUST be lossless for primitives, arrays, objects
2223    // - TeaLeaf→JSON: Special types have FIXED representations that MUST NOT change
2224    // - JSON→TeaLeaf: No magic parsing; $ref/$tag/hex/ISO8601 stay as plain JSON
2225
2226    mod conversion_contracts {
2227        use super::*;
2228
2229        // --- Plain JSON Roundtrip (STABLE) ---
2230
2231        #[test]
2232        fn contract_null_roundtrip() {
2233            let doc = TeaLeaf::from_json("null").unwrap();
2234            assert!(matches!(doc.get("root").unwrap(), Value::Null));
2235        }
2236
2237        #[test]
2238        fn contract_bool_roundtrip() {
2239            let doc = TeaLeaf::from_json(r#"{"t": true, "f": false}"#).unwrap();
2240            assert_eq!(doc.get("t").unwrap().as_bool(), Some(true));
2241            assert_eq!(doc.get("f").unwrap().as_bool(), Some(false));
2242
2243            let json = doc.to_json_compact().unwrap();
2244            assert!(json.contains("true"));
2245            assert!(json.contains("false"));
2246        }
2247
2248        #[test]
2249        fn contract_integer_roundtrip() {
2250            let doc = TeaLeaf::from_json(r#"{"zero": 0, "pos": 42, "neg": -123}"#).unwrap();
2251            assert_eq!(doc.get("zero").unwrap().as_int(), Some(0));
2252            assert_eq!(doc.get("pos").unwrap().as_int(), Some(42));
2253            assert_eq!(doc.get("neg").unwrap().as_int(), Some(-123));
2254        }
2255
2256        #[test]
2257        fn contract_float_roundtrip() {
2258            let doc = TeaLeaf::from_json(r#"{"pi": 3.14159}"#).unwrap();
2259            let pi = doc.get("pi").unwrap().as_float().unwrap();
2260            assert!((pi - 3.14159).abs() < 0.00001);
2261        }
2262
2263        #[test]
2264        fn contract_string_roundtrip() {
2265            let doc = TeaLeaf::from_json(r#"{"s": "hello world", "u": "日本語", "e": ""}"#).unwrap();
2266            assert_eq!(doc.get("s").unwrap().as_str(), Some("hello world"));
2267            assert_eq!(doc.get("u").unwrap().as_str(), Some("日本語"));
2268            assert_eq!(doc.get("e").unwrap().as_str(), Some(""));
2269        }
2270
2271        #[test]
2272        fn contract_array_roundtrip() {
2273            let doc = TeaLeaf::from_json(r#"{"arr": [1, "two", true, null]}"#).unwrap();
2274            let arr = doc.get("arr").unwrap().as_array().unwrap();
2275            assert_eq!(arr.len(), 4);
2276            assert_eq!(arr[0].as_int(), Some(1));
2277            assert_eq!(arr[1].as_str(), Some("two"));
2278            assert_eq!(arr[2].as_bool(), Some(true));
2279            assert!(matches!(arr[3], Value::Null));
2280        }
2281
2282        #[test]
2283        fn contract_nested_array_roundtrip() {
2284            let doc = TeaLeaf::from_json(r#"{"matrix": [[1, 2], [3, 4]]}"#).unwrap();
2285            let matrix = doc.get("matrix").unwrap().as_array().unwrap();
2286            assert_eq!(matrix.len(), 2);
2287            let row0 = matrix[0].as_array().unwrap();
2288            assert_eq!(row0[0].as_int(), Some(1));
2289            assert_eq!(row0[1].as_int(), Some(2));
2290        }
2291
2292        #[test]
2293        fn contract_object_roundtrip() {
2294            let doc = TeaLeaf::from_json(r#"{"user": {"name": "alice", "age": 30}}"#).unwrap();
2295            let user = doc.get("user").unwrap().as_object().unwrap();
2296            assert_eq!(user.get("name").unwrap().as_str(), Some("alice"));
2297            assert_eq!(user.get("age").unwrap().as_int(), Some(30));
2298        }
2299
2300        // --- TeaLeaf→JSON Fixed Representations (STABLE) ---
2301
2302        #[test]
2303        fn contract_bytes_to_json_hex() {
2304            let mut data = IndexMap::new();
2305            data.insert("b".to_string(), Value::Bytes(vec![0xca, 0xfe, 0xba, 0xbe]));
2306            let doc = TeaLeaf { data, schemas: IndexMap::new(), unions: IndexMap::new(), is_root_array: false };
2307
2308            let json = doc.to_json_compact().unwrap();
2309            // CONTRACT: Bytes serialize as lowercase hex with 0x prefix
2310            assert!(json.contains("\"0xcafebabe\""), "Bytes must be 0x-prefixed hex: {}", json);
2311        }
2312
2313        #[test]
2314        fn contract_bytes_empty_to_json() {
2315            let mut data = IndexMap::new();
2316            data.insert("b".to_string(), Value::Bytes(vec![]));
2317            let doc = TeaLeaf { data, schemas: IndexMap::new(), unions: IndexMap::new(), is_root_array: false };
2318
2319            let json = doc.to_json_compact().unwrap();
2320            // CONTRACT: Empty bytes serialize as "0x"
2321            assert!(json.contains("\"0x\""), "Empty bytes must be \"0x\": {}", json);
2322        }
2323
2324        #[test]
2325        fn contract_timestamp_to_json_iso8601() {
2326            let mut data = IndexMap::new();
2327            // 2024-01-15T10:50:00.123Z (verified milliseconds since epoch)
2328            data.insert("ts".to_string(), Value::Timestamp(1705315800123, 0));
2329            let doc = TeaLeaf { data, schemas: IndexMap::new(), unions: IndexMap::new(), is_root_array: false };
2330
2331            let json = doc.to_json_compact().unwrap();
2332            // CONTRACT: Timestamp serializes as ISO 8601 with milliseconds
2333            assert!(json.contains("2024-01-15T10:50:00.123Z"),
2334                "Timestamp must be ISO 8601 with ms: {}", json);
2335        }
2336
2337        #[test]
2338        fn contract_timestamp_epoch_to_json() {
2339            let mut data = IndexMap::new();
2340            data.insert("ts".to_string(), Value::Timestamp(0, 0));
2341            let doc = TeaLeaf { data, schemas: IndexMap::new(), unions: IndexMap::new(), is_root_array: false };
2342
2343            let json = doc.to_json_compact().unwrap();
2344            // CONTRACT: Unix epoch is 1970-01-01T00:00:00Z (no ms for whole seconds)
2345            assert!(json.contains("1970-01-01T00:00:00Z"),
2346                "Epoch must be 1970-01-01T00:00:00Z: {}", json);
2347        }
2348
2349        #[test]
2350        fn contract_ref_to_json() {
2351            let mut data = IndexMap::new();
2352            data.insert("r".to_string(), Value::Ref("target_key".to_string()));
2353            let doc = TeaLeaf { data, schemas: IndexMap::new(), unions: IndexMap::new(), is_root_array: false };
2354
2355            let json = doc.to_json_compact().unwrap();
2356            // CONTRACT: Ref serializes as {"$ref": "name"}
2357            assert!(json.contains("\"$ref\":\"target_key\"") || json.contains("\"$ref\": \"target_key\""),
2358                "Ref must be {{\"$ref\": \"name\"}}: {}", json);
2359        }
2360
2361        #[test]
2362        fn contract_tagged_to_json() {
2363            let mut data = IndexMap::new();
2364            data.insert("t".to_string(), Value::Tagged("ok".to_string(), Box::new(Value::Int(200))));
2365            let doc = TeaLeaf { data, schemas: IndexMap::new(), unions: IndexMap::new(), is_root_array: false };
2366
2367            let json = doc.to_json_compact().unwrap();
2368            // CONTRACT: Tagged serializes with $tag and $value keys
2369            assert!(json.contains("\"$tag\""), "Tagged must have $tag: {}", json);
2370            assert!(json.contains("\"ok\""), "Tag name must be present: {}", json);
2371            assert!(json.contains("\"$value\""), "Tagged must have $value: {}", json);
2372            assert!(json.contains("200"), "Inner value must be present: {}", json);
2373        }
2374
2375        #[test]
2376        fn contract_tagged_null_value_to_json() {
2377            let mut data = IndexMap::new();
2378            data.insert("t".to_string(), Value::Tagged("none".to_string(), Box::new(Value::Null)));
2379            let doc = TeaLeaf { data, schemas: IndexMap::new(), unions: IndexMap::new(), is_root_array: false };
2380
2381            let json = doc.to_json_compact().unwrap();
2382            // CONTRACT: Tagged with null inner still has $value: null
2383            assert!(json.contains("\"$value\":null") || json.contains("\"$value\": null"),
2384                "Tagged with null must have $value:null: {}", json);
2385        }
2386
2387        #[test]
2388        fn contract_map_to_json_pairs() {
2389            let mut data = IndexMap::new();
2390            data.insert("m".to_string(), Value::Map(vec![
2391                (Value::Int(1), Value::String("one".to_string())),
2392                (Value::Int(2), Value::String("two".to_string())),
2393            ]));
2394            let doc = TeaLeaf { data, schemas: IndexMap::new(), unions: IndexMap::new(), is_root_array: false };
2395
2396            let json = doc.to_json_compact().unwrap();
2397            // CONTRACT: Map serializes as array of [key, value] pairs
2398            assert!(json.contains("[[1,\"one\"],[2,\"two\"]]") ||
2399                    json.contains("[[1, \"one\"], [2, \"two\"]]"),
2400                "Map must be [[k,v],...]: {}", json);
2401        }
2402
2403        #[test]
2404        fn contract_map_empty_to_json() {
2405            let mut data = IndexMap::new();
2406            data.insert("m".to_string(), Value::Map(vec![]));
2407            let doc = TeaLeaf { data, schemas: IndexMap::new(), unions: IndexMap::new(), is_root_array: false };
2408
2409            let json = doc.to_json_compact().unwrap();
2410            // CONTRACT: Empty map serializes as empty array
2411            assert!(json.contains("\"m\":[]") || json.contains("\"m\": []"),
2412                "Empty map must be []: {}", json);
2413        }
2414
2415        // --- JSON→TeaLeaf No Magic (STABLE) ---
2416
2417        #[test]
2418        fn contract_json_dollar_ref_stays_object() {
2419            let doc = TeaLeaf::from_json(r#"{"x": {"$ref": "some_key"}}"#).unwrap();
2420            let x = doc.get("x").unwrap();
2421            // CONTRACT: JSON {"$ref": ...} MUST remain Object, NOT become Ref
2422            assert!(x.as_object().is_some(), "$ref in JSON must stay Object, not become Ref");
2423            assert!(x.as_ref_name().is_none(), "$ref must not auto-convert to Ref type");
2424        }
2425
2426        #[test]
2427        fn contract_json_dollar_tag_stays_object() {
2428            let doc = TeaLeaf::from_json(r#"{"x": {"$tag": "ok", "$value": 200}}"#).unwrap();
2429            let x = doc.get("x").unwrap();
2430            // CONTRACT: JSON {"$tag": ..., "$value": ...} MUST remain Object
2431            assert!(x.as_object().is_some(), "$tag in JSON must stay Object, not become Tagged");
2432            assert!(x.as_tagged().is_none(), "$tag must not auto-convert to Tagged type");
2433        }
2434
2435        #[test]
2436        fn contract_json_hex_string_stays_string() {
2437            let doc = TeaLeaf::from_json(r#"{"x": "0xcafef00d"}"#).unwrap();
2438            let x = doc.get("x").unwrap();
2439            // CONTRACT: Hex strings MUST remain String, NOT become Bytes
2440            assert_eq!(x.as_str(), Some("0xcafef00d"));
2441            assert!(x.as_bytes().is_none(), "Hex string must not auto-convert to Bytes");
2442        }
2443
2444        #[test]
2445        fn contract_json_iso_timestamp_stays_string() {
2446            let doc = TeaLeaf::from_json(r#"{"x": "2024-01-15T10:30:00.000Z"}"#).unwrap();
2447            let x = doc.get("x").unwrap();
2448            // CONTRACT: ISO 8601 strings MUST remain String, NOT become Timestamp
2449            assert_eq!(x.as_str(), Some("2024-01-15T10:30:00.000Z"));
2450            assert!(x.as_timestamp_millis().is_none(), "ISO string must not auto-convert to Timestamp");
2451        }
2452
2453        #[test]
2454        fn contract_json_array_pairs_stays_array() {
2455            let doc = TeaLeaf::from_json(r#"{"x": [[1, "one"], [2, "two"]]}"#).unwrap();
2456            let x = doc.get("x").unwrap();
2457            // CONTRACT: Array of pairs MUST remain Array, NOT become Map
2458            assert!(x.as_array().is_some(), "Array of pairs must stay Array, not become Map");
2459            assert!(x.as_map().is_none(), "Array pairs must not auto-convert to Map");
2460        }
2461
2462        // --- Number Type Inference (STABLE) ---
2463
2464        #[test]
2465        fn contract_number_integer_to_int() {
2466            let doc = TeaLeaf::from_json(r#"{"n": 42}"#).unwrap();
2467            // CONTRACT: Integers that fit i64 become Int
2468            assert!(doc.get("n").unwrap().as_int().is_some());
2469        }
2470
2471        #[test]
2472        fn contract_number_large_to_uint() {
2473            // Max u64 = 18446744073709551615, which doesn't fit i64
2474            let doc = TeaLeaf::from_json(r#"{"n": 18446744073709551615}"#).unwrap();
2475            // CONTRACT: Large positive integers that fit u64 become UInt
2476            assert!(doc.get("n").unwrap().as_uint().is_some());
2477        }
2478
2479        #[test]
2480        fn contract_number_decimal_to_float() {
2481            let doc = TeaLeaf::from_json(r#"{"n": 3.14}"#).unwrap();
2482            // CONTRACT: Numbers with decimals become Float
2483            assert!(doc.get("n").unwrap().as_float().is_some());
2484        }
2485
2486        // --- Edge Cases (STABLE) ---
2487
2488        #[test]
2489        fn contract_float_nan_to_null() {
2490            let mut data = IndexMap::new();
2491            data.insert("f".to_string(), Value::Float(f64::NAN));
2492            let doc = TeaLeaf { data, schemas: IndexMap::new(), unions: IndexMap::new(), is_root_array: false };
2493
2494            let json = doc.to_json_compact().unwrap();
2495            // CONTRACT: NaN serializes as null (JSON has no NaN)
2496            assert!(json.contains("null"), "NaN must serialize as null: {}", json);
2497        }
2498
2499        #[test]
2500        fn contract_float_infinity_to_null() {
2501            let mut data = IndexMap::new();
2502            data.insert("f".to_string(), Value::Float(f64::INFINITY));
2503            let doc = TeaLeaf { data, schemas: IndexMap::new(), unions: IndexMap::new(), is_root_array: false };
2504
2505            let json = doc.to_json_compact().unwrap();
2506            // CONTRACT: Infinity serializes as null (JSON has no Infinity)
2507            assert!(json.contains("null"), "Infinity must serialize as null: {}", json);
2508        }
2509
2510        #[test]
2511        fn contract_deep_nesting_preserved() {
2512            let doc = TeaLeaf::from_json(r#"{"a":{"b":{"c":{"d":{"e":5}}}}}"#).unwrap();
2513            let a = doc.get("a").unwrap().as_object().unwrap();
2514            let b = a.get("b").unwrap().as_object().unwrap();
2515            let c = b.get("c").unwrap().as_object().unwrap();
2516            let d = c.get("d").unwrap().as_object().unwrap();
2517            assert_eq!(d.get("e").unwrap().as_int(), Some(5));
2518        }
2519    }
2520
2521    // =========================================================================
2522    // Schema Inference Tests
2523    // =========================================================================
2524
2525    #[test]
2526    fn test_schema_inference_simple_array() {
2527        let json = r#"{"users": [{"name": "alice", "age": 30}, {"name": "bob", "age": 25}]}"#;
2528        let doc = TeaLeaf::from_json_with_schemas(json).unwrap();
2529
2530        // Should have inferred a "user" schema
2531        let schema = doc.schema("user");
2532        assert!(schema.is_some(), "Should infer 'user' schema from 'users' array");
2533
2534        let schema = schema.unwrap();
2535        assert_eq!(schema.fields.len(), 2);
2536
2537        // Fields should preserve insertion order from JSON
2538        assert_eq!(schema.fields[0].name, "name");
2539        assert_eq!(schema.fields[1].name, "age");
2540
2541        // Data should still be accessible
2542        let users = doc.get("users").unwrap().as_array().unwrap();
2543        assert_eq!(users.len(), 2);
2544        assert_eq!(users[0].as_object().unwrap().get("name").unwrap().as_str(), Some("alice"));
2545    }
2546
2547    #[test]
2548    fn test_schema_inference_nested_arrays() {
2549        let json = r#"{
2550            "orders": [
2551                {"id": 1, "items": [{"sku": "A", "qty": 2}, {"sku": "B", "qty": 1}]},
2552                {"id": 2, "items": [{"sku": "C", "qty": 3}]}
2553            ]
2554        }"#;
2555        let doc = TeaLeaf::from_json_with_schemas(json).unwrap();
2556
2557        // Should infer both "order" and "item" schemas
2558        assert!(doc.schema("order").is_some(), "Should infer 'order' schema");
2559        assert!(doc.schema("item").is_some(), "Should infer 'item' schema");
2560
2561        let order_schema = doc.schema("order").unwrap();
2562        // Order should have "id" and "items" fields
2563        assert!(order_schema.fields.iter().any(|f| f.name == "id"));
2564        assert!(order_schema.fields.iter().any(|f| f.name == "items"));
2565
2566        // The "items" field should reference the "item" schema
2567        let items_field = order_schema.fields.iter().find(|f| f.name == "items").unwrap();
2568        assert!(items_field.field_type.is_array);
2569        assert_eq!(items_field.field_type.base, "item");
2570    }
2571
2572    #[test]
2573    fn test_schema_inference_to_tl_text() {
2574        let json = r#"{"products": [{"name": "Widget", "price": 9.99}, {"name": "Gadget", "price": 19.99}]}"#;
2575        let doc = TeaLeaf::from_json_with_schemas(json).unwrap();
2576
2577        let tl_text = doc.to_tl_with_schemas();
2578
2579        // Should contain struct definition
2580        assert!(tl_text.contains("@struct product"), "TeaLeaf text should contain struct definition");
2581        assert!(tl_text.contains("name: string"), "Struct should have name field");
2582        assert!(tl_text.contains("price: float"), "Struct should have price field");
2583
2584        // Should contain @table directive
2585        assert!(tl_text.contains("@table product"), "TeaLeaf text should use @table for data");
2586
2587        // Should contain tuple format data
2588        assert!(tl_text.contains("Widget") || tl_text.contains("\"Widget\""), "Data should be present");
2589    }
2590
2591    #[test]
2592    fn test_schema_inference_roundtrip() {
2593        let json = r#"{"items": [{"id": 1, "name": "A"}, {"id": 2, "name": "B"}]}"#;
2594        let doc = TeaLeaf::from_json_with_schemas(json).unwrap();
2595
2596        // Convert to TeaLeaf text with schemas
2597        let tl_text = doc.to_tl_with_schemas();
2598
2599        // Parse the TeaLeaf text back
2600        let parsed = TeaLeaf::parse(&tl_text).unwrap();
2601
2602        // Should have the same data
2603        let items = parsed.get("items").unwrap().as_array().unwrap();
2604        assert_eq!(items.len(), 2);
2605        assert_eq!(items[0].as_object().unwrap().get("id").unwrap().as_int(), Some(1));
2606        assert_eq!(items[0].as_object().unwrap().get("name").unwrap().as_str(), Some("A"));
2607
2608        // Should have the schema
2609        assert!(parsed.schema("item").is_some());
2610    }
2611
2612    #[test]
2613    fn test_schema_inference_nullable_fields() {
2614        let json = r#"{"users": [{"name": "alice", "email": "a@test.com"}, {"name": "bob", "email": null}]}"#;
2615        let doc = TeaLeaf::from_json_with_schemas(json).unwrap();
2616
2617        let schema = doc.schema("user").unwrap();
2618        let email_field = schema.fields.iter().find(|f| f.name == "email").unwrap();
2619
2620        // Email should be nullable since one value is null
2621        assert!(email_field.field_type.nullable, "Field with null values should be nullable");
2622    }
2623
2624    #[test]
2625    fn test_schema_inference_nested_tuples_no_redundant_table() {
2626        let json = r#"{
2627            "orders": [
2628                {"id": 1, "items": [{"sku": "A", "qty": 2}]}
2629            ]
2630        }"#;
2631        let doc = TeaLeaf::from_json_with_schemas(json).unwrap();
2632        let tl_text = doc.to_tl_with_schemas();
2633
2634        // Count occurrences of @table - should only appear at top level for each schema-typed array
2635        let _table_count = tl_text.matches("@table").count();
2636
2637        // Should have @table for orders, but NOT redundant @table for nested items
2638        // The nested items array should just be [...] with tuples inside
2639        assert!(tl_text.contains("@table order"), "Should have @table for orders");
2640
2641        // Parse and verify the structure is correct
2642        let parsed = TeaLeaf::parse(&tl_text).unwrap();
2643        let orders = parsed.get("orders").unwrap().as_array().unwrap();
2644        let first_order = orders[0].as_object().unwrap();
2645        let items = first_order.get("items").unwrap().as_array().unwrap();
2646        assert_eq!(items[0].as_object().unwrap().get("sku").unwrap().as_str(), Some("A"));
2647    }
2648
2649    #[test]
2650    fn test_schema_inference_mismatched_arrays_not_matched() {
2651        // Test that arrays with different structures don't incorrectly share schemas
2652        let json = r#"{
2653            "users": [{"id": "U1", "name": "Alice"}],
2654            "products": [{"id": "P1", "price": 9.99}]
2655        }"#;
2656        let doc = TeaLeaf::from_json_with_schemas(json).unwrap();
2657
2658        // Should have separate schemas
2659        assert!(doc.schema("user").is_some());
2660        assert!(doc.schema("product").is_some());
2661
2662        // User schema should have name field
2663        let user_schema = doc.schema("user").unwrap();
2664        assert!(user_schema.fields.iter().any(|f| f.name == "name"));
2665
2666        // Product schema should have price field
2667        let product_schema = doc.schema("product").unwrap();
2668        assert!(product_schema.fields.iter().any(|f| f.name == "price"));
2669    }
2670
2671    #[test]
2672    fn test_schema_inference_special_char_quoting() {
2673        // Test that strings with special characters are properly quoted
2674        let json = r#"{"items": [
2675            {"category": "Electronics/Audio", "email": "test@example.com", "path": "a.b.c"}
2676        ]}"#;
2677        let doc = TeaLeaf::from_json_with_schemas(json).unwrap();
2678        let tl_text = doc.to_tl_with_schemas();
2679
2680        // These should be quoted in output since they contain special characters
2681        assert!(tl_text.contains("\"Electronics/Audio\""), "Slash should be quoted: {}", tl_text);
2682        assert!(tl_text.contains("\"test@example.com\""), "@ should be quoted: {}", tl_text);
2683        // Dots are valid in names per spec grammar, so a.b.c should NOT be quoted
2684        assert!(!tl_text.contains("\"a.b.c\""), "Dots should NOT be quoted per spec grammar: {}", tl_text);
2685
2686        // Should parse back correctly
2687        let parsed = TeaLeaf::parse(&tl_text).unwrap();
2688        let items = parsed.get("items").unwrap().as_array().unwrap();
2689        let item = items[0].as_object().unwrap();
2690        assert_eq!(item.get("category").unwrap().as_str(), Some("Electronics/Audio"));
2691        assert_eq!(item.get("email").unwrap().as_str(), Some("test@example.com"));
2692    }
2693
2694    #[test]
2695    fn test_schema_inference_nested_objects() {
2696        // Test that nested objects within array elements get schemas created
2697        let json = r#"{
2698            "customers": [
2699                {
2700                    "id": 1,
2701                    "name": "Alice",
2702                    "billing_address": {
2703                        "street": "123 Main St",
2704                        "city": "Boston",
2705                        "state": "MA",
2706                        "postal_code": "02101",
2707                        "country": "USA"
2708                    },
2709                    "shipping_address": {
2710                        "street": "456 Oak Ave",
2711                        "city": "Cambridge",
2712                        "state": "MA",
2713                        "postal_code": "02139",
2714                        "country": "USA"
2715                    }
2716                },
2717                {
2718                    "id": 2,
2719                    "name": "Bob",
2720                    "billing_address": {
2721                        "street": "789 Elm St",
2722                        "city": "New York",
2723                        "state": "NY",
2724                        "postal_code": "10001",
2725                        "country": "USA"
2726                    },
2727                    "shipping_address": {
2728                        "street": "789 Elm St",
2729                        "city": "New York",
2730                        "state": "NY",
2731                        "postal_code": "10001",
2732                        "country": "USA"
2733                    }
2734                }
2735            ]
2736        }"#;
2737
2738        let doc = TeaLeaf::from_json_with_schemas(json).unwrap();
2739
2740        // Should have schemas for nested objects
2741        assert!(doc.schema("billing_address").is_some(), "Should create billing_address schema");
2742        assert!(doc.schema("shipping_address").is_some(), "Should create shipping_address schema");
2743        assert!(doc.schema("customer").is_some(), "Should create customer schema");
2744
2745        // Check billing_address schema fields
2746        let billing_schema = doc.schema("billing_address").unwrap();
2747        let billing_fields: Vec<&str> = billing_schema.fields.iter().map(|f| f.name.as_str()).collect();
2748        assert!(billing_fields.contains(&"street"), "billing_address should have street field");
2749        assert!(billing_fields.contains(&"city"), "billing_address should have city field");
2750        assert!(billing_fields.contains(&"state"), "billing_address should have state field");
2751        assert!(billing_fields.contains(&"postal_code"), "billing_address should have postal_code field");
2752        assert!(billing_fields.contains(&"country"), "billing_address should have country field");
2753
2754        // Check customer schema references the nested schemas
2755        let customer_schema = doc.schema("customer").unwrap();
2756        let billing_field = customer_schema.fields.iter().find(|f| f.name == "billing_address").unwrap();
2757        assert_eq!(billing_field.field_type.base, "billing_address", "customer.billing_address should reference billing_address schema");
2758
2759        let shipping_field = customer_schema.fields.iter().find(|f| f.name == "shipping_address").unwrap();
2760        assert_eq!(shipping_field.field_type.base, "shipping_address", "customer.shipping_address should reference shipping_address schema");
2761
2762        // Serialize and verify output
2763        let tl_text = doc.to_tl_with_schemas();
2764        assert!(tl_text.contains("@struct billing_address"), "Output should contain billing_address struct");
2765        assert!(tl_text.contains("@struct shipping_address"), "Output should contain shipping_address struct");
2766        assert!(tl_text.contains("billing_address: billing_address"), "customer should have billing_address field with billing_address type");
2767        assert!(tl_text.contains("shipping_address: shipping_address"), "customer should have shipping_address field with shipping_address type");
2768    }
2769
2770    #[test]
2771    fn test_schema_inference_nested_objects_with_nulls() {
2772        // Test that nested objects handle nullable fields correctly
2773        let json = r#"{
2774            "orders": [
2775                {
2776                    "id": 1,
2777                    "customer": {
2778                        "name": "Alice",
2779                        "phone": "555-1234"
2780                    }
2781                },
2782                {
2783                    "id": 2,
2784                    "customer": {
2785                        "name": "Bob",
2786                        "phone": null
2787                    }
2788                }
2789            ]
2790        }"#;
2791
2792        let doc = TeaLeaf::from_json_with_schemas(json).unwrap();
2793
2794        // Customer schema should exist with nullable phone
2795        let customer_schema = doc.schema("customer").unwrap();
2796        let phone_field = customer_schema.fields.iter().find(|f| f.name == "phone").unwrap();
2797        assert!(phone_field.field_type.nullable, "phone field should be nullable");
2798    }
2799
2800    // =========================================================================
2801    // Coverage: dumps(), write_value(), escape_string(), format_float()
2802    // =========================================================================
2803
2804    #[test]
2805    fn test_dumps_all_value_types() {
2806        let mut data = IndexMap::new();
2807        data.insert("null_val".to_string(), Value::Null);
2808        data.insert("bool_val".to_string(), Value::Bool(true));
2809        data.insert("int_val".to_string(), Value::Int(42));
2810        data.insert("uint_val".to_string(), Value::UInt(999));
2811        data.insert("float_val".to_string(), Value::Float(3.14));
2812        data.insert("str_val".to_string(), Value::String("hello".to_string()));
2813        data.insert("bytes_val".to_string(), Value::Bytes(vec![0xca, 0xfe]));
2814        data.insert("arr_val".to_string(), Value::Array(vec![Value::Int(1), Value::Int(2)]));
2815        data.insert("obj_val".to_string(), Value::Object(
2816            vec![("x".to_string(), Value::Int(1))].into_iter().collect()
2817        ));
2818        data.insert("map_val".to_string(), Value::Map(vec![
2819            (Value::Int(1), Value::String("one".to_string())),
2820        ]));
2821        data.insert("ref_val".to_string(), Value::Ref("target".to_string()));
2822        data.insert("tag_val".to_string(), Value::Tagged("ok".to_string(), Box::new(Value::Int(200))));
2823        data.insert("ts_val".to_string(), Value::Timestamp(0, 0));
2824        data.insert("ts_millis".to_string(), Value::Timestamp(1705315800123, 0));
2825
2826        let output = dumps(&data);
2827
2828        assert!(output.contains("~"), "Should contain null");
2829        assert!(output.contains("true"), "Should contain bool");
2830        assert!(output.contains("42"), "Should contain int");
2831        assert!(output.contains("999"), "Should contain uint");
2832        assert!(output.contains("3.14"), "Should contain float");
2833        assert!(output.contains("hello"), "Should contain string");
2834        assert!(output.contains("b\"cafe\""), "Should contain bytes literal");
2835        assert!(output.contains("[1, 2]"), "Should contain array");
2836        assert!(output.contains("@map {"), "Should contain map");
2837        assert!(output.contains("!target"), "Should contain ref");
2838        assert!(output.contains(":ok 200"), "Should contain tagged");
2839        assert!(output.contains("1970-01-01T00:00:00Z"), "Should contain epoch timestamp");
2840        assert!(output.contains(".123Z"), "Should contain millis timestamp");
2841    }
2842
2843    #[test]
2844    fn test_bytes_literal_text_roundtrip() {
2845        // dumps() emits b"..." → parse() reads it back as Value::Bytes
2846        let mut data = IndexMap::new();
2847        data.insert("payload".to_string(), Value::Bytes(vec![0xca, 0xfe, 0xf0, 0x0d]));
2848        data.insert("empty".to_string(), Value::Bytes(vec![]));
2849
2850        let text = dumps(&data);
2851        assert!(text.contains(r#"b"cafef00d""#), "Should emit b\"...\" literal: {}", text);
2852        assert!(text.contains(r#"b"""#), "Should emit empty bytes literal: {}", text);
2853
2854        // Parse the text back
2855        let doc = TeaLeaf::parse(&text).unwrap();
2856        assert_eq!(doc.data.get("payload").unwrap().as_bytes(), Some(&[0xca, 0xfe, 0xf0, 0x0d][..]));
2857        assert_eq!(doc.data.get("empty").unwrap().as_bytes(), Some(&[][..]));
2858    }
2859
2860    #[test]
2861    fn test_dumps_string_quoting() {
2862        let mut data = IndexMap::new();
2863        data.insert("quoted".to_string(), Value::String("hello world".to_string()));
2864        data.insert("unquoted".to_string(), Value::String("hello".to_string()));
2865        data.insert("reserved_true".to_string(), Value::String("true".to_string()));
2866        data.insert("reserved_null".to_string(), Value::String("null".to_string()));
2867        data.insert("reserved_tilde".to_string(), Value::String("~".to_string()));
2868        data.insert("empty".to_string(), Value::String("".to_string()));
2869        data.insert("at_start".to_string(), Value::String("@directive".to_string()));
2870        data.insert("hash_start".to_string(), Value::String("#comment".to_string()));
2871        data.insert("bang_start".to_string(), Value::String("!ref".to_string()));
2872        data.insert("hex_start".to_string(), Value::String("0xabc".to_string()));
2873        data.insert("number_like".to_string(), Value::String("42abc".to_string()));
2874        data.insert("negative_like".to_string(), Value::String("-5".to_string()));
2875        data.insert("slash".to_string(), Value::String("a/b".to_string()));
2876        data.insert("dot".to_string(), Value::String("a.b".to_string()));
2877
2878        let output = dumps(&data);
2879
2880        // Quoted values should be wrapped in double quotes
2881        assert!(output.contains("\"hello world\""), "Spaces need quoting");
2882        assert!(output.contains("\"true\""), "Reserved word true needs quoting");
2883        assert!(output.contains("\"null\""), "Reserved word null needs quoting");
2884        assert!(output.contains("\"~\""), "Tilde needs quoting");
2885        assert!(output.contains("\"\""), "Empty string needs quoting");
2886        assert!(output.contains("\"@directive\""), "@ prefix needs quoting");
2887        assert!(output.contains("\"#comment\""), "# prefix needs quoting");
2888        assert!(output.contains("\"!ref\""), "! prefix needs quoting");
2889        assert!(output.contains("\"0xabc\""), "0x prefix needs quoting");
2890        assert!(output.contains("\"42abc\""), "Digit start needs quoting");
2891        assert!(output.contains("\"-5\""), "Negative number needs quoting");
2892        assert!(output.contains("\"a/b\""), "Slash needs quoting");
2893        assert!(!output.contains("\"a.b\""), "Dot should NOT need quoting per spec grammar");
2894    }
2895
2896    #[test]
2897    fn test_escape_string_control_chars() {
2898        let result = escape_string("tab\there\nnewline\rreturn");
2899        assert!(result.contains("\\t"), "Tab should be escaped");
2900        assert!(result.contains("\\n"), "Newline should be escaped");
2901        assert!(result.contains("\\r"), "CR should be escaped");
2902
2903        let result = escape_string("\x08backspace\x0cformfeed");
2904        assert!(result.contains("\\b"), "Backspace should be escaped");
2905        assert!(result.contains("\\f"), "Formfeed should be escaped");
2906
2907        let result = escape_string("quote\"and\\backslash");
2908        assert!(result.contains("\\\""), "Quote should be escaped");
2909        assert!(result.contains("\\\\"), "Backslash should be escaped");
2910
2911        // Other control characters use \uXXXX
2912        let result = escape_string("\x01");
2913        assert!(result.contains("\\u0001"), "Control char should use \\uXXXX");
2914    }
2915
2916    #[test]
2917    fn test_format_float_both_branches() {
2918        // Whole number float: Rust's to_string() drops .0, so format_float adds it back
2919        assert_eq!(format_float(42.0), "42.0");
2920
2921        // Float with decimals should stay as-is
2922        assert_eq!(format_float(3.14), "3.14");
2923
2924        // Scientific notation stays as-is
2925        let very_small = format_float(1e-20);
2926        assert!(very_small.contains('e') || very_small.contains('.'));
2927    }
2928
2929    #[test]
2930    fn test_needs_quoting_various_patterns() {
2931        // Should need quoting
2932        assert!(needs_quoting(""), "Empty string");
2933        assert!(needs_quoting("hello world"), "Whitespace");
2934        assert!(needs_quoting("a,b"), "Comma");
2935        assert!(needs_quoting("(x)"), "Parens");
2936        assert!(needs_quoting("[x]"), "Brackets");
2937        assert!(needs_quoting("{x}"), "Braces");
2938        assert!(needs_quoting("a:b"), "Colon");
2939        assert!(needs_quoting("@x"), "At sign");
2940        assert!(needs_quoting("a/b"), "Slash");
2941        assert!(!needs_quoting("a.b"), "Dot is valid in names per spec grammar");
2942        assert!(needs_quoting("true"), "Reserved true");
2943        assert!(needs_quoting("false"), "Reserved false");
2944        assert!(needs_quoting("null"), "Reserved null");
2945        assert!(needs_quoting("~"), "Reserved tilde");
2946        assert!(needs_quoting("!bang"), "Bang prefix");
2947        assert!(needs_quoting("#hash"), "Hash prefix");
2948        assert!(needs_quoting("0xdead"), "Hex prefix");
2949        assert!(needs_quoting("0Xdead"), "Hex prefix uppercase");
2950        assert!(needs_quoting("42abc"), "Starts with digit");
2951        assert!(needs_quoting("-5"), "Starts with minus+digit");
2952        assert!(needs_quoting("+5"), "Starts with plus+digit");
2953
2954        // Should NOT need quoting
2955        assert!(!needs_quoting("hello"), "Simple word");
2956        assert!(!needs_quoting("foo_bar"), "Underscore word");
2957        assert!(!needs_quoting("abc123"), "Alpha then digits");
2958    }
2959
2960    // =========================================================================
2961    // Coverage: singularize()
2962    // =========================================================================
2963
2964    #[test]
2965    fn test_singularize_rules() {
2966        // -ies → -y
2967        assert_eq!(singularize("categories"), "category");
2968        assert_eq!(singularize("entries"), "entry");
2969
2970        // -sses → -ss (special -es rule)
2971        assert_eq!(singularize("classes"), "class");
2972        assert_eq!(singularize("dresses"), "dress");
2973
2974        // -xes → -x
2975        assert_eq!(singularize("boxes"), "box");
2976        assert_eq!(singularize("indexes"), "index");
2977
2978        // -ches → -ch
2979        assert_eq!(singularize("watches"), "watch");
2980
2981        // -shes → -sh
2982        assert_eq!(singularize("dishes"), "dish");
2983
2984        // Regular -s
2985        assert_eq!(singularize("users"), "user");
2986        assert_eq!(singularize("products"), "product");
2987
2988        // Words ending in -ss (should NOT remove s)
2989        assert_eq!(singularize("boss"), "boss");
2990        assert_eq!(singularize("class"), "class");
2991
2992        // Already singular (no trailing s)
2993        assert_eq!(singularize("item"), "item");
2994        assert_eq!(singularize("child"), "child");
2995    }
2996
2997    // =========================================================================
2998    // Coverage: from_json root primitives, loads()
2999    // =========================================================================
3000
3001    #[test]
3002    fn test_from_json_root_primitive() {
3003        // Root-level string
3004        let doc = TeaLeaf::from_json(r#""hello""#).unwrap();
3005        assert_eq!(doc.get("root").unwrap().as_str(), Some("hello"));
3006        assert!(!doc.is_root_array);
3007
3008        // Root-level number
3009        let doc = TeaLeaf::from_json("42").unwrap();
3010        assert_eq!(doc.get("root").unwrap().as_int(), Some(42));
3011
3012        // Root-level bool
3013        let doc = TeaLeaf::from_json("true").unwrap();
3014        assert_eq!(doc.get("root").unwrap().as_bool(), Some(true));
3015
3016        // Root-level null
3017        let doc = TeaLeaf::from_json("null").unwrap();
3018        assert!(doc.get("root").unwrap().is_null());
3019    }
3020
3021    #[test]
3022    fn test_from_json_invalid() {
3023        let result = TeaLeaf::from_json("not valid json {{{");
3024        assert!(result.is_err());
3025    }
3026
3027    #[test]
3028    fn test_loads_convenience() {
3029        let data = loads("name: alice\nage: 30").unwrap();
3030        assert_eq!(data.get("name").unwrap().as_str(), Some("alice"));
3031        assert_eq!(data.get("age").unwrap().as_int(), Some(30));
3032    }
3033
3034    // =========================================================================
3035    // Coverage: InferredType::merge() branches
3036    // =========================================================================
3037
3038    #[test]
3039    fn test_inferred_type_merge_int_float() {
3040        let t = infer_type(&Value::Int(42));
3041        let f = infer_type(&Value::Float(3.14));
3042        let merged = t.merge(&f);
3043        assert_eq!(merged, InferredType::Float);
3044
3045        // Reverse
3046        let merged = f.merge(&t);
3047        assert_eq!(merged, InferredType::Float);
3048    }
3049
3050    #[test]
3051    fn test_inferred_type_merge_null_with_type() {
3052        let n = InferredType::Null;
3053        let s = InferredType::String;
3054        let merged = n.merge(&s);
3055        assert_eq!(merged, InferredType::String);
3056
3057        // Reverse
3058        let merged = s.merge(&n);
3059        assert_eq!(merged, InferredType::String);
3060    }
3061
3062    #[test]
3063    fn test_inferred_type_merge_arrays() {
3064        let a1 = InferredType::Array(Box::new(InferredType::Int));
3065        let a2 = InferredType::Array(Box::new(InferredType::Float));
3066        let merged = a1.merge(&a2);
3067        assert_eq!(merged, InferredType::Array(Box::new(InferredType::Float)));
3068    }
3069
3070    #[test]
3071    fn test_inferred_type_merge_objects_same_fields() {
3072        let o1 = InferredType::Object(vec![
3073            ("a".to_string(), InferredType::Int),
3074            ("b".to_string(), InferredType::String),
3075        ]);
3076        let o2 = InferredType::Object(vec![
3077            ("a".to_string(), InferredType::Float),
3078            ("b".to_string(), InferredType::String),
3079        ]);
3080        let merged = o1.merge(&o2);
3081        if let InferredType::Object(fields) = &merged {
3082            assert_eq!(fields.len(), 2);
3083            assert_eq!(fields[0].1, InferredType::Float); // Int+Float → Float
3084            assert_eq!(fields[1].1, InferredType::String);
3085        } else {
3086            panic!("Expected Object, got {:?}", merged);
3087        }
3088    }
3089
3090    #[test]
3091    fn test_inferred_type_merge_objects_different_fields() {
3092        let o1 = InferredType::Object(vec![
3093            ("a".to_string(), InferredType::Int),
3094        ]);
3095        let o2 = InferredType::Object(vec![
3096            ("b".to_string(), InferredType::String),
3097        ]);
3098        let merged = o1.merge(&o2);
3099        assert_eq!(merged, InferredType::Mixed);
3100    }
3101
3102    #[test]
3103    fn test_inferred_type_merge_incompatible() {
3104        let s = InferredType::String;
3105        let i = InferredType::Int;
3106        let merged = s.merge(&i);
3107        assert_eq!(merged, InferredType::Mixed);
3108    }
3109
3110    #[test]
3111    fn test_inferred_type_to_field_type() {
3112        let schemas = IndexMap::new();
3113
3114        assert_eq!(InferredType::Null.to_field_type(&schemas).base, "string");
3115        assert!(InferredType::Null.to_field_type(&schemas).nullable);
3116        assert_eq!(InferredType::Bool.to_field_type(&schemas).base, "bool");
3117        assert_eq!(InferredType::Int.to_field_type(&schemas).base, "int");
3118        assert_eq!(InferredType::Float.to_field_type(&schemas).base, "float");
3119        assert_eq!(InferredType::String.to_field_type(&schemas).base, "string");
3120        assert_eq!(InferredType::Mixed.to_field_type(&schemas).base, "any");
3121
3122        // Array type
3123        let arr_type = InferredType::Array(Box::new(InferredType::Int));
3124        let ft = arr_type.to_field_type(&schemas);
3125        assert_eq!(ft.base, "int");
3126        assert!(ft.is_array);
3127
3128        // Object with no matching schema → "any" (not "object", which is a value-only type)
3129        let obj_type = InferredType::Object(vec![("x".to_string(), InferredType::Int)]);
3130        assert_eq!(obj_type.to_field_type(&schemas).base, "any");
3131    }
3132
3133    #[test]
3134    fn test_inferred_type_to_field_type_with_matching_schema() {
3135        let mut schemas = IndexMap::new();
3136        let mut schema = Schema::new("point");
3137        schema.add_field("x", FieldType::new("int"));
3138        schema.add_field("y", FieldType::new("int"));
3139        schemas.insert("point".to_string(), schema);
3140
3141        let obj_type = InferredType::Object(vec![
3142            ("x".to_string(), InferredType::Int),
3143            ("y".to_string(), InferredType::Int),
3144        ]);
3145        let ft = obj_type.to_field_type(&schemas);
3146        assert_eq!(ft.base, "point");
3147    }
3148
3149    #[test]
3150    fn test_infer_type_special_values() {
3151        // Bytes, Ref, Tagged, Timestamp, Map all become Mixed
3152        assert_eq!(infer_type(&Value::Bytes(vec![1, 2])), InferredType::Mixed);
3153        assert_eq!(infer_type(&Value::Ref("x".to_string())), InferredType::Mixed);
3154        assert_eq!(infer_type(&Value::Tagged("t".to_string(), Box::new(Value::Null))), InferredType::Mixed);
3155        assert_eq!(infer_type(&Value::Timestamp(0, 0)), InferredType::Mixed);
3156        assert_eq!(infer_type(&Value::Map(vec![])), InferredType::Mixed);
3157
3158        // Empty array
3159        if let InferredType::Array(inner) = infer_type(&Value::Array(vec![])) {
3160            assert_eq!(*inner, InferredType::Mixed);
3161        } else {
3162            panic!("Expected Array");
3163        }
3164
3165        // UInt becomes Int
3166        assert_eq!(infer_type(&Value::UInt(42)), InferredType::Int);
3167    }
3168
3169    #[test]
3170    fn test_json_with_schemas_empty_nested_object_roundtrip() {
3171        // Regression: fuzzer found that [{"n":{}}] crashes because the inferrer
3172        // emits "object" as a field type, which the parser rejects as value-only.
3173        let doc = TeaLeaf::from_json_with_schemas(r#"[{"n":{}}]"#).unwrap();
3174        let tl_text = doc.to_tl_with_schemas();
3175        // Must re-parse without error
3176        let reparsed = TeaLeaf::parse(&tl_text).unwrap();
3177        assert_eq!(doc.data.len(), reparsed.data.len());
3178    }
3179
3180    // =========================================================================
3181    // Coverage: to_tl_with_schemas() edge cases
3182    // =========================================================================
3183
3184    #[test]
3185    fn test_to_tl_with_schemas_no_schemas() {
3186        let mut data = IndexMap::new();
3187        data.insert("name".to_string(), Value::String("alice".to_string()));
3188        let doc = TeaLeaf { data, schemas: IndexMap::new(), unions: IndexMap::new(), is_root_array: false };
3189
3190        let output = doc.to_tl_with_schemas();
3191        assert!(output.contains("name: alice"), "Should use dumps() format");
3192        assert!(!output.contains("@struct"), "No schemas");
3193    }
3194
3195    #[test]
3196    fn test_to_tl_with_schemas_root_array() {
3197        let mut data = IndexMap::new();
3198        data.insert("root".to_string(), Value::Array(vec![Value::Int(1), Value::Int(2)]));
3199        let doc = TeaLeaf { data, schemas: IndexMap::new(), unions: IndexMap::new(), is_root_array: true };
3200
3201        let output = doc.to_tl_with_schemas();
3202        assert!(output.starts_with("@root-array"), "Should have root-array directive");
3203    }
3204
3205    // =========================================================================
3206    // Coverage: write_value_with_schemas() for special types
3207    // =========================================================================
3208
3209    #[test]
3210    fn test_dumps_with_schemas_all_types() {
3211        let mut schemas = IndexMap::new();
3212        let mut schema = Schema::new("item");
3213        schema.add_field("id", FieldType::new("int"));
3214        schema.add_field("name", FieldType::new("string"));
3215        schemas.insert("item".to_string(), schema);
3216
3217        let mut data = IndexMap::new();
3218        // Array matching schema → @table
3219        data.insert("items".to_string(), Value::Array(vec![
3220            Value::Object(vec![
3221                ("id".to_string(), Value::Int(1)),
3222                ("name".to_string(), Value::String("Widget".to_string())),
3223            ].into_iter().collect()),
3224        ]));
3225        // Special types
3226        data.insert("ref_val".to_string(), Value::Ref("target".to_string()));
3227        data.insert("tag_val".to_string(), Value::Tagged("ok".to_string(), Box::new(Value::Int(200))));
3228        data.insert("map_val".to_string(), Value::Map(vec![
3229            (Value::Int(1), Value::String("one".to_string())),
3230        ]));
3231        data.insert("bytes_val".to_string(), Value::Bytes(vec![0xde, 0xad]));
3232        data.insert("ts_val".to_string(), Value::Timestamp(0, 0));
3233        data.insert("ts_millis".to_string(), Value::Timestamp(1705315800123, 0));
3234
3235        let schema_order = vec!["item".to_string()];
3236        let output = dumps_with_schemas(&data, &schemas, &schema_order, &IndexMap::new(), &[]);
3237
3238        assert!(output.contains("@struct item"), "Should contain schema def");
3239        assert!(output.contains("@table item"), "Should use @table format");
3240        assert!(output.contains("!target"), "Should contain ref");
3241        assert!(output.contains(":ok 200"), "Should contain tagged");
3242        assert!(output.contains("@map {"), "Should contain map");
3243        assert!(output.contains("b\"dead\""), "Should contain bytes literal");
3244        assert!(output.contains("1970-01-01T00:00:00Z"), "Should contain timestamp");
3245        assert!(output.contains(".123Z"), "Should contain millis timestamp");
3246    }
3247
3248    #[test]
3249    fn test_dumps_with_schemas_object_value() {
3250        let schemas = IndexMap::new();
3251        let mut data = IndexMap::new();
3252        data.insert("config".to_string(), Value::Object(
3253            vec![
3254                ("host".to_string(), Value::String("localhost".to_string())),
3255                ("port".to_string(), Value::Int(8080)),
3256            ].into_iter().collect()
3257        ));
3258
3259        let output = dumps_with_schemas(&data, &schemas, &[], &IndexMap::new(), &[]);
3260        assert!(output.contains("config:"), "Should contain key");
3261        assert!(output.contains("{"), "Should contain object");
3262    }
3263
3264    #[test]
3265    fn test_write_tuple_with_nested_schema() {
3266        // Test tuple writing with nested struct fields
3267        let mut schemas = IndexMap::new();
3268
3269        let mut addr = Schema::new("address");
3270        addr.add_field("city", FieldType::new("string"));
3271        addr.add_field("zip", FieldType::new("string"));
3272        schemas.insert("address".to_string(), addr);
3273
3274        let mut user = Schema::new("user");
3275        user.add_field("name", FieldType::new("string"));
3276        user.add_field("home", FieldType::new("address"));
3277        schemas.insert("user".to_string(), user);
3278
3279        let mut data = IndexMap::new();
3280        data.insert("users".to_string(), Value::Array(vec![
3281            Value::Object(vec![
3282                ("name".to_string(), Value::String("Alice".to_string())),
3283                ("home".to_string(), Value::Object(vec![
3284                    ("city".to_string(), Value::String("Boston".to_string())),
3285                    ("zip".to_string(), Value::String("02101".to_string())),
3286                ].into_iter().collect())),
3287            ].into_iter().collect()),
3288        ]));
3289
3290        let schema_order = vec!["address".to_string(), "user".to_string()];
3291        let output = dumps_with_schemas(&data, &schemas, &schema_order, &IndexMap::new(), &[]);
3292
3293        assert!(output.contains("@struct address"), "Should have address schema");
3294        assert!(output.contains("@struct user"), "Should have user schema");
3295        assert!(output.contains("@table user"), "Should use @table for users");
3296        // Nested tuples
3297        assert!(output.contains("("), "Should have tuple format");
3298    }
3299
3300    #[test]
3301    fn test_write_tuple_with_schema_array_field() {
3302        // Test tuple writing with array fields that have schemas
3303        let mut schemas = IndexMap::new();
3304
3305        let mut tag = Schema::new("tag");
3306        tag.add_field("name", FieldType::new("string"));
3307        schemas.insert("tag".to_string(), tag);
3308
3309        let mut item = Schema::new("item");
3310        item.add_field("id", FieldType::new("int"));
3311        item.add_field("tags", FieldType { base: "tag".to_string(), nullable: false, is_array: true });
3312        schemas.insert("item".to_string(), item);
3313
3314        let mut data = IndexMap::new();
3315        data.insert("items".to_string(), Value::Array(vec![
3316            Value::Object(vec![
3317                ("id".to_string(), Value::Int(1)),
3318                ("tags".to_string(), Value::Array(vec![
3319                    Value::Object(vec![
3320                        ("name".to_string(), Value::String("rust".to_string())),
3321                    ].into_iter().collect()),
3322                ])),
3323            ].into_iter().collect()),
3324        ]));
3325
3326        let schema_order = vec!["tag".to_string(), "item".to_string()];
3327        let output = dumps_with_schemas(&data, &schemas, &schema_order, &IndexMap::new(), &[]);
3328
3329        assert!(output.contains("@table item"), "Should use @table for items");
3330    }
3331
3332    #[test]
3333    fn test_write_schema_array_empty() {
3334        let schemas = IndexMap::new();
3335        let schema = Schema::new("empty");
3336        let mut out = String::new();
3337        write_schema_array(&mut out, &Value::Array(vec![]), &schema, &schemas, 0);
3338        assert_eq!(out, "[]");
3339    }
3340
3341    #[test]
3342    fn test_write_schema_array_non_array_fallback() {
3343        let schemas = IndexMap::new();
3344        let schema = Schema::new("test");
3345        let mut out = String::new();
3346        write_schema_array(&mut out, &Value::Int(42), &schema, &schemas, 0);
3347        assert_eq!(out, "42");
3348    }
3349
3350    #[test]
3351    fn test_write_tuple_missing_field() {
3352        // Test that missing fields in object produce ~
3353        let schemas = IndexMap::new();
3354        let mut schema = Schema::new("test");
3355        schema.add_field("present", FieldType::new("int"));
3356        schema.add_field("missing", FieldType::new("string"));
3357
3358        let value = Value::Object(
3359            vec![("present".to_string(), Value::Int(42))].into_iter().collect()
3360        );
3361
3362        let mut out = String::new();
3363        write_tuple(&mut out, &value, &schema, &schemas, 0);
3364        assert!(out.contains("42"), "Present field should be written");
3365        assert!(out.contains("~"), "Missing field should be ~");
3366    }
3367
3368    #[test]
3369    fn test_write_tuple_non_object() {
3370        // When tuple receives a non-object value
3371        let schemas = IndexMap::new();
3372        let schema = Schema::new("test");
3373
3374        let mut out = String::new();
3375        write_tuple(&mut out, &Value::Int(42), &schema, &schemas, 0);
3376        assert_eq!(out, "42");
3377    }
3378
3379    // =========================================================================
3380    // Coverage: array_matches_schema()
3381    // =========================================================================
3382
3383    #[test]
3384    fn test_array_matches_schema_empty() {
3385        let schema = Schema::new("test");
3386        assert!(!array_matches_schema(&[], &schema));
3387    }
3388
3389    #[test]
3390    fn test_array_matches_schema_non_object() {
3391        let schema = Schema::new("test");
3392        assert!(!array_matches_schema(&[Value::Int(1)], &schema));
3393    }
3394
3395    #[test]
3396    fn test_array_matches_schema_matching() {
3397        let mut schema = Schema::new("user");
3398        schema.add_field("name", FieldType::new("string"));
3399        schema.add_field("age", FieldType::new("int"));
3400
3401        let arr = vec![Value::Object(vec![
3402            ("name".to_string(), Value::String("Alice".to_string())),
3403            ("age".to_string(), Value::Int(30)),
3404        ].into_iter().collect())];
3405
3406        assert!(array_matches_schema(&arr, &schema));
3407    }
3408
3409    // =========================================================================
3410    // Coverage: from_dto, from_dto_array, to_dto, to_dto_vec
3411    // =========================================================================
3412
3413    #[test]
3414    fn test_from_dto_and_back() {
3415        use crate::convert::{FromTeaLeaf, ConvertError};
3416
3417        let doc = TeaLeaf::from_dto("greeting", &"hello".to_string());
3418        assert_eq!(doc.get("greeting").unwrap().as_str(), Some("hello"));
3419
3420        let result: std::result::Result<String, ConvertError> = String::from_tealeaf_value(doc.get("greeting").unwrap());
3421        assert_eq!(result.unwrap(), "hello");
3422    }
3423
3424    #[test]
3425    fn test_from_dto_array() {
3426        let items = vec!["apple".to_string(), "banana".to_string()];
3427        let doc = TeaLeaf::from_dto_array("fruits", &items);
3428        let arr = doc.get("fruits").unwrap().as_array().unwrap();
3429        assert_eq!(arr.len(), 2);
3430        assert_eq!(arr[0].as_str(), Some("apple"));
3431    }
3432
3433    #[test]
3434    fn test_to_dto_missing_key() {
3435        let doc = TeaLeaf::new(IndexMap::new(), IndexMap::new());
3436        let result: Result<String> = doc.to_dto("missing");
3437        assert!(result.is_err());
3438    }
3439
3440    #[test]
3441    fn test_to_dto_vec() {
3442        let mut data = IndexMap::new();
3443        data.insert("items".to_string(), Value::Array(vec![
3444            Value::String("a".to_string()),
3445            Value::String("b".to_string()),
3446        ]));
3447        let doc = TeaLeaf::new(IndexMap::new(), data);
3448        let result: Vec<String> = doc.to_dto_vec("items").unwrap();
3449        assert_eq!(result, vec!["a", "b"]);
3450    }
3451
3452    #[test]
3453    fn test_to_dto_vec_not_array() {
3454        let mut data = IndexMap::new();
3455        data.insert("item".to_string(), Value::String("not_an_array".to_string()));
3456        let doc = TeaLeaf::new(IndexMap::new(), data);
3457        let result: Result<Vec<String>> = doc.to_dto_vec("item");
3458        assert!(result.is_err());
3459    }
3460
3461    #[test]
3462    fn test_to_dto_vec_missing_key() {
3463        let doc = TeaLeaf::new(IndexMap::new(), IndexMap::new());
3464        let result: Result<Vec<String>> = doc.to_dto_vec("missing");
3465        assert!(result.is_err());
3466    }
3467
3468    // =========================================================================
3469    // Coverage: set_root_array, SchemaInferrer edge cases
3470    // =========================================================================
3471
3472    #[test]
3473    fn test_set_root_array() {
3474        let mut doc = TeaLeaf::new(IndexMap::new(), IndexMap::new());
3475        assert!(!doc.is_root_array);
3476        doc.set_root_array(true);
3477        assert!(doc.is_root_array);
3478    }
3479
3480    #[test]
3481    fn test_schema_inferrer_non_uniform_array() {
3482        // Array with different object structures should not create a schema
3483        let json = r#"{"items": [{"a": 1}, {"b": 2}]}"#;
3484        let doc = TeaLeaf::from_json_with_schemas(json).unwrap();
3485        assert!(doc.schema("item").is_none(), "Non-uniform array should not produce schema");
3486    }
3487
3488    #[test]
3489    fn test_schema_inferrer_mixed_types_in_array() {
3490        // Array with non-objects
3491        let json = r#"{"items": [1, 2, 3]}"#;
3492        let doc = TeaLeaf::from_json_with_schemas(json).unwrap();
3493        assert!(doc.schema("item").is_none(), "Non-object array should not produce schema");
3494    }
3495
3496    #[test]
3497    fn test_schema_inferrer_empty_array() {
3498        let json = r#"{"items": []}"#;
3499        let doc = TeaLeaf::from_json_with_schemas(json).unwrap();
3500        assert!(doc.schema("item").is_none(), "Empty array should not produce schema");
3501    }
3502
3503    #[test]
3504    fn test_schema_inferrer_duplicate_schema_name() {
3505        // Two arrays that would produce the same schema name
3506        let json = r#"{
3507            "items": [{"id": 1, "name": "A"}],
3508            "nested": {"items": [{"id": 2, "name": "B"}]}
3509        }"#;
3510        let doc = TeaLeaf::from_json_with_schemas(json).unwrap();
3511        // Should have "item" schema (first one wins)
3512        assert!(doc.schema("item").is_some());
3513    }
3514
3515    #[test]
3516    fn test_schema_inferrer_int_float_merge() {
3517        // Field that has int in one record and float in another
3518        let json = r#"{"values": [{"x": 1}, {"x": 2.5}]}"#;
3519        let doc = TeaLeaf::from_json_with_schemas(json).unwrap();
3520        let schema = doc.schema("value").unwrap();
3521        let x_field = schema.fields.iter().find(|f| f.name == "x").unwrap();
3522        assert_eq!(x_field.field_type.base, "float", "Int+Float merge should produce float");
3523    }
3524
3525    #[test]
3526    fn test_schema_inference_with_root_array() {
3527        let json = r#"[{"id": 1, "name": "Alice"}, {"id": 2, "name": "Bob"}]"#;
3528        let doc = TeaLeaf::from_json_with_schemas(json).unwrap();
3529        // Root array is stored under "root" key - the schema name should be derived from "root"
3530        // The singularize of "root" is "root" (no trailing s)
3531        // Actually, root arrays aren't typically analyzed because the key is "root" and it goes through analyze_value
3532        let root_val = doc.get("root").unwrap().as_array().unwrap();
3533        assert_eq!(root_val.len(), 2);
3534    }
3535
3536    // =========================================================================
3537    // Coverage: dumps_with_schemas with quoting in schemas
3538    // =========================================================================
3539
3540    #[test]
3541    fn test_dumps_with_schemas_string_quoting_in_tuples() {
3542        let mut schemas = IndexMap::new();
3543        let mut schema = Schema::new("item");
3544        schema.add_field("name", FieldType::new("string"));
3545        schemas.insert("item".to_string(), schema);
3546
3547        let mut data = IndexMap::new();
3548        data.insert("items".to_string(), Value::Array(vec![
3549            Value::Object(vec![
3550                ("name".to_string(), Value::String("hello world".to_string())),
3551            ].into_iter().collect()),
3552        ]));
3553
3554        let schema_order = vec!["item".to_string()];
3555        let output = dumps_with_schemas(&data, &schemas, &schema_order, &IndexMap::new(), &[]);
3556        assert!(output.contains("\"hello world\""), "String with space should be quoted in tuple");
3557    }
3558
3559    #[test]
3560    fn test_dumps_with_schemas_array_without_schema() {
3561        // Array that doesn't match any schema
3562        let schemas = IndexMap::new();
3563        let mut data = IndexMap::new();
3564        data.insert("nums".to_string(), Value::Array(vec![Value::Int(1), Value::Int(2)]));
3565
3566        let output = dumps_with_schemas(&data, &schemas, &[], &IndexMap::new(), &[]);
3567        assert!(output.contains("[1, 2]"), "Should use regular array format");
3568    }
3569
3570    // =========================================================================
3571    // Coverage: convenience functions open(), parse(), root array to_json
3572    // =========================================================================
3573
3574    #[test]
3575    fn test_open_convenience_function() {
3576        // Write a binary file first, then open with the convenience function
3577        let dir = std::env::temp_dir();
3578        let path = dir.join("test_open_conv.tlbx");
3579
3580        let mut data = IndexMap::new();
3581        data.insert("x".to_string(), Value::Int(42));
3582        let doc = TeaLeaf::new(IndexMap::new(), data);
3583        doc.compile(&path, false).unwrap();
3584
3585        let reader = super::open(&path).unwrap();
3586        assert_eq!(reader.get("x").unwrap().as_int(), Some(42));
3587        std::fs::remove_file(&path).ok();
3588    }
3589
3590    #[test]
3591    fn test_parse_convenience_function() {
3592        let doc = super::parse("greeting: hello").unwrap();
3593        assert_eq!(doc.get("greeting").unwrap().as_str(), Some("hello"));
3594    }
3595
3596    #[test]
3597    fn test_to_json_root_array() {
3598        let mut data = IndexMap::new();
3599        data.insert("root".to_string(), Value::Array(vec![Value::Int(1), Value::Int(2)]));
3600        let mut doc = TeaLeaf::new(IndexMap::new(), data);
3601        doc.set_root_array(true);
3602
3603        let json = doc.to_json().unwrap();
3604        let parsed: serde_json::Value = serde_json::from_str(&json).unwrap();
3605        assert!(parsed.is_array(), "Root array to_json should output array");
3606        assert_eq!(parsed.as_array().unwrap().len(), 2);
3607    }
3608
3609    #[test]
3610    fn test_to_json_compact_root_array() {
3611        let mut data = IndexMap::new();
3612        data.insert("root".to_string(), Value::Array(vec![Value::Int(1)]));
3613        let mut doc = TeaLeaf::new(IndexMap::new(), data);
3614        doc.set_root_array(true);
3615
3616        let json = doc.to_json_compact().unwrap();
3617        assert_eq!(json, "[1]");
3618    }
3619
3620    #[test]
3621    fn test_infer_type_bool_value() {
3622        let it = infer_type(&Value::Bool(true));
3623        assert!(matches!(it, InferredType::Bool));
3624    }
3625
3626    #[test]
3627    fn test_schema_inference_nested_object_fields() {
3628        // JSON with nested objects inside array items
3629        let json = r#"{"records": [
3630            {"id": 1, "details": {"city": "NYC", "zip": "10001"}},
3631            {"id": 2, "details": {"city": "LA", "zip": "90001"}}
3632        ]}"#;
3633        let doc = TeaLeaf::from_json_with_schemas(json).unwrap();
3634        // Should infer both "record" and "detail" schemas
3635        assert!(doc.schema("record").is_some(), "Should infer record schema");
3636    }
3637
3638    #[test]
3639    fn test_schema_inference_not_all_objects_returns_early() {
3640        // Array where second element is not an object
3641        let json = r#"{"items": [{"a": 1}, "not_an_object"]}"#;
3642        let doc = TeaLeaf::from_json_with_schemas(json).unwrap();
3643        assert!(doc.schema("item").is_none(), "Mixed array should not produce schema");
3644    }
3645
3646    #[test]
3647    fn test_to_tl_with_schemas_with_nested_array_field() {
3648        // Schema with an array-typed field
3649        let mut schemas = IndexMap::new();
3650        let mut schema = Schema::new("user");
3651        schema.add_field("name", FieldType::new("string"));
3652        schema.add_field("tags", FieldType::new("string").array());
3653        schemas.insert("user".to_string(), schema);
3654
3655        let mut data = IndexMap::new();
3656        let mut obj = IndexMap::new();
3657        obj.insert("name".to_string(), Value::String("Alice".into()));
3658        obj.insert("tags".to_string(), Value::Array(vec![
3659            Value::String("admin".into()),
3660            Value::String("active".into()),
3661        ]));
3662        data.insert("users".to_string(), Value::Array(vec![Value::Object(obj)]));
3663
3664        let doc = TeaLeaf::new(schemas, data);
3665        let text = doc.to_tl_with_schemas();
3666        assert!(text.contains("@struct user"), "Should have schema definition");
3667        assert!(text.contains("@table user"), "Should use table format");
3668    }
3669
3670    // =========================================================================
3671    // Issue 6: Improved schema matching
3672    // =========================================================================
3673
3674    #[test]
3675    fn test_schema_matching_nullable_fields_allowed_missing() {
3676        // Schema with nullable field should match objects missing that field
3677        let mut schemas = IndexMap::new();
3678        let mut s = Schema::new("Item");
3679        s.add_field("id", FieldType::new("int"));
3680        s.add_field("label", FieldType::new("string").nullable());
3681        schemas.insert("Item".to_string(), s);
3682
3683        let mut obj1 = IndexMap::new();
3684        obj1.insert("id".to_string(), Value::Int(1));
3685        // label is missing — but it's nullable, so it should still match
3686
3687        let doc = TeaLeaf {
3688            schemas,
3689            unions: IndexMap::new(),
3690            data: {
3691                let mut d = IndexMap::new();
3692                d.insert("items".to_string(), Value::Array(vec![Value::Object(obj1)]));
3693                d
3694            },
3695            is_root_array: false,
3696        };
3697        let result = doc.find_schema_for_value(doc.data.get("items").unwrap(), "items");
3698        assert!(result.is_some(), "Should match schema when nullable field is missing");
3699        assert_eq!(result.unwrap().name, "Item");
3700    }
3701
3702    #[test]
3703    fn test_schema_matching_rejects_extra_keys() {
3704        // Objects with extra keys not in schema should not match
3705        let mut schemas = IndexMap::new();
3706        let mut s = Schema::new("Point");
3707        s.add_field("x", FieldType::new("int"));
3708        s.add_field("y", FieldType::new("int"));
3709        schemas.insert("Point".to_string(), s);
3710
3711        let mut obj = IndexMap::new();
3712        obj.insert("x".to_string(), Value::Int(1));
3713        obj.insert("y".to_string(), Value::Int(2));
3714        obj.insert("z".to_string(), Value::Int(3)); // extra field
3715
3716        let doc = TeaLeaf {
3717            schemas,
3718            unions: IndexMap::new(),
3719            data: {
3720                let mut d = IndexMap::new();
3721                d.insert("points".to_string(), Value::Array(vec![Value::Object(obj)]));
3722                d
3723            },
3724            is_root_array: false,
3725        };
3726        let result = doc.find_schema_for_value(doc.data.get("points").unwrap(), "points");
3727        assert!(result.is_none(), "Should NOT match schema when extra keys are present");
3728    }
3729
3730    #[test]
3731    fn test_schema_matching_empty_array_no_matching_name() {
3732        let mut schemas = IndexMap::new();
3733        let mut s = Schema::new("Anything");
3734        s.add_field("x", FieldType::new("int"));
3735        schemas.insert("Anything".to_string(), s);
3736
3737        let doc = TeaLeaf {
3738            schemas,
3739            unions: IndexMap::new(),
3740            data: {
3741                let mut d = IndexMap::new();
3742                d.insert("empty".to_string(), Value::Array(vec![]));
3743                d
3744            },
3745            is_root_array: false,
3746        };
3747        let result = doc.find_schema_for_value(doc.data.get("empty").unwrap(), "empty");
3748        assert!(result.is_none(), "Empty array should return None when no schema name matches");
3749    }
3750
3751    #[test]
3752    fn test_schema_matching_empty_array_matches_by_name() {
3753        let mut schemas = IndexMap::new();
3754        let mut s = Schema::new("item");
3755        s.add_field("id", FieldType::new("int"));
3756        schemas.insert("item".to_string(), s);
3757
3758        let doc = TeaLeaf {
3759            schemas,
3760            unions: IndexMap::new(),
3761            data: {
3762                let mut d = IndexMap::new();
3763                d.insert("items".to_string(), Value::Array(vec![]));
3764                d
3765            },
3766            is_root_array: false,
3767        };
3768        let result = doc.find_schema_for_value(doc.data.get("items").unwrap(), "items");
3769        assert!(result.is_some(), "Empty array should match schema by singularized key name");
3770        assert_eq!(result.unwrap().name, "item");
3771    }
3772
3773    // =========================================================================
3774    // Issue 12: Negative timestamp formatting
3775    // =========================================================================
3776
3777    #[test]
3778    fn test_negative_timestamp_formatting() {
3779        // 1969-12-31T23:59:59Z = -1000 ms (1 second before epoch)
3780        let formatted = format_timestamp_millis(-1000, 0);
3781        assert_eq!(formatted, "1969-12-31T23:59:59Z");
3782    }
3783
3784    #[test]
3785    fn test_negative_timestamp_with_millis() {
3786        // -500 ms = 1969-12-31T23:59:59.500Z
3787        let formatted = format_timestamp_millis(-500, 0);
3788        assert_eq!(formatted, "1969-12-31T23:59:59.500Z");
3789    }
3790
3791    #[test]
3792    fn test_negative_timestamp_full_day() {
3793        // -86400000 ms = exactly one day before epoch = 1969-12-31T00:00:00Z
3794        let formatted = format_timestamp_millis(-86_400_000, 0);
3795        assert_eq!(formatted, "1969-12-31T00:00:00Z");
3796    }
3797
3798    #[test]
3799    fn test_epoch_timestamp() {
3800        let formatted = format_timestamp_millis(0, 0);
3801        assert_eq!(formatted, "1970-01-01T00:00:00Z");
3802    }
3803
3804    #[test]
3805    fn test_positive_timestamp_with_millis() {
3806        // 1123ms = 1 second + 123ms after epoch
3807        let formatted = format_timestamp_millis(1123, 0);
3808        assert_eq!(formatted, "1970-01-01T00:00:01.123Z");
3809    }
3810
3811    #[test]
3812    fn test_negative_timestamp_json_export() {
3813        let mut data = IndexMap::new();
3814        data.insert("ts".to_string(), Value::Timestamp(-1000, 0));
3815        let doc = TeaLeaf::new(IndexMap::new(), data);
3816        let json = doc.to_json().unwrap();
3817        assert!(json.contains("1969-12-31"), "Negative timestamp should format as pre-epoch date: {}", json);
3818    }
3819
3820    // =========================================================================
3821    // Issue 7: Deterministic serialization (IndexMap preserves insertion order)
3822    // =========================================================================
3823
3824    #[test]
3825    fn test_compile_deterministic_key_order() {
3826        // Two documents with the same data in the same insertion order
3827        // should produce identical binary output
3828        let dir = std::env::temp_dir();
3829        let path1 = dir.join("test_deterministic_1.tlbx");
3830        let path2 = dir.join("test_deterministic_2.tlbx");
3831
3832        let mut data1 = IndexMap::new();
3833        data1.insert("alpha".to_string(), Value::Int(1));
3834        data1.insert("beta".to_string(), Value::Int(2));
3835        data1.insert("gamma".to_string(), Value::Int(3));
3836        let doc1 = TeaLeaf::new(IndexMap::new(), data1);
3837        doc1.compile(&path1, false).unwrap();
3838
3839        let mut data2 = IndexMap::new();
3840        data2.insert("alpha".to_string(), Value::Int(1));
3841        data2.insert("beta".to_string(), Value::Int(2));
3842        data2.insert("gamma".to_string(), Value::Int(3));
3843        let doc2 = TeaLeaf::new(IndexMap::new(), data2);
3844        doc2.compile(&path2, false).unwrap();
3845
3846        let bytes1 = std::fs::read(&path1).unwrap();
3847        let bytes2 = std::fs::read(&path2).unwrap();
3848        assert_eq!(bytes1, bytes2, "Binary output should be identical for same insertion order");
3849
3850        std::fs::remove_file(&path1).ok();
3851        std::fs::remove_file(&path2).ok();
3852    }
3853
3854    #[test]
3855    fn test_dumps_deterministic_key_order() {
3856        // dumps() preserves IndexMap insertion order deterministically
3857        let mut data = IndexMap::new();
3858        data.insert("zebra".to_string(), Value::Int(3));
3859        data.insert("alpha".to_string(), Value::Int(1));
3860        data.insert("middle".to_string(), Value::Int(2));
3861
3862        let output1 = dumps(&data);
3863        let output2 = dumps(&data);
3864        assert_eq!(output1, output2, "dumps() should be deterministic");
3865        // Keys should appear in insertion order (IndexMap preserves insertion order)
3866        let lines: Vec<&str> = output1.trim().lines().collect();
3867        assert!(lines[0].starts_with("zebra:"), "First key should be 'zebra', got: {}", lines[0]);
3868        assert!(lines[1].starts_with("alpha:"), "Second key should be 'alpha', got: {}", lines[1]);
3869        assert!(lines[2].starts_with("middle:"), "Third key should be 'middle', got: {}", lines[2]);
3870    }
3871
3872    // =========================================================================
3873    // Order-preservation integration tests
3874    // =========================================================================
3875
3876    #[test]
3877    fn test_json_parse_preserves_key_order() {
3878        // JSON with intentionally non-alphabetical keys
3879        let json = r#"{"zebra": 1, "apple": 2, "mango": 3, "banana": 4}"#;
3880        let doc = TeaLeaf::from_json(json).unwrap();
3881        let keys: Vec<&String> = doc.data.keys().collect();
3882        assert_eq!(keys, &["zebra", "apple", "mango", "banana"],
3883            "JSON parse should preserve key insertion order");
3884    }
3885
3886    #[test]
3887    fn test_json_roundtrip_preserves_key_order() {
3888        let json = r#"{"zebra": 1, "apple": 2, "mango": 3}"#;
3889        let doc = TeaLeaf::from_json(json).unwrap();
3890        let json_out = doc.to_json().unwrap();
3891        // Parse back and verify order
3892        let parsed: serde_json::Value = serde_json::from_str(&json_out).unwrap();
3893        let keys: Vec<&str> = parsed.as_object().unwrap().keys().map(|s| s.as_str()).collect();
3894        assert_eq!(keys, &["zebra", "apple", "mango"],
3895            "JSON round-trip should preserve key order");
3896    }
3897
3898    #[test]
3899    fn test_tl_text_preserves_section_order() {
3900        let input = "zebra: 1\napple: 2\nmango: 3\n";
3901        let doc = TeaLeaf::parse(input).unwrap();
3902        let keys: Vec<&String> = doc.data.keys().collect();
3903        assert_eq!(keys, &["zebra", "apple", "mango"],
3904            "TL text parse should preserve section order");
3905
3906        // Serialize back and verify order
3907        let output = doc.to_tl_with_schemas();
3908        let lines: Vec<&str> = output.trim().lines().collect();
3909        assert!(lines[0].starts_with("zebra:"), "got: {}", lines[0]);
3910        assert!(lines[1].starts_with("apple:"), "got: {}", lines[1]);
3911        assert!(lines[2].starts_with("mango:"), "got: {}", lines[2]);
3912    }
3913
3914    #[test]
3915    fn test_binary_roundtrip_preserves_section_order() {
3916        let json = r#"{"zebra": 1, "apple": 2, "mango": 3, "banana": 4}"#;
3917        let doc = TeaLeaf::from_json(json).unwrap();
3918
3919        let dir = std::env::temp_dir();
3920        let path = dir.join("test_order_preserve.tlbx");
3921        doc.compile(&path, false).unwrap();
3922
3923        let reader = crate::Reader::open(&path).unwrap();
3924        let doc2 = TeaLeaf::from_reader(&reader).unwrap();
3925        let keys: Vec<&String> = doc2.data.keys().collect();
3926        assert_eq!(keys, &["zebra", "apple", "mango", "banana"],
3927            "Binary round-trip should preserve section order");
3928        std::fs::remove_file(&path).ok();
3929    }
3930
3931    #[test]
3932    fn test_object_field_order_preserved_through_binary() {
3933        let json = r#"{"data": {"z_last": 1, "a_first": 2, "m_middle": 3}}"#;
3934        let doc = TeaLeaf::from_json(json).unwrap();
3935
3936        let dir = std::env::temp_dir();
3937        let path = dir.join("test_obj_order.tlbx");
3938        doc.compile(&path, false).unwrap();
3939
3940        let reader = crate::Reader::open(&path).unwrap();
3941        let val = reader.get("data").unwrap();
3942        let obj = val.as_object().unwrap();
3943        let keys: Vec<&String> = obj.keys().collect();
3944        assert_eq!(keys, &["z_last", "a_first", "m_middle"],
3945            "Object field order should be preserved through binary round-trip");
3946        std::fs::remove_file(&path).ok();
3947    }
3948
3949    #[test]
3950    fn test_nested_object_order_preserved() {
3951        let json = r#"{"outer": {"z": {"c": 3, "a": 1, "b": 2}, "a": {"x": 10, "w": 20}}}"#;
3952        let doc = TeaLeaf::from_json(json).unwrap();
3953        let tl = doc.to_tl_with_schemas();
3954
3955        // Parse back and check nested order
3956        let doc2 = TeaLeaf::parse(&tl).unwrap();
3957        let outer = doc2.get("outer").unwrap().as_object().unwrap();
3958        let outer_keys: Vec<&String> = outer.keys().collect();
3959        assert_eq!(outer_keys, &["z", "a"], "Outer keys order preserved");
3960
3961        let z_obj = outer.get("z").unwrap().as_object().unwrap();
3962        let z_keys: Vec<&String> = z_obj.keys().collect();
3963        assert_eq!(z_keys, &["c", "a", "b"], "Nested object keys order preserved");
3964    }
3965
3966    #[test]
3967    fn test_schema_order_preserved_in_text() {
3968        let input = r#"
3969            @struct Zebra (z_name: string)
3970            @struct Apple (a_name: string)
3971            items: [1, 2, 3]
3972        "#;
3973        let doc = TeaLeaf::parse(input).unwrap();
3974        let schema_keys: Vec<&String> = doc.schemas.keys().collect();
3975        assert_eq!(schema_keys, &["Zebra", "Apple"],
3976            "Schema definition order should be preserved");
3977    }
3978
3979    // -------------------------------------------------------------------------
3980    // Fuzz regression tests (full serialize/roundtrip paths)
3981    // -------------------------------------------------------------------------
3982
3983    #[test]
3984    fn test_fuzz_crash_ba05f4f8_serialize_day_zero_no_panic() {
3985        // Regression: fuzz_serialize crash-ba05f4f81615e2bf2b01137126cd772c6c0cc6d2
3986        // Timestamp with month=0 or day=0 caused u32 underflow in days_from_epoch.
3987        // Exercises the full fuzz_serialize path: parse → to_json → to_tl → re-parse.
3988        let inputs = [
3989            "ts: 2024-01-00T10:30:00Z",  // day=0
3990            "ts: 2024-00-15T10:30:00Z",  // month=0
3991            "ts: 6000-00-00T00:00:00Z",  // both zero
3992        ];
3993        for input in &inputs {
3994            // parse must not panic (should return Err)
3995            let result = TeaLeaf::parse(input);
3996            if let Ok(tl) = result {
3997                let _ = tl.to_json();
3998                let _ = tl.to_json_compact();
3999                let text = tl.to_tl_with_schemas();
4000                let _ = TeaLeaf::parse(&text);
4001            }
4002        }
4003    }
4004
4005    #[test]
4006    fn test_fuzz_crash_b085ba0e_roundtrip_day_zero_no_panic() {
4007        // Regression: fuzz_roundtrip crash-b085ba0e656f074031d8c4cb5173313785fa79d1
4008        // Same days_from_epoch underflow, hit through the roundtrip path.
4009        // Exercises the full fuzz_roundtrip path: parse → compile → read → walk.
4010        let inputs = [
4011            "ts: 4001-03-00T00:00:00Z",  // day=0 (pattern from artifact)
4012            "ts: 4401-03-00T00:00:00Z",  // variant
4013        ];
4014        for input in &inputs {
4015            let result = TeaLeaf::parse(input);
4016            if let Ok(tl) = result {
4017                let tmp = tempfile::NamedTempFile::new().unwrap();
4018                if tl.compile(tmp.path(), false).is_ok() {
4019                    let bytes = std::fs::read(tmp.path()).unwrap();
4020                    if let Ok(reader) = Reader::from_bytes(bytes) {
4021                        for key in reader.keys() {
4022                            let _ = reader.get(key);
4023                        }
4024                    }
4025                }
4026            }
4027        }
4028    }
4029
4030    #[test]
4031    fn test_fuzz_crash_48767e10_json_schemas_bare_dash_roundtrip() {
4032        // Regression: fuzz_json_schemas crash-48767e10b4ec71542bfbee2bc358b1e21831a259
4033        // JSON string "-" was serialized unquoted, causing re-parse failure.
4034        for input in [
4035            r#""-""#, r#""+""#, r#""--""#, r#""-foo""#,
4036            r#"{"a": "-"}"#, r#"{"a": "+"}"#,
4037            "\"\\u0660\"",  // Arabic-Indic digit zero
4038        ] {
4039            let tl = TeaLeaf::from_json_with_schemas(input);
4040            if let Ok(tl) = tl {
4041                let text = tl.to_tl_with_schemas();
4042                let reparsed = TeaLeaf::parse(&text);
4043                assert!(
4044                    reparsed.is_ok(),
4045                    "re-parse failed for JSON input {}",
4046                    input,
4047                );
4048            }
4049        }
4050    }
4051
4052    #[test]
4053    fn test_fuzz_crash_820dac71_empty_key_roundtrip() {
4054        // Regression: fuzz_json_schemas crash-820dac71c95d324067cd88de5f24897c65ace57a
4055        // JSON object with empty key was serialized without quoting, losing the key.
4056        for input in [
4057            r#"{"":{}}"#,                // empty key with empty object
4058            r#"[{"":{}}}]"#,             // root array variant (crash-66a8d85176f76ed68ada9f9526abe4efd8352f27)
4059            r#"{"":"value"}"#,            // empty key with string value
4060        ] {
4061            if let Ok(tl) = TeaLeaf::from_json_with_schemas(input) {
4062                let text = tl.to_tl_with_schemas();
4063                let reparsed = TeaLeaf::parse(&text);
4064                assert!(
4065                    reparsed.is_ok(),
4066                    "re-parse failed for JSON input {}",
4067                    input,
4068                );
4069            }
4070        }
4071    }
4072
4073    #[test]
4074    fn test_fuzz_crash_66a8d851_root_array_empty_key() {
4075        // Regression: fuzz_json_schemas crash-66a8d85176f76ed68ada9f9526abe4efd8352f27
4076        // Root array with empty-key object: schema inference + to_tl_with_schemas roundtrip
4077        let input = r#"[{"":{}}]"#;
4078        if let Ok(tl) = TeaLeaf::from_json_with_schemas(input) {
4079            let text = tl.to_tl_with_schemas();
4080            let reparsed = TeaLeaf::parse(&text);
4081            assert!(reparsed.is_ok(), "re-parse failed for root array with empty key");
4082        }
4083    }
4084
4085    #[test]
4086    fn test_fuzz_crash_847a9194_uint_roundtrip() {
4087        // Regression: fuzz_json_schemas crash-847a919462bb567fab268023a5a29d04e92db779
4088        // Large u64 values (> i64::MAX) were demoted to f64 on re-parse, losing precision.
4089        let input = "9999999999999999999";  // > i64::MAX, fits in u64
4090        let tl = TeaLeaf::from_json_with_schemas(input).unwrap();
4091        let text = tl.to_tl_with_schemas();
4092        let reparsed = TeaLeaf::parse(&text).unwrap();
4093        let orig = tl.data.get("root").unwrap();
4094        let re = reparsed.data.get("root").unwrap();
4095        assert_eq!(orig, re, "UInt roundtrip mismatch");
4096    }
4097
4098    #[test]
4099    fn test_fuzz_crash_3902c5cc_float_infinity_roundtrip() {
4100        // Regression: fuzz_serialize crash-3902c5cc99e5e4150d08d40372c86207fbc6db7f
4101        // 5e550 and -5e550 overflow f64 and are now stored as JsonNumber.
4102        // NaN remains Float(NaN).
4103        let tl = TeaLeaf::parse("b: NaN").unwrap();
4104        let text = tl.to_tl_with_schemas();
4105        let reparsed = TeaLeaf::parse(&text).unwrap();
4106        let orig = tl.data.get("b").unwrap();
4107        let re = reparsed.data.get("b").unwrap();
4108        match (orig, re) {
4109            (Value::Float(a), Value::Float(b)) => {
4110                assert_eq!(a.to_bits(), b.to_bits(), "NaN roundtrip failed");
4111            }
4112            _ => panic!("expected Float, got {:?} / {:?}", orig, re),
4113        }
4114
4115        // 5e550 and -5e550 are now JsonNumber (overflow f64)
4116        for input in &["b: 5e550", "b: -5e550"] {
4117            let tl = TeaLeaf::parse(input).unwrap();
4118            let text = tl.to_tl_with_schemas();
4119            let reparsed = TeaLeaf::parse(&text).unwrap();
4120            let orig = tl.data.get("b").unwrap();
4121            let re = reparsed.data.get("b").unwrap();
4122            match (orig, re) {
4123                (Value::JsonNumber(a), Value::JsonNumber(b)) => {
4124                    assert_eq!(a, b, "JsonNumber roundtrip failed for {}", input);
4125                }
4126                _ => panic!("expected JsonNumber, got {:?} / {:?}", orig, re),
4127            }
4128        }
4129    }
4130
4131    #[test]
4132    fn test_needs_quoting_bare_sign() {
4133        assert!(needs_quoting("-"));
4134        assert!(needs_quoting("+"));
4135        assert!(needs_quoting("--"));
4136        assert!(needs_quoting("-foo"));
4137        assert!(needs_quoting("+bar"));
4138        assert!(needs_quoting("-1")); // negative number
4139        assert!(needs_quoting("+1")); // positive number
4140        assert!(needs_quoting("\u{0660}")); // Arabic-Indic digit zero
4141        assert!(!needs_quoting("hello"));
4142        assert!(!needs_quoting("foo-bar"));
4143    }
4144
4145    #[test]
4146    fn test_fuzz_crash_nan_string_needs_quoting() {
4147        // Regression: fuzz_parse/fuzz_serialize crash — string "NaN" must be quoted
4148        // to avoid re-parsing as Float(NaN).
4149        assert!(needs_quoting("NaN"));
4150        assert!(needs_quoting("inf"));
4151        assert!(needs_quoting("Infinity"));
4152
4153        // Roundtrip: String("NaN") must survive parse → dumps → re-parse
4154        for word in &["NaN", "inf", "Infinity"] {
4155            let input = format!("a: \"{}\"", word);
4156            let tl = TeaLeaf::parse(&input).unwrap();
4157            assert!(matches!(tl.get("a"), Some(Value::String(_))));
4158            let text = dumps(&tl.data);
4159            let reparsed = TeaLeaf::parse(&text).unwrap();
4160            assert_eq!(
4161                reparsed.get("a").unwrap().as_str(),
4162                Some(*word),
4163                "roundtrip failed for string {:?}",
4164                word,
4165            );
4166        }
4167    }
4168
4169    #[test]
4170    fn test_json_any_type_compile_roundtrip() {
4171        // Regression: from_json_with_schemas infers "any" for fields whose nested objects
4172        // don't match a schema. encode_typed_value must fall back to generic encoding
4173        // instead of erroring with "requires a schema for encoding".
4174        use tempfile::NamedTempFile;
4175
4176        let json = r#"[
4177            {"name": "alice", "meta": {"x": 1}},
4178            {"name": "bob",   "meta": {"y": "two", "z": true}}
4179        ]"#;
4180        let doc = TeaLeaf::from_json_with_schemas(json).unwrap();
4181        // "meta" has varying shapes → inferred as "any"
4182        let temp = NamedTempFile::new().unwrap();
4183        doc.compile(temp.path(), false).expect("compile with 'any' field must not error");
4184
4185        // Read back and verify data survived
4186        let reader = Reader::open(temp.path()).unwrap();
4187        assert_eq!(reader.keys().len(), doc.data.len());
4188    }
4189
4190    #[test]
4191    fn json_any_array_binary_roundtrip() {
4192        // Regression: []any fields (from JSON inference of heterogeneous arrays inside
4193        // schema-typed objects) caused binary corruption. encode_typed_value wrote
4194        // TLType::Struct as the element type for "any" (the to_tl_type default),
4195        // but the actual data was heterogeneous. The reader then read garbage bytes
4196        // as schema indices, crashing with "schema index N out of bounds".
4197        use tempfile::NamedTempFile;
4198
4199        let json = r#"{
4200            "events": [
4201                {
4202                    "id": "E1",
4203                    "type": "sale",
4204                    "data": ["SKU-100", 3, 29.99, true],
4205                    "tags": ["flash", "online"]
4206                },
4207                {
4208                    "id": "E2",
4209                    "type": "return",
4210                    "data": ["SKU-200", 1, 15.0, false],
4211                    "tags": ["in-store"]
4212                }
4213            ]
4214        }"#;
4215        let doc = TeaLeaf::from_json_with_schemas(json).unwrap();
4216
4217        // Verify inference: "data" should be []any (heterogeneous), "tags" should be []string
4218        let event_schema = doc.schemas.get("event").expect("missing 'event' schema");
4219        let data_field = event_schema.fields.iter().find(|f| f.name == "data").unwrap();
4220        assert!(data_field.field_type.is_array, "data should be array");
4221        assert_eq!(data_field.field_type.base, "any", "data should be []any, got []{}", data_field.field_type.base);
4222
4223        // Compile to binary
4224        let temp = NamedTempFile::new().unwrap();
4225        doc.compile(temp.path(), false).expect("compile must not error");
4226
4227        // Read back and verify full data integrity
4228        let reader = Reader::open(temp.path()).unwrap();
4229        let events_val = reader.get("events").expect("missing 'events' key");
4230        let events = events_val.as_array().expect("events should be array");
4231        assert_eq!(events.len(), 2, "should have 2 events");
4232
4233        // Verify first event's heterogeneous data array
4234        let e1 = events[0].as_object().expect("event should be object");
4235        assert_eq!(e1.get("id").unwrap().as_str(), Some("E1"));
4236        let data1 = e1.get("data").unwrap().as_array().expect("data should be array");
4237        assert_eq!(data1.len(), 4);
4238        assert_eq!(data1[0].as_str(), Some("SKU-100"));
4239        assert_eq!(data1[2].as_float(), Some(29.99));
4240    }
4241
4242    #[test]
4243    fn retail_orders_json_binary_roundtrip() {
4244        // End-to-end: retail_orders.json → infer schemas → compile → read → JSON
4245        // Exercises the full path that was missing from the test suite: complex
4246        // real-world JSON with heterogeneous arrays ([]any) inside schema-typed objects.
4247        use tempfile::NamedTempFile;
4248
4249        let fixture = std::path::Path::new(env!("CARGO_MANIFEST_DIR"))
4250            .join("tests/fixtures/retail_orders.json");
4251        let json = std::fs::read_to_string(&fixture)
4252            .unwrap_or_else(|e| panic!("read fixture {}: {e}", fixture.display()));
4253
4254        let doc = TeaLeaf::from_json_with_schemas(&json).unwrap();
4255        let temp = NamedTempFile::new().unwrap();
4256        doc.compile(temp.path(), true).expect("compile retail_orders must not error");
4257
4258        // Read binary back to JSON and compare
4259        let reader = Reader::open(temp.path()).unwrap();
4260        let keys = reader.keys();
4261        assert_eq!(keys.len(), 5, "expected 5 top-level keys, got {keys:?}");
4262
4263        // Verify all sections are readable and have correct element counts
4264        let orders_val = reader.get("orders").unwrap();
4265        let orders = orders_val.as_array().expect("orders");
4266        assert_eq!(orders.len(), 10, "expected 10 orders");
4267
4268        let products_val = reader.get("products").unwrap();
4269        let products = products_val.as_array().expect("products");
4270        assert_eq!(products.len(), 4, "expected 4 products");
4271
4272        let customers_val = reader.get("customers").unwrap();
4273        let customers = customers_val.as_array().expect("customers");
4274        assert_eq!(customers.len(), 3, "expected 3 customers");
4275
4276        // Spot-check: first order preserves heterogeneous fields
4277        let order1 = orders[0].as_object().expect("order should be object");
4278        assert_eq!(order1.get("order_id").unwrap().as_str(), Some("ORD-2024-00001"));
4279        let items = order1.get("items").unwrap().as_array().expect("items");
4280        assert_eq!(items.len(), 3, "first order should have 3 items");
4281    }
4282
4283    #[test]
4284    fn fuzz_repro_json_schema_bool_field_name() {
4285        // Fuzz crash: field named "bool" conflicts with type keyword
4286        let input = r#"[{"bool":{"b":2}}]"#;
4287        let tl = TeaLeaf::from_json_with_schemas(input).unwrap();
4288        let tl_text = tl.to_tl_with_schemas();
4289        let reparsed = TeaLeaf::parse(&tl_text)
4290            .unwrap_or_else(|e| panic!("Re-parse failed: {e}\nTL text:\n{tl_text}"));
4291        assert_eq!(tl.data.len(), reparsed.data.len(), "key count mismatch");
4292        for (key, orig_val) in &tl.data {
4293            let re_val = reparsed.data.get(key).unwrap_or_else(|| panic!("lost key '{key}'"));
4294            assert_eq!(orig_val, re_val, "value mismatch for key '{key}'");
4295        }
4296    }
4297
4298    /// Helper: verify that a JSON field named after a built-in type correctly
4299    /// round-trips through TL text when schema inference is used.
4300    fn assert_builtin_name_text_roundtrip(type_name: &str, inner_json: &str) {
4301        let input = format!(r#"[{{"{type_name}":{inner_json}}}]"#);
4302        let tl = TeaLeaf::from_json_with_schemas(&input)
4303            .unwrap_or_else(|e| panic!("[{type_name}] from_json_with_schemas failed: {e}"));
4304        let tl_text = tl.to_tl_with_schemas();
4305
4306        // The schema should appear in the text output
4307        assert!(
4308            tl_text.contains(&format!("@struct {type_name}")),
4309            "[{type_name}] expected @struct {type_name} in TL text:\n{tl_text}"
4310        );
4311
4312        let reparsed = TeaLeaf::parse(&tl_text)
4313            .unwrap_or_else(|e| panic!("[{type_name}] re-parse failed: {e}\nTL text:\n{tl_text}"));
4314
4315        assert_eq!(
4316            tl.data.len(), reparsed.data.len(),
4317            "[{type_name}] key count mismatch"
4318        );
4319        for (key, orig_val) in &tl.data {
4320            let re_val = reparsed.data.get(key)
4321                .unwrap_or_else(|| panic!("[{type_name}] lost key '{key}'"));
4322            assert_eq!(orig_val, re_val, "[{type_name}] value mismatch for key '{key}'");
4323        }
4324    }
4325
4326    #[test]
4327    fn schema_name_shadows_builtin_bool() {
4328        assert_builtin_name_text_roundtrip("bool", r#"{"x":1}"#);
4329    }
4330
4331    #[test]
4332    fn schema_name_shadows_builtin_int() {
4333        // Inner value is a string so field type "string" doesn't collide with schema "int"
4334        assert_builtin_name_text_roundtrip("int", r#"{"x":"hello"}"#);
4335    }
4336
4337    #[test]
4338    fn schema_name_shadows_builtin_int8() {
4339        assert_builtin_name_text_roundtrip("int8", r#"{"x":"hello"}"#);
4340    }
4341
4342    #[test]
4343    fn schema_name_shadows_builtin_int16() {
4344        assert_builtin_name_text_roundtrip("int16", r#"{"x":"hello"}"#);
4345    }
4346
4347    #[test]
4348    fn schema_name_shadows_builtin_int32() {
4349        assert_builtin_name_text_roundtrip("int32", r#"{"x":"hello"}"#);
4350    }
4351
4352    #[test]
4353    fn schema_name_shadows_builtin_int64() {
4354        assert_builtin_name_text_roundtrip("int64", r#"{"x":"hello"}"#);
4355    }
4356
4357    #[test]
4358    fn schema_name_shadows_builtin_uint() {
4359        assert_builtin_name_text_roundtrip("uint", r#"{"x":"hello"}"#);
4360    }
4361
4362    #[test]
4363    fn schema_name_shadows_builtin_uint8() {
4364        assert_builtin_name_text_roundtrip("uint8", r#"{"x":"hello"}"#);
4365    }
4366
4367    #[test]
4368    fn schema_name_shadows_builtin_uint16() {
4369        assert_builtin_name_text_roundtrip("uint16", r#"{"x":"hello"}"#);
4370    }
4371
4372    #[test]
4373    fn schema_name_shadows_builtin_uint32() {
4374        assert_builtin_name_text_roundtrip("uint32", r#"{"x":"hello"}"#);
4375    }
4376
4377    #[test]
4378    fn schema_name_shadows_builtin_uint64() {
4379        assert_builtin_name_text_roundtrip("uint64", r#"{"x":"hello"}"#);
4380    }
4381
4382    #[test]
4383    fn schema_name_shadows_builtin_float() {
4384        assert_builtin_name_text_roundtrip("float", r#"{"x":1}"#);
4385    }
4386
4387    #[test]
4388    fn schema_name_shadows_builtin_float32() {
4389        assert_builtin_name_text_roundtrip("float32", r#"{"x":1}"#);
4390    }
4391
4392    #[test]
4393    fn schema_name_shadows_builtin_float64() {
4394        assert_builtin_name_text_roundtrip("float64", r#"{"x":1}"#);
4395    }
4396
4397    #[test]
4398    fn schema_name_shadows_builtin_string() {
4399        assert_builtin_name_text_roundtrip("string", r#"{"x":1}"#);
4400    }
4401
4402    // Note: "bytes" is not tested via JSON inference because singularize("bytes") = "byte"
4403    // which is NOT a built-in type. The direct TL-parsing test below covers "bytes" as a
4404    // schema name.
4405
4406    #[test]
4407    fn schema_name_shadows_builtin_timestamp() {
4408        assert_builtin_name_text_roundtrip("timestamp", r#"{"x":1}"#);
4409    }
4410
4411    /// Test built-in type names as schemas via direct TL text parsing (not JSON inference).
4412    /// This covers names that can't arise through singularization (like "bytes").
4413    #[test]
4414    fn schema_name_shadows_builtin_direct_tl_parse() {
4415        let test_cases = &[
4416            // (TL text, expected field name, expected inner value)
4417            (
4418                "@struct bytes (x: int)\n@struct root (data: bytes)\nroot: @table root [\n  ((42))\n]",
4419                "data",
4420                Value::Object(IndexMap::from([
4421                    ("x".to_string(), Value::Int(42)),
4422                ])),
4423            ),
4424            (
4425                "@struct bool (a: int, b: string)\n@struct root (flag: bool)\nroot: @table root [\n  ((1, hello))\n]",
4426                "flag",
4427                Value::Object(IndexMap::from([
4428                    ("a".to_string(), Value::Int(1)),
4429                    ("b".to_string(), Value::String("hello".into())),
4430                ])),
4431            ),
4432        ];
4433
4434        for (tl_text, field_name, expected_val) in test_cases {
4435            let doc = TeaLeaf::parse(tl_text)
4436                .unwrap_or_else(|e| panic!("parse failed for field '{field_name}': {e}\n{tl_text}"));
4437
4438            let root_arr = doc.data.get("root").expect("missing 'root' key");
4439            if let Value::Array(arr) = root_arr {
4440                if let Value::Object(obj) = &arr[0] {
4441                    let actual = obj.get(*field_name)
4442                        .unwrap_or_else(|| panic!("missing field '{field_name}'"));
4443                    assert_eq!(actual, expected_val, "mismatch for field '{field_name}'");
4444                } else {
4445                    panic!("expected Object, got {:?}", arr[0]);
4446                }
4447            } else {
4448                panic!("expected Array, got {:?}", root_arr);
4449            }
4450        }
4451    }
4452
4453    /// Self-referencing case: @struct int (x: int) where the inner field type
4454    /// matches the schema name. The LParen guard ensures `x: int` resolves to
4455    /// primitive int (next token is a literal, not `(`).
4456    #[test]
4457    fn schema_name_shadows_builtin_self_referencing() {
4458        // JSON: [{"int": {"x": 1}}] — creates @struct int (x: int)
4459        // The inner field "x: int" must resolve to primitive int, not struct "int"
4460        let input = r#"[{"int":{"x":1}}]"#;
4461        let tl = TeaLeaf::from_json_with_schemas(input).unwrap();
4462        let tl_text = tl.to_tl_with_schemas();
4463
4464        assert!(tl_text.contains("@struct int"), "expected @struct int in:\n{tl_text}");
4465
4466        let reparsed = TeaLeaf::parse(&tl_text)
4467            .unwrap_or_else(|e| panic!("re-parse failed: {e}\nTL text:\n{tl_text}"));
4468
4469        for (key, orig_val) in &tl.data {
4470            let re_val = reparsed.data.get(key)
4471                .unwrap_or_else(|| panic!("lost key '{key}'"));
4472            assert_eq!(orig_val, re_val, "value mismatch for key '{key}'");
4473        }
4474    }
4475
4476    /// Self-referencing: @struct int (int: int) — field name AND type both "int"
4477    #[test]
4478    fn schema_name_shadows_builtin_self_ref_same_field_name() {
4479        let tl_text = "\
4480@struct int (int: int)
4481@struct root (val: int)
4482
4483root: @table root [
4484  ((42))
4485]
4486";
4487        let doc = TeaLeaf::parse(tl_text)
4488            .unwrap_or_else(|e| panic!("parse failed: {e}\nTL text:\n{tl_text}"));
4489
4490        let json = doc.to_json().unwrap();
4491        eprintln!("=== JSON ===\n{json}");
4492
4493        // The root array should have one element with field "val" as an Object
4494        let root_arr = doc.data.get("root").expect("missing 'root'");
4495        if let Value::Array(arr) = root_arr {
4496            if let Value::Object(obj) = &arr[0] {
4497                let val = obj.get("val").expect("missing field 'val'");
4498                // val should be Object({"int": Int(42)}) — struct "int" with field "int" = 42
4499                assert_eq!(
4500                    val,
4501                    &Value::Object(IndexMap::from([
4502                        ("int".to_string(), Value::Int(42)),
4503                    ])),
4504                    "expected struct instance, got {val:?}"
4505                );
4506            } else {
4507                panic!("expected Object, got {:?}", arr[0]);
4508            }
4509        } else {
4510            panic!("expected Array, got {root_arr:?}");
4511        }
4512    }
4513
4514    /// Duplicate @struct declarations: second overwrites first
4515    #[test]
4516    fn schema_name_shadows_builtin_duplicate_struct_decl() {
4517        let tl_text = "\
4518@struct int (x: int)
4519@struct int (int: int)
4520@struct root (val: int)
4521
4522root: @table root [
4523  ((42))
4524]
4525";
4526        let result = TeaLeaf::parse(tl_text);
4527        match &result {
4528            Ok(doc) => {
4529                let json = doc.to_json().unwrap();
4530                eprintln!("=== JSON ===\n{json}");
4531                eprintln!("=== schemas ===");
4532                for (name, schema) in &doc.schemas {
4533                    let fields: Vec<String> = schema.fields.iter()
4534                        .map(|f| format!("{}: {}", f.name, f.field_type.base))
4535                        .collect();
4536                    eprintln!("  @struct {name} ({})", fields.join(", "));
4537                }
4538            }
4539            Err(e) => {
4540                eprintln!("=== parse error ===\n{e}");
4541            }
4542        }
4543        // Assert that parsing succeeds
4544        result.unwrap();
4545    }
4546
4547    /// Multiple built-in-named schemas in the same document
4548    #[test]
4549    fn schema_name_shadows_multiple_builtins() {
4550        let input = r#"[{"bool":{"a":1},"int":{"b":"hello"},"float":{"c":true}}]"#;
4551        let tl = TeaLeaf::from_json_with_schemas(input).unwrap();
4552        let tl_text = tl.to_tl_with_schemas();
4553
4554        assert!(tl_text.contains("@struct bool"), "missing @struct bool");
4555        assert!(tl_text.contains("@struct int"), "missing @struct int");
4556        assert!(tl_text.contains("@struct float"), "missing @struct float");
4557
4558        let reparsed = TeaLeaf::parse(&tl_text)
4559            .unwrap_or_else(|e| panic!("re-parse failed: {e}\nTL text:\n{tl_text}"));
4560
4561        for (key, orig_val) in &tl.data {
4562            let re_val = reparsed.data.get(key)
4563                .unwrap_or_else(|| panic!("lost key '{key}'"));
4564            assert_eq!(orig_val, re_val, "value mismatch for key '{key}'");
4565        }
4566    }
4567
4568
4569    /// Fuzz crash: singularize("s") → "" (empty string), producing invalid
4570    /// @struct definitions with missing names.
4571    #[test]
4572    fn fuzz_repro_singularize_single_char_s() {
4573        let input = r#"[{"s":{"b":1}}]"#;
4574        let tl = TeaLeaf::from_json_with_schemas(input).unwrap();
4575        let tl_text = tl.to_tl_with_schemas();
4576
4577        // Schema name must not be empty — singularize("s") should return "s"
4578        assert!(
4579            tl_text.contains("@struct s"),
4580            "expected @struct s in TL text:\n{tl_text}"
4581        );
4582
4583        let reparsed = TeaLeaf::parse(&tl_text)
4584            .unwrap_or_else(|e| panic!("Re-parse failed: {e}\nTL text:\n{tl_text}"));
4585        assert_eq!(tl.data.len(), reparsed.data.len(), "key count mismatch");
4586        for (key, orig_val) in &tl.data {
4587            let re_val = reparsed.data.get(key).unwrap_or_else(|| panic!("lost key '{key}'"));
4588            assert_eq!(orig_val, re_val, "value mismatch for key '{key}'");
4589        }
4590    }
4591
4592    #[test]
4593    fn singularize_does_not_produce_empty_string() {
4594        // All single-character inputs must pass through unchanged
4595        for c in 'a'..='z' {
4596            let s = String::from(c);
4597            let result = super::singularize(&s);
4598            assert!(!result.is_empty(), "singularize({s:?}) produced empty string");
4599            assert_eq!(result, s, "singularize({s:?}) should return {s:?}, got {result:?}");
4600        }
4601    }
4602
4603    /// Fuzz crash: field name with dots causes value mismatch on roundtrip
4604    #[test]
4605    fn fuzz_repro_dots_in_field_name() {
4606        // Fuzz regression: field "root" inside root-array wrapper both singularize to "root",
4607        // causing analyze_nested_objects to create a correct inner schema that analyze_array
4608        // then overwrites with a self-referencing @struct root (root: root).
4609        let input = r#"[{"root":{"Z.lll.i0...A":44444440.0}}]"#;
4610        let tl = TeaLeaf::from_json_with_schemas(input).unwrap();
4611        let tl_text = tl.to_tl_with_schemas();
4612        let reparsed = TeaLeaf::parse(&tl_text)
4613            .unwrap_or_else(|e| panic!("Re-parse failed: {e}\nTL text:\n{tl_text}"));
4614        assert_eq!(tl.data.len(), reparsed.data.len(), "key count mismatch");
4615        for (key, orig_val) in &tl.data {
4616            let re_val = reparsed.data.get(key).unwrap_or_else(|| panic!("lost key '{key}'"));
4617            assert_eq!(orig_val, re_val, "value mismatch for key '{key}'");
4618        }
4619    }
4620
4621    #[test]
4622    fn schema_name_collision_field_matches_parent() {
4623        // When an array field name singularizes to the same name as its parent schema,
4624        // the inner schema should be preserved (not overwritten with a self-reference).
4625        // This tests the general case, not just the root-array wrapper collision.
4626        let input = r#"{"items": [{"items": {"a": 1, "b": 2}}]}"#;
4627        let tl = TeaLeaf::from_json_with_schemas(input).unwrap();
4628        let tl_text = tl.to_tl_with_schemas();
4629        let reparsed = TeaLeaf::parse(&tl_text)
4630            .unwrap_or_else(|e| panic!("Re-parse failed: {e}\nTL text:\n{tl_text}"));
4631        for (key, orig_val) in &tl.data {
4632            let re_val = reparsed.data.get(key).unwrap_or_else(|| panic!("lost key '{key}'"));
4633            assert_eq!(orig_val, re_val, "value mismatch for key '{key}'");
4634        }
4635    }
4636
4637    #[test]
4638    fn analyze_node_nesting_stress_test() {
4639        // Stress test: "node" appears at many nesting levels with different shapes.
4640        // Schema inference should NOT create conflicting schemas or lose data.
4641        let input = r#"{
4642          "node": {
4643            "id": 1,
4644            "name": "root",
4645            "active": true,
4646            "node": {
4647              "id": "child-1",
4648              "metrics": {
4649                "node": {
4650                  "value": 42.7,
4651                  "unit": "ms",
4652                  "thresholds": [10, 20, 30]
4653                }
4654              },
4655              "node": [
4656                {
4657                  "id": 2,
4658                  "enabled": false
4659                },
4660                {
4661                  "id": 3,
4662                  "enabled": "sometimes",
4663                  "node": {
4664                    "status": null,
4665                    "confidence": 0.93
4666                  }
4667                }
4668              ]
4669            }
4670          },
4671          "nodeMetadata": {
4672            "node": {
4673              "version": 5,
4674              "checksum": "a94a8fe5ccb19ba61c4c0873d391e987",
4675              "flags": {
4676                "node": true
4677              }
4678            }
4679          }
4680        }"#;
4681
4682        let tl = TeaLeaf::from_json_with_schemas(input).unwrap();
4683        eprintln!("=== schemas ({}) ===", tl.schemas.len());
4684        for (name, schema) in &tl.schemas {
4685            let fields: Vec<String> = schema.fields.iter()
4686                .map(|f| format!("{}: {}{}{}", f.name, f.field_type.base,
4687                    if f.field_type.is_array { "[]" } else { "" },
4688                    if f.field_type.nullable { "?" } else { "" }))
4689                .collect();
4690            eprintln!("  @struct {name} ({})", fields.join(", "));
4691        }
4692        let tl_text = tl.to_tl_with_schemas();
4693        eprintln!("=== TL text ===\n{tl_text}");
4694
4695        // Core correctness check: round-trip must preserve all data
4696        let reparsed = TeaLeaf::parse(&tl_text)
4697            .unwrap_or_else(|e| panic!("Re-parse failed: {e}\nTL text:\n{tl_text}"));
4698        for (key, orig_val) in &tl.data {
4699            let re_val = reparsed.data.get(key).unwrap_or_else(|| panic!("lost key '{key}'"));
4700            assert_eq!(orig_val, re_val, "value mismatch for key '{key}'");
4701        }
4702    }
4703
4704    #[test]
4705    fn schema_collision_recursive_arrays() {
4706        // "nodes" appears as arrays at two levels with different shapes.
4707        // Inner: [{name, value}], Outer: [{name, nodes}]
4708        // Both singularize to "node" — only one schema can exist.
4709        let input = r#"{
4710          "nodes": [
4711            {
4712              "name": "parent",
4713              "nodes": [
4714                {"name": "child", "value": 42}
4715              ]
4716            }
4717          ]
4718        }"#;
4719        let tl = TeaLeaf::from_json_with_schemas(input).unwrap();
4720        eprintln!("=== schemas ({}) ===", tl.schemas.len());
4721        for (name, schema) in &tl.schemas {
4722            let fields: Vec<String> = schema.fields.iter()
4723                .map(|f| format!("{}: {}{}{}", f.name, f.field_type.base,
4724                    if f.field_type.is_array { "[]" } else { "" },
4725                    if f.field_type.nullable { "?" } else { "" }))
4726                .collect();
4727            eprintln!("  @struct {name} ({})", fields.join(", "));
4728        }
4729        let tl_text = tl.to_tl_with_schemas();
4730        eprintln!("=== TL text ===\n{tl_text}");
4731        let reparsed = TeaLeaf::parse(&tl_text)
4732            .unwrap_or_else(|e| panic!("Re-parse failed: {e}\nTL text:\n{tl_text}"));
4733        for (key, orig_val) in &tl.data {
4734            let re_val = reparsed.data.get(key).unwrap_or_else(|| panic!("lost key '{key}'"));
4735            assert_eq!(orig_val, re_val, "value mismatch for key '{key}'");
4736        }
4737    }
4738
4739    #[test]
4740    fn schema_collision_recursive_same_shape() {
4741        // "nodes" appears at two levels but SAME shape [{id, name}].
4742        // Schema "node" created for inner array should also work for outer.
4743        let input = r#"{
4744          "nodes": [
4745            {
4746              "id": 1,
4747              "name": "parent",
4748              "children": [
4749                {"id": 10, "name": "child-a"},
4750                {"id": 11, "name": "child-b"}
4751              ]
4752            },
4753            {
4754              "id": 2,
4755              "name": "sibling",
4756              "children": [
4757                {"id": 20, "name": "child-c"}
4758              ]
4759            }
4760          ]
4761        }"#;
4762        let tl = TeaLeaf::from_json_with_schemas(input).unwrap();
4763        eprintln!("=== schemas ({}) ===", tl.schemas.len());
4764        for (name, schema) in &tl.schemas {
4765            let fields: Vec<String> = schema.fields.iter()
4766                .map(|f| format!("{}: {}{}{}", f.name, f.field_type.base,
4767                    if f.field_type.is_array { "[]" } else { "" },
4768                    if f.field_type.nullable { "?" } else { "" }))
4769                .collect();
4770            eprintln!("  @struct {name} ({})", fields.join(", "));
4771        }
4772        let tl_text = tl.to_tl_with_schemas();
4773        eprintln!("=== TL text ===\n{tl_text}");
4774        let reparsed = TeaLeaf::parse(&tl_text)
4775            .unwrap_or_else(|e| panic!("Re-parse failed: {e}\nTL text:\n{tl_text}"));
4776        for (key, orig_val) in &tl.data {
4777            let re_val = reparsed.data.get(key).unwrap_or_else(|| panic!("lost key '{key}'"));
4778            assert_eq!(orig_val, re_val, "value mismatch for key '{key}'");
4779        }
4780    }
4781
4782    #[test]
4783    fn schema_collision_three_level_nesting() {
4784        // "nodes" at 3 levels: L1 and L2 have same shape {name, nodes},
4785        // L3 has different shape {name, score}. All singularize to "node".
4786        // The deepest schema wins (depth-first); outer levels fall back to
4787        // generic format. No data loss at any level.
4788        let input = r#"{
4789          "nodes": [
4790            {
4791              "name": "grandparent",
4792              "nodes": [
4793                {
4794                  "name": "parent",
4795                  "nodes": [
4796                    {"name": "leaf-a", "score": 99.5},
4797                    {"name": "leaf-b", "score": 42.0}
4798                  ]
4799                }
4800              ]
4801            },
4802            {
4803              "name": "uncle",
4804              "nodes": [
4805                {
4806                  "name": "cousin",
4807                  "nodes": [
4808                    {"name": "leaf-c", "score": 77.3}
4809                  ]
4810                }
4811              ]
4812            }
4813          ]
4814        }"#;
4815
4816        let tl = TeaLeaf::from_json_with_schemas(input).unwrap();
4817        eprintln!("=== schemas ({}) ===", tl.schemas.len());
4818        for (name, schema) in &tl.schemas {
4819            let fields: Vec<String> = schema.fields.iter()
4820                .map(|f| format!("{}: {}{}{}", f.name, f.field_type.base,
4821                    if f.field_type.is_array { "[]" } else { "" },
4822                    if f.field_type.nullable { "?" } else { "" }))
4823                .collect();
4824            eprintln!("  @struct {name} ({})", fields.join(", "));
4825        }
4826        let tl_text = tl.to_tl_with_schemas();
4827        eprintln!("=== TL text ===\n{tl_text}");
4828
4829        let reparsed = TeaLeaf::parse(&tl_text)
4830            .unwrap_or_else(|e| panic!("Re-parse failed: {e}\nTL text:\n{tl_text}"));
4831        for (key, orig_val) in &tl.data {
4832            let re_val = reparsed.data.get(key).unwrap_or_else(|| panic!("lost key '{key}'"));
4833            assert_eq!(orig_val, re_val, "value mismatch for key '{key}'");
4834        }
4835    }
4836
4837    #[test]
4838    fn schema_collision_three_level_divergent_leaves() {
4839        // L1: [{name, nodes}], L2: [{name, nodes}] (same shape),
4840        // L3: [{id, value}] in one branch, [{identifier, points}] in another.
4841        // The depth-first analysis only sees the first branch's L3 shape.
4842        // The second branch's L3 must fall back to generic format.
4843        let input = r#"{
4844          "nodes": [
4845            {
4846              "name": "grandparent",
4847              "nodes": [
4848                {
4849                  "name": "parent",
4850                  "nodes": [
4851                    {"id": "leaf-a", "value": 99.5},
4852                    {"id": "leaf-b", "value": 42.0}
4853                  ]
4854                }
4855              ]
4856            },
4857            {
4858              "name": "uncle",
4859              "nodes": [
4860                {
4861                  "name": "cousin",
4862                  "nodes": [
4863                    {"identifier": "leaf-c", "points": 77.3}
4864                  ]
4865                }
4866              ]
4867            }
4868          ]
4869        }"#;
4870
4871        let tl = TeaLeaf::from_json_with_schemas(input).unwrap();
4872        eprintln!("=== schemas ({}) ===", tl.schemas.len());
4873        for (name, schema) in &tl.schemas {
4874            let fields: Vec<String> = schema.fields.iter()
4875                .map(|f| format!("{}: {}{}{}", f.name, f.field_type.base,
4876                    if f.field_type.is_array { "[]" } else { "" },
4877                    if f.field_type.nullable { "?" } else { "" }))
4878                .collect();
4879            eprintln!("  @struct {name} ({})", fields.join(", "));
4880        }
4881        let tl_text = tl.to_tl_with_schemas();
4882        eprintln!("=== TL text ===\n{tl_text}");
4883
4884        let reparsed = TeaLeaf::parse(&tl_text)
4885            .unwrap_or_else(|e| panic!("Re-parse failed: {e}\nTL text:\n{tl_text}"));
4886        for (key, orig_val) in &tl.data {
4887            let re_val = reparsed.data.get(key).unwrap_or_else(|| panic!("lost key '{key}'"));
4888            assert_eq!(orig_val, re_val, "value mismatch for key '{key}'");
4889        }
4890    }
4891
4892    #[test]
4893    fn json_inference_nested_array_inside_object() {
4894        // JSON inference must discover array schemas inside nested objects.
4895        // e.g., items[].product.stock[] should get its own @struct stock schema,
4896        // not fall back to []any.
4897        let input = r#"{
4898          "items": [
4899            {
4900              "name": "Widget",
4901              "product": {
4902                "id": "P-1",
4903                "stock": [
4904                  {"warehouse": "W1", "qty": 100, "backordered": false},
4905                  {"warehouse": "W2", "qty": 50, "backordered": true}
4906                ]
4907              }
4908            },
4909            {
4910              "name": "Gadget",
4911              "product": {
4912                "id": "P-2",
4913                "stock": [
4914                  {"warehouse": "W1", "qty": 200, "backordered": false}
4915                ]
4916              }
4917            }
4918          ]
4919        }"#;
4920
4921        let tl = TeaLeaf::from_json_with_schemas(input).unwrap();
4922        let tl_text = tl.to_tl_with_schemas();
4923
4924        // Must have a "stock" schema (from singularize("stock") = "stock")
4925        assert!(tl.schemas.contains_key("stock"),
4926            "Missing 'stock' schema. Schemas: {:?}\nTL:\n{tl_text}",
4927            tl.schemas.keys().collect::<Vec<_>>());
4928
4929        // The product schema must reference stock[] not []any
4930        let product_schema = tl.schemas.get("product").expect("missing product schema");
4931        let stock_field = product_schema.fields.iter().find(|f| f.name == "stock")
4932            .expect("product schema missing stock field");
4933        assert!(stock_field.field_type.is_array, "stock should be array");
4934        assert_eq!(stock_field.field_type.base, "stock",
4935            "stock field type should be 'stock', got '{}'", stock_field.field_type.base);
4936
4937        // Must produce @table for items and tuples for stock inside product
4938        assert!(tl_text.contains("@table item"), "Missing @table item:\n{tl_text}");
4939
4940        // Round-trip: parse back and verify data integrity
4941        let reparsed = TeaLeaf::parse(&tl_text)
4942            .unwrap_or_else(|e| panic!("Re-parse failed: {e}\nTL:\n{tl_text}"));
4943        for (key, orig_val) in &tl.data {
4944            let re_val = reparsed.data.get(key).unwrap_or_else(|| panic!("lost key '{key}'"));
4945            assert_eq!(orig_val, re_val, "value mismatch for key '{key}'");
4946        }
4947    }
4948}