Skip to main content

tealeaf/
lib.rs

1//! TeaLeaf - Schema-aware data format
2//!
3//! # Example
4//!
5//! ```rust
6//! use tealeaf::{TeaLeaf, Value};
7//!
8//! let doc = TeaLeaf::parse(r#"
9//!     @struct user (id: int, name: string)
10//!     users: @table user [
11//!         (1, alice),
12//!         (2, bob),
13//!     ]
14//! "#).unwrap();
15//!
16//! let users = doc.get("users").unwrap();
17//! ```
18
19mod types;
20mod lexer;
21mod parser;
22mod writer;
23mod reader;
24pub mod convert;
25pub mod builder;
26
27pub use types::{Error, Result, TLType, FieldType, Field, Schema, Union, Variant, Value, ObjectMap, MAGIC, VERSION, VERSION_MAJOR, VERSION_MINOR, HEADER_SIZE, MAX_STRING_LENGTH, MAX_OBJECT_FIELDS, MAX_ARRAY_LENGTH};
28pub use indexmap::IndexMap;
29pub use lexer::{Lexer, Token, TokenKind};
30pub use parser::Parser;
31pub use writer::Writer;
32pub use reader::Reader;
33pub use convert::{ToTeaLeaf, FromTeaLeaf, ConvertError, ToTeaLeafExt};
34pub use builder::TeaLeafBuilder;
35
36// Re-export derive macros when the "derive" feature is enabled
37#[cfg(feature = "derive")]
38pub use tealeaf_derive::{ToTeaLeaf, FromTeaLeaf};
39
40use std::collections::HashSet;
41use std::path::Path;
42
/// A parsed TeaLeaf document
///
/// Bundles the schema definitions, union definitions and top-level data of a
/// document. All three collections are `IndexMap`s, so entries keep their
/// insertion order.
pub struct TeaLeaf {
    /// Schema definitions carried by the document, looked up via `schema()`.
    pub schemas: IndexMap<String, Schema>,
    /// Union definitions carried by the document, looked up via `union()`.
    pub unions: IndexMap<String, Union>,
    /// Top-level key/value data, looked up via `get()`.
    pub data: IndexMap<String, Value>,
    /// Tracks if the source JSON was a root-level array (for round-trip fidelity)
    is_root_array: bool,
}
51
52impl TeaLeaf {
53    /// Create a new TeaLeaf document from data and schemas.
54    ///
55    /// This constructor is primarily for programmatic document creation.
56    /// For parsing from formats, use `parse()`, `load()`, or `from_json()`.
57    pub fn new(schemas: IndexMap<String, Schema>, data: IndexMap<String, Value>) -> Self {
58        Self {
59            schemas,
60            unions: IndexMap::new(),
61            data,
62            is_root_array: false,
63        }
64    }
65
66    /// Parse TeaLeaf text format
67    pub fn parse(input: &str) -> Result<Self> {
68        let tokens = Lexer::new(input).tokenize()?;
69        let mut parser = Parser::new(tokens);
70        let data = parser.parse()?;
71        let is_root_array = parser.is_root_array();
72        let (schemas, unions) = parser.into_schemas_and_unions();
73        Ok(Self {
74            schemas,
75            unions,
76            data,
77            is_root_array,
78        })
79    }
80
81    /// Load from text file
82    ///
83    /// Include paths are resolved relative to the loaded file's directory.
84    pub fn load<P: AsRef<Path>>(path: P) -> Result<Self> {
85        let path = path.as_ref();
86        let content = std::fs::read_to_string(path)?;
87        let tokens = Lexer::new(&content).tokenize()?;
88        let mut parser = Parser::new(tokens).with_base_path(path);
89        let data = parser.parse()?;
90        let is_root_array = parser.is_root_array();
91        let (schemas, unions) = parser.into_schemas_and_unions();
92        Ok(Self {
93            schemas,
94            unions,
95            data,
96            is_root_array,
97        })
98    }
99
    /// Get a value by key
    ///
    /// Looks `key` up in the top-level `data` map; returns `None` when the
    /// key is absent.
    pub fn get(&self, key: &str) -> Option<&Value> {
        self.data.get(key)
    }
104
    /// Get a schema by name
    ///
    /// Returns `None` when no entry in `schemas` is stored under `name`.
    pub fn schema(&self, name: &str) -> Option<&Schema> {
        self.schemas.get(name)
    }
109
    /// Get a union by name
    ///
    /// Returns `None` when no entry in `unions` is stored under `name`.
    pub fn union(&self, name: &str) -> Option<&Union> {
        self.unions.get(name)
    }
114
115    /// Compile to binary format
116    pub fn compile<P: AsRef<Path>>(&self, path: P, compress: bool) -> Result<()> {
117        let mut writer = Writer::new();
118        writer.set_root_array(self.is_root_array);
119        for (_, schema) in &self.schemas {
120            writer.add_schema(schema.clone());
121        }
122        for (_, union_def) in &self.unions {
123            writer.add_union(union_def.clone());
124        }
125        for (key, value) in &self.data {
126            let schema = self.find_schema_for_value(value, key);
127            writer.add_section(key, value, schema)?;
128        }
129        writer.write(path, compress)
130    }
131
132    fn find_schema_for_value(&self, value: &Value, key: &str) -> Option<&Schema> {
133        // Try to find a matching schema for array values
134        if let Value::Array(arr) = value {
135            if arr.is_empty() {
136                // For empty arrays, try name-based matching (singularize key → schema name)
137                let singular = singularize(key);
138                return self.schemas.values().find(|s| s.name.eq_ignore_ascii_case(&singular));
139            }
140
141            // Sample multiple elements: first, middle, last
142            let sample_indices: Vec<usize> = {
143                let mut indices = vec![0];
144                if arr.len() > 2 { indices.push(arr.len() / 2); }
145                if arr.len() > 1 { indices.push(arr.len() - 1); }
146                indices
147            };
148
149            for schema in self.schemas.values() {
150                let all_match = sample_indices.iter().all(|&i| {
151                    if let Some(Value::Object(obj)) = arr.get(i) {
152                        // All required (non-nullable) schema fields must be present
153                        schema.fields.iter().all(|f| {
154                            f.field_type.nullable || obj.contains_key(&f.name)
155                        })
156                        // All obj keys must be schema fields (no extra keys)
157                        && obj.keys().all(|k| schema.fields.iter().any(|f| f.name == *k))
158                    } else {
159                        false
160                    }
161                });
162                if all_match {
163                    return Some(schema);
164                }
165            }
166        }
167        None
168    }
169
170    /// Parse from JSON string.
171    ///
172    /// # Stability Policy
173    ///
174    /// This function follows a **"plain JSON only"** policy:
175    /// - JSON is parsed as-is with **no magic conversion**
176    /// - `{"$ref": "x"}` stays as an Object, NOT a Ref
177    /// - `{"$tag": "ok", "$value": 200}` stays as an Object, NOT a Tagged
178    /// - `"0xcafef00d"` stays as a String, NOT Bytes
179    /// - `"2024-01-15T10:30:00Z"` stays as a String, NOT a Timestamp
180    /// - `[[1, "one"], [2, "two"]]` stays as an Array, NOT a Map
181    ///
182    /// To create special TeaLeaf types, use the text format or binary API directly.
183    ///
184    /// # Number Type Inference
185    ///
186    /// - Integers that fit `i64` → `Value::Int`
187    /// - Large positive integers that fit `u64` → `Value::UInt`
188    /// - Numbers with decimals or scientific notation → `Value::Float`
189    pub fn from_json(json: &str) -> Result<Self> {
190        let json_value: serde_json::Value = serde_json::from_str(json)
191            .map_err(|e| Error::ParseError(format!("Invalid JSON: {}", e)))?;
192
193        let (data, is_root_array) = match json_value {
194            serde_json::Value::Object(obj) => {
195                let map = obj.into_iter()
196                    .map(|(k, v)| (k, json_to_tealeaf_value(v)))
197                    .collect();
198                (map, false)
199            }
200            serde_json::Value::Array(_) => {
201                // Root-level array: store under "root" key but track for round-trip
202                let mut map = IndexMap::new();
203                map.insert("root".to_string(), json_to_tealeaf_value(json_value));
204                (map, true)
205            }
206            _ => {
207                // Other primitives (string, number, bool, null) at root
208                let mut map = IndexMap::new();
209                map.insert("root".to_string(), json_to_tealeaf_value(json_value));
210                (map, false)
211            }
212        };
213
214        Ok(Self {
215            schemas: IndexMap::new(),
216            unions: IndexMap::new(),
217            data,
218            is_root_array,
219        })
220    }
221
222    /// Parse from JSON string with automatic schema inference.
223    ///
224    /// This variant analyzes the JSON structure and automatically:
225    /// - Detects arrays of uniformly-structured objects
226    /// - Infers schema names from parent keys (e.g., "products" → "product")
227    /// - Generates `@struct` definitions for uniform arrays
228    /// - Enables `@table` format output when serialized
229    ///
230    /// Use `to_tl_with_schemas()` to serialize with the inferred schemas.
231    pub fn from_json_with_schemas(json: &str) -> Result<Self> {
232        let doc = Self::from_json(json)?;
233
234        let mut inferrer = SchemaInferrer::new();
235        inferrer.infer(&doc.data);
236        let (schemas, _) = inferrer.into_schemas();
237
238        Ok(Self {
239            schemas,
240            unions: IndexMap::new(),
241            data: doc.data,
242            is_root_array: doc.is_root_array,
243        })
244    }
245
246    /// Serialize to TeaLeaf text format with schemas.
247    ///
248    /// If schemas are present (either from parsing or inference), outputs
249    /// `@struct` definitions and uses `@table` format for matching arrays.
250    ///
251    /// If this document represents a root-level JSON array (from `from_json`),
252    /// the output will include `@root-array` directive for round-trip fidelity.
253    pub fn to_tl_with_schemas(&self) -> String {
254        let mut output = String::new();
255
256        // Emit @root-array directive if this represents a root-level array
257        if self.is_root_array {
258            output.push_str("@root-array\n\n");
259        }
260
261        if self.schemas.is_empty() && self.unions.is_empty() {
262            output.push_str(&dumps(&self.data));
263        } else {
264            // Preserve insertion order from schemas/unions
265            let schema_order: Vec<String> = self.schemas.keys().cloned().collect();
266            let union_order: Vec<String> = self.unions.keys().cloned().collect();
267            output.push_str(&dumps_with_schemas(
268                &self.data, &self.schemas, &schema_order,
269                &self.unions, &union_order,
270            ));
271        }
272
273        output
274    }
275
276    /// Convert to JSON string (pretty-printed).
277    ///
278    /// # Stability Policy - TeaLeaf→JSON Fixed Representations
279    ///
280    /// Special TeaLeaf types serialize to JSON with these **stable formats**:
281    ///
282    /// | TeaLeaf Type | JSON Format                                    |
283    /// |------------|------------------------------------------------|
284    /// | Bytes      | `"0xcafef00d"` (lowercase hex with 0x prefix) |
285    /// | Timestamp  | `"2024-01-15T10:30:00.123Z"` (ISO 8601 UTC)   |
286    /// | Ref        | `{"$ref": "key_name"}`                         |
287    /// | Tagged     | `{"$tag": "tag_name", "$value": <value>}`     |
288    /// | Map        | `[[key1, val1], [key2, val2], ...]`           |
289    /// | Float NaN  | `null` (JSON has no NaN)                       |
290    /// | Float ±Inf | `null` (JSON has no Infinity)                  |
291    ///
292    /// These representations are **contractually stable** and will not change.
293    pub fn to_json(&self) -> Result<String> {
294        // If the source was a root-level array, return it directly (not wrapped in object)
295        if self.is_root_array {
296            if let Some(root_value) = self.data.get("root") {
297                return serde_json::to_string_pretty(&tealeaf_to_json_value(root_value))
298                    .map_err(|e| Error::ParseError(format!("JSON serialization failed: {}", e)));
299            }
300        }
301
302        let json_obj: serde_json::Map<String, serde_json::Value> = self.data
303            .iter()
304            .map(|(k, v)| (k.clone(), tealeaf_to_json_value(v)))
305            .collect();
306
307        serde_json::to_string_pretty(&serde_json::Value::Object(json_obj))
308            .map_err(|e| Error::ParseError(format!("JSON serialization failed: {}", e)))
309    }
310
311    /// Convert to compact JSON string (no pretty printing)
312    pub fn to_json_compact(&self) -> Result<String> {
313        // If the source was a root-level array, return it directly (not wrapped in object)
314        if self.is_root_array {
315            if let Some(root_value) = self.data.get("root") {
316                return serde_json::to_string(&tealeaf_to_json_value(root_value))
317                    .map_err(|e| Error::ParseError(format!("JSON serialization failed: {}", e)));
318            }
319        }
320
321        let json_obj: serde_json::Map<String, serde_json::Value> = self.data
322            .iter()
323            .map(|(k, v)| (k.clone(), tealeaf_to_json_value(v)))
324            .collect();
325
326        serde_json::to_string(&serde_json::Value::Object(json_obj))
327            .map_err(|e| Error::ParseError(format!("JSON serialization failed: {}", e)))
328    }
329
    /// Set whether the document represents a root-level array.
    ///
    /// The flag is consulted by `to_json`, `to_json_compact`,
    /// `to_tl_with_schemas` and `compile` when producing output.
    pub fn set_root_array(&mut self, is_root_array: bool) {
        self.is_root_array = is_root_array;
    }
334
335    /// Create a TeaLeaf document from a binary Reader.
336    ///
337    /// Reads all sections from the reader and carries schemas and unions through.
338    pub fn from_reader(reader: &Reader) -> Result<Self> {
339        let mut data = IndexMap::new();
340        for key in reader.keys() {
341            data.insert(key.to_string(), reader.get(key)?);
342        }
343        let schemas: IndexMap<String, Schema> = reader.schemas.iter()
344            .map(|s| (s.name.clone(), s.clone()))
345            .collect();
346        let unions: IndexMap<String, Union> = reader.unions.iter()
347            .map(|u| (u.name.clone(), u.clone()))
348            .collect();
349        let mut doc = Self {
350            schemas,
351            unions,
352            data,
353            is_root_array: reader.is_root_array(),
354        };
355        doc.set_root_array(reader.is_root_array());
356        Ok(doc)
357    }
358
359    /// Create a TeaLeaf document from a single DTO.
360    ///
361    /// The DTO is placed under the given `key` in the document data map.
362    /// Schemas are automatically collected from the DTO type.
363    pub fn from_dto<T: convert::ToTeaLeaf>(key: &str, dto: &T) -> Self {
364        let schemas = T::collect_schemas();
365        let unions = T::collect_unions();
366        let mut data = IndexMap::new();
367        data.insert(key.to_string(), dto.to_tealeaf_value());
368        let mut doc = Self::new(schemas, data);
369        doc.unions = unions;
370        doc
371    }
372
373    /// Create a TeaLeaf document from a slice of DTOs.
374    ///
375    /// The array is placed under the given `key` and schemas are
376    /// collected from the element type.
377    pub fn from_dto_array<T: convert::ToTeaLeaf>(key: &str, items: &[T]) -> Self {
378        let schemas = T::collect_schemas();
379        let unions = T::collect_unions();
380        let mut data = IndexMap::new();
381        let arr = Value::Array(items.iter().map(|i| i.to_tealeaf_value()).collect());
382        data.insert(key.to_string(), arr);
383        let mut doc = Self::new(schemas, data);
384        doc.unions = unions;
385        doc
386    }
387
388    /// Extract a DTO from this document by key.
389    pub fn to_dto<T: convert::FromTeaLeaf>(&self, key: &str) -> Result<T> {
390        let value = self
391            .get(key)
392            .ok_or_else(|| Error::MissingField(key.to_string()))?;
393        T::from_tealeaf_value(value).map_err(|e| e.into())
394    }
395
396    /// Extract all values under a key as `Vec<T>`.
397    pub fn to_dto_vec<T: convert::FromTeaLeaf>(&self, key: &str) -> Result<Vec<T>> {
398        let value = self
399            .get(key)
400            .ok_or_else(|| Error::MissingField(key.to_string()))?;
401        let arr = value
402            .as_array()
403            .ok_or_else(|| Error::ParseError("Expected array".into()))?;
404        arr.iter()
405            .map(|v| T::from_tealeaf_value(v).map_err(|e| e.into()))
406            .collect()
407    }
408}
409
410/// Convert JSON value to TeaLeaf value (best-effort)
411fn json_to_tealeaf_value(json: serde_json::Value) -> Value {
412    match json {
413        serde_json::Value::Null => Value::Null,
414        serde_json::Value::Bool(b) => Value::Bool(b),
415        serde_json::Value::Number(n) => {
416            if let Some(i) = n.as_i64() {
417                Value::Int(i)
418            } else if let Some(u) = n.as_u64() {
419                Value::UInt(u)
420            } else {
421                let raw = n.to_string();
422                // Pure integer that doesn't fit i64/u64 → preserve exactly
423                if !raw.contains('.') && !raw.contains('e') && !raw.contains('E') {
424                    Value::JsonNumber(raw)
425                } else {
426                    match n.as_f64() {
427                        Some(f) if f.is_finite() => Value::Float(f),
428                        _ => Value::JsonNumber(raw),
429                    }
430                }
431            }
432        }
433        serde_json::Value::String(s) => Value::String(s),
434        serde_json::Value::Array(arr) => {
435            Value::Array(arr.into_iter().map(json_to_tealeaf_value).collect())
436        }
437        serde_json::Value::Object(obj) => {
438            Value::Object(
439                obj.into_iter()
440                    .map(|(k, v)| (k, json_to_tealeaf_value(v)))
441                    .collect()
442            )
443        }
444    }
445}
446
447/// Convert TeaLeaf value to JSON value
448///
449/// Type preservation:
450/// - Value::Int → JSON integer (e.g., 42)
451/// - Value::UInt → JSON integer (e.g., 18446744073709551615)
452/// - Value::Float → JSON float (e.g., 42.0)
453///
454/// Integer types are tried first during JSON import (i64, then u64) so that
455/// values within 64-bit range stay exact. Only true floats fall through to f64.
456fn tealeaf_to_json_value(tl: &Value) -> serde_json::Value {
457    match tl {
458        Value::Null => serde_json::Value::Null,
459        Value::Bool(b) => serde_json::Value::Bool(*b),
460        Value::Int(i) => serde_json::Value::Number((*i).into()),
461        Value::UInt(u) => serde_json::Value::Number((*u).into()),
462        Value::Float(f) => {
463            // Always output floats as floats - the type distinction is intentional
464            serde_json::Number::from_f64(*f)
465                .map(serde_json::Value::Number)
466                .unwrap_or(serde_json::Value::Null)
467        }
468        Value::String(s) => serde_json::Value::String(s.clone()),
469        Value::Bytes(b) => {
470            // Encode bytes as hex string with 0x prefix
471            let hex: String = b.iter().map(|byte| format!("{:02x}", byte)).collect();
472            serde_json::Value::String(format!("0x{}", hex))
473        }
474        Value::Array(arr) => {
475            serde_json::Value::Array(arr.iter().map(tealeaf_to_json_value).collect())
476        }
477        Value::Object(obj) => {
478            let map: serde_json::Map<String, serde_json::Value> = obj
479                .iter()
480                .map(|(k, v)| (k.clone(), tealeaf_to_json_value(v)))
481                .collect();
482            serde_json::Value::Object(map)
483        }
484        Value::Map(pairs) => {
485            // Convert map to array of [key, value] pairs
486            let arr: Vec<serde_json::Value> = pairs
487                .iter()
488                .map(|(k, v)| {
489                    serde_json::Value::Array(vec![
490                        tealeaf_to_json_value(k),
491                        tealeaf_to_json_value(v),
492                    ])
493                })
494                .collect();
495            serde_json::Value::Array(arr)
496        }
497        Value::Ref(r) => {
498            // Encode ref as object with special key
499            let mut obj = serde_json::Map::new();
500            obj.insert("$ref".to_string(), serde_json::Value::String(r.clone()));
501            serde_json::Value::Object(obj)
502        }
503        Value::Tagged(tag, inner) => {
504            // Encode tagged value as object
505            let mut obj = serde_json::Map::new();
506            obj.insert("$tag".to_string(), serde_json::Value::String(tag.clone()));
507            obj.insert("$value".to_string(), tealeaf_to_json_value(inner));
508            serde_json::Value::Object(obj)
509        }
510        Value::Timestamp(ts, tz) => {
511            serde_json::Value::String(format_timestamp_millis(*ts, *tz))
512        }
513        Value::JsonNumber(s) => {
514            s.parse::<serde_json::Number>()
515                .map(serde_json::Value::Number)
516                .unwrap_or_else(|_| serde_json::Value::String(s.clone()))
517        }
518    }
519}
520
/// Read a binary TeaLeaf file
///
/// Thin wrapper around `Reader::open`; its result is returned unchanged.
pub fn open<P: AsRef<Path>>(path: P) -> Result<Reader> {
    Reader::open(path)
}
525
/// Parse TeaLeaf text
///
/// Free-function shorthand for `TeaLeaf::parse`.
pub fn parse(input: &str) -> Result<TeaLeaf> {
    TeaLeaf::parse(input)
}
530
531/// Convenience: load and get data
532pub fn loads(input: &str) -> Result<IndexMap<String, Value>> {
533    Ok(TeaLeaf::parse(input)?.data)
534}
535
/// Check if a string needs quoting when serialized to TeaLeaf format.
/// Returns true if the string could be misinterpreted as another type.
///
/// A string may be written unquoted only when all of the following hold:
/// - it is non-empty;
/// - it is not a reserved word or literal the lexer would interpret
///   (`true`, `false`, `null`, `~`, `NaN`, `inf`, `Infinity`);
/// - it matches the spec grammar
///   `name = (letter | "_") { letter | digit | "_" | "-" | "." }`, ASCII only.
///
/// The leading letter/underscore requirement already rejects anything that
/// starts with a digit, a sign, or a `0x`/`0b` prefix, so numeric look-alikes
/// need no separate check.
fn needs_quoting(s: &str) -> bool {
    // Reserved words, null literal, and float literals the lexer would interpret
    if matches!(s, "true" | "false" | "null" | "~" | "NaN" | "inf" | "Infinity") {
        return true;
    }

    let mut chars = s.chars();
    match chars.next() {
        // Empty strings must be quoted to be representable at all.
        None => true,
        // Must start with an ASCII letter or underscore per the grammar.
        Some(first) if !(first.is_ascii_alphabetic() || first == '_') => true,
        // Whitelist for the rest: anything outside [a-zA-Z0-9_.-] (Unicode,
        // whitespace, punctuation, ...) forces quoting for a safe round-trip.
        Some(_) => chars.any(|c| !(c.is_ascii_alphanumeric() || matches!(c, '_' | '-' | '.'))),
    }
}
584
585/// Write a key to the output, quoting if necessary for safe round-trip.
586fn write_key(out: &mut String, key: &str) {
587    if needs_quoting(key) {
588        out.push('"');
589        out.push_str(&escape_string(key));
590        out.push('"');
591    } else {
592        out.push_str(key);
593    }
594}
595
596/// Write a map key per spec grammar: `map_key = string | name | integer`.
597/// Int/UInt are written as-is. String values use `write_key` for quoting.
598/// Other value types (Null, Bool, Float, etc.) are coerced to quoted strings
599/// so that the text format always round-trips through the parser.
600fn write_map_key(out: &mut String, key: &Value) {
601    match key {
602        Value::Int(i) => out.push_str(&i.to_string()),
603        Value::UInt(u) => out.push_str(&u.to_string()),
604        Value::String(s) => write_key(out, s),
605        // Coerce non-spec key types to quoted strings for text format safety
606        Value::Null => out.push_str("\"~\""),
607        Value::Bool(b) => { out.push('"'); out.push_str(if *b { "true" } else { "false" }); out.push('"'); }
608        Value::Float(f) => { out.push('"'); out.push_str(&f.to_string()); out.push('"'); }
609        Value::JsonNumber(s) => { out.push('"'); out.push_str(s); out.push('"'); }
610        Value::Timestamp(ts, tz) => { out.push('"'); out.push_str(&format_timestamp_millis(*ts, *tz)); out.push('"'); }
611        Value::Bytes(b) => {
612            out.push_str("\"0x");
613            for byte in b { out.push_str(&format!("{:02x}", byte)); }
614            out.push('"');
615        }
616        Value::Ref(r) => { out.push('"'); out.push('!'); out.push_str(r); out.push('"'); }
617        Value::Tagged(tag, _) => { out.push('"'); out.push(':'); out.push_str(tag); out.push('"'); }
618        Value::Array(_) | Value::Object(_) | Value::Map(_) => out.push_str("\"\""),
619    }
620}
621
622pub fn dumps(data: &IndexMap<String, Value>) -> String {
623    let mut out = String::new();
624    for (key, value) in data {
625        write_key(&mut out, key);
626        out.push_str(": ");
627        write_value(&mut out, value, 0);
628        out.push('\n');
629    }
630    out
631}
632
/// Escape a string for TeaLeaf text output.
///
/// Backslash, double quote, newline, tab, carriage return, backspace and
/// form feed get their two-character escapes; any other control character
/// is written as `\uXXXX` (lowercase hex). Everything else passes through.
fn escape_string(s: &str) -> String {
    let mut escaped = String::with_capacity(s.len());
    for ch in s.chars() {
        let short_form = match ch {
            '\\' => Some("\\\\"),
            '"' => Some("\\\""),
            '\n' => Some("\\n"),
            '\t' => Some("\\t"),
            '\r' => Some("\\r"),
            '\u{0008}' => Some("\\b"),
            '\u{000C}' => Some("\\f"),
            _ => None,
        };

        if let Some(sequence) = short_form {
            escaped.push_str(sequence);
        } else if ch.is_control() {
            // Remaining control characters: one \uXXXX per UTF-16 unit.
            let mut units = [0u16; 2];
            for unit in ch.encode_utf16(&mut units).iter() {
                escaped.push_str(&format!("\\u{:04x}", unit));
            }
        } else {
            escaped.push(ch);
        }
    }
    escaped
}
657
/// Format a float so the text always reads back as a float.
///
/// Non-finite values use the keywords `NaN`, `inf` and `-inf`. For finite
/// values, `f64::to_string()` is used when it already contains a decimal
/// point or exponent. Otherwise the output looks like an integer: up to 15
/// digits get `.0` appended, longer ones switch to scientific notation so
/// they are not reparsed as an overflowing integer (e.g. 6.022e23).
fn format_float(f: f64) -> String {
    if f.is_nan() {
        return String::from("NaN");
    }
    if f.is_infinite() {
        return String::from(if f.is_sign_positive() { "inf" } else { "-inf" });
    }

    let plain = f.to_string();
    let reads_as_float = plain.chars().any(|c| matches!(c, '.' | 'e' | 'E'));
    if reads_as_float {
        return plain;
    }

    // Integer-looking output: count digits without the sign.
    let digit_count = plain.trim_start_matches('-').len();
    if digit_count > 15 {
        format!("{:e}", f)
    } else {
        plain + ".0"
    }
}
687
688fn write_value(out: &mut String, value: &Value, indent: usize) {
689    match value {
690        Value::Null => out.push('~'),
691        Value::Bool(b) => out.push_str(if *b { "true" } else { "false" }),
692        Value::Int(i) => out.push_str(&i.to_string()),
693        Value::UInt(u) => out.push_str(&u.to_string()),
694        Value::JsonNumber(s) => out.push_str(s),
695        Value::Float(f) => out.push_str(&format_float(*f)),
696        Value::String(s) => {
697            if needs_quoting(s) {
698                out.push('"');
699                out.push_str(&escape_string(s));
700                out.push('"');
701            } else {
702                out.push_str(s);
703            }
704        }
705        Value::Bytes(b) => {
706            out.push_str("b\"");
707            for byte in b {
708                out.push_str(&format!("{:02x}", byte));
709            }
710            out.push('"');
711        }
712        Value::Array(arr) => {
713            out.push('[');
714            for (i, v) in arr.iter().enumerate() {
715                if i > 0 { out.push_str(", "); }
716                write_value(out, v, indent);
717            }
718            out.push(']');
719        }
720        Value::Object(obj) => {
721            out.push('{');
722            for (i, (k, v)) in obj.iter().enumerate() {
723                if i > 0 { out.push_str(", "); }
724                write_key(out, k);
725                out.push_str(": ");
726                write_value(out, v, indent);
727            }
728            out.push('}');
729        }
730        Value::Map(pairs) => {
731            out.push_str("@map {");
732            let mut first = true;
733            for (k, v) in pairs {
734                if !first { out.push_str(", "); }
735                first = false;
736                // Map keys are restricted to string | name | integer per spec.
737                // Write Int/UInt directly; convert other types to quoted strings.
738                write_map_key(out, k);
739                out.push_str(": ");
740                write_value(out, v, indent);
741            }
742            out.push('}');
743        }
744        Value::Ref(r) => {
745            out.push('!');
746            out.push_str(r);
747        }
748        Value::Tagged(tag, inner) => {
749            out.push(':');
750            out.push_str(tag);
751            out.push(' ');
752            write_value(out, inner, indent);
753        }
754        Value::Timestamp(ts, tz) => {
755            out.push_str(&format_timestamp_millis(*ts, *tz));
756        }
757    }
758}
759
760/// Format a Unix-millis timestamp as an ISO 8601 string with timezone offset.
761/// Handles negative timestamps (pre-epoch dates) correctly using Euclidean division.
762/// Years outside [0000, 9999] are clamped to the boundary per spec (4-digit years only).
763/// When tz_offset_minutes is 0, emits 'Z' suffix. Otherwise emits +HH:MM or -HH:MM.
764fn format_timestamp_millis(ts: i64, tz_offset_minutes: i16) -> String {
765    // Clamp to representable ISO 8601 range (years 0000-9999).
766    // Year 0000-01-01T00:00:00Z = -62167219200000 ms
767    // Year 9999-12-31T23:59:59.999Z = 253402300799999 ms
768    const MIN_TS: i64 = -62_167_219_200_000;
769    const MAX_TS: i64 = 253_402_300_799_999;
770    let ts = ts.clamp(MIN_TS, MAX_TS);
771
772    // Apply timezone offset to get local time for display
773    let local_ts = ts + (tz_offset_minutes as i64) * 60_000;
774    let local_ts = local_ts.clamp(MIN_TS, MAX_TS);
775
776    let secs = local_ts.div_euclid(1000);
777    let millis = local_ts.rem_euclid(1000);
778    let days = secs.div_euclid(86400);
779    let time_secs = secs.rem_euclid(86400);
780    let hours = time_secs / 3600;
781    let mins = (time_secs % 3600) / 60;
782    let secs_rem = time_secs % 60;
783    let (year, month, day) = days_to_ymd(days);
784
785    let tz_suffix = if tz_offset_minutes == 0 {
786        "Z".to_string()
787    } else {
788        let sign = if tz_offset_minutes > 0 { '+' } else { '-' };
789        let abs = tz_offset_minutes.unsigned_abs();
790        format!("{}{:02}:{:02}", sign, abs / 60, abs % 60)
791    };
792
793    if millis > 0 {
794        format!("{:04}-{:02}-{:02}T{:02}:{:02}:{:02}.{:03}{}",
795            year, month, day, hours, mins, secs_rem, millis, tz_suffix)
796    } else {
797        format!("{:04}-{:02}-{:02}T{:02}:{:02}:{:02}{}",
798            year, month, day, hours, mins, secs_rem, tz_suffix)
799    }
800}
801
/// Translate a day count relative to 1970-01-01 into a proleptic Gregorian
/// `(year, month, day)` triple; negative counts address dates before the epoch.
fn days_to_ymd(days: i64) -> (i64, u32, u32) {
    // Howard Hinnant's civil-from-days algorithm, carried out in i64 so that
    // extreme timestamps fit. Days are re-based onto 0000-03-01 so that a leap
    // day is always the last day of a (March-based) year.
    const DAYS_PER_ERA: i64 = 146_097; // one 400-year Gregorian cycle

    let shifted = days + 719_468;
    let era = shifted.div_euclid(DAYS_PER_ERA);
    let day_of_era = shifted.rem_euclid(DAYS_PER_ERA) as u32; // [0, 146096]
    let year_of_era =
        (day_of_era - day_of_era / 1460 + day_of_era / 36_524 - day_of_era / 146_096) / 365;
    let day_of_year = day_of_era - (365 * year_of_era + year_of_era / 4 - year_of_era / 100);
    let month_index = (5 * day_of_year + 2) / 153; // 0 = March ... 11 = February
    let day = day_of_year - (153 * month_index + 2) / 5 + 1;
    let month = if month_index < 10 {
        month_index + 3
    } else {
        month_index - 9
    };
    // January and February belong to the following civil year.
    let year = i64::from(year_of_era) + era * 400 + i64::from(month <= 2);
    (year, month, day)
}
817
818// =============================================================================
819// Schema Inference
820// =============================================================================
821
/// Inferred type information for a field.
///
/// Produced for a single value by `infer_type` and combined across several
/// values with `InferredType::merge`.
#[derive(Debug, Clone, PartialEq)]
enum InferredType {
    /// Only a null value has been observed.
    Null,
    /// A boolean value.
    Bool,
    /// An integer value; `infer_type` maps both `Value::Int` and `Value::UInt` here.
    Int,
    /// A floating-point value; also what `merge` yields for `Int` combined with `Float`.
    Float,
    /// A string value.
    String,
    /// An array, carrying the merged type of its elements.
    Array(Box<InferredType>),
    Object(Vec<(String, InferredType)>),  // Ordered fields
    Mixed,  // Different types seen - fall back to any
}
834
835impl InferredType {
836    fn merge(&self, other: &InferredType) -> InferredType {
837        if self == other {
838            return self.clone();
839        }
840        match (self, other) {
841            (InferredType::Null, t) | (t, InferredType::Null) => {
842                // Null + T = T (nullable)
843                t.clone()
844            }
845            (InferredType::Int, InferredType::Float) | (InferredType::Float, InferredType::Int) => {
846                InferredType::Float
847            }
848            (InferredType::Array(a), InferredType::Array(b)) => {
849                InferredType::Array(Box::new(a.merge(b)))
850            }
851            (InferredType::Object(a), InferredType::Object(b)) => {
852                // Merge objects: keep fields present in both, track nullability
853                let mut merged = Vec::new();
854                let b_map: IndexMap<&str, &InferredType> = b.iter().map(|(k, v)| (k.as_str(), v)).collect();
855
856                for (key, a_type) in a {
857                    if let Some(b_type) = b_map.get(key.as_str()) {
858                        merged.push((key.clone(), a_type.merge(b_type)));
859                    }
860                    // Fields only in a are dropped (not uniform)
861                }
862
863                // Check if structures are compatible (same fields)
864                if merged.len() == a.len() && merged.len() == b.len() {
865                    InferredType::Object(merged)
866                } else {
867                    InferredType::Mixed
868                }
869            }
870            _ => InferredType::Mixed,
871        }
872    }
873
874    fn to_field_type(&self, schemas: &IndexMap<String, Schema>) -> FieldType {
875        match self {
876            InferredType::Null => FieldType::new("string").nullable(),  // Unknown type, default to string
877            InferredType::Bool => FieldType::new("bool"),
878            InferredType::Int => FieldType::new("int"),
879            InferredType::Float => FieldType::new("float"),
880            InferredType::String => FieldType::new("string"),
881            InferredType::Array(inner) => {
882                let inner_type = inner.to_field_type(schemas);
883                FieldType {
884                    base: inner_type.base,
885                    nullable: inner_type.nullable,
886                    is_array: true,
887                }
888            }
889            InferredType::Object(fields) => {
890                // Check if this matches an existing schema
891                for (name, schema) in schemas {
892                    if schema.fields.len() == fields.len() {
893                        let all_match = schema.fields.iter().all(|sf| {
894                            fields.iter().any(|(k, _)| k == &sf.name)
895                        });
896                        if all_match {
897                            return FieldType::new(name.clone());
898                        }
899                    }
900                }
901                // No matching schema — use "any" (not "object", which is a
902                // value-only type rejected by the parser in schema definitions)
903                FieldType::new("any")
904            }
905            InferredType::Mixed => FieldType::new("any"),
906        }
907    }
908}
909
910fn infer_type(value: &Value) -> InferredType {
911    match value {
912        Value::Null => InferredType::Null,
913        Value::Bool(_) => InferredType::Bool,
914        Value::Int(_) | Value::UInt(_) => InferredType::Int,
915        Value::Float(_) => InferredType::Float,
916        Value::String(_) => InferredType::String,
917        Value::Array(arr) => {
918            if arr.is_empty() {
919                InferredType::Array(Box::new(InferredType::Mixed))
920            } else {
921                let mut element_type = infer_type(&arr[0]);
922                for item in arr.iter().skip(1) {
923                    element_type = element_type.merge(&infer_type(item));
924                }
925                InferredType::Array(Box::new(element_type))
926            }
927        }
928        Value::Object(obj) => {
929            let fields: Vec<(String, InferredType)> = obj
930                .iter()
931                .map(|(k, v)| (k.clone(), infer_type(v)))
932                .collect();
933            InferredType::Object(fields)
934        }
935        _ => InferredType::Mixed,
936    }
937}
938
/// Lower-case `name` and strip a plural ending using a simple heuristic:
/// `-ies` becomes `-y`; `-sses`, `-xes`, `-ches` and `-shes` lose their `es`;
/// any other name longer than one byte that ends in a single `s` (not `ss`)
/// loses that `s`. Everything else is returned lower-cased but otherwise intact.
fn singularize(name: &str) -> String {
    let mut word = name.to_lowercase();

    if word.ends_with("ies") {
        word.truncate(word.len() - 3);
        word.push('y');
        return word;
    }

    let takes_es = ["sses", "xes", "ches", "shes"]
        .iter()
        .any(|suffix| word.ends_with(*suffix));

    if takes_es {
        word.truncate(word.len() - 2);
    } else if word.len() > 1 && word.ends_with('s') && !word.ends_with("ss") {
        word.pop();
    }
    word
}
952
953/// Check if array elements are objects that match a schema's structure
954fn array_matches_schema(arr: &[Value], schema: &Schema) -> bool {
955    if arr.is_empty() {
956        return false;
957    }
958
959    // Check if first element is an object
960    let first = match &arr[0] {
961        Value::Object(obj) => obj,
962        _ => return false,
963    };
964
965    // Get schema field names
966    let schema_fields: HashSet<_> = schema.fields.iter().map(|f| f.name.as_str()).collect();
967
968    // Get object keys
969    let obj_keys: HashSet<_> = first.keys().map(|k| k.as_str()).collect();
970
971    // Check if there's significant overlap (at least 50% of schema fields present)
972    let overlap = schema_fields.intersection(&obj_keys).count();
973    let required_overlap = schema_fields.len() / 2;
974
975    overlap > required_overlap || overlap == schema_fields.len()
976}
977
/// Schema inferrer that analyzes data and generates schemas.
///
/// Feed data through `infer`, then retrieve the results with `into_schemas`.
pub struct SchemaInferrer {
    /// Inferred schemas, keyed by schema name.
    schemas: IndexMap<String, Schema>,
    schema_order: Vec<String>,  // Track order for output
}
983
984impl SchemaInferrer {
985    pub fn new() -> Self {
986        Self {
987            schemas: IndexMap::new(),
988            schema_order: Vec::new(),
989        }
990    }
991
992    /// Analyze data and infer schemas from uniform object arrays
993    pub fn infer(&mut self, data: &IndexMap<String, Value>) {
994        for (key, value) in data {
995            self.analyze_value(key, value);
996        }
997    }
998
999    fn analyze_value(&mut self, hint_name: &str, value: &Value) {
1000        if let Value::Array(arr) = value {
1001            self.analyze_array(hint_name, arr);
1002        } else if let Value::Object(obj) = value {
1003            // Recursively analyze nested objects
1004            for (k, v) in obj {
1005                self.analyze_value(k, v);
1006            }
1007        }
1008    }
1009
    /// Infer a schema from `arr` when it is a non-empty array of objects that
    /// all expose the same set of keys, and register it under
    /// `singularize(hint_name)`.
    ///
    /// Nothing is registered when the array is empty, when any element is not
    /// an object, when the key sets differ, when a field name is empty or needs
    /// quoting, or when a schema with that name already exists (checked both
    /// before and after the recursive analysis of nested fields).
    ///
    /// Nested arrays (taken from the first object only) and nested objects
    /// (gathered across all items) are analyzed first, so that their schemas
    /// exist when this schema's field types are built.
    fn analyze_array(&mut self, hint_name: &str, arr: &[Value]) {
        if arr.is_empty() {
            return;
        }

        // Check if all elements are objects with the same structure
        let first = match &arr[0] {
            Value::Object(obj) => obj,
            _ => return,
        };

        // Collect field names from first object (preserving insertion order)
        let field_names: Vec<String> = first.keys().cloned().collect();

        // Skip schema inference if fields are empty, or any field name is empty
        // or needs quoting — such names can't round-trip through @struct definitions.
        if field_names.is_empty()
            || field_names.iter().any(|n| n.is_empty() || needs_quoting(n))
        {
            return;
        }

        let field_set: std::collections::BTreeSet<&str> = first.keys().map(|k| k.as_str()).collect();

        // Verify all objects have the same fields
        for item in arr.iter().skip(1) {
            if let Value::Object(obj) = item {
                let item_set: std::collections::BTreeSet<&str> = obj.keys().map(|k| k.as_str()).collect();
                if item_set != field_set {
                    return;  // Not uniform
                }
            } else {
                return;  // Not all objects
            }
        }

        // Infer types for each field across all objects
        let mut field_types: IndexMap<String, InferredType> = IndexMap::new();
        let mut has_null: IndexMap<String, bool> = IndexMap::new();

        for item in arr {
            if let Value::Object(obj) = item {
                for (key, val) in obj {
                    let inferred = infer_type(val);
                    let is_null = matches!(val, Value::Null);

                    // Nullability is tracked separately because merging Null
                    // with a type yields that type and loses the null.
                    *has_null.entry(key.clone()).or_insert(false) |= is_null;

                    field_types
                        .entry(key.clone())
                        .and_modify(|existing| *existing = existing.merge(&inferred))
                        .or_insert(inferred);
                }
            }
        }

        // Generate schema name from hint
        let schema_name = singularize(hint_name);

        // Skip if schema already exists
        if self.schemas.contains_key(&schema_name) {
            return;
        }

        // First, recursively analyze nested arrays and objects to create their schemas
        for item in arr {
            if let Value::Object(obj) = item {
                for (field_name, field_val) in obj {
                    if let Value::Array(nested) = field_val {
                        self.analyze_array(field_name, nested);
                    }
                }
                break;  // Only need to process first object for nested arrays
            }
        }

        // Analyze nested object fields - collect all non-null objects for each field
        // and create schemas if they're uniform across all array items.
        // Skip fields whose singularized name collides with this array's schema
        // name — otherwise the inner schema would be overwritten and a
        // self-referencing field type created (e.g., @struct root (root: root)).
        for field_name in &field_names {
            if singularize(field_name) == schema_name {
                continue;
            }

            let nested_objects: Vec<&IndexMap<String, Value>> = arr
                .iter()
                .filter_map(|item| {
                    if let Value::Object(obj) = item {
                        if let Some(Value::Object(nested)) = obj.get(field_name) {
                            return Some(nested);
                        }
                    }
                    None
                })
                .collect();

            // If we found at least one object, check if they're uniform
            if !nested_objects.is_empty() {
                self.analyze_nested_objects(field_name, &nested_objects);
            }
        }

        // Re-check: recursive nested analysis (both arrays and objects) may have
        // claimed this schema name. This happens when the same field name appears
        // at multiple nesting levels (e.g., "nodes" containing "nodes"). The inner
        // schema was created first (depth-first); preserve it to avoid overwriting
        // with a different structure.
        if self.schemas.contains_key(&schema_name) {
            return;
        }

        // Build schema
        let mut schema = Schema::new(&schema_name);

        // Use insertion order from first object
        for field_name in &field_names {
            if let Some(inferred) = field_types.get(field_name) {
                let mut field_type = inferred.to_field_type(&self.schemas);

                // Mark as nullable if any null values seen
                if has_null.get(field_name).copied().unwrap_or(false) {
                    field_type.nullable = true;
                }

                // Check if there's a nested schema for array fields
                if let Value::Object(first_obj) = &arr[0] {
                    if let Some(Value::Array(nested_arr)) = first_obj.get(field_name) {
                        let nested_schema_name = singularize(field_name);
                        if let Some(nested_schema) = self.schemas.get(&nested_schema_name) {
                            // Verify array elements are objects matching the schema structure
                            if array_matches_schema(nested_arr, nested_schema) {
                                field_type = FieldType {
                                    base: nested_schema_name,
                                    nullable: field_type.nullable,
                                    is_array: true,
                                };
                            }
                        }
                    }
                }

                // Check if there's a nested schema for object fields
                // (skip self-references: field singularizing to the schema being built)
                let nested_schema_name = singularize(field_name);
                if nested_schema_name != schema_name && self.schemas.contains_key(&nested_schema_name) {
                    if matches!(inferred, InferredType::Object(_)) {
                        field_type = FieldType {
                            base: nested_schema_name,
                            nullable: field_type.nullable,
                            is_array: false,
                        };
                    }
                }

                schema.add_field(field_name, field_type);
            }
        }

        // Record the name in creation order alongside the schema itself.
        self.schema_order.push(schema_name.clone());
        self.schemas.insert(schema_name, schema);
    }
1173
1174    /// Analyze a collection of nested objects (from the same field across array items)
1175    /// and create a schema if they have uniform structure
1176    fn analyze_nested_objects(&mut self, field_name: &str, objects: &[&IndexMap<String, Value>]) {
1177        if objects.is_empty() {
1178            return;
1179        }
1180
1181        // Get field names from first object (preserving insertion order)
1182        let first = objects[0];
1183        let nested_field_names: Vec<String> = first.keys().cloned().collect();
1184
1185        // Skip empty objects and objects with field names that can't round-trip
1186        if nested_field_names.is_empty()
1187            || nested_field_names.iter().any(|n| n.is_empty() || needs_quoting(n))
1188        {
1189            return;
1190        }
1191
1192        let field_set: std::collections::BTreeSet<&str> = first.keys().map(|k| k.as_str()).collect();
1193
1194        // Check if all objects have the same fields
1195        for obj in objects.iter().skip(1) {
1196            let obj_set: std::collections::BTreeSet<&str> = obj.keys().map(|k| k.as_str()).collect();
1197            if obj_set != field_set {
1198                return; // Not uniform
1199            }
1200        }
1201
1202        // They're uniform - create a schema
1203        let schema_name = singularize(field_name);
1204
1205        // Skip if schema already exists
1206        if self.schemas.contains_key(&schema_name) {
1207            return;
1208        }
1209
1210        // Infer field types across all objects
1211        let mut field_types: IndexMap<String, InferredType> = IndexMap::new();
1212        let mut has_null: IndexMap<String, bool> = IndexMap::new();
1213
1214        for obj in objects {
1215            for (key, val) in *obj {
1216                let inferred = infer_type(val);
1217                let is_null = matches!(val, Value::Null);
1218
1219                *has_null.entry(key.clone()).or_insert(false) |= is_null;
1220
1221                field_types
1222                    .entry(key.clone())
1223                    .and_modify(|existing| *existing = existing.merge(&inferred))
1224                    .or_insert(inferred);
1225            }
1226        }
1227
1228        // Recursively analyze nested objects within these objects
1229        for nested_field in &nested_field_names {
1230            let deeper_objects: Vec<&IndexMap<String, Value>> = objects
1231                .iter()
1232                .filter_map(|obj| {
1233                    if let Some(Value::Object(nested)) = obj.get(nested_field) {
1234                        Some(nested)
1235                    } else {
1236                        None
1237                    }
1238                })
1239                .collect();
1240
1241            if !deeper_objects.is_empty() {
1242                self.analyze_nested_objects(nested_field, &deeper_objects);
1243            }
1244        }
1245
1246        // Build schema
1247        let mut schema = Schema::new(&schema_name);
1248
1249        for nested_field in &nested_field_names {
1250            if let Some(inferred) = field_types.get(nested_field) {
1251                let mut field_type = inferred.to_field_type(&self.schemas);
1252
1253                if has_null.get(nested_field).copied().unwrap_or(false) {
1254                    field_type.nullable = true;
1255                }
1256
1257                // Check if this field has a nested schema
1258                if let Some(nested_schema) = self.schemas.get(&singularize(nested_field)) {
1259                    if matches!(inferred, InferredType::Object(_)) {
1260                        field_type = FieldType::new(nested_schema.name.clone());
1261                    }
1262                }
1263
1264                schema.add_field(nested_field, field_type);
1265            }
1266        }
1267
1268        self.schema_order.push(schema_name.clone());
1269        self.schemas.insert(schema_name, schema);
1270    }
1271
1272    pub fn into_schemas(self) -> (IndexMap<String, Schema>, Vec<String>) {
1273        (self.schemas, self.schema_order)
1274    }
1275}
1276
1277impl Default for SchemaInferrer {
1278    fn default() -> Self {
1279        Self::new()
1280    }
1281}
1282
1283/// Serialize data to TeaLeaf text format with schemas
1284pub fn dumps_with_schemas(
1285    data: &IndexMap<String, Value>,
1286    schemas: &IndexMap<String, Schema>,
1287    schema_order: &[String],
1288    unions: &IndexMap<String, Union>,
1289    union_order: &[String],
1290) -> String {
1291    let mut out = String::new();
1292    let mut has_definitions = false;
1293
1294    // Write union definitions first (before structs, since structs may reference unions)
1295    for name in union_order {
1296        if let Some(union) = unions.get(name) {
1297            out.push_str("@union ");
1298            out.push_str(&union.name);
1299            out.push_str(" {\n");
1300            for (vi, variant) in union.variants.iter().enumerate() {
1301                out.push_str("  ");
1302                out.push_str(&variant.name);
1303                out.push_str(" (");
1304                for (fi, field) in variant.fields.iter().enumerate() {
1305                    if fi > 0 {
1306                        out.push_str(", ");
1307                    }
1308                    out.push_str(&field.name);
1309                    out.push_str(": ");
1310                    out.push_str(&field.field_type.to_string());
1311                }
1312                out.push(')');
1313                if vi < union.variants.len() - 1 {
1314                    out.push(',');
1315                }
1316                out.push('\n');
1317            }
1318            out.push_str("}\n");
1319            has_definitions = true;
1320        }
1321    }
1322
1323    // Write struct schemas in order
1324    for name in schema_order {
1325        if let Some(schema) = schemas.get(name) {
1326            out.push_str("@struct ");
1327            out.push_str(&schema.name);
1328            out.push_str(" (");
1329            for (i, field) in schema.fields.iter().enumerate() {
1330                if i > 0 {
1331                    out.push_str(", ");
1332                }
1333                write_key(&mut out, &field.name);
1334                out.push_str(": ");
1335                out.push_str(&field.field_type.to_string());
1336            }
1337            out.push_str(")\n");
1338            has_definitions = true;
1339        }
1340    }
1341
1342    if has_definitions {
1343        out.push('\n');
1344    }
1345
1346    // Write data (preserves insertion order)
1347    for (key, value) in data {
1348        write_key(&mut out, key);
1349        out.push_str(": ");
1350        write_value_with_schemas(&mut out, value, schemas, Some(key), 0, None);
1351        out.push('\n');
1352    }
1353
1354    out
1355}
1356
1357/// Resolve a schema for a value by trying three strategies in order:
1358/// 1. Declared type from parent schema's field type (exact match)
1359/// 2. Singularize the field key name (works for JSON-inference schemas)
1360/// 3. Case-insensitive singularize (handles derive-macro PascalCase names)
1361fn resolve_schema<'a>(
1362    schemas: &'a IndexMap<String, Schema>,
1363    declared_type: Option<&str>,
1364    hint_name: Option<&str>,
1365) -> Option<&'a Schema> {
1366    // 1. Direct lookup by declared type from parent schema
1367    if let Some(name) = declared_type {
1368        if let Some(s) = schemas.get(name) {
1369            return Some(s);
1370        }
1371    }
1372    // 2. Singularize heuristic (existing behavior for JSON-inference schemas)
1373    if let Some(hint) = hint_name {
1374        let singular = singularize(hint);
1375        if let Some(s) = schemas.get(&singular) {
1376            return Some(s);
1377        }
1378        // 3. Case-insensitive singularize (for derive-macro PascalCase names)
1379        let singular_lower = singular.to_ascii_lowercase();
1380        for (name, schema) in schemas {
1381            if name.to_ascii_lowercase() == singular_lower {
1382                return Some(schema);
1383            }
1384        }
1385    }
1386    None
1387}
1388
1389fn write_value_with_schemas(
1390    out: &mut String,
1391    value: &Value,
1392    schemas: &IndexMap<String, Schema>,
1393    hint_name: Option<&str>,
1394    indent: usize,
1395    declared_type: Option<&str>,
1396) {
1397    match value {
1398        Value::Null => out.push('~'),
1399        Value::Bool(b) => out.push_str(if *b { "true" } else { "false" }),
1400        Value::Int(i) => out.push_str(&i.to_string()),
1401        Value::UInt(u) => out.push_str(&u.to_string()),
1402        Value::JsonNumber(s) => out.push_str(s),
1403        Value::Float(f) => out.push_str(&format_float(*f)),
1404        Value::String(s) => {
1405            if needs_quoting(s) {
1406                out.push('"');
1407                out.push_str(&escape_string(s));
1408                out.push('"');
1409            } else {
1410                out.push_str(s);
1411            }
1412        }
1413        Value::Bytes(b) => {
1414            out.push_str("b\"");
1415            for byte in b {
1416                out.push_str(&format!("{:02x}", byte));
1417            }
1418            out.push('"');
1419        }
1420        Value::Array(arr) => {
1421            // Check if this array can use @table format.
1422            // Try name-based resolution first, then structural matching as fallback.
1423            let mut schema = resolve_schema(schemas, declared_type, hint_name);
1424
1425            // Structural fallback: if name-based resolution failed, find a schema
1426            // whose fields exactly match the first element's object keys.
1427            // This handles Builder-path documents where the top-level key name
1428            // (e.g., "orders") doesn't match the schema name (e.g., "SalesOrder").
1429            if schema.is_none() {
1430                if let Some(Value::Object(first_obj)) = arr.first() {
1431                    let obj_keys: HashSet<&str> = first_obj.keys().map(|k| k.as_str()).collect();
1432                    for (_, candidate) in schemas {
1433                        let schema_fields: HashSet<&str> = candidate.fields.iter().map(|f| f.name.as_str()).collect();
1434                        if schema_fields == obj_keys {
1435                            schema = Some(candidate);
1436                            break;
1437                        }
1438                    }
1439                }
1440            }
1441
1442            if let Some(schema) = schema {
1443                // Verify the first element is an object whose fields match the schema.
1444                // A name-only lookup isn't enough — if the same field name appears at
1445                // multiple nesting levels with different shapes, the schema may belong
1446                // to a different level. Applying the wrong schema drops unmatched keys.
1447                let schema_matches = if let Some(Value::Object(first_obj)) = arr.first() {
1448                    let schema_fields: HashSet<&str> = schema.fields.iter().map(|f| f.name.as_str()).collect();
1449                    let obj_keys: HashSet<&str> = first_obj.keys().map(|k| k.as_str()).collect();
1450                    schema_fields == obj_keys
1451                } else {
1452                    false
1453                };
1454
1455                if schema_matches {
1456                    out.push_str("@table ");
1457                    out.push_str(&schema.name);
1458                    out.push_str(" [\n");
1459
1460                    let inner_indent = indent + 2;
1461                    for (i, item) in arr.iter().enumerate() {
1462                        for _ in 0..inner_indent {
1463                            out.push(' ');
1464                        }
1465                        write_tuple(out, item, schema, schemas, inner_indent);
1466                        if i < arr.len() - 1 {
1467                            out.push(',');
1468                        }
1469                        out.push('\n');
1470                    }
1471
1472                    for _ in 0..indent {
1473                        out.push(' ');
1474                    }
1475                    out.push(']');
1476                    return;
1477                }
1478            }
1479
1480            // Fall back to regular array format
1481            out.push('[');
1482            for (i, v) in arr.iter().enumerate() {
1483                if i > 0 {
1484                    out.push_str(", ");
1485                }
1486                write_value_with_schemas(out, v, schemas, None, indent, None);
1487            }
1488            out.push(']');
1489        }
1490        Value::Object(obj) => {
1491            // Find the schema for this object so we can propagate field types to children.
1492            // Try name-based resolution first, then structural matching as fallback.
1493            let mut obj_schema = resolve_schema(schemas, declared_type, hint_name);
1494
1495            if obj_schema.is_none() {
1496                let obj_keys: HashSet<&str> = obj.keys().map(|k| k.as_str()).collect();
1497                for (_, candidate) in schemas {
1498                    let schema_fields: HashSet<&str> = candidate.fields.iter().map(|f| f.name.as_str()).collect();
1499                    if schema_fields == obj_keys {
1500                        obj_schema = Some(candidate);
1501                        break;
1502                    }
1503                }
1504            }
1505
1506            out.push('{');
1507            for (i, (k, v)) in obj.iter().enumerate() {
1508                if i > 0 {
1509                    out.push_str(", ");
1510                }
1511                write_key(out, k);
1512                out.push_str(": ");
1513                // Look up this field's declared type from the parent schema
1514                let field_type = obj_schema.and_then(|s| {
1515                    s.fields.iter()
1516                        .find(|f| f.name == *k)
1517                        .map(|f| f.field_type.base.as_str())
1518                });
1519                write_value_with_schemas(out, v, schemas, Some(k), indent, field_type);
1520            }
1521            out.push('}');
1522        }
1523        Value::Map(pairs) => {
1524            out.push_str("@map {");
1525            let mut first = true;
1526            for (k, v) in pairs {
1527                if !first {
1528                    out.push_str(", ");
1529                }
1530                first = false;
1531                write_map_key(out, k);
1532                out.push_str(": ");
1533                write_value_with_schemas(out, v, schemas, None, indent, None);
1534            }
1535            out.push('}');
1536        }
1537        Value::Ref(r) => {
1538            out.push('!');
1539            out.push_str(r);
1540        }
1541        Value::Tagged(tag, inner) => {
1542            out.push(':');
1543            out.push_str(tag);
1544            out.push(' ');
1545            write_value_with_schemas(out, inner, schemas, None, indent, None);
1546        }
1547        Value::Timestamp(ts, tz) => {
1548            out.push_str(&format_timestamp_millis(*ts, *tz));
1549        }
1550    }
1551}
1552
1553fn write_tuple(
1554    out: &mut String,
1555    value: &Value,
1556    schema: &Schema,
1557    schemas: &IndexMap<String, Schema>,
1558    indent: usize,
1559) {
1560    if let Value::Object(obj) = value {
1561        out.push('(');
1562        for (i, field) in schema.fields.iter().enumerate() {
1563            if i > 0 {
1564                out.push_str(", ");
1565            }
1566            if let Some(v) = obj.get(&field.name) {
1567                let type_base = field.field_type.base.as_str();
1568                // For array fields with a known schema type, write tuples directly without @table
1569                if field.field_type.is_array {
1570                    if let Some(item_schema) = resolve_schema(schemas, Some(type_base), None) {
1571                        // The schema defines the element type - write array with tuples directly
1572                        write_schema_array(out, v, item_schema, schemas, indent);
1573                    } else {
1574                        // No schema for element type - use regular array format
1575                        write_value_with_schemas(out, v, schemas, None, indent, None);
1576                    }
1577                } else if resolve_schema(schemas, Some(type_base), None).is_some() {
1578                    // Non-array field with schema type - write as nested tuple
1579                    let nested_schema = resolve_schema(schemas, Some(type_base), None).unwrap();
1580                    write_tuple(out, v, nested_schema, schemas, indent);
1581                } else {
1582                    write_value_with_schemas(out, v, schemas, None, indent, None);
1583                }
1584            } else {
1585                out.push('~');
1586            }
1587        }
1588        out.push(')');
1589    } else {
1590        write_value_with_schemas(out, value, schemas, None, indent, None);
1591    }
1592}
1593
1594/// Write an array of schema-typed values as tuples (without @table annotation)
1595fn write_schema_array(
1596    out: &mut String,
1597    value: &Value,
1598    schema: &Schema,
1599    schemas: &IndexMap<String, Schema>,
1600    indent: usize,
1601) {
1602    if let Value::Array(arr) = value {
1603        if arr.is_empty() {
1604            out.push_str("[]");
1605            return;
1606        }
1607
1608        out.push_str("[\n");
1609        let inner_indent = indent + 2;
1610        for (i, item) in arr.iter().enumerate() {
1611            for _ in 0..inner_indent {
1612                out.push(' ');
1613            }
1614            write_tuple(out, item, schema, schemas, inner_indent);
1615            if i < arr.len() - 1 {
1616                out.push(',');
1617            }
1618            out.push('\n');
1619        }
1620        for _ in 0..indent {
1621            out.push(' ');
1622        }
1623        out.push(']');
1624    } else {
1625        // Not an array - fall back to regular value writing
1626        write_value_with_schemas(out, value, schemas, None, indent, None);
1627    }
1628}
1629
1630#[cfg(test)]
1631mod tests {
1632    use super::*;
1633
#[test]
fn test_serde_json_number_behavior() {
    // Pins how serde_json classifies an integer literal, a whole-valued
    // float literal and a fractional float literal.
    let json_str = r#"{"int": 42, "float_whole": 42.0, "float_frac": 42.5}"#;
    let parsed: serde_json::Value = serde_json::from_str(json_str).unwrap();

    // Fail loudly rather than silently skipping every assertion if the
    // fixture ever stops parsing to an object.
    let obj = parsed
        .as_object()
        .expect("fixture should parse as a JSON object");

    let int_num = obj.get("int").unwrap().as_number().unwrap();
    let float_whole = obj.get("float_whole").unwrap().as_number().unwrap();
    let float_frac = obj.get("float_frac").unwrap().as_number().unwrap();

    println!("int (42): is_i64={}, is_u64={}, is_f64={}",
        int_num.is_i64(), int_num.is_u64(), int_num.is_f64());
    println!("float_whole (42.0): is_i64={}, is_u64={}, is_f64={}",
        float_whole.is_i64(), float_whole.is_u64(), float_whole.is_f64());
    println!("float_frac (42.5): is_i64={}, is_u64={}, is_f64={}",
        float_frac.is_i64(), float_frac.is_u64(), float_frac.is_f64());

    // Assert expected behavior
    assert!(int_num.is_i64(), "42 should be parsed as i64");
    assert!(float_whole.is_f64(), "42.0 should be parsed as f64");
    assert!(float_frac.is_f64(), "42.5 should be parsed as f64");

    // Rust's `to_string()` drops the ".0" of a whole-valued float, so a
    // writer that needs a visible decimal point cannot rely on it alone.
    println!("Rust float formatting:");
    println!("  42.0f64.to_string() = '{}'", 42.0f64.to_string());
    println!("  42.5f64.to_string() = '{}'", 42.5f64.to_string());
    assert_eq!(42.0f64.to_string(), "42");
    assert_eq!(42.5f64.to_string(), "42.5");
}
1666
#[test]
fn test_parse_simple() {
    // Three bare key/value pairs: a string, an integer and a boolean.
    let source = r#"
            name: alice
            age: 30
            active: true
        "#;
    let parsed = TeaLeaf::parse(source).unwrap();

    let name = parsed.get("name").unwrap();
    let age = parsed.get("age").unwrap();
    let active = parsed.get("active").unwrap();

    assert_eq!(name.as_str(), Some("alice"));
    assert_eq!(age.as_int(), Some(30));
    assert_eq!(active.as_bool(), Some(true));
}
1679
#[test]
fn test_parse_struct() {
    let parsed = TeaLeaf::parse(r#"
            @struct user (id: int, name: string, email: string?)
            users: @table user [
                (1, alice, "alice@test.com"),
                (2, bob, ~),
            ]
        "#).unwrap();

    // The schema keeps all three declared fields; `email: string?` is nullable.
    let user_schema = parsed.schema("user").unwrap();
    assert_eq!(user_schema.fields.len(), 3);
    let email_field = &user_schema.fields[2];
    assert!(email_field.field_type.nullable);

    // Both table rows are present.
    let rows = parsed.get("users").unwrap().as_array().unwrap();
    assert_eq!(rows.len(), 2);
}
1697
#[test]
fn test_nested_struct() {
    let parsed = TeaLeaf::parse(r#"
            @struct address (city: string, zip: string)
            @struct user (id: int, name: string, home: address)
            users: @table user [
                (1, alice, (Berlin, "10115")),
                (2, bob, (Paris, "75001")),
            ]
        "#).unwrap();

    // Walk first row -> home -> city in a single chain.
    let rows = parsed.get("users").unwrap().as_array().unwrap();
    let city = rows[0]
        .as_object().unwrap()
        .get("home").unwrap()
        .as_object().unwrap()
        .get("city").unwrap()
        .as_str();
    assert_eq!(city, Some("Berlin"));
}
1714
#[test]
fn test_three_level_nesting() {
    let parsed = TeaLeaf::parse(r#"
            @struct method (type: string, last4: string)
            @struct payment (amount: float, method: method)
            @struct order (id: int, payment: payment)
            orders: @table order [
                (1, (99.99, (credit, "4242"))),
            ]
        "#).unwrap();

    // order -> payment -> method -> type
    let rows = parsed.get("orders").unwrap().as_array().unwrap();
    let method_type = rows[0]
        .as_object().unwrap()
        .get("payment").unwrap()
        .as_object().unwrap()
        .get("method").unwrap()
        .as_object().unwrap()
        .get("type").unwrap()
        .as_str();
    assert_eq!(method_type, Some("credit"));
}
1732
#[test]
fn test_json_roundtrip_basic() {
    let input = r#"{"name":"alice","age":30,"active":true,"score":95.5}"#;
    let doc = TeaLeaf::from_json(input).unwrap();

    // Each primitive is readable through its typed accessor.
    assert_eq!(doc.get("name").unwrap().as_str(), Some("alice"));
    assert_eq!(doc.get("age").unwrap().as_int(), Some(30));
    assert_eq!(doc.get("active").unwrap().as_bool(), Some(true));
    assert_eq!(doc.get("score").unwrap().as_float(), Some(95.5));

    // Exporting again keeps the name pair, with or without a space after the colon.
    let exported = doc.to_json().unwrap();
    let tight = exported.contains("\"name\":\"alice\"");
    let spaced = exported.contains("\"name\": \"alice\"");
    assert!(tight || spaced);
}
1747
#[test]
fn test_json_roundtrip_root_array() {
    // A top-level JSON array must come back out as a bare array.
    let input = r#"[{"id":"0001","type":"donut","name":"Cake"},{"id":"0002","type":"donut","name":"Raised"}]"#;
    let doc = TeaLeaf::from_json(input).unwrap();

    // While loaded, the array is reachable under the "root" key.
    let items = doc.get("root").unwrap().as_array().unwrap();
    assert_eq!(items.len(), 2);

    // Export must be the array itself, not {"root": [...]}.
    let out = doc.to_json_compact().unwrap();
    assert!(out.starts_with('['), "Root array should serialize directly: {}", out);
    assert!(out.ends_with(']'), "Root array should end with ]: {}", out);
    assert!(!out.contains("\"root\""), "Should NOT wrap in root object: {}", out);

    // Content survives, with or without a space after the colon.
    let has_id = out.contains("\"id\":\"0001\"") || out.contains("\"id\": \"0001\"");
    let has_name = out.contains("\"name\":\"Cake\"") || out.contains("\"name\": \"Cake\"");
    assert!(has_id);
    assert!(has_name);
}
1769
#[test]
fn test_json_roundtrip_root_array_empty() {
    // An empty top-level array exports as exactly "[]".
    let doc = TeaLeaf::from_json("[]").unwrap();
    let out = doc.to_json_compact().unwrap();
    assert_eq!(out, "[]", "Empty array should round-trip: {}", out);
}
1779
#[test]
fn test_json_roundtrip_root_object_with_root_key() {
    // A root object that merely contains a "root" key must stay an object.
    let input = r#"{"root":[1,2,3],"other":"value"}"#;
    let out = TeaLeaf::from_json(input)
        .unwrap()
        .to_json_compact()
        .unwrap();

    assert!(out.starts_with('{'), "Root object should stay as object: {}", out);
    assert!(out.contains("\"root\""), "root key should be preserved: {}", out);
    assert!(out.contains("\"other\""), "other key should be preserved: {}", out);
}
1792
#[test]
fn test_json_export_bytes() {
    // Build the document in code: a single Bytes entry, no schemas.
    let mut data = IndexMap::new();
    data.insert(String::from("data"), Value::Bytes(vec![0xca, 0xfe, 0xf0, 0x0d]));
    let doc = TeaLeaf::new(IndexMap::new(), data);

    let exported = doc.to_json().unwrap();
    assert!(exported.contains("0xcafef00d"), "Bytes should export as hex string: {}", exported);
}
1803
#[test]
fn test_json_export_ref() {
    // A Ref value exports with a "$ref" key and the referenced name.
    let mut data = IndexMap::new();
    data.insert(String::from("config"), Value::Ref(String::from("base_config")));
    let doc = TeaLeaf::new(IndexMap::new(), data);

    let exported = doc.to_json().unwrap();
    assert!(exported.contains("\"$ref\""), "Ref should export with $ref key: {}", exported);
    assert!(exported.contains("base_config"), "Ref name should be in output: {}", exported);
}
1814
#[test]
fn test_json_export_tagged() {
    // A Tagged value exports with "$tag" and "$value" keys.
    let tagged = Value::Tagged(String::from("ok"), Box::new(Value::Int(200)));
    let mut data = IndexMap::new();
    data.insert(String::from("status"), tagged);
    let doc = TeaLeaf::new(IndexMap::new(), data);

    let exported = doc.to_json().unwrap();
    assert!(exported.contains("\"$tag\""), "Tagged should export with $tag key: {}", exported);
    assert!(exported.contains("\"ok\""), "Tag name should be in output: {}", exported);
    assert!(exported.contains("\"$value\""), "Tagged should have $value key: {}", exported);
}
1826
#[test]
fn test_json_export_map() {
    let pairs = vec![
        (Value::Int(1), Value::String(String::from("one"))),
        (Value::Int(2), Value::String(String::from("two"))),
    ];
    let mut data = IndexMap::new();
    data.insert(String::from("lookup"), Value::Map(pairs));
    let doc = TeaLeaf::new(IndexMap::new(), data);

    // The key and both values must appear, whatever the formatting.
    let exported = doc.to_json().unwrap();
    assert!(exported.contains("\"lookup\""), "Map key should be in output: {}", exported);
    assert!(exported.contains("\"one\""), "Map values should be in output: {}", exported);
    assert!(exported.contains("\"two\""), "Map values should be in output: {}", exported);

    // With spaces and newlines removed, an array of pairs shows up as "[[".
    let squeezed: String = exported
        .chars()
        .filter(|c| *c != ' ' && *c != '\n')
        .collect();
    assert!(squeezed.contains("[["), "Map should export as nested array: {}", exported);
}
1846
#[test]
fn test_json_export_timestamp() {
    // Timestamp 0 is the Unix epoch, 1970-01-01T00:00:00Z, which keeps the
    // expected substrings trivial.
    let mut data = IndexMap::new();
    data.insert(String::from("created"), Value::Timestamp(0, 0));
    let doc = TeaLeaf::new(IndexMap::new(), data);

    let exported = doc.to_json().unwrap();
    assert!(exported.contains("1970-01-01"), "Timestamp should export as ISO 8601 date: {}", exported);
    assert!(exported.contains("00:00:00"), "Timestamp time should be epoch: {}", exported);
}
1859
#[test]
fn test_json_import_limitation_ref_becomes_object() {
    // Importing JSON that contains a {"$ref": ...} object yields a plain
    // Object; it is not turned into a Ref value.
    let doc = TeaLeaf::from_json(r#"{"config":{"$ref":"base_config"}}"#).unwrap();
    let config = doc.get("config").unwrap();

    let is_object = config.as_object().is_some();
    let is_ref = config.as_ref_name().is_some();
    assert!(is_object, "JSON $ref should become Object, not Ref");
    assert!(!is_ref, "JSON $ref should NOT become Ref value");
}
1871
#[test]
fn test_json_import_limitation_tagged_becomes_object() {
    // Importing JSON that contains a {"$tag", "$value"} object yields a
    // plain Object; it is not turned into a Tagged value.
    let doc = TeaLeaf::from_json(r#"{"status":{"$tag":"ok","$value":200}}"#).unwrap();
    let status = doc.get("status").unwrap();

    let is_object = status.as_object().is_some();
    let is_tagged = status.as_tagged().is_some();
    assert!(is_object, "JSON $tag should become Object, not Tagged");
    assert!(!is_tagged, "JSON $tag should NOT become Tagged value");
}
1883
#[test]
fn test_json_import_limitation_timestamp_becomes_string() {
    // An ISO 8601 string in JSON stays a String on import; it is not
    // promoted to a Timestamp.
    let doc = TeaLeaf::from_json(r#"{"created":"2024-01-15T10:30:00Z"}"#).unwrap();
    let created = doc.get("created").unwrap();

    let is_string = created.as_str().is_some();
    let is_timestamp = created.as_timestamp_millis().is_some();
    assert!(is_string, "ISO timestamp string should remain String");
    assert!(!is_timestamp, "ISO timestamp should NOT become Timestamp value");
}
1895
1896    // =========================================================================
1897    // JSON ↔ Binary Conversion Tests
1898    // =========================================================================
1899
#[test]
fn test_json_to_binary_roundtrip_primitives() {
    let input = r#"{"name":"alice","age":30,"score":95.5,"active":true,"nothing":null}"#;
    let doc = TeaLeaf::from_json(input).unwrap();

    // Write an uncompressed binary file into a temp location.
    let scratch = tempfile::NamedTempFile::new().unwrap();
    doc.compile(scratch.path(), false).unwrap();

    // Every primitive must read back with its original value.
    let reader = Reader::open(scratch.path()).unwrap();
    assert_eq!(reader.get("name").unwrap().as_str(), Some("alice"));
    assert_eq!(reader.get("age").unwrap().as_int(), Some(30));
    assert_eq!(reader.get("score").unwrap().as_float(), Some(95.5));
    assert_eq!(reader.get("active").unwrap().as_bool(), Some(true));
    assert!(reader.get("nothing").unwrap().is_null());
}
1920
#[test]
fn test_json_to_binary_roundtrip_arrays() {
    let input = r#"{"numbers":[1,2,3,4,5],"names":["alice","bob","charlie"]}"#;
    let doc = TeaLeaf::from_json(input).unwrap();

    let scratch = tempfile::NamedTempFile::new().unwrap();
    doc.compile(scratch.path(), false).unwrap();
    let reader = Reader::open(scratch.path()).unwrap();

    // Integer array: length plus first and last element.
    let numbers_value = reader.get("numbers").unwrap();
    let numbers = numbers_value.as_array().unwrap();
    assert_eq!(numbers.len(), 5);
    assert_eq!(numbers[0].as_int(), Some(1));
    assert_eq!(numbers[4].as_int(), Some(5));

    // String array: length plus first element.
    let names_value = reader.get("names").unwrap();
    let names = names_value.as_array().unwrap();
    assert_eq!(names.len(), 3);
    assert_eq!(names[0].as_str(), Some("alice"));
}
1944
#[test]
fn test_json_to_binary_roundtrip_nested_objects() {
    let input = r#"{"user":{"name":"alice","profile":{"bio":"dev","settings":{"theme":"dark"}}}}"#;
    let doc = TeaLeaf::from_json(input).unwrap();

    let scratch = tempfile::NamedTempFile::new().unwrap();
    doc.compile(scratch.path(), false).unwrap();
    let reader = Reader::open(scratch.path()).unwrap();

    // Level 1: user
    let user_value = reader.get("user").unwrap();
    let user = user_value.as_object().unwrap();
    assert_eq!(user.get("name").unwrap().as_str(), Some("alice"));

    // Level 2: user.profile
    let profile = user.get("profile").unwrap().as_object().unwrap();
    assert_eq!(profile.get("bio").unwrap().as_str(), Some("dev"));

    // Level 3: user.profile.settings
    let settings = profile.get("settings").unwrap().as_object().unwrap();
    assert_eq!(settings.get("theme").unwrap().as_str(), Some("dark"));
}
1966
#[test]
fn test_json_to_binary_with_compression() {
    // A long run of one character gives the compressed path repetitive input.
    let payload = "a".repeat(1000);

    let mut data = IndexMap::new();
    data.insert(String::from("data"), Value::String(payload.clone()));
    data.insert(String::from("count"), Value::Int(12345));
    let doc = TeaLeaf::new(IndexMap::new(), data);

    let scratch = tempfile::NamedTempFile::new().unwrap();
    doc.compile(scratch.path(), true).unwrap(); // compressed

    let reader = Reader::open(scratch.path()).unwrap();
    assert_eq!(reader.get("data").unwrap().as_str(), Some(payload.as_str()));
    assert_eq!(reader.get("count").unwrap().as_int(), Some(12345));
}
1984
#[test]
fn test_tl_to_binary_preserves_ref() {
    // "base" is a one-field object; "config" is a Ref naming it.
    let base = Value::Object(
        std::iter::once((String::from("host"), Value::String(String::from("localhost"))))
            .collect(),
    );

    let mut data = IndexMap::new();
    data.insert(String::from("base"), base);
    data.insert(String::from("config"), Value::Ref(String::from("base")));
    let doc = TeaLeaf::new(IndexMap::new(), data);

    let scratch = tempfile::NamedTempFile::new().unwrap();
    doc.compile(scratch.path(), false).unwrap();

    // The Ref must still be a Ref, with its name, after the binary round-trip.
    let reader = Reader::open(scratch.path()).unwrap();
    let restored = reader.get("config").unwrap();
    assert_eq!(restored.as_ref_name(), Some("base"));
}
2003
#[test]
fn test_tl_to_binary_preserves_tagged() {
    let mut data = IndexMap::new();
    data.insert(
        String::from("status"),
        Value::Tagged(String::from("ok"), Box::new(Value::Int(200))),
    );
    let doc = TeaLeaf::new(IndexMap::new(), data);

    let scratch = tempfile::NamedTempFile::new().unwrap();
    doc.compile(scratch.path(), false).unwrap();

    // Both the tag name and the wrapped value must survive.
    let reader = Reader::open(scratch.path()).unwrap();
    let restored = reader.get("status").unwrap();
    let (tag, inner) = restored.as_tagged().unwrap();
    assert_eq!(tag, "ok");
    assert_eq!(inner.as_int(), Some(200));
}
2021
#[test]
fn test_tl_to_binary_preserves_map() {
    let pairs = vec![
        (Value::Int(1), Value::String(String::from("one"))),
        (Value::Int(2), Value::String(String::from("two"))),
    ];
    let mut data = IndexMap::new();
    data.insert(String::from("lookup"), Value::Map(pairs));
    let doc = TeaLeaf::new(IndexMap::new(), data);

    let scratch = tempfile::NamedTempFile::new().unwrap();
    doc.compile(scratch.path(), false).unwrap();

    // Pair count and the first pair's key and value are checked.
    let reader = Reader::open(scratch.path()).unwrap();
    let restored = reader.get("lookup").unwrap();
    let entries = restored.as_map().unwrap();
    assert_eq!(entries.len(), 2);
    let (first_key, first_val) = &entries[0];
    assert_eq!(first_key.as_int(), Some(1));
    assert_eq!(first_val.as_str(), Some("one"));
}
2043
#[test]
fn test_tl_to_binary_preserves_bytes() {
    // Same four bytes are written and then expected back.
    const PAYLOAD: [u8; 4] = [0xca, 0xfe, 0xf0, 0x0d];

    let mut data = IndexMap::new();
    data.insert(String::from("data"), Value::Bytes(PAYLOAD.to_vec()));
    let doc = TeaLeaf::new(IndexMap::new(), data);

    let scratch = tempfile::NamedTempFile::new().unwrap();
    doc.compile(scratch.path(), false).unwrap();

    let reader = Reader::open(scratch.path()).unwrap();
    let restored = reader.get("data").unwrap();
    assert_eq!(restored.as_bytes(), Some(&PAYLOAD[..]));
}
2059
#[test]
fn test_tl_to_binary_preserves_timestamp() {
    // 1705315800000 ms since the epoch = 2024-01-15T10:30:00Z
    let mut data = IndexMap::new();
    data.insert(String::from("created"), Value::Timestamp(1705315800000, 0));
    let doc = TeaLeaf::new(IndexMap::new(), data);

    let scratch = tempfile::NamedTempFile::new().unwrap();
    doc.compile(scratch.path(), false).unwrap();

    // The millisecond value must be unchanged after the binary round-trip.
    let reader = Reader::open(scratch.path()).unwrap();
    let restored = reader.get("created").unwrap();
    assert_eq!(restored.as_timestamp_millis(), Some(1705315800000));
}
2075
#[test]
fn test_json_import_limitation_hex_string_remains_string() {
    // A "0x..." string in JSON stays a String on import; it is not decoded
    // into Bytes.
    let doc = TeaLeaf::from_json(r#"{"data":"0xcafef00d"}"#).unwrap();
    let imported = doc.get("data").unwrap();

    let text = imported.as_str();
    assert!(text.is_some(), "Hex string should remain String");
    assert_eq!(text, Some("0xcafef00d"));
    assert!(imported.as_bytes().is_none(), "Hex string should NOT become Bytes value");
}
2088
#[test]
fn test_json_import_limitation_array_pairs_remain_array() {
    // An array of two-element arrays looks like exported map pairs, but on
    // import it stays an Array and is not converted to a Map.
    let doc = TeaLeaf::from_json(r#"{"lookup":[[1,"one"],[2,"two"]]}"#).unwrap();
    let lookup = doc.get("lookup").unwrap();

    let as_array = lookup.as_array();
    assert!(as_array.is_some(), "Array of pairs should remain Array");
    assert!(lookup.as_map().is_none(), "Array of pairs should NOT become Map value");

    // Structure: two pairs, the first being [1, "one"].
    let pairs = as_array.unwrap();
    assert_eq!(pairs.len(), 2);
    let first = pairs[0].as_array().unwrap();
    assert_eq!(first[0].as_int(), Some(1));
    assert_eq!(first[1].as_str(), Some("one"));
}
2107
2108    // =========================================================================
2109    // Cross-Language Parity Test
2110    // =========================================================================
2111
#[test]
fn test_cross_language_parity_all_types() {
    // Verifies the Rust JSON export format for ALL special types.
    // The same fixture is tested in .NET, so key names, values and
    // insertion order are kept exactly as they were.
    let fixture: Vec<(&str, Value)> = vec![
        ("null_val", Value::Null),
        ("bool_true", Value::Bool(true)),
        ("int_val", Value::Int(42)),
        ("float_val", Value::Float(3.14159)),
        ("string_val", Value::String("hello".to_string())),
        ("bytes_val", Value::Bytes(vec![0xca, 0xfe])),
        ("timestamp_val", Value::Timestamp(0, 0)),
        ("array_val", Value::Array(vec![Value::Int(1), Value::Int(2)])),
        (
            "object_val",
            Value::Object(vec![("x".to_string(), Value::Int(1))].into_iter().collect()),
        ),
        ("ref_val", Value::Ref("object_val".to_string())),
        ("tagged_val", Value::Tagged("ok".to_string(), Box::new(Value::Int(200)))),
        (
            "map_val",
            Value::Map(vec![(Value::Int(1), Value::String("one".to_string()))]),
        ),
    ];
    let data: IndexMap<String, Value> = fixture
        .into_iter()
        .map(|(key, value)| (key.to_string(), value))
        .collect();
    let doc = TeaLeaf::new(IndexMap::new(), data);

    // --- Binary round-trip: every type must survive ---
    let scratch = tempfile::NamedTempFile::new().unwrap();
    doc.compile(scratch.path(), false).unwrap();
    let reader = Reader::open(scratch.path()).unwrap();

    assert!(reader.get("null_val").unwrap().is_null());
    assert_eq!(reader.get("bool_true").unwrap().as_bool(), Some(true));
    assert_eq!(reader.get("int_val").unwrap().as_int(), Some(42));
    assert_eq!(reader.get("float_val").unwrap().as_float(), Some(3.14159));
    assert_eq!(reader.get("string_val").unwrap().as_str(), Some("hello"));
    assert_eq!(reader.get("bytes_val").unwrap().as_bytes(), Some(&[0xca, 0xfe][..]));
    assert_eq!(reader.get("timestamp_val").unwrap().as_timestamp_millis(), Some(0));

    let array_back = reader.get("array_val").unwrap();
    assert_eq!(array_back.as_array().unwrap().len(), 2);

    let object_back = reader.get("object_val").unwrap();
    assert!(object_back.as_object().is_some());

    let ref_back = reader.get("ref_val").unwrap();
    assert_eq!(ref_back.as_ref_name(), Some("object_val"));

    let tagged_back = reader.get("tagged_val").unwrap();
    let (tag, inner) = tagged_back.as_tagged().unwrap();
    assert_eq!(tag, "ok");
    assert_eq!(inner.as_int(), Some(200));

    let map_back = reader.get("map_val").unwrap();
    assert_eq!(map_back.as_map().unwrap().len(), 1);

    // --- JSON export: fixed representations of the special types ---
    let json = doc.to_json().unwrap();

    // Bytes -> hex string
    assert!(json.contains("0xcafe"), "Bytes should export as hex: {}", json);

    // Ref -> object with a $ref key
    assert!(json.contains("\"$ref\""), "Ref should have $ref key: {}", json);

    // Tagged -> object with $tag and $value
    assert!(json.contains("\"$tag\""), "Tagged should have $tag: {}", json);
    assert!(json.contains("\"$value\""), "Tagged should have $value: {}", json);

    // Map -> array of pairs; with spaces and newlines removed that shows as "[["
    let squeezed: String = json
        .chars()
        .filter(|c| *c != ' ' && *c != '\n')
        .collect();
    assert!(squeezed.contains("[["), "Map should export as array of pairs: {}", json);

    // Timestamp -> ISO 8601
    assert!(json.contains("1970-01-01"), "Timestamp should be ISO 8601: {}", json);
}
2192
2193    // =========================================================================
2194    // JSON Conversion Contract Tests
2195    // =========================================================================
2196    // These tests lock down the exact JSON↔TeaLeaf conversion behavior.
2197    // STABILITY POLICY:
2198    // - Plain JSON roundtrip: MUST be lossless for primitives, arrays, objects
2199    // - TeaLeaf→JSON: Special types have FIXED representations that MUST NOT change
2200    // - JSON→TeaLeaf: No magic parsing; $ref/$tag/hex/ISO8601 stay as plain JSON
2201
2202    mod conversion_contracts {
2203        use super::*;
2204
2205        // --- Plain JSON Roundtrip (STABLE) ---
2206
#[test]
fn contract_null_roundtrip() {
    // A bare JSON `null` document is reachable under "root" as Value::Null.
    let doc = TeaLeaf::from_json("null").unwrap();
    let root = doc.get("root").unwrap();
    assert!(matches!(root, Value::Null));
}
2212
#[test]
fn contract_bool_roundtrip() {
    let doc = TeaLeaf::from_json(r#"{"t": true, "f": false}"#).unwrap();

    // Import side: both booleans keep their value.
    let truthy = doc.get("t").unwrap().as_bool();
    let falsy = doc.get("f").unwrap().as_bool();
    assert_eq!(truthy, Some(true));
    assert_eq!(falsy, Some(false));

    // Export side: both literals appear in the compact JSON.
    let exported = doc.to_json_compact().unwrap();
    assert!(exported.contains("true"));
    assert!(exported.contains("false"));
}
2223
#[test]
fn contract_integer_roundtrip() {
    // Zero, a positive and a negative integer all import with their exact value.
    let doc = TeaLeaf::from_json(r#"{"zero": 0, "pos": 42, "neg": -123}"#).unwrap();

    let zero = doc.get("zero").unwrap().as_int();
    let pos = doc.get("pos").unwrap().as_int();
    let neg = doc.get("neg").unwrap().as_int();

    assert_eq!(zero, Some(0));
    assert_eq!(pos, Some(42));
    assert_eq!(neg, Some(-123));
}
2231
#[test]
fn contract_float_roundtrip() {
    // The imported float must match the literal to within 1e-5.
    let doc = TeaLeaf::from_json(r#"{"pi": 3.14159}"#).unwrap();
    let imported = doc.get("pi").unwrap().as_float().unwrap();
    let delta = (imported - 3.14159).abs();
    assert!(delta < 0.00001);
}
2238
#[test]
fn contract_string_roundtrip() {
    // Covers a string with a space, a non-ASCII string and the empty string.
    let doc = TeaLeaf::from_json(r#"{"s": "hello world", "u": "日本語", "e": ""}"#).unwrap();

    let spaced = doc.get("s").unwrap().as_str();
    let unicode = doc.get("u").unwrap().as_str();
    let empty = doc.get("e").unwrap().as_str();

    assert_eq!(spaced, Some("hello world"));
    assert_eq!(unicode, Some("日本語"));
    assert_eq!(empty, Some(""));
}
2246
#[test]
fn contract_array_roundtrip() {
    // A mixed array keeps its length and each element's type and value.
    let doc = TeaLeaf::from_json(r#"{"arr": [1, "two", true, null]}"#).unwrap();
    let items = doc.get("arr").unwrap().as_array().unwrap();

    assert_eq!(items.len(), 4);
    assert_eq!(items[0].as_int(), Some(1));
    assert_eq!(items[1].as_str(), Some("two"));
    assert_eq!(items[2].as_bool(), Some(true));
    assert!(matches!(items[3], Value::Null));
}
2257
2258        #[test]
2259        fn contract_nested_array_roundtrip() {
2260            let doc = TeaLeaf::from_json(r#"{"matrix": [[1, 2], [3, 4]]}"#).unwrap();
2261            let matrix = doc.get("matrix").unwrap().as_array().unwrap();
2262            assert_eq!(matrix.len(), 2);
2263            let row0 = matrix[0].as_array().unwrap();
2264            assert_eq!(row0[0].as_int(), Some(1));
2265            assert_eq!(row0[1].as_int(), Some(2));
2266        }
2267
2268        #[test]
2269        fn contract_object_roundtrip() {
2270            let doc = TeaLeaf::from_json(r#"{"user": {"name": "alice", "age": 30}}"#).unwrap();
2271            let user = doc.get("user").unwrap().as_object().unwrap();
2272            assert_eq!(user.get("name").unwrap().as_str(), Some("alice"));
2273            assert_eq!(user.get("age").unwrap().as_int(), Some(30));
2274        }
2275
2276        // --- TeaLeaf→JSON Fixed Representations (STABLE) ---
2277
2278        #[test]
2279        fn contract_bytes_to_json_hex() {
2280            let mut data = IndexMap::new();
2281            data.insert("b".to_string(), Value::Bytes(vec![0xca, 0xfe, 0xba, 0xbe]));
2282            let doc = TeaLeaf { data, schemas: IndexMap::new(), unions: IndexMap::new(), is_root_array: false };
2283
2284            let json = doc.to_json_compact().unwrap();
2285            // CONTRACT: Bytes serialize as lowercase hex with 0x prefix
2286            assert!(json.contains("\"0xcafebabe\""), "Bytes must be 0x-prefixed hex: {}", json);
2287        }
2288
2289        #[test]
2290        fn contract_bytes_empty_to_json() {
2291            let mut data = IndexMap::new();
2292            data.insert("b".to_string(), Value::Bytes(vec![]));
2293            let doc = TeaLeaf { data, schemas: IndexMap::new(), unions: IndexMap::new(), is_root_array: false };
2294
2295            let json = doc.to_json_compact().unwrap();
2296            // CONTRACT: Empty bytes serialize as "0x"
2297            assert!(json.contains("\"0x\""), "Empty bytes must be \"0x\": {}", json);
2298        }
2299
2300        #[test]
2301        fn contract_timestamp_to_json_iso8601() {
2302            let mut data = IndexMap::new();
2303            // 2024-01-15T10:50:00.123Z (verified milliseconds since epoch)
2304            data.insert("ts".to_string(), Value::Timestamp(1705315800123, 0));
2305            let doc = TeaLeaf { data, schemas: IndexMap::new(), unions: IndexMap::new(), is_root_array: false };
2306
2307            let json = doc.to_json_compact().unwrap();
2308            // CONTRACT: Timestamp serializes as ISO 8601 with milliseconds
2309            assert!(json.contains("2024-01-15T10:50:00.123Z"),
2310                "Timestamp must be ISO 8601 with ms: {}", json);
2311        }
2312
2313        #[test]
2314        fn contract_timestamp_epoch_to_json() {
2315            let mut data = IndexMap::new();
2316            data.insert("ts".to_string(), Value::Timestamp(0, 0));
2317            let doc = TeaLeaf { data, schemas: IndexMap::new(), unions: IndexMap::new(), is_root_array: false };
2318
2319            let json = doc.to_json_compact().unwrap();
2320            // CONTRACT: Unix epoch is 1970-01-01T00:00:00Z (no ms for whole seconds)
2321            assert!(json.contains("1970-01-01T00:00:00Z"),
2322                "Epoch must be 1970-01-01T00:00:00Z: {}", json);
2323        }
2324
2325        #[test]
2326        fn contract_ref_to_json() {
2327            let mut data = IndexMap::new();
2328            data.insert("r".to_string(), Value::Ref("target_key".to_string()));
2329            let doc = TeaLeaf { data, schemas: IndexMap::new(), unions: IndexMap::new(), is_root_array: false };
2330
2331            let json = doc.to_json_compact().unwrap();
2332            // CONTRACT: Ref serializes as {"$ref": "name"}
2333            assert!(json.contains("\"$ref\":\"target_key\"") || json.contains("\"$ref\": \"target_key\""),
2334                "Ref must be {{\"$ref\": \"name\"}}: {}", json);
2335        }
2336
2337        #[test]
2338        fn contract_tagged_to_json() {
2339            let mut data = IndexMap::new();
2340            data.insert("t".to_string(), Value::Tagged("ok".to_string(), Box::new(Value::Int(200))));
2341            let doc = TeaLeaf { data, schemas: IndexMap::new(), unions: IndexMap::new(), is_root_array: false };
2342
2343            let json = doc.to_json_compact().unwrap();
2344            // CONTRACT: Tagged serializes with $tag and $value keys
2345            assert!(json.contains("\"$tag\""), "Tagged must have $tag: {}", json);
2346            assert!(json.contains("\"ok\""), "Tag name must be present: {}", json);
2347            assert!(json.contains("\"$value\""), "Tagged must have $value: {}", json);
2348            assert!(json.contains("200"), "Inner value must be present: {}", json);
2349        }
2350
2351        #[test]
2352        fn contract_tagged_null_value_to_json() {
2353            let mut data = IndexMap::new();
2354            data.insert("t".to_string(), Value::Tagged("none".to_string(), Box::new(Value::Null)));
2355            let doc = TeaLeaf { data, schemas: IndexMap::new(), unions: IndexMap::new(), is_root_array: false };
2356
2357            let json = doc.to_json_compact().unwrap();
2358            // CONTRACT: Tagged with null inner still has $value: null
2359            assert!(json.contains("\"$value\":null") || json.contains("\"$value\": null"),
2360                "Tagged with null must have $value:null: {}", json);
2361        }
2362
2363        #[test]
2364        fn contract_map_to_json_pairs() {
2365            let mut data = IndexMap::new();
2366            data.insert("m".to_string(), Value::Map(vec![
2367                (Value::Int(1), Value::String("one".to_string())),
2368                (Value::Int(2), Value::String("two".to_string())),
2369            ]));
2370            let doc = TeaLeaf { data, schemas: IndexMap::new(), unions: IndexMap::new(), is_root_array: false };
2371
2372            let json = doc.to_json_compact().unwrap();
2373            // CONTRACT: Map serializes as array of [key, value] pairs
2374            assert!(json.contains("[[1,\"one\"],[2,\"two\"]]") ||
2375                    json.contains("[[1, \"one\"], [2, \"two\"]]"),
2376                "Map must be [[k,v],...]: {}", json);
2377        }
2378
2379        #[test]
2380        fn contract_map_empty_to_json() {
2381            let mut data = IndexMap::new();
2382            data.insert("m".to_string(), Value::Map(vec![]));
2383            let doc = TeaLeaf { data, schemas: IndexMap::new(), unions: IndexMap::new(), is_root_array: false };
2384
2385            let json = doc.to_json_compact().unwrap();
2386            // CONTRACT: Empty map serializes as empty array
2387            assert!(json.contains("\"m\":[]") || json.contains("\"m\": []"),
2388                "Empty map must be []: {}", json);
2389        }
2390
2391        // --- JSON→TeaLeaf No Magic (STABLE) ---
2392
2393        #[test]
2394        fn contract_json_dollar_ref_stays_object() {
2395            let doc = TeaLeaf::from_json(r#"{"x": {"$ref": "some_key"}}"#).unwrap();
2396            let x = doc.get("x").unwrap();
2397            // CONTRACT: JSON {"$ref": ...} MUST remain Object, NOT become Ref
2398            assert!(x.as_object().is_some(), "$ref in JSON must stay Object, not become Ref");
2399            assert!(x.as_ref_name().is_none(), "$ref must not auto-convert to Ref type");
2400        }
2401
2402        #[test]
2403        fn contract_json_dollar_tag_stays_object() {
2404            let doc = TeaLeaf::from_json(r#"{"x": {"$tag": "ok", "$value": 200}}"#).unwrap();
2405            let x = doc.get("x").unwrap();
2406            // CONTRACT: JSON {"$tag": ..., "$value": ...} MUST remain Object
2407            assert!(x.as_object().is_some(), "$tag in JSON must stay Object, not become Tagged");
2408            assert!(x.as_tagged().is_none(), "$tag must not auto-convert to Tagged type");
2409        }
2410
2411        #[test]
2412        fn contract_json_hex_string_stays_string() {
2413            let doc = TeaLeaf::from_json(r#"{"x": "0xcafef00d"}"#).unwrap();
2414            let x = doc.get("x").unwrap();
2415            // CONTRACT: Hex strings MUST remain String, NOT become Bytes
2416            assert_eq!(x.as_str(), Some("0xcafef00d"));
2417            assert!(x.as_bytes().is_none(), "Hex string must not auto-convert to Bytes");
2418        }
2419
2420        #[test]
2421        fn contract_json_iso_timestamp_stays_string() {
2422            let doc = TeaLeaf::from_json(r#"{"x": "2024-01-15T10:30:00.000Z"}"#).unwrap();
2423            let x = doc.get("x").unwrap();
2424            // CONTRACT: ISO 8601 strings MUST remain String, NOT become Timestamp
2425            assert_eq!(x.as_str(), Some("2024-01-15T10:30:00.000Z"));
2426            assert!(x.as_timestamp_millis().is_none(), "ISO string must not auto-convert to Timestamp");
2427        }
2428
2429        #[test]
2430        fn contract_json_array_pairs_stays_array() {
2431            let doc = TeaLeaf::from_json(r#"{"x": [[1, "one"], [2, "two"]]}"#).unwrap();
2432            let x = doc.get("x").unwrap();
2433            // CONTRACT: Array of pairs MUST remain Array, NOT become Map
2434            assert!(x.as_array().is_some(), "Array of pairs must stay Array, not become Map");
2435            assert!(x.as_map().is_none(), "Array pairs must not auto-convert to Map");
2436        }
2437
2438        // --- Number Type Inference (STABLE) ---
2439
2440        #[test]
2441        fn contract_number_integer_to_int() {
2442            let doc = TeaLeaf::from_json(r#"{"n": 42}"#).unwrap();
2443            // CONTRACT: Integers that fit i64 become Int
2444            assert!(doc.get("n").unwrap().as_int().is_some());
2445        }
2446
2447        #[test]
2448        fn contract_number_large_to_uint() {
2449            // Max u64 = 18446744073709551615, which doesn't fit i64
2450            let doc = TeaLeaf::from_json(r#"{"n": 18446744073709551615}"#).unwrap();
2451            // CONTRACT: Large positive integers that fit u64 become UInt
2452            assert!(doc.get("n").unwrap().as_uint().is_some());
2453        }
2454
2455        #[test]
2456        fn contract_number_decimal_to_float() {
2457            let doc = TeaLeaf::from_json(r#"{"n": 3.14}"#).unwrap();
2458            // CONTRACT: Numbers with decimals become Float
2459            assert!(doc.get("n").unwrap().as_float().is_some());
2460        }
2461
2462        // --- Edge Cases (STABLE) ---
2463
2464        #[test]
2465        fn contract_float_nan_to_null() {
2466            let mut data = IndexMap::new();
2467            data.insert("f".to_string(), Value::Float(f64::NAN));
2468            let doc = TeaLeaf { data, schemas: IndexMap::new(), unions: IndexMap::new(), is_root_array: false };
2469
2470            let json = doc.to_json_compact().unwrap();
2471            // CONTRACT: NaN serializes as null (JSON has no NaN)
2472            assert!(json.contains("null"), "NaN must serialize as null: {}", json);
2473        }
2474
2475        #[test]
2476        fn contract_float_infinity_to_null() {
2477            let mut data = IndexMap::new();
2478            data.insert("f".to_string(), Value::Float(f64::INFINITY));
2479            let doc = TeaLeaf { data, schemas: IndexMap::new(), unions: IndexMap::new(), is_root_array: false };
2480
2481            let json = doc.to_json_compact().unwrap();
2482            // CONTRACT: Infinity serializes as null (JSON has no Infinity)
2483            assert!(json.contains("null"), "Infinity must serialize as null: {}", json);
2484        }
2485
2486        #[test]
2487        fn contract_deep_nesting_preserved() {
2488            let doc = TeaLeaf::from_json(r#"{"a":{"b":{"c":{"d":{"e":5}}}}}"#).unwrap();
2489            let a = doc.get("a").unwrap().as_object().unwrap();
2490            let b = a.get("b").unwrap().as_object().unwrap();
2491            let c = b.get("c").unwrap().as_object().unwrap();
2492            let d = c.get("d").unwrap().as_object().unwrap();
2493            assert_eq!(d.get("e").unwrap().as_int(), Some(5));
2494        }
2495    }
2496
2497    // =========================================================================
2498    // Schema Inference Tests
2499    // =========================================================================
2500
2501    #[test]
2502    fn test_schema_inference_simple_array() {
2503        let json = r#"{"users": [{"name": "alice", "age": 30}, {"name": "bob", "age": 25}]}"#;
2504        let doc = TeaLeaf::from_json_with_schemas(json).unwrap();
2505
2506        // Should have inferred a "user" schema
2507        let schema = doc.schema("user");
2508        assert!(schema.is_some(), "Should infer 'user' schema from 'users' array");
2509
2510        let schema = schema.unwrap();
2511        assert_eq!(schema.fields.len(), 2);
2512
2513        // Fields should preserve insertion order from JSON
2514        assert_eq!(schema.fields[0].name, "name");
2515        assert_eq!(schema.fields[1].name, "age");
2516
2517        // Data should still be accessible
2518        let users = doc.get("users").unwrap().as_array().unwrap();
2519        assert_eq!(users.len(), 2);
2520        assert_eq!(users[0].as_object().unwrap().get("name").unwrap().as_str(), Some("alice"));
2521    }
2522
2523    #[test]
2524    fn test_schema_inference_nested_arrays() {
2525        let json = r#"{
2526            "orders": [
2527                {"id": 1, "items": [{"sku": "A", "qty": 2}, {"sku": "B", "qty": 1}]},
2528                {"id": 2, "items": [{"sku": "C", "qty": 3}]}
2529            ]
2530        }"#;
2531        let doc = TeaLeaf::from_json_with_schemas(json).unwrap();
2532
2533        // Should infer both "order" and "item" schemas
2534        assert!(doc.schema("order").is_some(), "Should infer 'order' schema");
2535        assert!(doc.schema("item").is_some(), "Should infer 'item' schema");
2536
2537        let order_schema = doc.schema("order").unwrap();
2538        // Order should have "id" and "items" fields
2539        assert!(order_schema.fields.iter().any(|f| f.name == "id"));
2540        assert!(order_schema.fields.iter().any(|f| f.name == "items"));
2541
2542        // The "items" field should reference the "item" schema
2543        let items_field = order_schema.fields.iter().find(|f| f.name == "items").unwrap();
2544        assert!(items_field.field_type.is_array);
2545        assert_eq!(items_field.field_type.base, "item");
2546    }
2547
2548    #[test]
2549    fn test_schema_inference_to_tl_text() {
2550        let json = r#"{"products": [{"name": "Widget", "price": 9.99}, {"name": "Gadget", "price": 19.99}]}"#;
2551        let doc = TeaLeaf::from_json_with_schemas(json).unwrap();
2552
2553        let tl_text = doc.to_tl_with_schemas();
2554
2555        // Should contain struct definition
2556        assert!(tl_text.contains("@struct product"), "TeaLeaf text should contain struct definition");
2557        assert!(tl_text.contains("name: string"), "Struct should have name field");
2558        assert!(tl_text.contains("price: float"), "Struct should have price field");
2559
2560        // Should contain @table directive
2561        assert!(tl_text.contains("@table product"), "TeaLeaf text should use @table for data");
2562
2563        // Should contain tuple format data
2564        assert!(tl_text.contains("Widget") || tl_text.contains("\"Widget\""), "Data should be present");
2565    }
2566
2567    #[test]
2568    fn test_schema_inference_roundtrip() {
2569        let json = r#"{"items": [{"id": 1, "name": "A"}, {"id": 2, "name": "B"}]}"#;
2570        let doc = TeaLeaf::from_json_with_schemas(json).unwrap();
2571
2572        // Convert to TeaLeaf text with schemas
2573        let tl_text = doc.to_tl_with_schemas();
2574
2575        // Parse the TeaLeaf text back
2576        let parsed = TeaLeaf::parse(&tl_text).unwrap();
2577
2578        // Should have the same data
2579        let items = parsed.get("items").unwrap().as_array().unwrap();
2580        assert_eq!(items.len(), 2);
2581        assert_eq!(items[0].as_object().unwrap().get("id").unwrap().as_int(), Some(1));
2582        assert_eq!(items[0].as_object().unwrap().get("name").unwrap().as_str(), Some("A"));
2583
2584        // Should have the schema
2585        assert!(parsed.schema("item").is_some());
2586    }
2587
2588    #[test]
2589    fn test_schema_inference_nullable_fields() {
2590        let json = r#"{"users": [{"name": "alice", "email": "a@test.com"}, {"name": "bob", "email": null}]}"#;
2591        let doc = TeaLeaf::from_json_with_schemas(json).unwrap();
2592
2593        let schema = doc.schema("user").unwrap();
2594        let email_field = schema.fields.iter().find(|f| f.name == "email").unwrap();
2595
2596        // Email should be nullable since one value is null
2597        assert!(email_field.field_type.nullable, "Field with null values should be nullable");
2598    }
2599
2600    #[test]
2601    fn test_schema_inference_nested_tuples_no_redundant_table() {
2602        let json = r#"{
2603            "orders": [
2604                {"id": 1, "items": [{"sku": "A", "qty": 2}]}
2605            ]
2606        }"#;
2607        let doc = TeaLeaf::from_json_with_schemas(json).unwrap();
2608        let tl_text = doc.to_tl_with_schemas();
2609
2610        // Count occurrences of @table - should only appear at top level for each schema-typed array
2611        let _table_count = tl_text.matches("@table").count();
2612
2613        // Should have @table for orders, but NOT redundant @table for nested items
2614        // The nested items array should just be [...] with tuples inside
2615        assert!(tl_text.contains("@table order"), "Should have @table for orders");
2616
2617        // Parse and verify the structure is correct
2618        let parsed = TeaLeaf::parse(&tl_text).unwrap();
2619        let orders = parsed.get("orders").unwrap().as_array().unwrap();
2620        let first_order = orders[0].as_object().unwrap();
2621        let items = first_order.get("items").unwrap().as_array().unwrap();
2622        assert_eq!(items[0].as_object().unwrap().get("sku").unwrap().as_str(), Some("A"));
2623    }
2624
2625    #[test]
2626    fn test_schema_inference_mismatched_arrays_not_matched() {
2627        // Test that arrays with different structures don't incorrectly share schemas
2628        let json = r#"{
2629            "users": [{"id": "U1", "name": "Alice"}],
2630            "products": [{"id": "P1", "price": 9.99}]
2631        }"#;
2632        let doc = TeaLeaf::from_json_with_schemas(json).unwrap();
2633
2634        // Should have separate schemas
2635        assert!(doc.schema("user").is_some());
2636        assert!(doc.schema("product").is_some());
2637
2638        // User schema should have name field
2639        let user_schema = doc.schema("user").unwrap();
2640        assert!(user_schema.fields.iter().any(|f| f.name == "name"));
2641
2642        // Product schema should have price field
2643        let product_schema = doc.schema("product").unwrap();
2644        assert!(product_schema.fields.iter().any(|f| f.name == "price"));
2645    }
2646
2647    #[test]
2648    fn test_schema_inference_special_char_quoting() {
2649        // Test that strings with special characters are properly quoted
2650        let json = r#"{"items": [
2651            {"category": "Electronics/Audio", "email": "test@example.com", "path": "a.b.c"}
2652        ]}"#;
2653        let doc = TeaLeaf::from_json_with_schemas(json).unwrap();
2654        let tl_text = doc.to_tl_with_schemas();
2655
2656        // These should be quoted in output since they contain special characters
2657        assert!(tl_text.contains("\"Electronics/Audio\""), "Slash should be quoted: {}", tl_text);
2658        assert!(tl_text.contains("\"test@example.com\""), "@ should be quoted: {}", tl_text);
2659        // Dots are valid in names per spec grammar, so a.b.c should NOT be quoted
2660        assert!(!tl_text.contains("\"a.b.c\""), "Dots should NOT be quoted per spec grammar: {}", tl_text);
2661
2662        // Should parse back correctly
2663        let parsed = TeaLeaf::parse(&tl_text).unwrap();
2664        let items = parsed.get("items").unwrap().as_array().unwrap();
2665        let item = items[0].as_object().unwrap();
2666        assert_eq!(item.get("category").unwrap().as_str(), Some("Electronics/Audio"));
2667        assert_eq!(item.get("email").unwrap().as_str(), Some("test@example.com"));
2668    }
2669
2670    #[test]
2671    fn test_schema_inference_nested_objects() {
2672        // Test that nested objects within array elements get schemas created
2673        let json = r#"{
2674            "customers": [
2675                {
2676                    "id": 1,
2677                    "name": "Alice",
2678                    "billing_address": {
2679                        "street": "123 Main St",
2680                        "city": "Boston",
2681                        "state": "MA",
2682                        "postal_code": "02101",
2683                        "country": "USA"
2684                    },
2685                    "shipping_address": {
2686                        "street": "456 Oak Ave",
2687                        "city": "Cambridge",
2688                        "state": "MA",
2689                        "postal_code": "02139",
2690                        "country": "USA"
2691                    }
2692                },
2693                {
2694                    "id": 2,
2695                    "name": "Bob",
2696                    "billing_address": {
2697                        "street": "789 Elm St",
2698                        "city": "New York",
2699                        "state": "NY",
2700                        "postal_code": "10001",
2701                        "country": "USA"
2702                    },
2703                    "shipping_address": {
2704                        "street": "789 Elm St",
2705                        "city": "New York",
2706                        "state": "NY",
2707                        "postal_code": "10001",
2708                        "country": "USA"
2709                    }
2710                }
2711            ]
2712        }"#;
2713
2714        let doc = TeaLeaf::from_json_with_schemas(json).unwrap();
2715
2716        // Should have schemas for nested objects
2717        assert!(doc.schema("billing_address").is_some(), "Should create billing_address schema");
2718        assert!(doc.schema("shipping_address").is_some(), "Should create shipping_address schema");
2719        assert!(doc.schema("customer").is_some(), "Should create customer schema");
2720
2721        // Check billing_address schema fields
2722        let billing_schema = doc.schema("billing_address").unwrap();
2723        let billing_fields: Vec<&str> = billing_schema.fields.iter().map(|f| f.name.as_str()).collect();
2724        assert!(billing_fields.contains(&"street"), "billing_address should have street field");
2725        assert!(billing_fields.contains(&"city"), "billing_address should have city field");
2726        assert!(billing_fields.contains(&"state"), "billing_address should have state field");
2727        assert!(billing_fields.contains(&"postal_code"), "billing_address should have postal_code field");
2728        assert!(billing_fields.contains(&"country"), "billing_address should have country field");
2729
2730        // Check customer schema references the nested schemas
2731        let customer_schema = doc.schema("customer").unwrap();
2732        let billing_field = customer_schema.fields.iter().find(|f| f.name == "billing_address").unwrap();
2733        assert_eq!(billing_field.field_type.base, "billing_address", "customer.billing_address should reference billing_address schema");
2734
2735        let shipping_field = customer_schema.fields.iter().find(|f| f.name == "shipping_address").unwrap();
2736        assert_eq!(shipping_field.field_type.base, "shipping_address", "customer.shipping_address should reference shipping_address schema");
2737
2738        // Serialize and verify output
2739        let tl_text = doc.to_tl_with_schemas();
2740        assert!(tl_text.contains("@struct billing_address"), "Output should contain billing_address struct");
2741        assert!(tl_text.contains("@struct shipping_address"), "Output should contain shipping_address struct");
2742        assert!(tl_text.contains("billing_address: billing_address"), "customer should have billing_address field with billing_address type");
2743        assert!(tl_text.contains("shipping_address: shipping_address"), "customer should have shipping_address field with shipping_address type");
2744    }
2745
2746    #[test]
2747    fn test_schema_inference_nested_objects_with_nulls() {
2748        // Test that nested objects handle nullable fields correctly
2749        let json = r#"{
2750            "orders": [
2751                {
2752                    "id": 1,
2753                    "customer": {
2754                        "name": "Alice",
2755                        "phone": "555-1234"
2756                    }
2757                },
2758                {
2759                    "id": 2,
2760                    "customer": {
2761                        "name": "Bob",
2762                        "phone": null
2763                    }
2764                }
2765            ]
2766        }"#;
2767
2768        let doc = TeaLeaf::from_json_with_schemas(json).unwrap();
2769
2770        // Customer schema should exist with nullable phone
2771        let customer_schema = doc.schema("customer").unwrap();
2772        let phone_field = customer_schema.fields.iter().find(|f| f.name == "phone").unwrap();
2773        assert!(phone_field.field_type.nullable, "phone field should be nullable");
2774    }
2775
2776    // =========================================================================
2777    // Coverage: dumps(), write_value(), escape_string(), format_float()
2778    // =========================================================================
2779
2780    #[test]
2781    fn test_dumps_all_value_types() {
2782        let mut data = IndexMap::new();
2783        data.insert("null_val".to_string(), Value::Null);
2784        data.insert("bool_val".to_string(), Value::Bool(true));
2785        data.insert("int_val".to_string(), Value::Int(42));
2786        data.insert("uint_val".to_string(), Value::UInt(999));
2787        data.insert("float_val".to_string(), Value::Float(3.14));
2788        data.insert("str_val".to_string(), Value::String("hello".to_string()));
2789        data.insert("bytes_val".to_string(), Value::Bytes(vec![0xca, 0xfe]));
2790        data.insert("arr_val".to_string(), Value::Array(vec![Value::Int(1), Value::Int(2)]));
2791        data.insert("obj_val".to_string(), Value::Object(
2792            vec![("x".to_string(), Value::Int(1))].into_iter().collect()
2793        ));
2794        data.insert("map_val".to_string(), Value::Map(vec![
2795            (Value::Int(1), Value::String("one".to_string())),
2796        ]));
2797        data.insert("ref_val".to_string(), Value::Ref("target".to_string()));
2798        data.insert("tag_val".to_string(), Value::Tagged("ok".to_string(), Box::new(Value::Int(200))));
2799        data.insert("ts_val".to_string(), Value::Timestamp(0, 0));
2800        data.insert("ts_millis".to_string(), Value::Timestamp(1705315800123, 0));
2801
2802        let output = dumps(&data);
2803
2804        assert!(output.contains("~"), "Should contain null");
2805        assert!(output.contains("true"), "Should contain bool");
2806        assert!(output.contains("42"), "Should contain int");
2807        assert!(output.contains("999"), "Should contain uint");
2808        assert!(output.contains("3.14"), "Should contain float");
2809        assert!(output.contains("hello"), "Should contain string");
2810        assert!(output.contains("b\"cafe\""), "Should contain bytes literal");
2811        assert!(output.contains("[1, 2]"), "Should contain array");
2812        assert!(output.contains("@map {"), "Should contain map");
2813        assert!(output.contains("!target"), "Should contain ref");
2814        assert!(output.contains(":ok 200"), "Should contain tagged");
2815        assert!(output.contains("1970-01-01T00:00:00Z"), "Should contain epoch timestamp");
2816        assert!(output.contains(".123Z"), "Should contain millis timestamp");
2817    }
2818
2819    #[test]
2820    fn test_bytes_literal_text_roundtrip() {
2821        // dumps() emits b"..." → parse() reads it back as Value::Bytes
2822        let mut data = IndexMap::new();
2823        data.insert("payload".to_string(), Value::Bytes(vec![0xca, 0xfe, 0xf0, 0x0d]));
2824        data.insert("empty".to_string(), Value::Bytes(vec![]));
2825
2826        let text = dumps(&data);
2827        assert!(text.contains(r#"b"cafef00d""#), "Should emit b\"...\" literal: {}", text);
2828        assert!(text.contains(r#"b"""#), "Should emit empty bytes literal: {}", text);
2829
2830        // Parse the text back
2831        let doc = TeaLeaf::parse(&text).unwrap();
2832        assert_eq!(doc.data.get("payload").unwrap().as_bytes(), Some(&[0xca, 0xfe, 0xf0, 0x0d][..]));
2833        assert_eq!(doc.data.get("empty").unwrap().as_bytes(), Some(&[][..]));
2834    }
2835
2836    #[test]
2837    fn test_dumps_string_quoting() {
2838        let mut data = IndexMap::new();
2839        data.insert("quoted".to_string(), Value::String("hello world".to_string()));
2840        data.insert("unquoted".to_string(), Value::String("hello".to_string()));
2841        data.insert("reserved_true".to_string(), Value::String("true".to_string()));
2842        data.insert("reserved_null".to_string(), Value::String("null".to_string()));
2843        data.insert("reserved_tilde".to_string(), Value::String("~".to_string()));
2844        data.insert("empty".to_string(), Value::String("".to_string()));
2845        data.insert("at_start".to_string(), Value::String("@directive".to_string()));
2846        data.insert("hash_start".to_string(), Value::String("#comment".to_string()));
2847        data.insert("bang_start".to_string(), Value::String("!ref".to_string()));
2848        data.insert("hex_start".to_string(), Value::String("0xabc".to_string()));
2849        data.insert("number_like".to_string(), Value::String("42abc".to_string()));
2850        data.insert("negative_like".to_string(), Value::String("-5".to_string()));
2851        data.insert("slash".to_string(), Value::String("a/b".to_string()));
2852        data.insert("dot".to_string(), Value::String("a.b".to_string()));
2853
2854        let output = dumps(&data);
2855
2856        // Quoted values should be wrapped in double quotes
2857        assert!(output.contains("\"hello world\""), "Spaces need quoting");
2858        assert!(output.contains("\"true\""), "Reserved word true needs quoting");
2859        assert!(output.contains("\"null\""), "Reserved word null needs quoting");
2860        assert!(output.contains("\"~\""), "Tilde needs quoting");
2861        assert!(output.contains("\"\""), "Empty string needs quoting");
2862        assert!(output.contains("\"@directive\""), "@ prefix needs quoting");
2863        assert!(output.contains("\"#comment\""), "# prefix needs quoting");
2864        assert!(output.contains("\"!ref\""), "! prefix needs quoting");
2865        assert!(output.contains("\"0xabc\""), "0x prefix needs quoting");
2866        assert!(output.contains("\"42abc\""), "Digit start needs quoting");
2867        assert!(output.contains("\"-5\""), "Negative number needs quoting");
2868        assert!(output.contains("\"a/b\""), "Slash needs quoting");
2869        assert!(!output.contains("\"a.b\""), "Dot should NOT need quoting per spec grammar");
2870    }
2871
2872    #[test]
2873    fn test_escape_string_control_chars() {
2874        let result = escape_string("tab\there\nnewline\rreturn");
2875        assert!(result.contains("\\t"), "Tab should be escaped");
2876        assert!(result.contains("\\n"), "Newline should be escaped");
2877        assert!(result.contains("\\r"), "CR should be escaped");
2878
2879        let result = escape_string("\x08backspace\x0cformfeed");
2880        assert!(result.contains("\\b"), "Backspace should be escaped");
2881        assert!(result.contains("\\f"), "Formfeed should be escaped");
2882
2883        let result = escape_string("quote\"and\\backslash");
2884        assert!(result.contains("\\\""), "Quote should be escaped");
2885        assert!(result.contains("\\\\"), "Backslash should be escaped");
2886
2887        // Other control characters use \uXXXX
2888        let result = escape_string("\x01");
2889        assert!(result.contains("\\u0001"), "Control char should use \\uXXXX");
2890    }
2891
2892    #[test]
2893    fn test_format_float_both_branches() {
2894        // Whole number float: Rust's to_string() drops .0, so format_float adds it back
2895        assert_eq!(format_float(42.0), "42.0");
2896
2897        // Float with decimals should stay as-is
2898        assert_eq!(format_float(3.14), "3.14");
2899
2900        // Scientific notation stays as-is
2901        let very_small = format_float(1e-20);
2902        assert!(very_small.contains('e') || very_small.contains('.'));
2903    }
2904
2905    #[test]
2906    fn test_needs_quoting_various_patterns() {
2907        // Should need quoting
2908        assert!(needs_quoting(""), "Empty string");
2909        assert!(needs_quoting("hello world"), "Whitespace");
2910        assert!(needs_quoting("a,b"), "Comma");
2911        assert!(needs_quoting("(x)"), "Parens");
2912        assert!(needs_quoting("[x]"), "Brackets");
2913        assert!(needs_quoting("{x}"), "Braces");
2914        assert!(needs_quoting("a:b"), "Colon");
2915        assert!(needs_quoting("@x"), "At sign");
2916        assert!(needs_quoting("a/b"), "Slash");
2917        assert!(!needs_quoting("a.b"), "Dot is valid in names per spec grammar");
2918        assert!(needs_quoting("true"), "Reserved true");
2919        assert!(needs_quoting("false"), "Reserved false");
2920        assert!(needs_quoting("null"), "Reserved null");
2921        assert!(needs_quoting("~"), "Reserved tilde");
2922        assert!(needs_quoting("!bang"), "Bang prefix");
2923        assert!(needs_quoting("#hash"), "Hash prefix");
2924        assert!(needs_quoting("0xdead"), "Hex prefix");
2925        assert!(needs_quoting("0Xdead"), "Hex prefix uppercase");
2926        assert!(needs_quoting("42abc"), "Starts with digit");
2927        assert!(needs_quoting("-5"), "Starts with minus+digit");
2928        assert!(needs_quoting("+5"), "Starts with plus+digit");
2929
2930        // Should NOT need quoting
2931        assert!(!needs_quoting("hello"), "Simple word");
2932        assert!(!needs_quoting("foo_bar"), "Underscore word");
2933        assert!(!needs_quoting("abc123"), "Alpha then digits");
2934    }
2935
2936    // =========================================================================
2937    // Coverage: singularize()
2938    // =========================================================================
2939
2940    #[test]
2941    fn test_singularize_rules() {
2942        // -ies → -y
2943        assert_eq!(singularize("categories"), "category");
2944        assert_eq!(singularize("entries"), "entry");
2945
2946        // -sses → -ss (special -es rule)
2947        assert_eq!(singularize("classes"), "class");
2948        assert_eq!(singularize("dresses"), "dress");
2949
2950        // -xes → -x
2951        assert_eq!(singularize("boxes"), "box");
2952        assert_eq!(singularize("indexes"), "index");
2953
2954        // -ches → -ch
2955        assert_eq!(singularize("watches"), "watch");
2956
2957        // -shes → -sh
2958        assert_eq!(singularize("dishes"), "dish");
2959
2960        // Regular -s
2961        assert_eq!(singularize("users"), "user");
2962        assert_eq!(singularize("products"), "product");
2963
2964        // Words ending in -ss (should NOT remove s)
2965        assert_eq!(singularize("boss"), "boss");
2966        assert_eq!(singularize("class"), "class");
2967
2968        // Already singular (no trailing s)
2969        assert_eq!(singularize("item"), "item");
2970        assert_eq!(singularize("child"), "child");
2971    }
2972
2973    // =========================================================================
2974    // Coverage: from_json root primitives, loads()
2975    // =========================================================================
2976
2977    #[test]
2978    fn test_from_json_root_primitive() {
2979        // Root-level string
2980        let doc = TeaLeaf::from_json(r#""hello""#).unwrap();
2981        assert_eq!(doc.get("root").unwrap().as_str(), Some("hello"));
2982        assert!(!doc.is_root_array);
2983
2984        // Root-level number
2985        let doc = TeaLeaf::from_json("42").unwrap();
2986        assert_eq!(doc.get("root").unwrap().as_int(), Some(42));
2987
2988        // Root-level bool
2989        let doc = TeaLeaf::from_json("true").unwrap();
2990        assert_eq!(doc.get("root").unwrap().as_bool(), Some(true));
2991
2992        // Root-level null
2993        let doc = TeaLeaf::from_json("null").unwrap();
2994        assert!(doc.get("root").unwrap().is_null());
2995    }
2996
2997    #[test]
2998    fn test_from_json_invalid() {
2999        let result = TeaLeaf::from_json("not valid json {{{");
3000        assert!(result.is_err());
3001    }
3002
3003    #[test]
3004    fn test_loads_convenience() {
3005        let data = loads("name: alice\nage: 30").unwrap();
3006        assert_eq!(data.get("name").unwrap().as_str(), Some("alice"));
3007        assert_eq!(data.get("age").unwrap().as_int(), Some(30));
3008    }
3009
3010    // =========================================================================
3011    // Coverage: InferredType::merge() branches
3012    // =========================================================================
3013
3014    #[test]
3015    fn test_inferred_type_merge_int_float() {
3016        let t = infer_type(&Value::Int(42));
3017        let f = infer_type(&Value::Float(3.14));
3018        let merged = t.merge(&f);
3019        assert_eq!(merged, InferredType::Float);
3020
3021        // Reverse
3022        let merged = f.merge(&t);
3023        assert_eq!(merged, InferredType::Float);
3024    }
3025
3026    #[test]
3027    fn test_inferred_type_merge_null_with_type() {
3028        let n = InferredType::Null;
3029        let s = InferredType::String;
3030        let merged = n.merge(&s);
3031        assert_eq!(merged, InferredType::String);
3032
3033        // Reverse
3034        let merged = s.merge(&n);
3035        assert_eq!(merged, InferredType::String);
3036    }
3037
3038    #[test]
3039    fn test_inferred_type_merge_arrays() {
3040        let a1 = InferredType::Array(Box::new(InferredType::Int));
3041        let a2 = InferredType::Array(Box::new(InferredType::Float));
3042        let merged = a1.merge(&a2);
3043        assert_eq!(merged, InferredType::Array(Box::new(InferredType::Float)));
3044    }
3045
3046    #[test]
3047    fn test_inferred_type_merge_objects_same_fields() {
3048        let o1 = InferredType::Object(vec![
3049            ("a".to_string(), InferredType::Int),
3050            ("b".to_string(), InferredType::String),
3051        ]);
3052        let o2 = InferredType::Object(vec![
3053            ("a".to_string(), InferredType::Float),
3054            ("b".to_string(), InferredType::String),
3055        ]);
3056        let merged = o1.merge(&o2);
3057        if let InferredType::Object(fields) = &merged {
3058            assert_eq!(fields.len(), 2);
3059            assert_eq!(fields[0].1, InferredType::Float); // Int+Float → Float
3060            assert_eq!(fields[1].1, InferredType::String);
3061        } else {
3062            panic!("Expected Object, got {:?}", merged);
3063        }
3064    }
3065
3066    #[test]
3067    fn test_inferred_type_merge_objects_different_fields() {
3068        let o1 = InferredType::Object(vec![
3069            ("a".to_string(), InferredType::Int),
3070        ]);
3071        let o2 = InferredType::Object(vec![
3072            ("b".to_string(), InferredType::String),
3073        ]);
3074        let merged = o1.merge(&o2);
3075        assert_eq!(merged, InferredType::Mixed);
3076    }
3077
3078    #[test]
3079    fn test_inferred_type_merge_incompatible() {
3080        let s = InferredType::String;
3081        let i = InferredType::Int;
3082        let merged = s.merge(&i);
3083        assert_eq!(merged, InferredType::Mixed);
3084    }
3085
3086    #[test]
3087    fn test_inferred_type_to_field_type() {
3088        let schemas = IndexMap::new();
3089
3090        assert_eq!(InferredType::Null.to_field_type(&schemas).base, "string");
3091        assert!(InferredType::Null.to_field_type(&schemas).nullable);
3092        assert_eq!(InferredType::Bool.to_field_type(&schemas).base, "bool");
3093        assert_eq!(InferredType::Int.to_field_type(&schemas).base, "int");
3094        assert_eq!(InferredType::Float.to_field_type(&schemas).base, "float");
3095        assert_eq!(InferredType::String.to_field_type(&schemas).base, "string");
3096        assert_eq!(InferredType::Mixed.to_field_type(&schemas).base, "any");
3097
3098        // Array type
3099        let arr_type = InferredType::Array(Box::new(InferredType::Int));
3100        let ft = arr_type.to_field_type(&schemas);
3101        assert_eq!(ft.base, "int");
3102        assert!(ft.is_array);
3103
3104        // Object with no matching schema → "any" (not "object", which is a value-only type)
3105        let obj_type = InferredType::Object(vec![("x".to_string(), InferredType::Int)]);
3106        assert_eq!(obj_type.to_field_type(&schemas).base, "any");
3107    }
3108
3109    #[test]
3110    fn test_inferred_type_to_field_type_with_matching_schema() {
3111        let mut schemas = IndexMap::new();
3112        let mut schema = Schema::new("point");
3113        schema.add_field("x", FieldType::new("int"));
3114        schema.add_field("y", FieldType::new("int"));
3115        schemas.insert("point".to_string(), schema);
3116
3117        let obj_type = InferredType::Object(vec![
3118            ("x".to_string(), InferredType::Int),
3119            ("y".to_string(), InferredType::Int),
3120        ]);
3121        let ft = obj_type.to_field_type(&schemas);
3122        assert_eq!(ft.base, "point");
3123    }
3124
3125    #[test]
3126    fn test_infer_type_special_values() {
3127        // Bytes, Ref, Tagged, Timestamp, Map all become Mixed
3128        assert_eq!(infer_type(&Value::Bytes(vec![1, 2])), InferredType::Mixed);
3129        assert_eq!(infer_type(&Value::Ref("x".to_string())), InferredType::Mixed);
3130        assert_eq!(infer_type(&Value::Tagged("t".to_string(), Box::new(Value::Null))), InferredType::Mixed);
3131        assert_eq!(infer_type(&Value::Timestamp(0, 0)), InferredType::Mixed);
3132        assert_eq!(infer_type(&Value::Map(vec![])), InferredType::Mixed);
3133
3134        // Empty array
3135        if let InferredType::Array(inner) = infer_type(&Value::Array(vec![])) {
3136            assert_eq!(*inner, InferredType::Mixed);
3137        } else {
3138            panic!("Expected Array");
3139        }
3140
3141        // UInt becomes Int
3142        assert_eq!(infer_type(&Value::UInt(42)), InferredType::Int);
3143    }
3144
3145    #[test]
3146    fn test_json_with_schemas_empty_nested_object_roundtrip() {
3147        // Regression: fuzzer found that [{"n":{}}] crashes because the inferrer
3148        // emits "object" as a field type, which the parser rejects as value-only.
3149        let doc = TeaLeaf::from_json_with_schemas(r#"[{"n":{}}]"#).unwrap();
3150        let tl_text = doc.to_tl_with_schemas();
3151        // Must re-parse without error
3152        let reparsed = TeaLeaf::parse(&tl_text).unwrap();
3153        assert_eq!(doc.data.len(), reparsed.data.len());
3154    }
3155
3156    // =========================================================================
3157    // Coverage: to_tl_with_schemas() edge cases
3158    // =========================================================================
3159
3160    #[test]
3161    fn test_to_tl_with_schemas_no_schemas() {
3162        let mut data = IndexMap::new();
3163        data.insert("name".to_string(), Value::String("alice".to_string()));
3164        let doc = TeaLeaf { data, schemas: IndexMap::new(), unions: IndexMap::new(), is_root_array: false };
3165
3166        let output = doc.to_tl_with_schemas();
3167        assert!(output.contains("name: alice"), "Should use dumps() format");
3168        assert!(!output.contains("@struct"), "No schemas");
3169    }
3170
3171    #[test]
3172    fn test_to_tl_with_schemas_root_array() {
3173        let mut data = IndexMap::new();
3174        data.insert("root".to_string(), Value::Array(vec![Value::Int(1), Value::Int(2)]));
3175        let doc = TeaLeaf { data, schemas: IndexMap::new(), unions: IndexMap::new(), is_root_array: true };
3176
3177        let output = doc.to_tl_with_schemas();
3178        assert!(output.starts_with("@root-array"), "Should have root-array directive");
3179    }
3180
3181    // =========================================================================
3182    // Coverage: write_value_with_schemas() for special types
3183    // =========================================================================
3184
3185    #[test]
3186    fn test_dumps_with_schemas_all_types() {
3187        let mut schemas = IndexMap::new();
3188        let mut schema = Schema::new("item");
3189        schema.add_field("id", FieldType::new("int"));
3190        schema.add_field("name", FieldType::new("string"));
3191        schemas.insert("item".to_string(), schema);
3192
3193        let mut data = IndexMap::new();
3194        // Array matching schema → @table
3195        data.insert("items".to_string(), Value::Array(vec![
3196            Value::Object(vec![
3197                ("id".to_string(), Value::Int(1)),
3198                ("name".to_string(), Value::String("Widget".to_string())),
3199            ].into_iter().collect()),
3200        ]));
3201        // Special types
3202        data.insert("ref_val".to_string(), Value::Ref("target".to_string()));
3203        data.insert("tag_val".to_string(), Value::Tagged("ok".to_string(), Box::new(Value::Int(200))));
3204        data.insert("map_val".to_string(), Value::Map(vec![
3205            (Value::Int(1), Value::String("one".to_string())),
3206        ]));
3207        data.insert("bytes_val".to_string(), Value::Bytes(vec![0xde, 0xad]));
3208        data.insert("ts_val".to_string(), Value::Timestamp(0, 0));
3209        data.insert("ts_millis".to_string(), Value::Timestamp(1705315800123, 0));
3210
3211        let schema_order = vec!["item".to_string()];
3212        let output = dumps_with_schemas(&data, &schemas, &schema_order, &IndexMap::new(), &[]);
3213
3214        assert!(output.contains("@struct item"), "Should contain schema def");
3215        assert!(output.contains("@table item"), "Should use @table format");
3216        assert!(output.contains("!target"), "Should contain ref");
3217        assert!(output.contains(":ok 200"), "Should contain tagged");
3218        assert!(output.contains("@map {"), "Should contain map");
3219        assert!(output.contains("b\"dead\""), "Should contain bytes literal");
3220        assert!(output.contains("1970-01-01T00:00:00Z"), "Should contain timestamp");
3221        assert!(output.contains(".123Z"), "Should contain millis timestamp");
3222    }
3223
3224    #[test]
3225    fn test_dumps_with_schemas_object_value() {
3226        let schemas = IndexMap::new();
3227        let mut data = IndexMap::new();
3228        data.insert("config".to_string(), Value::Object(
3229            vec![
3230                ("host".to_string(), Value::String("localhost".to_string())),
3231                ("port".to_string(), Value::Int(8080)),
3232            ].into_iter().collect()
3233        ));
3234
3235        let output = dumps_with_schemas(&data, &schemas, &[], &IndexMap::new(), &[]);
3236        assert!(output.contains("config:"), "Should contain key");
3237        assert!(output.contains("{"), "Should contain object");
3238    }
3239
3240    #[test]
3241    fn test_write_tuple_with_nested_schema() {
3242        // Test tuple writing with nested struct fields
3243        let mut schemas = IndexMap::new();
3244
3245        let mut addr = Schema::new("address");
3246        addr.add_field("city", FieldType::new("string"));
3247        addr.add_field("zip", FieldType::new("string"));
3248        schemas.insert("address".to_string(), addr);
3249
3250        let mut user = Schema::new("user");
3251        user.add_field("name", FieldType::new("string"));
3252        user.add_field("home", FieldType::new("address"));
3253        schemas.insert("user".to_string(), user);
3254
3255        let mut data = IndexMap::new();
3256        data.insert("users".to_string(), Value::Array(vec![
3257            Value::Object(vec![
3258                ("name".to_string(), Value::String("Alice".to_string())),
3259                ("home".to_string(), Value::Object(vec![
3260                    ("city".to_string(), Value::String("Boston".to_string())),
3261                    ("zip".to_string(), Value::String("02101".to_string())),
3262                ].into_iter().collect())),
3263            ].into_iter().collect()),
3264        ]));
3265
3266        let schema_order = vec!["address".to_string(), "user".to_string()];
3267        let output = dumps_with_schemas(&data, &schemas, &schema_order, &IndexMap::new(), &[]);
3268
3269        assert!(output.contains("@struct address"), "Should have address schema");
3270        assert!(output.contains("@struct user"), "Should have user schema");
3271        assert!(output.contains("@table user"), "Should use @table for users");
3272        // Nested tuples
3273        assert!(output.contains("("), "Should have tuple format");
3274    }
3275
3276    #[test]
3277    fn test_write_tuple_with_schema_array_field() {
3278        // Test tuple writing with array fields that have schemas
3279        let mut schemas = IndexMap::new();
3280
3281        let mut tag = Schema::new("tag");
3282        tag.add_field("name", FieldType::new("string"));
3283        schemas.insert("tag".to_string(), tag);
3284
3285        let mut item = Schema::new("item");
3286        item.add_field("id", FieldType::new("int"));
3287        item.add_field("tags", FieldType { base: "tag".to_string(), nullable: false, is_array: true });
3288        schemas.insert("item".to_string(), item);
3289
3290        let mut data = IndexMap::new();
3291        data.insert("items".to_string(), Value::Array(vec![
3292            Value::Object(vec![
3293                ("id".to_string(), Value::Int(1)),
3294                ("tags".to_string(), Value::Array(vec![
3295                    Value::Object(vec![
3296                        ("name".to_string(), Value::String("rust".to_string())),
3297                    ].into_iter().collect()),
3298                ])),
3299            ].into_iter().collect()),
3300        ]));
3301
3302        let schema_order = vec!["tag".to_string(), "item".to_string()];
3303        let output = dumps_with_schemas(&data, &schemas, &schema_order, &IndexMap::new(), &[]);
3304
3305        assert!(output.contains("@table item"), "Should use @table for items");
3306    }
3307
3308    #[test]
3309    fn test_write_schema_array_empty() {
3310        let schemas = IndexMap::new();
3311        let schema = Schema::new("empty");
3312        let mut out = String::new();
3313        write_schema_array(&mut out, &Value::Array(vec![]), &schema, &schemas, 0);
3314        assert_eq!(out, "[]");
3315    }
3316
3317    #[test]
3318    fn test_write_schema_array_non_array_fallback() {
3319        let schemas = IndexMap::new();
3320        let schema = Schema::new("test");
3321        let mut out = String::new();
3322        write_schema_array(&mut out, &Value::Int(42), &schema, &schemas, 0);
3323        assert_eq!(out, "42");
3324    }
3325
3326    #[test]
3327    fn test_write_tuple_missing_field() {
3328        // Test that missing fields in object produce ~
3329        let schemas = IndexMap::new();
3330        let mut schema = Schema::new("test");
3331        schema.add_field("present", FieldType::new("int"));
3332        schema.add_field("missing", FieldType::new("string"));
3333
3334        let value = Value::Object(
3335            vec![("present".to_string(), Value::Int(42))].into_iter().collect()
3336        );
3337
3338        let mut out = String::new();
3339        write_tuple(&mut out, &value, &schema, &schemas, 0);
3340        assert!(out.contains("42"), "Present field should be written");
3341        assert!(out.contains("~"), "Missing field should be ~");
3342    }
3343
3344    #[test]
3345    fn test_write_tuple_non_object() {
3346        // When tuple receives a non-object value
3347        let schemas = IndexMap::new();
3348        let schema = Schema::new("test");
3349
3350        let mut out = String::new();
3351        write_tuple(&mut out, &Value::Int(42), &schema, &schemas, 0);
3352        assert_eq!(out, "42");
3353    }
3354
3355    // =========================================================================
3356    // Coverage: array_matches_schema()
3357    // =========================================================================
3358
3359    #[test]
3360    fn test_array_matches_schema_empty() {
3361        let schema = Schema::new("test");
3362        assert!(!array_matches_schema(&[], &schema));
3363    }
3364
3365    #[test]
3366    fn test_array_matches_schema_non_object() {
3367        let schema = Schema::new("test");
3368        assert!(!array_matches_schema(&[Value::Int(1)], &schema));
3369    }
3370
3371    #[test]
3372    fn test_array_matches_schema_matching() {
3373        let mut schema = Schema::new("user");
3374        schema.add_field("name", FieldType::new("string"));
3375        schema.add_field("age", FieldType::new("int"));
3376
3377        let arr = vec![Value::Object(vec![
3378            ("name".to_string(), Value::String("Alice".to_string())),
3379            ("age".to_string(), Value::Int(30)),
3380        ].into_iter().collect())];
3381
3382        assert!(array_matches_schema(&arr, &schema));
3383    }
3384
3385    // =========================================================================
3386    // Coverage: from_dto, from_dto_array, to_dto, to_dto_vec
3387    // =========================================================================
3388
3389    #[test]
3390    fn test_from_dto_and_back() {
3391        use crate::convert::{FromTeaLeaf, ConvertError};
3392
3393        let doc = TeaLeaf::from_dto("greeting", &"hello".to_string());
3394        assert_eq!(doc.get("greeting").unwrap().as_str(), Some("hello"));
3395
3396        let result: std::result::Result<String, ConvertError> = String::from_tealeaf_value(doc.get("greeting").unwrap());
3397        assert_eq!(result.unwrap(), "hello");
3398    }
3399
3400    #[test]
3401    fn test_from_dto_array() {
3402        let items = vec!["apple".to_string(), "banana".to_string()];
3403        let doc = TeaLeaf::from_dto_array("fruits", &items);
3404        let arr = doc.get("fruits").unwrap().as_array().unwrap();
3405        assert_eq!(arr.len(), 2);
3406        assert_eq!(arr[0].as_str(), Some("apple"));
3407    }
3408
3409    #[test]
3410    fn test_to_dto_missing_key() {
3411        let doc = TeaLeaf::new(IndexMap::new(), IndexMap::new());
3412        let result: Result<String> = doc.to_dto("missing");
3413        assert!(result.is_err());
3414    }
3415
3416    #[test]
3417    fn test_to_dto_vec() {
3418        let mut data = IndexMap::new();
3419        data.insert("items".to_string(), Value::Array(vec![
3420            Value::String("a".to_string()),
3421            Value::String("b".to_string()),
3422        ]));
3423        let doc = TeaLeaf::new(IndexMap::new(), data);
3424        let result: Vec<String> = doc.to_dto_vec("items").unwrap();
3425        assert_eq!(result, vec!["a", "b"]);
3426    }
3427
3428    #[test]
3429    fn test_to_dto_vec_not_array() {
3430        let mut data = IndexMap::new();
3431        data.insert("item".to_string(), Value::String("not_an_array".to_string()));
3432        let doc = TeaLeaf::new(IndexMap::new(), data);
3433        let result: Result<Vec<String>> = doc.to_dto_vec("item");
3434        assert!(result.is_err());
3435    }
3436
3437    #[test]
3438    fn test_to_dto_vec_missing_key() {
3439        let doc = TeaLeaf::new(IndexMap::new(), IndexMap::new());
3440        let result: Result<Vec<String>> = doc.to_dto_vec("missing");
3441        assert!(result.is_err());
3442    }
3443
3444    // =========================================================================
3445    // Coverage: set_root_array, SchemaInferrer edge cases
3446    // =========================================================================
3447
3448    #[test]
3449    fn test_set_root_array() {
3450        let mut doc = TeaLeaf::new(IndexMap::new(), IndexMap::new());
3451        assert!(!doc.is_root_array);
3452        doc.set_root_array(true);
3453        assert!(doc.is_root_array);
3454    }
3455
3456    #[test]
3457    fn test_schema_inferrer_non_uniform_array() {
3458        // Array with different object structures should not create a schema
3459        let json = r#"{"items": [{"a": 1}, {"b": 2}]}"#;
3460        let doc = TeaLeaf::from_json_with_schemas(json).unwrap();
3461        assert!(doc.schema("item").is_none(), "Non-uniform array should not produce schema");
3462    }
3463
3464    #[test]
3465    fn test_schema_inferrer_mixed_types_in_array() {
3466        // Array with non-objects
3467        let json = r#"{"items": [1, 2, 3]}"#;
3468        let doc = TeaLeaf::from_json_with_schemas(json).unwrap();
3469        assert!(doc.schema("item").is_none(), "Non-object array should not produce schema");
3470    }
3471
3472    #[test]
3473    fn test_schema_inferrer_empty_array() {
3474        let json = r#"{"items": []}"#;
3475        let doc = TeaLeaf::from_json_with_schemas(json).unwrap();
3476        assert!(doc.schema("item").is_none(), "Empty array should not produce schema");
3477    }
3478
3479    #[test]
3480    fn test_schema_inferrer_duplicate_schema_name() {
3481        // Two arrays that would produce the same schema name
3482        let json = r#"{
3483            "items": [{"id": 1, "name": "A"}],
3484            "nested": {"items": [{"id": 2, "name": "B"}]}
3485        }"#;
3486        let doc = TeaLeaf::from_json_with_schemas(json).unwrap();
3487        // Should have "item" schema (first one wins)
3488        assert!(doc.schema("item").is_some());
3489    }
3490
3491    #[test]
3492    fn test_schema_inferrer_int_float_merge() {
3493        // Field that has int in one record and float in another
3494        let json = r#"{"values": [{"x": 1}, {"x": 2.5}]}"#;
3495        let doc = TeaLeaf::from_json_with_schemas(json).unwrap();
3496        let schema = doc.schema("value").unwrap();
3497        let x_field = schema.fields.iter().find(|f| f.name == "x").unwrap();
3498        assert_eq!(x_field.field_type.base, "float", "Int+Float merge should produce float");
3499    }
3500
3501    #[test]
3502    fn test_schema_inference_with_root_array() {
3503        let json = r#"[{"id": 1, "name": "Alice"}, {"id": 2, "name": "Bob"}]"#;
3504        let doc = TeaLeaf::from_json_with_schemas(json).unwrap();
3505        // Root array is stored under "root" key - the schema name should be derived from "root"
3506        // The singularize of "root" is "root" (no trailing s)
3507        // Actually, root arrays aren't typically analyzed because the key is "root" and it goes through analyze_value
3508        let root_val = doc.get("root").unwrap().as_array().unwrap();
3509        assert_eq!(root_val.len(), 2);
3510    }
3511
3512    // =========================================================================
3513    // Coverage: dumps_with_schemas with quoting in schemas
3514    // =========================================================================
3515
3516    #[test]
3517    fn test_dumps_with_schemas_string_quoting_in_tuples() {
3518        let mut schemas = IndexMap::new();
3519        let mut schema = Schema::new("item");
3520        schema.add_field("name", FieldType::new("string"));
3521        schemas.insert("item".to_string(), schema);
3522
3523        let mut data = IndexMap::new();
3524        data.insert("items".to_string(), Value::Array(vec![
3525            Value::Object(vec![
3526                ("name".to_string(), Value::String("hello world".to_string())),
3527            ].into_iter().collect()),
3528        ]));
3529
3530        let schema_order = vec!["item".to_string()];
3531        let output = dumps_with_schemas(&data, &schemas, &schema_order, &IndexMap::new(), &[]);
3532        assert!(output.contains("\"hello world\""), "String with space should be quoted in tuple");
3533    }
3534
3535    #[test]
3536    fn test_dumps_with_schemas_array_without_schema() {
3537        // Array that doesn't match any schema
3538        let schemas = IndexMap::new();
3539        let mut data = IndexMap::new();
3540        data.insert("nums".to_string(), Value::Array(vec![Value::Int(1), Value::Int(2)]));
3541
3542        let output = dumps_with_schemas(&data, &schemas, &[], &IndexMap::new(), &[]);
3543        assert!(output.contains("[1, 2]"), "Should use regular array format");
3544    }
3545
3546    // =========================================================================
3547    // Coverage: convenience functions open(), parse(), root array to_json
3548    // =========================================================================
3549
3550    #[test]
3551    fn test_open_convenience_function() {
3552        // Write a binary file first, then open with the convenience function
3553        let dir = std::env::temp_dir();
3554        let path = dir.join("test_open_conv.tlbx");
3555
3556        let mut data = IndexMap::new();
3557        data.insert("x".to_string(), Value::Int(42));
3558        let doc = TeaLeaf::new(IndexMap::new(), data);
3559        doc.compile(&path, false).unwrap();
3560
3561        let reader = super::open(&path).unwrap();
3562        assert_eq!(reader.get("x").unwrap().as_int(), Some(42));
3563        std::fs::remove_file(&path).ok();
3564    }
3565
3566    #[test]
3567    fn test_parse_convenience_function() {
3568        let doc = super::parse("greeting: hello").unwrap();
3569        assert_eq!(doc.get("greeting").unwrap().as_str(), Some("hello"));
3570    }
3571
3572    #[test]
3573    fn test_to_json_root_array() {
3574        let mut data = IndexMap::new();
3575        data.insert("root".to_string(), Value::Array(vec![Value::Int(1), Value::Int(2)]));
3576        let mut doc = TeaLeaf::new(IndexMap::new(), data);
3577        doc.set_root_array(true);
3578
3579        let json = doc.to_json().unwrap();
3580        let parsed: serde_json::Value = serde_json::from_str(&json).unwrap();
3581        assert!(parsed.is_array(), "Root array to_json should output array");
3582        assert_eq!(parsed.as_array().unwrap().len(), 2);
3583    }
3584
3585    #[test]
3586    fn test_to_json_compact_root_array() {
3587        let mut data = IndexMap::new();
3588        data.insert("root".to_string(), Value::Array(vec![Value::Int(1)]));
3589        let mut doc = TeaLeaf::new(IndexMap::new(), data);
3590        doc.set_root_array(true);
3591
3592        let json = doc.to_json_compact().unwrap();
3593        assert_eq!(json, "[1]");
3594    }
3595
3596    #[test]
3597    fn test_infer_type_bool_value() {
3598        let it = infer_type(&Value::Bool(true));
3599        assert!(matches!(it, InferredType::Bool));
3600    }
3601
3602    #[test]
3603    fn test_schema_inference_nested_object_fields() {
3604        // JSON with nested objects inside array items
3605        let json = r#"{"records": [
3606            {"id": 1, "details": {"city": "NYC", "zip": "10001"}},
3607            {"id": 2, "details": {"city": "LA", "zip": "90001"}}
3608        ]}"#;
3609        let doc = TeaLeaf::from_json_with_schemas(json).unwrap();
3610        // Should infer both "record" and "detail" schemas
3611        assert!(doc.schema("record").is_some(), "Should infer record schema");
3612    }
3613
3614    #[test]
3615    fn test_schema_inference_not_all_objects_returns_early() {
3616        // Array where second element is not an object
3617        let json = r#"{"items": [{"a": 1}, "not_an_object"]}"#;
3618        let doc = TeaLeaf::from_json_with_schemas(json).unwrap();
3619        assert!(doc.schema("item").is_none(), "Mixed array should not produce schema");
3620    }
3621
3622    #[test]
3623    fn test_to_tl_with_schemas_with_nested_array_field() {
3624        // Schema with an array-typed field
3625        let mut schemas = IndexMap::new();
3626        let mut schema = Schema::new("user");
3627        schema.add_field("name", FieldType::new("string"));
3628        schema.add_field("tags", FieldType::new("string").array());
3629        schemas.insert("user".to_string(), schema);
3630
3631        let mut data = IndexMap::new();
3632        let mut obj = IndexMap::new();
3633        obj.insert("name".to_string(), Value::String("Alice".into()));
3634        obj.insert("tags".to_string(), Value::Array(vec![
3635            Value::String("admin".into()),
3636            Value::String("active".into()),
3637        ]));
3638        data.insert("users".to_string(), Value::Array(vec![Value::Object(obj)]));
3639
3640        let doc = TeaLeaf::new(schemas, data);
3641        let text = doc.to_tl_with_schemas();
3642        assert!(text.contains("@struct user"), "Should have schema definition");
3643        assert!(text.contains("@table user"), "Should use table format");
3644    }
3645
3646    // =========================================================================
3647    // Issue 6: Improved schema matching
3648    // =========================================================================
3649
3650    #[test]
3651    fn test_schema_matching_nullable_fields_allowed_missing() {
3652        // Schema with nullable field should match objects missing that field
3653        let mut schemas = IndexMap::new();
3654        let mut s = Schema::new("Item");
3655        s.add_field("id", FieldType::new("int"));
3656        s.add_field("label", FieldType::new("string").nullable());
3657        schemas.insert("Item".to_string(), s);
3658
3659        let mut obj1 = IndexMap::new();
3660        obj1.insert("id".to_string(), Value::Int(1));
3661        // label is missing — but it's nullable, so it should still match
3662
3663        let doc = TeaLeaf {
3664            schemas,
3665            unions: IndexMap::new(),
3666            data: {
3667                let mut d = IndexMap::new();
3668                d.insert("items".to_string(), Value::Array(vec![Value::Object(obj1)]));
3669                d
3670            },
3671            is_root_array: false,
3672        };
3673        let result = doc.find_schema_for_value(doc.data.get("items").unwrap(), "items");
3674        assert!(result.is_some(), "Should match schema when nullable field is missing");
3675        assert_eq!(result.unwrap().name, "Item");
3676    }
3677
3678    #[test]
3679    fn test_schema_matching_rejects_extra_keys() {
3680        // Objects with extra keys not in schema should not match
3681        let mut schemas = IndexMap::new();
3682        let mut s = Schema::new("Point");
3683        s.add_field("x", FieldType::new("int"));
3684        s.add_field("y", FieldType::new("int"));
3685        schemas.insert("Point".to_string(), s);
3686
3687        let mut obj = IndexMap::new();
3688        obj.insert("x".to_string(), Value::Int(1));
3689        obj.insert("y".to_string(), Value::Int(2));
3690        obj.insert("z".to_string(), Value::Int(3)); // extra field
3691
3692        let doc = TeaLeaf {
3693            schemas,
3694            unions: IndexMap::new(),
3695            data: {
3696                let mut d = IndexMap::new();
3697                d.insert("points".to_string(), Value::Array(vec![Value::Object(obj)]));
3698                d
3699            },
3700            is_root_array: false,
3701        };
3702        let result = doc.find_schema_for_value(doc.data.get("points").unwrap(), "points");
3703        assert!(result.is_none(), "Should NOT match schema when extra keys are present");
3704    }
3705
3706    #[test]
3707    fn test_schema_matching_empty_array_no_matching_name() {
3708        let mut schemas = IndexMap::new();
3709        let mut s = Schema::new("Anything");
3710        s.add_field("x", FieldType::new("int"));
3711        schemas.insert("Anything".to_string(), s);
3712
3713        let doc = TeaLeaf {
3714            schemas,
3715            unions: IndexMap::new(),
3716            data: {
3717                let mut d = IndexMap::new();
3718                d.insert("empty".to_string(), Value::Array(vec![]));
3719                d
3720            },
3721            is_root_array: false,
3722        };
3723        let result = doc.find_schema_for_value(doc.data.get("empty").unwrap(), "empty");
3724        assert!(result.is_none(), "Empty array should return None when no schema name matches");
3725    }
3726
3727    #[test]
3728    fn test_schema_matching_empty_array_matches_by_name() {
3729        let mut schemas = IndexMap::new();
3730        let mut s = Schema::new("item");
3731        s.add_field("id", FieldType::new("int"));
3732        schemas.insert("item".to_string(), s);
3733
3734        let doc = TeaLeaf {
3735            schemas,
3736            unions: IndexMap::new(),
3737            data: {
3738                let mut d = IndexMap::new();
3739                d.insert("items".to_string(), Value::Array(vec![]));
3740                d
3741            },
3742            is_root_array: false,
3743        };
3744        let result = doc.find_schema_for_value(doc.data.get("items").unwrap(), "items");
3745        assert!(result.is_some(), "Empty array should match schema by singularized key name");
3746        assert_eq!(result.unwrap().name, "item");
3747    }
3748
3749    // =========================================================================
3750    // Issue 12: Negative timestamp formatting
3751    // =========================================================================
3752
3753    #[test]
3754    fn test_negative_timestamp_formatting() {
3755        // 1969-12-31T23:59:59Z = -1000 ms (1 second before epoch)
3756        let formatted = format_timestamp_millis(-1000, 0);
3757        assert_eq!(formatted, "1969-12-31T23:59:59Z");
3758    }
3759
3760    #[test]
3761    fn test_negative_timestamp_with_millis() {
3762        // -500 ms = 1969-12-31T23:59:59.500Z
3763        let formatted = format_timestamp_millis(-500, 0);
3764        assert_eq!(formatted, "1969-12-31T23:59:59.500Z");
3765    }
3766
3767    #[test]
3768    fn test_negative_timestamp_full_day() {
3769        // -86400000 ms = exactly one day before epoch = 1969-12-31T00:00:00Z
3770        let formatted = format_timestamp_millis(-86_400_000, 0);
3771        assert_eq!(formatted, "1969-12-31T00:00:00Z");
3772    }
3773
3774    #[test]
3775    fn test_epoch_timestamp() {
3776        let formatted = format_timestamp_millis(0, 0);
3777        assert_eq!(formatted, "1970-01-01T00:00:00Z");
3778    }
3779
3780    #[test]
3781    fn test_positive_timestamp_with_millis() {
3782        // 1123ms = 1 second + 123ms after epoch
3783        let formatted = format_timestamp_millis(1123, 0);
3784        assert_eq!(formatted, "1970-01-01T00:00:01.123Z");
3785    }
3786
3787    #[test]
3788    fn test_negative_timestamp_json_export() {
3789        let mut data = IndexMap::new();
3790        data.insert("ts".to_string(), Value::Timestamp(-1000, 0));
3791        let doc = TeaLeaf::new(IndexMap::new(), data);
3792        let json = doc.to_json().unwrap();
3793        assert!(json.contains("1969-12-31"), "Negative timestamp should format as pre-epoch date: {}", json);
3794    }
3795
3796    // =========================================================================
3797    // Issue 7: Deterministic serialization (IndexMap preserves insertion order)
3798    // =========================================================================
3799
3800    #[test]
3801    fn test_compile_deterministic_key_order() {
3802        // Two documents with the same data in the same insertion order
3803        // should produce identical binary output
3804        let dir = std::env::temp_dir();
3805        let path1 = dir.join("test_deterministic_1.tlbx");
3806        let path2 = dir.join("test_deterministic_2.tlbx");
3807
3808        let mut data1 = IndexMap::new();
3809        data1.insert("alpha".to_string(), Value::Int(1));
3810        data1.insert("beta".to_string(), Value::Int(2));
3811        data1.insert("gamma".to_string(), Value::Int(3));
3812        let doc1 = TeaLeaf::new(IndexMap::new(), data1);
3813        doc1.compile(&path1, false).unwrap();
3814
3815        let mut data2 = IndexMap::new();
3816        data2.insert("alpha".to_string(), Value::Int(1));
3817        data2.insert("beta".to_string(), Value::Int(2));
3818        data2.insert("gamma".to_string(), Value::Int(3));
3819        let doc2 = TeaLeaf::new(IndexMap::new(), data2);
3820        doc2.compile(&path2, false).unwrap();
3821
3822        let bytes1 = std::fs::read(&path1).unwrap();
3823        let bytes2 = std::fs::read(&path2).unwrap();
3824        assert_eq!(bytes1, bytes2, "Binary output should be identical for same insertion order");
3825
3826        std::fs::remove_file(&path1).ok();
3827        std::fs::remove_file(&path2).ok();
3828    }
3829
3830    #[test]
3831    fn test_dumps_deterministic_key_order() {
3832        // dumps() preserves IndexMap insertion order deterministically
3833        let mut data = IndexMap::new();
3834        data.insert("zebra".to_string(), Value::Int(3));
3835        data.insert("alpha".to_string(), Value::Int(1));
3836        data.insert("middle".to_string(), Value::Int(2));
3837
3838        let output1 = dumps(&data);
3839        let output2 = dumps(&data);
3840        assert_eq!(output1, output2, "dumps() should be deterministic");
3841        // Keys should appear in insertion order (IndexMap preserves insertion order)
3842        let lines: Vec<&str> = output1.trim().lines().collect();
3843        assert!(lines[0].starts_with("zebra:"), "First key should be 'zebra', got: {}", lines[0]);
3844        assert!(lines[1].starts_with("alpha:"), "Second key should be 'alpha', got: {}", lines[1]);
3845        assert!(lines[2].starts_with("middle:"), "Third key should be 'middle', got: {}", lines[2]);
3846    }
3847
3848    // =========================================================================
3849    // Order-preservation integration tests
3850    // =========================================================================
3851
3852    #[test]
3853    fn test_json_parse_preserves_key_order() {
3854        // JSON with intentionally non-alphabetical keys
3855        let json = r#"{"zebra": 1, "apple": 2, "mango": 3, "banana": 4}"#;
3856        let doc = TeaLeaf::from_json(json).unwrap();
3857        let keys: Vec<&String> = doc.data.keys().collect();
3858        assert_eq!(keys, &["zebra", "apple", "mango", "banana"],
3859            "JSON parse should preserve key insertion order");
3860    }
3861
3862    #[test]
3863    fn test_json_roundtrip_preserves_key_order() {
3864        let json = r#"{"zebra": 1, "apple": 2, "mango": 3}"#;
3865        let doc = TeaLeaf::from_json(json).unwrap();
3866        let json_out = doc.to_json().unwrap();
3867        // Parse back and verify order
3868        let parsed: serde_json::Value = serde_json::from_str(&json_out).unwrap();
3869        let keys: Vec<&str> = parsed.as_object().unwrap().keys().map(|s| s.as_str()).collect();
3870        assert_eq!(keys, &["zebra", "apple", "mango"],
3871            "JSON round-trip should preserve key order");
3872    }
3873
3874    #[test]
3875    fn test_tl_text_preserves_section_order() {
3876        let input = "zebra: 1\napple: 2\nmango: 3\n";
3877        let doc = TeaLeaf::parse(input).unwrap();
3878        let keys: Vec<&String> = doc.data.keys().collect();
3879        assert_eq!(keys, &["zebra", "apple", "mango"],
3880            "TL text parse should preserve section order");
3881
3882        // Serialize back and verify order
3883        let output = doc.to_tl_with_schemas();
3884        let lines: Vec<&str> = output.trim().lines().collect();
3885        assert!(lines[0].starts_with("zebra:"), "got: {}", lines[0]);
3886        assert!(lines[1].starts_with("apple:"), "got: {}", lines[1]);
3887        assert!(lines[2].starts_with("mango:"), "got: {}", lines[2]);
3888    }
3889
3890    #[test]
3891    fn test_binary_roundtrip_preserves_section_order() {
3892        let json = r#"{"zebra": 1, "apple": 2, "mango": 3, "banana": 4}"#;
3893        let doc = TeaLeaf::from_json(json).unwrap();
3894
3895        let dir = std::env::temp_dir();
3896        let path = dir.join("test_order_preserve.tlbx");
3897        doc.compile(&path, false).unwrap();
3898
3899        let reader = crate::Reader::open(&path).unwrap();
3900        let doc2 = TeaLeaf::from_reader(&reader).unwrap();
3901        let keys: Vec<&String> = doc2.data.keys().collect();
3902        assert_eq!(keys, &["zebra", "apple", "mango", "banana"],
3903            "Binary round-trip should preserve section order");
3904        std::fs::remove_file(&path).ok();
3905    }
3906
3907    #[test]
3908    fn test_object_field_order_preserved_through_binary() {
3909        let json = r#"{"data": {"z_last": 1, "a_first": 2, "m_middle": 3}}"#;
3910        let doc = TeaLeaf::from_json(json).unwrap();
3911
3912        let dir = std::env::temp_dir();
3913        let path = dir.join("test_obj_order.tlbx");
3914        doc.compile(&path, false).unwrap();
3915
3916        let reader = crate::Reader::open(&path).unwrap();
3917        let val = reader.get("data").unwrap();
3918        let obj = val.as_object().unwrap();
3919        let keys: Vec<&String> = obj.keys().collect();
3920        assert_eq!(keys, &["z_last", "a_first", "m_middle"],
3921            "Object field order should be preserved through binary round-trip");
3922        std::fs::remove_file(&path).ok();
3923    }
3924
3925    #[test]
3926    fn test_nested_object_order_preserved() {
3927        let json = r#"{"outer": {"z": {"c": 3, "a": 1, "b": 2}, "a": {"x": 10, "w": 20}}}"#;
3928        let doc = TeaLeaf::from_json(json).unwrap();
3929        let tl = doc.to_tl_with_schemas();
3930
3931        // Parse back and check nested order
3932        let doc2 = TeaLeaf::parse(&tl).unwrap();
3933        let outer = doc2.get("outer").unwrap().as_object().unwrap();
3934        let outer_keys: Vec<&String> = outer.keys().collect();
3935        assert_eq!(outer_keys, &["z", "a"], "Outer keys order preserved");
3936
3937        let z_obj = outer.get("z").unwrap().as_object().unwrap();
3938        let z_keys: Vec<&String> = z_obj.keys().collect();
3939        assert_eq!(z_keys, &["c", "a", "b"], "Nested object keys order preserved");
3940    }
3941
3942    #[test]
3943    fn test_schema_order_preserved_in_text() {
3944        let input = r#"
3945            @struct Zebra (z_name: string)
3946            @struct Apple (a_name: string)
3947            items: [1, 2, 3]
3948        "#;
3949        let doc = TeaLeaf::parse(input).unwrap();
3950        let schema_keys: Vec<&String> = doc.schemas.keys().collect();
3951        assert_eq!(schema_keys, &["Zebra", "Apple"],
3952            "Schema definition order should be preserved");
3953    }
3954
3955    // -------------------------------------------------------------------------
3956    // Fuzz regression tests (full serialize/roundtrip paths)
3957    // -------------------------------------------------------------------------
3958
3959    #[test]
3960    fn test_fuzz_crash_ba05f4f8_serialize_day_zero_no_panic() {
3961        // Regression: fuzz_serialize crash-ba05f4f81615e2bf2b01137126cd772c6c0cc6d2
3962        // Timestamp with month=0 or day=0 caused u32 underflow in days_from_epoch.
3963        // Exercises the full fuzz_serialize path: parse → to_json → to_tl → re-parse.
3964        let inputs = [
3965            "ts: 2024-01-00T10:30:00Z",  // day=0
3966            "ts: 2024-00-15T10:30:00Z",  // month=0
3967            "ts: 6000-00-00T00:00:00Z",  // both zero
3968        ];
3969        for input in &inputs {
3970            // parse must not panic (should return Err)
3971            let result = TeaLeaf::parse(input);
3972            if let Ok(tl) = result {
3973                let _ = tl.to_json();
3974                let _ = tl.to_json_compact();
3975                let text = tl.to_tl_with_schemas();
3976                let _ = TeaLeaf::parse(&text);
3977            }
3978        }
3979    }
3980
3981    #[test]
3982    fn test_fuzz_crash_b085ba0e_roundtrip_day_zero_no_panic() {
3983        // Regression: fuzz_roundtrip crash-b085ba0e656f074031d8c4cb5173313785fa79d1
3984        // Same days_from_epoch underflow, hit through the roundtrip path.
3985        // Exercises the full fuzz_roundtrip path: parse → compile → read → walk.
3986        let inputs = [
3987            "ts: 4001-03-00T00:00:00Z",  // day=0 (pattern from artifact)
3988            "ts: 4401-03-00T00:00:00Z",  // variant
3989        ];
3990        for input in &inputs {
3991            let result = TeaLeaf::parse(input);
3992            if let Ok(tl) = result {
3993                let tmp = tempfile::NamedTempFile::new().unwrap();
3994                if tl.compile(tmp.path(), false).is_ok() {
3995                    let bytes = std::fs::read(tmp.path()).unwrap();
3996                    if let Ok(reader) = Reader::from_bytes(bytes) {
3997                        for key in reader.keys() {
3998                            let _ = reader.get(key);
3999                        }
4000                    }
4001                }
4002            }
4003        }
4004    }
4005
4006    #[test]
4007    fn test_fuzz_crash_48767e10_json_schemas_bare_dash_roundtrip() {
4008        // Regression: fuzz_json_schemas crash-48767e10b4ec71542bfbee2bc358b1e21831a259
4009        // JSON string "-" was serialized unquoted, causing re-parse failure.
4010        for input in [
4011            r#""-""#, r#""+""#, r#""--""#, r#""-foo""#,
4012            r#"{"a": "-"}"#, r#"{"a": "+"}"#,
4013            "\"\\u0660\"",  // Arabic-Indic digit zero
4014        ] {
4015            let tl = TeaLeaf::from_json_with_schemas(input);
4016            if let Ok(tl) = tl {
4017                let text = tl.to_tl_with_schemas();
4018                let reparsed = TeaLeaf::parse(&text);
4019                assert!(
4020                    reparsed.is_ok(),
4021                    "re-parse failed for JSON input {}",
4022                    input,
4023                );
4024            }
4025        }
4026    }
4027
4028    #[test]
4029    fn test_fuzz_crash_820dac71_empty_key_roundtrip() {
4030        // Regression: fuzz_json_schemas crash-820dac71c95d324067cd88de5f24897c65ace57a
4031        // JSON object with empty key was serialized without quoting, losing the key.
4032        for input in [
4033            r#"{"":{}}"#,                // empty key with empty object
4034            r#"[{"":{}}}]"#,             // root array variant (crash-66a8d85176f76ed68ada9f9526abe4efd8352f27)
4035            r#"{"":"value"}"#,            // empty key with string value
4036        ] {
4037            if let Ok(tl) = TeaLeaf::from_json_with_schemas(input) {
4038                let text = tl.to_tl_with_schemas();
4039                let reparsed = TeaLeaf::parse(&text);
4040                assert!(
4041                    reparsed.is_ok(),
4042                    "re-parse failed for JSON input {}",
4043                    input,
4044                );
4045            }
4046        }
4047    }
4048
4049    #[test]
4050    fn test_fuzz_crash_66a8d851_root_array_empty_key() {
4051        // Regression: fuzz_json_schemas crash-66a8d85176f76ed68ada9f9526abe4efd8352f27
4052        // Root array with empty-key object: schema inference + to_tl_with_schemas roundtrip
4053        let input = r#"[{"":{}}]"#;
4054        if let Ok(tl) = TeaLeaf::from_json_with_schemas(input) {
4055            let text = tl.to_tl_with_schemas();
4056            let reparsed = TeaLeaf::parse(&text);
4057            assert!(reparsed.is_ok(), "re-parse failed for root array with empty key");
4058        }
4059    }
4060
4061    #[test]
4062    fn test_fuzz_crash_847a9194_uint_roundtrip() {
4063        // Regression: fuzz_json_schemas crash-847a919462bb567fab268023a5a29d04e92db779
4064        // Large u64 values (> i64::MAX) were demoted to f64 on re-parse, losing precision.
4065        let input = "9999999999999999999";  // > i64::MAX, fits in u64
4066        let tl = TeaLeaf::from_json_with_schemas(input).unwrap();
4067        let text = tl.to_tl_with_schemas();
4068        let reparsed = TeaLeaf::parse(&text).unwrap();
4069        let orig = tl.data.get("root").unwrap();
4070        let re = reparsed.data.get("root").unwrap();
4071        assert_eq!(orig, re, "UInt roundtrip mismatch");
4072    }
4073
4074    #[test]
4075    fn test_fuzz_crash_3902c5cc_float_infinity_roundtrip() {
4076        // Regression: fuzz_serialize crash-3902c5cc99e5e4150d08d40372c86207fbc6db7f
4077        // 5e550 and -5e550 overflow f64 and are now stored as JsonNumber.
4078        // NaN remains Float(NaN).
4079        let tl = TeaLeaf::parse("b: NaN").unwrap();
4080        let text = tl.to_tl_with_schemas();
4081        let reparsed = TeaLeaf::parse(&text).unwrap();
4082        let orig = tl.data.get("b").unwrap();
4083        let re = reparsed.data.get("b").unwrap();
4084        match (orig, re) {
4085            (Value::Float(a), Value::Float(b)) => {
4086                assert_eq!(a.to_bits(), b.to_bits(), "NaN roundtrip failed");
4087            }
4088            _ => panic!("expected Float, got {:?} / {:?}", orig, re),
4089        }
4090
4091        // 5e550 and -5e550 are now JsonNumber (overflow f64)
4092        for input in &["b: 5e550", "b: -5e550"] {
4093            let tl = TeaLeaf::parse(input).unwrap();
4094            let text = tl.to_tl_with_schemas();
4095            let reparsed = TeaLeaf::parse(&text).unwrap();
4096            let orig = tl.data.get("b").unwrap();
4097            let re = reparsed.data.get("b").unwrap();
4098            match (orig, re) {
4099                (Value::JsonNumber(a), Value::JsonNumber(b)) => {
4100                    assert_eq!(a, b, "JsonNumber roundtrip failed for {}", input);
4101                }
4102                _ => panic!("expected JsonNumber, got {:?} / {:?}", orig, re),
4103            }
4104        }
4105    }
4106
4107    #[test]
4108    fn test_needs_quoting_bare_sign() {
4109        assert!(needs_quoting("-"));
4110        assert!(needs_quoting("+"));
4111        assert!(needs_quoting("--"));
4112        assert!(needs_quoting("-foo"));
4113        assert!(needs_quoting("+bar"));
4114        assert!(needs_quoting("-1")); // negative number
4115        assert!(needs_quoting("+1")); // positive number
4116        assert!(needs_quoting("\u{0660}")); // Arabic-Indic digit zero
4117        assert!(!needs_quoting("hello"));
4118        assert!(!needs_quoting("foo-bar"));
4119    }
4120
4121    #[test]
4122    fn test_fuzz_crash_nan_string_needs_quoting() {
4123        // Regression: fuzz_parse/fuzz_serialize crash — string "NaN" must be quoted
4124        // to avoid re-parsing as Float(NaN).
4125        assert!(needs_quoting("NaN"));
4126        assert!(needs_quoting("inf"));
4127        assert!(needs_quoting("Infinity"));
4128
4129        // Roundtrip: String("NaN") must survive parse → dumps → re-parse
4130        for word in &["NaN", "inf", "Infinity"] {
4131            let input = format!("a: \"{}\"", word);
4132            let tl = TeaLeaf::parse(&input).unwrap();
4133            assert!(matches!(tl.get("a"), Some(Value::String(_))));
4134            let text = dumps(&tl.data);
4135            let reparsed = TeaLeaf::parse(&text).unwrap();
4136            assert_eq!(
4137                reparsed.get("a").unwrap().as_str(),
4138                Some(*word),
4139                "roundtrip failed for string {:?}",
4140                word,
4141            );
4142        }
4143    }
4144
4145    #[test]
4146    fn test_json_any_type_compile_roundtrip() {
4147        // Regression: from_json_with_schemas infers "any" for fields whose nested objects
4148        // don't match a schema. encode_typed_value must fall back to generic encoding
4149        // instead of erroring with "requires a schema for encoding".
4150        use tempfile::NamedTempFile;
4151
4152        let json = r#"[
4153            {"name": "alice", "meta": {"x": 1}},
4154            {"name": "bob",   "meta": {"y": "two", "z": true}}
4155        ]"#;
4156        let doc = TeaLeaf::from_json_with_schemas(json).unwrap();
4157        // "meta" has varying shapes → inferred as "any"
4158        let temp = NamedTempFile::new().unwrap();
4159        doc.compile(temp.path(), false).expect("compile with 'any' field must not error");
4160
4161        // Read back and verify data survived
4162        let reader = Reader::open(temp.path()).unwrap();
4163        assert_eq!(reader.keys().len(), doc.data.len());
4164    }
4165
4166    #[test]
4167    fn fuzz_repro_json_schema_bool_field_name() {
4168        // Fuzz crash: field named "bool" conflicts with type keyword
4169        let input = r#"[{"bool":{"b":2}}]"#;
4170        let tl = TeaLeaf::from_json_with_schemas(input).unwrap();
4171        let tl_text = tl.to_tl_with_schemas();
4172        let reparsed = TeaLeaf::parse(&tl_text)
4173            .unwrap_or_else(|e| panic!("Re-parse failed: {e}\nTL text:\n{tl_text}"));
4174        assert_eq!(tl.data.len(), reparsed.data.len(), "key count mismatch");
4175        for (key, orig_val) in &tl.data {
4176            let re_val = reparsed.data.get(key).unwrap_or_else(|| panic!("lost key '{key}'"));
4177            assert_eq!(orig_val, re_val, "value mismatch for key '{key}'");
4178        }
4179    }
4180
4181    /// Helper: verify that a JSON field named after a built-in type correctly
4182    /// round-trips through TL text when schema inference is used.
4183    fn assert_builtin_name_text_roundtrip(type_name: &str, inner_json: &str) {
4184        let input = format!(r#"[{{"{type_name}":{inner_json}}}]"#);
4185        let tl = TeaLeaf::from_json_with_schemas(&input)
4186            .unwrap_or_else(|e| panic!("[{type_name}] from_json_with_schemas failed: {e}"));
4187        let tl_text = tl.to_tl_with_schemas();
4188
4189        // The schema should appear in the text output
4190        assert!(
4191            tl_text.contains(&format!("@struct {type_name}")),
4192            "[{type_name}] expected @struct {type_name} in TL text:\n{tl_text}"
4193        );
4194
4195        let reparsed = TeaLeaf::parse(&tl_text)
4196            .unwrap_or_else(|e| panic!("[{type_name}] re-parse failed: {e}\nTL text:\n{tl_text}"));
4197
4198        assert_eq!(
4199            tl.data.len(), reparsed.data.len(),
4200            "[{type_name}] key count mismatch"
4201        );
4202        for (key, orig_val) in &tl.data {
4203            let re_val = reparsed.data.get(key)
4204                .unwrap_or_else(|| panic!("[{type_name}] lost key '{key}'"));
4205            assert_eq!(orig_val, re_val, "[{type_name}] value mismatch for key '{key}'");
4206        }
4207    }
4208
4209    #[test]
4210    fn schema_name_shadows_builtin_bool() {
4211        assert_builtin_name_text_roundtrip("bool", r#"{"x":1}"#);
4212    }
4213
4214    #[test]
4215    fn schema_name_shadows_builtin_int() {
4216        // Inner value is a string so field type "string" doesn't collide with schema "int"
4217        assert_builtin_name_text_roundtrip("int", r#"{"x":"hello"}"#);
4218    }
4219
4220    #[test]
4221    fn schema_name_shadows_builtin_int8() {
4222        assert_builtin_name_text_roundtrip("int8", r#"{"x":"hello"}"#);
4223    }
4224
4225    #[test]
4226    fn schema_name_shadows_builtin_int16() {
4227        assert_builtin_name_text_roundtrip("int16", r#"{"x":"hello"}"#);
4228    }
4229
4230    #[test]
4231    fn schema_name_shadows_builtin_int32() {
4232        assert_builtin_name_text_roundtrip("int32", r#"{"x":"hello"}"#);
4233    }
4234
4235    #[test]
4236    fn schema_name_shadows_builtin_int64() {
4237        assert_builtin_name_text_roundtrip("int64", r#"{"x":"hello"}"#);
4238    }
4239
4240    #[test]
4241    fn schema_name_shadows_builtin_uint() {
4242        assert_builtin_name_text_roundtrip("uint", r#"{"x":"hello"}"#);
4243    }
4244
4245    #[test]
4246    fn schema_name_shadows_builtin_uint8() {
4247        assert_builtin_name_text_roundtrip("uint8", r#"{"x":"hello"}"#);
4248    }
4249
4250    #[test]
4251    fn schema_name_shadows_builtin_uint16() {
4252        assert_builtin_name_text_roundtrip("uint16", r#"{"x":"hello"}"#);
4253    }
4254
4255    #[test]
4256    fn schema_name_shadows_builtin_uint32() {
4257        assert_builtin_name_text_roundtrip("uint32", r#"{"x":"hello"}"#);
4258    }
4259
4260    #[test]
4261    fn schema_name_shadows_builtin_uint64() {
4262        assert_builtin_name_text_roundtrip("uint64", r#"{"x":"hello"}"#);
4263    }
4264
4265    #[test]
4266    fn schema_name_shadows_builtin_float() {
4267        assert_builtin_name_text_roundtrip("float", r#"{"x":1}"#);
4268    }
4269
4270    #[test]
4271    fn schema_name_shadows_builtin_float32() {
4272        assert_builtin_name_text_roundtrip("float32", r#"{"x":1}"#);
4273    }
4274
4275    #[test]
4276    fn schema_name_shadows_builtin_float64() {
4277        assert_builtin_name_text_roundtrip("float64", r#"{"x":1}"#);
4278    }
4279
4280    #[test]
4281    fn schema_name_shadows_builtin_string() {
4282        assert_builtin_name_text_roundtrip("string", r#"{"x":1}"#);
4283    }
4284
4285    // Note: "bytes" is not tested via JSON inference because singularize("bytes") = "byte"
4286    // which is NOT a built-in type. The direct TL-parsing test below covers "bytes" as a
4287    // schema name.
4288
4289    #[test]
4290    fn schema_name_shadows_builtin_timestamp() {
4291        assert_builtin_name_text_roundtrip("timestamp", r#"{"x":1}"#);
4292    }
4293
4294    /// Test built-in type names as schemas via direct TL text parsing (not JSON inference).
4295    /// This covers names that can't arise through singularization (like "bytes").
4296    #[test]
4297    fn schema_name_shadows_builtin_direct_tl_parse() {
4298        let test_cases = &[
4299            // (TL text, expected field name, expected inner value)
4300            (
4301                "@struct bytes (x: int)\n@struct root (data: bytes)\nroot: @table root [\n  ((42))\n]",
4302                "data",
4303                Value::Object(IndexMap::from([
4304                    ("x".to_string(), Value::Int(42)),
4305                ])),
4306            ),
4307            (
4308                "@struct bool (a: int, b: string)\n@struct root (flag: bool)\nroot: @table root [\n  ((1, hello))\n]",
4309                "flag",
4310                Value::Object(IndexMap::from([
4311                    ("a".to_string(), Value::Int(1)),
4312                    ("b".to_string(), Value::String("hello".into())),
4313                ])),
4314            ),
4315        ];
4316
4317        for (tl_text, field_name, expected_val) in test_cases {
4318            let doc = TeaLeaf::parse(tl_text)
4319                .unwrap_or_else(|e| panic!("parse failed for field '{field_name}': {e}\n{tl_text}"));
4320
4321            let root_arr = doc.data.get("root").expect("missing 'root' key");
4322            if let Value::Array(arr) = root_arr {
4323                if let Value::Object(obj) = &arr[0] {
4324                    let actual = obj.get(*field_name)
4325                        .unwrap_or_else(|| panic!("missing field '{field_name}'"));
4326                    assert_eq!(actual, expected_val, "mismatch for field '{field_name}'");
4327                } else {
4328                    panic!("expected Object, got {:?}", arr[0]);
4329                }
4330            } else {
4331                panic!("expected Array, got {:?}", root_arr);
4332            }
4333        }
4334    }
4335
4336    /// Self-referencing case: @struct int (x: int) where the inner field type
4337    /// matches the schema name. The LParen guard ensures `x: int` resolves to
4338    /// primitive int (next token is a literal, not `(`).
4339    #[test]
4340    fn schema_name_shadows_builtin_self_referencing() {
4341        // JSON: [{"int": {"x": 1}}] — creates @struct int (x: int)
4342        // The inner field "x: int" must resolve to primitive int, not struct "int"
4343        let input = r#"[{"int":{"x":1}}]"#;
4344        let tl = TeaLeaf::from_json_with_schemas(input).unwrap();
4345        let tl_text = tl.to_tl_with_schemas();
4346
4347        assert!(tl_text.contains("@struct int"), "expected @struct int in:\n{tl_text}");
4348
4349        let reparsed = TeaLeaf::parse(&tl_text)
4350            .unwrap_or_else(|e| panic!("re-parse failed: {e}\nTL text:\n{tl_text}"));
4351
4352        for (key, orig_val) in &tl.data {
4353            let re_val = reparsed.data.get(key)
4354                .unwrap_or_else(|| panic!("lost key '{key}'"));
4355            assert_eq!(orig_val, re_val, "value mismatch for key '{key}'");
4356        }
4357    }
4358
4359    /// Self-referencing: @struct int (int: int) — field name AND type both "int"
4360    #[test]
4361    fn schema_name_shadows_builtin_self_ref_same_field_name() {
4362        let tl_text = "\
4363@struct int (int: int)
4364@struct root (val: int)
4365
4366root: @table root [
4367  ((42))
4368]
4369";
4370        let doc = TeaLeaf::parse(tl_text)
4371            .unwrap_or_else(|e| panic!("parse failed: {e}\nTL text:\n{tl_text}"));
4372
4373        let json = doc.to_json().unwrap();
4374        eprintln!("=== JSON ===\n{json}");
4375
4376        // The root array should have one element with field "val" as an Object
4377        let root_arr = doc.data.get("root").expect("missing 'root'");
4378        if let Value::Array(arr) = root_arr {
4379            if let Value::Object(obj) = &arr[0] {
4380                let val = obj.get("val").expect("missing field 'val'");
4381                // val should be Object({"int": Int(42)}) — struct "int" with field "int" = 42
4382                assert_eq!(
4383                    val,
4384                    &Value::Object(IndexMap::from([
4385                        ("int".to_string(), Value::Int(42)),
4386                    ])),
4387                    "expected struct instance, got {val:?}"
4388                );
4389            } else {
4390                panic!("expected Object, got {:?}", arr[0]);
4391            }
4392        } else {
4393            panic!("expected Array, got {root_arr:?}");
4394        }
4395    }
4396
4397    /// Duplicate @struct declarations: second overwrites first
4398    #[test]
4399    fn schema_name_shadows_builtin_duplicate_struct_decl() {
4400        let tl_text = "\
4401@struct int (x: int)
4402@struct int (int: int)
4403@struct root (val: int)
4404
4405root: @table root [
4406  ((42))
4407]
4408";
4409        let result = TeaLeaf::parse(tl_text);
4410        match &result {
4411            Ok(doc) => {
4412                let json = doc.to_json().unwrap();
4413                eprintln!("=== JSON ===\n{json}");
4414                eprintln!("=== schemas ===");
4415                for (name, schema) in &doc.schemas {
4416                    let fields: Vec<String> = schema.fields.iter()
4417                        .map(|f| format!("{}: {}", f.name, f.field_type.base))
4418                        .collect();
4419                    eprintln!("  @struct {name} ({})", fields.join(", "));
4420                }
4421            }
4422            Err(e) => {
4423                eprintln!("=== parse error ===\n{e}");
4424            }
4425        }
4426        // Assert that parsing succeeds
4427        result.unwrap();
4428    }
4429
4430    /// Multiple built-in-named schemas in the same document
4431    #[test]
4432    fn schema_name_shadows_multiple_builtins() {
4433        let input = r#"[{"bool":{"a":1},"int":{"b":"hello"},"float":{"c":true}}]"#;
4434        let tl = TeaLeaf::from_json_with_schemas(input).unwrap();
4435        let tl_text = tl.to_tl_with_schemas();
4436
4437        assert!(tl_text.contains("@struct bool"), "missing @struct bool");
4438        assert!(tl_text.contains("@struct int"), "missing @struct int");
4439        assert!(tl_text.contains("@struct float"), "missing @struct float");
4440
4441        let reparsed = TeaLeaf::parse(&tl_text)
4442            .unwrap_or_else(|e| panic!("re-parse failed: {e}\nTL text:\n{tl_text}"));
4443
4444        for (key, orig_val) in &tl.data {
4445            let re_val = reparsed.data.get(key)
4446                .unwrap_or_else(|| panic!("lost key '{key}'"));
4447            assert_eq!(orig_val, re_val, "value mismatch for key '{key}'");
4448        }
4449    }
4450
4451
4452    /// Fuzz crash: singularize("s") → "" (empty string), producing invalid
4453    /// @struct definitions with missing names.
4454    #[test]
4455    fn fuzz_repro_singularize_single_char_s() {
4456        let input = r#"[{"s":{"b":1}}]"#;
4457        let tl = TeaLeaf::from_json_with_schemas(input).unwrap();
4458        let tl_text = tl.to_tl_with_schemas();
4459
4460        // Schema name must not be empty — singularize("s") should return "s"
4461        assert!(
4462            tl_text.contains("@struct s"),
4463            "expected @struct s in TL text:\n{tl_text}"
4464        );
4465
4466        let reparsed = TeaLeaf::parse(&tl_text)
4467            .unwrap_or_else(|e| panic!("Re-parse failed: {e}\nTL text:\n{tl_text}"));
4468        assert_eq!(tl.data.len(), reparsed.data.len(), "key count mismatch");
4469        for (key, orig_val) in &tl.data {
4470            let re_val = reparsed.data.get(key).unwrap_or_else(|| panic!("lost key '{key}'"));
4471            assert_eq!(orig_val, re_val, "value mismatch for key '{key}'");
4472        }
4473    }
4474
4475    #[test]
4476    fn singularize_does_not_produce_empty_string() {
4477        // All single-character inputs must pass through unchanged
4478        for c in 'a'..='z' {
4479            let s = String::from(c);
4480            let result = super::singularize(&s);
4481            assert!(!result.is_empty(), "singularize({s:?}) produced empty string");
4482            assert_eq!(result, s, "singularize({s:?}) should return {s:?}, got {result:?}");
4483        }
4484    }
4485
4486    /// Fuzz crash: field name with dots causes value mismatch on roundtrip
4487    #[test]
4488    fn fuzz_repro_dots_in_field_name() {
4489        // Fuzz regression: field "root" inside root-array wrapper both singularize to "root",
4490        // causing analyze_nested_objects to create a correct inner schema that analyze_array
4491        // then overwrites with a self-referencing @struct root (root: root).
4492        let input = r#"[{"root":{"Z.lll.i0...A":44444440.0}}]"#;
4493        let tl = TeaLeaf::from_json_with_schemas(input).unwrap();
4494        let tl_text = tl.to_tl_with_schemas();
4495        let reparsed = TeaLeaf::parse(&tl_text)
4496            .unwrap_or_else(|e| panic!("Re-parse failed: {e}\nTL text:\n{tl_text}"));
4497        assert_eq!(tl.data.len(), reparsed.data.len(), "key count mismatch");
4498        for (key, orig_val) in &tl.data {
4499            let re_val = reparsed.data.get(key).unwrap_or_else(|| panic!("lost key '{key}'"));
4500            assert_eq!(orig_val, re_val, "value mismatch for key '{key}'");
4501        }
4502    }
4503
4504    #[test]
4505    fn schema_name_collision_field_matches_parent() {
4506        // When an array field name singularizes to the same name as its parent schema,
4507        // the inner schema should be preserved (not overwritten with a self-reference).
4508        // This tests the general case, not just the root-array wrapper collision.
4509        let input = r#"{"items": [{"items": {"a": 1, "b": 2}}]}"#;
4510        let tl = TeaLeaf::from_json_with_schemas(input).unwrap();
4511        let tl_text = tl.to_tl_with_schemas();
4512        let reparsed = TeaLeaf::parse(&tl_text)
4513            .unwrap_or_else(|e| panic!("Re-parse failed: {e}\nTL text:\n{tl_text}"));
4514        for (key, orig_val) in &tl.data {
4515            let re_val = reparsed.data.get(key).unwrap_or_else(|| panic!("lost key '{key}'"));
4516            assert_eq!(orig_val, re_val, "value mismatch for key '{key}'");
4517        }
4518    }
4519
4520    #[test]
4521    fn analyze_node_nesting_stress_test() {
4522        // Stress test: "node" appears at many nesting levels with different shapes.
4523        // Schema inference should NOT create conflicting schemas or lose data.
4524        let input = r#"{
4525          "node": {
4526            "id": 1,
4527            "name": "root",
4528            "active": true,
4529            "node": {
4530              "id": "child-1",
4531              "metrics": {
4532                "node": {
4533                  "value": 42.7,
4534                  "unit": "ms",
4535                  "thresholds": [10, 20, 30]
4536                }
4537              },
4538              "node": [
4539                {
4540                  "id": 2,
4541                  "enabled": false
4542                },
4543                {
4544                  "id": 3,
4545                  "enabled": "sometimes",
4546                  "node": {
4547                    "status": null,
4548                    "confidence": 0.93
4549                  }
4550                }
4551              ]
4552            }
4553          },
4554          "nodeMetadata": {
4555            "node": {
4556              "version": 5,
4557              "checksum": "a94a8fe5ccb19ba61c4c0873d391e987",
4558              "flags": {
4559                "node": true
4560              }
4561            }
4562          }
4563        }"#;
4564
4565        let tl = TeaLeaf::from_json_with_schemas(input).unwrap();
4566        eprintln!("=== schemas ({}) ===", tl.schemas.len());
4567        for (name, schema) in &tl.schemas {
4568            let fields: Vec<String> = schema.fields.iter()
4569                .map(|f| format!("{}: {}{}{}", f.name, f.field_type.base,
4570                    if f.field_type.is_array { "[]" } else { "" },
4571                    if f.field_type.nullable { "?" } else { "" }))
4572                .collect();
4573            eprintln!("  @struct {name} ({})", fields.join(", "));
4574        }
4575        let tl_text = tl.to_tl_with_schemas();
4576        eprintln!("=== TL text ===\n{tl_text}");
4577
4578        // Core correctness check: round-trip must preserve all data
4579        let reparsed = TeaLeaf::parse(&tl_text)
4580            .unwrap_or_else(|e| panic!("Re-parse failed: {e}\nTL text:\n{tl_text}"));
4581        for (key, orig_val) in &tl.data {
4582            let re_val = reparsed.data.get(key).unwrap_or_else(|| panic!("lost key '{key}'"));
4583            assert_eq!(orig_val, re_val, "value mismatch for key '{key}'");
4584        }
4585    }
4586
4587    #[test]
4588    fn schema_collision_recursive_arrays() {
4589        // "nodes" appears as arrays at two levels with different shapes.
4590        // Inner: [{name, value}], Outer: [{name, nodes}]
4591        // Both singularize to "node" — only one schema can exist.
4592        let input = r#"{
4593          "nodes": [
4594            {
4595              "name": "parent",
4596              "nodes": [
4597                {"name": "child", "value": 42}
4598              ]
4599            }
4600          ]
4601        }"#;
4602        let tl = TeaLeaf::from_json_with_schemas(input).unwrap();
4603        eprintln!("=== schemas ({}) ===", tl.schemas.len());
4604        for (name, schema) in &tl.schemas {
4605            let fields: Vec<String> = schema.fields.iter()
4606                .map(|f| format!("{}: {}{}{}", f.name, f.field_type.base,
4607                    if f.field_type.is_array { "[]" } else { "" },
4608                    if f.field_type.nullable { "?" } else { "" }))
4609                .collect();
4610            eprintln!("  @struct {name} ({})", fields.join(", "));
4611        }
4612        let tl_text = tl.to_tl_with_schemas();
4613        eprintln!("=== TL text ===\n{tl_text}");
4614        let reparsed = TeaLeaf::parse(&tl_text)
4615            .unwrap_or_else(|e| panic!("Re-parse failed: {e}\nTL text:\n{tl_text}"));
4616        for (key, orig_val) in &tl.data {
4617            let re_val = reparsed.data.get(key).unwrap_or_else(|| panic!("lost key '{key}'"));
4618            assert_eq!(orig_val, re_val, "value mismatch for key '{key}'");
4619        }
4620    }
4621
4622    #[test]
4623    fn schema_collision_recursive_same_shape() {
4624        // "nodes" appears at two levels but SAME shape [{id, name}].
4625        // Schema "node" created for inner array should also work for outer.
4626        let input = r#"{
4627          "nodes": [
4628            {
4629              "id": 1,
4630              "name": "parent",
4631              "children": [
4632                {"id": 10, "name": "child-a"},
4633                {"id": 11, "name": "child-b"}
4634              ]
4635            },
4636            {
4637              "id": 2,
4638              "name": "sibling",
4639              "children": [
4640                {"id": 20, "name": "child-c"}
4641              ]
4642            }
4643          ]
4644        }"#;
4645        let tl = TeaLeaf::from_json_with_schemas(input).unwrap();
4646        eprintln!("=== schemas ({}) ===", tl.schemas.len());
4647        for (name, schema) in &tl.schemas {
4648            let fields: Vec<String> = schema.fields.iter()
4649                .map(|f| format!("{}: {}{}{}", f.name, f.field_type.base,
4650                    if f.field_type.is_array { "[]" } else { "" },
4651                    if f.field_type.nullable { "?" } else { "" }))
4652                .collect();
4653            eprintln!("  @struct {name} ({})", fields.join(", "));
4654        }
4655        let tl_text = tl.to_tl_with_schemas();
4656        eprintln!("=== TL text ===\n{tl_text}");
4657        let reparsed = TeaLeaf::parse(&tl_text)
4658            .unwrap_or_else(|e| panic!("Re-parse failed: {e}\nTL text:\n{tl_text}"));
4659        for (key, orig_val) in &tl.data {
4660            let re_val = reparsed.data.get(key).unwrap_or_else(|| panic!("lost key '{key}'"));
4661            assert_eq!(orig_val, re_val, "value mismatch for key '{key}'");
4662        }
4663    }
4664
4665    #[test]
4666    fn schema_collision_three_level_nesting() {
4667        // "nodes" at 3 levels: L1 and L2 have same shape {name, nodes},
4668        // L3 has different shape {name, score}. All singularize to "node".
4669        // The deepest schema wins (depth-first); outer levels fall back to
4670        // generic format. No data loss at any level.
4671        let input = r#"{
4672          "nodes": [
4673            {
4674              "name": "grandparent",
4675              "nodes": [
4676                {
4677                  "name": "parent",
4678                  "nodes": [
4679                    {"name": "leaf-a", "score": 99.5},
4680                    {"name": "leaf-b", "score": 42.0}
4681                  ]
4682                }
4683              ]
4684            },
4685            {
4686              "name": "uncle",
4687              "nodes": [
4688                {
4689                  "name": "cousin",
4690                  "nodes": [
4691                    {"name": "leaf-c", "score": 77.3}
4692                  ]
4693                }
4694              ]
4695            }
4696          ]
4697        }"#;
4698
4699        let tl = TeaLeaf::from_json_with_schemas(input).unwrap();
4700        eprintln!("=== schemas ({}) ===", tl.schemas.len());
4701        for (name, schema) in &tl.schemas {
4702            let fields: Vec<String> = schema.fields.iter()
4703                .map(|f| format!("{}: {}{}{}", f.name, f.field_type.base,
4704                    if f.field_type.is_array { "[]" } else { "" },
4705                    if f.field_type.nullable { "?" } else { "" }))
4706                .collect();
4707            eprintln!("  @struct {name} ({})", fields.join(", "));
4708        }
4709        let tl_text = tl.to_tl_with_schemas();
4710        eprintln!("=== TL text ===\n{tl_text}");
4711
4712        let reparsed = TeaLeaf::parse(&tl_text)
4713            .unwrap_or_else(|e| panic!("Re-parse failed: {e}\nTL text:\n{tl_text}"));
4714        for (key, orig_val) in &tl.data {
4715            let re_val = reparsed.data.get(key).unwrap_or_else(|| panic!("lost key '{key}'"));
4716            assert_eq!(orig_val, re_val, "value mismatch for key '{key}'");
4717        }
4718    }
4719
4720    #[test]
4721    fn schema_collision_three_level_divergent_leaves() {
4722        // L1: [{name, nodes}], L2: [{name, nodes}] (same shape),
4723        // L3: [{id, value}] in one branch, [{identifier, points}] in another.
4724        // The depth-first analysis only sees the first branch's L3 shape.
4725        // The second branch's L3 must fall back to generic format.
4726        let input = r#"{
4727          "nodes": [
4728            {
4729              "name": "grandparent",
4730              "nodes": [
4731                {
4732                  "name": "parent",
4733                  "nodes": [
4734                    {"id": "leaf-a", "value": 99.5},
4735                    {"id": "leaf-b", "value": 42.0}
4736                  ]
4737                }
4738              ]
4739            },
4740            {
4741              "name": "uncle",
4742              "nodes": [
4743                {
4744                  "name": "cousin",
4745                  "nodes": [
4746                    {"identifier": "leaf-c", "points": 77.3}
4747                  ]
4748                }
4749              ]
4750            }
4751          ]
4752        }"#;
4753
4754        let tl = TeaLeaf::from_json_with_schemas(input).unwrap();
4755        eprintln!("=== schemas ({}) ===", tl.schemas.len());
4756        for (name, schema) in &tl.schemas {
4757            let fields: Vec<String> = schema.fields.iter()
4758                .map(|f| format!("{}: {}{}{}", f.name, f.field_type.base,
4759                    if f.field_type.is_array { "[]" } else { "" },
4760                    if f.field_type.nullable { "?" } else { "" }))
4761                .collect();
4762            eprintln!("  @struct {name} ({})", fields.join(", "));
4763        }
4764        let tl_text = tl.to_tl_with_schemas();
4765        eprintln!("=== TL text ===\n{tl_text}");
4766
4767        let reparsed = TeaLeaf::parse(&tl_text)
4768            .unwrap_or_else(|e| panic!("Re-parse failed: {e}\nTL text:\n{tl_text}"));
4769        for (key, orig_val) in &tl.data {
4770            let re_val = reparsed.data.get(key).unwrap_or_else(|| panic!("lost key '{key}'"));
4771            assert_eq!(orig_val, re_val, "value mismatch for key '{key}'");
4772        }
4773    }
4774}