Skip to main content

tealeaf/
lib.rs

//! TeaLeaf - Schema-aware data format
//!
//! # Example
//!
//! ```rust
//! use tealeaf::{TeaLeaf, Value};
//!
//! let doc = TeaLeaf::parse(r#"
//!     @struct user (id: int, name: string)
//!     users: @table user [
//!         (1, alice),
//!         (2, bob),
//!     ]
//! "#).unwrap();
//!
//! let users = doc.get("users").unwrap();
//! ```
18
19mod types;
20mod lexer;
21mod parser;
22mod writer;
23mod reader;
24pub mod convert;
25pub mod builder;
26
27pub use types::{Error, Result, TLType, FieldType, Field, Schema, Union, Variant, Value, ObjectMap, MAGIC, VERSION, VERSION_MAJOR, VERSION_MINOR, HEADER_SIZE, MAX_STRING_LENGTH, MAX_OBJECT_FIELDS, MAX_ARRAY_LENGTH};
28pub use indexmap::IndexMap;
29pub use lexer::{Lexer, Token, TokenKind};
30pub use parser::Parser;
31pub use writer::Writer;
32pub use reader::Reader;
33pub use convert::{ToTeaLeaf, FromTeaLeaf, ConvertError, ToTeaLeafExt};
34pub use builder::TeaLeafBuilder;
35
36// Re-export derive macros when the "derive" feature is enabled
37#[cfg(feature = "derive")]
38pub use tealeaf_derive::{ToTeaLeaf, FromTeaLeaf};
39
40use std::collections::HashSet;
41use std::path::Path;
42
/// A parsed TeaLeaf document.
///
/// Bundles the schema and union definitions with the top-level key/value
/// data of one document.
pub struct TeaLeaf {
    /// Schema definitions, looked up by name via `schema()`.
    pub schemas: IndexMap<String, Schema>,
    /// Union definitions, looked up by name via `union()`.
    pub unions: IndexMap<String, Union>,
    /// Top-level values, looked up by key via `get()`.
    pub data: IndexMap<String, Value>,
    /// Tracks if the source JSON was a root-level array (for round-trip fidelity)
    is_root_array: bool,
}
51
52impl TeaLeaf {
53    /// Create a new TeaLeaf document from data and schemas.
54    ///
55    /// This constructor is primarily for programmatic document creation.
56    /// For parsing from formats, use `parse()`, `load()`, or `from_json()`.
57    pub fn new(schemas: IndexMap<String, Schema>, data: IndexMap<String, Value>) -> Self {
58        Self {
59            schemas,
60            unions: IndexMap::new(),
61            data,
62            is_root_array: false,
63        }
64    }
65
66    /// Parse TeaLeaf text format
67    pub fn parse(input: &str) -> Result<Self> {
68        let tokens = Lexer::new(input).tokenize()?;
69        let mut parser = Parser::new(tokens);
70        let data = parser.parse()?;
71        let is_root_array = parser.is_root_array();
72        let (schemas, unions) = parser.into_schemas_and_unions();
73        Ok(Self {
74            schemas,
75            unions,
76            data,
77            is_root_array,
78        })
79    }
80
81    /// Load from text file
82    ///
83    /// Include paths are resolved relative to the loaded file's directory.
84    pub fn load<P: AsRef<Path>>(path: P) -> Result<Self> {
85        let path = path.as_ref();
86        let content = std::fs::read_to_string(path)?;
87        let tokens = Lexer::new(&content).tokenize()?;
88        let mut parser = Parser::new(tokens).with_base_path(path);
89        let data = parser.parse()?;
90        let is_root_array = parser.is_root_array();
91        let (schemas, unions) = parser.into_schemas_and_unions();
92        Ok(Self {
93            schemas,
94            unions,
95            data,
96            is_root_array,
97        })
98    }
99
    /// Look up a top-level value by key.
    ///
    /// Returns `None` when the document's data map has no entry for `key`.
    pub fn get(&self, key: &str) -> Option<&Value> {
        self.data.get(key)
    }
104
    /// Look up a schema definition by name.
    ///
    /// Returns `None` when no schema is registered under `name`.
    pub fn schema(&self, name: &str) -> Option<&Schema> {
        self.schemas.get(name)
    }
109
    /// Look up a union definition by name.
    ///
    /// Returns `None` when no union is registered under `name`.
    pub fn union(&self, name: &str) -> Option<&Union> {
        self.unions.get(name)
    }
114
115    /// Compile to binary format
116    pub fn compile<P: AsRef<Path>>(&self, path: P, compress: bool) -> Result<()> {
117        let mut writer = Writer::new();
118        writer.set_root_array(self.is_root_array);
119        for (_, schema) in &self.schemas {
120            writer.add_schema(schema.clone());
121        }
122        for (_, union_def) in &self.unions {
123            writer.add_union(union_def.clone());
124        }
125        for (key, value) in &self.data {
126            let schema = self.find_schema_for_value(value, key);
127            writer.add_section(key, value, schema)?;
128        }
129        writer.write(path, compress)
130    }
131
132    fn find_schema_for_value(&self, value: &Value, key: &str) -> Option<&Schema> {
133        // Try to find a matching schema for array values
134        if let Value::Array(arr) = value {
135            if arr.is_empty() {
136                // For empty arrays, try name-based matching (singularize key → schema name)
137                let singular = singularize(key);
138                return self.schemas.values().find(|s| s.name.eq_ignore_ascii_case(&singular));
139            }
140
141            // Sample multiple elements: first, middle, last
142            let sample_indices: Vec<usize> = {
143                let mut indices = vec![0];
144                if arr.len() > 2 { indices.push(arr.len() / 2); }
145                if arr.len() > 1 { indices.push(arr.len() - 1); }
146                indices
147            };
148
149            for schema in self.schemas.values() {
150                let all_match = sample_indices.iter().all(|&i| {
151                    if let Some(Value::Object(obj)) = arr.get(i) {
152                        // All required (non-nullable) schema fields must be present
153                        schema.fields.iter().all(|f| {
154                            f.field_type.nullable || obj.contains_key(&f.name)
155                        })
156                        // All obj keys must be schema fields (no extra keys)
157                        && obj.keys().all(|k| schema.fields.iter().any(|f| f.name == *k))
158                    } else {
159                        false
160                    }
161                });
162                if all_match {
163                    return Some(schema);
164                }
165            }
166        }
167        None
168    }
169
170    /// Parse from JSON string.
171    ///
172    /// # Stability Policy
173    ///
174    /// This function follows a **"plain JSON only"** policy:
175    /// - JSON is parsed as-is with **no magic conversion**
176    /// - `{"$ref": "x"}` stays as an Object, NOT a Ref
177    /// - `{"$tag": "ok", "$value": 200}` stays as an Object, NOT a Tagged
178    /// - `"0xcafef00d"` stays as a String, NOT Bytes
179    /// - `"2024-01-15T10:30:00Z"` stays as a String, NOT a Timestamp
180    /// - `[[1, "one"], [2, "two"]]` stays as an Array, NOT a Map
181    ///
182    /// To create special TeaLeaf types, use the text format or binary API directly.
183    ///
184    /// # Number Type Inference
185    ///
186    /// - Integers that fit `i64` → `Value::Int`
187    /// - Large positive integers that fit `u64` → `Value::UInt`
188    /// - Numbers with decimals or scientific notation → `Value::Float`
189    pub fn from_json(json: &str) -> Result<Self> {
190        let json_value: serde_json::Value = serde_json::from_str(json)
191            .map_err(|e| Error::ParseError(format!("Invalid JSON: {}", e)))?;
192
193        let (data, is_root_array) = match json_value {
194            serde_json::Value::Object(obj) => {
195                let map = obj.into_iter()
196                    .map(|(k, v)| (k, json_to_tealeaf_value(v)))
197                    .collect();
198                (map, false)
199            }
200            serde_json::Value::Array(_) => {
201                // Root-level array: store under "root" key but track for round-trip
202                let mut map = IndexMap::new();
203                map.insert("root".to_string(), json_to_tealeaf_value(json_value));
204                (map, true)
205            }
206            _ => {
207                // Other primitives (string, number, bool, null) at root
208                let mut map = IndexMap::new();
209                map.insert("root".to_string(), json_to_tealeaf_value(json_value));
210                (map, false)
211            }
212        };
213
214        Ok(Self {
215            schemas: IndexMap::new(),
216            unions: IndexMap::new(),
217            data,
218            is_root_array,
219        })
220    }
221
222    /// Parse from JSON string with automatic schema inference.
223    ///
224    /// This variant analyzes the JSON structure and automatically:
225    /// - Detects arrays of uniformly-structured objects
226    /// - Infers schema names from parent keys (e.g., "products" → "product")
227    /// - Generates `@struct` definitions for uniform arrays
228    /// - Enables `@table` format output when serialized
229    ///
230    /// Use `to_tl_with_schemas()` to serialize with the inferred schemas.
231    pub fn from_json_with_schemas(json: &str) -> Result<Self> {
232        let doc = Self::from_json(json)?;
233
234        let mut inferrer = SchemaInferrer::new();
235        inferrer.infer(&doc.data);
236        let (schemas, _) = inferrer.into_schemas();
237
238        Ok(Self {
239            schemas,
240            unions: IndexMap::new(),
241            data: doc.data,
242            is_root_array: doc.is_root_array,
243        })
244    }
245
246    /// Serialize to TeaLeaf text format with schemas.
247    ///
248    /// If schemas are present (either from parsing or inference), outputs
249    /// `@struct` definitions and uses `@table` format for matching arrays.
250    ///
251    /// If this document represents a root-level JSON array (from `from_json`),
252    /// the output will include `@root-array` directive for round-trip fidelity.
253    pub fn to_tl_with_schemas(&self) -> String {
254        self.to_tl_with_options(&FormatOptions::default())
255    }
256
257    /// Serialize to compact TeaLeaf text format with schema definitions.
258    /// Removes insignificant whitespace (spaces after `:` and `,`, indentation,
259    /// blank lines) while keeping the format parseable. Table rows remain one
260    /// per line for readability.
261    pub fn to_tl_with_schemas_compact(&self) -> String {
262        self.to_tl_with_options(&FormatOptions::compact())
263    }
264
265    /// Serialize to TeaLeaf text format with custom formatting options.
266    ///
267    /// Use `FormatOptions::compact().with_compact_floats()` for maximum
268    /// token savings (strips whitespace and `.0` from whole-number floats).
269    pub fn to_tl_with_options(&self, opts: &FormatOptions) -> String {
270        let mut output = String::new();
271
272        if self.is_root_array {
273            if opts.compact {
274                output.push_str("@root-array\n");
275            } else {
276                output.push_str("@root-array\n\n");
277            }
278        }
279
280        if self.schemas.is_empty() && self.unions.is_empty() {
281            output.push_str(&dumps_with_options(&self.data, opts));
282        } else {
283            let schema_order: Vec<String> = self.schemas.keys().cloned().collect();
284            let union_order: Vec<String> = self.unions.keys().cloned().collect();
285            output.push_str(&dumps_with_schemas_with_options(
286                &self.data, &self.schemas, &schema_order,
287                &self.unions, &union_order, opts,
288            ));
289        }
290
291        output
292    }
293
294    /// Convert to JSON string (pretty-printed).
295    ///
296    /// # Stability Policy - TeaLeaf→JSON Fixed Representations
297    ///
298    /// Special TeaLeaf types serialize to JSON with these **stable formats**:
299    ///
300    /// | TeaLeaf Type | JSON Format                                    |
301    /// |------------|------------------------------------------------|
302    /// | Bytes      | `"0xcafef00d"` (lowercase hex with 0x prefix) |
303    /// | Timestamp  | `"2024-01-15T10:30:00.123Z"` (ISO 8601 UTC)   |
304    /// | Ref        | `{"$ref": "key_name"}`                         |
305    /// | Tagged     | `{"$tag": "tag_name", "$value": <value>}`     |
306    /// | Map        | `[[key1, val1], [key2, val2], ...]`           |
307    /// | Float NaN  | `null` (JSON has no NaN)                       |
308    /// | Float ±Inf | `null` (JSON has no Infinity)                  |
309    ///
310    /// These representations are **contractually stable** and will not change.
311    pub fn to_json(&self) -> Result<String> {
312        // If the source was a root-level array, return it directly (not wrapped in object)
313        if self.is_root_array {
314            if let Some(root_value) = self.data.get("root") {
315                return serde_json::to_string_pretty(&tealeaf_to_json_value(root_value))
316                    .map_err(|e| Error::ParseError(format!("JSON serialization failed: {}", e)));
317            }
318        }
319
320        let json_obj: serde_json::Map<String, serde_json::Value> = self.data
321            .iter()
322            .map(|(k, v)| (k.clone(), tealeaf_to_json_value(v)))
323            .collect();
324
325        serde_json::to_string_pretty(&serde_json::Value::Object(json_obj))
326            .map_err(|e| Error::ParseError(format!("JSON serialization failed: {}", e)))
327    }
328
329    /// Convert to compact JSON string (no pretty printing)
330    pub fn to_json_compact(&self) -> Result<String> {
331        // If the source was a root-level array, return it directly (not wrapped in object)
332        if self.is_root_array {
333            if let Some(root_value) = self.data.get("root") {
334                return serde_json::to_string(&tealeaf_to_json_value(root_value))
335                    .map_err(|e| Error::ParseError(format!("JSON serialization failed: {}", e)));
336            }
337        }
338
339        let json_obj: serde_json::Map<String, serde_json::Value> = self.data
340            .iter()
341            .map(|(k, v)| (k.clone(), tealeaf_to_json_value(v)))
342            .collect();
343
344        serde_json::to_string(&serde_json::Value::Object(json_obj))
345            .map_err(|e| Error::ParseError(format!("JSON serialization failed: {}", e)))
346    }
347
    /// Set whether the document represents a root-level array.
    ///
    /// The flag is read by `compile`, `to_tl_with_options`, `to_json` and
    /// `to_json_compact` when producing output.
    pub fn set_root_array(&mut self, is_root_array: bool) {
        self.is_root_array = is_root_array;
    }
352
353    /// Create a TeaLeaf document from a binary Reader.
354    ///
355    /// Reads all sections from the reader and carries schemas and unions through.
356    pub fn from_reader(reader: &Reader) -> Result<Self> {
357        let mut data = IndexMap::new();
358        for key in reader.keys() {
359            data.insert(key.to_string(), reader.get(key)?);
360        }
361        let schemas: IndexMap<String, Schema> = reader.schemas.iter()
362            .map(|s| (s.name.clone(), s.clone()))
363            .collect();
364        let unions: IndexMap<String, Union> = reader.unions.iter()
365            .map(|u| (u.name.clone(), u.clone()))
366            .collect();
367        let mut doc = Self {
368            schemas,
369            unions,
370            data,
371            is_root_array: reader.is_root_array(),
372        };
373        doc.set_root_array(reader.is_root_array());
374        Ok(doc)
375    }
376
377    /// Create a TeaLeaf document from a single DTO.
378    ///
379    /// The DTO is placed under the given `key` in the document data map.
380    /// Schemas are automatically collected from the DTO type.
381    pub fn from_dto<T: convert::ToTeaLeaf>(key: &str, dto: &T) -> Self {
382        let schemas = T::collect_schemas();
383        let unions = T::collect_unions();
384        let mut data = IndexMap::new();
385        data.insert(key.to_string(), dto.to_tealeaf_value());
386        let mut doc = Self::new(schemas, data);
387        doc.unions = unions;
388        doc
389    }
390
391    /// Create a TeaLeaf document from a slice of DTOs.
392    ///
393    /// The array is placed under the given `key` and schemas are
394    /// collected from the element type.
395    pub fn from_dto_array<T: convert::ToTeaLeaf>(key: &str, items: &[T]) -> Self {
396        let schemas = T::collect_schemas();
397        let unions = T::collect_unions();
398        let mut data = IndexMap::new();
399        let arr = Value::Array(items.iter().map(|i| i.to_tealeaf_value()).collect());
400        data.insert(key.to_string(), arr);
401        let mut doc = Self::new(schemas, data);
402        doc.unions = unions;
403        doc
404    }
405
406    /// Extract a DTO from this document by key.
407    pub fn to_dto<T: convert::FromTeaLeaf>(&self, key: &str) -> Result<T> {
408        let value = self
409            .get(key)
410            .ok_or_else(|| Error::MissingField(key.to_string()))?;
411        T::from_tealeaf_value(value).map_err(|e| e.into())
412    }
413
414    /// Extract all values under a key as `Vec<T>`.
415    pub fn to_dto_vec<T: convert::FromTeaLeaf>(&self, key: &str) -> Result<Vec<T>> {
416        let value = self
417            .get(key)
418            .ok_or_else(|| Error::MissingField(key.to_string()))?;
419        let arr = value
420            .as_array()
421            .ok_or_else(|| Error::ParseError("Expected array".into()))?;
422        arr.iter()
423            .map(|v| T::from_tealeaf_value(v).map_err(|e| e.into()))
424            .collect()
425    }
426}
427
428/// Convert JSON value to TeaLeaf value (best-effort)
429fn json_to_tealeaf_value(json: serde_json::Value) -> Value {
430    match json {
431        serde_json::Value::Null => Value::Null,
432        serde_json::Value::Bool(b) => Value::Bool(b),
433        serde_json::Value::Number(n) => {
434            if let Some(i) = n.as_i64() {
435                Value::Int(i)
436            } else if let Some(u) = n.as_u64() {
437                Value::UInt(u)
438            } else {
439                let raw = n.to_string();
440                // Pure integer that doesn't fit i64/u64 → preserve exactly
441                if !raw.contains('.') && !raw.contains('e') && !raw.contains('E') {
442                    Value::JsonNumber(raw)
443                } else {
444                    match n.as_f64() {
445                        Some(f) if f.is_finite() => Value::Float(f),
446                        _ => Value::JsonNumber(raw),
447                    }
448                }
449            }
450        }
451        serde_json::Value::String(s) => Value::String(s),
452        serde_json::Value::Array(arr) => {
453            Value::Array(arr.into_iter().map(json_to_tealeaf_value).collect())
454        }
455        serde_json::Value::Object(obj) => {
456            Value::Object(
457                obj.into_iter()
458                    .map(|(k, v)| (k, json_to_tealeaf_value(v)))
459                    .collect()
460            )
461        }
462    }
463}
464
465/// Convert TeaLeaf value to JSON value
466///
467/// Type preservation:
468/// - Value::Int → JSON integer (e.g., 42)
469/// - Value::UInt → JSON integer (e.g., 18446744073709551615)
470/// - Value::Float → JSON float (e.g., 42.0)
471///
472/// Integer types are tried first during JSON import (i64, then u64) so that
473/// values within 64-bit range stay exact. Only true floats fall through to f64.
474fn tealeaf_to_json_value(tl: &Value) -> serde_json::Value {
475    match tl {
476        Value::Null => serde_json::Value::Null,
477        Value::Bool(b) => serde_json::Value::Bool(*b),
478        Value::Int(i) => serde_json::Value::Number((*i).into()),
479        Value::UInt(u) => serde_json::Value::Number((*u).into()),
480        Value::Float(f) => {
481            // Always output floats as floats - the type distinction is intentional
482            serde_json::Number::from_f64(*f)
483                .map(serde_json::Value::Number)
484                .unwrap_or(serde_json::Value::Null)
485        }
486        Value::String(s) => serde_json::Value::String(s.clone()),
487        Value::Bytes(b) => {
488            // Encode bytes as hex string with 0x prefix
489            let hex: String = b.iter().map(|byte| format!("{:02x}", byte)).collect();
490            serde_json::Value::String(format!("0x{}", hex))
491        }
492        Value::Array(arr) => {
493            serde_json::Value::Array(arr.iter().map(tealeaf_to_json_value).collect())
494        }
495        Value::Object(obj) => {
496            let map: serde_json::Map<String, serde_json::Value> = obj
497                .iter()
498                .map(|(k, v)| (k.clone(), tealeaf_to_json_value(v)))
499                .collect();
500            serde_json::Value::Object(map)
501        }
502        Value::Map(pairs) => {
503            // Convert map to array of [key, value] pairs
504            let arr: Vec<serde_json::Value> = pairs
505                .iter()
506                .map(|(k, v)| {
507                    serde_json::Value::Array(vec![
508                        tealeaf_to_json_value(k),
509                        tealeaf_to_json_value(v),
510                    ])
511                })
512                .collect();
513            serde_json::Value::Array(arr)
514        }
515        Value::Ref(r) => {
516            // Encode ref as object with special key
517            let mut obj = serde_json::Map::new();
518            obj.insert("$ref".to_string(), serde_json::Value::String(r.clone()));
519            serde_json::Value::Object(obj)
520        }
521        Value::Tagged(tag, inner) => {
522            // Encode tagged value as object
523            let mut obj = serde_json::Map::new();
524            obj.insert("$tag".to_string(), serde_json::Value::String(tag.clone()));
525            obj.insert("$value".to_string(), tealeaf_to_json_value(inner));
526            serde_json::Value::Object(obj)
527        }
528        Value::Timestamp(ts, tz) => {
529            serde_json::Value::String(format_timestamp_millis(*ts, *tz))
530        }
531        Value::JsonNumber(s) => {
532            s.parse::<serde_json::Number>()
533                .map(serde_json::Value::Number)
534                .unwrap_or_else(|_| serde_json::Value::String(s.clone()))
535        }
536    }
537}
538
/// Open a binary TeaLeaf file.
///
/// Thin wrapper around `Reader::open`; its result is returned unchanged.
pub fn open<P: AsRef<Path>>(path: P) -> Result<Reader> {
    Reader::open(path)
}
543
/// Parse TeaLeaf text into a document.
///
/// Thin wrapper around `TeaLeaf::parse`; its result is returned unchanged.
pub fn parse(input: &str) -> Result<TeaLeaf> {
    TeaLeaf::parse(input)
}
548
549/// Convenience: load and get data
550pub fn loads(input: &str) -> Result<IndexMap<String, Value>> {
551    Ok(TeaLeaf::parse(input)?.data)
552}
553
/// Check if a string needs quoting when serialized to TeaLeaf format.
///
/// Returns `true` if the string could be misinterpreted as another type,
/// i.e. unless it is a bare name per the spec grammar
/// `name = (letter | "_") { letter | digit | "_" | "-" | "." }` (ASCII only)
/// that is not one of the reserved literals.
///
/// Because the first character must be an ASCII letter or `_`, anything
/// that starts with a digit, a sign (`-`/`+`) or a `0x`/`0b` prefix is
/// quoted without needing a separate check.
fn needs_quoting(s: &str) -> bool {
    // Reserved words, null literal, and float literals the lexer would interpret
    if matches!(s, "true" | "false" | "null" | "~" | "NaN" | "inf" | "Infinity") {
        return true;
    }

    let mut chars = s.chars();

    // Must start with letter or underscore; an empty string has no first
    // character and is therefore quoted too.
    let starts_like_name = match chars.next() {
        Some(first) => first.is_ascii_alphabetic() || first == '_',
        None => false,
    };
    if !starts_like_name {
        return true;
    }

    // Whitelist for the remaining characters: [a-zA-Z0-9_-.]. Anything else
    // (Unicode, whitespace, punctuation, ...) requires quoting to round-trip.
    !chars.all(|c| c.is_ascii_alphanumeric() || matches!(c, '_' | '-' | '.'))
}
602
603/// Write a key to the output, quoting if necessary for safe round-trip.
604fn write_key(out: &mut String, key: &str) {
605    if needs_quoting(key) {
606        out.push('"');
607        out.push_str(&escape_string(key));
608        out.push('"');
609    } else {
610        out.push_str(key);
611    }
612}
613
614/// Write a map key per spec grammar: `map_key = string | name | integer`.
615/// Int/UInt are written as-is. String values use `write_key` for quoting.
616/// Other value types (Null, Bool, Float, etc.) are coerced to quoted strings
617/// so that the text format always round-trips through the parser.
618fn write_map_key(out: &mut String, key: &Value) {
619    match key {
620        Value::Int(i) => out.push_str(&i.to_string()),
621        Value::UInt(u) => out.push_str(&u.to_string()),
622        Value::String(s) => write_key(out, s),
623        // Coerce non-spec key types to quoted strings for text format safety
624        Value::Null => out.push_str("\"~\""),
625        Value::Bool(b) => { out.push('"'); out.push_str(if *b { "true" } else { "false" }); out.push('"'); }
626        Value::Float(f) => { out.push('"'); out.push_str(&f.to_string()); out.push('"'); }
627        Value::JsonNumber(s) => { out.push('"'); out.push_str(s); out.push('"'); }
628        Value::Timestamp(ts, tz) => { out.push('"'); out.push_str(&format_timestamp_millis(*ts, *tz)); out.push('"'); }
629        Value::Bytes(b) => {
630            out.push_str("\"0x");
631            for byte in b { out.push_str(&format!("{:02x}", byte)); }
632            out.push('"');
633        }
634        Value::Ref(r) => { out.push('"'); out.push('!'); out.push_str(r); out.push('"'); }
635        Value::Tagged(tag, _) => { out.push('"'); out.push(':'); out.push_str(tag); out.push('"'); }
636        Value::Array(_) | Value::Object(_) | Value::Map(_) => out.push_str("\"\""),
637    }
638}
639
/// Formatting switches for TeaLeaf text output.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub struct FormatOptions {
    /// Drop insignificant whitespace (spaces after `:` and `,`, indentation, blank lines).
    pub compact: bool,
    /// Write whole-number floats without the `.0` suffix (e.g., `42.0` → `42`).
    /// Saves characters/tokens but changes float→int type on re-parse.
    pub compact_floats: bool,
}

impl FormatOptions {
    /// Pretty-printed output; both switches off. This is the default.
    pub fn pretty() -> Self {
        FormatOptions {
            compact: false,
            compact_floats: false,
        }
    }

    /// Compact output: whitespace stripped, float formatting unchanged.
    pub fn compact() -> Self {
        FormatOptions {
            compact: true,
            ..Self::pretty()
        }
    }

    /// Return these options with compact float formatting switched on
    /// (strip `.0` from whole-number floats).
    pub fn with_compact_floats(self) -> Self {
        FormatOptions {
            compact_floats: true,
            ..self
        }
    }
}

impl Default for FormatOptions {
    /// Same as [`FormatOptions::pretty`].
    fn default() -> Self {
        FormatOptions::pretty()
    }
}
673
/// Convenience: serialize data to TeaLeaf text (no schemas).
///
/// Uses the default `FormatOptions`, i.e. pretty-printed output.
pub fn dumps(data: &IndexMap<String, Value>) -> String {
    dumps_inner(data, &FormatOptions::default())
}
677
/// Serialize data to compact TeaLeaf text format (no schemas).
///
/// Uses `FormatOptions::compact()`, which removes insignificant whitespace
/// for token-efficient output.
pub fn dumps_compact(data: &IndexMap<String, Value>) -> String {
    dumps_inner(data, &FormatOptions::compact())
}
683
/// Serialize data to TeaLeaf text format with custom options (no schemas).
///
/// Public entry point that forwards `data` and `opts` to `dumps_inner`.
pub fn dumps_with_options(data: &IndexMap<String, Value>, opts: &FormatOptions) -> String {
    dumps_inner(data, opts)
}
688
689fn dumps_inner(data: &IndexMap<String, Value>, opts: &FormatOptions) -> String {
690    let mut out = String::new();
691    for (key, value) in data {
692        write_key(&mut out, key);
693        out.push_str(kv_sep(opts.compact));
694        write_value(&mut out, value, 0, opts);
695        out.push('\n');
696    }
697    out
698}
699
/// Item separator: `","` in compact mode, `", "` in pretty mode.
#[inline]
fn sep(compact: bool) -> &'static str {
    match compact {
        true => ",",
        false => ", ",
    }
}
705
/// Key/value separator: `":"` in compact mode, `": "` in pretty mode.
#[inline]
fn kv_sep(compact: bool) -> &'static str {
    match compact {
        true => ":",
        false => ": ",
    }
}
711
/// Escape a string for use inside a double-quoted TeaLeaf string literal.
///
/// Backslash, double quote, newline, tab, carriage return, backspace and
/// form feed get their two-character escapes (`\\ \" \n \t \r \b \f`). Any
/// other control character is written as `\uXXXX` (lowercase hex, one
/// escape per UTF-16 unit). Everything else is copied through unchanged.
fn escape_string(s: &str) -> String {
    let mut escaped = String::with_capacity(s.len());
    for ch in s.chars() {
        let short_form = match ch {
            '\\' => Some("\\\\"),
            '"' => Some("\\\""),
            '\n' => Some("\\n"),
            '\t' => Some("\\t"),
            '\r' => Some("\\r"),
            '\u{0008}' => Some("\\b"),
            '\u{000C}' => Some("\\f"),
            _ => None,
        };

        if let Some(sequence) = short_form {
            escaped.push_str(sequence);
        } else if ch.is_control() {
            // Remaining control characters have no short form.
            let mut units = [0u16; 2];
            for unit in ch.encode_utf16(&mut units).iter() {
                escaped.push_str(&format!("\\u{:04x}", unit));
            }
        } else {
            escaped.push(ch);
        }
    }
    escaped
}
736
/// Render a float so that it re-parses as a float (unless `compact_floats`
/// deliberately allows whole numbers to lose their `.0`).
///
/// - NaN and ±infinity are written as the keywords `NaN`, `inf` and `-inf`.
/// - If `f64::to_string()` already contains `.`, `e` or `E`, it is used as-is.
/// - Otherwise the text looks like an integer. With more than 15 digits it
///   is switched to scientific notation, because `to_string()` expands
///   large values (e.g. 6.022e23 becomes "602200000000000000000000") and
///   that would be re-parsed as an overflowing integer. Shorter values get
///   `.0` appended, or are left bare when `compact_floats` is set.
fn format_float(f: f64, compact_floats: bool) -> String {
    if f.is_nan() {
        return "NaN".to_string();
    }
    if f.is_infinite() {
        let keyword = if f.is_sign_positive() { "inf" } else { "-inf" };
        return keyword.to_string();
    }

    let plain = f.to_string();
    let already_float_like = plain.contains(|c: char| matches!(c, '.' | 'e' | 'E'));
    if already_float_like {
        return plain;
    }

    // Integer-looking text: count digits, ignoring a leading minus sign.
    let digit_count = plain.trim_start_matches('-').len();
    match (digit_count > 15, compact_floats) {
        (true, _) => format!("{:e}", f),
        (false, true) => plain,
        (false, false) => format!("{}.0", plain),
    }
}
768
769fn write_value(out: &mut String, value: &Value, indent: usize, opts: &FormatOptions) {
770    match value {
771        Value::Null => out.push('~'),
772        Value::Bool(b) => out.push_str(if *b { "true" } else { "false" }),
773        Value::Int(i) => out.push_str(&i.to_string()),
774        Value::UInt(u) => out.push_str(&u.to_string()),
775        Value::JsonNumber(s) => out.push_str(s),
776        Value::Float(f) => out.push_str(&format_float(*f, opts.compact_floats)),
777        Value::String(s) => {
778            if needs_quoting(s) {
779                out.push('"');
780                out.push_str(&escape_string(s));
781                out.push('"');
782            } else {
783                out.push_str(s);
784            }
785        }
786        Value::Bytes(b) => {
787            out.push_str("b\"");
788            for byte in b {
789                out.push_str(&format!("{:02x}", byte));
790            }
791            out.push('"');
792        }
793        Value::Array(arr) => {
794            out.push('[');
795            for (i, v) in arr.iter().enumerate() {
796                if i > 0 { out.push_str(sep(opts.compact)); }
797                write_value(out, v, indent, opts);
798            }
799            out.push(']');
800        }
801        Value::Object(obj) => {
802            out.push('{');
803            for (i, (k, v)) in obj.iter().enumerate() {
804                if i > 0 { out.push_str(sep(opts.compact)); }
805                write_key(out, k);
806                out.push_str(kv_sep(opts.compact));
807                write_value(out, v, indent, opts);
808            }
809            out.push('}');
810        }
811        Value::Map(pairs) => {
812            out.push_str(if opts.compact { "@map{" } else { "@map {" });
813            let mut first = true;
814            for (k, v) in pairs {
815                if !first { out.push_str(sep(opts.compact)); }
816                first = false;
817                // Map keys are restricted to string | name | integer per spec.
818                // Write Int/UInt directly; convert other types to quoted strings.
819                write_map_key(out, k);
820                out.push_str(kv_sep(opts.compact));
821                write_value(out, v, indent, opts);
822            }
823            out.push('}');
824        }
825        Value::Ref(r) => {
826            out.push('!');
827            out.push_str(r);
828        }
829        Value::Tagged(tag, inner) => {
830            out.push(':');
831            out.push_str(tag);
832            out.push(' ');
833            write_value(out, inner, indent, opts);
834        }
835        Value::Timestamp(ts, tz) => {
836            out.push_str(&format_timestamp_millis(*ts, *tz));
837        }
838    }
839}
840
841/// Format a Unix-millis timestamp as an ISO 8601 string with timezone offset.
842/// Handles negative timestamps (pre-epoch dates) correctly using Euclidean division.
843/// Years outside [0000, 9999] are clamped to the boundary per spec (4-digit years only).
844/// When tz_offset_minutes is 0, emits 'Z' suffix. Otherwise emits +HH:MM or -HH:MM.
845fn format_timestamp_millis(ts: i64, tz_offset_minutes: i16) -> String {
846    // Clamp to representable ISO 8601 range (years 0000-9999).
847    // Year 0000-01-01T00:00:00Z = -62167219200000 ms
848    // Year 9999-12-31T23:59:59.999Z = 253402300799999 ms
849    const MIN_TS: i64 = -62_167_219_200_000;
850    const MAX_TS: i64 = 253_402_300_799_999;
851    let ts = ts.clamp(MIN_TS, MAX_TS);
852
853    // Apply timezone offset to get local time for display
854    let local_ts = ts + (tz_offset_minutes as i64) * 60_000;
855    let local_ts = local_ts.clamp(MIN_TS, MAX_TS);
856
857    let secs = local_ts.div_euclid(1000);
858    let millis = local_ts.rem_euclid(1000);
859    let days = secs.div_euclid(86400);
860    let time_secs = secs.rem_euclid(86400);
861    let hours = time_secs / 3600;
862    let mins = (time_secs % 3600) / 60;
863    let secs_rem = time_secs % 60;
864    let (year, month, day) = days_to_ymd(days);
865
866    let tz_suffix = if tz_offset_minutes == 0 {
867        "Z".to_string()
868    } else {
869        let sign = if tz_offset_minutes > 0 { '+' } else { '-' };
870        let abs = tz_offset_minutes.unsigned_abs();
871        format!("{}{:02}:{:02}", sign, abs / 60, abs % 60)
872    };
873
874    if millis > 0 {
875        format!("{:04}-{:02}-{:02}T{:02}:{:02}:{:02}.{:03}{}",
876            year, month, day, hours, mins, secs_rem, millis, tz_suffix)
877    } else {
878        format!("{:04}-{:02}-{:02}T{:02}:{:02}:{:02}{}",
879            year, month, day, hours, mins, secs_rem, tz_suffix)
880    }
881}
882
/// Convert a day count relative to the Unix epoch (1970-01-01) into a
/// `(year, month, day)` triple.
///
/// Uses Howard Hinnant's civil-from-days algorithm on `i64`, so negative day
/// counts (dates before the epoch) are handled as well.
fn days_to_ymd(days: i64) -> (i64, u32, u32) {
    const DAYS_PER_ERA: i64 = 146097; // one 400-year cycle

    // Shift the origin to 0000-03-01 so each year ends with February.
    let shifted = days + 719468;
    let era_numerator = if shifted >= 0 { shifted } else { shifted - (DAYS_PER_ERA - 1) };
    let era = era_numerator / DAYS_PER_ERA;

    let day_of_era = (shifted - era * DAYS_PER_ERA) as u32;
    let year_of_era =
        (day_of_era - day_of_era / 1460 + day_of_era / 36524 - day_of_era / 146096) / 365;
    let day_of_year = day_of_era - (365 * year_of_era + year_of_era / 4 - year_of_era / 100);

    // Month index counted from March (0 = March, ..., 11 = February).
    let month_index = (5 * day_of_year + 2) / 153;
    let day = day_of_year - (153 * month_index + 2) / 5 + 1;
    let month = if month_index < 10 { month_index + 3 } else { month_index - 9 };

    // January and February belong to the following calendar year.
    let march_based_year = year_of_era as i64 + era * 400;
    let year = if month <= 2 { march_based_year + 1 } else { march_based_year };

    (year, month, day)
}
898
899// =============================================================================
900// Schema Inference
901// =============================================================================
902
/// Inferred type information for a field
#[derive(Debug, Clone, PartialEq)]
enum InferredType {
    /// Inferred from `Value::Null`; merging with any other type yields that other type.
    Null,
    /// Inferred from `Value::Bool`.
    Bool,
    /// Inferred from `Value::Int` and `Value::UInt`.
    Int,
    /// Inferred from `Value::Float`; also the result of merging `Int` with `Float`.
    Float,
    /// Inferred from `Value::String`.
    String,
    /// Array whose element type is the merge of all element types
    /// (`Mixed` for an empty array).
    Array(Box<InferredType>),
    /// Object described by its `(field name, field type)` pairs.
    Object(Vec<(String, InferredType)>),  // Ordered fields
    /// Fallback for value kinds without a dedicated variant and for merges of
    /// incompatible types.
    Mixed,  // Different types seen - fall back to any
}
915
916impl InferredType {
917    fn merge(&self, other: &InferredType) -> InferredType {
918        if self == other {
919            return self.clone();
920        }
921        match (self, other) {
922            (InferredType::Null, t) | (t, InferredType::Null) => {
923                // Null + T = T (nullable)
924                t.clone()
925            }
926            (InferredType::Int, InferredType::Float) | (InferredType::Float, InferredType::Int) => {
927                InferredType::Float
928            }
929            (InferredType::Array(a), InferredType::Array(b)) => {
930                InferredType::Array(Box::new(a.merge(b)))
931            }
932            (InferredType::Object(a), InferredType::Object(b)) => {
933                // Merge objects: keep fields present in both, track nullability
934                let mut merged = Vec::new();
935                let b_map: IndexMap<&str, &InferredType> = b.iter().map(|(k, v)| (k.as_str(), v)).collect();
936
937                for (key, a_type) in a {
938                    if let Some(b_type) = b_map.get(key.as_str()) {
939                        merged.push((key.clone(), a_type.merge(b_type)));
940                    }
941                    // Fields only in a are dropped (not uniform)
942                }
943
944                // Check if structures are compatible (same fields)
945                if merged.len() == a.len() && merged.len() == b.len() {
946                    InferredType::Object(merged)
947                } else {
948                    InferredType::Mixed
949                }
950            }
951            _ => InferredType::Mixed,
952        }
953    }
954
955    fn to_field_type(&self, schemas: &IndexMap<String, Schema>) -> FieldType {
956        match self {
957            InferredType::Null => FieldType::new("string").nullable(),  // Unknown type, default to string
958            InferredType::Bool => FieldType::new("bool"),
959            InferredType::Int => FieldType::new("int"),
960            InferredType::Float => FieldType::new("float"),
961            InferredType::String => FieldType::new("string"),
962            InferredType::Array(inner) => {
963                let inner_type = inner.to_field_type(schemas);
964                FieldType {
965                    base: inner_type.base,
966                    nullable: inner_type.nullable,
967                    is_array: true,
968                }
969            }
970            InferredType::Object(fields) => {
971                // Check if this matches an existing schema
972                for (name, schema) in schemas {
973                    if schema.fields.len() == fields.len() {
974                        let all_match = schema.fields.iter().all(|sf| {
975                            fields.iter().any(|(k, _)| k == &sf.name)
976                        });
977                        if all_match {
978                            return FieldType::new(name.clone());
979                        }
980                    }
981                }
982                // No matching schema — use "any" (not "object", which is a
983                // value-only type rejected by the parser in schema definitions)
984                FieldType::new("any")
985            }
986            InferredType::Mixed => FieldType::new("any"),
987        }
988    }
989}
990
991fn infer_type(value: &Value) -> InferredType {
992    match value {
993        Value::Null => InferredType::Null,
994        Value::Bool(_) => InferredType::Bool,
995        Value::Int(_) | Value::UInt(_) => InferredType::Int,
996        Value::Float(_) => InferredType::Float,
997        Value::String(_) => InferredType::String,
998        Value::Array(arr) => {
999            if arr.is_empty() {
1000                InferredType::Array(Box::new(InferredType::Mixed))
1001            } else {
1002                let mut element_type = infer_type(&arr[0]);
1003                for item in arr.iter().skip(1) {
1004                    element_type = element_type.merge(&infer_type(item));
1005                }
1006                InferredType::Array(Box::new(element_type))
1007            }
1008        }
1009        Value::Object(obj) => {
1010            let fields: Vec<(String, InferredType)> = obj
1011                .iter()
1012                .map(|(k, v)| (k.clone(), infer_type(v)))
1013                .collect();
1014            InferredType::Object(fields)
1015        }
1016        _ => InferredType::Mixed,
1017    }
1018}
1019
/// Turn a (possibly plural) name into a lowercased singular form using simple
/// suffix rules:
///
/// * `...ies` becomes `...y`
/// * `...sses`, `...xes`, `...ches`, `...shes` lose the trailing `es`
/// * any other name longer than one byte ending in a single `s` loses it
///
/// Names matching none of the rules are returned lowercased but otherwise
/// unchanged.
fn singularize(name: &str) -> String {
    const ES_PLURALS: [&str; 4] = ["sses", "xes", "ches", "shes"];

    let lowered = name.to_lowercase();

    if let Some(stem) = lowered.strip_suffix("ies") {
        return format!("{}y", stem);
    }

    if ES_PLURALS.iter().any(|suffix| lowered.ends_with(*suffix)) {
        return lowered[..lowered.len() - 2].to_string();
    }

    let drops_final_s = lowered.len() > 1 && lowered.ends_with('s') && !lowered.ends_with("ss");
    if drops_final_s {
        return lowered[..lowered.len() - 1].to_string();
    }

    lowered
}
1033
1034/// Check if array elements are objects that match a schema's structure
1035fn array_matches_schema(arr: &[Value], schema: &Schema) -> bool {
1036    if arr.is_empty() {
1037        return false;
1038    }
1039
1040    // Check if first element is an object
1041    let first = match &arr[0] {
1042        Value::Object(obj) => obj,
1043        _ => return false,
1044    };
1045
1046    // Get schema field names
1047    let schema_fields: HashSet<_> = schema.fields.iter().map(|f| f.name.as_str()).collect();
1048
1049    // Get object keys
1050    let obj_keys: HashSet<_> = first.keys().map(|k| k.as_str()).collect();
1051
1052    // Check if there's significant overlap (at least 50% of schema fields present)
1053    let overlap = schema_fields.intersection(&obj_keys).count();
1054    let required_overlap = schema_fields.len() / 2;
1055
1056    overlap > required_overlap || overlap == schema_fields.len()
1057}
1058
/// Schema inferrer that analyzes data and generates schemas
pub struct SchemaInferrer {
    /// Inferred schemas, keyed by schema name.
    schemas: IndexMap<String, Schema>,
    /// Schema names in the order the schemas were created.
    schema_order: Vec<String>,  // Track order for output
}
1064
1065impl SchemaInferrer {
1066    pub fn new() -> Self {
1067        Self {
1068            schemas: IndexMap::new(),
1069            schema_order: Vec::new(),
1070        }
1071    }
1072
1073    /// Analyze data and infer schemas from uniform object arrays
1074    pub fn infer(&mut self, data: &IndexMap<String, Value>) {
1075        for (key, value) in data {
1076            self.analyze_value(key, value);
1077        }
1078    }
1079
1080    fn analyze_value(&mut self, hint_name: &str, value: &Value) {
1081        if let Value::Array(arr) = value {
1082            self.analyze_array(hint_name, arr);
1083        } else if let Value::Object(obj) = value {
1084            // Recursively analyze nested objects
1085            for (k, v) in obj {
1086                self.analyze_value(k, v);
1087            }
1088        }
1089    }
1090
1091    fn analyze_array(&mut self, hint_name: &str, arr: &[Value]) {
1092        if arr.is_empty() {
1093            return;
1094        }
1095
1096        // Check if all elements are objects with the same structure
1097        let first = match &arr[0] {
1098            Value::Object(obj) => obj,
1099            _ => return,
1100        };
1101
1102        // Collect field names from first object (preserving insertion order)
1103        let field_names: Vec<String> = first.keys().cloned().collect();
1104
1105        // Skip schema inference if fields are empty, or any field name is empty
1106        // or needs quoting — such names can't round-trip through @struct definitions.
1107        if field_names.is_empty()
1108            || field_names.iter().any(|n| n.is_empty() || needs_quoting(n))
1109        {
1110            return;
1111        }
1112
1113        let field_set: std::collections::BTreeSet<&str> = first.keys().map(|k| k.as_str()).collect();
1114
1115        // Verify all objects have the same fields
1116        for item in arr.iter().skip(1) {
1117            if let Value::Object(obj) = item {
1118                let item_set: std::collections::BTreeSet<&str> = obj.keys().map(|k| k.as_str()).collect();
1119                if item_set != field_set {
1120                    return;  // Not uniform
1121                }
1122            } else {
1123                return;  // Not all objects
1124            }
1125        }
1126
1127        // Infer types for each field across all objects
1128        let mut field_types: IndexMap<String, InferredType> = IndexMap::new();
1129        let mut has_null: IndexMap<String, bool> = IndexMap::new();
1130
1131        for item in arr {
1132            if let Value::Object(obj) = item {
1133                for (key, val) in obj {
1134                    let inferred = infer_type(val);
1135                    let is_null = matches!(val, Value::Null);
1136
1137                    *has_null.entry(key.clone()).or_insert(false) |= is_null;
1138
1139                    field_types
1140                        .entry(key.clone())
1141                        .and_modify(|existing| *existing = existing.merge(&inferred))
1142                        .or_insert(inferred);
1143                }
1144            }
1145        }
1146
1147        // Generate schema name from hint
1148        let schema_name = singularize(hint_name);
1149
1150        // Skip if schema already exists
1151        if self.schemas.contains_key(&schema_name) {
1152            return;
1153        }
1154
1155        // Recursively analyze nested fields in field order (depth-first).
1156        // Single pass processes arrays and objects as encountered, matching
1157        // the derive path's field-declaration-order traversal.
1158        for field_name in &field_names {
1159            // Check the first object's value for this field
1160            if let Value::Object(first_obj) = &arr[0] {
1161                match first_obj.get(field_name) {
1162                    Some(Value::Array(nested)) => {
1163                        // Arrays are always analyzed — same-name recursion
1164                        // (e.g., nodes[].nodes[]) is safe because depth-first
1165                        // ensures the inner schema is created first.
1166                        self.analyze_array(field_name, nested);
1167                    }
1168                    Some(Value::Object(_)) => {
1169                        // Skip object fields whose singularized name collides
1170                        // with this array's schema name — prevents
1171                        // self-referencing schemas (e.g., @struct root (root: root)).
1172                        if singularize(field_name) == schema_name {
1173                            continue;
1174                        }
1175
1176                        let nested_objects: Vec<&IndexMap<String, Value>> = arr
1177                            .iter()
1178                            .filter_map(|item| {
1179                                if let Value::Object(obj) = item {
1180                                    if let Some(Value::Object(nested)) = obj.get(field_name) {
1181                                        return Some(nested);
1182                                    }
1183                                }
1184                                None
1185                            })
1186                            .collect();
1187
1188                        if !nested_objects.is_empty() {
1189                            self.analyze_nested_objects(field_name, &nested_objects);
1190                        }
1191                    }
1192                    _ => {}
1193                }
1194            }
1195        }
1196
1197        // Re-check: recursive nested analysis (both arrays and objects) may have
1198        // claimed this schema name. This happens when the same field name appears
1199        // at multiple nesting levels (e.g., "nodes" containing "nodes"). The inner
1200        // schema was created first (depth-first); preserve it to avoid overwriting
1201        // with a different structure.
1202        if self.schemas.contains_key(&schema_name) {
1203            return;
1204        }
1205
1206        // Build schema
1207        let mut schema = Schema::new(&schema_name);
1208
1209        // Use insertion order from first object
1210        for field_name in &field_names {
1211            if let Some(inferred) = field_types.get(field_name) {
1212                let mut field_type = inferred.to_field_type(&self.schemas);
1213
1214                // Mark as nullable if any null values seen
1215                if has_null.get(field_name).copied().unwrap_or(false) {
1216                    field_type.nullable = true;
1217                }
1218
1219                // Check if there's a nested schema for array fields
1220                if let Value::Object(first_obj) = &arr[0] {
1221                    if let Some(Value::Array(nested_arr)) = first_obj.get(field_name) {
1222                        let nested_schema_name = singularize(field_name);
1223                        if let Some(nested_schema) = self.schemas.get(&nested_schema_name) {
1224                            // Verify array elements are objects matching the schema structure
1225                            if array_matches_schema(nested_arr, nested_schema) {
1226                                field_type = FieldType {
1227                                    base: nested_schema_name,
1228                                    nullable: field_type.nullable,
1229                                    is_array: true,
1230                                };
1231                            }
1232                        }
1233                    }
1234                }
1235
1236                // Check if there's a nested schema for object fields
1237                // (skip self-references: field singularizing to the schema being built)
1238                let nested_schema_name = singularize(field_name);
1239                if nested_schema_name != schema_name && self.schemas.contains_key(&nested_schema_name) {
1240                    if matches!(inferred, InferredType::Object(_)) {
1241                        field_type = FieldType {
1242                            base: nested_schema_name,
1243                            nullable: field_type.nullable,
1244                            is_array: false,
1245                        };
1246                    }
1247                }
1248
1249                schema.add_field(field_name, field_type);
1250            }
1251        }
1252
1253        self.schema_order.push(schema_name.clone());
1254        self.schemas.insert(schema_name, schema);
1255    }
1256
1257    /// Analyze a collection of nested objects (from the same field across array items)
1258    /// and create a schema if they have uniform structure
1259    fn analyze_nested_objects(&mut self, field_name: &str, objects: &[&IndexMap<String, Value>]) {
1260        if objects.is_empty() {
1261            return;
1262        }
1263
1264        // Get field names from first object (preserving insertion order)
1265        let first = objects[0];
1266        let nested_field_names: Vec<String> = first.keys().cloned().collect();
1267
1268        // Skip empty objects and objects with field names that can't round-trip
1269        if nested_field_names.is_empty()
1270            || nested_field_names.iter().any(|n| n.is_empty() || needs_quoting(n))
1271        {
1272            return;
1273        }
1274
1275        let field_set: std::collections::BTreeSet<&str> = first.keys().map(|k| k.as_str()).collect();
1276
1277        // Check if all objects have the same fields
1278        for obj in objects.iter().skip(1) {
1279            let obj_set: std::collections::BTreeSet<&str> = obj.keys().map(|k| k.as_str()).collect();
1280            if obj_set != field_set {
1281                return; // Not uniform
1282            }
1283        }
1284
1285        // They're uniform - create a schema
1286        let schema_name = singularize(field_name);
1287
1288        // Skip if schema already exists
1289        if self.schemas.contains_key(&schema_name) {
1290            return;
1291        }
1292
1293        // Infer field types across all objects
1294        let mut field_types: IndexMap<String, InferredType> = IndexMap::new();
1295        let mut has_null: IndexMap<String, bool> = IndexMap::new();
1296
1297        for obj in objects {
1298            for (key, val) in *obj {
1299                let inferred = infer_type(val);
1300                let is_null = matches!(val, Value::Null);
1301
1302                *has_null.entry(key.clone()).or_insert(false) |= is_null;
1303
1304                field_types
1305                    .entry(key.clone())
1306                    .and_modify(|existing| *existing = existing.merge(&inferred))
1307                    .or_insert(inferred);
1308            }
1309        }
1310
1311        // Recursively analyze nested fields in field order (depth-first).
1312        // Single pass mirrors the derive path's field-declaration-order traversal,
1313        // so CLI and Builder API produce schemas in the same order.
1314        for nested_field in &nested_field_names {
1315            if let Some(Value::Array(nested_arr)) = objects[0].get(nested_field) {
1316                self.analyze_array(nested_field, nested_arr);
1317            } else {
1318                let deeper_objects: Vec<&IndexMap<String, Value>> = objects
1319                    .iter()
1320                    .filter_map(|obj| {
1321                        if let Some(Value::Object(nested)) = obj.get(nested_field) {
1322                            Some(nested)
1323                        } else {
1324                            None
1325                        }
1326                    })
1327                    .collect();
1328
1329                if !deeper_objects.is_empty() {
1330                    self.analyze_nested_objects(nested_field, &deeper_objects);
1331                }
1332            }
1333        }
1334
1335        // Build schema
1336        let mut schema = Schema::new(&schema_name);
1337
1338        for nested_field in &nested_field_names {
1339            if let Some(inferred) = field_types.get(nested_field) {
1340                let mut field_type = inferred.to_field_type(&self.schemas);
1341
1342                if has_null.get(nested_field).copied().unwrap_or(false) {
1343                    field_type.nullable = true;
1344                }
1345
1346                // Check if this field has a nested array schema
1347                if matches!(inferred, InferredType::Array(_)) {
1348                    if let Some(Value::Array(nested_arr)) = objects[0].get(nested_field) {
1349                        let nested_schema_name = singularize(nested_field);
1350                        if let Some(nested_schema) = self.schemas.get(&nested_schema_name) {
1351                            if array_matches_schema(nested_arr, nested_schema) {
1352                                field_type = FieldType {
1353                                    base: nested_schema_name,
1354                                    nullable: field_type.nullable,
1355                                    is_array: true,
1356                                };
1357                            }
1358                        }
1359                    }
1360                }
1361
1362                // Check if this field has a nested object schema
1363                if let Some(nested_schema) = self.schemas.get(&singularize(nested_field)) {
1364                    if matches!(inferred, InferredType::Object(_)) {
1365                        field_type = FieldType::new(nested_schema.name.clone());
1366                    }
1367                }
1368
1369                schema.add_field(nested_field, field_type);
1370            }
1371        }
1372
1373        self.schema_order.push(schema_name.clone());
1374        self.schemas.insert(schema_name, schema);
1375    }
1376
1377    pub fn into_schemas(self) -> (IndexMap<String, Schema>, Vec<String>) {
1378        (self.schemas, self.schema_order)
1379    }
1380}
1381
1382impl Default for SchemaInferrer {
1383    fn default() -> Self {
1384        Self::new()
1385    }
1386}
1387
1388/// Serialize data to TeaLeaf text format with schemas
1389pub fn dumps_with_schemas(
1390    data: &IndexMap<String, Value>,
1391    schemas: &IndexMap<String, Schema>,
1392    schema_order: &[String],
1393    unions: &IndexMap<String, Union>,
1394    union_order: &[String],
1395) -> String {
1396    dumps_with_schemas_inner(data, schemas, schema_order, unions, union_order, &FormatOptions::default())
1397}
1398
1399/// Serialize data to compact TeaLeaf text format with schemas.
1400/// Removes insignificant whitespace for token-efficient output.
1401pub fn dumps_with_schemas_compact(
1402    data: &IndexMap<String, Value>,
1403    schemas: &IndexMap<String, Schema>,
1404    schema_order: &[String],
1405    unions: &IndexMap<String, Union>,
1406    union_order: &[String],
1407) -> String {
1408    dumps_with_schemas_inner(data, schemas, schema_order, unions, union_order, &FormatOptions::compact())
1409}
1410
1411/// Serialize data to TeaLeaf text format with schemas and custom options.
1412pub fn dumps_with_schemas_with_options(
1413    data: &IndexMap<String, Value>,
1414    schemas: &IndexMap<String, Schema>,
1415    schema_order: &[String],
1416    unions: &IndexMap<String, Union>,
1417    union_order: &[String],
1418    opts: &FormatOptions,
1419) -> String {
1420    dumps_with_schemas_inner(data, schemas, schema_order, unions, union_order, opts)
1421}
1422
1423fn dumps_with_schemas_inner(
1424    data: &IndexMap<String, Value>,
1425    schemas: &IndexMap<String, Schema>,
1426    schema_order: &[String],
1427    unions: &IndexMap<String, Union>,
1428    union_order: &[String],
1429    opts: &FormatOptions,
1430) -> String {
1431    let mut out = String::new();
1432    let mut has_definitions = false;
1433
1434    // Write union definitions first (before structs, since structs may reference unions)
1435    for name in union_order {
1436        if let Some(union) = unions.get(name) {
1437            out.push_str("@union ");
1438            out.push_str(&union.name);
1439            out.push_str(if opts.compact { "{\n" } else { " {\n" });
1440            for (vi, variant) in union.variants.iter().enumerate() {
1441                if !opts.compact { out.push_str("  "); }
1442                out.push_str(&variant.name);
1443                out.push_str(if opts.compact { "(" } else { " (" });
1444                for (fi, field) in variant.fields.iter().enumerate() {
1445                    if fi > 0 {
1446                        out.push_str(sep(opts.compact));
1447                    }
1448                    out.push_str(&field.name);
1449                    out.push_str(kv_sep(opts.compact));
1450                    out.push_str(&field.field_type.to_string());
1451                }
1452                out.push(')');
1453                if vi < union.variants.len() - 1 {
1454                    out.push(',');
1455                }
1456                out.push('\n');
1457            }
1458            out.push_str("}\n");
1459            has_definitions = true;
1460        }
1461    }
1462
1463    // Write struct schemas in order
1464    for name in schema_order {
1465        if let Some(schema) = schemas.get(name) {
1466            out.push_str("@struct ");
1467            out.push_str(&schema.name);
1468            out.push_str(if opts.compact { "(" } else { " (" });
1469            for (i, field) in schema.fields.iter().enumerate() {
1470                if i > 0 {
1471                    out.push_str(sep(opts.compact));
1472                }
1473                write_key(&mut out, &field.name);
1474                out.push_str(kv_sep(opts.compact));
1475                out.push_str(&field.field_type.to_string());
1476            }
1477            out.push_str(")\n");
1478            has_definitions = true;
1479        }
1480    }
1481
1482    if has_definitions && !opts.compact {
1483        out.push('\n');
1484    }
1485
1486    // Write data (preserves insertion order)
1487    for (key, value) in data {
1488        write_key(&mut out, key);
1489        out.push_str(kv_sep(opts.compact));
1490        write_value_with_schemas(&mut out, value, schemas, Some(key), 0, None, opts);
1491        out.push('\n');
1492    }
1493
1494    out
1495}
1496
1497/// Resolve a schema for a value by trying three strategies in order:
1498/// 1. Declared type from parent schema's field type (exact match)
1499/// 2. Singularize the field key name (works for JSON-inference schemas)
1500/// 3. Case-insensitive singularize (handles derive-macro PascalCase names)
1501fn resolve_schema<'a>(
1502    schemas: &'a IndexMap<String, Schema>,
1503    declared_type: Option<&str>,
1504    hint_name: Option<&str>,
1505) -> Option<&'a Schema> {
1506    // 1. Direct lookup by declared type from parent schema
1507    if let Some(name) = declared_type {
1508        if let Some(s) = schemas.get(name) {
1509            return Some(s);
1510        }
1511    }
1512    // 2. Singularize heuristic (existing behavior for JSON-inference schemas)
1513    if let Some(hint) = hint_name {
1514        let singular = singularize(hint);
1515        if let Some(s) = schemas.get(&singular) {
1516            return Some(s);
1517        }
1518        // 3. Case-insensitive singularize (for derive-macro PascalCase names)
1519        let singular_lower = singular.to_ascii_lowercase();
1520        for (name, schema) in schemas {
1521            if name.to_ascii_lowercase() == singular_lower {
1522                return Some(schema);
1523            }
1524        }
1525    }
1526    None
1527}
1528
1529fn write_value_with_schemas(
1530    out: &mut String,
1531    value: &Value,
1532    schemas: &IndexMap<String, Schema>,
1533    hint_name: Option<&str>,
1534    indent: usize,
1535    declared_type: Option<&str>,
1536    opts: &FormatOptions,
1537) {
1538    match value {
1539        Value::Null => out.push('~'),
1540        Value::Bool(b) => out.push_str(if *b { "true" } else { "false" }),
1541        Value::Int(i) => out.push_str(&i.to_string()),
1542        Value::UInt(u) => out.push_str(&u.to_string()),
1543        Value::JsonNumber(s) => out.push_str(s),
1544        Value::Float(f) => out.push_str(&format_float(*f, opts.compact_floats)),
1545        Value::String(s) => {
1546            if needs_quoting(s) {
1547                out.push('"');
1548                out.push_str(&escape_string(s));
1549                out.push('"');
1550            } else {
1551                out.push_str(s);
1552            }
1553        }
1554        Value::Bytes(b) => {
1555            out.push_str("b\"");
1556            for byte in b {
1557                out.push_str(&format!("{:02x}", byte));
1558            }
1559            out.push('"');
1560        }
1561        Value::Array(arr) => {
1562            // Check if this array can use @table format.
1563            // Try name-based resolution first, then structural matching as fallback.
1564            let mut schema = resolve_schema(schemas, declared_type, hint_name);
1565
1566            // Structural fallback: if name-based resolution failed, find a schema
1567            // whose fields exactly match the first element's object keys.
1568            // This handles Builder-path documents where the top-level key name
1569            // (e.g., "orders") doesn't match the schema name (e.g., "SalesOrder").
1570            if schema.is_none() {
1571                if let Some(Value::Object(first_obj)) = arr.first() {
1572                    let obj_keys: HashSet<&str> = first_obj.keys().map(|k| k.as_str()).collect();
1573                    for (_, candidate) in schemas {
1574                        let schema_fields: HashSet<&str> = candidate.fields.iter().map(|f| f.name.as_str()).collect();
1575                        if schema_fields == obj_keys {
1576                            schema = Some(candidate);
1577                            break;
1578                        }
1579                    }
1580                }
1581            }
1582
1583            if let Some(schema) = schema {
1584                // Verify the first element is an object whose fields match the schema.
1585                // A name-only lookup isn't enough — if the same field name appears at
1586                // multiple nesting levels with different shapes, the schema may belong
1587                // to a different level. Applying the wrong schema drops unmatched keys.
1588                let schema_matches = if let Some(Value::Object(first_obj)) = arr.first() {
1589                    let schema_fields: HashSet<&str> = schema.fields.iter().map(|f| f.name.as_str()).collect();
1590                    let obj_keys: HashSet<&str> = first_obj.keys().map(|k| k.as_str()).collect();
1591                    schema_fields == obj_keys
1592                } else {
1593                    false
1594                };
1595
1596                if schema_matches {
1597                    out.push_str("@table ");
1598                    out.push_str(&schema.name);
1599                    out.push_str(if opts.compact { "[\n" } else { " [\n" });
1600
1601                    let inner_indent = if opts.compact { 0 } else { indent + 2 };
1602                    for (i, item) in arr.iter().enumerate() {
1603                        if !opts.compact {
1604                            for _ in 0..inner_indent {
1605                                out.push(' ');
1606                            }
1607                        }
1608                        write_tuple(out, item, schema, schemas, inner_indent, opts);
1609                        if i < arr.len() - 1 {
1610                            out.push(',');
1611                        }
1612                        out.push('\n');
1613                    }
1614
1615                    if !opts.compact {
1616                        for _ in 0..indent {
1617                            out.push(' ');
1618                        }
1619                    }
1620                    out.push(']');
1621                    return;
1622                }
1623            }
1624
1625            // Fall back to regular array format
1626            out.push('[');
1627            for (i, v) in arr.iter().enumerate() {
1628                if i > 0 {
1629                    out.push_str(sep(opts.compact));
1630                }
1631                write_value_with_schemas(out, v, schemas, None, indent, None, opts);
1632            }
1633            out.push(']');
1634        }
1635        Value::Object(obj) => {
1636            // Find the schema for this object so we can propagate field types to children.
1637            // Try name-based resolution first, then structural matching as fallback.
1638            let mut obj_schema = resolve_schema(schemas, declared_type, hint_name);
1639
1640            if obj_schema.is_none() {
1641                let obj_keys: HashSet<&str> = obj.keys().map(|k| k.as_str()).collect();
1642                for (_, candidate) in schemas {
1643                    let schema_fields: HashSet<&str> = candidate.fields.iter().map(|f| f.name.as_str()).collect();
1644                    if schema_fields == obj_keys {
1645                        obj_schema = Some(candidate);
1646                        break;
1647                    }
1648                }
1649            }
1650
1651            out.push('{');
1652            for (i, (k, v)) in obj.iter().enumerate() {
1653                if i > 0 {
1654                    out.push_str(sep(opts.compact));
1655                }
1656                write_key(out, k);
1657                out.push_str(kv_sep(opts.compact));
1658                // Look up this field's declared type from the parent schema
1659                let field_type = obj_schema.and_then(|s| {
1660                    s.fields.iter()
1661                        .find(|f| f.name == *k)
1662                        .map(|f| f.field_type.base.as_str())
1663                });
1664                write_value_with_schemas(out, v, schemas, Some(k), indent, field_type, opts);
1665            }
1666            out.push('}');
1667        }
1668        Value::Map(pairs) => {
1669            out.push_str(if opts.compact { "@map{" } else { "@map {" });
1670            let mut first = true;
1671            for (k, v) in pairs {
1672                if !first {
1673                    out.push_str(sep(opts.compact));
1674                }
1675                first = false;
1676                write_map_key(out, k);
1677                out.push_str(kv_sep(opts.compact));
1678                write_value_with_schemas(out, v, schemas, None, indent, None, opts);
1679            }
1680            out.push('}');
1681        }
1682        Value::Ref(r) => {
1683            out.push('!');
1684            out.push_str(r);
1685        }
1686        Value::Tagged(tag, inner) => {
1687            out.push(':');
1688            out.push_str(tag);
1689            out.push(' ');
1690            write_value_with_schemas(out, inner, schemas, None, indent, None, opts);
1691        }
1692        Value::Timestamp(ts, tz) => {
1693            out.push_str(&format_timestamp_millis(*ts, *tz));
1694        }
1695    }
1696}
1697
1698fn write_tuple(
1699    out: &mut String,
1700    value: &Value,
1701    schema: &Schema,
1702    schemas: &IndexMap<String, Schema>,
1703    indent: usize,
1704    opts: &FormatOptions,
1705) {
1706    if let Value::Object(obj) = value {
1707        out.push('(');
1708        for (i, field) in schema.fields.iter().enumerate() {
1709            if i > 0 {
1710                out.push_str(sep(opts.compact));
1711            }
1712            if let Some(v) = obj.get(&field.name) {
1713                let type_base = field.field_type.base.as_str();
1714                // For array fields with a known schema type, write tuples directly without @table
1715                if field.field_type.is_array {
1716                    if let Some(item_schema) = resolve_schema(schemas, Some(type_base), None) {
1717                        // The schema defines the element type - write array with tuples directly
1718                        write_schema_array(out, v, item_schema, schemas, indent, opts);
1719                    } else {
1720                        // No schema for element type - use regular array format
1721                        write_value_with_schemas(out, v, schemas, None, indent, None, opts);
1722                    }
1723                } else if resolve_schema(schemas, Some(type_base), None).is_some() {
1724                    // Non-array field with schema type - write as nested tuple
1725                    let nested_schema = resolve_schema(schemas, Some(type_base), None).unwrap();
1726                    write_tuple(out, v, nested_schema, schemas, indent, opts);
1727                } else {
1728                    write_value_with_schemas(out, v, schemas, None, indent, None, opts);
1729                }
1730            } else {
1731                out.push('~');
1732            }
1733        }
1734        out.push(')');
1735    } else {
1736        write_value_with_schemas(out, value, schemas, None, indent, None, opts);
1737    }
1738}
1739
1740/// Write an array of schema-typed values as tuples (without @table annotation)
1741fn write_schema_array(
1742    out: &mut String,
1743    value: &Value,
1744    schema: &Schema,
1745    schemas: &IndexMap<String, Schema>,
1746    indent: usize,
1747    opts: &FormatOptions,
1748) {
1749    if let Value::Array(arr) = value {
1750        if arr.is_empty() {
1751            out.push_str("[]");
1752            return;
1753        }
1754
1755        out.push_str("[\n");
1756        let inner_indent = if opts.compact { 0 } else { indent + 2 };
1757        for (i, item) in arr.iter().enumerate() {
1758            if !opts.compact {
1759                for _ in 0..inner_indent {
1760                    out.push(' ');
1761                }
1762            }
1763            write_tuple(out, item, schema, schemas, inner_indent, opts);
1764            if i < arr.len() - 1 {
1765                out.push(',');
1766            }
1767            out.push('\n');
1768        }
1769        if !opts.compact {
1770            for _ in 0..indent {
1771                out.push(' ');
1772            }
1773        }
1774        out.push(']');
1775    } else {
1776        // Not an array - fall back to regular value writing
1777        write_value_with_schemas(out, value, schemas, None, indent, None, opts);
1778    }
1779}
1780
1781#[cfg(test)]
1782mod tests {
1783    use super::*;
1784
1785    #[test]
1786    fn test_serde_json_number_behavior() {
1787        // Test how serde_json handles different number formats
1788        let json_str = r#"{"int": 42, "float_whole": 42.0, "float_frac": 42.5}"#;
1789        let parsed: serde_json::Value = serde_json::from_str(json_str).unwrap();
1790
1791        if let serde_json::Value::Object(obj) = parsed {
1792            let int_num = obj.get("int").unwrap().as_number().unwrap();
1793            let float_whole = obj.get("float_whole").unwrap().as_number().unwrap();
1794            let float_frac = obj.get("float_frac").unwrap().as_number().unwrap();
1795
1796            println!("int (42): is_i64={}, is_u64={}, is_f64={}",
1797                int_num.is_i64(), int_num.is_u64(), int_num.is_f64());
1798            println!("float_whole (42.0): is_i64={}, is_u64={}, is_f64={}",
1799                float_whole.is_i64(), float_whole.is_u64(), float_whole.is_f64());
1800            println!("float_frac (42.5): is_i64={}, is_u64={}, is_f64={}",
1801                float_frac.is_i64(), float_frac.is_u64(), float_frac.is_f64());
1802
1803            // Assert expected behavior
1804            assert!(int_num.is_i64(), "42 should be parsed as i64");
1805            assert!(float_whole.is_f64(), "42.0 should be parsed as f64");
1806            assert!(float_frac.is_f64(), "42.5 should be parsed as f64");
1807        }
1808
1809        // Test how Rust formats floats
1810        println!("Rust float formatting:");
1811        println!("  42.0f64.to_string() = '{}'", 42.0f64.to_string());
1812        println!("  42.5f64.to_string() = '{}'", 42.5f64.to_string());
1813
1814        // This is the problem! Rust's to_string() drops the .0
1815        // We need to ensure floats always have a decimal point
1816    }
1817
1818    #[test]
1819    fn test_parse_simple() {
1820        let doc = TeaLeaf::parse(r#"
1821            name: alice
1822            age: 30
1823            active: true
1824        "#).unwrap();
1825        
1826        assert_eq!(doc.get("name").unwrap().as_str(), Some("alice"));
1827        assert_eq!(doc.get("age").unwrap().as_int(), Some(30));
1828        assert_eq!(doc.get("active").unwrap().as_bool(), Some(true));
1829    }
1830
1831    #[test]
1832    fn test_parse_struct() {
1833        let doc = TeaLeaf::parse(r#"
1834            @struct user (id: int, name: string, email: string?)
1835            users: @table user [
1836                (1, alice, "alice@test.com"),
1837                (2, bob, ~),
1838            ]
1839        "#).unwrap();
1840        
1841        let schema = doc.schema("user").unwrap();
1842        assert_eq!(schema.fields.len(), 3);
1843        assert!(schema.fields[2].field_type.nullable);
1844        
1845        let users = doc.get("users").unwrap().as_array().unwrap();
1846        assert_eq!(users.len(), 2);
1847    }
1848
1849    #[test]
1850    fn test_nested_struct() {
1851        let doc = TeaLeaf::parse(r#"
1852            @struct address (city: string, zip: string)
1853            @struct user (id: int, name: string, home: address)
1854            users: @table user [
1855                (1, alice, (Berlin, "10115")),
1856                (2, bob, (Paris, "75001")),
1857            ]
1858        "#).unwrap();
1859        
1860        let users = doc.get("users").unwrap().as_array().unwrap();
1861        let alice = users[0].as_object().unwrap();
1862        let home = alice.get("home").unwrap().as_object().unwrap();
1863        assert_eq!(home.get("city").unwrap().as_str(), Some("Berlin"));
1864    }
1865
1866    #[test]
1867    fn test_three_level_nesting() {
1868        let doc = TeaLeaf::parse(r#"
1869            @struct method (type: string, last4: string)
1870            @struct payment (amount: float, method: method)
1871            @struct order (id: int, payment: payment)
1872            orders: @table order [
1873                (1, (99.99, (credit, "4242"))),
1874            ]
1875        "#).unwrap();
1876        
1877        let orders = doc.get("orders").unwrap().as_array().unwrap();
1878        let order = orders[0].as_object().unwrap();
1879        let payment = order.get("payment").unwrap().as_object().unwrap();
1880        let method = payment.get("method").unwrap().as_object().unwrap();
1881        assert_eq!(method.get("type").unwrap().as_str(), Some("credit"));
1882    }
1883
1884    #[test]
1885    fn test_json_roundtrip_basic() {
1886        let json = r#"{"name":"alice","age":30,"active":true,"score":95.5}"#;
1887        let doc = TeaLeaf::from_json(json).unwrap();
1888
1889        assert_eq!(doc.get("name").unwrap().as_str(), Some("alice"));
1890        assert_eq!(doc.get("age").unwrap().as_int(), Some(30));
1891        assert_eq!(doc.get("active").unwrap().as_bool(), Some(true));
1892        assert_eq!(doc.get("score").unwrap().as_float(), Some(95.5));
1893
1894        // Round-trip back to JSON
1895        let json_out = doc.to_json().unwrap();
1896        assert!(json_out.contains("\"name\":\"alice\"") || json_out.contains("\"name\": \"alice\""));
1897    }
1898
1899    #[test]
1900    fn test_json_roundtrip_root_array() {
1901        // Root-level arrays should round-trip without wrapping
1902        let json = r#"[{"id":"0001","type":"donut","name":"Cake"},{"id":"0002","type":"donut","name":"Raised"}]"#;
1903        let doc = TeaLeaf::from_json(json).unwrap();
1904
1905        // Internally stored under "root" key
1906        let root = doc.get("root").unwrap();
1907        let arr = root.as_array().unwrap();
1908        assert_eq!(arr.len(), 2);
1909
1910        // Round-trip should produce the array directly, NOT {"root": [...]}
1911        let json_out = doc.to_json_compact().unwrap();
1912        assert!(json_out.starts_with('['), "Root array should serialize directly: {}", json_out);
1913        assert!(json_out.ends_with(']'), "Root array should end with ]: {}", json_out);
1914        assert!(!json_out.contains("\"root\""), "Should NOT wrap in root object: {}", json_out);
1915
1916        // Verify content preserved
1917        assert!(json_out.contains("\"id\":\"0001\"") || json_out.contains("\"id\": \"0001\""));
1918        assert!(json_out.contains("\"name\":\"Cake\"") || json_out.contains("\"name\": \"Cake\""));
1919    }
1920
1921    #[test]
1922    fn test_json_roundtrip_root_array_empty() {
1923        // Empty array should also round-trip correctly
1924        let json = r#"[]"#;
1925        let doc = TeaLeaf::from_json(json).unwrap();
1926
1927        let json_out = doc.to_json_compact().unwrap();
1928        assert_eq!(json_out, "[]", "Empty array should round-trip: {}", json_out);
1929    }
1930
1931    #[test]
1932    fn test_json_roundtrip_root_object_with_root_key() {
1933        // An object that happens to have a "root" key should NOT be confused
1934        let json = r#"{"root":[1,2,3],"other":"value"}"#;
1935        let doc = TeaLeaf::from_json(json).unwrap();
1936
1937        let json_out = doc.to_json_compact().unwrap();
1938        // This was a root object, so it should stay as an object
1939        assert!(json_out.starts_with('{'), "Root object should stay as object: {}", json_out);
1940        assert!(json_out.contains("\"root\""), "root key should be preserved: {}", json_out);
1941        assert!(json_out.contains("\"other\""), "other key should be preserved: {}", json_out);
1942    }
1943
1944    #[test]
1945    fn test_json_export_bytes() {
1946        // Create a document with bytes programmatically
1947        let mut entries = IndexMap::new();
1948        entries.insert("data".to_string(), Value::Bytes(vec![0xca, 0xfe, 0xf0, 0x0d]));
1949        let doc = TeaLeaf { data: entries, schemas: IndexMap::new(), unions: IndexMap::new(), is_root_array: false };
1950
1951        let json = doc.to_json().unwrap();
1952        assert!(json.contains("0xcafef00d"), "Bytes should export as hex string: {}", json);
1953    }
1954
1955    #[test]
1956    fn test_json_export_ref() {
1957        let mut entries = IndexMap::new();
1958        entries.insert("config".to_string(), Value::Ref("base_config".to_string()));
1959        let doc = TeaLeaf { data: entries, schemas: IndexMap::new(), unions: IndexMap::new(), is_root_array: false };
1960
1961        let json = doc.to_json().unwrap();
1962        assert!(json.contains("\"$ref\""), "Ref should export with $ref key: {}", json);
1963        assert!(json.contains("base_config"), "Ref name should be in output: {}", json);
1964    }
1965
1966    #[test]
1967    fn test_json_export_tagged() {
1968        let mut entries = IndexMap::new();
1969        entries.insert("status".to_string(), Value::Tagged("ok".to_string(), Box::new(Value::Int(200))));
1970        let doc = TeaLeaf { data: entries, schemas: IndexMap::new(), unions: IndexMap::new(), is_root_array: false };
1971
1972        let json = doc.to_json().unwrap();
1973        assert!(json.contains("\"$tag\""), "Tagged should export with $tag key: {}", json);
1974        assert!(json.contains("\"ok\""), "Tag name should be in output: {}", json);
1975        assert!(json.contains("\"$value\""), "Tagged should have $value key: {}", json);
1976    }
1977
1978    #[test]
1979    fn test_json_export_map() {
1980        let mut entries = IndexMap::new();
1981        entries.insert("lookup".to_string(), Value::Map(vec![
1982            (Value::Int(1), Value::String("one".to_string())),
1983            (Value::Int(2), Value::String("two".to_string())),
1984        ]));
1985        let doc = TeaLeaf { data: entries, schemas: IndexMap::new(), unions: IndexMap::new(), is_root_array: false };
1986
1987        let json = doc.to_json().unwrap();
1988        // Map exports as array of [key, value] pairs
1989        // Check that the structure contains the key and values (regardless of formatting)
1990        assert!(json.contains("\"lookup\""), "Map key should be in output: {}", json);
1991        assert!(json.contains("\"one\""), "Map values should be in output: {}", json);
1992        assert!(json.contains("\"two\""), "Map values should be in output: {}", json);
1993        // Verify it's an array structure (has nested arrays)
1994        let compact = json.replace(" ", "").replace("\n", "");
1995        assert!(compact.contains("[["), "Map should export as nested array: {}", json);
1996    }
1997
1998    #[test]
1999    fn test_json_export_timestamp() {
2000        let mut entries = IndexMap::new();
2001        // 2024-01-15T10:30:00Z = 1705315800000 ms, but let's verify with a known value
2002        // Use 0 = 1970-01-01T00:00:00Z for simplicity
2003        entries.insert("created".to_string(), Value::Timestamp(0, 0));
2004        let doc = TeaLeaf { data: entries, schemas: IndexMap::new(), unions: IndexMap::new(), is_root_array: false };
2005
2006        let json = doc.to_json().unwrap();
2007        assert!(json.contains("1970-01-01"), "Timestamp should export as ISO 8601 date: {}", json);
2008        assert!(json.contains("00:00:00"), "Timestamp time should be epoch: {}", json);
2009    }
2010
2011    #[test]
2012    fn test_json_import_limitation_ref_becomes_object() {
2013        // JSON with $ref pattern should become a plain object, NOT a Ref value
2014        let json = r#"{"config":{"$ref":"base_config"}}"#;
2015        let doc = TeaLeaf::from_json(json).unwrap();
2016
2017        let config = doc.get("config").unwrap();
2018        // This should be an Object, not a Ref
2019        assert!(config.as_object().is_some(), "JSON $ref should become Object, not Ref");
2020        assert!(config.as_ref_name().is_none(), "JSON $ref should NOT become Ref value");
2021    }
2022
2023    #[test]
2024    fn test_json_import_limitation_tagged_becomes_object() {
2025        // JSON with $tag/$value pattern should become a plain object, NOT a Tagged value
2026        let json = r#"{"status":{"$tag":"ok","$value":200}}"#;
2027        let doc = TeaLeaf::from_json(json).unwrap();
2028
2029        let status = doc.get("status").unwrap();
2030        // This should be an Object, not a Tagged
2031        assert!(status.as_object().is_some(), "JSON $tag should become Object, not Tagged");
2032        assert!(status.as_tagged().is_none(), "JSON $tag should NOT become Tagged value");
2033    }
2034
2035    #[test]
2036    fn test_json_import_limitation_timestamp_becomes_string() {
2037        // ISO 8601 strings in JSON should remain strings, NOT become Timestamp
2038        let json = r#"{"created":"2024-01-15T10:30:00Z"}"#;
2039        let doc = TeaLeaf::from_json(json).unwrap();
2040
2041        let created = doc.get("created").unwrap();
2042        // This should be a String, not a Timestamp
2043        assert!(created.as_str().is_some(), "ISO timestamp string should remain String");
2044        assert!(created.as_timestamp_millis().is_none(), "ISO timestamp should NOT become Timestamp value");
2045    }
2046
2047    // =========================================================================
2048    // JSON ↔ Binary Conversion Tests
2049    // =========================================================================
2050
2051    #[test]
2052    fn test_json_to_binary_roundtrip_primitives() {
2053        use tempfile::NamedTempFile;
2054
2055        let json = r#"{"name":"alice","age":30,"score":95.5,"active":true,"nothing":null}"#;
2056        let doc = TeaLeaf::from_json(json).unwrap();
2057
2058        // Compile to binary
2059        let temp = NamedTempFile::new().unwrap();
2060        let path = temp.path();
2061        doc.compile(path, false).unwrap();
2062
2063        // Read back
2064        let reader = Reader::open(path).unwrap();
2065        assert_eq!(reader.get("name").unwrap().as_str(), Some("alice"));
2066        assert_eq!(reader.get("age").unwrap().as_int(), Some(30));
2067        assert_eq!(reader.get("score").unwrap().as_float(), Some(95.5));
2068        assert_eq!(reader.get("active").unwrap().as_bool(), Some(true));
2069        assert!(reader.get("nothing").unwrap().is_null());
2070    }
2071
2072    #[test]
2073    fn test_json_to_binary_roundtrip_arrays() {
2074        use tempfile::NamedTempFile;
2075
2076        let json = r#"{"numbers":[1,2,3,4,5],"names":["alice","bob","charlie"]}"#;
2077        let doc = TeaLeaf::from_json(json).unwrap();
2078
2079        let temp = NamedTempFile::new().unwrap();
2080        doc.compile(temp.path(), false).unwrap();
2081
2082        let reader = Reader::open(temp.path()).unwrap();
2083
2084        let numbers = reader.get("numbers").unwrap();
2085        let arr = numbers.as_array().unwrap();
2086        assert_eq!(arr.len(), 5);
2087        assert_eq!(arr[0].as_int(), Some(1));
2088        assert_eq!(arr[4].as_int(), Some(5));
2089
2090        let names = reader.get("names").unwrap();
2091        let arr = names.as_array().unwrap();
2092        assert_eq!(arr.len(), 3);
2093        assert_eq!(arr[0].as_str(), Some("alice"));
2094    }
2095
2096    #[test]
2097    fn test_json_to_binary_roundtrip_nested_objects() {
2098        use tempfile::NamedTempFile;
2099
2100        let json = r#"{"user":{"name":"alice","profile":{"bio":"dev","settings":{"theme":"dark"}}}}"#;
2101        let doc = TeaLeaf::from_json(json).unwrap();
2102
2103        let temp = NamedTempFile::new().unwrap();
2104        doc.compile(temp.path(), false).unwrap();
2105
2106        let reader = Reader::open(temp.path()).unwrap();
2107        let user = reader.get("user").unwrap();
2108        let user_obj = user.as_object().unwrap();
2109        assert_eq!(user_obj.get("name").unwrap().as_str(), Some("alice"));
2110
2111        let profile = user_obj.get("profile").unwrap().as_object().unwrap();
2112        assert_eq!(profile.get("bio").unwrap().as_str(), Some("dev"));
2113
2114        let settings = profile.get("settings").unwrap().as_object().unwrap();
2115        assert_eq!(settings.get("theme").unwrap().as_str(), Some("dark"));
2116    }
2117
2118    #[test]
2119    fn test_json_to_binary_with_compression() {
2120        use tempfile::NamedTempFile;
2121
2122        // Create a document with repetitive data to test compression
2123        let mut entries = IndexMap::new();
2124        entries.insert("data".to_string(), Value::String("a".repeat(1000)));
2125        entries.insert("count".to_string(), Value::Int(12345));
2126        let doc = TeaLeaf { data: entries, schemas: IndexMap::new(), unions: IndexMap::new(), is_root_array: false };
2127
2128        let temp = NamedTempFile::new().unwrap();
2129        doc.compile(temp.path(), true).unwrap(); // compressed
2130
2131        let reader = Reader::open(temp.path()).unwrap();
2132        assert_eq!(reader.get("data").unwrap().as_str(), Some("a".repeat(1000).as_str()));
2133        assert_eq!(reader.get("count").unwrap().as_int(), Some(12345));
2134    }
2135
2136    #[test]
2137    fn test_tl_to_binary_preserves_ref() {
2138        use tempfile::NamedTempFile;
2139
2140        let mut entries = IndexMap::new();
2141        entries.insert("base".to_string(), Value::Object(vec![
2142            ("host".to_string(), Value::String("localhost".to_string())),
2143        ].into_iter().collect()));
2144        entries.insert("config".to_string(), Value::Ref("base".to_string()));
2145        let doc = TeaLeaf { data: entries, schemas: IndexMap::new(), unions: IndexMap::new(), is_root_array: false };
2146
2147        let temp = NamedTempFile::new().unwrap();
2148        doc.compile(temp.path(), false).unwrap();
2149
2150        let reader = Reader::open(temp.path()).unwrap();
2151        let config = reader.get("config").unwrap();
2152        assert_eq!(config.as_ref_name(), Some("base"));
2153    }
2154
2155    #[test]
2156    fn test_tl_to_binary_preserves_tagged() {
2157        use tempfile::NamedTempFile;
2158
2159        let mut entries = IndexMap::new();
2160        entries.insert("status".to_string(), Value::Tagged("ok".to_string(), Box::new(Value::Int(200))));
2161        let doc = TeaLeaf { data: entries, schemas: IndexMap::new(), unions: IndexMap::new(), is_root_array: false };
2162
2163        let temp = NamedTempFile::new().unwrap();
2164        doc.compile(temp.path(), false).unwrap();
2165
2166        let reader = Reader::open(temp.path()).unwrap();
2167        let status = reader.get("status").unwrap();
2168        let (tag, value) = status.as_tagged().unwrap();
2169        assert_eq!(tag, "ok");
2170        assert_eq!(value.as_int(), Some(200));
2171    }
2172
2173    #[test]
2174    fn test_tl_to_binary_preserves_map() {
2175        use tempfile::NamedTempFile;
2176
2177        let mut entries = IndexMap::new();
2178        entries.insert("lookup".to_string(), Value::Map(vec![
2179            (Value::Int(1), Value::String("one".to_string())),
2180            (Value::Int(2), Value::String("two".to_string())),
2181        ]));
2182        let doc = TeaLeaf { data: entries, schemas: IndexMap::new(), unions: IndexMap::new(), is_root_array: false };
2183
2184        let temp = NamedTempFile::new().unwrap();
2185        doc.compile(temp.path(), false).unwrap();
2186
2187        let reader = Reader::open(temp.path()).unwrap();
2188        let lookup = reader.get("lookup").unwrap();
2189        let map = lookup.as_map().unwrap();
2190        assert_eq!(map.len(), 2);
2191        assert_eq!(map[0].0.as_int(), Some(1));
2192        assert_eq!(map[0].1.as_str(), Some("one"));
2193    }
2194
2195    #[test]
2196    fn test_tl_to_binary_preserves_bytes() {
2197        use tempfile::NamedTempFile;
2198
2199        let mut entries = IndexMap::new();
2200        entries.insert("data".to_string(), Value::Bytes(vec![0xca, 0xfe, 0xf0, 0x0d]));
2201        let doc = TeaLeaf { data: entries, schemas: IndexMap::new(), unions: IndexMap::new(), is_root_array: false };
2202
2203        let temp = NamedTempFile::new().unwrap();
2204        doc.compile(temp.path(), false).unwrap();
2205
2206        let reader = Reader::open(temp.path()).unwrap();
2207        let data = reader.get("data").unwrap();
2208        assert_eq!(data.as_bytes(), Some(vec![0xca, 0xfe, 0xf0, 0x0d].as_slice()));
2209    }
2210
2211    #[test]
2212    fn test_tl_to_binary_preserves_timestamp() {
2213        use tempfile::NamedTempFile;
2214
2215        let mut entries = IndexMap::new();
2216        entries.insert("created".to_string(), Value::Timestamp(1705315800000, 0)); // 2024-01-15T10:30:00Z
2217        let doc = TeaLeaf { data: entries, schemas: IndexMap::new(), unions: IndexMap::new(), is_root_array: false };
2218
2219        let temp = NamedTempFile::new().unwrap();
2220        doc.compile(temp.path(), false).unwrap();
2221
2222        let reader = Reader::open(temp.path()).unwrap();
2223        let created = reader.get("created").unwrap();
2224        assert_eq!(created.as_timestamp_millis(), Some(1705315800000));
2225    }
2226
2227    #[test]
2228    fn test_json_import_limitation_hex_string_remains_string() {
2229        // Hex strings in JSON should remain strings, NOT become Bytes
2230        let json = r#"{"data":"0xcafef00d"}"#;
2231        let doc = TeaLeaf::from_json(json).unwrap();
2232
2233        let data = doc.get("data").unwrap();
2234        // This should be a String, not Bytes
2235        assert!(data.as_str().is_some(), "Hex string should remain String");
2236        assert_eq!(data.as_str(), Some("0xcafef00d"));
2237        assert!(data.as_bytes().is_none(), "Hex string should NOT become Bytes value");
2238    }
2239
/// JSON import limitation: an array of two-element arrays is imported as a
/// nested array and is not promoted to a map value.
#[test]
fn test_json_import_limitation_array_pairs_remain_array() {
    let doc = TeaLeaf::from_json(r#"{"lookup":[[1,"one"],[2,"two"]]}"#).unwrap();
    let lookup = doc.get("lookup").unwrap();

    // The value must look like an array, not like a map.
    assert!(lookup.as_array().is_some(), "Array of pairs should remain Array");
    assert!(lookup.as_map().is_none(), "Array of pairs should NOT become Map value");

    // The nested structure is intact: two rows, the first being [1, "one"].
    let rows = lookup.as_array().unwrap();
    assert_eq!(rows.len(), 2);
    let head = rows[0].as_array().unwrap();
    assert_eq!(head[0].as_int(), Some(1));
    assert_eq!(head[1].as_str(), Some("one"));
}
2258
2259    // =========================================================================
2260    // Cross-Language Parity Test
2261    // =========================================================================
2262
/// Cross-language parity check covering every special value type.
///
/// Builds one document with an entry per value kind, confirms each entry
/// survives a compile-to-binary / read-back cycle, and then checks the JSON
/// export conventions. The same fixture is tested in .NET.
#[test]
fn test_cross_language_parity_all_types() {
    use tempfile::NamedTempFile;

    // One entry per value kind, in fixture order.
    let data: IndexMap<String, Value> = vec![
        ("null_val", Value::Null),
        ("bool_true", Value::Bool(true)),
        ("int_val", Value::Int(42)),
        ("float_val", Value::Float(3.14159)),
        ("string_val", Value::String("hello".to_string())),
        ("bytes_val", Value::Bytes(vec![0xca, 0xfe])),
        ("timestamp_val", Value::Timestamp(0, 0)),
        ("array_val", Value::Array(vec![Value::Int(1), Value::Int(2)])),
        (
            "object_val",
            Value::Object(vec![("x".to_string(), Value::Int(1))].into_iter().collect()),
        ),
        ("ref_val", Value::Ref("object_val".to_string())),
        ("tagged_val", Value::Tagged("ok".to_string(), Box::new(Value::Int(200)))),
        (
            "map_val",
            Value::Map(vec![(Value::Int(1), Value::String("one".to_string()))]),
        ),
    ]
    .into_iter()
    .map(|(key, value)| (key.to_string(), value))
    .collect();

    let doc = TeaLeaf::new(IndexMap::new(), data);

    // Binary round-trip: compile into a temp file, then reopen it.
    let temp = NamedTempFile::new().unwrap();
    doc.compile(temp.path(), false).unwrap();
    let reader = Reader::open(temp.path()).unwrap();

    // Scalars and bytes come back with the same values.
    assert!(reader.get("null_val").unwrap().is_null());
    assert_eq!(reader.get("bool_true").unwrap().as_bool(), Some(true));
    assert_eq!(reader.get("int_val").unwrap().as_int(), Some(42));
    assert_eq!(reader.get("float_val").unwrap().as_float(), Some(3.14159));
    assert_eq!(reader.get("string_val").unwrap().as_str(), Some("hello"));
    assert_eq!(reader.get("bytes_val").unwrap().as_bytes(), Some(&[0xca, 0xfe][..]));
    assert_eq!(reader.get("timestamp_val").unwrap().as_timestamp_millis(), Some(0));

    // Containers keep their kind (and, for the array, their length).
    let array_val = reader.get("array_val").unwrap();
    assert_eq!(array_val.as_array().unwrap().len(), 2);

    let object_val = reader.get("object_val").unwrap();
    assert!(object_val.as_object().is_some());

    // Reference and tagged values keep their name / tag and payload.
    let reference = reader.get("ref_val").unwrap();
    assert_eq!(reference.as_ref_name(), Some("object_val"));

    let tagged_val = reader.get("tagged_val").unwrap();
    let (tag, inner) = tagged_val.as_tagged().unwrap();
    assert_eq!(tag, "ok");
    assert_eq!(inner.as_int(), Some(200));

    let map_val = reader.get("map_val").unwrap();
    let entries = map_val.as_map().unwrap();
    assert_eq!(entries.len(), 1);

    // JSON export conventions for the special types.
    let json = doc.to_json().unwrap();

    // Bytes -> hex string.
    assert!(json.contains("0xcafe"), "Bytes should export as hex: {}", json);

    // Ref -> object carrying a $ref key.
    assert!(json.contains("\"$ref\""), "Ref should have $ref key: {}", json);

    // Tagged -> object carrying $tag and $value keys.
    assert!(json.contains("\"$tag\""), "Tagged should have $tag: {}", json);
    assert!(json.contains("\"$value\""), "Tagged should have $value: {}", json);

    // Map -> array of pairs; strip whitespace so pretty-printing does not matter.
    let squeezed = json.replace(" ", "").replace("\n", "");
    assert!(squeezed.contains("[["), "Map should export as array of pairs: {}", json);

    // Timestamp -> ISO 8601 text.
    assert!(json.contains("1970-01-01"), "Timestamp should be ISO 8601: {}", json);
}
2343
2344    // =========================================================================
2345    // JSON Conversion Contract Tests
2346    // =========================================================================
2347    // These tests lock down the exact JSON↔TeaLeaf conversion behavior.
2348    // STABILITY POLICY:
2349    // - Plain JSON roundtrip: MUST be lossless for primitives, arrays, objects
2350    // - TeaLeaf→JSON: Special types have FIXED representations that MUST NOT change
2351    // - JSON→TeaLeaf: No magic parsing; $ref/$tag/hex/ISO8601 stay as plain JSON
2352
2353    mod conversion_contracts {
2354        use super::*;
2355
2356        // --- Plain JSON Roundtrip (STABLE) ---
2357
2358        #[test]
2359        fn contract_null_roundtrip() {
2360            let doc = TeaLeaf::from_json("null").unwrap();
2361            assert!(matches!(doc.get("root").unwrap(), Value::Null));
2362        }
2363
2364        #[test]
2365        fn contract_bool_roundtrip() {
2366            let doc = TeaLeaf::from_json(r#"{"t": true, "f": false}"#).unwrap();
2367            assert_eq!(doc.get("t").unwrap().as_bool(), Some(true));
2368            assert_eq!(doc.get("f").unwrap().as_bool(), Some(false));
2369
2370            let json = doc.to_json_compact().unwrap();
2371            assert!(json.contains("true"));
2372            assert!(json.contains("false"));
2373        }
2374
2375        #[test]
2376        fn contract_integer_roundtrip() {
2377            let doc = TeaLeaf::from_json(r#"{"zero": 0, "pos": 42, "neg": -123}"#).unwrap();
2378            assert_eq!(doc.get("zero").unwrap().as_int(), Some(0));
2379            assert_eq!(doc.get("pos").unwrap().as_int(), Some(42));
2380            assert_eq!(doc.get("neg").unwrap().as_int(), Some(-123));
2381        }
2382
2383        #[test]
2384        fn contract_float_roundtrip() {
2385            let doc = TeaLeaf::from_json(r#"{"pi": 3.14159}"#).unwrap();
2386            let pi = doc.get("pi").unwrap().as_float().unwrap();
2387            assert!((pi - 3.14159).abs() < 0.00001);
2388        }
2389
2390        #[test]
2391        fn contract_string_roundtrip() {
2392            let doc = TeaLeaf::from_json(r#"{"s": "hello world", "u": "日本語", "e": ""}"#).unwrap();
2393            assert_eq!(doc.get("s").unwrap().as_str(), Some("hello world"));
2394            assert_eq!(doc.get("u").unwrap().as_str(), Some("日本語"));
2395            assert_eq!(doc.get("e").unwrap().as_str(), Some(""));
2396        }
2397
2398        #[test]
2399        fn contract_array_roundtrip() {
2400            let doc = TeaLeaf::from_json(r#"{"arr": [1, "two", true, null]}"#).unwrap();
2401            let arr = doc.get("arr").unwrap().as_array().unwrap();
2402            assert_eq!(arr.len(), 4);
2403            assert_eq!(arr[0].as_int(), Some(1));
2404            assert_eq!(arr[1].as_str(), Some("two"));
2405            assert_eq!(arr[2].as_bool(), Some(true));
2406            assert!(matches!(arr[3], Value::Null));
2407        }
2408
2409        #[test]
2410        fn contract_nested_array_roundtrip() {
2411            let doc = TeaLeaf::from_json(r#"{"matrix": [[1, 2], [3, 4]]}"#).unwrap();
2412            let matrix = doc.get("matrix").unwrap().as_array().unwrap();
2413            assert_eq!(matrix.len(), 2);
2414            let row0 = matrix[0].as_array().unwrap();
2415            assert_eq!(row0[0].as_int(), Some(1));
2416            assert_eq!(row0[1].as_int(), Some(2));
2417        }
2418
2419        #[test]
2420        fn contract_object_roundtrip() {
2421            let doc = TeaLeaf::from_json(r#"{"user": {"name": "alice", "age": 30}}"#).unwrap();
2422            let user = doc.get("user").unwrap().as_object().unwrap();
2423            assert_eq!(user.get("name").unwrap().as_str(), Some("alice"));
2424            assert_eq!(user.get("age").unwrap().as_int(), Some(30));
2425        }
2426
2427        // --- TeaLeaf→JSON Fixed Representations (STABLE) ---
2428
2429        #[test]
2430        fn contract_bytes_to_json_hex() {
2431            let mut data = IndexMap::new();
2432            data.insert("b".to_string(), Value::Bytes(vec![0xca, 0xfe, 0xba, 0xbe]));
2433            let doc = TeaLeaf { data, schemas: IndexMap::new(), unions: IndexMap::new(), is_root_array: false };
2434
2435            let json = doc.to_json_compact().unwrap();
2436            // CONTRACT: Bytes serialize as lowercase hex with 0x prefix
2437            assert!(json.contains("\"0xcafebabe\""), "Bytes must be 0x-prefixed hex: {}", json);
2438        }
2439
2440        #[test]
2441        fn contract_bytes_empty_to_json() {
2442            let mut data = IndexMap::new();
2443            data.insert("b".to_string(), Value::Bytes(vec![]));
2444            let doc = TeaLeaf { data, schemas: IndexMap::new(), unions: IndexMap::new(), is_root_array: false };
2445
2446            let json = doc.to_json_compact().unwrap();
2447            // CONTRACT: Empty bytes serialize as "0x"
2448            assert!(json.contains("\"0x\""), "Empty bytes must be \"0x\": {}", json);
2449        }
2450
2451        #[test]
2452        fn contract_timestamp_to_json_iso8601() {
2453            let mut data = IndexMap::new();
2454            // 2024-01-15T10:50:00.123Z (verified milliseconds since epoch)
2455            data.insert("ts".to_string(), Value::Timestamp(1705315800123, 0));
2456            let doc = TeaLeaf { data, schemas: IndexMap::new(), unions: IndexMap::new(), is_root_array: false };
2457
2458            let json = doc.to_json_compact().unwrap();
2459            // CONTRACT: Timestamp serializes as ISO 8601 with milliseconds
2460            assert!(json.contains("2024-01-15T10:50:00.123Z"),
2461                "Timestamp must be ISO 8601 with ms: {}", json);
2462        }
2463
2464        #[test]
2465        fn contract_timestamp_epoch_to_json() {
2466            let mut data = IndexMap::new();
2467            data.insert("ts".to_string(), Value::Timestamp(0, 0));
2468            let doc = TeaLeaf { data, schemas: IndexMap::new(), unions: IndexMap::new(), is_root_array: false };
2469
2470            let json = doc.to_json_compact().unwrap();
2471            // CONTRACT: Unix epoch is 1970-01-01T00:00:00Z (no ms for whole seconds)
2472            assert!(json.contains("1970-01-01T00:00:00Z"),
2473                "Epoch must be 1970-01-01T00:00:00Z: {}", json);
2474        }
2475
2476        #[test]
2477        fn contract_ref_to_json() {
2478            let mut data = IndexMap::new();
2479            data.insert("r".to_string(), Value::Ref("target_key".to_string()));
2480            let doc = TeaLeaf { data, schemas: IndexMap::new(), unions: IndexMap::new(), is_root_array: false };
2481
2482            let json = doc.to_json_compact().unwrap();
2483            // CONTRACT: Ref serializes as {"$ref": "name"}
2484            assert!(json.contains("\"$ref\":\"target_key\"") || json.contains("\"$ref\": \"target_key\""),
2485                "Ref must be {{\"$ref\": \"name\"}}: {}", json);
2486        }
2487
2488        #[test]
2489        fn contract_tagged_to_json() {
2490            let mut data = IndexMap::new();
2491            data.insert("t".to_string(), Value::Tagged("ok".to_string(), Box::new(Value::Int(200))));
2492            let doc = TeaLeaf { data, schemas: IndexMap::new(), unions: IndexMap::new(), is_root_array: false };
2493
2494            let json = doc.to_json_compact().unwrap();
2495            // CONTRACT: Tagged serializes with $tag and $value keys
2496            assert!(json.contains("\"$tag\""), "Tagged must have $tag: {}", json);
2497            assert!(json.contains("\"ok\""), "Tag name must be present: {}", json);
2498            assert!(json.contains("\"$value\""), "Tagged must have $value: {}", json);
2499            assert!(json.contains("200"), "Inner value must be present: {}", json);
2500        }
2501
2502        #[test]
2503        fn contract_tagged_null_value_to_json() {
2504            let mut data = IndexMap::new();
2505            data.insert("t".to_string(), Value::Tagged("none".to_string(), Box::new(Value::Null)));
2506            let doc = TeaLeaf { data, schemas: IndexMap::new(), unions: IndexMap::new(), is_root_array: false };
2507
2508            let json = doc.to_json_compact().unwrap();
2509            // CONTRACT: Tagged with null inner still has $value: null
2510            assert!(json.contains("\"$value\":null") || json.contains("\"$value\": null"),
2511                "Tagged with null must have $value:null: {}", json);
2512        }
2513
2514        #[test]
2515        fn contract_map_to_json_pairs() {
2516            let mut data = IndexMap::new();
2517            data.insert("m".to_string(), Value::Map(vec![
2518                (Value::Int(1), Value::String("one".to_string())),
2519                (Value::Int(2), Value::String("two".to_string())),
2520            ]));
2521            let doc = TeaLeaf { data, schemas: IndexMap::new(), unions: IndexMap::new(), is_root_array: false };
2522
2523            let json = doc.to_json_compact().unwrap();
2524            // CONTRACT: Map serializes as array of [key, value] pairs
2525            assert!(json.contains("[[1,\"one\"],[2,\"two\"]]") ||
2526                    json.contains("[[1, \"one\"], [2, \"two\"]]"),
2527                "Map must be [[k,v],...]: {}", json);
2528        }
2529
2530        #[test]
2531        fn contract_map_empty_to_json() {
2532            let mut data = IndexMap::new();
2533            data.insert("m".to_string(), Value::Map(vec![]));
2534            let doc = TeaLeaf { data, schemas: IndexMap::new(), unions: IndexMap::new(), is_root_array: false };
2535
2536            let json = doc.to_json_compact().unwrap();
2537            // CONTRACT: Empty map serializes as empty array
2538            assert!(json.contains("\"m\":[]") || json.contains("\"m\": []"),
2539                "Empty map must be []: {}", json);
2540        }
2541
2542        // --- JSON→TeaLeaf No Magic (STABLE) ---
2543
2544        #[test]
2545        fn contract_json_dollar_ref_stays_object() {
2546            let doc = TeaLeaf::from_json(r#"{"x": {"$ref": "some_key"}}"#).unwrap();
2547            let x = doc.get("x").unwrap();
2548            // CONTRACT: JSON {"$ref": ...} MUST remain Object, NOT become Ref
2549            assert!(x.as_object().is_some(), "$ref in JSON must stay Object, not become Ref");
2550            assert!(x.as_ref_name().is_none(), "$ref must not auto-convert to Ref type");
2551        }
2552
2553        #[test]
2554        fn contract_json_dollar_tag_stays_object() {
2555            let doc = TeaLeaf::from_json(r#"{"x": {"$tag": "ok", "$value": 200}}"#).unwrap();
2556            let x = doc.get("x").unwrap();
2557            // CONTRACT: JSON {"$tag": ..., "$value": ...} MUST remain Object
2558            assert!(x.as_object().is_some(), "$tag in JSON must stay Object, not become Tagged");
2559            assert!(x.as_tagged().is_none(), "$tag must not auto-convert to Tagged type");
2560        }
2561
2562        #[test]
2563        fn contract_json_hex_string_stays_string() {
2564            let doc = TeaLeaf::from_json(r#"{"x": "0xcafef00d"}"#).unwrap();
2565            let x = doc.get("x").unwrap();
2566            // CONTRACT: Hex strings MUST remain String, NOT become Bytes
2567            assert_eq!(x.as_str(), Some("0xcafef00d"));
2568            assert!(x.as_bytes().is_none(), "Hex string must not auto-convert to Bytes");
2569        }
2570
2571        #[test]
2572        fn contract_json_iso_timestamp_stays_string() {
2573            let doc = TeaLeaf::from_json(r#"{"x": "2024-01-15T10:30:00.000Z"}"#).unwrap();
2574            let x = doc.get("x").unwrap();
2575            // CONTRACT: ISO 8601 strings MUST remain String, NOT become Timestamp
2576            assert_eq!(x.as_str(), Some("2024-01-15T10:30:00.000Z"));
2577            assert!(x.as_timestamp_millis().is_none(), "ISO string must not auto-convert to Timestamp");
2578        }
2579
2580        #[test]
2581        fn contract_json_array_pairs_stays_array() {
2582            let doc = TeaLeaf::from_json(r#"{"x": [[1, "one"], [2, "two"]]}"#).unwrap();
2583            let x = doc.get("x").unwrap();
2584            // CONTRACT: Array of pairs MUST remain Array, NOT become Map
2585            assert!(x.as_array().is_some(), "Array of pairs must stay Array, not become Map");
2586            assert!(x.as_map().is_none(), "Array pairs must not auto-convert to Map");
2587        }
2588
2589        // --- Number Type Inference (STABLE) ---
2590
2591        #[test]
2592        fn contract_number_integer_to_int() {
2593            let doc = TeaLeaf::from_json(r#"{"n": 42}"#).unwrap();
2594            // CONTRACT: Integers that fit i64 become Int
2595            assert!(doc.get("n").unwrap().as_int().is_some());
2596        }
2597
2598        #[test]
2599        fn contract_number_large_to_uint() {
2600            // Max u64 = 18446744073709551615, which doesn't fit i64
2601            let doc = TeaLeaf::from_json(r#"{"n": 18446744073709551615}"#).unwrap();
2602            // CONTRACT: Large positive integers that fit u64 become UInt
2603            assert!(doc.get("n").unwrap().as_uint().is_some());
2604        }
2605
2606        #[test]
2607        fn contract_number_decimal_to_float() {
2608            let doc = TeaLeaf::from_json(r#"{"n": 3.14}"#).unwrap();
2609            // CONTRACT: Numbers with decimals become Float
2610            assert!(doc.get("n").unwrap().as_float().is_some());
2611        }
2612
2613        // --- Edge Cases (STABLE) ---
2614
2615        #[test]
2616        fn contract_float_nan_to_null() {
2617            let mut data = IndexMap::new();
2618            data.insert("f".to_string(), Value::Float(f64::NAN));
2619            let doc = TeaLeaf { data, schemas: IndexMap::new(), unions: IndexMap::new(), is_root_array: false };
2620
2621            let json = doc.to_json_compact().unwrap();
2622            // CONTRACT: NaN serializes as null (JSON has no NaN)
2623            assert!(json.contains("null"), "NaN must serialize as null: {}", json);
2624        }
2625
2626        #[test]
2627        fn contract_float_infinity_to_null() {
2628            let mut data = IndexMap::new();
2629            data.insert("f".to_string(), Value::Float(f64::INFINITY));
2630            let doc = TeaLeaf { data, schemas: IndexMap::new(), unions: IndexMap::new(), is_root_array: false };
2631
2632            let json = doc.to_json_compact().unwrap();
2633            // CONTRACT: Infinity serializes as null (JSON has no Infinity)
2634            assert!(json.contains("null"), "Infinity must serialize as null: {}", json);
2635        }
2636
2637        #[test]
2638        fn contract_deep_nesting_preserved() {
2639            let doc = TeaLeaf::from_json(r#"{"a":{"b":{"c":{"d":{"e":5}}}}}"#).unwrap();
2640            let a = doc.get("a").unwrap().as_object().unwrap();
2641            let b = a.get("b").unwrap().as_object().unwrap();
2642            let c = b.get("c").unwrap().as_object().unwrap();
2643            let d = c.get("d").unwrap().as_object().unwrap();
2644            assert_eq!(d.get("e").unwrap().as_int(), Some(5));
2645        }
2646    }
2647
2648    // =========================================================================
2649    // Schema Inference Tests
2650    // =========================================================================
2651
/// A uniform array of objects under `users` yields an inferred `user`
/// schema whose fields follow JSON key order, while the data stays readable.
#[test]
fn test_schema_inference_simple_array() {
    let doc = TeaLeaf::from_json_with_schemas(
        r#"{"users": [{"name": "alice", "age": 30}, {"name": "bob", "age": 25}]}"#,
    )
    .unwrap();

    // The schema name is expected to be the singular "user".
    let inferred = doc.schema("user");
    assert!(inferred.is_some(), "Should infer 'user' schema from 'users' array");

    let user = inferred.unwrap();
    assert_eq!(user.fields.len(), 2);

    // Field order mirrors the order of keys in the JSON input.
    assert_eq!(user.fields[0].name, "name");
    assert_eq!(user.fields[1].name, "age");

    // Inference must not disturb the underlying data.
    let rows = doc.get("users").unwrap().as_array().unwrap();
    assert_eq!(rows.len(), 2);
    let first_row = rows[0].as_object().unwrap();
    assert_eq!(first_row.get("name").unwrap().as_str(), Some("alice"));
}
2673
/// Arrays of objects nested inside array elements get their own schema,
/// and the parent schema's field refers to it as an array of that schema.
#[test]
fn test_schema_inference_nested_arrays() {
    let doc = TeaLeaf::from_json_with_schemas(r#"{
            "orders": [
                {"id": 1, "items": [{"sku": "A", "qty": 2}, {"sku": "B", "qty": 1}]},
                {"id": 2, "items": [{"sku": "C", "qty": 3}]}
            ]
        }"#).unwrap();

    // Both the outer and the nested array produce a schema.
    assert!(doc.schema("order").is_some(), "Should infer 'order' schema");
    assert!(doc.schema("item").is_some(), "Should infer 'item' schema");

    // The order schema carries both of its JSON keys as fields.
    let order = doc.schema("order").unwrap();
    let has_field = |wanted: &str| order.fields.iter().any(|f| f.name == wanted);
    assert!(has_field("id"));
    assert!(has_field("items"));

    // `items` is typed as an array whose base type is the `item` schema.
    let items = order.fields.iter().find(|f| f.name == "items").unwrap();
    assert!(items.field_type.is_array);
    assert_eq!(items.field_type.base, "item");
}
2698
/// Text output of a schema-inferred document contains the struct
/// definition, a `@table` directive, and the row data.
#[test]
fn test_schema_inference_to_tl_text() {
    let doc = TeaLeaf::from_json_with_schemas(
        r#"{"products": [{"name": "Widget", "price": 9.99}, {"name": "Gadget", "price": 19.99}]}"#,
    )
    .unwrap();
    let rendered = doc.to_tl_with_schemas();

    // Struct definition with both typed fields.
    assert!(rendered.contains("@struct product"), "TeaLeaf text should contain struct definition");
    assert!(rendered.contains("name: string"), "Struct should have name field");
    assert!(rendered.contains("price: float"), "Struct should have price field");

    // The data section uses the table directive for the inferred schema.
    assert!(rendered.contains("@table product"), "TeaLeaf text should use @table for data");

    // Row data is present, whether or not the writer quoted the string.
    let bare = rendered.contains("Widget");
    let quoted = rendered.contains("\"Widget\"");
    assert!(bare || quoted, "Data should be present");
}
2717
/// JSON -> inferred schemas -> TeaLeaf text -> parse keeps both the data
/// and the inferred schema.
#[test]
fn test_schema_inference_roundtrip() {
    let source = r#"{"items": [{"id": 1, "name": "A"}, {"id": 2, "name": "B"}]}"#;

    // Render to text with schemas, then parse that text back.
    let rendered = TeaLeaf::from_json_with_schemas(source).unwrap().to_tl_with_schemas();
    let reparsed = TeaLeaf::parse(&rendered).unwrap();

    // Same rows, and the first one still has its typed fields.
    let rows = reparsed.get("items").unwrap().as_array().unwrap();
    assert_eq!(rows.len(), 2);
    let first = rows[0].as_object().unwrap();
    assert_eq!(first.get("id").unwrap().as_int(), Some(1));
    assert_eq!(first.get("name").unwrap().as_str(), Some("A"));

    // The schema survived the text form.
    assert!(reparsed.schema("item").is_some());
}
2738
/// A field that is `null` in at least one array element is inferred as
/// nullable.
#[test]
fn test_schema_inference_nullable_fields() {
    let doc = TeaLeaf::from_json_with_schemas(
        r#"{"users": [{"name": "alice", "email": "a@test.com"}, {"name": "bob", "email": null}]}"#,
    )
    .unwrap();

    let user = doc.schema("user").unwrap();
    let email = user.fields.iter().find(|f| f.name == "email").unwrap();

    // bob's email is null, so the field type must be flagged nullable.
    assert!(email.field_type.nullable, "Field with null values should be nullable");
}
2750
/// Nested schema-typed arrays must serialize to text that uses `@table` for
/// the top-level array and that parses back into the original structure.
///
/// NOTE(review): the number of `@table` occurrences is not asserted here;
/// the absence of a redundant nested `@table` is only checked indirectly,
/// by requiring the emitted text to parse and the nested item to be readable.
#[test]
fn test_schema_inference_nested_tuples_no_redundant_table() {
    let json = r#"{
            "orders": [
                {"id": 1, "items": [{"sku": "A", "qty": 2}]}
            ]
        }"#;
    let doc = TeaLeaf::from_json_with_schemas(json).unwrap();
    let tl_text = doc.to_tl_with_schemas();

    // The top-level orders array must be emitted with the table directive.
    assert!(tl_text.contains("@table order"), "Should have @table for orders");

    // Parse the text back and walk down to the nested item to verify structure.
    let parsed = TeaLeaf::parse(&tl_text).unwrap();
    let orders = parsed.get("orders").unwrap().as_array().unwrap();
    let first_order = orders[0].as_object().unwrap();
    let items = first_order.get("items").unwrap().as_array().unwrap();
    assert_eq!(items[0].as_object().unwrap().get("sku").unwrap().as_str(), Some("A"));
}
2775
/// Two arrays with different element shapes get separate schemas, each
/// carrying its own distinguishing field.
#[test]
fn test_schema_inference_mismatched_arrays_not_matched() {
    let doc = TeaLeaf::from_json_with_schemas(r#"{
            "users": [{"id": "U1", "name": "Alice"}],
            "products": [{"id": "P1", "price": 9.99}]
        }"#).unwrap();

    // One schema per array.
    assert!(doc.schema("user").is_some());
    assert!(doc.schema("product").is_some());

    // `name` belongs to the user schema ...
    let user = doc.schema("user").unwrap();
    let user_has_name = user.fields.iter().any(|f| f.name == "name");
    assert!(user_has_name);

    // ... and `price` belongs to the product schema.
    let product = doc.schema("product").unwrap();
    let product_has_price = product.fields.iter().any(|f| f.name == "price");
    assert!(product_has_price);
}
2797
/// Strings containing `/` or `@` are quoted in text output, dotted names
/// are left bare, and the quoted values parse back unchanged.
#[test]
fn test_schema_inference_special_char_quoting() {
    let doc = TeaLeaf::from_json_with_schemas(r#"{"items": [
            {"category": "Electronics/Audio", "email": "test@example.com", "path": "a.b.c"}
        ]}"#).unwrap();
    let rendered = doc.to_tl_with_schemas();

    // Special characters force quoting in the output.
    assert!(rendered.contains("\"Electronics/Audio\""), "Slash should be quoted: {}", rendered);
    assert!(rendered.contains("\"test@example.com\""), "@ should be quoted: {}", rendered);
    // Dots are valid in names per the spec grammar, so a.b.c stays unquoted.
    assert!(!rendered.contains("\"a.b.c\""), "Dots should NOT be quoted per spec grammar: {}", rendered);

    // The quoted values survive a parse of the rendered text.
    let reparsed = TeaLeaf::parse(&rendered).unwrap();
    let rows = reparsed.get("items").unwrap().as_array().unwrap();
    let row = rows[0].as_object().unwrap();
    assert_eq!(row.get("category").unwrap().as_str(), Some("Electronics/Audio"));
    assert_eq!(row.get("email").unwrap().as_str(), Some("test@example.com"));
}
2820
/// Objects nested inside array elements get their own schemas, the parent
/// schema refers to them by name, and the text output declares them.
#[test]
fn test_schema_inference_nested_objects() {
    let doc = TeaLeaf::from_json_with_schemas(r#"{
            "customers": [
                {
                    "id": 1,
                    "name": "Alice",
                    "billing_address": {
                        "street": "123 Main St",
                        "city": "Boston",
                        "state": "MA",
                        "postal_code": "02101",
                        "country": "USA"
                    },
                    "shipping_address": {
                        "street": "456 Oak Ave",
                        "city": "Cambridge",
                        "state": "MA",
                        "postal_code": "02139",
                        "country": "USA"
                    }
                },
                {
                    "id": 2,
                    "name": "Bob",
                    "billing_address": {
                        "street": "789 Elm St",
                        "city": "New York",
                        "state": "NY",
                        "postal_code": "10001",
                        "country": "USA"
                    },
                    "shipping_address": {
                        "street": "789 Elm St",
                        "city": "New York",
                        "state": "NY",
                        "postal_code": "10001",
                        "country": "USA"
                    }
                }
            ]
        }"#).unwrap();

    // A schema exists for each nested object and for the array element.
    assert!(doc.schema("billing_address").is_some(), "Should create billing_address schema");
    assert!(doc.schema("shipping_address").is_some(), "Should create shipping_address schema");
    assert!(doc.schema("customer").is_some(), "Should create customer schema");

    // The billing_address schema exposes every key of the nested object.
    let billing = doc.schema("billing_address").unwrap();
    for expected in &["street", "city", "state", "postal_code", "country"] {
        let present = billing.fields.iter().any(|f| f.name.as_str() == *expected);
        assert!(present, "billing_address should have {} field", expected);
    }

    // The customer schema's address fields are typed by the nested schemas.
    let customer = doc.schema("customer").unwrap();
    let billing_ref = customer.fields.iter().find(|f| f.name == "billing_address").unwrap();
    assert_eq!(billing_ref.field_type.base, "billing_address", "customer.billing_address should reference billing_address schema");

    let shipping_ref = customer.fields.iter().find(|f| f.name == "shipping_address").unwrap();
    assert_eq!(shipping_ref.field_type.base, "shipping_address", "customer.shipping_address should reference shipping_address schema");

    // The text output declares both structs and uses them as field types.
    let rendered = doc.to_tl_with_schemas();
    assert!(rendered.contains("@struct billing_address"), "Output should contain billing_address struct");
    assert!(rendered.contains("@struct shipping_address"), "Output should contain shipping_address struct");
    assert!(rendered.contains("billing_address: billing_address"), "customer should have billing_address field with billing_address type");
    assert!(rendered.contains("shipping_address: shipping_address"), "customer should have shipping_address field with shipping_address type");
}
2896
2897    #[test]
2898    fn test_schema_inference_nested_objects_with_nulls() {
2899        // Test that nested objects handle nullable fields correctly
2900        let json = r#"{
2901            "orders": [
2902                {
2903                    "id": 1,
2904                    "customer": {
2905                        "name": "Alice",
2906                        "phone": "555-1234"
2907                    }
2908                },
2909                {
2910                    "id": 2,
2911                    "customer": {
2912                        "name": "Bob",
2913                        "phone": null
2914                    }
2915                }
2916            ]
2917        }"#;
2918
2919        let doc = TeaLeaf::from_json_with_schemas(json).unwrap();
2920
2921        // Customer schema should exist with nullable phone
2922        let customer_schema = doc.schema("customer").unwrap();
2923        let phone_field = customer_schema.fields.iter().find(|f| f.name == "phone").unwrap();
2924        assert!(phone_field.field_type.nullable, "phone field should be nullable");
2925    }
2926
2927    // =========================================================================
2928    // Coverage: dumps(), write_value(), escape_string(), format_float()
2929    // =========================================================================
2930
2931    #[test]
2932    fn test_dumps_all_value_types() {
2933        let mut data = IndexMap::new();
2934        data.insert("null_val".to_string(), Value::Null);
2935        data.insert("bool_val".to_string(), Value::Bool(true));
2936        data.insert("int_val".to_string(), Value::Int(42));
2937        data.insert("uint_val".to_string(), Value::UInt(999));
2938        data.insert("float_val".to_string(), Value::Float(3.14));
2939        data.insert("str_val".to_string(), Value::String("hello".to_string()));
2940        data.insert("bytes_val".to_string(), Value::Bytes(vec![0xca, 0xfe]));
2941        data.insert("arr_val".to_string(), Value::Array(vec![Value::Int(1), Value::Int(2)]));
2942        data.insert("obj_val".to_string(), Value::Object(
2943            vec![("x".to_string(), Value::Int(1))].into_iter().collect()
2944        ));
2945        data.insert("map_val".to_string(), Value::Map(vec![
2946            (Value::Int(1), Value::String("one".to_string())),
2947        ]));
2948        data.insert("ref_val".to_string(), Value::Ref("target".to_string()));
2949        data.insert("tag_val".to_string(), Value::Tagged("ok".to_string(), Box::new(Value::Int(200))));
2950        data.insert("ts_val".to_string(), Value::Timestamp(0, 0));
2951        data.insert("ts_millis".to_string(), Value::Timestamp(1705315800123, 0));
2952
2953        let output = dumps(&data);
2954
2955        assert!(output.contains("~"), "Should contain null");
2956        assert!(output.contains("true"), "Should contain bool");
2957        assert!(output.contains("42"), "Should contain int");
2958        assert!(output.contains("999"), "Should contain uint");
2959        assert!(output.contains("3.14"), "Should contain float");
2960        assert!(output.contains("hello"), "Should contain string");
2961        assert!(output.contains("b\"cafe\""), "Should contain bytes literal");
2962        assert!(output.contains("[1, 2]"), "Should contain array");
2963        assert!(output.contains("@map {"), "Should contain map");
2964        assert!(output.contains("!target"), "Should contain ref");
2965        assert!(output.contains(":ok 200"), "Should contain tagged");
2966        assert!(output.contains("1970-01-01T00:00:00Z"), "Should contain epoch timestamp");
2967        assert!(output.contains(".123Z"), "Should contain millis timestamp");
2968    }
2969
2970    #[test]
2971    fn test_bytes_literal_text_roundtrip() {
2972        // dumps() emits b"..." → parse() reads it back as Value::Bytes
2973        let mut data = IndexMap::new();
2974        data.insert("payload".to_string(), Value::Bytes(vec![0xca, 0xfe, 0xf0, 0x0d]));
2975        data.insert("empty".to_string(), Value::Bytes(vec![]));
2976
2977        let text = dumps(&data);
2978        assert!(text.contains(r#"b"cafef00d""#), "Should emit b\"...\" literal: {}", text);
2979        assert!(text.contains(r#"b"""#), "Should emit empty bytes literal: {}", text);
2980
2981        // Parse the text back
2982        let doc = TeaLeaf::parse(&text).unwrap();
2983        assert_eq!(doc.data.get("payload").unwrap().as_bytes(), Some(&[0xca, 0xfe, 0xf0, 0x0d][..]));
2984        assert_eq!(doc.data.get("empty").unwrap().as_bytes(), Some(&[][..]));
2985    }
2986
2987    #[test]
2988    fn test_dumps_string_quoting() {
2989        let mut data = IndexMap::new();
2990        data.insert("quoted".to_string(), Value::String("hello world".to_string()));
2991        data.insert("unquoted".to_string(), Value::String("hello".to_string()));
2992        data.insert("reserved_true".to_string(), Value::String("true".to_string()));
2993        data.insert("reserved_null".to_string(), Value::String("null".to_string()));
2994        data.insert("reserved_tilde".to_string(), Value::String("~".to_string()));
2995        data.insert("empty".to_string(), Value::String("".to_string()));
2996        data.insert("at_start".to_string(), Value::String("@directive".to_string()));
2997        data.insert("hash_start".to_string(), Value::String("#comment".to_string()));
2998        data.insert("bang_start".to_string(), Value::String("!ref".to_string()));
2999        data.insert("hex_start".to_string(), Value::String("0xabc".to_string()));
3000        data.insert("number_like".to_string(), Value::String("42abc".to_string()));
3001        data.insert("negative_like".to_string(), Value::String("-5".to_string()));
3002        data.insert("slash".to_string(), Value::String("a/b".to_string()));
3003        data.insert("dot".to_string(), Value::String("a.b".to_string()));
3004
3005        let output = dumps(&data);
3006
3007        // Quoted values should be wrapped in double quotes
3008        assert!(output.contains("\"hello world\""), "Spaces need quoting");
3009        assert!(output.contains("\"true\""), "Reserved word true needs quoting");
3010        assert!(output.contains("\"null\""), "Reserved word null needs quoting");
3011        assert!(output.contains("\"~\""), "Tilde needs quoting");
3012        assert!(output.contains("\"\""), "Empty string needs quoting");
3013        assert!(output.contains("\"@directive\""), "@ prefix needs quoting");
3014        assert!(output.contains("\"#comment\""), "# prefix needs quoting");
3015        assert!(output.contains("\"!ref\""), "! prefix needs quoting");
3016        assert!(output.contains("\"0xabc\""), "0x prefix needs quoting");
3017        assert!(output.contains("\"42abc\""), "Digit start needs quoting");
3018        assert!(output.contains("\"-5\""), "Negative number needs quoting");
3019        assert!(output.contains("\"a/b\""), "Slash needs quoting");
3020        assert!(!output.contains("\"a.b\""), "Dot should NOT need quoting per spec grammar");
3021    }
3022
3023    #[test]
3024    fn test_escape_string_control_chars() {
3025        let result = escape_string("tab\there\nnewline\rreturn");
3026        assert!(result.contains("\\t"), "Tab should be escaped");
3027        assert!(result.contains("\\n"), "Newline should be escaped");
3028        assert!(result.contains("\\r"), "CR should be escaped");
3029
3030        let result = escape_string("\x08backspace\x0cformfeed");
3031        assert!(result.contains("\\b"), "Backspace should be escaped");
3032        assert!(result.contains("\\f"), "Formfeed should be escaped");
3033
3034        let result = escape_string("quote\"and\\backslash");
3035        assert!(result.contains("\\\""), "Quote should be escaped");
3036        assert!(result.contains("\\\\"), "Backslash should be escaped");
3037
3038        // Other control characters use \uXXXX
3039        let result = escape_string("\x01");
3040        assert!(result.contains("\\u0001"), "Control char should use \\uXXXX");
3041    }
3042
3043    #[test]
3044    fn test_format_float_both_branches() {
3045        // Whole number float: Rust's to_string() drops .0, so format_float adds it back
3046        assert_eq!(format_float(42.0, false), "42.0");
3047
3048        // Float with decimals should stay as-is
3049        assert_eq!(format_float(3.14, false), "3.14");
3050
3051        // Scientific notation stays as-is
3052        let very_small = format_float(1e-20, false);
3053        assert!(very_small.contains('e') || very_small.contains('.'));
3054    }
3055
3056    #[test]
3057    fn test_format_float_compact_floats() {
3058        // With compact_floats=true, whole-number floats strip .0
3059        assert_eq!(format_float(42.0, true), "42");
3060        assert_eq!(format_float(0.0, true), "0");
3061        assert_eq!(format_float(17164000000.0, true), "17164000000");
3062        assert_eq!(format_float(35934000000.0, true), "35934000000");
3063        assert_eq!(format_float(-100.0, true), "-100");
3064
3065        // Non-whole floats are unaffected
3066        assert_eq!(format_float(3.14, true), "3.14");
3067        assert_eq!(format_float(0.5, true), "0.5");
3068
3069        // Special values unaffected
3070        assert_eq!(format_float(f64::NAN, true), "NaN");
3071        assert_eq!(format_float(f64::INFINITY, true), "inf");
3072        assert_eq!(format_float(f64::NEG_INFINITY, true), "-inf");
3073
3074        // Very large floats use scientific notation (digits > 15), unaffected
3075        let large = format_float(1e20, true);
3076        assert!(large.contains('e'), "Very large should use scientific: {}", large);
3077    }
3078
3079    #[test]
3080    fn test_dumps_with_compact_floats() {
3081        let mut data = IndexMap::new();
3082        data.insert("revenue".to_string(), Value::Float(35934000000.0));
3083        data.insert("ratio".to_string(), Value::Float(3.14));
3084        data.insert("count".to_string(), Value::Int(42));
3085
3086        // Default: whole floats keep .0
3087        let pretty = dumps(&data);
3088        assert!(pretty.contains("35934000000.0"), "Default should have .0: {}", pretty);
3089
3090        // compact_floats: whole floats stripped
3091        let opts = FormatOptions::compact().with_compact_floats();
3092        let compact = dumps_with_options(&data, &opts);
3093        assert!(compact.contains("35934000000"), "Should have whole number: {}", compact);
3094        assert!(!compact.contains("35934000000.0"), "Should NOT have .0: {}", compact);
3095        assert!(compact.contains("3.14"), "Non-whole float preserved: {}", compact);
3096        assert!(compact.contains("42"), "Int preserved: {}", compact);
3097    }
3098
3099    #[test]
3100    fn test_needs_quoting_various_patterns() {
3101        // Should need quoting
3102        assert!(needs_quoting(""), "Empty string");
3103        assert!(needs_quoting("hello world"), "Whitespace");
3104        assert!(needs_quoting("a,b"), "Comma");
3105        assert!(needs_quoting("(x)"), "Parens");
3106        assert!(needs_quoting("[x]"), "Brackets");
3107        assert!(needs_quoting("{x}"), "Braces");
3108        assert!(needs_quoting("a:b"), "Colon");
3109        assert!(needs_quoting("@x"), "At sign");
3110        assert!(needs_quoting("a/b"), "Slash");
3111        assert!(!needs_quoting("a.b"), "Dot is valid in names per spec grammar");
3112        assert!(needs_quoting("true"), "Reserved true");
3113        assert!(needs_quoting("false"), "Reserved false");
3114        assert!(needs_quoting("null"), "Reserved null");
3115        assert!(needs_quoting("~"), "Reserved tilde");
3116        assert!(needs_quoting("!bang"), "Bang prefix");
3117        assert!(needs_quoting("#hash"), "Hash prefix");
3118        assert!(needs_quoting("0xdead"), "Hex prefix");
3119        assert!(needs_quoting("0Xdead"), "Hex prefix uppercase");
3120        assert!(needs_quoting("42abc"), "Starts with digit");
3121        assert!(needs_quoting("-5"), "Starts with minus+digit");
3122        assert!(needs_quoting("+5"), "Starts with plus+digit");
3123
3124        // Should NOT need quoting
3125        assert!(!needs_quoting("hello"), "Simple word");
3126        assert!(!needs_quoting("foo_bar"), "Underscore word");
3127        assert!(!needs_quoting("abc123"), "Alpha then digits");
3128    }
3129
3130    // =========================================================================
3131    // Coverage: singularize()
3132    // =========================================================================
3133
3134    #[test]
3135    fn test_singularize_rules() {
3136        // -ies → -y
3137        assert_eq!(singularize("categories"), "category");
3138        assert_eq!(singularize("entries"), "entry");
3139
3140        // -sses → -ss (special -es rule)
3141        assert_eq!(singularize("classes"), "class");
3142        assert_eq!(singularize("dresses"), "dress");
3143
3144        // -xes → -x
3145        assert_eq!(singularize("boxes"), "box");
3146        assert_eq!(singularize("indexes"), "index");
3147
3148        // -ches → -ch
3149        assert_eq!(singularize("watches"), "watch");
3150
3151        // -shes → -sh
3152        assert_eq!(singularize("dishes"), "dish");
3153
3154        // Regular -s
3155        assert_eq!(singularize("users"), "user");
3156        assert_eq!(singularize("products"), "product");
3157
3158        // Words ending in -ss (should NOT remove s)
3159        assert_eq!(singularize("boss"), "boss");
3160        assert_eq!(singularize("class"), "class");
3161
3162        // Already singular (no trailing s)
3163        assert_eq!(singularize("item"), "item");
3164        assert_eq!(singularize("child"), "child");
3165    }
3166
3167    // =========================================================================
3168    // Coverage: from_json root primitives, loads()
3169    // =========================================================================
3170
3171    #[test]
3172    fn test_from_json_root_primitive() {
3173        // Root-level string
3174        let doc = TeaLeaf::from_json(r#""hello""#).unwrap();
3175        assert_eq!(doc.get("root").unwrap().as_str(), Some("hello"));
3176        assert!(!doc.is_root_array);
3177
3178        // Root-level number
3179        let doc = TeaLeaf::from_json("42").unwrap();
3180        assert_eq!(doc.get("root").unwrap().as_int(), Some(42));
3181
3182        // Root-level bool
3183        let doc = TeaLeaf::from_json("true").unwrap();
3184        assert_eq!(doc.get("root").unwrap().as_bool(), Some(true));
3185
3186        // Root-level null
3187        let doc = TeaLeaf::from_json("null").unwrap();
3188        assert!(doc.get("root").unwrap().is_null());
3189    }
3190
3191    #[test]
3192    fn test_from_json_invalid() {
3193        let result = TeaLeaf::from_json("not valid json {{{");
3194        assert!(result.is_err());
3195    }
3196
3197    #[test]
3198    fn test_loads_convenience() {
3199        let data = loads("name: alice\nage: 30").unwrap();
3200        assert_eq!(data.get("name").unwrap().as_str(), Some("alice"));
3201        assert_eq!(data.get("age").unwrap().as_int(), Some(30));
3202    }
3203
3204    // =========================================================================
3205    // Coverage: InferredType::merge() branches
3206    // =========================================================================
3207
3208    #[test]
3209    fn test_inferred_type_merge_int_float() {
3210        let t = infer_type(&Value::Int(42));
3211        let f = infer_type(&Value::Float(3.14));
3212        let merged = t.merge(&f);
3213        assert_eq!(merged, InferredType::Float);
3214
3215        // Reverse
3216        let merged = f.merge(&t);
3217        assert_eq!(merged, InferredType::Float);
3218    }
3219
3220    #[test]
3221    fn test_inferred_type_merge_null_with_type() {
3222        let n = InferredType::Null;
3223        let s = InferredType::String;
3224        let merged = n.merge(&s);
3225        assert_eq!(merged, InferredType::String);
3226
3227        // Reverse
3228        let merged = s.merge(&n);
3229        assert_eq!(merged, InferredType::String);
3230    }
3231
3232    #[test]
3233    fn test_inferred_type_merge_arrays() {
3234        let a1 = InferredType::Array(Box::new(InferredType::Int));
3235        let a2 = InferredType::Array(Box::new(InferredType::Float));
3236        let merged = a1.merge(&a2);
3237        assert_eq!(merged, InferredType::Array(Box::new(InferredType::Float)));
3238    }
3239
3240    #[test]
3241    fn test_inferred_type_merge_objects_same_fields() {
3242        let o1 = InferredType::Object(vec![
3243            ("a".to_string(), InferredType::Int),
3244            ("b".to_string(), InferredType::String),
3245        ]);
3246        let o2 = InferredType::Object(vec![
3247            ("a".to_string(), InferredType::Float),
3248            ("b".to_string(), InferredType::String),
3249        ]);
3250        let merged = o1.merge(&o2);
3251        if let InferredType::Object(fields) = &merged {
3252            assert_eq!(fields.len(), 2);
3253            assert_eq!(fields[0].1, InferredType::Float); // Int+Float → Float
3254            assert_eq!(fields[1].1, InferredType::String);
3255        } else {
3256            panic!("Expected Object, got {:?}", merged);
3257        }
3258    }
3259
3260    #[test]
3261    fn test_inferred_type_merge_objects_different_fields() {
3262        let o1 = InferredType::Object(vec![
3263            ("a".to_string(), InferredType::Int),
3264        ]);
3265        let o2 = InferredType::Object(vec![
3266            ("b".to_string(), InferredType::String),
3267        ]);
3268        let merged = o1.merge(&o2);
3269        assert_eq!(merged, InferredType::Mixed);
3270    }
3271
3272    #[test]
3273    fn test_inferred_type_merge_incompatible() {
3274        let s = InferredType::String;
3275        let i = InferredType::Int;
3276        let merged = s.merge(&i);
3277        assert_eq!(merged, InferredType::Mixed);
3278    }
3279
3280    #[test]
3281    fn test_inferred_type_to_field_type() {
3282        let schemas = IndexMap::new();
3283
3284        assert_eq!(InferredType::Null.to_field_type(&schemas).base, "string");
3285        assert!(InferredType::Null.to_field_type(&schemas).nullable);
3286        assert_eq!(InferredType::Bool.to_field_type(&schemas).base, "bool");
3287        assert_eq!(InferredType::Int.to_field_type(&schemas).base, "int");
3288        assert_eq!(InferredType::Float.to_field_type(&schemas).base, "float");
3289        assert_eq!(InferredType::String.to_field_type(&schemas).base, "string");
3290        assert_eq!(InferredType::Mixed.to_field_type(&schemas).base, "any");
3291
3292        // Array type
3293        let arr_type = InferredType::Array(Box::new(InferredType::Int));
3294        let ft = arr_type.to_field_type(&schemas);
3295        assert_eq!(ft.base, "int");
3296        assert!(ft.is_array);
3297
3298        // Object with no matching schema → "any" (not "object", which is a value-only type)
3299        let obj_type = InferredType::Object(vec![("x".to_string(), InferredType::Int)]);
3300        assert_eq!(obj_type.to_field_type(&schemas).base, "any");
3301    }
3302
3303    #[test]
3304    fn test_inferred_type_to_field_type_with_matching_schema() {
3305        let mut schemas = IndexMap::new();
3306        let mut schema = Schema::new("point");
3307        schema.add_field("x", FieldType::new("int"));
3308        schema.add_field("y", FieldType::new("int"));
3309        schemas.insert("point".to_string(), schema);
3310
3311        let obj_type = InferredType::Object(vec![
3312            ("x".to_string(), InferredType::Int),
3313            ("y".to_string(), InferredType::Int),
3314        ]);
3315        let ft = obj_type.to_field_type(&schemas);
3316        assert_eq!(ft.base, "point");
3317    }
3318
3319    #[test]
3320    fn test_infer_type_special_values() {
3321        // Bytes, Ref, Tagged, Timestamp, Map all become Mixed
3322        assert_eq!(infer_type(&Value::Bytes(vec![1, 2])), InferredType::Mixed);
3323        assert_eq!(infer_type(&Value::Ref("x".to_string())), InferredType::Mixed);
3324        assert_eq!(infer_type(&Value::Tagged("t".to_string(), Box::new(Value::Null))), InferredType::Mixed);
3325        assert_eq!(infer_type(&Value::Timestamp(0, 0)), InferredType::Mixed);
3326        assert_eq!(infer_type(&Value::Map(vec![])), InferredType::Mixed);
3327
3328        // Empty array
3329        if let InferredType::Array(inner) = infer_type(&Value::Array(vec![])) {
3330            assert_eq!(*inner, InferredType::Mixed);
3331        } else {
3332            panic!("Expected Array");
3333        }
3334
3335        // UInt becomes Int
3336        assert_eq!(infer_type(&Value::UInt(42)), InferredType::Int);
3337    }
3338
3339    #[test]
3340    fn test_json_with_schemas_empty_nested_object_roundtrip() {
3341        // Regression: fuzzer found that [{"n":{}}] crashes because the inferrer
3342        // emits "object" as a field type, which the parser rejects as value-only.
3343        let doc = TeaLeaf::from_json_with_schemas(r#"[{"n":{}}]"#).unwrap();
3344        let tl_text = doc.to_tl_with_schemas();
3345        // Must re-parse without error
3346        let reparsed = TeaLeaf::parse(&tl_text).unwrap();
3347        assert_eq!(doc.data.len(), reparsed.data.len());
3348    }
3349
3350    // =========================================================================
3351    // Coverage: to_tl_with_schemas() edge cases
3352    // =========================================================================
3353
3354    #[test]
3355    fn test_to_tl_with_schemas_no_schemas() {
3356        let mut data = IndexMap::new();
3357        data.insert("name".to_string(), Value::String("alice".to_string()));
3358        let doc = TeaLeaf { data, schemas: IndexMap::new(), unions: IndexMap::new(), is_root_array: false };
3359
3360        let output = doc.to_tl_with_schemas();
3361        assert!(output.contains("name: alice"), "Should use dumps() format");
3362        assert!(!output.contains("@struct"), "No schemas");
3363    }
3364
3365    #[test]
3366    fn test_to_tl_with_schemas_root_array() {
3367        let mut data = IndexMap::new();
3368        data.insert("root".to_string(), Value::Array(vec![Value::Int(1), Value::Int(2)]));
3369        let doc = TeaLeaf { data, schemas: IndexMap::new(), unions: IndexMap::new(), is_root_array: true };
3370
3371        let output = doc.to_tl_with_schemas();
3372        assert!(output.starts_with("@root-array"), "Should have root-array directive");
3373    }
3374
3375    // =========================================================================
3376    // Coverage: write_value_with_schemas() for special types
3377    // =========================================================================
3378
3379    #[test]
3380    fn test_dumps_with_schemas_all_types() {
3381        let mut schemas = IndexMap::new();
3382        let mut schema = Schema::new("item");
3383        schema.add_field("id", FieldType::new("int"));
3384        schema.add_field("name", FieldType::new("string"));
3385        schemas.insert("item".to_string(), schema);
3386
3387        let mut data = IndexMap::new();
3388        // Array matching schema → @table
3389        data.insert("items".to_string(), Value::Array(vec![
3390            Value::Object(vec![
3391                ("id".to_string(), Value::Int(1)),
3392                ("name".to_string(), Value::String("Widget".to_string())),
3393            ].into_iter().collect()),
3394        ]));
3395        // Special types
3396        data.insert("ref_val".to_string(), Value::Ref("target".to_string()));
3397        data.insert("tag_val".to_string(), Value::Tagged("ok".to_string(), Box::new(Value::Int(200))));
3398        data.insert("map_val".to_string(), Value::Map(vec![
3399            (Value::Int(1), Value::String("one".to_string())),
3400        ]));
3401        data.insert("bytes_val".to_string(), Value::Bytes(vec![0xde, 0xad]));
3402        data.insert("ts_val".to_string(), Value::Timestamp(0, 0));
3403        data.insert("ts_millis".to_string(), Value::Timestamp(1705315800123, 0));
3404
3405        let schema_order = vec!["item".to_string()];
3406        let output = dumps_with_schemas(&data, &schemas, &schema_order, &IndexMap::new(), &[]);
3407
3408        assert!(output.contains("@struct item"), "Should contain schema def");
3409        assert!(output.contains("@table item"), "Should use @table format");
3410        assert!(output.contains("!target"), "Should contain ref");
3411        assert!(output.contains(":ok 200"), "Should contain tagged");
3412        assert!(output.contains("@map {"), "Should contain map");
3413        assert!(output.contains("b\"dead\""), "Should contain bytes literal");
3414        assert!(output.contains("1970-01-01T00:00:00Z"), "Should contain timestamp");
3415        assert!(output.contains(".123Z"), "Should contain millis timestamp");
3416    }
3417
3418    #[test]
3419    fn test_dumps_with_schemas_object_value() {
3420        let schemas = IndexMap::new();
3421        let mut data = IndexMap::new();
3422        data.insert("config".to_string(), Value::Object(
3423            vec![
3424                ("host".to_string(), Value::String("localhost".to_string())),
3425                ("port".to_string(), Value::Int(8080)),
3426            ].into_iter().collect()
3427        ));
3428
3429        let output = dumps_with_schemas(&data, &schemas, &[], &IndexMap::new(), &[]);
3430        assert!(output.contains("config:"), "Should contain key");
3431        assert!(output.contains("{"), "Should contain object");
3432    }
3433
3434    #[test]
3435    fn test_write_tuple_with_nested_schema() {
3436        // Test tuple writing with nested struct fields
3437        let mut schemas = IndexMap::new();
3438
3439        let mut addr = Schema::new("address");
3440        addr.add_field("city", FieldType::new("string"));
3441        addr.add_field("zip", FieldType::new("string"));
3442        schemas.insert("address".to_string(), addr);
3443
3444        let mut user = Schema::new("user");
3445        user.add_field("name", FieldType::new("string"));
3446        user.add_field("home", FieldType::new("address"));
3447        schemas.insert("user".to_string(), user);
3448
3449        let mut data = IndexMap::new();
3450        data.insert("users".to_string(), Value::Array(vec![
3451            Value::Object(vec![
3452                ("name".to_string(), Value::String("Alice".to_string())),
3453                ("home".to_string(), Value::Object(vec![
3454                    ("city".to_string(), Value::String("Boston".to_string())),
3455                    ("zip".to_string(), Value::String("02101".to_string())),
3456                ].into_iter().collect())),
3457            ].into_iter().collect()),
3458        ]));
3459
3460        let schema_order = vec!["address".to_string(), "user".to_string()];
3461        let output = dumps_with_schemas(&data, &schemas, &schema_order, &IndexMap::new(), &[]);
3462
3463        assert!(output.contains("@struct address"), "Should have address schema");
3464        assert!(output.contains("@struct user"), "Should have user schema");
3465        assert!(output.contains("@table user"), "Should use @table for users");
3466        // Nested tuples
3467        assert!(output.contains("("), "Should have tuple format");
3468    }
3469
3470    #[test]
3471    fn test_write_tuple_with_schema_array_field() {
3472        // Test tuple writing with array fields that have schemas
3473        let mut schemas = IndexMap::new();
3474
3475        let mut tag = Schema::new("tag");
3476        tag.add_field("name", FieldType::new("string"));
3477        schemas.insert("tag".to_string(), tag);
3478
3479        let mut item = Schema::new("item");
3480        item.add_field("id", FieldType::new("int"));
3481        item.add_field("tags", FieldType { base: "tag".to_string(), nullable: false, is_array: true });
3482        schemas.insert("item".to_string(), item);
3483
3484        let mut data = IndexMap::new();
3485        data.insert("items".to_string(), Value::Array(vec![
3486            Value::Object(vec![
3487                ("id".to_string(), Value::Int(1)),
3488                ("tags".to_string(), Value::Array(vec![
3489                    Value::Object(vec![
3490                        ("name".to_string(), Value::String("rust".to_string())),
3491                    ].into_iter().collect()),
3492                ])),
3493            ].into_iter().collect()),
3494        ]));
3495
3496        let schema_order = vec!["tag".to_string(), "item".to_string()];
3497        let output = dumps_with_schemas(&data, &schemas, &schema_order, &IndexMap::new(), &[]);
3498
3499        assert!(output.contains("@table item"), "Should use @table for items");
3500    }
3501
3502    #[test]
3503    fn test_write_schema_array_empty() {
3504        let schemas = IndexMap::new();
3505        let schema = Schema::new("empty");
3506        let mut out = String::new();
3507        write_schema_array(&mut out, &Value::Array(vec![]), &schema, &schemas, 0, &FormatOptions::default());
3508        assert_eq!(out, "[]");
3509    }
3510
3511    #[test]
3512    fn test_write_schema_array_non_array_fallback() {
3513        let schemas = IndexMap::new();
3514        let schema = Schema::new("test");
3515        let mut out = String::new();
3516        write_schema_array(&mut out, &Value::Int(42), &schema, &schemas, 0, &FormatOptions::default());
3517        assert_eq!(out, "42");
3518    }
3519
3520    #[test]
3521    fn test_write_tuple_missing_field() {
3522        // Test that missing fields in object produce ~
3523        let schemas = IndexMap::new();
3524        let mut schema = Schema::new("test");
3525        schema.add_field("present", FieldType::new("int"));
3526        schema.add_field("missing", FieldType::new("string"));
3527
3528        let value = Value::Object(
3529            vec![("present".to_string(), Value::Int(42))].into_iter().collect()
3530        );
3531
3532        let mut out = String::new();
3533        write_tuple(&mut out, &value, &schema, &schemas, 0, &FormatOptions::default());
3534        assert!(out.contains("42"), "Present field should be written");
3535        assert!(out.contains("~"), "Missing field should be ~");
3536    }
3537
3538    #[test]
3539    fn test_write_tuple_non_object() {
3540        // When tuple receives a non-object value
3541        let schemas = IndexMap::new();
3542        let schema = Schema::new("test");
3543
3544        let mut out = String::new();
3545        write_tuple(&mut out, &Value::Int(42), &schema, &schemas, 0, &FormatOptions::default());
3546        assert_eq!(out, "42");
3547    }
3548
3549    // =========================================================================
3550    // Coverage: array_matches_schema()
3551    // =========================================================================
3552
3553    #[test]
3554    fn test_array_matches_schema_empty() {
3555        let schema = Schema::new("test");
3556        assert!(!array_matches_schema(&[], &schema));
3557    }
3558
3559    #[test]
3560    fn test_array_matches_schema_non_object() {
3561        let schema = Schema::new("test");
3562        assert!(!array_matches_schema(&[Value::Int(1)], &schema));
3563    }
3564
3565    #[test]
3566    fn test_array_matches_schema_matching() {
3567        let mut schema = Schema::new("user");
3568        schema.add_field("name", FieldType::new("string"));
3569        schema.add_field("age", FieldType::new("int"));
3570
3571        let arr = vec![Value::Object(vec![
3572            ("name".to_string(), Value::String("Alice".to_string())),
3573            ("age".to_string(), Value::Int(30)),
3574        ].into_iter().collect())];
3575
3576        assert!(array_matches_schema(&arr, &schema));
3577    }
3578
3579    // =========================================================================
3580    // Coverage: from_dto, from_dto_array, to_dto, to_dto_vec
3581    // =========================================================================
3582
3583    #[test]
3584    fn test_from_dto_and_back() {
3585        use crate::convert::{FromTeaLeaf, ConvertError};
3586
3587        let doc = TeaLeaf::from_dto("greeting", &"hello".to_string());
3588        assert_eq!(doc.get("greeting").unwrap().as_str(), Some("hello"));
3589
3590        let result: std::result::Result<String, ConvertError> = String::from_tealeaf_value(doc.get("greeting").unwrap());
3591        assert_eq!(result.unwrap(), "hello");
3592    }
3593
3594    #[test]
3595    fn test_from_dto_array() {
3596        let items = vec!["apple".to_string(), "banana".to_string()];
3597        let doc = TeaLeaf::from_dto_array("fruits", &items);
3598        let arr = doc.get("fruits").unwrap().as_array().unwrap();
3599        assert_eq!(arr.len(), 2);
3600        assert_eq!(arr[0].as_str(), Some("apple"));
3601    }
3602
3603    #[test]
3604    fn test_to_dto_missing_key() {
3605        let doc = TeaLeaf::new(IndexMap::new(), IndexMap::new());
3606        let result: Result<String> = doc.to_dto("missing");
3607        assert!(result.is_err());
3608    }
3609
3610    #[test]
3611    fn test_to_dto_vec() {
3612        let mut data = IndexMap::new();
3613        data.insert("items".to_string(), Value::Array(vec![
3614            Value::String("a".to_string()),
3615            Value::String("b".to_string()),
3616        ]));
3617        let doc = TeaLeaf::new(IndexMap::new(), data);
3618        let result: Vec<String> = doc.to_dto_vec("items").unwrap();
3619        assert_eq!(result, vec!["a", "b"]);
3620    }
3621
3622    #[test]
3623    fn test_to_dto_vec_not_array() {
3624        let mut data = IndexMap::new();
3625        data.insert("item".to_string(), Value::String("not_an_array".to_string()));
3626        let doc = TeaLeaf::new(IndexMap::new(), data);
3627        let result: Result<Vec<String>> = doc.to_dto_vec("item");
3628        assert!(result.is_err());
3629    }
3630
3631    #[test]
3632    fn test_to_dto_vec_missing_key() {
3633        let doc = TeaLeaf::new(IndexMap::new(), IndexMap::new());
3634        let result: Result<Vec<String>> = doc.to_dto_vec("missing");
3635        assert!(result.is_err());
3636    }
3637
3638    // =========================================================================
3639    // Coverage: set_root_array, SchemaInferrer edge cases
3640    // =========================================================================
3641
3642    #[test]
3643    fn test_set_root_array() {
3644        let mut doc = TeaLeaf::new(IndexMap::new(), IndexMap::new());
3645        assert!(!doc.is_root_array);
3646        doc.set_root_array(true);
3647        assert!(doc.is_root_array);
3648    }
3649
3650    #[test]
3651    fn test_schema_inferrer_non_uniform_array() {
3652        // Array with different object structures should not create a schema
3653        let json = r#"{"items": [{"a": 1}, {"b": 2}]}"#;
3654        let doc = TeaLeaf::from_json_with_schemas(json).unwrap();
3655        assert!(doc.schema("item").is_none(), "Non-uniform array should not produce schema");
3656    }
3657
3658    #[test]
3659    fn test_schema_inferrer_mixed_types_in_array() {
3660        // Array with non-objects
3661        let json = r#"{"items": [1, 2, 3]}"#;
3662        let doc = TeaLeaf::from_json_with_schemas(json).unwrap();
3663        assert!(doc.schema("item").is_none(), "Non-object array should not produce schema");
3664    }
3665
3666    #[test]
3667    fn test_schema_inferrer_empty_array() {
3668        let json = r#"{"items": []}"#;
3669        let doc = TeaLeaf::from_json_with_schemas(json).unwrap();
3670        assert!(doc.schema("item").is_none(), "Empty array should not produce schema");
3671    }
3672
3673    #[test]
3674    fn test_schema_inferrer_duplicate_schema_name() {
3675        // Two arrays that would produce the same schema name
3676        let json = r#"{
3677            "items": [{"id": 1, "name": "A"}],
3678            "nested": {"items": [{"id": 2, "name": "B"}]}
3679        }"#;
3680        let doc = TeaLeaf::from_json_with_schemas(json).unwrap();
3681        // Should have "item" schema (first one wins)
3682        assert!(doc.schema("item").is_some());
3683    }
3684
3685    #[test]
3686    fn test_schema_inferrer_int_float_merge() {
3687        // Field that has int in one record and float in another
3688        let json = r#"{"values": [{"x": 1}, {"x": 2.5}]}"#;
3689        let doc = TeaLeaf::from_json_with_schemas(json).unwrap();
3690        let schema = doc.schema("value").unwrap();
3691        let x_field = schema.fields.iter().find(|f| f.name == "x").unwrap();
3692        assert_eq!(x_field.field_type.base, "float", "Int+Float merge should produce float");
3693    }
3694
3695    #[test]
3696    fn test_schema_inference_with_root_array() {
3697        let json = r#"[{"id": 1, "name": "Alice"}, {"id": 2, "name": "Bob"}]"#;
3698        let doc = TeaLeaf::from_json_with_schemas(json).unwrap();
3699        // Root array is stored under "root" key - the schema name should be derived from "root"
3700        // The singularize of "root" is "root" (no trailing s)
3701        // Actually, root arrays aren't typically analyzed because the key is "root" and it goes through analyze_value
3702        let root_val = doc.get("root").unwrap().as_array().unwrap();
3703        assert_eq!(root_val.len(), 2);
3704    }
3705
3706    // =========================================================================
3707    // Coverage: dumps_with_schemas with quoting in schemas
3708    // =========================================================================
3709
3710    #[test]
3711    fn test_dumps_with_schemas_string_quoting_in_tuples() {
3712        let mut schemas = IndexMap::new();
3713        let mut schema = Schema::new("item");
3714        schema.add_field("name", FieldType::new("string"));
3715        schemas.insert("item".to_string(), schema);
3716
3717        let mut data = IndexMap::new();
3718        data.insert("items".to_string(), Value::Array(vec![
3719            Value::Object(vec![
3720                ("name".to_string(), Value::String("hello world".to_string())),
3721            ].into_iter().collect()),
3722        ]));
3723
3724        let schema_order = vec!["item".to_string()];
3725        let output = dumps_with_schemas(&data, &schemas, &schema_order, &IndexMap::new(), &[]);
3726        assert!(output.contains("\"hello world\""), "String with space should be quoted in tuple");
3727    }
3728
3729    #[test]
3730    fn test_dumps_with_schemas_array_without_schema() {
3731        // Array that doesn't match any schema
3732        let schemas = IndexMap::new();
3733        let mut data = IndexMap::new();
3734        data.insert("nums".to_string(), Value::Array(vec![Value::Int(1), Value::Int(2)]));
3735
3736        let output = dumps_with_schemas(&data, &schemas, &[], &IndexMap::new(), &[]);
3737        assert!(output.contains("[1, 2]"), "Should use regular array format");
3738    }
3739
3740    // =========================================================================
3741    // Coverage: convenience functions open(), parse(), root array to_json
3742    // =========================================================================
3743
3744    #[test]
3745    fn test_open_convenience_function() {
3746        // Write a binary file first, then open with the convenience function
3747        let dir = std::env::temp_dir();
3748        let path = dir.join("test_open_conv.tlbx");
3749
3750        let mut data = IndexMap::new();
3751        data.insert("x".to_string(), Value::Int(42));
3752        let doc = TeaLeaf::new(IndexMap::new(), data);
3753        doc.compile(&path, false).unwrap();
3754
3755        let reader = super::open(&path).unwrap();
3756        assert_eq!(reader.get("x").unwrap().as_int(), Some(42));
3757        std::fs::remove_file(&path).ok();
3758    }
3759
3760    #[test]
3761    fn test_parse_convenience_function() {
3762        let doc = super::parse("greeting: hello").unwrap();
3763        assert_eq!(doc.get("greeting").unwrap().as_str(), Some("hello"));
3764    }
3765
3766    #[test]
3767    fn test_to_json_root_array() {
3768        let mut data = IndexMap::new();
3769        data.insert("root".to_string(), Value::Array(vec![Value::Int(1), Value::Int(2)]));
3770        let mut doc = TeaLeaf::new(IndexMap::new(), data);
3771        doc.set_root_array(true);
3772
3773        let json = doc.to_json().unwrap();
3774        let parsed: serde_json::Value = serde_json::from_str(&json).unwrap();
3775        assert!(parsed.is_array(), "Root array to_json should output array");
3776        assert_eq!(parsed.as_array().unwrap().len(), 2);
3777    }
3778
3779    #[test]
3780    fn test_to_json_compact_root_array() {
3781        let mut data = IndexMap::new();
3782        data.insert("root".to_string(), Value::Array(vec![Value::Int(1)]));
3783        let mut doc = TeaLeaf::new(IndexMap::new(), data);
3784        doc.set_root_array(true);
3785
3786        let json = doc.to_json_compact().unwrap();
3787        assert_eq!(json, "[1]");
3788    }
3789
3790    #[test]
3791    fn test_infer_type_bool_value() {
3792        let it = infer_type(&Value::Bool(true));
3793        assert!(matches!(it, InferredType::Bool));
3794    }
3795
3796    #[test]
3797    fn test_schema_inference_nested_object_fields() {
3798        // JSON with nested objects inside array items
3799        let json = r#"{"records": [
3800            {"id": 1, "details": {"city": "NYC", "zip": "10001"}},
3801            {"id": 2, "details": {"city": "LA", "zip": "90001"}}
3802        ]}"#;
3803        let doc = TeaLeaf::from_json_with_schemas(json).unwrap();
3804        // Should infer both "record" and "detail" schemas
3805        assert!(doc.schema("record").is_some(), "Should infer record schema");
3806    }
3807
3808    #[test]
3809    fn test_schema_inference_not_all_objects_returns_early() {
3810        // Array where second element is not an object
3811        let json = r#"{"items": [{"a": 1}, "not_an_object"]}"#;
3812        let doc = TeaLeaf::from_json_with_schemas(json).unwrap();
3813        assert!(doc.schema("item").is_none(), "Mixed array should not produce schema");
3814    }
3815
3816    #[test]
3817    fn test_to_tl_with_schemas_with_nested_array_field() {
3818        // Schema with an array-typed field
3819        let mut schemas = IndexMap::new();
3820        let mut schema = Schema::new("user");
3821        schema.add_field("name", FieldType::new("string"));
3822        schema.add_field("tags", FieldType::new("string").array());
3823        schemas.insert("user".to_string(), schema);
3824
3825        let mut data = IndexMap::new();
3826        let mut obj = IndexMap::new();
3827        obj.insert("name".to_string(), Value::String("Alice".into()));
3828        obj.insert("tags".to_string(), Value::Array(vec![
3829            Value::String("admin".into()),
3830            Value::String("active".into()),
3831        ]));
3832        data.insert("users".to_string(), Value::Array(vec![Value::Object(obj)]));
3833
3834        let doc = TeaLeaf::new(schemas, data);
3835        let text = doc.to_tl_with_schemas();
3836        assert!(text.contains("@struct user"), "Should have schema definition");
3837        assert!(text.contains("@table user"), "Should use table format");
3838    }
3839
3840    // =========================================================================
3841    // Issue 6: Improved schema matching
3842    // =========================================================================
3843
3844    #[test]
3845    fn test_schema_matching_nullable_fields_allowed_missing() {
3846        // Schema with nullable field should match objects missing that field
3847        let mut schemas = IndexMap::new();
3848        let mut s = Schema::new("Item");
3849        s.add_field("id", FieldType::new("int"));
3850        s.add_field("label", FieldType::new("string").nullable());
3851        schemas.insert("Item".to_string(), s);
3852
3853        let mut obj1 = IndexMap::new();
3854        obj1.insert("id".to_string(), Value::Int(1));
3855        // label is missing — but it's nullable, so it should still match
3856
3857        let doc = TeaLeaf {
3858            schemas,
3859            unions: IndexMap::new(),
3860            data: {
3861                let mut d = IndexMap::new();
3862                d.insert("items".to_string(), Value::Array(vec![Value::Object(obj1)]));
3863                d
3864            },
3865            is_root_array: false,
3866        };
3867        let result = doc.find_schema_for_value(doc.data.get("items").unwrap(), "items");
3868        assert!(result.is_some(), "Should match schema when nullable field is missing");
3869        assert_eq!(result.unwrap().name, "Item");
3870    }
3871
3872    #[test]
3873    fn test_schema_matching_rejects_extra_keys() {
3874        // Objects with extra keys not in schema should not match
3875        let mut schemas = IndexMap::new();
3876        let mut s = Schema::new("Point");
3877        s.add_field("x", FieldType::new("int"));
3878        s.add_field("y", FieldType::new("int"));
3879        schemas.insert("Point".to_string(), s);
3880
3881        let mut obj = IndexMap::new();
3882        obj.insert("x".to_string(), Value::Int(1));
3883        obj.insert("y".to_string(), Value::Int(2));
3884        obj.insert("z".to_string(), Value::Int(3)); // extra field
3885
3886        let doc = TeaLeaf {
3887            schemas,
3888            unions: IndexMap::new(),
3889            data: {
3890                let mut d = IndexMap::new();
3891                d.insert("points".to_string(), Value::Array(vec![Value::Object(obj)]));
3892                d
3893            },
3894            is_root_array: false,
3895        };
3896        let result = doc.find_schema_for_value(doc.data.get("points").unwrap(), "points");
3897        assert!(result.is_none(), "Should NOT match schema when extra keys are present");
3898    }
3899
3900    #[test]
3901    fn test_schema_matching_empty_array_no_matching_name() {
3902        let mut schemas = IndexMap::new();
3903        let mut s = Schema::new("Anything");
3904        s.add_field("x", FieldType::new("int"));
3905        schemas.insert("Anything".to_string(), s);
3906
3907        let doc = TeaLeaf {
3908            schemas,
3909            unions: IndexMap::new(),
3910            data: {
3911                let mut d = IndexMap::new();
3912                d.insert("empty".to_string(), Value::Array(vec![]));
3913                d
3914            },
3915            is_root_array: false,
3916        };
3917        let result = doc.find_schema_for_value(doc.data.get("empty").unwrap(), "empty");
3918        assert!(result.is_none(), "Empty array should return None when no schema name matches");
3919    }
3920
3921    #[test]
3922    fn test_schema_matching_empty_array_matches_by_name() {
3923        let mut schemas = IndexMap::new();
3924        let mut s = Schema::new("item");
3925        s.add_field("id", FieldType::new("int"));
3926        schemas.insert("item".to_string(), s);
3927
3928        let doc = TeaLeaf {
3929            schemas,
3930            unions: IndexMap::new(),
3931            data: {
3932                let mut d = IndexMap::new();
3933                d.insert("items".to_string(), Value::Array(vec![]));
3934                d
3935            },
3936            is_root_array: false,
3937        };
3938        let result = doc.find_schema_for_value(doc.data.get("items").unwrap(), "items");
3939        assert!(result.is_some(), "Empty array should match schema by singularized key name");
3940        assert_eq!(result.unwrap().name, "item");
3941    }
3942
3943    // =========================================================================
3944    // Issue 12: Negative timestamp formatting
3945    // =========================================================================
3946
3947    #[test]
3948    fn test_negative_timestamp_formatting() {
3949        // 1969-12-31T23:59:59Z = -1000 ms (1 second before epoch)
3950        let formatted = format_timestamp_millis(-1000, 0);
3951        assert_eq!(formatted, "1969-12-31T23:59:59Z");
3952    }
3953
3954    #[test]
3955    fn test_negative_timestamp_with_millis() {
3956        // -500 ms = 1969-12-31T23:59:59.500Z
3957        let formatted = format_timestamp_millis(-500, 0);
3958        assert_eq!(formatted, "1969-12-31T23:59:59.500Z");
3959    }
3960
3961    #[test]
3962    fn test_negative_timestamp_full_day() {
3963        // -86400000 ms = exactly one day before epoch = 1969-12-31T00:00:00Z
3964        let formatted = format_timestamp_millis(-86_400_000, 0);
3965        assert_eq!(formatted, "1969-12-31T00:00:00Z");
3966    }
3967
3968    #[test]
3969    fn test_epoch_timestamp() {
3970        let formatted = format_timestamp_millis(0, 0);
3971        assert_eq!(formatted, "1970-01-01T00:00:00Z");
3972    }
3973
3974    #[test]
3975    fn test_positive_timestamp_with_millis() {
3976        // 1123ms = 1 second + 123ms after epoch
3977        let formatted = format_timestamp_millis(1123, 0);
3978        assert_eq!(formatted, "1970-01-01T00:00:01.123Z");
3979    }
3980
3981    #[test]
3982    fn test_negative_timestamp_json_export() {
3983        let mut data = IndexMap::new();
3984        data.insert("ts".to_string(), Value::Timestamp(-1000, 0));
3985        let doc = TeaLeaf::new(IndexMap::new(), data);
3986        let json = doc.to_json().unwrap();
3987        assert!(json.contains("1969-12-31"), "Negative timestamp should format as pre-epoch date: {}", json);
3988    }
3989
3990    // =========================================================================
3991    // Issue 7: Deterministic serialization (IndexMap preserves insertion order)
3992    // =========================================================================
3993
3994    #[test]
3995    fn test_compile_deterministic_key_order() {
3996        // Two documents with the same data in the same insertion order
3997        // should produce identical binary output
3998        let dir = std::env::temp_dir();
3999        let path1 = dir.join("test_deterministic_1.tlbx");
4000        let path2 = dir.join("test_deterministic_2.tlbx");
4001
4002        let mut data1 = IndexMap::new();
4003        data1.insert("alpha".to_string(), Value::Int(1));
4004        data1.insert("beta".to_string(), Value::Int(2));
4005        data1.insert("gamma".to_string(), Value::Int(3));
4006        let doc1 = TeaLeaf::new(IndexMap::new(), data1);
4007        doc1.compile(&path1, false).unwrap();
4008
4009        let mut data2 = IndexMap::new();
4010        data2.insert("alpha".to_string(), Value::Int(1));
4011        data2.insert("beta".to_string(), Value::Int(2));
4012        data2.insert("gamma".to_string(), Value::Int(3));
4013        let doc2 = TeaLeaf::new(IndexMap::new(), data2);
4014        doc2.compile(&path2, false).unwrap();
4015
4016        let bytes1 = std::fs::read(&path1).unwrap();
4017        let bytes2 = std::fs::read(&path2).unwrap();
4018        assert_eq!(bytes1, bytes2, "Binary output should be identical for same insertion order");
4019
4020        std::fs::remove_file(&path1).ok();
4021        std::fs::remove_file(&path2).ok();
4022    }
4023
4024    #[test]
4025    fn test_dumps_deterministic_key_order() {
4026        // dumps() preserves IndexMap insertion order deterministically
4027        let mut data = IndexMap::new();
4028        data.insert("zebra".to_string(), Value::Int(3));
4029        data.insert("alpha".to_string(), Value::Int(1));
4030        data.insert("middle".to_string(), Value::Int(2));
4031
4032        let output1 = dumps(&data);
4033        let output2 = dumps(&data);
4034        assert_eq!(output1, output2, "dumps() should be deterministic");
4035        // Keys should appear in insertion order (IndexMap preserves insertion order)
4036        let lines: Vec<&str> = output1.trim().lines().collect();
4037        assert!(lines[0].starts_with("zebra:"), "First key should be 'zebra', got: {}", lines[0]);
4038        assert!(lines[1].starts_with("alpha:"), "Second key should be 'alpha', got: {}", lines[1]);
4039        assert!(lines[2].starts_with("middle:"), "Third key should be 'middle', got: {}", lines[2]);
4040    }
4041
4042    // =========================================================================
4043    // Order-preservation integration tests
4044    // =========================================================================
4045
4046    #[test]
4047    fn test_json_parse_preserves_key_order() {
4048        // JSON with intentionally non-alphabetical keys
4049        let json = r#"{"zebra": 1, "apple": 2, "mango": 3, "banana": 4}"#;
4050        let doc = TeaLeaf::from_json(json).unwrap();
4051        let keys: Vec<&String> = doc.data.keys().collect();
4052        assert_eq!(keys, &["zebra", "apple", "mango", "banana"],
4053            "JSON parse should preserve key insertion order");
4054    }
4055
4056    #[test]
4057    fn test_json_roundtrip_preserves_key_order() {
4058        let json = r#"{"zebra": 1, "apple": 2, "mango": 3}"#;
4059        let doc = TeaLeaf::from_json(json).unwrap();
4060        let json_out = doc.to_json().unwrap();
4061        // Parse back and verify order
4062        let parsed: serde_json::Value = serde_json::from_str(&json_out).unwrap();
4063        let keys: Vec<&str> = parsed.as_object().unwrap().keys().map(|s| s.as_str()).collect();
4064        assert_eq!(keys, &["zebra", "apple", "mango"],
4065            "JSON round-trip should preserve key order");
4066    }
4067
4068    #[test]
4069    fn test_tl_text_preserves_section_order() {
4070        let input = "zebra: 1\napple: 2\nmango: 3\n";
4071        let doc = TeaLeaf::parse(input).unwrap();
4072        let keys: Vec<&String> = doc.data.keys().collect();
4073        assert_eq!(keys, &["zebra", "apple", "mango"],
4074            "TL text parse should preserve section order");
4075
4076        // Serialize back and verify order
4077        let output = doc.to_tl_with_schemas();
4078        let lines: Vec<&str> = output.trim().lines().collect();
4079        assert!(lines[0].starts_with("zebra:"), "got: {}", lines[0]);
4080        assert!(lines[1].starts_with("apple:"), "got: {}", lines[1]);
4081        assert!(lines[2].starts_with("mango:"), "got: {}", lines[2]);
4082    }
4083
4084    #[test]
4085    fn test_binary_roundtrip_preserves_section_order() {
4086        let json = r#"{"zebra": 1, "apple": 2, "mango": 3, "banana": 4}"#;
4087        let doc = TeaLeaf::from_json(json).unwrap();
4088
4089        let dir = std::env::temp_dir();
4090        let path = dir.join("test_order_preserve.tlbx");
4091        doc.compile(&path, false).unwrap();
4092
4093        let reader = crate::Reader::open(&path).unwrap();
4094        let doc2 = TeaLeaf::from_reader(&reader).unwrap();
4095        let keys: Vec<&String> = doc2.data.keys().collect();
4096        assert_eq!(keys, &["zebra", "apple", "mango", "banana"],
4097            "Binary round-trip should preserve section order");
4098        std::fs::remove_file(&path).ok();
4099    }
4100
4101    #[test]
4102    fn test_object_field_order_preserved_through_binary() {
4103        let json = r#"{"data": {"z_last": 1, "a_first": 2, "m_middle": 3}}"#;
4104        let doc = TeaLeaf::from_json(json).unwrap();
4105
4106        let dir = std::env::temp_dir();
4107        let path = dir.join("test_obj_order.tlbx");
4108        doc.compile(&path, false).unwrap();
4109
4110        let reader = crate::Reader::open(&path).unwrap();
4111        let val = reader.get("data").unwrap();
4112        let obj = val.as_object().unwrap();
4113        let keys: Vec<&String> = obj.keys().collect();
4114        assert_eq!(keys, &["z_last", "a_first", "m_middle"],
4115            "Object field order should be preserved through binary round-trip");
4116        std::fs::remove_file(&path).ok();
4117    }
4118
4119    #[test]
4120    fn test_nested_object_order_preserved() {
4121        let json = r#"{"outer": {"z": {"c": 3, "a": 1, "b": 2}, "a": {"x": 10, "w": 20}}}"#;
4122        let doc = TeaLeaf::from_json(json).unwrap();
4123        let tl = doc.to_tl_with_schemas();
4124
4125        // Parse back and check nested order
4126        let doc2 = TeaLeaf::parse(&tl).unwrap();
4127        let outer = doc2.get("outer").unwrap().as_object().unwrap();
4128        let outer_keys: Vec<&String> = outer.keys().collect();
4129        assert_eq!(outer_keys, &["z", "a"], "Outer keys order preserved");
4130
4131        let z_obj = outer.get("z").unwrap().as_object().unwrap();
4132        let z_keys: Vec<&String> = z_obj.keys().collect();
4133        assert_eq!(z_keys, &["c", "a", "b"], "Nested object keys order preserved");
4134    }
4135
4136    #[test]
4137    fn test_schema_order_preserved_in_text() {
4138        let input = r#"
4139            @struct Zebra (z_name: string)
4140            @struct Apple (a_name: string)
4141            items: [1, 2, 3]
4142        "#;
4143        let doc = TeaLeaf::parse(input).unwrap();
4144        let schema_keys: Vec<&String> = doc.schemas.keys().collect();
4145        assert_eq!(schema_keys, &["Zebra", "Apple"],
4146            "Schema definition order should be preserved");
4147    }
4148
4149    // -------------------------------------------------------------------------
4150    // Fuzz regression tests (full serialize/roundtrip paths)
4151    // -------------------------------------------------------------------------
4152
4153    #[test]
4154    fn test_fuzz_crash_ba05f4f8_serialize_day_zero_no_panic() {
4155        // Regression: fuzz_serialize crash-ba05f4f81615e2bf2b01137126cd772c6c0cc6d2
4156        // Timestamp with month=0 or day=0 caused u32 underflow in days_from_epoch.
4157        // Exercises the full fuzz_serialize path: parse → to_json → to_tl → re-parse.
4158        let inputs = [
4159            "ts: 2024-01-00T10:30:00Z",  // day=0
4160            "ts: 2024-00-15T10:30:00Z",  // month=0
4161            "ts: 6000-00-00T00:00:00Z",  // both zero
4162        ];
4163        for input in &inputs {
4164            // parse must not panic (should return Err)
4165            let result = TeaLeaf::parse(input);
4166            if let Ok(tl) = result {
4167                let _ = tl.to_json();
4168                let _ = tl.to_json_compact();
4169                let text = tl.to_tl_with_schemas();
4170                let _ = TeaLeaf::parse(&text);
4171            }
4172        }
4173    }
4174
4175    #[test]
4176    fn test_fuzz_crash_b085ba0e_roundtrip_day_zero_no_panic() {
4177        // Regression: fuzz_roundtrip crash-b085ba0e656f074031d8c4cb5173313785fa79d1
4178        // Same days_from_epoch underflow, hit through the roundtrip path.
4179        // Exercises the full fuzz_roundtrip path: parse → compile → read → walk.
4180        let inputs = [
4181            "ts: 4001-03-00T00:00:00Z",  // day=0 (pattern from artifact)
4182            "ts: 4401-03-00T00:00:00Z",  // variant
4183        ];
4184        for input in &inputs {
4185            let result = TeaLeaf::parse(input);
4186            if let Ok(tl) = result {
4187                let tmp = tempfile::NamedTempFile::new().unwrap();
4188                if tl.compile(tmp.path(), false).is_ok() {
4189                    let bytes = std::fs::read(tmp.path()).unwrap();
4190                    if let Ok(reader) = Reader::from_bytes(bytes) {
4191                        for key in reader.keys() {
4192                            let _ = reader.get(key);
4193                        }
4194                    }
4195                }
4196            }
4197        }
4198    }
4199
4200    #[test]
4201    fn test_fuzz_crash_48767e10_json_schemas_bare_dash_roundtrip() {
4202        // Regression: fuzz_json_schemas crash-48767e10b4ec71542bfbee2bc358b1e21831a259
4203        // JSON string "-" was serialized unquoted, causing re-parse failure.
4204        for input in [
4205            r#""-""#, r#""+""#, r#""--""#, r#""-foo""#,
4206            r#"{"a": "-"}"#, r#"{"a": "+"}"#,
4207            "\"\\u0660\"",  // Arabic-Indic digit zero
4208        ] {
4209            let tl = TeaLeaf::from_json_with_schemas(input);
4210            if let Ok(tl) = tl {
4211                let text = tl.to_tl_with_schemas();
4212                let reparsed = TeaLeaf::parse(&text);
4213                assert!(
4214                    reparsed.is_ok(),
4215                    "re-parse failed for JSON input {}",
4216                    input,
4217                );
4218            }
4219        }
4220    }
4221
4222    #[test]
4223    fn test_fuzz_crash_820dac71_empty_key_roundtrip() {
4224        // Regression: fuzz_json_schemas crash-820dac71c95d324067cd88de5f24897c65ace57a
4225        // JSON object with empty key was serialized without quoting, losing the key.
4226        for input in [
4227            r#"{"":{}}"#,                // empty key with empty object
4228            r#"[{"":{}}}]"#,             // root array variant (crash-66a8d85176f76ed68ada9f9526abe4efd8352f27)
4229            r#"{"":"value"}"#,            // empty key with string value
4230        ] {
4231            if let Ok(tl) = TeaLeaf::from_json_with_schemas(input) {
4232                let text = tl.to_tl_with_schemas();
4233                let reparsed = TeaLeaf::parse(&text);
4234                assert!(
4235                    reparsed.is_ok(),
4236                    "re-parse failed for JSON input {}",
4237                    input,
4238                );
4239            }
4240        }
4241    }
4242
4243    #[test]
4244    fn test_fuzz_crash_66a8d851_root_array_empty_key() {
4245        // Regression: fuzz_json_schemas crash-66a8d85176f76ed68ada9f9526abe4efd8352f27
4246        // Root array with empty-key object: schema inference + to_tl_with_schemas roundtrip
4247        let input = r#"[{"":{}}]"#;
4248        if let Ok(tl) = TeaLeaf::from_json_with_schemas(input) {
4249            let text = tl.to_tl_with_schemas();
4250            let reparsed = TeaLeaf::parse(&text);
4251            assert!(reparsed.is_ok(), "re-parse failed for root array with empty key");
4252        }
4253    }
4254
4255    #[test]
4256    fn test_fuzz_crash_847a9194_uint_roundtrip() {
4257        // Regression: fuzz_json_schemas crash-847a919462bb567fab268023a5a29d04e92db779
4258        // Large u64 values (> i64::MAX) were demoted to f64 on re-parse, losing precision.
4259        let input = "9999999999999999999";  // > i64::MAX, fits in u64
4260        let tl = TeaLeaf::from_json_with_schemas(input).unwrap();
4261        let text = tl.to_tl_with_schemas();
4262        let reparsed = TeaLeaf::parse(&text).unwrap();
4263        let orig = tl.data.get("root").unwrap();
4264        let re = reparsed.data.get("root").unwrap();
4265        assert_eq!(orig, re, "UInt roundtrip mismatch");
4266    }
4267
4268    #[test]
4269    fn test_fuzz_crash_3902c5cc_float_infinity_roundtrip() {
4270        // Regression: fuzz_serialize crash-3902c5cc99e5e4150d08d40372c86207fbc6db7f
4271        // 5e550 and -5e550 overflow f64 and are now stored as JsonNumber.
4272        // NaN remains Float(NaN).
4273        let tl = TeaLeaf::parse("b: NaN").unwrap();
4274        let text = tl.to_tl_with_schemas();
4275        let reparsed = TeaLeaf::parse(&text).unwrap();
4276        let orig = tl.data.get("b").unwrap();
4277        let re = reparsed.data.get("b").unwrap();
4278        match (orig, re) {
4279            (Value::Float(a), Value::Float(b)) => {
4280                assert_eq!(a.to_bits(), b.to_bits(), "NaN roundtrip failed");
4281            }
4282            _ => panic!("expected Float, got {:?} / {:?}", orig, re),
4283        }
4284
4285        // 5e550 and -5e550 are now JsonNumber (overflow f64)
4286        for input in &["b: 5e550", "b: -5e550"] {
4287            let tl = TeaLeaf::parse(input).unwrap();
4288            let text = tl.to_tl_with_schemas();
4289            let reparsed = TeaLeaf::parse(&text).unwrap();
4290            let orig = tl.data.get("b").unwrap();
4291            let re = reparsed.data.get("b").unwrap();
4292            match (orig, re) {
4293                (Value::JsonNumber(a), Value::JsonNumber(b)) => {
4294                    assert_eq!(a, b, "JsonNumber roundtrip failed for {}", input);
4295                }
4296                _ => panic!("expected JsonNumber, got {:?} / {:?}", orig, re),
4297            }
4298        }
4299    }
4300
4301    #[test]
4302    fn test_needs_quoting_bare_sign() {
4303        assert!(needs_quoting("-"));
4304        assert!(needs_quoting("+"));
4305        assert!(needs_quoting("--"));
4306        assert!(needs_quoting("-foo"));
4307        assert!(needs_quoting("+bar"));
4308        assert!(needs_quoting("-1")); // negative number
4309        assert!(needs_quoting("+1")); // positive number
4310        assert!(needs_quoting("\u{0660}")); // Arabic-Indic digit zero
4311        assert!(!needs_quoting("hello"));
4312        assert!(!needs_quoting("foo-bar"));
4313    }
4314
4315    #[test]
4316    fn test_fuzz_crash_nan_string_needs_quoting() {
4317        // Regression: fuzz_parse/fuzz_serialize crash — string "NaN" must be quoted
4318        // to avoid re-parsing as Float(NaN).
4319        assert!(needs_quoting("NaN"));
4320        assert!(needs_quoting("inf"));
4321        assert!(needs_quoting("Infinity"));
4322
4323        // Roundtrip: String("NaN") must survive parse → dumps → re-parse
4324        for word in &["NaN", "inf", "Infinity"] {
4325            let input = format!("a: \"{}\"", word);
4326            let tl = TeaLeaf::parse(&input).unwrap();
4327            assert!(matches!(tl.get("a"), Some(Value::String(_))));
4328            let text = dumps(&tl.data);
4329            let reparsed = TeaLeaf::parse(&text).unwrap();
4330            assert_eq!(
4331                reparsed.get("a").unwrap().as_str(),
4332                Some(*word),
4333                "roundtrip failed for string {:?}",
4334                word,
4335            );
4336        }
4337    }
4338
4339    #[test]
4340    fn test_json_any_type_compile_roundtrip() {
4341        // Regression: from_json_with_schemas infers "any" for fields whose nested objects
4342        // don't match a schema. encode_typed_value must fall back to generic encoding
4343        // instead of erroring with "requires a schema for encoding".
4344        use tempfile::NamedTempFile;
4345
4346        let json = r#"[
4347            {"name": "alice", "meta": {"x": 1}},
4348            {"name": "bob",   "meta": {"y": "two", "z": true}}
4349        ]"#;
4350        let doc = TeaLeaf::from_json_with_schemas(json).unwrap();
4351        // "meta" has varying shapes → inferred as "any"
4352        let temp = NamedTempFile::new().unwrap();
4353        doc.compile(temp.path(), false).expect("compile with 'any' field must not error");
4354
4355        // Read back and verify data survived
4356        let reader = Reader::open(temp.path()).unwrap();
4357        assert_eq!(reader.keys().len(), doc.data.len());
4358    }
4359
4360    #[test]
4361    fn json_any_array_binary_roundtrip() {
4362        // Regression: []any fields (from JSON inference of heterogeneous arrays inside
4363        // schema-typed objects) caused binary corruption. encode_typed_value wrote
4364        // TLType::Struct as the element type for "any" (the to_tl_type default),
4365        // but the actual data was heterogeneous. The reader then read garbage bytes
4366        // as schema indices, crashing with "schema index N out of bounds".
4367        use tempfile::NamedTempFile;
4368
4369        let json = r#"{
4370            "events": [
4371                {
4372                    "id": "E1",
4373                    "type": "sale",
4374                    "data": ["SKU-100", 3, 29.99, true],
4375                    "tags": ["flash", "online"]
4376                },
4377                {
4378                    "id": "E2",
4379                    "type": "return",
4380                    "data": ["SKU-200", 1, 15.0, false],
4381                    "tags": ["in-store"]
4382                }
4383            ]
4384        }"#;
4385        let doc = TeaLeaf::from_json_with_schemas(json).unwrap();
4386
4387        // Verify inference: "data" should be []any (heterogeneous), "tags" should be []string
4388        let event_schema = doc.schemas.get("event").expect("missing 'event' schema");
4389        let data_field = event_schema.fields.iter().find(|f| f.name == "data").unwrap();
4390        assert!(data_field.field_type.is_array, "data should be array");
4391        assert_eq!(data_field.field_type.base, "any", "data should be []any, got []{}", data_field.field_type.base);
4392
4393        // Compile to binary
4394        let temp = NamedTempFile::new().unwrap();
4395        doc.compile(temp.path(), false).expect("compile must not error");
4396
4397        // Read back and verify full data integrity
4398        let reader = Reader::open(temp.path()).unwrap();
4399        let events_val = reader.get("events").expect("missing 'events' key");
4400        let events = events_val.as_array().expect("events should be array");
4401        assert_eq!(events.len(), 2, "should have 2 events");
4402
4403        // Verify first event's heterogeneous data array
4404        let e1 = events[0].as_object().expect("event should be object");
4405        assert_eq!(e1.get("id").unwrap().as_str(), Some("E1"));
4406        let data1 = e1.get("data").unwrap().as_array().expect("data should be array");
4407        assert_eq!(data1.len(), 4);
4408        assert_eq!(data1[0].as_str(), Some("SKU-100"));
4409        assert_eq!(data1[2].as_float(), Some(29.99));
4410    }
4411
4412    #[test]
4413    fn retail_orders_json_binary_roundtrip() {
4414        // End-to-end: retail_orders.json → infer schemas → compile → read → JSON
4415        // Exercises the full path that was missing from the test suite: complex
4416        // real-world JSON with heterogeneous arrays ([]any) inside schema-typed objects.
4417        use tempfile::NamedTempFile;
4418
4419        let fixture = std::path::Path::new(env!("CARGO_MANIFEST_DIR"))
4420            .join("tests/fixtures/retail_orders.json");
4421        let json = std::fs::read_to_string(&fixture)
4422            .unwrap_or_else(|e| panic!("read fixture {}: {e}", fixture.display()));
4423
4424        let doc = TeaLeaf::from_json_with_schemas(&json).unwrap();
4425        let temp = NamedTempFile::new().unwrap();
4426        doc.compile(temp.path(), true).expect("compile retail_orders must not error");
4427
4428        // Read binary back to JSON and compare
4429        let reader = Reader::open(temp.path()).unwrap();
4430        let keys = reader.keys();
4431        assert_eq!(keys.len(), 5, "expected 5 top-level keys, got {keys:?}");
4432
4433        // Verify all sections are readable and have correct element counts
4434        let orders_val = reader.get("orders").unwrap();
4435        let orders = orders_val.as_array().expect("orders");
4436        assert_eq!(orders.len(), 10, "expected 10 orders");
4437
4438        let products_val = reader.get("products").unwrap();
4439        let products = products_val.as_array().expect("products");
4440        assert_eq!(products.len(), 4, "expected 4 products");
4441
4442        let customers_val = reader.get("customers").unwrap();
4443        let customers = customers_val.as_array().expect("customers");
4444        assert_eq!(customers.len(), 3, "expected 3 customers");
4445
4446        // Spot-check: first order preserves heterogeneous fields
4447        let order1 = orders[0].as_object().expect("order should be object");
4448        assert_eq!(order1.get("order_id").unwrap().as_str(), Some("ORD-2024-00001"));
4449        let items = order1.get("items").unwrap().as_array().expect("items");
4450        assert_eq!(items.len(), 3, "first order should have 3 items");
4451    }
4452
4453    #[test]
4454    fn fuzz_repro_json_schema_bool_field_name() {
4455        // Fuzz crash: field named "bool" conflicts with type keyword
4456        let input = r#"[{"bool":{"b":2}}]"#;
4457        let tl = TeaLeaf::from_json_with_schemas(input).unwrap();
4458        let tl_text = tl.to_tl_with_schemas();
4459        let reparsed = TeaLeaf::parse(&tl_text)
4460            .unwrap_or_else(|e| panic!("Re-parse failed: {e}\nTL text:\n{tl_text}"));
4461        assert_eq!(tl.data.len(), reparsed.data.len(), "key count mismatch");
4462        for (key, orig_val) in &tl.data {
4463            let re_val = reparsed.data.get(key).unwrap_or_else(|| panic!("lost key '{key}'"));
4464            assert_eq!(orig_val, re_val, "value mismatch for key '{key}'");
4465        }
4466    }
4467
4468    /// Helper: verify that a JSON field named after a built-in type correctly
4469    /// round-trips through TL text when schema inference is used.
4470    fn assert_builtin_name_text_roundtrip(type_name: &str, inner_json: &str) {
4471        let input = format!(r#"[{{"{type_name}":{inner_json}}}]"#);
4472        let tl = TeaLeaf::from_json_with_schemas(&input)
4473            .unwrap_or_else(|e| panic!("[{type_name}] from_json_with_schemas failed: {e}"));
4474        let tl_text = tl.to_tl_with_schemas();
4475
4476        // The schema should appear in the text output
4477        assert!(
4478            tl_text.contains(&format!("@struct {type_name}")),
4479            "[{type_name}] expected @struct {type_name} in TL text:\n{tl_text}"
4480        );
4481
4482        let reparsed = TeaLeaf::parse(&tl_text)
4483            .unwrap_or_else(|e| panic!("[{type_name}] re-parse failed: {e}\nTL text:\n{tl_text}"));
4484
4485        assert_eq!(
4486            tl.data.len(), reparsed.data.len(),
4487            "[{type_name}] key count mismatch"
4488        );
4489        for (key, orig_val) in &tl.data {
4490            let re_val = reparsed.data.get(key)
4491                .unwrap_or_else(|| panic!("[{type_name}] lost key '{key}'"));
4492            assert_eq!(orig_val, re_val, "[{type_name}] value mismatch for key '{key}'");
4493        }
4494    }
4495
4496    #[test]
4497    fn schema_name_shadows_builtin_bool() {
4498        assert_builtin_name_text_roundtrip("bool", r#"{"x":1}"#);
4499    }
4500
4501    #[test]
4502    fn schema_name_shadows_builtin_int() {
4503        // Inner value is a string so field type "string" doesn't collide with schema "int"
4504        assert_builtin_name_text_roundtrip("int", r#"{"x":"hello"}"#);
4505    }
4506
4507    #[test]
4508    fn schema_name_shadows_builtin_int8() {
4509        assert_builtin_name_text_roundtrip("int8", r#"{"x":"hello"}"#);
4510    }
4511
4512    #[test]
4513    fn schema_name_shadows_builtin_int16() {
4514        assert_builtin_name_text_roundtrip("int16", r#"{"x":"hello"}"#);
4515    }
4516
4517    #[test]
4518    fn schema_name_shadows_builtin_int32() {
4519        assert_builtin_name_text_roundtrip("int32", r#"{"x":"hello"}"#);
4520    }
4521
4522    #[test]
4523    fn schema_name_shadows_builtin_int64() {
4524        assert_builtin_name_text_roundtrip("int64", r#"{"x":"hello"}"#);
4525    }
4526
4527    #[test]
4528    fn schema_name_shadows_builtin_uint() {
4529        assert_builtin_name_text_roundtrip("uint", r#"{"x":"hello"}"#);
4530    }
4531
4532    #[test]
4533    fn schema_name_shadows_builtin_uint8() {
4534        assert_builtin_name_text_roundtrip("uint8", r#"{"x":"hello"}"#);
4535    }
4536
4537    #[test]
4538    fn schema_name_shadows_builtin_uint16() {
4539        assert_builtin_name_text_roundtrip("uint16", r#"{"x":"hello"}"#);
4540    }
4541
4542    #[test]
4543    fn schema_name_shadows_builtin_uint32() {
4544        assert_builtin_name_text_roundtrip("uint32", r#"{"x":"hello"}"#);
4545    }
4546
4547    #[test]
4548    fn schema_name_shadows_builtin_uint64() {
4549        assert_builtin_name_text_roundtrip("uint64", r#"{"x":"hello"}"#);
4550    }
4551
4552    #[test]
4553    fn schema_name_shadows_builtin_float() {
4554        assert_builtin_name_text_roundtrip("float", r#"{"x":1}"#);
4555    }
4556
4557    #[test]
4558    fn schema_name_shadows_builtin_float32() {
4559        assert_builtin_name_text_roundtrip("float32", r#"{"x":1}"#);
4560    }
4561
4562    #[test]
4563    fn schema_name_shadows_builtin_float64() {
4564        assert_builtin_name_text_roundtrip("float64", r#"{"x":1}"#);
4565    }
4566
4567    #[test]
4568    fn schema_name_shadows_builtin_string() {
4569        assert_builtin_name_text_roundtrip("string", r#"{"x":1}"#);
4570    }
4571
4572    // Note: "bytes" is not tested via JSON inference because singularize("bytes") = "byte"
4573    // which is NOT a built-in type. The direct TL-parsing test below covers "bytes" as a
4574    // schema name.
4575
4576    #[test]
4577    fn schema_name_shadows_builtin_timestamp() {
4578        assert_builtin_name_text_roundtrip("timestamp", r#"{"x":1}"#);
4579    }
4580
4581    /// Test built-in type names as schemas via direct TL text parsing (not JSON inference).
4582    /// This covers names that can't arise through singularization (like "bytes").
4583    #[test]
4584    fn schema_name_shadows_builtin_direct_tl_parse() {
4585        let test_cases = &[
4586            // (TL text, expected field name, expected inner value)
4587            (
4588                "@struct bytes (x: int)\n@struct root (data: bytes)\nroot: @table root [\n  ((42))\n]",
4589                "data",
4590                Value::Object(IndexMap::from([
4591                    ("x".to_string(), Value::Int(42)),
4592                ])),
4593            ),
4594            (
4595                "@struct bool (a: int, b: string)\n@struct root (flag: bool)\nroot: @table root [\n  ((1, hello))\n]",
4596                "flag",
4597                Value::Object(IndexMap::from([
4598                    ("a".to_string(), Value::Int(1)),
4599                    ("b".to_string(), Value::String("hello".into())),
4600                ])),
4601            ),
4602        ];
4603
4604        for (tl_text, field_name, expected_val) in test_cases {
4605            let doc = TeaLeaf::parse(tl_text)
4606                .unwrap_or_else(|e| panic!("parse failed for field '{field_name}': {e}\n{tl_text}"));
4607
4608            let root_arr = doc.data.get("root").expect("missing 'root' key");
4609            if let Value::Array(arr) = root_arr {
4610                if let Value::Object(obj) = &arr[0] {
4611                    let actual = obj.get(*field_name)
4612                        .unwrap_or_else(|| panic!("missing field '{field_name}'"));
4613                    assert_eq!(actual, expected_val, "mismatch for field '{field_name}'");
4614                } else {
4615                    panic!("expected Object, got {:?}", arr[0]);
4616                }
4617            } else {
4618                panic!("expected Array, got {:?}", root_arr);
4619            }
4620        }
4621    }
4622
4623    /// Self-referencing case: @struct int (x: int) where the inner field type
4624    /// matches the schema name. The LParen guard ensures `x: int` resolves to
4625    /// primitive int (next token is a literal, not `(`).
4626    #[test]
4627    fn schema_name_shadows_builtin_self_referencing() {
4628        // JSON: [{"int": {"x": 1}}] — creates @struct int (x: int)
4629        // The inner field "x: int" must resolve to primitive int, not struct "int"
4630        let input = r#"[{"int":{"x":1}}]"#;
4631        let tl = TeaLeaf::from_json_with_schemas(input).unwrap();
4632        let tl_text = tl.to_tl_with_schemas();
4633
4634        assert!(tl_text.contains("@struct int"), "expected @struct int in:\n{tl_text}");
4635
4636        let reparsed = TeaLeaf::parse(&tl_text)
4637            .unwrap_or_else(|e| panic!("re-parse failed: {e}\nTL text:\n{tl_text}"));
4638
4639        for (key, orig_val) in &tl.data {
4640            let re_val = reparsed.data.get(key)
4641                .unwrap_or_else(|| panic!("lost key '{key}'"));
4642            assert_eq!(orig_val, re_val, "value mismatch for key '{key}'");
4643        }
4644    }
4645
4646    /// Self-referencing: @struct int (int: int) — field name AND type both "int"
4647    #[test]
4648    fn schema_name_shadows_builtin_self_ref_same_field_name() {
4649        let tl_text = "\
4650@struct int (int: int)
4651@struct root (val: int)
4652
4653root: @table root [
4654  ((42))
4655]
4656";
4657        let doc = TeaLeaf::parse(tl_text)
4658            .unwrap_or_else(|e| panic!("parse failed: {e}\nTL text:\n{tl_text}"));
4659
4660        let json = doc.to_json().unwrap();
4661        eprintln!("=== JSON ===\n{json}");
4662
4663        // The root array should have one element with field "val" as an Object
4664        let root_arr = doc.data.get("root").expect("missing 'root'");
4665        if let Value::Array(arr) = root_arr {
4666            if let Value::Object(obj) = &arr[0] {
4667                let val = obj.get("val").expect("missing field 'val'");
4668                // val should be Object({"int": Int(42)}) — struct "int" with field "int" = 42
4669                assert_eq!(
4670                    val,
4671                    &Value::Object(IndexMap::from([
4672                        ("int".to_string(), Value::Int(42)),
4673                    ])),
4674                    "expected struct instance, got {val:?}"
4675                );
4676            } else {
4677                panic!("expected Object, got {:?}", arr[0]);
4678            }
4679        } else {
4680            panic!("expected Array, got {root_arr:?}");
4681        }
4682    }
4683
4684    /// Duplicate @struct declarations: second overwrites first
4685    #[test]
4686    fn schema_name_shadows_builtin_duplicate_struct_decl() {
4687        let tl_text = "\
4688@struct int (x: int)
4689@struct int (int: int)
4690@struct root (val: int)
4691
4692root: @table root [
4693  ((42))
4694]
4695";
4696        let result = TeaLeaf::parse(tl_text);
4697        match &result {
4698            Ok(doc) => {
4699                let json = doc.to_json().unwrap();
4700                eprintln!("=== JSON ===\n{json}");
4701                eprintln!("=== schemas ===");
4702                for (name, schema) in &doc.schemas {
4703                    let fields: Vec<String> = schema.fields.iter()
4704                        .map(|f| format!("{}: {}", f.name, f.field_type.base))
4705                        .collect();
4706                    eprintln!("  @struct {name} ({})", fields.join(", "));
4707                }
4708            }
4709            Err(e) => {
4710                eprintln!("=== parse error ===\n{e}");
4711            }
4712        }
4713        // Assert that parsing succeeds
4714        result.unwrap();
4715    }
4716
4717    /// Multiple built-in-named schemas in the same document
4718    #[test]
4719    fn schema_name_shadows_multiple_builtins() {
4720        let input = r#"[{"bool":{"a":1},"int":{"b":"hello"},"float":{"c":true}}]"#;
4721        let tl = TeaLeaf::from_json_with_schemas(input).unwrap();
4722        let tl_text = tl.to_tl_with_schemas();
4723
4724        assert!(tl_text.contains("@struct bool"), "missing @struct bool");
4725        assert!(tl_text.contains("@struct int"), "missing @struct int");
4726        assert!(tl_text.contains("@struct float"), "missing @struct float");
4727
4728        let reparsed = TeaLeaf::parse(&tl_text)
4729            .unwrap_or_else(|e| panic!("re-parse failed: {e}\nTL text:\n{tl_text}"));
4730
4731        for (key, orig_val) in &tl.data {
4732            let re_val = reparsed.data.get(key)
4733                .unwrap_or_else(|| panic!("lost key '{key}'"));
4734            assert_eq!(orig_val, re_val, "value mismatch for key '{key}'");
4735        }
4736    }
4737
4738
4739    /// Fuzz crash: singularize("s") → "" (empty string), producing invalid
4740    /// @struct definitions with missing names.
4741    #[test]
4742    fn fuzz_repro_singularize_single_char_s() {
4743        let input = r#"[{"s":{"b":1}}]"#;
4744        let tl = TeaLeaf::from_json_with_schemas(input).unwrap();
4745        let tl_text = tl.to_tl_with_schemas();
4746
4747        // Schema name must not be empty — singularize("s") should return "s"
4748        assert!(
4749            tl_text.contains("@struct s"),
4750            "expected @struct s in TL text:\n{tl_text}"
4751        );
4752
4753        let reparsed = TeaLeaf::parse(&tl_text)
4754            .unwrap_or_else(|e| panic!("Re-parse failed: {e}\nTL text:\n{tl_text}"));
4755        assert_eq!(tl.data.len(), reparsed.data.len(), "key count mismatch");
4756        for (key, orig_val) in &tl.data {
4757            let re_val = reparsed.data.get(key).unwrap_or_else(|| panic!("lost key '{key}'"));
4758            assert_eq!(orig_val, re_val, "value mismatch for key '{key}'");
4759        }
4760    }
4761
4762    #[test]
4763    fn singularize_does_not_produce_empty_string() {
4764        // All single-character inputs must pass through unchanged
4765        for c in 'a'..='z' {
4766            let s = String::from(c);
4767            let result = super::singularize(&s);
4768            assert!(!result.is_empty(), "singularize({s:?}) produced empty string");
4769            assert_eq!(result, s, "singularize({s:?}) should return {s:?}, got {result:?}");
4770        }
4771    }
4772
4773    /// Fuzz crash: field name with dots causes value mismatch on roundtrip
4774    #[test]
4775    fn fuzz_repro_dots_in_field_name() {
4776        // Fuzz regression: field "root" inside root-array wrapper both singularize to "root",
4777        // causing analyze_nested_objects to create a correct inner schema that analyze_array
4778        // then overwrites with a self-referencing @struct root (root: root).
4779        let input = r#"[{"root":{"Z.lll.i0...A":44444440.0}}]"#;
4780        let tl = TeaLeaf::from_json_with_schemas(input).unwrap();
4781        let tl_text = tl.to_tl_with_schemas();
4782        let reparsed = TeaLeaf::parse(&tl_text)
4783            .unwrap_or_else(|e| panic!("Re-parse failed: {e}\nTL text:\n{tl_text}"));
4784        assert_eq!(tl.data.len(), reparsed.data.len(), "key count mismatch");
4785        for (key, orig_val) in &tl.data {
4786            let re_val = reparsed.data.get(key).unwrap_or_else(|| panic!("lost key '{key}'"));
4787            assert_eq!(orig_val, re_val, "value mismatch for key '{key}'");
4788        }
4789    }
4790
4791    #[test]
4792    fn schema_name_collision_field_matches_parent() {
4793        // When an array field name singularizes to the same name as its parent schema,
4794        // the inner schema should be preserved (not overwritten with a self-reference).
4795        // This tests the general case, not just the root-array wrapper collision.
4796        let input = r#"{"items": [{"items": {"a": 1, "b": 2}}]}"#;
4797        let tl = TeaLeaf::from_json_with_schemas(input).unwrap();
4798        let tl_text = tl.to_tl_with_schemas();
4799        let reparsed = TeaLeaf::parse(&tl_text)
4800            .unwrap_or_else(|e| panic!("Re-parse failed: {e}\nTL text:\n{tl_text}"));
4801        for (key, orig_val) in &tl.data {
4802            let re_val = reparsed.data.get(key).unwrap_or_else(|| panic!("lost key '{key}'"));
4803            assert_eq!(orig_val, re_val, "value mismatch for key '{key}'");
4804        }
4805    }
4806
4807    #[test]
4808    fn analyze_node_nesting_stress_test() {
4809        // Stress test: "node" appears at many nesting levels with different shapes.
4810        // Schema inference should NOT create conflicting schemas or lose data.
4811        let input = r#"{
4812          "node": {
4813            "id": 1,
4814            "name": "root",
4815            "active": true,
4816            "node": {
4817              "id": "child-1",
4818              "metrics": {
4819                "node": {
4820                  "value": 42.7,
4821                  "unit": "ms",
4822                  "thresholds": [10, 20, 30]
4823                }
4824              },
4825              "node": [
4826                {
4827                  "id": 2,
4828                  "enabled": false
4829                },
4830                {
4831                  "id": 3,
4832                  "enabled": "sometimes",
4833                  "node": {
4834                    "status": null,
4835                    "confidence": 0.93
4836                  }
4837                }
4838              ]
4839            }
4840          },
4841          "nodeMetadata": {
4842            "node": {
4843              "version": 5,
4844              "checksum": "a94a8fe5ccb19ba61c4c0873d391e987",
4845              "flags": {
4846                "node": true
4847              }
4848            }
4849          }
4850        }"#;
4851
4852        let tl = TeaLeaf::from_json_with_schemas(input).unwrap();
4853        eprintln!("=== schemas ({}) ===", tl.schemas.len());
4854        for (name, schema) in &tl.schemas {
4855            let fields: Vec<String> = schema.fields.iter()
4856                .map(|f| format!("{}: {}{}{}", f.name, f.field_type.base,
4857                    if f.field_type.is_array { "[]" } else { "" },
4858                    if f.field_type.nullable { "?" } else { "" }))
4859                .collect();
4860            eprintln!("  @struct {name} ({})", fields.join(", "));
4861        }
4862        let tl_text = tl.to_tl_with_schemas();
4863        eprintln!("=== TL text ===\n{tl_text}");
4864
4865        // Core correctness check: round-trip must preserve all data
4866        let reparsed = TeaLeaf::parse(&tl_text)
4867            .unwrap_or_else(|e| panic!("Re-parse failed: {e}\nTL text:\n{tl_text}"));
4868        for (key, orig_val) in &tl.data {
4869            let re_val = reparsed.data.get(key).unwrap_or_else(|| panic!("lost key '{key}'"));
4870            assert_eq!(orig_val, re_val, "value mismatch for key '{key}'");
4871        }
4872    }
4873
4874    #[test]
4875    fn schema_collision_recursive_arrays() {
4876        // "nodes" appears as arrays at two levels with different shapes.
4877        // Inner: [{name, value}], Outer: [{name, nodes}]
4878        // Both singularize to "node" — only one schema can exist.
4879        let input = r#"{
4880          "nodes": [
4881            {
4882              "name": "parent",
4883              "nodes": [
4884                {"name": "child", "value": 42}
4885              ]
4886            }
4887          ]
4888        }"#;
4889        let tl = TeaLeaf::from_json_with_schemas(input).unwrap();
4890        eprintln!("=== schemas ({}) ===", tl.schemas.len());
4891        for (name, schema) in &tl.schemas {
4892            let fields: Vec<String> = schema.fields.iter()
4893                .map(|f| format!("{}: {}{}{}", f.name, f.field_type.base,
4894                    if f.field_type.is_array { "[]" } else { "" },
4895                    if f.field_type.nullable { "?" } else { "" }))
4896                .collect();
4897            eprintln!("  @struct {name} ({})", fields.join(", "));
4898        }
4899        let tl_text = tl.to_tl_with_schemas();
4900        eprintln!("=== TL text ===\n{tl_text}");
4901        let reparsed = TeaLeaf::parse(&tl_text)
4902            .unwrap_or_else(|e| panic!("Re-parse failed: {e}\nTL text:\n{tl_text}"));
4903        for (key, orig_val) in &tl.data {
4904            let re_val = reparsed.data.get(key).unwrap_or_else(|| panic!("lost key '{key}'"));
4905            assert_eq!(orig_val, re_val, "value mismatch for key '{key}'");
4906        }
4907    }
4908
4909    #[test]
4910    fn schema_collision_recursive_same_shape() {
4911        // "nodes" appears at two levels but SAME shape [{id, name}].
4912        // Schema "node" created for inner array should also work for outer.
4913        let input = r#"{
4914          "nodes": [
4915            {
4916              "id": 1,
4917              "name": "parent",
4918              "children": [
4919                {"id": 10, "name": "child-a"},
4920                {"id": 11, "name": "child-b"}
4921              ]
4922            },
4923            {
4924              "id": 2,
4925              "name": "sibling",
4926              "children": [
4927                {"id": 20, "name": "child-c"}
4928              ]
4929            }
4930          ]
4931        }"#;
4932        let tl = TeaLeaf::from_json_with_schemas(input).unwrap();
4933        eprintln!("=== schemas ({}) ===", tl.schemas.len());
4934        for (name, schema) in &tl.schemas {
4935            let fields: Vec<String> = schema.fields.iter()
4936                .map(|f| format!("{}: {}{}{}", f.name, f.field_type.base,
4937                    if f.field_type.is_array { "[]" } else { "" },
4938                    if f.field_type.nullable { "?" } else { "" }))
4939                .collect();
4940            eprintln!("  @struct {name} ({})", fields.join(", "));
4941        }
4942        let tl_text = tl.to_tl_with_schemas();
4943        eprintln!("=== TL text ===\n{tl_text}");
4944        let reparsed = TeaLeaf::parse(&tl_text)
4945            .unwrap_or_else(|e| panic!("Re-parse failed: {e}\nTL text:\n{tl_text}"));
4946        for (key, orig_val) in &tl.data {
4947            let re_val = reparsed.data.get(key).unwrap_or_else(|| panic!("lost key '{key}'"));
4948            assert_eq!(orig_val, re_val, "value mismatch for key '{key}'");
4949        }
4950    }
4951
4952    #[test]
4953    fn schema_collision_three_level_nesting() {
4954        // "nodes" at 3 levels: L1 and L2 have same shape {name, nodes},
4955        // L3 has different shape {name, score}. All singularize to "node".
4956        // The deepest schema wins (depth-first); outer levels fall back to
4957        // generic format. No data loss at any level.
4958        let input = r#"{
4959          "nodes": [
4960            {
4961              "name": "grandparent",
4962              "nodes": [
4963                {
4964                  "name": "parent",
4965                  "nodes": [
4966                    {"name": "leaf-a", "score": 99.5},
4967                    {"name": "leaf-b", "score": 42.0}
4968                  ]
4969                }
4970              ]
4971            },
4972            {
4973              "name": "uncle",
4974              "nodes": [
4975                {
4976                  "name": "cousin",
4977                  "nodes": [
4978                    {"name": "leaf-c", "score": 77.3}
4979                  ]
4980                }
4981              ]
4982            }
4983          ]
4984        }"#;
4985
4986        let tl = TeaLeaf::from_json_with_schemas(input).unwrap();
4987        eprintln!("=== schemas ({}) ===", tl.schemas.len());
4988        for (name, schema) in &tl.schemas {
4989            let fields: Vec<String> = schema.fields.iter()
4990                .map(|f| format!("{}: {}{}{}", f.name, f.field_type.base,
4991                    if f.field_type.is_array { "[]" } else { "" },
4992                    if f.field_type.nullable { "?" } else { "" }))
4993                .collect();
4994            eprintln!("  @struct {name} ({})", fields.join(", "));
4995        }
4996        let tl_text = tl.to_tl_with_schemas();
4997        eprintln!("=== TL text ===\n{tl_text}");
4998
4999        let reparsed = TeaLeaf::parse(&tl_text)
5000            .unwrap_or_else(|e| panic!("Re-parse failed: {e}\nTL text:\n{tl_text}"));
5001        for (key, orig_val) in &tl.data {
5002            let re_val = reparsed.data.get(key).unwrap_or_else(|| panic!("lost key '{key}'"));
5003            assert_eq!(orig_val, re_val, "value mismatch for key '{key}'");
5004        }
5005    }
5006
5007    #[test]
5008    fn schema_collision_three_level_divergent_leaves() {
5009        // L1: [{name, nodes}], L2: [{name, nodes}] (same shape),
5010        // L3: [{id, value}] in one branch, [{identifier, points}] in another.
5011        // The depth-first analysis only sees the first branch's L3 shape.
5012        // The second branch's L3 must fall back to generic format.
5013        let input = r#"{
5014          "nodes": [
5015            {
5016              "name": "grandparent",
5017              "nodes": [
5018                {
5019                  "name": "parent",
5020                  "nodes": [
5021                    {"id": "leaf-a", "value": 99.5},
5022                    {"id": "leaf-b", "value": 42.0}
5023                  ]
5024                }
5025              ]
5026            },
5027            {
5028              "name": "uncle",
5029              "nodes": [
5030                {
5031                  "name": "cousin",
5032                  "nodes": [
5033                    {"identifier": "leaf-c", "points": 77.3}
5034                  ]
5035                }
5036              ]
5037            }
5038          ]
5039        }"#;
5040
5041        let tl = TeaLeaf::from_json_with_schemas(input).unwrap();
5042        eprintln!("=== schemas ({}) ===", tl.schemas.len());
5043        for (name, schema) in &tl.schemas {
5044            let fields: Vec<String> = schema.fields.iter()
5045                .map(|f| format!("{}: {}{}{}", f.name, f.field_type.base,
5046                    if f.field_type.is_array { "[]" } else { "" },
5047                    if f.field_type.nullable { "?" } else { "" }))
5048                .collect();
5049            eprintln!("  @struct {name} ({})", fields.join(", "));
5050        }
5051        let tl_text = tl.to_tl_with_schemas();
5052        eprintln!("=== TL text ===\n{tl_text}");
5053
5054        let reparsed = TeaLeaf::parse(&tl_text)
5055            .unwrap_or_else(|e| panic!("Re-parse failed: {e}\nTL text:\n{tl_text}"));
5056        for (key, orig_val) in &tl.data {
5057            let re_val = reparsed.data.get(key).unwrap_or_else(|| panic!("lost key '{key}'"));
5058            assert_eq!(orig_val, re_val, "value mismatch for key '{key}'");
5059        }
5060    }
5061
5062    #[test]
5063    fn json_inference_nested_array_inside_object() {
5064        // JSON inference must discover array schemas inside nested objects.
5065        // e.g., items[].product.stock[] should get its own @struct stock schema,
5066        // not fall back to []any.
5067        let input = r#"{
5068          "items": [
5069            {
5070              "name": "Widget",
5071              "product": {
5072                "id": "P-1",
5073                "stock": [
5074                  {"warehouse": "W1", "qty": 100, "backordered": false},
5075                  {"warehouse": "W2", "qty": 50, "backordered": true}
5076                ]
5077              }
5078            },
5079            {
5080              "name": "Gadget",
5081              "product": {
5082                "id": "P-2",
5083                "stock": [
5084                  {"warehouse": "W1", "qty": 200, "backordered": false}
5085                ]
5086              }
5087            }
5088          ]
5089        }"#;
5090
5091        let tl = TeaLeaf::from_json_with_schemas(input).unwrap();
5092        let tl_text = tl.to_tl_with_schemas();
5093
5094        // Must have a "stock" schema (from singularize("stock") = "stock")
5095        assert!(tl.schemas.contains_key("stock"),
5096            "Missing 'stock' schema. Schemas: {:?}\nTL:\n{tl_text}",
5097            tl.schemas.keys().collect::<Vec<_>>());
5098
5099        // The product schema must reference stock[] not []any
5100        let product_schema = tl.schemas.get("product").expect("missing product schema");
5101        let stock_field = product_schema.fields.iter().find(|f| f.name == "stock")
5102            .expect("product schema missing stock field");
5103        assert!(stock_field.field_type.is_array, "stock should be array");
5104        assert_eq!(stock_field.field_type.base, "stock",
5105            "stock field type should be 'stock', got '{}'", stock_field.field_type.base);
5106
5107        // Must produce @table for items and tuples for stock inside product
5108        assert!(tl_text.contains("@table item"), "Missing @table item:\n{tl_text}");
5109
5110        // Round-trip: parse back and verify data integrity
5111        let reparsed = TeaLeaf::parse(&tl_text)
5112            .unwrap_or_else(|e| panic!("Re-parse failed: {e}\nTL:\n{tl_text}"));
5113        for (key, orig_val) in &tl.data {
5114            let re_val = reparsed.data.get(key).unwrap_or_else(|| panic!("lost key '{key}'"));
5115            assert_eq!(orig_val, re_val, "value mismatch for key '{key}'");
5116        }
5117    }
5118
5119    // ── Compact formatting tests ──────────────────────────────────────
5120
5121    #[test]
5122    fn test_dumps_compact_basic() {
5123        let mut data = IndexMap::new();
5124        data.insert("name".to_string(), Value::String("alice".to_string()));
5125        data.insert("age".to_string(), Value::Int(30));
5126        let output = dumps_compact(&data);
5127        assert!(output.contains("name:alice\n"), "got: {output}");
5128        assert!(output.contains("age:30\n"), "got: {output}");
5129    }
5130
5131    #[test]
5132    fn test_dumps_compact_array() {
5133        let mut data = IndexMap::new();
5134        data.insert("items".to_string(), Value::Array(vec![
5135            Value::Int(1), Value::Int(2), Value::Int(3),
5136        ]));
5137        let output = dumps_compact(&data);
5138        assert!(output.contains("[1,2,3]"), "got: {output}");
5139    }
5140
5141    #[test]
5142    fn test_dumps_compact_object() {
5143        let mut data = IndexMap::new();
5144        let obj: IndexMap<String, Value> = vec![
5145            ("host".to_string(), Value::String("localhost".to_string())),
5146            ("port".to_string(), Value::Int(8080)),
5147        ].into_iter().collect();
5148        data.insert("config".to_string(), Value::Object(obj));
5149        let output = dumps_compact(&data);
5150        assert!(output.contains("{host:localhost,port:8080}"), "got: {output}");
5151    }
5152
5153    #[test]
5154    fn test_dumps_compact_map() {
5155        let mut data = IndexMap::new();
5156        data.insert("m".to_string(), Value::Map(vec![
5157            (Value::Int(1), Value::String("one".to_string())),
5158            (Value::Int(2), Value::String("two".to_string())),
5159        ]));
5160        let output = dumps_compact(&data);
5161        assert!(output.contains("@map{1:one,2:two}"), "got: {output}");
5162    }
5163
5164    #[test]
5165    fn test_dumps_compact_tagged_keeps_space() {
5166        let mut data = IndexMap::new();
5167        data.insert("val".to_string(), Value::Tagged(
5168            "ok".to_string(), Box::new(Value::Int(200)),
5169        ));
5170        let output = dumps_compact(&data);
5171        assert!(output.contains(":ok 200"), "Space after :tag must be kept (tag/value would merge), got: {output}");
5172    }
5173
5174    #[test]
5175    fn test_compact_struct_definition() {
5176        let json = r#"{"users": [{"id": 1, "name": "alice"}, {"id": 2, "name": "bob"}]}"#;
5177        let doc = TeaLeaf::from_json_with_schemas(json).unwrap();
5178        let compact = doc.to_tl_with_schemas_compact();
5179        // Struct def should have no space before ( and no spaces after ,
5180        assert!(compact.contains("@struct user("), "got: {compact}");
5181        assert!(compact.contains("id:int"), "got: {compact}");
5182        // Table should have no space before [
5183        assert!(compact.contains("@table user["), "got: {compact}");
5184        // No indentation on table rows
5185        assert!(compact.contains("\n("), "rows should start at column 0, got: {compact}");
5186        assert!(!compact.contains("  ("), "no indentation in compact, got: {compact}");
5187        // No blank line between definitions and data
5188        assert!(!compact.contains(")\n\n"), "no blank line after struct def, got: {compact}");
5189    }
5190
5191    #[test]
5192    fn test_compact_is_smaller_than_pretty() {
5193        let json = r#"{"users": [{"id": 1, "name": "alice"}, {"id": 2, "name": "bob"}]}"#;
5194        let doc = TeaLeaf::from_json_with_schemas(json).unwrap();
5195        let pretty = doc.to_tl_with_schemas();
5196        let compact = doc.to_tl_with_schemas_compact();
5197        assert!(
5198            compact.len() < pretty.len(),
5199            "Compact ({}) should be smaller than pretty ({})\nCompact:\n{compact}\nPretty:\n{pretty}",
5200            compact.len(), pretty.len()
5201        );
5202    }
5203
5204    #[test]
5205    fn test_compact_roundtrip() {
5206        // Compact output must re-parse to the same data
5207        let json = r#"{
5208            "company": "FastTrack Logistics",
5209            "shipments": [
5210                {"id": "S1", "origin": "Los Angeles, CA", "weight": 250, "cost": 450.0, "delivered": true},
5211                {"id": "S2", "origin": "Chicago, IL", "weight": 180, "cost": 320.0, "delivered": false}
5212            ]
5213        }"#;
5214        let doc = TeaLeaf::from_json_with_schemas(json).unwrap();
5215        let compact = doc.to_tl_with_schemas_compact();
5216        let reparsed = TeaLeaf::parse(&compact)
5217            .unwrap_or_else(|e| panic!("Failed to re-parse compact: {e}\nCompact:\n{compact}"));
5218
5219        let json1 = doc.to_json().unwrap();
5220        let json2 = reparsed.to_json().unwrap();
5221        let v1: serde_json::Value = serde_json::from_str(&json1).unwrap();
5222        let v2: serde_json::Value = serde_json::from_str(&json2).unwrap();
5223        assert_eq!(v1, v2, "Compact round-trip data mismatch");
5224    }
5225
5226    #[test]
5227    fn test_compact_preserves_quoted_strings() {
5228        // Strings with spaces must keep their quotes and content intact
5229        let json = r#"{"items": [{"city": "New York, NY", "name": "Alice Smith"}]}"#;
5230        let doc = TeaLeaf::from_json_with_schemas(json).unwrap();
5231        let compact = doc.to_tl_with_schemas_compact();
5232        assert!(compact.contains("\"New York, NY\""), "Quoted string must be preserved, got: {compact}");
5233        assert!(compact.contains("\"Alice Smith\""), "Quoted string must be preserved, got: {compact}");
5234    }
5235
5236    #[test]
5237    fn test_compact_root_array_single_newline() {
5238        let json = r#"[1, 2, 3]"#;
5239        let doc = TeaLeaf::from_json(json).unwrap();
5240        let compact = doc.to_tl_with_schemas_compact();
5241        assert!(compact.starts_with("@root-array\n"), "got: {compact}");
5242        assert!(!compact.starts_with("@root-array\n\n"), "Should not have double newline in compact, got: {compact}");
5243    }
5244
5245    #[test]
5246    fn test_compact_no_schemas_path() {
5247        // Documents without schemas should also compact correctly
5248        let mut data = IndexMap::new();
5249        let obj: IndexMap<String, Value> = vec![
5250            ("x".to_string(), Value::Int(1)),
5251            ("y".to_string(), Value::Int(2)),
5252        ].into_iter().collect();
5253        data.insert("point".to_string(), Value::Object(obj));
5254        data.insert("label".to_string(), Value::String("origin".to_string()));
5255        let doc = TeaLeaf {
5256            schemas: IndexMap::new(),
5257            unions: IndexMap::new(),
5258            data,
5259            is_root_array: false,
5260        };
5261        let compact = doc.to_tl_with_schemas_compact();
5262        assert!(compact.contains("point:{x:1,y:2}"), "got: {compact}");
5263        assert!(compact.contains("label:origin"), "got: {compact}");
5264    }
5265
5266    #[test]
5267    fn test_compact_canonical_roundtrip() {
5268        // Verify compact output round-trips for all canonical samples
5269        let canonical_dir = std::path::Path::new(env!("CARGO_MANIFEST_DIR")).join("../canonical/samples");
5270        let samples = [
5271            "primitives", "arrays", "objects", "schemas", "timestamps",
5272            "unicode_escaping", "numbers_extended", "refs_tags_maps",
5273            "special_types", "unions", "mixed_schemas", "large_data",
5274        ];
5275        for name in &samples {
5276            let path = canonical_dir.join(format!("{}.tl", name));
5277            if !path.exists() { continue; }
5278            let doc = TeaLeaf::load(&path).unwrap();
5279            let compact = doc.to_tl_with_schemas_compact();
5280            let reparsed = TeaLeaf::parse(&compact)
5281                .unwrap_or_else(|e| panic!("Failed to re-parse compact {name}: {e}\nCompact:\n{compact}"));
5282            let json1 = doc.to_json().unwrap();
5283            let json2 = reparsed.to_json().unwrap();
5284            let v1: serde_json::Value = serde_json::from_str(&json1).unwrap();
5285            let v2: serde_json::Value = serde_json::from_str(&json2).unwrap();
5286            assert_eq!(v1, v2, "Compact round-trip failed for {name}");
5287        }
5288    }
5289}