Skip to main content

tealeaf/
lib.rs

1//! TeaLeaf - Schema-aware data format
2//!
3//! # Example
4//!
5//! ```rust
6//! use tealeaf::{TeaLeaf, Value};
7//!
8//! let doc = TeaLeaf::parse(r#"
9//!     @struct user (id: int, name: string)
10//!     users: @table user [
11//!         (1, alice),
12//!         (2, bob),
13//!     ]
14//! "#).unwrap();
15//!
16//! let users = doc.get("users").unwrap();
17//! ```
18
19mod types;
20mod lexer;
21mod parser;
22mod writer;
23mod reader;
24pub mod convert;
25pub mod builder;
26
27pub use types::{Error, Result, TLType, FieldType, Field, Schema, Union, Variant, Value, ObjectMap, MAGIC, VERSION, VERSION_MAJOR, VERSION_MINOR, HEADER_SIZE, MAX_STRING_LENGTH, MAX_OBJECT_FIELDS, MAX_ARRAY_LENGTH};
28pub use indexmap::IndexMap;
29pub use lexer::{Lexer, Token, TokenKind};
30pub use parser::Parser;
31pub use writer::Writer;
32pub use reader::Reader;
33pub use convert::{ToTeaLeaf, FromTeaLeaf, ConvertError, ToTeaLeafExt};
34pub use builder::TeaLeafBuilder;
35
36// Re-export derive macros when the "derive" feature is enabled
37#[cfg(feature = "derive")]
38pub use tealeaf_derive::{ToTeaLeaf, FromTeaLeaf};
39
40use std::collections::HashSet;
41use std::path::Path;
42
43/// A parsed TeaLeaf document
44pub struct TeaLeaf {
45    pub schemas: IndexMap<String, Schema>,
46    pub unions: IndexMap<String, Union>,
47    pub data: IndexMap<String, Value>,
48    /// Tracks if the source JSON was a root-level array (for round-trip fidelity)
49    is_root_array: bool,
50}
51
52impl TeaLeaf {
53    /// Create a new TeaLeaf document from data and schemas.
54    ///
55    /// This constructor is primarily for programmatic document creation.
56    /// For parsing from formats, use `parse()`, `load()`, or `from_json()`.
57    pub fn new(schemas: IndexMap<String, Schema>, data: IndexMap<String, Value>) -> Self {
58        Self {
59            schemas,
60            unions: IndexMap::new(),
61            data,
62            is_root_array: false,
63        }
64    }
65
66    /// Parse TeaLeaf text format
67    pub fn parse(input: &str) -> Result<Self> {
68        let tokens = Lexer::new(input).tokenize()?;
69        let mut parser = Parser::new(tokens);
70        let data = parser.parse()?;
71        let is_root_array = parser.is_root_array();
72        let (schemas, unions) = parser.into_schemas_and_unions();
73        Ok(Self {
74            schemas,
75            unions,
76            data,
77            is_root_array,
78        })
79    }
80
81    /// Load from text file
82    ///
83    /// Include paths are resolved relative to the loaded file's directory.
84    pub fn load<P: AsRef<Path>>(path: P) -> Result<Self> {
85        let path = path.as_ref();
86        let content = std::fs::read_to_string(path)?;
87        let tokens = Lexer::new(&content).tokenize()?;
88        let mut parser = Parser::new(tokens).with_base_path(path);
89        let data = parser.parse()?;
90        let is_root_array = parser.is_root_array();
91        let (schemas, unions) = parser.into_schemas_and_unions();
92        Ok(Self {
93            schemas,
94            unions,
95            data,
96            is_root_array,
97        })
98    }
99
100    /// Get a value by key
101    pub fn get(&self, key: &str) -> Option<&Value> {
102        self.data.get(key)
103    }
104
105    /// Get a schema by name
106    pub fn schema(&self, name: &str) -> Option<&Schema> {
107        self.schemas.get(name)
108    }
109
110    /// Get a union by name
111    pub fn union(&self, name: &str) -> Option<&Union> {
112        self.unions.get(name)
113    }
114
115    /// Compile to binary format
116    pub fn compile<P: AsRef<Path>>(&self, path: P, compress: bool) -> Result<()> {
117        let mut writer = Writer::new();
118        writer.set_root_array(self.is_root_array);
119        for (_, schema) in &self.schemas {
120            writer.add_schema(schema.clone());
121        }
122        for (_, union_def) in &self.unions {
123            writer.add_union(union_def.clone());
124        }
125        for (key, value) in &self.data {
126            let schema = self.find_schema_for_value(value, key);
127            writer.add_section(key, value, schema)?;
128        }
129        writer.write(path, compress)
130    }
131
132    fn find_schema_for_value(&self, value: &Value, key: &str) -> Option<&Schema> {
133        // Try to find a matching schema for array values
134        if let Value::Array(arr) = value {
135            if arr.is_empty() {
136                // For empty arrays, try name-based matching (singularize key → schema name)
137                let singular = singularize(key);
138                return self.schemas.values().find(|s| s.name.eq_ignore_ascii_case(&singular));
139            }
140
141            // Sample multiple elements: first, middle, last
142            let sample_indices: Vec<usize> = {
143                let mut indices = vec![0];
144                if arr.len() > 2 { indices.push(arr.len() / 2); }
145                if arr.len() > 1 { indices.push(arr.len() - 1); }
146                indices
147            };
148
149            for schema in self.schemas.values() {
150                let all_match = sample_indices.iter().all(|&i| {
151                    if let Some(Value::Object(obj)) = arr.get(i) {
152                        // All required (non-nullable) schema fields must be present
153                        schema.fields.iter().all(|f| {
154                            f.field_type.nullable || obj.contains_key(&f.name)
155                        })
156                        // All obj keys must be schema fields (no extra keys)
157                        && obj.keys().all(|k| schema.fields.iter().any(|f| f.name == *k))
158                    } else {
159                        false
160                    }
161                });
162                if all_match {
163                    return Some(schema);
164                }
165            }
166        }
167        None
168    }
169
170    /// Parse from JSON string.
171    ///
172    /// # Stability Policy
173    ///
174    /// This function follows a **"plain JSON only"** policy:
175    /// - JSON is parsed as-is with **no magic conversion**
176    /// - `{"$ref": "x"}` stays as an Object, NOT a Ref
177    /// - `{"$tag": "ok", "$value": 200}` stays as an Object, NOT a Tagged
178    /// - `"0xcafef00d"` stays as a String, NOT Bytes
179    /// - `"2024-01-15T10:30:00Z"` stays as a String, NOT a Timestamp
180    /// - `[[1, "one"], [2, "two"]]` stays as an Array, NOT a Map
181    ///
182    /// To create special TeaLeaf types, use the text format or binary API directly.
183    ///
184    /// # Number Type Inference
185    ///
186    /// - Integers that fit `i64` → `Value::Int`
187    /// - Large positive integers that fit `u64` → `Value::UInt`
188    /// - Numbers with decimals or scientific notation → `Value::Float`
189    pub fn from_json(json: &str) -> Result<Self> {
190        let json_value: serde_json::Value = serde_json::from_str(json)
191            .map_err(|e| Error::ParseError(format!("Invalid JSON: {}", e)))?;
192
193        let (data, is_root_array) = match json_value {
194            serde_json::Value::Object(obj) => {
195                let map = obj.into_iter()
196                    .map(|(k, v)| (k, json_to_tealeaf_value(v)))
197                    .collect();
198                (map, false)
199            }
200            serde_json::Value::Array(_) => {
201                // Root-level array: store under "root" key but track for round-trip
202                let mut map = IndexMap::new();
203                map.insert("root".to_string(), json_to_tealeaf_value(json_value));
204                (map, true)
205            }
206            _ => {
207                // Other primitives (string, number, bool, null) at root
208                let mut map = IndexMap::new();
209                map.insert("root".to_string(), json_to_tealeaf_value(json_value));
210                (map, false)
211            }
212        };
213
214        Ok(Self {
215            schemas: IndexMap::new(),
216            unions: IndexMap::new(),
217            data,
218            is_root_array,
219        })
220    }
221
222    /// Parse from JSON string with automatic schema inference.
223    ///
224    /// This variant analyzes the JSON structure and automatically:
225    /// - Detects arrays of uniformly-structured objects
226    /// - Infers schema names from parent keys (e.g., "products" → "product")
227    /// - Generates `@struct` definitions for uniform arrays
228    /// - Enables `@table` format output when serialized
229    ///
230    /// Use `to_tl_with_schemas()` to serialize with the inferred schemas.
231    pub fn from_json_with_schemas(json: &str) -> Result<Self> {
232        let doc = Self::from_json(json)?;
233
234        let mut inferrer = SchemaInferrer::new();
235        inferrer.infer(&doc.data);
236        let (schemas, _) = inferrer.into_schemas();
237
238        Ok(Self {
239            schemas,
240            unions: IndexMap::new(),
241            data: doc.data,
242            is_root_array: doc.is_root_array,
243        })
244    }
245
246    /// Serialize to TeaLeaf text format with schemas.
247    ///
248    /// If schemas are present (either from parsing or inference), outputs
249    /// `@struct` definitions and uses `@table` format for matching arrays.
250    ///
251    /// If this document represents a root-level JSON array (from `from_json`),
252    /// the output will include `@root-array` directive for round-trip fidelity.
253    pub fn to_tl_with_schemas(&self) -> String {
254        self.to_tl_with_options(&FormatOptions::default())
255    }
256
257    /// Serialize to compact TeaLeaf text format with schema definitions.
258    /// Removes insignificant whitespace (spaces after `:` and `,`, indentation,
259    /// blank lines) while keeping the format parseable. Table rows remain one
260    /// per line for readability.
261    pub fn to_tl_with_schemas_compact(&self) -> String {
262        self.to_tl_with_options(&FormatOptions::compact())
263    }
264
265    /// Serialize to TeaLeaf text format with custom formatting options.
266    ///
267    /// Use `FormatOptions::compact().with_compact_floats()` for maximum
268    /// token savings (strips whitespace and `.0` from whole-number floats).
269    pub fn to_tl_with_options(&self, opts: &FormatOptions) -> String {
270        let mut output = String::new();
271
272        if self.is_root_array {
273            if opts.compact {
274                output.push_str("@root-array\n");
275            } else {
276                output.push_str("@root-array\n\n");
277            }
278        }
279
280        if self.schemas.is_empty() && self.unions.is_empty() {
281            output.push_str(&dumps_with_options(&self.data, opts));
282        } else {
283            let schema_order: Vec<String> = self.schemas.keys().cloned().collect();
284            let union_order: Vec<String> = self.unions.keys().cloned().collect();
285            output.push_str(&dumps_with_schemas_with_options(
286                &self.data, &self.schemas, &schema_order,
287                &self.unions, &union_order, opts,
288            ));
289        }
290
291        output
292    }
293
294    /// Convert to JSON string (pretty-printed).
295    ///
296    /// # Stability Policy - TeaLeaf→JSON Fixed Representations
297    ///
298    /// Special TeaLeaf types serialize to JSON with these **stable formats**:
299    ///
300    /// | TeaLeaf Type | JSON Format                                    |
301    /// |------------|------------------------------------------------|
302    /// | Bytes      | `"0xcafef00d"` (lowercase hex with 0x prefix) |
303    /// | Timestamp  | `"2024-01-15T10:30:00.123Z"` (ISO 8601 UTC)   |
304    /// | Ref        | `{"$ref": "key_name"}`                         |
305    /// | Tagged     | `{"$tag": "tag_name", "$value": <value>}`     |
306    /// | Map        | `[[key1, val1], [key2, val2], ...]`           |
307    /// | Float NaN  | `null` (JSON has no NaN)                       |
308    /// | Float ±Inf | `null` (JSON has no Infinity)                  |
309    ///
310    /// These representations are **contractually stable** and will not change.
311    pub fn to_json(&self) -> Result<String> {
312        // If the source was a root-level array, return it directly (not wrapped in object)
313        if self.is_root_array {
314            if let Some(root_value) = self.data.get("root") {
315                return serde_json::to_string_pretty(&tealeaf_to_json_value(root_value))
316                    .map_err(|e| Error::ParseError(format!("JSON serialization failed: {}", e)));
317            }
318        }
319
320        let json_obj: serde_json::Map<String, serde_json::Value> = self.data
321            .iter()
322            .map(|(k, v)| (k.clone(), tealeaf_to_json_value(v)))
323            .collect();
324
325        serde_json::to_string_pretty(&serde_json::Value::Object(json_obj))
326            .map_err(|e| Error::ParseError(format!("JSON serialization failed: {}", e)))
327    }
328
329    /// Convert to compact JSON string (no pretty printing)
330    pub fn to_json_compact(&self) -> Result<String> {
331        // If the source was a root-level array, return it directly (not wrapped in object)
332        if self.is_root_array {
333            if let Some(root_value) = self.data.get("root") {
334                return serde_json::to_string(&tealeaf_to_json_value(root_value))
335                    .map_err(|e| Error::ParseError(format!("JSON serialization failed: {}", e)));
336            }
337        }
338
339        let json_obj: serde_json::Map<String, serde_json::Value> = self.data
340            .iter()
341            .map(|(k, v)| (k.clone(), tealeaf_to_json_value(v)))
342            .collect();
343
344        serde_json::to_string(&serde_json::Value::Object(json_obj))
345            .map_err(|e| Error::ParseError(format!("JSON serialization failed: {}", e)))
346    }
347
348    /// Set whether the document represents a root-level array.
349    pub fn set_root_array(&mut self, is_root_array: bool) {
350        self.is_root_array = is_root_array;
351    }
352
353    /// Create a TeaLeaf document from a binary Reader.
354    ///
355    /// Reads all sections from the reader and carries schemas and unions through.
356    pub fn from_reader(reader: &Reader) -> Result<Self> {
357        let mut data = IndexMap::new();
358        for key in reader.keys() {
359            data.insert(key.to_string(), reader.get(key)?);
360        }
361        let schemas: IndexMap<String, Schema> = reader.schemas.iter()
362            .map(|s| (s.name.clone(), s.clone()))
363            .collect();
364        let unions: IndexMap<String, Union> = reader.unions.iter()
365            .map(|u| (u.name.clone(), u.clone()))
366            .collect();
367        let mut doc = Self {
368            schemas,
369            unions,
370            data,
371            is_root_array: reader.is_root_array(),
372        };
373        doc.set_root_array(reader.is_root_array());
374        Ok(doc)
375    }
376
377    /// Create a TeaLeaf document from a single DTO.
378    ///
379    /// The DTO is placed under the given `key` in the document data map.
380    /// Schemas are automatically collected from the DTO type.
381    pub fn from_dto<T: convert::ToTeaLeaf>(key: &str, dto: &T) -> Self {
382        let schemas = T::collect_schemas();
383        let unions = T::collect_unions();
384        let mut data = IndexMap::new();
385        data.insert(key.to_string(), dto.to_tealeaf_value());
386        let mut doc = Self::new(schemas, data);
387        doc.unions = unions;
388        doc
389    }
390
391    /// Create a TeaLeaf document from a slice of DTOs.
392    ///
393    /// The array is placed under the given `key` and schemas are
394    /// collected from the element type.
395    pub fn from_dto_array<T: convert::ToTeaLeaf>(key: &str, items: &[T]) -> Self {
396        let schemas = T::collect_schemas();
397        let unions = T::collect_unions();
398        let mut data = IndexMap::new();
399        let arr = Value::Array(items.iter().map(|i| i.to_tealeaf_value()).collect());
400        data.insert(key.to_string(), arr);
401        let mut doc = Self::new(schemas, data);
402        doc.unions = unions;
403        doc
404    }
405
406    /// Extract a DTO from this document by key.
407    pub fn to_dto<T: convert::FromTeaLeaf>(&self, key: &str) -> Result<T> {
408        let value = self
409            .get(key)
410            .ok_or_else(|| Error::MissingField(key.to_string()))?;
411        T::from_tealeaf_value(value).map_err(|e| e.into())
412    }
413
414    /// Extract all values under a key as `Vec<T>`.
415    pub fn to_dto_vec<T: convert::FromTeaLeaf>(&self, key: &str) -> Result<Vec<T>> {
416        let value = self
417            .get(key)
418            .ok_or_else(|| Error::MissingField(key.to_string()))?;
419        let arr = value
420            .as_array()
421            .ok_or_else(|| Error::ParseError("Expected array".into()))?;
422        arr.iter()
423            .map(|v| T::from_tealeaf_value(v).map_err(|e| e.into()))
424            .collect()
425    }
426}
427
428/// Convert JSON value to TeaLeaf value (best-effort)
429fn json_to_tealeaf_value(json: serde_json::Value) -> Value {
430    match json {
431        serde_json::Value::Null => Value::Null,
432        serde_json::Value::Bool(b) => Value::Bool(b),
433        serde_json::Value::Number(n) => {
434            if let Some(i) = n.as_i64() {
435                Value::Int(i)
436            } else if let Some(u) = n.as_u64() {
437                Value::UInt(u)
438            } else {
439                let raw = n.to_string();
440                // Pure integer that doesn't fit i64/u64 → preserve exactly
441                if !raw.contains('.') && !raw.contains('e') && !raw.contains('E') {
442                    Value::JsonNumber(raw)
443                } else {
444                    match n.as_f64() {
445                        Some(f) if f.is_finite() => Value::Float(f),
446                        _ => Value::JsonNumber(raw),
447                    }
448                }
449            }
450        }
451        serde_json::Value::String(s) => Value::String(s),
452        serde_json::Value::Array(arr) => {
453            Value::Array(arr.into_iter().map(json_to_tealeaf_value).collect())
454        }
455        serde_json::Value::Object(obj) => {
456            Value::Object(
457                obj.into_iter()
458                    .map(|(k, v)| (k, json_to_tealeaf_value(v)))
459                    .collect()
460            )
461        }
462    }
463}
464
465/// Convert TeaLeaf value to JSON value
466///
467/// Type preservation:
468/// - Value::Int → JSON integer (e.g., 42)
469/// - Value::UInt → JSON integer (e.g., 18446744073709551615)
470/// - Value::Float → JSON float (e.g., 42.0)
471///
472/// Integer types are tried first during JSON import (i64, then u64) so that
473/// values within 64-bit range stay exact. Only true floats fall through to f64.
474fn tealeaf_to_json_value(tl: &Value) -> serde_json::Value {
475    match tl {
476        Value::Null => serde_json::Value::Null,
477        Value::Bool(b) => serde_json::Value::Bool(*b),
478        Value::Int(i) => serde_json::Value::Number((*i).into()),
479        Value::UInt(u) => serde_json::Value::Number((*u).into()),
480        Value::Float(f) => {
481            // Always output floats as floats - the type distinction is intentional
482            serde_json::Number::from_f64(*f)
483                .map(serde_json::Value::Number)
484                .unwrap_or(serde_json::Value::Null)
485        }
486        Value::String(s) => serde_json::Value::String(s.clone()),
487        Value::Bytes(b) => {
488            // Encode bytes as hex string with 0x prefix
489            let hex: String = b.iter().map(|byte| format!("{:02x}", byte)).collect();
490            serde_json::Value::String(format!("0x{}", hex))
491        }
492        Value::Array(arr) => {
493            serde_json::Value::Array(arr.iter().map(tealeaf_to_json_value).collect())
494        }
495        Value::Object(obj) => {
496            let map: serde_json::Map<String, serde_json::Value> = obj
497                .iter()
498                .map(|(k, v)| (k.clone(), tealeaf_to_json_value(v)))
499                .collect();
500            serde_json::Value::Object(map)
501        }
502        Value::Map(pairs) => {
503            // Convert map to array of [key, value] pairs
504            let arr: Vec<serde_json::Value> = pairs
505                .iter()
506                .map(|(k, v)| {
507                    serde_json::Value::Array(vec![
508                        tealeaf_to_json_value(k),
509                        tealeaf_to_json_value(v),
510                    ])
511                })
512                .collect();
513            serde_json::Value::Array(arr)
514        }
515        Value::Ref(r) => {
516            // Encode ref as object with special key
517            let mut obj = serde_json::Map::new();
518            obj.insert("$ref".to_string(), serde_json::Value::String(r.clone()));
519            serde_json::Value::Object(obj)
520        }
521        Value::Tagged(tag, inner) => {
522            // Encode tagged value as object
523            let mut obj = serde_json::Map::new();
524            obj.insert("$tag".to_string(), serde_json::Value::String(tag.clone()));
525            obj.insert("$value".to_string(), tealeaf_to_json_value(inner));
526            serde_json::Value::Object(obj)
527        }
528        Value::Timestamp(ts, tz) => {
529            serde_json::Value::String(format_timestamp_millis(*ts, *tz))
530        }
531        Value::JsonNumber(s) => {
532            s.parse::<serde_json::Number>()
533                .map(serde_json::Value::Number)
534                .unwrap_or_else(|_| serde_json::Value::String(s.clone()))
535        }
536    }
537}
538
539/// Read a binary TeaLeaf file
540pub fn open<P: AsRef<Path>>(path: P) -> Result<Reader> {
541    Reader::open(path)
542}
543
544/// Parse TeaLeaf text
545pub fn parse(input: &str) -> Result<TeaLeaf> {
546    TeaLeaf::parse(input)
547}
548
549/// Convenience: load and get data
550pub fn loads(input: &str) -> Result<IndexMap<String, Value>> {
551    Ok(TeaLeaf::parse(input)?.data)
552}
553
/// Check if a string needs quoting when serialized to TeaLeaf format.
/// Returns true if the string could be misinterpreted as another type.
///
/// A string may be written bare only when all of the following hold:
/// - it is non-empty;
/// - it is not a reserved word or literal
///   (`true`, `false`, `null`, `~`, `NaN`, `inf`, `Infinity`);
/// - it starts with an ASCII letter or `_`;
/// - every character is in `[a-zA-Z0-9_.-]` (ASCII only).
///
/// Matches spec grammar: name = (letter | "_") { letter | digit | "_" | "-" | "." }
fn needs_quoting(s: &str) -> bool {
    // Empty strings have no bare representation.
    let first = match s.chars().next() {
        Some(c) => c,
        None => return true,
    };

    // Reserved words, null literal, and float literals the lexer would interpret
    if matches!(s, "true" | "false" | "null" | "~" | "NaN" | "inf" | "Infinity") {
        return true;
    }

    // Must start with letter or underscore per grammar. This single check
    // also rejects everything that could read as a number: leading digits
    // (including 0x/0b prefixes), leading '+' or '-', and a leading '.'.
    if !first.is_ascii_alphabetic() && first != '_' {
        return true;
    }

    // Whitelist approach: any character outside [a-zA-Z0-9_.-] (Unicode
    // letters and digits, whitespace, punctuation, etc.) requires quoting
    // to ensure a safe round-trip through the parser.
    s.chars()
        .any(|c| !(c.is_ascii_alphanumeric() || matches!(c, '_' | '-' | '.')))
}
602
603/// Write a key to the output, quoting if necessary for safe round-trip.
604fn write_key(out: &mut String, key: &str) {
605    if needs_quoting(key) {
606        out.push('"');
607        out.push_str(&escape_string(key));
608        out.push('"');
609    } else {
610        out.push_str(key);
611    }
612}
613
614/// Write a map key per spec grammar: `map_key = string | name | integer`.
615/// Int/UInt are written as-is. String values use `write_key` for quoting.
616/// Other value types (Null, Bool, Float, etc.) are coerced to quoted strings
617/// so that the text format always round-trips through the parser.
618fn write_map_key(out: &mut String, key: &Value) {
619    match key {
620        Value::Int(i) => out.push_str(&i.to_string()),
621        Value::UInt(u) => out.push_str(&u.to_string()),
622        Value::String(s) => write_key(out, s),
623        // Coerce non-spec key types to quoted strings for text format safety
624        Value::Null => out.push_str("\"~\""),
625        Value::Bool(b) => { out.push('"'); out.push_str(if *b { "true" } else { "false" }); out.push('"'); }
626        Value::Float(f) => { out.push('"'); out.push_str(&f.to_string()); out.push('"'); }
627        Value::JsonNumber(s) => { out.push('"'); out.push_str(s); out.push('"'); }
628        Value::Timestamp(ts, tz) => { out.push('"'); out.push_str(&format_timestamp_millis(*ts, *tz)); out.push('"'); }
629        Value::Bytes(b) => {
630            out.push_str("\"0x");
631            for byte in b { out.push_str(&format!("{:02x}", byte)); }
632            out.push('"');
633        }
634        Value::Ref(r) => { out.push('"'); out.push('!'); out.push_str(r); out.push('"'); }
635        Value::Tagged(tag, _) => { out.push('"'); out.push(':'); out.push_str(tag); out.push('"'); }
636        Value::Array(_) | Value::Object(_) | Value::Map(_) => out.push_str("\"\""),
637    }
638}
639
/// Options controlling TeaLeaf text output format.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub struct FormatOptions {
    /// Remove insignificant whitespace (spaces after `:` and `,`, indentation, blank lines).
    pub compact: bool,
    /// Emit whole-number floats without `.0` suffix (e.g., `42.0` → `42`).
    /// Saves characters/tokens but changes float→int type on re-parse.
    pub compact_floats: bool,
}

impl FormatOptions {
    /// Pretty-printed output; identical to `FormatOptions::default()`.
    pub fn pretty() -> Self {
        Self::default()
    }

    /// Compact output (whitespace stripped); float formatting is unchanged.
    pub fn compact() -> Self {
        Self { compact: true, ..Self::default() }
    }

    /// Enable compact float formatting (strip `.0` from whole-number floats),
    /// leaving the other options as they are.
    pub fn with_compact_floats(self) -> Self {
        Self { compact_floats: true, ..self }
    }
}

impl Default for FormatOptions {
    /// Both flags off: pretty-printed output with `.0` kept on whole floats.
    fn default() -> Self {
        Self { compact: false, compact_floats: false }
    }
}
673
674pub fn dumps(data: &IndexMap<String, Value>) -> String {
675    dumps_inner(data, &FormatOptions::default())
676}
677
678/// Serialize data to compact TeaLeaf text format (no schemas).
679/// Removes insignificant whitespace for token-efficient output.
680pub fn dumps_compact(data: &IndexMap<String, Value>) -> String {
681    dumps_inner(data, &FormatOptions::compact())
682}
683
684/// Serialize data to TeaLeaf text format with custom options (no schemas).
685pub fn dumps_with_options(data: &IndexMap<String, Value>, opts: &FormatOptions) -> String {
686    dumps_inner(data, opts)
687}
688
689fn dumps_inner(data: &IndexMap<String, Value>, opts: &FormatOptions) -> String {
690    let mut out = String::new();
691    for (key, value) in data {
692        write_key(&mut out, key);
693        out.push_str(kv_sep(opts.compact));
694        write_value(&mut out, value, 0, opts);
695        out.push('\n');
696    }
697    out
698}
699
/// Element separator: "," in compact mode, ", " in pretty mode.
#[inline]
fn sep(compact: bool) -> &'static str {
    match compact {
        true => ",",
        false => ", ",
    }
}
705
/// Key/value separator: ":" in compact mode, ": " in pretty mode.
#[inline]
fn kv_sep(compact: bool) -> &'static str {
    match compact {
        true => ":",
        false => ": ",
    }
}
711
/// Escape a string for TeaLeaf text output.
///
/// Backslash, double quote, newline, tab, carriage return, backspace and
/// form feed get their two-character escapes; every other control character
/// is written as `\uXXXX` (lowercase hex). All remaining characters are
/// copied through unchanged.
fn escape_string(s: &str) -> String {
    let mut escaped = String::with_capacity(s.len());
    for ch in s.chars() {
        let shorthand = match ch {
            '\\' => Some("\\\\"),
            '"' => Some("\\\""),
            '\n' => Some("\\n"),
            '\t' => Some("\\t"),
            '\r' => Some("\\r"),
            '\u{0008}' => Some("\\b"),
            '\u{000C}' => Some("\\f"),
            _ => None,
        };

        if let Some(sequence) = shorthand {
            escaped.push_str(sequence);
        } else if ch.is_control() {
            // One \uXXXX group per UTF-16 code unit of the character.
            let mut units = [0u16; 2];
            for unit in ch.encode_utf16(&mut units).iter() {
                escaped.push_str(&format!("\\u{:04x}", unit));
            }
        } else {
            escaped.push(ch);
        }
    }
    escaped
}
736
/// Render a float so that it re-parses as a float.
///
/// Non-finite values use the keywords `NaN`, `inf` and `-inf`. Rust's
/// `f64::to_string()` expands large values to a long integer-looking digit
/// string (e.g., 6.022e23 becomes "602200000000000000000000"), which would be
/// reparsed as an integer and overflow. Integer-looking output with more than
/// 15 digits therefore switches to scientific notation; shorter ones get a
/// `.0` suffix unless `compact_floats` is set.
fn format_float(f: f64, compact_floats: bool) -> String {
    if f.is_nan() {
        return "NaN".to_string();
    }
    if f.is_infinite() {
        let keyword = if f.is_sign_positive() { "inf" } else { "-inf" };
        return keyword.to_string();
    }

    let plain = f.to_string();
    // A decimal point or exponent already marks the text as a float.
    if plain.chars().any(|c| matches!(c, '.' | 'e' | 'E')) {
        return plain;
    }

    // Integer-looking text: count digits, ignoring a leading minus sign.
    let digit_count = plain.trim_start_matches('-').len();
    match (digit_count > 15, compact_floats) {
        (true, _) => format!("{:e}", f),
        (false, true) => plain,
        (false, false) => format!("{}.0", plain),
    }
}
768
769fn write_value(out: &mut String, value: &Value, indent: usize, opts: &FormatOptions) {
770    match value {
771        Value::Null => out.push('~'),
772        Value::Bool(b) => out.push_str(if *b { "true" } else { "false" }),
773        Value::Int(i) => out.push_str(&i.to_string()),
774        Value::UInt(u) => out.push_str(&u.to_string()),
775        Value::JsonNumber(s) => out.push_str(s),
776        Value::Float(f) => out.push_str(&format_float(*f, opts.compact_floats)),
777        Value::String(s) => {
778            if needs_quoting(s) {
779                out.push('"');
780                out.push_str(&escape_string(s));
781                out.push('"');
782            } else {
783                out.push_str(s);
784            }
785        }
786        Value::Bytes(b) => {
787            out.push_str("b\"");
788            for byte in b {
789                out.push_str(&format!("{:02x}", byte));
790            }
791            out.push('"');
792        }
793        Value::Array(arr) => {
794            out.push('[');
795            for (i, v) in arr.iter().enumerate() {
796                if i > 0 { out.push_str(sep(opts.compact)); }
797                write_value(out, v, indent, opts);
798            }
799            out.push(']');
800        }
801        Value::Object(obj) => {
802            out.push('{');
803            for (i, (k, v)) in obj.iter().enumerate() {
804                if i > 0 { out.push_str(sep(opts.compact)); }
805                write_key(out, k);
806                out.push_str(kv_sep(opts.compact));
807                write_value(out, v, indent, opts);
808            }
809            out.push('}');
810        }
811        Value::Map(pairs) => {
812            out.push_str(if opts.compact { "@map{" } else { "@map {" });
813            let mut first = true;
814            for (k, v) in pairs {
815                if !first { out.push_str(sep(opts.compact)); }
816                first = false;
817                // Map keys are restricted to string | name | integer per spec.
818                // Write Int/UInt directly; convert other types to quoted strings.
819                write_map_key(out, k);
820                out.push_str(kv_sep(opts.compact));
821                write_value(out, v, indent, opts);
822            }
823            out.push('}');
824        }
825        Value::Ref(r) => {
826            out.push('!');
827            out.push_str(r);
828        }
829        Value::Tagged(tag, inner) => {
830            out.push(':');
831            out.push_str(tag);
832            out.push(' ');
833            write_value(out, inner, indent, opts);
834        }
835        Value::Timestamp(ts, tz) => {
836            out.push_str(&format_timestamp_millis(*ts, *tz));
837        }
838    }
839}
840
841/// Format a Unix-millis timestamp as an ISO 8601 string with timezone offset.
842/// Handles negative timestamps (pre-epoch dates) correctly using Euclidean division.
843/// Years outside [0000, 9999] are clamped to the boundary per spec (4-digit years only).
844/// When tz_offset_minutes is 0, emits 'Z' suffix. Otherwise emits +HH:MM or -HH:MM.
845fn format_timestamp_millis(ts: i64, tz_offset_minutes: i16) -> String {
846    // Clamp to representable ISO 8601 range (years 0000-9999).
847    // Year 0000-01-01T00:00:00Z = -62167219200000 ms
848    // Year 9999-12-31T23:59:59.999Z = 253402300799999 ms
849    const MIN_TS: i64 = -62_167_219_200_000;
850    const MAX_TS: i64 = 253_402_300_799_999;
851    let ts = ts.clamp(MIN_TS, MAX_TS);
852
853    // Apply timezone offset to get local time for display
854    let local_ts = ts + (tz_offset_minutes as i64) * 60_000;
855    let local_ts = local_ts.clamp(MIN_TS, MAX_TS);
856
857    let secs = local_ts.div_euclid(1000);
858    let millis = local_ts.rem_euclid(1000);
859    let days = secs.div_euclid(86400);
860    let time_secs = secs.rem_euclid(86400);
861    let hours = time_secs / 3600;
862    let mins = (time_secs % 3600) / 60;
863    let secs_rem = time_secs % 60;
864    let (year, month, day) = days_to_ymd(days);
865
866    let tz_suffix = if tz_offset_minutes == 0 {
867        "Z".to_string()
868    } else {
869        let sign = if tz_offset_minutes > 0 { '+' } else { '-' };
870        let abs = tz_offset_minutes.unsigned_abs();
871        format!("{}{:02}:{:02}", sign, abs / 60, abs % 60)
872    };
873
874    if millis > 0 {
875        format!("{:04}-{:02}-{:02}T{:02}:{:02}:{:02}.{:03}{}",
876            year, month, day, hours, mins, secs_rem, millis, tz_suffix)
877    } else {
878        format!("{:04}-{:02}-{:02}T{:02}:{:02}:{:02}{}",
879            year, month, day, hours, mins, secs_rem, tz_suffix)
880    }
881}
882
/// Convert a day count relative to 1970-01-01 into `(year, month, day)`.
///
/// Uses Howard Hinnant's days-to-civil algorithm, widened to `i64` so that
/// extreme timestamps are handled. Internally the year is treated as starting
/// on March 1st, so January and February belong to the following calendar
/// year.
fn days_to_ymd(days: i64) -> (i64, u32, u32) {
    const DAYS_PER_ERA: i64 = 146_097; // one 400-year cycle
    const EPOCH_SHIFT: i64 = 719_468; // moves the origin to 0000-03-01

    let shifted = days + EPOCH_SHIFT;
    let era = shifted.div_euclid(DAYS_PER_ERA);
    let day_of_era = shifted.rem_euclid(DAYS_PER_ERA) as u32;
    let year_of_era =
        (day_of_era - day_of_era / 1460 + day_of_era / 36524 - day_of_era / 146096) / 365;
    let day_of_year = day_of_era - (365 * year_of_era + year_of_era / 4 - year_of_era / 100);
    let march_based_month = (5 * day_of_year + 2) / 153;
    let day = day_of_year - (153 * march_based_month + 2) / 5 + 1;

    let march_based_year = i64::from(year_of_era) + era * 400;
    if march_based_month < 10 {
        // March..December stay in the same calendar year.
        (march_based_year, march_based_month + 3, day)
    } else {
        // January/February roll over into the next calendar year.
        (march_based_year + 1, march_based_month - 9, day)
    }
}
898
899// =============================================================================
900// Schema Inference
901// =============================================================================
902
/// Type information gathered for a value or field during schema inference.
#[derive(Clone, Debug, PartialEq)]
enum InferredType {
    /// A null value.
    Null,
    /// A boolean.
    Bool,
    /// An integer.
    Int,
    /// A floating-point number.
    Float,
    /// A string.
    String,
    /// An array, carrying the inferred type of its elements.
    Array(Box<InferredType>),
    /// An object, carrying its `(field name, type)` pairs in order.
    Object(Vec<(String, InferredType)>),
    /// Different types were seen; falls back to `any`.
    Mixed,
}
915
916impl InferredType {
917    fn merge(&self, other: &InferredType) -> InferredType {
918        if self == other {
919            return self.clone();
920        }
921        match (self, other) {
922            (InferredType::Null, t) | (t, InferredType::Null) => {
923                // Null + T = T (nullable)
924                t.clone()
925            }
926            (InferredType::Int, InferredType::Float) | (InferredType::Float, InferredType::Int) => {
927                InferredType::Float
928            }
929            (InferredType::Array(a), InferredType::Array(b)) => {
930                InferredType::Array(Box::new(a.merge(b)))
931            }
932            (InferredType::Object(a), InferredType::Object(b)) => {
933                // Merge objects: keep fields present in both, track nullability
934                let mut merged = Vec::new();
935                let b_map: IndexMap<&str, &InferredType> = b.iter().map(|(k, v)| (k.as_str(), v)).collect();
936
937                for (key, a_type) in a {
938                    if let Some(b_type) = b_map.get(key.as_str()) {
939                        merged.push((key.clone(), a_type.merge(b_type)));
940                    }
941                    // Fields only in a are dropped (not uniform)
942                }
943
944                // Check if structures are compatible (same fields)
945                if merged.len() == a.len() && merged.len() == b.len() {
946                    InferredType::Object(merged)
947                } else {
948                    InferredType::Mixed
949                }
950            }
951            _ => InferredType::Mixed,
952        }
953    }
954
955    fn to_field_type(&self, schemas: &IndexMap<String, Schema>) -> FieldType {
956        match self {
957            InferredType::Null => FieldType::new("string").nullable(),  // Unknown type, default to string
958            InferredType::Bool => FieldType::new("bool"),
959            InferredType::Int => FieldType::new("int"),
960            InferredType::Float => FieldType::new("float"),
961            InferredType::String => FieldType::new("string"),
962            InferredType::Array(inner) => {
963                let inner_type = inner.to_field_type(schemas);
964                FieldType {
965                    base: inner_type.base,
966                    nullable: inner_type.nullable,
967                    is_array: true,
968                }
969            }
970            InferredType::Object(fields) => {
971                // Check if this matches an existing schema
972                for (name, schema) in schemas {
973                    if schema.fields.len() == fields.len() {
974                        let all_match = schema.fields.iter().all(|sf| {
975                            fields.iter().any(|(k, _)| k == &sf.name)
976                        });
977                        if all_match {
978                            return FieldType::new(name.clone());
979                        }
980                    }
981                }
982                // No matching schema — use "any" (not "object", which is a
983                // value-only type rejected by the parser in schema definitions)
984                FieldType::new("any")
985            }
986            InferredType::Mixed => FieldType::new("any"),
987        }
988    }
989}
990
991fn infer_type(value: &Value) -> InferredType {
992    match value {
993        Value::Null => InferredType::Null,
994        Value::Bool(_) => InferredType::Bool,
995        Value::Int(_) | Value::UInt(_) => InferredType::Int,
996        Value::Float(_) => InferredType::Float,
997        Value::String(_) => InferredType::String,
998        Value::Array(arr) => {
999            if arr.is_empty() {
1000                InferredType::Array(Box::new(InferredType::Mixed))
1001            } else {
1002                let mut element_type = infer_type(&arr[0]);
1003                for item in arr.iter().skip(1) {
1004                    element_type = element_type.merge(&infer_type(item));
1005                }
1006                InferredType::Array(Box::new(element_type))
1007            }
1008        }
1009        Value::Object(obj) => {
1010            let fields: Vec<(String, InferredType)> = obj
1011                .iter()
1012                .map(|(k, v)| (k.clone(), infer_type(v)))
1013                .collect();
1014            InferredType::Object(fields)
1015        }
1016        _ => InferredType::Mixed,
1017    }
1018}
1019
/// Lowercase `name` and strip a plural ending (simple heuristic).
///
/// Rules, in order:
/// 1. `...ies` becomes `...y`.
/// 2. `...sses`, `...xes`, `...ches`, `...shes` lose the trailing `es`.
/// 3. A trailing `s` is dropped unless the name is a single character or ends
///    in `ss`.
/// 4. Anything else is returned lowercased but otherwise unchanged.
fn singularize(name: &str) -> String {
    const ES_PLURAL_ENDINGS: [&str; 4] = ["sses", "xes", "ches", "shes"];

    let mut lowered = name.to_lowercase();
    if let Some(stem) = lowered.strip_suffix("ies") {
        return format!("{}y", stem);
    }

    let drop_count = if ES_PLURAL_ENDINGS.iter().any(|ending| lowered.ends_with(*ending)) {
        2
    } else if lowered.len() > 1 && lowered.ends_with('s') && !lowered.ends_with("ss") {
        1
    } else {
        0
    };
    // The removed characters are ASCII, so this cut is on a char boundary.
    lowered.truncate(lowered.len() - drop_count);
    lowered
}
1033
1034/// Check if array elements are objects that match a schema's structure
1035fn array_matches_schema(arr: &[Value], schema: &Schema) -> bool {
1036    if arr.is_empty() {
1037        return false;
1038    }
1039
1040    // Check if first element is an object
1041    let first = match &arr[0] {
1042        Value::Object(obj) => obj,
1043        _ => return false,
1044    };
1045
1046    // Get schema field names
1047    let schema_fields: HashSet<_> = schema.fields.iter().map(|f| f.name.as_str()).collect();
1048
1049    // Get object keys
1050    let obj_keys: HashSet<_> = first.keys().map(|k| k.as_str()).collect();
1051
1052    // Check if there's significant overlap (at least 50% of schema fields present)
1053    let overlap = schema_fields.intersection(&obj_keys).count();
1054    let required_overlap = schema_fields.len() / 2;
1055
1056    overlap > required_overlap || overlap == schema_fields.len()
1057}
1058
1059/// Schema inferrer that analyzes data and generates schemas
1060pub struct SchemaInferrer {
1061    schemas: IndexMap<String, Schema>,
1062    schema_order: Vec<String>,  // Track order for output
1063}
1064
1065impl SchemaInferrer {
1066    pub fn new() -> Self {
1067        Self {
1068            schemas: IndexMap::new(),
1069            schema_order: Vec::new(),
1070        }
1071    }
1072
1073    /// Analyze data and infer schemas from uniform object arrays
1074    pub fn infer(&mut self, data: &IndexMap<String, Value>) {
1075        for (key, value) in data {
1076            self.analyze_value(key, value);
1077        }
1078    }
1079
1080    fn analyze_value(&mut self, hint_name: &str, value: &Value) {
1081        if let Value::Array(arr) = value {
1082            self.analyze_array(hint_name, arr);
1083        } else if let Value::Object(obj) = value {
1084            // Recursively analyze nested objects
1085            for (k, v) in obj {
1086                self.analyze_value(k, v);
1087            }
1088        }
1089    }
1090
1091    fn analyze_array(&mut self, hint_name: &str, arr: &[Value]) {
1092        if arr.is_empty() {
1093            return;
1094        }
1095
1096        // Check if all elements are objects with the same structure
1097        let first = match &arr[0] {
1098            Value::Object(obj) => obj,
1099            _ => return,
1100        };
1101
1102        // Collect field names from first object (preserving insertion order)
1103        let field_names: Vec<String> = first.keys().cloned().collect();
1104
1105        // Skip schema inference if fields are empty, any field name is empty,
1106        // or the schema name itself needs quoting (it appears unquoted in
1107        // `@struct name(...)` and `@table name [...]`).
1108        // Field names that need quoting are fine — they get quoted in the
1109        // @struct definition, e.g. `@struct root("@type":string, name:string)`.
1110        if field_names.is_empty()
1111            || field_names.iter().any(|n| n.is_empty())
1112            || needs_quoting(hint_name)
1113        {
1114            return;
1115        }
1116
1117        let field_set: std::collections::BTreeSet<&str> = first.keys().map(|k| k.as_str()).collect();
1118
1119        // Verify all objects have the same fields
1120        for item in arr.iter().skip(1) {
1121            if let Value::Object(obj) = item {
1122                let item_set: std::collections::BTreeSet<&str> = obj.keys().map(|k| k.as_str()).collect();
1123                if item_set != field_set {
1124                    return;  // Not uniform
1125                }
1126            } else {
1127                return;  // Not all objects
1128            }
1129        }
1130
1131        // Infer types for each field across all objects
1132        let mut field_types: IndexMap<String, InferredType> = IndexMap::new();
1133        let mut has_null: IndexMap<String, bool> = IndexMap::new();
1134
1135        for item in arr {
1136            if let Value::Object(obj) = item {
1137                for (key, val) in obj {
1138                    let inferred = infer_type(val);
1139                    let is_null = matches!(val, Value::Null);
1140
1141                    *has_null.entry(key.clone()).or_insert(false) |= is_null;
1142
1143                    field_types
1144                        .entry(key.clone())
1145                        .and_modify(|existing| *existing = existing.merge(&inferred))
1146                        .or_insert(inferred);
1147                }
1148            }
1149        }
1150
1151        // Generate schema name from hint
1152        let schema_name = singularize(hint_name);
1153
1154        // Skip if schema already exists
1155        if self.schemas.contains_key(&schema_name) {
1156            return;
1157        }
1158
1159        // Recursively analyze nested fields in field order (depth-first).
1160        // Single pass processes arrays and objects as encountered, matching
1161        // the derive path's field-declaration-order traversal.
1162        for field_name in &field_names {
1163            // Check the first object's value for this field
1164            if let Value::Object(first_obj) = &arr[0] {
1165                match first_obj.get(field_name) {
1166                    Some(Value::Array(nested)) => {
1167                        // Arrays are always analyzed — same-name recursion
1168                        // (e.g., nodes[].nodes[]) is safe because depth-first
1169                        // ensures the inner schema is created first.
1170                        self.analyze_array(field_name, nested);
1171                    }
1172                    Some(Value::Object(_)) => {
1173                        // Skip object fields whose singularized name collides
1174                        // with this array's schema name — prevents
1175                        // self-referencing schemas (e.g., @struct root (root: root)).
1176                        if singularize(field_name) == schema_name {
1177                            continue;
1178                        }
1179
1180                        let nested_objects: Vec<&IndexMap<String, Value>> = arr
1181                            .iter()
1182                            .filter_map(|item| {
1183                                if let Value::Object(obj) = item {
1184                                    if let Some(Value::Object(nested)) = obj.get(field_name) {
1185                                        return Some(nested);
1186                                    }
1187                                }
1188                                None
1189                            })
1190                            .collect();
1191
1192                        if !nested_objects.is_empty() {
1193                            self.analyze_nested_objects(field_name, &nested_objects);
1194                        }
1195                    }
1196                    _ => {}
1197                }
1198            }
1199        }
1200
1201        // Re-check: recursive nested analysis (both arrays and objects) may have
1202        // claimed this schema name. This happens when the same field name appears
1203        // at multiple nesting levels (e.g., "nodes" containing "nodes"). The inner
1204        // schema was created first (depth-first); preserve it to avoid overwriting
1205        // with a different structure.
1206        if self.schemas.contains_key(&schema_name) {
1207            return;
1208        }
1209
1210        // Build schema
1211        let mut schema = Schema::new(&schema_name);
1212
1213        // Use insertion order from first object
1214        for field_name in &field_names {
1215            if let Some(inferred) = field_types.get(field_name) {
1216                let mut field_type = inferred.to_field_type(&self.schemas);
1217
1218                // Mark as nullable if any null values seen
1219                if has_null.get(field_name).copied().unwrap_or(false) {
1220                    field_type.nullable = true;
1221                }
1222
1223                // Check if there's a nested schema for array fields
1224                if let Value::Object(first_obj) = &arr[0] {
1225                    if let Some(Value::Array(nested_arr)) = first_obj.get(field_name) {
1226                        let nested_schema_name = singularize(field_name);
1227                        if let Some(nested_schema) = self.schemas.get(&nested_schema_name) {
1228                            // Verify array elements are objects matching the schema structure
1229                            if array_matches_schema(nested_arr, nested_schema) {
1230                                field_type = FieldType {
1231                                    base: nested_schema_name,
1232                                    nullable: field_type.nullable,
1233                                    is_array: true,
1234                                };
1235                            }
1236                        }
1237                    }
1238                }
1239
1240                // Check if there's a nested schema for object fields
1241                // (skip self-references: field singularizing to the schema being built)
1242                let nested_schema_name = singularize(field_name);
1243                if nested_schema_name != schema_name && self.schemas.contains_key(&nested_schema_name) {
1244                    if matches!(inferred, InferredType::Object(_)) {
1245                        field_type = FieldType {
1246                            base: nested_schema_name,
1247                            nullable: field_type.nullable,
1248                            is_array: false,
1249                        };
1250                    }
1251                }
1252
1253                schema.add_field(field_name, field_type);
1254            }
1255        }
1256
1257        self.schema_order.push(schema_name.clone());
1258        self.schemas.insert(schema_name, schema);
1259    }
1260
1261    /// Analyze a collection of nested objects (from the same field across array items)
1262    /// and create a schema if they have uniform structure
1263    fn analyze_nested_objects(&mut self, field_name: &str, objects: &[&IndexMap<String, Value>]) {
1264        if objects.is_empty() {
1265            return;
1266        }
1267
1268        // Get field names from first object (preserving insertion order)
1269        let first = objects[0];
1270        let nested_field_names: Vec<String> = first.keys().cloned().collect();
1271
1272        // Compute schema name early so we can check if it needs quoting
1273        let schema_name = singularize(field_name);
1274
1275        // Skip empty objects, empty field names, or when the schema name itself
1276        // needs quoting (it appears unquoted in `@struct name(...)` and `@table name [...]`).
1277        // Field names that need quoting are fine — they get quoted in the definition.
1278        if nested_field_names.is_empty()
1279            || nested_field_names.iter().any(|n| n.is_empty())
1280            || needs_quoting(&schema_name)
1281        {
1282            return;
1283        }
1284
1285        let field_set: std::collections::BTreeSet<&str> = first.keys().map(|k| k.as_str()).collect();
1286
1287        // Check if all objects have the same fields
1288        for obj in objects.iter().skip(1) {
1289            let obj_set: std::collections::BTreeSet<&str> = obj.keys().map(|k| k.as_str()).collect();
1290            if obj_set != field_set {
1291                return; // Not uniform
1292            }
1293        }
1294
1295        // Skip if schema already exists
1296        if self.schemas.contains_key(&schema_name) {
1297            return;
1298        }
1299
1300        // Infer field types across all objects
1301        let mut field_types: IndexMap<String, InferredType> = IndexMap::new();
1302        let mut has_null: IndexMap<String, bool> = IndexMap::new();
1303
1304        for obj in objects {
1305            for (key, val) in *obj {
1306                let inferred = infer_type(val);
1307                let is_null = matches!(val, Value::Null);
1308
1309                *has_null.entry(key.clone()).or_insert(false) |= is_null;
1310
1311                field_types
1312                    .entry(key.clone())
1313                    .and_modify(|existing| *existing = existing.merge(&inferred))
1314                    .or_insert(inferred);
1315            }
1316        }
1317
1318        // Recursively analyze nested fields in field order (depth-first).
1319        // Single pass mirrors the derive path's field-declaration-order traversal,
1320        // so CLI and Builder API produce schemas in the same order.
1321        for nested_field in &nested_field_names {
1322            if let Some(Value::Array(nested_arr)) = objects[0].get(nested_field) {
1323                self.analyze_array(nested_field, nested_arr);
1324            } else {
1325                let deeper_objects: Vec<&IndexMap<String, Value>> = objects
1326                    .iter()
1327                    .filter_map(|obj| {
1328                        if let Some(Value::Object(nested)) = obj.get(nested_field) {
1329                            Some(nested)
1330                        } else {
1331                            None
1332                        }
1333                    })
1334                    .collect();
1335
1336                if !deeper_objects.is_empty() {
1337                    self.analyze_nested_objects(nested_field, &deeper_objects);
1338                }
1339            }
1340        }
1341
1342        // Build schema
1343        let mut schema = Schema::new(&schema_name);
1344
1345        for nested_field in &nested_field_names {
1346            if let Some(inferred) = field_types.get(nested_field) {
1347                let mut field_type = inferred.to_field_type(&self.schemas);
1348
1349                if has_null.get(nested_field).copied().unwrap_or(false) {
1350                    field_type.nullable = true;
1351                }
1352
1353                // Check if this field has a nested array schema
1354                if matches!(inferred, InferredType::Array(_)) {
1355                    if let Some(Value::Array(nested_arr)) = objects[0].get(nested_field) {
1356                        let nested_schema_name = singularize(nested_field);
1357                        if let Some(nested_schema) = self.schemas.get(&nested_schema_name) {
1358                            if array_matches_schema(nested_arr, nested_schema) {
1359                                field_type = FieldType {
1360                                    base: nested_schema_name,
1361                                    nullable: field_type.nullable,
1362                                    is_array: true,
1363                                };
1364                            }
1365                        }
1366                    }
1367                }
1368
1369                // Check if this field has a nested object schema
1370                if let Some(nested_schema) = self.schemas.get(&singularize(nested_field)) {
1371                    if matches!(inferred, InferredType::Object(_)) {
1372                        field_type = FieldType::new(nested_schema.name.clone());
1373                    }
1374                }
1375
1376                schema.add_field(nested_field, field_type);
1377            }
1378        }
1379
1380        self.schema_order.push(schema_name.clone());
1381        self.schemas.insert(schema_name, schema);
1382    }
1383
1384    pub fn into_schemas(self) -> (IndexMap<String, Schema>, Vec<String>) {
1385        (self.schemas, self.schema_order)
1386    }
1387}
1388
1389impl Default for SchemaInferrer {
1390    fn default() -> Self {
1391        Self::new()
1392    }
1393}
1394
1395/// Serialize data to TeaLeaf text format with schemas
1396pub fn dumps_with_schemas(
1397    data: &IndexMap<String, Value>,
1398    schemas: &IndexMap<String, Schema>,
1399    schema_order: &[String],
1400    unions: &IndexMap<String, Union>,
1401    union_order: &[String],
1402) -> String {
1403    dumps_with_schemas_inner(data, schemas, schema_order, unions, union_order, &FormatOptions::default())
1404}
1405
1406/// Serialize data to compact TeaLeaf text format with schemas.
1407/// Removes insignificant whitespace for token-efficient output.
1408pub fn dumps_with_schemas_compact(
1409    data: &IndexMap<String, Value>,
1410    schemas: &IndexMap<String, Schema>,
1411    schema_order: &[String],
1412    unions: &IndexMap<String, Union>,
1413    union_order: &[String],
1414) -> String {
1415    dumps_with_schemas_inner(data, schemas, schema_order, unions, union_order, &FormatOptions::compact())
1416}
1417
1418/// Serialize data to TeaLeaf text format with schemas and custom options.
1419pub fn dumps_with_schemas_with_options(
1420    data: &IndexMap<String, Value>,
1421    schemas: &IndexMap<String, Schema>,
1422    schema_order: &[String],
1423    unions: &IndexMap<String, Union>,
1424    union_order: &[String],
1425    opts: &FormatOptions,
1426) -> String {
1427    dumps_with_schemas_inner(data, schemas, schema_order, unions, union_order, opts)
1428}
1429
1430fn dumps_with_schemas_inner(
1431    data: &IndexMap<String, Value>,
1432    schemas: &IndexMap<String, Schema>,
1433    schema_order: &[String],
1434    unions: &IndexMap<String, Union>,
1435    union_order: &[String],
1436    opts: &FormatOptions,
1437) -> String {
1438    let mut out = String::new();
1439    let mut has_definitions = false;
1440
1441    // Write union definitions first (before structs, since structs may reference unions)
1442    for name in union_order {
1443        if let Some(union) = unions.get(name) {
1444            out.push_str("@union ");
1445            out.push_str(&union.name);
1446            out.push_str(if opts.compact { "{\n" } else { " {\n" });
1447            for (vi, variant) in union.variants.iter().enumerate() {
1448                if !opts.compact { out.push_str("  "); }
1449                out.push_str(&variant.name);
1450                out.push_str(if opts.compact { "(" } else { " (" });
1451                for (fi, field) in variant.fields.iter().enumerate() {
1452                    if fi > 0 {
1453                        out.push_str(sep(opts.compact));
1454                    }
1455                    out.push_str(&field.name);
1456                    out.push_str(kv_sep(opts.compact));
1457                    out.push_str(&field.field_type.to_string());
1458                }
1459                out.push(')');
1460                if vi < union.variants.len() - 1 {
1461                    out.push(',');
1462                }
1463                out.push('\n');
1464            }
1465            out.push_str("}\n");
1466            has_definitions = true;
1467        }
1468    }
1469
1470    // Write struct schemas in order
1471    for name in schema_order {
1472        if let Some(schema) = schemas.get(name) {
1473            out.push_str("@struct ");
1474            out.push_str(&schema.name);
1475            out.push_str(if opts.compact { "(" } else { " (" });
1476            for (i, field) in schema.fields.iter().enumerate() {
1477                if i > 0 {
1478                    out.push_str(sep(opts.compact));
1479                }
1480                write_key(&mut out, &field.name);
1481                out.push_str(kv_sep(opts.compact));
1482                out.push_str(&field.field_type.to_string());
1483            }
1484            out.push_str(")\n");
1485            has_definitions = true;
1486        }
1487    }
1488
1489    if has_definitions && !opts.compact {
1490        out.push('\n');
1491    }
1492
1493    // Write data (preserves insertion order)
1494    for (key, value) in data {
1495        write_key(&mut out, key);
1496        out.push_str(kv_sep(opts.compact));
1497        write_value_with_schemas(&mut out, value, schemas, Some(key), 0, None, opts);
1498        out.push('\n');
1499    }
1500
1501    out
1502}
1503
1504/// Resolve a schema for a value by trying three strategies in order:
1505/// 1. Declared type from parent schema's field type (exact match)
1506/// 2. Singularize the field key name (works for JSON-inference schemas)
1507/// 3. Case-insensitive singularize (handles derive-macro PascalCase names)
1508fn resolve_schema<'a>(
1509    schemas: &'a IndexMap<String, Schema>,
1510    declared_type: Option<&str>,
1511    hint_name: Option<&str>,
1512) -> Option<&'a Schema> {
1513    // 1. Direct lookup by declared type from parent schema
1514    if let Some(name) = declared_type {
1515        if let Some(s) = schemas.get(name) {
1516            return Some(s);
1517        }
1518    }
1519    // 2. Singularize heuristic (existing behavior for JSON-inference schemas)
1520    if let Some(hint) = hint_name {
1521        let singular = singularize(hint);
1522        if let Some(s) = schemas.get(&singular) {
1523            return Some(s);
1524        }
1525        // 3. Case-insensitive singularize (for derive-macro PascalCase names)
1526        let singular_lower = singular.to_ascii_lowercase();
1527        for (name, schema) in schemas {
1528            if name.to_ascii_lowercase() == singular_lower {
1529                return Some(schema);
1530            }
1531        }
1532    }
1533    None
1534}
1535
1536fn write_value_with_schemas(
1537    out: &mut String,
1538    value: &Value,
1539    schemas: &IndexMap<String, Schema>,
1540    hint_name: Option<&str>,
1541    indent: usize,
1542    declared_type: Option<&str>,
1543    opts: &FormatOptions,
1544) {
1545    match value {
1546        Value::Null => out.push('~'),
1547        Value::Bool(b) => out.push_str(if *b { "true" } else { "false" }),
1548        Value::Int(i) => out.push_str(&i.to_string()),
1549        Value::UInt(u) => out.push_str(&u.to_string()),
1550        Value::JsonNumber(s) => out.push_str(s),
1551        Value::Float(f) => out.push_str(&format_float(*f, opts.compact_floats)),
1552        Value::String(s) => {
1553            if needs_quoting(s) {
1554                out.push('"');
1555                out.push_str(&escape_string(s));
1556                out.push('"');
1557            } else {
1558                out.push_str(s);
1559            }
1560        }
1561        Value::Bytes(b) => {
1562            out.push_str("b\"");
1563            for byte in b {
1564                out.push_str(&format!("{:02x}", byte));
1565            }
1566            out.push('"');
1567        }
1568        Value::Array(arr) => {
1569            // Check if this array can use @table format.
1570            // Try name-based resolution first, then structural matching as fallback.
1571            let mut schema = resolve_schema(schemas, declared_type, hint_name);
1572
1573            // Structural fallback: if name-based resolution failed, find a schema
1574            // whose fields exactly match the first element's object keys.
1575            // This handles Builder-path documents where the top-level key name
1576            // (e.g., "orders") doesn't match the schema name (e.g., "SalesOrder").
1577            if schema.is_none() {
1578                if let Some(Value::Object(first_obj)) = arr.first() {
1579                    let obj_keys: HashSet<&str> = first_obj.keys().map(|k| k.as_str()).collect();
1580                    for (_, candidate) in schemas {
1581                        let schema_fields: HashSet<&str> = candidate.fields.iter().map(|f| f.name.as_str()).collect();
1582                        if schema_fields == obj_keys {
1583                            schema = Some(candidate);
1584                            break;
1585                        }
1586                    }
1587                }
1588            }
1589
1590            if let Some(schema) = schema {
1591                // Verify the first element is an object whose fields match the schema.
1592                // A name-only lookup isn't enough — if the same field name appears at
1593                // multiple nesting levels with different shapes, the schema may belong
1594                // to a different level. Applying the wrong schema drops unmatched keys.
1595                let schema_matches = if let Some(Value::Object(first_obj)) = arr.first() {
1596                    let schema_fields: HashSet<&str> = schema.fields.iter().map(|f| f.name.as_str()).collect();
1597                    let obj_keys: HashSet<&str> = first_obj.keys().map(|k| k.as_str()).collect();
1598                    schema_fields == obj_keys
1599                } else {
1600                    false
1601                };
1602
1603                if schema_matches {
1604                    out.push_str("@table ");
1605                    out.push_str(&schema.name);
1606                    out.push_str(if opts.compact { "[\n" } else { " [\n" });
1607
1608                    let inner_indent = if opts.compact { 0 } else { indent + 2 };
1609                    for (i, item) in arr.iter().enumerate() {
1610                        if !opts.compact {
1611                            for _ in 0..inner_indent {
1612                                out.push(' ');
1613                            }
1614                        }
1615                        write_tuple(out, item, schema, schemas, inner_indent, opts);
1616                        if i < arr.len() - 1 {
1617                            out.push(',');
1618                        }
1619                        out.push('\n');
1620                    }
1621
1622                    if !opts.compact {
1623                        for _ in 0..indent {
1624                            out.push(' ');
1625                        }
1626                    }
1627                    out.push(']');
1628                    return;
1629                }
1630            }
1631
1632            // Fall back to regular array format
1633            out.push('[');
1634            for (i, v) in arr.iter().enumerate() {
1635                if i > 0 {
1636                    out.push_str(sep(opts.compact));
1637                }
1638                write_value_with_schemas(out, v, schemas, None, indent, None, opts);
1639            }
1640            out.push(']');
1641        }
1642        Value::Object(obj) => {
1643            // Find the schema for this object so we can propagate field types to children.
1644            // Try name-based resolution first, then structural matching as fallback.
1645            let mut obj_schema = resolve_schema(schemas, declared_type, hint_name);
1646
1647            if obj_schema.is_none() {
1648                let obj_keys: HashSet<&str> = obj.keys().map(|k| k.as_str()).collect();
1649                for (_, candidate) in schemas {
1650                    let schema_fields: HashSet<&str> = candidate.fields.iter().map(|f| f.name.as_str()).collect();
1651                    if schema_fields == obj_keys {
1652                        obj_schema = Some(candidate);
1653                        break;
1654                    }
1655                }
1656            }
1657
1658            out.push('{');
1659            for (i, (k, v)) in obj.iter().enumerate() {
1660                if i > 0 {
1661                    out.push_str(sep(opts.compact));
1662                }
1663                write_key(out, k);
1664                out.push_str(kv_sep(opts.compact));
1665                // Look up this field's declared type from the parent schema
1666                let field_type = obj_schema.and_then(|s| {
1667                    s.fields.iter()
1668                        .find(|f| f.name == *k)
1669                        .map(|f| f.field_type.base.as_str())
1670                });
1671                write_value_with_schemas(out, v, schemas, Some(k), indent, field_type, opts);
1672            }
1673            out.push('}');
1674        }
1675        Value::Map(pairs) => {
1676            out.push_str(if opts.compact { "@map{" } else { "@map {" });
1677            let mut first = true;
1678            for (k, v) in pairs {
1679                if !first {
1680                    out.push_str(sep(opts.compact));
1681                }
1682                first = false;
1683                write_map_key(out, k);
1684                out.push_str(kv_sep(opts.compact));
1685                write_value_with_schemas(out, v, schemas, None, indent, None, opts);
1686            }
1687            out.push('}');
1688        }
1689        Value::Ref(r) => {
1690            out.push('!');
1691            out.push_str(r);
1692        }
1693        Value::Tagged(tag, inner) => {
1694            out.push(':');
1695            out.push_str(tag);
1696            out.push(' ');
1697            write_value_with_schemas(out, inner, schemas, None, indent, None, opts);
1698        }
1699        Value::Timestamp(ts, tz) => {
1700            out.push_str(&format_timestamp_millis(*ts, *tz));
1701        }
1702    }
1703}
1704
1705fn write_tuple(
1706    out: &mut String,
1707    value: &Value,
1708    schema: &Schema,
1709    schemas: &IndexMap<String, Schema>,
1710    indent: usize,
1711    opts: &FormatOptions,
1712) {
1713    if let Value::Object(obj) = value {
1714        out.push('(');
1715        for (i, field) in schema.fields.iter().enumerate() {
1716            if i > 0 {
1717                out.push_str(sep(opts.compact));
1718            }
1719            if let Some(v) = obj.get(&field.name) {
1720                let type_base = field.field_type.base.as_str();
1721                // For array fields with a known schema type, write tuples directly without @table
1722                if field.field_type.is_array {
1723                    if let Some(item_schema) = resolve_schema(schemas, Some(type_base), None) {
1724                        // The schema defines the element type - write array with tuples directly
1725                        write_schema_array(out, v, item_schema, schemas, indent, opts);
1726                    } else {
1727                        // No schema for element type - use regular array format
1728                        write_value_with_schemas(out, v, schemas, None, indent, None, opts);
1729                    }
1730                } else if resolve_schema(schemas, Some(type_base), None).is_some() {
1731                    // Non-array field with schema type - write as nested tuple
1732                    let nested_schema = resolve_schema(schemas, Some(type_base), None).unwrap();
1733                    write_tuple(out, v, nested_schema, schemas, indent, opts);
1734                } else {
1735                    write_value_with_schemas(out, v, schemas, None, indent, None, opts);
1736                }
1737            } else {
1738                out.push('~');
1739            }
1740        }
1741        out.push(')');
1742    } else {
1743        write_value_with_schemas(out, value, schemas, None, indent, None, opts);
1744    }
1745}
1746
1747/// Write an array of schema-typed values as tuples (without @table annotation)
1748fn write_schema_array(
1749    out: &mut String,
1750    value: &Value,
1751    schema: &Schema,
1752    schemas: &IndexMap<String, Schema>,
1753    indent: usize,
1754    opts: &FormatOptions,
1755) {
1756    if let Value::Array(arr) = value {
1757        if arr.is_empty() {
1758            out.push_str("[]");
1759            return;
1760        }
1761
1762        out.push_str("[\n");
1763        let inner_indent = if opts.compact { 0 } else { indent + 2 };
1764        for (i, item) in arr.iter().enumerate() {
1765            if !opts.compact {
1766                for _ in 0..inner_indent {
1767                    out.push(' ');
1768                }
1769            }
1770            write_tuple(out, item, schema, schemas, inner_indent, opts);
1771            if i < arr.len() - 1 {
1772                out.push(',');
1773            }
1774            out.push('\n');
1775        }
1776        if !opts.compact {
1777            for _ in 0..indent {
1778                out.push(' ');
1779            }
1780        }
1781        out.push(']');
1782    } else {
1783        // Not an array - fall back to regular value writing
1784        write_value_with_schemas(out, value, schemas, None, indent, None, opts);
1785    }
1786}
1787
1788#[cfg(test)]
1789mod tests {
1790    use super::*;
1791
1792    #[test]
1793    fn test_serde_json_number_behavior() {
1794        // Test how serde_json handles different number formats
1795        let json_str = r#"{"int": 42, "float_whole": 42.0, "float_frac": 42.5}"#;
1796        let parsed: serde_json::Value = serde_json::from_str(json_str).unwrap();
1797
1798        if let serde_json::Value::Object(obj) = parsed {
1799            let int_num = obj.get("int").unwrap().as_number().unwrap();
1800            let float_whole = obj.get("float_whole").unwrap().as_number().unwrap();
1801            let float_frac = obj.get("float_frac").unwrap().as_number().unwrap();
1802
1803            println!("int (42): is_i64={}, is_u64={}, is_f64={}",
1804                int_num.is_i64(), int_num.is_u64(), int_num.is_f64());
1805            println!("float_whole (42.0): is_i64={}, is_u64={}, is_f64={}",
1806                float_whole.is_i64(), float_whole.is_u64(), float_whole.is_f64());
1807            println!("float_frac (42.5): is_i64={}, is_u64={}, is_f64={}",
1808                float_frac.is_i64(), float_frac.is_u64(), float_frac.is_f64());
1809
1810            // Assert expected behavior
1811            assert!(int_num.is_i64(), "42 should be parsed as i64");
1812            assert!(float_whole.is_f64(), "42.0 should be parsed as f64");
1813            assert!(float_frac.is_f64(), "42.5 should be parsed as f64");
1814        }
1815
1816        // Test how Rust formats floats
1817        println!("Rust float formatting:");
1818        println!("  42.0f64.to_string() = '{}'", 42.0f64.to_string());
1819        println!("  42.5f64.to_string() = '{}'", 42.5f64.to_string());
1820
1821        // This is the problem! Rust's to_string() drops the .0
1822        // We need to ensure floats always have a decimal point
1823    }
1824
1825    #[test]
1826    fn test_parse_simple() {
1827        let doc = TeaLeaf::parse(r#"
1828            name: alice
1829            age: 30
1830            active: true
1831        "#).unwrap();
1832        
1833        assert_eq!(doc.get("name").unwrap().as_str(), Some("alice"));
1834        assert_eq!(doc.get("age").unwrap().as_int(), Some(30));
1835        assert_eq!(doc.get("active").unwrap().as_bool(), Some(true));
1836    }
1837
1838    #[test]
1839    fn test_parse_struct() {
1840        let doc = TeaLeaf::parse(r#"
1841            @struct user (id: int, name: string, email: string?)
1842            users: @table user [
1843                (1, alice, "alice@test.com"),
1844                (2, bob, ~),
1845            ]
1846        "#).unwrap();
1847        
1848        let schema = doc.schema("user").unwrap();
1849        assert_eq!(schema.fields.len(), 3);
1850        assert!(schema.fields[2].field_type.nullable);
1851        
1852        let users = doc.get("users").unwrap().as_array().unwrap();
1853        assert_eq!(users.len(), 2);
1854    }
1855
1856    #[test]
1857    fn test_nested_struct() {
1858        let doc = TeaLeaf::parse(r#"
1859            @struct address (city: string, zip: string)
1860            @struct user (id: int, name: string, home: address)
1861            users: @table user [
1862                (1, alice, (Berlin, "10115")),
1863                (2, bob, (Paris, "75001")),
1864            ]
1865        "#).unwrap();
1866        
1867        let users = doc.get("users").unwrap().as_array().unwrap();
1868        let alice = users[0].as_object().unwrap();
1869        let home = alice.get("home").unwrap().as_object().unwrap();
1870        assert_eq!(home.get("city").unwrap().as_str(), Some("Berlin"));
1871    }
1872
1873    #[test]
1874    fn test_three_level_nesting() {
1875        let doc = TeaLeaf::parse(r#"
1876            @struct method (type: string, last4: string)
1877            @struct payment (amount: float, method: method)
1878            @struct order (id: int, payment: payment)
1879            orders: @table order [
1880                (1, (99.99, (credit, "4242"))),
1881            ]
1882        "#).unwrap();
1883        
1884        let orders = doc.get("orders").unwrap().as_array().unwrap();
1885        let order = orders[0].as_object().unwrap();
1886        let payment = order.get("payment").unwrap().as_object().unwrap();
1887        let method = payment.get("method").unwrap().as_object().unwrap();
1888        assert_eq!(method.get("type").unwrap().as_str(), Some("credit"));
1889    }
1890
1891    #[test]
1892    fn test_json_roundtrip_basic() {
1893        let json = r#"{"name":"alice","age":30,"active":true,"score":95.5}"#;
1894        let doc = TeaLeaf::from_json(json).unwrap();
1895
1896        assert_eq!(doc.get("name").unwrap().as_str(), Some("alice"));
1897        assert_eq!(doc.get("age").unwrap().as_int(), Some(30));
1898        assert_eq!(doc.get("active").unwrap().as_bool(), Some(true));
1899        assert_eq!(doc.get("score").unwrap().as_float(), Some(95.5));
1900
1901        // Round-trip back to JSON
1902        let json_out = doc.to_json().unwrap();
1903        assert!(json_out.contains("\"name\":\"alice\"") || json_out.contains("\"name\": \"alice\""));
1904    }
1905
1906    #[test]
1907    fn test_json_roundtrip_root_array() {
1908        // Root-level arrays should round-trip without wrapping
1909        let json = r#"[{"id":"0001","type":"donut","name":"Cake"},{"id":"0002","type":"donut","name":"Raised"}]"#;
1910        let doc = TeaLeaf::from_json(json).unwrap();
1911
1912        // Internally stored under "root" key
1913        let root = doc.get("root").unwrap();
1914        let arr = root.as_array().unwrap();
1915        assert_eq!(arr.len(), 2);
1916
1917        // Round-trip should produce the array directly, NOT {"root": [...]}
1918        let json_out = doc.to_json_compact().unwrap();
1919        assert!(json_out.starts_with('['), "Root array should serialize directly: {}", json_out);
1920        assert!(json_out.ends_with(']'), "Root array should end with ]: {}", json_out);
1921        assert!(!json_out.contains("\"root\""), "Should NOT wrap in root object: {}", json_out);
1922
1923        // Verify content preserved
1924        assert!(json_out.contains("\"id\":\"0001\"") || json_out.contains("\"id\": \"0001\""));
1925        assert!(json_out.contains("\"name\":\"Cake\"") || json_out.contains("\"name\": \"Cake\""));
1926    }
1927
1928    #[test]
1929    fn test_json_roundtrip_root_array_empty() {
1930        // Empty array should also round-trip correctly
1931        let json = r#"[]"#;
1932        let doc = TeaLeaf::from_json(json).unwrap();
1933
1934        let json_out = doc.to_json_compact().unwrap();
1935        assert_eq!(json_out, "[]", "Empty array should round-trip: {}", json_out);
1936    }
1937
1938    #[test]
1939    fn test_json_roundtrip_root_object_with_root_key() {
1940        // An object that happens to have a "root" key should NOT be confused
1941        let json = r#"{"root":[1,2,3],"other":"value"}"#;
1942        let doc = TeaLeaf::from_json(json).unwrap();
1943
1944        let json_out = doc.to_json_compact().unwrap();
1945        // This was a root object, so it should stay as an object
1946        assert!(json_out.starts_with('{'), "Root object should stay as object: {}", json_out);
1947        assert!(json_out.contains("\"root\""), "root key should be preserved: {}", json_out);
1948        assert!(json_out.contains("\"other\""), "other key should be preserved: {}", json_out);
1949    }
1950
1951    #[test]
1952    fn test_json_export_bytes() {
1953        // Create a document with bytes programmatically
1954        let mut entries = IndexMap::new();
1955        entries.insert("data".to_string(), Value::Bytes(vec![0xca, 0xfe, 0xf0, 0x0d]));
1956        let doc = TeaLeaf { data: entries, schemas: IndexMap::new(), unions: IndexMap::new(), is_root_array: false };
1957
1958        let json = doc.to_json().unwrap();
1959        assert!(json.contains("0xcafef00d"), "Bytes should export as hex string: {}", json);
1960    }
1961
1962    #[test]
1963    fn test_json_export_ref() {
1964        let mut entries = IndexMap::new();
1965        entries.insert("config".to_string(), Value::Ref("base_config".to_string()));
1966        let doc = TeaLeaf { data: entries, schemas: IndexMap::new(), unions: IndexMap::new(), is_root_array: false };
1967
1968        let json = doc.to_json().unwrap();
1969        assert!(json.contains("\"$ref\""), "Ref should export with $ref key: {}", json);
1970        assert!(json.contains("base_config"), "Ref name should be in output: {}", json);
1971    }
1972
1973    #[test]
1974    fn test_json_export_tagged() {
1975        let mut entries = IndexMap::new();
1976        entries.insert("status".to_string(), Value::Tagged("ok".to_string(), Box::new(Value::Int(200))));
1977        let doc = TeaLeaf { data: entries, schemas: IndexMap::new(), unions: IndexMap::new(), is_root_array: false };
1978
1979        let json = doc.to_json().unwrap();
1980        assert!(json.contains("\"$tag\""), "Tagged should export with $tag key: {}", json);
1981        assert!(json.contains("\"ok\""), "Tag name should be in output: {}", json);
1982        assert!(json.contains("\"$value\""), "Tagged should have $value key: {}", json);
1983    }
1984
1985    #[test]
1986    fn test_json_export_map() {
1987        let mut entries = IndexMap::new();
1988        entries.insert("lookup".to_string(), Value::Map(vec![
1989            (Value::Int(1), Value::String("one".to_string())),
1990            (Value::Int(2), Value::String("two".to_string())),
1991        ]));
1992        let doc = TeaLeaf { data: entries, schemas: IndexMap::new(), unions: IndexMap::new(), is_root_array: false };
1993
1994        let json = doc.to_json().unwrap();
1995        // Map exports as array of [key, value] pairs
1996        // Check that the structure contains the key and values (regardless of formatting)
1997        assert!(json.contains("\"lookup\""), "Map key should be in output: {}", json);
1998        assert!(json.contains("\"one\""), "Map values should be in output: {}", json);
1999        assert!(json.contains("\"two\""), "Map values should be in output: {}", json);
2000        // Verify it's an array structure (has nested arrays)
2001        let compact = json.replace(" ", "").replace("\n", "");
2002        assert!(compact.contains("[["), "Map should export as nested array: {}", json);
2003    }
2004
2005    #[test]
2006    fn test_json_export_timestamp() {
2007        let mut entries = IndexMap::new();
2008        // 2024-01-15T10:30:00Z = 1705315800000 ms, but let's verify with a known value
2009        // Use 0 = 1970-01-01T00:00:00Z for simplicity
2010        entries.insert("created".to_string(), Value::Timestamp(0, 0));
2011        let doc = TeaLeaf { data: entries, schemas: IndexMap::new(), unions: IndexMap::new(), is_root_array: false };
2012
2013        let json = doc.to_json().unwrap();
2014        assert!(json.contains("1970-01-01"), "Timestamp should export as ISO 8601 date: {}", json);
2015        assert!(json.contains("00:00:00"), "Timestamp time should be epoch: {}", json);
2016    }
2017
2018    #[test]
2019    fn test_json_import_limitation_ref_becomes_object() {
2020        // JSON with $ref pattern should become a plain object, NOT a Ref value
2021        let json = r#"{"config":{"$ref":"base_config"}}"#;
2022        let doc = TeaLeaf::from_json(json).unwrap();
2023
2024        let config = doc.get("config").unwrap();
2025        // This should be an Object, not a Ref
2026        assert!(config.as_object().is_some(), "JSON $ref should become Object, not Ref");
2027        assert!(config.as_ref_name().is_none(), "JSON $ref should NOT become Ref value");
2028    }
2029
2030    #[test]
2031    fn test_json_import_limitation_tagged_becomes_object() {
2032        // JSON with $tag/$value pattern should become a plain object, NOT a Tagged value
2033        let json = r#"{"status":{"$tag":"ok","$value":200}}"#;
2034        let doc = TeaLeaf::from_json(json).unwrap();
2035
2036        let status = doc.get("status").unwrap();
2037        // This should be an Object, not a Tagged
2038        assert!(status.as_object().is_some(), "JSON $tag should become Object, not Tagged");
2039        assert!(status.as_tagged().is_none(), "JSON $tag should NOT become Tagged value");
2040    }
2041
2042    #[test]
2043    fn test_json_import_limitation_timestamp_becomes_string() {
2044        // ISO 8601 strings in JSON should remain strings, NOT become Timestamp
2045        let json = r#"{"created":"2024-01-15T10:30:00Z"}"#;
2046        let doc = TeaLeaf::from_json(json).unwrap();
2047
2048        let created = doc.get("created").unwrap();
2049        // This should be a String, not a Timestamp
2050        assert!(created.as_str().is_some(), "ISO timestamp string should remain String");
2051        assert!(created.as_timestamp_millis().is_none(), "ISO timestamp should NOT become Timestamp value");
2052    }
2053
2054    // =========================================================================
2055    // JSON ↔ Binary Conversion Tests
2056    // =========================================================================
2057
2058    #[test]
2059    fn test_json_to_binary_roundtrip_primitives() {
2060        use tempfile::NamedTempFile;
2061
2062        let json = r#"{"name":"alice","age":30,"score":95.5,"active":true,"nothing":null}"#;
2063        let doc = TeaLeaf::from_json(json).unwrap();
2064
2065        // Compile to binary
2066        let temp = NamedTempFile::new().unwrap();
2067        let path = temp.path();
2068        doc.compile(path, false).unwrap();
2069
2070        // Read back
2071        let reader = Reader::open(path).unwrap();
2072        assert_eq!(reader.get("name").unwrap().as_str(), Some("alice"));
2073        assert_eq!(reader.get("age").unwrap().as_int(), Some(30));
2074        assert_eq!(reader.get("score").unwrap().as_float(), Some(95.5));
2075        assert_eq!(reader.get("active").unwrap().as_bool(), Some(true));
2076        assert!(reader.get("nothing").unwrap().is_null());
2077    }
2078
2079    #[test]
2080    fn test_json_to_binary_roundtrip_arrays() {
2081        use tempfile::NamedTempFile;
2082
2083        let json = r#"{"numbers":[1,2,3,4,5],"names":["alice","bob","charlie"]}"#;
2084        let doc = TeaLeaf::from_json(json).unwrap();
2085
2086        let temp = NamedTempFile::new().unwrap();
2087        doc.compile(temp.path(), false).unwrap();
2088
2089        let reader = Reader::open(temp.path()).unwrap();
2090
2091        let numbers = reader.get("numbers").unwrap();
2092        let arr = numbers.as_array().unwrap();
2093        assert_eq!(arr.len(), 5);
2094        assert_eq!(arr[0].as_int(), Some(1));
2095        assert_eq!(arr[4].as_int(), Some(5));
2096
2097        let names = reader.get("names").unwrap();
2098        let arr = names.as_array().unwrap();
2099        assert_eq!(arr.len(), 3);
2100        assert_eq!(arr[0].as_str(), Some("alice"));
2101    }
2102
2103    #[test]
2104    fn test_json_to_binary_roundtrip_nested_objects() {
2105        use tempfile::NamedTempFile;
2106
2107        let json = r#"{"user":{"name":"alice","profile":{"bio":"dev","settings":{"theme":"dark"}}}}"#;
2108        let doc = TeaLeaf::from_json(json).unwrap();
2109
2110        let temp = NamedTempFile::new().unwrap();
2111        doc.compile(temp.path(), false).unwrap();
2112
2113        let reader = Reader::open(temp.path()).unwrap();
2114        let user = reader.get("user").unwrap();
2115        let user_obj = user.as_object().unwrap();
2116        assert_eq!(user_obj.get("name").unwrap().as_str(), Some("alice"));
2117
2118        let profile = user_obj.get("profile").unwrap().as_object().unwrap();
2119        assert_eq!(profile.get("bio").unwrap().as_str(), Some("dev"));
2120
2121        let settings = profile.get("settings").unwrap().as_object().unwrap();
2122        assert_eq!(settings.get("theme").unwrap().as_str(), Some("dark"));
2123    }
2124
2125    #[test]
2126    fn test_json_to_binary_with_compression() {
2127        use tempfile::NamedTempFile;
2128
2129        // Create a document with repetitive data to test compression
2130        let mut entries = IndexMap::new();
2131        entries.insert("data".to_string(), Value::String("a".repeat(1000)));
2132        entries.insert("count".to_string(), Value::Int(12345));
2133        let doc = TeaLeaf { data: entries, schemas: IndexMap::new(), unions: IndexMap::new(), is_root_array: false };
2134
2135        let temp = NamedTempFile::new().unwrap();
2136        doc.compile(temp.path(), true).unwrap(); // compressed
2137
2138        let reader = Reader::open(temp.path()).unwrap();
2139        assert_eq!(reader.get("data").unwrap().as_str(), Some("a".repeat(1000).as_str()));
2140        assert_eq!(reader.get("count").unwrap().as_int(), Some(12345));
2141    }
2142
2143    #[test]
2144    fn test_tl_to_binary_preserves_ref() {
2145        use tempfile::NamedTempFile;
2146
2147        let mut entries = IndexMap::new();
2148        entries.insert("base".to_string(), Value::Object(vec![
2149            ("host".to_string(), Value::String("localhost".to_string())),
2150        ].into_iter().collect()));
2151        entries.insert("config".to_string(), Value::Ref("base".to_string()));
2152        let doc = TeaLeaf { data: entries, schemas: IndexMap::new(), unions: IndexMap::new(), is_root_array: false };
2153
2154        let temp = NamedTempFile::new().unwrap();
2155        doc.compile(temp.path(), false).unwrap();
2156
2157        let reader = Reader::open(temp.path()).unwrap();
2158        let config = reader.get("config").unwrap();
2159        assert_eq!(config.as_ref_name(), Some("base"));
2160    }
2161
2162    #[test]
2163    fn test_tl_to_binary_preserves_tagged() {
2164        use tempfile::NamedTempFile;
2165
2166        let mut entries = IndexMap::new();
2167        entries.insert("status".to_string(), Value::Tagged("ok".to_string(), Box::new(Value::Int(200))));
2168        let doc = TeaLeaf { data: entries, schemas: IndexMap::new(), unions: IndexMap::new(), is_root_array: false };
2169
2170        let temp = NamedTempFile::new().unwrap();
2171        doc.compile(temp.path(), false).unwrap();
2172
2173        let reader = Reader::open(temp.path()).unwrap();
2174        let status = reader.get("status").unwrap();
2175        let (tag, value) = status.as_tagged().unwrap();
2176        assert_eq!(tag, "ok");
2177        assert_eq!(value.as_int(), Some(200));
2178    }
2179
2180    #[test]
2181    fn test_tl_to_binary_preserves_map() {
2182        use tempfile::NamedTempFile;
2183
2184        let mut entries = IndexMap::new();
2185        entries.insert("lookup".to_string(), Value::Map(vec![
2186            (Value::Int(1), Value::String("one".to_string())),
2187            (Value::Int(2), Value::String("two".to_string())),
2188        ]));
2189        let doc = TeaLeaf { data: entries, schemas: IndexMap::new(), unions: IndexMap::new(), is_root_array: false };
2190
2191        let temp = NamedTempFile::new().unwrap();
2192        doc.compile(temp.path(), false).unwrap();
2193
2194        let reader = Reader::open(temp.path()).unwrap();
2195        let lookup = reader.get("lookup").unwrap();
2196        let map = lookup.as_map().unwrap();
2197        assert_eq!(map.len(), 2);
2198        assert_eq!(map[0].0.as_int(), Some(1));
2199        assert_eq!(map[0].1.as_str(), Some("one"));
2200    }
2201
2202    #[test]
2203    fn test_tl_to_binary_preserves_bytes() {
2204        use tempfile::NamedTempFile;
2205
2206        let mut entries = IndexMap::new();
2207        entries.insert("data".to_string(), Value::Bytes(vec![0xca, 0xfe, 0xf0, 0x0d]));
2208        let doc = TeaLeaf { data: entries, schemas: IndexMap::new(), unions: IndexMap::new(), is_root_array: false };
2209
2210        let temp = NamedTempFile::new().unwrap();
2211        doc.compile(temp.path(), false).unwrap();
2212
2213        let reader = Reader::open(temp.path()).unwrap();
2214        let data = reader.get("data").unwrap();
2215        assert_eq!(data.as_bytes(), Some(vec![0xca, 0xfe, 0xf0, 0x0d].as_slice()));
2216    }
2217
2218    #[test]
2219    fn test_tl_to_binary_preserves_timestamp() {
2220        use tempfile::NamedTempFile;
2221
2222        let mut entries = IndexMap::new();
2223        entries.insert("created".to_string(), Value::Timestamp(1705315800000, 0)); // 2024-01-15T10:30:00Z
2224        let doc = TeaLeaf { data: entries, schemas: IndexMap::new(), unions: IndexMap::new(), is_root_array: false };
2225
2226        let temp = NamedTempFile::new().unwrap();
2227        doc.compile(temp.path(), false).unwrap();
2228
2229        let reader = Reader::open(temp.path()).unwrap();
2230        let created = reader.get("created").unwrap();
2231        assert_eq!(created.as_timestamp_millis(), Some(1705315800000));
2232    }
2233
2234    #[test]
2235    fn test_json_import_limitation_hex_string_remains_string() {
2236        // Hex strings in JSON should remain strings, NOT become Bytes
2237        let json = r#"{"data":"0xcafef00d"}"#;
2238        let doc = TeaLeaf::from_json(json).unwrap();
2239
2240        let data = doc.get("data").unwrap();
2241        // This should be a String, not Bytes
2242        assert!(data.as_str().is_some(), "Hex string should remain String");
2243        assert_eq!(data.as_str(), Some("0xcafef00d"));
2244        assert!(data.as_bytes().is_none(), "Hex string should NOT become Bytes value");
2245    }
2246
/// A JSON array of two-element arrays looks like a list of map pairs, but
/// JSON import must keep it as a plain nested array.
#[test]
fn test_json_import_limitation_array_pairs_remain_array() {
    let imported = TeaLeaf::from_json(r#"{"lookup":[[1,"one"],[2,"two"]]}"#).unwrap();
    let lookup = imported.get("lookup").unwrap();

    // Array accessor succeeds, map accessor does not.
    assert!(lookup.as_array().is_some(), "Array of pairs should remain Array");
    assert!(lookup.as_map().is_none(), "Array of pairs should NOT become Map value");

    // The nested shape is intact: two rows, the first being [1, "one"].
    let rows = lookup.as_array().unwrap();
    assert_eq!(rows.len(), 2);
    let head = rows[0].as_array().unwrap();
    assert_eq!(head[0].as_int(), Some(1));
    assert_eq!(head[1].as_str(), Some("one"));
}
2265
2266    // =========================================================================
2267    // Cross-Language Parity Test
2268    // =========================================================================
2269
/// Builds one document with an entry for every value kind listed below,
/// checks that each entry survives a compile-to-binary / read-back cycle,
/// and then checks the JSON export conventions for the special types.
/// Per the original note, the same fixture is also tested in .NET.
#[test]
fn test_cross_language_parity_all_types() {
    use tempfile::NamedTempFile;

    // Fixture: one entry per value kind, inserted in exactly this order.
    let fixture: IndexMap<String, Value> = vec![
        ("null_val", Value::Null),
        ("bool_true", Value::Bool(true)),
        ("int_val", Value::Int(42)),
        ("float_val", Value::Float(3.14159)),
        ("string_val", Value::String("hello".to_string())),
        ("bytes_val", Value::Bytes(vec![0xca, 0xfe])),
        ("timestamp_val", Value::Timestamp(0, 0)),
        ("array_val", Value::Array(vec![Value::Int(1), Value::Int(2)])),
        (
            "object_val",
            Value::Object(std::iter::once(("x".to_string(), Value::Int(1))).collect()),
        ),
        ("ref_val", Value::Ref("object_val".to_string())),
        ("tagged_val", Value::Tagged("ok".to_string(), Box::new(Value::Int(200)))),
        ("map_val", Value::Map(vec![(Value::Int(1), Value::String("one".to_string()))])),
    ]
    .into_iter()
    .map(|(key, value)| (key.to_string(), value))
    .collect();

    let doc = TeaLeaf { data: fixture, schemas: IndexMap::new(), unions: IndexMap::new(), is_root_array: false };

    // Binary leg: compile to a scratch file, then read every entry back.
    let binary = NamedTempFile::new().unwrap();
    doc.compile(binary.path(), false).unwrap();
    let reader = Reader::open(binary.path()).unwrap();

    assert!(reader.get("null_val").unwrap().is_null());
    assert_eq!(reader.get("bool_true").unwrap().as_bool(), Some(true));
    assert_eq!(reader.get("int_val").unwrap().as_int(), Some(42));
    assert_eq!(reader.get("float_val").unwrap().as_float(), Some(3.14159));
    assert_eq!(reader.get("string_val").unwrap().as_str(), Some("hello"));
    assert_eq!(reader.get("bytes_val").unwrap().as_bytes(), Some(&[0xca, 0xfe][..]));
    assert_eq!(reader.get("timestamp_val").unwrap().as_timestamp_millis(), Some(0));

    assert_eq!(reader.get("array_val").unwrap().as_array().unwrap().len(), 2);
    assert!(reader.get("object_val").unwrap().as_object().is_some());
    assert_eq!(reader.get("ref_val").unwrap().as_ref_name(), Some("object_val"));

    let tagged_back = reader.get("tagged_val").unwrap();
    let (label, inner) = tagged_back.as_tagged().unwrap();
    assert_eq!(label, "ok");
    assert_eq!(inner.as_int(), Some(200));

    let map_back = reader.get("map_val").unwrap();
    assert_eq!(map_back.as_map().unwrap().len(), 1);

    // JSON leg: the export must follow the fixed conventions.
    let json = doc.to_json().unwrap();

    // Bytes -> hex string.
    assert!(json.contains("0xcafe"), "Bytes should export as hex: {}", json);

    // Ref -> object carrying a `$ref` key.
    assert!(json.contains("\"$ref\""), "Ref should have $ref key: {}", json);

    // Tagged -> object carrying `$tag` and `$value`.
    assert!(json.contains("\"$tag\""), "Tagged should have $tag: {}", json);
    assert!(json.contains("\"$value\""), "Tagged should have $value: {}", json);

    // Map -> array of pairs; strip spaces and newlines so pretty-printing
    // cannot hide the `[[` that opens a nested array.
    let squeezed: String = json.chars().filter(|&c| c != ' ' && c != '\n').collect();
    assert!(squeezed.contains("[["), "Map should export as array of pairs: {}", json);

    // Timestamp -> ISO 8601 text (the epoch date must appear).
    assert!(json.contains("1970-01-01"), "Timestamp should be ISO 8601: {}", json);
}
2350
2351    // =========================================================================
2352    // JSON Conversion Contract Tests
2353    // =========================================================================
2354    // These tests lock down the exact JSON↔TeaLeaf conversion behavior.
2355    // STABILITY POLICY:
2356    // - Plain JSON roundtrip: MUST be lossless for primitives, arrays, objects
2357    // - TeaLeaf→JSON: Special types have FIXED representations that MUST NOT change
2358    // - JSON→TeaLeaf: No magic parsing; $ref/$tag/hex/ISO8601 stay as plain JSON
2359
2360    mod conversion_contracts {
2361        use super::*;
2362
2363        // --- Plain JSON Roundtrip (STABLE) ---
2364
2365        #[test]
2366        fn contract_null_roundtrip() {
2367            let doc = TeaLeaf::from_json("null").unwrap();
2368            assert!(matches!(doc.get("root").unwrap(), Value::Null));
2369        }
2370
2371        #[test]
2372        fn contract_bool_roundtrip() {
2373            let doc = TeaLeaf::from_json(r#"{"t": true, "f": false}"#).unwrap();
2374            assert_eq!(doc.get("t").unwrap().as_bool(), Some(true));
2375            assert_eq!(doc.get("f").unwrap().as_bool(), Some(false));
2376
2377            let json = doc.to_json_compact().unwrap();
2378            assert!(json.contains("true"));
2379            assert!(json.contains("false"));
2380        }
2381
2382        #[test]
2383        fn contract_integer_roundtrip() {
2384            let doc = TeaLeaf::from_json(r#"{"zero": 0, "pos": 42, "neg": -123}"#).unwrap();
2385            assert_eq!(doc.get("zero").unwrap().as_int(), Some(0));
2386            assert_eq!(doc.get("pos").unwrap().as_int(), Some(42));
2387            assert_eq!(doc.get("neg").unwrap().as_int(), Some(-123));
2388        }
2389
2390        #[test]
2391        fn contract_float_roundtrip() {
2392            let doc = TeaLeaf::from_json(r#"{"pi": 3.14159}"#).unwrap();
2393            let pi = doc.get("pi").unwrap().as_float().unwrap();
2394            assert!((pi - 3.14159).abs() < 0.00001);
2395        }
2396
2397        #[test]
2398        fn contract_string_roundtrip() {
2399            let doc = TeaLeaf::from_json(r#"{"s": "hello world", "u": "日本語", "e": ""}"#).unwrap();
2400            assert_eq!(doc.get("s").unwrap().as_str(), Some("hello world"));
2401            assert_eq!(doc.get("u").unwrap().as_str(), Some("日本語"));
2402            assert_eq!(doc.get("e").unwrap().as_str(), Some(""));
2403        }
2404
2405        #[test]
2406        fn contract_array_roundtrip() {
2407            let doc = TeaLeaf::from_json(r#"{"arr": [1, "two", true, null]}"#).unwrap();
2408            let arr = doc.get("arr").unwrap().as_array().unwrap();
2409            assert_eq!(arr.len(), 4);
2410            assert_eq!(arr[0].as_int(), Some(1));
2411            assert_eq!(arr[1].as_str(), Some("two"));
2412            assert_eq!(arr[2].as_bool(), Some(true));
2413            assert!(matches!(arr[3], Value::Null));
2414        }
2415
2416        #[test]
2417        fn contract_nested_array_roundtrip() {
2418            let doc = TeaLeaf::from_json(r#"{"matrix": [[1, 2], [3, 4]]}"#).unwrap();
2419            let matrix = doc.get("matrix").unwrap().as_array().unwrap();
2420            assert_eq!(matrix.len(), 2);
2421            let row0 = matrix[0].as_array().unwrap();
2422            assert_eq!(row0[0].as_int(), Some(1));
2423            assert_eq!(row0[1].as_int(), Some(2));
2424        }
2425
2426        #[test]
2427        fn contract_object_roundtrip() {
2428            let doc = TeaLeaf::from_json(r#"{"user": {"name": "alice", "age": 30}}"#).unwrap();
2429            let user = doc.get("user").unwrap().as_object().unwrap();
2430            assert_eq!(user.get("name").unwrap().as_str(), Some("alice"));
2431            assert_eq!(user.get("age").unwrap().as_int(), Some(30));
2432        }
2433
2434        // --- TeaLeaf→JSON Fixed Representations (STABLE) ---
2435
2436        #[test]
2437        fn contract_bytes_to_json_hex() {
2438            let mut data = IndexMap::new();
2439            data.insert("b".to_string(), Value::Bytes(vec![0xca, 0xfe, 0xba, 0xbe]));
2440            let doc = TeaLeaf { data, schemas: IndexMap::new(), unions: IndexMap::new(), is_root_array: false };
2441
2442            let json = doc.to_json_compact().unwrap();
2443            // CONTRACT: Bytes serialize as lowercase hex with 0x prefix
2444            assert!(json.contains("\"0xcafebabe\""), "Bytes must be 0x-prefixed hex: {}", json);
2445        }
2446
2447        #[test]
2448        fn contract_bytes_empty_to_json() {
2449            let mut data = IndexMap::new();
2450            data.insert("b".to_string(), Value::Bytes(vec![]));
2451            let doc = TeaLeaf { data, schemas: IndexMap::new(), unions: IndexMap::new(), is_root_array: false };
2452
2453            let json = doc.to_json_compact().unwrap();
2454            // CONTRACT: Empty bytes serialize as "0x"
2455            assert!(json.contains("\"0x\""), "Empty bytes must be \"0x\": {}", json);
2456        }
2457
2458        #[test]
2459        fn contract_timestamp_to_json_iso8601() {
2460            let mut data = IndexMap::new();
2461            // 2024-01-15T10:50:00.123Z (verified milliseconds since epoch)
2462            data.insert("ts".to_string(), Value::Timestamp(1705315800123, 0));
2463            let doc = TeaLeaf { data, schemas: IndexMap::new(), unions: IndexMap::new(), is_root_array: false };
2464
2465            let json = doc.to_json_compact().unwrap();
2466            // CONTRACT: Timestamp serializes as ISO 8601 with milliseconds
2467            assert!(json.contains("2024-01-15T10:50:00.123Z"),
2468                "Timestamp must be ISO 8601 with ms: {}", json);
2469        }
2470
2471        #[test]
2472        fn contract_timestamp_epoch_to_json() {
2473            let mut data = IndexMap::new();
2474            data.insert("ts".to_string(), Value::Timestamp(0, 0));
2475            let doc = TeaLeaf { data, schemas: IndexMap::new(), unions: IndexMap::new(), is_root_array: false };
2476
2477            let json = doc.to_json_compact().unwrap();
2478            // CONTRACT: Unix epoch is 1970-01-01T00:00:00Z (no ms for whole seconds)
2479            assert!(json.contains("1970-01-01T00:00:00Z"),
2480                "Epoch must be 1970-01-01T00:00:00Z: {}", json);
2481        }
2482
2483        #[test]
2484        fn contract_ref_to_json() {
2485            let mut data = IndexMap::new();
2486            data.insert("r".to_string(), Value::Ref("target_key".to_string()));
2487            let doc = TeaLeaf { data, schemas: IndexMap::new(), unions: IndexMap::new(), is_root_array: false };
2488
2489            let json = doc.to_json_compact().unwrap();
2490            // CONTRACT: Ref serializes as {"$ref": "name"}
2491            assert!(json.contains("\"$ref\":\"target_key\"") || json.contains("\"$ref\": \"target_key\""),
2492                "Ref must be {{\"$ref\": \"name\"}}: {}", json);
2493        }
2494
2495        #[test]
2496        fn contract_tagged_to_json() {
2497            let mut data = IndexMap::new();
2498            data.insert("t".to_string(), Value::Tagged("ok".to_string(), Box::new(Value::Int(200))));
2499            let doc = TeaLeaf { data, schemas: IndexMap::new(), unions: IndexMap::new(), is_root_array: false };
2500
2501            let json = doc.to_json_compact().unwrap();
2502            // CONTRACT: Tagged serializes with $tag and $value keys
2503            assert!(json.contains("\"$tag\""), "Tagged must have $tag: {}", json);
2504            assert!(json.contains("\"ok\""), "Tag name must be present: {}", json);
2505            assert!(json.contains("\"$value\""), "Tagged must have $value: {}", json);
2506            assert!(json.contains("200"), "Inner value must be present: {}", json);
2507        }
2508
2509        #[test]
2510        fn contract_tagged_null_value_to_json() {
2511            let mut data = IndexMap::new();
2512            data.insert("t".to_string(), Value::Tagged("none".to_string(), Box::new(Value::Null)));
2513            let doc = TeaLeaf { data, schemas: IndexMap::new(), unions: IndexMap::new(), is_root_array: false };
2514
2515            let json = doc.to_json_compact().unwrap();
2516            // CONTRACT: Tagged with null inner still has $value: null
2517            assert!(json.contains("\"$value\":null") || json.contains("\"$value\": null"),
2518                "Tagged with null must have $value:null: {}", json);
2519        }
2520
2521        #[test]
2522        fn contract_map_to_json_pairs() {
2523            let mut data = IndexMap::new();
2524            data.insert("m".to_string(), Value::Map(vec![
2525                (Value::Int(1), Value::String("one".to_string())),
2526                (Value::Int(2), Value::String("two".to_string())),
2527            ]));
2528            let doc = TeaLeaf { data, schemas: IndexMap::new(), unions: IndexMap::new(), is_root_array: false };
2529
2530            let json = doc.to_json_compact().unwrap();
2531            // CONTRACT: Map serializes as array of [key, value] pairs
2532            assert!(json.contains("[[1,\"one\"],[2,\"two\"]]") ||
2533                    json.contains("[[1, \"one\"], [2, \"two\"]]"),
2534                "Map must be [[k,v],...]: {}", json);
2535        }
2536
2537        #[test]
2538        fn contract_map_empty_to_json() {
2539            let mut data = IndexMap::new();
2540            data.insert("m".to_string(), Value::Map(vec![]));
2541            let doc = TeaLeaf { data, schemas: IndexMap::new(), unions: IndexMap::new(), is_root_array: false };
2542
2543            let json = doc.to_json_compact().unwrap();
2544            // CONTRACT: Empty map serializes as empty array
2545            assert!(json.contains("\"m\":[]") || json.contains("\"m\": []"),
2546                "Empty map must be []: {}", json);
2547        }
2548
2549        // --- JSON→TeaLeaf No Magic (STABLE) ---
2550
2551        #[test]
2552        fn contract_json_dollar_ref_stays_object() {
2553            let doc = TeaLeaf::from_json(r#"{"x": {"$ref": "some_key"}}"#).unwrap();
2554            let x = doc.get("x").unwrap();
2555            // CONTRACT: JSON {"$ref": ...} MUST remain Object, NOT become Ref
2556            assert!(x.as_object().is_some(), "$ref in JSON must stay Object, not become Ref");
2557            assert!(x.as_ref_name().is_none(), "$ref must not auto-convert to Ref type");
2558        }
2559
2560        #[test]
2561        fn contract_json_dollar_tag_stays_object() {
2562            let doc = TeaLeaf::from_json(r#"{"x": {"$tag": "ok", "$value": 200}}"#).unwrap();
2563            let x = doc.get("x").unwrap();
2564            // CONTRACT: JSON {"$tag": ..., "$value": ...} MUST remain Object
2565            assert!(x.as_object().is_some(), "$tag in JSON must stay Object, not become Tagged");
2566            assert!(x.as_tagged().is_none(), "$tag must not auto-convert to Tagged type");
2567        }
2568
2569        #[test]
2570        fn contract_json_hex_string_stays_string() {
2571            let doc = TeaLeaf::from_json(r#"{"x": "0xcafef00d"}"#).unwrap();
2572            let x = doc.get("x").unwrap();
2573            // CONTRACT: Hex strings MUST remain String, NOT become Bytes
2574            assert_eq!(x.as_str(), Some("0xcafef00d"));
2575            assert!(x.as_bytes().is_none(), "Hex string must not auto-convert to Bytes");
2576        }
2577
2578        #[test]
2579        fn contract_json_iso_timestamp_stays_string() {
2580            let doc = TeaLeaf::from_json(r#"{"x": "2024-01-15T10:30:00.000Z"}"#).unwrap();
2581            let x = doc.get("x").unwrap();
2582            // CONTRACT: ISO 8601 strings MUST remain String, NOT become Timestamp
2583            assert_eq!(x.as_str(), Some("2024-01-15T10:30:00.000Z"));
2584            assert!(x.as_timestamp_millis().is_none(), "ISO string must not auto-convert to Timestamp");
2585        }
2586
2587        #[test]
2588        fn contract_json_array_pairs_stays_array() {
2589            let doc = TeaLeaf::from_json(r#"{"x": [[1, "one"], [2, "two"]]}"#).unwrap();
2590            let x = doc.get("x").unwrap();
2591            // CONTRACT: Array of pairs MUST remain Array, NOT become Map
2592            assert!(x.as_array().is_some(), "Array of pairs must stay Array, not become Map");
2593            assert!(x.as_map().is_none(), "Array pairs must not auto-convert to Map");
2594        }
2595
2596        // --- Number Type Inference (STABLE) ---
2597
2598        #[test]
2599        fn contract_number_integer_to_int() {
2600            let doc = TeaLeaf::from_json(r#"{"n": 42}"#).unwrap();
2601            // CONTRACT: Integers that fit i64 become Int
2602            assert!(doc.get("n").unwrap().as_int().is_some());
2603        }
2604
2605        #[test]
2606        fn contract_number_large_to_uint() {
2607            // Max u64 = 18446744073709551615, which doesn't fit i64
2608            let doc = TeaLeaf::from_json(r#"{"n": 18446744073709551615}"#).unwrap();
2609            // CONTRACT: Large positive integers that fit u64 become UInt
2610            assert!(doc.get("n").unwrap().as_uint().is_some());
2611        }
2612
2613        #[test]
2614        fn contract_number_decimal_to_float() {
2615            let doc = TeaLeaf::from_json(r#"{"n": 3.14}"#).unwrap();
2616            // CONTRACT: Numbers with decimals become Float
2617            assert!(doc.get("n").unwrap().as_float().is_some());
2618        }
2619
2620        // --- Edge Cases (STABLE) ---
2621
2622        #[test]
2623        fn contract_float_nan_to_null() {
2624            let mut data = IndexMap::new();
2625            data.insert("f".to_string(), Value::Float(f64::NAN));
2626            let doc = TeaLeaf { data, schemas: IndexMap::new(), unions: IndexMap::new(), is_root_array: false };
2627
2628            let json = doc.to_json_compact().unwrap();
2629            // CONTRACT: NaN serializes as null (JSON has no NaN)
2630            assert!(json.contains("null"), "NaN must serialize as null: {}", json);
2631        }
2632
2633        #[test]
2634        fn contract_float_infinity_to_null() {
2635            let mut data = IndexMap::new();
2636            data.insert("f".to_string(), Value::Float(f64::INFINITY));
2637            let doc = TeaLeaf { data, schemas: IndexMap::new(), unions: IndexMap::new(), is_root_array: false };
2638
2639            let json = doc.to_json_compact().unwrap();
2640            // CONTRACT: Infinity serializes as null (JSON has no Infinity)
2641            assert!(json.contains("null"), "Infinity must serialize as null: {}", json);
2642        }
2643
2644        #[test]
2645        fn contract_deep_nesting_preserved() {
2646            let doc = TeaLeaf::from_json(r#"{"a":{"b":{"c":{"d":{"e":5}}}}}"#).unwrap();
2647            let a = doc.get("a").unwrap().as_object().unwrap();
2648            let b = a.get("b").unwrap().as_object().unwrap();
2649            let c = b.get("c").unwrap().as_object().unwrap();
2650            let d = c.get("d").unwrap().as_object().unwrap();
2651            assert_eq!(d.get("e").unwrap().as_int(), Some(5));
2652        }
2653    }
2654
2655    // =========================================================================
2656    // Schema Inference Tests
2657    // =========================================================================
2658
/// Importing a `users` array of uniform objects with schema inference must
/// produce a `user` schema whose fields follow JSON key order, while the
/// data stays readable as ordinary objects.
#[test]
fn test_schema_inference_simple_array() {
    let doc = TeaLeaf::from_json_with_schemas(
        r#"{"users": [{"name": "alice", "age": 30}, {"name": "bob", "age": 25}]}"#,
    )
    .unwrap();

    // The schema name is the array key "users" reduced to "user".
    let inferred = doc.schema("user");
    assert!(inferred.is_some(), "Should infer 'user' schema from 'users' array");

    // Exactly the two keys seen in the JSON, in the order they were written.
    let inferred = inferred.unwrap();
    assert_eq!(inferred.fields.len(), 2);
    assert_eq!(inferred.fields[0].name, "name");
    assert_eq!(inferred.fields[1].name, "age");

    // Inference must not disturb access to the rows themselves.
    let rows = doc.get("users").unwrap().as_array().unwrap();
    assert_eq!(rows.len(), 2);
    let first_row = rows[0].as_object().unwrap();
    assert_eq!(first_row.get("name").unwrap().as_str(), Some("alice"));
}
2680
/// An array of objects that themselves contain an array of objects must
/// yield two schemas (`order` and `item`), with the outer schema's `items`
/// field typed as an array of the inner schema.
#[test]
fn test_schema_inference_nested_arrays() {
    let json = r#"{
        "orders": [
            {"id": 1, "items": [{"sku": "A", "qty": 2}, {"sku": "B", "qty": 1}]},
            {"id": 2, "items": [{"sku": "C", "qty": 3}]}
        ]
    }"#;
    let doc = TeaLeaf::from_json_with_schemas(json).unwrap();

    // One schema per level of nesting.
    assert!(doc.schema("order").is_some(), "Should infer 'order' schema");
    assert!(doc.schema("item").is_some(), "Should infer 'item' schema");

    // The outer schema lists both of its keys.
    let order = doc.schema("order").unwrap();
    let order_has = |wanted: &str| order.fields.iter().any(|f| f.name.as_str() == wanted);
    assert!(order_has("id"));
    assert!(order_has("items"));

    // `items` is an array whose element type is the inner schema.
    let items = order.fields.iter().find(|f| f.name == "items").unwrap();
    assert!(items.field_type.is_array);
    assert_eq!(items.field_type.base, "item");
}
2705
/// Infers a schema from a `products` array and checks the text produced by
/// `to_tl_with_schemas`: it must declare the struct with typed fields, use
/// the `@table` directive for the data, and still contain the data itself.
#[test]
fn test_schema_inference_to_tl_text() {
    let json = r#"{"products": [{"name": "Widget", "price": 9.99}, {"name": "Gadget", "price": 19.99}]}"#;
    let doc = TeaLeaf::from_json_with_schemas(json).unwrap();

    let tl_text = doc.to_tl_with_schemas();

    // Should contain struct definition
    assert!(tl_text.contains("@struct product"), "TeaLeaf text should contain struct definition");
    assert!(tl_text.contains("name: string"), "Struct should have name field");
    assert!(tl_text.contains("price: float"), "Struct should have price field");

    // Should contain @table directive
    assert!(tl_text.contains("@table product"), "TeaLeaf text should use @table for data");

    // Should contain the row data. A bare substring check covers both the
    // quoted and the unquoted spelling, because `"Widget"` itself contains
    // `Widget`; a separate check for the quoted form could never change
    // the outcome.
    assert!(tl_text.contains("Widget"), "Data should be present");
}
2724
/// JSON -> inferred schemas -> TeaLeaf text -> parse: the reparsed document
/// must expose the same rows and still carry the inferred `item` schema.
#[test]
fn test_schema_inference_roundtrip() {
    let source = r#"{"items": [{"id": 1, "name": "A"}, {"id": 2, "name": "B"}]}"#;

    // Forward leg: import with inference and render as schema-bearing text.
    let rendered = TeaLeaf::from_json_with_schemas(source)
        .unwrap()
        .to_tl_with_schemas();

    // Return leg: the rendered text must parse again.
    let reparsed = TeaLeaf::parse(&rendered).unwrap();

    // Row count and the first row's fields are unchanged.
    let rows = reparsed.get("items").unwrap().as_array().unwrap();
    assert_eq!(rows.len(), 2);
    let first_row = rows[0].as_object().unwrap();
    assert_eq!(first_row.get("id").unwrap().as_int(), Some(1));
    assert_eq!(first_row.get("name").unwrap().as_str(), Some("A"));

    // The schema declaration travelled through the text form.
    assert!(reparsed.schema("item").is_some());
}
2745
/// When a key is `null` in at least one row, the inferred field for that
/// key must be marked nullable.
#[test]
fn test_schema_inference_nullable_fields() {
    let doc = TeaLeaf::from_json_with_schemas(
        r#"{"users": [{"name": "alice", "email": "a@test.com"}, {"name": "bob", "email": null}]}"#,
    )
    .unwrap();

    // Second row has `"email": null`, so `email` cannot be a required field.
    let user = doc.schema("user").unwrap();
    let email = user
        .fields
        .iter()
        .find(|field| field.name == "email")
        .unwrap();

    assert!(email.field_type.nullable, "Field with null values should be nullable");
}
2757
/// Renders an `orders` array with a nested `items` array to schema-bearing
/// text, checks that the top-level array uses `@table`, and verifies via a
/// reparse that the nested rows are still reachable.
#[test]
fn test_schema_inference_nested_tuples_no_redundant_table() {
    let json = r#"{
        "orders": [
            {"id": 1, "items": [{"sku": "A", "qty": 2}]}
        ]
    }"#;
    let doc = TeaLeaf::from_json_with_schemas(json).unwrap();
    let tl_text = doc.to_tl_with_schemas();

    // Should have @table for orders, but NOT redundant @table for nested items
    // The nested items array should just be [...] with tuples inside
    //
    // NOTE(review): the number of `@table` occurrences is not asserted here.
    // The previous version counted them into an unused local; that dead
    // computation was removed. Add an explicit count assertion once the
    // expected value is confirmed against the writer's output.
    assert!(tl_text.contains("@table order"), "Should have @table for orders");

    // Parse and verify the structure is correct
    let parsed = TeaLeaf::parse(&tl_text).unwrap();
    let orders = parsed.get("orders").unwrap().as_array().unwrap();
    let first_order = orders[0].as_object().unwrap();
    let items = first_order.get("items").unwrap().as_array().unwrap();
    assert_eq!(items[0].as_object().unwrap().get("sku").unwrap().as_str(), Some("A"));
}
2782
/// Two arrays whose objects share only the `id` key must get separate
/// schemas, each keeping the field that is specific to it.
#[test]
fn test_schema_inference_mismatched_arrays_not_matched() {
    let json = r#"{
        "users": [{"id": "U1", "name": "Alice"}],
        "products": [{"id": "P1", "price": 9.99}]
    }"#;
    let doc = TeaLeaf::from_json_with_schemas(json).unwrap();

    // Both schemas exist independently.
    assert!(doc.schema("user").is_some());
    assert!(doc.schema("product").is_some());

    // `name` belongs to the user shape ...
    let user = doc.schema("user").unwrap();
    assert!(user.fields.iter().any(|field| field.name == "name"));

    // ... and `price` to the product shape.
    let product = doc.schema("product").unwrap();
    assert!(product.fields.iter().any(|field| field.name == "price"));
}
2804
/// String values containing `/` or `@` must be quoted in the rendered text,
/// a dotted value must not be, and the quoted values must survive a reparse.
#[test]
fn test_schema_inference_special_char_quoting() {
    let json = r#"{"items": [
        {"category": "Electronics/Audio", "email": "test@example.com", "path": "a.b.c"}
    ]}"#;
    let rendered = TeaLeaf::from_json_with_schemas(json)
        .unwrap()
        .to_tl_with_schemas();

    // Values with a slash or an at-sign appear wrapped in double quotes.
    assert!(rendered.contains("\"Electronics/Audio\""), "Slash should be quoted: {}", rendered);
    assert!(rendered.contains("\"test@example.com\""), "@ should be quoted: {}", rendered);
    // Dots are allowed in names by the spec grammar, so `a.b.c` stays bare.
    assert!(!rendered.contains("\"a.b.c\""), "Dots should NOT be quoted per spec grammar: {}", rendered);

    // Reading the text back returns the original strings without the quotes.
    let reparsed = TeaLeaf::parse(&rendered).unwrap();
    let rows = reparsed.get("items").unwrap().as_array().unwrap();
    let row = rows[0].as_object().unwrap();
    assert_eq!(row.get("category").unwrap().as_str(), Some("Electronics/Audio"));
    assert_eq!(row.get("email").unwrap().as_str(), Some("test@example.com"));
}
2827
/// Objects nested inside array rows must get schemas of their own, named
/// after the key they sit under, and the row schema must reference them by
/// that name both in memory and in the rendered text.
#[test]
fn test_schema_inference_nested_objects() {
    let json = r#"{
        "customers": [
            {
                "id": 1,
                "name": "Alice",
                "billing_address": {
                    "street": "123 Main St",
                    "city": "Boston",
                    "state": "MA",
                    "postal_code": "02101",
                    "country": "USA"
                },
                "shipping_address": {
                    "street": "456 Oak Ave",
                    "city": "Cambridge",
                    "state": "MA",
                    "postal_code": "02139",
                    "country": "USA"
                }
            },
            {
                "id": 2,
                "name": "Bob",
                "billing_address": {
                    "street": "789 Elm St",
                    "city": "New York",
                    "state": "NY",
                    "postal_code": "10001",
                    "country": "USA"
                },
                "shipping_address": {
                    "street": "789 Elm St",
                    "city": "New York",
                    "state": "NY",
                    "postal_code": "10001",
                    "country": "USA"
                }
            }
        ]
    }"#;

    let doc = TeaLeaf::from_json_with_schemas(json).unwrap();

    // One schema per nested key, plus one for the rows themselves.
    assert!(doc.schema("billing_address").is_some(), "Should create billing_address schema");
    assert!(doc.schema("shipping_address").is_some(), "Should create shipping_address schema");
    assert!(doc.schema("customer").is_some(), "Should create customer schema");

    // The billing schema carries every key of the nested object.
    let billing = doc.schema("billing_address").unwrap();
    let billing_has = |wanted: &str| billing.fields.iter().any(|f| f.name.as_str() == wanted);
    assert!(billing_has("street"), "billing_address should have street field");
    assert!(billing_has("city"), "billing_address should have city field");
    assert!(billing_has("state"), "billing_address should have state field");
    assert!(billing_has("postal_code"), "billing_address should have postal_code field");
    assert!(billing_has("country"), "billing_address should have country field");

    // The row schema points at the nested schemas through its field types.
    let customer = doc.schema("customer").unwrap();
    let billing_ref = customer.fields.iter().find(|f| f.name == "billing_address").unwrap();
    assert_eq!(billing_ref.field_type.base, "billing_address", "customer.billing_address should reference billing_address schema");

    let shipping_ref = customer.fields.iter().find(|f| f.name == "shipping_address").unwrap();
    assert_eq!(shipping_ref.field_type.base, "shipping_address", "customer.shipping_address should reference shipping_address schema");

    // The rendered text declares both structs and uses them as field types.
    let rendered = doc.to_tl_with_schemas();
    assert!(rendered.contains("@struct billing_address"), "Output should contain billing_address struct");
    assert!(rendered.contains("@struct shipping_address"), "Output should contain shipping_address struct");
    assert!(rendered.contains("billing_address: billing_address"), "customer should have billing_address field with billing_address type");
    assert!(rendered.contains("shipping_address: shipping_address"), "customer should have shipping_address field with shipping_address type");
}
2903
2904    #[test]
2905    fn test_schema_inference_nested_objects_with_nulls() {
2906        // Test that nested objects handle nullable fields correctly
2907        let json = r#"{
2908            "orders": [
2909                {
2910                    "id": 1,
2911                    "customer": {
2912                        "name": "Alice",
2913                        "phone": "555-1234"
2914                    }
2915                },
2916                {
2917                    "id": 2,
2918                    "customer": {
2919                        "name": "Bob",
2920                        "phone": null
2921                    }
2922                }
2923            ]
2924        }"#;
2925
2926        let doc = TeaLeaf::from_json_with_schemas(json).unwrap();
2927
2928        // Customer schema should exist with nullable phone
2929        let customer_schema = doc.schema("customer").unwrap();
2930        let phone_field = customer_schema.fields.iter().find(|f| f.name == "phone").unwrap();
2931        assert!(phone_field.field_type.nullable, "phone field should be nullable");
2932    }
2933
2934    // =========================================================================
2935    // Coverage: dumps(), write_value(), escape_string(), format_float()
2936    // =========================================================================
2937
2938    #[test]
2939    fn test_dumps_all_value_types() {
2940        let mut data = IndexMap::new();
2941        data.insert("null_val".to_string(), Value::Null);
2942        data.insert("bool_val".to_string(), Value::Bool(true));
2943        data.insert("int_val".to_string(), Value::Int(42));
2944        data.insert("uint_val".to_string(), Value::UInt(999));
2945        data.insert("float_val".to_string(), Value::Float(3.14));
2946        data.insert("str_val".to_string(), Value::String("hello".to_string()));
2947        data.insert("bytes_val".to_string(), Value::Bytes(vec![0xca, 0xfe]));
2948        data.insert("arr_val".to_string(), Value::Array(vec![Value::Int(1), Value::Int(2)]));
2949        data.insert("obj_val".to_string(), Value::Object(
2950            vec![("x".to_string(), Value::Int(1))].into_iter().collect()
2951        ));
2952        data.insert("map_val".to_string(), Value::Map(vec![
2953            (Value::Int(1), Value::String("one".to_string())),
2954        ]));
2955        data.insert("ref_val".to_string(), Value::Ref("target".to_string()));
2956        data.insert("tag_val".to_string(), Value::Tagged("ok".to_string(), Box::new(Value::Int(200))));
2957        data.insert("ts_val".to_string(), Value::Timestamp(0, 0));
2958        data.insert("ts_millis".to_string(), Value::Timestamp(1705315800123, 0));
2959
2960        let output = dumps(&data);
2961
2962        assert!(output.contains("~"), "Should contain null");
2963        assert!(output.contains("true"), "Should contain bool");
2964        assert!(output.contains("42"), "Should contain int");
2965        assert!(output.contains("999"), "Should contain uint");
2966        assert!(output.contains("3.14"), "Should contain float");
2967        assert!(output.contains("hello"), "Should contain string");
2968        assert!(output.contains("b\"cafe\""), "Should contain bytes literal");
2969        assert!(output.contains("[1, 2]"), "Should contain array");
2970        assert!(output.contains("@map {"), "Should contain map");
2971        assert!(output.contains("!target"), "Should contain ref");
2972        assert!(output.contains(":ok 200"), "Should contain tagged");
2973        assert!(output.contains("1970-01-01T00:00:00Z"), "Should contain epoch timestamp");
2974        assert!(output.contains(".123Z"), "Should contain millis timestamp");
2975    }
2976
2977    #[test]
2978    fn test_bytes_literal_text_roundtrip() {
2979        // dumps() emits b"..." → parse() reads it back as Value::Bytes
2980        let mut data = IndexMap::new();
2981        data.insert("payload".to_string(), Value::Bytes(vec![0xca, 0xfe, 0xf0, 0x0d]));
2982        data.insert("empty".to_string(), Value::Bytes(vec![]));
2983
2984        let text = dumps(&data);
2985        assert!(text.contains(r#"b"cafef00d""#), "Should emit b\"...\" literal: {}", text);
2986        assert!(text.contains(r#"b"""#), "Should emit empty bytes literal: {}", text);
2987
2988        // Parse the text back
2989        let doc = TeaLeaf::parse(&text).unwrap();
2990        assert_eq!(doc.data.get("payload").unwrap().as_bytes(), Some(&[0xca, 0xfe, 0xf0, 0x0d][..]));
2991        assert_eq!(doc.data.get("empty").unwrap().as_bytes(), Some(&[][..]));
2992    }
2993
2994    #[test]
2995    fn test_dumps_string_quoting() {
2996        let mut data = IndexMap::new();
2997        data.insert("quoted".to_string(), Value::String("hello world".to_string()));
2998        data.insert("unquoted".to_string(), Value::String("hello".to_string()));
2999        data.insert("reserved_true".to_string(), Value::String("true".to_string()));
3000        data.insert("reserved_null".to_string(), Value::String("null".to_string()));
3001        data.insert("reserved_tilde".to_string(), Value::String("~".to_string()));
3002        data.insert("empty".to_string(), Value::String("".to_string()));
3003        data.insert("at_start".to_string(), Value::String("@directive".to_string()));
3004        data.insert("hash_start".to_string(), Value::String("#comment".to_string()));
3005        data.insert("bang_start".to_string(), Value::String("!ref".to_string()));
3006        data.insert("hex_start".to_string(), Value::String("0xabc".to_string()));
3007        data.insert("number_like".to_string(), Value::String("42abc".to_string()));
3008        data.insert("negative_like".to_string(), Value::String("-5".to_string()));
3009        data.insert("slash".to_string(), Value::String("a/b".to_string()));
3010        data.insert("dot".to_string(), Value::String("a.b".to_string()));
3011
3012        let output = dumps(&data);
3013
3014        // Quoted values should be wrapped in double quotes
3015        assert!(output.contains("\"hello world\""), "Spaces need quoting");
3016        assert!(output.contains("\"true\""), "Reserved word true needs quoting");
3017        assert!(output.contains("\"null\""), "Reserved word null needs quoting");
3018        assert!(output.contains("\"~\""), "Tilde needs quoting");
3019        assert!(output.contains("\"\""), "Empty string needs quoting");
3020        assert!(output.contains("\"@directive\""), "@ prefix needs quoting");
3021        assert!(output.contains("\"#comment\""), "# prefix needs quoting");
3022        assert!(output.contains("\"!ref\""), "! prefix needs quoting");
3023        assert!(output.contains("\"0xabc\""), "0x prefix needs quoting");
3024        assert!(output.contains("\"42abc\""), "Digit start needs quoting");
3025        assert!(output.contains("\"-5\""), "Negative number needs quoting");
3026        assert!(output.contains("\"a/b\""), "Slash needs quoting");
3027        assert!(!output.contains("\"a.b\""), "Dot should NOT need quoting per spec grammar");
3028    }
3029
3030    #[test]
3031    fn test_escape_string_control_chars() {
3032        let result = escape_string("tab\there\nnewline\rreturn");
3033        assert!(result.contains("\\t"), "Tab should be escaped");
3034        assert!(result.contains("\\n"), "Newline should be escaped");
3035        assert!(result.contains("\\r"), "CR should be escaped");
3036
3037        let result = escape_string("\x08backspace\x0cformfeed");
3038        assert!(result.contains("\\b"), "Backspace should be escaped");
3039        assert!(result.contains("\\f"), "Formfeed should be escaped");
3040
3041        let result = escape_string("quote\"and\\backslash");
3042        assert!(result.contains("\\\""), "Quote should be escaped");
3043        assert!(result.contains("\\\\"), "Backslash should be escaped");
3044
3045        // Other control characters use \uXXXX
3046        let result = escape_string("\x01");
3047        assert!(result.contains("\\u0001"), "Control char should use \\uXXXX");
3048    }
3049
3050    #[test]
3051    fn test_format_float_both_branches() {
3052        // Whole number float: Rust's to_string() drops .0, so format_float adds it back
3053        assert_eq!(format_float(42.0, false), "42.0");
3054
3055        // Float with decimals should stay as-is
3056        assert_eq!(format_float(3.14, false), "3.14");
3057
3058        // Scientific notation stays as-is
3059        let very_small = format_float(1e-20, false);
3060        assert!(very_small.contains('e') || very_small.contains('.'));
3061    }
3062
3063    #[test]
3064    fn test_format_float_compact_floats() {
3065        // With compact_floats=true, whole-number floats strip .0
3066        assert_eq!(format_float(42.0, true), "42");
3067        assert_eq!(format_float(0.0, true), "0");
3068        assert_eq!(format_float(17164000000.0, true), "17164000000");
3069        assert_eq!(format_float(35934000000.0, true), "35934000000");
3070        assert_eq!(format_float(-100.0, true), "-100");
3071
3072        // Non-whole floats are unaffected
3073        assert_eq!(format_float(3.14, true), "3.14");
3074        assert_eq!(format_float(0.5, true), "0.5");
3075
3076        // Special values unaffected
3077        assert_eq!(format_float(f64::NAN, true), "NaN");
3078        assert_eq!(format_float(f64::INFINITY, true), "inf");
3079        assert_eq!(format_float(f64::NEG_INFINITY, true), "-inf");
3080
3081        // Very large floats use scientific notation (digits > 15), unaffected
3082        let large = format_float(1e20, true);
3083        assert!(large.contains('e'), "Very large should use scientific: {}", large);
3084    }
3085
3086    #[test]
3087    fn test_dumps_with_compact_floats() {
3088        let mut data = IndexMap::new();
3089        data.insert("revenue".to_string(), Value::Float(35934000000.0));
3090        data.insert("ratio".to_string(), Value::Float(3.14));
3091        data.insert("count".to_string(), Value::Int(42));
3092
3093        // Default: whole floats keep .0
3094        let pretty = dumps(&data);
3095        assert!(pretty.contains("35934000000.0"), "Default should have .0: {}", pretty);
3096
3097        // compact_floats: whole floats stripped
3098        let opts = FormatOptions::compact().with_compact_floats();
3099        let compact = dumps_with_options(&data, &opts);
3100        assert!(compact.contains("35934000000"), "Should have whole number: {}", compact);
3101        assert!(!compact.contains("35934000000.0"), "Should NOT have .0: {}", compact);
3102        assert!(compact.contains("3.14"), "Non-whole float preserved: {}", compact);
3103        assert!(compact.contains("42"), "Int preserved: {}", compact);
3104    }
3105
3106    #[test]
3107    fn test_needs_quoting_various_patterns() {
3108        // Should need quoting
3109        assert!(needs_quoting(""), "Empty string");
3110        assert!(needs_quoting("hello world"), "Whitespace");
3111        assert!(needs_quoting("a,b"), "Comma");
3112        assert!(needs_quoting("(x)"), "Parens");
3113        assert!(needs_quoting("[x]"), "Brackets");
3114        assert!(needs_quoting("{x}"), "Braces");
3115        assert!(needs_quoting("a:b"), "Colon");
3116        assert!(needs_quoting("@x"), "At sign");
3117        assert!(needs_quoting("a/b"), "Slash");
3118        assert!(!needs_quoting("a.b"), "Dot is valid in names per spec grammar");
3119        assert!(needs_quoting("true"), "Reserved true");
3120        assert!(needs_quoting("false"), "Reserved false");
3121        assert!(needs_quoting("null"), "Reserved null");
3122        assert!(needs_quoting("~"), "Reserved tilde");
3123        assert!(needs_quoting("!bang"), "Bang prefix");
3124        assert!(needs_quoting("#hash"), "Hash prefix");
3125        assert!(needs_quoting("0xdead"), "Hex prefix");
3126        assert!(needs_quoting("0Xdead"), "Hex prefix uppercase");
3127        assert!(needs_quoting("42abc"), "Starts with digit");
3128        assert!(needs_quoting("-5"), "Starts with minus+digit");
3129        assert!(needs_quoting("+5"), "Starts with plus+digit");
3130
3131        // Should NOT need quoting
3132        assert!(!needs_quoting("hello"), "Simple word");
3133        assert!(!needs_quoting("foo_bar"), "Underscore word");
3134        assert!(!needs_quoting("abc123"), "Alpha then digits");
3135    }
3136
3137    // =========================================================================
3138    // Coverage: singularize()
3139    // =========================================================================
3140
3141    #[test]
3142    fn test_singularize_rules() {
3143        // -ies → -y
3144        assert_eq!(singularize("categories"), "category");
3145        assert_eq!(singularize("entries"), "entry");
3146
3147        // -sses → -ss (special -es rule)
3148        assert_eq!(singularize("classes"), "class");
3149        assert_eq!(singularize("dresses"), "dress");
3150
3151        // -xes → -x
3152        assert_eq!(singularize("boxes"), "box");
3153        assert_eq!(singularize("indexes"), "index");
3154
3155        // -ches → -ch
3156        assert_eq!(singularize("watches"), "watch");
3157
3158        // -shes → -sh
3159        assert_eq!(singularize("dishes"), "dish");
3160
3161        // Regular -s
3162        assert_eq!(singularize("users"), "user");
3163        assert_eq!(singularize("products"), "product");
3164
3165        // Words ending in -ss (should NOT remove s)
3166        assert_eq!(singularize("boss"), "boss");
3167        assert_eq!(singularize("class"), "class");
3168
3169        // Already singular (no trailing s)
3170        assert_eq!(singularize("item"), "item");
3171        assert_eq!(singularize("child"), "child");
3172    }
3173
3174    // =========================================================================
3175    // Coverage: from_json root primitives, loads()
3176    // =========================================================================
3177
3178    #[test]
3179    fn test_from_json_root_primitive() {
3180        // Root-level string
3181        let doc = TeaLeaf::from_json(r#""hello""#).unwrap();
3182        assert_eq!(doc.get("root").unwrap().as_str(), Some("hello"));
3183        assert!(!doc.is_root_array);
3184
3185        // Root-level number
3186        let doc = TeaLeaf::from_json("42").unwrap();
3187        assert_eq!(doc.get("root").unwrap().as_int(), Some(42));
3188
3189        // Root-level bool
3190        let doc = TeaLeaf::from_json("true").unwrap();
3191        assert_eq!(doc.get("root").unwrap().as_bool(), Some(true));
3192
3193        // Root-level null
3194        let doc = TeaLeaf::from_json("null").unwrap();
3195        assert!(doc.get("root").unwrap().is_null());
3196    }
3197
3198    #[test]
3199    fn test_from_json_invalid() {
3200        let result = TeaLeaf::from_json("not valid json {{{");
3201        assert!(result.is_err());
3202    }
3203
3204    #[test]
3205    fn test_loads_convenience() {
3206        let data = loads("name: alice\nage: 30").unwrap();
3207        assert_eq!(data.get("name").unwrap().as_str(), Some("alice"));
3208        assert_eq!(data.get("age").unwrap().as_int(), Some(30));
3209    }
3210
3211    // =========================================================================
3212    // Coverage: InferredType::merge() branches
3213    // =========================================================================
3214
3215    #[test]
3216    fn test_inferred_type_merge_int_float() {
3217        let t = infer_type(&Value::Int(42));
3218        let f = infer_type(&Value::Float(3.14));
3219        let merged = t.merge(&f);
3220        assert_eq!(merged, InferredType::Float);
3221
3222        // Reverse
3223        let merged = f.merge(&t);
3224        assert_eq!(merged, InferredType::Float);
3225    }
3226
3227    #[test]
3228    fn test_inferred_type_merge_null_with_type() {
3229        let n = InferredType::Null;
3230        let s = InferredType::String;
3231        let merged = n.merge(&s);
3232        assert_eq!(merged, InferredType::String);
3233
3234        // Reverse
3235        let merged = s.merge(&n);
3236        assert_eq!(merged, InferredType::String);
3237    }
3238
3239    #[test]
3240    fn test_inferred_type_merge_arrays() {
3241        let a1 = InferredType::Array(Box::new(InferredType::Int));
3242        let a2 = InferredType::Array(Box::new(InferredType::Float));
3243        let merged = a1.merge(&a2);
3244        assert_eq!(merged, InferredType::Array(Box::new(InferredType::Float)));
3245    }
3246
3247    #[test]
3248    fn test_inferred_type_merge_objects_same_fields() {
3249        let o1 = InferredType::Object(vec![
3250            ("a".to_string(), InferredType::Int),
3251            ("b".to_string(), InferredType::String),
3252        ]);
3253        let o2 = InferredType::Object(vec![
3254            ("a".to_string(), InferredType::Float),
3255            ("b".to_string(), InferredType::String),
3256        ]);
3257        let merged = o1.merge(&o2);
3258        if let InferredType::Object(fields) = &merged {
3259            assert_eq!(fields.len(), 2);
3260            assert_eq!(fields[0].1, InferredType::Float); // Int+Float → Float
3261            assert_eq!(fields[1].1, InferredType::String);
3262        } else {
3263            panic!("Expected Object, got {:?}", merged);
3264        }
3265    }
3266
3267    #[test]
3268    fn test_inferred_type_merge_objects_different_fields() {
3269        let o1 = InferredType::Object(vec![
3270            ("a".to_string(), InferredType::Int),
3271        ]);
3272        let o2 = InferredType::Object(vec![
3273            ("b".to_string(), InferredType::String),
3274        ]);
3275        let merged = o1.merge(&o2);
3276        assert_eq!(merged, InferredType::Mixed);
3277    }
3278
3279    #[test]
3280    fn test_inferred_type_merge_incompatible() {
3281        let s = InferredType::String;
3282        let i = InferredType::Int;
3283        let merged = s.merge(&i);
3284        assert_eq!(merged, InferredType::Mixed);
3285    }
3286
3287    #[test]
3288    fn test_inferred_type_to_field_type() {
3289        let schemas = IndexMap::new();
3290
3291        assert_eq!(InferredType::Null.to_field_type(&schemas).base, "string");
3292        assert!(InferredType::Null.to_field_type(&schemas).nullable);
3293        assert_eq!(InferredType::Bool.to_field_type(&schemas).base, "bool");
3294        assert_eq!(InferredType::Int.to_field_type(&schemas).base, "int");
3295        assert_eq!(InferredType::Float.to_field_type(&schemas).base, "float");
3296        assert_eq!(InferredType::String.to_field_type(&schemas).base, "string");
3297        assert_eq!(InferredType::Mixed.to_field_type(&schemas).base, "any");
3298
3299        // Array type
3300        let arr_type = InferredType::Array(Box::new(InferredType::Int));
3301        let ft = arr_type.to_field_type(&schemas);
3302        assert_eq!(ft.base, "int");
3303        assert!(ft.is_array);
3304
3305        // Object with no matching schema → "any" (not "object", which is a value-only type)
3306        let obj_type = InferredType::Object(vec![("x".to_string(), InferredType::Int)]);
3307        assert_eq!(obj_type.to_field_type(&schemas).base, "any");
3308    }
3309
3310    #[test]
3311    fn test_inferred_type_to_field_type_with_matching_schema() {
3312        let mut schemas = IndexMap::new();
3313        let mut schema = Schema::new("point");
3314        schema.add_field("x", FieldType::new("int"));
3315        schema.add_field("y", FieldType::new("int"));
3316        schemas.insert("point".to_string(), schema);
3317
3318        let obj_type = InferredType::Object(vec![
3319            ("x".to_string(), InferredType::Int),
3320            ("y".to_string(), InferredType::Int),
3321        ]);
3322        let ft = obj_type.to_field_type(&schemas);
3323        assert_eq!(ft.base, "point");
3324    }
3325
3326    #[test]
3327    fn test_infer_type_special_values() {
3328        // Bytes, Ref, Tagged, Timestamp, Map all become Mixed
3329        assert_eq!(infer_type(&Value::Bytes(vec![1, 2])), InferredType::Mixed);
3330        assert_eq!(infer_type(&Value::Ref("x".to_string())), InferredType::Mixed);
3331        assert_eq!(infer_type(&Value::Tagged("t".to_string(), Box::new(Value::Null))), InferredType::Mixed);
3332        assert_eq!(infer_type(&Value::Timestamp(0, 0)), InferredType::Mixed);
3333        assert_eq!(infer_type(&Value::Map(vec![])), InferredType::Mixed);
3334
3335        // Empty array
3336        if let InferredType::Array(inner) = infer_type(&Value::Array(vec![])) {
3337            assert_eq!(*inner, InferredType::Mixed);
3338        } else {
3339            panic!("Expected Array");
3340        }
3341
3342        // UInt becomes Int
3343        assert_eq!(infer_type(&Value::UInt(42)), InferredType::Int);
3344    }
3345
3346    #[test]
3347    fn test_json_with_schemas_empty_nested_object_roundtrip() {
3348        // Regression: fuzzer found that [{"n":{}}] crashes because the inferrer
3349        // emits "object" as a field type, which the parser rejects as value-only.
3350        let doc = TeaLeaf::from_json_with_schemas(r#"[{"n":{}}]"#).unwrap();
3351        let tl_text = doc.to_tl_with_schemas();
3352        // Must re-parse without error
3353        let reparsed = TeaLeaf::parse(&tl_text).unwrap();
3354        assert_eq!(doc.data.len(), reparsed.data.len());
3355    }
3356
3357    // =========================================================================
3358    // Coverage: to_tl_with_schemas() edge cases
3359    // =========================================================================
3360
3361    #[test]
3362    fn test_to_tl_with_schemas_no_schemas() {
3363        let mut data = IndexMap::new();
3364        data.insert("name".to_string(), Value::String("alice".to_string()));
3365        let doc = TeaLeaf { data, schemas: IndexMap::new(), unions: IndexMap::new(), is_root_array: false };
3366
3367        let output = doc.to_tl_with_schemas();
3368        assert!(output.contains("name: alice"), "Should use dumps() format");
3369        assert!(!output.contains("@struct"), "No schemas");
3370    }
3371
3372    #[test]
3373    fn test_to_tl_with_schemas_root_array() {
3374        let mut data = IndexMap::new();
3375        data.insert("root".to_string(), Value::Array(vec![Value::Int(1), Value::Int(2)]));
3376        let doc = TeaLeaf { data, schemas: IndexMap::new(), unions: IndexMap::new(), is_root_array: true };
3377
3378        let output = doc.to_tl_with_schemas();
3379        assert!(output.starts_with("@root-array"), "Should have root-array directive");
3380    }
3381
3382    // =========================================================================
3383    // Coverage: write_value_with_schemas() for special types
3384    // =========================================================================
3385
3386    #[test]
3387    fn test_dumps_with_schemas_all_types() {
3388        let mut schemas = IndexMap::new();
3389        let mut schema = Schema::new("item");
3390        schema.add_field("id", FieldType::new("int"));
3391        schema.add_field("name", FieldType::new("string"));
3392        schemas.insert("item".to_string(), schema);
3393
3394        let mut data = IndexMap::new();
3395        // Array matching schema → @table
3396        data.insert("items".to_string(), Value::Array(vec![
3397            Value::Object(vec![
3398                ("id".to_string(), Value::Int(1)),
3399                ("name".to_string(), Value::String("Widget".to_string())),
3400            ].into_iter().collect()),
3401        ]));
3402        // Special types
3403        data.insert("ref_val".to_string(), Value::Ref("target".to_string()));
3404        data.insert("tag_val".to_string(), Value::Tagged("ok".to_string(), Box::new(Value::Int(200))));
3405        data.insert("map_val".to_string(), Value::Map(vec![
3406            (Value::Int(1), Value::String("one".to_string())),
3407        ]));
3408        data.insert("bytes_val".to_string(), Value::Bytes(vec![0xde, 0xad]));
3409        data.insert("ts_val".to_string(), Value::Timestamp(0, 0));
3410        data.insert("ts_millis".to_string(), Value::Timestamp(1705315800123, 0));
3411
3412        let schema_order = vec!["item".to_string()];
3413        let output = dumps_with_schemas(&data, &schemas, &schema_order, &IndexMap::new(), &[]);
3414
3415        assert!(output.contains("@struct item"), "Should contain schema def");
3416        assert!(output.contains("@table item"), "Should use @table format");
3417        assert!(output.contains("!target"), "Should contain ref");
3418        assert!(output.contains(":ok 200"), "Should contain tagged");
3419        assert!(output.contains("@map {"), "Should contain map");
3420        assert!(output.contains("b\"dead\""), "Should contain bytes literal");
3421        assert!(output.contains("1970-01-01T00:00:00Z"), "Should contain timestamp");
3422        assert!(output.contains(".123Z"), "Should contain millis timestamp");
3423    }
3424
3425    #[test]
3426    fn test_dumps_with_schemas_object_value() {
3427        let schemas = IndexMap::new();
3428        let mut data = IndexMap::new();
3429        data.insert("config".to_string(), Value::Object(
3430            vec![
3431                ("host".to_string(), Value::String("localhost".to_string())),
3432                ("port".to_string(), Value::Int(8080)),
3433            ].into_iter().collect()
3434        ));
3435
3436        let output = dumps_with_schemas(&data, &schemas, &[], &IndexMap::new(), &[]);
3437        assert!(output.contains("config:"), "Should contain key");
3438        assert!(output.contains("{"), "Should contain object");
3439    }
3440
3441    #[test]
3442    fn test_write_tuple_with_nested_schema() {
3443        // Test tuple writing with nested struct fields
3444        let mut schemas = IndexMap::new();
3445
3446        let mut addr = Schema::new("address");
3447        addr.add_field("city", FieldType::new("string"));
3448        addr.add_field("zip", FieldType::new("string"));
3449        schemas.insert("address".to_string(), addr);
3450
3451        let mut user = Schema::new("user");
3452        user.add_field("name", FieldType::new("string"));
3453        user.add_field("home", FieldType::new("address"));
3454        schemas.insert("user".to_string(), user);
3455
3456        let mut data = IndexMap::new();
3457        data.insert("users".to_string(), Value::Array(vec![
3458            Value::Object(vec![
3459                ("name".to_string(), Value::String("Alice".to_string())),
3460                ("home".to_string(), Value::Object(vec![
3461                    ("city".to_string(), Value::String("Boston".to_string())),
3462                    ("zip".to_string(), Value::String("02101".to_string())),
3463                ].into_iter().collect())),
3464            ].into_iter().collect()),
3465        ]));
3466
3467        let schema_order = vec!["address".to_string(), "user".to_string()];
3468        let output = dumps_with_schemas(&data, &schemas, &schema_order, &IndexMap::new(), &[]);
3469
3470        assert!(output.contains("@struct address"), "Should have address schema");
3471        assert!(output.contains("@struct user"), "Should have user schema");
3472        assert!(output.contains("@table user"), "Should use @table for users");
3473        // Nested tuples
3474        assert!(output.contains("("), "Should have tuple format");
3475    }
3476
3477    #[test]
3478    fn test_write_tuple_with_schema_array_field() {
3479        // Test tuple writing with array fields that have schemas
3480        let mut schemas = IndexMap::new();
3481
3482        let mut tag = Schema::new("tag");
3483        tag.add_field("name", FieldType::new("string"));
3484        schemas.insert("tag".to_string(), tag);
3485
3486        let mut item = Schema::new("item");
3487        item.add_field("id", FieldType::new("int"));
3488        item.add_field("tags", FieldType { base: "tag".to_string(), nullable: false, is_array: true });
3489        schemas.insert("item".to_string(), item);
3490
3491        let mut data = IndexMap::new();
3492        data.insert("items".to_string(), Value::Array(vec![
3493            Value::Object(vec![
3494                ("id".to_string(), Value::Int(1)),
3495                ("tags".to_string(), Value::Array(vec![
3496                    Value::Object(vec![
3497                        ("name".to_string(), Value::String("rust".to_string())),
3498                    ].into_iter().collect()),
3499                ])),
3500            ].into_iter().collect()),
3501        ]));
3502
3503        let schema_order = vec!["tag".to_string(), "item".to_string()];
3504        let output = dumps_with_schemas(&data, &schemas, &schema_order, &IndexMap::new(), &[]);
3505
3506        assert!(output.contains("@table item"), "Should use @table for items");
3507    }
3508
3509    #[test]
3510    fn test_write_schema_array_empty() {
3511        let schemas = IndexMap::new();
3512        let schema = Schema::new("empty");
3513        let mut out = String::new();
3514        write_schema_array(&mut out, &Value::Array(vec![]), &schema, &schemas, 0, &FormatOptions::default());
3515        assert_eq!(out, "[]");
3516    }
3517
3518    #[test]
3519    fn test_write_schema_array_non_array_fallback() {
3520        let schemas = IndexMap::new();
3521        let schema = Schema::new("test");
3522        let mut out = String::new();
3523        write_schema_array(&mut out, &Value::Int(42), &schema, &schemas, 0, &FormatOptions::default());
3524        assert_eq!(out, "42");
3525    }
3526
3527    #[test]
3528    fn test_write_tuple_missing_field() {
3529        // Test that missing fields in object produce ~
3530        let schemas = IndexMap::new();
3531        let mut schema = Schema::new("test");
3532        schema.add_field("present", FieldType::new("int"));
3533        schema.add_field("missing", FieldType::new("string"));
3534
3535        let value = Value::Object(
3536            vec![("present".to_string(), Value::Int(42))].into_iter().collect()
3537        );
3538
3539        let mut out = String::new();
3540        write_tuple(&mut out, &value, &schema, &schemas, 0, &FormatOptions::default());
3541        assert!(out.contains("42"), "Present field should be written");
3542        assert!(out.contains("~"), "Missing field should be ~");
3543    }
3544
3545    #[test]
3546    fn test_write_tuple_non_object() {
3547        // When tuple receives a non-object value
3548        let schemas = IndexMap::new();
3549        let schema = Schema::new("test");
3550
3551        let mut out = String::new();
3552        write_tuple(&mut out, &Value::Int(42), &schema, &schemas, 0, &FormatOptions::default());
3553        assert_eq!(out, "42");
3554    }
3555
3556    // =========================================================================
3557    // Coverage: array_matches_schema()
3558    // =========================================================================
3559
3560    #[test]
3561    fn test_array_matches_schema_empty() {
3562        let schema = Schema::new("test");
3563        assert!(!array_matches_schema(&[], &schema));
3564    }
3565
3566    #[test]
3567    fn test_array_matches_schema_non_object() {
3568        let schema = Schema::new("test");
3569        assert!(!array_matches_schema(&[Value::Int(1)], &schema));
3570    }
3571
3572    #[test]
3573    fn test_array_matches_schema_matching() {
3574        let mut schema = Schema::new("user");
3575        schema.add_field("name", FieldType::new("string"));
3576        schema.add_field("age", FieldType::new("int"));
3577
3578        let arr = vec![Value::Object(vec![
3579            ("name".to_string(), Value::String("Alice".to_string())),
3580            ("age".to_string(), Value::Int(30)),
3581        ].into_iter().collect())];
3582
3583        assert!(array_matches_schema(&arr, &schema));
3584    }
3585
3586    // =========================================================================
3587    // Coverage: from_dto, from_dto_array, to_dto, to_dto_vec
3588    // =========================================================================
3589
3590    #[test]
3591    fn test_from_dto_and_back() {
3592        use crate::convert::{FromTeaLeaf, ConvertError};
3593
3594        let doc = TeaLeaf::from_dto("greeting", &"hello".to_string());
3595        assert_eq!(doc.get("greeting").unwrap().as_str(), Some("hello"));
3596
3597        let result: std::result::Result<String, ConvertError> = String::from_tealeaf_value(doc.get("greeting").unwrap());
3598        assert_eq!(result.unwrap(), "hello");
3599    }
3600
3601    #[test]
3602    fn test_from_dto_array() {
3603        let items = vec!["apple".to_string(), "banana".to_string()];
3604        let doc = TeaLeaf::from_dto_array("fruits", &items);
3605        let arr = doc.get("fruits").unwrap().as_array().unwrap();
3606        assert_eq!(arr.len(), 2);
3607        assert_eq!(arr[0].as_str(), Some("apple"));
3608    }
3609
3610    #[test]
3611    fn test_to_dto_missing_key() {
3612        let doc = TeaLeaf::new(IndexMap::new(), IndexMap::new());
3613        let result: Result<String> = doc.to_dto("missing");
3614        assert!(result.is_err());
3615    }
3616
3617    #[test]
3618    fn test_to_dto_vec() {
3619        let mut data = IndexMap::new();
3620        data.insert("items".to_string(), Value::Array(vec![
3621            Value::String("a".to_string()),
3622            Value::String("b".to_string()),
3623        ]));
3624        let doc = TeaLeaf::new(IndexMap::new(), data);
3625        let result: Vec<String> = doc.to_dto_vec("items").unwrap();
3626        assert_eq!(result, vec!["a", "b"]);
3627    }
3628
3629    #[test]
3630    fn test_to_dto_vec_not_array() {
3631        let mut data = IndexMap::new();
3632        data.insert("item".to_string(), Value::String("not_an_array".to_string()));
3633        let doc = TeaLeaf::new(IndexMap::new(), data);
3634        let result: Result<Vec<String>> = doc.to_dto_vec("item");
3635        assert!(result.is_err());
3636    }
3637
3638    #[test]
3639    fn test_to_dto_vec_missing_key() {
3640        let doc = TeaLeaf::new(IndexMap::new(), IndexMap::new());
3641        let result: Result<Vec<String>> = doc.to_dto_vec("missing");
3642        assert!(result.is_err());
3643    }
3644
3645    // =========================================================================
3646    // Coverage: set_root_array, SchemaInferrer edge cases
3647    // =========================================================================
3648
3649    #[test]
3650    fn test_set_root_array() {
3651        let mut doc = TeaLeaf::new(IndexMap::new(), IndexMap::new());
3652        assert!(!doc.is_root_array);
3653        doc.set_root_array(true);
3654        assert!(doc.is_root_array);
3655    }
3656
3657    #[test]
3658    fn test_schema_inferrer_non_uniform_array() {
3659        // Array with different object structures should not create a schema
3660        let json = r#"{"items": [{"a": 1}, {"b": 2}]}"#;
3661        let doc = TeaLeaf::from_json_with_schemas(json).unwrap();
3662        assert!(doc.schema("item").is_none(), "Non-uniform array should not produce schema");
3663    }
3664
3665    #[test]
3666    fn test_schema_inferrer_mixed_types_in_array() {
3667        // Array with non-objects
3668        let json = r#"{"items": [1, 2, 3]}"#;
3669        let doc = TeaLeaf::from_json_with_schemas(json).unwrap();
3670        assert!(doc.schema("item").is_none(), "Non-object array should not produce schema");
3671    }
3672
3673    #[test]
3674    fn test_schema_inferrer_empty_array() {
3675        let json = r#"{"items": []}"#;
3676        let doc = TeaLeaf::from_json_with_schemas(json).unwrap();
3677        assert!(doc.schema("item").is_none(), "Empty array should not produce schema");
3678    }
3679
3680    #[test]
3681    fn test_schema_inferrer_duplicate_schema_name() {
3682        // Two arrays that would produce the same schema name
3683        let json = r#"{
3684            "items": [{"id": 1, "name": "A"}],
3685            "nested": {"items": [{"id": 2, "name": "B"}]}
3686        }"#;
3687        let doc = TeaLeaf::from_json_with_schemas(json).unwrap();
3688        // Should have "item" schema (first one wins)
3689        assert!(doc.schema("item").is_some());
3690    }
3691
3692    #[test]
3693    fn test_schema_inferrer_int_float_merge() {
3694        // Field that has int in one record and float in another
3695        let json = r#"{"values": [{"x": 1}, {"x": 2.5}]}"#;
3696        let doc = TeaLeaf::from_json_with_schemas(json).unwrap();
3697        let schema = doc.schema("value").unwrap();
3698        let x_field = schema.fields.iter().find(|f| f.name == "x").unwrap();
3699        assert_eq!(x_field.field_type.base, "float", "Int+Float merge should produce float");
3700    }
3701
3702    #[test]
3703    fn test_schema_inference_with_root_array() {
3704        let json = r#"[{"id": 1, "name": "Alice"}, {"id": 2, "name": "Bob"}]"#;
3705        let doc = TeaLeaf::from_json_with_schemas(json).unwrap();
3706        // Root array is stored under "root" key - the schema name should be derived from "root"
3707        // The singularize of "root" is "root" (no trailing s)
3708        // Actually, root arrays aren't typically analyzed because the key is "root" and it goes through analyze_value
3709        let root_val = doc.get("root").unwrap().as_array().unwrap();
3710        assert_eq!(root_val.len(), 2);
3711    }
3712
3713    // =========================================================================
3714    // Coverage: dumps_with_schemas with quoting in schemas
3715    // =========================================================================
3716
3717    #[test]
3718    fn test_dumps_with_schemas_string_quoting_in_tuples() {
3719        let mut schemas = IndexMap::new();
3720        let mut schema = Schema::new("item");
3721        schema.add_field("name", FieldType::new("string"));
3722        schemas.insert("item".to_string(), schema);
3723
3724        let mut data = IndexMap::new();
3725        data.insert("items".to_string(), Value::Array(vec![
3726            Value::Object(vec![
3727                ("name".to_string(), Value::String("hello world".to_string())),
3728            ].into_iter().collect()),
3729        ]));
3730
3731        let schema_order = vec!["item".to_string()];
3732        let output = dumps_with_schemas(&data, &schemas, &schema_order, &IndexMap::new(), &[]);
3733        assert!(output.contains("\"hello world\""), "String with space should be quoted in tuple");
3734    }
3735
3736    #[test]
3737    fn test_dumps_with_schemas_array_without_schema() {
3738        // Array that doesn't match any schema
3739        let schemas = IndexMap::new();
3740        let mut data = IndexMap::new();
3741        data.insert("nums".to_string(), Value::Array(vec![Value::Int(1), Value::Int(2)]));
3742
3743        let output = dumps_with_schemas(&data, &schemas, &[], &IndexMap::new(), &[]);
3744        assert!(output.contains("[1, 2]"), "Should use regular array format");
3745    }
3746
3747    // =========================================================================
3748    // Coverage: convenience functions open(), parse(), root array to_json
3749    // =========================================================================
3750
3751    #[test]
3752    fn test_open_convenience_function() {
3753        // Write a binary file first, then open with the convenience function
3754        let dir = std::env::temp_dir();
3755        let path = dir.join("test_open_conv.tlbx");
3756
3757        let mut data = IndexMap::new();
3758        data.insert("x".to_string(), Value::Int(42));
3759        let doc = TeaLeaf::new(IndexMap::new(), data);
3760        doc.compile(&path, false).unwrap();
3761
3762        let reader = super::open(&path).unwrap();
3763        assert_eq!(reader.get("x").unwrap().as_int(), Some(42));
3764        std::fs::remove_file(&path).ok();
3765    }
3766
3767    #[test]
3768    fn test_parse_convenience_function() {
3769        let doc = super::parse("greeting: hello").unwrap();
3770        assert_eq!(doc.get("greeting").unwrap().as_str(), Some("hello"));
3771    }
3772
3773    #[test]
3774    fn test_to_json_root_array() {
3775        let mut data = IndexMap::new();
3776        data.insert("root".to_string(), Value::Array(vec![Value::Int(1), Value::Int(2)]));
3777        let mut doc = TeaLeaf::new(IndexMap::new(), data);
3778        doc.set_root_array(true);
3779
3780        let json = doc.to_json().unwrap();
3781        let parsed: serde_json::Value = serde_json::from_str(&json).unwrap();
3782        assert!(parsed.is_array(), "Root array to_json should output array");
3783        assert_eq!(parsed.as_array().unwrap().len(), 2);
3784    }
3785
3786    #[test]
3787    fn test_to_json_compact_root_array() {
3788        let mut data = IndexMap::new();
3789        data.insert("root".to_string(), Value::Array(vec![Value::Int(1)]));
3790        let mut doc = TeaLeaf::new(IndexMap::new(), data);
3791        doc.set_root_array(true);
3792
3793        let json = doc.to_json_compact().unwrap();
3794        assert_eq!(json, "[1]");
3795    }
3796
3797    #[test]
3798    fn test_infer_type_bool_value() {
3799        let it = infer_type(&Value::Bool(true));
3800        assert!(matches!(it, InferredType::Bool));
3801    }
3802
3803    #[test]
3804    fn test_schema_inference_nested_object_fields() {
3805        // JSON with nested objects inside array items
3806        let json = r#"{"records": [
3807            {"id": 1, "details": {"city": "NYC", "zip": "10001"}},
3808            {"id": 2, "details": {"city": "LA", "zip": "90001"}}
3809        ]}"#;
3810        let doc = TeaLeaf::from_json_with_schemas(json).unwrap();
3811        // Should infer both "record" and "detail" schemas
3812        assert!(doc.schema("record").is_some(), "Should infer record schema");
3813    }
3814
3815    #[test]
3816    fn test_schema_inference_not_all_objects_returns_early() {
3817        // Array where second element is not an object
3818        let json = r#"{"items": [{"a": 1}, "not_an_object"]}"#;
3819        let doc = TeaLeaf::from_json_with_schemas(json).unwrap();
3820        assert!(doc.schema("item").is_none(), "Mixed array should not produce schema");
3821    }
3822
3823    #[test]
3824    fn test_to_tl_with_schemas_with_nested_array_field() {
3825        // Schema with an array-typed field
3826        let mut schemas = IndexMap::new();
3827        let mut schema = Schema::new("user");
3828        schema.add_field("name", FieldType::new("string"));
3829        schema.add_field("tags", FieldType::new("string").array());
3830        schemas.insert("user".to_string(), schema);
3831
3832        let mut data = IndexMap::new();
3833        let mut obj = IndexMap::new();
3834        obj.insert("name".to_string(), Value::String("Alice".into()));
3835        obj.insert("tags".to_string(), Value::Array(vec![
3836            Value::String("admin".into()),
3837            Value::String("active".into()),
3838        ]));
3839        data.insert("users".to_string(), Value::Array(vec![Value::Object(obj)]));
3840
3841        let doc = TeaLeaf::new(schemas, data);
3842        let text = doc.to_tl_with_schemas();
3843        assert!(text.contains("@struct user"), "Should have schema definition");
3844        assert!(text.contains("@table user"), "Should use table format");
3845    }
3846
3847    // =========================================================================
3848    // Issue 6: Improved schema matching
3849    // =========================================================================
3850
3851    #[test]
3852    fn test_schema_matching_nullable_fields_allowed_missing() {
3853        // Schema with nullable field should match objects missing that field
3854        let mut schemas = IndexMap::new();
3855        let mut s = Schema::new("Item");
3856        s.add_field("id", FieldType::new("int"));
3857        s.add_field("label", FieldType::new("string").nullable());
3858        schemas.insert("Item".to_string(), s);
3859
3860        let mut obj1 = IndexMap::new();
3861        obj1.insert("id".to_string(), Value::Int(1));
3862        // label is missing — but it's nullable, so it should still match
3863
3864        let doc = TeaLeaf {
3865            schemas,
3866            unions: IndexMap::new(),
3867            data: {
3868                let mut d = IndexMap::new();
3869                d.insert("items".to_string(), Value::Array(vec![Value::Object(obj1)]));
3870                d
3871            },
3872            is_root_array: false,
3873        };
3874        let result = doc.find_schema_for_value(doc.data.get("items").unwrap(), "items");
3875        assert!(result.is_some(), "Should match schema when nullable field is missing");
3876        assert_eq!(result.unwrap().name, "Item");
3877    }
3878
3879    #[test]
3880    fn test_schema_matching_rejects_extra_keys() {
3881        // Objects with extra keys not in schema should not match
3882        let mut schemas = IndexMap::new();
3883        let mut s = Schema::new("Point");
3884        s.add_field("x", FieldType::new("int"));
3885        s.add_field("y", FieldType::new("int"));
3886        schemas.insert("Point".to_string(), s);
3887
3888        let mut obj = IndexMap::new();
3889        obj.insert("x".to_string(), Value::Int(1));
3890        obj.insert("y".to_string(), Value::Int(2));
3891        obj.insert("z".to_string(), Value::Int(3)); // extra field
3892
3893        let doc = TeaLeaf {
3894            schemas,
3895            unions: IndexMap::new(),
3896            data: {
3897                let mut d = IndexMap::new();
3898                d.insert("points".to_string(), Value::Array(vec![Value::Object(obj)]));
3899                d
3900            },
3901            is_root_array: false,
3902        };
3903        let result = doc.find_schema_for_value(doc.data.get("points").unwrap(), "points");
3904        assert!(result.is_none(), "Should NOT match schema when extra keys are present");
3905    }
3906
3907    #[test]
3908    fn test_schema_matching_empty_array_no_matching_name() {
3909        let mut schemas = IndexMap::new();
3910        let mut s = Schema::new("Anything");
3911        s.add_field("x", FieldType::new("int"));
3912        schemas.insert("Anything".to_string(), s);
3913
3914        let doc = TeaLeaf {
3915            schemas,
3916            unions: IndexMap::new(),
3917            data: {
3918                let mut d = IndexMap::new();
3919                d.insert("empty".to_string(), Value::Array(vec![]));
3920                d
3921            },
3922            is_root_array: false,
3923        };
3924        let result = doc.find_schema_for_value(doc.data.get("empty").unwrap(), "empty");
3925        assert!(result.is_none(), "Empty array should return None when no schema name matches");
3926    }
3927
3928    #[test]
3929    fn test_schema_matching_empty_array_matches_by_name() {
3930        let mut schemas = IndexMap::new();
3931        let mut s = Schema::new("item");
3932        s.add_field("id", FieldType::new("int"));
3933        schemas.insert("item".to_string(), s);
3934
3935        let doc = TeaLeaf {
3936            schemas,
3937            unions: IndexMap::new(),
3938            data: {
3939                let mut d = IndexMap::new();
3940                d.insert("items".to_string(), Value::Array(vec![]));
3941                d
3942            },
3943            is_root_array: false,
3944        };
3945        let result = doc.find_schema_for_value(doc.data.get("items").unwrap(), "items");
3946        assert!(result.is_some(), "Empty array should match schema by singularized key name");
3947        assert_eq!(result.unwrap().name, "item");
3948    }
3949
3950    // =========================================================================
3951    // Issue 12: Negative timestamp formatting
3952    // =========================================================================
3953
3954    #[test]
3955    fn test_negative_timestamp_formatting() {
3956        // 1969-12-31T23:59:59Z = -1000 ms (1 second before epoch)
3957        let formatted = format_timestamp_millis(-1000, 0);
3958        assert_eq!(formatted, "1969-12-31T23:59:59Z");
3959    }
3960
3961    #[test]
3962    fn test_negative_timestamp_with_millis() {
3963        // -500 ms = 1969-12-31T23:59:59.500Z
3964        let formatted = format_timestamp_millis(-500, 0);
3965        assert_eq!(formatted, "1969-12-31T23:59:59.500Z");
3966    }
3967
3968    #[test]
3969    fn test_negative_timestamp_full_day() {
3970        // -86400000 ms = exactly one day before epoch = 1969-12-31T00:00:00Z
3971        let formatted = format_timestamp_millis(-86_400_000, 0);
3972        assert_eq!(formatted, "1969-12-31T00:00:00Z");
3973    }
3974
3975    #[test]
3976    fn test_epoch_timestamp() {
3977        let formatted = format_timestamp_millis(0, 0);
3978        assert_eq!(formatted, "1970-01-01T00:00:00Z");
3979    }
3980
3981    #[test]
3982    fn test_positive_timestamp_with_millis() {
3983        // 1123ms = 1 second + 123ms after epoch
3984        let formatted = format_timestamp_millis(1123, 0);
3985        assert_eq!(formatted, "1970-01-01T00:00:01.123Z");
3986    }
3987
3988    #[test]
3989    fn test_negative_timestamp_json_export() {
3990        let mut data = IndexMap::new();
3991        data.insert("ts".to_string(), Value::Timestamp(-1000, 0));
3992        let doc = TeaLeaf::new(IndexMap::new(), data);
3993        let json = doc.to_json().unwrap();
3994        assert!(json.contains("1969-12-31"), "Negative timestamp should format as pre-epoch date: {}", json);
3995    }
3996
3997    // =========================================================================
3998    // Issue 7: Deterministic serialization (IndexMap preserves insertion order)
3999    // =========================================================================
4000
4001    #[test]
4002    fn test_compile_deterministic_key_order() {
4003        // Two documents with the same data in the same insertion order
4004        // should produce identical binary output
4005        let dir = std::env::temp_dir();
4006        let path1 = dir.join("test_deterministic_1.tlbx");
4007        let path2 = dir.join("test_deterministic_2.tlbx");
4008
4009        let mut data1 = IndexMap::new();
4010        data1.insert("alpha".to_string(), Value::Int(1));
4011        data1.insert("beta".to_string(), Value::Int(2));
4012        data1.insert("gamma".to_string(), Value::Int(3));
4013        let doc1 = TeaLeaf::new(IndexMap::new(), data1);
4014        doc1.compile(&path1, false).unwrap();
4015
4016        let mut data2 = IndexMap::new();
4017        data2.insert("alpha".to_string(), Value::Int(1));
4018        data2.insert("beta".to_string(), Value::Int(2));
4019        data2.insert("gamma".to_string(), Value::Int(3));
4020        let doc2 = TeaLeaf::new(IndexMap::new(), data2);
4021        doc2.compile(&path2, false).unwrap();
4022
4023        let bytes1 = std::fs::read(&path1).unwrap();
4024        let bytes2 = std::fs::read(&path2).unwrap();
4025        assert_eq!(bytes1, bytes2, "Binary output should be identical for same insertion order");
4026
4027        std::fs::remove_file(&path1).ok();
4028        std::fs::remove_file(&path2).ok();
4029    }
4030
4031    #[test]
4032    fn test_dumps_deterministic_key_order() {
4033        // dumps() preserves IndexMap insertion order deterministically
4034        let mut data = IndexMap::new();
4035        data.insert("zebra".to_string(), Value::Int(3));
4036        data.insert("alpha".to_string(), Value::Int(1));
4037        data.insert("middle".to_string(), Value::Int(2));
4038
4039        let output1 = dumps(&data);
4040        let output2 = dumps(&data);
4041        assert_eq!(output1, output2, "dumps() should be deterministic");
4042        // Keys should appear in insertion order (IndexMap preserves insertion order)
4043        let lines: Vec<&str> = output1.trim().lines().collect();
4044        assert!(lines[0].starts_with("zebra:"), "First key should be 'zebra', got: {}", lines[0]);
4045        assert!(lines[1].starts_with("alpha:"), "Second key should be 'alpha', got: {}", lines[1]);
4046        assert!(lines[2].starts_with("middle:"), "Third key should be 'middle', got: {}", lines[2]);
4047    }
4048
4049    // =========================================================================
4050    // Order-preservation integration tests
4051    // =========================================================================
4052
4053    #[test]
4054    fn test_json_parse_preserves_key_order() {
4055        // JSON with intentionally non-alphabetical keys
4056        let json = r#"{"zebra": 1, "apple": 2, "mango": 3, "banana": 4}"#;
4057        let doc = TeaLeaf::from_json(json).unwrap();
4058        let keys: Vec<&String> = doc.data.keys().collect();
4059        assert_eq!(keys, &["zebra", "apple", "mango", "banana"],
4060            "JSON parse should preserve key insertion order");
4061    }
4062
4063    #[test]
4064    fn test_json_roundtrip_preserves_key_order() {
4065        let json = r#"{"zebra": 1, "apple": 2, "mango": 3}"#;
4066        let doc = TeaLeaf::from_json(json).unwrap();
4067        let json_out = doc.to_json().unwrap();
4068        // Parse back and verify order
4069        let parsed: serde_json::Value = serde_json::from_str(&json_out).unwrap();
4070        let keys: Vec<&str> = parsed.as_object().unwrap().keys().map(|s| s.as_str()).collect();
4071        assert_eq!(keys, &["zebra", "apple", "mango"],
4072            "JSON round-trip should preserve key order");
4073    }
4074
4075    #[test]
4076    fn test_tl_text_preserves_section_order() {
4077        let input = "zebra: 1\napple: 2\nmango: 3\n";
4078        let doc = TeaLeaf::parse(input).unwrap();
4079        let keys: Vec<&String> = doc.data.keys().collect();
4080        assert_eq!(keys, &["zebra", "apple", "mango"],
4081            "TL text parse should preserve section order");
4082
4083        // Serialize back and verify order
4084        let output = doc.to_tl_with_schemas();
4085        let lines: Vec<&str> = output.trim().lines().collect();
4086        assert!(lines[0].starts_with("zebra:"), "got: {}", lines[0]);
4087        assert!(lines[1].starts_with("apple:"), "got: {}", lines[1]);
4088        assert!(lines[2].starts_with("mango:"), "got: {}", lines[2]);
4089    }
4090
4091    #[test]
4092    fn test_binary_roundtrip_preserves_section_order() {
4093        let json = r#"{"zebra": 1, "apple": 2, "mango": 3, "banana": 4}"#;
4094        let doc = TeaLeaf::from_json(json).unwrap();
4095
4096        let dir = std::env::temp_dir();
4097        let path = dir.join("test_order_preserve.tlbx");
4098        doc.compile(&path, false).unwrap();
4099
4100        let reader = crate::Reader::open(&path).unwrap();
4101        let doc2 = TeaLeaf::from_reader(&reader).unwrap();
4102        let keys: Vec<&String> = doc2.data.keys().collect();
4103        assert_eq!(keys, &["zebra", "apple", "mango", "banana"],
4104            "Binary round-trip should preserve section order");
4105        std::fs::remove_file(&path).ok();
4106    }
4107
4108    #[test]
4109    fn test_object_field_order_preserved_through_binary() {
4110        let json = r#"{"data": {"z_last": 1, "a_first": 2, "m_middle": 3}}"#;
4111        let doc = TeaLeaf::from_json(json).unwrap();
4112
4113        let dir = std::env::temp_dir();
4114        let path = dir.join("test_obj_order.tlbx");
4115        doc.compile(&path, false).unwrap();
4116
4117        let reader = crate::Reader::open(&path).unwrap();
4118        let val = reader.get("data").unwrap();
4119        let obj = val.as_object().unwrap();
4120        let keys: Vec<&String> = obj.keys().collect();
4121        assert_eq!(keys, &["z_last", "a_first", "m_middle"],
4122            "Object field order should be preserved through binary round-trip");
4123        std::fs::remove_file(&path).ok();
4124    }
4125
4126    #[test]
4127    fn test_nested_object_order_preserved() {
4128        let json = r#"{"outer": {"z": {"c": 3, "a": 1, "b": 2}, "a": {"x": 10, "w": 20}}}"#;
4129        let doc = TeaLeaf::from_json(json).unwrap();
4130        let tl = doc.to_tl_with_schemas();
4131
4132        // Parse back and check nested order
4133        let doc2 = TeaLeaf::parse(&tl).unwrap();
4134        let outer = doc2.get("outer").unwrap().as_object().unwrap();
4135        let outer_keys: Vec<&String> = outer.keys().collect();
4136        assert_eq!(outer_keys, &["z", "a"], "Outer keys order preserved");
4137
4138        let z_obj = outer.get("z").unwrap().as_object().unwrap();
4139        let z_keys: Vec<&String> = z_obj.keys().collect();
4140        assert_eq!(z_keys, &["c", "a", "b"], "Nested object keys order preserved");
4141    }
4142
4143    #[test]
4144    fn test_schema_order_preserved_in_text() {
4145        let input = r#"
4146            @struct Zebra (z_name: string)
4147            @struct Apple (a_name: string)
4148            items: [1, 2, 3]
4149        "#;
4150        let doc = TeaLeaf::parse(input).unwrap();
4151        let schema_keys: Vec<&String> = doc.schemas.keys().collect();
4152        assert_eq!(schema_keys, &["Zebra", "Apple"],
4153            "Schema definition order should be preserved");
4154    }
4155
4156    // -------------------------------------------------------------------------
4157    // Fuzz regression tests (full serialize/roundtrip paths)
4158    // -------------------------------------------------------------------------
4159
4160    #[test]
4161    fn test_fuzz_crash_ba05f4f8_serialize_day_zero_no_panic() {
4162        // Regression: fuzz_serialize crash-ba05f4f81615e2bf2b01137126cd772c6c0cc6d2
4163        // Timestamp with month=0 or day=0 caused u32 underflow in days_from_epoch.
4164        // Exercises the full fuzz_serialize path: parse → to_json → to_tl → re-parse.
4165        let inputs = [
4166            "ts: 2024-01-00T10:30:00Z",  // day=0
4167            "ts: 2024-00-15T10:30:00Z",  // month=0
4168            "ts: 6000-00-00T00:00:00Z",  // both zero
4169        ];
4170        for input in &inputs {
4171            // parse must not panic (should return Err)
4172            let result = TeaLeaf::parse(input);
4173            if let Ok(tl) = result {
4174                let _ = tl.to_json();
4175                let _ = tl.to_json_compact();
4176                let text = tl.to_tl_with_schemas();
4177                let _ = TeaLeaf::parse(&text);
4178            }
4179        }
4180    }
4181
4182    #[test]
4183    fn test_fuzz_crash_b085ba0e_roundtrip_day_zero_no_panic() {
4184        // Regression: fuzz_roundtrip crash-b085ba0e656f074031d8c4cb5173313785fa79d1
4185        // Same days_from_epoch underflow, hit through the roundtrip path.
4186        // Exercises the full fuzz_roundtrip path: parse → compile → read → walk.
4187        let inputs = [
4188            "ts: 4001-03-00T00:00:00Z",  // day=0 (pattern from artifact)
4189            "ts: 4401-03-00T00:00:00Z",  // variant
4190        ];
4191        for input in &inputs {
4192            let result = TeaLeaf::parse(input);
4193            if let Ok(tl) = result {
4194                let tmp = tempfile::NamedTempFile::new().unwrap();
4195                if tl.compile(tmp.path(), false).is_ok() {
4196                    let bytes = std::fs::read(tmp.path()).unwrap();
4197                    if let Ok(reader) = Reader::from_bytes(bytes) {
4198                        for key in reader.keys() {
4199                            let _ = reader.get(key);
4200                        }
4201                    }
4202                }
4203            }
4204        }
4205    }
4206
4207    #[test]
4208    fn test_fuzz_crash_48767e10_json_schemas_bare_dash_roundtrip() {
4209        // Regression: fuzz_json_schemas crash-48767e10b4ec71542bfbee2bc358b1e21831a259
4210        // JSON string "-" was serialized unquoted, causing re-parse failure.
4211        for input in [
4212            r#""-""#, r#""+""#, r#""--""#, r#""-foo""#,
4213            r#"{"a": "-"}"#, r#"{"a": "+"}"#,
4214            "\"\\u0660\"",  // Arabic-Indic digit zero
4215        ] {
4216            let tl = TeaLeaf::from_json_with_schemas(input);
4217            if let Ok(tl) = tl {
4218                let text = tl.to_tl_with_schemas();
4219                let reparsed = TeaLeaf::parse(&text);
4220                assert!(
4221                    reparsed.is_ok(),
4222                    "re-parse failed for JSON input {}",
4223                    input,
4224                );
4225            }
4226        }
4227    }
4228
4229    #[test]
4230    fn test_fuzz_crash_820dac71_empty_key_roundtrip() {
4231        // Regression: fuzz_json_schemas crash-820dac71c95d324067cd88de5f24897c65ace57a
4232        // JSON object with empty key was serialized without quoting, losing the key.
4233        for input in [
4234            r#"{"":{}}"#,                // empty key with empty object
4235            r#"[{"":{}}}]"#,             // root array variant (crash-66a8d85176f76ed68ada9f9526abe4efd8352f27)
4236            r#"{"":"value"}"#,            // empty key with string value
4237        ] {
4238            if let Ok(tl) = TeaLeaf::from_json_with_schemas(input) {
4239                let text = tl.to_tl_with_schemas();
4240                let reparsed = TeaLeaf::parse(&text);
4241                assert!(
4242                    reparsed.is_ok(),
4243                    "re-parse failed for JSON input {}",
4244                    input,
4245                );
4246            }
4247        }
4248    }
4249
4250    #[test]
4251    fn test_fuzz_crash_66a8d851_root_array_empty_key() {
4252        // Regression: fuzz_json_schemas crash-66a8d85176f76ed68ada9f9526abe4efd8352f27
4253        // Root array with empty-key object: schema inference + to_tl_with_schemas roundtrip
4254        let input = r#"[{"":{}}]"#;
4255        if let Ok(tl) = TeaLeaf::from_json_with_schemas(input) {
4256            let text = tl.to_tl_with_schemas();
4257            let reparsed = TeaLeaf::parse(&text);
4258            assert!(reparsed.is_ok(), "re-parse failed for root array with empty key");
4259        }
4260    }
4261
4262    #[test]
4263    fn test_fuzz_crash_847a9194_uint_roundtrip() {
4264        // Regression: fuzz_json_schemas crash-847a919462bb567fab268023a5a29d04e92db779
4265        // Large u64 values (> i64::MAX) were demoted to f64 on re-parse, losing precision.
4266        let input = "9999999999999999999";  // > i64::MAX, fits in u64
4267        let tl = TeaLeaf::from_json_with_schemas(input).unwrap();
4268        let text = tl.to_tl_with_schemas();
4269        let reparsed = TeaLeaf::parse(&text).unwrap();
4270        let orig = tl.data.get("root").unwrap();
4271        let re = reparsed.data.get("root").unwrap();
4272        assert_eq!(orig, re, "UInt roundtrip mismatch");
4273    }
4274
4275    #[test]
4276    fn test_fuzz_crash_3902c5cc_float_infinity_roundtrip() {
4277        // Regression: fuzz_serialize crash-3902c5cc99e5e4150d08d40372c86207fbc6db7f
4278        // 5e550 and -5e550 overflow f64 and are now stored as JsonNumber.
4279        // NaN remains Float(NaN).
4280        let tl = TeaLeaf::parse("b: NaN").unwrap();
4281        let text = tl.to_tl_with_schemas();
4282        let reparsed = TeaLeaf::parse(&text).unwrap();
4283        let orig = tl.data.get("b").unwrap();
4284        let re = reparsed.data.get("b").unwrap();
4285        match (orig, re) {
4286            (Value::Float(a), Value::Float(b)) => {
4287                assert_eq!(a.to_bits(), b.to_bits(), "NaN roundtrip failed");
4288            }
4289            _ => panic!("expected Float, got {:?} / {:?}", orig, re),
4290        }
4291
4292        // 5e550 and -5e550 are now JsonNumber (overflow f64)
4293        for input in &["b: 5e550", "b: -5e550"] {
4294            let tl = TeaLeaf::parse(input).unwrap();
4295            let text = tl.to_tl_with_schemas();
4296            let reparsed = TeaLeaf::parse(&text).unwrap();
4297            let orig = tl.data.get("b").unwrap();
4298            let re = reparsed.data.get("b").unwrap();
4299            match (orig, re) {
4300                (Value::JsonNumber(a), Value::JsonNumber(b)) => {
4301                    assert_eq!(a, b, "JsonNumber roundtrip failed for {}", input);
4302                }
4303                _ => panic!("expected JsonNumber, got {:?} / {:?}", orig, re),
4304            }
4305        }
4306    }
4307
4308    #[test]
4309    fn test_needs_quoting_bare_sign() {
4310        assert!(needs_quoting("-"));
4311        assert!(needs_quoting("+"));
4312        assert!(needs_quoting("--"));
4313        assert!(needs_quoting("-foo"));
4314        assert!(needs_quoting("+bar"));
4315        assert!(needs_quoting("-1")); // negative number
4316        assert!(needs_quoting("+1")); // positive number
4317        assert!(needs_quoting("\u{0660}")); // Arabic-Indic digit zero
4318        assert!(!needs_quoting("hello"));
4319        assert!(!needs_quoting("foo-bar"));
4320    }
4321
4322    #[test]
4323    fn test_fuzz_crash_nan_string_needs_quoting() {
4324        // Regression: fuzz_parse/fuzz_serialize crash — string "NaN" must be quoted
4325        // to avoid re-parsing as Float(NaN).
4326        assert!(needs_quoting("NaN"));
4327        assert!(needs_quoting("inf"));
4328        assert!(needs_quoting("Infinity"));
4329
4330        // Roundtrip: String("NaN") must survive parse → dumps → re-parse
4331        for word in &["NaN", "inf", "Infinity"] {
4332            let input = format!("a: \"{}\"", word);
4333            let tl = TeaLeaf::parse(&input).unwrap();
4334            assert!(matches!(tl.get("a"), Some(Value::String(_))));
4335            let text = dumps(&tl.data);
4336            let reparsed = TeaLeaf::parse(&text).unwrap();
4337            assert_eq!(
4338                reparsed.get("a").unwrap().as_str(),
4339                Some(*word),
4340                "roundtrip failed for string {:?}",
4341                word,
4342            );
4343        }
4344    }
4345
4346    #[test]
4347    fn test_json_any_type_compile_roundtrip() {
4348        // Regression: from_json_with_schemas infers "any" for fields whose nested objects
4349        // don't match a schema. encode_typed_value must fall back to generic encoding
4350        // instead of erroring with "requires a schema for encoding".
4351        use tempfile::NamedTempFile;
4352
4353        let json = r#"[
4354            {"name": "alice", "meta": {"x": 1}},
4355            {"name": "bob",   "meta": {"y": "two", "z": true}}
4356        ]"#;
4357        let doc = TeaLeaf::from_json_with_schemas(json).unwrap();
4358        // "meta" has varying shapes → inferred as "any"
4359        let temp = NamedTempFile::new().unwrap();
4360        doc.compile(temp.path(), false).expect("compile with 'any' field must not error");
4361
4362        // Read back and verify data survived
4363        let reader = Reader::open(temp.path()).unwrap();
4364        assert_eq!(reader.keys().len(), doc.data.len());
4365    }
4366
4367    #[test]
4368    fn json_any_array_binary_roundtrip() {
4369        // Regression: []any fields (from JSON inference of heterogeneous arrays inside
4370        // schema-typed objects) caused binary corruption. encode_typed_value wrote
4371        // TLType::Struct as the element type for "any" (the to_tl_type default),
4372        // but the actual data was heterogeneous. The reader then read garbage bytes
4373        // as schema indices, crashing with "schema index N out of bounds".
4374        use tempfile::NamedTempFile;
4375
4376        let json = r#"{
4377            "events": [
4378                {
4379                    "id": "E1",
4380                    "type": "sale",
4381                    "data": ["SKU-100", 3, 29.99, true],
4382                    "tags": ["flash", "online"]
4383                },
4384                {
4385                    "id": "E2",
4386                    "type": "return",
4387                    "data": ["SKU-200", 1, 15.0, false],
4388                    "tags": ["in-store"]
4389                }
4390            ]
4391        }"#;
4392        let doc = TeaLeaf::from_json_with_schemas(json).unwrap();
4393
4394        // Verify inference: "data" should be []any (heterogeneous), "tags" should be []string
4395        let event_schema = doc.schemas.get("event").expect("missing 'event' schema");
4396        let data_field = event_schema.fields.iter().find(|f| f.name == "data").unwrap();
4397        assert!(data_field.field_type.is_array, "data should be array");
4398        assert_eq!(data_field.field_type.base, "any", "data should be []any, got []{}", data_field.field_type.base);
4399
4400        // Compile to binary
4401        let temp = NamedTempFile::new().unwrap();
4402        doc.compile(temp.path(), false).expect("compile must not error");
4403
4404        // Read back and verify full data integrity
4405        let reader = Reader::open(temp.path()).unwrap();
4406        let events_val = reader.get("events").expect("missing 'events' key");
4407        let events = events_val.as_array().expect("events should be array");
4408        assert_eq!(events.len(), 2, "should have 2 events");
4409
4410        // Verify first event's heterogeneous data array
4411        let e1 = events[0].as_object().expect("event should be object");
4412        assert_eq!(e1.get("id").unwrap().as_str(), Some("E1"));
4413        let data1 = e1.get("data").unwrap().as_array().expect("data should be array");
4414        assert_eq!(data1.len(), 4);
4415        assert_eq!(data1[0].as_str(), Some("SKU-100"));
4416        assert_eq!(data1[2].as_float(), Some(29.99));
4417    }
4418
4419    #[test]
4420    fn retail_orders_json_binary_roundtrip() {
4421        // End-to-end: retail_orders.json → infer schemas → compile → read → JSON
4422        // Exercises the full path that was missing from the test suite: complex
4423        // real-world JSON with heterogeneous arrays ([]any) inside schema-typed objects.
4424        use tempfile::NamedTempFile;
4425
4426        let fixture = std::path::Path::new(env!("CARGO_MANIFEST_DIR"))
4427            .join("tests/fixtures/retail_orders.json");
4428        let json = std::fs::read_to_string(&fixture)
4429            .unwrap_or_else(|e| panic!("read fixture {}: {e}", fixture.display()));
4430
4431        let doc = TeaLeaf::from_json_with_schemas(&json).unwrap();
4432        let temp = NamedTempFile::new().unwrap();
4433        doc.compile(temp.path(), true).expect("compile retail_orders must not error");
4434
4435        // Read binary back to JSON and compare
4436        let reader = Reader::open(temp.path()).unwrap();
4437        let keys = reader.keys();
4438        assert_eq!(keys.len(), 5, "expected 5 top-level keys, got {keys:?}");
4439
4440        // Verify all sections are readable and have correct element counts
4441        let orders_val = reader.get("orders").unwrap();
4442        let orders = orders_val.as_array().expect("orders");
4443        assert_eq!(orders.len(), 10, "expected 10 orders");
4444
4445        let products_val = reader.get("products").unwrap();
4446        let products = products_val.as_array().expect("products");
4447        assert_eq!(products.len(), 4, "expected 4 products");
4448
4449        let customers_val = reader.get("customers").unwrap();
4450        let customers = customers_val.as_array().expect("customers");
4451        assert_eq!(customers.len(), 3, "expected 3 customers");
4452
4453        // Spot-check: first order preserves heterogeneous fields
4454        let order1 = orders[0].as_object().expect("order should be object");
4455        assert_eq!(order1.get("order_id").unwrap().as_str(), Some("ORD-2024-00001"));
4456        let items = order1.get("items").unwrap().as_array().expect("items");
4457        assert_eq!(items.len(), 3, "first order should have 3 items");
4458    }
4459
4460    #[test]
4461    fn fuzz_repro_json_schema_bool_field_name() {
4462        // Fuzz crash: field named "bool" conflicts with type keyword
4463        let input = r#"[{"bool":{"b":2}}]"#;
4464        let tl = TeaLeaf::from_json_with_schemas(input).unwrap();
4465        let tl_text = tl.to_tl_with_schemas();
4466        let reparsed = TeaLeaf::parse(&tl_text)
4467            .unwrap_or_else(|e| panic!("Re-parse failed: {e}\nTL text:\n{tl_text}"));
4468        assert_eq!(tl.data.len(), reparsed.data.len(), "key count mismatch");
4469        for (key, orig_val) in &tl.data {
4470            let re_val = reparsed.data.get(key).unwrap_or_else(|| panic!("lost key '{key}'"));
4471            assert_eq!(orig_val, re_val, "value mismatch for key '{key}'");
4472        }
4473    }
4474
4475    /// Helper: verify that a JSON field named after a built-in type correctly
4476    /// round-trips through TL text when schema inference is used.
4477    fn assert_builtin_name_text_roundtrip(type_name: &str, inner_json: &str) {
4478        let input = format!(r#"[{{"{type_name}":{inner_json}}}]"#);
4479        let tl = TeaLeaf::from_json_with_schemas(&input)
4480            .unwrap_or_else(|e| panic!("[{type_name}] from_json_with_schemas failed: {e}"));
4481        let tl_text = tl.to_tl_with_schemas();
4482
4483        // The schema should appear in the text output
4484        assert!(
4485            tl_text.contains(&format!("@struct {type_name}")),
4486            "[{type_name}] expected @struct {type_name} in TL text:\n{tl_text}"
4487        );
4488
4489        let reparsed = TeaLeaf::parse(&tl_text)
4490            .unwrap_or_else(|e| panic!("[{type_name}] re-parse failed: {e}\nTL text:\n{tl_text}"));
4491
4492        assert_eq!(
4493            tl.data.len(), reparsed.data.len(),
4494            "[{type_name}] key count mismatch"
4495        );
4496        for (key, orig_val) in &tl.data {
4497            let re_val = reparsed.data.get(key)
4498                .unwrap_or_else(|| panic!("[{type_name}] lost key '{key}'"));
4499            assert_eq!(orig_val, re_val, "[{type_name}] value mismatch for key '{key}'");
4500        }
4501    }
4502
4503    #[test]
4504    fn schema_name_shadows_builtin_bool() {
4505        assert_builtin_name_text_roundtrip("bool", r#"{"x":1}"#);
4506    }
4507
4508    #[test]
4509    fn schema_name_shadows_builtin_int() {
4510        // Inner value is a string so field type "string" doesn't collide with schema "int"
4511        assert_builtin_name_text_roundtrip("int", r#"{"x":"hello"}"#);
4512    }
4513
4514    #[test]
4515    fn schema_name_shadows_builtin_int8() {
4516        assert_builtin_name_text_roundtrip("int8", r#"{"x":"hello"}"#);
4517    }
4518
4519    #[test]
4520    fn schema_name_shadows_builtin_int16() {
4521        assert_builtin_name_text_roundtrip("int16", r#"{"x":"hello"}"#);
4522    }
4523
4524    #[test]
4525    fn schema_name_shadows_builtin_int32() {
4526        assert_builtin_name_text_roundtrip("int32", r#"{"x":"hello"}"#);
4527    }
4528
4529    #[test]
4530    fn schema_name_shadows_builtin_int64() {
4531        assert_builtin_name_text_roundtrip("int64", r#"{"x":"hello"}"#);
4532    }
4533
4534    #[test]
4535    fn schema_name_shadows_builtin_uint() {
4536        assert_builtin_name_text_roundtrip("uint", r#"{"x":"hello"}"#);
4537    }
4538
4539    #[test]
4540    fn schema_name_shadows_builtin_uint8() {
4541        assert_builtin_name_text_roundtrip("uint8", r#"{"x":"hello"}"#);
4542    }
4543
4544    #[test]
4545    fn schema_name_shadows_builtin_uint16() {
4546        assert_builtin_name_text_roundtrip("uint16", r#"{"x":"hello"}"#);
4547    }
4548
4549    #[test]
4550    fn schema_name_shadows_builtin_uint32() {
4551        assert_builtin_name_text_roundtrip("uint32", r#"{"x":"hello"}"#);
4552    }
4553
4554    #[test]
4555    fn schema_name_shadows_builtin_uint64() {
4556        assert_builtin_name_text_roundtrip("uint64", r#"{"x":"hello"}"#);
4557    }
4558
4559    #[test]
4560    fn schema_name_shadows_builtin_float() {
4561        assert_builtin_name_text_roundtrip("float", r#"{"x":1}"#);
4562    }
4563
4564    #[test]
4565    fn schema_name_shadows_builtin_float32() {
4566        assert_builtin_name_text_roundtrip("float32", r#"{"x":1}"#);
4567    }
4568
4569    #[test]
4570    fn schema_name_shadows_builtin_float64() {
4571        assert_builtin_name_text_roundtrip("float64", r#"{"x":1}"#);
4572    }
4573
4574    #[test]
4575    fn schema_name_shadows_builtin_string() {
4576        assert_builtin_name_text_roundtrip("string", r#"{"x":1}"#);
4577    }
4578
4579    // Note: "bytes" is not tested via JSON inference because singularize("bytes") = "byte"
4580    // which is NOT a built-in type. The direct TL-parsing test below covers "bytes" as a
4581    // schema name.
4582
4583    #[test]
4584    fn schema_name_shadows_builtin_timestamp() {
4585        assert_builtin_name_text_roundtrip("timestamp", r#"{"x":1}"#);
4586    }
4587
4588    /// Test built-in type names as schemas via direct TL text parsing (not JSON inference).
4589    /// This covers names that can't arise through singularization (like "bytes").
4590    #[test]
4591    fn schema_name_shadows_builtin_direct_tl_parse() {
4592        let test_cases = &[
4593            // (TL text, expected field name, expected inner value)
4594            (
4595                "@struct bytes (x: int)\n@struct root (data: bytes)\nroot: @table root [\n  ((42))\n]",
4596                "data",
4597                Value::Object(IndexMap::from([
4598                    ("x".to_string(), Value::Int(42)),
4599                ])),
4600            ),
4601            (
4602                "@struct bool (a: int, b: string)\n@struct root (flag: bool)\nroot: @table root [\n  ((1, hello))\n]",
4603                "flag",
4604                Value::Object(IndexMap::from([
4605                    ("a".to_string(), Value::Int(1)),
4606                    ("b".to_string(), Value::String("hello".into())),
4607                ])),
4608            ),
4609        ];
4610
4611        for (tl_text, field_name, expected_val) in test_cases {
4612            let doc = TeaLeaf::parse(tl_text)
4613                .unwrap_or_else(|e| panic!("parse failed for field '{field_name}': {e}\n{tl_text}"));
4614
4615            let root_arr = doc.data.get("root").expect("missing 'root' key");
4616            if let Value::Array(arr) = root_arr {
4617                if let Value::Object(obj) = &arr[0] {
4618                    let actual = obj.get(*field_name)
4619                        .unwrap_or_else(|| panic!("missing field '{field_name}'"));
4620                    assert_eq!(actual, expected_val, "mismatch for field '{field_name}'");
4621                } else {
4622                    panic!("expected Object, got {:?}", arr[0]);
4623                }
4624            } else {
4625                panic!("expected Array, got {:?}", root_arr);
4626            }
4627        }
4628    }
4629
4630    /// Self-referencing case: @struct int (x: int) where the inner field type
4631    /// matches the schema name. The LParen guard ensures `x: int` resolves to
4632    /// primitive int (next token is a literal, not `(`).
4633    #[test]
4634    fn schema_name_shadows_builtin_self_referencing() {
4635        // JSON: [{"int": {"x": 1}}] — creates @struct int (x: int)
4636        // The inner field "x: int" must resolve to primitive int, not struct "int"
4637        let input = r#"[{"int":{"x":1}}]"#;
4638        let tl = TeaLeaf::from_json_with_schemas(input).unwrap();
4639        let tl_text = tl.to_tl_with_schemas();
4640
4641        assert!(tl_text.contains("@struct int"), "expected @struct int in:\n{tl_text}");
4642
4643        let reparsed = TeaLeaf::parse(&tl_text)
4644            .unwrap_or_else(|e| panic!("re-parse failed: {e}\nTL text:\n{tl_text}"));
4645
4646        for (key, orig_val) in &tl.data {
4647            let re_val = reparsed.data.get(key)
4648                .unwrap_or_else(|| panic!("lost key '{key}'"));
4649            assert_eq!(orig_val, re_val, "value mismatch for key '{key}'");
4650        }
4651    }
4652
4653    /// Self-referencing: @struct int (int: int) — field name AND type both "int"
4654    #[test]
4655    fn schema_name_shadows_builtin_self_ref_same_field_name() {
4656        let tl_text = "\
4657@struct int (int: int)
4658@struct root (val: int)
4659
4660root: @table root [
4661  ((42))
4662]
4663";
4664        let doc = TeaLeaf::parse(tl_text)
4665            .unwrap_or_else(|e| panic!("parse failed: {e}\nTL text:\n{tl_text}"));
4666
4667        let json = doc.to_json().unwrap();
4668        eprintln!("=== JSON ===\n{json}");
4669
4670        // The root array should have one element with field "val" as an Object
4671        let root_arr = doc.data.get("root").expect("missing 'root'");
4672        if let Value::Array(arr) = root_arr {
4673            if let Value::Object(obj) = &arr[0] {
4674                let val = obj.get("val").expect("missing field 'val'");
4675                // val should be Object({"int": Int(42)}) — struct "int" with field "int" = 42
4676                assert_eq!(
4677                    val,
4678                    &Value::Object(IndexMap::from([
4679                        ("int".to_string(), Value::Int(42)),
4680                    ])),
4681                    "expected struct instance, got {val:?}"
4682                );
4683            } else {
4684                panic!("expected Object, got {:?}", arr[0]);
4685            }
4686        } else {
4687            panic!("expected Array, got {root_arr:?}");
4688        }
4689    }
4690
4691    /// Duplicate @struct declarations: second overwrites first
4692    #[test]
4693    fn schema_name_shadows_builtin_duplicate_struct_decl() {
4694        let tl_text = "\
4695@struct int (x: int)
4696@struct int (int: int)
4697@struct root (val: int)
4698
4699root: @table root [
4700  ((42))
4701]
4702";
4703        let result = TeaLeaf::parse(tl_text);
4704        match &result {
4705            Ok(doc) => {
4706                let json = doc.to_json().unwrap();
4707                eprintln!("=== JSON ===\n{json}");
4708                eprintln!("=== schemas ===");
4709                for (name, schema) in &doc.schemas {
4710                    let fields: Vec<String> = schema.fields.iter()
4711                        .map(|f| format!("{}: {}", f.name, f.field_type.base))
4712                        .collect();
4713                    eprintln!("  @struct {name} ({})", fields.join(", "));
4714                }
4715            }
4716            Err(e) => {
4717                eprintln!("=== parse error ===\n{e}");
4718            }
4719        }
4720        // Assert that parsing succeeds
4721        result.unwrap();
4722    }
4723
4724    /// Multiple built-in-named schemas in the same document
4725    #[test]
4726    fn schema_name_shadows_multiple_builtins() {
4727        let input = r#"[{"bool":{"a":1},"int":{"b":"hello"},"float":{"c":true}}]"#;
4728        let tl = TeaLeaf::from_json_with_schemas(input).unwrap();
4729        let tl_text = tl.to_tl_with_schemas();
4730
4731        assert!(tl_text.contains("@struct bool"), "missing @struct bool");
4732        assert!(tl_text.contains("@struct int"), "missing @struct int");
4733        assert!(tl_text.contains("@struct float"), "missing @struct float");
4734
4735        let reparsed = TeaLeaf::parse(&tl_text)
4736            .unwrap_or_else(|e| panic!("re-parse failed: {e}\nTL text:\n{tl_text}"));
4737
4738        for (key, orig_val) in &tl.data {
4739            let re_val = reparsed.data.get(key)
4740                .unwrap_or_else(|| panic!("lost key '{key}'"));
4741            assert_eq!(orig_val, re_val, "value mismatch for key '{key}'");
4742        }
4743    }
4744
4745
4746    /// Fuzz crash: singularize("s") → "" (empty string), producing invalid
4747    /// @struct definitions with missing names.
4748    #[test]
4749    fn fuzz_repro_singularize_single_char_s() {
4750        let input = r#"[{"s":{"b":1}}]"#;
4751        let tl = TeaLeaf::from_json_with_schemas(input).unwrap();
4752        let tl_text = tl.to_tl_with_schemas();
4753
4754        // Schema name must not be empty — singularize("s") should return "s"
4755        assert!(
4756            tl_text.contains("@struct s"),
4757            "expected @struct s in TL text:\n{tl_text}"
4758        );
4759
4760        let reparsed = TeaLeaf::parse(&tl_text)
4761            .unwrap_or_else(|e| panic!("Re-parse failed: {e}\nTL text:\n{tl_text}"));
4762        assert_eq!(tl.data.len(), reparsed.data.len(), "key count mismatch");
4763        for (key, orig_val) in &tl.data {
4764            let re_val = reparsed.data.get(key).unwrap_or_else(|| panic!("lost key '{key}'"));
4765            assert_eq!(orig_val, re_val, "value mismatch for key '{key}'");
4766        }
4767    }
4768
4769    #[test]
4770    fn singularize_does_not_produce_empty_string() {
4771        // All single-character inputs must pass through unchanged
4772        for c in 'a'..='z' {
4773            let s = String::from(c);
4774            let result = super::singularize(&s);
4775            assert!(!result.is_empty(), "singularize({s:?}) produced empty string");
4776            assert_eq!(result, s, "singularize({s:?}) should return {s:?}, got {result:?}");
4777        }
4778    }
4779
4780    /// Fuzz crash: field name with dots causes value mismatch on roundtrip
4781    #[test]
4782    fn fuzz_repro_dots_in_field_name() {
4783        // Fuzz regression: field "root" inside root-array wrapper both singularize to "root",
4784        // causing analyze_nested_objects to create a correct inner schema that analyze_array
4785        // then overwrites with a self-referencing @struct root (root: root).
4786        let input = r#"[{"root":{"Z.lll.i0...A":44444440.0}}]"#;
4787        let tl = TeaLeaf::from_json_with_schemas(input).unwrap();
4788        let tl_text = tl.to_tl_with_schemas();
4789        let reparsed = TeaLeaf::parse(&tl_text)
4790            .unwrap_or_else(|e| panic!("Re-parse failed: {e}\nTL text:\n{tl_text}"));
4791        assert_eq!(tl.data.len(), reparsed.data.len(), "key count mismatch");
4792        for (key, orig_val) in &tl.data {
4793            let re_val = reparsed.data.get(key).unwrap_or_else(|| panic!("lost key '{key}'"));
4794            assert_eq!(orig_val, re_val, "value mismatch for key '{key}'");
4795        }
4796    }
4797
4798    #[test]
4799    fn schema_name_collision_field_matches_parent() {
4800        // When an array field name singularizes to the same name as its parent schema,
4801        // the inner schema should be preserved (not overwritten with a self-reference).
4802        // This tests the general case, not just the root-array wrapper collision.
4803        let input = r#"{"items": [{"items": {"a": 1, "b": 2}}]}"#;
4804        let tl = TeaLeaf::from_json_with_schemas(input).unwrap();
4805        let tl_text = tl.to_tl_with_schemas();
4806        let reparsed = TeaLeaf::parse(&tl_text)
4807            .unwrap_or_else(|e| panic!("Re-parse failed: {e}\nTL text:\n{tl_text}"));
4808        for (key, orig_val) in &tl.data {
4809            let re_val = reparsed.data.get(key).unwrap_or_else(|| panic!("lost key '{key}'"));
4810            assert_eq!(orig_val, re_val, "value mismatch for key '{key}'");
4811        }
4812    }
4813
4814    #[test]
4815    fn analyze_node_nesting_stress_test() {
4816        // Stress test: "node" appears at many nesting levels with different shapes.
4817        // Schema inference should NOT create conflicting schemas or lose data.
4818        let input = r#"{
4819          "node": {
4820            "id": 1,
4821            "name": "root",
4822            "active": true,
4823            "node": {
4824              "id": "child-1",
4825              "metrics": {
4826                "node": {
4827                  "value": 42.7,
4828                  "unit": "ms",
4829                  "thresholds": [10, 20, 30]
4830                }
4831              },
4832              "node": [
4833                {
4834                  "id": 2,
4835                  "enabled": false
4836                },
4837                {
4838                  "id": 3,
4839                  "enabled": "sometimes",
4840                  "node": {
4841                    "status": null,
4842                    "confidence": 0.93
4843                  }
4844                }
4845              ]
4846            }
4847          },
4848          "nodeMetadata": {
4849            "node": {
4850              "version": 5,
4851              "checksum": "a94a8fe5ccb19ba61c4c0873d391e987",
4852              "flags": {
4853                "node": true
4854              }
4855            }
4856          }
4857        }"#;
4858
4859        let tl = TeaLeaf::from_json_with_schemas(input).unwrap();
4860        eprintln!("=== schemas ({}) ===", tl.schemas.len());
4861        for (name, schema) in &tl.schemas {
4862            let fields: Vec<String> = schema.fields.iter()
4863                .map(|f| format!("{}: {}{}{}", f.name, f.field_type.base,
4864                    if f.field_type.is_array { "[]" } else { "" },
4865                    if f.field_type.nullable { "?" } else { "" }))
4866                .collect();
4867            eprintln!("  @struct {name} ({})", fields.join(", "));
4868        }
4869        let tl_text = tl.to_tl_with_schemas();
4870        eprintln!("=== TL text ===\n{tl_text}");
4871
4872        // Core correctness check: round-trip must preserve all data
4873        let reparsed = TeaLeaf::parse(&tl_text)
4874            .unwrap_or_else(|e| panic!("Re-parse failed: {e}\nTL text:\n{tl_text}"));
4875        for (key, orig_val) in &tl.data {
4876            let re_val = reparsed.data.get(key).unwrap_or_else(|| panic!("lost key '{key}'"));
4877            assert_eq!(orig_val, re_val, "value mismatch for key '{key}'");
4878        }
4879    }
4880
4881    #[test]
4882    fn schema_collision_recursive_arrays() {
4883        // "nodes" appears as arrays at two levels with different shapes.
4884        // Inner: [{name, value}], Outer: [{name, nodes}]
4885        // Both singularize to "node" — only one schema can exist.
4886        let input = r#"{
4887          "nodes": [
4888            {
4889              "name": "parent",
4890              "nodes": [
4891                {"name": "child", "value": 42}
4892              ]
4893            }
4894          ]
4895        }"#;
4896        let tl = TeaLeaf::from_json_with_schemas(input).unwrap();
4897        eprintln!("=== schemas ({}) ===", tl.schemas.len());
4898        for (name, schema) in &tl.schemas {
4899            let fields: Vec<String> = schema.fields.iter()
4900                .map(|f| format!("{}: {}{}{}", f.name, f.field_type.base,
4901                    if f.field_type.is_array { "[]" } else { "" },
4902                    if f.field_type.nullable { "?" } else { "" }))
4903                .collect();
4904            eprintln!("  @struct {name} ({})", fields.join(", "));
4905        }
4906        let tl_text = tl.to_tl_with_schemas();
4907        eprintln!("=== TL text ===\n{tl_text}");
4908        let reparsed = TeaLeaf::parse(&tl_text)
4909            .unwrap_or_else(|e| panic!("Re-parse failed: {e}\nTL text:\n{tl_text}"));
4910        for (key, orig_val) in &tl.data {
4911            let re_val = reparsed.data.get(key).unwrap_or_else(|| panic!("lost key '{key}'"));
4912            assert_eq!(orig_val, re_val, "value mismatch for key '{key}'");
4913        }
4914    }
4915
4916    #[test]
4917    fn schema_collision_recursive_same_shape() {
4918        // "nodes" appears at two levels but SAME shape [{id, name}].
4919        // Schema "node" created for inner array should also work for outer.
4920        let input = r#"{
4921          "nodes": [
4922            {
4923              "id": 1,
4924              "name": "parent",
4925              "children": [
4926                {"id": 10, "name": "child-a"},
4927                {"id": 11, "name": "child-b"}
4928              ]
4929            },
4930            {
4931              "id": 2,
4932              "name": "sibling",
4933              "children": [
4934                {"id": 20, "name": "child-c"}
4935              ]
4936            }
4937          ]
4938        }"#;
4939        let tl = TeaLeaf::from_json_with_schemas(input).unwrap();
4940        eprintln!("=== schemas ({}) ===", tl.schemas.len());
4941        for (name, schema) in &tl.schemas {
4942            let fields: Vec<String> = schema.fields.iter()
4943                .map(|f| format!("{}: {}{}{}", f.name, f.field_type.base,
4944                    if f.field_type.is_array { "[]" } else { "" },
4945                    if f.field_type.nullable { "?" } else { "" }))
4946                .collect();
4947            eprintln!("  @struct {name} ({})", fields.join(", "));
4948        }
4949        let tl_text = tl.to_tl_with_schemas();
4950        eprintln!("=== TL text ===\n{tl_text}");
4951        let reparsed = TeaLeaf::parse(&tl_text)
4952            .unwrap_or_else(|e| panic!("Re-parse failed: {e}\nTL text:\n{tl_text}"));
4953        for (key, orig_val) in &tl.data {
4954            let re_val = reparsed.data.get(key).unwrap_or_else(|| panic!("lost key '{key}'"));
4955            assert_eq!(orig_val, re_val, "value mismatch for key '{key}'");
4956        }
4957    }
4958
4959    #[test]
4960    fn schema_collision_three_level_nesting() {
4961        // "nodes" at 3 levels: L1 and L2 have same shape {name, nodes},
4962        // L3 has different shape {name, score}. All singularize to "node".
4963        // The deepest schema wins (depth-first); outer levels fall back to
4964        // generic format. No data loss at any level.
4965        let input = r#"{
4966          "nodes": [
4967            {
4968              "name": "grandparent",
4969              "nodes": [
4970                {
4971                  "name": "parent",
4972                  "nodes": [
4973                    {"name": "leaf-a", "score": 99.5},
4974                    {"name": "leaf-b", "score": 42.0}
4975                  ]
4976                }
4977              ]
4978            },
4979            {
4980              "name": "uncle",
4981              "nodes": [
4982                {
4983                  "name": "cousin",
4984                  "nodes": [
4985                    {"name": "leaf-c", "score": 77.3}
4986                  ]
4987                }
4988              ]
4989            }
4990          ]
4991        }"#;
4992
4993        let tl = TeaLeaf::from_json_with_schemas(input).unwrap();
4994        eprintln!("=== schemas ({}) ===", tl.schemas.len());
4995        for (name, schema) in &tl.schemas {
4996            let fields: Vec<String> = schema.fields.iter()
4997                .map(|f| format!("{}: {}{}{}", f.name, f.field_type.base,
4998                    if f.field_type.is_array { "[]" } else { "" },
4999                    if f.field_type.nullable { "?" } else { "" }))
5000                .collect();
5001            eprintln!("  @struct {name} ({})", fields.join(", "));
5002        }
5003        let tl_text = tl.to_tl_with_schemas();
5004        eprintln!("=== TL text ===\n{tl_text}");
5005
5006        let reparsed = TeaLeaf::parse(&tl_text)
5007            .unwrap_or_else(|e| panic!("Re-parse failed: {e}\nTL text:\n{tl_text}"));
5008        for (key, orig_val) in &tl.data {
5009            let re_val = reparsed.data.get(key).unwrap_or_else(|| panic!("lost key '{key}'"));
5010            assert_eq!(orig_val, re_val, "value mismatch for key '{key}'");
5011        }
5012    }
5013
5014    #[test]
5015    fn schema_collision_three_level_divergent_leaves() {
5016        // L1: [{name, nodes}], L2: [{name, nodes}] (same shape),
5017        // L3: [{id, value}] in one branch, [{identifier, points}] in another.
5018        // The depth-first analysis only sees the first branch's L3 shape.
5019        // The second branch's L3 must fall back to generic format.
5020        let input = r#"{
5021          "nodes": [
5022            {
5023              "name": "grandparent",
5024              "nodes": [
5025                {
5026                  "name": "parent",
5027                  "nodes": [
5028                    {"id": "leaf-a", "value": 99.5},
5029                    {"id": "leaf-b", "value": 42.0}
5030                  ]
5031                }
5032              ]
5033            },
5034            {
5035              "name": "uncle",
5036              "nodes": [
5037                {
5038                  "name": "cousin",
5039                  "nodes": [
5040                    {"identifier": "leaf-c", "points": 77.3}
5041                  ]
5042                }
5043              ]
5044            }
5045          ]
5046        }"#;
5047
5048        let tl = TeaLeaf::from_json_with_schemas(input).unwrap();
5049        eprintln!("=== schemas ({}) ===", tl.schemas.len());
5050        for (name, schema) in &tl.schemas {
5051            let fields: Vec<String> = schema.fields.iter()
5052                .map(|f| format!("{}: {}{}{}", f.name, f.field_type.base,
5053                    if f.field_type.is_array { "[]" } else { "" },
5054                    if f.field_type.nullable { "?" } else { "" }))
5055                .collect();
5056            eprintln!("  @struct {name} ({})", fields.join(", "));
5057        }
5058        let tl_text = tl.to_tl_with_schemas();
5059        eprintln!("=== TL text ===\n{tl_text}");
5060
5061        let reparsed = TeaLeaf::parse(&tl_text)
5062            .unwrap_or_else(|e| panic!("Re-parse failed: {e}\nTL text:\n{tl_text}"));
5063        for (key, orig_val) in &tl.data {
5064            let re_val = reparsed.data.get(key).unwrap_or_else(|| panic!("lost key '{key}'"));
5065            assert_eq!(orig_val, re_val, "value mismatch for key '{key}'");
5066        }
5067    }
5068
5069    #[test]
5070    fn json_inference_nested_array_inside_object() {
5071        // JSON inference must discover array schemas inside nested objects.
5072        // e.g., items[].product.stock[] should get its own @struct stock schema,
5073        // not fall back to []any.
5074        let input = r#"{
5075          "items": [
5076            {
5077              "name": "Widget",
5078              "product": {
5079                "id": "P-1",
5080                "stock": [
5081                  {"warehouse": "W1", "qty": 100, "backordered": false},
5082                  {"warehouse": "W2", "qty": 50, "backordered": true}
5083                ]
5084              }
5085            },
5086            {
5087              "name": "Gadget",
5088              "product": {
5089                "id": "P-2",
5090                "stock": [
5091                  {"warehouse": "W1", "qty": 200, "backordered": false}
5092                ]
5093              }
5094            }
5095          ]
5096        }"#;
5097
5098        let tl = TeaLeaf::from_json_with_schemas(input).unwrap();
5099        let tl_text = tl.to_tl_with_schemas();
5100
5101        // Must have a "stock" schema (from singularize("stock") = "stock")
5102        assert!(tl.schemas.contains_key("stock"),
5103            "Missing 'stock' schema. Schemas: {:?}\nTL:\n{tl_text}",
5104            tl.schemas.keys().collect::<Vec<_>>());
5105
5106        // The product schema must reference stock[] not []any
5107        let product_schema = tl.schemas.get("product").expect("missing product schema");
5108        let stock_field = product_schema.fields.iter().find(|f| f.name == "stock")
5109            .expect("product schema missing stock field");
5110        assert!(stock_field.field_type.is_array, "stock should be array");
5111        assert_eq!(stock_field.field_type.base, "stock",
5112            "stock field type should be 'stock', got '{}'", stock_field.field_type.base);
5113
5114        // Must produce @table for items and tuples for stock inside product
5115        assert!(tl_text.contains("@table item"), "Missing @table item:\n{tl_text}");
5116
5117        // Round-trip: parse back and verify data integrity
5118        let reparsed = TeaLeaf::parse(&tl_text)
5119            .unwrap_or_else(|e| panic!("Re-parse failed: {e}\nTL:\n{tl_text}"));
5120        for (key, orig_val) in &tl.data {
5121            let re_val = reparsed.data.get(key).unwrap_or_else(|| panic!("lost key '{key}'"));
5122            assert_eq!(orig_val, re_val, "value mismatch for key '{key}'");
5123        }
5124    }
5125
5126    // ── Compact formatting tests ──────────────────────────────────────
5127
5128    #[test]
5129    fn test_dumps_compact_basic() {
5130        let mut data = IndexMap::new();
5131        data.insert("name".to_string(), Value::String("alice".to_string()));
5132        data.insert("age".to_string(), Value::Int(30));
5133        let output = dumps_compact(&data);
5134        assert!(output.contains("name:alice\n"), "got: {output}");
5135        assert!(output.contains("age:30\n"), "got: {output}");
5136    }
5137
5138    #[test]
5139    fn test_dumps_compact_array() {
5140        let mut data = IndexMap::new();
5141        data.insert("items".to_string(), Value::Array(vec![
5142            Value::Int(1), Value::Int(2), Value::Int(3),
5143        ]));
5144        let output = dumps_compact(&data);
5145        assert!(output.contains("[1,2,3]"), "got: {output}");
5146    }
5147
5148    #[test]
5149    fn test_dumps_compact_object() {
5150        let mut data = IndexMap::new();
5151        let obj: IndexMap<String, Value> = vec![
5152            ("host".to_string(), Value::String("localhost".to_string())),
5153            ("port".to_string(), Value::Int(8080)),
5154        ].into_iter().collect();
5155        data.insert("config".to_string(), Value::Object(obj));
5156        let output = dumps_compact(&data);
5157        assert!(output.contains("{host:localhost,port:8080}"), "got: {output}");
5158    }
5159
5160    #[test]
5161    fn test_dumps_compact_map() {
5162        let mut data = IndexMap::new();
5163        data.insert("m".to_string(), Value::Map(vec![
5164            (Value::Int(1), Value::String("one".to_string())),
5165            (Value::Int(2), Value::String("two".to_string())),
5166        ]));
5167        let output = dumps_compact(&data);
5168        assert!(output.contains("@map{1:one,2:two}"), "got: {output}");
5169    }
5170
5171    #[test]
5172    fn test_dumps_compact_tagged_keeps_space() {
5173        let mut data = IndexMap::new();
5174        data.insert("val".to_string(), Value::Tagged(
5175            "ok".to_string(), Box::new(Value::Int(200)),
5176        ));
5177        let output = dumps_compact(&data);
5178        assert!(output.contains(":ok 200"), "Space after :tag must be kept (tag/value would merge), got: {output}");
5179    }
5180
5181    #[test]
5182    fn test_compact_struct_definition() {
5183        let json = r#"{"users": [{"id": 1, "name": "alice"}, {"id": 2, "name": "bob"}]}"#;
5184        let doc = TeaLeaf::from_json_with_schemas(json).unwrap();
5185        let compact = doc.to_tl_with_schemas_compact();
5186        // Struct def should have no space before ( and no spaces after ,
5187        assert!(compact.contains("@struct user("), "got: {compact}");
5188        assert!(compact.contains("id:int"), "got: {compact}");
5189        // Table should have no space before [
5190        assert!(compact.contains("@table user["), "got: {compact}");
5191        // No indentation on table rows
5192        assert!(compact.contains("\n("), "rows should start at column 0, got: {compact}");
5193        assert!(!compact.contains("  ("), "no indentation in compact, got: {compact}");
5194        // No blank line between definitions and data
5195        assert!(!compact.contains(")\n\n"), "no blank line after struct def, got: {compact}");
5196    }
5197
5198    #[test]
5199    fn test_compact_is_smaller_than_pretty() {
5200        let json = r#"{"users": [{"id": 1, "name": "alice"}, {"id": 2, "name": "bob"}]}"#;
5201        let doc = TeaLeaf::from_json_with_schemas(json).unwrap();
5202        let pretty = doc.to_tl_with_schemas();
5203        let compact = doc.to_tl_with_schemas_compact();
5204        assert!(
5205            compact.len() < pretty.len(),
5206            "Compact ({}) should be smaller than pretty ({})\nCompact:\n{compact}\nPretty:\n{pretty}",
5207            compact.len(), pretty.len()
5208        );
5209    }
5210
5211    #[test]
5212    fn test_compact_roundtrip() {
5213        // Compact output must re-parse to the same data
5214        let json = r#"{
5215            "company": "FastTrack Logistics",
5216            "shipments": [
5217                {"id": "S1", "origin": "Los Angeles, CA", "weight": 250, "cost": 450.0, "delivered": true},
5218                {"id": "S2", "origin": "Chicago, IL", "weight": 180, "cost": 320.0, "delivered": false}
5219            ]
5220        }"#;
5221        let doc = TeaLeaf::from_json_with_schemas(json).unwrap();
5222        let compact = doc.to_tl_with_schemas_compact();
5223        let reparsed = TeaLeaf::parse(&compact)
5224            .unwrap_or_else(|e| panic!("Failed to re-parse compact: {e}\nCompact:\n{compact}"));
5225
5226        let json1 = doc.to_json().unwrap();
5227        let json2 = reparsed.to_json().unwrap();
5228        let v1: serde_json::Value = serde_json::from_str(&json1).unwrap();
5229        let v2: serde_json::Value = serde_json::from_str(&json2).unwrap();
5230        assert_eq!(v1, v2, "Compact round-trip data mismatch");
5231    }
5232
5233    #[test]
5234    fn test_compact_preserves_quoted_strings() {
5235        // Strings with spaces must keep their quotes and content intact
5236        let json = r#"{"items": [{"city": "New York, NY", "name": "Alice Smith"}]}"#;
5237        let doc = TeaLeaf::from_json_with_schemas(json).unwrap();
5238        let compact = doc.to_tl_with_schemas_compact();
5239        assert!(compact.contains("\"New York, NY\""), "Quoted string must be preserved, got: {compact}");
5240        assert!(compact.contains("\"Alice Smith\""), "Quoted string must be preserved, got: {compact}");
5241    }
5242
5243    #[test]
5244    fn test_compact_root_array_single_newline() {
5245        let json = r#"[1, 2, 3]"#;
5246        let doc = TeaLeaf::from_json(json).unwrap();
5247        let compact = doc.to_tl_with_schemas_compact();
5248        assert!(compact.starts_with("@root-array\n"), "got: {compact}");
5249        assert!(!compact.starts_with("@root-array\n\n"), "Should not have double newline in compact, got: {compact}");
5250    }
5251
5252    #[test]
5253    fn test_compact_no_schemas_path() {
5254        // Documents without schemas should also compact correctly
5255        let mut data = IndexMap::new();
5256        let obj: IndexMap<String, Value> = vec![
5257            ("x".to_string(), Value::Int(1)),
5258            ("y".to_string(), Value::Int(2)),
5259        ].into_iter().collect();
5260        data.insert("point".to_string(), Value::Object(obj));
5261        data.insert("label".to_string(), Value::String("origin".to_string()));
5262        let doc = TeaLeaf {
5263            schemas: IndexMap::new(),
5264            unions: IndexMap::new(),
5265            data,
5266            is_root_array: false,
5267        };
5268        let compact = doc.to_tl_with_schemas_compact();
5269        assert!(compact.contains("point:{x:1,y:2}"), "got: {compact}");
5270        assert!(compact.contains("label:origin"), "got: {compact}");
5271    }
5272
5273    #[test]
5274    fn test_compact_canonical_roundtrip() {
5275        // Verify compact output round-trips for all canonical samples
5276        let canonical_dir = std::path::Path::new(env!("CARGO_MANIFEST_DIR")).join("../canonical/samples");
5277        let samples = [
5278            "primitives", "arrays", "objects", "schemas", "timestamps",
5279            "unicode_escaping", "numbers_extended", "refs_tags_maps",
5280            "special_types", "unions", "mixed_schemas", "large_data", "quoted_keys",
5281        ];
5282        for name in &samples {
5283            let path = canonical_dir.join(format!("{}.tl", name));
5284            if !path.exists() { continue; }
5285            let doc = TeaLeaf::load(&path).unwrap();
5286            let compact = doc.to_tl_with_schemas_compact();
5287            let reparsed = TeaLeaf::parse(&compact)
5288                .unwrap_or_else(|e| panic!("Failed to re-parse compact {name}: {e}\nCompact:\n{compact}"));
5289            let json1 = doc.to_json().unwrap();
5290            let json2 = reparsed.to_json().unwrap();
5291            let v1: serde_json::Value = serde_json::from_str(&json1).unwrap();
5292            let v2: serde_json::Value = serde_json::from_str(&json2).unwrap();
5293            assert_eq!(v1, v2, "Compact round-trip failed for {name}");
5294        }
5295    }
5296
5297    #[test]
5298    fn test_schema_inference_with_at_prefixed_keys() {
5299        // JSON-LD style @type keys should trigger schema inference with quoted field names
5300        let json = r#"{"records": [
5301            {"@type": "MCAP", "name": "alpha"},
5302            {"@type": "DCAT", "name": "beta"}
5303        ]}"#;
5304        let doc = TeaLeaf::from_json_with_schemas(json).unwrap();
5305        let tl_text = doc.to_tl_with_schemas();
5306
5307        // Should have inferred a schema with "@type" quoted
5308        assert!(tl_text.contains("@struct"), "Should infer a schema: {}", tl_text);
5309        assert!(tl_text.contains("\"@type\""), "Field @type should be quoted in schema: {}", tl_text);
5310        assert!(tl_text.contains("@table"), "Should use @table encoding: {}", tl_text);
5311    }
5312
5313    #[test]
5314    fn test_schema_inference_quoted_field_roundtrip() {
5315        // Full JSON -> TL -> JSON roundtrip with @type keys
5316        let json = r#"{"records": [
5317            {"@type": "MCAP", "accessLevel": "public"},
5318            {"@type": "DCAT", "accessLevel": "restricted"}
5319        ]}"#;
5320        let doc = TeaLeaf::from_json_with_schemas(json).unwrap();
5321        let tl_text = doc.to_tl_with_schemas();
5322
5323        // Parse TL back and convert to JSON
5324        let reparsed = TeaLeaf::parse(&tl_text)
5325            .unwrap_or_else(|e| panic!("Failed to re-parse TL with quoted fields: {e}\nTL:\n{tl_text}"));
5326        let json_out = reparsed.to_json().unwrap();
5327
5328        let v1: serde_json::Value = serde_json::from_str(json).unwrap();
5329        let v2: serde_json::Value = serde_json::from_str(&json_out).unwrap();
5330        assert_eq!(v1, v2, "Roundtrip failed.\nTL:\n{tl_text}\nJSON out:\n{json_out}");
5331    }
5332
5333    #[test]
5334    fn test_schema_inference_skips_when_schema_name_needs_quoting() {
5335        // When the inferred schema name itself would need quoting, skip inference
5336        let json = r#"{"@items": [{"name": "x"}, {"name": "y"}]}"#;
5337        let doc = TeaLeaf::from_json_with_schemas(json).unwrap();
5338        let tl_text = doc.to_tl_with_schemas();
5339
5340        // Should NOT have inferred a schema because "@items" -> "@item" needs quoting
5341        assert!(!tl_text.contains("@struct"), "Should NOT infer schema when name needs quoting: {}", tl_text);
5342        assert!(!tl_text.contains("@table"), "Should NOT use @table when name needs quoting: {}", tl_text);
5343    }
5344
5345    #[test]
5346    fn test_schema_inference_root_array_with_at_keys() {
5347        // Root-level array with @type keys should also get schema inference
5348        let json = r#"[
5349            {"@type": "MCAP", "issued": "2026-01-27"},
5350            {"@type": "DCAT", "issued": "2026-02-01"}
5351        ]"#;
5352        let doc = TeaLeaf::from_json_with_schemas(json).unwrap();
5353        let tl_text = doc.to_tl_with_schemas();
5354
5355        assert!(tl_text.contains("@struct"), "Root array should infer schema: {}", tl_text);
5356        assert!(tl_text.contains("\"@type\""), "Field @type should be quoted: {}", tl_text);
5357
5358        // Roundtrip
5359        let reparsed = TeaLeaf::parse(&tl_text)
5360            .unwrap_or_else(|e| panic!("Failed to re-parse: {e}\nTL:\n{tl_text}"));
5361        let json_out = reparsed.to_json().unwrap();
5362        let v1: serde_json::Value = serde_json::from_str(json).unwrap();
5363        let v2: serde_json::Value = serde_json::from_str(&json_out).unwrap();
5364        assert_eq!(v1, v2, "Root array roundtrip failed");
5365    }
5366
5367    #[test]
5368    fn test_schema_inference_dollar_prefixed_keys() {
5369        // JSON Schema / OpenAPI style $ref, $id, $schema keys
5370        let json = r##"{"definitions": [
5371            {"$ref": "#/components/User", "$id": "def1", "name": "UserRef"},
5372            {"$ref": "#/components/Order", "$id": "def2", "name": "OrderRef"}
5373        ]}"##;
5374        let doc = TeaLeaf::from_json_with_schemas(json).unwrap();
5375        let tl_text = doc.to_tl_with_schemas();
5376
5377        assert!(tl_text.contains("@struct"), "Should infer schema with $-prefixed keys: {}", tl_text);
5378        assert!(tl_text.contains("\"$ref\""), "$ref should be quoted: {}", tl_text);
5379        assert!(tl_text.contains("\"$id\""), "$id should be quoted: {}", tl_text);
5380
5381        // Roundtrip
5382        let reparsed = TeaLeaf::parse(&tl_text)
5383            .unwrap_or_else(|e| panic!("Failed to re-parse: {e}\nTL:\n{tl_text}"));
5384        let json_out = reparsed.to_json().unwrap();
5385        let v1: serde_json::Value = serde_json::from_str(json).unwrap();
5386        let v2: serde_json::Value = serde_json::from_str(&json_out).unwrap();
5387        assert_eq!(v1, v2, "Roundtrip failed for $-prefixed keys");
5388    }
5389
5390    #[test]
5391    fn test_schema_inference_hash_prefixed_keys() {
5392        // XML-to-JSON style #text, #cdata keys
5393        let json = r##"{"nodes": [
5394            {"#text": "Hello world", "tag": "p", "#comment": "intro"},
5395            {"#text": "Goodbye", "tag": "span", "#comment": "outro"}
5396        ]}"##;
5397        let doc = TeaLeaf::from_json_with_schemas(json).unwrap();
5398        let tl_text = doc.to_tl_with_schemas();
5399
5400        assert!(tl_text.contains("@struct"), "Should infer schema with #-prefixed keys: {}", tl_text);
5401        assert!(tl_text.contains("\"#text\""), "#text should be quoted: {}", tl_text);
5402        assert!(tl_text.contains("\"#comment\""), "#comment should be quoted: {}", tl_text);
5403
5404        // Roundtrip
5405        let reparsed = TeaLeaf::parse(&tl_text)
5406            .unwrap_or_else(|e| panic!("Failed to re-parse: {e}\nTL:\n{tl_text}"));
5407        let json_out = reparsed.to_json().unwrap();
5408        let v1: serde_json::Value = serde_json::from_str(json).unwrap();
5409        let v2: serde_json::Value = serde_json::from_str(&json_out).unwrap();
5410        assert_eq!(v1, v2, "Roundtrip failed for #-prefixed keys");
5411    }
5412
5413    #[test]
5414    fn test_schema_inference_colon_in_keys() {
5415        // XML namespace style keys like xsi:type, dc:title
5416        let json = r#"{"elements": [
5417            {"xsi:type": "string", "dc:title": "Document A", "id": 1},
5418            {"xsi:type": "int", "dc:title": "Document B", "id": 2}
5419        ]}"#;
5420        let doc = TeaLeaf::from_json_with_schemas(json).unwrap();
5421        let tl_text = doc.to_tl_with_schemas();
5422
5423        assert!(tl_text.contains("@struct"), "Should infer schema with colon keys: {}", tl_text);
5424        assert!(tl_text.contains("\"xsi:type\""), "xsi:type should be quoted: {}", tl_text);
5425        assert!(tl_text.contains("\"dc:title\""), "dc:title should be quoted: {}", tl_text);
5426
5427        // Roundtrip
5428        let reparsed = TeaLeaf::parse(&tl_text)
5429            .unwrap_or_else(|e| panic!("Failed to re-parse: {e}\nTL:\n{tl_text}"));
5430        let json_out = reparsed.to_json().unwrap();
5431        let v1: serde_json::Value = serde_json::from_str(json).unwrap();
5432        let v2: serde_json::Value = serde_json::from_str(&json_out).unwrap();
5433        assert_eq!(v1, v2, "Roundtrip failed for colon keys");
5434    }
5435
5436    #[test]
5437    fn test_schema_inference_odata_keys() {
5438        // OData style @odata.type, @odata.id keys
5439        let json = r##"{"results": [
5440            {"@odata.type": "#Microsoft.Graph.User", "@odata.id": "users/1", "displayName": "Alice"},
5441            {"@odata.type": "#Microsoft.Graph.User", "@odata.id": "users/2", "displayName": "Bob"}
5442        ]}"##;
5443        let doc = TeaLeaf::from_json_with_schemas(json).unwrap();
5444        let tl_text = doc.to_tl_with_schemas();
5445
5446        assert!(tl_text.contains("@struct"), "Should infer schema with OData keys: {}", tl_text);
5447        assert!(tl_text.contains("\"@odata.type\""), "@odata.type should be quoted: {}", tl_text);
5448        assert!(tl_text.contains("\"@odata.id\""), "@odata.id should be quoted: {}", tl_text);
5449
5450        // Roundtrip
5451        let reparsed = TeaLeaf::parse(&tl_text)
5452            .unwrap_or_else(|e| panic!("Failed to re-parse: {e}\nTL:\n{tl_text}"));
5453        let json_out = reparsed.to_json().unwrap();
5454        let v1: serde_json::Value = serde_json::from_str(json).unwrap();
5455        let v2: serde_json::Value = serde_json::from_str(&json_out).unwrap();
5456        assert_eq!(v1, v2, "Roundtrip failed for OData keys");
5457    }
5458
5459    #[test]
5460    fn test_schema_inference_uri_keys() {
5461        // RDF/JSON style with full URI keys
5462        let json = r#"{"triples": [
5463            {"http://schema.org/name": "Alice", "http://schema.org/age": "30", "id": "s1"},
5464            {"http://schema.org/name": "Bob", "http://schema.org/age": "25", "id": "s2"}
5465        ]}"#;
5466        let doc = TeaLeaf::from_json_with_schemas(json).unwrap();
5467        let tl_text = doc.to_tl_with_schemas();
5468
5469        assert!(tl_text.contains("@struct"), "Should infer schema with URI keys: {}", tl_text);
5470        assert!(tl_text.contains("\"http://schema.org/name\""), "URI key should be quoted: {}", tl_text);
5471
5472        // Roundtrip
5473        let reparsed = TeaLeaf::parse(&tl_text)
5474            .unwrap_or_else(|e| panic!("Failed to re-parse: {e}\nTL:\n{tl_text}"));
5475        let json_out = reparsed.to_json().unwrap();
5476        let v1: serde_json::Value = serde_json::from_str(json).unwrap();
5477        let v2: serde_json::Value = serde_json::from_str(&json_out).unwrap();
5478        assert_eq!(v1, v2, "Roundtrip failed for URI keys");
5479    }
5480
5481    #[test]
5482    fn test_schema_inference_space_in_keys() {
5483        // Keys with spaces (common in human-friendly exports, spreadsheet-to-JSON)
5484        let json = r#"{"rows": [
5485            {"First Name": "Alice", "Last Name": "Smith", "age": 30},
5486            {"First Name": "Bob", "Last Name": "Jones", "age": 25}
5487        ]}"#;
5488        let doc = TeaLeaf::from_json_with_schemas(json).unwrap();
5489        let tl_text = doc.to_tl_with_schemas();
5490
5491        assert!(tl_text.contains("@struct"), "Should infer schema with space keys: {}", tl_text);
5492        assert!(tl_text.contains("\"First Name\""), "Space key should be quoted: {}", tl_text);
5493        assert!(tl_text.contains("\"Last Name\""), "Space key should be quoted: {}", tl_text);
5494
5495        // Roundtrip
5496        let reparsed = TeaLeaf::parse(&tl_text)
5497            .unwrap_or_else(|e| panic!("Failed to re-parse: {e}\nTL:\n{tl_text}"));
5498        let json_out = reparsed.to_json().unwrap();
5499        let v1: serde_json::Value = serde_json::from_str(json).unwrap();
5500        let v2: serde_json::Value = serde_json::from_str(&json_out).unwrap();
5501        assert_eq!(v1, v2, "Roundtrip failed for space keys");
5502    }
5503
5504    #[test]
5505    fn test_schema_inference_mixed_special_keys() {
5506        // Mix of regular and special-character keys in one schema
5507        let json = r#"{"catalog": [
5508            {"@type": "Product", "$id": "p1", "name": "Widget", "sku:code": "W-100"},
5509            {"@type": "Product", "$id": "p2", "name": "Gadget", "sku:code": "G-200"}
5510        ]}"#;
5511        let doc = TeaLeaf::from_json_with_schemas(json).unwrap();
5512        let tl_text = doc.to_tl_with_schemas();
5513
5514        assert!(tl_text.contains("@struct"), "Should infer schema with mixed keys: {}", tl_text);
5515        assert!(tl_text.contains("\"@type\""), "@type should be quoted: {}", tl_text);
5516        assert!(tl_text.contains("\"$id\""), "$id should be quoted: {}", tl_text);
5517        assert!(tl_text.contains("\"sku:code\""), "sku:code should be quoted: {}", tl_text);
5518        // Regular key should NOT be quoted
5519        assert!(!tl_text.contains("\"name\""), "Regular key should not be quoted: {}", tl_text);
5520
5521        // Roundtrip
5522        let reparsed = TeaLeaf::parse(&tl_text)
5523            .unwrap_or_else(|e| panic!("Failed to re-parse: {e}\nTL:\n{tl_text}"));
5524        let json_out = reparsed.to_json().unwrap();
5525        let v1: serde_json::Value = serde_json::from_str(json).unwrap();
5526        let v2: serde_json::Value = serde_json::from_str(&json_out).unwrap();
5527        assert_eq!(v1, v2, "Roundtrip failed for mixed special keys");
5528    }
5529}