Skip to main content

tealeaf/
lib.rs

//! TeaLeaf - Schema-aware data format
//!
//! # Example
//!
//! ```rust
//! use tealeaf::{TeaLeaf, Value};
//!
//! let doc = TeaLeaf::parse(r#"
//!     @struct user (id: int, name: string)
//!     users: @table user [
//!         (1, alice),
//!         (2, bob),
//!     ]
//! "#).unwrap();
//!
//! let users = doc.get("users").unwrap();
//! ```
18
19mod types;
20mod lexer;
21mod parser;
22mod writer;
23mod reader;
24pub mod convert;
25pub mod builder;
26
27pub use types::{Error, Result, TLType, FieldType, Field, Schema, Union, Variant, Value, ObjectMap, MAGIC, VERSION, VERSION_MAJOR, VERSION_MINOR, HEADER_SIZE, MAX_STRING_LENGTH, MAX_OBJECT_FIELDS, MAX_ARRAY_LENGTH};
28pub use indexmap::IndexMap;
29pub use lexer::{Lexer, Token, TokenKind};
30pub use parser::Parser;
31pub use writer::Writer;
32pub use reader::Reader;
33pub use convert::{ToTeaLeaf, FromTeaLeaf, ConvertError, ToTeaLeafExt};
34pub use builder::TeaLeafBuilder;
35
36// Re-export derive macros when the "derive" feature is enabled
37#[cfg(feature = "derive")]
38pub use tealeaf_derive::{ToTeaLeaf, FromTeaLeaf};
39
40use std::collections::HashSet;
41use std::path::Path;
42
43/// A parsed TeaLeaf document
44pub struct TeaLeaf {
45    pub schemas: IndexMap<String, Schema>,
46    pub unions: IndexMap<String, Union>,
47    pub data: IndexMap<String, Value>,
48    /// Tracks if the source JSON was a root-level array (for round-trip fidelity)
49    is_root_array: bool,
50}
51
52impl TeaLeaf {
53    /// Create a new TeaLeaf document from data and schemas.
54    ///
55    /// This constructor is primarily for programmatic document creation.
56    /// For parsing from formats, use `parse()`, `load()`, or `from_json()`.
57    pub fn new(schemas: IndexMap<String, Schema>, data: IndexMap<String, Value>) -> Self {
58        Self {
59            schemas,
60            unions: IndexMap::new(),
61            data,
62            is_root_array: false,
63        }
64    }
65
66    /// Parse TeaLeaf text format
67    pub fn parse(input: &str) -> Result<Self> {
68        let tokens = Lexer::new(input).tokenize()?;
69        let mut parser = Parser::new(tokens);
70        let data = parser.parse()?;
71        let is_root_array = parser.is_root_array();
72        let (schemas, unions) = parser.into_schemas_and_unions();
73        Ok(Self {
74            schemas,
75            unions,
76            data,
77            is_root_array,
78        })
79    }
80
81    /// Load from text file
82    ///
83    /// Include paths are resolved relative to the loaded file's directory.
84    pub fn load<P: AsRef<Path>>(path: P) -> Result<Self> {
85        let path = path.as_ref();
86        let content = std::fs::read_to_string(path)?;
87        let tokens = Lexer::new(&content).tokenize()?;
88        let mut parser = Parser::new(tokens).with_base_path(path);
89        let data = parser.parse()?;
90        let is_root_array = parser.is_root_array();
91        let (schemas, unions) = parser.into_schemas_and_unions();
92        Ok(Self {
93            schemas,
94            unions,
95            data,
96            is_root_array,
97        })
98    }
99
100    /// Get a value by key
101    pub fn get(&self, key: &str) -> Option<&Value> {
102        self.data.get(key)
103    }
104
105    /// Get a schema by name
106    pub fn schema(&self, name: &str) -> Option<&Schema> {
107        self.schemas.get(name)
108    }
109
110    /// Get a union by name
111    pub fn union(&self, name: &str) -> Option<&Union> {
112        self.unions.get(name)
113    }
114
115    /// Compile to binary format
116    pub fn compile<P: AsRef<Path>>(&self, path: P, compress: bool) -> Result<()> {
117        let mut writer = Writer::new();
118        writer.set_root_array(self.is_root_array);
119        for (_, schema) in &self.schemas {
120            writer.add_schema(schema.clone());
121        }
122        for (_, union_def) in &self.unions {
123            writer.add_union(union_def.clone());
124        }
125        for (key, value) in &self.data {
126            let schema = self.find_schema_for_value(value, key);
127            writer.add_section(key, value, schema)?;
128        }
129        writer.write(path, compress)
130    }
131
132    fn find_schema_for_value(&self, value: &Value, key: &str) -> Option<&Schema> {
133        // Try to find a matching schema for array values
134        if let Value::Array(arr) = value {
135            if arr.is_empty() {
136                // For empty arrays, try name-based matching (singularize key → schema name)
137                let singular = singularize(key);
138                return self.schemas.values().find(|s| s.name.eq_ignore_ascii_case(&singular));
139            }
140
141            // Sample multiple elements: first, middle, last
142            let sample_indices: Vec<usize> = {
143                let mut indices = vec![0];
144                if arr.len() > 2 { indices.push(arr.len() / 2); }
145                if arr.len() > 1 { indices.push(arr.len() - 1); }
146                indices
147            };
148
149            for schema in self.schemas.values() {
150                let all_match = sample_indices.iter().all(|&i| {
151                    if let Some(Value::Object(obj)) = arr.get(i) {
152                        // All required (non-nullable) schema fields must be present
153                        schema.fields.iter().all(|f| {
154                            f.field_type.nullable || obj.contains_key(&f.name)
155                        })
156                        // All obj keys must be schema fields (no extra keys)
157                        && obj.keys().all(|k| schema.fields.iter().any(|f| f.name == *k))
158                    } else {
159                        false
160                    }
161                });
162                if all_match {
163                    return Some(schema);
164                }
165            }
166        }
167        None
168    }
169
170    /// Parse from JSON string.
171    ///
172    /// # Stability Policy
173    ///
174    /// This function follows a **"plain JSON only"** policy:
175    /// - JSON is parsed as-is with **no magic conversion**
176    /// - `{"$ref": "x"}` stays as an Object, NOT a Ref
177    /// - `{"$tag": "ok", "$value": 200}` stays as an Object, NOT a Tagged
178    /// - `"0xcafef00d"` stays as a String, NOT Bytes
179    /// - `"2024-01-15T10:30:00Z"` stays as a String, NOT a Timestamp
180    /// - `[[1, "one"], [2, "two"]]` stays as an Array, NOT a Map
181    ///
182    /// To create special TeaLeaf types, use the text format or binary API directly.
183    ///
184    /// # Number Type Inference
185    ///
186    /// - Integers that fit `i64` → `Value::Int`
187    /// - Large positive integers that fit `u64` → `Value::UInt`
188    /// - Numbers with decimals or scientific notation → `Value::Float`
189    pub fn from_json(json: &str) -> Result<Self> {
190        let json_value: serde_json::Value = serde_json::from_str(json)
191            .map_err(|e| Error::ParseError(format!("Invalid JSON: {}", e)))?;
192
193        let (data, is_root_array) = match json_value {
194            serde_json::Value::Object(obj) => {
195                let map = obj.into_iter()
196                    .map(|(k, v)| (k, json_to_tealeaf_value(v)))
197                    .collect();
198                (map, false)
199            }
200            serde_json::Value::Array(_) => {
201                // Root-level array: store under "root" key but track for round-trip
202                let mut map = IndexMap::new();
203                map.insert("root".to_string(), json_to_tealeaf_value(json_value));
204                (map, true)
205            }
206            _ => {
207                // Other primitives (string, number, bool, null) at root
208                let mut map = IndexMap::new();
209                map.insert("root".to_string(), json_to_tealeaf_value(json_value));
210                (map, false)
211            }
212        };
213
214        Ok(Self {
215            schemas: IndexMap::new(),
216            unions: IndexMap::new(),
217            data,
218            is_root_array,
219        })
220    }
221
222    /// Parse from JSON string with automatic schema inference.
223    ///
224    /// This variant analyzes the JSON structure and automatically:
225    /// - Detects arrays of uniformly-structured objects
226    /// - Infers schema names from parent keys (e.g., "products" → "product")
227    /// - Generates `@struct` definitions for uniform arrays
228    /// - Enables `@table` format output when serialized
229    ///
230    /// Use `to_tl_with_schemas()` to serialize with the inferred schemas.
231    pub fn from_json_with_schemas(json: &str) -> Result<Self> {
232        let doc = Self::from_json(json)?;
233
234        let mut inferrer = SchemaInferrer::new();
235        inferrer.infer(&doc.data);
236        let (schemas, _) = inferrer.into_schemas();
237
238        Ok(Self {
239            schemas,
240            unions: IndexMap::new(),
241            data: doc.data,
242            is_root_array: doc.is_root_array,
243        })
244    }
245
246    /// Serialize to TeaLeaf text format with schemas.
247    ///
248    /// If schemas are present (either from parsing or inference), outputs
249    /// `@struct` definitions and uses `@table` format for matching arrays.
250    ///
251    /// If this document represents a root-level JSON array (from `from_json`),
252    /// the output will include `@root-array` directive for round-trip fidelity.
253    pub fn to_tl_with_schemas(&self) -> String {
254        let mut output = String::new();
255
256        // Emit @root-array directive if this represents a root-level array
257        if self.is_root_array {
258            output.push_str("@root-array\n\n");
259        }
260
261        if self.schemas.is_empty() && self.unions.is_empty() {
262            output.push_str(&dumps(&self.data));
263        } else {
264            // Preserve insertion order from schemas/unions
265            let schema_order: Vec<String> = self.schemas.keys().cloned().collect();
266            let union_order: Vec<String> = self.unions.keys().cloned().collect();
267            output.push_str(&dumps_with_schemas(
268                &self.data, &self.schemas, &schema_order,
269                &self.unions, &union_order,
270            ));
271        }
272
273        output
274    }
275
276    /// Convert to JSON string (pretty-printed).
277    ///
278    /// # Stability Policy - TeaLeaf→JSON Fixed Representations
279    ///
280    /// Special TeaLeaf types serialize to JSON with these **stable formats**:
281    ///
282    /// | TeaLeaf Type | JSON Format                                    |
283    /// |------------|------------------------------------------------|
284    /// | Bytes      | `"0xcafef00d"` (lowercase hex with 0x prefix) |
285    /// | Timestamp  | `"2024-01-15T10:30:00.123Z"` (ISO 8601 UTC)   |
286    /// | Ref        | `{"$ref": "key_name"}`                         |
287    /// | Tagged     | `{"$tag": "tag_name", "$value": <value>}`     |
288    /// | Map        | `[[key1, val1], [key2, val2], ...]`           |
289    /// | Float NaN  | `null` (JSON has no NaN)                       |
290    /// | Float ±Inf | `null` (JSON has no Infinity)                  |
291    ///
292    /// These representations are **contractually stable** and will not change.
293    pub fn to_json(&self) -> Result<String> {
294        // If the source was a root-level array, return it directly (not wrapped in object)
295        if self.is_root_array {
296            if let Some(root_value) = self.data.get("root") {
297                return serde_json::to_string_pretty(&tealeaf_to_json_value(root_value))
298                    .map_err(|e| Error::ParseError(format!("JSON serialization failed: {}", e)));
299            }
300        }
301
302        let json_obj: serde_json::Map<String, serde_json::Value> = self.data
303            .iter()
304            .map(|(k, v)| (k.clone(), tealeaf_to_json_value(v)))
305            .collect();
306
307        serde_json::to_string_pretty(&serde_json::Value::Object(json_obj))
308            .map_err(|e| Error::ParseError(format!("JSON serialization failed: {}", e)))
309    }
310
311    /// Convert to compact JSON string (no pretty printing)
312    pub fn to_json_compact(&self) -> Result<String> {
313        // If the source was a root-level array, return it directly (not wrapped in object)
314        if self.is_root_array {
315            if let Some(root_value) = self.data.get("root") {
316                return serde_json::to_string(&tealeaf_to_json_value(root_value))
317                    .map_err(|e| Error::ParseError(format!("JSON serialization failed: {}", e)));
318            }
319        }
320
321        let json_obj: serde_json::Map<String, serde_json::Value> = self.data
322            .iter()
323            .map(|(k, v)| (k.clone(), tealeaf_to_json_value(v)))
324            .collect();
325
326        serde_json::to_string(&serde_json::Value::Object(json_obj))
327            .map_err(|e| Error::ParseError(format!("JSON serialization failed: {}", e)))
328    }
329
330    /// Set whether the document represents a root-level array.
331    pub fn set_root_array(&mut self, is_root_array: bool) {
332        self.is_root_array = is_root_array;
333    }
334
335    /// Create a TeaLeaf document from a binary Reader.
336    ///
337    /// Reads all sections from the reader and carries schemas and unions through.
338    pub fn from_reader(reader: &Reader) -> Result<Self> {
339        let mut data = IndexMap::new();
340        for key in reader.keys() {
341            data.insert(key.to_string(), reader.get(key)?);
342        }
343        let schemas: IndexMap<String, Schema> = reader.schemas.iter()
344            .map(|s| (s.name.clone(), s.clone()))
345            .collect();
346        let unions: IndexMap<String, Union> = reader.unions.iter()
347            .map(|u| (u.name.clone(), u.clone()))
348            .collect();
349        let mut doc = Self {
350            schemas,
351            unions,
352            data,
353            is_root_array: reader.is_root_array(),
354        };
355        doc.set_root_array(reader.is_root_array());
356        Ok(doc)
357    }
358
359    /// Create a TeaLeaf document from a single DTO.
360    ///
361    /// The DTO is placed under the given `key` in the document data map.
362    /// Schemas are automatically collected from the DTO type.
363    pub fn from_dto<T: convert::ToTeaLeaf>(key: &str, dto: &T) -> Self {
364        let schemas = T::collect_schemas();
365        let unions = T::collect_unions();
366        let mut data = IndexMap::new();
367        data.insert(key.to_string(), dto.to_tealeaf_value());
368        let mut doc = Self::new(schemas, data);
369        doc.unions = unions;
370        doc
371    }
372
373    /// Create a TeaLeaf document from a slice of DTOs.
374    ///
375    /// The array is placed under the given `key` and schemas are
376    /// collected from the element type.
377    pub fn from_dto_array<T: convert::ToTeaLeaf>(key: &str, items: &[T]) -> Self {
378        let schemas = T::collect_schemas();
379        let unions = T::collect_unions();
380        let mut data = IndexMap::new();
381        let arr = Value::Array(items.iter().map(|i| i.to_tealeaf_value()).collect());
382        data.insert(key.to_string(), arr);
383        let mut doc = Self::new(schemas, data);
384        doc.unions = unions;
385        doc
386    }
387
388    /// Extract a DTO from this document by key.
389    pub fn to_dto<T: convert::FromTeaLeaf>(&self, key: &str) -> Result<T> {
390        let value = self
391            .get(key)
392            .ok_or_else(|| Error::MissingField(key.to_string()))?;
393        T::from_tealeaf_value(value).map_err(|e| e.into())
394    }
395
396    /// Extract all values under a key as `Vec<T>`.
397    pub fn to_dto_vec<T: convert::FromTeaLeaf>(&self, key: &str) -> Result<Vec<T>> {
398        let value = self
399            .get(key)
400            .ok_or_else(|| Error::MissingField(key.to_string()))?;
401        let arr = value
402            .as_array()
403            .ok_or_else(|| Error::ParseError("Expected array".into()))?;
404        arr.iter()
405            .map(|v| T::from_tealeaf_value(v).map_err(|e| e.into()))
406            .collect()
407    }
408}
409
410/// Convert JSON value to TeaLeaf value (best-effort)
411fn json_to_tealeaf_value(json: serde_json::Value) -> Value {
412    match json {
413        serde_json::Value::Null => Value::Null,
414        serde_json::Value::Bool(b) => Value::Bool(b),
415        serde_json::Value::Number(n) => {
416            if let Some(i) = n.as_i64() {
417                Value::Int(i)
418            } else if let Some(u) = n.as_u64() {
419                Value::UInt(u)
420            } else {
421                let raw = n.to_string();
422                // Pure integer that doesn't fit i64/u64 → preserve exactly
423                if !raw.contains('.') && !raw.contains('e') && !raw.contains('E') {
424                    Value::JsonNumber(raw)
425                } else {
426                    match n.as_f64() {
427                        Some(f) if f.is_finite() => Value::Float(f),
428                        _ => Value::JsonNumber(raw),
429                    }
430                }
431            }
432        }
433        serde_json::Value::String(s) => Value::String(s),
434        serde_json::Value::Array(arr) => {
435            Value::Array(arr.into_iter().map(json_to_tealeaf_value).collect())
436        }
437        serde_json::Value::Object(obj) => {
438            Value::Object(
439                obj.into_iter()
440                    .map(|(k, v)| (k, json_to_tealeaf_value(v)))
441                    .collect()
442            )
443        }
444    }
445}
446
447/// Convert TeaLeaf value to JSON value
448///
449/// Type preservation:
450/// - Value::Int → JSON integer (e.g., 42)
451/// - Value::UInt → JSON integer (e.g., 18446744073709551615)
452/// - Value::Float → JSON float (e.g., 42.0)
453///
454/// Integer types are tried first during JSON import (i64, then u64) so that
455/// values within 64-bit range stay exact. Only true floats fall through to f64.
456fn tealeaf_to_json_value(tl: &Value) -> serde_json::Value {
457    match tl {
458        Value::Null => serde_json::Value::Null,
459        Value::Bool(b) => serde_json::Value::Bool(*b),
460        Value::Int(i) => serde_json::Value::Number((*i).into()),
461        Value::UInt(u) => serde_json::Value::Number((*u).into()),
462        Value::Float(f) => {
463            // Always output floats as floats - the type distinction is intentional
464            serde_json::Number::from_f64(*f)
465                .map(serde_json::Value::Number)
466                .unwrap_or(serde_json::Value::Null)
467        }
468        Value::String(s) => serde_json::Value::String(s.clone()),
469        Value::Bytes(b) => {
470            // Encode bytes as hex string with 0x prefix
471            let hex: String = b.iter().map(|byte| format!("{:02x}", byte)).collect();
472            serde_json::Value::String(format!("0x{}", hex))
473        }
474        Value::Array(arr) => {
475            serde_json::Value::Array(arr.iter().map(tealeaf_to_json_value).collect())
476        }
477        Value::Object(obj) => {
478            let map: serde_json::Map<String, serde_json::Value> = obj
479                .iter()
480                .map(|(k, v)| (k.clone(), tealeaf_to_json_value(v)))
481                .collect();
482            serde_json::Value::Object(map)
483        }
484        Value::Map(pairs) => {
485            // Convert map to array of [key, value] pairs
486            let arr: Vec<serde_json::Value> = pairs
487                .iter()
488                .map(|(k, v)| {
489                    serde_json::Value::Array(vec![
490                        tealeaf_to_json_value(k),
491                        tealeaf_to_json_value(v),
492                    ])
493                })
494                .collect();
495            serde_json::Value::Array(arr)
496        }
497        Value::Ref(r) => {
498            // Encode ref as object with special key
499            let mut obj = serde_json::Map::new();
500            obj.insert("$ref".to_string(), serde_json::Value::String(r.clone()));
501            serde_json::Value::Object(obj)
502        }
503        Value::Tagged(tag, inner) => {
504            // Encode tagged value as object
505            let mut obj = serde_json::Map::new();
506            obj.insert("$tag".to_string(), serde_json::Value::String(tag.clone()));
507            obj.insert("$value".to_string(), tealeaf_to_json_value(inner));
508            serde_json::Value::Object(obj)
509        }
510        Value::Timestamp(ts, tz) => {
511            serde_json::Value::String(format_timestamp_millis(*ts, *tz))
512        }
513        Value::JsonNumber(s) => {
514            s.parse::<serde_json::Number>()
515                .map(serde_json::Value::Number)
516                .unwrap_or_else(|_| serde_json::Value::String(s.clone()))
517        }
518    }
519}
520
521/// Read a binary TeaLeaf file
522pub fn open<P: AsRef<Path>>(path: P) -> Result<Reader> {
523    Reader::open(path)
524}
525
526/// Parse TeaLeaf text
527pub fn parse(input: &str) -> Result<TeaLeaf> {
528    TeaLeaf::parse(input)
529}
530
531/// Convenience: load and get data
532pub fn loads(input: &str) -> Result<IndexMap<String, Value>> {
533    Ok(TeaLeaf::parse(input)?.data)
534}
535
/// Check if a string needs quoting when serialized to TeaLeaf format.
///
/// Returns `true` if the bare string could be misinterpreted as another type
/// or is not a valid unquoted name. A string may be written bare only when it:
/// - is non-empty,
/// - is not one of the literals `true`, `false`, `null`, `~`, `NaN`, `inf`,
///   `Infinity`,
/// - starts with an ASCII letter or `_`, and
/// - continues with only ASCII letters, ASCII digits, `_`, `-` or `.`.
///
/// This mirrors the spec grammar
/// `name = (letter | "_") { letter | digit | "_" | "-" | "." }`.
fn needs_quoting(s: &str) -> bool {
    // Reserved words, null literal, and float literals the lexer would interpret.
    if matches!(s, "true" | "false" | "null" | "~" | "NaN" | "inf" | "Infinity") {
        return true;
    }

    let mut chars = s.chars();

    // The empty string has no bare representation.
    let first = match chars.next() {
        Some(c) => c,
        None => return true,
    };

    // The leading character must be a letter or underscore. This single rule
    // already rejects digits, `0x`/`0b` prefixes and leading `+`/`-`, so no
    // separate checks for numeric-looking strings are needed.
    if !(first.is_ascii_alphabetic() || first == '_') {
        return true;
    }

    // Remaining characters are whitelisted (ASCII only); anything else —
    // whitespace, punctuation, non-ASCII — forces quoting. A mid-string '-'
    // or '.' is allowed because the first character is known to be a
    // letter or underscore.
    !chars.all(|c| c.is_ascii_alphanumeric() || matches!(c, '_' | '-' | '.'))
}
584
585/// Write a key to the output, quoting if necessary for safe round-trip.
586fn write_key(out: &mut String, key: &str) {
587    if needs_quoting(key) {
588        out.push('"');
589        out.push_str(&escape_string(key));
590        out.push('"');
591    } else {
592        out.push_str(key);
593    }
594}
595
596/// Write a map key per spec grammar: `map_key = string | name | integer`.
597/// Int/UInt are written as-is. String values use `write_key` for quoting.
598/// Other value types (Null, Bool, Float, etc.) are coerced to quoted strings
599/// so that the text format always round-trips through the parser.
600fn write_map_key(out: &mut String, key: &Value) {
601    match key {
602        Value::Int(i) => out.push_str(&i.to_string()),
603        Value::UInt(u) => out.push_str(&u.to_string()),
604        Value::String(s) => write_key(out, s),
605        // Coerce non-spec key types to quoted strings for text format safety
606        Value::Null => out.push_str("\"~\""),
607        Value::Bool(b) => { out.push('"'); out.push_str(if *b { "true" } else { "false" }); out.push('"'); }
608        Value::Float(f) => { out.push('"'); out.push_str(&f.to_string()); out.push('"'); }
609        Value::JsonNumber(s) => { out.push('"'); out.push_str(s); out.push('"'); }
610        Value::Timestamp(ts, tz) => { out.push('"'); out.push_str(&format_timestamp_millis(*ts, *tz)); out.push('"'); }
611        Value::Bytes(b) => {
612            out.push_str("\"0x");
613            for byte in b { out.push_str(&format!("{:02x}", byte)); }
614            out.push('"');
615        }
616        Value::Ref(r) => { out.push('"'); out.push('!'); out.push_str(r); out.push('"'); }
617        Value::Tagged(tag, _) => { out.push('"'); out.push(':'); out.push_str(tag); out.push('"'); }
618        Value::Array(_) | Value::Object(_) | Value::Map(_) => out.push_str("\"\""),
619    }
620}
621
622pub fn dumps(data: &IndexMap<String, Value>) -> String {
623    let mut out = String::new();
624    for (key, value) in data {
625        write_key(&mut out, key);
626        out.push_str(": ");
627        write_value(&mut out, value, 0);
628        out.push('\n');
629    }
630    out
631}
632
/// Escape a string for use inside double quotes in TeaLeaf text output.
///
/// Backslash, double quote, newline, tab, carriage return, backspace and
/// form feed get their two-character escapes (`\\ \" \n \t \r \b \f`); any
/// other control character is written as `\uXXXX` (lowercase hex). All other
/// characters are copied through unchanged.
fn escape_string(s: &str) -> String {
    let mut escaped = String::with_capacity(s.len());
    for ch in s.chars() {
        let short_form = match ch {
            '\\' => Some("\\\\"),
            '"' => Some("\\\""),
            '\n' => Some("\\n"),
            '\t' => Some("\\t"),
            '\r' => Some("\\r"),
            '\u{0008}' => Some("\\b"),
            '\u{000C}' => Some("\\f"),
            _ => None,
        };

        if let Some(sequence) = short_form {
            escaped.push_str(sequence);
        } else if ch.is_control() {
            // Remaining control characters: one \uXXXX per UTF-16 code unit.
            let mut units = [0u16; 2];
            for unit in ch.encode_utf16(&mut units).iter() {
                escaped.push_str(&format!("\\u{:04x}", unit));
            }
        } else {
            escaped.push(ch);
        }
    }
    escaped
}
657
/// Render an `f64` so that it is always recognisable as a float.
///
/// - NaN → `NaN`; ±infinity → `inf` / `-inf`.
/// - If `f64::to_string` already contains `.`, `e` or `E`, it is used as-is.
/// - Otherwise the text looks like an integer. Up to 15 digits get `.0`
///   appended; longer values are re-rendered in scientific notation
///   (`{:e}`) so that re-parsing does not see an oversized integer
///   (e.g. 6.022e23 would otherwise print as `602200000000000000000000`).
fn format_float(f: f64) -> String {
    if f.is_nan() {
        return String::from("NaN");
    }
    if f.is_infinite() {
        return String::from(if f > 0.0 { "inf" } else { "-inf" });
    }

    let plain = f.to_string();
    let already_float_like = plain.chars().any(|c| matches!(c, '.' | 'e' | 'E'));
    if already_float_like {
        return plain;
    }

    // Integer-looking output: count digits, ignoring a leading minus sign.
    let digit_count = plain.trim_start_matches('-').len();
    if digit_count > 15 {
        format!("{:e}", f)
    } else {
        let mut with_fraction = plain;
        with_fraction.push_str(".0");
        with_fraction
    }
}
687
688fn write_value(out: &mut String, value: &Value, indent: usize) {
689    match value {
690        Value::Null => out.push('~'),
691        Value::Bool(b) => out.push_str(if *b { "true" } else { "false" }),
692        Value::Int(i) => out.push_str(&i.to_string()),
693        Value::UInt(u) => out.push_str(&u.to_string()),
694        Value::JsonNumber(s) => out.push_str(s),
695        Value::Float(f) => out.push_str(&format_float(*f)),
696        Value::String(s) => {
697            if needs_quoting(s) {
698                out.push('"');
699                out.push_str(&escape_string(s));
700                out.push('"');
701            } else {
702                out.push_str(s);
703            }
704        }
705        Value::Bytes(b) => {
706            out.push_str("b\"");
707            for byte in b {
708                out.push_str(&format!("{:02x}", byte));
709            }
710            out.push('"');
711        }
712        Value::Array(arr) => {
713            out.push('[');
714            for (i, v) in arr.iter().enumerate() {
715                if i > 0 { out.push_str(", "); }
716                write_value(out, v, indent);
717            }
718            out.push(']');
719        }
720        Value::Object(obj) => {
721            out.push('{');
722            for (i, (k, v)) in obj.iter().enumerate() {
723                if i > 0 { out.push_str(", "); }
724                write_key(out, k);
725                out.push_str(": ");
726                write_value(out, v, indent);
727            }
728            out.push('}');
729        }
730        Value::Map(pairs) => {
731            out.push_str("@map {");
732            let mut first = true;
733            for (k, v) in pairs {
734                if !first { out.push_str(", "); }
735                first = false;
736                // Map keys are restricted to string | name | integer per spec.
737                // Write Int/UInt directly; convert other types to quoted strings.
738                write_map_key(out, k);
739                out.push_str(": ");
740                write_value(out, v, indent);
741            }
742            out.push('}');
743        }
744        Value::Ref(r) => {
745            out.push('!');
746            out.push_str(r);
747        }
748        Value::Tagged(tag, inner) => {
749            out.push(':');
750            out.push_str(tag);
751            out.push(' ');
752            write_value(out, inner, indent);
753        }
754        Value::Timestamp(ts, tz) => {
755            out.push_str(&format_timestamp_millis(*ts, *tz));
756        }
757    }
758}
759
760/// Format a Unix-millis timestamp as an ISO 8601 string with timezone offset.
761/// Handles negative timestamps (pre-epoch dates) correctly using Euclidean division.
762/// Years outside [0000, 9999] are clamped to the boundary per spec (4-digit years only).
763/// When tz_offset_minutes is 0, emits 'Z' suffix. Otherwise emits +HH:MM or -HH:MM.
764fn format_timestamp_millis(ts: i64, tz_offset_minutes: i16) -> String {
765    // Clamp to representable ISO 8601 range (years 0000-9999).
766    // Year 0000-01-01T00:00:00Z = -62167219200000 ms
767    // Year 9999-12-31T23:59:59.999Z = 253402300799999 ms
768    const MIN_TS: i64 = -62_167_219_200_000;
769    const MAX_TS: i64 = 253_402_300_799_999;
770    let ts = ts.clamp(MIN_TS, MAX_TS);
771
772    // Apply timezone offset to get local time for display
773    let local_ts = ts + (tz_offset_minutes as i64) * 60_000;
774    let local_ts = local_ts.clamp(MIN_TS, MAX_TS);
775
776    let secs = local_ts.div_euclid(1000);
777    let millis = local_ts.rem_euclid(1000);
778    let days = secs.div_euclid(86400);
779    let time_secs = secs.rem_euclid(86400);
780    let hours = time_secs / 3600;
781    let mins = (time_secs % 3600) / 60;
782    let secs_rem = time_secs % 60;
783    let (year, month, day) = days_to_ymd(days);
784
785    let tz_suffix = if tz_offset_minutes == 0 {
786        "Z".to_string()
787    } else {
788        let sign = if tz_offset_minutes > 0 { '+' } else { '-' };
789        let abs = tz_offset_minutes.unsigned_abs();
790        format!("{}{:02}:{:02}", sign, abs / 60, abs % 60)
791    };
792
793    if millis > 0 {
794        format!("{:04}-{:02}-{:02}T{:02}:{:02}:{:02}.{:03}{}",
795            year, month, day, hours, mins, secs_rem, millis, tz_suffix)
796    } else {
797        format!("{:04}-{:02}-{:02}T{:02}:{:02}:{:02}{}",
798            year, month, day, hours, mins, secs_rem, tz_suffix)
799    }
800}
801
/// Convert a day count relative to the Unix epoch (1970-01-01 = day 0) into a
/// proleptic Gregorian `(year, month, day)` triple.
///
/// Uses Howard Hinnant's `civil_from_days` algorithm: the calendar is shifted
/// so that each 400-year era starts on March 1st, which pushes the leap day to
/// the end of the (shifted) year. Negative day counts are supported.
fn days_to_ymd(days: i64) -> (i64, u32, u32) {
    // Days from 0000-03-01 to 1970-01-01, and days in one 400-year era.
    const EPOCH_SHIFT: i64 = 719_468;
    const DAYS_PER_ERA: i64 = 146_097;

    let shifted = days + EPOCH_SHIFT;
    let era = shifted.div_euclid(DAYS_PER_ERA);
    let day_of_era = shifted.rem_euclid(DAYS_PER_ERA) as u32; // [0, 146096]

    let year_of_era =
        (day_of_era - day_of_era / 1460 + day_of_era / 36524 - day_of_era / 146096) / 365; // [0, 399]
    let day_of_year = day_of_era - (365 * year_of_era + year_of_era / 4 - year_of_era / 100); // [0, 365]

    // Month index counted from March (0 = March, ..., 11 = February).
    let march_based_month = (5 * day_of_year + 2) / 153;
    let day = day_of_year - (153 * march_based_month + 2) / 5 + 1;

    // January and February belong to the following civil year.
    let (month, year_carry) = if march_based_month < 10 {
        (march_based_month + 3, 0)
    } else {
        (march_based_month - 9, 1)
    };

    let year = i64::from(year_of_era) + era * 400 + year_carry;
    (year, month, day)
}
817
818// =============================================================================
819// Schema Inference
820// =============================================================================
821
/// Inferred type information for a field
///
/// Values of this enum are produced by `infer_type` from concrete data and
/// combined with `InferredType::merge` while scanning several samples of the
/// same field.
#[derive(Debug, Clone, PartialEq)]
enum InferredType {
    /// Inferred from `Value::Null`; merging it with any other type yields that other type.
    Null,
    /// Inferred from `Value::Bool`.
    Bool,
    /// Inferred from both `Value::Int` and `Value::UInt`.
    Int,
    /// Inferred from `Value::Float`; also the result of merging `Int` with `Float`.
    Float,
    /// Inferred from `Value::String`.
    String,
    /// An array, carrying the merged type of its elements.
    Array(Box<InferredType>),
    /// An object, described by its `(field name, field type)` pairs.
    Object(Vec<(String, InferredType)>),  // Ordered fields
    /// Incompatible or unsupported types were observed.
    Mixed,  // Different types seen - fall back to any
}
834
835impl InferredType {
836    fn merge(&self, other: &InferredType) -> InferredType {
837        if self == other {
838            return self.clone();
839        }
840        match (self, other) {
841            (InferredType::Null, t) | (t, InferredType::Null) => {
842                // Null + T = T (nullable)
843                t.clone()
844            }
845            (InferredType::Int, InferredType::Float) | (InferredType::Float, InferredType::Int) => {
846                InferredType::Float
847            }
848            (InferredType::Array(a), InferredType::Array(b)) => {
849                InferredType::Array(Box::new(a.merge(b)))
850            }
851            (InferredType::Object(a), InferredType::Object(b)) => {
852                // Merge objects: keep fields present in both, track nullability
853                let mut merged = Vec::new();
854                let b_map: IndexMap<&str, &InferredType> = b.iter().map(|(k, v)| (k.as_str(), v)).collect();
855
856                for (key, a_type) in a {
857                    if let Some(b_type) = b_map.get(key.as_str()) {
858                        merged.push((key.clone(), a_type.merge(b_type)));
859                    }
860                    // Fields only in a are dropped (not uniform)
861                }
862
863                // Check if structures are compatible (same fields)
864                if merged.len() == a.len() && merged.len() == b.len() {
865                    InferredType::Object(merged)
866                } else {
867                    InferredType::Mixed
868                }
869            }
870            _ => InferredType::Mixed,
871        }
872    }
873
874    fn to_field_type(&self, schemas: &IndexMap<String, Schema>) -> FieldType {
875        match self {
876            InferredType::Null => FieldType::new("string").nullable(),  // Unknown type, default to string
877            InferredType::Bool => FieldType::new("bool"),
878            InferredType::Int => FieldType::new("int"),
879            InferredType::Float => FieldType::new("float"),
880            InferredType::String => FieldType::new("string"),
881            InferredType::Array(inner) => {
882                let inner_type = inner.to_field_type(schemas);
883                FieldType {
884                    base: inner_type.base,
885                    nullable: inner_type.nullable,
886                    is_array: true,
887                }
888            }
889            InferredType::Object(fields) => {
890                // Check if this matches an existing schema
891                for (name, schema) in schemas {
892                    if schema.fields.len() == fields.len() {
893                        let all_match = schema.fields.iter().all(|sf| {
894                            fields.iter().any(|(k, _)| k == &sf.name)
895                        });
896                        if all_match {
897                            return FieldType::new(name.clone());
898                        }
899                    }
900                }
901                // No matching schema — use "any" (not "object", which is a
902                // value-only type rejected by the parser in schema definitions)
903                FieldType::new("any")
904            }
905            InferredType::Mixed => FieldType::new("any"),
906        }
907    }
908}
909
910fn infer_type(value: &Value) -> InferredType {
911    match value {
912        Value::Null => InferredType::Null,
913        Value::Bool(_) => InferredType::Bool,
914        Value::Int(_) | Value::UInt(_) => InferredType::Int,
915        Value::Float(_) => InferredType::Float,
916        Value::String(_) => InferredType::String,
917        Value::Array(arr) => {
918            if arr.is_empty() {
919                InferredType::Array(Box::new(InferredType::Mixed))
920            } else {
921                let mut element_type = infer_type(&arr[0]);
922                for item in arr.iter().skip(1) {
923                    element_type = element_type.merge(&infer_type(item));
924                }
925                InferredType::Array(Box::new(element_type))
926            }
927        }
928        Value::Object(obj) => {
929            let fields: Vec<(String, InferredType)> = obj
930                .iter()
931                .map(|(k, v)| (k.clone(), infer_type(v)))
932                .collect();
933            InferredType::Object(fields)
934        }
935        _ => InferredType::Mixed,
936    }
937}
938
/// Singularize a plural name (simple heuristic)
///
/// The name is lowercased first, then:
/// * a trailing `ies` becomes `y` (`categories` -> `category`);
/// * a trailing `es` is removed after `ss`, `x`, `ch` or `sh` (`boxes` -> `box`);
/// * otherwise a single trailing `s` is removed unless the name ends in `ss`.
///
/// Names that match none of the rules are returned lowercased and otherwise
/// unchanged. The result is never empty for a non-empty input: if stripping
/// the suffix would leave nothing (for example the name `s`), the lowercased
/// name is returned as-is so it can still serve as a schema name.
fn singularize(name: &str) -> String {
    const ES_PLURALS: [&str; 4] = ["sses", "xes", "ches", "shes"];

    let lower = name.to_lowercase();

    let singular = if let Some(stem) = lower.strip_suffix("ies") {
        format!("{}y", stem)
    } else if ES_PLURALS.iter().any(|suffix| lower.ends_with(*suffix)) {
        // Suffixes are ASCII, so slicing by byte length stays on a char boundary.
        lower[..lower.len() - 2].to_string()
    } else if lower.ends_with('s') && !lower.ends_with("ss") {
        lower[..lower.len() - 1].to_string()
    } else {
        return lower;
    };

    if singular.is_empty() {
        lower
    } else {
        singular
    }
}
952
953/// Check if array elements are objects that match a schema's structure
954fn array_matches_schema(arr: &[Value], schema: &Schema) -> bool {
955    if arr.is_empty() {
956        return false;
957    }
958
959    // Check if first element is an object
960    let first = match &arr[0] {
961        Value::Object(obj) => obj,
962        _ => return false,
963    };
964
965    // Get schema field names
966    let schema_fields: HashSet<_> = schema.fields.iter().map(|f| f.name.as_str()).collect();
967
968    // Get object keys
969    let obj_keys: HashSet<_> = first.keys().map(|k| k.as_str()).collect();
970
971    // Check if there's significant overlap (at least 50% of schema fields present)
972    let overlap = schema_fields.intersection(&obj_keys).count();
973    let required_overlap = schema_fields.len() / 2;
974
975    overlap > required_overlap || overlap == schema_fields.len()
976}
977
/// Schema inferrer that analyzes data and generates schemas
///
/// Feed it data with `infer`, then take the result with `into_schemas`.
pub struct SchemaInferrer {
    /// Schemas inferred so far, keyed by schema name.
    schemas: IndexMap<String, Schema>,
    /// Schema names in the order they were registered.
    schema_order: Vec<String>,  // Track order for output
}
983
984impl SchemaInferrer {
985    pub fn new() -> Self {
986        Self {
987            schemas: IndexMap::new(),
988            schema_order: Vec::new(),
989        }
990    }
991
992    /// Analyze data and infer schemas from uniform object arrays
993    pub fn infer(&mut self, data: &IndexMap<String, Value>) {
994        for (key, value) in data {
995            self.analyze_value(key, value);
996        }
997    }
998
999    fn analyze_value(&mut self, hint_name: &str, value: &Value) {
1000        if let Value::Array(arr) = value {
1001            self.analyze_array(hint_name, arr);
1002        } else if let Value::Object(obj) = value {
1003            // Recursively analyze nested objects
1004            for (k, v) in obj {
1005                self.analyze_value(k, v);
1006            }
1007        }
1008    }
1009
1010    fn analyze_array(&mut self, hint_name: &str, arr: &[Value]) {
1011        if arr.is_empty() {
1012            return;
1013        }
1014
1015        // Check if all elements are objects with the same structure
1016        let first = match &arr[0] {
1017            Value::Object(obj) => obj,
1018            _ => return,
1019        };
1020
1021        // Collect field names from first object (preserving insertion order)
1022        let field_names: Vec<String> = first.keys().cloned().collect();
1023
1024        // Skip schema inference if fields are empty, or any field name is empty
1025        // or needs quoting — such names can't round-trip through @struct definitions.
1026        if field_names.is_empty()
1027            || field_names.iter().any(|n| n.is_empty() || needs_quoting(n))
1028        {
1029            return;
1030        }
1031
1032        let field_set: std::collections::BTreeSet<&str> = first.keys().map(|k| k.as_str()).collect();
1033
1034        // Verify all objects have the same fields
1035        for item in arr.iter().skip(1) {
1036            if let Value::Object(obj) = item {
1037                let item_set: std::collections::BTreeSet<&str> = obj.keys().map(|k| k.as_str()).collect();
1038                if item_set != field_set {
1039                    return;  // Not uniform
1040                }
1041            } else {
1042                return;  // Not all objects
1043            }
1044        }
1045
1046        // Infer types for each field across all objects
1047        let mut field_types: IndexMap<String, InferredType> = IndexMap::new();
1048        let mut has_null: IndexMap<String, bool> = IndexMap::new();
1049
1050        for item in arr {
1051            if let Value::Object(obj) = item {
1052                for (key, val) in obj {
1053                    let inferred = infer_type(val);
1054                    let is_null = matches!(val, Value::Null);
1055
1056                    *has_null.entry(key.clone()).or_insert(false) |= is_null;
1057
1058                    field_types
1059                        .entry(key.clone())
1060                        .and_modify(|existing| *existing = existing.merge(&inferred))
1061                        .or_insert(inferred);
1062                }
1063            }
1064        }
1065
1066        // Generate schema name from hint
1067        let schema_name = singularize(hint_name);
1068
1069        // Skip if schema already exists
1070        if self.schemas.contains_key(&schema_name) {
1071            return;
1072        }
1073
1074        // First, recursively analyze nested arrays and objects to create their schemas
1075        for item in arr {
1076            if let Value::Object(obj) = item {
1077                for (field_name, field_val) in obj {
1078                    if let Value::Array(nested) = field_val {
1079                        self.analyze_array(field_name, nested);
1080                    }
1081                }
1082                break;  // Only need to process first object for nested arrays
1083            }
1084        }
1085
1086        // Analyze nested object fields - collect all non-null objects for each field
1087        // and create schemas if they're uniform across all array items
1088        for field_name in &field_names {
1089            let nested_objects: Vec<&IndexMap<String, Value>> = arr
1090                .iter()
1091                .filter_map(|item| {
1092                    if let Value::Object(obj) = item {
1093                        if let Some(Value::Object(nested)) = obj.get(field_name) {
1094                            return Some(nested);
1095                        }
1096                    }
1097                    None
1098                })
1099                .collect();
1100
1101            // If we found at least one object, check if they're uniform
1102            if !nested_objects.is_empty() {
1103                self.analyze_nested_objects(field_name, &nested_objects);
1104            }
1105        }
1106
1107        // Build schema
1108        let mut schema = Schema::new(&schema_name);
1109
1110        // Use insertion order from first object
1111        for field_name in &field_names {
1112            if let Some(inferred) = field_types.get(field_name) {
1113                let mut field_type = inferred.to_field_type(&self.schemas);
1114
1115                // Mark as nullable if any null values seen
1116                if has_null.get(field_name).copied().unwrap_or(false) {
1117                    field_type.nullable = true;
1118                }
1119
1120                // Check if there's a nested schema for array fields
1121                if let Value::Object(first_obj) = &arr[0] {
1122                    if let Some(Value::Array(nested_arr)) = first_obj.get(field_name) {
1123                        let nested_schema_name = singularize(field_name);
1124                        if let Some(nested_schema) = self.schemas.get(&nested_schema_name) {
1125                            // Verify array elements are objects matching the schema structure
1126                            if array_matches_schema(nested_arr, nested_schema) {
1127                                field_type = FieldType {
1128                                    base: nested_schema_name,
1129                                    nullable: field_type.nullable,
1130                                    is_array: true,
1131                                };
1132                            }
1133                        }
1134                    }
1135                }
1136
1137                // Check if there's a nested schema for object fields
1138                let nested_schema_name = singularize(field_name);
1139                if self.schemas.contains_key(&nested_schema_name) {
1140                    if matches!(inferred, InferredType::Object(_)) {
1141                        field_type = FieldType {
1142                            base: nested_schema_name,
1143                            nullable: field_type.nullable,
1144                            is_array: false,
1145                        };
1146                    }
1147                }
1148
1149                schema.add_field(field_name, field_type);
1150            }
1151        }
1152
1153        self.schema_order.push(schema_name.clone());
1154        self.schemas.insert(schema_name, schema);
1155    }
1156
1157    /// Analyze a collection of nested objects (from the same field across array items)
1158    /// and create a schema if they have uniform structure
1159    fn analyze_nested_objects(&mut self, field_name: &str, objects: &[&IndexMap<String, Value>]) {
1160        if objects.is_empty() {
1161            return;
1162        }
1163
1164        // Get field names from first object (preserving insertion order)
1165        let first = objects[0];
1166        let nested_field_names: Vec<String> = first.keys().cloned().collect();
1167
1168        // Skip empty objects and objects with field names that can't round-trip
1169        if nested_field_names.is_empty()
1170            || nested_field_names.iter().any(|n| n.is_empty() || needs_quoting(n))
1171        {
1172            return;
1173        }
1174
1175        let field_set: std::collections::BTreeSet<&str> = first.keys().map(|k| k.as_str()).collect();
1176
1177        // Check if all objects have the same fields
1178        for obj in objects.iter().skip(1) {
1179            let obj_set: std::collections::BTreeSet<&str> = obj.keys().map(|k| k.as_str()).collect();
1180            if obj_set != field_set {
1181                return; // Not uniform
1182            }
1183        }
1184
1185        // They're uniform - create a schema
1186        let schema_name = singularize(field_name);
1187
1188        // Skip if schema already exists
1189        if self.schemas.contains_key(&schema_name) {
1190            return;
1191        }
1192
1193        // Infer field types across all objects
1194        let mut field_types: IndexMap<String, InferredType> = IndexMap::new();
1195        let mut has_null: IndexMap<String, bool> = IndexMap::new();
1196
1197        for obj in objects {
1198            for (key, val) in *obj {
1199                let inferred = infer_type(val);
1200                let is_null = matches!(val, Value::Null);
1201
1202                *has_null.entry(key.clone()).or_insert(false) |= is_null;
1203
1204                field_types
1205                    .entry(key.clone())
1206                    .and_modify(|existing| *existing = existing.merge(&inferred))
1207                    .or_insert(inferred);
1208            }
1209        }
1210
1211        // Recursively analyze nested objects within these objects
1212        for nested_field in &nested_field_names {
1213            let deeper_objects: Vec<&IndexMap<String, Value>> = objects
1214                .iter()
1215                .filter_map(|obj| {
1216                    if let Some(Value::Object(nested)) = obj.get(nested_field) {
1217                        Some(nested)
1218                    } else {
1219                        None
1220                    }
1221                })
1222                .collect();
1223
1224            if !deeper_objects.is_empty() {
1225                self.analyze_nested_objects(nested_field, &deeper_objects);
1226            }
1227        }
1228
1229        // Build schema
1230        let mut schema = Schema::new(&schema_name);
1231
1232        for nested_field in &nested_field_names {
1233            if let Some(inferred) = field_types.get(nested_field) {
1234                let mut field_type = inferred.to_field_type(&self.schemas);
1235
1236                if has_null.get(nested_field).copied().unwrap_or(false) {
1237                    field_type.nullable = true;
1238                }
1239
1240                // Check if this field has a nested schema
1241                if let Some(nested_schema) = self.schemas.get(&singularize(nested_field)) {
1242                    if matches!(inferred, InferredType::Object(_)) {
1243                        field_type = FieldType::new(nested_schema.name.clone());
1244                    }
1245                }
1246
1247                schema.add_field(nested_field, field_type);
1248            }
1249        }
1250
1251        self.schema_order.push(schema_name.clone());
1252        self.schemas.insert(schema_name, schema);
1253    }
1254
1255    pub fn into_schemas(self) -> (IndexMap<String, Schema>, Vec<String>) {
1256        (self.schemas, self.schema_order)
1257    }
1258}
1259
1260impl Default for SchemaInferrer {
1261    fn default() -> Self {
1262        Self::new()
1263    }
1264}
1265
1266/// Serialize data to TeaLeaf text format with schemas
1267pub fn dumps_with_schemas(
1268    data: &IndexMap<String, Value>,
1269    schemas: &IndexMap<String, Schema>,
1270    schema_order: &[String],
1271    unions: &IndexMap<String, Union>,
1272    union_order: &[String],
1273) -> String {
1274    let mut out = String::new();
1275    let mut has_definitions = false;
1276
1277    // Write union definitions first (before structs, since structs may reference unions)
1278    for name in union_order {
1279        if let Some(union) = unions.get(name) {
1280            out.push_str("@union ");
1281            out.push_str(&union.name);
1282            out.push_str(" {\n");
1283            for (vi, variant) in union.variants.iter().enumerate() {
1284                out.push_str("  ");
1285                out.push_str(&variant.name);
1286                out.push_str(" (");
1287                for (fi, field) in variant.fields.iter().enumerate() {
1288                    if fi > 0 {
1289                        out.push_str(", ");
1290                    }
1291                    out.push_str(&field.name);
1292                    out.push_str(": ");
1293                    out.push_str(&field.field_type.to_string());
1294                }
1295                out.push(')');
1296                if vi < union.variants.len() - 1 {
1297                    out.push(',');
1298                }
1299                out.push('\n');
1300            }
1301            out.push_str("}\n");
1302            has_definitions = true;
1303        }
1304    }
1305
1306    // Write struct schemas in order
1307    for name in schema_order {
1308        if let Some(schema) = schemas.get(name) {
1309            out.push_str("@struct ");
1310            out.push_str(&schema.name);
1311            out.push_str(" (");
1312            for (i, field) in schema.fields.iter().enumerate() {
1313                if i > 0 {
1314                    out.push_str(", ");
1315                }
1316                write_key(&mut out, &field.name);
1317                out.push_str(": ");
1318                out.push_str(&field.field_type.to_string());
1319            }
1320            out.push_str(")\n");
1321            has_definitions = true;
1322        }
1323    }
1324
1325    if has_definitions {
1326        out.push('\n');
1327    }
1328
1329    // Write data (preserves insertion order)
1330    for (key, value) in data {
1331        write_key(&mut out, key);
1332        out.push_str(": ");
1333        write_value_with_schemas(&mut out, value, schemas, Some(key), 0);
1334        out.push('\n');
1335    }
1336
1337    out
1338}
1339
1340fn write_value_with_schemas(
1341    out: &mut String,
1342    value: &Value,
1343    schemas: &IndexMap<String, Schema>,
1344    hint_name: Option<&str>,
1345    indent: usize,
1346) {
1347    match value {
1348        Value::Null => out.push('~'),
1349        Value::Bool(b) => out.push_str(if *b { "true" } else { "false" }),
1350        Value::Int(i) => out.push_str(&i.to_string()),
1351        Value::UInt(u) => out.push_str(&u.to_string()),
1352        Value::JsonNumber(s) => out.push_str(s),
1353        Value::Float(f) => out.push_str(&format_float(*f)),
1354        Value::String(s) => {
1355            if needs_quoting(s) {
1356                out.push('"');
1357                out.push_str(&escape_string(s));
1358                out.push('"');
1359            } else {
1360                out.push_str(s);
1361            }
1362        }
1363        Value::Bytes(b) => {
1364            out.push_str("b\"");
1365            for byte in b {
1366                out.push_str(&format!("{:02x}", byte));
1367            }
1368            out.push('"');
1369        }
1370        Value::Array(arr) => {
1371            // Check if this array can use @table format
1372            let schema_name = hint_name.map(singularize);
1373            let schema = schema_name.as_ref().and_then(|n| schemas.get(n));
1374
1375            if let Some(schema) = schema {
1376                // Check if first element is an object matching the schema
1377                if let Some(Value::Object(_)) = arr.first() {
1378                    out.push_str("@table ");
1379                    out.push_str(&schema.name);
1380                    out.push_str(" [\n");
1381
1382                    let inner_indent = indent + 2;
1383                    for (i, item) in arr.iter().enumerate() {
1384                        for _ in 0..inner_indent {
1385                            out.push(' ');
1386                        }
1387                        write_tuple(out, item, schema, schemas, inner_indent);
1388                        if i < arr.len() - 1 {
1389                            out.push(',');
1390                        }
1391                        out.push('\n');
1392                    }
1393
1394                    for _ in 0..indent {
1395                        out.push(' ');
1396                    }
1397                    out.push(']');
1398                    return;
1399                }
1400            }
1401
1402            // Fall back to regular array format
1403            out.push('[');
1404            for (i, v) in arr.iter().enumerate() {
1405                if i > 0 {
1406                    out.push_str(", ");
1407                }
1408                write_value_with_schemas(out, v, schemas, None, indent);
1409            }
1410            out.push(']');
1411        }
1412        Value::Object(obj) => {
1413            out.push('{');
1414            for (i, (k, v)) in obj.iter().enumerate() {
1415                if i > 0 {
1416                    out.push_str(", ");
1417                }
1418                write_key(out, k);
1419                out.push_str(": ");
1420                write_value_with_schemas(out, v, schemas, Some(k), indent);
1421            }
1422            out.push('}');
1423        }
1424        Value::Map(pairs) => {
1425            out.push_str("@map {");
1426            let mut first = true;
1427            for (k, v) in pairs {
1428                if !first {
1429                    out.push_str(", ");
1430                }
1431                first = false;
1432                write_map_key(out, k);
1433                out.push_str(": ");
1434                write_value_with_schemas(out, v, schemas, None, indent);
1435            }
1436            out.push('}');
1437        }
1438        Value::Ref(r) => {
1439            out.push('!');
1440            out.push_str(r);
1441        }
1442        Value::Tagged(tag, inner) => {
1443            out.push(':');
1444            out.push_str(tag);
1445            out.push(' ');
1446            write_value_with_schemas(out, inner, schemas, None, indent);
1447        }
1448        Value::Timestamp(ts, tz) => {
1449            out.push_str(&format_timestamp_millis(*ts, *tz));
1450        }
1451    }
1452}
1453
1454fn write_tuple(
1455    out: &mut String,
1456    value: &Value,
1457    schema: &Schema,
1458    schemas: &IndexMap<String, Schema>,
1459    indent: usize,
1460) {
1461    if let Value::Object(obj) = value {
1462        out.push('(');
1463        for (i, field) in schema.fields.iter().enumerate() {
1464            if i > 0 {
1465                out.push_str(", ");
1466            }
1467            if let Some(v) = obj.get(&field.name) {
1468                // For array fields with a known schema type, write tuples directly without @table
1469                if field.field_type.is_array {
1470                    if let Some(item_schema) = schemas.get(&field.field_type.base) {
1471                        // The schema defines the element type - write array with tuples directly
1472                        write_schema_array(out, v, item_schema, schemas, indent);
1473                    } else {
1474                        // No schema for element type - use regular array format
1475                        write_value_with_schemas(out, v, schemas, None, indent);
1476                    }
1477                } else if schemas.contains_key(&field.field_type.base) {
1478                    // Non-array field with schema type - write as nested tuple
1479                    if let Some(nested_schema) = schemas.get(&field.field_type.base) {
1480                        write_tuple(out, v, nested_schema, schemas, indent);
1481                    } else {
1482                        write_value_with_schemas(out, v, schemas, None, indent);
1483                    }
1484                } else {
1485                    write_value_with_schemas(out, v, schemas, None, indent);
1486                }
1487            } else {
1488                out.push('~');
1489            }
1490        }
1491        out.push(')');
1492    } else {
1493        write_value_with_schemas(out, value, schemas, None, indent);
1494    }
1495}
1496
1497/// Write an array of schema-typed values as tuples (without @table annotation)
1498fn write_schema_array(
1499    out: &mut String,
1500    value: &Value,
1501    schema: &Schema,
1502    schemas: &IndexMap<String, Schema>,
1503    indent: usize,
1504) {
1505    if let Value::Array(arr) = value {
1506        if arr.is_empty() {
1507            out.push_str("[]");
1508            return;
1509        }
1510
1511        out.push_str("[\n");
1512        let inner_indent = indent + 2;
1513        for (i, item) in arr.iter().enumerate() {
1514            for _ in 0..inner_indent {
1515                out.push(' ');
1516            }
1517            write_tuple(out, item, schema, schemas, inner_indent);
1518            if i < arr.len() - 1 {
1519                out.push(',');
1520            }
1521            out.push('\n');
1522        }
1523        for _ in 0..indent {
1524            out.push(' ');
1525        }
1526        out.push(']');
1527    } else {
1528        // Not an array - fall back to regular value writing
1529        write_value_with_schemas(out, value, schemas, None, indent);
1530    }
1531}
1532
1533#[cfg(test)]
1534mod tests {
1535    use super::*;
1536
1537    #[test]
1538    fn test_serde_json_number_behavior() {
1539        // Test how serde_json handles different number formats
1540        let json_str = r#"{"int": 42, "float_whole": 42.0, "float_frac": 42.5}"#;
1541        let parsed: serde_json::Value = serde_json::from_str(json_str).unwrap();
1542
1543        if let serde_json::Value::Object(obj) = parsed {
1544            let int_num = obj.get("int").unwrap().as_number().unwrap();
1545            let float_whole = obj.get("float_whole").unwrap().as_number().unwrap();
1546            let float_frac = obj.get("float_frac").unwrap().as_number().unwrap();
1547
1548            println!("int (42): is_i64={}, is_u64={}, is_f64={}",
1549                int_num.is_i64(), int_num.is_u64(), int_num.is_f64());
1550            println!("float_whole (42.0): is_i64={}, is_u64={}, is_f64={}",
1551                float_whole.is_i64(), float_whole.is_u64(), float_whole.is_f64());
1552            println!("float_frac (42.5): is_i64={}, is_u64={}, is_f64={}",
1553                float_frac.is_i64(), float_frac.is_u64(), float_frac.is_f64());
1554
1555            // Assert expected behavior
1556            assert!(int_num.is_i64(), "42 should be parsed as i64");
1557            assert!(float_whole.is_f64(), "42.0 should be parsed as f64");
1558            assert!(float_frac.is_f64(), "42.5 should be parsed as f64");
1559        }
1560
1561        // Test how Rust formats floats
1562        println!("Rust float formatting:");
1563        println!("  42.0f64.to_string() = '{}'", 42.0f64.to_string());
1564        println!("  42.5f64.to_string() = '{}'", 42.5f64.to_string());
1565
1566        // This is the problem! Rust's to_string() drops the .0
1567        // We need to ensure floats always have a decimal point
1568    }
1569
1570    #[test]
1571    fn test_parse_simple() {
1572        let doc = TeaLeaf::parse(r#"
1573            name: alice
1574            age: 30
1575            active: true
1576        "#).unwrap();
1577        
1578        assert_eq!(doc.get("name").unwrap().as_str(), Some("alice"));
1579        assert_eq!(doc.get("age").unwrap().as_int(), Some(30));
1580        assert_eq!(doc.get("active").unwrap().as_bool(), Some(true));
1581    }
1582
1583    #[test]
1584    fn test_parse_struct() {
1585        let doc = TeaLeaf::parse(r#"
1586            @struct user (id: int, name: string, email: string?)
1587            users: @table user [
1588                (1, alice, "alice@test.com"),
1589                (2, bob, ~),
1590            ]
1591        "#).unwrap();
1592        
1593        let schema = doc.schema("user").unwrap();
1594        assert_eq!(schema.fields.len(), 3);
1595        assert!(schema.fields[2].field_type.nullable);
1596        
1597        let users = doc.get("users").unwrap().as_array().unwrap();
1598        assert_eq!(users.len(), 2);
1599    }
1600
1601    #[test]
1602    fn test_nested_struct() {
1603        let doc = TeaLeaf::parse(r#"
1604            @struct address (city: string, zip: string)
1605            @struct user (id: int, name: string, home: address)
1606            users: @table user [
1607                (1, alice, (Berlin, "10115")),
1608                (2, bob, (Paris, "75001")),
1609            ]
1610        "#).unwrap();
1611        
1612        let users = doc.get("users").unwrap().as_array().unwrap();
1613        let alice = users[0].as_object().unwrap();
1614        let home = alice.get("home").unwrap().as_object().unwrap();
1615        assert_eq!(home.get("city").unwrap().as_str(), Some("Berlin"));
1616    }
1617
1618    #[test]
1619    fn test_three_level_nesting() {
1620        let doc = TeaLeaf::parse(r#"
1621            @struct method (type: string, last4: string)
1622            @struct payment (amount: float, method: method)
1623            @struct order (id: int, payment: payment)
1624            orders: @table order [
1625                (1, (99.99, (credit, "4242"))),
1626            ]
1627        "#).unwrap();
1628        
1629        let orders = doc.get("orders").unwrap().as_array().unwrap();
1630        let order = orders[0].as_object().unwrap();
1631        let payment = order.get("payment").unwrap().as_object().unwrap();
1632        let method = payment.get("method").unwrap().as_object().unwrap();
1633        assert_eq!(method.get("type").unwrap().as_str(), Some("credit"));
1634    }
1635
1636    #[test]
1637    fn test_json_roundtrip_basic() {
1638        let json = r#"{"name":"alice","age":30,"active":true,"score":95.5}"#;
1639        let doc = TeaLeaf::from_json(json).unwrap();
1640
1641        assert_eq!(doc.get("name").unwrap().as_str(), Some("alice"));
1642        assert_eq!(doc.get("age").unwrap().as_int(), Some(30));
1643        assert_eq!(doc.get("active").unwrap().as_bool(), Some(true));
1644        assert_eq!(doc.get("score").unwrap().as_float(), Some(95.5));
1645
1646        // Round-trip back to JSON
1647        let json_out = doc.to_json().unwrap();
1648        assert!(json_out.contains("\"name\":\"alice\"") || json_out.contains("\"name\": \"alice\""));
1649    }
1650
1651    #[test]
1652    fn test_json_roundtrip_root_array() {
1653        // Root-level arrays should round-trip without wrapping
1654        let json = r#"[{"id":"0001","type":"donut","name":"Cake"},{"id":"0002","type":"donut","name":"Raised"}]"#;
1655        let doc = TeaLeaf::from_json(json).unwrap();
1656
1657        // Internally stored under "root" key
1658        let root = doc.get("root").unwrap();
1659        let arr = root.as_array().unwrap();
1660        assert_eq!(arr.len(), 2);
1661
1662        // Round-trip should produce the array directly, NOT {"root": [...]}
1663        let json_out = doc.to_json_compact().unwrap();
1664        assert!(json_out.starts_with('['), "Root array should serialize directly: {}", json_out);
1665        assert!(json_out.ends_with(']'), "Root array should end with ]: {}", json_out);
1666        assert!(!json_out.contains("\"root\""), "Should NOT wrap in root object: {}", json_out);
1667
1668        // Verify content preserved
1669        assert!(json_out.contains("\"id\":\"0001\"") || json_out.contains("\"id\": \"0001\""));
1670        assert!(json_out.contains("\"name\":\"Cake\"") || json_out.contains("\"name\": \"Cake\""));
1671    }
1672
1673    #[test]
1674    fn test_json_roundtrip_root_array_empty() {
1675        // Empty array should also round-trip correctly
1676        let json = r#"[]"#;
1677        let doc = TeaLeaf::from_json(json).unwrap();
1678
1679        let json_out = doc.to_json_compact().unwrap();
1680        assert_eq!(json_out, "[]", "Empty array should round-trip: {}", json_out);
1681    }
1682
1683    #[test]
1684    fn test_json_roundtrip_root_object_with_root_key() {
1685        // An object that happens to have a "root" key should NOT be confused
1686        let json = r#"{"root":[1,2,3],"other":"value"}"#;
1687        let doc = TeaLeaf::from_json(json).unwrap();
1688
1689        let json_out = doc.to_json_compact().unwrap();
1690        // This was a root object, so it should stay as an object
1691        assert!(json_out.starts_with('{'), "Root object should stay as object: {}", json_out);
1692        assert!(json_out.contains("\"root\""), "root key should be preserved: {}", json_out);
1693        assert!(json_out.contains("\"other\""), "other key should be preserved: {}", json_out);
1694    }
1695
1696    #[test]
1697    fn test_json_export_bytes() {
1698        // Create a document with bytes programmatically
1699        let mut entries = IndexMap::new();
1700        entries.insert("data".to_string(), Value::Bytes(vec![0xca, 0xfe, 0xf0, 0x0d]));
1701        let doc = TeaLeaf { data: entries, schemas: IndexMap::new(), unions: IndexMap::new(), is_root_array: false };
1702
1703        let json = doc.to_json().unwrap();
1704        assert!(json.contains("0xcafef00d"), "Bytes should export as hex string: {}", json);
1705    }
1706
1707    #[test]
1708    fn test_json_export_ref() {
1709        let mut entries = IndexMap::new();
1710        entries.insert("config".to_string(), Value::Ref("base_config".to_string()));
1711        let doc = TeaLeaf { data: entries, schemas: IndexMap::new(), unions: IndexMap::new(), is_root_array: false };
1712
1713        let json = doc.to_json().unwrap();
1714        assert!(json.contains("\"$ref\""), "Ref should export with $ref key: {}", json);
1715        assert!(json.contains("base_config"), "Ref name should be in output: {}", json);
1716    }
1717
1718    #[test]
1719    fn test_json_export_tagged() {
1720        let mut entries = IndexMap::new();
1721        entries.insert("status".to_string(), Value::Tagged("ok".to_string(), Box::new(Value::Int(200))));
1722        let doc = TeaLeaf { data: entries, schemas: IndexMap::new(), unions: IndexMap::new(), is_root_array: false };
1723
1724        let json = doc.to_json().unwrap();
1725        assert!(json.contains("\"$tag\""), "Tagged should export with $tag key: {}", json);
1726        assert!(json.contains("\"ok\""), "Tag name should be in output: {}", json);
1727        assert!(json.contains("\"$value\""), "Tagged should have $value key: {}", json);
1728    }
1729
1730    #[test]
1731    fn test_json_export_map() {
1732        let mut entries = IndexMap::new();
1733        entries.insert("lookup".to_string(), Value::Map(vec![
1734            (Value::Int(1), Value::String("one".to_string())),
1735            (Value::Int(2), Value::String("two".to_string())),
1736        ]));
1737        let doc = TeaLeaf { data: entries, schemas: IndexMap::new(), unions: IndexMap::new(), is_root_array: false };
1738
1739        let json = doc.to_json().unwrap();
1740        // Map exports as array of [key, value] pairs
1741        // Check that the structure contains the key and values (regardless of formatting)
1742        assert!(json.contains("\"lookup\""), "Map key should be in output: {}", json);
1743        assert!(json.contains("\"one\""), "Map values should be in output: {}", json);
1744        assert!(json.contains("\"two\""), "Map values should be in output: {}", json);
1745        // Verify it's an array structure (has nested arrays)
1746        let compact = json.replace(" ", "").replace("\n", "");
1747        assert!(compact.contains("[["), "Map should export as nested array: {}", json);
1748    }
1749
1750    #[test]
1751    fn test_json_export_timestamp() {
1752        let mut entries = IndexMap::new();
1753        // 2024-01-15T10:30:00Z = 1705315800000 ms, but let's verify with a known value
1754        // Use 0 = 1970-01-01T00:00:00Z for simplicity
1755        entries.insert("created".to_string(), Value::Timestamp(0, 0));
1756        let doc = TeaLeaf { data: entries, schemas: IndexMap::new(), unions: IndexMap::new(), is_root_array: false };
1757
1758        let json = doc.to_json().unwrap();
1759        assert!(json.contains("1970-01-01"), "Timestamp should export as ISO 8601 date: {}", json);
1760        assert!(json.contains("00:00:00"), "Timestamp time should be epoch: {}", json);
1761    }
1762
1763    #[test]
1764    fn test_json_import_limitation_ref_becomes_object() {
1765        // JSON with $ref pattern should become a plain object, NOT a Ref value
1766        let json = r#"{"config":{"$ref":"base_config"}}"#;
1767        let doc = TeaLeaf::from_json(json).unwrap();
1768
1769        let config = doc.get("config").unwrap();
1770        // This should be an Object, not a Ref
1771        assert!(config.as_object().is_some(), "JSON $ref should become Object, not Ref");
1772        assert!(config.as_ref_name().is_none(), "JSON $ref should NOT become Ref value");
1773    }
1774
1775    #[test]
1776    fn test_json_import_limitation_tagged_becomes_object() {
1777        // JSON with $tag/$value pattern should become a plain object, NOT a Tagged value
1778        let json = r#"{"status":{"$tag":"ok","$value":200}}"#;
1779        let doc = TeaLeaf::from_json(json).unwrap();
1780
1781        let status = doc.get("status").unwrap();
1782        // This should be an Object, not a Tagged
1783        assert!(status.as_object().is_some(), "JSON $tag should become Object, not Tagged");
1784        assert!(status.as_tagged().is_none(), "JSON $tag should NOT become Tagged value");
1785    }
1786
1787    #[test]
1788    fn test_json_import_limitation_timestamp_becomes_string() {
1789        // ISO 8601 strings in JSON should remain strings, NOT become Timestamp
1790        let json = r#"{"created":"2024-01-15T10:30:00Z"}"#;
1791        let doc = TeaLeaf::from_json(json).unwrap();
1792
1793        let created = doc.get("created").unwrap();
1794        // This should be a String, not a Timestamp
1795        assert!(created.as_str().is_some(), "ISO timestamp string should remain String");
1796        assert!(created.as_timestamp_millis().is_none(), "ISO timestamp should NOT become Timestamp value");
1797    }
1798
1799    // =========================================================================
1800    // JSON ↔ Binary Conversion Tests
1801    // =========================================================================
1802
1803    #[test]
1804    fn test_json_to_binary_roundtrip_primitives() {
1805        use tempfile::NamedTempFile;
1806
1807        let json = r#"{"name":"alice","age":30,"score":95.5,"active":true,"nothing":null}"#;
1808        let doc = TeaLeaf::from_json(json).unwrap();
1809
1810        // Compile to binary
1811        let temp = NamedTempFile::new().unwrap();
1812        let path = temp.path();
1813        doc.compile(path, false).unwrap();
1814
1815        // Read back
1816        let reader = Reader::open(path).unwrap();
1817        assert_eq!(reader.get("name").unwrap().as_str(), Some("alice"));
1818        assert_eq!(reader.get("age").unwrap().as_int(), Some(30));
1819        assert_eq!(reader.get("score").unwrap().as_float(), Some(95.5));
1820        assert_eq!(reader.get("active").unwrap().as_bool(), Some(true));
1821        assert!(reader.get("nothing").unwrap().is_null());
1822    }
1823
1824    #[test]
1825    fn test_json_to_binary_roundtrip_arrays() {
1826        use tempfile::NamedTempFile;
1827
1828        let json = r#"{"numbers":[1,2,3,4,5],"names":["alice","bob","charlie"]}"#;
1829        let doc = TeaLeaf::from_json(json).unwrap();
1830
1831        let temp = NamedTempFile::new().unwrap();
1832        doc.compile(temp.path(), false).unwrap();
1833
1834        let reader = Reader::open(temp.path()).unwrap();
1835
1836        let numbers = reader.get("numbers").unwrap();
1837        let arr = numbers.as_array().unwrap();
1838        assert_eq!(arr.len(), 5);
1839        assert_eq!(arr[0].as_int(), Some(1));
1840        assert_eq!(arr[4].as_int(), Some(5));
1841
1842        let names = reader.get("names").unwrap();
1843        let arr = names.as_array().unwrap();
1844        assert_eq!(arr.len(), 3);
1845        assert_eq!(arr[0].as_str(), Some("alice"));
1846    }
1847
1848    #[test]
1849    fn test_json_to_binary_roundtrip_nested_objects() {
1850        use tempfile::NamedTempFile;
1851
1852        let json = r#"{"user":{"name":"alice","profile":{"bio":"dev","settings":{"theme":"dark"}}}}"#;
1853        let doc = TeaLeaf::from_json(json).unwrap();
1854
1855        let temp = NamedTempFile::new().unwrap();
1856        doc.compile(temp.path(), false).unwrap();
1857
1858        let reader = Reader::open(temp.path()).unwrap();
1859        let user = reader.get("user").unwrap();
1860        let user_obj = user.as_object().unwrap();
1861        assert_eq!(user_obj.get("name").unwrap().as_str(), Some("alice"));
1862
1863        let profile = user_obj.get("profile").unwrap().as_object().unwrap();
1864        assert_eq!(profile.get("bio").unwrap().as_str(), Some("dev"));
1865
1866        let settings = profile.get("settings").unwrap().as_object().unwrap();
1867        assert_eq!(settings.get("theme").unwrap().as_str(), Some("dark"));
1868    }
1869
1870    #[test]
1871    fn test_json_to_binary_with_compression() {
1872        use tempfile::NamedTempFile;
1873
1874        // Create a document with repetitive data to test compression
1875        let mut entries = IndexMap::new();
1876        entries.insert("data".to_string(), Value::String("a".repeat(1000)));
1877        entries.insert("count".to_string(), Value::Int(12345));
1878        let doc = TeaLeaf { data: entries, schemas: IndexMap::new(), unions: IndexMap::new(), is_root_array: false };
1879
1880        let temp = NamedTempFile::new().unwrap();
1881        doc.compile(temp.path(), true).unwrap(); // compressed
1882
1883        let reader = Reader::open(temp.path()).unwrap();
1884        assert_eq!(reader.get("data").unwrap().as_str(), Some("a".repeat(1000).as_str()));
1885        assert_eq!(reader.get("count").unwrap().as_int(), Some(12345));
1886    }
1887
1888    #[test]
1889    fn test_tl_to_binary_preserves_ref() {
1890        use tempfile::NamedTempFile;
1891
1892        let mut entries = IndexMap::new();
1893        entries.insert("base".to_string(), Value::Object(vec![
1894            ("host".to_string(), Value::String("localhost".to_string())),
1895        ].into_iter().collect()));
1896        entries.insert("config".to_string(), Value::Ref("base".to_string()));
1897        let doc = TeaLeaf { data: entries, schemas: IndexMap::new(), unions: IndexMap::new(), is_root_array: false };
1898
1899        let temp = NamedTempFile::new().unwrap();
1900        doc.compile(temp.path(), false).unwrap();
1901
1902        let reader = Reader::open(temp.path()).unwrap();
1903        let config = reader.get("config").unwrap();
1904        assert_eq!(config.as_ref_name(), Some("base"));
1905    }
1906
1907    #[test]
1908    fn test_tl_to_binary_preserves_tagged() {
1909        use tempfile::NamedTempFile;
1910
1911        let mut entries = IndexMap::new();
1912        entries.insert("status".to_string(), Value::Tagged("ok".to_string(), Box::new(Value::Int(200))));
1913        let doc = TeaLeaf { data: entries, schemas: IndexMap::new(), unions: IndexMap::new(), is_root_array: false };
1914
1915        let temp = NamedTempFile::new().unwrap();
1916        doc.compile(temp.path(), false).unwrap();
1917
1918        let reader = Reader::open(temp.path()).unwrap();
1919        let status = reader.get("status").unwrap();
1920        let (tag, value) = status.as_tagged().unwrap();
1921        assert_eq!(tag, "ok");
1922        assert_eq!(value.as_int(), Some(200));
1923    }
1924
1925    #[test]
1926    fn test_tl_to_binary_preserves_map() {
1927        use tempfile::NamedTempFile;
1928
1929        let mut entries = IndexMap::new();
1930        entries.insert("lookup".to_string(), Value::Map(vec![
1931            (Value::Int(1), Value::String("one".to_string())),
1932            (Value::Int(2), Value::String("two".to_string())),
1933        ]));
1934        let doc = TeaLeaf { data: entries, schemas: IndexMap::new(), unions: IndexMap::new(), is_root_array: false };
1935
1936        let temp = NamedTempFile::new().unwrap();
1937        doc.compile(temp.path(), false).unwrap();
1938
1939        let reader = Reader::open(temp.path()).unwrap();
1940        let lookup = reader.get("lookup").unwrap();
1941        let map = lookup.as_map().unwrap();
1942        assert_eq!(map.len(), 2);
1943        assert_eq!(map[0].0.as_int(), Some(1));
1944        assert_eq!(map[0].1.as_str(), Some("one"));
1945    }
1946
1947    #[test]
1948    fn test_tl_to_binary_preserves_bytes() {
1949        use tempfile::NamedTempFile;
1950
1951        let mut entries = IndexMap::new();
1952        entries.insert("data".to_string(), Value::Bytes(vec![0xca, 0xfe, 0xf0, 0x0d]));
1953        let doc = TeaLeaf { data: entries, schemas: IndexMap::new(), unions: IndexMap::new(), is_root_array: false };
1954
1955        let temp = NamedTempFile::new().unwrap();
1956        doc.compile(temp.path(), false).unwrap();
1957
1958        let reader = Reader::open(temp.path()).unwrap();
1959        let data = reader.get("data").unwrap();
1960        assert_eq!(data.as_bytes(), Some(vec![0xca, 0xfe, 0xf0, 0x0d].as_slice()));
1961    }
1962
1963    #[test]
1964    fn test_tl_to_binary_preserves_timestamp() {
1965        use tempfile::NamedTempFile;
1966
1967        let mut entries = IndexMap::new();
1968        entries.insert("created".to_string(), Value::Timestamp(1705315800000, 0)); // 2024-01-15T10:30:00Z
1969        let doc = TeaLeaf { data: entries, schemas: IndexMap::new(), unions: IndexMap::new(), is_root_array: false };
1970
1971        let temp = NamedTempFile::new().unwrap();
1972        doc.compile(temp.path(), false).unwrap();
1973
1974        let reader = Reader::open(temp.path()).unwrap();
1975        let created = reader.get("created").unwrap();
1976        assert_eq!(created.as_timestamp_millis(), Some(1705315800000));
1977    }
1978
1979    #[test]
1980    fn test_json_import_limitation_hex_string_remains_string() {
1981        // Hex strings in JSON should remain strings, NOT become Bytes
1982        let json = r#"{"data":"0xcafef00d"}"#;
1983        let doc = TeaLeaf::from_json(json).unwrap();
1984
1985        let data = doc.get("data").unwrap();
1986        // This should be a String, not Bytes
1987        assert!(data.as_str().is_some(), "Hex string should remain String");
1988        assert_eq!(data.as_str(), Some("0xcafef00d"));
1989        assert!(data.as_bytes().is_none(), "Hex string should NOT become Bytes value");
1990    }
1991
1992    #[test]
1993    fn test_json_import_limitation_array_pairs_remain_array() {
1994        // JSON arrays that look like map pairs should remain arrays, NOT become Maps
1995        let json = r#"{"lookup":[[1,"one"],[2,"two"]]}"#;
1996        let doc = TeaLeaf::from_json(json).unwrap();
1997
1998        let lookup = doc.get("lookup").unwrap();
1999        // This should be an Array, not a Map
2000        assert!(lookup.as_array().is_some(), "Array of pairs should remain Array");
2001        assert!(lookup.as_map().is_none(), "Array of pairs should NOT become Map value");
2002
2003        // Verify structure
2004        let arr = lookup.as_array().unwrap();
2005        assert_eq!(arr.len(), 2);
2006        let first_pair = arr[0].as_array().unwrap();
2007        assert_eq!(first_pair[0].as_int(), Some(1));
2008        assert_eq!(first_pair[1].as_str(), Some("one"));
2009    }
2010
2011    // =========================================================================
2012    // Cross-Language Parity Test
2013    // =========================================================================
2014
2015    #[test]
2016    fn test_cross_language_parity_all_types() {
2017        // This test verifies that Rust JSON export matches expected format
2018        // for ALL special types. The same fixture is tested in .NET.
2019
2020        use tempfile::NamedTempFile;
2021
2022        // Create a document with all special types
2023        let mut data = IndexMap::new();
2024        data.insert("null_val".to_string(), Value::Null);
2025        data.insert("bool_true".to_string(), Value::Bool(true));
2026        data.insert("int_val".to_string(), Value::Int(42));
2027        data.insert("float_val".to_string(), Value::Float(3.14159));
2028        data.insert("string_val".to_string(), Value::String("hello".to_string()));
2029        data.insert("bytes_val".to_string(), Value::Bytes(vec![0xca, 0xfe]));
2030        data.insert("timestamp_val".to_string(), Value::Timestamp(0, 0));
2031        data.insert("array_val".to_string(), Value::Array(vec![Value::Int(1), Value::Int(2)]));
2032        data.insert("object_val".to_string(), Value::Object(
2033            vec![("x".to_string(), Value::Int(1))].into_iter().collect()
2034        ));
2035        data.insert("ref_val".to_string(), Value::Ref("object_val".to_string()));
2036        data.insert("tagged_val".to_string(), Value::Tagged("ok".to_string(), Box::new(Value::Int(200))));
2037        data.insert("map_val".to_string(), Value::Map(vec![
2038            (Value::Int(1), Value::String("one".to_string())),
2039        ]));
2040
2041        let doc = TeaLeaf { data, schemas: IndexMap::new(), unions: IndexMap::new(), is_root_array: false };
2042
2043        // Compile to binary and read back
2044        let temp = NamedTempFile::new().unwrap();
2045        doc.compile(temp.path(), false).unwrap();
2046        let reader = Reader::open(temp.path()).unwrap();
2047
2048        // Verify each type survives binary round-trip
2049        assert!(reader.get("null_val").unwrap().is_null());
2050        assert_eq!(reader.get("bool_true").unwrap().as_bool(), Some(true));
2051        assert_eq!(reader.get("int_val").unwrap().as_int(), Some(42));
2052        assert_eq!(reader.get("float_val").unwrap().as_float(), Some(3.14159));
2053        assert_eq!(reader.get("string_val").unwrap().as_str(), Some("hello"));
2054        assert_eq!(reader.get("bytes_val").unwrap().as_bytes(), Some(&[0xca, 0xfe][..]));
2055        assert_eq!(reader.get("timestamp_val").unwrap().as_timestamp_millis(), Some(0));
2056
2057        let arr = reader.get("array_val").unwrap();
2058        assert_eq!(arr.as_array().unwrap().len(), 2);
2059
2060        let obj = reader.get("object_val").unwrap();
2061        assert!(obj.as_object().is_some());
2062
2063        let ref_val = reader.get("ref_val").unwrap();
2064        assert_eq!(ref_val.as_ref_name(), Some("object_val"));
2065
2066        let tagged = reader.get("tagged_val").unwrap();
2067        let (tag, val) = tagged.as_tagged().unwrap();
2068        assert_eq!(tag, "ok");
2069        assert_eq!(val.as_int(), Some(200));
2070
2071        let map = reader.get("map_val").unwrap();
2072        let pairs = map.as_map().unwrap();
2073        assert_eq!(pairs.len(), 1);
2074
2075        // Verify JSON export format matches expected conventions
2076        let json = doc.to_json().unwrap();
2077
2078        // Bytes should be hex string
2079        assert!(json.contains("0xcafe"), "Bytes should export as hex: {}", json);
2080
2081        // Ref should have $ref key
2082        assert!(json.contains("\"$ref\""), "Ref should have $ref key: {}", json);
2083
2084        // Tagged should have $tag and $value
2085        assert!(json.contains("\"$tag\""), "Tagged should have $tag: {}", json);
2086        assert!(json.contains("\"$value\""), "Tagged should have $value: {}", json);
2087
2088        // Map should be array of pairs (nested arrays)
2089        let compact = json.replace(" ", "").replace("\n", "");
2090        assert!(compact.contains("[["), "Map should export as array of pairs: {}", json);
2091
2092        // Timestamp should be ISO 8601
2093        assert!(json.contains("1970-01-01"), "Timestamp should be ISO 8601: {}", json);
2094    }
2095
2096    // =========================================================================
2097    // JSON Conversion Contract Tests
2098    // =========================================================================
2099    // These tests lock down the exact JSON↔TeaLeaf conversion behavior.
2100    // STABILITY POLICY:
2101    // - Plain JSON roundtrip: MUST be lossless for primitives, arrays, objects
2102    // - TeaLeaf→JSON: Special types have FIXED representations that MUST NOT change
2103    // - JSON→TeaLeaf: No magic parsing; $ref/$tag/hex/ISO8601 stay as plain JSON
2104
2105    mod conversion_contracts {
2106        use super::*;
2107
2108        // --- Plain JSON Roundtrip (STABLE) ---
2109
2110        #[test]
2111        fn contract_null_roundtrip() {
2112            let doc = TeaLeaf::from_json("null").unwrap();
2113            assert!(matches!(doc.get("root").unwrap(), Value::Null));
2114        }
2115
2116        #[test]
2117        fn contract_bool_roundtrip() {
2118            let doc = TeaLeaf::from_json(r#"{"t": true, "f": false}"#).unwrap();
2119            assert_eq!(doc.get("t").unwrap().as_bool(), Some(true));
2120            assert_eq!(doc.get("f").unwrap().as_bool(), Some(false));
2121
2122            let json = doc.to_json_compact().unwrap();
2123            assert!(json.contains("true"));
2124            assert!(json.contains("false"));
2125        }
2126
2127        #[test]
2128        fn contract_integer_roundtrip() {
2129            let doc = TeaLeaf::from_json(r#"{"zero": 0, "pos": 42, "neg": -123}"#).unwrap();
2130            assert_eq!(doc.get("zero").unwrap().as_int(), Some(0));
2131            assert_eq!(doc.get("pos").unwrap().as_int(), Some(42));
2132            assert_eq!(doc.get("neg").unwrap().as_int(), Some(-123));
2133        }
2134
2135        #[test]
2136        fn contract_float_roundtrip() {
2137            let doc = TeaLeaf::from_json(r#"{"pi": 3.14159}"#).unwrap();
2138            let pi = doc.get("pi").unwrap().as_float().unwrap();
2139            assert!((pi - 3.14159).abs() < 0.00001);
2140        }
2141
2142        #[test]
2143        fn contract_string_roundtrip() {
2144            let doc = TeaLeaf::from_json(r#"{"s": "hello world", "u": "日本語", "e": ""}"#).unwrap();
2145            assert_eq!(doc.get("s").unwrap().as_str(), Some("hello world"));
2146            assert_eq!(doc.get("u").unwrap().as_str(), Some("日本語"));
2147            assert_eq!(doc.get("e").unwrap().as_str(), Some(""));
2148        }
2149
2150        #[test]
2151        fn contract_array_roundtrip() {
2152            let doc = TeaLeaf::from_json(r#"{"arr": [1, "two", true, null]}"#).unwrap();
2153            let arr = doc.get("arr").unwrap().as_array().unwrap();
2154            assert_eq!(arr.len(), 4);
2155            assert_eq!(arr[0].as_int(), Some(1));
2156            assert_eq!(arr[1].as_str(), Some("two"));
2157            assert_eq!(arr[2].as_bool(), Some(true));
2158            assert!(matches!(arr[3], Value::Null));
2159        }
2160
2161        #[test]
2162        fn contract_nested_array_roundtrip() {
2163            let doc = TeaLeaf::from_json(r#"{"matrix": [[1, 2], [3, 4]]}"#).unwrap();
2164            let matrix = doc.get("matrix").unwrap().as_array().unwrap();
2165            assert_eq!(matrix.len(), 2);
2166            let row0 = matrix[0].as_array().unwrap();
2167            assert_eq!(row0[0].as_int(), Some(1));
2168            assert_eq!(row0[1].as_int(), Some(2));
2169        }
2170
2171        #[test]
2172        fn contract_object_roundtrip() {
2173            let doc = TeaLeaf::from_json(r#"{"user": {"name": "alice", "age": 30}}"#).unwrap();
2174            let user = doc.get("user").unwrap().as_object().unwrap();
2175            assert_eq!(user.get("name").unwrap().as_str(), Some("alice"));
2176            assert_eq!(user.get("age").unwrap().as_int(), Some(30));
2177        }
2178
2179        // --- TeaLeaf→JSON Fixed Representations (STABLE) ---
2180
2181        #[test]
2182        fn contract_bytes_to_json_hex() {
2183            let mut data = IndexMap::new();
2184            data.insert("b".to_string(), Value::Bytes(vec![0xca, 0xfe, 0xba, 0xbe]));
2185            let doc = TeaLeaf { data, schemas: IndexMap::new(), unions: IndexMap::new(), is_root_array: false };
2186
2187            let json = doc.to_json_compact().unwrap();
2188            // CONTRACT: Bytes serialize as lowercase hex with 0x prefix
2189            assert!(json.contains("\"0xcafebabe\""), "Bytes must be 0x-prefixed hex: {}", json);
2190        }
2191
2192        #[test]
2193        fn contract_bytes_empty_to_json() {
2194            let mut data = IndexMap::new();
2195            data.insert("b".to_string(), Value::Bytes(vec![]));
2196            let doc = TeaLeaf { data, schemas: IndexMap::new(), unions: IndexMap::new(), is_root_array: false };
2197
2198            let json = doc.to_json_compact().unwrap();
2199            // CONTRACT: Empty bytes serialize as "0x"
2200            assert!(json.contains("\"0x\""), "Empty bytes must be \"0x\": {}", json);
2201        }
2202
2203        #[test]
2204        fn contract_timestamp_to_json_iso8601() {
2205            let mut data = IndexMap::new();
2206            // 2024-01-15T10:50:00.123Z (verified milliseconds since epoch)
2207            data.insert("ts".to_string(), Value::Timestamp(1705315800123, 0));
2208            let doc = TeaLeaf { data, schemas: IndexMap::new(), unions: IndexMap::new(), is_root_array: false };
2209
2210            let json = doc.to_json_compact().unwrap();
2211            // CONTRACT: Timestamp serializes as ISO 8601 with milliseconds
2212            assert!(json.contains("2024-01-15T10:50:00.123Z"),
2213                "Timestamp must be ISO 8601 with ms: {}", json);
2214        }
2215
2216        #[test]
2217        fn contract_timestamp_epoch_to_json() {
2218            let mut data = IndexMap::new();
2219            data.insert("ts".to_string(), Value::Timestamp(0, 0));
2220            let doc = TeaLeaf { data, schemas: IndexMap::new(), unions: IndexMap::new(), is_root_array: false };
2221
2222            let json = doc.to_json_compact().unwrap();
2223            // CONTRACT: Unix epoch is 1970-01-01T00:00:00Z (no ms for whole seconds)
2224            assert!(json.contains("1970-01-01T00:00:00Z"),
2225                "Epoch must be 1970-01-01T00:00:00Z: {}", json);
2226        }
2227
2228        #[test]
2229        fn contract_ref_to_json() {
2230            let mut data = IndexMap::new();
2231            data.insert("r".to_string(), Value::Ref("target_key".to_string()));
2232            let doc = TeaLeaf { data, schemas: IndexMap::new(), unions: IndexMap::new(), is_root_array: false };
2233
2234            let json = doc.to_json_compact().unwrap();
2235            // CONTRACT: Ref serializes as {"$ref": "name"}
2236            assert!(json.contains("\"$ref\":\"target_key\"") || json.contains("\"$ref\": \"target_key\""),
2237                "Ref must be {{\"$ref\": \"name\"}}: {}", json);
2238        }
2239
/// Contract: a `Value::Tagged` is rendered to JSON with `$tag` and `$value` keys.
#[test]
fn contract_tagged_to_json() {
    let tagged = Value::Tagged("ok".to_string(), Box::new(Value::Int(200)));
    let mut entries = IndexMap::new();
    entries.insert("t".to_string(), tagged);
    let doc = TeaLeaf::new(IndexMap::new(), entries);

    let rendered = doc.to_json_compact().unwrap();

    // CONTRACT: Tagged serializes with $tag and $value keys
    assert!(rendered.contains("\"$tag\""), "Tagged must have $tag: {}", rendered);
    assert!(rendered.contains("\"ok\""), "Tag name must be present: {}", rendered);
    assert!(rendered.contains("\"$value\""), "Tagged must have $value: {}", rendered);
    assert!(rendered.contains("200"), "Inner value must be present: {}", rendered);
}
2253
/// Contract: a tagged value wrapping `Value::Null` still emits an explicit `$value: null`.
#[test]
fn contract_tagged_null_value_to_json() {
    let mut entries = IndexMap::new();
    entries.insert(
        "t".to_string(),
        Value::Tagged("none".to_string(), Box::new(Value::Null)),
    );
    let doc = TeaLeaf::new(IndexMap::new(), entries);

    let rendered = doc.to_json_compact().unwrap();

    // CONTRACT: Tagged with null inner still has $value: null
    let has_null_payload = ["\"$value\":null", "\"$value\": null"]
        .iter()
        .any(|&needle| rendered.contains(needle));
    assert!(
        has_null_payload,
        "Tagged with null must have $value:null: {}",
        rendered
    );
}
2265
/// Contract: a `Value::Map` is rendered to JSON as an array of `[key, value]` pairs.
#[test]
fn contract_map_to_json_pairs() {
    let pairs = vec![
        (Value::Int(1), Value::String("one".to_string())),
        (Value::Int(2), Value::String("two".to_string())),
    ];
    let mut entries = IndexMap::new();
    entries.insert("m".to_string(), Value::Map(pairs));
    let doc = TeaLeaf::new(IndexMap::new(), entries);

    let rendered = doc.to_json_compact().unwrap();

    // CONTRACT: Map serializes as array of [key, value] pairs
    let compact = rendered.contains("[[1,\"one\"],[2,\"two\"]]");
    let spaced = rendered.contains("[[1, \"one\"], [2, \"two\"]]");
    assert!(compact || spaced, "Map must be [[k,v],...]: {}", rendered);
}
2281
/// Contract: an empty `Value::Map` is rendered to JSON as an empty array.
#[test]
fn contract_map_empty_to_json() {
    let mut entries = IndexMap::new();
    entries.insert("m".to_string(), Value::Map(Vec::new()));
    let doc = TeaLeaf::new(IndexMap::new(), entries);

    let rendered = doc.to_json_compact().unwrap();

    // CONTRACT: Empty map serializes as empty array
    let compact = rendered.contains("\"m\":[]");
    let spaced = rendered.contains("\"m\": []");
    assert!(compact || spaced, "Empty map must be []: {}", rendered);
}
2293
2294        // --- JSON→TeaLeaf No Magic (STABLE) ---
2295
/// Contract: importing JSON never promotes a `{"$ref": ...}` object to a `Ref` value.
#[test]
fn contract_json_dollar_ref_stays_object() {
    let source = r#"{"x": {"$ref": "some_key"}}"#;
    let doc = TeaLeaf::from_json(source).unwrap();
    let value = doc.get("x").unwrap();

    // CONTRACT: JSON {"$ref": ...} MUST remain Object, NOT become Ref
    let is_object = value.as_object().is_some();
    let is_ref = value.as_ref_name().is_some();
    assert!(is_object, "$ref in JSON must stay Object, not become Ref");
    assert!(!is_ref, "$ref must not auto-convert to Ref type");
}
2304
/// Contract: importing JSON never promotes a `{"$tag", "$value"}` object to a `Tagged` value.
#[test]
fn contract_json_dollar_tag_stays_object() {
    let source = r#"{"x": {"$tag": "ok", "$value": 200}}"#;
    let doc = TeaLeaf::from_json(source).unwrap();
    let value = doc.get("x").unwrap();

    // CONTRACT: JSON {"$tag": ..., "$value": ...} MUST remain Object
    let is_object = value.as_object().is_some();
    let is_tagged = value.as_tagged().is_some();
    assert!(is_object, "$tag in JSON must stay Object, not become Tagged");
    assert!(!is_tagged, "$tag must not auto-convert to Tagged type");
}
2313
/// Contract: a JSON string that looks like a hex literal is imported as a plain string.
#[test]
fn contract_json_hex_string_stays_string() {
    let source = r#"{"x": "0xcafef00d"}"#;
    let doc = TeaLeaf::from_json(source).unwrap();
    let value = doc.get("x").unwrap();

    // CONTRACT: Hex strings MUST remain String, NOT become Bytes
    assert_eq!(value.as_str(), Some("0xcafef00d"));
    let became_bytes = value.as_bytes().is_some();
    assert!(!became_bytes, "Hex string must not auto-convert to Bytes");
}
2322
/// Contract: a JSON string that looks like an ISO 8601 timestamp is imported as a plain string.
#[test]
fn contract_json_iso_timestamp_stays_string() {
    let source = r#"{"x": "2024-01-15T10:30:00.000Z"}"#;
    let doc = TeaLeaf::from_json(source).unwrap();
    let value = doc.get("x").unwrap();

    // CONTRACT: ISO 8601 strings MUST remain String, NOT become Timestamp
    assert_eq!(value.as_str(), Some("2024-01-15T10:30:00.000Z"));
    let became_timestamp = value.as_timestamp_millis().is_some();
    assert!(!became_timestamp, "ISO string must not auto-convert to Timestamp");
}
2331
/// Contract: a JSON array of two-element arrays is imported as an array, never as a map.
#[test]
fn contract_json_array_pairs_stays_array() {
    let source = r#"{"x": [[1, "one"], [2, "two"]]}"#;
    let doc = TeaLeaf::from_json(source).unwrap();
    let value = doc.get("x").unwrap();

    // CONTRACT: Array of pairs MUST remain Array, NOT become Map
    let is_array = value.as_array().is_some();
    let is_map = value.as_map().is_some();
    assert!(is_array, "Array of pairs must stay Array, not become Map");
    assert!(!is_map, "Array pairs must not auto-convert to Map");
}
2340
2341        // --- Number Type Inference (STABLE) ---
2342
/// Contract: a JSON integer that fits in `i64` is imported as an `Int`.
#[test]
fn contract_number_integer_to_int() {
    let doc = TeaLeaf::from_json(r#"{"n": 42}"#).unwrap();
    let number = doc.get("n").unwrap();

    // CONTRACT: Integers that fit i64 become Int
    assert!(number.as_int().is_some());
}
2349
/// Contract: a positive JSON integer beyond `i64` but within `u64` is imported as a `UInt`.
#[test]
fn contract_number_large_to_uint() {
    // 18446744073709551615 is u64::MAX, which does not fit in an i64.
    let source = r#"{"n": 18446744073709551615}"#;
    let doc = TeaLeaf::from_json(source).unwrap();
    let number = doc.get("n").unwrap();

    // CONTRACT: Large positive integers that fit u64 become UInt
    assert!(number.as_uint().is_some());
}
2357
/// Contract: a JSON number with a fractional part is imported as a `Float`.
#[test]
fn contract_number_decimal_to_float() {
    let doc = TeaLeaf::from_json(r#"{"n": 3.14}"#).unwrap();
    let number = doc.get("n").unwrap();

    // CONTRACT: Numbers with decimals become Float
    assert!(number.as_float().is_some());
}
2364
2365        // --- Edge Cases (STABLE) ---
2366
/// Contract: `f64::NAN` has no JSON representation and is rendered as `null`.
#[test]
fn contract_float_nan_to_null() {
    let mut data = IndexMap::new();
    data.insert("f".to_string(), Value::Float(f64::NAN));
    let doc = TeaLeaf { data, schemas: IndexMap::new(), unions: IndexMap::new(), is_root_array: false };

    let json = doc.to_json_compact().unwrap();
    // CONTRACT: NaN serializes as null (JSON has no NaN)
    // Match the whole key/value pair: a bare `contains("null")` would also be
    // satisfied by a quoted "null" string, which is not the contract.
    assert!(json.contains("\"f\":null") || json.contains("\"f\": null"),
        "NaN must serialize as null: {}", json);
}
2377
/// Contract: `f64::INFINITY` has no JSON representation and is rendered as `null`.
#[test]
fn contract_float_infinity_to_null() {
    let mut data = IndexMap::new();
    data.insert("f".to_string(), Value::Float(f64::INFINITY));
    let doc = TeaLeaf { data, schemas: IndexMap::new(), unions: IndexMap::new(), is_root_array: false };

    let json = doc.to_json_compact().unwrap();
    // CONTRACT: Infinity serializes as null (JSON has no Infinity)
    // Match the whole key/value pair: a bare `contains("null")` would also be
    // satisfied by a quoted "null" string, which is not the contract.
    assert!(json.contains("\"f\":null") || json.contains("\"f\": null"),
        "Infinity must serialize as null: {}", json);
}
2388
/// Contract: five levels of nested JSON objects survive import with the leaf value intact.
#[test]
fn contract_deep_nesting_preserved() {
    let source = r#"{"a":{"b":{"c":{"d":{"e":5}}}}}"#;
    let doc = TeaLeaf::from_json(source).unwrap();

    // Walk down one object level at a time; every level must be an Object.
    let level_a = doc.get("a").unwrap().as_object().unwrap();
    let level_b = level_a.get("b").unwrap().as_object().unwrap();
    let level_c = level_b.get("c").unwrap().as_object().unwrap();
    let level_d = level_c.get("d").unwrap().as_object().unwrap();

    let leaf = level_d.get("e").unwrap();
    assert_eq!(leaf.as_int(), Some(5));
}
2398    }
2399
2400    // =========================================================================
2401    // Schema Inference Tests
2402    // =========================================================================
2403
/// A uniform array of objects under `users` yields an inferred `user` schema
/// whose fields follow JSON insertion order, while the data stays readable.
#[test]
fn test_schema_inference_simple_array() {
    let source = r#"{"users": [{"name": "alice", "age": 30}, {"name": "bob", "age": 25}]}"#;
    let doc = TeaLeaf::from_json_with_schemas(source).unwrap();

    // Should have inferred a "user" schema
    let inferred = doc.schema("user");
    assert!(inferred.is_some(), "Should infer 'user' schema from 'users' array");
    let inferred = inferred.unwrap();
    assert_eq!(inferred.fields.len(), 2);

    // Fields should preserve insertion order from JSON
    for (field, expected) in inferred.fields.iter().zip(["name", "age"].iter()) {
        assert_eq!(field.name, *expected);
    }

    // Data should still be accessible
    let rows = doc.get("users").unwrap().as_array().unwrap();
    assert_eq!(rows.len(), 2);
    let first_row = rows[0].as_object().unwrap();
    assert_eq!(first_row.get("name").unwrap().as_str(), Some("alice"));
}
2425
/// Arrays of objects nested inside array elements get their own schema, and
/// the parent schema's field refers to it as an array of that schema.
#[test]
fn test_schema_inference_nested_arrays() {
    let source = r#"{
            "orders": [
                {"id": 1, "items": [{"sku": "A", "qty": 2}, {"sku": "B", "qty": 1}]},
                {"id": 2, "items": [{"sku": "C", "qty": 3}]}
            ]
        }"#;
    let doc = TeaLeaf::from_json_with_schemas(source).unwrap();

    // Should infer both "order" and "item" schemas
    assert!(doc.schema("order").is_some(), "Should infer 'order' schema");
    assert!(doc.schema("item").is_some(), "Should infer 'item' schema");

    // Order should have "id" and "items" fields
    let order = doc.schema("order").unwrap();
    let has_field = |wanted: &str| order.fields.iter().any(|f| f.name == wanted);
    assert!(has_field("id"));
    assert!(has_field("items"));

    // The "items" field should reference the "item" schema
    let items = order
        .fields
        .iter()
        .find(|f| f.name == "items")
        .unwrap();
    assert!(items.field_type.is_array);
    assert_eq!(items.field_type.base, "item");
}
2450
/// The text rendering of a schema-inferred document contains the struct
/// definition, a `@table` directive, and the row data.
#[test]
fn test_schema_inference_to_tl_text() {
    let source = r#"{"products": [{"name": "Widget", "price": 9.99}, {"name": "Gadget", "price": 19.99}]}"#;
    let doc = TeaLeaf::from_json_with_schemas(source).unwrap();
    let rendered = doc.to_tl_with_schemas();

    // Should contain struct definition
    assert!(rendered.contains("@struct product"), "TeaLeaf text should contain struct definition");
    assert!(rendered.contains("name: string"), "Struct should have name field");
    assert!(rendered.contains("price: float"), "Struct should have price field");

    // Should contain @table directive
    assert!(rendered.contains("@table product"), "TeaLeaf text should use @table for data");

    // Should contain tuple format data (bare or quoted)
    let bare = rendered.contains("Widget");
    let quoted = rendered.contains("\"Widget\"");
    assert!(bare || quoted, "Data should be present");
}
2469
/// JSON -> inferred schemas -> TeaLeaf text -> parse keeps both the data and the schema.
#[test]
fn test_schema_inference_roundtrip() {
    let source = r#"{"items": [{"id": 1, "name": "A"}, {"id": 2, "name": "B"}]}"#;

    // Convert to TeaLeaf text with schemas, then parse that text back.
    let rendered = TeaLeaf::from_json_with_schemas(source)
        .unwrap()
        .to_tl_with_schemas();
    let reparsed = TeaLeaf::parse(&rendered).unwrap();

    // Should have the same data
    let rows = reparsed.get("items").unwrap().as_array().unwrap();
    assert_eq!(rows.len(), 2);
    let first_row = rows[0].as_object().unwrap();
    assert_eq!(first_row.get("id").unwrap().as_int(), Some(1));
    assert_eq!(first_row.get("name").unwrap().as_str(), Some("A"));

    // Should have the schema
    assert!(reparsed.schema("item").is_some());
}
2490
/// A field that is `null` in at least one array element is inferred as nullable.
#[test]
fn test_schema_inference_nullable_fields() {
    let source = r#"{"users": [{"name": "alice", "email": "a@test.com"}, {"name": "bob", "email": null}]}"#;
    let doc = TeaLeaf::from_json_with_schemas(source).unwrap();

    let email = doc
        .schema("user")
        .unwrap()
        .fields
        .iter()
        .find(|f| f.name == "email")
        .unwrap();

    // Email should be nullable since one value is null
    assert!(email.field_type.nullable, "Field with null values should be nullable");
}
2502
/// A schema-typed array nested inside a table row must survive a text
/// round-trip: the outer array is emitted with `@table`, and the nested
/// `items` array parses back into objects with their original fields.
#[test]
fn test_schema_inference_nested_tuples_no_redundant_table() {
    let json = r#"{
            "orders": [
                {"id": 1, "items": [{"sku": "A", "qty": 2}]}
            ]
        }"#;
    let doc = TeaLeaf::from_json_with_schemas(json).unwrap();
    let tl_text = doc.to_tl_with_schemas();

    // Should have @table for orders. The nested items array is expected to be
    // plain [...] with tuples inside; that is verified structurally below by
    // re-parsing, rather than by counting `@table` occurrences.
    assert!(tl_text.contains("@table order"), "Should have @table for orders");

    // Parse and verify the structure is correct
    let parsed = TeaLeaf::parse(&tl_text).unwrap();
    let orders = parsed.get("orders").unwrap().as_array().unwrap();
    let first_order = orders[0].as_object().unwrap();
    let items = first_order.get("items").unwrap().as_array().unwrap();
    assert_eq!(items[0].as_object().unwrap().get("sku").unwrap().as_str(), Some("A"));
}
2527
/// Two arrays with different object shapes get separate schemas, each
/// carrying the field that is unique to its own array.
#[test]
fn test_schema_inference_mismatched_arrays_not_matched() {
    let source = r#"{
            "users": [{"id": "U1", "name": "Alice"}],
            "products": [{"id": "P1", "price": 9.99}]
        }"#;
    let doc = TeaLeaf::from_json_with_schemas(source).unwrap();

    // Should have separate schemas
    assert!(doc.schema("user").is_some());
    assert!(doc.schema("product").is_some());

    // User schema should have name field
    let user_has_name = doc
        .schema("user")
        .unwrap()
        .fields
        .iter()
        .any(|f| f.name == "name");
    assert!(user_has_name);

    // Product schema should have price field
    let product_has_price = doc
        .schema("product")
        .unwrap()
        .fields
        .iter()
        .any(|f| f.name == "price");
    assert!(product_has_price);
}
2549
/// String values containing `/` or `@` are quoted in the text output, dotted
/// values are left bare, and the quoted values parse back unchanged.
#[test]
fn test_schema_inference_special_char_quoting() {
    let source = r#"{"items": [
            {"category": "Electronics/Audio", "email": "test@example.com", "path": "a.b.c"}
        ]}"#;
    let rendered = TeaLeaf::from_json_with_schemas(source)
        .unwrap()
        .to_tl_with_schemas();

    // These should be quoted in output since they contain special characters
    assert!(rendered.contains("\"Electronics/Audio\""), "Slash should be quoted: {}", rendered);
    assert!(rendered.contains("\"test@example.com\""), "@ should be quoted: {}", rendered);
    // Dots are valid in names per spec grammar, so a.b.c should NOT be quoted
    let dotted_is_quoted = rendered.contains("\"a.b.c\"");
    assert!(!dotted_is_quoted, "Dots should NOT be quoted per spec grammar: {}", rendered);

    // Should parse back correctly
    let reparsed = TeaLeaf::parse(&rendered).unwrap();
    let rows = reparsed.get("items").unwrap().as_array().unwrap();
    let row = rows[0].as_object().unwrap();
    assert_eq!(row.get("category").unwrap().as_str(), Some("Electronics/Audio"));
    assert_eq!(row.get("email").unwrap().as_str(), Some("test@example.com"));
}
2572
/// Objects nested inside array elements get their own schemas, named after
/// the field that holds them, and the parent schema refers to them by name.
#[test]
fn test_schema_inference_nested_objects() {
    let source = r#"{
            "customers": [
                {
                    "id": 1,
                    "name": "Alice",
                    "billing_address": {
                        "street": "123 Main St",
                        "city": "Boston",
                        "state": "MA",
                        "postal_code": "02101",
                        "country": "USA"
                    },
                    "shipping_address": {
                        "street": "456 Oak Ave",
                        "city": "Cambridge",
                        "state": "MA",
                        "postal_code": "02139",
                        "country": "USA"
                    }
                },
                {
                    "id": 2,
                    "name": "Bob",
                    "billing_address": {
                        "street": "789 Elm St",
                        "city": "New York",
                        "state": "NY",
                        "postal_code": "10001",
                        "country": "USA"
                    },
                    "shipping_address": {
                        "street": "789 Elm St",
                        "city": "New York",
                        "state": "NY",
                        "postal_code": "10001",
                        "country": "USA"
                    }
                }
            ]
        }"#;

    let doc = TeaLeaf::from_json_with_schemas(source).unwrap();

    // Should have schemas for nested objects
    assert!(doc.schema("billing_address").is_some(), "Should create billing_address schema");
    assert!(doc.schema("shipping_address").is_some(), "Should create shipping_address schema");
    assert!(doc.schema("customer").is_some(), "Should create customer schema");

    // Check billing_address schema fields
    let billing = doc.schema("billing_address").unwrap();
    let billing_names: Vec<&str> = billing.fields.iter().map(|f| f.name.as_str()).collect();
    assert!(billing_names.contains(&"street"), "billing_address should have street field");
    assert!(billing_names.contains(&"city"), "billing_address should have city field");
    assert!(billing_names.contains(&"state"), "billing_address should have state field");
    assert!(billing_names.contains(&"postal_code"), "billing_address should have postal_code field");
    assert!(billing_names.contains(&"country"), "billing_address should have country field");

    // Check customer schema references the nested schemas
    let customer = doc.schema("customer").unwrap();
    let billing_ref = customer
        .fields
        .iter()
        .find(|f| f.name == "billing_address")
        .unwrap();
    assert_eq!(billing_ref.field_type.base, "billing_address", "customer.billing_address should reference billing_address schema");

    let shipping_ref = customer
        .fields
        .iter()
        .find(|f| f.name == "shipping_address")
        .unwrap();
    assert_eq!(shipping_ref.field_type.base, "shipping_address", "customer.shipping_address should reference shipping_address schema");

    // Serialize and verify output
    let rendered = doc.to_tl_with_schemas();
    assert!(rendered.contains("@struct billing_address"), "Output should contain billing_address struct");
    assert!(rendered.contains("@struct shipping_address"), "Output should contain shipping_address struct");
    assert!(rendered.contains("billing_address: billing_address"), "customer should have billing_address field with billing_address type");
    assert!(rendered.contains("shipping_address: shipping_address"), "customer should have shipping_address field with shipping_address type");
}
2648
/// A field of a nested object that is `null` in one element is inferred as
/// nullable in the nested object's schema.
#[test]
fn test_schema_inference_nested_objects_with_nulls() {
    let source = r#"{
            "orders": [
                {
                    "id": 1,
                    "customer": {
                        "name": "Alice",
                        "phone": "555-1234"
                    }
                },
                {
                    "id": 2,
                    "customer": {
                        "name": "Bob",
                        "phone": null
                    }
                }
            ]
        }"#;

    let doc = TeaLeaf::from_json_with_schemas(source).unwrap();

    // Customer schema should exist with nullable phone
    let phone = doc
        .schema("customer")
        .unwrap()
        .fields
        .iter()
        .find(|f| f.name == "phone")
        .unwrap();
    assert!(phone.field_type.nullable, "phone field should be nullable");
}
2678
2679    // =========================================================================
2680    // Coverage: dumps(), write_value(), escape_string(), format_float()
2681    // =========================================================================
2682
/// `dumps()` renders every `Value` variant; each variant's textual form must
/// appear somewhere in the output.
#[test]
fn test_dumps_all_value_types() {
    // One entry per value kind, inserted in this order.
    let entries = vec![
        ("null_val", Value::Null),
        ("bool_val", Value::Bool(true)),
        ("int_val", Value::Int(42)),
        ("uint_val", Value::UInt(999)),
        ("float_val", Value::Float(3.14)),
        ("str_val", Value::String("hello".to_string())),
        ("bytes_val", Value::Bytes(vec![0xca, 0xfe])),
        ("arr_val", Value::Array(vec![Value::Int(1), Value::Int(2)])),
        (
            "obj_val",
            Value::Object(std::iter::once(("x".to_string(), Value::Int(1))).collect()),
        ),
        (
            "map_val",
            Value::Map(vec![(Value::Int(1), Value::String("one".to_string()))]),
        ),
        ("ref_val", Value::Ref("target".to_string())),
        ("tag_val", Value::Tagged("ok".to_string(), Box::new(Value::Int(200)))),
        ("ts_val", Value::Timestamp(0, 0)),
        ("ts_millis", Value::Timestamp(1705315800123, 0)),
    ];

    let mut data = IndexMap::new();
    for (key, value) in entries {
        data.insert(key.to_string(), value);
    }

    let output = dumps(&data);

    // (expected fragment, failure message)
    let expectations = [
        ("~", "Should contain null"),
        ("true", "Should contain bool"),
        ("42", "Should contain int"),
        ("999", "Should contain uint"),
        ("3.14", "Should contain float"),
        ("hello", "Should contain string"),
        ("b\"cafe\"", "Should contain bytes literal"),
        ("[1, 2]", "Should contain array"),
        ("@map {", "Should contain map"),
        ("!target", "Should contain ref"),
        (":ok 200", "Should contain tagged"),
        ("1970-01-01T00:00:00Z", "Should contain epoch timestamp"),
        (".123Z", "Should contain millis timestamp"),
    ];
    for &(fragment, why) in expectations.iter() {
        assert!(output.contains(fragment), "{}", why);
    }
}
2721
/// `dumps()` writes byte values as `b"..."` hex literals and `parse()` reads
/// them back as `Value::Bytes`, including the empty case.
#[test]
fn test_bytes_literal_text_roundtrip() {
    let payload = [0xca_u8, 0xfe, 0xf0, 0x0d];

    let mut data = IndexMap::new();
    data.insert("payload".to_string(), Value::Bytes(payload.to_vec()));
    data.insert("empty".to_string(), Value::Bytes(Vec::new()));

    let rendered = dumps(&data);
    assert!(rendered.contains(r#"b"cafef00d""#), "Should emit b\"...\" literal: {}", rendered);
    assert!(rendered.contains(r#"b"""#), "Should emit empty bytes literal: {}", rendered);

    // Parse the text back
    let reparsed = TeaLeaf::parse(&rendered).unwrap();
    let payload_back = reparsed.data.get("payload").unwrap();
    let empty_back = reparsed.data.get("empty").unwrap();
    assert_eq!(payload_back.as_bytes(), Some(&payload[..]));
    assert_eq!(empty_back.as_bytes(), Some(&[][..]));
}
2738
/// `dumps()` wraps strings in double quotes when they could be misread as
/// another token, and leaves dotted words bare.
#[test]
fn test_dumps_string_quoting() {
    // (key, string value) pairs, inserted in this order.
    let samples = [
        ("quoted", "hello world"),
        ("unquoted", "hello"),
        ("reserved_true", "true"),
        ("reserved_null", "null"),
        ("reserved_tilde", "~"),
        ("empty", ""),
        ("at_start", "@directive"),
        ("hash_start", "#comment"),
        ("bang_start", "!ref"),
        ("hex_start", "0xabc"),
        ("number_like", "42abc"),
        ("negative_like", "-5"),
        ("slash", "a/b"),
        ("dot", "a.b"),
    ];

    let mut data = IndexMap::new();
    for &(key, text) in samples.iter() {
        data.insert(key.to_string(), Value::String(text.to_string()));
    }

    let output = dumps(&data);

    // Quoted values should be wrapped in double quotes
    let must_be_quoted = [
        ("\"hello world\"", "Spaces need quoting"),
        ("\"true\"", "Reserved word true needs quoting"),
        ("\"null\"", "Reserved word null needs quoting"),
        ("\"~\"", "Tilde needs quoting"),
        ("\"\"", "Empty string needs quoting"),
        ("\"@directive\"", "@ prefix needs quoting"),
        ("\"#comment\"", "# prefix needs quoting"),
        ("\"!ref\"", "! prefix needs quoting"),
        ("\"0xabc\"", "0x prefix needs quoting"),
        ("\"42abc\"", "Digit start needs quoting"),
        ("\"-5\"", "Negative number needs quoting"),
        ("\"a/b\"", "Slash needs quoting"),
    ];
    for &(fragment, why) in must_be_quoted.iter() {
        assert!(output.contains(fragment), "{}", why);
    }

    let dotted_is_quoted = output.contains("\"a.b\"");
    assert!(!dotted_is_quoted, "Dot should NOT need quoting per spec grammar");
}
2774
/// `escape_string()` emits short escapes for the common control characters,
/// quotes and backslashes, and `\uXXXX` for other control characters.
#[test]
fn test_escape_string_control_chars() {
    let whitespace = escape_string("tab\there\nnewline\rreturn");
    assert!(whitespace.contains("\\t"), "Tab should be escaped");
    assert!(whitespace.contains("\\n"), "Newline should be escaped");
    assert!(whitespace.contains("\\r"), "CR should be escaped");

    let legacy_controls = escape_string("\x08backspace\x0cformfeed");
    assert!(legacy_controls.contains("\\b"), "Backspace should be escaped");
    assert!(legacy_controls.contains("\\f"), "Formfeed should be escaped");

    let delimiters = escape_string("quote\"and\\backslash");
    assert!(delimiters.contains("\\\""), "Quote should be escaped");
    assert!(delimiters.contains("\\\\"), "Backslash should be escaped");

    // Other control characters use \uXXXX
    let generic_control = escape_string("\x01");
    assert!(generic_control.contains("\\u0001"), "Control char should use \\uXXXX");
}
2794
/// `format_float()` appends `.0` to whole numbers and leaves other floats as-is.
#[test]
fn test_format_float_both_branches() {
    // Whole number float: the plain rendering has no fractional part, so `.0` is added.
    let whole = format_float(42.0);
    assert_eq!(whole, "42.0");

    // Float with decimals should stay as-is
    let fractional = format_float(3.14);
    assert_eq!(fractional, "3.14");

    // A very small value must still render with an exponent or a decimal point.
    let tiny = format_float(1e-20);
    assert!(tiny.chars().any(|c| c == 'e' || c == '.'));
}
2807
/// `needs_quoting()` decides whether a string must be quoted; this walks a
/// table of inputs together with the expected decision.
#[test]
fn test_needs_quoting_various_patterns() {
    // (input, expected needs_quoting result, failure message)
    let cases = [
        ("", true, "Empty string"),
        ("hello world", true, "Whitespace"),
        ("a,b", true, "Comma"),
        ("(x)", true, "Parens"),
        ("[x]", true, "Brackets"),
        ("{x}", true, "Braces"),
        ("a:b", true, "Colon"),
        ("@x", true, "At sign"),
        ("a/b", true, "Slash"),
        ("a.b", false, "Dot is valid in names per spec grammar"),
        ("true", true, "Reserved true"),
        ("false", true, "Reserved false"),
        ("null", true, "Reserved null"),
        ("~", true, "Reserved tilde"),
        ("!bang", true, "Bang prefix"),
        ("#hash", true, "Hash prefix"),
        ("0xdead", true, "Hex prefix"),
        ("0Xdead", true, "Hex prefix uppercase"),
        ("42abc", true, "Starts with digit"),
        ("-5", true, "Starts with minus+digit"),
        ("+5", true, "Starts with plus+digit"),
        // Should NOT need quoting
        ("hello", false, "Simple word"),
        ("foo_bar", false, "Underscore word"),
        ("abc123", false, "Alpha then digits"),
    ];

    for &(input, expected, why) in cases.iter() {
        assert!(needs_quoting(input) == expected, "{}", why);
    }
}
2838
2839    // =========================================================================
2840    // Coverage: singularize()
2841    // =========================================================================
2842
/// `singularize()` maps plural words to singular ones for each suffix rule,
/// and leaves `-ss` words and already-singular words untouched.
#[test]
fn test_singularize_rules() {
    // (input, expected output)
    let cases = [
        // -ies → -y
        ("categories", "category"),
        ("entries", "entry"),
        // -sses → -ss (special -es rule)
        ("classes", "class"),
        ("dresses", "dress"),
        // -xes → -x
        ("boxes", "box"),
        ("indexes", "index"),
        // -ches → -ch
        ("watches", "watch"),
        // -shes → -sh
        ("dishes", "dish"),
        // Regular -s
        ("users", "user"),
        ("products", "product"),
        // Words ending in -ss (should NOT remove s)
        ("boss", "boss"),
        ("class", "class"),
        // Already singular (no trailing s)
        ("item", "item"),
        ("child", "child"),
    ];

    for &(word, expected) in cases.iter() {
        assert_eq!(singularize(word), expected);
    }
}
2875
2876    // =========================================================================
2877    // Coverage: from_json root primitives, loads()
2878    // =========================================================================
2879
/// A JSON document whose root is a primitive is stored under the key `root`
/// and is not flagged as a root array.
#[test]
fn test_from_json_root_primitive() {
    // Root-level string
    let string_doc = TeaLeaf::from_json(r#""hello""#).unwrap();
    assert_eq!(string_doc.get("root").unwrap().as_str(), Some("hello"));
    assert!(!string_doc.is_root_array);

    // Root-level number
    let number_doc = TeaLeaf::from_json("42").unwrap();
    assert_eq!(number_doc.get("root").unwrap().as_int(), Some(42));

    // Root-level bool
    let bool_doc = TeaLeaf::from_json("true").unwrap();
    assert_eq!(bool_doc.get("root").unwrap().as_bool(), Some(true));

    // Root-level null
    let null_doc = TeaLeaf::from_json("null").unwrap();
    let root = null_doc.get("root").unwrap();
    assert!(root.is_null());
}
2899
2900    #[test]
2901    fn test_from_json_invalid() {
2902        let result = TeaLeaf::from_json("not valid json {{{");
2903        assert!(result.is_err());
2904    }
2905
2906    #[test]
2907    fn test_loads_convenience() {
2908        let data = loads("name: alice\nage: 30").unwrap();
2909        assert_eq!(data.get("name").unwrap().as_str(), Some("alice"));
2910        assert_eq!(data.get("age").unwrap().as_int(), Some(30));
2911    }
2912
2913    // =========================================================================
2914    // Coverage: InferredType::merge() branches
2915    // =========================================================================
2916
2917    #[test]
2918    fn test_inferred_type_merge_int_float() {
2919        let t = infer_type(&Value::Int(42));
2920        let f = infer_type(&Value::Float(3.14));
2921        let merged = t.merge(&f);
2922        assert_eq!(merged, InferredType::Float);
2923
2924        // Reverse
2925        let merged = f.merge(&t);
2926        assert_eq!(merged, InferredType::Float);
2927    }
2928
2929    #[test]
2930    fn test_inferred_type_merge_null_with_type() {
2931        let n = InferredType::Null;
2932        let s = InferredType::String;
2933        let merged = n.merge(&s);
2934        assert_eq!(merged, InferredType::String);
2935
2936        // Reverse
2937        let merged = s.merge(&n);
2938        assert_eq!(merged, InferredType::String);
2939    }
2940
2941    #[test]
2942    fn test_inferred_type_merge_arrays() {
2943        let a1 = InferredType::Array(Box::new(InferredType::Int));
2944        let a2 = InferredType::Array(Box::new(InferredType::Float));
2945        let merged = a1.merge(&a2);
2946        assert_eq!(merged, InferredType::Array(Box::new(InferredType::Float)));
2947    }
2948
2949    #[test]
2950    fn test_inferred_type_merge_objects_same_fields() {
2951        let o1 = InferredType::Object(vec![
2952            ("a".to_string(), InferredType::Int),
2953            ("b".to_string(), InferredType::String),
2954        ]);
2955        let o2 = InferredType::Object(vec![
2956            ("a".to_string(), InferredType::Float),
2957            ("b".to_string(), InferredType::String),
2958        ]);
2959        let merged = o1.merge(&o2);
2960        if let InferredType::Object(fields) = &merged {
2961            assert_eq!(fields.len(), 2);
2962            assert_eq!(fields[0].1, InferredType::Float); // Int+Float → Float
2963            assert_eq!(fields[1].1, InferredType::String);
2964        } else {
2965            panic!("Expected Object, got {:?}", merged);
2966        }
2967    }
2968
2969    #[test]
2970    fn test_inferred_type_merge_objects_different_fields() {
2971        let o1 = InferredType::Object(vec![
2972            ("a".to_string(), InferredType::Int),
2973        ]);
2974        let o2 = InferredType::Object(vec![
2975            ("b".to_string(), InferredType::String),
2976        ]);
2977        let merged = o1.merge(&o2);
2978        assert_eq!(merged, InferredType::Mixed);
2979    }
2980
2981    #[test]
2982    fn test_inferred_type_merge_incompatible() {
2983        let s = InferredType::String;
2984        let i = InferredType::Int;
2985        let merged = s.merge(&i);
2986        assert_eq!(merged, InferredType::Mixed);
2987    }
2988
2989    #[test]
2990    fn test_inferred_type_to_field_type() {
2991        let schemas = IndexMap::new();
2992
2993        assert_eq!(InferredType::Null.to_field_type(&schemas).base, "string");
2994        assert!(InferredType::Null.to_field_type(&schemas).nullable);
2995        assert_eq!(InferredType::Bool.to_field_type(&schemas).base, "bool");
2996        assert_eq!(InferredType::Int.to_field_type(&schemas).base, "int");
2997        assert_eq!(InferredType::Float.to_field_type(&schemas).base, "float");
2998        assert_eq!(InferredType::String.to_field_type(&schemas).base, "string");
2999        assert_eq!(InferredType::Mixed.to_field_type(&schemas).base, "any");
3000
3001        // Array type
3002        let arr_type = InferredType::Array(Box::new(InferredType::Int));
3003        let ft = arr_type.to_field_type(&schemas);
3004        assert_eq!(ft.base, "int");
3005        assert!(ft.is_array);
3006
3007        // Object with no matching schema → "any" (not "object", which is a value-only type)
3008        let obj_type = InferredType::Object(vec![("x".to_string(), InferredType::Int)]);
3009        assert_eq!(obj_type.to_field_type(&schemas).base, "any");
3010    }
3011
3012    #[test]
3013    fn test_inferred_type_to_field_type_with_matching_schema() {
3014        let mut schemas = IndexMap::new();
3015        let mut schema = Schema::new("point");
3016        schema.add_field("x", FieldType::new("int"));
3017        schema.add_field("y", FieldType::new("int"));
3018        schemas.insert("point".to_string(), schema);
3019
3020        let obj_type = InferredType::Object(vec![
3021            ("x".to_string(), InferredType::Int),
3022            ("y".to_string(), InferredType::Int),
3023        ]);
3024        let ft = obj_type.to_field_type(&schemas);
3025        assert_eq!(ft.base, "point");
3026    }
3027
3028    #[test]
3029    fn test_infer_type_special_values() {
3030        // Bytes, Ref, Tagged, Timestamp, Map all become Mixed
3031        assert_eq!(infer_type(&Value::Bytes(vec![1, 2])), InferredType::Mixed);
3032        assert_eq!(infer_type(&Value::Ref("x".to_string())), InferredType::Mixed);
3033        assert_eq!(infer_type(&Value::Tagged("t".to_string(), Box::new(Value::Null))), InferredType::Mixed);
3034        assert_eq!(infer_type(&Value::Timestamp(0, 0)), InferredType::Mixed);
3035        assert_eq!(infer_type(&Value::Map(vec![])), InferredType::Mixed);
3036
3037        // Empty array
3038        if let InferredType::Array(inner) = infer_type(&Value::Array(vec![])) {
3039            assert_eq!(*inner, InferredType::Mixed);
3040        } else {
3041            panic!("Expected Array");
3042        }
3043
3044        // UInt becomes Int
3045        assert_eq!(infer_type(&Value::UInt(42)), InferredType::Int);
3046    }
3047
3048    #[test]
3049    fn test_json_with_schemas_empty_nested_object_roundtrip() {
3050        // Regression: fuzzer found that [{"n":{}}] crashes because the inferrer
3051        // emits "object" as a field type, which the parser rejects as value-only.
3052        let doc = TeaLeaf::from_json_with_schemas(r#"[{"n":{}}]"#).unwrap();
3053        let tl_text = doc.to_tl_with_schemas();
3054        // Must re-parse without error
3055        let reparsed = TeaLeaf::parse(&tl_text).unwrap();
3056        assert_eq!(doc.data.len(), reparsed.data.len());
3057    }
3058
3059    // =========================================================================
3060    // Coverage: to_tl_with_schemas() edge cases
3061    // =========================================================================
3062
3063    #[test]
3064    fn test_to_tl_with_schemas_no_schemas() {
3065        let mut data = IndexMap::new();
3066        data.insert("name".to_string(), Value::String("alice".to_string()));
3067        let doc = TeaLeaf { data, schemas: IndexMap::new(), unions: IndexMap::new(), is_root_array: false };
3068
3069        let output = doc.to_tl_with_schemas();
3070        assert!(output.contains("name: alice"), "Should use dumps() format");
3071        assert!(!output.contains("@struct"), "No schemas");
3072    }
3073
3074    #[test]
3075    fn test_to_tl_with_schemas_root_array() {
3076        let mut data = IndexMap::new();
3077        data.insert("root".to_string(), Value::Array(vec![Value::Int(1), Value::Int(2)]));
3078        let doc = TeaLeaf { data, schemas: IndexMap::new(), unions: IndexMap::new(), is_root_array: true };
3079
3080        let output = doc.to_tl_with_schemas();
3081        assert!(output.starts_with("@root-array"), "Should have root-array directive");
3082    }
3083
3084    // =========================================================================
3085    // Coverage: write_value_with_schemas() for special types
3086    // =========================================================================
3087
3088    #[test]
3089    fn test_dumps_with_schemas_all_types() {
3090        let mut schemas = IndexMap::new();
3091        let mut schema = Schema::new("item");
3092        schema.add_field("id", FieldType::new("int"));
3093        schema.add_field("name", FieldType::new("string"));
3094        schemas.insert("item".to_string(), schema);
3095
3096        let mut data = IndexMap::new();
3097        // Array matching schema → @table
3098        data.insert("items".to_string(), Value::Array(vec![
3099            Value::Object(vec![
3100                ("id".to_string(), Value::Int(1)),
3101                ("name".to_string(), Value::String("Widget".to_string())),
3102            ].into_iter().collect()),
3103        ]));
3104        // Special types
3105        data.insert("ref_val".to_string(), Value::Ref("target".to_string()));
3106        data.insert("tag_val".to_string(), Value::Tagged("ok".to_string(), Box::new(Value::Int(200))));
3107        data.insert("map_val".to_string(), Value::Map(vec![
3108            (Value::Int(1), Value::String("one".to_string())),
3109        ]));
3110        data.insert("bytes_val".to_string(), Value::Bytes(vec![0xde, 0xad]));
3111        data.insert("ts_val".to_string(), Value::Timestamp(0, 0));
3112        data.insert("ts_millis".to_string(), Value::Timestamp(1705315800123, 0));
3113
3114        let schema_order = vec!["item".to_string()];
3115        let output = dumps_with_schemas(&data, &schemas, &schema_order, &IndexMap::new(), &[]);
3116
3117        assert!(output.contains("@struct item"), "Should contain schema def");
3118        assert!(output.contains("@table item"), "Should use @table format");
3119        assert!(output.contains("!target"), "Should contain ref");
3120        assert!(output.contains(":ok 200"), "Should contain tagged");
3121        assert!(output.contains("@map {"), "Should contain map");
3122        assert!(output.contains("b\"dead\""), "Should contain bytes literal");
3123        assert!(output.contains("1970-01-01T00:00:00Z"), "Should contain timestamp");
3124        assert!(output.contains(".123Z"), "Should contain millis timestamp");
3125    }
3126
3127    #[test]
3128    fn test_dumps_with_schemas_object_value() {
3129        let schemas = IndexMap::new();
3130        let mut data = IndexMap::new();
3131        data.insert("config".to_string(), Value::Object(
3132            vec![
3133                ("host".to_string(), Value::String("localhost".to_string())),
3134                ("port".to_string(), Value::Int(8080)),
3135            ].into_iter().collect()
3136        ));
3137
3138        let output = dumps_with_schemas(&data, &schemas, &[], &IndexMap::new(), &[]);
3139        assert!(output.contains("config:"), "Should contain key");
3140        assert!(output.contains("{"), "Should contain object");
3141    }
3142
3143    #[test]
3144    fn test_write_tuple_with_nested_schema() {
3145        // Test tuple writing with nested struct fields
3146        let mut schemas = IndexMap::new();
3147
3148        let mut addr = Schema::new("address");
3149        addr.add_field("city", FieldType::new("string"));
3150        addr.add_field("zip", FieldType::new("string"));
3151        schemas.insert("address".to_string(), addr);
3152
3153        let mut user = Schema::new("user");
3154        user.add_field("name", FieldType::new("string"));
3155        user.add_field("home", FieldType::new("address"));
3156        schemas.insert("user".to_string(), user);
3157
3158        let mut data = IndexMap::new();
3159        data.insert("users".to_string(), Value::Array(vec![
3160            Value::Object(vec![
3161                ("name".to_string(), Value::String("Alice".to_string())),
3162                ("home".to_string(), Value::Object(vec![
3163                    ("city".to_string(), Value::String("Boston".to_string())),
3164                    ("zip".to_string(), Value::String("02101".to_string())),
3165                ].into_iter().collect())),
3166            ].into_iter().collect()),
3167        ]));
3168
3169        let schema_order = vec!["address".to_string(), "user".to_string()];
3170        let output = dumps_with_schemas(&data, &schemas, &schema_order, &IndexMap::new(), &[]);
3171
3172        assert!(output.contains("@struct address"), "Should have address schema");
3173        assert!(output.contains("@struct user"), "Should have user schema");
3174        assert!(output.contains("@table user"), "Should use @table for users");
3175        // Nested tuples
3176        assert!(output.contains("("), "Should have tuple format");
3177    }
3178
3179    #[test]
3180    fn test_write_tuple_with_schema_array_field() {
3181        // Test tuple writing with array fields that have schemas
3182        let mut schemas = IndexMap::new();
3183
3184        let mut tag = Schema::new("tag");
3185        tag.add_field("name", FieldType::new("string"));
3186        schemas.insert("tag".to_string(), tag);
3187
3188        let mut item = Schema::new("item");
3189        item.add_field("id", FieldType::new("int"));
3190        item.add_field("tags", FieldType { base: "tag".to_string(), nullable: false, is_array: true });
3191        schemas.insert("item".to_string(), item);
3192
3193        let mut data = IndexMap::new();
3194        data.insert("items".to_string(), Value::Array(vec![
3195            Value::Object(vec![
3196                ("id".to_string(), Value::Int(1)),
3197                ("tags".to_string(), Value::Array(vec![
3198                    Value::Object(vec![
3199                        ("name".to_string(), Value::String("rust".to_string())),
3200                    ].into_iter().collect()),
3201                ])),
3202            ].into_iter().collect()),
3203        ]));
3204
3205        let schema_order = vec!["tag".to_string(), "item".to_string()];
3206        let output = dumps_with_schemas(&data, &schemas, &schema_order, &IndexMap::new(), &[]);
3207
3208        assert!(output.contains("@table item"), "Should use @table for items");
3209    }
3210
3211    #[test]
3212    fn test_write_schema_array_empty() {
3213        let schemas = IndexMap::new();
3214        let schema = Schema::new("empty");
3215        let mut out = String::new();
3216        write_schema_array(&mut out, &Value::Array(vec![]), &schema, &schemas, 0);
3217        assert_eq!(out, "[]");
3218    }
3219
3220    #[test]
3221    fn test_write_schema_array_non_array_fallback() {
3222        let schemas = IndexMap::new();
3223        let schema = Schema::new("test");
3224        let mut out = String::new();
3225        write_schema_array(&mut out, &Value::Int(42), &schema, &schemas, 0);
3226        assert_eq!(out, "42");
3227    }
3228
3229    #[test]
3230    fn test_write_tuple_missing_field() {
3231        // Test that missing fields in object produce ~
3232        let schemas = IndexMap::new();
3233        let mut schema = Schema::new("test");
3234        schema.add_field("present", FieldType::new("int"));
3235        schema.add_field("missing", FieldType::new("string"));
3236
3237        let value = Value::Object(
3238            vec![("present".to_string(), Value::Int(42))].into_iter().collect()
3239        );
3240
3241        let mut out = String::new();
3242        write_tuple(&mut out, &value, &schema, &schemas, 0);
3243        assert!(out.contains("42"), "Present field should be written");
3244        assert!(out.contains("~"), "Missing field should be ~");
3245    }
3246
3247    #[test]
3248    fn test_write_tuple_non_object() {
3249        // When tuple receives a non-object value
3250        let schemas = IndexMap::new();
3251        let schema = Schema::new("test");
3252
3253        let mut out = String::new();
3254        write_tuple(&mut out, &Value::Int(42), &schema, &schemas, 0);
3255        assert_eq!(out, "42");
3256    }
3257
3258    // =========================================================================
3259    // Coverage: array_matches_schema()
3260    // =========================================================================
3261
3262    #[test]
3263    fn test_array_matches_schema_empty() {
3264        let schema = Schema::new("test");
3265        assert!(!array_matches_schema(&[], &schema));
3266    }
3267
3268    #[test]
3269    fn test_array_matches_schema_non_object() {
3270        let schema = Schema::new("test");
3271        assert!(!array_matches_schema(&[Value::Int(1)], &schema));
3272    }
3273
3274    #[test]
3275    fn test_array_matches_schema_matching() {
3276        let mut schema = Schema::new("user");
3277        schema.add_field("name", FieldType::new("string"));
3278        schema.add_field("age", FieldType::new("int"));
3279
3280        let arr = vec![Value::Object(vec![
3281            ("name".to_string(), Value::String("Alice".to_string())),
3282            ("age".to_string(), Value::Int(30)),
3283        ].into_iter().collect())];
3284
3285        assert!(array_matches_schema(&arr, &schema));
3286    }
3287
3288    // =========================================================================
3289    // Coverage: from_dto, from_dto_array, to_dto, to_dto_vec
3290    // =========================================================================
3291
3292    #[test]
3293    fn test_from_dto_and_back() {
3294        use crate::convert::{FromTeaLeaf, ConvertError};
3295
3296        let doc = TeaLeaf::from_dto("greeting", &"hello".to_string());
3297        assert_eq!(doc.get("greeting").unwrap().as_str(), Some("hello"));
3298
3299        let result: std::result::Result<String, ConvertError> = String::from_tealeaf_value(doc.get("greeting").unwrap());
3300        assert_eq!(result.unwrap(), "hello");
3301    }
3302
3303    #[test]
3304    fn test_from_dto_array() {
3305        let items = vec!["apple".to_string(), "banana".to_string()];
3306        let doc = TeaLeaf::from_dto_array("fruits", &items);
3307        let arr = doc.get("fruits").unwrap().as_array().unwrap();
3308        assert_eq!(arr.len(), 2);
3309        assert_eq!(arr[0].as_str(), Some("apple"));
3310    }
3311
3312    #[test]
3313    fn test_to_dto_missing_key() {
3314        let doc = TeaLeaf::new(IndexMap::new(), IndexMap::new());
3315        let result: Result<String> = doc.to_dto("missing");
3316        assert!(result.is_err());
3317    }
3318
3319    #[test]
3320    fn test_to_dto_vec() {
3321        let mut data = IndexMap::new();
3322        data.insert("items".to_string(), Value::Array(vec![
3323            Value::String("a".to_string()),
3324            Value::String("b".to_string()),
3325        ]));
3326        let doc = TeaLeaf::new(IndexMap::new(), data);
3327        let result: Vec<String> = doc.to_dto_vec("items").unwrap();
3328        assert_eq!(result, vec!["a", "b"]);
3329    }
3330
3331    #[test]
3332    fn test_to_dto_vec_not_array() {
3333        let mut data = IndexMap::new();
3334        data.insert("item".to_string(), Value::String("not_an_array".to_string()));
3335        let doc = TeaLeaf::new(IndexMap::new(), data);
3336        let result: Result<Vec<String>> = doc.to_dto_vec("item");
3337        assert!(result.is_err());
3338    }
3339
3340    #[test]
3341    fn test_to_dto_vec_missing_key() {
3342        let doc = TeaLeaf::new(IndexMap::new(), IndexMap::new());
3343        let result: Result<Vec<String>> = doc.to_dto_vec("missing");
3344        assert!(result.is_err());
3345    }
3346
3347    // =========================================================================
3348    // Coverage: set_root_array, SchemaInferrer edge cases
3349    // =========================================================================
3350
3351    #[test]
3352    fn test_set_root_array() {
3353        let mut doc = TeaLeaf::new(IndexMap::new(), IndexMap::new());
3354        assert!(!doc.is_root_array);
3355        doc.set_root_array(true);
3356        assert!(doc.is_root_array);
3357    }
3358
3359    #[test]
3360    fn test_schema_inferrer_non_uniform_array() {
3361        // Array with different object structures should not create a schema
3362        let json = r#"{"items": [{"a": 1}, {"b": 2}]}"#;
3363        let doc = TeaLeaf::from_json_with_schemas(json).unwrap();
3364        assert!(doc.schema("item").is_none(), "Non-uniform array should not produce schema");
3365    }
3366
3367    #[test]
3368    fn test_schema_inferrer_mixed_types_in_array() {
3369        // Array with non-objects
3370        let json = r#"{"items": [1, 2, 3]}"#;
3371        let doc = TeaLeaf::from_json_with_schemas(json).unwrap();
3372        assert!(doc.schema("item").is_none(), "Non-object array should not produce schema");
3373    }
3374
3375    #[test]
3376    fn test_schema_inferrer_empty_array() {
3377        let json = r#"{"items": []}"#;
3378        let doc = TeaLeaf::from_json_with_schemas(json).unwrap();
3379        assert!(doc.schema("item").is_none(), "Empty array should not produce schema");
3380    }
3381
3382    #[test]
3383    fn test_schema_inferrer_duplicate_schema_name() {
3384        // Two arrays that would produce the same schema name
3385        let json = r#"{
3386            "items": [{"id": 1, "name": "A"}],
3387            "nested": {"items": [{"id": 2, "name": "B"}]}
3388        }"#;
3389        let doc = TeaLeaf::from_json_with_schemas(json).unwrap();
3390        // Should have "item" schema (first one wins)
3391        assert!(doc.schema("item").is_some());
3392    }
3393
3394    #[test]
3395    fn test_schema_inferrer_int_float_merge() {
3396        // Field that has int in one record and float in another
3397        let json = r#"{"values": [{"x": 1}, {"x": 2.5}]}"#;
3398        let doc = TeaLeaf::from_json_with_schemas(json).unwrap();
3399        let schema = doc.schema("value").unwrap();
3400        let x_field = schema.fields.iter().find(|f| f.name == "x").unwrap();
3401        assert_eq!(x_field.field_type.base, "float", "Int+Float merge should produce float");
3402    }
3403
3404    #[test]
3405    fn test_schema_inference_with_root_array() {
3406        let json = r#"[{"id": 1, "name": "Alice"}, {"id": 2, "name": "Bob"}]"#;
3407        let doc = TeaLeaf::from_json_with_schemas(json).unwrap();
3408        // Root array is stored under "root" key - the schema name should be derived from "root"
3409        // The singularize of "root" is "root" (no trailing s)
3410        // Actually, root arrays aren't typically analyzed because the key is "root" and it goes through analyze_value
3411        let root_val = doc.get("root").unwrap().as_array().unwrap();
3412        assert_eq!(root_val.len(), 2);
3413    }
3414
3415    // =========================================================================
3416    // Coverage: dumps_with_schemas with quoting in schemas
3417    // =========================================================================
3418
3419    #[test]
3420    fn test_dumps_with_schemas_string_quoting_in_tuples() {
3421        let mut schemas = IndexMap::new();
3422        let mut schema = Schema::new("item");
3423        schema.add_field("name", FieldType::new("string"));
3424        schemas.insert("item".to_string(), schema);
3425
3426        let mut data = IndexMap::new();
3427        data.insert("items".to_string(), Value::Array(vec![
3428            Value::Object(vec![
3429                ("name".to_string(), Value::String("hello world".to_string())),
3430            ].into_iter().collect()),
3431        ]));
3432
3433        let schema_order = vec!["item".to_string()];
3434        let output = dumps_with_schemas(&data, &schemas, &schema_order, &IndexMap::new(), &[]);
3435        assert!(output.contains("\"hello world\""), "String with space should be quoted in tuple");
3436    }
3437
3438    #[test]
3439    fn test_dumps_with_schemas_array_without_schema() {
3440        // Array that doesn't match any schema
3441        let schemas = IndexMap::new();
3442        let mut data = IndexMap::new();
3443        data.insert("nums".to_string(), Value::Array(vec![Value::Int(1), Value::Int(2)]));
3444
3445        let output = dumps_with_schemas(&data, &schemas, &[], &IndexMap::new(), &[]);
3446        assert!(output.contains("[1, 2]"), "Should use regular array format");
3447    }
3448
3449    // =========================================================================
3450    // Coverage: convenience functions open(), parse(), root array to_json
3451    // =========================================================================
3452
3453    #[test]
3454    fn test_open_convenience_function() {
3455        // Write a binary file first, then open with the convenience function
3456        let dir = std::env::temp_dir();
3457        let path = dir.join("test_open_conv.tlbx");
3458
3459        let mut data = IndexMap::new();
3460        data.insert("x".to_string(), Value::Int(42));
3461        let doc = TeaLeaf::new(IndexMap::new(), data);
3462        doc.compile(&path, false).unwrap();
3463
3464        let reader = super::open(&path).unwrap();
3465        assert_eq!(reader.get("x").unwrap().as_int(), Some(42));
3466        std::fs::remove_file(&path).ok();
3467    }
3468
3469    #[test]
3470    fn test_parse_convenience_function() {
3471        let doc = super::parse("greeting: hello").unwrap();
3472        assert_eq!(doc.get("greeting").unwrap().as_str(), Some("hello"));
3473    }
3474
3475    #[test]
3476    fn test_to_json_root_array() {
3477        let mut data = IndexMap::new();
3478        data.insert("root".to_string(), Value::Array(vec![Value::Int(1), Value::Int(2)]));
3479        let mut doc = TeaLeaf::new(IndexMap::new(), data);
3480        doc.set_root_array(true);
3481
3482        let json = doc.to_json().unwrap();
3483        let parsed: serde_json::Value = serde_json::from_str(&json).unwrap();
3484        assert!(parsed.is_array(), "Root array to_json should output array");
3485        assert_eq!(parsed.as_array().unwrap().len(), 2);
3486    }
3487
3488    #[test]
3489    fn test_to_json_compact_root_array() {
3490        let mut data = IndexMap::new();
3491        data.insert("root".to_string(), Value::Array(vec![Value::Int(1)]));
3492        let mut doc = TeaLeaf::new(IndexMap::new(), data);
3493        doc.set_root_array(true);
3494
3495        let json = doc.to_json_compact().unwrap();
3496        assert_eq!(json, "[1]");
3497    }
3498
3499    #[test]
3500    fn test_infer_type_bool_value() {
3501        let it = infer_type(&Value::Bool(true));
3502        assert!(matches!(it, InferredType::Bool));
3503    }
3504
3505    #[test]
3506    fn test_schema_inference_nested_object_fields() {
3507        // JSON with nested objects inside array items
3508        let json = r#"{"records": [
3509            {"id": 1, "details": {"city": "NYC", "zip": "10001"}},
3510            {"id": 2, "details": {"city": "LA", "zip": "90001"}}
3511        ]}"#;
3512        let doc = TeaLeaf::from_json_with_schemas(json).unwrap();
3513        // Should infer both "record" and "detail" schemas
3514        assert!(doc.schema("record").is_some(), "Should infer record schema");
3515    }
3516
3517    #[test]
3518    fn test_schema_inference_not_all_objects_returns_early() {
3519        // Array where second element is not an object
3520        let json = r#"{"items": [{"a": 1}, "not_an_object"]}"#;
3521        let doc = TeaLeaf::from_json_with_schemas(json).unwrap();
3522        assert!(doc.schema("item").is_none(), "Mixed array should not produce schema");
3523    }
3524
3525    #[test]
3526    fn test_to_tl_with_schemas_with_nested_array_field() {
3527        // Schema with an array-typed field
3528        let mut schemas = IndexMap::new();
3529        let mut schema = Schema::new("user");
3530        schema.add_field("name", FieldType::new("string"));
3531        schema.add_field("tags", FieldType::new("string").array());
3532        schemas.insert("user".to_string(), schema);
3533
3534        let mut data = IndexMap::new();
3535        let mut obj = IndexMap::new();
3536        obj.insert("name".to_string(), Value::String("Alice".into()));
3537        obj.insert("tags".to_string(), Value::Array(vec![
3538            Value::String("admin".into()),
3539            Value::String("active".into()),
3540        ]));
3541        data.insert("users".to_string(), Value::Array(vec![Value::Object(obj)]));
3542
3543        let doc = TeaLeaf::new(schemas, data);
3544        let text = doc.to_tl_with_schemas();
3545        assert!(text.contains("@struct user"), "Should have schema definition");
3546        assert!(text.contains("@table user"), "Should use table format");
3547    }
3548
3549    // =========================================================================
3550    // Issue 6: Improved schema matching
3551    // =========================================================================
3552
3553    #[test]
3554    fn test_schema_matching_nullable_fields_allowed_missing() {
3555        // Schema with nullable field should match objects missing that field
3556        let mut schemas = IndexMap::new();
3557        let mut s = Schema::new("Item");
3558        s.add_field("id", FieldType::new("int"));
3559        s.add_field("label", FieldType::new("string").nullable());
3560        schemas.insert("Item".to_string(), s);
3561
3562        let mut obj1 = IndexMap::new();
3563        obj1.insert("id".to_string(), Value::Int(1));
3564        // label is missing — but it's nullable, so it should still match
3565
3566        let doc = TeaLeaf {
3567            schemas,
3568            unions: IndexMap::new(),
3569            data: {
3570                let mut d = IndexMap::new();
3571                d.insert("items".to_string(), Value::Array(vec![Value::Object(obj1)]));
3572                d
3573            },
3574            is_root_array: false,
3575        };
3576        let result = doc.find_schema_for_value(doc.data.get("items").unwrap(), "items");
3577        assert!(result.is_some(), "Should match schema when nullable field is missing");
3578        assert_eq!(result.unwrap().name, "Item");
3579    }
3580
3581    #[test]
3582    fn test_schema_matching_rejects_extra_keys() {
3583        // Objects with extra keys not in schema should not match
3584        let mut schemas = IndexMap::new();
3585        let mut s = Schema::new("Point");
3586        s.add_field("x", FieldType::new("int"));
3587        s.add_field("y", FieldType::new("int"));
3588        schemas.insert("Point".to_string(), s);
3589
3590        let mut obj = IndexMap::new();
3591        obj.insert("x".to_string(), Value::Int(1));
3592        obj.insert("y".to_string(), Value::Int(2));
3593        obj.insert("z".to_string(), Value::Int(3)); // extra field
3594
3595        let doc = TeaLeaf {
3596            schemas,
3597            unions: IndexMap::new(),
3598            data: {
3599                let mut d = IndexMap::new();
3600                d.insert("points".to_string(), Value::Array(vec![Value::Object(obj)]));
3601                d
3602            },
3603            is_root_array: false,
3604        };
3605        let result = doc.find_schema_for_value(doc.data.get("points").unwrap(), "points");
3606        assert!(result.is_none(), "Should NOT match schema when extra keys are present");
3607    }
3608
3609    #[test]
3610    fn test_schema_matching_empty_array_no_matching_name() {
3611        let mut schemas = IndexMap::new();
3612        let mut s = Schema::new("Anything");
3613        s.add_field("x", FieldType::new("int"));
3614        schemas.insert("Anything".to_string(), s);
3615
3616        let doc = TeaLeaf {
3617            schemas,
3618            unions: IndexMap::new(),
3619            data: {
3620                let mut d = IndexMap::new();
3621                d.insert("empty".to_string(), Value::Array(vec![]));
3622                d
3623            },
3624            is_root_array: false,
3625        };
3626        let result = doc.find_schema_for_value(doc.data.get("empty").unwrap(), "empty");
3627        assert!(result.is_none(), "Empty array should return None when no schema name matches");
3628    }
3629
3630    #[test]
3631    fn test_schema_matching_empty_array_matches_by_name() {
3632        let mut schemas = IndexMap::new();
3633        let mut s = Schema::new("item");
3634        s.add_field("id", FieldType::new("int"));
3635        schemas.insert("item".to_string(), s);
3636
3637        let doc = TeaLeaf {
3638            schemas,
3639            unions: IndexMap::new(),
3640            data: {
3641                let mut d = IndexMap::new();
3642                d.insert("items".to_string(), Value::Array(vec![]));
3643                d
3644            },
3645            is_root_array: false,
3646        };
3647        let result = doc.find_schema_for_value(doc.data.get("items").unwrap(), "items");
3648        assert!(result.is_some(), "Empty array should match schema by singularized key name");
3649        assert_eq!(result.unwrap().name, "item");
3650    }
3651
3652    // =========================================================================
3653    // Issue 12: Negative timestamp formatting
3654    // =========================================================================
3655
3656    #[test]
3657    fn test_negative_timestamp_formatting() {
3658        // 1969-12-31T23:59:59Z = -1000 ms (1 second before epoch)
3659        let formatted = format_timestamp_millis(-1000, 0);
3660        assert_eq!(formatted, "1969-12-31T23:59:59Z");
3661    }
3662
3663    #[test]
3664    fn test_negative_timestamp_with_millis() {
3665        // -500 ms = 1969-12-31T23:59:59.500Z
3666        let formatted = format_timestamp_millis(-500, 0);
3667        assert_eq!(formatted, "1969-12-31T23:59:59.500Z");
3668    }
3669
3670    #[test]
3671    fn test_negative_timestamp_full_day() {
3672        // -86400000 ms = exactly one day before epoch = 1969-12-31T00:00:00Z
3673        let formatted = format_timestamp_millis(-86_400_000, 0);
3674        assert_eq!(formatted, "1969-12-31T00:00:00Z");
3675    }
3676
3677    #[test]
3678    fn test_epoch_timestamp() {
3679        let formatted = format_timestamp_millis(0, 0);
3680        assert_eq!(formatted, "1970-01-01T00:00:00Z");
3681    }
3682
3683    #[test]
3684    fn test_positive_timestamp_with_millis() {
3685        // 1123ms = 1 second + 123ms after epoch
3686        let formatted = format_timestamp_millis(1123, 0);
3687        assert_eq!(formatted, "1970-01-01T00:00:01.123Z");
3688    }
3689
3690    #[test]
3691    fn test_negative_timestamp_json_export() {
3692        let mut data = IndexMap::new();
3693        data.insert("ts".to_string(), Value::Timestamp(-1000, 0));
3694        let doc = TeaLeaf::new(IndexMap::new(), data);
3695        let json = doc.to_json().unwrap();
3696        assert!(json.contains("1969-12-31"), "Negative timestamp should format as pre-epoch date: {}", json);
3697    }
3698
3699    // =========================================================================
3700    // Issue 7: Deterministic serialization (IndexMap preserves insertion order)
3701    // =========================================================================
3702
3703    #[test]
3704    fn test_compile_deterministic_key_order() {
3705        // Two documents with the same data in the same insertion order
3706        // should produce identical binary output
3707        let dir = std::env::temp_dir();
3708        let path1 = dir.join("test_deterministic_1.tlbx");
3709        let path2 = dir.join("test_deterministic_2.tlbx");
3710
3711        let mut data1 = IndexMap::new();
3712        data1.insert("alpha".to_string(), Value::Int(1));
3713        data1.insert("beta".to_string(), Value::Int(2));
3714        data1.insert("gamma".to_string(), Value::Int(3));
3715        let doc1 = TeaLeaf::new(IndexMap::new(), data1);
3716        doc1.compile(&path1, false).unwrap();
3717
3718        let mut data2 = IndexMap::new();
3719        data2.insert("alpha".to_string(), Value::Int(1));
3720        data2.insert("beta".to_string(), Value::Int(2));
3721        data2.insert("gamma".to_string(), Value::Int(3));
3722        let doc2 = TeaLeaf::new(IndexMap::new(), data2);
3723        doc2.compile(&path2, false).unwrap();
3724
3725        let bytes1 = std::fs::read(&path1).unwrap();
3726        let bytes2 = std::fs::read(&path2).unwrap();
3727        assert_eq!(bytes1, bytes2, "Binary output should be identical for same insertion order");
3728
3729        std::fs::remove_file(&path1).ok();
3730        std::fs::remove_file(&path2).ok();
3731    }
3732
3733    #[test]
3734    fn test_dumps_deterministic_key_order() {
3735        // dumps() preserves IndexMap insertion order deterministically
3736        let mut data = IndexMap::new();
3737        data.insert("zebra".to_string(), Value::Int(3));
3738        data.insert("alpha".to_string(), Value::Int(1));
3739        data.insert("middle".to_string(), Value::Int(2));
3740
3741        let output1 = dumps(&data);
3742        let output2 = dumps(&data);
3743        assert_eq!(output1, output2, "dumps() should be deterministic");
3744        // Keys should appear in insertion order (IndexMap preserves insertion order)
3745        let lines: Vec<&str> = output1.trim().lines().collect();
3746        assert!(lines[0].starts_with("zebra:"), "First key should be 'zebra', got: {}", lines[0]);
3747        assert!(lines[1].starts_with("alpha:"), "Second key should be 'alpha', got: {}", lines[1]);
3748        assert!(lines[2].starts_with("middle:"), "Third key should be 'middle', got: {}", lines[2]);
3749    }
3750
3751    // =========================================================================
3752    // Order-preservation integration tests
3753    // =========================================================================
3754
3755    #[test]
3756    fn test_json_parse_preserves_key_order() {
3757        // JSON with intentionally non-alphabetical keys
3758        let json = r#"{"zebra": 1, "apple": 2, "mango": 3, "banana": 4}"#;
3759        let doc = TeaLeaf::from_json(json).unwrap();
3760        let keys: Vec<&String> = doc.data.keys().collect();
3761        assert_eq!(keys, &["zebra", "apple", "mango", "banana"],
3762            "JSON parse should preserve key insertion order");
3763    }
3764
3765    #[test]
3766    fn test_json_roundtrip_preserves_key_order() {
3767        let json = r#"{"zebra": 1, "apple": 2, "mango": 3}"#;
3768        let doc = TeaLeaf::from_json(json).unwrap();
3769        let json_out = doc.to_json().unwrap();
3770        // Parse back and verify order
3771        let parsed: serde_json::Value = serde_json::from_str(&json_out).unwrap();
3772        let keys: Vec<&str> = parsed.as_object().unwrap().keys().map(|s| s.as_str()).collect();
3773        assert_eq!(keys, &["zebra", "apple", "mango"],
3774            "JSON round-trip should preserve key order");
3775    }
3776
3777    #[test]
3778    fn test_tl_text_preserves_section_order() {
3779        let input = "zebra: 1\napple: 2\nmango: 3\n";
3780        let doc = TeaLeaf::parse(input).unwrap();
3781        let keys: Vec<&String> = doc.data.keys().collect();
3782        assert_eq!(keys, &["zebra", "apple", "mango"],
3783            "TL text parse should preserve section order");
3784
3785        // Serialize back and verify order
3786        let output = doc.to_tl_with_schemas();
3787        let lines: Vec<&str> = output.trim().lines().collect();
3788        assert!(lines[0].starts_with("zebra:"), "got: {}", lines[0]);
3789        assert!(lines[1].starts_with("apple:"), "got: {}", lines[1]);
3790        assert!(lines[2].starts_with("mango:"), "got: {}", lines[2]);
3791    }
3792
3793    #[test]
3794    fn test_binary_roundtrip_preserves_section_order() {
3795        let json = r#"{"zebra": 1, "apple": 2, "mango": 3, "banana": 4}"#;
3796        let doc = TeaLeaf::from_json(json).unwrap();
3797
3798        let dir = std::env::temp_dir();
3799        let path = dir.join("test_order_preserve.tlbx");
3800        doc.compile(&path, false).unwrap();
3801
3802        let reader = crate::Reader::open(&path).unwrap();
3803        let doc2 = TeaLeaf::from_reader(&reader).unwrap();
3804        let keys: Vec<&String> = doc2.data.keys().collect();
3805        assert_eq!(keys, &["zebra", "apple", "mango", "banana"],
3806            "Binary round-trip should preserve section order");
3807        std::fs::remove_file(&path).ok();
3808    }
3809
3810    #[test]
3811    fn test_object_field_order_preserved_through_binary() {
3812        let json = r#"{"data": {"z_last": 1, "a_first": 2, "m_middle": 3}}"#;
3813        let doc = TeaLeaf::from_json(json).unwrap();
3814
3815        let dir = std::env::temp_dir();
3816        let path = dir.join("test_obj_order.tlbx");
3817        doc.compile(&path, false).unwrap();
3818
3819        let reader = crate::Reader::open(&path).unwrap();
3820        let val = reader.get("data").unwrap();
3821        let obj = val.as_object().unwrap();
3822        let keys: Vec<&String> = obj.keys().collect();
3823        assert_eq!(keys, &["z_last", "a_first", "m_middle"],
3824            "Object field order should be preserved through binary round-trip");
3825        std::fs::remove_file(&path).ok();
3826    }
3827
3828    #[test]
3829    fn test_nested_object_order_preserved() {
3830        let json = r#"{"outer": {"z": {"c": 3, "a": 1, "b": 2}, "a": {"x": 10, "w": 20}}}"#;
3831        let doc = TeaLeaf::from_json(json).unwrap();
3832        let tl = doc.to_tl_with_schemas();
3833
3834        // Parse back and check nested order
3835        let doc2 = TeaLeaf::parse(&tl).unwrap();
3836        let outer = doc2.get("outer").unwrap().as_object().unwrap();
3837        let outer_keys: Vec<&String> = outer.keys().collect();
3838        assert_eq!(outer_keys, &["z", "a"], "Outer keys order preserved");
3839
3840        let z_obj = outer.get("z").unwrap().as_object().unwrap();
3841        let z_keys: Vec<&String> = z_obj.keys().collect();
3842        assert_eq!(z_keys, &["c", "a", "b"], "Nested object keys order preserved");
3843    }
3844
3845    #[test]
3846    fn test_schema_order_preserved_in_text() {
3847        let input = r#"
3848            @struct Zebra (z_name: string)
3849            @struct Apple (a_name: string)
3850            items: [1, 2, 3]
3851        "#;
3852        let doc = TeaLeaf::parse(input).unwrap();
3853        let schema_keys: Vec<&String> = doc.schemas.keys().collect();
3854        assert_eq!(schema_keys, &["Zebra", "Apple"],
3855            "Schema definition order should be preserved");
3856    }
3857
3858    // -------------------------------------------------------------------------
3859    // Fuzz regression tests (full serialize/roundtrip paths)
3860    // -------------------------------------------------------------------------
3861
3862    #[test]
3863    fn test_fuzz_crash_ba05f4f8_serialize_day_zero_no_panic() {
3864        // Regression: fuzz_serialize crash-ba05f4f81615e2bf2b01137126cd772c6c0cc6d2
3865        // Timestamp with month=0 or day=0 caused u32 underflow in days_from_epoch.
3866        // Exercises the full fuzz_serialize path: parse → to_json → to_tl → re-parse.
3867        let inputs = [
3868            "ts: 2024-01-00T10:30:00Z",  // day=0
3869            "ts: 2024-00-15T10:30:00Z",  // month=0
3870            "ts: 6000-00-00T00:00:00Z",  // both zero
3871        ];
3872        for input in &inputs {
3873            // parse must not panic (should return Err)
3874            let result = TeaLeaf::parse(input);
3875            if let Ok(tl) = result {
3876                let _ = tl.to_json();
3877                let _ = tl.to_json_compact();
3878                let text = tl.to_tl_with_schemas();
3879                let _ = TeaLeaf::parse(&text);
3880            }
3881        }
3882    }
3883
3884    #[test]
3885    fn test_fuzz_crash_b085ba0e_roundtrip_day_zero_no_panic() {
3886        // Regression: fuzz_roundtrip crash-b085ba0e656f074031d8c4cb5173313785fa79d1
3887        // Same days_from_epoch underflow, hit through the roundtrip path.
3888        // Exercises the full fuzz_roundtrip path: parse → compile → read → walk.
3889        let inputs = [
3890            "ts: 4001-03-00T00:00:00Z",  // day=0 (pattern from artifact)
3891            "ts: 4401-03-00T00:00:00Z",  // variant
3892        ];
3893        for input in &inputs {
3894            let result = TeaLeaf::parse(input);
3895            if let Ok(tl) = result {
3896                let tmp = tempfile::NamedTempFile::new().unwrap();
3897                if tl.compile(tmp.path(), false).is_ok() {
3898                    let bytes = std::fs::read(tmp.path()).unwrap();
3899                    if let Ok(reader) = Reader::from_bytes(bytes) {
3900                        for key in reader.keys() {
3901                            let _ = reader.get(key);
3902                        }
3903                    }
3904                }
3905            }
3906        }
3907    }
3908
3909    #[test]
3910    fn test_fuzz_crash_48767e10_json_schemas_bare_dash_roundtrip() {
3911        // Regression: fuzz_json_schemas crash-48767e10b4ec71542bfbee2bc358b1e21831a259
3912        // JSON string "-" was serialized unquoted, causing re-parse failure.
3913        for input in [
3914            r#""-""#, r#""+""#, r#""--""#, r#""-foo""#,
3915            r#"{"a": "-"}"#, r#"{"a": "+"}"#,
3916            "\"\\u0660\"",  // Arabic-Indic digit zero
3917        ] {
3918            let tl = TeaLeaf::from_json_with_schemas(input);
3919            if let Ok(tl) = tl {
3920                let text = tl.to_tl_with_schemas();
3921                let reparsed = TeaLeaf::parse(&text);
3922                assert!(
3923                    reparsed.is_ok(),
3924                    "re-parse failed for JSON input {}",
3925                    input,
3926                );
3927            }
3928        }
3929    }
3930
3931    #[test]
3932    fn test_fuzz_crash_820dac71_empty_key_roundtrip() {
3933        // Regression: fuzz_json_schemas crash-820dac71c95d324067cd88de5f24897c65ace57a
3934        // JSON object with empty key was serialized without quoting, losing the key.
3935        for input in [
3936            r#"{"":{}}"#,                // empty key with empty object
3937            r#"[{"":{}}}]"#,             // root array variant (crash-66a8d85176f76ed68ada9f9526abe4efd8352f27)
3938            r#"{"":"value"}"#,            // empty key with string value
3939        ] {
3940            if let Ok(tl) = TeaLeaf::from_json_with_schemas(input) {
3941                let text = tl.to_tl_with_schemas();
3942                let reparsed = TeaLeaf::parse(&text);
3943                assert!(
3944                    reparsed.is_ok(),
3945                    "re-parse failed for JSON input {}",
3946                    input,
3947                );
3948            }
3949        }
3950    }
3951
3952    #[test]
3953    fn test_fuzz_crash_66a8d851_root_array_empty_key() {
3954        // Regression: fuzz_json_schemas crash-66a8d85176f76ed68ada9f9526abe4efd8352f27
3955        // Root array with empty-key object: schema inference + to_tl_with_schemas roundtrip
3956        let input = r#"[{"":{}}]"#;
3957        if let Ok(tl) = TeaLeaf::from_json_with_schemas(input) {
3958            let text = tl.to_tl_with_schemas();
3959            let reparsed = TeaLeaf::parse(&text);
3960            assert!(reparsed.is_ok(), "re-parse failed for root array with empty key");
3961        }
3962    }
3963
3964    #[test]
3965    fn test_fuzz_crash_847a9194_uint_roundtrip() {
3966        // Regression: fuzz_json_schemas crash-847a919462bb567fab268023a5a29d04e92db779
3967        // Large u64 values (> i64::MAX) were demoted to f64 on re-parse, losing precision.
3968        let input = "9999999999999999999";  // > i64::MAX, fits in u64
3969        let tl = TeaLeaf::from_json_with_schemas(input).unwrap();
3970        let text = tl.to_tl_with_schemas();
3971        let reparsed = TeaLeaf::parse(&text).unwrap();
3972        let orig = tl.data.get("root").unwrap();
3973        let re = reparsed.data.get("root").unwrap();
3974        assert_eq!(orig, re, "UInt roundtrip mismatch");
3975    }
3976
3977    #[test]
3978    fn test_fuzz_crash_3902c5cc_float_infinity_roundtrip() {
3979        // Regression: fuzz_serialize crash-3902c5cc99e5e4150d08d40372c86207fbc6db7f
3980        // 5e550 and -5e550 overflow f64 and are now stored as JsonNumber.
3981        // NaN remains Float(NaN).
3982        let tl = TeaLeaf::parse("b: NaN").unwrap();
3983        let text = tl.to_tl_with_schemas();
3984        let reparsed = TeaLeaf::parse(&text).unwrap();
3985        let orig = tl.data.get("b").unwrap();
3986        let re = reparsed.data.get("b").unwrap();
3987        match (orig, re) {
3988            (Value::Float(a), Value::Float(b)) => {
3989                assert_eq!(a.to_bits(), b.to_bits(), "NaN roundtrip failed");
3990            }
3991            _ => panic!("expected Float, got {:?} / {:?}", orig, re),
3992        }
3993
3994        // 5e550 and -5e550 are now JsonNumber (overflow f64)
3995        for input in &["b: 5e550", "b: -5e550"] {
3996            let tl = TeaLeaf::parse(input).unwrap();
3997            let text = tl.to_tl_with_schemas();
3998            let reparsed = TeaLeaf::parse(&text).unwrap();
3999            let orig = tl.data.get("b").unwrap();
4000            let re = reparsed.data.get("b").unwrap();
4001            match (orig, re) {
4002                (Value::JsonNumber(a), Value::JsonNumber(b)) => {
4003                    assert_eq!(a, b, "JsonNumber roundtrip failed for {}", input);
4004                }
4005                _ => panic!("expected JsonNumber, got {:?} / {:?}", orig, re),
4006            }
4007        }
4008    }
4009
4010    #[test]
4011    fn test_needs_quoting_bare_sign() {
4012        assert!(needs_quoting("-"));
4013        assert!(needs_quoting("+"));
4014        assert!(needs_quoting("--"));
4015        assert!(needs_quoting("-foo"));
4016        assert!(needs_quoting("+bar"));
4017        assert!(needs_quoting("-1")); // negative number
4018        assert!(needs_quoting("+1")); // positive number
4019        assert!(needs_quoting("\u{0660}")); // Arabic-Indic digit zero
4020        assert!(!needs_quoting("hello"));
4021        assert!(!needs_quoting("foo-bar"));
4022    }
4023
4024    #[test]
4025    fn test_fuzz_crash_nan_string_needs_quoting() {
4026        // Regression: fuzz_parse/fuzz_serialize crash — string "NaN" must be quoted
4027        // to avoid re-parsing as Float(NaN).
4028        assert!(needs_quoting("NaN"));
4029        assert!(needs_quoting("inf"));
4030        assert!(needs_quoting("Infinity"));
4031
4032        // Roundtrip: String("NaN") must survive parse → dumps → re-parse
4033        for word in &["NaN", "inf", "Infinity"] {
4034            let input = format!("a: \"{}\"", word);
4035            let tl = TeaLeaf::parse(&input).unwrap();
4036            assert!(matches!(tl.get("a"), Some(Value::String(_))));
4037            let text = dumps(&tl.data);
4038            let reparsed = TeaLeaf::parse(&text).unwrap();
4039            assert_eq!(
4040                reparsed.get("a").unwrap().as_str(),
4041                Some(*word),
4042                "roundtrip failed for string {:?}",
4043                word,
4044            );
4045        }
4046    }
4047
4048}