Skip to main content

tealeaf/
writer.rs

1//! Binary format writer for TeaLeaf
2
3use std::collections::HashMap;
4use std::fs::File;
5use std::io::{BufWriter, Write, Seek, SeekFrom};
6use std::path::Path;
7use crate::types::ObjectMap;
8
9use crate::{Result, Value, Schema, Union, FieldType, TLType, MAGIC, VERSION_MAJOR, VERSION_MINOR, HEADER_SIZE,
10    MAX_STRING_LENGTH, MAX_OBJECT_FIELDS, MAX_ARRAY_LENGTH};
11
/// Accumulates interned strings, schemas, unions and encoded sections in
/// memory, then serializes them to a file with [`Writer::write`].
pub struct Writer {
    /// Interned strings in insertion order; a string's position is its index.
    strings: Vec<String>,
    /// Reverse lookup from string content to its position in `strings`.
    string_map: HashMap<String, u32>,
    /// Registered struct schemas in insertion order.
    schemas: Vec<Schema>,
    /// Schema name -> position in `schemas`.
    schema_map: HashMap<String, u16>,
    /// Registered unions in insertion order.
    unions: Vec<Union>,
    /// Union name -> position in `unions`.
    union_map: HashMap<String, u16>,
    /// Encoded sections, in the order they were added.
    sections: Vec<Section>,
    /// Indicates the source JSON was a root-level array (for round-trip fidelity)
    is_root_array: bool,
}
23
/// One top-level key together with its already-encoded payload and the
/// metadata that `Writer::write` stores in the section index.
struct Section {
    /// Section name; interned by `add_section` before the section is stored.
    key: String,
    /// Encoded (uncompressed) payload bytes.
    data: Vec<u8>,
    /// `-1` when no schema was supplied, `0xFFFF` when one was supplied but is
    /// not registered, otherwise the schema's index. Negative values are
    /// written to the index as `0xFFFF`.
    schema_idx: i32,
    /// Type tag reported by the encoder for the payload.
    tl_type: TLType,
    /// Array flag reported by the encoder; stored as bit 1 of the index flags byte.
    is_array: bool,
    /// Element/pair count reported by the encoder (0 for scalars and objects).
    item_count: u32,
}
32
33impl Writer {
34    pub fn new() -> Self {
35        Self {
36            strings: Vec::new(),
37            string_map: HashMap::new(),
38            schemas: Vec::new(),
39            schema_map: HashMap::new(),
40            unions: Vec::new(),
41            union_map: HashMap::new(),
42            sections: Vec::new(),
43            is_root_array: false,
44        }
45    }
46
    /// Set whether the source JSON was a root-level array.
    ///
    /// The value is stored on the writer and emitted by `write` as bit 1
    /// (`0x02`) of the header flags word.
    pub fn set_root_array(&mut self, is_root_array: bool) {
        self.is_root_array = is_root_array;
    }
51
52    pub fn intern(&mut self, s: &str) -> u32 {
53        if let Some(&idx) = self.string_map.get(s) { return idx; }
54        let idx = self.strings.len() as u32;
55        self.strings.push(s.to_string());
56        self.string_map.insert(s.to_string(), idx);
57        idx
58    }
59
60    pub fn add_schema(&mut self, schema: Schema) -> u16 {
61        if let Some(&idx) = self.schema_map.get(&schema.name) { return idx; }
62        for field in &schema.fields { self.intern(&field.name); }
63        self.intern(&schema.name);
64        let idx = self.schemas.len() as u16;
65        self.schema_map.insert(schema.name.clone(), idx);
66        self.schemas.push(schema);
67        idx
68    }
69
70    pub fn add_union(&mut self, union: Union) -> u16 {
71        if let Some(&idx) = self.union_map.get(&union.name) { return idx; }
72        self.intern(&union.name);
73        for variant in &union.variants {
74            self.intern(&variant.name);
75            for field in &variant.fields {
76                self.intern(&field.name);
77            }
78        }
79        let idx = self.unions.len() as u16;
80        self.union_map.insert(union.name.clone(), idx);
81        self.unions.push(union);
82        idx
83    }
84
85    pub fn add_section(&mut self, key: &str, value: &Value, schema: Option<&Schema>) -> Result<()> {
86        self.intern(key);
87        let (data, tl_type, is_array, item_count) = self.encode_value(value, schema)?;
88        // Compute schema_idx AFTER encoding, since encode_value may register the schema
89        let schema_idx = schema.map(|s| self.schema_map.get(&s.name).copied().unwrap_or(0xFFFF) as i32).unwrap_or(-1);
90        self.sections.push(Section { key: key.to_string(), data, schema_idx, tl_type, is_array, item_count });
91        Ok(())
92    }
93
94    pub fn write<P: AsRef<Path>>(&self, path: P, compress: bool) -> Result<()> {
95        let file = File::create(path)?;
96        let mut w = BufWriter::new(file);
97        w.write_all(&[0u8; HEADER_SIZE])?;
98
99        let str_off = HEADER_SIZE as u64;
100        self.write_string_table(&mut w)?;
101        let sch_off = str_off + self.string_table_size() as u64;
102        self.write_schema_table(&mut w)?;
103        let idx_off = sch_off + self.schema_table_size() as u64;
104        let index_size = 8 + self.sections.len() * 32;
105        w.write_all(&vec![0u8; index_size])?;
106        let data_off = idx_off + index_size as u64;
107
108        let mut entries = Vec::new();
109        let mut cur_off = data_off;
110        for sec in &self.sections {
111            let (written, compressed) = if compress && sec.data.len() > 64 {
112                let c = compress_data(&sec.data)?;
113                if c.len() < (sec.data.len() as f64 * 0.9) as usize { (c, true) } else { (sec.data.clone(), false) }
114            } else { (sec.data.clone(), false) };
115            w.write_all(&written)?;
116            entries.push((self.string_map[&sec.key], cur_off, written.len() as u32, sec.data.len() as u32, sec.schema_idx, sec.tl_type, compressed, sec.is_array, sec.item_count));
117            cur_off += written.len() as u64;
118        }
119
120        w.seek(SeekFrom::Start(0))?;
121        w.write_all(&MAGIC)?;
122        w.write_all(&VERSION_MAJOR.to_le_bytes())?;
123        w.write_all(&VERSION_MINOR.to_le_bytes())?;
124        // Flags: bit 0 = compressed, bit 1 = root_array
125        let mut flags: u32 = 0;
126        if compress { flags |= 0x01; }
127        if self.is_root_array { flags |= 0x02; }
128        w.write_all(&flags.to_le_bytes())?;
129        w.write_all(&0u32.to_le_bytes())?;
130        w.write_all(&str_off.to_le_bytes())?;
131        w.write_all(&sch_off.to_le_bytes())?;
132        w.write_all(&idx_off.to_le_bytes())?;
133        w.write_all(&data_off.to_le_bytes())?;
134        w.write_all(&(self.strings.len() as u32).to_le_bytes())?;
135        w.write_all(&(self.schemas.len() as u32).to_le_bytes())?;
136        w.write_all(&(self.sections.len() as u32).to_le_bytes())?;
137        w.write_all(&0u32.to_le_bytes())?;
138
139        w.seek(SeekFrom::Start(idx_off))?;
140        w.write_all(&(index_size as u32).to_le_bytes())?;
141        w.write_all(&(entries.len() as u32).to_le_bytes())?;
142        for (ki, off, sz, usz, si, pt, comp, arr, cnt) in entries {
143            w.write_all(&ki.to_le_bytes())?;
144            w.write_all(&off.to_le_bytes())?;
145            w.write_all(&sz.to_le_bytes())?;
146            w.write_all(&usz.to_le_bytes())?;
147            w.write_all(&(if si < 0 { 0xFFFFu16 } else { si as u16 }).to_le_bytes())?;
148            w.write_all(&[pt as u8])?;
149            w.write_all(&[(if comp { 1 } else { 0 }) | (if arr { 2 } else { 0 })])?;
150            w.write_all(&cnt.to_le_bytes())?;
151            w.write_all(&[0u8; 4])?;
152        }
153        w.flush()?;
154        Ok(())
155    }
156
157    fn string_table_size(&self) -> usize {
158        8 + self.strings.len() * 8 + self.strings.iter().map(|s| s.len()).sum::<usize>()
159    }
160
161    fn schema_table_size(&self) -> usize {
162        if self.schemas.is_empty() && self.unions.is_empty() { return 8; }
163        let struct_size = self.schemas.len() * 4
164            + self.schemas.iter().map(|s| 8 + s.fields.len() * 8).sum::<usize>();
165        let union_size = self.unions.len() * 4
166            + self.unions.iter().map(|u| {
167                8 + u.variants.iter().map(|v| 8 + v.fields.len() * 8).sum::<usize>()
168            }).sum::<usize>();
169        8 + struct_size + union_size
170    }
171
172    fn write_string_table<W: Write>(&self, w: &mut W) -> Result<()> {
173        let table_size = self.string_table_size();
174        if table_size > u32::MAX as usize {
175            return Err(crate::Error::ValueOutOfRange(
176                format!("String table size {} exceeds u32::MAX", table_size)));
177        }
178        let total_string_bytes: usize = self.strings.iter().map(|s| s.len()).sum();
179        if total_string_bytes > u32::MAX as usize {
180            return Err(crate::Error::ValueOutOfRange(
181                format!("Total string data {} bytes exceeds u32::MAX", total_string_bytes)));
182        }
183        let mut off = 0u32;
184        let offsets: Vec<u32> = self.strings.iter().map(|s| { let o = off; off += s.len() as u32; o }).collect();
185        w.write_all(&(table_size as u32).to_le_bytes())?;
186        w.write_all(&(self.strings.len() as u32).to_le_bytes())?;
187        for o in &offsets { w.write_all(&o.to_le_bytes())?; }
188        for s in &self.strings {
189            if s.len() > MAX_STRING_LENGTH {
190                return Err(crate::Error::ValueOutOfRange(
191                    format!("String length {} exceeds maximum {}", s.len(), MAX_STRING_LENGTH)));
192            }
193            w.write_all(&(s.len() as u32).to_le_bytes())?;
194        }
195        for s in &self.strings { w.write_all(s.as_bytes())?; }
196        Ok(())
197    }
198
199    fn write_schema_table<W: Write>(&self, w: &mut W) -> Result<()> {
200        if self.schemas.is_empty() && self.unions.is_empty() {
201            w.write_all(&8u32.to_le_bytes())?;
202            w.write_all(&0u32.to_le_bytes())?;
203            return Ok(());
204        }
205
206        // --- Struct data ---
207        let mut struct_data = Vec::new();
208        let mut off = 0u32;
209        let struct_offsets: Vec<u32> = self.schemas.iter().map(|s| {
210            let o = off;
211            off += (8 + s.fields.len() * 8) as u32;
212            o
213        }).collect();
214        for schema in &self.schemas {
215            struct_data.extend_from_slice(&self.string_map[&schema.name].to_le_bytes());
216            struct_data.extend_from_slice(&(schema.fields.len() as u16).to_le_bytes());
217            struct_data.extend_from_slice(&0u16.to_le_bytes());
218            for f in &schema.fields {
219                struct_data.extend_from_slice(&self.string_map[&f.name].to_le_bytes());
220                // Resolve union types: if the base name is in union_map, emit Tagged instead of Struct
221                let resolved_tl_type = if self.union_map.contains_key(&f.field_type.base) {
222                    TLType::Tagged
223                } else {
224                    f.field_type.to_tl_type()
225                };
226                struct_data.push(resolved_tl_type as u8);
227                let mut flags: u8 = 0;
228                if f.field_type.nullable { flags |= 0x01; }
229                if f.field_type.is_array { flags |= 0x02; }
230                struct_data.push(flags);
231                // Store struct/union type name string index (0xFFFF = no type)
232                if resolved_tl_type == TLType::Struct {
233                    let type_name_idx = self.string_map.get(&f.field_type.base)
234                        .copied()
235                        .map(|i| i as u16)
236                        .unwrap_or(0xFFFF);
237                    struct_data.extend_from_slice(&type_name_idx.to_le_bytes());
238                } else if resolved_tl_type == TLType::Tagged {
239                    // Union-typed field: store union name string index
240                    let type_name_idx = self.string_map.get(&f.field_type.base)
241                        .copied()
242                        .map(|i| i as u16)
243                        .unwrap_or(0xFFFF);
244                    struct_data.extend_from_slice(&type_name_idx.to_le_bytes());
245                } else {
246                    struct_data.extend_from_slice(&0xFFFFu16.to_le_bytes());
247                }
248            }
249        }
250
251        // --- Union data ---
252        let mut union_data = Vec::new();
253        let mut uoff = 0u32;
254        let union_offsets: Vec<u32> = self.unions.iter().map(|u| {
255            let o = uoff;
256            uoff += (8 + u.variants.iter().map(|v| 8 + v.fields.len() * 8).sum::<usize>()) as u32;
257            o
258        }).collect();
259        for union in &self.unions {
260            union_data.extend_from_slice(&self.string_map[&union.name].to_le_bytes());
261            union_data.extend_from_slice(&(union.variants.len() as u16).to_le_bytes());
262            union_data.extend_from_slice(&0u16.to_le_bytes()); // flags (reserved)
263            for variant in &union.variants {
264                union_data.extend_from_slice(&self.string_map[&variant.name].to_le_bytes());
265                union_data.extend_from_slice(&(variant.fields.len() as u16).to_le_bytes());
266                union_data.extend_from_slice(&0u16.to_le_bytes()); // flags (reserved)
267                for f in &variant.fields {
268                    union_data.extend_from_slice(&self.string_map[&f.name].to_le_bytes());
269                    let resolved_tl_type = if self.union_map.contains_key(&f.field_type.base) {
270                        TLType::Tagged
271                    } else {
272                        f.field_type.to_tl_type()
273                    };
274                    union_data.push(resolved_tl_type as u8);
275                    let mut flags: u8 = 0;
276                    if f.field_type.nullable { flags |= 0x01; }
277                    if f.field_type.is_array { flags |= 0x02; }
278                    union_data.push(flags);
279                    if resolved_tl_type == TLType::Struct || resolved_tl_type == TLType::Tagged {
280                        let type_name_idx = self.string_map.get(&f.field_type.base)
281                            .copied()
282                            .map(|i| i as u16)
283                            .unwrap_or(0xFFFF);
284                        union_data.extend_from_slice(&type_name_idx.to_le_bytes());
285                    } else {
286                        union_data.extend_from_slice(&0xFFFFu16.to_le_bytes());
287                    }
288                }
289            }
290        }
291
292        // --- Write header ---
293        w.write_all(&(self.schema_table_size() as u32).to_le_bytes())?;
294        w.write_all(&(self.schemas.len() as u16).to_le_bytes())?;
295        w.write_all(&(self.unions.len() as u16).to_le_bytes())?; // was padding=0
296        // Struct offsets, then struct data
297        for o in &struct_offsets { w.write_all(&o.to_le_bytes())?; }
298        w.write_all(&struct_data)?;
299        // Union offsets, then union data
300        for o in &union_offsets { w.write_all(&o.to_le_bytes())?; }
301        w.write_all(&union_data)?;
302        Ok(())
303    }
304
305    fn encode_value(&mut self, value: &Value, schema: Option<&Schema>) -> Result<(Vec<u8>, TLType, bool, u32)> {
306        match value {
307            Value::Null => Ok((vec![], TLType::Null, false, 0)),
308            Value::Bool(b) => Ok((vec![if *b { 1 } else { 0 }], TLType::Bool, false, 0)),
309            Value::Int(i) => Ok(encode_int(*i)),
310            Value::UInt(u) => Ok(encode_uint(*u)),
311            Value::Float(f) => Ok((f.to_le_bytes().to_vec(), TLType::Float64, false, 0)),
312            Value::String(s) => { let idx = self.intern(s); Ok((idx.to_le_bytes().to_vec(), TLType::String, false, 0)) }
313            Value::Bytes(b) => { let mut buf = Vec::new(); write_varint(&mut buf, b.len() as u64); buf.extend(b); Ok((buf, TLType::Bytes, false, 0)) }
314            Value::Array(arr) => self.encode_array(arr, schema),
315            Value::Object(obj) => self.encode_object(obj),
316            Value::Map(pairs) => self.encode_map(pairs),
317            Value::Ref(r) => { let idx = self.intern(r); Ok((idx.to_le_bytes().to_vec(), TLType::Ref, false, 0)) }
318            Value::Tagged(tag, inner) => {
319                let ti = self.intern(tag);
320                let (d, t, _, _) = self.encode_value(inner, None)?;
321                let mut buf = ti.to_le_bytes().to_vec();
322                buf.push(t as u8);
323                buf.extend(d);
324                Ok((buf, TLType::Tagged, false, 0))
325            }
326            Value::Timestamp(ts, tz) => {
327                let mut buf = ts.to_le_bytes().to_vec();
328                buf.extend(tz.to_le_bytes());
329                Ok((buf, TLType::Timestamp, false, 0))
330            }
331            Value::JsonNumber(s) => { let idx = self.intern(s); Ok((idx.to_le_bytes().to_vec(), TLType::JsonNumber, false, 0)) }
332        }
333    }
334
335    fn encode_map(&mut self, pairs: &[(Value, Value)]) -> Result<(Vec<u8>, TLType, bool, u32)> {
336        let mut buf = (pairs.len() as u32).to_le_bytes().to_vec();
337        for (k, v) in pairs {
338            // Validate map keys per spec: map_key = string | name | integer
339            match k {
340                Value::String(_) | Value::Int(_) | Value::UInt(_) => {}
341                _ => return Err(crate::Error::ParseError(
342                    format!("Invalid map key type {:?}: map keys must be string, int, or uint per spec", k.tl_type())
343                )),
344            }
345            let (kd, kt, _, _) = self.encode_value(k, None)?;
346            let (vd, vt, _, _) = self.encode_value(v, None)?;
347            buf.push(kt as u8);
348            buf.extend(kd);
349            buf.push(vt as u8);
350            buf.extend(vd);
351        }
352        Ok((buf, TLType::Map, false, pairs.len() as u32))
353    }
354
355    fn encode_array(&mut self, arr: &[Value], schema: Option<&Schema>) -> Result<(Vec<u8>, TLType, bool, u32)> {
356        if arr.len() > MAX_ARRAY_LENGTH {
357            return Err(crate::Error::ValueOutOfRange(
358                format!("Array has {} elements, exceeds maximum {}", arr.len(), MAX_ARRAY_LENGTH)));
359        }
360        let mut buf = (arr.len() as u32).to_le_bytes().to_vec();
361        if arr.is_empty() { return Ok((buf, TLType::Array, true, 0)); }
362        if schema.is_some() && arr.iter().all(|v| matches!(v, Value::Object(_) | Value::Null)) {
363            return self.encode_struct_array(arr, schema.unwrap());
364        }
365        // Spec-conformant homogeneous encoding: only Int32 and String for top-level arrays.
366        // All other types (UInt, Bool, Float, Timestamp, Int64) use heterogeneous 0xFF encoding.
367        // Schema-typed arrays (within @struct) use homogeneous encoding for any type via encode_typed_value.
368        if arr.iter().all(|v| matches!(v, Value::Int(_))) {
369            let all_fit_i32 = arr.iter().all(|v| {
370                if let Value::Int(i) = v { *i >= i32::MIN as i64 && *i <= i32::MAX as i64 } else { false }
371            });
372            if all_fit_i32 {
373                buf.push(TLType::Int32 as u8);
374                for v in arr { if let Value::Int(i) = v { buf.extend((*i as i32).to_le_bytes()); } }
375                return Ok((buf, TLType::Array, true, arr.len() as u32));
376            }
377            // Int values exceeding i32 range fall through to heterogeneous encoding
378        }
379        if arr.iter().all(|v| matches!(v, Value::String(_))) {
380            buf.push(TLType::String as u8);
381            for v in arr { if let Value::String(s) = v { buf.extend(self.intern(s).to_le_bytes()); } }
382            return Ok((buf, TLType::Array, true, arr.len() as u32));
383        }
384        buf.push(0xFF);
385        for v in arr { let (d, t, _, _) = self.encode_value(v, None)?; buf.push(t as u8); buf.extend(d); }
386        Ok((buf, TLType::Array, true, arr.len() as u32))
387    }
388
389    fn encode_struct_array(&mut self, arr: &[Value], schema: &Schema) -> Result<(Vec<u8>, TLType, bool, u32)> {
390        let mut buf = (arr.len() as u32).to_le_bytes().to_vec();
391        let si = match self.schema_map.get(&schema.name) {
392            Some(&idx) => idx,
393            None => self.add_schema(schema.clone()),
394        };
395        buf.extend(si.to_le_bytes());
396        let bms = (schema.fields.len() + 7) / 8;
397        buf.extend((bms as u16).to_le_bytes());
398        // Pre-build schema lookup to avoid O(n×m) linear scan per field per row
399        let nested_schemas: Vec<Option<Schema>> = schema.fields.iter()
400            .map(|f| self.schemas.iter().find(|s| s.name == f.field_type.base).cloned())
401            .collect();
402        for v in arr {
403            if let Value::Object(obj) = v {
404                let mut bitmap = vec![0u8; bms];
405                for (i, f) in schema.fields.iter().enumerate() {
406                    if obj.get(&f.name).map(|v| v.is_null()).unwrap_or(true) {
407                        bitmap[i / 8] |= 1 << (i % 8);
408                    }
409                }
410                buf.extend_from_slice(&bitmap);
411                for (i, f) in schema.fields.iter().enumerate() {
412                    let is_null = bitmap[i / 8] & (1 << (i % 8)) != 0;
413                    if !is_null {
414                        if let Some(v) = obj.get(&f.name) {
415                            let data = self.encode_typed_value(v, &f.field_type, nested_schemas[i].as_ref())?;
416                            buf.extend(data);
417                        }
418                    }
419                }
420            } else {
421                // Null element: write bitmap with all field bits set, no field data
422                let mut bitmap = vec![0u8; bms];
423                for i in 0..schema.fields.len() {
424                    bitmap[i / 8] |= 1 << (i % 8);
425                }
426                buf.extend_from_slice(&bitmap);
427            }
428        }
429        Ok((buf, TLType::Struct, true, arr.len() as u32))
430    }
431
432    /// Encode a value according to a specific field type (schema-aware encoding)
433    fn encode_typed_value(&mut self, value: &Value, field_type: &FieldType, nested_schema: Option<&Schema>) -> Result<Vec<u8>> {
434        use crate::TLType;
435
436        // Handle arrays
437        if field_type.is_array {
438            if let Value::Array(arr) = value {
439                let mut buf = (arr.len() as u32).to_le_bytes().to_vec();
440                if arr.is_empty() { return Ok(buf); }
441
442                // Determine element type, resolving unions via union_map
443                let elem_type = FieldType::new(&field_type.base);
444                let elem_tl_type = if self.union_map.contains_key(&field_type.base) {
445                    TLType::Tagged
446                } else {
447                    elem_type.to_tl_type()
448                };
449
450                // For struct arrays, look up the correct element schema
451                let elem_schema = self.schemas.iter()
452                    .find(|s| s.name == field_type.base)
453                    .cloned();
454
455                // If elem type resolves to Struct but no schema exists (e.g., "any"
456                // pseudo-type from JSON inference), use heterogeneous encoding —
457                // the reader can't decode struct format without a schema.
458                if elem_tl_type == TLType::Struct && elem_schema.is_none() {
459                    buf.push(0xFF);
460                    for v in arr {
461                        let (d, t, _, _) = self.encode_value(v, None)?;
462                        buf.push(t as u8);
463                        buf.extend(d);
464                    }
465                    return Ok(buf);
466                }
467
468                // Write element type byte (standard array format)
469                buf.push(elem_tl_type as u8);
470
471                // Encode each element with proper type
472                for v in arr {
473                    buf.extend(self.encode_typed_value(v, &elem_type, elem_schema.as_ref())?);
474                }
475                return Ok(buf);
476            }
477            // Non-array value for array-typed field: encode as zero-length array
478            // to maintain stream alignment (empty vec would corrupt subsequent fields)
479            return Ok((0u32).to_le_bytes().to_vec());
480        }
481
482        let tl_type = field_type.to_tl_type();
483        match tl_type {
484            TLType::Null => Ok(vec![]),
485            TLType::Bool => {
486                if let Value::Bool(b) = value { Ok(vec![if *b { 1 } else { 0 }]) }
487                else { Ok(vec![0]) }
488            }
489            TLType::Int8 => {
490                let i = checked_int_value(value, i8::MIN as i64, i8::MAX as i64, "int8")?;
491                Ok((i as i8).to_le_bytes().to_vec())
492            }
493            TLType::Int16 => {
494                let i = checked_int_value(value, i16::MIN as i64, i16::MAX as i64, "int16")?;
495                Ok((i as i16).to_le_bytes().to_vec())
496            }
497            TLType::Int32 => {
498                let i = checked_int_value(value, i32::MIN as i64, i32::MAX as i64, "int32")?;
499                Ok((i as i32).to_le_bytes().to_vec())
500            }
501            TLType::Int64 => {
502                let i = checked_int_value(value, i64::MIN, i64::MAX, "int64")?;
503                Ok(i.to_le_bytes().to_vec())
504            }
505            TLType::UInt8 => {
506                let u = checked_uint_value(value, u8::MAX as u64, "uint8")?;
507                Ok((u as u8).to_le_bytes().to_vec())
508            }
509            TLType::UInt16 => {
510                let u = checked_uint_value(value, u16::MAX as u64, "uint16")?;
511                Ok((u as u16).to_le_bytes().to_vec())
512            }
513            TLType::UInt32 => {
514                let u = checked_uint_value(value, u32::MAX as u64, "uint32")?;
515                Ok((u as u32).to_le_bytes().to_vec())
516            }
517            TLType::UInt64 => {
518                let u = checked_uint_value(value, u64::MAX, "uint64")?;
519                Ok(u.to_le_bytes().to_vec())
520            }
521            TLType::Float32 => {
522                let f = match value { Value::Float(f) => *f, Value::Int(i) => *i as f64, Value::UInt(u) => *u as f64, _ => 0.0 };
523                Ok((f as f32).to_le_bytes().to_vec())
524            }
525            TLType::Float64 => {
526                let f = match value { Value::Float(f) => *f, Value::Int(i) => *i as f64, Value::UInt(u) => *u as f64, _ => 0.0 };
527                Ok(f.to_le_bytes().to_vec())
528            }
529            TLType::String => {
530                if let Value::String(s) = value { Ok(self.intern(s).to_le_bytes().to_vec()) }
531                else { Ok(self.intern("").to_le_bytes().to_vec()) }
532            }
533            TLType::Bytes => {
534                if let Value::Bytes(b) = value {
535                    let mut buf = Vec::new();
536                    write_varint(&mut buf, b.len() as u64);
537                    buf.extend(b);
538                    Ok(buf)
539                } else { Ok(vec![0]) }
540            }
541            TLType::Timestamp => {
542                if let Value::Timestamp(ts, tz) = value {
543                    let mut buf = ts.to_le_bytes().to_vec();
544                    buf.extend(tz.to_le_bytes());
545                    Ok(buf)
546                } else {
547                    let mut buf = 0i64.to_le_bytes().to_vec();
548                    buf.extend(0i16.to_le_bytes());
549                    Ok(buf)
550                }
551            }
552            TLType::Struct => {
553                // Check if this is actually a union type resolved at encoding time
554                if self.union_map.contains_key(&field_type.base) {
555                    let (d, _, _, _) = self.encode_value(value, None)?;
556                    return Ok(d);
557                }
558                // Nested struct - encode recursively
559                if let (Value::Object(obj), Some(schema)) = (value, nested_schema) {
560                    let mut buf = Vec::new();
561
562                    // Write schema index
563                    let schema_idx = *self.schema_map.get(&schema.name).unwrap_or(&0);
564                    buf.extend(schema_idx.to_le_bytes());
565
566                    let bms = (schema.fields.len() + 7) / 8;
567
568                    // Bitmap (supports >64 fields)
569                    let mut bitmap = vec![0u8; bms];
570                    for (i, f) in schema.fields.iter().enumerate() {
571                        if obj.get(&f.name).map(|v| v.is_null()).unwrap_or(true) {
572                            bitmap[i / 8] |= 1 << (i % 8);
573                        }
574                    }
575                    buf.extend_from_slice(&bitmap);
576
577                    // Fields
578                    for (i, f) in schema.fields.iter().enumerate() {
579                        let is_null = bitmap[i / 8] & (1 << (i % 8)) != 0;
580                        if !is_null {
581                            if let Some(v) = obj.get(&f.name) {
582                                let nested = self.schemas.iter().find(|s| s.name == f.field_type.base).cloned();
583                                buf.extend(self.encode_typed_value(v, &f.field_type, nested.as_ref())?);
584                            }
585                        }
586                    }
587                    Ok(buf)
588                } else {
589                    // No schema found — fall back to generic encoding
590                    // (e.g., 'any' pseudo-type from JSON schema inference)
591                    let (d, _, _, _) = self.encode_value(value, None)?;
592                    Ok(d)
593                }
594            }
595            _ => {
596                // Fallback to generic encoding
597                let (d, _, _, _) = self.encode_value(value, None)?;
598                Ok(d)
599            }
600        }
601    }
602
603    fn encode_object(&mut self, obj: &ObjectMap<String, Value>) -> Result<(Vec<u8>, TLType, bool, u32)> {
604        if obj.len() > MAX_OBJECT_FIELDS {
605            return Err(crate::Error::ValueOutOfRange(
606                format!("Object has {} fields, exceeds maximum {}", obj.len(), MAX_OBJECT_FIELDS)));
607        }
608        let mut buf = (obj.len() as u16).to_le_bytes().to_vec();
609        for (k, v) in obj {
610            buf.extend(self.intern(k).to_le_bytes());
611            let (d, t, _, _) = self.encode_value(v, None)?;
612            buf.push(t as u8);
613            buf.extend(d);
614        }
615        Ok((buf, TLType::Object, false, 0))
616    }
617}
618
619impl Default for Writer { fn default() -> Self { Self::new() } }
620
621/// Extract an integer value with best-effort coercion for schema-typed fields.
622/// Out-of-range and non-numeric values default to 0 (spec §2.5).
623fn checked_int_value(value: &Value, min: i64, max: i64, _type_name: &str) -> Result<i64> {
624    let i = match value {
625        Value::Int(i) => *i,
626        Value::UInt(u) if *u <= i64::MAX as u64 => *u as i64,
627        Value::UInt(_) => 0,
628        Value::Float(f) => {
629            let f = *f;
630            if f.is_finite() && f >= i64::MIN as f64 && f <= i64::MAX as f64 { f as i64 } else { 0 }
631        }
632        Value::JsonNumber(s) => s.parse::<i64>().unwrap_or(0),
633        _ => 0,
634    };
635    if i < min || i > max { Ok(0) } else { Ok(i) }
636}
637
638/// Extract an unsigned integer value with best-effort coercion for schema-typed fields.
639/// Out-of-range and non-numeric values default to 0 (spec §2.5).
640fn checked_uint_value(value: &Value, max: u64, _type_name: &str) -> Result<u64> {
641    let u = match value {
642        Value::UInt(u) => *u,
643        Value::Int(i) if *i >= 0 => *i as u64,
644        Value::Int(_) => 0,
645        Value::Float(f) => {
646            let f = *f;
647            if f.is_finite() && f >= 0.0 && f <= u64::MAX as f64 { f as u64 } else { 0 }
648        }
649        Value::JsonNumber(s) => s.parse::<u64>().unwrap_or(0),
650        _ => 0,
651    };
652    if u > max { Ok(0) } else { Ok(u) }
653}
654
655fn encode_int(i: i64) -> (Vec<u8>, TLType, bool, u32) {
656    if i >= i8::MIN as i64 && i <= i8::MAX as i64 { ((i as i8).to_le_bytes().to_vec(), TLType::Int8, false, 0) }
657    else if i >= i16::MIN as i64 && i <= i16::MAX as i64 { ((i as i16).to_le_bytes().to_vec(), TLType::Int16, false, 0) }
658    else if i >= i32::MIN as i64 && i <= i32::MAX as i64 { ((i as i32).to_le_bytes().to_vec(), TLType::Int32, false, 0) }
659    else { (i.to_le_bytes().to_vec(), TLType::Int64, false, 0) }
660}
661
662fn encode_uint(u: u64) -> (Vec<u8>, TLType, bool, u32) {
663    if u <= u8::MAX as u64 { ((u as u8).to_le_bytes().to_vec(), TLType::UInt8, false, 0) }
664    else if u <= u16::MAX as u64 { ((u as u16).to_le_bytes().to_vec(), TLType::UInt16, false, 0) }
665    else if u <= u32::MAX as u64 { ((u as u32).to_le_bytes().to_vec(), TLType::UInt32, false, 0) }
666    else { (u.to_le_bytes().to_vec(), TLType::UInt64, false, 0) }
667}
668
/// Append `v` to `buf` as a variable-length integer.
///
/// The value is emitted seven bits at a time, least-significant group first.
/// Every byte except the last has its high bit (0x80) set to signal that more
/// bytes follow.
fn write_varint(buf: &mut Vec<u8>, mut v: u64) {
    loop {
        let low_bits = (v & 0x7F) as u8;
        v >>= 7;
        if v == 0 {
            // Final group: high bit stays clear.
            buf.push(low_bits);
            return;
        }
        buf.push(low_bits | 0x80);
    }
}
673
674fn compress_data(data: &[u8]) -> Result<Vec<u8>> {
675    use flate2::Compression;
676    use flate2::write::ZlibEncoder;
677    let mut e = ZlibEncoder::new(Vec::new(), Compression::default());
678    e.write_all(data).map_err(crate::Error::Io)?;
679    e.finish().map_err(crate::Error::Io)
680}
681
#[cfg(test)]
mod tests {
    use super::*;
    use crate::reader::Reader;
    use std::path::{Path, PathBuf};

    // Range bounds shared by the checked-coercion tests.
    const I8_MIN: i64 = i8::MIN as i64;
    const I8_MAX: i64 = i8::MAX as i64;
    const U8_MAX: u64 = u8::MAX as u64;

    /// Scratch file in the system temp directory that is removed when dropped.
    ///
    /// Cleanup runs from `Drop`, so the file is also removed when an assertion
    /// panics part-way through a test. The process id is folded into the file
    /// name so that concurrent test runs do not overwrite each other's files.
    struct TempFile(PathBuf);

    impl TempFile {
        fn new(name: &str) -> Self {
            let unique = format!("{}_{}", std::process::id(), name);
            TempFile(std::env::temp_dir().join(unique))
        }

        fn path(&self) -> &Path {
            &self.0
        }
    }

    impl Drop for TempFile {
        fn drop(&mut self) {
            // Best effort: the file may not exist if the test failed before writing it.
            let _ = std::fs::remove_file(&self.0);
        }
    }

    /// Load a file produced by `Writer::write` back through the binary reader.
    fn read_back(path: &Path) -> Reader {
        Reader::from_bytes(std::fs::read(path).unwrap()).unwrap()
    }

    #[test]
    fn test_writer_default() {
        let w = Writer::default();
        assert!(w.strings.is_empty());
        assert!(w.schemas.is_empty());
    }

    #[test]
    fn test_encode_uint_ranges() {
        // Mid-range values plus both sides of every width boundary.
        let cases: Vec<(u64, TLType, Vec<u8>)> = vec![
            (0, TLType::UInt8, vec![0u8]),
            (42, TLType::UInt8, vec![42u8]),
            (255, TLType::UInt8, vec![255u8]),
            (256, TLType::UInt16, 256u16.to_le_bytes().to_vec()),
            (300, TLType::UInt16, 300u16.to_le_bytes().to_vec()),
            (65_535, TLType::UInt16, u16::MAX.to_le_bytes().to_vec()),
            (65_536, TLType::UInt32, 65_536u32.to_le_bytes().to_vec()),
            (100_000, TLType::UInt32, 100_000u32.to_le_bytes().to_vec()),
            (u32::MAX as u64, TLType::UInt32, u32::MAX.to_le_bytes().to_vec()),
            (
                u32::MAX as u64 + 1,
                TLType::UInt64,
                (u32::MAX as u64 + 1).to_le_bytes().to_vec(),
            ),
            (5_000_000_000, TLType::UInt64, 5_000_000_000u64.to_le_bytes().to_vec()),
            (u64::MAX, TLType::UInt64, u64::MAX.to_le_bytes().to_vec()),
        ];

        for (value, expected_type, expected_bytes) in cases {
            let (data, tl_type, flag, count) = encode_uint(value);
            assert_eq!(tl_type, expected_type, "type tag for {}", value);
            assert_eq!(data, expected_bytes, "bytes for {}", value);
            assert!(!flag, "third tuple element for {}", value);
            assert_eq!(count, 0, "fourth tuple element for {}", value);
        }
    }

    #[test]
    fn test_encode_int_ranges() {
        // Both sides of every signed width boundary.
        let cases: Vec<(i64, TLType, Vec<u8>)> = vec![
            (0, TLType::Int8, 0i8.to_le_bytes().to_vec()),
            (127, TLType::Int8, i8::MAX.to_le_bytes().to_vec()),
            (-128, TLType::Int8, i8::MIN.to_le_bytes().to_vec()),
            (128, TLType::Int16, 128i16.to_le_bytes().to_vec()),
            (-129, TLType::Int16, (-129i16).to_le_bytes().to_vec()),
            (32_767, TLType::Int16, i16::MAX.to_le_bytes().to_vec()),
            (-32_768, TLType::Int16, i16::MIN.to_le_bytes().to_vec()),
            (32_768, TLType::Int32, 32_768i32.to_le_bytes().to_vec()),
            (-32_769, TLType::Int32, (-32_769i32).to_le_bytes().to_vec()),
            (i32::MAX as i64, TLType::Int32, i32::MAX.to_le_bytes().to_vec()),
            (i32::MIN as i64, TLType::Int32, i32::MIN.to_le_bytes().to_vec()),
            (
                i32::MAX as i64 + 1,
                TLType::Int64,
                (i32::MAX as i64 + 1).to_le_bytes().to_vec(),
            ),
            (
                i32::MIN as i64 - 1,
                TLType::Int64,
                (i32::MIN as i64 - 1).to_le_bytes().to_vec(),
            ),
            (i64::MAX, TLType::Int64, i64::MAX.to_le_bytes().to_vec()),
            (i64::MIN, TLType::Int64, i64::MIN.to_le_bytes().to_vec()),
        ];

        for (value, expected_type, expected_bytes) in cases {
            let (data, tl_type, flag, count) = encode_int(value);
            assert_eq!(tl_type, expected_type, "type tag for {}", value);
            assert_eq!(data, expected_bytes, "bytes for {}", value);
            assert!(!flag, "third tuple element for {}", value);
            assert_eq!(count, 0, "fourth tuple element for {}", value);
        }
    }

    #[test]
    fn test_write_varint_encoding() {
        let cases: Vec<(u64, Vec<u8>)> = vec![
            (0, vec![0x00]),
            (0x7F, vec![0x7F]),
            (0x80, vec![0x80, 0x01]),
            (300, vec![0xAC, 0x02]),
            (0x3FFF, vec![0xFF, 0x7F]),
            (0x4000, vec![0x80, 0x80, 0x01]),
            (
                u64::MAX,
                vec![0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0x01],
            ),
        ];

        for (value, expected) in cases {
            let mut buf = Vec::new();
            write_varint(&mut buf, value);
            assert_eq!(buf, expected, "varint for {}", value);
        }
    }

    #[test]
    fn test_uint_value_roundtrip() {
        let tmp = TempFile::new("test_uint_roundtrip.tlbx");
        let cases: &[(&str, u64)] = &[
            ("small", 42),
            ("medium", 300),
            ("large", 100_000),
            ("huge", 5_000_000_000),
        ];

        let mut w = Writer::new();
        for &(key, value) in cases {
            w.add_section(key, &Value::UInt(value), None).unwrap();
        }
        w.write(tmp.path(), false).unwrap();

        let r = read_back(tmp.path());
        for &(key, value) in cases {
            assert_eq!(r.get(key).unwrap().as_uint(), Some(value), "section {}", key);
        }
    }

    #[test]
    fn test_typed_schema_roundtrip() {
        // Build a schema with various typed fields to exercise encode_typed_value
        let tmp = TempFile::new("test_typed_schema.tlbx");

        let mut schema = Schema::new("TypedRecord");
        let fields: &[(&str, &str)] = &[
            ("flag", "bool"),
            ("small_int", "int8"),
            ("med_int", "int16"),
            ("int32_val", "int"),
            ("int64_val", "int64"),
            ("small_uint", "uint8"),
            ("med_uint", "uint16"),
            ("uint32_val", "uint"),
            ("uint64_val", "uint64"),
            ("f32_val", "float32"),
            ("f64_val", "float"),
            ("name", "string"),
            ("data", "bytes"),
        ];
        for &(field_name, type_name) in fields {
            schema.add_field(field_name, FieldType::new(type_name));
        }

        let mut w = Writer::new();
        w.add_schema(schema.clone());

        // Create a record with all typed fields
        let mut obj = ObjectMap::new();
        obj.insert("flag".to_string(), Value::Bool(true));
        obj.insert("small_int".to_string(), Value::Int(42));
        obj.insert("med_int".to_string(), Value::Int(1000));
        obj.insert("int32_val".to_string(), Value::Int(50000));
        obj.insert("int64_val".to_string(), Value::Int(1_000_000_000_000));
        obj.insert("small_uint".to_string(), Value::UInt(200));
        obj.insert("med_uint".to_string(), Value::UInt(40000));
        obj.insert("uint32_val".to_string(), Value::UInt(3_000_000));
        obj.insert("uint64_val".to_string(), Value::UInt(9_000_000_000));
        obj.insert("f32_val".to_string(), Value::Float(3.14));
        obj.insert("f64_val".to_string(), Value::Float(2.718281828));
        obj.insert("name".to_string(), Value::String("test".into()));
        obj.insert("data".to_string(), Value::Bytes(vec![0xDE, 0xAD]));

        let arr = Value::Array(vec![Value::Object(obj)]);
        w.add_section("records", &arr, Some(&schema)).unwrap();
        w.write(tmp.path(), false).unwrap();

        let r = read_back(tmp.path());
        let records = r.get("records").unwrap();
        let items = records.as_array().unwrap();
        assert_eq!(items.len(), 1);

        let rec = items[0].as_object().unwrap();
        assert_eq!(rec.get("flag").unwrap().as_bool(), Some(true));
        assert_eq!(rec.get("name").unwrap().as_str(), Some("test"));
    }

    #[test]
    fn test_typed_schema_array_field() {
        // Schema with an array field to exercise typed array encoding
        let tmp = TempFile::new("test_typed_array_field.tlbx");

        let mut schema = Schema::new("WithArray");
        schema.add_field("name", FieldType::new("string"));
        schema.add_field("scores", FieldType::new("int").array());

        let mut w = Writer::new();
        w.add_schema(schema.clone());

        let mut obj = ObjectMap::new();
        obj.insert("name".to_string(), Value::String("Alice".into()));
        obj.insert("scores".to_string(), Value::Array(vec![Value::Int(90), Value::Int(85)]));

        let arr = Value::Array(vec![Value::Object(obj)]);
        w.add_section("users", &arr, Some(&schema)).unwrap();
        w.write(tmp.path(), false).unwrap();

        let r = read_back(tmp.path());
        let users = r.get("users").unwrap();
        let items = users.as_array().unwrap();
        assert_eq!(items.len(), 1);
        let rec = items[0].as_object().unwrap();
        assert_eq!(rec.get("name").unwrap().as_str(), Some("Alice"));
    }

    #[test]
    fn test_object_encoding_roundtrip() {
        // Direct object (non-struct-array) encoding
        let tmp = TempFile::new("test_object_enc.tlbx");

        let mut obj = ObjectMap::new();
        obj.insert("x".to_string(), Value::Int(10));
        obj.insert("y".to_string(), Value::String("hello".into()));

        let mut w = Writer::new();
        w.add_section("data", &Value::Object(obj), None).unwrap();
        w.write(tmp.path(), false).unwrap();

        let r = read_back(tmp.path());
        let val = r.get("data").unwrap();
        let o = val.as_object().unwrap();
        assert_eq!(o.get("x").unwrap().as_int(), Some(10));
        assert_eq!(o.get("y").unwrap().as_str(), Some("hello"));
    }

    #[test]
    fn test_map_roundtrip_binary() {
        let tmp = TempFile::new("test_map_binary.tlbx");

        let pairs = vec![
            (Value::String("key1".into()), Value::Int(100)),
            (Value::String("key2".into()), Value::Bool(true)),
        ];

        let mut w = Writer::new();
        w.add_section("mapping", &Value::Map(pairs), None).unwrap();
        w.write(tmp.path(), false).unwrap();

        let r = read_back(tmp.path());
        let val = r.get("mapping").unwrap();
        if let Value::Map(pairs) = val {
            assert_eq!(pairs.len(), 2);
        } else {
            panic!("Expected Map value");
        }
    }

    #[test]
    fn test_ref_and_tagged_roundtrip() {
        let tmp = TempFile::new("test_ref_tagged.tlbx");

        let mut w = Writer::new();
        w.add_section("myref", &Value::Ref("some_ref".into()), None).unwrap();
        w.add_section("mytag", &Value::Tagged("label".into(), Box::new(Value::Int(42))), None).unwrap();
        w.add_section("myts", &Value::Timestamp(1700000000000, 0), None).unwrap();
        w.write(tmp.path(), false).unwrap();

        let r = read_back(tmp.path());

        if let Value::Ref(s) = r.get("myref").unwrap() {
            assert_eq!(s, "some_ref");
        } else {
            panic!("Expected Ref");
        }

        if let Value::Tagged(tag, inner) = r.get("mytag").unwrap() {
            assert_eq!(tag, "label");
            assert_eq!(inner.as_int(), Some(42));
        } else {
            panic!("Expected Tagged");
        }

        if let Value::Timestamp(ts, _) = r.get("myts").unwrap() {
            assert_eq!(ts, 1700000000000);
        } else {
            panic!("Expected Timestamp");
        }
    }

    #[test]
    fn test_compressed_roundtrip() {
        let tmp = TempFile::new("test_compressed.tlbx");

        // Create large enough data to trigger compression
        let arr: Vec<Value> = (0..100).map(Value::Int).collect();

        let mut w = Writer::new();
        w.add_section("numbers", &Value::Array(arr), None).unwrap();
        w.write(tmp.path(), true).unwrap();

        let r = read_back(tmp.path());
        let val = r.get("numbers").unwrap();
        let items = val.as_array().unwrap();
        assert_eq!(items.len(), 100);
        assert_eq!(items[0].as_int(), Some(0));
        assert_eq!(items[99].as_int(), Some(99));
    }

    #[test]
    fn test_root_array_flag() {
        let tmp = TempFile::new("test_root_array_flag.tlbx");

        let mut w = Writer::new();
        w.set_root_array(true);
        w.add_section("root", &Value::Array(vec![Value::Int(1)]), None).unwrap();
        w.write(tmp.path(), false).unwrap();

        let r = read_back(tmp.path());
        assert!(r.is_root_array());
    }

    #[test]
    fn test_bytes_value_roundtrip() {
        let tmp = TempFile::new("test_bytes_val.tlbx");

        let mut w = Writer::new();
        w.add_section("blob", &Value::Bytes(vec![1, 2, 3, 4, 5]), None).unwrap();
        w.write(tmp.path(), false).unwrap();

        let r = read_back(tmp.path());
        let val = r.get("blob").unwrap();
        assert_eq!(val.as_bytes(), Some(&[1u8, 2, 3, 4, 5][..]));
    }

    #[test]
    fn test_nested_struct_schema_roundtrip() {
        // Test encode_typed_value with TLType::Struct (nested struct field)
        let tmp = TempFile::new("test_nested_struct.tlbx");

        let mut inner_schema = Schema::new("Address");
        inner_schema.add_field("city", FieldType::new("string"));
        inner_schema.add_field("zip", FieldType::new("string"));

        let mut outer_schema = Schema::new("Person");
        outer_schema.add_field("name", FieldType::new("string"));
        outer_schema.add_field("home", FieldType::new("Address"));

        let mut w = Writer::new();
        w.add_schema(inner_schema.clone());
        w.add_schema(outer_schema.clone());

        let mut addr = ObjectMap::new();
        addr.insert("city".to_string(), Value::String("Seattle".into()));
        addr.insert("zip".to_string(), Value::String("98101".into()));

        let mut person = ObjectMap::new();
        person.insert("name".to_string(), Value::String("Alice".into()));
        person.insert("home".to_string(), Value::Object(addr));

        let arr = Value::Array(vec![Value::Object(person)]);
        w.add_section("people", &arr, Some(&outer_schema)).unwrap();
        w.write(tmp.path(), false).unwrap();

        let r = read_back(tmp.path());
        let people = r.get("people").unwrap();
        let items = people.as_array().unwrap();
        assert_eq!(items.len(), 1);
        let p = items[0].as_object().unwrap();
        assert_eq!(p.get("name").unwrap().as_str(), Some("Alice"));
    }

    #[test]
    fn test_timestamp_typed_field() {
        // Struct array with a timestamp field
        let tmp = TempFile::new("test_ts_typed.tlbx");

        let mut schema = Schema::new("Event");
        schema.add_field("name", FieldType::new("string"));
        schema.add_field("ts", FieldType::new("timestamp"));

        let mut w = Writer::new();
        w.add_schema(schema.clone());

        let mut obj = ObjectMap::new();
        obj.insert("name".to_string(), Value::String("deploy".into()));
        obj.insert("ts".to_string(), Value::Timestamp(1700000000000, 0));

        let arr = Value::Array(vec![Value::Object(obj)]);
        w.add_section("events", &arr, Some(&schema)).unwrap();
        w.write(tmp.path(), false).unwrap();

        let r = read_back(tmp.path());
        let events = r.get("events").unwrap();
        let items = events.as_array().unwrap();
        assert_eq!(items.len(), 1);
    }

    #[test]
    fn test_bytes_typed_field() {
        // Struct array with a bytes field
        let tmp = TempFile::new("test_bytes_typed.tlbx");

        let mut schema = Schema::new("Blob");
        schema.add_field("name", FieldType::new("string"));
        schema.add_field("data", FieldType::new("bytes"));

        let mut w = Writer::new();
        w.add_schema(schema.clone());

        let mut obj = ObjectMap::new();
        obj.insert("name".to_string(), Value::String("img".into()));
        obj.insert("data".to_string(), Value::Bytes(vec![0xDE, 0xAD, 0xBE, 0xEF]));

        let arr = Value::Array(vec![Value::Object(obj)]);
        w.add_section("blobs", &arr, Some(&schema)).unwrap();
        w.write(tmp.path(), false).unwrap();

        let r = read_back(tmp.path());
        let blobs = r.get("blobs").unwrap();
        let items = blobs.as_array().unwrap();
        assert_eq!(items.len(), 1);
    }

    // =========================================================================
    // Issue 4: Checked numeric downcasting
    // =========================================================================

    #[test]
    fn test_checked_int_value_in_range() {
        assert_eq!(checked_int_value(&Value::Int(42), I8_MIN, I8_MAX, "int8").unwrap(), 42);
        assert_eq!(checked_int_value(&Value::Int(-128), I8_MIN, I8_MAX, "int8").unwrap(), -128);
        assert_eq!(checked_int_value(&Value::Int(127), I8_MIN, I8_MAX, "int8").unwrap(), 127);
        assert_eq!(
            checked_int_value(&Value::UInt(100), i16::MIN as i64, i16::MAX as i64, "int16").unwrap(),
            100
        );
    }

    #[test]
    fn test_checked_int_value_overflow_defaults_to_zero() {
        // Spec §2.5: out-of-range defaults to 0, not error
        assert_eq!(checked_int_value(&Value::Int(128), I8_MIN, I8_MAX, "int8").unwrap(), 0);
    }

    #[test]
    fn test_checked_int_value_underflow_defaults_to_zero() {
        assert_eq!(checked_int_value(&Value::Int(-129), I8_MIN, I8_MAX, "int8").unwrap(), 0);
    }

    #[test]
    fn test_checked_int_value_float_coercion() {
        // Spec §2.5: floats coerce to integers
        assert_eq!(checked_int_value(&Value::Float(42.7), I8_MIN, I8_MAX, "int8").unwrap(), 42);
        assert_eq!(checked_int_value(&Value::Float(-3.9), I8_MIN, I8_MAX, "int8").unwrap(), -3);
        // NaN/Inf default to 0
        assert_eq!(checked_int_value(&Value::Float(f64::NAN), I8_MIN, I8_MAX, "int8").unwrap(), 0);
        assert_eq!(checked_int_value(&Value::Float(f64::INFINITY), I8_MIN, I8_MAX, "int8").unwrap(), 0);
        // Float that truncates out of range defaults to 0
        assert_eq!(checked_int_value(&Value::Float(200.0), I8_MIN, I8_MAX, "int8").unwrap(), 0);
    }

    #[test]
    fn test_checked_uint_value_in_range() {
        assert_eq!(checked_uint_value(&Value::UInt(255), U8_MAX, "uint8").unwrap(), 255);
        assert_eq!(checked_uint_value(&Value::Int(42), U8_MAX, "uint8").unwrap(), 42);
    }

    #[test]
    fn test_checked_uint_value_overflow_defaults_to_zero() {
        // Spec §2.5: out-of-range defaults to 0, not error
        assert_eq!(checked_uint_value(&Value::UInt(256), U8_MAX, "uint8").unwrap(), 0);
    }

    #[test]
    fn test_checked_uint_value_negative_defaults_to_zero() {
        // Spec §2.5: negative for unsigned defaults to 0, not error
        assert_eq!(checked_uint_value(&Value::Int(-1), U8_MAX, "uint8").unwrap(), 0);
    }

    #[test]
    fn test_checked_uint_value_float_coercion() {
        assert_eq!(checked_uint_value(&Value::Float(42.7), U8_MAX, "uint8").unwrap(), 42);
        assert_eq!(checked_uint_value(&Value::Float(-1.0), U8_MAX, "uint8").unwrap(), 0);
        assert_eq!(checked_uint_value(&Value::Float(f64::NAN), U8_MAX, "uint8").unwrap(), 0);
        assert_eq!(checked_uint_value(&Value::Float(300.0), U8_MAX, "uint8").unwrap(), 0);
    }

    // =========================================================================
    // Issue 1: Union/Enum round-trip via union_map
    // =========================================================================

    #[test]
    fn test_union_field_roundtrip() {
        // A struct with a field typed as a union should round-trip correctly
        let tmp = TempFile::new("test_union_field_rt.tlbx");

        let mut w = Writer::new();

        // Define a union
        let mut union_def = crate::Union::new("Shape");
        union_def.add_variant(crate::Variant::new("Circle").field("radius", FieldType::new("float64")));
        union_def.add_variant(
            crate::Variant::new("Rect")
                .field("w", FieldType::new("float64"))
                .field("h", FieldType::new("float64")),
        );
        w.add_union(union_def);

        // Add a tagged value (as if from a union-typed field)
        let mut circle = ObjectMap::new();
        circle.insert("radius".to_string(), Value::Float(5.0));
        let tagged = Value::Tagged("Circle".to_string(), Box::new(Value::Object(circle)));
        w.add_section("shape", &tagged, None).unwrap();
        w.write(tmp.path(), false).unwrap();

        let r = read_back(tmp.path());
        let shape = r.get("shape").unwrap();
        if let Value::Tagged(tag, inner) = &shape {
            assert_eq!(tag, "Circle");
            let obj = inner.as_object().unwrap();
            assert_eq!(obj.get("radius").unwrap().as_float(), Some(5.0));
        } else {
            panic!("Expected Tagged value, got {:?}", shape);
        }
    }

    #[test]
    fn test_union_typed_schema_field_roundtrip() {
        // A struct schema where one field is a union type
        let tmp = TempFile::new("test_union_schema_field.tlbx");

        let mut w = Writer::new();

        // Union: Status { Ok { code: int }, Err { msg: string } }
        let mut union_def = crate::Union::new("Status");
        union_def.add_variant(crate::Variant::new("Ok").field("code", FieldType::new("int32")));
        union_def.add_variant(crate::Variant::new("Err").field("msg", FieldType::new("string")));
        w.add_union(union_def);

        // Struct: Response { id: int, status: Status }
        let mut schema = Schema::new("Response");
        schema.add_field("id", FieldType::new("int32"));
        schema.add_field("status", FieldType::new("Status")); // Union-typed field

        let mut ok_payload = ObjectMap::new();
        ok_payload.insert("code".to_string(), Value::Int(200));

        let mut obj = ObjectMap::new();
        obj.insert("id".to_string(), Value::Int(1));
        obj.insert(
            "status".to_string(),
            Value::Tagged("Ok".to_string(), Box::new(Value::Object(ok_payload))),
        );

        let arr = Value::Array(vec![Value::Object(obj)]);
        w.add_section("responses", &arr, Some(&schema)).unwrap();
        w.write(tmp.path(), false).unwrap();

        let r = read_back(tmp.path());
        // Verify the reader parsed the union and schema correctly
        assert!(!r.unions.is_empty(), "Reader should have unions");
        assert_eq!(r.unions[0].name, "Status");
        assert!(!r.schemas.is_empty(), "Reader should have schemas");
        assert_eq!(r.schemas[0].name, "Response");
        let responses = r.get("responses").unwrap();
        let items = responses.as_array().unwrap();
        assert_eq!(items.len(), 1);
        let resp = items[0].as_object().unwrap();
        assert_eq!(resp.get("id").unwrap().as_int(), Some(1));
        if let Value::Tagged(tag, inner) = resp.get("status").unwrap() {
            assert_eq!(tag, "Ok");
            let obj = inner.as_object().unwrap();
            assert_eq!(obj.get("code").unwrap().as_int(), Some(200));
        } else {
            panic!("Expected Tagged value for status field");
        }
    }

    // =========================================================================
    // Issue 7: Deterministic serialization (sorted object keys)
    // =========================================================================

    #[test]
    fn test_object_encoding_deterministic() {
        // Encoding the same object multiple times should produce identical bytes
        let mut obj = ObjectMap::new();
        obj.insert("zebra".to_string(), Value::Int(1));
        obj.insert("alpha".to_string(), Value::Int(2));
        obj.insert("middle".to_string(), Value::Int(3));

        let mut w1 = Writer::new();
        let (bytes1, _, _, _) = w1.encode_object(&obj).unwrap();

        let mut w2 = Writer::new();
        let (bytes2, _, _, _) = w2.encode_object(&obj).unwrap();

        assert_eq!(bytes1, bytes2, "Object encoding should be deterministic");
    }
}