tealeaf/reader.rs

1//! Binary format reader for TeaLeaf
2//!
3//! Supports two modes:
4//! - `open()` - Reads file into memory (Vec<u8>)
5//! - `open_mmap()` - Memory-maps file for zero-copy access
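//!
//! A minimal usage sketch (the crate name `tealeaf` and the file path are
//! assumptions, not taken from this file):
//!
//! ```no_run
//! use tealeaf::Reader;
//!
//! fn main() -> tealeaf::Result<()> {
//!     // "data.tlbx" is a hypothetical path.
//!     let reader = Reader::open("data.tlbx")?;
//!     for key in reader.keys() {
//!         let _value = reader.get(key)?;
//!     }
//!     Ok(())
//! }
//! ```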
6
7use std::cell::RefCell;
8use std::collections::HashMap;
9use std::fs::File;
10use std::io::Read;
11use std::path::Path;
12use std::sync::Arc;
13use indexmap::IndexMap;
14use crate::types::ObjectMap;
15
16use memmap2::Mmap;
17
18use crate::{Error, Result, Value, Schema, Union, Variant, Field, FieldType, TLType, MAGIC, HEADER_SIZE};
19
20/// Maximum allowed decompressed data size (256 MB)
21const MAX_DECOMPRESSED_SIZE: usize = 256 * 1024 * 1024;
22
23/// Maximum varint encoding length in bytes (ceil(64/7) = 10)
24const MAX_VARINT_BYTES: usize = 10;
25
26/// Maximum recursion depth for nested decode calls (arrays, objects, maps, tagged values).
27/// Set above the text parser's tested 200-level nesting to ensure binary round-trip parity.
28const MAX_DECODE_DEPTH: usize = 256;
29
30/// Maximum number of elements allowed in a single decoded collection (array, map, struct array).
31/// Also used to cap Vec::with_capacity during decode. Prevents OOM from crafted count values
32/// in small files (e.g. a 335-byte file claiming 973M Null elements).
33const MAX_COLLECTION_SIZE: usize = 1024 * 1024;
34
35/// Read a u16 from data at the given offset, with bounds checking
36fn read_u16_at(data: &[u8], offset: usize) -> Result<u16> {
37    let end = offset.checked_add(2)
38        .ok_or_else(|| Error::ParseError("offset overflow".into()))?;
39    if end > data.len() {
40        return Err(Error::ParseError(format!(
41            "read u16 out of bounds at offset {} (data len {})", offset, data.len()
42        )));
43    }
44    Ok(u16::from_le_bytes(data[offset..end].try_into().map_err(|_|
45        Error::ParseError(format!("u16 slice conversion failed at offset {}", offset))
46    )?))
47}
48
49/// Read a u32 from data at the given offset, with bounds checking
50fn read_u32_at(data: &[u8], offset: usize) -> Result<u32> {
51    let end = offset.checked_add(4)
52        .ok_or_else(|| Error::ParseError("offset overflow".into()))?;
53    if end > data.len() {
54        return Err(Error::ParseError(format!(
55            "read u32 out of bounds at offset {} (data len {})", offset, data.len()
56        )));
57    }
58    Ok(u32::from_le_bytes(data[offset..end].try_into().map_err(|_|
59        Error::ParseError(format!("u32 slice conversion failed at offset {}", offset))
60    )?))
61}
62
63/// Read a u64 from data at the given offset, with bounds checking
64fn read_u64_at(data: &[u8], offset: usize) -> Result<u64> {
65    let end = offset.checked_add(8)
66        .ok_or_else(|| Error::ParseError("offset overflow".into()))?;
67    if end > data.len() {
68        return Err(Error::ParseError(format!(
69            "read u64 out of bounds at offset {} (data len {})", offset, data.len()
70        )));
71    }
72    Ok(u64::from_le_bytes(data[offset..end].try_into().map_err(|_|
73        Error::ParseError(format!("u64 slice conversion failed at offset {}", offset))
74    )?))
75}
76
77/// Storage backend for reader data
78enum DataSource {
79    /// Owned bytes (from file read)
80    Owned(Vec<u8>),
81    /// Memory-mapped file (zero-copy)
82    Mapped(Arc<Mmap>),
83}
84
85impl AsRef<[u8]> for DataSource {
86    fn as_ref(&self) -> &[u8] {
87        match self {
88            DataSource::Owned(v) => v.as_slice(),
89            DataSource::Mapped(m) => m.as_ref(),
90        }
91    }
92}
93
94/// Binary format reader with mmap support for zero-copy access
95pub struct Reader {
96    data: DataSource,
97    string_offsets: Vec<u32>,
98    string_lengths: Vec<u32>,
99    string_data_offset: usize,
100    pub schemas: Vec<Schema>,
101    schema_map: HashMap<String, usize>,
102    pub unions: Vec<Union>,
103    union_map: HashMap<String, usize>,
104    sections: IndexMap<String, SectionInfo>,
105    /// Indicates the source JSON was a root-level array (for round-trip fidelity)
106    is_root_array: bool,
107    /// Cache for decompressed and decoded values
108    cache: RefCell<HashMap<String, Value>>,
109}
110
111#[allow(dead_code)]
112struct SectionInfo {
113    offset: u64,
114    size: u32,
115    uncompressed_size: u32,
116    schema_idx: i32,
117    tl_type: TLType,
118    compressed: bool,
119    is_array: bool,
120    item_count: u32,
121}
122
123impl Reader {
124    /// Open a binary TeaLeaf file (reads into memory)
125    pub fn open<P: AsRef<Path>>(path: P) -> Result<Self> {
126        let mut file = File::open(path)?;
127        let mut data = Vec::new();
128        file.read_to_end(&mut data)?;
129        Self::from_bytes(data)
130    }
131
132    /// Open a binary TeaLeaf file with memory mapping (zero-copy)
133    ///
134    /// This is faster for large files as the OS handles paging.
135    /// The file must not be modified while the reader is open.
136    ///
137    /// # Safety
138    /// The underlying file must not be modified while the reader exists.
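    ///
    /// A short sketch (the crate name and file path are assumed, not taken from this file):
    ///
    /// ```no_run
    /// // Hypothetical path; the file must stay unmodified while `reader` is alive.
    /// let reader = tealeaf::Reader::open_mmap("large_dataset.tlbx")?;
    /// println!("{} sections", reader.keys().len());
    /// # Ok::<(), tealeaf::Error>(())
    /// ```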
139    pub fn open_mmap<P: AsRef<Path>>(path: P) -> Result<Self> {
140        let file = File::open(path)?;
141        let mmap = unsafe { Mmap::map(&file)? };
142        Self::from_data_source(DataSource::Mapped(Arc::new(mmap)))
143    }
144
145    /// Create reader from owned bytes
146    pub fn from_bytes(data: Vec<u8>) -> Result<Self> {
147        Self::from_data_source(DataSource::Owned(data))
148    }
149
150    /// Create reader from data source (internal)
151    fn from_data_source(data: DataSource) -> Result<Self> {
152        let bytes = data.as_ref();
153
154        if bytes.len() < HEADER_SIZE {
155            return Err(Error::InvalidMagic);
156        }
157        if &bytes[0..4] != MAGIC {
158            return Err(Error::InvalidMagic);
159        }
160
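        // Fixed-offset header fields read below: version (major, minor u16) at bytes 4..8,
        // flags (u32) at 8..12, region offsets (u64 each) at 16..48 for the string table,
        // schemas, index, and data, then u32 counts for strings, schemas, and sections at 48..60.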
161        // Check version - we support major version 2
162        let major = read_u16_at(bytes, 4)?;
163        let minor = read_u16_at(bytes, 6)?;
164        if major != 2 {
165            return Err(Error::InvalidVersion { major, minor });
166        }
167
168        // Read flags: bit 0 = compressed (handled per-section), bit 1 = root_array
169        let flags = read_u32_at(bytes, 8)?;
170        let is_root_array = (flags & 0x02) != 0;
171
172        let str_off = read_u64_at(bytes, 16)? as usize;
173        let sch_off = read_u64_at(bytes, 24)? as usize;
174        let idx_off = read_u64_at(bytes, 32)? as usize;
175        let dat_off = read_u64_at(bytes, 40)? as usize;
176        let str_cnt = read_u32_at(bytes, 48)? as usize;
177        let sch_cnt = read_u32_at(bytes, 52)? as usize;
178        let sec_cnt = read_u32_at(bytes, 56)? as usize;
179
180        // Validate region offsets are within file bounds
181        if str_off > bytes.len() || sch_off > bytes.len() || idx_off > bytes.len() || dat_off > bytes.len() {
182            return Err(Error::ParseError("header region offsets exceed file size".into()));
183        }
184
185        // Parse string table
186        let str_header_end = str_off.checked_add(8)
187            .ok_or_else(|| Error::ParseError("string table offset overflow".into()))?;
188        if str_header_end > bytes.len() {
189            return Err(Error::ParseError("string table header out of bounds".into()));
190        }
191
192        let offsets_size = str_cnt.checked_mul(4)
193            .ok_or_else(|| Error::ParseError("string count overflow".into()))?;
194        let lengths_size = str_cnt.checked_mul(4)
195            .ok_or_else(|| Error::ParseError("string count overflow".into()))?;
196        let str_table_end = str_header_end
197            .checked_add(offsets_size)
198            .and_then(|v| v.checked_add(lengths_size))
199            .ok_or_else(|| Error::ParseError("string table size overflow".into()))?;
200        if str_table_end > bytes.len() {
201            return Err(Error::ParseError("string table out of bounds".into()));
202        }
203
204        let mut off = str_off + 8;
205        let string_offsets: Vec<u32> = (0..str_cnt)
206            .map(|i| read_u32_at(bytes, off + i * 4))
207            .collect::<Result<Vec<u32>>>()?;
208        off += offsets_size;
209        let string_lengths: Vec<u32> = (0..str_cnt)
210            .map(|i| read_u32_at(bytes, off + i * 4))
211            .collect::<Result<Vec<u32>>>()?;
212        let string_data_offset = off + lengths_size;
213
214        // Read union_count from schema region header (sch_off+6..sch_off+8)
215        let union_cnt = if sch_off + 8 <= bytes.len() {
216            read_u16_at(bytes, sch_off + 6)? as usize
217        } else {
218            0
219        };
220
221        let mut reader = Self {
222            data,
223            string_offsets,
224            string_lengths,
225            string_data_offset,
226            schemas: Vec::new(),
227            schema_map: HashMap::new(),
228            unions: Vec::new(),
229            union_map: HashMap::new(),
230            sections: IndexMap::new(),
231            is_root_array,
232            cache: RefCell::new(HashMap::new()),
233        };
234
235        reader.parse_schemas(sch_off, sch_cnt)?;
236        if union_cnt > 0 {
237            reader.parse_unions(sch_off, sch_cnt, union_cnt)?;
238        }
239        reader.parse_index(idx_off, sec_cnt)?;
240
241        Ok(reader)
242    }
243
244    /// Get the underlying data as a byte slice
245    fn data(&self) -> &[u8] {
246        self.data.as_ref()
247    }
248
249    /// Get a string by index
250    pub fn get_string(&self, idx: usize) -> Result<String> {
251        if idx >= self.string_offsets.len() {
252            return Err(Error::ParseError(format!("String index {} out of bounds", idx)));
253        }
254        let start = self.string_data_offset
255            .checked_add(self.string_offsets[idx] as usize)
256            .ok_or_else(|| Error::ParseError("string data offset overflow".into()))?;
257        let len = self.string_lengths[idx] as usize;
258        let end = start.checked_add(len)
259            .ok_or_else(|| Error::ParseError("string data range overflow".into()))?;
260        if end > self.data().len() {
261            return Err(Error::ParseError(format!(
262                "string data out of bounds: {}..{} exceeds file size {}", start, end, self.data().len()
263            )));
264        }
265        String::from_utf8(self.data()[start..end].to_vec())
266            .map_err(|_| Error::InvalidUtf8)
267    }
268
269    /// Get section keys
270    pub fn keys(&self) -> Vec<&str> {
271        self.sections.keys().map(|s| s.as_str()).collect()
272    }
273
274    /// Check if the source JSON was a root-level array
275    ///
276    /// When true, the "root" key contains the array and `to_json` should
277    /// output it directly without wrapping in an object.
278    pub fn is_root_array(&self) -> bool {
279        self.is_root_array
280    }
281
282    /// Get a value by key
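    ///
    /// Decoded results are cached, so repeated lookups of the same key return a
    /// clone of the cached value. A short sketch (crate name, path, and key are assumed):
    ///
    /// ```no_run
    /// # let reader = tealeaf::Reader::open("data.tlbx")?;
    /// let first = reader.get("config")?;   // decodes the section
    /// let second = reader.get("config")?;  // served from the cache
    /// assert_eq!(first, second);
    /// # Ok::<(), tealeaf::Error>(())
    /// ```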
283    pub fn get(&self, key: &str) -> Result<Value> {
284        // Check cache first
285        if let Some(cached) = self.cache.borrow().get(key) {
286            return Ok(cached.clone());
287        }
288
289        let section = self.sections.get(key)
290            .ok_or_else(|| Error::MissingField(key.to_string()))?;
291
292        let start = section.offset as usize;
293        let end = start.checked_add(section.size as usize)
294            .ok_or_else(|| Error::ParseError("section offset overflow".into()))?;
295        if end > self.data().len() {
296            return Err(Error::ParseError(format!(
297                "section '{}' data range {}..{} exceeds file size {}",
298                key, start, end, self.data().len()
299            )));
300        }
301
302        let data = if section.compressed {
303            decompress_data(&self.data()[start..end])?
304        } else {
305            self.data()[start..end].to_vec()
306        };
307
308        let mut cursor = Cursor::new(&data);
309
310        let result = if section.is_array && section.schema_idx >= 0 {
311            self.decode_struct_array(&mut cursor, section.schema_idx as usize, 0)?
312        } else {
313            match section.tl_type {
314                TLType::Array => self.decode_array(&mut cursor, 0)?,
315                TLType::Object => self.decode_object(&mut cursor, 0)?,
316                TLType::Struct => self.decode_struct(&mut cursor, 0)?,
317                TLType::Map => self.decode_map(&mut cursor, 0)?,
318                _ => self.decode_value(&mut cursor, section.tl_type, 0)?,
319            }
320        };
321
322        self.cache.borrow_mut().insert(key.to_string(), result.clone());
323        Ok(result)
324    }
325
326    /// Clear the decompression cache to free memory
327    pub fn clear_cache(&self) {
328        self.cache.borrow_mut().clear();
329    }
330
331    fn parse_schemas(&mut self, off: usize, count: usize) -> Result<()> {
332        if count == 0 {
333            return Ok(());
334        }
335
336        let data = self.data.as_ref();
337        let o = off.checked_add(8)
338            .ok_or_else(|| Error::ParseError("schema offset overflow".into()))?;
339
340        // Validate offset table bounds
341        let offsets_size = count.checked_mul(4)
342            .ok_or_else(|| Error::ParseError("schema count overflow".into()))?;
343        let offsets_end = o.checked_add(offsets_size)
344            .ok_or_else(|| Error::ParseError("schema offsets overflow".into()))?;
345        if offsets_end > data.len() {
346            return Err(Error::ParseError("schema offset table out of bounds".into()));
347        }
348
349        let offsets: Vec<u32> = (0..count)
350            .map(|i| read_u32_at(data, o + i * 4))
351            .collect::<Result<Vec<u32>>>()?;
352        let start = offsets_end;
353
354        for i in 0..count {
355            let so = start.checked_add(offsets[i] as usize)
356                .ok_or_else(|| Error::ParseError("schema entry offset overflow".into()))?;
357
358            // Need at least 8 bytes for schema entry header
359            if so.checked_add(8).map_or(true, |end| end > data.len()) {
360                return Err(Error::ParseError(format!("schema entry {} out of bounds", i)));
361            }
362
363            let name_idx = read_u32_at(data, so)?;
364            let field_count = read_u16_at(data, so + 4)? as usize;
365
366            let name = self.get_string(name_idx as usize)?;
367            let mut schema = Schema::new(&name);
368
369            let mut fo = so + 8;
370            for fi in 0..field_count {
371                // Each field entry is 8 bytes
372                if fo.checked_add(8).map_or(true, |end| end > data.len()) {
373                    return Err(Error::ParseError(format!(
374                        "schema '{}' field {} out of bounds", name, fi
375                    )));
376                }
377
378                let fname_idx = read_u32_at(data, fo)?;
379                let ftype = data[fo + 4];
380                let fflags = data[fo + 5];
381                let fextra = read_u16_at(data, fo + 6)?;
382
383                let fname = self.get_string(fname_idx as usize)?;
384                let tl_type = TLType::try_from(ftype)?;
385
386                let base = match tl_type {
387                    TLType::Bool => "bool".to_string(),
388                    TLType::Int8 => "int8".to_string(),
389                    TLType::Int16 => "int16".to_string(),
390                    TLType::Int32 => "int".to_string(),
391                    TLType::Int64 => "int64".to_string(),
392                    TLType::UInt8 => "uint8".to_string(),
393                    TLType::UInt16 => "uint16".to_string(),
394                    TLType::UInt32 => "uint".to_string(),
395                    TLType::UInt64 => "uint64".to_string(),
396                    TLType::Float32 => "float32".to_string(),
397                    TLType::Float64 => "float".to_string(),
398                    TLType::String => "string".to_string(),
399                    TLType::Bytes => "bytes".to_string(),
400                    TLType::Timestamp => "timestamp".to_string(),
401                    TLType::Struct => {
402                        // Read struct type name from string table (0xFFFF = no type)
403                        if fextra != 0xFFFF {
404                            self.get_string(fextra as usize)?
405                        } else {
406                            "object".to_string()
407                        }
408                    }
409                    TLType::Tagged => {
410                        // Union-typed field: read union name from string table
411                        if fextra != 0xFFFF {
412                            self.get_string(fextra as usize)?
413                        } else {
414                            "tagged".to_string()
415                        }
416                    }
417                    TLType::Object => "object".to_string(),
418                    TLType::Tuple => "tuple".to_string(),
419                    TLType::Map => "map".to_string(),
420                    _ => "string".to_string(),
421                };
422
423                let mut field_type = FieldType::new(&base);
424                if fflags & 0x01 != 0 {
425                    field_type.nullable = true;
426                }
427                if fflags & 0x02 != 0 {
428                    field_type.is_array = true;
429                }
430
431                schema.fields.push(Field::new(fname, field_type));
432                fo += 8;
433            }
434
435            self.schema_map.insert(name, self.schemas.len());
436            self.schemas.push(schema);
437        }
438
439        Ok(())
440    }
441
442    fn parse_unions(&mut self, sch_off: usize, struct_count: usize, union_count: usize) -> Result<()> {
443        let data = self.data.as_ref();
444
445        // Calculate where struct offsets + struct data end
446        // Schema region layout:
447        //   [region_size: u32][struct_count: u16][union_count: u16]
448        //   [struct_offsets: u32 * struct_count]
449        //   [struct_data...]
450        //   [union_offsets: u32 * union_count]
451        //   [union_data...]
452        let struct_offsets_start = sch_off.checked_add(8)
453            .ok_or_else(|| Error::ParseError("union region offset overflow".into()))?;
454        let struct_offsets_size = struct_count.checked_mul(4)
455            .ok_or_else(|| Error::ParseError("struct count overflow".into()))?;
456        let struct_data_start = struct_offsets_start.checked_add(struct_offsets_size)
457            .ok_or_else(|| Error::ParseError("struct data start overflow".into()))?;
458        let struct_data_size: usize = self.schemas.iter()
459            .map(|s| 8 + s.fields.len() * 8)
460            .sum();
461        let union_offsets_start = struct_data_start.checked_add(struct_data_size)
462            .ok_or_else(|| Error::ParseError("union offsets start overflow".into()))?;
463
464        // Validate union offset table bounds
465        let union_offsets_size = union_count.checked_mul(4)
466            .ok_or_else(|| Error::ParseError("union count overflow".into()))?;
467        let union_offsets_end = union_offsets_start.checked_add(union_offsets_size)
468            .ok_or_else(|| Error::ParseError("union offsets end overflow".into()))?;
469        if union_offsets_end > data.len() {
470            return Err(Error::ParseError("union offset table out of bounds".into()));
471        }
472
473        // Read union offsets
474        let union_offsets: Vec<u32> = (0..union_count)
475            .map(|i| read_u32_at(data, union_offsets_start + i * 4))
476            .collect::<Result<Vec<u32>>>()?;
477        let union_data_start = union_offsets_end;
478
479        for i in 0..union_count {
480            let uo = union_data_start.checked_add(union_offsets[i] as usize)
481                .ok_or_else(|| Error::ParseError("union entry offset overflow".into()))?;
482
483            // Need at least 8 bytes for union entry header
484            if uo.checked_add(8).map_or(true, |end| end > data.len()) {
485                return Err(Error::ParseError(format!("union entry {} out of bounds", i)));
486            }
487
488            let name_idx = read_u32_at(data, uo)?;
489            let variant_count = read_u16_at(data, uo + 4)? as usize;
490            // uo + 6..uo + 8 is flags (reserved)
491
492            let name = self.get_string(name_idx as usize)?;
493            let mut union = Union::new(&name);
494
495            let mut vo = uo + 8;
496            for vi in 0..variant_count {
497                // Need at least 8 bytes for variant header
498                if vo.checked_add(8).map_or(true, |end| end > data.len()) {
499                    return Err(Error::ParseError(format!(
500                        "union '{}' variant {} out of bounds", name, vi
501                    )));
502                }
503
504                let vname_idx = read_u32_at(data, vo)?;
505                let field_count = read_u16_at(data, vo + 4)? as usize;
506                // vo + 6..vo + 8 is flags (reserved)
507
508                let vname = self.get_string(vname_idx as usize)?;
509                let mut variant = Variant::new(&vname);
510
511                let mut fo = vo + 8;
512                for fi in 0..field_count {
513                    // Each field entry is 8 bytes
514                    if fo.checked_add(8).map_or(true, |end| end > data.len()) {
515                        return Err(Error::ParseError(format!(
516                            "union '{}' variant '{}' field {} out of bounds", name, vname, fi
517                        )));
518                    }
519
520                    let fname_idx = read_u32_at(data, fo)?;
521                    let ftype = data[fo + 4];
522                    let fflags = data[fo + 5];
523                    let fextra = read_u16_at(data, fo + 6)?;
524
525                    let fname = self.get_string(fname_idx as usize)?;
526                    let tl_type = TLType::try_from(ftype)?;
527
528                    let base = match tl_type {
529                        TLType::Bool => "bool".to_string(),
530                        TLType::Int8 => "int8".to_string(),
531                        TLType::Int16 => "int16".to_string(),
532                        TLType::Int32 => "int".to_string(),
533                        TLType::Int64 => "int64".to_string(),
534                        TLType::UInt8 => "uint8".to_string(),
535                        TLType::UInt16 => "uint16".to_string(),
536                        TLType::UInt32 => "uint".to_string(),
537                        TLType::UInt64 => "uint64".to_string(),
538                        TLType::Float32 => "float32".to_string(),
539                        TLType::Float64 => "float".to_string(),
540                        TLType::String => "string".to_string(),
541                        TLType::Bytes => "bytes".to_string(),
542                        TLType::Timestamp => "timestamp".to_string(),
543                        TLType::Struct => {
544                            if fextra != 0xFFFF {
545                                self.get_string(fextra as usize)?
546                            } else {
547                                "object".to_string()
548                            }
549                        }
550                        TLType::Tagged => {
551                            if fextra != 0xFFFF {
552                                self.get_string(fextra as usize)?
553                            } else {
554                                "tagged".to_string()
555                            }
556                        }
557                        TLType::Object => "object".to_string(),
558                        TLType::Tuple => "tuple".to_string(),
559                        TLType::Map => "map".to_string(),
560                        _ => "string".to_string(),
561                    };
562
563                    let mut field_type = FieldType::new(&base);
564                    if fflags & 0x01 != 0 { field_type.nullable = true; }
565                    if fflags & 0x02 != 0 { field_type.is_array = true; }
566
567                    variant.fields.push(Field::new(fname, field_type));
568                    fo += 8;
569                }
570
571                union.variants.push(variant);
572                vo = fo;
573            }
574
575            self.union_map.insert(name, self.unions.len());
576            self.unions.push(union);
577        }
578
579        Ok(())
580    }
581
582    fn parse_index(&mut self, off: usize, count: usize) -> Result<()> {
583        let data = self.data.as_ref();
584        let mut o = off.checked_add(8)
585            .ok_or_else(|| Error::ParseError("index offset overflow".into()))?;
586
587        // Validate index table bounds
588        let index_size = count.checked_mul(32)
589            .ok_or_else(|| Error::ParseError("index count overflow".into()))?;
590        let index_end = o.checked_add(index_size)
591            .ok_or_else(|| Error::ParseError("index region overflow".into()))?;
592        if index_end > data.len() {
593            return Err(Error::ParseError("index table out of bounds".into()));
594        }
595
596        for _ in 0..count {
597            let key_idx = read_u32_at(data, o)?;
598            let offset = read_u64_at(data, o + 4)?;
599            let size = read_u32_at(data, o + 12)?;
600            let uncompressed = read_u32_at(data, o + 16)?;
601            let schema_idx = read_u16_at(data, o + 20)?;
602            let ptype = data[o + 22];
603            let flags = data[o + 23];
604            let item_count = read_u32_at(data, o + 24)?;
605
606            let key = self.get_string(key_idx as usize)?;
607
608            // Validate section data range against file bounds
609            let sec_start = offset as usize;
610            let sec_end = sec_start.checked_add(size as usize)
611                .ok_or_else(|| Error::ParseError(format!(
612                    "section '{}' offset overflow", key
613                )))?;
614            if sec_end > data.len() {
615                return Err(Error::ParseError(format!(
616                    "section '{}' data range {}..{} exceeds file size {}",
617                    key, sec_start, sec_end, data.len()
618                )));
619            }
620
621            self.sections.insert(key, SectionInfo {
622                offset,
623                size,
624                uncompressed_size: uncompressed,
625                schema_idx: if schema_idx == 0xFFFF { -1 } else { schema_idx as i32 },
626                tl_type: TLType::try_from(ptype)?,
627                compressed: flags & 0x01 != 0,
628                is_array: flags & 0x02 != 0,
629                item_count,
630            });
631            o += 32;
632        }
633
634        Ok(())
635    }
636
637    fn decode_struct_array(&self, cursor: &mut Cursor, schema_idx: usize, depth: usize) -> Result<Value> {
638        if depth > MAX_DECODE_DEPTH {
639            return Err(Error::ParseError("maximum decode nesting depth exceeded".into()));
640        }
641        let count = cursor.read_u32()?;
642        if count as usize > MAX_COLLECTION_SIZE {
643            return Err(Error::ParseError(format!(
644                "struct array element count {} exceeds limit of {}", count, MAX_COLLECTION_SIZE
645            )));
646        }
647        let _si = cursor.read_u16()?;
648        let bitmap_size = cursor.read_u16()? as usize;
649
650        if schema_idx >= self.schemas.len() {
651            return Err(Error::ParseError(format!(
652                "struct array schema index {} out of bounds ({} schemas available)",
653                schema_idx, self.schemas.len()
654            )));
655        }
656        let schema = &self.schemas[schema_idx];
657        let capacity = (count as usize).min(cursor.remaining()).min(MAX_COLLECTION_SIZE);
658        let mut result = Vec::with_capacity(capacity);
659
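        // Each element starts with a per-element null bitmap: bit (i % 8) of byte (i / 8)
        // is set when field i is null (e.g. 0b0000_0101 marks fields 0 and 2 as null).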
660        for _ in 0..count {
661            let mut bitmap = Vec::with_capacity(bitmap_size.min(cursor.remaining()));
662            for _ in 0..bitmap_size {
663                bitmap.push(cursor.read_u8()?);
664            }
665
666            // Check if all field bits are set — indicates a null array element
667            let all_null = (0..schema.fields.len())
668                .all(|i| i / 8 < bitmap.len() && (bitmap[i / 8] & (1 << (i % 8))) != 0);
669
670            if all_null {
671                result.push(Value::Null);
672            } else {
673                let mut obj = ObjectMap::new();
674                for (i, field) in schema.fields.iter().enumerate() {
675                    let is_null = i / 8 < bitmap.len() && (bitmap[i / 8] & (1 << (i % 8))) != 0;
676                    if is_null {
677                        if !field.field_type.nullable {
678                            obj.insert(field.name.clone(), Value::Null);
679                        }
680                    } else {
681                        // Resolve union types: if the base name is in union_map, decode as Tagged
682                        let tl_type = if self.union_map.contains_key(&field.field_type.base) {
683                            TLType::Tagged
684                        } else {
685                            field.field_type.to_tl_type()
686                        };
687                        obj.insert(field.name.clone(), self.decode_value(cursor, tl_type, depth + 1)?);
688                    }
689                }
690                result.push(Value::Object(obj));
691            }
692        }
693
694        Ok(Value::Array(result))
695    }
696
697    fn decode_array(&self, cursor: &mut Cursor, depth: usize) -> Result<Value> {
698        if depth > MAX_DECODE_DEPTH {
699            return Err(Error::ParseError("maximum decode nesting depth exceeded".into()));
700        }
701        let count = cursor.read_u32()?;
702        if count == 0 {
703            return Ok(Value::Array(Vec::new()));
704        }
705        if count as usize > MAX_COLLECTION_SIZE {
706            return Err(Error::ParseError(format!(
707                "array element count {} exceeds limit of {}", count, MAX_COLLECTION_SIZE
708            )));
709        }
710
711        let elem_type = cursor.read_u8()?;
712        let capacity = (count as usize).min(cursor.remaining()).min(MAX_COLLECTION_SIZE);
713        let mut result = Vec::with_capacity(capacity);
714
715        if elem_type == 0xFF {
716            for _ in 0..count {
717                let t = TLType::try_from(cursor.read_u8()?)?;
718                result.push(self.decode_value(cursor, t, depth + 1)?);
719            }
720        } else {
721            let t = TLType::try_from(elem_type)?;
722            for _ in 0..count {
723                result.push(self.decode_value(cursor, t, depth + 1)?);
724            }
725        }
726
727        Ok(Value::Array(result))
728    }
729
730    fn decode_object(&self, cursor: &mut Cursor, depth: usize) -> Result<Value> {
731        if depth > MAX_DECODE_DEPTH {
732            return Err(Error::ParseError("maximum decode nesting depth exceeded".into()));
733        }
734        let count = cursor.read_u16()?;
735        let mut obj = ObjectMap::new();
736
737        for _ in 0..count {
738            let key_idx = cursor.read_u32()?;
739            let t = TLType::try_from(cursor.read_u8()?)?;
740            let key = self.get_string(key_idx as usize)?;
741            obj.insert(key, self.decode_value(cursor, t, depth + 1)?);
742        }
743
744        Ok(Value::Object(obj))
745    }
746
747    fn decode_struct(&self, cursor: &mut Cursor, depth: usize) -> Result<Value> {
748        if depth > MAX_DECODE_DEPTH {
749            return Err(Error::ParseError("maximum decode nesting depth exceeded".into()));
750        }
751        let schema_idx = cursor.read_u16()? as usize;
752        if schema_idx >= self.schemas.len() {
753            return Err(Error::ParseError(format!(
754                "struct schema index {} out of bounds ({} schemas available)",
755                schema_idx, self.schemas.len()
756            )));
757        }
758        let schema = &self.schemas[schema_idx];
759        let bitmap_size = (schema.fields.len() + 7) / 8;
760
761        let mut bitmap = Vec::with_capacity(bitmap_size.min(cursor.remaining()));
762        for _ in 0..bitmap_size {
763            bitmap.push(cursor.read_u8()?);
764        }
765
766        let mut obj = ObjectMap::new();
767        for (i, field) in schema.fields.iter().enumerate() {
768            let is_null = i / 8 < bitmap.len() && (bitmap[i / 8] & (1 << (i % 8))) != 0;
769            if is_null {
                // For nullable fields, omit null values to preserve absent-field semantics.
                // For non-nullable fields that are unexpectedly null, include an explicit Null to be safe.
772                if !field.field_type.nullable {
773                    obj.insert(field.name.clone(), Value::Null);
774                }
775            } else {
776                // Resolve union types: if the base name is in union_map, decode as Tagged
777                let tl_type = if self.union_map.contains_key(&field.field_type.base) {
778                    TLType::Tagged
779                } else {
780                    field.field_type.to_tl_type()
781                };
782                obj.insert(field.name.clone(), self.decode_value(cursor, tl_type, depth + 1)?);
783            }
784        }
785
786        Ok(Value::Object(obj))
787    }
788
789    fn decode_map(&self, cursor: &mut Cursor, depth: usize) -> Result<Value> {
790        if depth > MAX_DECODE_DEPTH {
791            return Err(Error::ParseError("maximum decode nesting depth exceeded".into()));
792        }
793        let count = cursor.read_u32()?;
794        if count as usize > MAX_COLLECTION_SIZE {
795            return Err(Error::ParseError(format!(
796                "map element count {} exceeds limit of {}", count, MAX_COLLECTION_SIZE
797            )));
798        }
799        let capacity = (count as usize).min(cursor.remaining()).min(MAX_COLLECTION_SIZE);
800        let mut pairs = Vec::with_capacity(capacity);
801
802        for _ in 0..count {
803            let key_type = TLType::try_from(cursor.read_u8()?)?;
804            let key = self.decode_value(cursor, key_type, depth + 1)?;
805            // Validate map key type per spec: map keys must be string, int, or uint
806            match &key {
807                Value::String(_) | Value::Int(_) | Value::UInt(_) => {}
808                _ => return Err(Error::ParseError(
809                    format!("invalid map key type {:?}: map keys must be string, int, or uint", key.tl_type())
810                )),
811            }
812            let val_type = TLType::try_from(cursor.read_u8()?)?;
813            let val = self.decode_value(cursor, val_type, depth + 1)?;
814            pairs.push((key, val));
815        }
816
817        Ok(Value::Map(pairs))
818    }
819
820    fn decode_value(&self, cursor: &mut Cursor, tl_type: TLType, depth: usize) -> Result<Value> {
821        if depth > MAX_DECODE_DEPTH {
822            return Err(Error::ParseError("maximum decode nesting depth exceeded".into()));
823        }
824        Ok(match tl_type {
825            TLType::Null => Value::Null,
826            TLType::Bool => Value::Bool(cursor.read_u8()? != 0),
827            TLType::Int8 => Value::Int(cursor.read_i8()? as i64),
828            TLType::Int16 => Value::Int(cursor.read_i16()? as i64),
829            TLType::Int32 => Value::Int(cursor.read_i32()? as i64),
830            TLType::Int64 => Value::Int(cursor.read_i64()?),
831            TLType::UInt8 => Value::UInt(cursor.read_u8()? as u64),
832            TLType::UInt16 => Value::UInt(cursor.read_u16()? as u64),
833            TLType::UInt32 => Value::UInt(cursor.read_u32()? as u64),
834            TLType::UInt64 => Value::UInt(cursor.read_u64()?),
835            TLType::Float32 => Value::Float(cursor.read_f32()? as f64),
836            TLType::Float64 => Value::Float(cursor.read_f64()?),
837            TLType::String => {
838                let idx = cursor.read_u32()?;
839                Value::String(self.get_string(idx as usize)?)
840            }
841            TLType::Bytes => {
842                let len = cursor.read_varint()? as usize;
843                Value::Bytes(cursor.read_bytes(len)?)
844            }
845            TLType::Array => self.decode_array(cursor, depth)?,
846            TLType::Object => self.decode_object(cursor, depth)?,
847            TLType::Struct => self.decode_struct(cursor, depth)?,
848            TLType::Ref => {
849                let idx = cursor.read_u32()?;
850                Value::Ref(self.get_string(idx as usize)?)
851            }
852            TLType::Tagged => {
853                let tag_idx = cursor.read_u32()?;
854                let inner_type = TLType::try_from(cursor.read_u8()?)?;
855                let tag = self.get_string(tag_idx as usize)?;
856                let inner = self.decode_value(cursor, inner_type, depth + 1)?;
857                Value::Tagged(tag, Box::new(inner))
858            }
859            TLType::Map => self.decode_map(cursor, depth)?,
860            TLType::Timestamp => {
861                let ts = cursor.read_i64()?;
862                let tz = cursor.read_i16()?;
863                Value::Timestamp(ts, tz)
864            }
865            TLType::JsonNumber => {
866                let idx = cursor.read_u32()?;
867                Value::JsonNumber(self.get_string(idx as usize)?)
868            }
869            TLType::Tuple => {
870                // Tuple is decoded as an array
871                self.decode_array(cursor, depth)?
872            }
873        })
874    }
875}
876
877// Simple cursor for reading binary data with bounds checking
878struct Cursor<'a> {
879    data: &'a [u8],
880    pos: usize,
881}
882
883impl<'a> Cursor<'a> {
884    fn new(data: &'a [u8]) -> Self {
885        Self { data, pos: 0 }
886    }
887
888    fn remaining(&self) -> usize {
889        self.data.len().saturating_sub(self.pos)
890    }
891
892    fn check_bounds(&self, len: usize) -> Result<()> {
893        let end = self.pos.checked_add(len)
894            .ok_or_else(|| Error::ParseError("cursor position overflow".into()))?;
895        if end > self.data.len() {
896            return Err(Error::ParseError(format!(
897                "read out of bounds: pos={} len={} data_len={}", self.pos, len, self.data.len()
898            )));
899        }
900        Ok(())
901    }
902
903    fn read_u8(&mut self) -> Result<u8> {
904        self.check_bounds(1)?;
905        let v = self.data[self.pos];
906        self.pos += 1;
907        Ok(v)
908    }
909
910    fn read_i8(&mut self) -> Result<i8> {
911        Ok(self.read_u8()? as i8)
912    }
913
914    fn read_u16(&mut self) -> Result<u16> {
915        self.check_bounds(2)?;
916        let end = self.pos + 2; // safe: check_bounds verified this won't exceed data.len()
917        let v = u16::from_le_bytes(self.data[self.pos..end].try_into().map_err(|_|
918            Error::ParseError("u16 cursor conversion failed".into())
919        )?);
920        self.pos = end;
921        Ok(v)
922    }
923
924    fn read_i16(&mut self) -> Result<i16> {
925        Ok(self.read_u16()? as i16)
926    }
927
928    fn read_u32(&mut self) -> Result<u32> {
929        self.check_bounds(4)?;
930        let end = self.pos + 4;
931        let v = u32::from_le_bytes(self.data[self.pos..end].try_into().map_err(|_|
932            Error::ParseError("u32 cursor conversion failed".into())
933        )?);
934        self.pos = end;
935        Ok(v)
936    }
937
938    fn read_i32(&mut self) -> Result<i32> {
939        Ok(self.read_u32()? as i32)
940    }
941
942    fn read_u64(&mut self) -> Result<u64> {
943        self.check_bounds(8)?;
944        let end = self.pos + 8;
945        let v = u64::from_le_bytes(self.data[self.pos..end].try_into().map_err(|_|
946            Error::ParseError("u64 cursor conversion failed".into())
947        )?);
948        self.pos = end;
949        Ok(v)
950    }
951
952    fn read_i64(&mut self) -> Result<i64> {
953        Ok(self.read_u64()? as i64)
954    }
955
956    fn read_f32(&mut self) -> Result<f32> {
957        self.check_bounds(4)?;
958        let end = self.pos + 4;
959        let v = f32::from_le_bytes(self.data[self.pos..end].try_into().map_err(|_|
960            Error::ParseError("f32 cursor conversion failed".into())
961        )?);
962        self.pos = end;
963        Ok(v)
964    }
965
966    fn read_f64(&mut self) -> Result<f64> {
967        self.check_bounds(8)?;
968        let end = self.pos + 8;
969        let v = f64::from_le_bytes(self.data[self.pos..end].try_into().map_err(|_|
970            Error::ParseError("f64 cursor conversion failed".into())
971        )?);
972        self.pos = end;
973        Ok(v)
974    }
975
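    /// Decode an unsigned LEB128-style varint: each byte carries 7 payload bits,
    /// and the high bit marks continuation. Example: [0xAC, 0x02] decodes to
    /// 300 (0x2C | 0x02 << 7).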
976    fn read_varint(&mut self) -> Result<u64> {
977        let mut result: u64 = 0;
978        let mut shift = 0;
979        for _ in 0..MAX_VARINT_BYTES {
980            let b = self.read_u8()?;
981            result |= ((b & 0x7F) as u64) << shift;
982            if b & 0x80 == 0 {
983                return Ok(result);
984            }
985            shift += 7;
986        }
987        Err(Error::ParseError("varint exceeds maximum length".into()))
988    }
989
990    fn read_bytes(&mut self, len: usize) -> Result<Vec<u8>> {
991        self.check_bounds(len)?;
992        let end = self.pos.checked_add(len)
993            .ok_or_else(|| Error::ParseError("read_bytes offset overflow".into()))?;
994        let v = self.data[self.pos..end].to_vec();
995        self.pos = end;
996        Ok(v)
997    }
998}
999
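/// Decompress a zlib-compressed section, bounding output at MAX_DECOMPRESSED_SIZE.
/// Reading through `take(limit + 1)` caps the allocation and lets an oversized
/// payload be detected by a single extra byte instead of inflating it fully.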
1000fn decompress_data(data: &[u8]) -> Result<Vec<u8>> {
1001    use flate2::read::ZlibDecoder;
1002
1003    let decoder = ZlibDecoder::new(data);
1004    let mut limited = decoder.take((MAX_DECOMPRESSED_SIZE as u64) + 1);
1005    let mut result = Vec::new();
1006    limited.read_to_end(&mut result)
1007        .map_err(|_| Error::ParseError("Decompression failed".to_string()))?;
1008    if result.len() > MAX_DECOMPRESSED_SIZE {
1009        return Err(Error::ParseError(format!(
1010            "Decompressed data exceeds maximum size of {} bytes", MAX_DECOMPRESSED_SIZE
1011        )));
1012    }
1013    Ok(result)
1014}
1015
1016#[cfg(test)]
1017mod tests {
1018    use super::*;
1019    use crate::writer::Writer;
1020
1021    #[test]
1022    fn test_open_mmap() {
1023        // Write a binary file first, then open with mmap
1024        let dir = std::env::temp_dir();
1025        let path = dir.join("test_reader_mmap.tlbx");
1026
1027        let mut w = Writer::new();
1028        w.add_section("val", &Value::Int(42), None).unwrap();
1029        w.write(&path, false).unwrap();
1030
1031        let r = Reader::open_mmap(&path).unwrap();
1032        assert_eq!(r.get("val").unwrap().as_int(), Some(42));
1033        std::fs::remove_file(&path).ok();
1034    }
1035
1036    #[test]
1037    fn test_open_regular() {
1038        let dir = std::env::temp_dir();
1039        let path = dir.join("test_reader_open.tlbx");
1040
1041        let mut w = Writer::new();
1042        w.add_section("greeting", &Value::String("hi".into()), None).unwrap();
1043        w.write(&path, false).unwrap();
1044
1045        let r = Reader::open(&path).unwrap();
1046        assert_eq!(r.get("greeting").unwrap().as_str(), Some("hi"));
1047        std::fs::remove_file(&path).ok();
1048    }
1049
1050    #[test]
1051    fn test_invalid_magic() {
        let result = Reader::from_bytes(vec![0u8; 64]);
1053        assert!(result.is_err());
1054    }
1055
1056    #[test]
1057    fn test_too_short_data() {
1058        let result = Reader::from_bytes(vec![0; 10]);
1059        assert!(result.is_err());
1060    }
1061
1062    #[test]
1063    fn test_wrong_version() {
1064        let mut data = vec![0u8; 64];
        // Set correct magic bytes "TLBX"
        data[0] = b'T'; data[1] = b'L'; data[2] = b'B'; data[3] = b'X';
1067        // Set wrong major version (3)
1068        data[4] = 3; data[5] = 0;
1069        let result = Reader::from_bytes(data);
1070        assert!(result.is_err());
1071    }
1072
1073    #[test]
1074    fn test_string_index_out_of_bounds() {
1075        let dir = std::env::temp_dir();
1076        let path = dir.join("test_str_oob.tlbx");
1077
1078        let mut w = Writer::new();
1079        w.add_section("x", &Value::Int(1), None).unwrap();
1080        w.write(&path, false).unwrap();
1081
1082        let r = Reader::from_bytes(std::fs::read(&path).unwrap()).unwrap();
1083        let result = r.get_string(9999);
1084        assert!(result.is_err());
1085        std::fs::remove_file(&path).ok();
1086    }
1087
1088    #[test]
1089    fn test_keys() {
1090        let dir = std::env::temp_dir();
1091        let path = dir.join("test_reader_keys.tlbx");
1092
1093        let mut w = Writer::new();
1094        w.add_section("alpha", &Value::Int(1), None).unwrap();
1095        w.add_section("beta", &Value::Int(2), None).unwrap();
1096        w.write(&path, false).unwrap();
1097
1098        let r = Reader::from_bytes(std::fs::read(&path).unwrap()).unwrap();
1099        let keys = r.keys();
1100        assert!(keys.contains(&"alpha"));
1101        assert!(keys.contains(&"beta"));
1102        std::fs::remove_file(&path).ok();
1103    }
1104
1105    #[test]
1106    fn test_missing_key() {
1107        let dir = std::env::temp_dir();
1108        let path = dir.join("test_reader_missing.tlbx");
1109
1110        let mut w = Writer::new();
1111        w.add_section("exists", &Value::Int(1), None).unwrap();
1112        w.write(&path, false).unwrap();
1113
1114        let r = Reader::from_bytes(std::fs::read(&path).unwrap()).unwrap();
1115        assert!(r.get("nonexistent").is_err());
1116        std::fs::remove_file(&path).ok();
1117    }
1118
1119    #[test]
1120    fn test_struct_section_roundtrip() {
1121        let dir = std::env::temp_dir();
1122        let path = dir.join("test_struct_section.tlbx");
1123
1124        let mut schema = Schema::new("Point");
1125        schema.add_field("x", FieldType::new("int"));
1126        schema.add_field("y", FieldType::new("int"));
1127
1128        let mut w = Writer::new();
1129        w.add_schema(schema.clone());
1130
1131        let mut obj1 = ObjectMap::new();
1132        obj1.insert("x".to_string(), Value::Int(10));
1133        obj1.insert("y".to_string(), Value::Int(20));
1134
1135        let mut obj2 = ObjectMap::new();
1136        obj2.insert("x".to_string(), Value::Int(30));
1137        obj2.insert("y".to_string(), Value::Null);
1138
1139        let arr = Value::Array(vec![Value::Object(obj1), Value::Object(obj2)]);
1140        w.add_section("points", &arr, Some(&schema)).unwrap();
1141        w.write(&path, false).unwrap();
1142
1143        let r = Reader::from_bytes(std::fs::read(&path).unwrap()).unwrap();
1144        assert!(!r.schemas.is_empty());
1145
1146        let points = r.get("points").unwrap();
1147        let items = points.as_array().unwrap();
1148        assert_eq!(items.len(), 2);
1149        let p1 = items[0].as_object().unwrap();
1150        assert_eq!(p1.get("x").unwrap().as_int(), Some(10));
1151        let p2 = items[1].as_object().unwrap();
1152        assert!(p2.get("y").unwrap().is_null());
1153        std::fs::remove_file(&path).ok();
1154    }
1155
1156    #[test]
1157    fn test_heterogeneous_array() {
1158        // Mixed-type array uses 0xFF element type marker
1159        let dir = std::env::temp_dir();
1160        let path = dir.join("test_hetero_arr.tlbx");
1161
1162        let arr = Value::Array(vec![
1163            Value::Int(1),
1164            Value::String("hello".into()),
1165            Value::Bool(true),
1166        ]);
1167
1168        let mut w = Writer::new();
1169        w.add_section("mixed", &arr, None).unwrap();
1170        w.write(&path, false).unwrap();
1171
1172        let r = Reader::from_bytes(std::fs::read(&path).unwrap()).unwrap();
1173        let val = r.get("mixed").unwrap();
1174        let items = val.as_array().unwrap();
1175        assert_eq!(items.len(), 3);
1176        assert_eq!(items[0].as_int(), Some(1));
1177        assert_eq!(items[1].as_str(), Some("hello"));
1178        assert_eq!(items[2].as_bool(), Some(true));
1179        std::fs::remove_file(&path).ok();
1180    }
1181
1182    #[test]
1183    fn test_empty_array() {
1184        let dir = std::env::temp_dir();
1185        let path = dir.join("test_empty_arr.tlbx");
1186
1187        let mut w = Writer::new();
1188        w.add_section("empty", &Value::Array(vec![]), None).unwrap();
1189        w.write(&path, false).unwrap();
1190
1191        let r = Reader::from_bytes(std::fs::read(&path).unwrap()).unwrap();
1192        let val = r.get("empty").unwrap();
1193        let items = val.as_array().unwrap();
1194        assert_eq!(items.len(), 0);
1195        std::fs::remove_file(&path).ok();
1196    }
1197
1198    #[test]
1199    fn test_truncated_section_data() {
1200        // Craft a file where section offset points past end of file
1201        let dir = std::env::temp_dir();
1202        let path = dir.join("test_truncated.tlbx");
1203
1204        let mut w = Writer::new();
1205        w.add_section("val", &Value::Int(42), None).unwrap();
1206        w.write(&path, false).unwrap();
1207
1208        // Read and truncate the data
1209        let mut data = std::fs::read(&path).unwrap();
1210        data.truncate(data.len() - 1); // Remove last byte
1211        // The reader should either error during parsing or during get()
1212        // (depends on what the last byte was), but should not panic
1213        let result = Reader::from_bytes(data);
1214        if let Ok(r) = result {
1215            // If it parsed headers OK, get should fail gracefully
1216            let _ = r.get("val"); // Should not panic
1217        }
1218        std::fs::remove_file(&path).ok();
1219    }
1220
1221    #[test]
1222    fn test_cursor_bounds_checking() {
1223        // Ensure cursor read methods return errors on out-of-bounds
1224        let data = vec![1u8, 2];
1225        let mut cursor = Cursor::new(&data);
1226        assert!(cursor.read_u8().is_ok());
1227        assert!(cursor.read_u8().is_ok());
1228        assert!(cursor.read_u8().is_err()); // Past end
1229
1230        let mut cursor2 = Cursor::new(&data);
1231        assert!(cursor2.read_u32().is_err()); // Only 2 bytes available
1232
1233        let empty: Vec<u8> = vec![];
1234        let mut cursor3 = Cursor::new(&empty);
1235        assert!(cursor3.read_u8().is_err());
1236        assert!(cursor3.read_varint().is_err());
1237    }
1238
1239    #[test]
1240    fn test_varint_too_long() {
1241        // All continuation bytes (0x80) - should error after MAX_VARINT_BYTES
1242        let data = vec![0x80u8; 20];
1243        let mut cursor = Cursor::new(&data);
1244        assert!(cursor.read_varint().is_err());
1245    }
1246
1247    // =========================================================================
1248    // Issue 10: Decompression cache
1249    // =========================================================================
1250
1251    #[test]
1252    fn test_cache_returns_same_value() {
1253        use crate::Writer;
1254        let dir = std::env::temp_dir();
1255        let path = dir.join("test_cache_hit.tlbx");
1256
1257        let mut w = Writer::new();
1258        w.add_section("greeting", &crate::Value::String("hello".into()), None).unwrap();
1259        w.add_section("number", &crate::Value::Int(42), None).unwrap();
1260        w.write(&path, true).unwrap(); // compressed
1261
1262        let r = Reader::from_bytes(std::fs::read(&path).unwrap()).unwrap();
1263
1264        // First call populates cache
1265        let v1 = r.get("greeting").unwrap();
1266        // Second call should return cached value
1267        let v2 = r.get("greeting").unwrap();
1268        assert_eq!(v1, v2);
1269        assert_eq!(v1.as_str(), Some("hello"));
1270
1271        // Verify cache has entry
1272        assert!(r.cache.borrow().contains_key("greeting"));
1273        assert!(!r.cache.borrow().contains_key("number")); // not yet accessed
1274
1275        // Access number
1276        let num = r.get("number").unwrap();
1277        assert_eq!(num.as_int(), Some(42));
1278        assert!(r.cache.borrow().contains_key("number"));
1279
1280        std::fs::remove_file(&path).ok();
1281    }
1282
1283    #[test]
1284    fn test_fuzz_crash_crafted_tlbx_2_no_panic() {
1285        // Regression: fuzz_reader crash-5a6d5f6582c97f5bdc177383430d35c687c640fb
1286        let data: Vec<u8> = vec![
1287            0x54, 0x4C, 0x42, 0x58, 0x02, 0x00, 0x0E, 0x03, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
1288            0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x3A, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
1289            0x3A, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
1290            0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x08, 0x00, 0x00, 0x00, 0x00, 0x00, 0x24, 0x00,
1291            0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
1292            0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x06, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
1293            0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
1294            0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
1295            0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
1296            0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x14, 0x00,
1297            0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
1298            0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x06, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
1299            0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
1300            0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
1301            0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
1302            0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x14, 0x00,
1303            0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
1304            0x00, 0x00, 0x00, 0xC0, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
1305            0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x20, 0x00, 0x00, 0x00,
1306            0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x20, 0x00, 0x00, 0x58, 0x00, 0x00, 0x00, 0x0A, 0x00, 0x00,
1307            0x00, 0x00, 0x00, 0x0A, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00,
1308        ];
1309        let result = Reader::from_bytes(data);
1310        if let Ok(r) = result {
1311            for key in r.keys() {
1312                let _ = r.get(key); // Must not panic
1313            }
1314        }
1315    }
1316
1317    #[test]
1318    fn test_fuzz_oom_crafted_tlbx_no_panic() {
1319        // Regression: fuzz_reader oom-1aecaf7b31c65092524b8d5ea7e9b979f06da315
1320        // Crafted TLBX that triggers large allocations via inflated count fields.
1321        // Must return Err (or Ok with bounded memory), not OOM.
1322        let data: Vec<u8> = vec![
1323            0x54, 0x4C, 0x42, 0x58, 0x02, 0x00, 0x0E, 0x03, 0x00, 0x00, 0x00, 0x00, 0x0E, 0x00, 0x00, 0x00,
1324            0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x3A, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
1325            0x3A, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
1326            0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x08, 0x00, 0x00, 0x00, 0x00, 0x02, 0x24, 0x00,
1327            0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
1328            0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x06, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
1329            0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
1330            0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
1331            0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
1332            0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
1333            0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
1334            0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
1335            0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x2D, 0x00, 0x00, 0x00,
1336            0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x42, 0x4C,
1337            0x54, 0x26, 0x00, 0x00, 0x00, 0x00, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0x23,
1338            0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
1339            0x67, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xFE, 0x00, 0x00, 0xFF, 0xFF, 0xFF, 0x00, 0x00, 0x00,
1340            0x00, 0x00, 0x00, 0x00, 0xFE, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x40, 0x00, 0x00, 0x00,
1341            0x00, 0x30, 0x30, 0x30, 0x30, 0x30, 0x20, 0x30, 0x30, 0x30, 0x30, 0x30, 0x30, 0x30, 0x30, 0x30,
1342            0x30, 0x30, 0x30, 0x30, 0x30, 0x30, 0x30, 0x30, 0x30, 0x30, 0x30, 0x30, 0x30, 0x00, 0x00, 0x00,
1343            0x00,
1344        ];
1345        let result = Reader::from_bytes(data);
1346        if let Ok(r) = result {
1347            for key in r.keys() {
1348                let _ = r.get(key); // Must not panic or OOM
1349            }
1350        }
1351    }
1352
1353    #[test]
1354    fn test_fuzz_oom_null_array_no_oom() {
1355        // Regression: fuzz_reader oom-691da8e2c0990bf219571aef468e5fce6e23cabc
1356        // 335-byte file with count=973,078,528 and elem_type=Null (0x00).
1357        // Null elements consume 0 cursor bytes, so the loop never fails on its own.
1358        // Without MAX_COLLECTION_SIZE check, this allocates ~31 GB → OOM.
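        // (Roughly: 973,078,528 elements × ~32 bytes per decoded Value ≈ 31 GB;
        // the ~32-byte figure is an approximation of Value's in-memory size.
        // A sketch of the count guard follows this test.)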
1359        let data: Vec<u8> = vec![
1360            0x54, 0x4C, 0x42, 0x58, 0x02, 0x00, 0x0E, 0x03, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
1361            0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x3A, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
1362            0x3A, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
1363            0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x08, 0x00, 0x00, 0x00, 0x00, 0x00, 0x24, 0x00,
1364            0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
1365            0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x06, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
1366            // ... padding (all zeros for entries 1-6) ...
1367        ];
1368        // Zero-pad up to offset 290, where the crafted directory entry starts; the final buffer is resized to 335 bytes to match the crash artifact
1369        let mut padded = data.clone();
1370        padded.resize(290, 0x00);
1371        // Entry 7 at offset 290: key_idx=0, offset=29, size=32, ptype=0x20 (Array)
1372        padded.extend_from_slice(&[
1373            0x00, 0x00, 0x00, 0x00,             // key_idx = 0
1374            0x1D, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, // offset = 29
1375            0x20, 0x00, 0x00, 0x00,             // size = 32
1376            0x00, 0x00, 0x00, 0x00,             // uncompressed = 0
1377            0x00, 0x00,                         // schema_idx = 0
1378            0x20,                               // ptype = Array
1379            0x00,                               // flags = 0
1380            0x00, 0x58, 0x00, 0x00,             // item_count
1381            0x00, 0x00, 0x00, 0x00,             // padding
1382        ]);
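        // The entry above is 32 bytes, so the buffer now ends at offset 322;
        // the final resize below zero-pads it to the artifact's 335 bytes.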
1383        padded.resize(335, 0x00);
1384
1385        let result = Reader::from_bytes(padded);
1386        if let Ok(r) = result {
1387            for key in r.keys() {
1388                let _ = r.get(key); // Must not OOM
1389            }
1390        }
1391    }
1392
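    /// Minimal sketch of the bound that MAX_COLLECTION_SIZE places on decoded
    /// collection counts (illustrative only; hypothetical helper name, not called
    /// by the tests in this module — the real check lives in the decode path).
    #[allow(dead_code)]
    fn checked_collection_count(count: usize) -> crate::Result<usize> {
        if count > super::MAX_COLLECTION_SIZE {
            return Err(crate::Error::ParseError(format!(
                "collection count {} exceeds limit {}",
                count,
                super::MAX_COLLECTION_SIZE
            )));
        }
        Ok(count)
    }
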
1393    #[test]
1394    fn test_fuzz_crash_crafted_tlbx_no_panic() {
1395        // Regression: fuzz_reader crash-b239a207c06b3584ad33adddeac470e0fa792cb9
1396        // Crafted TLBX with valid magic but manipulated section pointers.
1397        // Must not panic: Err is expected, and any Ok reader must decode without panicking.
1398        let data: Vec<u8> = vec![
1399            0x54, 0x4C, 0x42, 0x58, 0x02, 0x00, 0x0E, 0x03, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
1400            0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x3A, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
1401            0x3A, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
1402            0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x08, 0x00, 0x00, 0x00, 0x00, 0x00, 0x24, 0x00,
1403            0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
1404            0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x06, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
1405            0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
1406            0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x08, 0x00, 0x00, 0x00, 0x00, 0x00, 0x24, 0x00,
1407            0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
1408            0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x06, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
1409            0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
1410            0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
1411            0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
1412            0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x54,
1413            0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x80, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
1414            0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x08, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
1415            0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xC0, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
1416            0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x08, 0x00, 0x00, 0x00, 0x00, 0x00, 0x24, 0x00,
1417            0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x75, 0x00,
1418            0x00, 0x00, 0x00, 0x00, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
1419            0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
1420            0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x00, 0x00,
1421            0x00, 0x00, 0x11, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
1422            0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
1423        ];
1424        let result = Reader::from_bytes(data);
1425        if let Ok(r) = result {
1426            for key in r.keys() {
1427                let _ = r.get(key); // Must not panic
1428            }
1429        }
1430    }
1431
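    /// Minimal sketch of the shared shape of the fuzz-regression tests above
    /// (hypothetical helper, not used by them): feed a crafted buffer to
    /// Reader::from_bytes and require that decoding every section neither panics
    /// nor allocates unbounded memory.
    #[allow(dead_code)]
    fn decode_all_sections(data: Vec<u8>) {
        if let Ok(r) = Reader::from_bytes(data) {
            for key in r.keys() {
                let _ = r.get(key); // decode errors are fine; panics and OOM are not
            }
        }
    }
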
1432    #[test]
1433    fn test_clear_cache() {
1434        use crate::Writer;
1435        let dir = std::env::temp_dir();
1436        let path = dir.join("test_cache_clear.tlbx");
1437
1438        let mut w = Writer::new();
1439        w.add_section("val", &crate::Value::Int(99), None).unwrap();
1440        w.write(&path, false).unwrap();
1441
1442        let r = Reader::from_bytes(std::fs::read(&path).unwrap()).unwrap();
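        // First get() decodes the section and populates the reader's cache (checked below)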
1443        let _ = r.get("val").unwrap();
1444        assert_eq!(r.cache.borrow().len(), 1);
1445
1446        r.clear_cache();
1447        assert_eq!(r.cache.borrow().len(), 0);
1448
1449        // Re-access still works
1450        let v = r.get("val").unwrap();
1451        assert_eq!(v.as_int(), Some(99));
1452
1453        std::fs::remove_file(&path).ok();
1454    }
1455}