Skip to main content

tealeaf/
reader.rs

1//! Binary format reader for TeaLeaf
2//!
3//! Supports two modes:
4//! - `open()` - Reads file into memory (Vec<u8>)
5//! - `open_mmap()` - Memory-maps file for zero-copy access
6
7use std::cell::RefCell;
8use std::collections::HashMap;
9use std::fs::File;
10use std::io::Read;
11use std::path::Path;
12use std::sync::Arc;
13use indexmap::IndexMap;
14use crate::types::ObjectMap;
15
16use memmap2::Mmap;
17
18use crate::{Error, Result, Value, Schema, Union, Variant, Field, FieldType, TLType, MAGIC, HEADER_SIZE};
19
/// Maximum allowed decompressed data size (256 MB).
/// NOTE(review): presumably enforced inside `decompress_data` (not visible in
/// this chunk) — confirm the limit is actually applied there.
const MAX_DECOMPRESSED_SIZE: usize = 256 * 1024 * 1024;

/// Maximum varint encoding length in bytes (ceil(64/7) = 10)
const MAX_VARINT_BYTES: usize = 10;

/// Maximum recursion depth for nested decode calls (arrays, objects, maps, tagged values).
/// Set above the text parser's tested 200-level nesting to ensure binary round-trip parity.
const MAX_DECODE_DEPTH: usize = 256;

/// Maximum number of elements allowed in a single decoded collection (array, map, struct array).
/// Also used to cap Vec::with_capacity during decode. Prevents OOM from crafted count values
/// in small files (e.g. a 335-byte file claiming 973M Null elements).
const MAX_COLLECTION_SIZE: usize = 1024 * 1024;
34
35/// Read a u16 from data at the given offset, with bounds checking
36fn read_u16_at(data: &[u8], offset: usize) -> Result<u16> {
37    let end = offset.checked_add(2)
38        .ok_or_else(|| Error::ParseError("offset overflow".into()))?;
39    if end > data.len() {
40        return Err(Error::ParseError(format!(
41            "read u16 out of bounds at offset {} (data len {})", offset, data.len()
42        )));
43    }
44    Ok(u16::from_le_bytes(data[offset..end].try_into().map_err(|_|
45        Error::ParseError(format!("u16 slice conversion failed at offset {}", offset))
46    )?))
47}
48
49/// Read a u32 from data at the given offset, with bounds checking
50fn read_u32_at(data: &[u8], offset: usize) -> Result<u32> {
51    let end = offset.checked_add(4)
52        .ok_or_else(|| Error::ParseError("offset overflow".into()))?;
53    if end > data.len() {
54        return Err(Error::ParseError(format!(
55            "read u32 out of bounds at offset {} (data len {})", offset, data.len()
56        )));
57    }
58    Ok(u32::from_le_bytes(data[offset..end].try_into().map_err(|_|
59        Error::ParseError(format!("u32 slice conversion failed at offset {}", offset))
60    )?))
61}
62
63/// Read a u64 from data at the given offset, with bounds checking
64fn read_u64_at(data: &[u8], offset: usize) -> Result<u64> {
65    let end = offset.checked_add(8)
66        .ok_or_else(|| Error::ParseError("offset overflow".into()))?;
67    if end > data.len() {
68        return Err(Error::ParseError(format!(
69            "read u64 out of bounds at offset {} (data len {})", offset, data.len()
70        )));
71    }
72    Ok(u64::from_le_bytes(data[offset..end].try_into().map_err(|_|
73        Error::ParseError(format!("u64 slice conversion failed at offset {}", offset))
74    )?))
75}
76
/// Storage backend for reader data.
///
/// Both variants expose the same `&[u8]` view via `AsRef`, so the rest of
/// the reader is agnostic to how the bytes are held.
enum DataSource {
    /// Owned bytes (from file read)
    Owned(Vec<u8>),
    /// Memory-mapped file (zero-copy)
    Mapped(Arc<Mmap>),
}
84
85impl AsRef<[u8]> for DataSource {
86    fn as_ref(&self) -> &[u8] {
87        match self {
88            DataSource::Owned(v) => v.as_slice(),
89            DataSource::Mapped(m) => m.as_ref(),
90        }
91    }
92}
93
/// Binary format reader with mmap support for zero-copy access.
pub struct Reader {
    /// Raw file bytes (owned or memory-mapped).
    data: DataSource,
    /// Per-string byte offsets, relative to `string_data_offset`.
    string_offsets: Vec<u32>,
    /// Per-string byte lengths, parallel to `string_offsets`.
    string_lengths: Vec<u32>,
    /// Absolute offset of the string data area within `data`.
    string_data_offset: usize,
    pub schemas: Vec<Schema>,
    /// Schema name -> index into `schemas`.
    schema_map: HashMap<String, usize>,
    pub unions: Vec<Union>,
    /// Union name -> index into `unions`.
    union_map: HashMap<String, usize>,
    /// Section key -> location/type info, in file index order.
    sections: IndexMap<String, SectionInfo>,
    /// Indicates the source JSON was a root-level array (for round-trip fidelity)
    is_root_array: bool,
    /// Cache for decompressed and decoded values
    cache: RefCell<HashMap<String, Value>>,
}
110
/// Location and type metadata for one section, parsed from the index.
#[allow(dead_code)]
struct SectionInfo {
    /// Absolute byte offset of the section payload in the file.
    offset: u64,
    /// Stored payload size in bytes (compressed size when `compressed`).
    size: u32,
    /// Declared size after decompression; currently unused by this reader
    /// (hence the `dead_code` allow) — presumably a hint for allocators.
    uncompressed_size: u32,
    /// Index into `Reader::schemas`, or -1 (from the 0xFFFF sentinel) when
    /// the section has no schema.
    schema_idx: i32,
    /// Top-level type tag of the section payload.
    tl_type: TLType,
    /// Index flags bit 0: payload is compressed.
    compressed: bool,
    /// Index flags bit 1: payload is a struct array.
    is_array: bool,
    /// Declared element count; not validated here.
    item_count: u32,
}
122
123impl Reader {
124    /// Open a binary TeaLeaf file (reads into memory)
125    pub fn open<P: AsRef<Path>>(path: P) -> Result<Self> {
126        let mut file = File::open(path)?;
127        let mut data = Vec::new();
128        file.read_to_end(&mut data)?;
129        Self::from_bytes(data)
130    }
131
    /// Open a binary TeaLeaf file with memory mapping (zero-copy)
    ///
    /// This is faster for large files as the OS handles paging.
    /// The file must not be modified while the reader is open.
    ///
    /// # Safety
    /// The underlying file must not be modified while the reader exists.
    pub fn open_mmap<P: AsRef<Path>>(path: P) -> Result<Self> {
        let file = File::open(path)?;
        // SAFETY: mapping is sound only while the file is not mutated
        // underneath us; that requirement is documented above and pushed
        // to the caller.
        let mmap = unsafe { Mmap::map(&file)? };
        Self::from_data_source(DataSource::Mapped(Arc::new(mmap)))
    }
144
    /// Create reader from owned bytes
    ///
    /// Takes ownership of `data` and immediately parses the header, string
    /// table, schemas, unions, and section index.
    pub fn from_bytes(data: Vec<u8>) -> Result<Self> {
        Self::from_data_source(DataSource::Owned(data))
    }
149
    /// Create reader from data source (internal)
    ///
    /// Validates the fixed header (magic, version, flags), then parses the
    /// string table, schema/union tables, and section index. All region
    /// offsets and sizes are bounds-checked with overflow-safe arithmetic
    /// before any table is read, so crafted headers fail with `ParseError`
    /// rather than panicking.
    fn from_data_source(data: DataSource) -> Result<Self> {
        let bytes = data.as_ref();

        if bytes.len() < HEADER_SIZE {
            return Err(Error::InvalidMagic);
        }
        if &bytes[0..4] != MAGIC {
            return Err(Error::InvalidMagic);
        }

        // Check version - we support major version 2
        let major = read_u16_at(bytes, 4)?;
        let minor = read_u16_at(bytes, 6)?;
        if major != 2 {
            return Err(Error::InvalidVersion { major, minor });
        }

        // Read flags: bit 0 = compressed (handled per-section), bit 1 = root_array
        let flags = read_u32_at(bytes, 8)?;
        let is_root_array = (flags & 0x02) != 0;

        // Header layout: region offsets at 16/24/32/40, counts at 48/52/56.
        let str_off = read_u64_at(bytes, 16)? as usize;
        let sch_off = read_u64_at(bytes, 24)? as usize;
        let idx_off = read_u64_at(bytes, 32)? as usize;
        let dat_off = read_u64_at(bytes, 40)? as usize;
        let str_cnt = read_u32_at(bytes, 48)? as usize;
        let sch_cnt = read_u32_at(bytes, 52)? as usize;
        let sec_cnt = read_u32_at(bytes, 56)? as usize;

        // Validate region offsets are within file bounds
        if str_off > bytes.len() || sch_off > bytes.len() || idx_off > bytes.len() || dat_off > bytes.len() {
            return Err(Error::ParseError("header region offsets exceed file size".into()));
        }

        // Parse string table: 8-byte region header, then two parallel u32
        // tables (offsets, lengths), then the raw string bytes.
        let str_header_end = str_off.checked_add(8)
            .ok_or_else(|| Error::ParseError("string table offset overflow".into()))?;
        if str_header_end > bytes.len() {
            return Err(Error::ParseError("string table header out of bounds".into()));
        }

        let offsets_size = str_cnt.checked_mul(4)
            .ok_or_else(|| Error::ParseError("string count overflow".into()))?;
        let lengths_size = str_cnt.checked_mul(4)
            .ok_or_else(|| Error::ParseError("string count overflow".into()))?;
        let str_table_end = str_header_end
            .checked_add(offsets_size)
            .and_then(|v| v.checked_add(lengths_size))
            .ok_or_else(|| Error::ParseError("string table size overflow".into()))?;
        if str_table_end > bytes.len() {
            return Err(Error::ParseError("string table out of bounds".into()));
        }

        let mut off = str_off + 8;
        let string_offsets: Vec<u32> = (0..str_cnt)
            .map(|i| read_u32_at(bytes, off + i * 4))
            .collect::<Result<Vec<u32>>>()?;
        off += offsets_size;
        let string_lengths: Vec<u32> = (0..str_cnt)
            .map(|i| read_u32_at(bytes, off + i * 4))
            .collect::<Result<Vec<u32>>>()?;
        let string_data_offset = off + lengths_size;

        // Read union_count from schema region header (sch_off+6..sch_off+8).
        // Best-effort: a truncated schema region yields zero unions rather
        // than an error here.
        let union_cnt = if sch_off + 8 <= bytes.len() {
            read_u16_at(bytes, sch_off + 6)? as usize
        } else {
            0
        };

        let mut reader = Self {
            data,
            string_offsets,
            string_lengths,
            string_data_offset,
            schemas: Vec::new(),
            schema_map: HashMap::new(),
            unions: Vec::new(),
            union_map: HashMap::new(),
            sections: IndexMap::new(),
            is_root_array,
            cache: RefCell::new(HashMap::new()),
        };

        // Order matters: parse_unions recomputes the struct-data size from
        // reader.schemas, so schemas must be parsed first.
        reader.parse_schemas(sch_off, sch_cnt)?;
        if union_cnt > 0 {
            reader.parse_unions(sch_off, sch_cnt, union_cnt)?;
        }
        reader.parse_index(idx_off, sec_cnt)?;

        Ok(reader)
    }
243
    /// Get the underlying data as a byte slice, regardless of whether the
    /// backing storage is owned or memory-mapped.
    fn data(&self) -> &[u8] {
        self.data.as_ref()
    }
248
249    /// Get a string by index
250    pub fn get_string(&self, idx: usize) -> Result<String> {
251        if idx >= self.string_offsets.len() {
252            return Err(Error::ParseError(format!("String index {} out of bounds", idx)));
253        }
254        let start = self.string_data_offset
255            .checked_add(self.string_offsets[idx] as usize)
256            .ok_or_else(|| Error::ParseError("string data offset overflow".into()))?;
257        let len = self.string_lengths[idx] as usize;
258        let end = start.checked_add(len)
259            .ok_or_else(|| Error::ParseError("string data range overflow".into()))?;
260        if end > self.data().len() {
261            return Err(Error::ParseError(format!(
262                "string data out of bounds: {}..{} exceeds file size {}", start, end, self.data().len()
263            )));
264        }
265        String::from_utf8(self.data()[start..end].to_vec())
266            .map_err(|_| Error::InvalidUtf8)
267    }
268
269    /// Get section keys
270    pub fn keys(&self) -> Vec<&str> {
271        self.sections.keys().map(|s| s.as_str()).collect()
272    }
273
    /// Check if the source JSON was a root-level array
    ///
    /// When true, the "root" key contains the array and `to_json` should
    /// output it directly without wrapping in an object.
    pub fn is_root_array(&self) -> bool {
        self.is_root_array
    }
281
    /// Get a value by key
    ///
    /// Decoded values are memoized: the first lookup decompresses the
    /// section (if flagged compressed) and decodes it, then stores a clone
    /// in `cache`; subsequent lookups return the cached clone.
    ///
    /// # Errors
    /// `MissingField` for an unknown key; `ParseError` for corrupt section
    /// data or an out-of-bounds section range.
    pub fn get(&self, key: &str) -> Result<Value> {
        // Check cache first
        if let Some(cached) = self.cache.borrow().get(key) {
            return Ok(cached.clone());
        }

        let section = self.sections.get(key)
            .ok_or_else(|| Error::MissingField(key.to_string()))?;

        // Re-validate the payload range (also checked at parse_index time).
        let start = section.offset as usize;
        let end = start.checked_add(section.size as usize)
            .ok_or_else(|| Error::ParseError("section offset overflow".into()))?;
        if end > self.data().len() {
            return Err(Error::ParseError(format!(
                "section '{}' data range {}..{} exceeds file size {}",
                key, start, end, self.data().len()
            )));
        }

        let data = if section.compressed {
            decompress_data(&self.data()[start..end])?
        } else {
            self.data()[start..end].to_vec()
        };

        let mut cursor = Cursor::new(&data);

        // Struct arrays dispatch by schema; everything else by type tag.
        let result = if section.is_array && section.schema_idx >= 0 {
            self.decode_struct_array(&mut cursor, section.schema_idx as usize, 0)?
        } else {
            match section.tl_type {
                TLType::Array => self.decode_array(&mut cursor, 0)?,
                TLType::Object => self.decode_object(&mut cursor, 0)?,
                TLType::Struct => self.decode_struct(&mut cursor, 0)?,
                TLType::Map => self.decode_map(&mut cursor, 0)?,
                _ => self.decode_value(&mut cursor, section.tl_type, 0)?,
            }
        };

        self.cache.borrow_mut().insert(key.to_string(), result.clone());
        Ok(result)
    }
325
    /// Clear the decompression cache to free memory
    ///
    /// Note: `HashMap::clear` keeps the map's allocated capacity; only the
    /// cached `Value`s themselves are dropped.
    pub fn clear_cache(&self) {
        self.cache.borrow_mut().clear();
    }
330
331    fn parse_schemas(&mut self, off: usize, count: usize) -> Result<()> {
332        if count == 0 {
333            return Ok(());
334        }
335
336        let data = self.data.as_ref();
337        let o = off.checked_add(8)
338            .ok_or_else(|| Error::ParseError("schema offset overflow".into()))?;
339
340        // Validate offset table bounds
341        let offsets_size = count.checked_mul(4)
342            .ok_or_else(|| Error::ParseError("schema count overflow".into()))?;
343        let offsets_end = o.checked_add(offsets_size)
344            .ok_or_else(|| Error::ParseError("schema offsets overflow".into()))?;
345        if offsets_end > data.len() {
346            return Err(Error::ParseError("schema offset table out of bounds".into()));
347        }
348
349        let offsets: Vec<u32> = (0..count)
350            .map(|i| read_u32_at(data, o + i * 4))
351            .collect::<Result<Vec<u32>>>()?;
352        let start = offsets_end;
353
354        for i in 0..count {
355            let so = start.checked_add(offsets[i] as usize)
356                .ok_or_else(|| Error::ParseError("schema entry offset overflow".into()))?;
357
358            // Need at least 8 bytes for schema entry header
359            if so.checked_add(8).map_or(true, |end| end > data.len()) {
360                return Err(Error::ParseError(format!("schema entry {} out of bounds", i)));
361            }
362
363            let name_idx = read_u32_at(data, so)?;
364            let field_count = read_u16_at(data, so + 4)? as usize;
365
366            let name = self.get_string(name_idx as usize)?;
367            let mut schema = Schema::new(&name);
368
369            let mut fo = so + 8;
370            for fi in 0..field_count {
371                // Each field entry is 8 bytes
372                if fo.checked_add(8).map_or(true, |end| end > data.len()) {
373                    return Err(Error::ParseError(format!(
374                        "schema '{}' field {} out of bounds", name, fi
375                    )));
376                }
377
378                let fname_idx = read_u32_at(data, fo)?;
379                let ftype = data[fo + 4];
380                let fflags = data[fo + 5];
381                let fextra = read_u16_at(data, fo + 6)?;
382
383                let fname = self.get_string(fname_idx as usize)?;
384                let tl_type = TLType::try_from(ftype)?;
385
386                let base = match tl_type {
387                    TLType::Bool => "bool".to_string(),
388                    TLType::Int8 => "int8".to_string(),
389                    TLType::Int16 => "int16".to_string(),
390                    TLType::Int32 => "int".to_string(),
391                    TLType::Int64 => "int64".to_string(),
392                    TLType::UInt8 => "uint8".to_string(),
393                    TLType::UInt16 => "uint16".to_string(),
394                    TLType::UInt32 => "uint".to_string(),
395                    TLType::UInt64 => "uint64".to_string(),
396                    TLType::Float32 => "float32".to_string(),
397                    TLType::Float64 => "float".to_string(),
398                    TLType::String => "string".to_string(),
399                    TLType::Bytes => "bytes".to_string(),
400                    TLType::Timestamp => "timestamp".to_string(),
401                    TLType::Struct => {
402                        // Read struct type name from string table (0xFFFF = no type)
403                        if fextra != 0xFFFF {
404                            self.get_string(fextra as usize)?
405                        } else {
406                            "object".to_string()
407                        }
408                    }
409                    TLType::Tagged => {
410                        // Union-typed field: read union name from string table
411                        if fextra != 0xFFFF {
412                            self.get_string(fextra as usize)?
413                        } else {
414                            "tagged".to_string()
415                        }
416                    }
417                    TLType::Object => "object".to_string(),
418                    TLType::Tuple => "tuple".to_string(),
419                    TLType::Map => "map".to_string(),
420                    _ => "string".to_string(),
421                };
422
423                let mut field_type = FieldType::new(&base);
424                if fflags & 0x01 != 0 {
425                    field_type.nullable = true;
426                }
427                if fflags & 0x02 != 0 {
428                    field_type.is_array = true;
429                }
430
431                schema.fields.push(Field::new(fname, field_type));
432                fo += 8;
433            }
434
435            self.schema_map.insert(name, self.schemas.len());
436            self.schemas.push(schema);
437        }
438
439        Ok(())
440    }
441
    /// Parse the union (tagged type) tables that follow the struct schemas
    /// in the schema region.
    ///
    /// Must run after `parse_schemas`: the size of the struct data is
    /// recomputed from `self.schemas` to locate where the union tables
    /// begin. Populates `self.unions` and `self.union_map`.
    fn parse_unions(&mut self, sch_off: usize, struct_count: usize, union_count: usize) -> Result<()> {
        let data = self.data.as_ref();

        // Calculate where struct offsets + struct data end
        // Schema region layout:
        //   [region_size: u32][struct_count: u16][union_count: u16]
        //   [struct_offsets: u32 * struct_count]
        //   [struct_data...]
        //   [union_offsets: u32 * union_count]
        //   [union_data...]
        let struct_offsets_start = sch_off.checked_add(8)
            .ok_or_else(|| Error::ParseError("union region offset overflow".into()))?;
        let struct_offsets_size = struct_count.checked_mul(4)
            .ok_or_else(|| Error::ParseError("struct count overflow".into()))?;
        let struct_data_start = struct_offsets_start.checked_add(struct_offsets_size)
            .ok_or_else(|| Error::ParseError("struct data start overflow".into()))?;
        // Each struct entry is an 8-byte header plus 8 bytes per field.
        let struct_data_size: usize = self.schemas.iter()
            .map(|s| 8 + s.fields.len() * 8)
            .sum();
        let union_offsets_start = struct_data_start.checked_add(struct_data_size)
            .ok_or_else(|| Error::ParseError("union offsets start overflow".into()))?;

        // Validate union offset table bounds
        let union_offsets_size = union_count.checked_mul(4)
            .ok_or_else(|| Error::ParseError("union count overflow".into()))?;
        let union_offsets_end = union_offsets_start.checked_add(union_offsets_size)
            .ok_or_else(|| Error::ParseError("union offsets end overflow".into()))?;
        if union_offsets_end > data.len() {
            return Err(Error::ParseError("union offset table out of bounds".into()));
        }

        // Read union offsets
        let union_offsets: Vec<u32> = (0..union_count)
            .map(|i| read_u32_at(data, union_offsets_start + i * 4))
            .collect::<Result<Vec<u32>>>()?;
        let union_data_start = union_offsets_end;

        for i in 0..union_count {
            let uo = union_data_start.checked_add(union_offsets[i] as usize)
                .ok_or_else(|| Error::ParseError("union entry offset overflow".into()))?;

            // Need at least 8 bytes for union entry header
            if uo.checked_add(8).map_or(true, |end| end > data.len()) {
                return Err(Error::ParseError(format!("union entry {} out of bounds", i)));
            }

            let name_idx = read_u32_at(data, uo)?;
            let variant_count = read_u16_at(data, uo + 4)? as usize;
            // uo + 6..uo + 8 is flags (reserved)

            let name = self.get_string(name_idx as usize)?;
            let mut union = Union::new(&name);

            // Variants are laid out back-to-back: each variant header is
            // followed immediately by its field records, and the next
            // variant starts where the previous one's fields end.
            let mut vo = uo + 8;
            for vi in 0..variant_count {
                // Need at least 8 bytes for variant header
                if vo.checked_add(8).map_or(true, |end| end > data.len()) {
                    return Err(Error::ParseError(format!(
                        "union '{}' variant {} out of bounds", name, vi
                    )));
                }

                let vname_idx = read_u32_at(data, vo)?;
                let field_count = read_u16_at(data, vo + 4)? as usize;
                // vo + 6..vo + 8 is flags (reserved)

                let vname = self.get_string(vname_idx as usize)?;
                let mut variant = Variant::new(&vname);

                let mut fo = vo + 8;
                for fi in 0..field_count {
                    // Each field entry is 8 bytes
                    if fo.checked_add(8).map_or(true, |end| end > data.len()) {
                        return Err(Error::ParseError(format!(
                            "union '{}' variant '{}' field {} out of bounds", name, vname, fi
                        )));
                    }

                    let fname_idx = read_u32_at(data, fo)?;
                    let ftype = data[fo + 4];
                    let fflags = data[fo + 5];
                    let fextra = read_u16_at(data, fo + 6)?;

                    let fname = self.get_string(fname_idx as usize)?;
                    let tl_type = TLType::try_from(ftype)?;

                    // Map the binary type tag to the textual base name;
                    // mirrors the field-type mapping used for struct schemas.
                    let base = match tl_type {
                        TLType::Bool => "bool".to_string(),
                        TLType::Int8 => "int8".to_string(),
                        TLType::Int16 => "int16".to_string(),
                        TLType::Int32 => "int".to_string(),
                        TLType::Int64 => "int64".to_string(),
                        TLType::UInt8 => "uint8".to_string(),
                        TLType::UInt16 => "uint16".to_string(),
                        TLType::UInt32 => "uint".to_string(),
                        TLType::UInt64 => "uint64".to_string(),
                        TLType::Float32 => "float32".to_string(),
                        TLType::Float64 => "float".to_string(),
                        TLType::String => "string".to_string(),
                        TLType::Bytes => "bytes".to_string(),
                        TLType::Timestamp => "timestamp".to_string(),
                        TLType::Struct => {
                            // 0xFFFF = no named struct type
                            if fextra != 0xFFFF {
                                self.get_string(fextra as usize)?
                            } else {
                                "object".to_string()
                            }
                        }
                        TLType::Tagged => {
                            // 0xFFFF = no named union type
                            if fextra != 0xFFFF {
                                self.get_string(fextra as usize)?
                            } else {
                                "tagged".to_string()
                            }
                        }
                        TLType::Object => "object".to_string(),
                        TLType::Tuple => "tuple".to_string(),
                        TLType::Map => "map".to_string(),
                        _ => "string".to_string(),
                    };

                    let mut field_type = FieldType::new(&base);
                    if fflags & 0x01 != 0 { field_type.nullable = true; }
                    if fflags & 0x02 != 0 { field_type.is_array = true; }

                    variant.fields.push(Field::new(fname, field_type));
                    fo += 8;
                }

                union.variants.push(variant);
                vo = fo;
            }

            self.union_map.insert(name, self.unions.len());
            self.unions.push(union);
        }

        Ok(())
    }
581
    /// Parse the section index: `count` fixed 32-byte entries starting
    /// 8 bytes past `off` (skipping the region header).
    ///
    /// Entry layout: [key_idx: u32][offset: u64][size: u32]
    /// [uncompressed_size: u32][schema_idx: u16][type: u8][flags: u8]
    /// [item_count: u32]; the trailing 4 bytes of each 32-byte record are
    /// presumably reserved/padding — confirm against the writer.
    ///
    /// Each section's data range is validated against the file size here.
    /// A duplicate key replaces the earlier entry in `sections`.
    fn parse_index(&mut self, off: usize, count: usize) -> Result<()> {
        let data = self.data.as_ref();
        let mut o = off.checked_add(8)
            .ok_or_else(|| Error::ParseError("index offset overflow".into()))?;

        // Validate index table bounds
        let index_size = count.checked_mul(32)
            .ok_or_else(|| Error::ParseError("index count overflow".into()))?;
        let index_end = o.checked_add(index_size)
            .ok_or_else(|| Error::ParseError("index region overflow".into()))?;
        if index_end > data.len() {
            return Err(Error::ParseError("index table out of bounds".into()));
        }

        for _ in 0..count {
            let key_idx = read_u32_at(data, o)?;
            let offset = read_u64_at(data, o + 4)?;
            let size = read_u32_at(data, o + 12)?;
            let uncompressed = read_u32_at(data, o + 16)?;
            let schema_idx = read_u16_at(data, o + 20)?;
            let ptype = data[o + 22];
            let flags = data[o + 23];
            let item_count = read_u32_at(data, o + 24)?;

            let key = self.get_string(key_idx as usize)?;

            // Validate section data range against file bounds
            let sec_start = offset as usize;
            let sec_end = sec_start.checked_add(size as usize)
                .ok_or_else(|| Error::ParseError(format!(
                    "section '{}' offset overflow", key
                )))?;
            if sec_end > data.len() {
                return Err(Error::ParseError(format!(
                    "section '{}' data range {}..{} exceeds file size {}",
                    key, sec_start, sec_end, data.len()
                )));
            }

            self.sections.insert(key, SectionInfo {
                offset,
                size,
                uncompressed_size: uncompressed,
                // 0xFFFF is the "no schema" sentinel; map it to -1.
                schema_idx: if schema_idx == 0xFFFF { -1 } else { schema_idx as i32 },
                tl_type: TLType::try_from(ptype)?,
                compressed: flags & 0x01 != 0,
                is_array: flags & 0x02 != 0,
                item_count,
            });
            o += 32;
        }

        Ok(())
    }
636
637    fn decode_struct_array(&self, cursor: &mut Cursor, schema_idx: usize, depth: usize) -> Result<Value> {
638        if depth > MAX_DECODE_DEPTH {
639            return Err(Error::ParseError("maximum decode nesting depth exceeded".into()));
640        }
641        let count = cursor.read_u32()?;
642        if count as usize > MAX_COLLECTION_SIZE {
643            return Err(Error::ParseError(format!(
644                "struct array element count {} exceeds limit of {}", count, MAX_COLLECTION_SIZE
645            )));
646        }
647        let _si = cursor.read_u16()?;
648        let bitmap_size = cursor.read_u16()? as usize;
649
650        if schema_idx >= self.schemas.len() {
651            return Err(Error::ParseError(format!(
652                "struct array schema index {} out of bounds ({} schemas available)",
653                schema_idx, self.schemas.len()
654            )));
655        }
656        let schema = &self.schemas[schema_idx];
657        let capacity = (count as usize).min(cursor.remaining()).min(MAX_COLLECTION_SIZE);
658        let mut result = Vec::with_capacity(capacity);
659
660        for _ in 0..count {
661            let mut bitmap = Vec::with_capacity(bitmap_size.min(cursor.remaining()));
662            for _ in 0..bitmap_size {
663                bitmap.push(cursor.read_u8()?);
664            }
665
666            // Check if all field bits are set — indicates a null array element
667            let all_null = (0..schema.fields.len())
668                .all(|i| i / 8 < bitmap.len() && (bitmap[i / 8] & (1 << (i % 8))) != 0);
669
670            if all_null {
671                result.push(Value::Null);
672            } else {
673                let mut obj = ObjectMap::new();
674                for (i, field) in schema.fields.iter().enumerate() {
675                    let is_null = i / 8 < bitmap.len() && (bitmap[i / 8] & (1 << (i % 8))) != 0;
676                    if is_null {
677                        obj.insert(field.name.clone(), Value::Null);
678                    } else {
679                        // Resolve union types: if the base name is in union_map, decode as Tagged
680                        let tl_type = if self.union_map.contains_key(&field.field_type.base) {
681                            TLType::Tagged
682                        } else {
683                            field.field_type.to_tl_type()
684                        };
685                        obj.insert(field.name.clone(), self.decode_value(cursor, tl_type, depth + 1)?);
686                    }
687                }
688                result.push(Value::Object(obj));
689            }
690        }
691
692        Ok(Value::Array(result))
693    }
694
695    fn decode_array(&self, cursor: &mut Cursor, depth: usize) -> Result<Value> {
696        if depth > MAX_DECODE_DEPTH {
697            return Err(Error::ParseError("maximum decode nesting depth exceeded".into()));
698        }
699        let count = cursor.read_u32()?;
700        if count == 0 {
701            return Ok(Value::Array(Vec::new()));
702        }
703        if count as usize > MAX_COLLECTION_SIZE {
704            return Err(Error::ParseError(format!(
705                "array element count {} exceeds limit of {}", count, MAX_COLLECTION_SIZE
706            )));
707        }
708
709        let elem_type = cursor.read_u8()?;
710        let capacity = (count as usize).min(cursor.remaining()).min(MAX_COLLECTION_SIZE);
711        let mut result = Vec::with_capacity(capacity);
712
713        if elem_type == 0xFF {
714            for _ in 0..count {
715                let t = TLType::try_from(cursor.read_u8()?)?;
716                result.push(self.decode_value(cursor, t, depth + 1)?);
717            }
718        } else {
719            let t = TLType::try_from(elem_type)?;
720            for _ in 0..count {
721                result.push(self.decode_value(cursor, t, depth + 1)?);
722            }
723        }
724
725        Ok(Value::Array(result))
726    }
727
728    fn decode_object(&self, cursor: &mut Cursor, depth: usize) -> Result<Value> {
729        if depth > MAX_DECODE_DEPTH {
730            return Err(Error::ParseError("maximum decode nesting depth exceeded".into()));
731        }
732        let count = cursor.read_u16()?;
733        let mut obj = ObjectMap::new();
734
735        for _ in 0..count {
736            let key_idx = cursor.read_u32()?;
737            let t = TLType::try_from(cursor.read_u8()?)?;
738            let key = self.get_string(key_idx as usize)?;
739            obj.insert(key, self.decode_value(cursor, t, depth + 1)?);
740        }
741
742        Ok(Value::Object(obj))
743    }
744
    /// Decode a single struct value:
    /// `[schema_idx: u16][null bitmap][non-null field values...]`.
    ///
    /// Bit `i` of the bitmap marks field `i` as null; null fields carry no
    /// payload bytes. Fields whose base type names a known union are
    /// decoded as `Tagged`.
    fn decode_struct(&self, cursor: &mut Cursor, depth: usize) -> Result<Value> {
        if depth > MAX_DECODE_DEPTH {
            return Err(Error::ParseError("maximum decode nesting depth exceeded".into()));
        }
        let schema_idx = cursor.read_u16()? as usize;
        if schema_idx >= self.schemas.len() {
            return Err(Error::ParseError(format!(
                "struct schema index {} out of bounds ({} schemas available)",
                schema_idx, self.schemas.len()
            )));
        }
        let schema = &self.schemas[schema_idx];
        // One bit per field, rounded up to whole bytes.
        let bitmap_size = (schema.fields.len() + 7) / 8;

        let mut bitmap = Vec::with_capacity(bitmap_size.min(cursor.remaining()));
        for _ in 0..bitmap_size {
            bitmap.push(cursor.read_u8()?);
        }

        let mut obj = ObjectMap::new();
        for (i, field) in schema.fields.iter().enumerate() {
            let is_null = i / 8 < bitmap.len() && (bitmap[i / 8] & (1 << (i % 8))) != 0;
            if is_null {
                obj.insert(field.name.clone(), Value::Null);
            } else {
                // Resolve union types: if the base name is in union_map, decode as Tagged
                let tl_type = if self.union_map.contains_key(&field.field_type.base) {
                    TLType::Tagged
                } else {
                    field.field_type.to_tl_type()
                };
                obj.insert(field.name.clone(), self.decode_value(cursor, tl_type, depth + 1)?);
            }
        }

        Ok(Value::Object(obj))
    }
782
783    fn decode_map(&self, cursor: &mut Cursor, depth: usize) -> Result<Value> {
784        if depth > MAX_DECODE_DEPTH {
785            return Err(Error::ParseError("maximum decode nesting depth exceeded".into()));
786        }
787        let count = cursor.read_u32()?;
788        if count as usize > MAX_COLLECTION_SIZE {
789            return Err(Error::ParseError(format!(
790                "map element count {} exceeds limit of {}", count, MAX_COLLECTION_SIZE
791            )));
792        }
793        let capacity = (count as usize).min(cursor.remaining()).min(MAX_COLLECTION_SIZE);
794        let mut pairs = Vec::with_capacity(capacity);
795
796        for _ in 0..count {
797            let key_type = TLType::try_from(cursor.read_u8()?)?;
798            let key = self.decode_value(cursor, key_type, depth + 1)?;
799            // Validate map key type per spec: map keys must be string, int, or uint
800            match &key {
801                Value::String(_) | Value::Int(_) | Value::UInt(_) => {}
802                _ => return Err(Error::ParseError(
803                    format!("invalid map key type {:?}: map keys must be string, int, or uint", key.tl_type())
804                )),
805            }
806            let val_type = TLType::try_from(cursor.read_u8()?)?;
807            let val = self.decode_value(cursor, val_type, depth + 1)?;
808            pairs.push((key, val));
809        }
810
811        Ok(Value::Map(pairs))
812    }
813
814    fn decode_value(&self, cursor: &mut Cursor, tl_type: TLType, depth: usize) -> Result<Value> {
815        if depth > MAX_DECODE_DEPTH {
816            return Err(Error::ParseError("maximum decode nesting depth exceeded".into()));
817        }
818        Ok(match tl_type {
819            TLType::Null => Value::Null,
820            TLType::Bool => Value::Bool(cursor.read_u8()? != 0),
821            TLType::Int8 => Value::Int(cursor.read_i8()? as i64),
822            TLType::Int16 => Value::Int(cursor.read_i16()? as i64),
823            TLType::Int32 => Value::Int(cursor.read_i32()? as i64),
824            TLType::Int64 => Value::Int(cursor.read_i64()?),
825            TLType::UInt8 => Value::UInt(cursor.read_u8()? as u64),
826            TLType::UInt16 => Value::UInt(cursor.read_u16()? as u64),
827            TLType::UInt32 => Value::UInt(cursor.read_u32()? as u64),
828            TLType::UInt64 => Value::UInt(cursor.read_u64()?),
829            TLType::Float32 => Value::Float(cursor.read_f32()? as f64),
830            TLType::Float64 => Value::Float(cursor.read_f64()?),
831            TLType::String => {
832                let idx = cursor.read_u32()?;
833                Value::String(self.get_string(idx as usize)?)
834            }
835            TLType::Bytes => {
836                let len = cursor.read_varint()? as usize;
837                Value::Bytes(cursor.read_bytes(len)?)
838            }
839            TLType::Array => self.decode_array(cursor, depth)?,
840            TLType::Object => self.decode_object(cursor, depth)?,
841            TLType::Struct => self.decode_struct(cursor, depth)?,
842            TLType::Ref => {
843                let idx = cursor.read_u32()?;
844                Value::Ref(self.get_string(idx as usize)?)
845            }
846            TLType::Tagged => {
847                let tag_idx = cursor.read_u32()?;
848                let inner_type = TLType::try_from(cursor.read_u8()?)?;
849                let tag = self.get_string(tag_idx as usize)?;
850                let inner = self.decode_value(cursor, inner_type, depth + 1)?;
851                Value::Tagged(tag, Box::new(inner))
852            }
853            TLType::Map => self.decode_map(cursor, depth)?,
854            TLType::Timestamp => {
855                let ts = cursor.read_i64()?;
856                let tz = cursor.read_i16()?;
857                Value::Timestamp(ts, tz)
858            }
859            TLType::JsonNumber => {
860                let idx = cursor.read_u32()?;
861                Value::JsonNumber(self.get_string(idx as usize)?)
862            }
863            TLType::Tuple => {
864                // Tuple is decoded as an array
865                self.decode_array(cursor, depth)?
866            }
867        })
868    }
869}
870
/// Simple cursor for reading binary data with bounds checking.
///
/// Every `read_*` method validates that the requested bytes fit inside
/// `data` before advancing, so malformed input yields `Err` rather than
/// a panic or out-of-bounds slice.
struct Cursor<'a> {
    // Borrowed byte buffer being decoded (zero-copy).
    data: &'a [u8],
    // Current read position; kept <= data.len() by check_bounds.
    pos: usize,
}
876
877impl<'a> Cursor<'a> {
878    fn new(data: &'a [u8]) -> Self {
879        Self { data, pos: 0 }
880    }
881
882    fn remaining(&self) -> usize {
883        self.data.len().saturating_sub(self.pos)
884    }
885
886    fn check_bounds(&self, len: usize) -> Result<()> {
887        let end = self.pos.checked_add(len)
888            .ok_or_else(|| Error::ParseError("cursor position overflow".into()))?;
889        if end > self.data.len() {
890            return Err(Error::ParseError(format!(
891                "read out of bounds: pos={} len={} data_len={}", self.pos, len, self.data.len()
892            )));
893        }
894        Ok(())
895    }
896
897    fn read_u8(&mut self) -> Result<u8> {
898        self.check_bounds(1)?;
899        let v = self.data[self.pos];
900        self.pos += 1;
901        Ok(v)
902    }
903
904    fn read_i8(&mut self) -> Result<i8> {
905        Ok(self.read_u8()? as i8)
906    }
907
908    fn read_u16(&mut self) -> Result<u16> {
909        self.check_bounds(2)?;
910        let end = self.pos + 2; // safe: check_bounds verified this won't exceed data.len()
911        let v = u16::from_le_bytes(self.data[self.pos..end].try_into().map_err(|_|
912            Error::ParseError("u16 cursor conversion failed".into())
913        )?);
914        self.pos = end;
915        Ok(v)
916    }
917
918    fn read_i16(&mut self) -> Result<i16> {
919        Ok(self.read_u16()? as i16)
920    }
921
922    fn read_u32(&mut self) -> Result<u32> {
923        self.check_bounds(4)?;
924        let end = self.pos + 4;
925        let v = u32::from_le_bytes(self.data[self.pos..end].try_into().map_err(|_|
926            Error::ParseError("u32 cursor conversion failed".into())
927        )?);
928        self.pos = end;
929        Ok(v)
930    }
931
932    fn read_i32(&mut self) -> Result<i32> {
933        Ok(self.read_u32()? as i32)
934    }
935
936    fn read_u64(&mut self) -> Result<u64> {
937        self.check_bounds(8)?;
938        let end = self.pos + 8;
939        let v = u64::from_le_bytes(self.data[self.pos..end].try_into().map_err(|_|
940            Error::ParseError("u64 cursor conversion failed".into())
941        )?);
942        self.pos = end;
943        Ok(v)
944    }
945
946    fn read_i64(&mut self) -> Result<i64> {
947        Ok(self.read_u64()? as i64)
948    }
949
950    fn read_f32(&mut self) -> Result<f32> {
951        self.check_bounds(4)?;
952        let end = self.pos + 4;
953        let v = f32::from_le_bytes(self.data[self.pos..end].try_into().map_err(|_|
954            Error::ParseError("f32 cursor conversion failed".into())
955        )?);
956        self.pos = end;
957        Ok(v)
958    }
959
960    fn read_f64(&mut self) -> Result<f64> {
961        self.check_bounds(8)?;
962        let end = self.pos + 8;
963        let v = f64::from_le_bytes(self.data[self.pos..end].try_into().map_err(|_|
964            Error::ParseError("f64 cursor conversion failed".into())
965        )?);
966        self.pos = end;
967        Ok(v)
968    }
969
970    fn read_varint(&mut self) -> Result<u64> {
971        let mut result: u64 = 0;
972        let mut shift = 0;
973        for _ in 0..MAX_VARINT_BYTES {
974            let b = self.read_u8()?;
975            result |= ((b & 0x7F) as u64) << shift;
976            if b & 0x80 == 0 {
977                return Ok(result);
978            }
979            shift += 7;
980        }
981        Err(Error::ParseError("varint exceeds maximum length".into()))
982    }
983
984    fn read_bytes(&mut self, len: usize) -> Result<Vec<u8>> {
985        self.check_bounds(len)?;
986        let end = self.pos.checked_add(len)
987            .ok_or_else(|| Error::ParseError("read_bytes offset overflow".into()))?;
988        let v = self.data[self.pos..end].to_vec();
989        self.pos = end;
990        Ok(v)
991    }
992}
993
994fn decompress_data(data: &[u8]) -> Result<Vec<u8>> {
995    use flate2::read::ZlibDecoder;
996
997    let decoder = ZlibDecoder::new(data);
998    let mut limited = decoder.take((MAX_DECOMPRESSED_SIZE as u64) + 1);
999    let mut result = Vec::new();
1000    limited.read_to_end(&mut result)
1001        .map_err(|_| Error::ParseError("Decompression failed".to_string()))?;
1002    if result.len() > MAX_DECOMPRESSED_SIZE {
1003        return Err(Error::ParseError(format!(
1004            "Decompressed data exceeds maximum size of {} bytes", MAX_DECOMPRESSED_SIZE
1005        )));
1006    }
1007    Ok(result)
1008}
1009
// Unit tests: round-trips through Writer, error handling on malformed
// headers/indices, Cursor bounds checking, the decompression cache, and
// fuzzer regression artifacts (crafted byte buffers that must never panic
// or OOM). Fixture bytes are copied verbatim from crash artifacts — do not
// reformat them.
#[cfg(test)]
mod tests {
    use super::*;
    use crate::writer::Writer;

    #[test]
    fn test_open_mmap() {
        // Write a binary file first, then open with mmap
        let dir = std::env::temp_dir();
        let path = dir.join("test_reader_mmap.tlbx");

        let mut w = Writer::new();
        w.add_section("val", &Value::Int(42), None).unwrap();
        w.write(&path, false).unwrap();

        let r = Reader::open_mmap(&path).unwrap();
        assert_eq!(r.get("val").unwrap().as_int(), Some(42));
        std::fs::remove_file(&path).ok();
    }

    #[test]
    fn test_open_regular() {
        // Round-trip a string section through the in-memory (non-mmap) reader.
        let dir = std::env::temp_dir();
        let path = dir.join("test_reader_open.tlbx");

        let mut w = Writer::new();
        w.add_section("greeting", &Value::String("hi".into()), None).unwrap();
        w.write(&path, false).unwrap();

        let r = Reader::open(&path).unwrap();
        assert_eq!(r.get("greeting").unwrap().as_str(), Some("hi"));
        std::fs::remove_file(&path).ok();
    }

    #[test]
    fn test_invalid_magic() {
        // A header-sized all-zero buffer carries no valid magic and must be rejected.
        let result = Reader::from_bytes(vec![0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]);
        assert!(result.is_err());
    }

    #[test]
    fn test_too_short_data() {
        // A buffer shorter than the fixed header must fail cleanly.
        let result = Reader::from_bytes(vec![0; 10]);
        assert!(result.is_err());
    }

    #[test]
    fn test_wrong_version() {
        let mut data = vec![0u8; 64];
        // Set correct magic bytes "TLFX"
        data[0] = b'T'; data[1] = b'L'; data[2] = b'F'; data[3] = b'X';
        // Set wrong major version (3)
        data[4] = 3; data[5] = 0;
        let result = Reader::from_bytes(data);
        assert!(result.is_err());
    }

    #[test]
    fn test_string_index_out_of_bounds() {
        // get_string must bounds-check indices into the string table.
        let dir = std::env::temp_dir();
        let path = dir.join("test_str_oob.tlbx");

        let mut w = Writer::new();
        w.add_section("x", &Value::Int(1), None).unwrap();
        w.write(&path, false).unwrap();

        let r = Reader::from_bytes(std::fs::read(&path).unwrap()).unwrap();
        let result = r.get_string(9999);
        assert!(result.is_err());
        std::fs::remove_file(&path).ok();
    }

    #[test]
    fn test_keys() {
        // keys() lists every section name written to the file.
        let dir = std::env::temp_dir();
        let path = dir.join("test_reader_keys.tlbx");

        let mut w = Writer::new();
        w.add_section("alpha", &Value::Int(1), None).unwrap();
        w.add_section("beta", &Value::Int(2), None).unwrap();
        w.write(&path, false).unwrap();

        let r = Reader::from_bytes(std::fs::read(&path).unwrap()).unwrap();
        let keys = r.keys();
        assert!(keys.contains(&"alpha"));
        assert!(keys.contains(&"beta"));
        std::fs::remove_file(&path).ok();
    }

    #[test]
    fn test_missing_key() {
        // get() on an unknown section name returns Err, not panic.
        let dir = std::env::temp_dir();
        let path = dir.join("test_reader_missing.tlbx");

        let mut w = Writer::new();
        w.add_section("exists", &Value::Int(1), None).unwrap();
        w.write(&path, false).unwrap();

        let r = Reader::from_bytes(std::fs::read(&path).unwrap()).unwrap();
        assert!(r.get("nonexistent").is_err());
        std::fs::remove_file(&path).ok();
    }

    #[test]
    fn test_struct_section_roundtrip() {
        // Schema-backed struct arrays round-trip, including null fields via the bitmap.
        let dir = std::env::temp_dir();
        let path = dir.join("test_struct_section.tlbx");

        let mut schema = Schema::new("Point");
        schema.add_field("x", FieldType::new("int"));
        schema.add_field("y", FieldType::new("int"));

        let mut w = Writer::new();
        w.add_schema(schema.clone());

        let mut obj1 = ObjectMap::new();
        obj1.insert("x".to_string(), Value::Int(10));
        obj1.insert("y".to_string(), Value::Int(20));

        let mut obj2 = ObjectMap::new();
        obj2.insert("x".to_string(), Value::Int(30));
        obj2.insert("y".to_string(), Value::Null);

        let arr = Value::Array(vec![Value::Object(obj1), Value::Object(obj2)]);
        w.add_section("points", &arr, Some(&schema)).unwrap();
        w.write(&path, false).unwrap();

        let r = Reader::from_bytes(std::fs::read(&path).unwrap()).unwrap();
        assert!(!r.schemas.is_empty());

        let points = r.get("points").unwrap();
        let items = points.as_array().unwrap();
        assert_eq!(items.len(), 2);
        let p1 = items[0].as_object().unwrap();
        assert_eq!(p1.get("x").unwrap().as_int(), Some(10));
        let p2 = items[1].as_object().unwrap();
        assert!(p2.get("y").unwrap().is_null());
        std::fs::remove_file(&path).ok();
    }

    #[test]
    fn test_heterogeneous_array() {
        // Mixed-type array uses 0xFF element type marker
        let dir = std::env::temp_dir();
        let path = dir.join("test_hetero_arr.tlbx");

        let arr = Value::Array(vec![
            Value::Int(1),
            Value::String("hello".into()),
            Value::Bool(true),
        ]);

        let mut w = Writer::new();
        w.add_section("mixed", &arr, None).unwrap();
        w.write(&path, false).unwrap();

        let r = Reader::from_bytes(std::fs::read(&path).unwrap()).unwrap();
        let val = r.get("mixed").unwrap();
        let items = val.as_array().unwrap();
        assert_eq!(items.len(), 3);
        assert_eq!(items[0].as_int(), Some(1));
        assert_eq!(items[1].as_str(), Some("hello"));
        assert_eq!(items[2].as_bool(), Some(true));
        std::fs::remove_file(&path).ok();
    }

    #[test]
    fn test_empty_array() {
        // Zero-length arrays decode to an empty Vec (early-return path).
        let dir = std::env::temp_dir();
        let path = dir.join("test_empty_arr.tlbx");

        let mut w = Writer::new();
        w.add_section("empty", &Value::Array(vec![]), None).unwrap();
        w.write(&path, false).unwrap();

        let r = Reader::from_bytes(std::fs::read(&path).unwrap()).unwrap();
        let val = r.get("empty").unwrap();
        let items = val.as_array().unwrap();
        assert_eq!(items.len(), 0);
        std::fs::remove_file(&path).ok();
    }

    #[test]
    fn test_truncated_section_data() {
        // Craft a file where section offset points past end of file
        let dir = std::env::temp_dir();
        let path = dir.join("test_truncated.tlbx");

        let mut w = Writer::new();
        w.add_section("val", &Value::Int(42), None).unwrap();
        w.write(&path, false).unwrap();

        // Read and truncate the data
        let mut data = std::fs::read(&path).unwrap();
        data.truncate(data.len() - 1); // Remove last byte
        // The reader should either error during parsing or during get()
        // (depends on what the last byte was), but should not panic
        let result = Reader::from_bytes(data);
        if let Ok(r) = result {
            // If it parsed headers OK, get should fail gracefully
            let _ = r.get("val"); // Should not panic
        }
        std::fs::remove_file(&path).ok();
    }

    #[test]
    fn test_cursor_bounds_checking() {
        // Ensure cursor read methods return errors on out-of-bounds
        let data = vec![1u8, 2];
        let mut cursor = Cursor::new(&data);
        assert!(cursor.read_u8().is_ok());
        assert!(cursor.read_u8().is_ok());
        assert!(cursor.read_u8().is_err()); // Past end

        let mut cursor2 = Cursor::new(&data);
        assert!(cursor2.read_u32().is_err()); // Only 2 bytes available

        let empty: Vec<u8> = vec![];
        let mut cursor3 = Cursor::new(&empty);
        assert!(cursor3.read_u8().is_err());
        assert!(cursor3.read_varint().is_err());
    }

    #[test]
    fn test_varint_too_long() {
        // All continuation bytes (0x80) - should error after MAX_VARINT_BYTES
        let data = vec![0x80u8; 20];
        let mut cursor = Cursor::new(&data);
        assert!(cursor.read_varint().is_err());
    }

    // =========================================================================
    // Issue 10: Decompression cache
    // =========================================================================

    #[test]
    fn test_cache_returns_same_value() {
        use crate::Writer;
        let dir = std::env::temp_dir();
        let path = dir.join("test_cache_hit.tlbx");

        let mut w = Writer::new();
        w.add_section("greeting", &crate::Value::String("hello".into()), None).unwrap();
        w.add_section("number", &crate::Value::Int(42), None).unwrap();
        w.write(&path, true).unwrap(); // compressed

        let r = Reader::from_bytes(std::fs::read(&path).unwrap()).unwrap();

        // First call populates cache
        let v1 = r.get("greeting").unwrap();
        // Second call should return cached value
        let v2 = r.get("greeting").unwrap();
        assert_eq!(v1, v2);
        assert_eq!(v1.as_str(), Some("hello"));

        // Verify cache has entry
        assert!(r.cache.borrow().contains_key("greeting"));
        assert!(!r.cache.borrow().contains_key("number")); // not yet accessed

        // Access number
        let num = r.get("number").unwrap();
        assert_eq!(num.as_int(), Some(42));
        assert!(r.cache.borrow().contains_key("number"));

        std::fs::remove_file(&path).ok();
    }

    #[test]
    fn test_fuzz_crash_crafted_tlbx_2_no_panic() {
        // Regression: fuzz_reader crash-5a6d5f6582c97f5bdc177383430d35c687c640fb
        let data: Vec<u8> = vec![
            0x54, 0x4C, 0x42, 0x58, 0x02, 0x00, 0x0E, 0x03, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
            0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x3A, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
            0x3A, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
            0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x08, 0x00, 0x00, 0x00, 0x00, 0x00, 0x24, 0x00,
            0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
            0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x06, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
            0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
            0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
            0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
            0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x14, 0x00,
            0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
            0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x06, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
            0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
            0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
            0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
            0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x14, 0x00,
            0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
            0x00, 0x00, 0x00, 0xC0, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
            0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x20, 0x00, 0x00, 0x00,
            0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x20, 0x00, 0x00, 0x58, 0x00, 0x00, 0x00, 0x0A, 0x00, 0x00,
            0x00, 0x00, 0x00, 0x0A, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00,
        ];
        let result = Reader::from_bytes(data);
        if let Ok(r) = result {
            for key in r.keys() {
                let _ = r.get(key); // Must not panic
            }
        }
    }

    #[test]
    fn test_fuzz_oom_crafted_tlbx_no_panic() {
        // Regression: fuzz_reader oom-1aecaf7b31c65092524b8d5ea7e9b979f06da315
        // Crafted TLBX that triggers large allocations via inflated count fields.
        // Must return Err (or Ok with bounded memory), not OOM.
        let data: Vec<u8> = vec![
            0x54, 0x4C, 0x42, 0x58, 0x02, 0x00, 0x0E, 0x03, 0x00, 0x00, 0x00, 0x00, 0x0E, 0x00, 0x00, 0x00,
            0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x3A, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
            0x3A, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
            0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x08, 0x00, 0x00, 0x00, 0x00, 0x02, 0x24, 0x00,
            0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
            0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x06, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
            0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
            0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
            0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
            0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
            0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
            0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
            0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x2D, 0x00, 0x00, 0x00,
            0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x42, 0x4C,
            0x54, 0x26, 0x00, 0x00, 0x00, 0x00, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0x23,
            0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
            0x67, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xFE, 0x00, 0x00, 0xFF, 0xFF, 0xFF, 0x00, 0x00, 0x00,
            0x00, 0x00, 0x00, 0x00, 0xFE, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x40, 0x00, 0x00, 0x00,
            0x00, 0x30, 0x30, 0x30, 0x30, 0x30, 0x20, 0x30, 0x30, 0x30, 0x30, 0x30, 0x30, 0x30, 0x30, 0x30,
            0x30, 0x30, 0x30, 0x30, 0x30, 0x30, 0x30, 0x30, 0x30, 0x30, 0x30, 0x30, 0x30, 0x00, 0x00, 0x00,
            0x00,
        ];
        let result = Reader::from_bytes(data);
        if let Ok(r) = result {
            for key in r.keys() {
                let _ = r.get(key); // Must not panic or OOM
            }
        }
    }

    #[test]
    fn test_fuzz_oom_null_array_no_oom() {
        // Regression: fuzz_reader oom-691da8e2c0990bf219571aef468e5fce6e23cabc
        // 335-byte file with count=973,078,528 and elem_type=Null (0x00).
        // Null elements consume 0 cursor bytes, so the loop never fails on its own.
        // Without MAX_COLLECTION_SIZE check, this allocates ~31 GB → OOM.
        let data: Vec<u8> = vec![
            0x54, 0x4C, 0x42, 0x58, 0x02, 0x00, 0x0E, 0x03, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
            0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x3A, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
            0x3A, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
            0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x08, 0x00, 0x00, 0x00, 0x00, 0x00, 0x24, 0x00,
            0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
            0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x06, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
            // ... padding (all zeros for entries 1-6) ...
        ];
        // Pad to 335 bytes (same structure as the crash artifact)
        let mut padded = data.clone();
        padded.resize(290, 0x00);
        // Entry 7 at offset 290: key_idx=0, offset=29, size=32, ptype=0x20 (Array)
        padded.extend_from_slice(&[
            0x00, 0x00, 0x00, 0x00,             // key_idx = 0
            0x1D, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, // offset = 29
            0x20, 0x00, 0x00, 0x00,             // size = 32
            0x00, 0x00, 0x00, 0x00,             // uncompressed = 0
            0x00, 0x00,                         // schema_idx = 0
            0x20,                               // ptype = Array
            0x00,                               // flags = 0
            0x00, 0x58, 0x00, 0x00,             // item_count
            0x00, 0x00, 0x00, 0x00,             // padding
        ]);
        padded.resize(335, 0x00);

        let result = Reader::from_bytes(padded);
        if let Ok(r) = result {
            for key in r.keys() {
                let _ = r.get(key); // Must not OOM
            }
        }
    }

    #[test]
    fn test_fuzz_crash_crafted_tlbx_no_panic() {
        // Regression: fuzz_reader crash-b239a207c06b3584ad33adddeac470e0fa792cb9
        // Crafted TLBX with valid magic but manipulated section pointers.
        // Must return Err, not panic.
        let data: Vec<u8> = vec![
            0x54, 0x4C, 0x42, 0x58, 0x02, 0x00, 0x0E, 0x03, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
            0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x3A, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
            0x3A, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
            0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x08, 0x00, 0x00, 0x00, 0x00, 0x00, 0x24, 0x00,
            0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
            0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x06, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
            0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
            0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x08, 0x00, 0x00, 0x00, 0x00, 0x00, 0x24, 0x00,
            0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
            0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x06, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
            0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
            0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
            0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
            0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x54,
            0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x80, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
            0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x08, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
            0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xC0, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
            0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x08, 0x00, 0x00, 0x00, 0x00, 0x00, 0x24, 0x00,
            0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x75, 0x00,
            0x00, 0x00, 0x00, 0x00, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
            0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
            0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x00, 0x00,
            0x00, 0x00, 0x11, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
            0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
        ];
        let result = Reader::from_bytes(data);
        if let Ok(r) = result {
            for key in r.keys() {
                let _ = r.get(key); // Must not panic
            }
        }
    }

    #[test]
    fn test_clear_cache() {
        // clear_cache() empties the decode cache; sections remain readable after.
        use crate::Writer;
        let dir = std::env::temp_dir();
        let path = dir.join("test_cache_clear.tlbx");

        let mut w = Writer::new();
        w.add_section("val", &crate::Value::Int(99), None).unwrap();
        w.write(&path, false).unwrap();

        let r = Reader::from_bytes(std::fs::read(&path).unwrap()).unwrap();
        let _ = r.get("val").unwrap();
        assert_eq!(r.cache.borrow().len(), 1);

        r.clear_cache();
        assert_eq!(r.cache.borrow().len(), 0);

        // Re-access still works
        let v = r.get("val").unwrap();
        assert_eq!(v.as_int(), Some(99));

        std::fs::remove_file(&path).ok();
    }
}