Skip to main content

nodedb_query/msgpack_scan/
reader.rs

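//! Low-level MessagePack binary reader: tag parsing, value skipping, and typed reads.
//!
//! All functions operate on `&[u8]` with explicit offsets. The skipping and
//! borrowing readers are zero allocation and zero copy; only `read_value`
//! allocates, when it returns an owned string. Truncated or invalid data is
//! reported as `None` (or `false` for `read_null`) — never as a panic.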
5
6use std::str;
7
8// ── Tag constants ──────────────────────────────────────────────────────
9
10const NIL: u8 = 0xc0;
11const FALSE: u8 = 0xc2;
12const TRUE: u8 = 0xc3;
13const BIN8: u8 = 0xc4;
14const BIN16: u8 = 0xc5;
15const BIN32: u8 = 0xc6;
16const EXT8: u8 = 0xc7;
17const EXT16: u8 = 0xc8;
18const EXT32: u8 = 0xc9;
19const FLOAT32: u8 = 0xca;
20const FLOAT64: u8 = 0xcb;
21const UINT8: u8 = 0xcc;
22const UINT16: u8 = 0xcd;
23const UINT32: u8 = 0xce;
24const UINT64: u8 = 0xcf;
25const INT8: u8 = 0xd0;
26const INT16: u8 = 0xd1;
27const INT32: u8 = 0xd2;
28const INT64: u8 = 0xd3;
29const FIXEXT1: u8 = 0xd4;
30const FIXEXT2: u8 = 0xd5;
31const FIXEXT4: u8 = 0xd6;
32const FIXEXT8: u8 = 0xd7;
33const FIXEXT16: u8 = 0xd8;
34const STR8: u8 = 0xd9;
35const STR16: u8 = 0xda;
36const STR32: u8 = 0xdb;
37const ARRAY16: u8 = 0xdc;
38const ARRAY32: u8 = 0xdd;
39const MAP16: u8 = 0xde;
40const MAP32: u8 = 0xdf;
41
42/// Maximum nesting depth to prevent stack overflow on malicious payloads.
43const MAX_DEPTH: u16 = 128;
44
45// ── Inline helpers ─────────────────────────────────────────────────────
46
47#[inline(always)]
48fn get(buf: &[u8], pos: usize) -> Option<u8> {
49    buf.get(pos).copied()
50}
51
/// Read a big-endian `u16` starting at `pos`.
///
/// Returns `None` when fewer than two bytes are available at `pos`, or when
/// `pos + 2` is not representable (checked, so a huge `pos` cannot panic).
#[inline(always)]
fn read_u16_be(buf: &[u8], pos: usize) -> Option<u16> {
    let end = pos.checked_add(2)?;
    match *buf.get(pos..end)? {
        [hi, lo] => Some(u16::from_be_bytes([hi, lo])),
        _ => None,
    }
}
57
/// Read a big-endian `u32` starting at `pos`.
///
/// Returns `None` when fewer than four bytes are available at `pos`, or when
/// `pos + 4` is not representable (checked, so a huge `pos` cannot panic).
#[inline(always)]
fn read_u32_be(buf: &[u8], pos: usize) -> Option<u32> {
    let end = pos.checked_add(4)?;
    match *buf.get(pos..end)? {
        [b0, b1, b2, b3] => Some(u32::from_be_bytes([b0, b1, b2, b3])),
        _ => None,
    }
}
63
/// Read a big-endian `u64` starting at `pos`.
///
/// Returns `None` when fewer than eight bytes are available at `pos`, or when
/// `pos + 8` is not representable (checked, so a huge `pos` cannot panic).
#[inline(always)]
fn read_u64_be(buf: &[u8], pos: usize) -> Option<u64> {
    let end = pos.checked_add(8)?;
    match *buf.get(pos..end)? {
        [b0, b1, b2, b3, b4, b5, b6, b7] => {
            Some(u64::from_be_bytes([b0, b1, b2, b3, b4, b5, b6, b7]))
        }
        _ => None,
    }
}
71
/// Return `Some(offset + size)` only if the buffer has enough bytes.
///
/// The addition is checked: if `offset + size` is not representable the
/// result is `None` rather than a panic (debug) or a wrapped, bogus offset
/// (release).
#[inline(always)]
fn checked_advance(buf: &[u8], offset: usize, size: usize) -> Option<usize> {
    offset.checked_add(size).filter(|&end| end <= buf.len())
}
78
79// ── skip_value ─────────────────────────────────────────────────────────
80
81/// Advance past the MessagePack value starting at `offset`, returning the
82/// offset of the next value. Returns `None` if the buffer is truncated or
83/// nesting exceeds `MAX_DEPTH`.
84///
85/// This is the performance-critical primitive. It never allocates.
86pub fn skip_value(buf: &[u8], offset: usize) -> Option<usize> {
87    skip_value_depth(buf, offset, 0)
88}
89
90fn skip_value_depth(buf: &[u8], offset: usize, depth: u16) -> Option<usize> {
91    if depth > MAX_DEPTH {
92        return None;
93    }
94    let tag = get(buf, offset)?;
95    match tag {
96        // positive fixint (0x00..=0x7f)
97        0x00..=0x7f => Some(offset + 1),
98        // negative fixint (0xe0..=0xff)
99        0xe0..=0xff => Some(offset + 1),
100        // nil, false, true
101        NIL | FALSE | TRUE => Some(offset + 1),
102
103        // fixmap (0x80..=0x8f)
104        0x80..=0x8f => {
105            let count = (tag & 0x0f) as usize;
106            skip_n_pairs(buf, offset + 1, count, depth)
107        }
108        MAP16 => {
109            let count = read_u16_be(buf, offset + 1)? as usize;
110            skip_n_pairs(buf, offset + 3, count, depth)
111        }
112        MAP32 => {
113            let count = read_u32_be(buf, offset + 1)? as usize;
114            skip_n_pairs(buf, offset + 5, count, depth)
115        }
116
117        // fixarray (0x90..=0x9f)
118        0x90..=0x9f => {
119            let count = (tag & 0x0f) as usize;
120            skip_n_values(buf, offset + 1, count, depth)
121        }
122        ARRAY16 => {
123            let count = read_u16_be(buf, offset + 1)? as usize;
124            skip_n_values(buf, offset + 3, count, depth)
125        }
126        ARRAY32 => {
127            let count = read_u32_be(buf, offset + 1)? as usize;
128            skip_n_values(buf, offset + 5, count, depth)
129        }
130
131        // fixstr (0xa0..=0xbf)
132        0xa0..=0xbf => {
133            let len = (tag & 0x1f) as usize;
134            checked_advance(buf, offset, 1 + len)
135        }
136        STR8 => {
137            let len = get(buf, offset + 1)? as usize;
138            checked_advance(buf, offset, 2 + len)
139        }
140        STR16 => {
141            let len = read_u16_be(buf, offset + 1)? as usize;
142            checked_advance(buf, offset, 3 + len)
143        }
144        STR32 => {
145            let len = read_u32_be(buf, offset + 1)? as usize;
146            checked_advance(buf, offset, 5 + len)
147        }
148
149        // bin
150        BIN8 => {
151            let len = get(buf, offset + 1)? as usize;
152            checked_advance(buf, offset, 2 + len)
153        }
154        BIN16 => {
155            let len = read_u16_be(buf, offset + 1)? as usize;
156            checked_advance(buf, offset, 3 + len)
157        }
158        BIN32 => {
159            let len = read_u32_be(buf, offset + 1)? as usize;
160            checked_advance(buf, offset, 5 + len)
161        }
162
163        // fixed-width numerics (bounds-check against buffer length)
164        FLOAT32 => checked_advance(buf, offset, 5),
165        FLOAT64 => checked_advance(buf, offset, 9),
166        UINT8 | INT8 => checked_advance(buf, offset, 2),
167        UINT16 | INT16 => checked_advance(buf, offset, 3),
168        UINT32 | INT32 => checked_advance(buf, offset, 5),
169        UINT64 | INT64 => checked_advance(buf, offset, 9),
170
171        // ext
172        FIXEXT1 => checked_advance(buf, offset, 3),
173        FIXEXT2 => checked_advance(buf, offset, 4),
174        FIXEXT4 => checked_advance(buf, offset, 6),
175        FIXEXT8 => checked_advance(buf, offset, 10),
176        FIXEXT16 => checked_advance(buf, offset, 18),
177        EXT8 => {
178            let len = get(buf, offset + 1)? as usize;
179            checked_advance(buf, offset, 3 + len)
180        }
181        EXT16 => {
182            let len = read_u16_be(buf, offset + 1)? as usize;
183            checked_advance(buf, offset, 4 + len)
184        }
185        EXT32 => {
186            let len = read_u32_be(buf, offset + 1)? as usize;
187            checked_advance(buf, offset, 6 + len)
188        }
189
190        // 0xc1 is never used in the spec
191        _ => None,
192    }
193}
194
195fn skip_n_values(buf: &[u8], mut pos: usize, count: usize, depth: u16) -> Option<usize> {
196    for _ in 0..count {
197        pos = skip_value_depth(buf, pos, depth + 1)?;
198    }
199    Some(pos)
200}
201
202fn skip_n_pairs(buf: &[u8], mut pos: usize, count: usize, depth: u16) -> Option<usize> {
203    for _ in 0..count {
204        pos = skip_value_depth(buf, pos, depth + 1)?; // key
205        pos = skip_value_depth(buf, pos, depth + 1)?; // value
206    }
207    Some(pos)
208}
209
210// ── Typed reads ────────────────────────────────────────────────────────
211
212/// Read an f64 from the value at `offset`. Handles float32, float64,
213/// and all integer types (coerced to f64).
214pub fn read_f64(buf: &[u8], offset: usize) -> Option<f64> {
215    let tag = get(buf, offset)?;
216    match tag {
217        // positive fixint
218        0x00..=0x7f => Some(tag as f64),
219        // negative fixint
220        0xe0..=0xff => Some((tag as i8) as f64),
221        FLOAT64 => {
222            let bits = read_u64_be(buf, offset + 1)?;
223            Some(f64::from_bits(bits))
224        }
225        FLOAT32 => {
226            let bits = read_u32_be(buf, offset + 1)?;
227            Some(f32::from_bits(bits) as f64)
228        }
229        UINT8 => Some(get(buf, offset + 1)? as f64),
230        UINT16 => Some(read_u16_be(buf, offset + 1)? as f64),
231        UINT32 => Some(read_u32_be(buf, offset + 1)? as f64),
232        UINT64 => Some(read_u64_be(buf, offset + 1)? as f64),
233        INT8 => Some(get(buf, offset + 1)? as i8 as f64),
234        INT16 => Some(read_u16_be(buf, offset + 1)? as i16 as f64),
235        INT32 => Some(read_u32_be(buf, offset + 1)? as i32 as f64),
236        INT64 => Some(read_u64_be(buf, offset + 1)? as i64 as f64),
237        _ => None,
238    }
239}
240
241/// Read an i64 from the value at `offset`. Handles all integer types.
242/// Floats return `None` — use `read_f64` for those.
243pub fn read_i64(buf: &[u8], offset: usize) -> Option<i64> {
244    let tag = get(buf, offset)?;
245    match tag {
246        0x00..=0x7f => Some(tag as i64),
247        0xe0..=0xff => Some((tag as i8) as i64),
248        UINT8 => Some(get(buf, offset + 1)? as i64),
249        UINT16 => Some(read_u16_be(buf, offset + 1)? as i64),
250        UINT32 => Some(read_u32_be(buf, offset + 1)? as i64),
251        UINT64 => {
252            let v = read_u64_be(buf, offset + 1)?;
253            Some(v as i64)
254        }
255        INT8 => Some(get(buf, offset + 1)? as i8 as i64),
256        INT16 => Some(read_u16_be(buf, offset + 1)? as i16 as i64),
257        INT32 => Some(read_u32_be(buf, offset + 1)? as i32 as i64),
258        INT64 => {
259            let v = read_u64_be(buf, offset + 1)?;
260            Some(v as i64)
261        }
262        _ => None,
263    }
264}
265
266/// Read a string slice from the value at `offset`. Zero-copy — borrows
267/// directly from the input buffer. Returns `None` for non-string types
268/// or invalid UTF-8.
269pub fn read_str(buf: &[u8], offset: usize) -> Option<&str> {
270    let (start, len) = str_bounds(buf, offset)?;
271    let bytes = buf.get(start..start + len)?;
272    str::from_utf8(bytes).ok()
273}
274
275/// Return `(data_start, byte_len)` for the string at `offset` without
276/// validating UTF-8. Used internally for key comparison.
277pub(crate) fn str_bounds(buf: &[u8], offset: usize) -> Option<(usize, usize)> {
278    let tag = get(buf, offset)?;
279    match tag {
280        0xa0..=0xbf => {
281            let len = (tag & 0x1f) as usize;
282            Some((offset + 1, len))
283        }
284        STR8 => {
285            let len = get(buf, offset + 1)? as usize;
286            Some((offset + 2, len))
287        }
288        STR16 => {
289            let len = read_u16_be(buf, offset + 1)? as usize;
290            Some((offset + 3, len))
291        }
292        STR32 => {
293            let len = read_u32_be(buf, offset + 1)? as usize;
294            Some((offset + 5, len))
295        }
296        _ => None,
297    }
298}
299
300/// Read a boolean from the value at `offset`.
301pub fn read_bool(buf: &[u8], offset: usize) -> Option<bool> {
302    match get(buf, offset)? {
303        TRUE => Some(true),
304        FALSE => Some(false),
305        _ => None,
306    }
307}
308
309/// Check if the value at `offset` is nil.
310pub fn read_null(buf: &[u8], offset: usize) -> bool {
311    get(buf, offset) == Some(NIL)
312}
313
314/// Read a scalar msgpack value at `offset` into `nodedb_types::Value`.
315///
316/// Handles null, bool, integers, floats, and strings. For complex types
317/// (array, map, bin, ext), returns `None` — caller should use
318/// `json_from_msgpack` for those.
319pub fn read_value(buf: &[u8], offset: usize) -> Option<nodedb_types::Value> {
320    let tag = get(buf, offset)?;
321    match tag {
322        NIL => Some(nodedb_types::Value::Null),
323        TRUE => Some(nodedb_types::Value::Bool(true)),
324        FALSE => Some(nodedb_types::Value::Bool(false)),
325        // Integers
326        0x00..=0x7f => Some(nodedb_types::Value::Integer(tag as i64)),
327        0xe0..=0xff => Some(nodedb_types::Value::Integer((tag as i8) as i64)),
328        UINT8 => Some(nodedb_types::Value::Integer(get(buf, offset + 1)? as i64)),
329        UINT16 => Some(nodedb_types::Value::Integer(
330            read_u16_be(buf, offset + 1)? as i64
331        )),
332        UINT32 => Some(nodedb_types::Value::Integer(
333            read_u32_be(buf, offset + 1)? as i64
334        )),
335        UINT64 => Some(nodedb_types::Value::Integer(
336            read_u64_be(buf, offset + 1)? as i64
337        )),
338        INT8 => Some(nodedb_types::Value::Integer(
339            get(buf, offset + 1)? as i8 as i64
340        )),
341        INT16 => Some(nodedb_types::Value::Integer(
342            read_u16_be(buf, offset + 1)? as i16 as i64,
343        )),
344        INT32 => Some(nodedb_types::Value::Integer(
345            read_u32_be(buf, offset + 1)? as i32 as i64,
346        )),
347        INT64 => Some(nodedb_types::Value::Integer(
348            read_u64_be(buf, offset + 1)? as i64
349        )),
350        // Floats
351        FLOAT32 => {
352            let bits = read_u32_be(buf, offset + 1)?;
353            Some(nodedb_types::Value::Float(f32::from_bits(bits) as f64))
354        }
355        FLOAT64 => {
356            let bits = read_u64_be(buf, offset + 1)?;
357            Some(nodedb_types::Value::Float(f64::from_bits(bits)))
358        }
359        // Strings
360        0xa0..=0xbf | STR8 | STR16 | STR32 => {
361            read_str(buf, offset).map(|s| nodedb_types::Value::String(s.to_string()))
362        }
363        _ => None,
364    }
365}
366
367/// Return the number of key-value pairs and the offset of the first pair,
368/// for the map starting at `offset`. Returns `None` if not a map.
369pub fn map_header(buf: &[u8], offset: usize) -> Option<(usize, usize)> {
370    let tag = get(buf, offset)?;
371    match tag {
372        0x80..=0x8f => Some(((tag & 0x0f) as usize, offset + 1)),
373        MAP16 => Some((read_u16_be(buf, offset + 1)? as usize, offset + 3)),
374        MAP32 => Some((read_u32_be(buf, offset + 1)? as usize, offset + 5)),
375        _ => None,
376    }
377}
378
379/// Return the number of elements and the offset of the first element,
380/// for the array starting at `offset`. Returns `None` if not an array.
381pub fn array_header(buf: &[u8], offset: usize) -> Option<(usize, usize)> {
382    let tag = get(buf, offset)?;
383    match tag {
384        0x90..=0x9f => Some(((tag & 0x0f) as usize, offset + 1)),
385        ARRAY16 => Some((read_u16_be(buf, offset + 1)? as usize, offset + 3)),
386        ARRAY32 => Some((read_u32_be(buf, offset + 1)? as usize, offset + 5)),
387        _ => None,
388    }
389}
390
391#[cfg(test)]
392mod tests {
393    use super::*;
394
395    use serde_json::json;
396
397    /// Helper: encode a serde_json::Value to MessagePack bytes.
398    fn encode(v: &serde_json::Value) -> Vec<u8> {
399        nodedb_types::json_msgpack::json_to_msgpack(v).expect("encode")
400    }
401
402    #[test]
403    fn skip_positive_fixint() {
404        let buf = [0x05, 0xff];
405        assert_eq!(skip_value(&buf, 0), Some(1));
406    }
407
408    #[test]
409    fn skip_negative_fixint() {
410        let buf = [0xe0, 0x00];
411        assert_eq!(skip_value(&buf, 0), Some(1));
412    }
413
414    #[test]
415    fn skip_nil_bool() {
416        assert_eq!(skip_value(&[NIL], 0), Some(1));
417        assert_eq!(skip_value(&[TRUE], 0), Some(1));
418        assert_eq!(skip_value(&[FALSE], 0), Some(1));
419    }
420
421    #[test]
422    fn skip_float64() {
423        let buf = encode(&json!(9.81));
424        assert_eq!(skip_value(&buf, 0), Some(buf.len()));
425    }
426
427    #[test]
428    fn skip_string() {
429        let buf = encode(&json!("hello"));
430        assert_eq!(skip_value(&buf, 0), Some(buf.len()));
431    }
432
433    #[test]
434    fn skip_map() {
435        let buf = encode(&json!({"a": 1, "b": 2}));
436        assert_eq!(skip_value(&buf, 0), Some(buf.len()));
437    }
438
439    #[test]
440    fn skip_nested_array() {
441        let buf = encode(&json!([[1, 2], [3, 4, 5]]));
442        assert_eq!(skip_value(&buf, 0), Some(buf.len()));
443    }
444
445    #[test]
446    fn skip_truncated_returns_none() {
447        let buf = [FLOAT64, 0x40]; // truncated float64
448        assert_eq!(skip_value(&buf, 0), None);
449    }
450
451    #[test]
452    fn read_f64_fixint() {
453        assert_eq!(read_f64(&[42u8], 0), Some(42.0));
454    }
455
456    #[test]
457    fn read_f64_negative_fixint() {
458        assert_eq!(read_f64(&[0xffu8], 0), Some(-1.0));
459    }
460
461    #[test]
462    fn read_f64_float64() {
463        let buf = encode(&json!(std::f64::consts::PI));
464        assert_eq!(read_f64(&buf, 0), Some(std::f64::consts::PI));
465    }
466
467    #[test]
468    fn read_f64_uint16() {
469        let buf = encode(&json!(1000));
470        assert_eq!(read_f64(&buf, 0), Some(1000.0));
471    }
472
473    #[test]
474    fn read_i64_values() {
475        assert_eq!(read_i64(&[42u8], 0), Some(42));
476        assert_eq!(read_i64(&[0xffu8], 0), Some(-1));
477
478        let buf = encode(&json!(300));
479        assert_eq!(read_i64(&buf, 0), Some(300));
480
481        let buf = encode(&json!(-500));
482        assert_eq!(read_i64(&buf, 0), Some(-500));
483    }
484
485    #[test]
486    fn read_str_fixstr() {
487        let buf = encode(&json!("hi"));
488        assert_eq!(read_str(&buf, 0), Some("hi"));
489    }
490
491    #[test]
492    fn read_str_str8() {
493        let long = "a".repeat(40);
494        let buf = encode(&json!(long));
495        assert_eq!(read_str(&buf, 0), Some(long.as_str()));
496    }
497
498    #[test]
499    fn read_bool_values() {
500        assert_eq!(read_bool(&[TRUE], 0), Some(true));
501        assert_eq!(read_bool(&[FALSE], 0), Some(false));
502        assert_eq!(read_bool(&[NIL], 0), None);
503    }
504
505    #[test]
506    fn read_null_check() {
507        assert!(read_null(&[NIL], 0));
508        assert!(!read_null(&[TRUE], 0));
509    }
510
511    #[test]
512    fn map_header_fixmap() {
513        let buf = encode(&json!({"x": 1}));
514        let (count, _data_offset) = map_header(&buf, 0).unwrap();
515        assert_eq!(count, 1);
516    }
517
518    #[test]
519    fn skip_bin() {
520        // bin8: 0xc4, len=3, 3 bytes of data
521        let buf = [BIN8, 3, 0xde, 0xad, 0xbe, 0xff];
522        assert_eq!(skip_value(&buf, 0), Some(5));
523    }
524
525    #[test]
526    fn skip_ext() {
527        // fixext1: 0xd4, type byte, 1 data byte
528        let buf = [FIXEXT1, 0x01, 0xab, 0xff];
529        assert_eq!(skip_value(&buf, 0), Some(3));
530    }
531
532    #[test]
533    fn read_f64_float32() {
534        // json! always produces f64, so test float32 with raw bytes
535        // float32 tag (0xca) + 1.5 in IEEE 754 big-endian
536        let buf = [0xca, 0x3f, 0xc0, 0x00, 0x00];
537        let val = read_f64(&buf, 0).unwrap();
538        assert!((val - 1.5).abs() < 1e-6);
539    }
540
541    #[test]
542    fn skip_empty_containers() {
543        // empty fixmap
544        assert_eq!(skip_value(&[0x80], 0), Some(1));
545        // empty fixarray
546        assert_eq!(skip_value(&[0x90], 0), Some(1));
547    }
548
549    #[test]
550    fn array_header_fixarray() {
551        let buf = encode(&json!([10, 20, 30]));
552        let (count, data_offset) = array_header(&buf, 0).unwrap();
553        assert_eq!(count, 3);
554        assert_eq!(read_i64(&buf, data_offset), Some(10));
555    }
556
557    // ── Canonical encoding guarantee tests ─────────────────────────────
558
559    #[test]
560    fn canonical_integer_smallest_representation() {
561        // fixint (0-127): single byte
562        let buf = encode(&json!(42));
563        assert_eq!(buf.len(), 1);
564        assert_eq!(buf[0], 42);
565
566        // 0 as fixint
567        let buf = encode(&json!(0));
568        assert_eq!(buf.len(), 1);
569        assert_eq!(buf[0], 0);
570
571        // 127 as fixint
572        let buf = encode(&json!(127));
573        assert_eq!(buf.len(), 1);
574        assert_eq!(buf[0], 127);
575
576        // 128 should NOT be fixint. JSON parses as i64, so zerompk uses
577        // int16 (0xd1) since 128 > i8::MAX. This is canonical for signed path.
578        let buf = encode(&json!(128));
579        assert_eq!(buf[0], 0xd1); // int16 tag
580        assert_eq!(buf.len(), 3); // tag + 2 bytes
581
582        // negative fixint (-32 to -1)
583        let buf = encode(&json!(-1));
584        assert_eq!(buf.len(), 1);
585        assert_eq!(buf[0], 0xff); // -1 as negative fixint
586
587        let buf = encode(&json!(-32));
588        assert_eq!(buf.len(), 1);
589        assert_eq!(buf[0], 0xe0); // -32 as negative fixint
590    }
591
592    #[test]
593    fn canonical_map_keys_sorted() {
594        // Keys should be lexicographically sorted in msgpack output.
595        // Encode with keys in non-sorted order in JSON source.
596        let buf = encode(&json!({"z": 1, "a": 2, "m": 3}));
597
598        // Parse map and verify keys come out sorted
599        let (count, mut pos) = map_header(&buf, 0).unwrap();
600        assert_eq!(count, 3);
601
602        let mut keys = Vec::new();
603        for _ in 0..count {
604            let key = read_str(&buf, pos).unwrap();
605            keys.push(key.to_string());
606            pos = skip_value(&buf, pos).unwrap(); // skip key
607            pos = skip_value(&buf, pos).unwrap(); // skip value
608        }
609        assert_eq!(keys, vec!["a", "m", "z"]);
610    }
611
612    #[test]
613    fn canonical_deterministic_bytes() {
614        // Same logical document encoded twice must produce identical bytes.
615        let doc1 = encode(&json!({"name": "alice", "age": 30, "active": true}));
616        let doc2 = encode(&json!({"age": 30, "active": true, "name": "alice"}));
617        assert_eq!(
618            doc1, doc2,
619            "same logical doc must produce identical msgpack bytes"
620        );
621    }
622
623    #[test]
624    fn canonical_nested_map_keys_sorted() {
625        let buf = encode(&json!({"outer": {"z": 1, "a": 2}}));
626        // Extract the inner map
627        let (start, _end) = crate::msgpack_scan::field::extract_field(&buf, 0, "outer").unwrap();
628
629        let (count, mut pos) = map_header(&buf, start).unwrap();
630        assert_eq!(count, 2);
631
632        let key1 = read_str(&buf, pos).unwrap();
633        pos = skip_value(&buf, pos).unwrap();
634        pos = skip_value(&buf, pos).unwrap();
635        let key2 = read_str(&buf, pos).unwrap();
636
637        assert_eq!(key1, "a");
638        assert_eq!(key2, "z");
639    }
640
641    // ── Fuzz-style tests ───────────────────────────────────────────────────
642
643    /// Feed every single-byte sequence through all reader functions. None may
644    /// panic — they must return `None` or a valid result.
645    #[test]
646    fn fuzz_all_single_byte_sequences() {
647        for byte in 0u8..=255 {
648            let buf = [byte];
649            // None of these must panic
650            let _ = skip_value(&buf, 0);
651            let _ = read_f64(&buf, 0);
652            let _ = read_i64(&buf, 0);
653            let _ = read_str(&buf, 0);
654            let _ = read_bool(&buf, 0);
655            let _ = read_null(&buf, 0);
656            let _ = map_header(&buf, 0);
657            let _ = array_header(&buf, 0);
658            let _ = read_value(&buf, 0);
659        }
660    }
661
662    /// Feed two-byte patterns to cover tag + partial payload (truncated).
663    #[test]
664    fn fuzz_two_byte_patterns() {
665        // Tags that expect more bytes than we provide
666        let tags_need_extra: &[u8] = &[
667            0xca, // FLOAT32 needs 4 more
668            0xcb, // FLOAT64 needs 8 more
669            0xcc, // UINT8 needs 1 more
670            0xcd, // UINT16 needs 2 more
671            0xce, // UINT32 needs 4 more
672            0xcf, // UINT64 needs 8 more
673            0xd0, // INT8 needs 1 more
674            0xd1, // INT16 needs 2 more
675            0xd2, // INT32 needs 4 more
676            0xd3, // INT64 needs 8 more
677            0xd9, // STR8 length byte then data
678            0xda, // STR16 2-byte length then data
679            0xdb, // STR32 4-byte length then data
680            0xdc, // ARRAY16 2-byte count then elements
681            0xdd, // ARRAY32 4-byte count then elements
682            0xde, // MAP16 2-byte count then pairs
683            0xdf, // MAP32 4-byte count then pairs
684            0xc4, // BIN8
685            0xc5, // BIN16
686            0xc6, // BIN32
687            0xd4, // FIXEXT1
688            0xd5, // FIXEXT2
689            0xd6, // FIXEXT4
690            0xd7, // FIXEXT8
691            0xd8, // FIXEXT16
692        ];
693        for &tag in tags_need_extra {
694            // Single byte (completely truncated payload)
695            let buf = [tag];
696            let _ = skip_value(&buf, 0);
697            let _ = read_f64(&buf, 0);
698            let _ = read_i64(&buf, 0);
699            let _ = read_value(&buf, 0);
700
701            // Tag + one garbage byte
702            for second in [0x00u8, 0x01, 0x7f, 0x80, 0xff] {
703                let buf = [tag, second];
704                let _ = skip_value(&buf, 0);
705                let _ = read_f64(&buf, 0);
706                let _ = read_i64(&buf, 0);
707                let _ = read_value(&buf, 0);
708            }
709        }
710    }
711
712    /// Deterministic pseudo-random byte sequences must not cause panics.
713    #[test]
714    fn fuzz_deterministic_random_payloads() {
715        // Generate deterministic sequences without external crates using a
716        // simple LCG (Knuth multiplicative hash).
717        let mut state: u64 = 0xdeadbeef_cafebabe;
718        let next = |s: &mut u64| -> u8 {
719            *s = s
720                .wrapping_mul(6364136223846793005)
721                .wrapping_add(1442695040888963407);
722            (*s >> 33) as u8
723        };
724
725        let mut buf = vec![0u8; 256];
726        for _ in 0..2000 {
727            // Randomize buffer length (1..=256) and contents
728            let len = (next(&mut state) as usize % 256) + 1;
729            for b in buf[..len].iter_mut() {
730                *b = next(&mut state);
731            }
732            let slice = &buf[..len];
733
734            // Try reading from multiple offsets
735            for offset in [0, 1, len / 2, len.saturating_sub(1)] {
736                let _ = skip_value(slice, offset);
737                let _ = read_f64(slice, offset);
738                let _ = read_i64(slice, offset);
739                let _ = read_str(slice, offset);
740                let _ = read_bool(slice, offset);
741                let _ = read_null(slice, offset);
742                let _ = map_header(slice, offset);
743                let _ = array_header(slice, offset);
744                let _ = read_value(slice, offset);
745            }
746        }
747    }
748
749    /// Truncate a valid msgpack buffer at every byte position.
750    /// All reader functions must return `None` — never panic.
751    #[test]
752    fn fuzz_truncated_valid_payloads() {
753        let docs = [
754            json!({"key": "value", "num": 42, "flag": true}),
755            json!({"nested": {"a": 1, "b": [1, 2, 3]}}),
756            json!([1, "two", 3.0, null, false]),
757            json!({"large": 9999999999_i64}),
758            json!({"float": 1.23456789}),
759        ];
760
761        for doc in &docs {
762            let full = encode(doc);
763            // Truncate at every position from 0 to full.len()-1
764            for truncate_at in 0..full.len() {
765                let slice = &full[..truncate_at];
766                // None of these may panic; result doesn't matter
767                let _ = skip_value(slice, 0);
768                let _ = read_f64(slice, 0);
769                let _ = read_i64(slice, 0);
770                let _ = read_str(slice, 0);
771                let _ = read_bool(slice, 0);
772                let _ = map_header(slice, 0);
773                let _ = array_header(slice, 0);
774                let _ = read_value(slice, 0);
775            }
776        }
777    }
778
779    /// The never-used 0xc1 tag must return `None` for all functions.
780    #[test]
781    fn fuzz_never_used_tag_c1() {
782        // 0xc1 is explicitly "never used" in the msgpack spec
783        let buf = [0xc1u8, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00];
784        assert_eq!(
785            skip_value(&buf, 0),
786            None,
787            "0xc1 must return None from skip_value"
788        );
789        assert_eq!(read_f64(&buf, 0), None);
790        assert_eq!(read_i64(&buf, 0), None);
791        assert_eq!(read_str(&buf, 0), None);
792        assert_eq!(read_bool(&buf, 0), None);
793        assert_eq!(map_header(&buf, 0), None);
794        assert_eq!(array_header(&buf, 0), None);
795        assert_eq!(read_value(&buf, 0), None);
796    }
797
798    /// All tag boundary bytes — test transitions at fixint/fixmap/fixarray/fixstr edges.
799    #[test]
800    fn fuzz_tag_boundaries() {
801        // Each entry: (tag, expected_skip_result)
802        // For tags that are self-contained single bytes, skip returns Some(1).
803        // For tags requiring more data we just verify no panic with empty tail.
804        let boundary_tags: &[(u8, bool)] = &[
805            (0x00, true),  // positive fixint 0
806            (0x7f, true),  // positive fixint 127
807            (0x80, true),  // fixmap length 0 (empty map)
808            (0x8f, false), // fixmap length 15 — needs 15 pairs
809            (0x90, true),  // fixarray length 0 (empty array)
810            (0x9f, false), // fixarray length 15 — needs 15 elements
811            (0xa0, true),  // fixstr length 0 (empty string)
812            (0xbf, false), // fixstr length 31 — needs 31 bytes after
813            (0xc0, true),  // nil
814            (0xc1, false), // never used — must return None
815            (0xc2, true),  // false
816            (0xc3, true),  // true
817            (0xe0, true),  // negative fixint -32
818            (0xff, true),  // negative fixint -1
819        ];
820        for &(tag, self_contained) in boundary_tags {
821            let buf = [tag; 64]; // fill with the same tag as padding
822            let result = skip_value(&buf, 0);
823            if self_contained {
824                assert!(result.is_some(), "tag 0x{tag:02x} should skip OK");
825            } else if tag == 0xc1 {
826                assert_eq!(result, None, "0xc1 must always return None");
827            }
828            // For non-self-contained tags with valid padding we just verify no panic.
829        }
830    }
831
/// Headers that announce far more data than the buffer actually holds.
///
/// Each payload below consists of a length/count-prefixed tag whose prefix is
/// saturated with `0xff` bytes while only a handful of bytes (or none) follow.
/// `skip_value` must answer `None` for all of them; the STR32 payload is
/// additionally pushed through `read_str`.
#[test]
fn fuzz_adversarial_length_fields() {
    // STR32 (0xdb): 4-byte big-endian length of 0xffffffff, but just two
    // bytes of "string" data follow.
    let str32: &[u8] = &[0xdb, 0xff, 0xff, 0xff, 0xff, b'x', b'y'];
    assert_eq!(skip_value(str32, 0), None);
    assert_eq!(read_str(str32, 0), None);

    // The remaining shapes are exercised through `skip_value` only.
    let oversized: &[&[u8]] = &[
        // STR16: 2-byte length claiming 0xffff bytes
        &[0xda, 0xff, 0xff, b'x'],
        // ARRAY32: claims 0xffffffff elements, nothing after the header
        &[0xdd, 0xff, 0xff, 0xff, 0xff],
        // MAP32: claims 0xffffffff pairs, nothing after the header
        &[0xdf, 0xff, 0xff, 0xff, 0xff],
        // ARRAY16: claims 0xffff elements
        &[0xdc, 0xff, 0xff],
        // MAP16: claims 0xffff pairs
        &[0xde, 0xff, 0xff],
        // BIN32: claims max length
        &[0xc6, 0xff, 0xff, 0xff, 0xff, 0x00],
        // EXT32: claims max length
        &[0xc9, 0xff, 0xff, 0xff, 0xff, 0x01, 0x00],
    ];
    for &payload in oversized {
        assert_eq!(skip_value(payload, 0), None);
    }
}
868
/// Containers nested deeper than MAX_DEPTH (128) must make `skip_value`
/// give up with `None`.
///
/// Both container kinds are covered: single-element arrays and single-pair
/// maps, each nested 200 levels deep around a nil leaf.
#[test]
fn fuzz_malicious_nesting_depth() {
    let levels = 200usize;

    // Arrays: `levels` copies of fixarray(1) (tag 0x91), then nil (0xc0) as
    // the innermost element.
    let nested_arrays: Vec<u8> = std::iter::repeat(0x91u8)
        .take(levels)
        .chain(std::iter::once(0xc0u8))
        .collect();

    assert_eq!(
        skip_value(&nested_arrays, 0),
        None,
        "deeply nested arrays must return None to guard against stack overflow"
    );

    // Maps: every level contributes fixmap(1) (0x81) plus a one-byte fixstr
    // key (0xa1 + letter). The value slot of each pair is filled by whatever
    // comes next in the buffer: the following level, or nil at the very end.
    let mut nested_maps: Vec<u8> = Vec::with_capacity(levels * 3 + 1);
    for level in 0..levels {
        // `level % 26` is always < 26, so the cast cannot truncate and the
        // addition stays within b'a'..=b'z'.
        let key_byte = b'a' + (level % 26) as u8;
        nested_maps.extend_from_slice(&[0x81, 0xa1, key_byte]);
    }
    nested_maps.push(0xc0); // nil leaf

    assert_eq!(
        skip_value(&nested_maps, 0),
        None,
        "deeply nested maps must return None"
    );
}
903
/// `skip_value` must step over a fixed-width number in one go: the returned
/// offset has to equal the tag byte plus the type's payload width, even when
/// more bytes follow in the buffer.
#[test]
fn fuzz_fixed_width_numeric_skip_offsets() {
    // (tag, payload width in bytes — the tag byte itself is not included)
    const PAYLOAD_WIDTHS: [(u8, usize); 10] = [
        (0xca, 4), // FLOAT32
        (0xcb, 8), // FLOAT64
        (0xcc, 1), // UINT8
        (0xcd, 2), // UINT16
        (0xce, 4), // UINT32
        (0xcf, 8), // UINT64
        (0xd0, 1), // INT8
        (0xd1, 2), // INT16
        (0xd2, 4), // INT32
        (0xd3, 8), // INT64
    ];

    for (tag, width) in PAYLOAD_WIDTHS {
        // Total bytes the value occupies: 1 tag byte + payload.
        let size = 1 + width;

        // Tag followed by zeroes: the payload plus 4 bytes of trailing
        // padding that must be left untouched by the skip.
        let mut buf = vec![tag];
        buf.resize(size + 4, 0u8);

        assert_eq!(
            skip_value(&buf, 0),
            Some(size),
            "tag 0x{tag:02x} should advance by {size} bytes"
        );
    }
}
932
/// Each fixext variant must be skipped as exactly tag + type byte + data
/// bytes, with the returned offset pointing just past the value.
#[test]
fn fuzz_fixext_skip_offsets() {
    // (tag, number of data bytes carried by that fixext variant)
    const DATA_LENGTHS: [(u8, usize); 5] = [
        (0xd4, 1),  // FIXEXT1
        (0xd5, 2),  // FIXEXT2
        (0xd6, 4),  // FIXEXT4
        (0xd7, 8),  // FIXEXT8
        (0xd8, 16), // FIXEXT16
    ];

    for (tag, data_len) in DATA_LENGTHS {
        // 1 tag byte + 1 ext-type byte + data.
        let size = 2 + data_len;

        // The tag, then zeroes covering the rest of the value and 4 extra
        // padding bytes (size + 4 bytes in total).
        let buf: Vec<u8> = std::iter::once(tag)
            .chain(std::iter::repeat(0u8).take(size + 3))
            .collect();

        assert_eq!(
            skip_value(&buf, 0),
            Some(size),
            "fixext tag 0x{tag:02x} should advance by {size} bytes"
        );
    }
}
955
/// An offset at or past the end of the buffer must produce "no value" from
/// every reader — `None` for the `Option`-returning functions, `false` for
/// `read_null` — and must never panic.
///
/// Two offsets are probed: `buf.len()`, the first invalid index and the usual
/// off-by-one boundary, and `buf.len() + 1000`, far outside the buffer.
#[test]
fn fuzz_out_of_bounds_offset() {
    let buf = encode(&json!({"x": 1}));

    for offset in [buf.len(), buf.len() + 1000] {
        assert_eq!(skip_value(&buf, offset), None, "skip_value at offset {offset}");
        assert_eq!(read_f64(&buf, offset), None, "read_f64 at offset {offset}");
        assert_eq!(read_i64(&buf, offset), None, "read_i64 at offset {offset}");
        assert_eq!(read_str(&buf, offset), None, "read_str at offset {offset}");
        assert_eq!(read_bool(&buf, offset), None, "read_bool at offset {offset}");
        // `read_null` returns a plain bool, so "nothing there" is `false`.
        assert!(!read_null(&buf, offset), "read_null at offset {offset}");
        assert_eq!(map_header(&buf, offset), None, "map_header at offset {offset}");
        assert_eq!(array_header(&buf, offset), None, "array_header at offset {offset}");
        assert_eq!(read_value(&buf, offset), None, "read_value at offset {offset}");
    }
}
970
/// A zero-length buffer holds no value at all, so every reader has to report
/// that: `None` from the `Option`-returning functions and `false` from
/// `read_null`.
#[test]
fn fuzz_empty_buffer() {
    let nothing: &[u8] = b"";

    // `read_null` yields a bare bool rather than an Option.
    assert!(!read_null(nothing, 0));

    // Container headers and whole-value operations.
    assert_eq!(map_header(nothing, 0), None);
    assert_eq!(array_header(nothing, 0), None);
    assert_eq!(skip_value(nothing, 0), None);
    assert_eq!(read_value(nothing, 0), None);

    // Typed scalar readers.
    assert_eq!(read_bool(nothing, 0), None);
    assert_eq!(read_i64(nothing, 0), None);
    assert_eq!(read_f64(nothing, 0), None);
    assert_eq!(read_str(nothing, 0), None);
}
985}