Skip to main content

nodedb_query/msgpack_scan/
reader.rs

1// SPDX-License-Identifier: Apache-2.0
2
3//! Low-level MessagePack binary reader: tag parsing, value skipping, and typed reads.
4//!
5//! All functions operate on `&[u8]` with explicit offsets. Zero allocation,
6//! zero copy. Returns `None` on truncated/invalid data — never panics.
7
8use std::str;
9
10// ── Tag constants ──────────────────────────────────────────────────────
11
// Single-byte markers: nil and the two booleans.
const NIL: u8 = 0xc0;
const FALSE: u8 = 0xc2;
const TRUE: u8 = 0xc3;
// Binary blobs, prefixed by an 8/16/32-bit big-endian length.
const BIN8: u8 = 0xc4;
const BIN16: u8 = 0xc5;
const BIN32: u8 = 0xc6;
// Variable-size extensions: 8/16/32-bit length, then a type byte, then data.
const EXT8: u8 = 0xc7;
const EXT16: u8 = 0xc8;
const EXT32: u8 = 0xc9;
// Floating-point numbers (4-byte and 8-byte big-endian payloads).
const FLOAT32: u8 = 0xca;
const FLOAT64: u8 = 0xcb;
// Unsigned integers with a 1/2/4/8-byte big-endian payload.
const UINT8: u8 = 0xcc;
const UINT16: u8 = 0xcd;
const UINT32: u8 = 0xce;
const UINT64: u8 = 0xcf;
// Signed integers with a 1/2/4/8-byte big-endian payload.
const INT8: u8 = 0xd0;
const INT16: u8 = 0xd1;
const INT32: u8 = 0xd2;
const INT64: u8 = 0xd3;
// Fixed-size extensions: a type byte followed by 1/2/4/8/16 data bytes.
const FIXEXT1: u8 = 0xd4;
const FIXEXT2: u8 = 0xd5;
const FIXEXT4: u8 = 0xd6;
const FIXEXT8: u8 = 0xd7;
const FIXEXT16: u8 = 0xd8;
// Strings, prefixed by an 8/16/32-bit big-endian byte length.
const STR8: u8 = 0xd9;
const STR16: u8 = 0xda;
const STR32: u8 = 0xdb;
// Arrays, prefixed by a 16/32-bit big-endian element count.
const ARRAY16: u8 = 0xdc;
const ARRAY32: u8 = 0xdd;
// Maps, prefixed by a 16/32-bit big-endian key-value pair count.
const MAP16: u8 = 0xde;
const MAP32: u8 = 0xdf;

/// Maximum nesting depth to prevent stack overflow on malicious payloads.
/// `skip_value` gives up (returns `None`) once containers nest deeper than this.
const MAX_DEPTH: u16 = 128;
46
47// ── Inline helpers ─────────────────────────────────────────────────────
48
/// Fetch the byte at `pos`, or `None` when `pos` lies outside `buf`.
#[inline(always)]
fn get(buf: &[u8], pos: usize) -> Option<u8> {
    let byte = *buf.get(pos)?;
    Some(byte)
}
53
/// Decode a big-endian `u16` from the two bytes starting at `pos`.
///
/// Returns `None` when fewer than two bytes are available at `pos`, or when
/// `pos + 2` would overflow `usize` — the helper never panics.
#[inline(always)]
fn read_u16_be(buf: &[u8], pos: usize) -> Option<u16> {
    // `checked_add` keeps an absurd `pos` from overflowing the range end.
    let end = pos.checked_add(2)?;
    match *buf.get(pos..end)? {
        [hi, lo] => Some(u16::from_be_bytes([hi, lo])),
        _ => None,
    }
}
59
/// Decode a big-endian `u32` from the four bytes starting at `pos`.
///
/// Returns `None` when fewer than four bytes are available at `pos`, or when
/// `pos + 4` would overflow `usize` — the helper never panics.
#[inline(always)]
fn read_u32_be(buf: &[u8], pos: usize) -> Option<u32> {
    // `checked_add` keeps an absurd `pos` from overflowing the range end.
    let end = pos.checked_add(4)?;
    match *buf.get(pos..end)? {
        [b0, b1, b2, b3] => Some(u32::from_be_bytes([b0, b1, b2, b3])),
        _ => None,
    }
}
65
/// Decode a big-endian `u64` from the eight bytes starting at `pos`.
///
/// Returns `None` when fewer than eight bytes are available at `pos`, or when
/// `pos + 8` would overflow `usize` — the helper never panics.
#[inline(always)]
fn read_u64_be(buf: &[u8], pos: usize) -> Option<u64> {
    // `checked_add` keeps an absurd `pos` from overflowing the range end.
    let end = pos.checked_add(8)?;
    match *buf.get(pos..end)? {
        [b0, b1, b2, b3, b4, b5, b6, b7] => {
            Some(u64::from_be_bytes([b0, b1, b2, b3, b4, b5, b6, b7]))
        }
        _ => None,
    }
}
73
/// Return `Some(offset + size)` only if the buffer has enough bytes.
///
/// The sum is computed with overflow checking, so an oversized `size` (or an
/// out-of-range `offset`) yields `None` instead of panicking or wrapping
/// around to a bogus in-bounds position.
#[inline(always)]
fn checked_advance(buf: &[u8], offset: usize, size: usize) -> Option<usize> {
    offset
        .checked_add(size)
        .filter(|&end| end <= buf.len())
}
80
81// ── skip_value ─────────────────────────────────────────────────────────
82
83/// Advance past the MessagePack value starting at `offset`, returning the
84/// offset of the next value. Returns `None` if the buffer is truncated or
85/// nesting exceeds `MAX_DEPTH`.
86///
87/// This is the performance-critical primitive. It never allocates.
88pub fn skip_value(buf: &[u8], offset: usize) -> Option<usize> {
89    skip_value_depth(buf, offset, 0)
90}
91
92fn skip_value_depth(buf: &[u8], offset: usize, depth: u16) -> Option<usize> {
93    if depth > MAX_DEPTH {
94        return None;
95    }
96    let tag = get(buf, offset)?;
97    match tag {
98        // positive fixint (0x00..=0x7f)
99        0x00..=0x7f => Some(offset + 1),
100        // negative fixint (0xe0..=0xff)
101        0xe0..=0xff => Some(offset + 1),
102        // nil, false, true
103        NIL | FALSE | TRUE => Some(offset + 1),
104
105        // fixmap (0x80..=0x8f)
106        0x80..=0x8f => {
107            let count = (tag & 0x0f) as usize;
108            skip_n_pairs(buf, offset + 1, count, depth)
109        }
110        MAP16 => {
111            let count = read_u16_be(buf, offset + 1)? as usize;
112            skip_n_pairs(buf, offset + 3, count, depth)
113        }
114        MAP32 => {
115            let count = read_u32_be(buf, offset + 1)? as usize;
116            skip_n_pairs(buf, offset + 5, count, depth)
117        }
118
119        // fixarray (0x90..=0x9f)
120        0x90..=0x9f => {
121            let count = (tag & 0x0f) as usize;
122            skip_n_values(buf, offset + 1, count, depth)
123        }
124        ARRAY16 => {
125            let count = read_u16_be(buf, offset + 1)? as usize;
126            skip_n_values(buf, offset + 3, count, depth)
127        }
128        ARRAY32 => {
129            let count = read_u32_be(buf, offset + 1)? as usize;
130            skip_n_values(buf, offset + 5, count, depth)
131        }
132
133        // fixstr (0xa0..=0xbf)
134        0xa0..=0xbf => {
135            let len = (tag & 0x1f) as usize;
136            checked_advance(buf, offset, 1 + len)
137        }
138        STR8 => {
139            let len = get(buf, offset + 1)? as usize;
140            checked_advance(buf, offset, 2 + len)
141        }
142        STR16 => {
143            let len = read_u16_be(buf, offset + 1)? as usize;
144            checked_advance(buf, offset, 3 + len)
145        }
146        STR32 => {
147            let len = read_u32_be(buf, offset + 1)? as usize;
148            checked_advance(buf, offset, 5 + len)
149        }
150
151        // bin
152        BIN8 => {
153            let len = get(buf, offset + 1)? as usize;
154            checked_advance(buf, offset, 2 + len)
155        }
156        BIN16 => {
157            let len = read_u16_be(buf, offset + 1)? as usize;
158            checked_advance(buf, offset, 3 + len)
159        }
160        BIN32 => {
161            let len = read_u32_be(buf, offset + 1)? as usize;
162            checked_advance(buf, offset, 5 + len)
163        }
164
165        // fixed-width numerics (bounds-check against buffer length)
166        FLOAT32 => checked_advance(buf, offset, 5),
167        FLOAT64 => checked_advance(buf, offset, 9),
168        UINT8 | INT8 => checked_advance(buf, offset, 2),
169        UINT16 | INT16 => checked_advance(buf, offset, 3),
170        UINT32 | INT32 => checked_advance(buf, offset, 5),
171        UINT64 | INT64 => checked_advance(buf, offset, 9),
172
173        // ext
174        FIXEXT1 => checked_advance(buf, offset, 3),
175        FIXEXT2 => checked_advance(buf, offset, 4),
176        FIXEXT4 => checked_advance(buf, offset, 6),
177        FIXEXT8 => checked_advance(buf, offset, 10),
178        FIXEXT16 => checked_advance(buf, offset, 18),
179        EXT8 => {
180            let len = get(buf, offset + 1)? as usize;
181            checked_advance(buf, offset, 3 + len)
182        }
183        EXT16 => {
184            let len = read_u16_be(buf, offset + 1)? as usize;
185            checked_advance(buf, offset, 4 + len)
186        }
187        EXT32 => {
188            let len = read_u32_be(buf, offset + 1)? as usize;
189            checked_advance(buf, offset, 6 + len)
190        }
191
192        // 0xc1 is never used in the spec
193        _ => None,
194    }
195}
196
197fn skip_n_values(buf: &[u8], mut pos: usize, count: usize, depth: u16) -> Option<usize> {
198    for _ in 0..count {
199        pos = skip_value_depth(buf, pos, depth + 1)?;
200    }
201    Some(pos)
202}
203
204fn skip_n_pairs(buf: &[u8], mut pos: usize, count: usize, depth: u16) -> Option<usize> {
205    for _ in 0..count {
206        pos = skip_value_depth(buf, pos, depth + 1)?; // key
207        pos = skip_value_depth(buf, pos, depth + 1)?; // value
208    }
209    Some(pos)
210}
211
212// ── Typed reads ────────────────────────────────────────────────────────
213
214/// Read an f64 from the value at `offset`. Handles float32, float64,
215/// and all integer types (coerced to f64).
216pub fn read_f64(buf: &[u8], offset: usize) -> Option<f64> {
217    let tag = get(buf, offset)?;
218    match tag {
219        // positive fixint
220        0x00..=0x7f => Some(tag as f64),
221        // negative fixint
222        0xe0..=0xff => Some((tag as i8) as f64),
223        FLOAT64 => {
224            let bits = read_u64_be(buf, offset + 1)?;
225            Some(f64::from_bits(bits))
226        }
227        FLOAT32 => {
228            let bits = read_u32_be(buf, offset + 1)?;
229            Some(f32::from_bits(bits) as f64)
230        }
231        UINT8 => Some(get(buf, offset + 1)? as f64),
232        UINT16 => Some(read_u16_be(buf, offset + 1)? as f64),
233        UINT32 => Some(read_u32_be(buf, offset + 1)? as f64),
234        UINT64 => Some(read_u64_be(buf, offset + 1)? as f64),
235        INT8 => Some(get(buf, offset + 1)? as i8 as f64),
236        INT16 => Some(read_u16_be(buf, offset + 1)? as i16 as f64),
237        INT32 => Some(read_u32_be(buf, offset + 1)? as i32 as f64),
238        INT64 => Some(read_u64_be(buf, offset + 1)? as i64 as f64),
239        _ => None,
240    }
241}
242
243/// Read an i64 from the value at `offset`. Handles all integer types.
244/// Floats return `None` — use `read_f64` for those.
245pub fn read_i64(buf: &[u8], offset: usize) -> Option<i64> {
246    let tag = get(buf, offset)?;
247    match tag {
248        0x00..=0x7f => Some(tag as i64),
249        0xe0..=0xff => Some((tag as i8) as i64),
250        UINT8 => Some(get(buf, offset + 1)? as i64),
251        UINT16 => Some(read_u16_be(buf, offset + 1)? as i64),
252        UINT32 => Some(read_u32_be(buf, offset + 1)? as i64),
253        UINT64 => {
254            let v = read_u64_be(buf, offset + 1)?;
255            Some(v as i64)
256        }
257        INT8 => Some(get(buf, offset + 1)? as i8 as i64),
258        INT16 => Some(read_u16_be(buf, offset + 1)? as i16 as i64),
259        INT32 => Some(read_u32_be(buf, offset + 1)? as i32 as i64),
260        INT64 => {
261            let v = read_u64_be(buf, offset + 1)?;
262            Some(v as i64)
263        }
264        _ => None,
265    }
266}
267
268/// Read a string slice from the value at `offset`. Zero-copy — borrows
269/// directly from the input buffer. Returns `None` for non-string types
270/// or invalid UTF-8.
271pub fn read_str(buf: &[u8], offset: usize) -> Option<&str> {
272    let (start, len) = str_bounds(buf, offset)?;
273    let bytes = buf.get(start..start + len)?;
274    str::from_utf8(bytes).ok()
275}
276
277/// Read a string slice at `*off`, advancing `*off` past it. Zero-copy.
278/// Returns `None` for non-string types, invalid UTF-8, or truncated input.
279pub fn read_str_advance<'a>(buf: &'a [u8], off: &mut usize) -> Option<&'a str> {
280    let (start, len) = str_bounds(buf, *off)?;
281    let bytes = buf.get(start..start + len)?;
282    let s = str::from_utf8(bytes).ok()?;
283    *off = start + len;
284    Some(s)
285}
286
287/// Read a `bin` value at `*off`, advancing `*off` past it. Zero-copy —
288/// the returned slice borrows from `buf`. Returns `None` for non-bin tags
289/// or truncated input.
290pub fn read_bin_advance<'a>(buf: &'a [u8], off: &mut usize) -> Option<&'a [u8]> {
291    let tag = get(buf, *off)?;
292    let (len, header) = match tag {
293        BIN8 => (get(buf, *off + 1)? as usize, 2),
294        BIN16 => (read_u16_be(buf, *off + 1)? as usize, 3),
295        BIN32 => (read_u32_be(buf, *off + 1)? as usize, 5),
296        _ => return None,
297    };
298    let start = *off + header;
299    let end = start + len;
300    let data = buf.get(start..end)?;
301    *off = end;
302    Some(data)
303}
304
305/// Read an unsigned integer that fits in a `u32` at `*off`, advancing `*off`
306/// past it. Accepts positive fixint, uint8, uint16, uint32. Returns `None`
307/// for negative, signed-typed, oversized (uint64), or non-integer values.
308pub fn read_u32_advance(buf: &[u8], off: &mut usize) -> Option<u32> {
309    let tag = get(buf, *off)?;
310    match tag {
311        0x00..=0x7f => {
312            *off += 1;
313            Some(tag as u32)
314        }
315        UINT8 => {
316            let v = get(buf, *off + 1)? as u32;
317            *off += 2;
318            Some(v)
319        }
320        UINT16 => {
321            let v = read_u16_be(buf, *off + 1)? as u32;
322            *off += 3;
323            Some(v)
324        }
325        UINT32 => {
326            let v = read_u32_be(buf, *off + 1)?;
327            *off += 5;
328            Some(v)
329        }
330        _ => None,
331    }
332}
333
334/// Return `(data_start, byte_len)` for the string at `offset` without
335/// validating UTF-8. Used internally for key comparison.
336pub(crate) fn str_bounds(buf: &[u8], offset: usize) -> Option<(usize, usize)> {
337    let tag = get(buf, offset)?;
338    match tag {
339        0xa0..=0xbf => {
340            let len = (tag & 0x1f) as usize;
341            Some((offset + 1, len))
342        }
343        STR8 => {
344            let len = get(buf, offset + 1)? as usize;
345            Some((offset + 2, len))
346        }
347        STR16 => {
348            let len = read_u16_be(buf, offset + 1)? as usize;
349            Some((offset + 3, len))
350        }
351        STR32 => {
352            let len = read_u32_be(buf, offset + 1)? as usize;
353            Some((offset + 5, len))
354        }
355        _ => None,
356    }
357}
358
359/// Read a boolean from the value at `offset`.
360pub fn read_bool(buf: &[u8], offset: usize) -> Option<bool> {
361    match get(buf, offset)? {
362        TRUE => Some(true),
363        FALSE => Some(false),
364        _ => None,
365    }
366}
367
368/// Check if the value at `offset` is nil.
369pub fn read_null(buf: &[u8], offset: usize) -> bool {
370    get(buf, offset) == Some(NIL)
371}
372
373/// Read a scalar msgpack value at `offset` into `nodedb_types::Value`.
374///
375/// Handles null, bool, integers, floats, and strings. For complex types
376/// (array, map, bin, ext), returns `None` — caller should use
377/// `json_from_msgpack` for those.
378pub fn read_value(buf: &[u8], offset: usize) -> Option<nodedb_types::Value> {
379    let tag = get(buf, offset)?;
380    match tag {
381        NIL => Some(nodedb_types::Value::Null),
382        TRUE => Some(nodedb_types::Value::Bool(true)),
383        FALSE => Some(nodedb_types::Value::Bool(false)),
384        // Integers
385        0x00..=0x7f => Some(nodedb_types::Value::Integer(tag as i64)),
386        0xe0..=0xff => Some(nodedb_types::Value::Integer((tag as i8) as i64)),
387        UINT8 => Some(nodedb_types::Value::Integer(get(buf, offset + 1)? as i64)),
388        UINT16 => Some(nodedb_types::Value::Integer(
389            read_u16_be(buf, offset + 1)? as i64
390        )),
391        UINT32 => Some(nodedb_types::Value::Integer(
392            read_u32_be(buf, offset + 1)? as i64
393        )),
394        UINT64 => Some(nodedb_types::Value::Integer(
395            read_u64_be(buf, offset + 1)? as i64
396        )),
397        INT8 => Some(nodedb_types::Value::Integer(
398            get(buf, offset + 1)? as i8 as i64
399        )),
400        INT16 => Some(nodedb_types::Value::Integer(
401            read_u16_be(buf, offset + 1)? as i16 as i64,
402        )),
403        INT32 => Some(nodedb_types::Value::Integer(
404            read_u32_be(buf, offset + 1)? as i32 as i64,
405        )),
406        INT64 => Some(nodedb_types::Value::Integer(
407            read_u64_be(buf, offset + 1)? as i64
408        )),
409        // Floats
410        FLOAT32 => {
411            let bits = read_u32_be(buf, offset + 1)?;
412            Some(nodedb_types::Value::Float(f32::from_bits(bits) as f64))
413        }
414        FLOAT64 => {
415            let bits = read_u64_be(buf, offset + 1)?;
416            Some(nodedb_types::Value::Float(f64::from_bits(bits)))
417        }
418        // Strings
419        0xa0..=0xbf | STR8 | STR16 | STR32 => {
420            read_str(buf, offset).map(|s| nodedb_types::Value::String(s.to_string()))
421        }
422        _ => None,
423    }
424}
425
426/// Return the number of key-value pairs and the offset of the first pair,
427/// for the map starting at `offset`. Returns `None` if not a map.
428pub fn map_header(buf: &[u8], offset: usize) -> Option<(usize, usize)> {
429    let tag = get(buf, offset)?;
430    match tag {
431        0x80..=0x8f => Some(((tag & 0x0f) as usize, offset + 1)),
432        MAP16 => Some((read_u16_be(buf, offset + 1)? as usize, offset + 3)),
433        MAP32 => Some((read_u32_be(buf, offset + 1)? as usize, offset + 5)),
434        _ => None,
435    }
436}
437
438/// Return the number of elements and the offset of the first element,
439/// for the array starting at `offset`. Returns `None` if not an array.
440pub fn array_header(buf: &[u8], offset: usize) -> Option<(usize, usize)> {
441    let tag = get(buf, offset)?;
442    match tag {
443        0x90..=0x9f => Some(((tag & 0x0f) as usize, offset + 1)),
444        ARRAY16 => Some((read_u16_be(buf, offset + 1)? as usize, offset + 3)),
445        ARRAY32 => Some((read_u32_be(buf, offset + 1)? as usize, offset + 5)),
446        _ => None,
447    }
448}
449
450#[cfg(test)]
451mod tests {
452    use super::*;
453
454    use serde_json::json;
455
456    /// Helper: encode a serde_json::Value to MessagePack bytes.
457    fn encode(v: &serde_json::Value) -> Vec<u8> {
458        nodedb_types::json_msgpack::json_to_msgpack(v).expect("encode")
459    }
460
461    #[test]
462    fn skip_positive_fixint() {
463        let buf = [0x05, 0xff];
464        assert_eq!(skip_value(&buf, 0), Some(1));
465    }
466
467    #[test]
468    fn skip_negative_fixint() {
469        let buf = [0xe0, 0x00];
470        assert_eq!(skip_value(&buf, 0), Some(1));
471    }
472
473    #[test]
474    fn skip_nil_bool() {
475        assert_eq!(skip_value(&[NIL], 0), Some(1));
476        assert_eq!(skip_value(&[TRUE], 0), Some(1));
477        assert_eq!(skip_value(&[FALSE], 0), Some(1));
478    }
479
480    #[test]
481    fn skip_float64() {
482        let buf = encode(&json!(9.81));
483        assert_eq!(skip_value(&buf, 0), Some(buf.len()));
484    }
485
486    #[test]
487    fn skip_string() {
488        let buf = encode(&json!("hello"));
489        assert_eq!(skip_value(&buf, 0), Some(buf.len()));
490    }
491
492    #[test]
493    fn skip_map() {
494        let buf = encode(&json!({"a": 1, "b": 2}));
495        assert_eq!(skip_value(&buf, 0), Some(buf.len()));
496    }
497
498    #[test]
499    fn skip_nested_array() {
500        let buf = encode(&json!([[1, 2], [3, 4, 5]]));
501        assert_eq!(skip_value(&buf, 0), Some(buf.len()));
502    }
503
504    #[test]
505    fn skip_truncated_returns_none() {
506        let buf = [FLOAT64, 0x40]; // truncated float64
507        assert_eq!(skip_value(&buf, 0), None);
508    }
509
510    #[test]
511    fn read_f64_fixint() {
512        assert_eq!(read_f64(&[42u8], 0), Some(42.0));
513    }
514
515    #[test]
516    fn read_f64_negative_fixint() {
517        assert_eq!(read_f64(&[0xffu8], 0), Some(-1.0));
518    }
519
520    #[test]
521    fn read_f64_float64() {
522        let buf = encode(&json!(std::f64::consts::PI));
523        assert_eq!(read_f64(&buf, 0), Some(std::f64::consts::PI));
524    }
525
526    #[test]
527    fn read_f64_uint16() {
528        let buf = encode(&json!(1000));
529        assert_eq!(read_f64(&buf, 0), Some(1000.0));
530    }
531
532    #[test]
533    fn read_i64_values() {
534        assert_eq!(read_i64(&[42u8], 0), Some(42));
535        assert_eq!(read_i64(&[0xffu8], 0), Some(-1));
536
537        let buf = encode(&json!(300));
538        assert_eq!(read_i64(&buf, 0), Some(300));
539
540        let buf = encode(&json!(-500));
541        assert_eq!(read_i64(&buf, 0), Some(-500));
542    }
543
544    #[test]
545    fn read_str_fixstr() {
546        let buf = encode(&json!("hi"));
547        assert_eq!(read_str(&buf, 0), Some("hi"));
548    }
549
550    #[test]
551    fn read_str_str8() {
552        let long = "a".repeat(40);
553        let buf = encode(&json!(long));
554        assert_eq!(read_str(&buf, 0), Some(long.as_str()));
555    }
556
557    #[test]
558    fn read_bool_values() {
559        assert_eq!(read_bool(&[TRUE], 0), Some(true));
560        assert_eq!(read_bool(&[FALSE], 0), Some(false));
561        assert_eq!(read_bool(&[NIL], 0), None);
562    }
563
564    #[test]
565    fn read_null_check() {
566        assert!(read_null(&[NIL], 0));
567        assert!(!read_null(&[TRUE], 0));
568    }
569
570    #[test]
571    fn map_header_fixmap() {
572        let buf = encode(&json!({"x": 1}));
573        let (count, _data_offset) = map_header(&buf, 0).unwrap();
574        assert_eq!(count, 1);
575    }
576
577    #[test]
578    fn skip_bin() {
579        // bin8: 0xc4, len=3, 3 bytes of data
580        let buf = [BIN8, 3, 0xde, 0xad, 0xbe, 0xff];
581        assert_eq!(skip_value(&buf, 0), Some(5));
582    }
583
584    #[test]
585    fn skip_ext() {
586        // fixext1: 0xd4, type byte, 1 data byte
587        let buf = [FIXEXT1, 0x01, 0xab, 0xff];
588        assert_eq!(skip_value(&buf, 0), Some(3));
589    }
590
591    #[test]
592    fn read_f64_float32() {
593        // json! always produces f64, so test float32 with raw bytes
594        // float32 tag (0xca) + 1.5 in IEEE 754 big-endian
595        let buf = [0xca, 0x3f, 0xc0, 0x00, 0x00];
596        let val = read_f64(&buf, 0).unwrap();
597        assert!((val - 1.5).abs() < 1e-6);
598    }
599
600    #[test]
601    fn skip_empty_containers() {
602        // empty fixmap
603        assert_eq!(skip_value(&[0x80], 0), Some(1));
604        // empty fixarray
605        assert_eq!(skip_value(&[0x90], 0), Some(1));
606    }
607
608    #[test]
609    fn array_header_fixarray() {
610        let buf = encode(&json!([10, 20, 30]));
611        let (count, data_offset) = array_header(&buf, 0).unwrap();
612        assert_eq!(count, 3);
613        assert_eq!(read_i64(&buf, data_offset), Some(10));
614    }
615
616    // ── Canonical encoding guarantee tests ─────────────────────────────
617
618    #[test]
619    fn canonical_integer_smallest_representation() {
620        // fixint (0-127): single byte
621        let buf = encode(&json!(42));
622        assert_eq!(buf.len(), 1);
623        assert_eq!(buf[0], 42);
624
625        // 0 as fixint
626        let buf = encode(&json!(0));
627        assert_eq!(buf.len(), 1);
628        assert_eq!(buf[0], 0);
629
630        // 127 as fixint
631        let buf = encode(&json!(127));
632        assert_eq!(buf.len(), 1);
633        assert_eq!(buf[0], 127);
634
635        // 128 should NOT be fixint. JSON parses as i64, so zerompk uses
636        // int16 (0xd1) since 128 > i8::MAX. This is canonical for signed path.
637        let buf = encode(&json!(128));
638        assert_eq!(buf[0], 0xd1); // int16 tag
639        assert_eq!(buf.len(), 3); // tag + 2 bytes
640
641        // negative fixint (-32 to -1)
642        let buf = encode(&json!(-1));
643        assert_eq!(buf.len(), 1);
644        assert_eq!(buf[0], 0xff); // -1 as negative fixint
645
646        let buf = encode(&json!(-32));
647        assert_eq!(buf.len(), 1);
648        assert_eq!(buf[0], 0xe0); // -32 as negative fixint
649    }
650
651    #[test]
652    fn canonical_map_keys_sorted() {
653        // Keys should be lexicographically sorted in msgpack output.
654        // Encode with keys in non-sorted order in JSON source.
655        let buf = encode(&json!({"z": 1, "a": 2, "m": 3}));
656
657        // Parse map and verify keys come out sorted
658        let (count, mut pos) = map_header(&buf, 0).unwrap();
659        assert_eq!(count, 3);
660
661        let mut keys = Vec::new();
662        for _ in 0..count {
663            let key = read_str(&buf, pos).unwrap();
664            keys.push(key.to_string());
665            pos = skip_value(&buf, pos).unwrap(); // skip key
666            pos = skip_value(&buf, pos).unwrap(); // skip value
667        }
668        assert_eq!(keys, vec!["a", "m", "z"]);
669    }
670
671    #[test]
672    fn canonical_deterministic_bytes() {
673        // Same logical document encoded twice must produce identical bytes.
674        let doc1 = encode(&json!({"name": "alice", "age": 30, "active": true}));
675        let doc2 = encode(&json!({"age": 30, "active": true, "name": "alice"}));
676        assert_eq!(
677            doc1, doc2,
678            "same logical doc must produce identical msgpack bytes"
679        );
680    }
681
682    #[test]
683    fn canonical_nested_map_keys_sorted() {
684        let buf = encode(&json!({"outer": {"z": 1, "a": 2}}));
685        // Extract the inner map
686        let (start, _end) = crate::msgpack_scan::field::extract_field(&buf, 0, "outer").unwrap();
687
688        let (count, mut pos) = map_header(&buf, start).unwrap();
689        assert_eq!(count, 2);
690
691        let key1 = read_str(&buf, pos).unwrap();
692        pos = skip_value(&buf, pos).unwrap();
693        pos = skip_value(&buf, pos).unwrap();
694        let key2 = read_str(&buf, pos).unwrap();
695
696        assert_eq!(key1, "a");
697        assert_eq!(key2, "z");
698    }
699
700    // ── Fuzz-style tests ───────────────────────────────────────────────────
701
702    /// Feed every single-byte sequence through all reader functions. None may
703    /// panic — they must return `None` or a valid result.
704    #[test]
705    fn fuzz_all_single_byte_sequences() {
706        for byte in 0u8..=255 {
707            let buf = [byte];
708            // None of these must panic
709            let _ = skip_value(&buf, 0);
710            let _ = read_f64(&buf, 0);
711            let _ = read_i64(&buf, 0);
712            let _ = read_str(&buf, 0);
713            let _ = read_bool(&buf, 0);
714            let _ = read_null(&buf, 0);
715            let _ = map_header(&buf, 0);
716            let _ = array_header(&buf, 0);
717            let _ = read_value(&buf, 0);
718        }
719    }
720
721    /// Feed two-byte patterns to cover tag + partial payload (truncated).
722    #[test]
723    fn fuzz_two_byte_patterns() {
724        // Tags that expect more bytes than we provide
725        let tags_need_extra: &[u8] = &[
726            0xca, // FLOAT32 needs 4 more
727            0xcb, // FLOAT64 needs 8 more
728            0xcc, // UINT8 needs 1 more
729            0xcd, // UINT16 needs 2 more
730            0xce, // UINT32 needs 4 more
731            0xcf, // UINT64 needs 8 more
732            0xd0, // INT8 needs 1 more
733            0xd1, // INT16 needs 2 more
734            0xd2, // INT32 needs 4 more
735            0xd3, // INT64 needs 8 more
736            0xd9, // STR8 length byte then data
737            0xda, // STR16 2-byte length then data
738            0xdb, // STR32 4-byte length then data
739            0xdc, // ARRAY16 2-byte count then elements
740            0xdd, // ARRAY32 4-byte count then elements
741            0xde, // MAP16 2-byte count then pairs
742            0xdf, // MAP32 4-byte count then pairs
743            0xc4, // BIN8
744            0xc5, // BIN16
745            0xc6, // BIN32
746            0xd4, // FIXEXT1
747            0xd5, // FIXEXT2
748            0xd6, // FIXEXT4
749            0xd7, // FIXEXT8
750            0xd8, // FIXEXT16
751        ];
752        for &tag in tags_need_extra {
753            // Single byte (completely truncated payload)
754            let buf = [tag];
755            let _ = skip_value(&buf, 0);
756            let _ = read_f64(&buf, 0);
757            let _ = read_i64(&buf, 0);
758            let _ = read_value(&buf, 0);
759
760            // Tag + one garbage byte
761            for second in [0x00u8, 0x01, 0x7f, 0x80, 0xff] {
762                let buf = [tag, second];
763                let _ = skip_value(&buf, 0);
764                let _ = read_f64(&buf, 0);
765                let _ = read_i64(&buf, 0);
766                let _ = read_value(&buf, 0);
767            }
768        }
769    }
770
    /// Deterministic pseudo-random byte sequences must not cause panics.
    ///
    /// Results are discarded; the test only fails if a reader panics.
    #[test]
    fn fuzz_deterministic_random_payloads() {
        // Generate deterministic sequences without external crates using a
        // 64-bit linear congruential generator (the multiplier and increment
        // are the constants of Knuth's MMIX LCG). The seed is fixed, so every
        // run feeds the readers exactly the same bytes.
        let mut state: u64 = 0xdeadbeef_cafebabe;
        let next = |s: &mut u64| -> u8 {
            *s = s
                .wrapping_mul(6364136223846793005)
                .wrapping_add(1442695040888963407);
            // Emit eight bits taken from the upper half of the state.
            (*s >> 33) as u8
        };

        // One scratch buffer, reused across all iterations.
        let mut buf = vec![0u8; 256];
        for _ in 0..2000 {
            // Randomize buffer length (1..=256) and contents
            let len = (next(&mut state) as usize % 256) + 1;
            for b in buf[..len].iter_mut() {
                *b = next(&mut state);
            }
            let slice = &buf[..len];

            // Try reading from multiple offsets: start, second byte, middle,
            // and last byte (offset 1 is out of range when `len == 1`).
            for offset in [0, 1, len / 2, len.saturating_sub(1)] {
                let _ = skip_value(slice, offset);
                let _ = read_f64(slice, offset);
                let _ = read_i64(slice, offset);
                let _ = read_str(slice, offset);
                let _ = read_bool(slice, offset);
                let _ = read_null(slice, offset);
                let _ = map_header(slice, offset);
                let _ = array_header(slice, offset);
                let _ = read_value(slice, offset);
            }
        }
    }
807
808    /// Truncate a valid msgpack buffer at every byte position.
809    /// All reader functions must return `None` — never panic.
810    #[test]
811    fn fuzz_truncated_valid_payloads() {
812        let docs = [
813            json!({"key": "value", "num": 42, "flag": true}),
814            json!({"nested": {"a": 1, "b": [1, 2, 3]}}),
815            json!([1, "two", 3.0, null, false]),
816            json!({"large": 9999999999_i64}),
817            json!({"float": 1.23456789}),
818        ];
819
820        for doc in &docs {
821            let full = encode(doc);
822            // Truncate at every position from 0 to full.len()-1
823            for truncate_at in 0..full.len() {
824                let slice = &full[..truncate_at];
825                // None of these may panic; result doesn't matter
826                let _ = skip_value(slice, 0);
827                let _ = read_f64(slice, 0);
828                let _ = read_i64(slice, 0);
829                let _ = read_str(slice, 0);
830                let _ = read_bool(slice, 0);
831                let _ = map_header(slice, 0);
832                let _ = array_header(slice, 0);
833                let _ = read_value(slice, 0);
834            }
835        }
836    }
837
838    /// The never-used 0xc1 tag must return `None` for all functions.
839    #[test]
840    fn fuzz_never_used_tag_c1() {
841        // 0xc1 is explicitly "never used" in the msgpack spec
842        let buf = [0xc1u8, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00];
843        assert_eq!(
844            skip_value(&buf, 0),
845            None,
846            "0xc1 must return None from skip_value"
847        );
848        assert_eq!(read_f64(&buf, 0), None);
849        assert_eq!(read_i64(&buf, 0), None);
850        assert_eq!(read_str(&buf, 0), None);
851        assert_eq!(read_bool(&buf, 0), None);
852        assert_eq!(map_header(&buf, 0), None);
853        assert_eq!(array_header(&buf, 0), None);
854        assert_eq!(read_value(&buf, 0), None);
855    }
856
857    /// All tag boundary bytes — test transitions at fixint/fixmap/fixarray/fixstr edges.
858    #[test]
859    fn fuzz_tag_boundaries() {
860        // Each entry: (tag, expected_skip_result)
861        // For tags that are self-contained single bytes, skip returns Some(1).
862        // For tags requiring more data we just verify no panic with empty tail.
863        let boundary_tags: &[(u8, bool)] = &[
864            (0x00, true),  // positive fixint 0
865            (0x7f, true),  // positive fixint 127
866            (0x80, true),  // fixmap length 0 (empty map)
867            (0x8f, false), // fixmap length 15 — needs 15 pairs
868            (0x90, true),  // fixarray length 0 (empty array)
869            (0x9f, false), // fixarray length 15 — needs 15 elements
870            (0xa0, true),  // fixstr length 0 (empty string)
871            (0xbf, false), // fixstr length 31 — needs 31 bytes after
872            (0xc0, true),  // nil
873            (0xc1, false), // never used — must return None
874            (0xc2, true),  // false
875            (0xc3, true),  // true
876            (0xe0, true),  // negative fixint -32
877            (0xff, true),  // negative fixint -1
878        ];
879        for &(tag, self_contained) in boundary_tags {
880            let buf = [tag; 64]; // fill with the same tag as padding
881            let result = skip_value(&buf, 0);
882            if self_contained {
883                assert!(result.is_some(), "tag 0x{tag:02x} should skip OK");
884            } else if tag == 0xc1 {
885                assert_eq!(result, None, "0xc1 must always return None");
886            }
887            // For non-self-contained tags with valid padding we just verify no panic.
888        }
889    }
890
891    /// Buffers where length fields claim enormous sizes but the buffer is tiny.
892    #[test]
893    fn fuzz_adversarial_length_fields() {
894        // STR32: tag 0xdb + 4-byte big-endian length claiming 0xffffffff bytes
895        let buf = [0xdbu8, 0xff, 0xff, 0xff, 0xff, b'x', b'y'];
896        assert_eq!(skip_value(&buf, 0), None);
897        assert_eq!(read_str(&buf, 0), None);
898
899        // STR16: tag 0xda + 2-byte length claiming 0xffff bytes
900        let buf = [0xdau8, 0xff, 0xff, b'x'];
901        assert_eq!(skip_value(&buf, 0), None);
902
903        // ARRAY32: claims 0xffffffff elements but buffer is empty after header
904        let buf = [0xddu8, 0xff, 0xff, 0xff, 0xff];
905        assert_eq!(skip_value(&buf, 0), None);
906
907        // MAP32: claims 0xffffffff pairs but buffer is empty after header
908        let buf = [0xdfu8, 0xff, 0xff, 0xff, 0xff];
909        assert_eq!(skip_value(&buf, 0), None);
910
911        // ARRAY16: claims 0xffff elements
912        let buf = [0xdcu8, 0xff, 0xff];
913        assert_eq!(skip_value(&buf, 0), None);
914
915        // MAP16: claims 0xffff pairs
916        let buf = [0xdeu8, 0xff, 0xff];
917        assert_eq!(skip_value(&buf, 0), None);
918
919        // BIN32: claims max length
920        let buf = [0xc6u8, 0xff, 0xff, 0xff, 0xff, 0x00];
921        assert_eq!(skip_value(&buf, 0), None);
922
923        // EXT32: claims max length
924        let buf = [0xc9u8, 0xff, 0xff, 0xff, 0xff, 0x01, 0x00];
925        assert_eq!(skip_value(&buf, 0), None);
926    }
927
928    /// Deeply nested maps/arrays must cause `skip_value` to return `None`
929    /// once nesting exceeds MAX_DEPTH (128).
930    #[test]
931    fn fuzz_malicious_nesting_depth() {
932        // Build a buffer with 200 levels of fixarray (each containing 1 element)
933        // fixarray tag for 1 element = 0x91
934        let depth = 200usize;
935        let mut buf = vec![0x91u8; depth]; // fixarray(1) — opens 1-element array
936        buf.push(0xc0u8); // nil at the innermost leaf
937
938        // skip_value must return None because nesting > MAX_DEPTH
939        assert_eq!(
940            skip_value(&buf, 0),
941            None,
942            "deeply nested arrays must return None to guard against stack overflow"
943        );
944
945        // Same with maps: fixmap(1) = 0x81, then a fixstr(1) key + value
946        // Build 200 levels of fixmap(1) — each pair is (fixstr key, next map)
947        let mut map_buf: Vec<u8> = Vec::new();
948        for i in 0..(depth as u8) {
949            map_buf.push(0x81); // fixmap(1)
950            map_buf.push(0xa1); // fixstr(1) key
951            map_buf.push(b'a'.wrapping_add(i % 26));
952            // value = next map (already pushed in next iteration), or nil at end
953        }
954        map_buf.push(0xc0); // nil leaf
955
956        assert_eq!(
957            skip_value(&map_buf, 0),
958            None,
959            "deeply nested maps must return None"
960        );
961    }
962
963    /// Verify skip_value correctly consumes exactly the right number of bytes
964    /// for all fixed-width numeric types and returns the correct next offset.
965    #[test]
966    fn fuzz_fixed_width_numeric_skip_offsets() {
967        // (tag, expected_total_bytes_consumed)
968        let cases: &[(u8, usize)] = &[
969            (0xca, 5), // FLOAT32: 1 tag + 4 data
970            (0xcb, 9), // FLOAT64: 1 tag + 8 data
971            (0xcc, 2), // UINT8
972            (0xcd, 3), // UINT16
973            (0xce, 5), // UINT32
974            (0xcf, 9), // UINT64
975            (0xd0, 2), // INT8
976            (0xd1, 3), // INT16
977            (0xd2, 5), // INT32
978            (0xd3, 9), // INT64
979        ];
980        for &(tag, size) in cases {
981            let mut buf = vec![0u8; size + 4]; // extra padding
982            buf[0] = tag;
983            let result = skip_value(&buf, 0);
984            assert_eq!(
985                result,
986                Some(size),
987                "tag 0x{tag:02x} should advance by {size} bytes"
988            );
989        }
990    }
991
992    /// Verify all fixext types consume the correct byte count.
993    #[test]
994    fn fuzz_fixext_skip_offsets() {
995        // (tag, expected_bytes_consumed)
996        let cases: &[(u8, usize)] = &[
997            (0xd4, 3),  // FIXEXT1: 1+1+1
998            (0xd5, 4),  // FIXEXT2: 1+1+2
999            (0xd6, 6),  // FIXEXT4: 1+1+4
1000            (0xd7, 10), // FIXEXT8: 1+1+8
1001            (0xd8, 18), // FIXEXT16: 1+1+16
1002        ];
1003        for &(tag, size) in cases {
1004            let mut buf = vec![0u8; size + 4];
1005            buf[0] = tag;
1006            let result = skip_value(&buf, 0);
1007            assert_eq!(
1008                result,
1009                Some(size),
1010                "fixext tag 0x{tag:02x} should advance by {size} bytes"
1011            );
1012        }
1013    }
1014
1015    /// Out-of-bounds offset must return `None` — not panic.
1016    #[test]
1017    fn fuzz_out_of_bounds_offset() {
1018        let buf = encode(&json!({"x": 1}));
1019        let way_out = buf.len() + 1000;
1020        assert_eq!(skip_value(&buf, way_out), None);
1021        assert_eq!(read_f64(&buf, way_out), None);
1022        assert_eq!(read_i64(&buf, way_out), None);
1023        assert_eq!(read_str(&buf, way_out), None);
1024        assert_eq!(read_bool(&buf, way_out), None);
1025        assert_eq!(map_header(&buf, way_out), None);
1026        assert_eq!(array_header(&buf, way_out), None);
1027        assert_eq!(read_value(&buf, way_out), None);
1028    }
1029
1030    #[test]
1031    fn read_bin_advance_all_widths() {
1032        // bin8: 0xc4, len=3
1033        let mut off = 0;
1034        let buf = [BIN8, 3, 0xde, 0xad, 0xbe, 0xff];
1035        assert_eq!(
1036            read_bin_advance(&buf, &mut off),
1037            Some(&[0xde, 0xad, 0xbe][..])
1038        );
1039        assert_eq!(off, 5);
1040
1041        // bin16: 0xc5, big-endian len=4
1042        let mut off = 0;
1043        let buf = [BIN16, 0x00, 0x04, 0x01, 0x02, 0x03, 0x04];
1044        assert_eq!(
1045            read_bin_advance(&buf, &mut off),
1046            Some(&[0x01, 0x02, 0x03, 0x04][..])
1047        );
1048        assert_eq!(off, 7);
1049
1050        // bin32: 0xc6, big-endian len=2
1051        let mut off = 0;
1052        let buf = [BIN32, 0x00, 0x00, 0x00, 0x02, 0xaa, 0xbb];
1053        assert_eq!(read_bin_advance(&buf, &mut off), Some(&[0xaa, 0xbb][..]));
1054        assert_eq!(off, 7);
1055
1056        // Non-bin tag returns None and does not advance.
1057        let mut off = 0;
1058        let buf = [0xc0u8]; // nil
1059        assert_eq!(read_bin_advance(&buf, &mut off), None);
1060        assert_eq!(off, 0);
1061
1062        // Truncated returns None.
1063        let mut off = 0;
1064        let buf = [BIN8, 5, 0x01]; // claims 5 bytes, only 1 present
1065        assert_eq!(read_bin_advance(&buf, &mut off), None);
1066    }
1067
1068    #[test]
1069    fn read_u32_advance_all_widths() {
1070        // positive fixint
1071        let mut off = 0;
1072        assert_eq!(read_u32_advance(&[42u8], &mut off), Some(42));
1073        assert_eq!(off, 1);
1074
1075        // uint8
1076        let mut off = 0;
1077        assert_eq!(read_u32_advance(&[UINT8, 200], &mut off), Some(200));
1078        assert_eq!(off, 2);
1079
1080        // uint16
1081        let mut off = 0;
1082        let buf = [UINT16, 0x12, 0x34];
1083        assert_eq!(read_u32_advance(&buf, &mut off), Some(0x1234));
1084        assert_eq!(off, 3);
1085
1086        // uint32
1087        let mut off = 0;
1088        let buf = [UINT32, 0xde, 0xad, 0xbe, 0xef];
1089        assert_eq!(read_u32_advance(&buf, &mut off), Some(0xdeadbeef));
1090        assert_eq!(off, 5);
1091
1092        // negative fixint, int*, uint64, float, etc. all rejected
1093        let mut off = 0;
1094        assert_eq!(read_u32_advance(&[0xffu8], &mut off), None); // negative fixint
1095        assert_eq!(off, 0);
1096        let mut off = 0;
1097        assert_eq!(read_u32_advance(&[INT8, 5], &mut off), None);
1098        let mut off = 0;
1099        assert_eq!(
1100            read_u32_advance(&[UINT64, 0, 0, 0, 0, 0, 0, 0, 1], &mut off),
1101            None
1102        );
1103
1104        // Truncated returns None.
1105        let mut off = 0;
1106        assert_eq!(read_u32_advance(&[UINT16, 0x12], &mut off), None);
1107    }
1108
1109    #[test]
1110    fn read_str_advance_basic() {
1111        // fixstr "hi"
1112        let mut off = 0;
1113        let buf = encode(&json!("hi"));
1114        assert_eq!(read_str_advance(&buf, &mut off), Some("hi"));
1115        assert_eq!(off, buf.len());
1116
1117        // Sequential reads
1118        let buf = encode(&json!(["one", "two"]));
1119        let (count, mut off) = array_header(&buf, 0).unwrap();
1120        assert_eq!(count, 2);
1121        assert_eq!(read_str_advance(&buf, &mut off), Some("one"));
1122        assert_eq!(read_str_advance(&buf, &mut off), Some("two"));
1123        assert_eq!(off, buf.len());
1124
1125        // Non-string returns None.
1126        let mut off = 0;
1127        assert_eq!(read_str_advance(&[NIL], &mut off), None);
1128        assert_eq!(off, 0);
1129    }
1130
1131    /// Empty buffer must return `None` for all functions that can.
1132    #[test]
1133    fn fuzz_empty_buffer() {
1134        let buf: &[u8] = &[];
1135        assert_eq!(skip_value(buf, 0), None);
1136        assert_eq!(read_f64(buf, 0), None);
1137        assert_eq!(read_i64(buf, 0), None);
1138        assert_eq!(read_str(buf, 0), None);
1139        assert_eq!(read_bool(buf, 0), None);
1140        assert!(!read_null(buf, 0)); // returns bool, not Option
1141        assert_eq!(map_header(buf, 0), None);
1142        assert_eq!(array_header(buf, 0), None);
1143        assert_eq!(read_value(buf, 0), None);
1144    }
1145}