facet_msgpack/
parser.rs

1//! MsgPack parser implementing FormatParser.
2//!
3//! This implements full FormatParser support for MsgPack deserialization,
4//! with Tier-2 JIT support for compatible types.
5
6extern crate alloc;
7
8use alloc::{borrow::Cow, format, vec::Vec};
9
10use crate::error::{MsgPackError, codes};
11use facet_format::{
12    ContainerKind, FieldEvidence, FieldKey, FieldLocationHint, FormatParser, ParseEvent,
13    ProbeStream, ScalarValue,
14};
15
// MsgPack wire-format marker bytes, grouped by encoding family.

// Fix-format ranges: the payload (value or length) lives in the marker itself.
const MSGPACK_POSFIXINT_MAX: u8 = 0x7f;
const MSGPACK_FIXMAP_MIN: u8 = 0x80;
const MSGPACK_FIXMAP_MAX: u8 = 0x8f;
const MSGPACK_FIXARRAY_MIN: u8 = 0x90;
const MSGPACK_FIXARRAY_MAX: u8 = 0x9f;
const MSGPACK_FIXSTR_MIN: u8 = 0xa0;
const MSGPACK_FIXSTR_MAX: u8 = 0xbf;
const MSGPACK_NEGFIXINT_MIN: u8 = 0xe0;

// Nil and booleans.
const MSGPACK_NIL: u8 = 0xc0;
const MSGPACK_FALSE: u8 = 0xc2;
const MSGPACK_TRUE: u8 = 0xc3;

// Binary blobs with an 8/16/32-bit length prefix.
const MSGPACK_BIN8: u8 = 0xc4;
const MSGPACK_BIN16: u8 = 0xc5;
const MSGPACK_BIN32: u8 = 0xc6;

// Floating point.
const MSGPACK_FLOAT32: u8 = 0xca;
const MSGPACK_FLOAT64: u8 = 0xcb;

// Unsigned integers.
const MSGPACK_UINT8: u8 = 0xcc;
const MSGPACK_UINT16: u8 = 0xcd;
const MSGPACK_UINT32: u8 = 0xce;
const MSGPACK_UINT64: u8 = 0xcf;

// Signed integers.
const MSGPACK_INT8: u8 = 0xd0;
const MSGPACK_INT16: u8 = 0xd1;
const MSGPACK_INT32: u8 = 0xd2;
const MSGPACK_INT64: u8 = 0xd3;

// Strings with an 8/16/32-bit length prefix.
const MSGPACK_STR8: u8 = 0xd9;
const MSGPACK_STR16: u8 = 0xda;
const MSGPACK_STR32: u8 = 0xdb;

// Arrays and maps with a 16/32-bit length prefix.
const MSGPACK_ARRAY16: u8 = 0xdc;
const MSGPACK_ARRAY32: u8 = 0xdd;
const MSGPACK_MAP16: u8 = 0xde;
const MSGPACK_MAP32: u8 = 0xdf;
49
50/// MsgPack parser for deserialization.
51///
52/// Supports both Tier-0 (FormatParser) and Tier-2 (JIT) modes.
53pub struct MsgPackParser<'de> {
54    input: &'de [u8],
55    pos: usize,
56    /// Stack tracking nested containers and their remaining items
57    stack: Vec<ContextState>,
58    /// Cached event for peek_event
59    event_peek: Option<ParseEvent<'de>>,
60}
61
/// Parser position inside one open container.
#[derive(Clone, Copy, Debug)]
enum ContextState {
    /// In a map, about to read a key; `remaining` counts the pairs not yet started.
    MapKey { remaining: usize },
    /// In a map, about to read the value of the current pair; `remaining` counts
    /// the pairs that follow it.
    MapValue { remaining: usize },
    /// In an array; `remaining` counts the elements not yet read.
    Array { remaining: usize },
}
71
72impl<'de> MsgPackParser<'de> {
73    /// Create a new MsgPack parser from input bytes.
74    pub fn new(input: &'de [u8]) -> Self {
75        Self {
76            input,
77            pos: 0,
78            stack: Vec::new(),
79            event_peek: None,
80        }
81    }
82
83    /// Peek at the next byte without consuming it.
84    fn peek_byte(&self) -> Result<u8, MsgPackError> {
85        self.input
86            .get(self.pos)
87            .copied()
88            .ok_or_else(|| MsgPackError::from_code(codes::UNEXPECTED_EOF, self.pos))
89    }
90
91    /// Read a single byte.
92    fn read_byte(&mut self) -> Result<u8, MsgPackError> {
93        let byte = self.peek_byte()?;
94        self.pos += 1;
95        Ok(byte)
96    }
97
98    /// Read N bytes as a slice.
99    fn read_bytes(&mut self, n: usize) -> Result<&'de [u8], MsgPackError> {
100        if self.pos + n > self.input.len() {
101            return Err(MsgPackError::from_code(codes::UNEXPECTED_EOF, self.pos));
102        }
103        let slice = &self.input[self.pos..self.pos + n];
104        self.pos += n;
105        Ok(slice)
106    }
107
108    /// Read a u16 in big-endian.
109    fn read_u16(&mut self) -> Result<u16, MsgPackError> {
110        let bytes = self.read_bytes(2)?;
111        Ok(u16::from_be_bytes([bytes[0], bytes[1]]))
112    }
113
114    /// Read a u32 in big-endian.
115    fn read_u32(&mut self) -> Result<u32, MsgPackError> {
116        let bytes = self.read_bytes(4)?;
117        Ok(u32::from_be_bytes([bytes[0], bytes[1], bytes[2], bytes[3]]))
118    }
119
120    /// Read a u64 in big-endian.
121    fn read_u64(&mut self) -> Result<u64, MsgPackError> {
122        let bytes = self.read_bytes(8)?;
123        Ok(u64::from_be_bytes([
124            bytes[0], bytes[1], bytes[2], bytes[3], bytes[4], bytes[5], bytes[6], bytes[7],
125        ]))
126    }
127
128    /// Read an i8.
129    fn read_i8(&mut self) -> Result<i8, MsgPackError> {
130        Ok(self.read_byte()? as i8)
131    }
132
133    /// Read an i16 in big-endian.
134    fn read_i16(&mut self) -> Result<i16, MsgPackError> {
135        let bytes = self.read_bytes(2)?;
136        Ok(i16::from_be_bytes([bytes[0], bytes[1]]))
137    }
138
139    /// Read an i32 in big-endian.
140    fn read_i32(&mut self) -> Result<i32, MsgPackError> {
141        let bytes = self.read_bytes(4)?;
142        Ok(i32::from_be_bytes([bytes[0], bytes[1], bytes[2], bytes[3]]))
143    }
144
145    /// Read an i64 in big-endian.
146    fn read_i64(&mut self) -> Result<i64, MsgPackError> {
147        let bytes = self.read_bytes(8)?;
148        Ok(i64::from_be_bytes([
149            bytes[0], bytes[1], bytes[2], bytes[3], bytes[4], bytes[5], bytes[6], bytes[7],
150        ]))
151    }
152
153    /// Read an f32 in big-endian.
154    fn read_f32(&mut self) -> Result<f32, MsgPackError> {
155        let bytes = self.read_bytes(4)?;
156        Ok(f32::from_be_bytes([bytes[0], bytes[1], bytes[2], bytes[3]]))
157    }
158
159    /// Read an f64 in big-endian.
160    fn read_f64(&mut self) -> Result<f64, MsgPackError> {
161        let bytes = self.read_bytes(8)?;
162        Ok(f64::from_be_bytes([
163            bytes[0], bytes[1], bytes[2], bytes[3], bytes[4], bytes[5], bytes[6], bytes[7],
164        ]))
165    }
166
167    /// Read a string length based on prefix.
168    fn read_str_len(&mut self, prefix: u8) -> Result<usize, MsgPackError> {
169        match prefix {
170            MSGPACK_FIXSTR_MIN..=MSGPACK_FIXSTR_MAX => Ok((prefix & 0x1f) as usize),
171            MSGPACK_STR8 => Ok(self.read_byte()? as usize),
172            MSGPACK_STR16 => Ok(self.read_u16()? as usize),
173            MSGPACK_STR32 => Ok(self.read_u32()? as usize),
174            _ => Err(MsgPackError {
175                code: codes::EXPECTED_INT,
176                pos: self.pos,
177                message: format!("expected string, got 0x{:02x}", prefix),
178            }),
179        }
180    }
181
182    /// Read a string value.
183    fn read_string(&mut self) -> Result<Cow<'de, str>, MsgPackError> {
184        let prefix = self.read_byte()?;
185        let len = self.read_str_len(prefix)?;
186        let bytes = self.read_bytes(len)?;
187        core::str::from_utf8(bytes)
188            .map(Cow::Borrowed)
189            .map_err(|_| MsgPackError {
190                code: codes::EXPECTED_INT,
191                pos: self.pos - len,
192                message: "invalid UTF-8 in string".into(),
193            })
194    }
195
196    /// Read an array length.
197    fn read_array_len(&mut self, prefix: u8) -> Result<usize, MsgPackError> {
198        match prefix {
199            MSGPACK_FIXARRAY_MIN..=MSGPACK_FIXARRAY_MAX => Ok((prefix & 0x0f) as usize),
200            MSGPACK_ARRAY16 => Ok(self.read_u16()? as usize),
201            MSGPACK_ARRAY32 => Ok(self.read_u32()? as usize),
202            _ => Err(MsgPackError::from_code(codes::EXPECTED_ARRAY, self.pos)),
203        }
204    }
205
206    /// Read a map length.
207    fn read_map_len(&mut self, prefix: u8) -> Result<usize, MsgPackError> {
208        match prefix {
209            MSGPACK_FIXMAP_MIN..=MSGPACK_FIXMAP_MAX => Ok((prefix & 0x0f) as usize),
210            MSGPACK_MAP16 => Ok(self.read_u16()? as usize),
211            MSGPACK_MAP32 => Ok(self.read_u32()? as usize),
212            _ => Err(MsgPackError {
213                code: codes::EXPECTED_INT,
214                pos: self.pos,
215                message: format!("expected map, got 0x{:02x}", prefix),
216            }),
217        }
218    }
219
220    /// Finish processing a value and update parent container state.
221    fn finish_value(&mut self) {
222        if let Some(context) = self.stack.last_mut() {
223            match context {
224                ContextState::MapValue { remaining } => {
225                    // Finished a value, go back to expecting a key (or end)
226                    *context = ContextState::MapKey {
227                        remaining: *remaining,
228                    };
229                }
230                ContextState::MapKey { remaining } => {
231                    // This shouldn't happen (keys transition to values), but handle it
232                    if *remaining > 0 {
233                        *remaining -= 1;
234                    }
235                }
236                ContextState::Array { remaining } => {
237                    if *remaining > 0 {
238                        *remaining -= 1;
239                    }
240                }
241            }
242        }
243    }
244
245    /// Produce the next parse event.
246    fn produce_event(&mut self) -> Result<Option<ParseEvent<'de>>, MsgPackError> {
247        // Check if we need to emit container end events
248        // This can happen when a container has been fully consumed
249        if let Some(context) = self.stack.last() {
250            match context {
251                ContextState::MapKey { remaining: 0 } => {
252                    self.stack.pop();
253                    self.finish_value();
254                    return Ok(Some(ParseEvent::StructEnd));
255                }
256                ContextState::Array { remaining: 0 } => {
257                    self.stack.pop();
258                    self.finish_value();
259                    return Ok(Some(ParseEvent::SequenceEnd));
260                }
261                _ => {}
262            }
263        }
264
265        // Check if we're at EOF
266        if self.pos >= self.input.len() {
267            return Ok(None);
268        }
269
270        // Determine what to do based on context
271        // Check if we're expecting a map key and get the remaining count
272        let expecting_key_remaining = match self.stack.last() {
273            Some(ContextState::MapKey { remaining }) => Some(*remaining),
274            _ => None,
275        };
276
277        if let Some(remaining) = expecting_key_remaining {
278            // We expect a key (string)
279            let key = self.read_string()?;
280
281            // Update the stack: decrement remaining and transition to expecting value
282            let new_remaining = remaining - 1;
283            if let Some(state) = self.stack.last_mut() {
284                *state = ContextState::MapValue {
285                    remaining: new_remaining,
286                };
287            }
288
289            return Ok(Some(ParseEvent::FieldKey(FieldKey::new(
290                key,
291                FieldLocationHint::KeyValue,
292            ))));
293        }
294
295        // Parse the next value
296        let prefix = self.read_byte()?;
297
298        match prefix {
299            // Nil
300            MSGPACK_NIL => {
301                self.finish_value();
302                Ok(Some(ParseEvent::Scalar(ScalarValue::Null)))
303            }
304
305            // Boolean
306            MSGPACK_FALSE => {
307                self.finish_value();
308                Ok(Some(ParseEvent::Scalar(ScalarValue::Bool(false))))
309            }
310            MSGPACK_TRUE => {
311                self.finish_value();
312                Ok(Some(ParseEvent::Scalar(ScalarValue::Bool(true))))
313            }
314
315            // Positive fixint (0x00-0x7f)
316            0x00..=MSGPACK_POSFIXINT_MAX => {
317                self.finish_value();
318                Ok(Some(ParseEvent::Scalar(ScalarValue::U64(prefix as u64))))
319            }
320
321            // Negative fixint (0xe0-0xff)
322            MSGPACK_NEGFIXINT_MIN..=0xff => {
323                self.finish_value();
324                Ok(Some(ParseEvent::Scalar(ScalarValue::I64(
325                    prefix as i8 as i64,
326                ))))
327            }
328
329            // Unsigned integers
330            MSGPACK_UINT8 => {
331                let v = self.read_byte()? as u64;
332                self.finish_value();
333                Ok(Some(ParseEvent::Scalar(ScalarValue::U64(v))))
334            }
335            MSGPACK_UINT16 => {
336                let v = self.read_u16()? as u64;
337                self.finish_value();
338                Ok(Some(ParseEvent::Scalar(ScalarValue::U64(v))))
339            }
340            MSGPACK_UINT32 => {
341                let v = self.read_u32()? as u64;
342                self.finish_value();
343                Ok(Some(ParseEvent::Scalar(ScalarValue::U64(v))))
344            }
345            MSGPACK_UINT64 => {
346                let v = self.read_u64()?;
347                self.finish_value();
348                Ok(Some(ParseEvent::Scalar(ScalarValue::U64(v))))
349            }
350
351            // Signed integers
352            MSGPACK_INT8 => {
353                let v = self.read_i8()? as i64;
354                self.finish_value();
355                Ok(Some(ParseEvent::Scalar(ScalarValue::I64(v))))
356            }
357            MSGPACK_INT16 => {
358                let v = self.read_i16()? as i64;
359                self.finish_value();
360                Ok(Some(ParseEvent::Scalar(ScalarValue::I64(v))))
361            }
362            MSGPACK_INT32 => {
363                let v = self.read_i32()? as i64;
364                self.finish_value();
365                Ok(Some(ParseEvent::Scalar(ScalarValue::I64(v))))
366            }
367            MSGPACK_INT64 => {
368                let v = self.read_i64()?;
369                self.finish_value();
370                Ok(Some(ParseEvent::Scalar(ScalarValue::I64(v))))
371            }
372
373            // Floats
374            MSGPACK_FLOAT32 => {
375                let v = self.read_f32()? as f64;
376                self.finish_value();
377                Ok(Some(ParseEvent::Scalar(ScalarValue::F64(v))))
378            }
379            MSGPACK_FLOAT64 => {
380                let v = self.read_f64()?;
381                self.finish_value();
382                Ok(Some(ParseEvent::Scalar(ScalarValue::F64(v))))
383            }
384
385            // Strings (fixstr, str8, str16, str32)
386            MSGPACK_FIXSTR_MIN..=MSGPACK_FIXSTR_MAX
387            | MSGPACK_STR8
388            | MSGPACK_STR16
389            | MSGPACK_STR32 => {
390                let len = self.read_str_len(prefix)?;
391                let bytes = self.read_bytes(len)?;
392                let s = core::str::from_utf8(bytes)
393                    .map(Cow::Borrowed)
394                    .map_err(|_| MsgPackError {
395                        code: codes::EXPECTED_INT,
396                        pos: self.pos - len,
397                        message: "invalid UTF-8 in string".into(),
398                    })?;
399                self.finish_value();
400                Ok(Some(ParseEvent::Scalar(ScalarValue::Str(s))))
401            }
402
403            // Binary data
404            MSGPACK_BIN8 => {
405                let len = self.read_byte()? as usize;
406                let bytes = self.read_bytes(len)?;
407                self.finish_value();
408                Ok(Some(ParseEvent::Scalar(ScalarValue::Bytes(Cow::Borrowed(
409                    bytes,
410                )))))
411            }
412            MSGPACK_BIN16 => {
413                let len = self.read_u16()? as usize;
414                let bytes = self.read_bytes(len)?;
415                self.finish_value();
416                Ok(Some(ParseEvent::Scalar(ScalarValue::Bytes(Cow::Borrowed(
417                    bytes,
418                )))))
419            }
420            MSGPACK_BIN32 => {
421                let len = self.read_u32()? as usize;
422                let bytes = self.read_bytes(len)?;
423                self.finish_value();
424                Ok(Some(ParseEvent::Scalar(ScalarValue::Bytes(Cow::Borrowed(
425                    bytes,
426                )))))
427            }
428
429            // Arrays
430            MSGPACK_FIXARRAY_MIN..=MSGPACK_FIXARRAY_MAX | MSGPACK_ARRAY16 | MSGPACK_ARRAY32 => {
431                let len = self.read_array_len(prefix)?;
432                self.stack.push(ContextState::Array { remaining: len });
433                Ok(Some(ParseEvent::SequenceStart(ContainerKind::Array)))
434            }
435
436            // Maps
437            MSGPACK_FIXMAP_MIN..=MSGPACK_FIXMAP_MAX | MSGPACK_MAP16 | MSGPACK_MAP32 => {
438                let len = self.read_map_len(prefix)?;
439                self.stack.push(ContextState::MapKey { remaining: len });
440                Ok(Some(ParseEvent::StructStart(ContainerKind::Object)))
441            }
442
443            // Unsupported types (ext, etc.)
444            _ => Err(MsgPackError {
445                code: codes::UNSUPPORTED,
446                pos: self.pos - 1,
447                message: format!("unsupported MsgPack type: 0x{:02x}", prefix),
448            }),
449        }
450    }
451
452    /// Skip a complete value (used for skip_value and probing).
453    fn skip_value_internal(&mut self) -> Result<(), MsgPackError> {
454        let prefix = self.read_byte()?;
455
456        match prefix {
457            // Nil, booleans - already consumed
458            MSGPACK_NIL | MSGPACK_FALSE | MSGPACK_TRUE => Ok(()),
459
460            // Positive fixint - already consumed
461            0x00..=MSGPACK_POSFIXINT_MAX => Ok(()),
462
463            // Negative fixint - already consumed
464            MSGPACK_NEGFIXINT_MIN..=0xff => Ok(()),
465
466            // Unsigned integers
467            MSGPACK_UINT8 => {
468                self.pos += 1;
469                Ok(())
470            }
471            MSGPACK_UINT16 => {
472                self.pos += 2;
473                Ok(())
474            }
475            MSGPACK_UINT32 => {
476                self.pos += 4;
477                Ok(())
478            }
479            MSGPACK_UINT64 => {
480                self.pos += 8;
481                Ok(())
482            }
483
484            // Signed integers
485            MSGPACK_INT8 => {
486                self.pos += 1;
487                Ok(())
488            }
489            MSGPACK_INT16 => {
490                self.pos += 2;
491                Ok(())
492            }
493            MSGPACK_INT32 => {
494                self.pos += 4;
495                Ok(())
496            }
497            MSGPACK_INT64 => {
498                self.pos += 8;
499                Ok(())
500            }
501
502            // Floats
503            MSGPACK_FLOAT32 => {
504                self.pos += 4;
505                Ok(())
506            }
507            MSGPACK_FLOAT64 => {
508                self.pos += 8;
509                Ok(())
510            }
511
512            // Strings
513            MSGPACK_FIXSTR_MIN..=MSGPACK_FIXSTR_MAX => {
514                let len = (prefix & 0x1f) as usize;
515                self.pos += len;
516                Ok(())
517            }
518            MSGPACK_STR8 => {
519                let len = self.read_byte()? as usize;
520                self.pos += len;
521                Ok(())
522            }
523            MSGPACK_STR16 => {
524                let len = self.read_u16()? as usize;
525                self.pos += len;
526                Ok(())
527            }
528            MSGPACK_STR32 => {
529                let len = self.read_u32()? as usize;
530                self.pos += len;
531                Ok(())
532            }
533
534            // Binary
535            MSGPACK_BIN8 => {
536                let len = self.read_byte()? as usize;
537                self.pos += len;
538                Ok(())
539            }
540            MSGPACK_BIN16 => {
541                let len = self.read_u16()? as usize;
542                self.pos += len;
543                Ok(())
544            }
545            MSGPACK_BIN32 => {
546                let len = self.read_u32()? as usize;
547                self.pos += len;
548                Ok(())
549            }
550
551            // Arrays - skip all elements
552            MSGPACK_FIXARRAY_MIN..=MSGPACK_FIXARRAY_MAX => {
553                let len = (prefix & 0x0f) as usize;
554                for _ in 0..len {
555                    self.skip_value_internal()?;
556                }
557                Ok(())
558            }
559            MSGPACK_ARRAY16 => {
560                let len = self.read_u16()? as usize;
561                for _ in 0..len {
562                    self.skip_value_internal()?;
563                }
564                Ok(())
565            }
566            MSGPACK_ARRAY32 => {
567                let len = self.read_u32()? as usize;
568                for _ in 0..len {
569                    self.skip_value_internal()?;
570                }
571                Ok(())
572            }
573
574            // Maps - skip all key-value pairs
575            MSGPACK_FIXMAP_MIN..=MSGPACK_FIXMAP_MAX => {
576                let len = (prefix & 0x0f) as usize;
577                for _ in 0..len {
578                    self.skip_value_internal()?; // key
579                    self.skip_value_internal()?; // value
580                }
581                Ok(())
582            }
583            MSGPACK_MAP16 => {
584                let len = self.read_u16()? as usize;
585                for _ in 0..len {
586                    self.skip_value_internal()?; // key
587                    self.skip_value_internal()?; // value
588                }
589                Ok(())
590            }
591            MSGPACK_MAP32 => {
592                let len = self.read_u32()? as usize;
593                for _ in 0..len {
594                    self.skip_value_internal()?; // key
595                    self.skip_value_internal()?; // value
596                }
597                Ok(())
598            }
599
600            // Extension types - skip
601            0xc7 => {
602                // ext8
603                let len = self.read_byte()? as usize;
604                self.pos += 1 + len; // type byte + data
605                Ok(())
606            }
607            0xc8 => {
608                // ext16
609                let len = self.read_u16()? as usize;
610                self.pos += 1 + len;
611                Ok(())
612            }
613            0xc9 => {
614                // ext32
615                let len = self.read_u32()? as usize;
616                self.pos += 1 + len;
617                Ok(())
618            }
619            0xd4 => {
620                // fixext1
621                self.pos += 2;
622                Ok(())
623            }
624            0xd5 => {
625                // fixext2
626                self.pos += 3;
627                Ok(())
628            }
629            0xd6 => {
630                // fixext4
631                self.pos += 5;
632                Ok(())
633            }
634            0xd7 => {
635                // fixext8
636                self.pos += 9;
637                Ok(())
638            }
639            0xd8 => {
640                // fixext16
641                self.pos += 17;
642                Ok(())
643            }
644
645            _ => Err(MsgPackError {
646                code: codes::UNSUPPORTED,
647                pos: self.pos - 1,
648                message: format!("unsupported MsgPack type: 0x{:02x}", prefix),
649            }),
650        }
651    }
652
653    /// Build probe evidence by scanning ahead in a map.
654    fn build_probe(&self) -> Result<Vec<FieldEvidence<'de>>, MsgPackError> {
655        // Create a temporary parser to scan ahead
656        let mut probe_pos = self.pos;
657        let mut evidence = Vec::new();
658
659        // If we've peeked a StructStart, we need to scan the map that follows
660        // Check if next byte is a map prefix
661        if probe_pos >= self.input.len() {
662            return Ok(evidence);
663        }
664
665        let prefix = self.input[probe_pos];
666        let map_len = match prefix {
667            MSGPACK_FIXMAP_MIN..=MSGPACK_FIXMAP_MAX => {
668                probe_pos += 1;
669                (prefix & 0x0f) as usize
670            }
671            MSGPACK_MAP16 => {
672                if probe_pos + 3 > self.input.len() {
673                    return Ok(evidence);
674                }
675                probe_pos += 1;
676                let len = u16::from_be_bytes([self.input[probe_pos], self.input[probe_pos + 1]]);
677                probe_pos += 2;
678                len as usize
679            }
680            MSGPACK_MAP32 => {
681                if probe_pos + 5 > self.input.len() {
682                    return Ok(evidence);
683                }
684                probe_pos += 1;
685                let len = u32::from_be_bytes([
686                    self.input[probe_pos],
687                    self.input[probe_pos + 1],
688                    self.input[probe_pos + 2],
689                    self.input[probe_pos + 3],
690                ]);
691                probe_pos += 4;
692                len as usize
693            }
694            _ => return Ok(evidence), // Not a map
695        };
696
697        // Scan each key-value pair
698        for _ in 0..map_len {
699            if probe_pos >= self.input.len() {
700                break;
701            }
702
703            // Read key (must be a string)
704            let key_prefix = self.input[probe_pos];
705            let key_len = match key_prefix {
706                MSGPACK_FIXSTR_MIN..=MSGPACK_FIXSTR_MAX => {
707                    probe_pos += 1;
708                    (key_prefix & 0x1f) as usize
709                }
710                MSGPACK_STR8 => {
711                    if probe_pos + 2 > self.input.len() {
712                        break;
713                    }
714                    probe_pos += 1;
715                    let len = self.input[probe_pos] as usize;
716                    probe_pos += 1;
717                    len
718                }
719                MSGPACK_STR16 => {
720                    if probe_pos + 3 > self.input.len() {
721                        break;
722                    }
723                    probe_pos += 1;
724                    let len = u16::from_be_bytes([self.input[probe_pos], self.input[probe_pos + 1]])
725                        as usize;
726                    probe_pos += 2;
727                    len
728                }
729                MSGPACK_STR32 => {
730                    if probe_pos + 5 > self.input.len() {
731                        break;
732                    }
733                    probe_pos += 1;
734                    let len = u32::from_be_bytes([
735                        self.input[probe_pos],
736                        self.input[probe_pos + 1],
737                        self.input[probe_pos + 2],
738                        self.input[probe_pos + 3],
739                    ]) as usize;
740                    probe_pos += 4;
741                    len
742                }
743                _ => break, // Non-string key, stop probing
744            };
745
746            if probe_pos + key_len > self.input.len() {
747                break;
748            }
749
750            let key_bytes = &self.input[probe_pos..probe_pos + key_len];
751            probe_pos += key_len;
752
753            let key = match core::str::from_utf8(key_bytes) {
754                Ok(s) => Cow::Borrowed(s),
755                Err(_) => break,
756            };
757
758            // Try to read scalar value for evidence
759            if probe_pos >= self.input.len() {
760                evidence.push(FieldEvidence::new(
761                    key,
762                    FieldLocationHint::KeyValue,
763                    None,
764                    None,
765                ));
766                break;
767            }
768
769            let value_prefix = self.input[probe_pos];
770            let scalar_value = match value_prefix {
771                MSGPACK_NIL => {
772                    probe_pos += 1;
773                    Some(ScalarValue::Null)
774                }
775                MSGPACK_FALSE => {
776                    probe_pos += 1;
777                    Some(ScalarValue::Bool(false))
778                }
779                MSGPACK_TRUE => {
780                    probe_pos += 1;
781                    Some(ScalarValue::Bool(true))
782                }
783                0x00..=MSGPACK_POSFIXINT_MAX => {
784                    probe_pos += 1;
785                    Some(ScalarValue::U64(value_prefix as u64))
786                }
787                MSGPACK_NEGFIXINT_MIN..=0xff => {
788                    probe_pos += 1;
789                    Some(ScalarValue::I64(value_prefix as i8 as i64))
790                }
791                MSGPACK_FIXSTR_MIN..=MSGPACK_FIXSTR_MAX => {
792                    let str_len = (value_prefix & 0x1f) as usize;
793                    probe_pos += 1;
794                    if probe_pos + str_len <= self.input.len() {
795                        let str_bytes = &self.input[probe_pos..probe_pos + str_len];
796                        probe_pos += str_len;
797                        core::str::from_utf8(str_bytes)
798                            .ok()
799                            .map(|s| ScalarValue::Str(Cow::Borrowed(s)))
800                    } else {
801                        None
802                    }
803                }
804                // For complex types, skip and don't include scalar value
805                _ => {
806                    // Use a temporary parser to skip the value
807                    let mut tmp = MsgPackParser::new(&self.input[probe_pos..]);
808                    if tmp.skip_value_internal().is_ok() {
809                        probe_pos += tmp.pos;
810                        None
811                    } else {
812                        break;
813                    }
814                }
815            };
816
817            if let Some(sv) = scalar_value {
818                evidence.push(FieldEvidence::with_scalar_value(
819                    key,
820                    FieldLocationHint::KeyValue,
821                    None,
822                    sv,
823                    None,
824                ));
825            } else {
826                evidence.push(FieldEvidence::new(
827                    key,
828                    FieldLocationHint::KeyValue,
829                    None,
830                    None,
831                ));
832            }
833        }
834
835        Ok(evidence)
836    }
837}
838
839impl<'de> FormatParser<'de> for MsgPackParser<'de> {
840    type Error = MsgPackError;
841    type Probe<'a>
842        = MsgPackProbe<'de>
843    where
844        Self: 'a;
845
846    fn next_event(&mut self) -> Result<Option<ParseEvent<'de>>, Self::Error> {
847        if let Some(event) = self.event_peek.take() {
848            return Ok(Some(event));
849        }
850        self.produce_event()
851    }
852
853    fn peek_event(&mut self) -> Result<Option<ParseEvent<'de>>, Self::Error> {
854        if let Some(event) = self.event_peek.clone() {
855            return Ok(Some(event));
856        }
857        let event = self.produce_event()?;
858        if let Some(ref e) = event {
859            self.event_peek = Some(e.clone());
860        }
861        Ok(event)
862    }
863
864    fn skip_value(&mut self) -> Result<(), Self::Error> {
865        debug_assert!(
866            self.event_peek.is_none(),
867            "skip_value called while an event is buffered"
868        );
869        self.skip_value_internal()?;
870        self.finish_value();
871        Ok(())
872    }
873
874    fn begin_probe(&mut self) -> Result<Self::Probe<'_>, Self::Error> {
875        let evidence = self.build_probe()?;
876        Ok(MsgPackProbe { evidence, idx: 0 })
877    }
878}
879
#[cfg(feature = "jit")]
impl<'de> facet_format::FormatJitParser<'de> for MsgPackParser<'de> {
    type FormatJit = crate::jit::MsgPackJitFormat;

    /// The full input buffer handed to the JIT.
    fn jit_input(&self) -> &'de [u8] {
        self.input
    }

    /// Current position, or `None` when the JIT must not take over.
    ///
    /// Tier-2 JIT is only safe at a root boundary: no peeked event (the position
    /// would be ambiguous) and an empty stack (not inside any container).
    fn jit_pos(&self) -> Option<usize> {
        let at_root_boundary = self.event_peek.is_none() && self.stack.is_empty();
        at_root_boundary.then_some(self.pos)
    }

    /// Adopt the position reached by the JIT and drop any buffered event.
    fn jit_set_pos(&mut self, pos: usize) {
        self.pos = pos;
        self.event_peek = None;
        // `jit_pos` only hands out a position when the stack is empty.
        debug_assert!(self.stack.is_empty());
    }

    /// The format descriptor used by the JIT.
    fn jit_format(&self) -> Self::FormatJit {
        crate::jit::MsgPackJitFormat
    }

    /// Convert a JIT error code and position into a `MsgPackError`.
    fn jit_error(&self, _input: &'de [u8], error_pos: usize, error_code: i32) -> Self::Error {
        MsgPackError::from_code(error_code, error_pos)
    }
}
916
917/// Probe stream for MsgPack.
918pub struct MsgPackProbe<'de> {
919    evidence: Vec<FieldEvidence<'de>>,
920    idx: usize,
921}
922
923impl<'de> ProbeStream<'de> for MsgPackProbe<'de> {
924    type Error = MsgPackError;
925
926    fn next(&mut self) -> Result<Option<FieldEvidence<'de>>, Self::Error> {
927        if self.idx >= self.evidence.len() {
928            Ok(None)
929        } else {
930            let ev = self.evidence[self.idx].clone();
931            self.idx += 1;
932            Ok(Some(ev))
933        }
934    }
935}