facet_msgpack/
from_msgpack.rs

1use crate::constants::*;
2use crate::errors::Error as DecodeError;
3
4use facet_core::{Def, Facet};
5use facet_reflect::{HeapValue, Wip};
6use log::trace;
7
8/// Deserializes MessagePack-encoded data into a type that implements `Facet`.
9///
10/// # Example
11/// ```
12/// use facet::Facet;
13/// use facet_msgpack::from_str;
14///
15/// #[derive(Debug, Facet, PartialEq)]
16/// struct User {
17///     id: u64,
18///     username: String,
19/// }
20///
21/// // MessagePack binary data (equivalent to {"id": 42, "username": "user123"})
22/// let msgpack_data = [
23///     0x82, 0xa2, 0x69, 0x64, 0x2a, 0xa8, 0x75, 0x73,
24///     0x65, 0x72, 0x6e, 0x61, 0x6d, 0x65, 0xa7, 0x75,
25///     0x73, 0x65, 0x72, 0x31, 0x32, 0x33
26/// ];
27///
28/// let user: User = from_str(&msgpack_data).unwrap();
29/// assert_eq!(user, User { id: 42, username: "user123".to_string() });
30/// ```
31pub fn from_slice<T: Facet>(msgpack: &[u8]) -> Result<T, DecodeError> {
32    from_slice_value(Wip::alloc::<T>(), msgpack)?
33        .materialize::<T>()
34        .map_err(|e| DecodeError::UnsupportedType(e.to_string()))
35}
36
37/// Alias for from_slice for backward compatibility
38#[deprecated(since = "0.1.0", note = "Use from_slice instead")]
39pub fn from_str<T: Facet>(msgpack: &[u8]) -> Result<T, DecodeError> {
40    from_slice(msgpack)
41}
42
43/// Deserializes MessagePack-encoded data into a Facet value.
44///
45/// This function takes a MessagePack byte array and populates a Wip object
46/// according to the shape description, returning an Opaque value.
47///
48/// # Example
49///
50/// ```
51/// use facet::Facet;
52/// use facet_msgpack::from_slice;
53///
54/// #[derive(Debug, Facet, PartialEq)]
55/// struct User {
56///     id: u64,
57///     username: String,
58/// }
59///
60/// // MessagePack binary data (equivalent to {"id": 42, "username": "user123"})
61/// let msgpack_data = [
62///     0x82, 0xa2, 0x69, 0x64, 0x2a, 0xa8, 0x75, 0x73,
63///     0x65, 0x72, 0x6e, 0x61, 0x6d, 0x65, 0xa7, 0x75,
64///     0x73, 0x65, 0x72, 0x31, 0x32, 0x33
65/// ];
66///
67/// let user: User = from_slice(&msgpack_data).unwrap();
68/// assert_eq!(user, User { id: 42, username: "user123".to_string() });
69/// ```
70///
71/// # Parameters
72/// * `wip` - A Wip object that will be filled with deserialized data
73/// * `msgpack` - A byte slice containing MessagePack-encoded data
74///
75/// # Returns
76/// * `Ok(Opaque)` containing the deserialized data if successful
77/// * `Err(DecodeError)` if an error occurred during deserialization
78///
79/// # MessagePack Format
80/// This implementation follows the MessagePack specification:
81/// <https://github.com/msgpack/msgpack/blob/master/spec.md>
82#[allow(clippy::needless_lifetimes)]
83pub fn from_slice_value<'mem>(
84    wip: Wip<'mem>,
85    msgpack: &'mem [u8],
86) -> Result<HeapValue<'mem>, DecodeError> {
87    let mut decoder = Decoder::new(msgpack);
88    decoder
89        .deserialize_value(wip)?
90        .build()
91        .map_err(|e| DecodeError::UnsupportedType(e.to_string()))
92}
93
94struct Decoder<'input> {
95    input: &'input [u8],
96    offset: usize,
97}
98
99impl<'input> Decoder<'input> {
100    fn new(input: &'input [u8]) -> Self {
101        Decoder { input, offset: 0 }
102    }
103
104    /// Decodes a single byte from the input.
105    /// This is a low-level method used by other decoders.
106    fn decode_u8(&mut self) -> Result<u8, DecodeError> {
107        if self.offset >= self.input.len() {
108            return Err(DecodeError::InsufficientData);
109        }
110        let value = self.input[self.offset];
111        self.offset += 1;
112        Ok(value)
113    }
114
115    /// Decodes a 16-bit unsigned integer in big-endian byte order.
116    /// This is a low-level method used by other decoders.
117    fn decode_u16(&mut self) -> Result<u16, DecodeError> {
118        if self.offset + 2 > self.input.len() {
119            return Err(DecodeError::InsufficientData);
120        }
121        let value =
122            u16::from_be_bytes(self.input[self.offset..self.offset + 2].try_into().unwrap());
123        self.offset += 2;
124        Ok(value)
125    }
126
127    /// Decodes a 32-bit unsigned integer in big-endian byte order.
128    /// This is a low-level method used by other decoders.
129    fn decode_u32(&mut self) -> Result<u32, DecodeError> {
130        if self.offset + 4 > self.input.len() {
131            return Err(DecodeError::InsufficientData);
132        }
133        let value =
134            u32::from_be_bytes(self.input[self.offset..self.offset + 4].try_into().unwrap());
135        self.offset += 4;
136        Ok(value)
137    }
138
139    /// Decodes a MessagePack-encoded unsigned 64-bit integer.
140    /// Handles the following MessagePack types:
141    /// - positive fixint (0x00 - 0x7f): single-byte positive integer
142    /// - uint8 (0xcc): 8-bit unsigned integer
143    /// - uint16 (0xcd): 16-bit unsigned integer (big-endian)
144    /// - uint32 (0xce): 32-bit unsigned integer (big-endian)
145    /// - uint64 (0xcf): 64-bit unsigned integer (big-endian)
146    ///
147    /// Ref: <https://github.com/msgpack/msgpack/blob/master/spec.md#int-format-family>
148    fn decode_u64(&mut self) -> Result<u64, DecodeError> {
149        match self.decode_u8()? {
150            MSGPACK_UINT8 => Ok(self.decode_u8()? as u64),
151            MSGPACK_UINT16 => Ok(self.decode_u16()? as u64),
152            MSGPACK_UINT32 => Ok(self.decode_u32()? as u64),
153            MSGPACK_UINT64 => {
154                if self.offset + 8 > self.input.len() {
155                    return Err(DecodeError::InsufficientData);
156                }
157                let value = u64::from_be_bytes(
158                    self.input[self.offset..self.offset + 8].try_into().unwrap(),
159                );
160                self.offset += 8;
161                Ok(value)
162            }
163            prefix @ MSGPACK_POSFIXINT_MIN..=MSGPACK_POSFIXINT_MAX => Ok(prefix as u64),
164            _ => Err(DecodeError::UnexpectedType),
165        }
166    }
167
168    /// Decodes a MessagePack-encoded string.
169    /// Handles the following MessagePack types:
170    /// - fixstr (0xa0 - 0xbf): string up to 31 bytes
171    /// - str8 (0xd9): string up to 255 bytes
172    /// - str16 (0xda): string up to 65535 bytes
173    /// - str32 (0xdb): string up to 4294967295 bytes
174    ///
175    /// Ref: <https://github.com/msgpack/msgpack/blob/master/spec.md#formats-str>
176    fn decode_string(&mut self) -> Result<String, DecodeError> {
177        let prefix = self.decode_u8()?;
178
179        let len = match prefix {
180            prefix @ MSGPACK_FIXSTR_MIN..=MSGPACK_FIXSTR_MAX => (prefix & 0x1f) as usize,
181            MSGPACK_STR8 => self.decode_u8()? as usize,
182            MSGPACK_STR16 => self.decode_u16()? as usize,
183            MSGPACK_STR32 => self.decode_u32()? as usize,
184            _ => return Err(DecodeError::UnexpectedType),
185        };
186
187        if self.offset + len > self.input.len() {
188            return Err(DecodeError::InsufficientData);
189        }
190
191        let value = String::from_utf8(self.input[self.offset..self.offset + len].to_vec())
192            .map_err(|_| DecodeError::InvalidData)?;
193        self.offset += len;
194        Ok(value)
195    }
196
197    /// Decodes a MessagePack-encoded map length.
198    /// Handles the following MessagePack types:
199    /// - fixmap (0x80 - 0x8f): map with up to 15 elements
200    /// - map16 (0xde): map with up to 65535 elements
201    /// - map32 (0xdf): map with up to 4294967295 elements
202    ///
203    /// Ref: <https://github.com/msgpack/msgpack/blob/master/spec.md#formats-map>
204    fn decode_map_len(&mut self) -> Result<usize, DecodeError> {
205        let prefix = self.decode_u8()?;
206
207        match prefix {
208            prefix @ MSGPACK_FIXMAP_MIN..=MSGPACK_FIXMAP_MAX => Ok((prefix & 0x0f) as usize),
209            MSGPACK_MAP16 => Ok(self.decode_u16()? as usize),
210            MSGPACK_MAP32 => Ok(self.decode_u32()? as usize),
211            _ => Err(DecodeError::UnexpectedType),
212        }
213    }
214
215    /// Decodes a MessagePack-encoded array length.
216    /// Handles the following MessagePack types:
217    /// - fixarray (0x90 - 0x9f): array with up to 15 elements
218    /// - array16 (0xdc): array with up to 65535 elements
219    /// - array32 (0xdd): array with up to 4294967295 elements
220    ///
221    /// Ref: <https://github.com/msgpack/msgpack/blob/master/spec.md#formats-array>
222    #[allow(dead_code)]
223    fn decode_array_len(&mut self) -> Result<usize, DecodeError> {
224        let prefix = self.decode_u8()?;
225
226        match prefix {
227            prefix @ MSGPACK_FIXARRAY_MIN..=MSGPACK_FIXARRAY_MAX => Ok((prefix & 0x0f) as usize),
228            MSGPACK_ARRAY16 => Ok(self.decode_u16()? as usize),
229            MSGPACK_ARRAY32 => Ok(self.decode_u32()? as usize),
230            _ => Err(DecodeError::UnexpectedType),
231        }
232    }
233
234    /// Decodes a MessagePack-encoded boolean value.
235    /// Handles the following MessagePack types:
236    /// - true (0xc3): boolean true
237    /// - false (0xc2): boolean false
238    ///
239    /// Ref: <https://github.com/msgpack/msgpack/blob/master/spec.md#formats-bool>
240    fn decode_bool(&mut self) -> Result<bool, DecodeError> {
241        match self.decode_u8()? {
242            MSGPACK_TRUE => Ok(true),
243            MSGPACK_FALSE => Ok(false),
244            _ => Err(DecodeError::UnexpectedType),
245        }
246    }
247
248    /// Decodes a MessagePack-encoded nil value.
249    /// Handles the following MessagePack types:
250    /// - nil (0xc0): nil/null value
251    ///
252    /// Ref: <https://github.com/msgpack/msgpack/blob/master/spec.md#formats-nil>
253    #[allow(dead_code)]
254    fn decode_nil(&mut self) -> Result<(), DecodeError> {
255        match self.decode_u8()? {
256            MSGPACK_NIL => Ok(()),
257            _ => Err(DecodeError::UnexpectedType),
258        }
259    }
260
261    /// Peeks at the next byte to check if it's a nil value without advancing the offset.
262    /// Returns true if the next value is nil, false otherwise.
263    #[allow(dead_code)]
264    fn peek_nil(&mut self) -> Result<bool, DecodeError> {
265        if self.offset >= self.input.len() {
266            return Err(DecodeError::InsufficientData);
267        }
268        Ok(self.input[self.offset] == MSGPACK_NIL)
269    }
270
271    /// Skips a MessagePack value of any type.
272    /// This is used when encountering unknown field names in a struct.
273    fn skip_value(&mut self) -> Result<(), DecodeError> {
274        let prefix = self.decode_u8()?;
275
276        match prefix {
277            // String formats
278            prefix @ MSGPACK_FIXSTR_MIN..=MSGPACK_FIXSTR_MAX => {
279                let len = (prefix & 0x1f) as usize;
280                if self.offset + len > self.input.len() {
281                    return Err(DecodeError::InsufficientData);
282                }
283                self.offset += len;
284                Ok(())
285            }
286            MSGPACK_STR8 => {
287                let len = self.decode_u8()? as usize;
288                if self.offset + len > self.input.len() {
289                    return Err(DecodeError::InsufficientData);
290                }
291                self.offset += len;
292                Ok(())
293            }
294            MSGPACK_STR16 => {
295                let len = self.decode_u16()? as usize;
296                if self.offset + len > self.input.len() {
297                    return Err(DecodeError::InsufficientData);
298                }
299                self.offset += len;
300                Ok(())
301            }
302            MSGPACK_STR32 => {
303                let len = self.decode_u32()? as usize;
304                if self.offset + len > self.input.len() {
305                    return Err(DecodeError::InsufficientData);
306                }
307                self.offset += len;
308                Ok(())
309            }
310
311            // Integer formats
312            MSGPACK_UINT8 => {
313                self.offset += 1;
314                Ok(())
315            }
316            MSGPACK_UINT16 => {
317                self.offset += 2;
318                Ok(())
319            }
320            MSGPACK_UINT32 => {
321                self.offset += 4;
322                Ok(())
323            }
324            MSGPACK_UINT64 => {
325                self.offset += 8;
326                Ok(())
327            }
328            MSGPACK_INT8 => {
329                self.offset += 1;
330                Ok(())
331            }
332            MSGPACK_INT16 => {
333                self.offset += 2;
334                Ok(())
335            }
336            MSGPACK_INT32 => {
337                self.offset += 4;
338                Ok(())
339            }
340            MSGPACK_INT64 => {
341                self.offset += 8;
342                Ok(())
343            }
344            // Fixed integers are already handled by decode_u8
345
346            // Boolean and nil
347            MSGPACK_NIL | MSGPACK_TRUE | MSGPACK_FALSE => Ok(()),
348
349            // Map format
350            prefix @ MSGPACK_FIXMAP_MIN..=MSGPACK_FIXMAP_MAX => {
351                let len = (prefix & 0x0f) as usize;
352                for _ in 0..len {
353                    self.skip_value()?; // Skip key
354                    self.skip_value()?; // Skip value
355                }
356                Ok(())
357            }
358            MSGPACK_MAP16 => {
359                let len = self.decode_u16()? as usize;
360                for _ in 0..len {
361                    self.skip_value()?; // Skip key
362                    self.skip_value()?; // Skip value
363                }
364                Ok(())
365            }
366            MSGPACK_MAP32 => {
367                let len = self.decode_u32()? as usize;
368                for _ in 0..len {
369                    self.skip_value()?; // Skip key
370                    self.skip_value()?; // Skip value
371                }
372                Ok(())
373            }
374
375            // Array format
376            prefix @ MSGPACK_FIXARRAY_MIN..=MSGPACK_FIXARRAY_MAX => {
377                let len = (prefix & 0x0f) as usize;
378                for _ in 0..len {
379                    self.skip_value()?;
380                }
381                Ok(())
382            }
383            MSGPACK_ARRAY16 => {
384                let len = self.decode_u16()? as usize;
385                for _ in 0..len {
386                    self.skip_value()?;
387                }
388                Ok(())
389            }
390            MSGPACK_ARRAY32 => {
391                let len = self.decode_u32()? as usize;
392                for _ in 0..len {
393                    self.skip_value()?;
394                }
395                Ok(())
396            }
397
398            _ => Err(DecodeError::UnexpectedType),
399        }
400    }
401
402    fn deserialize_value(&mut self, wip: Wip<'input>) -> Result<Wip<'input>, DecodeError> {
403        let shape = wip.shape();
404        trace!("Deserializing {:?}", shape);
405
406        let wip = match shape.def {
407            Def::Scalar(_) => {
408                trace!("Deserializing scalar");
409                if shape.is_type::<String>() {
410                    let s = self.decode_string()?;
411                    wip.put(s).unwrap()
412                } else if shape.is_type::<u64>() {
413                    let n = self.decode_u64()?;
414                    wip.put(n).unwrap()
415                } else if shape.is_type::<u32>() {
416                    let n = self.decode_u64()?;
417                    if n > u32::MAX as u64 {
418                        return Err(DecodeError::IntegerOverflow);
419                    }
420                    wip.put(n as u32).unwrap()
421                } else if shape.is_type::<u16>() {
422                    let n = self.decode_u64()?;
423                    if n > u16::MAX as u64 {
424                        return Err(DecodeError::IntegerOverflow);
425                    }
426                    wip.put(n as u16).unwrap()
427                } else if shape.is_type::<u8>() {
428                    let n = self.decode_u64()?;
429                    if n > u8::MAX as u64 {
430                        return Err(DecodeError::IntegerOverflow);
431                    }
432                    wip.put(n as u8).unwrap()
433                } else if shape.is_type::<i64>() {
434                    // This is a simplification - need to implement proper int decoding
435                    let n = self.decode_u64()?;
436                    if n > i64::MAX as u64 {
437                        return Err(DecodeError::IntegerOverflow);
438                    }
439                    wip.put(n as i64).unwrap()
440                } else if shape.is_type::<i32>() {
441                    let n = self.decode_u64()?;
442                    if n > i32::MAX as u64 {
443                        return Err(DecodeError::IntegerOverflow);
444                    }
445                    wip.put(n as i32).unwrap()
446                } else if shape.is_type::<bool>() {
447                    let b = self.decode_bool()?;
448                    wip.put(b).unwrap()
449                } else {
450                    return Err(DecodeError::UnsupportedType(format!("{}", shape)));
451                }
452            }
453            Def::Struct(_) => {
454                trace!("Deserializing struct");
455                let map_len = self.decode_map_len()?;
456
457                let mut wip = wip;
458                for _ in 0..map_len {
459                    let key = self.decode_string()?;
460                    match wip.field_index(&key) {
461                        Some(index) => {
462                            wip = self
463                                .deserialize_value(wip.field(index).unwrap())?
464                                .pop()
465                                .unwrap();
466                        }
467                        None => {
468                            // Skip unknown field value
469                            self.skip_value()?;
470                            trace!("Skipping unknown field: {}", key);
471                        }
472                    }
473                }
474                wip
475            }
476            _ => {
477                return Err(DecodeError::UnsupportedShape(format!("{:?}", shape)));
478            }
479        };
480
481        Ok(wip)
482    }
483}