facet_msgpack/
deserialize.rs

1use crate::constants::*;
2use crate::errors::Error as DecodeError;
3
4use facet_core::{Def, Facet, Type, UserType};
5use facet_reflect::{HeapValue, Wip};
6use log::trace;
7
8/// Deserializes MessagePack-encoded data into a type that implements `Facet`.
9///
10/// # Example
11/// ```
12/// use facet::Facet;
13/// use facet_msgpack::from_str;
14///
15/// #[derive(Debug, Facet, PartialEq)]
16/// struct User {
17///     id: u64,
18///     username: String,
19/// }
20///
21/// // MessagePack binary data (equivalent to {"id": 42, "username": "user123"})
22/// let msgpack_data = [
23///     0x82, 0xa2, 0x69, 0x64, 0x2a, 0xa8, 0x75, 0x73,
24///     0x65, 0x72, 0x6e, 0x61, 0x6d, 0x65, 0xa7, 0x75,
25///     0x73, 0x65, 0x72, 0x31, 0x32, 0x33
26/// ];
27///
28/// let user: User = from_str(&msgpack_data).unwrap();
29/// assert_eq!(user, User { id: 42, username: "user123".to_string() });
30/// ```
31pub fn from_slice<'input: 'facet, 'facet, T: Facet<'facet>>(
32    msgpack: &'input [u8],
33) -> Result<T, DecodeError> {
34    from_slice_value(Wip::alloc::<T>()?, msgpack)?
35        .materialize::<T>()
36        .map_err(|e| DecodeError::UnsupportedType(e.to_string()))
37}
38
39/// Alias for from_slice for backward compatibility
40#[deprecated(since = "0.1.0", note = "Use from_slice instead")]
41pub fn from_str<'input: 'facet, 'facet, T: Facet<'facet>>(
42    msgpack: &'input [u8],
43) -> Result<T, DecodeError> {
44    from_slice(msgpack)
45}
46
47/// Deserializes MessagePack-encoded data into a Facet value.
48///
49/// This function takes a MessagePack byte array and populates a Wip object
50/// according to the shape description, returning an Opaque value.
51///
52/// # Example
53///
54/// ```
55/// use facet::Facet;
56/// use facet_msgpack::from_slice;
57///
58/// #[derive(Debug, Facet, PartialEq)]
59/// struct User {
60///     id: u64,
61///     username: String,
62/// }
63///
64/// // MessagePack binary data (equivalent to {"id": 42, "username": "user123"})
65/// let msgpack_data = [
66///     0x82, 0xa2, 0x69, 0x64, 0x2a, 0xa8, 0x75, 0x73,
67///     0x65, 0x72, 0x6e, 0x61, 0x6d, 0x65, 0xa7, 0x75,
68///     0x73, 0x65, 0x72, 0x31, 0x32, 0x33
69/// ];
70///
71/// let user: User = from_slice(&msgpack_data).unwrap();
72/// assert_eq!(user, User { id: 42, username: "user123".to_string() });
73/// ```
74///
75/// # Parameters
76/// * `wip` - A Wip object that will be filled with deserialized data
77/// * `msgpack` - A byte slice containing MessagePack-encoded data
78///
79/// # Returns
80/// * `Ok(Opaque)` containing the deserialized data if successful
81/// * `Err(DecodeError)` if an error occurred during deserialization
82///
83/// # MessagePack Format
84/// This implementation follows the MessagePack specification:
85/// <https://github.com/msgpack/msgpack/blob/master/spec.md>
86#[allow(clippy::needless_lifetimes)]
87pub fn from_slice_value<'mem>(
88    wip: Wip<'mem>,
89    msgpack: &'mem [u8],
90) -> Result<HeapValue<'mem>, DecodeError> {
91    let mut decoder = Decoder::new(msgpack);
92    decoder
93        .deserialize_value(wip)?
94        .build()
95        .map_err(|e| DecodeError::UnsupportedType(e.to_string()))
96}
97
/// Cursor over a MessagePack byte slice.
struct Decoder<'input> {
    /// The complete encoded input.
    input: &'input [u8],
    /// Index into `input` of the next byte to read.
    offset: usize,
}
102
103impl<'input> Decoder<'input> {
104    fn new(input: &'input [u8]) -> Self {
105        Decoder { input, offset: 0 }
106    }
107
108    /// Decodes a single byte from the input.
109    /// This is a low-level method used by other decoders.
110    fn decode_u8(&mut self) -> Result<u8, DecodeError> {
111        if self.offset >= self.input.len() {
112            return Err(DecodeError::InsufficientData);
113        }
114        let value = self.input[self.offset];
115        self.offset += 1;
116        Ok(value)
117    }
118
119    /// Decodes a 16-bit unsigned integer in big-endian byte order.
120    /// This is a low-level method used by other decoders.
121    fn decode_u16(&mut self) -> Result<u16, DecodeError> {
122        if self.offset + 2 > self.input.len() {
123            return Err(DecodeError::InsufficientData);
124        }
125        let value =
126            u16::from_be_bytes(self.input[self.offset..self.offset + 2].try_into().unwrap());
127        self.offset += 2;
128        Ok(value)
129    }
130
131    /// Decodes a 32-bit unsigned integer in big-endian byte order.
132    /// This is a low-level method used by other decoders.
133    fn decode_u32(&mut self) -> Result<u32, DecodeError> {
134        if self.offset + 4 > self.input.len() {
135            return Err(DecodeError::InsufficientData);
136        }
137        let value =
138            u32::from_be_bytes(self.input[self.offset..self.offset + 4].try_into().unwrap());
139        self.offset += 4;
140        Ok(value)
141    }
142
143    /// Decodes a MessagePack-encoded unsigned 64-bit integer.
144    /// Handles the following MessagePack types:
145    /// - positive fixint (0x00 - 0x7f): single-byte positive integer
146    /// - uint8 (0xcc): 8-bit unsigned integer
147    /// - uint16 (0xcd): 16-bit unsigned integer (big-endian)
148    /// - uint32 (0xce): 32-bit unsigned integer (big-endian)
149    /// - uint64 (0xcf): 64-bit unsigned integer (big-endian)
150    ///
151    /// Ref: <https://github.com/msgpack/msgpack/blob/master/spec.md#int-format-family>
152    fn decode_u64(&mut self) -> Result<u64, DecodeError> {
153        match self.decode_u8()? {
154            MSGPACK_UINT8 => Ok(self.decode_u8()? as u64),
155            MSGPACK_UINT16 => Ok(self.decode_u16()? as u64),
156            MSGPACK_UINT32 => Ok(self.decode_u32()? as u64),
157            MSGPACK_UINT64 => {
158                if self.offset + 8 > self.input.len() {
159                    return Err(DecodeError::InsufficientData);
160                }
161                let value = u64::from_be_bytes(
162                    self.input[self.offset..self.offset + 8].try_into().unwrap(),
163                );
164                self.offset += 8;
165                Ok(value)
166            }
167            prefix @ MSGPACK_POSFIXINT_MIN..=MSGPACK_POSFIXINT_MAX => Ok(prefix as u64),
168            _ => Err(DecodeError::UnexpectedType),
169        }
170    }
171
172    /// Decodes a MessagePack-encoded string.
173    /// Handles the following MessagePack types:
174    /// - fixstr (0xa0 - 0xbf): string up to 31 bytes
175    /// - str8 (0xd9): string up to 255 bytes
176    /// - str16 (0xda): string up to 65535 bytes
177    /// - str32 (0xdb): string up to 4294967295 bytes
178    ///
179    /// Ref: <https://github.com/msgpack/msgpack/blob/master/spec.md#formats-str>
180    fn decode_string(&mut self) -> Result<String, DecodeError> {
181        let prefix = self.decode_u8()?;
182
183        let len = match prefix {
184            prefix @ MSGPACK_FIXSTR_MIN..=MSGPACK_FIXSTR_MAX => (prefix & 0x1f) as usize,
185            MSGPACK_STR8 => self.decode_u8()? as usize,
186            MSGPACK_STR16 => self.decode_u16()? as usize,
187            MSGPACK_STR32 => self.decode_u32()? as usize,
188            _ => return Err(DecodeError::UnexpectedType),
189        };
190
191        if self.offset + len > self.input.len() {
192            return Err(DecodeError::InsufficientData);
193        }
194
195        let value = String::from_utf8(self.input[self.offset..self.offset + len].to_vec())
196            .map_err(|_| DecodeError::InvalidData)?;
197        self.offset += len;
198        Ok(value)
199    }
200
201    /// Decodes a MessagePack-encoded map length.
202    /// Handles the following MessagePack types:
203    /// - fixmap (0x80 - 0x8f): map with up to 15 elements
204    /// - map16 (0xde): map with up to 65535 elements
205    /// - map32 (0xdf): map with up to 4294967295 elements
206    ///
207    /// Ref: <https://github.com/msgpack/msgpack/blob/master/spec.md#formats-map>
208    fn decode_map_len(&mut self) -> Result<usize, DecodeError> {
209        let prefix = self.decode_u8()?;
210
211        match prefix {
212            prefix @ MSGPACK_FIXMAP_MIN..=MSGPACK_FIXMAP_MAX => Ok((prefix & 0x0f) as usize),
213            MSGPACK_MAP16 => Ok(self.decode_u16()? as usize),
214            MSGPACK_MAP32 => Ok(self.decode_u32()? as usize),
215            _ => Err(DecodeError::UnexpectedType),
216        }
217    }
218
219    /// Decodes a MessagePack-encoded array length.
220    /// Handles the following MessagePack types:
221    /// - fixarray (0x90 - 0x9f): array with up to 15 elements
222    /// - array16 (0xdc): array with up to 65535 elements
223    /// - array32 (0xdd): array with up to 4294967295 elements
224    ///
225    /// Ref: <https://github.com/msgpack/msgpack/blob/master/spec.md#formats-array>
226    #[allow(dead_code)]
227    fn decode_array_len(&mut self) -> Result<usize, DecodeError> {
228        let prefix = self.decode_u8()?;
229
230        match prefix {
231            prefix @ MSGPACK_FIXARRAY_MIN..=MSGPACK_FIXARRAY_MAX => Ok((prefix & 0x0f) as usize),
232            MSGPACK_ARRAY16 => Ok(self.decode_u16()? as usize),
233            MSGPACK_ARRAY32 => Ok(self.decode_u32()? as usize),
234            _ => Err(DecodeError::UnexpectedType),
235        }
236    }
237
238    /// Decodes a MessagePack-encoded boolean value.
239    /// Handles the following MessagePack types:
240    /// - true (0xc3): boolean true
241    /// - false (0xc2): boolean false
242    ///
243    /// Ref: <https://github.com/msgpack/msgpack/blob/master/spec.md#formats-bool>
244    fn decode_bool(&mut self) -> Result<bool, DecodeError> {
245        match self.decode_u8()? {
246            MSGPACK_TRUE => Ok(true),
247            MSGPACK_FALSE => Ok(false),
248            _ => Err(DecodeError::UnexpectedType),
249        }
250    }
251
252    /// Decodes a MessagePack-encoded nil value.
253    /// Handles the following MessagePack types:
254    /// - nil (0xc0): nil/null value
255    ///
256    /// Ref: <https://github.com/msgpack/msgpack/blob/master/spec.md#formats-nil>
257    #[allow(dead_code)]
258    fn decode_nil(&mut self) -> Result<(), DecodeError> {
259        match self.decode_u8()? {
260            MSGPACK_NIL => Ok(()),
261            _ => Err(DecodeError::UnexpectedType),
262        }
263    }
264
265    /// Peeks at the next byte to check if it's a nil value without advancing the offset.
266    /// Returns true if the next value is nil, false otherwise.
267    #[allow(dead_code)]
268    fn peek_nil(&mut self) -> Result<bool, DecodeError> {
269        if self.offset >= self.input.len() {
270            return Err(DecodeError::InsufficientData);
271        }
272        Ok(self.input[self.offset] == MSGPACK_NIL)
273    }
274
275    /// Skips a MessagePack value of any type.
276    /// This is used when encountering unknown field names in a struct.
277    fn skip_value(&mut self) -> Result<(), DecodeError> {
278        let prefix = self.decode_u8()?;
279
280        match prefix {
281            // String formats
282            prefix @ MSGPACK_FIXSTR_MIN..=MSGPACK_FIXSTR_MAX => {
283                let len = (prefix & 0x1f) as usize;
284                if self.offset + len > self.input.len() {
285                    return Err(DecodeError::InsufficientData);
286                }
287                self.offset += len;
288                Ok(())
289            }
290            MSGPACK_STR8 => {
291                let len = self.decode_u8()? as usize;
292                if self.offset + len > self.input.len() {
293                    return Err(DecodeError::InsufficientData);
294                }
295                self.offset += len;
296                Ok(())
297            }
298            MSGPACK_STR16 => {
299                let len = self.decode_u16()? as usize;
300                if self.offset + len > self.input.len() {
301                    return Err(DecodeError::InsufficientData);
302                }
303                self.offset += len;
304                Ok(())
305            }
306            MSGPACK_STR32 => {
307                let len = self.decode_u32()? as usize;
308                if self.offset + len > self.input.len() {
309                    return Err(DecodeError::InsufficientData);
310                }
311                self.offset += len;
312                Ok(())
313            }
314
315            // Integer formats
316            MSGPACK_UINT8 => {
317                self.offset += 1;
318                Ok(())
319            }
320            MSGPACK_UINT16 => {
321                self.offset += 2;
322                Ok(())
323            }
324            MSGPACK_UINT32 => {
325                self.offset += 4;
326                Ok(())
327            }
328            MSGPACK_UINT64 => {
329                self.offset += 8;
330                Ok(())
331            }
332            MSGPACK_INT8 => {
333                self.offset += 1;
334                Ok(())
335            }
336            MSGPACK_INT16 => {
337                self.offset += 2;
338                Ok(())
339            }
340            MSGPACK_INT32 => {
341                self.offset += 4;
342                Ok(())
343            }
344            MSGPACK_INT64 => {
345                self.offset += 8;
346                Ok(())
347            }
348            // Fixed integers are already handled by decode_u8
349
350            // Boolean and nil
351            MSGPACK_NIL | MSGPACK_TRUE | MSGPACK_FALSE => Ok(()),
352
353            // Map format
354            prefix @ MSGPACK_FIXMAP_MIN..=MSGPACK_FIXMAP_MAX => {
355                let len = (prefix & 0x0f) as usize;
356                for _ in 0..len {
357                    self.skip_value()?; // Skip key
358                    self.skip_value()?; // Skip value
359                }
360                Ok(())
361            }
362            MSGPACK_MAP16 => {
363                let len = self.decode_u16()? as usize;
364                for _ in 0..len {
365                    self.skip_value()?; // Skip key
366                    self.skip_value()?; // Skip value
367                }
368                Ok(())
369            }
370            MSGPACK_MAP32 => {
371                let len = self.decode_u32()? as usize;
372                for _ in 0..len {
373                    self.skip_value()?; // Skip key
374                    self.skip_value()?; // Skip value
375                }
376                Ok(())
377            }
378
379            // Array format
380            prefix @ MSGPACK_FIXARRAY_MIN..=MSGPACK_FIXARRAY_MAX => {
381                let len = (prefix & 0x0f) as usize;
382                for _ in 0..len {
383                    self.skip_value()?;
384                }
385                Ok(())
386            }
387            MSGPACK_ARRAY16 => {
388                let len = self.decode_u16()? as usize;
389                for _ in 0..len {
390                    self.skip_value()?;
391                }
392                Ok(())
393            }
394            MSGPACK_ARRAY32 => {
395                let len = self.decode_u32()? as usize;
396                for _ in 0..len {
397                    self.skip_value()?;
398                }
399                Ok(())
400            }
401
402            _ => Err(DecodeError::UnexpectedType),
403        }
404    }
405
406    fn deserialize_value(&mut self, wip: Wip<'input>) -> Result<Wip<'input>, DecodeError> {
407        let shape = wip.shape();
408        trace!("Deserializing {:?}", shape);
409
410        // First check the type system (Type)
411        if let Type::User(UserType::Struct(_)) = &shape.ty {
412            trace!("Deserializing struct");
413            let map_len = self.decode_map_len()?;
414
415            let mut wip = wip;
416            for _ in 0..map_len {
417                let key = self.decode_string()?;
418                match wip.field_index(&key) {
419                    Some(index) => {
420                        wip = self
421                            .deserialize_value(wip.field(index).unwrap())?
422                            .pop()
423                            .unwrap();
424                    }
425                    None => {
426                        // Skip unknown field value
427                        self.skip_value()?;
428                        trace!("Skipping unknown field: {}", key);
429                    }
430                }
431            }
432            return Ok(wip);
433        }
434
435        // Then check the def system (Def)
436        let wip = match shape.def {
437            Def::Scalar(_) => {
438                trace!("Deserializing scalar");
439                if shape.is_type::<String>() {
440                    let s = self.decode_string()?;
441                    wip.put(s).unwrap()
442                } else if shape.is_type::<u64>() {
443                    let n = self.decode_u64()?;
444                    wip.put(n).unwrap()
445                } else if shape.is_type::<u32>() {
446                    let n = self.decode_u64()?;
447                    if n > u32::MAX as u64 {
448                        return Err(DecodeError::IntegerOverflow);
449                    }
450                    wip.put(n as u32).unwrap()
451                } else if shape.is_type::<u16>() {
452                    let n = self.decode_u64()?;
453                    if n > u16::MAX as u64 {
454                        return Err(DecodeError::IntegerOverflow);
455                    }
456                    wip.put(n as u16).unwrap()
457                } else if shape.is_type::<u8>() {
458                    let n = self.decode_u64()?;
459                    if n > u8::MAX as u64 {
460                        return Err(DecodeError::IntegerOverflow);
461                    }
462                    wip.put(n as u8).unwrap()
463                } else if shape.is_type::<i64>() {
464                    // This is a simplification - need to implement proper int decoding
465                    let n = self.decode_u64()?;
466                    if n > i64::MAX as u64 {
467                        return Err(DecodeError::IntegerOverflow);
468                    }
469                    wip.put(n as i64).unwrap()
470                } else if shape.is_type::<i32>() {
471                    let n = self.decode_u64()?;
472                    if n > i32::MAX as u64 {
473                        return Err(DecodeError::IntegerOverflow);
474                    }
475                    wip.put(n as i32).unwrap()
476                } else if shape.is_type::<bool>() {
477                    let b = self.decode_bool()?;
478                    wip.put(b).unwrap()
479                } else {
480                    return Err(DecodeError::UnsupportedType(format!("{}", shape)));
481                }
482            }
483            _ => {
484                return Err(DecodeError::UnsupportedShape(format!("{:?}", shape)));
485            }
486        };
487
488        Ok(wip)
489    }
490}