Skip to main content

slice_codec/
decoding.rs

1// Copyright (c) ZeroC, Inc.
2
3use crate::buffer::InputSource;
4use crate::decode_from::*;
5use crate::decoder::Decoder;
6use crate::{Error, InvalidDataErrorKind, Result};
7
8// We only support `String`, `Vec`, and `BTreeMap` if the `alloc` crate is available through the `alloc` feature flag.
9#[cfg(feature = "alloc")]
10use alloc::collections::BTreeMap;
11#[cfg(feature = "alloc")]
12use alloc::string::String;
13#[cfg(feature = "alloc")]
14use alloc::vec::Vec;
15#[cfg(feature = "alloc")]
16use core::fmt::Debug;
17
18// We only support `HashMap` if the standard library is available through the `std` feature flag.
19#[cfg(feature = "std")]
20use std::collections::HashMap;
21#[cfg(feature = "std")]
22use std::hash::Hash;
23
/// Tag value that marks the end of a run of tagged fields.
///
/// TAG_END_MARKER must be encoded at the end of every non-compact type. When skipping tagged fields, the decoder
/// keeps reading tags (as `i32` varints) until it decodes this value.
const TAG_END_MARKER: i32 = -1;
26
27// =============================================================================
28// Fixed-length type implementations
29// =============================================================================
30
31/// TODO
32fn illegal_bool_error(value: u8) -> Error {
33    let error = InvalidDataErrorKind::IllegalValue {
34        desc: "bools can only have a numeric value of either '0' or '1'",
35        value: Some(value as i128),
36    };
37    error.into()
38}
39
40impl DecodeFrom for bool {
41    /// Reads a single byte from the buffer and returns `false` if it is `0` or `true` if it is `1`.
42    /// If the byte has any other value, an error is returned instead.
43    fn decode_from(decoder: &mut Decoder<impl InputSource>) -> crate::Result<Self> {
44        let byte = decoder.read_byte()?;
45
46        // We strictly enforce the Slice spec; A bool _must_ be encoded as either `0` or `1`.
47        match byte {
48            0 | 1 => Ok(byte != 0),
49            _ => Err(illegal_bool_error(byte)),
50        }
51    }
52}
53
54impl DecodeFrom for u8 {
55    /// Reads a single byte directly from the buffer and returns it, as is.
56    fn decode_from(decoder: &mut Decoder<impl InputSource>) -> crate::Result<Self> {
57        decoder.read_byte()
58    }
59}
60
61impl DecodeFrom for i8 {
62    /// Reads a single byte directly from the buffer and returns it, as-if it was an `i8`.
63    fn decode_from(decoder: &mut Decoder<impl InputSource>) -> crate::Result<Self> {
64        // In Rust, signed-integers are guaranteed to use a two's complement representation in memory.
65        // Casting between `u8` and `i8` is no-op, and doesn't change this representation, or the sign bit.
66        let byte = decoder.read_byte()?;
67        Ok(byte as i8)
68    }
69}
70
// The remaining fixed-size numeric primitives get their `DecodeFrom` implementations from a macro that is defined
// outside of this file. Each invocation passes the primitive type and the doc string for the generated code.
// NOTE(review): presumably the macro reads `size_of::<T>()` bytes and converts them with `T::from_le_bytes`, as the
// doc strings below suggest; confirm against the macro definition.
implement_decode_from_on_numeric_primitive_type! {u16, "Decodes a [`u16`] from 2 bytes (little endian)."}
implement_decode_from_on_numeric_primitive_type! {i16, "Decodes a [`i16`] from 2 bytes (little endian) in two's complement form."}
implement_decode_from_on_numeric_primitive_type! {u32, "Decodes a [`u32`] from 4 bytes (little endian)."}
implement_decode_from_on_numeric_primitive_type! {i32, "Decodes a [`i32`] from 4 bytes (little endian) in two's complement form."}
implement_decode_from_on_numeric_primitive_type! {u64, "Decodes a [`u64`] from 8 bytes (little endian)."}
implement_decode_from_on_numeric_primitive_type! {i64, "Decodes a [`i64`] from 8 bytes (little endian) in two's complement form."}
implement_decode_from_on_numeric_primitive_type! {f32, "Decodes a [`f32`] from 4 bytes (little endian) using the \"binary32\" representation defined in IEEE 754-2008."}
implement_decode_from_on_numeric_primitive_type! {f64, "Decodes a [`f64`] from 8 bytes (little endian) using the \"binary64\" representation defined in IEEE 754-2008."}
79
80// =============================================================================
81// Variable-length integer type implementations
82// =============================================================================
83
84/// TODO
85// TODO this isn't great. It assumes `T` is a signed integer, and has a size less than `u32::MAX`. For sane users,
86// these will always be true. But if these assumptions don't hold, the 'min' and 'max' this reports will be wrong.
87fn varint_range_error<T>(value: i64) -> Error {
88    let size = core::mem::size_of::<T>() as u32;
89    let shift_count = i128::BITS - (size * 8);
90    let error = InvalidDataErrorKind::OutOfRange {
91        value: value as i128,
92        min: i128::MIN >> shift_count,
93        max: i128::MAX >> shift_count,
94        typename: core::any::type_name::<T>(),
95    };
96    error.into()
97}
98
99/// TODO
100// TODO this isn't great. It assumes `T` is an unsigned integer, and has a size less than `u32::MAX`. For sane users,
101// these will always be true. But if these assumptions don't hold, the 'min' and 'max' this reports will be wrong.
102fn varuint_range_error<T>(value: u64) -> Error {
103    let size = core::mem::size_of::<T>() as u32;
104    let shift_count = u128::BITS - (size * 8);
105    let error = InvalidDataErrorKind::OutOfRange {
106        value: value as i128,
107        min: 0,
108        max: (u128::MAX >> shift_count) as i128,
109        typename: core::any::type_name::<T>(),
110    };
111    error.into()
112}
113
114impl<I: InputSource> Decoder<I> {
115    /// Reads between 1 and 8 bytes from the buffer and decodes a single signed integer from them.
116    /// This integer must of been encoded in the variable length '[varint]' format.
117    ///
118    /// [varint]: https://docs.icerpc.dev/slice/language-guide/primitive-types#variable-size-integral-types
119    pub fn decode_varint<T: TryFrom<i64>>(&mut self) -> Result<T> {
120        // Peek the next byte in the buffer. The lowest two bits of this byte tell us how many total bytes to read, so
121        // we can pick an appropriate decoding function. This works because 'varint's are always encoded little-endian.
122        // See: https://docs.icerpc.dev/slice/encoding/primitive-types#variable-size-integral-types.
123        #[rustfmt::skip] // To keep the match arms aligned for readability.
124        let mut value = match self.peek_byte()? & 0b11 {
125            0b00 =>  i8::decode_from(self)? as i64,
126            0b01 => i16::decode_from(self)? as i64,
127            0b10 => i32::decode_from(self)? as i64,
128            0b11 => i64::decode_from(self)?,
129
130            // # SAFETY
131            // This match is exhaustive. There are only 4 possible values after applying a mask of `0b11` and we cover
132            // all of them. This branch is mathematically impossible to hit, so we inform the compiler of this.
133            _ => unsafe { core::hint::unreachable_unchecked() },
134        };
135
136        // Bit-shift the lowest 2 bits away. These stored the number of bytes to read, and are not part of the value.
137        value >>= 2;
138        // Try to convert the decoded value to the requested type.
139        T::try_from(value).map_err(|_| varint_range_error::<T>(value))
140    }
141
142    /// Reads between 1 and 8 bytes from the buffer and decodes a single unsigned integer from them.
143    /// This integer must of been encoded in the variable length '[varuint]' format.
144    ///
145    /// [varuint]: https://docs.icerpc.dev/slice/language-guide/primitive-types#variable-size-integral-types
146    pub fn decode_varuint<T: TryFrom<u64>>(&mut self) -> Result<T> {
147        // Peek the next byte in the buffer. The lowest two bits of this byte tell us how many total bytes to read, so
148        // we can pick an appropriate decoding function. This works because 'varuint's are always encoded little-endian.
149        // See: https://docs.icerpc.dev/slice/encoding/primitive-types#variable-size-integral-types.
150        #[rustfmt::skip] // To keep the match arms aligned for readability.
151        let mut value = match self.peek_byte()? & 0b11 {
152            0b00 =>  u8::decode_from(self)? as u64,
153            0b01 => u16::decode_from(self)? as u64,
154            0b10 => u32::decode_from(self)? as u64,
155            0b11 => u64::decode_from(self)?,
156
157            // # SAFETY
158            // This match is exhaustive. There are only 4 possible values after applying a mask of `0b11` and we cover
159            // all of them. This branch is mathematically impossible to hit, so we inform the compiler of this.
160            _ => unsafe { core::hint::unreachable_unchecked() },
161        };
162
163        // Bit-shift the lowest 2 bits away. These stored the number of bytes to read, and are not part of the value.
164        value >>= 2;
165        // Try to convert the decoded value to the requested type.
166        T::try_from(value).map_err(|_| varuint_range_error::<T>(value))
167    }
168
169    /// An alias for `[decode_varuint]` to increase readability.
170    pub fn decode_size(&mut self) -> Result<usize> {
171        self.decode_varuint()
172    }
173
174    /// Skips any remaining tagged fields.
175    pub fn skip_tagged_fields(&mut self) -> Result<()> {
176        // Continue decoding tags until we hit 'TAG_END_MARKER'.
177        while self.decode_varint::<i32>()? != TAG_END_MARKER {
178            // Skip over the tagged field.
179            let field_size = self.decode_size()?;
180            self.read_byte_slice_exact(field_size)?;
181        }
182
183        Ok(())
184    }
185}
186
187// =============================================================================
188// Sequence type implementations
189// =============================================================================
190
#[cfg(feature = "alloc")]
/// Strings are decoded from a size (the number of bytes) followed by that many bytes of UTF-8 text.
impl DecodeFrom for String {
    /// Decodes the byte length of the string, reads exactly that many bytes, and validates them as UTF-8.
    ///
    /// # Errors
    ///
    /// Returns an error if the length cannot be decoded, if the buffer for the string cannot be allocated, if the
    /// bytes cannot be read from the decoder, or if the bytes are not valid UTF-8.
    fn decode_from(decoder: &mut Decoder<impl InputSource>) -> Result<Self> {
        // Decode how many bytes are in this string, and attempt to allocate a vec with the necessary capacity.
        // The reservation is fallible, so a bogus length results in an error instead of an allocation abort.
        let length = decoder.decode_size()?;
        let mut bytes: Vec<u8> = Vec::new();
        bytes.try_reserve_exact(length)?;

        // Give the vector exactly 'length' initialized bytes to read into. The capacity reserved above may be larger
        // than what was requested, so the destination must be sized by 'length', not by the spare capacity.
        // This can't reallocate: the capacity is already at least 'length'.
        bytes.resize(length, 0);
        decoder.read_bytes_into_exact(bytes.as_mut_slice())?;

        // Attempt to interpret the bytes as a utf-8 string; this reuses the vector's allocation.
        let string = String::from_utf8(bytes)?;
        Ok(string)
    }
}
213
#[cfg(feature = "alloc")]
impl<T> DecodeFrom for Vec<T>
where
    T: DecodeFrom,
{
    /// Decodes a sequence: first the number of elements, then each element in order.
    ///
    /// An error is returned if the count cannot be decoded, if space for the elements cannot be reserved, or if
    /// decoding any element fails.
    fn decode_from(decoder: &mut Decoder<impl InputSource>) -> Result<Self> {
        // The element count comes first; reserve room for all of them up front with a fallible allocation.
        let count = decoder.decode_varuint()?;
        let mut elements = Vec::new();
        elements.try_reserve_exact(count)?;

        // Decode the elements one at a time, stopping at the first failure.
        for _ in 0..count {
            elements.push(decoder.decode()?);
        }
        Ok(elements)
    }
}
234
235// =============================================================================
236// Dictionary type implementations
237// =============================================================================
238
#[cfg(feature = "std")]
impl<K, V> DecodeFrom for HashMap<K, V>
where
    K: DecodeFrom + Debug + Eq + Hash,
    V: DecodeFrom,
{
    /// Decodes a dictionary into a `HashMap`: first the number of entries, then the entries themselves.
    ///
    /// An error is returned if the count cannot be decoded, if space for the entries cannot be reserved, or if
    /// decoding the entries fails.
    fn decode_from(decoder: &mut Decoder<impl InputSource>) -> Result<Self> {
        // The entry count comes first; reserve room for all entries up front with a fallible allocation.
        let entry_count = decoder.decode_varuint()?;
        let mut entries = HashMap::new();
        entries.try_reserve(entry_count)?;

        // The shared macro decodes 'entry_count'-many entries and stores them in the map.
        decode_dictionary_entries!(entries, decoder, entry_count);
        Ok(entries)
    }
}
257
#[cfg(feature = "alloc")]
impl<K, V> DecodeFrom for BTreeMap<K, V>
where
    K: DecodeFrom + Debug + Ord,
    V: DecodeFrom,
{
    /// Decodes a dictionary into a `BTreeMap`: first the number of entries, then the entries themselves.
    ///
    /// An error is returned if the count cannot be decoded or if decoding the entries fails.
    fn decode_from(decoder: &mut Decoder<impl InputSource>) -> Result<Self> {
        // The entry count comes first. No space is reserved ahead of time here; the map starts out empty.
        let entry_count = decoder.decode_varuint()?;
        let mut entries = BTreeMap::new();

        // The shared macro decodes 'entry_count'-many entries and stores them in the map.
        decode_dictionary_entries!(entries, decoder, entry_count);
        Ok(entries)
    }
}
275
276// TODO add support for optional sequences and dictionaries