slice_codec/decoding.rs
1// Copyright (c) ZeroC, Inc.
2
3use crate::buffer::InputSource;
4use crate::decode_from::*;
5use crate::decoder::Decoder;
6use crate::{Error, InvalidDataErrorKind, Result};
7
8// We only support `String`, `Vec`, and `BTreeMap` if the `alloc` crate is available through the `alloc` feature flag.
9#[cfg(feature = "alloc")]
10use alloc::collections::BTreeMap;
11#[cfg(feature = "alloc")]
12use alloc::string::String;
13#[cfg(feature = "alloc")]
14use alloc::vec::Vec;
15#[cfg(feature = "alloc")]
16use core::fmt::Debug;
17
18// We only support `HashMap` if the standard library is available through the `std` feature flag.
19#[cfg(feature = "std")]
20use std::collections::HashMap;
21#[cfg(feature = "std")]
22use std::hash::Hash;
23
/// Sentinel tag value that terminates a run of tagged fields.
/// TAG_END_MARKER must be encoded at the end of every non-compact type.
const TAG_END_MARKER: i32 = -1;
26
27// =============================================================================
28// Fixed-length type implementations
29// =============================================================================
30
31/// TODO
32fn illegal_bool_error(value: u8) -> Error {
33 let error = InvalidDataErrorKind::IllegalValue {
34 desc: "bools can only have a numeric value of either '0' or '1'",
35 value: Some(value as i128),
36 };
37 error.into()
38}
39
40impl DecodeFrom for bool {
41 /// Reads a single byte from the buffer and returns `false` if it is `0` or `true` if it is `1`.
42 /// If the byte has any other value, an error is returned instead.
43 fn decode_from(decoder: &mut Decoder<impl InputSource>) -> crate::Result<Self> {
44 let byte = decoder.read_byte()?;
45
46 // We strictly enforce the Slice spec; A bool _must_ be encoded as either `0` or `1`.
47 match byte {
48 0 | 1 => Ok(byte != 0),
49 _ => Err(illegal_bool_error(byte)),
50 }
51 }
52}
53
54impl DecodeFrom for u8 {
55 /// Reads a single byte directly from the buffer and returns it, as is.
56 fn decode_from(decoder: &mut Decoder<impl InputSource>) -> crate::Result<Self> {
57 decoder.read_byte()
58 }
59}
60
61impl DecodeFrom for i8 {
62 /// Reads a single byte directly from the buffer and returns it, as-if it was an `i8`.
63 fn decode_from(decoder: &mut Decoder<impl InputSource>) -> crate::Result<Self> {
64 // In Rust, signed-integers are guaranteed to use a two's complement representation in memory.
65 // Casting between `u8` and `i8` is no-op, and doesn't change this representation, or the sign bit.
66 let byte = decoder.read_byte()?;
67 Ok(byte as i8)
68 }
69}
70
// The multi-byte numeric primitives get their decoding implementations from a macro defined elsewhere.
// NOTE(review): the string argument is presumably used as the doc comment of the generated function — confirm
// against the macro definition.
implement_decode_from_on_numeric_primitive_type! {u16, "Decodes a [`u16`] from 2 bytes (little endian)."}
implement_decode_from_on_numeric_primitive_type! {i16, "Decodes a [`i16`] from 2 bytes (little endian) in two's complement form."}
implement_decode_from_on_numeric_primitive_type! {u32, "Decodes a [`u32`] from 4 bytes (little endian)."}
implement_decode_from_on_numeric_primitive_type! {i32, "Decodes a [`i32`] from 4 bytes (little endian) in two's complement form."}
implement_decode_from_on_numeric_primitive_type! {u64, "Decodes a [`u64`] from 8 bytes (little endian)."}
implement_decode_from_on_numeric_primitive_type! {i64, "Decodes a [`i64`] from 8 bytes (little endian) in two's complement form."}
implement_decode_from_on_numeric_primitive_type! {f32, "Decodes a [`f32`] from 4 bytes (little endian) using the \"binary32\" representation defined in IEEE 754-2008."}
implement_decode_from_on_numeric_primitive_type! {f64, "Decodes a [`f64`] from 8 bytes (little endian) using the \"binary64\" representation defined in IEEE 754-2008."}
79
80// =============================================================================
81// Variable-length integer type implementations
82// =============================================================================
83
84/// TODO
85// TODO this isn't great. It assumes `T` is a signed integer, and has a size less than `u32::MAX`. For sane users,
86// these will always be true. But if these assumptions don't hold, the 'min' and 'max' this reports will be wrong.
87fn varint_range_error<T>(value: i64) -> Error {
88 let size = core::mem::size_of::<T>() as u32;
89 let shift_count = i128::BITS - (size * 8);
90 let error = InvalidDataErrorKind::OutOfRange {
91 value: value as i128,
92 min: i128::MIN >> shift_count,
93 max: i128::MAX >> shift_count,
94 typename: core::any::type_name::<T>(),
95 };
96 error.into()
97}
98
99/// TODO
100// TODO this isn't great. It assumes `T` is an unsigned integer, and has a size less than `u32::MAX`. For sane users,
101// these will always be true. But if these assumptions don't hold, the 'min' and 'max' this reports will be wrong.
102fn varuint_range_error<T>(value: u64) -> Error {
103 let size = core::mem::size_of::<T>() as u32;
104 let shift_count = u128::BITS - (size * 8);
105 let error = InvalidDataErrorKind::OutOfRange {
106 value: value as i128,
107 min: 0,
108 max: (u128::MAX >> shift_count) as i128,
109 typename: core::any::type_name::<T>(),
110 };
111 error.into()
112}
113
114impl<I: InputSource> Decoder<I> {
115 /// Reads between 1 and 8 bytes from the buffer and decodes a single signed integer from them.
116 /// This integer must of been encoded in the variable length '[varint]' format.
117 ///
118 /// [varint]: https://docs.icerpc.dev/slice/language-guide/primitive-types#variable-size-integral-types
119 pub fn decode_varint<T: TryFrom<i64>>(&mut self) -> Result<T> {
120 // Peek the next byte in the buffer. The lowest two bits of this byte tell us how many total bytes to read, so
121 // we can pick an appropriate decoding function. This works because 'varint's are always encoded little-endian.
122 // See: https://docs.icerpc.dev/slice/encoding/primitive-types#variable-size-integral-types.
123 #[rustfmt::skip] // To keep the match arms aligned for readability.
124 let mut value = match self.peek_byte()? & 0b11 {
125 0b00 => i8::decode_from(self)? as i64,
126 0b01 => i16::decode_from(self)? as i64,
127 0b10 => i32::decode_from(self)? as i64,
128 0b11 => i64::decode_from(self)?,
129
130 // # SAFETY
131 // This match is exhaustive. There are only 4 possible values after applying a mask of `0b11` and we cover
132 // all of them. This branch is mathematically impossible to hit, so we inform the compiler of this.
133 _ => unsafe { core::hint::unreachable_unchecked() },
134 };
135
136 // Bit-shift the lowest 2 bits away. These stored the number of bytes to read, and are not part of the value.
137 value >>= 2;
138 // Try to convert the decoded value to the requested type.
139 T::try_from(value).map_err(|_| varint_range_error::<T>(value))
140 }
141
142 /// Reads between 1 and 8 bytes from the buffer and decodes a single unsigned integer from them.
143 /// This integer must of been encoded in the variable length '[varuint]' format.
144 ///
145 /// [varuint]: https://docs.icerpc.dev/slice/language-guide/primitive-types#variable-size-integral-types
146 pub fn decode_varuint<T: TryFrom<u64>>(&mut self) -> Result<T> {
147 // Peek the next byte in the buffer. The lowest two bits of this byte tell us how many total bytes to read, so
148 // we can pick an appropriate decoding function. This works because 'varuint's are always encoded little-endian.
149 // See: https://docs.icerpc.dev/slice/encoding/primitive-types#variable-size-integral-types.
150 #[rustfmt::skip] // To keep the match arms aligned for readability.
151 let mut value = match self.peek_byte()? & 0b11 {
152 0b00 => u8::decode_from(self)? as u64,
153 0b01 => u16::decode_from(self)? as u64,
154 0b10 => u32::decode_from(self)? as u64,
155 0b11 => u64::decode_from(self)?,
156
157 // # SAFETY
158 // This match is exhaustive. There are only 4 possible values after applying a mask of `0b11` and we cover
159 // all of them. This branch is mathematically impossible to hit, so we inform the compiler of this.
160 _ => unsafe { core::hint::unreachable_unchecked() },
161 };
162
163 // Bit-shift the lowest 2 bits away. These stored the number of bytes to read, and are not part of the value.
164 value >>= 2;
165 // Try to convert the decoded value to the requested type.
166 T::try_from(value).map_err(|_| varuint_range_error::<T>(value))
167 }
168
169 /// An alias for `[decode_varuint]` to increase readability.
170 pub fn decode_size(&mut self) -> Result<usize> {
171 self.decode_varuint()
172 }
173
174 /// Skips any remaining tagged fields.
175 pub fn skip_tagged_fields(&mut self) -> Result<()> {
176 // Continue decoding tags until we hit 'TAG_END_MARKER'.
177 while self.decode_varint::<i32>()? != TAG_END_MARKER {
178 // Skip over the tagged field.
179 let field_size = self.decode_size()?;
180 self.read_byte_slice_exact(field_size)?;
181 }
182
183 Ok(())
184 }
185}
186
187// =============================================================================
188// Sequence type implementations
189// =============================================================================
190
#[cfg(feature = "alloc")]
/// Decodes a [`String`] from a varuint byte count followed by that many bytes of UTF-8 text.
///
/// Decoding fails if the count cannot be decoded, the buffer cannot be allocated, the bytes cannot be read, or the
/// bytes are not valid UTF-8.
impl DecodeFrom for String {
    fn decode_from(decoder: &mut Decoder<impl InputSource>) -> Result<Self> {
        // Decode how many bytes are in this string, and attempt to allocate a vec with the necessary capacity.
        let length = decoder.decode_size()?;
        let mut vector: Vec<u8> = Vec::new();
        vector.try_reserve_exact(length)?;

        // The allocator is allowed to give us more capacity than we asked for, so only expose the first 'length'
        // bytes of the spare capacity. Otherwise the exact read below could consume bytes past the end of the string.
        // This cannot panic: the vector is empty, so a successful reserve guarantees 'capacity >= length'.
        let spare = &mut vector.spare_capacity_mut()[..length];

        // SAFETY: `MaybeUninit<u8>` has the same layout as `u8`, and the pointer and length come from a valid slice.
        // The bytes are still uninitialized here, so this relies on `read_bytes_into_exact` only ever writing to the
        // slice and never reading from it.
        // NOTE(review): confirm this against the contract of `read_bytes_into_exact`.
        let bytes = unsafe { &mut *(spare as *mut [core::mem::MaybeUninit<u8>] as *mut [u8]) };

        // Read 'length'-many bytes into the vector.
        decoder.read_bytes_into_exact(bytes)?;

        // SAFETY: 'length' does not exceed the capacity (checked by the slicing above), and the successful read above
        // is assumed to have initialized all of the first 'length' bytes.
        unsafe { vector.set_len(length) };

        // Attempt to decode the bytes as a utf-8 string.
        let string = String::from_utf8(vector)?;
        Ok(string)
    }
}
213
#[cfg(feature = "alloc")]
impl<T> DecodeFrom for Vec<T>
where
    T: DecodeFrom,
{
    /// Decodes a sequence: a varuint element count, followed by that many elements decoded one after another.
    ///
    /// Fails if the count cannot be decoded, if memory for the elements cannot be reserved, or if any element fails
    /// to decode.
    fn decode_from(decoder: &mut Decoder<impl InputSource>) -> Result<Self> {
        // The element count comes first; reserve room for all of the elements up front, fallibly.
        let element_count = decoder.decode_size()?;
        let mut elements = Vec::new();
        elements.try_reserve_exact(element_count)?;

        // Decode the elements in order, stopping at the first failure.
        for _ in 0..element_count {
            elements.push(decoder.decode()?);
        }
        Ok(elements)
    }
}
234
235// =============================================================================
236// Dictionary type implementations
237// =============================================================================
238
#[cfg(feature = "std")]
impl<K, V> DecodeFrom for HashMap<K, V>
where
    K: DecodeFrom + Debug + Eq + Hash,
    V: DecodeFrom,
{
    /// Decodes a dictionary: a varuint entry count, followed by that many entries.
    ///
    /// The entries themselves are decoded and inserted by the `decode_dictionary_entries!` macro.
    fn decode_from(decoder: &mut Decoder<impl InputSource>) -> Result<Self> {
        // The entry count comes first; reserve room for all of the entries up front, fallibly.
        let entry_count = decoder.decode_size()?;
        let mut dictionary = HashMap::new();
        dictionary.try_reserve(entry_count)?;

        // Fill the map with 'entry_count'-many decoded entries.
        decode_dictionary_entries!(dictionary, decoder, entry_count);
        Ok(dictionary)
    }
}
257
#[cfg(feature = "alloc")]
impl<K, V> DecodeFrom for BTreeMap<K, V>
where
    K: DecodeFrom + Debug + Ord,
    V: DecodeFrom,
{
    /// Decodes a dictionary: a varuint entry count, followed by that many entries.
    ///
    /// The entries themselves are decoded and inserted by the `decode_dictionary_entries!` macro.
    fn decode_from(decoder: &mut Decoder<impl InputSource>) -> Result<Self> {
        // The entry count comes first. No capacity is reserved here; the map simply starts out empty.
        // NOTE(review): the integer type of the count is inferred from how the macro below uses it — confirm.
        let entry_count = decoder.decode_varuint()?;
        let mut dictionary = BTreeMap::new();

        // Fill the map with 'entry_count'-many decoded entries.
        decode_dictionary_entries!(dictionary, decoder, entry_count);
        Ok(dictionary)
    }
}
275
276// TODO add support for optional sequences and dictionaries