Skip to main content

tidecoin_consensus_encoding/
compact_size.rs

1// SPDX-License-Identifier: CC0-1.0
2
3//! Compact size codec.
4//!
5//! Compact size is a variable-length integer encoding used throughout the Tidecoin
6//! consensus protocol, usually to encode collection lengths. However, there are
7//! also some unique non-length use cases.
8
9use internals::array_vec::ArrayVec;
10
11use crate::decode::Decoder;
12use crate::encode::{Encoder, ExactSizeEncoder};
13use crate::error::{
14    CompactSizeDecoderError, CompactSizeDecoderErrorInner, LengthPrefixExceedsMaxError,
15};
16
17/// Maximum size, in bytes, of a vector we are allowed to decode.
18///
19/// This is also the default value limit that can be decoded with a decoder from
20/// [`CompactSizeDecoder::new`].
21pub(crate) const MAX_VEC_SIZE: usize = 4_000_000;
22
23/// The maximum length of a compact size encoding.
24const SIZE: usize = 9;
25
26/// Compact size prefix byte indicating a 2-byte `u16` payload follows.
27const PREFIX_U16: u8 = 0xFD;
28/// Compact size prefix byte indicating a 4-byte `u32` payload follows.
29const PREFIX_U32: u8 = 0xFE;
30/// Compact size prefix byte indicating an 8-byte `u64` payload follows.
31const PREFIX_U64: u8 = 0xFF;
32
33/// Encoder for a compact size encoded integer.
34#[derive(Debug, Clone)]
35pub struct CompactSizeEncoder {
36    buf: Option<ArrayVec<u8, SIZE>>,
37}
38
39impl CompactSizeEncoder {
40    /// Constructs a new `CompactSizeEncoder` for a length prefix.
41    ///
42    /// The `usize` type is the natural Rust type for lengths and collection sizes, which is the
43    /// dominant use case for compact size encoding in the Tidecoin protocol. Prefer this constructor
44    /// whenever you are encoding the length of a collection or a byte slice.
45    ///
46    /// Compact size encodings are defined only over the `u64` range. On exotic platforms where
47    /// `usize` is wider than 64 bits the value will be saturated to [`u64::MAX`], but in practice
48    /// any in-memory length that could actually be passed here is well within the `u64` range.
49    ///
50    /// If you need to encode an arbitrary `u64` integer that is not a length prefix, use
51    /// [`Self::new_u64`] instead.
52    pub fn new(value: usize) -> Self {
53        Self { buf: Some(Self::encode(u64::try_from(value).unwrap_or(u64::MAX))) }
54    }
55
56    /// Constructs a new `CompactSizeEncoder` for an arbitrary `u64` integer.
57    ///
58    /// Prefer [`Self::new`] unless you are encoding a non-length integer.
59    ///
60    /// A small number of fields in the Tidecoin protocol are compact-size-encoded integers that are
61    /// not collection lengths (e.g. service flags). Use this constructor for those cases, where the
62    /// natural type of the value is `u64` rather than `usize`.
63    pub fn new_u64(value: u64) -> Self {
64        Self { buf: Some(Self::encode(value)) }
65    }
66
67    /// Returns the number of bytes used to encode this `CompactSize` value.
68    ///
69    /// # Returns
70    ///
71    /// - 1 for 0..=0xFC
72    /// - 3 for 0xFD..=(2^16-1)
73    /// - 5 for 0x10000..=(2^32-1)
74    /// - 9 otherwise.
75    #[inline]
76    pub const fn encoded_size(value: usize) -> usize {
77        match value {
78            0..=0xFC => 1,
79            0xFD..=0xFFFF => 3,
80            0x10000..=0xFFFF_FFFF => 5,
81            _ => 9,
82        }
83    }
84
85    /// Encodes `CompactSize` without allocating.
86    #[inline]
87    fn encode(value: u64) -> ArrayVec<u8, SIZE> {
88        let mut res = ArrayVec::<u8, SIZE>::new();
89        match value {
90            0..=0xFC => {
91                res.push(value as u8); // Cast ok because of match.
92            }
93            0xFD..=0xFFFF => {
94                let v = value as u16; // Cast ok because of match.
95                res.push(PREFIX_U16);
96                res.extend_from_slice(&v.to_le_bytes());
97            }
98            0x10000..=0xFFFF_FFFF => {
99                let v = value as u32; // Cast ok because of match.
100                res.push(PREFIX_U32);
101                res.extend_from_slice(&v.to_le_bytes());
102            }
103            _ => {
104                res.push(PREFIX_U64);
105                res.extend_from_slice(&value.to_le_bytes());
106            }
107        }
108        res
109    }
110}
111
112impl Encoder for CompactSizeEncoder {
113    #[inline]
114    fn current_chunk(&self) -> &[u8] {
115        self.buf.as_ref().map(|b| &b[..]).unwrap_or_default()
116    }
117
118    #[inline]
119    fn advance(&mut self) -> bool {
120        self.buf = None;
121        false
122    }
123}
124
125impl ExactSizeEncoder for CompactSizeEncoder {
126    #[inline]
127    fn len(&self) -> usize {
128        self.buf.map_or(0, |buf| buf.len())
129    }
130}
131
132/// Decodes a compact size encoded integer as a length prefix.
133///
134/// The decoded value is returned as a `usize` and is bounded by a configurable limit (default:
135/// 4,000,000). This limit is a denial-of-service protection: a malicious peer can send a compact
136/// size value up to 2^64-1, and without a limit check the caller might attempt to allocate an
137/// enormous buffer based on that value. [`CompactSizeDecoder`] prevents this by rejecting values
138/// that exceed the limit before returning them to the caller.
139///
140/// If you are decoding an arbitrary `u64` integer that is genuinely not a length prefix, use
141/// [`CompactSizeU64Decoder`] instead.
142///
143/// For more information about decoders see the documentation of the [`Decoder`] trait.
144#[derive(Debug, Clone)]
145pub struct CompactSizeDecoder {
146    buf: ArrayVec<u8, 9>,
147    limit: usize,
148}
149
150impl CompactSizeDecoder {
151    /// Constructs a new compact size decoder with the default length limit.
152    ///
153    /// The decoded value must not exceed 4,000,000 and must fit in a `usize`, otherwise
154    /// [`end`](Self::end) will return an error. This default limit reflects the maximum sensible
155    /// vector length under the 4 MB block weight limit.
156    pub const fn new() -> Self {
157        Self { buf: ArrayVec::new(), limit: MAX_VEC_SIZE }
158    }
159
160    /// Constructs a new compact size decoder with a custom length limit.
161    ///
162    /// The decoded value must not exceed `limit`, otherwise [`end`](Self::end) will return an
163    /// error. Use this when you know the field you are decoding has a tighter bound than the
164    /// default limit of 4,000,000.
165    pub const fn new_with_limit(limit: usize) -> Self {
166        Self { buf: ArrayVec::new(), limit }
167    }
168}
169
170impl Default for CompactSizeDecoder {
171    fn default() -> Self {
172        Self::new()
173    }
174}
175
176impl Decoder for CompactSizeDecoder {
177    type Output = usize;
178    type Error = CompactSizeDecoderError;
179
180    fn push_bytes(&mut self, bytes: &mut &[u8]) -> Result<bool, Self::Error> {
181        Ok(compact_size_push_bytes(&mut self.buf, bytes))
182    }
183
184    fn end(self) -> Result<Self::Output, Self::Error> {
185        use CompactSizeDecoderErrorInner as E;
186
187        let dec_value = compact_size_decode_u64(&self.buf)?;
188
189        // This error is returned if dec_value is outside of the usize range, or
190        // if it is above the given limit.
191        let make_err = || {
192            CompactSizeDecoderError(E::ValueExceedsLimit(LengthPrefixExceedsMaxError {
193                value: dec_value,
194                limit: self.limit,
195            }))
196        };
197
198        usize::try_from(dec_value).map_err(|_| make_err()).and_then(|nsize| {
199            if nsize > self.limit {
200                Err(make_err())
201            } else {
202                Ok(nsize)
203            }
204        })
205    }
206
207    fn read_limit(&self) -> usize {
208        compact_size_read_limit(&self.buf)
209    }
210}
211
212/// Decodes a compact size encoded integer as a raw `u64`.
213///
214/// If you are decoding a length prefix, you probably want [`CompactSizeDecoder`] instead.
215///
216/// This decoder performs no limit check and no conversion to `usize`. It exists for the small
217/// number of Tidecoin protocol fields that are compact-size-encoded integers but are not length
218/// prefixes (e.g. service flags in the `version` message). For those fields the full `u64` range is
219/// meaningful and there is no associated allocation whose size would be controlled by the decoded
220/// value.
221///
222/// # Denial-of-service warning
223///
224/// Do not use this decoder for length prefixes. If the decoded value is used to size an allocation,
225/// for example as the length of a `Vec`, a malicious peer can send a compact size value of up to
226/// 2^64-1 and cause an out-of-memory condition. [`CompactSizeDecoder`] prevents this by enforcing a
227/// configurable upper bound before returning the value.
228///
229/// For more information about decoders see the documentation of the [`Decoder`] trait.
230#[derive(Debug, Clone)]
231pub struct CompactSizeU64Decoder {
232    buf: ArrayVec<u8, 9>,
233}
234
235impl CompactSizeU64Decoder {
236    /// Constructs a new `CompactSizeU64Decoder`.
237    ///
238    /// See the [struct-level documentation](Self) for guidance on when to use this decoder versus
239    /// [`CompactSizeDecoder`].
240    pub const fn new() -> Self {
241        Self { buf: ArrayVec::new() }
242    }
243}
244
245impl Default for CompactSizeU64Decoder {
246    fn default() -> Self {
247        Self::new()
248    }
249}
250
251impl Decoder for CompactSizeU64Decoder {
252    type Output = u64;
253    type Error = CompactSizeDecoderError;
254
255    fn push_bytes(&mut self, bytes: &mut &[u8]) -> Result<bool, Self::Error> {
256        Ok(compact_size_push_bytes(&mut self.buf, bytes))
257    }
258
259    fn end(self) -> Result<Self::Output, Self::Error> {
260        compact_size_decode_u64(&self.buf)
261    }
262
263    fn read_limit(&self) -> usize {
264        compact_size_read_limit(&self.buf)
265    }
266}
267
268/// Pushes bytes into a compact size buffer, returning true if more bytes are needed.
269fn compact_size_push_bytes(buf: &mut ArrayVec<u8, 9>, bytes: &mut &[u8]) -> bool {
270    if bytes.is_empty() {
271        return true;
272    }
273
274    if buf.is_empty() {
275        buf.push(bytes[0]);
276        *bytes = &bytes[1..];
277    }
278    let len = match buf[0] {
279        PREFIX_U64 => 9,
280        PREFIX_U32 => 5,
281        PREFIX_U16 => 3,
282        _ => 1,
283    };
284    let to_copy = bytes.len().min(len - buf.len());
285    buf.extend_from_slice(&bytes[..to_copy]);
286    *bytes = &bytes[to_copy..];
287
288    buf.len() != len
289}
290
291/// Returns the number of bytes the compact size decoder still needs to read.
292fn compact_size_read_limit(buf: &ArrayVec<u8, 9>) -> usize {
293    match buf.len() {
294        0 => 1,
295        already_read => match buf[0] {
296            PREFIX_U64 => 9_usize.saturating_sub(already_read),
297            PREFIX_U32 => 5_usize.saturating_sub(already_read),
298            PREFIX_U16 => 3_usize.saturating_sub(already_read),
299            _ => 0,
300        },
301    }
302}
303
304/// Decodes a compact size buffer to a u64, checking for minimal encoding.
305fn compact_size_decode_u64(buf: &ArrayVec<u8, 9>) -> Result<u64, CompactSizeDecoderError> {
306    use CompactSizeDecoderErrorInner as E;
307
308    fn arr<const N: usize>(slice: &[u8]) -> Result<[u8; N], CompactSizeDecoderError> {
309        slice.try_into().map_err(|_| {
310            CompactSizeDecoderError(E::UnexpectedEof { required: N, received: slice.len() })
311        })
312    }
313
314    let (first, payload) = buf
315        .split_first()
316        .ok_or(CompactSizeDecoderError(E::UnexpectedEof { required: 1, received: 0 }))?;
317
318    match *first {
319        PREFIX_U64 => {
320            let x = u64::from_le_bytes(arr(payload)?);
321            if x < 0x100_000_000 {
322                Err(CompactSizeDecoderError(E::NonMinimal { value: x }))
323            } else {
324                Ok(x)
325            }
326        }
327        PREFIX_U32 => {
328            let x = u32::from_le_bytes(arr(payload)?);
329            if x < 0x10000 {
330                Err(CompactSizeDecoderError(E::NonMinimal { value: x.into() }))
331            } else {
332                Ok(x.into())
333            }
334        }
335        PREFIX_U16 => {
336            let x = u16::from_le_bytes(arr(payload)?);
337            if x < 0xFD {
338                Err(CompactSizeDecoderError(E::NonMinimal { value: x.into() }))
339            } else {
340                Ok(x.into())
341            }
342        }
343        n => Ok(n.into()),
344    }
345}
346
347#[cfg(test)]
348mod tests {
349    use super::*;
350
351    #[test]
352    fn encoded_value_1_byte() {
353        // Check lower bound, upper bound (and implicitly endian-ness).
354        for v in [0x00u64, 0x01, 0x02, 0xFA, 0xFB, 0xFC] {
355            assert_eq!(CompactSizeEncoder::encoded_size(v as usize), 1);
356            // Should be encoded as the value as a u8.
357            let want = [v as u8];
358            let got = CompactSizeEncoder::encode(v);
359            assert_eq!(got.as_slice().len(), 1); // sanity check
360            assert_eq!(got.as_slice(), want);
361        }
362    }
363
364    macro_rules! check_encode {
365        ($($test_name:ident, $size:expr, $value:expr, $want:expr);* $(;)?) => {
366            $(
367                #[test]
368                fn $test_name() {
369                    let value = $value as u64; // Because default integer type is i32.
370                    assert_eq!(CompactSizeEncoder::encoded_size(value as usize), $size);
371                    let got = CompactSizeEncoder::encode(value);
372                    assert_eq!(got.as_slice().len(), $size); // sanity check
373                    assert_eq!(got.as_slice(), &$want);
374                }
375            )*
376        }
377    }
378
379    check_encode! {
380        // 3 byte encoding.
381        encoded_value_3_byte_lower_bound, 3, 0xFD, [0xFD, 0xFD, 0x00]; // 0x00FD
382        encoded_value_3_byte_endianness, 3, 0xABCD, [0xFD, 0xCD, 0xAB];
383        encoded_value_3_byte_upper_bound, 3, 0xFFFF, [0xFD, 0xFF, 0xFF];
384        // 5 byte encoding.
385        encoded_value_5_byte_lower_bound, 5, 0x0001_0000, [0xFE, 0x00, 0x00, 0x01, 0x00];
386        encoded_value_5_byte_endianness, 5, 0x0123_4567, [0xFE, 0x67, 0x45, 0x23, 0x01];
387        encoded_value_5_byte_upper_bound, 5, 0xFFFF_FFFF, [0xFE, 0xFF, 0xFF, 0xFF, 0xFF];
388    }
389
390    // 9-byte encoding requires values above u32::MAX which don't fit in usize on 32-bit platforms.
391    #[cfg(target_pointer_width = "64")]
392    check_encode! {
393        encoded_value_9_byte_lower_bound, 9, 0x0000_0001_0000_0000u64, [0xFF, 0x00, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00];
394        encoded_value_9_byte_endianness, 9, 0x0123_4567_89AB_CDEFu64, [0xFF, 0xEF, 0xCD, 0xAB, 0x89, 0x67, 0x45, 0x23, 0x01];
395        encoded_value_9_byte_upper_bound, 9, u64::MAX, [0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF];
396    }
397
398    #[test]
399    fn compact_size_new_values_too_large() {
400        use CompactSizeDecoderErrorInner as E;
401
402        const EXCESS_VEC_SIZE: u64 = (MAX_VEC_SIZE + 1) as u64; // can't use try_from for const
403
404        // MAX_VEC_SIZE should succeed for `new` constructor
405        let mut decoder = CompactSizeDecoder::new();
406        decoder.push_bytes(&mut [0xFE, 0x00, 0x09, 0x3D, 0x00].as_slice()).unwrap();
407        let got = decoder.end().unwrap();
408        assert_eq!(got, MAX_VEC_SIZE);
409
410        // MAX_VEC_SIZE + 1 should fail for `new` constructor
411        let mut decoder = CompactSizeDecoder::new();
412        decoder.push_bytes(&mut [0xFE, 0x01, 0x09, 0x3D, 0x00].as_slice()).unwrap();
413        let got = decoder.end().unwrap_err();
414        assert!(matches!(
415            got,
416            CompactSizeDecoderError(E::ValueExceedsLimit(LengthPrefixExceedsMaxError {
417                limit: MAX_VEC_SIZE,
418                value: EXCESS_VEC_SIZE,
419            })),
420        ));
421    }
422
423    #[test]
424    fn compact_size_new_with_limit_values_too_large() {
425        use CompactSizeDecoderErrorInner as E;
426
427        // 240 should succeed for `new_with_limit` constructor
428        let mut decoder = CompactSizeDecoder::new_with_limit(240);
429        decoder.push_bytes(&mut [0xf0].as_slice()).unwrap();
430        let got = decoder.end().unwrap();
431        assert_eq!(got, 240);
432
433        // 241 should fail for `new_with_limit` constructor
434        let mut decoder = CompactSizeDecoder::new_with_limit(240);
435        decoder.push_bytes(&mut [0xf1].as_slice()).unwrap();
436        let got = decoder.end().unwrap_err();
437        assert!(matches!(
438            got,
439            CompactSizeDecoderError(E::ValueExceedsLimit(LengthPrefixExceedsMaxError {
440                limit: 240,
441                value: 241,
442            })),
443        ));
444    }
445}