Skip to main content

bitcoin_consensus_encoding/
compact_size.rs

1// SPDX-License-Identifier: CC0-1.0
2
3//! Compact size codec.
4//!
5//! Compact size is a variable-length integer encoding used throughout the Bitcoin
6//! consensus protocol to encode collection lengths. However, there are also some
7//! unique non-length use cases.
8
9use internals::array_vec::ArrayVec;
10
11use crate::decode::Decoder;
12use crate::encode::{Encoder, ExactSizeEncoder};
13use crate::error::{
14    CompactSizeDecoderError, CompactSizeDecoderErrorInner, LengthPrefixExceedsMaxError,
15};
16
17/// Default maximum size of a decoded object in bytes.
18///
19/// Matches Bitcoin Core's default [serialization limit]. This is
20/// a high level anti-DoS limit which all bitcoin types should
21/// easily fit within.
22///
23/// [serialization limit]: https://github.com/bitcoin/bitcoin/blob/a7c29df0e5ace05b6186612671d6103c112ec922/src/serialize.h#L32
24const MAX_COMPACT_SIZE: usize = 0x0200_0000;
25
26/// The maximum length of a compact size encoding.
27const SIZE: usize = 9;
28
29/// Compact size prefix byte indicating a 2-byte `u16` payload follows.
30const PREFIX_U16: u8 = 0xFD;
31/// Compact size prefix byte indicating a 4-byte `u32` payload follows.
32const PREFIX_U32: u8 = 0xFE;
33/// Compact size prefix byte indicating an 8-byte `u64` payload follows.
34const PREFIX_U64: u8 = 0xFF;
35
36/// Encoder for a compact size encoded integer.
37#[derive(Debug, Clone)]
38pub struct CompactSizeEncoder {
39    buf: Option<ArrayVec<u8, SIZE>>,
40}
41
42impl CompactSizeEncoder {
43    /// Constructs a new `CompactSizeEncoder` for a length prefix.
44    ///
45    /// The `usize` type is the natural Rust type for lengths and collection sizes, which is the
46    /// dominant use case for compact size encoding in the Bitcoin protocol. Prefer this constructor
47    /// whenever you are encoding the length of a collection or a byte slice.
48    ///
49    /// Compact size encodings are defined only over the `u64` range. On exotic platforms where
50    /// `usize` is wider than 64 bits the value will be saturated to [`u64::MAX`], but in practice
51    /// any in-memory length that could actually be passed here is well within the `u64` range.
52    ///
53    /// If you need to encode an arbitrary `u64` integer that is not a length prefix, use
54    /// [`Self::new_u64`] instead.
55    pub fn new(value: usize) -> Self {
56        Self { buf: Some(Self::encode(u64::try_from(value).unwrap_or(u64::MAX))) }
57    }
58
59    /// Constructs a new `CompactSizeEncoder` for an arbitrary `u64` integer.
60    ///
61    /// Prefer [`Self::new`] unless you are encoding a non-length integer.
62    ///
63    /// A small number of fields in the Bitcoin protocol are compact-size-encoded integers that are
64    /// not collection lengths (e.g. service flags). Use this constructor for those cases, where the
65    /// natural type of the value is `u64` rather than `usize`.
66    pub fn new_u64(value: u64) -> Self { Self { buf: Some(Self::encode(value)) } }
67
68    /// Returns the number of bytes used to encode this `CompactSize` value.
69    ///
70    /// # Returns
71    ///
72    /// - 1 for 0..=0xFC
73    /// - 3 for 0xFD..=(2^16-1)
74    /// - 5 for 0x10000..=(2^32-1)
75    /// - 9 otherwise.
76    #[inline]
77    pub const fn encoded_size(value: usize) -> usize {
78        match value {
79            0..=0xFC => 1,
80            0xFD..=0xFFFF => 3,
81            0x10000..=0xFFFF_FFFF => 5,
82            _ => 9,
83        }
84    }
85
86    /// Encodes `CompactSize` without allocating.
87    #[inline]
88    fn encode(value: u64) -> ArrayVec<u8, SIZE> {
89        let mut res = ArrayVec::<u8, SIZE>::new();
90        match value {
91            0..=0xFC => {
92                res.push(value as u8); // Cast ok because of match.
93            }
94            0xFD..=0xFFFF => {
95                let v = value as u16; // Cast ok because of match.
96                res.push(PREFIX_U16);
97                res.extend_from_slice(&v.to_le_bytes());
98            }
99            0x10000..=0xFFFF_FFFF => {
100                let v = value as u32; // Cast ok because of match.
101                res.push(PREFIX_U32);
102                res.extend_from_slice(&v.to_le_bytes());
103            }
104            _ => {
105                res.push(PREFIX_U64);
106                res.extend_from_slice(&value.to_le_bytes());
107            }
108        }
109        res
110    }
111}
112
113impl Encoder for CompactSizeEncoder {
114    #[inline]
115    fn current_chunk(&self) -> &[u8] { self.buf.as_ref().map(|b| &b[..]).unwrap_or_default() }
116
117    #[inline]
118    fn advance(&mut self) -> bool {
119        self.buf = None;
120        false
121    }
122}
123
124impl ExactSizeEncoder for CompactSizeEncoder {
125    #[inline]
126    fn len(&self) -> usize { self.buf.map_or(0, |buf| buf.len()) }
127}
128
129/// Decodes a compact size encoded integer as a length prefix.
130///
131/// The decoded value is returned as a `usize` and is bounded by a configurable limit (default:
132/// 4,000,000). This limit is a denial-of-service protection: a malicious peer can send a compact
133/// size value up to 2^64-1, and without a limit check the caller might attempt to allocate an
134/// enormous buffer based on that value. [`CompactSizeDecoder`] prevents this by rejecting values
135/// that exceed the limit before returning them to the caller.
136///
137/// If you are decoding an arbitrary `u64` integer that is genuinely not a length prefix, use
138/// [`CompactSizeU64Decoder`] instead.
139///
140/// For more information about decoders see the documentation of the [`Decoder`] trait.
141#[derive(Debug, Clone)]
142pub struct CompactSizeDecoder {
143    buf: ArrayVec<u8, 9>,
144    limit: usize,
145}
146
147impl CompactSizeDecoder {
148    /// Constructs a new compact size decoder with the default 32MB length limit.
149    pub const fn new() -> Self { Self { buf: ArrayVec::new(), limit: MAX_COMPACT_SIZE } }
150
151    /// Constructs a new compact size decoder with a custom length limit.
152    ///
153    /// The decoded value must not exceed `limit`, otherwise [`end`](Self::end) will return an
154    /// error. Use this when you know the field you are decoding has a tighter bound than the
155    /// default limit of 32MB.
156    pub const fn new_with_limit(limit: usize) -> Self { Self { buf: ArrayVec::new(), limit } }
157}
158
159impl Default for CompactSizeDecoder {
160    fn default() -> Self { Self::new() }
161}
162
163impl Decoder for CompactSizeDecoder {
164    type Output = usize;
165    type Error = CompactSizeDecoderError;
166
167    fn push_bytes(&mut self, bytes: &mut &[u8]) -> Result<bool, Self::Error> {
168        Ok(compact_size_push_bytes(&mut self.buf, bytes))
169    }
170
171    fn end(self) -> Result<Self::Output, Self::Error> {
172        use CompactSizeDecoderErrorInner as E;
173
174        let dec_value = compact_size_decode_u64(&self.buf)?;
175
176        // This error is returned if dec_value is outside of the usize range, or
177        // if it is above the given limit.
178        let make_err = || {
179            CompactSizeDecoderError(E::ValueExceedsLimit(LengthPrefixExceedsMaxError {
180                value: dec_value,
181                limit: self.limit,
182            }))
183        };
184
185        usize::try_from(dec_value).map_err(|_| make_err()).and_then(|nsize| {
186            if nsize > self.limit {
187                Err(make_err())
188            } else {
189                Ok(nsize)
190            }
191        })
192    }
193
194    fn read_limit(&self) -> usize { compact_size_read_limit(&self.buf) }
195}
196
197/// Decodes a compact size encoded integer as a raw `u64`.
198///
199/// If you are decoding a length prefix, you probably want [`CompactSizeDecoder`] instead.
200///
201/// This decoder performs no limit check and no conversion to `usize`. It exists for the small
202/// number of Bitcoin protocol fields that are compact-size-encoded integers but are not length
203/// prefixes (e.g. service flags in the `version` message). For those fields the full `u64` range is
204/// meaningful and there is no associated allocation whose size would be controlled by the decoded
205/// value.
206///
207/// # Denial-of-service warning
208///
209/// Do not use this decoder for length prefixes. If the decoded value is used to size an allocation,
210/// for example as the length of a `Vec`, a malicious peer can send a compact size value of up to
211/// 2^64-1 and cause an out-of-memory condition. [`CompactSizeDecoder`] prevents this by enforcing a
212/// configurable upper bound before returning the value.
213///
214/// For more information about decoders see the documentation of the [`Decoder`] trait.
215#[derive(Debug, Clone)]
216pub struct CompactSizeU64Decoder {
217    buf: ArrayVec<u8, 9>,
218}
219
220impl CompactSizeU64Decoder {
221    /// Constructs a new `CompactSizeU64Decoder`.
222    ///
223    /// See the [struct-level documentation](Self) for guidance on when to use this decoder versus
224    /// [`CompactSizeDecoder`].
225    pub const fn new() -> Self { Self { buf: ArrayVec::new() } }
226}
227
228impl Default for CompactSizeU64Decoder {
229    fn default() -> Self { Self::new() }
230}
231
232impl Decoder for CompactSizeU64Decoder {
233    type Output = u64;
234    type Error = CompactSizeDecoderError;
235
236    fn push_bytes(&mut self, bytes: &mut &[u8]) -> Result<bool, Self::Error> {
237        Ok(compact_size_push_bytes(&mut self.buf, bytes))
238    }
239
240    fn end(self) -> Result<Self::Output, Self::Error> { compact_size_decode_u64(&self.buf) }
241
242    fn read_limit(&self) -> usize { compact_size_read_limit(&self.buf) }
243}
244
245/// Pushes bytes into a compact size buffer, returning true if more bytes are needed.
246fn compact_size_push_bytes(buf: &mut ArrayVec<u8, 9>, bytes: &mut &[u8]) -> bool {
247    if bytes.is_empty() {
248        return true;
249    }
250
251    if buf.is_empty() {
252        buf.push(bytes[0]);
253        *bytes = &bytes[1..];
254    }
255    let len = match buf[0] {
256        PREFIX_U64 => 9,
257        PREFIX_U32 => 5,
258        PREFIX_U16 => 3,
259        _ => 1,
260    };
261    let to_copy = bytes.len().min(len - buf.len());
262    buf.extend_from_slice(&bytes[..to_copy]);
263    *bytes = &bytes[to_copy..];
264
265    buf.len() != len
266}
267
268/// Returns the number of bytes the compact size decoder still needs to read.
269fn compact_size_read_limit(buf: &ArrayVec<u8, 9>) -> usize {
270    match buf.len() {
271        0 => 1,
272        already_read => match buf[0] {
273            PREFIX_U64 => 9_usize.saturating_sub(already_read),
274            PREFIX_U32 => 5_usize.saturating_sub(already_read),
275            PREFIX_U16 => 3_usize.saturating_sub(already_read),
276            _ => 0,
277        },
278    }
279}
280
281/// Decodes a compact size buffer to a u64, checking for minimal encoding.
282fn compact_size_decode_u64(buf: &ArrayVec<u8, 9>) -> Result<u64, CompactSizeDecoderError> {
283    use CompactSizeDecoderErrorInner as E;
284
285    fn arr<const N: usize>(slice: &[u8]) -> Result<[u8; N], CompactSizeDecoderError> {
286        slice.try_into().map_err(|_| {
287            CompactSizeDecoderError(E::UnexpectedEof { required: N, received: slice.len() })
288        })
289    }
290
291    let (first, payload) = buf
292        .split_first()
293        .ok_or(CompactSizeDecoderError(E::UnexpectedEof { required: 1, received: 0 }))?;
294
295    match *first {
296        PREFIX_U64 => {
297            let x = u64::from_le_bytes(arr(payload)?);
298            if x < 0x100_000_000 {
299                Err(CompactSizeDecoderError(E::NonMinimal { value: x }))
300            } else {
301                Ok(x)
302            }
303        }
304        PREFIX_U32 => {
305            let x = u32::from_le_bytes(arr(payload)?);
306            if x < 0x10000 {
307                Err(CompactSizeDecoderError(E::NonMinimal { value: x.into() }))
308            } else {
309                Ok(x.into())
310            }
311        }
312        PREFIX_U16 => {
313            let x = u16::from_le_bytes(arr(payload)?);
314            if x < 0xFD {
315                Err(CompactSizeDecoderError(E::NonMinimal { value: x.into() }))
316            } else {
317                Ok(x.into())
318            }
319        }
320        n => Ok(n.into()),
321    }
322}
323
324#[cfg(test)]
325mod tests {
326    use super::*;
327
328    #[test]
329    fn encoded_value_1_byte() {
330        // Check lower bound, upper bound (and implicitly endian-ness).
331        for v in [0x00u64, 0x01, 0x02, 0xFA, 0xFB, 0xFC] {
332            assert_eq!(CompactSizeEncoder::encoded_size(v as usize), 1);
333            // Should be encoded as the value as a u8.
334            let want = [v as u8];
335            let got = CompactSizeEncoder::encode(v);
336            assert_eq!(got.as_slice().len(), 1); // sanity check
337            assert_eq!(got.as_slice(), want);
338        }
339    }
340
341    macro_rules! check_encode {
342        ($($test_name:ident, $size:expr, $value:expr, $want:expr);* $(;)?) => {
343            $(
344                #[test]
345                fn $test_name() {
346                    let value = $value as u64; // Because default integer type is i32.
347                    assert_eq!(CompactSizeEncoder::encoded_size(value as usize), $size);
348                    let got = CompactSizeEncoder::encode(value);
349                    assert_eq!(got.as_slice().len(), $size); // sanity check
350                    assert_eq!(got.as_slice(), &$want);
351                }
352            )*
353        }
354    }
355
356    check_encode! {
357        // 3 byte encoding.
358        encoded_value_3_byte_lower_bound, 3, 0xFD, [0xFD, 0xFD, 0x00]; // 0x00FD
359        encoded_value_3_byte_endianness, 3, 0xABCD, [0xFD, 0xCD, 0xAB];
360        encoded_value_3_byte_upper_bound, 3, 0xFFFF, [0xFD, 0xFF, 0xFF];
361        // 5 byte encoding.
362        encoded_value_5_byte_lower_bound, 5, 0x0001_0000, [0xFE, 0x00, 0x00, 0x01, 0x00];
363        encoded_value_5_byte_endianness, 5, 0x0123_4567, [0xFE, 0x67, 0x45, 0x23, 0x01];
364        encoded_value_5_byte_upper_bound, 5, 0xFFFF_FFFF, [0xFE, 0xFF, 0xFF, 0xFF, 0xFF];
365    }
366
367    // 9-byte encoding requires values above u32::MAX which don't fit in usize on 32-bit platforms.
368    #[cfg(target_pointer_width = "64")]
369    check_encode! {
370        encoded_value_9_byte_lower_bound, 9, 0x0000_0001_0000_0000u64, [0xFF, 0x00, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00];
371        encoded_value_9_byte_endianness, 9, 0x0123_4567_89AB_CDEFu64, [0xFF, 0xEF, 0xCD, 0xAB, 0x89, 0x67, 0x45, 0x23, 0x01];
372        encoded_value_9_byte_upper_bound, 9, u64::MAX, [0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF];
373    }
374
375    #[test]
376    fn compact_size_new_values_too_large() {
377        use CompactSizeDecoderErrorInner as E;
378
379        const EXCESS_COMPACT_SIZE: u64 = (MAX_COMPACT_SIZE + 1) as u64;
380
381        // MAX_COMPACT_SIZE should succeed for `new` constructor
382        // 0x0200_0000 as minimal 5-byte compact size: 0xFE + u32 little-endian
383        let mut decoder = CompactSizeDecoder::new();
384        decoder.push_bytes(&mut [0xFE, 0x00, 0x00, 0x00, 0x02].as_slice()).unwrap();
385        let got = decoder.end().unwrap();
386        assert_eq!(got, MAX_COMPACT_SIZE);
387
388        // MAX_COMPACT_SIZE + 1 should fail for `new` constructor
389        // 0x0200_0001 as minimal 5-byte compact size: 0xFE + u32 little-endian
390        let mut decoder = CompactSizeDecoder::new();
391        decoder.push_bytes(&mut [0xFE, 0x01, 0x00, 0x00, 0x02].as_slice()).unwrap();
392        let got = decoder.end().unwrap_err();
393        assert!(matches!(
394            got,
395            CompactSizeDecoderError(E::ValueExceedsLimit(LengthPrefixExceedsMaxError {
396                limit: MAX_COMPACT_SIZE,
397                value: EXCESS_COMPACT_SIZE,
398            })),
399        ));
400    }
401
402    #[test]
403    fn compact_size_new_with_limit_values_too_large() {
404        use CompactSizeDecoderErrorInner as E;
405
406        // 240 should succeed for `new_with_limit` constructor
407        let mut decoder = CompactSizeDecoder::new_with_limit(240);
408        decoder.push_bytes(&mut [0xf0].as_slice()).unwrap();
409        let got = decoder.end().unwrap();
410        assert_eq!(got, 240);
411
412        // 241 should fail for `new_with_limit` constructor
413        let mut decoder = CompactSizeDecoder::new_with_limit(240);
414        decoder.push_bytes(&mut [0xf1].as_slice()).unwrap();
415        let got = decoder.end().unwrap_err();
416        assert!(matches!(
417            got,
418            CompactSizeDecoderError(E::ValueExceedsLimit(LengthPrefixExceedsMaxError {
419                limit: 240,
420                value: 241,
421            })),
422        ));
423    }
424}