Skip to main content

bitcoin_consensus_encoding/
compact_size.rs

1// SPDX-License-Identifier: CC0-1.0
2
3//! Compact size codec.
4//!
5//! Compact size is a variable-length integer encoding used throughout the Bitcoin
6//! consensus protocol to encode collection lengths. However, there are also some
7//! unique non-length use cases.
8
9use internals::array_vec::ArrayVec;
10
11use crate::decode::Decoder;
12use crate::encode::{Encoder, EncoderStatus, ExactSizeEncoder};
13use crate::error::{
14    CompactSizeDecoderError, CompactSizeDecoderErrorInner, LengthPrefixExceedsMaxError,
15};
16use crate::DecoderStatus;
17
/// Default cap, in bytes, on the size of a decoded object (32 MiB, i.e. `0x0200_0000`).
///
/// Mirrors Bitcoin Core's default [serialization limit]. It is a coarse
/// anti-DoS ceiling that every bitcoin type should comfortably fit under.
///
/// [serialization limit]: https://github.com/bitcoin/bitcoin/blob/a7c29df0e5ace05b6186612671d6103c112ec922/src/serialize.h#L32
const MAX_COMPACT_SIZE: usize = 32 * 1024 * 1024;

/// The longest possible compact size encoding: one prefix byte plus an 8-byte payload.
const SIZE: usize = 1 + 8;

/// Prefix byte announcing that a 2-byte `u16` payload follows.
const PREFIX_U16: u8 = 0xFD;
/// Prefix byte announcing that a 4-byte `u32` payload follows.
const PREFIX_U32: u8 = 0xFE;
/// Prefix byte announcing that an 8-byte `u64` payload follows.
const PREFIX_U64: u8 = 0xFF;
36
37/// Encoder for a compact size encoded integer.
38#[derive(Debug, Clone)]
39pub struct CompactSizeEncoder {
40    buf: ArrayVec<u8, SIZE>,
41}
42
43impl CompactSizeEncoder {
44    /// Constructs a new `CompactSizeEncoder` for a length prefix.
45    ///
46    /// The `usize` type is the natural Rust type for lengths and collection sizes, which is the
47    /// dominant use case for compact size encoding in the Bitcoin protocol. Prefer this constructor
48    /// whenever you are encoding the length of a collection or a byte slice.
49    ///
50    /// Compact size encodings are defined only over the `u64` range. On exotic platforms where
51    /// `usize` is wider than 64 bits the value will be saturated to [`u64::MAX`], but in practice
52    /// any in-memory length that could actually be passed here is well within the `u64` range.
53    ///
54    /// If you need to encode an arbitrary `u64` integer that is not a length prefix, use
55    /// [`Self::new_u64`] instead.
56    pub fn new(value: usize) -> Self {
57        Self { buf: Self::encode(u64::try_from(value).unwrap_or(u64::MAX)) }
58    }
59
60    /// Constructs a new `CompactSizeEncoder` for an arbitrary `u64` integer.
61    ///
62    /// Prefer [`Self::new`] unless you are encoding a non-length integer.
63    ///
64    /// A small number of fields in the Bitcoin protocol are compact-size-encoded integers that are
65    /// not collection lengths (e.g. service flags). Use this constructor for those cases, where the
66    /// natural type of the value is `u64` rather than `usize`.
67    pub fn new_u64(value: u64) -> Self { Self { buf: Self::encode(value) } }
68
69    /// Returns the number of bytes used to encode this `CompactSize` value.
70    ///
71    /// # Returns
72    ///
73    /// - 1 for 0..=0xFC
74    /// - 3 for 0xFD..=(2^16-1)
75    /// - 5 for 0x10000..=(2^32-1)
76    /// - 9 otherwise.
77    #[inline]
78    pub const fn encoded_size(value: usize) -> usize {
79        match value {
80            0..=0xFC => 1,
81            0xFD..=0xFFFF => 3,
82            0x10000..=0xFFFF_FFFF => 5,
83            _ => 9,
84        }
85    }
86
87    /// Encodes `CompactSize` without allocating.
88    #[inline]
89    fn encode(value: u64) -> ArrayVec<u8, SIZE> {
90        let mut res = ArrayVec::<u8, SIZE>::new();
91        match value {
92            0..=0xFC => {
93                res.push(value as u8); // Cast ok because of match.
94            }
95            0xFD..=0xFFFF => {
96                let v = value as u16; // Cast ok because of match.
97                res.push(PREFIX_U16);
98                res.extend_from_slice(&v.to_le_bytes());
99            }
100            0x10000..=0xFFFF_FFFF => {
101                let v = value as u32; // Cast ok because of match.
102                res.push(PREFIX_U32);
103                res.extend_from_slice(&v.to_le_bytes());
104            }
105            _ => {
106                res.push(PREFIX_U64);
107                res.extend_from_slice(&value.to_le_bytes());
108            }
109        }
110        res
111    }
112}
113
114impl Encoder for CompactSizeEncoder {
115    #[inline]
116    fn current_chunk(&self) -> &[u8] { &self.buf }
117
118    #[inline]
119    fn advance(&mut self) -> EncoderStatus { EncoderStatus::Finished }
120}
121
122impl ExactSizeEncoder for CompactSizeEncoder {
123    #[inline]
124    fn len(&self) -> usize { self.buf.len() }
125}
126
127/// Decodes a compact size encoded integer as a length prefix.
128///
129/// The decoded value is returned as a `usize` and is bounded by a configurable limit (default:
130/// 4,000,000). This limit is a denial-of-service protection: a malicious peer can send a compact
131/// size value up to 2^64-1, and without a limit check the caller might attempt to allocate an
132/// enormous buffer based on that value. [`CompactSizeDecoder`] prevents this by rejecting values
133/// that exceed the limit before returning them to the caller.
134///
135/// If you are decoding an arbitrary `u64` integer that is genuinely not a length prefix, use
136/// [`CompactSizeU64Decoder`] instead.
137///
138/// For more information about decoders see the documentation of the [`Decoder`] trait.
139#[derive(Debug, Clone)]
140pub struct CompactSizeDecoder {
141    buf: ArrayVec<u8, 9>,
142    limit: usize,
143}
144
145impl CompactSizeDecoder {
146    /// Constructs a new compact size decoder with the default 32MB length limit.
147    pub const fn new() -> Self { Self { buf: ArrayVec::new(), limit: MAX_COMPACT_SIZE } }
148
149    /// Constructs a new compact size decoder with a custom length limit.
150    ///
151    /// The decoded value must not exceed `limit`, otherwise [`end`](Self::end) will return an
152    /// error. Use this when you know the field you are decoding has a tighter bound than the
153    /// default limit of 32MB.
154    pub const fn new_with_limit(limit: usize) -> Self { Self { buf: ArrayVec::new(), limit } }
155}
156
157impl Default for CompactSizeDecoder {
158    fn default() -> Self { Self::new() }
159}
160
161impl Decoder for CompactSizeDecoder {
162    type Output = usize;
163    type Error = CompactSizeDecoderError;
164
165    fn push_bytes(&mut self, bytes: &mut &[u8]) -> Result<DecoderStatus, Self::Error> {
166        Ok(compact_size_push_bytes(&mut self.buf, bytes))
167    }
168
169    fn end(self) -> Result<Self::Output, Self::Error> {
170        use CompactSizeDecoderErrorInner as E;
171
172        let dec_value = compact_size_decode_u64(&self.buf)?;
173
174        // This error is returned if dec_value is outside of the usize range, or
175        // if it is above the given limit.
176        let make_err = || {
177            CompactSizeDecoderError(E::ValueExceedsLimit(LengthPrefixExceedsMaxError {
178                value: dec_value,
179                limit: self.limit,
180            }))
181        };
182
183        usize::try_from(dec_value).map_err(|_| make_err()).and_then(|nsize| {
184            if nsize > self.limit {
185                Err(make_err())
186            } else {
187                Ok(nsize)
188            }
189        })
190    }
191
192    fn read_limit(&self) -> usize { compact_size_read_limit(&self.buf) }
193}
194
195/// Decodes a compact size encoded integer as a raw `u64`.
196///
197/// If you are decoding a length prefix, you probably want [`CompactSizeDecoder`] instead.
198///
199/// This decoder performs no limit check and no conversion to `usize`. It exists for the small
200/// number of Bitcoin protocol fields that are compact-size-encoded integers but are not length
201/// prefixes (e.g. service flags in the `version` message). For those fields the full `u64` range is
202/// meaningful and there is no associated allocation whose size would be controlled by the decoded
203/// value.
204///
205/// # Denial-of-service warning
206///
207/// Do not use this decoder for length prefixes. If the decoded value is used to size an allocation,
208/// for example as the length of a `Vec`, a malicious peer can send a compact size value of up to
209/// 2^64-1 and cause an out-of-memory condition. [`CompactSizeDecoder`] prevents this by enforcing a
210/// configurable upper bound before returning the value.
211///
212/// For more information about decoders see the documentation of the [`Decoder`] trait.
213#[derive(Debug, Clone)]
214pub struct CompactSizeU64Decoder {
215    buf: ArrayVec<u8, 9>,
216}
217
218impl CompactSizeU64Decoder {
219    /// Constructs a new `CompactSizeU64Decoder`.
220    ///
221    /// See the [struct-level documentation](Self) for guidance on when to use this decoder versus
222    /// [`CompactSizeDecoder`].
223    pub const fn new() -> Self { Self { buf: ArrayVec::new() } }
224}
225
226impl Default for CompactSizeU64Decoder {
227    fn default() -> Self { Self::new() }
228}
229
230impl Decoder for CompactSizeU64Decoder {
231    type Output = u64;
232    type Error = CompactSizeDecoderError;
233
234    fn push_bytes(&mut self, bytes: &mut &[u8]) -> Result<DecoderStatus, Self::Error> {
235        Ok(compact_size_push_bytes(&mut self.buf, bytes))
236    }
237
238    fn end(self) -> Result<Self::Output, Self::Error> { compact_size_decode_u64(&self.buf) }
239
240    fn read_limit(&self) -> usize { compact_size_read_limit(&self.buf) }
241}
242
243/// Pushes bytes into a compact size buffer, returning the decoder status.
244fn compact_size_push_bytes(buf: &mut ArrayVec<u8, 9>, bytes: &mut &[u8]) -> DecoderStatus {
245    if bytes.is_empty() {
246        return DecoderStatus::NeedsMore;
247    }
248
249    if buf.is_empty() {
250        buf.push(bytes[0]);
251        *bytes = &bytes[1..];
252    }
253    let len = match buf[0] {
254        PREFIX_U64 => 9,
255        PREFIX_U32 => 5,
256        PREFIX_U16 => 3,
257        _ => 1,
258    };
259    let to_copy = bytes.len().min(len - buf.len());
260    buf.extend_from_slice(&bytes[..to_copy]);
261    *bytes = &bytes[to_copy..];
262
263    if buf.len() == len {
264        DecoderStatus::Ready
265    } else {
266        DecoderStatus::NeedsMore
267    }
268}
269
270/// Returns the number of bytes the compact size decoder still needs to read.
271fn compact_size_read_limit(buf: &ArrayVec<u8, 9>) -> usize {
272    match buf.len() {
273        0 => 1,
274        already_read => match buf[0] {
275            PREFIX_U64 => 9_usize.saturating_sub(already_read),
276            PREFIX_U32 => 5_usize.saturating_sub(already_read),
277            PREFIX_U16 => 3_usize.saturating_sub(already_read),
278            _ => 0,
279        },
280    }
281}
282
283/// Decodes a compact size buffer to a u64, checking for minimal encoding.
284fn compact_size_decode_u64(buf: &ArrayVec<u8, 9>) -> Result<u64, CompactSizeDecoderError> {
285    use CompactSizeDecoderErrorInner as E;
286
287    fn arr<const N: usize>(slice: &[u8]) -> Result<[u8; N], CompactSizeDecoderError> {
288        slice.try_into().map_err(|_| {
289            CompactSizeDecoderError(E::UnexpectedEof { required: N, received: slice.len() })
290        })
291    }
292
293    let (first, payload) = buf
294        .split_first()
295        .ok_or(CompactSizeDecoderError(E::UnexpectedEof { required: 1, received: 0 }))?;
296
297    match *first {
298        PREFIX_U64 => {
299            let x = u64::from_le_bytes(arr(payload)?);
300            if x < 0x100_000_000 {
301                Err(CompactSizeDecoderError(E::NonMinimal { value: x }))
302            } else {
303                Ok(x)
304            }
305        }
306        PREFIX_U32 => {
307            let x = u32::from_le_bytes(arr(payload)?);
308            if x < 0x10000 {
309                Err(CompactSizeDecoderError(E::NonMinimal { value: x.into() }))
310            } else {
311                Ok(x.into())
312            }
313        }
314        PREFIX_U16 => {
315            let x = u16::from_le_bytes(arr(payload)?);
316            if x < 0xFD {
317                Err(CompactSizeDecoderError(E::NonMinimal { value: x.into() }))
318            } else {
319                Ok(x.into())
320            }
321        }
322        n => Ok(n.into()),
323    }
324}
325
#[cfg(test)]
mod tests {
    use super::*;

    /// Runs `bytes` through a fresh `CompactSizeU64Decoder` in a single push and finalizes it.
    fn decode_u64(mut bytes: &[u8]) -> Result<u64, CompactSizeDecoderError> {
        let mut decoder = CompactSizeU64Decoder::new();
        let _ = decoder.push_bytes(&mut bytes).unwrap();
        decoder.end()
    }

    #[test]
    fn encoded_value_1_byte() {
        // Check lower bound, upper bound (and implicitly endian-ness).
        for v in [0x00u64, 0x01, 0x02, 0xFA, 0xFB, 0xFC] {
            assert_eq!(CompactSizeEncoder::encoded_size(v as usize), 1);
            // Should be encoded as the value as a u8.
            let want = [v as u8];
            let got = CompactSizeEncoder::encode(v);
            assert_eq!(got.as_slice().len(), 1); // sanity check
            assert_eq!(got.as_slice(), want);
        }
    }

    // Generates one encoder test per `name, encoded size, value, expected bytes` entry.
    macro_rules! check_encode {
        ($($test_name:ident, $size:expr, $value:expr, $want:expr);* $(;)?) => {
            $(
                #[test]
                fn $test_name() {
                    let value = $value as u64; // Because default integer type is i32.
                    assert_eq!(CompactSizeEncoder::encoded_size(value as usize), $size);
                    let got = CompactSizeEncoder::encode(value);
                    assert_eq!(got.as_slice().len(), $size); // sanity check
                    assert_eq!(got.as_slice(), &$want);
                }
            )*
        }
    }

    check_encode! {
        // 3 byte encoding.
        encoded_value_3_byte_lower_bound, 3, 0xFD, [0xFD, 0xFD, 0x00]; // 0x00FD
        encoded_value_3_byte_endianness, 3, 0xABCD, [0xFD, 0xCD, 0xAB];
        encoded_value_3_byte_upper_bound, 3, 0xFFFF, [0xFD, 0xFF, 0xFF];
        // 5 byte encoding.
        encoded_value_5_byte_lower_bound, 5, 0x0001_0000, [0xFE, 0x00, 0x00, 0x01, 0x00];
        encoded_value_5_byte_endianness, 5, 0x0123_4567, [0xFE, 0x67, 0x45, 0x23, 0x01];
        encoded_value_5_byte_upper_bound, 5, 0xFFFF_FFFF, [0xFE, 0xFF, 0xFF, 0xFF, 0xFF];
    }

    // 9-byte encoding requires values above u32::MAX which don't fit in usize on 32-bit platforms.
    #[cfg(target_pointer_width = "64")]
    check_encode! {
        encoded_value_9_byte_lower_bound, 9, 0x0000_0001_0000_0000u64, [0xFF, 0x00, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00];
        encoded_value_9_byte_endianness, 9, 0x0123_4567_89AB_CDEFu64, [0xFF, 0xEF, 0xCD, 0xAB, 0x89, 0x67, 0x45, 0x23, 0x01];
        encoded_value_9_byte_upper_bound, 9, u64::MAX, [0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF];
    }

    #[test]
    fn compact_size_new_values_too_large() {
        use CompactSizeDecoderErrorInner as E;

        const EXCESS_COMPACT_SIZE: u64 = (MAX_COMPACT_SIZE + 1) as u64;

        // MAX_COMPACT_SIZE should succeed for `new` constructor
        // 0x0200_0000 as minimal 5-byte compact size: 0xFE + u32 little-endian
        let mut decoder = CompactSizeDecoder::new();
        let _ = decoder.push_bytes(&mut [0xFE, 0x00, 0x00, 0x00, 0x02].as_slice()).unwrap();
        let got = decoder.end().unwrap();
        assert_eq!(got, MAX_COMPACT_SIZE);

        // MAX_COMPACT_SIZE + 1 should fail for `new` constructor
        // 0x0200_0001 as minimal 5-byte compact size: 0xFE + u32 little-endian
        let mut decoder = CompactSizeDecoder::new();
        let _ = decoder.push_bytes(&mut [0xFE, 0x01, 0x00, 0x00, 0x02].as_slice()).unwrap();
        let got = decoder.end().unwrap_err();
        assert!(matches!(
            got,
            CompactSizeDecoderError(E::ValueExceedsLimit(LengthPrefixExceedsMaxError {
                limit: MAX_COMPACT_SIZE,
                value: EXCESS_COMPACT_SIZE,
            })),
        ));
    }

    #[test]
    fn compact_size_new_with_limit_values_too_large() {
        use CompactSizeDecoderErrorInner as E;

        // 240 should succeed for `new_with_limit` constructor
        let mut decoder = CompactSizeDecoder::new_with_limit(240);
        let _ = decoder.push_bytes(&mut [0xf0].as_slice()).unwrap();
        let got = decoder.end().unwrap();
        assert_eq!(got, 240);

        // 241 should fail for `new_with_limit` constructor
        let mut decoder = CompactSizeDecoder::new_with_limit(240);
        let _ = decoder.push_bytes(&mut [0xf1].as_slice()).unwrap();
        let got = decoder.end().unwrap_err();
        assert!(matches!(
            got,
            CompactSizeDecoderError(E::ValueExceedsLimit(LengthPrefixExceedsMaxError {
                limit: 240,
                value: 241,
            })),
        ));
    }

    #[test]
    fn decode_rejects_non_minimal_encodings() {
        use CompactSizeDecoderErrorInner as E;

        // Each input uses a wider encoding than its value requires.
        assert!(matches!(
            decode_u64(&[0xFD, 0xFC, 0x00]),
            Err(CompactSizeDecoderError(E::NonMinimal { value: 0xFC })),
        ));
        assert!(matches!(
            decode_u64(&[0xFE, 0xFF, 0xFF, 0x00, 0x00]),
            Err(CompactSizeDecoderError(E::NonMinimal { value: 0xFFFF })),
        ));
        assert!(matches!(
            decode_u64(&[0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0x00, 0x00, 0x00, 0x00]),
            Err(CompactSizeDecoderError(E::NonMinimal { value: 0xFFFF_FFFF })),
        ));
    }

    #[test]
    fn decode_accepts_minimal_boundaries_and_full_u64_range() {
        // The smallest value of each width is accepted.
        assert_eq!(decode_u64(&[0xFC]).unwrap(), 0xFC);
        assert_eq!(decode_u64(&[0xFD, 0xFD, 0x00]).unwrap(), 0xFD);
        assert_eq!(decode_u64(&[0xFE, 0x00, 0x00, 0x01, 0x00]).unwrap(), 0x0001_0000);
        assert_eq!(
            decode_u64(&[0xFF, 0x00, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00]).unwrap(),
            0x0000_0001_0000_0000
        );
        // The raw `u64` decoder applies no upper bound.
        assert_eq!(decode_u64(&[0xFF; 9]).unwrap(), u64::MAX);
    }

    #[test]
    fn decode_reports_missing_bytes() {
        use CompactSizeDecoderErrorInner as E;

        // Nothing was ever pushed.
        let got = CompactSizeU64Decoder::new().end();
        assert!(matches!(
            got,
            Err(CompactSizeDecoderError(E::UnexpectedEof { required: 1, received: 0 })),
        ));

        // The prefix announces a 4-byte payload but only one payload byte arrived.
        assert!(matches!(
            decode_u64(&[0xFE, 0x01]),
            Err(CompactSizeDecoderError(E::UnexpectedEof { required: 4, received: 1 })),
        ));
    }

    #[test]
    fn decode_across_multiple_pushes() {
        let mut decoder = CompactSizeDecoder::new();
        assert_eq!(decoder.read_limit(), 1);

        // The prefix alone is not enough; two payload bytes are still outstanding.
        let mut bytes: &[u8] = &[0xFD];
        let _ = decoder.push_bytes(&mut bytes).unwrap();
        assert!(bytes.is_empty());
        assert_eq!(decoder.read_limit(), 2);

        // Only the bytes belonging to the compact size are consumed.
        let mut bytes: &[u8] = &[0xCD, 0xAB, 0x99];
        let _ = decoder.push_bytes(&mut bytes).unwrap();
        assert_eq!(bytes, [0x99]);
        assert_eq!(decoder.read_limit(), 0);

        assert_eq!(decoder.end().unwrap(), 0xABCD);
    }
}