bitcoin_internals/
compact_size.rs

1// SPDX-License-Identifier: CC0-1.0
2
3//! Variable length integer encoding A.K.A [`CompactSize`].
4//!
5//! An integer can be encoded depending on the represented value to save space. Variable length
6//! integers always precede an array/vector of a type of data that may vary in length.
7//!
8//! [`CompactSize`]: <https://en.bitcoin.it/wiki/Protocol_documentation#Variable_length_integer>
9
10use crate::array_vec::ArrayVec;
11use crate::ToU64;
12
13/// The maximum size of a serialized object in bytes or number of elements
14/// (for eg vectors) when the size is encoded as `CompactSize`.
15///
16/// This is `MAX_SIZE` in Bitcoin Core.
17// Issue: https://github.com/rust-bitcoin/rust-bitcoin/issues/3264
18pub const MAX_ENCODABLE_VALUE: u64 = 0x0200_0000;
19
20/// The maximum length of an encoding.
21pub const MAX_ENCODING_SIZE: usize = 9;
22
23/// Returns the number of bytes used to encode this `CompactSize` value.
24///
25/// # Returns
26///
27/// - 1 for 0..=0xFC
28/// - 3 for 0xFD..=(2^16-1)
29/// - 5 for 0x10000..=(2^32-1)
30/// - 9 otherwise.
31#[inline]
32pub fn encoded_size(value: impl ToU64) -> usize { encoded_size_const(value.to_u64()) }
33
34/// Returns the number of bytes used to encode this `CompactSize` value (in const context).
35///
36/// # Returns
37///
38/// - 1 for 0..=0xFC
39/// - 3 for 0xFD..=(2^16-1)
40/// - 5 for 0x10000..=(2^32-1)
41/// - 9 otherwise.
42#[inline]
43pub const fn encoded_size_const(value: u64) -> usize {
44    match value {
45        0..=0xFC => 1,
46        0xFD..=0xFFFF => 3,
47        0x10000..=0xFFFFFFFF => 5,
48        _ => 9,
49    }
50}
51
52/// Encodes `CompactSize` without allocating.
53#[inline]
54pub fn encode(value: impl ToU64) -> ArrayVec<u8, MAX_ENCODING_SIZE> {
55    let value = value.to_u64();
56    let mut res = ArrayVec::<u8, MAX_ENCODING_SIZE>::new();
57    match value {
58        0..=0xFC => {
59            res.push(value as u8); // Cast ok because of match.
60        }
61        0xFD..=0xFFFF => {
62            let v = value as u16; // Cast ok because of match.
63            res.push(0xFD);
64            res.extend_from_slice(&v.to_le_bytes());
65        }
66        0x10000..=0xFFFFFFFF => {
67            let v = value as u32; // Cast ok because of match.
68            res.push(0xFE);
69            res.extend_from_slice(&v.to_le_bytes());
70        }
71        _ => {
72            res.push(0xFF);
73            res.extend_from_slice(&value.to_le_bytes());
74        }
75    }
76    res
77}
78
79/// Gets the compact size encoded value from `slice` and moves slice past the encoding.
80///
81/// Caller to guarantee that the encoding is well formed. Well formed is defined as:
82///
83/// * Being at least long enough.
84/// * Containing a minimal encoding.
85///
86/// # Panics
87///
88/// * Panics in release mode if the `slice` does not contain a valid minimal compact size encoding.
89/// * Panics in debug mode if the encoding is not minimal (referred to as "non-canonical" in Core).
90pub fn decode_unchecked(slice: &mut &[u8]) -> u64 {
91    if slice.is_empty() {
92        panic!("tried to decode an empty slice");
93    }
94
95    match slice[0] {
96        0xFF => {
97            const SIZE: usize = 9;
98            if slice.len() < SIZE {
99                panic!("slice too short, expected at least 9 bytes");
100            };
101
102            let mut bytes = [0_u8; SIZE - 1];
103            bytes.copy_from_slice(&slice[1..SIZE]);
104
105            let v = u64::from_le_bytes(bytes);
106            debug_assert!(v > u32::MAX.into(), "non-minimal encoding of a u64");
107            *slice = &slice[SIZE..];
108            v
109        }
110        0xFE => {
111            const SIZE: usize = 5;
112            if slice.len() < SIZE {
113                panic!("slice too short, expected at least 5 bytes");
114            };
115
116            let mut bytes = [0_u8; SIZE - 1];
117            bytes.copy_from_slice(&slice[1..SIZE]);
118
119            let v = u32::from_le_bytes(bytes);
120            debug_assert!(v > u16::MAX.into(), "non-minimal encoding of a u32");
121            *slice = &slice[SIZE..];
122            u64::from(v)
123        }
124        0xFD => {
125            const SIZE: usize = 3;
126            if slice.len() < SIZE {
127                panic!("slice too short, expected at least 3 bytes");
128            };
129
130            let mut bytes = [0_u8; SIZE - 1];
131            bytes.copy_from_slice(&slice[1..SIZE]);
132
133            let v = u16::from_le_bytes(bytes);
134            debug_assert!(v >= 0xFD, "non-minimal encoding of a u16");
135            *slice = &slice[SIZE..];
136            u64::from(v)
137        }
138        n => {
139            *slice = &slice[1..];
140            u64::from(n)
141        }
142    }
143}
144
145#[cfg(test)]
146mod tests {
147    use super::*;
148
149    #[test]
150    fn encoded_value_1_byte() {
151        // Check lower bound, upper bound (and implicitly endian-ness).
152        for v in [0x00, 0x01, 0x02, 0xFA, 0xFB, 0xFC] {
153            let v = v as u32;
154            assert_eq!(encoded_size(v), 1);
155            // Should be encoded as the value as a u8.
156            let want = [v as u8];
157            let got = encode(v);
158            assert_eq!(got.as_slice().len(), 1); // sanity check
159            assert_eq!(got.as_slice(), want);
160        }
161    }
162
163    #[test]
164    fn decode_value_1_byte() {
165        // Check lower bound, upper bound.
166        for v in [0x00, 0x01, 0x02, 0xFA, 0xFB, 0xFC] {
167            let raw = [v];
168            let mut slice = raw.as_slice();
169            let got = decode_unchecked(&mut slice);
170            assert_eq!(got, u64::from(v));
171            assert!(slice.is_empty());
172        }
173    }
174
175    macro_rules! check_encode {
176        ($($test_name:ident, $size:expr, $value:expr, $want:expr);* $(;)?) => {
177            $(
178                #[test]
179                fn $test_name() {
180                    let value = $value as u64; // Because default integer type is i32.
181                    let got = encode(value);
182                    assert_eq!(got.as_slice().len(), $size); // sanity check
183                    assert_eq!(got.as_slice(), &$want);
184                }
185            )*
186        }
187    }
188
189    check_encode! {
190        // 3 byte encoding.
191        encoded_value_3_byte_lower_bound, 3, 0xFD, [0xFD, 0xFD, 0x00]; // 0x00FD
192        encoded_value_3_byte_endianness, 3, 0xABCD, [0xFD, 0xCD, 0xAB];
193        encoded_value_3_byte_upper_bound, 3, 0xFFFF, [0xFD, 0xFF, 0xFF];
194        // 5 byte encoding.
195        encoded_value_5_byte_lower_bound, 5, 0x0001_0000, [0xFE, 0x00, 0x00, 0x01, 0x00];
196        encoded_value_5_byte_endianness, 5, 0x0123_4567, [0xFE, 0x67, 0x45, 0x23, 0x01];
197        encoded_value_5_byte_upper_bound, 5, 0xFFFF_FFFF, [0xFE, 0xFF, 0xFF, 0xFF, 0xFF];
198        // 9 byte encoding.
199        encoded_value_9_byte_lower_bound, 9, 0x0000_0001_0000_0000, [0xFF, 0x00, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00];
200        encoded_value_9_byte_endianness, 9, 0x0123_4567_89AB_CDEF, [0xFF, 0xEF, 0xCD, 0xAB, 0x89, 0x67, 0x45, 0x23, 0x01];
201        encoded_value_9_byte_upper_bound, 9, u64::MAX, [0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF];
202    }
203
204    macro_rules! check_decode {
205        ($($test_name:ident, $size:expr, $want:expr, $encoded:expr);* $(;)?) => {
206            $(
207                #[test]
208                fn $test_name() {
209                    let mut slice = $encoded.as_slice();
210                    let got = decode_unchecked(&mut slice);
211                    assert_eq!(got, $want);
212                    assert_eq!(slice.len(), $encoded.len() - $size);
213                }
214            )*
215        }
216    }
217
218    check_decode! {
219        // 3 byte encoding.
220        decode_from_3_byte_slice_lower_bound, 3, 0xFD, [0xFD, 0xFD, 0x00];
221        decode_from_3_byte_slice_three_over_lower_bound, 3, 0x0100, [0xFD, 0x00, 0x01];
222        decode_from_3_byte_slice_endianness, 3, 0xABCD, [0xFD, 0xCD, 0xAB];
223        decode_from_3_byte_slice_upper_bound, 3, 0xFFFF, [0xFD, 0xFF, 0xFF];
224
225        // 5 byte encoding.
226        decode_from_5_byte_slice_lower_bound, 5, 0x0001_0000, [0xFE, 0x00, 0x00, 0x01, 0x00];
227        decode_from_5_byte_slice_endianness, 5, 0x0123_4567, [0xFE, 0x67, 0x45, 0x23, 0x01];
228        decode_from_5_byte_slice_upper_bound, 5, 0xFFFF_FFFF, [0xFE, 0xFF, 0xFF, 0xFF, 0xFF];
229        // 9 byte encoding.
230        decode_from_9_byte_slice_lower_bound, 9, 0x0000_0001_0000_0000, [0xFF, 0x00, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00];
231        decode_from_9_byte_slice_endianness, 9, 0x0123_4567_89AB_CDEF, [0xFF, 0xEF, 0xCD, 0xAB, 0x89, 0x67, 0x45, 0x23, 0x01];
232        decode_from_9_byte_slice_upper_bound, 9, u64::MAX, [0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF];
233
234        // Check slices that are bigger than the actual encoding.
235        decode_1_byte_from_bigger_slice, 1, 32, [0x20, 0xAB, 0xBC];
236        decode_3_byte_from_bigger_slice, 3, 0xFFFF, [0xFD, 0xFF, 0xFF, 0xAB, 0xBC];
237        decode_5_byte_from_bigger_slice, 5, 0xFFFF_FFFF, [0xFE, 0xFF, 0xFF, 0xFF, 0xFF, 0xAB, 0xBC];
238        decode_9_byte_from_bigger_slice, 9, u64::MAX, [0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xAB, 0xBC];
239    }
240
241    #[test]
242    #[should_panic]
243    fn decode_from_empty_slice_panics() {
244        let mut slice = [].as_slice();
245        let _ = decode_unchecked(&mut slice);
246    }
247
248    #[test]
249    #[should_panic]
250    // Non-minimal is referred to as non-canonical in Core (`bitcoin/src/serialize.h`).
251    fn decode_non_minimal_panics() {
252        let mut slice = [0xFE, 0xCD, 0xAB].as_slice();
253        let _ = decode_unchecked(&mut slice);
254    }
255}