bitcoin_internals/
compact_size.rs

1// SPDX-License-Identifier: CC0-1.0
2
3//! Variable length integer encoding A.K.A [`CompactSize`].
4//!
5//! An integer can be encoded depending on the represented value to save space. Variable length
6//! integers always precede an array/vector of a type of data that may vary in length.
7//!
8//! [`CompactSize`]: <https://en.bitcoin.it/wiki/Protocol_documentation#Variable_length_integer>
9
10use crate::array_vec::ArrayVec;
11use crate::ToU64;
12
13/// The maximum size of a serialized object in bytes or number of elements
14/// (for eg vectors) when the size is encoded as `CompactSize`.
15///
16/// This is `MAX_SIZE` in Bitcoin Core.
17// Issue: https://github.com/rust-bitcoin/rust-bitcoin/issues/3264
18pub const MAX_ENCODABLE_VALUE: u64 = 0x0200_0000;
19
20/// The maximum length of an encoding.
21const MAX_ENCODING_SIZE: usize = 9;
22
23/// Returns the number of bytes used to encode this `CompactSize` value.
24///
25/// # Returns
26///
27/// - 1 for 0..=0xFC
28/// - 3 for 0xFD..=(2^16-1)
29/// - 5 for 0x10000..=(2^32-1)
30/// - 9 otherwise.
31#[inline]
32pub fn encoded_size(value: impl ToU64) -> usize {
33    match value.to_u64() {
34        0..=0xFC => 1,
35        0xFD..=0xFFFF => 3,
36        0x10000..=0xFFFFFFFF => 5,
37        _ => 9,
38    }
39}
40
41/// Encodes `CompactSize` without allocating.
42#[inline]
43pub fn encode(value: impl ToU64) -> ArrayVec<u8, MAX_ENCODING_SIZE> {
44    let value = value.to_u64();
45    let mut res = ArrayVec::<u8, MAX_ENCODING_SIZE>::new();
46    match value {
47        0..=0xFC => {
48            res.push(value as u8); // Cast ok because of match.
49        }
50        0xFD..=0xFFFF => {
51            let v = value as u16; // Cast ok because of match.
52            res.push(0xFD);
53            res.extend_from_slice(&v.to_le_bytes());
54        }
55        0x10000..=0xFFFFFFFF => {
56            let v = value as u32; // Cast ok because of match.
57            res.push(0xFE);
58            res.extend_from_slice(&v.to_le_bytes());
59        }
60        _ => {
61            let v = value;
62            res.push(0xFF);
63            res.extend_from_slice(&v.to_le_bytes());
64        }
65    }
66    res
67}
68
69/// Gets the compact size encoded value from `slice` and moves slice past the encoding.
70///
71/// Caller to guarantee that the encoding is well formed. Well formed is defined as:
72///
73/// * Being at least long enough.
74/// * Containing a minimal encoding.
75///
76/// # Panics
77///
78/// * Panics in release mode if the `slice` does not contain a valid minimal compact size encoding.
79/// * Panics in debug mode if the encoding is not minimal (referred to as "non-canonical" in Core).
80pub fn decode_unchecked(slice: &mut &[u8]) -> u64 {
81    if slice.is_empty() {
82        panic!("tried to decode an empty slice");
83    }
84
85    match slice[0] {
86        0xFF => {
87            const SIZE: usize = 9;
88            if slice.len() < SIZE {
89                panic!("slice too short, expected at least 9 bytes");
90            };
91
92            let mut bytes = [0_u8; SIZE - 1];
93            bytes.copy_from_slice(&slice[1..SIZE]);
94
95            let v = u64::from_le_bytes(bytes);
96            debug_assert!(v > u32::MAX.into(), "non-minimal encoding of a u64");
97            *slice = &slice[SIZE..];
98            v
99        }
100        0xFE => {
101            const SIZE: usize = 5;
102            if slice.len() < SIZE {
103                panic!("slice too short, expected at least 5 bytes");
104            };
105
106            let mut bytes = [0_u8; SIZE - 1];
107            bytes.copy_from_slice(&slice[1..SIZE]);
108
109            let v = u32::from_le_bytes(bytes);
110            debug_assert!(v > u16::MAX.into(), "non-minimal encoding of a u32");
111            *slice = &slice[SIZE..];
112            u64::from(v)
113        }
114        0xFD => {
115            const SIZE: usize = 3;
116            if slice.len() < SIZE {
117                panic!("slice too short, expected at least 3 bytes");
118            };
119
120            let mut bytes = [0_u8; SIZE - 1];
121            bytes.copy_from_slice(&slice[1..SIZE]);
122
123            let v = u16::from_le_bytes(bytes);
124            debug_assert!(v >= 0xFD, "non-minimal encoding of a u16");
125            *slice = &slice[SIZE..];
126            u64::from(v)
127        }
128        n => {
129            *slice = &slice[1..];
130            u64::from(n)
131        }
132    }
133}
134
135#[cfg(test)]
136mod tests {
137    use super::*;
138
139    #[test]
140    fn encoded_value_1_byte() {
141        // Check lower bound, upper bound (and implicitly endian-ness).
142        for v in [0x00, 0x01, 0x02, 0xFA, 0xFB, 0xFC] {
143            let v = v as u32;
144            assert_eq!(encoded_size(v), 1);
145            // Should be encoded as the value as a u8.
146            let want = [v as u8];
147            let got = encode(v);
148            assert_eq!(got.as_slice().len(), 1); // sanity check
149            assert_eq!(got.as_slice(), want);
150        }
151    }
152
153    #[test]
154    fn decode_value_1_byte() {
155        // Check lower bound, upper bound.
156        for v in [0x00, 0x01, 0x02, 0xFA, 0xFB, 0xFC] {
157            let raw = [v];
158            let mut slice = raw.as_slice();
159            let got = decode_unchecked(&mut slice);
160            assert_eq!(got, u64::from(v));
161            assert!(slice.is_empty());
162        }
163    }
164
165    macro_rules! check_encode {
166        ($($test_name:ident, $size:expr, $value:expr, $want:expr);* $(;)?) => {
167            $(
168                #[test]
169                fn $test_name() {
170                    let value = $value as u64; // Because default integer type is i32.
171                    let got = encode(value);
172                    assert_eq!(got.as_slice().len(), $size); // sanity check
173                    assert_eq!(got.as_slice(), &$want);
174                }
175            )*
176        }
177    }
178
179    check_encode! {
180        // 3 byte encoding.
181        encoded_value_3_byte_lower_bound, 3, 0xFD, [0xFD, 0xFD, 0x00]; // 0x00FD
182        encoded_value_3_byte_endianness, 3, 0xABCD, [0xFD, 0xCD, 0xAB];
183        encoded_value_3_byte_upper_bound, 3, 0xFFFF, [0xFD, 0xFF, 0xFF];
184        // 5 byte encoding.
185        encoded_value_5_byte_lower_bound, 5, 0x0001_0000, [0xFE, 0x00, 0x00, 0x01, 0x00];
186        encoded_value_5_byte_endianness, 5, 0x0123_4567, [0xFE, 0x67, 0x45, 0x23, 0x01];
187        encoded_value_5_byte_upper_bound, 5, 0xFFFF_FFFF, [0xFE, 0xFF, 0xFF, 0xFF, 0xFF];
188        // 9 byte encoding.
189        encoded_value_9_byte_lower_bound, 9, 0x0000_0001_0000_0000, [0xFF, 0x00, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00];
190        encoded_value_9_byte_endianness, 9, 0x0123_4567_89AB_CDEF, [0xFF, 0xEF, 0xCD, 0xAB, 0x89, 0x67, 0x45, 0x23, 0x01];
191        encoded_value_9_byte_upper_bound, 9, u64::MAX, [0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF];
192    }
193
194    macro_rules! check_decode {
195        ($($test_name:ident, $size:expr, $want:expr, $encoded:expr);* $(;)?) => {
196            $(
197                #[test]
198                fn $test_name() {
199                    let mut slice = $encoded.as_slice();
200                    let got = decode_unchecked(&mut slice);
201                    assert_eq!(got, $want);
202                    assert_eq!(slice.len(), $encoded.len() - $size);
203                }
204            )*
205        }
206    }
207
208    check_decode! {
209        // 3 byte encoding.
210        decode_from_3_byte_slice_lower_bound, 3, 0xFD, [0xFD, 0xFD, 0x00];
211        decode_from_3_byte_slice_endianness, 3, 0xABCD, [0xFD, 0xCD, 0xAB];
212        decode_from_3_byte_slice_upper_bound, 3, 0xFFFF, [0xFD, 0xFF, 0xFF];
213        // 5 byte encoding.
214        decode_from_5_byte_slice_lower_bound, 5, 0x0001_0000, [0xFE, 0x00, 0x00, 0x01, 0x00];
215        decode_from_5_byte_slice_endianness, 5, 0x0123_4567, [0xFE, 0x67, 0x45, 0x23, 0x01];
216        decode_from_5_byte_slice_upper_bound, 5, 0xFFFF_FFFF, [0xFE, 0xFF, 0xFF, 0xFF, 0xFF];
217        // 9 byte encoding.
218        decode_from_9_byte_slice_lower_bound, 9, 0x0000_0001_0000_0000, [0xFF, 0x00, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00];
219        decode_from_9_byte_slice_endianness, 9, 0x0123_4567_89AB_CDEF, [0xFF, 0xEF, 0xCD, 0xAB, 0x89, 0x67, 0x45, 0x23, 0x01];
220        decode_from_9_byte_slice_upper_bound, 9, u64::MAX, [0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF];
221
222        // Check slices that are bigger than the actual encoding.
223        decode_1_byte_from_bigger_slice, 1, 32, [0x20, 0xAB, 0xBC];
224        decode_3_byte_from_bigger_slice, 3, 0xFFFF, [0xFD, 0xFF, 0xFF, 0xAB, 0xBC];
225        decode_5_byte_from_bigger_slice, 5, 0xFFFF_FFFF, [0xFE, 0xFF, 0xFF, 0xFF, 0xFF, 0xAB, 0xBC];
226        decode_9_byte_from_bigger_slice, 9, u64::MAX, [0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xAB, 0xBC];
227    }
228
229    #[test]
230    #[should_panic]
231    fn decode_from_empty_slice_panics() {
232        let mut slice = [].as_slice();
233        let _ = decode_unchecked(&mut slice);
234    }
235
236    #[test]
237    #[should_panic]
238    // Non-minimal is referred to as non-canonical in Core (`bitcoin/src/serialize.h`).
239    fn decode_non_minimal_panics() {
240        let mut slice = [0xFE, 0xCD, 0xAB].as_slice();
241        let _ = decode_unchecked(&mut slice);
242    }
243}