Skip to main content

multiversx_sc_codec/
num_conv.rs

1pub type TopEncodeNumberBuffer = [u8; 8];
2
3/// This buffer is needed to provide some underlying structure on stack off which to build a variable-length slice.
4///
5/// Its length is 9 (one more than necessary, to elegantly deal with the edge case "-1").
6pub const fn top_encode_number_buffer() -> TopEncodeNumberBuffer {
7    [0u8; 8]
8}
9
10/// Encodes number to minimum number of bytes (top-encoding).
11///
12/// Smaller types need to be converted to u64 before using this function.
13///
14/// No generics here, we avoid monomorphization to make the SC binary as small as possible.
15pub fn top_encode_number(x: u64, signed: bool, buffer: &mut TopEncodeNumberBuffer) -> &[u8] {
16    let offset = fill_buffer_find_offset(x, signed, buffer);
17
18    debug_assert!(offset < 9);
19
20    unsafe { buffer.get_unchecked(offset..) }
21}
22
23/// At the same time fills the buffer,
24/// and performs the algorithm that tells us how many bytes can be skipped.
25///
26/// Everything done in one function instead of 2, to avoid any unwanted bounds checks.
27///
28/// This function is hyper-optimized to not contain any jumps. There are no ifs or loops in this,
29/// the entire algorithm is performed via arithmetic, boolean and bitwise operations.
30fn fill_buffer_find_offset(x: u64, signed: bool, buffer: &mut TopEncodeNumberBuffer) -> usize {
31    let b0 = ((x >> 56) & 0xff) as u8;
32
33    let negative = signed && msbit_is_one(b0);
34    let skippable_byte = skippable_byte(negative);
35
36    let mut offset = 0usize;
37    let mut cursor = 1usize;
38
39    change_one_to_zero_unless(&mut cursor, b0 == skippable_byte);
40    offset += cursor;
41
42    let b1 = ((x >> 48) & 0xff) as u8;
43    change_one_to_zero_unless(&mut cursor, b1 == skippable_byte);
44    offset += cursor;
45
46    let b2 = ((x >> 40) & 0xff) as u8;
47    change_one_to_zero_unless(&mut cursor, b2 == skippable_byte);
48    offset += cursor;
49
50    let b3 = ((x >> 32) & 0xff) as u8;
51    change_one_to_zero_unless(&mut cursor, b3 == skippable_byte);
52    offset += cursor;
53
54    let b4 = ((x >> 24) & 0xff) as u8;
55    change_one_to_zero_unless(&mut cursor, b4 == skippable_byte);
56    offset += cursor;
57
58    let b5 = ((x >> 16) & 0xff) as u8;
59    change_one_to_zero_unless(&mut cursor, b5 == skippable_byte);
60    offset += cursor;
61
62    let b6 = ((x >> 8) & 0xff) as u8;
63    change_one_to_zero_unless(&mut cursor, b6 == skippable_byte);
64    offset += cursor;
65
66    // The last byte: it can only get skipped for the number 0.
67    // Writing `b7 == skippable_byte` instead would also have caught -1,
68    // but that is an edge case where we do not want the last byte skipped.
69    let b7 = (x & 0xff) as u8;
70    change_one_to_zero_unless(&mut cursor, x == 0);
71    offset += cursor;
72
73    buffer[0] = b0;
74    buffer[1] = b1;
75    buffer[2] = b2;
76    buffer[3] = b3;
77    buffer[4] = b4;
78    buffer[5] = b5;
79    buffer[6] = b6;
80    buffer[7] = b7;
81
82    // For signed numbers, it can sometimes happen that we are skipping too many bytes,
83    // and the most significant bit ends up different than what we started with.
84    // In this case we need to backtrack one step.
85    // e.g. 255: [255] -> [0, 255]
86    // e.g. -129: [0x7f] -> [0xff, 0x7f]
87    cursor = 1;
88    change_one_to_zero_unless(&mut cursor, signed);
89    change_one_to_zero_unless(&mut cursor, offset > 0);
90
91    // The only time when the offset can be 8 (and thus out of bounds)
92    // is for the number 0. Conveniently, for 0 all bytes are 0, so applying modulo 8 does not change the outcome.
93    let byte_at_offset = buffer[offset % 8];
94
95    // The main condition for stepping back one step: the most significant bit changed in the process.
96    let msbit_corrupted = msbit_is_one(byte_at_offset) != msbit_is_one(b0);
97    change_one_to_zero_unless(&mut cursor, msbit_corrupted);
98
99    // According to this algorithm, it should be impossible to underflow
100    // using wrapping_sub to avoid unnecessary underflow check
101    debug_assert!(offset >= cursor);
102    offset = offset.wrapping_sub(cursor);
103
104    offset
105}
106
107/// Handles both top-encoding and nested-encoding, signed and unsigned, of any length.
108///
109/// The result needs to be validated to not exceed limits and then cast to the desired type.
110///
111/// No generics here, we avoid monomorphization to make the SC binary as small as possible.
112pub fn universal_decode_number(bytes: &[u8], signed: bool) -> u64 {
113    // it is almost impossible to get a slice longer than 8
114    // just a basic overflow/underflow protection
115    let safe_len = bytes.len() % 9;
116
117    unsafe { universal_decode_number_impl(bytes.as_ptr(), safe_len, signed) }
118}
119
120/// Same as [`universal_decode_number`], but assumes that the input length does not exceed 8.
121pub fn universal_decode_number_unchecked(bytes: &[u8], signed: bool) -> u64 {
122    unsafe { universal_decode_number_impl(bytes.as_ptr(), bytes.len(), signed) }
123}
124
125unsafe fn universal_decode_number_impl(bytes: *const u8, len: usize, signed: bool) -> u64 {
126    let negative = signed && len > 0 && msbit_is_one(unsafe { *bytes });
127    let skippable_byte = skippable_byte(negative);
128
129    let mut extended_buffer = [skippable_byte; 8];
130    let offset = 8usize.wrapping_sub(len);
131    unsafe {
132        core::ptr::copy_nonoverlapping(bytes, extended_buffer.as_mut_ptr().add(offset), len);
133    }
134
135    u64::from_be_bytes(extended_buffer)
136}
137
138/// Most significant bit is 1.
139#[inline]
140fn msbit_is_one(byte: u8) -> bool {
141    byte >= 0b1000_0000u8
142}
143
144#[inline]
145fn change_one_to_zero_unless(x: &mut usize, condition: bool) {
146    debug_assert!(*x <= 1);
147    *x &= condition as usize;
148}
149
150/// For negative = true, yields 0xff.
151///
152/// For negative = false, yields 0x00.
153///
154/// Has no if, doesn't branch.
155#[inline]
156fn skippable_byte(negative: bool) -> u8 {
157    0u8.wrapping_sub(negative as u8)
158}
159
160#[cfg(test)]
161mod test {
162    use super::*;
163
164    #[test]
165    fn test_set_to_zero_unless() {
166        let mut x = 1;
167        change_one_to_zero_unless(&mut x, true);
168        assert_eq!(x, 1);
169        change_one_to_zero_unless(&mut x, false);
170        assert_eq!(x, 0);
171    }
172
173    #[test]
174    fn test_skippable_byte() {
175        assert_eq!(skippable_byte(true), 0xffu8);
176        assert_eq!(skippable_byte(false), 0x00u8);
177    }
178
179    /// Only checks the filling out of the buffer.
180    #[test]
181    fn test_populate_buffer() {
182        let mut buffer = top_encode_number_buffer();
183        let _ = fill_buffer_find_offset(0x12345678abcdef12, false, &mut buffer);
184        assert_eq!(buffer, [0x12, 0x34, 0x56, 0x78, 0xab, 0xcd, 0xef, 0x12]);
185    }
186
187    fn test_encode_decode(x: u64, signed: bool, bytes: &[u8]) {
188        let mut buffer = top_encode_number_buffer();
189        assert_eq!(
190            top_encode_number(x, signed, &mut buffer),
191            bytes,
192            "encode failed for {x}"
193        );
194        assert_eq!(
195            universal_decode_number(bytes, signed,),
196            x,
197            "decode failed for {x}"
198        );
199    }
200
201    #[test]
202    #[rustfmt::skip]
203    fn test_top_encode_number() {
204        // unsigned
205        test_encode_decode(0x00, false, &[]);
206        test_encode_decode(0x01, false, &[1]);
207        test_encode_decode(0x7f, false, &[0x7f]);
208        test_encode_decode(0x80, false, &[0x80]);
209        test_encode_decode(0xff, false, &[0xff]);
210        test_encode_decode(0x0100, false, &[1, 0]);
211        test_encode_decode(0xff00, false, &[0xff, 0]);
212        test_encode_decode(0xffff, false, &[0xff, 0xff]);
213        test_encode_decode(0xffffffffffffffff, false, &[0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff]);
214
215        // signed, positive
216        test_encode_decode(0x00, true, &[]);
217        test_encode_decode(0x01, true, &[1]);
218        test_encode_decode(0x7f, true, &[0x7f]);
219        test_encode_decode(0x80, true, &[0x00, 0x80]);
220        test_encode_decode(0x0100, true, &[1, 0]);
221        test_encode_decode(0xff00, true, &[0x00, 0xff, 0]);
222        test_encode_decode(0xffff, true, &[0x00, 0xff, 0xff]);
223        test_encode_decode(0x7fffffffffffffff, true, &[0x7f, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff]);
224        test_encode_decode(0x8000000000000000, true, &[0x80, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00]);
225
226        // signed, negative
227        test_encode_decode(-1i64 as u64, true, &[0xff]);
228        test_encode_decode(-2i64 as u64, true, &[0xfe]);
229        test_encode_decode(-126i64 as u64, true, &[0x82]);
230        test_encode_decode(-127i64 as u64, true, &[0x81]);
231        test_encode_decode(-128i64 as u64, true, &[0x80]);
232        test_encode_decode(-129i64 as u64, true, &[0xff, 0x7f]);
233        test_encode_decode(-255i64 as u64, true, &[0xff, 0x01]);
234        test_encode_decode(-256i64 as u64, true, &[0xff, 0x00]);
235        test_encode_decode(-257i64 as u64, true, &[0xfe, 0xff]);
236    }
237}