ion_rs/binary/
uint.rs

1use num_bigint::BigUint;
2use std::io::Write;
3use std::mem;
4
5use crate::data_source::IonDataSource;
6use crate::result::{decoding_error, IonResult};
7use crate::types::{Int, UInt};
8
// This limit is used for stack-allocating buffer space to encode/decode UInts.
// `DecodedUInt::read` uses a stack-allocated array of this many bytes for any UInt whose
// length does not exceed it; longer UInts are read into a heap-allocated `Vec` instead.
const UINT_STACK_BUFFER_SIZE: usize = 16;
// The largest UInt length (in bytes) that `DecodedUInt::read` will accept; longer lengths are
// rejected with a decoding error before any bytes are read.
// This number was chosen somewhat arbitrarily and could be lifted if a use case demands it.
const MAX_UINT_SIZE_IN_BYTES: usize = 2048;
13
/// Represents a fixed-length unsigned integer. See the
/// [UInt and Int Fields](https://amazon-ion.github.io/ion-docs/docs/binary.html#uint-and-int-fields)
/// section of the binary Ion spec for more details.
#[derive(Debug)]
pub struct DecodedUInt {
    // The number of bytes that were read from the data source to construct this value.
    size_in_bytes: usize,
    // The decoded magnitude.
    value: UInt,
}
22
23impl DecodedUInt {
24    pub(crate) fn new(value: UInt, size_in_bytes: usize) -> Self {
25        DecodedUInt {
26            size_in_bytes,
27            value,
28        }
29    }
30
31    /// Interprets all of the bytes in the provided slice as big-endian unsigned integer bytes.
32    /// The caller must confirm that `uint_bytes` is no longer than 8 bytes long; otherwise,
33    /// overflow may quietly occur.
34    pub(crate) fn small_uint_from_slice(uint_bytes: &[u8]) -> u64 {
35        let mut magnitude: u64 = 0;
36        for &byte in uint_bytes {
37            let byte = u64::from(byte);
38            magnitude <<= 8;
39            magnitude |= byte;
40        }
41        magnitude
42    }
43
44    /// Interprets all of the bytes in the provided slice as big-endian unsigned integer bytes.
45    pub(crate) fn big_uint_from_slice(uint_bytes: &[u8]) -> BigUint {
46        BigUint::from_bytes_be(uint_bytes)
47    }
48
49    /// Reads a UInt with `length` bytes from the provided data source.
50    pub fn read<R: IonDataSource>(data_source: &mut R, length: usize) -> IonResult<DecodedUInt> {
51        if length > MAX_UINT_SIZE_IN_BYTES {
52            return decoding_error(format!(
53                "Found a {length}-byte UInt. Max supported size is {MAX_UINT_SIZE_IN_BYTES} bytes."
54            ));
55        }
56
57        if length <= UINT_STACK_BUFFER_SIZE {
58            let buffer = &mut [0u8; UINT_STACK_BUFFER_SIZE];
59            DecodedUInt::read_using_buffer(data_source, length, buffer)
60        } else {
61            // We're reading an enormous int. Heap-allocate a Vec to use as storage.
62            let mut buffer = vec![0u8; length];
63            DecodedUInt::read_using_buffer(data_source, length, buffer.as_mut_slice())
64        }
65    }
66
67    fn read_using_buffer<R: IonDataSource>(
68        data_source: &mut R,
69        length: usize,
70        buffer: &mut [u8],
71    ) -> IonResult<DecodedUInt> {
72        // Get a mutable reference to a portion of the buffer just big enough to fit
73        // the requested number of bytes.
74        let buffer = &mut buffer[0..length];
75
76        data_source.read_exact(buffer)?;
77
78        let value = if length <= mem::size_of::<u64>() {
79            // The UInt is small enough to fit in a u64.
80            let mut magnitude: u64 = 0;
81            for &byte in buffer.iter() {
82                let byte = u64::from(byte);
83                magnitude <<= 8;
84                magnitude |= byte;
85            }
86            UInt::U64(magnitude)
87        } else {
88            // The UInt is too large to fit in a u64; read it as a BigUInt instead
89            let magnitude = BigUint::from_bytes_be(buffer);
90            UInt::BigUInt(magnitude)
91        };
92
93        Ok(DecodedUInt {
94            size_in_bytes: length,
95            value,
96        })
97    }
98
99    /// Encodes the provided `magnitude` as a UInt and writes it to the provided `sink`.
100    pub fn write_u64<W: Write>(sink: &mut W, magnitude: u64) -> IonResult<usize> {
101        let encoded = encode_u64(magnitude);
102        let bytes_to_write = encoded.as_ref();
103
104        sink.write_all(bytes_to_write)?;
105        Ok(bytes_to_write.len())
106    }
107
108    /// Returns the magnitude of the unsigned integer.
109    #[inline(always)]
110    pub fn value(&self) -> &UInt {
111        &self.value
112    }
113
114    /// Returns the number of bytes that were read from the data source to construct this
115    /// unsigned integer.
116    #[inline(always)]
117    pub fn size_in_bytes(&self) -> usize {
118        self.size_in_bytes
119    }
120}
121
122impl From<DecodedUInt> for Int {
123    fn from(uint: DecodedUInt) -> Self {
124        let DecodedUInt {
125            value,
126            .. // Ignore 'size_in_bytes'
127        } = uint;
128        Int::from(value)
129    }
130}
131
/// A buffer for storing a UInt's Big Endian bytes. UInts that can fit in a `u64` will use the
/// `Stack` storage variant, meaning that no heap allocations are required in the common case.
#[derive(Clone, Debug, PartialEq, Eq)]
pub enum UIntBeBytes {
    /// A fixed array the size of a `u64`; `encode_u64` fills it with the magnitude's
    /// big-endian bytes.
    Stack([u8; mem::size_of::<u64>()]),
    /// Heap-allocated big-endian bytes; `encode_uint` uses this for `UInt::BigUInt` magnitudes.
    Heap(Vec<u8>),
}
139
/// The big-endian, compact slice of bytes for a UInt (`u64`). Leading zero
/// octets are not part of the representation. See the [spec] for more
/// information.
///
/// [spec]: https://amazon-ion.github.io/ion-docs/docs/binary.html#uint-and-int-fields
#[derive(Clone, Debug, PartialEq, Eq)]
pub struct EncodedUInt {
    // Backing storage for the big-endian bytes; may begin with bytes that are not part of
    // the encoding.
    be_bytes: UIntBeBytes,
    // Index into `be_bytes` at which the encoding starts; `as_bytes` returns everything from
    // this index to the end of the storage.
    first_occupied_byte: usize,
}
150
151impl EncodedUInt {
152    /// Returns the slice view of the encoded UInt.
153    pub fn as_bytes(&self) -> &[u8] {
154        match self.be_bytes {
155            UIntBeBytes::Stack(ref byte_array) => &byte_array[self.first_occupied_byte..],
156            UIntBeBytes::Heap(ref byte_vec) => &byte_vec[self.first_occupied_byte..],
157        }
158    }
159}
160
161impl AsRef<[u8]> for EncodedUInt {
162    /// The same as [EncodedUInt::as_bytes].
163    fn as_ref(&self) -> &[u8] {
164        self.as_bytes()
165    }
166}
167
168/// Returns the magnitude as big-endian bytes.
169///
170/// ```
171/// use ion_rs::binary::uint;
172///
173/// let repr = uint::encode_u64(5u64);
174/// assert_eq!(&[0x05], repr.as_bytes());
175///
176/// let two_bytes = uint::encode_u64(256u64);
177/// assert_eq!(&[0x01, 0x00], two_bytes.as_bytes());
178/// ```
179pub fn encode_u64(magnitude: u64) -> EncodedUInt {
180    // We can divide the number of leading zero bits by 8
181    // to to get the number of leading zero bytes.
182    let empty_leading_bytes: u32 = magnitude.leading_zeros() / 8;
183    let first_occupied_byte = empty_leading_bytes as usize;
184
185    let magnitude_bytes: [u8; mem::size_of::<u64>()] = magnitude.to_be_bytes();
186
187    EncodedUInt {
188        be_bytes: UIntBeBytes::Stack(magnitude_bytes),
189        first_occupied_byte,
190    }
191}
192
193/// Returns the magnitude as big-endian bytes.
194pub fn encode_uint(magnitude: &UInt) -> EncodedUInt {
195    let magnitude: &BigUint = match magnitude {
196        UInt::U64(m) => return encode_u64(*m),
197        UInt::BigUInt(m) => m,
198    };
199
200    let be_bytes = UIntBeBytes::Heap(magnitude.to_bytes_be());
201    let first_occupied_byte = 0;
202
203    EncodedUInt {
204        be_bytes,
205        first_occupied_byte,
206    }
207}
208
#[cfg(test)]
mod tests {
    use super::*;
    use num_traits::Num;
    use std::io::Cursor;

    const READ_ERROR_MESSAGE: &str = "Failed to read a UInt from the provided cursor.";
    const WRITE_ERROR_MESSAGE: &str = "Writing a UInt to the provided sink failed.";

    /// Decodes all of `encoded` as a single UInt, panicking if the read fails.
    fn read_all(encoded: &[u8]) -> DecodedUInt {
        DecodedUInt::read(&mut Cursor::new(encoded), encoded.len()).expect(READ_ERROR_MESSAGE)
    }

    /// Writes `magnitude` with `DecodedUInt::write_u64` and returns the bytes that were emitted.
    fn write_to_vec(magnitude: u64) -> Vec<u8> {
        let mut sink: Vec<u8> = Vec::new();
        DecodedUInt::write_u64(&mut sink, magnitude).expect(WRITE_ERROR_MESSAGE);
        sink
    }

    #[test]
    fn test_read_one_byte_uint() {
        let decoded = read_all(&[0b1000_0000]);
        assert_eq!(decoded.size_in_bytes(), 1);
        assert_eq!(decoded.value(), &UInt::U64(128));
    }

    #[test]
    fn test_read_two_byte_uint() {
        let decoded = read_all(&[0b0111_1111, 0b1111_1111]);
        assert_eq!(decoded.size_in_bytes(), 2);
        assert_eq!(decoded.value(), &UInt::U64(32_767));
    }

    #[test]
    fn test_read_three_byte_uint() {
        let decoded = read_all(&[0b0011_1100, 0b1000_0111, 0b1000_0001]);
        assert_eq!(decoded.size_in_bytes(), 3);
        assert_eq!(decoded.value(), &UInt::U64(3_966_849));
    }

    #[test]
    fn test_read_ten_byte_uint() {
        // Ten bytes is too wide for a u64, so the value must come back as a BigUInt.
        let decoded = read_all(&[0xFFu8; 10]);
        assert_eq!(decoded.size_in_bytes(), 10);

        let expected_magnitude = BigUint::from_str_radix("ffffffffffffffffffff", 16).unwrap();
        assert_eq!(decoded.value(), &UInt::BigUInt(expected_magnitude));
    }

    #[test]
    fn test_read_uint_too_large() {
        // One byte over the configured maximum must be rejected.
        let oversized = vec![1u8; MAX_UINT_SIZE_IN_BYTES + 1];
        let _error = DecodedUInt::read(&mut Cursor::new(oversized.as_slice()), oversized.len())
            .expect_err("This exceeded the configured max UInt size.");
    }

    #[test]
    fn test_write_ten_byte_uint() {
        let magnitude = BigUint::from_str_radix("ffffffffffffffffffff", 16).unwrap();
        let encoded = encode_uint(&UInt::BigUInt(magnitude));

        let mut sink: Vec<u8> = Vec::new();
        sink.write_all(encoded.as_bytes()).unwrap();

        let expected: &[u8] = &[0xFFu8; 10];
        assert_eq!(expected, sink.as_slice());
    }

    #[test]
    fn test_write_eight_byte_uint() {
        let written = write_to_vec(0x01_23_45_67_89_AB_CD_EF);
        let expected: &[u8] = &[0x01, 0x23, 0x45, 0x67, 0x89, 0xAB, 0xCD, 0xEF];
        assert_eq!(expected, written.as_slice());
    }

    #[test]
    fn test_write_five_byte_uint() {
        let written = write_to_vec(0x01_23_45_67_89);
        let expected: &[u8] = &[0x01, 0x23, 0x45, 0x67, 0x89];
        assert_eq!(expected, written.as_slice());
    }

    #[test]
    fn test_write_three_byte_uint() {
        let written = write_to_vec(0x01_23_45);
        let expected: &[u8] = &[0x01, 0x23, 0x45];
        assert_eq!(expected, written.as_slice());
    }

    #[test]
    fn test_write_uint_zero() {
        // Zero has no occupied bytes, so nothing should be written at all.
        let written = write_to_vec(0x00);
        let expected: &[u8] = &[];
        assert_eq!(expected, written.as_slice());
    }
}