ion_rs/binary/
int.rs

1use std::mem;
2
3use crate::data_source::IonDataSource;
4use crate::result::{decoding_error, IonResult};
5use crate::types;
6use crate::types::{Coefficient, Int};
7use num_bigint::{BigInt, Sign};
8use num_traits::Zero;
9use std::io::Write;
10
// NOTE(review): `IntStorage` is not referenced anywhere in the visible portion of this
// file -- confirm whether it is still needed.
type IntStorage = i64;
// The single-byte encoding of negative zero: the sign bit (the high bit of the first byte)
// set over an all-zero magnitude.
const INT_NEGATIVE_ZERO: u8 = 0x80;

// Ints whose encoding is at most this many bytes are decoded using a stack-allocated
// buffer; longer encodings fall back to a heap-allocated `Vec`.
const INT_STACK_BUFFER_SIZE: usize = 16;
// The largest encoded Int (in bytes) that `DecodedInt::read` will accept.
// This number was chosen somewhat arbitrarily and could be lifted if a use case demands it.
const MAX_INT_SIZE_IN_BYTES: usize = 2048;
18
/// Represents a fixed-length signed integer that was decoded from a binary Ion stream,
/// along with the number of bytes its encoding occupied and the sign bit that was found
/// in that encoding. See the
/// [UInt and Int Fields](https://amazon-ion.github.io/ion-docs/docs/binary.html#uint-and-int-fields)
/// section of the binary Ion spec for more details.
#[derive(Debug)]
pub struct DecodedInt {
    // The number of bytes that were read from the data source to produce this value.
    size_in_bytes: usize,
    // The decoded integer. Negative zero is stored here as a plain zero.
    value: Int,
    // [Int] is not capable of natively representing negative zero. We track the sign
    // of the value separately so we can distinguish between 0 and -0.
    is_negative: bool,
}
30
31impl DecodedInt {
32    pub(crate) fn new(value: Int, is_negative: bool, size_in_bytes: usize) -> Self {
33        DecodedInt {
34            size_in_bytes,
35            value,
36            is_negative,
37        }
38    }
39
40    /// Reads an Int with `length` bytes from the provided data source.
41    pub fn read<R: IonDataSource>(data_source: &mut R, length: usize) -> IonResult<DecodedInt> {
42        if length == 0 {
43            return Ok(DecodedInt {
44                size_in_bytes: 0,
45                value: Int::I64(0),
46                is_negative: false,
47            });
48        } else if length > MAX_INT_SIZE_IN_BYTES {
49            return decoding_error(format!(
50                "Found a {length}-byte Int. Max supported size is {MAX_INT_SIZE_IN_BYTES} bytes."
51            ));
52        }
53
54        if length <= INT_STACK_BUFFER_SIZE {
55            let buffer = &mut [0u8; INT_STACK_BUFFER_SIZE];
56            DecodedInt::read_using_buffer(data_source, length, buffer)
57        } else {
58            // We're reading an enormous int. Heap-allocate a Vec to use as storage.
59            let mut buffer = vec![0u8; length];
60            DecodedInt::read_using_buffer(data_source, length, buffer.as_mut_slice())
61        }
62    }
63
64    pub fn read_using_buffer<R: IonDataSource>(
65        data_source: &mut R,
66        length: usize,
67        buffer: &mut [u8],
68    ) -> IonResult<DecodedInt> {
69        // Get a mutable reference to a portion of the buffer just big enough to fit
70        // the requested number of bytes.
71        let buffer = &mut buffer[0..length];
72
73        data_source.read_exact(buffer)?;
74        let mut byte_iter = buffer.iter();
75        let mut is_negative: bool = false;
76
77        let value = if length <= mem::size_of::<i64>() {
78            // This Int will fit in an i64.
79            let first_byte: i64 = i64::from(byte_iter.next().copied().unwrap());
80            let sign: i64 = if first_byte & 0b1000_0000 == 0 {
81                1
82            } else {
83                is_negative = true;
84                -1
85            };
86            let mut magnitude: i64 = first_byte & 0b0111_1111;
87            for &byte in byte_iter {
88                let byte = i64::from(byte);
89                magnitude <<= 8;
90                magnitude |= byte;
91            }
92            Int::I64(sign * magnitude)
93        } else {
94            // This Int is too big for an i64, we'll need to use a BigInt
95            let sign: num_bigint::Sign = if buffer[0] & 0b1000_0000 == 0 {
96                Sign::Plus
97            } else {
98                is_negative = true;
99                Sign::Minus
100            };
101            // We're going to treat the buffer's contents like the big-endian bytes of an
102            // unsigned integer. Now that we've made a note of the sign, set the sign bit
103            // in the buffer to zero.
104            buffer[0] &= 0b0111_1111;
105            let value = BigInt::from_bytes_be(sign, buffer);
106            Int::BigInt(value)
107        };
108
109        Ok(DecodedInt {
110            size_in_bytes: length,
111            value,
112            is_negative,
113        })
114    }
115
116    /// Encodes the provided `value` as an Int and writes it to the provided `sink`.
117    /// Returns the number of bytes written.
118    pub fn write_i64<W: Write>(sink: &mut W, value: i64) -> IonResult<usize> {
119        let magnitude = value.unsigned_abs();
120        // Using leading_zeros() to determine how many empty bytes we can ignore.
121        // We subtract one from the number of leading bits to leave space for a sign bit
122        // and divide by 8 to get the number of bytes.
123        let empty_leading_bytes: u32 = (magnitude.leading_zeros() - 1) >> 3;
124        let first_occupied_byte = empty_leading_bytes as usize;
125
126        let mut magnitude_bytes: [u8; mem::size_of::<u64>()] = magnitude.to_be_bytes();
127        let bytes_to_write: &mut [u8] = &mut magnitude_bytes[first_occupied_byte..];
128        if value < 0 {
129            bytes_to_write[0] |= 0b1000_0000;
130        }
131
132        sink.write_all(bytes_to_write)?;
133        Ok(bytes_to_write.len())
134    }
135
136    /// Encodes a negative zero as an `Int` and writes it to the privided `sink`.
137    /// Returns the number of bytes written.
138    ///
139    /// This method is similar to [Self::write_i64]. However, because an i64 cannot represent a negative
140    /// zero, a separate method is required.
141    pub fn write_negative_zero<W: Write>(sink: &mut W) -> IonResult<usize> {
142        sink.write_all(&[INT_NEGATIVE_ZERO])?;
143        Ok(1)
144    }
145
146    /// Returns `true` if the Int is negative zero.
147    pub fn is_negative_zero(&self) -> bool {
148        // `self.value` can natively represent any negative integer _except_ -0.
149        // To check for negative zero, we need to also look at the sign bit that was encoded
150        // in the stream.
151        self.value.is_zero() && self.is_negative
152    }
153
154    /// Returns the value of the signed integer.
155    #[inline(always)]
156    pub fn value(&self) -> &Int {
157        &self.value
158    }
159
160    /// Returns the number of bytes that were read from the data source to construct this
161    /// signed integer.
162    #[inline(always)]
163    pub fn size_in_bytes(&self) -> usize {
164        self.size_in_bytes
165    }
166
167    /// Constructs a DecodedInt that represents zero. This is useful when reading from a stream
168    /// where a zero-length Int is found, meaning that it is implicitly positive zero.
169    pub fn zero() -> Self {
170        DecodedInt {
171            size_in_bytes: 0,
172            value: Int::I64(0),
173            is_negative: false,
174        }
175    }
176}
177
178impl From<DecodedInt> for Int {
179    /// Note that if the DecodedInt represents -0, converting it to an Integer will result in a 0.
180    /// If negative zero is significant to your use case, check it using [DecodedInt::is_negative_zero]
181    /// before converting it to an Integer.
182    fn from(uint: DecodedInt) -> Self {
183        let DecodedInt {
184            value,
185            .. // Ignore 'size_in_bytes' and 'is_negative'
186        } = uint;
187        value
188    }
189}
190
191impl From<DecodedInt> for Coefficient {
192    fn from(int: DecodedInt) -> Self {
193        let DecodedInt {
194            value,
195            is_negative,
196            .. // ignore `size_in_bytes`
197        } = int;
198        use types::Sign::{Negative, Positive};
199        let sign = if is_negative { Negative } else { Positive };
200        Coefficient::new(sign, value)
201    }
202}
203
#[cfg(test)]
mod tests {
    use super::*;
    use crate::result::IonResult;
    use crate::types::Int;
    use std::io;
    use std::io::Cursor;

    const READ_ERROR_MESSAGE: &str = "Failed to read an Int from the provided cursor.";

    /// Decodes all of `bytes` as a single Int, panicking with `READ_ERROR_MESSAGE`
    /// if the read fails.
    fn read_all(bytes: &[u8]) -> DecodedInt {
        DecodedInt::read(&mut Cursor::new(bytes), bytes.len()).expect(READ_ERROR_MESSAGE)
    }

    #[test]
    fn test_read_three_byte_positive_int() {
        let decoded = read_all(&[0b0011_1100, 0b1000_0111, 0b1000_0001]);
        assert_eq!(decoded.size_in_bytes(), 3);
        assert_eq!(decoded.value(), &Int::I64(3_966_849));
    }

    #[test]
    fn test_read_three_byte_negative_int() {
        let decoded = read_all(&[0b1011_1100, 0b1000_0111, 0b1000_0001]);
        assert_eq!(decoded.size_in_bytes(), 3);
        assert_eq!(decoded.value(), &Int::I64(-3_966_849));
    }

    #[test]
    fn test_read_int_negative_zero() {
        // Only the sign bit is set: negative zero.
        let decoded = read_all(&[0b1000_0000]);
        assert_eq!(decoded.size_in_bytes(), 1);
        assert_eq!(decoded.value(), &Int::I64(0));
        assert!(decoded.is_negative_zero());
    }

    #[test]
    fn test_read_int_positive_zero() {
        // No bits set at all: positive zero.
        let decoded = read_all(&[0b0000_0000]);
        assert_eq!(decoded.size_in_bytes(), 1);
        assert_eq!(decoded.value(), &Int::I64(0));
        assert!(!decoded.is_negative_zero());
    }

    #[test]
    fn test_read_two_byte_positive_int() {
        let decoded = read_all(&[0b0111_1111, 0b1111_1111]);
        assert_eq!(decoded.size_in_bytes(), 2);
        assert_eq!(decoded.value(), &Int::I64(32_767));
    }

    #[test]
    fn test_read_two_byte_negative_int() {
        let decoded = read_all(&[0b1111_1111, 0b1111_1111]);
        assert_eq!(decoded.size_in_bytes(), 2);
        assert_eq!(decoded.value(), &Int::I64(-32_767));
    }

    #[test]
    fn test_read_int_length_zero() {
        let decoded = read_all(&[]);
        assert_eq!(decoded.size_in_bytes(), 0);
        assert_eq!(decoded.value(), &Int::I64(0));
    }

    #[test]
    fn test_read_int_overflow() {
        // One byte longer than the largest Int the reader accepts. Each byte is a 1.
        let oversized = vec![1u8; MAX_INT_SIZE_IN_BYTES + 1];
        let _error = DecodedInt::read(&mut Cursor::new(oversized.as_slice()), oversized.len())
            .expect_err("This exceeded the configured max Int size.");
    }

    /// Encodes `value` with `write_i64` and asserts that exactly `expected_bytes`
    /// were produced.
    fn write_int_test(value: i64, expected_bytes: &[u8]) -> IonResult<()> {
        let mut encoded: Vec<u8> = Vec::new();
        DecodedInt::write_i64(&mut encoded, value)?;
        assert_eq!(encoded, expected_bytes);
        Ok(())
    }

    #[test]
    fn test_write_int_zero() -> IonResult<()> {
        write_int_test(0, &[0b0000_0000])
    }

    #[test]
    fn test_write_int_negative_zero() -> IonResult<()> {
        let mut encoded: Vec<u8> = Vec::new();
        DecodedInt::write_negative_zero(&mut encoded)?;
        assert_eq!(encoded, [0b1000_0000]);
        Ok(())
    }

    #[test]
    fn test_write_int_single_byte_values() -> IonResult<()> {
        let cases: [(i64, u8); 8] = [
            (1, 0b0000_0001),
            (3, 0b0000_0011),
            (7, 0b0000_0111),
            (100, 0b0110_0100),
            (-1, 0b1000_0001),
            (-3, 0b1000_0011),
            (-7, 0b1000_0111),
            (-100, 0b1110_0100),
        ];
        for (value, expected_byte) in cases {
            write_int_test(value, &[expected_byte])?;
        }
        Ok(())
    }

    #[test]
    fn test_write_int_two_byte_values() -> IonResult<()> {
        let cases: [(i64, [u8; 2]); 6] = [
            (201, [0b0000_0000, 0b1100_1001]),
            (501, [0b0000_0001, 0b1111_0101]),
            (16_000, [0b0011_1110, 0b1000_0000]),
            (-201, [0b1000_0000, 0b1100_1001]),
            (-501, [0b1000_0001, 0b1111_0101]),
            (-16_000, [0b1011_1110, 0b1000_0000]),
        ];
        for (value, expected_bytes) in cases {
            write_int_test(value, &expected_bytes)?;
        }
        Ok(())
    }

    #[test]
    fn test_write_int_max_i64() -> IonResult<()> {
        // Round trip: whatever `write_i64` emits for i64::MAX must decode back to it.
        let mut encoded: Vec<u8> = Vec::new();
        let length = DecodedInt::write_i64(&mut encoded, i64::MAX)?;
        let decoded = DecodedInt::read(&mut io::Cursor::new(encoded.as_slice()), length)?;
        assert_eq!(decoded.value, Int::I64(i64::MAX));
        Ok(())
    }
}