big_decimal_byte_string_encoder/
lib.rs

1//! A Rust implementation of Google BigQuery's BigDecimalByteStringEncoder for the NUMERIC data type.
2//!
3//! This crate provides functionality to encode and decode BigDecimal values
4//! to and from byte strings compatible with BigQuery's NUMERIC type, as used in the BigQuery Write API.
5//!
6//! # Examples
7//!
8//! ```
9//! use bigdecimal::BigDecimal;
10//! use big_decimal_byte_string_encoder::{decode_bigquery_bytes_to_bigdecimal, encode_bigdecimal_to_bigquery_bytes};
11//! use std::str::FromStr;
12//!
13//! let decimal = BigDecimal::from_str("123.456").unwrap();
14//! let encoded = encode_bigdecimal_to_bigquery_bytes(&decimal).unwrap();
15//! let decoded = decode_bigquery_bytes_to_bigdecimal(&encoded).unwrap();
16//! assert_eq!(decimal, decoded);
17//! ```
18
19use bigdecimal::BigDecimal;
20use num_bigint::{BigInt, Sign};
21use once_cell::sync::Lazy;
22use std::str::FromStr;
23use thiserror::Error;
24
/// The scale (number of digits after the decimal point) used for NUMERIC values in BigQuery.
///
/// Every encoded value is rescaled to exactly this many fractional digits, and decoded
/// integers are interpreted at this scale.
const NUMERIC_SCALE: i64 = 9;
27
28/// The maximum value for a NUMERIC type in BigQuery.
29static MAX_NUMERIC_VALUE: Lazy<BigDecimal> =
30    Lazy::new(|| BigDecimal::from_str("99999999999999999999999999999.999999999").unwrap());
31
32/// The minimum value for a NUMERIC type in BigQuery.
33static MIN_NUMERIC_VALUE: Lazy<BigDecimal> =
34    Lazy::new(|| BigDecimal::from_str("-99999999999999999999999999999.999999999").unwrap());
35
/// Errors that can occur during encoding or decoding.
#[derive(Error, Debug)]
pub enum NumericEncoderError {
    /// The value's scale is outside the range the encoder accepts.
    ///
    /// Fields: the scale of the offending value, then the maximum allowed scale.
    #[error("Scale exceeds maximum: {0} (allowed: {1})")]
    ScaleExceeded(i64, i64),
    /// The value lies outside the range representable by NUMERIC.
    ///
    /// Field: the string form of the out-of-range value.
    #[error("Numeric overflow: {0}")]
    Overflow(String),
}
44
45fn to_java_byte_array(value: &BigInt) -> Vec<u8> {
46    let (sign, mut bytes) = value.to_bytes_be();
47
48    if sign == Sign::Minus {
49        if bytes.is_empty() {
50            bytes.push(0);
51        }
52
53        for byte in &mut bytes {
54            *byte = !*byte;
55        }
56
57        let mut carry = true;
58        for byte in bytes.iter_mut().rev() {
59            if carry {
60                if *byte == 0xFF {
61                    *byte = 0;
62                } else {
63                    *byte += 1;
64                    carry = false;
65                }
66            } else {
67                break;
68            }
69        }
70
71        if carry {
72            bytes.insert(0, 1);
73        }
74
75        if bytes[0] & 0x80 == 0 {
76            bytes.insert(0, 0xFF);
77        }
78    } else if !bytes.is_empty() && bytes[0] & 0x80 != 0 {
79        bytes.insert(0, 0);
80    }
81
82    bytes
83}
84
85fn from_java_byte_array(bytes: &[u8]) -> BigInt {
86    if bytes.is_empty() {
87        return BigInt::from(0);
88    }
89
90    let is_negative = bytes[0] & 0x80 != 0;
91
92    if is_negative {
93        let mut complemented = Vec::with_capacity(bytes.len());
94        let mut carry = true;
95
96        for &byte in bytes.iter().rev() {
97            let mut complemented_byte = !byte;
98            if carry {
99                if complemented_byte == 0xFF {
100                    complemented_byte = 0;
101                } else {
102                    complemented_byte += 1;
103                    carry = false;
104                }
105            }
106            complemented.push(complemented_byte);
107        }
108
109        complemented.reverse();
110
111        while complemented.len() > 1 && complemented[0] == 0xFF {
112            complemented.remove(0);
113        }
114
115        BigInt::from_bytes_be(Sign::Minus, &complemented)
116    } else {
117        let mut start = 0;
118        while start < bytes.len() - 1 && bytes[start] == 0 {
119            start += 1;
120        }
121
122        BigInt::from_bytes_be(Sign::Plus, &bytes[start..])
123    }
124}
125
126/// Encodes a BigDecimal value to a byte string compatible with BigQuery's NUMERIC type.
127///
128/// # Arguments
129///
130/// * `decimal` - The BigDecimal value to encode.
131///
132/// # Returns
133///
134/// A Result containing either the encoded byte string or a NumericEncoderError.
135///
136/// # Examples
137///
138/// ```
139/// use bigdecimal::BigDecimal;
140/// use std::str::FromStr;
141/// use big_decimal_byte_string_encoder::encode_bigdecimal_to_bigquery_bytes;
142///
143/// let decimal = BigDecimal::from_str("123.456").unwrap();
144/// let encoded = encode_bigdecimal_to_bigquery_bytes(&decimal).unwrap();
145/// ```
146pub fn encode_bigdecimal_to_bigquery_bytes(
147    decimal: &BigDecimal,
148) -> Result<Vec<u8>, NumericEncoderError> {
149    let scale = decimal.fractional_digit_count();
150    if !(0..=NUMERIC_SCALE).contains(&scale) {
151        return Err(NumericEncoderError::ScaleExceeded(scale, NUMERIC_SCALE));
152    }
153
154    if decimal < &*MIN_NUMERIC_VALUE || decimal > &*MAX_NUMERIC_VALUE {
155        return Err(NumericEncoderError::Overflow(decimal.to_string()));
156    }
157
158    let scaled = decimal.with_scale(NUMERIC_SCALE);
159    let (scaled_value, _) = scaled.as_bigint_and_exponent();
160    let mut bytes = to_java_byte_array(&scaled_value);
161    bytes.reverse();
162    Ok(bytes)
163}
164
165/// Decodes a byte string to a BigDecimal value.
166///
167/// # Arguments
168///
169/// * `bytes` - The byte string to decode.
170///
171/// # Returns
172///
173/// A Result containing either the decoded BigDecimal value or a NumericEncoderError.
174///
175/// # Examples
176///
177/// ```
178/// use big_decimal_byte_string_encoder::decode_bigquery_bytes_to_bigdecimal;
179///
180/// let encoded = vec![0, 140, 134, 71];
181/// let decoded = decode_bigquery_bytes_to_bigdecimal(&encoded).unwrap();
182/// ```
183pub fn decode_bigquery_bytes_to_bigdecimal(
184    bytes: &[u8],
185) -> Result<BigDecimal, NumericEncoderError> {
186    let mut bytes = bytes.to_vec();
187    bytes.reverse();
188
189    let scaled_value = from_java_byte_array(&bytes);
190
191    let decimal_value = BigDecimal::from((scaled_value, NUMERIC_SCALE));
192    if decimal_value > *MAX_NUMERIC_VALUE || decimal_value < *MIN_NUMERIC_VALUE {
193        return Err(NumericEncoderError::Overflow(decimal_value.to_string()));
194    }
195
196    Ok(decimal_value)
197}
198
#[cfg(test)]
mod tests {
    use super::*;
    use bigdecimal::FromPrimitive;

    /// Encodes `literal`, checks the output equals `big_endian` reversed, then decodes
    /// that output and checks the original value comes back.
    fn assert_round_trip(literal: &str, big_endian: &[u8]) {
        let value = BigDecimal::from_str(literal).unwrap();
        let wire = encode_bigdecimal_to_bigquery_bytes(&value).unwrap();

        // Expected vectors are written big-endian; the encoder emits little-endian.
        let expected_wire: Vec<u8> = big_endian.iter().rev().copied().collect();
        assert_eq!(wire, expected_wire);

        let round_tripped = decode_bigquery_bytes_to_bigdecimal(&wire).unwrap();
        assert_eq!(value, round_tripped);
    }

    #[test]
    fn test_encode_decode() {
        assert_round_trip("0", &[0]);
        assert_round_trip("1.2", &[71, 134, 140, 0]);
        assert_round_trip("-1.2", &[184, 121, 116, 0]);

        // Both ends of the NUMERIC range.
        assert_round_trip(
            "99999999999999999999999999999.999999999",
            &[
                75, 59, 76, 168, 90, 134, 196, 122, 9, 138, 34, 63, 255, 255, 255, 255,
            ],
        );
        assert_round_trip(
            "-99999999999999999999999999999.999999999",
            &[
                180, 196, 179, 87, 165, 121, 59, 133, 246, 117, 221, 192, 0, 0, 0, 1,
            ],
        );

        assert_round_trip(
            "-123456789.42001",
            &[254, 73, 100, 180, 65, 130, 149, 240],
        );
        assert_round_trip("12.345", &[2, 223, 209, 192, 64]);
        assert_round_trip("1", &[59, 154, 202, 0]);
        assert_round_trip("2", &[119, 53, 148, 0]);
        assert_round_trip("-1", &[196, 101, 54, 0]);
        assert_round_trip("128", &[29, 205, 101, 0, 0]);
        assert_round_trip("-128", &[226, 50, 155, 0, 0]);
        assert_round_trip("12702228", &[45, 32, 155, 235, 203, 200, 0]);
    }

    #[test]
    fn test_encode_decode_random() {
        for _ in 0..1000 {
            let raw = BigDecimal::from_f64(rand::random::<f64>()).unwrap();
            // Scale is drawn from 2..=9 so it never exceeds the NUMERIC scale.
            let digits = i64::from(rand::random::<u32>() % 8 + 2);
            let sample = raw.with_scale(digits);

            let wire = encode_bigdecimal_to_bigquery_bytes(&sample).unwrap();
            let round_tripped = decode_bigquery_bytes_to_bigdecimal(&wire).unwrap();
            assert_eq!(sample, round_tripped);
        }
    }

    #[test]
    fn test_overflow() {
        // One unit above the largest representable integer part.
        let too_big = BigDecimal::from_str("100000000000000000000000000000").unwrap();
        let outcome = encode_bigdecimal_to_bigquery_bytes(&too_big);
        assert!(matches!(outcome, Err(NumericEncoderError::Overflow(_))));
    }

    #[test]
    fn test_scale_exceeded() {
        // Ten fractional digits: one more than NUMERIC allows.
        let too_precise = BigDecimal::from_str("1.0000000001").unwrap();
        let outcome = encode_bigdecimal_to_bigquery_bytes(&too_precise);
        assert!(matches!(
            outcome,
            Err(NumericEncoderError::ScaleExceeded(_, _))
        ));
    }
}