Skip to main content

jaq_json/
num.rs

1//! Integer / decimal numbers.
2use super::Rc;
3use alloc::string::{String, ToString};
4use alloc::vec::Vec;
5use core::cmp::Ordering;
6use core::fmt;
7use core::hash::{Hash, Hasher};
8use num_bigint::{BigInt, BigUint, Sign};
9use num_traits::cast::ToPrimitive;
10
11/// Integer / decimal number.
12///
13/// The speciality of this type is that numbers are distinguished into
14/// integers and 64-bit floating-point numbers.
15/// This allows using integers to index arrays,
16/// while using floating-point numbers to do general math.
17///
18/// Operations on numbers follow a few principles:
19/// * The sum, difference, product, and remainder of two integers is integer.
20/// * Any other operation between two numbers yields a float.
21#[derive(Clone, Debug)]
22pub enum Num {
23    /// Machine-size integer
24    Int(isize),
25    /// Arbitrarily large integer
26    BigInt(Rc<BigInt>),
27    /// Floating-point number
28    Float(f64),
29    /// Decimal number
30    Dec(Rc<String>),
31}
32
33impl Num {
34    /// Create a big integer.
35    pub fn big_int(i: BigInt) -> Self {
36        Self::BigInt(i.into())
37    }
38
39    pub(crate) fn from_str(s: &str) -> Self {
40        Self::from_str_radix(s, 10).unwrap_or_else(|| Self::Dec(Rc::new(s.to_string())))
41    }
42
43    /// Convert from an integral type to a machine-sized or big integer.
44    pub fn from_integral<T: Copy + TryInto<isize> + Into<BigInt>>(x: T) -> Self {
45        x.try_into()
46            .map_or_else(|_| Num::big_int(x.into()), Num::Int)
47    }
48
49    /// Try to parse an integer from a string with given radix.
50    pub fn from_str_radix(i: &str, radix: u32) -> Option<Self> {
51        let int = || isize::from_str_radix(i, radix).ok().map(Num::Int);
52        let big = || bigint_from_str_radix(i, radix).map(Self::big_int);
53        int().or_else(big)
54    }
55
56    /// Try to parse a decimal string to a [`Self::Float`], else return NaN.
57    pub fn from_dec_str(n: &str) -> Self {
58        // TODO: changed to NaN!
59        n.parse().map_or(Self::Float(f64::NAN), Self::Float)
60    }
61
62    pub(crate) fn is_int(&self) -> bool {
63        matches!(self, Self::Int(_) | Self::BigInt(_))
64    }
65
66    /// If the value is a machine-sized integer, return it, else fail.
67    pub fn as_isize(&self) -> Option<isize> {
68        match self {
69            Self::Int(i) => Some(*i),
70            Self::BigInt(i) => i.to_isize(),
71            _ => None,
72        }
73    }
74
75    pub(crate) fn as_pos_usize(&self) -> Option<PosUsize> {
76        match self {
77            Self::Int(i) => Some(PosUsize(*i >= 0, i.unsigned_abs())),
78            Self::BigInt(i) => i
79                .magnitude()
80                .to_usize()
81                .map(|u| PosUsize(i.sign() != Sign::Minus, u)),
82            _ => None,
83        }
84    }
85
86    /// If the value is or can be converted to float, return it, else fail.
87    pub(crate) fn as_f64(&self) -> f64 {
88        match self {
89            Self::Int(n) => *n as f64,
90            Self::BigInt(n) => n.to_f64().unwrap(),
91            Self::Float(n) => *n,
92            Self::Dec(n) => n.parse().unwrap(),
93        }
94    }
95
96    pub(crate) fn length(&self) -> Self {
97        match self {
98            Self::Int(i) => Self::Int(i.abs()),
99            Self::BigInt(i) => match i.sign() {
100                Sign::Plus | Sign::NoSign => Self::BigInt(i.clone()),
101                Sign::Minus => Self::BigInt(BigInt::from(i.magnitude().clone()).into()),
102            },
103            Self::Dec(n) => Self::from_dec_str(n).length(),
104            Self::Float(f) => Self::Float(f.abs()),
105        }
106    }
107}
108
109#[derive(Copy, Clone)]
110pub(crate) struct PosUsize(pub(crate) bool, pub(crate) usize);
111
112impl PosUsize {
113    pub fn wrap(&self, len: usize) -> Option<usize> {
114        self.0.then_some(self.1).or_else(|| len.checked_sub(self.1))
115    }
116}
117
118#[test]
119fn wrap_test() {
120    let len = 4;
121    let pos = |i| PosUsize(true, i);
122    let neg = |i| PosUsize(false, i);
123    assert_eq!(pos(0).wrap(len), Some(0));
124    assert_eq!(pos(8).wrap(len), Some(8));
125    assert_eq!(neg(1).wrap(len), Some(3));
126    assert_eq!(neg(4).wrap(len), Some(0));
127    assert_eq!(neg(8).wrap(len), None);
128}
129
130fn int_or_big<const N: usize>(
131    i: Option<isize>,
132    x: [isize; N],
133    f: fn([BigInt; N]) -> BigInt,
134) -> Num {
135    i.map_or_else(|| Num::big_int(f(x.map(BigInt::from))), Num::Int)
136}
137
138// Do not use `BigInt::parse_bytes`, because it accepts arbitrary underscores between digits.
139// https://github.com/rust-num/num-bigint/issues/340
140fn bigint_from_str_radix(s: &str, radix: u32) -> Option<BigInt> {
141    use num_bigint::Sign::{Minus, Plus};
142    let f = |c, sign| s.strip_prefix(c).map(|s| (sign, s));
143    let (sign, num) = f('-', Minus).or_else(|| f('+', Plus)).unwrap_or((Plus, s));
144    biguint_from_str_radix(num, radix).map(|bu| BigInt::from_biguint(sign, bu))
145}
146
147fn biguint_from_str_radix(s: &str, radix: u32) -> Option<BigUint> {
148    assert!((2..=36).contains(&radix));
149    if s.is_empty() {
150        return None;
151    }
152
153    // normalize all characters to plain digit values
154    let digits = s.bytes().map(|b| {
155        Some(match b {
156            b'0'..=b'9' => b - b'0',
157            b'a'..=b'z' => b - b'a' + 10,
158            b'A'..=b'Z' => b - b'A' + 10,
159            _ => return None,
160        })
161        .filter(|d| *d < radix as u8)
162    });
163    BigUint::from_radix_be(&digits.collect::<Option<Vec<_>>>()?, radix)
164}
165
166impl core::ops::Add for Num {
167    type Output = Num;
168    fn add(self, rhs: Self) -> Self::Output {
169        use num_bigint::BigInt;
170        use Num::*;
171        match (self, rhs) {
172            (Int(x), Int(y)) => int_or_big(x.checked_add(y), [x, y], |[x, y]| x + y),
173            (Int(i), BigInt(b)) | (BigInt(b), Int(i)) => Self::big_int(&BigInt::from(i) + &*b),
174            (Int(i), Float(f)) | (Float(f), Int(i)) => Float(f + i as f64),
175            (BigInt(x), BigInt(y)) => Self::big_int(&*x + &*y),
176            (BigInt(i), Float(f)) | (Float(f), BigInt(i)) => Float(f + i.to_f64().unwrap()),
177            (Float(x), Float(y)) => Float(x + y),
178            (Dec(n), r) => Self::from_dec_str(&n) + r,
179            (l, Dec(n)) => l + Self::from_dec_str(&n),
180        }
181    }
182}
183
184impl core::ops::Sub for Num {
185    type Output = Self;
186    fn sub(self, rhs: Self) -> Self::Output {
187        use num_bigint::BigInt;
188        use Num::*;
189        match (self, rhs) {
190            (Int(x), Int(y)) => int_or_big(x.checked_sub(y), [x, y], |[x, y]| x - y),
191            (Int(i), BigInt(b)) => Self::big_int(&BigInt::from(i) - &*b),
192            (BigInt(b), Int(i)) => Self::big_int(&*b - &BigInt::from(i)),
193            (BigInt(x), BigInt(y)) => Self::big_int(&*x - &*y),
194            (Float(f), Int(i)) => Float(f - i as f64),
195            (Int(i), Float(f)) => Float(i as f64 - f),
196            (Float(f), BigInt(i)) => Float(f - i.to_f64().unwrap()),
197            (BigInt(i), Float(f)) => Float(i.to_f64().unwrap() - f),
198            (Float(x), Float(y)) => Float(x - y),
199            (Dec(n), r) => Self::from_dec_str(&n) - r,
200            (l, Dec(n)) => l - Self::from_dec_str(&n),
201        }
202    }
203}
204
205impl core::ops::Mul for Num {
206    type Output = Self;
207    fn mul(self, rhs: Self) -> Self::Output {
208        use num_bigint::BigInt;
209        use Num::*;
210        match (self, rhs) {
211            (Int(x), Int(y)) => int_or_big(x.checked_mul(y), [x, y], |[x, y]| x * y),
212
213            (Int(i), BigInt(b)) | (BigInt(b), Int(i)) => Self::big_int(&BigInt::from(i) * &*b),
214            (BigInt(x), BigInt(y)) => Self::big_int(&*x * &*y),
215            (BigInt(i), Float(f)) | (Float(f), BigInt(i)) => Float(f * i.to_f64().unwrap()),
216            (Float(f), Int(i)) | (Int(i), Float(f)) => Float(f * i as f64),
217            (Float(x), Float(y)) => Float(x * y),
218            (Dec(n), r) => Self::from_dec_str(&n) * r,
219            (l, Dec(n)) => l * Self::from_dec_str(&n),
220        }
221    }
222}
223
224impl core::ops::Div for Num {
225    type Output = Self;
226    fn div(self, rhs: Self) -> Self::Output {
227        use Num::{BigInt, Dec, Float, Int};
228        match (self, rhs) {
229            (Int(l), r) => Float(l as f64) / r,
230            (l, Int(r)) => l / Float(r as f64),
231            (BigInt(l), r) => Float(l.to_f64().unwrap()) / r,
232            (l, BigInt(r)) => l / Float(r.to_f64().unwrap()),
233            (Float(x), Float(y)) => Float(x / y),
234            (Dec(n), r) => Self::from_dec_str(&n) / r,
235            (l, Dec(n)) => l / Self::from_dec_str(&n),
236        }
237    }
238}
239
240impl core::ops::Rem for Num {
241    type Output = Self;
242    fn rem(self, rhs: Self) -> Self::Output {
243        use num_bigint::BigInt;
244        use Num::*;
245        match (self, rhs) {
246            // `x.checked_rem(y)` is `None` only for:
247            //
248            // - `isize::MIN % -1`: The remainder is always 0.
249            // - `x % 0`: This is guarded by `Val`.
250            (Int(x), Int(y)) => Int(x.checked_rem(y).unwrap_or(0)),
251            (BigInt(x), BigInt(y)) => Num::big_int(&*x % &*y),
252            (Int(i), BigInt(b)) => Num::big_int(&BigInt::from(i) % &*b),
253            (BigInt(b), Int(i)) => Num::big_int(&*b % &BigInt::from(i)),
254            (Int(i), Float(f)) => Float(i as f64 % f),
255            (Float(f), Int(i)) => Float(f % i as f64),
256            (BigInt(i), Float(f)) => Float(i.to_f64().unwrap() % f),
257            (Float(f), BigInt(i)) => Float(f % i.to_f64().unwrap()),
258            (Float(x), Float(y)) => Float(x % y),
259            (Dec(n), r) => Self::from_dec_str(&n) % r,
260            (l, Dec(n)) => l % Self::from_dec_str(&n),
261        }
262    }
263}
264
265impl core::ops::Neg for Num {
266    type Output = Self;
267    fn neg(self) -> Self::Output {
268        match self {
269            Self::Int(x) => int_or_big(x.checked_neg(), [x], |[x]| -x),
270            Self::BigInt(x) => Self::big_int(-&*x),
271            Self::Float(x) => Self::Float(-x),
272            Self::Dec(n) => match n.strip_prefix('-') {
273                Some(pos) => Self::Dec(pos.to_string().into()),
274                None => Self::Dec(alloc::format!("-{n}").into()),
275            },
276        }
277    }
278}
279
280impl Hash for Num {
281    fn hash<H: Hasher>(&self, state: &mut H) {
282        match self {
283            // hash machine-sized integers like floating-point numbers,
284            // because they are also compared for equality that way
285            Self::Int(i) => Self::Float(*i as f64).hash(state),
286            // hash all non-finite floats, like NaN and infinity, to the same
287            // note that `Val::hash` assumes that `Num::hash` always starts
288            // with `state.write_u8(n)`, where `n < 2`
289            Self::Float(f) => {
290                state.write_u8(0);
291                if f.is_finite() {
292                    f.to_ne_bytes().hash(state);
293                }
294            }
295            Self::Dec(d) => Self::from_dec_str(d).hash(state),
296            Self::BigInt(i) => {
297                let f = i.to_f64().unwrap();
298                if f.is_finite() {
299                    Self::Float(f).hash(state)
300                } else {
301                    state.write_u8(1);
302                    i.hash(state)
303                }
304            }
305        }
306    }
307}
308
309#[test]
310fn hash_nums() {
311    use core::f64::consts::PI;
312    use core::hash::BuildHasher;
313    let h = |n| foldhash::fast::FixedState::with_seed(42).hash_one(n);
314
315    assert_eq!(h(Num::Int(4096)), h(Num::big_int(4096.into())));
316    assert_eq!(h(Num::Float(0.0)), h(Num::Int(0)));
317    assert_eq!(h(Num::Float(3.0)), h(Num::big_int(3.into())));
318
319    assert_ne!(h(Num::Float(0.2)), h(Num::Float(0.4)));
320    assert_ne!(h(Num::Float(PI)), h(Num::big_int(3.into())));
321    assert_ne!(h(Num::Float(0.2)), h(Num::Int(1)));
322    assert_ne!(h(Num::Int(1)), h(Num::Int(2)));
323}
324
325impl PartialEq for Num {
326    fn eq(&self, other: &Self) -> bool {
327        match (self, other) {
328            (Self::Int(x), Self::Int(y)) => x == y,
329            (Self::Int(i), Self::Float(f)) | (Self::Float(f), Self::Int(i)) => {
330                f.is_finite() && float_eq(*i as f64, *f)
331            }
332            (Self::BigInt(x), Self::BigInt(y)) => x == y,
333            (Self::Int(i), Self::BigInt(b)) | (Self::BigInt(b), Self::Int(i)) => {
334                **b == BigInt::from(*i)
335            }
336            (Self::BigInt(i), Self::Float(f)) | (Self::Float(f), Self::BigInt(i)) => {
337                f.is_finite() && float_eq(i.to_f64().unwrap(), *f)
338            }
339            (Self::Float(x), Self::Float(y)) => float_eq(*x, *y),
340            (Self::Dec(x), Self::Dec(y)) if Rc::ptr_eq(x, y) => true,
341            (Self::Dec(n), y) => &Self::from_dec_str(n) == y,
342            (x, Self::Dec(n)) => x == &Self::from_dec_str(n),
343        }
344    }
345}
346
347impl Eq for Num {}
348
349impl PartialOrd for Num {
350    fn partial_cmp(&self, other: &Self) -> Option<Ordering> {
351        Some(self.cmp(other))
352    }
353}
354
355impl Ord for Num {
356    fn cmp(&self, other: &Self) -> Ordering {
357        match (self, other) {
358            (Self::Int(x), Self::Int(y)) => x.cmp(y),
359            (Self::Int(x), Self::BigInt(y)) => BigInt::from(*x).cmp(y),
360            (Self::Int(i), Self::Float(f)) => float_cmp(*i as f64, *f),
361            (Self::BigInt(x), Self::Int(y)) => (**x).cmp(&BigInt::from(*y)),
362            (Self::BigInt(x), Self::BigInt(y)) => x.cmp(y),
363            // BigInt::to_f64 always yields Some, large values become f64::INFINITY
364            (Self::BigInt(x), Self::Float(y)) => float_cmp(x.to_f64().unwrap(), *y),
365            (Self::Float(f), Self::Int(i)) => float_cmp(*f, *i as f64),
366            (Self::Float(x), Self::BigInt(y)) => float_cmp(*x, y.to_f64().unwrap()),
367            (Self::Float(x), Self::Float(y)) => float_cmp(*x, *y),
368            (Self::Dec(x), Self::Dec(y)) if Rc::ptr_eq(x, y) => Ordering::Equal,
369            (Self::Dec(n), y) => Self::from_dec_str(n).cmp(y),
370            (x, Self::Dec(n)) => x.cmp(&Self::from_dec_str(n)),
371        }
372    }
373}
374
375fn float_eq(left: f64, right: f64) -> bool {
376    float_cmp(left, right) == Ordering::Equal
377}
378
379fn float_cmp(left: f64, right: f64) -> Ordering {
380    if left == 0. && right == 0. {
381        // consider negative and positive 0 as equal
382        Ordering::Equal
383    } else if left.is_nan() {
384        // there are more than 50 shades of NaN, and which of these
385        // you strike when you perform a calculation is not deterministic (!),
386        // therefore `total_cmp` may yield different results for the same calculation
387        // so we bite the bullet and handle this like in jq
388        Ordering::Less
389    } else if right.is_nan() {
390        Ordering::Greater
391    } else {
392        f64::total_cmp(&left, &right)
393    }
394}
395
396impl fmt::Display for Num {
397    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
398        match self {
399            Self::Int(i) => write!(f, "{i}"),
400            Self::BigInt(i) => write!(f, "{i}"),
401            Self::Float(x) if x.is_nan() => write!(f, "NaN"),
402            Self::Float(f64::INFINITY) => write!(f, "Infinity"),
403            Self::Float(f64::NEG_INFINITY) => write!(f, "-Infinity"),
404            Self::Float(x) => ryu::Buffer::new().format_finite(*x).fmt(f),
405            Self::Dec(n) => write!(f, "{n}"),
406        }
407    }
408}