ftl_jiter/
number_decoder.rs

1#[cfg(feature = "num-bigint")]
2use num_bigint::BigInt;
3#[cfg(feature = "num-bigint")]
4use num_traits::cast::ToPrimitive;
5
6use std::ops::Range;
7
8use lexical_parse_float::{format as lexical_format, FromLexicalWithOptions, Options as ParseFloatOptions};
9
10use crate::errors::{json_err, json_error, JsonError, JsonResult};
11
12pub trait AbstractNumberDecoder {
13    type Output;
14
15    fn decode(data: &[u8], index: usize, first: u8, allow_inf_nan: bool) -> JsonResult<(Self::Output, usize)>;
16}
17
/// An integer value: either an [i64] or, when the `num-bigint` feature is
/// enabled, a [BigInt](num_bigint::BigInt).
#[derive(Debug, Clone, PartialEq)]
pub enum NumberInt {
    /// Machine-sized signed integer.
    Int(i64),
    /// Arbitrary-precision integer; only available with the `num-bigint` feature.
    #[cfg(feature = "num-bigint")]
    BigInt(BigInt),
}
25
26impl From<NumberInt> for f64 {
27    fn from(num: NumberInt) -> Self {
28        match num {
29            NumberInt::Int(int) => int as f64,
30            #[cfg(feature = "num-bigint")]
31            NumberInt::BigInt(big_int) => big_int.to_f64().unwrap_or(f64::NAN),
32        }
33    }
34}
35
36impl TryFrom<&[u8]> for NumberInt {
37    type Error = JsonError;
38
39    fn try_from(value: &[u8]) -> Result<Self, Self::Error> {
40        let first = *value.first().ok_or_else(|| json_error!(InvalidNumber, 0))?;
41        let (int_parse, index) = IntParse::parse(value, 0, first)?;
42        match int_parse {
43            IntParse::Int(int) => {
44                if index == value.len() {
45                    Ok(int)
46                } else {
47                    json_err!(InvalidNumber, index)
48                }
49            }
50            _ => json_err!(InvalidNumber, index),
51        }
52    }
53}
54
55impl AbstractNumberDecoder for NumberInt {
56    type Output = NumberInt;
57
58    fn decode(data: &[u8], index: usize, first: u8, _allow_inf_nan: bool) -> JsonResult<(Self::Output, usize)> {
59        let (int_parse, index) = IntParse::parse(data, index, first)?;
60        match int_parse {
61            IntParse::Int(int) => Ok((int, index)),
62            _ => json_err!(FloatExpectingInt, index),
63        }
64    }
65}
66
67pub struct NumberFloat;
68
69impl AbstractNumberDecoder for NumberFloat {
70    type Output = f64;
71
72    fn decode(data: &[u8], mut index: usize, first: u8, allow_inf_nan: bool) -> JsonResult<(Self::Output, usize)> {
73        let start = index;
74
75        let positive = match first {
76            b'N' => return consume_nan(data, index, allow_inf_nan),
77            b'-' => false,
78            _ => true,
79        };
80        if !positive {
81            // we started with a minus sign, so the first digit is at index + 1
82            index += 1;
83        };
84        let first2 = if positive { Some(&first) } else { data.get(index) };
85
86        if let Some(digit) = first2 {
87            if INT_CHAR_MAP[*digit as usize] {
88                const JSON: u128 = lexical_format::JSON;
89                let options = ParseFloatOptions::new();
90                match f64::from_lexical_partial_with_options::<JSON>(&data[start..], &options) {
91                    Ok((float, index)) => Ok((float, index + start)),
92                    Err(_) => {
93                        // it's impossible to work out the right error from LexicalError here, so we parse again
94                        // with NumberRange and use that error
95                        match NumberRange::decode(data, start, first, allow_inf_nan) {
96                            Err(e) => Err(e),
97                            // NumberRange should always raise an error if `parse_partial_with_options`
98                            // except for Infinity and -Infinity, which are handled above
99                            Ok(_) => unreachable!("NumberRange should always return an error"),
100                        }
101                    }
102                }
103            } else if digit == &b'I' {
104                consume_inf_f64(data, index, positive, allow_inf_nan)
105            } else {
106                json_err!(InvalidNumber, index)
107            }
108        } else {
109            json_err!(EofWhileParsingValue, index)
110        }
111    }
112}
113
114/// A number that can be either a [NumberInt] or an [f64]
115#[derive(Debug, Clone, PartialEq)]
116pub enum NumberAny {
117    Int(NumberInt),
118    Float(f64),
119}
120
/// Convert the number into a Python object by delegating to the wrapped
/// value's own `ToPyObject` implementation.
#[cfg(feature = "python")]
impl pyo3::ToPyObject for NumberAny {
    fn to_object(&self, py: pyo3::Python<'_>) -> pyo3::PyObject {
        match self {
            Self::Float(float) => float.to_object(py),
            Self::Int(int) => match int {
                NumberInt::Int(small) => small.to_object(py),
                #[cfg(feature = "num-bigint")]
                NumberInt::BigInt(big) => big.to_object(py),
            },
        }
    }
}
132
133impl From<NumberAny> for f64 {
134    fn from(num: NumberAny) -> Self {
135        match num {
136            NumberAny::Int(int) => int.into(),
137            NumberAny::Float(f) => f,
138        }
139    }
140}
141
142impl AbstractNumberDecoder for NumberAny {
143    type Output = NumberAny;
144
145    fn decode(data: &[u8], index: usize, first: u8, allow_inf_nan: bool) -> JsonResult<(Self::Output, usize)> {
146        let start = index;
147        let (int_parse, index) = IntParse::parse(data, index, first)?;
148        match int_parse {
149            IntParse::Int(int) => Ok((Self::Int(int), index)),
150            IntParse::Float => {
151                NumberFloat::decode(data, start, first, allow_inf_nan).map(|(f, index)| (Self::Float(f), index))
152            }
153            IntParse::FloatInf(positive) => {
154                consume_inf_f64(data, index, positive, allow_inf_nan).map(|(f, index)| (Self::Float(f), index))
155            }
156            IntParse::FloatNaN => consume_nan(data, index, allow_inf_nan).map(|(f, index)| (Self::Float(f), index)),
157        }
158    }
159}
160
161fn consume_inf(data: &[u8], index: usize, positive: bool, allow_inf_nan: bool) -> JsonResult<usize> {
162    if allow_inf_nan {
163        crate::parse::consume_infinity(data, index)
164    } else if positive {
165        json_err!(ExpectedSomeValue, index)
166    } else {
167        json_err!(InvalidNumber, index)
168    }
169}
170
171fn consume_inf_f64(data: &[u8], index: usize, positive: bool, allow_inf_nan: bool) -> JsonResult<(f64, usize)> {
172    let end = consume_inf(data, index, positive, allow_inf_nan)?;
173    if positive {
174        Ok((f64::INFINITY, end))
175    } else {
176        Ok((f64::NEG_INFINITY, end))
177    }
178}
179
180fn consume_nan(data: &[u8], index: usize, allow_inf_nan: bool) -> JsonResult<(f64, usize)> {
181    if allow_inf_nan {
182        let end = crate::parse::consume_nan(data, index)?;
183        Ok((f64::NAN, end))
184    } else {
185        json_err!(ExpectedSomeValue, index)
186    }
187}
188
189#[derive(Debug)]
190pub(crate) enum IntParse {
191    Int(NumberInt),
192    Float,
193    FloatInf(bool),
194    FloatNaN,
195}
196
197impl IntParse {
198    pub(crate) fn parse(data: &[u8], mut index: usize, first: u8) -> JsonResult<(Self, usize)> {
199        let start = index;
200        let positive = match first {
201            b'N' => return Ok((Self::FloatNaN, index)),
202            b'-' => false,
203            _ => true,
204        };
205        if !positive {
206            // we started with a minus sign, so the first digit is at index + 1
207            index += 1;
208        };
209        let first2 = if positive { Some(&first) } else { data.get(index) };
210        let first_value = match first2 {
211            Some(b'0') => {
212                index += 1;
213                return match data.get(index) {
214                    Some(b'.') => Ok((Self::Float, index)),
215                    Some(b'e' | b'E') => Ok((Self::Float, index)),
216                    Some(digit) if digit.is_ascii_digit() => json_err!(InvalidNumber, index),
217                    _ => Ok((Self::Int(NumberInt::Int(0)), index)),
218                };
219            }
220            Some(b'I') => return Ok((Self::FloatInf(positive), index)),
221            Some(digit) if (b'1'..=b'9').contains(digit) => (digit & 0x0f) as u64,
222            Some(_) => return json_err!(InvalidNumber, index),
223            None => return json_err!(EofWhileParsingValue, index),
224        };
225
226        index += 1;
227        let (chunk, new_index) = IntChunk::parse_small(data, index, first_value);
228
229        let ongoing: u64 = match chunk {
230            IntChunk::Ongoing(value) => value,
231            IntChunk::Done(value) => {
232                let mut value_i64 = value as i64;
233                if !positive {
234                    value_i64 = -value_i64;
235                }
236                return Ok((Self::Int(NumberInt::Int(value_i64)), new_index));
237            }
238            IntChunk::Float => return Ok((Self::Float, new_index)),
239        };
240
241        // number is too big for i64, we need to use a BigInt,
242        // or error out if num-bigint is not enabled
243
244        #[cfg(not(feature = "num-bigint"))]
245        {
246            // silence unused variable warning
247            let _ = (ongoing, start);
248            return json_err!(NumberOutOfRange, index);
249        }
250
251        #[cfg(feature = "num-bigint")]
252        {
253            #[cfg(target_arch = "aarch64")]
254            // in aarch64 we use a 128 bit registers - 16 bytes
255            const ONGOING_CHUNK_MULTIPLIER: u64 = 10u64.pow(16);
256            #[cfg(not(target_arch = "aarch64"))]
257            // decode_int_chunk_fallback - we parse 18 bytes when the number is ongoing
258            const ONGOING_CHUNK_MULTIPLIER: u64 = 10u64.pow(18);
259
260            const POW_10: [u64; 18] = [
261                10u64.pow(0),
262                10u64.pow(1),
263                10u64.pow(2),
264                10u64.pow(3),
265                10u64.pow(4),
266                10u64.pow(5),
267                10u64.pow(6),
268                10u64.pow(7),
269                10u64.pow(8),
270                10u64.pow(9),
271                10u64.pow(10),
272                10u64.pow(11),
273                10u64.pow(12),
274                10u64.pow(13),
275                10u64.pow(14),
276                10u64.pow(15),
277                10u64.pow(16),
278                10u64.pow(17),
279            ];
280
281            let mut big_value: BigInt = ongoing.into();
282            index = new_index;
283
284            loop {
285                let (chunk, new_index) = IntChunk::parse_big(data, index);
286                if (new_index - start) > 4300 {
287                    return json_err!(NumberOutOfRange, start + 4301);
288                }
289                match chunk {
290                    IntChunk::Ongoing(value) => {
291                        big_value *= ONGOING_CHUNK_MULTIPLIER;
292                        big_value += value;
293                        index = new_index;
294                    }
295                    IntChunk::Done(value) => {
296                        big_value *= POW_10[new_index - index];
297                        big_value += value;
298                        if !positive {
299                            big_value = -big_value;
300                        }
301                        return Ok((Self::Int(NumberInt::BigInt(big_value)), new_index));
302                    }
303                    IntChunk::Float => return Ok((Self::Float, new_index)),
304                }
305            }
306        }
307    }
308}
309
/// Result of parsing one bounded run of decimal digits.
pub(crate) enum IntChunk {
    /// The whole chunk was digits and more may follow; holds the accumulated value.
    Ongoing(u64),
    /// The digit run ended inside this chunk (non-digit byte or end of data);
    /// holds the accumulated value.
    Done(u64),
    /// A `.`, `e` or `E` ended the digit run, so the number is a float.
    Float,
}
315
316impl IntChunk {
317    #[inline(always)]
318    fn parse_small(data: &[u8], index: usize, value: u64) -> (Self, usize) {
319        decode_int_chunk_fallback(data, index, value)
320    }
321
322    #[inline(always)]
323    fn parse_big(data: &[u8], index: usize) -> (Self, usize) {
324        // TODO x86_64: use simd
325
326        #[cfg(target_arch = "aarch64")]
327        {
328            crate::simd_aarch64::decode_int_chunk(data, index)
329        }
330        #[cfg(not(target_arch = "aarch64"))]
331        {
332            decode_int_chunk_fallback(data, index, 0)
333        }
334    }
335}
336
337/// Turns out this is faster than fancy bit manipulation, see
338/// https://github.com/Alexhuszagh/rust-lexical/blob/main/lexical-parse-integer/docs/Algorithm.md
339/// for some context
340#[inline(always)]
341pub(crate) fn decode_int_chunk_fallback(data: &[u8], mut index: usize, mut value: u64) -> (IntChunk, usize) {
342    // i64::MAX = 9223372036854775807 (19 chars) - so 18 chars is always valid as an i64
343    for _ in 0..18 {
344        if let Some(digit) = data.get(index) {
345            if INT_CHAR_MAP[*digit as usize] {
346                // we use wrapping add to avoid branching - we know the value cannot wrap
347                value = value.wrapping_mul(10).wrapping_add((digit & 0x0f) as u64);
348                index += 1;
349                continue;
350            } else if matches!(digit, b'.' | b'e' | b'E') {
351                return (IntChunk::Float, index);
352            }
353        }
354        return (IntChunk::Done(value), index);
355    }
356    (IntChunk::Ongoing(value), index)
357}
358
/// Lookup table indexed by byte value: `true` exactly for the ASCII digits
/// `b'0'..=b'9'`, `false` for every other byte.
pub(crate) static INT_CHAR_MAP: [bool; 256] = {
    let mut table = [false; 256];
    let mut byte = b'0';
    // filled in at compile time; `for` loops are not available in const context
    while byte <= b'9' {
        table[byte as usize] = true;
        byte += 1;
    }
    table
};
382
/// Location of a number inside the input, without decoding its value.
pub struct NumberRange {
    /// Byte range covered by the number.
    pub range: Range<usize>,
    /// `true` when the number was classified as an integer, `false` for a float.
    // in some cfg configurations, this field is never read.
    #[allow(dead_code)]
    pub is_int: bool,
}

impl NumberRange {
    /// Build a range describing an integer.
    fn int(range: Range<usize>) -> Self {
        Self { range, is_int: true }
    }

    /// Build a range describing a float.
    fn float(range: Range<usize>) -> Self {
        Self { range, is_int: false }
    }
}
405
406impl AbstractNumberDecoder for NumberRange {
407    type Output = Self;
408
409    fn decode(data: &[u8], mut index: usize, first: u8, allow_inf_nan: bool) -> JsonResult<(Self::Output, usize)> {
410        let start = index;
411
412        let positive = match first {
413            b'N' => {
414                let (_, end) = consume_nan(data, index, allow_inf_nan)?;
415                return Ok((Self::float(start..end), end));
416            }
417            b'-' => false,
418            _ => true,
419        };
420        if !positive {
421            // we started with a minus sign, so the first digit is at index + 1
422            index += 1;
423        };
424
425        match data.get(index) {
426            Some(b'0') => {
427                // numbers start with zero must be floats, next char must be a dot
428                index += 1;
429                return match data.get(index) {
430                    Some(b'.') => {
431                        index += 1;
432                        let end = consume_decimal(data, index)?;
433                        Ok((Self::float(start..end), end))
434                    }
435                    Some(b'e' | b'E') => {
436                        index += 1;
437                        let end = consume_exponential(data, index)?;
438                        Ok((Self::float(start..end), end))
439                    }
440                    Some(digit) if digit.is_ascii_digit() => json_err!(InvalidNumber, index),
441                    _ => return Ok((Self::int(start..index), index)),
442                };
443            }
444            Some(b'I') => {
445                let end = consume_inf(data, index, positive, allow_inf_nan)?;
446                return Ok((Self::float(start..end), end));
447            }
448            Some(digit) if (b'1'..=b'9').contains(digit) => (),
449            Some(_) => return json_err!(InvalidNumber, index),
450            None => return json_err!(EofWhileParsingValue, index),
451        };
452
453        index += 1;
454        for _ in 0..18 {
455            if let Some(digit) = data.get(index) {
456                if INT_CHAR_MAP[*digit as usize] {
457                    index += 1;
458                    continue;
459                } else if matches!(digit, b'.') {
460                    index += 1;
461                    let end = consume_decimal(data, index)?;
462                    return Ok((Self::float(start..end), end));
463                } else if matches!(digit, b'e' | b'E') {
464                    index += 1;
465                    let end = consume_exponential(data, index)?;
466                    return Ok((Self::float(start..end), end));
467                }
468            }
469            return Ok((Self::int(start..index), index));
470        }
471        loop {
472            let (chunk, new_index) = IntChunk::parse_big(data, index);
473            if (new_index - start) > 4300 {
474                return json_err!(NumberOutOfRange, start + 4301);
475            }
476            #[allow(clippy::single_match_else)]
477            match chunk {
478                IntChunk::Ongoing(_) => {
479                    index = new_index;
480                }
481                IntChunk::Done(_) => return Ok((Self::int(start..new_index), new_index)),
482                IntChunk::Float => {
483                    return match data.get(new_index) {
484                        Some(b'.') => {
485                            index = new_index + 1;
486                            let end = consume_decimal(data, index)?;
487                            Ok((Self::float(start..end), end))
488                        }
489                        _ => {
490                            index = new_index + 1;
491                            let end = consume_exponential(data, index)?;
492                            Ok((Self::float(start..end), end))
493                        }
494                    }
495                }
496            }
497        }
498    }
499}
500
501fn consume_exponential(data: &[u8], mut index: usize) -> JsonResult<usize> {
502    match data.get(index) {
503        Some(b'-' | b'+') => {
504            index += 1;
505        }
506        Some(v) if v.is_ascii_digit() => (),
507        Some(_) => return json_err!(InvalidNumber, index),
508        None => return json_err!(EofWhileParsingValue, index),
509    };
510
511    match data.get(index) {
512        Some(v) if v.is_ascii_digit() => (),
513        Some(_) => return json_err!(InvalidNumber, index),
514        None => return json_err!(EofWhileParsingValue, index),
515    };
516    index += 1;
517
518    while let Some(next) = data.get(index) {
519        match next {
520            b'0'..=b'9' => (),
521            _ => break,
522        }
523        index += 1;
524    }
525
526    Ok(index)
527}
528
529fn consume_decimal(data: &[u8], mut index: usize) -> JsonResult<usize> {
530    match data.get(index) {
531        Some(v) if v.is_ascii_digit() => (),
532        Some(_) => return json_err!(InvalidNumber, index),
533        None => return json_err!(EofWhileParsingValue, index),
534    };
535    index += 1;
536
537    while let Some(next) = data.get(index) {
538        match next {
539            b'0'..=b'9' => (),
540            b'e' | b'E' => {
541                index += 1;
542                return consume_exponential(data, index);
543            }
544            _ => break,
545        }
546        index += 1;
547    }
548
549    Ok(index)
550}