lexical_parse_integer/algorithm.rs
1//! Radix-generic, optimized, string-to-integer conversion routines.
2//!
3//! These routines are highly optimized: they use various optimizations
4//! to read multiple digits at-a-time with less multiplication instructions,
5//! as well as other optimizations to avoid unnecessary compile-time branching.
6//!
7//! See [Algorithm](/docs/Algorithm.md) for a more detailed description of
8//! the algorithm choice here. See [Benchmarks.md](/docs/Benchmarks) for
9//! recent benchmark data.
10//!
11//! These allow implementations of partial and complete parsers
12//! using a single code-path via macros.
13//!
14//! This looks relatively, complex, but it's quite simple. Almost all
15//! of these branches are resolved at compile-time, so the resulting
16//! code is quite small while handling all of the internal complexity.
17//!
18//! 1. Helpers to process ok and error results for both complete and partial
19//! parsers. They have different APIs, and mixing the APIs leads to
20//! substantial performance hits.
21//! 2. Overflow checking on invalid digits for partial parsers, while just
22//! returning invalid digits for complete parsers.
23//! 3. A format-aware sign parser.
24//! 4. Digit parsing algorithms which explicitly wrap on overflow, for no
25//! additional overhead. This has major performance wins for **most**
26//! real-world integers, so most valid input will be substantially faster.
27//! 5. An algorithm to detect if overflow occurred. This is comprehensive, and
28//! short-circuits for common cases.
29//! 6. A parsing algorithm for unsigned integers, always producing positive
30//! values. This avoids any unnecessary branching.
31//! 7. Multi-digit optimizations for larger sizes.
32
33#![doc(hidden)]
34
35use lexical_util::digit::char_to_digit_const;
36use lexical_util::error::Error;
37use lexical_util::format::NumberFormat;
38use lexical_util::iterator::{AsBytes, Bytes, DigitsIter, Iter};
39use lexical_util::num::{as_cast, Integer};
40use lexical_util::result::Result;
41
42use crate::Options;
43
44// HELPERS
45
46/// Check if we should do multi-digit optimizations
47const fn can_try_parse_multidigits<'a, Iter: DigitsIter<'a>, const FORMAT: u128>(_: &Iter) -> bool {
48 let format = NumberFormat::<FORMAT> {};
49 Iter::IS_CONTIGUOUS && (cfg!(not(feature = "power-of-two")) || format.mantissa_radix() <= 10)
50}
51
52// Get if digits are required for the format.
53#[cfg_attr(not(feature = "format"), allow(unused_macros))]
54macro_rules! required_digits {
55 () => {
56 NumberFormat::<FORMAT>::REQUIRED_INTEGER_DIGITS
57 || NumberFormat::<FORMAT>::REQUIRED_MANTISSA_DIGITS
58 };
59}
60
61/// Return an value for a complete parser.
62macro_rules! into_ok_complete {
63 ($value:expr, $index:expr, $count:expr) => {{
64 #[cfg(not(feature = "format"))]
65 return Ok(as_cast($value));
66
67 #[cfg(feature = "format")]
68 if required_digits!() && $count == 0 {
69 into_error!(Empty, $index);
70 } else {
71 return Ok(as_cast($value));
72 }
73 }};
74}
75
76/// Return an value and index for a partial parser.
77macro_rules! into_ok_partial {
78 ($value:expr, $index:expr, $count:expr) => {{
79 #[cfg(not(feature = "format"))]
80 return Ok((as_cast($value), $index));
81
82 #[cfg(feature = "format")]
83 if required_digits!() && $count == 0 {
84 into_error!(Empty, $index);
85 } else {
86 return Ok((as_cast($value), $index));
87 }
88 }};
89}
90
91/// Return an error for a complete parser upon an invalid digit.
92macro_rules! invalid_digit_complete {
93 ($value:expr, $index:expr, $count:expr) => {
94 // Don't do any overflow checking here: we don't need it.
95 into_error!(InvalidDigit, $index - 1)
96 };
97}
98
99/// Return a value for a partial parser upon an invalid digit.
100/// This checks for numeric overflow, and returns the appropriate error.
101macro_rules! invalid_digit_partial {
102 ($value:expr, $index:expr, $count:expr) => {
103 // NOTE: The value is already positive/negative
104 into_ok_partial!($value, $index - 1, $count)
105 };
106}
107
108/// Return an error, returning the index and the error.
109macro_rules! into_error {
110 ($code:ident, $index:expr) => {{
111 return Err(Error::$code($index));
112 }};
113}
114
115/// Handle an invalid digit if the format feature is enabled.
116///
117/// This is because we can have special, non-digit characters near
118/// the start or internally. If `$is_end` is set to false, there **MUST**
119/// be elements in the underlying slice after the current iterator.
120#[cfg(feature = "format")]
121macro_rules! fmt_invalid_digit {
122 (
123 $value:ident, $iter:ident, $c:expr, $start_index:ident, $invalid_digit:ident, $is_end:expr
124 ) => {{
125 // NOTE: If we have non-contiguous iterators, we could have a skip character
126 // here at the boundary. This does not affect safety but it does affect
127 // correctness.
128 debug_assert!($iter.is_contiguous() || $is_end);
129
130 let base_suffix = NumberFormat::<FORMAT>::BASE_SUFFIX;
131 let uncased_base_suffix = NumberFormat::<FORMAT>::CASE_SENSITIVE_BASE_SUFFIX;
132 // Need to check for a base suffix, if so, return a valid value.
133 // We can't have a base suffix at the first value (need at least
134 // 1 digit).
135 if base_suffix != 0 && $iter.cursor() - $start_index > 1 {
136 let is_suffix = if uncased_base_suffix {
137 $c == base_suffix
138 } else {
139 $c.eq_ignore_ascii_case(&base_suffix)
140 };
141 // NOTE: If we're using the `take_n` optimization where it can't
142 // be the end, then the iterator cannot be done. So, in that case,
143 // we need to end. `take_n` also can never be used for non-
144 // contiguous iterators.
145 if is_suffix && $is_end && $iter.is_buffer_empty() {
146 // Break out of the loop, we've finished parsing.
147 break;
148 } else if !$iter.is_buffer_empty() {
149 // Haven't finished parsing, so we're going to call
150 // `invalid_digit!`. Need to ensure we include the
151 // base suffix in that.
152
153 // SAFETY: safe since the iterator is not empty, as checked
154 // in `$iter.is_buffer_empty()`. Adding in the check hopefully
155 // will be elided since it's a known constant.
156 unsafe { $iter.step_unchecked() };
157 }
158 }
159 // Might have handled our base-prefix here.
160 $invalid_digit!($value, $iter.cursor(), $iter.current_count())
161 }};
162}
163
164/// Just return an invalid digit
165#[cfg(not(feature = "format"))]
166macro_rules! fmt_invalid_digit {
167 (
168 $value:ident, $iter:ident, $c:expr, $start_index:ident, $invalid_digit:ident, $is_end:expr
169 ) => {{
170 $invalid_digit!($value, $iter.cursor(), $iter.current_count());
171 }};
172}
173
174/// Parse the sign from the leading digits.
175///
176/// This routine does the following:
177///
178/// 1. Parses the sign digit.
179/// 2. Handles if positive signs before integers are not allowed.
180/// 3. Handles negative signs if the type is unsigned.
181/// 4. Handles if the sign is required, but missing.
182/// 5. Handles if the iterator is empty, before or after parsing the sign.
183/// 6. Handles if the iterator has invalid, leading zeros.
184///
185/// Returns if the value is negative, or any values detected when
186/// validating the input.
187#[doc(hidden)]
188#[macro_export]
189macro_rules! parse_sign {
190 (
191 $byte:ident,
192 $is_signed:expr,
193 $no_positive:expr,
194 $required:expr,
195 $invalid_positive:ident,
196 $missing:ident
197 ) => {
198 // NOTE: `read_if` optimizes poorly since we then match after
199 match $byte.integer_iter().first() {
200 Some(&b'+') if !$no_positive => {
201 // SAFETY: We have at least 1 item left since we peaked a value
202 unsafe { $byte.step_unchecked() };
203 Ok(false)
204 },
205 Some(&b'+') if $no_positive => Err(Error::$invalid_positive($byte.cursor())),
206 Some(&b'-') if $is_signed => {
207 // SAFETY: We have at least 1 item left since we peaked a value
208 unsafe { $byte.step_unchecked() };
209 Ok(true)
210 },
211 Some(_) if $required => Err(Error::$missing($byte.cursor())),
212 _ if $required => Err(Error::$missing($byte.cursor())),
213 _ => Ok(false),
214 }
215 };
216}
217
218/// Parse the sign from the leading digits.
219#[cfg_attr(not(feature = "compact"), inline(always))]
220pub fn parse_sign<T: Integer, const FORMAT: u128>(byte: &mut Bytes<'_, FORMAT>) -> Result<bool> {
221 let format = NumberFormat::<FORMAT> {};
222 parse_sign!(
223 byte,
224 T::IS_SIGNED,
225 format.no_positive_mantissa_sign(),
226 format.required_mantissa_sign(),
227 InvalidPositiveSign,
228 MissingSign
229 )
230}
231
232// FOUR DIGITS
233
234/// Determine if 4 bytes, read raw from bytes, are 4 digits for the radix.
235#[cfg_attr(not(feature = "compact"), inline(always))]
236pub fn is_4digits<const FORMAT: u128>(v: u32) -> bool {
237 let radix = NumberFormat::<{ FORMAT }>::MANTISSA_RADIX;
238 debug_assert!(radix <= 10);
239
240 // We want to have a wrapping add and sub such that only values from the
241 // range `[0x30, 0x39]` (or narrower for custom radixes) will not
242 // overflow into the high bit. This means that the value needs to overflow
243 // into into `0x80` if the digit is 1 above, or `0x46` for the value `0x39`.
244 // Likewise, we only valid for `[0x30, 0x38]` for radix 8, so we need
245 // `0x47`.
246 let add = 0x46 + 10 - radix;
247 let add = add + (add << 8) + (add << 16) + (add << 24);
248 // This aims to underflow if anything is below the min digit: if we have any
249 // values under `0x30`, then this underflows and wraps into the high bit.
250 let sub = 0x3030_3030;
251 let a = v.wrapping_add(add);
252 let b = v.wrapping_sub(sub);
253
254 (a | b) & 0x8080_8080 == 0
255}
256
257/// Parse 4 bytes read from bytes into 4 digits.
258#[cfg_attr(not(feature = "compact"), inline(always))]
259pub fn parse_4digits<const FORMAT: u128>(mut v: u32) -> u32 {
260 let radix = NumberFormat::<{ FORMAT }>::MANTISSA_RADIX;
261 debug_assert!(radix <= 10);
262
263 // Normalize our digits to the range `[0, 9]`.
264 v -= 0x3030_3030;
265 // Scale digits in `0 <= Nn <= 99`.
266 v = (v * radix) + (v >> 8);
267 // Scale digits in `0 <= Nnnn <= 9999`.
268 v = ((v & 0x0000007f) * radix * radix) + ((v >> 16) & 0x0000007f);
269
270 v
271}
272
273/// Use a fast-path optimization, where we attempt to parse 4 digits at a time.
274/// This reduces the number of multiplications necessary to 2, instead of 4.
275///
276/// This approach is described in full here:
277/// <https://johnnylee-sde.github.io/Fast-numeric-string-to-int/>
278#[cfg_attr(not(feature = "compact"), inline(always))]
279pub fn try_parse_4digits<'a, T, Iter, const FORMAT: u128>(iter: &mut Iter) -> Option<T>
280where
281 T: Integer,
282 Iter: DigitsIter<'a>,
283{
284 // Can't do fast optimizations with radixes larger than 10, since
285 // we no longer have a contiguous ASCII block. Likewise, cannot
286 // use non-contiguous iterators.
287 debug_assert!(NumberFormat::<{ FORMAT }>::MANTISSA_RADIX <= 10);
288 debug_assert!(Iter::IS_CONTIGUOUS);
289
290 // Read our digits, validate the input, and check from there.
291 let bytes = u32::from_le(iter.peek_u32()?);
292 if is_4digits::<FORMAT>(bytes) {
293 // SAFETY: safe since we have at least 4 bytes in the buffer.
294 unsafe { iter.step_by_unchecked(4) };
295 Some(T::as_cast(parse_4digits::<FORMAT>(bytes)))
296 } else {
297 None
298 }
299}
300
301// EIGHT DIGITS
302
303/// Determine if 8 bytes, read raw from bytes, are 8 digits for the radix.
304/// See `is_4digits` for the algorithm description.
305#[cfg_attr(not(feature = "compact"), inline(always))]
306pub fn is_8digits<const FORMAT: u128>(v: u64) -> bool {
307 let radix = NumberFormat::<{ FORMAT }>::MANTISSA_RADIX;
308 debug_assert!(radix <= 10);
309
310 let add = 0x46 + 10 - radix;
311 let add = add + (add << 8) + (add << 16) + (add << 24);
312 let add = (add as u64) | ((add as u64) << 32);
313 // This aims to underflow if anything is below the min digit: if we have any
314 // values under `0x30`, then this underflows and wraps into the high bit.
315 let sub = 0x3030_3030_3030_3030;
316 let a = v.wrapping_add(add);
317 let b = v.wrapping_sub(sub);
318
319 (a | b) & 0x8080_8080_8080_8080 == 0
320}
321
322/// Parse 8 bytes read from bytes into 8 digits.
323/// Credit for this goes to @aqrit, which further optimizes the
324/// optimization described by Johnny Lee above.
325#[cfg_attr(not(feature = "compact"), inline(always))]
326pub fn parse_8digits<const FORMAT: u128>(mut v: u64) -> u64 {
327 let radix = NumberFormat::<{ FORMAT }>::MANTISSA_RADIX as u64;
328 debug_assert!(radix <= 10);
329
330 // Create our masks. Assume the optimizer will do this at compile time.
331 // It seems like an optimizing compiler **will** do this, so we
332 // should be safe.
333 let radix2 = radix * radix;
334 let radix4 = radix2 * radix2;
335 let radix6 = radix2 * radix4;
336 let mask = 0x0000_00FF_0000_00FFu64;
337 let mul1 = radix2 + (radix6 << 32);
338 let mul2 = 1 + (radix4 << 32);
339
340 // Normalize our digits to the base.
341 v -= 0x3030_3030_3030_3030;
342 // Scale digits in `0 <= Nn <= 99`.
343 v = (v * radix) + (v >> 8);
344 let v1 = (v & mask).wrapping_mul(mul1);
345 let v2 = ((v >> 16) & mask).wrapping_mul(mul2);
346
347 ((v1.wrapping_add(v2) >> 32) as u32) as u64
348}
349
350/// Use a fast-path optimization, where we attempt to parse 8 digits at a time.
351/// This reduces the number of multiplications necessary to 3, instead of 8.
352#[cfg_attr(not(feature = "compact"), inline(always))]
353pub fn try_parse_8digits<'a, T, Iter, const FORMAT: u128>(iter: &mut Iter) -> Option<T>
354where
355 T: Integer,
356 Iter: DigitsIter<'a>,
357{
358 // Can't do fast optimizations with radixes larger than 10, since
359 // we no longer have a contiguous ASCII block. Likewise, cannot
360 // use non-contiguous iterators.
361 debug_assert!(NumberFormat::<{ FORMAT }>::MANTISSA_RADIX <= 10);
362 debug_assert!(Iter::IS_CONTIGUOUS);
363
364 // Read our digits, validate the input, and check from there.
365 let bytes = u64::from_le(iter.peek_u64()?);
366 if is_8digits::<FORMAT>(bytes) {
367 // SAFETY: safe since we have at least 8 bytes in the buffer.
368 unsafe { iter.step_by_unchecked(8) };
369 Some(T::as_cast(parse_8digits::<FORMAT>(bytes)))
370 } else {
371 None
372 }
373}
374
375// ONE DIGIT
376
377/// Run a loop where the integer cannot possibly overflow.
378///
379/// If the length of the str is short compared to the range of the type
380/// we are parsing into, then we can be certain that an overflow will not occur.
381/// This bound is when `radix.pow(digits.len()) - 1 <= T::MAX` but the condition
382/// above is a faster (conservative) approximation of this.
383///
384/// Consider radix 16 as it has the highest information density per digit and
385/// will thus overflow the earliest: `u8::MAX` is `ff` - any str of length 2 is
386/// guaranteed to not overflow. `i8::MAX` is `7f` - only a str of length 1 is
387/// guaranteed to not overflow.
388///
389/// This is based off of [core/num](core).
390///
391/// * `value` - The current parsed value.
392/// * `iter` - An iterator over all bytes in the input.
393/// * `add_op` - The unchecked add/sub op.
394/// * `start_index` - The offset where parsing started.
395/// * `invalid_digit` - Behavior when an invalid digit is found.
396/// * `is_end` - If iter corresponds to the full input.
397///
398/// core: <https://doc.rust-lang.org/1.81.0/src/core/num/mod.rs.html#1480>
399macro_rules! parse_1digit_unchecked {
400 (
401 $value:ident,
402 $iter:ident,
403 $add_op:ident,
404 $start_index:ident,
405 $invalid_digit:ident,
406 $is_end:expr
407 ) => {{
408 // This is a slower parsing algorithm, going 1 digit at a time, but doing it in
409 // an unchecked loop.
410 let radix = NumberFormat::<FORMAT>::MANTISSA_RADIX;
411 while let Some(&c) = $iter.next() {
412 let digit = match char_to_digit_const(c, radix) {
413 Some(v) => v,
414 None => fmt_invalid_digit!($value, $iter, c, $start_index, $invalid_digit, $is_end),
415 };
416 // multiply first since compilers are good at optimizing things out and will do
417 // a fused mul/add We must do this after getting the digit for
418 // partial parsers
419 $value = $value.wrapping_mul(as_cast(radix)).$add_op(as_cast(digit));
420 }
421 }};
422}
423
424/// Run a loop where the integer could overflow.
425///
426/// This is a standard, unoptimized algorithm. This is based off of
427/// [core/num](core)
428///
429/// * `value` - The current parsed value.
430/// * `iter` - An iterator over all bytes in the input.
431/// * `add_op` - The checked add/sub op.
432/// * `start_index` - The offset where parsing started.
433/// * `invalid_digit` - Behavior when an invalid digit is found.
434/// * `overflow` - If the error is overflow or underflow.
435///
436/// core: <https://doc.rust-lang.org/1.81.0/src/core/num/mod.rs.html#1505>
437macro_rules! parse_1digit_checked {
438 (
439 $value:ident,
440 $iter:ident,
441 $add_op:ident,
442 $start_index:ident,
443 $invalid_digit:ident,
444 $overflow:ident
445 ) => {{
446 // This is a slower parsing algorithm, going 1 digit at a time, but doing it in
447 // an unchecked loop.
448 let radix = NumberFormat::<FORMAT>::MANTISSA_RADIX;
449 while let Some(&c) = $iter.next() {
450 let digit = match char_to_digit_const(c, radix) {
451 Some(v) => v,
452 None => fmt_invalid_digit!($value, $iter, c, $start_index, $invalid_digit, true),
453 };
454 // multiply first since compilers are good at optimizing things out and will do
455 // a fused mul/add
456 $value =
457 match $value.checked_mul(as_cast(radix)).and_then(|x| x.$add_op(as_cast(digit))) {
458 Some(value) => value,
459 None => into_error!($overflow, $iter.cursor() - 1),
460 }
461 }
462 }};
463}
464
465// OVERALL DIGITS
466// --------------
467
468/// Run an unchecked loop where digits are processed incrementally.
469///
470/// If the type size is small or there's not many digits, skip multi-digit
471/// optimizations. Otherwise, if the type size is large and we're not manually
472/// skipping manual optimizations, then we do this here.
473///
474/// * `value` - The current parsed value.
475/// * `iter` - An iterator over all bytes in the input.
476/// * `add_op` - The unchecked add/sub op.
477/// * `start_index` - The offset where parsing started.
478/// * `invalid_digit` - Behavior when an invalid digit is found.
479/// * `no_multi_digit` - If to disable multi-digit optimizations.
480/// * `is_end` - If iter corresponds to the full input.
481macro_rules! parse_digits_unchecked {
482 (
483 $value:ident,
484 $iter:ident,
485 $add_op:ident,
486 $start_index:ident,
487 $invalid_digit:ident,
488 $no_multi_digit:expr,
489 $is_end:expr
490 ) => {{
491 let can_multi = can_try_parse_multidigits::<_, FORMAT>(&$iter);
492 let use_multi = can_multi && !$no_multi_digit;
493
494 // these cannot overflow. also, we use at most 3 for a 128-bit float and 1 for a
495 // 64-bit float NOTE: Miri will complain about this if we use radices >=
496 // 16 but since they won't go into `try_parse_8digits!` or
497 // `try_parse_4digits` it will be optimized out and the overflow won't
498 // matter.
499 let format = NumberFormat::<FORMAT> {};
500 if use_multi && T::BITS >= 64 && $iter.buffer_length() >= 8 {
501 // Try our fast, 8-digit at a time optimizations.
502 let radix8 = T::from_u32(format.radix8());
503 while let Some(value) = try_parse_8digits::<T, _, FORMAT>(&mut $iter) {
504 $value = $value.wrapping_mul(radix8).$add_op(value);
505 }
506 } else if use_multi && T::BITS == 32 && $iter.buffer_length() >= 4 {
507 // Try our fast, 8-digit at a time optimizations.
508 let radix4 = T::from_u32(format.radix4());
509 while let Some(value) = try_parse_4digits::<T, _, FORMAT>(&mut $iter) {
510 $value = $value.wrapping_mul(radix4).$add_op(value);
511 }
512 }
513 parse_1digit_unchecked!($value, $iter, $add_op, $start_index, $invalid_digit, $is_end)
514 }};
515}
516
517/// Run checked loop where digits are processed with overflow checking.
518///
519/// If the type size is small or there's not many digits, skip multi-digit
520/// optimizations. Otherwise, if the type size is large and we're not manually
521/// skipping manual optimizations, then we do this here.
522///
523/// * `value` - The current parsed value.
524/// * `iter` - An iterator over all bytes in the input.
525/// * `add_op` - The checked add/sub op.
526/// * `add_op_uc` - The unchecked add/sub op for small digit optimizations.
527/// * `start_index` - The offset where parsing started.
528/// * `invalid_digit` - Behavior when an invalid digit is found.
529/// * `overflow` - If the error is overflow or underflow.
530/// * `no_multi_digit` - If to disable multi-digit optimizations.
531/// * `overflow_digits` - The number of digits before we need to consider
532/// checked ops.
533macro_rules! parse_digits_checked {
534 (
535 $value:ident,
536 $iter:ident,
537 $add_op:ident,
538 $add_op_uc:ident,
539 $start_index:ident,
540 $invalid_digit:ident,
541 $overflow:ident,
542 $no_multi_digit:expr,
543 $overflow_digits:expr
544 ) => {{
545 // Can use the unchecked for the `max_digits` here. If we
546 // have a non-contiguous iterator, we could have a case like
547 // 123__456, with no consecutive digit separators allowed. If
548 // it's broken between the `_` characters, the integer will be
549 // seen as valid when it isn't.
550 if cfg!(not(feature = "format")) || $iter.is_contiguous() {
551 if let Some(mut small) = $iter.take_n($overflow_digits) {
552 let mut small_iter = small.integer_iter();
553 parse_digits_unchecked!(
554 $value,
555 small_iter,
556 $add_op_uc,
557 $start_index,
558 $invalid_digit,
559 $no_multi_digit,
560 false
561 );
562 }
563 }
564
565 // NOTE: all our multi-digit optimizations have been done here: skip this
566 parse_1digit_checked!($value, $iter, $add_op, $start_index, $invalid_digit, $overflow)
567 }};
568}
569
570// ALGORITHM
571
572/// Generic algorithm for both partial and complete parsers.
573///
574/// * `invalid_digit` - Behavior on finding an invalid digit.
575/// * `into_ok` - Behavior when returning a valid value.
576/// * `invalid_digit` - Behavior when an invalid digit is found.
577/// * `no_multi_digit` - If to disable multi-digit optimizations.
578/// * `is_partial` - If the parser is a partial parser.
579#[rustfmt::skip]
580macro_rules! algorithm {
581($bytes:ident, $into_ok:ident, $invalid_digit:ident, $no_multi_digit:expr) => {{
582 // WARNING:
583 // --------
584 // None of this code can be changed for optimization reasons.
585 // Do not change it without benchmarking every change.
586 // 1. You cannot use the `NoSkipIterator` in the loop,
587 // you must either return a subslice (indexing)
588 // or increment outside of the loop.
589 // Failing to do so leads to numerous more, unnecessary
590 // conditional move instructions, killing performance.
591 // 2. Return a 0 or 1 shift, and indexing unchecked outside
592 // of the loop is slightly faster.
593 // 3. Partial and complete parsers cannot be efficiently done
594 // together.
595 //
596 // If you try to refactor without carefully monitoring benchmarks or
597 // assembly generation, please log the number of wasted hours: so
598 // 16 hours so far.
599
600 // With `step_by_unchecked`, this is sufficiently optimized.
601 // Removes conditional paths, to, which simplifies maintenance.
602 // The skip version of the iterator automatically coalesces to
603 // the no-skip iterator.
604 let mut byte = $bytes.bytes::<FORMAT>();
605 let radix = NumberFormat::<FORMAT>::MANTISSA_RADIX;
606
607 let is_negative = parse_sign::<T, FORMAT>(&mut byte)?;
608 let mut iter = byte.integer_iter();
609 if iter.is_buffer_empty() {
610 // Our default format **ALWAYS** requires significant digits, however,
611 // we can have cases where we don
612 #[cfg(not(feature = "format"))]
613 into_error!(Empty, iter.cursor());
614
615 #[cfg(feature = "format")]
616 if required_digits!() {
617 into_error!(Empty, iter.cursor());
618 } else {
619 $into_ok!(T::ZERO, iter.cursor(), 0)
620 }
621 }
622
623 // Feature-gate a lot of format-only code here to simplify analysis with our branching
624 // We only want to skip the zeros if have either require a base prefix or we don't
625 // allow integer leading zeros, since the skip is expensive
626 #[allow(unused_variables, unused_mut)]
627 let mut start_index = iter.cursor();
628 #[cfg_attr(not(feature = "format"), allow(unused_variables))]
629 let format = NumberFormat::<FORMAT> {};
630 #[cfg(feature = "format")]
631 if format.has_base_prefix() || format.no_integer_leading_zeros() {
632 // Skip any leading zeros. We want to do our check if it can't possibly overflow after.
633 // For skipping digit-based formats, this approximation is a way over estimate.
634 // NOTE: Skipping zeros is **EXPENSIVE* so we skip that without our format feature
635 let zeros = iter.skip_zeros();
636 start_index += zeros;
637
638 // Now, check to see if we have a valid base prefix.
639 let mut is_prefix = false;
640 let base_prefix = format.base_prefix();
641 if base_prefix != 0 && zeros == 1 {
642 // Check to see if the next character is the base prefix.
643 // We must have a format like `0x`, `0d`, `0o`. Note:
644 if iter.read_if_value(base_prefix, format.case_sensitive_base_prefix()).is_some() {
645 is_prefix = true;
646 if iter.is_buffer_empty() {
647 into_error!(Empty, iter.cursor());
648 } else {
649 start_index += 1;
650 }
651 }
652 }
653
654 // If we have a format that doesn't accept leading zeros,
655 // check if the next value is invalid. It's invalid if the
656 // first is 0, and the next is not a valid digit.
657 if !is_prefix && format.no_integer_leading_zeros() && zeros != 0 {
658 // Cannot have a base prefix and no leading zeros.
659 let index = iter.cursor() - zeros;
660 if zeros > 1 {
661 into_error!(InvalidLeadingZeros, index);
662 }
663 // NOTE: Zeros has to be 0 here, so our index == 1 or 2 (depending on sign)
664 match iter.peek().map(|&c| char_to_digit_const(c, format.radix())) {
665 // Valid digit, we have an invalid value.
666 Some(Some(_)) => into_error!(InvalidLeadingZeros, index),
667 // Have a non-digit character that follows.
668 Some(None) => $invalid_digit!(<T>::ZERO, iter.cursor() + 1, iter.current_count()),
669 // No digits following, has to be ok
670 None => $into_ok!(<T>::ZERO, index, iter.current_count()),
671 };
672 }
673 }
674
675 // shorter strings cannot possibly overflow so a great optimization
676 let overflow_digits = T::overflow_digits(radix);
677 let cannot_overflow = iter.as_slice().len() <= overflow_digits;
678
679 // NOTE:
680 // Don't add optimizations for 128-bit integers.
681 // 128-bit multiplication is rather efficient, it's only division
682 // that's very slow. Any shortcut optimizations increasing branching,
683 // and even if parsing a 64-bit integer is marginally faster, it
684 // culminates in **way** slower performance overall for simple
685 // integers, and no improvement for large integers.
686 let mut value = T::ZERO;
687 if cannot_overflow && is_negative {
688 parse_digits_unchecked!(value, iter, wrapping_sub, start_index, $invalid_digit, $no_multi_digit, true);
689 } if cannot_overflow {
690 parse_digits_unchecked!(value, iter, wrapping_add, start_index, $invalid_digit, $no_multi_digit, true);
691 } else if is_negative {
692 parse_digits_checked!(value, iter, checked_sub, wrapping_sub, start_index, $invalid_digit, Underflow, $no_multi_digit, overflow_digits);
693 } else {
694 parse_digits_checked!(value, iter, checked_add, wrapping_add, start_index, $invalid_digit, Overflow, $no_multi_digit, overflow_digits);
695 }
696
697 $into_ok!(value, iter.buffer_length(), iter.current_count())
698}};
699}
700
701/// Algorithm for the complete parser.
702#[cfg_attr(not(feature = "compact"), inline(always))]
703pub fn algorithm_complete<T, const FORMAT: u128>(bytes: &[u8], options: &Options) -> Result<T>
704where
705 T: Integer,
706{
707 algorithm!(bytes, into_ok_complete, invalid_digit_complete, options.get_no_multi_digit())
708}
709
710/// Algorithm for the partial parser.
711#[cfg_attr(not(feature = "compact"), inline(always))]
712pub fn algorithm_partial<T, const FORMAT: u128>(
713 bytes: &[u8],
714 options: &Options,
715) -> Result<(T, usize)>
716where
717 T: Integer,
718{
719 algorithm!(bytes, into_ok_partial, invalid_digit_partial, options.get_no_multi_digit())
720}