lexical_write_integer/
algorithm.rs

1//! Radix-generic, optimized, integer-to-string conversion routines.
2//!
3//! These routines are highly optimized: they unroll 4 loops at a time,
4//! using pre-computed base^2 tables. This was popularized by Andrei
5//! Alexandrescu, and uses 2 digits per division, which we further optimize in
6//! up to 4 digits per division with a bit shift.
7//!
8//! See [Algorithm](/docs/Algorithm.md) for a more detailed description of
9//! the algorithm choice here. See [Benchmarks](/docs/Benchmarks.md) for
10//! recent benchmark data.
11
12#![cfg(not(feature = "compact"))]
13#![cfg(feature = "power-of-two")]
14#![doc(hidden)]
15
16use lexical_util::assert::debug_assert_radix;
17use lexical_util::digit::digit_to_char;
18use lexical_util::div128::u128_divrem;
19use lexical_util::format::{radix_from_flags, NumberFormat};
20use lexical_util::num::{AsCast, UnsignedInteger};
21use lexical_util::step::u64_step;
22
23use crate::digit_count::DigitCount;
24
25/// Index a buffer and get a mutable reference, without bounds checking.
26/// The `($x:ident[$i:expr] = $y:ident[$j:expr])` is not used with [`compact`].
27/// The newer version of the lint is `unused_macro_rules`, but this isn't
28/// supported until nightly-2022-05-12.
29///
30/// By default, writers tend to be safe, due to Miri, Valgrind,
31/// and other tests and careful validation against a wide range
32/// of randomized input. Parsers are much trickier to validate.
33///
34/// [`compact`]: crate#compact
35#[allow(unknown_lints, unused_macro_rules)]
36macro_rules! i {
37    ($x:ident[$i:expr]) => {
38        *$x.get_unchecked_mut($i)
39    };
40
41    ($x:ident[$i:expr] = $y:ident[$j:expr]) => {
42        *$x.get_unchecked_mut($i) = *$y.get_unchecked($j)
43    };
44}
45
46/// Write 2 digits to buffer.
47///
48/// # Safety
49///
50/// Safe if `bytes` is large enough to hold 2 characters, `index >= 2`,
51/// and if the 2 * remainder, or `r`, has it so `r + 1 < table.len()`.
52macro_rules! write_digits {
53    ($bytes:ident, $index:ident, $table:ident, $r:ident) => {{
54        debug_assert!($index >= 2);
55        debug_assert!($bytes.len() >= 2);
56        debug_assert!($r + 1 < $table.len());
57        $index -= 1;
58        unsafe { i!($bytes[$index] = $table[$r + 1]) };
59        $index -= 1;
60        unsafe { i!($bytes[$index] = $table[$r]) };
61    }};
62}
63
64/// Write 1 digit to buffer.
65///
66/// # Safety
67///
68/// Safe if `bytes` is large enough to hold 1 characters, and `r < 36`.
69/// Adding in direct safety checks here destroys performance, often by
70/// 30%+ so it's up to the caller to beware.
71macro_rules! write_digit {
72    ($bytes:ident, $index:ident, $r:ident) => {{
73        debug_assert!($index >= 1);
74        debug_assert!($bytes.len() >= 1);
75        debug_assert!($r < 36);
76        $index -= 1;
77        unsafe { i!($bytes[$index]) = digit_to_char($r) };
78    }};
79}
80
81// NOTE: Don't use too many generics:
82//  We don't need generics for most of the internal algorithms,
83//  and doing so kills performance. Why? I don't know, but assuming
84//  it messed with the compiler's code generation.
85
86/// Write integral digits to buffer.
87///
88/// This algorithm first writes 4, then 2 digits at a time, finally
89/// the last 1 or 2 digits, using power reduction to speed up the
90/// algorithm a lot.
91///
92/// # Safety
93///
94/// This is safe as long as the buffer is large enough to hold `T::MAX`
95/// digits in radix `N` and the index >= digit count. Note  that making
96/// small changes here can destroy performance, so it's crucial we do this
97/// correctly.
98///
99/// If `buffer.len() >= T::DIGITS` and `index >= T::DIGITS`, then this is
100/// safe. We first carve off 4 digits off the end, similar to the algorithm
101/// in compact, then 2 at a time, then 1, index will never wrap under 0.
102/// Since we validate the table size and radix inside, this is the only
103/// safety precondition that must be held up.
104///
105/// See [algorithm] and the [crate] documentation for more detailed
106/// information on the safety considerations.
107#[inline(always)]
108unsafe fn write_digits<T: UnsignedInteger>(
109    mut value: T,
110    radix: u32,
111    table: &[u8],
112    buffer: &mut [u8],
113    mut index: usize,
114    count: usize,
115) -> usize {
116    debug_assert_radix(radix);
117    debug_assert!(buffer.len() >= count, "buffer must at least be as the digit count");
118
119    // Calculate if we can do multi-digit optimizations
120    assert!((2..=36).contains(&radix), "radix must be >= 2 and <= 36");
121    let radix2 = radix * radix;
122    let radix4 = radix2 * radix2;
123
124    // Pre-compute our powers of radix.
125    let radix = T::from_u32(radix);
126
127    // SAFETY: All of these are safe for the buffer writes as long as
128    // the buffer is large enough to hold `T::MAX` digits in radix `N`.
129    // We confirm (which will be compiled out) that the table cannot
130    // overflow since it's the indexing is `0..radix^2 * 2`.
131    assert!(table.len() >= radix2 as usize * 2, "table must be 2 * radix^2 long");
132
133    // Decode 4 digits at a time.
134    if T::BITS >= 32 || radix4 < T::MAX.as_u32() {
135        let radix2 = T::from_u32(radix2);
136        let radix4 = T::from_u32(radix4);
137        while value >= radix4 {
138            let r = value % radix4;
139            value /= radix4;
140            let r1 = usize::as_cast(T::TWO * (r / radix2));
141            let r2 = usize::as_cast(T::TWO * (r % radix2));
142
143            // SAFETY: This is always safe, since the table is `2*radix^2`, and
144            // `r1` and `r2` must be in the range `[0, 2*radix^2-1)`, since the maximum
145            // value of r is `radix4-1`, which must have a `div` and `r`
146            // in the range `[0, radix^2-1)`.
147            write_digits!(buffer, index, table, r2);
148            write_digits!(buffer, index, table, r1);
149        }
150    }
151
152    // Decode 2 digits at a time.
153    if T::BITS >= 16 || radix2 < T::MAX.as_u32() {
154        let radix2 = T::from_u32(radix2);
155        while value >= radix2 {
156            let r = usize::as_cast(T::TWO * (value % radix2));
157            value /= radix2;
158
159            // SAFETY: this is always safe, since the table is `2*radix^2`, and
160            // `r` must be in the range `[0, 2*radix^2-1)`.
161            write_digits!(buffer, index, table, r);
162        }
163    }
164
165    // Decode last 2 digits.
166    if value < radix {
167        let r = u32::as_cast(value);
168        // SAFETY: this is always safe, since `value < radix`, so it must be < 36.
169        write_digit!(buffer, index, r);
170    } else {
171        // NOTE: If this is a `u8`, we need to first widen the type.
172        let r = usize::as_cast(T::TWO) * usize::as_cast(value);
173        // SAFETY: this is always safe, since the table is `2*radix^2`, and
174        // the value must `<= radix^2`, so rem must be in the range
175        // `[0, 2*radix^2-1)`.
176        write_digits!(buffer, index, table, r);
177    }
178
179    index
180}
181
182/// Specialized digits writer for u128, since it writes at least step digits.
183///
184/// # Safety
185///
186/// This is safe as long as the buffer is large enough to hold `T::MAX`
187/// digits in radix `N`. See [algorithm] for more safety considerations.
188#[inline(always)]
189unsafe fn write_step_digits<T: UnsignedInteger>(
190    value: T,
191    radix: u32,
192    table: &[u8],
193    buffer: &mut [u8],
194    index: usize,
195    step: usize,
196    count: usize,
197) -> usize {
198    debug_assert_radix(radix);
199
200    let start = index;
201    // SAFETY: safe as long as the call to `write_step_digits` is safe.
202    let index = unsafe { write_digits(value, radix, table, buffer, index, count) };
203    // Write the remaining 0 bytes.
204    let end = start.saturating_sub(step);
205    // SAFETY: this is always safe since `end < index && index < start`.
206    let zeros = unsafe { &mut i!(buffer[end..index]) };
207    zeros.fill(b'0');
208
209    end
210}
211
212/// Optimized implementation for radix-N numbers.
213///
214/// This uses an Alexandrescu algorithm, which prints 2 digits at a time
215/// which is much faster than a naive approach. However, the jeaiii algorithm
216/// can be faster still for decimal numbers:
217///     <https://jk-jeon.github.io/posts/2022/02/jeaiii-algorithm/>
218///
219/// # Safety
220///
221/// Safe as long as [`digit_count`] returns the number of written digits.
222/// For performance reasons, this is always calculated as the exact number
223/// of digits. If the value is too small, then the buffer will underflow,
224/// causing out-of-bounds read/writes. Care must be used that [`digit_count`]
225/// is correctly implemented.
226///
227/// Since [`digit_count`] is implemented as an unsafe trait, these guarantees
228/// must be held.
229///
230/// See the crate [`crate`] documentation for more security considerations.
231///
232/// [`digit_count`]: `crate::digit_count::DigitCount`
233#[inline(always)]
234#[allow(clippy::unnecessary_safety_comment)]
235pub fn algorithm<T>(value: T, radix: u32, table: &[u8], buffer: &mut [u8]) -> usize
236where
237    T: UnsignedInteger + DigitCount,
238{
239    // NOTE: These checks should be resolved at compile time, so
240    // they're unlikely to add any performance overhead.
241    assert!((2..=36).contains(&radix), "radix must be >= 2 and <= 36");
242    assert!(table.len() >= (radix * radix * 2) as usize, "table must be 2 * radix^2 long");
243
244    // get our digit count and only write up until that range
245    // the digit count should be the exact number of digits written by
246    // the number. this is for performance reasons: using `memcpy` destroys
247    // performance.
248    let count = value.digit_count(radix);
249    assert!(
250        count <= buffer.len(),
251        "The buffer must be large enough to contain the significant digits."
252    );
253    let buffer = &mut buffer[..count];
254
255    // SAFETY: Both forms of unchecked indexing cannot overflow.
256    // The table always has `2*radix^2` elements, so it must be a legal index.
257    // The buffer is ensured to have at least `FORMATTED_SIZE` or
258    // `FORMATTED_SIZE_DECIMAL` characters, which is the maximum number of
259    // digits an integer of that size may write.
260    _ = unsafe { write_digits(value, radix, table, buffer, buffer.len(), count) };
261
262    count
263}
264
265/// Optimized implementation for radix-N 128-bit numbers.
266///
267/// # Safety
268///
269/// Safe as long as [`digit_count`] returns the number of written digits.
270/// For performance reasons, this is always calculated as the exact number
271/// of digits. If the value is too small, then the buffer will underflow,
272/// causing out-of-bounds read/writes. Care must be used that [`digit_count`]
273/// is correctly implemented.
274///
275/// Since [`digit_count`] is implemented as an unsafe trait, these guarantees
276/// must be held.
277///
278/// See the crate [`crate`] documentation for more security considerations.
279///
280/// [`digit_count`]: `crate::digit_count::DigitCount`
281#[inline(always)]
282pub fn algorithm_u128<const FORMAT: u128, const MASK: u128, const SHIFT: i32>(
283    value: u128,
284    table: &[u8],
285    buffer: &mut [u8],
286) -> usize {
287    // NOTE: Use the const version of radix for `u64_step` and
288    // `u128_divrem` to ensure they're evaluated at compile time.
289    assert!(NumberFormat::<{ FORMAT }> {}.is_valid());
290    // NOTE: These checks should be resolved at compile time, so
291    // they're unlikely to add any performance overhead.
292    let radix = radix_from_flags(FORMAT, MASK, SHIFT);
293    assert!((2..=36).contains(&radix), "radix must be >= 2 and <= 36");
294    assert!(table.len() >= (radix * radix * 2) as usize, "table must be 2 * radix^2 long");
295
296    // Quick approximations to make the algorithm **a lot** faster.
297    // If the value can be represented in a 64-bit integer, we can
298    // do this as a native integer.
299    if value <= u64::MAX as u128 {
300        return algorithm(value as u64, radix, table, buffer);
301    }
302
303    // get our digit count and only write up until that range
304    // the digit count should be the exact number of digits written by
305    // the number. this is for performance reasons: using `memcpy` destroys
306    // performance.
307    let count = value.digit_count(radix);
308    assert!(
309        count <= buffer.len(),
310        "The buffer must be large enough to contain the significant digits."
311    );
312    let buffer = &mut buffer[..count];
313
314    // LOGIC: Both forms of unchecked indexing cannot overflow.
315    // The table always has `2*radix^2` elements, so it must be a legal index.
316    // The buffer is ensured to have at least `FORMATTED_SIZE` or
317    // `FORMATTED_SIZE_DECIMAL` characters, which is the maximum number of
318    // digits an integer of that size may write.
319
320    // We use a fast 128-bit division algorithm, described in depth
321    // in lexical_util/div128.
322
323    // Decode 4-digits at a time.
324    // To deal with internal 0 values or values with internal 0 digits set,
325    // we store the starting index, and if not all digits are written,
326    // we just skip down `digits` digits for the next value.
327    let step = u64_step(radix);
328    let (value, low) = u128_divrem(value, radix);
329    let mut index = count;
330    index = unsafe { write_step_digits(low, radix, table, buffer, index, step, count) };
331    if value <= u64::MAX as u128 {
332        unsafe { write_digits(value as u64, radix, table, buffer, index, count) };
333        return count;
334    }
335
336    // Value has to be greater than 1.8e38
337    let (value, mid) = u128_divrem(value, radix);
338    index = unsafe { write_step_digits(mid, radix, table, buffer, index, step, count) };
339    if index != 0 {
340        debug_assert!(value != 0, "Writing high digits, must have a non-zero value.");
341        index = unsafe { write_digits(value as u64, radix, table, buffer, index, count) };
342    } else {
343        debug_assert!(value == 0, "No more digits left to write, remainder must be 0.");
344    }
345    debug_assert!(
346        index == 0,
347        "The index after writing all digits should be at the start of the buffer."
348    );
349
350    count
351}