lexical_write_integer/
algorithm.rs

1//! Radix-generic, optimized, integer-to-string conversion routines.
2//!
3//! These routines are highly optimized: they unroll 4 loops at a time,
4//! using pre-computed base^2 tables.
5//!
6//! This was popularized by Andrei Alexandrescu, and uses 2 digits per
7//! division, which we further optimize in up to 4 digits per division
8//! with a bit shift.
9//!
10//! See [Algorithm.md](/docs/Algorithm.md) for a more detailed description of
11//! the algorithm choice here. See [Benchmarks.md](/docs/Benchmarks.md) for
12//! recent benchmark data.
13
14#![cfg(not(feature = "compact"))]
15#![cfg(feature = "power-of-two")]
16
17use lexical_util::assert::debug_assert_radix;
18use lexical_util::digit::digit_to_char;
19use lexical_util::div128::u128_divrem;
20use lexical_util::format::{radix_from_flags, NumberFormat};
21use lexical_util::num::{AsCast, UnsignedInteger};
22use lexical_util::step::u64_step;
23
24use crate::digit_count::DigitCount;
25
26/// Index a buffer and get a mutable reference, without bounds checking.
27/// The `($x:ident[$i:expr] = $y:ident[$j:expr])` is not used with `compact`.
28/// The newer version of the lint is `unused_macro_rules`, but this isn't
29/// supported until nightly-2022-05-12.
30///
31/// By default, writers tend to be safe, due to Miri, Valgrind,
32/// and other tests and careful validation against a wide range
33/// of randomized input. Parsers are much trickier to validate.
34#[allow(unknown_lints, unused_macro_rules)]
35macro_rules! i {
36    ($x:ident[$i:expr]) => {
37        *$x.get_unchecked_mut($i)
38    };
39
40    ($x:ident[$i:expr] = $y:ident[$j:expr]) => {
41        *$x.get_unchecked_mut($i) = *$y.get_unchecked($j)
42    };
43}
44
45/// Write 2 digits to buffer.
46///
47/// # Safety
48///
49/// Safe if `bytes` is large enough to hold 2 characters, `index >= 2`,
50/// and if the 2 * remainder, or `r`, has it so `r + 1 < table.len()`.
51macro_rules! write_digits {
52    ($bytes:ident, $index:ident, $table:ident, $r:ident) => {{
53        debug_assert!($index >= 2);
54        debug_assert!($bytes.len() >= 2);
55        debug_assert!($r + 1 < $table.len());
56        $index -= 1;
57        unsafe { i!($bytes[$index] = $table[$r + 1]) };
58        $index -= 1;
59        unsafe { i!($bytes[$index] = $table[$r]) };
60    }};
61}
62
63/// Write 1 digit to buffer.
64///
65/// # Safety
66///
67/// Safe if `bytes` is large enough to hold 1 characters, and `r < 36`.
68/// Adding in direct safety checks here destroys performance, often by
69/// 30%+ so it's up to the caller to beware.
70macro_rules! write_digit {
71    ($bytes:ident, $index:ident, $r:ident) => {{
72        debug_assert!($index >= 1);
73        debug_assert!($bytes.len() >= 1);
74        debug_assert!($r < 36);
75        $index -= 1;
76        unsafe { i!($bytes[$index]) = digit_to_char($r) };
77    }};
78}
79
80// NOTE: Don't use too many generics:
81//  We don't need generics for most of the internal algorithms,
82//  and doing so kills performance. Why? I don't know, but assuming
83//  it messed with the compiler's code generation.
84
85/// Write integral digits to buffer.
86///
87/// This algorithm first writes 4, then 2 digits at a time, finally
88/// the last 1 or 2 digits, using power reduction to speed up the
89/// algorithm a lot.
90///
91/// # Safety
92///
93/// This is safe as long as the buffer is large enough to hold `T::MAX`
94/// digits in radix `N` and the index >= digit count. Note  that making
95/// small changes here can destroy performance, so it's crucial we do this
96/// correctly.
97///
98/// If `buffer.len() >= T::DIGITS` and `index >= T::DIGITS`, then this is
99/// safe. We first carve off 4 digits off the end, similar to the algorithm
100/// in compact, then 2 at a time, then 1, index will never wrap under 0.
101/// Since we validate the table size and radix inside, this is the only
102/// safety precondition that must be held up.
103///
104/// See [algorithm] and the [crate] documentation for more detailed
105/// information on the safety considerations.
106#[inline(always)]
107unsafe fn write_digits<T: UnsignedInteger>(
108    mut value: T,
109    radix: u32,
110    table: &[u8],
111    buffer: &mut [u8],
112    mut index: usize,
113    count: usize,
114) -> usize {
115    debug_assert_radix(radix);
116    debug_assert!(buffer.len() >= count, "buffer must at least be as the digit count");
117
118    // Calculate if we can do multi-digit optimizations
119    assert!((2..=36).contains(&radix), "radix must be >= 2 and <= 36");
120    let radix2 = radix * radix;
121    let radix4 = radix2 * radix2;
122
123    // Pre-compute our powers of radix.
124    let radix = T::from_u32(radix);
125
126    // SAFETY: All of these are safe for the buffer writes as long as
127    // the buffer is large enough to hold `T::MAX` digits in radix `N`.
128    // We confirm (which will be compiled out) that the table cannot
129    // overflow since it's the indexing is `0..radix^2 * 2`.
130    assert!(table.len() >= radix2 as usize * 2, "table must be 2 * radix^2 long");
131
132    // Decode 4 digits at a time.
133    if T::BITS >= 32 || radix4 < T::MAX.as_u32() {
134        let radix2 = T::from_u32(radix2);
135        let radix4 = T::from_u32(radix4);
136        while value >= radix4 {
137            let r = value % radix4;
138            value /= radix4;
139            let r1 = usize::as_cast(T::TWO * (r / radix2));
140            let r2 = usize::as_cast(T::TWO * (r % radix2));
141
142            // SAFETY: This is always safe, since the table is `2*radix^2`, and
143            // `r1` and `r2` must be in the range `[0, 2*radix^2-1)`, since the maximum
144            // value of r is `radix4-1`, which must have a `div` and `r`
145            // in the range `[0, radix^2-1)`.
146            write_digits!(buffer, index, table, r2);
147            write_digits!(buffer, index, table, r1);
148        }
149    }
150
151    // Decode 2 digits at a time.
152    if T::BITS >= 16 || radix2 < T::MAX.as_u32() {
153        let radix2 = T::from_u32(radix2);
154        while value >= radix2 {
155            let r = usize::as_cast(T::TWO * (value % radix2));
156            value /= radix2;
157
158            // SAFETY: this is always safe, since the table is `2*radix^2`, and
159            // `r` must be in the range `[0, 2*radix^2-1)`.
160            write_digits!(buffer, index, table, r);
161        }
162    }
163
164    // Decode last 2 digits.
165    if value < radix {
166        let r = u32::as_cast(value);
167        // SAFETY: this is always safe, since `value < radix`, so it must be < 36.
168        write_digit!(buffer, index, r);
169    } else {
170        // NOTE: If this is a `u8`, we need to first widen the type.
171        let r = usize::as_cast(T::TWO) * usize::as_cast(value);
172        // SAFETY: this is always safe, since the table is `2*radix^2`, and
173        // the value must `<= radix^2`, so rem must be in the range
174        // `[0, 2*radix^2-1)`.
175        write_digits!(buffer, index, table, r);
176    }
177
178    index
179}
180
181/// Specialized digits writer for u128, since it writes at least step digits.
182///
183/// # Safety
184///
185/// This is safe as long as the buffer is large enough to hold `T::MAX`
186/// digits in radix `N`. See [algorithm] for more safety considerations.
187#[inline(always)]
188unsafe fn write_step_digits<T: UnsignedInteger>(
189    value: T,
190    radix: u32,
191    table: &[u8],
192    buffer: &mut [u8],
193    index: usize,
194    step: usize,
195    count: usize,
196) -> usize {
197    debug_assert_radix(radix);
198
199    let start = index;
200    // SAFETY: safe as long as the call to `write_step_digits` is safe.
201    let index = unsafe { write_digits(value, radix, table, buffer, index, count) };
202    // Write the remaining 0 bytes.
203    let end = start.saturating_sub(step);
204    // SAFETY: this is always safe since `end < index && index < start`.
205    let zeros = unsafe { &mut i!(buffer[end..index]) };
206    zeros.fill(b'0');
207
208    end
209}
210
211/// Optimized implementation for radix-N numbers.
212///
213/// This uses an Alexandrescu algorithm, which prints 2 digits at a time
214/// which is much faster than a naive approach. However, the jeaiii algorithm
215/// can be faster still for decimal numbers:
216///     <https://jk-jeon.github.io/posts/2022/02/jeaiii-algorithm/>
217///
218/// # Safety
219///
220/// Safe as long as [`digit_count`] returns the number of written digits.
221/// For performance reasons, this is always calculated as the exact number
222/// of digits. If the value is too small, then the buffer will underflow,
223/// causing out-of-bounds read/writes. Care must be used that [`digit_count`]
224/// is correctly implemented.
225///
226/// Since [`digit_count`] is implemented as an unsafe trait, these guarantees
227/// must be held.
228///
229/// See the crate [`crate`] documentation for more security considerations.
230///
231/// [`digit_count`]: `crate::digit_count::DigitCount`
232#[inline(always)]
233#[allow(clippy::unnecessary_safety_comment)]
234pub fn algorithm<T>(value: T, radix: u32, table: &[u8], buffer: &mut [u8]) -> usize
235where
236    T: UnsignedInteger + DigitCount,
237{
238    // NOTE: These checks should be resolved at compile time, so
239    // they're unlikely to add any performance overhead.
240    assert!((2..=36).contains(&radix), "radix must be >= 2 and <= 36");
241    assert!(table.len() >= (radix * radix * 2) as usize, "table must be 2 * radix^2 long");
242
243    // get our digit count and only write up until that range
244    // the digit count should be the exact number of digits written by
245    // the number. this is for performance reasons: using `memcpy` destroys
246    // performance.
247    let count = value.digit_count(radix);
248    assert!(
249        count <= buffer.len(),
250        "The buffer must be large enough to contain the significant digits."
251    );
252    let buffer = &mut buffer[..count];
253
254    // SAFETY: Both forms of unchecked indexing cannot overflow.
255    // The table always has `2*radix^2` elements, so it must be a legal index.
256    // The buffer is ensured to have at least `FORMATTED_SIZE` or
257    // `FORMATTED_SIZE_DECIMAL` characters, which is the maximum number of
258    // digits an integer of that size may write.
259    _ = unsafe { write_digits(value, radix, table, buffer, buffer.len(), count) };
260
261    count
262}
263
264/// Optimized implementation for radix-N 128-bit numbers.
265///
266/// # Safety
267///
268/// Safe as long as [`digit_count`] returns the number of written digits.
269/// For performance reasons, this is always calculated as the exact number
270/// of digits. If the value is too small, then the buffer will underflow,
271/// causing out-of-bounds read/writes. Care must be used that [`digit_count`]
272/// is correctly implemented.
273///
274/// Since [`digit_count`] is implemented as an unsafe trait, these guarantees
275/// must be held.
276///
277/// See the crate [`crate`] documentation for more security considerations.
278///
279/// [`digit_count`]: `crate::digit_count::DigitCount`
280#[inline(always)]
281pub fn algorithm_u128<const FORMAT: u128, const MASK: u128, const SHIFT: i32>(
282    value: u128,
283    table: &[u8],
284    buffer: &mut [u8],
285) -> usize {
286    // NOTE: Use the const version of radix for `u64_step` and
287    // `u128_divrem` to ensure they're evaluated at compile time.
288    assert!(NumberFormat::<{ FORMAT }> {}.is_valid());
289    // NOTE: These checks should be resolved at compile time, so
290    // they're unlikely to add any performance overhead.
291    let radix = radix_from_flags(FORMAT, MASK, SHIFT);
292    assert!((2..=36).contains(&radix), "radix must be >= 2 and <= 36");
293    assert!(table.len() >= (radix * radix * 2) as usize, "table must be 2 * radix^2 long");
294
295    // Quick approximations to make the algorithm **a lot** faster.
296    // If the value can be represented in a 64-bit integer, we can
297    // do this as a native integer.
298    if value <= u64::MAX as u128 {
299        return algorithm(value as u64, radix, table, buffer);
300    }
301
302    // get our digit count and only write up until that range
303    // the digit count should be the exact number of digits written by
304    // the number. this is for performance reasons: using `memcpy` destroys
305    // performance.
306    let count = value.digit_count(radix);
307    assert!(
308        count <= buffer.len(),
309        "The buffer must be large enough to contain the significant digits."
310    );
311    let buffer = &mut buffer[..count];
312
313    // LOGIC: Both forms of unchecked indexing cannot overflow.
314    // The table always has `2*radix^2` elements, so it must be a legal index.
315    // The buffer is ensured to have at least `FORMATTED_SIZE` or
316    // `FORMATTED_SIZE_DECIMAL` characters, which is the maximum number of
317    // digits an integer of that size may write.
318
319    // We use a fast 128-bit division algorithm, described in depth
320    // in lexical_util/div128.
321
322    // Decode 4-digits at a time.
323    // To deal with internal 0 values or values with internal 0 digits set,
324    // we store the starting index, and if not all digits are written,
325    // we just skip down `digits` digits for the next value.
326    let step = u64_step(radix);
327    let (value, low) = u128_divrem(value, radix);
328    let mut index = count;
329    index = unsafe { write_step_digits(low, radix, table, buffer, index, step, count) };
330    if value <= u64::MAX as u128 {
331        unsafe { write_digits(value as u64, radix, table, buffer, index, count) };
332        return count;
333    }
334
335    // Value has to be greater than 1.8e38
336    let (value, mid) = u128_divrem(value, radix);
337    index = unsafe { write_step_digits(mid, radix, table, buffer, index, step, count) };
338    if index != 0 {
339        debug_assert!(value != 0, "Writing high digits, must have a non-zero value.");
340        index = unsafe { write_digits(value as u64, radix, table, buffer, index, count) };
341    } else {
342        debug_assert!(value == 0, "No more digits left to write, remainder must be 0.");
343    }
344    debug_assert!(
345        index == 0,
346        "The index after writing all digits should be at the start of the buffer."
347    );
348
349    count
350}
lexical_write_integer/algorithm.rs

lexical_write_integer/
algorithm.rs