// zmij/lib.rs

1//! [![github]](https://github.com/dtolnay/zmij) [![crates-io]](https://crates.io/crates/zmij) [![docs-rs]](https://docs.rs/zmij)
2//!
3//! [github]: https://img.shields.io/badge/github-8da0cb?style=for-the-badge&labelColor=555555&logo=github
4//! [crates-io]: https://img.shields.io/badge/crates.io-fc8d62?style=for-the-badge&labelColor=555555&logo=rust
5//! [docs-rs]: https://img.shields.io/badge/docs.rs-66c2a5?style=for-the-badge&labelColor=555555&logo=docs.rs
6//!
7//! <br>
8//!
9//! A double-to-string conversion algorithm based on [Schubfach] and [yy].
10//!
11//! This Rust implementation is a line-by-line port of Victor Zverovich's
12//! implementation in C++, <https://github.com/vitaut/zmij>.
13//!
14//! [Schubfach]: https://fmt.dev/papers/Schubfach4.pdf
15//! [yy]: https://github.com/ibireme/c_numconv_benchmark/blob/master/vendor/yy_double/yy_double.c
16//!
17//! <br>
18//!
19//! # Example
20//!
21//! ```
22//! fn main() {
23//!     let mut buffer = zmij::Buffer::new();
24//!     let printed = buffer.format(1.234);
25//!     assert_eq!(printed, "1.234");
26//! }
27//! ```
28//!
29//! <br>
30//!
31//! ## Performance
32//!
33//! The [dtoa-benchmark] compares this library and other Rust floating point
34//! formatting implementations across a range of precisions. The vertical axis
35//! in this chart shows nanoseconds taken by a single execution of
36//! `zmij::Buffer::new().format_finite(value)` so a lower result indicates a
37//! faster library.
38//!
39//! [dtoa-benchmark]: https://github.com/dtolnay/dtoa-benchmark
40//!
41//! ![performance](https://raw.githubusercontent.com/dtolnay/zmij/master/dtoa-benchmark.png)
42
43#![no_std]
44#![doc(html_root_url = "https://docs.rs/zmij/1.0.13")]
45#![deny(unsafe_op_in_unsafe_fn)]
46#![allow(non_camel_case_types, non_snake_case)]
47#![allow(
48    clippy::blocks_in_conditions,
49    clippy::cast_possible_truncation,
50    clippy::cast_possible_wrap,
51    clippy::cast_ptr_alignment,
52    clippy::cast_sign_loss,
53    clippy::doc_markdown,
54    clippy::incompatible_msrv,
55    clippy::items_after_statements,
56    clippy::must_use_candidate,
57    clippy::needless_doctest_main,
58    clippy::never_loop,
59    clippy::redundant_else,
60    clippy::similar_names,
61    clippy::too_many_arguments,
62    clippy::too_many_lines,
63    clippy::unreadable_literal,
64    clippy::used_underscore_items,
65    clippy::while_immutable_condition,
66    clippy::wildcard_imports
67)]
68
69#[cfg(all(target_arch = "x86_64", target_feature = "sse2", not(miri)))]
70mod stdarch_x86;
71#[cfg(test)]
72mod tests;
73mod traits;
74
75#[cfg(all(any(target_arch = "aarch64", target_arch = "x86_64"), not(miri)))]
76use core::arch::asm;
77#[cfg(not(zmij_no_select_unpredictable))]
78use core::hint;
79use core::mem::{self, MaybeUninit};
80use core::ptr;
81use core::slice;
82use core::str;
83#[cfg(feature = "no-panic")]
84use no_panic::no_panic;
85
// Byte capacity of the formatting buffer; presumably sized for the longest
// possible f64 output (sign + 17 digits + point + exponent) — TODO confirm
// exact worst case against the writer.
const BUFFER_SIZE: usize = 24;
// Fixed spellings for non-finite values.
const NAN: &str = "NaN";
const INFINITY: &str = "inf";
const NEG_INFINITY: &str = "-inf";
90
// A decimal floating-point number sig * pow(10, exp).
// If exp is non_finite_exp then the number is a NaN or an infinity.
struct dec_fp {
    sig: i64, // decimal significand (digits of the shortest representation)
    exp: i32, // decimal exponent (power of ten)
}
97
// A 128-bit unsigned integer represented as two 64-bit words; the hi/lo
// halves are produced and consumed separately by the pow10 table lookups.
#[cfg_attr(test, derive(Debug, PartialEq))]
struct uint128 {
    hi: u64, // most significant 64 bits
    lo: u64, // least significant 64 bits
}
103
// Computes 128-bit result of multiplication of two 64-bit unsigned integers.
// Widening first makes the product exact; it can never overflow u128.
const fn umul128(x: u64, y: u64) -> u128 {
    let wide_x = x as u128;
    let wide_y = y as u128;
    wide_x * wide_y
}
108
// Computes the upper 64 bits of the 128-bit product of two 64-bit values.
const fn umul128_upper64(x: u64, y: u64) -> u64 {
    // Widen, multiply exactly, then keep only the high half.
    let product = x as u128 * y as u128;
    (product >> 64) as u64
}
112
// Computes the upper 128 bits of the 192-bit product of the 128-bit value
// ((x_hi << 64) | x_lo) and the 64-bit value y.
#[cfg_attr(feature = "no-panic", no_panic)]
fn umul192_upper128(x_hi: u64, x_lo: u64, y: u64) -> uint128 {
    let p = umul128(x_hi, y);
    // The high half of x_lo * y overlaps the low half of x_hi * y; add them
    // with wraparound and detect the carry via the comparison below.
    let lo = (p as u64).wrapping_add((umul128(x_lo, y) >> 64) as u64);
    uint128 {
        // lo < (low half of p) iff the addition above wrapped; propagate
        // that carry into the high word.
        hi: (p >> 64) as u64 + u64::from(lo < p as u64),
        lo,
    }
}
122
// Computes upper 64 bits of multiplication of x and y, discards the least
// significant bit and rounds to odd, where x = uint128_t(x_hi << 64) | x_lo.
#[cfg_attr(feature = "no-panic", no_panic)]
fn umul_upper_inexact_to_odd<UInt>(x_hi: u64, x_lo: u64, y: UInt) -> UInt
where
    UInt: traits::UInt,
{
    let num_bits = mem::size_of::<UInt>() * 8;
    if num_bits == 64 {
        // 64-bit path: top 64 bits of the 192-bit product, with a sticky bit
        // ORed into the LSB if any discarded bit above p.lo's bit 0 is set
        // (round to odd).
        let p = umul192_upper128(x_hi, x_lo, y.into());
        UInt::truncate(p.hi | u64::from((p.lo >> 1) != 0))
    } else {
        // 32-bit path: x_lo does not participate; keep 64 bits around the
        // split point of x_hi * y and fold the sticky bit in the same way.
        let p = (umul128(x_hi, y.into()) >> 32) as u64;
        UInt::enlarge((p >> 32) as u32 | u32::from((p as u32 >> 1) != 0))
    }
}
139
// Bit-level access to IEEE 754 fields, layered on top of traits::Float.
trait FloatTraits: traits::Float {
    // Total width of the representation in bits.
    const NUM_BITS: i32;
    // Stored (explicit) significand bits, excluding the implicit leading one.
    const NUM_SIG_BITS: i32 = Self::MANTISSA_DIGITS as i32 - 1;
    const NUM_EXP_BITS: i32 = Self::NUM_BITS - Self::NUM_SIG_BITS - 1;
    // All-ones mask for the raw exponent field.
    const EXP_MASK: i32 = (1 << Self::NUM_EXP_BITS) - 1;
    const EXP_BIAS: i32 = (1 << (Self::NUM_EXP_BITS - 1)) - 1;

    // Unsigned integer wide enough to hold the raw bit pattern.
    type SigType: traits::UInt;
    // 1 at the position of the implicit leading significand bit.
    const IMPLICIT_BIT: Self::SigType;

    fn to_bits(self) -> Self::SigType;

    // The sign is the topmost bit of the representation.
    fn is_negative(bits: Self::SigType) -> bool {
        (bits >> (Self::NUM_BITS - 1)) != Self::SigType::from(0)
    }

    // Extracts the stored significand field (the low NUM_SIG_BITS bits).
    fn get_sig(bits: Self::SigType) -> Self::SigType {
        bits & (Self::IMPLICIT_BIT - Self::SigType::from(1))
    }

    // Extracts the raw (biased) exponent field.
    fn get_exp(bits: Self::SigType) -> i32 {
        (bits >> Self::NUM_SIG_BITS).into() as i32 & Self::EXP_MASK
    }
}
164
// IEEE 754 binary32 layout.
impl FloatTraits for f32 {
    const NUM_BITS: i32 = 32;
    const IMPLICIT_BIT: u32 = 1 << Self::NUM_SIG_BITS;

    type SigType = u32;

    // Raw binary32 bit pattern.
    fn to_bits(self) -> Self::SigType {
        self.to_bits()
    }
}
175
// IEEE 754 binary64 layout.
impl FloatTraits for f64 {
    const NUM_BITS: i32 = 64;
    const IMPLICIT_BIT: u64 = 1 << Self::NUM_SIG_BITS;

    type SigType = u64;

    // Raw binary64 bit pattern.
    fn to_bits(self) -> Self::SigType {
        self.to_bits()
    }
}
186
// Storage for 128-bit pow10 significands (two u64 words per power); the
// word layout depends on SPLIT_TABLES, see the impl below.
struct Pow10SignificandsTable {
    data: [u64; Self::NUM_POW10 * 2],
}
190
191impl Pow10SignificandsTable {
192    const SPLIT_TABLES: bool = falsecfg!(target_arch = "aarch64");
193    const NUM_POW10: usize = 617;
194
195    unsafe fn get_unchecked(&self, dec_exp: i32) -> uint128 {
196        const DEC_EXP_MIN: i32 = -292;
197        if !Self::SPLIT_TABLES {
198            let index = ((dec_exp - DEC_EXP_MIN) * 2) as usize;
199            return uint128 {
200                hi: unsafe { *self.data.get_unchecked(index) },
201                lo: unsafe { *self.data.get_unchecked(index + 1) },
202            };
203        }
204
205        unsafe {
206            #[cfg_attr(
207                not(all(any(target_arch = "x86_64", target_arch = "aarch64"), not(miri))),
208                allow(unused_mut)
209            )]
210            let mut hi = self
211                .data
212                .as_ptr()
213                .offset(Self::NUM_POW10 as isize + DEC_EXP_MIN as isize - 1);
214            #[cfg_attr(
215                not(all(any(target_arch = "x86_64", target_arch = "aarch64"), not(miri))),
216                allow(unused_mut)
217            )]
218            let mut lo = hi.add(Self::NUM_POW10);
219
220            // Force indexed loads.
221            #[cfg(all(any(target_arch = "x86_64", target_arch = "aarch64"), not(miri)))]
222            asm!("/*{0}{1}*/", inout(reg) hi, inout(reg) lo);
223            uint128 {
224                hi: *hi.offset(-dec_exp as isize),
225                lo: *lo.offset(-dec_exp as isize),
226            }
227        }
228    }
229
230    #[cfg(test)]
231    fn get(&self, dec_exp: i32) -> uint128 {
232        const DEC_EXP_MIN: i32 = -292;
233        assert!((DEC_EXP_MIN..DEC_EXP_MIN + Self::NUM_POW10 as i32).contains(&dec_exp));
234        unsafe { self.get_unchecked(dec_exp) }
235    }
236}
237
// 128-bit significands of powers of 10 rounded down.
// Generated using 192-bit arithmetic method by Dougall Johnson.
static POW10_SIGNIFICANDS: Pow10SignificandsTable = {
    let mut data = [0; Pow10SignificandsTable::NUM_POW10 * 2];

    // 192-bit unsigned integer carrying extra precision while stepping from
    // one power of 10 to the next.
    struct uint192 {
        w0: u64, // least significant
        w1: u64,
        w2: u64, // most significant
    }

    // first element, rounded up to cancel out rounding down in the
    // multiplication, and minimise significant bits
    let mut current = uint192 {
        w0: 0xe000000000000000,
        w1: 0x25e8e89c13bb0f7a,
        w2: 0xff77b1fcbebcdc4f,
    };
    // 10 << 60; together with the renormalizing shift below each iteration
    // advances `current` by a factor of 10.
    let ten = 0xa000000000000000;
    let mut i = 0;
    while i < Pow10SignificandsTable::NUM_POW10 {
        if Pow10SignificandsTable::SPLIT_TABLES {
            // Split layout: hi words reversed in the first half, lo words
            // reversed in the second half (see get_unchecked).
            data[Pow10SignificandsTable::NUM_POW10 - i - 1] = current.w2;
            data[Pow10SignificandsTable::NUM_POW10 * 2 - i - 1] = current.w1;
        } else {
            // Interleaved layout: (hi, lo) pairs in increasing order.
            data[i * 2] = current.w2;
            data[i * 2 + 1] = current.w1;
        }

        // 192 x 64 -> top 192 bits of the product with `ten`, with manual
        // carry propagation between the words.
        let h0: u64 = umul128_upper64(current.w0, ten);
        let h1: u64 = umul128_upper64(current.w1, ten);

        let c0: u64 = h0.wrapping_add(current.w1.wrapping_mul(ten));
        let c1: u64 = ((c0 < h0) as u64 + h1).wrapping_add(current.w2.wrapping_mul(ten));
        let c2: u64 = (c1 < h1) as u64 + umul128_upper64(current.w2, ten); // dodgy carry

        // normalise
        if (c2 >> 63) != 0 {
            current = uint192 {
                w0: c0,
                w1: c1,
                w2: c2,
            };
        } else {
            // Top bit clear: shift the whole 192-bit value left by one.
            current = uint192 {
                w0: c0 << 1,
                w1: c1 << 1 | c0 >> 63,
                w2: c2 << 1 | c1 >> 63,
            };
        }

        i += 1;
    }

    Pow10SignificandsTable { data }
};
294
// Computes the decimal exponent as floor(log10(2**bin_exp)) if regular or
// floor(log10(3/4 * 2**bin_exp)) otherwise, without branching.
//
// (The source text contained the expanded `core::panicking::panic` residue
// of this debug_assert fused with the macro call; restored to the macro.)
const fn compute_dec_exp(bin_exp: i32, regular: bool) -> i32 {
    debug_assert!(bin_exp >= -1334 && bin_exp <= 2620);
    // log10_3_over_4_sig = -log10(3/4) * 2**log10_2_exp rounded to a power of 2
    const LOG10_3_OVER_4_SIG: i32 = 131_072;
    // log10_2_sig = round(log10(2) * 2**log10_2_exp)
    const LOG10_2_SIG: i32 = 315_653;
    const LOG10_2_EXP: i32 = 20;
    // Fixed-point multiply by log10(2); the arithmetic right shift also
    // implements floor for negative products.
    (bin_exp * LOG10_2_SIG - !regular as i32 * LOG10_3_OVER_4_SIG) >> LOG10_2_EXP
}
306
// Computes the shift aligning a binary significand with the fixed split
// between integral and fractional bits used after scaling by 10**-dec_exp
// (see the comment on compute_exp_shift).
//
// (The source text contained the expanded `core::panicking::panic` residue
// of this debug_assert fused with the macro call; restored to the macro.)
const fn do_compute_exp_shift(bin_exp: i32, dec_exp: i32) -> u8 {
    debug_assert!(dec_exp >= -350 && dec_exp <= 350);
    // log2_pow10_sig = round(log2(10) * 2**log2_pow10_exp) + 1
    const LOG2_POW10_SIG: i32 = 217_707;
    const LOG2_POW10_EXP: i32 = 16;
    // pow10_bin_exp = floor(log2(10**-dec_exp))
    let pow10_bin_exp = (-dec_exp * LOG2_POW10_SIG) >> LOG2_POW10_EXP;
    // pow10 = ((pow10_hi << 64) | pow10_lo) * 2**(pow10_bin_exp - 127)
    (bin_exp + pow10_bin_exp + 1) as u8
}
317
// Precomputed exponent shifts, one entry per raw f64 exponent; collapses to
// a single dummy entry when the table is disabled.
struct ExpShiftTable {
    data: [u8; if Self::ENABLE {
        Self::NUM_EXPS as usize
    } else {
        1
    }],
}
325
impl ExpShiftTable {
    // Compile-time switch for the lookup-table fast path in compute_exp_shift.
    const ENABLE: bool = true;
    // One entry per raw f64 exponent value (0..=EXP_MASK).
    const NUM_EXPS: i32 = f64::EXP_MASK + 1;
    // Bias translating a binary exponent back to its raw-exponent index.
    const OFFSET: i32 = f64::NUM_SIG_BITS + f64::EXP_BIAS;
}
331
// Compile-time table of exponent shifts for every raw f64 exponent.
static EXP_SHIFTS: ExpShiftTable = {
    let mut data = [0u8; if ExpShiftTable::ENABLE {
        ExpShiftTable::NUM_EXPS as usize
    } else {
        1
    }];

    if ExpShiftTable::ENABLE {
        let mut raw_exp = 0;
        while raw_exp < ExpShiftTable::NUM_EXPS {
            let mut bin_exp = raw_exp - ExpShiftTable::OFFSET;
            // Raw exponent 0 (subnormals): the effective binary exponent is
            // one higher than the formula for normals gives, mirroring the
            // `bin_exp += subnormal` adjustment in to_decimal.
            if raw_exp == 0 {
                bin_exp += 1;
            }
            let dec_exp = compute_dec_exp(bin_exp, true);
            data[raw_exp as usize] = do_compute_exp_shift(bin_exp, dec_exp) as u8;
            raw_exp += 1;
        }
    }

    ExpShiftTable { data }
};
354
// Computes a shift so that, after scaling by a power of 10, the intermediate
// result always has a fixed 128-bit fractional part (for double).
//
// Different binary exponents can map to the same decimal exponent, but place
// the decimal point at different bit positions. The shift compensates for this.
//
// For example, both 3 * 2**59 and 3 * 2**60 have dec_exp = 2, but dividing by
// 10^dec_exp puts the decimal point in different bit positions:
//   3 * 2**59 / 100 = 1.72...e+16  (needs shift = 1 + 1)
//   3 * 2**60 / 100 = 3.45...e+16  (needs shift = 2 + 1)
//
// # Safety
//
// On the table path (64-bit UInt, ENABLE, ONLY_REGULAR) the caller must
// guarantee that bin_exp + ExpShiftTable::OFFSET is a valid index into
// EXP_SHIFTS.data.
unsafe fn compute_exp_shift<UInt, const ONLY_REGULAR: bool>(bin_exp: i32, dec_exp: i32) -> u8
where
    UInt: traits::UInt,
{
    let num_bits = mem::size_of::<UInt>() * 8;
    if num_bits == 64 && ExpShiftTable::ENABLE && ONLY_REGULAR {
        // Fast path: precomputed lookup replacing the multiply/shift.
        unsafe {
            *EXP_SHIFTS
                .data
                .as_ptr()
                .add((bin_exp + ExpShiftTable::OFFSET) as usize)
        }
    } else {
        do_compute_exp_shift(bin_exp, dec_exp)
    }
}
381
#[cfg_attr(feature = "no-panic", no_panic)]
fn count_trailing_nonzeros(x: u64) -> usize {
    // Number of bytes of x (viewed in little-endian word order) up to and
    // including the last nonzero byte — i.e. how many leading BCD digits to
    // keep before only zeros remain.
    //
    // This is `8 - leading_zeros / 8`, but phrased so a compiler emitting
    // BSR can fold the constant bias into the instruction's reversed count.
    // Shifting left by one and planting a sentinel bit guarantees the
    // leading_zeros argument is nonzero, avoiding the zero check BSR needs.
    let with_sentinel = (x.to_le() << 1) | 1;
    (70 - with_sentinel.leading_zeros() as usize) / 8
}
398
// Align data since unaligned access may be slower when crossing a
// hardware-specific boundary.
#[repr(C, align(2))]
struct Digits2([u8; 200]);

// Lookup table of the two-character decimal spellings of 0..=99.
static DIGITS2: Digits2 = Digits2(
    *b"0001020304050607080910111213141516171819\
       2021222324252627282930313233343536373839\
       4041424344454647484950515253545556575859\
       6061626364656667686970717273747576777879\
       8081828384858687888990919293949596979899",
);

// Converts value in the range [0, 100) to a string. GCC generates a bit better
// code when value is pointer-size (https://www.godbolt.org/z/5fEPMT1cc).
//
// (The source text contained the expanded `core::panicking::panic` residue
// of this debug_assert fused with the macro call; restored to the macro.)
//
// # Safety
//
// `value` must be less than 100; the debug assertion is compiled out in
// release builds.
#[cfg_attr(feature = "no-panic", no_panic)]
unsafe fn digits2(value: usize) -> &'static u16 {
    debug_assert!(value < 100);

    // The align(2) on Digits2 keeps the u16 reinterpretation well-aligned.
    #[allow(clippy::cast_ptr_alignment)]
    unsafe {
        &*DIGITS2.0.as_ptr().cast::<u16>().add(value)
    }
}
423
// Fixed-point reciprocal constants for the multiply-and-shift divisions used
// by the BCD conversion below.
const DIV10K_EXP: i32 = 40;
const DIV10K_SIG: u32 = ((1u64 << DIV10K_EXP) / 10000 + 1) as u32;
const NEG10K: u32 = ((1u64 << 32) - 10000) as u32;
const DIV100_EXP: i32 = 19;
const DIV100_SIG: u32 = (1 << DIV100_EXP) / 100 + 1;
const NEG100: u32 = (1 << 16) - 100;
const DIV10_EXP: i32 = 10;
const DIV10_SIG: u32 = (1 << DIV10_EXP) / 10 + 1;
const NEG10: u32 = (1 << 8) - 10;

// The byte b'0' replicated into every byte of a u64.
const ZEROS: u64 = 0x0101010101010101 * b'0' as u64;

// Converts an 8-digit value (< 10**8) into eight BCD bytes; when the result
// is stored to memory, the leading digit comes first.
//
// An optimization from Xiang JunBo: three halving steps,
// base 10000 -> base 100 -> base 10, where every step fuses div and mod via
//   (v / d) << k | (v % d) == v + (2**k - d) * (v / d)
// with the division done by the usual multiply-and-shift trick and the
// fractional bits masked away.
#[cfg_attr(feature = "no-panic", no_panic)]
fn to_bcd8(value: u64) -> u64 {
    let q10k = (value * u64::from(DIV10K_SIG)) >> DIV10K_EXP;
    let base10k = value + u64::from(NEG10K) * q10k;

    let q100 = ((base10k * u64::from(DIV100_SIG)) >> DIV100_EXP) & 0x7f0000007f;
    let base100 = base10k + u64::from(NEG100) * q100;

    let q10 = ((base100 * u64::from(DIV10_SIG)) >> DIV10_EXP) & 0xf000f000f000f;
    let base10 = base100 + u64::from(NEG10) * q10;

    // Byte-swap so the most significant digit is written first in memory.
    base10.to_be()
}
454
// Stores the ASCII form of `digit` at `buffer` unconditionally, then returns
// the cursor advanced by one only when `condition` holds.
//
// # Safety
//
// `buffer` must be valid for a one-byte write.
unsafe fn write_if(buffer: *mut u8, digit: u32, condition: bool) -> *mut u8 {
    unsafe {
        buffer.write(b'0' + digit as u8);
        // Branchless advance: the bool converts to 0 or 1.
        buffer.add(condition as usize)
    }
}
461
// Eight-byte store with no alignment requirement on `buffer`.
//
// # Safety
//
// `buffer` must be valid for writes of 8 bytes.
unsafe fn write8(buffer: *mut u8, value: u64) {
    unsafe {
        // Equivalent to an unaligned u64 store: copy the native-endian bytes.
        ptr::copy_nonoverlapping(value.to_ne_bytes().as_ptr(), buffer, 8);
    }
}
467
// Writes a significand consisting of up to 17 decimal digits (16-17 for
// normals) and removes trailing zeros. Returns a pointer one past the last
// digit kept.
//
// # Safety
//
// `buffer` must be valid for writes of at least 17 bytes: one conditional
// leading digit plus an unconditional 16-byte store on the SIMD paths.
#[cfg_attr(feature = "no-panic", no_panic)]
unsafe fn write_significand17(mut buffer: *mut u8, value: u64, has17digits: bool) -> *mut u8 {
    #[cfg(all(target_arch = "aarch64", target_feature = "neon", not(miri)))]
    {
        // An optimized version for NEON by Dougall Johnson.

        use core::arch::aarch64::*;

        // 16-bit fixed-point variant of -10000 for the packed multipliers
        // (shadows the module-level 32-bit NEG10K).
        const NEG10K: i32 = -10000 + 0x10000;

        struct ToStringConstants {
            mul_const: u64,
            hundred_million: u64,
            multipliers32: int32x4_t,
            multipliers16: int16x8_t,
        }

        static CONSTANTS: ToStringConstants = ToStringConstants {
            // Magic reciprocal: umul128(v, mul_const) >> 90 == v / 10**8 for
            // the value range used here.
            mul_const: 0xabcc77118461cefd,
            hundred_million: 100000000,
            multipliers32: unsafe {
                mem::transmute::<[i32; 4], int32x4_t>([
                    DIV10K_SIG as i32,
                    NEG10K,
                    (DIV100_SIG << 12) as i32,
                    NEG100 as i32,
                ])
            },
            multipliers16: unsafe {
                mem::transmute::<[i16; 8], int16x8_t>([0xce0, NEG10 as i16, 0, 0, 0, 0, 0, 0])
            },
        };

        let mut c = ptr::addr_of!(CONSTANTS);

        // Compiler barrier, or clang doesn't load from memory and generates 15
        // more instructions
        let c = unsafe {
            asm!("/*{0}*/", inout(reg) c);
            &*c
        };

        let mut hundred_million = c.hundred_million;

        // Compiler barrier, or clang narrows the load to 32-bit and unpairs it.
        unsafe {
            asm!("/*{0}*/", inout(reg) hundred_million);
        }

        // Equivalent to abbccddee = value / 100000000, ffgghhii = value % 100000000.
        let abbccddee = (umul128(value, c.mul_const) >> 90) as u64;
        let ffgghhii = value - abbccddee * hundred_million;

        // We could probably make this bit faster, but we're preferring to
        // reuse the constants for now.
        let a = (umul128(abbccddee, c.mul_const) >> 90) as u64;
        let bbccddee = abbccddee - a * hundred_million;

        // Optional 17th (leading) digit.
        buffer = unsafe { write_if(buffer, a as u32, has17digits) };

        unsafe {
            let ffgghhii_bbccddee_64: uint64x1_t =
                mem::transmute::<u64, uint64x1_t>((ffgghhii << 32) | bbccddee);
            let bbccddee_ffgghhii: int32x2_t = vreinterpret_s32_u64(ffgghhii_bbccddee_64);

            // Base 10**8 -> base 10**4 in both lanes (multipliers32[0] is
            // DIV10K_SIG, [1] is the packed -10000).
            let bbcc_ffgg: int32x2_t = vreinterpret_s32_u32(vshr_n_u32(
                vreinterpret_u32_s32(vqdmulh_n_s32(
                    bbccddee_ffgghhii,
                    mem::transmute::<int32x4_t, [i32; 4]>(c.multipliers32)[0],
                )),
                9,
            ));
            let ddee_bbcc_hhii_ffgg_32: int32x2_t = vmla_n_s32(
                bbccddee_ffgghhii,
                bbcc_ffgg,
                mem::transmute::<int32x4_t, [i32; 4]>(c.multipliers32)[1],
            );

            let mut ddee_bbcc_hhii_ffgg: int32x4_t =
                vreinterpretq_s32_u32(vshll_n_u16(vreinterpret_u16_s32(ddee_bbcc_hhii_ffgg_32), 0));

            // Compiler barrier, or clang breaks the subsequent MLA into UADDW +
            // MUL.
            asm!("/*{:v}*/", inout(vreg) ddee_bbcc_hhii_ffgg);

            // Base 10**4 -> base 100 (multipliers32[2]/[3]).
            let dd_bb_hh_ff: int32x4_t = vqdmulhq_n_s32(
                ddee_bbcc_hhii_ffgg,
                mem::transmute::<int32x4_t, [i32; 4]>(c.multipliers32)[2],
            );
            let ee_dd_cc_bb_ii_hh_gg_ff: int16x8_t = vreinterpretq_s16_s32(vmlaq_n_s32(
                ddee_bbcc_hhii_ffgg,
                dd_bb_hh_ff,
                mem::transmute::<int32x4_t, [i32; 4]>(c.multipliers32)[3],
            ));
            // Base 100 -> single decimal digits (multipliers16[0]/[1]).
            let high_10s: int16x8_t = vqdmulhq_n_s16(
                ee_dd_cc_bb_ii_hh_gg_ff,
                mem::transmute::<int16x8_t, [i16; 8]>(c.multipliers16)[0],
            );
            let digits: uint8x16_t = vrev64q_u8(vreinterpretq_u8_s16(vmlaq_n_s16(
                ee_dd_cc_bb_ii_hh_gg_ff,
                high_10s,
                mem::transmute::<int16x8_t, [i16; 8]>(c.multipliers16)[1],
            )));
            // BCD -> ASCII by adding b'0' into every byte.
            let str: uint16x8_t = vaddq_u16(
                vreinterpretq_u16_u8(digits),
                vreinterpretq_u16_s8(vdupq_n_s8(b'0' as i8)),
            );

            buffer.cast::<uint16x8_t>().write_unaligned(str);

            // Build a per-digit zero mask, narrow it to one nibble per digit,
            // and count trailing zero digits so they are excluded from the
            // returned end pointer.
            let is_zero: uint16x8_t = vreinterpretq_u16_u8(vceqq_u8(digits, vdupq_n_u8(0)));
            let zeros: u64 = !vget_lane_u64(vreinterpret_u64_u8(vshrn_n_u16(is_zero, 4)), 0);

            buffer.add(16 - (zeros.leading_zeros() as usize >> 2))
        }
    }

    #[cfg(all(target_arch = "x86_64", target_feature = "sse2", not(miri)))]
    {
        use crate::stdarch_x86::*;

        // Split value into two 8-digit halves plus an optional 17th digit.
        let abbccddee = (value / 100_000_000) as u32;
        let ffgghhii = (value % 100_000_000) as u32;
        let a = abbccddee / 100_000_000;
        let bbccddee = abbccddee % 100_000_000;

        buffer = unsafe { write_if(buffer, a, has17digits) };

        #[repr(C, align(64))]
        struct C {
            div10k: __m128i,
            divmod10k: __m128i,
            div100: __m128i,
            divmod100: __m128i,
            div10: __m128i,
            #[cfg(target_feature = "sse4.1")]
            neg10: __m128i,
            #[cfg(target_feature = "sse4.1")]
            bswap: __m128i,
            #[cfg(not(target_feature = "sse4.1"))]
            hundred: __m128i,
            #[cfg(not(target_feature = "sse4.1"))]
            moddiv10: __m128i,
            zeros: __m128i,
        }

        static C: C = C {
            div10k: _mm_set1_epi64x(DIV10K_SIG as i64),
            divmod10k: _mm_set1_epi64x(NEG10K as i64),
            div100: _mm_set1_epi32(DIV100_SIG as i32),
            divmod100: _mm_set1_epi32(NEG100 as i32),
            div10: _mm_set1_epi16(((1i32 << 16) / 10 + 1) as i16),
            #[cfg(target_feature = "sse4.1")]
            neg10: _mm_set1_epi16((1 << 8) - 10),
            #[cfg(target_feature = "sse4.1")]
            bswap: _mm_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15),
            #[cfg(not(target_feature = "sse4.1"))]
            hundred: _mm_set1_epi32(100),
            #[cfg(not(target_feature = "sse4.1"))]
            moddiv10: _mm_set1_epi16(10 * (1 << 8) - 1),
            zeros: _mm_set1_epi64x(ZEROS as i64),
        };

        // The BCD sequences are based on ones provided by Xiang JunBo.
        unsafe {
            // Both 8-digit halves are processed in parallel, one per 64-bit
            // lane.
            let x: __m128i = _mm_set_epi64x(i64::from(bbccddee), i64::from(ffgghhii));
            let y: __m128i = _mm_add_epi64(
                x,
                _mm_mul_epu32(
                    C.divmod10k,
                    _mm_srli_epi64(_mm_mul_epu32(x, C.div10k), DIV10K_EXP),
                ),
            );

            #[cfg(target_feature = "sse4.1")]
            let bcd: __m128i = {
                // _mm_mullo_epi32 is SSE 4.1
                let z: __m128i = _mm_add_epi64(
                    y,
                    _mm_mullo_epi32(C.divmod100, _mm_srli_epi32(_mm_mulhi_epu16(y, C.div100), 3)),
                );
                let big_endian_bcd: __m128i =
                    _mm_add_epi64(z, _mm_mullo_epi16(C.neg10, _mm_mulhi_epu16(z, C.div10)));
                // SSSE3
                _mm_shuffle_epi8(big_endian_bcd, C.bswap)
            };

            #[cfg(not(target_feature = "sse4.1"))]
            let bcd: __m128i = {
                let y_div_100: __m128i = _mm_srli_epi16(_mm_mulhi_epu16(y, C.div100), 3);
                let y_mod_100: __m128i = _mm_sub_epi16(y, _mm_mullo_epi16(y_div_100, C.hundred));
                let z: __m128i = _mm_or_si128(_mm_slli_epi32(y_mod_100, 16), y_div_100);
                let bcd_shuffled: __m128i = _mm_sub_epi16(
                    _mm_slli_epi16(z, 8),
                    _mm_mullo_epi16(C.moddiv10, _mm_mulhi_epu16(z, C.div10)),
                );
                _mm_shuffle_epi32(bcd_shuffled, _MM_SHUFFLE(0, 1, 2, 3))
            };

            // BCD -> ASCII by ORing b'0' into every byte.
            let digits = _mm_or_si128(bcd, C.zeros);

            // determine number of leading zeros
            let mask128: __m128i = _mm_cmpgt_epi8(bcd, _mm_setzero_si128());
            let mask = _mm_movemask_epi8(mask128) as u16;
            let len = 32 - u32::from(mask).leading_zeros();

            _mm_storeu_si128(buffer.cast::<__m128i>(), digits);
            buffer.add(len as usize)
        }
    }

    #[cfg(not(any(
        all(target_arch = "aarch64", target_feature = "neon", not(miri)),
        all(target_arch = "x86_64", target_feature = "sse2", not(miri)),
    )))]
    {
        // Each digit is denoted by a letter so value is abbccddeeffgghhii.
        let abbccddee = (value / 100_000_000) as u32;
        let ffgghhii = (value % 100_000_000) as u32;
        buffer = unsafe { write_if(buffer, abbccddee / 100_000_000, has17digits) };
        let bcd = to_bcd8(u64::from(abbccddee % 100_000_000));
        unsafe {
            write8(buffer, bcd | ZEROS);
        }
        // Low half all zero: trim trailing zeros of the high half and stop.
        if ffgghhii == 0 {
            return unsafe { buffer.add(count_trailing_nonzeros(bcd)) };
        }
        let bcd = to_bcd8(u64::from(ffgghhii));
        unsafe {
            write8(buffer.add(8), bcd | ZEROS);
            buffer.add(8).add(count_trailing_nonzeros(bcd))
        }
    }
}
704
705// Writes a significand consisting of up to 9 decimal digits (8-9 for normals)
706// and removes trailing zeros.
707#[cfg_attr(feature = "no-panic", no_panic)]
708unsafe fn write_significand9(mut buffer: *mut u8, value: u32, has9digits: bool) -> *mut u8 {
709    buffer = unsafe { write_if(buffer, value / 100_000_000, has9digits) };
710    let bcd = to_bcd8(u64::from(value % 100_000_000));
711    unsafe {
712        write8(buffer, bcd | ZEROS);
713        buffer.add(count_trailing_nonzeros(bcd))
714    }
715}
716
717fn normalize<UInt>(mut dec: dec_fp, subnormal: bool) -> dec_fp
718where
719    UInt: traits::UInt,
720{
721    if !subnormal {
722        return dec;
723    }
724    let num_bits = mem::size_of::<UInt>() * 8;
725    while dec.sig
726        < if num_bits == 64 {
727            10_000_000_000_000_000
728        } else {
729            100_000_000
730        }
731    {
732        dec.sig *= 10;
733        dec.exp -= 1;
734    }
735    dec
736}
737
738// Converts a binary FP number bin_sig * 2**bin_exp to the shortest decimal
739// representation, where bin_exp = raw_exp - num_sig_bits - exp_bias.
740#[cfg_attr(feature = "no-panic", no_panic)]
741fn to_decimal<Float, UInt>(bin_sig: UInt, raw_exp: i32, regular: bool, subnormal: bool) -> dec_fp
742where
743    Float: FloatTraits,
744    UInt: traits::UInt,
745{
746    let mut bin_exp = raw_exp - Float::NUM_SIG_BITS - Float::EXP_BIAS;
747    let num_bits = mem::size_of::<UInt>() as i32 * 8;
748    // An optimization from yy by Yaoyuan Guo:
749    while regular && !subnormal {
750        let dec_exp = compute_dec_exp(bin_exp, true);
751        let exp_shift = unsafe { compute_exp_shift::<UInt, true>(bin_exp, dec_exp) };
752        let pow10 = unsafe { POW10_SIGNIFICANDS.get_unchecked(-dec_exp) };
753
754        let integral; // integral part of bin_sig * pow10
755        let fractional; // fractional part of bin_sig * pow10
756        if num_bits == 64 {
757            let p = umul192_upper128(pow10.hi, pow10.lo, (bin_sig << exp_shift).into());
758            integral = UInt::truncate(p.hi);
759            fractional = p.lo;
760        } else {
761            let p = umul128(pow10.hi, (bin_sig << exp_shift).into());
762            integral = UInt::truncate((p >> 64) as u64);
763            fractional = p as u64;
764        }
765        const HALF_ULP: u64 = 1 << 63;
766
767        // Exact half-ulp tie when rounding to nearest integer.
768        if fractional == HALF_ULP {
769            break;
770        }
771
772        #[cfg(all(any(target_arch = "aarch64", target_arch = "x86_64"), not(miri)))]
773        let digit = {
774            // An optimization of integral % 10 by Dougall Johnson. Relies on
775            // range calculation: (max_bin_sig << max_exp_shift) * max_u128.
776            let quo10 = ((u128::from(integral.into()) * ((1 << 64) / 10 + 1)) >> 64) as u64;
777            let mut digit = integral.into() - quo10 * 10;
778            unsafe {
779                asm!("/*{0}*/", inout(reg) digit); // or it narrows to 32-bit and doesn't use madd/msub
780            }
781            digit
782        };
783        #[cfg(not(all(any(target_arch = "aarch64", target_arch = "x86_64"), not(miri))))]
784        let digit = integral.into() % 10;
785
786        // Switch to a fixed-point representation with the least significant
787        // integral digit in the upper bits and fractional digits in the lower
788        // bits.
789        let num_integral_bits = if num_bits == 64 { 4 } else { 32 };
790        let num_fractional_bits = 64 - num_integral_bits;
791        let ten = 10u64 << num_fractional_bits;
792        // Fixed-point remainder of the scaled significand modulo 10.
793        let scaled_sig_mod10 = (digit << num_fractional_bits) | (fractional >> num_integral_bits);
794
795        // scaled_half_ulp = 0.5 * pow10 in the fixed-point format.
796        // dec_exp is chosen so that 10**dec_exp <= 2**bin_exp < 10**(dec_exp + 1).
797        // Since 1ulp == 2**bin_exp it will be in the range [1, 10) after scaling
798        // by 10**dec_exp. Add 1 to combine the shift with division by two.
799        let scaled_half_ulp = pow10.hi >> (num_integral_bits - exp_shift + 1);
800        let upper = scaled_sig_mod10 + scaled_half_ulp;
801
802        // value = 5.0507837461e-27
803        // next  = 5.0507837461000010e-27
804        //
805        // c = integral.fractional' = 50507837461000003.153987... (value)
806        //                            50507837461000010.328635... (next)
807        //          scaled_half_ulp =                 3.587324...
808        //
809        // fractional' = fractional / 2**64, fractional = 2840565642863009226
810        //
811        //      50507837461000000       c               upper     50507837461000010
812        //              s              l|   L             |               S
813        // ───┬────┬────┼────┬────┬────┼*-──┼────┬────┬───*┬────┬────┬────┼-*--┬───
814        //    8    9    0    1    2    3    4    5    6    7    8    9    0 |  1
815        //            └─────────────────┼─────────────────┘                next
816        //                             1ulp
817        //
818        // s - shorter underestimate, S - shorter overestimate
819        // l - longer underestimate,  L - longer overestimate
820
821        // Check for boundary case when rounding down to nearest 10 and
822        // near-boundary case when rounding up to nearest 10.
823        if scaled_sig_mod10 == scaled_half_ulp
824            // Case where upper == ten is insufficient: 1.342178e+08f.
825            // upper == ten || upper == ten - 1
826            || ten.wrapping_sub(upper) <= 1
827        {
828            break;
829        }
830
831        let round_up = upper >= ten;
832        let shorter = (integral.into() - digit + u64::from(round_up) * 10) as i64;
833        let longer = (integral.into() + u64::from(fractional >= HALF_ULP)) as i64;
834        let use_shorter = scaled_sig_mod10 <= scaled_half_ulp || round_up;
835        return dec_fp {
836            #[cfg(zmij_no_select_unpredictable)]
837            sig: if use_shorter { shorter } else { longer },
838            #[cfg(not(zmij_no_select_unpredictable))]
839            sig: hint::select_unpredictable(use_shorter, shorter, longer),
840            exp: dec_exp,
841        };
842    }
843    bin_exp += i32::from(subnormal);
844
845    let dec_exp = compute_dec_exp(bin_exp, regular);
846    let exp_shift = unsafe { compute_exp_shift::<UInt, false>(bin_exp, dec_exp) };
847    let mut pow10 = unsafe { POW10_SIGNIFICANDS.get_unchecked(-dec_exp) };
848
849    // Fallback to Schubfach to guarantee correctness in boundary cases. This
850    // requires switching to strict overestimates of powers of 10.
851    if num_bits == 64 {
852        pow10.lo += 1;
853    } else {
854        pow10.hi += 1;
855    }
856
857    // Shift the significand so that boundaries are integer.
858    const BOUND_SHIFT: u32 = 2;
859    let bin_sig_shifted = bin_sig << BOUND_SHIFT;
860
861    // Compute the estimates of lower and upper bounds of the rounding interval
862    // by multiplying them by the power of 10 and applying modified rounding.
863    let lsb = bin_sig & UInt::from(1);
864    let lower = (bin_sig_shifted - (UInt::from(regular) + UInt::from(1))) << exp_shift;
865    let lower = umul_upper_inexact_to_odd(pow10.hi, pow10.lo, lower) + lsb;
866    let upper = (bin_sig_shifted + UInt::from(2)) << exp_shift;
867    let upper = umul_upper_inexact_to_odd(pow10.hi, pow10.lo, upper) - lsb;
868
869    // The idea of using a single shorter candidate is by Cassio Neri.
870    // It is less or equal to the upper bound by construction.
871    let shorter = UInt::from(10) * ((upper >> BOUND_SHIFT) / UInt::from(10));
872    if (shorter << BOUND_SHIFT) >= lower {
873        return normalize::<UInt>(
874            dec_fp {
875                sig: shorter.into() as i64,
876                exp: dec_exp,
877            },
878            subnormal,
879        );
880    }
881
882    let scaled_sig = umul_upper_inexact_to_odd(pow10.hi, pow10.lo, bin_sig_shifted << exp_shift);
883    let longer_below = scaled_sig >> BOUND_SHIFT;
884    let longer_above = longer_below + UInt::from(1);
885
886    // Pick the closest of longer_below and longer_above and check if it's in
887    // the rounding interval.
888    let cmp = scaled_sig
889        .wrapping_sub((longer_below + longer_above) << 1)
890        .to_signed();
891    let below_closer = cmp < UInt::from(0).to_signed()
892        || (cmp == UInt::from(0).to_signed() && (longer_below & UInt::from(1)) == UInt::from(0));
893    let below_in = (longer_below << BOUND_SHIFT) >= lower;
894    let dec_sig = if below_closer & below_in {
895        longer_below
896    } else {
897        longer_above
898    };
899    normalize::<UInt>(
900        dec_fp {
901            sig: dec_sig.into() as i64,
902            exp: dec_exp,
903        },
904        subnormal,
905    )
906}
907
908/// Writes the shortest correctly rounded decimal representation of `value` to
909/// `buffer`. `buffer` should point to a buffer of size `buffer_size` or larger.
910#[cfg_attr(feature = "no-panic", no_panic)]
911unsafe fn write<Float>(value: Float, mut buffer: *mut u8) -> *mut u8
912where
913    Float: FloatTraits,
914{
915    let bits = value.to_bits();
916    // It is beneficial to extract exponent and significand early.
917    let bin_exp = Float::get_exp(bits); // binary exponent
918    let mut bin_sig = Float::get_sig(bits); // binary significand
919
920    unsafe {
921        *buffer = b'-';
922    }
923    buffer = unsafe { buffer.add(usize::from(Float::is_negative(bits))) };
924
925    let special = bin_exp == 0;
926    let regular = (bin_sig != Float::SigType::from(0)) | special; // | special slightly improves perf.
927    if special {
928        if bin_sig == Float::SigType::from(0) {
929            return unsafe {
930                *buffer = b'0';
931                *buffer.add(1) = b'.';
932                *buffer.add(2) = b'0';
933                buffer.add(3)
934            };
935        }
936        bin_sig |= Float::IMPLICIT_BIT;
937    }
938    bin_sig ^= Float::IMPLICIT_BIT;
939
940    // Here be 🐉s.
941    let mut dec = to_decimal::<Float, Float::SigType>(bin_sig, bin_exp, regular, special);
942    let mut dec_exp = dec.exp;
943
944    // Write significand.
945    let end = if Float::NUM_BITS == 64 {
946        let has17digits = dec.sig >= 10_000_000_000_000_000;
947        dec_exp += Float::MAX_DIGITS10 as i32 - 2 + i32::from(has17digits);
948        unsafe { write_significand17(buffer.add(1), dec.sig as u64, has17digits) }
949    } else {
950        if dec.sig < 10_000_000 {
951            dec.sig *= 10;
952            dec_exp -= 1;
953        }
954        let has9digits = dec.sig >= 100_000_000;
955        dec_exp += Float::MAX_DIGITS10 as i32 - 2 + i32::from(has9digits);
956        unsafe { write_significand9(buffer.add(1), dec.sig as u32, has9digits) }
957    };
958
959    let length = unsafe { end.offset_from(buffer.add(1)) } as usize;
960
961    if Float::NUM_BITS == 32 && (-6..=12).contains(&dec_exp)
962        || Float::NUM_BITS == 64 && (-5..=15).contains(&dec_exp)
963    {
964        if length as i32 - 1 <= dec_exp {
965            // 1234e7 -> 12340000000.0
966            return unsafe {
967                ptr::copy(buffer.add(1), buffer, length);
968                ptr::write_bytes(buffer.add(length), b'0', dec_exp as usize + 3 - length);
969                *buffer.add(dec_exp as usize + 1) = b'.';
970                buffer.add(dec_exp as usize + 3)
971            };
972        } else if 0 <= dec_exp {
973            // 1234e-2 -> 12.34
974            return unsafe {
975                ptr::copy(buffer.add(1), buffer, dec_exp as usize + 1);
976                *buffer.add(dec_exp as usize + 1) = b'.';
977                buffer.add(length + 1)
978            };
979        } else {
980            // 1234e-6 -> 0.001234
981            return unsafe {
982                ptr::copy(buffer.add(1), buffer.add((1 - dec_exp) as usize), length);
983                ptr::write_bytes(buffer, b'0', (1 - dec_exp) as usize);
984                *buffer.add(1) = b'.';
985                buffer.add((1 - dec_exp) as usize + length)
986            };
987        }
988    }
989
990    unsafe {
991        // 1234e30 -> 1.234e33
992        *buffer = *buffer.add(1);
993        *buffer.add(1) = b'.';
994    }
995    buffer = unsafe { buffer.add(length + usize::from(length > 1)) };
996
997    // Write exponent.
998    let sign_ptr = buffer;
999    let e_sign = if dec_exp >= 0 {
1000        (u16::from(b'+') << 8) | u16::from(b'e')
1001    } else {
1002        (u16::from(b'-') << 8) | u16::from(b'e')
1003    };
1004    buffer = unsafe { buffer.add(1) };
1005    dec_exp = if dec_exp >= 0 { dec_exp } else { -dec_exp };
1006    buffer = unsafe { buffer.add(usize::from(dec_exp >= 10)) };
1007    if Float::MIN_10_EXP >= -99 && Float::MAX_10_EXP <= 99 {
1008        unsafe {
1009            buffer
1010                .cast::<u16>()
1011                .write_unaligned(*digits2(dec_exp as usize));
1012            sign_ptr.cast::<u16>().write_unaligned(e_sign.to_le());
1013            return buffer.add(2);
1014        }
1015    }
1016
1017    let digit = if falsecfg!(all(target_vendor = "apple", target_arch = "aarch64")) {
1018        // Use mulhi to divide by 100.
1019        ((dec_exp as u128 * 0x290000000000000) >> 64) as u32
1020    } else {
1021        // div100_exp=19 is faster or equal to 12 even for 3 digits.
1022        (dec_exp as u32 * DIV100_SIG) >> DIV100_EXP // value / 100
1023    };
1024    unsafe {
1025        *buffer = b'0' + digit as u8;
1026    }
1027    buffer = unsafe { buffer.add(usize::from(dec_exp >= 100)) };
1028    unsafe {
1029        buffer
1030            .cast::<u16>()
1031            .write_unaligned(*digits2((dec_exp as u32 - digit * 100) as usize));
1032        sign_ptr.cast::<u16>().write_unaligned(e_sign.to_le());
1033        buffer.add(2)
1034    }
1035}
1036
/// Safe API for formatting floating point numbers to text.
///
/// ## Example
///
/// ```
/// let mut buffer = zmij::Buffer::new();
/// let printed = buffer.format_finite(1.234);
/// assert_eq!(printed, "1.234");
/// ```
pub struct Buffer {
    // Uninitialized scratch space; `format`/`format_finite` write into it and
    // return a `&str` borrowing the written prefix.
    bytes: [MaybeUninit<u8>; BUFFER_SIZE],
}
1049
1050impl Buffer {
1051    /// This is a cheap operation; you don't need to worry about reusing buffers
1052    /// for efficiency.
1053    #[inline]
1054    #[cfg_attr(feature = "no-panic", no_panic)]
1055    pub fn new() -> Self {
1056        let bytes = [MaybeUninit::<u8>::uninit(); BUFFER_SIZE];
1057        Buffer { bytes }
1058    }
1059
1060    /// Print a floating point number into this buffer and return a reference to
1061    /// its string representation within the buffer.
1062    ///
1063    /// # Special cases
1064    ///
1065    /// This function formats NaN as the string "NaN", positive infinity as
1066    /// "inf", and negative infinity as "-inf" to match std::fmt.
1067    ///
1068    /// If your input is known to be finite, you may get better performance by
1069    /// calling the `format_finite` method instead of `format` to avoid the
1070    /// checks for special cases.
1071    #[cfg_attr(feature = "no-panic", no_panic)]
1072    pub fn format<F: Float>(&mut self, f: F) -> &str {
1073        if f.is_nonfinite() {
1074            f.format_nonfinite()
1075        } else {
1076            self.format_finite(f)
1077        }
1078    }
1079
1080    /// Print a floating point number into this buffer and return a reference to
1081    /// its string representation within the buffer.
1082    ///
1083    /// # Special cases
1084    ///
1085    /// This function **does not** check for NaN or infinity. If the input
1086    /// number is not a finite float, the printed representation will be some
1087    /// correctly formatted but unspecified numerical value.
1088    ///
1089    /// Please check [`is_finite`] yourself before calling this function, or
1090    /// check [`is_nan`] and [`is_infinite`] and handle those cases yourself.
1091    ///
1092    /// [`is_finite`]: f64::is_finite
1093    /// [`is_nan`]: f64::is_nan
1094    /// [`is_infinite`]: f64::is_infinite
1095    #[cfg_attr(feature = "no-panic", no_panic)]
1096    pub fn format_finite<F: Float>(&mut self, f: F) -> &str {
1097        unsafe {
1098            let end = f.write_to_zmij_buffer(self.bytes.as_mut_ptr().cast::<u8>());
1099            let len = end.offset_from(self.bytes.as_ptr().cast::<u8>()) as usize;
1100            let slice = slice::from_raw_parts(self.bytes.as_ptr().cast::<u8>(), len);
1101            str::from_utf8_unchecked(slice)
1102        }
1103    }
1104}
1105
/// A floating point number, f32 or f64, that can be written into a
/// [`zmij::Buffer`][Buffer].
///
/// This trait is sealed and cannot be implemented for types outside of the
/// `zmij` crate.
#[allow(unknown_lints)] // rustc older than 1.74
#[allow(private_bounds)] // sealing via the non-public `private::Sealed` supertrait
pub trait Float: private::Sealed {}
impl Float for f32 {}
impl Float for f64 {}
1116
1117mod private {
1118    pub trait Sealed: crate::traits::Float {
1119        fn is_nonfinite(self) -> bool;
1120        fn format_nonfinite(self) -> &'static str;
1121        unsafe fn write_to_zmij_buffer(self, buffer: *mut u8) -> *mut u8;
1122    }
1123
1124    impl Sealed for f32 {
1125        #[inline]
1126        fn is_nonfinite(self) -> bool {
1127            const EXP_MASK: u32 = 0x7f800000;
1128            let bits = self.to_bits();
1129            bits & EXP_MASK == EXP_MASK
1130        }
1131
1132        #[cold]
1133        #[cfg_attr(feature = "no-panic", inline)]
1134        fn format_nonfinite(self) -> &'static str {
1135            const MANTISSA_MASK: u32 = 0x007fffff;
1136            const SIGN_MASK: u32 = 0x80000000;
1137            let bits = self.to_bits();
1138            if bits & MANTISSA_MASK != 0 {
1139                crate::NAN
1140            } else if bits & SIGN_MASK != 0 {
1141                crate::NEG_INFINITY
1142            } else {
1143                crate::INFINITY
1144            }
1145        }
1146
1147        #[cfg_attr(feature = "no-panic", inline)]
1148        unsafe fn write_to_zmij_buffer(self, buffer: *mut u8) -> *mut u8 {
1149            unsafe { crate::write(self, buffer) }
1150        }
1151    }
1152
1153    impl Sealed for f64 {
1154        #[inline]
1155        fn is_nonfinite(self) -> bool {
1156            const EXP_MASK: u64 = 0x7ff0000000000000;
1157            let bits = self.to_bits();
1158            bits & EXP_MASK == EXP_MASK
1159        }
1160
1161        #[cold]
1162        #[cfg_attr(feature = "no-panic", inline)]
1163        fn format_nonfinite(self) -> &'static str {
1164            const MANTISSA_MASK: u64 = 0x000fffffffffffff;
1165            const SIGN_MASK: u64 = 0x8000000000000000;
1166            let bits = self.to_bits();
1167            if bits & MANTISSA_MASK != 0 {
1168                crate::NAN
1169            } else if bits & SIGN_MASK != 0 {
1170                crate::NEG_INFINITY
1171            } else {
1172                crate::INFINITY
1173            }
1174        }
1175
1176        #[cfg_attr(feature = "no-panic", inline)]
1177        unsafe fn write_to_zmij_buffer(self, buffer: *mut u8) -> *mut u8 {
1178            unsafe { crate::write(self, buffer) }
1179        }
1180    }
1181}
1182
1183impl Default for Buffer {
1184    #[inline]
1185    #[cfg_attr(feature = "no-panic", no_panic)]
1186    fn default() -> Self {
1187        Buffer::new()
1188    }
1189}