ryu_ecmascript/
f2s.rs

1// Translated from C to Rust. The original C code can be found at
2// https://github.com/ulfjack/ryu and carries the following license:
3//
4// Copyright 2018 Ulf Adams
5//
6// The contents of this file may be used under the terms of the Apache License,
7// Version 2.0.
8//
9//    (See accompanying file LICENSE-Apache or copy at
10//     http://www.apache.org/licenses/LICENSE-2.0)
11//
12// Alternatively, the contents of this file may be used under the terms of
13// the Boost Software License, Version 1.0.
14//    (See accompanying file LICENSE-Boost or copy at
15//     https://www.boost.org/LICENSE_1_0.txt)
16//
17// Unless required by applicable law or agreed to in writing, this software
18// is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
19// KIND, either express or implied.
20
21use core::{mem, ptr};
22
23use common::*;
24use digit_table::*;
25
26#[cfg(feature = "no-panic")]
27use no_panic::no_panic;
28
29pub const FLOAT_MANTISSA_BITS: u32 = 23;
30pub const FLOAT_EXPONENT_BITS: u32 = 8;
31
32const FLOAT_POW5_INV_BITCOUNT: i32 = 59;
33const FLOAT_POW5_BITCOUNT: i32 = 61;
34
35// This table is generated by PrintFloatLookupTable.
36static FLOAT_POW5_INV_SPLIT: [u64; 32] = [576460752303423489,
37                                          461168601842738791,
38                                          368934881474191033,
39                                          295147905179352826,
40                                          472236648286964522,
41                                          377789318629571618,
42                                          302231454903657294,
43                                          483570327845851670,
44                                          386856262276681336,
45                                          309485009821345069,
46                                          495176015714152110,
47                                          396140812571321688,
48                                          316912650057057351,
49                                          507060240091291761,
50                                          405648192073033409,
51                                          324518553658426727,
52                                          519229685853482763,
53                                          415383748682786211,
54                                          332306998946228969,
55                                          531691198313966350,
56                                          425352958651173080,
57                                          340282366920938464,
58                                          544451787073501542,
59                                          435561429658801234,
60                                          348449143727040987,
61                                          557518629963265579,
62                                          446014903970612463,
63                                          356811923176489971,
64                                          570899077082383953,
65                                          456719261665907162,
66                                          365375409332725730,
67                                          1 << 63];
68
69static FLOAT_POW5_SPLIT: [u64; 47] = [1152921504606846976,
70                                      1441151880758558720,
71                                      1801439850948198400,
72                                      2251799813685248000,
73                                      1407374883553280000,
74                                      1759218604441600000,
75                                      2199023255552000000,
76                                      1374389534720000000,
77                                      1717986918400000000,
78                                      2147483648000000000,
79                                      1342177280000000000,
80                                      1677721600000000000,
81                                      2097152000000000000,
82                                      1310720000000000000,
83                                      1638400000000000000,
84                                      2048000000000000000,
85                                      1280000000000000000,
86                                      1600000000000000000,
87                                      2000000000000000000,
88                                      1250000000000000000,
89                                      1562500000000000000,
90                                      1953125000000000000,
91                                      1220703125000000000,
92                                      1525878906250000000,
93                                      1907348632812500000,
94                                      1192092895507812500,
95                                      1490116119384765625,
96                                      1862645149230957031,
97                                      1164153218269348144,
98                                      1455191522836685180,
99                                      1818989403545856475,
100                                      2273736754432320594,
101                                      1421085471520200371,
102                                      1776356839400250464,
103                                      2220446049250313080,
104                                      1387778780781445675,
105                                      1734723475976807094,
106                                      2168404344971008868,
107                                      1355252715606880542,
108                                      1694065894508600678,
109                                      2117582368135750847,
110                                      1323488980084844279,
111                                      1654361225106055349,
112                                      2067951531382569187,
113                                      1292469707114105741,
114                                      1615587133892632177,
115                                      2019483917365790221];
116
117#[cfg_attr(feature = "no-panic", inline)]
118fn pow5_factor(mut value: u32) -> u32 {
119    let mut count = 0u32;
120    loop {
121        debug_assert!(value != 0);
122        let q = value / 5;
123        let r = value % 5;
124        if r != 0 {
125            break;
126        }
127        value = q;
128        count += 1;
129    }
130    count
131}
132
133// Returns true if value is divisible by 5^p.
134#[cfg_attr(feature = "no-panic", inline)]
135fn multiple_of_power_of_5(value: u32, p: u32) -> bool {
136    pow5_factor(value) >= p
137}
138
139// Returns true if value is divisible by 2^p.
140#[cfg_attr(feature = "no-panic", inline)]
141fn multiple_of_power_of_2(value: u32, p: u32) -> bool {
142    // return __builtin_ctz(value) >= p;
143    (value & ((1u32 << p) - 1)) == 0
144}
145
146// It seems to be slightly faster to avoid uint128_t here, although the
147// generated code for uint128_t looks slightly nicer.
148#[cfg_attr(feature = "no-panic", inline)]
149fn mul_shift(m: u32, factor: u64, shift: i32) -> u32 {
150    debug_assert!(shift > 32);
151
152    // The casts here help MSVC to avoid calls to the __allmul library
153    // function.
154    let factor_lo = factor as u32;
155    let factor_hi = (factor >> 32) as u32;
156    let bits0 = m as u64 * factor_lo as u64;
157    let bits1 = m as u64 * factor_hi as u64;
158
159    let sum = (bits0 >> 32) + bits1;
160    let shifted_sum = sum >> (shift - 32);
161    debug_assert!(shifted_sum <= u32::max_value() as u64);
162    shifted_sum as u32
163}
164
165#[cfg_attr(feature = "no-panic", inline)]
166fn mul_pow5_inv_div_pow2(m: u32, q: u32, j: i32) -> u32 {
167    debug_assert!(q < FLOAT_POW5_INV_SPLIT.len() as u32);
168    unsafe { mul_shift(m, *FLOAT_POW5_INV_SPLIT.get_unchecked(q as usize), j) }
169}
170
171#[cfg_attr(feature = "no-panic", inline)]
172fn mul_pow5_div_pow2(m: u32, i: u32, j: i32) -> u32 {
173    debug_assert!(i < FLOAT_POW5_SPLIT.len() as u32);
174    unsafe { mul_shift(m, *FLOAT_POW5_SPLIT.get_unchecked(i as usize), j) }
175}
176
177#[cfg_attr(feature = "no-panic", inline)]
178pub fn decimal_length(v: u32) -> u32 {
179    // Function precondition: v is not a 10-digit number.
180    // (9 digits are sufficient for round-tripping.)
181    debug_assert!(v < 1000000000);
182
183    if v >= 100000000 {
184        9
185    } else if v >= 10000000 {
186        8
187    } else if v >= 1000000 {
188        7
189    } else if v >= 100000 {
190        6
191    } else if v >= 10000 {
192        5
193    } else if v >= 1000 {
194        4
195    } else if v >= 100 {
196        3
197    } else if v >= 10 {
198        2
199    } else {
200        1
201    }
202}
203
204// A floating decimal representing m * 10^e.
205pub struct FloatingDecimal32 {
206    pub mantissa: u32,
207    pub exponent: i32,
208}
209
210#[cfg_attr(feature = "no-panic", inline)]
211pub fn f2d(ieee_mantissa: u32, ieee_exponent: u32) -> FloatingDecimal32 {
212    let bias = (1u32 << (FLOAT_EXPONENT_BITS - 1)) - 1;
213
214    let (e2, m2) = if ieee_exponent == 0 {
215        (// We subtract 2 so that the bounds computation has 2 additional bits.
216         1 - bias as i32 - FLOAT_MANTISSA_BITS as i32 - 2,
217         ieee_mantissa)
218    } else {
219        (ieee_exponent as i32 - bias as i32 - FLOAT_MANTISSA_BITS as i32 - 2,
220         (1u32 << FLOAT_MANTISSA_BITS) | ieee_mantissa)
221    };
222    let even = (m2 & 1) == 0;
223    let accept_bounds = even;
224
225    // Step 2: Determine the interval of legal decimal representations.
226    let mv = 4 * m2;
227    let mp = 4 * m2 + 2;
228    // Implicit bool -> int conversion. True is 1, false is 0.
229    let mm_shift = (ieee_mantissa != 0 || ieee_exponent <= 1) as u32;
230    let mm = 4 * m2 - 1 - mm_shift;
231
232    // Step 3: Convert to a decimal power base using 64-bit arithmetic.
233    let mut vr: u32;
234    let mut vp: u32;
235    let mut vm: u32;
236    let e10: i32;
237    let mut vm_is_trailing_zeros = false;
238    let mut vr_is_trailing_zeros = false;
239    let mut last_removed_digit = 0u8;
240    if e2 >= 0 {
241        let q = log10_pow2(e2) as u32;
242        e10 = q as i32;
243        let k = FLOAT_POW5_INV_BITCOUNT + pow5bits(q as i32) as i32 - 1;
244        let i = -e2 + q as i32 + k;
245        vr = mul_pow5_inv_div_pow2(mv, q, i);
246        vp = mul_pow5_inv_div_pow2(mp, q, i);
247        vm = mul_pow5_inv_div_pow2(mm, q, i);
248        if q != 0 && (vp - 1) / 10 <= vm / 10 {
249            // We need to know one removed digit even if we are not going to loop below. We could use
250            // q = X - 1 above, except that would require 33 bits for the result, and we've found that
251            // 32-bit arithmetic is faster even on 64-bit machines.
252            let l = FLOAT_POW5_INV_BITCOUNT + pow5bits(q as i32 - 1) as i32 - 1;
253            last_removed_digit = (mul_pow5_inv_div_pow2(mv, q - 1, -e2 + q as i32 - 1 + l) %
254                                  10) as u8;
255        }
256        if q <= 9 {
257            // The largest power of 5 that fits in 24 bits is 5^10, but q <= 9 seems to be safe as well.
258            // Only one of mp, mv, and mm can be a multiple of 5, if any.
259            if mv % 5 == 0 {
260                vr_is_trailing_zeros = multiple_of_power_of_5(mv, q);
261            } else if accept_bounds {
262                vm_is_trailing_zeros = multiple_of_power_of_5(mm, q);
263            } else {
264                vp -= multiple_of_power_of_5(mp, q) as u32;
265            }
266        }
267    } else {
268        let q = log10_pow5(-e2) as u32;
269        e10 = q as i32 + e2;
270        let i = -e2 - q as i32;
271        let k = pow5bits(i) as i32 - FLOAT_POW5_BITCOUNT;
272        let mut j = q as i32 - k;
273        vr = mul_pow5_div_pow2(mv, i as u32, j);
274        vp = mul_pow5_div_pow2(mp, i as u32, j);
275        vm = mul_pow5_div_pow2(mm, i as u32, j);
276        if q != 0 && (vp - 1) / 10 <= vm / 10 {
277            j = q as i32 - 1 - (pow5bits(i + 1) as i32 - FLOAT_POW5_BITCOUNT);
278            last_removed_digit = (mul_pow5_div_pow2(mv, (i + 1) as u32, j) % 10) as u8;
279        }
280        if q <= 1 {
281            // {vr,vp,vm} is trailing zeros if {mv,mp,mm} has at least q trailing 0 bits.
282            // mv = 4 * m2, so it always has at least two trailing 0 bits.
283            vr_is_trailing_zeros = true;
284            if accept_bounds {
285                // mm = mv - 1 - mm_shift, so it has 1 trailing 0 bit iff mm_shift == 1.
286                vm_is_trailing_zeros = mm_shift == 1;
287            } else {
288                // mp = mv + 2, so it always has at least one trailing 0 bit.
289                vp -= 1;
290            }
291        } else if q < 31 {
292            // TODO(ulfjack): Use a tighter bound here.
293            vr_is_trailing_zeros = multiple_of_power_of_2(mv, q - 1);
294        }
295    }
296
297    // Step 4: Find the shortest decimal representation in the interval of legal representations.
298    let mut removed = 0u32;
299    let output = if vm_is_trailing_zeros || vr_is_trailing_zeros {
300        // General case, which happens rarely (~4.0%).
301        while vp / 10 > vm / 10 {
302            vm_is_trailing_zeros &= vm - (vm / 10) * 10 == 0;
303            vr_is_trailing_zeros &= last_removed_digit == 0;
304            last_removed_digit = (vr % 10) as u8;
305            vr /= 10;
306            vp /= 10;
307            vm /= 10;
308            removed += 1;
309        }
310        if vm_is_trailing_zeros {
311            while vm % 10 == 0 {
312                vr_is_trailing_zeros &= last_removed_digit == 0;
313                last_removed_digit = (vr % 10) as u8;
314                vr /= 10;
315                vp /= 10;
316                vm /= 10;
317                removed += 1;
318            }
319        }
320        if vr_is_trailing_zeros && last_removed_digit == 5 && vr % 2 == 0 {
321            // Round even if the exact number is .....50..0.
322            last_removed_digit = 4;
323        }
324        // We need to take vr + 1 if vr is outside bounds or we need to round up.
325        vr +
326        ((vr == vm && (!accept_bounds || !vm_is_trailing_zeros)) || last_removed_digit >= 5) as u32
327    } else {
328        // Specialized for the common case (~96.0%). Percentages below are relative to this.
329        // Loop iterations below (approximately):
330        // 0: 13.6%, 1: 70.7%, 2: 14.1%, 3: 1.39%, 4: 0.14%, 5+: 0.01%
331        while vp / 10 > vm / 10 {
332            last_removed_digit = (vr % 10) as u8;
333            vr /= 10;
334            vp /= 10;
335            vm /= 10;
336            removed += 1;
337        }
338        // We need to take vr + 1 if vr is outside bounds or we need to round up.
339        vr + (vr == vm || last_removed_digit >= 5) as u32
340    };
341    let exp = e10 + removed as i32;
342
343    FloatingDecimal32 {
344        exponent: exp,
345        mantissa: output,
346    }
347}
348
349#[cfg_attr(feature = "no-panic", inline)]
350unsafe fn to_chars(v: FloatingDecimal32, sign: bool, result: *mut u8) -> usize {
351    // Step 5: Print the decimal representation.
352    let mut index = 0isize;
353    if sign {
354        *result.offset(index) = b'-';
355        index += 1;
356    }
357
358    let mut output = v.mantissa;
359    let olength = decimal_length(output);
360
361    // Print the decimal digits.
362    // The following code is equivalent to:
363    // for (uint32_t i = 0; i < olength - 1; ++i) {
364    //   const uint32_t c = output % 10; output /= 10;
365    //   result[index + olength - i] = (char) ('0' + c);
366    // }
367    // result[index] = '0' + output % 10;
368    let mut i = 0isize;
369    while output >= 10000 {
370        let c = output - 10000 * (output / 10000);
371        output /= 10000;
372        let c0 = (c % 100) << 1;
373        let c1 = (c / 100) << 1;
374        ptr::copy_nonoverlapping(DIGIT_TABLE.get_unchecked(c0 as usize),
375                                 result.offset(index + olength as isize - i - 1),
376                                 2);
377        ptr::copy_nonoverlapping(DIGIT_TABLE.get_unchecked(c1 as usize),
378                                 result.offset(index + olength as isize - i - 3),
379                                 2);
380        i += 4;
381    }
382    if output >= 100 {
383        let c = (output % 100) << 1;
384        output /= 100;
385        ptr::copy_nonoverlapping(DIGIT_TABLE.get_unchecked(c as usize),
386                                 result.offset(index + olength as isize - i - 1),
387                                 2);
388        i += 2;
389    }
390    if output >= 10 {
391        let c = output << 1;
392        // We can't use memcpy here: the decimal dot goes between these two digits.
393        *result.offset(index + olength as isize - i) = *DIGIT_TABLE.get_unchecked(c as usize + 1);
394        *result.offset(index) = *DIGIT_TABLE.get_unchecked(c as usize);
395    } else {
396        *result.offset(index) = b'0' + output as u8;
397    }
398
399    // Print decimal point if needed.
400    if olength > 1 {
401        *result.offset(index + 1) = b'.';
402        index += olength as isize + 1;
403    } else {
404        index += 1;
405    }
406
407    // Print the exponent.
408    *result.offset(index) = b'E';
409    index += 1;
410    let mut exp = v.exponent + olength as i32 - 1;
411    if exp < 0 {
412        *result.offset(index) = b'-';
413        index += 1;
414        exp = -exp;
415    }
416
417    if exp >= 10 {
418        ptr::copy_nonoverlapping(DIGIT_TABLE.get_unchecked((2 * exp) as usize),
419                                 result.offset(index),
420                                 2);
421        index += 2;
422    } else {
423        *result.offset(index) = b'0' + exp as u8;
424        index += 1;
425    }
426
427    debug_assert!(index <= 15);
428    index as usize
429}
430
431/// Print f32 to the given buffer and return number of bytes written.
432///
433/// At most 15 bytes will be written.
434///
435/// ## Special cases
436///
437/// This function represents any NaN as `NaN`, positive infinity as `Infinity`,
438/// and negative infinity as `-Infinity`.
439///
440/// ## Safety
441///
442/// The `result` pointer argument must point to sufficiently many writable bytes
443/// to hold Ryū's representation of `f`.
444///
445/// ## Example
446///
447/// ```rust
448/// let f = 1.234f32;
449///
450/// unsafe {
451///     let mut buffer: [u8; 15] = std::mem::uninitialized();
452///     let n = ryu_ecmascript::raw::f2s_buffered_n(f, &mut buffer[0]);
453///     let s = std::str::from_utf8_unchecked(&buffer[..n]);
454///     assert_eq!(s, "1.234E0");
455/// }
456/// ```
457#[cfg_attr(must_use_return, must_use)]
458#[cfg_attr(feature = "no-panic", no_panic)]
459pub unsafe fn f2s_buffered_n(f: f32, result: *mut u8) -> usize {
460    // Step 1: Decode the floating-point number, and unify normalized and subnormal cases.
461    let bits = mem::transmute::<f32, u32>(f);
462
463    // Decode bits into sign, mantissa, and exponent.
464    let ieee_sign = ((bits >> (FLOAT_MANTISSA_BITS + FLOAT_EXPONENT_BITS)) & 1) != 0;
465    let ieee_mantissa = bits & ((1u32 << FLOAT_MANTISSA_BITS) - 1);
466    let ieee_exponent = ((bits >> FLOAT_MANTISSA_BITS) & ((1u32 << FLOAT_EXPONENT_BITS) - 1)) as
467                        u32;
468
469    // Case distinction; exit early for the easy cases.
470    if ieee_exponent == ((1u32 << FLOAT_EXPONENT_BITS) - 1) ||
471       (ieee_exponent == 0 && ieee_mantissa == 0) {
472        return copy_special_str(result, ieee_sign, ieee_exponent != 0, ieee_mantissa != 0);
473    }
474
475    let v = f2d(ieee_mantissa, ieee_exponent);
476    to_chars(v, ieee_sign, result)
477}
ryu_ecmascript/f2s.rs

ryu_ecmascript/
f2s.rs