1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
/* -------------------------------------------------------------------------------------------------- */
/* Port of the Intel Decimal Floating-Point Math Library decimal128 type to Rust. */
/* decmathlib-rs - Copyright (C) 2023-2024 Carlos Guzmán Álvarez */
/* -------------------------------------------------------------------------------------------------- */
/* Licensed under the MIT license. See LICENSE file in the project root for full license information. */
/* -------------------------------------------------------------------------------------------------- */
/* Intel® Decimal Floating-Point Math Library - Copyright (c) 2018, Intel Corp. */
/* -------------------------------------------------------------------------------------------------- */
use crate::bid128::BID_NR_DIGITS;
use crate::bid_internal::{BID_UI64DOUBLE, BID_UINT128, BID_UINT64, MASK_COEFF, MASK_EXP, MASK_EXP2, MASK_SNAN, MASK_SPECIAL};
/// Decomposes given decimal floating point value num into a normalized fraction and an integral power of two.
pub (crate) fn bid128_frexp(x: &BID_UINT128) -> (BID_UINT128, i32) {
/*
If x is not a floating-point number, the results are unspecified (this
implementation returns x and *exp = 0). Otherwise, the frexp function
returns the value res, such that res has a magnitude in the interval
[1/10, 1) or zero, and x = res*2^*exp. If x is zero, both parts of the
result are zero frexp does not raise any exceptions
*/
let mut res: BID_UINT128 = Default::default();
let mut sig_x: BID_UINT128 = Default::default();
let exp_x: u32;
let mut tmp: BID_UI64DOUBLE = Default::default();
let x_nr_bits: usize;
let mut q: i32;
let exp: i32;
if (x.w[1] & MASK_SPECIAL) == MASK_SPECIAL {
// if NaN or infinity
exp = 0;
res = *x;
// the binary frexp quitetizes SNaNs, so do the same
if (x.w[1] & MASK_SNAN) == MASK_SNAN { // x is SNAN
// // set invalid flag
// *pfpsf |= BID_INVALID_EXCEPTION;
// return quiet (x)
res.w[1] = x.w[1] & 0xfdffffffffffffffu64;
}
(res, exp)
} else {
// x is 0, non-canonical, normal, or subnormal
// check for non-canonical values with 114 bit-significands; can be zero too
if (x.w[1] & 0x6000000000000000u64) == 0x6000000000000000u64 {
exp = 0;
exp_x = ((x.w[1] & MASK_EXP2) >> 47) as u32; // biased
res.w[1] = (x.w[1] & 0x8000000000000000u64) | ((exp_x as BID_UINT64) << 49);
// zero of same sign
res.w[0] = 0x0000000000000000u64;
return (res, exp);
}
// unpack x
exp_x = ((x.w[1] & MASK_EXP) >> 49) as u32; // biased
sig_x.w[1] = x.w[1] & MASK_COEFF;
sig_x.w[0] = x.w[0];
// check for non-canonical values or zero
if (sig_x.w[1] > 0x0001ed09bead87c0u64)
|| (sig_x.w[1] == 0x0001ed09bead87c0u64
&& (sig_x.w[0] > 0x378d8e63ffffffffu64))
|| ((sig_x.w[1] == 0x0u64) && (sig_x.w[0] == 0x0u64)) {
exp = 0;
res.w[1] = (x.w[1] & 0x8000000000000000u64) | ((exp_x as BID_UINT64) << 49);
// zero of same sign
res.w[0] = 0x0000000000000000u64;
return (res, exp);
} else {
// continue, x is neither zero nor non-canonical
}
// x is normal or subnormal, with exp_x=biased exponent & sig_x=coefficient
// determine the number of decimal digits in sig_x, which fits in 113 bits
// q = nr. of decimal digits in sig_x (1 <= q <= 34)
// determine first the nr. of bits in sig_x
unsafe {
if sig_x.w[1] == 0 {
if sig_x.w[0] >= 0x0020000000000000u64 { // z >= 2^53
// split the 64-bit value in two 32-bit halves to avoid rounding errors
if sig_x.w[0] >= 0x0000000100000000u64 { // z >= 2^32
tmp.d = (sig_x.w[0] >> 32) as f64; // exact conversion
x_nr_bits = (32 + ((((tmp.ui64 >> 52) as u32) & 0x7ff) - 0x3ff)) as usize;
} else { // z < 2^32
tmp.d = sig_x.w[0] as f64; // exact conversion
x_nr_bits = ((((tmp.ui64 >> 52) as u32) & 0x7ff) - 0x3ff) as usize;
}
} else { // if z < 2^53
tmp.d = sig_x.w[0] as f64; // exact conversion
x_nr_bits = ((((tmp.ui64 >> 52) as u32) & 0x7ff) - 0x3ff) as usize;
}
} else { // sig_x.w[1] != 0 => nr. bits = 65 + nr_bits (sig_x.w[1])
tmp.d = sig_x.w[1] as f64; // exact conversion
x_nr_bits = (64 + ((((tmp.ui64 >> 52) as u32) & 0x7ff) - 0x3ff)) as usize;
}
}
q = BID_NR_DIGITS[x_nr_bits].digits as i32;
if q == 0 {
q = BID_NR_DIGITS[x_nr_bits].digits1 as i32;
if sig_x.w[1] > BID_NR_DIGITS[x_nr_bits].threshold_hi
|| (sig_x.w[1] == BID_NR_DIGITS[x_nr_bits].threshold_hi
&& sig_x.w[0] >= BID_NR_DIGITS[x_nr_bits].threshold_lo) {
q += 1;
}
}
// Do not add trailing zeros if q < 34; leave sig_x with q digits
exp = (exp_x - 6176 + (q as u32)) as i32;
// assemble the result; sig_x < 2^113 so it fits in 113 bits
res.w[1] = (x.w[1] & 0x8001ffffffffffffu64) | ((((-q as i64) + 6176i64) << 49) as BID_UINT64);
res.w[0] = x.w[0];
// replace exponent
(res, exp)
}
}