mod fixed_point;
pub use fixed_point::FixedPoint;
pub use fixed_point::{to_Fx, to_Q};
pub use fixed_point::{Fx, Q};
/// Unsigned integer type that carries the raw bits of a fixed-point word
/// (used for `Q::val` in `to_fixed` and for the `bits` argument of `to_float`).
pub type UInt = u128;
/// Total width in bits of the `f64` bit pattern decoded by `sign`/`exp`/`mant`.
const SIZE: u64 = 64;
/// Width in bits of the mantissa (fraction) field at the bottom of an `f64`.
const MANT_SIZE: u64 = 52;
/// Width in bits of the exponent field of an `f64`.
const ES: u64 = 11;
/// Bias subtracted from the stored exponent: `2^(ES - 1) - 1`, i.e. 1023.
const EXP_BIAS: u64 = (1 << (ES - 1)) - 1;
/// Returns a value whose `size` least-significant bits are set.
///
/// `mask(0)` is `0` and any `size` of 128 or more yields `u128::MAX`, so the
/// function is total: it never triggers a shift overflow, whatever the width.
fn mask(size: u32) -> u128 {
    // `checked_shl` is `None` exactly when `size >= 128`, i.e. when every bit
    // of the word is requested; otherwise `2^size - 1` has the low bits set.
    1u128
        .checked_shl(size)
        .map_or(u128::MAX, |next_bit| next_bit - 1)
}
/// Extracts the sign of an `f64` bit pattern: `1` when the top bit is set
/// (negative), `0` otherwise.
fn sign(bits: u64) -> u64 {
    let sign_position = SIZE - 1;
    bits >> sign_position
}
fn exp(bits: u64) -> u64 {
((bits & ((1 << (SIZE - 1)) - 1)) >> MANT_SIZE) & ((1 << MANT_SIZE) - 1)
}
fn mant(bits: u64) -> u64 {
bits & ((1 << MANT_SIZE) - 1)
}
/// Converts `x` into a signed two's-complement fixed-point word with `m`
/// integer bits, `n` fractional bits and one sign bit.
///
/// The magnitude of `x` is scaled by `2^n`; bits below the least-significant
/// fractional bit are discarded. When `round` is `true` and the first
/// discarded bit is set, the magnitude is incremented by one unit in the last
/// place; otherwise it is truncated. Negative inputs are then encoded as the
/// two's complement of the magnitude on `m + n + 1` bits.
///
/// The returned `Q` carries the word in `val`, echoes `m` and `n`, and sets
/// `is_exact` to `true` only when no non-zero bit was discarded.
///
/// Zero is returned with an all-zero magnitude and the sign bit of `x`
/// copied into bit `m + n` (so `-0.0` sets the sign bit).
///
/// # Errors
///
/// Returns `Err` when
/// * `m` or `n` is negative, or `m + n + 1` exceeds the 128 bits of `UInt`;
/// * `x` is NaN or infinite;
/// * the (possibly rounded) magnitude does not fit in `m + n` bits. Note that
///   `-2^m` is rejected as well, exactly like `2^m`.
pub fn to_fixed(x: f64, m: i32, n: i32, round: bool) -> Result<Q, String> {
    // Validate the format first so that every shift below is in range.
    let total_bits = i64::from(m) + i64::from(n) + 1;
    if m < 0 || n < 0 || total_bits > 128 {
        return Err(format!(
            "Error: invalid format `m` = {}, `n` = {}: both must be non-negative and `m` + `n` + 1 must not exceed 128.",
            m, n
        ));
    }
    if !x.is_finite() {
        return Err(format!(
            "Error: {} has no fixed-point representation.",
            x
        ));
    }

    let f64_bits = x.to_bits();
    let sign_bit = sign(f64_bits);
    let magnitude_bits = (m + n) as u32; // 0..=127 thanks to the check above

    if x == 0.0 {
        return Ok(Q {
            // Widen before shifting: `m + n` may exceed the 64 bits of `sign_bit`.
            val: (sign_bit as UInt) << magnitude_bits,
            m,
            n,
            is_exact: true,
        });
    }

    // Decode |x| as `significand * 2^(unbiased_exp - MANT_SIZE)`. Subnormals
    // (stored exponent 0) have no implicit leading one and use the minimum
    // exponent.
    let stored_exp = exp(f64_bits);
    let (significand, unbiased_exp) = if stored_exp == 0 {
        (mant(f64_bits), 1 - EXP_BIAS as i32)
    } else {
        (
            (1 << MANT_SIZE) | mant(f64_bits),
            stored_exp as i32 - EXP_BIAS as i32,
        )
    };
    let significand = significand as UInt;

    let does_not_fit = || {
        format!(
            "Error: Integer field does not fit into `m` = {} bits.",
            m
        )
    };

    // Number of low significand bits that fall below the last fractional bit.
    let discarded = MANT_SIZE as i32 - unbiased_exp - n;
    let (mut magnitude, round_bit, is_exact) = if discarded <= 0 {
        // Every significand bit is kept; the value only needs shifting up.
        let up = (-discarded) as u32;
        if significand.leading_zeros() < up {
            // Would not even fit in `UInt`, let alone in `m + n` bits.
            return Err(does_not_fit());
        }
        (significand << up, false, true)
    } else {
        let down = discarded as u32;
        let kept = significand.checked_shr(down).unwrap_or(0);
        // The round bit is the most significant discarded bit.
        let round_bit = significand
            .checked_shr(down - 1)
            .map_or(false, |shifted| shifted & 1 == 1);
        // Exact only if shifting the kept bits back reproduces the input.
        let is_exact = kept.checked_shl(down) == Some(significand);
        (kept, round_bit, is_exact)
    };

    if round && round_bit {
        // Cannot overflow: in this branch `magnitude` is below 2^53.
        magnitude += 1;
    }

    // Checked after rounding so that a carry out of the integer field is
    // reported instead of silently flipping the sign bit.
    if magnitude >> magnitude_bits != 0 {
        return Err(does_not_fit());
    }

    let val = if sign_bit == 0 {
        magnitude
    } else {
        // `wrapping_neg` keeps a zero magnitude (e.g. a truncated -0.1) at zero.
        magnitude.wrapping_neg() & mask(magnitude_bits + 1)
    };

    Ok(Q {
        val,
        m,
        n,
        is_exact,
    })
}
/// Decodes an unsigned fixed-point number written as a string of `'0'`/`'1'`
/// characters, most significant bit first.
///
/// `b` is the total number of bits, of which the first `m` form the integer
/// part and the remaining `b - m` form the fractional part. No sign bit is
/// interpreted.
///
/// # Errors
///
/// Returns `Err` when `bits` is not exactly `b` bytes long, when `m` is not
/// in `0..=b`, or when `bits` contains a character other than `'0'` or `'1'`.
#[deprecated(since = "0.4.0")]
pub fn to_float_str(bits: &str, m: i32, b: i32) -> Result<f64, String> {
    /// Numeric value of an already validated ASCII binary digit.
    fn digit(byte: &u8) -> f64 {
        f64::from(byte - b'0')
    }

    if bits.len() as i64 != i64::from(b) {
        return Err(format!(
            "`bits` size does not match the `m` + `n` size you specified. {} != {}",
            bits.len(),
            b
        ));
    }
    if m < 0 || m > b {
        return Err(format!(
            "`m` = {} must lie between 0 and the total number of bits {}.",
            m, b
        ));
    }
    if let Some(bad) = bits.chars().find(|c| *c != '0' && *c != '1') {
        return Err(format!(
            "`bits` must contain only '0' and '1' characters, found {:?}.",
            bad
        ));
    }

    // Only ASCII digits remain, so byte offsets and bit positions coincide.
    let (integer_field, fraction_field) = bits.as_bytes().split_at(m as usize);

    // Horner evaluation avoids materialising `2^i` in a fixed-width integer,
    // which overflowed for fields wider than 30 bits.
    let integer = integer_field
        .iter()
        .map(digit)
        .fold(0.0_f64, |acc, d| acc * 2.0 + d);
    let fraction = fraction_field
        .iter()
        .rev()
        .map(digit)
        .fold(0.0_f64, |acc, d| (acc + d) * 0.5);

    Ok(fraction + integer)
}
/// Decodes a signed two's-complement fixed-point word into an `f64`.
///
/// `bits` holds the word in its `size` least-significant bits: one sign bit,
/// then `m` integer bits, then `n` fractional bits. Bits above `size` are
/// ignored, so a sign-extended word decodes to the same value. The word is
/// interpreted as a signed integer and divided by `2^n`; the integer-to-float
/// conversion is the only step that can round (words wider than 53
/// significant bits).
///
/// # Errors
///
/// Returns `Err` when `size != m + n + 1`, when `m` or `n` is negative, or
/// when `size` exceeds the 128 bits of `UInt`.
pub fn to_float(bits: UInt, size: i32, m: i32, n: i32) -> Result<f64, String> {
    // Widened so that extreme `m`/`n` cannot overflow the sum itself.
    let expected_size = i64::from(m) + i64::from(n) + 1;
    if i64::from(size) != expected_size {
        return Err(format!(
            "`bits` size does not match the (`m` + `n` + 1) size you specified. {} != {}",
            size, expected_size
        ));
    }
    if m < 0 || n < 0 || size > 128 {
        return Err(format!(
            "`m` = {} and `n` = {} must be non-negative and `m` + `n` + 1 must not exceed 128.",
            m, n
        ));
    }

    // From here on: 1 <= width <= 128 and 0 <= n <= 127.
    let width = size as u32;
    let word = bits & mask(width);
    let is_negative = word >> (width - 1) == 1;

    // Two's-complement negation on `width` bits. The most negative word
    // (sign bit alone) maps to 2^(m + n), which yields -2^m below without
    // needing a special case.
    let magnitude = if is_negative {
        word.wrapping_neg() & mask(width)
    } else {
        word
    };

    // Powers of two up to 2^127 are exactly representable, so the division
    // only adjusts the exponent.
    let one: UInt = 1;
    let scale = (one << n) as f64;
    let value = magnitude as f64 / scale;

    Ok(if is_negative { -value } else { value })
}
#[cfg(test)]
mod tests {
    use super::{to_fixed, to_float};

    /// Decoding of fixed-point words, including size validation and
    /// negative (two's-complement) values.
    #[test]
    fn test_to_float() {
        let word = 0b01010000010110000;
        assert_eq!(to_float(word, 17, 1, 15), Ok(1.25537109375));
        // The declared size must equal `m + n + 1`.
        assert!(to_float(word, 17, 1, 14).is_err());
        assert!(to_float(word, 17, 1, 15).is_ok());
        assert!(to_float(word, 17, 1, 16).is_err());

        // (bits, size, m, n, expected value)
        let cases: &[(u128, i32, i32, i32, f64)] = &[
            (0b0_000, 4, 3, 0, 0.0),
            (0b0_001, 4, 3, 0, 1.0),
            (0b0_010, 4, 3, 0, 2.0),
            (0b0_011, 4, 3, 0, 3.0),
            (0b0_100, 4, 3, 0, 4.0),
            (0b0_101, 4, 3, 0, 5.0),
            (0b0_110, 4, 3, 0, 6.0),
            (0b0_111, 4, 3, 0, 7.0),
            (0b1_000, 4, 3, 0, -8.0),
            (0b1_001, 4, 3, 0, -7.0),
            (0b1_010, 4, 3, 0, -6.0),
            (0b0_1111_111, 8, 4, 3, 15.875),
            (0b1_0000_000, 8, 4, 3, -16.0),
            (0b1_0000_001, 8, 4, 3, -15.875),
        ];
        for &(bits, size, m, n, expected) in cases {
            assert_eq!(to_float(bits, size, m, n), Ok(expected));
        }
    }

    /// Encoding of floats into Q4.2 (truncating), overflow detection and
    /// the treatment of signed zero.
    #[test]
    fn test_to_fixed() {
        // (input, expected word) for m = 4, n = 2, no rounding
        let q4_2: &[(f64, u128)] = &[
            (4.0, 0b0_0100_00),
            (-4.0, 0b1_1100_00),
            (8.75, 0b0_1000_11),
            (9.5, 0b0_1001_10),
            (15.75, 0b0_1111_11),
            (15.8, 0b0_1111_11),
        ];
        for &(input, expected) in q4_2 {
            assert_eq!(to_fixed(input, 4, 2, false).unwrap().val, expected);
        }
        assert!(to_fixed(16.0, 4, 2, false).is_err());

        assert_eq!(to_fixed(0.0, 2, 1, false).unwrap().val, 0b0_00_0);
        assert_eq!(to_fixed(-0.0, 2, 1, false).unwrap().val, 0b1_00_0);
    }

    /// An exactly representable value survives an encode/decode round trip.
    #[test]
    fn back_and_forth() {
        let x = 1.25;
        let (m, n) = (1, 3);
        let encoded = to_fixed(x, m, n, false).unwrap().val;
        let decoded = to_float(encoded, 5, m, n).unwrap();
        assert_eq!(decoded, x);
    }
}