use super::*;
use crate::underlying::const_as;
/// Split a finite [`f64`] into a [`Decoded`] (`frac` and `exp`, both in the target `Int`), plus a
/// "sticky" value that is non-zero iff mantissa bits were discarded while narrowing `frac` from
/// 64 bits down to `Int`.
///
/// `frac` is built as a signed 64-bit quantity whose unit ("hidden") bit sits directly below the
/// sign bit, with the float's mantissa field immediately below that. A negative power of two is
/// represented as `i64::MIN` with the exponent lowered by one, because negating `HIDDEN_BIT`
/// alone would not yield a pattern of that shape.
///
/// Subnormal inputs (exponent field 0) carry no hidden bit; they are renormalised by shifting
/// `frac` left until its leading run has length one, lowering the exponent by the same amount.
///
/// `num` must be finite (checked only in debug builds). The callers visible in this file also
/// filter out zeros before calling.
fn decode_finite_f64<
  const N: u32,
  const ES: u32,
  Int: crate::Int,
  const RS: u32,
>(num: f64) -> (Decoded<N, ES, RS, Int>, Int) {
  debug_assert!(num.is_finite());
  const MANTISSA_BITS: u32 = f64::MANTISSA_DIGITS - 1;
  const EXP_BIAS: i64 = f64::MIN_EXP as i64 - 1;
  const HIDDEN_BIT: i64 = (i64::MIN as u64 >> 1) as i64;

  // Pull the sign, mantissa field and (still biased) exponent field out of the bit pattern.
  use crate::underlying::Sealed;
  let sign = num.is_sign_positive();
  let bits = num.abs().to_bits() as i64;
  let mantissa = bits.mask_lsb(MANTISSA_BITS);
  let mut exponent = bits >> MANTISSA_BITS;

  // An exponent field of 0 marks a subnormal. Normals have an implicit leading 1 and one extra
  // unit of bias in the exponent field; subnormals have neither.
  let is_normal = exponent != 0;
  exponent -= i64::from(is_normal);

  // Build the signed 64-bit `frac`.
  let frac: i64 = {
    const SHIFT_LEFT: u32 = 64 - MANTISSA_BITS - 2;
    // Only normals get the hidden bit. Setting it for subnormals would decode `0.m` as `1.m` and
    // turn the renormalisation step below into a no-op.
    let hidden_bit = if is_normal { HIDDEN_BIT } else { 0 };
    let unsigned_frac = (mantissa << SHIFT_LEFT) | hidden_bit;
    if sign {
      unsigned_frac
    } else if mantissa != 0 {
      -unsigned_frac
    } else {
      // Negative with an all-zero mantissa field: use `i64::MIN` with the exponent one lower.
      exponent -= 1;
      i64::MIN
    }
  };

  // Move `frac` into `Int`, keeping the most significant bits. Anything that falls off the
  // bottom when `Int` is narrower than 64 bits is summarised in `sticky`.
  let (mut frac, sticky): (Int, Int) = {
    let shift_left = Int::BITS as i64 - 64;
    if shift_left >= 0 {
      let shift_left = shift_left as u32;
      let frac = const_as::<i64, Int>(frac) << shift_left;
      (frac, Int::ZERO)
    } else {
      let shift_right = -shift_left as u32;
      let sticky = Int::from(frac.mask_lsb(shift_right) != 0);
      let frac = const_as::<i64, Int>(frac.lshr(shift_right));
      (frac, sticky)
    }
  };

  // Renormalise subnormals.
  if !is_normal {
    // Every set bit was lost while narrowing: report a `frac` of one at the smallest exponent.
    if frac == Int::ZERO {
      return (Decoded { frac: Int::ONE, exp: Int::MIN >> 1 }, Int::ZERO)
    }
    // SAFETY: NOTE(review): `frac` is non-zero here (checked just above), which is presumably the
    // precondition of `leading_run_minus_one` — confirm against its documented contract.
    let underflow = unsafe { frac.leading_run_minus_one() };
    frac = frac << underflow;
    exponent = exponent.wrapping_sub(underflow as i64);
  }

  // Remove the float's exponent bias, then clamp into the range this function allows for `exp`
  // when `Int` is narrower than the `i64` the exponent was computed in.
  let exponent = exponent.wrapping_add(EXP_BIAS);
  let exp =
    if const { Int::BITS < 64 } && exponent > const_as::<Int, i64>(Int::MAX >> 1) {
      Int::MAX >> 1
    } else if const { Int::BITS < 64 } && exponent < const_as::<Int, i64>(Int::MIN >> 1) {
      Int::MIN >> 1
    } else {
      const_as::<_, Int>(exponent)
    };

  (Decoded { exp, frac }, sticky)
}
/// Decode a finite [`f32`] by widening it to [`f64`] and delegating to [`decode_finite_f64`].
///
/// The widening conversion is lossless, so the result is the same `Decoded` and sticky pair that
/// the 64-bit path produces for the equal-valued `f64`. `num` must be finite (checked only in
/// debug builds).
fn decode_finite_f32<
  const N: u32,
  const ES: u32,
  Int: crate::Int,
  const RS: u32,
>(num: f32) -> (Decoded<N, ES, RS, Int>, Int) {
  debug_assert!(num.is_finite());
  let widened = f64::from(num);
  decode_finite_f64(widened)
}
impl<
  const N: u32,
  const ES: u32,
  Int: crate::Int,
  const RS: u32,
> RoundFrom<f32> for Posit<N, ES, Int, RS> {
  /// Convert an [`f32`] into a posit, rounding if necessary.
  ///
  /// NaN and both infinities map to `NAR`; both signed zeros map to `ZERO`; every other value
  /// (normal or subnormal) is decoded and then encoded with rounding.
  fn round_from(value: f32) -> Self {
    // NaN and ±infinity are exactly the non-finite floats.
    if !value.is_finite() {
      return Self::NAR;
    }
    // Matches +0.0 and -0.0 alike.
    if value == 0.0 {
      return Self::ZERO;
    }
    let (decoded, sticky) = decode_finite_f32(value);
    // SAFETY: NOTE(review): `value` is finite and non-zero at this point, which is presumably
    // what `encode_regular_round` requires — confirm against its documented contract.
    unsafe { decoded.encode_regular_round(sticky) }
  }
}
impl<
  const N: u32,
  const ES: u32,
  Int: crate::Int,
  const RS: u32,
> RoundFrom<f64> for Posit<N, ES, Int, RS> {
  /// Convert an [`f64`] into a posit, rounding if necessary.
  ///
  /// NaN and both infinities map to `NAR`; both signed zeros map to `ZERO`; every other value
  /// (normal or subnormal) is decoded and then encoded with rounding.
  fn round_from(value: f64) -> Self {
    // NaN and ±infinity are exactly the non-finite floats.
    if !value.is_finite() {
      return Self::NAR;
    }
    // Matches +0.0 and -0.0 alike.
    if value == 0.0 {
      return Self::ZERO;
    }
    let (decoded, sticky) = decode_finite_f64(value);
    // SAFETY: NOTE(review): `value` is finite and non-zero at this point, which is presumably
    // what `encode_regular_round` requires — confirm against its documented contract.
    unsafe { decoded.encode_regular_round(sticky) }
  }
}
/// Assemble an [`f64`] from the `frac` and `exp` of a [`Decoded`] value, rounding the mantissa to
/// nearest with ties to even when `frac` holds more than 52 fraction bits.
///
/// Returns ±infinity when the exponent exceeds the largest `f64` exponent. This also happens
/// when rounding carries out of the mantissa at that largest exponent, since the exponent field
/// then becomes all ones with a zero mantissa.
///
/// # Panics
///
/// Panics (via `todo!`) when the exponent is at or below the subnormal threshold of `f64`;
/// producing subnormal floats is not implemented.
fn encode_finite_f64<
  const N: u32,
  const ES: u32,
  Int: crate::Int,
  const RS: u32,
>(decoded: Decoded<N, ES, RS, Int>) -> f64 {
  const MANTISSA_BITS: u32 = f64::MANTISSA_DIGITS - 1;
  const EXPONENT_BITS: u32 = 64 - MANTISSA_BITS - 1;
  const MANTISSA_MASK: i64 = (1 << MANTISSA_BITS) - 1;

  // Split into sign and magnitude. `Int::MIN` cannot simply be negated, so it becomes an
  // all-zero fraction with the exponent raised by one.
  let sign = decoded.frac.is_positive();
  let (frac_abs, exp) =
    if decoded.frac != Int::MIN {
      (decoded.frac.wrapping_abs().mask_lsb(Decoded::<N, ES, RS, Int>::FRAC_WIDTH), decoded.exp)
    } else {
      (Int::ZERO, decoded.exp + Int::ONE)
    };

  // Range-check the exponent. The check is skipped when `Int` is too narrow to hold
  // `max_exponent`, or when the posit's largest exponent is below it.
  // NOTE(review): `MAX_EXP` is read from `Posit::<N, ES, Int>` without `RS` — confirm that the
  // default is intended here.
  let max_exponent: i64 = (1 << (EXPONENT_BITS - 1)) - 1;
  let exponent =
    if Int::BITS < EXPONENT_BITS || Posit::<N, ES, Int>::MAX_EXP < const_as(max_exponent) {
      const_as::<Int, i64>(exp)
    } else if exp > const_as(max_exponent) {
      return if sign {f64::INFINITY} else {f64::NEG_INFINITY}
    } else if exp <= const_as(-max_exponent) {
      todo!("Subnormal numbers are _not_ currently supported when converting to/from IEEE floats")
    } else {
      const_as::<Int, i64>(exp)
    };

  // Align the fraction with the 52-bit mantissa field. At most one of the two shifts is non-zero.
  let shift_left = MANTISSA_BITS.saturating_sub(Decoded::<N, ES, RS, Int>::FRAC_WIDTH);
  let shift_right = Decoded::<N, ES, RS, Int>::FRAC_WIDTH.saturating_sub(MANTISSA_BITS);
  let mantissa = const_as::<Int, i64>(frac_abs >> shift_right) << shift_left;

  // Round to nearest, ties to even. The discarded bits are moved to the top of `lost_bits`: its
  // sign bit is the round bit, and everything below that is sticky.
  let lost_bits = if shift_right == 0 {Int::ZERO} else {frac_abs << (Int::BITS - shift_right)};
  let round = lost_bits < Int::ZERO;
  let sticky = lost_bits << 1 != Int::ZERO;
  let odd = mantissa & 1 == 1;
  let round_up = round & (odd | sticky);

  // Rounding up an all-ones mantissa carries into bit 52. That carry must be cleared from the
  // mantissa and added to the exponent; OR-ing it into the exponent field would lose it whenever
  // the field's lowest bit is already set.
  let rounded = mantissa + i64::from(round_up);
  let carry = rounded >> MANTISSA_BITS;
  let mantissa = rounded & MANTISSA_MASK;
  let exponent = exponent + carry;

  // Pack sign, biased exponent and mantissa into the float's bit pattern.
  let bits =
    (u64::from(!sign) << (u64::BITS - 1))
    | (((exponent + max_exponent) as u64) << MANTISSA_BITS)
    | (mantissa as u64);
  f64::from_bits(bits)
}
/// Assemble an [`f32`] by first building the [`f64`] for `decoded` and then narrowing it with an
/// `as` cast.
///
/// The narrowing cast is a second rounding step on top of whatever rounding
/// [`encode_finite_f64`] already performed.
fn encode_finite_f32<
  const N: u32,
  const ES: u32,
  Int: crate::Int,
  const RS: u32,
>(decoded: Decoded<N, ES, RS, Int>) -> f32 {
  let wide: f64 = encode_finite_f64(decoded);
  wide as f32
}
impl<
  const N: u32,
  const ES: u32,
  Int: crate::Int,
  const RS: u32,
> RoundFrom<Posit<N, ES, Int, RS>> for f32 {
  /// Convert a posit into an [`f32`], rounding if necessary.
  ///
  /// `NAR` becomes NaN and `ZERO` becomes positive zero; any other posit is decoded and
  /// re-encoded as a float.
  fn round_from(value: Posit<N, ES, Int, RS>) -> Self {
    if value == Posit::NAR {
      return f32::NAN;
    }
    if value == Posit::ZERO {
      return 0.0;
    }
    // SAFETY: NOTE(review): `value` is neither zero nor NaR here, which is presumably what
    // `decode_regular` requires — confirm against its documented contract.
    let decoded = unsafe { value.decode_regular() };
    encode_finite_f32(decoded)
  }
}
impl<
  const N: u32,
  const ES: u32,
  Int: crate::Int,
  const RS: u32,
> RoundFrom<Posit<N, ES, Int, RS>> for f64 {
  /// Convert a posit into an [`f64`], rounding if necessary.
  ///
  /// `NAR` becomes NaN and `ZERO` becomes positive zero; any other posit is decoded and
  /// re-encoded as a float.
  fn round_from(value: Posit<N, ES, Int, RS>) -> Self {
    if value == Posit::NAR {
      return f64::NAN;
    }
    if value == Posit::ZERO {
      return 0.0;
    }
    // SAFETY: NOTE(review): `value` is neither zero nor NaR here, which is presumably what
    // `decode_regular` requires — confirm against its documented contract.
    let decoded = unsafe { value.decode_regular() };
    encode_finite_f64(decoded)
  }
}
// Tests for the float <-> posit conversions in this file. `float_to_posit` checks fixed values
// and property-tests rounding against exact rationals; `posit_to_float` checks that a posit
// survives a posit -> float -> posit round trip.
#[cfg(test)]
mod tests {
use super::*;
use malachite::rational::Rational;
use proptest::prelude::*;
// Float -> posit direction.
mod float_to_posit {
use super::*;
// Generates the float -> posit test suite for one (`$float`, `$posit`) pair. It is meant to be
// expanded inside its own module, since every expansion defines the same test function names.
macro_rules! make_tests {
($float:ty, $posit:ty) => {
use super::*;
#[test]
fn zero() {
assert_eq!(<$posit>::round_from(0.0 as $float), <$posit>::ZERO)
}
#[test]
fn one() {
assert_eq!(<$posit>::round_from(1.0 as $float), <$posit>::ONE)
}
#[test]
fn minus_one() {
assert_eq!(<$posit>::round_from(-1.0 as $float), <$posit>::MINUS_ONE)
}
#[test]
fn nan() {
assert_eq!(<$posit>::round_from(<$float>::NAN), <$posit>::NAR)
}
// The extreme-value tests below only assert when the posit's `MAX_EXP` is at most 127.
// NOTE(review): presumably this ensures the float extremes (for both f32 and f64) lie outside
// the posit's range, so the conversion must saturate to the posit's own extreme — confirm.
#[test]
fn min() {
if const { <$posit>::MAX_EXP as i64 <= 127 } {
assert_eq!(<$posit>::round_from(<$float>::MIN), <$posit>::MIN)
}
}
#[test]
fn max() {
if const { <$posit>::MAX_EXP as i64 <= 127 } {
assert_eq!(<$posit>::round_from(<$float>::MAX), <$posit>::MAX)
}
}
#[test]
fn min_positive() {
if const { <$posit>::MAX_EXP as i64 <= 127 } {
assert_eq!(<$posit>::round_from(<$float>::MIN_POSITIVE), <$posit>::MIN_POSITIVE)
}
}
#[test]
fn max_negative() {
if const { <$posit>::MAX_EXP as i64 <= 127 } {
assert_eq!(<$posit>::round_from(-<$float>::MIN_POSITIVE), <$posit>::MAX_NEGATIVE)
}
}
// `from_bits(1)` is the float whose bit pattern has only the lowest mantissa bit set, i.e. the
// smallest positive subnormal of `$float`.
#[test]
fn subnormal_positive() {
if const { <$posit>::MAX_EXP as i64 <= 127 } {
assert_eq!(<$posit>::round_from(<$float>::from_bits(1)), <$posit>::MIN_POSITIVE)
}
}
#[test]
fn subnormal_negative() {
if const { <$posit>::MAX_EXP as i64 <= 127 } {
assert_eq!(<$posit>::round_from(-<$float>::from_bits(1)), <$posit>::MAX_NEGATIVE)
}
}
// Property test: for an arbitrary float, the converted posit must satisfy
// `rational::is_correct_rounded` against the float's exact rational value. Floats with no
// rational value (the `Err` arm) must convert to `NAR`.
proptest!{
#![proptest_config(ProptestConfig::with_cases(crate::PROPTEST_CASES))]
#[test]
fn proptest(float: $float) {
let posit = <$posit>::round_from(float);
match Rational::try_from(float) {
Ok(exact) => assert!(super::rational::is_correct_rounded(exact, posit)),
Err(_) => assert!(posit == <$posit>::NAR),
}
}
}
};
}
// One module per posit type under test, converting from `f64`.
mod f64 {
use super::*;
mod p8 { make_tests!{f64, crate::p8} }
mod p16 { make_tests!{f64, crate::p16} }
mod p32 { make_tests!{f64, crate::p32} }
mod p64 { make_tests!{f64, crate::p64} }
mod posit_8_0 { make_tests!{f64, Posit::<8, 0, i8>} }
mod posit_10_0 { make_tests!{f64, Posit::<10, 0, i16>} }
mod posit_10_1 { make_tests!{f64, Posit::<10, 1, i16>} }
mod posit_10_2 { make_tests!{f64, Posit::<10, 2, i16>} }
mod posit_10_3 { make_tests!{f64, Posit::<10, 3, i16>} }
mod posit_20_4 { make_tests!{f64, Posit::<20, 4, i32>} }
mod posit_3_0 { make_tests!{f64, Posit::<3, 0, i8>} }
mod posit_4_0 { make_tests!{f64, Posit::<4, 0, i8>} }
mod posit_4_1 { make_tests!{f64, Posit::<4, 1, i8>} }
mod bposit_8_3_6 { make_tests!{f64, Posit::<8, 3, i8, 6>} }
mod bposit_16_5_6 { make_tests!{f64, Posit::<16, 5, i16, 6>} }
mod bposit_32_5_6 { make_tests!{f64, Posit::<32, 5, i32, 6>} }
mod bposit_64_5_6 { make_tests!{f64, Posit::<64, 5, i64, 6>} }
mod bposit_10_2_6 { make_tests!{f64, Posit::<10, 2, i16, 6>} }
mod bposit_10_2_7 { make_tests!{f64, Posit::<10, 2, i16, 7>} }
mod bposit_10_2_8 { make_tests!{f64, Posit::<10, 2, i16, 8>} }
mod bposit_10_2_9 { make_tests!{f64, Posit::<10, 2, i16, 9>} }
}
// The same posit types, converting from `f32`.
mod f32 {
use super::*;
mod p8 { make_tests!{f32, crate::p8} }
mod p16 { make_tests!{f32, crate::p16} }
mod p32 { make_tests!{f32, crate::p32} }
mod p64 { make_tests!{f32, crate::p64} }
mod posit_8_0 { make_tests!{f32, Posit::<8, 0, i8>} }
mod posit_10_0 { make_tests!{f32, Posit::<10, 0, i16>} }
mod posit_10_1 { make_tests!{f32, Posit::<10, 1, i16>} }
mod posit_10_2 { make_tests!{f32, Posit::<10, 2, i16>} }
mod posit_10_3 { make_tests!{f32, Posit::<10, 3, i16>} }
mod posit_20_4 { make_tests!{f32, Posit::<20, 4, i32>} }
mod posit_3_0 { make_tests!{f32, Posit::<3, 0, i8>} }
mod posit_4_0 { make_tests!{f32, Posit::<4, 0, i8>} }
mod posit_4_1 { make_tests!{f32, Posit::<4, 1, i8>} }
mod bposit_8_3_6 { make_tests!{f32, Posit::<8, 3, i8, 6>} }
mod bposit_16_5_6 { make_tests!{f32, Posit::<16, 5, i16, 6>} }
mod bposit_32_5_6 { make_tests!{f32, Posit::<32, 5, i32, 6>} }
mod bposit_64_5_6 { make_tests!{f32, Posit::<64, 5, i64, 6>} }
mod bposit_10_2_6 { make_tests!{f32, Posit::<10, 2, i16, 6>} }
mod bposit_10_2_7 { make_tests!{f32, Posit::<10, 2, i16, 7>} }
mod bposit_10_2_8 { make_tests!{f32, Posit::<10, 2, i16, 8>} }
mod bposit_10_2_9 { make_tests!{f32, Posit::<10, 2, i16, 9>} }
}
}
// Posit -> float direction, checked through a posit -> float -> posit round trip that must
// return the original posit.
mod posit_to_float {
use super::*;
// Round-trip check over every value yielded by `cases_exhaustive_all()`.
macro_rules! test_exhaustive {
($float:ty, $posit:ty) => {
use super::*;
#[test]
fn posit_roundtrip_exhaustive() {
for posit in <$posit>::cases_exhaustive_all() {
let float = <$float>::round_from(posit);
let reposit = <$posit>::round_from(float);
assert_eq!(posit, reposit)
}
}
};
}
// Round-trip check over values sampled from the `cases_proptest_all()` strategy.
macro_rules! test_proptest {
($float:ty, $posit:ty) => {
use super::*;
proptest!{
#![proptest_config(ProptestConfig::with_cases(crate::PROPTEST_CASES))]
#[test]
fn posit_roundtrip_proptest(posit in <$posit>::cases_proptest_all()) {
let float = <$float>::round_from(posit);
let reposit = <$posit>::round_from(float);
assert_eq!(posit, reposit)
}
}
};
}
// Round trips through `f64`. The 32-bit-backed posit types use the sampled variant rather than
// the exhaustive one.
// NOTE(review): no 64-bit posit types are listed here, presumably because they do not round-trip
// exactly through an `f64` — confirm.
mod f64 {
use super::*;
mod p8 { test_exhaustive!{f64, crate::p8} }
mod p16 { test_exhaustive!{f64, crate::p16} }
mod p32 { test_proptest!{f64, crate::p32} }
mod posit_8_0 { test_exhaustive!{f64, Posit::<8, 0, i8>} }
mod posit_10_0 { test_exhaustive!{f64, Posit::<10, 0, i16>} }
mod posit_10_1 { test_exhaustive!{f64, Posit::<10, 1, i16>} }
mod posit_10_2 { test_exhaustive!{f64, Posit::<10, 2, i16>} }
mod posit_10_3 { test_exhaustive!{f64, Posit::<10, 3, i16>} }
mod posit_20_4 { test_proptest!{f64, Posit::<20, 4, i32>} }
mod posit_3_0 { test_exhaustive!{f64, Posit::<3, 0, i8>} }
mod posit_4_0 { test_exhaustive!{f64, Posit::<4, 0, i8>} }
mod posit_4_1 { test_exhaustive!{f64, Posit::<4, 1, i8>} }
mod bposit_8_3_6 { test_exhaustive!{f64, Posit::<8, 3, i8, 6>} }
mod bposit_16_5_6 { test_exhaustive!{f64, Posit::<16, 5, i16, 6>} }
mod bposit_32_5_6 { test_proptest!{f64, Posit::<32, 5, i32, 6>} }
mod bposit_10_2_6 { test_exhaustive!{f64, Posit::<10, 2, i16, 6>} }
mod bposit_10_2_7 { test_exhaustive!{f64, Posit::<10, 2, i16, 7>} }
mod bposit_10_2_8 { test_exhaustive!{f64, Posit::<10, 2, i16, 8>} }
mod bposit_10_2_9 { test_exhaustive!{f64, Posit::<10, 2, i16, 9>} }
}
// Round trips through `f32`.
// NOTE(review): this list is shorter than the `f64` one (no `p32`, `posit_20_4`,
// `bposit_16_5_6` or `bposit_32_5_6`), presumably because those types do not round-trip exactly
// through an `f32` — confirm.
mod f32 {
use super::*;
mod p8 { test_exhaustive!{f32, crate::p8} }
mod p16 { test_exhaustive!{f32, crate::p16} }
mod posit_8_0 { test_exhaustive!{f32, Posit::<8, 0, i8>} }
mod posit_10_0 { test_exhaustive!{f32, Posit::<10, 0, i16>} }
mod posit_10_1 { test_exhaustive!{f32, Posit::<10, 1, i16>} }
mod posit_10_2 { test_exhaustive!{f32, Posit::<10, 2, i16>} }
mod posit_10_3 { test_exhaustive!{f32, Posit::<10, 3, i16>} }
mod posit_3_0 { test_exhaustive!{f32, Posit::<3, 0, i8>} }
mod posit_4_0 { test_exhaustive!{f32, Posit::<4, 0, i8>} }
mod posit_4_1 { test_exhaustive!{f32, Posit::<4, 1, i8>} }
mod bposit_8_3_6 { test_exhaustive!{f32, Posit::<8, 3, i8, 6>} }
mod bposit_10_2_6 { test_exhaustive!{f32, Posit::<10, 2, i16, 6>} }
mod bposit_10_2_7 { test_exhaustive!{f32, Posit::<10, 2, i16, 7>} }
mod bposit_10_2_8 { test_exhaustive!{f32, Posit::<10, 2, i16, 8>} }
mod bposit_10_2_9 { test_exhaustive!{f32, Posit::<10, 2, i16, 9>} }
}
}
}