1use super::*;
2
3use crate::underlying::const_as;
4
5fn decode_finite_f64<
8 const N: u32,
9 const ES: u32,
10 Int: crate::Int,
11 const RS: u32,
12>(num: f64) -> (Decoded<N, ES, RS, Int>, Int) { debug_assert!(num.is_finite());
14 const MANTISSA_BITS: u32 = f64::MANTISSA_DIGITS - 1;
15 const EXP_BIAS: i64 = f64::MIN_EXP as i64 - 1;
16 const HIDDEN_BIT: i64 = (i64::MIN as u64 >> 1) as i64;
17
18 use crate::underlying::Sealed;
20 let sign = num.is_sign_positive();
21 let bits = num.abs().to_bits() as i64;
22 let mantissa = bits.mask_lsb(MANTISSA_BITS);
23 let mut exponent = bits >> MANTISSA_BITS;
24
25 let is_normal = exponent != 0;
28 exponent -= i64::from(is_normal);
29
30 let frac: i64 = {
36 const SHIFT_LEFT: u32 = 64 - MANTISSA_BITS - 2;
37 let unsigned_frac = (mantissa << SHIFT_LEFT) | HIDDEN_BIT;
38 if sign {
39 unsigned_frac
40 } else if mantissa != 0 {
41 -unsigned_frac
42 } else {
43 exponent -= 1;
44 i64::MIN
45 }
46 };
47 let (mut frac, sticky): (Int, Int) = {
51 let shift_left = Int::BITS as i64 - 64;
52 if shift_left >= 0 {
53 let shift_left = shift_left as u32;
55 let frac = const_as::<i64, Int>(frac) << shift_left;
56 (frac, Int::ZERO)
57 } else {
58 let shift_right = -shift_left as u32;
60 let sticky = Int::from(frac.mask_lsb(shift_right) != 0);
61 let frac = const_as::<i64, Int>(frac.lshr(shift_right));
62 (frac, sticky)
63 }
64 };
65
66 if !is_normal {
80 if frac == Int::ZERO {
81 return (Decoded { frac: Int::ONE, exp: Int::MIN >> 1 }, Int::ZERO)
82 }
83 let underflow = unsafe { frac.leading_run_minus_one() };
85 frac = frac << underflow;
86 exponent = exponent.wrapping_sub(underflow as i64);
87 }
88
89 let exponent = exponent.wrapping_add(EXP_BIAS);
94 let exp =
95 if const { Int::BITS < 64 } && exponent > const_as::<Int, i64>(Int::MAX >> 1) {
96 Int::MAX >> 1
97 } else if const { Int::BITS < 64 } && exponent < const_as::<Int, i64>(Int::MIN >> 1) {
98 Int::MIN >> 1
99 } else {
100 const_as::<_, Int>(exponent)
101 };
102
103 (Decoded { exp, frac }, sticky)
104}
105
106fn decode_finite_f32<
109 const N: u32,
110 const ES: u32,
111 Int: crate::Int,
112 const RS: u32,
113>(num: f32) -> (Decoded<N, ES, RS, Int>, Int) {
114 debug_assert!(num.is_finite());
115 decode_finite_f64(num.into())
118}
119
120impl<
121 const N: u32,
122 const ES: u32,
123 Int: crate::Int,
124 const RS: u32,
125> RoundFrom<f32> for Posit<N, ES, Int, RS> {
126 fn round_from(value: f32) -> Self {
133 use core::num::FpCategory;
134 match value.classify() {
135 FpCategory::Nan | FpCategory::Infinite => Self::NAR,
136 FpCategory::Zero => Self::ZERO,
137 FpCategory::Normal | FpCategory::Subnormal => {
138 let (decoded, sticky) = decode_finite_f32(value);
139 unsafe { decoded.encode_regular_round(sticky) }
140 }
141 }
142 }
143}
144
145impl<
146 const N: u32,
147 const ES: u32,
148 Int: crate::Int,
149 const RS: u32,
150> RoundFrom<f64> for Posit<N, ES, Int, RS> {
151 fn round_from(value: f64) -> Self {
158 use core::num::FpCategory;
159 match value.classify() {
160 FpCategory::Nan | FpCategory::Infinite => Self::NAR,
161 FpCategory::Zero => Self::ZERO,
162 FpCategory::Normal | FpCategory::Subnormal => {
163 let (decoded, sticky) = decode_finite_f64(value);
164 unsafe { decoded.encode_regular_round(sticky) }
165 }
166 }
167 }
168}
169
170fn encode_finite_f64<
172 const N: u32,
173 const ES: u32,
174 Int: crate::Int,
175 const RS: u32,
176>(decoded: Decoded<N, ES, RS, Int>) -> f64 {
177 const MANTISSA_BITS: u32 = f64::MANTISSA_DIGITS - 1;
180 const EXPONENT_BITS: u32 = 64 - MANTISSA_BITS - 1;
181
182 let sign = decoded.frac.is_positive();
184 let (frac_abs, exp) =
185 if decoded.frac != Int::MIN {
188 (decoded.frac.wrapping_abs().mask_lsb(Decoded::<N, ES, RS, Int>::FRAC_WIDTH), decoded.exp)
189 } else {
190 (Int::ZERO, decoded.exp + Int::ONE)
191 };
192
193 let max_exponent: i64 = (1 << (EXPONENT_BITS - 1)) - 1;
205 let exponent =
206 if Int::BITS < EXPONENT_BITS || Posit::<N, ES, Int>::MAX_EXP < const_as(max_exponent) {
208 const_as::<Int, i64>(exp)
209 }
210 else {
212 if exp > const_as(max_exponent) {
214 return if sign {f64::INFINITY} else {f64::NEG_INFINITY}
215 }
216 else if exp <= const_as(-max_exponent) {
218 todo!("Subnormal numbers are _not_ currently supported when converting to/from IEEE floats")
219 }
220 else {
222 const_as::<Int, i64>(exp)
223 }
224 };
225
226 let shift_left = MANTISSA_BITS.saturating_sub(Decoded::<N, ES, RS, Int>::FRAC_WIDTH);
229 let shift_right = Decoded::<N, ES, RS, Int>::FRAC_WIDTH.saturating_sub(MANTISSA_BITS);
230 let mantissa = const_as::<Int, i64>(frac_abs >> shift_right) << shift_left;
231 let lost_bits = if shift_right == 0 {Int::ZERO} else {frac_abs << (Int::BITS - shift_right)};
237 let round = lost_bits < Int::ZERO;
238 let sticky = lost_bits << 1 != Int::ZERO;
239 let odd = mantissa & 1 == 1;
240 let round_up = round & (odd | sticky);
241
242 let mantissa = mantissa + i64::from(round_up);
245 let exponent = if round_up & (mantissa == 0) {exponent + 1} else {exponent};
246
247 let bits =
249 (u64::from(!sign) << (u64::BITS - 1))
250 | (((exponent + max_exponent) as u64) << MANTISSA_BITS)
251 | (mantissa as u64);
252 f64::from_bits(bits)
253}
254
255fn encode_finite_f32<
257 const N: u32,
258 const ES: u32,
259 Int: crate::Int,
260 const RS: u32,
261>(decoded: Decoded<N, ES, RS, Int>) -> f32 {
262 encode_finite_f64(decoded) as f32
264}
265
266impl<
267 const N: u32,
268 const ES: u32,
269 Int: crate::Int,
270 const RS: u32,
271> RoundFrom<Posit<N, ES, Int, RS>> for f32 {
272 fn round_from(value: Posit<N, ES, Int, RS>) -> Self {
282 if value == Posit::ZERO {
283 0.
284 } else if value == Posit::NAR {
285 f32::NAN
286 } else {
287 let decoded = unsafe { value.decode_regular() };
289 encode_finite_f32(decoded)
290 }
291 }
292}
293
294impl<
295 const N: u32,
296 const ES: u32,
297 Int: crate::Int,
298 const RS: u32,
299> RoundFrom<Posit<N, ES, Int, RS>> for f64 {
300 fn round_from(value: Posit<N, ES, Int, RS>) -> Self {
310 if value == Posit::ZERO {
311 0.
312 } else if value == Posit::NAR {
313 f64::NAN
314 } else {
315 let decoded = unsafe { value.decode_regular() };
317 encode_finite_f64(decoded)
318 }
319 }
320}
321
// Tests for the float <-> posit conversions defined in this file.
#[cfg(test)]
mod tests {
  use super::*;
  use malachite::rational::Rational;
  use proptest::prelude::*;

  // Float -> posit: fixed special values, plus a property test against exact rationals.
  mod float_to_posit {
    use super::*;

    // Generates the float -> posit test suite for one (`$float`, `$posit`) pair. Meant to be
    // invoked inside its own module, since it defines test functions with fixed names.
    macro_rules! make_tests {
      ($float:ty, $posit:ty) => {
        use super::*;

        #[test]
        fn zero() {
          assert_eq!(<$posit>::round_from(0.0 as $float), <$posit>::ZERO)
        }

        #[test]
        fn one() {
          assert_eq!(<$posit>::round_from(1.0 as $float), <$posit>::ONE)
        }

        #[test]
        fn minus_one() {
          assert_eq!(<$posit>::round_from(-1.0 as $float), <$posit>::MINUS_ONE)
        }

        #[test]
        fn nan() {
          assert_eq!(<$posit>::round_from(<$float>::NAN), <$posit>::NAR)
        }

        // The remaining fixed-value tests expect the float's extreme values to land on the
        // posit's extreme values. They only assert when the posit's `MAX_EXP` is at most 127;
        // for other posit types the test body is empty.
        // NOTE(review): 127 is presumably the largest `f32` exponent, i.e. the guard selects
        // posits whose range is narrower than that of both float types; confirm.
        #[test]
        fn min() {
          if const { <$posit>::MAX_EXP as i64 <= 127 } {
            assert_eq!(<$posit>::round_from(<$float>::MIN), <$posit>::MIN)
          }
        }

        #[test]
        fn max() {
          if const { <$posit>::MAX_EXP as i64 <= 127 } {
            assert_eq!(<$posit>::round_from(<$float>::MAX), <$posit>::MAX)
          }
        }

        #[test]
        fn min_positive() {
          if const { <$posit>::MAX_EXP as i64 <= 127 } {
            assert_eq!(<$posit>::round_from(<$float>::MIN_POSITIVE), <$posit>::MIN_POSITIVE)
          }
        }

        #[test]
        fn max_negative() {
          if const { <$posit>::MAX_EXP as i64 <= 127 } {
            assert_eq!(<$posit>::round_from(-<$float>::MIN_POSITIVE), <$posit>::MAX_NEGATIVE)
          }
        }

        // `from_bits(1)` is the float with only the lowest mantissa bit set, which is a
        // subnormal.
        #[test]
        fn subnormal_positive() {
          if const { <$posit>::MAX_EXP as i64 <= 127 } {
            assert_eq!(<$posit>::round_from(<$float>::from_bits(1)), <$posit>::MIN_POSITIVE)
          }
        }

        #[test]
        fn subnormal_negative() {
          if const { <$posit>::MAX_EXP as i64 <= 127 } {
            assert_eq!(<$posit>::round_from(-<$float>::from_bits(1)), <$posit>::MAX_NEGATIVE)
          }
        }

        // Property test over arbitrary floats: when the float converts to an exact `Rational`,
        // the posit must satisfy `is_correct_rounded` for it; when it does not convert, the
        // posit must be `NAR`.
        proptest!{
          #![proptest_config(ProptestConfig::with_cases(crate::PROPTEST_CASES))]
          #[test]
          fn proptest(float: $float) {
            let posit = <$posit>::round_from(float);
            match Rational::try_from(float) {
              Ok(exact) => assert!(super::rational::is_correct_rounded(exact, posit)),
              Err(_) => assert!(posit == <$posit>::NAR),
            }
          }
        }
      };
    }

    // Conversions from `f64`, instantiated for a range of posit types.
    mod f64 {
      use super::*;

      mod p8 { make_tests!{f64, crate::p8} }
      mod p16 { make_tests!{f64, crate::p16} }
      mod p32 { make_tests!{f64, crate::p32} }
      mod p64 { make_tests!{f64, crate::p64} }

      mod posit_8_0 { make_tests!{f64, Posit::<8, 0, i8>} }
      mod posit_10_0 { make_tests!{f64, Posit::<10, 0, i16>} }
      mod posit_10_1 { make_tests!{f64, Posit::<10, 1, i16>} }
      mod posit_10_2 { make_tests!{f64, Posit::<10, 2, i16>} }
      mod posit_10_3 { make_tests!{f64, Posit::<10, 3, i16>} }
      mod posit_20_4 { make_tests!{f64, Posit::<20, 4, i32>} }

      mod posit_3_0 { make_tests!{f64, Posit::<3, 0, i8>} }
      mod posit_4_0 { make_tests!{f64, Posit::<4, 0, i8>} }
      mod posit_4_1 { make_tests!{f64, Posit::<4, 1, i8>} }

      // Posit types with an explicit fourth generic argument.
      mod bposit_8_3_6 { make_tests!{f64, Posit::<8, 3, i8, 6>} }
      mod bposit_16_5_6 { make_tests!{f64, Posit::<16, 5, i16, 6>} }
      mod bposit_32_5_6 { make_tests!{f64, Posit::<32, 5, i32, 6>} }
      mod bposit_64_5_6 { make_tests!{f64, Posit::<64, 5, i64, 6>} }
      mod bposit_10_2_6 { make_tests!{f64, Posit::<10, 2, i16, 6>} }
      mod bposit_10_2_7 { make_tests!{f64, Posit::<10, 2, i16, 7>} }
      mod bposit_10_2_8 { make_tests!{f64, Posit::<10, 2, i16, 8>} }
      mod bposit_10_2_9 { make_tests!{f64, Posit::<10, 2, i16, 9>} }
    }

    // Conversions from `f32`, instantiated for the same posit types as the `f64` module.
    mod f32 {
      use super::*;

      mod p8 { make_tests!{f32, crate::p8} }
      mod p16 { make_tests!{f32, crate::p16} }
      mod p32 { make_tests!{f32, crate::p32} }
      mod p64 { make_tests!{f32, crate::p64} }

      mod posit_8_0 { make_tests!{f32, Posit::<8, 0, i8>} }
      mod posit_10_0 { make_tests!{f32, Posit::<10, 0, i16>} }
      mod posit_10_1 { make_tests!{f32, Posit::<10, 1, i16>} }
      mod posit_10_2 { make_tests!{f32, Posit::<10, 2, i16>} }
      mod posit_10_3 { make_tests!{f32, Posit::<10, 3, i16>} }
      mod posit_20_4 { make_tests!{f32, Posit::<20, 4, i32>} }

      mod posit_3_0 { make_tests!{f32, Posit::<3, 0, i8>} }
      mod posit_4_0 { make_tests!{f32, Posit::<4, 0, i8>} }
      mod posit_4_1 { make_tests!{f32, Posit::<4, 1, i8>} }

      // Posit types with an explicit fourth generic argument.
      mod bposit_8_3_6 { make_tests!{f32, Posit::<8, 3, i8, 6>} }
      mod bposit_16_5_6 { make_tests!{f32, Posit::<16, 5, i16, 6>} }
      mod bposit_32_5_6 { make_tests!{f32, Posit::<32, 5, i32, 6>} }
      mod bposit_64_5_6 { make_tests!{f32, Posit::<64, 5, i64, 6>} }
      mod bposit_10_2_6 { make_tests!{f32, Posit::<10, 2, i16, 6>} }
      mod bposit_10_2_7 { make_tests!{f32, Posit::<10, 2, i16, 7>} }
      mod bposit_10_2_8 { make_tests!{f32, Posit::<10, 2, i16, 8>} }
      mod bposit_10_2_9 { make_tests!{f32, Posit::<10, 2, i16, 9>} }
    }
  }

  // Posit -> float: checked indirectly, by requiring posit -> float -> posit to return the
  // original posit.
  mod posit_to_float {
    use super::*;

    // Roundtrip test over every value yielded by `cases_exhaustive_all()` for `$posit`.
    macro_rules! test_exhaustive {
      ($float:ty, $posit:ty) => {
        use super::*;

        #[test]
        fn posit_roundtrip_exhaustive() {
          for posit in <$posit>::cases_exhaustive_all() {
            let float = <$float>::round_from(posit);
            let reposit = <$posit>::round_from(float);
            assert_eq!(posit, reposit)
          }
        }

      };
    }

    // Roundtrip test over values drawn from the `cases_proptest_all()` strategy for `$posit`.
    macro_rules! test_proptest {
      ($float:ty, $posit:ty) => {
        use super::*;

        proptest!{
          #![proptest_config(ProptestConfig::with_cases(crate::PROPTEST_CASES))]

          #[test]
          fn posit_roundtrip_proptest(posit in <$posit>::cases_proptest_all()) {
            let float = <$float>::round_from(posit);
            let reposit = <$posit>::round_from(float);
            assert_eq!(posit, reposit)
          }

        }
      };
    }

    // Roundtrips through `f64`. The posit types backed by `i32` use the proptest variant; all
    // others listed here use the exhaustive variant. No 64-bit posit type is listed.
    // NOTE(review): presumably 64-bit posits are omitted because they can carry more fraction
    // bits than an `f64`, so the roundtrip would not be exact; confirm.
    mod f64 {
      use super::*;

      mod p8 { test_exhaustive!{f64, crate::p8} }
      mod p16 { test_exhaustive!{f64, crate::p16} }
      mod p32 { test_proptest!{f64, crate::p32} }
      mod posit_8_0 { test_exhaustive!{f64, Posit::<8, 0, i8>} }
      mod posit_10_0 { test_exhaustive!{f64, Posit::<10, 0, i16>} }
      mod posit_10_1 { test_exhaustive!{f64, Posit::<10, 1, i16>} }
      mod posit_10_2 { test_exhaustive!{f64, Posit::<10, 2, i16>} }
      mod posit_10_3 { test_exhaustive!{f64, Posit::<10, 3, i16>} }
      mod posit_20_4 { test_proptest!{f64, Posit::<20, 4, i32>} }

      mod posit_3_0 { test_exhaustive!{f64, Posit::<3, 0, i8>} }
      mod posit_4_0 { test_exhaustive!{f64, Posit::<4, 0, i8>} }
      mod posit_4_1 { test_exhaustive!{f64, Posit::<4, 1, i8>} }

      mod bposit_8_3_6 { test_exhaustive!{f64, Posit::<8, 3, i8, 6>} }
      mod bposit_16_5_6 { test_exhaustive!{f64, Posit::<16, 5, i16, 6>} }
      mod bposit_32_5_6 { test_proptest!{f64, Posit::<32, 5, i32, 6>} }
      mod bposit_10_2_6 { test_exhaustive!{f64, Posit::<10, 2, i16, 6>} }
      mod bposit_10_2_7 { test_exhaustive!{f64, Posit::<10, 2, i16, 7>} }
      mod bposit_10_2_8 { test_exhaustive!{f64, Posit::<10, 2, i16, 8>} }
      mod bposit_10_2_9 { test_exhaustive!{f64, Posit::<10, 2, i16, 9>} }
    }

    // Roundtrips through `f32`. Only a subset of the posit types from the `f64` module is
    // listed; the 32-bit-backed and 5-bit-exponent types are absent.
    // NOTE(review): presumably those are omitted because an `f32` cannot represent every value
    // of such posits exactly; confirm.
    mod f32 {
      use super::*;

      mod p8 { test_exhaustive!{f32, crate::p8} }
      mod p16 { test_exhaustive!{f32, crate::p16} }
      mod posit_8_0 { test_exhaustive!{f32, Posit::<8, 0, i8>} }
      mod posit_10_0 { test_exhaustive!{f32, Posit::<10, 0, i16>} }
      mod posit_10_1 { test_exhaustive!{f32, Posit::<10, 1, i16>} }
      mod posit_10_2 { test_exhaustive!{f32, Posit::<10, 2, i16>} }
      mod posit_10_3 { test_exhaustive!{f32, Posit::<10, 3, i16>} }
      mod posit_3_0 { test_exhaustive!{f32, Posit::<3, 0, i8>} }
      mod posit_4_0 { test_exhaustive!{f32, Posit::<4, 0, i8>} }
      mod posit_4_1 { test_exhaustive!{f32, Posit::<4, 1, i8>} }

      mod bposit_8_3_6 { test_exhaustive!{f32, Posit::<8, 3, i8, 6>} }
      mod bposit_10_2_6 { test_exhaustive!{f32, Posit::<10, 2, i16, 6>} }
      mod bposit_10_2_7 { test_exhaustive!{f32, Posit::<10, 2, i16, 7>} }
      mod bposit_10_2_8 { test_exhaustive!{f32, Posit::<10, 2, i16, 8>} }
      mod bposit_10_2_9 { test_exhaustive!{f32, Posit::<10, 2, i16, 9>} }
    }
  }
}