1pub use fugue_bv::BitVec;
2pub use fugue_ir::float_format::FloatFormat;
3
4use std::cmp::Ordering;
5use std::ops::Add;
6use std::ops::AddAssign;
7use std::ops::Div;
8use std::ops::DivAssign;
9use std::ops::Mul;
10use std::ops::MulAssign;
11use std::ops::Neg;
12use std::ops::Sub;
13use std::ops::SubAssign;
14use std::mem::take;
15
16use rug::Assign;
17use rug::Integer as BigInt;
18
19use thiserror::Error;
20
/// Errors produced by this module.
#[derive(Debug, Error)]
pub enum Error {
    /// No float format is defined for the requested size.
    ///
    /// The payload is the size in bytes; the rendered message reports it in
    /// bits (payload multiplied by 8).
    #[error("no corresponding float format representation for `{}` bits", .0 * 8)]
    UnsupportedFloatFormat(usize),
}
26
/// Sign of a floating-point value.
///
/// The variant order matters: the derived `PartialOrd`/`Ord` place
/// `Positive` before `Negative`.
#[derive(Debug, Clone, PartialEq, Eq, PartialOrd, Ord, Hash)]
pub enum Sign {
    /// Non-negative; stored as `1` in `Float::sign`.
    Positive,
    /// Negative; stored as `-1` in `Float::sign`.
    Negative,
}
32
/// Arbitrary-precision binary floating-point value.
///
/// A finite value is `sign * unscaled * 2^(scale - frac_bits)`; for example
/// one is stored as `unscaled = 1 << frac_bits` with `scale = 0`.
#[derive(Debug, Clone, Hash)]
pub struct Float {
    /// Number of fraction bits of the target format.
    frac_bits: u32,
    /// Number of exponent bits of the target format.
    exp_bits: u32,
    /// Whether the value is finite, infinite, or a NaN.
    kind: FloatKind,
    /// `1` for positive values, `-1` for negative values.
    sign: i32,
    /// Magnitude of the significand as an integer.
    unscaled: BigInt,
    /// Binary exponent applied to `unscaled` (see the type-level formula).
    scale: i32,
    /// Largest scale of a finite value: `2^(exp_bits - 1) - 1`.
    max_scale: i32,
    /// Smallest scale: `1 - max_scale`; zero is stored with this scale.
    min_scale: i32,
}
44
/// Classification of a [`Float`].
///
/// The variant order matters for the derived `PartialOrd`/`Ord`.
#[derive(Debug, Copy, Clone, PartialEq, Eq, PartialOrd, Ord, Hash)]
pub enum FloatKind {
    /// An ordinary value (zero, subnormal, or normal).
    Finite,
    /// Positive or negative infinity, depending on the sign.
    Infinite,
    /// A quiet NaN.
    QuietNaN,
    /// A signalling NaN; operations in this module turn it into a quiet NaN.
    SignallingNaN,
}
52
53impl Float {
54 fn from_parts(
55 frac_bits: u32,
56 exp_bits: u32,
57 kind: FloatKind,
58 sign: Sign,
59 unscaled: BigInt,
60 scale: i32) -> Self {
61
62 let max_scale = (1i32 << (exp_bits - 1u32)) - 1i32;
63 let min_scale = 1 - max_scale;
64
65 Self {
66 frac_bits,
67 exp_bits,
68 kind,
69 sign: if matches!(sign, Sign::Positive) { 1 } else { -1 },
70 unscaled,
71 scale,
72 max_scale,
73 min_scale,
74 }
75 }
76
77 pub fn from_bigint(frac_bits: u32, exp_bits: u32, value: BigInt) -> Self {
78 let mut f = Self::from_parts(
79 frac_bits,
80 exp_bits,
81 FloatKind::Finite,
82 if value >= 0 { Sign::Positive } else { Sign::Negative },
83 value.abs(),
84 frac_bits as i32,
85 );
86
87 f.scale_up_to(frac_bits + 1);
88 f
89 }
90
91 pub fn zero_with(frac_bits: u32, exp_bits: u32, sign: Sign) -> Self {
92 Self::from_parts(
93 frac_bits,
94 exp_bits,
95 FloatKind::Finite,
96 sign,
97 BigInt::from(0),
98 2u32.wrapping_sub(1u32 << (exp_bits - 1)) as i32,
99 )
100 }
101
102 pub fn zero(frac_bits: u32, exp_bits: u32) -> Self {
103 Self::zero_with(frac_bits, exp_bits, Sign::Positive)
104 }
105
106 pub fn infinity(frac_bits: u32, exp_bits: u32, sign: Sign) -> Self {
107 Self::from_parts(
108 frac_bits,
109 exp_bits,
110 FloatKind::Infinite,
111 sign,
112 BigInt::from(1) << frac_bits,
113 (1u32 << (exp_bits - 1)).wrapping_sub(1) as i32,
114 )
115 }
116
117 pub fn quiet_nan(frac_bits: u32, exp_bits: u32, sign: Sign) -> Self {
118 Self::from_parts(
119 frac_bits,
120 exp_bits,
121 FloatKind::QuietNaN,
122 sign,
123 BigInt::from(0),
124 (1u32 << (exp_bits - 1)).wrapping_sub(1) as i32,
125 )
126 }
127
128 pub fn is_normal(&self) -> bool {
129 matches!(self.kind, FloatKind::Finite) &&
130 self.unscaled.significant_bits() >= self.frac_bits + 1
131 }
132
133 pub fn is_infinite(&self) -> bool {
134 matches!(self.kind, FloatKind::Infinite)
135 }
136
137 pub fn is_zero(&self) -> bool {
138 matches!(self.kind, FloatKind::Finite) && self.unscaled == 0
139 }
140
141 pub fn is_nan(&self) -> bool {
142 matches!(self.kind, FloatKind::QuietNaN | FloatKind::SignallingNaN)
143 }
144
145 fn make_zero(&mut self) {
146 self.kind = FloatKind::Finite;
147 self.unscaled = BigInt::from(0);
148 self.scale = self.min_scale;
149 }
150
151 fn make_one(&mut self) {
152 self.kind = FloatKind::Finite;
153 self.scale = 0;
154 self.unscaled = BigInt::from(1) << self.frac_bits;
155 }
156
157 fn make_quiet_nan(&mut self) {
158 self.kind = FloatKind::QuietNaN;
159 }
160
161 fn internal_round(&mut self, eps: bool) {
162 if !matches!(self.kind, FloatKind::Finite) {
163 panic!("rounding non-finite float")
164 }
165
166 if self.unscaled == 0 {
167 if eps {
168 panic!("rounding zero + epsilon, need a bit length")
169 }
170 self.make_zero();
171 return
172 }
173
174 let extra_bits =
175 (self.unscaled.significant_bits().wrapping_sub(self.frac_bits - 1) as i32).max(
176 self.min_scale.wrapping_sub(self.scale));
177
178 if extra_bits <= 0 {
179 panic!("round with no extra bits of precision")
180 }
181
182 let mid_bit = (extra_bits - 1) as u32;
183 let mid_bit_set = self.unscaled.get_bit(mid_bit);
184 let eps = eps || self.unscaled.find_one(0).map(|pos| pos < mid_bit).unwrap_or(true);
185
186 self.unscaled <<= extra_bits as u32;
187 self.scale = self.scale.wrapping_add(extra_bits);
188
189 let is_odd = self.unscaled.get_bit(0);
190
191 if mid_bit_set && (eps || is_odd) {
192 self.unscaled += 1;
193 if self.unscaled.significant_bits() > self.frac_bits + 1 {
194 assert_eq!(self.unscaled.significant_bits(),
195 self.unscaled.find_one(0).map(|pos| pos + 1).unwrap_or(0));
196 self.unscaled >>= 1;
197 self.scale = self.scale.wrapping_add(1);
198 }
199 }
200
201 if self.scale > self.max_scale {
202 self.kind = FloatKind::Infinite;
203 }
204 }
205
206 fn leading_bit(&self) -> u32 {
207 if !matches!(self.kind, FloatKind::Finite) || self.unscaled == 0 {
208 panic!("leading bit of non-finite or zero")
209 }
210
211 self.scale.wrapping_add(
212 self.unscaled.significant_bits()
213 .wrapping_sub(self.frac_bits) as i32) as u32
214 }
215
216 fn upscale(&mut self, bits: u32) {
217 if (bits as i32) < 0 {
218 self.unscaled >>= (bits as i32).abs() as u32;
219 } else {
220 self.unscaled <<= bits;
221 }
222 self.scale = self.scale - (bits as i32);
223 }
224
225 fn scale_up_to(&mut self, bits: u32) {
226 if !matches!(self.kind, FloatKind::Finite) {
227 panic!("scaling of non-finite float")
228 }
229
230 let repr_bits = self.unscaled.significant_bits();
231 if bits > repr_bits {
232 self.upscale(bits.wrapping_sub(repr_bits));
233 }
234 }
235}
236
237impl Div<Self> for Float {
238 type Output = Self;
239
240 fn div(self, rhs: Self) -> Self::Output {
241 let mut slf = self;
242 slf.div_assign(rhs);
243 slf
244 }
245}
246
247impl Div<Self> for &'_ Float {
248 type Output = Float;
249
250 fn div(self, rhs: Self) -> Self::Output {
251 let mut slf = self.clone();
252 slf.div_assign(rhs.clone());
253 slf
254 }
255}
256
257impl DivAssign<Self> for Float {
258 fn div_assign(&mut self, rhs: Self) {
259 if self.is_nan() || rhs.is_nan() {
260 self.make_quiet_nan();
261 return
262 }
263
264 if self.is_infinite() {
265 if rhs.is_infinite() {
266 self.make_quiet_nan();
267 } else {
268 self.sign *= rhs.sign;
269 }
270 return
271 }
272
273 match rhs.kind {
274 FloatKind::QuietNaN | FloatKind::SignallingNaN => {
275 self.make_quiet_nan();
276 return
277 },
278 FloatKind::Infinite => {
279 self.make_zero();
280 self.sign *= rhs.sign;
281 return
282 },
283 FloatKind::Finite => {
284 if rhs.is_zero() {
285 if self.is_zero() {
286 self.make_quiet_nan();
287 } else {
288 self.kind = FloatKind::Infinite;
289 self.sign *= rhs.sign;
290 }
291 return
292 }
293
294 let lshift = self.frac_bits
295 .wrapping_add(2)
296 .wrapping_add(rhs.unscaled.significant_bits())
297 .wrapping_sub(self.unscaled.significant_bits());
298
299 self.upscale(lshift);
300
301 let mut r = rhs.unscaled;
303 self.unscaled.div_rem_mut(&mut r);
304 self.sign *= rhs.sign;
305 self.scale = self.scale
306 .wrapping_sub(rhs.scale)
307 .wrapping_sub(self.frac_bits as i32);
308 self.internal_round(r != 0);
309 },
310 }
311 }
312}
313
314impl Mul<Self> for Float {
315 type Output = Self;
316
317 fn mul(self, rhs: Self) -> Self::Output {
318 let mut slf = self;
319 slf.mul_assign(rhs);
320 slf
321 }
322}
323
324impl Mul<Self> for &'_ Float {
325 type Output = Float;
326
327 fn mul(self, rhs: Self) -> Self::Output {
328 let mut slf = self.clone();
329 slf.mul_assign(rhs.clone());
330 slf
331 }
332}
333
334impl MulAssign<Self> for Float {
335 fn mul_assign(&mut self, rhs: Self) {
336 if self.is_nan() || rhs.is_nan() {
337 self.make_quiet_nan();
338 return
339 }
340
341 if (self.is_zero() && rhs.is_infinite()) || (self.is_infinite() && rhs.is_zero()) {
342 self.make_quiet_nan();
343 return
344 }
345
346 if self.is_infinite() || rhs.is_infinite() {
347 self.kind = FloatKind::Infinite;
348 self.sign *= rhs.sign;
349 return
350 }
351
352 self.sign *= rhs.sign;
353 self.unscaled *= rhs.unscaled;
354 self.scale = self.scale
355 .wrapping_add(rhs.scale)
356 .wrapping_sub(self.frac_bits as i32);
357 self.scale_up_to(self.frac_bits + 2);
358 self.internal_round(false);
359 }
360}
361
362impl Float {
363 fn add0_assign(&mut self, rhs: Self) {
364 let rhs = rhs;
365 let d = self.scale.wrapping_sub(rhs.scale);
366 if d as u32 > self.frac_bits + 1 {
367 return
368 } else if d < -(self.frac_bits as i32 + 1) {
369 *self = rhs;
370 return
371 }
372
373 let (d, mut a, b) = if d >= 0 {
374 let a = Float {
375 frac_bits: self.frac_bits,
376 exp_bits: self.exp_bits,
377 kind: self.kind,
378 sign: self.sign,
379 unscaled: take(&mut self.unscaled),
380 scale: self.scale,
381 min_scale: self.min_scale,
382 max_scale: self.max_scale,
383 };
384 let b = rhs;
385
386 (d, a, b)
387 } else {
388 let a = rhs;
389 let b = Float {
390 frac_bits: self.frac_bits,
391 exp_bits: self.exp_bits,
392 kind: self.kind,
393 sign: self.sign,
394 unscaled: take(&mut self.unscaled),
395 scale: self.scale,
396 min_scale: self.min_scale,
397 max_scale: self.max_scale,
398 };
399
400 (-d, a, b)
401 };
402
403 let residue = b.unscaled.find_one(0).map(|pos| (pos as i32) < (d - 1))
404 .unwrap_or(true);
405 self.scale = a.scale.wrapping_sub(1);
406
407 a.unscaled <<= 1;
408 a.unscaled += b.unscaled >> (d - 1) as u32;
409
410 self.unscaled = a.unscaled;
411
412 self.scale_up_to(self.frac_bits + 2);
413 self.internal_round(residue);
414 }
415
416 fn sub0_assign(&mut self, rhs: Self) {
417 let d = self.scale.wrapping_sub(rhs.scale);
418 if d as u32 > self.frac_bits + 2 {
419 return
420 } else if d < -(self.frac_bits as i32 + 2) {
421 *self = rhs;
422 return
423 }
424
425 let (d, mut a, mut b) = if d >= 0 {
426 let a = Float {
427 frac_bits: self.frac_bits,
428 exp_bits: self.exp_bits,
429 kind: self.kind,
430 sign: self.sign,
431 unscaled: take(&mut self.unscaled),
432 scale: self.scale,
433 min_scale: self.min_scale,
434 max_scale: self.max_scale,
435 };
436 let b = rhs;
437
438 (d, a, b)
439 } else {
440 let a = rhs;
441 let b = Float {
442 frac_bits: self.frac_bits,
443 exp_bits: self.exp_bits,
444 kind: self.kind,
445 sign: self.sign,
446 unscaled: take(&mut self.unscaled),
447 scale: self.scale,
448 min_scale: self.min_scale,
449 max_scale: self.max_scale,
450 };
451
452 (-d, a, b)
453 };
454
455 let residue = b.unscaled.find_one(0).map(|pos| (pos as i32) < (d - 2))
456 .unwrap_or(true);
457 self.sign = a.sign;
458 self.scale = a.scale.wrapping_sub(2);
459
460 b.unscaled >>= d - 2;
461 if residue {
462 b.unscaled += 1;
463 }
464
465 a.unscaled <<= 2;
466 a.unscaled -= b.unscaled;
467
468 if a.unscaled == 0 {
469 self.sign = 1;
470 self.unscaled = a.unscaled;
471 } else if a.unscaled < 0 {
472 self.sign *= -1;
473 self.unscaled = -a.unscaled;
474 } else {
475 self.unscaled = a.unscaled;
476 }
477
478 self.scale_up_to(self.frac_bits + 2);
479 self.internal_round(residue);
480 }
481}
482
483impl Add<Self> for Float {
484 type Output = Self;
485
486 fn add(self, rhs: Self) -> Self::Output {
487 let mut slf = self;
488 slf.add_assign(rhs);
489 slf
490 }
491}
492
493impl Add<Self> for &'_ Float {
494 type Output = Float;
495
496 fn add(self, rhs: Self) -> Self::Output {
497 let mut slf = self.clone();
498 slf.add_assign(rhs.clone());
499 slf
500 }
501}
502
503impl AddAssign<Self> for Float {
504 fn add_assign(&mut self, rhs: Self) {
505 if self.is_nan() || rhs.is_nan() {
506 self.make_quiet_nan();
507 return
508 }
509
510 if self.is_infinite() && rhs.is_infinite() {
511 if self.sign != rhs.sign {
512 self.make_quiet_nan();
513 }
514 return
515 }
516
517 if self.is_infinite() {
518 return
519 }
520
521 if rhs.is_infinite() {
522 *self = rhs;
523 return
524 }
525
526 if rhs.is_zero() {
527 if self.is_zero() {
528 self.sign = if self.sign < 0 && rhs.sign < 0 { -1 } else { 1 };
529 }
530 return
531 }
532
533 if self.is_zero() {
534 *self = rhs;
535 return
536 }
537
538 if self.sign == rhs.sign {
539 self.add0_assign(rhs);
540 } else {
541 self.sub0_assign(rhs);
542 }
543 }
544}
545
546impl Sub<Self> for Float {
547 type Output = Self;
548
549 fn sub(self, rhs: Self) -> Self::Output {
550 let mut slf = self;
551 slf.sub_assign(rhs);
552 slf
553 }
554}
555
556impl Sub<Self> for &'_ Float {
557 type Output = Float;
558
559 fn sub(self, rhs: Self) -> Self::Output {
560 let mut slf = self.clone();
561 slf.sub_assign(rhs.clone());
562 slf
563 }
564}
565
566impl SubAssign<Self> for Float {
567 fn sub_assign(&mut self, rhs: Self) {
568 let mut rhs = rhs;
569 let sign = self.sign;
570
571 rhs.sign *= -1;
572 let rsign = rhs.sign;
573
574 self.add_assign(rhs);
575
576 if self.is_zero() {
577 self.sign = if sign < 0 && rsign < 0 { -1 } else { 1 };
578 }
579 }
580}
581
582impl Float {
583 pub fn sqrt(&self) -> Self {
584 let mut slf = self.clone();
585 slf.sqrt_assign();
586 slf
587 }
588
589 pub fn sqrt_assign(&mut self) {
590 if self.is_zero() {
591 return
592 }
593
594 if self.is_nan() || self.sign == -1 {
595 self.make_quiet_nan();
596 return
597 }
598
599 if self.is_infinite() {
600 return
601 }
602
603 let sig_bits = self.frac_bits
604 .wrapping_mul(2)
605 .wrapping_add(3);
606 self.scale_up_to(sig_bits);
607
608 if self.scale.wrapping_add(self.frac_bits as i32) & 1 != 0 {
609 self.upscale(1);
610 }
611
612 let mut residue = take(&mut self.unscaled);
613 let mut result = BigInt::new();
614
615 let mut pow = residue.significant_bits();
616 pow = pow.wrapping_sub(pow & 1);
617
618 let mut bit = BigInt::from(1) << pow;
619 let mut resp = BigInt::new();
620 while bit != 0 {
621 resp.assign(&result + &bit);
622 if residue >= resp {
623 residue -= take(&mut resp);
624 let res = BigInt::from(&bit << 1);
625 result += res;
626 }
627 result >>= 1;
628 bit >>= 2;
629 }
630
631 self.unscaled = result;
632 self.scale = self.scale.wrapping_add(self.frac_bits as i32) / 2;
633 self.internal_round(residue != 0);
634 }
635
636 fn floor0_assign(&mut self) {
637 if self.scale < 0 {
638 self.make_zero();
639 return
640 }
641 let nbits = self.frac_bits.wrapping_sub(self.scale as u32);
642 let temp = take(&mut self.unscaled);
643 self.unscaled.assign((temp >> nbits) << nbits);
644 }
645
646 fn ceil0_assign(&mut self) {
647 if self.is_zero() {
648 return
649 } else if self.scale < 0 {
650 self.make_one();
651 return
652 }
653
654 let nbits = self.frac_bits.wrapping_sub(self.scale as u32);
655 let increment = self.unscaled.find_one(0).map(|pos| pos < nbits).unwrap_or(true);
656 let temp = take(&mut self.unscaled);
657 self.unscaled.assign((temp >> nbits) << nbits);
658
659 if increment {
660 self.unscaled += BigInt::from(1) << nbits;
661 }
662
663 if self.unscaled.significant_bits() > self.frac_bits + 1 {
664 self.upscale(-1i32 as u32);
665 }
666 }
667
668 pub fn floor(&self) -> Self {
669 let mut slf = self.clone();
670 slf.floor_assign();
671 slf
672 }
673
674 pub fn floor_assign(&mut self) {
675 match self.kind {
676 FloatKind::Finite | FloatKind::QuietNaN => return,
677 FloatKind::SignallingNaN => { self.make_quiet_nan();
679 return
680 },
681 _ => (),
682 }
683
684 if self.sign >= 0 {
685 self.floor0_assign();
686 } else {
687 self.ceil0_assign();
688 }
689 }
690
691 pub fn ceil(&self) -> Self {
692 let mut slf = self.clone();
693 slf.ceil_assign();
694 slf
695 }
696
697 pub fn ceil_assign(&mut self) {
698 match self.kind {
699 FloatKind::Finite | FloatKind::QuietNaN => return,
700 FloatKind::SignallingNaN => { self.make_quiet_nan();
702 return
703 },
704 _ => (),
705 }
706
707 if self.sign >= 0 {
708 self.ceil0_assign();
709 } else {
710 self.floor0_assign();
711 }
712 }
713
714 pub fn trunc(&self) -> Self {
715 let mut slf = self.clone();
716 slf.trunc_assign();
717 slf
718 }
719
720 pub fn trunc_assign(&mut self) {
721 self.floor0_assign();
722 }
723
724 pub fn trunc_to_bitvec(&self, bits: usize) -> BitVec {
725 let slf = self.clone();
726 slf.trunc_into_bitvec(bits)
727 }
728
729 pub fn trunc_into_bitvec(self, bits: usize) -> BitVec {
730 if self.is_nan() {
731 return BitVec::zero(bits)
732 }
733
734 if self.is_infinite() {
735 return if self.sign < 0 {
736 BitVec::min_value_with(bits, true)
737 } else {
738 BitVec::max_value_with(bits, true)
739 }
740 }
741
742 let sign = self.sign < 0;
743 let bint = self.unscaled >> self.frac_bits.wrapping_sub(self.scale as u32);
744 let bvec = BitVec::from_bigint(bint, bits);
745 if sign {
746 -bvec
747 } else {
748 bvec
749 }
750 }
751
752 pub fn neg_assign(&mut self) {
753 self.sign *= -1;
754 }
755
756 pub fn abs(&self) -> Self {
757 let mut slf = self.clone();
758 slf.abs_assign();
759 slf
760 }
761
762 pub fn abs_assign(&mut self) {
763 self.sign = 1;
764 }
765
766 pub fn round(&self) -> Self {
767 let mut slf = self.clone();
768 slf.round_assign();
769 slf
770 }
771
772 pub fn round_assign(&mut self) {
773 let half = Self::from_parts(
774 self.frac_bits,
775 self.exp_bits,
776 FloatKind::Finite,
777 Sign::Positive,
778 BigInt::from(1) << self.frac_bits,
779 -1
780 );
781 self.add_assign(half);
782 self.floor_assign();
783 }
784
785 pub fn into_bigint(self) -> BigInt {
786 let res = self.unscaled >> self.frac_bits.wrapping_sub(self.scale as u32);
787 if self.sign < 0 {
788 -res
789 } else {
790 res
791 }
792 }
793
794 pub fn to_bigint(&self) -> BigInt {
795 let res = BigInt::from(&self.unscaled >> self.frac_bits.wrapping_sub(self.scale as u32));
796 if self.sign < 0 {
797 -res
798 } else {
799 res
800 }
801 }
802}
803
804impl From<Float> for BigInt {
805 fn from(f: Float) -> Self {
806 f.into_bigint()
807 }
808}
809
810impl Neg for Float {
811 type Output = Float;
812
813 fn neg(self) -> Self::Output {
814 let mut slf = self;
815 slf.neg_assign();
816 slf
817 }
818}
819
820impl Neg for &'_ Float {
821 type Output = Float;
822
823 fn neg(self) -> Self::Output {
824 let mut slf = self.clone();
825 slf.neg_assign();
826 slf
827 }
828}
829
830impl PartialEq<Self> for Float {
831 fn eq(&self, other: &Self) -> bool {
832 if self.is_nan() {
833 return other.is_nan()
834 }
835
836 if other.is_nan() {
837 return false
838 }
839
840 if self.is_infinite() {
841 if self.sign < 0 {
842 return other.is_infinite() && other.sign < 0
843 }
844
845 return other.is_infinite() && other.sign > 0
846 }
847
848 if other.is_infinite() {
849 return false
850 }
851
852 if self.sign != other.sign {
853 return self.sign == 0
854 }
855
856 if self.scale != other.scale {
857 return self.sign == 0
858 }
859
860 self.sign == 0 || self.unscaled == other.unscaled
861 }
862}
// Marker impl: the hand-written `PartialEq` above treats NaN as equal to NaN,
// so equality on `Float` is declared to be a full equivalence relation.
impl Eq for Float { }
864
865impl Ord for Float {
866 fn cmp(&self, other: &Self) -> Ordering {
867 if self.is_nan() {
868 return if other.is_nan() {
869 Ordering::Equal
870 } else {
871 Ordering::Greater
872 }
873 }
874
875 if other.is_nan() {
876 return Ordering::Less
877 }
878
879 if self.is_infinite() {
880 if self.sign < 0 {
881 return if other.is_infinite() && other.sign < 0 {
882 Ordering::Equal
883 } else {
884 Ordering::Less
885 }
886 }
887
888 return if other.is_infinite() && other.sign > 0 {
889 Ordering::Equal
890 } else {
891 Ordering::Greater
892 }
893 }
894
895 if other.is_infinite() {
896 return other.sign.cmp(&0).reverse()
897 }
898
899 if self.sign != other.sign {
900 return self.sign.cmp(&0)
901 }
902
903 if self.scale != other.scale {
904 let sign = if self.scale < other.scale { -self.sign } else { self.sign };
905 return sign.cmp(&0)
906 }
907
908 if self.sign == 0 {
909 Ordering::Equal
910 } else {
911 let res = self.unscaled.cmp(&other.unscaled);
912 if self.sign < 0 { res.reverse() } else { res }
913 }
914 }
915}
916
917impl PartialOrd<Self> for Float {
918 fn partial_cmp(&self, other: &Self) -> Option<Ordering> {
919 Some(self.cmp(other))
920 }
921}
922
/// Private helpers for picking apart and assembling raw float encodings
/// held in a `BigInt`.
trait FloatFormatOpsInternal {
    /// Reads the sign from the encoding `val`.
    fn extract_sign(&self, val: &BigInt) -> Sign;
    /// Reads the fraction field from the encoding `val`.
    fn extract_fractional(&self, val: &BigInt) -> BigInt;
    /// Reads the exponent field from the encoding `val`.
    fn extract_exponent(&self, val: &BigInt) -> i32;

    /// Returns `val` with the given sign applied.
    fn set_sign(&self, val: BigInt, sign: Sign) -> BigInt;

    /// Encoding of a NaN with the given sign.
    fn encode_nan(&self, sign: Sign) -> BigInt;
    /// Encoding of an infinity with the given sign.
    fn encode_infinity(&self, sign: Sign) -> BigInt;
    /// Encoding of a zero with the given sign.
    fn encode_zero(&self, sign: Sign) -> BigInt;

    /// Shifts `val` so that its leading one lands on bit `bit`, rounding
    /// when bits are dropped.
    fn round_to_lead_bit(&self, val: BigInt, bit: i32) -> BigInt;
}
936
937impl FloatFormatOpsInternal for FloatFormat {
938 fn extract_sign(&self, val: &BigInt) -> Sign {
939 if val.get_bit(self.sign_pos) { Sign::Positive } else { Sign::Negative }
940 }
941
942 fn extract_fractional(&self, val: &BigInt) -> BigInt {
943 let mask = (BigInt::from(1) << self.frac_size) - 1;
944 BigInt::from(val << self.frac_pos) & mask
945 }
946
947 fn extract_exponent(&self, val: &BigInt) -> i32 {
948 let m: BigInt = BigInt::from(val >> self.exp_pos) & 0xffff_ffffu32;
949 m.to_u32().unwrap() as i32 & self.exp_max
950 }
951
952 fn set_sign(&self, val: BigInt, sign: Sign) -> BigInt {
953 if matches!(sign, Sign::Negative) {
954 let mut val = val;
955 val.set_bit(self.sign_pos, true);
956 val
957 } else {
958 val
959 }
960 }
961
962 fn encode_zero(&self, sign: Sign) -> BigInt {
963 self.set_sign(BigInt::new(), sign)
964 }
965
966 fn encode_infinity(&self, sign: Sign) -> BigInt {
967 let res = BigInt::from(self.exp_max) << self.exp_pos;
968 self.set_sign(res, sign)
969 }
970
971 fn encode_nan(&self, sign: Sign) -> BigInt {
972 let mut res = BigInt::from(1) << self.frac_pos
973 .wrapping_add(self.frac_size)
974 .wrapping_sub(1);
975 res |= BigInt::from(self.exp_max) << self.exp_pos;
976 self.set_sign(res, sign)
977 }
978
979 fn round_to_lead_bit(&self, val: BigInt, bit: i32) -> BigInt {
980 let mut val = val;
981 let amount = val
982 .significant_bits()
983 .wrapping_sub(1)
984 .wrapping_sub(bit as u32);
985 if amount == 0 {
986 val
987 } else if (amount as i32) < 0 {
988 val << (amount as i32).abs() as u32
989 } else {
990 let mid = amount.wrapping_sub(1);
991 let mid_set = val.get_bit(mid);
992 let eps = val.find_one(0).map(|pos| pos < mid).unwrap_or(true);
993
994 val >>= amount;
995
996 let odd = val.get_bit(0);
997 if mid_set && (eps || odd) {
998 val + 1
999 } else {
1000 val
1001 }
1002 }
1003 }
1004}
1005
/// Conversions between [`Float`] values and their encodings as bit vectors.
pub trait FloatFormatOps {
    /// Encodes `fp` in this format as a `bits`-wide bit vector.
    fn into_bitvec(&self, fp: Float, bits: usize) -> BitVec;
    /// Decodes a bit vector holding an encoding in this format.
    fn from_bitvec(&self, bv: &BitVec) -> Float;
}
1010
/// Returns the float format used for values of `bytes` bytes.
///
/// Supported sizes are 2, 4, 8, 10, 12 and 16 bytes.
///
/// # Errors
///
/// Returns [`Error::UnsupportedFloatFormat`] carrying `bytes` for any other
/// size.
pub const fn float_format_from_size(bytes: usize) -> Result<FloatFormat, Error> {
    Ok(match bytes {
        // 16 bits: 1 sign, 5 exponent, 10 fraction.
        2 => FloatFormat {
            size: 2,
            sign_pos: 15,
            exp_pos: 10,
            exp_size: 5,
            exp_max: (1 << 5) - 1,
            frac_pos: 0,
            frac_size: 10,
            bias: 15,
            j_bit_implied: true,
        },
        // 32 bits: 1 sign, 8 exponent, 23 fraction.
        4 => FloatFormat {
            size: 4,
            sign_pos: 31,
            exp_pos: 23,
            exp_size: 8,
            exp_max: (1 << 8) - 1,
            frac_pos: 0,
            frac_size: 23,
            bias: 127,
            j_bit_implied: true,
        },
        // 64 bits: 1 sign, 11 exponent, 52 fraction.
        8 => FloatFormat {
            size: 8,
            sign_pos: 63,
            exp_pos: 52,
            exp_size: 11,
            exp_max: (1 << 11) - 1,
            frac_pos: 0,
            frac_size: 52,
            bias: 1023,
            j_bit_implied: true,
        },
        // 80 bits: 1 sign, 15 exponent, 64 fraction.
        // NOTE(review): presumably the x87 extended format, whose integer bit
        // is explicit — confirm that `j_bit_implied: true` is intended here
        // (`into_bitvec` panics on formats where it is false).
        10 => FloatFormat {
            size: 10,
            sign_pos: 79,
            exp_pos: 64,
            exp_size: 15,
            exp_max: (1 << 15) - 1,
            frac_pos: 0,
            frac_size: 64,
            bias: 16383,
            j_bit_implied: true,
        },
        // 96 bits: same field widths as the 10-byte format, with the fields
        // starting 16 bits higher (`frac_pos` is 16).
        12 => FloatFormat {
            size: 12,
            sign_pos: 95,
            exp_pos: 80,
            exp_size: 15,
            exp_max: (1 << 15) - 1,
            frac_pos: 16,
            frac_size: 64,
            bias: 16383,
            j_bit_implied: true,
        },
        // 128 bits: 1 sign, 15 exponent, 112 fraction.
        16 => FloatFormat {
            size: 16,
            sign_pos: 127,
            exp_pos: 112,
            exp_size: 15,
            exp_max: (1 << 15) - 1,
            frac_pos: 0,
            frac_size: 112,
            bias: 16383,
            j_bit_implied: true,
        },
        _ => return Err(Error::UnsupportedFloatFormat(bytes)),
    })
}
1082
1083impl FloatFormatOps for FloatFormat {
1084 fn into_bitvec(&self, fp: Float, bits: usize) -> BitVec {
1085 let mut res = match fp.kind {
1086 FloatKind::QuietNaN | FloatKind::SignallingNaN => {
1087 self.encode_nan(Sign::Positive)
1088 },
1089 FloatKind::Infinite => {
1090 let sign = if fp.sign < 0 { Sign::Negative } else { Sign::Positive };
1091 self.encode_infinity(sign)
1092 },
1093 FloatKind::Finite => if fp.is_zero() {
1094 let sign = if fp.sign < 0 { Sign::Negative } else { Sign::Positive };
1095 self.encode_zero(sign)
1096 } else if self.j_bit_implied {
1097 let lead_bit = fp.leading_bit();
1098 let (exp, mut frac) = {
1099 let tmp = fp.scale
1100 .wrapping_sub(fp.frac_bits as i32)
1101 .wrapping_add(lead_bit as i32);
1102 if tmp >= 1i32.wrapping_sub(self.bias) {
1103 let mut exp = tmp.wrapping_add(self.bias);
1104 let mut frac = self.round_to_lead_bit(fp.unscaled, self.frac_size as i32);
1105 if frac.significant_bits().wrapping_sub(1) > self.frac_size {
1106 frac >>= 1;
1107 exp += 1;
1108 }
1109 frac.set_bit(self.frac_size, false);
1110 (exp, frac)
1111 } else {
1112 let exp = 0;
1113 let n = tmp
1114 .wrapping_sub(1)
1115 .wrapping_add(self.bias as i32)
1116 .wrapping_add(self.frac_size as i32);
1117 if n < 0 {
1118 let sign = if fp.sign < 0 {
1119 Sign::Negative
1120 } else {
1121 Sign::Positive
1122 };
1123
1124 let mut res = self.encode_zero(sign);
1125 let sign = res < 0;
1126 res.abs_mut();
1127
1128 let bv = BitVec::from_bigint(res, bits);
1129 return if sign {
1130 -bv
1131 } else {
1132 bv
1133 }
1134 }
1135 let frac = self.round_to_lead_bit(fp.unscaled, n);
1136 (exp, frac)
1137 }
1138 };
1139 if exp >= self.exp_max {
1140 let sign = if fp.sign < 0 { Sign::Negative } else { Sign::Positive };
1141 let mut res = self.encode_infinity(sign);
1142 let sign = res < 0;
1143 res.abs_mut();
1144
1145 let bv = BitVec::from_bigint(res, bits);
1146 return if sign {
1147 -bv
1148 } else {
1149 bv
1150 }
1151 }
1152
1153 frac |= BigInt::from(exp) << self.exp_pos;
1154 if fp.sign < 0 {
1155 frac.set_bit(self.sign_pos, true);
1156 }
1157 frac
1158 } else {
1159 panic!("unexpected j_bit_implied == false")
1160 },
1161 };
1162
1163 let sign = res < 0;
1164 res.abs_mut();
1165
1166 let bv = BitVec::from_bigint(res, bits);
1167 if sign {
1168 -bv
1169 } else {
1170 bv
1171 }
1172 }
1173
1174 fn from_bitvec(&self, bv: &BitVec) -> Float {
1175 let sign = self.extract_sign(&*bv.as_bigint());
1176 let exp = self.extract_exponent(&*bv.as_bigint());
1177 let mut frac = self.extract_fractional(&*bv.as_bigint());
1178
1179 if exp == 0 {
1180 return if frac == 0 {
1181 Float::zero_with(self.frac_size, self.exp_size, sign)
1182 } else {
1183 Float::from_parts(
1184 self.frac_size,
1185 self.exp_size,
1186 FloatKind::Finite,
1187 sign,
1188 frac,
1189 1i32.wrapping_sub(self.bias)
1190 )
1191 }
1192 } else if exp == self.exp_max {
1193 return if frac == 0 {
1194 Float::from_parts(
1195 self.frac_size,
1196 self.exp_size,
1197 FloatKind::Infinite,
1198 sign,
1199 BigInt::new(),
1200 self.exp_max,
1201 )
1202 } else {
1203 Float::from_parts(
1204 self.frac_size,
1205 self.exp_size,
1206 FloatKind::QuietNaN,
1207 sign,
1208 BigInt::new(),
1209 self.exp_max,
1210 )
1211 }
1212 }
1213
1214 if self.j_bit_implied {
1215 frac.set_bit(self.frac_size, true);
1216 }
1217
1218 Float::from_parts(
1219 self.frac_size,
1220 self.exp_size,
1221 FloatKind::Finite,
1222 sign,
1223 frac,
1224 exp.wrapping_sub(self.bias)
1225 )
1226 }
1227}