fugue_fp/
lib.rs

1pub use fugue_bv::BitVec;
2pub use fugue_ir::float_format::FloatFormat;
3
4use std::cmp::Ordering;
5use std::ops::Add;
6use std::ops::AddAssign;
7use std::ops::Div;
8use std::ops::DivAssign;
9use std::ops::Mul;
10use std::ops::MulAssign;
11use std::ops::Neg;
12use std::ops::Sub;
13use std::ops::SubAssign;
14use std::mem::take;
15
16use rug::Assign;
17use rug::Integer as BigInt;
18
19use thiserror::Error;
20
21#[derive(Debug, Error)]
22pub enum Error {
23    #[error("no corresponding float format representation for `{}` bits", .0 * 8)]
24    UnsupportedFloatFormat(usize),
25}
26
/// Sign of a floating-point value.
///
/// The enum is fieldless, so it is `Copy` (like [`FloatKind`]); callers no
/// longer need to clone or re-match it just to reuse a sign. The derived
/// ordering follows declaration order, i.e. `Positive < Negative`.
#[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash)]
pub enum Sign {
    /// Sign bit clear.
    Positive,
    /// Sign bit set.
    Negative,
}
32
33#[derive(Debug, Clone, Hash)]
34pub struct Float {
35    frac_bits: u32,
36    exp_bits: u32,
37    kind: FloatKind,
38    sign: i32,
39    unscaled: BigInt,
40    scale: i32,
41    max_scale: i32,
42    min_scale: i32,
43}
44
/// Classification of a [`Float`] value.
///
/// The derived ordering follows declaration order.
#[derive(Copy, Clone, Debug, Hash, PartialEq, Eq, PartialOrd, Ord)]
pub enum FloatKind {
    /// An ordinary number (normal, subnormal or zero).
    Finite,
    /// Positive or negative infinity.
    Infinite,
    /// Quiet not-a-number.
    QuietNaN,
    /// Signalling not-a-number.
    SignallingNaN,
}
52
53impl Float {
54    fn from_parts(
55        frac_bits: u32,
56        exp_bits: u32,
57        kind: FloatKind,
58        sign: Sign,
59        unscaled: BigInt,
60        scale: i32) -> Self {
61
62        let max_scale = (1i32 << (exp_bits - 1u32)) - 1i32;
63        let min_scale = 1 - max_scale;
64
65        Self {
66            frac_bits,
67            exp_bits,
68            kind,
69            sign: if matches!(sign, Sign::Positive) { 1 } else { -1 },
70            unscaled,
71            scale,
72            max_scale,
73            min_scale,
74        }
75    }
76
77    pub fn from_bigint(frac_bits: u32, exp_bits: u32, value: BigInt) -> Self {
78        let mut f = Self::from_parts(
79            frac_bits,
80            exp_bits,
81            FloatKind::Finite,
82            if value >= 0 { Sign::Positive } else { Sign::Negative },
83            value.abs(),
84            frac_bits as i32,
85        );
86
87        f.scale_up_to(frac_bits + 1);
88        f
89    }
90
91    pub fn zero_with(frac_bits: u32, exp_bits: u32, sign: Sign) -> Self {
92        Self::from_parts(
93            frac_bits,
94            exp_bits,
95            FloatKind::Finite,
96            sign,
97            BigInt::from(0),
98            2u32.wrapping_sub(1u32 << (exp_bits - 1)) as i32,
99        )
100    }
101
102    pub fn zero(frac_bits: u32, exp_bits: u32) -> Self {
103        Self::zero_with(frac_bits, exp_bits, Sign::Positive)
104    }
105
106    pub fn infinity(frac_bits: u32, exp_bits: u32, sign: Sign) -> Self {
107        Self::from_parts(
108            frac_bits,
109            exp_bits,
110            FloatKind::Infinite,
111            sign,
112            BigInt::from(1) << frac_bits,
113            (1u32 << (exp_bits - 1)).wrapping_sub(1) as i32,
114        )
115    }
116
117    pub fn quiet_nan(frac_bits: u32, exp_bits: u32, sign: Sign) -> Self {
118        Self::from_parts(
119            frac_bits,
120            exp_bits,
121            FloatKind::QuietNaN,
122            sign,
123            BigInt::from(0),
124            (1u32 << (exp_bits - 1)).wrapping_sub(1) as i32,
125        )
126    }
127
128    pub fn is_normal(&self) -> bool {
129        matches!(self.kind, FloatKind::Finite) &&
130            self.unscaled.significant_bits() >= self.frac_bits + 1
131    }
132
133    pub fn is_infinite(&self) -> bool {
134        matches!(self.kind, FloatKind::Infinite)
135    }
136
137    pub fn is_zero(&self) -> bool {
138        matches!(self.kind, FloatKind::Finite) && self.unscaled == 0
139    }
140
141    pub fn is_nan(&self) -> bool {
142        matches!(self.kind, FloatKind::QuietNaN | FloatKind::SignallingNaN)
143    }
144
145    fn make_zero(&mut self) {
146        self.kind = FloatKind::Finite;
147        self.unscaled = BigInt::from(0);
148        self.scale = self.min_scale;
149    }
150
151    fn make_one(&mut self) {
152        self.kind = FloatKind::Finite;
153        self.scale = 0;
154        self.unscaled = BigInt::from(1) << self.frac_bits;
155    }
156
157    fn make_quiet_nan(&mut self) {
158        self.kind = FloatKind::QuietNaN;
159    }
160
161    fn internal_round(&mut self, eps: bool) {
162        if !matches!(self.kind, FloatKind::Finite) {
163            panic!("rounding non-finite float")
164        }
165
166        if self.unscaled == 0 {
167            if eps {
168                panic!("rounding zero + epsilon, need a bit length")
169            }
170            self.make_zero();
171            return
172        }
173
174        let extra_bits =
175            (self.unscaled.significant_bits().wrapping_sub(self.frac_bits - 1) as i32).max(
176                self.min_scale.wrapping_sub(self.scale));
177
178        if extra_bits <= 0 {
179            panic!("round with no extra bits of precision")
180        }
181
182        let mid_bit = (extra_bits - 1) as u32;
183        let mid_bit_set = self.unscaled.get_bit(mid_bit);
184        let eps = eps || self.unscaled.find_one(0).map(|pos| pos < mid_bit).unwrap_or(true);
185
186        self.unscaled <<= extra_bits as u32;
187        self.scale = self.scale.wrapping_add(extra_bits);
188
189        let is_odd = self.unscaled.get_bit(0);
190
191        if mid_bit_set && (eps || is_odd) {
192            self.unscaled += 1;
193            if self.unscaled.significant_bits() > self.frac_bits + 1 {
194                assert_eq!(self.unscaled.significant_bits(),
195                           self.unscaled.find_one(0).map(|pos| pos + 1).unwrap_or(0));
196                self.unscaled >>= 1;
197                self.scale = self.scale.wrapping_add(1);
198            }
199        }
200
201        if self.scale > self.max_scale {
202            self.kind = FloatKind::Infinite;
203        }
204    }
205
206    fn leading_bit(&self) -> u32 {
207        if !matches!(self.kind, FloatKind::Finite) || self.unscaled == 0 {
208            panic!("leading bit of non-finite or zero")
209        }
210
211        self.scale.wrapping_add(
212            self.unscaled.significant_bits()
213                .wrapping_sub(self.frac_bits) as i32) as u32
214    }
215
216    fn upscale(&mut self, bits: u32) {
217        if (bits as i32) < 0 {
218            self.unscaled >>= (bits as i32).abs() as u32;
219        } else {
220            self.unscaled <<= bits;
221        }
222        self.scale = self.scale - (bits as i32);
223    }
224
225    fn scale_up_to(&mut self, bits: u32) {
226        if !matches!(self.kind, FloatKind::Finite) {
227            panic!("scaling of non-finite float")
228        }
229
230        let repr_bits = self.unscaled.significant_bits();
231        if bits > repr_bits {
232            self.upscale(bits.wrapping_sub(repr_bits));
233        }
234    }
235}
236
237impl Div<Self> for Float {
238    type Output = Self;
239
240    fn div(self, rhs: Self) -> Self::Output {
241        let mut slf = self;
242        slf.div_assign(rhs);
243        slf
244    }
245}
246
247impl Div<Self> for &'_ Float {
248    type Output = Float;
249
250    fn div(self, rhs: Self) -> Self::Output {
251        let mut slf = self.clone();
252        slf.div_assign(rhs.clone());
253        slf
254    }
255}
256
257impl DivAssign<Self> for Float {
258    fn div_assign(&mut self, rhs: Self) {
259        if self.is_nan() || rhs.is_nan() {
260            self.make_quiet_nan();
261            return
262        }
263
264        if self.is_infinite() {
265            if rhs.is_infinite() {
266                self.make_quiet_nan();
267            } else {
268                self.sign *= rhs.sign;
269            }
270            return
271        }
272
273        match rhs.kind {
274            FloatKind::QuietNaN | FloatKind::SignallingNaN => {
275                self.make_quiet_nan();
276                return
277            },
278            FloatKind::Infinite => {
279                self.make_zero();
280                self.sign *= rhs.sign;
281                return
282            },
283            FloatKind::Finite => {
284                if rhs.is_zero() {
285                    if self.is_zero() {
286                        self.make_quiet_nan();
287                    } else {
288                        self.kind = FloatKind::Infinite;
289                        self.sign *= rhs.sign;
290                    }
291                    return
292                }
293
294                let lshift = self.frac_bits
295                    .wrapping_add(2)
296                    .wrapping_add(rhs.unscaled.significant_bits())
297                    .wrapping_sub(self.unscaled.significant_bits());
298
299                self.upscale(lshift);
300
301                // q in self; r in rhs
302                let mut r = rhs.unscaled;
303                self.unscaled.div_rem_mut(&mut r);
304                self.sign *= rhs.sign;
305                self.scale = self.scale
306                    .wrapping_sub(rhs.scale)
307                    .wrapping_sub(self.frac_bits as i32);
308                self.internal_round(r != 0);
309            },
310        }
311    }
312}
313
314impl Mul<Self> for Float {
315    type Output = Self;
316
317    fn mul(self, rhs: Self) -> Self::Output {
318        let mut slf = self;
319        slf.mul_assign(rhs);
320        slf
321    }
322}
323
324impl Mul<Self> for &'_ Float {
325    type Output = Float;
326
327    fn mul(self, rhs: Self) -> Self::Output {
328        let mut slf = self.clone();
329        slf.mul_assign(rhs.clone());
330        slf
331    }
332}
333
334impl MulAssign<Self> for Float {
335    fn mul_assign(&mut self, rhs: Self) {
336        if self.is_nan() || rhs.is_nan() {
337            self.make_quiet_nan();
338            return
339        }
340
341        if (self.is_zero() && rhs.is_infinite()) || (self.is_infinite() && rhs.is_zero()) {
342            self.make_quiet_nan();
343            return
344        }
345
346        if self.is_infinite() || rhs.is_infinite() {
347            self.kind = FloatKind::Infinite;
348            self.sign *= rhs.sign;
349            return
350        }
351
352        self.sign *= rhs.sign;
353        self.unscaled *= rhs.unscaled;
354        self.scale = self.scale
355            .wrapping_add(rhs.scale)
356            .wrapping_sub(self.frac_bits as i32);
357        self.scale_up_to(self.frac_bits + 2);
358        self.internal_round(false);
359    }
360}
361
362impl Float {
363    fn add0_assign(&mut self, rhs: Self) {
364        let rhs = rhs;
365        let d = self.scale.wrapping_sub(rhs.scale);
366        if d as u32 > self.frac_bits + 1 {
367            return
368        } else if d < -(self.frac_bits as i32 + 1) {
369            *self = rhs;
370            return
371        }
372
373        let (d, mut a, b) = if d >= 0 {
374            let a = Float {
375                frac_bits: self.frac_bits,
376                exp_bits: self.exp_bits,
377                kind: self.kind,
378                sign: self.sign,
379                unscaled: take(&mut self.unscaled),
380                scale: self.scale,
381                min_scale: self.min_scale,
382                max_scale: self.max_scale,
383            };
384            let b = rhs;
385
386            (d, a, b)
387        } else {
388            let a = rhs;
389            let b = Float {
390                frac_bits: self.frac_bits,
391                exp_bits: self.exp_bits,
392                kind: self.kind,
393                sign: self.sign,
394                unscaled: take(&mut self.unscaled),
395                scale: self.scale,
396                min_scale: self.min_scale,
397                max_scale: self.max_scale,
398            };
399
400            (-d, a, b)
401        };
402
403        let residue = b.unscaled.find_one(0).map(|pos| (pos as i32) < (d - 1))
404            .unwrap_or(true);
405        self.scale = a.scale.wrapping_sub(1);
406
407        a.unscaled <<= 1;
408        a.unscaled += b.unscaled >> (d - 1) as u32;
409
410        self.unscaled = a.unscaled;
411
412        self.scale_up_to(self.frac_bits + 2);
413        self.internal_round(residue);
414    }
415
416    fn sub0_assign(&mut self, rhs: Self) {
417        let d = self.scale.wrapping_sub(rhs.scale);
418        if d as u32 > self.frac_bits + 2 {
419            return
420        } else if d < -(self.frac_bits as i32 + 2) {
421            *self = rhs;
422            return
423        }
424
425        let (d, mut a, mut b) = if d >= 0 {
426            let a = Float {
427                frac_bits: self.frac_bits,
428                exp_bits: self.exp_bits,
429                kind: self.kind,
430                sign: self.sign,
431                unscaled: take(&mut self.unscaled),
432                scale: self.scale,
433                min_scale: self.min_scale,
434                max_scale: self.max_scale,
435            };
436            let b = rhs;
437
438            (d, a, b)
439        } else {
440            let a = rhs;
441            let b = Float {
442                frac_bits: self.frac_bits,
443                exp_bits: self.exp_bits,
444                kind: self.kind,
445                sign: self.sign,
446                unscaled: take(&mut self.unscaled),
447                scale: self.scale,
448                min_scale: self.min_scale,
449                max_scale: self.max_scale,
450            };
451
452            (-d, a, b)
453        };
454
455        let residue = b.unscaled.find_one(0).map(|pos| (pos as i32) < (d - 2))
456            .unwrap_or(true);
457        self.sign = a.sign;
458        self.scale = a.scale.wrapping_sub(2);
459
460        b.unscaled >>= d - 2;
461        if residue {
462            b.unscaled += 1;
463        }
464
465        a.unscaled <<= 2;
466        a.unscaled -= b.unscaled;
467
468        if a.unscaled == 0 {
469            self.sign = 1;
470            self.unscaled = a.unscaled;
471        } else if a.unscaled < 0 {
472            self.sign *= -1;
473            self.unscaled = -a.unscaled;
474        } else {
475            self.unscaled = a.unscaled;
476        }
477
478        self.scale_up_to(self.frac_bits + 2);
479        self.internal_round(residue);
480    }
481}
482
483impl Add<Self> for Float {
484    type Output = Self;
485
486    fn add(self, rhs: Self) -> Self::Output {
487        let mut slf = self;
488        slf.add_assign(rhs);
489        slf
490    }
491}
492
493impl Add<Self> for &'_ Float {
494    type Output = Float;
495
496    fn add(self, rhs: Self) -> Self::Output {
497        let mut slf = self.clone();
498        slf.add_assign(rhs.clone());
499        slf
500    }
501}
502
503impl AddAssign<Self> for Float {
504    fn add_assign(&mut self, rhs: Self) {
505        if self.is_nan() || rhs.is_nan() {
506            self.make_quiet_nan();
507            return
508        }
509
510        if self.is_infinite() && rhs.is_infinite() {
511            if self.sign != rhs.sign {
512                self.make_quiet_nan();
513            }
514            return
515        }
516
517        if self.is_infinite() {
518            return
519        }
520
521        if rhs.is_infinite() {
522            *self = rhs;
523            return
524        }
525
526        if rhs.is_zero() {
527            if self.is_zero() {
528                self.sign = if self.sign < 0 && rhs.sign < 0 { -1 } else { 1 };
529            }
530            return
531        }
532
533        if self.is_zero() {
534            *self = rhs;
535            return
536        }
537
538        if self.sign == rhs.sign {
539            self.add0_assign(rhs);
540        } else {
541            self.sub0_assign(rhs);
542        }
543    }
544}
545
546impl Sub<Self> for Float {
547    type Output = Self;
548
549    fn sub(self, rhs: Self) -> Self::Output {
550        let mut slf = self;
551        slf.sub_assign(rhs);
552        slf
553    }
554}
555
556impl Sub<Self> for &'_ Float {
557    type Output = Float;
558
559    fn sub(self, rhs: Self) -> Self::Output {
560        let mut slf = self.clone();
561        slf.sub_assign(rhs.clone());
562        slf
563    }
564}
565
566impl SubAssign<Self> for Float {
567    fn sub_assign(&mut self, rhs: Self) {
568        let mut rhs = rhs;
569        let sign = self.sign;
570
571        rhs.sign *= -1;
572        let rsign = rhs.sign;
573
574        self.add_assign(rhs);
575
576        if self.is_zero() {
577            self.sign = if sign < 0 && rsign < 0 { -1 } else { 1 };
578        }
579    }
580}
581
582impl Float {
583    pub fn sqrt(&self) -> Self {
584        let mut slf = self.clone();
585        slf.sqrt_assign();
586        slf
587    }
588
589    pub fn sqrt_assign(&mut self) {
590        if self.is_zero() {
591            return
592        }
593
594        if self.is_nan() || self.sign == -1 {
595            self.make_quiet_nan();
596            return
597        }
598
599        if self.is_infinite() {
600            return
601        }
602
603        let sig_bits = self.frac_bits
604            .wrapping_mul(2)
605            .wrapping_add(3);
606        self.scale_up_to(sig_bits);
607
608        if self.scale.wrapping_add(self.frac_bits as i32) & 1 != 0 {
609            self.upscale(1);
610        }
611
612        let mut residue = take(&mut self.unscaled);
613        let mut result = BigInt::new();
614
615        let mut pow = residue.significant_bits();
616        pow = pow.wrapping_sub(pow & 1);
617
618        let mut bit = BigInt::from(1) << pow;
619        let mut resp = BigInt::new();
620        while bit != 0 {
621            resp.assign(&result + &bit);
622            if residue >= resp {
623                residue -= take(&mut resp);
624                let res = BigInt::from(&bit << 1);
625                result += res;
626            }
627            result >>= 1;
628            bit >>= 2;
629        }
630
631        self.unscaled = result;
632        self.scale = self.scale.wrapping_add(self.frac_bits as i32) / 2;
633        self.internal_round(residue != 0);
634    }
635
636    fn floor0_assign(&mut self) {
637        if self.scale < 0 {
638            self.make_zero();
639            return
640        }
641        let nbits = self.frac_bits.wrapping_sub(self.scale as u32);
642        let temp = take(&mut self.unscaled);
643        self.unscaled.assign((temp >> nbits) << nbits);
644    }
645
646    fn ceil0_assign(&mut self) {
647        if self.is_zero() {
648            return
649        } else if self.scale < 0 {
650            self.make_one();
651            return
652        }
653
654        let nbits = self.frac_bits.wrapping_sub(self.scale as u32);
655        let increment = self.unscaled.find_one(0).map(|pos| pos < nbits).unwrap_or(true);
656        let temp = take(&mut self.unscaled);
657        self.unscaled.assign((temp >> nbits) << nbits);
658
659        if increment {
660            self.unscaled += BigInt::from(1) << nbits;
661        }
662
663        if self.unscaled.significant_bits() > self.frac_bits + 1 {
664            self.upscale(-1i32 as u32);
665        }
666    }
667
668    pub fn floor(&self) -> Self {
669        let mut slf = self.clone();
670        slf.floor_assign();
671        slf
672    }
673
674    pub fn floor_assign(&mut self) {
675        match self.kind {
676            FloatKind::Finite | FloatKind::QuietNaN => return,
677            FloatKind::SignallingNaN => { // should we return here?
678                self.make_quiet_nan();
679                return
680            },
681            _ => (),
682        }
683
684        if self.sign >= 0 {
685            self.floor0_assign();
686        } else {
687            self.ceil0_assign();
688        }
689    }
690
691    pub fn ceil(&self) -> Self {
692        let mut slf = self.clone();
693        slf.ceil_assign();
694        slf
695    }
696
697    pub fn ceil_assign(&mut self) {
698        match self.kind {
699            FloatKind::Finite | FloatKind::QuietNaN => return,
700            FloatKind::SignallingNaN => { // should we return here?
701                self.make_quiet_nan();
702                return
703            },
704            _ => (),
705        }
706
707        if self.sign >= 0 {
708            self.ceil0_assign();
709        } else {
710            self.floor0_assign();
711        }
712    }
713
714    pub fn trunc(&self) -> Self {
715        let mut slf = self.clone();
716        slf.trunc_assign();
717        slf
718    }
719
720    pub fn trunc_assign(&mut self) {
721        self.floor0_assign();
722    }
723
724    pub fn trunc_to_bitvec(&self, bits: usize) -> BitVec {
725        let slf = self.clone();
726        slf.trunc_into_bitvec(bits)
727    }
728
729    pub fn trunc_into_bitvec(self, bits: usize) -> BitVec {
730        if self.is_nan() {
731            return BitVec::zero(bits)
732        }
733
734        if self.is_infinite() {
735            return if self.sign < 0 {
736                BitVec::min_value_with(bits, true)
737            } else {
738                BitVec::max_value_with(bits, true)
739            }
740        }
741
742        let sign = self.sign < 0;
743        let bint = self.unscaled >> self.frac_bits.wrapping_sub(self.scale as u32);
744        let bvec = BitVec::from_bigint(bint, bits);
745        if sign {
746            -bvec
747        } else {
748            bvec
749        }
750    }
751
752    pub fn neg_assign(&mut self) {
753        self.sign *= -1;
754    }
755
756    pub fn abs(&self) -> Self {
757        let mut slf = self.clone();
758        slf.abs_assign();
759        slf
760    }
761
762    pub fn abs_assign(&mut self) {
763        self.sign = 1;
764    }
765
766    pub fn round(&self) -> Self {
767        let mut slf = self.clone();
768        slf.round_assign();
769        slf
770    }
771
772    pub fn round_assign(&mut self) {
773        let half = Self::from_parts(
774            self.frac_bits,
775            self.exp_bits,
776            FloatKind::Finite,
777            Sign::Positive,
778            BigInt::from(1) << self.frac_bits,
779            -1
780        );
781        self.add_assign(half);
782        self.floor_assign();
783    }
784
785    pub fn into_bigint(self) -> BigInt {
786        let res = self.unscaled >> self.frac_bits.wrapping_sub(self.scale as u32);
787        if self.sign < 0 {
788            -res
789        } else {
790            res
791        }
792    }
793
794    pub fn to_bigint(&self) -> BigInt {
795        let res = BigInt::from(&self.unscaled >> self.frac_bits.wrapping_sub(self.scale as u32));
796        if self.sign < 0 {
797            -res
798        } else {
799            res
800        }
801    }
802}
803
804impl From<Float> for BigInt {
805    fn from(f: Float) -> Self {
806        f.into_bigint()
807    }
808}
809
810impl Neg for Float {
811    type Output = Float;
812
813    fn neg(self) -> Self::Output {
814        let mut slf = self;
815        slf.neg_assign();
816        slf
817    }
818}
819
820impl Neg for &'_ Float {
821    type Output = Float;
822
823    fn neg(self) -> Self::Output {
824        let mut slf = self.clone();
825        slf.neg_assign();
826        slf
827    }
828}
829
830impl PartialEq<Self> for Float {
831    fn eq(&self, other: &Self) -> bool {
832        if self.is_nan() {
833            return other.is_nan()
834        }
835
836        if other.is_nan() {
837            return false
838        }
839
840        if self.is_infinite() {
841            if self.sign < 0 {
842                return other.is_infinite() && other.sign < 0
843            }
844
845            return other.is_infinite() && other.sign > 0
846        }
847
848        if other.is_infinite() {
849            return false
850        }
851
852        if self.sign != other.sign {
853            return self.sign == 0
854        }
855
856        if self.scale != other.scale {
857            return self.sign == 0
858        }
859
860        self.sign == 0 || self.unscaled == other.unscaled
861    }
862}
863impl Eq for Float { }
864
865impl Ord for Float {
866    fn cmp(&self, other: &Self) -> Ordering {
867        if self.is_nan() {
868            return if other.is_nan() {
869                Ordering::Equal
870            } else {
871                Ordering::Greater
872            }
873        }
874
875        if other.is_nan() {
876            return Ordering::Less
877        }
878
879        if self.is_infinite() {
880            if self.sign < 0 {
881                return if other.is_infinite() && other.sign < 0 {
882                    Ordering::Equal
883                } else {
884                    Ordering::Less
885                }
886            }
887
888            return if other.is_infinite() && other.sign > 0 {
889                Ordering::Equal
890            } else {
891                Ordering::Greater
892            }
893        }
894
895        if other.is_infinite() {
896            return other.sign.cmp(&0).reverse()
897        }
898
899        if self.sign != other.sign {
900            return self.sign.cmp(&0)
901        }
902
903        if self.scale != other.scale {
904            let sign = if self.scale < other.scale { -self.sign } else { self.sign };
905            return sign.cmp(&0)
906        }
907
908        if self.sign == 0 {
909            Ordering::Equal
910        } else {
911            let res = self.unscaled.cmp(&other.unscaled);
912            if self.sign < 0 { res.reverse() } else { res }
913        }
914    }
915}
916
917impl PartialOrd<Self> for Float {
918    fn partial_cmp(&self, other: &Self) -> Option<Ordering> {
919        Some(self.cmp(other))
920    }
921}
922
923trait FloatFormatOpsInternal {
924    fn extract_sign(&self, val: &BigInt) -> Sign;
925    fn extract_fractional(&self, val: &BigInt) -> BigInt;
926    fn extract_exponent(&self, val: &BigInt) -> i32;
927
928    fn set_sign(&self, val: BigInt, sign: Sign) -> BigInt;
929
930    fn encode_nan(&self, sign: Sign) -> BigInt;
931    fn encode_infinity(&self, sign: Sign) -> BigInt;
932    fn encode_zero(&self, sign: Sign) -> BigInt;
933
934    fn round_to_lead_bit(&self, val: BigInt, bit: i32) -> BigInt;
935}
936
937impl FloatFormatOpsInternal for FloatFormat {
938    fn extract_sign(&self, val: &BigInt) -> Sign {
939        if val.get_bit(self.sign_pos) { Sign::Positive } else { Sign::Negative }
940    }
941
942    fn extract_fractional(&self, val: &BigInt) -> BigInt {
943        let mask = (BigInt::from(1) << self.frac_size) - 1;
944        BigInt::from(val << self.frac_pos) & mask
945    }
946
947    fn extract_exponent(&self, val: &BigInt) -> i32 {
948        let m: BigInt = BigInt::from(val >> self.exp_pos) & 0xffff_ffffu32;
949        m.to_u32().unwrap() as i32 & self.exp_max
950    }
951
952    fn set_sign(&self, val: BigInt, sign: Sign) -> BigInt {
953        if matches!(sign, Sign::Negative) {
954            let mut val = val;
955            val.set_bit(self.sign_pos, true);
956            val
957        } else {
958            val
959        }
960    }
961
962    fn encode_zero(&self, sign: Sign) -> BigInt {
963        self.set_sign(BigInt::new(), sign)
964    }
965
966    fn encode_infinity(&self, sign: Sign) -> BigInt {
967        let res = BigInt::from(self.exp_max) << self.exp_pos;
968        self.set_sign(res, sign)
969    }
970
971    fn encode_nan(&self, sign: Sign) -> BigInt {
972        let mut res = BigInt::from(1) << self.frac_pos
973            .wrapping_add(self.frac_size)
974            .wrapping_sub(1);
975        res |= BigInt::from(self.exp_max) << self.exp_pos;
976        self.set_sign(res, sign)
977    }
978
979    fn round_to_lead_bit(&self, val: BigInt, bit: i32) -> BigInt {
980        let mut val = val;
981        let amount = val
982            .significant_bits()
983            .wrapping_sub(1)
984            .wrapping_sub(bit as u32);
985        if amount == 0 {
986            val
987        } else if (amount as i32) < 0 {
988            val << (amount as i32).abs() as u32
989        } else {
990            let mid = amount.wrapping_sub(1);
991            let mid_set = val.get_bit(mid);
992            let eps = val.find_one(0).map(|pos| pos < mid).unwrap_or(true);
993
994            val >>= amount;
995
996            let odd = val.get_bit(0);
997            if mid_set && (eps || odd) {
998                val + 1
999            } else {
1000                val
1001            }
1002        }
1003    }
1004}
1005
1006pub trait FloatFormatOps {
1007    fn into_bitvec(&self, fp: Float, bits: usize) -> BitVec;
1008    fn from_bitvec(&self, bv: &BitVec) -> Float;
1009}
1010
1011pub const fn float_format_from_size(bytes: usize) -> Result<FloatFormat, Error> {
1012    Ok(match bytes {
1013        2 => FloatFormat {
1014            size: 2,
1015            sign_pos: 15,
1016            exp_pos: 10,
1017            exp_size: 5,
1018            exp_max: (1 << 5) - 1,
1019            frac_pos: 0,
1020            frac_size: 10,
1021            bias: 15,
1022            j_bit_implied: true,
1023        },
1024        4 => FloatFormat {
1025            size: 4,
1026            sign_pos: 31,
1027            exp_pos: 23,
1028            exp_size: 8,
1029            exp_max: (1 << 8) - 1,
1030            frac_pos: 0,
1031            frac_size: 23,
1032            bias: 127,
1033            j_bit_implied: true,
1034        },
1035        8 => FloatFormat {
1036            size: 8,
1037            sign_pos: 63,
1038            exp_pos: 52,
1039            exp_size: 11,
1040            exp_max: (1 << 11) - 1,
1041            frac_pos: 0,
1042            frac_size: 52,
1043            bias: 1023,
1044            j_bit_implied: true,
1045        },
1046        10 => FloatFormat {
1047            size: 10,
1048            sign_pos: 79,
1049            exp_pos: 64,
1050            exp_size: 15,
1051            exp_max: (1 << 15) - 1,
1052            frac_pos: 0,
1053            frac_size: 64,
1054            bias: 16383,
1055            j_bit_implied: true,
1056        },
1057        12 => FloatFormat {
1058            size: 12,
1059            sign_pos: 95,
1060            exp_pos: 80,
1061            exp_size: 15,
1062            exp_max: (1 << 15) - 1,
1063            frac_pos: 16,
1064            frac_size: 64,
1065            bias: 16383,
1066            j_bit_implied: true,
1067        },
1068        16 => FloatFormat {
1069            size: 16,
1070            sign_pos: 127,
1071            exp_pos: 112,
1072            exp_size: 15,
1073            exp_max: (1 << 15) - 1,
1074            frac_pos: 0,
1075            frac_size: 112,
1076            bias: 16383,
1077            j_bit_implied: true,
1078        },
1079        _ => return Err(Error::UnsupportedFloatFormat(bytes)),
1080    })
1081}
1082
1083impl FloatFormatOps for FloatFormat {
1084    fn into_bitvec(&self, fp: Float, bits: usize) -> BitVec {
1085        let mut res = match fp.kind {
1086            FloatKind::QuietNaN | FloatKind::SignallingNaN => {
1087                self.encode_nan(Sign::Positive)
1088            },
1089            FloatKind::Infinite => {
1090                let sign = if fp.sign < 0 { Sign::Negative } else { Sign::Positive };
1091                self.encode_infinity(sign)
1092            },
1093            FloatKind::Finite => if fp.is_zero() {
1094                let sign = if fp.sign < 0 { Sign::Negative } else { Sign::Positive };
1095                self.encode_zero(sign)
1096            } else if self.j_bit_implied {
1097                let lead_bit = fp.leading_bit();
1098                let (exp, mut frac) = {
1099                    let tmp = fp.scale
1100                        .wrapping_sub(fp.frac_bits as i32)
1101                        .wrapping_add(lead_bit as i32);
1102                    if tmp >= 1i32.wrapping_sub(self.bias) {
1103                        let mut exp = tmp.wrapping_add(self.bias);
1104                        let mut frac = self.round_to_lead_bit(fp.unscaled, self.frac_size as i32);
1105                        if frac.significant_bits().wrapping_sub(1) > self.frac_size {
1106                            frac >>= 1;
1107                            exp += 1;
1108                        }
1109                        frac.set_bit(self.frac_size, false);
1110                        (exp, frac)
1111                    } else {
1112                        let exp = 0;
1113                        let n = tmp
1114                            .wrapping_sub(1)
1115                            .wrapping_add(self.bias as i32)
1116                            .wrapping_add(self.frac_size as i32);
1117                        if n < 0 {
1118                            let sign = if fp.sign < 0 {
1119                                Sign::Negative
1120                            } else {
1121                                Sign::Positive
1122                            };
1123
1124                            let mut res = self.encode_zero(sign);
1125                            let sign = res < 0;
1126                            res.abs_mut();
1127
1128                            let bv = BitVec::from_bigint(res, bits);
1129                            return if sign {
1130                                -bv
1131                            } else {
1132                                bv
1133                            }
1134                        }
1135                        let frac = self.round_to_lead_bit(fp.unscaled, n);
1136                        (exp, frac)
1137                    }
1138                };
1139                if exp >= self.exp_max {
1140                    let sign = if fp.sign < 0 { Sign::Negative } else { Sign::Positive };
1141                    let mut res = self.encode_infinity(sign);
1142                    let sign = res < 0;
1143                    res.abs_mut();
1144
1145                    let bv = BitVec::from_bigint(res, bits);
1146                    return if sign {
1147                        -bv
1148                    } else {
1149                        bv
1150                    }
1151                }
1152
1153                frac |= BigInt::from(exp) << self.exp_pos;
1154                if fp.sign < 0 {
1155                    frac.set_bit(self.sign_pos, true);
1156                }
1157                frac
1158            } else {
1159                panic!("unexpected j_bit_implied == false")
1160            },
1161        };
1162
1163        let sign = res < 0;
1164        res.abs_mut();
1165
1166        let bv = BitVec::from_bigint(res, bits);
1167        if sign {
1168            -bv
1169        } else {
1170            bv
1171        }
1172    }
1173
1174    fn from_bitvec(&self, bv: &BitVec) -> Float {
1175        let sign = self.extract_sign(&*bv.as_bigint());
1176        let exp = self.extract_exponent(&*bv.as_bigint());
1177        let mut frac = self.extract_fractional(&*bv.as_bigint());
1178
1179        if exp == 0 {
1180            return if frac == 0 {
1181                Float::zero_with(self.frac_size, self.exp_size, sign)
1182            } else {
1183                Float::from_parts(
1184                    self.frac_size,
1185                    self.exp_size,
1186                    FloatKind::Finite,
1187                    sign,
1188                    frac,
1189                    1i32.wrapping_sub(self.bias)
1190                )
1191            }
1192        } else if exp == self.exp_max {
1193            return if frac == 0 {
1194                Float::from_parts(
1195                    self.frac_size,
1196                    self.exp_size,
1197                    FloatKind::Infinite,
1198                    sign,
1199                    BigInt::new(),
1200                    self.exp_max,
1201                )
1202            } else {
1203                Float::from_parts(
1204                    self.frac_size,
1205                    self.exp_size,
1206                    FloatKind::QuietNaN,
1207                    sign,
1208                    BigInt::new(),
1209                    self.exp_max,
1210                )
1211            }
1212        }
1213
1214        if self.j_bit_implied {
1215            frac.set_bit(self.frac_size, true);
1216        }
1217
1218        Float::from_parts(
1219            self.frac_size,
1220            self.exp_size,
1221            FloatKind::Finite,
1222            sign,
1223            frac,
1224            exp.wrapping_sub(self.bias)
1225        )
1226    }
1227}