fast_posit/posit/mod.rs
1//! This module and its submodules contain a software implementation of a standard-compliant Posit
2//! floating point type, with arbitrary width and exponent width (up to 128).
3//!
4//! This module is **EXTENSIVELY** documented! If you want to learn more about Posits and an
5//! optimised software implementation thereof (or about floating point implementations in
6//! general!), you might profit from reading carefully through the code :) We assume basic
7//! familiarity with both the Posit format and with two's complement integer arithmetic;
8//! everything else we try to explain.
9//!
10//! If you know nothing about Posits and want to learn more, a good place to start is
11//! <https://posithub.org/docs/Posits4.pdf>. The most up to date standard is at
12//! <https://posithub.org/docs/posit_standard-2.pdf>.
13//!
14//! Some notation used in the comments:
15//!
16//! - **Leftmost bits/msb**: most-significant bits.
17//! - **Rightmost bits/lsb**: least-significant bits.
18//! - **Bit 0, bit 1, .. bit N-1**: numbered least significant to most significant.
19//! - [a; b[ for a range that is closed (inclusive) on `a` and open (exclusive) on `b`.
20//!
21//! Suggested reading order: [Posit] and [Decoded] types, [basics] and [constants](consts),
22//! [decode] implementation (Posit→Decoded), [encode] implementation (Decoded→Posit),
23//! [elementary arithmetic](ops).
24
25/// A *posit* floating point number with `N` bits and `ES` exponent bits, using `Int` as its
26/// underlying type.
27///
28/// # Examples
29///
30/// ```
31/// # use fast_posit::Posit;
32/// type Foo = Posit::<32, 2, i32>; // A 32-bit posit with 2-bit exponent field, represented in a
33/// // 32-bit machine type.
34/// type Bar = Posit::<6, 1, i8>; // A 6-bit posit with 1-bit exponent field, represented in an
35/// // 8-bit machine type.
36/// ```
37///
38/// Type aliases are provided at the crate root for the posit types defined in
39/// [the standard](https://posithub.org/docs/posit_standard-2.pdf#subsection.3.1).
40#[derive(Clone, Copy)]
41#[derive(Eq, PartialEq, Ord, PartialOrd, Hash)] // Eq and Ord are the same as for two's complement int
42#[derive(Default)] // Wraps `Int::default()`; presumably the all-zeros pattern (posit zero) — confirm against `crate::Int`.
43pub struct Posit<
44 const N: u32, // Width of the posit, in bits.
45 const ES: u32, // Width of the exponent field, in bits.
46 Int: crate::Int, // Underlying machine integer type that holds the posit.
47> (Int); // Private field: the underlying `Int` value; the derived comparisons above operate on it directly.
48
49/// In order to perform most nontrivial operations, a `Posit<N, ES, Int>` needs to be *decoded*
50/// into the form `f × 2^e` (with rational fraction `f` and integer exponent `e`), a form that is
51/// amenable for further manipulation.
52///
53/// This is represented as a `Decoded<N, ES, Int>`, a struct that contains two integer fields,
54/// `frac` and `exp`, such that it represents the value
55///
56/// ```md
57/// `frac` / `FRAC_DENOM` × 2 ^ `exp`
58/// ```
59///
60/// where `FRAC_DENOM` is a fixed power of two, `2 ^ (B-2)`, where `B` = `Int::BITS`.
61///
62/// That is to say: this encodes the `f × 2^e` referred above using two integers: the integer `exp`
63/// is the integer `e`, and the integer `frac` is the rational `f` *with an implicit denominator*
64/// of `1 << (B-2)`.
65///
66/// Another way to think of it is that `frac` is a fixed-point rational number, where the dot is
67/// two places from the left. For instance (for an 8-bit `frac`):
68///
69/// - 0b01_000000 = +1.00
70/// - 0b01_100000 = +1.50
71/// - 0b10_000000 = -2.00
72/// - 0b11_100000 = -0.50
73///
74/// and so on. See the docstrings for [both](Decoded::frac) [fields](Decoded::exp) for more detail
75/// about their values.
76///
77/// Extracting these fields from a posit, and converting back to a posit with correct rounding, can
78/// be done **very** efficiently, and indeed those two algorithms lie at the heart of many
79/// operations.
80#[derive(Clone, Copy)]
81#[derive(Eq, PartialEq, Hash)]
82pub struct Decoded<
83 const N: u32,
84 const ES: u32,
85 Int: crate::Int,
86> {
87 /// The `frac`tion is the `frac / FRAC_DENOM` part of the posit value. Since the constant
88 /// `FRAC_DENOM` = `1 << (Int::BITS - 2)` is fixed, one can simply look at the values of `frac`
89 /// as fixed-point numbers where the dot is two places from the left.
90 ///
91 /// Examples (8-bit posit):
92 ///
93 /// - `0b01_000000` = +1.0
94 /// - `0b01_100000` = +1.5
95 /// - `0b01_110000` = +1.75
96 /// - `0b01_010000` = +1.25
97 /// - `0b01_111111` = +1.984375
98 ///
99 /// and negative numbers
100 ///
101 /// - `0b10_000000` = -2.0
102 /// - `0b10_100000` = -1.5
103 /// - `0b10_110000` = -1.25
104 /// - `0b10_010000` = -1.75
105 /// - `0b10_000001` = -1.984375
106 /// - `0b10_111111` = -1.015625
107 ///
108 /// # Valid ranges
109 ///
110 /// Now, the result of [Posit::decode_regular] always has a `frac` lying within the following
111 /// ranges:
112 ///
113 /// - [+1.0; +2.0[ for positive numbers
114 /// - [-2.0; -1.0[ for negative numbers
115 ///
116 /// Note that these are not symmetric! In particular, a positive `frac` may be +1.0
117 /// (`0b01_000…`) but not +2.0, and a negative `frac` may be -2.0 (`0b10_000…`) but not -1.0.
118 /// This is an artefact of how the posit format works, and it enables very efficient
119 /// implementations at key points in many algorithms.
120 ///
121 /// In terms of bit patterns, this corresponds to requiring that the `frac` starts with either
122 /// `0b01` (positive) or `0b10` (negative), and never with `0b00` or `0b11`.
123 ///
124 /// Likewise, for the input to [Decoded::encode_regular] we **also** require that `frac` **must**
125 /// always be in such a valid range. Whenever this is not the case, we say that the `frac`
126 /// is "*underflowing*".
127 ///
128 /// Often, when we feed a [Decoded] to [Decoded::encode_regular], such as when implementing
129 /// arithmetic operations, we will need to adjust the `frac` so that it is in the correct range
130 /// (and possibly compensate by adjusting the `exp`onent in the opposite direction).
131 pub frac: Int,
132 /// The `exp`onent is the `2 ^ exp` part of the posit value.
133 ///
134 /// The `exp` field is made up from both the "regime" and "exponent" fields of a posit: the
135 /// lowest `ES` bits are the exponent field exactly, while the highest come from the regime's
136 /// length and sign. The structure is apparent when looking at the binary `exp`.
137 ///
138 /// Examples (8-bit posit, 2-bit exponent):
139 ///
140 /// - `0b00001_01` (exp = +5, regime = +1, exponent = +1)
141 /// - `0b11110_11` (exp = -5, regime = -2, exponent = +3)
142 ///
143 /// # Valid ranges
144 ///
145 /// For reasons that become apparent when implementing [Self::encode_regular], we will also
146 /// require that `exp` lies in a certain range, namely between `Int::MIN / 2` and `Int::MAX /
147 /// 2`, inclusive.
148 ///
149 /// In terms of bit patterns, this corresponds to requiring that `exp` starts with `0b00`
150 /// (positive) or `0b11` (negative), and never with `0b01` or `0b10`.
151 ///
152 /// This is not a concern unless `ES` takes absurdly big values, in which case compile-time
153 /// checks will trigger an error.
154 pub exp: Int,
155}
156
157/// Some basic constants and functions, such as a check that `N` and `ES` make sense for `Int`, or
158/// functions to convert to/from raw bits.
159mod basics;
160
161/// Numeric constants: zero, NaR, min, min_positive, etc.
162mod consts;
163
164/// [`Posit`] → [`Decoded`].
165mod decode;
166
167/// [`Decoded`] → [`Posit`], including correct rounding.
168mod encode;
169
170/// Small fns of one posit argument: neg, prior, next, is_positive, etc.
171mod unary;
172
173/// The four basic arithmetic operations: +, -, ×, ÷.
174mod ops;
175
176/// Quire (the fixed-point accumulator for error-free sums and dot products).
177pub mod quire;
178
179/// Conversions to and from integers, to and from floats, and between different posit types.
180///
181/// Two sorts of conversions are implemented:
182/// - The conversions prescribed in the posit standard (using `round_from`).
183/// - The "Rusty" conversions (using `from` for infallible conversions and `try_from` for
184/// fallible ones).
185///
186/// The [convert::RoundFrom] and [convert::RoundInto] traits are defined here.
187pub mod convert;
188
189/// Traits for [`core::fmt::Display`] and [`core::fmt::Debug`].
190mod fmt;
191
192/// Conversions to and from an arbitrary-precision [malachite::rational::Rational], for testing
193/// purposes. This enables us to verify our algorithms by checking that the exact rationals match.
194/// For example:
195///
196/// - rational(p1 + p2) = rational(p1) + rational(p2)
197/// - rational(p1::ONE) = rational(1)
198/// - rational(p1) = rational(decoded(p1))
199#[cfg(test)]
200mod rational;
201
202/// Miscellaneous helpers for testing.
203#[cfg(test)]
204mod test;