fast_posit/posit/
mod.rs

1//! This module and its submodules contain a software implementation of a standard-compliant Posit
2//! floating point type, with arbitrary width and exponent width (up to 128).
3//!
4//! This module is **EXTENSIVELY** documented! If you want to learn more about Posits and an
5//! optimised software implementation thereof (or about floating point implementations in
6//! general!), you might profit from reading carefully through the code :) We assume basic
7//! familiarity with both the Posit format and with two's complement integer arithmetic;
8//! everything else we try to explain.
9//!
10//! If you know nothing about Posits and want to learn more, a good place to start is
11//! <https://posithub.org/docs/Posits4.pdf>. The most up to date standard is at
12//! <https://posithub.org/docs/posit_standard-2.pdf>.
13//!
14//! Some notation used in the comments:
15//!
16//!   - **Leftmost bits/msb**: most-significant bits.
17//!   - **Rightmost bits/lsb**: least-significant bits.
18//!   - **Bit 0, bit 1, .. bit N-1**: numbered least significant to most significant.
19//!   - [a; b[ for a range that is closed (inclusive) on `a` and open (exclusive) on `b`.
20//!
21//! Suggested reading order: [Posit] and [Decoded] types, [basics] and [constants](consts),
22//! [decode] implementation (Posit→Decoded), [encode] implementation (Decoded→Posit),
23//! [elementary arithmetic](ops).
24
25/// A *posit* floating point number with `N` bits and `ES` exponent bits, using `Int` as its
26/// underlying type.
27///
28/// # Examples
29///
30/// ```
31/// # use fast_posit::Posit;
32/// type Foo = Posit::<32, 2, i32>;  // A 32-bit posit with 2-bit exponent field, represented in a
33///                                  // 32-bit machine type.
34/// type Bar = Posit::<6, 1, i8>;  // A 6-bit posit with 1-bit exponent field, represented in an
35///                                // 8-bit machine type.
36/// ```
37///
38/// Type aliases are provided at the crate root for the posit types defined in
39/// [the standard](https://posithub.org/docs/posit_standard-2.pdf#subsection.3.1).
40#[derive(Clone, Copy)]
41#[derive(Eq, PartialEq, Ord, PartialOrd, Hash)]  // Derived from `Int`: Eq and Ord are the same as for two's complement ints
42#[derive(Default)]  // Default wraps `Int::default()`
43pub struct Posit<
44  const N: u32,
45  const ES: u32,
46  Int: crate::Int,
47> (Int);
48
49/// In order to perform most nontrivial operations, a `Posit<N, ES, Int>` needs to be *decoded*
50/// into the form `f × 2^e` (with rational fraction `f` and integer exponent `e`), a form that is
51/// amenable for further manipulation.
52///
53/// This is represented as a `Decoded<N, ES, Int>`, a struct that contains two integer fields,
54/// `frac` and `exp`, such that it represents the value
55///
56/// ```md
57/// `frac` / `FRAC_DENOM` × 2 ^ `exp`
58/// ```
59///
60/// where `FRAC_DENOM` is a fixed power of two, `2 ^ (B-2)`, where `B` = `Int::BITS`.
61///
62/// That is to say: this encodes the `f × 2^e` referred above using two integers: the integer `exp`
63/// is the integer `e`, and the integer `frac` is the rational `f` *with an implicit denominator*
64/// of `1 << (B-2)`.
65///
66/// Another way to think of it is that `frac` is a fixed-point rational number, where the dot is
67/// two places from the left. For instance (for an 8-bit `frac`):
68///
69///   - 0b01_000000 = +1.00
70///   - 0b01_100000 = +1.50
71///   - 0b10_000000 = -2.00
72///   - 0b11_100000 = -0.50
73///
74/// and so on. See the docstrings for [both](Decoded::frac) [fields](Decoded::exp) for more detail
75/// about their values.
76///
77/// Extracting these fields from a posit, and converting back to a posit with correct rounding, can
78/// be done **very** efficiently, and indeed those two algorithms lie at the heart of many
79/// operations.
80#[derive(Clone, Copy)]
81#[derive(Eq, PartialEq, Hash)]
82pub struct Decoded<
83  const N: u32,
84  const ES: u32,
85  Int: crate::Int,
86> {
87  /// The `frac`tion is the `frac / FRAC_DENOM` part of the posit value. Since the constant
88  /// `FRAC_DENOM` = `1 << (Int::BITS - 2)` is fixed, one can simply look at the values of `frac`
89  /// as fixed-point numbers where the dot is two places from the left.
90  ///
91  /// Examples (8-bit posit):
92  ///
93  ///   - `0b01_000000`  = +1.0
94  ///   - `0b01_100000`  = +1.5
95  ///   - `0b01_110000`  = +1.75
96  ///   - `0b01_010000`  = +1.25
97  ///   - `0b01_111111`  = +1.984375
98  ///
99  /// and negative numbers
100  ///
101  ///   - `0b10_000000`  = -2.0
102  ///   - `0b10_100000`  = -1.5
103  ///   - `0b10_110000`  = -1.25
104  ///   - `0b10_010000`  = -1.75
105  ///   - `0b10_000001`  = -1.984375
106  ///   - `0b10_111111`  = -1.015625
107  ///
108  /// # Valid ranges
109  ///
110  /// Now, the result of [Posit::decode_regular] always has a `frac` lying within the following
111  /// ranges:
112  ///
113  ///   - [+1.0, +2.0[ for positive numbers
114  ///   - [-2.0, -1.0[ for negative numbers
115  ///
116  /// Note that these are not symmetric! In particular, a positive `frac` may be +1.0
117  /// (`0b01_000…`) but not +2.0, and a negative `frac` may be -2.0 (`0b10_000…`) but not -1.0.
118  /// This is an artefact of how the posit format works, and it enables very efficient
119  /// implementations at key points in many algorithms.
120  ///
121  /// In terms of bit patterns, this corresponds to requiring that the `frac` starts with either
122  /// `0b01` (positive) or `0b10` (negative), and never with `0b00` or `0b11`.
123  ///
124  /// Likewise, for the input to [Decoded::encode_regular] we **also** require that `frac` **must**
125  /// always be in such a valid range. Whenever this is not the case, we say that the `frac`
126  /// is "*underflowing*".
127  ///
128  /// Often, when we feed a [Decoded] to [Decoded::encode_regular], such as when implementing
129  /// arithmetic operations, we will need to adjust the `frac` so that it is in the correct range
130  /// (and possibly compensate by adjusting the `exp`onent in the opposite direction).
131  pub frac: Int,
132  /// The `exp`onent is the `2 ^ exp` part of the posit value.
133  ///
134  /// The `exp` field is made up from both the "regime" and "exponent" fields of a posit: the
135  /// lowest `ES` bits are the exponent field exactly, while the highest come from the regime's
136  /// length and sign. The structure is apparent when looking at the binary `exp`.
137  ///
138  /// Examples (8-bit posit, 2-bit exponent):
139  ///
140  ///   - `0b00001_01` (exp = +5, regime = +1, exponent = +1)
141  ///   - `0b11110_11` (exp = -5, regime = -2, exponent = +3)
142  ///
143  /// # Valid ranges
144  ///
145  /// For reasons that become apparent when implementing [Self::encode_regular], we will also
146  /// require that `exp` lies in a certain range, namely between `Int::MIN / 2` and `Int::MAX /
147  /// 2`, inclusive.
148  ///
149  /// In terms of bit patterns, this corresponds to requiring that `exp` starts with `0b00`
150  /// (positive) or `0b11` (negative), and never with `0b01` or `0b10`.
151  ///
152  /// This is not a concern unless `ES` takes absurdly big values, in which case compile-time
153  /// checks will trigger an error.
154  pub exp: Int,
155}
156
157/// Some basic constants and functions, such as a check that `N` and `ES` make sense for `Int`, or
158/// functions to convert to/from raw bits.
159mod basics;
160
161/// Numeric constants: zero, NaR, min, min_positive, etc.
162mod consts;
163
164/// [`Posit`] → [`Decoded`].
165mod decode;
166
167/// [`Decoded`] → [`Posit`], including correct rounding.
168mod encode;
169
170/// Small fns of one posit argument: neg, prior, next, is_positive, etc.
171mod unary;
172
173/// The four basic arithmetic operations: +, -, ×, ÷.
174mod ops;
175
176/// Quire (the fixed-point accumulator for error-free sums and dot products).
177pub mod quire;
178
179/// Conversions to and from integers, to and from floats, and between different posit types.
180///
181/// Two sorts of conversions are implemented:
182///   - The conversions prescribed in the posit standard (using `round_from`).
183///   - The "Rusty" conversions (using `from` for infallible conversions and `try_from` for
184///     fallible ones).
185///
186/// The [convert::RoundFrom] and [convert::RoundInto] traits are defined here.
187pub mod convert;
188
189/// Traits for [`core::fmt::Display`] and [`core::fmt::Debug`].
190mod fmt;
191
192/// Conversions to and from an arbitrary-precision [malachite::rational::Rational], for testing
193/// purposes. This enables us to verify our algorithms by checking that the exact rationals match.
194/// For example:
195///
196///   - rational(p1 + p2) = rational(p1) + rational(p2)
197///   - rational(p1::ONE) = rational(1)
198///   - rational(p1) = rational(decoded(p1))
199#[cfg(test)]
200mod rational;
201
202/// Miscellaneous helpers for testing.
203#[cfg(test)]
204mod test;
fast_posit/posit/mod.rs

fast_posit/posit/
mod.rs