fixed2float/
lib.rs

1//!
//! Fixed point to float and vice versa conversion utility.
3//! Use the Q (Qm.n) and the VisSim (Fxm.b) [notations](https://en.wikipedia.org/wiki/Fixed-point_arithmetic#Notations).
4//!
5//!
6mod fixed_point;
7pub use fixed_point::FixedPoint;
8pub use fixed_point::{to_Fx, to_Q};
9pub use fixed_point::{Fx, Q};
10
/// Unsigned integer type that carries the raw bit pattern of a fixed-point value.
pub type UInt = u128;

/// Width in bits of the `f64` mantissa (fraction) field.
const MANT_SIZE: u64 = 52;
/// Width in bits of the `f64` exponent field.
const ES: u64 = 11;
/// Total width in bits of an `f64`: one sign bit, the exponent and the mantissa.
const SIZE: u64 = 1 + ES + MANT_SIZE;
/// Bias that is subtracted from the stored exponent field to obtain the actual exponent.
const EXP_BIAS: u64 = (1 << (ES - 1)) - 1;
17
/// Create a bit mask with the `size` least significant bits set.
///
/// `size == 0` yields `0` and any `size >= 128` yields `u128::MAX`, so every
/// width is handled without a shift or subtraction overflow.
///
/// The 128-bit example below is the case the original author reports a
/// `1 << size` based mask does not work for:
/// ```rust
/// use crate::fixed2float::{Fx, FixedPoint};
/// assert_eq!(Fx::new(0b11111111111111111111110010100000010001001011000100100110000000000000000000000000000000000000000000000000000000000000000000000000, 31, 128, true).eval(), -863.7316719293594);
/// ```
fn mask(size: u32) -> u128 {
  // Shift an all-ones word to the right instead of computing `(1 << size) - 1`,
  // which cannot express a full 128-bit mask:
  // https://users.rust-lang.org/t/how-to-make-an-integer-with-n-bits-set-without-overflow/63078/3
  //
  // `saturating_sub` keeps widths above 128 from underflowing the shift amount,
  // and `checked_shr` turns the 128-bit shift needed for `size == 0` into an
  // empty mask instead of an overflowing shift.
  u128::MAX
    .checked_shr(128_u32.saturating_sub(size))
    .unwrap_or(0)
}
37
38fn sign(bits: u64) -> u64 {
39  bits >> (SIZE - 1)
40}
41
42fn exp(bits: u64) -> u64 {
43  ((bits & ((1 << (SIZE - 1)) - 1)) >> MANT_SIZE) & ((1 << MANT_SIZE) - 1)
44}
45
46fn mant(bits: u64) -> u64 {
47  bits & ((1 << MANT_SIZE) - 1)
48}
49
50/// Convert `x` (f64) into fixed point format (Qm.n), if possible.
51/// ```rust
52/// use fixed2float::{to_Q, Q};
53/// use fixed2float::to_fixed;
54/// assert_eq!(
55///     to_Q(1.5, 1, 3, true),
56///     Ok(
57///         Q {
58///             val: 0b1100,
59///             m: 1,
60///             n: 3,
61///             is_exact: true,
62///         }
63///     )
64/// );
65/// assert_eq!(
66///     to_Q(-2.5, 3, 3, true),
67///     Ok(
68///         Q {
69///             val: 0b1101100,
70///             m: 3,
71///             n: 3,
72///             is_exact: true,
73///         }
74///     )
75/// );
76/// assert_eq!(to_Q(1.5, 1, 3, true).unwrap().val, 0b1100);
77/// assert_eq!(to_Q(0.0, 1, 5, true).unwrap().val, 0);
78/// assert_eq!(to_Q(1.5, 1, 3, true).unwrap().is_exact, true);
79///
80/// assert_eq!(to_fixed(-2.5, 3, 3, false).unwrap().val, 108);
81/// ```
82pub fn to_fixed(x: f64, m: i32, n: i32, round: bool) -> Result<Q, String> {
83  let f64_bits = x.to_bits();
84
85  let sign = sign(f64_bits);
86
87  if x.abs() == 0.0 {
88    return Ok(Q {
89      val: (sign << (m + n)) as UInt,
90      m,
91      n,
92      is_exact: true,
93    });
94  }
95
96  let exp = exp(f64_bits) as i32 - EXP_BIAS as i32;
97
98  let mant_plus_one = (1 << MANT_SIZE) | mant(f64_bits); // Q1.MANT_SIZE
99
100  let bits = mant_plus_one; // bits is mant_plus_one. the only thing that changes
101                            // is where _you_ interpret the point to be, which depends on `exp` at this point.
102                            // now all you have to do is slice out the fractional and non-fractional parts individually.
103
104  let fractional_part = bits as UInt & mask((MANT_SIZE as i32 - exp) as u32) as UInt;
105  let integer_part = bits
106    .checked_shr((MANT_SIZE as i32 - exp) as u32)
107    .unwrap_or(0);
108
109  // now, depending on `m` and `n` you need to figure out whether rouding occurs.
110  // if that's the case, that information is reported back to the user via the `is_exact` flag.
111  // whereas if the integer part does not fit into `m` bits you return the Err variant instead.
112
113  let integer_part_on_m_bits = integer_part as UInt & mask(m as u32) as UInt;
114
115  let mut fractional_part_on_n_bits = match (MANT_SIZE as i32 - exp - n) >= 0 {
116    true => (fractional_part >> (MANT_SIZE as i32 - exp - n) as u32) & (mask(n as u32) as UInt),
117    _ => (fractional_part << (-(MANT_SIZE as i32 - exp - n))) & (mask(n as u32) as UInt),
118  };
119
120  if integer_part_on_m_bits < integer_part as UInt {
121    return Err(format!(
122      "Error: Integer field does not fit into `m` = {} bits.",
123      m
124    ));
125  }
126
127  let _len = (MANT_SIZE as i32 - exp) - (n + 1);
128  let round_bit = match _len >= 0 {
129    true => fractional_part >> (_len) & 1 != 0,
130    _ => fractional_part.checked_shl(-_len as u32).unwrap_or(0) != 0,
131  };
132
133  if round && round_bit {
134    fractional_part_on_n_bits += 1;
135  }
136
137  let sticky_bit = match (MANT_SIZE as i32 - exp - n) >= 0 {
138    true => fractional_part & mask((MANT_SIZE as i32 - exp - n) as u32) as UInt != 0,
139    _ => false,
140  };
141
142  let is_exact = !sticky_bit && !round_bit;
143  let ans_signless = ((integer_part_on_m_bits) << n) + fractional_part_on_n_bits;
144
145  let ans = match sign == 0 {
146    true => ans_signless,
147    false => (!ans_signless + 1 as UInt) & mask((m + n + 1) as u32),
148  };
149
150  Ok(Q {
151    val: ans,
152    m,
153    n,
154    is_exact,
155  })
156}
157
#[deprecated(since = "0.4.0")]
/// Compute the real value represented by `bits` (str).
///
/// `bits` is read as an unsigned binary number written most significant digit
/// first: the first `m` digits form the integer part and the remaining
/// `b - m` digits form the fractional part.
///
/// # Errors
///
/// Returns `Err` when the length of `bits` is not `b`, when `m` is negative or
/// larger than `b`, or when `bits` contains anything but `0` and `1`.
///
/// ```rust
/// use fixed2float::to_float_str;
/// assert_eq!(to_float_str("00010011000000100001", 12, 20), Ok(304.12890625));
/// ```
pub fn to_float_str(bits: &str, m: i32, b: i32) -> Result<f64, String> {
  let bits_size = bits.len() as i32;
  // `m + n` with `n = b - m` is simply `b`.
  if bits_size != b {
    return Err(format!(
      "`bits` size  does not match the `m` + `n` size you specified. {} != {}",
      bits_size, b
    ));
  }
  if m < 0 || m > b {
    return Err(format!(
      "`m` = {} must lie between 0 and the total size {}.",
      m, b
    ));
  }
  let n = b - m;

  // Weight of the least significant digit, 2^-n, built by repeated halving so
  // that no integer shift can overflow for wide formats.
  let mut weight = (0..n).fold(1.0_f64, |w, _| w / 2.0);
  let mut ans = 0.0;

  // Walk from the least to the most significant digit, doubling the weight
  // at every step.
  for digit in bits.chars().rev() {
    match digit {
      '0' => {}
      '1' => ans += weight,
      other => {
        return Err(format!(
          "`bits` must only contain binary digits, found {:?}.",
          other
        ))
      }
    }
    weight *= 2.0;
  }

  Ok(ans)
}
199
200/// Compute the real value represented by `bits`.
201/// ```rust
202/// use fixed2float::to_float;
203/// assert_eq!(to_float(0b0_000100110000_00100001, 21, 12, 8), Ok(304.12890625));
204/// ```
205pub fn to_float(bits: UInt, size: i32, m: i32, n: i32) -> Result<f64, String> {
206  if size != m + n + 1 {
207    return Err(format!(
208      "`bits` size  does not match the (`m` + `n` + 1) size you specified. {} != {}",
209      size,
210      m + n + 1
211    ));
212  }
213
214  let sign = (bits >> (m + n)) as u32;
215
216  if sign == 1 && (bits & mask(size as u32 - 1) == 0) {
217    return Ok(-((1 << m) as f64));
218  }
219
220  let mut bits = match sign == 0 {
221    true => bits,
222    false => (!bits + 1) & mask(size as u32),
223  };
224
225  let mut ans = 0.0;
226
227  for i in (1..=n).rev() {
228    ans += match 2_i128.checked_pow(i as u32) {
229      None => 0.0,
230      Some(v) => (bits & 1) as f64 / v as f64,
231    };
232
233    //2_i128.pow (i as u32) as f64; //  (1 << i) as f64;
234    bits >>= 1;
235  }
236  for i in 0..m {
237    ans += (bits & 1) as f64 * 2_i128.pow(i as u32) as f64; // (1 << i) as f64;
238    bits >>= 1;
239  }
240
241  let ans = match sign == 0 {
242    true => ans,
243    false => -ans,
244  };
245
246  Ok(ans)
247}
248
#[cfg(test)]
mod tests {
  use super::{to_fixed, to_float, to_float_str};

  /// `to_float` rejects a `size` other than `m + n + 1` and decodes positive,
  /// negative and most-negative words.
  #[test]
  fn test_to_float() {
    let word = 0b01010000010110000;
    assert_eq!(to_float(word, 17, 1, 15), Ok(1.25537109375));
    assert!(to_float(word, 17, 1, 14).is_err());
    assert!(!to_float(word, 17, 1, 15).is_err());
    assert!(to_float(word, 17, 1, 16).is_err());
    // assert_eq!(to_float_str("1010000010110000", 1, 16), Ok(1.25537109375));
    // assert_eq!(to_float_str("1010000010110000", 1, 15).is_err(), true);
    // assert_eq!(to_float_str("1010000010110000", 1, 16).is_err(), false);
    // assert_eq!(to_float_str("1010000010110000", 1, 17).is_err(), true);

    // Fx<3,4>: three integer bits, no fractional bits.
    let four_bit_words = [
      (0b0_000, 0.0),
      (0b0_001, 1.0),
      (0b0_010, 2.0),
      (0b0_011, 3.0),
      (0b0_100, 4.0),
      (0b0_101, 5.0),
      (0b0_110, 6.0),
      (0b0_111, 7.0),
      (0b1_000, -8.0),
      (0b1_001, -7.0),
      (0b1_010, -6.0),
    ];
    for &(bits, expected) in four_bit_words.iter() {
      assert_eq!(to_float(bits, 4, 3, 0), Ok(expected));
    }

    // Fx<4,8>: four integer bits, three fractional bits.
    let eight_bit_words = [
      (0b0_1111_111, 15.875),
      (0b1_0000_000, -16.0),
      (0b1_0000_001, -15.875),
    ];
    for &(bits, expected) in eight_bit_words.iter() {
      assert_eq!(to_float(bits, 8, 4, 3), Ok(expected));
    }
  }

  /// `to_fixed` without rounding: exact values, truncation, overflow and zeros.
  #[test]
  fn test_to_fixed() {
    use super::fixed_point::Q;

    // Disabled cases, kept for reference:
    // assert_eq!(to_fixed(10.25, 4, 3, true), Ok(Q::new(82, 4, 3, true)));
    // assert_eq!(to_fixed(10.25, 3, 3, true).is_err(), true);
    // assert_eq!(to_fixed(10.25, 8, 3, true), Ok(Q::new(82, 8, 3, true)));
    // assert_eq!(to_fixed(10.25, 8, 2, true), Ok(Q::new(41, 8, 2, true)));
    // assert_eq!(to_fixed(10.25, 8, 1, true), Ok(Q::new(21, 8, 1, false)));
    // assert_eq!(to_fixed(10.25, 8, 0, true), Ok(Q::new(10, 8, 0, false)));
    // assert_eq!(to_fixed(0.0078125, 1, 1, true), Ok(Q::new(0, 1, 1, false)));
    // assert_eq!(to_fixed(0.0078125, 1, 2, true), Ok(Q::new(0, 1, 2, false)));
    // assert_eq!(to_fixed(0.0078125, 1, 3, true), Ok(Q::new(0, 1, 3, false)));
    // assert_eq!(to_fixed(0.0078125, 1, 4, true), Ok(Q::new(0, 1, 4, false)));
    // assert_eq!(to_fixed(0.0078125, 1, 5, true), Ok(Q::new(0, 1, 5, false)));
    // assert_eq!(to_fixed(0.0078125, 1, 6, true), Ok(Q::new(1, 1, 6, false)));
    // assert_eq!(to_fixed(0.0078125, 1, 7, true), Ok(Q::new(1, 1, 7, true)));
    // assert_eq!(to_fixed(0.0078125, 1, 8, true), Ok(Q::new(2, 1, 8, true)));
    // assert_eq!(to_fixed(0.0078125, 1, 9, true), Ok(Q::new(4, 1, 9, true)));
    // assert_eq!(to_fixed(1.387, 2, 15, true).unwrap().val, 45449);
    // assert_eq!(to_fixed(4.3, 2, 15, true).is_err(), true);

    // Q4.2 words (sign, four integer bits, two fractional bits).
    let q4_2_cases = [
      (4.0, 0b0_0100_00),
      (-4.0, 0b1_1100_00),
      (8.75, 0b0_1000_11),
      (9.5, 0b0_1001_10),
      (15.75, 0b0_1111_11),
      (15.8, 0b0_1111_11),
    ];
    for &(x, expected) in q4_2_cases.iter() {
      assert_eq!(to_fixed(x, 4, 2, false).unwrap().val, expected);
    }
    assert!(to_fixed(16.0, 4, 2, false).is_err());

    // Both zeros; the negative one keeps its sign bit.
    assert_eq!(to_fixed(0.0, 2, 1, false).unwrap().val, 0b0_00_0);
    assert_eq!(to_fixed(-0.0, 2, 1, false).unwrap().val, 0b1_00_0);
  }

  /// Converting to fixed point and back returns the starting value.
  #[test]
  fn back_and_forth() {
    // 0_1_010
    let x = 1.25;
    let (m, n) = (1, 3);
    let word = to_fixed(x, m, n, false).unwrap().val;
    assert_eq!(to_float(word, 5, m, n).unwrap(), x);
  }
}