1mod fixed_point;
7pub use fixed_point::FixedPoint;
8pub use fixed_point::{to_Fx, to_Q};
9pub use fixed_point::{Fx, Q};
10
/// Unsigned integer used to carry fixed-point bit patterns (`Q::val`, the `bits` argument of
/// `to_float`).
pub type UInt = u128;

/// Total width in bits of the `f64` bit patterns decoded by `sign`.
const SIZE: u64 = 64;
/// Width in bits of the mantissa (fraction) field extracted by `mant`.
const MANT_SIZE: u64 = 52;
/// Width in bits of the exponent field; only used here to derive `EXP_BIAS`.
const ES: u64 = 11;
/// Bias subtracted from the stored exponent in `to_fixed`: `2^(ES - 1) - 1`.
const EXP_BIAS: u64 = (1 << (ES - 1)) - 1;
17
/// Returns a `u128` whose `size` least-significant bits are set and all others are clear.
///
/// `mask(0)` is `0`; any `size` of 128 or more yields `u128::MAX`. Both edge cases are handled
/// explicitly because shifting a `u128` by 128 (or computing `128 - size` for a larger `size`)
/// overflows.
fn mask(size: u32) -> u128 {
    match size {
        0 => 0,
        width if width >= u128::BITS => u128::MAX,
        width => u128::MAX >> (u128::BITS - width),
    }
}
37
38fn sign(bits: u64) -> u64 {
39 bits >> (SIZE - 1)
40}
41
42fn exp(bits: u64) -> u64 {
43 ((bits & ((1 << (SIZE - 1)) - 1)) >> MANT_SIZE) & ((1 << MANT_SIZE) - 1)
44}
45
46fn mant(bits: u64) -> u64 {
47 bits & ((1 << MANT_SIZE) - 1)
48}
49
50pub fn to_fixed(x: f64, m: i32, n: i32, round: bool) -> Result<Q, String> {
83 let f64_bits = x.to_bits();
84
85 let sign = sign(f64_bits);
86
87 if x.abs() == 0.0 {
88 return Ok(Q {
89 val: (sign << (m + n)) as UInt,
90 m,
91 n,
92 is_exact: true,
93 });
94 }
95
96 let exp = exp(f64_bits) as i32 - EXP_BIAS as i32;
97
98 let mant_plus_one = (1 << MANT_SIZE) | mant(f64_bits); let bits = mant_plus_one; let fractional_part = bits as UInt & mask((MANT_SIZE as i32 - exp) as u32) as UInt;
105 let integer_part = bits
106 .checked_shr((MANT_SIZE as i32 - exp) as u32)
107 .unwrap_or(0);
108
109 let integer_part_on_m_bits = integer_part as UInt & mask(m as u32) as UInt;
114
115 let mut fractional_part_on_n_bits = match (MANT_SIZE as i32 - exp - n) >= 0 {
116 true => (fractional_part >> (MANT_SIZE as i32 - exp - n) as u32) & (mask(n as u32) as UInt),
117 _ => (fractional_part << (-(MANT_SIZE as i32 - exp - n))) & (mask(n as u32) as UInt),
118 };
119
120 if integer_part_on_m_bits < integer_part as UInt {
121 return Err(format!(
122 "Error: Integer field does not fit into `m` = {} bits.",
123 m
124 ));
125 }
126
127 let _len = (MANT_SIZE as i32 - exp) - (n + 1);
128 let round_bit = match _len >= 0 {
129 true => fractional_part >> (_len) & 1 != 0,
130 _ => fractional_part.checked_shl(-_len as u32).unwrap_or(0) != 0,
131 };
132
133 if round && round_bit {
134 fractional_part_on_n_bits += 1;
135 }
136
137 let sticky_bit = match (MANT_SIZE as i32 - exp - n) >= 0 {
138 true => fractional_part & mask((MANT_SIZE as i32 - exp - n) as u32) as UInt != 0,
139 _ => false,
140 };
141
142 let is_exact = !sticky_bit && !round_bit;
143 let ans_signless = ((integer_part_on_m_bits) << n) + fractional_part_on_n_bits;
144
145 let ans = match sign == 0 {
146 true => ans_signless,
147 false => (!ans_signless + 1 as UInt) & mask((m + n + 1) as u32),
148 };
149
150 Ok(Q {
151 val: ans,
152 m,
153 n,
154 is_exact,
155 })
156}
157
/// Parses an unsigned fixed-point number written as a string of `'0'`/`'1'` characters.
///
/// `bits` must be exactly `b` characters long: the first `m` characters are the integer part
/// (most significant bit first) and the remaining `n = b - m` characters are the fractional
/// part. No sign bit is interpreted.
///
/// # Errors
///
/// Returns an error message when the length of `bits` differs from `b`, when `m` is not in
/// `0..=b`, or when `bits` contains a character other than `'0'` or `'1'`.
#[deprecated(since = "0.4.0")]
pub fn to_float_str(bits: &str, m: i32, b: i32) -> Result<f64, String> {
    let bits_size = bits.len() as i64;
    // `m + n` is `b` by definition of `n`, so the length is compared against `b` directly.
    if bits_size != i64::from(b) {
        return Err(format!(
            "`bits` size does not match the `m` + `n` size you specified. {} != {}",
            bits_size, b
        ));
    }
    if m < 0 || m > b {
        return Err(format!(
            "`m` = {} must lie between 0 and the total size {}.",
            m, b
        ));
    }

    let mut ans = 0.0;

    // Walk from the least significant bit upwards so the terms are accumulated smallest first.
    // The character at `index` carries the weight 2^(m - 1 - index).
    for (index, byte) in bits.bytes().enumerate().rev() {
        let bit = match byte {
            b'0' => 0.0,
            b'1' => 1.0,
            _ => {
                return Err(format!(
                    "`bits` must contain only '0' and '1' characters: {:?}",
                    bits
                ))
            }
        };
        ans += bit * 2.0_f64.powi(m - 1 - index as i32);
    }

    Ok(ans)
}
199
200pub fn to_float(bits: UInt, size: i32, m: i32, n: i32) -> Result<f64, String> {
206 if size != m + n + 1 {
207 return Err(format!(
208 "`bits` size does not match the (`m` + `n` + 1) size you specified. {} != {}",
209 size,
210 m + n + 1
211 ));
212 }
213
214 let sign = (bits >> (m + n)) as u32;
215
216 if sign == 1 && (bits & mask(size as u32 - 1) == 0) {
217 return Ok(-((1 << m) as f64));
218 }
219
220 let mut bits = match sign == 0 {
221 true => bits,
222 false => (!bits + 1) & mask(size as u32),
223 };
224
225 let mut ans = 0.0;
226
227 for i in (1..=n).rev() {
228 ans += match 2_i128.checked_pow(i as u32) {
229 None => 0.0,
230 Some(v) => (bits & 1) as f64 / v as f64,
231 };
232
233 bits >>= 1;
235 }
236 for i in 0..m {
237 ans += (bits & 1) as f64 * 2_i128.pow(i as u32) as f64; bits >>= 1;
239 }
240
241 let ans = match sign == 0 {
242 true => ans,
243 false => -ans,
244 };
245
246 Ok(ans)
247}
248
#[cfg(test)]
mod tests {
    use super::{to_fixed, to_float};

    /// Decoding of known bit patterns, including size mismatches and negative values.
    #[test]
    fn test_to_float() {
        assert_eq!(to_float(0b01010000010110000, 17, 1, 15), Ok(1.25537109375));
        // `size` must equal `m + n + 1`.
        assert!(to_float(0b01010000010110000, 17, 1, 14).is_err());
        assert!(to_float(0b01010000010110000, 17, 1, 15).is_ok());
        assert!(to_float(0b01010000010110000, 17, 1, 16).is_err());

        // Integer-only format: sign bit + 3 integer bits.
        let integer_cases = [
            (0b0_000, 0.0),
            (0b0_001, 1.0),
            (0b0_010, 2.0),
            (0b0_011, 3.0),
            (0b0_100, 4.0),
            (0b0_101, 5.0),
            (0b0_110, 6.0),
            (0b0_111, 7.0),
            (0b1_000, -8.0),
            (0b1_001, -7.0),
            (0b1_010, -6.0),
        ];
        for &(pattern, expected) in &integer_cases {
            assert_eq!(to_float(pattern, 4, 3, 0), Ok(expected));
        }

        // Sign bit + 4 integer bits + 3 fractional bits.
        assert_eq!(to_float(0b0_1111_111, 8, 4, 3), Ok(15.875));
        assert_eq!(to_float(0b1_0000_000, 8, 4, 3), Ok(-16.0));
        assert_eq!(to_float(0b1_0000_001, 8, 4, 3), Ok(-15.875));
    }

    /// Encoding without rounding: exact values, truncation, overflow and signed zeros.
    #[test]
    fn test_to_fixed() {
        let q4_2_cases = [
            (4.0, 0b0_0100_00),
            (-4.0, 0b1_1100_00),
            (8.75, 0b0_1000_11),
            (9.5, 0b0_1001_10),
            (15.75, 0b0_1111_11),
            // 15.8 is not representable and is truncated.
            (15.8, 0b0_1111_11),
        ];
        for &(x, expected) in &q4_2_cases {
            assert_eq!(to_fixed(x, 4, 2, false).unwrap().val, expected);
        }

        // 16.0 needs five integer bits.
        assert!(to_fixed(16.0, 4, 2, false).is_err());

        // Zero keeps the sign bit of the float.
        assert_eq!(to_fixed(0.0, 2, 1, false).unwrap().val, 0b0_00_0);
        assert_eq!(to_fixed(-0.0, 2, 1, false).unwrap().val, 0b1_00_0);
    }

    /// An exactly representable value survives an encode/decode round trip.
    #[test]
    fn back_and_forth() {
        let x = 1.25;
        let (m, n) = (1, 3);
        let encoded = to_fixed(x, m, n, false).unwrap().val;
        assert_eq!(to_float(encoded, 5, m, n).unwrap(), x);
    }
}