hexf_parse/
lib.rs

1//! Parses hexadecimal float literals.
2//! There are two functions `parse_hexf32` and `parse_hexf64` provided for each type.
3//!
4//! ```rust
5//! use hexf_parse::*;
6//! assert_eq!(parse_hexf32("0x1.99999ap-4", false), Ok(0.1f32));
7//! assert_eq!(parse_hexf64("0x1.999999999999ap-4", false), Ok(0.1f64));
8//! ```
9//!
10//! An additional `bool` parameter can be set to true if you want to allow underscores.
11//!
12//! ```rust
13//! use hexf_parse::*;
14//! assert!(parse_hexf64("0x0.1_7p8", false).is_err());
15//! assert_eq!(parse_hexf64("0x0.1_7p8", true), Ok(23.0f64));
16//! ```
17//!
18//! The error is reported via an opaque `ParseHexfError` type.
19
20use std::{f32, f64, fmt, isize, str};
21
22/// An opaque error type from `parse_hexf32` and `parse_hexf64`.
23#[derive(Debug, Clone, PartialEq, Eq)]
24pub struct ParseHexfError {
25    kind: ParseHexfErrorKind,
26}
27
/// Private failure categories carried by `ParseHexfError`.
#[derive(Clone, Debug, Eq, PartialEq)]
enum ParseHexfErrorKind {
    /// The input contained no bytes at all.
    Empty,
    /// The input is not a well-formed hexadecimal float literal.
    Invalid,
    /// The literal is well-formed but its value cannot be held exactly.
    Inexact,
}
34
35const EMPTY: ParseHexfError = ParseHexfError {
36    kind: ParseHexfErrorKind::Empty,
37};
38const INVALID: ParseHexfError = ParseHexfError {
39    kind: ParseHexfErrorKind::Invalid,
40};
41const INEXACT: ParseHexfError = ParseHexfError {
42    kind: ParseHexfErrorKind::Inexact,
43};
44
45impl ParseHexfError {
46    fn text(&self) -> &'static str {
47        match self.kind {
48            ParseHexfErrorKind::Empty => "cannot parse float from empty string",
49            ParseHexfErrorKind::Invalid => "invalid hexadecimal float literal",
50            ParseHexfErrorKind::Inexact => "cannot exactly represent float in target type",
51        }
52    }
53}
54
55impl fmt::Display for ParseHexfError {
56    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
57        fmt::Display::fmt(self.text(), f)
58    }
59}
60
61impl std::error::Error for ParseHexfError {
62    fn description(&self) -> &'static str {
63        self.text()
64    }
65}
66
67fn parse(s: &[u8], allow_underscore: bool) -> Result<(bool, u64, isize), ParseHexfError> {
68    // ^[+-]?
69    let (s, negative) = match s.split_first() {
70        Some((&b'+', s)) => (s, false),
71        Some((&b'-', s)) => (s, true),
72        Some(_) => (s, false),
73        None => return Err(EMPTY),
74    };
75
76    // 0[xX]
77    if !(s.starts_with(b"0x") || s.starts_with(b"0X")) {
78        return Err(INVALID);
79    }
80
81    // ([0-9a-fA-F][0-9a-fA-F_]*)?
82    let mut s = &s[2..];
83    let mut acc = 0; // the accumulated mantissa
84    let mut digit_seen = false;
85    loop {
86        let (s_, digit) = match s.split_first() {
87            Some((&c @ b'0'..=b'9', s)) => (s, c - b'0'),
88            Some((&c @ b'a'..=b'f', s)) => (s, c - b'a' + 10),
89            Some((&c @ b'A'..=b'F', s)) => (s, c - b'A' + 10),
90            Some((&b'_', s_)) if allow_underscore && digit_seen => {
91                s = s_;
92                continue;
93            }
94            _ => break,
95        };
96
97        s = s_;
98        digit_seen = true;
99
100        // if `acc << 4` fails, mantissa definitely exceeds 64 bits so we should bail out
101        if acc >> 60 != 0 {
102            return Err(INEXACT);
103        }
104        acc = acc << 4 | digit as u64;
105    }
106
107    // (\.[0-9a-fA-F][0-9a-fA-F_]*)?
108    // we want to ignore trailing zeroes but shifting at each digit will overflow first.
109    // therefore we separately count the number of zeroes and flush it on non-zero digits.
110    let mut nfracs = 0isize; // this is suboptimal but also practical, see below
111    let mut nzeroes = 0isize;
112    let mut frac_digit_seen = false;
113    if s.starts_with(b".") {
114        s = &s[1..];
115        loop {
116            let (s_, digit) = match s.split_first() {
117                Some((&c @ b'0'..=b'9', s)) => (s, c - b'0'),
118                Some((&c @ b'a'..=b'f', s)) => (s, c - b'a' + 10),
119                Some((&c @ b'A'..=b'F', s)) => (s, c - b'A' + 10),
120                Some((&b'_', s_)) if allow_underscore && frac_digit_seen => {
121                    s = s_;
122                    continue;
123                }
124                _ => break,
125            };
126
127            s = s_;
128            frac_digit_seen = true;
129
130            if digit == 0 {
131                nzeroes = nzeroes.checked_add(1).ok_or(INEXACT)?;
132            } else {
133                // flush nzeroes
134                let nnewdigits = nzeroes.checked_add(1).ok_or(INEXACT)?;
135                nfracs = nfracs.checked_add(nnewdigits).ok_or(INEXACT)?;
136                nzeroes = 0;
137
138                // if the accumulator is non-zero, the shift cannot exceed 64
139                // (therefore the number of new digits cannot exceed 16).
140                // this will catch e.g. `0.40000....00001` with sufficiently many zeroes
141                if acc != 0 {
142                    if nnewdigits >= 16 || acc >> (64 - nnewdigits * 4) != 0 {
143                        return Err(INEXACT);
144                    }
145                    acc = acc << (nnewdigits * 4);
146                }
147                acc |= digit as u64;
148            }
149        }
150    }
151
152    // at least one digit should be present
153    if !(digit_seen || frac_digit_seen) {
154        return Err(INVALID);
155    }
156
157    // [pP]
158    let s = match s.split_first() {
159        Some((&b'P', s)) | Some((&b'p', s)) => s,
160        _ => return Err(INVALID),
161    };
162
163    // [+-]?
164    let (mut s, negative_exponent) = match s.split_first() {
165        Some((&b'+', s)) => (s, false),
166        Some((&b'-', s)) => (s, true),
167        Some(_) => (s, false),
168        None => return Err(INVALID),
169    };
170
171    // [0-9_]*[0-9][0-9_]*$
172    let mut digit_seen = false;
173    let mut exponent = 0isize; // this is suboptimal but also practical, see below
174    loop {
175        let (s_, digit) = match s.split_first() {
176            Some((&c @ b'0'..=b'9', s)) => (s, c - b'0'),
177            Some((&b'_', s_)) if allow_underscore => {
178                s = s_;
179                continue;
180            }
181            None if digit_seen => break,
182            // no more bytes expected, and at least one exponent digit should be present
183            _ => return Err(INVALID),
184        };
185
186        s = s_;
187        digit_seen = true;
188
189        // if we have no non-zero digits at this point, ignore the exponent :-)
190        if acc != 0 {
191            exponent = exponent
192                .checked_mul(10)
193                .and_then(|v| v.checked_add(digit as isize))
194                .ok_or(INEXACT)?;
195        }
196    }
197    if negative_exponent {
198        exponent = -exponent;
199    }
200
201    if acc == 0 {
202        // ignore the exponent as above
203        Ok((negative, 0, 0))
204    } else {
205        // the exponent should be biased by (nfracs * 4) to match with the mantissa read.
206        // we still miss valid inputs like `0.0000...0001pX` where the input is filling
207        // at least 1/4 of the total addressable memory, but I dare not handle them!
208        let exponent = nfracs
209            .checked_mul(4)
210            .and_then(|v| exponent.checked_sub(v))
211            .ok_or(INEXACT)?;
212        Ok((negative, acc, exponent))
213    }
214}
215
#[test]
fn test_parse() {
    // an input without any byte is the only one reported as empty
    assert_eq!(parse(b"", false), Err(EMPTY));

    // malformed literals, underscores disabled
    let malformed: &[&[u8]] = &[
        b" ",
        b"3.14",
        b"0x3.14",
        b" 0x3.14p+3",
        b"0x3.14p+3 ",
        b"0x.p1",
        b"0x.P1",
        b"0xp1",
        b"0xP1",
        b"0x0p",
        b"0xp",
        b"0x.p",
        b"0x_p0",
    ];
    for &input in malformed {
        assert_eq!(
            parse(input, false),
            Err(INVALID),
            "input: {:?}",
            String::from_utf8_lossy(input)
        );
    }

    // well-formed literals with a non-zero mantissa: (input, (negative, mantissa, exponent))
    let exact: &[(&[u8], (bool, u64, isize))] = &[
        (b"0x3.14fp+3", (false, 0x314f, 3 - 12)),
        (b"+0x3.14fp+3", (false, 0x314f, 3 - 12)),
        (b"-0x3.14fp+3", (true, 0x314f, 3 - 12)),
        (b"0xAbC.p1", (false, 0xabc, 1)),
        (b"0x0.7p1", (false, 0x7, 1 - 4)),
        (b"0x.dEfP-1", (false, 0xdef, -1 - 12)),
        (b"0x4.00000000000000000000p55", (false, 4, 55)),
        (b"0x4.00001000000000000000p55", (false, 0x400001, 55 - 20)),
    ];
    for &(input, expected) in exact {
        assert_eq!(
            parse(input, false),
            Ok(expected),
            "input: {:?}",
            String::from_utf8_lossy(input)
        );
    }

    // a zero mantissa discards the exponent, however large it is
    let zeroes: &[&[u8]] = &[
        b"0x0p1",
        b"0x0P1",
        b"0x0.p1",
        b"0x0.P1",
        b"0x0.0p1",
        b"0x0.0P1",
        b"0x.0p1",
        b"0x.0P1",
        b"0x0p0",
        b"0x0.p999999999",
        b"0x0.p99999999999999999999999999999",
        b"0x0.p-99999999999999999999999999999",
    ];
    for &input in zeroes {
        assert_eq!(
            parse(input, false),
            Ok((false, 0, 0)),
            "input: {:?}",
            String::from_utf8_lossy(input)
        );
    }

    // exponent or mantissa too large to be tracked
    let inexact: &[&[u8]] = &[
        b"0x1.p99999999999999999999999999999",
        b"0x1.p-99999999999999999999999999999",
        b"0x4.00000000000000000001p55",
    ];
    for &input in inexact {
        assert_eq!(
            parse(input, false),
            Err(INEXACT),
            "input: {:?}",
            String::from_utf8_lossy(input)
        );
    }

    // underscore insertion
    assert_eq!(
        parse(b"-0x3____.1_4___p+___5___", true),
        Ok((true, 0x314, 5 - 8))
    );
    let misplaced_underscores: &[&[u8]] = &[
        b"-_0x3.14p+5",
        b"_0x3.14p+5",
        b"0x_3.14p+5",
        b"0x3._14p+5",
        b"0x3.14p_+5",
        b"-0x____.1_4___p+___5___",
        b"-0x3____.____p+___5___",
        b"-0x3____.1_4___p+______",
        b"0x_0p0",
        b"0x_p0",
        b"0x._p0",
        b"0x._0p0",
        b"0x0._0p0",
    ];
    for &input in misplaced_underscores {
        assert_eq!(
            parse(input, true),
            Err(INVALID),
            "input: {:?}",
            String::from_utf8_lossy(input)
        );
    }
    let underscored_zeroes: &[&[u8]] = &[b"0x0_p0", b"0x.0_p0", b"0x0.0_p0"];
    for &input in underscored_zeroes {
        assert_eq!(
            parse(input, true),
            Ok((false, 0, 0)),
            "input: {:?}",
            String::from_utf8_lossy(input)
        );
    }

    // issues
    // #11 (https://github.com/lifthrasiir/hexf/issues/11)
    let without_fraction = parse(b"0x1p-149", false);
    let with_zero_fraction = parse(b"0x1.0p-149", false);
    assert_eq!(without_fraction, with_zero_fraction);
}
300
301macro_rules! define_convert {
302    ($name:ident => $f:ident) => {
303        fn $name(negative: bool, mantissa: u64, exponent: isize) -> Result<$f, ParseHexfError> {
304            // guard the exponent with the definitely safe range (we will exactly bound it later)
305            if exponent < -0xffff || exponent > 0xffff {
306                return Err(INEXACT);
307            }
308
309            // strip the trailing zeroes in mantissa and adjust exponent.
310            // we do this because a unit in the least significant bit of mantissa is
311            // always safe to represent while one in the most significant bit isn't.
312            let trailing = mantissa.trailing_zeros() & 63; // guard mantissa=0 case
313            let mantissa = mantissa >> trailing;
314            let exponent = exponent + trailing as isize;
315
316            // normalize the exponent that the number is (1.xxxx * 2^normalexp),
317            // and check for the mantissa and exponent ranges
318            let leading = mantissa.leading_zeros();
319            let normalexp = exponent + (63 - leading as isize);
320            let mantissasize = if normalexp < $f::MIN_EXP as isize - $f::MANTISSA_DIGITS as isize {
321                // the number is smaller than the minimal denormal number
322                return Err(INEXACT);
323            } else if normalexp < ($f::MIN_EXP - 1) as isize {
324                // the number is denormal, the # of bits in the mantissa is:
325                // - minimum (1) at MIN_EXP - MANTISSA_DIGITS
326                // - maximum (MANTISSA_DIGITS - 1) at MIN_EXP - 2
327                $f::MANTISSA_DIGITS as isize - $f::MIN_EXP as isize + normalexp + 1
328            } else if normalexp < $f::MAX_EXP as isize {
329                // the number is normal, the # of bits in the mantissa is fixed
330                $f::MANTISSA_DIGITS as isize
331            } else {
332                // the number is larger than the maximal denormal number
333                // ($f::MAX_EXP denotes NaN and infinities here)
334                return Err(INEXACT);
335            };
336
337            if mantissa >> mantissasize == 0 {
338                let mut mantissa = mantissa as $f;
339                if negative {
340                    mantissa = -mantissa;
341                }
342                // yes, powi somehow does not work!
343                Ok(mantissa * (2.0 as $f).powf(exponent as $f))
344            } else {
345                Err(INEXACT)
346            }
347        }
348    };
349}
350
351define_convert!(convert_hexf32 => f32);
352define_convert!(convert_hexf64 => f64);
353
#[test]
fn test_convert_hexf32() {
    // conversions with an exact result: (negative, mantissa, exponent, value)
    let exact: &[(bool, u64, isize, f32)] = &[
        (false, 0, 0, 0.0),
        (false, 1, 0, 1.0),
        (false, 10, 0, 10.0),
        (false, 10, 1, 20.0),
        (false, 10, -1, 5.0),
        (true, 0, 0, -0.0),
        (true, 1, 0, -1.0),
        // normal truncation: 24 significant bits still fit
        (false, 0x0000_0000_00ff_ffff, 0, 16777215.0),
        (false, 0xffff_ff00_0000_0000, -40, 16777215.0),
        // maximum
        (false, 0x0000_0000_00ff_ffff, 104, f32::MAX),
        (false, 0xffff_ff00_0000_0000, 64, f32::MAX),
    ];
    for &(negative, mantissa, exponent, value) in exact {
        assert_eq!(
            convert_hexf32(negative, mantissa, exponent),
            Ok(value),
            "mantissa {:#x}, exponent {}",
            mantissa,
            exponent
        );
    }

    // negative zeroes
    assert_eq!(convert_hexf32(false, 0, 0).unwrap().signum(), 1.0);
    assert_eq!(convert_hexf32(true, 0, 0).unwrap().signum(), -1.0);

    // conversions that must be refused as inexact: (mantissa, exponent)
    let inexact: &[(u64, isize)] = &[
        // normal truncation: a 25th significant bit does not fit
        (0x0000_0000_01ff_ffff, 0),
        (0xffff_ff80_0000_0000, -40),
        // maximum
        (0x0000_0000_01ff_ffff, 104),
        (0x0000_0000_01ff_fffe, 104),
        (0x0000_0000_0000_0001, 128),
        (0x8000_0000_0000_0000, 65),
        (0xffff_ff80_0000_0000, 64),
    ];
    for &(mantissa, exponent) in inexact {
        assert_eq!(
            convert_hexf32(false, mantissa, exponent),
            Err(INEXACT),
            "mantissa {:#x}, exponent {}",
            mantissa,
            exponent
        );
    }

    // denormal truncation and minimum: inputs that are still representable
    let accepted: &[(u64, isize)] = &[
        (0x0000_0000_007f_ffff, -149),
        (0x0000_0000_00ff_fffe, -150),
        (0xffff_fe00_0000_0000, -190),
        (0x0000_0000_0000_0001, -149),
        (0x0000_0000_0000_0002, -150),
        (0x8000_0000_0000_0000, -212),
    ];
    for &(mantissa, exponent) in accepted {
        assert!(
            convert_hexf32(false, mantissa, exponent).is_ok(),
            "mantissa {:#x}, exponent {}",
            mantissa,
            exponent
        );
    }

    // denormal truncation and minimum: inputs that need a bit below the smallest denormal
    let rejected: &[(u64, isize)] = &[
        (0x0000_0000_00ff_ffff, -150),
        (0xffff_ff00_0000_0000, -190),
        (0x0000_0000_0000_0001, -150),
        (0x0000_0000_0000_0002, -151),
        (0x0000_0000_0000_0003, -150),
        (0x0000_0000_0000_0003, -151),
        (0x8000_0000_0000_0000, -213),
    ];
    for &(mantissa, exponent) in rejected {
        assert!(
            convert_hexf32(false, mantissa, exponent).is_err(),
            "mantissa {:#x}, exponent {}",
            mantissa,
            exponent
        );
    }
}
433
// Exercises `convert_hexf64` at small values, signed zeroes, and the precision and
// range limits of `f64` (53 significant bits, denormals down to 2^-1074, finite
// values below 2^1024).
#[test]
fn test_convert_hexf64() {
    assert_eq!(convert_hexf64(false, 0, 0), Ok(0.0));
    assert_eq!(convert_hexf64(false, 1, 0), Ok(1.0));
    assert_eq!(convert_hexf64(false, 10, 0), Ok(10.0));
    assert_eq!(convert_hexf64(false, 10, 1), Ok(20.0));
    assert_eq!(convert_hexf64(false, 10, -1), Ok(5.0));
    assert_eq!(convert_hexf64(true, 0, 0), Ok(-0.0));
    assert_eq!(convert_hexf64(true, 1, 0), Ok(-1.0));

    // negative zeroes
    assert_eq!(convert_hexf64(false, 0, 0).unwrap().signum(), 1.0);
    assert_eq!(convert_hexf64(true, 0, 0).unwrap().signum(), -1.0);

    // normal truncation: 53 significant bits fit, 54 do not
    assert_eq!(
        convert_hexf64(false, 0x001f_ffff_ffff_ffff, 0),
        Ok(9007199254740991.0)
    );
    assert_eq!(
        convert_hexf64(false, 0x003f_ffff_ffff_ffff, 0),
        Err(INEXACT)
    );
    assert_eq!(
        convert_hexf64(false, 0xffff_ffff_ffff_f800, -11),
        Ok(9007199254740991.0)
    );
    assert_eq!(
        convert_hexf64(false, 0xffff_ffff_ffff_fc00, -11),
        Err(INEXACT)
    );

    // denormal truncation
    assert!(convert_hexf64(false, 0x000f_ffff_ffff_ffff, -1074).is_ok());
    assert!(convert_hexf64(false, 0x001f_ffff_ffff_ffff, -1075).is_err());
    assert!(convert_hexf64(false, 0x001f_ffff_ffff_fffe, -1075).is_ok());
    assert!(convert_hexf64(false, 0xffff_ffff_ffff_f800, -1086).is_err());
    assert!(convert_hexf64(false, 0xffff_ffff_ffff_f000, -1086).is_ok());

    // minimum
    assert!(convert_hexf64(false, 0x0000_0000_0000_0001, -1074).is_ok());
    assert!(convert_hexf64(false, 0x0000_0000_0000_0001, -1075).is_err());
    assert!(convert_hexf64(false, 0x0000_0000_0000_0002, -1075).is_ok());
    assert!(convert_hexf64(false, 0x0000_0000_0000_0002, -1076).is_err());
    assert!(convert_hexf64(false, 0x0000_0000_0000_0003, -1075).is_err());
    assert!(convert_hexf64(false, 0x0000_0000_0000_0003, -1076).is_err());
    assert!(convert_hexf64(false, 0x8000_0000_0000_0000, -1137).is_ok());
    assert!(convert_hexf64(false, 0x8000_0000_0000_0000, -1138).is_err());

    // maximum
    assert_eq!(
        convert_hexf64(false, 0x001f_ffff_ffff_ffff, 971),
        Ok(f64::MAX)
    );
    assert_eq!(
        convert_hexf64(false, 0x003f_ffff_ffff_ffff, 971),
        Err(INEXACT)
    );
    assert_eq!(
        convert_hexf64(false, 0x003f_ffff_ffff_fffe, 971),
        Err(INEXACT)
    );
    // 2^1024 is the first power of two beyond the finite `f64` range; these two
    // checks must go through the f64 converter to exercise that bound
    assert_eq!(
        convert_hexf64(false, 0x0000_0000_0000_0001, 1024),
        Err(INEXACT)
    );
    assert_eq!(
        convert_hexf64(false, 0x8000_0000_0000_0000, 961),
        Err(INEXACT)
    );
    assert_eq!(
        convert_hexf64(false, 0xffff_ffff_ffff_f800, 960),
        Ok(f64::MAX)
    );
    assert_eq!(
        convert_hexf64(false, 0xffff_ffff_ffff_fc00, 960),
        Err(INEXACT)
    );
}
513
514/// Tries to parse a hexadecimal float literal to `f32`.
515/// The underscore is allowed only when `allow_underscore` is true.
516pub fn parse_hexf32(s: &str, allow_underscore: bool) -> Result<f32, ParseHexfError> {
517    let (negative, mantissa, exponent) = parse(s.as_bytes(), allow_underscore)?;
518    convert_hexf32(negative, mantissa, exponent)
519}
520
521/// Tries to parse a hexadecimal float literal to `f64`.
522/// The underscore is allowed only when `allow_underscore` is true.
523pub fn parse_hexf64(s: &str, allow_underscore: bool) -> Result<f64, ParseHexfError> {
524    let (negative, mantissa, exponent) = parse(s.as_bytes(), allow_underscore)?;
525    convert_hexf64(negative, mantissa, exponent)
526}
527
#[test]
fn test_parse_hexf() {
    // issues
    // #6 (https://github.com/lifthrasiir/hexf/issues/6)
    // a long run of fraction zeroes without an exponent part must yield an error
    let outcome = parse_hexf64("0x.000000000000000000102", false);
    assert!(outcome.is_err());
}