// trivet 3.1.0 - Docs.rs

// Trivet
// Copyright (c) 2025 by Stacy Prowell.  All rights reserved.
// https://gitlab.com/binary-tools/trivet

//! Parse binary-encoded floating point numbers.

use crate::{
    errors::{syntax_error, ParseResult},
    ParserCore,
};

use super::NumberParserSettings;

/// Parse a floating point number represented in binary.  This presumes the sign and any radix
/// signifier have been parsed, and the parser is at the first digit of the binary number.
/// If the number is negative, indicate this with the `negative` argument.
///
/// This method does not handle `inf`, `infinity`, or `nan`.  Parse those separately.
///
/// Non-binary digits (2-9) cause an error.  If no digits are found,
/// this will cause an error.  A character that causes the error is consumed.
///
/// If there are too many significant digits then the mantissa will overflow and this will generate
/// an error.
///
/// The exponent, if present, is introduced by `e`, `E`, `p`, or `P`, may carry a `+` or `-`
/// sign, and is presumed to be in *decimal*.  It is a power of two.
///
/// A significand of zero always yields positive `0.0`; the `negative` argument and the exponent
/// are ignored in that case.
///
/// For example, the following are ways to encode 1/2.
///
/// - `0.1`
/// - `1e-1`
/// - `0.100000`
/// - `.1`
/// - `1000e-4`
///
/// # Errors
///
/// A syntax error is returned when:
///
/// - a decimal digit other than `0` or `1` appears in the significand,
/// - the significand has more significant bits than fit in 64 bits,
/// - the exponent digits cannot be parsed as an `i32` (including when they are missing),
/// - no digit was found at all,
/// - the whole part is empty and `settings.permit_empty_whole` is false,
/// - the fraction part is empty and `settings.permit_empty_fraction` is false, or
/// - the significand is non-zero and the final exponent is outside `-1022..=1023`.
pub fn parse_binary_float(
    parser: &mut ParserCore,
    negative: bool,
    settings: &NumberParserSettings,
) -> ParseResult<f64> {
    /* Some examples.
     *
     * 100          -> 1. e 2
     * 100.1        -> 1.001 e 2
     * 100.0000     -> 1. e 2
     * 000100       -> 1. e 2
     * 0.01         -> 1. e -2
     * 0.01001      -> 1.001 e -2
     * .01001       -> 1.001 e -2
     *
     * The initial exponent depends on the position of the first non-zero digit, if
     * any.  We work as follows.
     *
     * If we have not hit the decimal, but we have hit the first non-zero digit, then
     * add one to the exponent for every additional digit we find.
     *
     * If we have hit the decimal, but have not hit the first non-zero digit, then
     * subtract one for each digit we find, including the first non-zero digit, if any.
     *
     * State Machine for Mantissa and Initial Exponent:
     *
     * In every state an underscore (when permitted) is skipped without changing state,
     * a decimal digit other than 0 or 1 is an error, and any other character ends the
     * significand.  "stop" below means the significand ends and the character is left
     * unconsumed.
     *
     * Have not found a digit or a decimal point.
     * [] 0 -> [0]
     *    1 -> [1]
     *    . -> [.]
     *    _ -> []
     *
     * Have found a digit, but not a leading one or decimal point.
     * [0] 0 -> [0]
     *     1 -> [1]
     *     . -> [0.]
     *     _ -> [0]
     *
     * Have found a leading one, but not a decimal point.
     * [1] 0 -> [1]     exponent += 1
     *     1 -> [1]     exponent += 1
     *     . -> [1.]
     *     _ -> [1]
     *
     * Have found a decimal point, but not a digit.
     * [.] 0 -> [0.]    exponent -= 1
     *     1 -> [1.]    exponent -= 1
     *     . -> stop
     *     _ -> [.]
     *
     * Have found a digit and decimal point, but not a leading one.
     * [0.] 0 -> [0.]   exponent -= 1
     *      1 -> [1.]   exponent -= 1
     *      . -> stop
     *      _ -> [0.]
     *
     * Have found a leading one and a decimal point.
     * [1.] 0 -> [1.]
     *      1 -> [1.]
     *      . -> stop
     *      _ -> [1.]
     *
     * State    name
     * []       None
     * [0]      Digit
     * [1]      One
     * [.]      Decimal
     * [0.]     DigitDecimal
     * [1.]     OneDecimal
     *
     * Examples:
     *
     * decimal  state           exponent
     *          None            0
     * 0        Digit           0
     * .        DigitDecimal    0
     * 0        DigitDecimal    -1
     * 1        OneDecimal      -2
     *
     * decimal  state           exponent
     *          None            0
     * 0        Digit           0
     * 1        One             0
     * 0        One             1
     * 0        One             2
     * .        OneDecimal      2
     * 0        OneDecimal      2
     * 0        OneDecimal      2
     */

    #[derive(Debug)]
    enum State {
        None,
        Digit,
        One,
        Decimal,
        DigitDecimal,
        OneDecimal,
    }
    let mut state = State::None;
    let mut mantissa = 0u64;
    let mut exponent = 0i32;

    // Parse the significand.  This obtains the mantissa and the exponent.  This implements
    // the state machine given above.
    let mut whole_empty = false;
    let mut fraction_empty = false;
    let loc = parser.loc();
    while !parser.is_at_eof() {
        let ch = parser.peek();
        if settings.permit_underscores && ch == '_' {
            parser.consume();
            continue;
        }
        match state {
            State::None => match ch {
                '0' => {
                    state = State::Digit;
                }
                '1' => {
                    mantissa = 1;
                    state = State::One;
                }
                '.' => {
                    // Nothing precedes the point, and nothing follows it yet.
                    whole_empty = true;
                    fraction_empty = true;
                    state = State::Decimal;
                }
                ch if ch.is_ascii_digit() => {
                    parser.consume();
                    return Err(syntax_error(parser.loc(), "Invalid binary digit"));
                }
                _ => {
                    break;
                }
            },
            State::Digit => match ch {
                '0' => {}
                '1' => {
                    mantissa = 1;
                    state = State::One;
                }
                '.' => {
                    fraction_empty = true;
                    state = State::DigitDecimal;
                }
                ch if ch.is_ascii_digit() => {
                    parser.consume();
                    return Err(syntax_error(parser.loc(), "Invalid binary digit"));
                }
                _ => {
                    break;
                }
            },
            State::One => match ch {
                '0' => {
                    // The top bit is already occupied; another shift would lose it.
                    if mantissa >= 0x8000_0000_0000_0000 {
                        parser.consume();
                        return Err(syntax_error(parser.loc(), "Too many significant digits"));
                    }
                    mantissa <<= 1;
                    exponent += 1;
                }
                '1' => {
                    if mantissa >= 0x8000_0000_0000_0000 {
                        parser.consume();
                        return Err(syntax_error(parser.loc(), "Too many significant digits"));
                    }
                    mantissa <<= 1;
                    mantissa |= 1;
                    exponent += 1;
                }
                '.' => {
                    fraction_empty = true;
                    state = State::OneDecimal;
                }
                ch if ch.is_ascii_digit() => {
                    parser.consume();
                    return Err(syntax_error(parser.loc(), "Invalid binary digit"));
                }
                _ => {
                    break;
                }
            },
            State::Decimal => match ch {
                '0' => {
                    exponent -= 1;
                    state = State::DigitDecimal;
                    fraction_empty = false;
                }
                '1' => {
                    mantissa = 1;
                    exponent -= 1;
                    state = State::OneDecimal;
                    fraction_empty = false;
                }
                ch if ch.is_ascii_digit() => {
                    parser.consume();
                    return Err(syntax_error(parser.loc(), "Invalid binary digit"));
                }
                _ => {
                    break;
                }
            },
            State::DigitDecimal => match ch {
                '0' => {
                    exponent -= 1;
                    fraction_empty = false;
                }
                '1' => {
                    mantissa = 1;
                    exponent -= 1;
                    state = State::OneDecimal;
                    fraction_empty = false;
                }
                ch if ch.is_ascii_digit() => {
                    parser.consume();
                    return Err(syntax_error(parser.loc(), "Invalid binary digit"));
                }
                _ => {
                    break;
                }
            },
            State::OneDecimal => match ch {
                '0' => {
                    if mantissa >= 0x8000_0000_0000_0000 {
                        parser.consume();
                        return Err(syntax_error(parser.loc(), "Too many significant digits"));
                    }
                    mantissa <<= 1;
                }
                '1' => {
                    if mantissa >= 0x8000_0000_0000_0000 {
                        parser.consume();
                        return Err(syntax_error(parser.loc(), "Too many significant digits"));
                    }
                    mantissa <<= 1;
                    mantissa |= 1;
                }
                ch if ch.is_ascii_digit() => {
                    parser.consume();
                    return Err(syntax_error(parser.loc(), "Invalid binary digit"));
                }
                _ => {
                    break;
                }
            },
        }
        parser.consume();
    }

    // Look for and parse any power specification.
    if matches!(parser.peek(), 'e' | 'E' | 'p' | 'P') {
        parser.consume();
        let loc = parser.loc();
        let negexp = if parser.peek_and_consume('-') {
            true
        } else {
            // An explicit plus sign is optional; whether it was present does not matter.
            parser.peek_and_consume('+');
            false
        };
        let digits = if settings.permit_underscores {
            parser.take_while_unless(|ch| ch.is_ascii_digit(), |ch| ch == '_')
        } else {
            parser.take_while(|ch| ch.is_ascii_digit())
        };
        let power = digits
            .parse::<i32>()
            .map_err(|err| syntax_error(loc, &err.to_string()))?;

        // Combine with the exponent implied by the significand.  The written power may be
        // anywhere in the `i32` range, so plain addition or subtraction could overflow (a
        // panic in debug builds, a silent wrap in release builds).  Saturating keeps the
        // value far outside the representable range, so a non-zero significand is rejected
        // by the range check below while a zero significand still yields zero.
        exponent = if negexp {
            exponent.saturating_sub(power)
        } else {
            exponent.saturating_add(power)
        };
    }

    // If we didn't find a digit then this is not a valid number.
    match state {
        State::None | State::Decimal => {
            return Err(syntax_error(loc, "Expected a number but did not find one"));
        }
        _ => {}
    }

    if whole_empty && !settings.permit_empty_whole {
        return Err(syntax_error(
            loc,
            "An empty whole part is not permitted; there must be digits to the left of the decimal."
        ));
    }

    if fraction_empty && !settings.permit_empty_fraction {
        return Err(syntax_error(
            loc,
            "An empty fraction part is not permitted; there must be digits to the right of the decimal, if present."
        ));
    }

    // If the mantissa is zero, return zero.  The exponent doesn't matter.
    match state {
        State::One | State::OneDecimal => {}
        _ => return Ok(0.0),
    }

    /*
     * At this point there is a leading one somewhere.
     *
     * Now we need to create a float from the pieces.  The first bit (63) is the sign bit.
     * The next eleven bits (62 through 52, inclusive) are the exponent biased by 1023.
     * The remaining 52 bits (51 through 0, inclusive) are the mantissa.
     *
     * 6666 5555 5555 5544 4444 4444 3333 3333 3322 2222 2222 1111 1111 1100 0000 0000
     * 3210 9876 5432 1098 7654 3210 9876 5432 1098 7654 3210 9876 5432 1098 7654 3210
     * seee eeee eeee mmmm mmmm mmmm mmmm mmmm mmmm mmmm mmmm mmmm mmmm mmmm mmmm mmmm
     */

    // Bias the exponent and then shift it into place.  Only exponents of normal numbers
    // (-1022 through 1023) are accepted, so the biased value is between 1 and 2046 and the
    // cast to unsigned cannot change it.
    if !(f64::MIN_EXP - 1..f64::MAX_EXP).contains(&exponent) {
        return Err(syntax_error(parser.loc(), "Exponent is out of range"));
    }
    let mut bits = ((exponent + 1023) as u64) << 52;

    // Shift the mantissa so that the first non-zero digit is at position 52.  To do
    // that we get the number of leading zeros.  This means the first one (and there
    // must be one because we check above) will be at 63 - leading zero count.  That
    // is, we want 11 leading zeros.  Bits shifted out on the right are discarded.
    let leading_zeros = mantissa.leading_zeros();
    match leading_zeros.cmp(&11) {
        std::cmp::Ordering::Less => {
            mantissa >>= 11 - leading_zeros;
        }
        std::cmp::Ordering::Greater => {
            mantissa <<= leading_zeros - 11;
        }
        _ => {}
    }

    // Now discard the leading one from the mantissa.  There must be one since we dealt with zeros
    // above.
    mantissa &= 0x000f_ffff_ffff_ffff;

    // Add the mantissa to the bits.
    bits |= mantissa;

    // Add the sign bit.
    if negative {
        bits |= 0x8000_0000_0000_0000;
    }

    // Construct and return the double from the bits.
    Ok(f64::from_bits(bits))
}

#[cfg(test)]
mod test {
    use crate::{
        decoder::Decode,
        numbers::{binary_float::parse_binary_float, NumberParserSettings},
        ParserCore,
    };

    /// Sixty-four one bits grouped by underscores: the widest whole number the mantissa holds.
    const ONES_64: &str =
        "11111111_11111111_11111111_11111111_11111111_11111111_11111111_11111111";

    /// Sixty-four one bits with a decimal point after the first.
    const POINT_ONES_64: &str =
        "1.1111111_11111111_11111111_11111111_11111111_11111111_11111111_11111111";

    /// A leading one, fifty-two fraction ones, and the largest accepted exponent.
    const LARGEST: &str =
        "1.1111_1111_1111_1111_1111_1111_1111_1111_1111_1111_1111_1111_1111e1023";

    /// Build a parser that reads from the given text.
    fn parse(value: &str) -> ParserCore {
        ParserCore::new("<string>>", Decode::from_string(value))
    }

    /// Fresh settings with underscore handling explicitly switched on or off.
    fn underscores(permit: bool) -> NumberParserSettings {
        let mut settings = NumberParserSettings::new();
        settings.permit_underscores = permit;
        settings
    }

    /// Parse `text` and return the value, panicking if the parse fails.
    fn value_of(text: &str, negative: bool, settings: &NumberParserSettings) -> f64 {
        parse_binary_float(&mut parse(text), negative, settings).unwrap()
    }

    /// Report whether parsing `text` as a non-negative number is rejected.
    fn rejected(text: &str, settings: &NumberParserSettings) -> bool {
        parse_binary_float(&mut parse(text), false, settings).is_err()
    }

    #[test]
    fn zero_test() {
        let settings = underscores(false);
        assert!(rejected("", &settings));
        for negative in [false, true] {
            assert_eq!(value_of("0", negative, &settings), 0.0);
        }
    }

    #[test]
    fn one_test() {
        let settings = underscores(false);
        let cases = [
            ("1", false, 1.0),
            ("1", true, -1.0),
            ("01", false, 1.0),
            ("001", true, -1.0),
        ];
        for (text, negative, expected) in cases {
            assert_eq!(value_of(text, negative, &settings), expected);
        }
    }

    #[test]
    fn integer_test() {
        let plain = underscores(false);
        assert_eq!(value_of("1111", false, &plain), 15.0);
        assert_eq!(value_of("1111", true, &plain), -15.0);

        let grouped = underscores(true);
        assert_eq!(value_of("10_1001_1001", false, &grouped), 665.0);
    }

    #[test]
    fn fraction_test() {
        let settings = underscores(false);
        let cases = [(".1", 0.5), (".01", 0.25), ("1.01", 1.25), ("0.010", 0.25)];
        for (text, expected) in cases {
            assert_eq!(value_of(text, false, &settings), expected);
        }
    }

    #[test]
    fn reject_test_1() {
        let settings = underscores(false);
        for text in ["2", "k", "02", "12"] {
            assert!(rejected(text, &settings));
        }
    }

    #[test]
    fn reject_test_2() {
        let settings = underscores(false);
        for text in ["0.02", "10.02", ".2", ".12"] {
            assert!(rejected(text, &settings));
        }
    }

    #[test]
    fn reject_test_3() {
        let plain = underscores(false);
        assert!(rejected(".k", &plain));
        assert_eq!(value_of("0.k", false, &plain), 0.0);

        // One digit more than the mantissa can hold, whichever digit it is.
        let grouped = underscores(true);
        for extra in ["_0", "_1"] {
            let text = format!("{}{}", ONES_64, extra);
            assert!(rejected(&text, &grouped));
        }
    }

    #[test]
    fn reject_test_4() {
        let settings = underscores(true);
        for extra in ["_0", "_1"] {
            let text = format!("{}{}", POINT_ONES_64, extra);
            assert!(rejected(&text, &settings));
        }
        assert!(rejected("1e4294967296", &settings));
    }

    #[test]
    fn exponent_test() {
        let settings = underscores(false);
        assert_eq!(value_of("0e1000", false, &settings), 0.0);
        assert_eq!(value_of("1e2", false, &settings), 4.0);
    }

    #[test]
    fn limits_test() {
        let settings = underscores(true);
        assert_eq!(value_of(LARGEST, false, &settings), f64::MAX);
        assert_eq!(value_of(LARGEST, true, &settings), f64::MIN);
        assert_eq!(value_of("1e-1022", false, &settings), f64::MIN_POSITIVE);
    }

    #[test]
    fn overflow_test() {
        let plain = underscores(false);
        assert!(rejected("1e1024", &plain));
        assert!(rejected("1e-1023", &plain));

        let grouped = underscores(true);
        assert_eq!(
            value_of(ONES_64, false, &grouped),
            1.844674407370955e19f64
        );
        let too_long = format!("{}{}", ONES_64, "_1");
        assert!(rejected(&too_long, &grouped));
    }

    #[test]
    fn empty_parts_test() {
        let mut settings = NumberParserSettings::new();
        settings.permit_empty_whole = false;
        settings.permit_empty_fraction = false;
        for text in ["10.e1", "10.", ".10e1", ".10"] {
            assert!(rejected(text, &settings));
        }
    }
}