// trivet 3.1.0 - Docs.rs

// Trivet
// Copyright (c) 2025 by Stacy Prowell.  All rights reserved.
// https://gitlab.com/binary-tools/trivet

//! Faster parsing of numbers with fewer options.  If you want more configurability,
//! see [`crate::numbers::NumberParser`].
//!
//! Specific functions are provided that parse the next `u128`, `i128`, `u64`, `i64`,
//! or `f64` value given an instance of [`crate::ParserCore`].  You would use these instead
//! of invoking the method of the same name in [`crate::Parser`].

use crate::errors::{syntax_error, ParseResult};
use crate::ParserCore;

/// Parse a possibly-negative integer at the current location and return it as an `i128`.
///
/// A single leading `-` is accepted.  Underscores appearing among the digits are always
/// skipped and never count as digits.  Every ASCII letter or digit is treated as if it is
/// part of the number; the first character that is neither (and is not an underscore) stops
/// the parse and is left in the stream.
///
/// **This method does not allow you to change the radix specifiers!**  A leading `0`
/// followed by one of the lowercase letters below selects the radix; otherwise the number is
/// decimal and the `0` is just a leading zero.  This is *subject to change* so do not rely
/// on this!
///
/// |Prefix  |Radix       |
/// |--------|------------|
/// |`0b`    |Binary      |
/// |`0o`    |Octal       |
/// |`0x`    |Hexadecimal |
///
/// # Errors
///
/// A syntax error is returned if:
///
/// * an ASCII letter or digit that is not a valid digit for the radix is encountered (the
///   offending character is not consumed),
/// * no digits are found (a radix prefix by itself is not a number), or
/// * the value does not fit in an `i128`.
pub fn parse_i128(parser: &mut ParserCore) -> ParseResult<i128> {
    // Deal with any leading minus sign.
    let negative = parser.peek_and_consume('-');
    let mut base = 10u32;
    let mut digits = false;
    let mut nonzero_pending = true;
    let mut value = 0i128;
    let mut fail = false;

    // Negative numbers are accumulated downward (by subtraction) so that `i128::MIN`,
    // whose magnitude has no positive counterpart, can still be parsed.
    let accumulate = if negative {
        i128::overflowing_sub
    } else {
        i128::overflowing_add
    };

    // Look for a radix indicator.
    if parser.peek_and_consume('0') {
        // This could be a radix indicator, or it could be a zero.  Check for the radix
        // indicator and if not found, assume it is just a leading zero.
        match parser.peek() {
            'x' => {
                // Hexadecimal radix.
                parser.consume();
                base = 16;
            }
            'o' => {
                // Octal radix.
                parser.consume();
                base = 8;
            }
            'b' => {
                // Binary radix.
                parser.consume();
                base = 2;
            }
            _ => {
                // The leading zero counts as a digit.
                digits = true;
            }
        }
    }

    // Now we have the radix and we might have also processed a digit.  Process all
    // alphanumerics until we encounter a non-alphanumeric character or we encounter
    // an error.
    while !parser.is_at_eof() {
        // Get the next potential digit.  Underscores are skipped and do not count as digits.
        let ch = parser.peek();
        if ch == '_' {
            parser.consume();
            continue;
        }

        // Convert the *character* to its numeric value.  Radix 36 accepts exactly the ASCII
        // digits and letters (either case), so anything else -- including non-ASCII
        // characters -- ends the number here.
        let dig = match ch.to_digit(36) {
            Some(dig) => dig,
            None => break,
        };
        digits = true;

        // If the digit is too large for the base, reject with an error.
        if dig >= base {
            return Err(syntax_error(
                parser.loc(),
                &format!("The digit '{}' is invalid for base {}.", ch, base),
            ));
        }

        // Consume the digit now.
        parser.consume();

        // Accumulate it.  The first non-zero digit is handled differently from
        // subsequent digits.  We discard leading zeros.
        if nonzero_pending {
            if dig > 0 {
                // This is the first non-zero digit; it fixes the sign of the running value.
                value = if negative {
                    -i128::from(dig)
                } else {
                    i128::from(dig)
                };
                nonzero_pending = false;
            }
        } else {
            // Keep consuming digits after an overflow so the whole number is read; the
            // overflow is reported once the number ends.
            let (scaled, fail_mul) = value.overflowing_mul(i128::from(base));
            let (summed, fail_add) = accumulate(scaled, i128::from(dig));
            value = summed;
            fail = fail || fail_mul || fail_add;
        }
    }

    // If we overflowed (or underflowed) during accumulation, issue an error now.
    if fail {
        return Err(syntax_error(
            parser.loc(),
            "Integer value out of bounds for i128",
        ));
    }

    // If we didn't get any digits, then reject with an error.
    if !digits {
        return Err(syntax_error(
            parser.loc(),
            "Expected a number but found no valid digits.",
        ));
    }
    Ok(value)
}

/// Parse an unsigned integer at the current location and return it as a `u128`.
///
/// Every ASCII letter or digit is treated as if it is part of the number; the first
/// character that is neither stops the parse and is left in the stream.  No sign is
/// accepted.
///
/// Underscores are *not* skipped by this function: an underscore ends the number like any
/// other non-alphanumeric character.
/// NOTE(review): the signed parsers in this module skip underscores; confirm whether the
/// unsigned parsers are meant to differ.
///
/// **This method does not allow you to change the radix specifiers!**  A leading `0`
/// followed by one of the lowercase letters below selects the radix; otherwise the number is
/// decimal and the `0` is just a leading zero.  This is *subject to change* so do not rely
/// on this!
///
/// |Prefix  |Radix       |
/// |--------|------------|
/// |`0b`    |Binary      |
/// |`0o`    |Octal       |
/// |`0x`    |Hexadecimal |
///
/// # Errors
///
/// A syntax error is returned if:
///
/// * an ASCII letter or digit that is not a valid digit for the radix is encountered (the
///   offending character is not consumed),
/// * no digits are found (a radix prefix by itself is not a number), or
/// * the value does not fit in a `u128`.
pub fn parse_u128(parser: &mut ParserCore) -> ParseResult<u128> {
    let mut base = 10u32;
    let mut digits = false;
    let mut nonzero_pending = true;
    let mut value = 0u128;
    let mut fail = false;

    // Look for a radix indicator.
    if parser.peek_and_consume('0') {
        // This could be a radix indicator, or it could be a zero.  Check for the radix
        // indicator and if not found, assume it is just a leading zero.
        match parser.peek() {
            'x' => {
                // Hexadecimal radix.
                parser.consume();
                base = 16;
            }
            'o' => {
                // Octal radix.
                parser.consume();
                base = 8;
            }
            'b' => {
                // Binary radix.
                parser.consume();
                base = 2;
            }
            _ => {
                // The leading zero counts as a digit.
                digits = true;
            }
        }
    }

    // Now we have the radix and we might have also processed a digit.  Process all
    // alphanumerics until we encounter a non-alphanumeric character or we encounter
    // an error.
    while !parser.is_at_eof() {
        // Get the next potential digit and convert the *character* to its numeric value.
        // Radix 36 accepts exactly the ASCII digits and letters (either case), so anything
        // else -- including underscores and non-ASCII characters -- ends the number here.
        let ch = parser.peek();
        let dig = match ch.to_digit(36) {
            Some(dig) => dig,
            None => break,
        };
        digits = true;

        // If the digit is too large for the base, reject with an error.
        if dig >= base {
            return Err(syntax_error(
                parser.loc(),
                &format!("The digit '{}' is invalid for base {}.", ch, base),
            ));
        }

        // Consume the digit now.
        parser.consume();

        // Accumulate it.  The first non-zero digit is handled differently from
        // subsequent digits.  We discard leading zeros.
        if nonzero_pending {
            if dig > 0 {
                // This is the first non-zero digit.
                value = u128::from(dig);
                nonzero_pending = false;
            }
        } else {
            // Keep consuming digits after an overflow so the whole number is read; the
            // overflow is reported once the number ends.
            let (scaled, fail_mul) = value.overflowing_mul(u128::from(base));
            let (summed, fail_add) = scaled.overflowing_add(u128::from(dig));
            value = summed;
            fail = fail || fail_mul || fail_add;
        }
    }

    // Report any overflow.
    if fail {
        return Err(syntax_error(
            parser.loc(),
            "Integer value too large for u128",
        ));
    }

    // If we didn't get any digits, then reject with an error.
    if !digits {
        return Err(syntax_error(
            parser.loc(),
            "Expected a number but found no valid digits",
        ));
    }
    Ok(value)
}

/// Parse a possibly-negative integer at the current location and return it as an `i64`.
///
/// A single leading `-` is accepted.  Underscores appearing among the digits are always
/// skipped and never count as digits.  Every ASCII letter or digit is treated as if it is
/// part of the number; the first character that is neither (and is not an underscore) stops
/// the parse and is left in the stream.
///
/// **This method does not allow you to change the radix specifiers!**  A leading `0`
/// followed by one of the lowercase letters below selects the radix; otherwise the number is
/// decimal and the `0` is just a leading zero.  This is *subject to change* so do not rely
/// on this!
///
/// |Prefix  |Radix       |
/// |--------|------------|
/// |`0b`    |Binary      |
/// |`0o`    |Octal       |
/// |`0x`    |Hexadecimal |
///
/// # Errors
///
/// A syntax error is returned if:
///
/// * an ASCII letter or digit that is not a valid digit for the radix is encountered (the
///   offending character is not consumed),
/// * no digits are found (a radix prefix by itself is not a number), or
/// * the value does not fit in an `i64`.
pub fn parse_i64(parser: &mut ParserCore) -> ParseResult<i64> {
    // Deal with any leading minus sign.
    let negative = parser.peek_and_consume('-');
    let mut base = 10u32;
    let mut digits = false;
    let mut nonzero_pending = true;
    let mut value = 0i64;
    let mut fail = false;

    // Negative numbers are accumulated downward (by subtraction) so that `i64::MIN`,
    // whose magnitude has no positive counterpart, can still be parsed.
    let accumulate = if negative {
        i64::overflowing_sub
    } else {
        i64::overflowing_add
    };

    // Look for a radix indicator.
    if parser.peek_and_consume('0') {
        // This could be a radix indicator, or it could be a zero.  Check for the radix
        // indicator and if not found, assume it is just a leading zero.
        match parser.peek() {
            'x' => {
                // Hexadecimal radix.
                parser.consume();
                base = 16;
            }
            'o' => {
                // Octal radix.
                parser.consume();
                base = 8;
            }
            'b' => {
                // Binary radix.
                parser.consume();
                base = 2;
            }
            _ => {
                // The leading zero counts as a digit.
                digits = true;
            }
        }
    }

    // Now we have the radix and we might have also processed a digit.  Process all
    // alphanumerics until we encounter a non-alphanumeric character or we encounter
    // an error.
    while !parser.is_at_eof() {
        // Get the next potential digit.  Underscores are skipped and do not count as digits.
        let ch = parser.peek();
        if ch == '_' {
            parser.consume();
            continue;
        }

        // Convert the *character* to its numeric value.  Radix 36 accepts exactly the ASCII
        // digits and letters (either case), so anything else -- including non-ASCII
        // characters -- ends the number here.
        let dig = match ch.to_digit(36) {
            Some(dig) => dig,
            None => break,
        };
        digits = true;

        // If the digit is too large for the base, reject with an error.
        if dig >= base {
            return Err(syntax_error(
                parser.loc(),
                &format!("The digit '{}' is invalid for base {}.", ch, base),
            ));
        }

        // Consume the digit now.
        parser.consume();

        // Accumulate it.  The first non-zero digit is handled differently from
        // subsequent digits.  We discard leading zeros.
        if nonzero_pending {
            if dig > 0 {
                // This is the first non-zero digit; it fixes the sign of the running value.
                value = if negative {
                    -i64::from(dig)
                } else {
                    i64::from(dig)
                };
                nonzero_pending = false;
            }
        } else {
            // Keep consuming digits after an overflow so the whole number is read; the
            // overflow is reported once the number ends.
            let (scaled, fail_mul) = value.overflowing_mul(i64::from(base));
            let (summed, fail_add) = accumulate(scaled, i64::from(dig));
            value = summed;
            fail = fail || fail_mul || fail_add;
        }
    }

    // If we overflowed (or underflowed) during accumulation, issue an error now.
    if fail {
        return Err(syntax_error(
            parser.loc(),
            "Integer value out of bounds for i64",
        ));
    }

    // If we didn't get any digits, then reject with an error.
    if !digits {
        return Err(syntax_error(
            parser.loc(),
            "Expected a number but found no valid digits.",
        ));
    }
    Ok(value)
}

/// Parse an unsigned integer at the current location and return it as a `u64`.
///
/// Every ASCII letter or digit is treated as if it is part of the number; the first
/// character that is neither stops the parse and is left in the stream.  No sign is
/// accepted.
///
/// Underscores are *not* skipped by this function: an underscore ends the number like any
/// other non-alphanumeric character.
/// NOTE(review): the signed parsers in this module skip underscores; confirm whether the
/// unsigned parsers are meant to differ.
///
/// **This method does not allow you to change the radix specifiers!**  A leading `0`
/// followed by one of the lowercase letters below selects the radix; otherwise the number is
/// decimal and the `0` is just a leading zero.  This is *subject to change* so do not rely
/// on this!
///
/// |Prefix  |Radix       |
/// |--------|------------|
/// |`0b`    |Binary      |
/// |`0o`    |Octal       |
/// |`0x`    |Hexadecimal |
///
/// # Errors
///
/// A syntax error is returned if:
///
/// * an ASCII letter or digit that is not a valid digit for the radix is encountered (the
///   offending character is not consumed),
/// * no digits are found (a radix prefix by itself is not a number), or
/// * the value does not fit in a `u64`.
pub fn parse_u64(parser: &mut ParserCore) -> ParseResult<u64> {
    let mut base = 10u32;
    let mut digits = false;
    let mut nonzero_pending = true;
    let mut value = 0u64;
    let mut fail = false;

    // Look for a radix indicator.
    if parser.peek_and_consume('0') {
        // This could be a radix indicator, or it could be a zero.  Check for the radix
        // indicator and if not found, assume it is just a leading zero.
        match parser.peek() {
            'x' => {
                // Hexadecimal radix.
                parser.consume();
                base = 16;
            }
            'o' => {
                // Octal radix.
                parser.consume();
                base = 8;
            }
            'b' => {
                // Binary radix.
                parser.consume();
                base = 2;
            }
            _ => {
                // The leading zero counts as a digit.
                digits = true;
            }
        }
    }

    // Now we have the radix and we might have also processed a digit.  Process all
    // alphanumerics until we encounter a non-alphanumeric character or we encounter
    // an error.
    while !parser.is_at_eof() {
        // Get the next potential digit and convert the *character* to its numeric value.
        // Radix 36 accepts exactly the ASCII digits and letters (either case), so anything
        // else -- including underscores and non-ASCII characters -- ends the number here.
        let ch = parser.peek();
        let dig = match ch.to_digit(36) {
            Some(dig) => dig,
            None => break,
        };
        digits = true;

        // If the digit is too large for the base, reject with an error.
        if dig >= base {
            return Err(syntax_error(
                parser.loc(),
                &format!("The digit '{}' is invalid for base {}.", ch, base),
            ));
        }

        // Consume the digit now.
        parser.consume();

        // Accumulate it.  The first non-zero digit is handled differently from
        // subsequent digits.  We discard leading zeros.
        if nonzero_pending {
            if dig > 0 {
                // This is the first non-zero digit.
                value = u64::from(dig);
                nonzero_pending = false;
            }
        } else {
            // Keep consuming digits after an overflow so the whole number is read; the
            // overflow is reported once the number ends.
            let (scaled, fail_mul) = value.overflowing_mul(u64::from(base));
            let (summed, fail_add) = scaled.overflowing_add(u64::from(dig));
            value = summed;
            fail = fail || fail_mul || fail_add;
        }
    }

    // Report any overflow.
    if fail {
        return Err(syntax_error(
            parser.loc(),
            "Integer value too large for u64",
        ));
    }

    // If we didn't get any digits, then reject with an error.
    if !digits {
        return Err(syntax_error(
            parser.loc(),
            "Expected a number but found no valid digits",
        ));
    }
    Ok(value)
}

/// Parse a decimal floating point number and return it as an `f64`.
///
/// An optional leading `-` is consumed, followed by the longest run of characters drawn
/// from `[0-9.eE]`; the first character outside that set stops the parse.  Radix indicators
/// are not recognized.  The collected text is then converted using Rust's standard `f64`
/// string parser.
///
/// # Errors
///
/// If the collected text is not a valid `f64` literal, a syntax error carrying the
/// standard parser's message is returned, located where the number started.
pub fn parse_f64_decimal(parser: &mut ParserCore) -> ParseResult<f64> {
    // Remember where the number begins so that a failure is reported at its start.
    let start = parser.loc();

    // Rebuild the literal text: the optional sign first, then the numeric body.
    let mut literal = String::new();
    if parser.peek_and_consume('-') {
        literal.push('-');
    }
    let body = parser.take_while(|c| matches!(c, '0'..='9' | '.' | 'e' | 'E'));
    literal.push_str(&body);

    // Delegate the actual conversion to the standard library.
    literal
        .parse::<f64>()
        .map_err(|why| syntax_error(start, &why.to_string()))
}