use std::error::Error;
use std::fmt::{Display, Formatter};
/// Maximum length, in bytes, of an ENDF real once surrounding whitespace has
/// been trimmed; [`parse`] rejects anything longer.
///
/// NOTE(review): presumably the width of one ENDF data field — confirm
/// against the format specification.
pub const ENDF_REAL_MAX_LEN: usize = 11;
/// Radix (numeric base) in which ENDF reals are written: decimal.
pub const ENDF_REAL_RADIX: u32 = 10;
/// Regular expression describing the textual shape of an ENDF real.
///
/// In order, the pattern matches:
/// - any number of leading spaces,
/// - an optional `+` or `-` sign,
/// - an integer part that is either `0` or starts with a non-zero digit,
/// - an optional fractional part (`.` followed by at least one digit),
/// - an optional exponent: an optional `e`/`E`, a mandatory sign and at
///   least one digit.
///
/// NOTE(review): the pattern and [`parse`] do not accept exactly the same
/// inputs. For example the pattern matches an exponent directly after the
/// integer part (`1+5`), which `parse` rejects, while `parse` accepts leading
/// zeros (`01`) and trailing whitespace and enforces a length limit, none of
/// which the pattern does. Confirm which behavior is intended.
pub const ENDF_REAL_REGEX: &str = r"^[ ]*[+\-]?(?:0|[1-9]\d*)(?:\.\d+)?(?:[eE]?[+\-]\d+)?$";
/// Error returned when a string cannot be parsed as an ENDF real.
///
/// The precise cause is exposed through [`ParseEndfRealError::kind`]; the
/// [`Display`] implementation renders a human-readable message for it.
#[derive(Debug)]
pub struct ParseEndfRealError {
    kind: ParseEndfRealErrorKind,
}

impl ParseEndfRealError {
    /// Returns the detailed cause of the parse failure.
    pub fn kind(&self) -> ParseEndfRealErrorKind {
        self.kind
    }
}

impl Display for ParseEndfRealError {
    /// Writes a one-line description of the failure.
    ///
    /// Offending characters are written through `char::escape_default`, so
    /// control characters show up as escapes (e.g. `\t`) instead of raw bytes.
    fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
        match self.kind {
            ParseEndfRealErrorKind::Empty => {
                write!(f, "cannot parse ENDF real from empty string")
            }
            ParseEndfRealErrorKind::EmptyIntegerPart => {
                write!(f, "cannot parse ENDF real without integer part")
            }
            ParseEndfRealErrorKind::EmptyFractionalPart => {
                write!(f, "cannot parse ENDF real without fractional part")
            }
            ParseEndfRealErrorKind::EmptyExponentPart => {
                write!(f, "cannot parse ENDF real without exponent part")
            }
            // `EscapeDefault` implements `Display`, so it is formatted directly
            // rather than being collected into a temporary `String` first.
            ParseEndfRealErrorKind::InvalidDecimalSeparator(x) => write!(
                f,
                "cannot parse ENDF real, found invalid decimal separator in string: {}",
                x.escape_default()
            ),
            ParseEndfRealErrorKind::InvalidExponentSeparator(x) => write!(
                f,
                "cannot parse ENDF real, found invalid exponent separator in string: {}",
                x.escape_default()
            ),
            ParseEndfRealErrorKind::InvalidExponentSign(x) => write!(
                f,
                "cannot parse ENDF real, found invalid exponent sign in string: {}",
                x.escape_default()
            ),
            ParseEndfRealErrorKind::InvalidSign(x) => write!(
                f,
                "cannot parse ENDF real, found invalid sign in string: {}",
                x.escape_default()
            ),
            ParseEndfRealErrorKind::NonAscii => {
                write!(f, "cannot parse ENDF real from non ASCII string")
            }
            ParseEndfRealErrorKind::Tail => {
                write!(f, "cannot parse ENDF real from string with tail")
            }
            ParseEndfRealErrorKind::TooLong => {
                write!(f, "cannot parse ENDF real, string too long")
            }
        }
    }
}

impl Error for ParseEndfRealError {}

/// The specific reason why parsing an ENDF real failed.
///
/// Variants carrying a `char` hold the offending character.
#[derive(Clone, Copy, Debug, Eq, Hash, Ord, PartialEq, PartialOrd)]
#[non_exhaustive]
pub enum ParseEndfRealErrorKind {
    /// The input is empty once surrounding whitespace is removed.
    Empty,
    /// No digit follows the (optional) leading sign.
    EmptyIntegerPart,
    /// The decimal point is not followed by any digit.
    EmptyFractionalPart,
    /// The exponent separator or exponent sign is not followed by any digit.
    EmptyExponentPart,
    /// A character other than `.` directly follows the integer part.
    InvalidDecimalSeparator(char),
    /// A character other than `e`, `E`, `+` or `-` directly follows the
    /// fractional part.
    InvalidExponentSeparator(char),
    /// A character other than `+` or `-` follows the `e`/`E` separator.
    InvalidExponentSign(char),
    /// The first character is neither a sign nor a digit.
    InvalidSign(char),
    /// The input contains at least one non-ASCII character.
    NonAscii,
    /// Unexpected characters follow the exponent digits.
    Tail,
    /// The trimmed input is longer than the maximum ENDF real length.
    TooLong,
}
pub fn parse(real: &str) -> Result<f64, ParseEndfRealError> {
if !real.is_ascii() {
return Err(ParseEndfRealError {
kind: ParseEndfRealErrorKind::NonAscii,
});
}
let bytes = real.trim().as_bytes();
if bytes.len() > ENDF_REAL_MAX_LEN {
return Err(ParseEndfRealError {
kind: ParseEndfRealErrorKind::TooLong,
});
}
let (neg, tail) = match bytes.first() {
Some(&b'-') => (true, &bytes[1..]),
Some(&b'+') => (false, &bytes[1..]),
Some(x) if &b'0' <= x && x <= &b'9' => (false, bytes),
Some(x) => {
return Err(ParseEndfRealError {
kind: ParseEndfRealErrorKind::InvalidSign(char::from(*x)),
})
}
None => {
return Err(ParseEndfRealError {
kind: ParseEndfRealErrorKind::Empty,
})
}
};
let (int_part, tail) = retrieve_digits(tail);
if int_part.is_empty() {
return Err(ParseEndfRealError {
kind: ParseEndfRealErrorKind::EmptyIntegerPart,
});
}
let tail = match tail.first() {
Some(&b'.') => &tail[1..],
Some(x) => {
return Err(ParseEndfRealError {
kind: ParseEndfRealErrorKind::InvalidDecimalSeparator(char::from(*x)),
})
}
None => return Ok(parse_integer(neg, int_part)),
};
let (frac_part, tail) = retrieve_digits(tail);
if frac_part.is_empty() {
return Err(ParseEndfRealError {
kind: ParseEndfRealErrorKind::EmptyFractionalPart,
});
}
let tail = match tail.first() {
Some(&b'e') | Some(&b'E') => &tail[1..],
Some(&b'-') | Some(&b'+') => tail,
Some(x) => {
return Err(ParseEndfRealError {
kind: ParseEndfRealErrorKind::InvalidExponentSeparator(char::from(*x)),
})
}
None => return Ok(parse_decimal(neg, int_part, frac_part)),
};
let (neg_exp, tail) = match tail.first() {
Some(&b'-') => (true, &tail[1..]),
Some(&b'+') => (false, &tail[1..]),
Some(x) => {
return Err(ParseEndfRealError {
kind: ParseEndfRealErrorKind::InvalidExponentSign(char::from(*x)),
})
}
None => {
return Err(ParseEndfRealError {
kind: ParseEndfRealErrorKind::EmptyExponentPart,
})
}
};
let (exp_part, tail) = retrieve_digits(tail);
if exp_part.is_empty() {
return Err(ParseEndfRealError {
kind: ParseEndfRealErrorKind::EmptyExponentPart,
});
}
if !tail.is_empty() {
return Err(ParseEndfRealError {
kind: ParseEndfRealErrorKind::Tail,
});
}
Ok(parse_scientific(
neg, int_part, frac_part, neg_exp, exp_part,
))
}
/// Splits `bytes` into its leading run of ASCII digits and the remainder.
///
/// The first slice is empty when `bytes` does not start with a digit; the
/// second slice is empty when `bytes` consists of digits only.
fn retrieve_digits(bytes: &[u8]) -> (&[u8], &[u8]) {
    let digit_count = bytes
        .iter()
        .position(|byte| !byte.is_ascii_digit())
        .unwrap_or(bytes.len());
    bytes.split_at(digit_count)
}
/// Converts the digits of a plain integer (no fraction, no exponent) to `f64`.
///
/// `int_part` must contain ASCII digits only; `neg` selects the sign.
fn parse_integer(neg: bool, int_part: &[u8]) -> f64 {
    let number = parse_integer_unchecked(int_part) as f64;
    apply_sign(neg, number)
}

/// Converts a decimal number given as integer and fractional digits to `f64`.
///
/// Both slices must contain ASCII digits only. The digits are read as a
/// single integer mantissa that is then scaled down by one power of ten per
/// fractional digit.
fn parse_decimal(neg: bool, int_part: &[u8], frac_part: &[u8]) -> f64 {
    let mantissa = parse_integer_unchecked(int_part.iter().chain(frac_part.iter()));
    let exponent = -(frac_part.len() as i32);
    apply_sign(neg, scale_by_power_of_ten(mantissa, exponent))
}

/// Converts a number in scientific notation to `f64`.
///
/// All slices must contain ASCII digits only. `neg` is the sign of the number
/// and `neg_exp` the sign of the exponent. The written exponent is reduced by
/// the number of fractional digits so the mantissa can be treated as an
/// integer.
fn parse_scientific(
    neg: bool,
    int_part: &[u8],
    frac_part: &[u8],
    neg_exp: bool,
    exp_part: &[u8],
) -> f64 {
    let mantissa = parse_integer_unchecked(int_part.iter().chain(frac_part.iter()));
    let written_exponent = apply_sign(neg_exp, parse_integer_unchecked(exp_part) as i32);
    let exponent = written_exponent - frac_part.len() as i32;
    apply_sign(neg, scale_by_power_of_ten(mantissa, exponent))
}

/// Computes `mantissa * 10^exponent`, correctly rounded to the nearest `f64`.
///
/// Multiplying by `10f64.powi(exponent)` rounds twice when the exponent is
/// negative (once for the inexact power, once for the product), which turns
/// e.g. `3 * 10^-1` into `0.30000000000000004`, and collapses to zero once
/// `10^|exponent|` overflows. Instead:
///
/// - when the mantissa is exactly representable (at most 2^53) and the power
///   of ten is exact (at most 10^22), a single multiplication or division is
///   performed, which IEEE 754 rounds correctly;
/// - otherwise the value is handed to the standard library float parser,
///   which also rounds correctly and saturates to infinity or zero.
fn scale_by_power_of_ten(mantissa: u64, exponent: i32) -> f64 {
    // Every power of ten up to 10^22 is exactly representable as an `f64`.
    const EXACT_POWERS_OF_TEN: [f64; 23] = [
        1e0, 1e1, 1e2, 1e3, 1e4, 1e5, 1e6, 1e7, 1e8, 1e9, 1e10, 1e11, 1e12, 1e13, 1e14, 1e15,
        1e16, 1e17, 1e18, 1e19, 1e20, 1e21, 1e22,
    ];
    // Largest integer up to which every integer converts to `f64` exactly.
    const MAX_EXACT_MANTISSA: u64 = 1 << 53;

    // Widen before taking the absolute value so `i32::MIN` cannot overflow.
    let magnitude = i64::from(exponent).abs();
    if mantissa <= MAX_EXACT_MANTISSA && magnitude < EXACT_POWERS_OF_TEN.len() as i64 {
        let power = EXACT_POWERS_OF_TEN[magnitude as usize];
        let mantissa = mantissa as f64;
        return if exponent < 0 {
            mantissa / power
        } else {
            mantissa * power
        };
    }

    format!("{}e{}", mantissa, exponent)
        .parse::<f64>()
        .expect("an integer mantissa followed by an integer exponent is a valid float literal")
}

/// Folds a sequence of ASCII digits into the integer it spells.
///
/// "Unchecked" means that no validation happens here: every byte must be an
/// ASCII digit and the value must fit into a `u64`, otherwise the arithmetic
/// overflows.
fn parse_integer_unchecked<'a, T>(bytes: T) -> u64
where
    T: IntoIterator<Item = &'a u8>,
{
    bytes
        .into_iter()
        .fold(0, |value, &digit| value * 10 + u64::from(digit - b'0'))
}

/// Returns `-number` when `negative` is set and `number` unchanged otherwise.
fn apply_sign<T>(negative: bool, number: T) -> T
where
    T: std::ops::Neg<Output = T>,
{
    if negative {
        -number
    } else {
        number
    }
}
#[cfg(test)]
mod tests {
    use super::*;
    use regex::Regex;

    /// Asserts that `actual` equals `expected` to within the precision an
    /// ENDF real can carry.
    ///
    /// The tolerance is `10^(m - ENDF_REAL_MAX_LEN)`, where `m` is the decimal
    /// order of magnitude of `expected`, so it scales with the value for both
    /// large and small magnitudes. Zero has no order of magnitude and uses
    /// `m = 0`.
    fn assert_endf_real_eq(expected: f64, actual: f64) {
        // `log10(0)` is negative infinity, hence the special case for zero.
        let magnitude = if expected == 0. {
            0
        } else {
            expected.abs().log10().floor() as i32
        };
        let delta = 10_f64.powi(magnitude - ENDF_REAL_MAX_LEN as i32);
        let diff = (expected - actual).abs();
        // A NaN difference compares false against everything, so it has to be
        // rejected explicitly.
        if diff.is_nan() || diff > delta {
            panic!(
                "assertion failed: (expected == actual)\n\
                 expected = `{:?}`\n\
                 actual = `{:?}`\n\
                 diff = `{:?}`\n\
                 delta = `{:?}`\n",
                expected, actual, diff, delta
            )
        }
    }

    #[test]
    fn parse_empty() {
        let real = "";
        let expected = ParseEndfRealErrorKind::Empty;
        let actual = parse(real).unwrap_err().kind();
        assert_eq!(expected, actual);
    }

    /// Whitespace-only input is trimmed to nothing and reported as empty.
    #[test]
    fn parse_whitespace() {
        let expected = ParseEndfRealErrorKind::Empty;
        assert_eq!(expected, parse(" ").unwrap_err().kind());
        assert_eq!(expected, parse("\t").unwrap_err().kind());
        assert_eq!(expected, parse("\n").unwrap_err().kind());
        assert_eq!(expected, parse("\r").unwrap_err().kind());
        assert_eq!(expected, parse(" \n\t\r").unwrap_err().kind());
    }

    #[test]
    fn parse_empty_exponent() {
        let expected = ParseEndfRealErrorKind::EmptyExponentPart;
        assert_eq!(expected, parse("1.2345e+").unwrap_err().kind());
        assert_eq!(expected, parse("1.2345e-").unwrap_err().kind());
        assert_eq!(expected, parse("1.2345+").unwrap_err().kind());
        assert_eq!(expected, parse("1.2345-").unwrap_err().kind());
    }

    #[test]
    fn parse_empty_fraction() {
        let expected = ParseEndfRealErrorKind::EmptyFractionalPart;
        assert_eq!(expected, parse(" 1.").unwrap_err().kind());
        assert_eq!(expected, parse("+1.").unwrap_err().kind());
        assert_eq!(expected, parse("-1.").unwrap_err().kind());
    }

    #[test]
    fn parse_empty_integer() {
        let expected = ParseEndfRealErrorKind::EmptyIntegerPart;
        assert_eq!(expected, parse("-.1").unwrap_err().kind());
        assert_eq!(expected, parse("+.1").unwrap_err().kind());
    }

    #[test]
    fn parse_sign_only() {
        let real = "-";
        let expected = ParseEndfRealErrorKind::EmptyIntegerPart;
        assert_eq!(expected, parse(real).unwrap_err().kind());
        let real = "+";
        assert_eq!(expected, parse(real).unwrap_err().kind());
    }

    /// Anything after the exponent digits is reported as a tail.
    #[test]
    fn parse_invalid() {
        let real = "1.2345e+6a";
        let expected = ParseEndfRealErrorKind::Tail;
        assert_eq!(expected, parse(real).unwrap_err().kind());
    }

    #[test]
    fn parse_invalid_decimal_separator() {
        let real = "-1,23456789";
        let expected = ParseEndfRealErrorKind::InvalidDecimalSeparator(',');
        assert_eq!(expected, parse(real).unwrap_err().kind());
        let real = "-1|23456789";
        let expected = ParseEndfRealErrorKind::InvalidDecimalSeparator('|');
        assert_eq!(expected, parse(real).unwrap_err().kind());
    }

    #[test]
    fn parse_invalid_exponent_separator() {
        let real = "-1.2345d+12";
        let expected = ParseEndfRealErrorKind::InvalidExponentSeparator('d');
        assert_eq!(expected, parse(real).unwrap_err().kind());
        let real = "-1.2345f+12";
        let expected = ParseEndfRealErrorKind::InvalidExponentSeparator('f');
        assert_eq!(expected, parse(real).unwrap_err().kind());
        let real = "-1.2345^+12";
        let expected = ParseEndfRealErrorKind::InvalidExponentSeparator('^');
        assert_eq!(expected, parse(real).unwrap_err().kind());
    }

    #[test]
    fn parse_invalid_sign() {
        let real = "a1.23456789";
        let expected = ParseEndfRealErrorKind::InvalidSign('a');
        assert_eq!(expected, parse(real).unwrap_err().kind());
        let real = "1.234567ea1";
        let expected = ParseEndfRealErrorKind::InvalidExponentSign('a');
        assert_eq!(expected, parse(real).unwrap_err().kind());
    }

    #[test]
    fn parse_non_ascii() {
        let real = "µ";
        let expected = ParseEndfRealErrorKind::NonAscii;
        assert_eq!(expected, parse(real).unwrap_err().kind());
    }

    /// Twelve significant bytes exceed the maximum length of an ENDF real.
    #[test]
    fn parse_invalid_length() {
        let real = "+1.234567+12";
        let expected = ParseEndfRealErrorKind::TooLong;
        assert_eq!(expected, parse(real).unwrap_err().kind());
        let real = "+1.23456e+12";
        assert_eq!(expected, parse(real).unwrap_err().kind());
        let real = "+12345.67890";
        assert_eq!(expected, parse(real).unwrap_err().kind());
        let real = "-12345.67890";
        assert_eq!(expected, parse(real).unwrap_err().kind());
    }

    #[test]
    fn parse_valid_integer() {
        assert_endf_real_eq(1_234_567_890_f64, parse(" 1234567890").unwrap());
        assert_endf_real_eq(1_234_567_890_f64, parse("+1234567890").unwrap());
        assert_endf_real_eq(-1_234_567_890_f64, parse("-1234567890").unwrap());
    }

    #[test]
    fn parse_valid_real_max_precision() {
        assert_endf_real_eq(0., parse(" 0.00000000").unwrap());
        assert_endf_real_eq(0., parse("+0.00000000").unwrap());
        assert_endf_real_eq(0., parse("-0.00000000").unwrap());
        assert_endf_real_eq(1.234_567_89, parse(" 1.23456789").unwrap());
        assert_endf_real_eq(1.234_567_89, parse("+1.23456789").unwrap());
        assert_endf_real_eq(-1.234_567_89, parse("-1.23456789").unwrap());
    }

    /// Exponent written without the `e` separator, one exponent digit.
    #[test]
    fn parse_valid_real_eless_mid_precision() {
        assert_endf_real_eq(1.234_567e+1, parse(" 1.234567+1").unwrap());
        assert_endf_real_eq(1.234_567e+1, parse("+1.234567+1").unwrap());
        assert_endf_real_eq(-1.234_567e+1, parse("-1.234567+1").unwrap());
        assert_endf_real_eq(1.234_567e-1, parse(" 1.234567-1").unwrap());
        assert_endf_real_eq(1.234_567e-1, parse("+1.234567-1").unwrap());
        assert_endf_real_eq(-1.234_567e-1, parse("-1.234567-1").unwrap());
    }

    /// Exponent written without the `e` separator, two exponent digits.
    #[test]
    fn parse_valid_real_eless_min_precision() {
        assert_endf_real_eq(1.23456e+12, parse(" 1.23456+12").unwrap());
        assert_endf_real_eq(1.23456e+12, parse("+1.23456+12").unwrap());
        assert_endf_real_eq(-1.23456e+12, parse("-1.23456+12").unwrap());
        assert_endf_real_eq(1.23456e-12, parse(" 1.23456-12").unwrap());
        assert_endf_real_eq(1.23456e-12, parse("+1.23456-12").unwrap());
        assert_endf_real_eq(-1.23456e-12, parse("-1.23456-12").unwrap());
    }

    #[test]
    fn parse_valid_real_scientific_mid_precision() {
        assert_endf_real_eq(1.23456e+1, parse(" 1.23456e+1").unwrap());
        assert_endf_real_eq(1.23456e+1, parse("+1.23456e+1").unwrap());
        assert_endf_real_eq(-1.23456e+1, parse("-1.23456e+1").unwrap());
        assert_endf_real_eq(1.23456e+1, parse(" 1.23456E+1").unwrap());
        assert_endf_real_eq(1.23456e+1, parse("+1.23456E+1").unwrap());
        assert_endf_real_eq(-1.23456e+1, parse("-1.23456E+1").unwrap());
        assert_endf_real_eq(1.23456e-1, parse(" 1.23456e-1").unwrap());
        assert_endf_real_eq(1.23456e-1, parse("+1.23456e-1").unwrap());
        assert_endf_real_eq(-1.23456e-1, parse("-1.23456e-1").unwrap());
        assert_endf_real_eq(1.23456e-1, parse(" 1.23456E-1").unwrap());
        assert_endf_real_eq(1.23456e-1, parse("+1.23456E-1").unwrap());
        assert_endf_real_eq(-1.23456e-1, parse("-1.23456E-1").unwrap());
    }

    #[test]
    fn parse_valid_real_scientific_min_precision() {
        assert_endf_real_eq(1.2345e+12, parse(" 1.2345e+12").unwrap());
        assert_endf_real_eq(1.2345e+12, parse("+1.2345e+12").unwrap());
        assert_endf_real_eq(-1.2345e+12, parse("-1.2345e+12").unwrap());
        assert_endf_real_eq(1.2345e+12, parse(" 1.2345E+12").unwrap());
        assert_endf_real_eq(1.2345e+12, parse("+1.2345E+12").unwrap());
        assert_endf_real_eq(-1.2345e+12, parse("-1.2345E+12").unwrap());
        assert_endf_real_eq(1.2345e-12, parse(" 1.2345e-12").unwrap());
        assert_endf_real_eq(1.2345e-12, parse("+1.2345e-12").unwrap());
        assert_endf_real_eq(-1.2345e-12, parse("-1.2345e-12").unwrap());
        assert_endf_real_eq(1.2345e-12, parse(" 1.2345E-12").unwrap());
        assert_endf_real_eq(1.2345e-12, parse("+1.2345E-12").unwrap());
        assert_endf_real_eq(-1.2345e-12, parse("-1.2345E-12").unwrap());
    }

    #[test]
    fn regex_integer() {
        let regex = Regex::new(ENDF_REAL_REGEX).unwrap();
        assert!(regex.is_match("1"));
        assert!(regex.is_match("+1"));
        assert!(regex.is_match("-1"));
        assert!(regex.is_match(" 1234567890"));
        assert!(regex.is_match("-1234567890"));
        assert!(regex.is_match("+1234567890"));
        assert!(regex.is_match(" 1"));
        assert!(regex.is_match(" +1"));
        assert!(regex.is_match(" -1"));
    }

    #[test]
    fn regex_decimal() {
        let regex = Regex::new(ENDF_REAL_REGEX).unwrap();
        assert!(regex.is_match("-1.23456789"));
        assert!(regex.is_match("+1.23456789"));
        assert!(regex.is_match(" 1.23456789"));
    }

    #[test]
    fn regex_scientific() {
        let regex = Regex::new(ENDF_REAL_REGEX).unwrap();
        assert!(regex.is_match(" +1.2345e+1"));
        assert!(regex.is_match(" +1.2345E+1"));
        assert!(regex.is_match(" -1.2345e+1"));
        assert!(regex.is_match(" 1.2345e+1"));
        assert!(regex.is_match(" +1.2345e-1"));
        assert!(regex.is_match(" -1.2345e-1"));
        assert!(regex.is_match(" 1.2345e-1"));
        assert!(regex.is_match(" 1.2345-1"));
        assert!(regex.is_match(" -1.2345-1"));
        assert!(regex.is_match(" +1.2345-1"));
        assert!(regex.is_match(" +1.2345-12"));
        assert!(regex.is_match(" -1.2345-12"));
        assert!(regex.is_match(" 1.2345-12"));
    }

    #[test]
    fn regex_invalid() {
        let regex = Regex::new(ENDF_REAL_REGEX).unwrap();
        assert!(!regex.is_match(""));
        assert!(!regex.is_match(" "));
        assert!(!regex.is_match("."));
        assert!(!regex.is_match("e"));
        assert!(!regex.is_match("E"));
        assert!(!regex.is_match("0."));
        assert!(!regex.is_match("1."));
        assert!(!regex.is_match("-1."));
        assert!(!regex.is_match("+1."));
        assert!(!regex.is_match("1e1"));
        assert!(!regex.is_match("1e"));
    }
}