//! Module providing utilities for dealing with ENDF-6 format integers.
//!
//! The ENDF integer format is described in section `0.6.2` of the ENDF-6
//! Formats Manual.
//!
//! The `ENDF-6 Formats Manual` is available at
//! <https://www.nndc.bnl.gov/csewg/docs/endf-manual.pdf>
//!
//! # Format
//! ENDF integers can be read with FORTRAN77 `I11` format specification.
//!
//! ENDF integers have the following format:
//! ```text
//! endf_int = (space* sign? digit+){1..11}
//! space = ' '
//! sign = '-' | '+'
//! digit = ['0'-'9']
//! ```
//!
//! Format can be checked with [`ENDF_INT_REGEX`].
//!
//! ENDF integers lie in the range `-9_999_999_999..=9_999_999_999`
//! (see [`ENDF_INT_ABS_MAX`]).
//!
//! # Parsing ENDF-6 Format integers
//!
//! ```rust
//! use endf_format::integer::ParseEndfIntError;
//! fn parse_endf_int(integer: &str) -> Result<i64, ParseEndfIntError> {
//!     endf_format::integer::parse(integer)
//! }
//! ```
//!
//! [`ENDF_INT_REGEX`]: constant.ENDF_INT_REGEX.html
//! [`ENDF_INT_ABS_MAX`]: constant.ENDF_INT_ABS_MAX.html

use std::error::Error;
use std::fmt::{Display, Formatter};

/// ENDF-6 format integers maximum absolute value.
///
/// ENDF-6 format integers lie in the inclusive range
/// `-ENDF_INT_ABS_MAX..=ENDF_INT_ABS_MAX`, i.e. their magnitude has at most
/// ten decimal digits. [`parse`] reports any larger magnitude as
/// [`ParseEndfIntErrorKind::Overflow`].
///
/// [`parse`]: fn.parse.html
/// [`ParseEndfIntErrorKind::Overflow`]: enum.ParseEndfIntErrorKind.html#variant.Overflow
pub const ENDF_INT_ABS_MAX: i64 = 9_999_999_999;

/// ENDF-6 format integers maximum length (characters).
///
/// This is the width of the FORTRAN77 `I11` field and it includes the
/// optional sign. [`parse`] compares it against the length of its input
/// *after* trimming surrounding whitespace and reports longer input as
/// [`ParseEndfIntErrorKind::TooLarge`].
///
/// [`parse`]: fn.parse.html
/// [`ParseEndfIntErrorKind::TooLarge`]: enum.ParseEndfIntErrorKind.html#variant.TooLarge
pub const ENDF_INT_MAX_LEN: usize = 11;

/// ENDF-6 format integer regular expression.
///
/// Regular expression provided for checking [`parse`] input beforehand.
/// ```rust
/// use regex::Regex;
///
/// let regex = Regex::new(endf_format::integer::ENDF_INT_REGEX).unwrap();
/// let integer = "-1234567890";
/// assert!(regex.is_match(integer));
/// let x = endf_format::integer::parse(integer).unwrap();
/// ```
///
/// The pattern accepts optional space padding, an optional sign and either a
/// single `0` or up to ten digits without leading zero.
///
/// Digits are spelled `[0-9]` on purpose: with the `regex` crate `\d` is
/// Unicode-aware by default and would accept non ASCII decimal digits, which
/// [`parse`] always rejects.
/// ```rust
/// use regex::Regex;
///
/// let regex = Regex::new(endf_format::integer::ENDF_INT_REGEX).unwrap();
/// // ARABIC-INDIC DIGIT ONE is a decimal digit, but not an ASCII one
/// assert!(!regex.is_match("\u{0661}"));
/// assert!(endf_format::integer::parse("\u{0661}").is_err());
/// ```
///
/// The pattern is stricter than [`parse`]: zero padded values (e.g. `"01"`)
/// and padding made of whitespace other than `' '` (e.g. tabs) are accepted
/// by [`parse`] but do not match.
///
/// [`parse`]: fn.parse.html
pub const ENDF_INT_REGEX: &str = r"^[ ]*[-+]?(?:0|[1-9][0-9]{0,9})[ ]*$";

/// Error returned when parsing ENDF-6 format integers failed.
///
/// `ParseEndfIntError` provides a [`kind()`] ([`ParseEndfIntErrorKind`]) method
/// if fine error handling is needed.
///
/// [`ParseEndfIntErrorKind`]: enum.ParseEndfIntErrorKind.html
/// [`kind()`]: struct.ParseEndfIntError.html#method.kind
#[derive(Debug)]
pub struct ParseEndfIntError {
    kind: ParseEndfIntErrorKind,
}

impl ParseEndfIntError {
    /// Returns the corresponding `ParseEndfIntErrorKind` for this error.
    ///
    /// The kind is returned by value (`ParseEndfIntErrorKind` is `Copy`), so
    /// it can be matched on or compared directly.
    pub fn kind(&self) -> ParseEndfIntErrorKind {
        self.kind
    }
}

impl Display for ParseEndfIntError {
    /// Writes a human readable description of the failure.
    ///
    /// Every message starts with `cannot parse ENDF integer`; errors about an
    /// invalid sign or digit end with the offending character.
    fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
        // Kinds carrying a character need formatting and return right away,
        // all other kinds map to a fixed message written as is.
        let message = match self.kind {
            ParseEndfIntErrorKind::InvalidDigit(found) => {
                return write!(
                    f,
                    "cannot parse ENDF integer, invalid digit found in string: {}",
                    found
                );
            }
            ParseEndfIntErrorKind::InvalidSign(found) => {
                return write!(
                    f,
                    "cannot parse ENDF integer, invalid sign found in string: {}",
                    found
                );
            }
            ParseEndfIntErrorKind::Empty => "cannot parse ENDF integer from empty string",
            ParseEndfIntErrorKind::EmptyIntegerPart => {
                "cannot parse ENDF integer without integer part"
            }
            ParseEndfIntErrorKind::NonAscii => "cannot parse ENDF integer from non ASCII string",
            ParseEndfIntErrorKind::Overflow => "cannot parse ENDF integer, number too large",
            ParseEndfIntErrorKind::TooLarge => "cannot parse ENDF integer, string too long",
        };
        f.write_str(message)
    }
}

// The default `Error` methods are kept as is: `ParseEndfIntError` only stores
// a kind and does not wrap another error, so there is no source to report.
impl Error for ParseEndfIntError {}

/// Non exhaustive list of errors that can occur while parsing ENDF integers.
///
/// Each variant is documented with the condition under which [`parse`]
/// reports it.
///
/// [`parse`]: fn.parse.html
#[derive(Clone, Copy, Debug, Eq, Hash, Ord, PartialEq, PartialOrd)]
#[non_exhaustive]
pub enum ParseEndfIntErrorKind {
    /// The string is empty once surrounding whitespace has been trimmed.
    Empty,
    /// A sign (`-` or `+`) is present but nothing follows it.
    EmptyIntegerPart,
    /// The integer part contains a character that is not an ASCII digit;
    /// the first such character is carried by the variant.
    InvalidDigit(char),
    /// The first character is neither a sign (`-` or `+`) nor an ASCII digit;
    /// that character is carried by the variant.
    InvalidSign(char),
    /// The string contains at least one non ASCII character.
    NonAscii,
    /// The parsed magnitude is greater than `ENDF_INT_ABS_MAX`.
    Overflow,
    /// The trimmed string is longer than `ENDF_INT_MAX_LEN` characters.
    TooLarge,
}

/// Parses an ENDF-6 format integer.
///
/// # Format
/// ENDF integers can be read with the FORTRAN77 `I11` format specification.
///
/// ENDF integers have the following format:
/// ```text
/// endf_int = (space* sign? digit+){1..11}
/// space = ' '
/// sign = '-' | '+'
/// digit = ['0'-'9']
/// ```
///
/// With this format, ENDF-6 format integers lie in the range
/// `-9_999_999_999..=9_999_999_999`.
///
/// # Example
///
/// ```rust
/// let x = endf_format::integer::parse("-1234567890");
/// assert_eq!(-1_234_567_890_i64, x.unwrap());
/// ```
///
/// # Details
///
/// This function trims the specified string and handles only ASCII input,
/// so callers do not have to do either beforehand. The following is redundant
/// and should be avoided:
/// ```rust
/// let integer = "    -123456";
/// if integer.is_ascii() {
/// //             ^^^^^^^^^^ redundant check
///     let x = endf_format::integer::parse(integer.trim());
/// //                                                  ^^^^ redundant trim
///     assert_eq!(-123_456_i64, x.unwrap());
/// }
/// ```
///
/// # Errors
///
/// A [`ParseEndfIntError`] is returned if the specified string could not be
/// parsed.
///
/// ## Empty
/// Empty and blank strings are rejected, as well as a sign without digits.
/// ```rust
/// use endf_format::integer::ParseEndfIntErrorKind;
///
/// // empty
/// let x = endf_format::integer::parse("");
/// assert!(x.is_err());
/// assert_eq!(x.unwrap_err().kind(), ParseEndfIntErrorKind::Empty);
///
/// // blank
/// let y = endf_format::integer::parse(" \t\n\r");
/// //                                   ^ space char
/// assert!(y.is_err());
/// assert_eq!(y.unwrap_err().kind(), ParseEndfIntErrorKind::Empty);
///
/// // sign only
/// let z = endf_format::integer::parse("-");
/// assert!(z.is_err());
/// assert_eq!(z.unwrap_err().kind(), ParseEndfIntErrorKind::EmptyIntegerPart);
/// ```
///
/// ## Invalid characters
///
/// An invalid sign or an invalid digit is rejected; the offending character
/// is reported in the error kind.
/// ```rust
/// use endf_format::integer::ParseEndfIntErrorKind;
///
/// // invalid sign
/// let x = endf_format::integer::parse("%123");
/// assert!(x.is_err());
/// assert_eq!(x.unwrap_err().kind(), ParseEndfIntErrorKind::InvalidSign('%'));
///
/// // invalid digit
/// let y = endf_format::integer::parse("-12a");
/// assert!(y.is_err());
/// assert_eq!(y.unwrap_err().kind(), ParseEndfIntErrorKind::InvalidDigit('a'));
/// ```
///
/// ## Non ASCII
///
/// The ENDF-6 format allows only ASCII characters. This function fails fast
/// on non ASCII input, before any other check.
/// ```rust
/// use endf_format::integer::ParseEndfIntErrorKind;
/// let x = endf_format::integer::parse("\u{1F600}");
/// assert!(x.is_err());
/// assert_eq!(x.unwrap_err().kind(), ParseEndfIntErrorKind::NonAscii);
/// ```
///
/// ## Too large
///
/// Too long strings and too large numbers are rejected.
/// ```rust
/// use endf_format::integer::ParseEndfIntErrorKind;
///
/// // too large, 12 (> 11) characters
/// let x = endf_format::integer::parse("-11111111111");
/// assert!(x.is_err());
/// assert_eq!(x.unwrap_err().kind(), ParseEndfIntErrorKind::TooLarge);
///
/// // overflow > 9_999_999_999
/// let y = endf_format::integer::parse("11111111111");
/// assert!(y.is_err());
/// assert_eq!(y.unwrap_err().kind(), ParseEndfIntErrorKind::Overflow);
/// ```
///
/// # Difference with `std::i64::from_str_radix(..., 10)`
///
/// This function differs from `std::i64::from_str_radix(..., 10)` as follows:
/// - the specified string is trimmed (leading/trailing whitespace is removed)
/// - the parsed number is checked against the ENDF-6 format integer range
/// (`-9_999_999_999..=9_999_999_999`)
/// - a trimmed string longer than `ENDF_INT_MAX_LEN` is rejected
///
/// # Regex
///
/// Validating input is possible with [`ENDF_INT_REGEX`]:
/// ```rust
/// use regex::Regex;
///
/// let regex = Regex::new(endf_format::integer::ENDF_INT_REGEX).unwrap();
/// assert!(regex.is_match("-1234567890"));
/// ```
///
/// # Reference
///
/// The ENDF integer format is described in section `0.6.2` of the
/// [ENDF-6 Formats Manual](https://www.nndc.bnl.gov/csewg/docs/endf-manual.pdf)
///
/// [`ENDF_INT_REGEX`]: constant.ENDF_INT_REGEX.html
/// [`ParseEndfIntError`]: struct.ParseEndfIntError.html
pub fn parse(integer: &str) -> Result<i64, ParseEndfIntError> {
    // Single place where errors are built, keeps the checks below short.
    let fail = |kind| ParseEndfIntError { kind };

    // ENDF-6 files are plain ASCII: reject anything else before looking at
    // individual bytes, so that one byte is always exactly one character.
    if !integer.is_ascii() {
        return Err(fail(ParseEndfIntErrorKind::NonAscii));
    }

    let field = integer.trim().as_bytes();
    // Once this check passed, at most ENDF_INT_MAX_LEN (= 11) digits are
    // accumulated below, which cannot overflow an i64.
    if field.len() > ENDF_INT_MAX_LEN {
        return Err(fail(ParseEndfIntErrorKind::TooLarge));
    }

    // Separate the optional sign from the digits; an unsigned field starts
    // directly with a digit and is kept whole.
    let (negative, digits) = match field.split_first() {
        None => return Err(fail(ParseEndfIntErrorKind::Empty)),
        Some((&b'-', rest)) => (true, rest),
        Some((&b'+', rest)) => (false, rest),
        Some((first, _)) if first.is_ascii_digit() => (false, field),
        Some((&other, _)) => {
            return Err(fail(ParseEndfIntErrorKind::InvalidSign(other as char)));
        }
    };
    if digits.is_empty() {
        return Err(fail(ParseEndfIntErrorKind::EmptyIntegerPart));
    }

    // Accumulate the magnitude, stopping at the first non digit character.
    let magnitude = digits.iter().try_fold(0_i64, |acc, &byte| {
        if byte.is_ascii_digit() {
            Ok(10 * acc + i64::from(byte - b'0'))
        } else {
            Err(fail(ParseEndfIntErrorKind::InvalidDigit(byte as char)))
        }
    })?;

    // The length limit includes the sign, so an unsigned 11 digit field such
    // as 99999999999 passes the length check but is outside of the ENDF range
    // ([-9_999_999_999; 9_999_999_999]).
    if magnitude > ENDF_INT_ABS_MAX {
        return Err(fail(ParseEndfIntErrorKind::Overflow));
    }
    Ok(if negative { -magnitude } else { magnitude })
}

#[cfg(test)]
mod tests {
    use super::*;
    use regex::Regex;

    /// Checks that every input is rejected with the error kind paired to it.
    fn assert_rejected(cases: &[(&str, ParseEndfIntErrorKind)]) {
        for &(input, expected) in cases {
            let actual = parse(input).unwrap_err().kind();
            assert_eq!(expected, actual, "input: {:?}", input);
        }
    }

    /// Checks that every input parses to the value paired to it.
    fn assert_parsed(cases: &[(i64, &str)]) {
        for &(expected, input) in cases {
            assert_eq!(expected, parse(input).unwrap(), "input: {:?}", input);
        }
    }

    #[test]
    fn parse_empty() {
        assert_rejected(&[("", ParseEndfIntErrorKind::Empty)]);
    }

    #[test]
    fn parse_whitespace() {
        // Whitespace characters exercised here:
        // - ' ' (space)
        // - '\n' (line feed)
        // - '\t' (horizontal tab)
        // - '\r' (carriage return)
        let blank = ParseEndfIntErrorKind::Empty;
        assert_rejected(&[
            (" ", blank),
            ("\n", blank),
            ("\t", blank),
            ("\r", blank),
            (" \n\t\r", blank),
        ]);
    }

    #[test]
    fn parse_sign_only() {
        let no_digits = ParseEndfIntErrorKind::EmptyIntegerPart;
        assert_rejected(&[("-", no_digits), ("+", no_digits)]);
    }

    #[test]
    fn parse_invalid() {
        // the first character is checked as a sign before anything else
        assert_rejected(&[("string", ParseEndfIntErrorKind::InvalidSign('s'))]);
    }

    #[test]
    fn parse_invalid_digit() {
        assert_rejected(&[
            ("1.0", ParseEndfIntErrorKind::InvalidDigit('.')),
            ("1e+25", ParseEndfIntErrorKind::InvalidDigit('e')),
            ("1+2", ParseEndfIntErrorKind::InvalidDigit('+')),
            ("-1        2", ParseEndfIntErrorKind::InvalidDigit(' ')),
        ]);
    }

    #[test]
    fn parse_invalid_sign() {
        assert_rejected(&[("%1234567890", ParseEndfIntErrorKind::InvalidSign('%'))]);
    }

    #[test]
    fn parse_non_ascii() {
        assert_rejected(&[("µ", ParseEndfIntErrorKind::NonAscii)]);
    }

    #[test]
    fn parse_overflow() {
        assert_rejected(&[("10000000000", ParseEndfIntErrorKind::Overflow)]);
    }

    #[test]
    fn parse_too_long() {
        assert_rejected(&[("-11111111111", ParseEndfIntErrorKind::TooLarge)]);
    }

    #[test]
    fn parse_valid_near_zero() {
        assert_parsed(&[(0, "0"), (1, "1"), (-1, "-1"), (1, "+1")]);
    }

    #[test]
    fn parse_valid_limits() {
        assert_parsed(&[
            (9_999_999_999, "9999999999"),
            (9_999_999_999, "+9999999999"),
            (9_999_999_999, " 9999999999"),
            (-9_999_999_999, "-9999999999"),
        ]);
    }

    #[test]
    fn parse_valid_positive_unsigned() {
        assert_parsed(&[
            (1, "          1"),
            (12, "         12"),
            (123, "        123"),
            (1234, "       1234"),
            (12345, "      12345"),
            (123_456, "     123456"),
            (1_234_567, "    1234567"),
            (12_345_678, "   12345678"),
            (123_456_789, "  123456789"),
            (1_234_567_890, " 1234567890"),
        ]);
    }

    #[test]
    fn parse_valid_positive_signed() {
        assert_parsed(&[
            (1, "         +1"),
            (12, "        +12"),
            (123, "       +123"),
            (1234, "      +1234"),
            (12345, "     +12345"),
            (123_456, "    +123456"),
            (1_234_567, "   +1234567"),
            (12_345_678, "  +12345678"),
            (123_456_789, " +123456789"),
            (1_234_567_890, "+1234567890"),
        ]);
    }

    #[test]
    fn parse_valid_negative_signed() {
        assert_parsed(&[
            (-1, "         -1"),
            (-12, "        -12"),
            (-123, "       -123"),
            (-1234, "      -1234"),
            (-12345, "     -12345"),
            (-123_456, "    -123456"),
            (-1_234_567, "   -1234567"),
            (-12_345_678, "  -12345678"),
            (-123_456_789, " -123456789"),
            (-1_234_567_890, "-1234567890"),
        ]);
    }

    #[test]
    fn parse_valid_zero_padded() {
        // '0' padding is supported but not described in ENDF-6 format
        assert_parsed(&[
            (1, "00000000001"),
            (12, "00000000012"),
            (123, "00000000123"),
            (1234, "00000001234"),
            (12345, "00000012345"),
            (123_456, "00000123456"),
            (1_234_567, "00001234567"),
            (12_345_678, "00012345678"),
            (123_456_789, "00123456789"),
            (1_234_567_890, "01234567890"),
        ]);
    }

    #[test]
    fn regex_valid() {
        let regex = Regex::new(ENDF_INT_REGEX).unwrap();
        let accepted = [
            "0",
            "          0",
            "         -0",
            "1",
            "+1",
            "-1",
            " 1234567890",
            "-1234567890",
            "+1234567890",
            "          1",
            "     1     ",
            "1          ",
            "         +1",
            "    +1     ",
            "+1         ",
            "         -1",
            "    -1     ",
            "-1         ",
        ];
        for candidate in accepted.iter() {
            assert!(regex.is_match(candidate), "should match: {:?}", candidate);
        }
    }

    #[test]
    fn regex_invalid() {
        let regex = Regex::new(ENDF_INT_REGEX).unwrap();
        // each input is paired with the reason why it is not an ENDF integer
        let rejected = [
            ("12345678901", "overflow"),
            ("-", "sign only"),
            ("+", "sign only"),
            ("1.23456789", "invalid digit '.'"),
            ("-1.2345e+1", "invalid digits '.' and 'e'"),
            ("-1.2345e-1", "invalid digits '.' and 'e'"),
            ("invalid", "invalid sign 'i'"),
            ("+         1", "invalid digit ' '"),
            ("-         1", "invalid digit ' '"),
        ];
        for &(candidate, reason) in rejected.iter() {
            assert!(
                !regex.is_match(candidate),
                "should not match ({}): {:?}",
                reason,
                candidate
            );
        }
    }
}