use nom::{
branch::alt,
bytes::complete::tag,
character::complete::{char, digit1},
combinator::{cut, map, opt, value},
error::{ErrorKind as NomErrorKind, ParseError},
multi::{many1, separated_list1},
sequence::{delimited, preceded, terminated},
};
use super::error::{Error, ErrorKind};
use super::nom_recipes::{map_res, rtrim};
use super::types::{Input, ParseResult};
/// A single element of a parsed hex string.
#[derive(Clone, Debug, PartialEq)]
pub enum Token {
/// A fully specified byte, written as two hex digits (e.g. `AF`).
Byte(u8),
/// A byte in which one or both hex digits are replaced by `?`.
///
/// The `u8` is the value of the one explicit hex digit, unshifted
/// (`C?` gives `0x0C`, `?F` gives `0x0F`), or `0` when both digits are `?`.
MaskedByte(u8, Mask),
/// A jump written between square brackets, e.g. `[3-5]` or `[1-]`.
Jump(Jump),
/// A parenthesised list of alternatives separated by `|`.
///
/// Each alternative is itself a sequence of tokens, and may contain
/// nested alternatives.
Alternatives(Vec<Vec<Token>>),
}
/// Which hex digit(s) of a masked byte are replaced by the `?` wildcard.
#[derive(Clone, Debug, PartialEq, Eq)]
pub enum Mask {
/// The first (left) digit is `?`, as in `?F`.
Left,
/// The second (right) digit is `?`, as in `F?`.
Right,
/// Both digits are `?`, as in `??`.
All,
}
/// A jump, written between square brackets in a hex string.
#[derive(Clone, Debug, PartialEq, Eq)]
pub struct Jump {
/// Lower bound of the jump; `0` when it is omitted (e.g. `[-5]`).
pub from: u32,
/// Upper bound of the jump.
///
/// `None` when it is omitted (e.g. `[3-]`); equal to `from` for the
/// single-value form `[a]`.
pub to: Option<u32>,
}
/// Largest upper bound accepted for a jump that appears inside alternatives.
const JUMP_LIMIT_IN_ALTERNATIVES: u32 = 200;
fn hex_digit(mut input: Input) -> ParseResult<u8> {
match input.cursor().chars().next().and_then(|c| {
#[allow(clippy::cast_possible_truncation)]
c.to_digit(16).map(|v| v as u8)
}) {
Some(v) => {
input.advance(1);
Ok((input, v))
}
_ => Err(nom::Err::Error(Error::from_error_kind(
input,
NomErrorKind::HexDigit,
))),
}
}
/// Parses a full byte written as two adjacent hex digits (e.g. `AF`).
///
/// The first digit supplies the upper four bits and the second the lower
/// four. The second digit is parsed through `rtrim`, so whitespace after the
/// byte is consumed, but none is allowed between the two digits.
fn byte(input: Input) -> ParseResult<u8> {
    let (input, high) = hex_digit(input)?;
    let (input, low) = rtrim(hex_digit)(input)?;

    Ok((input, (high << 4) | low))
}
fn masked_byte(input: Input) -> ParseResult<(u8, Mask)> {
rtrim(alt((
map(tag("??"), |_| (0, Mask::All)),
map(preceded(char('?'), hex_digit), |v| (v, Mask::Left)),
map(terminated(hex_digit, char('?')), |v| (v, Mask::Right)),
)))(input)
}
fn range(input: Input) -> ParseResult<Jump> {
let start = input;
let (input, _) = rtrim(char('['))(input)?;
let (input, from) = opt(map_res(rtrim(digit1), |v| {
str::parse::<u32>(v.cursor()).map_err(ErrorKind::StrToIntError)
}))(input)?;
let (input, to) = match from {
Some(from) => {
alt((
delimited(
rtrim(char('-')),
opt(map_res(rtrim(digit1), |v| {
str::parse(v.cursor()).map_err(ErrorKind::StrToIntError)
})),
rtrim(char(']')),
),
value(Some(from), rtrim(char(']'))),
))(input)?
}
None => delimited(
rtrim(char('-')),
opt(map_res(rtrim(digit1), |v| {
str::parse(v.cursor()).map_err(ErrorKind::StrToIntError)
})),
rtrim(char(']')),
)(input)?,
};
let jump = Jump {
from: from.unwrap_or(0),
to,
};
if let Err(kind) = validate_jump(&jump) {
return Err(nom::Err::Failure(Error::new(
input.get_span_from(start),
kind,
)));
}
Ok((input, jump))
}
/// Checks that the bounds of a jump are coherent.
///
/// # Errors
///
/// - `ErrorKind::JumpEmpty` when both bounds are `0`.
/// - `ErrorKind::JumpRangeInvalid` when the lower bound is greater than the
///   upper bound.
///
/// A jump without an upper bound is always accepted.
fn validate_jump(range: &Jump) -> Result<(), ErrorKind> {
    match range.to {
        Some(0) if range.from == 0 => Err(ErrorKind::JumpEmpty),
        Some(to) if range.from > to => Err(ErrorKind::JumpRangeInvalid {
            from: range.from,
            to,
        }),
        Some(_) | None => Ok(()),
    }
}
/// Parses a parenthesised group of alternatives, e.g. `( AB | 56 ?F )`.
///
/// Each alternative is a non-empty token sequence parsed with
/// `in_alternatives` set, and alternatives are separated by `|`.
///
/// A missing opening `(` is a recoverable error. Once it has been consumed,
/// any error in the alternatives or the closing `)` is turned into a
/// non-recoverable failure.
fn alternatives(input: Input) -> ParseResult<Token> {
    let (input, _) = rtrim(char('('))(input)?;

    let (input, branches) = cut(separated_list1(rtrim(char('|')), |input| {
        tokens(input, true)
    }))(input)?;
    let (input, _) = cut(rtrim(char(')')))(input)?;

    Ok((input, Token::Alternatives(branches)))
}
fn range_as_hex_token(input: Input, in_alternatives: bool) -> ParseResult<Token> {
let start = input;
let (input, range) = range(input)?;
if in_alternatives {
if let Err(kind) = validate_jump_in_alternatives(&range) {
return Err(nom::Err::Failure(Error::new(
input.get_span_from(start),
kind,
)));
}
}
if let Some(to) = &range.to {
if range.from == *to && range.from == 1 {
return Ok((input, Token::MaskedByte(0, Mask::All)));
}
}
Ok((input, Token::Jump(range)))
}
/// Applies the extra restrictions on jumps placed inside alternatives.
///
/// # Errors
///
/// - `ErrorKind::JumpUnboundedInAlternation` when the jump has no upper
///   bound.
/// - `ErrorKind::JumpTooBigInAlternation` when the upper bound exceeds
///   `JUMP_LIMIT_IN_ALTERNATIVES`.
fn validate_jump_in_alternatives(jump: &Jump) -> Result<(), ErrorKind> {
    match jump.to {
        Some(to) if to <= JUMP_LIMIT_IN_ALTERNATIVES => Ok(()),
        Some(_) => Err(ErrorKind::JumpTooBigInAlternation {
            limit: JUMP_LIMIT_IN_ALTERNATIVES,
        }),
        None => Err(ErrorKind::JumpUnboundedInAlternation),
    }
}
/// Parses one hex-string token.
///
/// The candidates are tried in this order: masked byte, plain byte, jump,
/// alternatives. `in_alternatives` is forwarded to the jump parser so that
/// jumps nested in alternatives get the stricter validation.
fn hex_token(input: Input, in_alternatives: bool) -> ParseResult<Token> {
    let masked = map(masked_byte, |(value, mask)| Token::MaskedByte(value, mask));
    let plain = map(byte, Token::Byte);

    alt((
        masked,
        plain,
        |input| range_as_hex_token(input, in_alternatives),
        alternatives,
    ))(input)
}
fn tokens(input: Input, in_alternatives: bool) -> ParseResult<Vec<Token>> {
let start = input;
let (input, tokens) = many1(|input| hex_token(input, in_alternatives))(input)?;
if matches!(tokens[0], Token::Jump(_))
|| (tokens.len() > 1 && matches!(tokens[tokens.len() - 1], Token::Jump(_)))
{
Err(nom::Err::Failure(Error::new(
input.get_span_from(start),
ErrorKind::JumpAtBound,
)))
} else {
Ok((input, tokens))
}
}
/// Parses a complete hex string: a non-empty token sequence between `{` and
/// `}`.
///
/// A missing opening `{` is a recoverable error. Once it has been consumed,
/// any error in the tokens or the closing `}` is turned into a
/// non-recoverable failure.
pub(crate) fn hex_string(input: Input) -> ParseResult<Vec<Token>> {
    let (input, _) = rtrim(char('{'))(input)?;

    let (input, parsed) = cut(|input| tokens(input, false))(input)?;
    let (input, _) = cut(rtrim(char('}')))(input)?;

    Ok((input, parsed))
}
// Unit tests for the hex string parsers.
//
// NOTE(review): judging from the call sites, `parse(parser, input, rest, value)`
// presumably checks that `parser` succeeds on `input`, leaves `rest` unparsed
// and returns `value`, while `parse_err(parser, input)` checks that parsing
// fails — confirm against `crate::test_helpers`.
#[cfg(test)]
mod tests {
use super::*;
use crate::test_helpers::{parse, parse_err, test_public_type};
// Plain bytes: exactly two adjacent hex digits, trailing whitespace trimmed.
#[test]
fn test_parse_hex_byte() {
parse(byte, "AF", "", 0xAF);
parse(byte, "10F", "F", 0x10);
parse(byte, "9E 1", "1", 0x9E);
parse_err(byte, "G1");
parse_err(byte, "1G");
parse_err(byte, "1");
parse_err(byte, " ");
}
// Masked bytes: the value is the single explicit digit, unshifted.
#[test]
fn test_parse_masked_byte() {
parse(masked_byte, "?1", "", (1, Mask::Left));
parse(masked_byte, "C??", "?", (0xC, Mask::Right));
parse(masked_byte, "?? ", "", (0, Mask::All));
parse_err(masked_byte, "AB");
parse_err(masked_byte, " ?");
parse_err(masked_byte, "G?");
parse_err(masked_byte, "?G");
}
// Jumps: all accepted bracket forms, then syntax, overflow and bound errors.
#[test]
fn test_range() {
parse(range, "[-] a", "a", Jump { from: 0, to: None });
parse(
range,
"[ 15 -35]",
"",
Jump {
from: 15,
to: Some(35),
},
);
parse(range, "[1- ]", "", Jump { from: 1, to: None });
parse(
range,
"[1-2]]",
"]",
Jump {
from: 1,
to: Some(2),
},
);
parse(
range,
"[ 1 - 2 ]",
"",
Jump {
from: 1,
to: Some(2),
},
);
parse(
range,
"[-1]",
"",
Jump {
from: 0,
to: Some(1),
},
);
parse(
range,
"[12 ]",
"",
Jump {
from: 12,
to: Some(12),
},
);
// Malformed syntax.
parse_err(range, "[");
parse_err(range, "[]");
parse_err(range, "[--]");
parse_err(range, "[1-2-3]");
parse_err(range, "[1-2-]");
parse_err(range, "[-2-]");
parse_err(range, "[d-e]");
parse_err(range, "[1 2]");
// Values that do not fit in a `u32`.
parse_err(range, "[999999999999-]");
parse_err(range, "[1-999999999999]");
parse_err(range, "[-999999999999]");
// Lower bound greater than upper bound.
parse_err(range, "[4-2]");
parse_err(range, "[4-3]");
parse(
range,
"[4-4]",
"",
Jump {
from: 4,
to: Some(4),
},
);
// Empty jumps are rejected.
parse_err(range, "[0]");
parse_err(range, "[0-0]");
parse(
range,
"[1]",
"",
Jump {
from: 1,
to: Some(1),
},
);
}
// Alternatives: flat and nested groups, then jump restrictions and syntax errors.
#[test]
fn test_alternatives() {
parse(
alternatives,
"( AB | 56 ?F ) ",
"",
Token::Alternatives(vec![
vec![Token::Byte(0xAB)],
vec![Token::Byte(0x56), Token::MaskedByte(0x0F, Mask::Left)],
]),
);
// `[1]` is returned as a fully masked byte rather than as a jump.
parse(
alternatives,
"(12[1]C?|??[3-5]33)",
"",
Token::Alternatives(vec![
vec![
Token::Byte(0x12),
Token::MaskedByte(0, Mask::All),
Token::MaskedByte(0x0C, Mask::Right),
],
vec![
Token::MaskedByte(0x00, Mask::All),
Token::Jump(Jump {
from: 3,
to: Some(5),
}),
Token::Byte(0x33),
],
]),
);
parse(
alternatives,
"( ( ?D | 23)| 15) ",
"",
Token::Alternatives(vec![
vec![Token::Alternatives(vec![
vec![Token::MaskedByte(0x0D, Mask::Left)],
vec![Token::Byte(0x23)],
])],
vec![Token::Byte(0x15)],
]),
);
parse(
alternatives,
"( AA (BB | CC) | DD | EE FF )",
"",
Token::Alternatives(vec![
vec![
Token::Byte(0xAA),
Token::Alternatives(vec![vec![Token::Byte(0xBB)], vec![Token::Byte(0xCC)]]),
],
vec![Token::Byte(0xDD)],
vec![Token::Byte(0xEE), Token::Byte(0xFF)],
]),
);
// Unbounded or too large jumps are rejected inside alternatives.
parse_err(alternatives, "( AB | [-] )");
parse_err(alternatives, "( AB | [1-] )");
parse_err(alternatives, "( AB | [1-250] )");
parse_err(alternatives, "( AB | [199-201] )");
parse_err(alternatives, "( AB | [200-201] )");
// Malformed groups.
parse_err(alternatives, ")");
parse_err(alternatives, "()");
parse_err(alternatives, "(");
parse_err(alternatives, "(|)");
parse_err(alternatives, "(|");
parse_err(alternatives, "(AB|)");
parse_err(alternatives, "(|12)");
parse_err(alternatives, "(|123)");
// Jumps at the start or end of an alternative.
parse_err(alternatives, "( [-] AB | CD )");
parse_err(alternatives, "( AB [1-2] | CD )");
parse_err(alternatives, "( AB | [3-] CD )");
parse_err(alternatives, "( AB | CD EF [-5] )");
}
// Whole hex strings between braces.
#[test]
fn test_hex_string() {
parse(hex_string, "{ AB }", "", vec![Token::Byte(0xAB)]);
parse(
hex_string,
"{ DE AD BE EF }",
"",
vec![
Token::Byte(0xDE),
Token::Byte(0xAD),
Token::Byte(0xBE),
Token::Byte(0xEF),
],
);
parse(
hex_string,
"{ 01 ?2 ?? 3? [1-] ( AF | DC ) }",
"",
vec![
Token::Byte(1),
Token::MaskedByte(2, Mask::Left),
Token::MaskedByte(0, Mask::All),
Token::MaskedByte(3, Mask::Right),
Token::Jump(Jump { from: 1, to: None }),
Token::Alternatives(vec![vec![Token::Byte(0xAF)], vec![Token::Byte(0xDC)]]),
],
);
// A jump may not be the first or last token.
parse_err(hex_string, "{ [-] }");
parse_err(hex_string, "{ [-] AB }");
parse_err(hex_string, "{ AB CD [-] }");
// Missing braces, empty content or incomplete bytes.
parse_err(hex_string, "AB");
parse_err(hex_string, "{");
parse_err(hex_string, "{}");
parse_err(hex_string, "{A}");
parse_err(hex_string, "{ABA}");
parse_err(hex_string, "{AB");
}
// Runs the shared `test_public_type` helper on each public type of this module.
#[test]
fn test_public_types() {
test_public_type(Token::Byte(3));
test_public_type(Mask::Left);
test_public_type(Jump { from: 3, to: None });
}
}