use nom::{
IResult, Parser,
branch::alt,
bytes::complete::{tag, take_while},
character::complete::{char, multispace0, one_of},
combinator::opt,
error::Error as NomError,
multi::many0,
sequence::preceded,
};
use log::warn;
use crate::parser::ast::{
Endianness, IndirectAdjustmentOp, MagicRule, MetaType, OffsetSpec, Operator, StrengthModifier,
TypeKind, Value,
};
mod numbers;
mod type_suffix;
mod value;
pub use numbers::parse_number;
pub use value::parse_value;
use numbers::parse_decimal_number;
#[cfg(test)]
use numbers::parse_hex_number;
use type_suffix::{
parse_attached_operator, parse_pstring_suffix, parse_regex_suffix, parse_search_suffix,
parse_value_transform,
};
#[cfg(test)]
use value::{parse_escape_sequence, parse_hex_bytes, parse_numeric_value, parse_quoted_string};
/// Maps an indirect-offset pointer specifier to the type read at the pointer
/// location, paired with the byte order used for that read.
///
/// Lowercase specifiers select little-endian and uppercase specifiers select
/// big-endian. `b`/`B` is a byte, `s`/`S` a short, `l`/`L` and `i`/`I` a long,
/// and `q`/`Q` a quad; every returned type is signed. Any other character
/// yields `None`.
fn pointer_specifier_to_type(spec: char) -> Option<(TypeKind, Endianness)> {
    // The letter's case is the only input that decides the byte order.
    let endian = || {
        if spec.is_ascii_uppercase() {
            Endianness::Big
        } else {
            Endianness::Little
        }
    };
    let kind = match spec.to_ascii_lowercase() {
        'b' => TypeKind::Byte { signed: true },
        's' => TypeKind::Short {
            endian: endian(),
            signed: true,
        },
        // `i`/`I` is treated exactly like `l`/`L`.
        'l' | 'i' => TypeKind::Long {
            endian: endian(),
            signed: true,
        },
        'q' => TypeKind::Quad {
            endian: endian(),
            signed: true,
        },
        _ => return None,
    };
    Some((kind, endian()))
}
fn parse_indirect_offset(input: &str) -> IResult<&str, OffsetSpec> {
fn parse_operand(input: &str) -> IResult<&str, i64> {
if let Some(rest) = input.strip_prefix('(') {
let (rest, n) = parse_number(rest)?;
let (rest, _) = char(')')(rest)?;
Ok((rest, n))
} else {
parse_number(input)
}
}
fn parse_inside_adjustment(input: &str) -> IResult<&str, Option<(IndirectAdjustmentOp, i64)>> {
if let Some(rest) = input.strip_prefix('+') {
let (rest, n) = parse_operand(rest)?;
Ok((rest, Some((IndirectAdjustmentOp::Add, n))))
} else if input.starts_with('-') {
let (rest, n) = parse_number(input)?;
Ok((rest, Some((IndirectAdjustmentOp::Add, n))))
} else if let Some(rest) = input.strip_prefix('*') {
let (rest, n) = parse_operand(rest)?;
Ok((rest, Some((IndirectAdjustmentOp::Mul, n))))
} else if let Some(rest) = input.strip_prefix('/') {
let (rest, n) = parse_operand(rest)?;
Ok((rest, Some((IndirectAdjustmentOp::Div, n))))
} else if let Some(rest) = input.strip_prefix('%') {
let (rest, n) = parse_operand(rest)?;
Ok((rest, Some((IndirectAdjustmentOp::Mod, n))))
} else if let Some(rest) = input.strip_prefix('&') {
let (rest, n) = parse_operand(rest)?;
Ok((rest, Some((IndirectAdjustmentOp::And, n))))
} else if let Some(rest) = input.strip_prefix('|') {
let (rest, n) = parse_operand(rest)?;
Ok((rest, Some((IndirectAdjustmentOp::Or, n))))
} else if let Some(rest) = input.strip_prefix('^') {
let (rest, n) = parse_operand(rest)?;
Ok((rest, Some((IndirectAdjustmentOp::Xor, n))))
} else {
Ok((input, None))
}
}
fn parse_outside_adjustment(input: &str) -> IResult<&str, Option<i64>> {
if let Some(rest) = input.strip_prefix('+') {
let (rest, n) = parse_number(rest)?;
Ok((rest, Some(n)))
} else if input.starts_with('-') {
let (rest, n) = parse_number(input)?;
Ok((rest, Some(n)))
} else {
Ok((input, None))
}
}
let (input, _) = char('(')(input)?;
let (input, base_relative) = if let Some(rest) = input.strip_prefix('&') {
(rest, true)
} else {
(input, false)
};
let (input, base_offset) = parse_number(input)?;
let (input, sep) = one_of(".,").parse(input)?;
if sep == ',' {
warn!(
"Indirect offset uses ',' as separator (magic(5) requires '.'); \
accepting for GNU `file` typo-tolerance compatibility"
);
}
let (input, spec_char) = one_of("bBsSlLqQiI")(input)?;
let (pointer_type, endian) = pointer_specifier_to_type(spec_char)
.ok_or_else(|| nom::Err::Error(NomError::new(input, nom::error::ErrorKind::OneOf)))?;
let (input, inside) = parse_inside_adjustment(input)?;
let (input, _) = char(')')(input)?;
let (input, adjustment_op, adjustment) = if let Some((op, n)) = inside {
(input, op, n)
} else {
let (input, outside) = parse_outside_adjustment(input)?;
(input, IndirectAdjustmentOp::Add, outside.unwrap_or(0))
};
Ok((
input,
OffsetSpec::Indirect {
base_offset,
base_relative,
pointer_type,
adjustment,
adjustment_op,
result_relative: false,
endian,
},
))
}
/// Parses an offset specification, skipping whitespace on both sides.
///
/// Recognised forms:
/// * `(...)`   — indirect offset
/// * `&(...)`  — indirect offset with `result_relative` set to `true`
/// * `&N`, `&+N`, `&-N` — [`OffsetSpec::Relative`]
/// * `N`       — [`OffsetSpec::Absolute`]
///
/// # Errors
/// Returns the underlying parser error when the number or the indirect
/// expression cannot be parsed.
pub fn parse_offset(input: &str) -> IResult<&str, OffsetSpec> {
    let (input, _) = multispace0(input)?;

    let (rest, spec) = if input.starts_with('(') {
        parse_indirect_offset(input)?
    } else if let Some(after_amp) = input.strip_prefix('&') {
        if after_amp.starts_with('(') {
            let (rest, mut spec) = parse_indirect_offset(after_amp)?;
            // The outer `&` marks the resolved offset as relative.
            if let OffsetSpec::Indirect {
                result_relative, ..
            } = &mut spec
            {
                *result_relative = true;
            }
            (rest, spec)
        } else {
            // An explicit `+` is dropped; a `-` is left for the number parser.
            let digits = after_amp.strip_prefix('+').unwrap_or(after_amp);
            let (rest, value) = parse_number(digits)?;
            (rest, OffsetSpec::Relative(value))
        }
    } else {
        let (rest, value) = parse_number(input)?;
        (rest, OffsetSpec::Absolute(value))
    };

    let (rest, _) = multispace0(rest)?;
    Ok((rest, spec))
}
pub fn parse_operator(input: &str) -> IResult<&str, Operator> {
let (input, _) = multispace0(input)?;
let bytes = input.as_bytes();
let err = || nom::Err::Error(nom::error::Error::new(input, nom::error::ErrorKind::Tag));
let (op, consumed) = match bytes.first().copied() {
Some(b'=') => {
if bytes.get(1).copied() == Some(b'=') {
if bytes.get(2).copied() == Some(b'=') {
return Err(err());
}
(Operator::Equal, 2)
} else {
(Operator::Equal, 1)
}
}
Some(b'!') => {
if bytes.get(1).copied() == Some(b'=') {
(Operator::NotEqual, 2)
} else {
(Operator::NotEqual, 1)
}
}
Some(b'<') => {
match bytes.get(1).copied() {
Some(b'=') => (Operator::LessEqual, 2),
Some(b'>') => (Operator::NotEqual, 2),
_ => (Operator::LessThan, 1),
}
}
Some(b'>') => {
if bytes.get(1).copied() == Some(b'=') {
(Operator::GreaterEqual, 2)
} else {
(Operator::GreaterThan, 1)
}
}
Some(b'&') => {
if bytes.get(1).copied() == Some(b'&') {
return Err(err());
}
(Operator::BitwiseAnd, 1)
}
Some(b'^') => {
if bytes.get(1).copied() == Some(b'^') {
return Err(err());
}
(Operator::BitwiseXor, 1)
}
Some(b'~') => {
if bytes.get(1).copied() == Some(b'~') {
return Err(err());
}
(Operator::BitwiseNot, 1)
}
Some(b'x') => {
if input
.get(1..)
.is_some_and(|s| s.starts_with(|c: char| c.is_alphanumeric() || c == '_'))
{
return Err(err());
}
(Operator::AnyValue, 1)
}
_ => return Err(err()),
};
let remaining = &input[consumed..];
let (remaining, _) = multispace0(remaining)?;
Ok((remaining, op))
}
/// Parses the identifier that follows a `name` or `use` type keyword and
/// builds the matching [`TypeKind::Meta`] value.
///
/// `type_name` is the keyword already consumed by the caller; `input` is the
/// text right after it. At least one space or tab must precede the
/// identifier, which consists of alphanumerics, `_` and `-`. For `use`, a
/// leading `\^` is stripped with a logged warning. The identifier must be
/// followed by whitespace or end of input. Whatever else remains on the line
/// is skipped, and trailing whitespace (including newlines) is consumed.
///
/// The operator and value-transform slots of the result are always `None`.
///
/// # Errors
/// `AlphaNumeric` when the identifier is empty, `Alpha` when it is followed
/// directly by a non-whitespace character.
fn parse_name_or_use_meta<'a>(
    type_name: &str,
    input: &'a str,
) -> IResult<
    &'a str,
    (
        TypeKind,
        Option<Operator>,
        Option<crate::parser::ast::ValueTransform>,
    ),
> {
    use nom::character::complete::space1;

    let (input, _) = space1(input)?;

    let input = match input.strip_prefix("\\^") {
        Some(rest) if type_name == "use" => {
            warn!(
                "use directive with `\\^` prefix: endian-flip semantics \
                 are not yet implemented (issue #236). Subroutine reads \
                 will use their declared endianness; metadata fields may \
                 be incorrect. Identifier: {:?}",
                rest.split_whitespace().next().unwrap_or("")
            );
            rest
        }
        _ => input,
    };

    let (after_id, id) =
        take_while(|c: char| c.is_alphanumeric() || matches!(c, '_' | '-')).parse(input)?;
    if id.is_empty() {
        return Err(nom::Err::Error(NomError::new(
            after_id,
            nom::error::ErrorKind::AlphaNumeric,
        )));
    }

    let glued_to_text = after_id
        .chars()
        .next()
        .is_some_and(|c| !matches!(c, ' ' | '\t' | '\n' | '\r'));
    if glued_to_text {
        return Err(nom::Err::Error(NomError::new(
            after_id,
            nom::error::ErrorKind::Alpha,
        )));
    }

    // Drop horizontal whitespace, then everything up to the line terminator.
    // When nothing precedes the terminator (or input ended) this is a no-op.
    let tail = after_id.trim_start_matches([' ', '\t']);
    let line_end = tail.find(['\n', '\r']).unwrap_or(tail.len());
    let tail = &tail[line_end..];

    let meta = match type_name {
        "name" => MetaType::Name(id.to_string()),
        _ => MetaType::Use(id.to_string()),
    };

    let (input, _) = multispace0(tail)?;
    Ok((input, (TypeKind::Meta(meta), None, None)))
}
pub fn parse_type_and_operator(
input: &str,
) -> IResult<
&str,
(
TypeKind,
Option<Operator>,
Option<crate::parser::ast::ValueTransform>,
),
> {
use crate::parser::ast::{PStringLengthWidth, RegexCount, RegexFlags};
let (input, _) = multispace0(input)?;
let (mut input, type_name) = crate::parser::types::parse_type_keyword(input)?;
if type_name == "name" || type_name == "use" {
return parse_name_or_use_meta(type_name, input);
}
let mut pstring_length_width = PStringLengthWidth::OneByte;
let mut pstring_length_includes_itself = false;
if type_name == "pstring"
&& let Some(suffix_rest) = input.strip_prefix('/')
{
let (rest, width, includes_j) = parse_pstring_suffix(suffix_rest)?;
input = rest;
pstring_length_width = width;
pstring_length_includes_itself = includes_j;
}
let mut regex_flags = RegexFlags::default();
let mut regex_count = RegexCount::Default;
if type_name == "regex"
&& let Some(suffix_rest) = input.strip_prefix('/')
{
let (rest, (flags, count)) = parse_regex_suffix(input, suffix_rest)?;
regex_flags = flags;
regex_count = count;
input = rest;
}
let mut search_range: Option<::std::num::NonZeroUsize> = None;
if type_name == "search"
&& let Some(suffix_rest) = input.strip_prefix('/')
{
let (rest, range) = parse_search_suffix(input, suffix_rest)?;
search_range = Some(range);
input = rest;
}
if type_name == "string"
&& let Some(suffix_rest) = input.strip_prefix('/')
{
let mut consumed = 0usize;
for ch in suffix_rest.chars() {
if matches!(ch, 'W' | 'w' | 'c' | 'C' | 't' | 'T' | 'B' | 'b') {
consumed += ch.len_utf8();
} else {
break;
}
}
if consumed > 0 {
warn!(
"string flag suffix `/{flags}` parsed but not yet evaluated \
(issue #234); comparison uses byte-exact semantics regardless of flags",
flags = &suffix_rest[..consumed]
);
input = &suffix_rest[consumed..];
} else {
}
}
let (input, value_transform) = parse_value_transform(input)?;
let (input, attached_op) = parse_attached_operator(input)?;
let (input, _) = multispace0(input)?;
let type_kind = match type_name {
"regex" => TypeKind::Regex {
flags: regex_flags,
count: regex_count,
},
"search" => {
let range = search_range.ok_or_else(|| {
nom::Err::Error(nom::error::Error::new(input, nom::error::ErrorKind::Tag))
})?;
TypeKind::Search { range }
}
_ => {
let Ok(Some(mut kind)) = crate::parser::types::type_keyword_to_kind(type_name) else {
return Err(nom::Err::Error(nom::error::Error::new(
input,
nom::error::ErrorKind::Tag,
)));
};
if let TypeKind::PString { max_length, .. } = kind {
kind = TypeKind::PString {
max_length,
length_width: pstring_length_width,
length_includes_itself: pstring_length_includes_itself,
};
}
kind
}
};
Ok((input, (type_kind, attached_op, value_transform)))
}
/// Parses a type specification and returns only the [`TypeKind`], discarding
/// any attached operator and value transform.
///
/// # Errors
/// Propagates any error from [`parse_type_and_operator`].
#[allow(dead_code)]
pub fn parse_type(input: &str) -> IResult<&str, TypeKind> {
    parse_type_and_operator(input).map(|(rest, (type_kind, ..))| (rest, type_kind))
}
/// Parses the leading part of a rule: zero or more `>` markers followed by an
/// offset specification.
///
/// Returns `(level, offset)` where `level` is the number of `>` characters.
/// A count that does not fit in `u32` is reported as `0`.
///
/// # Errors
/// Propagates any error from [`parse_offset`].
pub fn parse_rule_offset(input: &str) -> IResult<&str, (u32, OffsetSpec)> {
    let (rest, _) = multispace0(input)?;
    let (rest, markers) = many0(char('>')).parse(rest)?;
    let level = u32::try_from(markers.len()).unwrap_or(0);
    parse_offset(rest).map(|(rest, spec)| (rest, (level, spec)))
}
/// Parses a rule message: after skipping leading whitespace, takes everything
/// up to (not including) the next `\n` or `\r` and returns it trimmed.
///
/// The line terminator itself is left in the remaining input. An empty
/// message is returned when nothing precedes the terminator.
pub fn parse_message(input: &str) -> IResult<&str, String> {
    let (rest, _) = multispace0(input)?;
    let (rest, line) = take_while(|c: char| !matches!(c, '\n' | '\r')).parse(rest)?;
    Ok((rest, line.trim().to_owned()))
}
/// Parses a `!:strength` directive into a [`StrengthModifier`].
///
/// After the `!:strength` tag and optional whitespace, one of the following
/// is accepted (whitespace may separate the sign from the number):
/// * `+N` — `Add`
/// * `-N` — `Subtract` (the number is parsed with `parse_decimal_number`)
/// * `*N` — `Multiply`
/// * `/N` — `Divide`
/// * `=N` or a bare `N` — `Set`
///
/// Values outside the `i32` range are saturated to `i32::MIN` / `i32::MAX`.
///
/// # Errors
/// Fails when the tag is missing or none of the alternatives matches.
pub fn parse_strength_directive(input: &str) -> IResult<&str, StrengthModifier> {
    // Saturating narrowing: out-of-range values pin to the nearest bound.
    fn saturate(n: i64) -> i32 {
        i32::try_from(n).unwrap_or(if n < 0 { i32::MIN } else { i32::MAX })
    }

    let (rest, _) = multispace0(input)?;
    let (rest, _) = tag("!:strength")(rest)?;
    let (rest, _) = multispace0(rest)?;

    // Branch order matters: the explicit operator forms are tried before the
    // bare-number fallback.
    alt((
        preceded((char('+'), multispace0), parse_number)
            .map(|n| StrengthModifier::Add(saturate(n))),
        preceded((char('-'), multispace0), parse_decimal_number)
            .map(|n| StrengthModifier::Subtract(saturate(n))),
        preceded((char('*'), multispace0), parse_number)
            .map(|n| StrengthModifier::Multiply(saturate(n))),
        preceded((char('/'), multispace0), parse_number)
            .map(|n| StrengthModifier::Divide(saturate(n))),
        preceded((char('='), multispace0), parse_number)
            .map(|n| StrengthModifier::Set(saturate(n))),
        parse_number.map(|n| StrengthModifier::Set(saturate(n))),
    ))
    .parse(rest)
}
/// Returns `true` when the line, ignoring surrounding whitespace, begins with
/// `!:strength`.
#[must_use]
pub fn is_strength_directive(input: &str) -> bool {
    // The prefix contains no whitespace, so only leading whitespace can
    // influence the outcome.
    input.trim_start().strip_prefix("!:strength").is_some()
}
/// Removes a standalone leading `x` operator from `input`.
///
/// Leading spaces and tabs are ignored when looking for the `x`. The `x`
/// counts as standalone only when it is followed by end of input, a space, a
/// tab, `\n` or `\r`; in that case the text after the `x` is returned with
/// its leading spaces and tabs removed. Otherwise `input` is returned
/// unchanged, leading whitespace included.
fn strip_optional_x_operator(input: &str) -> &str {
    const BLANKS: [char; 2] = [' ', '\t'];

    let after_x = match input.trim_start_matches(BLANKS).strip_prefix('x') {
        Some(rest) => rest,
        None => return input,
    };
    match after_x.chars().next() {
        None | Some(' ' | '\t' | '\n' | '\r') => after_x.trim_start_matches(BLANKS),
        Some(_) => input,
    }
}
/// Parses one complete magic rule line into a [`MagicRule`].
///
/// Steps, in order:
/// 1. leading whitespace, nesting level and offset (`parse_rule_offset`);
/// 2. type, attached operator and value transform (`parse_type_and_operator`);
/// 3. for meta types, an optional standalone `x` and the message — no value
///    is parsed for these;
/// 4. otherwise an optional separate operator, the value, and the message.
///
/// The returned rule always has empty `children` and no `strength_modifier`.
///
/// # Errors
/// Propagates errors from the sub-parsers. Additionally returns a `Verify`
/// error when a meta type carries an attached operator, and a `Tag` error
/// when an attached mask is combined with both a separate operator and an
/// existing value transform.
pub fn parse_magic_rule(input: &str) -> IResult<&str, MagicRule> {
let (input, _) = multispace0(input)?;
let (input, (level, offset)) = parse_rule_offset(input)?;
let (input, (typ, attached_op, value_transform)) = parse_type_and_operator(input)?;
// Meta types take no operator and no value; they are built with
// `AnyValue` and a placeholder `Value::Uint(0)`.
if matches!(typ, TypeKind::Meta(_)) {
// An operator attached to a meta type is rejected.
if attached_op.is_some() {
return Err(nom::Err::Error(nom::error::Error::new(
input,
nom::error::ErrorKind::Verify,
)));
}
// A standalone `x` before the message is tolerated and dropped.
let input = strip_optional_x_operator(input);
// Whitespace-only remainder means there is no message.
let (input, message) = if input.trim().is_empty() {
(input, String::new())
} else {
parse_message(input)?
};
let rule = MagicRule {
offset,
typ,
op: Operator::AnyValue,
value: Value::Uint(0),
message,
children: vec![],
level,
strength_modifier: None,
value_transform: None,
};
return Ok((input, rule));
}
// The operator may also appear as a separate token after the type.
let (input, separate_op) = opt(parse_operator).parse(input)?;
// Resolve the final operator and value transform from the two sources.
let (op, value_transform) = match (attached_op, separate_op) {
// Attached mask plus a separate operator: the mask becomes a `BitAnd`
// value transform and the separate operator does the comparison.
(Some(Operator::BitwiseAndMask(mask)), Some(separate)) => {
// There is only one transform slot, so a pre-existing transform
// cannot be combined with the promoted mask.
if value_transform.is_some() {
return Err(nom::Err::Error(NomError::new(
input,
nom::error::ErrorKind::Tag,
)));
}
// The mask is reinterpreted as `i64`; wrap-around is accepted here.
#[allow(clippy::cast_possible_wrap)]
let promoted = crate::parser::ast::ValueTransform {
op: crate::parser::ast::ValueTransformOp::BitAnd,
operand: mask as i64,
};
(separate, Some(promoted))
}
// Any other attached operator wins over a separate one.
(Some(attached), _) => (attached, value_transform),
(None, Some(separate)) => (separate, value_transform),
// No operator anywhere: default to equality.
(None, None) => (Operator::Equal, value_transform),
};
let is_string_family_type = matches!(
typ,
TypeKind::String { .. }
| TypeKind::String16 { .. }
| TypeKind::PString { .. }
| TypeKind::Regex { .. }
| TypeKind::Search { .. }
);
let (input, value) = if op == Operator::AnyValue {
// `AnyValue` parses no value; a placeholder is stored.
(input, Value::Uint(0))
} else if is_string_family_type {
// String-family types fall back to an unquoted bare token when the
// regular value parser fails; if the fallback fails too, the original
// error is the one reported.
match parse_value(input) {
Ok(ok) => ok,
Err(orig_err) => parse_bare_string_value(input).map_err(|_| orig_err)?,
}
} else {
parse_value(input)?
};
// Whitespace-only remainder means there is no message.
let (input, message) = if input.trim().is_empty() {
(input, String::new())
} else {
parse_message(input)?
};
let rule = MagicRule {
offset,
typ,
op,
value,
message,
children: vec![], level,
strength_modifier: None, value_transform,
};
Ok((input, rule))
}
fn parse_bare_string_value(input: &str) -> IResult<&str, Value> {
let (input, _) = multispace0(input)?;
if input.is_empty() || input.starts_with(|c: char| c.is_whitespace() || c == '\n' || c == '\r')
{
return Err(nom::Err::Error(NomError::new(
input,
nom::error::ErrorKind::TakeWhile1,
)));
}
let mut bytes: Vec<u8> = Vec::new();
let mut remaining = input;
while let Some(ch) = remaining.chars().next() {
if ch.is_whitespace() || ch == '\n' || ch == '\r' {
break;
}
if ch == '\\' {
if let Ok((rest, b)) = value::parse_hex_byte_with_prefix(remaining) {
bytes.push(b);
remaining = rest;
continue;
}
if let Ok((rest, esc)) = value::parse_escape_sequence(remaining) {
let code = esc as u32;
if let Ok(byte) = u8::try_from(code) {
bytes.push(byte);
} else {
let mut buf = [0u8; 4];
bytes.extend_from_slice(esc.encode_utf8(&mut buf).as_bytes());
}
remaining = rest;
continue;
}
bytes.push(b'\\');
remaining = &remaining[1..];
continue;
}
let mut buf = [0u8; 4];
let utf8 = ch.encode_utf8(&mut buf).as_bytes();
bytes.extend_from_slice(utf8);
remaining = &remaining[ch.len_utf8()..];
}
if bytes.is_empty() {
return Err(nom::Err::Error(NomError::new(
input,
nom::error::ErrorKind::TakeWhile1,
)));
}
let value = String::from_utf8_lossy(&bytes).into_owned();
Ok((remaining, Value::String(value)))
}
/// Parses a `#` comment: after skipping leading whitespace and the `#`,
/// takes everything up to (not including) the next `\n` or `\r` and returns
/// it trimmed.
///
/// # Errors
/// Fails when the first non-whitespace character is not `#`.
pub fn parse_comment(input: &str) -> IResult<&str, String> {
    let (rest, _) = multispace0(input)?;
    let (rest, _) = char('#').parse(rest)?;
    let (rest, body) = take_while(|c: char| !matches!(c, '\n' | '\r')).parse(rest)?;
    Ok((rest, body.trim().to_owned()))
}
/// Returns `true` when the line is empty or consists solely of whitespace.
#[must_use]
pub fn is_empty_line(input: &str) -> bool {
    input.chars().all(char::is_whitespace)
}
/// Returns `true` when the first non-whitespace character of the line is `#`.
#[must_use]
pub fn is_comment_line(input: &str) -> bool {
    input.chars().find(|c| !c.is_whitespace()) == Some('#')
}
/// Returns `true` when the last non-whitespace character of the line is a
/// backslash.
#[must_use]
pub fn has_continuation(input: &str) -> bool {
    input.chars().rfind(|c| !c.is_whitespace()) == Some('\\')
}
#[cfg(test)]
mod tests;