use crate::parser::ast::{MagicRule, MetaType, TypeKind};
use crate::{EvaluationConfig, LibmagicError};
use super::{EvaluationContext, RecursionGuard, RuleMatch, offset, operators, types};
use log::{debug, warn};
use std::sync::atomic::{AtomicBool, Ordering};
/// Scope guard that temporarily re-anchors an [`EvaluationContext`].
///
/// Entering the scope records the context's current last-match-end anchor and
/// base offset, then installs `new_anchor` as the anchor and `0` as the base
/// offset. Dropping the guard writes the recorded values back, which also
/// covers early exits through `?` or `return`.
struct AnchorScope<'a> {
    /// Context whose anchor/base are overridden while the guard is alive.
    context: &'a mut EvaluationContext,
    /// Anchor observed on entry; written back on drop.
    saved_anchor: usize,
    /// Base offset observed on entry; written back on drop.
    saved_base: usize,
}

impl<'a> AnchorScope<'a> {
    /// Captures the current anchor/base, then switches the context to
    /// `new_anchor` with a base offset of zero.
    fn enter(context: &'a mut EvaluationContext, new_anchor: usize) -> Self {
        let (saved_anchor, saved_base) = (context.last_match_end(), context.base_offset());
        context.set_last_match_end(new_anchor);
        context.set_base_offset(0);
        Self {
            context,
            saved_anchor,
            saved_base,
        }
    }

    /// Reborrows the wrapped context for use inside the scope.
    fn context(&mut self) -> &mut EvaluationContext {
        &mut *self.context
    }
}

impl Drop for AnchorScope<'_> {
    /// Restores the anchor first, then the base offset, to their entry values.
    fn drop(&mut self) {
        let Self {
            context,
            saved_anchor,
            saved_base,
        } = self;
        context.set_last_match_end(*saved_anchor);
        context.set_base_offset(*saved_base);
    }
}
/// Scope guard used while a `use` subroutine is being evaluated.
///
/// Entering the scope records the context's current last-match-end anchor and
/// base offset, then sets *both* of them to `use_site`. Dropping the guard
/// writes the recorded values back, which also covers early exits through `?`.
struct SubroutineScope<'a> {
    /// Context whose anchor/base are overridden while the guard is alive.
    context: &'a mut EvaluationContext,
    /// Anchor observed on entry; written back on drop.
    saved_anchor: usize,
    /// Base offset observed on entry; written back on drop.
    saved_base: usize,
}

impl<'a> SubroutineScope<'a> {
    /// Captures the current anchor/base, then points both at `use_site`.
    fn enter(context: &'a mut EvaluationContext, use_site: usize) -> Self {
        let (saved_anchor, saved_base) = (context.last_match_end(), context.base_offset());
        context.set_last_match_end(use_site);
        context.set_base_offset(use_site);
        Self {
            context,
            saved_anchor,
            saved_base,
        }
    }

    /// Reborrows the wrapped context for use inside the scope.
    fn context(&mut self) -> &mut EvaluationContext {
        &mut *self.context
    }
}

impl Drop for SubroutineScope<'_> {
    /// Restores the anchor first, then the base offset, to their entry values.
    fn drop(&mut self) {
        let Self {
            context,
            saved_anchor,
            saved_base,
        } = self;
        context.set_last_match_end(*saved_anchor);
        context.set_base_offset(*saved_base);
    }
}
// Process-wide "already warned" latch for `use` directives evaluated without a rule
// environment. `evaluate_use_rule` swaps it to `true`: the first occurrence is logged at
// `warn` level, later ones at `debug` level.
static USE_WITHOUT_RULE_ENV_WARNED: AtomicBool = AtomicBool::new(false);
// Process-wide "already warned" latch consulted by `evaluate_rules_with_config` so that the
// indirect-without-environment warning is emitted at most once.
static INDIRECT_WITHOUT_RULE_ENV_WARNED: AtomicBool = AtomicBool::new(false);
/// Evaluates a single rule against `buffer` by delegating to [`evaluate_rules`].
///
/// The rule is presented to [`evaluate_rules`] as a one-element slice, so the
/// returned matches are whatever that function produces for the rule (and any
/// children it chooses to descend into).
///
/// # Errors
///
/// Propagates any error returned by [`evaluate_rules`].
pub fn evaluate_single_rule(
    rule: &MagicRule,
    buffer: &[u8],
    context: &mut EvaluationContext,
) -> Result<Vec<RuleMatch>, LibmagicError> {
    let single_rule_list: &[MagicRule] = std::slice::from_ref(rule);
    evaluate_rules(single_rule_list, buffer, context)
}
/// Evaluates one non-meta rule at the offset resolved from the given anchor and base.
///
/// Returns `Ok(Some((offset, value)))` when the rule matches, where `offset` is the
/// resolved absolute offset and `value` is the value produced by the read. Returns
/// `Ok(None)` when the rule does not match, or when the rule is any meta type (meta
/// rules are not evaluated here).
///
/// # Errors
///
/// Propagates offset-resolution errors (the offset is resolved before the rule type is
/// inspected, so this applies to meta rules too) and any error from the pattern or value
/// evaluation helpers.
fn evaluate_single_rule_with_anchor(
    rule: &MagicRule,
    buffer: &[u8],
    last_match_end: usize,
    base_offset: usize,
) -> Result<Option<(usize, crate::parser::ast::Value)>, LibmagicError> {
    use crate::parser::ast::TypeKind;

    let resolved =
        offset::resolve_offset_with_base(&rule.offset, buffer, last_match_end, base_offset)?;

    let outcome = match &rule.typ {
        // Every meta type is a no-op in this function; only `name` gets a diagnostic.
        TypeKind::Meta(meta) => {
            if let MetaType::Name(name) = meta {
                debug!(
                    "Name rule '{name}' reached evaluator (likely bypassed name-table extraction); treating as no-op"
                );
            }
            return Ok(None);
        }
        TypeKind::Regex { .. } | TypeKind::Search { .. } => {
            evaluate_pattern_rule(rule, buffer, resolved)
        }
        _ => evaluate_value_rule(rule, buffer, resolved),
    };

    let (matched, read_value) = outcome?;
    if matched {
        Ok(Some((resolved, read_value)))
    } else {
        Ok(None)
    }
}
/// Evaluates a `use` directive by running the named subroutine's rules.
///
/// Returns `(Some(anchor), matches)` when the subroutine was found and evaluated, where
/// `anchor` is the context's last-match-end as it stood at the end of the subroutine
/// (read before the surrounding scope restores the caller's anchor). Returns
/// `(None, [])` when the context has no rule environment or the name is unknown.
///
/// # Errors
///
/// Propagates offset-resolution errors, recursion-guard errors, and any error from
/// evaluating the subroutine rules.
fn evaluate_use_rule(
    rule: &MagicRule,
    name: &str,
    buffer: &[u8],
    context: &mut EvaluationContext,
) -> Result<(Option<usize>, Vec<RuleMatch>), LibmagicError> {
    let Some(env) = context.rule_env() else {
        // Warn loudly only the first time; afterwards drop to debug level.
        let already_warned = USE_WITHOUT_RULE_ENV_WARNED.swap(true, Ordering::Relaxed);
        if already_warned {
            debug!("use directive '{name}' evaluated without a rule environment; no-op");
        } else {
            warn!(
                "use directive '{name}' evaluated without a rule environment; treating as no-op (subsequent occurrences suppressed)"
            );
        }
        return Ok((None, Vec::new()));
    };

    let Some(subroutine_rules) = env.name_table.get(name) else {
        warn!("use directive references unknown name '{name}'");
        return Ok((None, Vec::new()));
    };

    let anchor_now = context.last_match_end();
    let base_now = context.base_offset();
    let use_site = offset::resolve_offset_with_base(&rule.offset, buffer, anchor_now, base_now)?;

    // The scope and guard are declared in this order so the guard is released first and
    // the scope then restores the caller's anchor/base when the block ends.
    let (terminal_anchor, subroutine_matches) = {
        let mut scope = SubroutineScope::enter(context, use_site);
        let mut guard = RecursionGuard::enter(scope.context())?;
        let found = evaluate_rules(&subroutine_rules, buffer, guard.context())?;
        // Must be read while the scope is still active, i.e. before it restores the anchor.
        let end_anchor = guard.context().last_match_end();
        (end_anchor, found)
    };

    Ok((Some(terminal_anchor), subroutine_matches))
}
/// Evaluates a pattern-bearing rule (the caller routes regex and search types here).
///
/// Returns `(matched, value)`. `matched` is whether the pattern was found for `Equal`,
/// and whether it was *not* found for `NotEqual`. `value` is the value returned by the
/// pattern read, or an empty string value when nothing was found.
///
/// # Errors
///
/// Returns an evaluation error when the pattern read fails, or when the rule's operator
/// is anything other than `Equal` / `NotEqual`.
fn evaluate_pattern_rule(
    rule: &MagicRule,
    buffer: &[u8],
    absolute_offset: usize,
) -> Result<(bool, crate::parser::ast::Value), LibmagicError> {
    let found = types::read_pattern_match(buffer, absolute_offset, &rule.typ, Some(&rule.value))
        .map_err(|e| LibmagicError::EvaluationError(e.into()))?;

    let matched = match &rule.op {
        crate::parser::ast::Operator::Equal => found.is_some(),
        crate::parser::ast::Operator::NotEqual => found.is_none(),
        other => {
            let unsupported = types::TypeReadError::UnsupportedType {
                type_name: format!(
                    "operator {other:?} is not supported for pattern-bearing type {:?}; only Equal (=) and NotEqual (!=) are allowed",
                    rule.typ
                ),
            };
            return Err(LibmagicError::EvaluationError(unsupported.into()));
        }
    };

    let value = match found {
        Some(hit) => hit,
        None => crate::parser::ast::Value::String(String::new()),
    };
    Ok((matched, value))
}
/// Evaluates a value-comparison rule at `absolute_offset`.
///
/// Reads a typed value from the buffer, applies the rule's optional value transform,
/// coerces the rule's expected value to the rule's type, and compares the two using the
/// rule's operator. Returns `(matched, value)` where `value` is the (possibly
/// transformed) value that was read.
///
/// # Errors
///
/// Returns an evaluation error when the typed read or the value transform fails.
fn evaluate_value_rule(
    rule: &MagicRule,
    buffer: &[u8],
    absolute_offset: usize,
) -> Result<(bool, crate::parser::ast::Value), LibmagicError> {
    let raw =
        types::read_typed_value_with_pattern(buffer, absolute_offset, &rule.typ, Some(&rule.value))
            .map_err(|e| LibmagicError::EvaluationError(e.into()))?;

    let observed = if let Some(transform) = rule.value_transform {
        operators::apply_value_transform(&raw, transform)
            .map_err(LibmagicError::EvaluationError)?
    } else {
        raw
    };

    let expected_value = types::coerce_value_to_type(&rule.value, &rule.typ);
    let expected_ref: &crate::parser::ast::Value = expected_value.as_ref();

    // Bitwise-not is handled separately because it needs the type's bit width.
    let matched = if matches!(rule.op, crate::parser::ast::Operator::BitwiseNot) {
        operators::apply_bitwise_not_with_width(&observed, expected_ref, rule.typ.bit_width())
    } else {
        operators::apply_operator(&rule.op, &observed, expected_ref)
    };

    Ok((matched, observed))
}
/// Evaluates `rule.children` one recursion level deeper and appends their matches.
///
/// Does nothing when the rule has no children. A fixed set of data-dependent errors
/// (buffer overrun, invalid offset, invalid value transform, the listed type-read errors,
/// and I/O errors) is logged with `rule_kind` and the rule message and then discarded, so
/// the caller's already-recorded parent match stands.
///
/// # Errors
///
/// Returns the recursion-guard error if the guard cannot be entered, and propagates every
/// child-evaluation error outside the tolerated set -- in particular timeouts and
/// recursion-limit errors.
fn evaluate_children_or_warn(
    rule: &MagicRule,
    rule_kind: &str,
    buffer: &[u8],
    context: &mut EvaluationContext,
    matches: &mut Vec<RuleMatch>,
) -> Result<(), LibmagicError> {
    if !rule.children.is_empty() {
        let mut guard = RecursionGuard::enter(context)?;
        let outcome = evaluate_rules(&rule.children, buffer, guard.context());
        match outcome {
            Ok(child_matches) => matches.extend(child_matches),
            // Tolerated failures: drop the children's results, keep the parent match.
            Err(
                e @ (LibmagicError::EvaluationError(
                    crate::error::EvaluationError::BufferOverrun { .. }
                    | crate::error::EvaluationError::InvalidOffset { .. }
                    | crate::error::EvaluationError::InvalidValueTransform { .. }
                    | crate::error::EvaluationError::TypeReadError(
                        crate::evaluator::types::TypeReadError::BufferOverrun { .. }
                        | crate::evaluator::types::TypeReadError::InvalidPStringLength { .. },
                    ),
                )
                | LibmagicError::IoError(_)),
            ) => {
                warn!(
                    "Discarding child evaluation under {} rule '{}' due to unexpected error: {} -- parent match is still emitted",
                    rule_kind, rule.message, e
                );
            }
            // Everything else (timeout, recursion limit, ...) aborts the evaluation.
            Err(fatal) => return Err(fatal),
        }
    }
    Ok(())
}
/// Evaluates a list of sibling magic rules against `buffer` and returns every match produced.
///
/// Rules are visited in order. The meta types `clear`, `default`, `indirect`, `offset` and
/// `use` are handled by dedicated branches; every other rule goes through
/// `evaluate_single_rule_with_anchor`. When a rule matches, its children are evaluated one
/// recursion level deeper and their matches are appended after the parent's match.
///
/// # Errors
///
/// Returns `LibmagicError::Timeout` when the context has a timeout configured and the time
/// elapsed since *this call* started reaches it (checked on entry and then whenever the
/// per-call rule counter is a multiple of 16). The data-dependent errors enumerated in the
/// match arms below cause the offending rule (or its children) to be skipped; every other
/// error is returned to the caller.
#[allow(clippy::too_many_lines)]
pub fn evaluate_rules(
rules: &[MagicRule],
buffer: &[u8],
context: &mut EvaluationContext,
) -> Result<Vec<RuleMatch>, LibmagicError> {
let mut matches = Vec::with_capacity(8);
// The timeout clock is local to this invocation; each nested call starts its own.
let start_time = std::time::Instant::now();
let mut rule_count = 0u32;
// Whether any sibling has matched since the start of the list or the last `clear`;
// consulted by `default` rules.
let mut sibling_matched: bool = false;
// Anchor as it stood on entry; re-applied before each sibling of a child list.
let entry_anchor = context.last_match_end();
// Flag set by the `indirect` branch right before it re-enters this function with the
// root rules. NOTE(review): presumably `take_*` also clears the flag -- confirm.
let is_indirect_reentry = context.take_indirect_reentry();
// A nested call that is not an indirect re-entry is treated as a child sibling list.
let is_child_sibling_list = context.recursion_depth() > 0 && !is_indirect_reentry;
// Entry-time timeout check, measured from `start_time` taken just above.
if let Some(timeout_ms) = context.timeout_ms()
&& start_time.elapsed().as_millis() >= u128::from(timeout_ms)
{
return Err(LibmagicError::Timeout { timeout_ms });
}
for rule in rules {
// In a child list every sibling starts from the anchor the list was entered with,
// not from wherever the previous sibling left it.
if is_child_sibling_list {
context.set_last_match_end(entry_anchor);
}
rule_count = rule_count.wrapping_add(1);
// `trailing_zeros() >= 4` holds when the counter is a multiple of 16, so the clock is
// only consulted on every 16th rule.
if rule_count.trailing_zeros() >= 4
&& let Some(timeout_ms) = context.timeout_ms()
&& start_time.elapsed().as_millis() >= u128::from(timeout_ms)
{
return Err(LibmagicError::Timeout { timeout_ms });
}
// `clear`: forget that a sibling matched; produces no match of its own.
if let TypeKind::Meta(MetaType::Clear) = &rule.typ {
sibling_matched = false;
continue;
}
// `default`: fires only when no sibling has matched since the last `clear`.
if let TypeKind::Meta(MetaType::Default) = &rule.typ {
if !sibling_matched {
let matches_before = matches.len();
// The match is reported at the current anchor with a placeholder value of 0.
let match_result = RuleMatch {
message: rule.message.clone(),
offset: context.last_match_end(),
level: rule.level,
value: crate::parser::ast::Value::Uint(0),
type_kind: rule.typ.clone(),
confidence: RuleMatch::calculate_confidence(rule.level),
};
matches.push(match_result);
evaluate_children_or_warn(rule, "default", buffer, context, &mut matches)?;
sibling_matched = true;
if matches.len() > matches_before && context.should_stop_at_first_match() {
break;
}
}
continue;
}
// `indirect`: re-run the root rules against the buffer tail starting at the resolved offset.
if let TypeKind::Meta(MetaType::Indirect) = &rule.typ {
let absolute_offset = match offset::resolve_offset_with_base(
&rule.offset,
buffer,
context.last_match_end(),
context.base_offset(),
) {
Ok(o) => o,
// An unresolvable offset just skips this rule.
Err(
e @ LibmagicError::EvaluationError(
crate::error::EvaluationError::BufferOverrun { .. }
| crate::error::EvaluationError::InvalidOffset { .. },
),
) => {
debug!("Skipping indirect rule '{}': {}", rule.message, e);
continue;
}
Err(e) => return Err(e),
};
// Without a rule environment there are no root rules to re-enter.
let Some(root_rules) = context.rule_env().map(|e| e.root_rules.clone()) else {
debug!(
"indirect rule '{}' evaluated without a rule environment; treating as no-op",
rule.message
);
continue;
};
// `get` yields `None` when the offset lies beyond the end of the buffer.
let Some(sub_buffer) = buffer.get(absolute_offset..) else {
debug!(
"Skipping indirect rule '{}': offset {} past buffer end ({} bytes)",
rule.message,
absolute_offset,
buffer.len()
);
continue;
};
let matches_before = matches.len();
context.set_last_match_end(absolute_offset);
let indirect_match = RuleMatch {
message: rule.message.clone(),
offset: absolute_offset,
level: rule.level,
value: crate::parser::ast::Value::String("indirect".to_string()),
type_kind: rule.typ.clone(),
confidence: RuleMatch::calculate_confidence(rule.level),
};
matches.push(indirect_match);
sibling_matched = true;
// Inner block: the recursion guard and anchor scope are released at its end, which
// restores the anchor/base before this rule's own children are evaluated.
{
let mut guard = RecursionGuard::enter(context)?;
// The sub-buffer is evaluated with anchor 0 (and base 0, set by `AnchorScope::enter`).
let mut anchor_scope = AnchorScope::enter(guard.context(), 0);
anchor_scope.context().set_indirect_reentry(true);
match evaluate_rules(&root_rules, sub_buffer, anchor_scope.context()) {
Ok(sub_matches) => {
matches.extend(sub_matches);
}
Err(LibmagicError::Timeout { timeout_ms }) => {
return Err(LibmagicError::Timeout { timeout_ms });
}
Err(e) => return Err(e),
}
}
evaluate_children_or_warn(rule, "indirect", buffer, context, &mut matches)?;
if matches.len() > matches_before && context.should_stop_at_first_match() {
break;
}
continue;
}
// `offset`: reports the resolved offset itself as the match value.
if let TypeKind::Meta(MetaType::Offset) = &rule.typ {
// Only the any-value operator is handled; anything else skips the rule.
if !matches!(rule.op, crate::parser::ast::Operator::AnyValue) {
debug!(
"offset rule '{}': non-`x` operator {:?} not supported; skipping",
rule.message, rule.op
);
continue;
}
let absolute_offset = match offset::resolve_offset_with_base(
&rule.offset,
buffer,
context.last_match_end(),
context.base_offset(),
) {
Ok(o) => o,
// An unresolvable offset just skips this rule.
Err(
e @ LibmagicError::EvaluationError(
crate::error::EvaluationError::BufferOverrun { .. }
| crate::error::EvaluationError::InvalidOffset { .. },
),
) => {
debug!("Skipping offset rule '{}': {}", rule.message, e);
continue;
}
Err(e) => return Err(e),
};
let matches_before = matches.len();
context.set_last_match_end(absolute_offset);
let offset_match = RuleMatch {
message: rule.message.clone(),
offset: absolute_offset,
level: rule.level,
value: crate::parser::ast::Value::Uint(absolute_offset as u64),
type_kind: rule.typ.clone(),
confidence: RuleMatch::calculate_confidence(rule.level),
};
matches.push(offset_match);
sibling_matched = true;
evaluate_children_or_warn(rule, "offset", buffer, context, &mut matches)?;
if matches.len() > matches_before && context.should_stop_at_first_match() {
break;
}
continue;
}
// `use`: run a named subroutine; the `use` rule itself contributes no match entry.
if let TypeKind::Meta(MetaType::Use(name)) = &rule.typ {
let matches_before = matches.len();
let use_resolved = match evaluate_use_rule(rule, name, buffer, context) {
Ok((Some(terminal_anchor), subroutine_matches)) => {
matches.extend(subroutine_matches);
// Carry the subroutine's final anchor forward into this list.
context.set_last_match_end(terminal_anchor);
true
}
// No rule environment or unknown name: nothing was evaluated.
Ok((None, _)) => {
false
}
Err(
e @ LibmagicError::EvaluationError(
crate::error::EvaluationError::BufferOverrun { .. }
| crate::error::EvaluationError::InvalidOffset { .. },
),
) => {
debug!("Skipping use rule '{name}': {e}");
false
}
Err(e) => return Err(e),
};
if use_resolved {
evaluate_children_or_warn(rule, "use", buffer, context, &mut matches)?;
}
if use_resolved {
sibling_matched = true;
}
if matches.len() > matches_before && context.should_stop_at_first_match() {
break;
}
continue;
}
// Ordinary (non-meta) rule: evaluate at the offset resolved from the current anchor/base.
let match_data = match evaluate_single_rule_with_anchor(
rule,
buffer,
context.last_match_end(),
context.base_offset(),
) {
Ok(data) => data,
// Data-dependent read failures skip this rule and move on to the next sibling.
Err(
e @ (LibmagicError::EvaluationError(
crate::error::EvaluationError::BufferOverrun { .. }
| crate::error::EvaluationError::InvalidOffset { .. }
| crate::error::EvaluationError::InvalidValueTransform { .. }
| crate::error::EvaluationError::TypeReadError(
crate::evaluator::types::TypeReadError::BufferOverrun { .. }
| crate::evaluator::types::TypeReadError::InvalidPStringLength { .. },
),
)
| LibmagicError::IoError(_)),
) => {
debug!("Skipping rule '{}': {}", rule.message, e);
continue;
}
Err(e) => {
return Err(e);
}
};
if let Some((absolute_offset, read_value)) = match_data {
// Advance the anchor to just past the bytes this match consumed (saturating on overflow).
let consumed = types::bytes_consumed_with_pattern(
buffer,
absolute_offset,
&rule.typ,
Some(&rule.value),
);
let new_anchor = absolute_offset.saturating_add(consumed);
context.set_last_match_end(new_anchor);
sibling_matched = true;
let match_result = RuleMatch {
message: rule.message.clone(),
offset: absolute_offset,
level: rule.level,
value: read_value,
type_kind: rule.typ.clone(),
confidence: RuleMatch::calculate_confidence(rule.level),
};
matches.push(match_result);
// Children are evaluated one recursion level deeper; the guard lives until the end
// of this `if` block.
if !rule.children.is_empty() {
let mut guard = RecursionGuard::enter(context)?;
match evaluate_rules(&rule.children, buffer, guard.context()) {
Ok(child_matches) => {
matches.extend(child_matches);
}
Err(LibmagicError::Timeout { timeout_ms }) => {
return Err(LibmagicError::Timeout { timeout_ms });
}
// Tolerated child failures: log, drop the children's results, keep the parent match.
Err(
e @ (LibmagicError::EvaluationError(
crate::error::EvaluationError::BufferOverrun { .. }
| crate::error::EvaluationError::InvalidOffset { .. }
| crate::error::EvaluationError::InvalidValueTransform { .. }
| crate::error::EvaluationError::TypeReadError(
crate::evaluator::types::TypeReadError::BufferOverrun { .. }
| crate::evaluator::types::TypeReadError::InvalidPStringLength {
..
},
),
)
| LibmagicError::IoError(_)),
) => {
warn!(
"Discarding child evaluation under rule '{}' due to unexpected error: {} -- parent match is still emitted; investigate the recursive evaluate_rules error-handling path",
rule.message, e
);
}
Err(e) => {
return Err(e);
}
}
}
// A match was recorded above, so honour stop-at-first-match here.
if context.should_stop_at_first_match() {
break;
}
}
}
Ok(matches)
}
/// Evaluates `rules` against `buffer` using a fresh context built from `config`.
///
/// The configuration is validated first. In builds with debug assertions enabled, a
/// one-time warning is logged when the rule tree contains an `indirect` rule
/// (NOTE(review): presumably because a context created by `EvaluationContext::new` carries
/// no rule environment -- confirm). The regex cache is reset before evaluation starts.
///
/// # Errors
///
/// Returns the validation error if `config` is invalid, otherwise propagates whatever
/// [`evaluate_rules`] returns.
pub fn evaluate_rules_with_config(
    rules: &[MagicRule],
    buffer: &[u8],
    config: &EvaluationConfig,
) -> Result<Vec<RuleMatch>, LibmagicError> {
    config.validate()?;

    // `cfg!` is used instead of a `#[cfg]` attribute so that `contains_indirect_rule` and
    // the warn-once latch stay referenced in every build profile (no dead-code warnings in
    // release builds). With debug assertions off the condition is the constant `false`,
    // so `&&` short-circuits and neither the tree scan nor the atomic swap runs.
    if cfg!(debug_assertions)
        && contains_indirect_rule(rules)
        && !INDIRECT_WITHOUT_RULE_ENV_WARNED.swap(true, Ordering::Relaxed)
    {
        warn!(
            "{} (subsequent occurrences suppressed)",
            crate::error::EvaluationError::indirect_without_environment()
        );
    }

    crate::evaluator::types::regex::reset_regex_cache();
    let mut context = EvaluationContext::new(config.clone());
    evaluate_rules(rules, buffer, &mut context)
}
/// Returns `true` when any rule in `rules`, or in any of their descendants, is an
/// `indirect` meta rule. The tree is searched depth-first and stops at the first hit.
fn contains_indirect_rule(rules: &[MagicRule]) -> bool {
    for rule in rules {
        if matches!(rule.typ, TypeKind::Meta(MetaType::Indirect)) {
            return true;
        }
        if contains_indirect_rule(&rule.children) {
            return true;
        }
    }
    false
}
// Unit tests for this module live in a separate `tests` submodule that is only compiled
// for test builds.
#[cfg(test)]
mod tests;