libmagic_rs/evaluator/engine/mod.rs
1// Copyright (c) 2025-2026 the libmagic-rs contributors
2// SPDX-License-Identifier: Apache-2.0
3
4//! Core evaluation engine for magic rules.
5//!
6//! This module contains the core recursive evaluation logic for executing magic
7//! rules against file buffers. It is responsible for:
8//! - Evaluating a single rule via [`evaluate_single_rule`] (a thin wrapper
9//! around `evaluate_rules` that delegates one rule through the full
10//! context-aware pipeline)
11//! - Evaluating hierarchical rule sets with context (`evaluate_rules`)
12//! - Providing a convenience wrapper for evaluation with configuration
13//! (`evaluate_rules_with_config`)
14
15use crate::parser::ast::{MagicRule, MetaType, TypeKind};
16use crate::{EvaluationConfig, LibmagicError};
17
18use super::{EvaluationContext, RecursionGuard, RuleMatch, offset, operators, types};
19use log::{debug, warn};
20use std::sync::atomic::{AtomicBool, Ordering};
21
22/// RAII guard that saves the GNU `file` previous-match anchor **and**
23/// `base_offset` on entry and restores both on drop.
24///
25/// `MetaType::Indirect` re-evaluates the root rule list at the resolved
26/// offset. The re-entered rules are top-level-semantic (`base_offset=0`)
27/// and must start with a fresh anchor (the resolved indirect offset).
28/// When `indirect` fires inside a `MetaType::Use` subroutine, the outer
29/// subroutine's non-zero `base_offset` would otherwise leak into the
30/// root re-entry, causing every positive absolute offset in the re-entered
31/// database to be biased by the outer use-site -- producing reads at the
32/// wrong positions. Saving and restoring `base_offset` here prevents that.
33///
34/// Without an RAII wrapper, every early-return path inside the indirect
35/// branch would have to remember to restore both fields manually.
36struct AnchorScope<'a> {
37 context: &'a mut EvaluationContext,
38 saved_anchor: usize,
39 saved_base: usize,
40}
41
42impl<'a> AnchorScope<'a> {
43 /// Save the current anchor and `base_offset`, then seed the context
44 /// with `new_anchor` and reset `base_offset` to 0.
45 fn enter(context: &'a mut EvaluationContext, new_anchor: usize) -> Self {
46 let saved_anchor = context.last_match_end();
47 let saved_base = context.base_offset();
48 context.set_last_match_end(new_anchor);
49 context.set_base_offset(0);
50 Self {
51 context,
52 saved_anchor,
53 saved_base,
54 }
55 }
56
57 /// Access the underlying context for the duration of the guard.
58 fn context(&mut self) -> &mut EvaluationContext {
59 self.context
60 }
61}
62
63impl Drop for AnchorScope<'_> {
64 fn drop(&mut self) {
65 self.context.set_last_match_end(self.saved_anchor);
66 self.context.set_base_offset(self.saved_base);
67 }
68}
69
70/// RAII guard for `MetaType::Use` subroutine dispatch.
71///
72/// Saves `last_match_end` and `base_offset` on entry, seeds the context
73/// with the use-site offset (for both fields so that a subroutine's
74/// `&0` relative offset resolves to the use-site and its positive
75/// absolute offsets bias against the use-site per magic(5)), and
76/// restores both on drop.
77///
78/// This is the safety net for early-return paths inside
79/// `evaluate_use_rule`: a `RecursionGuard::enter` failure or a
80/// `Timeout`/`RecursionLimitExceeded` inside the subroutine body would
81/// otherwise leave the caller's context with corrupted anchor and
82/// base-offset state. The guard's `Drop` impl restores both fields on
83/// every exit path, error or success.
84struct SubroutineScope<'a> {
85 context: &'a mut EvaluationContext,
86 saved_anchor: usize,
87 saved_base: usize,
88}
89
90impl<'a> SubroutineScope<'a> {
91 fn enter(context: &'a mut EvaluationContext, use_site: usize) -> Self {
92 let saved_anchor = context.last_match_end();
93 let saved_base = context.base_offset();
94 context.set_last_match_end(use_site);
95 context.set_base_offset(use_site);
96 Self {
97 context,
98 saved_anchor,
99 saved_base,
100 }
101 }
102
103 fn context(&mut self) -> &mut EvaluationContext {
104 self.context
105 }
106}
107
108impl Drop for SubroutineScope<'_> {
109 fn drop(&mut self) {
110 self.context.set_last_match_end(self.saved_anchor);
111 self.context.set_base_offset(self.saved_base);
112 }
113}
114
/// Process-local once guard for the "use directive without rule environment"
/// warning. Ensures we surface the misconfiguration exactly once per process
/// so low-level programmatic consumers of [`evaluate_rules`] (tests, fuzz
/// harnesses) that intentionally run without a `MagicDatabase`-attached
/// environment do not flood the log on every `Use` rule they encounter.
///
/// `evaluate_use_rule` flips this latch with `swap(true, Ordering::Relaxed)`:
/// the first occurrence is logged at `warn!`, and every later occurrence is
/// demoted to `debug!` rather than dropped. The flag starts as `false` and
/// nothing in `evaluate_use_rule` ever clears it again.
static USE_WITHOUT_RULE_ENV_WARNED: AtomicBool = AtomicBool::new(false);
121
/// Process-local once guard for the "`evaluate_rules_with_config` called
/// with an `indirect` rule but without a `RuleEnvironment`" warning.
/// Same rationale as `USE_WITHOUT_RULE_ENV_WARNED`: surface the
/// misconfiguration exactly once per process so a large corpus of
/// env-less `indirect` rules does not flood the log.
///
/// NOTE(review): the code that sets this latch is outside the portion of
/// the file reviewed here; presumably it follows the same
/// warn-once-then-debug pattern as the `use` latch -- confirm at the
/// `indirect` dispatch site.
static INDIRECT_WITHOUT_RULE_ENV_WARNED: AtomicBool = AtomicBool::new(false);
128
129/// Evaluate a single magic rule against a file buffer
130///
131/// This is a thin wrapper around [`evaluate_rules`] that evaluates exactly
132/// one top-level rule (and any of its children) against a buffer, using the
133/// caller-provided [`EvaluationContext`] to enforce timeout, recursion, and
134/// string-size limits. It is a BREAKING API change introduced in pre-1.0:
135/// earlier versions took no context and returned `Option<(usize, Value)>`.
136///
137/// # Arguments
138///
139/// * `rule` - The magic rule to evaluate
140/// * `buffer` - The file buffer to evaluate against
141/// * `context` - Mutable evaluation context that carries the configured
142/// safety limits (timeout, max recursion depth, max string length) and
143/// the GNU `file` previous-match anchor used for relative-offset
144/// resolution. Callers reusing a context across multiple buffers must
145/// call [`EvaluationContext::reset`](crate::evaluator::EvaluationContext::reset)
146/// between calls -- see [`evaluate_rules`] for details.
147///
148/// # Returns
149///
150/// Returns `Ok(Vec<RuleMatch>)` containing the parent match (if the rule
151/// matched) plus any child matches collected recursively. An empty vector
152/// means the rule did not match or was skipped due to a data-dependent
153/// evaluation error (buffer overrun, invalid offset, etc.). Only critical
154/// failures such as `LibmagicError::Timeout` or recursion-limit exhaustion
155/// are returned as `Err`.
156///
157/// # Examples
158///
159/// ```rust
160/// use libmagic_rs::evaluator::{evaluate_single_rule, EvaluationContext};
161/// use libmagic_rs::EvaluationConfig;
162/// use libmagic_rs::parser::ast::{MagicRule, OffsetSpec, TypeKind, Operator, Value};
163///
164/// // Create a rule to check for ELF magic bytes at offset 0
165/// let rule = MagicRule::new(OffsetSpec::Absolute(0), TypeKind::Byte { signed: true }, Operator::Equal, Value::Uint(0x7f), "ELF magic".to_string());
166///
167/// let mut context = EvaluationContext::new(EvaluationConfig::default());
168/// let elf_buffer = &[0x7f, 0x45, 0x4c, 0x46]; // ELF magic bytes
169/// let matches = evaluate_single_rule(&rule, elf_buffer, &mut context).unwrap();
170/// assert_eq!(matches.len(), 1); // Should match
171///
172/// context.reset();
173/// let non_elf_buffer = &[0x50, 0x4b, 0x03, 0x04]; // ZIP magic bytes
174/// let matches = evaluate_single_rule(&rule, non_elf_buffer, &mut context).unwrap();
175/// assert!(matches.is_empty()); // Should not match
176/// ```
177///
178/// # Errors
179///
180/// * `LibmagicError::Timeout` - If evaluation exceeds the configured timeout
181/// * `LibmagicError::EvaluationError` - For critical failures such as the
182/// recursion limit being exceeded. Data-dependent errors (buffer overrun,
183/// invalid offset, malformed pstring length) are handled gracefully by
184/// [`evaluate_rules`] and surface as an empty match vector rather than
185/// an error.
186pub fn evaluate_single_rule(
187 rule: &MagicRule,
188 buffer: &[u8],
189 context: &mut EvaluationContext,
190) -> Result<Vec<RuleMatch>, LibmagicError> {
191 evaluate_rules(std::slice::from_ref(rule), buffer, context)
192}
193
194/// Internal: evaluate a single rule against a buffer, supplying an explicit
195/// anchor for relative-offset resolution.
196///
197/// This is the worker behind both [`evaluate_single_rule`] (which defaults
198/// the anchor to 0) and [`evaluate_rules`] (which threads the anchor from
199/// `EvaluationContext::last_match_end()`).
200fn evaluate_single_rule_with_anchor(
201 rule: &MagicRule,
202 buffer: &[u8],
203 last_match_end: usize,
204 base_offset: usize,
205 max_string_length: usize,
206) -> Result<Option<(usize, crate::parser::ast::Value)>, LibmagicError> {
207 use crate::parser::ast::TypeKind;
208
209 // Step 1: Resolve the offset specification to an absolute position.
210 // `base_offset` is non-zero only inside a `MetaType::Use` subroutine
211 // body, where it biases positive absolute offsets to the use-site.
212 let absolute_offset =
213 offset::resolve_offset_with_base(&rule.offset, buffer, last_match_end, base_offset)?;
214
215 // Step 2 & 3: Dispatch on type category. Pattern-bearing types
216 // (Regex, Search) take a different path from fixed-width types
217 // because the rule's `value` operand is the *pattern*, not an
218 // expected matched value. Running those through `apply_operator`
219 // would compare matched text ("123") against the pattern literal
220 // ("[0-9]+") and produce false negatives on any regex with
221 // metacharacters.
222 //
223 // Meta-type directives (`default`, `clear`, `name`, `use`,
224 // `indirect`, `offset`) are dispatched by `evaluate_rules` at the
225 // outer loop level (not here) -- this single-rule helper is only
226 // invoked for non-meta rules. Short-circuiting the Meta arms here
227 // with `Ok(None)` is defense-in-depth for programmatic callers
228 // (property tests, fuzz harnesses) that hand-build a Meta rule
229 // and feed it directly to `evaluate_single_rule`; without the
230 // guard, the value/pattern paths would surface
231 // `TypeReadError::UnsupportedType`.
232 let (matched, read_value) = match &rule.typ {
233 TypeKind::Meta(MetaType::Name(name)) => {
234 // `Name` rules are normally hoisted into the name table at
235 // parse time and should not reach the evaluator. Programmatic
236 // consumers (e.g. fuzz harnesses, property tests) can still
237 // construct them directly; treat that as a no-op rather than
238 // a hard failure so the evaluator-never-panics invariant is
239 // preserved.
240 debug!(
241 "Name rule '{name}' reached evaluator (likely bypassed name-table extraction); treating as no-op"
242 );
243 return Ok(None);
244 }
245 TypeKind::Meta(MetaType::Use(_)) => {
246 // `Use` is dispatched inline by `evaluate_rules` so it can
247 // push the subroutine's matches into the caller's match
248 // vector. Reaching this arm means the rule went through the
249 // single-rule path (e.g. via `evaluate_single_rule`) which
250 // lacks that wiring; treat it as a silent no-op.
251 return Ok(None);
252 }
253 TypeKind::Meta(_) => return Ok(None),
254 TypeKind::Regex { .. } | TypeKind::Search { .. } => {
255 evaluate_pattern_rule(rule, buffer, absolute_offset, max_string_length)?
256 }
257 // Flagged `string` rules route through the pattern-bearing path
258 // (see GOTCHAS S2.4 for the contract) so `compare_string_with_flags`
259 // can do the case-fold / whitespace-flexible match in one pass.
260 // Default-flag strings (the common case) take the existing
261 // value-rule fast path with byte-exact `apply_equal`.
262 TypeKind::String { flags, .. } if !flags.is_empty() => {
263 evaluate_pattern_rule(rule, buffer, absolute_offset, max_string_length)?
264 }
265 _ => evaluate_value_rule(rule, buffer, absolute_offset, max_string_length)?,
266 };
267 Ok(matched.then_some((absolute_offset, read_value)))
268}
269
270/// Evaluate a `TypeKind::Meta(MetaType::Use(name))` rule inline.
271///
272/// Looks up `name` in the context's rule environment, temporarily sets the
273/// GNU `file` previous-match anchor to the resolved offset, and recursively
274/// evaluates the subroutine's rules against `buffer`. Any matches produced
275/// by the subroutine are returned in document order and are intended to be
276/// pushed into the caller's match vector *before* the synthetic `Use` match
277/// itself (matching GNU `file` behavior where a `use` site is replaced by
278/// its expansion in the output).
279///
280/// Returns `Ok((Some(absolute_offset), matches))` on a successful resolution
281/// (even if the subroutine produced no matches), or `Ok((None, vec![]))`
282/// when:
283/// - the context has no rule environment attached (programmatic consumers
284/// bypassing `MagicDatabase`)
285/// - the referenced name is not in the table (logged at warn level)
286///
287/// Recursion-limit propagation is handled via [`RecursionGuard`] so that a
288/// subroutine calling `use` on itself triggers `RecursionLimitExceeded`
289/// instead of a stack overflow.
290fn evaluate_use_rule(
291 rule: &MagicRule,
292 name: &str,
293 buffer: &[u8],
294 context: &mut EvaluationContext,
295) -> Result<(Option<usize>, Vec<RuleMatch>), LibmagicError> {
296 let Some(env) = context.rule_env() else {
297 // Surface the misconfiguration once per process at warn! level so
298 // it is visible in default logging, then gate subsequent hits so a
299 // magic file with many `use` directives does not flood the log.
300 // Use `Ordering::Relaxed`: the flag is an idempotent diagnostic
301 // latch, not a synchronization primitive guarding other state.
302 if USE_WITHOUT_RULE_ENV_WARNED.swap(true, Ordering::Relaxed) {
303 debug!("use directive '{name}' evaluated without a rule environment; no-op");
304 } else {
305 warn!(
306 "use directive '{name}' evaluated without a rule environment; treating as no-op (subsequent occurrences suppressed)"
307 );
308 }
309 return Ok((None, Vec::new()));
310 };
311
312 let Some(subroutine_rules) = env.name_table.get(name) else {
313 warn!("use directive references unknown name '{name}'");
314 return Ok((None, Vec::new()));
315 };
316 // `NameTable::get` returns an `Arc<[MagicRule]>`, so this clone is a
317 // reference-count increment rather than a deep copy of the rule tree.
318 // The Arc is cloned here to release the immutable borrow of `context`
319 // (via `env`) before we mutably borrow the context below.
320
321 // Resolve the use-site offset under the *caller's* base, not the
322 // subroutine's -- the use rule itself is in the caller's scope.
323 let absolute_offset = offset::resolve_offset_with_base(
324 &rule.offset,
325 buffer,
326 context.last_match_end(),
327 context.base_offset(),
328 )?;
329
330 // `SubroutineScope` seeds `last_match_end` and `base_offset` with
331 // the use-site offset and restores both on drop. This is the
332 // safety net for early-return paths below -- if
333 // `RecursionGuard::enter` or the inner `evaluate_rules` returns
334 // `Err(Timeout)` / `Err(RecursionLimitExceeded)`, the `?` unwinds
335 // through the guard's `Drop` impl and the caller's context
336 // returns to its pre-use state. Without the RAII wrapper a manual
337 // save/restore pair would be bypassed on every error path.
338 // Capture both the subroutine's matches AND the terminal anchor
339 // where the subroutine left `last_match_end`. The terminal anchor
340 // is what GNU `file`-compatible inlining semantics require: sibling
341 // rules after the `use` site must resolve `&N` against the position
342 // the subroutine reached, not the use-site offset. Reading the
343 // anchor INSIDE the scope (before Drop restores the caller's value)
344 // preserves it for the caller.
345 let (subroutine_matches, terminal_anchor) = {
346 let mut scope = SubroutineScope::enter(context, absolute_offset);
347 let mut guard = RecursionGuard::enter(scope.context())?;
348 let matches = evaluate_rules(&subroutine_rules, buffer, guard.context())?;
349 let terminal = guard.context().last_match_end();
350 (matches, terminal)
351 };
352
353 Ok((Some(terminal_anchor), subroutine_matches))
354}
355
356/// Evaluate a pattern-bearing rule (`TypeKind::Regex` / `TypeKind::Search`).
357///
358/// `read_pattern_match` returns `Some(value)` on a successful match
359/// (possibly zero-width, e.g., `a*`) and `None` on a genuine miss; the
360/// engine translates those directly into `Equal`/`NotEqual`. Any other
361/// operator on a pattern-bearing type is a magic-file semantic bug and
362/// surfaces as [`TypeReadError::UnsupportedType`] -- the earlier
363/// fallthrough to `apply_operator` masked this by producing nonsense
364/// ordering comparisons against the pattern source text.
365///
366/// On a miss we return `Value::String(String::new())` as a display
367/// placeholder; the engine has already decided `matched = false` by
368/// then, so the placeholder only affects display and
369/// `bytes_consumed_with_pattern` (which re-derives the match position
370/// from the pattern, not this value).
371fn evaluate_pattern_rule(
372 rule: &MagicRule,
373 buffer: &[u8],
374 absolute_offset: usize,
375 max_string_length: usize,
376) -> Result<(bool, crate::parser::ast::Value), LibmagicError> {
377 let match_outcome = types::read_pattern_match(
378 buffer,
379 absolute_offset,
380 &rule.typ,
381 Some(&rule.value),
382 max_string_length,
383 )
384 .map_err(|e| LibmagicError::EvaluationError(e.into()))?;
385 let pattern_found = match_outcome.is_some();
386 let matched = match &rule.op {
387 crate::parser::ast::Operator::Equal => pattern_found,
388 crate::parser::ast::Operator::NotEqual => !pattern_found,
389 other => {
390 return Err(LibmagicError::EvaluationError(
391 types::TypeReadError::UnsupportedType {
392 type_name: format!(
393 "operator {other:?} is not supported for pattern-bearing type {:?}; only Equal (=) and NotEqual (!=) are allowed",
394 rule.typ
395 ),
396 }
397 .into(),
398 ));
399 }
400 };
401 let value = match_outcome.unwrap_or_else(|| crate::parser::ast::Value::String(String::new()));
402 Ok((matched, value))
403}
404
405/// Evaluate a value-based rule (all non-pattern-bearing `TypeKind` variants).
406///
407/// Reads the typed value at `absolute_offset`, coerces the rule's
408/// expected value to the target type's signedness/width (zero-copy via
409/// `Cow::Borrowed` on the hot path), and applies the operator.
410/// `BitwiseNot` needs type-aware width masking so the complement is
411/// computed at the type's natural width (e.g. byte `NOT 0x00 = 0xFF`,
412/// not `u64::MAX`).
413fn evaluate_value_rule(
414 rule: &MagicRule,
415 buffer: &[u8],
416 absolute_offset: usize,
417 max_string_length: usize,
418) -> Result<(bool, crate::parser::ast::Value), LibmagicError> {
419 let read_value = types::read_typed_value_with_pattern(
420 buffer,
421 absolute_offset,
422 &rule.typ,
423 Some(&rule.value),
424 max_string_length,
425 )
426 .map_err(|e| LibmagicError::EvaluationError(e.into()))?;
427
428 // Apply any pre-comparison value transform (`type+N`/`type-N`/`type*N`/
429 // `type/N`/`type%N`/`type|N`/`type^N`). The transform runs on the read
430 // value before the comparison operator and before printf-style format
431 // substitution, so `%d` in the message renders the post-transform
432 // number. `&MASK` is *not* handled here -- it lives at the operator
433 // layer via `Operator::BitwiseAndMask`.
434 let transformed_value = match rule.value_transform {
435 None => read_value,
436 Some(t) => operators::apply_value_transform(&read_value, t)
437 .map_err(LibmagicError::EvaluationError)?,
438 };
439
440 let expected_value = types::coerce_value_to_type(&rule.value, &rule.typ);
441 let expected_ref: &crate::parser::ast::Value = expected_value.as_ref();
442
443 let matched = match &rule.op {
444 crate::parser::ast::Operator::BitwiseNot => operators::apply_bitwise_not_with_width(
445 &transformed_value,
446 expected_ref,
447 rule.typ.bit_width(),
448 ),
449 op => operators::apply_operator(op, &transformed_value, expected_ref),
450 };
451 Ok((matched, transformed_value))
452}
453
454/// Evaluate a rule's children under the standard recursion-guard/graceful-skip discipline.
455///
456/// This helper centralises the `RecursionGuard` + `evaluate_rules` + error-dispatch
457/// pattern that is identical across the `Default`, `Indirect`, `Offset`, and `Use`
458/// meta-type arms in [`evaluate_rules`]. Extracting it prevents the four copies
459/// from drifting apart during future maintenance.
460///
461/// # Behaviour
462///
463/// * If `rule.children` is empty the function is a no-op (returns `Ok(())`).
464/// * Child matches are appended to `matches` in document order.
465/// * `LibmagicError::Timeout` and `LibmagicError::EvaluationError(RecursionLimitExceeded)`
466/// propagate immediately as `Err` so the caller can bail out.
467/// * Data-dependent errors (`BufferOverrun`, `InvalidOffset`,
468/// `TypeReadError::BufferOverrun`, `TypeReadError::InvalidPStringLength`,
469/// `IoError`) are logged at `warn!` and swallowed; the parent match
470/// already in `matches` is left intact. This mirrors the defensive
471/// comment in each arm: the inner `evaluate_rules` already catches and
472/// logs individual child failures, so this arm only fires if that
473/// strategy changes.
474///
475/// # Arguments
476///
477/// * `rule` – The parent rule whose children will be evaluated.
478/// * `rule_kind` – A short label for the rule kind used in the `warn!`
479/// message (e.g. `"default"`, `"indirect"`, `"offset"`, `"use"`).
480/// * `buffer` – The file buffer passed to the recursive call.
481/// * `context` – Mutable evaluation context; the recursion depth is
482/// incremented on entry and decremented on drop via [`RecursionGuard`].
483/// * `matches` – Output vector; child matches are appended here.
484fn evaluate_children_or_warn(
485 rule: &MagicRule,
486 rule_kind: &str,
487 buffer: &[u8],
488 context: &mut EvaluationContext,
489 matches: &mut Vec<RuleMatch>,
490) -> Result<(), LibmagicError> {
491 if rule.children.is_empty() {
492 return Ok(());
493 }
494 let mut guard = RecursionGuard::enter(context)?;
495 match evaluate_rules(&rule.children, buffer, guard.context()) {
496 Ok(child_matches) => {
497 matches.extend(child_matches);
498 }
499 Err(LibmagicError::Timeout { timeout_ms }) => {
500 return Err(LibmagicError::Timeout { timeout_ms });
501 }
502 // `RecursionLimitExceeded` is listed explicitly (rather than
503 // relying on the catch-all below) so a future maintainer adding
504 // another swallowed variant cannot accidentally swallow it.
505 // Both this arm and the catch-all intentionally propagate via
506 // `return Err(e)`; `match_same_arms` is suppressed because the
507 // explicit arm's purpose is documentation and future-proofing,
508 // not different behavior. See GOTCHAS S13 for the recursion-
509 // depth guard contract.
510 #[allow(clippy::match_same_arms)]
511 Err(
512 e @ LibmagicError::EvaluationError(
513 crate::error::EvaluationError::RecursionLimitExceeded { .. },
514 ),
515 ) => return Err(e),
516 Err(
517 e @ (LibmagicError::EvaluationError(
518 crate::error::EvaluationError::BufferOverrun { .. }
519 | crate::error::EvaluationError::InvalidOffset { .. }
520 | crate::error::EvaluationError::InvalidValueTransform { .. }
521 | crate::error::EvaluationError::TypeReadError(
522 crate::evaluator::types::TypeReadError::BufferOverrun { .. }
523 | crate::evaluator::types::TypeReadError::InvalidPStringLength { .. },
524 ),
525 )
526 | LibmagicError::IoError(_)),
527 ) => {
528 warn!(
529 "Discarding child evaluation under {} rule '{}' due to unexpected error: {} -- parent match is still emitted",
530 rule_kind, rule.message, e
531 );
532 }
533 Err(e) => return Err(e),
534 }
535 Ok(())
536}
537
538/// Evaluate a list of magic rules against a file buffer with hierarchical processing
539///
540/// This function implements the core hierarchical rule evaluation algorithm with graceful
541/// error handling:
542/// 1. Evaluates each top-level rule in sequence
543/// 2. If a parent rule matches, evaluates its child rules for refinement
544/// 3. Collects all matches or stops at first match based on configuration
545/// 4. Maintains evaluation context for recursion limits and state
546/// 5. Implements graceful degradation by skipping problematic rules and continuing evaluation
547///
548/// The hierarchical evaluation follows these principles:
549/// - Parent rules must match before children are evaluated
550/// - Child rules provide refinement and additional detail
551/// - Evaluation can stop at first match or continue for all matches
552/// - Recursion depth is limited to prevent infinite loops
553/// - Problematic rules are skipped to allow evaluation to continue
554///
555/// # Arguments
556///
557/// * `rules` - The list of magic rules to evaluate
558/// * `buffer` - The file buffer to evaluate against
559/// * `context` - Mutable evaluation context for state management. **Callers
560/// reusing a context across multiple buffers must call
561/// [`EvaluationContext::reset`](crate::evaluator::EvaluationContext::reset)
562/// between calls** -- the GNU `file` previous-match anchor and the
563/// recursion-depth counter both advance during evaluation and would
564/// otherwise leak across buffers. The same applies when this function
565/// returns `Err` mid-evaluation (e.g., `LibmagicError::Timeout` or
566/// `RecursionLimitExceeded`): both the anchor and (potentially) the
567/// recursion depth are left in a partially-advanced state, and a retry
568/// on the same context without `reset()` will resolve relative offsets
569/// against the stale anchor and apply the wrong recursion budget.
570/// [`evaluate_rules_with_config`] always builds a fresh context and is the
571/// safer choice when context reuse isn't required.
572///
573/// # Returns
574///
575/// Returns `Ok(Vec<RuleMatch>)` containing all matches found. Errors in individual rules
576/// are skipped to allow evaluation to continue. Only returns `Err(LibmagicError)`
577/// for critical failures like timeout or recursion limit exceeded.
578///
579/// # Examples
580///
581/// ```rust
582/// use libmagic_rs::evaluator::{evaluate_rules, EvaluationContext, RuleMatch};
583/// use libmagic_rs::parser::ast::{MagicRule, OffsetSpec, TypeKind, Operator, Value};
584/// use libmagic_rs::EvaluationConfig;
585///
586/// // Create a hierarchical rule set for ELF files
587/// let parent_rule = MagicRule::new(
588/// OffsetSpec::Absolute(0),
589/// TypeKind::Byte { signed: true },
590/// Operator::Equal,
591/// Value::Uint(0x7f),
592/// "ELF".to_string(),
593/// )
594/// .with_children(vec![
595/// MagicRule::new(
596/// OffsetSpec::Absolute(4),
597/// TypeKind::Byte { signed: true },
598/// Operator::Equal,
599/// Value::Uint(2),
600/// "64-bit".to_string(),
601/// )
602/// .with_level(1),
603/// ]);
604///
605/// let rules = vec![parent_rule];
606/// let buffer = &[0x7f, 0x45, 0x4c, 0x46, 0x02, 0x01]; // ELF64 header
607/// let config = EvaluationConfig::default();
608/// let mut context = EvaluationContext::new(config);
609///
610/// let matches = evaluate_rules(&rules, buffer, &mut context).unwrap();
611/// assert_eq!(matches.len(), 2); // Parent and child should both match
612/// ```
613///
614/// # Errors
615///
616/// * `LibmagicError::Timeout` - If evaluation exceeds configured timeout
617/// * `LibmagicError::EvaluationError` - Only for critical failures like recursion limit exceeded
618///
619/// Individual rule evaluation errors are handled gracefully and do not stop the overall evaluation.
620#[allow(clippy::too_many_lines)]
621pub fn evaluate_rules(
622 rules: &[MagicRule],
623 buffer: &[u8],
624 context: &mut EvaluationContext,
625) -> Result<Vec<RuleMatch>, LibmagicError> {
626 let mut matches = Vec::with_capacity(8);
627 let start_time = std::time::Instant::now();
628 let mut rule_count = 0u32;
629
630 // Per-level "did any sibling match yet?" flag for `default`/`clear`
631 // dispatch. Each recursive descent gets its own fresh flag, so child
632 // sibling chains track their own state independently of the parent.
633 let mut sibling_matched: bool = false;
634
635 // Per-level entry anchor: captured at the start of this sibling list's
636 // evaluation. For CHILD sibling lists (recursion_depth > 0), the
637 // GNU `file`/libmagic previous-match anchor is reset to this value
638 // between sibling iterations so that `&N` offsets on continuation
639 // siblings resolve against the parent-level anchor, not against
640 // whatever the *previous sibling* left the anchor at. This matches
641 // libmagic's continuation-level model (`ms->c.li[cont_level]`)
642 // where each level tracks its own anchor; a sibling at level L does
643 // not inherit the post-match anchor of another sibling at level L.
644 //
645 // TOP-LEVEL siblings (recursion_depth == 0) are independent
646 // classification attempts -- each top-level rule intentionally sees
647 // the anchor advance that prior top-level rules produced (see
648 // GOTCHAS S3.8 and the `relative_anchor_can_decrease_...`
649 // integration test). Gate the reset on recursion_depth to preserve
650 // that documented discipline while still fixing the continuation-
651 // sibling behavior that the GNU `file` `searchbug.magic` fixture
652 // relies on.
653 //
654 // Recursing into a matched rule's children still carries forward the
655 // post-match anchor (via the current value of `last_match_end()` at
656 // the point of recursion), so child sibling lists see their parent's
657 // resolved position as their own entry anchor.
658 //
659 // INDIRECT RE-ENTRY exception: `MetaType::Indirect` dispatches its
660 // sub-evaluation via `RecursionGuard::enter` (to bound the recursion
661 // cycle), which forces `recursion_depth > 0`. But an indirect
662 // re-entry semantically evaluates the root rule list with TOP-LEVEL
663 // sibling semantics -- each rule is an independent classification
664 // attempt against the re-entered sub-buffer, NOT a continuation
665 // list. The indirect dispatch sets `context.set_indirect_reentry(true)`
666 // just before this call; `take_indirect_reentry()` consumes it at
667 // entry so only this iteration treats siblings as top-level.
668 // Children of matched rules inside the re-entry still see the flag
669 // as false (consumed) and correctly fall back to continuation
670 // semantics via `recursion_depth > 0`.
671 let entry_anchor = context.last_match_end();
672 let is_indirect_reentry = context.take_indirect_reentry();
673 let is_child_sibling_list = context.recursion_depth() > 0 && !is_indirect_reentry;
674
675 // Entry-point timeout check: ensures every recursive descent is bounded
676 // and that evaluations of small rule sets (< 16 rules) are still guarded.
677 // Without this, the periodic every-16-rules check below never fires for
678 // flat rule lists with fewer than 16 rules, and recursion into children
679 // also restarts `rule_count` at 0.
680 if let Some(timeout_ms) = context.timeout_ms()
681 && start_time.elapsed().as_millis() >= u128::from(timeout_ms)
682 {
683 return Err(LibmagicError::Timeout { timeout_ms });
684 }
685
686 for rule in rules {
687 // For continuation siblings (child recursion), reset the
688 // previous-match anchor to the entry anchor so `&N` offsets
689 // resolve against the parent-level position. Top-level
690 // siblings (depth 0) keep the chaining behavior documented in
691 // GOTCHAS S3.8. See the `entry_anchor` comment above.
692 if is_child_sibling_list {
693 context.set_last_match_end(entry_anchor);
694 }
695
696 // Check timeout periodically (every 16 rules) to reduce syscall overhead
697 rule_count = rule_count.wrapping_add(1);
698 if rule_count.trailing_zeros() >= 4
699 && let Some(timeout_ms) = context.timeout_ms()
700 && start_time.elapsed().as_millis() >= u128::from(timeout_ms)
701 {
702 return Err(LibmagicError::Timeout { timeout_ms });
703 }
704
705 // `Clear` resets the per-level "sibling matched" flag so a
706 // subsequent `default` sibling can fire even if an earlier
707 // sibling matched. It does not produce a match, evaluate
708 // children, or advance the anchor.
709 if let TypeKind::Meta(MetaType::Clear) = &rule.typ {
710 sibling_matched = false;
711 continue;
712 }
713
714 // `Default` fires only when no earlier sibling at this level has
715 // matched yet. The anchor is intentionally not advanced -- the
716 // directive does not consume bytes -- but its children are
717 // evaluated and the per-level "sibling matched" flag is set so
718 // any later `default` sibling at the same level is suppressed.
719 if let TypeKind::Meta(MetaType::Default) = &rule.typ {
720 if !sibling_matched {
721 let matches_before = matches.len();
722
723 let match_result = RuleMatch::new(
724 rule.message.clone(),
725 context.last_match_end(),
726 rule.level,
727 crate::parser::ast::Value::Uint(0),
728 rule.typ.clone(),
729 RuleMatch::calculate_confidence(rule.level),
730 );
731 matches.push(match_result);
732
733 // `default` is treated as a successful match at this
734 // level, so its children are evaluated under the same
735 // recursion-guard pattern as every other successful rule.
736 evaluate_children_or_warn(rule, "default", buffer, context, &mut matches)?;
737
738 sibling_matched = true;
739
740 if matches.len() > matches_before && context.should_stop_at_first_match() {
741 break;
742 }
743 }
744 continue;
745 }
746
747 // `Indirect` re-evaluates the root rule list at the resolved
748 // offset, mirroring libmagic's indirect-type semantics. The
749 // sub-evaluation runs against `buffer[absolute_offset..]` with a
750 // fresh anchor (0) so relative offsets inside the root rules
751 // resolve correctly; the caller's anchor is restored on exit
752 // via `AnchorScope`. Without an attached `RuleEnvironment`
753 // (programmatic consumers bypassing `MagicDatabase`) the
754 // directive is a silent no-op.
755 if let TypeKind::Meta(MetaType::Indirect) = &rule.typ {
756 // Resolve the offset first so a malformed offset surfaces
757 // as a graceful skip rather than a hard error.
758 let absolute_offset = match offset::resolve_offset_with_base(
759 &rule.offset,
760 buffer,
761 context.last_match_end(),
762 context.base_offset(),
763 ) {
764 Ok(o) => o,
765 Err(
766 e @ LibmagicError::EvaluationError(
767 crate::error::EvaluationError::BufferOverrun { .. }
768 | crate::error::EvaluationError::InvalidOffset { .. },
769 ),
770 ) => {
771 debug!("Skipping indirect rule '{}': {}", rule.message, e);
772 continue;
773 }
774 Err(e) => return Err(e),
775 };
776
777 // Pull the root rules out of the rule environment. Without
778 // an environment there is nothing to re-enter, so this is a
779 // silent no-op (matching the `Use`-without-env behavior).
780 //
781 // We use `debug!` rather than `debug_assert!` here because
782 // property tests (`prop_arbitrary_rule_evaluation_never_panics`)
783 // synthesize arbitrary `TypeKind::Meta(MetaType::Indirect)`
784 // rules and run them without attaching a `RuleEnvironment`;
785 // a panic on this path would break the never-panics invariant.
786 // See GOTCHAS S2.1 for the same rationale on the leaked-Name arm.
787 let Some(root_rules) = context.rule_env().map(|e| e.root_rules.clone()) else {
788 debug!(
789 "indirect rule '{}' evaluated without a rule environment; treating as no-op",
790 rule.message
791 );
792 continue;
793 };
794
795 // Bounds-check before slicing. An indirect offset past the
796 // end of the buffer is a data-dependent skip, not an error.
797 let Some(sub_buffer) = buffer.get(absolute_offset..) else {
798 debug!(
799 "Skipping indirect rule '{}': offset {} past buffer end ({} bytes)",
800 rule.message,
801 absolute_offset,
802 buffer.len()
803 );
804 continue;
805 };
806
807 let matches_before = matches.len();
808
809 // Advance the GNU `file` previous-match anchor to the indirect's
810 // resolved offset and emit a `RuleMatch` for the indirect rule
811 // itself BEFORE descending into the root re-entry or children.
812 // This matches the shared successful-match flow used by every
813 // other rule kind: advance anchor first, record the match, then
814 // recurse. Without this, sibling rules of the `indirect` resolve
815 // their relative offsets against the stale anchor and the
816 // directive's own `message` never surfaces in the output.
817 context.set_last_match_end(absolute_offset);
818
819 let indirect_match = RuleMatch::new(
820 rule.message.clone(),
821 absolute_offset,
822 rule.level,
823 crate::parser::ast::Value::String("indirect".to_string()),
824 rule.typ.clone(),
825 RuleMatch::calculate_confidence(rule.level),
826 );
827 matches.push(indirect_match);
828
829 // Indirect counts as a match for `sibling_matched` regardless of
830 // whether the sub-evaluation produced any matches -- the directive
831 // itself successfully dispatched.
832 sibling_matched = true;
833
834 // Recursion guard + anchor scope: nested indirect / use cycles
835 // surface as `RecursionLimitExceeded` instead of a stack overflow,
836 // and the caller's anchor is restored on every exit path.
837 //
838 // Mark the upcoming `evaluate_rules` call as a top-level
839 // re-entry (consumed at entry) so sibling anchor-reset
840 // semantics do NOT fire -- root rules in the re-entered
841 // database chain their anchors across siblings like any
842 // other top-level evaluation.
843 {
844 let mut guard = RecursionGuard::enter(context)?;
845 let mut anchor_scope = AnchorScope::enter(guard.context(), 0);
846 anchor_scope.context().set_indirect_reentry(true);
847 match evaluate_rules(&root_rules, sub_buffer, anchor_scope.context()) {
848 Ok(sub_matches) => {
849 matches.extend(sub_matches);
850 }
851 Err(LibmagicError::Timeout { timeout_ms }) => {
852 return Err(LibmagicError::Timeout { timeout_ms });
853 }
854 Err(e) => return Err(e),
855 }
856 // anchor_scope drops here, restoring the saved anchor
857 // (which is now `absolute_offset`, set above before the
858 // scope was entered).
859 // guard drops next, decrementing the recursion depth.
860 }
861
862 // Evaluate the indirect rule's own children under the same
863 // recursion-guard pattern used by every other successful rule.
864 evaluate_children_or_warn(rule, "indirect", buffer, context, &mut matches)?;
865
866 if matches.len() > matches_before && context.should_stop_at_first_match() {
867 break;
868 }
869 continue;
870 }
871
872 // `Offset` reports the resolved file offset as the rule's read
873 // value, matching GNU `file`'s `FILE_OFFSET` semantics: the match
874 // emits a value-bearing `RuleMatch` whose `value` is the absolute
875 // position, which downstream message formatting substitutes into
876 // `%lld` / `%d` specifiers via `output::format::format_magic_message`.
877 //
878 // Per magic(5) the only legal operator is `x` (AnyValue); any
879 // other operator is a magic-file semantic error. Matching the
880 // evaluator's graceful-skip discipline, we `debug!`-log and skip
881 // rather than erroring -- a rogue rule shouldn't poison the rest
882 // of the evaluation.
883 if let TypeKind::Meta(MetaType::Offset) = &rule.typ {
884 if !matches!(rule.op, crate::parser::ast::Operator::AnyValue) {
885 debug!(
886 "offset rule '{}': non-`x` operator {:?} not supported; skipping",
887 rule.message, rule.op
888 );
889 continue;
890 }
891
892 // Resolve the offset first so a malformed offset surfaces as
893 // a graceful skip rather than a hard error. Mirrors the
894 // `Indirect` dispatch above.
895 let absolute_offset = match offset::resolve_offset_with_base(
896 &rule.offset,
897 buffer,
898 context.last_match_end(),
899 context.base_offset(),
900 ) {
901 Ok(o) => o,
902 Err(
903 e @ LibmagicError::EvaluationError(
904 crate::error::EvaluationError::BufferOverrun { .. }
905 | crate::error::EvaluationError::InvalidOffset { .. },
906 ),
907 ) => {
908 debug!("Skipping offset rule '{}': {}", rule.message, e);
909 continue;
910 }
911 Err(e) => return Err(e),
912 };
913
914 let matches_before = matches.len();
915
916 // Advance the anchor BEFORE emitting the match so sibling
917 // rules resolve their relative offsets against the offset
918 // directive's resolved position. Same discipline as
919 // `Indirect` and every other value-bearing rule.
920 context.set_last_match_end(absolute_offset);
921
922 let offset_match = RuleMatch::new(
923 rule.message.clone(),
924 absolute_offset,
925 rule.level,
926 crate::parser::ast::Value::Uint(absolute_offset as u64),
927 rule.typ.clone(),
928 RuleMatch::calculate_confidence(rule.level),
929 );
930 matches.push(offset_match);
931
932 sibling_matched = true;
933
934 // Evaluate children under the recursion-guard pattern used
935 // by every other successful rule.
936 evaluate_children_or_warn(rule, "offset", buffer, context, &mut matches)?;
937
938 if matches.len() > matches_before && context.should_stop_at_first_match() {
939 break;
940 }
941 continue;
942 }
943
944 // `Use` is handled inline so the subroutine's matches can be
945 // spliced into the caller's match vector in document order.
946 // Routing this through `evaluate_single_rule_with_anchor` would
947 // force the helper to return a `Vec<RuleMatch>`, which would
948 // reshape the single-rule return type for every other variant.
949 //
950 // On a successful use path we must also descend into the rule's
951 // own children, matching the flow of every other successful rule
952 // kind. libmagic chains like `>>0 use part2` often carry
953 // continuation rules (siblings and descendants of the `use` site)
954 // that depend on the anchor the subroutine left behind; skipping
955 // them produces user-visible false negatives.
956 if let TypeKind::Meta(MetaType::Use(name)) = &rule.typ {
957 let matches_before = matches.len();
958 let use_resolved = match evaluate_use_rule(rule, name, buffer, context) {
959 Ok((Some(terminal_anchor), subroutine_matches)) => {
960 matches.extend(subroutine_matches);
961
962 // A `use` rule does not produce a surface
963 // `RuleMatch` itself -- the subroutine's rules
964 // carry the visible messages. Advance the
965 // caller's anchor to the subroutine's TERMINAL
966 // anchor (where the subroutine left `last_match_end`),
967 // not the use-site offset. This makes `use`
968 // behave like inlining the subroutine: sibling
969 // rules after the `use` see `&N` resolve against
970 // the subroutine's final match position.
971 context.set_last_match_end(terminal_anchor);
972 true
973 }
974 Ok((None, _)) => {
975 // No environment, or name not found -- silent no-op.
976 false
977 }
978 Err(
979 e @ LibmagicError::EvaluationError(
980 crate::error::EvaluationError::BufferOverrun { .. }
981 | crate::error::EvaluationError::InvalidOffset { .. },
982 ),
983 ) => {
984 debug!("Skipping use rule '{name}': {e}");
985 false
986 }
987 Err(e) => return Err(e),
988 };
989
990 // Evaluate the use rule's own children exactly like any other
991 // successful rule. Subroutine matches are already appended
992 // above, so children are spliced in after them to preserve
993 // document order. The recursion guard mirrors the non-`Use`
994 // path so a `use`-site chain cannot blow past the configured
995 // recursion limit.
996 if use_resolved {
997 evaluate_children_or_warn(rule, "use", buffer, context, &mut matches)?;
998 }
999
1000 // A successful `use` site is treated as a sibling match for
1001 // `default`/`clear` dispatch purposes -- subsequent `default`
1002 // siblings should not fire if the subroutine resolved.
1003 if use_resolved {
1004 sibling_matched = true;
1005 }
1006
1007 // Apply stop-at-first-match with the same semantics as every
1008 // other successful rule kind: if this `use` site contributed
1009 // any matches (either from the subroutine or from its own
1010 // children) and the caller configured first-match
1011 // short-circuiting, halt evaluation of further siblings.
1012 if matches.len() > matches_before && context.should_stop_at_first_match() {
1013 break;
1014 }
1015 continue;
1016 }
1017
1018 // Evaluate the current rule with graceful error handling.
1019 // Pass the GNU `file` anchor so OffsetSpec::Relative resolves
1020 // correctly against the previous match's end position.
1021 let match_data = match evaluate_single_rule_with_anchor(
1022 rule,
1023 buffer,
1024 context.last_match_end(),
1025 context.base_offset(),
1026 context.max_string_length(),
1027 ) {
1028 Ok(data) => data,
1029 Err(
1030 e @ (LibmagicError::EvaluationError(
1031 crate::error::EvaluationError::BufferOverrun { .. }
1032 | crate::error::EvaluationError::InvalidOffset { .. }
1033 | crate::error::EvaluationError::InvalidValueTransform { .. }
1034 | crate::error::EvaluationError::TypeReadError(
1035 crate::evaluator::types::TypeReadError::BufferOverrun { .. }
1036 | crate::evaluator::types::TypeReadError::InvalidPStringLength { .. },
1037 ),
1038 )
1039 | LibmagicError::IoError(_)),
1040 ) => {
1041 // Expected data-dependent evaluation errors -- skip gracefully.
1042 // TypeReadError::UnsupportedType is intentionally NOT caught here
1043 // so that evaluator capability gaps propagate as errors.
1044 debug!("Skipping rule '{}': {}", rule.message, e);
1045 continue;
1046 }
1047 Err(e) => {
1048 // Unexpected errors (InternalError, UnsupportedType, etc.) should propagate
1049 return Err(e);
1050 }
1051 };
1052
1053 if let Some((absolute_offset, read_value)) = match_data {
1054 // Advance the GNU `file` previous-match anchor BEFORE recursing
1055 // into children, so children and their descendants see the new
1056 // anchor. The anchor is updated unconditionally to the end of
1057 // this match -- it may move forward or backward depending on
1058 // where successive rules match (it is *not* a high-watermark).
1059 let consumed = types::bytes_consumed_with_pattern(
1060 buffer,
1061 absolute_offset,
1062 &rule.typ,
1063 Some(&rule.value),
1064 );
1065 let new_anchor = absolute_offset.saturating_add(consumed);
1066 context.set_last_match_end(new_anchor);
1067
1068 // Mark this level as "matched" so any subsequent `default`
1069 // sibling at the same level is suppressed, matching libmagic's
1070 // default-after-match semantics.
1071 sibling_matched = true;
1072
1073 let match_result = RuleMatch::new(
1074 rule.message.clone(),
1075 absolute_offset,
1076 rule.level,
1077 read_value,
1078 rule.typ.clone(),
1079 RuleMatch::calculate_confidence(rule.level),
1080 );
1081 matches.push(match_result);
1082
1083 // If this rule has children, evaluate them recursively
1084 if !rule.children.is_empty() {
1085 // Check recursion depth limit - this is a critical error that should stop evaluation.
1086 // `RecursionGuard` decrements the depth on drop, so every exit path below
1087 // (Ok, graceful warn!, or early-return via `?`) restores the counter.
1088 let mut guard = RecursionGuard::enter(context)?;
1089
1090 // Recursively evaluate child rules with graceful error handling
1091 match evaluate_rules(&rule.children, buffer, guard.context()) {
1092 Ok(child_matches) => {
1093 matches.extend(child_matches);
1094 }
1095 Err(LibmagicError::Timeout { timeout_ms }) => {
1096 // Timeout is critical, propagate it up (guard drops here).
1097 return Err(LibmagicError::Timeout { timeout_ms });
1098 }
1099 Err(
1100 e @ (LibmagicError::EvaluationError(
1101 crate::error::EvaluationError::BufferOverrun { .. }
1102 | crate::error::EvaluationError::InvalidOffset { .. }
1103 | crate::error::EvaluationError::InvalidValueTransform { .. }
1104 | crate::error::EvaluationError::TypeReadError(
1105 crate::evaluator::types::TypeReadError::BufferOverrun { .. }
1106 | crate::evaluator::types::TypeReadError::InvalidPStringLength {
1107 ..
1108 },
1109 ),
1110 )
1111 | LibmagicError::IoError(_)),
1112 ) => {
1113 // Defensive: under the current implementation, individual child
1114 // failures are caught and logged inside the recursive evaluate_rules
1115 // call (they never propagate here). This arm guards against future
1116 // changes that might alter that error-handling strategy.
1117 //
1118 // If this fires, the parent match is still emitted but the entire
1119 // child subtree is silently dropped -- which means a partial,
1120 // possibly-incorrect classification is returned to the caller.
1121 // Logged at warn! (not debug!) so the asymmetry is visible.
1122 warn!(
1123 "Discarding child evaluation under rule '{}' due to unexpected error: {} -- parent match is still emitted; investigate the recursive evaluate_rules error-handling path",
1124 rule.message, e
1125 );
1126 }
1127 Err(e) => {
1128 // Unexpected errors in children (including RecursionLimitExceeded)
1129 // should propagate. The guard drops here, decrementing the depth.
1130 return Err(e);
1131 }
1132 }
1133 // `guard` drops here, decrementing the recursion depth.
1134 }
1135
1136 // Stop at first match if configured to do so
1137 if context.should_stop_at_first_match() {
1138 break;
1139 }
1140 }
1141 }
1142
1143 Ok(matches)
1144}
1145
1146/// Evaluate magic rules with a fresh context
1147///
1148/// This is a convenience function that creates a new evaluation context
1149/// and evaluates the rules. Useful for simple evaluation scenarios.
1150///
1151/// # Arguments
1152///
1153/// * `rules` - The list of magic rules to evaluate
1154/// * `buffer` - The file buffer to evaluate against
1155/// * `config` - Configuration for evaluation behavior
1156///
1157/// # Returns
1158///
1159/// Returns `Ok(Vec<RuleMatch>)` containing all matches found, or `Err(LibmagicError)`
1160/// if evaluation fails.
1161///
1162/// # Examples
1163///
1164/// ```rust
1165/// use libmagic_rs::evaluator::{evaluate_rules_with_config, RuleMatch};
1166/// use libmagic_rs::parser::ast::{MagicRule, OffsetSpec, TypeKind, Operator, Value};
1167/// use libmagic_rs::EvaluationConfig;
1168///
1169/// let rule = MagicRule::new(OffsetSpec::Absolute(0), TypeKind::Byte { signed: true }, Operator::Equal, Value::Uint(0x7f), "ELF magic".to_string());
1170///
1171/// let rules = vec![rule];
1172/// let buffer = &[0x7f, 0x45, 0x4c, 0x46];
1173/// let config = EvaluationConfig::default();
1174///
1175/// let matches = evaluate_rules_with_config(&rules, buffer, &config).unwrap();
1176/// assert_eq!(matches.len(), 1);
1177/// assert_eq!(matches[0].message, "ELF magic");
1178/// ```
1179///
1180/// # Errors
1181///
1182/// * `LibmagicError::EvaluationError` - If rule evaluation fails
1183/// * `LibmagicError::Timeout` - If evaluation exceeds configured timeout
1184pub fn evaluate_rules_with_config(
1185 rules: &[MagicRule],
1186 buffer: &[u8],
1187 config: &EvaluationConfig,
1188) -> Result<Vec<RuleMatch>, LibmagicError> {
1189 // Validate the configuration before constructing a context so that
1190 // out-of-range values (e.g. zero recursion depth, excessive timeouts)
1191 // are rejected at the API boundary rather than triggering subtle
1192 // failures during evaluation.
1193 config.validate()?;
1194 // Diagnostic guard: `evaluate_rules_with_config` builds a context
1195 // without an attached `RuleEnvironment`, which means any
1196 // `MetaType::Indirect` rule reached during evaluation is silently
1197 // no-op'd at runtime. That is the intentional behavior for low-level
1198 // callers (matching the `Use`-without-env contract), but we surface
1199 // the misconfiguration at `warn!` level (once per process) so a
1200 // consumer who wires up env-less `indirect` rules will see the
1201 // diagnostic in default logging rather than only at debug level.
1202 // The tree walk runs only in debug builds -- in release builds the
1203 // `cfg(debug_assertions)` gate prevents the O(n) scan on every
1204 // top-level evaluation. Using `debug_assert!` would panic in test
1205 // builds and break the "evaluator never panics" invariant documented
1206 // in GOTCHAS S2.4 -- a misconfigured caller should get a no-op with
1207 // a log entry, not a crash.
1208 #[cfg(debug_assertions)]
1209 if contains_indirect_rule(rules)
1210 && !INDIRECT_WITHOUT_RULE_ENV_WARNED.swap(true, Ordering::Relaxed)
1211 {
1212 warn!(
1213 "{} (subsequent occurrences suppressed)",
1214 crate::error::EvaluationError::indirect_without_environment()
1215 );
1216 }
1217 // Clear the thread-local regex compile cache so it is bounded to
1218 // the lifetime of a single top-level evaluation call. Cache
1219 // entries from a previous rule set would otherwise persist on the
1220 // current thread until process exit. See
1221 // `evaluator::types::regex::reset_regex_cache` for rationale.
1222 crate::evaluator::types::regex::reset_regex_cache();
1223 let mut context = EvaluationContext::new(config.clone());
1224 evaluate_rules(rules, buffer, &mut context)
1225}
1226
1227/// Recursively walk `rules` (including children) looking for any
1228/// [`MetaType::Indirect`] directive.
1229///
1230/// Used by the diagnostic guard in [`evaluate_rules_with_config`]: the
1231/// low-level `_with_config` entry point builds a context without a
1232/// [`crate::evaluator::RuleEnvironment`], so any `indirect` rule is
1233/// silently no-op'd at runtime. The check logs the misconfiguration at
1234/// `debug!` level so consumer tests can detect it without panicking (see
1235/// GOTCHAS S2.4 for why `debug_assert!` would be wrong here).
1236fn contains_indirect_rule(rules: &[MagicRule]) -> bool {
1237 rules.iter().any(|rule| {
1238 matches!(rule.typ, TypeKind::Meta(MetaType::Indirect))
1239 || contains_indirect_rule(&rule.children)
1240 })
1241}
1242
1243#[cfg(test)]
1244mod tests;