Skip to main content

libmagic_rs/evaluator/engine/
mod.rs

1// Copyright (c) 2025-2026 the libmagic-rs contributors
2// SPDX-License-Identifier: Apache-2.0
3
4//! Core evaluation engine for magic rules.
5//!
6//! This module contains the core recursive evaluation logic for executing magic
7//! rules against file buffers. It is responsible for:
8//! - Evaluating a single rule via [`evaluate_single_rule`] (a thin wrapper
9//!   around `evaluate_rules` that delegates one rule through the full
10//!   context-aware pipeline)
11//! - Evaluating hierarchical rule sets with context (`evaluate_rules`)
12//! - Providing a convenience wrapper for evaluation with configuration
13//!   (`evaluate_rules_with_config`)
14
15use crate::parser::ast::{MagicRule, MetaType, TypeKind};
16use crate::{EvaluationConfig, LibmagicError};
17
18use super::{EvaluationContext, RecursionGuard, RuleMatch, offset, operators, types};
19use log::{debug, warn};
20use std::sync::atomic::{AtomicBool, Ordering};
21
22/// RAII guard that saves the GNU `file` previous-match anchor **and**
23/// `base_offset` on entry and restores both on drop.
24///
25/// `MetaType::Indirect` re-evaluates the root rule list at the resolved
26/// offset. The re-entered rules are top-level-semantic (`base_offset=0`)
27/// and must start with a fresh anchor (the resolved indirect offset).
28/// When `indirect` fires inside a `MetaType::Use` subroutine, the outer
29/// subroutine's non-zero `base_offset` would otherwise leak into the
30/// root re-entry, causing every positive absolute offset in the re-entered
31/// database to be biased by the outer use-site -- producing reads at the
32/// wrong positions. Saving and restoring `base_offset` here prevents that.
33///
34/// Without an RAII wrapper, every early-return path inside the indirect
35/// branch would have to remember to restore both fields manually.
36struct AnchorScope<'a> {
37    context: &'a mut EvaluationContext,
38    saved_anchor: usize,
39    saved_base: usize,
40}
41
42impl<'a> AnchorScope<'a> {
43    /// Save the current anchor and `base_offset`, then seed the context
44    /// with `new_anchor` and reset `base_offset` to 0.
45    fn enter(context: &'a mut EvaluationContext, new_anchor: usize) -> Self {
46        let saved_anchor = context.last_match_end();
47        let saved_base = context.base_offset();
48        context.set_last_match_end(new_anchor);
49        context.set_base_offset(0);
50        Self {
51            context,
52            saved_anchor,
53            saved_base,
54        }
55    }
56
57    /// Access the underlying context for the duration of the guard.
58    fn context(&mut self) -> &mut EvaluationContext {
59        self.context
60    }
61}
62
63impl Drop for AnchorScope<'_> {
64    fn drop(&mut self) {
65        self.context.set_last_match_end(self.saved_anchor);
66        self.context.set_base_offset(self.saved_base);
67    }
68}
69
70/// RAII guard for `MetaType::Use` subroutine dispatch.
71///
72/// Saves `last_match_end` and `base_offset` on entry, seeds the context
73/// with the use-site offset (for both fields so that a subroutine's
74/// `&0` relative offset resolves to the use-site and its positive
75/// absolute offsets bias against the use-site per magic(5)), and
76/// restores both on drop.
77///
78/// This is the safety net for early-return paths inside
79/// `evaluate_use_rule`: a `RecursionGuard::enter` failure or a
80/// `Timeout`/`RecursionLimitExceeded` inside the subroutine body would
81/// otherwise leave the caller's context with corrupted anchor and
82/// base-offset state. The guard's `Drop` impl restores both fields on
83/// every exit path, error or success.
84struct SubroutineScope<'a> {
85    context: &'a mut EvaluationContext,
86    saved_anchor: usize,
87    saved_base: usize,
88}
89
90impl<'a> SubroutineScope<'a> {
91    fn enter(context: &'a mut EvaluationContext, use_site: usize) -> Self {
92        let saved_anchor = context.last_match_end();
93        let saved_base = context.base_offset();
94        context.set_last_match_end(use_site);
95        context.set_base_offset(use_site);
96        Self {
97            context,
98            saved_anchor,
99            saved_base,
100        }
101    }
102
103    fn context(&mut self) -> &mut EvaluationContext {
104        self.context
105    }
106}
107
108impl Drop for SubroutineScope<'_> {
109    fn drop(&mut self) {
110        self.context.set_last_match_end(self.saved_anchor);
111        self.context.set_base_offset(self.saved_base);
112    }
113}
114
/// Process-local once guard for the "use directive without rule environment"
/// warning. The first `use` rule evaluated without an environment flips the
/// flag and is reported at `warn!` level; every later occurrence in the same
/// process is demoted to `debug!`. This keeps the misconfiguration visible in
/// default logging while ensuring low-level programmatic consumers of
/// [`evaluate_rules`] (tests, fuzz harnesses) that intentionally run without a
/// `MagicDatabase`-attached environment do not flood the log on every `Use`
/// rule they encounter.
static USE_WITHOUT_RULE_ENV_WARNED: AtomicBool = AtomicBool::new(false);
121
/// Process-local once guard for the "`evaluate_rules_with_config` called
/// with an `indirect` rule but without a `RuleEnvironment`" warning.
///
/// Same rationale as `USE_WITHOUT_RULE_ENV_WARNED`: surface the
/// misconfiguration exactly once per process so a large corpus of
/// env-less `indirect` rules does not flood the log. Starts out `false`
/// (not yet warned).
static INDIRECT_WITHOUT_RULE_ENV_WARNED: AtomicBool = AtomicBool::new(false);
128
129/// Evaluate a single magic rule against a file buffer
130///
131/// This is a thin wrapper around [`evaluate_rules`] that evaluates exactly
132/// one top-level rule (and any of its children) against a buffer, using the
133/// caller-provided [`EvaluationContext`] to enforce timeout, recursion, and
134/// string-size limits. It is a BREAKING API change introduced in pre-1.0:
135/// earlier versions took no context and returned `Option<(usize, Value)>`.
136///
137/// # Arguments
138///
139/// * `rule` - The magic rule to evaluate
140/// * `buffer` - The file buffer to evaluate against
141/// * `context` - Mutable evaluation context that carries the configured
142///   safety limits (timeout, max recursion depth, max string length) and
143///   the GNU `file` previous-match anchor used for relative-offset
144///   resolution. Callers reusing a context across multiple buffers must
145///   call [`EvaluationContext::reset`](crate::evaluator::EvaluationContext::reset)
146///   between calls -- see [`evaluate_rules`] for details.
147///
148/// # Returns
149///
150/// Returns `Ok(Vec<RuleMatch>)` containing the parent match (if the rule
151/// matched) plus any child matches collected recursively. An empty vector
152/// means the rule did not match or was skipped due to a data-dependent
153/// evaluation error (buffer overrun, invalid offset, etc.). Only critical
154/// failures such as `LibmagicError::Timeout` or recursion-limit exhaustion
155/// are returned as `Err`.
156///
157/// # Examples
158///
159/// ```rust
160/// use libmagic_rs::evaluator::{evaluate_single_rule, EvaluationContext};
161/// use libmagic_rs::EvaluationConfig;
162/// use libmagic_rs::parser::ast::{MagicRule, OffsetSpec, TypeKind, Operator, Value};
163///
164/// // Create a rule to check for ELF magic bytes at offset 0
165/// let rule = MagicRule {
166///     offset: OffsetSpec::Absolute(0),
167///     typ: TypeKind::Byte { signed: true },
168///     op: Operator::Equal,
169///     value: Value::Uint(0x7f),
170///     message: "ELF magic".to_string(),
171///     children: vec![],
172///     level: 0,
173///     strength_modifier: None,
174/// value_transform: None,
175/// };
176///
177/// let mut context = EvaluationContext::new(EvaluationConfig::default());
178/// let elf_buffer = &[0x7f, 0x45, 0x4c, 0x46]; // ELF magic bytes
179/// let matches = evaluate_single_rule(&rule, elf_buffer, &mut context).unwrap();
180/// assert_eq!(matches.len(), 1); // Should match
181///
182/// context.reset();
183/// let non_elf_buffer = &[0x50, 0x4b, 0x03, 0x04]; // ZIP magic bytes
184/// let matches = evaluate_single_rule(&rule, non_elf_buffer, &mut context).unwrap();
185/// assert!(matches.is_empty()); // Should not match
186/// ```
187///
188/// # Errors
189///
190/// * `LibmagicError::Timeout` - If evaluation exceeds the configured timeout
191/// * `LibmagicError::EvaluationError` - For critical failures such as the
192///   recursion limit being exceeded. Data-dependent errors (buffer overrun,
193///   invalid offset, malformed pstring length) are handled gracefully by
194///   [`evaluate_rules`] and surface as an empty match vector rather than
195///   an error.
196pub fn evaluate_single_rule(
197    rule: &MagicRule,
198    buffer: &[u8],
199    context: &mut EvaluationContext,
200) -> Result<Vec<RuleMatch>, LibmagicError> {
201    evaluate_rules(std::slice::from_ref(rule), buffer, context)
202}
203
204/// Internal: evaluate a single rule against a buffer, supplying an explicit
205/// anchor for relative-offset resolution.
206///
207/// This is the worker behind both [`evaluate_single_rule`] (which defaults
208/// the anchor to 0) and [`evaluate_rules`] (which threads the anchor from
209/// `EvaluationContext::last_match_end()`).
210fn evaluate_single_rule_with_anchor(
211    rule: &MagicRule,
212    buffer: &[u8],
213    last_match_end: usize,
214    base_offset: usize,
215) -> Result<Option<(usize, crate::parser::ast::Value)>, LibmagicError> {
216    use crate::parser::ast::TypeKind;
217
218    // Step 1: Resolve the offset specification to an absolute position.
219    // `base_offset` is non-zero only inside a `MetaType::Use` subroutine
220    // body, where it biases positive absolute offsets to the use-site.
221    let absolute_offset =
222        offset::resolve_offset_with_base(&rule.offset, buffer, last_match_end, base_offset)?;
223
224    // Step 2 & 3: Dispatch on type category. Pattern-bearing types
225    // (Regex, Search) take a different path from fixed-width types
226    // because the rule's `value` operand is the *pattern*, not an
227    // expected matched value. Running those through `apply_operator`
228    // would compare matched text ("123") against the pattern literal
229    // ("[0-9]+") and produce false negatives on any regex with
230    // metacharacters.
231    //
232    // Meta-type directives (`default`, `clear`, `name`, `use`,
233    // `indirect`, `offset`) are dispatched by `evaluate_rules` at the
234    // outer loop level (not here) -- this single-rule helper is only
235    // invoked for non-meta rules. Short-circuiting the Meta arms here
236    // with `Ok(None)` is defense-in-depth for programmatic callers
237    // (property tests, fuzz harnesses) that hand-build a Meta rule
238    // and feed it directly to `evaluate_single_rule`; without the
239    // guard, the value/pattern paths would surface
240    // `TypeReadError::UnsupportedType`.
241    let (matched, read_value) = match &rule.typ {
242        TypeKind::Meta(MetaType::Name(name)) => {
243            // `Name` rules are normally hoisted into the name table at
244            // parse time and should not reach the evaluator. Programmatic
245            // consumers (e.g. fuzz harnesses, property tests) can still
246            // construct them directly; treat that as a no-op rather than
247            // a hard failure so the evaluator-never-panics invariant is
248            // preserved.
249            debug!(
250                "Name rule '{name}' reached evaluator (likely bypassed name-table extraction); treating as no-op"
251            );
252            return Ok(None);
253        }
254        TypeKind::Meta(MetaType::Use(_)) => {
255            // `Use` is dispatched inline by `evaluate_rules` so it can
256            // push the subroutine's matches into the caller's match
257            // vector. Reaching this arm means the rule went through the
258            // single-rule path (e.g. via `evaluate_single_rule`) which
259            // lacks that wiring; treat it as a silent no-op.
260            return Ok(None);
261        }
262        TypeKind::Meta(_) => return Ok(None),
263        TypeKind::Regex { .. } | TypeKind::Search { .. } => {
264            evaluate_pattern_rule(rule, buffer, absolute_offset)?
265        }
266        // Flagged `string` rules route through the pattern-bearing path
267        // (see GOTCHAS S2.4 for the contract) so `compare_string_with_flags`
268        // can do the case-fold / whitespace-flexible match in one pass.
269        // Default-flag strings (the common case) take the existing
270        // value-rule fast path with byte-exact `apply_equal`.
271        TypeKind::String { flags, .. } if !flags.is_empty() => {
272            evaluate_pattern_rule(rule, buffer, absolute_offset)?
273        }
274        _ => evaluate_value_rule(rule, buffer, absolute_offset)?,
275    };
276    Ok(matched.then_some((absolute_offset, read_value)))
277}
278
/// Evaluate a `TypeKind::Meta(MetaType::Use(name))` rule inline.
///
/// Looks up `name` in the context's rule environment, resolves the use-site
/// offset in the caller's scope, temporarily seeds both the GNU `file`
/// previous-match anchor and `base_offset` with that offset (via
/// [`SubroutineScope`]), and recursively evaluates the subroutine's rules
/// against `buffer`. Any matches produced by the subroutine are returned in
/// document order and are intended to be pushed into the caller's match
/// vector *before* the synthetic `Use` match itself (matching GNU `file`
/// behavior where a `use` site is replaced by its expansion in the output).
///
/// # Returns
///
/// * `Ok((Some(terminal_anchor), matches))` on a successful resolution
///   (even if the subroutine produced no matches). `terminal_anchor` is the
///   value of `last_match_end()` the subroutine left behind, read before
///   the scope guard restores the caller's anchor -- it is **not** the
///   use-site offset.
/// * `Ok((None, vec![]))` when:
///   - the context has no rule environment attached (programmatic consumers
///     bypassing `MagicDatabase`)
///   - the referenced name is not in the table (logged at warn level)
///
/// # Errors
///
/// Propagates failures from resolving the use-site offset, from
/// `RecursionGuard::enter`, and from the nested [`evaluate_rules`] call.
/// Recursion-limit propagation is handled via [`RecursionGuard`] so that a
/// subroutine calling `use` on itself triggers `RecursionLimitExceeded`
/// instead of a stack overflow. The caller's anchor and `base_offset` are
/// restored on every one of these paths.
fn evaluate_use_rule(
    rule: &MagicRule,
    name: &str,
    buffer: &[u8],
    context: &mut EvaluationContext,
) -> Result<(Option<usize>, Vec<RuleMatch>), LibmagicError> {
    let Some(env) = context.rule_env() else {
        // Surface the misconfiguration once per process at warn! level so
        // it is visible in default logging, then gate subsequent hits so a
        // magic file with many `use` directives does not flood the log.
        // Use `Ordering::Relaxed`: the flag is an idempotent diagnostic
        // latch, not a synchronization primitive guarding other state.
        // `swap` returns the previous value, so `true` means an earlier
        // call already emitted the warning.
        if USE_WITHOUT_RULE_ENV_WARNED.swap(true, Ordering::Relaxed) {
            debug!("use directive '{name}' evaluated without a rule environment; no-op");
        } else {
            warn!(
                "use directive '{name}' evaluated without a rule environment; treating as no-op (subsequent occurrences suppressed)"
            );
        }
        return Ok((None, Vec::new()));
    };

    // `NameTable::get` returns an `Arc<[MagicRule]>`, so obtaining the
    // subroutine is a reference-count increment rather than a deep copy of
    // the rule tree. Holding the rules through the Arc releases the
    // immutable borrow of `context` (via `env`) before we mutably borrow
    // the context below.
    let Some(subroutine_rules) = env.name_table.get(name) else {
        warn!("use directive references unknown name '{name}'");
        return Ok((None, Vec::new()));
    };

    // Resolve the use-site offset under the *caller's* base, not the
    // subroutine's -- the use rule itself is in the caller's scope.
    let absolute_offset = offset::resolve_offset_with_base(
        &rule.offset,
        buffer,
        context.last_match_end(),
        context.base_offset(),
    )?;

    // `SubroutineScope` seeds `last_match_end` and `base_offset` with
    // the use-site offset and restores both on drop. This is the
    // safety net for early-return paths below -- if
    // `RecursionGuard::enter` or the inner `evaluate_rules` returns
    // `Err(Timeout)` / `Err(RecursionLimitExceeded)`, the `?` unwinds
    // through the guard's `Drop` impl and the caller's context
    // returns to its pre-use state. Without the RAII wrapper a manual
    // save/restore pair would be bypassed on every error path.
    //
    // Capture both the subroutine's matches AND the terminal anchor
    // where the subroutine left `last_match_end`. The terminal anchor
    // is what GNU `file`-compatible inlining semantics require: sibling
    // rules after the `use` site must resolve `&N` against the position
    // the subroutine reached, not the use-site offset. Reading the
    // anchor INSIDE the scope (before Drop restores the caller's value)
    // preserves it for the caller.
    //
    // Declaration order matters: `guard` is declared after `scope`, so it
    // is dropped first and the scope restores the anchor/base last.
    let (subroutine_matches, terminal_anchor) = {
        let mut scope = SubroutineScope::enter(context, absolute_offset);
        let mut guard = RecursionGuard::enter(scope.context())?;
        let matches = evaluate_rules(&subroutine_rules, buffer, guard.context())?;
        let terminal = guard.context().last_match_end();
        (matches, terminal)
    };

    Ok((Some(terminal_anchor), subroutine_matches))
}
364
365/// Evaluate a pattern-bearing rule (`TypeKind::Regex` / `TypeKind::Search`).
366///
367/// `read_pattern_match` returns `Some(value)` on a successful match
368/// (possibly zero-width, e.g., `a*`) and `None` on a genuine miss; the
369/// engine translates those directly into `Equal`/`NotEqual`. Any other
370/// operator on a pattern-bearing type is a magic-file semantic bug and
371/// surfaces as [`TypeReadError::UnsupportedType`] -- the earlier
372/// fallthrough to `apply_operator` masked this by producing nonsense
373/// ordering comparisons against the pattern source text.
374///
375/// On a miss we return `Value::String(String::new())` as a display
376/// placeholder; the engine has already decided `matched = false` by
377/// then, so the placeholder only affects display and
378/// `bytes_consumed_with_pattern` (which re-derives the match position
379/// from the pattern, not this value).
380fn evaluate_pattern_rule(
381    rule: &MagicRule,
382    buffer: &[u8],
383    absolute_offset: usize,
384) -> Result<(bool, crate::parser::ast::Value), LibmagicError> {
385    let match_outcome =
386        types::read_pattern_match(buffer, absolute_offset, &rule.typ, Some(&rule.value))
387            .map_err(|e| LibmagicError::EvaluationError(e.into()))?;
388    let pattern_found = match_outcome.is_some();
389    let matched = match &rule.op {
390        crate::parser::ast::Operator::Equal => pattern_found,
391        crate::parser::ast::Operator::NotEqual => !pattern_found,
392        other => {
393            return Err(LibmagicError::EvaluationError(
394                types::TypeReadError::UnsupportedType {
395                    type_name: format!(
396                        "operator {other:?} is not supported for pattern-bearing type {:?}; only Equal (=) and NotEqual (!=) are allowed",
397                        rule.typ
398                    ),
399                }
400                .into(),
401            ));
402        }
403    };
404    let value = match_outcome.unwrap_or_else(|| crate::parser::ast::Value::String(String::new()));
405    Ok((matched, value))
406}
407
408/// Evaluate a value-based rule (all non-pattern-bearing `TypeKind` variants).
409///
410/// Reads the typed value at `absolute_offset`, coerces the rule's
411/// expected value to the target type's signedness/width (zero-copy via
412/// `Cow::Borrowed` on the hot path), and applies the operator.
413/// `BitwiseNot` needs type-aware width masking so the complement is
414/// computed at the type's natural width (e.g. byte `NOT 0x00 = 0xFF`,
415/// not `u64::MAX`).
416fn evaluate_value_rule(
417    rule: &MagicRule,
418    buffer: &[u8],
419    absolute_offset: usize,
420) -> Result<(bool, crate::parser::ast::Value), LibmagicError> {
421    let read_value =
422        types::read_typed_value_with_pattern(buffer, absolute_offset, &rule.typ, Some(&rule.value))
423            .map_err(|e| LibmagicError::EvaluationError(e.into()))?;
424
425    // Apply any pre-comparison value transform (`type+N`/`type-N`/`type*N`/
426    // `type/N`/`type%N`/`type|N`/`type^N`). The transform runs on the read
427    // value before the comparison operator and before printf-style format
428    // substitution, so `%d` in the message renders the post-transform
429    // number. `&MASK` is *not* handled here -- it lives at the operator
430    // layer via `Operator::BitwiseAndMask`.
431    let transformed_value = match rule.value_transform {
432        None => read_value,
433        Some(t) => operators::apply_value_transform(&read_value, t)
434            .map_err(LibmagicError::EvaluationError)?,
435    };
436
437    let expected_value = types::coerce_value_to_type(&rule.value, &rule.typ);
438    let expected_ref: &crate::parser::ast::Value = expected_value.as_ref();
439
440    let matched = match &rule.op {
441        crate::parser::ast::Operator::BitwiseNot => operators::apply_bitwise_not_with_width(
442            &transformed_value,
443            expected_ref,
444            rule.typ.bit_width(),
445        ),
446        op => operators::apply_operator(op, &transformed_value, expected_ref),
447    };
448    Ok((matched, transformed_value))
449}
450
451/// Evaluate a rule's children under the standard recursion-guard/graceful-skip discipline.
452///
453/// This helper centralises the `RecursionGuard` + `evaluate_rules` + error-dispatch
454/// pattern that is identical across the `Default`, `Indirect`, `Offset`, and `Use`
455/// meta-type arms in [`evaluate_rules`]. Extracting it prevents the four copies
456/// from drifting apart during future maintenance.
457///
458/// # Behaviour
459///
460/// * If `rule.children` is empty the function is a no-op (returns `Ok(())`).
461/// * Child matches are appended to `matches` in document order.
462/// * `LibmagicError::Timeout` and `LibmagicError::EvaluationError(RecursionLimitExceeded)`
463///   propagate immediately as `Err` so the caller can bail out.
464/// * Data-dependent errors (`BufferOverrun`, `InvalidOffset`,
465///   `TypeReadError::BufferOverrun`, `TypeReadError::InvalidPStringLength`,
466///   `IoError`) are logged at `warn!` and swallowed; the parent match
467///   already in `matches` is left intact. This mirrors the defensive
468///   comment in each arm: the inner `evaluate_rules` already catches and
469///   logs individual child failures, so this arm only fires if that
470///   strategy changes.
471///
472/// # Arguments
473///
474/// * `rule`      – The parent rule whose children will be evaluated.
475/// * `rule_kind` – A short label for the rule kind used in the `warn!`
476///   message (e.g. `"default"`, `"indirect"`, `"offset"`, `"use"`).
477/// * `buffer`    – The file buffer passed to the recursive call.
478/// * `context`   – Mutable evaluation context; the recursion depth is
479///   incremented on entry and decremented on drop via [`RecursionGuard`].
480/// * `matches`   – Output vector; child matches are appended here.
481fn evaluate_children_or_warn(
482    rule: &MagicRule,
483    rule_kind: &str,
484    buffer: &[u8],
485    context: &mut EvaluationContext,
486    matches: &mut Vec<RuleMatch>,
487) -> Result<(), LibmagicError> {
488    if rule.children.is_empty() {
489        return Ok(());
490    }
491    let mut guard = RecursionGuard::enter(context)?;
492    match evaluate_rules(&rule.children, buffer, guard.context()) {
493        Ok(child_matches) => {
494            matches.extend(child_matches);
495        }
496        Err(LibmagicError::Timeout { timeout_ms }) => {
497            return Err(LibmagicError::Timeout { timeout_ms });
498        }
499        // `RecursionLimitExceeded` is listed explicitly (rather than
500        // relying on the catch-all below) so a future maintainer adding
501        // another swallowed variant cannot accidentally swallow it.
502        // Both this arm and the catch-all intentionally propagate via
503        // `return Err(e)`; `match_same_arms` is suppressed because the
504        // explicit arm's purpose is documentation and future-proofing,
505        // not different behavior. See GOTCHAS S13 for the recursion-
506        // depth guard contract.
507        #[allow(clippy::match_same_arms)]
508        Err(
509            e @ LibmagicError::EvaluationError(
510                crate::error::EvaluationError::RecursionLimitExceeded { .. },
511            ),
512        ) => return Err(e),
513        Err(
514            e @ (LibmagicError::EvaluationError(
515                crate::error::EvaluationError::BufferOverrun { .. }
516                | crate::error::EvaluationError::InvalidOffset { .. }
517                | crate::error::EvaluationError::InvalidValueTransform { .. }
518                | crate::error::EvaluationError::TypeReadError(
519                    crate::evaluator::types::TypeReadError::BufferOverrun { .. }
520                    | crate::evaluator::types::TypeReadError::InvalidPStringLength { .. },
521                ),
522            )
523            | LibmagicError::IoError(_)),
524        ) => {
525            warn!(
526                "Discarding child evaluation under {} rule '{}' due to unexpected error: {} -- parent match is still emitted",
527                rule_kind, rule.message, e
528            );
529        }
530        Err(e) => return Err(e),
531    }
532    Ok(())
533}
534
535/// Evaluate a list of magic rules against a file buffer with hierarchical processing
536///
537/// This function implements the core hierarchical rule evaluation algorithm with graceful
538/// error handling:
539/// 1. Evaluates each top-level rule in sequence
540/// 2. If a parent rule matches, evaluates its child rules for refinement
541/// 3. Collects all matches or stops at first match based on configuration
542/// 4. Maintains evaluation context for recursion limits and state
543/// 5. Implements graceful degradation by skipping problematic rules and continuing evaluation
544///
545/// The hierarchical evaluation follows these principles:
546/// - Parent rules must match before children are evaluated
547/// - Child rules provide refinement and additional detail
548/// - Evaluation can stop at first match or continue for all matches
549/// - Recursion depth is limited to prevent infinite loops
550/// - Problematic rules are skipped to allow evaluation to continue
551///
552/// # Arguments
553///
554/// * `rules` - The list of magic rules to evaluate
555/// * `buffer` - The file buffer to evaluate against
556/// * `context` - Mutable evaluation context for state management. **Callers
557///   reusing a context across multiple buffers must call
558///   [`EvaluationContext::reset`](crate::evaluator::EvaluationContext::reset)
559///   between calls** -- the GNU `file` previous-match anchor and the
560///   recursion-depth counter both advance during evaluation and would
561///   otherwise leak across buffers. The same applies when this function
562///   returns `Err` mid-evaluation (e.g., `LibmagicError::Timeout` or
563///   `RecursionLimitExceeded`): both the anchor and (potentially) the
564///   recursion depth are left in a partially-advanced state, and a retry
565///   on the same context without `reset()` will resolve relative offsets
566///   against the stale anchor and apply the wrong recursion budget.
567///   [`evaluate_rules_with_config`] always builds a fresh context and is the
568///   safer choice when context reuse isn't required.
569///
570/// # Returns
571///
572/// Returns `Ok(Vec<RuleMatch>)` containing all matches found. Errors in individual rules
573/// are skipped to allow evaluation to continue. Only returns `Err(LibmagicError)`
574/// for critical failures like timeout or recursion limit exceeded.
575///
576/// # Examples
577///
578/// ```rust
579/// use libmagic_rs::evaluator::{evaluate_rules, EvaluationContext, RuleMatch};
580/// use libmagic_rs::parser::ast::{MagicRule, OffsetSpec, TypeKind, Operator, Value};
581/// use libmagic_rs::EvaluationConfig;
582///
583/// // Create a hierarchical rule set for ELF files
584/// let parent_rule = MagicRule {
585///     offset: OffsetSpec::Absolute(0),
586///     typ: TypeKind::Byte { signed: true },
587///     op: Operator::Equal,
588///     value: Value::Uint(0x7f),
589///     message: "ELF".to_string(),
590///     children: vec![
591///         MagicRule {
592///             offset: OffsetSpec::Absolute(4),
593///             typ: TypeKind::Byte { signed: true },
594///             op: Operator::Equal,
595///             value: Value::Uint(2),
596///             message: "64-bit".to_string(),
597///             children: vec![],
598///             level: 1,
599///             strength_modifier: None,
600///         value_transform: None,
601///         }
602///     ],
603///     level: 0,
604///     strength_modifier: None,
605/// value_transform: None,
606/// };
607///
608/// let rules = vec![parent_rule];
609/// let buffer = &[0x7f, 0x45, 0x4c, 0x46, 0x02, 0x01]; // ELF64 header
610/// let config = EvaluationConfig::default();
611/// let mut context = EvaluationContext::new(config);
612///
613/// let matches = evaluate_rules(&rules, buffer, &mut context).unwrap();
614/// assert_eq!(matches.len(), 2); // Parent and child should both match
615/// ```
616///
617/// # Errors
618///
619/// * `LibmagicError::Timeout` - If evaluation exceeds configured timeout
620/// * `LibmagicError::EvaluationError` - Only for critical failures like recursion limit exceeded
621///
622/// Individual rule evaluation errors are handled gracefully and do not stop the overall evaluation.
623#[allow(clippy::too_many_lines)]
624pub fn evaluate_rules(
625    rules: &[MagicRule],
626    buffer: &[u8],
627    context: &mut EvaluationContext,
628) -> Result<Vec<RuleMatch>, LibmagicError> {
629    let mut matches = Vec::with_capacity(8);
630    let start_time = std::time::Instant::now();
631    let mut rule_count = 0u32;
632
633    // Per-level "did any sibling match yet?" flag for `default`/`clear`
634    // dispatch. Each recursive descent gets its own fresh flag, so child
635    // sibling chains track their own state independently of the parent.
636    let mut sibling_matched: bool = false;
637
638    // Per-level entry anchor: captured at the start of this sibling list's
639    // evaluation. For CHILD sibling lists (recursion_depth > 0), the
640    // GNU `file`/libmagic previous-match anchor is reset to this value
641    // between sibling iterations so that `&N` offsets on continuation
642    // siblings resolve against the parent-level anchor, not against
643    // whatever the *previous sibling* left the anchor at. This matches
644    // libmagic's continuation-level model (`ms->c.li[cont_level]`)
645    // where each level tracks its own anchor; a sibling at level L does
646    // not inherit the post-match anchor of another sibling at level L.
647    //
648    // TOP-LEVEL siblings (recursion_depth == 0) are independent
649    // classification attempts -- each top-level rule intentionally sees
650    // the anchor advance that prior top-level rules produced (see
651    // GOTCHAS S3.8 and the `relative_anchor_can_decrease_...`
652    // integration test). Gate the reset on recursion_depth to preserve
653    // that documented discipline while still fixing the continuation-
654    // sibling behavior that the GNU `file` `searchbug.magic` fixture
655    // relies on.
656    //
657    // Recursing into a matched rule's children still carries forward the
658    // post-match anchor (via the current value of `last_match_end()` at
659    // the point of recursion), so child sibling lists see their parent's
660    // resolved position as their own entry anchor.
661    //
662    // INDIRECT RE-ENTRY exception: `MetaType::Indirect` dispatches its
663    // sub-evaluation via `RecursionGuard::enter` (to bound the recursion
664    // cycle), which forces `recursion_depth > 0`. But an indirect
665    // re-entry semantically evaluates the root rule list with TOP-LEVEL
666    // sibling semantics -- each rule is an independent classification
667    // attempt against the re-entered sub-buffer, NOT a continuation
668    // list. The indirect dispatch sets `context.set_indirect_reentry(true)`
669    // just before this call; `take_indirect_reentry()` consumes it at
670    // entry so only this iteration treats siblings as top-level.
671    // Children of matched rules inside the re-entry still see the flag
672    // as false (consumed) and correctly fall back to continuation
673    // semantics via `recursion_depth > 0`.
674    let entry_anchor = context.last_match_end();
675    let is_indirect_reentry = context.take_indirect_reentry();
676    let is_child_sibling_list = context.recursion_depth() > 0 && !is_indirect_reentry;
677
678    // Entry-point timeout check: ensures every recursive descent is bounded
679    // and that evaluations of small rule sets (< 16 rules) are still guarded.
680    // Without this, the periodic every-16-rules check below never fires for
681    // flat rule lists with fewer than 16 rules, and recursion into children
682    // also restarts `rule_count` at 0.
683    if let Some(timeout_ms) = context.timeout_ms()
684        && start_time.elapsed().as_millis() >= u128::from(timeout_ms)
685    {
686        return Err(LibmagicError::Timeout { timeout_ms });
687    }
688
689    for rule in rules {
690        // For continuation siblings (child recursion), reset the
691        // previous-match anchor to the entry anchor so `&N` offsets
692        // resolve against the parent-level position. Top-level
693        // siblings (depth 0) keep the chaining behavior documented in
694        // GOTCHAS S3.8. See the `entry_anchor` comment above.
695        if is_child_sibling_list {
696            context.set_last_match_end(entry_anchor);
697        }
698
699        // Check timeout periodically (every 16 rules) to reduce syscall overhead
700        rule_count = rule_count.wrapping_add(1);
701        if rule_count.trailing_zeros() >= 4
702            && let Some(timeout_ms) = context.timeout_ms()
703            && start_time.elapsed().as_millis() >= u128::from(timeout_ms)
704        {
705            return Err(LibmagicError::Timeout { timeout_ms });
706        }
707
708        // `Clear` resets the per-level "sibling matched" flag so a
709        // subsequent `default` sibling can fire even if an earlier
710        // sibling matched. It does not produce a match, evaluate
711        // children, or advance the anchor.
712        if let TypeKind::Meta(MetaType::Clear) = &rule.typ {
713            sibling_matched = false;
714            continue;
715        }
716
717        // `Default` fires only when no earlier sibling at this level has
718        // matched yet. The anchor is intentionally not advanced -- the
719        // directive does not consume bytes -- but its children are
720        // evaluated and the per-level "sibling matched" flag is set so
721        // any later `default` sibling at the same level is suppressed.
722        if let TypeKind::Meta(MetaType::Default) = &rule.typ {
723            if !sibling_matched {
724                let matches_before = matches.len();
725
726                let match_result = RuleMatch {
727                    message: rule.message.clone(),
728                    offset: context.last_match_end(),
729                    level: rule.level,
730                    value: crate::parser::ast::Value::Uint(0),
731                    type_kind: rule.typ.clone(),
732                    confidence: RuleMatch::calculate_confidence(rule.level),
733                };
734                matches.push(match_result);
735
736                // `default` is treated as a successful match at this
737                // level, so its children are evaluated under the same
738                // recursion-guard pattern as every other successful rule.
739                evaluate_children_or_warn(rule, "default", buffer, context, &mut matches)?;
740
741                sibling_matched = true;
742
743                if matches.len() > matches_before && context.should_stop_at_first_match() {
744                    break;
745                }
746            }
747            continue;
748        }
749
750        // `Indirect` re-evaluates the root rule list at the resolved
751        // offset, mirroring libmagic's indirect-type semantics. The
752        // sub-evaluation runs against `buffer[absolute_offset..]` with a
753        // fresh anchor (0) so relative offsets inside the root rules
754        // resolve correctly; the caller's anchor is restored on exit
755        // via `AnchorScope`. Without an attached `RuleEnvironment`
756        // (programmatic consumers bypassing `MagicDatabase`) the
757        // directive is a silent no-op.
758        if let TypeKind::Meta(MetaType::Indirect) = &rule.typ {
759            // Resolve the offset first so a malformed offset surfaces
760            // as a graceful skip rather than a hard error.
761            let absolute_offset = match offset::resolve_offset_with_base(
762                &rule.offset,
763                buffer,
764                context.last_match_end(),
765                context.base_offset(),
766            ) {
767                Ok(o) => o,
768                Err(
769                    e @ LibmagicError::EvaluationError(
770                        crate::error::EvaluationError::BufferOverrun { .. }
771                        | crate::error::EvaluationError::InvalidOffset { .. },
772                    ),
773                ) => {
774                    debug!("Skipping indirect rule '{}': {}", rule.message, e);
775                    continue;
776                }
777                Err(e) => return Err(e),
778            };
779
780            // Pull the root rules out of the rule environment. Without
781            // an environment there is nothing to re-enter, so this is a
782            // silent no-op (matching the `Use`-without-env behavior).
783            //
784            // We use `debug!` rather than `debug_assert!` here because
785            // property tests (`prop_arbitrary_rule_evaluation_never_panics`)
786            // synthesize arbitrary `TypeKind::Meta(MetaType::Indirect)`
787            // rules and run them without attaching a `RuleEnvironment`;
788            // a panic on this path would break the never-panics invariant.
789            // See GOTCHAS S2.1 for the same rationale on the leaked-Name arm.
790            let Some(root_rules) = context.rule_env().map(|e| e.root_rules.clone()) else {
791                debug!(
792                    "indirect rule '{}' evaluated without a rule environment; treating as no-op",
793                    rule.message
794                );
795                continue;
796            };
797
798            // Bounds-check before slicing. An indirect offset past the
799            // end of the buffer is a data-dependent skip, not an error.
800            let Some(sub_buffer) = buffer.get(absolute_offset..) else {
801                debug!(
802                    "Skipping indirect rule '{}': offset {} past buffer end ({} bytes)",
803                    rule.message,
804                    absolute_offset,
805                    buffer.len()
806                );
807                continue;
808            };
809
810            let matches_before = matches.len();
811
812            // Advance the GNU `file` previous-match anchor to the indirect's
813            // resolved offset and emit a `RuleMatch` for the indirect rule
814            // itself BEFORE descending into the root re-entry or children.
815            // This matches the shared successful-match flow used by every
816            // other rule kind: advance anchor first, record the match, then
817            // recurse. Without this, sibling rules of the `indirect` resolve
818            // their relative offsets against the stale anchor and the
819            // directive's own `message` never surfaces in the output.
820            context.set_last_match_end(absolute_offset);
821
822            let indirect_match = RuleMatch {
823                message: rule.message.clone(),
824                offset: absolute_offset,
825                level: rule.level,
826                value: crate::parser::ast::Value::String("indirect".to_string()),
827                type_kind: rule.typ.clone(),
828                confidence: RuleMatch::calculate_confidence(rule.level),
829            };
830            matches.push(indirect_match);
831
832            // Indirect counts as a match for `sibling_matched` regardless of
833            // whether the sub-evaluation produced any matches -- the directive
834            // itself successfully dispatched.
835            sibling_matched = true;
836
837            // Recursion guard + anchor scope: nested indirect / use cycles
838            // surface as `RecursionLimitExceeded` instead of a stack overflow,
839            // and the caller's anchor is restored on every exit path.
840            //
841            // Mark the upcoming `evaluate_rules` call as a top-level
842            // re-entry (consumed at entry) so sibling anchor-reset
843            // semantics do NOT fire -- root rules in the re-entered
844            // database chain their anchors across siblings like any
845            // other top-level evaluation.
846            {
847                let mut guard = RecursionGuard::enter(context)?;
848                let mut anchor_scope = AnchorScope::enter(guard.context(), 0);
849                anchor_scope.context().set_indirect_reentry(true);
850                match evaluate_rules(&root_rules, sub_buffer, anchor_scope.context()) {
851                    Ok(sub_matches) => {
852                        matches.extend(sub_matches);
853                    }
854                    Err(LibmagicError::Timeout { timeout_ms }) => {
855                        return Err(LibmagicError::Timeout { timeout_ms });
856                    }
857                    Err(e) => return Err(e),
858                }
859                // anchor_scope drops here, restoring the saved anchor
860                // (which is now `absolute_offset`, set above before the
861                // scope was entered).
862                // guard drops next, decrementing the recursion depth.
863            }
864
865            // Evaluate the indirect rule's own children under the same
866            // recursion-guard pattern used by every other successful rule.
867            evaluate_children_or_warn(rule, "indirect", buffer, context, &mut matches)?;
868
869            if matches.len() > matches_before && context.should_stop_at_first_match() {
870                break;
871            }
872            continue;
873        }
874
875        // `Offset` reports the resolved file offset as the rule's read
876        // value, matching GNU `file`'s `FILE_OFFSET` semantics: the match
877        // emits a value-bearing `RuleMatch` whose `value` is the absolute
878        // position, which downstream message formatting substitutes into
879        // `%lld` / `%d` specifiers via `output::format::format_magic_message`.
880        //
881        // Per magic(5) the only legal operator is `x` (AnyValue); any
882        // other operator is a magic-file semantic error. Matching the
883        // evaluator's graceful-skip discipline, we `debug!`-log and skip
884        // rather than erroring -- a rogue rule shouldn't poison the rest
885        // of the evaluation.
886        if let TypeKind::Meta(MetaType::Offset) = &rule.typ {
887            if !matches!(rule.op, crate::parser::ast::Operator::AnyValue) {
888                debug!(
889                    "offset rule '{}': non-`x` operator {:?} not supported; skipping",
890                    rule.message, rule.op
891                );
892                continue;
893            }
894
895            // Resolve the offset first so a malformed offset surfaces as
896            // a graceful skip rather than a hard error. Mirrors the
897            // `Indirect` dispatch above.
898            let absolute_offset = match offset::resolve_offset_with_base(
899                &rule.offset,
900                buffer,
901                context.last_match_end(),
902                context.base_offset(),
903            ) {
904                Ok(o) => o,
905                Err(
906                    e @ LibmagicError::EvaluationError(
907                        crate::error::EvaluationError::BufferOverrun { .. }
908                        | crate::error::EvaluationError::InvalidOffset { .. },
909                    ),
910                ) => {
911                    debug!("Skipping offset rule '{}': {}", rule.message, e);
912                    continue;
913                }
914                Err(e) => return Err(e),
915            };
916
917            let matches_before = matches.len();
918
919            // Advance the anchor BEFORE emitting the match so sibling
920            // rules resolve their relative offsets against the offset
921            // directive's resolved position. Same discipline as
922            // `Indirect` and every other value-bearing rule.
923            context.set_last_match_end(absolute_offset);
924
925            let offset_match = RuleMatch {
926                message: rule.message.clone(),
927                offset: absolute_offset,
928                level: rule.level,
929                value: crate::parser::ast::Value::Uint(absolute_offset as u64),
930                type_kind: rule.typ.clone(),
931                confidence: RuleMatch::calculate_confidence(rule.level),
932            };
933            matches.push(offset_match);
934
935            sibling_matched = true;
936
937            // Evaluate children under the recursion-guard pattern used
938            // by every other successful rule.
939            evaluate_children_or_warn(rule, "offset", buffer, context, &mut matches)?;
940
941            if matches.len() > matches_before && context.should_stop_at_first_match() {
942                break;
943            }
944            continue;
945        }
946
947        // `Use` is handled inline so the subroutine's matches can be
948        // spliced into the caller's match vector in document order.
949        // Routing this through `evaluate_single_rule_with_anchor` would
950        // force the helper to return a `Vec<RuleMatch>`, which would
951        // reshape the single-rule return type for every other variant.
952        //
953        // On a successful use path we must also descend into the rule's
954        // own children, matching the flow of every other successful rule
955        // kind. libmagic chains like `>>0 use part2` often carry
956        // continuation rules (siblings and descendants of the `use` site)
957        // that depend on the anchor the subroutine left behind; skipping
958        // them produces user-visible false negatives.
959        if let TypeKind::Meta(MetaType::Use(name)) = &rule.typ {
960            let matches_before = matches.len();
961            let use_resolved = match evaluate_use_rule(rule, name, buffer, context) {
962                Ok((Some(terminal_anchor), subroutine_matches)) => {
963                    matches.extend(subroutine_matches);
964
965                    // A `use` rule does not produce a surface
966                    // `RuleMatch` itself -- the subroutine's rules
967                    // carry the visible messages. Advance the
968                    // caller's anchor to the subroutine's TERMINAL
969                    // anchor (where the subroutine left `last_match_end`),
970                    // not the use-site offset. This makes `use`
971                    // behave like inlining the subroutine: sibling
972                    // rules after the `use` see `&N` resolve against
973                    // the subroutine's final match position.
974                    context.set_last_match_end(terminal_anchor);
975                    true
976                }
977                Ok((None, _)) => {
978                    // No environment, or name not found -- silent no-op.
979                    false
980                }
981                Err(
982                    e @ LibmagicError::EvaluationError(
983                        crate::error::EvaluationError::BufferOverrun { .. }
984                        | crate::error::EvaluationError::InvalidOffset { .. },
985                    ),
986                ) => {
987                    debug!("Skipping use rule '{name}': {e}");
988                    false
989                }
990                Err(e) => return Err(e),
991            };
992
993            // Evaluate the use rule's own children exactly like any other
994            // successful rule. Subroutine matches are already appended
995            // above, so children are spliced in after them to preserve
996            // document order. The recursion guard mirrors the non-`Use`
997            // path so a `use`-site chain cannot blow past the configured
998            // recursion limit.
999            if use_resolved {
1000                evaluate_children_or_warn(rule, "use", buffer, context, &mut matches)?;
1001            }
1002
1003            // A successful `use` site is treated as a sibling match for
1004            // `default`/`clear` dispatch purposes -- subsequent `default`
1005            // siblings should not fire if the subroutine resolved.
1006            if use_resolved {
1007                sibling_matched = true;
1008            }
1009
1010            // Apply stop-at-first-match with the same semantics as every
1011            // other successful rule kind: if this `use` site contributed
1012            // any matches (either from the subroutine or from its own
1013            // children) and the caller configured first-match
1014            // short-circuiting, halt evaluation of further siblings.
1015            if matches.len() > matches_before && context.should_stop_at_first_match() {
1016                break;
1017            }
1018            continue;
1019        }
1020
1021        // Evaluate the current rule with graceful error handling.
1022        // Pass the GNU `file` anchor so OffsetSpec::Relative resolves
1023        // correctly against the previous match's end position.
1024        let match_data = match evaluate_single_rule_with_anchor(
1025            rule,
1026            buffer,
1027            context.last_match_end(),
1028            context.base_offset(),
1029        ) {
1030            Ok(data) => data,
1031            Err(
1032                e @ (LibmagicError::EvaluationError(
1033                    crate::error::EvaluationError::BufferOverrun { .. }
1034                    | crate::error::EvaluationError::InvalidOffset { .. }
1035                    | crate::error::EvaluationError::InvalidValueTransform { .. }
1036                    | crate::error::EvaluationError::TypeReadError(
1037                        crate::evaluator::types::TypeReadError::BufferOverrun { .. }
1038                        | crate::evaluator::types::TypeReadError::InvalidPStringLength { .. },
1039                    ),
1040                )
1041                | LibmagicError::IoError(_)),
1042            ) => {
1043                // Expected data-dependent evaluation errors -- skip gracefully.
1044                // TypeReadError::UnsupportedType is intentionally NOT caught here
1045                // so that evaluator capability gaps propagate as errors.
1046                debug!("Skipping rule '{}': {}", rule.message, e);
1047                continue;
1048            }
1049            Err(e) => {
1050                // Unexpected errors (InternalError, UnsupportedType, etc.) should propagate
1051                return Err(e);
1052            }
1053        };
1054
1055        if let Some((absolute_offset, read_value)) = match_data {
1056            // Advance the GNU `file` previous-match anchor BEFORE recursing
1057            // into children, so children and their descendants see the new
1058            // anchor. The anchor is updated unconditionally to the end of
1059            // this match -- it may move forward or backward depending on
1060            // where successive rules match (it is *not* a high-watermark).
1061            let consumed = types::bytes_consumed_with_pattern(
1062                buffer,
1063                absolute_offset,
1064                &rule.typ,
1065                Some(&rule.value),
1066            );
1067            let new_anchor = absolute_offset.saturating_add(consumed);
1068            context.set_last_match_end(new_anchor);
1069
1070            // Mark this level as "matched" so any subsequent `default`
1071            // sibling at the same level is suppressed, matching libmagic's
1072            // default-after-match semantics.
1073            sibling_matched = true;
1074
1075            let match_result = RuleMatch {
1076                message: rule.message.clone(),
1077                offset: absolute_offset,
1078                level: rule.level,
1079                value: read_value,
1080                type_kind: rule.typ.clone(),
1081                confidence: RuleMatch::calculate_confidence(rule.level),
1082            };
1083            matches.push(match_result);
1084
1085            // If this rule has children, evaluate them recursively
1086            if !rule.children.is_empty() {
1087                // Check recursion depth limit - this is a critical error that should stop evaluation.
1088                // `RecursionGuard` decrements the depth on drop, so every exit path below
1089                // (Ok, graceful warn!, or early-return via `?`) restores the counter.
1090                let mut guard = RecursionGuard::enter(context)?;
1091
1092                // Recursively evaluate child rules with graceful error handling
1093                match evaluate_rules(&rule.children, buffer, guard.context()) {
1094                    Ok(child_matches) => {
1095                        matches.extend(child_matches);
1096                    }
1097                    Err(LibmagicError::Timeout { timeout_ms }) => {
1098                        // Timeout is critical, propagate it up (guard drops here).
1099                        return Err(LibmagicError::Timeout { timeout_ms });
1100                    }
1101                    Err(
1102                        e @ (LibmagicError::EvaluationError(
1103                            crate::error::EvaluationError::BufferOverrun { .. }
1104                            | crate::error::EvaluationError::InvalidOffset { .. }
1105                            | crate::error::EvaluationError::InvalidValueTransform { .. }
1106                            | crate::error::EvaluationError::TypeReadError(
1107                                crate::evaluator::types::TypeReadError::BufferOverrun { .. }
1108                                | crate::evaluator::types::TypeReadError::InvalidPStringLength {
1109                                    ..
1110                                },
1111                            ),
1112                        )
1113                        | LibmagicError::IoError(_)),
1114                    ) => {
1115                        // Defensive: under the current implementation, individual child
1116                        // failures are caught and logged inside the recursive evaluate_rules
1117                        // call (they never propagate here). This arm guards against future
1118                        // changes that might alter that error-handling strategy.
1119                        //
1120                        // If this fires, the parent match is still emitted but the entire
1121                        // child subtree is silently dropped -- which means a partial,
1122                        // possibly-incorrect classification is returned to the caller.
1123                        // Logged at warn! (not debug!) so the asymmetry is visible.
1124                        warn!(
1125                            "Discarding child evaluation under rule '{}' due to unexpected error: {} -- parent match is still emitted; investigate the recursive evaluate_rules error-handling path",
1126                            rule.message, e
1127                        );
1128                    }
1129                    Err(e) => {
1130                        // Unexpected errors in children (including RecursionLimitExceeded)
1131                        // should propagate. The guard drops here, decrementing the depth.
1132                        return Err(e);
1133                    }
1134                }
1135                // `guard` drops here, decrementing the recursion depth.
1136            }
1137
1138            // Stop at first match if configured to do so
1139            if context.should_stop_at_first_match() {
1140                break;
1141            }
1142        }
1143    }
1144
1145    Ok(matches)
1146}
1147
1148/// Evaluate magic rules with a fresh context
1149///
1150/// This is a convenience function that creates a new evaluation context
1151/// and evaluates the rules. Useful for simple evaluation scenarios.
1152///
1153/// # Arguments
1154///
1155/// * `rules` - The list of magic rules to evaluate
1156/// * `buffer` - The file buffer to evaluate against
1157/// * `config` - Configuration for evaluation behavior
1158///
1159/// # Returns
1160///
1161/// Returns `Ok(Vec<RuleMatch>)` containing all matches found, or `Err(LibmagicError)`
1162/// if evaluation fails.
1163///
1164/// # Examples
1165///
1166/// ```rust
1167/// use libmagic_rs::evaluator::{evaluate_rules_with_config, RuleMatch};
1168/// use libmagic_rs::parser::ast::{MagicRule, OffsetSpec, TypeKind, Operator, Value};
1169/// use libmagic_rs::EvaluationConfig;
1170///
1171/// let rule = MagicRule {
1172///     offset: OffsetSpec::Absolute(0),
1173///     typ: TypeKind::Byte { signed: true },
1174///     op: Operator::Equal,
1175///     value: Value::Uint(0x7f),
1176///     message: "ELF magic".to_string(),
1177///     children: vec![],
1178///     level: 0,
1179///     strength_modifier: None,
1180/// value_transform: None,
1181/// };
1182///
1183/// let rules = vec![rule];
1184/// let buffer = &[0x7f, 0x45, 0x4c, 0x46];
1185/// let config = EvaluationConfig::default();
1186///
1187/// let matches = evaluate_rules_with_config(&rules, buffer, &config).unwrap();
1188/// assert_eq!(matches.len(), 1);
1189/// assert_eq!(matches[0].message, "ELF magic");
1190/// ```
1191///
1192/// # Errors
1193///
1194/// * `LibmagicError::EvaluationError` - If rule evaluation fails
1195/// * `LibmagicError::Timeout` - If evaluation exceeds configured timeout
1196pub fn evaluate_rules_with_config(
1197    rules: &[MagicRule],
1198    buffer: &[u8],
1199    config: &EvaluationConfig,
1200) -> Result<Vec<RuleMatch>, LibmagicError> {
1201    // Validate the configuration before constructing a context so that
1202    // out-of-range values (e.g. zero recursion depth, excessive timeouts)
1203    // are rejected at the API boundary rather than triggering subtle
1204    // failures during evaluation.
1205    config.validate()?;
1206    // Diagnostic guard: `evaluate_rules_with_config` builds a context
1207    // without an attached `RuleEnvironment`, which means any
1208    // `MetaType::Indirect` rule reached during evaluation is silently
1209    // no-op'd at runtime. That is the intentional behavior for low-level
1210    // callers (matching the `Use`-without-env contract), but we surface
1211    // the misconfiguration at `warn!` level (once per process) so a
1212    // consumer who wires up env-less `indirect` rules will see the
1213    // diagnostic in default logging rather than only at debug level.
1214    // The tree walk runs only in debug builds -- in release builds the
1215    // `cfg(debug_assertions)` gate prevents the O(n) scan on every
1216    // top-level evaluation. Using `debug_assert!` would panic in test
1217    // builds and break the "evaluator never panics" invariant documented
1218    // in GOTCHAS S2.4 -- a misconfigured caller should get a no-op with
1219    // a log entry, not a crash.
1220    #[cfg(debug_assertions)]
1221    if contains_indirect_rule(rules)
1222        && !INDIRECT_WITHOUT_RULE_ENV_WARNED.swap(true, Ordering::Relaxed)
1223    {
1224        warn!(
1225            "{} (subsequent occurrences suppressed)",
1226            crate::error::EvaluationError::indirect_without_environment()
1227        );
1228    }
1229    // Clear the thread-local regex compile cache so it is bounded to
1230    // the lifetime of a single top-level evaluation call. Cache
1231    // entries from a previous rule set would otherwise persist on the
1232    // current thread until process exit. See
1233    // `evaluator::types::regex::reset_regex_cache` for rationale.
1234    crate::evaluator::types::regex::reset_regex_cache();
1235    let mut context = EvaluationContext::new(config.clone());
1236    evaluate_rules(rules, buffer, &mut context)
1237}
1238
1239/// Recursively walk `rules` (including children) looking for any
1240/// [`MetaType::Indirect`] directive.
1241///
1242/// Used by the diagnostic guard in [`evaluate_rules_with_config`]: the
1243/// low-level `_with_config` entry point builds a context without a
1244/// [`crate::evaluator::RuleEnvironment`], so any `indirect` rule is
1245/// silently no-op'd at runtime. The check logs the misconfiguration at
1246/// `debug!` level so consumer tests can detect it without panicking (see
1247/// GOTCHAS S2.4 for why `debug_assert!` would be wrong here).
1248fn contains_indirect_rule(rules: &[MagicRule]) -> bool {
1249    rules.iter().any(|rule| {
1250        matches!(rule.typ, TypeKind::Meta(MetaType::Indirect))
1251            || contains_indirect_rule(&rule.children)
1252    })
1253}
1254
1255#[cfg(test)]
1256mod tests;