Skip to main content

libmagic_rs/evaluator/
mod.rs

1// Copyright (c) 2025-2026 the libmagic-rs contributors
2// SPDX-License-Identifier: Apache-2.0
3
4//! Rule evaluation engine
5//!
6//! This module provides the public interface for magic rule evaluation,
7//! including data types for evaluation state and match results, and
8//! re-exports the core evaluation functions from submodules.
9
10use crate::{EvaluationConfig, LibmagicError};
11use serde::{Deserialize, Serialize};
12
13mod engine;
14pub mod offset;
15pub mod operators;
16pub mod strength;
17pub mod types;
18
19pub use engine::{evaluate_rules, evaluate_rules_with_config, evaluate_single_rule};
20
21/// Shared environment attached to an [`EvaluationContext`] so the engine can
22/// resolve whole-database operations (currently: `Use` subroutine lookups;
23/// eventually `indirect` whole-tree re-entry).
24///
25/// Stored as an `Arc` so cloning a context across recursive calls is cheap
26/// and the rule data can be shared safely across threads.
27#[derive(Debug, Clone)]
28pub(crate) struct RuleEnvironment {
29    /// Named subroutine table, keyed by identifier.
30    pub(crate) name_table: std::sync::Arc<crate::parser::name_table::NameTable>,
31    /// Top-level rule list retained for future whole-database operations.
32    #[allow(dead_code)]
33    pub(crate) root_rules: std::sync::Arc<[crate::parser::ast::MagicRule]>,
34}
35
36/// Context for maintaining evaluation state during rule processing
37///
38/// The `EvaluationContext` tracks the current state of rule evaluation,
39/// including the current offset position, recursion depth for nested rules,
40/// and configuration settings that control evaluation behavior.
41///
42/// # Examples
43///
44/// ```rust
45/// use libmagic_rs::evaluator::EvaluationContext;
46/// use libmagic_rs::EvaluationConfig;
47///
48/// let config = EvaluationConfig::default();
49/// let context = EvaluationContext::new(config);
50///
51/// assert_eq!(context.current_offset(), 0);
52/// assert_eq!(context.recursion_depth(), 0);
53/// ```
54#[derive(Debug, Clone)]
55pub struct EvaluationContext {
56    /// Current offset position in the file buffer
57    current_offset: usize,
58    /// End offset of the most recent successful match.
59    ///
60    /// This is the GNU `file`/libmagic anchor used to resolve relative
61    /// (`&+N` / `&-N`) offsets. It is updated to the end of the most
62    /// recently matched rule -- the value may *increase or decrease* as
63    /// successive rules match at different positions; it is not a
64    /// high-watermark. A fresh context starts with this set to 0, which
65    /// matches libmagic's behavior of resolving top-level relative offsets
66    /// from the file start.
67    last_match_end: usize,
68    /// Current recursion depth for nested rule evaluation
69    recursion_depth: u32,
70    /// Configuration settings for evaluation behavior
71    config: EvaluationConfig,
72    /// Optional rule environment (name table + root rules) threaded from
73    /// [`MagicDatabase`](crate::MagicDatabase). Evaluations that come in
74    /// through the low-level [`evaluate_rules`] / [`evaluate_rules_with_config`]
75    /// surface (tests, programmatic consumers) run with `rule_env = None`,
76    /// in which case `MetaType::Use` rules are silent no-ops.
77    rule_env: Option<std::sync::Arc<RuleEnvironment>>,
78    /// Base offset applied to absolute offset resolution.
79    ///
80    /// Normally 0. When evaluating a subroutine body via `MetaType::Use`,
81    /// this is set to the use-site offset so that the subroutine's
82    /// `OffsetSpec::Absolute(n)` rules resolve to `base + n` (matching
83    /// magic(5) / libmagic semantics: subroutines see offsets relative
84    /// to the caller's invocation point, not absolute file positions).
85    /// Restored to the caller's value on subroutine exit via the
86    /// `SubroutineScope` RAII guard in `engine/mod.rs`, which saves
87    /// and restores both `last_match_end` and `base_offset` together.
88    base_offset: usize,
89    /// One-shot flag set by `MetaType::Indirect` dispatch before
90    /// re-entering the root rule list. When true, the next entry to
91    /// `evaluate_rules` treats the iteration as a top-level sibling
92    /// chain (anchor chains across siblings per GOTCHAS S3.8) rather
93    /// than as a continuation list (anchor resets between siblings).
94    /// Consumed at entry — children of a matched rule inside the
95    /// re-entry see the flag cleared, so their own continuation-reset
96    /// semantics kick in via the `recursion_depth > 0` gate.
97    ///
98    /// Without this flag, `indirect` wrapping re-entry under
99    /// `RecursionGuard` forces `recursion_depth > 0`, which forces
100    /// continuation-reset semantics on the root rule list — wrong,
101    /// because top-level rules in the re-entered database should
102    /// chain sibling anchors like any other top-level evaluation.
103    indirect_reentry: bool,
104}
105
106impl EvaluationContext {
107    /// Create a new evaluation context with the given configuration
108    ///
109    /// # Arguments
110    ///
111    /// * `config` - Configuration settings for evaluation behavior
112    ///
113    /// # Examples
114    ///
115    /// ```rust
116    /// use libmagic_rs::evaluator::EvaluationContext;
117    /// use libmagic_rs::EvaluationConfig;
118    ///
119    /// let config = EvaluationConfig::default();
120    /// let context = EvaluationContext::new(config);
121    /// ```
122    #[must_use]
123    pub const fn new(config: EvaluationConfig) -> Self {
124        Self {
125            current_offset: 0,
126            last_match_end: 0,
127            recursion_depth: 0,
128            config,
129            rule_env: None,
130            base_offset: 0,
131            indirect_reentry: false,
132        }
133    }
134
135    /// Read-only access to the subroutine base offset. Non-zero only
136    /// during a `MetaType::Use` body evaluation.
137    #[must_use]
138    pub(crate) const fn base_offset(&self) -> usize {
139        self.base_offset
140    }
141
142    /// Set the subroutine base offset.
143    ///
144    /// `pub(crate)` and owned by the engine's `SubroutineScope` RAII
145    /// guard -- no external caller should set this directly.
146    pub(crate) fn set_base_offset(&mut self, offset: usize) {
147        self.base_offset = offset;
148    }
149
150    /// Read-and-clear the indirect-reentry flag. Used by `evaluate_rules`
151    /// at entry to decide whether the iteration is a top-level re-entry
152    /// (no anchor reset between siblings) or a continuation list (reset
153    /// between siblings). Cleared on read so children of a matched rule
154    /// inside the re-entry see the flag as false and fall back to the
155    /// `recursion_depth > 0` gate for their own continuation semantics.
156    pub(crate) fn take_indirect_reentry(&mut self) -> bool {
157        std::mem::take(&mut self.indirect_reentry)
158    }
159
160    /// Set the indirect-reentry flag.
161    ///
162    /// `pub(crate)` and owned by the `MetaType::Indirect` dispatch in
163    /// `engine/mod.rs`. Callers should set this true exactly once
164    /// before invoking `evaluate_rules` on the root rule list.
165    pub(crate) fn set_indirect_reentry(&mut self, flag: bool) {
166        self.indirect_reentry = flag;
167    }
168
169    /// Attach a rule environment to this context.
170    ///
171    /// The environment carries the name-subroutine table and root rule list
172    /// so the engine can resolve `MetaType::Use` rules and (eventually)
173    /// `MetaType::Indirect` re-entries. Intended to be called once by
174    /// [`MagicDatabase`](crate::MagicDatabase) before handing the context
175    /// to [`evaluate_rules`].
176    #[must_use]
177    pub(crate) fn with_rule_env(mut self, env: std::sync::Arc<RuleEnvironment>) -> Self {
178        self.rule_env = Some(env);
179        self
180    }
181
182    /// Read-only access to the attached rule environment, if any.
183    #[must_use]
184    pub(crate) fn rule_env(&self) -> Option<&RuleEnvironment> {
185        self.rule_env.as_deref()
186    }
187
188    /// Get the current offset position
189    ///
190    /// # Returns
191    ///
192    /// The current offset position in the file buffer
193    #[must_use]
194    pub const fn current_offset(&self) -> usize {
195        self.current_offset
196    }
197
198    /// Set the current offset position
199    ///
200    /// # Arguments
201    ///
202    /// * `offset` - The new offset position
203    pub fn set_current_offset(&mut self, offset: usize) {
204        self.current_offset = offset;
205    }
206
207    /// Get the end offset of the most recent successful match.
208    ///
209    /// This is the GNU `file`/libmagic anchor used to resolve relative
210    /// (`&+N` / `&-N`) offset specifications. A fresh context returns 0,
211    /// which makes top-level relative offsets resolve from the file start.
212    ///
213    /// `pub(crate)` because the anchor is an internal engine detail; external
214    /// consumers should not couple to it.
215    #[must_use]
216    pub(crate) const fn last_match_end(&self) -> usize {
217        self.last_match_end
218    }
219
220    /// Set the end offset of the most recent successful match.
221    ///
222    /// Called by the evaluation engine after a rule matches, to advance the
223    /// anchor used by subsequent relative offset resolution. The new value
224    /// is typically `match_offset + bytes_consumed_by_type`.
225    ///
226    /// `pub(crate)` because external callers should not be able to inject
227    /// arbitrary anchor state. External callers that need to clear the
228    /// anchor between buffer evaluations should call
229    /// `EvaluationContext::reset()`, which resets the anchor, current
230    /// offset, and recursion depth together.
231    pub(crate) fn set_last_match_end(&mut self, offset: usize) {
232        self.last_match_end = offset;
233    }
234
235    /// Get the current recursion depth
236    ///
237    /// # Returns
238    ///
239    /// The current recursion depth for nested rule evaluation
240    #[must_use]
241    pub const fn recursion_depth(&self) -> u32 {
242        self.recursion_depth
243    }
244
245    /// Increment the recursion depth
246    ///
247    /// # Returns
248    ///
249    /// `Ok(())` if the recursion depth is within limits, or `Err(LibmagicError)`
250    /// if the maximum recursion depth would be exceeded
251    ///
252    /// # Errors
253    ///
254    /// Returns `LibmagicError::EvaluationError` if incrementing would exceed
255    /// the maximum recursion depth configured in the evaluation config.
256    pub(crate) fn increment_recursion_depth(&mut self) -> Result<(), LibmagicError> {
257        if self.recursion_depth >= self.config.max_recursion_depth {
258            return Err(LibmagicError::EvaluationError(
259                crate::error::EvaluationError::recursion_limit_exceeded(self.recursion_depth),
260            ));
261        }
262        self.recursion_depth += 1;
263        Ok(())
264    }
265
266    /// Decrement the recursion depth
267    ///
268    /// # Errors
269    ///
270    /// Returns an error if the recursion depth is already 0, as this indicates
271    /// a programming error in the evaluation logic (mismatched increment/decrement calls).
272    pub(crate) fn decrement_recursion_depth(&mut self) -> Result<(), LibmagicError> {
273        if self.recursion_depth == 0 {
274            return Err(LibmagicError::EvaluationError(
275                crate::error::EvaluationError::internal_error(
276                    "Attempted to decrement recursion depth below 0",
277                ),
278            ));
279        }
280        self.recursion_depth -= 1;
281        Ok(())
282    }
283
284    /// Get a reference to the evaluation configuration
285    ///
286    /// # Returns
287    ///
288    /// A reference to the `EvaluationConfig` used by this context
289    #[must_use]
290    pub const fn config(&self) -> &EvaluationConfig {
291        &self.config
292    }
293
294    /// Check if evaluation should stop at the first match
295    ///
296    /// # Returns
297    ///
298    /// `true` if evaluation should stop at the first match, `false` otherwise
299    #[must_use]
300    pub const fn should_stop_at_first_match(&self) -> bool {
301        self.config.stop_at_first_match
302    }
303
304    /// Get the maximum string length allowed
305    ///
306    /// # Returns
307    ///
308    /// The maximum string length that should be read during evaluation
309    #[must_use]
310    pub const fn max_string_length(&self) -> usize {
311        self.config.max_string_length
312    }
313
314    /// Check if MIME type mapping is enabled
315    ///
316    /// # Returns
317    ///
318    /// `true` if MIME type mapping should be performed, `false` otherwise
319    #[must_use]
320    pub const fn enable_mime_types(&self) -> bool {
321        self.config.enable_mime_types
322    }
323
324    /// Get the evaluation timeout in milliseconds
325    ///
326    /// # Returns
327    ///
328    /// The timeout duration in milliseconds, or `None` if no timeout is set
329    #[must_use]
330    pub const fn timeout_ms(&self) -> Option<u64> {
331        self.config.timeout_ms
332    }
333
334    /// Reset the context to initial state while preserving configuration
335    ///
336    /// This resets the current offset and recursion depth to 0, but keeps
337    /// the same configuration settings.
338    pub fn reset(&mut self) {
339        self.current_offset = 0;
340        self.last_match_end = 0;
341        self.recursion_depth = 0;
342        self.base_offset = 0;
343        self.indirect_reentry = false;
344    }
345}
346
347/// RAII guard that increments recursion depth on entry and decrements on drop.
348///
349/// Replaces the manual `increment_recursion_depth` / `decrement_recursion_depth`
350/// pair with a scope-based guard, eliminating the risk of mismatched calls and
351/// the need to swallow cleanup errors on error-return paths.
352///
353/// Obtain a guard via [`RecursionGuard::enter`], which borrows the context
354/// mutably for the guard's lifetime. Use [`RecursionGuard::context`] to access
355/// the borrowed context for the duration of the recursive call. The guard
356/// automatically decrements the recursion depth when it goes out of scope.
357///
358/// The guard is `pub(crate)` because recursion-depth management is an internal
359/// detail of the evaluation engine.
360pub(crate) struct RecursionGuard<'a> {
361    context: &'a mut EvaluationContext,
362}
363
364impl<'a> RecursionGuard<'a> {
365    /// Enter a new recursion level, incrementing the context's recursion depth.
366    ///
367    /// # Errors
368    ///
369    /// Returns `LibmagicError::EvaluationError` if incrementing would exceed
370    /// the maximum recursion depth configured in the evaluation config.
371    pub(crate) fn enter(context: &'a mut EvaluationContext) -> Result<Self, LibmagicError> {
372        context.increment_recursion_depth()?;
373        Ok(Self { context })
374    }
375
376    /// Access the underlying context for the duration of the guard.
377    pub(crate) fn context(&mut self) -> &mut EvaluationContext {
378        self.context
379    }
380}
381
382impl Drop for RecursionGuard<'_> {
383    fn drop(&mut self) {
384        // Safe to ignore: `decrement_recursion_depth` only fails when the
385        // depth is already 0, which is impossible here because `enter` just
386        // incremented it and the depth is only mutated through guard pairs.
387        let result = self.context.decrement_recursion_depth();
388        debug_assert!(
389            result.is_ok(),
390            "RecursionGuard invariant violated: decrement failed after successful enter()"
391        );
392    }
393}
394
395/// Result of evaluating a magic rule
396///
397/// Contains information extracted from a successful rule match, including
398/// the matched value, position, and confidence score.
399#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
400pub struct RuleMatch {
401    /// The message associated with the matching rule
402    pub message: String,
403    /// The offset where the match occurred
404    pub offset: usize,
405    /// The rule level (depth in hierarchy)
406    pub level: u32,
407    /// The matched value
408    pub value: crate::parser::ast::Value,
409    /// The type used to read the matched value
410    ///
411    /// Carries the source `TypeKind` so downstream consumers (e.g., output
412    /// formatting) can determine the on-disk width of the matched value.
413    pub type_kind: crate::parser::ast::TypeKind,
414    /// Confidence score (0.0 to 1.0)
415    ///
416    /// Calculated based on match depth in the rule hierarchy.
417    /// Deeper matches indicate more specific file type identification
418    /// and thus higher confidence.
419    pub confidence: f64,
420}
421
422impl RuleMatch {
423    /// Calculate confidence score based on rule depth
424    ///
425    /// Formula: min(1.0, 0.3 + (level * 0.2))
426    /// - Level 0 (root): 0.3
427    /// - Level 1: 0.5
428    /// - Level 2: 0.7
429    /// - Level 3: 0.9
430    /// - Level 4+: 1.0 (capped)
431    ///
432    /// # Examples
433    ///
434    /// ```
435    /// use libmagic_rs::evaluator::RuleMatch;
436    ///
437    /// assert!((RuleMatch::calculate_confidence(0) - 0.3).abs() < 0.001);
438    /// assert!((RuleMatch::calculate_confidence(3) - 0.9).abs() < 0.001);
439    /// assert!((RuleMatch::calculate_confidence(10) - 1.0).abs() < 0.001);
440    /// ```
441    #[must_use]
442    pub fn calculate_confidence(level: u32) -> f64 {
443        (0.3 + (f64::from(level) * 0.2)).min(1.0)
444    }
445}
446
447#[cfg(test)]
448mod tests;