libmagic_rs/evaluator/mod.rs
1// Copyright (c) 2025-2026 the libmagic-rs contributors
2// SPDX-License-Identifier: Apache-2.0
3
4//! Rule evaluation engine
5//!
6//! This module provides the public interface for magic rule evaluation,
7//! including data types for evaluation state and match results, and
8//! re-exports the core evaluation functions from submodules.
9
10use crate::{EvaluationConfig, LibmagicError};
11use serde::{Deserialize, Serialize};
12
13mod engine;
14pub mod offset;
15pub mod operators;
16pub mod strength;
17pub mod types;
18
19pub use engine::{evaluate_rules, evaluate_rules_with_config, evaluate_single_rule};
20
21/// Shared environment attached to an [`EvaluationContext`] so the engine can
22/// resolve whole-database operations (currently: `Use` subroutine lookups;
23/// eventually `indirect` whole-tree re-entry).
24///
25/// Stored as an `Arc` so cloning a context across recursive calls is cheap
26/// and the rule data can be shared safely across threads.
27#[derive(Debug, Clone)]
28pub(crate) struct RuleEnvironment {
29 /// Named subroutine table, keyed by identifier.
30 pub(crate) name_table: std::sync::Arc<crate::parser::name_table::NameTable>,
31 /// Top-level rule list retained for future whole-database operations.
32 #[allow(dead_code)]
33 pub(crate) root_rules: std::sync::Arc<[crate::parser::ast::MagicRule]>,
34}
35
36/// Context for maintaining evaluation state during rule processing
37///
38/// The `EvaluationContext` tracks the current state of rule evaluation,
39/// including the current offset position, recursion depth for nested rules,
40/// and configuration settings that control evaluation behavior.
41///
42/// # Examples
43///
44/// ```rust
45/// use libmagic_rs::evaluator::EvaluationContext;
46/// use libmagic_rs::EvaluationConfig;
47///
48/// let config = EvaluationConfig::default();
49/// let context = EvaluationContext::new(config);
50///
51/// assert_eq!(context.current_offset(), 0);
52/// assert_eq!(context.recursion_depth(), 0);
53/// ```
54#[derive(Debug, Clone)]
55pub struct EvaluationContext {
56 /// Current offset position in the file buffer
57 current_offset: usize,
58 /// End offset of the most recent successful match.
59 ///
60 /// This is the GNU `file`/libmagic anchor used to resolve relative
61 /// (`&+N` / `&-N`) offsets. It is updated to the end of the most
62 /// recently matched rule -- the value may *increase or decrease* as
63 /// successive rules match at different positions; it is not a
64 /// high-watermark. A fresh context starts with this set to 0, which
65 /// matches libmagic's behavior of resolving top-level relative offsets
66 /// from the file start.
67 last_match_end: usize,
68 /// Current recursion depth for nested rule evaluation
69 recursion_depth: u32,
70 /// Configuration settings for evaluation behavior
71 config: EvaluationConfig,
72 /// Optional rule environment (name table + root rules) threaded from
73 /// [`MagicDatabase`](crate::MagicDatabase). Evaluations that come in
74 /// through the low-level [`evaluate_rules`] / [`evaluate_rules_with_config`]
75 /// surface (tests, programmatic consumers) run with `rule_env = None`,
76 /// in which case `MetaType::Use` rules are silent no-ops.
77 rule_env: Option<std::sync::Arc<RuleEnvironment>>,
78 /// Base offset applied to absolute offset resolution.
79 ///
80 /// Normally 0. When evaluating a subroutine body via `MetaType::Use`,
81 /// this is set to the use-site offset so that the subroutine's
82 /// `OffsetSpec::Absolute(n)` rules resolve to `base + n` (matching
83 /// magic(5) / libmagic semantics: subroutines see offsets relative
84 /// to the caller's invocation point, not absolute file positions).
85 /// Restored to the caller's value on subroutine exit via the
86 /// `SubroutineScope` RAII guard in `engine/mod.rs`, which saves
87 /// and restores both `last_match_end` and `base_offset` together.
88 base_offset: usize,
89 /// One-shot flag set by `MetaType::Indirect` dispatch before
90 /// re-entering the root rule list. When true, the next entry to
91 /// `evaluate_rules` treats the iteration as a top-level sibling
92 /// chain (anchor chains across siblings per GOTCHAS S3.8) rather
93 /// than as a continuation list (anchor resets between siblings).
94 /// Consumed at entry — children of a matched rule inside the
95 /// re-entry see the flag cleared, so their own continuation-reset
96 /// semantics kick in via the `recursion_depth > 0` gate.
97 ///
98 /// Without this flag, `indirect` wrapping re-entry under
99 /// `RecursionGuard` forces `recursion_depth > 0`, which forces
100 /// continuation-reset semantics on the root rule list — wrong,
101 /// because top-level rules in the re-entered database should
102 /// chain sibling anchors like any other top-level evaluation.
103 indirect_reentry: bool,
104}
105
106impl EvaluationContext {
107 /// Create a new evaluation context with the given configuration
108 ///
109 /// # Arguments
110 ///
111 /// * `config` - Configuration settings for evaluation behavior
112 ///
113 /// # Examples
114 ///
115 /// ```rust
116 /// use libmagic_rs::evaluator::EvaluationContext;
117 /// use libmagic_rs::EvaluationConfig;
118 ///
119 /// let config = EvaluationConfig::default();
120 /// let context = EvaluationContext::new(config);
121 /// ```
122 #[must_use]
123 pub const fn new(config: EvaluationConfig) -> Self {
124 Self {
125 current_offset: 0,
126 last_match_end: 0,
127 recursion_depth: 0,
128 config,
129 rule_env: None,
130 base_offset: 0,
131 indirect_reentry: false,
132 }
133 }
134
135 /// Read-only access to the subroutine base offset. Non-zero only
136 /// during a `MetaType::Use` body evaluation.
137 #[must_use]
138 pub(crate) const fn base_offset(&self) -> usize {
139 self.base_offset
140 }
141
142 /// Set the subroutine base offset.
143 ///
144 /// `pub(crate)` and owned by the engine's `SubroutineScope` RAII
145 /// guard -- no external caller should set this directly.
146 pub(crate) fn set_base_offset(&mut self, offset: usize) {
147 self.base_offset = offset;
148 }
149
150 /// Read-and-clear the indirect-reentry flag. Used by `evaluate_rules`
151 /// at entry to decide whether the iteration is a top-level re-entry
152 /// (no anchor reset between siblings) or a continuation list (reset
153 /// between siblings). Cleared on read so children of a matched rule
154 /// inside the re-entry see the flag as false and fall back to the
155 /// `recursion_depth > 0` gate for their own continuation semantics.
156 pub(crate) fn take_indirect_reentry(&mut self) -> bool {
157 std::mem::take(&mut self.indirect_reentry)
158 }
159
160 /// Set the indirect-reentry flag.
161 ///
162 /// `pub(crate)` and owned by the `MetaType::Indirect` dispatch in
163 /// `engine/mod.rs`. Callers should set this true exactly once
164 /// before invoking `evaluate_rules` on the root rule list.
165 pub(crate) fn set_indirect_reentry(&mut self, flag: bool) {
166 self.indirect_reentry = flag;
167 }
168
169 /// Attach a rule environment to this context.
170 ///
171 /// The environment carries the name-subroutine table and root rule list
172 /// so the engine can resolve `MetaType::Use` rules and (eventually)
173 /// `MetaType::Indirect` re-entries. Intended to be called once by
174 /// [`MagicDatabase`](crate::MagicDatabase) before handing the context
175 /// to [`evaluate_rules`].
176 #[must_use]
177 pub(crate) fn with_rule_env(mut self, env: std::sync::Arc<RuleEnvironment>) -> Self {
178 self.rule_env = Some(env);
179 self
180 }
181
182 /// Read-only access to the attached rule environment, if any.
183 #[must_use]
184 pub(crate) fn rule_env(&self) -> Option<&RuleEnvironment> {
185 self.rule_env.as_deref()
186 }
187
188 /// Get the current offset position
189 ///
190 /// # Returns
191 ///
192 /// The current offset position in the file buffer
193 #[must_use]
194 pub const fn current_offset(&self) -> usize {
195 self.current_offset
196 }
197
198 /// Set the current offset position
199 ///
200 /// # Arguments
201 ///
202 /// * `offset` - The new offset position
203 pub fn set_current_offset(&mut self, offset: usize) {
204 self.current_offset = offset;
205 }
206
207 /// Get the end offset of the most recent successful match.
208 ///
209 /// This is the GNU `file`/libmagic anchor used to resolve relative
210 /// (`&+N` / `&-N`) offset specifications. A fresh context returns 0,
211 /// which makes top-level relative offsets resolve from the file start.
212 ///
213 /// `pub(crate)` because the anchor is an internal engine detail; external
214 /// consumers should not couple to it.
215 #[must_use]
216 pub(crate) const fn last_match_end(&self) -> usize {
217 self.last_match_end
218 }
219
220 /// Set the end offset of the most recent successful match.
221 ///
222 /// Called by the evaluation engine after a rule matches, to advance the
223 /// anchor used by subsequent relative offset resolution. The new value
224 /// is typically `match_offset + bytes_consumed_by_type`.
225 ///
226 /// `pub(crate)` because external callers should not be able to inject
227 /// arbitrary anchor state. External callers that need to clear the
228 /// anchor between buffer evaluations should call
229 /// `EvaluationContext::reset()`, which resets the anchor, current
230 /// offset, and recursion depth together.
231 pub(crate) fn set_last_match_end(&mut self, offset: usize) {
232 self.last_match_end = offset;
233 }
234
235 /// Get the current recursion depth
236 ///
237 /// # Returns
238 ///
239 /// The current recursion depth for nested rule evaluation
240 #[must_use]
241 pub const fn recursion_depth(&self) -> u32 {
242 self.recursion_depth
243 }
244
245 /// Increment the recursion depth
246 ///
247 /// # Returns
248 ///
249 /// `Ok(())` if the recursion depth is within limits, or `Err(LibmagicError)`
250 /// if the maximum recursion depth would be exceeded
251 ///
252 /// # Errors
253 ///
254 /// Returns `LibmagicError::EvaluationError` if incrementing would exceed
255 /// the maximum recursion depth configured in the evaluation config.
256 pub(crate) fn increment_recursion_depth(&mut self) -> Result<(), LibmagicError> {
257 if self.recursion_depth >= self.config.max_recursion_depth {
258 return Err(LibmagicError::EvaluationError(
259 crate::error::EvaluationError::recursion_limit_exceeded(self.recursion_depth),
260 ));
261 }
262 self.recursion_depth += 1;
263 Ok(())
264 }
265
266 /// Decrement the recursion depth
267 ///
268 /// # Errors
269 ///
270 /// Returns an error if the recursion depth is already 0, as this indicates
271 /// a programming error in the evaluation logic (mismatched increment/decrement calls).
272 pub(crate) fn decrement_recursion_depth(&mut self) -> Result<(), LibmagicError> {
273 if self.recursion_depth == 0 {
274 return Err(LibmagicError::EvaluationError(
275 crate::error::EvaluationError::internal_error(
276 "Attempted to decrement recursion depth below 0",
277 ),
278 ));
279 }
280 self.recursion_depth -= 1;
281 Ok(())
282 }
283
284 /// Get a reference to the evaluation configuration
285 ///
286 /// # Returns
287 ///
288 /// A reference to the `EvaluationConfig` used by this context
289 #[must_use]
290 pub const fn config(&self) -> &EvaluationConfig {
291 &self.config
292 }
293
294 /// Check if evaluation should stop at the first match
295 ///
296 /// # Returns
297 ///
298 /// `true` if evaluation should stop at the first match, `false` otherwise
299 #[must_use]
300 pub const fn should_stop_at_first_match(&self) -> bool {
301 self.config.stop_at_first_match
302 }
303
304 /// Get the maximum string length allowed
305 ///
306 /// # Returns
307 ///
308 /// The maximum string length that should be read during evaluation
309 #[must_use]
310 pub const fn max_string_length(&self) -> usize {
311 self.config.max_string_length
312 }
313
314 /// Check if MIME type mapping is enabled
315 ///
316 /// # Returns
317 ///
318 /// `true` if MIME type mapping should be performed, `false` otherwise
319 #[must_use]
320 pub const fn enable_mime_types(&self) -> bool {
321 self.config.enable_mime_types
322 }
323
324 /// Get the evaluation timeout in milliseconds
325 ///
326 /// # Returns
327 ///
328 /// The timeout duration in milliseconds, or `None` if no timeout is set
329 #[must_use]
330 pub const fn timeout_ms(&self) -> Option<u64> {
331 self.config.timeout_ms
332 }
333
334 /// Reset the context to initial state while preserving configuration
335 ///
336 /// This resets the current offset and recursion depth to 0, but keeps
337 /// the same configuration settings.
338 pub fn reset(&mut self) {
339 self.current_offset = 0;
340 self.last_match_end = 0;
341 self.recursion_depth = 0;
342 self.base_offset = 0;
343 self.indirect_reentry = false;
344 }
345}
346
347/// RAII guard that increments recursion depth on entry and decrements on drop.
348///
349/// Replaces the manual `increment_recursion_depth` / `decrement_recursion_depth`
350/// pair with a scope-based guard, eliminating the risk of mismatched calls and
351/// the need to swallow cleanup errors on error-return paths.
352///
353/// Obtain a guard via [`RecursionGuard::enter`], which borrows the context
354/// mutably for the guard's lifetime. Use [`RecursionGuard::context`] to access
355/// the borrowed context for the duration of the recursive call. The guard
356/// automatically decrements the recursion depth when it goes out of scope.
357///
358/// The guard is `pub(crate)` because recursion-depth management is an internal
359/// detail of the evaluation engine.
360pub(crate) struct RecursionGuard<'a> {
361 context: &'a mut EvaluationContext,
362}
363
364impl<'a> RecursionGuard<'a> {
365 /// Enter a new recursion level, incrementing the context's recursion depth.
366 ///
367 /// # Errors
368 ///
369 /// Returns `LibmagicError::EvaluationError` if incrementing would exceed
370 /// the maximum recursion depth configured in the evaluation config.
371 pub(crate) fn enter(context: &'a mut EvaluationContext) -> Result<Self, LibmagicError> {
372 context.increment_recursion_depth()?;
373 Ok(Self { context })
374 }
375
376 /// Access the underlying context for the duration of the guard.
377 pub(crate) fn context(&mut self) -> &mut EvaluationContext {
378 self.context
379 }
380}
381
382impl Drop for RecursionGuard<'_> {
383 fn drop(&mut self) {
384 // Safe to ignore: `decrement_recursion_depth` only fails when the
385 // depth is already 0, which is impossible here because `enter` just
386 // incremented it and the depth is only mutated through guard pairs.
387 let result = self.context.decrement_recursion_depth();
388 debug_assert!(
389 result.is_ok(),
390 "RecursionGuard invariant violated: decrement failed after successful enter()"
391 );
392 }
393}
394
395/// Result of evaluating a magic rule
396///
397/// Contains information extracted from a successful rule match, including
398/// the matched value, position, and confidence score.
399#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
400pub struct RuleMatch {
401 /// The message associated with the matching rule
402 pub message: String,
403 /// The offset where the match occurred
404 pub offset: usize,
405 /// The rule level (depth in hierarchy)
406 pub level: u32,
407 /// The matched value
408 pub value: crate::parser::ast::Value,
409 /// The type used to read the matched value
410 ///
411 /// Carries the source `TypeKind` so downstream consumers (e.g., output
412 /// formatting) can determine the on-disk width of the matched value.
413 pub type_kind: crate::parser::ast::TypeKind,
414 /// Confidence score (0.0 to 1.0)
415 ///
416 /// Calculated based on match depth in the rule hierarchy.
417 /// Deeper matches indicate more specific file type identification
418 /// and thus higher confidence.
419 pub confidence: f64,
420}
421
422impl RuleMatch {
423 /// Calculate confidence score based on rule depth
424 ///
425 /// Formula: min(1.0, 0.3 + (level * 0.2))
426 /// - Level 0 (root): 0.3
427 /// - Level 1: 0.5
428 /// - Level 2: 0.7
429 /// - Level 3: 0.9
430 /// - Level 4+: 1.0 (capped)
431 ///
432 /// # Examples
433 ///
434 /// ```
435 /// use libmagic_rs::evaluator::RuleMatch;
436 ///
437 /// assert!((RuleMatch::calculate_confidence(0) - 0.3).abs() < 0.001);
438 /// assert!((RuleMatch::calculate_confidence(3) - 0.9).abs() < 0.001);
439 /// assert!((RuleMatch::calculate_confidence(10) - 1.0).abs() < 0.001);
440 /// ```
441 #[must_use]
442 pub fn calculate_confidence(level: u32) -> f64 {
443 (0.3 + (f64::from(level) * 0.2)).min(1.0)
444 }
445}
446
447#[cfg(test)]
448mod tests;