libmagic_rs/evaluator/mod.rs
1// Copyright (c) 2025-2026 the libmagic-rs contributors
2// SPDX-License-Identifier: Apache-2.0
3
4//! Rule evaluation engine
5//!
6//! This module contains the core evaluation logic for executing magic rules
7//! against file buffers to identify file types.
8
9use crate::parser::ast::MagicRule;
10use crate::{EvaluationConfig, LibmagicError};
11use serde::{Deserialize, Serialize};
12
13pub mod offset;
14pub mod operators;
15pub mod strength;
16pub mod types;
17
/// Context for maintaining evaluation state during rule processing
///
/// The `EvaluationContext` tracks the current state of rule evaluation,
/// including the current offset position, recursion depth for nested rules,
/// and configuration settings that control evaluation behavior.
///
/// All fields are private. They are read through the accessor methods on this
/// type and changed only through `set_current_offset`,
/// `increment_recursion_depth`, `decrement_recursion_depth`, and `reset`.
/// A context built with `new` starts with both the offset and the recursion
/// depth at 0.
///
/// # Examples
///
/// ```rust
/// use libmagic_rs::evaluator::EvaluationContext;
/// use libmagic_rs::EvaluationConfig;
///
/// let config = EvaluationConfig::default();
/// let context = EvaluationContext::new(config);
///
/// assert_eq!(context.current_offset(), 0);
/// assert_eq!(context.recursion_depth(), 0);
/// ```
#[derive(Debug, Clone)]
pub struct EvaluationContext {
    /// Current offset position in the file buffer
    current_offset: usize,
    /// Current recursion depth for nested rule evaluation
    ///
    /// `increment_recursion_depth` refuses to raise this once it has reached
    /// `config.max_recursion_depth`.
    recursion_depth: u32,
    /// Configuration settings for evaluation behavior
    ///
    /// Owned by the context; `reset` leaves it untouched.
    config: EvaluationConfig,
}
45
46impl EvaluationContext {
47 /// Create a new evaluation context with the given configuration
48 ///
49 /// # Arguments
50 ///
51 /// * `config` - Configuration settings for evaluation behavior
52 ///
53 /// # Examples
54 ///
55 /// ```rust
56 /// use libmagic_rs::evaluator::EvaluationContext;
57 /// use libmagic_rs::EvaluationConfig;
58 ///
59 /// let config = EvaluationConfig::default();
60 /// let context = EvaluationContext::new(config);
61 /// ```
62 #[must_use]
63 pub const fn new(config: EvaluationConfig) -> Self {
64 Self {
65 current_offset: 0,
66 recursion_depth: 0,
67 config,
68 }
69 }
70
71 /// Get the current offset position
72 ///
73 /// # Returns
74 ///
75 /// The current offset position in the file buffer
76 #[must_use]
77 pub const fn current_offset(&self) -> usize {
78 self.current_offset
79 }
80
81 /// Set the current offset position
82 ///
83 /// # Arguments
84 ///
85 /// * `offset` - The new offset position
86 pub fn set_current_offset(&mut self, offset: usize) {
87 self.current_offset = offset;
88 }
89
90 /// Get the current recursion depth
91 ///
92 /// # Returns
93 ///
94 /// The current recursion depth for nested rule evaluation
95 #[must_use]
96 pub const fn recursion_depth(&self) -> u32 {
97 self.recursion_depth
98 }
99
100 /// Increment the recursion depth
101 ///
102 /// # Returns
103 ///
104 /// `Ok(())` if the recursion depth is within limits, or `Err(LibmagicError)`
105 /// if the maximum recursion depth would be exceeded
106 ///
107 /// # Errors
108 ///
109 /// Returns `LibmagicError::EvaluationError` if incrementing would exceed
110 /// the maximum recursion depth configured in the evaluation config.
111 pub fn increment_recursion_depth(&mut self) -> Result<(), LibmagicError> {
112 if self.recursion_depth >= self.config.max_recursion_depth {
113 return Err(LibmagicError::EvaluationError(
114 crate::error::EvaluationError::recursion_limit_exceeded(self.recursion_depth),
115 ));
116 }
117 self.recursion_depth += 1;
118 Ok(())
119 }
120
121 /// Decrement the recursion depth
122 ///
123 /// # Errors
124 ///
125 /// Returns an error if the recursion depth is already 0, as this indicates
126 /// a programming error in the evaluation logic (mismatched increment/decrement calls).
127 pub fn decrement_recursion_depth(&mut self) -> Result<(), LibmagicError> {
128 if self.recursion_depth == 0 {
129 return Err(LibmagicError::EvaluationError(
130 crate::error::EvaluationError::internal_error(
131 "Attempted to decrement recursion depth below 0",
132 ),
133 ));
134 }
135 self.recursion_depth -= 1;
136 Ok(())
137 }
138
139 /// Get a reference to the evaluation configuration
140 ///
141 /// # Returns
142 ///
143 /// A reference to the `EvaluationConfig` used by this context
144 #[must_use]
145 pub const fn config(&self) -> &EvaluationConfig {
146 &self.config
147 }
148
149 /// Check if evaluation should stop at the first match
150 ///
151 /// # Returns
152 ///
153 /// `true` if evaluation should stop at the first match, `false` otherwise
154 #[must_use]
155 pub const fn should_stop_at_first_match(&self) -> bool {
156 self.config.stop_at_first_match
157 }
158
159 /// Get the maximum string length allowed
160 ///
161 /// # Returns
162 ///
163 /// The maximum string length that should be read during evaluation
164 #[must_use]
165 pub const fn max_string_length(&self) -> usize {
166 self.config.max_string_length
167 }
168
169 /// Check if MIME type mapping is enabled
170 ///
171 /// # Returns
172 ///
173 /// `true` if MIME type mapping should be performed, `false` otherwise
174 #[must_use]
175 pub const fn enable_mime_types(&self) -> bool {
176 self.config.enable_mime_types
177 }
178
179 /// Get the evaluation timeout in milliseconds
180 ///
181 /// # Returns
182 ///
183 /// The timeout duration in milliseconds, or `None` if no timeout is set
184 #[must_use]
185 pub const fn timeout_ms(&self) -> Option<u64> {
186 self.config.timeout_ms
187 }
188
189 /// Reset the context to initial state while preserving configuration
190 ///
191 /// This resets the current offset and recursion depth to 0, but keeps
192 /// the same configuration settings.
193 pub fn reset(&mut self) {
194 self.current_offset = 0;
195 self.recursion_depth = 0;
196 }
197}
198
/// Result of evaluating a magic rule
///
/// Describes one successful rule match: the rule's message, where the match
/// was found, the rule's level, the value involved, and a confidence score.
/// The matching rule itself is not stored. The per-field notes below describe
/// how `evaluate_rules` fills the struct in; because every field is public,
/// other code may construct values differently.
#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
pub struct RuleMatch {
    /// The message associated with the matching rule
    ///
    /// `evaluate_rules` stores a clone of the rule's `message`.
    pub message: String,
    /// The offset where the match occurred
    ///
    /// `evaluate_rules` stores the resolved offset returned by
    /// `evaluate_single_rule`.
    pub offset: usize,
    /// The rule level (depth in hierarchy)
    pub level: u32,
    /// The matched value
    ///
    /// `evaluate_rules` stores the value that was read from the buffer, not
    /// the expected value written in the rule.
    pub value: crate::parser::ast::Value,
    /// Confidence score (0.0 to 1.0)
    ///
    /// Calculated based on match depth in the rule hierarchy.
    /// Deeper matches indicate more specific file type identification
    /// and thus higher confidence. `evaluate_rules` derives it from `level`
    /// with `RuleMatch::calculate_confidence`.
    pub confidence: f64,
}
220
221impl RuleMatch {
222 /// Calculate confidence score based on rule depth
223 ///
224 /// Formula: min(1.0, 0.3 + (level * 0.2))
225 /// - Level 0 (root): 0.3
226 /// - Level 1: 0.5
227 /// - Level 2: 0.7
228 /// - Level 3: 0.9
229 /// - Level 4+: 1.0 (capped)
230 ///
231 /// # Examples
232 ///
233 /// ```
234 /// use libmagic_rs::evaluator::RuleMatch;
235 ///
236 /// assert!((RuleMatch::calculate_confidence(0) - 0.3).abs() < 0.001);
237 /// assert!((RuleMatch::calculate_confidence(3) - 0.9).abs() < 0.001);
238 /// assert!((RuleMatch::calculate_confidence(10) - 1.0).abs() < 0.001);
239 /// ```
240 #[must_use]
241 pub fn calculate_confidence(level: u32) -> f64 {
242 (0.3 + (f64::from(level) * 0.2)).min(1.0)
243 }
244}
245
246/// Evaluate a single magic rule against a file buffer
247///
248/// This function performs the core rule evaluation by:
249/// 1. Resolving the rule's offset specification to an absolute position
250/// 2. Reading and interpreting bytes at that position according to the rule's type
251/// 3. Applying the rule's operator to compare the read value with the expected value
252///
253/// # Arguments
254///
255/// * `rule` - The magic rule to evaluate
256/// * `buffer` - The file buffer to evaluate against
257///
258/// # Returns
259///
260/// Returns `Ok(Some((offset, value)))` if the rule matches (with the resolved offset and
261/// read value), `Ok(None)` if it doesn't match, or `Err(LibmagicError)` if evaluation
262/// fails due to buffer access issues or other errors.
263///
264/// # Examples
265///
266/// ```rust
267/// use libmagic_rs::evaluator::evaluate_single_rule;
268/// use libmagic_rs::parser::ast::{MagicRule, OffsetSpec, TypeKind, Operator, Value};
269///
270/// // Create a rule to check for ELF magic bytes at offset 0
271/// let rule = MagicRule {
272/// offset: OffsetSpec::Absolute(0),
273/// typ: TypeKind::Byte { signed: true },
274/// op: Operator::Equal,
275/// value: Value::Uint(0x7f),
276/// message: "ELF magic".to_string(),
277/// children: vec![],
278/// level: 0,
279/// strength_modifier: None,
280/// };
281///
282/// let elf_buffer = &[0x7f, 0x45, 0x4c, 0x46]; // ELF magic bytes
283/// let result = evaluate_single_rule(&rule, elf_buffer).unwrap();
284/// assert!(result.is_some()); // Should match
285///
286/// let non_elf_buffer = &[0x50, 0x4b, 0x03, 0x04]; // ZIP magic bytes
287/// let result = evaluate_single_rule(&rule, non_elf_buffer).unwrap();
288/// assert!(result.is_none()); // Should not match
289/// ```
290///
291/// # Errors
292///
293/// * `LibmagicError::EvaluationError` - If offset resolution fails, buffer access is out of bounds,
294/// or type interpretation fails
295pub fn evaluate_single_rule(
296 rule: &MagicRule,
297 buffer: &[u8],
298) -> Result<Option<(usize, crate::parser::ast::Value)>, LibmagicError> {
299 // Step 1: Resolve the offset specification to an absolute position
300 let absolute_offset = offset::resolve_offset(&rule.offset, buffer)?;
301
302 // Step 2: Read and interpret bytes at the resolved offset according to the rule's type
303 let read_value = types::read_typed_value(buffer, absolute_offset, &rule.typ)
304 .map_err(|e| LibmagicError::EvaluationError(e.into()))?;
305
306 // Step 3: Coerce the rule's expected value to match the type's signedness/width
307 let expected_value = types::coerce_value_to_type(&rule.value, &rule.typ);
308
309 // Step 4: Apply the operator to compare the read value with the expected value
310 // BitwiseNot needs type-aware bit-width masking so the complement is computed
311 // at the type's natural width (e.g., byte NOT of 0x00 = 0xFF, not u64::MAX).
312 let matched = match &rule.op {
313 crate::parser::ast::Operator::BitwiseNot => operators::apply_bitwise_not_with_width(
314 &read_value,
315 &expected_value,
316 rule.typ.bit_width(),
317 ),
318 op => operators::apply_operator(op, &read_value, &expected_value),
319 };
320 Ok(matched.then_some((absolute_offset, read_value)))
321}
322
323/// Evaluate a list of magic rules against a file buffer with hierarchical processing
324///
325/// This function implements the core hierarchical rule evaluation algorithm with graceful
326/// error handling:
327/// 1. Evaluates each top-level rule in sequence
328/// 2. If a parent rule matches, evaluates its child rules for refinement
329/// 3. Collects all matches or stops at first match based on configuration
330/// 4. Maintains evaluation context for recursion limits and state
331/// 5. Implements graceful degradation by skipping problematic rules and continuing evaluation
332///
333/// The hierarchical evaluation follows these principles:
334/// - Parent rules must match before children are evaluated
335/// - Child rules provide refinement and additional detail
336/// - Evaluation can stop at first match or continue for all matches
337/// - Recursion depth is limited to prevent infinite loops
338/// - Problematic rules are skipped to allow evaluation to continue
339///
340/// # Arguments
341///
342/// * `rules` - The list of magic rules to evaluate
343/// * `buffer` - The file buffer to evaluate against
344/// * `context` - Mutable evaluation context for state management
345///
346/// # Returns
347///
348/// Returns `Ok(Vec<RuleMatch>)` containing all matches found. Errors in individual rules
349/// are logged and skipped to allow evaluation to continue. Only returns `Err(LibmagicError)`
350/// for critical failures like timeout or recursion limit exceeded.
351///
352/// # Examples
353///
354/// ```rust
355/// use libmagic_rs::evaluator::{evaluate_rules, EvaluationContext, RuleMatch};
356/// use libmagic_rs::parser::ast::{MagicRule, OffsetSpec, TypeKind, Operator, Value};
357/// use libmagic_rs::EvaluationConfig;
358///
359/// // Create a hierarchical rule set for ELF files
360/// let parent_rule = MagicRule {
361/// offset: OffsetSpec::Absolute(0),
362/// typ: TypeKind::Byte { signed: true },
363/// op: Operator::Equal,
364/// value: Value::Uint(0x7f),
365/// message: "ELF".to_string(),
366/// children: vec![
367/// MagicRule {
368/// offset: OffsetSpec::Absolute(4),
369/// typ: TypeKind::Byte { signed: true },
370/// op: Operator::Equal,
371/// value: Value::Uint(2),
372/// message: "64-bit".to_string(),
373/// children: vec![],
374/// level: 1,
375/// strength_modifier: None,
376/// }
377/// ],
378/// level: 0,
379/// strength_modifier: None,
380/// };
381///
382/// let rules = vec![parent_rule];
383/// let buffer = &[0x7f, 0x45, 0x4c, 0x46, 0x02, 0x01]; // ELF64 header
384/// let config = EvaluationConfig::default();
385/// let mut context = EvaluationContext::new(config);
386///
387/// let matches = evaluate_rules(&rules, buffer, &mut context).unwrap();
388/// assert_eq!(matches.len(), 2); // Parent and child should both match
389/// ```
390///
391/// # Errors
392///
393/// * `LibmagicError::Timeout` - If evaluation exceeds configured timeout
394/// * `LibmagicError::EvaluationError` - Only for critical failures like recursion limit exceeded
395///
396/// Individual rule evaluation errors are handled gracefully and do not stop the overall evaluation.
397pub fn evaluate_rules(
398 rules: &[MagicRule],
399 buffer: &[u8],
400 context: &mut EvaluationContext,
401) -> Result<Vec<RuleMatch>, LibmagicError> {
402 let mut matches = Vec::with_capacity(8);
403 let start_time = std::time::Instant::now();
404 let mut rule_count = 0u32;
405
406 for rule in rules {
407 // Check timeout periodically (every 16 rules) to reduce syscall overhead
408 rule_count = rule_count.wrapping_add(1);
409 if rule_count.trailing_zeros() >= 4
410 && let Some(timeout_ms) = context.timeout_ms()
411 && start_time.elapsed().as_millis() > u128::from(timeout_ms)
412 {
413 return Err(LibmagicError::Timeout { timeout_ms });
414 }
415
416 // Evaluate the current rule with graceful error handling
417 let match_data = match evaluate_single_rule(rule, buffer) {
418 Ok(data) => data,
419 Err(
420 LibmagicError::EvaluationError(
421 crate::error::EvaluationError::BufferOverrun { .. }
422 | crate::error::EvaluationError::InvalidOffset { .. }
423 | crate::error::EvaluationError::TypeReadError(_),
424 )
425 | LibmagicError::IoError(_),
426 ) => {
427 // Expected evaluation errors for individual rules -- skip gracefully
428 continue;
429 }
430 Err(e) => {
431 // Unexpected errors (InternalError, UnsupportedType, etc.) should propagate
432 return Err(e);
433 }
434 };
435
436 if let Some((absolute_offset, read_value)) = match_data {
437 let match_result = RuleMatch {
438 message: rule.message.clone(),
439 offset: absolute_offset,
440 level: rule.level,
441 value: read_value,
442 confidence: RuleMatch::calculate_confidence(rule.level),
443 };
444 matches.push(match_result);
445
446 // If this rule has children, evaluate them recursively
447 if !rule.children.is_empty() {
448 // Check recursion depth limit - this is a critical error that should stop evaluation
449 context.increment_recursion_depth()?;
450
451 // Recursively evaluate child rules with graceful error handling
452 match evaluate_rules(&rule.children, buffer, context) {
453 Ok(child_matches) => {
454 matches.extend(child_matches);
455 }
456 Err(LibmagicError::Timeout { .. }) => {
457 // Timeout is critical, propagate it up
458 context.decrement_recursion_depth()?;
459 return Err(LibmagicError::Timeout {
460 timeout_ms: context.timeout_ms().unwrap_or(0),
461 });
462 }
463 Err(LibmagicError::EvaluationError(
464 crate::error::EvaluationError::RecursionLimitExceeded { .. },
465 )) => {
466 // Recursion limit is critical, propagate it up
467 context.decrement_recursion_depth()?;
468 return Err(LibmagicError::EvaluationError(
469 crate::error::EvaluationError::RecursionLimitExceeded {
470 depth: context.recursion_depth(),
471 },
472 ));
473 }
474 Err(
475 LibmagicError::EvaluationError(
476 crate::error::EvaluationError::BufferOverrun { .. }
477 | crate::error::EvaluationError::InvalidOffset { .. }
478 | crate::error::EvaluationError::TypeReadError(_),
479 )
480 | LibmagicError::IoError(_),
481 ) => {
482 // Expected child evaluation errors -- skip gracefully
483 }
484 Err(e) => {
485 // Unexpected errors in children should propagate
486 context.decrement_recursion_depth()?;
487 return Err(e);
488 }
489 }
490
491 // Restore recursion depth
492 context.decrement_recursion_depth()?;
493 }
494
495 // Stop at first match if configured to do so
496 if context.should_stop_at_first_match() {
497 break;
498 }
499 }
500 }
501
502 Ok(matches)
503}
504
505/// Evaluate magic rules with a fresh context
506///
507/// This is a convenience function that creates a new evaluation context
508/// and evaluates the rules. Useful for simple evaluation scenarios.
509///
510/// # Arguments
511///
512/// * `rules` - The list of magic rules to evaluate
513/// * `buffer` - The file buffer to evaluate against
514/// * `config` - Configuration for evaluation behavior
515///
516/// # Returns
517///
518/// Returns `Ok(Vec<RuleMatch>)` containing all matches found, or `Err(LibmagicError)`
519/// if evaluation fails.
520///
521/// # Examples
522///
523/// ```rust
524/// use libmagic_rs::evaluator::{evaluate_rules_with_config, RuleMatch};
525/// use libmagic_rs::parser::ast::{MagicRule, OffsetSpec, TypeKind, Operator, Value};
526/// use libmagic_rs::EvaluationConfig;
527///
528/// let rule = MagicRule {
529/// offset: OffsetSpec::Absolute(0),
530/// typ: TypeKind::Byte { signed: true },
531/// op: Operator::Equal,
532/// value: Value::Uint(0x7f),
533/// message: "ELF magic".to_string(),
534/// children: vec![],
535/// level: 0,
536/// strength_modifier: None,
537/// };
538///
539/// let rules = vec![rule];
540/// let buffer = &[0x7f, 0x45, 0x4c, 0x46];
541/// let config = EvaluationConfig::default();
542///
543/// let matches = evaluate_rules_with_config(&rules, buffer, &config).unwrap();
544/// assert_eq!(matches.len(), 1);
545/// assert_eq!(matches[0].message, "ELF magic");
546/// ```
547///
548/// # Errors
549///
550/// * `LibmagicError::EvaluationError` - If rule evaluation fails
551/// * `LibmagicError::Timeout` - If evaluation exceeds configured timeout
552pub fn evaluate_rules_with_config(
553 rules: &[MagicRule],
554 buffer: &[u8],
555 config: &EvaluationConfig,
556) -> Result<Vec<RuleMatch>, LibmagicError> {
557 let mut context = EvaluationContext::new(config.clone());
558 evaluate_rules(rules, buffer, &mut context)
559}
560
561#[cfg(test)]
562mod tests;