libmagic_rs/evaluator/
mod.rs

1// Copyright (c) 2025-2026 the libmagic-rs contributors
2// SPDX-License-Identifier: Apache-2.0
3
4//! Rule evaluation engine
5//!
6//! This module contains the core evaluation logic for executing magic rules
7//! against file buffers to identify file types.
8
9use crate::parser::ast::MagicRule;
10use crate::{EvaluationConfig, LibmagicError};
11use serde::{Deserialize, Serialize};
12
13pub mod offset;
14pub mod operators;
15pub mod strength;
16pub mod types;
17
18/// Context for maintaining evaluation state during rule processing
19///
20/// The `EvaluationContext` tracks the current state of rule evaluation,
21/// including the current offset position, recursion depth for nested rules,
22/// and configuration settings that control evaluation behavior.
23///
24/// # Examples
25///
26/// ```rust
27/// use libmagic_rs::evaluator::EvaluationContext;
28/// use libmagic_rs::EvaluationConfig;
29///
30/// let config = EvaluationConfig::default();
31/// let context = EvaluationContext::new(config);
32///
33/// assert_eq!(context.current_offset(), 0);
34/// assert_eq!(context.recursion_depth(), 0);
35/// ```
36#[derive(Debug, Clone)]
37pub struct EvaluationContext {
38    /// Current offset position in the file buffer
39    current_offset: usize,
40    /// Current recursion depth for nested rule evaluation
41    recursion_depth: u32,
42    /// Configuration settings for evaluation behavior
43    config: EvaluationConfig,
44}
45
46impl EvaluationContext {
47    /// Create a new evaluation context with the given configuration
48    ///
49    /// # Arguments
50    ///
51    /// * `config` - Configuration settings for evaluation behavior
52    ///
53    /// # Examples
54    ///
55    /// ```rust
56    /// use libmagic_rs::evaluator::EvaluationContext;
57    /// use libmagic_rs::EvaluationConfig;
58    ///
59    /// let config = EvaluationConfig::default();
60    /// let context = EvaluationContext::new(config);
61    /// ```
62    #[must_use]
63    pub const fn new(config: EvaluationConfig) -> Self {
64        Self {
65            current_offset: 0,
66            recursion_depth: 0,
67            config,
68        }
69    }
70
71    /// Get the current offset position
72    ///
73    /// # Returns
74    ///
75    /// The current offset position in the file buffer
76    #[must_use]
77    pub const fn current_offset(&self) -> usize {
78        self.current_offset
79    }
80
81    /// Set the current offset position
82    ///
83    /// # Arguments
84    ///
85    /// * `offset` - The new offset position
86    pub fn set_current_offset(&mut self, offset: usize) {
87        self.current_offset = offset;
88    }
89
90    /// Get the current recursion depth
91    ///
92    /// # Returns
93    ///
94    /// The current recursion depth for nested rule evaluation
95    #[must_use]
96    pub const fn recursion_depth(&self) -> u32 {
97        self.recursion_depth
98    }
99
100    /// Increment the recursion depth
101    ///
102    /// # Returns
103    ///
104    /// `Ok(())` if the recursion depth is within limits, or `Err(LibmagicError)`
105    /// if the maximum recursion depth would be exceeded
106    ///
107    /// # Errors
108    ///
109    /// Returns `LibmagicError::EvaluationError` if incrementing would exceed
110    /// the maximum recursion depth configured in the evaluation config.
111    pub fn increment_recursion_depth(&mut self) -> Result<(), LibmagicError> {
112        if self.recursion_depth >= self.config.max_recursion_depth {
113            return Err(LibmagicError::EvaluationError(
114                crate::error::EvaluationError::recursion_limit_exceeded(self.recursion_depth),
115            ));
116        }
117        self.recursion_depth += 1;
118        Ok(())
119    }
120
121    /// Decrement the recursion depth
122    ///
123    /// # Errors
124    ///
125    /// Returns an error if the recursion depth is already 0, as this indicates
126    /// a programming error in the evaluation logic (mismatched increment/decrement calls).
127    pub fn decrement_recursion_depth(&mut self) -> Result<(), LibmagicError> {
128        if self.recursion_depth == 0 {
129            return Err(LibmagicError::EvaluationError(
130                crate::error::EvaluationError::internal_error(
131                    "Attempted to decrement recursion depth below 0",
132                ),
133            ));
134        }
135        self.recursion_depth -= 1;
136        Ok(())
137    }
138
139    /// Get a reference to the evaluation configuration
140    ///
141    /// # Returns
142    ///
143    /// A reference to the `EvaluationConfig` used by this context
144    #[must_use]
145    pub const fn config(&self) -> &EvaluationConfig {
146        &self.config
147    }
148
149    /// Check if evaluation should stop at the first match
150    ///
151    /// # Returns
152    ///
153    /// `true` if evaluation should stop at the first match, `false` otherwise
154    #[must_use]
155    pub const fn should_stop_at_first_match(&self) -> bool {
156        self.config.stop_at_first_match
157    }
158
159    /// Get the maximum string length allowed
160    ///
161    /// # Returns
162    ///
163    /// The maximum string length that should be read during evaluation
164    #[must_use]
165    pub const fn max_string_length(&self) -> usize {
166        self.config.max_string_length
167    }
168
169    /// Check if MIME type mapping is enabled
170    ///
171    /// # Returns
172    ///
173    /// `true` if MIME type mapping should be performed, `false` otherwise
174    #[must_use]
175    pub const fn enable_mime_types(&self) -> bool {
176        self.config.enable_mime_types
177    }
178
179    /// Get the evaluation timeout in milliseconds
180    ///
181    /// # Returns
182    ///
183    /// The timeout duration in milliseconds, or `None` if no timeout is set
184    #[must_use]
185    pub const fn timeout_ms(&self) -> Option<u64> {
186        self.config.timeout_ms
187    }
188
189    /// Reset the context to initial state while preserving configuration
190    ///
191    /// This resets the current offset and recursion depth to 0, but keeps
192    /// the same configuration settings.
193    pub fn reset(&mut self) {
194        self.current_offset = 0;
195        self.recursion_depth = 0;
196    }
197}
198
199/// Result of evaluating a magic rule
200///
201/// Contains information about a successful rule match, including the rule
202/// that matched and its associated message.
203#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
204pub struct RuleMatch {
205    /// The message associated with the matching rule
206    pub message: String,
207    /// The offset where the match occurred
208    pub offset: usize,
209    /// The rule level (depth in hierarchy)
210    pub level: u32,
211    /// The matched value
212    pub value: crate::parser::ast::Value,
213    /// Confidence score (0.0 to 1.0)
214    ///
215    /// Calculated based on match depth in the rule hierarchy.
216    /// Deeper matches indicate more specific file type identification
217    /// and thus higher confidence.
218    pub confidence: f64,
219}
220
221impl RuleMatch {
222    /// Calculate confidence score based on rule depth
223    ///
224    /// Formula: min(1.0, 0.3 + (level * 0.2))
225    /// - Level 0 (root): 0.3
226    /// - Level 1: 0.5
227    /// - Level 2: 0.7
228    /// - Level 3: 0.9
229    /// - Level 4+: 1.0 (capped)
230    ///
231    /// # Examples
232    ///
233    /// ```
234    /// use libmagic_rs::evaluator::RuleMatch;
235    ///
236    /// assert!((RuleMatch::calculate_confidence(0) - 0.3).abs() < 0.001);
237    /// assert!((RuleMatch::calculate_confidence(3) - 0.9).abs() < 0.001);
238    /// assert!((RuleMatch::calculate_confidence(10) - 1.0).abs() < 0.001);
239    /// ```
240    #[must_use]
241    pub fn calculate_confidence(level: u32) -> f64 {
242        (0.3 + (f64::from(level) * 0.2)).min(1.0)
243    }
244}
245
246/// Evaluate a single magic rule against a file buffer
247///
248/// This function performs the core rule evaluation by:
249/// 1. Resolving the rule's offset specification to an absolute position
250/// 2. Reading and interpreting bytes at that position according to the rule's type
251/// 3. Applying the rule's operator to compare the read value with the expected value
252///
253/// # Arguments
254///
255/// * `rule` - The magic rule to evaluate
256/// * `buffer` - The file buffer to evaluate against
257///
258/// # Returns
259///
260/// Returns `Ok(Some((offset, value)))` if the rule matches (with the resolved offset and
261/// read value), `Ok(None)` if it doesn't match, or `Err(LibmagicError)` if evaluation
262/// fails due to buffer access issues or other errors.
263///
264/// # Examples
265///
266/// ```rust
267/// use libmagic_rs::evaluator::evaluate_single_rule;
268/// use libmagic_rs::parser::ast::{MagicRule, OffsetSpec, TypeKind, Operator, Value};
269///
270/// // Create a rule to check for ELF magic bytes at offset 0
271/// let rule = MagicRule {
272///     offset: OffsetSpec::Absolute(0),
273///     typ: TypeKind::Byte { signed: true },
274///     op: Operator::Equal,
275///     value: Value::Uint(0x7f),
276///     message: "ELF magic".to_string(),
277///     children: vec![],
278///     level: 0,
279///     strength_modifier: None,
280/// };
281///
282/// let elf_buffer = &[0x7f, 0x45, 0x4c, 0x46]; // ELF magic bytes
283/// let result = evaluate_single_rule(&rule, elf_buffer).unwrap();
284/// assert!(result.is_some()); // Should match
285///
286/// let non_elf_buffer = &[0x50, 0x4b, 0x03, 0x04]; // ZIP magic bytes
287/// let result = evaluate_single_rule(&rule, non_elf_buffer).unwrap();
288/// assert!(result.is_none()); // Should not match
289/// ```
290///
291/// # Errors
292///
293/// * `LibmagicError::EvaluationError` - If offset resolution fails, buffer access is out of bounds,
294///   or type interpretation fails
295pub fn evaluate_single_rule(
296    rule: &MagicRule,
297    buffer: &[u8],
298) -> Result<Option<(usize, crate::parser::ast::Value)>, LibmagicError> {
299    // Step 1: Resolve the offset specification to an absolute position
300    let absolute_offset = offset::resolve_offset(&rule.offset, buffer)?;
301
302    // Step 2: Read and interpret bytes at the resolved offset according to the rule's type
303    let read_value = types::read_typed_value(buffer, absolute_offset, &rule.typ)
304        .map_err(|e| LibmagicError::EvaluationError(e.into()))?;
305
306    // Step 3: Coerce the rule's expected value to match the type's signedness/width
307    let expected_value = types::coerce_value_to_type(&rule.value, &rule.typ);
308
309    // Step 4: Apply the operator to compare the read value with the expected value
310    // BitwiseNot needs type-aware bit-width masking so the complement is computed
311    // at the type's natural width (e.g., byte NOT of 0x00 = 0xFF, not u64::MAX).
312    let matched = match &rule.op {
313        crate::parser::ast::Operator::BitwiseNot => operators::apply_bitwise_not_with_width(
314            &read_value,
315            &expected_value,
316            rule.typ.bit_width(),
317        ),
318        op => operators::apply_operator(op, &read_value, &expected_value),
319    };
320    Ok(matched.then_some((absolute_offset, read_value)))
321}
322
323/// Evaluate a list of magic rules against a file buffer with hierarchical processing
324///
325/// This function implements the core hierarchical rule evaluation algorithm with graceful
326/// error handling:
327/// 1. Evaluates each top-level rule in sequence
328/// 2. If a parent rule matches, evaluates its child rules for refinement
329/// 3. Collects all matches or stops at first match based on configuration
330/// 4. Maintains evaluation context for recursion limits and state
331/// 5. Implements graceful degradation by skipping problematic rules and continuing evaluation
332///
333/// The hierarchical evaluation follows these principles:
334/// - Parent rules must match before children are evaluated
335/// - Child rules provide refinement and additional detail
336/// - Evaluation can stop at first match or continue for all matches
337/// - Recursion depth is limited to prevent infinite loops
338/// - Problematic rules are skipped to allow evaluation to continue
339///
340/// # Arguments
341///
342/// * `rules` - The list of magic rules to evaluate
343/// * `buffer` - The file buffer to evaluate against
344/// * `context` - Mutable evaluation context for state management
345///
346/// # Returns
347///
348/// Returns `Ok(Vec<RuleMatch>)` containing all matches found. Errors in individual rules
349/// are logged and skipped to allow evaluation to continue. Only returns `Err(LibmagicError)`
350/// for critical failures like timeout or recursion limit exceeded.
351///
352/// # Examples
353///
354/// ```rust
355/// use libmagic_rs::evaluator::{evaluate_rules, EvaluationContext, RuleMatch};
356/// use libmagic_rs::parser::ast::{MagicRule, OffsetSpec, TypeKind, Operator, Value};
357/// use libmagic_rs::EvaluationConfig;
358///
359/// // Create a hierarchical rule set for ELF files
360/// let parent_rule = MagicRule {
361///     offset: OffsetSpec::Absolute(0),
362///     typ: TypeKind::Byte { signed: true },
363///     op: Operator::Equal,
364///     value: Value::Uint(0x7f),
365///     message: "ELF".to_string(),
366///     children: vec![
367///         MagicRule {
368///             offset: OffsetSpec::Absolute(4),
369///             typ: TypeKind::Byte { signed: true },
370///             op: Operator::Equal,
371///             value: Value::Uint(2),
372///             message: "64-bit".to_string(),
373///             children: vec![],
374///             level: 1,
375///             strength_modifier: None,
376///         }
377///     ],
378///     level: 0,
379///     strength_modifier: None,
380/// };
381///
382/// let rules = vec![parent_rule];
383/// let buffer = &[0x7f, 0x45, 0x4c, 0x46, 0x02, 0x01]; // ELF64 header
384/// let config = EvaluationConfig::default();
385/// let mut context = EvaluationContext::new(config);
386///
387/// let matches = evaluate_rules(&rules, buffer, &mut context).unwrap();
388/// assert_eq!(matches.len(), 2); // Parent and child should both match
389/// ```
390///
391/// # Errors
392///
393/// * `LibmagicError::Timeout` - If evaluation exceeds configured timeout
394/// * `LibmagicError::EvaluationError` - Only for critical failures like recursion limit exceeded
395///
396/// Individual rule evaluation errors are handled gracefully and do not stop the overall evaluation.
397pub fn evaluate_rules(
398    rules: &[MagicRule],
399    buffer: &[u8],
400    context: &mut EvaluationContext,
401) -> Result<Vec<RuleMatch>, LibmagicError> {
402    let mut matches = Vec::with_capacity(8);
403    let start_time = std::time::Instant::now();
404    let mut rule_count = 0u32;
405
406    for rule in rules {
407        // Check timeout periodically (every 16 rules) to reduce syscall overhead
408        rule_count = rule_count.wrapping_add(1);
409        if rule_count.trailing_zeros() >= 4
410            && let Some(timeout_ms) = context.timeout_ms()
411            && start_time.elapsed().as_millis() > u128::from(timeout_ms)
412        {
413            return Err(LibmagicError::Timeout { timeout_ms });
414        }
415
416        // Evaluate the current rule with graceful error handling
417        let match_data = match evaluate_single_rule(rule, buffer) {
418            Ok(data) => data,
419            Err(
420                LibmagicError::EvaluationError(
421                    crate::error::EvaluationError::BufferOverrun { .. }
422                    | crate::error::EvaluationError::InvalidOffset { .. }
423                    | crate::error::EvaluationError::TypeReadError(_),
424                )
425                | LibmagicError::IoError(_),
426            ) => {
427                // Expected evaluation errors for individual rules -- skip gracefully
428                continue;
429            }
430            Err(e) => {
431                // Unexpected errors (InternalError, UnsupportedType, etc.) should propagate
432                return Err(e);
433            }
434        };
435
436        if let Some((absolute_offset, read_value)) = match_data {
437            let match_result = RuleMatch {
438                message: rule.message.clone(),
439                offset: absolute_offset,
440                level: rule.level,
441                value: read_value,
442                confidence: RuleMatch::calculate_confidence(rule.level),
443            };
444            matches.push(match_result);
445
446            // If this rule has children, evaluate them recursively
447            if !rule.children.is_empty() {
448                // Check recursion depth limit - this is a critical error that should stop evaluation
449                context.increment_recursion_depth()?;
450
451                // Recursively evaluate child rules with graceful error handling
452                match evaluate_rules(&rule.children, buffer, context) {
453                    Ok(child_matches) => {
454                        matches.extend(child_matches);
455                    }
456                    Err(LibmagicError::Timeout { .. }) => {
457                        // Timeout is critical, propagate it up
458                        context.decrement_recursion_depth()?;
459                        return Err(LibmagicError::Timeout {
460                            timeout_ms: context.timeout_ms().unwrap_or(0),
461                        });
462                    }
463                    Err(LibmagicError::EvaluationError(
464                        crate::error::EvaluationError::RecursionLimitExceeded { .. },
465                    )) => {
466                        // Recursion limit is critical, propagate it up
467                        context.decrement_recursion_depth()?;
468                        return Err(LibmagicError::EvaluationError(
469                            crate::error::EvaluationError::RecursionLimitExceeded {
470                                depth: context.recursion_depth(),
471                            },
472                        ));
473                    }
474                    Err(
475                        LibmagicError::EvaluationError(
476                            crate::error::EvaluationError::BufferOverrun { .. }
477                            | crate::error::EvaluationError::InvalidOffset { .. }
478                            | crate::error::EvaluationError::TypeReadError(_),
479                        )
480                        | LibmagicError::IoError(_),
481                    ) => {
482                        // Expected child evaluation errors -- skip gracefully
483                    }
484                    Err(e) => {
485                        // Unexpected errors in children should propagate
486                        context.decrement_recursion_depth()?;
487                        return Err(e);
488                    }
489                }
490
491                // Restore recursion depth
492                context.decrement_recursion_depth()?;
493            }
494
495            // Stop at first match if configured to do so
496            if context.should_stop_at_first_match() {
497                break;
498            }
499        }
500    }
501
502    Ok(matches)
503}
504
505/// Evaluate magic rules with a fresh context
506///
507/// This is a convenience function that creates a new evaluation context
508/// and evaluates the rules. Useful for simple evaluation scenarios.
509///
510/// # Arguments
511///
512/// * `rules` - The list of magic rules to evaluate
513/// * `buffer` - The file buffer to evaluate against
514/// * `config` - Configuration for evaluation behavior
515///
516/// # Returns
517///
518/// Returns `Ok(Vec<RuleMatch>)` containing all matches found, or `Err(LibmagicError)`
519/// if evaluation fails.
520///
521/// # Examples
522///
523/// ```rust
524/// use libmagic_rs::evaluator::{evaluate_rules_with_config, RuleMatch};
525/// use libmagic_rs::parser::ast::{MagicRule, OffsetSpec, TypeKind, Operator, Value};
526/// use libmagic_rs::EvaluationConfig;
527///
528/// let rule = MagicRule {
529///     offset: OffsetSpec::Absolute(0),
530///     typ: TypeKind::Byte { signed: true },
531///     op: Operator::Equal,
532///     value: Value::Uint(0x7f),
533///     message: "ELF magic".to_string(),
534///     children: vec![],
535///     level: 0,
536///     strength_modifier: None,
537/// };
538///
539/// let rules = vec![rule];
540/// let buffer = &[0x7f, 0x45, 0x4c, 0x46];
541/// let config = EvaluationConfig::default();
542///
543/// let matches = evaluate_rules_with_config(&rules, buffer, &config).unwrap();
544/// assert_eq!(matches.len(), 1);
545/// assert_eq!(matches[0].message, "ELF magic");
546/// ```
547///
548/// # Errors
549///
550/// * `LibmagicError::EvaluationError` - If rule evaluation fails
551/// * `LibmagicError::Timeout` - If evaluation exceeds configured timeout
552pub fn evaluate_rules_with_config(
553    rules: &[MagicRule],
554    buffer: &[u8],
555    config: &EvaluationConfig,
556) -> Result<Vec<RuleMatch>, LibmagicError> {
557    let mut context = EvaluationContext::new(config.clone());
558    evaluate_rules(rules, buffer, &mut context)
559}
560
561#[cfg(test)]
562mod tests;
libmagic_rs/evaluator/mod.rs

libmagic_rs/evaluator/
mod.rs