#![deny(missing_docs)]
#![deny(unsafe_code)]
#![deny(clippy::all)]
#![warn(clippy::pedantic)]
use std::path::{Path, PathBuf};
use serde::{Deserialize, Serialize};
pub mod builtin_rules;
pub mod error;
pub mod evaluator;
pub mod io;
pub mod mime;
pub mod output;
pub mod parser;
pub mod tags;
#[cfg(any(test, doc))]
pub mod build_helpers;
pub use parser::ast::{
Endianness, MagicRule, OffsetSpec, Operator, StrengthModifier, TypeKind, Value,
};
pub use evaluator::{EvaluationContext, RuleMatch};
pub use error::{EvaluationError, LibmagicError, ParseError};
/// Convenience alias for [`std::result::Result`] whose error type is fixed to
/// [`LibmagicError`].
///
/// Used as the return type of the fallible functions declared in this file.
pub type Result<T> = std::result::Result<T, LibmagicError>;
/// Lets `?` turn a [`crate::io::IoError`] into a [`LibmagicError`].
///
/// Only the error's `Display` text survives the conversion: it is stored as
/// the payload of [`LibmagicError::FileError`].
impl From<crate::io::IoError> for LibmagicError {
    fn from(io_error: crate::io::IoError) -> Self {
        let message = io_error.to_string();
        Self::FileError(message)
    }
}
/// Tunable limits and feature switches for rule evaluation.
///
/// Build one with [`EvaluationConfig::new`], [`EvaluationConfig::performance`]
/// or [`EvaluationConfig::comprehensive`], or fill in the fields directly, and
/// check it with [`EvaluationConfig::validate`]. The [`MagicDatabase`]
/// constructors that accept a configuration validate it before use.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct EvaluationConfig {
/// Upper bound on recursion depth. Validation requires a value in `1..=1000`.
pub max_recursion_depth: u32,
/// Upper bound on string length, in bytes. Validation requires a value in
/// `1..=1_048_576`.
pub max_string_length: usize,
/// Whether evaluation stops at the first match. This flag is only passed on
/// to the evaluator; its exact effect is defined there, not in this file.
pub stop_at_first_match: bool,
/// When `true`, results produced by [`MagicDatabase`] include a MIME type
/// looked up from the textual description; when `false` the MIME type is
/// always `None`.
pub enable_mime_types: bool,
/// Optional evaluation timeout in milliseconds. `None` means no timeout is
/// configured; when present, validation requires a value in `1..=300_000`.
pub timeout_ms: Option<u64>,
}
impl Default for EvaluationConfig {
fn default() -> Self {
Self {
max_recursion_depth: 20,
max_string_length: 8192,
stop_at_first_match: true,
enable_mime_types: false,
timeout_ms: None,
}
}
}
impl EvaluationConfig {
    /// Creates a configuration populated with the default values.
    ///
    /// Identical to calling [`EvaluationConfig::default`].
    #[must_use]
    pub fn new() -> Self {
        Self::default()
    }

    /// Preset with tight limits: recursion depth 10, string length 1024,
    /// stop at the first match, no MIME lookup and a 1000 ms timeout.
    #[must_use]
    pub const fn performance() -> Self {
        const ONE_SECOND_MS: u64 = 1_000;

        Self {
            max_recursion_depth: 10,
            max_string_length: 1_024,
            stop_at_first_match: true,
            enable_mime_types: false,
            timeout_ms: Some(ONE_SECOND_MS),
        }
    }

    /// Preset with generous limits: recursion depth 50, string length 32768,
    /// all matches collected, MIME lookup enabled and a 30000 ms timeout.
    #[must_use]
    pub const fn comprehensive() -> Self {
        const THIRTY_SECONDS_MS: u64 = 30_000;

        Self {
            max_recursion_depth: 50,
            max_string_length: 32_768,
            stop_at_first_match: false,
            enable_mime_types: true,
            timeout_ms: Some(THIRTY_SECONDS_MS),
        }
    }

    /// Checks that every limit is inside its accepted range.
    ///
    /// The checks run in a fixed order (recursion depth, string length,
    /// timeout, then the combined resource check) and the first failure is
    /// the one reported.
    ///
    /// # Errors
    ///
    /// Returns [`LibmagicError::ConfigError`] with a human-readable `reason`
    /// when:
    /// - `max_recursion_depth` is `0` or greater than `1000`;
    /// - `max_string_length` is `0` or greater than `1_048_576`;
    /// - `timeout_ms` is `Some(0)` or greater than `300_000`;
    /// - `max_recursion_depth` exceeds `100` while `max_string_length`
    ///   exceeds `65536`.
    pub fn validate(&self) -> Result<()> {
        self.validate_recursion_depth()
            .and_then(|()| self.validate_string_length())
            .and_then(|()| self.validate_timeout())
            .and_then(|()| self.validate_resource_combination())
    }

    /// Rejects a recursion depth of zero or one above the hard ceiling.
    fn validate_recursion_depth(&self) -> Result<()> {
        const MAX_SAFE_RECURSION_DEPTH: u32 = 1000;

        let reason = match self.max_recursion_depth {
            0 => "max_recursion_depth must be greater than 0".to_string(),
            depth if depth > MAX_SAFE_RECURSION_DEPTH => format!(
                "max_recursion_depth must not exceed {MAX_SAFE_RECURSION_DEPTH} to prevent stack overflow"
            ),
            _ => return Ok(()),
        };
        Err(LibmagicError::ConfigError { reason })
    }

    /// Rejects a string length of zero or one above the hard ceiling.
    fn validate_string_length(&self) -> Result<()> {
        const MAX_SAFE_STRING_LENGTH: usize = 1_048_576;

        let reason = match self.max_string_length {
            0 => "max_string_length must be greater than 0".to_string(),
            length if length > MAX_SAFE_STRING_LENGTH => format!(
                "max_string_length must not exceed {MAX_SAFE_STRING_LENGTH} bytes to prevent memory exhaustion"
            ),
            _ => return Ok(()),
        };
        Err(LibmagicError::ConfigError { reason })
    }

    /// Rejects a configured timeout of zero or one above the hard ceiling.
    ///
    /// An absent timeout (`None`) is always accepted.
    fn validate_timeout(&self) -> Result<()> {
        const MAX_SAFE_TIMEOUT_MS: u64 = 300_000;

        let reason = match self.timeout_ms {
            Some(0) => "timeout_ms must be greater than 0 if specified".to_string(),
            Some(millis) if millis > MAX_SAFE_TIMEOUT_MS => format!(
                "timeout_ms must not exceed {MAX_SAFE_TIMEOUT_MS} (5 minutes) to prevent denial of service"
            ),
            Some(_) | None => return Ok(()),
        };
        Err(LibmagicError::ConfigError { reason })
    }

    /// Rejects configurations that are individually valid but pair a high
    /// recursion depth with a large string length.
    fn validate_resource_combination(&self) -> Result<()> {
        const HIGH_RECURSION_THRESHOLD: u32 = 100;
        const LARGE_STRING_THRESHOLD: usize = 65536;

        let deep_recursion = self.max_recursion_depth > HIGH_RECURSION_THRESHOLD;
        let long_strings = self.max_string_length > LARGE_STRING_THRESHOLD;

        if deep_recursion && long_strings {
            Err(LibmagicError::ConfigError {
                reason: format!(
                    "High recursion depth (>{HIGH_RECURSION_THRESHOLD}) combined with large string length (>{LARGE_STRING_THRESHOLD}) may cause resource exhaustion"
                ),
            })
        } else {
            Ok(())
        }
    }
}
/// A set of magic rules bundled with the configuration used to evaluate them.
///
/// Create one from the built-in rules ([`MagicDatabase::with_builtin_rules`])
/// or from a magic file ([`MagicDatabase::load_from_file`]), then identify
/// data with [`MagicDatabase::evaluate_file`] or
/// [`MagicDatabase::evaluate_buffer`].
#[derive(Debug)]
pub struct MagicDatabase {
/// Rules evaluated against every input.
rules: Vec<MagicRule>,
/// Validated evaluation settings supplied at construction time.
config: EvaluationConfig,
/// Path the rules were loaded from; `None` for the built-in rule set.
source_path: Option<PathBuf>,
/// Mapper used to derive a MIME type from a description when MIME output
/// is enabled in `config`.
mime_mapper: mime::MimeMapper,
}
impl MagicDatabase {
pub fn with_builtin_rules() -> Result<Self> {
Self::with_builtin_rules_and_config(EvaluationConfig::default())
}
pub fn with_builtin_rules_and_config(config: EvaluationConfig) -> Result<Self> {
config.validate()?;
Ok(Self {
rules: crate::builtin_rules::get_builtin_rules(),
config,
source_path: None,
mime_mapper: mime::MimeMapper::new(),
})
}
pub fn load_from_file<P: AsRef<Path>>(path: P) -> Result<Self> {
Self::load_from_file_with_config(path, EvaluationConfig::default())
}
pub fn load_from_file_with_config<P: AsRef<Path>>(
path: P,
config: EvaluationConfig,
) -> Result<Self> {
config.validate()?;
let rules = parser::load_magic_file(path.as_ref()).map_err(|e| match e {
ParseError::IoError(io_err) => LibmagicError::IoError(io_err),
other => LibmagicError::ParseError(other),
})?;
Ok(Self {
rules,
config,
source_path: Some(path.as_ref().to_path_buf()),
mime_mapper: mime::MimeMapper::new(),
})
}
pub fn evaluate_file<P: AsRef<Path>>(&self, path: P) -> Result<EvaluationResult> {
use crate::evaluator::evaluate_rules_with_config;
use crate::io::FileBuffer;
use std::fs;
use std::time::Instant;
let start_time = Instant::now();
let path = path.as_ref();
let file_metadata = fs::metadata(path)?;
let file_size = file_metadata.len();
if file_size == 0 {
let mut result = self.evaluate_buffer_internal(b"", start_time)?;
result.metadata.file_size = 0;
result.metadata.magic_file.clone_from(&self.source_path);
return Ok(result);
}
let file_buffer = FileBuffer::new(path)?;
let buffer = file_buffer.as_slice();
let matches = if self.rules.is_empty() {
vec![]
} else {
evaluate_rules_with_config(&self.rules, buffer, &self.config)?
};
Ok(self.build_result(matches, file_size, start_time))
}
pub fn evaluate_buffer(&self, buffer: &[u8]) -> Result<EvaluationResult> {
use std::time::Instant;
self.evaluate_buffer_internal(buffer, Instant::now())
}
fn evaluate_buffer_internal(
&self,
buffer: &[u8],
start_time: std::time::Instant,
) -> Result<EvaluationResult> {
use crate::evaluator::evaluate_rules_with_config;
let file_size = buffer.len() as u64;
let matches = if self.rules.is_empty() {
vec![]
} else {
evaluate_rules_with_config(&self.rules, buffer, &self.config)?
};
Ok(self.build_result(matches, file_size, start_time))
}
fn build_result(
&self,
matches: Vec<evaluator::RuleMatch>,
file_size: u64,
start_time: std::time::Instant,
) -> EvaluationResult {
let (description, confidence) = if matches.is_empty() {
("data".to_string(), 0.0)
} else {
(
Self::concatenate_messages(&matches),
matches.first().map_or(0.0, |m| m.confidence),
)
};
let mime_type = if self.config.enable_mime_types {
self.mime_mapper.get_mime_type(&description)
} else {
None
};
EvaluationResult {
description,
mime_type,
confidence,
matches,
metadata: EvaluationMetadata {
file_size,
evaluation_time_ms: start_time.elapsed().as_secs_f64() * 1000.0,
rules_evaluated: self.rules.len(),
magic_file: self.source_path.clone(),
timed_out: false,
},
}
}
fn concatenate_messages(matches: &[evaluator::RuleMatch]) -> String {
let capacity: usize = matches.iter().map(|m| m.message.len() + 1).sum();
let mut result = String::with_capacity(capacity);
for m in matches {
if let Some(rest) = m.message.strip_prefix('\u{0008}') {
result.push_str(rest);
} else if !result.is_empty() {
result.push(' ');
result.push_str(&m.message);
} else {
result.push_str(&m.message);
}
}
result
}
#[must_use]
pub fn config(&self) -> &EvaluationConfig {
&self.config
}
#[must_use]
pub fn source_path(&self) -> Option<&Path> {
self.source_path.as_deref()
}
}
/// Bookkeeping information attached to every [`EvaluationResult`].
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct EvaluationMetadata {
/// Size of the evaluated input in bytes: the filesystem-reported length for
/// files, the slice length for in-memory buffers.
pub file_size: u64,
/// Wall-clock time spent on the evaluation, in milliseconds.
pub evaluation_time_ms: f64,
/// Number of rules held by the database that produced the result (the
/// length of its rule list).
pub rules_evaluated: usize,
/// Magic file the rules were loaded from, if any.
pub magic_file: Option<PathBuf>,
/// Whether the evaluation timed out. NOTE(review): the result builder in
/// this file always stores `false` here — confirm whether timeouts are
/// reported through another channel.
pub timed_out: bool,
}
impl Default for EvaluationMetadata {
fn default() -> Self {
Self {
file_size: 0,
evaluation_time_ms: 0.0,
rules_evaluated: 0,
magic_file: None,
timed_out: false,
}
}
}
/// Outcome of evaluating a file or buffer with a [`MagicDatabase`].
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct EvaluationResult {
/// Human-readable description: the concatenated messages of all matches, or
/// `"data"` when nothing matched.
pub description: String,
/// MIME type derived from the description. `None` when MIME lookup is
/// disabled in the configuration or the mapper yields no type.
pub mime_type: Option<String>,
/// Confidence of the first match, or `0.0` when nothing matched.
pub confidence: f64,
/// The rule matches returned by the evaluator; empty when nothing matched.
pub matches: Vec<evaluator::RuleMatch>,
/// Size, timing and provenance information for this evaluation.
pub metadata: EvaluationMetadata,
}
#[cfg(test)]
mod tests;