use log::warn;
use crate::error::ParseError;
use crate::parser::ParsedMagic;
use crate::parser::name_table::NameTable;
use std::path::{Path, PathBuf};
use super::format::{MagicFileFormat, detect_format};
/// Largest magic file, in bytes, that `read_magic_file_bounded` will load (1 GiB).
///
/// A unit test in this file asserts that this value equals
/// `crate::io::FileBuffer::MAX_FILE_SIZE`, so the two limits must be changed together.
pub const MAX_MAGIC_FILE_SIZE: u64 = 1024 * 1024 * 1024;
/// Reads the magic file at `path` into a `String`, refusing to load more than
/// [`MAX_MAGIC_FILE_SIZE`] bytes.
///
/// The limit is enforced twice: once against the length reported by the file
/// metadata (cheap early rejection) and once on the read itself, which is capped
/// at `MAX_MAGIC_FILE_SIZE + 1` bytes. The second check matters because the
/// metadata length is only a snapshot: the file may grow between the two calls,
/// and the reported length need not match what a read actually yields.
///
/// Content that is not valid UTF-8 is not an error: a warning is logged and the
/// invalid sequences are replaced with U+FFFD.
///
/// # Errors
///
/// * `ParseError::IoError` when the metadata cannot be read.
/// * An invalid-syntax `ParseError` (line 0) when the file exceeds the limit.
/// * Whatever `ParseError::from` produces for an I/O error while opening or
///   reading the file.
fn read_magic_file_bounded(path: &Path) -> Result<String, ParseError> {
    use std::io::Read;

    // Shared constructor so both size checks report the limit identically.
    let too_large = |observed: String| {
        ParseError::invalid_syntax(
            0,
            format!(
                "Magic file '{}' is too large: {} (maximum allowed: {} bytes)",
                path.display(),
                observed,
                MAX_MAGIC_FILE_SIZE
            ),
        )
    };

    let metadata = std::fs::metadata(path).map_err(|e| {
        ParseError::IoError(std::io::Error::new(
            e.kind(),
            format!("Failed to read metadata for '{}': {}", path.display(), e),
        ))
    })?;

    let reported_len = metadata.len();
    if reported_len > MAX_MAGIC_FILE_SIZE {
        return Err(too_large(format!("{reported_len} bytes")));
    }

    // Cap the read itself. Reading one byte past the limit lets us distinguish
    // "exactly at the limit" from "over the limit" without buffering more.
    let file = std::fs::File::open(path).map_err(ParseError::from)?;
    let mut bytes = Vec::with_capacity(usize::try_from(reported_len).unwrap_or(0));
    let read_len = file
        .take(MAX_MAGIC_FILE_SIZE + 1)
        .read_to_end(&mut bytes)
        .map_err(ParseError::from)?;
    let within_limit = u64::try_from(read_len).map_or(false, |n| n <= MAX_MAGIC_FILE_SIZE);
    if !within_limit {
        return Err(too_large(format!("more than {MAX_MAGIC_FILE_SIZE} bytes")));
    }

    match String::from_utf8(bytes) {
        Ok(s) => Ok(s),
        Err(e) => {
            warn!(
                "Magic file '{}' contains non-UTF-8 bytes; they were replaced with U+FFFD. \
                 Rule parsing proceeds, but replacements inside rule bodies may alter matching.",
                path.display()
            );
            // `into_bytes` hands back the original buffer so the lossy pass does
            // not need a second read of the file.
            Ok(String::from_utf8_lossy(&e.into_bytes()).into_owned())
        }
    }
}
/// Loads every regular file directly inside `dir_path` as a text magic file and
/// merges the parsed rules and name tables into one [`ParsedMagic`].
///
/// Symbolic links and anything that is not a regular file are skipped. The
/// remaining files are processed in ascending file-name order, so rules from
/// `01-first` precede rules from `02-second`.
///
/// A file that fails to *parse* is tolerated as long as at least one other file
/// parses: the failure is logged as a warning. A directory with no candidate
/// files yields an empty result.
///
/// # Errors
///
/// Returns an invalid-syntax `ParseError` (line 0) when:
/// * the directory or one of its entries cannot be read,
/// * any candidate file cannot be read (this aborts the whole load), or
/// * every candidate file fails to parse; the message lists the first few
///   failures and the count of the rest.
pub fn load_magic_directory(dir_path: &Path) -> Result<ParsedMagic, ParseError> {
    // Number of individual parse failures spelled out in the aggregate error.
    const MAX_REPORTED_FAILURES: usize = 3;

    let listing = std::fs::read_dir(dir_path).map_err(|e| {
        ParseError::invalid_syntax(
            0,
            format!("Failed to read directory '{}': {}", dir_path.display(), e),
        )
    })?;

    let mut candidates: Vec<PathBuf> = Vec::new();
    for item in listing {
        let item = item.map_err(|e| {
            ParseError::invalid_syntax(
                0,
                format!(
                    "Failed to read directory entry in '{}': {}",
                    dir_path.display(),
                    e
                ),
            )
        })?;
        let candidate = item.path();
        let kind = item.file_type().map_err(|e| {
            ParseError::invalid_syntax(
                0,
                format!(
                    "Failed to read file type for '{}': {}",
                    candidate.display(),
                    e
                ),
            )
        })?;
        if kind.is_symlink() || !kind.is_file() {
            continue;
        }
        candidates.push(candidate);
    }

    // Stable sort by file name gives a deterministic rule order.
    candidates.sort_by(|a, b| a.file_name().cmp(&b.file_name()));

    let file_count = candidates.len();
    let mut rules = Vec::new();
    let mut names = NameTable::empty();
    let mut failed: Vec<(PathBuf, ParseError)> = Vec::new();
    let mut parsed_any = false;

    for candidate in candidates {
        // An unreadable file is treated as fatal, unlike a file that merely
        // fails to parse.
        let text = read_magic_file_bounded(&candidate).map_err(|e| {
            ParseError::invalid_syntax(
                0,
                format!("Failed to read file '{}': {}", candidate.display(), e),
            )
        })?;

        match super::parse_text_magic_file(&text) {
            Err(e) => failed.push((candidate, e)),
            Ok(parsed) => {
                parsed_any = true;
                rules.extend(parsed.rules);
                names.merge(parsed.name_table);
            }
        }
    }

    if !parsed_any && !failed.is_empty() {
        use std::fmt::Write;

        // `failed` is non-empty here, so at least one detail line is produced.
        let shown = failed
            .iter()
            .take(MAX_REPORTED_FAILURES)
            .map(|(path, e)| format!(" - {}: {}", path.display(), e))
            .collect::<Vec<String>>()
            .join("\n");

        let mut message = format!("All {file_count} magic file(s) in directory failed to parse");
        message.push_str(":\n");
        message.push_str(&shown);
        if failed.len() > MAX_REPORTED_FAILURES {
            let _ = write!(
                message,
                "\n ... and {} more",
                failed.len() - MAX_REPORTED_FAILURES
            );
        }
        return Err(ParseError::invalid_syntax(0, message));
    }

    // Partial success: surface the skipped files without failing the load.
    for (skipped, reason) in &failed {
        warn!("Failed to parse '{}': {}", skipped.display(), reason);
    }

    Ok(ParsedMagic {
        rules,
        name_table: names,
    })
}
/// Loads magic rules from `path`, choosing the loader from the format that
/// `detect_format` reports for it.
///
/// * Text files are read (size-bounded) and parsed as a single magic file.
/// * Directories are delegated to [`load_magic_directory`].
/// * Binary `.mgc` files are rejected with an unsupported-format error.
///
/// # Errors
///
/// Propagates any error from format detection, reading, or parsing, and returns
/// an unsupported-format `ParseError` for binary input.
pub fn load_magic_file(path: &Path) -> Result<ParsedMagic, ParseError> {
    match detect_format(path)? {
        MagicFileFormat::Directory => load_magic_directory(path),
        MagicFileFormat::Text => {
            read_magic_file_bounded(path).and_then(|text| super::parse_text_magic_file(&text))
        }
        MagicFileFormat::Binary => Err(ParseError::unsupported_format(
            0,
            "binary .mgc file",
            "Binary compiled magic files (.mgc) are not supported for parsing.\n\
             Use the --use-builtin option to use the built-in magic rules instead,\n\
             or provide a text-based magic file or directory.",
        )),
    }
}
#[cfg(test)]
mod tests {
    use super::*;
    use std::fs::{self, File};
    use std::io::Write;
    use std::path::Path;
    use tempfile::TempDir;

    /// A directory that cannot be listed produces a "Failed to read directory" error.
    #[test]
    fn test_load_directory_critical_error_io() {
        let missing_dir = Path::new("/this/should/not/exist/anywhere/at/all");
        let outcome = load_magic_directory(missing_dir);
        assert!(
            outcome.is_err(),
            "Should return error for non-existent directory"
        );
        let failure = outcome.unwrap_err();
        assert!(failure.to_string().contains("Failed to read directory"));
    }

    /// A file that fails to parse is skipped when another file in the directory parses.
    #[test]
    fn test_load_directory_non_critical_error_parse() {
        let sandbox = TempDir::new().expect("Failed to create temp dir");
        let root = sandbox.path();
        fs::write(root.join("valid.magic"), "0 string \\x01\\x02 valid\n")
            .expect("Failed to write valid file");
        fs::write(root.join("invalid.magic"), "this is invalid syntax\n")
            .expect("Failed to write invalid file");

        let loaded = load_magic_directory(root).expect("Should load valid files");
        assert_eq!(loaded.rules.len(), 1, "Should load only valid file");
        assert_eq!(loaded.rules[0].message, "valid");
    }

    /// Empty and comment-only files load successfully and contribute no rules.
    #[test]
    fn test_load_directory_empty_files() {
        let sandbox = TempDir::new().expect("Failed to create temp dir");
        let root = sandbox.path();
        fs::write(root.join("empty.magic"), "").expect("Failed to write empty file");
        fs::write(
            root.join("comments.magic"),
            "# Just comments\n# Nothing else\n",
        )
        .expect("Failed to write comments file");

        let loaded = load_magic_directory(root).expect("Should handle empty files");
        assert_eq!(
            loaded.rules.len(),
            0,
            "Empty files should contribute no rules"
        );
    }

    /// A non-text file next to a valid magic file does not prevent the directory from loading.
    #[test]
    fn test_load_directory_binary_files() {
        let sandbox = TempDir::new().expect("Failed to create temp dir");
        let root = sandbox.path();
        fs::write(root.join("binary.dat"), [0xFF, 0xFE, 0xFF, 0xFE])
            .expect("Failed to write binary file");
        fs::write(root.join("valid.magic"), "0 string \\x01\\x02 valid\n")
            .expect("Failed to write valid file");

        let loaded = load_magic_directory(root)
            .expect("Directory with a binary file alongside a valid file should still load");
        assert_eq!(
            loaded.rules.len(),
            1,
            "Only the valid magic file should contribute rules"
        );
        assert_eq!(loaded.rules[0].message, "valid");
    }

    /// Files are loaded whatever their extension, including files with none.
    #[test]
    fn test_load_directory_mixed_extensions() {
        let sandbox = TempDir::new().expect("Failed to create temp dir");
        let root = sandbox.path();
        fs::write(root.join("file.magic"), "0 string \\x01\\x02 magic\n")
            .expect("Failed to write .magic file");
        fs::write(root.join("file.txt"), "0 string \\x03\\x04 txt\n")
            .expect("Failed to write .txt file");
        fs::write(root.join("noext"), "0 string \\x05\\x06 noext\n")
            .expect("Failed to write no-ext file");

        let loaded =
            load_magic_directory(root).expect("Should load all files regardless of extension");
        assert_eq!(
            loaded.rules.len(),
            3,
            "Should process all files regardless of extension"
        );
        let seen: Vec<&str> = loaded.rules.iter().map(|r| r.message.as_str()).collect();
        assert!(seen.contains(&"magic"));
        assert!(seen.contains(&"txt"));
        assert!(seen.contains(&"noext"));
    }

    /// Rules appear in file-name order regardless of the order the files were created in.
    #[test]
    fn test_load_directory_alphabetical_ordering() {
        let sandbox = TempDir::new().expect("Failed to create temp dir");
        let root = sandbox.path();
        fs::write(root.join("03-third"), "0 string \\x07\\x08\\x09 third\n")
            .expect("Failed to write third file");
        fs::write(root.join("01-first"), "0 string \\x01\\x02\\x03 first\n")
            .expect("Failed to write first file");
        fs::write(root.join("02-second"), "0 string \\x04\\x05\\x06 second\n")
            .expect("Failed to write second file");

        let loaded = load_magic_directory(root).expect("Should load directory in order");
        assert_eq!(loaded.rules.len(), 3);
        assert_eq!(loaded.rules[0].message, "first");
        assert_eq!(loaded.rules[1].message, "second");
        assert_eq!(loaded.rules[2].message, "third");
    }

    /// `load_magic_file` parses a single text magic file.
    #[test]
    fn test_load_magic_file_text_format() {
        let sandbox = TempDir::new().expect("Failed to create temp dir");
        let text_path = sandbox.path().join("magic.txt");
        fs::write(&text_path, "0 string \\x7fELF ELF executable\n")
            .expect("Failed to write magic file");

        let loaded = load_magic_file(&text_path).expect("Failed to load text magic file");
        assert_eq!(loaded.rules.len(), 1);
        assert_eq!(loaded.rules[0].message, "ELF executable");
    }

    /// `load_magic_file` on a directory loads its files in file-name order.
    #[test]
    fn test_load_magic_file_directory_format() {
        let sandbox = TempDir::new().expect("Failed to create temp dir");
        let rules_dir = sandbox.path().join("magic.d");
        fs::create_dir(&rules_dir).expect("Failed to create magic directory");
        fs::write(
            rules_dir.join("00_elf"),
            "0 string \\x7fELF ELF executable\n",
        )
        .expect("Failed to write elf file");
        fs::write(
            rules_dir.join("01_zip"),
            "0 string \\x50\\x4b\\x03\\x04 ZIP archive\n",
        )
        .expect("Failed to write zip file");

        let loaded = load_magic_file(&rules_dir).expect("Failed to load directory");
        assert_eq!(loaded.rules.len(), 2);
        assert_eq!(loaded.rules[0].message, "ELF executable");
        assert_eq!(loaded.rules[1].message, "ZIP archive");
    }

    /// A compiled `.mgc` file is rejected with an error that points at the built-in rules.
    #[test]
    fn test_load_magic_file_binary_format_error() {
        let sandbox = TempDir::new().expect("Failed to create temp dir");
        let mgc_path = sandbox.path().join("magic.mgc");
        let mut handle = File::create(&mgc_path).expect("Failed to create binary file");
        let magic_number: [u8; 4] = [0x1C, 0x04, 0x1E, 0xF1];
        handle
            .write_all(&magic_number)
            .expect("Failed to write magic number");

        let outcome = load_magic_file(&mgc_path);
        assert!(outcome.is_err(), "Should fail to load binary .mgc file");
        let error_msg = outcome.unwrap_err().to_string();
        assert!(
            error_msg.contains("Binary") || error_msg.contains("binary"),
            "Error should mention binary format: {error_msg}",
        );
        assert!(
            error_msg.contains("--use-builtin") || error_msg.contains("built-in"),
            "Error should mention --use-builtin option: {error_msg}",
        );
    }

    /// A path that does not exist yields an error rather than a panic.
    #[test]
    fn test_load_magic_file_io_error() {
        let missing_file = Path::new("/this/path/should/not/exist/magic.txt");
        let outcome = load_magic_file(missing_file);
        assert!(outcome.is_err(), "Should fail for non-existent file");
    }

    /// A parse error in a single text file is propagated to the caller.
    #[test]
    fn test_load_magic_file_parse_error_propagation() {
        let sandbox = TempDir::new().expect("Failed to create temp dir");
        let bad_path = sandbox.path().join("invalid.magic");
        fs::write(&bad_path, "string test invalid\n").expect("Failed to write invalid file");

        let outcome = load_magic_file(&bad_path);
        assert!(outcome.is_err(), "Should fail for file with parse errors");
        let error = outcome.unwrap_err();
        let error_msg = format!("{error:?}");
        assert!(
            error_msg.contains("InvalidSyntax") || error_msg.contains("syntax"),
            "Error should be parse error: {error_msg}",
        );
    }

    /// The magic-file size limit is kept equal to the `FileBuffer` limit.
    #[test]
    fn test_max_magic_file_size_matches_file_buffer_limit() {
        assert_eq!(
            MAX_MAGIC_FILE_SIZE,
            crate::io::FileBuffer::MAX_FILE_SIZE,
            "MAX_MAGIC_FILE_SIZE must match FileBuffer::MAX_FILE_SIZE"
        );
    }

    /// A file one byte over the limit is rejected with a "too large" error naming the limit.
    #[test]
    fn test_load_magic_file_rejects_oversized_file() {
        let sandbox = TempDir::new().expect("Failed to create temp dir");
        let huge_path = sandbox.path().join("huge.magic");
        let handle = File::create(&huge_path).expect("Failed to create oversized file");
        // `set_len` extends the file without writing a gigabyte of data.
        handle
            .set_len(MAX_MAGIC_FILE_SIZE + 1)
            .expect("Failed to set sparse file length");
        drop(handle);

        let outcome = load_magic_file(&huge_path);
        assert!(
            outcome.is_err(),
            "Loading a file larger than MAX_MAGIC_FILE_SIZE must fail"
        );
        let err_msg = outcome.unwrap_err().to_string();
        assert!(
            err_msg.contains("too large"),
            "Error should indicate size limit violation, got: {err_msg}"
        );
        assert!(
            err_msg.contains(&MAX_MAGIC_FILE_SIZE.to_string()),
            "Error should mention the maximum allowed size, got: {err_msg}"
        );
    }

    /// A stray non-UTF-8 byte inside a comment does not stop the file from loading.
    #[test]
    fn test_load_magic_file_tolerates_non_utf8_in_comment() {
        let sandbox = TempDir::new().expect("Failed to create temp dir");
        let magic_path = sandbox.path().join("with-latin1-comment.magic");
        // 0xdf on its own is not valid UTF-8; it sits inside the comment line.
        let segments: [&[u8]; 4] = [
            b"# From: Thomas Wei",
            &[0xdf],
            b"schuh <thomas@example.invalid>\n",
            b"0 string \\x7fELF ELF executable\n",
        ];
        let bytes: Vec<u8> = segments.concat();
        fs::write(&magic_path, &bytes).expect("Failed to write magic file with non-UTF-8 byte");

        let loaded = load_magic_file(&magic_path)
            .expect("Magic file with non-UTF-8 bytes in a comment must still load");
        assert_eq!(
            loaded.rules.len(),
            1,
            "The ELF rule should be parsed; the comment is stripped"
        );
        assert_eq!(loaded.rules[0].message, "ELF executable");
    }

    /// Named subroutines from separate files all end up in the merged name table.
    #[test]
    fn test_load_directory_merges_name_tables() {
        let sandbox = TempDir::new().expect("Failed to create temp dir");
        let root = sandbox.path();
        fs::write(root.join("00_first"), "0 name sub_a\n>0 byte 1 a-body\n")
            .expect("Failed to write sub_a file");
        fs::write(root.join("01_second"), "0 name sub_b\n>0 byte 2 b-body\n")
            .expect("Failed to write sub_b file");

        let loaded = load_magic_directory(root).expect("Should load both name subroutines");
        assert_eq!(loaded.rules.len(), 0);
        assert!(loaded.name_table.get("sub_a").is_some());
        assert!(loaded.name_table.get("sub_b").is_some());
    }
}