libmagic_rs/parser/loader.rs
1// Copyright (c) 2025-2026 the libmagic-rs contributors
2// SPDX-License-Identifier: Apache-2.0
3
4//! File and directory loading for magic files.
5//!
6//! Provides functions for loading magic rules from individual files and
7//! directories, with automatic format detection and error handling.
8
9use crate::error::ParseError;
10use crate::parser::ast::MagicRule;
11use std::path::{Path, PathBuf};
12
13use super::format::{MagicFileFormat, detect_format};
14
15/// Loads and parses all magic files from a directory, merging them into a single rule set.
16///
17/// This function reads all regular files in the specified directory, parses each as a magic file,
18/// and combines the resulting rules into a single `Vec<MagicRule>`. Files are processed in
19/// alphabetical order by filename to ensure deterministic results.
20///
21/// # Error Handling Strategy
22///
23/// This function distinguishes between critical and non-critical errors:
24///
25/// - **Critical errors** (I/O failures, directory access issues, encoding problems):
26/// These cause immediate failure and return a `ParseError`. The function stops processing
27/// and propagates the error to the caller.
28///
29/// - **Non-critical errors** (individual file parse failures):
30/// These are logged to stderr with a warning message and the file is skipped. Processing
31/// continues with remaining files.
32///
33/// # Behavior
34///
35/// - Subdirectories are skipped (not recursively processed)
36/// - Symbolic links are skipped
37/// - Empty directories return an empty rules vector
38/// - Files are processed in alphabetical order by filename
39/// - All successfully parsed rules are merged in order
40///
41/// # Examples
42///
43/// Loading a directory of magic files:
44///
45/// ```rust,no_run
46/// use libmagic_rs::parser::load_magic_directory;
47/// use std::path::Path;
48///
49/// let rules = load_magic_directory(Path::new("/usr/share/file/magic.d"))?;
50/// println!("Loaded {} rules from directory", rules.len());
51/// # Ok::<(), libmagic_rs::ParseError>(())
52/// ```
53///
54/// Creating a Magdir-style directory structure:
55///
56/// ```rust,no_run
57/// use libmagic_rs::parser::load_magic_directory;
58/// use std::path::Path;
59///
60/// // Directory structure:
61/// // magic.d/
62/// // ├── 01-elf
63/// // ├── 02-archive
64/// // └── 03-text
65///
66/// let rules = load_magic_directory(Path::new("./magic.d"))?;
67/// // Rules from all three files are merged in alphabetical order
68/// # Ok::<(), libmagic_rs::ParseError>(())
69/// ```
70///
71/// # Errors
72///
73/// Returns `ParseError` if:
74/// - The directory does not exist or cannot be accessed
75/// - Directory entries cannot be read
76/// - A file cannot be read due to I/O errors
77/// - A file contains invalid UTF-8 encoding
78///
79/// # Panics
80///
81/// This function does not panic under normal operation.
82#[allow(clippy::print_stderr)]
83pub fn load_magic_directory(dir_path: &Path) -> Result<Vec<MagicRule>, ParseError> {
84 use std::fs;
85
86 // Read directory entries
87 let entries = fs::read_dir(dir_path).map_err(|e| {
88 ParseError::invalid_syntax(
89 0,
90 format!("Failed to read directory '{}': {}", dir_path.display(), e),
91 )
92 })?;
93
94 // Collect and sort entries by filename for deterministic ordering
95 let mut file_paths: Vec<std::path::PathBuf> = Vec::new();
96 for entry in entries {
97 let entry = entry.map_err(|e| {
98 ParseError::invalid_syntax(
99 0,
100 format!(
101 "Failed to read directory entry in '{}': {}",
102 dir_path.display(),
103 e
104 ),
105 )
106 })?;
107
108 let path = entry.path();
109 let file_type = entry.file_type().map_err(|e| {
110 ParseError::invalid_syntax(
111 0,
112 format!("Failed to read file type for '{}': {}", path.display(), e),
113 )
114 })?;
115
116 // Only process regular files, skip directories and symlinks
117 if file_type.is_file() && !file_type.is_symlink() {
118 file_paths.push(path);
119 }
120 }
121
122 // Sort by filename for deterministic ordering
123 file_paths.sort_by_key(|path| path.file_name().map(std::ffi::OsStr::to_os_string));
124
125 // Accumulate rules from all files
126 let mut all_rules = Vec::new();
127 let mut parse_failures: Vec<(PathBuf, ParseError)> = Vec::new();
128 let file_count = file_paths.len();
129
130 for path in file_paths {
131 // Read file contents
132 let contents = match fs::read_to_string(&path) {
133 Ok(contents) => contents,
134 Err(e) => {
135 // I/O errors are critical
136 return Err(ParseError::invalid_syntax(
137 0,
138 format!("Failed to read file '{}': {}", path.display(), e),
139 ));
140 }
141 };
142
143 // Parse the file
144 match super::parse_text_magic_file(&contents) {
145 Ok(rules) => {
146 // Successfully parsed - merge rules
147 all_rules.extend(rules);
148 }
149 Err(e) => {
150 // Track parse failures for reporting
151 parse_failures.push((path, e));
152 }
153 }
154 }
155
156 // If all files failed to parse, return an error
157 if all_rules.is_empty() && !parse_failures.is_empty() {
158 use std::fmt::Write;
159
160 let failure_details: Vec<String> = parse_failures
161 .iter()
162 .take(3) // Limit to first 3 failures for brevity
163 .map(|(path, e)| format!(" - {}: {}", path.display(), e))
164 .collect();
165
166 let mut message = format!("All {file_count} magic file(s) in directory failed to parse");
167 if !failure_details.is_empty() {
168 message.push_str(":\n");
169 message.push_str(&failure_details.join("\n"));
170 if parse_failures.len() > 3 {
171 let _ = write!(message, "\n ... and {} more", parse_failures.len() - 3);
172 }
173 }
174
175 return Err(ParseError::invalid_syntax(0, message));
176 }
177
178 // Log warnings for partial failures (some files parsed, some failed)
179 // Note: Using eprintln for now; consider a logging framework in the future
180 #[allow(clippy::print_stderr)]
181 for (path, e) in &parse_failures {
182 eprintln!("Warning: Failed to parse '{}': {}", path.display(), e);
183 }
184
185 Ok(all_rules)
186}
187
188/// Loads magic rules from a file or directory, automatically detecting the format.
189///
190/// This is the unified entry point for loading magic rules from the filesystem. It
191/// automatically detects whether the path points to a text magic file, a directory
192/// containing magic files, or a binary compiled magic file, and dispatches to the
193/// appropriate handler.
194///
195/// # Format Detection and Handling
196///
197/// The function uses [`detect_format()`] to determine the file type and handles each
198/// format as follows:
199///
200/// - **Text format**: Reads the file contents and parses using [`super::parse_text_magic_file()`]
201/// - **Directory format**: Loads all magic files from the directory using [`load_magic_directory()`]
202/// - **Binary format**: Returns an error with guidance to use the `--use-builtin` option
203///
204/// # Arguments
205///
206/// * `path` - Path to a magic file or directory. Can be absolute or relative.
207///
208/// # Returns
209///
210/// Returns `Ok(Vec<MagicRule>)` containing all successfully parsed magic rules. For
211/// directories, rules from all files are merged in alphabetical order by filename.
212///
213/// # Errors
214///
215/// This function returns a [`ParseError`] in the following cases:
216///
217/// - **File not found**: The specified path does not exist
218/// - **Unsupported format**: The file is a binary compiled magic file (`.mgc`)
219/// - **Parse errors**: The magic file contains syntax errors or invalid rules
220/// - **I/O errors**: File system errors during reading (permissions, disk errors, etc.)
221///
222/// # Examples
223///
224/// ## Loading a text magic file
225///
226/// ```no_run
227/// use libmagic_rs::parser::load_magic_file;
228/// use std::path::Path;
229///
230/// let rules = load_magic_file(Path::new("/usr/share/misc/magic"))?;
231/// println!("Loaded {} magic rules", rules.len());
232/// # Ok::<(), libmagic_rs::ParseError>(())
233/// ```
234///
235/// ## Loading a directory of magic files
236///
237/// ```no_run
238/// use libmagic_rs::parser::load_magic_file;
239/// use std::path::Path;
240///
241/// let rules = load_magic_file(Path::new("/usr/share/misc/magic.d"))?;
242/// println!("Loaded {} rules from directory", rules.len());
243/// # Ok::<(), libmagic_rs::ParseError>(())
244/// ```
245///
246/// ## Handling binary format errors
247///
248/// ```no_run
249/// use libmagic_rs::parser::load_magic_file;
250/// use std::path::Path;
251///
252/// match load_magic_file(Path::new("/usr/share/misc/magic.mgc")) {
253/// Ok(rules) => println!("Loaded {} rules", rules.len()),
254/// Err(e) => {
255/// eprintln!("Error loading magic file: {}", e);
256/// eprintln!("Hint: Use --use-builtin for binary files");
257/// }
258/// }
259/// # Ok::<(), libmagic_rs::ParseError>(())
260/// ```
261///
262/// # Security
263///
264/// This function delegates to [`super::parse_text_magic_file()`] or [`load_magic_directory()`]
265/// based on format detection. Security considerations are handled by those functions:
266///
267/// - Rule hierarchy depth is bounded during parsing
268/// - Invalid syntax is rejected with descriptive errors
269/// - Binary `.mgc` files are rejected (not parsed)
270///
271/// Note: File size limits and memory exhaustion protection are not currently implemented.
272/// Large magic files will be loaded entirely into memory.
273///
274/// # See Also
275///
276/// - [`detect_format()`] - Format detection logic
277/// - [`super::parse_text_magic_file()`] - Text file parser
278/// - [`load_magic_directory()`] - Directory loader
279pub fn load_magic_file(path: &Path) -> Result<Vec<MagicRule>, ParseError> {
280 // Detect the magic file format
281 let format = detect_format(path)?;
282
283 // Dispatch to appropriate handler based on format
284 match format {
285 MagicFileFormat::Text => {
286 // Read file contents and parse as text magic file
287 let content = std::fs::read_to_string(path)?;
288 super::parse_text_magic_file(&content)
289 }
290 MagicFileFormat::Directory => {
291 // Load all magic files from directory
292 load_magic_directory(path)
293 }
294 MagicFileFormat::Binary => {
295 // Binary compiled magic files are not supported
296 Err(ParseError::unsupported_format(
297 0,
298 "binary .mgc file",
299 "Binary compiled magic files (.mgc) are not supported for parsing.\n\
300 Use the --use-builtin option to use the built-in magic rules instead,\n\
301 or provide a text-based magic file or directory.",
302 ))
303 }
304 }
305}
306
#[cfg(test)]
mod tests {
    use super::*;
    use std::fs;
    use std::path::{Path, PathBuf};
    use tempfile::TempDir;

    /// Creates a fresh temporary directory for a test.
    fn scratch_dir() -> TempDir {
        TempDir::new().expect("Failed to create temp dir")
    }

    /// Writes `data` to `dir/name` and returns the resulting path.
    ///
    /// Panics with `on_failure` when the file cannot be written.
    fn write_fixture(dir: &Path, name: &str, data: impl AsRef<[u8]>, on_failure: &str) -> PathBuf {
        let target = dir.join(name);
        fs::write(&target, data).expect(on_failure);
        target
    }

    // ============================================================
    // Tests for load_magic_directory (6+ test cases)
    // ============================================================

    /// A directory that does not exist is reported as a read-directory error.
    #[test]
    fn test_load_directory_critical_error_io() {
        let outcome = load_magic_directory(Path::new("/this/should/not/exist/anywhere/at/all"));

        assert!(
            outcome.is_err(),
            "Should return error for non-existent directory"
        );
        let rendered = outcome.unwrap_err().to_string();
        assert!(rendered.contains("Failed to read directory"));
    }

    /// A file that fails to parse is skipped while a valid sibling is still loaded.
    #[test]
    fn test_load_directory_non_critical_error_parse() {
        let workspace = scratch_dir();
        let root = workspace.path();

        write_fixture(
            root,
            "valid.magic",
            "0 string \\x01\\x02 valid\n",
            "Failed to write valid file",
        );
        write_fixture(
            root,
            "invalid.magic",
            "this is invalid syntax\n",
            "Failed to write invalid file",
        );

        // Only the valid file contributes rules; the invalid one is skipped.
        let loaded = load_magic_directory(root).expect("Should load valid files");

        assert_eq!(loaded.len(), 1, "Should load only valid file");
        assert_eq!(loaded[0].message, "valid");
    }

    /// Empty and comment-only files load successfully and yield no rules.
    #[test]
    fn test_load_directory_empty_files() {
        let workspace = scratch_dir();
        let root = workspace.path();

        write_fixture(root, "empty.magic", "", "Failed to write empty file");
        write_fixture(
            root,
            "comments.magic",
            "# Just comments\n# Nothing else\n",
            "Failed to write comments file",
        );

        let loaded = load_magic_directory(root).expect("Should handle empty files");

        assert_eq!(loaded.len(), 0, "Empty files should contribute no rules");
    }

    /// A file that is not valid UTF-8 aborts the whole directory load.
    #[test]
    fn test_load_directory_binary_files() {
        let workspace = scratch_dir();
        let root = workspace.path();

        // These bytes are not valid UTF-8, so reading the file as text fails.
        write_fixture(
            root,
            "binary.dat",
            [0xFF, 0xFE, 0xFF, 0xFE],
            "Failed to write binary file",
        );
        write_fixture(
            root,
            "valid.magic",
            "0 string \\x01\\x02 valid\n",
            "Failed to write valid file",
        );

        // The unreadable file is a critical error even though a valid file sits next to it.
        let outcome = load_magic_directory(root);

        assert!(
            outcome.is_err(),
            "Binary files should cause critical error due to invalid UTF-8"
        );
    }

    /// File extensions play no role in which files are loaded.
    #[test]
    fn test_load_directory_mixed_extensions() {
        let workspace = scratch_dir();
        let root = workspace.path();

        write_fixture(
            root,
            "file.magic",
            "0 string \\x01\\x02 magic\n",
            "Failed to write .magic file",
        );
        write_fixture(
            root,
            "file.txt",
            "0 string \\x03\\x04 txt\n",
            "Failed to write .txt file",
        );
        write_fixture(
            root,
            "noext",
            "0 string \\x05\\x06 noext\n",
            "Failed to write no-ext file",
        );

        let loaded =
            load_magic_directory(root).expect("Should load all files regardless of extension");

        assert_eq!(
            loaded.len(),
            3,
            "Should process all files regardless of extension"
        );

        let seen: Vec<&str> = loaded.iter().map(|rule| rule.message.as_str()).collect();
        for expected in ["magic", "txt", "noext"] {
            assert!(seen.contains(&expected));
        }
    }

    /// Rules come back ordered by filename, not by file creation order.
    #[test]
    fn test_load_directory_alphabetical_ordering() {
        let workspace = scratch_dir();
        let root = workspace.path();

        // Deliberately created out of order - using valid magic syntax with hex escapes.
        write_fixture(
            root,
            "03-third",
            "0 string \\x07\\x08\\x09 third\n",
            "Failed to write third file",
        );
        write_fixture(
            root,
            "01-first",
            "0 string \\x01\\x02\\x03 first\n",
            "Failed to write first file",
        );
        write_fixture(
            root,
            "02-second",
            "0 string \\x04\\x05\\x06 second\n",
            "Failed to write second file",
        );

        let loaded = load_magic_directory(root).expect("Should load directory in order");

        assert_eq!(loaded.len(), 3);
        // Should be sorted by filename
        assert_eq!(loaded[0].message, "first");
        assert_eq!(loaded[1].message, "second");
        assert_eq!(loaded[2].message, "third");
    }

    // ============================================================
    // Tests for load_magic_file (5+ test cases)
    // ============================================================

    /// A single text magic file is parsed into its rules.
    #[test]
    fn test_load_magic_file_text_format() {
        let workspace = scratch_dir();
        let magic_file = write_fixture(
            workspace.path(),
            "magic.txt",
            "0 string \\x7fELF ELF executable\n",
            "Failed to write magic file",
        );

        let loaded = load_magic_file(&magic_file).expect("Failed to load text magic file");

        assert_eq!(loaded.len(), 1);
        assert_eq!(loaded[0].message, "ELF executable");
    }

    /// A directory path is dispatched to the directory loader.
    #[test]
    fn test_load_magic_file_directory_format() {
        let workspace = scratch_dir();
        let magic_dir = workspace.path().join("magic.d");
        fs::create_dir(&magic_dir).expect("Failed to create magic directory");

        write_fixture(
            &magic_dir,
            "00_elf",
            "0 string \\x7fELF ELF executable\n",
            "Failed to write elf file",
        );
        write_fixture(
            &magic_dir,
            "01_zip",
            "0 string \\x50\\x4b\\x03\\x04 ZIP archive\n",
            "Failed to write zip file",
        );

        let loaded = load_magic_file(&magic_dir).expect("Failed to load directory");

        assert_eq!(loaded.len(), 2);
        assert_eq!(loaded[0].message, "ELF executable");
        assert_eq!(loaded[1].message, "ZIP archive");
    }

    /// A compiled `.mgc` file is rejected with guidance towards the built-in rules.
    #[test]
    fn test_load_magic_file_binary_format_error() {
        let workspace = scratch_dir();

        // Little-endian 0xF11E041C, the .mgc magic number.
        let magic_number: [u8; 4] = [0x1C, 0x04, 0x1E, 0xF1];
        let binary_file = write_fixture(
            workspace.path(),
            "magic.mgc",
            magic_number,
            "Failed to write magic number",
        );

        let outcome = load_magic_file(&binary_file);

        assert!(outcome.is_err(), "Should fail to load binary .mgc file");

        let error_msg = outcome.unwrap_err().to_string();

        // The message must name the binary format and point to --use-builtin.
        assert!(
            error_msg.contains("Binary") || error_msg.contains("binary"),
            "Error should mention binary format: {error_msg}",
        );
        assert!(
            error_msg.contains("--use-builtin") || error_msg.contains("built-in"),
            "Error should mention --use-builtin option: {error_msg}",
        );
    }

    /// A path that does not exist produces an error.
    #[test]
    fn test_load_magic_file_io_error() {
        let outcome = load_magic_file(Path::new("/this/path/should/not/exist/magic.txt"));

        assert!(outcome.is_err(), "Should fail for non-existent file");
    }

    /// Syntax errors in a single file surface as parse errors.
    #[test]
    fn test_load_magic_file_parse_error_propagation() {
        let workspace = scratch_dir();

        // Invalid syntax: the rule is missing its offset.
        let invalid_file = write_fixture(
            workspace.path(),
            "invalid.magic",
            "string test invalid\n",
            "Failed to write invalid file",
        );

        let outcome = load_magic_file(&invalid_file);

        assert!(outcome.is_err(), "Should fail for file with parse errors");

        // Error should be a parse error (not I/O error)
        let error = outcome.unwrap_err();
        let error_msg = format!("{error:?}");
        assert!(
            error_msg.contains("InvalidSyntax") || error_msg.contains("syntax"),
            "Error should be parse error: {error_msg}",
        );
    }
}