Skip to main content

libmagic_rs/parser/
loader.rs

1// Copyright (c) 2025-2026 the libmagic-rs contributors
2// SPDX-License-Identifier: Apache-2.0
3
4//! File and directory loading for magic files.
5//!
6//! Provides functions for loading magic rules from individual files and
7//! directories, with automatic format detection and error handling.
8
9use crate::error::ParseError;
10use crate::parser::ast::MagicRule;
11use std::path::{Path, PathBuf};
12
13use super::format::{MagicFileFormat, detect_format};
14
15/// Loads and parses all magic files from a directory, merging them into a single rule set.
16///
17/// This function reads all regular files in the specified directory, parses each as a magic file,
18/// and combines the resulting rules into a single `Vec<MagicRule>`. Files are processed in
19/// alphabetical order by filename to ensure deterministic results.
20///
21/// # Error Handling Strategy
22///
23/// This function distinguishes between critical and non-critical errors:
24///
25/// - **Critical errors** (I/O failures, directory access issues, encoding problems):
26///   These cause immediate failure and return a `ParseError`. The function stops processing
27///   and propagates the error to the caller.
28///
29/// - **Non-critical errors** (individual file parse failures):
30///   These are logged to stderr with a warning message and the file is skipped. Processing
31///   continues with remaining files.
32///
33/// # Behavior
34///
35/// - Subdirectories are skipped (not recursively processed)
36/// - Symbolic links are skipped
37/// - Empty directories return an empty rules vector
38/// - Files are processed in alphabetical order by filename
39/// - All successfully parsed rules are merged in order
40///
41/// # Examples
42///
43/// Loading a directory of magic files:
44///
45/// ```rust,no_run
46/// use libmagic_rs::parser::load_magic_directory;
47/// use std::path::Path;
48///
49/// let rules = load_magic_directory(Path::new("/usr/share/file/magic.d"))?;
50/// println!("Loaded {} rules from directory", rules.len());
51/// # Ok::<(), libmagic_rs::ParseError>(())
52/// ```
53///
54/// Creating a Magdir-style directory structure:
55///
56/// ```rust,no_run
57/// use libmagic_rs::parser::load_magic_directory;
58/// use std::path::Path;
59///
60/// // Directory structure:
61/// // magic.d/
62/// //   ├── 01-elf
63/// //   ├── 02-archive
64/// //   └── 03-text
65///
66/// let rules = load_magic_directory(Path::new("./magic.d"))?;
67/// // Rules from all three files are merged in alphabetical order
68/// # Ok::<(), libmagic_rs::ParseError>(())
69/// ```
70///
71/// # Errors
72///
73/// Returns `ParseError` if:
74/// - The directory does not exist or cannot be accessed
75/// - Directory entries cannot be read
76/// - A file cannot be read due to I/O errors
77/// - A file contains invalid UTF-8 encoding
78///
79/// # Panics
80///
81/// This function does not panic under normal operation.
82#[allow(clippy::print_stderr)]
83pub fn load_magic_directory(dir_path: &Path) -> Result<Vec<MagicRule>, ParseError> {
84    use std::fs;
85
86    // Read directory entries
87    let entries = fs::read_dir(dir_path).map_err(|e| {
88        ParseError::invalid_syntax(
89            0,
90            format!("Failed to read directory '{}': {}", dir_path.display(), e),
91        )
92    })?;
93
94    // Collect and sort entries by filename for deterministic ordering
95    let mut file_paths: Vec<std::path::PathBuf> = Vec::new();
96    for entry in entries {
97        let entry = entry.map_err(|e| {
98            ParseError::invalid_syntax(
99                0,
100                format!(
101                    "Failed to read directory entry in '{}': {}",
102                    dir_path.display(),
103                    e
104                ),
105            )
106        })?;
107
108        let path = entry.path();
109        let file_type = entry.file_type().map_err(|e| {
110            ParseError::invalid_syntax(
111                0,
112                format!("Failed to read file type for '{}': {}", path.display(), e),
113            )
114        })?;
115
116        // Only process regular files, skip directories and symlinks
117        if file_type.is_file() && !file_type.is_symlink() {
118            file_paths.push(path);
119        }
120    }
121
122    // Sort by filename for deterministic ordering
123    file_paths.sort_by_key(|path| path.file_name().map(std::ffi::OsStr::to_os_string));
124
125    // Accumulate rules from all files
126    let mut all_rules = Vec::new();
127    let mut parse_failures: Vec<(PathBuf, ParseError)> = Vec::new();
128    let file_count = file_paths.len();
129
130    for path in file_paths {
131        // Read file contents
132        let contents = match fs::read_to_string(&path) {
133            Ok(contents) => contents,
134            Err(e) => {
135                // I/O errors are critical
136                return Err(ParseError::invalid_syntax(
137                    0,
138                    format!("Failed to read file '{}': {}", path.display(), e),
139                ));
140            }
141        };
142
143        // Parse the file
144        match super::parse_text_magic_file(&contents) {
145            Ok(rules) => {
146                // Successfully parsed - merge rules
147                all_rules.extend(rules);
148            }
149            Err(e) => {
150                // Track parse failures for reporting
151                parse_failures.push((path, e));
152            }
153        }
154    }
155
156    // If all files failed to parse, return an error
157    if all_rules.is_empty() && !parse_failures.is_empty() {
158        use std::fmt::Write;
159
160        let failure_details: Vec<String> = parse_failures
161            .iter()
162            .take(3) // Limit to first 3 failures for brevity
163            .map(|(path, e)| format!("  - {}: {}", path.display(), e))
164            .collect();
165
166        let mut message = format!("All {file_count} magic file(s) in directory failed to parse");
167        if !failure_details.is_empty() {
168            message.push_str(":\n");
169            message.push_str(&failure_details.join("\n"));
170            if parse_failures.len() > 3 {
171                let _ = write!(message, "\n  ... and {} more", parse_failures.len() - 3);
172            }
173        }
174
175        return Err(ParseError::invalid_syntax(0, message));
176    }
177
178    // Log warnings for partial failures (some files parsed, some failed)
179    // Note: Using eprintln for now; consider a logging framework in the future
180    #[allow(clippy::print_stderr)]
181    for (path, e) in &parse_failures {
182        eprintln!("Warning: Failed to parse '{}': {}", path.display(), e);
183    }
184
185    Ok(all_rules)
186}
187
188/// Loads magic rules from a file or directory, automatically detecting the format.
189///
190/// This is the unified entry point for loading magic rules from the filesystem. It
191/// automatically detects whether the path points to a text magic file, a directory
192/// containing magic files, or a binary compiled magic file, and dispatches to the
193/// appropriate handler.
194///
195/// # Format Detection and Handling
196///
197/// The function uses [`detect_format()`] to determine the file type and handles each
198/// format as follows:
199///
200/// - **Text format**: Reads the file contents and parses using [`super::parse_text_magic_file()`]
201/// - **Directory format**: Loads all magic files from the directory using [`load_magic_directory()`]
202/// - **Binary format**: Returns an error with guidance to use the `--use-builtin` option
203///
204/// # Arguments
205///
206/// * `path` - Path to a magic file or directory. Can be absolute or relative.
207///
208/// # Returns
209///
210/// Returns `Ok(Vec<MagicRule>)` containing all successfully parsed magic rules. For
211/// directories, rules from all files are merged in alphabetical order by filename.
212///
213/// # Errors
214///
215/// This function returns a [`ParseError`] in the following cases:
216///
217/// - **File not found**: The specified path does not exist
218/// - **Unsupported format**: The file is a binary compiled magic file (`.mgc`)
219/// - **Parse errors**: The magic file contains syntax errors or invalid rules
220/// - **I/O errors**: File system errors during reading (permissions, disk errors, etc.)
221///
222/// # Examples
223///
224/// ## Loading a text magic file
225///
226/// ```no_run
227/// use libmagic_rs::parser::load_magic_file;
228/// use std::path::Path;
229///
230/// let rules = load_magic_file(Path::new("/usr/share/misc/magic"))?;
231/// println!("Loaded {} magic rules", rules.len());
232/// # Ok::<(), libmagic_rs::ParseError>(())
233/// ```
234///
235/// ## Loading a directory of magic files
236///
237/// ```no_run
238/// use libmagic_rs::parser::load_magic_file;
239/// use std::path::Path;
240///
241/// let rules = load_magic_file(Path::new("/usr/share/misc/magic.d"))?;
242/// println!("Loaded {} rules from directory", rules.len());
243/// # Ok::<(), libmagic_rs::ParseError>(())
244/// ```
245///
246/// ## Handling binary format errors
247///
248/// ```no_run
249/// use libmagic_rs::parser::load_magic_file;
250/// use std::path::Path;
251///
252/// match load_magic_file(Path::new("/usr/share/misc/magic.mgc")) {
253///     Ok(rules) => println!("Loaded {} rules", rules.len()),
254///     Err(e) => {
255///         eprintln!("Error loading magic file: {}", e);
256///         eprintln!("Hint: Use --use-builtin for binary files");
257///     }
258/// }
259/// # Ok::<(), libmagic_rs::ParseError>(())
260/// ```
261///
262/// # Security
263///
264/// This function delegates to [`super::parse_text_magic_file()`] or [`load_magic_directory()`]
265/// based on format detection. Security considerations are handled by those functions:
266///
267/// - Rule hierarchy depth is bounded during parsing
268/// - Invalid syntax is rejected with descriptive errors
269/// - Binary `.mgc` files are rejected (not parsed)
270///
271/// Note: File size limits and memory exhaustion protection are not currently implemented.
272/// Large magic files will be loaded entirely into memory.
273///
274/// # See Also
275///
276/// - [`detect_format()`] - Format detection logic
277/// - [`super::parse_text_magic_file()`] - Text file parser
278/// - [`load_magic_directory()`] - Directory loader
279pub fn load_magic_file(path: &Path) -> Result<Vec<MagicRule>, ParseError> {
280    // Detect the magic file format
281    let format = detect_format(path)?;
282
283    // Dispatch to appropriate handler based on format
284    match format {
285        MagicFileFormat::Text => {
286            // Read file contents and parse as text magic file
287            let content = std::fs::read_to_string(path)?;
288            super::parse_text_magic_file(&content)
289        }
290        MagicFileFormat::Directory => {
291            // Load all magic files from directory
292            load_magic_directory(path)
293        }
294        MagicFileFormat::Binary => {
295            // Binary compiled magic files are not supported
296            Err(ParseError::unsupported_format(
297                0,
298                "binary .mgc file",
299                "Binary compiled magic files (.mgc) are not supported for parsing.\n\
300                 Use the --use-builtin option to use the built-in magic rules instead,\n\
301                 or provide a text-based magic file or directory.",
302            ))
303        }
304    }
305}
306
#[cfg(test)]
mod tests {
    use super::*;
    use std::fs;
    use tempfile::TempDir;

    /// Creates the scratch directory that a filesystem-backed test works in.
    #[track_caller]
    fn scratch_dir() -> TempDir {
        TempDir::new().expect("Failed to create temp dir")
    }

    /// Writes `contents` to `dir/name` and returns the resulting path.
    ///
    /// Panics with `failure` as the message if the write does not succeed.
    #[track_caller]
    fn write_file(dir: &Path, name: &str, contents: impl AsRef<[u8]>, failure: &str) -> PathBuf {
        let target = dir.join(name);
        fs::write(&target, contents).expect(failure);
        target
    }

    // ============================================================
    // load_magic_directory
    // ============================================================

    /// A directory that does not exist is reported as a read-directory error.
    #[test]
    fn test_load_directory_critical_error_io() {
        let missing_dir = Path::new("/this/should/not/exist/anywhere/at/all");
        let outcome = load_magic_directory(missing_dir);

        assert!(
            outcome.is_err(),
            "Should return error for non-existent directory"
        );
        let rendered = outcome.unwrap_err().to_string();
        assert!(rendered.contains("Failed to read directory"));
    }

    /// One unparsable file next to a valid one is skipped rather than fatal.
    #[test]
    fn test_load_directory_non_critical_error_parse() {
        let dir = scratch_dir();

        write_file(
            dir.path(),
            "valid.magic",
            "0 string \\x01\\x02 valid\n",
            "Failed to write valid file",
        );
        write_file(
            dir.path(),
            "invalid.magic",
            "this is invalid syntax\n",
            "Failed to write invalid file",
        );

        let loaded = load_magic_directory(dir.path()).expect("Should load valid files");

        assert_eq!(loaded.len(), 1, "Should load only valid file");
        assert_eq!(loaded[0].message, "valid");
    }

    /// Empty and comment-only files load successfully and yield no rules.
    #[test]
    fn test_load_directory_empty_files() {
        let dir = scratch_dir();

        write_file(dir.path(), "empty.magic", "", "Failed to write empty file");
        write_file(
            dir.path(),
            "comments.magic",
            "# Just comments\n# Nothing else\n",
            "Failed to write comments file",
        );

        let loaded = load_magic_directory(dir.path()).expect("Should handle empty files");

        assert_eq!(loaded.len(), 0, "Empty files should contribute no rules");
    }

    /// A file that is not valid UTF-8 aborts the whole directory load.
    #[test]
    fn test_load_directory_binary_files() {
        let dir = scratch_dir();

        // These bytes are not valid UTF-8.
        write_file(
            dir.path(),
            "binary.dat",
            [0xFF, 0xFE, 0xFF, 0xFE],
            "Failed to write binary file",
        );
        write_file(
            dir.path(),
            "valid.magic",
            "0 string \\x01\\x02 valid\n",
            "Failed to write valid file",
        );

        let outcome = load_magic_directory(dir.path());

        assert!(
            outcome.is_err(),
            "Binary files should cause critical error due to invalid UTF-8"
        );
    }

    /// File extensions play no part in deciding which files are loaded.
    #[test]
    fn test_load_directory_mixed_extensions() {
        let dir = scratch_dir();

        write_file(
            dir.path(),
            "file.magic",
            "0 string \\x01\\x02 magic\n",
            "Failed to write .magic file",
        );
        write_file(
            dir.path(),
            "file.txt",
            "0 string \\x03\\x04 txt\n",
            "Failed to write .txt file",
        );
        write_file(
            dir.path(),
            "noext",
            "0 string \\x05\\x06 noext\n",
            "Failed to write no-ext file",
        );

        let loaded = load_magic_directory(dir.path())
            .expect("Should load all files regardless of extension");

        assert_eq!(
            loaded.len(),
            3,
            "Should process all files regardless of extension"
        );
        for expected in ["magic", "txt", "noext"] {
            assert!(loaded.iter().any(|rule| rule.message.as_str() == expected));
        }
    }

    /// Rules come back ordered by file name, not by creation order.
    #[test]
    fn test_load_directory_alphabetical_ordering() {
        let dir = scratch_dir();

        // Deliberately created out of order.
        write_file(
            dir.path(),
            "03-third",
            "0 string \\x07\\x08\\x09 third\n",
            "Failed to write third file",
        );
        write_file(
            dir.path(),
            "01-first",
            "0 string \\x01\\x02\\x03 first\n",
            "Failed to write first file",
        );
        write_file(
            dir.path(),
            "02-second",
            "0 string \\x04\\x05\\x06 second\n",
            "Failed to write second file",
        );

        let loaded = load_magic_directory(dir.path()).expect("Should load directory in order");

        let order: Vec<&str> = loaded.iter().map(|rule| rule.message.as_str()).collect();
        assert_eq!(order, ["first", "second", "third"]);
    }

    // ============================================================
    // load_magic_file
    // ============================================================

    /// A plain text magic file is parsed directly.
    #[test]
    fn test_load_magic_file_text_format() {
        let dir = scratch_dir();
        let magic_file = write_file(
            dir.path(),
            "magic.txt",
            "0 string \\x7fELF ELF executable\n",
            "Failed to write magic file",
        );

        let loaded = load_magic_file(&magic_file).expect("Failed to load text magic file");

        assert_eq!(loaded.len(), 1);
        assert_eq!(loaded[0].message, "ELF executable");
    }

    /// A directory path is routed to the directory loader.
    #[test]
    fn test_load_magic_file_directory_format() {
        let dir = scratch_dir();
        let magic_dir = dir.path().join("magic.d");
        fs::create_dir(&magic_dir).expect("Failed to create magic directory");

        write_file(
            &magic_dir,
            "00_elf",
            "0 string \\x7fELF ELF executable\n",
            "Failed to write elf file",
        );
        write_file(
            &magic_dir,
            "01_zip",
            "0 string \\x50\\x4b\\x03\\x04 ZIP archive\n",
            "Failed to write zip file",
        );

        let loaded = load_magic_file(&magic_dir).expect("Failed to load directory");

        let order: Vec<&str> = loaded.iter().map(|rule| rule.message.as_str()).collect();
        assert_eq!(order, ["ELF executable", "ZIP archive"]);
    }

    /// A compiled `.mgc` file is rejected with guidance about the built-in rules.
    #[test]
    fn test_load_magic_file_binary_format_error() {
        use std::fs::File;
        use std::io::Write;

        let dir = scratch_dir();
        let mgc_path = dir.path().join("magic.mgc");

        // The file starts with 0xF11E041C stored little-endian: 1C 04 1E F1.
        let mut handle = File::create(&mgc_path).expect("Failed to create binary file");
        handle
            .write_all(&0xF11E_041C_u32.to_le_bytes())
            .expect("Failed to write magic number");

        let outcome = load_magic_file(&mgc_path);

        assert!(outcome.is_err(), "Should fail to load binary .mgc file");

        let error_msg = outcome.unwrap_err().to_string();

        assert!(
            ["Binary", "binary"]
                .into_iter()
                .any(|needle| error_msg.contains(needle)),
            "Error should mention binary format: {error_msg}",
        );
        assert!(
            ["--use-builtin", "built-in"]
                .into_iter()
                .any(|needle| error_msg.contains(needle)),
            "Error should mention --use-builtin option: {error_msg}",
        );
    }

    /// A path that does not exist produces an error.
    #[test]
    fn test_load_magic_file_io_error() {
        let outcome = load_magic_file(Path::new("/this/path/should/not/exist/magic.txt"));

        assert!(outcome.is_err(), "Should fail for non-existent file");
    }

    /// A syntax error in a single text file surfaces to the caller as a parse error.
    #[test]
    fn test_load_magic_file_parse_error_propagation() {
        let dir = scratch_dir();

        // The rule line has no offset, which makes it invalid.
        let invalid_file = write_file(
            dir.path(),
            "invalid.magic",
            "string test invalid\n",
            "Failed to write invalid file",
        );

        let outcome = load_magic_file(&invalid_file);

        assert!(outcome.is_err(), "Should fail for file with parse errors");

        // The debug rendering should identify a syntax problem, not an I/O one.
        let error = outcome.unwrap_err();
        let error_msg = format!("{error:?}");
        assert!(
            error_msg.contains("InvalidSyntax") || error_msg.contains("syntax"),
            "Error should be parse error: {error_msg}",
        );
    }
}