Skip to main content

libmagic_rs/parser/
format.rs

1// Copyright (c) 2025-2026 the libmagic-rs contributors
2// SPDX-License-Identifier: Apache-2.0
3
4//! Format detection for magic files.
5//!
6//! Detects whether a path points to a text magic file, a directory of magic files,
7//! or a binary compiled magic file (.mgc format).
8
9use crate::error::ParseError;
10use std::io::Read;
11use std::path::Path;
12
/// Classification of what a magic database path refers to.
///
/// Produced by format detection; it only describes the on-disk shape and says
/// nothing about whether the parser can actually load that shape.
#[derive(Clone, Copy, Debug, Eq, PartialEq)]
pub enum MagicFileFormat {
    /// A single human-readable, text-based magic file.
    Text,
    /// A directory holding multiple magic files (the Magdir pattern).
    Directory,
    /// A compiled binary magic file (`.mgc` format).
    Binary,
}
23
24/// Detect the format of a magic file or directory
25///
26/// This function examines the filesystem metadata and file contents to determine
27/// whether the path points to a text magic file, a directory, or a binary .mgc file.
28///
29/// # Detection Logic
30///
31/// 1. Check if path is a directory -> `MagicFileFormat::Directory`
32/// 2. Read first 4 bytes and check for binary magic number `0xF11E041C` -> `MagicFileFormat::Binary`
33/// 3. Otherwise -> `MagicFileFormat::Text`
34///
35/// # Arguments
36///
37/// * `path` - Path to the magic file or directory to detect
38///
39/// # Errors
40///
41/// Returns `ParseError::IoError` if the path doesn't exist or cannot be read.
42///
43/// # Notes
44///
45/// This function only detects the format and returns it. It does not validate whether
46/// the format is supported by the parser. Higher-level code should check the returned
47/// format and decide how to handle unsupported formats (e.g., binary .mgc files).
48///
49/// # Examples
50///
51/// ```rust,no_run
52/// use libmagic_rs::parser::detect_format;
53/// use std::path::Path;
54///
55/// let format = detect_format(Path::new("/usr/share/file/magic"))?;
56/// # Ok::<(), libmagic_rs::ParseError>(())
57/// ```
58pub fn detect_format(path: &Path) -> Result<MagicFileFormat, ParseError> {
59    // Check if path exists and is accessible
60    let metadata = std::fs::metadata(path)?;
61
62    // Check if it's a directory
63    if metadata.is_dir() {
64        return Ok(MagicFileFormat::Directory);
65    }
66
67    // Read first 4 bytes to check for binary magic number
68    let mut file = std::fs::File::open(path)?;
69
70    let mut magic_bytes = [0u8; 4];
71
72    match file.read_exact(&mut magic_bytes) {
73        Ok(()) => {
74            // Check for binary magic number 0xF11E041C in little-endian format
75            let magic_number = u32::from_le_bytes(magic_bytes);
76            if magic_number == 0xF11E_041C {
77                return Ok(MagicFileFormat::Binary);
78            }
79            // Not a binary magic file, assume text
80            Ok(MagicFileFormat::Text)
81        }
82        Err(e) if e.kind() == std::io::ErrorKind::UnexpectedEof => {
83            // File is too small to be a binary magic file, assume text
84            Ok(MagicFileFormat::Text)
85        }
86        Err(e) => Err(ParseError::IoError(e)),
87    }
88}
89
#[cfg(test)]
mod tests {
    use super::*;
    use std::fs;
    use std::path::PathBuf;

    /// A scratch path inside the system temp directory that is removed when dropped.
    ///
    /// The file name embeds the current process id so concurrent test runs on the
    /// same machine do not clobber each other's fixtures, and cleanup happens in
    /// `Drop` so nothing is left behind when an assertion panics.
    struct Scratch(PathBuf);

    impl Scratch {
        /// Reserve a unique path; nothing is created on disk yet.
        ///
        /// `label` must be distinct per test, because tests within one process
        /// run in parallel and share the same process id.
        fn new(label: &str) -> Self {
            let name = format!("libmagic_rs_format_{}_{label}", std::process::id());
            Self(std::env::temp_dir().join(name))
        }

        /// Reserve a path and create a file there with the given contents.
        fn file(label: &str, contents: &[u8]) -> Self {
            let scratch = Self::new(label);
            fs::write(scratch.path(), contents).unwrap();
            scratch
        }

        /// Reserve a path and create an empty directory there.
        fn dir(label: &str) -> Self {
            let scratch = Self::new(label);
            fs::create_dir_all(scratch.path()).unwrap();
            scratch
        }

        fn path(&self) -> &std::path::Path {
            &self.0
        }
    }

    impl Drop for Scratch {
        fn drop(&mut self) {
            // Best-effort cleanup: the path may never have been created (e.g. the
            // "nonexistent" test), so failures here are deliberately ignored.
            let _ = if self.0.is_dir() {
                fs::remove_dir_all(&self.0)
            } else {
                fs::remove_file(&self.0)
            };
        }
    }

    #[test]
    fn test_detect_format_text_file() {
        let text_file = Scratch::file("text_magic.txt", b"# Magic file\n0 string test Test");

        let format = detect_format(text_file.path()).unwrap();
        assert_eq!(format, MagicFileFormat::Text);
    }

    #[test]
    fn test_detect_format_directory() {
        let magic_dir = Scratch::dir("magic_dir");

        let format = detect_format(magic_dir.path()).unwrap();
        assert_eq!(format, MagicFileFormat::Directory);
    }

    #[test]
    fn test_detect_format_binary_mgc() {
        // Binary magic number 0xF11E041C in little-endian, followed by payload.
        let contents = [&[0x1C, 0x04, 0x1E, 0xF1][..], b"additional binary data"].concat();
        let binary_file = Scratch::file("binary.mgc", &contents);

        let format = detect_format(binary_file.path()).unwrap();
        assert_eq!(format, MagicFileFormat::Binary);
    }

    #[test]
    fn test_detect_format_nonexistent_path() {
        // Reserved but never created, so the lookup must fail.
        let nonexistent = Scratch::new("nonexistent_magic_file.txt");

        match detect_format(nonexistent.path()) {
            Err(ParseError::IoError(e)) => {
                assert_eq!(e.kind(), std::io::ErrorKind::NotFound);
            }
            other => panic!("Expected IoError, got: {other:?}"),
        }
    }

    #[test]
    fn test_detect_format_empty_file() {
        let empty_file = Scratch::file("empty_magic.txt", b"");

        // Empty files should be detected as text (too small for binary magic)
        let format = detect_format(empty_file.path()).unwrap();
        assert_eq!(format, MagicFileFormat::Text);
    }

    #[test]
    fn test_detect_format_small_file() {
        let small_file = Scratch::file("small_magic.txt", b"ab"); // Only 2 bytes

        // Small files should be detected as text
        let format = detect_format(small_file.path()).unwrap();
        assert_eq!(format, MagicFileFormat::Text);
    }

    #[test]
    fn test_detect_format_text_with_binary_content() {
        // Binary data that's NOT the magic number, followed by text.
        let contents = [&[0xFF, 0xFE, 0xFD, 0xFC][..], b"some text"].concat();
        let binary_text_file = Scratch::file("binary_text.txt", &contents);

        // Should be detected as text (wrong magic number)
        let format = detect_format(binary_text_file.path()).unwrap();
        assert_eq!(format, MagicFileFormat::Text);
    }

    #[test]
    fn test_magic_file_format_enum_equality() {
        assert_eq!(MagicFileFormat::Text, MagicFileFormat::Text);
        assert_eq!(MagicFileFormat::Directory, MagicFileFormat::Directory);
        assert_eq!(MagicFileFormat::Binary, MagicFileFormat::Binary);

        assert_ne!(MagicFileFormat::Text, MagicFileFormat::Directory);
        assert_ne!(MagicFileFormat::Text, MagicFileFormat::Binary);
        assert_ne!(MagicFileFormat::Directory, MagicFileFormat::Binary);
    }

    #[test]
    fn test_magic_file_format_debug() {
        let text_format = MagicFileFormat::Text;
        let debug_str = format!("{text_format:?}");
        assert!(debug_str.contains("Text"));
    }

    #[test]
    // The explicit `.clone()` is the point of this test: it exercises the
    // `Clone` impl rather than the implicit `Copy`.
    #[allow(clippy::clone_on_copy)]
    fn test_magic_file_format_clone() {
        let original = MagicFileFormat::Directory;
        let cloned = original.clone();
        assert_eq!(original, cloned);
    }

    #[test]
    fn test_magic_file_format_copy() {
        let original = MagicFileFormat::Binary;
        let copied = original; // Copy trait allows this
        assert_eq!(original, copied);
    }
}