Skip to main content

provenant/utils/
magic.rs

1// SPDX-FileCopyrightText: Provenant contributors
2// SPDX-License-Identifier: Apache-2.0
3
4//! File magic byte detection utilities.
5//!
6//! Provides low-level file format detection by reading and checking magic bytes
7//! at the beginning of files. Used by parsers to disambiguate file types that
8//! share the same extension (e.g., Alpine .apk vs Android .apk).
9
10use std::fs::File;
11use std::io::{BufReader, Read};
12use std::path::Path;
13
14/// Check if file starts with ZIP magic bytes (PK\x03\x04).
15///
16/// ZIP format is used by many file types including Android APK, JAR, InstallShield installers, etc.
17///
18/// # Returns
19/// `true` if the file starts with the ZIP signature, `false` otherwise or on IO error.
20pub fn is_zip(path: &Path) -> bool {
21    check_magic_bytes(path, &[0x50, 0x4B, 0x03, 0x04])
22}
23
24pub fn is_gzip(path: &Path) -> bool {
25    check_magic_bytes(path, &[0x1F, 0x8B])
26}
27
/// Check if file starts with Squashfs magic bytes.
///
/// Squashfs filesystems can be either little-endian (`hsqs`) or big-endian
/// (`sqsh`). The four-byte header is read from disk once and compared against
/// both signatures, so the file is opened a single time regardless of which
/// variant (if any) it is.
///
/// # Returns
/// `true` if the file starts with either Squashfs signature, `false` otherwise
/// (including files shorter than four bytes) or on IO error.
pub fn is_squashfs(path: &Path) -> bool {
    // Little-endian: hsqs (0x68, 0x73, 0x71, 0x73)
    const LITTLE_ENDIAN_MAGIC: [u8; 4] = *b"hsqs";
    // Big-endian: sqsh (0x73, 0x71, 0x73, 0x68)
    const BIG_ENDIAN_MAGIC: [u8; 4] = *b"sqsh";

    let mut header = [0u8; 4];
    // `read_exact` fails on files shorter than the header, which maps to `false`.
    let header_read = File::open(path)
        .and_then(|mut file| file.read_exact(&mut header))
        .is_ok();

    header_read && (header == LITTLE_ENDIAN_MAGIC || header == BIG_ENDIAN_MAGIC)
}
41
/// Check if file contains the NSIS installer signature.
///
/// NSIS installers are Windows executables that contain the
/// `Nullsoft.NSIS.exehead` marker. Real-world installers can place this marker
/// well beyond the first few kilobytes, so search the file in streaming chunks
/// instead of assuming it appears near the beginning.
///
/// The scan keeps the last `signature length - 1` bytes of everything read so
/// far at the front of the buffer, so a marker that straddles two reads is
/// still found. Short reads are tolerated (only a zero-length read is treated
/// as end of file) and reads interrupted by a signal are retried.
///
/// # Returns
/// `true` if the NSIS signature is found anywhere in the file, `false` otherwise or on IO error.
pub fn is_nsis_installer(path: &Path) -> bool {
    const CHUNK_SIZE: usize = 64 * 1024;
    const NSIS_SIGNATURE: &[u8] = b"Nullsoft.NSIS.exehead";

    let file = match File::open(path) {
        Ok(f) => f,
        Err(_) => return false,
    };
    let mut reader = BufReader::new(file);

    // Longest tail of already-scanned data that could still be the start of a match.
    let overlap = NSIS_SIGNATURE.len().saturating_sub(1);
    let mut buffer = vec![0u8; overlap + CHUNK_SIZE];
    // Number of carried-over bytes at the front of `buffer`; never exceeds `overlap`,
    // so the read slice below always fits inside the buffer.
    let mut carry_len = 0;

    loop {
        let bytes_read = match reader.read(&mut buffer[carry_len..carry_len + CHUNK_SIZE]) {
            // A zero-length read is the only reliable end-of-file indicator.
            Ok(0) => return false,
            Ok(n) => n,
            // An interrupted read is transient; retry with the buffer state unchanged.
            Err(err) if err.kind() == std::io::ErrorKind::Interrupted => continue,
            Err(_) => return false,
        };

        let filled = carry_len + bytes_read;
        let found = buffer[..filled]
            .windows(NSIS_SIGNATURE.len())
            .any(|window| window == NSIS_SIGNATURE);
        if found {
            return true;
        }

        // Keep at most `overlap` trailing bytes. If less than that has been read so
        // far (a short read), keep all of it and continue instead of giving up.
        carry_len = filled.min(overlap);
        buffer.copy_within(filled - carry_len..filled, 0);
    }
}
92
/// Compare the leading bytes of the file at `path` against `magic`.
///
/// Exactly `magic.len()` bytes are read from the start of the file. Returns
/// `true` only when all of them could be read and they equal `magic`; a file
/// that cannot be opened, or that is shorter than `magic`, yields `false`.
fn check_magic_bytes(path: &Path, magic: &[u8]) -> bool {
    let mut prefix = vec![0u8; magic.len()];

    // Open and read in one fallible chain; any IO failure collapses to `false`.
    let prefix_read = File::open(path)
        .and_then(|mut handle| handle.read_exact(&mut prefix))
        .is_ok();

    prefix_read && prefix.as_slice() == magic
}
108
#[cfg(test)]
mod tests {
    use super::*;
    use std::io::Write;
    use tempfile::NamedTempFile;

    /// Marker string the NSIS detection looks for.
    const NSIS_MARKER: &[u8] = b"Nullsoft.NSIS.exehead";

    /// Create a temporary file holding exactly `contents`.
    ///
    /// The returned handle must stay alive for as long as the path is used.
    fn temp_file_with(contents: &[u8]) -> NamedTempFile {
        let mut file = NamedTempFile::new().unwrap();
        file.write_all(contents).unwrap();
        file
    }

    #[test]
    fn test_is_zip() {
        // ZIP signature followed by padding is detected.
        let zip = temp_file_with(&[0x50, 0x4B, 0x03, 0x04, 0x00, 0x00]);
        assert!(is_zip(zip.path()));

        // A gzip header is not a ZIP signature.
        let gzip = temp_file_with(&[0x1F, 0x8B, 0x08, 0x00]);
        assert!(!is_zip(gzip.path()));

        // A missing file is reported as "not ZIP" rather than an error.
        assert!(!is_zip(Path::new("/nonexistent/file.zip")));
    }

    #[test]
    fn test_is_gzip() {
        let gzip = temp_file_with(&[0x1F, 0x8B, 0x08, 0x00]);
        assert!(is_gzip(gzip.path()));

        let zip = temp_file_with(&[0x50, 0x4B, 0x03, 0x04]);
        assert!(!is_gzip(zip.path()));
    }

    #[test]
    fn test_is_squashfs_little_endian() {
        // Little-endian magic: "hsqs".
        let image = temp_file_with(&[0x68, 0x73, 0x71, 0x73, 0x00, 0x00]);
        assert!(is_squashfs(image.path()));
    }

    #[test]
    fn test_is_squashfs_big_endian() {
        // Big-endian magic: "sqsh".
        let image = temp_file_with(&[0x73, 0x71, 0x73, 0x68, 0x00, 0x00]);
        assert!(is_squashfs(image.path()));
    }

    #[test]
    fn test_is_squashfs_negative() {
        // A ZIP header matches neither Squashfs variant.
        let zip = temp_file_with(&[0x50, 0x4B, 0x03, 0x04]);
        assert!(!is_squashfs(zip.path()));

        // A missing file is reported as "not Squashfs".
        assert!(!is_squashfs(Path::new("/nonexistent/file.squashfs")));
    }

    #[test]
    fn test_is_nsis_installer() {
        // Marker right after a DOS header, followed by padding.
        let mut near_start = b"MZ\x90\x00".to_vec();
        near_start.extend_from_slice(NSIS_MARKER);
        near_start.extend_from_slice(&[0u8; 100]);
        let file = temp_file_with(&near_start);
        assert!(is_nsis_installer(file.path()));

        // Marker after 1000 bytes of padding.
        let mut in_middle = vec![0u8; 1000];
        in_middle.extend_from_slice(NSIS_MARKER);
        let file2 = temp_file_with(&in_middle);
        assert!(is_nsis_installer(file2.path()));

        // No marker anywhere.
        let file3 = temp_file_with(b"This is not an NSIS installer");
        assert!(!is_nsis_installer(file3.path()));

        // A missing file is reported as "not NSIS".
        assert!(!is_nsis_installer(Path::new("/nonexistent/setup.exe")));
    }

    #[test]
    fn test_is_nsis_installer_beyond_initial_chunk() {
        // Real NSIS installers can place the signature well past the opening bytes.
        let mut contents = vec![0u8; 70_000];
        contents.extend_from_slice(NSIS_MARKER);
        let file = temp_file_with(&contents);
        assert!(is_nsis_installer(file.path()));
    }

    #[test]
    fn test_check_magic_bytes_short_file() {
        // Only two bytes on disk, but four bytes of magic requested.
        let file = temp_file_with(&[0x50, 0x4B]);
        assert!(!check_magic_bytes(file.path(), &[0x50, 0x4B, 0x03, 0x04]));
    }

    #[test]
    fn test_check_magic_bytes_empty_file() {
        // Nothing is ever written to this file.
        let file = NamedTempFile::new().unwrap();
        assert!(!check_magic_bytes(file.path(), &[0x50, 0x4B]));
    }
}