keramics_formats/
scanner.rs

1/* Copyright 2024-2025 Joachim Metz <joachim.metz@gmail.com>
2 *
3 * Licensed under the Apache License, Version 2.0 (the "License");
4 * you may not use this file except in compliance with the License. You may
5 * obtain a copy of the License at https://www.apache.org/licenses/LICENSE-2.0
6 *
7 * Unless required by applicable law or agreed to in writing, software
8 * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
9 * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
10 * License for the specific language governing permissions and limitations
11 * under the License.
12 */
13
14use std::collections::HashSet;
15use std::io::SeekFrom;
16
17use keramics_core::{DataStreamReference, ErrorTrace};
18use keramics_sigscan::{BuildError, PatternType, ScanContext, Scanner, Signature};
19
20use super::enums::FormatIdentifier;
21
22/// Format scanner.
23pub struct FormatScanner {
24    /// Signature scanner.
25    signature_scanner: Scanner,
26}
27
28impl FormatScanner {
29    /// Creates a new format scanner.
30    pub fn new() -> Self {
31        Self {
32            signature_scanner: Scanner::new(),
33        }
34    }
35
36    /// Adds Apple Partition Map (APM) signatures.
37    pub fn add_apm_signatures(&mut self) {
38        // APM signature.
39        // Note that technically "PM" at offset 512 is the Apple Partion Map
40        // signature but using the partition type is less error prone.
41        self.signature_scanner.add_signature(Signature::new(
42            "apm1",
43            PatternType::BoundToStart,
44            560,
45            &[
46                0x41, 0x70, 0x70, 0x6c, 0x65, 0x5f, 0x70, 0x61, 0x72, 0x74, 0x69, 0x74, 0x69, 0x6f,
47                0x6e, 0x5f, 0x6d, 0x61, 0x70, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
48                0x00, 0x00, 0x00, 0x00,
49            ],
50        ));
51    }
52
53    /// Adds Extended File System (ext) signatures.
54    pub fn add_ext_signatures(&mut self) {
55        // Signature in superblock.
56        self.signature_scanner.add_signature(Signature::new(
57            "ext1",
58            PatternType::BoundToStart,
59            1080,
60            &[0x53, 0xef],
61        ));
62    }
63
64    /// Adds Expert Witness Compression Format (EWF) signatures.
65    pub fn add_ewf_signatures(&mut self) {
66        // Version 1 signature in file header.
67        self.signature_scanner.add_signature(Signature::new(
68            "ewf1",
69            PatternType::BoundToStart,
70            0,
71            &[0x45, 0x56, 0x46, 0x09, 0x0d, 0x0a, 0xff, 0x00],
72        ));
73    }
74
75    /// Adds GUID Partition Table (GPT) signatures.
76    pub fn add_gpt_signatures(&mut self) {
77        // Signature for 512 bytes per sector.
78        self.signature_scanner.add_signature(Signature::new(
79            "gpt1",
80            PatternType::BoundToStart,
81            512,
82            &[0x45, 0x46, 0x49, 0x20, 0x50, 0x41, 0x52, 0x54],
83        ));
84        // Signature for 1024 bytes per sector.
85        self.signature_scanner.add_signature(Signature::new(
86            "gpt2",
87            PatternType::BoundToStart,
88            1024,
89            &[0x45, 0x46, 0x49, 0x20, 0x50, 0x41, 0x52, 0x54],
90        ));
91        // Signature for 2048 bytes per sector.
92        self.signature_scanner.add_signature(Signature::new(
93            "gpt3",
94            PatternType::BoundToStart,
95            2048,
96            &[0x45, 0x46, 0x49, 0x20, 0x50, 0x41, 0x52, 0x54],
97        ));
98        // Signature for 4096 bytes per sector.
99        self.signature_scanner.add_signature(Signature::new(
100            "gpt4",
101            PatternType::BoundToStart,
102            4096,
103            &[0x45, 0x46, 0x49, 0x20, 0x50, 0x41, 0x52, 0x54],
104        ));
105    }
106
107    /// Adds Master Boot Record (MBR) signatures.
108    pub fn add_mbr_signatures(&mut self) {
109        // Signature for 512 bytes per sector.
110        self.signature_scanner.add_signature(Signature::new(
111            "mbr1",
112            PatternType::BoundToStart,
113            510,
114            &[0x55, 0xaa],
115        ));
116        // Signature for 1024 bytes per sector.
117        self.signature_scanner.add_signature(Signature::new(
118            "mbr2",
119            PatternType::BoundToStart,
120            1022,
121            &[0x55, 0xaa],
122        ));
123        // Signature for 2048 bytes per sector.
124        self.signature_scanner.add_signature(Signature::new(
125            "mbr3",
126            PatternType::BoundToStart,
127            2046,
128            &[0x55, 0xaa],
129        ));
130        // Signature for 4096 bytes per sector.
131        self.signature_scanner.add_signature(Signature::new(
132            "mbr4",
133            PatternType::BoundToStart,
134            4094,
135            &[0x55, 0xaa],
136        ));
137    }
138
139    /// Adds New Technologies File System (NTFS) signatures.
140    pub fn add_ntfs_signatures(&mut self) {
141        // Signature in boot record.
142        self.signature_scanner.add_signature(Signature::new(
143            "ntfs1",
144            PatternType::BoundToStart,
145            3,
146            &[0x4e, 0x54, 0x46, 0x53, 0x20, 0x20, 0x20, 0x20],
147        ));
148    }
149
150    /// Adds QEMU Copy-On-Write (QCOW) signatures.
151    pub fn add_qcow_signatures(&mut self) {
152        // Version 1 signature and version in header.
153        self.signature_scanner.add_signature(Signature::new(
154            "qcow1",
155            PatternType::BoundToStart,
156            0,
157            &[0x51, 0x46, 0x49, 0xfb, 0x00, 0x00, 0x00, 0x01],
158        ));
159        // Version 2 signature and version in header.
160        self.signature_scanner.add_signature(Signature::new(
161            "qcow2",
162            PatternType::BoundToStart,
163            0,
164            &[0x51, 0x46, 0x49, 0xfb, 0x00, 0x00, 0x00, 0x02],
165        ));
166        // Version 3 signature and version in header.
167        self.signature_scanner.add_signature(Signature::new(
168            "qcow3",
169            PatternType::BoundToStart,
170            0,
171            &[0x51, 0x46, 0x49, 0xfb, 0x00, 0x00, 0x00, 0x03],
172        ));
173    }
174
175    /// Adds Mac OS sparse image (.sparseimage) signatures.
176    pub fn add_sparseimage_signatures(&mut self) {
177        // Signature in header.
178        self.signature_scanner.add_signature(Signature::new(
179            "sparseimage1",
180            PatternType::BoundToStart,
181            0,
182            &[0x73, 0x70, 0x72, 0x73],
183        ));
184    }
185
186    /// Adds Universal Disk Image Format (UDIF) (signatures.
187    pub fn add_udif_signatures(&mut self) {
188        // Signature in footer.
189        self.signature_scanner.add_signature(Signature::new(
190            "udif1",
191            PatternType::BoundToEnd,
192            512,
193            &[0x6b, 0x6f, 0x6c, 0x79],
194        ));
195    }
196
197    /// Adds Virtual Hard Disk (VHD) signatures.
198    pub fn add_vhd_signatures(&mut self) {
199        // Signature in footer.
200        self.signature_scanner.add_signature(Signature::new(
201            "vhd1",
202            PatternType::BoundToEnd,
203            512,
204            &[0x63, 0x6f, 0x6e, 0x65, 0x63, 0x74, 0x69, 0x78],
205        ));
206    }
207
208    /// Adds Virtual Hard Disk version 2 (VHDX) signatures.
209    pub fn add_vhdx_signatures(&mut self) {
210        // Signature in header.
211        self.signature_scanner.add_signature(Signature::new(
212            "vhdx1",
213            PatternType::BoundToStart,
214            0,
215            &[0x76, 0x68, 0x64, 0x78, 0x66, 0x69, 0x6c, 0x65],
216        ));
217    }
218
219    /// Builds the format signature scanner.
220    pub fn build(&mut self) -> Result<(), BuildError> {
221        self.signature_scanner.build()
222    }
223
224    /// Scans a data stream for format signatures.
225    pub fn scan_data_stream(
226        &self,
227        data_stream: &DataStreamReference,
228    ) -> Result<HashSet<FormatIdentifier>, ErrorTrace> {
229        let data_size: u64 = keramics_core::data_stream_get_size!(data_stream);
230
231        let mut scan_context: ScanContext = ScanContext::new(&self.signature_scanner, data_size);
232
233        // The size of the header range can be larger than the size of the data stream.
234        let mut data: Vec<u8> = vec![0; scan_context.header_range_size as usize];
235
236        keramics_core::data_stream_read_at_position!(data_stream, &mut data, SeekFrom::Start(0));
237        scan_context.data_offset = 0;
238        scan_context.scan_buffer(&data);
239
240        // The size of the footer range can be larger than the size of the data stream.
241        let mut data: Vec<u8> = vec![0; scan_context.footer_range_size as usize];
242
243        let data_offset: usize = if scan_context.footer_range_size < data_size {
244            0
245        } else {
246            (scan_context.footer_range_size - data_size) as usize
247        };
248        let data_stream_offset: u64 = if scan_context.footer_range_size < data_size {
249            data_size - scan_context.footer_range_size
250        } else {
251            0
252        };
253        keramics_core::data_stream_read_at_position!(
254            data_stream,
255            &mut data[data_offset..],
256            SeekFrom::Start(data_stream_offset)
257        );
258        scan_context.data_offset = data_stream_offset;
259        scan_context.scan_buffer(&data);
260
261        let mut scan_results: HashSet<FormatIdentifier> = HashSet::new();
262        for signature in scan_context.results.values() {
263            let format_identifier: FormatIdentifier = match signature.identifier.as_str() {
264                "apm1" => FormatIdentifier::Apm,
265                "ext1" => FormatIdentifier::Ext,
266                "ewf1" => FormatIdentifier::Ewf,
267                "gpt1" | "gpt2" | "gpt3" | "gpt4" => FormatIdentifier::Gpt,
268                "mbr1" | "mbr2" | "mbr3" | "mbr4" => FormatIdentifier::Mbr,
269                "ntfs1" => FormatIdentifier::Ntfs,
270                "qcow1" | "qcow2" | "qcow3" => FormatIdentifier::Qcow,
271                "sparseimage1" => FormatIdentifier::SparseImage,
272                "udif1" => FormatIdentifier::Udif,
273                "vhd1" => FormatIdentifier::Vhd,
274                "vhdx1" => FormatIdentifier::Vhdx,
275                _ => FormatIdentifier::Unknown,
276            };
277            scan_results.insert(format_identifier);
278        }
279        Ok(scan_results)
280    }
281}
282
#[cfg(test)]
mod tests {
    use super::*;

    use std::path::PathBuf;

    use keramics_core::open_os_data_stream;

    /// Creates a format scanner with the signatures used by the tests.
    ///
    /// Note that the MBR signatures are not added.
    fn create_format_scanner() -> FormatScanner {
        let mut format_scanner: FormatScanner = FormatScanner::new();
        format_scanner.add_apm_signatures();
        format_scanner.add_ext_signatures();
        format_scanner.add_ewf_signatures();
        format_scanner.add_gpt_signatures();
        format_scanner.add_ntfs_signatures();
        format_scanner.add_qcow_signatures();
        format_scanner.add_sparseimage_signatures();
        format_scanner.add_udif_signatures();
        format_scanner.add_vhd_signatures();
        format_scanner.add_vhdx_signatures();

        format_scanner
    }

    #[test]
    fn test_build() -> Result<(), BuildError> {
        let mut format_scanner: FormatScanner = create_format_scanner();

        format_scanner.build()
    }

    #[test]
    fn test_scan_data_stream() -> Result<(), ErrorTrace> {
        let mut format_scanner: FormatScanner = create_format_scanner();

        if let Err(error) = format_scanner.build() {
            return Err(keramics_core::error_trace_new_with_error!(
                "Unable to build format scanner",
                error
            ));
        }
        let path_buf: PathBuf = PathBuf::from("../test_data/qcow/ext2.qcow2");
        let data_stream: DataStreamReference = open_os_data_stream(&path_buf)?;
        let scan_results: HashSet<FormatIdentifier> =
            format_scanner.scan_data_stream(&data_stream)?;

        // Only the QCOW format is expected to be detected.
        assert_eq!(scan_results.len(), 1);
        assert!(scan_results.contains(&FormatIdentifier::Qcow));

        Ok(())
    }
}