open_detect/
scanner.rs

1use crate::{errors::Result, scan_result::ScanResult, signature::SigSet};
2use archive::{ArchiveExtractor, ArchiveFormat};
3use mime_type::{MimeFormat, MimeType};
4use std::path::Path;
5
6pub struct Scanner {
7    sig_set: SigSet,
8    max_extracted_size: usize,
9    max_total_extracted_size: usize,
10}
11
12impl Scanner {
13    /// Create a new Scanner from a `SigSet` with default size limits.
14    ///
15    /// # Default Limits
16    /// - Max extracted file size: 500 MB
17    /// - Max total extracted size: 2 GB
18    ///
19    /// # Examples
20    ///
21    /// ```no_run
22    /// use open_detect::{Scanner, SigSet, Signature};
23    ///
24    /// let sig_set = SigSet::from_signature(
25    ///     Signature("rule test { condition: true }".to_string())
26    /// ).unwrap();
27    /// let scanner = Scanner::new(sig_set);
28    /// ```
29    #[must_use]
30    pub fn new(sig_set: SigSet) -> Self {
31        Self {
32            sig_set,
33            max_extracted_size: 500 * 1024 * 1024, // 500 MB
34            max_total_extracted_size: 2 * 1024 * 1024 * 1024, // 2 GB
35        }
36    }
37
38    /// Set the maximum size for individual extracted files (default: 500 MB).
39    ///
40    /// This limit applies when scanning archives. Files larger than this limit
41    /// will be skipped during archive extraction.
42    ///
43    /// # Examples
44    ///
45    /// ```no_run
46    /// use open_detect::{Scanner, SigSet};
47    /// # let sig_set = SigSet::new();
48    ///
49    /// let scanner = Scanner::new(sig_set)
50    ///     .with_max_extracted_size(100 * 1024 * 1024); // 100 MB
51    /// ```
52    #[must_use]
53    pub fn with_max_extracted_size(mut self, size: usize) -> Self {
54        self.max_extracted_size = size;
55        self
56    }
57
58    /// Set the maximum total size for all extracted files (default: 2 GB).
59    ///
60    /// This limit applies when scanning archives. Once the total size of extracted
61    /// files exceeds this limit, extraction stops.
62    ///
63    /// # Examples
64    ///
65    /// ```no_run
66    /// use open_detect::{Scanner, SigSet};
67    /// # let sig_set = SigSet::new();
68    ///
69    /// let scanner = Scanner::new(sig_set)
70    ///     .with_max_total_extracted_size(1024 * 1024 * 1024); // 1 GB
71    /// ```
72    #[must_use]
73    pub fn with_max_total_extracted_size(mut self, size: usize) -> Self {
74        self.max_total_extracted_size = size;
75        self
76    }
77
78    /// Scan a buffer of data for malicious content.
79    ///
80    /// Automatically detects and extracts archives (ZIP, TAR, etc.) before scanning.
81    /// If the buffer contains an archive, all files within will be scanned recursively.
82    ///
83    /// # Errors
84    ///
85    /// Returns an error if:
86    /// - The YARA scanner fails to scan the data
87    /// - Archive extraction fails (corrupted archive, etc.)
88    ///
89    /// # Examples
90    ///
91    /// ```no_run
92    /// use open_detect::{Scanner, SigSet, Signature, ScanResult};
93    ///
94    /// # let sig_set = SigSet::from_signature(
95    /// #     Signature("rule test { condition: true }".to_string())
96    /// # ).unwrap();
97    /// let scanner = Scanner::new(sig_set);
98    /// let data = b"data to scan";
99    ///
100    /// match scanner.scan_buf(data).unwrap() {
101    ///     ScanResult::Clean => println!("No threats detected"),
102    ///     ScanResult::Malicious(detections) => {
103    ///         println!("Detected {} threats", detections.len());
104    ///     }
105    /// }
106    /// ```
107    pub fn scan_buf(&self, buf: &[u8]) -> Result<ScanResult> {
108        if let Some(file_type) = Self::infer_file_type(buf) {
109            if ArchiveFormat::is_supported_mime(&file_type) {
110                return self.scan_buf_ft(buf, &file_type);
111            }
112        }
113        let mut scanner = yara_x::Scanner::new(&self.sig_set.rules);
114        let sr = scanner.scan(buf)?.into();
115        Ok(sr)
116    }
117
118    /// Scan a file for malicious content.
119    ///
120    /// Reads the entire file into memory and scans it. Automatically detects
121    /// and extracts archives before scanning.
122    ///
123    /// # Errors
124    ///
125    /// Returns an error if:
126    /// - The file cannot be read
127    /// - The YARA scanner fails to scan the data
128    /// - Archive extraction fails
129    ///
130    /// # Examples
131    ///
132    /// ```no_run
133    /// use open_detect::{Scanner, SigSet};
134    /// use std::path::Path;
135    ///
136    /// # let sig_set = SigSet::new();
137    /// let scanner = Scanner::new(sig_set);
138    /// let result = scanner.scan_file(Path::new("suspicious.exe")).unwrap();
139    /// ```
140    pub fn scan_file(&self, path: &Path) -> Result<ScanResult> {
141        let buf = std::fs::read(path)?;
142        self.scan_buf(&buf)
143    }
144
145    /// Scan a buffer with an explicitly specified file type.
146    ///
147    /// This is useful when you know the file type and want to skip automatic detection.
148    ///
149    /// # Errors
150    ///
151    /// Returns an error if:
152    /// - The YARA scanner fails to scan the data
153    /// - Archive extraction fails
154    ///
155    /// # Examples
156    ///
157    /// ```no_run
158    /// use open_detect::{Scanner, SigSet};
159    /// use mime_type::{MimeType, Archive};
160    ///
161    /// # let sig_set = SigSet::new();
162    /// let scanner = Scanner::new(sig_set);
163    /// let data = b"PK\x03\x04..."; // ZIP file data
164    /// let result = scanner.scan_buf_ft(
165    ///     data,
166    ///     &MimeType::Archive(Archive::Zip)
167    /// ).unwrap();
168    /// ```
169    pub fn scan_buf_ft(&self, buf: &[u8], file_type: &MimeType) -> Result<ScanResult> {
170        if ArchiveFormat::is_supported_mime(file_type) {
171            self.scan_archive_buf(buf, file_type)
172        } else {
173            let mut scanner = yara_x::Scanner::new(&self.sig_set.rules);
174            let sr = scanner.scan(buf)?.into();
175            Ok(sr)
176        }
177    }
178
179    /// Scan a file with an explicitly specified file type.
180    ///
181    /// This is useful when you know the file type and want to skip automatic detection.
182    ///
183    /// # Errors
184    ///
185    /// Returns an error if:
186    /// - The file cannot be read
187    /// - The YARA scanner fails to scan the data
188    /// - Archive extraction fails
189    ///
190    /// # Examples
191    ///
192    /// ```no_run
193    /// use open_detect::{Scanner, SigSet};
194    /// use mime_type::{MimeType, Archive};
195    /// use std::path::Path;
196    ///
197    /// # let sig_set = SigSet::new();
198    /// let scanner = Scanner::new(sig_set);
199    /// let result = scanner.scan_file_ft(
200    ///     Path::new("archive.zip"),
201    ///     &MimeType::Archive(Archive::Zip)
202    /// ).unwrap();
203    /// ```
204    pub fn scan_file_ft(&self, path: &Path, file_type: &MimeType) -> Result<ScanResult> {
205        let buf = std::fs::read(path)?;
206        self.scan_buf_ft(&buf, file_type)
207    }
208
209    fn scan_archive_buf(&self, buf: &[u8], file_type: &MimeType) -> Result<ScanResult> {
210        let format = match ArchiveFormat::try_from(file_type) {
211            Ok(fmt) => fmt,
212            Err(_) => {
213                // If we can't handle it as an archive, scan directly
214                let mut scanner = yara_x::Scanner::new(&self.sig_set.rules);
215                let sr = scanner.scan(buf)?.into();
216                return Ok(sr);
217            }
218        };
219
220        self.scan_archive(buf, format)
221    }
222
223    /// Scan an archive using the unified archive crate
224    fn scan_archive(&self, buf: &[u8], format: ArchiveFormat) -> Result<ScanResult> {
225        // Create extractor with reasonable limits
226        let extractor = ArchiveExtractor::new()
227            .with_max_file_size(self.max_extracted_size)
228            .with_max_total_size(self.max_total_extracted_size);
229
230        // Extract all files from the archive
231        let extracted_files = extractor
232            .extract(buf, format)
233            .map_err(|e| std::io::Error::new(std::io::ErrorKind::InvalidData, e.to_string()))?;
234
235        let mut all_detections = Vec::new();
236
237        // Scan each extracted file
238        for file in extracted_files {
239            // Skip directories
240            if file.is_directory {
241                continue;
242            }
243
244            // Recursively scan the contents (might be nested archives)
245            let result = self.scan_buf(&file.data)?;
246
247            if let ScanResult::Malicious(detections) = result {
248                all_detections.extend(detections);
249            }
250        }
251
252        if all_detections.is_empty() {
253            Ok(ScanResult::Clean)
254        } else {
255            Ok(ScanResult::Malicious(all_detections))
256        }
257    }
258
259    /// Infer file type from buffer using the infer crate
260    fn infer_file_type(buf: &[u8]) -> Option<MimeType> {
261        infer::get(buf)
262            .map(|kind| kind.mime_type().to_string())
263            .and_then(|mime| MimeType::from_mime(&mime))
264    }
265}
266
267impl From<SigSet> for Scanner {
268    fn from(sig_set: SigSet) -> Self {
269        Scanner::new(sig_set)
270    }
271}
272
#[cfg(test)]
mod tests {
    use super::*;
    use crate::signature::Signature;

    /// One mebibyte, used to spell out the size limits below.
    const MIB: usize = 1024 * 1024;

    /// Build a signature set from a single YARA rule source.
    fn single_rule_set(rule: &str) -> SigSet {
        SigSet::from_signature(Signature(rule.to_string())).unwrap()
    }

    #[test]
    fn scan_one_sig_matches() {
        let scanner = Scanner::from(single_rule_set("rule test { condition: true }"));

        let outcome = scanner.scan_buf(b"test input").unwrap();

        assert_eq!(ScanResult::from("test"), outcome);
    }

    #[test]
    fn scan_one_sig_no_match() {
        let scanner = Scanner::from(single_rule_set("rule test { condition: false }"));

        let outcome = scanner.scan_buf(b"test input").unwrap();

        assert_eq!(ScanResult::Clean, outcome);
    }

    #[test]
    fn scan_multiple_sigs_match() {
        let rules = vec![
            Signature("rule test1 { condition: true }".to_string()),
            Signature("rule test2 { condition: true }".to_string()),
        ];
        let scanner = Scanner::from(SigSet::from_signatures(rules).unwrap());

        let outcome = scanner.scan_buf(b"test input").unwrap();

        assert_eq!(ScanResult::from(vec!["test1", "test2"]), outcome);
    }

    #[test]
    fn test_scanner_new() {
        let scanner = Scanner::new(single_rule_set("rule test { condition: true }"));

        // Defaults: 500 MB per file, 2 GB in total.
        assert_eq!(scanner.max_extracted_size, 500 * MIB);
        assert_eq!(scanner.max_total_extracted_size, 2 * 1024 * MIB);
    }

    #[test]
    fn test_scanner_with_custom_sizes() {
        let per_file_limit = 100 * MIB; // 100 MB
        let total_limit = 1024 * MIB; // 1 GB

        let scanner = Scanner::new(single_rule_set("rule test { condition: true }"))
            .with_max_extracted_size(per_file_limit)
            .with_max_total_extracted_size(total_limit);

        assert_eq!(scanner.max_extracted_size, per_file_limit);
        assert_eq!(scanner.max_total_extracted_size, total_limit);
    }

    #[test]
    fn test_infer_file_type() {
        // The ZIP local-file-header magic is recognised as a ZIP archive.
        let detected = Scanner::infer_file_type(b"PK\x03\x04");
        assert_eq!(detected, Some(MimeType::Archive(mime_type::Archive::Zip)));

        // Plain text has no recognisable magic bytes.
        assert!(Scanner::infer_file_type(b"hello world").is_none());
    }

    #[test]
    fn test_scanner_is_send_and_sync() {
        // Compile-time check that Scanner implements Send and Sync
        fn assert_send_and_sync<T: Send + Sync>() {}
        assert_send_and_sync::<Scanner>();
    }
}