open_detect/scanner.rs
1use crate::{errors::Result, scan_result::ScanResult, signature::SigSet};
2use archive::{ArchiveExtractor, ArchiveFormat};
3use mime_type::{MimeFormat, MimeType};
4use std::path::Path;
5
6pub struct Scanner {
7 sig_set: SigSet,
8 max_extracted_size: usize,
9 max_total_extracted_size: usize,
10}
11
12impl Scanner {
13 /// Create a new Scanner from a `SigSet` with default size limits.
14 ///
15 /// # Default Limits
16 /// - Max extracted file size: 500 MB
17 /// - Max total extracted size: 2 GB
18 ///
19 /// # Examples
20 ///
21 /// ```no_run
22 /// use open_detect::{Scanner, SigSet, Signature};
23 ///
24 /// let sig_set = SigSet::from_signature(
25 /// Signature("rule test { condition: true }".to_string())
26 /// ).unwrap();
27 /// let scanner = Scanner::new(sig_set);
28 /// ```
29 #[must_use]
30 pub fn new(sig_set: SigSet) -> Self {
31 Self {
32 sig_set,
33 max_extracted_size: 500 * 1024 * 1024, // 500 MB
34 max_total_extracted_size: 2 * 1024 * 1024 * 1024, // 2 GB
35 }
36 }
37
38 /// Set the maximum size for individual extracted files (default: 500 MB).
39 ///
40 /// This limit applies when scanning archives. Files larger than this limit
41 /// will be skipped during archive extraction.
42 ///
43 /// # Examples
44 ///
45 /// ```no_run
46 /// use open_detect::{Scanner, SigSet};
47 /// # let sig_set = SigSet::new();
48 ///
49 /// let scanner = Scanner::new(sig_set)
50 /// .with_max_extracted_size(100 * 1024 * 1024); // 100 MB
51 /// ```
52 #[must_use]
53 pub fn with_max_extracted_size(mut self, size: usize) -> Self {
54 self.max_extracted_size = size;
55 self
56 }
57
58 /// Set the maximum total size for all extracted files (default: 2 GB).
59 ///
60 /// This limit applies when scanning archives. Once the total size of extracted
61 /// files exceeds this limit, extraction stops.
62 ///
63 /// # Examples
64 ///
65 /// ```no_run
66 /// use open_detect::{Scanner, SigSet};
67 /// # let sig_set = SigSet::new();
68 ///
69 /// let scanner = Scanner::new(sig_set)
70 /// .with_max_total_extracted_size(1024 * 1024 * 1024); // 1 GB
71 /// ```
72 #[must_use]
73 pub fn with_max_total_extracted_size(mut self, size: usize) -> Self {
74 self.max_total_extracted_size = size;
75 self
76 }
77
78 /// Scan a buffer of data for malicious content.
79 ///
80 /// Automatically detects and extracts archives (ZIP, TAR, etc.) before scanning.
81 /// If the buffer contains an archive, all files within will be scanned recursively.
82 ///
83 /// # Errors
84 ///
85 /// Returns an error if:
86 /// - The YARA scanner fails to scan the data
87 /// - Archive extraction fails (corrupted archive, etc.)
88 ///
89 /// # Examples
90 ///
91 /// ```no_run
92 /// use open_detect::{Scanner, SigSet, Signature, ScanResult};
93 ///
94 /// # let sig_set = SigSet::from_signature(
95 /// # Signature("rule test { condition: true }".to_string())
96 /// # ).unwrap();
97 /// let scanner = Scanner::new(sig_set);
98 /// let data = b"data to scan";
99 ///
100 /// match scanner.scan_buf(data).unwrap() {
101 /// ScanResult::Clean => println!("No threats detected"),
102 /// ScanResult::Malicious(detections) => {
103 /// println!("Detected {} threats", detections.len());
104 /// }
105 /// }
106 /// ```
107 pub fn scan_buf(&self, buf: &[u8]) -> Result<ScanResult> {
108 if let Some(file_type) = Self::infer_file_type(buf) {
109 if ArchiveFormat::is_supported_mime(&file_type) {
110 return self.scan_buf_ft(buf, &file_type);
111 }
112 }
113 let mut scanner = yara_x::Scanner::new(&self.sig_set.rules);
114 let sr = scanner.scan(buf)?.into();
115 Ok(sr)
116 }
117
118 /// Scan a file for malicious content.
119 ///
120 /// Reads the entire file into memory and scans it. Automatically detects
121 /// and extracts archives before scanning.
122 ///
123 /// # Errors
124 ///
125 /// Returns an error if:
126 /// - The file cannot be read
127 /// - The YARA scanner fails to scan the data
128 /// - Archive extraction fails
129 ///
130 /// # Examples
131 ///
132 /// ```no_run
133 /// use open_detect::{Scanner, SigSet};
134 /// use std::path::Path;
135 ///
136 /// # let sig_set = SigSet::new();
137 /// let scanner = Scanner::new(sig_set);
138 /// let result = scanner.scan_file(Path::new("suspicious.exe")).unwrap();
139 /// ```
140 pub fn scan_file(&self, path: &Path) -> Result<ScanResult> {
141 let buf = std::fs::read(path)?;
142 self.scan_buf(&buf)
143 }
144
145 /// Scan a buffer with an explicitly specified file type.
146 ///
147 /// This is useful when you know the file type and want to skip automatic detection.
148 ///
149 /// # Errors
150 ///
151 /// Returns an error if:
152 /// - The YARA scanner fails to scan the data
153 /// - Archive extraction fails
154 ///
155 /// # Examples
156 ///
157 /// ```no_run
158 /// use open_detect::{Scanner, SigSet};
159 /// use mime_type::{MimeType, Archive};
160 ///
161 /// # let sig_set = SigSet::new();
162 /// let scanner = Scanner::new(sig_set);
163 /// let data = b"PK\x03\x04..."; // ZIP file data
164 /// let result = scanner.scan_buf_ft(
165 /// data,
166 /// &MimeType::Archive(Archive::Zip)
167 /// ).unwrap();
168 /// ```
169 pub fn scan_buf_ft(&self, buf: &[u8], file_type: &MimeType) -> Result<ScanResult> {
170 if ArchiveFormat::is_supported_mime(file_type) {
171 self.scan_archive_buf(buf, file_type)
172 } else {
173 let mut scanner = yara_x::Scanner::new(&self.sig_set.rules);
174 let sr = scanner.scan(buf)?.into();
175 Ok(sr)
176 }
177 }
178
179 /// Scan a file with an explicitly specified file type.
180 ///
181 /// This is useful when you know the file type and want to skip automatic detection.
182 ///
183 /// # Errors
184 ///
185 /// Returns an error if:
186 /// - The file cannot be read
187 /// - The YARA scanner fails to scan the data
188 /// - Archive extraction fails
189 ///
190 /// # Examples
191 ///
192 /// ```no_run
193 /// use open_detect::{Scanner, SigSet};
194 /// use mime_type::{MimeType, Archive};
195 /// use std::path::Path;
196 ///
197 /// # let sig_set = SigSet::new();
198 /// let scanner = Scanner::new(sig_set);
199 /// let result = scanner.scan_file_ft(
200 /// Path::new("archive.zip"),
201 /// &MimeType::Archive(Archive::Zip)
202 /// ).unwrap();
203 /// ```
204 pub fn scan_file_ft(&self, path: &Path, file_type: &MimeType) -> Result<ScanResult> {
205 let buf = std::fs::read(path)?;
206 self.scan_buf_ft(&buf, file_type)
207 }
208
209 fn scan_archive_buf(&self, buf: &[u8], file_type: &MimeType) -> Result<ScanResult> {
210 let format = match ArchiveFormat::try_from(file_type) {
211 Ok(fmt) => fmt,
212 Err(_) => {
213 // If we can't handle it as an archive, scan directly
214 let mut scanner = yara_x::Scanner::new(&self.sig_set.rules);
215 let sr = scanner.scan(buf)?.into();
216 return Ok(sr);
217 }
218 };
219
220 self.scan_archive(buf, format)
221 }
222
223 /// Scan an archive using the unified archive crate
224 fn scan_archive(&self, buf: &[u8], format: ArchiveFormat) -> Result<ScanResult> {
225 // Create extractor with reasonable limits
226 let extractor = ArchiveExtractor::new()
227 .with_max_file_size(self.max_extracted_size)
228 .with_max_total_size(self.max_total_extracted_size);
229
230 // Extract all files from the archive
231 let extracted_files = extractor
232 .extract(buf, format)
233 .map_err(|e| std::io::Error::new(std::io::ErrorKind::InvalidData, e.to_string()))?;
234
235 let mut all_detections = Vec::new();
236
237 // Scan each extracted file
238 for file in extracted_files {
239 // Skip directories
240 if file.is_directory {
241 continue;
242 }
243
244 // Recursively scan the contents (might be nested archives)
245 let result = self.scan_buf(&file.data)?;
246
247 if let ScanResult::Malicious(detections) = result {
248 all_detections.extend(detections);
249 }
250 }
251
252 if all_detections.is_empty() {
253 Ok(ScanResult::Clean)
254 } else {
255 Ok(ScanResult::Malicious(all_detections))
256 }
257 }
258
259 /// Infer file type from buffer using the infer crate
260 fn infer_file_type(buf: &[u8]) -> Option<MimeType> {
261 infer::get(buf)
262 .map(|kind| kind.mime_type().to_string())
263 .and_then(|mime| MimeType::from_mime(&mime))
264 }
265}
266
267impl From<SigSet> for Scanner {
268 fn from(sig_set: SigSet) -> Self {
269 Scanner::new(sig_set)
270 }
271}
272
#[cfg(test)]
mod tests {
    use super::*;
    use crate::signature::Signature;

    /// One mebibyte, used to spell out the size limits in the assertions below.
    const MIB: usize = 1024 * 1024;

    /// Build a `SigSet` from a single rule source, panicking if it is rejected.
    fn sig_set_of(rule: &str) -> SigSet {
        SigSet::from_signature(Signature(rule.to_string())).unwrap()
    }

    #[test]
    fn scan_one_sig_matches() {
        let scanner = Scanner::from(sig_set_of("rule test { condition: true }"));

        let outcome = scanner.scan_buf(b"test input").unwrap();
        assert_eq!(ScanResult::from("test"), outcome);
    }

    #[test]
    fn scan_one_sig_no_match() {
        let scanner = Scanner::from(sig_set_of("rule test { condition: false }"));

        let outcome = scanner.scan_buf(b"test input").unwrap();
        assert_eq!(ScanResult::Clean, outcome);
    }

    #[test]
    fn scan_multiple_sigs_match() {
        let rules = [
            "rule test1 { condition: true }",
            "rule test2 { condition: true }",
        ];
        let signatures = rules
            .iter()
            .map(|rule| Signature((*rule).to_string()))
            .collect::<Vec<_>>();
        let scanner = Scanner::from(SigSet::from_signatures(signatures).unwrap());

        let outcome = scanner.scan_buf(b"test input").unwrap();
        assert_eq!(ScanResult::from(vec!["test1", "test2"]), outcome);
    }

    #[test]
    fn test_scanner_new() {
        let scanner = Scanner::new(sig_set_of("rule test { condition: true }"));

        // Defaults: 500 MB per file, 2 GB in total.
        assert_eq!(scanner.max_extracted_size, 500 * MIB);
        assert_eq!(scanner.max_total_extracted_size, 2 * 1024 * MIB);
    }

    #[test]
    fn test_scanner_with_custom_sizes() {
        let scanner = Scanner::new(sig_set_of("rule test { condition: true }"))
            .with_max_extracted_size(100 * MIB) // 100 MB
            .with_max_total_extracted_size(1024 * MIB); // 1 GB

        assert_eq!(scanner.max_extracted_size, 100 * MIB);
        assert_eq!(scanner.max_total_extracted_size, 1024 * MIB);
    }

    #[test]
    fn test_infer_file_type() {
        // The ZIP local-file-header magic must be recognised as a ZIP archive.
        let zip_magic = b"PK\x03\x04";
        assert_eq!(
            Scanner::infer_file_type(zip_magic),
            Some(MimeType::Archive(mime_type::Archive::Zip))
        );

        // Plain text has no recognisable magic bytes.
        let plain_text = b"hello world";
        assert!(Scanner::infer_file_type(plain_text).is_none());
    }

    #[test]
    fn test_scanner_is_send_and_sync() {
        // Compile-time check: these calls only type-check if the bounds hold.
        fn require_send<T: Send>() {}
        fn require_sync<T: Sync>() {}
        require_send::<Scanner>();
        require_sync::<Scanner>();
    }
}
355}