Skip to main content

cc_audit/engine/
scanner.rs

1//! Scanner traits and configuration for the detection layer (L5).
2//!
3//! This module provides file-system oriented scanning interfaces:
4//! - `Scanner` trait for scanning files and directories
5//! - `ContentScanner` trait for content-based scanning
6//! - `ScannerConfig` for common scanner configuration
7
8use crate::error::{AuditError, Result};
9use crate::ignore::IgnoreFilter;
10use crate::rules::{DynamicRule, Finding, RuleEngine};
11use std::fs;
12use std::path::Path;
13use tracing::{debug, trace};
14
15/// Core trait for all security scanners.
16///
17/// Scanners implement this trait to provide file and directory scanning capabilities.
18/// The default `scan_path` implementation handles path validation and delegates to
19/// either `scan_file` or `scan_directory` based on the path type.
20pub trait Scanner {
21    /// Scan a single file and return findings.
22    fn scan_file(&self, path: &Path) -> Result<Vec<Finding>>;
23
24    /// Scan a directory and return findings.
25    fn scan_directory(&self, dir: &Path) -> Result<Vec<Finding>>;
26
27    /// Scan a path (file or directory).
28    ///
29    /// This is the main entry point for scanning. It validates the path
30    /// and delegates to either `scan_file` or `scan_directory`.
31    fn scan_path(&self, path: &Path) -> Result<Vec<Finding>> {
32        trace!(path = %path.display(), "Scanning path");
33
34        if !path.exists() {
35            debug!(path = %path.display(), "Path not found");
36            return Err(AuditError::FileNotFound(path.display().to_string()));
37        }
38
39        if path.is_file() {
40            trace!(path = %path.display(), "Scanning as file");
41            return self.scan_file(path);
42        }
43
44        if !path.is_dir() {
45            debug!(path = %path.display(), "Path is not a directory");
46            return Err(AuditError::NotADirectory(path.display().to_string()));
47        }
48
49        trace!(path = %path.display(), "Scanning as directory");
50        self.scan_directory(path)
51    }
52}
53
54/// Extended trait for scanners that support content-based scanning.
55///
56/// This trait provides a unified interface for scanning raw content strings,
57/// which is useful for testing and for scanners that parse structured files
58/// (like JSON) before applying rules.
59pub trait ContentScanner: Scanner {
60    /// Returns a reference to the scanner's configuration.
61    fn config(&self) -> &ScannerConfig;
62
63    /// Scans content and returns findings.
64    ///
65    /// Default implementation delegates to ScannerConfig::check_content.
66    /// Override this method for scanners that need custom content processing
67    /// (e.g., JSON parsing, frontmatter extraction).
68    fn scan_content(&self, content: &str, file_path: &str) -> Result<Vec<Finding>> {
69        Ok(self.config().check_content(content, file_path))
70    }
71}
72
73/// Common configuration shared by all scanners.
74///
75/// This struct provides a unified way to manage RuleEngine settings,
76/// ignore filters, and common file operations across different scanner implementations.
77pub struct ScannerConfig {
78    engine: RuleEngine,
79    ignore_filter: Option<IgnoreFilter>,
80    skip_comments: bool,
81    strict_secrets: bool,
82    recursive: bool,
83}
84
85impl ScannerConfig {
86    /// Creates a new ScannerConfig with default settings.
87    pub fn new() -> Self {
88        Self {
89            engine: RuleEngine::new(),
90            ignore_filter: None,
91            skip_comments: false,
92            strict_secrets: false,
93            recursive: true,
94        }
95    }
96
97    /// Enables or disables recursive scanning.
98    /// When disabled, only scans the immediate directory (max_depth = 1).
99    pub fn with_recursive(mut self, recursive: bool) -> Self {
100        self.recursive = recursive;
101        self
102    }
103
104    /// Returns whether recursive scanning is enabled.
105    pub fn is_recursive(&self) -> bool {
106        self.recursive
107    }
108
109    /// Returns the max_depth for directory walking based on recursive setting.
110    /// - recursive = true: None (unlimited depth)
111    /// - recursive = false: Some(3) (default depth for reasonable scanning)
112    pub fn max_depth(&self) -> Option<usize> {
113        if self.recursive { None } else { Some(3) }
114    }
115
116    /// Enables or disables comment skipping during scanning.
117    pub fn with_skip_comments(mut self, skip: bool) -> Self {
118        self.skip_comments = skip;
119        self.engine = self.engine.with_skip_comments(skip);
120        self
121    }
122
123    /// Enables or disables strict secrets mode.
124    /// When enabled, dummy key heuristics are disabled for test files.
125    pub fn with_strict_secrets(mut self, strict: bool) -> Self {
126        self.strict_secrets = strict;
127        self.engine = self.engine.with_strict_secrets(strict);
128        self
129    }
130
131    /// Sets an ignore filter for file filtering.
132    pub fn with_ignore_filter(mut self, filter: IgnoreFilter) -> Self {
133        self.ignore_filter = Some(filter);
134        self
135    }
136
137    /// Adds dynamic rules loaded from custom YAML files.
138    pub fn with_dynamic_rules(mut self, rules: Vec<DynamicRule>) -> Self {
139        self.engine = self.engine.with_dynamic_rules(rules);
140        self
141    }
142
143    /// Returns whether the given path should be ignored.
144    pub fn is_ignored(&self, path: &Path) -> bool {
145        self.ignore_filter
146            .as_ref()
147            .is_some_and(|f| f.is_ignored(path))
148    }
149
150    /// Reads a file and returns its content as a string.
151    pub fn read_file(&self, path: &Path) -> Result<String> {
152        trace!(path = %path.display(), "Reading file");
153        fs::read_to_string(path).map_err(|e| {
154            debug!(path = %path.display(), error = %e, "Failed to read file");
155            AuditError::ReadError {
156                path: path.display().to_string(),
157                source: e,
158            }
159        })
160    }
161
162    /// Checks the content against all rules and returns findings.
163    pub fn check_content(&self, content: &str, file_path: &str) -> Vec<Finding> {
164        trace!(
165            file = file_path,
166            content_len = content.len(),
167            "Checking content"
168        );
169        let findings = self.engine.check_content(content, file_path);
170        if !findings.is_empty() {
171            debug!(file = file_path, count = findings.len(), "Found issues");
172        }
173        findings
174    }
175
176    /// Checks YAML frontmatter for specific rules (e.g., OP-001).
177    pub fn check_frontmatter(&self, frontmatter: &str, file_path: &str) -> Vec<Finding> {
178        self.engine.check_frontmatter(frontmatter, file_path)
179    }
180
181    /// Returns whether skip_comments is enabled.
182    pub fn skip_comments(&self) -> bool {
183        self.skip_comments
184    }
185
186    /// Returns whether strict_secrets is enabled.
187    pub fn strict_secrets(&self) -> bool {
188        self.strict_secrets
189    }
190
191    /// Returns a reference to the underlying RuleEngine.
192    pub fn engine(&self) -> &RuleEngine {
193        &self.engine
194    }
195}
196
197impl Default for ScannerConfig {
198    fn default() -> Self {
199        Self::new()
200    }
201}
202
/// Unit tests for `ScannerConfig`: defaults, builder methods, file reading,
/// and delegation to the rule engine.
#[cfg(test)]
mod tests {
    use super::*;
    use tempfile::TempDir;

    #[test]
    fn test_new_config() {
        let config = ScannerConfig::new();
        assert!(!config.skip_comments());
        assert!(!config.strict_secrets());
        assert!(config.is_recursive());
        assert_eq!(config.max_depth(), None);
    }

    #[test]
    fn test_with_skip_comments() {
        let config = ScannerConfig::new().with_skip_comments(true);
        assert!(config.skip_comments());
    }

    #[test]
    fn test_with_strict_secrets() {
        let config = ScannerConfig::new().with_strict_secrets(true);
        assert!(config.strict_secrets());
    }

    #[test]
    fn test_with_recursive_disabled_limits_depth() {
        // Pins the depth limit reported for non-recursive scanning.
        let config = ScannerConfig::new().with_recursive(false);
        assert!(!config.is_recursive());
        assert_eq!(config.max_depth(), Some(3));
    }

    #[test]
    fn test_with_recursive_enabled_is_unlimited() {
        let config = ScannerConfig::new().with_recursive(true);
        assert!(config.is_recursive());
        assert_eq!(config.max_depth(), None);
    }

    #[test]
    fn test_default_config() {
        let config = ScannerConfig::default();
        assert!(!config.skip_comments());
        assert!(!config.strict_secrets());
        assert!(config.is_recursive());
    }

    #[test]
    fn test_is_ignored_without_filter() {
        let config = ScannerConfig::new();
        assert!(!config.is_ignored(Path::new("test.rs")));
    }

    #[test]
    fn test_read_file_success() {
        let dir = TempDir::new().unwrap();
        let file_path = dir.path().join("test.txt");
        fs::write(&file_path, "test content").unwrap();

        let config = ScannerConfig::new();
        let content = config.read_file(&file_path).unwrap();
        assert_eq!(content, "test content");
    }

    #[test]
    fn test_read_file_not_found() {
        let config = ScannerConfig::new();
        let result = config.read_file(Path::new("/nonexistent/file.txt"));
        // read_file maps every I/O failure to ReadError.
        assert!(matches!(result, Err(AuditError::ReadError { .. })));
    }

    #[test]
    fn test_check_content_detects_sudo() {
        let config = ScannerConfig::new();
        let findings = config.check_content("sudo rm -rf /", "test.sh");
        assert!(findings.iter().any(|f| f.id == "PE-001"));
    }

    #[test]
    fn test_check_content_skip_comments() {
        let config = ScannerConfig::new().with_skip_comments(true);
        let findings = config.check_content("# sudo rm -rf /", "test.sh");
        assert!(findings.iter().all(|f| f.id != "PE-001"));
    }

    #[test]
    fn test_check_frontmatter_wildcard() {
        let config = ScannerConfig::new();
        let findings = config.check_frontmatter("allowed-tools: *", "SKILL.md");
        assert!(findings.iter().any(|f| f.id == "OP-001"));
    }

    #[test]
    fn test_engine_accessor() {
        let config = ScannerConfig::new();
        let _engine = config.engine();
    }
}