Skip to main content

sandbox_scan/
engine.rs

1//! Orchestrator for the scan pipeline: cache → YARA → heuristics → compose
2//! → suppressions.
3//!
4//! Caches *pre*-suppression findings so adjusting the user's ignore file
5//! takes effect without re-running expensive motors. Suppression matching
6//! is keyed by `(rule_id, project_hash)` per OQ-007; callers pass the short
7//! project hash because the CLI already knows it.
8
9use std::path::{Path, PathBuf};
10
11use crate::findings::Findings;
12use crate::{
13    Result, cache, compose, heuristics, project_hash, suppress::IgnoreList, yara::YaraEngine,
14};
15
16/// Optional knobs for `scan`. Derived defaults: cache lookup is permitted
17/// (`no_cache = false`) but `cache_dir` is `None`, so nothing is read from or
/// written to a cache; no suppression file; no project hash (so suppressions
/// don't fire even if a file is provided).
18#[derive(Debug, Clone, Default)]
19pub struct ScanOpts {
    /// When true, `scan` skips the cache lookup and always runs the motors.
    /// Fresh results are still stored when `cache_dir` is set.
20    pub no_cache: bool,
    /// Directory handed to `cache::lookup` / `cache::store`. `None` means the
    /// scan neither reads nor writes a cache.
21    pub cache_dir: Option<PathBuf>,
    /// Path loaded through `IgnoreList::load`. Only consulted when
    /// `project_hash` is also set.
22    pub ignore_file: Option<PathBuf>,
23    /// Short project hash (the one shown in `sandbox ps`). Required for
24    /// suppression matching; without it, ignore entries can't be keyed.
25    pub project_hash: Option<String>,
26}
27
28/// Result of a scan plus the inputs that produced it. `from_cache=true`
29/// means the motors didn't actually run this invocation.
30#[derive(Debug, Clone)]
31pub struct ScanReport {
    /// Hash `scan` obtained from `project_hash::hash_files` for the listed
    /// project files; it is also the key used for cache lookup and store.
32    pub content_hash: String,
    /// Findings after `scan` applied suppressions (the cache itself holds the
    /// pre-suppression set).
33    pub findings: Findings,
    /// True when the findings came from `cache::lookup` rather than a fresh
    /// run of the motors.
34    pub from_cache: bool,
35}
36
37pub fn scan(project_root: &Path, opts: &ScanOpts) -> Result<ScanReport> {
38    let files = project_hash::list_files(project_root)?;
39    let content_hash = project_hash::hash_files(project_root, &files)?;
40
41    // Cache lookup is best-effort and pre-suppression — see module doc.
42    if !opts.no_cache
43        && let Some(cache_dir) = opts.cache_dir.as_deref()
44        && let Some(mut cached) = cache::lookup(cache_dir, &content_hash)
45    {
46        apply_suppressions(&mut cached, opts)?;
47        return Ok(ScanReport {
48            content_hash,
49            findings: cached,
50            from_cache: true,
51        });
52    }
53
54    let mut findings = Findings::new();
55
56    let yara_engine = YaraEngine::builtin()?;
57    let yara_findings = yara_engine.scan_files(project_root, &files)?;
58    findings.extend(yara_findings.items);
59
60    let heuristic_findings = heuristics::scan_files(project_root, &files)?;
61    findings.extend(heuristic_findings.items);
62
63    let compose_findings = compose::scan(project_root)?;
64    findings.extend(compose_findings.items);
65
66    findings.sort_canonical();
67
68    // Persist the pre-suppression view so the cache stays useful across
69    // changes to the user's ignore file.
70    if let Some(cache_dir) = opts.cache_dir.as_deref() {
71        cache::store(cache_dir, &content_hash, &findings)?;
72    }
73
74    apply_suppressions(&mut findings, opts)?;
75
76    Ok(ScanReport {
77        content_hash,
78        findings,
79        from_cache: false,
80    })
81}
82
83fn apply_suppressions(findings: &mut Findings, opts: &ScanOpts) -> Result<()> {
84    let Some(ignore_path) = opts.ignore_file.as_deref() else {
85        return Ok(());
86    };
87    let Some(hash) = opts.project_hash.as_deref() else {
88        return Ok(());
89    };
90    let list = IgnoreList::load(ignore_path)?;
91    list.apply(findings, hash);
92    Ok(())
93}
94
#[cfg(test)]
mod tests {
    use super::*;
    use crate::findings::Severity;

    type TestResult = std::result::Result<(), Box<dyn std::error::Error>>;

    /// Rule ids the fixtures below are checked against.
    const PROFILE_JS_RULE: &str = "yara/contagious_interview_profile_js";
    const C2_DOMAIN_RULE: &str = "yara/contagious_interview_c2_domain";

    /// Writes a single `server.js` carrying the suspicious fixture into `root`.
    fn write_evil_project(root: &Path) -> std::io::Result<()> {
        let payload = "const _ = new (Function.constructor)('require','m','...');\n\
             const c2 = 'Y2hhaW5saW5rLWFwaS12My5saXY=';\n\
             const endpoint = '/api/service/token/abc';\n";
        std::fs::write(root.join("server.js"), payload)
    }

    /// True when at least one finding in `report` carries `rule_id`.
    fn has_rule(report: &ScanReport, rule_id: &str) -> bool {
        report
            .findings
            .iter()
            .any(|finding| finding.rule_id == rule_id)
    }

    /// Options with the cache enabled and pointed at `cache_dir`.
    fn opts_with_cache(cache_dir: &Path) -> ScanOpts {
        ScanOpts {
            no_cache: false,
            cache_dir: Some(cache_dir.to_path_buf()),
            ..ScanOpts::default()
        }
    }

    #[test]
    fn end_to_end_flags_yara_critical_on_known_pattern() -> TestResult {
        let project = tempfile::tempdir()?;
        write_evil_project(project.path())?;

        let report = scan(project.path(), &ScanOpts::default())?;

        assert!(!report.from_cache);
        assert_eq!(report.findings.worst_severity(), Some(Severity::Critical));
        assert!(has_rule(&report, PROFILE_JS_RULE));
        Ok(())
    }

    #[test]
    fn second_run_hits_cache() -> TestResult {
        // Keep the cache directory OUTSIDE the project root: if it lived
        // inside, the file listing would pick it up and the content hash
        // would change on the second pass, defeating the cache.
        let project = tempfile::tempdir()?;
        let cache_home = tempfile::tempdir()?;
        write_evil_project(project.path())?;
        let opts = opts_with_cache(cache_home.path());

        let cold = scan(project.path(), &opts)?;
        assert!(!cold.from_cache);

        let warm = scan(project.path(), &opts)?;
        assert!(warm.from_cache);
        assert_eq!(cold.findings, warm.findings);
        Ok(())
    }

    #[test]
    fn no_cache_skips_lookup() -> TestResult {
        let project = tempfile::tempdir()?;
        let cache_home = tempfile::tempdir()?;
        write_evil_project(project.path())?;

        // First pass populates the cache.
        let opts = opts_with_cache(cache_home.path());
        scan(project.path(), &opts)?;

        // Same options, but with the lookup disabled.
        let mut bypass = opts.clone();
        bypass.no_cache = true;

        let report = scan(project.path(), &bypass)?;
        assert!(!report.from_cache);
        Ok(())
    }

    #[test]
    fn suppression_drops_only_matching_pair() -> TestResult {
        let project = tempfile::tempdir()?;
        write_evil_project(project.path())?;

        let ignore_path = project.path().join("ignore.toml");
        std::fs::write(
            &ignore_path,
            "[[ignore]]\n\
             rule_id = \"yara/contagious_interview_c2_domain\"\n\
             project_hash = \"deadbeef\"\n",
        )?;

        let opts = ScanOpts {
            ignore_file: Some(ignore_path),
            project_hash: Some("deadbeef".into()),
            ..ScanOpts::default()
        };
        let report = scan(project.path(), &opts)?;

        // The C2-domain rule is suppressed; the strict profile_js rule is not.
        assert!(!has_rule(&report, C2_DOMAIN_RULE));
        assert!(has_rule(&report, PROFILE_JS_RULE));
        Ok(())
    }

    #[test]
    fn clean_project_yields_empty_findings() -> TestResult {
        let project = tempfile::tempdir()?;
        std::fs::write(project.path().join("index.js"), b"console.log('hi');\n")?;

        let report = scan(project.path(), &ScanOpts::default())?;

        assert!(report.findings.is_empty(), "got {:?}", report.findings);
        Ok(())
    }
}