// sentio_core/scanner.rs

1use crate::finding::Finding;
2use crate::rules::{convert_severity, RuleContext, RuleRegistry, SuppressionSet};
3use crate::syntax::{parse_rust_files, ParseFailure, ParsedFile, SyntaxReport};
4use serde::Serialize;
5use std::path::{Path, PathBuf};
6use walkdir::WalkDir;
7
/// Caller-supplied settings that shape a single scan.
#[derive(Clone, Debug, Default)]
pub struct ScanOptions {
    /// When `false`, file discovery skips paths that contain a `tests`, `test`
    /// or `fixtures` component.
    pub include_tests: bool,
    /// Optional filter handed to the rule registry when selecting rules.
    /// NOTE(review): presumably `None` selects every rule — confirm against
    /// `RuleRegistry::matching_rules`.
    pub rule_filter: Option<String>,
}
13
14#[derive(Debug, Clone, Default, Serialize)]
15pub struct ScanResult {
16    pub findings: Vec<Finding>,
17    pub files_scanned: usize,
18    pub files_parsed: usize,
19    pub parse_failures: Vec<ParseFailure>,
20}
21
22#[derive(Default)]
23pub struct Scanner {
24    rules: RuleRegistry,
25}
26
27impl Scanner {
28    pub fn new() -> Self {
29        Self {
30            rules: RuleRegistry::baseline(),
31        }
32    }
33
34    pub fn scan_path(&self, path: &str, options: &ScanOptions) -> ScanResult {
35        let (roots, anchor_programs) = resolve_scan_roots(path);
36
37        if let Some(ref programs) = anchor_programs {
38            eprintln!(
39                "Anchor workspace detected — scanning {} program(s): {}",
40                programs.len(),
41                programs.join(", ")
42            );
43        }
44
45        let file_paths: Vec<PathBuf> = roots
46            .iter()
47            .flat_map(|root| discover_rust_files(root, options))
48            .collect();
49
50        let files_scanned = file_paths.len();
51        let syntax_report = parse_rust_files(file_paths);
52        self.scan_report(files_scanned, syntax_report, options)
53    }
54
55    pub fn scan_report(
56        &self,
57        files_scanned: usize,
58        report: SyntaxReport,
59        options: &ScanOptions,
60    ) -> ScanResult {
61        let files_parsed = report.files.len();
62        let findings = self.run_rules(&report.files, options);
63        let parse_failures = report.parse_failures;
64
65        ScanResult {
66            findings,
67            files_scanned,
68            files_parsed,
69            parse_failures,
70        }
71    }
72
73    fn run_rules(&self, files: &[ParsedFile], options: &ScanOptions) -> Vec<Finding> {
74        let ctx = RuleContext { files };
75        let suppressions: Vec<(String, SuppressionSet)> = files
76            .iter()
77            .map(|file| {
78                (
79                    file.path.display().to_string(),
80                    SuppressionSet::from_source(&file.source),
81                )
82            })
83            .collect();
84
85        let mut findings = Vec::new();
86        for file in files {
87            for rule in self.rules.matching_rules(options.rule_filter.as_deref()) {
88                for matched in rule.match_file(file, &ctx) {
89                    let finding = Finding {
90                        rule_id: matched.rule_id.to_string(),
91                        severity: convert_severity(matched.severity),
92                        message: matched.message,
93                        location: matched.location,
94                        help: matched.help,
95                        suppressed: false,
96                    };
97
98                    if is_suppressed(&finding, &suppressions) {
99                        continue;
100                    }
101
102                    findings.push(finding);
103                }
104            }
105        }
106
107        findings
108    }
109}
110
111/// Detects an Anchor workspace at `path` by looking for `Anchor.toml`.
112/// If found, expands `[workspace] members` glob patterns and returns the program roots.
113/// Returns `(roots_to_scan, Some(program_names))` on detection, or `([path], None)` otherwise.
114fn resolve_scan_roots(path: &str) -> (Vec<PathBuf>, Option<Vec<String>>) {
115    let root = PathBuf::from(path);
116    let anchor_toml_path = root.join("Anchor.toml");
117
118    if !anchor_toml_path.exists() {
119        return (vec![root], None);
120    }
121
122    let content = match std::fs::read_to_string(&anchor_toml_path) {
123        Ok(c) => c,
124        Err(_) => return (vec![root], None),
125    };
126
127    let parsed: toml::Value = match content.parse() {
128        Ok(v) => v,
129        Err(_) => return (vec![root], None),
130    };
131
132    let members: Vec<&str> = parsed
133        .get("workspace")
134        .and_then(|w| w.get("members"))
135        .and_then(|m| m.as_array())
136        .map(|arr| arr.iter().filter_map(|v| v.as_str()).collect())
137        .unwrap_or_default();
138
139    // No [workspace] section or empty members — fall back to programs/ convention
140    if members.is_empty() {
141        return fallback_to_programs_dir(&root);
142    }
143
144    let mut roots: Vec<PathBuf> = Vec::new();
145    for member in &members {
146        if let Some(prefix) = member.strip_suffix("/*") {
147            // glob pattern like "programs/*" — expand to all subdirs with a Cargo.toml
148            let dir = root.join(prefix);
149            if let Ok(entries) = std::fs::read_dir(&dir) {
150                let mut subdirs: Vec<PathBuf> = entries
151                    .flatten()
152                    .map(|e| e.path())
153                    .filter(|p| p.is_dir() && p.join("Cargo.toml").exists())
154                    .collect();
155                subdirs.sort();
156                roots.extend(subdirs);
157            }
158        } else {
159            let p = root.join(member);
160            if p.exists() {
161                roots.push(p);
162            }
163        }
164    }
165
166    if roots.is_empty() {
167        return fallback_to_programs_dir(&root);
168    }
169
170    let names: Vec<String> = roots
171        .iter()
172        .filter_map(|r| r.file_name())
173        .map(|n| n.to_string_lossy().into_owned())
174        .collect();
175
176    (roots, Some(names))
177}
178
/// Falls back to the conventional `programs/` directory below `root`.
///
/// Every direct subdirectory of `root/programs` that contains a `Cargo.toml`
/// becomes a scan root (sorted), and the directory names are returned as the
/// program names. If `programs/` is missing, unreadable, or holds no such
/// subdirectory, the result is `([root], None)`.
fn fallback_to_programs_dir(root: &Path) -> (Vec<PathBuf>, Option<Vec<String>>) {
    let programs_dir = root.join("programs");

    let mut program_dirs: Vec<PathBuf> = Vec::new();
    if programs_dir.is_dir() {
        if let Ok(entries) = std::fs::read_dir(&programs_dir) {
            // Entries that fail to read are skipped rather than aborting the scan.
            for entry in entries.flatten() {
                let candidate = entry.path();
                if candidate.is_dir() && candidate.join("Cargo.toml").exists() {
                    program_dirs.push(candidate);
                }
            }
        }
    }

    if program_dirs.is_empty() {
        return (vec![root.to_path_buf()], None);
    }
    program_dirs.sort();

    let mut names: Vec<String> = Vec::with_capacity(program_dirs.len());
    for dir in &program_dirs {
        if let Some(name) = dir.file_name() {
            names.push(name.to_string_lossy().into_owned());
        }
    }

    (program_dirs, Some(names))
}
208
209fn is_suppressed(finding: &Finding, suppressions: &[(String, SuppressionSet)]) -> bool {
210    suppressions
211        .iter()
212        .find(|(path, _)| path == &finding.location.path)
213        .is_some_and(|(_, set)| set.is_suppressed(finding))
214}
215
216fn discover_rust_files<'a>(
217    root: &'a Path,
218    options: &'a ScanOptions,
219) -> impl Iterator<Item = PathBuf> + 'a {
220    WalkDir::new(root)
221        .into_iter()
222        .filter_map(Result::ok)
223        .filter(|entry| entry.file_type().is_file())
224        .filter(|entry| entry.path().extension().and_then(|ext| ext.to_str()) == Some("rs"))
225        .filter(|entry| !is_excluded_path(entry.path()))
226        .filter(move |entry| options.include_tests || !is_test_path(entry.path()))
227        .map(|entry| entry.into_path())
228}
229
/// Returns `true` when any component of `path` is exactly `target` or `.git`.
fn is_excluded_path(path: &Path) -> bool {
    const EXCLUDED_DIRS: [&str; 2] = ["target", ".git"];

    path.iter()
        .any(|part| EXCLUDED_DIRS.iter().any(|&excluded| part == excluded))
}
236
/// Returns `true` when any component of `path` is exactly `tests`, `test` or
/// `fixtures`.
fn is_test_path(path: &Path) -> bool {
    for component in path.components() {
        // A component that is not valid UTF-8 cannot equal any of the names.
        let name = component.as_os_str().to_str();
        if matches!(name, Some("tests" | "test" | "fixtures")) {
            return true;
        }
    }
    false
}