Skip to main content

cargo_capsec/
detector.rs

//! The detection engine — matches parsed call sites against the authority registry.
//!
//! This is the core of `cargo-capsec`. It takes a [`ParsedFile`] from the parser,
//! expands call paths using import information, and matches them against the
//! [`authority registry`](crate::authorities::build_registry). The output is a
//! list of [`Finding`]s, each representing one instance of ambient authority usage.
//!
//! # Two-pass matching
//!
//! The detector uses a two-pass approach per function:
//!
//! 1. **Pass 1**: Match all [`AuthorityPattern::Path`] patterns and record which
//!    patterns were found (needed for contextual matching).
//! 2. **Pass 2**: Match [`AuthorityPattern::MethodWithContext`] patterns, which only
//!    fire if their required context path was found in pass 1.
//!
//! This eliminates false positives from common method names like `.status()` and `.output()`.
18
19use crate::authorities::{
20    Authority, AuthorityPattern, Category, CustomAuthority, Risk, build_registry,
21};
22use crate::parser::{CallKind, ImportPath, ParsedFile};
23use serde::Serialize;
24use std::collections::HashSet;
25
26/// A single instance of ambient authority usage found in source code.
27///
28/// Each finding represents one call site where code exercises authority over the
29/// filesystem, network, environment, or process table. Findings are the primary
30/// output of the audit pipeline.
31///
32/// # Deduplication
33///
34/// The detector deduplicates findings by `(file, function, call_line, call_col)`,
35/// so each unique call site appears at most once even if multiple import paths
36/// could match it.
37#[derive(Debug, Clone, Serialize)]
38pub struct Finding {
39    /// Source file path.
40    pub file: String,
41    /// Name of the function containing the call.
42    pub function: String,
43    /// Line where the containing function is defined.
44    pub function_line: usize,
45    /// Line of the call expression.
46    pub call_line: usize,
47    /// Column of the call expression.
48    pub call_col: usize,
49    /// The expanded call path (e.g., `"std::fs::read"`).
50    pub call_text: String,
51    /// What kind of ambient authority this exercises.
52    pub category: Category,
53    /// Finer-grained classification (e.g., `"read"`, `"connect"`, `"spawn"`).
54    pub subcategory: String,
55    /// How dangerous this call is.
56    pub risk: Risk,
57    /// Human-readable description.
58    pub description: String,
59    /// Whether this call is inside a `build.rs` `main()` function.
60    pub is_build_script: bool,
61    /// Name of the crate containing this call.
62    pub crate_name: String,
63    /// Version of the crate containing this call.
64    pub crate_version: String,
65    /// True if this finding is inside a `#[capsec::deny(...)]` function
66    /// whose denied categories cover this finding's category.
67    /// Deny violations are always promoted to `Critical` risk.
68    pub is_deny_violation: bool,
69}
70
71/// The ambient authority detector.
72///
73/// Holds the built-in authority registry plus any user-defined custom authorities
74/// from `.capsec.toml`. Create one with [`Detector::new`], optionally extend it
75/// with [`add_custom_authorities`](Detector::add_custom_authorities), then call
76/// [`analyse`](Detector::analyse) on each parsed file.
77///
78/// # Example
79///
80/// ```
81/// use cargo_capsec::parser::parse_source;
82/// use cargo_capsec::detector::Detector;
83///
84/// let source = r#"
85///     use std::fs;
86///     fn load() { let _ = fs::read("data.bin"); }
87/// "#;
88///
89/// let parsed = parse_source(source, "example.rs").unwrap();
90/// let detector = Detector::new();
91/// let findings = detector.analyse(&parsed, "my-crate", "0.1.0");
92/// assert_eq!(findings.len(), 1);
93/// ```
94pub struct Detector {
95    authorities: Vec<Authority>,
96    custom_paths: Vec<(Vec<String>, Category, Risk, String)>,
97}
98
99impl Default for Detector {
100    fn default() -> Self {
101        Self::new()
102    }
103}
104
105impl Detector {
106    /// Creates a new detector with the built-in authority registry.
107    pub fn new() -> Self {
108        Self {
109            authorities: build_registry(),
110            custom_paths: Vec::new(),
111        }
112    }
113
114    /// Extends the detector with custom authority patterns from `.capsec.toml`.
115    pub fn add_custom_authorities(&mut self, customs: &[CustomAuthority]) {
116        for c in customs {
117            self.custom_paths.push((
118                c.path.clone(),
119                c.category.clone(),
120                c.risk,
121                c.description.clone(),
122            ));
123        }
124    }
125
126    /// Analyses a parsed file and returns all ambient authority findings.
127    ///
128    /// Expands call paths using the file's `use` imports, matches against the
129    /// authority registry (built-in + custom), and deduplicates by call site.
130    pub fn analyse(
131        &self,
132        file: &ParsedFile,
133        crate_name: &str,
134        crate_version: &str,
135    ) -> Vec<Finding> {
136        let mut findings = Vec::new();
137        let (import_map, glob_prefixes) = build_import_map(&file.use_imports);
138
139        for func in &file.functions {
140            // Expand all calls upfront for context lookups
141            let expanded_calls: Vec<Vec<String>> = func
142                .calls
143                .iter()
144                .map(|call| {
145                    expand_call(
146                        &call.segments,
147                        &import_map,
148                        &glob_prefixes,
149                        &self.authorities,
150                    )
151                })
152                .collect();
153
154            // Pass 1: collect path-based findings and build a set of matched patterns.
155            // We store the *pattern* (e.g. ["Command", "new"]), not the expanded call path.
156            // This is correct: if someone writes `use std::process::Command; Command::new("sh")`,
157            // import expansion produces `std::process::Command::new`, which suffix-matches
158            // the pattern ["Command", "new"]. Pass 2 then checks for pattern co-occurrence,
159            // so `.output()` fires only when the Command::new *pattern* was matched in pass 1.
160            let mut matched_paths: HashSet<Vec<String>> = HashSet::new();
161
162            for (call, expanded) in func.calls.iter().zip(expanded_calls.iter()) {
163                for authority in &self.authorities {
164                    if let AuthorityPattern::Path(pattern) = &authority.pattern
165                        && matches_path(expanded, pattern)
166                    {
167                        matched_paths.insert(pattern.iter().map(|s| s.to_string()).collect());
168                        findings.push(make_finding(
169                            file,
170                            func,
171                            call,
172                            expanded,
173                            authority,
174                            crate_name,
175                            crate_version,
176                        ));
177                        break;
178                    }
179                }
180
181                // Custom path authorities
182                for (pattern, category, risk, description) in &self.custom_paths {
183                    if matches_custom_path(expanded, pattern) {
184                        let deny_violation = is_category_denied(&func.deny_categories, category);
185                        findings.push(Finding {
186                            file: file.path.clone(),
187                            function: func.name.clone(),
188                            function_line: func.line,
189                            call_line: call.line,
190                            call_col: call.col,
191                            call_text: expanded.join("::"),
192                            category: category.clone(),
193                            subcategory: "custom".to_string(),
194                            risk: if deny_violation {
195                                Risk::Critical
196                            } else {
197                                *risk
198                            },
199                            description: if deny_violation {
200                                format!("DENY VIOLATION: {} (in #[deny] function)", description)
201                            } else {
202                                description.clone()
203                            },
204                            is_build_script: func.is_build_script,
205                            crate_name: crate_name.to_string(),
206                            crate_version: crate_version.to_string(),
207                            is_deny_violation: deny_violation,
208                        });
209                        break;
210                    }
211                }
212            }
213
214            // Pass 2: resolve MethodWithContext — only match if requires_path
215            // was found in pass 1 (co-occurrence in same function)
216            for (call, expanded) in func.calls.iter().zip(expanded_calls.iter()) {
217                for authority in &self.authorities {
218                    if let AuthorityPattern::MethodWithContext {
219                        method,
220                        requires_path,
221                    } = &authority.pattern
222                        && matches!(call.kind, CallKind::MethodCall { method: ref m } if m == method)
223                    {
224                        let required: Vec<String> =
225                            requires_path.iter().map(|s| s.to_string()).collect();
226                        if matched_paths.contains(&required) {
227                            findings.push(make_finding(
228                                file,
229                                func,
230                                call,
231                                expanded,
232                                authority,
233                                crate_name,
234                                crate_version,
235                            ));
236                            break;
237                        }
238                    }
239                }
240            }
241        }
242
243        // Extern blocks (not inside a function, so no deny context)
244        for ext in &file.extern_blocks {
245            findings.push(Finding {
246                file: file.path.clone(),
247                function: format!("extern \"{}\"", ext.abi.as_deref().unwrap_or("C")),
248                function_line: ext.line,
249                call_line: ext.line,
250                call_col: 0,
251                call_text: format!(
252                    "extern block ({} functions: {})",
253                    ext.functions.len(),
254                    ext.functions.join(", ")
255                ),
256                category: Category::Ffi,
257                subcategory: "extern".to_string(),
258                risk: Risk::High,
259                description: "Foreign function interface — bypasses Rust safety".to_string(),
260                is_build_script: file.path.ends_with("build.rs"),
261                crate_name: crate_name.to_string(),
262                crate_version: crate_version.to_string(),
263                is_deny_violation: false,
264            });
265        }
266
267        // Fix #5: dedup by (file, function, call_line, call_col)
268        let mut seen = HashSet::new();
269        findings
270            .retain(|f| seen.insert((f.file.clone(), f.function.clone(), f.call_line, f.call_col)));
271
272        findings
273    }
274}
275
276fn make_finding(
277    file: &ParsedFile,
278    func: &crate::parser::ParsedFunction,
279    call: &crate::parser::CallSite,
280    expanded: &[String],
281    authority: &Authority,
282    crate_name: &str,
283    crate_version: &str,
284) -> Finding {
285    let is_deny_violation = is_category_denied(&func.deny_categories, &authority.category);
286    Finding {
287        file: file.path.clone(),
288        function: func.name.clone(),
289        function_line: func.line,
290        call_line: call.line,
291        call_col: call.col,
292        call_text: expanded.join("::"),
293        category: authority.category.clone(),
294        subcategory: authority.subcategory.to_string(),
295        risk: if is_deny_violation {
296            Risk::Critical
297        } else {
298            authority.risk
299        },
300        description: if is_deny_violation {
301            format!(
302                "DENY VIOLATION: {} (in #[deny] function)",
303                authority.description
304            )
305        } else {
306            authority.description.to_string()
307        },
308        is_build_script: func.is_build_script,
309        crate_name: crate_name.to_string(),
310        crate_version: crate_version.to_string(),
311        is_deny_violation,
312    }
313}
314
315/// Checks if a finding's category is covered by the function's deny list.
316fn is_category_denied(deny_categories: &[String], finding_category: &Category) -> bool {
317    if deny_categories.is_empty() {
318        return false;
319    }
320    for denied in deny_categories {
321        match denied.as_str() {
322            "all" => return true,
323            "fs" if *finding_category == Category::Fs => return true,
324            "net" if *finding_category == Category::Net => return true,
325            "env" if *finding_category == Category::Env => return true,
326            "process" if *finding_category == Category::Process => return true,
327            "ffi" if *finding_category == Category::Ffi => return true,
328            _ => {}
329        }
330    }
331    false
332}
333
334type ImportMap = Vec<(String, Vec<String>)>;
335type GlobPrefixes = Vec<Vec<String>>;
336
337fn build_import_map(imports: &[ImportPath]) -> (ImportMap, GlobPrefixes) {
338    let mut map = Vec::new();
339    let mut glob_prefixes = Vec::new();
340
341    for imp in imports {
342        if imp.segments.last().map(|s| s.as_str()) == Some("*") {
343            // Glob import: store the prefix (everything before "*")
344            glob_prefixes.push(imp.segments[..imp.segments.len() - 1].to_vec());
345        } else {
346            let short_name = imp
347                .alias
348                .clone()
349                .unwrap_or_else(|| imp.segments.last().cloned().unwrap_or_default());
350            map.push((short_name, imp.segments.clone()));
351        }
352    }
353
354    (map, glob_prefixes)
355}
356
357fn expand_call(
358    segments: &[String],
359    import_map: &[(String, Vec<String>)],
360    glob_prefixes: &[Vec<String>],
361    authorities: &[Authority],
362) -> Vec<String> {
363    if segments.is_empty() {
364        return Vec::new();
365    }
366
367    // First: try explicit import expansion (takes priority per RFC 1560)
368    for (short_name, full_path) in import_map {
369        if segments[0] == *short_name {
370            let mut expanded = full_path.clone();
371            expanded.extend_from_slice(&segments[1..]);
372            return expanded;
373        }
374    }
375
376    // Fallback: try glob import expansion for single-segment bare calls
377    if segments.len() == 1 {
378        for prefix in glob_prefixes {
379            let mut candidate = prefix.clone();
380            candidate.push(segments[0].clone());
381            // Only expand if the candidate matches a known authority pattern
382            for authority in authorities {
383                if let AuthorityPattern::Path(pattern) = &authority.pattern
384                    && matches_path(&candidate, pattern)
385                {
386                    return candidate;
387                }
388            }
389        }
390    }
391
392    segments.to_vec()
393}
394
/// Returns `true` when `pattern` is a suffix of `expanded_path`.
///
/// Segments are compared pairwise from the end, so `["std", "fs", "read"]`
/// matches the pattern `["fs", "read"]` but not `["std", "fs"]`. A pattern
/// longer than the path never matches; an empty pattern matches every path.
fn matches_path(expanded_path: &[String], pattern: &[&str]) -> bool {
    // Walking both slices backwards compares exactly the trailing
    // `pattern.len()` segments without any index arithmetic.
    expanded_path.len() >= pattern.len()
        && expanded_path
            .iter()
            .rev()
            .zip(pattern.iter().rev())
            .all(|(segment, expected)| segment == expected)
}
405
/// Returns `true` when the custom `pattern` is a suffix of `expanded_path`.
///
/// Same suffix semantics as the built-in path matcher, but for user-defined
/// patterns whose segments are owned strings. A pattern longer than the path
/// never matches; an empty pattern matches every path.
fn matches_custom_path(expanded_path: &[String], pattern: &[String]) -> bool {
    expanded_path.ends_with(pattern)
}
416
#[cfg(test)]
mod tests {
    use super::*;
    use crate::parser::parse_source;

    /// Parses `source` as `test.rs` and runs a default detector over it.
    fn analyse_source(source: &str) -> Vec<Finding> {
        let parsed = parse_source(source, "test.rs").unwrap();
        Detector::new().analyse(&parsed, "test-crate", "0.1.0")
    }

    /// Returns the findings that belong to `category`, in their original order.
    fn with_category(findings: &[Finding], category: Category) -> Vec<&Finding> {
        findings
            .iter()
            .filter(|f| f.category == category)
            .collect()
    }

    #[test]
    fn detect_fs_read() {
        let findings = analyse_source(
            r#"
            use std::fs;
            fn load() {
                let _ = fs::read("test");
            }
        "#,
        );
        assert!(!findings.is_empty());
        assert_eq!(findings[0].category, Category::Fs);
    }

    #[test]
    fn detect_import_expanded_call() {
        let findings = analyse_source(
            r#"
            use std::fs::read_to_string;
            fn load() {
                let _ = read_to_string("/etc/passwd");
            }
        "#,
        );
        assert!(!findings.is_empty());
        assert_eq!(findings[0].category, Category::Fs);
        assert!(findings[0].call_text.contains("read_to_string"));
    }

    #[test]
    fn method_with_context_fires_when_context_present() {
        let findings = analyse_source(
            r#"
            use std::process::Command;
            fn run() {
                let cmd = Command::new("sh");
                cmd.output();
            }
        "#,
        );
        let proc_findings = with_category(&findings, Category::Process);
        // Should find Command::new AND .output() (context satisfied)
        assert!(
            proc_findings.len() >= 2,
            "Expected Command::new + .output(), got {proc_findings:?}"
        );
    }

    #[test]
    fn method_without_context_does_not_fire() {
        // .status() on something that is NOT Command — should not flag
        let findings = analyse_source(
            r#"
            fn check() {
                let response = get_response();
                let s = response.status();
            }
        "#,
        );
        let proc_findings = with_category(&findings, Category::Process);
        assert!(
            proc_findings.is_empty(),
            "Should NOT flag .status() without Command::new context"
        );
    }

    #[test]
    fn detect_extern_block() {
        let findings = analyse_source(
            r#"
            extern "C" {
                fn open(path: *const u8, flags: i32) -> i32;
            }
        "#,
        );
        assert_eq!(findings.len(), 1);
        assert_eq!(findings[0].category, Category::Ffi);
    }

    #[test]
    fn clean_code_no_findings() {
        let findings = analyse_source(
            r#"
            fn add(a: i32, b: i32) -> i32 { a + b }
        "#,
        );
        assert!(findings.is_empty());
    }

    #[test]
    fn detect_command_new() {
        let findings = analyse_source(
            r#"
            use std::process::Command;
            fn run() {
                let _ = Command::new("sh");
            }
        "#,
        );
        assert!(!findings.is_empty());
        assert_eq!(findings[0].category, Category::Process);
        assert_eq!(findings[0].risk, Risk::Critical);
    }

    #[test]
    fn dedup_prevents_double_counting() {
        // Even if import expansion creates two matching paths, we only report once per call site
        let findings = analyse_source(
            r#"
            use std::fs;
            use std::fs::read;
            fn load() {
                let _ = fs::read("test");
            }
        "#,
        );
        // Without this the uniqueness loop below would pass vacuously.
        assert!(
            !findings.is_empty(),
            "fs::read should be reported at least once"
        );
        // Each unique (file, function, line, col) should appear at most once
        let mut seen = std::collections::HashSet::new();
        for f in &findings {
            assert!(
                seen.insert((&f.file, &f.function, f.call_line, f.call_col)),
                "Duplicate finding at {}:{}",
                f.call_line,
                f.call_col
            );
        }
    }

    #[test]
    fn deny_violation_promotes_to_critical() {
        let findings = analyse_source(
            r#"
            use std::fs;
            #[doc = "capsec::deny(all)"]
            fn pure_function() {
                let _ = fs::read("secret.key");
            }
        "#,
        );
        assert!(!findings.is_empty());
        assert!(findings[0].is_deny_violation);
        assert_eq!(findings[0].risk, Risk::Critical);
        assert!(findings[0].description.contains("DENY VIOLATION"));
    }

    #[test]
    fn deny_fs_only_flags_fs_not_net() {
        let findings = analyse_source(
            r#"
            use std::fs;
            use std::net::TcpStream;
            #[doc = "capsec::deny(fs)"]
            fn mostly_pure() {
                let _ = fs::read("data");
                let _ = TcpStream::connect("127.0.0.1:80");
            }
        "#,
        );
        let fs_findings = with_category(&findings, Category::Fs);
        let net_findings = with_category(&findings, Category::Net);
        // Fail with a clear message instead of an index-out-of-bounds panic.
        assert!(!fs_findings.is_empty(), "Expected an Fs finding");
        assert!(!net_findings.is_empty(), "Expected a Net finding");
        assert!(fs_findings[0].is_deny_violation);
        assert_eq!(fs_findings[0].risk, Risk::Critical);
        assert!(!net_findings[0].is_deny_violation);
    }

    #[test]
    fn no_deny_annotation_no_violation() {
        let findings = analyse_source(
            r#"
            use std::fs;
            fn normal() {
                let _ = fs::read("data");
            }
        "#,
        );
        assert!(!findings.is_empty());
        assert!(!findings[0].is_deny_violation);
    }

    #[test]
    fn detect_aliased_import() {
        let findings = analyse_source(
            r#"
            use std::fs::read as load;
            fn fetch() {
                let _ = load("data.bin");
            }
        "#,
        );
        assert!(
            !findings.is_empty(),
            "Should detect aliased import: use std::fs::read as load"
        );
        assert_eq!(findings[0].category, Category::Fs);
        assert!(findings[0].call_text.contains("std::fs::read"));
    }

    #[test]
    fn detect_impl_block_method() {
        let findings = analyse_source(
            r#"
            use std::fs;
            struct Loader;
            impl Loader {
                fn load(&self) -> Vec<u8> {
                    fs::read("data.bin").unwrap()
                }
            }
        "#,
        );
        assert!(
            !findings.is_empty(),
            "Should detect fs::read inside impl block"
        );
        assert_eq!(findings[0].function, "load");
    }
}