Skip to main content

cargo_capsec/
parser.rs

1//! Rust source file parser built on [`syn`].
2//!
3//! Parses `.rs` files into a structured representation that captures the information
4//! the [`Detector`](crate::detector::Detector) needs: function boundaries, call sites,
5//! `use` imports, and `extern` blocks. Handles free functions, `impl` block methods,
6//! and trait default methods.
7//!
8//! The parser uses [`syn::visit::Visit`] to walk the AST. It does **not** perform type
9//! resolution — all matching is done on syntactic path segments. Import aliases are
10//! tracked so the [`Detector`](crate::detector::Detector) can expand them.
11
12use std::path::Path;
13use syn::visit::Visit;
14
15/// The parsed representation of a single `.rs` source file.
16///
17/// Contains every function body, `use` import, and `extern` block found in the file.
18/// This is the input to [`Detector::analyse`](crate::detector::Detector::analyse).
19#[derive(Debug, Clone)]
20pub struct ParsedFile {
21    /// File path (for reporting).
22    pub path: String,
23    /// All functions found: free functions, `impl` methods, and trait default methods.
24    pub functions: Vec<ParsedFunction>,
25    /// All `use` imports, with aliases tracked.
26    pub use_imports: Vec<ImportPath>,
27    /// All `extern` blocks (FFI declarations).
28    pub extern_blocks: Vec<ExternBlock>,
29}
30
/// How widely a parsed function is visible.
///
/// This is a best-effort, purely syntactic classification into `pub`,
/// `pub(crate)`, other restricted forms, and private. It does not resolve
/// `pub use` re-exports or trait method visibility.
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum Visibility {
    /// Declared `pub fn`: visible to downstream crates.
    Public,
    /// Declared `pub(crate) fn`: visible only inside the crate.
    CratePub,
    /// Declared `pub(super) fn`, `pub(in path) fn`, or another restricted form.
    Restricted,
    /// Declared without any `pub` keyword: private.
    Private,
}
46
47/// A single function (free, `impl` method, or trait default method) and its call sites.
48#[derive(Debug, Clone)]
49pub struct ParsedFunction {
50    /// The function name (e.g., `"load_config"`).
51    pub name: String,
52    /// Line number where the function is defined.
53    pub line: usize,
54    /// Every call expression found inside the function body.
55    pub calls: Vec<CallSite>,
56    /// True if this is the `main()` function inside a `build.rs` file.
57    pub is_build_script: bool,
58    /// Categories denied by `#[capsec::deny(...)]` on this function.
59    /// Parsed from `#[doc = "capsec::deny(...)"]` attributes.
60    pub deny_categories: Vec<String>,
61    /// Best-effort visibility of this function.
62    #[allow(dead_code)]
63    pub visibility: Visibility,
64}
65
66/// A single call expression inside a function body.
67///
68/// Call sites are either qualified function calls (`fs::read(...)`) or method calls
69/// (`stream.connect(...)`). The [`segments`](CallSite::segments) field holds the
70/// raw path segments before import expansion.
71#[derive(Debug, Clone)]
72pub struct CallSite {
73    /// Path segments of the call (e.g., `["fs", "read"]` or `["TcpStream", "connect"]`).
74    pub segments: Vec<String>,
75    /// Source line number.
76    pub line: usize,
77    /// Source column number.
78    pub col: usize,
79    /// Whether this is a function call or a method call.
80    pub kind: CallKind,
81}
82
/// Distinguishes qualified function calls from method calls.
///
/// Derives `PartialEq`/`Eq` (like [`Visibility`]) so call kinds can be compared
/// directly with `==` and `assert_eq!` rather than only through `matches!`.
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum CallKind {
    /// A qualified path call like `fs::read(...)` or `Command::new(...)`.
    FunctionCall,
    /// A method call like `stream.connect(...)` or `cmd.output()`.
    MethodCall {
        /// The method name (e.g., `"connect"`, `"output"`).
        method: String,
    },
}
94
/// One imported path from a `use` statement, together with its alias if it has one.
///
/// `use std::fs::read as load` is stored with segments `["std", "fs", "read"]` and
/// alias `Some("load")`. The [`Detector`](crate::detector::Detector) relies on this
/// to expand bare calls: on seeing `load(...)` it looks the alias up and rewrites
/// the call to `std::fs::read`.
#[derive(Debug, Clone)]
pub struct ImportPath {
    /// Full path segments (e.g., `["std", "fs", "read"]`).
    pub segments: Vec<String>,
    /// Name given with `as`, if any (e.g., `Some("load")` for `use std::fs::read as load`).
    pub alias: Option<String>,
}
108
/// A single `extern` block and the foreign functions it declares.
///
/// The detector flags every `extern` block as
/// [`Category::Ffi`](crate::authorities::Category::Ffi), because FFI calls bypass
/// Rust's safety model entirely.
#[derive(Debug, Clone)]
pub struct ExternBlock {
    /// ABI string of the block (e.g., `Some("C")` for `extern "C"`).
    pub abi: Option<String>,
    /// Names of the functions declared inside the block.
    pub functions: Vec<String>,
    /// Line of the block in the source file.
    pub line: usize,
}
122
123/// Parses a `.rs` file from disk into a [`ParsedFile`].
124///
125/// Requires an [`FsRead`](capsec_core::permission::FsRead) capability token,
126/// proving the caller has permission to read files. This is the dogfood example —
127/// `cargo capsec audit` flagged this function's `std::fs::read_to_string` call,
128/// and now it's gated by the capsec type system.
129///
130/// # Example
131///
132/// ```rust,ignore
133/// use capsec_core::root::test_root;
134/// use capsec_core::permission::FsRead;
135///
136/// let root = test_root();
137/// let cap = root.grant::<FsRead>();
138/// let parsed = parse_file(Path::new("src/main.rs"), &cap).unwrap();
139/// ```
140pub fn parse_file(
141    path: &Path,
142    cap: &impl capsec_core::cap_provider::CapProvider<capsec_core::permission::FsRead>,
143) -> Result<ParsedFile, String> {
144    let source = capsec_std::fs::read_to_string(path, cap)
145        .map_err(|e| format!("Failed to read {}: {e}", path.display()))?;
146    parse_source(&source, &path.display().to_string())
147}
148
149/// Parses Rust source code from a string into a [`ParsedFile`].
150///
151/// This is the primary entry point for programmatic usage and testing.
152/// The `path` parameter is used only for error messages and the
153/// [`ParsedFile::path`] field — it doesn't need to be a real file.
154///
155/// # Errors
156///
157/// Returns an error string if [`syn::parse_file`] fails (e.g., invalid Rust syntax).
158pub fn parse_source(source: &str, path: &str) -> Result<ParsedFile, String> {
159    let syntax = syn::parse_file(source).map_err(|e| format!("Failed to parse {path}: {e}"))?;
160
161    let mut visitor = FileVisitor::new(path.to_string());
162    visitor.visit_file(&syntax);
163
164    Ok(ParsedFile {
165        path: path.to_string(),
166        functions: visitor.functions,
167        use_imports: visitor.imports,
168        extern_blocks: visitor.extern_blocks,
169    })
170}
171
172struct FileVisitor {
173    file_path: String,
174    functions: Vec<ParsedFunction>,
175    imports: Vec<ImportPath>,
176    extern_blocks: Vec<ExternBlock>,
177    current_function: Option<ParsedFunction>,
178}
179
180impl FileVisitor {
181    fn new(file_path: String) -> Self {
182        Self {
183            file_path,
184            functions: Vec::new(),
185            imports: Vec::new(),
186            extern_blocks: Vec::new(),
187            current_function: None,
188        }
189    }
190}
191
192impl<'ast> Visit<'ast> for FileVisitor {
193    fn visit_item_fn(&mut self, node: &'ast syn::ItemFn) {
194        let func = ParsedFunction {
195            name: node.sig.ident.to_string(),
196            line: node.sig.ident.span().start().line,
197            calls: Vec::new(),
198            is_build_script: self.file_path.ends_with("build.rs") && node.sig.ident == "main",
199            deny_categories: extract_deny_categories(&node.attrs),
200            visibility: extract_visibility(&node.vis),
201        };
202
203        let prev = self.current_function.take();
204        self.current_function = Some(func);
205
206        syn::visit::visit_item_fn(self, node);
207
208        if let Some(func) = self.current_function.take() {
209            self.functions.push(func);
210        }
211        self.current_function = prev;
212    }
213
214    fn visit_impl_item_fn(&mut self, node: &'ast syn::ImplItemFn) {
215        let func = ParsedFunction {
216            name: node.sig.ident.to_string(),
217            line: node.sig.ident.span().start().line,
218            calls: Vec::new(),
219            is_build_script: false,
220            deny_categories: extract_deny_categories(&node.attrs),
221            visibility: extract_visibility(&node.vis),
222        };
223
224        let prev = self.current_function.take();
225        self.current_function = Some(func);
226
227        syn::visit::visit_impl_item_fn(self, node);
228
229        if let Some(func) = self.current_function.take() {
230            self.functions.push(func);
231        }
232        self.current_function = prev;
233    }
234
235    fn visit_trait_item_fn(&mut self, node: &'ast syn::TraitItemFn) {
236        // Only visit if there's a default body
237        if node.default.is_some() {
238            let func = ParsedFunction {
239                name: node.sig.ident.to_string(),
240                line: node.sig.ident.span().start().line,
241                calls: Vec::new(),
242                is_build_script: false,
243                deny_categories: extract_deny_categories(&node.attrs),
244                // Trait methods are effectively public if the trait is public
245                visibility: Visibility::Public,
246            };
247
248            let prev = self.current_function.take();
249            self.current_function = Some(func);
250
251            syn::visit::visit_trait_item_fn(self, node);
252
253            if let Some(func) = self.current_function.take() {
254                self.functions.push(func);
255            }
256            self.current_function = prev;
257        } else {
258            syn::visit::visit_trait_item_fn(self, node);
259        }
260    }
261
262    fn visit_expr_call(&mut self, node: &'ast syn::ExprCall) {
263        if let Some(ref mut func) = self.current_function
264            && let syn::Expr::Path(ref path) = *node.func
265        {
266            let segments: Vec<String> = path
267                .path
268                .segments
269                .iter()
270                .map(|s| s.ident.to_string())
271                .collect();
272
273            if !segments.is_empty() {
274                func.calls.push(CallSite {
275                    segments,
276                    line: path
277                        .path
278                        .segments
279                        .first()
280                        .map(|s| s.ident.span().start().line)
281                        .unwrap_or(0),
282                    col: path
283                        .path
284                        .segments
285                        .first()
286                        .map(|s| s.ident.span().start().column)
287                        .unwrap_or(0),
288                    kind: CallKind::FunctionCall,
289                });
290            }
291        }
292
293        syn::visit::visit_expr_call(self, node);
294    }
295
296    fn visit_expr_method_call(&mut self, node: &'ast syn::ExprMethodCall) {
297        if let Some(ref mut func) = self.current_function {
298            func.calls.push(CallSite {
299                segments: vec![node.method.to_string()],
300                line: node.method.span().start().line,
301                col: node.method.span().start().column,
302                kind: CallKind::MethodCall {
303                    method: node.method.to_string(),
304                },
305            });
306        }
307
308        syn::visit::visit_expr_method_call(self, node);
309    }
310
311    fn visit_item_use(&mut self, node: &'ast syn::ItemUse) {
312        let mut paths = Vec::new();
313        collect_use_paths(&node.tree, &mut Vec::new(), &mut paths);
314        self.imports.extend(paths);
315
316        syn::visit::visit_item_use(self, node);
317    }
318
319    fn visit_item_foreign_mod(&mut self, node: &'ast syn::ItemForeignMod) {
320        let functions: Vec<String> = node
321            .items
322            .iter()
323            .filter_map(|item| {
324                if let syn::ForeignItem::Fn(f) = item {
325                    Some(f.sig.ident.to_string())
326                } else {
327                    None
328                }
329            })
330            .collect();
331
332        self.extern_blocks.push(ExternBlock {
333            abi: node.abi.name.as_ref().map(|n| n.value()),
334            functions,
335            line: node.abi.extern_token.span.start().line,
336        });
337
338        syn::visit::visit_item_foreign_mod(self, node);
339    }
340}
341
342/// Extracts denied categories from `#[doc = "capsec::deny(...)"]` attributes.
343///
344/// The `#[capsec::deny(...)]` macro emits a doc attribute like
345/// `#[doc = "capsec::deny(all, fs)"]`. This function parses that string
346/// and returns the category names (e.g., `["all", "fs"]`).
347fn extract_deny_categories(attrs: &[syn::Attribute]) -> Vec<String> {
348    let mut categories = Vec::new();
349    for attr in attrs {
350        if !attr.path().is_ident("doc") {
351            continue;
352        }
353        if let syn::Meta::NameValue(nv) = &attr.meta
354            && let syn::Expr::Lit(syn::ExprLit {
355                lit: syn::Lit::Str(lit_str),
356                ..
357            }) = &nv.value
358        {
359            let value = lit_str.value();
360            if let Some(inner) = value
361                .strip_prefix("capsec::deny(")
362                .and_then(|s| s.strip_suffix(')'))
363            {
364                for cat in inner.split(',') {
365                    let trimmed = cat.trim();
366                    if !trimmed.is_empty() {
367                        categories.push(trimmed.to_string());
368                    }
369                }
370            }
371        }
372    }
373    categories
374}
375
376/// Extracts the visibility from a `syn::Visibility`.
377fn extract_visibility(vis: &syn::Visibility) -> Visibility {
378    match vis {
379        syn::Visibility::Public(_) => Visibility::Public,
380        syn::Visibility::Restricted(r) => {
381            // pub(crate), pub(super), pub(in path)
382            if r.path.is_ident("crate") {
383                Visibility::CratePub
384            } else {
385                Visibility::Restricted
386            }
387        }
388        syn::Visibility::Inherited => Visibility::Private,
389    }
390}
391
392fn collect_use_paths(tree: &syn::UseTree, prefix: &mut Vec<String>, out: &mut Vec<ImportPath>) {
393    match tree {
394        syn::UseTree::Path(p) => {
395            prefix.push(p.ident.to_string());
396            collect_use_paths(&p.tree, prefix, out);
397            prefix.pop();
398        }
399        syn::UseTree::Name(n) => {
400            let mut segments = prefix.clone();
401            segments.push(n.ident.to_string());
402            out.push(ImportPath {
403                segments,
404                alias: None,
405            });
406        }
407        syn::UseTree::Rename(r) => {
408            let mut segments = prefix.clone();
409            segments.push(r.ident.to_string());
410            out.push(ImportPath {
411                segments,
412                alias: Some(r.rename.to_string()),
413            });
414        }
415        syn::UseTree::Group(g) => {
416            for item in &g.items {
417                collect_use_paths(item, prefix, out);
418            }
419        }
420        syn::UseTree::Glob(_) => {
421            let mut segments = prefix.clone();
422            segments.push("*".to_string());
423            out.push(ImportPath {
424                segments,
425                alias: None,
426            });
427        }
428    }
429}
430
#[cfg(test)]
mod tests {
    use super::*;

    /// Parses `source` under a placeholder file name, panicking if it is not valid Rust.
    fn parse(source: &str) -> ParsedFile {
        parse_source(source, "test.rs").unwrap()
    }

    /// Returns the parsed function called `name`, panicking if there is none.
    fn function_named<'a>(parsed: &'a ParsedFile, name: &str) -> &'a ParsedFunction {
        parsed.functions.iter().find(|f| f.name == name).unwrap()
    }

    // ---- call sites ----

    #[test]
    fn parse_function_calls() {
        let parsed = parse(
            r#"
            use std::fs;
            fn do_stuff() {
                let _ = fs::read("test");
            }
        "#,
        );
        assert_eq!(parsed.functions.len(), 1);
        let func = &parsed.functions[0];
        assert_eq!(func.name, "do_stuff");
        assert!(!func.calls.is_empty());
    }

    #[test]
    fn parse_method_calls() {
        let parsed = parse(
            r#"
            fn network() {
                let stream = something();
                stream.connect("127.0.0.1:8080");
                stream.send_to(b"data", "addr");
            }
        "#,
        );
        let method_call_count = parsed.functions[0]
            .calls
            .iter()
            .filter(|c| matches!(c.kind, CallKind::MethodCall { .. }))
            .count();
        assert_eq!(method_call_count, 2);
    }

    #[test]
    fn enum_variants_not_captured_as_calls() {
        let parsed = parse(
            r#"
            enum Category { Fs, Net }
            fn classify() -> Category {
                let cat = Category::Fs;
                let none: Option<i32> = Option::None;
                cat
            }
        "#,
        );
        let captured: Vec<String> = function_named(&parsed, "classify")
            .calls
            .iter()
            .filter(|c| matches!(c.kind, CallKind::FunctionCall))
            .map(|c| c.segments.join("::"))
            .collect();
        assert!(
            captured.is_empty(),
            "Enum variants should not be captured as function calls, got: {:?}",
            captured
        );
    }

    // ---- imports and extern blocks ----

    #[test]
    fn parse_use_statements() {
        let parsed = parse(
            r#"
            use std::fs::read;
            use std::net::{TcpStream, TcpListener};
            use std::env::var as get_env;
        "#,
        );
        assert_eq!(parsed.use_imports.len(), 4);

        let first = &parsed.use_imports[0];
        assert_eq!(first.segments, vec!["std", "fs", "read"]);
        assert!(first.alias.is_none());

        let aliased = parsed
            .use_imports
            .iter()
            .find(|import| import.alias.is_some())
            .unwrap();
        assert_eq!(aliased.segments, vec!["std", "env", "var"]);
        assert_eq!(aliased.alias.as_deref(), Some("get_env"));
    }

    #[test]
    fn parse_extern_blocks() {
        let parsed = parse(
            r#"
            extern "C" {
                fn open(path: *const u8, flags: i32) -> i32;
                fn close(fd: i32) -> i32;
            }
        "#,
        );
        assert_eq!(parsed.extern_blocks.len(), 1);
        let block = &parsed.extern_blocks[0];
        assert_eq!(block.abi.as_deref(), Some("C"));
        assert_eq!(block.functions, vec!["open", "close"]);
    }

    // ---- error handling ----

    #[test]
    fn parse_error_returns_err() {
        let outcome = parse_source("this is not valid rust {{{", "bad.rs");
        assert!(outcome.is_err());
    }

    // ---- impl and trait methods ----

    #[test]
    fn parse_impl_block_methods() {
        let parsed = parse(
            r#"
            use std::fs;
            struct Loader;
            impl Loader {
                fn load(&self) -> Vec<u8> {
                    fs::read("data.bin").unwrap()
                }
                fn name(&self) -> &str {
                    "loader"
                }
            }
        "#,
        );
        assert_eq!(parsed.functions.len(), 2);
        assert!(!function_named(&parsed, "load").calls.is_empty());
    }

    #[test]
    fn parse_trait_default_methods() {
        let parsed = parse(
            r#"
            use std::fs;
            trait Readable {
                fn read_data(&self) -> Vec<u8> {
                    fs::read("default.dat").unwrap()
                }
                fn name(&self) -> &str;
            }
        "#,
        );
        // Only the default method with a body should be captured
        assert_eq!(parsed.functions.len(), 1);
        assert_eq!(parsed.functions[0].name, "read_data");
    }

    // ---- deny annotations ----

    #[test]
    fn parse_deny_annotation() {
        let parsed = parse(
            r#"
            #[doc = "capsec::deny(all)"]
            fn pure_function() {
                let x = 1 + 2;
            }
        "#,
        );
        assert_eq!(parsed.functions.len(), 1);
        assert_eq!(parsed.functions[0].deny_categories, vec!["all"]);
    }

    #[test]
    fn parse_deny_specific_categories() {
        let parsed = parse(
            r#"
            #[doc = "capsec::deny(fs, net)"]
            fn no_io() {}
        "#,
        );
        assert_eq!(parsed.functions[0].deny_categories, vec!["fs", "net"]);
    }

    #[test]
    fn parse_no_deny_annotation() {
        let parsed = parse(
            r#"
            fn normal() {}
        "#,
        );
        assert!(parsed.functions[0].deny_categories.is_empty());
    }

    // ---- visibility ----

    #[test]
    fn parse_visibility_public() {
        let parsed = parse(
            r#"
            pub fn public_func() {}
        "#,
        );
        assert_eq!(parsed.functions[0].visibility, Visibility::Public);
    }

    #[test]
    fn parse_visibility_private() {
        let parsed = parse(
            r#"
            fn private_func() {}
        "#,
        );
        assert_eq!(parsed.functions[0].visibility, Visibility::Private);
    }

    #[test]
    fn parse_visibility_crate_pub() {
        let parsed = parse(
            r#"
            pub(crate) fn crate_func() {}
        "#,
        );
        assert_eq!(parsed.functions[0].visibility, Visibility::CratePub);
    }

    #[test]
    fn parse_visibility_impl_method() {
        let parsed = parse(
            r#"
            struct Foo;
            impl Foo {
                pub fn public_method(&self) {}
                fn private_method(&self) {}
            }
        "#,
        );
        assert_eq!(
            function_named(&parsed, "public_method").visibility,
            Visibility::Public
        );
        assert_eq!(
            function_named(&parsed, "private_method").visibility,
            Visibility::Private
        );
    }
}