Skip to main content

cargo_capsec/
parser.rs

1//! Rust source file parser built on [`syn`].
2//!
3//! Parses `.rs` files into a structured representation that captures the information
4//! the [`Detector`](crate::detector::Detector) needs: function boundaries, call sites,
5//! `use` imports, and `extern` blocks. Handles free functions, `impl` block methods,
6//! and trait default methods.
7//!
8//! The parser uses [`syn::visit::Visit`] to walk the AST. It does **not** perform type
9//! resolution — all matching is done on syntactic path segments. Import aliases are
10//! tracked so the [`Detector`](crate::detector::Detector) can expand them.
11
12use std::path::Path;
13use syn::visit::Visit;
14
/// Structured view of one parsed `.rs` source file.
///
/// Holds every function body, `use` import, and `extern` block discovered in the
/// file. This is the input to [`Detector::analyse`](crate::detector::Detector::analyse).
///
/// All parser output types implement `PartialEq`/`Eq` so results can be compared
/// directly (for example with `assert_eq!` in tests).
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct ParsedFile {
    /// File path (used for reporting).
    pub path: String,
    /// All functions found: free functions, `impl` methods, and trait default methods.
    pub functions: Vec<ParsedFunction>,
    /// All `use` imports, with aliases tracked.
    pub use_imports: Vec<ImportPath>,
    /// All `extern` blocks (FFI declarations).
    pub extern_blocks: Vec<ExternBlock>,
}

/// One function (free, `impl` method, or trait default method) plus the call
/// sites recorded inside its body.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct ParsedFunction {
    /// The function name (e.g., `"load_config"`).
    pub name: String,
    /// Line number of the function's name identifier.
    pub line: usize,
    /// Every call site recorded inside the function body.
    pub calls: Vec<CallSite>,
    /// True if this is the `main()` function inside a `build.rs` file.
    pub is_build_script: bool,
}

/// A single call site recorded inside a function body.
///
/// Call sites are either qualified function calls (`fs::read(...)`) or method calls
/// (`stream.connect(...)`). The [`segments`](CallSite::segments) field holds the
/// raw path segments before import expansion.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct CallSite {
    /// Path segments of the call (e.g., `["fs", "read"]` or `["TcpStream", "connect"]`).
    /// For method calls this is the method name alone.
    pub segments: Vec<String>,
    /// Source line number.
    pub line: usize,
    /// Source column number.
    pub col: usize,
    /// Whether this is a function call or a method call.
    pub kind: CallKind,
}

/// Distinguishes qualified function calls from method calls.
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum CallKind {
    /// A qualified path call like `fs::read(...)` or `Command::new(...)`.
    FunctionCall,
    /// A method call like `stream.connect(...)` or `cmd.output()`.
    MethodCall {
        /// The method name (e.g., `"connect"`, `"output"`).
        method: String,
    },
}

/// A `use` import statement, with optional alias.
///
/// For `use std::fs::read as load`, the segments are `["std", "fs", "read"]` and
/// the alias is `Some("load")`. The [`Detector`](crate::detector::Detector) uses
/// this to expand bare calls: when it sees `load(...)`, it looks up the alias and
/// expands it to `std::fs::read`.
///
/// Glob imports are recorded with a trailing `"*"` segment.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct ImportPath {
    /// The full path segments (e.g., `["std", "fs", "read"]`).
    pub segments: Vec<String>,
    /// The `as` alias, if any (e.g., `Some("load")` for `use std::fs::read as load`).
    pub alias: Option<String>,
}

/// An `extern` block declaring foreign functions.
///
/// Any `extern` block is flagged as [`Category::Ffi`](crate::authorities::Category::Ffi)
/// by the detector, since FFI calls bypass Rust's safety model entirely.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct ExternBlock {
    /// The ABI string (e.g., `Some("C")` for `extern "C"`); `None` when no ABI is written.
    pub abi: Option<String>,
    /// Names of functions declared in the block (non-function items are not listed).
    pub functions: Vec<String>,
    /// Source line number of the `extern` keyword.
    pub line: usize,
}
100
101/// Parses a `.rs` file from disk into a [`ParsedFile`].
102///
103/// Reads the file contents and delegates to [`parse_source`]. Returns an error
104/// string if the file cannot be read or parsed.
105pub fn parse_file(path: &Path) -> Result<ParsedFile, String> {
106    let source =
107        std::fs::read_to_string(path).map_err(|e| format!("Failed to read {}: {e}", path.display()))?;
108    parse_source(&source, &path.display().to_string())
109}
110
111/// Parses Rust source code from a string into a [`ParsedFile`].
112///
113/// This is the primary entry point for programmatic usage and testing.
114/// The `path` parameter is used only for error messages and the
115/// [`ParsedFile::path`] field — it doesn't need to be a real file.
116///
117/// # Errors
118///
119/// Returns an error string if [`syn::parse_file`] fails (e.g., invalid Rust syntax).
120pub fn parse_source(source: &str, path: &str) -> Result<ParsedFile, String> {
121    let syntax = syn::parse_file(source).map_err(|e| format!("Failed to parse {path}: {e}"))?;
122
123    let mut visitor = FileVisitor::new(path.to_string());
124    visitor.visit_file(&syntax);
125
126    Ok(ParsedFile {
127        path: path.to_string(),
128        functions: visitor.functions,
129        use_imports: visitor.imports,
130        extern_blocks: visitor.extern_blocks,
131    })
132}
133
134struct FileVisitor {
135    file_path: String,
136    functions: Vec<ParsedFunction>,
137    imports: Vec<ImportPath>,
138    extern_blocks: Vec<ExternBlock>,
139    current_function: Option<ParsedFunction>,
140}
141
142impl FileVisitor {
143    fn new(file_path: String) -> Self {
144        Self {
145            file_path,
146            functions: Vec::new(),
147            imports: Vec::new(),
148            extern_blocks: Vec::new(),
149            current_function: None,
150        }
151    }
152}
153
154impl<'ast> Visit<'ast> for FileVisitor {
155    fn visit_item_fn(&mut self, node: &'ast syn::ItemFn) {
156        let func = ParsedFunction {
157            name: node.sig.ident.to_string(),
158            line: node.sig.ident.span().start().line,
159            calls: Vec::new(),
160            is_build_script: self.file_path.ends_with("build.rs") && node.sig.ident == "main",
161        };
162
163        let prev = self.current_function.take();
164        self.current_function = Some(func);
165
166        syn::visit::visit_item_fn(self, node);
167
168        if let Some(func) = self.current_function.take() {
169            self.functions.push(func);
170        }
171        self.current_function = prev;
172    }
173
174    fn visit_impl_item_fn(&mut self, node: &'ast syn::ImplItemFn) {
175        let func = ParsedFunction {
176            name: node.sig.ident.to_string(),
177            line: node.sig.ident.span().start().line,
178            calls: Vec::new(),
179            is_build_script: false,
180        };
181
182        let prev = self.current_function.take();
183        self.current_function = Some(func);
184
185        syn::visit::visit_impl_item_fn(self, node);
186
187        if let Some(func) = self.current_function.take() {
188            self.functions.push(func);
189        }
190        self.current_function = prev;
191    }
192
193    fn visit_trait_item_fn(&mut self, node: &'ast syn::TraitItemFn) {
194        // Only visit if there's a default body
195        if node.default.is_some() {
196            let func = ParsedFunction {
197                name: node.sig.ident.to_string(),
198                line: node.sig.ident.span().start().line,
199                calls: Vec::new(),
200                is_build_script: false,
201            };
202
203            let prev = self.current_function.take();
204            self.current_function = Some(func);
205
206            syn::visit::visit_trait_item_fn(self, node);
207
208            if let Some(func) = self.current_function.take() {
209                self.functions.push(func);
210            }
211            self.current_function = prev;
212        } else {
213            syn::visit::visit_trait_item_fn(self, node);
214        }
215    }
216
217    fn visit_expr_path(&mut self, node: &'ast syn::ExprPath) {
218        if let Some(ref mut func) = self.current_function {
219            let segments: Vec<String> =
220                node.path.segments.iter().map(|s| s.ident.to_string()).collect();
221
222            if !segments.is_empty() {
223                func.calls.push(CallSite {
224                    segments,
225                    line: node
226                        .path
227                        .segments
228                        .first()
229                        .map(|s| s.ident.span().start().line)
230                        .unwrap_or(0),
231                    col: node
232                        .path
233                        .segments
234                        .first()
235                        .map(|s| s.ident.span().start().column)
236                        .unwrap_or(0),
237                    kind: CallKind::FunctionCall,
238                });
239            }
240        }
241
242        syn::visit::visit_expr_path(self, node);
243    }
244
245    fn visit_expr_method_call(&mut self, node: &'ast syn::ExprMethodCall) {
246        if let Some(ref mut func) = self.current_function {
247            func.calls.push(CallSite {
248                segments: vec![node.method.to_string()],
249                line: node.method.span().start().line,
250                col: node.method.span().start().column,
251                kind: CallKind::MethodCall {
252                    method: node.method.to_string(),
253                },
254            });
255        }
256
257        syn::visit::visit_expr_method_call(self, node);
258    }
259
260    fn visit_item_use(&mut self, node: &'ast syn::ItemUse) {
261        let mut paths = Vec::new();
262        collect_use_paths(&node.tree, &mut Vec::new(), &mut paths);
263        self.imports.extend(paths);
264
265        syn::visit::visit_item_use(self, node);
266    }
267
268    fn visit_item_foreign_mod(&mut self, node: &'ast syn::ItemForeignMod) {
269        let functions: Vec<String> = node
270            .items
271            .iter()
272            .filter_map(|item| {
273                if let syn::ForeignItem::Fn(f) = item {
274                    Some(f.sig.ident.to_string())
275                } else {
276                    None
277                }
278            })
279            .collect();
280
281        self.extern_blocks.push(ExternBlock {
282            abi: node.abi.name.as_ref().map(|n| n.value()),
283            functions,
284            line: node.abi.extern_token.span.start().line,
285        });
286
287        syn::visit::visit_item_foreign_mod(self, node);
288    }
289}
290
291fn collect_use_paths(tree: &syn::UseTree, prefix: &mut Vec<String>, out: &mut Vec<ImportPath>) {
292    match tree {
293        syn::UseTree::Path(p) => {
294            prefix.push(p.ident.to_string());
295            collect_use_paths(&p.tree, prefix, out);
296            prefix.pop();
297        }
298        syn::UseTree::Name(n) => {
299            let mut segments = prefix.clone();
300            segments.push(n.ident.to_string());
301            out.push(ImportPath {
302                segments,
303                alias: None,
304            });
305        }
306        syn::UseTree::Rename(r) => {
307            let mut segments = prefix.clone();
308            segments.push(r.ident.to_string());
309            out.push(ImportPath {
310                segments,
311                alias: Some(r.rename.to_string()),
312            });
313        }
314        syn::UseTree::Group(g) => {
315            for item in &g.items {
316                collect_use_paths(item, prefix, out);
317            }
318        }
319        syn::UseTree::Glob(_) => {
320            let mut segments = prefix.clone();
321            segments.push("*".to_string());
322            out.push(ImportPath {
323                segments,
324                alias: None,
325            });
326        }
327    }
328}
329
#[cfg(test)]
mod tests {
    use super::*;

    /// A free function containing a qualified call is captured with its call sites.
    #[test]
    fn parse_function_calls() {
        let src = r#"
            use std::fs;
            fn do_stuff() {
                let _ = fs::read("test");
            }
        "#;
        let file = parse_source(src, "test.rs").unwrap();

        assert_eq!(file.functions.len(), 1);
        let only = &file.functions[0];
        assert_eq!(only.name, "do_stuff");
        assert!(!only.calls.is_empty());
    }

    /// Simple, grouped, and renamed imports are each flattened into their own entry.
    #[test]
    fn parse_use_statements() {
        let src = r#"
            use std::fs::read;
            use std::net::{TcpStream, TcpListener};
            use std::env::var as get_env;
        "#;
        let file = parse_source(src, "test.rs").unwrap();
        let imports = &file.use_imports;
        assert_eq!(imports.len(), 4);

        // The first import is the plain `std::fs::read`.
        let first = &imports[0];
        assert_eq!(first.segments, vec!["std", "fs", "read"]);
        assert!(first.alias.is_none());

        // Exactly one import carries an alias; locate it by that property.
        let renamed = imports
            .iter()
            .find(|import| import.alias.is_some())
            .unwrap();
        assert_eq!(renamed.segments, vec!["std", "env", "var"]);
        assert_eq!(renamed.alias.as_deref(), Some("get_env"));
    }

    /// Method calls are recorded with `CallKind::MethodCall`.
    #[test]
    fn parse_method_calls() {
        let src = r#"
            fn network() {
                let stream = something();
                stream.connect("127.0.0.1:8080");
                stream.send_to(b"data", "addr");
            }
        "#;
        let file = parse_source(src, "test.rs").unwrap();

        let method_call_count = file.functions[0]
            .calls
            .iter()
            .filter(|call| matches!(call.kind, CallKind::MethodCall { .. }))
            .count();
        assert_eq!(method_call_count, 2);
    }

    /// An `extern "C"` block is captured with its ABI and declared function names.
    #[test]
    fn parse_extern_blocks() {
        let src = r#"
            extern "C" {
                fn open(path: *const u8, flags: i32) -> i32;
                fn close(fd: i32) -> i32;
            }
        "#;
        let file = parse_source(src, "test.rs").unwrap();

        assert_eq!(file.extern_blocks.len(), 1);
        let block = &file.extern_blocks[0];
        assert_eq!(block.abi.as_deref(), Some("C"));
        assert_eq!(block.functions, vec!["open", "close"]);
    }

    /// Invalid Rust is reported as an error rather than a panic.
    #[test]
    fn parse_error_returns_err() {
        let outcome = parse_source("this is not valid rust {{{", "bad.rs");
        assert!(outcome.is_err());
    }

    /// Methods inside an `impl` block are captured as functions.
    #[test]
    fn parse_impl_block_methods() {
        let src = r#"
            use std::fs;
            struct Loader;
            impl Loader {
                fn load(&self) -> Vec<u8> {
                    fs::read("data.bin").unwrap()
                }
                fn name(&self) -> &str {
                    "loader"
                }
            }
        "#;
        let file = parse_source(src, "test.rs").unwrap();

        assert_eq!(file.functions.len(), 2);
        let load = file
            .functions
            .iter()
            .find(|func| func.name == "load")
            .unwrap();
        assert!(!load.calls.is_empty());
    }

    /// Trait methods are captured only when they provide a default body.
    #[test]
    fn parse_trait_default_methods() {
        let src = r#"
            use std::fs;
            trait Readable {
                fn read_data(&self) -> Vec<u8> {
                    fs::read("default.dat").unwrap()
                }
                fn name(&self) -> &str;
            }
        "#;
        let file = parse_source(src, "test.rs").unwrap();

        // `name` has no body, so `read_data` is the only recorded function.
        assert_eq!(file.functions.len(), 1);
        assert_eq!(file.functions[0].name, "read_data");
    }
}
442}