use std::path::Path;
use anyhow::{Context, Result};
use chrono::{DateTime, Utc};
use syn::visit::Visit;
use syn::{Attribute, Expr, ImplItem, Item, ItemImpl, Meta, Visibility};
use syn::spanned::Spanned;
use uuid::Uuid;
use walkdir::WalkDir;
/// Aggregated result of scanning one repository.
///
/// `scan_repo` fills in the identifying fields and the three row vectors are
/// appended to while crates and source files are visited.
#[derive(Debug, Default)]
pub struct SymbolScan {
/// Snapshot identifier passed in by the caller of `scan_repo`.
pub snapshot_id: Uuid,
/// Timestamp passed in by the caller of `scan_repo`.
pub ts: DateTime<Utc>,
/// Repository name passed in by the caller of `scan_repo`.
pub repo: String,
/// One row per item emitted by the syntax walker.
pub symbols: Vec<SymbolRow>,
/// One row per call or method-call expression found in a walked function body.
pub calls: Vec<CallEdgeRow>,
/// One row per `#[cfg(...)]` attribute found on an emitted item.
pub features: Vec<FeatureGateRow>,
}
/// A single item (function, type, module, impl block, ...) found in a source file.
#[derive(Debug, Clone, serde::Serialize)]
pub struct SymbolRow {
/// Package name read from the owning `Cargo.toml` (`package.name`).
pub crate_name: String,
/// `::`-separated path of the module (or impl target) that contains the item.
pub module_path: String,
/// Kind label, e.g. `fn`, `struct`, `enum`, `trait`, `type`, `const`,
/// `static`, `macro`, `mod` or `impl`.
pub item_kind: String,
/// Item identifier; for impl blocks a label of the form `impl T` or
/// `impl Trait for T`.
pub item_name: String,
/// Visibility label such as `pub`, `pub(crate)` or `priv`.
pub visibility: String,
/// Source file path, relative to the repository root when the prefix can be stripped.
pub file: String,
/// Line number taken from the start of the item's identifying span.
pub line: u32,
/// Number of doc attributes counted on the item.
pub doc_lines: u32,
/// Only set for `fn` rows: `fn name(<argument types>)`, with receivers
/// rendered as `self`.
pub signature: Option<String>,
}
/// A call site found inside a function body.
#[derive(Debug, Clone, serde::Serialize)]
pub struct CallEdgeRow {
/// Package name read from the owning `Cargo.toml` (`package.name`).
pub crate_name: String,
/// Path of the enclosing function: `<current module path>::<fn name>`.
pub caller_path: String,
/// For plain calls, the `::`-joined callee path (or the callee expression's
/// token string when it is not a path); for method calls, the method name.
pub callee_ident: String,
/// `"call"` for call expressions, `"method"` for method-call expressions.
pub call_kind: String,
/// Source file path, relative to the repository root when the prefix can be stripped.
pub file: String,
/// Line number of the callee expression (calls) or method identifier (method calls).
pub line: u32,
}
/// A `#[cfg(...)]` attribute attached to an emitted item.
///
/// One row is produced per `cfg` attribute, so an item carrying several
/// `#[cfg]` attributes yields several rows.
// NOTE(review): unlike `SymbolRow` and `CallEdgeRow` this type does not derive
// `serde::Serialize` — confirm whether that is intentional.
#[derive(Debug, Clone)]
pub struct FeatureGateRow {
/// Package name read from the owning `Cargo.toml` (`package.name`).
pub crate_name: String,
/// `::`-separated path of the module (or impl target) that contains the item.
pub module_path: String,
/// Name of the gated item, as used for its symbol row.
pub item_name: String,
/// The tokens between the parentheses of `#[cfg(...)]`, rendered as a string.
pub cfg_expr: String,
/// Source file path, relative to the repository root when the prefix can be stripped.
pub file: String,
/// Line number recorded for the gated item (same value as its symbol row).
pub line: u32,
}
/// Walks `repo_root`, scanning every directory that contains a `Cargo.toml`.
///
/// Directories named in [`is_skipped_dir`] are pruned from the walk. Each
/// manifest found is handed to `scan_one_crate`, which appends rows to the
/// returned [`SymbolScan`]; problems inside an individual crate are skipped
/// there rather than reported.
///
/// * `repo_root` – directory to walk.
/// * `repo_name`, `snapshot_id`, `ts` – copied verbatim into the result.
///
/// # Errors
///
/// Returns an error (with context `"walkdir"`) if the directory walk over
/// `repo_root` itself yields an error for any entry.
pub fn scan_repo(
    repo_root: &Path,
    repo_name: &str,
    snapshot_id: Uuid,
    ts: DateTime<Utc>,
) -> Result<SymbolScan> {
    let mut out = SymbolScan {
        snapshot_id,
        ts,
        repo: repo_name.to_string(),
        ..Default::default()
    };

    let entries = WalkDir::new(repo_root).into_iter().filter_entry(|e| {
        // `filter_entry` also sees the root entry (depth 0). Without this
        // guard a repository that happens to live in a directory called
        // `target`, `.git`, ... would be pruned entirely and scan as empty.
        e.depth() == 0 || !is_skipped_dir(&e.file_name().to_string_lossy())
    });

    for entry in entries {
        let entry = entry.context("walkdir")?;
        if entry.file_name() == "Cargo.toml" {
            scan_one_crate(repo_root, entry.path(), &mut out);
        }
    }
    Ok(out)
}
/// Reports whether `name` is one of the entry names the directory walks prune.
///
/// The comparison is exact and case-sensitive.
fn is_skipped_dir(name: &str) -> bool {
    const SKIPPED: [&str; 4] = ["target", ".git", "node_modules", ".nornir"];
    SKIPPED.contains(&name)
}
/// Scans the Rust sources of the package described by `cargo_toml`.
///
/// Best-effort: the function returns without recording anything when the
/// manifest cannot be read, is not valid TOML, has no string `package.name`,
/// or has no parent directory. Otherwise every `*.rs` file below the
/// package's `src`, `tests`, `benches` and `examples` directories (those that
/// exist) is passed to `scan_one_file`. Walk errors for individual entries
/// are skipped.
fn scan_one_crate(repo_root: &Path, cargo_toml: &Path, out: &mut SymbolScan) {
    let Ok(text) = std::fs::read_to_string(cargo_toml) else { return };
    let Ok(doc) = text.parse::<toml::Value>() else { return };
    let Some(crate_name) = doc
        .get("package")
        .and_then(|p| p.get("name"))
        .and_then(|n| n.as_str())
    else {
        return;
    };
    let Some(crate_dir) = cargo_toml.parent() else { return };

    for section in ["src", "tests", "benches", "examples"] {
        let dir = crate_dir.join(section);
        if !dir.is_dir() {
            continue;
        }
        let entries = WalkDir::new(&dir)
            .into_iter()
            .filter_entry(|e| !is_skipped_dir(&e.file_name().to_string_lossy()));
        for entry in entries {
            let Ok(entry) = entry else { continue };
            let is_rust_file = entry.file_type().is_file()
                && entry.path().extension().and_then(|e| e.to_str()) == Some("rs");
            if is_rust_file {
                scan_one_file(repo_root, &dir, entry.path(), crate_name, section, out);
            }
        }
    }
}

/// Parses one Rust source file and records its items, calls and cfg gates.
///
/// Files that cannot be read or do not parse with `syn::parse_file` are
/// skipped silently. The recorded file path is relative to `repo_root` when
/// that prefix can be stripped, otherwise the path is used as given.
///
/// * `section_dir` – the `src`/`tests`/`benches`/`examples` directory the
///   file was found under; used to derive the file's module path.
/// * `section` – the name of that directory.
fn scan_one_file(
    repo_root: &Path,
    section_dir: &Path,
    file: &Path,
    crate_name: &str,
    section: &str,
    out: &mut SymbolScan,
) {
    let Ok(text) = std::fs::read_to_string(file) else { return };
    let Ok(syntax) = syn::parse_file(&text) else { return };
    let rel = file
        .strip_prefix(repo_root)
        .unwrap_or(file)
        .to_string_lossy()
        .to_string();
    let base_module = module_path_for(section_dir, file, crate_name, section);
    let mut v = Walker {
        crate_name: crate_name.to_string(),
        file: rel,
        module_stack: vec![base_module],
        out,
    };
    v.walk_items(&syntax.items);
}

/// Derives the `::`-separated module path for `file` from its location.
///
/// The path starts with `crate_name`, followed by `section` unless the
/// section is `src`, followed by every directory between `section_dir` and
/// the file, followed by the file stem. The stem is omitted when the file
/// only names its parent: `lib.rs`/`main.rs` directly under `src`, and
/// `mod.rs` inside a sub-directory.
///
/// Including the directories keeps `src/a/util.rs` and `src/b/util.rs`
/// apart and maps `src/a/mod.rs` to `crate::a` instead of `crate::mod`.
/// Files directly inside `section_dir` get the same path they always had.
/// `#[path = "..."]` attributes are not taken into account.
fn module_path_for(section_dir: &Path, file: &Path, crate_name: &str, section: &str) -> String {
    let stem = file.file_stem().and_then(|s| s.to_str()).unwrap_or("");

    // Directory names between the section directory and the file. If the
    // prefix cannot be stripped the list stays empty and the result falls
    // back to the stem-only form.
    let dirs: Vec<String> = file
        .strip_prefix(section_dir)
        .ok()
        .and_then(|rel| rel.parent())
        .into_iter()
        .flat_map(|parent| parent.components())
        .filter_map(|component| match component {
            std::path::Component::Normal(name) => Some(name.to_string_lossy().into_owned()),
            _ => None,
        })
        .collect();

    let mut segments = vec![crate_name.to_string()];
    if section != "src" {
        segments.push(section.to_string());
    }

    let nested = !dirs.is_empty();
    segments.extend(dirs);

    let stem_names_parent = if nested {
        stem == "mod"
    } else {
        section == "src" && (stem == "lib" || stem == "main")
    };
    if !stem_names_parent {
        segments.push(stem.to_string());
    }
    segments.join("::")
}
/// Per-file state used while walking the items of one parsed source file.
struct Walker<'a> {
/// Package name copied into every emitted row.
crate_name: String,
/// File path copied into every emitted row.
file: String,
/// Stack of module paths; the last element is the path items are currently
/// emitted under. Nested inline modules and impl blocks push onto it.
module_stack: Vec<String>,
/// Scan result that rows are appended to.
out: &'a mut SymbolScan,
}
impl<'a> Walker<'a> {
fn cur(&self) -> &str {
self.module_stack.last().map(|s| s.as_str()).unwrap_or("")
}
fn walk_items(&mut self, items: &[Item]) {
for item in items {
self.walk_item(item);
}
}
fn walk_item(&mut self, item: &Item) {
match item {
Item::Fn(f) => {
self.emit("fn", &f.sig.ident.to_string(), &vis(&f.vis),
Some(format!("fn {}{}", f.sig.ident, fn_inputs(&f.sig.inputs))),
&f.attrs, f.sig.ident.span());
self.walk_fn_body(&f.sig.ident.to_string(), &f.block);
}
Item::Struct(s) => self.emit("struct", &s.ident.to_string(), &vis(&s.vis), None, &s.attrs, s.ident.span()),
Item::Enum(e) => self.emit("enum", &e.ident.to_string(), &vis(&e.vis), None, &e.attrs, e.ident.span()),
Item::Trait(t) => self.emit("trait", &t.ident.to_string(), &vis(&t.vis), None, &t.attrs, t.ident.span()),
Item::Type(t) => self.emit("type", &t.ident.to_string(), &vis(&t.vis), None, &t.attrs, t.ident.span()),
Item::Const(c) => self.emit("const", &c.ident.to_string(), &vis(&c.vis), None, &c.attrs, c.ident.span()),
Item::Static(s) => self.emit("static", &s.ident.to_string(), &vis(&s.vis), None, &s.attrs, s.ident.span()),
Item::Macro(m) => {
if let Some(id) = &m.ident {
self.emit("macro", &id.to_string(), "pub", None, &m.attrs, id.span());
}
}
Item::Mod(m) => {
let nested = format!("{}::{}", self.cur(), m.ident);
self.emit("mod", &m.ident.to_string(), &vis(&m.vis), None, &m.attrs, m.ident.span());
if let Some((_, sub)) = &m.content {
self.module_stack.push(nested);
self.walk_items(sub);
self.module_stack.pop();
}
}
Item::Impl(i) => self.walk_impl(i),
_ => {}
}
}
fn walk_impl(&mut self, i: &ItemImpl) {
let self_ty = tokens_to_string(&i.self_ty);
let trait_part = i.trait_.as_ref()
.map(|(_, p, _)| path_to_string(p))
.unwrap_or_default();
let impl_label = if trait_part.is_empty() {
format!("impl {self_ty}")
} else {
format!("impl {trait_part} for {self_ty}")
};
self.emit("impl", &impl_label, "pub", None, &i.attrs, i.self_ty.span());
let parent = self.cur().to_string();
self.module_stack.push(format!("{parent}::{self_ty}"));
for it in &i.items {
if let ImplItem::Fn(f) = it {
self.emit("fn", &f.sig.ident.to_string(), &vis(&f.vis),
Some(format!("fn {}{}", f.sig.ident, fn_inputs(&f.sig.inputs))),
&f.attrs, f.sig.ident.span());
self.walk_fn_body(&f.sig.ident.to_string(), &f.block);
}
}
self.module_stack.pop();
}
fn walk_fn_body(&mut self, fn_name: &str, body: &syn::Block) {
let caller = format!("{}::{}", self.cur(), fn_name);
let mut cv = CallCollector {
caller,
crate_name: self.crate_name.clone(),
file: self.file.clone(),
out: &mut self.out.calls,
};
cv.visit_block(body);
}
fn emit(
&mut self,
kind: &str,
name: &str,
visi: &str,
sig: Option<String>,
attrs: &[Attribute],
span: proc_macro2::Span,
) {
let line = span.start().line as u32;
let doc_lines = attrs.iter().filter(|a| a.path().is_ident("doc")).count() as u32;
self.out.symbols.push(SymbolRow {
crate_name: self.crate_name.clone(),
module_path: self.cur().to_string(),
item_kind: kind.to_string(),
item_name: name.to_string(),
visibility: visi.to_string(),
file: self.file.clone(),
line,
doc_lines,
signature: sig,
});
for cfg in extract_cfgs(attrs) {
self.out.features.push(FeatureGateRow {
crate_name: self.crate_name.clone(),
module_path: self.cur().to_string(),
item_name: name.to_string(),
cfg_expr: cfg,
file: self.file.clone(),
line,
});
}
}
}
/// Expression visitor that records call edges for one function body.
struct CallCollector<'a> {
/// Path of the function whose body is being visited; stored as `caller_path`.
caller: String,
/// Package name copied into every emitted row.
crate_name: String,
/// File path copied into every emitted row.
file: String,
/// Destination vector that call-edge rows are appended to.
out: &'a mut Vec<CallEdgeRow>,
}
impl<'ast, 'a> Visit<'ast> for CallCollector<'a> {
    /// Records a row for call and method-call expressions, then continues
    /// into the expression's children so nested calls are recorded as well.
    ///
    /// The row for an expression is pushed before any row produced by its
    /// sub-expressions.
    fn visit_expr(&mut self, e: &'ast Expr) {
        // (callee, kind, line) for the two expression forms that are recorded.
        let edge = match e {
            Expr::Call(call) => {
                let callee = match call.func.as_ref() {
                    Expr::Path(p) => path_to_string(&p.path),
                    // Callee is not a plain path (e.g. a closure or field
                    // access): fall back to its token string.
                    other => tokens_to_string(other),
                };
                Some((callee, "call", call.func.span().start().line))
            }
            Expr::MethodCall(mc) => Some((
                mc.method.to_string(),
                "method",
                mc.method.span().start().line,
            )),
            _ => None,
        };

        if let Some((callee_ident, kind, line)) = edge {
            self.out.push(CallEdgeRow {
                crate_name: self.crate_name.clone(),
                caller_path: self.caller.clone(),
                callee_ident,
                call_kind: kind.into(),
                file: self.file.clone(),
                line: line as u32,
            });
        }

        syn::visit::visit_expr(self, e);
    }
}
/// Renders a visibility as a short label: `pub`, `pub(<path>)` for
/// restricted visibility, or `priv` when no visibility is written.
fn vis(v: &Visibility) -> String {
    if let Visibility::Restricted(restricted) = v {
        let scope = path_to_string(&restricted.path);
        return format!("pub({scope})");
    }
    let label = if matches!(v, Visibility::Public(_)) { "pub" } else { "priv" };
    label.to_string()
}
/// Joins the identifiers of a path's segments with `::`.
///
/// Only the segment identifiers are used; generic arguments and a leading
/// `::` are not rendered.
fn path_to_string(p: &syn::Path) -> String {
    let mut joined = String::new();
    for (idx, segment) in p.segments.iter().enumerate() {
        if idx > 0 {
            joined.push_str("::");
        }
        joined.push_str(&segment.ident.to_string());
    }
    joined
}
/// Renders any syntax node as the string form of its token stream.
fn tokens_to_string<T: quote::ToTokens>(t: &T) -> String {
    // `to_token_stream` builds a fresh stream via `to_tokens`.
    let stream: proc_macro2::TokenStream = t.to_token_stream();
    stream.to_string()
}
/// Renders a function's parameter list as `(<a>, <b>, ...)`.
///
/// Any receiver is rendered as `self`; every other parameter is rendered as
/// the token string of its type (parameter names are omitted).
fn fn_inputs(inputs: &syn::punctuated::Punctuated<syn::FnArg, syn::Token![,]>) -> String {
    let mut rendered = String::from("(");
    for (idx, arg) in inputs.iter().enumerate() {
        if idx > 0 {
            rendered.push_str(", ");
        }
        match arg {
            syn::FnArg::Receiver(_) => rendered.push_str("self"),
            syn::FnArg::Typed(typed) => rendered.push_str(&tokens_to_string(&typed.ty)),
        }
    }
    rendered.push(')');
    rendered
}
/// Returns the predicate of every `#[cfg(...)]` attribute in `attrs`, in
/// attribute order, as the string form of the tokens inside the parentheses.
///
/// Attributes whose path is not exactly `cfg`, or that are not in list form,
/// contribute nothing.
fn extract_cfgs(attrs: &[Attribute]) -> Vec<String> {
    attrs
        .iter()
        .filter(|attr| attr.path().is_ident("cfg"))
        .filter_map(|attr| match &attr.meta {
            Meta::List(list) => Some(list.tokens.to_string()),
            _ => None,
        })
        .collect()
}
// End-to-end tests: each one writes a tiny crate into a temporary directory
// and runs `scan_repo` over it.
#[cfg(test)]
mod tests {
use super::*;
// Checks that the expected symbols, call edges and cfg gate are all found.
#[test]
fn scans_basic_crate() {
let dir = tempfile::tempdir().unwrap();
let root = dir.path();
std::fs::create_dir_all(root.join("src")).unwrap();
std::fs::write(root.join("Cargo.toml"), r#"[package]
name = "demo"
version = "0.1.0"
edition = "2021"
"#).unwrap();
std::fs::write(root.join("src/lib.rs"), r#"
pub fn outer(x: u32) -> u32 {
helper(x) + 1
}
fn helper(x: u32) -> u32 { x.saturating_add(2) }
pub struct Foo { pub n: u32 }
impl Foo {
pub fn make() -> Self { Self { n: 0 } }
}
pub trait Bar { fn baz(&self); }
#[cfg(feature = "experimental")]
pub fn gated() {}
"#).unwrap();
let scan = scan_repo(root, "demo", Uuid::new_v4(), Utc::now()).unwrap();
// (kind, name) pairs of every emitted symbol.
let kinds: Vec<_> = scan.symbols.iter().map(|s| (s.item_kind.as_str(), s.item_name.as_str())).collect();
assert!(kinds.contains(&("fn", "outer")), "{kinds:?}");
assert!(kinds.contains(&("fn", "helper")));
assert!(kinds.contains(&("struct", "Foo")));
assert!(kinds.iter().any(|(k, _)| *k == "impl"));
assert!(kinds.contains(&("fn", "make")));
assert!(kinds.contains(&("trait", "Bar")));
assert!(kinds.contains(&("fn", "gated")));
// Both the plain call (`helper`) and the method call (`saturating_add`) are recorded.
let callees: Vec<&str> = scan.calls.iter().map(|c| c.callee_ident.as_str()).collect();
assert!(callees.contains(&"helper"), "{callees:?}");
assert!(callees.contains(&"saturating_add"));
// The `#[cfg(feature = "experimental")]` attribute produces a feature-gate row.
assert!(scan.features.iter().any(|f| f.item_name == "gated" && f.cfg_expr.contains("experimental")));
}
// Checks that recorded line numbers match the positions in the source text.
// The fixture below starts with a newline, so `pub fn outer` is on line 2;
// do not reflow the raw string.
#[test]
fn captures_real_source_lines() {
let dir = tempfile::tempdir().unwrap();
let root = dir.path();
std::fs::create_dir_all(root.join("src")).unwrap();
std::fs::write(root.join("Cargo.toml"), "[package]\nname = \"demo\"\nversion = \"0.1.0\"\nedition = \"2021\"\n").unwrap();
std::fs::write(root.join("src/lib.rs"), r#"
pub fn outer(x: u32) -> u32 {
helper(x) + 1
}
fn helper(x: u32) -> u32 { x.saturating_add(2) }
pub struct Foo { pub n: u32 }
#[cfg(feature = "experimental")]
pub fn gated() {}
"#).unwrap();
let scan = scan_repo(root, "demo", Uuid::new_v4(), Utc::now()).unwrap();
// Line recorded for the first symbol with the given name, if any.
let line_of = |name: &str| scan.symbols.iter().find(|s| s.item_name == name).map(|s| s.line);
assert_eq!(line_of("outer"), Some(2));
assert_eq!(line_of("helper"), Some(5));
assert_eq!(line_of("Foo"), Some(7));
assert_eq!(line_of("gated"), Some(10));
// A line of 0 would mean span locations are not being tracked.
assert!(scan.symbols.iter().all(|s| s.line > 0), "{:?}",
scan.symbols.iter().map(|s| (&s.item_name, s.line)).collect::<Vec<_>>());
let helper_call = scan.calls.iter().find(|c| c.callee_ident == "helper").unwrap();
assert_eq!(helper_call.line, 3);
assert!(scan.calls.iter().all(|c| c.line > 0));
// The gate row carries the line of the gated item, not of the attribute.
let gate = scan.features.iter().find(|f| f.item_name == "gated").unwrap();
assert_eq!(gate.line, 10);
}
}