use std::path::Path;
use anyhow::{Context, Result};
use chrono::{DateTime, Utc};
use syn::visit::Visit;
use syn::{Attribute, Expr, ImplItem, Item, ItemImpl, Meta, Visibility};
use syn::spanned::Spanned;
use uuid::Uuid;
use walkdir::WalkDir;
/// Aggregated result of one scan: snapshot metadata plus every row collected
/// from the scanned sources.
#[derive(Debug, Default)]
pub struct SymbolScan {
/// Snapshot identifier supplied by the caller of the scan entry points.
pub snapshot_id: Uuid,
/// Timestamp supplied by the caller of the scan entry points.
pub ts: DateTime<Utc>,
/// Repository name supplied by the caller.
pub repo: String,
/// One row per item emitted while walking the parsed files.
pub symbols: Vec<SymbolRow>,
/// One row per call or method-call expression found inside function bodies.
pub calls: Vec<CallEdgeRow>,
/// One row per `#[cfg(...)]` attribute found on an emitted item.
pub features: Vec<FeatureGateRow>,
/// One row per function carrying a recognised test attribute.
pub tests: Vec<TestDefRow>,
}
/// A single item (fn, struct, enum, trait, type, const, static, macro, mod or
/// impl) discovered in a source file.
#[derive(Debug, Clone, serde::Serialize)]
pub struct SymbolRow {
/// Name of the crate the file belongs to.
pub crate_name: String,
/// Module path that was current when the item was emitted.
pub module_path: String,
/// Item category, e.g. `"fn"`, `"struct"`, `"impl"`.
pub item_kind: String,
/// Item identifier; for impl blocks a label such as `impl Trait for Type`.
pub item_name: String,
/// Rendered visibility: `"pub"`, `"priv"` or `"pub(<path>)"`.
pub visibility: String,
/// Path of the source file as recorded by the scanner.
pub file: String,
/// Start line of the span the item was emitted with (usually its identifier).
pub line: u32,
/// Number of `doc` attributes attached to the item.
pub doc_lines: u32,
/// Abbreviated signature (`fn name(types...)`); only set for functions.
pub signature: Option<String>,
}
/// A call site found inside a function body.
#[derive(Debug, Clone, serde::Serialize)]
pub struct CallEdgeRow {
/// Name of the crate containing the call site.
pub crate_name: String,
/// Module path of the enclosing function followed by `::<fn name>`.
pub caller_path: String,
/// Callee as written: a `::`-joined path for path calls, the method name for
/// method calls, or the raw tokens of the callee expression otherwise.
pub callee_ident: String,
/// `"call"` for function-call expressions, `"method"` for method calls.
pub call_kind: String,
/// Path of the source file as recorded by the scanner.
pub file: String,
/// Start line of the callee expression / method identifier.
pub line: u32,
}
/// A function recognised as a test by its attributes.
#[derive(Debug, Clone, serde::Serialize)]
pub struct TestDefRow {
/// Name of the crate containing the test.
pub crate_name: String,
/// Module path that was current when the test function was visited.
pub module_path: String,
/// Identifier of the test function.
pub test_name: String,
/// Path of the source file as recorded by the scanner.
pub file: String,
/// Start line of the test function's identifier.
pub line: u32,
/// `true` when the function carries an `#[ignore]` attribute.
pub is_heavy: bool,
/// `true` when the function is `async` or carries an async test attribute
/// (`tokio::test` / `async_std::test`).
pub is_async: bool,
}
/// A `#[cfg(...)]` attribute attached to an emitted item.
// NOTE(review): unlike the other row types this one does not derive
// `serde::Serialize` — confirm whether that is intentional.
#[derive(Debug, Clone)]
pub struct FeatureGateRow {
/// Name of the crate containing the gated item.
pub crate_name: String,
/// Module path that was current when the gated item was emitted.
pub module_path: String,
/// Name of the gated item.
pub item_name: String,
/// Token text found inside the parentheses of `#[cfg(...)]`.
pub cfg_expr: String,
/// Path of the source file as recorded by the scanner.
pub file: String,
/// Line of the gated item (same line as its symbol row).
pub line: u32,
}
/// Scans every crate found under `repo_root` and returns the combined inventory.
///
/// The tree is walked looking for `Cargo.toml` files; directories matched by
/// `is_skipped_dir` are pruned. Each manifest is handed to `collect_crate_files`
/// to gather that crate's `.rs` files, and every gathered file is then parsed by
/// `scan_one_file`. Files that cannot be read or parsed are skipped silently.
///
/// `repo_name`, `snapshot_id` and `ts` are copied verbatim into the result.
///
/// # Errors
///
/// Returns an error (with context `"walkdir"`) if the directory walk itself
/// yields an error.
pub fn scan_repo(
    repo_root: &Path,
    repo_name: &str,
    snapshot_id: Uuid,
    ts: DateTime<Utc>,
) -> Result<SymbolScan> {
    let mut files: Vec<(std::path::PathBuf, String, &'static str)> = Vec::new();
    for entry in WalkDir::new(repo_root)
        .into_iter()
        // `filter_entry` is also consulted for the root entry (depth 0). The root
        // must never be pruned, otherwise a repository checked out into a directory
        // literally named e.g. `target` would produce an empty scan.
        .filter_entry(|e| e.depth() == 0 || !is_skipped_dir(&e.file_name().to_string_lossy()))
    {
        let entry = entry.context("walkdir")?;
        if entry.file_name() == "Cargo.toml" {
            collect_crate_files(entry.path(), &mut files);
        }
    }
    let mut out = SymbolScan {
        snapshot_id,
        ts,
        repo: repo_name.to_string(),
        ..Default::default()
    };
    for (file, crate_name, section) in &files {
        scan_one_file(repo_root, file, crate_name, section, &mut out);
    }
    Ok(out)
}
/// Returns `true` when `name` is one of the directory names the scanner never
/// descends into (build output, VCS metadata, JS dependencies, tool state).
fn is_skipped_dir(name: &str) -> bool {
    const SKIPPED: [&str; 4] = ["target", ".git", "node_modules", ".nornir"];
    SKIPPED.contains(&name)
}
/// Appends every `.rs` file of the crate described by `cargo_toml` to `files`.
///
/// Each pushed tuple is `(file path, crate name, section)`, where the section is
/// the top-level crate directory the file was found under (`src`, `tests`,
/// `benches` or `examples`). The function is best-effort: an unreadable or
/// unparsable manifest, a manifest without `package.name`, or a walk error on an
/// individual entry simply contributes nothing.
fn collect_crate_files(
    cargo_toml: &Path,
    files: &mut Vec<(std::path::PathBuf, String, &'static str)>,
) {
    const SECTIONS: [&str; 4] = ["src", "tests", "benches", "examples"];

    let Ok(manifest) = std::fs::read_to_string(cargo_toml) else { return };
    let Ok(doc) = manifest.parse::<toml::Value>() else { return };
    let package_name = doc
        .get("package")
        .and_then(|pkg| pkg.get("name"))
        .and_then(|name| name.as_str());
    let (Some(crate_name), Some(crate_dir)) = (package_name, cargo_toml.parent()) else {
        return;
    };

    for section in SECTIONS {
        let dir = crate_dir.join(section);
        if !dir.is_dir() {
            continue;
        }
        let sources = WalkDir::new(&dir)
            .into_iter()
            .filter_entry(|e| !is_skipped_dir(&e.file_name().to_string_lossy()))
            // Entries that failed to be read are ignored rather than aborting.
            .filter_map(|walked| walked.ok())
            .filter(|found| {
                found.file_type().is_file()
                    && found.path().extension().and_then(|ext| ext.to_str()) == Some("rs")
            })
            .map(|found| (found.path().to_path_buf(), crate_name.to_string(), section));
        files.extend(sources);
    }
}
/// Reads `file` from disk and feeds its contents to `scan_content`.
///
/// The recorded path is `file` relative to `repo_root` (or `file` unchanged when
/// it is not under `repo_root`). Unreadable files are skipped silently.
fn scan_one_file(
    repo_root: &Path,
    file: &Path,
    crate_name: &str,
    section: &str,
    out: &mut SymbolScan,
) {
    if let Ok(source) = std::fs::read_to_string(file) {
        let shown_path = file.strip_prefix(repo_root).unwrap_or(file);
        let rel = shown_path.to_string_lossy().into_owned();
        let stem = match file.file_stem().and_then(|s| s.to_str()) {
            Some(stem) => stem,
            None => "",
        };
        scan_content(&source, &rel, crate_name, section, stem, out);
    }
}
/// Parses `text` as a Rust file and appends everything found in it to `out`.
///
/// The root module path is derived from `section` and `stem` only:
/// `src/lib.rs` and `src/main.rs` map to the crate name, any other file under
/// `src` maps to `<crate>::<stem>`, and files in other sections map to
/// `<crate>::<section>::<stem>`. Intermediate directories are not reflected.
/// Text that fails to parse is ignored.
fn scan_content(
    text: &str,
    rel: &str,
    crate_name: &str,
    section: &str,
    stem: &str,
    out: &mut SymbolScan,
) {
    let syntax = match syn::parse_file(text) {
        Ok(parsed) => parsed,
        Err(_) => return,
    };
    let root_module = match (section, stem) {
        ("src", "lib" | "main") => crate_name.to_string(),
        ("src", _) => format!("{crate_name}::{stem}"),
        _ => format!("{crate_name}::{section}::{stem}"),
    };
    let mut walker = Walker {
        crate_name: crate_name.to_string(),
        file: rel.to_string(),
        module_stack: vec![root_module],
        out,
    };
    walker.walk_items(&syntax.items);
}
/// Scans a single in-memory source file taken from an archive.
///
/// `tar_path` is expected to start with one leading path component, which is
/// stripped; the remainder must be a `.rs` file under `src`, `tests`, `benches`
/// or `examples`. Any other path yields a result that carries only the
/// metadata (`repo_name`, `snapshot_id`, `ts`) and no rows.
pub fn scan_file(
    crate_name: &str,
    tar_path: &str,
    content: &str,
    repo_name: &str,
    snapshot_id: Uuid,
    ts: DateTime<Utc>,
) -> SymbolScan {
    let mut out = SymbolScan {
        snapshot_id,
        ts,
        repo: repo_name.to_string(),
        ..Default::default()
    };
    // Drop the first path component; keep the path as-is when there is none.
    let rel = match tar_path.split_once('/') {
        Some((_, rest)) => rest,
        None => tar_path,
    };
    let section = rel.split('/').next().unwrap_or("");
    let scannable = tar_path.ends_with(".rs")
        && matches!(section, "src" | "tests" | "benches" | "examples");
    if scannable {
        let stem = Path::new(rel)
            .file_stem()
            .and_then(|s| s.to_str())
            .unwrap_or("");
        scan_content(content, rel, crate_name, section, stem, &mut out);
    }
    out
}
/// Scans a set of in-memory archive entries belonging to one crate.
///
/// Each entry is `(path, bytes)`. The first path component is stripped; entries
/// that are not `.rs` files under `src`, `tests`, `benches` or `examples`, or
/// whose bytes are not valid UTF-8, are skipped. All rows are accumulated into
/// a single result carrying `repo_name`, `snapshot_id` and `ts`.
pub fn scan_crate_entries(
    crate_name: &str,
    entries: &[(String, Vec<u8>)],
    repo_name: &str,
    snapshot_id: Uuid,
    ts: DateTime<Utc>,
) -> SymbolScan {
    let mut out = SymbolScan {
        snapshot_id,
        ts,
        repo: repo_name.to_string(),
        ..Default::default()
    };
    for (path, bytes) in entries {
        // Drop the first path component; keep the path as-is when there is none.
        let rel = path
            .split_once('/')
            .map_or(path.as_str(), |(_, rest)| rest);
        let section = rel.split('/').next().unwrap_or("");
        let wanted = path.ends_with(".rs")
            && matches!(section, "src" | "tests" | "benches" | "examples");
        if !wanted {
            continue;
        }
        if let Ok(text) = std::str::from_utf8(bytes) {
            let stem = Path::new(rel)
                .file_stem()
                .and_then(|s| s.to_str())
                .unwrap_or("");
            scan_content(text, rel, crate_name, section, stem, &mut out);
        }
    }
    out
}
/// Mutable state threaded through the walk of one parsed source file.
struct Walker<'a> {
/// Crate name copied onto every emitted row.
crate_name: String,
/// File path copied onto every emitted row.
file: String,
/// Stack of module paths; the last element is the scope new rows are
/// attributed to.
module_stack: Vec<String>,
/// Scan result that rows are appended to.
out: &'a mut SymbolScan,
}
impl<'a> Walker<'a> {
fn cur(&self) -> &str {
self.module_stack.last().map(|s| s.as_str()).unwrap_or("")
}
fn walk_items(&mut self, items: &[Item]) {
for item in items {
self.walk_item(item);
}
}
fn walk_item(&mut self, item: &Item) {
match item {
Item::Fn(f) => {
self.emit("fn", &f.sig.ident.to_string(), &vis(&f.vis),
Some(format!("fn {}{}", f.sig.ident, fn_inputs(&f.sig.inputs))),
&f.attrs, f.sig.ident.span());
self.maybe_emit_test(&f.sig.ident.to_string(), &f.attrs, f.sig.asyncness.is_some(), f.sig.ident.span());
self.walk_fn_body(&f.sig.ident.to_string(), &f.block);
}
Item::Struct(s) => self.emit("struct", &s.ident.to_string(), &vis(&s.vis), None, &s.attrs, s.ident.span()),
Item::Enum(e) => self.emit("enum", &e.ident.to_string(), &vis(&e.vis), None, &e.attrs, e.ident.span()),
Item::Trait(t) => self.emit("trait", &t.ident.to_string(), &vis(&t.vis), None, &t.attrs, t.ident.span()),
Item::Type(t) => self.emit("type", &t.ident.to_string(), &vis(&t.vis), None, &t.attrs, t.ident.span()),
Item::Const(c) => self.emit("const", &c.ident.to_string(), &vis(&c.vis), None, &c.attrs, c.ident.span()),
Item::Static(s) => self.emit("static", &s.ident.to_string(), &vis(&s.vis), None, &s.attrs, s.ident.span()),
Item::Macro(m) => {
if let Some(id) = &m.ident {
self.emit("macro", &id.to_string(), "pub", None, &m.attrs, id.span());
}
}
Item::Mod(m) => {
let nested = format!("{}::{}", self.cur(), m.ident);
self.emit("mod", &m.ident.to_string(), &vis(&m.vis), None, &m.attrs, m.ident.span());
if let Some((_, sub)) = &m.content {
self.module_stack.push(nested);
self.walk_items(sub);
self.module_stack.pop();
}
}
Item::Impl(i) => self.walk_impl(i),
_ => {}
}
}
fn walk_impl(&mut self, i: &ItemImpl) {
let self_ty = tokens_to_string(&i.self_ty);
let trait_part = i.trait_.as_ref()
.map(|(_, p, _)| path_to_string(p))
.unwrap_or_default();
let impl_label = if trait_part.is_empty() {
format!("impl {self_ty}")
} else {
format!("impl {trait_part} for {self_ty}")
};
self.emit("impl", &impl_label, "pub", None, &i.attrs, i.self_ty.span());
let parent = self.cur().to_string();
self.module_stack.push(format!("{parent}::{self_ty}"));
for it in &i.items {
if let ImplItem::Fn(f) = it {
self.emit("fn", &f.sig.ident.to_string(), &vis(&f.vis),
Some(format!("fn {}{}", f.sig.ident, fn_inputs(&f.sig.inputs))),
&f.attrs, f.sig.ident.span());
self.maybe_emit_test(&f.sig.ident.to_string(), &f.attrs, f.sig.asyncness.is_some(), f.sig.ident.span());
self.walk_fn_body(&f.sig.ident.to_string(), &f.block);
}
}
self.module_stack.pop();
}
fn walk_fn_body(&mut self, fn_name: &str, body: &syn::Block) {
let caller = format!("{}::{}", self.cur(), fn_name);
let mut cv = CallCollector {
caller,
crate_name: self.crate_name.clone(),
file: self.file.clone(),
out: &mut self.out.calls,
};
cv.visit_block(body);
}
fn emit(
&mut self,
kind: &str,
name: &str,
visi: &str,
sig: Option<String>,
attrs: &[Attribute],
span: proc_macro2::Span,
) {
let line = span.start().line as u32;
let doc_lines = attrs.iter().filter(|a| a.path().is_ident("doc")).count() as u32;
self.out.symbols.push(SymbolRow {
crate_name: self.crate_name.clone(),
module_path: self.cur().to_string(),
item_kind: kind.to_string(),
item_name: name.to_string(),
visibility: visi.to_string(),
file: self.file.clone(),
line,
doc_lines,
signature: sig,
});
for cfg in extract_cfgs(attrs) {
self.out.features.push(FeatureGateRow {
crate_name: self.crate_name.clone(),
module_path: self.cur().to_string(),
item_name: name.to_string(),
cfg_expr: cfg,
file: self.file.clone(),
line,
});
}
}
fn maybe_emit_test(&mut self, name: &str, attrs: &[Attribute], asyncness: bool, span: proc_macro2::Span) {
if !is_test_attr(attrs) {
return;
}
let is_heavy = attrs.iter().any(|a| a.path().is_ident("ignore"));
let is_async = asyncness || attrs.iter().any(|a| attr_last_seg(a) == "tokio" || attr_is_async_test(a));
self.out.tests.push(TestDefRow {
crate_name: self.crate_name.clone(),
module_path: self.cur().to_string(),
test_name: name.to_string(),
file: self.file.clone(),
line: span.start().line as u32,
is_heavy,
is_async,
});
}
}
fn is_test_attr(attrs: &[Attribute]) -> bool {
attrs.iter().any(|a| {
let last = attr_last_seg(a);
matches!(last.as_str(), "test" | "rstest" | "test_case" | "proptest")
})
}
/// Returns `true` when the attribute path is exactly two segments long and reads
/// `tokio::test` or `async_std::test`.
fn attr_is_async_test(a: &Attribute) -> bool {
    let segments = &a.path().segments;
    if segments.len() != 2 {
        return false;
    }
    let runtime = segments[0].ident.to_string();
    let leaf = segments[1].ident.to_string();
    (runtime == "tokio" || runtime == "async_std") && leaf == "test"
}
/// Returns the final segment of the attribute's path as a string, or an empty
/// string when the path has no segments.
fn attr_last_seg(a: &Attribute) -> String {
    match a.path().segments.last() {
        Some(segment) => segment.ident.to_string(),
        None => String::new(),
    }
}
/// Expression visitor that records call edges for one function body.
struct CallCollector<'a> {
/// Path of the enclosing function, copied into each edge's `caller_path`.
caller: String,
/// Crate name copied onto every emitted edge.
crate_name: String,
/// File path copied onto every emitted edge.
file: String,
/// Destination the edges are appended to.
out: &'a mut Vec<CallEdgeRow>,
}
impl<'ast, 'a> Visit<'ast> for CallCollector<'a> {
    /// Records an edge for every call and method-call expression, then keeps
    /// descending so that nested expressions are visited as well.
    fn visit_expr(&mut self, e: &'ast Expr) {
        // (callee text, call kind, span used for the line number)
        let edge = match e {
            Expr::Call(call) => {
                let callee = match call.func.as_ref() {
                    Expr::Path(target) => path_to_string(&target.path),
                    // Non-path callees (closures, fields, ...) are kept as raw tokens.
                    other => tokens_to_string(other),
                };
                Some((callee, "call", call.func.span()))
            }
            Expr::MethodCall(call) => {
                Some((call.method.to_string(), "method", call.method.span()))
            }
            _ => None,
        };
        if let Some((callee_ident, kind, span)) = edge {
            self.out.push(CallEdgeRow {
                crate_name: self.crate_name.clone(),
                caller_path: self.caller.clone(),
                callee_ident,
                call_kind: kind.into(),
                file: self.file.clone(),
                line: span.start().line as u32,
            });
        }
        syn::visit::visit_expr(self, e);
    }
}
/// Renders a visibility as `"pub"`, `"priv"` (no modifier) or `"pub(<path>)"`
/// for restricted visibility.
fn vis(v: &Visibility) -> String {
    match v {
        Visibility::Inherited => String::from("priv"),
        Visibility::Public(_) => String::from("pub"),
        Visibility::Restricted(restricted) => {
            let scope = path_to_string(&restricted.path);
            format!("pub({scope})")
        }
    }
}
/// Joins the identifiers of a path's segments with `::`. Generic arguments and
/// a leading `::` are not included.
fn path_to_string(p: &syn::Path) -> String {
    let mut joined = String::new();
    for (index, segment) in p.segments.iter().enumerate() {
        if index > 0 {
            joined.push_str("::");
        }
        joined.push_str(&segment.ident.to_string());
    }
    joined
}
fn tokens_to_string<T: quote::ToTokens>(t: &T) -> String {
let mut ts = proc_macro2::TokenStream::new();
t.to_tokens(&mut ts);
ts.to_string()
}
/// Renders a parameter list as `(T1, T2, ...)` using only the parameter types;
/// any receiver is rendered as plain `self`.
fn fn_inputs(inputs: &syn::punctuated::Punctuated<syn::FnArg, syn::Token![,]>) -> String {
    let mut rendered: Vec<String> = Vec::with_capacity(inputs.len());
    for arg in inputs {
        let text = match arg {
            syn::FnArg::Typed(param) => tokens_to_string(&param.ty),
            syn::FnArg::Receiver(_) => String::from("self"),
        };
        rendered.push(text);
    }
    format!("({})", rendered.join(", "))
}
/// Returns the token text inside every list-style `#[cfg(...)]` attribute, in
/// attribute order. Other attributes (including `cfg_attr`) are ignored.
fn extract_cfgs(attrs: &[Attribute]) -> Vec<String> {
    attrs
        .iter()
        .filter(|attr| attr.path().is_ident("cfg"))
        .filter_map(|attr| match &attr.meta {
            Meta::List(list) => Some(list.tokens.to_string()),
            _ => None,
        })
        .collect()
}
// End-to-end tests: each one writes a throwaway crate into a temporary
// directory and runs `scan_repo` over it.
#[cfg(test)]
mod tests {
use super::*;
// Checks that the main item kinds, call edges and a feature gate are all
// picked up from a small library crate.
#[test]
fn scans_basic_crate() {
let dir = tempfile::tempdir().unwrap();
let root = dir.path();
std::fs::create_dir_all(root.join("src")).unwrap();
std::fs::write(root.join("Cargo.toml"), r#"[package]
name = "demo"
version = "0.1.0"
edition = "2021"
"#).unwrap();
std::fs::write(root.join("src/lib.rs"), r#"
pub fn outer(x: u32) -> u32 {
helper(x) + 1
}
fn helper(x: u32) -> u32 { x.saturating_add(2) }
pub struct Foo { pub n: u32 }
impl Foo {
pub fn make() -> Self { Self { n: 0 } }
}
pub trait Bar { fn baz(&self); }
#[cfg(feature = "experimental")]
pub fn gated() {}
"#).unwrap();
let scan = scan_repo(root, "demo", Uuid::new_v4(), Utc::now()).unwrap();
// Compare on (kind, name) pairs so the assertions are independent of row order.
let kinds: Vec<_> = scan.symbols.iter().map(|s| (s.item_kind.as_str(), s.item_name.as_str())).collect();
assert!(kinds.contains(&("fn", "outer")), "{kinds:?}");
assert!(kinds.contains(&("fn", "helper")));
assert!(kinds.contains(&("struct", "Foo")));
assert!(kinds.iter().any(|(k, _)| *k == "impl"));
assert!(kinds.contains(&("fn", "make")));
assert!(kinds.contains(&("trait", "Bar")));
assert!(kinds.contains(&("fn", "gated")));
// Both a plain call (`helper`) and a method call (`saturating_add`) must be recorded.
let callees: Vec<&str> = scan.calls.iter().map(|c| c.callee_ident.as_str()).collect();
assert!(callees.contains(&"helper"), "{callees:?}");
assert!(callees.contains(&"saturating_add"));
assert!(scan.features.iter().any(|f| f.item_name == "gated" && f.cfg_expr.contains("experimental")));
}
// Checks that symbol, call and feature rows carry real (non-zero) source lines.
// The expected numbers are 1-based lines within the fixture string below, so
// they depend on the fixture's exact line layout (including blank lines);
// keep the two in sync when editing either.
#[test]
fn captures_real_source_lines() {
let dir = tempfile::tempdir().unwrap();
let root = dir.path();
std::fs::create_dir_all(root.join("src")).unwrap();
std::fs::write(root.join("Cargo.toml"), "[package]\nname = \"demo\"\nversion = \"0.1.0\"\nedition = \"2021\"\n").unwrap();
std::fs::write(root.join("src/lib.rs"), r#"
pub fn outer(x: u32) -> u32 {
helper(x) + 1
}
fn helper(x: u32) -> u32 { x.saturating_add(2) }
pub struct Foo { pub n: u32 }
#[cfg(feature = "experimental")]
pub fn gated() {}
"#).unwrap();
let scan = scan_repo(root, "demo", Uuid::new_v4(), Utc::now()).unwrap();
// Looks up the recorded line of the first symbol with the given name.
let line_of = |name: &str| scan.symbols.iter().find(|s| s.item_name == name).map(|s| s.line);
assert_eq!(line_of("outer"), Some(2));
assert_eq!(line_of("helper"), Some(5));
assert_eq!(line_of("Foo"), Some(7));
assert_eq!(line_of("gated"), Some(10));
assert!(scan.symbols.iter().all(|s| s.line > 0), "{:?}",
scan.symbols.iter().map(|s| (&s.item_name, s.line)).collect::<Vec<_>>());
let helper_call = scan.calls.iter().find(|c| c.callee_ident == "helper").unwrap();
assert_eq!(helper_call.line, 3);
assert!(scan.calls.iter().all(|c| c.line > 0));
// A feature-gate row reports the line of the gated item, not of the attribute.
let gate = scan.features.iter().find(|f| f.item_name == "gated").unwrap();
assert_eq!(gate.line, 10);
}
// Checks the test inventory: which functions count as tests, and how the
// heavy (`#[ignore]`) and async flags are derived.
#[test]
fn scans_test_inventory_with_heavy_and_async_flags() {
let dir = tempfile::tempdir().unwrap();
let root = dir.path();
std::fs::create_dir_all(root.join("src")).unwrap();
std::fs::write(
root.join("Cargo.toml"),
"[package]\nname = \"demo\"\nversion = \"0.1.0\"\nedition = \"2021\"\n",
)
.unwrap();
std::fs::write(
root.join("src/lib.rs"),
r#"
pub fn not_a_test() {}
#[cfg(test)]
mod tests {
#[test]
fn light_one() { assert_eq!(1, 1); }
#[tokio::test]
async fn async_one() { assert!(true); }
#[test]
#[ignore]
fn heavy_corpus() { assert!(true); }
#[test_case(1 ; "case a")]
fn macro_cased(x: u32) { let _ = x; }
}
"#,
)
.unwrap();
let scan = scan_repo(root, "demo", Uuid::new_v4(), Utc::now()).unwrap();
// Finds the test row with the given function name, if any.
let by_name = |n: &str| scan.tests.iter().find(|t| t.test_name == n);
let names: Vec<&str> = scan.tests.iter().map(|t| t.test_name.as_str()).collect();
assert!(names.contains(&"light_one"), "{names:?}");
assert!(names.contains(&"async_one"));
assert!(names.contains(&"heavy_corpus"));
assert!(names.contains(&"macro_cased"));
assert!(!names.contains(&"not_a_test"), "plain fn must not be a test row");
assert!(by_name("heavy_corpus").unwrap().is_heavy, "ignore → heavy");
assert!(!by_name("light_one").unwrap().is_heavy);
assert!(by_name("async_one").unwrap().is_async, "tokio::test → async");
assert!(!by_name("light_one").unwrap().is_async);
// Tests inside the inline `mod tests` must be attributed to that module.
let light = by_name("light_one").unwrap();
assert!(light.module_path.contains("tests"), "module path: {}", light.module_path);
assert!(light.line > 0);
assert_eq!(light.crate_name, "demo");
}
}