use anyhow::Result;
use petgraph::prelude::*;
use serde::Serialize;
use std::collections::{HashMap, HashSet};
use std::path::{Path, PathBuf};
use std::process::Command as ProcessCommand;
use super::*;
// Command-line arguments for the file-level analysis command.
//
// NOTE: plain `//` comments are used on purpose; `///` doc comments on a clap
// derive struct are picked up as help text and would change CLI output.
#[derive(Parser, Debug, Clone)]
pub(crate) struct FilesArgs {
// Positional argument, defaults to "." (presumably the project directory to
// analyse; confirm against the command handler).
#[arg(default_value = ".")]
pub path: String,
// Optional explicit ecosystem; `None` when the flag is omitted.
#[arg(long, value_enum)]
pub ecosystem: Option<Ecosystem>,
// When set, `should_include` admits files classified as `FileRole::Test`.
#[arg(long, default_value_t = false)]
pub include_tests: bool,
// When set, `should_include` admits every role (tests, benches, examples and
// build files in addition to regular sources).
#[arg(long, default_value_t = false)]
pub include_all: bool,
// Selected metric (`-m` / `--metric`), defaulting to `Metric::Pagerank`.
#[arg(short, long, value_enum, default_value_t = Metric::Pagerank)]
pub metric: Metric,
// `-n` / `--top`, default 25 (presumably the number of rows to display; TODO confirm).
#[arg(short = 'n', long, default_value_t = 25)]
pub top: usize,
// Output format, defaulting to `OutputFormat::Text`.
#[arg(long, value_enum, default_value_t = OutputFormat::Text)]
pub format: OutputFormat,
// Boolean switch; its consumer is not visible in this part of the file.
#[arg(long, default_value_t = false)]
pub directory: bool,
// Optional string; its consumer is not visible in this part of the file.
#[arg(long)]
pub focus: Option<String>,
// Boolean switch (presumably enables the git-history statistics; TODO confirm).
#[arg(long, default_value_t = false)]
pub git: bool,
// Boolean switch; its consumer is not visible in this part of the file.
#[arg(long, default_value_t = false)]
pub cache: bool,
// Default 90 (presumably the look-back window in days handed to
// `git_file_stats`; TODO confirm).
#[arg(long, default_value_t = 90)]
pub git_days: u64,
// Defaults to true. `ArgAction::Set` means the flag takes an explicit value
// (e.g. `--store false`) instead of acting as a bare switch.
#[arg(long, default_value_t = true, action = clap::ArgAction::Set)]
pub store: bool,
}
/// Role a source file plays inside its project, as assigned by the
/// `classify_*_file` helpers. Serialized in `snake_case`.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)]
#[serde(rename_all = "snake_case")]
pub(crate) enum FileRole {
/// Library root (e.g. `src/lib.rs`, `__init__.py`, `index.ts`).
LibRoot,
/// Executable entry point (e.g. `src/main.rs`, `__main__.py`, `main.go`).
BinEntry,
/// Any file that matches none of the more specific roles.
Source,
/// Test code; only included when `include_tests` or `include_all` is set.
Test,
/// Benchmark code; only included when `include_all` is set.
Bench,
/// Example code; only included when `include_all` is set.
Example,
/// Build or configuration script; only included when `include_all` is set.
Build,
}
/// Classifies a Rust source file by its location relative to `project_root`.
///
/// If `path` is not under `project_root` the full path is matched as-is.
/// Rules are checked in order: `build.rs`, `tests/`, `benches/`, `examples/`,
/// `src/main.rs` or `src/bin/`, `src/lib.rs`; anything else is
/// [`FileRole::Source`].
fn classify_rust_file(path: &Path, project_root: &Path) -> FileRole {
    let rel = path.strip_prefix(project_root).unwrap_or(path);
    // Normalise Windows separators once so that every rule below applies on
    // both platforms. Previously only the tests/benches/examples rules
    // accepted `\`, so `src\main.rs` and `src\lib.rs` fell through to `Source`.
    let rel_str = rel.to_string_lossy().replace('\\', "/");

    if rel_str == "build.rs" {
        return FileRole::Build;
    }
    if rel_str.starts_with("tests/") {
        return FileRole::Test;
    }
    if rel_str.starts_with("benches/") {
        return FileRole::Bench;
    }
    if rel_str.starts_with("examples/") {
        return FileRole::Example;
    }
    if rel_str == "src/main.rs" || rel_str.starts_with("src/bin/") {
        return FileRole::BinEntry;
    }
    if rel_str == "src/lib.rs" {
        return FileRole::LibRoot;
    }
    FileRole::Source
}
/// Classifies a Python file from its project-relative location and file name.
///
/// Test detection wins over everything else, followed by benchmark and example
/// directories; the remaining roles are decided by the bare file name.
fn classify_python_file(path: &Path, project_root: &Path) -> FileRole {
    let relative = path
        .strip_prefix(project_root)
        .unwrap_or(path)
        .to_string_lossy();
    let base_name = path.file_name().and_then(|n| n.to_str()).unwrap_or("");
    let under = |prefix: &str| relative.starts_with(prefix);

    let is_test = under("tests/")
        || under("test/")
        || base_name.starts_with("test_")
        || base_name.ends_with("_test.py")
        || base_name == "conftest.py";

    if is_test {
        FileRole::Test
    } else if under("benchmarks/") || under("bench/") {
        FileRole::Bench
    } else if under("examples/") {
        FileRole::Example
    } else {
        match base_name {
            "setup.py" | "setup.cfg" => FileRole::Build,
            "__main__.py" => FileRole::BinEntry,
            "__init__.py" => FileRole::LibRoot,
            _ => FileRole::Source,
        }
    }
}
/// Classifies a JavaScript/TypeScript file from its project-relative location
/// and file name.
///
/// Order of precedence: tests, examples, build/config files, `index.*` roots,
/// then plain sources.
fn classify_js_file(path: &Path, project_root: &Path) -> FileRole {
    let relative = path
        .strip_prefix(project_root)
        .unwrap_or(path)
        .to_string_lossy();
    let base_name = path.file_name().and_then(|n| n.to_str()).unwrap_or("");
    let under = |prefix: &str| relative.starts_with(prefix);

    let named_like_test = base_name.contains(".test.") || base_name.contains(".spec.");
    let in_test_dir = under("__tests__/")
        || relative.contains("/__tests__/")
        || under("tests/")
        || under("test/");

    if named_like_test || in_test_dir {
        FileRole::Test
    } else if under("examples/") || under("example/") {
        FileRole::Example
    } else if base_name.contains("config.") || base_name.starts_with("webpack.") {
        FileRole::Build
    } else {
        match base_name {
            "index.ts" | "index.js" | "index.tsx" | "index.jsx" => FileRole::LibRoot,
            _ => FileRole::Source,
        }
    }
}
/// Classifies a Go file from its file name and project-relative location.
///
/// Benchmarks (`*_bench_test.go`) are recognised before ordinary tests
/// (`*_test.go`); any `main.go` is a binary entry point regardless of its
/// directory; files under `examples/` or `example/` are examples.
fn classify_go_file(path: &Path, project_root: &Path) -> FileRole {
    let relative = path
        .strip_prefix(project_root)
        .unwrap_or(path)
        .to_string_lossy();
    let base_name = path.file_name().and_then(|n| n.to_str()).unwrap_or("");

    if base_name.ends_with("_bench_test.go") {
        FileRole::Bench
    } else if base_name.ends_with("_test.go") {
        FileRole::Test
    } else if base_name == "main.go" {
        // Covers `cmd/**/main.go` as well as any other `main.go`.
        FileRole::BinEntry
    } else if relative.starts_with("examples/") || relative.starts_with("example/") {
        FileRole::Example
    } else {
        FileRole::Source
    }
}
fn classify_file(path: &Path, project_root: &Path, ecosystem: Ecosystem) -> FileRole {
match ecosystem {
Ecosystem::Cargo => classify_rust_file(path, project_root),
Ecosystem::Python => classify_python_file(path, project_root),
Ecosystem::Npm => classify_js_file(path, project_root),
Ecosystem::Go => classify_go_file(path, project_root),
}
}
/// Decides whether a file with the given role takes part in the analysis.
///
/// Regular sources and entry points are always included; tests need
/// `include_tests` or `include_all`; benches, examples and build files need
/// `include_all`.
fn should_include(role: FileRole, args: &FilesArgs) -> bool {
    match role {
        FileRole::Bench | FileRole::Example | FileRole::Build => args.include_all,
        FileRole::Test => args.include_all || args.include_tests,
        FileRole::LibRoot | FileRole::BinEntry | FileRole::Source => true,
    }
}
/// Returns the first ecosystem reported by [`detect_all_ecosystems`], if any.
#[allow(dead_code)]
pub(crate) fn detect_ecosystem(dir: &Path) -> Option<Ecosystem> {
    let mut detected = detect_all_ecosystems(dir).into_iter();
    detected.next()
}
/// Lists every ecosystem whose marker files exist directly inside `dir`.
///
/// The result is ordered Cargo, Go, Python, Npm.
pub(crate) fn detect_all_ecosystems(dir: &Path) -> Vec<Ecosystem> {
    // True when at least one of the named marker files exists in `dir`.
    let has_any = |markers: &[&str]| markers.iter().any(|marker| dir.join(marker).exists());

    let mut found = Vec::new();
    if has_any(&["Cargo.toml"]) {
        found.push(Ecosystem::Cargo);
    }
    if has_any(&["go.mod"]) {
        found.push(Ecosystem::Go);
    }
    if has_any(&["pyproject.toml", "uv.lock", "setup.py"]) {
        found.push(Ecosystem::Python);
    }
    if has_any(&[
        "package.json",
        "package-lock.json",
        "deno.json",
        "deno.jsonc",
        "import_map.json",
    ]) {
        found.push(Ecosystem::Npm);
    }
    found
}
/// Collects, in sorted order, every file under `root` whose extension belongs
/// to the given ecosystem.
fn discover_files(root: &Path, ecosystem: Ecosystem) -> Vec<PathBuf> {
    let wanted: &[&str] = match ecosystem {
        Ecosystem::Go => &["go"],
        Ecosystem::Npm => &["ts", "tsx", "js", "jsx", "mjs"],
        Ecosystem::Python => &["py"],
        Ecosystem::Cargo => &["rs"],
    };

    let mut found = Vec::new();
    walk_dir(root, wanted, &mut found);
    found.sort();
    found
}
/// Recursively appends to `out` every file below `dir` whose extension is
/// listed in `extensions`.
///
/// Unreadable directories and entries are skipped silently, and a fixed set of
/// build/cache/vendor directory names is never descended into.
fn walk_dir(dir: &Path, extensions: &[&str], out: &mut Vec<PathBuf>) {
    // Directory names that are never traversed.
    const SKIPPED_DIRS: &[&str] = &[
        "target",
        "node_modules",
        ".git",
        "__pycache__",
        ".mypy_cache",
        ".pytest_cache",
        "dist",
        "build",
        ".next",
        ".vercel",
        ".nuxt",
        ".svelte-kit",
        ".angular",
        "coverage",
        "vendor",
        ".venv",
        "venv",
        ".tox",
        "archive",
    ];

    if let Ok(listing) = std::fs::read_dir(dir) {
        for child in listing.filter_map(Result::ok) {
            let child_path = child.path();
            if child_path.is_dir() {
                let dir_name = child_path
                    .file_name()
                    .and_then(|n| n.to_str())
                    .unwrap_or("");
                if !SKIPPED_DIRS.contains(&dir_name) {
                    walk_dir(&child_path, extensions, out);
                }
                continue;
            }
            let wanted = child_path
                .extension()
                .and_then(|e| e.to_str())
                .map_or(false, |ext| extensions.contains(&ext));
            if wanted {
                out.push(child_path);
            }
        }
    }
}
/// A directed import edge between two files, as produced by the
/// `parse_*_imports` functions.
#[derive(Debug, Clone)]
struct FileEdge {
/// The file that contains the import / `mod` / `use` statement.
from: PathBuf,
/// The file the statement was resolved to.
to: PathBuf,
}
/// Scans each file line by line and collects the names of external
/// (non-project) dependencies it imports.
///
/// Returns a map from file path to a sorted, de-duplicated list of dependency
/// names. Files that cannot be read, or that yield no dependencies, are absent
/// from the map. `internal_prefixes` holds names that count as project-internal
/// and are therefore excluded (it is not consulted for `Ecosystem::Npm`).
fn extract_external_deps(
files: &[PathBuf],
ecosystem: Ecosystem,
internal_prefixes: &HashSet<String>,
) -> HashMap<PathBuf, Vec<String>> {
let mut result: HashMap<PathBuf, Vec<String>> = HashMap::new();
// Top-level crate names that are part of the Rust toolchain rather than
// external dependencies. The set is empty for every other ecosystem.
let std_prefixes: HashSet<&str> = match ecosystem {
Ecosystem::Cargo => ["std", "core", "alloc", "proc_macro", "test"]
.iter()
.copied()
.collect(),
Ecosystem::Python => {
HashSet::new()
}
Ecosystem::Npm | Ecosystem::Go => HashSet::new(),
};
for file in files {
// Unreadable files are skipped.
let content = match std::fs::read_to_string(file) {
Ok(c) => c,
Err(_) => continue,
};
// Set semantics: each dependency is recorded once per file.
let mut deps: HashSet<String> = HashSet::new();
for line in content.lines() {
let line = line.trim();
match ecosystem {
Ecosystem::Cargo => {
// Accept `use ...` with or without a leading visibility modifier.
let use_part = strip_visibility(line)
.strip_prefix("use ")
.or_else(|| line.strip_prefix("use "));
if let Some(use_part) = use_part {
let use_part = use_part.trim_end_matches(';').trim();
// Paths relative to the current crate are internal; move on to
// the next line.
if use_part.starts_with("crate::")
|| use_part.starts_with("super::")
|| use_part.starts_with("self::")
{
continue;
}
// The first path segment is taken as the crate name.
let first_seg = use_part.split("::").next().unwrap_or("");
if !first_seg.is_empty()
&& !internal_prefixes.contains(first_seg)
&& !std_prefixes.contains(first_seg)
{
deps.insert(first_seg.to_string());
}
}
}
Ecosystem::Python => {
if let Some(rest) = line.strip_prefix("import ") {
// `import a.b as c` -> first space-separated token, then the
// part before the first dot.
let mod_name = rest.split(' ').next().unwrap_or(rest);
let top = mod_name.split('.').next().unwrap_or(mod_name);
if !top.is_empty()
&& !top.starts_with('.')
&& !internal_prefixes.contains(top)
{
deps.insert(top.to_string());
}
} else if let Some(rest) = line.strip_prefix("from ") {
if let Some((mod_part, _)) = rest.split_once(" import ") {
let mod_part = mod_part.trim();
// Relative imports (leading dot) are internal by definition.
if !mod_part.starts_with('.') {
let top = mod_part.split('.').next().unwrap_or(mod_part);
if !internal_prefixes.contains(top) {
deps.insert(top.to_string());
}
}
}
}
}
Ecosystem::Npm => {
for spec in extract_js_import_specifiers(line) {
if !spec.starts_with('.') && !spec.starts_with('@') {
// Unscoped package: keep the part before the first `/`.
let pkg = spec.split('/').next().unwrap_or(&spec);
deps.insert(pkg.to_string());
} else if spec.starts_with("@") {
// Scoped package: keep `@scope/name`; a bare `@scope` with no
// `/` is ignored.
let parts: Vec<&str> = spec.splitn(3, '/').collect();
if parts.len() >= 2 {
deps.insert(format!("{}/{}", parts[0], parts[1]));
}
}
}
}
Ecosystem::Go => {
if let Some(import_path) = extract_go_import(line) {
if !internal_prefixes
.iter()
.any(|p| import_path.starts_with(p.as_str()))
{
// Keep the first three path segments (e.g. host/owner/repo).
// Import paths with fewer than three segments are not recorded.
let parts: Vec<&str> = import_path.splitn(4, '/').collect();
if parts.len() >= 3 {
deps.insert(format!("{}/{}/{}", parts[0], parts[1], parts[2]));
}
}
}
}
}
}
if !deps.is_empty() {
// Sort so the output is deterministic.
let mut sorted: Vec<String> = deps.into_iter().collect();
sorted.sort();
result.insert(file.clone(), sorted);
}
}
result
}
/// Builds file-to-file edges for a Rust project from `mod` declarations and
/// `use` statements.
///
/// Every file under a discovered crate's `src/` directory is mapped to a
/// module path; each file's statements are then resolved against that map.
/// Self-edges are never emitted, and unreadable files are skipped.
fn parse_rust_imports(root: &Path, files: &[PathBuf]) -> Vec<FileEdge> {
    let mut mod_to_file: HashMap<String, PathBuf> = HashMap::new();
    let mut file_to_mod: HashMap<PathBuf, String> = HashMap::new();
    let mut file_to_crate: HashMap<PathBuf, String> = HashMap::new();
    let mut known_crates: HashSet<String> = HashSet::new();

    for (crate_dir, crate_name) in find_rust_crate_roots(root) {
        let src_dir = crate_dir.join("src");
        for file in files.iter().filter(|f| f.starts_with(&src_dir)) {
            if let Some(mod_path) = rust_file_to_mod_path(file, &src_dir, &crate_name) {
                mod_to_file.insert(mod_path.clone(), file.clone());
                file_to_mod.insert(file.clone(), mod_path);
                file_to_crate.insert(file.clone(), crate_name.clone());
            }
        }
        known_crates.insert(crate_name);
    }

    let mut edges = Vec::new();
    for file in files {
        let source = match std::fs::read_to_string(file) {
            Ok(text) => text,
            Err(_) => continue,
        };
        // Files outside any crate get an empty module path and the literal
        // crate name "crate".
        let this_mod = file_to_mod.get(file).map(String::as_str).unwrap_or("");
        let crate_name = file_to_crate
            .get(file)
            .map(String::as_str)
            .unwrap_or("crate");

        for logical in join_rust_logical_lines(&source) {
            let stmt = logical.trim();

            // `mod child;` links to the file that implements the child module.
            let declared_child = parse_mod_declaration(stmt)
                .and_then(|name| mod_to_file.get(&format!("{}::{}", this_mod, name)));
            // `use ...;` may resolve to any number of project files.
            let imported =
                parse_use_statement(stmt, this_mod, crate_name, &known_crates, &mod_to_file)
                    .unwrap_or_default();

            for target in declared_child.cloned().into_iter().chain(imported) {
                if target != *file {
                    edges.push(FileEdge {
                        from: file.clone(),
                        to: target,
                    });
                }
            }
        }
    }
    edges
}
/// Splits `content` into trimmed lines, merging a `use` statement that spans
/// several physical lines into one space-joined logical line.
///
/// A multi-line `use` ends at the first line that ends with `;` once the brace
/// balance is no longer positive. An unterminated `use` at end of input is
/// emitted as-is.
fn join_rust_logical_lines(content: &str) -> Vec<String> {
    // Number of `{` minus number of `}` in `text`.
    fn brace_balance(text: &str) -> i32 {
        text.chars().fold(0, |balance, c| match c {
            '{' => balance + 1,
            '}' => balance - 1,
            _ => balance,
        })
    }

    let mut logical = Vec::new();
    // The `use` statement currently being accumulated, if any.
    let mut pending: Option<String> = None;
    let mut open_braces: i32 = 0;

    for raw in content.lines() {
        let piece = raw.trim();

        if let Some(mut stmt) = pending.take() {
            stmt.push(' ');
            stmt.push_str(piece);
            open_braces += brace_balance(piece);
            if open_braces <= 0 && piece.ends_with(';') {
                logical.push(stmt);
                open_braces = 0;
            } else {
                pending = Some(stmt);
            }
            continue;
        }

        let starts_use = strip_visibility(piece).starts_with("use ") || piece.starts_with("use ");
        if starts_use && !piece.ends_with(';') {
            open_braces = brace_balance(piece);
            pending = Some(piece.to_string());
        } else {
            logical.push(piece.to_string());
        }
    }

    logical.extend(pending);
    logical
}
/// Splits `content` into trimmed lines, merging any line that opens more
/// parentheses than it closes with the following lines until the parentheses
/// balance again. Merged pieces are joined with a single space.
///
/// An unbalanced group at end of input is emitted as-is.
fn join_paren_lines(content: &str) -> Vec<String> {
    // Number of `(` minus number of `)` in `text`.
    fn paren_balance(text: &str) -> i32 {
        text.chars().fold(0, |balance, c| match c {
            '(' => balance + 1,
            ')' => balance - 1,
            _ => balance,
        })
    }

    let mut logical = Vec::new();
    // The parenthesised statement currently being accumulated, if any.
    let mut pending: Option<String> = None;
    let mut open_parens: i32 = 0;

    for raw in content.lines() {
        let piece = raw.trim();

        if let Some(mut stmt) = pending.take() {
            stmt.push(' ');
            stmt.push_str(piece);
            open_parens += paren_balance(piece);
            if open_parens <= 0 {
                logical.push(stmt);
                open_parens = 0;
            } else {
                pending = Some(stmt);
            }
            continue;
        }

        let balance = paren_balance(piece);
        if balance > 0 {
            open_parens = balance;
            pending = Some(piece.to_string());
        } else {
            logical.push(piece.to_string());
        }
    }

    logical.extend(pending);
    logical
}
/// Finds every crate directory (one with both `Cargo.toml` and `src/`) at or
/// below `root`, paired with its crate name.
///
/// The result is sorted by directory and contains each directory once.
fn find_rust_crate_roots(root: &Path) -> Vec<(PathBuf, String)> {
    let mut crates = Vec::new();

    if root.join("Cargo.toml").exists() && root.join("src").is_dir() {
        crates.push((root.to_path_buf(), read_rust_crate_name(root)));
    }
    walk_for_cargo_tomls(root, &mut crates, 0);

    crates.sort_by(|left, right| left.0.cmp(&right.0));
    crates.dedup_by(|left, right| left.0 == right.0);
    crates
}
/// Recursively searches the sub-directories of `dir` for crates (directories
/// holding both `Cargo.toml` and `src/`) and appends them to `roots`.
///
/// Recursion stops once `depth` exceeds 5, and `target`, `.git`,
/// `node_modules` and `vendor` directories are never entered.
fn walk_for_cargo_tomls(dir: &Path, roots: &mut Vec<(PathBuf, String)>, depth: usize) {
    const MAX_DEPTH: usize = 5;
    const SKIPPED_DIRS: [&str; 4] = ["target", ".git", "node_modules", "vendor"];

    if depth > MAX_DEPTH {
        return;
    }
    if let Ok(listing) = std::fs::read_dir(dir) {
        for sub in listing.flatten().map(|e| e.path()).filter(|p| p.is_dir()) {
            let name = sub.file_name().and_then(|n| n.to_str()).unwrap_or("");
            if SKIPPED_DIRS.contains(&name) {
                continue;
            }
            if sub.join("Cargo.toml").exists() && sub.join("src").is_dir() {
                roots.push((sub.clone(), read_rust_crate_name(&sub)));
            }
            walk_for_cargo_tomls(&sub, roots, depth + 1);
        }
    }
}
/// Determines the crate name for the crate rooted at `root`, with `-`
/// replaced by `_`.
///
/// Preference order: `[lib].name`, then `[package].name` from `Cargo.toml`,
/// then the directory name, and finally the literal `"crate"`.
fn read_rust_crate_name(root: &Path) -> String {
    let manifest = std::fs::read_to_string(root.join("Cargo.toml"))
        .ok()
        .and_then(|raw| raw.parse::<toml::Value>().ok());

    if let Some(doc) = manifest {
        for table in ["lib", "package"] {
            let declared = doc
                .get(table)
                .and_then(|section| section.get("name"))
                .and_then(|name| name.as_str());
            if let Some(name) = declared {
                return name.replace('-', "_");
            }
        }
    }

    let dir_name = root.file_name().and_then(|n| n.to_str()).unwrap_or("crate");
    dir_name.replace('-', "_")
}
/// Maps a source file under `src_dir` to its `::`-separated module path.
///
/// * `lib.rs` / `main.rs` map to `crate_name` itself.
/// * `a/mod.rs` and `a.rs` both map to `crate_name::a`.
/// * Files under `bin/` map to `bin::<path>` without the crate prefix.
///
/// Returns `None` when `file` is not located under `src_dir`.
fn rust_file_to_mod_path(file: &Path, src_dir: &Path, crate_name: &str) -> Option<String> {
    let rel = file.strip_prefix(src_dir).ok()?;
    let rel_str = rel.to_string_lossy().replace('\\', "/");
    if rel_str == "lib.rs" || rel_str == "main.rs" {
        return Some(crate_name.to_string());
    }

    // Remove exactly one suffix. `trim_end_matches` would strip repeated
    // suffixes and so over-trim unusual names.
    let stem = rel_str
        .strip_suffix("/mod.rs")
        .or_else(|| rel_str.strip_suffix(".rs"))
        .unwrap_or(&rel_str);

    // Remove exactly one leading `bin/` and convert the remaining separators,
    // so that `bin/cli/main.rs` becomes `bin::cli::main` (previously the `/`
    // was left in place, and `bin/bin/x.rs` collapsed to `bin::x`).
    if let Some(bin_rel) = stem.strip_prefix("bin/") {
        return Some(format!("bin::{}", bin_rel.replace('/', "::")));
    }
    Some(format!("{}::{}", crate_name, stem.replace('/', "::")))
}
/// Extracts the module name from an out-of-line declaration such as `mod foo;`
/// or `pub(crate) mod foo;`.
///
/// Returns `None` for inline modules (`mod foo { ... }`), for lines that do
/// not end in `;`, and for names containing anything other than alphanumeric
/// characters or `_`.
fn parse_mod_declaration(line: &str) -> Option<String> {
    let decl = line.trim();
    if !decl.ends_with(';') {
        return None;
    }

    let ident = strip_visibility(decl)
        .trim()
        .strip_prefix("mod ")?
        .trim_end_matches(';')
        .trim();

    let is_ident = !ident.is_empty() && ident.chars().all(|c| c == '_' || c.is_alphanumeric());
    if is_ident {
        Some(ident.to_string())
    } else {
        None
    }
}
/// Removes a leading visibility modifier (`pub `, `pub(crate) `,
/// `pub(in path) `, ...) from `line`.
///
/// The line is returned unchanged when it has no such prefix, or when a
/// `pub(` prefix is never followed by `") "`.
fn strip_visibility(line: &str) -> &str {
    if line.starts_with("pub(") {
        // Restricted visibility: cut just after the first `") "`.
        return match line.find(") ") {
            Some(close) => &line[close + 2..],
            None => line,
        };
    }
    line.strip_prefix("pub ").unwrap_or(line)
}
/// Resolves a `use` statement to the project files it refers to.
///
/// * `crate::...` is rewritten to start with `crate_name`.
/// * `super::...` walks up from `current_mod`, once per `super::`.
/// * `self::...` is resolved relative to `current_mod`.
/// * Any other path is accepted when its first segment is a known crate, or
///   when it names a child module of `current_mod`.
///
/// Returns `None` when `line` is not a `use` statement, when the path belongs
/// to neither the project nor a child module, or when nothing resolves.
fn parse_use_statement(
    line: &str,
    current_mod: &str,
    crate_name: &str,
    known_crates: &HashSet<String>,
    mod_to_file: &HashMap<String, PathBuf>,
) -> Option<Vec<PathBuf>> {
    let line = line.trim();
    let body = strip_visibility(line)
        .strip_prefix("use ")
        .or_else(|| line.strip_prefix("use "))?;
    let body = body.trim_end_matches(';').trim();

    let absolute: String = if let Some(tail) = body.strip_prefix("crate::") {
        format!("{}::{}", crate_name, tail)
    } else if body.starts_with("super::") {
        // Climb one module level for each leading `super::`.
        let mut base = current_mod;
        let mut tail = body;
        while let Some(after) = tail.strip_prefix("super::") {
            base = base.rsplit_once("::").map_or("", |(parent, _)| parent);
            tail = after;
        }
        if base.is_empty() {
            tail.to_string()
        } else if tail.is_empty() {
            base.to_string()
        } else {
            format!("{}::{}", base, tail)
        }
    } else if let Some(relative) = body.strip_prefix("self::") {
        format!("{}::{}", current_mod, relative)
    } else {
        let first_seg = body.split("::").next().unwrap_or("");
        if known_crates.contains(first_seg) {
            body.to_string()
        } else {
            // A bare first segment may name a child module of the current module.
            let child_mod = format!("{}::{}", current_mod, first_seg);
            let nested_prefix = format!("{}::", child_mod);
            let child_exists = mod_to_file.contains_key(&child_mod)
                || mod_to_file
                    .keys()
                    .any(|key| key.starts_with(nested_prefix.as_str()));
            if !child_exists {
                return None;
            }
            format!("{}::{}", current_mod, body)
        }
    };

    let mut targets = Vec::new();
    resolve_use_path(&absolute, mod_to_file, &mut targets);
    if targets.is_empty() {
        None
    } else {
        Some(targets)
    }
}
/// Resolves an absolute `use` path to project files and appends them to `out`.
///
/// Brace groups (`a::{b, c::{d, e}}`) are expanded recursively, splitting only
/// on commas that belong to the outermost group. For a plain path, an
/// ` as alias` suffix is ignored, and the longest prefix of the path that is a
/// key of `mod_to_file` determines the file; nothing is pushed if no prefix
/// matches.
fn resolve_use_path(path: &str, mod_to_file: &HashMap<String, PathBuf>, out: &mut Vec<PathBuf>) {
    if let Some(brace_start) = path.find('{') {
        let prefix = &path[..brace_start];
        let group = &path[brace_start + 1..];

        // Split the group on top-level commas only. Splitting on every comma
        // would break nested groups such as `a::{b, c}` apart and resolve the
        // fragments to the wrong (parent) module.
        let mut depth = 0usize;
        let mut item_start = 0usize;
        let mut group_end = group.len();
        let mut items: Vec<&str> = Vec::new();
        for (idx, ch) in group.char_indices() {
            match ch {
                '{' => depth += 1,
                // The brace matching the opening one ends the group.
                '}' if depth == 0 => {
                    group_end = idx;
                    break;
                }
                '}' => depth -= 1,
                ',' if depth == 0 => {
                    items.push(&group[item_start..idx]);
                    item_start = idx + 1;
                }
                _ => {}
            }
        }
        items.push(&group[item_start..group_end]);

        for item in items {
            let item = item.trim();
            if item.is_empty() {
                continue;
            }
            resolve_use_path(&format!("{}{}", prefix, item), mod_to_file, out);
        }
        return;
    }

    // Keep only the path itself: `foo::bar as baz` -> `foo::bar`. Without this
    // an aliased module import resolved to its parent module.
    let mut candidate = path.split_whitespace().next().unwrap_or("");
    loop {
        if let Some(file) = mod_to_file.get(candidate) {
            out.push(file.clone());
            return;
        }
        // Drop the last segment (an item name, `self`, `*`, ...) and retry.
        match candidate.rsplit_once("::") {
            Some((parent, _)) => candidate = parent,
            None => return,
        }
    }
}
/// Builds file-to-file edges for a Python project from `import` and
/// `from ... import ...` statements.
///
/// Files are first mapped to dotted module paths relative to each detected
/// package; files not covered by any package are then mapped relative to the
/// first package. Self-edges are never emitted, and unreadable files are
/// skipped.
fn parse_python_imports(root: &Path, files: &[PathBuf]) -> Vec<FileEdge> {
    let packages = detect_python_packages(root);
    let mut mod_to_file: HashMap<String, PathBuf> = HashMap::new();
    let mut file_to_mod: HashMap<PathBuf, String> = HashMap::new();

    for (pkg_name, pkg_dir) in &packages {
        for file in files.iter().filter(|f| f.starts_with(pkg_dir)) {
            if let Some(module) = python_file_to_mod_path(file, pkg_dir, pkg_name) {
                mod_to_file.insert(module.clone(), file.clone());
                file_to_mod.insert(file.clone(), module);
            }
        }
    }

    // Second pass: anything still unmapped is tried against the first package.
    if let Some((pkg_name, pkg_dir)) = packages.first() {
        for file in files {
            if file_to_mod.contains_key(file) {
                continue;
            }
            if let Some(module) = python_file_to_mod_path(file, pkg_dir, pkg_name) {
                mod_to_file.insert(module.clone(), file.clone());
                file_to_mod.insert(file.clone(), module);
            }
        }
    }

    let primary_pkg = packages.first().map_or("pkg", |(name, _)| name.as_str());

    let mut edges = Vec::new();
    for file in files {
        let source = match std::fs::read_to_string(file) {
            Ok(text) => text,
            Err(_) => continue,
        };
        let this_mod = file_to_mod.get(file).map(String::as_str).unwrap_or("");

        for logical in join_paren_lines(&source) {
            let stmt = logical.trim();
            // `from X import Y` takes precedence; plain `import X` is tried
            // only when the line is not a resolvable from-import.
            let targets = parse_python_from_import(stmt, this_mod, primary_pkg, &mod_to_file)
                .or_else(|| parse_python_import(stmt, &mod_to_file))
                .unwrap_or_default();
            for target in targets {
                if target != *file {
                    edges.push(FileEdge {
                        from: file.clone(),
                        to: target,
                    });
                }
            }
        }
    }
    edges
}
/// Detects the Python packages of the project at `root` as
/// `(package name, package directory)` pairs. The result is never empty.
///
/// Search order:
/// 1. directories with an `__init__.py` directly under `root/src`;
/// 2. directories with an `__init__.py` directly under `root`, except
///    well-known non-package names and names already found;
/// 3. the `[project].name` from `pyproject.toml` (with `-` replaced by `_`),
///    if a directory of that name exists under `root/src` or `root`;
/// 4. `root` itself, named after its directory.
fn detect_python_packages(root: &Path) -> Vec<(String, PathBuf)> {
    // Directory names under `root` that are never treated as packages.
    const NON_PACKAGE_DIRS: [&str; 5] = ["tests", "test", "docs", "examples", "benchmarks"];

    fn dir_name(dir: &Path) -> String {
        dir.file_name()
            .and_then(|n| n.to_str())
            .unwrap_or("pkg")
            .to_string()
    }
    fn is_package_dir(dir: &Path) -> bool {
        dir.is_dir() && dir.join("__init__.py").exists()
    }

    let mut packages: Vec<(String, PathBuf)> = Vec::new();

    let src = root.join("src");
    if src.is_dir() {
        if let Ok(listing) = std::fs::read_dir(&src) {
            for dir in listing
                .flatten()
                .map(|e| e.path())
                .filter(|p| is_package_dir(p))
            {
                packages.push((dir_name(&dir), dir));
            }
        }
    }

    if let Ok(listing) = std::fs::read_dir(root) {
        for dir in listing
            .flatten()
            .map(|e| e.path())
            .filter(|p| is_package_dir(p))
        {
            let name = dir_name(&dir);
            let excluded = NON_PACKAGE_DIRS.contains(&name.as_str());
            let duplicate = packages.iter().any(|(existing, _)| *existing == name);
            if !excluded && !duplicate {
                packages.push((name, dir));
            }
        }
    }

    if packages.is_empty() {
        if let Some(project_name) = read_pyproject_name(root) {
            let normalized = project_name.replace('-', "_");
            let in_src = root.join("src").join(&normalized);
            let in_root = root.join(&normalized);
            if in_src.is_dir() {
                packages.push((normalized, in_src));
            } else if in_root.is_dir() {
                packages.push((normalized, in_root));
            }
        }
    }

    if packages.is_empty() {
        packages.push((dir_name(root), root.to_path_buf()));
    }
    packages
}
/// Reads `[project].name` from `root/pyproject.toml`.
///
/// Returns `None` when the file is missing, is not valid TOML, or has no
/// string-valued project name.
fn read_pyproject_name(root: &Path) -> Option<String> {
    let text = std::fs::read_to_string(root.join("pyproject.toml")).ok()?;
    let doc: toml::Value = toml::from_str(&text).ok()?;
    let name = doc.get("project")?.get("name")?.as_str()?;
    Some(name.to_owned())
}
/// Returns the first package reported by [`detect_python_packages`], falling
/// back to `root` (named after its directory, or `"pkg"`) if none is reported.
fn detect_python_package(root: &Path) -> (String, PathBuf) {
    match detect_python_packages(root).into_iter().next() {
        Some(first) => first,
        None => {
            let fallback_name = root.file_name().and_then(|n| n.to_str()).unwrap_or("pkg");
            (fallback_name.to_string(), root.to_path_buf())
        }
    }
}
/// Maps a Python file under `pkg_dir` to its dotted module path.
///
/// The package's own `__init__.py` maps to `pkg_name`; `sub/__init__.py` maps
/// to `pkg_name.sub`; `a/b.py` maps to `pkg_name.a.b`. Returns `None` when
/// `file` is not located under `pkg_dir`.
fn python_file_to_mod_path(file: &Path, pkg_dir: &Path, pkg_name: &str) -> Option<String> {
    let relative = file
        .strip_prefix(pkg_dir)
        .ok()?
        .to_string_lossy()
        .replace('\\', "/");

    if relative == "__init__.py" {
        return Some(pkg_name.to_string());
    }

    let suffix = if relative.ends_with("/__init__.py") {
        "/__init__.py"
    } else {
        ".py"
    };
    let dotted = relative.trim_end_matches(suffix).replace('/', ".");
    Some(format!("{}.{}", pkg_name, dotted))
}
/// Resolves a `from X import a, b` statement to project files.
///
/// `X` may be relative (leading dots, resolved against `current_mod`) or
/// absolute, in which case it must start with `pkg_name`. The module itself
/// and every imported name that is a submodule are looked up in `mod_to_file`.
///
/// Returns `None` when `line` is not a from-import, when `X` is an absolute
/// path that does not start with `pkg_name`, or when nothing resolves.
fn parse_python_from_import(
    line: &str,
    current_mod: &str,
    pkg_name: &str,
    mod_to_file: &HashMap<String, PathBuf>,
) -> Option<Vec<PathBuf>> {
    let rest = line.strip_prefix("from ")?;
    let (module_part, import_names) = rest.split_once(" import ")?;
    let module_part = module_part.trim();

    let resolved = if module_part.starts_with('.') {
        // Relative import: each leading dot removes one trailing segment of
        // the current module path.
        let dots = module_part.chars().take_while(|c| *c == '.').count();
        let relative = &module_part[dots..];
        let mut base = current_mod.to_string();
        for _ in 0..dots {
            base = base
                .rsplit_once('.')
                .map(|(p, _)| p.to_string())
                .unwrap_or_default();
        }
        if relative.is_empty() {
            base
        } else {
            format!("{}.{}", base, relative)
        }
    } else if module_part.starts_with(pkg_name) {
        module_part.to_string()
    } else {
        return None;
    };

    let mut targets = Vec::new();
    resolve_python_path(&resolved, mod_to_file, &mut targets);

    // Multi-line imports reach this function joined as
    // `from x import ( a, b, )`, possibly followed by a `# comment`. Strip the
    // comment and the surrounding parentheses; otherwise names such as "( a"
    // never match a submodule and only fall back to the parent module.
    let names = import_names.split('#').next().unwrap_or("").trim();
    let names = names.strip_prefix('(').unwrap_or(names);
    let names = names.strip_suffix(')').unwrap_or(names);

    for name in names.split(',') {
        let name = name.trim().split(" as ").next().unwrap_or("").trim();
        if name.is_empty() || name == "*" {
            continue;
        }
        let submod = format!("{}.{}", resolved, name);
        resolve_python_path(&submod, mod_to_file, &mut targets);
    }

    if targets.is_empty() {
        None
    } else {
        Some(targets)
    }
}
/// Resolves a plain `import a.b, c as d` statement to project files.
///
/// Returns `None` when `line` does not start with `import ` or when none of
/// the listed modules resolves.
fn parse_python_import(line: &str, mod_to_file: &HashMap<String, PathBuf>) -> Option<Vec<PathBuf>> {
    // A line that starts with "import " cannot also start with "from ", so no
    // separate from-import check is needed here.
    let modules = line.strip_prefix("import ")?;

    let mut targets = Vec::new();
    for entry in modules.split(',') {
        let entry = entry.trim();
        let module = entry.split(" as ").next().unwrap_or(entry).trim();
        resolve_python_path(module, mod_to_file, &mut targets);
    }

    if targets.is_empty() {
        None
    } else {
        Some(targets)
    }
}
/// Looks up the longest dotted prefix of `path` that is a key of `mod_to_file`
/// and appends the matching file to `out`. Nothing is pushed if no prefix
/// matches.
fn resolve_python_path(path: &str, mod_to_file: &HashMap<String, PathBuf>, out: &mut Vec<PathBuf>) {
    let mut candidate = path;
    loop {
        if let Some(hit) = mod_to_file.get(candidate) {
            out.push(hit.clone());
            return;
        }
        // Drop the last dotted segment and try again.
        match candidate.rfind('.') {
            Some(dot) => candidate = &candidate[..dot],
            None => return,
        }
    }
}
/// Builds file-to-file edges for a JavaScript/TypeScript project.
///
/// Relative specifiers are resolved against the importing file's directory;
/// other specifiers are resolved only if they match a configured path alias.
/// Self-edges are never emitted, and unreadable files are skipped.
fn parse_js_imports(root: &Path, files: &[PathBuf]) -> Vec<FileEdge> {
    let known_files: HashSet<PathBuf> = files.iter().cloned().collect();
    let path_aliases = read_tsconfig_paths(root);
    let mut edges = Vec::new();

    for file in files {
        let source = match std::fs::read_to_string(file) {
            Ok(text) => text,
            Err(_) => continue,
        };
        let importer_dir = file.parent().unwrap_or(root);

        for raw_line in source.lines() {
            for spec in extract_js_import_specifiers(raw_line.trim()) {
                let target = if spec.starts_with('.') {
                    resolve_js_import(importer_dir, &spec, &known_files)
                } else {
                    match resolve_alias_to_path(&spec, &path_aliases) {
                        Some(aliased) => {
                            // Re-run the relative resolver from the aliased
                            // path's directory so that extension and index
                            // lookup apply to aliases as well.
                            let parent = aliased.parent().unwrap_or(root).to_path_buf();
                            let leaf = aliased
                                .file_name()
                                .unwrap_or_default()
                                .to_string_lossy()
                                .to_string();
                            resolve_js_import(&parent, &format!("./{}", leaf), &known_files)
                        }
                        // Neither relative nor aliased: not a project file.
                        None => continue,
                    }
                };

                if let Some(target) = target {
                    if target != *file {
                        edges.push(FileEdge {
                            from: file.clone(),
                            to: target,
                        });
                    }
                }
            }
        }
    }
    edges
}
/// Returns `line` with a trailing `// ...` comment removed.
///
/// A `//` inside a double-quoted JSON string (for example a URL) is not
/// treated as a comment.
fn strip_jsonc_line_comment(line: &str) -> &str {
    let mut in_string = false;
    let mut escaped = false;
    let mut prev_slash = false;
    for (idx, ch) in line.char_indices() {
        if in_string {
            if escaped {
                escaped = false;
            } else if ch == '\\' {
                escaped = true;
            } else if ch == '"' {
                in_string = false;
            }
            continue;
        }
        match ch {
            '"' => {
                in_string = true;
                prev_slash = false;
            }
            // Second of two consecutive slashes: cut before the first one.
            '/' if prev_slash => return &line[..idx - 1],
            '/' => prev_slash = true,
            _ => prev_slash = false,
        }
    }
    line
}

/// Collects import-path aliases for the JS/TS project at `root` as
/// `(specifier prefix, target directory)` pairs.
///
/// Aliases come from the first of `tsconfig.json` / `jsconfig.json` that
/// parses (after removing `//` line comments), plus workspace packages. When
/// nothing is found, a single `@/` alias pointing at `root` is returned.
fn read_tsconfig_paths(root: &Path) -> Vec<(String, PathBuf)> {
    let mut aliases = Vec::new();
    let mut config_loaded = false;

    for name in ["tsconfig.json", "jsconfig.json"] {
        let raw = match std::fs::read_to_string(root.join(name)) {
            Ok(text) => text,
            Err(_) => continue,
        };
        // Cutting every line at the first `//` would also truncate string
        // values such as `"$schema": "https://..."`, making the file
        // unparseable and silently dropping all of its aliases.
        let cleaned = raw
            .lines()
            .map(strip_jsonc_line_comment)
            .collect::<Vec<_>>()
            .join("\n");
        if let Ok(val) = serde_json::from_str::<serde_json::Value>(&cleaned) {
            aliases = extract_path_aliases(&val, root);
            config_loaded = true;
            break;
        }
    }

    // `extract_path_aliases` already appends the workspace packages, so a
    // second discovery pass is only needed when no config file was parsed.
    // Running it again would re-read every package.json and add duplicates.
    if !config_loaded {
        discover_workspace_packages(root, &mut aliases);
    }

    if aliases.is_empty() {
        aliases.push(("@/".to_string(), root.to_path_buf()));
    }
    aliases
}
/// Extracts `compilerOptions.paths` aliases from a parsed tsconfig and then
/// appends workspace-package aliases.
///
/// For each pattern, trailing `*` characters are removed from both the
/// pattern and its first target; the target is resolved against
/// `compilerOptions.baseUrl` (default `"."`) under `root`.
fn extract_path_aliases(tsconfig: &serde_json::Value, root: &Path) -> Vec<(String, PathBuf)> {
    let compiler_options = tsconfig.get("compilerOptions");
    let base_url = compiler_options
        .and_then(|opts| opts.get("baseUrl"))
        .and_then(|url| url.as_str())
        .unwrap_or(".");
    let base_dir = root.join(base_url);

    let mut aliases = Vec::new();
    let path_table = compiler_options
        .and_then(|opts| opts.get("paths"))
        .and_then(|paths| paths.as_object());
    if let Some(table) = path_table {
        for (pattern, targets) in table {
            // Only the first target of each pattern is used.
            let first_target = targets
                .as_array()
                .and_then(|list| list.first())
                .and_then(|target| target.as_str());
            if let Some(target) = first_target {
                aliases.push((
                    pattern.trim_end_matches('*').to_string(),
                    base_dir.join(target.trim_end_matches('*')),
                ));
            }
        }
    }

    discover_workspace_packages(root, &mut aliases);
    aliases
}
/// Appends one alias per workspace package found under `root`, mapping
/// `"<package name>/"` to the package's `src/` directory (or the package
/// directory itself when it has no `src/`).
///
/// Package directories are taken from the `workspaces` field of
/// `root/package.json` (array form, or object form with a `packages` array),
/// with a trailing `/*` or `/**` removed. When that yields no directory, the
/// conventional `packages`, `apps`, `libs` and `modules` directories are
/// searched instead. Does nothing if `root/package.json` is missing or
/// unparseable.
fn discover_workspace_packages(root: &Path, aliases: &mut Vec<(String, PathBuf)>) {
    // Reads and parses a JSON file; `None` if it is missing or malformed.
    fn load_json(path: &Path) -> Option<serde_json::Value> {
        if !path.exists() {
            return None;
        }
        let raw = std::fs::read_to_string(path).ok()?;
        serde_json::from_str(&raw).ok()
    }

    let manifest = match load_json(&root.join("package.json")) {
        Some(value) => value,
        None => return,
    };

    let globs: Vec<&str> = manifest
        .get("workspaces")
        .and_then(|ws| {
            ws.as_array()
                .or_else(|| ws.get("packages").and_then(|p| p.as_array()))
        })
        .map(|list| list.iter().filter_map(|v| v.as_str()).collect())
        .unwrap_or_default();

    let mut search_dirs: Vec<PathBuf> = globs
        .iter()
        .map(|glob| root.join(glob.trim_end_matches("/*").trim_end_matches("/**")))
        .filter(|dir| dir.is_dir())
        .collect();

    if search_dirs.is_empty() {
        search_dirs.extend(
            ["packages", "apps", "libs", "modules"]
                .iter()
                .map(|name| root.join(name))
                .filter(|dir| dir.is_dir()),
        );
    }

    for dir in &search_dirs {
        let listing = match std::fs::read_dir(dir) {
            Ok(entries) => entries,
            Err(_) => continue,
        };
        for pkg_dir in listing.flatten().map(|e| e.path()) {
            let pkg_manifest = match load_json(&pkg_dir.join("package.json")) {
                Some(value) => value,
                None => continue,
            };
            if let Some(name) = pkg_manifest.get("name").and_then(|n| n.as_str()) {
                let nested_src = pkg_dir.join("src");
                let source_dir = if nested_src.is_dir() {
                    nested_src
                } else {
                    pkg_dir
                };
                aliases.push((format!("{}/", name), source_dir));
            }
        }
    }
}
/// Rewrites `spec` using the first alias whose prefix it starts with, joining
/// the remainder onto that alias's target directory.
///
/// Returns `None` when no alias prefix matches.
fn resolve_alias_to_path(spec: &str, aliases: &[(String, PathBuf)]) -> Option<PathBuf> {
    aliases.iter().find_map(|(prefix, target_dir)| {
        spec.strip_prefix(prefix.as_str())
            .map(|remainder| target_dir.join(remainder))
    })
}
/// Extracts module specifiers from a single (trimmed) line of JS/TS source.
///
/// Recognises `import ... from '...'`, `export ... from '...'`, side-effect
/// imports (`import '...'`), and the first `require('...')` call on the line
/// when it is at the start of the line or preceded by a space, tab, `=`, `(`
/// or `,`.
fn extract_js_import_specifiers(line: &str) -> Vec<String> {
    let mut specs = Vec::new();

    if line.starts_with("import ") || line.starts_with("export ") {
        let spec = extract_string_after(line, " from ").or_else(|| {
            // Side-effect import: `import 'polyfill';`
            if line.starts_with("import '") || line.starts_with("import \"") {
                extract_quoted_string(&line["import ".len()..])
            } else {
                None
            }
        });
        specs.extend(spec);
    }

    if let Some(pos) = line.find("require(") {
        // Reject identifiers that merely end in `require`, such as `myrequire(`.
        let boundary_ok = match pos.checked_sub(1) {
            None => true,
            Some(prev) => matches!(
                line.as_bytes()[prev],
                b' ' | b'=' | b'(' | b'\t' | b','
            ),
        };
        if boundary_ok {
            specs.extend(extract_quoted_string(&line[pos + "require(".len()..]));
        }
    }

    specs
}
/// Returns the quoted string that immediately follows the first occurrence of
/// `marker` in `line`, if there is one.
fn extract_string_after(line: &str, marker: &str) -> Option<String> {
    let (_, tail) = line.split_once(marker)?;
    extract_quoted_string(tail)
}
/// Returns the contents of the single- or double-quoted string at the start
/// of `s` (after trimming whitespace).
///
/// Returns `None` when `s` does not start with a quote or the quote is never
/// closed.
fn extract_quoted_string(s: &str) -> Option<String> {
    let mut chars = s.trim().chars();
    let quote = match chars.next() {
        Some(c) if c == '\'' || c == '"' => c,
        _ => return None,
    };
    let body = chars.as_str();
    body.find(quote).map(|end| body[..end].to_string())
}
/// Resolves a relative JS/TS import specifier to a file in `file_set`.
///
/// Candidates are tried in order: the path as written, the path with each
/// known extension appended, then an `index.*` file inside the path treated
/// as a directory. For every candidate the canonicalised form is checked
/// before the literal one.
fn resolve_js_import(dir: &Path, spec: &str, file_set: &HashSet<PathBuf>) -> Option<PathBuf> {
    let base = dir.join(spec);
    let suffixes = ["", ".ts", ".tsx", ".js", ".jsx", ".mjs"];
    let index_files = ["index.ts", "index.tsx", "index.js", "index.jsx"];

    // Returns whichever form of `candidate` is a known project file.
    let lookup = |candidate: PathBuf| -> Option<PathBuf> {
        if let Ok(real) = candidate.canonicalize() {
            if file_set.contains(&real) {
                return Some(real);
            }
        }
        if file_set.contains(&candidate) {
            Some(candidate)
        } else {
            None
        }
    };

    let with_suffix = suffixes
        .iter()
        .map(|ext| PathBuf::from(format!("{}{}", base.display(), ext)));
    let as_index = index_files.iter().map(|idx| base.join(idx));

    with_suffix.chain(as_index).find_map(lookup)
}
/// Builds file-to-file edges for a Go module.
///
/// Go imports name packages rather than files, so each package directory is
/// represented by one canonical file: the file named after the package's last
/// path segment if present, otherwise the first file (in sorted order) that
/// is not `doc.go`, otherwise the first file. Imports outside the module are
/// ignored, self-edges are never emitted, and unreadable files are skipped.
fn parse_go_imports(root: &Path, files: &[PathBuf]) -> Vec<FileEdge> {
    let module_name = read_go_module_name(root);

    let mut pkg_to_files: HashMap<String, Vec<PathBuf>> = HashMap::new();
    for file in files {
        if let Some(pkg_path) = go_file_to_pkg_path(file, root, &module_name) {
            pkg_to_files.entry(pkg_path).or_default().push(file.clone());
        }
    }

    let mut pkg_canonical: HashMap<String, PathBuf> = HashMap::new();
    for (pkg, members) in pkg_to_files.iter_mut() {
        members.sort();
        let sorted: &[PathBuf] = members.as_slice();
        let leaf = pkg.rsplit('/').next().unwrap_or(pkg);

        let chosen = sorted
            .iter()
            .find(|f| f.file_stem().and_then(|s| s.to_str()) == Some(leaf))
            .or_else(|| {
                sorted
                    .iter()
                    .find(|f| f.file_name().and_then(|n| n.to_str()) != Some("doc.go"))
            })
            .or_else(|| sorted.first());

        if let Some(representative) = chosen {
            pkg_canonical.insert(pkg.clone(), representative.clone());
        }
    }

    let mut edges = Vec::new();
    for file in files {
        let source = match std::fs::read_to_string(file) {
            Ok(text) => text,
            Err(_) => continue,
        };
        for raw_line in source.lines() {
            let import_path = match extract_go_import(raw_line.trim()) {
                Some(path) => path,
                None => continue,
            };
            // Only imports inside this module can map to project files.
            if !import_path.starts_with(module_name.as_str()) {
                continue;
            }
            if let Some(target) = pkg_canonical.get(&import_path) {
                if target != file {
                    edges.push(FileEdge {
                        from: file.clone(),
                        to: target.clone(),
                    });
                }
            }
        }
    }
    edges
}
/// Reads the module path from the first line of `root/go.mod` that starts
/// with `module `.
///
/// Returns an empty string when the file is missing or has no such line.
fn read_go_module_name(root: &Path) -> String {
    std::fs::read_to_string(root.join("go.mod"))
        .ok()
        .and_then(|text| {
            text.lines()
                .find_map(|line| line.strip_prefix("module "))
                .map(|name| name.trim().to_string())
        })
        .unwrap_or_default()
}
/// Maps a Go source file to the import path of the package that contains it.
///
/// The package path is `module_name` for files directly under `root`, and
/// `module_name/<dir relative to root>` otherwise, with backslashes turned
/// into forward slashes. Returns `None` when the file has no parent or its
/// directory does not live under `root`.
fn go_file_to_pkg_path(file: &Path, root: &Path, module_name: &str) -> Option<String> {
    let relative_dir = file.parent()?.strip_prefix(root).ok()?;
    let sub_path = relative_dir.to_string_lossy().replace('\\', "/");
    let pkg = match sub_path.as_str() {
        "" => module_name.to_string(),
        sub => format!("{}/{}", module_name, sub),
    };
    Some(pkg)
}
/// Extracts the quoted import path from one line of Go source, if any.
///
/// The bare delimiters `import (`, `)` and `import` never yield a path. For
/// any other line the quoted string is looked up in the whole line first and,
/// failing that, in the text following the first space.
fn extract_go_import(line: &str) -> Option<String> {
    let trimmed = line.trim();
    if matches!(trimmed, "import (" | ")" | "import") {
        return None;
    }
    if let Some(found) = extract_quoted_string(trimmed) {
        return Some(found);
    }
    match trimmed.split_once(' ') {
        Some((_, tail)) => extract_quoted_string(tail),
        None => None,
    }
}
/// One row of the analysis output: the metrics for a single graph node
/// (a file, or a directory when the graph was contracted to directories).
///
/// NOTE(review): values of this type are also written to the on-disk cache
/// with bincode (via `FilesResult`). bincode's format is not self-describing,
/// so fields omitted through `skip_serializing_if` may not deserialize back
/// correctly — confirm the cache round-trips.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub(crate) struct FileRow {
/// Node label: the path relative to the analysis root, or a directory label.
pub file: String,
/// Role assigned by file classification; always `Source` in directory mode.
pub role: FileRole,
/// Number of incoming neighbours in the graph.
pub in_degree: usize,
/// Number of outgoing neighbours in the graph.
pub out_degree: usize,
/// Dependents count from the reachability pass (shown as "blast" in text output);
/// presumably transitive — confirm against `reachability_counts_edges`.
pub dependents: usize,
/// Dependencies count from the reachability pass (shown as "deps" in text output);
/// presumably transitive — confirm against `reachability_counts_edges`.
pub dependencies: usize,
/// PageRank score computed on the import graph.
pub pagerank: f64,
/// PageRank score computed on the reversed import graph.
pub consumers_pagerank: f64,
/// Betweenness centrality of the node.
pub betweenness: f64,
/// True when the node has neither incoming nor outgoing edges.
pub orphan: bool,
/// Index into `FilesResult::cycles` when the node is in a multi-node SCC.
#[serde(skip_serializing_if = "Option::is_none")]
pub cycle_id: Option<usize>,
/// Commit count from git history; `None` when git analysis is disabled.
#[serde(skip_serializing_if = "Option::is_none")]
pub commits: Option<usize>,
/// `pagerank * commits / max_commits`; `None` when git analysis is disabled.
#[serde(skip_serializing_if = "Option::is_none")]
pub churn_risk: Option<f64>,
/// Paths that frequently change in the same commits, with the shared-commit count.
#[serde(skip_serializing_if = "Vec::is_empty")]
pub co_changers: Vec<(String, usize)>,
/// `out_degree / (in_degree + out_degree)`, or `0.0` for nodes without edges.
pub instability: f64,
/// Structural label ("foundation", "hub", "consumer", "leaf" or "orphan"),
/// optionally suffixed with `!!` or `~` when git churn data is available.
pub structure: String,
/// External dependencies referenced by the file; empty in directory mode.
#[serde(skip_serializing_if = "Vec::is_empty")]
pub external_deps: Vec<String>,
}
/// Complete result of one `files` analysis run.
#[derive(Debug, Serialize, Deserialize)]
pub(crate) struct FilesResult {
/// One row per graph node, already sorted by the requested metric.
pub rows: Vec<FileRow>,
/// Number of nodes in the analysed graph.
pub nodes: usize,
/// Number of edges in the analysed graph.
pub edges: usize,
/// The first ecosystem in effect for the run (explicit or detected).
pub ecosystem: Ecosystem,
/// Number of rows flagged as orphans.
pub orphan_count: usize,
/// Members of every strongly connected component with more than one node;
/// `FileRow::cycle_id` indexes into this list.
pub cycles: Vec<Vec<String>>,
/// Direct `(from, to)` edges by node label. Never serialized, so this is
/// empty on any value produced by deserialization (e.g. a cache hit).
#[serde(skip)]
pub direct_edges: Vec<(String, String)>,
}
/// Change-history statistics gathered from `git log`.
struct GitStats {
// Number of commits (within the requested window) that touched each path.
counts: HashMap<String, usize>,
// For each path, the paths it most often changed together with, paired
// with the number of shared commits.
co_changers: HashMap<String, Vec<(String, usize)>>,
}
/// Collects per-file commit counts and co-change partners from `git log`.
///
/// Runs `git log` in `root`, limited to the last `days` days. When git cannot
/// be started or exits unsuccessfully (e.g. `root` is not inside a
/// repository), empty statistics are returned.
///
/// Paths are reported relative to `root` (`--relative`), so they line up with
/// root-relative labels even when `root` is a subdirectory of the repository.
/// `core.quotePath=false` stops git from octal-escaping non-ASCII path bytes.
///
/// Co-change pairs are only counted for commits touching 2..=20 files. A pair
/// is kept once it shares at least 2 commits, and each file keeps its 5
/// strongest partners (ties broken by partner path for stable output).
fn git_file_stats(root: &Path, days: u64) -> GitStats {
    // Commits touching more files than this are treated as bulk changes and
    // ignored for co-change purposes.
    const MAX_FILES_PER_COMMIT: usize = 20;
    // Minimum number of shared commits before two files count as co-changers.
    const MIN_SHARED_COMMITS: usize = 2;
    // Number of partners retained per file.
    const MAX_PARTNERS: usize = 5;

    let since = format!("--since={} days ago", days);
    let output = ProcessCommand::new("git")
        .args([
            "-c",
            "core.quotePath=false",
            "log",
            "--name-only",
            "--relative",
            // One NUL byte per commit header, used below as the block separator.
            "--pretty=format:%x00",
            since.as_str(),
        ])
        .current_dir(root)
        .output();
    let out = match output {
        Ok(o) if o.status.success() => o,
        _ => {
            return GitStats {
                counts: HashMap::new(),
                co_changers: HashMap::new(),
            }
        }
    };

    let stdout = String::from_utf8_lossy(&out.stdout);
    let mut counts: HashMap<String, usize> = HashMap::new();
    let mut pair_counts: HashMap<(String, String), usize> = HashMap::new();
    for commit_block in stdout.split('\0') {
        let files: Vec<&str> = commit_block
            .lines()
            .map(str::trim)
            .filter(|l| !l.is_empty())
            .collect();
        for &f in &files {
            *counts.entry(f.to_string()).or_insert(0) += 1;
        }
        if !(2..=MAX_FILES_PER_COMMIT).contains(&files.len()) {
            continue;
        }
        for (i, &first) in files.iter().enumerate() {
            for &second in &files[i + 1..] {
                // Order the pair so (a, b) and (b, a) share one counter.
                let key = if first < second {
                    (first.to_string(), second.to_string())
                } else {
                    (second.to_string(), first.to_string())
                };
                *pair_counts.entry(key).or_insert(0) += 1;
            }
        }
    }

    let mut co_changers: HashMap<String, Vec<(String, usize)>> = HashMap::new();
    for ((a, b), &count) in &pair_counts {
        if count < MIN_SHARED_COMMITS {
            continue;
        }
        co_changers
            .entry(a.clone())
            .or_default()
            .push((b.clone(), count));
        co_changers
            .entry(b.clone())
            .or_default()
            .push((a.clone(), count));
    }
    for partners in co_changers.values_mut() {
        // Highest count first; the name tie-break keeps the truncated list
        // independent of HashMap iteration order.
        partners.sort_by(|x, y| y.1.cmp(&x.1).then_with(|| x.0.cmp(&y.0)));
        partners.truncate(MAX_PARTNERS);
    }

    GitStats {
        counts,
        co_changers,
    }
}
/// Collapses a file graph into a directory graph.
///
/// Every file node is mapped to its parent directory relative to `root`
/// (`"."` for files directly under the root). Edges between files of the same
/// directory are dropped; edges between different directories are merged and
/// their weights summed.
///
/// Returns the contracted graph, the directory labels in node order, and the
/// number of files per directory.
fn contract_to_directories(
    graph: &DiGraph<PathBuf, f64>,
    root: &Path,
) -> (DiGraph<String, f64>, Vec<String>, HashMap<String, usize>) {
    let mut contracted: DiGraph<String, f64> = DiGraph::new();
    let mut node_of_dir: HashMap<String, NodeIndex> = HashMap::new();
    let mut files_per_dir: HashMap<String, usize> = HashMap::new();
    // Indexed by file node index: the directory node that file belongs to.
    let mut dir_of_file: Vec<NodeIndex> = Vec::new();

    for file_idx in graph.node_indices() {
        let file = graph.nw(file_idx);
        let relative = file.strip_prefix(root).unwrap_or(file);
        let dir_label = match relative.parent() {
            Some(parent) if !parent.as_os_str().is_empty() => {
                parent.to_string_lossy().to_string()
            }
            _ => ".".to_string(),
        };

        *files_per_dir.entry(dir_label.clone()).or_insert(0) += 1;

        let known = node_of_dir.get(&dir_label).copied();
        let dir_idx = match known {
            Some(existing) => existing,
            None => {
                let created = contracted.add_node(dir_label.clone());
                node_of_dir.insert(dir_label, created);
                created
            }
        };
        dir_of_file.push(dir_idx);
    }

    for edge in graph.edge_references() {
        let src = dir_of_file[edge.source().index()];
        let dst = dir_of_file[edge.target().index()];
        if src == dst {
            continue;
        }
        // Add this edge's weight to whatever the directory pair already has.
        let accumulated = match contracted.find_edge(src, dst) {
            Some(existing) => contracted.edge_weight(existing).copied().unwrap_or(0.0),
            None => 0.0,
        };
        contracted.update_edge(src, dst, accumulated + edge.weight());
    }

    let labels: Vec<String> = contracted
        .node_indices()
        .map(|n| contracted.nw(n).clone())
        .collect();
    (contracted, labels, files_per_dir)
}
/// Detects cross-language edges from Python / JS sources into Rust files
/// that expose FFI bindings.
///
/// A Rust file counts as a binding when it contains a `#[pymodule]` (PyO3) or
/// `#[napi]` attribute; it is identified by the name of the crate that
/// contains it (nearest ancestor with a `Cargo.toml`, falling back to `root`).
/// A `.py` file gets an edge to a PyO3 binding when one of its `import` /
/// `from` lines mentions the crate name. A JS/TS file gets an edge to a napi
/// binding when an `import` line or a line containing `require(` mentions it.
/// Python files are only scanned when `ecosystems` contains `Python`, and
/// JS/TS files only when it contains `Npm`.
///
/// Matching is a plain substring test on the line, so it can over-match.
/// Edges are emitted in a deterministic order (file order, then line order,
/// then binding discovery order) and may contain duplicates.
///
/// NOTE(review): the crate name is matched verbatim. If `read_rust_crate_name`
/// returns a hyphenated Cargo package name, Python imports (which use
/// underscores) will not match — confirm.
fn detect_ffi_seams(root: &Path, files: &[PathBuf], ecosystems: &[Ecosystem]) -> Vec<FileEdge> {
    /// Which foreign-language binding a Rust file exposes.
    #[derive(Clone, Copy, PartialEq, Eq)]
    enum Binding {
        PyO3,
        Napi,
    }

    // (crate name, rust file, binding kind) for every Rust file with a marker.
    let mut rust_modules: Vec<(String, PathBuf, Binding)> = Vec::new();
    for file in files {
        if file.extension().and_then(|e| e.to_str()) != Some("rs") {
            continue;
        }
        let content = match std::fs::read_to_string(file) {
            Ok(c) => c,
            Err(_) => continue,
        };
        let exports_pyo3 = content.contains("#[pymodule]") || content.contains("#[pymodule(");
        let exports_napi = content.contains("#[napi]") || content.contains("#[napi(");
        if !exports_pyo3 && !exports_napi {
            // Only resolve the crate for files that actually carry a marker.
            continue;
        }
        let crate_dir = file
            .ancestors()
            .find(|p| p.join("Cargo.toml").exists())
            .unwrap_or(root);
        let crate_name = read_rust_crate_name(crate_dir);
        if crate_name.is_empty() {
            // An empty name is a substring of every line and would link
            // every import statement to this file.
            continue;
        }
        if exports_pyo3 {
            rust_modules.push((crate_name.clone(), file.clone(), Binding::PyO3));
        }
        if exports_napi {
            rust_modules.push((crate_name, file.clone(), Binding::Napi));
        }
    }

    let mut edges = Vec::new();
    if rust_modules.is_empty() {
        return edges;
    }

    let has_kind = |wanted: Binding| rust_modules.iter().any(|(_, _, kind)| *kind == wanted);
    let scan_python = ecosystems.contains(&Ecosystem::Python) && has_kind(Binding::PyO3);
    let scan_js = ecosystems.contains(&Ecosystem::Npm) && has_kind(Binding::Napi);

    for file in files {
        let ext = file.extension().and_then(|e| e.to_str()).unwrap_or("");
        let wanted = match ext {
            "py" if scan_python => Binding::PyO3,
            "ts" | "tsx" | "js" | "jsx" | "mjs" if scan_js => Binding::Napi,
            _ => continue,
        };
        let content = match std::fs::read_to_string(file) {
            Ok(c) => c,
            Err(_) => continue,
        };
        for line in content.lines() {
            let line = line.trim();
            let is_import = match wanted {
                Binding::PyO3 => line.starts_with("import ") || line.starts_with("from "),
                Binding::Napi => line.starts_with("import ") || line.contains("require("),
            };
            if !is_import {
                continue;
            }
            for (name, rust_file, kind) in &rust_modules {
                if *kind == wanted && line.contains(name.as_str()) {
                    edges.push(FileEdge {
                        from: file.clone(),
                        to: rust_file.clone(),
                    });
                }
            }
        }
    }
    edges
}
/// Shallow-clones `url` into a temp directory and returns that directory.
///
/// The directory name is derived from a hash of the URL, so the same URL
/// always maps to the same location; when that directory already exists it is
/// returned as-is without contacting the remote.
///
/// # Errors
/// Fails when `git` cannot be started or the clone exits unsuccessfully (the
/// error then carries git's stderr).
fn clone_repo_to_temp(url: &str) -> Result<PathBuf> {
    let dir_name = format!("pkgrank-{:016x}", fnv1a64(url.as_bytes()));
    let checkout = std::env::temp_dir().join(dir_name);

    if !checkout.exists() {
        eprintln!("cloning {} → {}", url, checkout.display());
        let mut git = ProcessCommand::new("git");
        git.args(["clone", "--depth", "1", url]).arg(&checkout);
        let output = match git.output() {
            Ok(o) => o,
            Err(e) => return Err(anyhow::anyhow!("git clone failed: {}", e)),
        };
        if !output.status.success() {
            let stderr = String::from_utf8_lossy(&output.stderr);
            return Err(anyhow::anyhow!("git clone failed: {}", stderr.trim()));
        }
    }
    Ok(checkout)
}
/// Returns `true` when `s` looks like a remote git location rather than a
/// local path.
///
/// Recognised forms are the `https://`, `http://`, `ssh://` and `git://`
/// URL schemes plus the scp-like `git@host:path` syntax. Matching is
/// case-sensitive and only inspects the prefix.
fn is_url(s: &str) -> bool {
    const REMOTE_PREFIXES: [&str; 5] = ["https://", "http://", "ssh://", "git://", "git@"];
    REMOTE_PREFIXES.iter().any(|prefix| s.starts_with(*prefix))
}
fn expand_uri(s: &str) -> String {
if is_url(s) || PathBuf::from(s).exists() {
return s.to_string();
}
let parts: Vec<&str> = s.split('/').collect();
if parts.len() == 2
&& parts
.iter()
.all(|p| !p.is_empty() && !p.contains('.') && !p.contains(' '))
{
return format!("https://github.com/{}/{}", parts[0], parts[1]);
}
s.to_string()
}
/// Computes the cache key for an analysis run.
///
/// The key hashes every argument that influences the cached `FilesResult`,
/// plus the path, modification time (whole seconds since the Unix epoch) and
/// size of every discovered file. Files whose metadata cannot be read
/// contribute zeros.
///
/// `metric` is part of the key because cached rows are stored already sorted
/// by the requested metric; `include_all` is part of the key because the
/// arguments are passed to the file-inclusion filter.
fn files_cache_key(files: &[PathBuf], args: &FilesArgs) -> u64 {
    let mut material = format!(
        "v3\necosystem={:?}\ndir={}\ngit={}\ngit_days={}\ntests={}\nall={}\nmetric={:?}\n",
        args.ecosystem,
        args.directory,
        args.git,
        args.git_days,
        args.include_tests,
        args.include_all,
        args.metric
    );
    for f in files {
        let meta = f.metadata().ok();
        let mtime = meta
            .as_ref()
            .and_then(|m| m.modified().ok())
            .and_then(|t| t.duration_since(std::time::UNIX_EPOCH).ok())
            .map(|d| d.as_secs())
            .unwrap_or(0);
        // Size catches edits that land within the same mtime second.
        let size = meta.as_ref().map(|m| m.len()).unwrap_or(0);
        material.push_str(&format!("{}:{}:{}\n", f.display(), mtime, size));
    }
    fnv1a64(material.as_bytes())
}
/// Loads the cached result stored under `key`, if there is one.
///
/// Returns `None` when the cache file is missing, unreadable, or cannot be
/// decoded by bincode.
fn files_cache_read(cache_dir: &Path, key: u64) -> Option<FilesResult> {
    let entry = cache_dir.join(format!("files_{:016x}.bin", key));
    match std::fs::read(&entry) {
        Ok(bytes) => bincode::deserialize(&bytes).ok(),
        Err(_) => None,
    }
}
/// Stores `result` in the cache under `key`, best-effort.
///
/// The cache directory is created when needed. Every failure (directory
/// creation, encoding, writing) is silently ignored.
fn files_cache_write(cache_dir: &Path, key: u64, result: &FilesResult) {
    let _ = std::fs::create_dir_all(cache_dir);
    let entry = cache_dir.join(format!("files_{:016x}.bin", key));
    let encoded = match bincode::serialize(result) {
        Ok(bytes) => bytes,
        Err(_) => return,
    };
    let _ = std::fs::write(&entry, encoded);
}
/// Runs the file-level dependency analysis for `args.path` and returns the ranked rows.
///
/// Steps, in order: resolve the input to a local root (cloning when it is a URL),
/// determine the ecosystems, discover and filter files, parse import edges, build the
/// graph (optionally contracted to directories), compute centrality / reachability /
/// SCC data, optionally merge git history, label and sort the rows, and optionally
/// read from / write to the on-disk cache.
///
/// # Errors
/// Returns an error when cloning a remote fails, or when no ecosystem was given
/// explicitly and none could be detected under the root.
pub(crate) fn files_analyze(args: &FilesArgs) -> Result<FilesResult> {
// Resolve the input: expand shorthand, clone URLs, and use the parent
// directory when the path points at a single file.
let uri = expand_uri(&args.path);
let root = if is_url(&uri) {
clone_repo_to_temp(&uri)?
} else {
let p = PathBuf::from(&uri);
if p.is_file() {
p.parent().unwrap_or(Path::new(".")).to_path_buf()
} else {
p
}
};
// An explicit --ecosystem wins; otherwise every detected ecosystem is analysed.
let ecosystems: Vec<Ecosystem> = if let Some(eco) = args.ecosystem {
vec![eco]
} else {
let detected = detect_all_ecosystems(&root);
if detected.is_empty() {
return Err(anyhow::anyhow!(
"Could not detect ecosystem in {}. Pass --ecosystem explicitly.",
root.display()
));
}
detected
};
// The first ecosystem is the one used for file classification, external
// dependency extraction, and the `ecosystem` field of the result.
let ecosystem = ecosystems[0];
let mut all_files = Vec::new();
for eco in &ecosystems {
all_files.extend(discover_files(&root, *eco));
}
all_files.sort();
all_files.dedup();
// The cache lives inside the analysed tree and is only consulted with --cache.
// A hit returns immediately, before any parsing or graph work.
// NOTE(review): `FilesResult::direct_edges` is `#[serde(skip)]`, so a cached
// result carries no direct edges — confirm that is acceptable for --focus.
let cache_dir = root.join("evals/pkgrank/files_cache");
if args.cache {
let key = files_cache_key(&all_files, args);
if let Some(cached) = files_cache_read(&cache_dir, key) {
return Ok(cached);
}
}
// Classify every discovered file and keep those that pass the inclusion filter.
let mut included_files: Vec<PathBuf> = Vec::new();
let mut file_roles: HashMap<PathBuf, FileRole> = HashMap::new();
for file in &all_files {
let role = classify_file(file, &root, ecosystem);
if should_include(role, args) {
included_files.push(file.clone());
file_roles.insert(file.clone(), role);
}
}
// Parse imports per ecosystem, handing each parser only the files with
// that ecosystem's extensions.
let mut edges = Vec::new();
for eco in &ecosystems {
let eco_files: Vec<PathBuf> = included_files
.iter()
.filter(|f| {
let ext = f.extension().and_then(|e| e.to_str()).unwrap_or("");
match eco {
Ecosystem::Cargo => ext == "rs",
Ecosystem::Python => ext == "py",
Ecosystem::Npm => matches!(ext, "ts" | "tsx" | "js" | "jsx" | "mjs"),
Ecosystem::Go => ext == "go",
}
})
.cloned()
.collect();
let eco_edges = match eco {
Ecosystem::Cargo => parse_rust_imports(&root, &eco_files),
Ecosystem::Python => parse_python_imports(&root, &eco_files),
Ecosystem::Npm => parse_js_imports(&root, &eco_files),
Ecosystem::Go => parse_go_imports(&root, &eco_files),
};
edges.extend(eco_edges);
}
// Cross-language (FFI) edges are only looked for in mixed projects that include Cargo.
if ecosystems.contains(&Ecosystem::Cargo) && ecosystems.len() > 1 {
edges.extend(detect_ffi_seams(&root, &included_files, &ecosystems));
}
// Collect the names that count as "internal" so they can be told apart
// from external dependencies below.
let mut internal_prefixes: HashSet<String> = HashSet::new();
for eco in &ecosystems {
match eco {
Ecosystem::Cargo => {
let crate_roots = find_rust_crate_roots(&root);
for (_, name) in &crate_roots {
internal_prefixes.insert(name.clone());
}
// Every declared `mod` name is treated as internal as well.
for file in &included_files {
if let Ok(content) = std::fs::read_to_string(file) {
for line in content.lines() {
if let Some(mod_name) = parse_mod_declaration(line.trim()) {
internal_prefixes.insert(mod_name);
}
}
}
}
}
Ecosystem::Python => {
let (pkg_name, _) = detect_python_package(&root);
internal_prefixes.insert(pkg_name);
}
Ecosystem::Go => {
let mod_name = read_go_module_name(&root);
if !mod_name.is_empty() {
internal_prefixes.insert(mod_name);
}
}
Ecosystem::Npm => {} }
}
let external_deps_map = extract_external_deps(&included_files, ecosystem, &internal_prefixes);
// Build the file graph: one node per included file, one unit-weight edge
// per distinct (from, to) pair whose endpoints are both included.
let mut graph: DiGraph<PathBuf, f64> = DiGraph::new();
let mut node_map: HashMap<PathBuf, NodeIndex> = HashMap::new();
for file in &included_files {
let idx = graph.add_node(file.clone());
node_map.insert(file.clone(), idx);
}
let mut seen_edges: HashSet<(usize, usize)> = HashSet::new();
for edge in &edges {
if let (Some(&from_idx), Some(&to_idx)) = (node_map.get(&edge.from), node_map.get(&edge.to))
{
let key = (from_idx.index(), to_idx.index());
if seen_edges.insert(key) {
graph.update_edge(from_idx, to_idx, 1.0);
}
}
}
// Per-node analysis vectors, all indexed by node index of the analysed graph.
struct AnalysisVecs {
labels: Vec<String>,
pr: Vec<f64>,
consumers_pr: Vec<f64>,
bc: Vec<f64>,
in_degrees: Vec<usize>,
out_degrees: Vec<usize>,
transitive_dependents: Vec<usize>,
transitive_deps: Vec<usize>,
scc_labels: Vec<usize>,
direct_edges: Vec<(String, String)>,
node_count: usize,
edge_count: usize,
}
// Computes every graph metric for `g`; `labels[i]` names node `i`.
// Generic over the node weight so it serves both the file graph and the
// directory-contracted graph.
fn compute_analysis<N: Clone + std::fmt::Debug>(
g: &DiGraph<N, f64>,
labels: Vec<String>,
) -> AnalysisVecs {
let pr = pagerank_auto(g);
// PageRank on the reversed graph.
let consumers_pr = pagerank_auto(&reverse_graph(g));
let bc = betweenness_centrality(g);
let in_degrees: Vec<usize> = g
.node_indices()
.map(|n| g.neighbors_directed(n, Direction::Incoming).count())
.collect();
let out_degrees: Vec<usize> = g
.node_indices()
.map(|n| g.neighbors_directed(n, Direction::Outgoing).count())
.collect();
// Edge list as plain index pairs for the reachability pass.
let mut ep: Vec<(usize, usize)> = Vec::new();
for e in g.edge_references() {
ep.push((e.source().index(), e.target().index()));
}
let (td, tdd) = reachability_counts_edges(g.node_count(), &ep);
let scc = strongly_connected_components(g);
let direct_edges: Vec<(String, String)> = g
.edge_references()
.map(|e| {
(
labels[e.source().index()].clone(),
labels[e.target().index()].clone(),
)
})
.collect();
AnalysisVecs {
labels,
pr,
consumers_pr,
bc,
in_degrees,
out_degrees,
transitive_dependents: td,
transitive_deps: tdd,
scc_labels: scc,
direct_edges,
node_count: g.node_count(),
edge_count: g.edge_count(),
}
}
// Analyse either the directory-contracted graph or the file graph; file
// labels are paths relative to the root.
let av = if args.directory {
let (contracted, labels, _counts) = contract_to_directories(&graph, &root);
compute_analysis(&contracted, labels)
} else {
let labels: Vec<String> = graph
.node_indices()
.map(|n| {
graph
.nw(n)
.strip_prefix(&root)
.unwrap_or(graph.nw(n))
.to_string_lossy()
.to_string()
})
.collect();
compute_analysis(&graph, labels)
};
// Cycles are the SCCs with more than one member. They are numbered in
// ascending SCC-label order, and that number becomes each row's `cycle_id`.
let mut scc_sizes: HashMap<usize, usize> = HashMap::new();
for &label in &av.scc_labels {
*scc_sizes.entry(label).or_insert(0) += 1;
}
let mut cycles: Vec<Vec<String>> = Vec::new();
let mut cycle_id_map: HashMap<usize, usize> = HashMap::new();
{
let mut cycle_labels: Vec<usize> = scc_sizes
.iter()
.filter(|(_, &size)| size > 1)
.map(|(&label, _)| label)
.collect();
cycle_labels.sort();
for (cycle_idx, &scc_label) in cycle_labels.iter().enumerate() {
cycle_id_map.insert(scc_label, cycle_idx);
let members: Vec<String> = (0..av.node_count)
.filter(|&i| av.scc_labels[i] == scc_label)
.map(|i| av.labels[i].clone())
.collect();
cycles.push(members);
}
}
// Git history is only collected with --git; otherwise the stats stay empty.
let git_stats = if args.git {
git_file_stats(&root, args.git_days)
} else {
GitStats {
counts: HashMap::new(),
co_changers: HashMap::new(),
}
};
// Clamped to at least 1 so the churn division below never divides by zero.
let max_commits = git_stats.counts.values().copied().max().unwrap_or(1).max(1) as f64;
let mut rows: Vec<FileRow> = (0..av.node_count)
.map(|i| {
let rel = av.labels[i].clone();
// Directory nodes have no role of their own and are reported as `Source`.
let role = if args.directory {
FileRole::Source
} else {
let full = root.join(&rel);
file_roles.get(&full).copied().unwrap_or(FileRole::Source)
};
let in_degree = av.in_degrees[i];
let out_degree = av.out_degrees[i];
let cycle_id = cycle_id_map.get(&av.scc_labels[i]).copied();
let commits = if args.git {
if args.directory {
// Sum the commits of every git path whose parent directory equals
// this label; an empty parent corresponds to the "." label.
Some(
git_stats
.counts
.iter()
.filter(|(path, _)| {
Path::new(path)
.parent()
.map(|p| {
p.to_string_lossy() == rel
|| (rel == "." && p == Path::new(""))
})
.unwrap_or(false)
})
.map(|(_, &c)| c)
.sum::<usize>(),
)
} else {
Some(git_stats.counts.get(&rel).copied().unwrap_or(0))
}
} else {
None
};
// Churn risk: PageRank scaled by the node's share of the maximum commit count.
let churn_risk = commits.map(|c| av.pr[i] * (c as f64 / max_commits));
let co_changers = if args.git {
git_stats.co_changers.get(&rel).cloned().unwrap_or_default()
} else {
Vec::new()
};
let ext_deps = if args.directory {
Vec::new()
} else {
let full = root.join(&rel);
external_deps_map.get(&full).cloned().unwrap_or_default()
};
FileRow {
file: rel,
role,
in_degree,
out_degree,
dependents: av.transitive_dependents[i],
dependencies: av.transitive_deps[i],
pagerank: av.pr[i],
consumers_pagerank: av.consumers_pr[i],
betweenness: av.bc[i],
orphan: in_degree == 0 && out_degree == 0,
cycle_id,
commits,
churn_risk,
co_changers,
instability: if in_degree + out_degree > 0 {
out_degree as f64 / (in_degree + out_degree) as f64
} else {
0.0
},
// Filled in by the labelling pass below, once the medians are known.
structure: String::new(), external_deps: ext_deps,
}
})
.collect();
// Medians take the element at index len / 2 of the sorted values
// (the upper middle for even lengths); empty input yields zero.
let median_in = {
let mut ins: Vec<usize> = rows.iter().map(|r| r.in_degree).collect();
ins.sort();
if ins.is_empty() {
0
} else {
ins[ins.len() / 2]
}
};
let median_out = {
let mut outs: Vec<usize> = rows.iter().map(|r| r.out_degree).collect();
outs.sort();
if outs.is_empty() {
0
} else {
outs[outs.len() / 2]
}
};
// The churn median only considers rows with a strictly positive churn risk.
let median_churn = if args.git {
let mut churns: Vec<f64> = rows
.iter()
.filter_map(|r| r.churn_risk)
.filter(|c| *c > 0.0)
.collect();
churns.sort_by(|a, b| a.total_cmp(b));
if churns.is_empty() {
0.0
} else {
churns[churns.len() / 2]
}
} else {
0.0
};
// Structural label from the degrees relative to their medians, with an
// optional churn suffix: "!!" = central and volatile, "~" = volatile only.
for row in &mut rows {
let base = match (row.in_degree > median_in, row.out_degree > median_out) {
(true, false) => "foundation",
(true, true) => "hub",
(false, true) => "consumer",
(false, false) => {
if row.orphan {
"orphan"
} else {
"leaf"
}
}
};
if args.git && row.churn_risk.unwrap_or(0.0) > 0.0 {
let is_volatile = row.churn_risk.unwrap_or(0.0) > median_churn;
let is_central = row.in_degree > median_in;
row.structure = match (is_central, is_volatile) {
(true, true) => format!("{}!!", base), (true, false) => format!("{}", base), (false, true) => format!("{}~", base), (false, false) => base.to_string(), };
} else {
row.structure = base.to_string();
}
}
// With --git and the PageRank metric, rows are ranked by churn risk
// instead; every other combination sorts descending by the chosen metric.
if args.git && matches!(args.metric, Metric::Pagerank) {
rows.sort_by(|a, b| {
b.churn_risk
.unwrap_or(0.0)
.total_cmp(&a.churn_risk.unwrap_or(0.0))
});
} else {
rows.sort_by(|a, b| match args.metric {
Metric::Pagerank => b.pagerank.total_cmp(&a.pagerank),
Metric::ConsumersPagerank => b.consumers_pagerank.total_cmp(&a.consumers_pagerank),
Metric::Betweenness => b.betweenness.total_cmp(&a.betweenness),
Metric::Indegree => b.in_degree.cmp(&a.in_degree),
Metric::Outdegree => b.out_degree.cmp(&a.out_degree),
});
}
let orphan_count = rows.iter().filter(|r| r.orphan).count();
let result = FilesResult {
nodes: av.node_count,
edges: av.edge_count,
ecosystem,
orphan_count,
cycles,
rows,
direct_edges: av.direct_edges,
};
// Persist the fresh result under the same key used for the lookup above.
if args.cache {
let key = files_cache_key(&all_files, args);
files_cache_write(&cache_dir, key, &result);
}
Ok(result)
}
/// Entry point of the `files` command: analyses, prints, and optionally stores the result.
///
/// Output is either a JSON envelope or a text report, both limited to the top
/// `args.top` rows. Afterwards the result is stored as a snapshot when
/// `args.store` is set (failures only produce a warning on stderr), and the
/// focus view is printed when `args.focus` is given.
///
/// # Errors
/// Propagates failures from the analysis and from JSON serialization.
pub(crate) fn run_files(args: &FilesArgs) -> Result<()> {
let result = files_analyze(args)?;
let fmt = effective_format(args.format);
match fmt {
OutputFormat::Json => {
// JSON envelope; `rows` is truncated to `args.top`, while `rows_total`
// reports the count before truncation.
#[derive(Serialize)]
struct Out {
schema_version: u32,
ok: bool,
command: &'static str,
ecosystem: Ecosystem,
nodes: usize,
edges: usize,
orphan_count: usize,
cycle_count: usize,
cycles: Vec<Vec<String>>,
rows_total: usize,
rows_returned: usize,
rows: Vec<FileRow>,
}
let rows_total = result.rows.len();
let rows: Vec<FileRow> = result.rows.iter().take(args.top).cloned().collect();
let out = Out {
schema_version: 1,
ok: true,
command: "files",
ecosystem: result.ecosystem,
nodes: result.nodes,
edges: result.edges,
orphan_count: result.orphan_count,
cycle_count: result.cycles.len(),
cycles: result.cycles.clone(),
rows_total,
rows_returned: rows.len(),
rows,
};
println!("{}", serde_json::to_string_pretty(&out)?);
}
OutputFormat::Text => {
// Header line, followed by a table whose columns depend on --git.
let git_label = if args.git {
format!(" git_days={}", args.git_days)
} else {
String::new()
};
println!(
"pkgrank files ecosystem={} metric={:?} include_tests={}{}\n",
result.ecosystem, args.metric, args.include_tests, git_label
);
if args.git {
println!(
"{:>4} {:>8} {:>5} {:>10} {:>5} {:>3} {:>3} {:<10} file",
"rank", "churn", "comms", "pr", "blast", "in", "out", "role"
);
} else {
println!(
"{:>4} {:>10} {:>10} {:>9} {:>5} {:>5} {:>3} {:>3} {:<10} file",
"rank", "pr", "cons_pr", "between", "blast", "deps", "in", "out", "role"
);
}
// Horizontal rule: 110 box-drawing characters.
println!("{:\u{2500}<110}", "");
for (i, r) in result.rows.iter().take(args.top).enumerate() {
// The "role" column shows the structure label, with '*' appended
// for rows that belong to a cycle.
let mut label = r.structure.clone();
if r.cycle_id.is_some() {
label.push('*');
}
if args.git {
println!(
"{:>4}. {:>8.6} {:>5} {:>10.6} {:>5} {:>3} {:>3} {:<10} {}",
i + 1,
r.churn_risk.unwrap_or(0.0),
r.commits.unwrap_or(0),
r.pagerank,
r.dependents,
r.in_degree,
r.out_degree,
label,
r.file
);
} else {
println!(
"{:>4}. {:>10.6} {:>10.6} {:>9.6} {:>5} {:>5} {:>3} {:>3} {:<10} {}",
i + 1,
r.pagerank,
r.consumers_pagerank,
r.betweenness,
r.dependents,
r.dependencies,
r.in_degree,
r.out_degree,
label,
r.file
);
}
}
// Density = edges / (n * (n - 1)), i.e. relative to all possible directed edges.
let density = if result.nodes > 1 {
result.edges as f64 / (result.nodes as f64 * (result.nodes as f64 - 1.0))
} else {
0.0
};
println!(
"\n{} files, {} edges, density={:.4}, {} orphans, {} cycles",
result.nodes,
result.edges,
density,
result.orphan_count,
result.cycles.len()
);
// The summary sections are only printed for graphs with more than three nodes.
if result.nodes > 3 {
let mut by_in: Vec<&FileRow> = result.rows.iter().collect();
by_in.sort_by(|a, b| b.in_degree.cmp(&a.in_degree));
println!("\nhubs (most depended-on):");
for r in by_in.iter().take(3) {
println!(
" {} ({} dependents, blast={}, {})",
r.file, r.in_degree, r.dependents, r.structure
);
}
// Count rows per base structure label, ignoring the "!!" / "~" churn suffixes.
// HashMap iteration order is unspecified, so the order of the printed
// parts may vary between runs.
let mut struct_counts: HashMap<&str, usize> = HashMap::new();
for r in &result.rows {
let base = r.structure.trim_end_matches('!').trim_end_matches('~');
*struct_counts.entry(base).or_insert(0) += 1;
}
let danger_count = result
.rows
.iter()
.filter(|r| r.structure.contains("!!"))
.count();
if !struct_counts.is_empty() {
let parts: Vec<String> = struct_counts
.iter()
.map(|(k, v)| format!("{}: {}", k, v))
.collect();
println!("\nstructure: {}", parts.join(", "));
if danger_count > 0 {
println!(
" {} files in danger zone (central + volatile)",
danger_count
);
}
}
// Entry points: rows that import others but are imported by nobody.
let mut consumers: Vec<&FileRow> = result
.rows
.iter()
.filter(|r| r.in_degree == 0 && r.out_degree > 0)
.collect();
consumers.sort_by(|a, b| b.out_degree.cmp(&a.out_degree));
if !consumers.is_empty() {
println!("\nentry points (no dependents, import others):");
for r in consumers.iter().take(3) {
println!(" {} (imports {})", r.file, r.out_degree);
}
}
}
// External dependency usage: the five dependencies referenced by the most
// rows. Skipped in directory mode.
if !args.directory {
let mut dep_usage: HashMap<&str, usize> = HashMap::new();
for r in &result.rows {
for dep in &r.external_deps {
*dep_usage.entry(dep.as_str()).or_insert(0) += 1;
}
}
if !dep_usage.is_empty() {
let mut sorted_deps: Vec<(&&str, &usize)> = dep_usage.iter().collect();
sorted_deps.sort_by(|a, b| b.1.cmp(a.1));
println!(
"\nexternal deps ({} unique, top by file count):",
dep_usage.len()
);
for (dep, count) in sorted_deps.iter().take(5) {
println!(" {} ({} files)", dep, count);
}
}
}
// Cycle listing: at most five cycles, each previewing at most five members.
if !result.cycles.is_empty() {
println!("\ncycles (* in table):");
for (i, cycle) in result.cycles.iter().take(5).enumerate() {
let preview: Vec<&str> = cycle.iter().take(5).map(|s| s.as_str()).collect();
let suffix = if cycle.len() > 5 {
format!(", ... (+{})", cycle.len() - 5)
} else {
String::new()
};
println!(
" cycle {}: {} files [{}{}]",
i,
cycle.len(),
preview.join(", "),
suffix
);
}
if result.cycles.len() > 5 {
println!(" ... (+{} more cycles)", result.cycles.len() - 5);
}
}
}
}
// Snapshot storage is best-effort: failures are reported on stderr and
// never fail the command.
if args.store {
let db_path = crate::store::default_db_path();
match crate::store::open_db(&db_path) {
Ok(conn) => {
let project_path = args.path.clone();
match crate::store::store_snapshot(&conn, &project_path, &result) {
Ok(snap_id) => {
eprintln!("stored snapshot {} in {}", snap_id, db_path.display());
}
Err(e) => {
eprintln!("warning: failed to store snapshot: {}", e);
}
}
}
Err(e) => {
eprintln!("warning: failed to open db: {}", e);
}
}
}
// The focus view is printed after the main output, for either output format.
if let Some(focus) = &args.focus {
print_focus(focus, &result);
}
Ok(())
}
fn print_focus(query: &str, result: &FilesResult) {
let matches: Vec<&FileRow> = result
.rows
.iter()
.filter(|r| r.file.contains(query))
.collect();
if matches.is_empty() {
eprintln!("no file matching '{}' found", query);
return;
}
for row in &matches {
println!("\n{:=<80}", "");
println!("focus: {}", row.file);
println!(
" role={:?} pagerank={:.6} betweenness={:.6}",
row.role, row.pagerank, row.betweenness
);
println!(
" in_degree={} out_degree={} blast_radius={} deps={}",
row.in_degree, row.out_degree, row.dependents, row.dependencies
);
if let Some(c) = row.commits {
println!(
" commits={} churn_risk={:.6}",
c,
row.churn_risk.unwrap_or(0.0)
);
}
if let Some(cid) = row.cycle_id {
println!(
" cycle_id={} ({} files)",
cid,
result.cycles.get(cid).map(|c| c.len()).unwrap_or(0)
);
}
if !row.co_changers.is_empty() {
println!(" co-changes with ({}):", row.co_changers.len());
for (partner, count) in &row.co_changers {
println!(" ~{} {}", count, partner);
}
}
let imports: Vec<&str> = result
.direct_edges
.iter()
.filter(|(from, _)| from == &row.file)
.map(|(_, to)| to.as_str())
.collect();
if !imports.is_empty() {
println!(" imports ({}):", imports.len());
for imp in &imports {
println!(" -> {}", imp);
}
}
let dependents: Vec<&str> = result
.direct_edges
.iter()
.filter(|(_, to)| to == &row.file)
.map(|(from, _)| from.as_str())
.collect();
if !dependents.is_empty() {
println!(" imported by ({}):", dependents.len());
for dep in dependents.iter().take(15) {
println!(" <- {}", dep);
}
if dependents.len() > 15 {
println!(" ... (+{} more)", dependents.len() - 15);
}
}
}
}