use serde::{Deserialize, Serialize};
use std::collections::{HashMap, HashSet};
use std::path::{Path, PathBuf};
/// A third-party package recorded from a dependency manifest or lockfile.
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq)]
pub struct ExternalDependency {
/// Package name as written in the manifest; the registry uses it as its lookup key.
pub name: String,
/// Version or constraint text copied from the manifest; parsers store `"*"` when none is given.
pub version: String,
/// Ecosystem of the parser that produced this entry.
pub ecosystem: Ecosystem,
}
/// Package ecosystems for which this module has manifest or lockfile parsers.
#[derive(Debug, Clone, Copy, Serialize, Deserialize, PartialEq, Eq, Hash)]
pub enum Ecosystem {
/// Rust crates (`Cargo.toml`, `Cargo.lock`).
Cargo,
/// JavaScript packages (`package.json`, npm / yarn / pnpm lockfiles).
Npm,
/// Python distributions (`requirements.txt`, `Pipfile.lock`, `pyproject.toml`, `poetry.lock`).
Python,
/// Go modules (`go.mod`, `go.sum`).
GoModules,
/// Ruby gems (`Gemfile.lock`).
RubyGems,
/// PHP packages (`composer.lock`).
Composer,
}
/// Renders the ecosystem as the short lowercase label that is embedded in
/// `external:<label>` language tags.
impl std::fmt::Display for Ecosystem {
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        let label = match self {
            Ecosystem::Cargo => "cargo",
            Ecosystem::Npm => "npm",
            Ecosystem::Python => "python",
            Ecosystem::GoModules => "go",
            Ecosystem::RubyGems => "rubygems",
            Ecosystem::Composer => "composer",
        };
        f.write_str(label)
    }
}
/// Lookup table of external dependencies, resolvable from import paths.
#[derive(Debug, Clone, Default)]
pub struct ExternalDependencyRegistry {
// Dependency records keyed by their name exactly as written in the manifest.
by_name: HashMap<String, ExternalDependency>,
// Normalised import names and aliases, each mapped to a key of `by_name`.
prefix_map: HashMap<String, String>,
}
impl ExternalDependencyRegistry {
    /// Creates a registry that contains no dependencies.
    pub fn new() -> Self {
        Default::default()
    }

    /// Builds a registry from every dependency manifest discovered under
    /// `root`, with no extra directory exclusions.
    pub fn from_project(root: &Path) -> Self {
        let manifests = discover_dependency_manifests(root, None);
        Self::from_manifest_paths(root, &manifests)
    }
}
/// Ranks a manifest file name for parse ordering.
///
/// The hand-written manifests (`Cargo.toml`, `package.json`,
/// `pyproject.toml`, `go.mod`) rank `0`; every other file name ranks `1`.
fn source_priority(file_name: &str) -> u8 {
    let is_primary_manifest = matches!(
        file_name,
        "Cargo.toml" | "package.json" | "pyproject.toml" | "go.mod"
    );
    if is_primary_manifest {
        0
    } else {
        1
    }
}
impl ExternalDependencyRegistry {
/// Builds a registry from an explicit list of manifest paths.
///
/// Relative paths are resolved against `root`. Files are grouped by their
/// parent directory; inside each group the priority-0 manifests (see
/// `source_priority`) are parsed first, so entries from lockfiles parsed
/// afterwards replace them. Each directory's registry is then merged into
/// the result in sorted directory order, later directories overwriting
/// earlier ones. Unreadable files and unknown file names are skipped.
pub fn from_manifest_paths(root: &Path, manifest_paths: &[PathBuf]) -> Self {
    fn priority_of(path: &Path) -> u8 {
        source_priority(path.file_name().and_then(|n| n.to_str()).unwrap_or(""))
    }

    // Stable sort: paths of equal priority keep their incoming order.
    let mut ordered: Vec<PathBuf> = manifest_paths.to_vec();
    ordered.sort_by_key(|candidate| priority_of(candidate));

    let mut by_directory: std::collections::BTreeMap<PathBuf, Vec<PathBuf>> =
        std::collections::BTreeMap::new();
    for candidate in &ordered {
        let absolute = if candidate.is_absolute() {
            candidate.clone()
        } else {
            root.join(candidate)
        };
        let directory = absolute.parent().unwrap_or(root).to_path_buf();
        by_directory.entry(directory).or_default().push(absolute);
    }

    let mut registry = Self::new();
    for files in by_directory.into_values() {
        let mut local = Self::new();
        for file in &files {
            let Some(file_name) = file.file_name().and_then(|name| name.to_str()) else {
                continue;
            };
            let Ok(content) = std::fs::read_to_string(file) else {
                continue;
            };
            match file_name {
                "Cargo.lock" => local.parse_cargo_lock(&content),
                "Cargo.toml" => local.parse_cargo_toml(&content),
                "package-lock.json" | "npm-shrinkwrap.json" => {
                    local.parse_package_lock_json(&content)
                }
                "package.json" => local.parse_package_json(&content),
                "yarn.lock" => local.parse_yarn_lock(&content),
                "pnpm-lock.yaml" => local.parse_pnpm_lock(&content),
                "requirements.txt" => local.parse_requirements_txt(&content),
                "Pipfile.lock" => local.parse_pipfile_lock(&content),
                "pyproject.toml" => local.parse_pyproject_toml(&content),
                "poetry.lock" => local.parse_poetry_lock(&content),
                "go.mod" => local.parse_go_mod(&content),
                "go.sum" => local.parse_go_sum(&content),
                "Gemfile.lock" => local.parse_gemfile_lock(&content),
                "composer.lock" => local.parse_composer_lock(&content),
                _ => {}
            }
        }
        registry.merge(local);
    }
    registry
}
/// Returns the number of distinct dependency names held in the registry.
pub fn len(&self) -> usize {
self.by_name.len()
}
/// Returns `true` when no dependency has been recorded.
pub fn is_empty(&self) -> bool {
self.by_name.is_empty()
}
/// Returns references to every recorded dependency.
///
/// The order follows `HashMap` iteration and is therefore unspecified.
pub fn all_dependencies(&self) -> Vec<&ExternalDependency> {
self.by_name.values().collect()
}
pub fn merge(&mut self, other: ExternalDependencyRegistry) {
for (name, dep) in other.by_name {
self.by_name.insert(name, dep);
}
for (prefix, canonical) in other.prefix_map {
self.prefix_map.insert(prefix, canonical);
}
}
/// Resolves an import path to the dependency that provides it.
///
/// The path is normalised with `normalise_import`, then matched in two
/// passes over the full path and each shorter dot-separated prefix,
/// longest first:
///
/// 1. exact alias match;
/// 2. match after swapping `-` for `_`, then `_` for `-`.
///
/// Every lookup that succeeded before the second pass covered prefixes
/// still returns the same entry; the second pass only adds matches such
/// as `serde_json.value` resolving to a dependency named `serde-json`.
///
/// Returns `None` when nothing matches.
pub fn resolve(&self, import_path: &str) -> Option<&ExternalDependency> {
    let normalised = normalise_import(import_path);

    // Full path first, then progressively shorter prefixes.
    let mut candidates: Vec<&str> = vec![normalised.as_str()];
    let mut remaining = normalised.as_str();
    while let Some(pos) = remaining.rfind('.') {
        remaining = &remaining[..pos];
        candidates.push(remaining);
    }

    for key in &candidates {
        if let Some(canonical) = self.prefix_map.get(*key) {
            return self.by_name.get(canonical);
        }
    }

    // Crate/package names and import names often disagree on `-` vs `_`.
    for key in &candidates {
        for variant in [key.replace('-', "_"), key.replace('_', "-")] {
            if variant != *key {
                if let Some(canonical) = self.prefix_map.get(&variant) {
                    return self.by_name.get(canonical);
                }
            }
        }
    }
    None
}
/// Records `dep`, replacing any previous entry with the same name.
///
/// The normalised name is registered as an alias; Python packages also
/// get the extra aliases produced by `python_import_aliases`.
fn insert(&mut self, dep: ExternalDependency) {
    let canonical = dep.name.clone();
    let mut keys = vec![normalise_import(&canonical)];
    if matches!(dep.ecosystem, Ecosystem::Python) {
        keys.extend(python_import_aliases(&canonical));
    }
    for key in keys {
        self.prefix_map.insert(key, canonical.clone());
    }
    self.by_name.insert(canonical, dep);
}
/// Reads `[[package]]` entries from `Cargo.lock` text.
///
/// A package is recorded only when both a `name = ` and a `version = `
/// line were seen before the next `[[package]]` header or end of input.
fn parse_cargo_lock(&mut self, content: &str) {
    let mut pending: (Option<String>, Option<String>) = (None, None);
    for line in content.lines().map(str::trim) {
        if line == "[[package]]" {
            // Taking the pair also resets it for the entry that follows.
            if let (Some(name), Some(version)) = std::mem::take(&mut pending) {
                self.insert(ExternalDependency {
                    name,
                    version,
                    ecosystem: Ecosystem::Cargo,
                });
            }
            continue;
        }
        if let Some(value) = line.strip_prefix("name = ") {
            pending.0 = Some(unquote(value));
        } else if let Some(value) = line.strip_prefix("version = ") {
            pending.1 = Some(unquote(value));
        }
    }
    if let (Some(name), Some(version)) = pending {
        self.insert(ExternalDependency {
            name,
            version,
            ecosystem: Ecosystem::Cargo,
        });
    }
}
/// Reads dependency declarations from `Cargo.toml` text.
///
/// Two layouts are recognised in any section whose header contains
/// `dependencies`:
///
/// * list sections such as `[dependencies]` or `[workspace.dependencies]`,
///   where each `name = ...` line declares one crate;
/// * per-crate tables such as `[dependencies.serde]`, where the crate name
///   comes from the header and the version from its `version` key. Other
///   keys in such a table (`features`, `optional`, ...) are ignored rather
///   than being mistaken for crate names.
///
/// The version is `"*"` when none can be extracted. Anything after a `#`
/// on a line is discarded as a comment.
fn parse_cargo_toml(&mut self, content: &str) {
    enum Section {
        Other,
        List,
        Table,
    }

    let mut section = Section::Other;
    // Crate declared by the current `[...dependencies.<name>]` table.
    let mut table_dep: Option<(String, String)> = None;

    for raw in content.lines() {
        let line = raw.split('#').next().unwrap_or("").trim();
        if line.is_empty() {
            continue;
        }
        if line.starts_with('[') && line.ends_with(']') {
            // A new header closes any per-crate table in progress.
            if let Some((name, version)) = table_dep.take() {
                self.insert(ExternalDependency {
                    name,
                    version,
                    ecosystem: Ecosystem::Cargo,
                });
            }
            let header = line.trim_matches(&['[', ']'][..]);
            section = if !header.to_lowercase().contains("dependencies") {
                Section::Other
            } else {
                match header.rsplit_once('.') {
                    Some((_, last)) if !last.to_lowercase().contains("dependencies") => {
                        let name = last.trim().trim_matches('"').trim_matches('\'');
                        if name.is_empty() {
                            Section::Other
                        } else {
                            table_dep = Some((name.to_string(), "*".to_string()));
                            Section::Table
                        }
                    }
                    _ => Section::List,
                }
            };
            continue;
        }

        let Some((key_raw, value_raw)) = line.split_once('=') else {
            continue;
        };
        let key = key_raw.trim().trim_matches('"').trim_matches('\'');
        let value = value_raw.trim();

        match section {
            Section::Other => {}
            Section::Table => {
                if key == "version" {
                    if let Some((_, version)) = table_dep.as_mut() {
                        let parsed = unquote(value);
                        if !parsed.is_empty() {
                            *version = parsed;
                        }
                    }
                }
            }
            Section::List => {
                if key.is_empty() {
                    continue;
                }
                let version = if value.starts_with('"') || value.starts_with('\'') {
                    unquote(value)
                } else if let Some(pos) = value.find("version") {
                    // Inline table: take the text between `version =` and
                    // the next `,` or closing brace.
                    let rest = &value[pos + "version".len()..];
                    match rest.split_once('=') {
                        Some((_, rhs)) => {
                            let candidate = rhs
                                .split(',')
                                .next()
                                .unwrap_or(rhs)
                                .trim()
                                .trim_end_matches('}');
                            let parsed = unquote(candidate);
                            if parsed.is_empty() {
                                "*".to_string()
                            } else {
                                parsed
                            }
                        }
                        None => "*".to_string(),
                    }
                } else {
                    "*".to_string()
                };
                self.insert(ExternalDependency {
                    name: key.to_string(),
                    version,
                    ecosystem: Ecosystem::Cargo,
                });
            }
        }
    }

    if let Some((name, version)) = table_dep {
        self.insert(ExternalDependency {
            name,
            version,
            ecosystem: Ecosystem::Cargo,
        });
    }
}
/// Reads packages from `package-lock.json` / `npm-shrinkwrap.json` text.
///
/// When a `packages` object is present its keys are install paths; the
/// package name is whatever follows the last `node_modules/` segment, so
/// `node_modules/a/node_modules/b` is recorded as `b`. A nested copy never
/// replaces an entry that already exists, so the hoisted top-level version
/// wins. Without `packages`, the top-level `dependencies` object is read
/// instead. Missing versions become `"*"`; invalid JSON is ignored.
fn parse_package_lock_json(&mut self, content: &str) {
    const MODULES_DIR: &str = "node_modules/";

    let Ok(parsed) = serde_json::from_str::<serde_json::Value>(content) else {
        return;
    };
    if let Some(packages) = parsed.get("packages").and_then(|v| v.as_object()) {
        for (key, val) in packages {
            let name = key.rsplit(MODULES_DIR).next().unwrap_or(key);
            // The empty key describes the root project itself.
            if name.is_empty() || name == "." {
                continue;
            }
            let nested = key.matches(MODULES_DIR).count() > 1;
            if nested && self.by_name.contains_key(name) {
                continue;
            }
            let version = val
                .get("version")
                .and_then(|v| v.as_str())
                .unwrap_or("*")
                .to_string();
            self.insert(ExternalDependency {
                name: name.to_string(),
                version,
                ecosystem: Ecosystem::Npm,
            });
        }
        return;
    }
    if let Some(deps) = parsed.get("dependencies").and_then(|v| v.as_object()) {
        for (name, val) in deps {
            let version = val
                .get("version")
                .and_then(|v| v.as_str())
                .unwrap_or("*")
                .to_string();
            self.insert(ExternalDependency {
                name: name.clone(),
                version,
                ecosystem: Ecosystem::Npm,
            });
        }
    }
}
/// Reads the four dependency tables of a `package.json` document.
///
/// Entries whose value is not a string get the version `"*"`. Invalid
/// JSON is ignored.
fn parse_package_json(&mut self, content: &str) {
    const SECTIONS: [&str; 4] = [
        "dependencies",
        "devDependencies",
        "peerDependencies",
        "optionalDependencies",
    ];

    let Ok(manifest) = serde_json::from_str::<serde_json::Value>(content) else {
        return;
    };
    let tables = SECTIONS
        .iter()
        .filter_map(|key| manifest.get(*key))
        .filter_map(|table| table.as_object());
    for table in tables {
        for (name, spec) in table {
            self.insert(ExternalDependency {
                name: name.clone(),
                version: spec.as_str().unwrap_or("*").to_string(),
                ecosystem: Ecosystem::Npm,
            });
        }
    }
}
/// Reads entries from yarn v1 `yarn.lock` text.
///
/// An entry header is an unindented line ending in `:`; it may list
/// several comma-separated specs (`react@^18.0.0, react@^18.2.0:`), all
/// naming the same package, so only the first is used. The name is the
/// spec up to its first `@`, not counting the leading `@` of a scoped
/// name. The first `version "..."` line after a header supplies the
/// version.
fn parse_yarn_lock(&mut self, content: &str) {
    let mut current_name: Option<String> = None;
    for line in content.lines() {
        let trimmed = line.trim();
        if trimmed.is_empty() || trimmed.starts_with('#') {
            continue;
        }
        // Indented `key:` lines (e.g. `dependencies:`) belong to an entry
        // and are not headers, so test the untrimmed line.
        let is_header = !line.starts_with(char::is_whitespace) && trimmed.ends_with(':');
        if is_header {
            let first_spec = trimmed
                .trim_end_matches(':')
                .split(',')
                .next()
                .unwrap_or("")
                .trim()
                .trim_matches('"');
            let name = match first_spec.strip_prefix('@') {
                Some(scoped) => match scoped.find('@') {
                    // +1 re-adds the scope's leading `@` to the offset.
                    Some(at) => &first_spec[..at + 1],
                    None => first_spec,
                },
                None => match first_spec.find('@') {
                    Some(at) => &first_spec[..at],
                    None => first_spec,
                },
            };
            current_name = if name.is_empty() {
                None
            } else {
                Some(name.to_string())
            };
            continue;
        }
        if let Some(rest) = trimmed.strip_prefix("version ") {
            // `take` ensures later `version` lines of the same entry are ignored.
            if let Some(name) = current_name.take() {
                self.insert(ExternalDependency {
                    name,
                    version: unquote(rest),
                    ecosystem: Ecosystem::Npm,
                });
            }
        }
    }
}
/// Reads `/name@version:` package keys from `pnpm-lock.yaml` text.
///
/// A peer-dependency suffix such as `(react@18.2.0)` is removed before
/// the key is split, so the `@` inside it cannot be mistaken for the
/// name/version separator. Keys without a version get `"*"`.
fn parse_pnpm_lock(&mut self, content: &str) {
    for line in content.lines() {
        let trimmed = line.trim();
        if !trimmed.starts_with('/') || !trimmed.ends_with(':') {
            continue;
        }
        let key = trimmed.trim_end_matches(':').trim_start_matches('/');
        let spec = key.split('(').next().unwrap_or(key);
        if spec.is_empty() {
            continue;
        }
        // For scoped names skip the leading `@` when looking for the separator.
        let separator = match spec.strip_prefix('@') {
            Some(scoped) => scoped.rfind('@').map(|pos| pos + 1),
            None => spec.rfind('@'),
        };
        let (name, version) = match separator {
            Some(pos) => (&spec[..pos], &spec[pos + 1..]),
            None => (spec, "*"),
        };
        if !name.is_empty() {
            self.insert(ExternalDependency {
                name: name.to_string(),
                version: version.to_string(),
                ecosystem: Ecosystem::Npm,
            });
        }
    }
}
/// Reads requirement lines from `requirements.txt` text.
///
/// Blank lines, full-line comments and option lines (starting with `-`)
/// are skipped. Inline comments (a `#` preceded by whitespace) and
/// environment markers (everything from `;`) are removed so they do not
/// end up in the version. The name ends at the first of `=<>~!`, with any
/// `[extras]` dropped; the rest of the line is kept verbatim as the
/// version, or `"*"` when there is no constraint.
fn parse_requirements_txt(&mut self, content: &str) {
    for line in content.lines() {
        let trimmed = line.trim();
        if trimmed.is_empty() || trimmed.starts_with('#') || trimmed.starts_with('-') {
            continue;
        }
        // A `#` inside a URL fragment (`...#egg=name`) is not preceded by
        // whitespace and is therefore left alone.
        let comment_start = trimmed
            .char_indices()
            .find(|&(idx, ch)| ch == '#' && trimmed[..idx].ends_with(char::is_whitespace))
            .map(|(idx, _)| idx);
        let without_comment = comment_start.map_or(trimmed, |idx| &trimmed[..idx]);
        let requirement = without_comment
            .split(';')
            .next()
            .unwrap_or(without_comment)
            .trim();

        let (raw_name, version) = match requirement.find(|c: char| "=<>~!".contains(c)) {
            Some(pos) => (
                requirement[..pos].trim(),
                requirement[pos..].trim().to_string(),
            ),
            None => (requirement, "*".to_string()),
        };
        let name = raw_name.split('[').next().unwrap_or(raw_name).trim();
        if name.is_empty() {
            continue;
        }
        self.insert(ExternalDependency {
            name: name.to_string(),
            version,
            ecosystem: Ecosystem::Python,
        });
    }
}
/// Reads the `default` and `develop` tables of a `Pipfile.lock` document.
///
/// Entries without a string `version` field get `"*"`. Invalid JSON is
/// ignored.
fn parse_pipfile_lock(&mut self, content: &str) {
    let Ok(lock) = serde_json::from_str::<serde_json::Value>(content) else {
        return;
    };
    for group in ["default", "develop"] {
        let Some(entries) = lock.get(group).and_then(|v| v.as_object()) else {
            continue;
        };
        for (name, details) in entries {
            let pinned = details.get("version").and_then(|v| v.as_str());
            self.insert(ExternalDependency {
                name: name.clone(),
                version: pinned.unwrap_or("*").to_string(),
                ecosystem: Ecosystem::Python,
            });
        }
    }
}
fn parse_pyproject_toml(&mut self, content: &str) {
let mut section = String::new();
let mut collecting_project_deps = false;
let mut project_deps_buf = String::new();
for raw in content.lines() {
let line = raw.split('#').next().unwrap_or("").trim();
if line.is_empty() {
continue;
}
if line.starts_with('[') && line.ends_with(']') {
section = line.trim_matches(&['[', ']'][..]).to_lowercase();
collecting_project_deps = false;
project_deps_buf.clear();
continue;
}
if section == "project" {
if let Some(rhs) = line.strip_prefix("dependencies = ") {
if rhs.contains(']') {
for dep in parse_python_dependency_list(rhs) {
self.insert(dep);
}
} else {
collecting_project_deps = true;
project_deps_buf.push_str(rhs);
project_deps_buf.push('\n');
}
continue;
}
if collecting_project_deps {
project_deps_buf.push_str(line);
project_deps_buf.push('\n');
if line.contains(']') {
collecting_project_deps = false;
for dep in parse_python_dependency_list(&project_deps_buf) {
self.insert(dep);
}
project_deps_buf.clear();
}
}
continue;
}
if section.starts_with("tool.poetry.dependencies")
|| section.starts_with("tool.poetry.group.") && section.ends_with(".dependencies")
{
let Some((name_raw, value_raw)) = line.split_once('=') else {
continue;
};
let name = name_raw.trim().trim_matches('"').trim_matches('\'');
if name.is_empty() || name.eq_ignore_ascii_case("python") {
continue;
}
let version = value_raw
.trim()
.split(',')
.next()
.map(unquote)
.filter(|v| !v.is_empty())
.unwrap_or_else(|| "*".to_string());
self.insert(ExternalDependency {
name: name.to_string(),
version,
ecosystem: Ecosystem::Python,
});
}
}
}
/// Reads `[[package]]` entries from `poetry.lock` text.
///
/// A package is recorded only when both a `name = ` and a `version = `
/// line were seen before the next `[[package]]` header or end of input.
fn parse_poetry_lock(&mut self, content: &str) {
    let mut pending: (Option<String>, Option<String>) = (None, None);
    for line in content.lines().map(str::trim) {
        if line == "[[package]]" {
            // Taking the pair also resets it for the entry that follows.
            if let (Some(name), Some(version)) = std::mem::take(&mut pending) {
                self.insert(ExternalDependency {
                    name,
                    version,
                    ecosystem: Ecosystem::Python,
                });
            }
            continue;
        }
        if let Some(value) = line.strip_prefix("name = ") {
            pending.0 = Some(unquote(value));
        } else if let Some(value) = line.strip_prefix("version = ") {
            pending.1 = Some(unquote(value));
        }
    }
    if let (Some(name), Some(version)) = pending {
        self.insert(ExternalDependency {
            name,
            version,
            ecosystem: Ecosystem::Python,
        });
    }
}
/// Reads module/version pairs from `go.sum` text.
///
/// The `/go.mod` suffix on a version is removed, and each
/// `module@version` pair is inserted only once.
fn parse_go_sum(&mut self, content: &str) {
    let mut seen = std::collections::HashSet::new();
    for line in content.lines() {
        let mut fields = line.split_whitespace();
        let (Some(module), Some(raw_version)) = (fields.next(), fields.next()) else {
            continue;
        };
        let version = raw_version.trim_end_matches("/go.mod");
        // `insert` reports `false` when the pair was already present.
        if !seen.insert(format!("{}@{}", module, version)) {
            continue;
        }
        self.insert(ExternalDependency {
            name: module.to_string(),
            version: version.to_string(),
            ecosystem: Ecosystem::GoModules,
        });
    }
}
/// Reads `require` directives from `go.mod` text.
///
/// Handles both the single-line form (`require module version`) and the
/// parenthesised block form. Anything after `//` on a line is discarded.
fn parse_go_mod(&mut self, content: &str) {
    let mut inside_block = false;
    for raw in content.lines() {
        let line = raw.split("//").next().unwrap_or("").trim();
        if line.is_empty() {
            continue;
        }
        if line.starts_with("require (") {
            inside_block = true;
            continue;
        }
        let requirement = if inside_block {
            if line == ")" {
                inside_block = false;
                continue;
            }
            parse_go_requirement(line)
        } else {
            line.strip_prefix("require ")
                .and_then(|rest| parse_go_requirement(rest.trim()))
        };
        if let Some((name, version)) = requirement {
            self.insert(ExternalDependency {
                name,
                version,
                ecosystem: Ecosystem::GoModules,
            });
        }
    }
}
/// Reads gems from the `specs:` sections of `Gemfile.lock` text.
///
/// Lines of the form `name (version)` with at most six bytes of
/// indentation are considered. Lines indented by more than four are
/// treated as dependency constraints of the gem above them
/// (`activesupport (= 7.1.0)`); they are recorded only when the gem is
/// not yet known, so a constraint never replaces an already-recorded
/// resolved version. A specs section ends at the first non-blank,
/// unindented line.
fn parse_gemfile_lock(&mut self, content: &str) {
    // NOTE(review): assumes Bundler's usual layout, with resolved gems
    // indented by four spaces and their constraints by six.
    const RESOLVED_INDENT: usize = 4;
    const MAX_INDENT: usize = 6;

    let mut in_specs = false;
    for line in content.lines() {
        let trimmed = line.trim();
        if trimmed == "specs:" {
            in_specs = true;
            continue;
        }
        if !in_specs {
            continue;
        }
        if !line.starts_with(' ') && !line.starts_with('\t') && !trimmed.is_empty() {
            in_specs = false;
            continue;
        }
        let indent = line.len() - line.trim_start().len();
        if indent > MAX_INDENT {
            continue;
        }
        let Some((name, rest)) = trimmed.split_once('(') else {
            continue;
        };
        let name = name.trim();
        if name.is_empty() {
            continue;
        }
        let is_constraint = indent > RESOLVED_INDENT;
        if is_constraint && self.by_name.contains_key(name) {
            continue;
        }
        self.insert(ExternalDependency {
            name: name.to_string(),
            version: rest.trim_end_matches(')').trim().to_string(),
            ecosystem: Ecosystem::RubyGems,
        });
    }
}
/// Reads the `packages` and `packages-dev` arrays of a `composer.lock`
/// document.
///
/// Entries without a non-empty string `name` are skipped; a missing
/// `version` becomes `"*"`. Invalid JSON is ignored.
fn parse_composer_lock(&mut self, content: &str) {
    let Ok(lock) = serde_json::from_str::<serde_json::Value>(content) else {
        return;
    };
    for key in ["packages", "packages-dev"] {
        let Some(entries) = lock.get(key).and_then(|v| v.as_array()) else {
            continue;
        };
        for entry in entries {
            let name = entry.get("name").and_then(|v| v.as_str()).unwrap_or("");
            if name.is_empty() {
                continue;
            }
            let version = entry
                .get("version")
                .and_then(|v| v.as_str())
                .unwrap_or("*");
            self.insert(ExternalDependency {
                name: name.to_string(),
                version: version.to_string(),
                ecosystem: Ecosystem::Composer,
            });
        }
    }
}
}
/// Converts an import path to the canonical key form used by the registry.
///
/// Each of `/`, `\`, `:` and `.` is a separator. Any run of separators
/// becomes a single `.`, leading and trailing separators are dropped, and
/// the result is lowercased. Runs are collapsed completely, so
/// `a/../b` yields `a.b` rather than `a..b`.
fn normalise_import(raw: &str) -> String {
    let mut collapsed = String::with_capacity(raw.len());
    for ch in raw.chars() {
        if matches!(ch, '/' | '\\' | ':' | '.') {
            // Emit at most one dot per run, and never a leading one.
            if !collapsed.is_empty() && !collapsed.ends_with('.') {
                collapsed.push('.');
            }
        } else {
            collapsed.push(ch);
        }
    }
    if collapsed.ends_with('.') {
        collapsed.pop();
    }
    collapsed.to_lowercase()
}
/// Returns `s` without surrounding whitespace and without any leading or
/// trailing double quotes, then single quotes.
fn unquote(s: &str) -> String {
    let without_double = s.trim().trim_matches('"');
    String::from(without_double.trim_matches('\''))
}
/// Splits the text of a TOML string array into Python requirements.
///
/// Quote characters toggle a "quoted" state and are dropped. Outside
/// quotes a comma ends the current item and square brackets are ignored;
/// inside quotes every character is kept. Each non-blank item is handed
/// to `parse_python_requirement_spec`.
fn parse_python_dependency_list(raw: &str) -> Vec<ExternalDependency> {
    fn flush(buffer: &mut String, sink: &mut Vec<ExternalDependency>) {
        let candidate = buffer.trim();
        if !candidate.is_empty() {
            sink.extend(parse_python_requirement_spec(candidate));
        }
        buffer.clear();
    }

    let mut found = Vec::new();
    let mut current = String::new();
    let mut quoted = false;
    for ch in raw.chars() {
        if ch == '"' || ch == '\'' {
            quoted = !quoted;
        } else if quoted {
            current.push(ch);
        } else if ch == ',' {
            flush(&mut current, &mut found);
        } else if ch != '[' && ch != ']' {
            current.push(ch);
        }
    }
    // The last item has no trailing comma.
    flush(&mut current, &mut found);
    found
}
/// Parses one Python requirement string such as `requests[socks]>=2.31 ; extra`.
///
/// Surrounding quotes and everything from the first `;` are discarded.
/// The name runs up to the first whitespace or one of `=<>!~`, minus any
/// `[extras]`; the remainder is the version, or `"*"` if there is none.
/// Returns `None` when no name is left.
fn parse_python_requirement_spec(spec: &str) -> Option<ExternalDependency> {
    let unquoted = spec.trim().trim_matches('"').trim_matches('\'');
    let requirement = unquoted.split(';').next().unwrap_or(spec).trim();
    if requirement.is_empty() {
        return None;
    }

    let boundary = requirement
        .find(|c: char| c.is_whitespace() || "=<>!~".contains(c))
        .unwrap_or(requirement.len());
    let (head, tail) = requirement.split_at(boundary);

    let name = head.trim().split('[').next().unwrap_or("").trim();
    if name.is_empty() {
        return None;
    }
    let version = match tail {
        "" => "*".to_string(),
        constraint => constraint.trim().to_string(),
    };
    Some(ExternalDependency {
        name: name.to_string(),
        version,
        ecosystem: Ecosystem::Python,
    })
}
/// Extracts `(module, version)` from the first two whitespace-separated
/// fields of a `go.mod` requirement; `None` when fewer than two exist.
fn parse_go_requirement(raw: &str) -> Option<(String, String)> {
    let mut fields = raw.split_whitespace();
    let module = fields.next()?;
    let version = fields.next()?;
    Some((module.to_string(), version.to_string()))
}
fn python_import_aliases(package_name: &str) -> Vec<String> {
let canonical = package_name.to_lowercase();
let mut aliases = vec![
normalise_import(&canonical),
normalise_import(&canonical.replace('-', "_")),
normalise_import(&canonical.replace('_', "-")),
];
if let Some(stripped) = canonical.strip_prefix("python-") {
aliases.push(normalise_import(stripped));
}
let known = [
("beautifulsoup4", "bs4"),
("pyyaml", "yaml"),
("python-dateutil", "dateutil"),
("opencv-python", "cv2"),
("scikit-learn", "sklearn"),
("scikit-image", "skimage"),
("pillow", "pil"),
("pyjwt", "jwt"),
];
for (dist, import) in known {
if canonical == dist {
aliases.push(normalise_import(import));
}
}
aliases.sort();
aliases.dedup();
aliases
}
/// Reports whether an import looks like a standard-library module of
/// Rust, Node.js, Python or Go.
///
/// The normalised import matches when it equals a listed module or
/// continues it with a `.`; the same test is applied after removing a
/// leading `node.` (the normalised form of the `node:` scheme). An empty
/// import never matches.
fn is_probable_builtin_import(import_name: &str) -> bool {
    const RUST_BUILTINS: &[&str] = &["std", "core", "alloc", "proc_macro", "test"];
    const NODE_BUILTINS: &[&str] = &[
        "assert",
        "buffer",
        "child_process",
        "crypto",
        "events",
        "fs",
        "http",
        "https",
        "net",
        "os",
        "path",
        "stream",
        "tls",
        "url",
        "util",
        "worker_threads",
        "zlib",
    ];
    const PY_BUILTINS: &[&str] = &[
        "abc",
        "argparse",
        "asyncio",
        "collections",
        "datetime",
        "functools",
        "itertools",
        "json",
        "logging",
        "math",
        "os",
        "pathlib",
        "re",
        "subprocess",
        "sys",
        "time",
        "typing",
        "unittest",
    ];
    const GO_BUILTINS: &[&str] = &[
        "bufio",
        "bytes",
        "context",
        "crypto",
        "database.sql",
        "encoding.json",
        "errors",
        "fmt",
        "io",
        "log",
        "math",
        "net.http",
        "net.url",
        "os",
        "path",
        "regexp",
        "sort",
        "strconv",
        "strings",
        "sync",
        "testing",
        "time",
    ];

    /// `candidate` is `module` itself or a dotted path below it.
    fn names_module(candidate: &str, module: &str) -> bool {
        match candidate.strip_prefix(module) {
            Some(rest) => rest.is_empty() || rest.starts_with('.'),
            None => false,
        }
    }

    let normalized = normalise_import(import_name);
    if normalized.is_empty() {
        return false;
    }
    let without_node_scheme = normalized.strip_prefix("node.");

    [RUST_BUILTINS, NODE_BUILTINS, PY_BUILTINS, GO_BUILTINS]
        .iter()
        .flat_map(|group| group.iter().copied())
        .any(|module| {
            names_module(&normalized, module)
                || without_node_scheme.map_or(false, |rest| names_module(rest, module))
        })
}
/// Tags the external nodes of `pdg` with the ecosystem and version of the
/// dependency that provides them.
///
/// A node is external when its type is `NodeType::External` or its
/// language is `"external"`. For each one, by node name:
///
/// * resolved by `registry` — language becomes `external:<ecosystem>`
///   and, unless the version is empty or `"*"`, the id gets an
///   `@<version>` suffix (replacing a previous version suffix);
/// * otherwise a probable standard-library import — language becomes
///   `external:system`;
/// * otherwise it is counted as unresolved.
///
/// `resolved` and `builtin` count nodes; `unresolved` counts distinct
/// import names.
pub fn annotate_external_nodes(
    pdg: &mut crate::graph::pdg::ProgramDependenceGraph,
    registry: &ExternalDependencyRegistry,
) -> AnnotationStats {
    use crate::graph::pdg::NodeType;

    /// Strips a trailing `@<version>` from `id`, if there is one.
    ///
    /// An `@` that opens a scoped package name (`@types/react`,
    /// `file:__external__:@types/react`) is not a version separator: it
    /// sits at the start, follows `/` or `:`, or is followed by a `/`.
    /// Splitting at such an `@` would discard the package name.
    fn unversioned_id(id: &str) -> &str {
        match id.rfind('@') {
            Some(pos)
                if pos > 0
                    && !id[pos + 1..].contains('/')
                    && !matches!(id.as_bytes()[pos - 1], b'/' | b':') =>
            {
                &id[..pos]
            }
            _ => id,
        }
    }

    let mut stats = AnnotationStats::default();
    let mut unresolved_imports = HashSet::new();

    // Collect the ids first so the graph can be mutated while iterating.
    let external_nodes: Vec<crate::graph::pdg::NodeId> = pdg
        .node_indices()
        .filter(|&idx| {
            pdg.get_node(idx)
                .map(|n| matches!(n.node_type, NodeType::External) || n.language == "external")
                .unwrap_or(false)
        })
        .collect();
    stats.total_external = external_nodes.len();

    for node_id in external_nodes {
        let Some(import_name) = pdg.get_node(node_id).map(|node| node.name.clone()) else {
            continue;
        };
        if let Some(dep) = registry.resolve(&import_name) {
            stats.resolved += 1;
            if let Some(node) = pdg.get_node_mut(node_id) {
                node.language = format!("external:{}", dep.ecosystem);
                if !dep.version.is_empty() && dep.version != "*" {
                    node.id = format!("{}@{}", unversioned_id(&node.id), dep.version);
                }
            }
        } else if is_probable_builtin_import(&import_name) {
            stats.builtin += 1;
            if let Some(node) = pdg.get_node_mut(node_id) {
                node.language = "external:system".to_string();
            }
        } else {
            unresolved_imports.insert(import_name);
        }
    }
    stats.unresolved = unresolved_imports.len();
    stats
}
/// Counters returned by `annotate_external_nodes`.
#[derive(Debug, Clone, Default, Serialize, Deserialize)]
pub struct AnnotationStats {
/// Number of external nodes examined.
pub total_external: usize,
/// Number of nodes whose import resolved to a registry dependency.
pub resolved: usize,
/// Number of nodes classified as probable standard-library imports.
pub builtin: usize,
/// Number of distinct import names that were neither resolved nor builtin.
pub unresolved: usize,
}
/// Walks `root` and returns the paths of all recognised dependency
/// manifests and lockfiles.
///
/// Symbolic links are not followed. Hidden directories (other than `root`
/// itself), directories named in `SKIP_DIRS`, and directories matching an
/// `exclude_dirs` pattern are not descended into. The result is sorted by
/// depth below `root`, then by path.
pub fn discover_dependency_manifests(
root: &Path,
exclude_dirs: Option<&[String]>,
) -> Vec<std::path::PathBuf> {
// Must stay sorted in byte order: it is searched with `binary_search` below.
const MANIFEST_NAMES: &[&str] = &[
"Cargo.lock",
"Cargo.toml",
"Gemfile.lock",
"Pipfile.lock",
"composer.lock",
"go.mod",
"go.sum",
"npm-shrinkwrap.json",
"package-lock.json",
"package.json",
"pnpm-lock.yaml",
"poetry.lock",
"pyproject.toml",
"requirements.txt",
"yarn.lock",
];
use crate::cli::skip_dirs::SKIP_DIRS;
let mut discovered = Vec::new();
let mut walker = walkdir::WalkDir::new(root).follow_links(false).into_iter();
// A `while let` loop (not `for`) because `skip_current_dir` is called on the walker itself.
while let Some(entry) = walker.next() {
// Entries the walker could not read are skipped.
let Ok(entry) = entry else {
continue;
};
let path = entry.path();
let file_name = entry.file_name().to_string_lossy();
// Hidden directories are pruned, except `root` itself.
if path != root && file_name.starts_with('.') && entry.file_type().is_dir() {
walker.skip_current_dir();
continue;
}
if entry.file_type().is_dir() {
if SKIP_DIRS.contains(&file_name.as_ref()) {
walker.skip_current_dir();
continue;
}
if let Some(excluded) = exclude_dirs {
if excluded.iter().any(|p| {
// Patterns are compared with surrounding `*` and `/` removed; no real glob matching is done.
let trimmed = p.trim_matches('*').trim_matches('/');
let relative = path
.strip_prefix(root)
.ok()
.and_then(|r| r.to_str())
.unwrap_or("");
let dir_relative = if relative.ends_with('/') {
relative.to_string()
} else {
format!("{}/", relative)
};
// Match on the directory's own name, its path relative to `root`, or a leading path prefix.
file_name.as_ref() == trimmed
|| trimmed == relative
|| trimmed == relative.trim_end_matches('/')
|| dir_relative.starts_with(&format!("{}/", trimmed))
}) {
walker.skip_current_dir();
continue;
}
}
continue;
}
if entry.file_type().is_file() && MANIFEST_NAMES.binary_search(&file_name.as_ref()).is_ok()
{
discovered.push(path.to_path_buf());
}
}
// Shallowest manifests first; paths that are not under `root` sort last.
discovered.sort_by(|a, b| {
let a_depth = a
.strip_prefix(root)
.map(|p| p.components().count())
.unwrap_or(usize::MAX);
let b_depth = b
.strip_prefix(root)
.map(|p| p.components().count())
.unwrap_or(usize::MAX);
a_depth.cmp(&b_depth).then_with(|| a.cmp(b))
});
discovered
}
// Unit tests for the manifest parsers, import resolution, PDG annotation
// and project discovery.
#[cfg(test)]
mod tests {
    use super::*;
    use std::sync::Arc;

    #[test]
    fn parse_cargo_lock() {
        let content = r#"
[[package]]
name = "serde"
version = "1.0.210"
[[package]]
name = "serde_json"
version = "1.0.128"
[[package]]
name = "tokio"
version = "1.40.0"
"#;
        let mut registry = ExternalDependencyRegistry::new();
        registry.parse_cargo_lock(content);
        assert_eq!(registry.len(), 3);
        let serde = registry.resolve("serde").unwrap();
        assert_eq!(serde.name, "serde");
        assert_eq!(serde.version, "1.0.210");
        assert_eq!(serde.ecosystem, Ecosystem::Cargo);
        let serde_json = registry.resolve("serde_json").unwrap();
        assert_eq!(serde_json.name, "serde_json");
        assert_eq!(serde_json.version, "1.0.128");
    }

    #[test]
    fn parse_package_lock_json_v2() {
        let content = r#"{
"packages": {
"": { "name": "my-app" },
"node_modules/react": { "version": "18.2.0" },
"node_modules/@types/react": { "version": "18.2.45" },
"node_modules/lodash": { "version": "4.17.21" }
}
}"#;
        let mut registry = ExternalDependencyRegistry::new();
        registry.parse_package_lock_json(content);
        assert_eq!(registry.len(), 3);
        let react = registry.resolve("react").unwrap();
        assert_eq!(react.version, "18.2.0");
        assert_eq!(react.ecosystem, Ecosystem::Npm);
        let types_react = registry.resolve("@types/react").unwrap();
        assert_eq!(types_react.version, "18.2.45");
    }

    #[test]
    fn parse_package_json_dependencies() {
        let content = r#"{
"dependencies": {
"react": "^18.2.0"
},
"devDependencies": {
"typescript": "^5.4.0"
}
}"#;
        let mut registry = ExternalDependencyRegistry::new();
        registry.parse_package_json(content);
        assert!(registry.resolve("react").is_some());
        assert!(registry.resolve("typescript").is_some());
    }

    #[test]
    fn parse_yarn_lock() {
        let content = r#"# yarn lockfile v1
"@babel/core@^7.0.0":
version "7.24.0"
resolved "https://registry.yarnpkg.com/@babel/core/-/core-7.24.0.tgz"
react@^18.0.0:
version "18.2.0"
resolved "https://registry.yarnpkg.com/react/-/react-18.2.0.tgz"
"#;
        let mut registry = ExternalDependencyRegistry::new();
        registry.parse_yarn_lock(content);
        assert_eq!(registry.len(), 2);
        let react = registry.resolve("react").unwrap();
        assert_eq!(react.version, "18.2.0");
    }

    #[test]
    fn parse_pnpm_lock() {
        let content = r#"
/react@18.2.0:
resolution: {integrity: sha512-abc}
/@babel/core@7.24.0:
resolution: {integrity: sha512-def}
"#;
        let mut registry = ExternalDependencyRegistry::new();
        registry.parse_pnpm_lock(content);
        assert!(registry.resolve("react").is_some());
        assert!(registry.resolve("@babel/core").is_some());
    }

    #[test]
    fn parse_requirements_txt() {
        let content = r#"
numpy==1.26.0
requests>=2.31.0
flask
# comment
-e git+https://example.com
beautifulsoup4[extra]==4.12.0
"#;
        let mut registry = ExternalDependencyRegistry::new();
        registry.parse_requirements_txt(content);
        assert_eq!(registry.len(), 4);
        let numpy = registry.resolve("numpy").unwrap();
        assert_eq!(numpy.version, "==1.26.0");
        assert_eq!(numpy.ecosystem, Ecosystem::Python);
        let flask = registry.resolve("flask").unwrap();
        assert_eq!(flask.version, "*");
        let bs4 = registry.resolve("beautifulsoup4").unwrap();
        assert_eq!(bs4.version, "==4.12.0");
    }

    #[test]
    fn parse_pipfile_lock() {
        let content = r#"{
"_meta": {},
"default": {
"numpy": { "version": "==1.26.0" },
"requests": { "version": "==2.31.0" }
},
"develop": {
"pytest": { "version": "==7.4.0" }
}
}"#;
        let mut registry = ExternalDependencyRegistry::new();
        registry.parse_pipfile_lock(content);
        assert_eq!(registry.len(), 3);
        let numpy = registry.resolve("numpy").unwrap();
        assert_eq!(numpy.version, "==1.26.0");
        let pytest = registry.resolve("pytest").unwrap();
        assert_eq!(pytest.version, "==7.4.0");
    }

    #[test]
    fn parse_pyproject_toml_project_dependencies() {
        let content = r#"
[project]
dependencies = [
"requests>=2.31.0",
"beautifulsoup4==4.12.0"
]
"#;
        let mut registry = ExternalDependencyRegistry::new();
        registry.parse_pyproject_toml(content);
        assert!(registry.resolve("requests").is_some());
        assert!(registry.resolve("bs4").is_some());
    }

    #[test]
    fn parse_poetry_lock() {
        let content = r#"
[[package]]
name = "fastapi"
version = "0.110.0"
[[package]]
name = "uvicorn"
version = "0.29.0"
"#;
        let mut registry = ExternalDependencyRegistry::new();
        registry.parse_poetry_lock(content);
        assert!(registry.resolve("fastapi").is_some());
        assert!(registry.resolve("uvicorn").is_some());
    }

    #[test]
    fn python_aliases_resolve_import_names() {
        let mut registry = ExternalDependencyRegistry::new();
        registry.insert(ExternalDependency {
            name: "python-dateutil".to_string(),
            version: "2.9.0".to_string(),
            ecosystem: Ecosystem::Python,
        });
        registry.insert(ExternalDependency {
            name: "beautifulsoup4".to_string(),
            version: "4.12.0".to_string(),
            ecosystem: Ecosystem::Python,
        });
        assert!(registry.resolve("dateutil").is_some());
        assert!(registry.resolve("bs4").is_some());
    }

    #[test]
    fn parse_go_sum() {
        let content = r#"github.com/gorilla/mux v1.8.1 h1:abc123
github.com/gorilla/mux v1.8.1/go.mod h1:xyz789
github.com/stretchr/testify v1.9.0 h1:def456
"#;
        let mut registry = ExternalDependencyRegistry::new();
        registry.parse_go_sum(content);
        assert_eq!(registry.len(), 2);
        let mux = registry.resolve("github.com/gorilla/mux").unwrap();
        assert_eq!(mux.version, "v1.8.1");
        assert_eq!(mux.ecosystem, Ecosystem::GoModules);
    }

    #[test]
    fn parse_gemfile_lock() {
        // The parser treats any unindented line after `specs:` as the end
        // of the section, so the fixture needs Bundler-style indentation.
        let content = r#"GEM
  remote: https://rubygems.org/
  specs:
    rails (7.1.0)
      activesupport (= 7.1.0)
    activesupport (7.1.0)
    minitest (5.20.0)
PLATFORMS
  ruby
"#;
        let mut registry = ExternalDependencyRegistry::new();
        registry.parse_gemfile_lock(content);
        assert!(registry.len() >= 2);
        let rails = registry.resolve("rails").unwrap();
        assert_eq!(rails.version, "7.1.0");
        assert_eq!(rails.ecosystem, Ecosystem::RubyGems);
    }

    #[test]
    fn parse_composer_lock() {
        let content = r#"{
"packages": [
{ "name": "monolog/monolog", "version": "3.5.0" },
{ "name": "symfony/console", "version": "v6.4.0" }
],
"packages-dev": [
{ "name": "phpunit/phpunit", "version": "10.5.0" }
]
}"#;
        let mut registry = ExternalDependencyRegistry::new();
        registry.parse_composer_lock(content);
        assert_eq!(registry.len(), 3);
        let monolog = registry.resolve("monolog/monolog").unwrap();
        assert_eq!(monolog.version, "3.5.0");
        assert_eq!(monolog.ecosystem, Ecosystem::Composer);
    }

    #[test]
    fn resolve_with_prefix_matching() {
        let mut registry = ExternalDependencyRegistry::new();
        registry.insert(ExternalDependency {
            name: "serde".to_string(),
            version: "1.0.210".to_string(),
            ecosystem: Ecosystem::Cargo,
        });
        assert!(registry.resolve("serde").is_some());
        assert!(registry.resolve("serde.de").is_some());
        assert!(registry.resolve("serde.ser.Serializer").is_some());
        assert!(registry.resolve("tokio").is_none());
    }

    #[test]
    fn resolve_with_hyphen_underscore_normalisation() {
        let mut registry = ExternalDependencyRegistry::new();
        registry.insert(ExternalDependency {
            name: "serde-json".to_string(),
            version: "1.0.0".to_string(),
            ecosystem: Ecosystem::Cargo,
        });
        assert!(registry.resolve("serde_json").is_some());
        assert!(registry.resolve("serde-json").is_some());
    }

    #[test]
    fn resolve_normalises_import_path() {
        let mut registry = ExternalDependencyRegistry::new();
        registry.insert(ExternalDependency {
            name: "github.com/gorilla/mux".to_string(),
            version: "v1.8.1".to_string(),
            ecosystem: Ecosystem::GoModules,
        });
        assert!(registry.resolve("github.com/gorilla/mux").is_some());
        assert!(registry.resolve("github.com.gorilla.mux").is_some());
    }

    #[test]
    fn annotate_external_nodes_works() {
        use crate::graph::pdg::{Node, NodeType, ProgramDependenceGraph};
        let mut pdg = ProgramDependenceGraph::new();
        let ext_id = pdg.add_node(Node {
            id: "serde".to_string(),
            name: "serde".to_string(),
            node_type: NodeType::External,
            file_path: Arc::from(""),
            byte_range: (0, 0),
            language: "external".to_string(),
            complexity: 0,
        });
        let internal_id = pdg.add_node(Node {
            id: "my_func".to_string(),
            name: "my_func".to_string(),
            node_type: NodeType::Function,
            file_path: Arc::from("src/lib.rs"),
            byte_range: (0, 100),
            language: "rust".to_string(),
            complexity: 5,
        });
        let mut registry = ExternalDependencyRegistry::new();
        registry.insert(ExternalDependency {
            name: "serde".to_string(),
            version: "1.0.210".to_string(),
            ecosystem: Ecosystem::Cargo,
        });
        let stats = annotate_external_nodes(&mut pdg, &registry);
        assert_eq!(stats.total_external, 1);
        assert_eq!(stats.resolved, 1);
        assert_eq!(stats.unresolved, 0);
        let node = pdg.get_node(ext_id).unwrap();
        assert_eq!(node.language, "external:cargo");
        assert!(node.id.contains("1.0.210"));
        let internal = pdg.get_node(internal_id).unwrap();
        assert_eq!(internal.language, "rust");
    }

    #[test]
    fn from_project_empty_dir_returns_empty() {
        let dir = tempfile::tempdir().expect("tempdir");
        let registry = ExternalDependencyRegistry::from_project(dir.path());
        assert_eq!(registry.len(), 0);
    }

    #[test]
    fn from_project_with_cargo_lock() {
        let dir = tempfile::tempdir().expect("tempdir");
        std::fs::write(
            dir.path().join("Cargo.lock"),
            "[[package]]\nname = \"anyhow\"\nversion = \"1.0.86\"\n",
        )
        .expect("write");
        let registry = ExternalDependencyRegistry::from_project(dir.path());
        assert_eq!(registry.len(), 1);
        assert!(registry.resolve("anyhow").is_some());
    }

    #[test]
    fn from_project_with_cargo_toml_fallback() {
        let dir = tempfile::tempdir().expect("tempdir");
        std::fs::write(
            dir.path().join("Cargo.toml"),
            "[package]\nname = \"demo\"\nversion = \"0.1.0\"\n\n[dependencies]\nserde = \"1.0\"\n",
        )
        .expect("write");
        let registry = ExternalDependencyRegistry::from_project(dir.path());
        assert!(registry.resolve("serde").is_some());
    }

    #[test]
    fn from_project_with_requirements_txt() {
        let dir = tempfile::tempdir().expect("tempdir");
        std::fs::write(
            dir.path().join("requirements.txt"),
            "numpy==1.26.0\nrequests>=2.31.0\n",
        )
        .expect("write");
        let registry = ExternalDependencyRegistry::from_project(dir.path());
        assert_eq!(registry.len(), 2);
        assert!(registry.resolve("numpy").is_some());
        assert!(registry.resolve("requests").is_some());
    }

    #[test]
    fn from_project_with_package_json() {
        let dir = tempfile::tempdir().expect("tempdir");
        std::fs::write(
            dir.path().join("package.json"),
            r#"{"dependencies":{"react":"^18.2.0"}}"#,
        )
        .expect("write");
        let registry = ExternalDependencyRegistry::from_project(dir.path());
        assert!(registry.resolve("react").is_some());
    }

    #[test]
    fn from_project_discovers_nested_workspace_manifests() {
        let dir = tempfile::tempdir().expect("tempdir");
        let nested = dir.path().join("packages").join("web");
        std::fs::create_dir_all(&nested).expect("mkdir");
        std::fs::write(
            nested.join("package.json"),
            r#"{"dependencies":{"react":"^18.2.0","zod":"^3.23.8"}}"#,
        )
        .expect("write");
        let registry = ExternalDependencyRegistry::from_project(dir.path());
        assert!(registry.resolve("react").is_some());
        assert!(registry.resolve("zod").is_some());
    }

    #[test]
    fn from_project_with_go_mod() {
        let dir = tempfile::tempdir().expect("tempdir");
        std::fs::write(
            dir.path().join("go.mod"),
            "module demo\n\nrequire github.com/gorilla/mux v1.8.1\n",
        )
        .expect("write");
        let registry = ExternalDependencyRegistry::from_project(dir.path());
        assert!(registry.resolve("github.com/gorilla/mux").is_some());
    }

    #[test]
    fn annotate_external_nodes_marks_builtin_modules() {
        use crate::graph::pdg::{Node, NodeType, ProgramDependenceGraph};
        let mut pdg = ProgramDependenceGraph::new();
        let ext_id = pdg.add_node(Node {
            id: "std".to_string(),
            name: "std".to_string(),
            node_type: NodeType::External,
            file_path: Arc::from(""),
            byte_range: (0, 0),
            language: "external".to_string(),
            complexity: 0,
        });
        let registry = ExternalDependencyRegistry::new();
        let stats = annotate_external_nodes(&mut pdg, &registry);
        assert_eq!(stats.total_external, 1);
        assert_eq!(stats.resolved, 0);
        assert_eq!(stats.builtin, 1);
        assert_eq!(stats.unresolved, 0);
        assert_eq!(pdg.get_node(ext_id).unwrap().language, "external:system");
    }

    #[test]
    fn annotate_external_nodes_deduplicates_unresolved_import_names() {
        use crate::graph::pdg::{Node, NodeType, ProgramDependenceGraph};
        let mut pdg = ProgramDependenceGraph::new();
        pdg.add_node(Node {
            id: "src/main.rs:__external__:react".to_string(),
            name: "react".to_string(),
            node_type: NodeType::External,
            file_path: Arc::from("src/main.rs"),
            byte_range: (0, 0),
            language: "external".to_string(),
            complexity: 0,
        });
        pdg.add_node(Node {
            id: "src/app.ts:__external__:react".to_string(),
            name: "react".to_string(),
            node_type: NodeType::External,
            file_path: Arc::from("src/app.ts"),
            byte_range: (0, 0),
            language: "external".to_string(),
            complexity: 0,
        });
        let registry = ExternalDependencyRegistry::new();
        let stats = annotate_external_nodes(&mut pdg, &registry);
        assert_eq!(stats.total_external, 2);
        assert_eq!(stats.unresolved, 1);
    }
}