use crate::fingerprint::fingerprint;
use crate::known::{normalize_dist, Known};
use camino::Utf8Path;
use mollify_graph::ModuleGraph;
use mollify_types::{Action, Category, Confidence, Finding, Location, Severity};
use rustc_hash::FxHashSet;
pub fn analyze(root: &Utf8Path, graph: &ModuleGraph) -> Vec<Finding> {
let mut findings = Vec::new();
let pyproject_path = root.join("pyproject.toml");
let mut declared = FxHashSet::default();
let mut dev_only: FxHashSet<String> = FxHashSet::default();
let mut manifest = pyproject_path.clone();
let mut has_manifest = false;
if let Ok(text) = std::fs::read_to_string(&pyproject_path) {
has_manifest = true;
if let Ok(table) = text.parse::<toml::Table>() {
let val = toml::Value::Table(table);
declared.extend(declared_dependencies(&val));
let prod = prod_dependencies(&val);
for d in dev_dependencies(&val) {
if !prod.contains(&d) {
dev_only.insert(d);
}
}
}
}
for entry in std::fs::read_dir(root).into_iter().flatten().flatten() {
let fname = entry.file_name();
let fname = fname.to_string_lossy();
if fname.starts_with("requirements") && fname.ends_with(".txt") {
if let Ok(text) = std::fs::read_to_string(entry.path()) {
let before = declared.len();
for line in text.lines() {
let line = line.split('#').next().unwrap_or("").trim();
if line.is_empty() || line.starts_with('-') {
continue;
}
if let Some(name) = spec_name(line) {
declared.insert(name);
}
}
has_manifest = true;
if declared.len() > before && !pyproject_path.exists() {
if let Ok(p) = camino::Utf8PathBuf::from_path_buf(entry.path()) {
manifest = p;
}
}
}
}
}
let pyproject_path = manifest;
if !has_manifest {
return findings;
}
let known = Known::new();
let internal_tops = internal_top_levels(graph);
let installed = crate::installed::discover(root);
let used_dists = used_distributions(graph, &known, &internal_tops, installed.as_ref());
let confidence = if graph.global_dynamic {
Confidence::Uncertain
} else {
Confidence::Likely
};
for dist in &declared {
if dist == "python" {
continue;
}
if !used_dists.contains(dist) {
let rule = "unused-dependency";
findings.push(Finding {
fingerprint: fingerprint(rule, &[dist]),
rule: rule.into(),
category: Category::DependencyHygiene,
severity: Severity::Warn,
confidence,
attribution: None,
reason: format!("declared dependency `{dist}` is never imported"),
location: Location {
path: pyproject_path.clone(),
line: 1,
column: 0,
end_line: None,
},
actions: vec![Action {
kind: "remove-dependency".into(),
description: format!("Remove unused dependency `{dist}` from pyproject.toml"),
auto_fixable: false,
suppression_comment: Some(format!("# mollify: ignore[{rule}]")),
}],
});
}
}
for dist in &used_dists {
if declared.contains(dist) {
continue;
}
let is_transitive = installed.as_ref().is_some_and(|i| i.dists.contains(dist));
let (rule, reason, action) = if is_transitive {
(
"transitive-dependency",
format!("`{dist}` is imported and installed, but only as a transitive dependency — declare it directly"),
format!("Add `{dist}` to your direct dependencies (currently transitive)"),
)
} else {
(
"missing-dependency",
format!("`{dist}` is imported but not declared in the project manifest"),
format!("Add `{dist}` to project dependencies"),
)
};
findings.push(Finding {
fingerprint: fingerprint(rule, &[dist]),
rule: rule.into(),
category: Category::DependencyHygiene,
severity: Severity::Warn,
confidence,
attribution: None,
reason,
location: Location {
path: pyproject_path.clone(),
line: 1,
column: 0,
end_line: None,
},
actions: vec![Action {
kind: "add-dependency".into(),
description: action,
auto_fixable: false,
suppression_comment: Some(format!("# mollify: ignore[{rule}]")),
}],
});
}
if !dev_only.is_empty() {
let mut seen: FxHashSet<String> = FxHashSet::default();
for m in &graph.modules {
if is_test_module(&m.path) {
continue;
}
for dist in module_imported_dists(m, &known, &internal_tops, installed.as_ref()) {
if !dev_only.contains(&dist) || !seen.insert(dist.clone()) {
continue;
}
let rule = "misplaced-dev-dependency";
findings.push(Finding {
fingerprint: fingerprint(rule, &[&dist]),
rule: rule.into(),
category: Category::DependencyHygiene,
severity: Severity::Warn,
confidence,
attribution: None,
reason: format!(
"`{dist}` is declared only as a dev dependency but is imported by production module `{}`",
m.dotted
),
location: Location {
path: pyproject_path.clone(),
line: 1,
column: 0,
end_line: None,
},
actions: vec![Action {
kind: "move-dependency".into(),
description: format!(
"Move `{dist}` from the dev group to runtime dependencies"
),
auto_fixable: false,
suppression_comment: Some(format!("# mollify: ignore[{rule}]")),
}],
});
}
}
}
findings
}
/// Turns every unresolved import reported by `graph` into an
/// `unresolved-import` warning located at the importing file and line.
///
/// Relative imports are reported with `Certain` confidence and described as
/// "relative"; all others are reported as `Likely` and described as
/// "first-party".
pub fn unresolved(graph: &ModuleGraph) -> Vec<Finding> {
    const RULE: &str = "unresolved-import";

    graph
        .unresolved_imports()
        .into_iter()
        .map(|u| {
            let (confidence, kind) = if u.relative {
                (Confidence::Certain, "relative")
            } else {
                (Confidence::Likely, "first-party")
            };
            // The fingerprint is keyed on importer, line and displayed import text.
            let line_text = u.line.to_string();
            Finding {
                fingerprint: fingerprint(RULE, &[u.importer.as_str(), &line_text, &u.display]),
                rule: RULE.into(),
                category: Category::DependencyHygiene,
                severity: Severity::Warn,
                confidence,
                attribution: None,
                reason: format!(
                    "{kind} import `{}` does not resolve to any module in the project",
                    u.display
                ),
                location: Location {
                    path: u.importer.clone(),
                    line: u.line,
                    column: 0,
                    end_line: None,
                },
                actions: vec![Action {
                    kind: "fix-import".into(),
                    description: format!(
                        "Fix or remove the broken import `{}` (check the module path / refactor)",
                        u.display
                    ),
                    auto_fixable: false,
                    suppression_comment: Some(format!("# mollify: ignore[{RULE}]")),
                }],
            }
        })
        .collect()
}
/// Collects the normalized name of every dependency declared anywhere in a
/// parsed `pyproject.toml`, regardless of whether it is a runtime, optional
/// or development dependency.
///
/// Sections read: `project.dependencies`, `project.optional-dependencies.*`,
/// `dependency-groups.*`, `tool.poetry.dependencies`,
/// `tool.poetry.group.*.dependencies`, `tool.poetry.dev-dependencies`,
/// `tool.uv.dev-dependencies` and `tool.pdm.dev-dependencies.*`.
/// Missing sections and entries of an unexpected TOML type are ignored.
fn declared_dependencies(value: &toml::Value) -> FxHashSet<String> {
    /// Adds the name of each string requirement in a TOML array.
    fn add_specs(names: &mut FxHashSet<String>, specs: Option<&toml::Value>) {
        let Some(specs) = specs.and_then(|v| v.as_array()) else {
            return;
        };
        for name in specs.iter().filter_map(|s| s.as_str().and_then(spec_name)) {
            names.insert(name);
        }
    }

    /// Adds the requirements of every array stored in a `group -> [specs]` table.
    fn add_grouped_specs(names: &mut FxHashSet<String>, groups: Option<&toml::Value>) {
        let Some(groups) = groups.and_then(|v| v.as_table()) else {
            return;
        };
        for specs in groups.values() {
            add_specs(names, Some(specs));
        }
    }

    /// Adds every key of a `name = constraint` table.
    fn add_table_keys(names: &mut FxHashSet<String>, table: Option<&toml::Value>) {
        let Some(table) = table.and_then(|v| v.as_table()) else {
            return;
        };
        for key in table.keys() {
            names.insert(normalize_dist(key));
        }
    }

    let project = value.get("project");
    let tool = value.get("tool");
    let poetry = tool.and_then(|t| t.get("poetry"));

    let mut names = FxHashSet::default();

    // `[project]` tables.
    add_specs(&mut names, project.and_then(|p| p.get("dependencies")));
    add_grouped_specs(
        &mut names,
        project.and_then(|p| p.get("optional-dependencies")),
    );

    // Top-level `[dependency-groups]`.
    add_grouped_specs(&mut names, value.get("dependency-groups"));

    // Poetry: main table, named groups, then the `dev-dependencies` table.
    add_table_keys(&mut names, poetry.and_then(|p| p.get("dependencies")));
    if let Some(groups) = poetry
        .and_then(|p| p.get("group"))
        .and_then(|g| g.as_table())
    {
        for group in groups.values() {
            add_table_keys(&mut names, group.get("dependencies"));
        }
    }
    add_table_keys(&mut names, poetry.and_then(|p| p.get("dev-dependencies")));

    // uv and PDM development dependencies.
    add_specs(
        &mut names,
        tool.and_then(|t| t.get("uv"))
            .and_then(|u| u.get("dev-dependencies")),
    );
    add_grouped_specs(
        &mut names,
        tool.and_then(|t| t.get("pdm"))
            .and_then(|p| p.get("dev-dependencies")),
    );

    names
}
/// Extracts the normalized distribution name from a requirement specifier
/// such as `uvicorn[standard]>=0.20` or `pkg @ https://example.invalid/pkg.whl`.
///
/// The name ends at the first whitespace character or at any of
/// `<`, `>`, `=`, `!`, `~`, `;`, `[`, `(`, `@`. Leading whitespace is ignored,
/// and `@` is a terminator so that direct references written without a space
/// (`name@url`) do not leak the URL into the name.
///
/// Returns `None` when no name precedes the first terminator.
fn spec_name(spec: &str) -> Option<String> {
    let spec = spec.trim_start();
    let end = spec
        .find(|c: char| c.is_whitespace() || "<>=!~;[(@".contains(c))
        .unwrap_or(spec.len());
    let name = &spec[..end];
    (!name.is_empty()).then(|| normalize_dist(name))
}
fn internal_top_levels(graph: &ModuleGraph) -> FxHashSet<String> {
let mut set = FxHashSet::default();
for m in &graph.modules {
if let Some(first) = m.dotted.split('.').next() {
if !first.is_empty() {
set.insert(first.to_string());
}
}
}
set
}
/// Collects the distribution name behind every absolute import in `graph`.
///
/// Relative imports are skipped, as are imports whose top-level package is
/// empty, listed in `internal`, or reported as standard library by `known`.
/// The `installed` import-to-distribution map takes precedence when it has an
/// entry for the top-level package; otherwise `known.dist_for_import` is used.
fn used_distributions(
    graph: &ModuleGraph,
    known: &Known,
    internal: &FxHashSet<String>,
    installed: Option<&crate::installed::Installed>,
) -> FxHashSet<String> {
    graph
        .modules
        .iter()
        .flat_map(|module| module.parsed.imports.iter())
        .filter_map(|imp| {
            if imp.relative_dots > 0 {
                return None;
            }
            let top = imp.module.split('.').next()?;
            if top.is_empty() || internal.contains(top) || known.is_stdlib(top) {
                return None;
            }
            let mapped = installed.and_then(|env| env.import_to_dist.get(top).cloned());
            Some(mapped.unwrap_or_else(|| known.dist_for_import(top)))
        })
        .collect()
}
/// Collects the normalized names of dependencies declared in
/// development-oriented sections of a parsed `pyproject.toml`.
///
/// Sections read: `dependency-groups.*`, `tool.poetry.group.*.dependencies`,
/// `tool.poetry.dev-dependencies`, `tool.uv.dev-dependencies` and
/// `tool.pdm.dev-dependencies.*`. Missing sections and values of an
/// unexpected TOML type are ignored.
fn dev_dependencies(value: &toml::Value) -> FxHashSet<String> {
    /// Adds the name of each string requirement in `specs`; non-arrays add nothing.
    fn add_specs(specs: &toml::Value, out: &mut FxHashSet<String>) {
        let entries = specs.as_array().into_iter().flatten();
        for name in entries.filter_map(|entry| entry.as_str().and_then(spec_name)) {
            out.insert(name);
        }
    }

    /// Adds every key of a `name = constraint` table; non-tables add nothing.
    fn add_keys(table: Option<&toml::Value>, out: &mut FxHashSet<String>) {
        let keys = table
            .and_then(|t| t.as_table())
            .into_iter()
            .flat_map(|t| t.keys());
        for name in keys {
            out.insert(normalize_dist(name));
        }
    }

    let tool = value.get("tool");
    let poetry = tool.and_then(|t| t.get("poetry"));
    let mut dev = FxHashSet::default();

    // Top-level `[dependency-groups]`: every group is treated as development-only.
    if let Some(groups) = value.get("dependency-groups").and_then(|g| g.as_table()) {
        for specs in groups.values() {
            add_specs(specs, &mut dev);
        }
    }

    // Poetry named groups, then the `dev-dependencies` table.
    if let Some(groups) = poetry
        .and_then(|p| p.get("group"))
        .and_then(|g| g.as_table())
    {
        for group in groups.values() {
            add_keys(group.get("dependencies"), &mut dev);
        }
    }
    add_keys(poetry.and_then(|p| p.get("dev-dependencies")), &mut dev);

    // uv keeps a flat array; PDM keeps one array per group.
    if let Some(specs) = tool
        .and_then(|t| t.get("uv"))
        .and_then(|u| u.get("dev-dependencies"))
    {
        add_specs(specs, &mut dev);
    }
    if let Some(groups) = tool
        .and_then(|t| t.get("pdm"))
        .and_then(|p| p.get("dev-dependencies"))
        .and_then(|d| d.as_table())
    {
        for specs in groups.values() {
            add_specs(specs, &mut dev);
        }
    }

    dev
}
/// Collects the normalized names of runtime dependencies from a parsed
/// `pyproject.toml`: the `project.dependencies` array and the keys of the
/// `tool.poetry.dependencies` table. Missing or mistyped sections add nothing.
fn prod_dependencies(value: &toml::Value) -> FxHashSet<String> {
    let project_specs = value
        .get("project")
        .and_then(|p| p.get("dependencies"))
        .and_then(|d| d.as_array())
        .into_iter()
        .flatten()
        .filter_map(|item| item.as_str().and_then(spec_name));

    let poetry_names = value
        .get("tool")
        .and_then(|t| t.get("poetry"))
        .and_then(|p| p.get("dependencies"))
        .and_then(|d| d.as_table())
        .into_iter()
        .flat_map(|table| table.keys())
        .map(|name| normalize_dist(name));

    project_specs.chain(poetry_names).collect()
}
/// Collects the distribution name behind every absolute import of module `m`.
///
/// Relative imports are skipped, as are imports whose top-level package is
/// empty, listed in `internal`, or reported as standard library by `known`.
/// An entry in the `installed` import-to-distribution map wins over the
/// `known.dist_for_import` fallback.
fn module_imported_dists(
    m: &mollify_graph::ModuleInfo,
    known: &Known,
    internal: &FxHashSet<String>,
    installed: Option<&crate::installed::Installed>,
) -> FxHashSet<String> {
    let third_party_top = |top: &str| {
        !(top.is_empty() || internal.contains(top) || known.is_stdlib(top))
    };

    m.parsed
        .imports
        .iter()
        .filter_map(|imp| {
            if imp.relative_dots > 0 {
                return None;
            }
            let top = imp.module.split('.').next()?;
            if !third_party_top(top) {
                return None;
            }
            let dist = match installed.and_then(|env| env.import_to_dist.get(top).cloned()) {
                Some(dist) => dist,
                None => known.dist_for_import(top),
            };
            Some(dist)
        })
        .collect()
}
/// Reports whether `path` looks like a test module.
///
/// A path counts as a test module when any of its parent directories is named
/// `tests` or `test`, or when its file name starts with `test_`, ends with
/// `_test.py`, or is exactly `conftest.py`.
///
/// Directories are matched per path component rather than by searching for
/// `/tests/` in the raw string, so paths written with the platform's own
/// separator are recognized as well.
fn is_test_module(path: &Utf8Path) -> bool {
    let name = path.file_name().unwrap_or("");
    let in_test_dir = path.parent().is_some_and(|dir| {
        dir.components()
            .any(|component| matches!(component.as_str(), "tests" | "test"))
    });
    in_test_dir
        || name.starts_with("test_")
        || name.ends_with("_test.py")
        || name == "conftest.py"
}
// Filesystem-backed tests: each one writes a tiny Python project into a
// scratch directory, builds the module graph and inspects the findings.
#[cfg(test)]
mod tests {
use super::*;
use camino::Utf8PathBuf;
use mollify_graph::discover_python_files;
/// Creates a fresh, empty scratch directory under the system temp dir.
/// The directory name includes the process id and `tag`; any previous
/// directory of the same name is removed first. Panics if the directory
/// cannot be created or its path is not valid UTF-8.
fn temp(tag: &str) -> Utf8PathBuf {
let base =
std::env::temp_dir().join(format!("mollify-core-deps-{}-{tag}", std::process::id()));
let _ = std::fs::remove_dir_all(&base);
std::fs::create_dir_all(&base).unwrap();
Utf8PathBuf::from_path_buf(base).unwrap()
}
/// `pytest` is declared only in `[dependency-groups]` but imported by both
/// `app.py` and `tests/test_app.py`: exactly one misplaced-dev-dependency
/// finding is expected, and its reason must mention `pytest` and `app`.
#[test]
fn flags_misplaced_dev_dependency_used_in_prod() {
let d = temp("devdep");
std::fs::write(
d.join("pyproject.toml"),
"[project]\nname = \"x\"\ndependencies = [\"requests\"]\n\n\
[dependency-groups]\ndev = [\"pytest\"]\n",
)
.unwrap();
std::fs::write(d.join("app.py"), "import requests\nimport pytest\n").unwrap();
std::fs::create_dir_all(d.join("tests")).unwrap();
std::fs::write(d.join("tests/test_app.py"), "import pytest\n").unwrap();
let files = discover_python_files(&d);
let g = ModuleGraph::build(&d, &files);
let f = analyze(&d, &g);
let mis: Vec<_> = f
.iter()
.filter(|x| x.rule == "misplaced-dev-dependency")
.collect();
assert_eq!(mis.len(), 1, "expected one misplaced dep, got {f:?}");
assert!(mis[0].reason.contains("pytest") && mis[0].reason.contains("app"));
std::fs::remove_dir_all(&d).ok();
}
/// A broken relative import must be reported with `Certain` confidence and a
/// broken `app.nope` import with `Likely`; the `os` import and the resolvable
/// `.real` import must not be reported.
#[test]
fn flags_unresolved_relative_and_firstparty_imports() {
let d = temp("unresolved");
std::fs::write(d.join("__init__.py"), "").unwrap();
std::fs::write(
d.join("app.py"),
"from .missing_mod import thing\nimport app.nope\nimport os\nfrom .real import x\n",
)
.unwrap();
std::fs::write(d.join("real.py"), "x = 1\n").unwrap();
let files = discover_python_files(&d);
let g = ModuleGraph::build(&d, &files);
let f = unresolved(&g);
let rel = f
.iter()
.find(|x| x.reason.contains("missing_mod"))
.expect("relative unresolved");
assert_eq!(rel.confidence, Confidence::Certain);
assert!(f
.iter()
.any(|x| x.reason.contains("app.nope") && x.confidence == Confidence::Likely));
assert!(!f.iter().any(|x| x.reason.contains("`os`")));
assert!(!f.iter().any(|x| x.reason.contains(".real")));
std::fs::remove_dir_all(&d).ok();
}
/// With `requests` and `unused-lib` declared and `requests`, `numpy`, `os`
/// imported: `unused-lib` is unused, `numpy` is missing, and neither
/// `requests` nor `os` appears in any finding.
#[test]
fn detects_unused_and_missing() {
let d = temp("mix");
std::fs::write(
d.join("pyproject.toml"),
"[project]\nname = \"x\"\ndependencies = [\"requests>=2\", \"unused-lib\"]\n",
)
.unwrap();
std::fs::write(
d.join("app.py"),
"import requests\nimport numpy\nimport os\nrequests.get('x')\nnumpy.array([])\n",
)
.unwrap();
let files = discover_python_files(&d);
let g = ModuleGraph::build(&d, &files);
let f = analyze(&d, &g);
assert!(
f.iter()
.any(|x| x.rule == "unused-dependency" && x.reason.contains("unused-lib")),
"expected unused-lib, got {f:?}"
);
assert!(
f.iter()
.any(|x| x.rule == "missing-dependency" && x.reason.contains("numpy")),
"expected missing numpy, got {f:?}"
);
assert!(!f.iter().any(|x| x.reason.contains("requests")));
assert!(!f.iter().any(|x| x.reason.contains("`os`")));
std::fs::remove_dir_all(&d).ok();
}
/// Entries under `[tool.poetry.dev-dependencies]` count as declared (so
/// `black` is neither missing nor unused) and as dev-only (so importing
/// `black` from `app.py` is reported as misplaced).
#[test]
fn legacy_poetry_dev_dependencies_count_as_declared() {
let d = temp("poetry-legacy");
std::fs::write(
d.join("pyproject.toml"),
"[tool.poetry]\nname = \"x\"\n\n\
[tool.poetry.dependencies]\npython = \"^3.10\"\nrequests = \"2.31.0\"\n\n\
[tool.poetry.dev-dependencies]\nblack = \"24.0.0\"\n",
)
.unwrap();
std::fs::write(
d.join("app.py"),
"import black\nimport requests\nblack.format_str('x')\nrequests.get('y')\n",
)
.unwrap();
let files = discover_python_files(&d);
let g = ModuleGraph::build(&d, &files);
let f = analyze(&d, &g);
assert!(
!f.iter().any(|x| matches!(
x.rule.as_str(),
"missing-dependency" | "unused-dependency"
) && x.reason.contains("black")),
"black is declared (legacy dev-deps) → not missing/unused, got {f:?}"
);
assert!(
f.iter()
.any(|x| x.rule == "misplaced-dev-dependency" && x.reason.contains("black")),
"black (dev-only) imported in prod → misplaced, got {f:?}"
);
assert!(!f.iter().any(|x| x.reason.contains("requests")));
std::fs::remove_dir_all(&d).ok();
}
/// An undeclared import with a matching `.dist-info` directory in a fake
/// `.venv` site-packages must be reported as transitive-dependency.
#[test]
fn transitive_when_installed_but_undeclared() {
let d = temp("trans");
std::fs::write(
d.join("pyproject.toml"),
"[project]\nname = \"x\"\ndependencies = []\n",
)
.unwrap();
std::fs::write(d.join("app.py"), "import requests\nrequests.get('x')\n").unwrap();
let sp = d.join(".venv/lib/python3.11/site-packages/requests-2.31.0.dist-info");
std::fs::create_dir_all(&sp).unwrap();
std::fs::write(sp.join("METADATA"), "Name: requests\n").unwrap();
std::fs::write(sp.join("top_level.txt"), "requests\n").unwrap();
let files = discover_python_files(&d);
let g = ModuleGraph::build(&d, &files);
let f = analyze(&d, &g);
assert!(
f.iter()
.any(|x| x.rule == "transitive-dependency" && x.reason.contains("requests")),
"got {f:?}"
);
std::fs::remove_dir_all(&d).ok();
}
/// Without a `pyproject.toml`, declarations come from `requirements.txt`:
/// `unused-lib` is reported as unused and `numpy` as missing.
#[test]
fn reads_requirements_txt_when_no_pyproject() {
let d = temp("req");
std::fs::write(
d.join("requirements.txt"),
"requests==2.0\nunused-lib==1.0\n",
)
.unwrap();
std::fs::write(
d.join("app.py"),
"import requests\nimport numpy\nrequests.get('x')\nnumpy.array([])\n",
)
.unwrap();
let files = discover_python_files(&d);
let g = ModuleGraph::build(&d, &files);
let f = analyze(&d, &g);
assert!(
f.iter()
.any(|x| x.rule == "unused-dependency" && x.reason.contains("unused-lib")),
"got {f:?}"
);
assert!(
f.iter()
.any(|x| x.rule == "missing-dependency" && x.reason.contains("numpy")),
"got {f:?}"
);
std::fs::remove_dir_all(&d).ok();
}
/// `spec_name` drops extras and version constraints, and the result is
/// normalized (`Flask_Login` becomes `flask-login`).
#[test]
fn spec_name_strips_versions_and_extras() {
assert_eq!(
spec_name("uvicorn[standard]>=0.20").as_deref(),
Some("uvicorn")
);
assert_eq!(spec_name("Flask_Login").as_deref(), Some("flask-login"));
}
}