Skip to main content

mollify_core/
lib.rs

//! # mollify-core
//!
//! Analysis orchestration. Builds the graph, runs the engines, and assembles the
//! kind-discriminated [`mollify_types::Report`] envelopes. Engines: dead-code,
//! dependency hygiene, architecture (cycles/layers/contracts/policies),
//! complexity + hotspots, duplication, type-health, security, cohesion,
//! commented-code, coverage, and supply-chain. All but coverage are folded into
//! `audit` (supply-chain only when a local advisory DB is present); coverage
//! needs a coverage.py report and is exposed separately via `coverage_report`.
8
9use camino::Utf8Path;
10use mollify_graph::{discover_python_files_excluding, ModuleGraph};
11use mollify_types::{
12    sort_findings, AuditReport, Category, Finding, FindingsReport, Report, Severity, Summary,
13    SCHEMA_VERSION,
14};
15
16pub mod agents;
17pub mod apihygiene;
18pub mod arch;
19pub mod baseline;
20pub mod cohesion;
21pub mod commented;
22pub mod complexity;
23pub mod config;
24pub mod coverage;
25pub mod deadcode;
26pub mod deps;
27pub mod dupes;
28pub mod explain;
29pub mod fingerprint;
30pub mod fix;
31pub mod git;
32pub mod hotspots;
33pub mod installed;
34pub mod known;
35pub mod members;
36pub mod metrics;
37pub mod plugins;
38pub mod policy;
39pub mod sarif;
40pub mod security;
41pub mod suffix;
42pub mod supplychain;
43pub mod trace;
44pub mod typehealth;
45pub mod version;
46
47/// Build the graph for a project root once, to be shared across engines.
48/// Honors `.mollifyrc.json`'s `exclude_dirs` in addition to the builtin
49/// discovery denylist (VCS metadata, virtualenvs, build/cache output).
50pub fn build_graph(root: &Utf8Path) -> ModuleGraph {
51    let cfg = config::load(root);
52    let files = discover_python_files_excluding(root, &cfg.exclude_dirs);
53    ModuleGraph::build(root, &files)
54}
55
56/// Sort, apply inline `# mollify: ignore[...]` suppressions and `.mollifyrc`
57/// (severity overrides + ignore), then summarize.
58fn finalize(
59    cfg: &config::Config,
60    graph: &ModuleGraph,
61    mut findings: Vec<Finding>,
62) -> FindingsReport {
63    apply_suppressions(graph, &mut findings);
64    config::apply(cfg, &mut findings);
65    sort_findings(&mut findings);
66    FindingsReport {
67        schema_version: SCHEMA_VERSION.into(),
68        summary: Summary::from_findings(&findings, graph.modules.len()),
69        findings,
70    }
71}
72
73/// Drop findings silenced by an inline `# mollify: ignore[<rule>]` comment on
74/// the finding's line (or a bare `# mollify: ignore` matching any rule).
75pub fn apply_suppressions(graph: &ModuleGraph, findings: &mut Vec<Finding>) {
76    use rustc_hash::FxHashMap;
77    // (path, line) -> set of suppressed rules ("*" = all).
78    let mut sup: FxHashMap<(&str, u32), Vec<&str>> = FxHashMap::default();
79    for m in &graph.modules {
80        for (line, rule) in &m.parsed.ignores {
81            sup.entry((m.path.as_str(), *line))
82                .or_default()
83                .push(rule.as_str());
84        }
85    }
86    if sup.is_empty() {
87        return;
88    }
89    findings.retain(|f| {
90        if let Some(rules) = sup.get(&(f.location.path.as_str(), f.location.line)) {
91            !rules.iter().any(|r| *r == "*" || *r == f.rule)
92        } else {
93            true
94        }
95    });
96}
97
98/// `mollify dead-code` — reachability-based unused files/symbols.
99pub fn dead_code_report(root: &Utf8Path) -> FindingsReport {
100    let graph = build_graph(root);
101    let mut findings = deadcode::analyze(&graph);
102    findings.extend(members::analyze(&graph));
103    findings.extend(commented::analyze(&graph));
104    finalize(&config::load(root), &graph, findings)
105}
106
107/// `mollify deps` — dependency hygiene.
108pub fn deps_report(root: &Utf8Path) -> FindingsReport {
109    let graph = build_graph(root);
110    let mut findings = deps::analyze(root, &graph);
111    findings.extend(deps::unresolved(&graph));
112    finalize(&config::load(root), &graph, findings)
113}
114
115/// `mollify arch` — circular dependencies (boundary presets later).
116pub fn arch_report(root: &Utf8Path) -> FindingsReport {
117    let graph = build_graph(root);
118    let cfg = config::load(root);
119    let mut findings = arch::analyze(&graph);
120    findings.extend(arch::analyze_layers(&graph, &cfg.arch_layers));
121    findings.extend(arch::analyze_contracts(&graph, &cfg.contracts));
122    findings.extend(arch::private_imports(&graph));
123    findings.extend(policy::analyze(&graph, &cfg.policies));
124    finalize(&cfg, &graph, findings)
125}
126
127/// `mollify complexity` / `mollify health` — complexity hotspots.
128pub fn complexity_report(root: &Utf8Path) -> FindingsReport {
129    let graph = build_graph(root);
130    let cfg = config::load(root);
131    let mut findings = complexity::analyze_with(&graph, cfg.max_cyclomatic, cfg.max_cognitive);
132    findings.extend(hotspots::analyze(root, &graph));
133    findings.extend(cohesion::analyze(&graph));
134    finalize(&cfg, &graph, findings)
135}
136
137/// `mollify dupes` — duplication / clone families.
138pub fn dupes_report(root: &Utf8Path) -> FindingsReport {
139    let graph = build_graph(root);
140    let cfg = config::load(root);
141    let findings = dupes::analyze_with(&graph, cfg.dup_min_tokens, cfg.dup_min_lines);
142    finalize(&cfg, &graph, findings)
143}
144
145/// `mollify types` — type-annotation health + API-hygiene (private-type leaks).
146pub fn types_report(root: &Utf8Path) -> FindingsReport {
147    let graph = build_graph(root);
148    let mut findings = typehealth::analyze(&graph);
149    findings.extend(apihygiene::analyze(&graph));
150    finalize(&config::load(root), &graph, findings)
151}
152
153/// `mollify security` — security candidates (deterministic; review before acting).
154pub fn security_report(root: &Utf8Path) -> FindingsReport {
155    let graph = build_graph(root);
156    finalize(&config::load(root), &graph, security::analyze(&graph))
157}
158
159/// `mollify coverage` — cold-path analysis from a coverage.py JSON report.
160pub fn coverage_report(root: &Utf8Path, coverage_path: &Utf8Path) -> FindingsReport {
161    let graph = build_graph(root);
162    let findings = coverage::analyze(root, &graph, coverage_path);
163    finalize(&config::load(root), &graph, findings)
164}
165
166/// `mollify supply-chain` — match pinned/locked dependency versions against a
167/// local advisory database (`vulnerable-dependency`). The DB is an input file,
168/// so analysis stays deterministic and offline.
169pub fn supply_chain_report(root: &Utf8Path, db_path: &Utf8Path) -> FindingsReport {
170    let advisories = supplychain::load_db(db_path).unwrap_or_default();
171    supply_chain_report_with(root, &advisories)
172}
173
174/// Like [`supply_chain_report`] but against an already-loaded advisory set (e.g.
175/// fetched live by the CLI). Keeps the network out of `mollify-core`.
176pub fn supply_chain_report_with(
177    root: &Utf8Path,
178    advisories: &[supplychain::Advisory],
179) -> FindingsReport {
180    let graph = build_graph(root);
181    let findings = supplychain::analyze(root, advisories);
182    finalize(&config::load(root), &graph, findings)
183}
184
/// Advisory DB location, relative to the project root, that `audit` looks for;
/// supply-chain analysis is included in the audit only when it loads.
pub const DEFAULT_ADVISORY_DB: &str = ".mollify/advisories.json";
187
188/// A per-file evidence bundle: the matched module, its findings, and its import
189/// neighborhood. Shared by `mollify inspect` (CLI) and the `mollify_inspect`
190/// MCP tool.
191pub struct Inspection {
192    pub file: String,
193    pub module: Option<String>,
194    pub findings: Vec<Finding>,
195    pub imports: Vec<String>,
196    pub imported_by: Vec<String>,
197}
198
/// Returns true if `path` matches the user's `file` argument: either exactly, or
/// with `file` as a trailing path fragment that starts on a component boundary.
///
/// `lib.py` matches `src/lib.py` but not `src/mylib.py`; an empty `file` does
/// not match a non-empty path. A fragment that itself begins with `/` carries
/// its own boundary, so `/pkg/lib.py` matches `src/pkg/lib.py`.
fn path_matches(path: &str, file: &str) -> bool {
    match path.strip_suffix(file) {
        // The fragment must cover the whole path, start right after a
        // separator, or begin with a separator itself.
        Some(prefix) => prefix.is_empty() || prefix.ends_with('/') || file.starts_with('/'),
        None => false,
    }
}
204
205/// Build the evidence bundle for a single file.
206pub fn inspect(root: &Utf8Path, file: &str) -> Inspection {
207    let report = audit_report(root);
208    let findings: Vec<Finding> = report
209        .findings
210        .into_iter()
211        .filter(|f| path_matches(f.location.path.as_str(), file))
212        .collect();
213    let graph = build_graph(root);
214    let module = graph
215        .modules
216        .iter()
217        .find(|m| path_matches(m.path.as_str(), file))
218        .map(|m| m.dotted.clone());
219    let trace = module.as_deref().and_then(|d| trace::module(&graph, d));
220    Inspection {
221        file: file.to_string(),
222        module,
223        findings,
224        imports: trace
225            .as_ref()
226            .map(|t| t.imports.clone())
227            .unwrap_or_default(),
228        imported_by: trace
229            .as_ref()
230            .map(|t| t.imported_by.clone())
231            .unwrap_or_default(),
232    }
233}
234
235/// File-local diagnostics from an in-memory buffer (no disk, no graph) — the
236/// live LSP path for `textDocument/didChange`. Covers the intra-file rules
237/// (security, unused variables/parameters, complexity, commented-out code);
238/// cross-file rules (dead exports, deps, architecture) are produced by the full
239/// audit on save. Returns sorted findings, honoring inline suppressions.
240pub fn analyze_text(path: &Utf8Path, source: &str) -> Vec<Finding> {
241    let mut parser = match mollify_parse::PyParser::new() {
242        Ok(p) => p,
243        Err(_) => return Vec::new(),
244    };
245    let Ok(parsed) = parser.parse(path, source) else {
246        return Vec::new();
247    };
248    let mut findings = Vec::new();
249    findings.extend(security::analyze_parsed(path, &parsed));
250    findings.extend(commented::analyze_source(path, source));
251    // Unused local variables / parameters.
252    for s in &parsed.scope_findings {
253        let (rule, kind, confidence) = if s.is_param {
254            (
255                "unused-parameter",
256                "parameter",
257                mollify_types::Confidence::Uncertain,
258            )
259        } else {
260            (
261                "unused-variable",
262                "local variable",
263                mollify_types::Confidence::Likely,
264            )
265        };
266        findings.push(Finding {
267            fingerprint: fingerprint::fingerprint(
268                rule,
269                &[path.as_str(), &s.name, &s.line.to_string()],
270            ),
271            rule: rule.into(),
272            category: Category::DeadCode,
273            severity: Severity::Warn,
274            confidence,
275            attribution: None,
276            reason: format!("{kind} `{}` is assigned but never used", s.name),
277            location: mollify_types::Location {
278                path: path.to_owned(),
279                line: s.line,
280                column: 0,
281                end_line: None,
282            },
283            actions: vec![],
284        });
285    }
286    // High complexity over default thresholds.
287    for f in &parsed.functions {
288        if f.cyclomatic > complexity::DEFAULT_CYCLOMATIC
289            || f.cognitive > complexity::DEFAULT_COGNITIVE
290        {
291            findings.push(Finding {
292                fingerprint: fingerprint::fingerprint("high-complexity", &[path.as_str(), &f.name]),
293                rule: "high-complexity".into(),
294                category: Category::Complexity,
295                severity: Severity::Warn,
296                confidence: mollify_types::Confidence::Certain,
297                attribution: None,
298                reason: format!(
299                    "function `{}` is complex (cyclomatic {}, cognitive {})",
300                    f.name, f.cyclomatic, f.cognitive
301                ),
302                location: mollify_types::Location {
303                    path: path.to_owned(),
304                    line: f.line,
305                    column: 0,
306                    end_line: Some(f.end_line),
307                },
308                actions: vec![],
309            });
310        }
311    }
312    // Honor inline `# mollify: ignore[...]` on the buffer's own lines.
313    let mut sup: rustc_hash::FxHashMap<u32, Vec<&str>> = rustc_hash::FxHashMap::default();
314    for (line, rule) in &parsed.ignores {
315        sup.entry(*line).or_default().push(rule.as_str());
316    }
317    findings.retain(|f| {
318        sup.get(&f.location.line)
319            .map(|rules| !rules.iter().any(|r| *r == "*" || *r == f.rule))
320            .unwrap_or(true)
321    });
322    sort_findings(&mut findings);
323    findings
324}
325
326/// Export the module import graph as Graphviz DOT or Mermaid `flowchart`.
327pub fn graph_export(root: &Utf8Path, mermaid: bool) -> String {
328    let graph = build_graph(root);
329    let mut edges: Vec<(String, String)> = graph
330        .import_edges()
331        .into_iter()
332        .map(|(a, b)| (a.to_string(), b.to_string()))
333        .collect();
334    edges.sort();
335    edges.dedup();
336    let id = |s: &str| s.replace(['.', '-', '/'], "_");
337    let mut out = String::new();
338    if mermaid {
339        out.push_str("flowchart LR\n");
340        for (a, b) in &edges {
341            out.push_str(&format!("    {}[\"{a}\"] --> {}[\"{b}\"]\n", id(a), id(b)));
342        }
343    } else {
344        out.push_str("digraph imports {\n  rankdir=LR;\n  node [shape=box];\n");
345        for (a, b) in &edges {
346            out.push_str(&format!("  \"{a}\" -> \"{b}\";\n"));
347        }
348        out.push_str("}\n");
349    }
350    out
351}
352
353/// Topology listing for `mollify list` / `mollify_list`.
354pub fn list_topology(root: &Utf8Path, kind: &str) -> Vec<String> {
355    let graph = build_graph(root);
356    let mut rows: Vec<String> = match kind {
357        "files" => graph
358            .modules
359            .iter()
360            .map(|m| format!("{}\t{}", m.dotted, m.path))
361            .collect(),
362        "frameworks" => {
363            let mut fw: std::collections::BTreeSet<String> = Default::default();
364            for m in &graph.modules {
365                for d in &m.parsed.definitions {
366                    if plugins::is_framework_entry(d) {
367                        for dec in &d.decorators {
368                            fw.insert(dec.split('.').next().unwrap_or(dec).to_string());
369                        }
370                    }
371                }
372            }
373            fw.into_iter().collect()
374        }
375        _ => graph
376            .modules
377            .iter()
378            .filter(|m| m.is_entry)
379            .map(|m| format!("{}\t{}", m.dotted, m.path))
380            .collect(),
381    };
382    rows.sort();
383    rows
384}
385
386/// `mollify audit` — the unified pass across all engines. Produces a quality
387/// score over the combined findings.
388pub fn audit_report(root: &Utf8Path) -> AuditReport {
389    let graph = build_graph(root);
390    let cfg = config::load(root);
391    let mut findings: Vec<Finding> = Vec::new();
392    findings.extend(deadcode::analyze(&graph));
393    findings.extend(members::analyze(&graph));
394    findings.extend(commented::analyze(&graph));
395    findings.extend(deps::analyze(root, &graph));
396    findings.extend(deps::unresolved(&graph));
397    findings.extend(arch::analyze(&graph));
398    findings.extend(arch::analyze_layers(&graph, &cfg.arch_layers));
399    findings.extend(arch::analyze_contracts(&graph, &cfg.contracts));
400    findings.extend(arch::private_imports(&graph));
401    findings.extend(policy::analyze(&graph, &cfg.policies));
402    findings.extend(complexity::analyze_with(
403        &graph,
404        cfg.max_cyclomatic,
405        cfg.max_cognitive,
406    ));
407    findings.extend(dupes::analyze_with(
408        &graph,
409        cfg.dup_min_tokens,
410        cfg.dup_min_lines,
411    ));
412    findings.extend(typehealth::analyze(&graph));
413    findings.extend(apihygiene::analyze(&graph));
414    findings.extend(security::analyze(&graph));
415    findings.extend(hotspots::analyze(root, &graph));
416    findings.extend(cohesion::analyze(&graph));
417    // Supply-chain runs only when a local advisory DB is present (keeps audit
418    // offline + deterministic; no implicit network).
419    let db_path = root.join(DEFAULT_ADVISORY_DB);
420    if let Some(advisories) = supplychain::load_db(&db_path) {
421        findings.extend(supplychain::analyze(root, &advisories));
422    }
423    apply_suppressions(&graph, &mut findings);
424    config::apply(&cfg, &mut findings);
425    sort_findings(&mut findings);
426    let files = graph.modules.len();
427    let summary = Summary::from_findings(&findings, files);
428    AuditReport {
429        schema_version: SCHEMA_VERSION.into(),
430        quality_score: quality_score(&findings, files),
431        summary,
432        findings,
433    }
434}
435
436/// Wrap a findings report in the right `Report` variant for a given category.
437pub fn into_report(category: Option<Category>, report: FindingsReport) -> Report {
438    match category {
439        Some(Category::DependencyHygiene) => Report::Deps(report),
440        _ => Report::DeadCode(report),
441    }
442}
443
444/// A simple, deterministic 0–100 health score: start at 100, subtract weighted
445/// penalties per finding (errors hurt more than warnings), floor at 0. Tunable.
446fn quality_score(findings: &[Finding], files: usize) -> u8 {
447    if files == 0 {
448        return 100;
449    }
450    let mut penalty = 0.0f64;
451    for f in findings {
452        penalty += match f.severity {
453            Severity::Error => 3.0,
454            Severity::Warn => 1.0,
455            Severity::Off => 0.0,
456        };
457    }
458    // Normalize against project size so big repos aren't unfairly punished.
459    let per_file = penalty / files as f64;
460    let score = (100.0 - per_file * 10.0).clamp(0.0, 100.0);
461    score.round() as u8
462}
463
#[cfg(test)]
mod tests {
    use super::*;
    use camino::Utf8PathBuf;

    /// Create a fresh, empty scratch directory named after this process id and
    /// `tag`, removing any leftover directory of the same name first.
    fn temp(tag: &str) -> Utf8PathBuf {
        let dir_name = format!("mollify-core-lib-{}-{tag}", std::process::id());
        let base = std::env::temp_dir().join(dir_name);
        let _ = std::fs::remove_dir_all(&base);
        std::fs::create_dir_all(&base).unwrap();
        Utf8PathBuf::from_path_buf(base).unwrap()
    }

    /// An inline ignore comment for the matching rule must remove the finding
    /// from the dead-code report.
    #[test]
    fn inline_suppression_drops_finding() {
        let project = temp("suppress");
        std::fs::write(project.join("__main__.py"), "print('hi')\n").unwrap();
        // `_dead` is a certain unused-export; the inline comment silences it.
        let suppressed_source = "def _dead():  # mollify: ignore[unused-export]\n    return 1\n";
        std::fs::write(project.join("lib.py"), suppressed_source).unwrap();

        let report = dead_code_report(&project);
        let leaked = report
            .findings
            .iter()
            .any(|f| f.reason.contains("_dead"));
        assert!(
            !leaked,
            "suppressed finding leaked: {:?}",
            report.findings
        );
        std::fs::remove_dir_all(&project).ok();
    }

    /// Two audits of the same tree must serialize identically, score within
    /// range, and report the unused export.
    #[test]
    fn audit_is_deterministic_and_scored() {
        let project = temp("audit");
        std::fs::write(project.join("__main__.py"), "print('hi')\n").unwrap();
        std::fs::write(project.join("lib.py"), "def dead():\n    return 1\n").unwrap();

        let first = audit_report(&project);
        let second = audit_report(&project);
        // Determinism: identical serialization across runs.
        let first_json = serde_json::to_string(&Report::Audit(first.clone())).unwrap();
        let second_json = serde_json::to_string(&Report::Audit(second)).unwrap();
        assert_eq!(first_json, second_json);
        assert!(first.quality_score <= 100);
        assert!(first
            .findings
            .iter()
            .any(|f| f.rule == "unused-export"));
        std::fs::remove_dir_all(&project).ok();
    }
}
511}