Skip to main content

mollify_core/
lib.rs

1//! # mollify-core
2//!
3//! Analysis orchestration. Builds the graph, runs the engines, and assembles the
4//! kind-discriminated [`mollify_types::Report`] envelopes. Engines: dead-code,
5//! dependency hygiene, architecture (cycles/layers/contracts/policies),
6//! complexity + hotspots, duplication, type-health, security, cohesion,
7//! commented-code, coverage, and supply-chain — all folded into `audit`.
8
9use camino::Utf8Path;
10use mollify_graph::{discover_python_files, ModuleGraph};
11use mollify_types::{
12    sort_findings, AuditReport, Category, Finding, FindingsReport, Report, Severity, Summary,
13    SCHEMA_VERSION,
14};
15
16pub mod agents;
17pub mod apihygiene;
18pub mod arch;
19pub mod baseline;
20pub mod cohesion;
21pub mod commented;
22pub mod complexity;
23pub mod config;
24pub mod coverage;
25pub mod deadcode;
26pub mod deps;
27pub mod dupes;
28pub mod explain;
29pub mod fingerprint;
30pub mod fix;
31pub mod git;
32pub mod hotspots;
33pub mod installed;
34pub mod known;
35pub mod members;
36pub mod metrics;
37pub mod plugins;
38pub mod policy;
39pub mod sarif;
40pub mod security;
41pub mod suffix;
42pub mod supplychain;
43pub mod trace;
44pub mod typehealth;
45pub mod version;
46
47/// Build the graph for a project root once, to be shared across engines.
48pub fn build_graph(root: &Utf8Path) -> ModuleGraph {
49    let files = discover_python_files(root);
50    ModuleGraph::build(root, &files)
51}
52
53/// Sort, apply inline `# mollify: ignore[...]` suppressions and `.mollifyrc`
54/// (severity overrides + ignore), then summarize.
55fn finalize(
56    cfg: &config::Config,
57    graph: &ModuleGraph,
58    mut findings: Vec<Finding>,
59) -> FindingsReport {
60    apply_suppressions(graph, &mut findings);
61    config::apply(cfg, &mut findings);
62    sort_findings(&mut findings);
63    FindingsReport {
64        schema_version: SCHEMA_VERSION.into(),
65        summary: Summary::from_findings(&findings, graph.modules.len()),
66        findings,
67    }
68}
69
70/// Drop findings silenced by an inline `# mollify: ignore[<rule>]` comment on
71/// the finding's line (or a bare `# mollify: ignore` matching any rule).
72pub fn apply_suppressions(graph: &ModuleGraph, findings: &mut Vec<Finding>) {
73    use rustc_hash::FxHashMap;
74    // (path, line) -> set of suppressed rules ("*" = all).
75    let mut sup: FxHashMap<(&str, u32), Vec<&str>> = FxHashMap::default();
76    for m in &graph.modules {
77        for (line, rule) in &m.parsed.ignores {
78            sup.entry((m.path.as_str(), *line))
79                .or_default()
80                .push(rule.as_str());
81        }
82    }
83    if sup.is_empty() {
84        return;
85    }
86    findings.retain(|f| {
87        if let Some(rules) = sup.get(&(f.location.path.as_str(), f.location.line)) {
88            !rules.iter().any(|r| *r == "*" || *r == f.rule)
89        } else {
90            true
91        }
92    });
93}
94
95/// `mollify dead-code` — reachability-based unused files/symbols.
96pub fn dead_code_report(root: &Utf8Path) -> FindingsReport {
97    let graph = build_graph(root);
98    let mut findings = deadcode::analyze(&graph);
99    findings.extend(members::analyze(&graph));
100    findings.extend(commented::analyze(&graph));
101    finalize(&config::load(root), &graph, findings)
102}
103
104/// `mollify deps` — dependency hygiene.
105pub fn deps_report(root: &Utf8Path) -> FindingsReport {
106    let graph = build_graph(root);
107    let mut findings = deps::analyze(root, &graph);
108    findings.extend(deps::unresolved(&graph));
109    finalize(&config::load(root), &graph, findings)
110}
111
112/// `mollify arch` — circular dependencies (boundary presets later).
113pub fn arch_report(root: &Utf8Path) -> FindingsReport {
114    let graph = build_graph(root);
115    let cfg = config::load(root);
116    let mut findings = arch::analyze(&graph);
117    findings.extend(arch::analyze_layers(&graph, &cfg.arch_layers));
118    findings.extend(arch::analyze_contracts(&graph, &cfg.contracts));
119    findings.extend(arch::private_imports(&graph));
120    findings.extend(policy::analyze(&graph, &cfg.policies));
121    finalize(&cfg, &graph, findings)
122}
123
124/// `mollify complexity` / `mollify health` — complexity hotspots.
125pub fn complexity_report(root: &Utf8Path) -> FindingsReport {
126    let graph = build_graph(root);
127    let cfg = config::load(root);
128    let mut findings = complexity::analyze_with(&graph, cfg.max_cyclomatic, cfg.max_cognitive);
129    findings.extend(hotspots::analyze(root, &graph));
130    findings.extend(cohesion::analyze(&graph));
131    finalize(&cfg, &graph, findings)
132}
133
134/// `mollify dupes` — duplication / clone families.
135pub fn dupes_report(root: &Utf8Path) -> FindingsReport {
136    let graph = build_graph(root);
137    let cfg = config::load(root);
138    let findings = dupes::analyze_with(&graph, cfg.dup_min_tokens, cfg.dup_min_lines);
139    finalize(&cfg, &graph, findings)
140}
141
142/// `mollify types` — type-annotation health + API-hygiene (private-type leaks).
143pub fn types_report(root: &Utf8Path) -> FindingsReport {
144    let graph = build_graph(root);
145    let mut findings = typehealth::analyze(&graph);
146    findings.extend(apihygiene::analyze(&graph));
147    finalize(&config::load(root), &graph, findings)
148}
149
150/// `mollify security` — security candidates (deterministic; review before acting).
151pub fn security_report(root: &Utf8Path) -> FindingsReport {
152    let graph = build_graph(root);
153    finalize(&config::load(root), &graph, security::analyze(&graph))
154}
155
156/// `mollify coverage` — cold-path analysis from a coverage.py JSON report.
157pub fn coverage_report(root: &Utf8Path, coverage_path: &Utf8Path) -> FindingsReport {
158    let graph = build_graph(root);
159    let findings = coverage::analyze(root, &graph, coverage_path);
160    finalize(&config::load(root), &graph, findings)
161}
162
163/// `mollify supply-chain` — match pinned/locked dependency versions against a
164/// local advisory database (`vulnerable-dependency`). The DB is an input file,
165/// so analysis stays deterministic and offline.
166pub fn supply_chain_report(root: &Utf8Path, db_path: &Utf8Path) -> FindingsReport {
167    let advisories = supplychain::load_db(db_path).unwrap_or_default();
168    supply_chain_report_with(root, &advisories)
169}
170
171/// Like [`supply_chain_report`] but against an already-loaded advisory set (e.g.
172/// fetched live by the CLI). Keeps the network out of `mollify-core`.
173pub fn supply_chain_report_with(
174    root: &Utf8Path,
175    advisories: &[supplychain::Advisory],
176) -> FindingsReport {
177    let graph = build_graph(root);
178    let findings = supplychain::analyze(root, advisories);
179    finalize(&config::load(root), &graph, findings)
180}
181
/// The default advisory DB path checked by `audit` when present. It is joined
/// onto the project root, and the supply-chain engine runs during `audit` only
/// if a DB can be loaded from that location.
pub const DEFAULT_ADVISORY_DB: &str = ".mollify/advisories.json";
184
185/// A per-file evidence bundle: the matched module, its findings, and its import
186/// neighborhood. Shared by `mollify inspect` (CLI) and the `mollify_inspect`
187/// MCP tool.
188pub struct Inspection {
189    pub file: String,
190    pub module: Option<String>,
191    pub findings: Vec<Finding>,
192    pub imports: Vec<String>,
193    pub imported_by: Vec<String>,
194}
195
/// Returns true if `path` matches the user's `file` argument: either exactly,
/// or as a trailing path fragment that begins on a path-component boundary.
///
/// `lib.py` therefore matches `src/lib.py` but not `src/xlib.py`; a plain
/// suffix test would accept both. An empty `file` matches nothing. Both `/`
/// and `\` are treated as separators, and a fragment that itself starts with a
/// separator (e.g. `/lib.py`) carries its own boundary.
fn path_matches(path: &str, file: &str) -> bool {
    if file.is_empty() {
        return false;
    }
    let is_sep = |c: char| c == '/' || c == '\\';
    match path.strip_suffix(file) {
        // Exact match, or the fragment starts right after / at a separator.
        Some(prefix) => {
            prefix.is_empty() || prefix.ends_with(is_sep) || file.starts_with(is_sep)
        }
        None => false,
    }
}
201
202/// Build the evidence bundle for a single file.
203pub fn inspect(root: &Utf8Path, file: &str) -> Inspection {
204    let report = audit_report(root);
205    let findings: Vec<Finding> = report
206        .findings
207        .into_iter()
208        .filter(|f| path_matches(f.location.path.as_str(), file))
209        .collect();
210    let graph = build_graph(root);
211    let module = graph
212        .modules
213        .iter()
214        .find(|m| path_matches(m.path.as_str(), file))
215        .map(|m| m.dotted.clone());
216    let trace = module.as_deref().and_then(|d| trace::module(&graph, d));
217    Inspection {
218        file: file.to_string(),
219        module,
220        findings,
221        imports: trace
222            .as_ref()
223            .map(|t| t.imports.clone())
224            .unwrap_or_default(),
225        imported_by: trace
226            .as_ref()
227            .map(|t| t.imported_by.clone())
228            .unwrap_or_default(),
229    }
230}
231
232/// File-local diagnostics from an in-memory buffer (no disk, no graph) — the
233/// live LSP path for `textDocument/didChange`. Covers the intra-file rules
234/// (security, unused variables/parameters, complexity, commented-out code);
235/// cross-file rules (dead exports, deps, architecture) are produced by the full
236/// audit on save. Returns sorted findings, honoring inline suppressions.
237pub fn analyze_text(path: &Utf8Path, source: &str) -> Vec<Finding> {
238    let mut parser = match mollify_parse::PyParser::new() {
239        Ok(p) => p,
240        Err(_) => return Vec::new(),
241    };
242    let Ok(parsed) = parser.parse(path, source) else {
243        return Vec::new();
244    };
245    let mut findings = Vec::new();
246    findings.extend(security::analyze_parsed(path, &parsed));
247    findings.extend(commented::analyze_source(path, source));
248    // Unused local variables / parameters.
249    for s in &parsed.scope_findings {
250        let (rule, kind, confidence) = if s.is_param {
251            (
252                "unused-parameter",
253                "parameter",
254                mollify_types::Confidence::Uncertain,
255            )
256        } else {
257            (
258                "unused-variable",
259                "local variable",
260                mollify_types::Confidence::Likely,
261            )
262        };
263        findings.push(Finding {
264            fingerprint: fingerprint::fingerprint(
265                rule,
266                &[path.as_str(), &s.name, &s.line.to_string()],
267            ),
268            rule: rule.into(),
269            category: Category::DeadCode,
270            severity: Severity::Warn,
271            confidence,
272            attribution: None,
273            reason: format!("{kind} `{}` is assigned but never used", s.name),
274            location: mollify_types::Location {
275                path: path.to_owned(),
276                line: s.line,
277                column: 0,
278                end_line: None,
279            },
280            actions: vec![],
281        });
282    }
283    // High complexity over default thresholds.
284    for f in &parsed.functions {
285        if f.cyclomatic > complexity::DEFAULT_CYCLOMATIC
286            || f.cognitive > complexity::DEFAULT_COGNITIVE
287        {
288            findings.push(Finding {
289                fingerprint: fingerprint::fingerprint("high-complexity", &[path.as_str(), &f.name]),
290                rule: "high-complexity".into(),
291                category: Category::Complexity,
292                severity: Severity::Warn,
293                confidence: mollify_types::Confidence::Certain,
294                attribution: None,
295                reason: format!(
296                    "function `{}` is complex (cyclomatic {}, cognitive {})",
297                    f.name, f.cyclomatic, f.cognitive
298                ),
299                location: mollify_types::Location {
300                    path: path.to_owned(),
301                    line: f.line,
302                    column: 0,
303                    end_line: Some(f.end_line),
304                },
305                actions: vec![],
306            });
307        }
308    }
309    // Honor inline `# mollify: ignore[...]` on the buffer's own lines.
310    let mut sup: rustc_hash::FxHashMap<u32, Vec<&str>> = rustc_hash::FxHashMap::default();
311    for (line, rule) in &parsed.ignores {
312        sup.entry(*line).or_default().push(rule.as_str());
313    }
314    findings.retain(|f| {
315        sup.get(&f.location.line)
316            .map(|rules| !rules.iter().any(|r| *r == "*" || *r == f.rule))
317            .unwrap_or(true)
318    });
319    sort_findings(&mut findings);
320    findings
321}
322
323/// Export the module import graph as Graphviz DOT or Mermaid `flowchart`.
324pub fn graph_export(root: &Utf8Path, mermaid: bool) -> String {
325    let graph = build_graph(root);
326    let mut edges: Vec<(String, String)> = graph
327        .import_edges()
328        .into_iter()
329        .map(|(a, b)| (a.to_string(), b.to_string()))
330        .collect();
331    edges.sort();
332    edges.dedup();
333    let id = |s: &str| s.replace(['.', '-', '/'], "_");
334    let mut out = String::new();
335    if mermaid {
336        out.push_str("flowchart LR\n");
337        for (a, b) in &edges {
338            out.push_str(&format!("    {}[\"{a}\"] --> {}[\"{b}\"]\n", id(a), id(b)));
339        }
340    } else {
341        out.push_str("digraph imports {\n  rankdir=LR;\n  node [shape=box];\n");
342        for (a, b) in &edges {
343            out.push_str(&format!("  \"{a}\" -> \"{b}\";\n"));
344        }
345        out.push_str("}\n");
346    }
347    out
348}
349
350/// Topology listing for `mollify list` / `mollify_list`.
351pub fn list_topology(root: &Utf8Path, kind: &str) -> Vec<String> {
352    let graph = build_graph(root);
353    let mut rows: Vec<String> = match kind {
354        "files" => graph
355            .modules
356            .iter()
357            .map(|m| format!("{}\t{}", m.dotted, m.path))
358            .collect(),
359        "frameworks" => {
360            let mut fw: std::collections::BTreeSet<String> = Default::default();
361            for m in &graph.modules {
362                for d in &m.parsed.definitions {
363                    if plugins::is_framework_entry(d) {
364                        for dec in &d.decorators {
365                            fw.insert(dec.split('.').next().unwrap_or(dec).to_string());
366                        }
367                    }
368                }
369            }
370            fw.into_iter().collect()
371        }
372        _ => graph
373            .modules
374            .iter()
375            .filter(|m| m.is_entry)
376            .map(|m| format!("{}\t{}", m.dotted, m.path))
377            .collect(),
378    };
379    rows.sort();
380    rows
381}
382
383/// `mollify audit` — the unified pass across all engines. Produces a quality
384/// score over the combined findings.
385pub fn audit_report(root: &Utf8Path) -> AuditReport {
386    let graph = build_graph(root);
387    let cfg = config::load(root);
388    let mut findings: Vec<Finding> = Vec::new();
389    findings.extend(deadcode::analyze(&graph));
390    findings.extend(members::analyze(&graph));
391    findings.extend(commented::analyze(&graph));
392    findings.extend(deps::analyze(root, &graph));
393    findings.extend(deps::unresolved(&graph));
394    findings.extend(arch::analyze(&graph));
395    findings.extend(arch::analyze_layers(&graph, &cfg.arch_layers));
396    findings.extend(arch::analyze_contracts(&graph, &cfg.contracts));
397    findings.extend(arch::private_imports(&graph));
398    findings.extend(policy::analyze(&graph, &cfg.policies));
399    findings.extend(complexity::analyze_with(
400        &graph,
401        cfg.max_cyclomatic,
402        cfg.max_cognitive,
403    ));
404    findings.extend(dupes::analyze_with(
405        &graph,
406        cfg.dup_min_tokens,
407        cfg.dup_min_lines,
408    ));
409    findings.extend(typehealth::analyze(&graph));
410    findings.extend(apihygiene::analyze(&graph));
411    findings.extend(security::analyze(&graph));
412    findings.extend(hotspots::analyze(root, &graph));
413    findings.extend(cohesion::analyze(&graph));
414    // Supply-chain runs only when a local advisory DB is present (keeps audit
415    // offline + deterministic; no implicit network).
416    let db_path = root.join(DEFAULT_ADVISORY_DB);
417    if let Some(advisories) = supplychain::load_db(&db_path) {
418        findings.extend(supplychain::analyze(root, &advisories));
419    }
420    apply_suppressions(&graph, &mut findings);
421    config::apply(&cfg, &mut findings);
422    sort_findings(&mut findings);
423    let files = graph.modules.len();
424    let summary = Summary::from_findings(&findings, files);
425    AuditReport {
426        schema_version: SCHEMA_VERSION.into(),
427        quality_score: quality_score(&findings, files),
428        summary,
429        findings,
430    }
431}
432
433/// Wrap a findings report in the right `Report` variant for a given category.
434pub fn into_report(category: Option<Category>, report: FindingsReport) -> Report {
435    match category {
436        Some(Category::DependencyHygiene) => Report::Deps(report),
437        _ => Report::DeadCode(report),
438    }
439}
440
441/// A simple, deterministic 0–100 health score: start at 100, subtract weighted
442/// penalties per finding (errors hurt more than warnings), floor at 0. Tunable.
443fn quality_score(findings: &[Finding], files: usize) -> u8 {
444    if files == 0 {
445        return 100;
446    }
447    let mut penalty = 0.0f64;
448    for f in findings {
449        penalty += match f.severity {
450            Severity::Error => 3.0,
451            Severity::Warn => 1.0,
452            Severity::Off => 0.0,
453        };
454    }
455    // Normalize against project size so big repos aren't unfairly punished.
456    let per_file = penalty / files as f64;
457    let score = (100.0 - per_file * 10.0).clamp(0.0, 100.0);
458    score.round() as u8
459}
460
#[cfg(test)]
mod tests {
    use super::*;
    use camino::Utf8PathBuf;

    /// Create a fresh, empty scratch directory under the system temp dir,
    /// named after the current process id and `tag`. Any directory left over
    /// from an earlier run is removed first.
    fn temp(tag: &str) -> Utf8PathBuf {
        let name = format!("mollify-core-lib-{}-{tag}", std::process::id());
        let dir = std::env::temp_dir().join(name);
        let _ = std::fs::remove_dir_all(&dir);
        std::fs::create_dir_all(&dir).unwrap();
        Utf8PathBuf::from_path_buf(dir).unwrap()
    }

    /// An inline `# mollify: ignore[unused-export]` must remove the finding
    /// for the function defined on that line.
    #[test]
    fn inline_suppression_drops_finding() {
        let project = temp("suppress");
        std::fs::write(project.join("__main__.py"), "print('hi')\n").unwrap();
        // `_dead` is a certain unused-export; the inline comment silences it.
        let lib_source = "def _dead():  # mollify: ignore[unused-export]\n    return 1\n";
        std::fs::write(project.join("lib.py"), lib_source).unwrap();

        let report = dead_code_report(&project);
        let leaked = report.findings.iter().any(|f| f.reason.contains("_dead"));
        assert!(
            !leaked,
            "suppressed finding leaked: {:?}",
            report.findings
        );
        std::fs::remove_dir_all(&project).ok();
    }

    /// Two audits of the same tree must serialize identically, stay within
    /// the score range, and report the unused export.
    #[test]
    fn audit_is_deterministic_and_scored() {
        let project = temp("audit");
        std::fs::write(project.join("__main__.py"), "print('hi')\n").unwrap();
        std::fs::write(project.join("lib.py"), "def dead():\n    return 1\n").unwrap();

        let first = audit_report(&project);
        let second = audit_report(&project);

        // Determinism: identical serialization across runs.
        let first_json = serde_json::to_string(&Report::Audit(first.clone())).unwrap();
        let second_json = serde_json::to_string(&Report::Audit(second)).unwrap();
        assert_eq!(first_json, second_json);

        assert!(first.quality_score <= 100);
        assert!(first.findings.iter().any(|f| f.rule == "unused-export"));
        std::fs::remove_dir_all(&project).ok();
    }
}