Skip to main content

mollify_types/
lib.rs

//! # mollify-types
//!
//! The shared, versioned **data contract** for Mollify. Every command emits a
//! JSON envelope with a discriminating top-level `kind`; downstream agents and
//! CI depend on this JSON shape, not on Mollify's internal Rust types.
//!
//! Invariants (ported from fallow's design — see `RESEARCH.md` §2.11):
//! - **Determinism:** identical input → byte-identical output. All collections
//!   that reach output are sorted deterministically before serialization.
//! - **Evidence, not decisions:** every [`Finding`] carries a stable
//!   [`Finding::fingerprint`], a [`Confidence`] tier, and a human `reason`.
//! - **Candidate/verifier separation:** [`Action`]s are *proposed*; only
//!   `auto_fixable` + `Confidence::Certain` may be applied without a human.
14
15use serde::{Deserialize, Serialize};
16
/// Schema version of the JSON contract currently emitted.
///
/// Versioning policy: an additive change bumps the minor component, a
/// breaking change bumps the major component. Agent skills pin to this value.
pub const SCHEMA_VERSION: &str = "0.1";
20
21/// Confidence tier attached to every finding. This is the core honesty
22/// mechanism: Python dead-code detection is undecidable in general, so Mollify
23/// never claims boolean certainty.
24#[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Serialize, Deserialize)]
25#[serde(rename_all = "snake_case")]
26pub enum Confidence {
27    /// Syntactically provable (e.g. code after `return`, unused local with no
28    /// dynamic sink in scope). Safe to auto-fix.
29    Certain,
30    /// Strong static signal but a residual dynamic risk. Suggest, don't apply.
31    Likely,
32    /// Public surface, near `getattr`/`eval`, or framework-adjacent. Report only.
33    Uncertain,
34}
35
36/// Severity controls CI exit behavior.
37#[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Serialize, Deserialize)]
38#[serde(rename_all = "snake_case")]
39pub enum Severity {
40    /// Fails CI (non-zero exit) by default.
41    Error,
42    /// Reported, exit 0.
43    Warn,
44    /// Suppressed.
45    Off,
46}
47
48/// Whether a finding was introduced by the current change or inherited from the
49/// base. The PR gate (`--gate new-only`) keys on [`Attribution::Introduced`].
50#[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Serialize, Deserialize)]
51#[serde(rename_all = "snake_case")]
52pub enum Attribution {
53    Introduced,
54    Inherited,
55}
56
57/// The five co-equal analysis areas (plus dependency hygiene), mirroring
58/// fallow's "never reduce it to a dead-code tool" rule.
59#[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Serialize, Deserialize)]
60#[serde(rename_all = "kebab-case")]
61pub enum Category {
62    DeadCode,
63    Duplication,
64    CircularDependency,
65    Complexity,
66    Architecture,
67    DependencyHygiene,
68    /// Type-annotation health (Python-specific; no fallow analog).
69    TypeHealth,
70    /// Security candidates (syntactic; never confirmed vulnerabilities).
71    Security,
72}
73
74/// A source location, 1-based line/column, workspace-relative path.
75#[derive(Debug, Clone, PartialEq, Eq, PartialOrd, Ord, Serialize, Deserialize)]
76pub struct Location {
77    pub path: camino::Utf8PathBuf,
78    pub line: u32,
79    #[serde(default, skip_serializing_if = "is_zero")]
80    pub column: u32,
81    #[serde(default, skip_serializing_if = "Option::is_none")]
82    pub end_line: Option<u32>,
83}
84
/// Serde `skip_serializing_if` predicate: true exactly when `n` is `0`.
///
/// Takes a reference because serde calls the predicate with `&T`.
fn is_zero(n: &u32) -> bool {
    matches!(*n, 0)
}
88
89/// A proposed, machine-actionable remediation for a finding. The agent decides
90/// whether to apply it; Mollify never auto-applies non-`Certain` findings.
91#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
92pub struct Action {
93    /// e.g. `remove-symbol`, `remove-import`, `remove-dependency`.
94    #[serde(rename = "type")]
95    pub kind: String,
96    pub description: String,
97    /// True only when Mollify can apply this deterministically and safely.
98    pub auto_fixable: bool,
99    /// The inline comment that would suppress this finding instead of fixing it.
100    #[serde(default, skip_serializing_if = "Option::is_none")]
101    pub suppression_comment: Option<String>,
102}
103
104/// A single piece of deterministic evidence. The atom of every report.
105#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
106pub struct Finding {
107    /// Stable cross-run id, `<rule>:<hex>` — survives reordering and minor edits
108    /// so it can be referenced in commits and baselines.
109    pub fingerprint: String,
110    /// Machine rule id, e.g. `unused-export`, `unused-dependency`, `cycle`.
111    pub rule: String,
112    pub category: Category,
113    pub severity: Severity,
114    pub confidence: Confidence,
115    #[serde(default, skip_serializing_if = "Option::is_none")]
116    pub attribution: Option<Attribution>,
117    /// Human-readable explanation — the "why" of the evidence.
118    pub reason: String,
119    pub location: Location,
120    #[serde(default, skip_serializing_if = "Vec::is_empty")]
121    pub actions: Vec<Action>,
122}
123
124/// The kind-discriminated output envelope. `kind` lets clients switch on the
125/// result type and iterate `findings`.
126// `Eq` is intentionally omitted: `MetricsReport` carries `f64` fields.
127#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
128#[serde(tag = "kind", rename_all = "kebab-case")]
129pub enum Report {
130    /// Full unified report across all analysis areas.
131    Audit(AuditReport),
132    /// Dead-code-only report.
133    DeadCode(FindingsReport),
134    /// Dependency-hygiene-only report.
135    Deps(FindingsReport),
136    /// Architecture (circular dependencies, boundaries).
137    Arch(FindingsReport),
138    /// Complexity hotspots.
139    Complexity(FindingsReport),
140    /// Duplication / clone families.
141    Dupes(FindingsReport),
142    /// Type-annotation health.
143    Types(FindingsReport),
144    /// Security candidates.
145    Security(FindingsReport),
146    /// Runtime-coverage cold-path analysis.
147    Coverage(FindingsReport),
148    /// Code-metrics report (Maintainability Index, Halstead, raw LOC).
149    Metrics(MetricsReport),
150}
151
152/// Per-file code metrics (radon/wily-style), plus project totals.
153#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
154pub struct MetricsReport {
155    pub schema_version: String,
156    pub files: Vec<FileMetrics>,
157    pub totals: MetricsTotals,
158}
159
160/// Maintainability and size metrics for one file.
161#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
162pub struct FileMetrics {
163    pub path: camino::Utf8PathBuf,
164    /// Physical lines of code.
165    pub loc: u32,
166    /// Source lines (non-blank, non-comment).
167    pub sloc: u32,
168    pub comment_lines: u32,
169    pub blank_lines: u32,
170    pub functions: u32,
171    /// Sum of per-function cyclomatic complexity.
172    pub total_cyclomatic: u32,
173    pub max_cyclomatic: u32,
174    /// Maintainability Index, normalized to 0–100 (higher is better).
175    pub maintainability_index: f64,
176    /// MI rank: `A` (20–100), `B` (10–<20), `C` (<10) — radon's mapping.
177    pub mi_rank: char,
178}
179
180/// Project-wide metric totals.
181#[derive(Debug, Clone, Default, PartialEq, Serialize, Deserialize)]
182pub struct MetricsTotals {
183    pub files: usize,
184    pub loc: u32,
185    pub sloc: u32,
186    pub functions: u32,
187    /// Mean Maintainability Index across files.
188    pub mean_maintainability_index: f64,
189}
190
191/// A report that is just a sorted list of findings plus a summary.
192#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
193pub struct FindingsReport {
194    pub schema_version: String,
195    pub summary: Summary,
196    pub findings: Vec<Finding>,
197}
198
199/// The full audit envelope: a quality score plus the findings.
200#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
201pub struct AuditReport {
202    pub schema_version: String,
203    /// 0–100 health score (higher is better).
204    pub quality_score: u8,
205    pub summary: Summary,
206    pub findings: Vec<Finding>,
207}
208
209/// Aggregate counts, always present so CI can gate without scanning findings.
210#[derive(Debug, Clone, Default, PartialEq, Eq, Serialize, Deserialize)]
211pub struct Summary {
212    pub total: usize,
213    pub errors: usize,
214    pub warnings: usize,
215    pub files_analyzed: usize,
216    #[serde(default, skip_serializing_if = "is_usize_zero")]
217    pub introduced: usize,
218}
219
/// Serde `skip_serializing_if` predicate: true exactly when `n` is `0`.
///
/// Takes a reference because serde calls the predicate with `&T`.
fn is_usize_zero(n: &usize) -> bool {
    matches!(*n, 0)
}
223
224impl Summary {
225    /// Build a summary from a finding slice (counts errors/warnings/introduced).
226    pub fn from_findings(findings: &[Finding], files_analyzed: usize) -> Self {
227        let mut s = Summary {
228            total: findings.len(),
229            files_analyzed,
230            ..Default::default()
231        };
232        for f in findings {
233            match f.severity {
234                Severity::Error => s.errors += 1,
235                Severity::Warn => s.warnings += 1,
236                Severity::Off => {}
237            }
238            if f.attribution == Some(Attribution::Introduced) {
239                s.introduced += 1;
240            }
241        }
242        s
243    }
244}
245
246/// Deterministic ordering for findings: by path, then line, then rule, then
247/// fingerprint. Call before serializing any report.
248pub fn sort_findings(findings: &mut [Finding]) {
249    findings.sort_by(|a, b| {
250        a.location
251            .path
252            .cmp(&b.location.path)
253            .then(a.location.line.cmp(&b.location.line))
254            .then(a.rule.cmp(&b.rule))
255            .then(a.fingerprint.cmp(&b.fingerprint))
256    });
257}
258
#[cfg(test)]
mod tests {
    use super::*;

    /// Builds an error-severity, `Certain`, dead-code finding at `path:line`
    /// whose fingerprint is `<rule>:0000`.
    fn sample_finding(path: &str, line: u32, rule: &str) -> Finding {
        let location = Location {
            path: path.into(),
            line,
            column: 0,
            end_line: None,
        };
        Finding {
            fingerprint: format!("{rule}:0000"),
            rule: rule.to_owned(),
            category: Category::DeadCode,
            severity: Severity::Error,
            confidence: Confidence::Certain,
            attribution: None,
            reason: String::from("test"),
            location,
            actions: Vec::new(),
        }
    }

    /// The serialized envelope carries the kebab-case `kind` tag.
    #[test]
    fn envelope_has_kind_discriminator() {
        let payload = FindingsReport {
            schema_version: SCHEMA_VERSION.into(),
            summary: Summary::default(),
            findings: Vec::new(),
        };
        let encoded = serde_json::to_string(&Report::DeadCode(payload)).unwrap();
        assert!(encoded.contains("\"kind\":\"dead-code\""));
    }

    /// Confidence tiers serialize as snake_case strings.
    #[test]
    fn confidence_serializes_snake_case() {
        let encoded = serde_json::to_string(&Confidence::Uncertain).unwrap();
        assert_eq!(encoded, "\"uncertain\"");
    }

    /// Sorting orders by path first, then by line.
    #[test]
    fn sort_is_deterministic() {
        let mut findings = [
            sample_finding("b.py", 1, "x"),
            sample_finding("a.py", 9, "x"),
            sample_finding("a.py", 2, "y"),
        ];
        sort_findings(&mut findings);
        let (first, last) = (&findings[0], &findings[2]);
        assert_eq!(first.location.path, "a.py");
        assert_eq!(first.location.line, 2);
        assert_eq!(last.location.path, "b.py");
    }

    /// One error plus one warning yields matching summary counts.
    #[test]
    fn summary_counts_severities() {
        let error = sample_finding("a.py", 1, "x");
        let warning = Finding {
            severity: Severity::Warn,
            ..sample_finding("a.py", 1, "x")
        };
        let summary = Summary::from_findings(&[error, warning], 1);
        assert_eq!(summary.total, 2);
        assert_eq!(summary.errors, 1);
        assert_eq!(summary.warnings, 1);
    }
}