sdivi_snapshot/snapshot.rs
1//! [`Snapshot`] — versioned snapshot of pipeline stage outputs.
2
3use std::collections::BTreeMap;
4
5use sdivi_detection::partition::LeidenPartition;
6use sdivi_graph::metrics::GraphMetrics;
7use sdivi_patterns::PatternCatalog;
8use serde::{Deserialize, Serialize};
9
10use crate::change_coupling::ChangeCouplingResult;
11
/// Schema version string stamped into every snapshot written by sdivi-rust.
///
/// All sdivi-rust output carries `"1.0"`. Changing the value is a breaking
/// change (Rule 16), so treat it as part of the public contract.
pub const SNAPSHOT_VERSION: &str = "1.0";
17
18/// Intent-divergence summary derived from the caller's boundary representation.
19///
20/// Present in a [`Snapshot`] only when the caller supplied a boundary count to
21/// [`assemble_snapshot`] (typically because a `.sdivi/boundaries.yaml` was
22/// found at snapshot time, but any source of a count is valid).
23///
24/// # Examples
25///
26/// ```rust
27/// use sdivi_snapshot::snapshot::IntentDivergenceInfo;
28///
29/// let info = IntentDivergenceInfo { boundary_count: 3, violation_count: 0 };
30/// assert_eq!(info.boundary_count, 3);
31/// ```
32#[derive(Debug, Clone, Serialize, Deserialize, PartialEq)]
33pub struct IntentDivergenceInfo {
34 /// Number of boundaries declared by the caller.
35 pub boundary_count: usize,
36 /// Number of cross-boundary dependency violations detected.
37 pub violation_count: u32,
38}
39
40/// Pattern metrics derived from the catalog — carried in every snapshot.
41///
42/// Computed by `sdivi_core::compute_pattern_metrics` or populated by
43/// `sdivi_pipeline::Pipeline` from the full catalog.
44///
45/// # Examples
46///
47/// ```rust
48/// use std::collections::BTreeMap;
49/// use sdivi_snapshot::snapshot::PatternMetricsResult;
50///
51/// let m = PatternMetricsResult {
52/// entropy_per_category: BTreeMap::new(),
53/// total_entropy: 0.0,
54/// convention_drift: 0.0,
55/// convention_drift_per_category: BTreeMap::new(),
56/// };
57/// assert_eq!(m.total_entropy, 0.0);
58/// ```
59#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Default)]
60pub struct PatternMetricsResult {
61 /// Shannon entropy of pattern fingerprints per category.
62 pub entropy_per_category: BTreeMap<String, f64>,
63 /// Sum of per-category entropies.
64 pub total_entropy: f64,
65 /// Average fraction of distinct fingerprints per category (0–1).
66 ///
67 /// Defined as: for each category, `distinct_fingerprints / total_instances`,
68 /// then average across all categories. `0.0` when no instances exist.
69 pub convention_drift: f64,
70 /// Per-category fraction of distinct fingerprints: `distinct / total` for each category.
71 ///
72 /// Source of truth for per-category override filtering in `compute_thresholds_check`.
73 /// The scalar `convention_drift` is the average of this map's values.
74 #[serde(default)]
75 pub convention_drift_per_category: BTreeMap<String, f64>,
76}
77
78/// A versioned snapshot of all pipeline stage outputs for one point in time.
79///
80/// # Examples
81///
82/// ```rust
83/// use std::collections::BTreeMap;
84/// use sdivi_snapshot::snapshot::{assemble_snapshot, PatternMetricsResult, SNAPSHOT_VERSION};
85/// use sdivi_graph::metrics::GraphMetrics;
86/// use sdivi_detection::partition::LeidenPartition;
87/// use sdivi_patterns::PatternCatalog;
88///
89/// let graph = GraphMetrics {
90/// node_count: 0, edge_count: 0, density: 0.0,
91/// cycle_count: 0, top_hubs: vec![], component_count: 0,
92/// };
93/// let partition = LeidenPartition {
94/// assignments: BTreeMap::new(), stability: BTreeMap::new(),
95/// modularity: 0.0, seed: 42,
96/// };
97/// let snap = assemble_snapshot(
98/// graph, partition, PatternCatalog::default(),
99/// PatternMetricsResult::default(), None,
100/// "2026-04-29T00:00:00Z", None, None, 0,
101/// );
102/// assert_eq!(snap.snapshot_version, SNAPSHOT_VERSION);
103/// ```
104#[derive(Debug, Clone, Serialize, Deserialize, PartialEq)]
105pub struct Snapshot {
106 /// Always `"1.0"` for sdivi-rust output.
107 pub snapshot_version: String,
108 /// ISO 8601 UTC timestamp at which the snapshot was taken.
109 pub timestamp: String,
110 /// Git commit SHA at the time of the snapshot, when available.
111 #[serde(skip_serializing_if = "Option::is_none")]
112 pub commit: Option<String>,
113 /// Graph metrics computed from the dependency graph.
114 pub graph: GraphMetrics,
115 /// Leiden community detection result.
116 pub partition: LeidenPartition,
117 /// Pattern fingerprint catalog with per-category entropy.
118 pub catalog: PatternCatalog,
119 /// Pattern metrics (entropy, convention drift) for this snapshot.
120 pub pattern_metrics: PatternMetricsResult,
121 /// Intent divergence against the caller-declared boundaries.
122 ///
123 /// `None` (omitted from JSON) when no boundary count was supplied to
124 /// [`assemble_snapshot`] (typically because `.sdivi/boundaries.yaml` was
125 /// not present).
126 #[serde(skip_serializing_if = "Option::is_none")]
127 pub intent_divergence: Option<IntentDivergenceInfo>,
128 /// File-path → community-ID assignments for boundary inference.
129 ///
130 /// Maps each source file's repo-relative path to its community ID from the
131 /// Leiden partition at snapshot time. Populated by `sdivi-pipeline` from the
132 /// `DependencyGraph` + `LeidenPartition`. Absent (empty) in snapshots
133 /// produced without path context (e.g., pure-compute path); boundary
134 /// inference from such snapshots yields no proposals.
135 #[serde(default, skip_serializing_if = "BTreeMap::is_empty")]
136 pub path_partition: BTreeMap<String, u32>,
137
138 /// Change-coupling analysis result.
139 ///
140 /// `None` when the repo has no git history or `history_depth = 0`.
141 /// `#[serde(default)]` ensures M14-era snapshots deserialize as `None`.
142 #[serde(default, skip_serializing_if = "Option::is_none")]
143 pub change_coupling: Option<ChangeCouplingResult>,
144}
145
146/// Assembles a [`Snapshot`] from pipeline stage outputs.
147///
148/// When `boundary_count` is `Some`, an [`IntentDivergenceInfo`] is included with
149/// that count and the caller-supplied `violation_count`. The caller is responsible
150/// for deriving `boundary_count` from a `BoundarySpec` (or any equivalent source);
151/// this function is intentionally agnostic to the spec type so non-FS callers
152/// (WASM, embedders with their own boundary representation) can use it directly
153/// without constructing a `sdivi_config::BoundarySpec`.
154///
155/// # Examples
156///
157/// ```rust
158/// use std::collections::BTreeMap;
159/// use sdivi_snapshot::snapshot::{assemble_snapshot, PatternMetricsResult, SNAPSHOT_VERSION};
160/// use sdivi_graph::metrics::GraphMetrics;
161/// use sdivi_detection::partition::LeidenPartition;
162/// use sdivi_patterns::PatternCatalog;
163///
164/// let graph = GraphMetrics {
165/// node_count: 1, edge_count: 0, density: 0.0,
166/// cycle_count: 0, top_hubs: vec![], component_count: 1,
167/// };
168/// let partition = LeidenPartition {
169/// assignments: BTreeMap::from([(0, 0)]),
170/// stability: BTreeMap::from([(0, 1.0)]),
171/// modularity: 0.0, seed: 42,
172/// };
173/// let snap = assemble_snapshot(
174/// graph, partition, PatternCatalog::default(),
175/// PatternMetricsResult::default(), None,
176/// "2026-04-29T00:00:00Z", Some("abc123"), None, 0,
177/// );
178/// assert_eq!(snap.commit.as_deref(), Some("abc123"));
179/// ```
180#[allow(clippy::too_many_arguments)] // 9 args: every field is load-bearing; seam between sdivi-pipeline and sdivi-core
181pub fn assemble_snapshot(
182 graph: GraphMetrics,
183 partition: LeidenPartition,
184 catalog: PatternCatalog,
185 pattern_metrics: PatternMetricsResult,
186 boundary_count: Option<usize>,
187 timestamp: &str,
188 commit: Option<&str>,
189 change_coupling: Option<ChangeCouplingResult>,
190 violation_count: u32,
191) -> Snapshot {
192 let intent_divergence = boundary_count.map(|boundary_count| IntentDivergenceInfo {
193 boundary_count,
194 violation_count,
195 });
196
197 Snapshot {
198 snapshot_version: SNAPSHOT_VERSION.to_string(),
199 timestamp: timestamp.to_string(),
200 commit: commit.map(str::to_string),
201 graph,
202 partition,
203 catalog,
204 pattern_metrics,
205 intent_divergence,
206 path_partition: BTreeMap::new(),
207 change_coupling,
208 }
209}
210
211impl Snapshot {
212 /// Loads a [`Snapshot`] from a JSON file at `path`.
213 ///
214 /// Only available with the `pipeline-records` feature (default ON).
215 ///
216 /// # Examples
217 ///
218 /// ```rust,no_run
219 /// use std::path::Path;
220 /// use sdivi_snapshot::snapshot::Snapshot;
221 ///
222 /// let snap = Snapshot::load(Path::new(".sdivi/snapshots/snapshot_2026.json"));
223 /// ```
224 #[cfg(feature = "pipeline-records")]
225 pub fn load(path: &std::path::Path) -> std::io::Result<Self> {
226 let content = std::fs::read_to_string(path)?;
227 serde_json::from_str(&content)
228 .map_err(|e| std::io::Error::new(std::io::ErrorKind::InvalidData, e))
229 }
230}