Skip to main content

sdivi_snapshot/
snapshot.rs

//! [`Snapshot`] — versioned snapshot of pipeline stage outputs.
2
3use std::collections::BTreeMap;
4
5use sdivi_detection::partition::LeidenPartition;
6use sdivi_graph::metrics::GraphMetrics;
7use sdivi_patterns::PatternCatalog;
8use serde::{Deserialize, Serialize};
9
10use crate::change_coupling::ChangeCouplingResult;
11
/// Snapshot schema version emitted by sdivi-rust.
///
/// This constant is `"1.0"` for all sdivi-rust output. Bumping this value is a
/// breaking change (Rule 16).
pub const SNAPSHOT_VERSION: &str = "1.0";
17
18/// Intent-divergence summary derived from the caller's boundary representation.
19///
20/// Present in a [`Snapshot`] only when the caller supplied a boundary count to
21/// [`assemble_snapshot`] (typically because a `.sdivi/boundaries.yaml` was
22/// found at snapshot time, but any source of a count is valid).
23///
24/// # Examples
25///
26/// ```rust
27/// use sdivi_snapshot::snapshot::IntentDivergenceInfo;
28///
29/// let info = IntentDivergenceInfo { boundary_count: 3, violation_count: 0 };
30/// assert_eq!(info.boundary_count, 3);
31/// ```
32#[derive(Debug, Clone, Serialize, Deserialize, PartialEq)]
33pub struct IntentDivergenceInfo {
34    /// Number of boundaries declared by the caller.
35    pub boundary_count: usize,
36    /// Number of cross-boundary dependency violations detected.
37    pub violation_count: u32,
38}
39
40/// Pattern metrics derived from the catalog — carried in every snapshot.
41///
42/// Computed by `sdivi_core::compute_pattern_metrics` or populated by
43/// `sdivi_pipeline::Pipeline` from the full catalog.
44///
45/// # Examples
46///
47/// ```rust
48/// use std::collections::BTreeMap;
49/// use sdivi_snapshot::snapshot::PatternMetricsResult;
50///
51/// let m = PatternMetricsResult {
52///     entropy_per_category: BTreeMap::new(),
53///     total_entropy: 0.0,
54///     convention_drift: 0.0,
55///     convention_drift_per_category: BTreeMap::new(),
56/// };
57/// assert_eq!(m.total_entropy, 0.0);
58/// ```
59#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Default)]
60pub struct PatternMetricsResult {
61    /// Shannon entropy of pattern fingerprints per category.
62    pub entropy_per_category: BTreeMap<String, f64>,
63    /// Sum of per-category entropies.
64    pub total_entropy: f64,
65    /// Average fraction of distinct fingerprints per category (0–1).
66    ///
67    /// Defined as: for each category, `distinct_fingerprints / total_instances`,
68    /// then average across all categories.  `0.0` when no instances exist.
69    pub convention_drift: f64,
70    /// Per-category fraction of distinct fingerprints: `distinct / total` for each category.
71    ///
72    /// Source of truth for per-category override filtering in `compute_thresholds_check`.
73    /// The scalar `convention_drift` is the average of this map's values.
74    #[serde(default)]
75    pub convention_drift_per_category: BTreeMap<String, f64>,
76}
77
78/// A versioned snapshot of all pipeline stage outputs for one point in time.
79///
80/// # Examples
81///
82/// ```rust
83/// use std::collections::BTreeMap;
84/// use sdivi_snapshot::snapshot::{assemble_snapshot, PatternMetricsResult, SNAPSHOT_VERSION};
85/// use sdivi_graph::metrics::GraphMetrics;
86/// use sdivi_detection::partition::LeidenPartition;
87/// use sdivi_patterns::PatternCatalog;
88///
89/// let graph = GraphMetrics {
90///     node_count: 0, edge_count: 0, density: 0.0,
91///     cycle_count: 0, top_hubs: vec![], component_count: 0,
92/// };
93/// let partition = LeidenPartition {
94///     assignments: BTreeMap::new(), stability: BTreeMap::new(),
95///     modularity: 0.0, seed: 42,
96/// };
97/// let snap = assemble_snapshot(
98///     graph, partition, PatternCatalog::default(),
99///     PatternMetricsResult::default(), None,
100///     "2026-04-29T00:00:00Z", None, None, 0,
101/// );
102/// assert_eq!(snap.snapshot_version, SNAPSHOT_VERSION);
103/// ```
104#[derive(Debug, Clone, Serialize, Deserialize, PartialEq)]
105pub struct Snapshot {
106    /// Always `"1.0"` for sdivi-rust output.
107    pub snapshot_version: String,
108    /// ISO 8601 UTC timestamp at which the snapshot was taken.
109    pub timestamp: String,
110    /// Git commit SHA at the time of the snapshot, when available.
111    #[serde(skip_serializing_if = "Option::is_none")]
112    pub commit: Option<String>,
113    /// Graph metrics computed from the dependency graph.
114    pub graph: GraphMetrics,
115    /// Leiden community detection result.
116    pub partition: LeidenPartition,
117    /// Pattern fingerprint catalog with per-category entropy.
118    pub catalog: PatternCatalog,
119    /// Pattern metrics (entropy, convention drift) for this snapshot.
120    pub pattern_metrics: PatternMetricsResult,
121    /// Intent divergence against the caller-declared boundaries.
122    ///
123    /// `None` (omitted from JSON) when no boundary count was supplied to
124    /// [`assemble_snapshot`] (typically because `.sdivi/boundaries.yaml` was
125    /// not present).
126    #[serde(skip_serializing_if = "Option::is_none")]
127    pub intent_divergence: Option<IntentDivergenceInfo>,
128    /// File-path → community-ID assignments for boundary inference.
129    ///
130    /// Maps each source file's repo-relative path to its community ID from the
131    /// Leiden partition at snapshot time. Populated by `sdivi-pipeline` from the
132    /// `DependencyGraph` + `LeidenPartition`. Absent (empty) in snapshots
133    /// produced without path context (e.g., pure-compute path); boundary
134    /// inference from such snapshots yields no proposals.
135    #[serde(default, skip_serializing_if = "BTreeMap::is_empty")]
136    pub path_partition: BTreeMap<String, u32>,
137
138    /// Change-coupling analysis result.
139    ///
140    /// `None` when the repo has no git history or `history_depth = 0`.
141    /// `#[serde(default)]` ensures M14-era snapshots deserialize as `None`.
142    #[serde(default, skip_serializing_if = "Option::is_none")]
143    pub change_coupling: Option<ChangeCouplingResult>,
144}
145
146/// Assembles a [`Snapshot`] from pipeline stage outputs.
147///
148/// When `boundary_count` is `Some`, an [`IntentDivergenceInfo`] is included with
149/// that count and the caller-supplied `violation_count`. The caller is responsible
150/// for deriving `boundary_count` from a `BoundarySpec` (or any equivalent source);
151/// this function is intentionally agnostic to the spec type so non-FS callers
152/// (WASM, embedders with their own boundary representation) can use it directly
153/// without constructing a `sdivi_config::BoundarySpec`.
154///
155/// # Examples
156///
157/// ```rust
158/// use std::collections::BTreeMap;
159/// use sdivi_snapshot::snapshot::{assemble_snapshot, PatternMetricsResult, SNAPSHOT_VERSION};
160/// use sdivi_graph::metrics::GraphMetrics;
161/// use sdivi_detection::partition::LeidenPartition;
162/// use sdivi_patterns::PatternCatalog;
163///
164/// let graph = GraphMetrics {
165///     node_count: 1, edge_count: 0, density: 0.0,
166///     cycle_count: 0, top_hubs: vec![], component_count: 1,
167/// };
168/// let partition = LeidenPartition {
169///     assignments: BTreeMap::from([(0, 0)]),
170///     stability: BTreeMap::from([(0, 1.0)]),
171///     modularity: 0.0, seed: 42,
172/// };
173/// let snap = assemble_snapshot(
174///     graph, partition, PatternCatalog::default(),
175///     PatternMetricsResult::default(), None,
176///     "2026-04-29T00:00:00Z", Some("abc123"), None, 0,
177/// );
178/// assert_eq!(snap.commit.as_deref(), Some("abc123"));
179/// ```
180#[allow(clippy::too_many_arguments)] // 9 args: every field is load-bearing; seam between sdivi-pipeline and sdivi-core
181pub fn assemble_snapshot(
182    graph: GraphMetrics,
183    partition: LeidenPartition,
184    catalog: PatternCatalog,
185    pattern_metrics: PatternMetricsResult,
186    boundary_count: Option<usize>,
187    timestamp: &str,
188    commit: Option<&str>,
189    change_coupling: Option<ChangeCouplingResult>,
190    violation_count: u32,
191) -> Snapshot {
192    let intent_divergence = boundary_count.map(|boundary_count| IntentDivergenceInfo {
193        boundary_count,
194        violation_count,
195    });
196
197    Snapshot {
198        snapshot_version: SNAPSHOT_VERSION.to_string(),
199        timestamp: timestamp.to_string(),
200        commit: commit.map(str::to_string),
201        graph,
202        partition,
203        catalog,
204        pattern_metrics,
205        intent_divergence,
206        path_partition: BTreeMap::new(),
207        change_coupling,
208    }
209}
210
211impl Snapshot {
212    /// Loads a [`Snapshot`] from a JSON file at `path`.
213    ///
214    /// Only available with the `pipeline-records` feature (default ON).
215    ///
216    /// # Examples
217    ///
218    /// ```rust,no_run
219    /// use std::path::Path;
220    /// use sdivi_snapshot::snapshot::Snapshot;
221    ///
222    /// let snap = Snapshot::load(Path::new(".sdivi/snapshots/snapshot_2026.json"));
223    /// ```
224    #[cfg(feature = "pipeline-records")]
225    pub fn load(path: &std::path::Path) -> std::io::Result<Self> {
226        let content = std::fs::read_to_string(path)?;
227        serde_json::from_str(&content)
228            .map_err(|e| std::io::Error::new(std::io::ErrorKind::InvalidData, e))
229    }
230}