sdivi_snapshot/boundary_inference.rs
1//! [`infer_boundaries`] — propose community boundaries from partition history.
2
3use std::collections::BTreeMap;
4
5use serde::{Deserialize, Serialize};
6
7/// A proposed boundary derived from consecutive-snapshot community stability.
8///
9/// # Examples
10///
11/// ```rust
12/// use sdivi_snapshot::boundary_inference::BoundaryProposal;
13///
14/// let proposal = BoundaryProposal {
15/// community_id: 0,
16/// stable_snapshots: 3,
17/// node_ids: vec!["src/lib.rs".to_string()],
18/// };
19/// assert_eq!(proposal.community_id, 0);
20/// ```
21#[derive(Debug, Clone, Serialize, Deserialize, PartialEq)]
22pub struct BoundaryProposal {
23 /// The community ID (from the most recent partition).
24 pub community_id: u32,
25 /// Number of consecutive snapshots this community has been stable.
26 pub stable_snapshots: u32,
27 /// Node IDs (paths) belonging to this community in the most recent partition.
28 pub node_ids: Vec<String>,
29}
30
31/// Result of [`infer_boundaries`].
32///
33/// # Examples
34///
35/// ```rust
36/// use sdivi_snapshot::boundary_inference::infer_boundaries;
37///
38/// let result = infer_boundaries(&[], 3);
39/// assert!(result.proposals.is_empty());
40/// ```
41#[derive(Debug, Clone, Serialize, Deserialize, PartialEq)]
42pub struct BoundaryInferenceResult {
43 /// Communities that have been stable for at least `stability_threshold`
44 /// consecutive snapshots, proposed as boundary candidates.
45 pub proposals: Vec<BoundaryProposal>,
46 /// Number of partitions considered (the length of the input slice).
47 pub partition_count: usize,
48}
49
50/// A prior partition supplied to boundary inference.
51///
52/// Each entry maps a node ID (repo-relative path string) to its community ID.
53#[derive(Debug, Clone, Serialize, Deserialize, PartialEq)]
54pub struct PriorPartition {
55 /// Node ID → community ID mapping.
56 pub cluster_assignments: BTreeMap<String, u32>,
57}
58
59/// Infers boundary proposals from a sequence of prior partitions.
60///
61/// `partitions` must be ordered oldest → newest. The most recent entry is the
62/// proposal source; the earlier entries supply the stability history.
63///
64/// A community is proposed as a boundary candidate when it has been present
65/// (same set of node IDs, regardless of numeric ID churn) in at least
66/// `stability_threshold` consecutive partition pairs ending at the most recent
67/// partition.
68///
69/// **Stability definition:** two consecutive partitions agree on a community
70/// when the same set of node IDs is assigned to the same logical group.
71/// Community IDs may renumber across snapshots; we match by node-set membership.
72///
73/// Returns an empty `proposals` list when:
74/// - `partitions` is empty
75/// - the slice has fewer entries than `stability_threshold`
76///
77/// # Examples
78///
79/// ```rust
80/// use sdivi_snapshot::boundary_inference::{infer_boundaries, PriorPartition};
81/// use std::collections::BTreeMap;
82///
83/// let result = infer_boundaries(&[], 3);
84/// assert!(result.proposals.is_empty());
85/// assert_eq!(result.partition_count, 0);
86/// ```
87pub fn infer_boundaries(
88 partitions: &[PriorPartition],
89 stability_threshold: u32,
90) -> BoundaryInferenceResult {
91 let partition_count = partitions.len();
92
93 if partition_count == 0 || stability_threshold == 0 {
94 return BoundaryInferenceResult {
95 proposals: vec![],
96 partition_count,
97 };
98 }
99
100 let latest = partitions.last().unwrap();
101 let latest_communities = invert_assignments(&latest.cluster_assignments);
102
103 // For each community in the latest partition, count how many consecutive
104 // pairs (ending at latest) agree on that node set.
105 let mut proposals: Vec<BoundaryProposal> = Vec::new();
106
107 for (comm_id, node_set) in &latest_communities {
108 let stable = count_stable_tail(partitions, node_set);
109 if stable >= stability_threshold {
110 let mut node_ids: Vec<String> = node_set.iter().cloned().collect();
111 node_ids.sort();
112 proposals.push(BoundaryProposal {
113 community_id: *comm_id,
114 stable_snapshots: stable,
115 node_ids,
116 });
117 }
118 }
119
120 proposals.sort_by_key(|p| p.community_id);
121
122 BoundaryInferenceResult {
123 proposals,
124 partition_count,
125 }
126}
127
128/// Inverts a node→community map to community→{node_ids} map.
129fn invert_assignments(
130 assignments: &BTreeMap<String, u32>,
131) -> BTreeMap<u32, std::collections::BTreeSet<String>> {
132 let mut result: BTreeMap<u32, std::collections::BTreeSet<String>> = BTreeMap::new();
133 for (node, &comm) in assignments {
134 result.entry(comm).or_default().insert(node.clone());
135 }
136 result
137}
138
139/// Counts how many consecutive pairs ending at the last partition agree on
140/// `node_set` (i.e., that exact node set forms a community in each partition).
141fn count_stable_tail(
142 partitions: &[PriorPartition],
143 node_set: &std::collections::BTreeSet<String>,
144) -> u32 {
145 if partitions.len() < 2 {
146 // Only one partition; no pairs to check.
147 return 0;
148 }
149
150 let mut stable = 0u32;
151 // Walk preceding partitions from newest-1 backwards; stop on first miss.
152 for i in (0..partitions.len() - 1).rev() {
153 let p = &partitions[i];
154 let communities = invert_assignments(&p.cluster_assignments);
155 let found = communities.values().any(|s| s == node_set);
156 if found {
157 stable += 1;
158 } else {
159 break;
160 }
161 }
162 stable
163}
164
165#[cfg(test)]
166mod tests {
167 use super::*;
168
169 fn partition_from(pairs: &[(&str, u32)]) -> PriorPartition {
170 PriorPartition {
171 cluster_assignments: pairs.iter().map(|(k, v)| (k.to_string(), *v)).collect(),
172 }
173 }
174
175 #[test]
176 fn empty_input_returns_no_proposals() {
177 let r = infer_boundaries(&[], 3);
178 assert!(r.proposals.is_empty());
179 assert_eq!(r.partition_count, 0);
180 }
181
182 #[test]
183 fn single_partition_no_proposals_due_to_no_history() {
184 let p = partition_from(&[("a", 0), ("b", 0), ("c", 1)]);
185 let r = infer_boundaries(&[p], 2);
186 // threshold 2 but only 1 partition → no pairs to check → 0 stable → no proposals
187 assert!(r.proposals.is_empty());
188 }
189
190 #[test]
191 fn stable_community_proposed() {
192 // Three partitions; community {a, b} stable across all three pairs.
193 let p1 = partition_from(&[("a", 0), ("b", 0), ("c", 1)]);
194 let p2 = partition_from(&[("a", 0), ("b", 0), ("c", 1)]);
195 let p3 = partition_from(&[("a", 0), ("b", 0), ("c", 1)]);
196 let r = infer_boundaries(&[p1, p2, p3], 2);
197 // community 0 ({a,b}) stable for 2 pairs → should be proposed
198 assert!(!r.proposals.is_empty());
199 let prop = r.proposals.iter().find(|p| p.node_ids == vec!["a", "b"]);
200 assert!(prop.is_some());
201 }
202
203 #[test]
204 fn threshold_not_met_no_proposal() {
205 let p1 = partition_from(&[("a", 0), ("b", 0)]);
206 let p2 = partition_from(&[("a", 0), ("b", 0)]);
207 let r = infer_boundaries(&[p1, p2], 3);
208 // only 1 pair, threshold=3 → no proposals
209 assert!(r.proposals.is_empty());
210 }
211}