Skip to main content

sdivi_snapshot/
boundary_inference.rs

1//! [`infer_boundaries`] — propose community boundaries from partition history.
2
3use std::collections::BTreeMap;
4
5use serde::{Deserialize, Serialize};
6
7/// A proposed boundary derived from consecutive-snapshot community stability.
8///
9/// # Examples
10///
11/// ```rust
12/// use sdivi_snapshot::boundary_inference::BoundaryProposal;
13///
14/// let proposal = BoundaryProposal {
15///     community_id: 0,
16///     stable_snapshots: 3,
17///     node_ids: vec!["src/lib.rs".to_string()],
18/// };
19/// assert_eq!(proposal.community_id, 0);
20/// ```
21#[derive(Debug, Clone, Serialize, Deserialize, PartialEq)]
22pub struct BoundaryProposal {
23    /// The community ID (from the most recent partition).
24    pub community_id: u32,
25    /// Number of consecutive snapshots this community has been stable.
26    pub stable_snapshots: u32,
27    /// Node IDs (paths) belonging to this community in the most recent partition.
28    pub node_ids: Vec<String>,
29}
30
31/// Result of [`infer_boundaries`].
32///
33/// # Examples
34///
35/// ```rust
36/// use sdivi_snapshot::boundary_inference::infer_boundaries;
37///
38/// let result = infer_boundaries(&[], 3);
39/// assert!(result.proposals.is_empty());
40/// ```
41#[derive(Debug, Clone, Serialize, Deserialize, PartialEq)]
42pub struct BoundaryInferenceResult {
43    /// Communities that have been stable for at least `stability_threshold`
44    /// consecutive snapshots, proposed as boundary candidates.
45    pub proposals: Vec<BoundaryProposal>,
46    /// Number of partitions considered (the length of the input slice).
47    pub partition_count: usize,
48}
49
50/// A prior partition supplied to boundary inference.
51///
52/// Each entry maps a node ID (repo-relative path string) to its community ID.
53#[derive(Debug, Clone, Serialize, Deserialize, PartialEq)]
54pub struct PriorPartition {
55    /// Node ID → community ID mapping.
56    pub cluster_assignments: BTreeMap<String, u32>,
57}
58
59/// Infers boundary proposals from a sequence of prior partitions.
60///
61/// `partitions` must be ordered oldest → newest.  The most recent entry is the
62/// proposal source; the earlier entries supply the stability history.
63///
64/// A community is proposed as a boundary candidate when it has been present
65/// (same set of node IDs, regardless of numeric ID churn) in at least
66/// `stability_threshold` consecutive partition pairs ending at the most recent
67/// partition.
68///
69/// **Stability definition:** two consecutive partitions agree on a community
70/// when the same set of node IDs is assigned to the same logical group.
71/// Community IDs may renumber across snapshots; we match by node-set membership.
72///
73/// Returns an empty `proposals` list when:
74/// - `partitions` is empty
75/// - the slice has fewer entries than `stability_threshold`
76///
77/// # Examples
78///
79/// ```rust
80/// use sdivi_snapshot::boundary_inference::{infer_boundaries, PriorPartition};
81/// use std::collections::BTreeMap;
82///
83/// let result = infer_boundaries(&[], 3);
84/// assert!(result.proposals.is_empty());
85/// assert_eq!(result.partition_count, 0);
86/// ```
87pub fn infer_boundaries(
88    partitions: &[PriorPartition],
89    stability_threshold: u32,
90) -> BoundaryInferenceResult {
91    let partition_count = partitions.len();
92
93    if partition_count == 0 || stability_threshold == 0 {
94        return BoundaryInferenceResult {
95            proposals: vec![],
96            partition_count,
97        };
98    }
99
100    let latest = partitions.last().unwrap();
101    let latest_communities = invert_assignments(&latest.cluster_assignments);
102
103    // For each community in the latest partition, count how many consecutive
104    // pairs (ending at latest) agree on that node set.
105    let mut proposals: Vec<BoundaryProposal> = Vec::new();
106
107    for (comm_id, node_set) in &latest_communities {
108        let stable = count_stable_tail(partitions, node_set);
109        if stable >= stability_threshold {
110            let mut node_ids: Vec<String> = node_set.iter().cloned().collect();
111            node_ids.sort();
112            proposals.push(BoundaryProposal {
113                community_id: *comm_id,
114                stable_snapshots: stable,
115                node_ids,
116            });
117        }
118    }
119
120    proposals.sort_by_key(|p| p.community_id);
121
122    BoundaryInferenceResult {
123        proposals,
124        partition_count,
125    }
126}
127
128/// Inverts a node→community map to community→{node_ids} map.
129fn invert_assignments(
130    assignments: &BTreeMap<String, u32>,
131) -> BTreeMap<u32, std::collections::BTreeSet<String>> {
132    let mut result: BTreeMap<u32, std::collections::BTreeSet<String>> = BTreeMap::new();
133    for (node, &comm) in assignments {
134        result.entry(comm).or_default().insert(node.clone());
135    }
136    result
137}
138
139/// Counts how many consecutive pairs ending at the last partition agree on
140/// `node_set` (i.e., that exact node set forms a community in each partition).
141fn count_stable_tail(
142    partitions: &[PriorPartition],
143    node_set: &std::collections::BTreeSet<String>,
144) -> u32 {
145    if partitions.len() < 2 {
146        // Only one partition; no pairs to check.
147        return 0;
148    }
149
150    let mut stable = 0u32;
151    // Walk preceding partitions from newest-1 backwards; stop on first miss.
152    for i in (0..partitions.len() - 1).rev() {
153        let p = &partitions[i];
154        let communities = invert_assignments(&p.cluster_assignments);
155        let found = communities.values().any(|s| s == node_set);
156        if found {
157            stable += 1;
158        } else {
159            break;
160        }
161    }
162    stable
163}
164
165#[cfg(test)]
166mod tests {
167    use super::*;
168
169    fn partition_from(pairs: &[(&str, u32)]) -> PriorPartition {
170        PriorPartition {
171            cluster_assignments: pairs.iter().map(|(k, v)| (k.to_string(), *v)).collect(),
172        }
173    }
174
175    #[test]
176    fn empty_input_returns_no_proposals() {
177        let r = infer_boundaries(&[], 3);
178        assert!(r.proposals.is_empty());
179        assert_eq!(r.partition_count, 0);
180    }
181
182    #[test]
183    fn single_partition_no_proposals_due_to_no_history() {
184        let p = partition_from(&[("a", 0), ("b", 0), ("c", 1)]);
185        let r = infer_boundaries(&[p], 2);
186        // threshold 2 but only 1 partition → no pairs to check → 0 stable → no proposals
187        assert!(r.proposals.is_empty());
188    }
189
190    #[test]
191    fn stable_community_proposed() {
192        // Three partitions; community {a, b} stable across all three pairs.
193        let p1 = partition_from(&[("a", 0), ("b", 0), ("c", 1)]);
194        let p2 = partition_from(&[("a", 0), ("b", 0), ("c", 1)]);
195        let p3 = partition_from(&[("a", 0), ("b", 0), ("c", 1)]);
196        let r = infer_boundaries(&[p1, p2, p3], 2);
197        // community 0 ({a,b}) stable for 2 pairs → should be proposed
198        assert!(!r.proposals.is_empty());
199        let prop = r.proposals.iter().find(|p| p.node_ids == vec!["a", "b"]);
200        assert!(prop.is_some());
201    }
202
203    #[test]
204    fn threshold_not_met_no_proposal() {
205        let p1 = partition_from(&[("a", 0), ("b", 0)]);
206        let p2 = partition_from(&[("a", 0), ("b", 0)]);
207        let r = infer_boundaries(&[p1, p2], 3);
208        // only 1 pair, threshold=3 → no proposals
209        assert!(r.proposals.is_empty());
210    }
211}