Skip to main content

semantic/analysis/
analysis_aggregate.rs

1// SPDX-License-Identifier: Apache-2.0
2//! Change aggregation — groups related semantic changes into logical review units.
3//!
4//! When an agent renames a symbol across 50 files, the raw change list has 50 entries.
5//! Aggregation collapses those into one "Renamed X → Y across 50 files" group.
6
7use std::{collections::HashMap, path::PathBuf};
8
9use objects::object::{ChangeImportance, ModificationKind, SemanticChange};
10
/// Category describing why a set of changes was collapsed into one group.
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum AggregateKind {
    /// Several files whose only modification was formatting or whitespace.
    FormattingPass,
    /// Several files whose only modification was to their imports.
    ImportUpdates,
    /// Several files whose only modification was to comments.
    CommentUpdates,
    /// One function rename (identical old and new names) seen in several files.
    FunctionRename,
    /// The same dependency addition or removal reported more than once.
    DependencyChange,
}
25
26/// A group of related semantic changes collapsed into one review unit.
27#[derive(Clone, Debug)]
28pub struct AggregatedChange {
29    /// Human-readable label, e.g. "Formatting pass: 38 files".
30    pub label: String,
31    /// What kind of aggregate.
32    pub kind: AggregateKind,
33    /// Files involved.
34    pub files: Vec<PathBuf>,
35    /// Overall importance of the group.
36    pub importance: ChangeImportance,
37    /// The individual changes that were collapsed.
38    pub children: Vec<SemanticChange>,
39}
40
41/// Result of aggregation: ungrouped changes + aggregate groups.
42#[derive(Clone, Debug, Default)]
43pub struct AggregationResult {
44    /// Changes that didn't fit any aggregation pattern (shown individually).
45    pub individual: Vec<SemanticChange>,
46    /// Aggregated groups.
47    pub groups: Vec<AggregatedChange>,
48}
49
50/// Aggregate a flat list of semantic changes into groups where possible.
51pub fn aggregate_changes(changes: Vec<SemanticChange>) -> AggregationResult {
52    let mut formatting_files: Vec<(PathBuf, SemanticChange)> = Vec::new();
53    let mut import_files: Vec<(PathBuf, SemanticChange)> = Vec::new();
54    let mut comment_files: Vec<(PathBuf, SemanticChange)> = Vec::new();
55    // Key: (old_name, new_name) → list of (file, change)
56    let mut fn_renames: HashMap<(String, String), Vec<(PathBuf, SemanticChange)>> = HashMap::new();
57    // Key: (dep name, version) → list of changes. Version is part of the key so
58    // that two files adding `serde 1.0` vs `serde 2.0` don't silently collapse
59    // into one group (heddle#119; sibling of heddle#68 r1, fixed in PR #114
60    // commit c5a2f75 by keying ItemKey on more than just name).
61    let mut dep_added: HashMap<(String, String), Vec<SemanticChange>> = HashMap::new();
62    let mut dep_removed: HashMap<String, Vec<SemanticChange>> = HashMap::new();
63
64    let mut individual: Vec<SemanticChange> = Vec::new();
65
66    for change in changes {
67        match &change {
68            SemanticChange::FileModified {
69                path,
70                classification: Some(cls),
71                ..
72            } => match cls {
73                ModificationKind::FormattingOnly | ModificationKind::WhitespaceOnly => {
74                    formatting_files.push((path.clone(), change));
75                }
76                ModificationKind::ImportsOnly => {
77                    import_files.push((path.clone(), change));
78                }
79                ModificationKind::CommentsOnly => {
80                    comment_files.push((path.clone(), change));
81                }
82                _ => {
83                    individual.push(change);
84                }
85            },
86            SemanticChange::FunctionRenamed {
87                file,
88                old_name,
89                new_name,
90                ..
91            } => {
92                fn_renames
93                    .entry((old_name.clone(), new_name.clone()))
94                    .or_default()
95                    .push((file.clone(), change));
96            }
97            SemanticChange::DependencyAdded { name, version } => {
98                dep_added
99                    .entry((name.clone(), version.clone()))
100                    .or_default()
101                    .push(change);
102            }
103            SemanticChange::DependencyRemoved { name } => {
104                dep_removed.entry(name.clone()).or_default().push(change);
105            }
106            _ => {
107                individual.push(change);
108            }
109        }
110    }
111
112    let mut groups: Vec<AggregatedChange> = Vec::new();
113
114    // Formatting pass group (only aggregate if 2+ files).
115    if formatting_files.len() >= 2 {
116        let count = formatting_files.len();
117        let files: Vec<PathBuf> = formatting_files.iter().map(|(p, _)| p.clone()).collect();
118        let children: Vec<SemanticChange> = formatting_files.into_iter().map(|(_, c)| c).collect();
119        groups.push(AggregatedChange {
120            label: format!("Formatting pass: {} files", count),
121            kind: AggregateKind::FormattingPass,
122            files,
123            importance: ChangeImportance::Noise,
124            children,
125        });
126    } else {
127        individual.extend(formatting_files.into_iter().map(|(_, c)| c));
128    }
129
130    // Import updates group.
131    if import_files.len() >= 2 {
132        let count = import_files.len();
133        let files: Vec<PathBuf> = import_files.iter().map(|(p, _)| p.clone()).collect();
134        let children: Vec<SemanticChange> = import_files.into_iter().map(|(_, c)| c).collect();
135        groups.push(AggregatedChange {
136            label: format!("Import updates: {} files", count),
137            kind: AggregateKind::ImportUpdates,
138            files,
139            importance: ChangeImportance::Low,
140            children,
141        });
142    } else {
143        individual.extend(import_files.into_iter().map(|(_, c)| c));
144    }
145
146    // Comment updates group.
147    if comment_files.len() >= 2 {
148        let count = comment_files.len();
149        let files: Vec<PathBuf> = comment_files.iter().map(|(p, _)| p.clone()).collect();
150        let children: Vec<SemanticChange> = comment_files.into_iter().map(|(_, c)| c).collect();
151        groups.push(AggregatedChange {
152            label: format!("Comment updates: {} files", count),
153            kind: AggregateKind::CommentUpdates,
154            files,
155            importance: ChangeImportance::Low,
156            children,
157        });
158    } else {
159        individual.extend(comment_files.into_iter().map(|(_, c)| c));
160    }
161
162    // Cross-file function renames (only aggregate if 2+ files share the same rename).
163    for ((old_name, new_name), entries) in fn_renames {
164        if entries.len() >= 2 {
165            let count = entries.len();
166            let files: Vec<PathBuf> = entries.iter().map(|(p, _)| p.clone()).collect();
167            let children: Vec<SemanticChange> = entries.into_iter().map(|(_, c)| c).collect();
168            groups.push(AggregatedChange {
169                label: format!("Renamed {} → {} across {} files", old_name, new_name, count),
170                kind: AggregateKind::FunctionRename,
171                files,
172                importance: ChangeImportance::Low,
173                children,
174            });
175        } else {
176            individual.extend(entries.into_iter().map(|(_, c)| c));
177        }
178    }
179
180    // Dependency groups (only aggregate if same dep+version appears 2+ times).
181    for ((name, version), entries) in dep_added {
182        if entries.len() >= 2 {
183            let count = entries.len();
184            groups.push(AggregatedChange {
185                label: format!("Added dependency {} {} ({} files)", name, version, count),
186                kind: AggregateKind::DependencyChange,
187                files: Vec::new(),
188                importance: ChangeImportance::Low,
189                children: entries,
190            });
191        } else {
192            individual.extend(entries);
193        }
194    }
195    for (name, entries) in dep_removed {
196        if entries.len() >= 2 {
197            let count = entries.len();
198            groups.push(AggregatedChange {
199                label: format!("Removed dependency {} ({} files)", name, count),
200                kind: AggregateKind::DependencyChange,
201                files: Vec::new(),
202                importance: ChangeImportance::Low,
203                children: entries,
204            });
205        } else {
206            individual.extend(entries);
207        }
208    }
209
210    AggregationResult { individual, groups }
211}
212
#[cfg(test)]
mod tests {
    use super::*;

    /// Builds a `FileModified` change with the given classification and importance.
    fn modified(path: &str, kind: ModificationKind, importance: ChangeImportance) -> SemanticChange {
        SemanticChange::FileModified {
            path: path.into(),
            classification: Some(kind),
            importance: Some(importance),
            confidence: None,
        }
    }

    /// Builds a low-importance `FunctionRenamed` change.
    fn renamed(file: &str, old_name: &str, new_name: &str) -> SemanticChange {
        SemanticChange::FunctionRenamed {
            file: file.into(),
            old_name: old_name.into(),
            new_name: new_name.into(),
            importance: Some(ChangeImportance::Low),
        }
    }

    /// Builds a `DependencyAdded` change.
    fn dep_added(name: &str, version: &str) -> SemanticChange {
        SemanticChange::DependencyAdded {
            name: name.into(),
            version: version.into(),
        }
    }

    #[test]
    fn test_formatting_files_aggregate() {
        let result = aggregate_changes(vec![
            modified("a.rs", ModificationKind::FormattingOnly, ChangeImportance::Noise),
            modified("b.rs", ModificationKind::FormattingOnly, ChangeImportance::Noise),
            modified("c.rs", ModificationKind::FormattingOnly, ChangeImportance::Noise),
            modified("logic.rs", ModificationKind::Logic, ChangeImportance::High),
        ]);

        assert_eq!(result.groups.len(), 1);
        let group = &result.groups[0];
        assert_eq!(group.kind, AggregateKind::FormattingPass);
        assert_eq!(group.files.len(), 3);
        assert_eq!(group.children.len(), 3);
        // Only the logic change is left outside the group.
        assert_eq!(result.individual.len(), 1);
    }

    #[test]
    fn test_single_formatting_file_not_aggregated() {
        let result = aggregate_changes(vec![modified(
            "a.rs",
            ModificationKind::FormattingOnly,
            ChangeImportance::Noise,
        )]);

        assert_eq!(result.groups.len(), 0);
        assert_eq!(result.individual.len(), 1);
    }

    #[test]
    fn test_cross_file_rename_aggregates() {
        let result = aggregate_changes(vec![
            renamed("a.rs", "foo", "bar"),
            renamed("b.rs", "foo", "bar"),
            renamed("c.rs", "baz", "qux"),
        ]);

        // Two files share foo→bar, so it groups; baz→qux occurs once and does not.
        assert_eq!(result.groups.len(), 1);
        let group = &result.groups[0];
        assert!(group.label.contains("foo"));
        assert_eq!(group.files.len(), 2);
        assert_eq!(result.individual.len(), 1);
    }

    #[test]
    fn test_dep_added_distinguishes_versions() {
        // The same dependency name at two versions has to yield two groups.
        // When the bucket key was only `name`, `serde 1.0` and `serde 2.0` merged.
        let result = aggregate_changes(vec![
            dep_added("serde", "1.0"),
            dep_added("serde", "1.0"),
            dep_added("serde", "2.0"),
            dep_added("serde", "2.0"),
        ]);

        assert_eq!(
            result.groups.len(),
            2,
            "expected separate groups for serde 1.0 and serde 2.0, got {:?}",
            result.groups.iter().map(|g| &g.label).collect::<Vec<_>>()
        );
        for group in &result.groups {
            assert_eq!(group.kind, AggregateKind::DependencyChange);
            assert_eq!(group.children.len(), 2);
        }

        let labels: Vec<&String> = result.groups.iter().map(|g| &g.label).collect();
        assert!(
            labels.iter().any(|l| l.contains("1.0")),
            "expected a label mentioning 1.0, got {:?}",
            labels
        );
        assert!(
            labels.iter().any(|l| l.contains("2.0")),
            "expected a label mentioning 2.0, got {:?}",
            labels
        );
    }

    #[test]
    fn test_mixed_aggregation() {
        let result = aggregate_changes(vec![
            modified("fmt1.rs", ModificationKind::FormattingOnly, ChangeImportance::Noise),
            modified("fmt2.rs", ModificationKind::WhitespaceOnly, ChangeImportance::Noise),
            modified("imp1.rs", ModificationKind::ImportsOnly, ChangeImportance::Low),
            modified("imp2.rs", ModificationKind::ImportsOnly, ChangeImportance::Low),
            SemanticChange::FileAdded {
                path: "new.rs".into(),
            },
        ]);

        // One formatting group and one imports group.
        assert_eq!(result.groups.len(), 2);
        // The added file matches no pattern.
        assert_eq!(result.individual.len(), 1);
    }
}