Skip to main content

semantic/analysis/
analysis_aggregate.rs

1// SPDX-License-Identifier: Apache-2.0
2//! Change aggregation — groups related semantic changes into logical review units.
3//!
4//! When an agent renames a symbol across 50 files, the raw change list has 50 entries.
5//! Aggregation collapses those into one "Renamed X → Y across 50 files" group.
6
7use std::{collections::HashMap, path::PathBuf};
8
9use objects::object::{ChangeImportance, ModificationKind, SemanticChange};
10
/// The category of an aggregate group.
///
/// This is a plain tag with no payload, so it is `Copy` (pass it by value) and
/// `Hash` (usable as a `HashMap`/`HashSet` key, e.g. to bucket groups by kind).
#[derive(Clone, Copy, Debug, PartialEq, Eq, Hash)]
pub enum AggregateKind {
    /// Formatting/whitespace pass across many files.
    FormattingPass,
    /// Import updates across many files.
    ImportUpdates,
    /// Comment updates across many files.
    CommentUpdates,
    /// Cross-file function rename (same old→new name in multiple files).
    FunctionRename,
    /// Same dependency added/removed across multiple files.
    DependencyChange,
}
25
26/// A group of related semantic changes collapsed into one review unit.
27#[derive(Clone, Debug)]
28pub struct AggregatedChange {
29    /// Human-readable label, e.g. "Formatting pass: 38 files".
30    pub label: String,
31    /// What kind of aggregate.
32    pub kind: AggregateKind,
33    /// Files involved.
34    pub files: Vec<PathBuf>,
35    /// Overall importance of the group.
36    pub importance: ChangeImportance,
37    /// The individual changes that were collapsed.
38    pub children: Vec<SemanticChange>,
39}
40
41/// Result of aggregation: ungrouped changes + aggregate groups.
42#[derive(Clone, Debug, Default)]
43pub struct AggregationResult {
44    /// Changes that didn't fit any aggregation pattern (shown individually).
45    pub individual: Vec<SemanticChange>,
46    /// Aggregated groups.
47    pub groups: Vec<AggregatedChange>,
48}
49
50/// Aggregate a flat list of semantic changes into groups where possible.
51pub fn aggregate_changes(changes: Vec<SemanticChange>) -> AggregationResult {
52    let mut formatting_files: Vec<(PathBuf, SemanticChange)> = Vec::new();
53    let mut import_files: Vec<(PathBuf, SemanticChange)> = Vec::new();
54    let mut comment_files: Vec<(PathBuf, SemanticChange)> = Vec::new();
55    // Key: (old_name, new_name) → list of (file, change)
56    let mut fn_renames: HashMap<(String, String), Vec<(PathBuf, SemanticChange)>> = HashMap::new();
57    // Key: dep name → list of changes
58    let mut dep_added: HashMap<String, Vec<SemanticChange>> = HashMap::new();
59    let mut dep_removed: HashMap<String, Vec<SemanticChange>> = HashMap::new();
60
61    let mut individual: Vec<SemanticChange> = Vec::new();
62
63    for change in changes {
64        match &change {
65            SemanticChange::FileModified {
66                path,
67                classification: Some(cls),
68                ..
69            } => match cls {
70                ModificationKind::FormattingOnly | ModificationKind::WhitespaceOnly => {
71                    formatting_files.push((path.clone(), change));
72                }
73                ModificationKind::ImportsOnly => {
74                    import_files.push((path.clone(), change));
75                }
76                ModificationKind::CommentsOnly => {
77                    comment_files.push((path.clone(), change));
78                }
79                _ => {
80                    individual.push(change);
81                }
82            },
83            SemanticChange::FunctionRenamed {
84                file,
85                old_name,
86                new_name,
87                ..
88            } => {
89                fn_renames
90                    .entry((old_name.clone(), new_name.clone()))
91                    .or_default()
92                    .push((file.clone(), change));
93            }
94            SemanticChange::DependencyAdded { name, .. } => {
95                dep_added.entry(name.clone()).or_default().push(change);
96            }
97            SemanticChange::DependencyRemoved { name } => {
98                dep_removed.entry(name.clone()).or_default().push(change);
99            }
100            _ => {
101                individual.push(change);
102            }
103        }
104    }
105
106    let mut groups: Vec<AggregatedChange> = Vec::new();
107
108    // Formatting pass group (only aggregate if 2+ files).
109    if formatting_files.len() >= 2 {
110        let count = formatting_files.len();
111        let files: Vec<PathBuf> = formatting_files.iter().map(|(p, _)| p.clone()).collect();
112        let children: Vec<SemanticChange> = formatting_files.into_iter().map(|(_, c)| c).collect();
113        groups.push(AggregatedChange {
114            label: format!("Formatting pass: {} files", count),
115            kind: AggregateKind::FormattingPass,
116            files,
117            importance: ChangeImportance::Noise,
118            children,
119        });
120    } else {
121        individual.extend(formatting_files.into_iter().map(|(_, c)| c));
122    }
123
124    // Import updates group.
125    if import_files.len() >= 2 {
126        let count = import_files.len();
127        let files: Vec<PathBuf> = import_files.iter().map(|(p, _)| p.clone()).collect();
128        let children: Vec<SemanticChange> = import_files.into_iter().map(|(_, c)| c).collect();
129        groups.push(AggregatedChange {
130            label: format!("Import updates: {} files", count),
131            kind: AggregateKind::ImportUpdates,
132            files,
133            importance: ChangeImportance::Low,
134            children,
135        });
136    } else {
137        individual.extend(import_files.into_iter().map(|(_, c)| c));
138    }
139
140    // Comment updates group.
141    if comment_files.len() >= 2 {
142        let count = comment_files.len();
143        let files: Vec<PathBuf> = comment_files.iter().map(|(p, _)| p.clone()).collect();
144        let children: Vec<SemanticChange> = comment_files.into_iter().map(|(_, c)| c).collect();
145        groups.push(AggregatedChange {
146            label: format!("Comment updates: {} files", count),
147            kind: AggregateKind::CommentUpdates,
148            files,
149            importance: ChangeImportance::Low,
150            children,
151        });
152    } else {
153        individual.extend(comment_files.into_iter().map(|(_, c)| c));
154    }
155
156    // Cross-file function renames (only aggregate if 2+ files share the same rename).
157    for ((old_name, new_name), entries) in fn_renames {
158        if entries.len() >= 2 {
159            let count = entries.len();
160            let files: Vec<PathBuf> = entries.iter().map(|(p, _)| p.clone()).collect();
161            let children: Vec<SemanticChange> = entries.into_iter().map(|(_, c)| c).collect();
162            groups.push(AggregatedChange {
163                label: format!("Renamed {} → {} across {} files", old_name, new_name, count),
164                kind: AggregateKind::FunctionRename,
165                files,
166                importance: ChangeImportance::Low,
167                children,
168            });
169        } else {
170            individual.extend(entries.into_iter().map(|(_, c)| c));
171        }
172    }
173
174    // Dependency groups (only aggregate if same dep appears 2+ times).
175    for (name, entries) in dep_added {
176        if entries.len() >= 2 {
177            let count = entries.len();
178            groups.push(AggregatedChange {
179                label: format!("Added dependency {} ({} files)", name, count),
180                kind: AggregateKind::DependencyChange,
181                files: Vec::new(),
182                importance: ChangeImportance::Low,
183                children: entries,
184            });
185        } else {
186            individual.extend(entries);
187        }
188    }
189    for (name, entries) in dep_removed {
190        if entries.len() >= 2 {
191            let count = entries.len();
192            groups.push(AggregatedChange {
193                label: format!("Removed dependency {} ({} files)", name, count),
194                kind: AggregateKind::DependencyChange,
195                files: Vec::new(),
196                importance: ChangeImportance::Low,
197                children: entries,
198            });
199        } else {
200            individual.extend(entries);
201        }
202    }
203
204    AggregationResult { individual, groups }
205}
206
#[cfg(test)]
mod tests {
    use super::*;

    /// Build a classified `FileModified` change with no confidence value.
    fn modified(
        path: &str,
        kind: ModificationKind,
        importance: ChangeImportance,
    ) -> SemanticChange {
        SemanticChange::FileModified {
            path: path.into(),
            classification: Some(kind),
            importance: Some(importance),
            confidence: None,
        }
    }

    /// Build a low-importance `FunctionRenamed` change.
    fn renamed(file: &str, old_name: &str, new_name: &str) -> SemanticChange {
        SemanticChange::FunctionRenamed {
            file: file.into(),
            old_name: old_name.into(),
            new_name: new_name.into(),
            importance: Some(ChangeImportance::Low),
        }
    }

    /// Build a `DependencyRemoved` change for `name`.
    fn dep_removed(name: &str) -> SemanticChange {
        SemanticChange::DependencyRemoved { name: name.into() }
    }

    #[test]
    fn test_formatting_files_aggregate() {
        let changes = vec![
            modified("a.rs", ModificationKind::FormattingOnly, ChangeImportance::Noise),
            modified("b.rs", ModificationKind::FormattingOnly, ChangeImportance::Noise),
            modified("c.rs", ModificationKind::FormattingOnly, ChangeImportance::Noise),
            modified("logic.rs", ModificationKind::Logic, ChangeImportance::High),
        ];

        let result = aggregate_changes(changes);
        assert_eq!(result.groups.len(), 1);
        assert_eq!(result.groups[0].kind, AggregateKind::FormattingPass);
        assert_eq!(result.groups[0].files.len(), 3);
        assert_eq!(result.groups[0].children.len(), 3);
        // The logic file stays individual.
        assert_eq!(result.individual.len(), 1);
    }

    #[test]
    fn test_single_formatting_file_not_aggregated() {
        let changes = vec![modified(
            "a.rs",
            ModificationKind::FormattingOnly,
            ChangeImportance::Noise,
        )];
        let result = aggregate_changes(changes);
        assert_eq!(result.groups.len(), 0);
        assert_eq!(result.individual.len(), 1);
    }

    #[test]
    fn test_cross_file_rename_aggregates() {
        let changes = vec![
            renamed("a.rs", "foo", "bar"),
            renamed("b.rs", "foo", "bar"),
            renamed("c.rs", "baz", "qux"),
        ];

        let result = aggregate_changes(changes);
        // foo→bar aggregates (2 files), baz→qux stays individual (1 file).
        assert_eq!(result.groups.len(), 1);
        assert!(result.groups[0].label.contains("foo"));
        assert_eq!(result.groups[0].files.len(), 2);
        assert_eq!(result.individual.len(), 1);
    }

    #[test]
    fn test_mixed_aggregation() {
        let changes = vec![
            modified("fmt1.rs", ModificationKind::FormattingOnly, ChangeImportance::Noise),
            modified("fmt2.rs", ModificationKind::WhitespaceOnly, ChangeImportance::Noise),
            modified("imp1.rs", ModificationKind::ImportsOnly, ChangeImportance::Low),
            modified("imp2.rs", ModificationKind::ImportsOnly, ChangeImportance::Low),
            SemanticChange::FileAdded {
                path: "new.rs".into(),
            },
        ];

        let result = aggregate_changes(changes);
        assert_eq!(result.groups.len(), 2); // formatting + imports
        assert_eq!(result.individual.len(), 1); // FileAdded
    }

    #[test]
    fn test_comment_files_aggregate() {
        let changes = vec![
            modified("a.rs", ModificationKind::CommentsOnly, ChangeImportance::Low),
            modified("b.rs", ModificationKind::CommentsOnly, ChangeImportance::Low),
        ];

        let result = aggregate_changes(changes);
        assert_eq!(result.groups.len(), 1);
        assert_eq!(result.groups[0].kind, AggregateKind::CommentUpdates);
        assert_eq!(result.groups[0].label, "Comment updates: 2 files");
        assert_eq!(result.groups[0].files.len(), 2);
        assert!(result.individual.is_empty());
    }

    #[test]
    fn test_removed_dependency_aggregates() {
        let changes = vec![
            dep_removed("left-pad"),
            dep_removed("left-pad"),
            dep_removed("other"),
        ];

        let result = aggregate_changes(changes);
        // "left-pad" appears twice and collapses; "other" appears once and does not.
        assert_eq!(result.groups.len(), 1);
        assert_eq!(result.groups[0].kind, AggregateKind::DependencyChange);
        assert!(result.groups[0].label.contains("left-pad"));
        // Dependency groups carry no file list.
        assert!(result.groups[0].files.is_empty());
        assert_eq!(result.groups[0].children.len(), 2);
        assert_eq!(result.individual.len(), 1);
    }

    #[test]
    fn test_unclassified_modification_stays_individual() {
        let changes = vec![
            SemanticChange::FileModified {
                path: "a.rs".into(),
                classification: None,
                importance: None,
                confidence: None,
            },
            SemanticChange::FileModified {
                path: "b.rs".into(),
                classification: None,
                importance: None,
                confidence: None,
            },
        ];

        let result = aggregate_changes(changes);
        assert!(result.groups.is_empty());
        assert_eq!(result.individual.len(), 2);
    }

    #[test]
    fn test_empty_input_yields_empty_result() {
        let result = aggregate_changes(Vec::new());
        assert!(result.groups.is_empty());
        assert!(result.individual.is_empty());
    }
}