// sem_core/parser/hotspot.rs

//! Hotspot analysis: find entities that change most frequently across git history.
//! High-churn entities are statistically more likely to contain bugs.
3
4use std::collections::HashMap;
5
6use crate::git::bridge::GitBridge;
7use crate::git::types::DiffScope;
8use crate::parser::differ::compute_semantic_diff;
9use crate::parser::registry::ParserRegistry;
10
/// Churn record for one entity: how many of the examined commit pairs touched it.
///
/// An entity is identified by the combination of its name, its type and the file
/// it lives in; `compute_hotspots` produces one record per distinct combination.
#[derive(Debug, Clone, PartialEq, Eq, Hash)]
pub struct EntityHotspot {
    /// Name of the entity as reported by the semantic diff.
    pub entity_name: String,
    /// Kind of the entity as reported by the semantic diff.
    pub entity_type: String,
    /// Path of the file containing the entity, as reported by the semantic diff.
    pub file_path: String,
    /// Number of semantic-diff change entries recorded for this entity.
    pub change_count: usize,
}
18
19/// Walk git history and count how often each entity appears in semantic diffs.
20///
21/// - `file_path`: if Some, only track changes to entities in this file
22/// - `max_commits`: maximum number of commits to walk (default 50)
23///
24/// Returns hotspots sorted by change_count descending.
25pub fn compute_hotspots(
26    git: &GitBridge,
27    registry: &ParserRegistry,
28    file_path: Option<&str>,
29    max_commits: usize,
30) -> Vec<EntityHotspot> {
31    let commits = match git.get_log(max_commits + 1) {
32        Ok(c) => c,
33        Err(_) => return Vec::new(),
34    };
35
36    if commits.len() < 2 {
37        return Vec::new();
38    }
39
40    // entity key (name, type, file) -> count
41    let mut churn: HashMap<(String, String, String), usize> = HashMap::new();
42
43    let pathspecs: Vec<String> = file_path.map(|f| vec![f.to_string()]).unwrap_or_default();
44
45    // Compare consecutive commit pairs
46    for window in commits.windows(2) {
47        let newer = &window[0];
48        let older = &window[1];
49
50        let scope = DiffScope::Range {
51            from: older.sha.clone(),
52            to: newer.sha.clone(),
53        };
54
55        let file_changes = match git.get_changed_files(&scope, &pathspecs) {
56            Ok(fc) => fc,
57            Err(_) => continue,
58        };
59
60        let diff = compute_semantic_diff(&file_changes, registry, Some(&newer.sha), None);
61
62        for change in &diff.changes {
63            // Filter to target file if specified
64            if let Some(fp) = file_path {
65                if change.file_path != fp {
66                    continue;
67                }
68            }
69
70            let key = (
71                change.entity_name.clone(),
72                change.entity_type.clone(),
73                change.file_path.clone(),
74            );
75            *churn.entry(key).or_insert(0) += 1;
76        }
77    }
78
79    let mut hotspots: Vec<EntityHotspot> = churn
80        .into_iter()
81        .map(|((name, entity_type, file_path), count)| EntityHotspot {
82            entity_name: name,
83            entity_type,
84            file_path,
85            change_count: count,
86        })
87        .collect();
88
89    hotspots.sort_by(|a, b| b.change_count.cmp(&a.change_count));
90    hotspots
91}