use std::{
collections::BTreeMap,
path::{Path, PathBuf},
time::Instant,
};
use objects::{
object::{ChangeId, SemanticChange, State},
store::ObjectStore,
};
use crate::{
cache::SemanticParseCache,
diff::{SemanticDiffOptions, semantic_diff_with_cache},
};
/// Selects the granularity at which hot-spot events are grouped.
#[derive(Copy, Clone, Debug, Eq, PartialEq)]
pub enum HotSpotKey {
/// Group events by file path only.
File,
/// Group events by (file path, function name); changes that carry no
/// function name are skipped in this mode.
Function,
}
/// Coarse category assigned to a `SemanticChange` by `HotEventKind::classify`.
///
/// Used as the key of the per-kind histogram on each hot spot. The derived
/// `Ord` follows declaration order, so reordering variants changes the
/// iteration order of any `BTreeMap<HotEventKind, _>`.
#[derive(Copy, Clone, Debug, Eq, PartialEq, Ord, PartialOrd, Hash)]
pub enum HotEventKind {
/// Assigned to `SemanticChange::FileAdded`.
FileAdded,
/// Assigned to `SemanticChange::FileDeleted`.
FileDeleted,
/// Assigned to `SemanticChange::FileModified`.
FileModified,
/// Assigned to `SemanticChange::FileRenamed`.
FileRenamed,
/// Assigned to both `SemanticChange::FunctionAdded` and
/// `SemanticChange::FunctionExtracted`.
FunctionExtracted,
/// Assigned to `SemanticChange::FunctionDeleted`.
FunctionDeleted,
/// Assigned to `SemanticChange::FunctionRenamed`.
FunctionRenamed,
/// Assigned to `SemanticChange::FunctionModified`.
FunctionModified,
/// Assigned to `SemanticChange::FunctionMoved`.
FunctionMoved,
/// Assigned to `SemanticChange::SignatureChanged`.
SignatureChanged,
/// Assigned to both `SemanticChange::DependencyAdded` and
/// `SemanticChange::DependencyRemoved`.
DependencyChanged,
}
impl HotEventKind {
    /// Maps a semantic change onto its hot-spot event category.
    ///
    /// Returns `None` for `SemanticChange::Custom`, which has no category.
    /// `FunctionAdded` is folded into `FunctionExtracted`, and both dependency
    /// variants are folded into `DependencyChanged`.
    fn classify(change: &SemanticChange) -> Option<Self> {
        let kind = match change {
            // Custom changes are not counted as hot-spot events at all.
            SemanticChange::Custom { .. } => return None,

            SemanticChange::FileAdded { .. } => Self::FileAdded,
            SemanticChange::FileDeleted { .. } => Self::FileDeleted,
            SemanticChange::FileModified { .. } => Self::FileModified,
            SemanticChange::FileRenamed { .. } => Self::FileRenamed,

            SemanticChange::FunctionAdded { .. }
            | SemanticChange::FunctionExtracted { .. } => Self::FunctionExtracted,
            SemanticChange::FunctionDeleted { .. } => Self::FunctionDeleted,
            SemanticChange::FunctionRenamed { .. } => Self::FunctionRenamed,
            SemanticChange::FunctionModified { .. } => Self::FunctionModified,
            SemanticChange::FunctionMoved { .. } => Self::FunctionMoved,
            SemanticChange::SignatureChanged { .. } => Self::SignatureChanged,

            SemanticChange::DependencyAdded { .. }
            | SemanticChange::DependencyRemoved { .. } => Self::DependencyChanged,
        };
        Some(kind)
    }
}
/// Concrete grouping key of a hot spot: either a file, or a function within a file.
///
/// The derived `Ord` sorts every `File` key before every `Function` key, then
/// by field values; it is used as the final tie-breaker when ranking spots.
#[derive(Clone, Debug, Eq, PartialEq, Ord, PartialOrd)]
pub enum HotSpotKeyValue {
    File { path: PathBuf },
    Function { path: PathBuf, name: String },
}

impl HotSpotKeyValue {
    /// Path of the file this key refers to, for both variants.
    pub fn path(&self) -> &Path {
        let (Self::File { path } | Self::Function { path, .. }) = self;
        path.as_path()
    }

    /// Function name for `Function` keys; `None` for `File` keys.
    pub fn function_name(&self) -> Option<&str> {
        if let Self::Function { name, .. } = self {
            Some(name.as_str())
        } else {
            None
        }
    }
}
/// Aggregated activity for one grouping key, as produced by `analyze_hot_spots`.
#[derive(Clone, Debug)]
pub struct HotSpot {
/// The file or function this entry describes.
pub key: HotSpotKeyValue,
/// Number of change events that passed the filters and mapped to this key.
pub event_count: usize,
/// Number of walked states in which at least one event touched this key.
pub state_count: usize,
/// State at which this key was first encountered during the walk. The walk
/// starts at `walk_from` and moves towards parents, so this is the touching
/// state closest to `walk_from`.
pub first_seen: ChangeId,
/// State at which this key was most recently encountered during the walk,
/// i.e. the touching state furthest from `walk_from` along the parent chain.
pub last_seen: ChangeId,
/// Event count broken down by event kind.
pub by_kind: BTreeMap<HotEventKind, usize>,
/// Event count broken down by actor label; only populated when
/// `HotSpotParams::include_actors` is set, otherwise `None`.
pub by_actor: Option<BTreeMap<String, usize>>,
}
/// Tuning knobs for `analyze_hot_spots`.
#[derive(Clone, Debug)]
pub struct HotSpotParams {
/// Maximum number of state/parent pairs to diff; `None` means no limit.
pub limit_states: Option<usize>,
/// Whether events are grouped per file or per function.
pub group_by: HotSpotKey,
/// Event kinds to count; an empty list counts every kind.
pub include_kinds: Vec<HotEventKind>,
/// Substrings of which at least one must occur in the (lossily stringified)
/// path; an empty list accepts every path.
pub include_paths: Vec<String>,
/// Substrings that reject a path when any of them occurs in it.
pub exclude_paths: Vec<String>,
/// Maximum number of spots returned in the report.
pub top_n: usize,
/// When true, each spot also carries a per-actor event histogram.
pub include_actors: bool,
/// Options forwarded to `semantic_diff_with_cache` for every diff.
pub diff_options: SemanticDiffOptions,
}
impl Default for HotSpotParams {
fn default() -> Self {
Self {
limit_states: Some(200),
group_by: HotSpotKey::File,
include_kinds: Vec::new(),
include_paths: Vec::new(),
exclude_paths: Vec::new(),
top_n: 20,
include_actors: false,
diff_options: SemanticDiffOptions::default(),
}
}
}
/// Result of `analyze_hot_spots`.
#[derive(Clone, Debug, Default)]
pub struct HotSpotsReport {
/// Ranked spots (highest event count first), truncated to `HotSpotParams::top_n`.
pub spots: Vec<HotSpot>,
/// Number of state/parent pairs that were diffed during the walk.
pub states_walked: usize,
/// Number of events that passed all filters, across every key (not only
/// the keys that made it into `spots`).
pub total_events: usize,
}
/// Walks the first-parent chain starting at `walk_from` and ranks the files or
/// functions that were touched most often.
///
/// Every step diffs a state against its first parent with
/// `semantic_diff_with_cache`, classifies each resulting change, applies the
/// kind and path filters from `params`, and accumulates counts per grouping
/// key. The walk ends when the current state has no first parent, when the
/// parent is missing from the store, or once `params.limit_states` pairs have
/// been diffed.
///
/// Spots are ranked by event count (descending), then state count
/// (descending), then key (ascending), and truncated to `params.top_n`.
///
/// Note that `first_seen` / `last_seen` follow walk order: the walk starts at
/// `walk_from` and moves towards parents, so `first_seen` is the touching state
/// closest to `walk_from` and `last_seen` the one furthest along the chain.
///
/// Returns an empty report when `walk_from` is not present in the store.
///
/// # Errors
///
/// Propagates errors from `store.get_state` and from `semantic_diff_with_cache`.
pub fn analyze_hot_spots(
    store: &dyn ObjectStore,
    walk_from: ChangeId,
    params: &HotSpotParams,
) -> Result<HotSpotsReport, anyhow::Error> {
    // NOTE(review): the timestamp is never read; it is kept (with the `let _`
    // below) presumably so the file-level `Instant` import stays referenced.
    let started = Instant::now();
    let cache = SemanticParseCache::shared();
    let limit = params.limit_states.unwrap_or(usize::MAX);

    let mut slots: BTreeMap<HotSpotKeyValue, SlotAccumulator> = BTreeMap::new();
    let mut total_events = 0usize;
    let mut states_walked = 0usize;

    let mut current_id = walk_from;
    let Some(mut current) = store.get_state(&current_id)? else {
        return Ok(HotSpotsReport::default());
    };

    while states_walked < limit {
        let Some(parent_id) = current.first_parent().copied() else {
            break;
        };
        // A parent that is referenced but absent from the store ends the walk
        // quietly instead of failing the whole analysis.
        let Some(parent) = store.get_state(&parent_id)? else {
            break;
        };

        let diff = semantic_diff_with_cache(
            store,
            &parent.tree,
            &current.tree,
            &params.diff_options,
            cache,
        )?;

        // Events are attributed to the child state of the diffed pair.
        let actor_label = params
            .include_actors
            .then(|| current.attribution.to_string());

        // Keys touched by this state, so `state_count` is bumped once per
        // state no matter how many events hit the same key.
        let mut touched_this_state: std::collections::BTreeSet<HotSpotKeyValue> =
            Default::default();

        for change in &diff.changes {
            let Some(kind) = HotEventKind::classify(change) else {
                continue;
            };
            if !params.include_kinds.is_empty() && !params.include_kinds.contains(&kind) {
                continue;
            }

            let key = match (params.group_by, change_to_key(change)) {
                (HotSpotKey::File, Some((path, _))) => HotSpotKeyValue::File { path },
                (HotSpotKey::Function, Some((path, Some(name)))) => {
                    HotSpotKeyValue::Function { path, name }
                }
                // No path at all, or function grouping without a function name.
                _ => continue,
            };
            if !path_passes_filter(key.path(), &params.include_paths, &params.exclude_paths) {
                continue;
            }

            total_events += 1;
            let slot = slots
                .entry(key.clone())
                .or_insert_with(|| SlotAccumulator::new(current_id));
            slot.event_count += 1;
            slot.last_seen = current_id;
            *slot.by_kind.entry(kind).or_insert(0) += 1;
            if let Some(actor) = &actor_label {
                let by_actor = slot.by_actor.get_or_insert_with(BTreeMap::new);
                *by_actor.entry(actor.clone()).or_insert(0) += 1;
            }
            touched_this_state.insert(key);
        }

        for key in touched_this_state {
            if let Some(slot) = slots.get_mut(&key) {
                slot.state_count += 1;
            }
        }

        states_walked += 1;
        current_id = parent_id;
        current = parent;
    }
    let _ = started;

    let mut ranked: Vec<(HotSpotKeyValue, SlotAccumulator)> = slots.into_iter().collect();
    ranked.sort_by(|a, b| {
        b.1.event_count
            .cmp(&a.1.event_count)
            .then(b.1.state_count.cmp(&a.1.state_count))
            .then(a.0.cmp(&b.0))
    });

    let spots = ranked
        .into_iter()
        .take(params.top_n)
        .map(|(key, slot)| HotSpot {
            key,
            event_count: slot.event_count,
            state_count: slot.state_count,
            first_seen: slot.first_seen,
            last_seen: slot.last_seen,
            by_kind: slot.by_kind,
            by_actor: slot.by_actor,
        })
        .collect();

    Ok(HotSpotsReport {
        spots,
        states_walked,
        total_events,
    })
}
/// Mutable per-key tally built up during the walk; converted into a `HotSpot`
/// once ranking is done.
struct SlotAccumulator {
// Events that passed the filters for this key.
event_count: usize,
// Walked states with at least one event for this key.
state_count: usize,
// State at which the slot was created (first encounter in walk order).
first_seen: ChangeId,
// State of the most recent event recorded into the slot (walk order).
last_seen: ChangeId,
// Event count per kind.
by_kind: BTreeMap<HotEventKind, usize>,
// Event count per actor label; stays `None` until an actor is recorded.
by_actor: Option<BTreeMap<String, usize>>,
}
impl SlotAccumulator {
fn new(seen: ChangeId) -> Self {
Self {
event_count: 0,
state_count: 0,
first_seen: seen,
last_seen: seen,
by_kind: BTreeMap::new(),
by_actor: None,
}
}
}
/// Extracts the file path, and the function name when there is one, that a
/// change should be attributed to.
///
/// Renames are attributed to the new location/name (`to`, `new_name`).
/// Dependency and custom changes return `None`.
fn change_to_key(change: &SemanticChange) -> Option<(PathBuf, Option<String>)> {
    // Borrow first, clone once at the end.
    let (path, function): (&PathBuf, Option<&String>) = match change {
        // File-level changes carry no function name.
        SemanticChange::FileAdded { path }
        | SemanticChange::FileDeleted { path }
        | SemanticChange::FileModified { path, .. } => (path, None),
        SemanticChange::FileRenamed { to, .. } => (to, None),

        // Function-level changes keyed by their (current) name.
        SemanticChange::FunctionAdded { file, name, .. }
        | SemanticChange::FunctionExtracted { file, name, .. }
        | SemanticChange::FunctionDeleted { file, name, .. }
        | SemanticChange::FunctionModified { file, name, .. }
        | SemanticChange::FunctionMoved { file, name, .. }
        | SemanticChange::SignatureChanged { file, name, .. } => (file, Some(name)),
        SemanticChange::FunctionRenamed { file, new_name, .. } => (file, Some(new_name)),

        SemanticChange::DependencyAdded { .. }
        | SemanticChange::DependencyRemoved { .. }
        | SemanticChange::Custom { .. } => return None,
    };
    Some((path.clone(), function.cloned()))
}
/// Decides whether `path` survives the include/exclude substring filters.
///
/// The path is compared as a lossily converted string. It passes when
/// `includes` is empty or at least one include substring occurs in it, and no
/// exclude substring occurs in it.
fn path_passes_filter(path: &Path, includes: &[String], excludes: &[String]) -> bool {
    let text = path.to_string_lossy();
    let mentions = |needle: &String| text.contains(needle.as_str());

    let included = includes.is_empty() || includes.iter().any(mentions);
    let excluded = excludes.iter().any(mentions);
    included && !excluded
}
/// Counts how many states along the first-parent chain each actor authored.
///
/// Starting at `walk_from` (inclusive), follows first parents and tallies the
/// stringified attribution of every visited state. At most `limit_states`
/// states are counted (`None` means no limit); a limit of zero yields an empty
/// histogram. The walk also ends when a state has no first parent or the
/// parent is missing from the store.
///
/// Returns an empty histogram when `walk_from` is not present in the store.
///
/// # Errors
///
/// Propagates errors from `store.get_state`.
pub fn analyze_actor_histogram(
    store: &dyn ObjectStore,
    walk_from: ChangeId,
    limit_states: Option<usize>,
) -> Result<BTreeMap<String, usize>, anyhow::Error> {
    let limit = limit_states.unwrap_or(usize::MAX);
    let mut histogram: BTreeMap<String, usize> = BTreeMap::new();

    // The lookup happens before the limit check so store errors still surface.
    let Some(mut current) = store.get_state(&walk_from)? else {
        return Ok(histogram);
    };
    // Honour a zero limit: previously the head state was counted regardless,
    // which exceeded the requested maximum.
    if limit == 0 {
        return Ok(histogram);
    }

    *histogram
        .entry(current.attribution.to_string())
        .or_insert(0) += 1;
    let mut steps = 1usize;

    while steps < limit {
        let Some(parent_id) = current.first_parent().copied() else {
            break;
        };
        let Some(parent) = store.get_state(&parent_id)? else {
            break;
        };
        *histogram.entry(parent.attribution.to_string()).or_insert(0) += 1;
        steps += 1;
        current = parent;
    }
    Ok(histogram)
}
// NOTE(review): nothing visible in this file calls this function; presumably it
// exists only to keep the `State` import referenced outside of the test
// module. Confirm before removing it or the import.
#[allow(dead_code)]
fn _state_anchor(_: &State) {}
#[cfg(test)]
mod tests {
    use objects::{
        object::{Attribution, ChangeId, Principal, State, Tree, TreeEntry},
        store::InMemoryStore,
    };

    use super::*;

    /// Builds a principal named `label` with a matching example.com address.
    fn principal(label: &str) -> Principal {
        Principal::new(label.to_string(), format!("{label}@example.com"))
    }

    /// Builds a linear history A <- B <- C over a single `lib.rs`, authored by
    /// alice, bob and carol respectively, and returns the id of C with the store.
    fn build_three_state_chain() -> (ChangeId, InMemoryStore) {
        let store = InMemoryStore::new();

        // State A: root, two empty functions.
        let blob_a = store
            .put_blob(&objects::object::Blob::from_slice(
                b"fn one() {}\nfn two() {}\n",
            ))
            .unwrap();
        let tree_a = store
            .put_tree(&Tree::from_entries(vec![
                TreeEntry::file("lib.rs".to_string(), blob_a, false).unwrap(),
            ]))
            .unwrap();
        let attrib_a = Attribution::human(principal("alice"));
        let state_a = State::new(tree_a, Vec::new(), attrib_a);
        store.put_state(&state_a).unwrap();
        let id_a = state_a.change_id;

        // State B: body of `one` changes.
        let blob_b = store
            .put_blob(&objects::object::Blob::from_slice(
                b"fn one() { println!(\"hi\"); }\nfn two() {}\n",
            ))
            .unwrap();
        let tree_b = store
            .put_tree(&Tree::from_entries(vec![
                TreeEntry::file("lib.rs".to_string(), blob_b, false).unwrap(),
            ]))
            .unwrap();
        let state_b = State::new(tree_b, vec![id_a], Attribution::human(principal("bob")));
        store.put_state(&state_b).unwrap();
        let id_b = state_b.change_id;

        // State C: `one` changes again and `three` is added.
        let blob_c = store
            .put_blob(&objects::object::Blob::from_slice(
                b"fn one() { println!(\"hello\"); }\nfn two() {}\nfn three() {}\n",
            ))
            .unwrap();
        let tree_c = store
            .put_tree(&Tree::from_entries(vec![
                TreeEntry::file("lib.rs".to_string(), blob_c, false).unwrap(),
            ]))
            .unwrap();
        let state_c = State::new(tree_c, vec![id_b], Attribution::human(principal("carol")));
        store.put_state(&state_c).unwrap();
        let id_c = state_c.change_id;

        (id_c, store)
    }

    #[test]
    fn walks_first_parent_chain_to_root() {
        let (head, store) = build_three_state_chain();
        let report = analyze_hot_spots(&store, head, &HotSpotParams::default()).unwrap();
        // Three states give two state/parent pairs.
        assert_eq!(report.states_walked, 2);
        let lib_path: PathBuf = "lib.rs".into();
        let file_spot = report
            .spots
            .iter()
            .find(|s| matches!(&s.key, HotSpotKeyValue::File { path } if path == &lib_path))
            .expect("expected lib.rs hot-spot");
        assert!(file_spot.event_count >= 2);
        assert_eq!(file_spot.state_count, 2);
    }

    #[test]
    fn limit_states_caps_the_walk() {
        let (head, store) = build_three_state_chain();
        let params = HotSpotParams {
            limit_states: Some(1),
            ..HotSpotParams::default()
        };
        let report = analyze_hot_spots(&store, head, &params).unwrap();
        assert_eq!(
            report.states_walked, 1,
            "limit_states=1 should walk one pair"
        );
    }

    #[test]
    fn group_by_function_skips_pure_file_events() {
        let (head, store) = build_three_state_chain();
        let params = HotSpotParams {
            group_by: HotSpotKey::Function,
            ..HotSpotParams::default()
        };
        let report = analyze_hot_spots(&store, head, &params).unwrap();
        for spot in &report.spots {
            assert!(
                matches!(&spot.key, HotSpotKeyValue::Function { .. }),
                "group_by=Function should only emit Function keys, got {:?}",
                spot.key
            );
        }
    }

    #[test]
    fn include_actors_populates_per_actor_histogram() {
        let (head, store) = build_three_state_chain();
        let params = HotSpotParams {
            include_actors: true,
            ..HotSpotParams::default()
        };
        let report = analyze_hot_spots(&store, head, &params).unwrap();
        let any = report.spots.first().expect("expected at least one spot");
        let actors = any
            .by_actor
            .as_ref()
            .expect("include_actors=true should populate by_actor");
        // Events are attributed to the child of each diffed pair, so only bob
        // and carol can appear; alice authored the root.
        assert!(
            actors
                .keys()
                .any(|k| k.contains("bob") || k.contains("carol")),
            "expected bob or carol in actor histogram, got {:?}",
            actors.keys().collect::<Vec<_>>()
        );
    }

    #[test]
    fn path_filter_excludes_substring_match() {
        let (head, store) = build_three_state_chain();
        let params = HotSpotParams {
            exclude_paths: vec!["lib.rs".to_string()],
            ..HotSpotParams::default()
        };
        let report = analyze_hot_spots(&store, head, &params).unwrap();
        assert!(
            report.spots.is_empty(),
            "exclude path 'lib.rs' should remove every spot, got {:?}",
            report.spots
        );
    }

    #[test]
    fn actor_histogram_walks_chain_independently_of_diff_path() {
        let (head, store) = build_three_state_chain();
        let hist = analyze_actor_histogram(&store, head, Some(10)).unwrap();
        // One entry per state, each by a distinct author.
        assert_eq!(hist.values().sum::<usize>(), 3);
        assert_eq!(hist.len(), 3);
    }

    #[test]
    fn empty_chain_returns_empty_report() {
        // A single root state has no parent, so there is nothing to diff.
        let store = InMemoryStore::new();
        let blob = store
            .put_blob(&objects::object::Blob::from_slice(b"fn solo() {}"))
            .unwrap();
        let tree = store
            .put_tree(&Tree::from_entries(vec![
                TreeEntry::file("solo.rs".to_string(), blob, false).unwrap(),
            ]))
            .unwrap();
        let state = State::new(tree, Vec::new(), Attribution::human(principal("alice")));
        store.put_state(&state).unwrap();
        let report = analyze_hot_spots(&store, state.change_id, &HotSpotParams::default()).unwrap();
        assert_eq!(report.states_walked, 0);
        assert_eq!(report.total_events, 0);
        assert!(report.spots.is_empty());
    }
}