repo/
context_suggestions.rs1use std::collections::{BTreeMap, BTreeSet};
5
6use objects::object::{ContextTarget, State};
7
8use crate::{HistoryQuery, Repository, staleness};
9
/// Maximum number of history states requested when gathering change signals for
/// context suggestions.
pub const SUGGESTION_WINDOW: usize = 24;
/// Minimum score at which a path is reported as a `Medium` suggestion; paths that
/// score lower are not reported at all.
pub const MEDIUM_SUGGESTION_THRESHOLD: u32 = 45;
/// Minimum score at which a path is reported as a `High` suggestion.
pub const HIGH_SUGGESTION_THRESHOLD: u32 = 70;
/// Rewrite percentage (inclusive) at or above which `is_major_rewrite` returns `true`.
pub const MAJOR_REWRITE_THRESHOLD_PCT: u32 = 50;
14
/// Score contribution for every change to a path observed inside the history window.
const CHANGE_WEIGHT: u32 = 16;
/// Score contribution for every distinct state that touched a path.
const DISTINCT_STATE_WEIGHT: u32 = 8;
/// Score contribution for every distinct `provider/model` agent that touched a path.
const DISTINCT_AGENT_WEIGHT: u32 = 10;
/// One-off bonus for paths whose lowest window index is small (touched near the
/// start of the window).
const RECENCY_WEIGHT: u32 = 12;
/// Score contribution for every annotation on a path whose staleness status is not `Fresh`.
const STALE_WEIGHT: u32 = 18;
/// Amount subtracted (saturating at zero) when a path already has an active file
/// context entry and none of its annotations are stale.
const HAS_CONTEXT_PENALTY: u32 = 35;
21
/// Urgency bucket assigned to a context suggestion.
///
/// Variants are declared in ascending order of urgency, so the derived ordering
/// satisfies `Medium < High`. The type is a fieldless enum and therefore `Copy`;
/// it can also be used as a key in hashed or ordered collections.
#[derive(Clone, Copy, Debug, PartialEq, Eq, PartialOrd, Ord, Hash)]
pub enum ContextSuggestionTier {
    /// The score reached the medium threshold but not the high one.
    Medium,
    /// The score reached the high threshold.
    High,
}
27
/// A single ranked recommendation to add or refresh context guidance for a file.
#[derive(Clone, Debug, PartialEq, Eq)]
pub struct ContextSuggestion {
    /// Path of the changed file this suggestion refers to.
    pub path: String,
    /// Weighted score built from the signals below; higher means more urgent.
    pub score: u32,
    /// Tier derived from `score` using the medium/high thresholds.
    pub tier: ContextSuggestionTier,
    /// Human-readable explanations of why the path was suggested.
    pub reasons: Vec<String>,
    /// Number of changes to the path observed within the history window.
    pub recent_changes: u32,
    /// Number of distinct states in the window that changed the path.
    pub distinct_states: u32,
    /// Number of distinct `provider/model` agents attributed to those changes.
    pub distinct_agents: u32,
    /// Whether an active file-targeted context entry already exists for the path.
    pub has_context: bool,
    /// Number of annotations for the path whose staleness status is not `Fresh`.
    pub stale_annotations: u32,
}
40
/// Per-path accumulator filled while walking the history window.
#[derive(Default)]
struct SuggestionSignal {
    /// Number of diff entries seen for the path across the window.
    recent_changes: u32,
    /// Full change ids of the states that touched the path.
    distinct_states: BTreeSet<String>,
    /// `provider/model` keys of the agents attributed to states that touched the path.
    distinct_agents: BTreeSet<String>,
    /// Smallest window index at which the path was seen; `None` until the first hit.
    /// NOTE(review): "latest" presumes the history query yields newest states first — confirm.
    latest_seen_index: Option<usize>,
}
48
49impl Repository {
50 pub fn suggest_context_targets(
51 &self,
52 state: &State,
53 limit: usize,
54 ) -> Result<Vec<ContextSuggestion>, anyhow::Error> {
55 let history = self.collect_state_window(state, SUGGESTION_WINDOW)?;
56 let mut signals: BTreeMap<String, SuggestionSignal> = BTreeMap::new();
57
58 for (index, candidate) in history.iter().enumerate() {
59 let parent_tree = candidate
60 .first_parent()
61 .and_then(|parent_id| self.store().get_state(parent_id).ok().flatten())
62 .map(|parent| parent.tree);
63
64 let changes = if let Some(parent_tree) = parent_tree {
65 self.diff_trees(&parent_tree, &candidate.tree)?
66 } else {
67 self.diff_trees(&objects::object::Tree::new().hash(), &candidate.tree)?
68 };
69
70 for change in changes {
71 let signal = signals.entry(change.path).or_default();
72 signal.recent_changes += 1;
73 signal
74 .distinct_states
75 .insert(candidate.change_id.to_string_full());
76 if let Some(agent) = &candidate.attribution.agent {
77 signal
78 .distinct_agents
79 .insert(format!("{}/{}", agent.provider, agent.model));
80 }
81 signal.latest_seen_index = Some(
82 signal
83 .latest_seen_index
84 .map_or(index, |current| current.min(index)),
85 );
86 }
87 }
88
89 let stale_map = staleness::check_context_staleness(self, state)?;
90 let active_context = match &state.context {
91 Some(root) => self.list_context_entries(root, None)?,
92 None => Vec::new(),
93 };
94
95 let active_paths: BTreeSet<String> = active_context
96 .iter()
97 .filter_map(|entry| match &entry.target {
98 ContextTarget::File { path } => Some(path.clone()),
99 ContextTarget::State { .. } => None,
100 })
101 .collect();
102
103 let mut suggestions = Vec::new();
104 for (path, signal) in signals {
105 let has_context = active_paths.contains(&path);
106 let stale_annotations = stale_map
107 .iter()
108 .filter(|(key, status)| {
109 key.starts_with(&format!("{path}:"))
110 && !matches!(status, staleness::StalenessStatus::Fresh)
111 })
112 .count() as u32;
113
114 let mut score = signal.recent_changes.saturating_mul(CHANGE_WEIGHT);
115 score += (signal.distinct_states.len() as u32).saturating_mul(DISTINCT_STATE_WEIGHT);
116 score += (signal.distinct_agents.len() as u32).saturating_mul(DISTINCT_AGENT_WEIGHT);
117 if signal.latest_seen_index.unwrap_or(usize::MAX) <= 3 {
118 score += RECENCY_WEIGHT;
119 }
120 if stale_annotations > 0 {
121 score += stale_annotations.saturating_mul(STALE_WEIGHT);
122 }
123 if has_context && stale_annotations == 0 {
124 score = score.saturating_sub(HAS_CONTEXT_PENALTY);
125 }
126
127 let tier = if score >= HIGH_SUGGESTION_THRESHOLD {
128 Some(ContextSuggestionTier::High)
129 } else if score >= MEDIUM_SUGGESTION_THRESHOLD {
130 Some(ContextSuggestionTier::Medium)
131 } else {
132 None
133 };
134
135 let Some(tier) = tier else {
136 continue;
137 };
138
139 let mut reasons = Vec::new();
140 if signal.recent_changes >= 3 {
141 reasons.push(format!(
142 "{} recent changes across the last {} states",
143 signal.recent_changes,
144 history.len()
145 ));
146 }
147 if signal.distinct_agents.len() >= 2 {
148 reasons.push(format!(
149 "{} distinct agents touched this file",
150 signal.distinct_agents.len()
151 ));
152 }
153 if stale_annotations > 0 {
154 reasons.push(format!("{stale_annotations} annotation(s) may be stale"));
155 }
156 if !has_context {
157 reasons.push("no active file guidance exists yet".to_string());
158 }
159
160 suggestions.push(ContextSuggestion {
161 path,
162 score,
163 tier,
164 reasons,
165 recent_changes: signal.recent_changes,
166 distinct_states: signal.distinct_states.len() as u32,
167 distinct_agents: signal.distinct_agents.len() as u32,
168 has_context,
169 stale_annotations,
170 });
171 }
172
173 suggestions.sort_by(|a, b| b.score.cmp(&a.score).then_with(|| a.path.cmp(&b.path)));
174 suggestions.truncate(limit);
175 Ok(suggestions)
176 }
177
178 fn collect_state_window(
179 &self,
180 state: &State,
181 limit: usize,
182 ) -> Result<Vec<State>, anyhow::Error> {
183 let query = HistoryQuery::new(Some(state.change_id)).with_limit(limit);
184 Ok(self.query_history(&query)?)
185 }
186}
187
188pub fn compute_rewrite_pct(previous: &str, next: &str) -> u32 {
189 let prev_tokens = normalize_tokens(previous);
190 let next_tokens = normalize_tokens(next);
191
192 if prev_tokens.is_empty() && next_tokens.is_empty() {
193 return 0;
194 }
195 if prev_tokens.is_empty() || next_tokens.is_empty() {
196 return 100;
197 }
198
199 let prev_set: BTreeSet<_> = prev_tokens.iter().cloned().collect();
200 let next_set: BTreeSet<_> = next_tokens.iter().cloned().collect();
201 let intersection = prev_set.intersection(&next_set).count() as f64;
202 let union = prev_set.union(&next_set).count() as f64;
203 let similarity = if union == 0.0 {
204 1.0
205 } else {
206 intersection / union
207 };
208 ((1.0 - similarity) * 100.0).round() as u32
209}
210
211pub fn is_major_rewrite(rewrite_pct: u32) -> bool {
212 rewrite_pct >= MAJOR_REWRITE_THRESHOLD_PCT
213}
214
/// Splits `input` into lowercase alphanumeric tokens, in order of appearance.
///
/// Each line is lowercased and then cut at every character that is not
/// alphanumeric; empty fragments are discarded. Duplicate tokens are kept.
fn normalize_tokens(input: &str) -> Vec<String> {
    let mut tokens = Vec::new();
    for raw_line in input.lines() {
        let lowered = raw_line.to_lowercase();
        let words = lowered
            .split(|ch: char| !ch.is_alphanumeric())
            .filter(|fragment| !fragment.is_empty())
            .map(str::to_string);
        tokens.extend(words);
    }
    tokens
}
227
#[cfg(test)]
mod tests {
    use super::*;

    /// Identical texts share every token, so nothing counts as rewritten.
    #[test]
    fn rewrite_pct_is_zero_for_identical_content() {
        let rewrite_pct = compute_rewrite_pct("same tokens", "same tokens");
        assert_eq!(rewrite_pct, 0);
    }

    /// Texts with no tokens in common must be classified as a major rewrite.
    #[test]
    fn rewrite_pct_detects_major_changes() {
        let before = "alpha beta gamma";
        let after = "delta epsilon zeta";
        let rewrite_pct = compute_rewrite_pct(before, after);
        assert!(rewrite_pct >= MAJOR_REWRITE_THRESHOLD_PCT);
        assert!(is_major_rewrite(rewrite_pct));
    }
}