repo/
context_suggestions.rs1use std::collections::{BTreeMap, BTreeSet};
5
6use objects::{
7 object::{ContextTarget, State},
8 store::ObjectStore,
9};
10
11use crate::{HistoryQuery, Repository, staleness};
12
/// Maximum number of history entries requested when gathering suggestion signals.
pub const SUGGESTION_WINDOW: usize = 24;
/// Minimum score for a path to be reported as a `Medium` suggestion.
pub const MEDIUM_SUGGESTION_THRESHOLD: u32 = 45;
/// Minimum score for a path to be reported as a `High` suggestion.
pub const HIGH_SUGGESTION_THRESHOLD: u32 = 70;
/// Minimum rewrite percentage that `is_major_rewrite` treats as a major rewrite.
pub const MAJOR_REWRITE_THRESHOLD_PCT: u32 = 50;

/// Points added per change recorded for a path within the window.
const CHANGE_WEIGHT: u32 = 16;
/// Points added per distinct change id that touched a path.
const DISTINCT_STATE_WEIGHT: u32 = 8;
/// Points added per distinct `provider/model` agent that touched a path.
const DISTINCT_AGENT_WEIGHT: u32 = 10;
/// Flat bonus for a path that changed at one of the lowest history indices.
const RECENCY_WEIGHT: u32 = 12;
/// Points added per non-fresh annotation attached to a path.
const STALE_WEIGHT: u32 = 18;
/// Points removed when a path already has active file context and nothing stale.
const HAS_CONTEXT_PENALTY: u32 = 35;
24
/// Urgency bucket assigned to a suggestion from its score.
#[derive(Clone, Debug, PartialEq, Eq)]
pub enum ContextSuggestionTier {
    /// Score reached `MEDIUM_SUGGESTION_THRESHOLD` but not `HIGH_SUGGESTION_THRESHOLD`.
    Medium,
    /// Score reached `HIGH_SUGGESTION_THRESHOLD`.
    High,
}
30
/// A path recommended for new or refreshed context, with the signals behind its score.
#[derive(Clone, Debug, PartialEq, Eq)]
pub struct ContextSuggestion {
    /// Path of the changed file, as reported by the tree diff.
    pub path: String,
    /// Final score after all bonuses and the existing-context penalty.
    pub score: u32,
    /// Tier derived from `score`.
    pub tier: ContextSuggestionTier,
    /// Human-readable explanations of why the path was suggested.
    pub reasons: Vec<String>,
    /// Number of changes recorded for the path within the history window.
    pub recent_changes: u32,
    /// Number of distinct change ids that touched the path.
    pub distinct_states: u32,
    /// Number of distinct `provider/model` agents that touched the path.
    pub distinct_agents: u32,
    /// Whether an active file-targeted context entry exists for the path.
    pub has_context: bool,
    /// Number of staleness entries keyed `"{path}:…"` whose status is not fresh.
    pub stale_annotations: u32,
}
43
/// Per-path tallies accumulated while walking the history window.
#[derive(Default)]
struct SuggestionSignal {
    /// Number of diff entries seen for the path.
    recent_changes: u32,
    /// Full change-id strings of the states whose diff included the path.
    distinct_states: BTreeSet<String>,
    /// `provider/model` labels of the attributed agents that changed the path.
    distinct_agents: BTreeSet<String>,
    /// Lowest history index at which the path changed; `None` until first seen.
    latest_seen_index: Option<usize>,
}
51
52impl Repository {
53 pub fn suggest_context_targets(
54 &self,
55 state: &State,
56 limit: usize,
57 ) -> Result<Vec<ContextSuggestion>, anyhow::Error> {
58 let history = self.collect_state_window(state, SUGGESTION_WINDOW)?;
59 let mut signals: BTreeMap<String, SuggestionSignal> = BTreeMap::new();
60
61 for (index, candidate) in history.iter().enumerate() {
62 let parent_tree = candidate
63 .first_parent()
64 .and_then(|parent_id| self.store().get_state(parent_id).ok().flatten())
65 .map(|parent| parent.tree);
66
67 let changes = if let Some(parent_tree) = parent_tree {
68 self.diff_trees(&parent_tree, &candidate.tree)?
69 } else {
70 self.diff_trees(&objects::object::Tree::new().hash(), &candidate.tree)?
71 };
72
73 for change in changes {
74 let signal = signals.entry(change.path).or_default();
75 signal.recent_changes += 1;
76 signal
77 .distinct_states
78 .insert(candidate.change_id.to_string_full());
79 if let Some(agent) = &candidate.attribution.agent {
80 signal
81 .distinct_agents
82 .insert(format!("{}/{}", agent.provider, agent.model));
83 }
84 signal.latest_seen_index = Some(
85 signal
86 .latest_seen_index
87 .map_or(index, |current| current.min(index)),
88 );
89 }
90 }
91
92 let stale_map = staleness::check_context_staleness(self, state)?;
93 let active_context = match &state.context {
94 Some(root) => self.list_context_entries(root, None)?,
95 None => Vec::new(),
96 };
97
98 let active_paths: BTreeSet<String> = active_context
99 .iter()
100 .filter_map(|entry| match &entry.target {
101 ContextTarget::File { path } => Some(path.clone()),
102 ContextTarget::State { .. } => None,
103 })
104 .collect();
105
106 let mut suggestions = Vec::new();
107 for (path, signal) in signals {
108 let has_context = active_paths.contains(&path);
109 let stale_annotations = stale_map
110 .iter()
111 .filter(|(key, status)| {
112 key.starts_with(&format!("{path}:"))
113 && !matches!(status, staleness::StalenessStatus::Fresh)
114 })
115 .count() as u32;
116
117 let mut score = signal.recent_changes.saturating_mul(CHANGE_WEIGHT);
118 score += (signal.distinct_states.len() as u32).saturating_mul(DISTINCT_STATE_WEIGHT);
119 score += (signal.distinct_agents.len() as u32).saturating_mul(DISTINCT_AGENT_WEIGHT);
120 if signal.latest_seen_index.unwrap_or(usize::MAX) <= 3 {
121 score += RECENCY_WEIGHT;
122 }
123 if stale_annotations > 0 {
124 score += stale_annotations.saturating_mul(STALE_WEIGHT);
125 }
126 if has_context && stale_annotations == 0 {
127 score = score.saturating_sub(HAS_CONTEXT_PENALTY);
128 }
129
130 let tier = if score >= HIGH_SUGGESTION_THRESHOLD {
131 Some(ContextSuggestionTier::High)
132 } else if score >= MEDIUM_SUGGESTION_THRESHOLD {
133 Some(ContextSuggestionTier::Medium)
134 } else {
135 None
136 };
137
138 let Some(tier) = tier else {
139 continue;
140 };
141
142 let mut reasons = Vec::new();
143 if signal.recent_changes >= 3 {
144 reasons.push(format!(
145 "{} recent changes across the last {} states",
146 signal.recent_changes,
147 history.len()
148 ));
149 }
150 if signal.distinct_agents.len() >= 2 {
151 reasons.push(format!(
152 "{} distinct agents touched this file",
153 signal.distinct_agents.len()
154 ));
155 }
156 if stale_annotations > 0 {
157 reasons.push(format!("{stale_annotations} annotation(s) may be stale"));
158 }
159 if !has_context {
160 reasons.push("no active file guidance exists yet".to_string());
161 }
162
163 suggestions.push(ContextSuggestion {
164 path,
165 score,
166 tier,
167 reasons,
168 recent_changes: signal.recent_changes,
169 distinct_states: signal.distinct_states.len() as u32,
170 distinct_agents: signal.distinct_agents.len() as u32,
171 has_context,
172 stale_annotations,
173 });
174 }
175
176 suggestions.sort_by(|a, b| b.score.cmp(&a.score).then_with(|| a.path.cmp(&b.path)));
177 suggestions.truncate(limit);
178 Ok(suggestions)
179 }
180
181 fn collect_state_window(
182 &self,
183 state: &State,
184 limit: usize,
185 ) -> Result<Vec<State>, anyhow::Error> {
186 let query = HistoryQuery::new(Some(state.change_id)).with_limit(limit);
187 Ok(self.query_history(&query)?)
188 }
189}
190
191pub fn compute_rewrite_pct(previous: &str, next: &str) -> u32 {
192 let prev_tokens = normalize_tokens(previous);
193 let next_tokens = normalize_tokens(next);
194
195 if prev_tokens.is_empty() && next_tokens.is_empty() {
196 return 0;
197 }
198 if prev_tokens.is_empty() || next_tokens.is_empty() {
199 return 100;
200 }
201
202 let prev_set: BTreeSet<_> = prev_tokens.iter().cloned().collect();
203 let next_set: BTreeSet<_> = next_tokens.iter().cloned().collect();
204 let intersection = prev_set.intersection(&next_set).count() as f64;
205 let union = prev_set.union(&next_set).count() as f64;
206 let similarity = if union == 0.0 {
207 1.0
208 } else {
209 intersection / union
210 };
211 ((1.0 - similarity) * 100.0).round() as u32
212}
213
214pub fn is_major_rewrite(rewrite_pct: u32) -> bool {
215 rewrite_pct >= MAJOR_REWRITE_THRESHOLD_PCT
216}
217
/// Splits `input` into lowercase alphanumeric tokens, in order of appearance.
///
/// Each line is lowercased and then cut at every non-alphanumeric character; empty pieces
/// are discarded. Duplicates are kept.
fn normalize_tokens(input: &str) -> Vec<String> {
    let mut tokens = Vec::new();
    for line in input.lines() {
        // The lowered copy must outlive the split, which borrows from it.
        let lowered = line.to_lowercase();
        tokens.extend(
            lowered
                .split(|ch: char| !ch.is_alphanumeric())
                .filter(|piece| !piece.is_empty())
                .map(String::from),
        );
    }
    tokens
}
230
#[cfg(test)]
mod tests {
    use super::*;

    /// Identical inputs share every token, so nothing counts as rewritten.
    #[test]
    fn rewrite_pct_is_zero_for_identical_content() {
        let rewrite_pct = compute_rewrite_pct("same tokens", "same tokens");
        assert_eq!(rewrite_pct, 0);
    }

    /// Inputs with no token in common must reach the major-rewrite threshold.
    #[test]
    fn rewrite_pct_detects_major_changes() {
        let before = "alpha beta gamma";
        let after = "delta epsilon zeta";
        let rewrite_pct = compute_rewrite_pct(before, after);
        assert!(rewrite_pct >= MAJOR_REWRITE_THRESHOLD_PCT);
        assert!(is_major_rewrite(rewrite_pct));
    }
}