Skip to main content

repo/
context_suggestions.rs

// SPDX-License-Identifier: Apache-2.0
//! Context rewrite scoring and low-noise suggestion heuristics.
3
4use std::collections::{BTreeMap, BTreeSet};
5
6use objects::{
7    object::{ContextTarget, State},
8    store::ObjectStore,
9};
10
11use crate::{HistoryQuery, Repository, staleness};
12
/// Maximum number of history states requested when gathering change signals.
pub const SUGGESTION_WINDOW: usize = 24;
/// Minimum score at which a path is reported as a medium-tier suggestion.
pub const MEDIUM_SUGGESTION_THRESHOLD: u32 = 45;
/// Minimum score at which a path is reported as a high-tier suggestion.
pub const HIGH_SUGGESTION_THRESHOLD: u32 = 70;
/// Rewrite percentage at or above which a change counts as a major rewrite.
pub const MAJOR_REWRITE_THRESHOLD_PCT: u32 = 50;

/// Score added for every change recorded against a path inside the window.
const CHANGE_WEIGHT: u32 = 16;
/// Score added for every distinct state (change id) that touched a path.
const DISTINCT_STATE_WEIGHT: u32 = 8;
/// Score added for every distinct `provider/model` agent that touched a path.
const DISTINCT_AGENT_WEIGHT: u32 = 10;
/// Flat bonus for paths whose lowest window index falls within the recency cutoff.
const RECENCY_WEIGHT: u32 = 12;
/// Score added for every non-fresh annotation attached to a path.
const STALE_WEIGHT: u32 = 18;
/// Score removed when a path already has file context and none of it is stale.
const HAS_CONTEXT_PENALTY: u32 = 35;
24
/// Confidence bucket assigned to a suggestion from its score.
///
/// Paths scoring below the medium threshold are not reported at all, so there
/// is no "low" variant.
#[derive(Clone, Debug, PartialEq, Eq)]
pub enum ContextSuggestionTier {
    /// Score reached the medium threshold but not the high one.
    Medium,
    /// Score reached the high threshold.
    High,
}
30
31#[derive(Clone, Debug, PartialEq, Eq)]
32pub struct ContextSuggestion {
33    pub path: String,
34    pub score: u32,
35    pub tier: ContextSuggestionTier,
36    pub reasons: Vec<String>,
37    pub recent_changes: u32,
38    pub distinct_states: u32,
39    pub distinct_agents: u32,
40    pub has_context: bool,
41    pub stale_annotations: u32,
42}
43
/// Per-path accumulator filled while walking the history window.
#[derive(Default)]
struct SuggestionSignal {
    /// Number of changes recorded for the path across the window.
    recent_changes: u32,
    /// Full change ids of the states in which the path changed.
    distinct_states: BTreeSet<String>,
    /// `provider/model` identifiers of the agents attributed to those states.
    distinct_agents: BTreeSet<String>,
    /// Smallest window index at which the path changed, if any.
    /// NOTE(review): presumably index 0 is the most recent state — this depends
    /// on the ordering returned by the history query; confirm.
    latest_seen_index: Option<usize>,
}
51
52impl Repository {
53    pub fn suggest_context_targets(
54        &self,
55        state: &State,
56        limit: usize,
57    ) -> Result<Vec<ContextSuggestion>, anyhow::Error> {
58        let history = self.collect_state_window(state, SUGGESTION_WINDOW)?;
59        let mut signals: BTreeMap<String, SuggestionSignal> = BTreeMap::new();
60
61        for (index, candidate) in history.iter().enumerate() {
62            let parent_tree = candidate
63                .first_parent()
64                .and_then(|parent_id| self.store().get_state(parent_id).ok().flatten())
65                .map(|parent| parent.tree);
66
67            let changes = if let Some(parent_tree) = parent_tree {
68                self.diff_trees(&parent_tree, &candidate.tree)?
69            } else {
70                self.diff_trees(&objects::object::Tree::new().hash(), &candidate.tree)?
71            };
72
73            for change in changes {
74                let signal = signals.entry(change.path).or_default();
75                signal.recent_changes += 1;
76                signal
77                    .distinct_states
78                    .insert(candidate.change_id.to_string_full());
79                if let Some(agent) = &candidate.attribution.agent {
80                    signal
81                        .distinct_agents
82                        .insert(format!("{}/{}", agent.provider, agent.model));
83                }
84                signal.latest_seen_index = Some(
85                    signal
86                        .latest_seen_index
87                        .map_or(index, |current| current.min(index)),
88                );
89            }
90        }
91
92        let stale_map = staleness::check_context_staleness(self, state)?;
93        let active_context = match &state.context {
94            Some(root) => self.list_context_entries(root, None)?,
95            None => Vec::new(),
96        };
97
98        let active_paths: BTreeSet<String> = active_context
99            .iter()
100            .filter_map(|entry| match &entry.target {
101                ContextTarget::File { path } => Some(path.clone()),
102                ContextTarget::State { .. } => None,
103            })
104            .collect();
105
106        let mut suggestions = Vec::new();
107        for (path, signal) in signals {
108            let has_context = active_paths.contains(&path);
109            let stale_annotations = stale_map
110                .iter()
111                .filter(|(key, status)| {
112                    key.starts_with(&format!("{path}:"))
113                        && !matches!(status, staleness::StalenessStatus::Fresh)
114                })
115                .count() as u32;
116
117            let mut score = signal.recent_changes.saturating_mul(CHANGE_WEIGHT);
118            score += (signal.distinct_states.len() as u32).saturating_mul(DISTINCT_STATE_WEIGHT);
119            score += (signal.distinct_agents.len() as u32).saturating_mul(DISTINCT_AGENT_WEIGHT);
120            if signal.latest_seen_index.unwrap_or(usize::MAX) <= 3 {
121                score += RECENCY_WEIGHT;
122            }
123            if stale_annotations > 0 {
124                score += stale_annotations.saturating_mul(STALE_WEIGHT);
125            }
126            if has_context && stale_annotations == 0 {
127                score = score.saturating_sub(HAS_CONTEXT_PENALTY);
128            }
129
130            let tier = if score >= HIGH_SUGGESTION_THRESHOLD {
131                Some(ContextSuggestionTier::High)
132            } else if score >= MEDIUM_SUGGESTION_THRESHOLD {
133                Some(ContextSuggestionTier::Medium)
134            } else {
135                None
136            };
137
138            let Some(tier) = tier else {
139                continue;
140            };
141
142            let mut reasons = Vec::new();
143            if signal.recent_changes >= 3 {
144                reasons.push(format!(
145                    "{} recent changes across the last {} states",
146                    signal.recent_changes,
147                    history.len()
148                ));
149            }
150            if signal.distinct_agents.len() >= 2 {
151                reasons.push(format!(
152                    "{} distinct agents touched this file",
153                    signal.distinct_agents.len()
154                ));
155            }
156            if stale_annotations > 0 {
157                reasons.push(format!("{stale_annotations} annotation(s) may be stale"));
158            }
159            if !has_context {
160                reasons.push("no active file guidance exists yet".to_string());
161            }
162
163            suggestions.push(ContextSuggestion {
164                path,
165                score,
166                tier,
167                reasons,
168                recent_changes: signal.recent_changes,
169                distinct_states: signal.distinct_states.len() as u32,
170                distinct_agents: signal.distinct_agents.len() as u32,
171                has_context,
172                stale_annotations,
173            });
174        }
175
176        suggestions.sort_by(|a, b| b.score.cmp(&a.score).then_with(|| a.path.cmp(&b.path)));
177        suggestions.truncate(limit);
178        Ok(suggestions)
179    }
180
181    fn collect_state_window(
182        &self,
183        state: &State,
184        limit: usize,
185    ) -> Result<Vec<State>, anyhow::Error> {
186        let query = HistoryQuery::new(Some(state.change_id)).with_limit(limit);
187        Ok(self.query_history(&query)?)
188    }
189}
190
191pub fn compute_rewrite_pct(previous: &str, next: &str) -> u32 {
192    let prev_tokens = normalize_tokens(previous);
193    let next_tokens = normalize_tokens(next);
194
195    if prev_tokens.is_empty() && next_tokens.is_empty() {
196        return 0;
197    }
198    if prev_tokens.is_empty() || next_tokens.is_empty() {
199        return 100;
200    }
201
202    let prev_set: BTreeSet<_> = prev_tokens.iter().cloned().collect();
203    let next_set: BTreeSet<_> = next_tokens.iter().cloned().collect();
204    let intersection = prev_set.intersection(&next_set).count() as f64;
205    let union = prev_set.union(&next_set).count() as f64;
206    let similarity = if union == 0.0 {
207        1.0
208    } else {
209        intersection / union
210    };
211    ((1.0 - similarity) * 100.0).round() as u32
212}
213
214pub fn is_major_rewrite(rewrite_pct: u32) -> bool {
215    rewrite_pct >= MAJOR_REWRITE_THRESHOLD_PCT
216}
217
/// Splits `input` into lowercase alphanumeric tokens, in order of appearance.
///
/// Every non-alphanumeric character (whitespace, punctuation, underscores and
/// line terminators alike) acts as a separator, and empty fragments are
/// discarded. Duplicates are kept.
fn normalize_tokens(input: &str) -> Vec<String> {
    // Lowercase before splitting so casing rules see the original neighbours
    // of each character; line breaks need no special handling because they
    // are ordinary separators.
    let lowered = input.to_lowercase();
    lowered
        .split(|ch: char| !ch.is_alphanumeric())
        .filter(|token| !token.is_empty())
        .map(str::to_string)
        .collect()
}
230
/// Unit tests for the rewrite-percentage helpers.
#[cfg(test)]
mod tests {
    use super::*;

    #[test]
    fn rewrite_pct_is_zero_for_identical_content() {
        assert_eq!(compute_rewrite_pct("same tokens", "same tokens"), 0);
    }

    #[test]
    fn rewrite_pct_detects_major_changes() {
        let pct = compute_rewrite_pct("alpha beta gamma", "delta epsilon zeta");
        assert!(pct >= MAJOR_REWRITE_THRESHOLD_PCT);
        assert!(is_major_rewrite(pct));
    }

    #[test]
    fn rewrite_pct_handles_empty_inputs() {
        // No tokens on either side means nothing was rewritten.
        assert_eq!(compute_rewrite_pct("", ""), 0);
        // Tokens on only one side count as a complete rewrite.
        assert_eq!(compute_rewrite_pct("", "alpha"), 100);
        assert_eq!(compute_rewrite_pct("alpha", "  \n"), 100);
    }

    #[test]
    fn rewrite_pct_ignores_case_punctuation_and_order() {
        assert_eq!(compute_rewrite_pct("Alpha, BETA!", "beta alpha"), 0);
    }

    #[test]
    fn rewrite_pct_rounds_partial_overlap() {
        // One shared token out of three distinct ones: 66.67% rounds to 67.
        assert_eq!(compute_rewrite_pct("alpha beta", "beta gamma"), 67);
    }

    #[test]
    fn major_rewrite_threshold_is_inclusive() {
        assert!(is_major_rewrite(MAJOR_REWRITE_THRESHOLD_PCT));
        assert!(!is_major_rewrite(MAJOR_REWRITE_THRESHOLD_PCT - 1));
    }
}
246}