Skip to main content

repo/
context_suggestions.rs

1// SPDX-License-Identifier: Apache-2.0
2//! Context rewrite scoring and low-noise suggestion heuristics.
3
4use std::collections::{BTreeMap, BTreeSet};
5
6use objects::object::{ContextTarget, State};
7
8use crate::{HistoryQuery, Repository, staleness};
9
// --- Public tuning knobs -------------------------------------------------------------------

/// Maximum number of history states requested when gathering change signals for suggestions.
pub const SUGGESTION_WINDOW: usize = 24;

/// Minimum score at which a path is reported as a [`ContextSuggestionTier::Medium`] suggestion.
pub const MEDIUM_SUGGESTION_THRESHOLD: u32 = 45;

/// Minimum score at which a path is reported as a [`ContextSuggestionTier::High`] suggestion.
pub const HIGH_SUGGESTION_THRESHOLD: u32 = 70;

/// Rewrite percentage at or above which [`is_major_rewrite`] reports a major rewrite.
pub const MAJOR_REWRITE_THRESHOLD_PCT: u32 = 50;

// --- Internal scoring weights --------------------------------------------------------------

/// Points contributed by every recorded change to a path inside the history window.
const CHANGE_WEIGHT: u32 = 16;

/// Points contributed by every distinct state (change id) that touched a path.
const DISTINCT_STATE_WEIGHT: u32 = 8;

/// Points contributed by every distinct `provider/model` agent that touched a path.
const DISTINCT_AGENT_WEIGHT: u32 = 10;

/// Flat bonus for paths whose lowest index in the history window is 3 or less.
const RECENCY_WEIGHT: u32 = 12;

/// Points contributed by every annotation on a path whose staleness status is not `Fresh`.
const STALE_WEIGHT: u32 = 18;

/// Points removed when a path already has active file context and no stale annotations.
const HAS_CONTEXT_PENALTY: u32 = 35;
21
/// Confidence tier assigned to a context suggestion based on its score.
///
/// Variants are declared in ascending order of urgency, so the derived ordering satisfies
/// `Medium < High`. The type is a fieldless enum and therefore cheap to copy and hash.
#[derive(Clone, Copy, Debug, PartialEq, Eq, PartialOrd, Ord, Hash)]
pub enum ContextSuggestionTier {
    /// The score reached [`MEDIUM_SUGGESTION_THRESHOLD`] but not [`HIGH_SUGGESTION_THRESHOLD`].
    Medium,
    /// The score reached [`HIGH_SUGGESTION_THRESHOLD`].
    High,
}
27
28#[derive(Clone, Debug, PartialEq, Eq)]
29pub struct ContextSuggestion {
30    pub path: String,
31    pub score: u32,
32    pub tier: ContextSuggestionTier,
33    pub reasons: Vec<String>,
34    pub recent_changes: u32,
35    pub distinct_states: u32,
36    pub distinct_agents: u32,
37    pub has_context: bool,
38    pub stale_annotations: u32,
39}
40
/// Per-path accumulator filled while walking the history window.
#[derive(Default)]
struct SuggestionSignal {
    /// How many changes to the path were observed across the window.
    recent_changes: u32,
    /// Full change ids of the states in which the path changed.
    distinct_states: BTreeSet<String>,
    /// `provider/model` identifiers of the agents attributed to those states.
    distinct_agents: BTreeSet<String>,
    /// Lowest window index at which the path changed, or `None` if it never did.
    ///
    /// Despite the name this is a minimum; presumably index 0 is the newest state in the
    /// window, which would make the minimum the most recent sighting — TODO confirm against
    /// the ordering returned by the history query.
    latest_seen_index: Option<usize>,
}
48
49impl Repository {
50    pub fn suggest_context_targets(
51        &self,
52        state: &State,
53        limit: usize,
54    ) -> Result<Vec<ContextSuggestion>, anyhow::Error> {
55        let history = self.collect_state_window(state, SUGGESTION_WINDOW)?;
56        let mut signals: BTreeMap<String, SuggestionSignal> = BTreeMap::new();
57
58        for (index, candidate) in history.iter().enumerate() {
59            let parent_tree = candidate
60                .first_parent()
61                .and_then(|parent_id| self.store().get_state(parent_id).ok().flatten())
62                .map(|parent| parent.tree);
63
64            let changes = if let Some(parent_tree) = parent_tree {
65                self.diff_trees(&parent_tree, &candidate.tree)?
66            } else {
67                self.diff_trees(&objects::object::Tree::new().hash(), &candidate.tree)?
68            };
69
70            for change in changes {
71                let signal = signals.entry(change.path).or_default();
72                signal.recent_changes += 1;
73                signal
74                    .distinct_states
75                    .insert(candidate.change_id.to_string_full());
76                if let Some(agent) = &candidate.attribution.agent {
77                    signal
78                        .distinct_agents
79                        .insert(format!("{}/{}", agent.provider, agent.model));
80                }
81                signal.latest_seen_index = Some(
82                    signal
83                        .latest_seen_index
84                        .map_or(index, |current| current.min(index)),
85                );
86            }
87        }
88
89        let stale_map = staleness::check_context_staleness(self, state)?;
90        let active_context = match &state.context {
91            Some(root) => self.list_context_entries(root, None)?,
92            None => Vec::new(),
93        };
94
95        let active_paths: BTreeSet<String> = active_context
96            .iter()
97            .filter_map(|entry| match &entry.target {
98                ContextTarget::File { path } => Some(path.clone()),
99                ContextTarget::State { .. } => None,
100            })
101            .collect();
102
103        let mut suggestions = Vec::new();
104        for (path, signal) in signals {
105            let has_context = active_paths.contains(&path);
106            let stale_annotations = stale_map
107                .iter()
108                .filter(|(key, status)| {
109                    key.starts_with(&format!("{path}:"))
110                        && !matches!(status, staleness::StalenessStatus::Fresh)
111                })
112                .count() as u32;
113
114            let mut score = signal.recent_changes.saturating_mul(CHANGE_WEIGHT);
115            score += (signal.distinct_states.len() as u32).saturating_mul(DISTINCT_STATE_WEIGHT);
116            score += (signal.distinct_agents.len() as u32).saturating_mul(DISTINCT_AGENT_WEIGHT);
117            if signal.latest_seen_index.unwrap_or(usize::MAX) <= 3 {
118                score += RECENCY_WEIGHT;
119            }
120            if stale_annotations > 0 {
121                score += stale_annotations.saturating_mul(STALE_WEIGHT);
122            }
123            if has_context && stale_annotations == 0 {
124                score = score.saturating_sub(HAS_CONTEXT_PENALTY);
125            }
126
127            let tier = if score >= HIGH_SUGGESTION_THRESHOLD {
128                Some(ContextSuggestionTier::High)
129            } else if score >= MEDIUM_SUGGESTION_THRESHOLD {
130                Some(ContextSuggestionTier::Medium)
131            } else {
132                None
133            };
134
135            let Some(tier) = tier else {
136                continue;
137            };
138
139            let mut reasons = Vec::new();
140            if signal.recent_changes >= 3 {
141                reasons.push(format!(
142                    "{} recent changes across the last {} states",
143                    signal.recent_changes,
144                    history.len()
145                ));
146            }
147            if signal.distinct_agents.len() >= 2 {
148                reasons.push(format!(
149                    "{} distinct agents touched this file",
150                    signal.distinct_agents.len()
151                ));
152            }
153            if stale_annotations > 0 {
154                reasons.push(format!("{stale_annotations} annotation(s) may be stale"));
155            }
156            if !has_context {
157                reasons.push("no active file guidance exists yet".to_string());
158            }
159
160            suggestions.push(ContextSuggestion {
161                path,
162                score,
163                tier,
164                reasons,
165                recent_changes: signal.recent_changes,
166                distinct_states: signal.distinct_states.len() as u32,
167                distinct_agents: signal.distinct_agents.len() as u32,
168                has_context,
169                stale_annotations,
170            });
171        }
172
173        suggestions.sort_by(|a, b| b.score.cmp(&a.score).then_with(|| a.path.cmp(&b.path)));
174        suggestions.truncate(limit);
175        Ok(suggestions)
176    }
177
178    fn collect_state_window(
179        &self,
180        state: &State,
181        limit: usize,
182    ) -> Result<Vec<State>, anyhow::Error> {
183        let query = HistoryQuery::new(Some(state.change_id)).with_limit(limit);
184        Ok(self.query_history(&query)?)
185    }
186}
187
188pub fn compute_rewrite_pct(previous: &str, next: &str) -> u32 {
189    let prev_tokens = normalize_tokens(previous);
190    let next_tokens = normalize_tokens(next);
191
192    if prev_tokens.is_empty() && next_tokens.is_empty() {
193        return 0;
194    }
195    if prev_tokens.is_empty() || next_tokens.is_empty() {
196        return 100;
197    }
198
199    let prev_set: BTreeSet<_> = prev_tokens.iter().cloned().collect();
200    let next_set: BTreeSet<_> = next_tokens.iter().cloned().collect();
201    let intersection = prev_set.intersection(&next_set).count() as f64;
202    let union = prev_set.union(&next_set).count() as f64;
203    let similarity = if union == 0.0 {
204        1.0
205    } else {
206        intersection / union
207    };
208    ((1.0 - similarity) * 100.0).round() as u32
209}
210
211pub fn is_major_rewrite(rewrite_pct: u32) -> bool {
212    rewrite_pct >= MAJOR_REWRITE_THRESHOLD_PCT
213}
214
/// Splits `input` into lowercase alphanumeric tokens, in order of appearance.
///
/// The text is lowercased first and then split on every character that is not alphanumeric;
/// empty fragments are discarded. Line breaks are themselves non-alphanumeric, so they act as
/// ordinary separators and no separate per-line pass (or per-line buffer) is required.
fn normalize_tokens(input: &str) -> Vec<String> {
    input
        .to_lowercase()
        .split(|ch: char| !ch.is_alphanumeric())
        .filter(|token| !token.is_empty())
        .map(str::to_string)
        .collect()
}
227
#[cfg(test)]
mod tests {
    //! Unit tests for the rewrite-percentage helpers.

    use super::*;

    #[test]
    fn rewrite_pct_is_zero_for_identical_content() {
        assert_eq!(compute_rewrite_pct("same tokens", "same tokens"), 0);
    }

    #[test]
    fn rewrite_pct_detects_major_changes() {
        let pct = compute_rewrite_pct("alpha beta gamma", "delta epsilon zeta");
        assert!(pct >= MAJOR_REWRITE_THRESHOLD_PCT);
        assert!(is_major_rewrite(pct));
    }

    #[test]
    fn rewrite_pct_handles_empty_inputs() {
        assert_eq!(compute_rewrite_pct("", ""), 0);
        assert_eq!(compute_rewrite_pct("", "alpha"), 100);
        assert_eq!(compute_rewrite_pct("alpha", ""), 100);
        // Punctuation-only text produces no tokens and is treated like empty text.
        assert_eq!(compute_rewrite_pct("--", "..."), 0);
    }

    #[test]
    fn rewrite_pct_ignores_case_punctuation_and_order() {
        assert_eq!(compute_rewrite_pct("Alpha, BETA!", "beta\nalpha"), 0);
    }

    #[test]
    fn rewrite_pct_measures_partial_overlap() {
        // {a, b, c} vs {a, b, d}: two shared tokens out of four distinct ones.
        assert_eq!(compute_rewrite_pct("a b c", "a b d"), 50);
    }

    #[test]
    fn major_rewrite_threshold_is_inclusive() {
        assert!(is_major_rewrite(MAJOR_REWRITE_THRESHOLD_PCT));
        assert!(!is_major_rewrite(MAJOR_REWRITE_THRESHOLD_PCT - 1));
    }
}
243}