Skip to main content

repo/
context_suggestions.rs

1// SPDX-License-Identifier: Apache-2.0
2//! Context rewrite scoring and low-noise suggestion heuristics.
3
4use std::collections::{BTreeMap, BTreeSet};
5
6pub use objects::object::{
7    ContextSuggestion, ContextSuggestionTier, HIGH_SUGGESTION_THRESHOLD,
8    MAJOR_REWRITE_THRESHOLD_PCT, MEDIUM_SUGGESTION_THRESHOLD, SUGGESTION_WINDOW,
9};
10use objects::{
11    object::{ContextTarget, State, SuggestionInputs, SuggestionSignal, score_suggestions},
12    store::ObjectStore,
13};
14
15use crate::{HistoryQuery, Repository, staleness};
16
17impl Repository {
18    pub fn suggest_context_targets(
19        &self,
20        state: &State,
21        limit: usize,
22    ) -> Result<Vec<ContextSuggestion>, anyhow::Error> {
23        let history = self.collect_state_window(state, SUGGESTION_WINDOW)?;
24        let mut signals: BTreeMap<String, SuggestionSignal> = BTreeMap::new();
25
26        for (index, candidate) in history.iter().enumerate() {
27            let parent_tree = candidate
28                .first_parent()
29                .and_then(|parent_id| self.store().get_state(parent_id).ok().flatten())
30                .map(|parent| parent.tree);
31
32            let changes = if let Some(parent_tree) = parent_tree {
33                self.diff_trees(&parent_tree, &candidate.tree)?
34            } else {
35                self.diff_trees(&objects::object::Tree::new().hash(), &candidate.tree)?
36            };
37
38            for change in changes {
39                let signal = signals.entry(change.path).or_default();
40                signal.recent_changes += 1;
41                signal
42                    .distinct_states
43                    .insert(candidate.change_id.to_string_full());
44                if let Some(agent) = &candidate.attribution.agent {
45                    signal
46                        .distinct_agents
47                        .insert(format!("{}/{}", agent.provider, agent.model));
48                }
49                signal.latest_seen_index = Some(
50                    signal
51                        .latest_seen_index
52                        .map_or(index, |current| current.min(index)),
53                );
54            }
55        }
56
57        let stale_map = staleness::check_context_staleness(self, state)?;
58        let active_context = match &state.context {
59            Some(root) => self.list_context_entries(root, None)?,
60            None => Vec::new(),
61        };
62
63        let active_paths: BTreeSet<String> = active_context
64            .iter()
65            .filter_map(|entry| match &entry.target {
66                ContextTarget::File { path } => Some(path.clone()),
67                ContextTarget::State { .. } => None,
68            })
69            .collect();
70
71        Ok(score_suggestions(
72            SuggestionInputs {
73                signals,
74                stale_map,
75                active_paths,
76                history_len: history.len(),
77            },
78            limit,
79        ))
80    }
81
82    fn collect_state_window(
83        &self,
84        state: &State,
85        limit: usize,
86    ) -> Result<Vec<State>, anyhow::Error> {
87        let query = HistoryQuery::new(Some(state.change_id)).with_limit(limit);
88        Ok(self.query_history(&query)?)
89    }
90}
91
92pub fn compute_rewrite_pct(previous: &str, next: &str) -> u32 {
93    let prev_tokens = normalize_tokens(previous);
94    let next_tokens = normalize_tokens(next);
95
96    if prev_tokens.is_empty() && next_tokens.is_empty() {
97        return 0;
98    }
99    if prev_tokens.is_empty() || next_tokens.is_empty() {
100        return 100;
101    }
102
103    let prev_set: BTreeSet<_> = prev_tokens.iter().cloned().collect();
104    let next_set: BTreeSet<_> = next_tokens.iter().cloned().collect();
105    let intersection = prev_set.intersection(&next_set).count() as f64;
106    let union = prev_set.union(&next_set).count() as f64;
107    let similarity = if union == 0.0 {
108        1.0
109    } else {
110        intersection / union
111    };
112    ((1.0 - similarity) * 100.0).round() as u32
113}
114
115pub fn is_major_rewrite(rewrite_pct: u32) -> bool {
116    rewrite_pct >= MAJOR_REWRITE_THRESHOLD_PCT
117}
118
/// Splits `input` into lowercase alphanumeric tokens, in order of appearance.
///
/// Each line is lowercased first and then cut at every character that is not
/// alphanumeric; empty fragments are discarded. Duplicates are kept.
fn normalize_tokens(input: &str) -> Vec<String> {
    let mut tokens = Vec::new();
    for line in input.lines() {
        // Lowercase before splitting so the split sees the lowercased characters.
        let lowered = line.to_lowercase();
        tokens.extend(
            lowered
                .split(|ch: char| !ch.is_alphanumeric())
                .filter(|piece| !piece.is_empty())
                .map(String::from),
        );
    }
    tokens
}
131
#[cfg(test)]
mod tests {
    use std::fs;

    use super::*;

    /// Two texts with the same tokens yield a rewrite percentage of zero.
    #[test]
    fn rewrite_pct_is_zero_for_identical_content() {
        let pct = compute_rewrite_pct("same tokens", "same tokens");
        assert_eq!(pct, 0);
    }

    /// Texts without any shared token are classified as a major rewrite.
    #[test]
    fn rewrite_pct_detects_major_changes() {
        let rewrite = compute_rewrite_pct("alpha beta gamma", "delta epsilon zeta");
        assert!(rewrite >= MAJOR_REWRITE_THRESHOLD_PCT);
        assert!(is_major_rewrite(rewrite));
    }

    /// Golden test: snapshots four edits in a fresh repository (`src/a.rs`
    /// touched three times, `src/b.rs` once) and pins the exact suggestion list
    /// produced for the final snapshot.
    #[test]
    fn suggest_context_targets_matches_golden_fixture() {
        let workdir = tempfile::TempDir::new().unwrap();
        let root = workdir.path();
        let repo = Repository::init_default(root).unwrap();

        fs::create_dir_all(root.join("src")).unwrap();
        let file_a = root.join("src/a.rs");
        let file_b = root.join("src/b.rs");

        // The order of writes and snapshots below is part of the fixture.
        fs::write(&file_a, "one\n").unwrap();
        repo.snapshot(Some(String::from("add a")), None).unwrap();

        fs::write(&file_b, "one\n").unwrap();
        repo.snapshot(Some(String::from("add b")), None).unwrap();

        fs::write(&file_a, "two\n").unwrap();
        repo.snapshot(Some(String::from("update a")), None).unwrap();

        fs::write(&file_a, "three\n").unwrap();
        let head = repo
            .snapshot(Some(String::from("update a again")), None)
            .unwrap();

        let suggestions = repo.suggest_context_targets(&head, 10).unwrap();

        let expected = ContextSuggestion {
            path: String::from("src/a.rs"),
            score: 84,
            tier: ContextSuggestionTier::High,
            reasons: vec![
                String::from("3 recent changes across the last 5 states"),
                String::from("no active file guidance exists yet"),
            ],
            recent_changes: 3,
            distinct_states: 3,
            distinct_agents: 0,
            has_context: false,
            stale_annotations: 0,
        };
        assert_eq!(suggestions, vec![expected]);
    }
}