avocado_core/
diff.rs

1//! Working set diff module
2//!
3//! Compares two working sets to identify added, removed, and reranked spans.
4//! Useful for auditing corpus changes and understanding retrieval drift.
5
6use crate::types::{DiffEntry, RerankEntry, WorkingSet, WorkingSetDiff};
7use std::collections::HashMap;
8
9/// Compute diff between two working sets
10///
11/// # Arguments
12///
13/// * `before` - The "before" working set
14/// * `after` - The "after" working set
15///
16/// # Returns
17///
18/// Diff showing added, removed, and reranked spans
19pub fn diff_working_sets(before: &WorkingSet, after: &WorkingSet) -> WorkingSetDiff {
20    // Build maps of span_id -> (rank, score, path, lines)
21    let before_map: HashMap<&str, (usize, f32, &str, (usize, usize))> = before
22        .citations
23        .iter()
24        .enumerate()
25        .map(|(rank, c)| {
26            (
27                c.span_id.as_str(),
28                (rank + 1, c.score, c.artifact_path.as_str(), (c.start_line, c.end_line)),
29            )
30        })
31        .collect();
32
33    let after_map: HashMap<&str, (usize, f32, &str, (usize, usize))> = after
34        .citations
35        .iter()
36        .enumerate()
37        .map(|(rank, c)| {
38            (
39                c.span_id.as_str(),
40                (rank + 1, c.score, c.artifact_path.as_str(), (c.start_line, c.end_line)),
41            )
42        })
43        .collect();
44
45    let mut added = Vec::new();
46    let mut removed = Vec::new();
47    let mut reranked = Vec::new();
48
49    // Find added spans (in after but not in before)
50    for (span_id, (rank, score, path, lines)) in &after_map {
51        if !before_map.contains_key(*span_id) {
52            added.push(DiffEntry {
53                span_id: span_id.to_string(),
54                artifact_path: path.to_string(),
55                lines: *lines,
56                score: *score,
57                rank: *rank,
58            });
59        }
60    }
61
62    // Find removed spans (in before but not in after)
63    for (span_id, (rank, score, path, lines)) in &before_map {
64        if !after_map.contains_key(*span_id) {
65            removed.push(DiffEntry {
66                span_id: span_id.to_string(),
67                artifact_path: path.to_string(),
68                lines: *lines,
69                score: *score,
70                rank: *rank,
71            });
72        }
73    }
74
75    // Find reranked spans (in both but different rank/score)
76    for (span_id, (old_rank, old_score, path, _)) in &before_map {
77        if let Some((new_rank, new_score, _, _)) = after_map.get(*span_id) {
78            if old_rank != new_rank || (old_score - new_score).abs() > 0.001 {
79                reranked.push(RerankEntry {
80                    span_id: span_id.to_string(),
81                    artifact_path: path.to_string(),
82                    old_rank: *old_rank,
83                    new_rank: *new_rank,
84                    old_score: *old_score,
85                    new_score: *new_score,
86                });
87            }
88        }
89    }
90
91    // Sort for deterministic output
92    added.sort_by_key(|e| e.rank);
93    removed.sort_by_key(|e| e.rank);
94    reranked.sort_by_key(|e| e.new_rank);
95
96    WorkingSetDiff {
97        query: after.query.clone(),
98        before_hash: before.deterministic_hash(),
99        after_hash: after.deterministic_hash(),
100        added,
101        removed,
102        reranked,
103    }
104}
105
106/// Check if two working sets are identical
107pub fn working_sets_identical(before: &WorkingSet, after: &WorkingSet) -> bool {
108    before.deterministic_hash() == after.deterministic_hash()
109}
110
111/// Summarize diff as human-readable string
112pub fn summarize_diff(diff: &WorkingSetDiff) -> String {
113    let mut parts = Vec::new();
114
115    if diff.added.is_empty() && diff.removed.is_empty() && diff.reranked.is_empty() {
116        return "No changes".to_string();
117    }
118
119    if !diff.added.is_empty() {
120        parts.push(format!("{} added", diff.added.len()));
121    }
122    if !diff.removed.is_empty() {
123        parts.push(format!("{} removed", diff.removed.len()));
124    }
125    if !diff.reranked.is_empty() {
126        parts.push(format!("{} reranked", diff.reranked.len()));
127    }
128
129    parts.join(", ")
130}
131
#[cfg(test)]
mod tests {
    use super::*;
    use crate::types::Citation;

    /// Build a working set fixture from `(span_id, artifact_path, score)`
    /// tuples. Citations keep the order of the input, so a tuple's position
    /// determines the rank seen by `diff_working_sets`. Every citation covers
    /// lines 1-10 of artifact id "art".
    fn make_working_set(citations: Vec<(&str, &str, f32)>) -> WorkingSet {
        let mut cites: Vec<Citation> = Vec::with_capacity(citations.len());
        for (id, path, score) in citations {
            cites.push(Citation {
                span_id: id.to_string(),
                artifact_id: "art".to_string(),
                artifact_path: path.to_string(),
                start_line: 1,
                end_line: 10,
                score,
            });
        }

        // The text field is the comma-joined list of artifact paths.
        let joined_paths = cites
            .iter()
            .map(|c| c.artifact_path.as_str())
            .collect::<Vec<_>>()
            .join(",");

        WorkingSet {
            text: joined_paths,
            spans: vec![],
            citations: cites,
            tokens_used: 100,
            query: "test".to_string(),
            compilation_time_ms: 50,
            manifest: None,
            explain: None,
        }
    }

    /// Diffing a working set against itself reports nothing.
    #[test]
    fn test_diff_identical() {
        let same = make_working_set(vec![("1", "a.md", 0.9), ("2", "b.md", 0.8)]);
        let result = diff_working_sets(&same, &same);

        assert!(result.added.is_empty());
        assert!(result.removed.is_empty());
        assert!(result.reranked.is_empty());
    }

    /// A span appended in `after` is reported as added and nothing is removed.
    #[test]
    fn test_diff_added() {
        let old = make_working_set(vec![("1", "a.md", 0.9)]);
        let new = make_working_set(vec![("1", "a.md", 0.9), ("2", "b.md", 0.8)]);
        let result = diff_working_sets(&old, &new);

        assert_eq!(result.added.len(), 1);
        assert_eq!(result.added[0].span_id, "2");
        assert!(result.removed.is_empty());
    }

    /// A span dropped from `after` is reported as removed and nothing is added.
    #[test]
    fn test_diff_removed() {
        let old = make_working_set(vec![("1", "a.md", 0.9), ("2", "b.md", 0.8)]);
        let new = make_working_set(vec![("1", "a.md", 0.9)]);
        let result = diff_working_sets(&old, &new);

        assert!(result.added.is_empty());
        assert_eq!(result.removed.len(), 1);
        assert_eq!(result.removed[0].span_id, "2");
    }

    /// Swapping the order (and scores) of two spans reports both as reranked.
    #[test]
    fn test_diff_reranked() {
        let old = make_working_set(vec![("1", "a.md", 0.9), ("2", "b.md", 0.8)]);
        let new = make_working_set(vec![("2", "b.md", 0.95), ("1", "a.md", 0.85)]);
        let result = diff_working_sets(&old, &new);

        assert!(result.added.is_empty());
        assert!(result.removed.is_empty());
        assert_eq!(result.reranked.len(), 2);
    }
}