Skip to main content

harn_hostlib/code_index/
overlay.rs

1//! Per-branch CDC overlay over the workspace symbol graph.
2//!
3//! The base [`SymbolGraph`] is what `rebuild` produces for the currently
4//! checked-out branch. When a request asks for a *different* branch via
5//! `code_index_branch_overlay(branch)`, we lazily layer a delta on top:
6//! per-file replacements + deletions that mirror what changed between the
7//! base and the named branch. The base graph is still the authoritative
8//! source for every untouched file, so switching branches reuses ≥95% of
9//! the main index by construction (only the changed slice gets re-parsed).
10//!
11//! ## Wire model
12//!
13//! - [`OverlayState::activate`] swaps the currently-active overlay; calls
14//!   that follow observe the merged view.
15//! - [`OverlayState::reuse_fraction`] reports the share of base files
16//!   that the active overlay reused without touching — the metric the
17//!   issue's "≥95% reuse" acceptance criterion measures.
18//! - [`OverlayState::apply_file`] is the per-file delta primitive used
19//!   by both manual stage-and-apply tests and a future Git-driven
20//!   change-detector.
21
22use std::collections::{BTreeMap, BTreeSet, VecDeque};
23
24use crate::ast::Language;
25
26use super::file_table::FileId;
27use super::symbol_graph::SymbolGraph;
28
29/// Cap on the number of branch overlays kept in memory at once.
30///
31/// Each registered overlay holds a full `materialized` clone of the base
32/// graph (see [`BranchOverlay::materialize`]), so unbounded registration
33/// is a real memory leak for long-running daemons that swap between many
34/// branches. The cap evicts the least-recently-set overlay when this is
35/// exceeded — chosen large enough to comfortably cover typical
36/// stack-of-PRs flows while keeping the bound predictable.
37pub const OVERLAY_CAPACITY: usize = 8;
38
39/// Per-file delta entry kept on an overlay.
40#[derive(Debug, Clone)]
41pub enum FileDelta {
42    /// The branch has different bytes for this file; rebuild it.
43    Modified {
44        /// Workspace-relative path of the file on the branch.
45        path: String,
46        /// Tree-sitter language to parse the branch source as.
47        language: Language,
48        /// Branch-specific source.
49        source: String,
50        /// Raw import strings to register against the branch view.
51        imports: Vec<String>,
52    },
53    /// The branch no longer has this file at all.
54    Removed,
55}
56
57/// A delta layered on top of the base [`SymbolGraph`]. The overlay
58/// owns the rebuilt slice for every changed file — the base graph is
59/// never mutated when an overlay is activated.
60#[derive(Debug, Clone)]
61pub struct BranchOverlay {
62    /// Branch name (e.g. `topic/auth-refactor`).
63    pub branch: String,
64    deltas: BTreeMap<FileId, FileDelta>,
65    materialized: SymbolGraph,
66    materialized_files: BTreeSet<FileId>,
67}
68
69impl BranchOverlay {
70    /// Make a fresh overlay anchored on `branch`. It carries no deltas
71    /// until [`Self::stage`] is called.
72    pub fn new(branch: impl Into<String>) -> Self {
73        Self {
74            branch: branch.into(),
75            deltas: BTreeMap::new(),
76            materialized: SymbolGraph::new(),
77            materialized_files: BTreeSet::new(),
78        }
79    }
80
81    /// Record a per-file delta. Replaces any previous entry for the
82    /// same `file_id`.
83    pub fn stage(&mut self, file_id: FileId, delta: FileDelta) {
84        self.deltas.insert(file_id, delta);
85    }
86
87    /// Total number of changed files this overlay covers.
88    pub fn changed_count(&self) -> usize {
89        self.deltas.len()
90    }
91
92    /// Files that the overlay either adds/modifies or removes.
93    pub fn changed_files(&self) -> impl Iterator<Item = FileId> + '_ {
94        self.deltas.keys().copied()
95    }
96
97    /// Apply every staged delta to the overlay's private graph, using
98    /// the base graph as the starting point for inheritance. Returns the
99    /// new node count of the overlay-private graph.
100    pub fn materialize(&mut self, base: &SymbolGraph) -> usize {
101        self.materialized = base.clone();
102        self.materialized_files.clear();
103        for (file_id, delta) in &self.deltas {
104            self.materialized_files.insert(*file_id);
105            match delta {
106                FileDelta::Removed => {
107                    self.materialized.remove_file(*file_id);
108                }
109                FileDelta::Modified {
110                    path,
111                    language,
112                    source,
113                    imports,
114                } => {
115                    self.materialized
116                        .rebuild_file(*file_id, path, *language, source, imports);
117                }
118            }
119        }
120        self.materialized.node_count()
121    }
122
123    /// Borrow the materialized view. Callers must call
124    /// [`Self::materialize`] first or this returns an empty graph.
125    pub fn graph(&self) -> &SymbolGraph {
126        &self.materialized
127    }
128}
129
130/// Holder for the active overlay (if any). Threaded through the index
131/// state so every Cypher query can opt into the per-branch view.
132///
133/// Overlay storage is bounded by [`OVERLAY_CAPACITY`]; once that many
134/// branches have been registered, [`Self::set`] evicts the
135/// least-recently-set overlay to keep daemon memory predictable. Each
136/// overlay still owns a full base-graph clone (see
137/// [`BranchOverlay::materialize`]), so the cap matters in practice.
138#[derive(Debug, Default, Clone)]
139pub struct OverlayState {
140    overlays: BTreeMap<String, BranchOverlay>,
141    /// Insertion order of branch names, used to evict the oldest entry
142    /// when `overlays.len()` would exceed [`OVERLAY_CAPACITY`].
143    insertion_order: VecDeque<String>,
144    active: Option<String>,
145}
146
147impl OverlayState {
148    /// Construct an empty overlay registry with no active branch.
149    pub fn new() -> Self {
150        Self::default()
151    }
152
153    /// Insert or replace a branch overlay. Activating it is a separate
154    /// step ([`Self::activate`]).
155    ///
156    /// When the registry already holds [`OVERLAY_CAPACITY`] distinct
157    /// branches, the oldest one is evicted before insertion. If the
158    /// evicted overlay happens to be currently active, the active slot
159    /// is cleared so we don't leave a dangling pointer.
160    pub fn set(&mut self, overlay: BranchOverlay) {
161        let branch = overlay.branch.clone();
162        // Bump branch to "most recent" if it was already present.
163        if self.overlays.contains_key(&branch) {
164            self.insertion_order.retain(|b| b != &branch);
165        } else if self.overlays.len() >= OVERLAY_CAPACITY {
166            if let Some(victim) = self.insertion_order.pop_front() {
167                self.overlays.remove(&victim);
168                if self.active.as_deref() == Some(victim.as_str()) {
169                    self.active = None;
170                }
171            }
172        }
173        self.insertion_order.push_back(branch.clone());
174        self.overlays.insert(branch, overlay);
175    }
176
177    /// Borrow a stored overlay (if registered).
178    pub fn get(&self, branch: &str) -> Option<&BranchOverlay> {
179        self.overlays.get(branch)
180    }
181
182    /// Mutable borrow of a stored overlay.
183    pub fn get_mut(&mut self, branch: &str) -> Option<&mut BranchOverlay> {
184        self.overlays.get_mut(branch)
185    }
186
187    /// Name of the currently active overlay (if any).
188    pub fn active(&self) -> Option<&str> {
189        self.active.as_deref()
190    }
191
192    /// Switch the active overlay. Pass `None` to fall back to the base
193    /// graph.
194    pub fn activate(&mut self, branch: Option<String>) {
195        self.active = branch;
196    }
197
198    /// Borrow the active overlay's materialized graph, falling back to
199    /// `base` when no overlay is active.
200    pub fn graph<'a>(&'a self, base: &'a SymbolGraph) -> &'a SymbolGraph {
201        let Some(name) = self.active.as_deref() else {
202            return base;
203        };
204        match self.overlays.get(name) {
205            Some(overlay) => overlay.graph(),
206            None => base,
207        }
208    }
209
210    /// Share of `base` files the active overlay reused untouched. Returns
211    /// 1.0 when no overlay is active (the base is fully reused).
212    pub fn reuse_fraction(&self, base: &SymbolGraph) -> f64 {
213        let total = base.file_ids().len();
214        if total == 0 {
215            return 1.0;
216        }
217        let Some(name) = self.active.as_deref() else {
218            return 1.0;
219        };
220        let Some(overlay) = self.overlays.get(name) else {
221            return 1.0;
222        };
223        let changed = overlay
224            .materialized_files
225            .iter()
226            .filter(|fid| base.file_ids().contains(fid))
227            .count();
228        ((total - changed) as f64) / (total as f64)
229    }
230}
231
232#[cfg(test)]
233mod tests {
234    use super::*;
235    use crate::ast::Language;
236
237    fn base_graph() -> SymbolGraph {
238        let mut g = SymbolGraph::new();
239        g.rebuild_file(1, "src/a.rs", Language::Rust, "fn a() {}\n", &[]);
240        g.rebuild_file(2, "src/b.rs", Language::Rust, "fn b() {}\n", &[]);
241        g.rebuild_file(3, "src/c.rs", Language::Rust, "fn c() {}\n", &[]);
242        g
243    }
244
245    #[test]
246    fn unmodified_overlay_returns_base_view() {
247        let base = base_graph();
248        let mut overlay = BranchOverlay::new("topic/x");
249        overlay.materialize(&base);
250        let mut state = OverlayState::new();
251        state.set(overlay);
252        state.activate(Some("topic/x".into()));
253        assert_eq!(state.graph(&base).node_count(), base.node_count());
254        assert!(state.reuse_fraction(&base) >= 0.99);
255    }
256
257    #[test]
258    fn modified_overlay_replaces_a_single_file() {
259        let base = base_graph();
260        let mut overlay = BranchOverlay::new("topic/y");
261        overlay.stage(
262            2,
263            FileDelta::Modified {
264                path: "src/b.rs".into(),
265                language: Language::Rust,
266                source: "fn b2() {}\nfn b3() {}\n".into(),
267                imports: Vec::new(),
268            },
269        );
270        overlay.materialize(&base);
271        let mut state = OverlayState::new();
272        state.set(overlay);
273        state.activate(Some("topic/y".into()));
274        let view = state.graph(&base);
275        // The Function `b` should no longer appear (only the module of
276        // that name remains, derived from the basename of src/b.rs).
277        let b_funcs: Vec<_> = view
278            .iter_nodes()
279            .filter(|n| n.kind == super::super::symbol_graph::NodeKind::Function && n.name == "b")
280            .collect();
281        assert!(b_funcs.is_empty(), "fn b should be gone after overlay");
282        assert!(!view.nodes_named("b2").is_empty());
283        let reuse = state.reuse_fraction(&base);
284        assert!(
285            (0.66..1.0).contains(&reuse),
286            "expected ~2/3 reuse, got {reuse}"
287        );
288    }
289
290    #[test]
291    fn registry_evicts_oldest_overlay_past_capacity() {
292        let base = base_graph();
293        let mut state = OverlayState::new();
294        // Fill the registry exactly to capacity.
295        for i in 0..OVERLAY_CAPACITY {
296            let mut overlay = BranchOverlay::new(format!("topic/{i}"));
297            overlay.materialize(&base);
298            state.set(overlay);
299        }
300        assert!(state.get("topic/0").is_some());
301        assert_eq!(state.overlays.len(), OVERLAY_CAPACITY);
302
303        // One more — the oldest (topic/0) should be evicted.
304        let mut overflow = BranchOverlay::new("topic/new");
305        overflow.materialize(&base);
306        state.set(overflow);
307        assert_eq!(state.overlays.len(), OVERLAY_CAPACITY);
308        assert!(
309            state.get("topic/0").is_none(),
310            "expected oldest overlay to be evicted"
311        );
312        assert!(state.get("topic/new").is_some());
313
314        // Re-setting an existing branch must not evict anything — it
315        // just refreshes recency.
316        let mut refreshed = BranchOverlay::new("topic/1");
317        refreshed.materialize(&base);
318        state.set(refreshed);
319        assert_eq!(state.overlays.len(), OVERLAY_CAPACITY);
320        assert!(state.get("topic/1").is_some());
321    }
322
323    #[test]
324    fn evicting_active_overlay_clears_active_slot() {
325        let base = base_graph();
326        let mut state = OverlayState::new();
327        for i in 0..OVERLAY_CAPACITY {
328            let mut overlay = BranchOverlay::new(format!("topic/{i}"));
329            overlay.materialize(&base);
330            state.set(overlay);
331        }
332        // Activate the soon-to-be-evicted entry.
333        state.activate(Some("topic/0".into()));
334        assert_eq!(state.active(), Some("topic/0"));
335
336        let mut overflow = BranchOverlay::new("topic/new");
337        overflow.materialize(&base);
338        state.set(overflow);
339        assert_eq!(
340            state.active(),
341            None,
342            "active slot should clear when its overlay is evicted"
343        );
344    }
345
346    #[test]
347    fn removed_overlay_deletes_file_slice() {
348        let base = base_graph();
349        let mut overlay = BranchOverlay::new("topic/del");
350        overlay.stage(3, FileDelta::Removed);
351        overlay.materialize(&base);
352        let mut state = OverlayState::new();
353        state.set(overlay);
354        state.activate(Some("topic/del".into()));
355        assert!(state.graph(&base).nodes_named("c").is_empty());
356    }
357}