Skip to main content

wicked_estate_store/
lib.rs

1//! `wicked-estate-store` — [`GraphStore`] implementations.
2//!
3//! [`MemStore`] is the in-memory reference impl that proves the [`GraphStore`] contract (it
4//! passes `wicked_estate_core::conformance::graph_store_suite`). The SQLite+FTS5+sqlite-vec default store
//! and the SurrealDB challenger land at Wave 1.5 behind the same trait, chosen by bake-off.
7
8pub mod sqlite;
9pub use sqlite::{CompactStats, SqliteStore};
10// `Annotation` now lives in the core spine (the typed-annotations seam); re-export it here so
11// existing callers of `wicked_estate_store::Annotation` keep compiling.
12pub use wicked_estate_core::Annotation;
13
14// W1.5 bake-off challenger — compiled ONLY with --features surrealdb.
15#[cfg(feature = "surrealdb")]
16pub mod surreal;
17#[cfg(feature = "surrealdb")]
18pub use surreal::SurrealStore;
19
20#[cfg(feature = "pool")]
21pub mod pool;
22#[cfg(feature = "pool")]
23pub use pool::{SqlitePool, open_sqlite_pool};
24
25// Postgres backend — compiled ONLY with --features postgres.
26#[cfg(feature = "postgres")]
27pub mod postgres;
28#[cfg(feature = "postgres")]
29pub use postgres::PostgresStore;
30
31// ── Vector math helpers for MemStore (no external deps) ────────────────────
32
/// Euclidean (L2) length of `v`: the square root of the sum of its squared components.
#[inline]
fn mem_l2_norm(v: &[f32]) -> f32 {
    let sum_of_squares: f32 = v.iter().map(|&component| component * component).sum();
    sum_of_squares.sqrt()
}
37
38#[inline]
39fn mem_cosine_similarity(a: &[f32], b: &[f32], a_norm: f32) -> f32 {
40    debug_assert_eq!(a.len(), b.len());
41    let b_norm = mem_l2_norm(b);
42    if a_norm == 0.0 || b_norm == 0.0 {
43        return 0.0;
44    }
45    let dot: f32 = a.iter().zip(b.iter()).map(|(x, y)| x * y).sum();
46    (dot / (a_norm * b_norm)).clamp(-1.0, 1.0)
47}
48
49use std::collections::{BTreeMap, HashMap, HashSet, VecDeque};
50// `Annotation` is brought into module scope by the `pub use wicked_estate_core::Annotation` above.
51use wicked_estate_core::{
52    Change, ChangeOp, Direction, Edge, EdgeKind, Error, GraphRead, GraphStats, GraphStore,
53    GraphWrite, HistoricalEdge, Node, NodeKind, NodeSemantics, RepoInfo, Result, StoreCapabilities,
54    Subgraph, SymbolId, SymbolIndex, SymbolQuery, TraversalSpec, UnresolvedRef,
55};
56
/// In-memory graph store: reference implementation + test double for the trait contract.
///
/// All state lives in plain collections owned by the struct; nothing is persisted.
#[derive(Debug, Default)]
pub struct MemStore {
    // Graph nodes keyed by symbol id; an upsert replaces the previous node for the same id.
    nodes: HashMap<SymbolId, Node>,
    // Graph edges in insertion order (deduplicated on upsert via `Edge::dedup_key`).
    edges: Vec<Edge>,
    // References that have not been resolved to a symbol; appended as-is on upsert.
    unresolved: Vec<UnresolvedRef>,
    /// Wave 2.6: file → content digest map for incremental re-indexing.
    file_digests: HashMap<String, String>,
    // Set by `begin_batch`, cleared by `commit_batch`; nothing else in this file reads it.
    in_batch: bool,
    // W11.1: content-addressed source-text store (git_sha → text).
    content: HashMap<String, String>,
    // W11.1: file → git_sha pointer into content.
    file_git_shas: HashMap<String, String>,
    // W11.2: versioned query cache.
    cache: HashMap<String, (i64, String)>, // key → (version, value)
    // Current cache version; `bump_version` increments it, which makes older cache entries
    // unreadable through `cache_get`.
    graph_version: i64,
    // W5.2: per-symbol embedding vectors.
    embeddings: HashMap<SymbolId, Vec<f32>>,
    // W7.4 / W11.3: arbitrary key-value meta store (mirrors SqliteStore meta table).
    pub meta: HashMap<String, String>,
    // W7: repo provenance.
    repo_info: Option<RepoInfo>,
    // Semantic linking: symbol → NodeSemantics (description / requirement / validated).
    semantics: HashMap<SymbolId, NodeSemantics>,
    // W7: change log. `change_seq` is the sequence number of the most recent entry.
    changes: Vec<Change>,
    change_seq: u64,
    // W7: read-only edge history.
    // edge_history_files[i] is the file that was removed when edge_history[i] was archived.
    // Stored separately because HistoricalEdge (wicked-estate-core) carries no file field — the archived
    // edge may have been created without a location (e.g. synthetic edges).
    // The two vectors must always have the same length.
    edge_history: Vec<HistoricalEdge>,
    edge_history_files: Vec<String>,
    // Monotonic counter stamped onto each archived edge as `archived_seq`.
    history_archive_seq: u64,
    // When false (the derived default), `remove_file` archives nothing.
    history_enabled: bool,
    // Typed annotations, kept in insertion order so per-symbol reads and type-filtered reads are
    // deterministic. A bare push (not upsert) — many annotations per symbol, including duplicate
    // (type, key). Mirrors the SQLite `annotations` table (no FK enforced; annotate is a no-op for
    // absent symbols, matching the SQLite sid-lookup behaviour).
    annotations: Vec<(SymbolId, Annotation)>,
    // Monotonic counter used to stamp `ts` when an annotation is written with ts == 0, so ordering
    // by ts is stable in-memory without a wall clock (the SQLite store uses strftime there).
    annotation_seq: i64,
}
101
102impl MemStore {
103    /// Create a new in-memory store. `history_enabled` defaults to `false` (opt-in).
104    pub fn new() -> Self {
105        Self {
106            history_enabled: false,
107            ..Default::default()
108        }
109    }
110
111    /// Create a new in-memory store with edge-history archival enabled.
112    /// Used by conformance tests that assert history behaviour.
113    pub fn new_with_history() -> Self {
114        Self {
115            history_enabled: true,
116            ..Default::default()
117        }
118    }
119
    /// Enable or disable edge-history archival (default: `false` — both [`MemStore::new`] and
    /// the derived `Default` leave it off; [`MemStore::new_with_history`] turns it on).
    ///
    /// Only affects future `remove_file` calls; entries already in the history are kept.
    pub fn set_history_enabled(&mut self, on: bool) {
        self.history_enabled = on;
    }
124
125    fn kind_allowed(spec_kinds: &[EdgeKind], kind: &EdgeKind) -> bool {
126        spec_kinds.is_empty() || spec_kinds.contains(kind)
127    }
128
129    /// All file paths that have a stored digest. Used by the incremental CLI to detect deletions.
130    pub fn indexed_files(&self) -> Vec<String> {
131        self.file_digests.keys().cloned().collect()
132    }
133
134    /// Remove the digest entry for `file`. Called when a deleted file is cleaned up.
135    pub fn remove_file_digest(&mut self, file: &str) {
136        self.file_digests.remove(file);
137    }
138
139    // -----------------------------------------------------------------------
140    // W11.2 — Versioned query cache (prior art versioned cache-port pattern).
141    // -----------------------------------------------------------------------
142
143    /// Return the cached value for `key` only if it was stored at the current graph version.
144    /// Returns `None` when the key is absent or was stored at a prior version.
145    pub fn cache_get(&self, key: &str) -> Result<Option<String>> {
146        match self.cache.get(key) {
147            Some((ver, val)) if *ver == self.graph_version => Ok(Some(val.clone())),
148            _ => Ok(None),
149        }
150    }
151
152    /// Store `value` for `key` at the current graph version.
153    pub fn cache_put(&mut self, key: &str, value: &str) -> Result<()> {
154        self.cache
155            .insert(key.to_string(), (self.graph_version, value.to_string()));
156        Ok(())
157    }
158
159    /// Increment the graph version. All cache entries stored at prior versions become stale.
160    pub fn bump_version(&mut self) -> Result<()> {
161        self.graph_version += 1;
162        Ok(())
163    }
164
165    // -----------------------------------------------------------------------
166    // W5.2 — Vector embeddings (inherent, mirrors SqliteStore API).
167    // -----------------------------------------------------------------------
168
169    /// Store (or replace) the embedding vector for `symbol`.
170    pub fn set_embedding(&mut self, symbol: &SymbolId, vec: &[f32]) -> Result<()> {
171        if vec.is_empty() {
172            return Err(wicked_estate_core::Error::Invalid(
173                "embedding vector must be non-empty".into(),
174            ));
175        }
176        self.embeddings.insert(symbol.clone(), vec.to_vec());
177        Ok(())
178    }
179
180    /// Retrieve the stored embedding vector for `symbol`, or `None` if absent.
181    pub fn embedding(&self, symbol: &SymbolId) -> Result<Option<Vec<f32>>> {
182        Ok(self.embeddings.get(symbol).cloned())
183    }
184
185    /// Compact the store: prune dangling edges, stale cache entries, orphan embeddings and
186    /// content, and edge-history beyond the 20-row-per-file retention window.
187    /// Mirrors [`SqliteStore::compact`] — no VACUUM step because there is no on-disk file.
188    pub fn compact(&mut self) -> Result<crate::sqlite::CompactStats> {
189        // (1) prune dangling edges.
190        let dangling_edges = self.prune_dangling_edges()?;
191
192        // (2) prune stale cache rows.
193        let current_ver = self.graph_version;
194        let before_cache = self.cache.len();
195        self.cache.retain(|_, (ver, _)| *ver >= current_ver);
196        let stale_cache_rows = before_cache - self.cache.len();
197
198        // (3) orphan embeddings: symbol not in nodes.
199        let before_emb = self.embeddings.len();
200        let nodes = &self.nodes;
201        self.embeddings.retain(|sym, _| nodes.contains_key(sym));
202        let orphan_embeddings = before_emb - self.embeddings.len();
203
204        // (4) orphan content: git_sha not referenced by file_git_shas AND not referenced by
205        //     any edge_history row.
206        let live_shas: HashSet<&str> = self.file_git_shas.values().map(|s| s.as_str()).collect();
207        let history_shas: HashSet<&str> = self
208            .edge_history
209            .iter()
210            .map(|h| h.git_sha.as_str())
211            .collect();
212        let before_content = self.content.len();
213        self.content.retain(|sha, _| {
214            live_shas.contains(sha.as_str()) || history_shas.contains(sha.as_str())
215        });
216        let orphan_content = before_content - self.content.len();
217
218        // (5) edge_history retention: keep newest 20 per file; delete older.
219        // edge_history_files[i] is the file that was archived for edge_history[i].
220        // Group by file using the parallel files vector (HistoricalEdge has no file field).
221        debug_assert_eq!(
222            self.edge_history.len(),
223            self.edge_history_files.len(),
224            "edge_history and edge_history_files must stay in sync"
225        );
226        let mut file_to_seqs: HashMap<&str, Vec<u64>> = HashMap::new();
227        for (h, f) in self.edge_history.iter().zip(self.edge_history_files.iter()) {
228            file_to_seqs
229                .entry(f.as_str())
230                .or_default()
231                .push(h.archived_seq);
232        }
233        let mut keep_seqs: HashSet<u64> = HashSet::new();
234        for seqs in file_to_seqs.values_mut() {
235            seqs.sort_unstable_by(|a, b| b.cmp(a)); // descending
236            for &seq in seqs.iter().take(20) {
237                keep_seqs.insert(seq);
238            }
239        }
240        let before_hist = self.edge_history.len();
241        // Retain both vecs in sync by using index-based filtering.
242        let mut new_history: Vec<HistoricalEdge> = Vec::with_capacity(self.edge_history.len());
243        let mut new_files: Vec<String> = Vec::with_capacity(self.edge_history_files.len());
244        for (h, f) in self
245            .edge_history
246            .drain(..)
247            .zip(self.edge_history_files.drain(..))
248        {
249            if keep_seqs.contains(&h.archived_seq) {
250                new_history.push(h);
251                new_files.push(f);
252            }
253        }
254        self.edge_history = new_history;
255        self.edge_history_files = new_files;
256        let history_rows_pruned = before_hist - self.edge_history.len();
257
258        Ok(crate::sqlite::CompactStats {
259            dangling_edges,
260            stale_cache_rows,
261            orphan_embeddings,
262            orphan_content,
263            history_rows_pruned,
264        })
265    }
266
267    /// Find the `k` nearest symbols to `query` by cosine similarity (brute-force).
268    ///
269    /// Returns `(SymbolId, cosine_similarity)` pairs sorted descending (highest similarity
270    /// first).  Ties broken by `SymbolId` lexicographic order for deterministic output.
271    pub fn nearest(&self, query: &[f32], k: usize) -> Result<Vec<(SymbolId, f32)>> {
272        if query.is_empty() || k == 0 {
273            return Ok(vec![]);
274        }
275        let q_norm = mem_l2_norm(query);
276        if q_norm == 0.0 {
277            return Ok(vec![]);
278        }
279        let dim = query.len();
280        let mut scored: Vec<(SymbolId, f32)> = self
281            .embeddings
282            .iter()
283            .filter(|(_, v)| v.len() == dim)
284            .map(|(id, v)| {
285                let sim = mem_cosine_similarity(query, v, q_norm);
286                (id.clone(), sim)
287            })
288            .collect();
289        scored.sort_by(|a, b| {
290            b.1.partial_cmp(&a.1)
291                .unwrap_or(std::cmp::Ordering::Equal)
292                .then_with(|| a.0.0.cmp(&b.0.0))
293        });
294        scored.truncate(k);
295        Ok(scored)
296    }
297
298    /// Return every stored `(symbol, embedding)` pair. Order is unspecified.
299    ///
300    /// Unlike [`nearest`](Self::nearest) (a top-k point query), this hands back the full vector
301    /// set so analyses that operate over *all* embeddings — semantic clustering — can run without
302    /// issuing N queries. O(n·d) clone.
303    pub fn all_embeddings(&self) -> Result<Vec<(SymbolId, Vec<f32>)>> {
304        Ok(self
305            .embeddings
306            .iter()
307            .map(|(id, v)| (id.clone(), v.clone()))
308            .collect())
309    }
310}
311
312impl GraphWrite for MemStore {
313    fn begin_batch(&mut self) -> Result<()> {
314        self.in_batch = true;
315        Ok(())
316    }
317
318    fn commit_batch(&mut self) -> Result<()> {
319        self.in_batch = false;
320        Ok(())
321    }
322
323    fn upsert_nodes(&mut self, nodes: &[Node]) -> Result<()> {
324        for n in nodes {
325            self.nodes.insert(n.symbol.clone(), n.clone());
326        }
327        Ok(())
328    }
329
330    fn upsert_edges(&mut self, edges: &[Edge]) -> Result<()> {
331        for e in edges {
332            let key = e.dedup_key();
333            match self.edges.iter_mut().find(|x| x.dedup_key() == key) {
334                // On a collision the higher-confidence edge wins (W3.4 max-confidence merge).
335                Some(existing) if e.confidence.get() >= existing.confidence.get() => {
336                    *existing = e.clone();
337                }
338                Some(_) => {}
339                None => self.edges.push(e.clone()),
340            }
341        }
342        Ok(())
343    }
344
345    fn upsert_unresolved_refs(&mut self, refs: &[UnresolvedRef]) -> Result<()> {
346        self.unresolved.extend_from_slice(refs);
347        Ok(())
348    }
349
350    fn remove_file(&mut self, file: &str) -> Result<()> {
351        // Step 1: read current git_sha for this file (the version being superseded).
352        let current_git_sha = self.file_git_shas.get(file).cloned().unwrap_or_default();
353
354        // Step 2: collect the set of symbols defined in this file BEFORE we remove nodes.
355        // We need it for archival (edges whose source is in this file) and for Step 3.
356        let file_symbols: HashSet<SymbolId> = self
357            .nodes
358            .values()
359            .filter(|n| n.location.file == file)
360            .map(|n| n.symbol.clone())
361            .collect();
362
363        // Step 3: if history enabled, archive edges that belong to this file.
364        // An edge "belongs to" this file when its location.file matches OR its source symbol
365        // is defined in this file — covering edges created without an explicit location.
366        if self.history_enabled {
367            let edges_to_archive: Vec<Edge> = self
368                .edges
369                .iter()
370                .filter(|e| {
371                    let loc_file = e.location.as_ref().map(|l| l.file.as_str()).unwrap_or("");
372                    loc_file == file || file_symbols.contains(&e.source)
373                })
374                .cloned()
375                .collect();
376            for edge in edges_to_archive {
377                self.history_archive_seq += 1;
378                self.edge_history.push(HistoricalEdge {
379                    git_sha: current_git_sha.clone(),
380                    archived_seq: self.history_archive_seq,
381                    edge,
382                });
383                self.edge_history_files.push(file.to_string());
384            }
385        }
386
387        // Step 4: remove nodes, edges, unresolved refs, digest, git_sha pointer.
388        // file_symbols was already computed in Step 2 above.
389        self.nodes.retain(|_, n| n.location.file != file);
390        self.edges.retain(|e| {
391            let loc_file = e.location.as_ref().map(|l| l.file.as_str()).unwrap_or("");
392            loc_file != file && !file_symbols.contains(&e.source)
393        });
394        self.unresolved.retain(|r| r.location.file != file);
395        self.file_digests.remove(file);
396        self.file_git_shas.remove(file);
397        // NOTE: do NOT remove from self.content — content is content-addressed and may be
398        // retained for history; orphans are pruned in compact().
399        // Remove embeddings for all removed symbols.
400        for sym in &file_symbols {
401            self.embeddings.remove(sym);
402        }
403        Ok(())
404    }
405
406    fn set_file_digest(&mut self, file: &str, digest: &str) -> Result<()> {
407        self.file_digests
408            .insert(file.to_string(), digest.to_string());
409        Ok(())
410    }
411
412    fn set_file_content(&mut self, file: &str, text: &str) -> Result<()> {
413        let sha = crate::sqlite::git_blob_sha(text);
414        // Dedup: INSERT OR IGNORE semantics — only store if sha not already present.
415        self.content
416            .entry(sha.clone())
417            .or_insert_with(|| text.to_string());
418        // Update the file → sha pointer.
419        self.file_git_shas.insert(file.to_string(), sha);
420        Ok(())
421    }
422
423    fn prune_dangling_edges(&mut self) -> Result<usize> {
424        let before = self.edges.len();
425        self.edges
426            .retain(|e| self.nodes.contains_key(&e.source) && self.nodes.contains_key(&e.target));
427        Ok(before - self.edges.len())
428    }
429
430    fn set_repo_info(&mut self, info: &RepoInfo) -> Result<()> {
431        self.repo_info = Some(info.clone());
432        Ok(())
433    }
434
435    fn log_change(&mut self, op: ChangeOp, target: &str) -> Result<()> {
436        self.change_seq += 1;
437        self.changes.push(Change {
438            seq: self.change_seq,
439            op,
440            target: target.to_string(),
441        });
442        Ok(())
443    }
444
445    fn set_node_semantics(
446        &mut self,
447        symbol: &SymbolId,
448        description: Option<&str>,
449        requirement: Option<&str>,
450        requirement_validated: Option<bool>,
451    ) -> Result<()> {
452        // No-op if the symbol is not a node.
453        if !self.nodes.contains_key(symbol) {
454            return Ok(());
455        }
456        // No-op if nothing is being changed.
457        if description.is_none() && requirement.is_none() && requirement_validated.is_none() {
458            return Ok(());
459        }
460        let entry = self.semantics.entry(symbol.clone()).or_default();
461        if let Some(d) = description {
462            entry.description = Some(d.to_string());
463        }
464        if let Some(r) = requirement {
465            entry.requirement = Some(r.to_string());
466        }
467        if let Some(v) = requirement_validated {
468            entry.requirement_validated = v;
469        }
470        Ok(())
471    }
472
473    fn annotate(&mut self, symbol: &SymbolId, mut annotation: Annotation) -> Result<()> {
474        // No-op if the symbol is not a node (mirrors the SQLite sid-lookup no-op).
475        if !self.nodes.contains_key(symbol) {
476            return Ok(());
477        }
478        // Stamp ts from a monotonic counter when unset, so ordering by ts is deterministic
479        // in-memory (the SQLite store relies on the strftime column default for this).
480        if annotation.ts == 0 {
481            self.annotation_seq += 1;
482            annotation.ts = self.annotation_seq;
483        }
484        // Bare push (NOT upsert): many annotations per symbol, including duplicate (type, key).
485        self.annotations.push((symbol.clone(), annotation));
486        Ok(())
487    }
488
489    fn delete_annotations(
490        &mut self,
491        symbol: &SymbolId,
492        ty: Option<&str>,
493        key: &str,
494    ) -> Result<usize> {
495        let before = self.annotations.len();
496        // Remove rows for this symbol matching `key`, scoped to `ty` when Some.
497        // `type` is matched as an opaque string — no per-type branching (rules-as-DATA).
498        self.annotations.retain(|(s, a)| {
499            let matches = s == symbol && a.key == key && ty.is_none_or(|t| a.r#type == t);
500            !matches
501        });
502        Ok(before - self.annotations.len())
503    }
504}
505
506impl GraphRead for MemStore {
507    fn capabilities(&self) -> StoreCapabilities {
508        StoreCapabilities {
509            full_text_search: false,
510            vector_search: true, // W5.2: brute-force cosine via embeddings HashMap
511            server_side_traversal: true, // in-process, no round-trips
512            transactional_batch: false, // begin/commit are no-ops
513            shared_writers: false,
514        }
515    }
516
517    fn get_node(&self, id: &SymbolId) -> Result<Option<Node>> {
518        Ok(self.nodes.get(id).cloned())
519    }
520
521    fn find_symbols(&self, query: &SymbolQuery) -> Result<Vec<Node>> {
522        let mut out: Vec<Node> = self
523            .nodes
524            .values()
525            .filter(|n| {
526                if let Some(name) = &query.exact_name {
527                    if &n.name != name {
528                        return false;
529                    }
530                }
531                if let Some(text) = &query.text {
532                    let hay = format!("{} {}", n.name, n.signature.clone().unwrap_or_default())
533                        .to_lowercase();
534                    if !hay.contains(&text.to_lowercase()) {
535                        return false;
536                    }
537                }
538                if !query.kinds.is_empty() && !query.kinds.contains(&n.kind) {
539                    return false;
540                }
541                if let Some(lang) = &query.language {
542                    if &n.language != lang {
543                        return false;
544                    }
545                }
546                true
547            })
548            .cloned()
549            .collect();
550        out.sort_by(|a, b| a.symbol.0.cmp(&b.symbol.0)); // deterministic
551        if let Some(limit) = query.limit {
552            out.truncate(limit);
553        }
554        Ok(out)
555    }
556
557    fn neighbors(&self, id: &SymbolId, dir: Direction) -> Result<Vec<Edge>> {
558        Ok(self
559            .edges
560            .iter()
561            .filter(|e| match dir {
562                Direction::Dependents => &e.target == id,
563                Direction::Dependencies => &e.source == id,
564                Direction::Both => &e.source == id || &e.target == id,
565            })
566            .cloned()
567            .collect())
568    }
569
570    fn traverse(&self, start: &SymbolId, spec: &TraversalSpec) -> Result<Subgraph> {
571        let mut depths: BTreeMap<String, u32> = BTreeMap::new();
572        let mut sub_nodes: Vec<Node> = Vec::new();
573        let mut sub_edges: Vec<Edge> = Vec::new();
574        let mut seen: HashSet<SymbolId> = HashSet::new();
575        let mut queue: VecDeque<(SymbolId, u32)> = VecDeque::new();
576        let mut truncated = false;
577
578        seen.insert(start.clone());
579        queue.push_back((start.clone(), 0));
580        if let Some(n) = self.nodes.get(start) {
581            sub_nodes.push(n.clone());
582        }
583
584        while let Some((cur, depth)) = queue.pop_front() {
585            if depth >= spec.max_depth {
586                continue;
587            }
588            for e in self.neighbors(&cur, spec.direction)? {
589                if e.confidence.get() < spec.min_confidence {
590                    continue;
591                }
592                if !Self::kind_allowed(&spec.edge_kinds, &e.kind) {
593                    continue;
594                }
595                // The endpoint we advance to, relative to the traversal direction.
596                let next = match spec.direction {
597                    Direction::Dependents => e.source.clone(),
598                    Direction::Dependencies => e.target.clone(),
599                    Direction::Both => {
600                        if e.source == cur {
601                            e.target.clone()
602                        } else {
603                            e.source.clone()
604                        }
605                    }
606                };
607                sub_edges.push(e.clone());
608                if seen.contains(&next) {
609                    continue;
610                }
611                if sub_nodes.len() >= spec.max_nodes {
612                    truncated = true;
613                    continue;
614                }
615                seen.insert(next.clone());
616                depths.insert(next.0.clone(), depth + 1);
617                if let Some(n) = self.nodes.get(&next) {
618                    sub_nodes.push(n.clone());
619                }
620                queue.push_back((next, depth + 1));
621            }
622        }
623
624        Ok(Subgraph {
625            nodes: sub_nodes,
626            edges: sub_edges,
627            depths,
628            truncated,
629        })
630    }
631
632    fn all_nodes(&self) -> Result<Vec<Node>> {
633        Ok(self.nodes.values().cloned().collect())
634    }
635
636    fn all_edges(&self) -> Result<Vec<Edge>> {
637        Ok(self.edges.clone())
638    }
639
640    fn unresolved_refs_for_name(&self, name: &str) -> Result<Vec<UnresolvedRef>> {
641        Ok(self
642            .unresolved
643            .iter()
644            .filter(|r| r.raw_name == name)
645            .cloned()
646            .collect())
647    }
648
649    fn file_digest(&self, file: &str) -> Result<Option<String>> {
650        Ok(self.file_digests.get(file).cloned())
651    }
652
653    fn file_git_sha(&self, file: &str) -> Result<Option<String>> {
654        Ok(self.file_git_shas.get(file).cloned())
655    }
656
657    fn repo_info(&self) -> Result<Option<RepoInfo>> {
658        Ok(self.repo_info.clone())
659    }
660
661    fn changes_since(&self, cursor: u64) -> Result<Vec<Change>> {
662        let out: Vec<Change> = self
663            .changes
664            .iter()
665            .filter(|c| c.seq > cursor)
666            .take(10_000)
667            .cloned()
668            .collect();
669        Ok(out)
670    }
671
672    fn edge_history(&self, file: &str) -> Result<Vec<HistoricalEdge>> {
673        // edge_history_files[i] is the file that was removed when edge_history[i] was archived.
674        // Return entries for this file, newest first.
675        let mut out: Vec<HistoricalEdge> = self
676            .edge_history
677            .iter()
678            .zip(self.edge_history_files.iter())
679            .filter(|(_, f)| f.as_str() == file)
680            .map(|(h, _)| h.clone())
681            .collect();
682        out.sort_by_key(|h| std::cmp::Reverse(h.archived_seq));
683        Ok(out)
684    }
685
686    fn file_content(&self, file: &str) -> Result<Option<String>> {
687        // Resolve via content-addressed join: file_git_shas[file] → content[sha].
688        let text = self
689            .file_git_shas
690            .get(file)
691            .and_then(|sha| self.content.get(sha))
692            .cloned();
693        Ok(text)
694    }
695
696    fn symbol_source(&self, node: &Node) -> Result<Option<String>> {
697        let span = node.location.span;
698        if span.start_byte == 0 && span.end_byte == 0 {
699            return Ok(None);
700        }
701        let text = match self.file_content(&node.location.file)? {
702            Some(t) => t,
703            None => return Ok(None),
704        };
705        let start = span.start_byte as usize;
706        let end = span.end_byte as usize;
707        if start > end || end > text.len() {
708            return Ok(None);
709        }
710        if !text.is_char_boundary(start) || !text.is_char_boundary(end) {
711            return Ok(None);
712        }
713        Ok(Some(text[start..end].to_string()))
714    }
715
716    fn node_semantics(&self, symbol: &SymbolId) -> Result<Option<NodeSemantics>> {
717        // Return None if the symbol has no recorded semantics (not the same as "absent node").
718        Ok(self.semantics.get(symbol).cloned())
719    }
720
721    fn find_by_requirement(&self, requirement: &str) -> Result<Vec<Node>> {
722        let mut out: Vec<Node> = self
723            .semantics
724            .iter()
725            .filter(|(_, s)| s.requirement.as_deref() == Some(requirement))
726            .filter_map(|(sym, _)| self.nodes.get(sym).cloned())
727            .collect();
728        out.sort_by(|a, b| a.symbol.0.cmp(&b.symbol.0)); // deterministic
729        Ok(out)
730    }
731
732    fn annotations(&self, symbol: &SymbolId) -> Result<Vec<Annotation>> {
733        // Insertion order == ts-ascending order (ts is stamped from a monotonic counter).
734        Ok(self
735            .annotations
736            .iter()
737            .filter(|(s, _)| s == symbol)
738            .map(|(_, a)| a.clone())
739            .collect())
740    }
741
742    fn annotations_by_type(&self, ty: &str) -> Result<Vec<(SymbolId, Annotation)>> {
743        // `type` matched as an opaque string (known convention OR custom — identical treatment).
744        let mut out: Vec<(SymbolId, Annotation)> = self
745            .annotations
746            .iter()
747            .filter(|(_, a)| a.r#type == ty)
748            .map(|(s, a)| (s.clone(), a.clone()))
749            .collect();
750        // Deterministic: by symbol, then ts (insertion order within a symbol is already ts order).
751        out.sort_by(|(sa, aa), (sb, ab)| sa.0.cmp(&sb.0).then(aa.ts.cmp(&ab.ts)));
752        Ok(out)
753    }
754
755    fn annotations_stale_since(&self, cutoff: i64) -> Result<Vec<(SymbolId, Annotation)>> {
756        // Freshness read: last_verified STRICTLY BEFORE cutoff (never-verified rows have
757        // last_verified == 0 → stale for any positive cutoff). Same is_stale_since rule as the
758        // struct, same ordering contract as annotations_by_type.
759        let mut out: Vec<(SymbolId, Annotation)> = self
760            .annotations
761            .iter()
762            .filter(|(_, a)| a.is_stale_since(cutoff))
763            .map(|(s, a)| (s.clone(), a.clone()))
764            .collect();
765        out.sort_by(|(sa, aa), (sb, ab)| sa.0.cmp(&sb.0).then(aa.ts.cmp(&ab.ts)));
766        Ok(out)
767    }
768
769    fn stats(&self) -> Result<GraphStats> {
770        let mut nodes_by_kind: BTreeMap<String, u64> = BTreeMap::new();
771        let mut file_count = 0u64;
772        for n in self.nodes.values() {
773            *nodes_by_kind
774                .entry(serde_json::to_string(&n.kind).unwrap_or_default())
775                .or_default() += 1;
776            if matches!(n.kind, NodeKind::File) {
777                file_count += 1;
778            }
779        }
780        let mut edges_by_kind: BTreeMap<String, u64> = BTreeMap::new();
781        for e in &self.edges {
782            *edges_by_kind
783                .entry(serde_json::to_string(&e.kind).unwrap_or_default())
784                .or_default() += 1;
785        }
786        Ok(GraphStats {
787            node_count: self.nodes.len() as u64,
788            edge_count: self.edges.len() as u64,
789            file_count,
790            unresolved_ref_count: self.unresolved.len() as u64,
791            nodes_by_kind,
792            edges_by_kind,
793            db_size_bytes: 0,
794        })
795    }
796}
797
798/// MemStore also serves as a [`SymbolIndex`] for the resolver pass.
799impl SymbolIndex for MemStore {
800    fn by_name(&self, name: &str) -> Vec<Node> {
801        self.nodes
802            .values()
803            .filter(|n| n.name == name)
804            .cloned()
805            .collect()
806    }
807    fn get(&self, id: &SymbolId) -> Option<Node> {
808        self.nodes.get(id).cloned()
809    }
810}
811
812// ---------------------------------------------------------------------------
813// Backend factory — the external-DB seam (docs/adr/ADR-003-storage-backends.md).
// SQLite is always built. Postgres is compiled in only with the `postgres` feature and
// otherwise returns a clear error; SurrealDB is *designed* and still returns a
// "not yet available" error. Adding an external backend later is one match arm here,
// with zero changes to any caller (CLI / MCP / bench / indexer).
817// ---------------------------------------------------------------------------
818
/// Storage backend selected by a connection spec; produced by [`StoreBackend::parse`] and
/// consumed by [`open_store`].
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum StoreBackend {
    /// SQLite: `sqlite://<path>`, a bare path, or `:memory:`. `path` is the text after the
    /// `sqlite://` scheme, or the whole spec when no scheme is present.
    Sqlite { path: String },
    /// `postgres://…` or `postgresql://…` — external relational backend (designed, ADR-003).
    /// `url` is the complete spec, scheme included.
    Postgres { url: String },
    /// `surrealdb://…` — server graph backend (W1.5 bake-off challenger). `url` holds only the
    /// text that follows the `surrealdb://` scheme.
    SurrealDb { url: String },
}

impl StoreBackend {
    /// Classify a connection spec by its scheme prefix.
    ///
    /// Never fails: anything without a recognised scheme (including `:memory:` and the empty
    /// string) is treated as a SQLite path. Scheme matching is case-sensitive.
    pub fn parse(spec: &str) -> StoreBackend {
        if let Some(path) = spec.strip_prefix("sqlite://") {
            return Self::Sqlite {
                path: path.to_owned(),
            };
        }
        if let Some(rest) = spec.strip_prefix("surrealdb://") {
            return Self::SurrealDb {
                url: rest.to_owned(),
            };
        }
        let is_postgres = ["postgres://", "postgresql://"]
            .iter()
            .any(|scheme| spec.starts_with(*scheme));
        if is_postgres {
            return Self::Postgres {
                url: spec.to_owned(),
            };
        }
        // `:memory:` and bare paths both land here → sqlite.
        Self::Sqlite {
            path: spec.to_owned(),
        }
    }
}
855
856/// Open a graph store from a connection spec. Every entrypoint goes through this one seam, so
857/// an external backend drops in here with no caller changes.
858pub fn open_store(spec: &str) -> Result<Box<dyn GraphStore>> {
859    match StoreBackend::parse(spec) {
860        StoreBackend::Sqlite { path } if path == ":memory:" => {
861            Ok(Box::new(SqliteStore::in_memory()?))
862        }
863        StoreBackend::Sqlite { path } => Ok(Box::new(SqliteStore::open(path)?)),
864        #[cfg(feature = "postgres")]
865        StoreBackend::Postgres { url } => Ok(Box::new(PostgresStore::open(&url)?)),
866        #[cfg(not(feature = "postgres"))]
867        StoreBackend::Postgres { .. } => Err(Error::Invalid(
868            "postgres backend requires the 'postgres' feature (ADR-003)".into(),
869        )),
870        StoreBackend::SurrealDb { .. } => Err(Error::Invalid(
871            "surrealdb backend lands in the W1.5 bake-off".into(),
872        )),
873    }
874}
875
876// ─────────────────────────────────────────────────────────────────────────────
877// Extension traits for store operations beyond the frozen wicked-estate-core GraphStore trait.
878//
879// The `wicked-estate-core` GraphStore trait is object-safe and frozen.  `bump_version`, `cache_put`,
880// `cache_get`, `meta_set`, `meta_get` are inherent methods on `SqliteStore` and `MemStore`.
881//
// `GraphStoreMutExt` is an **extension subtrait** of `GraphStore` (it names `GraphStore` as its
// supertrait) defined here in wicked-estate-store, not in wicked-estate-core.  `open_store_ext`
// returns `Box<dyn GraphStoreMutExt>`.  Callers that need versioning/meta/cache use
// `open_store_ext` instead of `open_store`.  Callers that only need topology (MCP reads)
// continue using `open_store` / `&dyn GraphRead`.
886// ─────────────────────────────────────────────────────────────────────────────
887
888/// Extension trait for mutable store operations not on the frozen wicked-estate-core GraphStore trait.
889/// Object-safe (all methods take/return concrete types via `&str` / `Option<String>`).
890/// Implemented for every concrete store shipped in this crate.
891pub trait GraphStoreMutExt: GraphStore {
892    /// Increment the graph version, invalidating all prior cache entries.
893    fn version_bump(&mut self);
894    /// Write an arbitrary key→value pair to the meta store (survives across sessions).
895    fn meta_set_key(&mut self, key: &str, value: &str);
896    /// Read a meta key. Returns `None` when absent.
897    fn meta_get_key(&self, key: &str) -> Option<String>;
898    /// Write a versioned cache entry (stale at next `version_bump`).
899    fn cache_put_key(&mut self, key: &str, value: &str);
900    /// Read a versioned cache entry. Returns `None` when absent or stale.
901    fn cache_get_key(&self, key: &str) -> Option<String>;
902
903    /// Upsert nodes into the nodes table WITHOUT touching the FTS index.
904    ///
905    /// Used by the hot write path in `index_path`: all nodes are written first with this
906    /// cheaper call, then [`bulk_rebuild_fts_for_files`] populates FTS in one SQL pass after
907    /// all rows exist.  This avoids the O(2 × nodes) per-node DELETE+INSERT into the FTS5
908    /// shadow tables during the main write loop.
909    ///
910    /// For `MemStore` this is identical to `upsert_nodes` (no separate FTS structure).
911    fn upsert_nodes_skip_fts(
912        &mut self,
913        nodes: &[wicked_estate_core::Node],
914    ) -> wicked_estate_core::Result<()>;
915
916    /// Bulk-rebuild the FTS index for every node that belongs to any of the given `files`.
917    ///
918    /// For `SqliteStore`: executes
919    ///   `DELETE FROM nodes_fts WHERE symbol IN (SELECT symbol FROM nodes WHERE file IN (...))`
920    ///   followed by
921    ///   `INSERT INTO nodes_fts(symbol,name,signature,doc) SELECT symbol,name,...`
922    /// — two statements regardless of the number of nodes, replacing the old O(2 × nodes)
923    /// per-node loop.
924    ///
925    /// For `MemStore`: no-op (MemStore has no FTS shadow table).
926    fn bulk_rebuild_fts_for_files(&mut self, files: &[&str]) -> wicked_estate_core::Result<()>;
927
928    /// Reclaim freelist pages back to the OS via `PRAGMA incremental_vacuum`.
929    ///
930    /// Only meaningful for `SqliteStore` (which sets `auto_vacuum=INCREMENTAL`).  All other
931    /// backends use the default no-op below.  Never returns an error for the NONE-mode case
932    /// — that PRAGMA is a documented no-op when `auto_vacuum=NONE`.
933    fn incremental_vacuum(&mut self) -> wicked_estate_core::Result<()> {
934        Ok(())
935    }
936}
937
938impl GraphStoreMutExt for SqliteStore {
939    fn version_bump(&mut self) {
940        let _ = self.bump_version();
941    }
942    fn meta_set_key(&mut self, key: &str, value: &str) {
943        let _ = self.meta_set(key, value);
944    }
945    fn meta_get_key(&self, key: &str) -> Option<String> {
946        self.meta_get(key).ok().flatten()
947    }
948    fn cache_put_key(&mut self, key: &str, value: &str) {
949        let _ = self.cache_put(key, value);
950    }
951    fn cache_get_key(&self, key: &str) -> Option<String> {
952        self.cache_get(key).ok().flatten()
953    }
954
955    fn upsert_nodes_skip_fts(
956        &mut self,
957        nodes: &[wicked_estate_core::Node],
958    ) -> wicked_estate_core::Result<()> {
959        self.upsert_nodes_no_fts(nodes)
960    }
961
962    fn bulk_rebuild_fts_for_files(&mut self, files: &[&str]) -> wicked_estate_core::Result<()> {
963        self.rebuild_fts_for_files(files)
964    }
965
966    fn incremental_vacuum(&mut self) -> wicked_estate_core::Result<()> {
967        self.incremental_vacuum()
968    }
969}
970
971impl GraphStoreMutExt for MemStore {
972    fn version_bump(&mut self) {
973        let _ = self.bump_version();
974    }
975    fn meta_set_key(&mut self, key: &str, value: &str) {
976        self.meta.insert(key.to_string(), value.to_string());
977    }
978    fn meta_get_key(&self, key: &str) -> Option<String> {
979        self.meta.get(key).cloned()
980    }
981    fn cache_put_key(&mut self, key: &str, value: &str) {
982        let _ = self.cache_put(key, value);
983    }
984    fn cache_get_key(&self, key: &str) -> Option<String> {
985        self.cache_get(key).ok().flatten()
986    }
987
988    /// MemStore has no FTS shadow table — identical to `upsert_nodes`.
989    fn upsert_nodes_skip_fts(
990        &mut self,
991        nodes: &[wicked_estate_core::Node],
992    ) -> wicked_estate_core::Result<()> {
993        use wicked_estate_core::GraphWrite;
994        self.upsert_nodes(nodes)
995    }
996
997    /// MemStore has no FTS shadow table — no-op.
998    fn bulk_rebuild_fts_for_files(&mut self, _files: &[&str]) -> wicked_estate_core::Result<()> {
999        Ok(())
1000    }
1001}
1002
1003/// Open a store from a spec and return a `Box<dyn GraphStoreMutExt>`.
1004///
1005/// Use instead of `open_store` when the caller needs versioning, meta, or cache access.
1006/// `Box<dyn GraphStoreMutExt>` coerces to `Box<dyn GraphStore>` via deref, and the inner
1007/// value implements all of `GraphRead`, `GraphWrite`, and `GraphStore`.
1008pub fn open_store_ext(spec: &str) -> wicked_estate_core::Result<Box<dyn GraphStoreMutExt>> {
1009    match StoreBackend::parse(spec) {
1010        StoreBackend::Sqlite { path } if path == ":memory:" => {
1011            Ok(Box::new(SqliteStore::in_memory()?))
1012        }
1013        StoreBackend::Sqlite { path } => Ok(Box::new(SqliteStore::open(path)?)),
1014        #[cfg(feature = "postgres")]
1015        StoreBackend::Postgres { url } => Ok(Box::new(PostgresStore::open(&url)?)),
1016        #[cfg(not(feature = "postgres"))]
1017        StoreBackend::Postgres { .. } => Err(Error::Invalid(
1018            "postgres backend requires the 'postgres' feature (ADR-003)".into(),
1019        )),
1020        StoreBackend::SurrealDb { .. } => Err(Error::Invalid(
1021            "surrealdb backend lands in the W1.5 bake-off".into(),
1022        )),
1023    }
1024}
1025
/// Postgres implementation (only with the `postgres` feature). Versioning / meta / cache calls
/// are best-effort wrappers over the store's own fallible methods.
#[cfg(feature = "postgres")]
impl GraphStoreMutExt for PostgresStore {
    /// Best-effort version bump; an error from `bump_version` is discarded.
    fn version_bump(&mut self) {
        _ = self.bump_version();
    }

    /// Best-effort meta write; an error from `meta_set` is discarded.
    fn meta_set_key(&mut self, key: &str, value: &str) {
        _ = self.meta_set(key, value);
    }

    /// Meta read; an error from `meta_get` is reported as `None`.
    fn meta_get_key(&self, key: &str) -> Option<String> {
        self.meta_get(key).unwrap_or_default()
    }

    /// Best-effort cache write; an error from `cache_put` is discarded.
    fn cache_put_key(&mut self, key: &str, value: &str) {
        _ = self.cache_put(key, value);
    }

    /// Cache read; an error from `cache_get` is reported as `None`.
    fn cache_get_key(&self, key: &str) -> Option<String> {
        self.cache_get(key).unwrap_or_default()
    }

    /// PostgresStore uses column-level trigram index — no separate FTS shadow table.
    /// Identical to `upsert_nodes`.
    fn upsert_nodes_skip_fts(
        &mut self,
        nodes: &[wicked_estate_core::Node],
    ) -> wicked_estate_core::Result<()> {
        use wicked_estate_core::GraphWrite;
        let outcome = self.upsert_nodes(nodes);
        outcome
    }

    /// PostgresStore uses column-level trigram index — no separate FTS table to rebuild.
    fn bulk_rebuild_fts_for_files(&mut self, _files: &[&str]) -> wicked_estate_core::Result<()> {
        Ok(())
    }
}
1059
1060// ─────────────────────────────────────────────────────────────────────────────
1061// Tests — W5.2 vector storage (MemStore)
1062// ─────────────────────────────────────────────────────────────────────────────
1063
#[cfg(test)]
mod tests {
    use super::*;

    // ── shared fixtures ──────────────────────────────────────────────────────

    /// Shorthand for a [`SymbolId`] built from a string slice.
    fn sym(s: &str) -> SymbolId {
        SymbolId(s.to_string())
    }

    /// Vector of length `dim` that is all zeros except for a `1.0` at index `i`.
    fn unit_vec(dim: usize, i: usize) -> Vec<f32> {
        let mut axis = vec![0.0_f32; dim];
        axis[i] = 1.0;
        axis
    }

    /// A Rust `Function` node named `symbol`, located in `file` with a zero span.
    fn make_node(symbol: &str, file: &str) -> wicked_estate_core::Node {
        use wicked_estate_core::{Language, Location, Node, NodeKind, Span};
        Node::new(
            sym(symbol),
            NodeKind::Function,
            symbol,
            Language::new("rust"),
            Location::new(file, Span::ZERO),
        )
    }

    /// A parsed-tier `Calls` edge from `source` to `target`, attributed to "test".
    fn calls_edge(source: &str, target: &str) -> wicked_estate_core::Edge {
        wicked_estate_core::Edge::new(
            sym(source),
            sym(target),
            wicked_estate_core::EdgeKind::Calls,
            wicked_estate_core::ResolutionTier::Parsed,
            "test",
        )
    }

    // ── embeddings: round-trip ───────────────────────────────────────────────

    #[test]
    fn mem_set_get_embedding_roundtrip() {
        let mut store = MemStore::new();
        let key = sym("foo");
        let written = [0.1_f32, 0.2, 0.3];
        store.set_embedding(&key, &written).unwrap();

        let stored = store.embedding(&key).unwrap().expect("should be present");
        assert_eq!(stored.len(), 3);
        for (got, want) in stored.iter().zip(written.iter()) {
            assert!((got - want).abs() < 1e-6, "roundtrip value mismatch");
        }
    }

    #[test]
    fn mem_embedding_absent_returns_none() {
        let store = MemStore::new();
        let lookup = store.embedding(&sym("missing")).unwrap();
        assert!(lookup.is_none());
    }

    #[test]
    fn mem_set_embedding_empty_vec_returns_error() {
        let mut store = MemStore::new();
        let outcome = store.set_embedding(&sym("bad"), &[]);
        assert!(outcome.is_err());
    }

    // ── embeddings: nearest ──────────────────────────────────────────────────

    #[test]
    fn mem_nearest_returns_closest_first() {
        let mut store = MemStore::new();
        // dim=4; "a", "b", "c" each align with a different axis (0, 1, 2).
        for (axis, name) in ["a", "b", "c"].iter().enumerate() {
            store.set_embedding(&sym(name), &unit_vec(4, axis)).unwrap();
        }

        // Query close to "a".
        let query = [0.9_f32, 0.1, 0.0, 0.0];
        let ranked = store.nearest(&query, 3).unwrap();

        assert_eq!(ranked.len(), 3);
        assert_eq!(ranked[0].0, sym("a"), "a should be nearest");
        // Similarities must be non-increasing.
        for pair in ranked.windows(2) {
            assert!(pair[0].1 >= pair[1].1);
        }
    }

    #[test]
    fn mem_nearest_exact_match_scores_one() {
        let mut store = MemStore::new();
        let axis = unit_vec(3, 0);
        store.set_embedding(&sym("x"), &axis).unwrap();

        let ranked = store.nearest(&axis, 1).unwrap();
        assert_eq!(ranked.len(), 1);
        let score = ranked[0].1;
        assert!((score - 1.0).abs() < 1e-5, "cosine of identical vectors = 1");
    }

    #[test]
    fn mem_nearest_k_larger_than_store_returns_all() {
        let mut store = MemStore::new();
        for (axis, name) in ["p", "q"].iter().enumerate() {
            store.set_embedding(&sym(name), &unit_vec(2, axis)).unwrap();
        }
        let ranked = store.nearest(&[1.0, 0.0], 100).unwrap();
        assert_eq!(ranked.len(), 2, "k > stored count → return all");
    }

    #[test]
    fn mem_nearest_dim_mismatch_skipped() {
        let mut store = MemStore::new();
        store.set_embedding(&sym("dim2"), &[1.0_f32, 0.0]).unwrap();
        // A dim=3 query must not panic; the dim-2 vector is simply left out.
        let ranked = store.nearest(&[1.0_f32, 0.0, 0.0], 5).unwrap();
        assert!(ranked.is_empty(), "dim-mismatch entries silently skipped");
    }

    #[test]
    fn mem_nearest_deterministic_ordering() {
        let mut store = MemStore::new();
        // Two symbols share one vector, so they tie on cosine similarity.
        let shared = unit_vec(2, 0);
        for name in ["z", "a"].iter() {
            store.set_embedding(&sym(name), &shared).unwrap();
        }

        let first_run: Vec<String> = store
            .nearest(&shared, 2)
            .unwrap()
            .into_iter()
            .map(|(id, _)| id.0)
            .collect();
        let second_run: Vec<String> = store
            .nearest(&shared, 2)
            .unwrap()
            .into_iter()
            .map(|(id, _)| id.0)
            .collect();

        assert_eq!(
            first_run, second_run,
            "identical calls must return identical order"
        );
        // Tie broken by SymbolId lex order: "a" < "z".
        assert_eq!(first_run[0], "a");
        assert_eq!(first_run[1], "z");
    }

    #[test]
    fn mem_nearest_empty_store_returns_empty() {
        let store = MemStore::new();
        let ranked = store.nearest(&[1.0, 0.0], 5).unwrap();
        assert!(ranked.is_empty());
    }

    #[test]
    fn mem_capabilities_vector_search_true() {
        let caps = MemStore::new().capabilities();
        assert!(
            caps.vector_search,
            "MemStore must report vector_search = true"
        );
    }

    // ── remove_file clears content + embeddings ──────────────────────────────

    #[test]
    fn mem_remove_file_clears_content_row() {
        let path = "src/a.rs";
        let mut store = MemStore::new();
        store.upsert_nodes(&[make_node("fn_a", path)]).unwrap();
        store.set_file_content(path, "fn fn_a() {}").unwrap();
        assert!(store.file_content(path).unwrap().is_some());

        // remove_file drops the file→sha pointer, so file_content returns None even though
        // the content itself may still sit in the content store until compact().
        store.remove_file(path).unwrap();
        let after = store.file_content(path).unwrap();
        assert!(
            after.is_none(),
            "file_content must return None when the file→sha pointer is removed"
        );
    }

    #[test]
    fn mem_remove_file_clears_embeddings() {
        let path = "src/b.rs";
        let owner = sym("fn_b");
        let mut store = MemStore::new();
        store.upsert_nodes(&[make_node("fn_b", path)]).unwrap();
        store.set_embedding(&owner, &[1.0_f32, 0.0]).unwrap();
        assert!(store.embedding(&owner).unwrap().is_some());

        store.remove_file(path).unwrap();
        let after = store.embedding(&owner).unwrap();
        assert!(
            after.is_none(),
            "embedding must be cleared when owning file is removed"
        );
    }

    // ── prune_dangling_edges ─────────────────────────────────────────────────

    #[test]
    fn mem_prune_dangling_edges_removes_orphans_keeps_valid() {
        use wicked_estate_core::GraphWrite;
        let mut store = MemStore::new();
        let both_nodes = [make_node("a", "src/lib.rs"), make_node("b", "src/lib.rs")];
        store.upsert_nodes(&both_nodes).unwrap();

        // a → b is valid; a → ghost points at a node that was never stored.
        let edges = [calls_edge("a", "b"), calls_edge("a", "ghost")];
        store.upsert_edges(&edges).unwrap();
        assert_eq!(store.all_edges().unwrap().len(), 2);

        let pruned = store.prune_dangling_edges().unwrap();
        assert_eq!(pruned, 1, "one dangling edge removed");

        let survivors = store.all_edges().unwrap();
        assert_eq!(survivors.len(), 1);
        assert_eq!(survivors[0].source, sym("a"));
        assert_eq!(survivors[0].target, sym("b"));
    }

    // ── compact ──────────────────────────────────────────────────────────────

    #[test]
    fn mem_compact_prunes_stale_cache_and_reports_stats() {
        let mut store = MemStore::new();
        store.upsert_nodes(&[make_node("fn_c", "src/c.rs")]).unwrap();
        store.set_file_content("src/c.rs", "fn fn_c() {}").unwrap();

        // One cache entry written before the bump (stale afterwards), one written after it.
        store.cache_put("old_key", "old_val").unwrap();
        store.bump_version().unwrap();
        store.cache_put("new_key", "new_val").unwrap();

        let report = store.compact().unwrap();
        assert_eq!(
            report.stale_cache_rows, 1,
            "stale entry at version 0 must be pruned"
        );
        assert_eq!(report.dangling_edges, 0);
        assert_eq!(report.orphan_embeddings, 0);
        assert_eq!(report.orphan_content, 0);

        let fresh = store.cache_get("new_key").unwrap();
        assert_eq!(fresh, Some("new_val".to_string()));
    }

    #[test]
    fn mem_compact_prunes_orphan_embeddings_and_content() {
        use wicked_estate_core::GraphWrite;
        let mut store = MemStore::new();
        let kept = sym("a");

        store.upsert_nodes(&[make_node("a", "src/a.rs")]).unwrap();
        store.set_file_content("src/a.rs", "fn a() {}").unwrap();
        // Orphan content: a row in the content map under a sha that no file references.
        store
            .content
            .insert("deadbeef".to_string(), "// dead".to_string());
        store.set_embedding(&kept, &[1.0_f32, 0.0]).unwrap();
        store.set_embedding(&sym("ghost"), &[0.0_f32, 1.0]).unwrap();
        store.upsert_edges(&[calls_edge("a", "ghost")]).unwrap();

        let report = store.compact().unwrap();
        assert_eq!(report.dangling_edges, 1);
        assert_eq!(report.orphan_embeddings, 1);
        assert_eq!(report.orphan_content, 1);

        assert!(store.embedding(&kept).unwrap().is_some());
        assert!(store.file_content("src/a.rs").unwrap().is_some());
    }

    // ── Wave 7: git blob SHA + content-addressing ────────────────────────────

    #[test]
    fn mem_file_git_sha_after_set_file_content() {
        use wicked_estate_core::GraphWrite;
        let mut store = MemStore::new();
        store.set_file_content("src/hello.rs", "hello").unwrap();

        let recorded = store.file_git_sha("src/hello.rs").unwrap();
        let sha = recorded.expect("sha must be set");
        assert_eq!(sha, "b6fc4c620b67d95f953a5c1c1230aaab5db5a1b0");
    }

    #[test]
    fn mem_content_dedup_identical_text() {
        use wicked_estate_core::GraphWrite;
        let body = "fn x() {}";
        let mut store = MemStore::new();
        for path in ["a.rs", "b.rs"].iter() {
            store.set_file_content(path, body).unwrap();
        }

        let sha_a = store.file_git_sha("a.rs").unwrap().unwrap();
        let sha_b = store.file_git_sha("b.rs").unwrap().unwrap();
        assert_eq!(sha_a, sha_b, "identical content → same git_sha");
        assert_eq!(store.content.len(), 1, "one content row for identical text");

        let text_a = store.file_content("a.rs").unwrap();
        assert_eq!(text_a, Some("fn x() {}".to_string()));
        let text_b = store.file_content("b.rs").unwrap();
        assert_eq!(text_b, Some("fn x() {}".to_string()));
    }

    // ── Wave 7.1: changes_since ─────────────────────────────────────────────

    #[test]
    fn mem_changes_since_order_and_resume() {
        use wicked_estate_core::{ChangeOp, GraphWrite};
        let mut store = MemStore::new();
        store.log_change(ChangeOp::Upsert, "a.rs").unwrap();
        store.log_change(ChangeOp::Upsert, "b.rs").unwrap();
        store.log_change(ChangeOp::Remove, "c.rs").unwrap();

        let full_log = store.changes_since(0).unwrap();
        assert_eq!(full_log.len(), 3);
        assert_eq!(full_log[0].target, "a.rs");
        assert_eq!(full_log[2].op, ChangeOp::Remove);

        // Resuming from the second entry's seq yields only the third.
        let resume_from = full_log[1].seq;
        let tail = store.changes_since(resume_from).unwrap();
        assert_eq!(tail.len(), 1);
        assert_eq!(tail[0].target, "c.rs");
    }

    // ── Wave 7: repo_info round-trip ─────────────────────────────────────────

    #[test]
    fn mem_repo_info_roundtrip() {
        use wicked_estate_core::{GraphWrite, RepoInfo};
        let mut store = MemStore::new();
        assert!(store.repo_info().unwrap().is_none());

        let written = RepoInfo {
            commit: Some("abc123".to_string()),
            branch: Some("main".to_string()),
            remote: None,
            dirty: false,
        };
        store.set_repo_info(&written).unwrap();

        let read_back = store.repo_info().unwrap().expect("must be Some after set");
        assert_eq!(read_back.commit, Some("abc123".to_string()));
        assert_eq!(read_back.branch, Some("main".to_string()));
        assert!(!read_back.dirty);
    }

    // ── Wave 7: edge_history archival ────────────────────────────────────────

    #[test]
    fn mem_edge_history_archived_on_remove_file() {
        use wicked_estate_core::GraphWrite;
        // history must be ON to assert archival behaviour.
        let mut store = MemStore::new_with_history();

        store.set_file_content("src/foo.rs", "fn foo() {}").unwrap();
        let sha_before_removal = store.file_git_sha("src/foo.rs").unwrap().unwrap();

        store.upsert_nodes(&[make_node("foo", "src/foo.rs")]).unwrap();
        store.upsert_nodes(&[make_node("bar", "src/bar.rs")]).unwrap();
        store.upsert_edges(&[calls_edge("foo", "bar")]).unwrap();

        store.remove_file("src/foo.rs").unwrap();

        let archived = store.edge_history("src/foo.rs").unwrap();
        assert_eq!(archived.len(), 1, "one superseded edge must be in history");
        assert_eq!(archived[0].git_sha, sha_before_removal);
    }

    // ── Wave 7: edge_history retention prune ─────────────────────────────────

    #[test]
    fn mem_compact_prunes_edge_history_beyond_retention() {
        use wicked_estate_core::GraphWrite;
        // history must be ON to populate edge_history via remove_file.
        let mut store = MemStore::new_with_history();

        store
            .upsert_nodes(&[make_node("target", "src/other.rs")])
            .unwrap();
        // 25 write-then-remove cycles of the same file, each with different content.
        for i in 0..25_u32 {
            let text = format!("fn ver_{i}() {{}}");
            store.set_file_content("src/ver.rs", &text).unwrap();
            store
                .upsert_nodes(&[make_node("ver_fn", "src/ver.rs")])
                .unwrap();
            store.upsert_edges(&[calls_edge("ver_fn", "target")]).unwrap();
            store.remove_file("src/ver.rs").unwrap();
        }

        let rows_before = store.edge_history.len();
        assert_eq!(rows_before, 25);

        let report = store.compact().unwrap();
        assert_eq!(report.history_rows_pruned, 5);
        assert_eq!(store.edge_history.len(), 20);
    }

    // ── Semantic linking (MemStore) ──────────────────────────────────────────

    #[test]
    fn mem_node_semantics_absent_before_annotation() {
        let mut store = MemStore::new();
        store.upsert_nodes(&[make_node("fn_a", "src/a.rs")]).unwrap();

        let semantics = store.node_semantics(&sym("fn_a")).unwrap();
        assert!(
            semantics.is_none(),
            "node_semantics must be None before any annotation"
        );
    }

    #[test]
    fn mem_node_semantics_full_roundtrip() {
        let target = sym("fn_b");
        let mut store = MemStore::new();
        store.upsert_nodes(&[make_node("fn_b", "src/b.rs")]).unwrap();
        store
            .set_node_semantics(&target, Some("does the thing"), Some("REQ-42"), Some(true))
            .unwrap();

        let semantics = store
            .node_semantics(&target)
            .unwrap()
            .expect("must be Some after full write");
        assert_eq!(semantics.description, Some("does the thing".to_string()));
        assert_eq!(semantics.requirement, Some("REQ-42".to_string()));
        assert!(semantics.requirement_validated);
    }

    #[test]
    fn mem_node_semantics_partial_update_preserves_untouched_fields() {
        let target = sym("fn_c");
        let mut store = MemStore::new();
        store.upsert_nodes(&[make_node("fn_c", "src/c.rs")]).unwrap();
        store
            .set_node_semantics(&target, Some("original"), Some("REQ-7"), Some(true))
            .unwrap();
        // Partial write: only the description is supplied.
        store
            .set_node_semantics(&target, Some("updated"), None, None)
            .unwrap();

        let semantics = store
            .node_semantics(&target)
            .unwrap()
            .expect("must still be Some");
        assert_eq!(
            semantics.description,
            Some("updated".to_string()),
            "description updated"
        );
        assert_eq!(
            semantics.requirement,
            Some("REQ-7".to_string()),
            "requirement unchanged"
        );
        assert!(semantics.requirement_validated, "validated flag unchanged");
    }

    #[test]
    fn mem_find_by_requirement_returns_annotated_nodes() {
        let mut store = MemStore::new();
        let both_nodes = [make_node("fn_x", "src/x.rs"), make_node("fn_y", "src/y.rs")];
        store.upsert_nodes(&both_nodes).unwrap();
        store
            .set_node_semantics(&sym("fn_x"), Some("desc x"), Some("REQ-99"), Some(false))
            .unwrap();
        store
            .set_node_semantics(&sym("fn_y"), Some("desc y"), Some("REQ-other"), Some(false))
            .unwrap();

        let matches = store.find_by_requirement("REQ-99").unwrap();
        assert_eq!(matches.len(), 1, "exactly one node matches REQ-99");
        assert_eq!(matches[0].symbol, sym("fn_x"));
    }

    #[test]
    fn mem_set_node_semantics_absent_symbol_noop() {
        let missing = sym("ghost");
        let mut store = MemStore::new();
        store
            .set_node_semantics(&missing, Some("desc"), Some("REQ-1"), Some(false))
            .unwrap();

        let semantics = store.node_semantics(&missing).unwrap();
        assert!(
            semantics.is_none(),
            "absent symbol must remain without semantics"
        );
    }

    #[test]
    fn mem_set_node_semantics_all_none_noop() {
        let target = sym("fn_d");
        let mut store = MemStore::new();
        store.upsert_nodes(&[make_node("fn_d", "src/d.rs")]).unwrap();
        store.set_node_semantics(&target, None, None, None).unwrap();

        let semantics = store.node_semantics(&target).unwrap();
        assert!(
            semantics.is_none(),
            "all-None call must leave semantics as None"
        );
    }
}