lean_semantic_search_store/lib.rs
1//! Persistent `SQLite`-backed semantic index implementing the retrieval
2//! [`Corpus`](lean_semantic_search_retrieval::Corpus) seam.
3//!
4//! This crate is the large-corpus counterpart to the in-memory inverted index
5//! in `lean-semantic-search-retrieval`. It owns the **semantic index only**:
6//! opaque-key postings, per-key fanout, the document total, and the contract
7//! [`DeclarationFeatureRow`](lean_semantic_search_contract::DeclarationFeatureRow)s
8//! needed to rebuild an anchor from a corpus member. It carries no declaration
9//! display text, module or kind fields, provenance, labels, probe caches, or any
10//! duplicate-audit or proof-agent vocabulary — those stay with consumers.
11//!
12//! Build a corpus with [`StoreBuilder`], publishing it atomically; open it
13//! read-only with [`Store`], which implements `Corpus` so retrieval ranks over a
14//! persisted index without loading it into memory. The ranking algorithm, anchor
15//! planning, policy, and output shape are unchanged: a `Store` is just another
16//! `Corpus`, and `retrieve_across` fans one anchor across several of them.
17//!
18//! Reuse is gated by [`Store::open_fresh`], which accepts a persisted corpus only
19//! on a matching opaque `corpus_token` and matching versions and reports every
20//! mismatch or corruption as a [`CacheMiss`] rather than an error. The neutral
21//! [`set_latest`]/[`cleanup`] primitives manage content-addressed corpus
22//! directories and the atomic latest-pointer the caller resolves.
23//!
24//! See `docs/architecture/05-sqlite-store.md` for the schema and the read/write
25//! design, and `docs/architecture/06-cache-lifecycle.md` for the freshness
26//! contract and the lifecycle primitives.
27
28mod freshness;
29mod lifecycle;
30mod read;
31mod schema;
32mod write;
33
34pub use freshness::{CacheMiss, CorpusLookup, open_latest_fresh};
35pub use lifecycle::{
36 CleanupEntry, CleanupMode, CleanupReport, cleanup, corpus_dir, index_path, latest_index_path, latest_name,
37 set_latest,
38};
39pub use read::Store;
40pub use schema::STORE_SCHEMA_VERSION;
41pub use write::{Ingest, StoreBuilder};
42
43use std::fmt;
44
45/// An error from building or opening a persisted corpus.
46///
47/// The [`Corpus`](lean_semantic_search_retrieval::Corpus) read methods are
48/// infallible by trait contract — a `Store` validates its schema and metadata at
49/// [`Store::open`], so subsequent reads degrade to empty results rather than
50/// surfacing an error. This type covers only the fallible build and open steps.
51#[derive(Debug)]
52#[non_exhaustive]
53pub enum StoreError {
54 /// A `SQLite` operation failed.
55 Sqlite(rusqlite::Error),
56 /// A filesystem operation failed.
57 Io(std::io::Error),
58 /// A feature row could not be serialized to JSON.
59 Json(serde_json::Error),
60 /// The opened store's schema version is not the one this build understands.
61 SchemaMismatch {
62 /// The schema version stored in the file.
63 found: String,
64 /// The schema version this build writes and reads.
65 expected: &'static str,
66 },
67 /// A required metadata fact was absent from the opened store.
68 MissingMetadata(String),
69 /// A stored metadata value was present but unparseable.
70 Corrupt(&'static str),
71 /// The builder was used after it had already been published.
72 Closed,
73}
74
75impl fmt::Display for StoreError {
76 fn fmt(&self, formatter: &mut fmt::Formatter<'_>) -> fmt::Result {
77 match self {
78 Self::Sqlite(error) => write!(formatter, "sqlite error: {error}"),
79 Self::Io(error) => write!(formatter, "io error: {error}"),
80 Self::Json(error) => write!(formatter, "json error: {error}"),
81 Self::SchemaMismatch { found, expected } => {
82 write!(formatter, "store schema version {found} is not the expected {expected}")
83 }
84 Self::MissingMetadata(key) => write!(formatter, "store is missing required metadata: {key}"),
85 Self::Corrupt(key) => write!(formatter, "store metadata value is corrupt: {key}"),
86 Self::Closed => write!(formatter, "store builder has already been published"),
87 }
88 }
89}
90
91impl std::error::Error for StoreError {
92 fn source(&self) -> Option<&(dyn std::error::Error + 'static)> {
93 match self {
94 Self::Sqlite(error) => Some(error),
95 Self::Io(error) => Some(error),
96 Self::Json(error) => Some(error),
97 Self::SchemaMismatch { .. } | Self::MissingMetadata(_) | Self::Corrupt(_) | Self::Closed => None,
98 }
99 }
100}
101
102impl From<rusqlite::Error> for StoreError {
103 fn from(error: rusqlite::Error) -> Self {
104 Self::Sqlite(error)
105 }
106}
107
108impl From<std::io::Error> for StoreError {
109 fn from(error: std::io::Error) -> Self {
110 Self::Io(error)
111 }
112}
113
114impl From<serde_json::Error> for StoreError {
115 fn from(error: serde_json::Error) -> Self {
116 Self::Json(error)
117 }
118}