Skip to main content

solo_storage/
lib.rs

1// SPDX-License-Identifier: Apache-2.0
2
3//! Solo storage: SQLite + SQLCipher persistence layer.
4//!
5//! ## Concurrency invariants (per ADR-0003)
6//!
7//!   * **Writes go through `WriteHandle`; reads go through `ReaderPool`.**
8//!     Direct connection access is an anti-pattern outside the actor + pool.
9//!   * The writer connection opens once and is owned by the writer thread for
10//!     the daemon's lifetime.
11//!   * The read pool's `post_create` hook binds the raw SQLCipher key on each
12//!     new connection.
13//!   * `pending_index` ordering is **always** SQL COMMIT → HNSW.add → drain
14//!     row. Never reverse.
15//!   * `Arc<dyn VectorIndex + Send + Sync>` is shared between the writer and
16//!     the read pool; concurrency is provided by the impl (e.g., hnsw_rs's
17//!     internal `parking_lot::RwLock`), not by application-level locks.
18//!
19//! ## Module layout
20//!
21//! Commit 1.1 — `solo init` building blocks:
22//!
23//!   - `path_validation` — refuse cloud-sync data dirs.
24//!   - `key_material`    — Argon2id passphrase → 32-byte SQLCipher key.
25//!   - `config`          — `solo.config.toml` (salt + embedder identity).
26//!   - `migration`       — runner + the v0 schema (migrations/0001_initial.sql).
27//!   - `lockfile`        — RAII `solo.lock` to serialize concurrent runs.
28//!   - `init`            — orchestrator: `solo_storage::init(params)`.
29//!
30//! Commit 1.2 — single-writer actor + read pool:
31//!
32//!   - `writer`          — `WriterActor`, `WriteHandle`, `WriteCommand`.
33//!   - `reader`          — `ReaderPool` (deadpool-sqlite + post_create raw-key).
34//!
35//! Commit 1.3 — HNSW backing for `solo_core::VectorIndex` + snapshot I/O:
36//!
37//!   - `vector_index`    — `HnswIndex` (`hnsw_rs` wrapper), `HnswFactory`.
38//!   - `snapshot`        — atomic two-file save (live/`_bak`/`_tmp` basenames)
39//!                         + `load`/`load_bak` per ADR-0003 §"Startup
40//!                         file-existence decision tree".
41//!   - `recovery`        — `replay_pending_index`, `detect_drift`. Used by
42//!                         the daemon-main startup chain (commit 1.5).
43//!
44//! Embedder impls:
45//!
46//!   - `embedder::stub` — `StubEmbedder`, deterministic hash-based F32
47//!                        embedder for tests + offline development.
48//!   - `embedder::ollama` — `OllamaEmbedder`, real semantic embeddings
49//!                          via a local Ollama daemon (`/api/embeddings`).
50//!                          The recommended production backend since
51//!                          v0.5.1; default for new deployments.
52//!
53//! (v0.5.x also shipped a BGE-M3 / candle-transformers backend; it was
54//! deprecated in v0.5.0 and removed in v0.6.0. The replacement is
55//! `OllamaEmbedder`.)
56//!
57//! Commit 1.5+ (daemon main + signal handlers) lands in subsequent files;
58//! the surfaces here are stable for that wiring.
59
60#![allow(dead_code)] // NOTE(review): crate-wide lint suppression, no rationale recorded — consider narrowing.
61
62pub mod audit;
63pub mod backup;
64pub mod config;
65pub mod document;
66pub mod embedder;
67pub mod embedder_registry;
68pub mod gdpr;
69pub mod hnsw_id;
70pub mod hnsw_rebuild;
71pub mod init;
72pub mod llm; // NOTE(review): out of alphabetical order — sorts after `key_material`.
73pub mod key_material;
74pub mod lockfile;
75pub mod merge_candidates;
76pub mod migration;
77pub mod path_validation;
78pub mod reader;
79pub mod recovery;
80pub mod redaction;
81pub mod snapshot;
82pub mod startup;
83pub mod steward_factory;
84pub mod tenant_backup;
85pub mod tenants;
86pub mod triples_batch;
87pub mod vector_index;
88pub mod writer;
89
90#[cfg(test)]
91mod properties; // Private module; compiled only for this crate's own test builds.
92
93#[cfg(any(test, feature = "test-support"))]
94pub mod test_support; // Public; built for tests or when the `test-support` feature is enabled.
95
96// Re-exports for the most common surface:
97pub use audit::{
98    AuditEvent, AuditOperation, AuditResult, AuditWriter, AuditWriterShutdown,
99    insert_audit_admin_row, insert_audit_row_in_tx, purge_older_than,
100};
101pub use backup::{
102    DEFAULT_BACKUP_PAGES_PER_STEP, backup_database, backup_from_connection,
103    paths_refer_to_same_file,
104};
105pub use config::{
106    AuditSettings, AuthSettings, CustomRedactionPattern, DocumentConfig, EmbedderConfig,
107    IdentityConfig, LlmSettings, RedactionConfig, SamplingConfig, SamplingConfigDiagnostic,
108    SoloConfig, StewardSettings, TriplesConfig,
109};
110pub use gdpr::{ForgetReport, estimate_forget_scope, forget_principal};
111pub use redaction::{RedactionMatch, RedactionRegistry, RedactionResult};
112pub use steward_factory::{McpSamplingStewardFactory, StaticStewardFactory, StewardFactory};
113pub use tenant_backup::{BackupReport, RestoreReport, backup_tenant, restore_tenant};
114pub use document::{ChunkConfig, ChunkSpec, ParseError, ParsedDocument, chunk_text, parse_file};
115pub use embedder::{
116    OllamaEmbedder, StubEmbedder, build_embedder_from_env, probe_embedder_config_from_env,
117};
118// v0.9.0 P3: BundledEmbedder + its identity constants are re-exported
119// from the crate root only when the `bundled-embedder` Cargo feature
120// is on. Downstream code can test the gate with `cfg!(feature = "bundled-embedder")`;
121// that is a compile-time check of the *calling* crate's own features, so forward it there.
122#[cfg(feature = "bundled-embedder")]
123pub use embedder::{
124    BUNDLED_EMBEDDER_DIM, BUNDLED_EMBEDDER_NAME, BUNDLED_EMBEDDER_VERSION,
125    BundledEmbedder,
126};
127pub use embedder_registry::{EmbedderIdentity, get_or_insert_embedder_id};
128pub use hnsw_id::{HNSW_CHUNK_BIT, HnswIdKind, chunk_hnsw_id, decode_hnsw_id, episode_hnsw_id};
129pub use init::{InitOutcome, InitParams, default_data_dir, default_embedder, init, open_sqlcipher};
130pub use key_material::KeyMaterial;
131pub use lockfile::Lockfile;
132pub use merge_candidates::{MergeCandidateStats, count_existing_merge_candidates};
133pub use migration::{
134    current_tenants_index_version, current_version, run_migrations,
135    run_tenants_index_migrations,
136};
137pub use path_validation::validate_data_dir;
138pub use reader::{DEFAULT_POOL_SIZE, ReaderPool};
139pub use recovery::{
140    DriftReport, RebuildReport, ReplayReport, detect_drift, rebuild_hnsw_from_sql,
141    replay_pending_index,
142};
143pub use snapshot::{BAK_BASENAME, LIVE_BASENAME, TMP_BASENAME};
144pub use startup::{StartupOutcome, StartupParams, run as startup_run};
145pub use tenants::{
146    TENANTS_INDEX_FILENAME, TENANTS_SUBDIR, TenantCostNumbers, TenantHandle,
147    TenantOpenParams, TenantRecord, TenantRegistry, TenantRegistryParams,
148    TenantStatus, TenantsIndex, migrate_v071_to_v080,
149};
150pub use triples_batch::TriplesBatchSignal;
151pub use vector_index::{HnswFactory, HnswIndex, HnswParams};
152pub use writer::{
153    AttachAbstractionBatchReport, DEFAULT_CHANNEL_CAPACITY, DEFAULT_INGEST_MAX_BYTES,
154    ConsolidationReport, ConsolidationScope, ForgetDocumentReport, IngestReport,
155    MAX_REMEMBER_BATCH_SIZE, NormalizeReport, ReembedReport, ReembedScope,
156    ResolveContradictionReport, WriteCommand, WriteHandle, WriterActor, WriterSpawn,
157    resolve_ingest_max_bytes,
158};