solo-storage 0.5.0

Solo: SQLite + SQLCipher persistence layer
Documentation
// SPDX-License-Identifier: Apache-2.0

//! Solo storage: SQLite + SQLCipher persistence layer.
//!
//! ## Concurrency invariants (per ADR-0003)
//!
//!   * **Writes go through `WriteHandle`; reads go through `ReaderPool`.**
//!     Direct connection access is an anti-pattern outside the actor + pool.
//!   * The writer connection opens once and is owned by the writer thread for
//!     the daemon's lifetime.
//!   * The read pool's `post_create` hook binds the raw SQLCipher key on each
//!     new connection.
//!   * `pending_index` ordering is **always** SQL COMMIT → HNSW.add → drain
//!     row. Never reverse.
//!   * `Arc<dyn VectorIndex + Send + Sync>` is shared between the writer and
//!     the read pool; concurrency is provided by the impl (e.g., hnsw_rs's
//!     internal `parking_lot::RwLock`), not by application-level locks.
//!
//! ## Module layout
//!
//! Commit 1.1 — `solo init` building blocks:
//!
//!   - `path_validation` — refuse cloud-sync data dirs.
//!   - `key_material`    — Argon2id passphrase → 32-byte SQLCipher key.
//!   - `config`          — `solo.config.toml` (salt + embedder identity).
//!   - `migration`       — runner + the v0 schema (migrations/0001_initial.sql).
//!   - `lockfile`        — RAII `solo.lock` to serialize concurrent runs.
//!   - `init`            — orchestrator: `solo_storage::init(params)`.
//!
//! Commit 1.2 — single-writer actor + read pool:
//!
//!   - `writer`          — `WriterActor`, `WriteHandle`, `WriteCommand`.
//!   - `reader`          — `ReaderPool` (deadpool-sqlite + post_create raw-key).
//!
//! Commit 1.3 — HNSW backing for `solo_core::VectorIndex` + snapshot I/O:
//!
//!   - `vector_index`    — `HnswIndex` (`hnsw_rs` wrapper), `HnswFactory`.
//!   - `snapshot`        — atomic two-file save (live/`_bak`/`_tmp` basenames)
//!                         + `load`/`load_bak` per ADR-0003 §"Startup
//!                         file-existence decision tree".
//!   - `recovery`        — `replay_pending_index`, `detect_drift`. Used by
//!                         the daemon-main startup chain (commit 1.5).
//!
//! Commit 1.4.a — `solo_core::Embedder` impls + BGE-M3 file discovery:
//!
//!   - `embedder::stub` — `StubEmbedder`, deterministic hash-based F32
//!                        embedder for tests + daemon bring-up.
//!   - `embedder::bge_m3` — `BgeM3Loader` validates a HuggingFace BGE-M3
//!                          model directory (`config.json`, `tokenizer.json`,
//!                          `model.safetensors` / `pytorch_model.bin`) and
//!                          parses the model config. The forward pass via
//!                          candle-core + candle-transformers `xlm_roberta`
//!                          + the `tokenizers` crate is commit 1.4.b.
//!
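//! Commit 1.5+ (daemon main + signal handlers) lands in subsequent files;
//! the surfaces here are stable for that wiring.
//!
//! Modules declared in this file that the commit-by-commit list above does
//! not cover (described only by what each re-exports at the crate root):
//!
//!   - `backup`            — `backup_database`, `backup_from_connection`,
//!                           `paths_refer_to_same_file`.
//!   - `embedder_registry` — `EmbedderIdentity`, `get_or_insert_embedder_id`.
//!   - `llm`               — no crate-root re-exports.
//!   - `merge_candidates`  — `MergeCandidateStats`,
//!                           `count_existing_merge_candidates`.
//!   - `startup`           — `StartupParams`, `StartupOutcome`, and `run`
//!                           (re-exported as `startup_run`).
//!   - `properties`        — private; compiled only under `cfg(test)`.
//!   - `test_support`      — compiled only under `cfg(test)` or the
//!                           `test-support` feature.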

// NOTE(review): this suppresses `dead_code` warnings for the entire crate and
// no reason is recorded. Presumably it covers surfaces staged ahead of the
// daemon wiring mentioned in the crate docs — confirm, then narrow it to
// item-level `#[allow]`s or drop it.
#![allow(dead_code)]

// Public modules, kept in alphabetical order (the order rustfmt's default
// `reorder_modules` setting produces). None of them carries `#[macro_use]`,
// so declaration order has no effect on name resolution.
pub mod backup;
pub mod config;
pub mod embedder;
pub mod embedder_registry;
pub mod init;
pub mod key_material;
pub mod llm;
pub mod lockfile;
pub mod merge_candidates;
pub mod migration;
pub mod path_validation;
pub mod reader;
pub mod recovery;
pub mod snapshot;
pub mod startup;
pub mod vector_index;
pub mod writer;

// Private to the crate and compiled only for test builds.
#[cfg(test)]
mod properties;

// Compiled for this crate's own tests, or for any build that enables the
// `test-support` feature; public so it is reachable from outside the crate
// in that configuration.
#[cfg(any(test, feature = "test-support"))]
pub mod test_support;

// Re-exports for the most common surface: each item below becomes reachable
// directly from the crate root (e.g. `solo_storage::ReaderPool`) as well as
// through its defining module. Modules with no entry here (`llm`, and the
// cfg-gated `test_support`) are only reachable through their module path.
pub use backup::{
    DEFAULT_BACKUP_PAGES_PER_STEP, backup_database, backup_from_connection,
    paths_refer_to_same_file,
};
pub use config::{EmbedderConfig, IdentityConfig, SoloConfig};
pub use embedder::{BgeM3Config, BgeM3Loader, BgeM3Manifest, StubEmbedder};
pub use embedder_registry::{EmbedderIdentity, get_or_insert_embedder_id};
// `init` is re-exported here under the same name as the `init` module (the
// crate docs call it as `solo_storage::init(params)`). NOTE(review): the docs
// describe it as a function; if so, it and the module occupy different
// namespaces and both resolve at the crate root.
pub use init::{InitOutcome, InitParams, default_data_dir, default_embedder, init, open_sqlcipher};
pub use key_material::KeyMaterial;
pub use lockfile::Lockfile;
pub use merge_candidates::{MergeCandidateStats, count_existing_merge_candidates};
pub use migration::{current_version, run_migrations};
pub use path_validation::validate_data_dir;
pub use reader::{DEFAULT_POOL_SIZE, ReaderPool};
pub use recovery::{
    DriftReport, RebuildReport, ReplayReport, detect_drift, rebuild_hnsw_from_sql,
    replay_pending_index,
};
pub use snapshot::{BAK_BASENAME, LIVE_BASENAME, TMP_BASENAME};
// `startup::run` is exposed at the crate root under the more specific name
// `startup_run`; the original `run` name stays available as `startup::run`.
pub use startup::{StartupOutcome, StartupParams, run as startup_run};
pub use vector_index::{HnswFactory, HnswIndex, HnswParams};
pub use writer::{
    DEFAULT_CHANNEL_CAPACITY, ConsolidationReport, ConsolidationScope, ReembedReport,
    ReembedScope, WriteCommand, WriteHandle, WriterActor, WriterSpawn,
};