Skip to main content

solo_storage/
init.rs

1// SPDX-License-Identifier: Apache-2.0
2
3//! `solo init`: create a fresh Solo data directory.
4//!
5//! The orchestrator wires together every primitive in this crate:
6//!
7//!   1. `path_validation::validate_data_dir` — refuse cloud-sync folders.
8//!   2. Detect existing init state.
9//!        - If a v0.7.1 single-DB layout is present (`solo.db` at the data
10//!          dir root, no `tenants_index.db`), this is an upgrade — invoke
11//!          the v0.7.1 → v0.8.0 mass-data-move helper instead of refusing.
12//!        - If a v0.8.0 per-tenant layout is already in place
13//!          (`tenants_index.db` exists, with at least the default tenant
14//!          registered), refuse with Conflict unless `force = true`.
15//!        - On `force`, wipe Solo-owned files in the data dir (NOT the dir
16//!          itself, in case the user keeps other stuff there) and proceed
17//!          to fresh-install creation.
18//!   3. Create the data directory.
19//!   4. Acquire `solo.lock` (RAII) so a parallel `solo init` or `solo daemon`
20//!      can't race us.
21//!   5. Generate a fresh 16-byte salt, derive the SQLCipher key via Argon2id.
22//!   6. Create `tenants/` subdir + `tenants_index.db` (apply migration 0004)
23//!      + per-tenant `tenants/default.db` (apply migrations 0001-0003) +
24//!      register the default tenant in the index.
25//!   7. Write `solo.config.toml` (salt + embedder identity).
26//!   8. Drop the lockfile (RAII — also runs on any error path between steps).
27//!
28//! On any error after the data dir is created, the partial state on disk is
29//! left for inspection. The caller can re-run with `--force` to wipe and
30//! retry.
31//!
32//! ## Layout (v0.8.0)
33//!
34//! After a successful `init` on a fresh install:
35//!
36//! ```text
37//! <data_dir>/
38//!   tenants_index.db          -- registry (SQLCipher; same key as below)
39//!   tenants/
40//!     default.db              -- per-tenant SQLCipher DB
41//!     default.db-wal
42//!     default.db-shm
43//!   solo.config.toml
44//! ```
45//!
46//! The `solo.db` at the data dir root that v0.7.1 used is no longer created.
47//! Existing v0.7.1 data dirs are upgraded into the new layout on the first
48//! `solo init`-or-daemon-boot call by the mass-data-move helper.
49
50use rusqlite::Connection;
51use solo_core::{Embedder, Error, Result, TenantId};
52use std::path::{Path, PathBuf};
53use zeroize::Zeroizing;
54
55use crate::{
56    config::{EmbedderConfig, LlmSettings, SoloConfig},
57    key_material::KeyMaterial,
58    lockfile::Lockfile,
59    migration,
60    path_validation::validate_data_dir,
61    tenants::{
62        TENANTS_INDEX_FILENAME, TENANTS_SUBDIR, TenantStatus, TenantsIndex, migrate_v071_to_v080,
63    },
64};
65
66/// Default data dir: `~/.solo/`. Honors the home-dir resolution `dirs` crate
67/// performs (Windows: `%USERPROFILE%`; Unix: `$HOME`). Returns `None` if no
68/// home directory can be found.
69pub fn default_data_dir() -> Option<PathBuf> {
70    dirs::home_dir().map(|h| h.join(".solo"))
71}
72
/// File names at the data dir root that Solo owns. `--force` removes these,
/// and v0.8.0 layout detection looks at them. Anything else in the dir is
/// left untouched.
///
/// HNSW snapshot filenames are derived from the basenames in
/// `crate::snapshot` (`hnsw_episodes`, `hnsw_episodes_bak`, `hnsw_episodes_tmp`)
/// + the suffixes hnsw_rs's `file_dump` writes (`.hnsw.data`, `.hnsw.graph`).
/// Keep this list in sync with `snapshot::{LIVE_BASENAME, BAK_BASENAME,
/// TMP_BASENAME}` if those ever change.
///
/// **Note**: v0.7.1 `solo.db` and HNSW snapshots are listed for `--force` wipe
/// purposes (a `--force` re-init must clear them) AND for v0.7.1 install
/// detection (the legacy `solo.db` at the root is the v0.7.1 marker). The
/// v0.8.0 layout puts these files under `<data_dir>/tenants/` — they are
/// wiped via a directory-tree walk in `wipe_solo_owned_files`.
///
/// NOTE(review): `solo.lock` is listed as Solo-owned, and `init` holds that
/// lock while it wipes — check how the wipe treats this entry before relying
/// on it.
const SOLO_OWNED_FILES_ROOT: &[&str] = &[
    // v0.7.1 single-DB layout (legacy; only present pre-migration). Listed
    // first so a v0.7.1 install upgraded via mass-data-move clears any
    // stragglers if the upgrade had to be aborted and retried with --force.
    "solo.db",
    "solo.db-wal",
    "solo.db-shm",
    // v0.7.1 HNSW snapshots at root (live + bak + tmp pairs, each with a
    // `.hnsw.data` and a `.hnsw.graph` file).
    "hnsw_episodes.hnsw.data",
    "hnsw_episodes.hnsw.graph",
    "hnsw_episodes_bak.hnsw.data",
    "hnsw_episodes_bak.hnsw.graph",
    "hnsw_episodes_tmp.hnsw.data",
    "hnsw_episodes_tmp.hnsw.graph",
    // Top-level Solo files (still at root in v0.8.0).
    "solo.config.toml",
    "solo.config.toml.tmp",
    "solo.lock",
    // v0.8.0 tenant registry, plus the `-wal` / `-shm` sidecar names for it.
    TENANTS_INDEX_FILENAME,
    "tenants_index.db-wal",
    "tenants_index.db-shm",
];
111
112/// `solo init` parameters. Built by the CLI layer.
113#[derive(Debug, Clone)]
114pub struct InitParams {
115    /// Where to put the data dir. Created if missing.
116    pub data_dir: PathBuf,
117    /// Resolved passphrase, wrapped in `Zeroizing` so the buffer is wiped
118    /// when this struct drops. CLI layer reads it via prompt or env var.
119    pub passphrase: Zeroizing<String>,
120    /// If true, wipe Solo-owned files in `data_dir` before initializing.
121    pub force: bool,
122    /// Embedder identity to record in the config. For commit 1.1 this is the
123    /// BGE-M3 default; commit 1.4 (embedder loader) will produce it from the
124    /// loaded model.
125    pub embedder: EmbedderConfig,
126}
127
128/// Default embedder identity recorded in `solo.config.toml` when the
129/// CLI hasn't probed a real backend via
130/// [`crate::embedder::probe_embedder_config_from_env`].
131///
132/// In production, `solo init` always calls `probe_embedder_config_from_env`,
133/// which picks between Ollama (probes the real dim) and Stub (32-dim,
134/// deterministic). This function exists for test fixtures + downstream
135/// callers that want a parameterless identity for first-init flows; it
136/// returns the Stub identity, matching `StubEmbedder::default_stub()`
137/// (name=`stub`, version=`v1`, dim=32).
138///
139/// Historically this returned the BGE-M3 identity (BAAI/bge-m3, 1024-dim).
140/// BGE-M3 was removed in v0.6.0 — see `docs/dev-log/0071-v0.5.x-roadmap.md`
141/// Priority 9. Callers that need a deterministic non-stub identity for
142/// tests should build an `EmbedderConfig` literal directly.
143pub fn default_embedder() -> EmbedderConfig {
144    let stub = crate::embedder::StubEmbedder::default_stub();
145    EmbedderConfig {
146        name: stub.name().to_string(),
147        version: stub.version().to_string(),
148        dim: stub.dim() as u32,
149        dtype: "f32".into(),
150    }
151}
152
153/// v0.9.0 P1 (plan BLOCKER 2 resolution): pick the `[llm]` block default
154/// for a freshly-initialised data dir based on the surrounding env.
155///
156/// Precedence:
157///   1. `ANTHROPIC_API_KEY` non-empty → `Anthropic` variant with
158///      `api_key_env = "ANTHROPIC_API_KEY"` and the plan's
159///      `claude-sonnet-4-6` default model.
160///   2. (Future P1 follow-up may add `OPENAI_API_KEY` here; for v0.9.0
161///      P1 we keep the surface minimal — the operator edits the file if
162///      they want OpenAI, Ollama, or MCP-sampling.)
163///   3. otherwise → `None` variant. The Steward runs cluster-only.
164///
165/// Empty values are treated as unset — guards against shells that set
166/// vars to the empty string to mean "leave default".
167pub fn default_llm_settings_from_env() -> LlmSettings {
168    fn env_non_empty(name: &str) -> bool {
169        std::env::var(name)
170            .map(|v| !v.trim().is_empty())
171            .unwrap_or(false)
172    }
173    if env_non_empty("ANTHROPIC_API_KEY") {
174        LlmSettings::Anthropic {
175            api_key_env: "ANTHROPIC_API_KEY".to_string(),
176            model: "claude-sonnet-4-6".to_string(),
177        }
178    } else {
179        LlmSettings::None
180    }
181}
182
/// Outcome reported back to the CLI layer for human-readable success output.
///
/// Returned by `init` on both the fresh-install path and the v0.7.1
/// in-place upgrade path; `upgraded_from_v071` tells the two apart.
#[derive(Debug)]
pub struct InitOutcome {
    /// The data directory `init` operated on (as passed in `InitParams`).
    pub data_dir: PathBuf,
    /// Per-tenant DB path. v0.8.0+: `<data_dir>/tenants/default.db`.
    /// Pre-v0.8.0 callers consumed `<data_dir>/solo.db`; they continue to
    /// work as long as they treat this purely as "the SQLCipher file to
    /// open." P2 retires this field in favour of a `TenantHandle` per
    /// tenant.
    pub db_path: PathBuf,
    /// Path of `solo.config.toml` inside the data directory.
    pub config_path: PathBuf,
    /// Highest applied per-tenant schema version. Equal to the highest
    /// `version` in `tenants/default.db::schema_migrations`. **Not** the
    /// same number as the tenants_index schema; that's tracked via
    /// `tenants_index_schema_version` below.
    pub schema_version: u32,
    /// Path to the new v0.8.0 tenant registry at
    /// `<data_dir>/tenants_index.db`. P2 callers open this to enumerate
    /// the tenants present in the data dir.
    pub tenants_index_path: PathBuf,
    /// Highest applied version in tenants_index.db. In v0.8.0 this was
    /// `4` (the foundation migration); later releases advance it (the
    /// tests in this file currently expect `9`). Tracked separately from
    /// the per-tenant schema chain.
    pub tenants_index_schema_version: u32,
    /// True iff this `init` call upgraded a v0.7.1 layout in place (i.e.
    /// `solo.db` was present at the root, and the mass-data-move helper
    /// ran). False for fresh installs. Used by the CLI to print a
    /// distinct success message.
    pub upgraded_from_v071: bool,
}
213
214/// Run `solo init`. See module docstring for the step list.
215///
216/// Branches:
217///
218///   * **Fresh install** (no `solo.db` at root, no `tenants_index.db`) —
219///     create the v0.8.0 per-tenant layout.
220///   * **v0.7.1 upgrade** (`solo.db` at root, no `tenants_index.db`) — run
221///     the mass-data-move helper, then load the existing config (no new
222///     salt — the user's existing passphrase + salt must still decrypt).
223///   * **Already v0.8.0** (`tenants_index.db` exists) — refuse with
224///     Conflict unless `force = true`. With `force`, wipe + re-init.
225pub fn init(params: InitParams) -> Result<InitOutcome> {
226    let InitParams {
227        data_dir,
228        passphrase,
229        force,
230        embedder,
231    } = params;
232
233    if passphrase.is_empty() {
234        return Err(Error::invalid_input(
235            "passphrase must not be empty (Solo uses it to derive the SQLCipher key)",
236        ));
237    }
238
239    validate_data_dir(&data_dir)?;
240
241    let config_path = data_dir.join("solo.config.toml");
242    let lock_path = data_dir.join("solo.lock");
243    let tenants_index_path = data_dir.join(TENANTS_INDEX_FILENAME);
244    let tenants_dir = data_dir.join(TENANTS_SUBDIR);
245    let legacy_db_path = data_dir.join("solo.db");
246    let new_default_db_path = tenants_dir.join("default.db");
247
248    // Detect existing layout. Three cases:
249    //   - v0.7.1: solo.db at root + (likely) solo.config.toml at root +
250    //             no tenants_index.db
251    //   - v0.8.0: tenants_index.db at root (regardless of whether
252    //             tenants/default.db is present — `pending_migration`
253    //             status covers the transient state)
254    //   - fresh:  none of the above
255    let has_v071_db = legacy_db_path.is_file();
256    let has_v080_index = tenants_index_path.is_file();
257    let has_config = config_path.is_file();
258
259    // Make sure the parent dir exists before we try to acquire the lockfile.
260    std::fs::create_dir_all(&data_dir)
261        .map_err(|e| Error::storage(format!("create data dir {}: {e}", data_dir.display())))?;
262
263    let _lock = Lockfile::acquire(&lock_path)?;
264
265    if has_v080_index && !force {
266        return Err(Error::conflict(format!(
267            "data directory is already initialized (v0.8.0 layout): {}\n\
268             Re-run with --force to wipe and re-initialize \
269             (DESTRUCTIVE — all stored memories will be lost).",
270            data_dir.display()
271        )));
272    }
273
274    if has_v080_index && force {
275        // --force: wipe everything Solo owns + start over with a fresh
276        // salt. Equivalent semantics to the pre-v0.8.0 --force path.
277        wipe_solo_owned_files(&data_dir)?;
278        // Fall through into the fresh-install branch below.
279    } else if has_v071_db && !has_v080_index {
280        // v0.7.1 → v0.8.0 upgrade. The existing config + key MUST still
281        // be valid; refuse if no config is present, because we have no
282        // salt to derive the key.
283        if !has_config {
284            return Err(Error::conflict(format!(
285                "data dir has a v0.7.1 solo.db but no solo.config.toml: {}\n\
286                 Cannot upgrade in place without the persisted salt. \
287                 Either restore the missing config or use --force to wipe.",
288                data_dir.display()
289            )));
290        }
291        if force {
292            // --force against a v0.7.1 install: still wipe + start over,
293            // matching the pre-v0.8.0 semantics. The user explicitly
294            // asked for a destructive re-init.
295            wipe_solo_owned_files(&data_dir)?;
296            // Fall through into fresh-install branch.
297        } else {
298            // In-place upgrade. Read the existing config to recover the
299            // salt; derive the key from the user-supplied passphrase +
300            // that salt. The key must successfully decrypt the existing
301            // solo.db, otherwise the user typed the wrong passphrase.
302            let cfg = SoloConfig::read(&config_path)
303                .map_err(|e| Error::storage(format!("read config for v0.7.1 upgrade: {e}")))?;
304            let salt = cfg.salt_bytes()?;
305            let key = KeyMaterial::derive(&passphrase, &salt)?;
306
307            // Run the mass-data-move helper. Idempotent + crash-recoverable.
308            migrate_v071_to_v080(&data_dir, &key)?;
309
310            // Smoke-test: open the migrated DB and read schema_migrations
311            // to confirm the same key still decrypts it. If the user
312            // typed the wrong passphrase, this surfaces the failure
313            // here, not silently when the daemon next boots.
314            let conn = open_sqlcipher(&new_default_db_path, &key)?;
315            let schema_version: u32 = conn
316                .query_row("SELECT MAX(version) FROM schema_migrations", [], |row| {
317                    row.get(0)
318                })
319                .map_err(|e| {
320                    Error::storage(format!("verify v0.7.1 upgrade cipher round-trip: {e}"))
321                })?;
322            drop(conn);
323
324            // tenants_index.db's own migration version — informational
325            // only, but the InitOutcome surfaces it.
326            let tenants_index_version = {
327                let conn = open_sqlcipher(&tenants_index_path, &key)?;
328                migration::current_tenants_index_version(&conn)?
329            };
330
331            return Ok(InitOutcome {
332                data_dir,
333                db_path: new_default_db_path,
334                config_path,
335                schema_version,
336                tenants_index_path,
337                tenants_index_schema_version: tenants_index_version,
338                upgraded_from_v071: true,
339            });
340        }
341    }
342
343    // ---- Fresh install (or post-`force` wipe) ----
344    //
345    // We arrive here when the data dir has no v0.7.1 / v0.8.0 markers,
346    // OR when `--force` wiped them above.
347    let salt = KeyMaterial::fresh_salt()?;
348    let key = KeyMaterial::derive(&passphrase, &salt)?;
349
350    // Create the per-tenant subdir.
351    std::fs::create_dir_all(&tenants_dir).map_err(|e| {
352        Error::storage(format!(
353            "create tenants subdir {}: {e}",
354            tenants_dir.display()
355        ))
356    })?;
357
358    // Create tenants_index.db + apply migration 0004.
359    let mut index = TenantsIndex::open(&data_dir, &key)?;
360
361    // Create the per-tenant default.db + apply migrations 0001-0003.
362    let mut tenant_conn = open_sqlcipher(&new_default_db_path, &key)?;
363    let schema_version = migration::run_migrations(&mut tenant_conn)?;
364
365    // Cipher round-trip smoke check on the per-tenant DB.
366    drop(tenant_conn);
367    let conn2 = open_sqlcipher(&new_default_db_path, &key)?;
368    let highest: u32 = conn2
369        .query_row("SELECT MAX(version) FROM schema_migrations", [], |row| {
370            row.get(0)
371        })
372        .map_err(|e| Error::storage(format!("verify cipher round-trip (default tenant): {e}")))?;
373    drop(conn2);
374    if highest != schema_version {
375        return Err(Error::storage(format!(
376            "cipher round-trip read drift (default tenant): wrote {schema_version}, read {highest}"
377        )));
378    }
379
380    // Register the default tenant. Idempotent against re-runs, but on a
381    // fresh install this is the row's first insertion.
382    let default_id = TenantId::default_tenant();
383    if index.lookup(&default_id)?.is_none() {
384        index.register_with_status(
385            &default_id,
386            "default.db",
387            Some("Default tenant"),
388            TenantStatus::Active,
389        )?;
390    }
391
392    // tenants_index.db schema version — for the outcome struct.
393    let tenants_index_version = migration::current_tenants_index_version(index.connection())?;
394    drop(index);
395
396    // Persist config. v0.9.0 P1: write an env-detected `[llm]` default
397    // (BLOCKER 2 resolution): `ANTHROPIC_API_KEY` set → mode = "anthropic";
398    // otherwise → mode = "none". Operators who want OpenAI / Ollama /
399    // MCP-sampling edit the file post-init. v0.10.0 will retire the
400    // env-var-only fallback path; landing the explicit default during
401    // `solo init` is the migration ramp.
402    let mut cfg = SoloConfig::new(salt, embedder);
403    cfg.llm = Some(default_llm_settings_from_env());
404    cfg.write(&config_path)?;
405
406    Ok(InitOutcome {
407        data_dir,
408        db_path: new_default_db_path,
409        config_path,
410        schema_version,
411        tenants_index_path,
412        tenants_index_schema_version: tenants_index_version,
413        upgraded_from_v071: false,
414    })
415}
416
417/// Open a SQLCipher database, bind the raw key, and set the journal-mode +
418/// foreign-keys pragmas. Used by `init` and exposed for tests.
419pub fn open_sqlcipher(db_path: &Path, key: &KeyMaterial) -> Result<Connection> {
420    let conn = Connection::open(db_path)
421        .map_err(|e| Error::storage(format!("open {}: {e}", db_path.display())))?;
422    // PRAGMA key MUST be the first statement on a fresh connection.
423    // `as_hex()` returns Zeroizing<String>; wrap the formatted PRAGMA in
424    // Zeroizing<String> so the raw key bytes are wiped on drop rather
425    // than lingering in the heap until the allocator reuses the region.
426    let key_pragma: zeroize::Zeroizing<String> = {
427        let hex = key.as_hex();
428        zeroize::Zeroizing::new(format!("PRAGMA key = \"x'{}'\"", &*hex))
429    };
430    conn.execute_batch(&key_pragma)
431        .map_err(|e| Error::storage(format!("PRAGMA key: {e}")))?;
432    // Standard pragmas. journal_mode=wal returns the new mode as a row, so we
433    // use query_row; the others execute fine via execute_batch.
434    let mode: String = conn
435        .query_row("PRAGMA journal_mode = wal", [], |row| row.get(0))
436        .map_err(|e| Error::storage(format!("set journal_mode=wal: {e}")))?;
437    if mode.to_lowercase() != "wal" {
438        return Err(Error::storage(format!(
439            "expected WAL journal mode, got {mode}"
440        )));
441    }
442    conn.execute_batch(
443        "PRAGMA foreign_keys = ON;
444         PRAGMA busy_timeout = 5000;
445         PRAGMA synchronous = NORMAL;",
446    )
447    .map_err(|e| Error::storage(format!("set startup pragmas: {e}")))?;
448    Ok(conn)
449}
450
451fn wipe_solo_owned_files(data_dir: &Path) -> Result<()> {
452    if !data_dir.exists() {
453        return Ok(());
454    }
455    // Root-level files (legacy v0.7.1 + v0.8.0 top-level).
456    for name in SOLO_OWNED_FILES_ROOT {
457        let p = data_dir.join(name);
458        if p.is_file() {
459            std::fs::remove_file(&p)
460                .map_err(|e| Error::storage(format!("remove {}: {e}", p.display())))?;
461        }
462    }
463    // v0.8.0 per-tenant subdir — everything inside, then the directory.
464    // We don't use a recursive remove of arbitrary subdirs (defensive against
465    // operator surgery that might have nested unrelated state under the
466    // data dir); we only touch the explicit `tenants/` subdir Solo owns.
467    let tenants = data_dir.join(TENANTS_SUBDIR);
468    if tenants.is_dir() {
469        for entry in std::fs::read_dir(&tenants)
470            .map_err(|e| Error::storage(format!("read tenants dir {}: {e}", tenants.display())))?
471        {
472            let entry = entry.map_err(|e| {
473                Error::storage(format!("scan tenants dir {}: {e}", tenants.display()))
474            })?;
475            let p = entry.path();
476            if p.is_file() {
477                std::fs::remove_file(&p)
478                    .map_err(|e| Error::storage(format!("remove {}: {e}", p.display())))?;
479            }
480        }
481        // Best-effort rmdir — leave the dir if some non-Solo content sneaked in.
482        let _ = std::fs::remove_dir(&tenants);
483    }
484    Ok(())
485}
486
487#[cfg(test)]
488mod tests {
489    use super::*;
490    use tempfile::TempDir;
491
492    // v0.9.1 P1 Fix 2: env vars are process-global mutable state and
493    // every test that touches `ANTHROPIC_API_KEY` / `OPENAI_API_KEY` in
494    // the `solo-storage` test binary serializes on a single shared
495    // mutex. Lifted into `test_support::LLM_ENV_LOCK` so the sibling
496    // module `crate::llm::anthropic::tests` (which also mutates these
497    // vars) shares the same lock. Previously the lock lived here at
498    // module scope, so `init::tests` and `anthropic::tests` raced and
499    // `init_writes_llm_anthropic_when_env_key_present` was flaky under
500    // `cargo test --workspace`.
501    use crate::test_support::LLM_ENV_LOCK as ENV_LOCK;
502
503    /// Clears LLM-related env vars for a clean baseline. SAFETY: the
504    /// caller must hold ENV_LOCK so no other thread is racing.
505    struct LlmEnvGuard;
506    impl Drop for LlmEnvGuard {
507        fn drop(&mut self) {
508            for k in ["ANTHROPIC_API_KEY", "OPENAI_API_KEY"] {
509                unsafe { std::env::remove_var(k) };
510            }
511        }
512    }
513    fn fresh_llm_env() -> LlmEnvGuard {
514        for k in ["ANTHROPIC_API_KEY", "OPENAI_API_KEY"] {
515            unsafe { std::env::remove_var(k) };
516        }
517        LlmEnvGuard
518    }
519
520    fn fixture_params(dir: &Path) -> InitParams {
521        InitParams {
522            data_dir: dir.to_path_buf(),
523            passphrase: Zeroizing::new("correct horse battery staple".into()),
524            force: false,
525            embedder: default_embedder(),
526        }
527    }
528
529    #[test]
530    fn happy_path_creates_db_and_config() {
531        let tmp = TempDir::new().unwrap();
532        let dir = tmp.path().join("solo-data");
533        let outcome = init(fixture_params(&dir)).expect("init should succeed");
534        assert_eq!(outcome.data_dir, dir);
535        assert!(outcome.db_path.exists(), "default.db must exist");
536        assert!(outcome.config_path.exists(), "solo.config.toml must exist");
537        // v0.11.1 raises the per-tenant chain to version 10
538        // (contradiction lifecycle), after the v0.8.x audit,
539        // principal-attribution, and triples-source migrations.
540        assert_eq!(outcome.schema_version, 10);
541        // v0.8.0 layout: db_path lives under tenants/, not at root.
542        assert_eq!(outcome.db_path, dir.join("tenants").join("default.db"));
543        // tenants_index.db present + at the current head version.
544        assert!(outcome.tenants_index_path.is_file());
545        // v0.9.0 P1 advanced the tenants_index chain to 9
546        // (last_accessed column, closing v0.8.0 doc-vs-code gap).
547        // Previously: v0.8.1 P3 took it to 8 (quota_bytes); v0.8.0 had 4.
548        assert_eq!(outcome.tenants_index_schema_version, 9);
549        assert!(!outcome.upgraded_from_v071);
550        // Legacy v0.7.1 file must NOT be created on fresh install.
551        assert!(!dir.join("solo.db").exists());
552        // Lockfile should be cleaned up on the success path (RAII drop).
553        assert!(!dir.join("solo.lock").exists(), "lockfile must be removed");
554    }
555
556    #[test]
557    fn fresh_install_registers_default_tenant_active() {
558        let tmp = TempDir::new().unwrap();
559        let dir = tmp.path().join("solo-data");
560        let outcome = init(fixture_params(&dir)).unwrap();
561
562        // Re-open the tenants_index and confirm exactly one Active default.
563        let cfg = SoloConfig::read(&outcome.config_path).unwrap();
564        let salt = cfg.salt_bytes().unwrap();
565        let key = KeyMaterial::derive("correct horse battery staple", &salt).unwrap();
566        let idx = crate::tenants::TenantsIndex::open(&dir, &key).unwrap();
567        let listed = idx.list().unwrap();
568        assert_eq!(listed.len(), 1);
569        assert_eq!(listed[0].tenant_id, TenantId::default_tenant());
570        assert_eq!(listed[0].status, crate::tenants::TenantStatus::Active);
571        assert_eq!(listed[0].db_filename, "default.db");
572    }
573
    /// The salt persisted in `solo.config.toml` must be the one the DB was
    /// keyed with: re-deriving the key from the stored salt + the fixture
    /// passphrase has to open `default.db` and read its schema version.
    #[test]
    fn config_round_trips_salt_correctly() {
        let tmp = TempDir::new().unwrap();
        let dir = tmp.path().join("solo-data");
        let outcome = init(fixture_params(&dir)).unwrap();
        let cfg = SoloConfig::read(&outcome.config_path).unwrap();
        let salt = cfg.salt_bytes().unwrap();
        // Re-derive the key from the persisted salt + the same passphrase;
        // open the DB; should succeed.
        let key = KeyMaterial::derive("correct horse battery staple", &salt).unwrap();
        let conn = open_sqlcipher(&outcome.db_path, &key).unwrap();
        let v: u32 = conn
            .query_row("SELECT MAX(version) FROM schema_migrations", [], |row| {
                row.get(0)
            })
            .unwrap();
        // The per-tenant chain currently ends at version 10 (v0.11.1,
        // contradiction lifecycle). Earlier heads: 7 in v0.8.1 P1
        // (`triples.source_episode_id` FK for GDPR cascade), 6 in v0.8.0
        // P5+P6 (principal-attribution columns), 5 was audit_events.
        // Version 4 lives in tenants_index.db (P1 registry), not here —
        // the numbering skip is intentional, see migration::MIGRATIONS.
        assert_eq!(v, 10);
    }
598
599    #[test]
600    #[ignore = "requires SQLCipher: under plain bundled SQLite, PRAGMA key is a no-op so wrong keys silently succeed. Run with the workspace's bundled-sqlcipher-vendored-openssl feature: `cargo test -p solo-storage -- --include-ignored`"]
601    fn wrong_passphrase_fails_to_open() {
602        let tmp = TempDir::new().unwrap();
603        let dir = tmp.path().join("solo-data");
604        let outcome = init(fixture_params(&dir)).unwrap();
605        let cfg = SoloConfig::read(&outcome.config_path).unwrap();
606        let salt = cfg.salt_bytes().unwrap();
607        let bad_key = KeyMaterial::derive("WRONG PASSPHRASE", &salt).unwrap();
608        // open_sqlcipher itself only sets pragmas; the actual decryption
609        // failure surfaces on the first real query.
610        let conn = open_sqlcipher(&outcome.db_path, &bad_key);
611        let conn = match conn {
612            Ok(c) => c,
613            Err(_) => return, // failed at PRAGMA stage — also acceptable.
614        };
615        let res: rusqlite::Result<u32> =
616            conn.query_row("SELECT MAX(version) FROM schema_migrations", [], |row| {
617                row.get(0)
618            });
619        assert!(res.is_err(), "wrong passphrase must fail to read");
620    }
621
622    #[test]
623    fn second_init_without_force_refuses() {
624        let tmp = TempDir::new().unwrap();
625        let dir = tmp.path().join("solo-data");
626        init(fixture_params(&dir)).unwrap();
627        let err = init(fixture_params(&dir)).unwrap_err();
628        assert!(
629            matches!(err, Error::Conflict(_)),
630            "expected Conflict, got {err:?}"
631        );
632        assert!(err.to_string().contains("already initialized"));
633    }
634
635    #[test]
636    fn force_wipes_and_re_inits() {
637        let tmp = TempDir::new().unwrap();
638        let dir = tmp.path().join("solo-data");
639        let first = init(fixture_params(&dir)).unwrap();
640        let first_cfg = SoloConfig::read(&first.config_path).unwrap();
641
642        let mut params = fixture_params(&dir);
643        params.force = true;
644        let second = init(params).unwrap();
645        let second_cfg = SoloConfig::read(&second.config_path).unwrap();
646
647        // A new salt should have been generated; same passphrase => different
648        // derived key. Compare salts directly.
649        assert_ne!(first_cfg.salt_hex, second_cfg.salt_hex);
650    }
651
652    /// Regression: SOLO_OWNED_FILES had stale HNSW filenames
653    /// (`hnsw_episodes.bin`, `.graph`, `.data`, etc.) that didn't match
654    /// the current snapshot module's actual output (`.hnsw.data`,
655    /// `.hnsw.graph` on the live/_bak/_tmp basenames). `solo init --force`
656    /// would skip wiping those files, leaving stale data after re-init.
657    /// This test plants snapshot files using the current naming scheme
658    /// then verifies --force removes them all.
659    #[test]
660    fn force_wipes_current_hnsw_snapshot_files() {
661        let tmp = TempDir::new().unwrap();
662        let dir = tmp.path().join("solo-data");
663        let _ = init(fixture_params(&dir)).unwrap();
664
665        // Plant snapshot files using the names snapshot.rs actually writes.
666        let planted = [
667            "hnsw_episodes.hnsw.data",
668            "hnsw_episodes.hnsw.graph",
669            "hnsw_episodes_bak.hnsw.data",
670            "hnsw_episodes_bak.hnsw.graph",
671            "hnsw_episodes_tmp.hnsw.data",
672            "hnsw_episodes_tmp.hnsw.graph",
673        ];
674        for name in &planted {
675            std::fs::write(dir.join(name), b"stale snapshot data").unwrap();
676        }
677
678        let mut params = fixture_params(&dir);
679        params.force = true;
680        let _ = init(params).unwrap();
681
682        // All planted files must be gone after --force.
683        for name in &planted {
684            let p = dir.join(name);
685            assert!(
686                !p.exists(),
687                "{} should have been wiped by --force",
688                p.display()
689            );
690        }
691    }
692
693    #[test]
694    fn empty_passphrase_rejected() {
695        let tmp = TempDir::new().unwrap();
696        let mut params = fixture_params(tmp.path());
697        params.passphrase.clear();
698        let err = init(params).unwrap_err();
699        assert!(matches!(err, Error::InvalidInput(_)), "got: {err:?}");
700    }
701
702    /// Simulate a v0.7.1 install (`solo.db` + `solo.config.toml` at root,
703    /// no `tenants_index.db`), then run `init()` against it. The upgrade
704    /// path should kick in: `solo.db` moves into `tenants/default.db`,
705    /// the registry is created with the default tenant active, and the
706    /// outcome reports `upgraded_from_v071 = true`.
707    #[test]
708    fn init_upgrades_v071_install_in_place() {
709        let tmp = TempDir::new().unwrap();
710        let dir = tmp.path().join("solo-data");
711        std::fs::create_dir_all(&dir).unwrap();
712
713        // Plant a v0.7.1 layout: persist a SoloConfig with a known salt,
714        // write a SQLCipher solo.db at the root that opens under the
715        // passphrase + that salt.
716        let passphrase = "v071-upgrade-passphrase";
717        let salt = KeyMaterial::fresh_salt().unwrap();
718        let key = KeyMaterial::derive(passphrase, &salt).unwrap();
719        let cfg = SoloConfig::new(salt, default_embedder());
720        cfg.write(&dir.join("solo.config.toml")).unwrap();
721        let legacy_db = dir.join("solo.db");
722        let mut conn = open_sqlcipher(&legacy_db, &key).unwrap();
723        migration::run_migrations(&mut conn).unwrap();
724        drop(conn);
725
726        // Run init with the same passphrase + force=false.
727        let outcome = init(InitParams {
728            data_dir: dir.clone(),
729            passphrase: Zeroizing::new(passphrase.into()),
730            force: false,
731            embedder: default_embedder(),
732        })
733        .unwrap();
734
735        assert!(outcome.upgraded_from_v071);
736        assert_eq!(outcome.db_path, dir.join("tenants").join("default.db"));
737        assert!(outcome.db_path.is_file());
738        assert!(outcome.tenants_index_path.is_file());
739        // v0.9.0 P1 advanced the tenants_index chain to 9
740        // (last_accessed column). Previously: 8 (quota_bytes, v0.8.1 P3).
741        assert_eq!(outcome.tenants_index_schema_version, 9);
742        // Legacy file gone.
743        assert!(!legacy_db.exists());
744        // v0.8.1 P1: per-tenant chain advances to 7 on first open
745        // (migrations 0005 + 0006 + 0007 all run even on v0.7.1
746        // upgrades — the audit table, principal-attribution columns,
747        // and triples.source_episode_id are all additive).
748        assert_eq!(outcome.schema_version, 10);
749    }
750
751    /// v0.7.1 upgrade with wrong passphrase must fail because the
752    /// SQLCipher open of the migrated default.db won't decrypt.
753    #[test]
754    #[ignore = "requires SQLCipher: under plain bundled SQLite, PRAGMA key is a no-op so wrong keys silently succeed."]
755    fn init_v071_upgrade_with_wrong_passphrase_errors() {
756        let tmp = TempDir::new().unwrap();
757        let dir = tmp.path().join("solo-data");
758        std::fs::create_dir_all(&dir).unwrap();
759
760        // v0.7.1 plant under the GOOD passphrase.
761        let salt = KeyMaterial::fresh_salt().unwrap();
762        let key = KeyMaterial::derive("right-passphrase", &salt).unwrap();
763        let cfg = SoloConfig::new(salt, default_embedder());
764        cfg.write(&dir.join("solo.config.toml")).unwrap();
765        let legacy_db = dir.join("solo.db");
766        let mut conn = open_sqlcipher(&legacy_db, &key).unwrap();
767        migration::run_migrations(&mut conn).unwrap();
768        drop(conn);
769
770        // Call init() with the WRONG passphrase. The move itself may
771        // succeed (rename doesn't decrypt), but the post-move smoke open
772        // of the migrated default.db must fail.
773        let err = init(InitParams {
774            data_dir: dir,
775            passphrase: Zeroizing::new("wrong-passphrase".into()),
776            force: false,
777            embedder: default_embedder(),
778        })
779        .unwrap_err();
780        assert!(
781            matches!(err, Error::Storage(_)),
782            "wrong passphrase must surface as a Storage error, got {err:?}"
783        );
784    }
785
786    // ----------------------------------------------------------------
787    // v0.9.0 P1: init writes an env-detected `[llm]` default
788    // ----------------------------------------------------------------
789
790    #[test]
791    fn init_writes_llm_none_when_no_env_key_present() {
792        let _lock = ENV_LOCK.lock().unwrap_or_else(|p| p.into_inner());
793        let _g = fresh_llm_env();
794        let tmp = TempDir::new().unwrap();
795        let dir = tmp.path().join("solo-data");
796        let outcome = init(fixture_params(&dir)).expect("init should succeed");
797        let cfg = SoloConfig::read(&outcome.config_path).unwrap();
798        assert_eq!(
799            cfg.llm,
800            Some(LlmSettings::None),
801            "no ANTHROPIC_API_KEY in env → init writes [llm] mode = \"none\""
802        );
803    }
804
805    #[test]
806    fn init_writes_llm_anthropic_when_env_key_present() {
807        let _lock = ENV_LOCK.lock().unwrap_or_else(|p| p.into_inner());
808        let _g = fresh_llm_env();
809        // SAFETY: ENV_LOCK held; LlmEnvGuard cleans up on drop.
810        unsafe { std::env::set_var("ANTHROPIC_API_KEY", "sk-ant-test-fixture") };
811        let tmp = TempDir::new().unwrap();
812        let dir = tmp.path().join("solo-data");
813        let outcome = init(fixture_params(&dir)).expect("init should succeed");
814        let cfg = SoloConfig::read(&outcome.config_path).unwrap();
815        match cfg.llm {
816            Some(LlmSettings::Anthropic {
817                ref api_key_env,
818                ref model,
819            }) => {
820                assert_eq!(api_key_env, "ANTHROPIC_API_KEY");
821                assert_eq!(model, "claude-sonnet-4-6");
822            }
823            other => panic!("expected Anthropic variant from env-detected default, got {other:?}"),
824        }
825    }
826
827    #[test]
828    fn default_llm_settings_from_env_picks_anthropic_when_set() {
829        let _lock = ENV_LOCK.lock().unwrap_or_else(|p| p.into_inner());
830        let _g = fresh_llm_env();
831        unsafe { std::env::set_var("ANTHROPIC_API_KEY", "sk-ant-fixture") };
832        match default_llm_settings_from_env() {
833            LlmSettings::Anthropic { api_key_env, .. } => {
834                assert_eq!(api_key_env, "ANTHROPIC_API_KEY");
835            }
836            other => panic!("expected Anthropic, got {other:?}"),
837        }
838    }
839
840    #[test]
841    fn default_llm_settings_from_env_returns_none_when_empty_value() {
842        let _lock = ENV_LOCK.lock().unwrap_or_else(|p| p.into_inner());
843        let _g = fresh_llm_env();
844        // Empty value should NOT trigger Anthropic — operators sometimes
845        // set vars to "" to mean "leave default" (especially under bash
846        // / nix-shell). Mirrors the env_trimmed helper in solo-steward.
847        unsafe { std::env::set_var("ANTHROPIC_API_KEY", "") };
848        assert_eq!(default_llm_settings_from_env(), LlmSettings::None);
849    }
850
851    #[test]
852    fn default_llm_settings_from_env_returns_none_when_unset() {
853        let _lock = ENV_LOCK.lock().unwrap_or_else(|p| p.into_inner());
854        let _g = fresh_llm_env();
855        assert_eq!(default_llm_settings_from_env(), LlmSettings::None);
856    }
857
858    #[test]
859    fn cloud_sync_path_rejected() {
860        // We don't actually create files; validate_data_dir runs first.
861        let placeholder = std::env::temp_dir().join("solo-init-cloud-test");
862        let mut params = fixture_params(&placeholder);
863        // Force a cloud-sync component into the path. Must be absolute on
864        // both Unix and Windows so validate_data_dir's absolute-path check
865        // doesn't short-circuit before the cloud-sync check we want to
866        // exercise.
867        #[cfg(windows)]
868        let cloud = std::path::PathBuf::from(r"C:\Users\x\Dropbox\solo");
869        #[cfg(not(windows))]
870        let cloud = std::path::PathBuf::from("/Users/x/Dropbox/solo");
871        params.data_dir = cloud;
872        let err = init(params).unwrap_err();
873        assert!(err.to_string().contains("cloud-sync"), "got: {err}");
874    }
875}