solo_storage/init.rs
1// SPDX-License-Identifier: Apache-2.0
2
3//! `solo init`: create a fresh Solo data directory.
4//!
5//! The orchestrator wires together every primitive in this crate:
6//!
7//! 1. `path_validation::validate_data_dir` — refuse cloud-sync folders.
8//! 2. Detect existing init state.
9//! - If a v0.7.1 single-DB layout is present (`solo.db` at the data
10//! dir root, no `tenants_index.db`), this is an upgrade — invoke
11//! the v0.7.1 → v0.8.0 mass-data-move helper instead of refusing.
12//! - If a v0.8.0 per-tenant layout is already in place
13//! (`tenants_index.db` exists, with at least the default tenant
14//! registered), refuse with Conflict unless `force = true`.
15//! - On `force`, wipe Solo-owned files in the data dir (NOT the dir
16//! itself, in case the user keeps other stuff there) and proceed
17//! to fresh-install creation.
18//! 3. Create the data directory.
19//! 4. Acquire `solo.lock` (RAII) so a parallel `solo init` or `solo daemon`
20//! can't race us.
21//! 5. Generate a fresh 16-byte salt, derive the SQLCipher key via Argon2id.
//! 6. Create `tenants/` subdir + `tenants_index.db` (apply the tenants-index
//!    migration chain) + per-tenant `tenants/default.db` (apply the per-tenant
//!    migration chain) + register the default tenant in the index.
25//! 7. Write `solo.config.toml` (salt + embedder identity).
26//! 8. Drop the lockfile (RAII — also runs on any error path between steps).
27//!
28//! On any error after the data dir is created, the partial state on disk is
29//! left for inspection. The caller can re-run with `--force` to wipe and
30//! retry.
31//!
32//! ## Layout (v0.8.0)
33//!
34//! After a successful `init` on a fresh install:
35//!
36//! ```text
37//! <data_dir>/
38//! tenants_index.db -- registry (SQLCipher; same key as below)
39//! tenants/
40//! default.db -- per-tenant SQLCipher DB
41//! default.db-wal
42//! default.db-shm
43//! solo.config.toml
44//! ```
45//!
46//! The `solo.db` at the data dir root that v0.7.1 used is no longer created.
47//! Existing v0.7.1 data dirs are upgraded into the new layout on the first
48//! `solo init`-or-daemon-boot call by the mass-data-move helper.
49
50use rusqlite::Connection;
51use solo_core::{Embedder, Error, Result, TenantId};
52use std::path::{Path, PathBuf};
53use zeroize::Zeroizing;
54
55use crate::{
56 config::{EmbedderConfig, LlmSettings, SoloConfig},
57 key_material::KeyMaterial,
58 lockfile::Lockfile,
59 migration,
60 path_validation::validate_data_dir,
61 tenants::{
62 TENANTS_INDEX_FILENAME, TENANTS_SUBDIR, TenantStatus, TenantsIndex, migrate_v071_to_v080,
63 },
64};
65
66/// Default data dir: `~/.solo/`. Honors the home-dir resolution `dirs` crate
67/// performs (Windows: `%USERPROFILE%`; Unix: `$HOME`). Returns `None` if no
68/// home directory can be found.
69pub fn default_data_dir() -> Option<PathBuf> {
70 dirs::home_dir().map(|h| h.join(".solo"))
71}
72
73/// File names at the data dir root that Solo owns. `--force` removes these,
74/// and v0.8.0 layout detection looks at them. Anything else in the dir is
75/// left untouched.
76///
77/// HNSW snapshot filenames are derived from the basenames in
78/// `crate::snapshot` (`hnsw_episodes`, `hnsw_episodes_bak`, `hnsw_episodes_tmp`)
79/// + the suffixes hnsw_rs's `file_dump` writes (`.hnsw.data`, `.hnsw.graph`).
80/// Keep this list in sync with `snapshot::{LIVE_BASENAME, BAK_BASENAME,
81/// TMP_BASENAME}` if those ever change.
82///
83/// **Note**: v0.7.1 `solo.db` and HNSW snapshots are listed for `--force` wipe
84/// purposes (a `--force` re-init must clear them) AND for v0.7.1 install
85/// detection (the legacy `solo.db` at the root is the v0.7.1 marker). The
86/// v0.8.0 layout puts these files under `<data_dir>/tenants/` — they are
87/// wiped via a directory-tree walk in `wipe_solo_owned_files`.
88const SOLO_OWNED_FILES_ROOT: &[&str] = &[
89 // v0.7.1 single-DB layout (legacy; only present pre-migration). Listed
90 // first so a v0.7.1 install upgraded via mass-data-move clears any
91 // stragglers if the upgrade had to be aborted and retried with --force.
92 "solo.db",
93 "solo.db-wal",
94 "solo.db-shm",
95 // v0.7.1 HNSW snapshots at root (live + bak + tmp pairs).
96 "hnsw_episodes.hnsw.data",
97 "hnsw_episodes.hnsw.graph",
98 "hnsw_episodes_bak.hnsw.data",
99 "hnsw_episodes_bak.hnsw.graph",
100 "hnsw_episodes_tmp.hnsw.data",
101 "hnsw_episodes_tmp.hnsw.graph",
102 // Top-level Solo files (still at root in v0.8.0).
103 "solo.config.toml",
104 "solo.config.toml.tmp",
105 "solo.lock",
106 // v0.8.0 tenant registry.
107 TENANTS_INDEX_FILENAME,
108 "tenants_index.db-wal",
109 "tenants_index.db-shm",
110];
111
112/// `solo init` parameters. Built by the CLI layer.
113#[derive(Debug, Clone)]
114pub struct InitParams {
115 /// Where to put the data dir. Created if missing.
116 pub data_dir: PathBuf,
117 /// Resolved passphrase, wrapped in `Zeroizing` so the buffer is wiped
118 /// when this struct drops. CLI layer reads it via prompt or env var.
119 pub passphrase: Zeroizing<String>,
120 /// If true, wipe Solo-owned files in `data_dir` before initializing.
121 pub force: bool,
122 /// Embedder identity to record in the config. For commit 1.1 this is the
123 /// BGE-M3 default; commit 1.4 (embedder loader) will produce it from the
124 /// loaded model.
125 pub embedder: EmbedderConfig,
126}
127
128/// Default embedder identity recorded in `solo.config.toml` when the
129/// CLI hasn't probed a real backend via
130/// [`crate::embedder::probe_embedder_config_from_env`].
131///
132/// In production, `solo init` always calls `probe_embedder_config_from_env`,
133/// which picks between Ollama (probes the real dim) and Stub (32-dim,
134/// deterministic). This function exists for test fixtures + downstream
135/// callers that want a parameterless identity for first-init flows; it
136/// returns the Stub identity, matching `StubEmbedder::default_stub()`
137/// (name=`stub`, version=`v1`, dim=32).
138///
139/// Historically this returned the BGE-M3 identity (BAAI/bge-m3, 1024-dim).
140/// BGE-M3 was removed in v0.6.0 — see `docs/dev-log/0071-v0.5.x-roadmap.md`
141/// Priority 9. Callers that need a deterministic non-stub identity for
142/// tests should build an `EmbedderConfig` literal directly.
143pub fn default_embedder() -> EmbedderConfig {
144 let stub = crate::embedder::StubEmbedder::default_stub();
145 EmbedderConfig {
146 name: stub.name().to_string(),
147 version: stub.version().to_string(),
148 dim: stub.dim() as u32,
149 dtype: "f32".into(),
150 }
151}
152
153/// v0.9.0 P1 (plan BLOCKER 2 resolution): pick the `[llm]` block default
154/// for a freshly-initialised data dir based on the surrounding env.
155///
156/// Precedence:
157/// 1. `ANTHROPIC_API_KEY` non-empty → `Anthropic` variant with
158/// `api_key_env = "ANTHROPIC_API_KEY"` and the plan's
159/// `claude-sonnet-4-6` default model.
160/// 2. (Future P1 follow-up may add `OPENAI_API_KEY` here; for v0.9.0
161/// P1 we keep the surface minimal — the operator edits the file if
162/// they want OpenAI, Ollama, or MCP-sampling.)
163/// 3. otherwise → `None` variant. The Steward runs cluster-only.
164///
165/// Empty values are treated as unset — guards against shells that set
166/// vars to the empty string to mean "leave default".
167pub fn default_llm_settings_from_env() -> LlmSettings {
168 fn env_non_empty(name: &str) -> bool {
169 std::env::var(name)
170 .map(|v| !v.trim().is_empty())
171 .unwrap_or(false)
172 }
173 if env_non_empty("ANTHROPIC_API_KEY") {
174 LlmSettings::Anthropic {
175 api_key_env: "ANTHROPIC_API_KEY".to_string(),
176 model: "claude-sonnet-4-6".to_string(),
177 }
178 } else {
179 LlmSettings::None
180 }
181}
182
/// What [`init`] hands back to the CLI layer so it can print a
/// human-readable success message.
#[derive(Debug)]
pub struct InitOutcome {
    /// The data directory that was initialised (as passed in `InitParams`).
    pub data_dir: PathBuf,
    /// Per-tenant DB path. v0.8.0+: `<data_dir>/tenants/default.db`.
    /// Pre-v0.8.0 callers consumed `<data_dir>/solo.db`; they keep working
    /// as long as they treat this purely as "the SQLCipher file to open."
    /// P2 retires this field in favour of a `TenantHandle` per tenant.
    pub db_path: PathBuf,
    /// Location of `solo.config.toml` inside the data dir.
    pub config_path: PathBuf,
    /// Highest applied per-tenant schema version, i.e. the largest
    /// `version` in `tenants/default.db::schema_migrations`. This is **not**
    /// the tenants_index schema version; see
    /// `tenants_index_schema_version` for that one.
    pub schema_version: u32,
    /// Path to the v0.8.0 tenant registry at `<data_dir>/tenants_index.db`.
    /// P2 callers open this to enumerate the tenants in the data dir.
    pub tenants_index_path: PathBuf,
    /// Highest applied version in `tenants_index.db`, as reported by
    /// `migration::current_tenants_index_version`. Tracked separately from
    /// the per-tenant schema chain.
    pub tenants_index_schema_version: u32,
    /// True iff this `init` call upgraded a v0.7.1 layout in place (a
    /// `solo.db` was present at the root and the mass-data-move helper
    /// ran). False for fresh installs. Lets the CLI print a distinct
    /// success message.
    pub upgraded_from_v071: bool,
}
213
214/// Run `solo init`. See module docstring for the step list.
215///
216/// Branches:
217///
218/// * **Fresh install** (no `solo.db` at root, no `tenants_index.db`) —
219/// create the v0.8.0 per-tenant layout.
220/// * **v0.7.1 upgrade** (`solo.db` at root, no `tenants_index.db`) — run
221/// the mass-data-move helper, then load the existing config (no new
222/// salt — the user's existing passphrase + salt must still decrypt).
223/// * **Already v0.8.0** (`tenants_index.db` exists) — refuse with
224/// Conflict unless `force = true`. With `force`, wipe + re-init.
225pub fn init(params: InitParams) -> Result<InitOutcome> {
226 let InitParams {
227 data_dir,
228 passphrase,
229 force,
230 embedder,
231 } = params;
232
233 if passphrase.is_empty() {
234 return Err(Error::invalid_input(
235 "passphrase must not be empty (Solo uses it to derive the SQLCipher key)",
236 ));
237 }
238
239 validate_data_dir(&data_dir)?;
240
241 let config_path = data_dir.join("solo.config.toml");
242 let lock_path = data_dir.join("solo.lock");
243 let tenants_index_path = data_dir.join(TENANTS_INDEX_FILENAME);
244 let tenants_dir = data_dir.join(TENANTS_SUBDIR);
245 let legacy_db_path = data_dir.join("solo.db");
246 let new_default_db_path = tenants_dir.join("default.db");
247
248 // Detect existing layout. Three cases:
249 // - v0.7.1: solo.db at root + (likely) solo.config.toml at root +
250 // no tenants_index.db
251 // - v0.8.0: tenants_index.db at root (regardless of whether
252 // tenants/default.db is present — `pending_migration`
253 // status covers the transient state)
254 // - fresh: none of the above
255 let has_v071_db = legacy_db_path.is_file();
256 let has_v080_index = tenants_index_path.is_file();
257 let has_config = config_path.is_file();
258
259 // Make sure the parent dir exists before we try to acquire the lockfile.
260 std::fs::create_dir_all(&data_dir)
261 .map_err(|e| Error::storage(format!("create data dir {}: {e}", data_dir.display())))?;
262
263 let _lock = Lockfile::acquire(&lock_path)?;
264
265 if has_v080_index && !force {
266 return Err(Error::conflict(format!(
267 "data directory is already initialized (v0.8.0 layout): {}\n\
268 Re-run with --force to wipe and re-initialize \
269 (DESTRUCTIVE — all stored memories will be lost).",
270 data_dir.display()
271 )));
272 }
273
274 if has_v080_index && force {
275 // --force: wipe everything Solo owns + start over with a fresh
276 // salt. Equivalent semantics to the pre-v0.8.0 --force path.
277 wipe_solo_owned_files(&data_dir)?;
278 // Fall through into the fresh-install branch below.
279 } else if has_v071_db && !has_v080_index {
280 // v0.7.1 → v0.8.0 upgrade. The existing config + key MUST still
281 // be valid; refuse if no config is present, because we have no
282 // salt to derive the key.
283 if !has_config {
284 return Err(Error::conflict(format!(
285 "data dir has a v0.7.1 solo.db but no solo.config.toml: {}\n\
286 Cannot upgrade in place without the persisted salt. \
287 Either restore the missing config or use --force to wipe.",
288 data_dir.display()
289 )));
290 }
291 if force {
292 // --force against a v0.7.1 install: still wipe + start over,
293 // matching the pre-v0.8.0 semantics. The user explicitly
294 // asked for a destructive re-init.
295 wipe_solo_owned_files(&data_dir)?;
296 // Fall through into fresh-install branch.
297 } else {
298 // In-place upgrade. Read the existing config to recover the
299 // salt; derive the key from the user-supplied passphrase +
300 // that salt. The key must successfully decrypt the existing
301 // solo.db, otherwise the user typed the wrong passphrase.
302 let cfg = SoloConfig::read(&config_path)
303 .map_err(|e| Error::storage(format!("read config for v0.7.1 upgrade: {e}")))?;
304 let salt = cfg.salt_bytes()?;
305 let key = KeyMaterial::derive(&passphrase, &salt)?;
306
307 // Run the mass-data-move helper. Idempotent + crash-recoverable.
308 migrate_v071_to_v080(&data_dir, &key)?;
309
310 // Smoke-test: open the migrated DB and read schema_migrations
311 // to confirm the same key still decrypts it. If the user
312 // typed the wrong passphrase, this surfaces the failure
313 // here, not silently when the daemon next boots.
314 let conn = open_sqlcipher(&new_default_db_path, &key)?;
315 let schema_version: u32 = conn
316 .query_row("SELECT MAX(version) FROM schema_migrations", [], |row| {
317 row.get(0)
318 })
319 .map_err(|e| {
320 Error::storage(format!("verify v0.7.1 upgrade cipher round-trip: {e}"))
321 })?;
322 drop(conn);
323
324 // tenants_index.db's own migration version — informational
325 // only, but the InitOutcome surfaces it.
326 let tenants_index_version = {
327 let conn = open_sqlcipher(&tenants_index_path, &key)?;
328 migration::current_tenants_index_version(&conn)?
329 };
330
331 return Ok(InitOutcome {
332 data_dir,
333 db_path: new_default_db_path,
334 config_path,
335 schema_version,
336 tenants_index_path,
337 tenants_index_schema_version: tenants_index_version,
338 upgraded_from_v071: true,
339 });
340 }
341 }
342
343 // ---- Fresh install (or post-`force` wipe) ----
344 //
345 // We arrive here when the data dir has no v0.7.1 / v0.8.0 markers,
346 // OR when `--force` wiped them above.
347 let salt = KeyMaterial::fresh_salt()?;
348 let key = KeyMaterial::derive(&passphrase, &salt)?;
349
350 // Create the per-tenant subdir.
351 std::fs::create_dir_all(&tenants_dir).map_err(|e| {
352 Error::storage(format!(
353 "create tenants subdir {}: {e}",
354 tenants_dir.display()
355 ))
356 })?;
357
358 // Create tenants_index.db + apply migration 0004.
359 let mut index = TenantsIndex::open(&data_dir, &key)?;
360
361 // Create the per-tenant default.db + apply migrations 0001-0003.
362 let mut tenant_conn = open_sqlcipher(&new_default_db_path, &key)?;
363 let schema_version = migration::run_migrations(&mut tenant_conn)?;
364
365 // Cipher round-trip smoke check on the per-tenant DB.
366 drop(tenant_conn);
367 let conn2 = open_sqlcipher(&new_default_db_path, &key)?;
368 let highest: u32 = conn2
369 .query_row("SELECT MAX(version) FROM schema_migrations", [], |row| {
370 row.get(0)
371 })
372 .map_err(|e| Error::storage(format!("verify cipher round-trip (default tenant): {e}")))?;
373 drop(conn2);
374 if highest != schema_version {
375 return Err(Error::storage(format!(
376 "cipher round-trip read drift (default tenant): wrote {schema_version}, read {highest}"
377 )));
378 }
379
380 // Register the default tenant. Idempotent against re-runs, but on a
381 // fresh install this is the row's first insertion.
382 let default_id = TenantId::default_tenant();
383 if index.lookup(&default_id)?.is_none() {
384 index.register_with_status(
385 &default_id,
386 "default.db",
387 Some("Default tenant"),
388 TenantStatus::Active,
389 )?;
390 }
391
392 // tenants_index.db schema version — for the outcome struct.
393 let tenants_index_version = migration::current_tenants_index_version(index.connection())?;
394 drop(index);
395
396 // Persist config. v0.9.0 P1: write an env-detected `[llm]` default
397 // (BLOCKER 2 resolution): `ANTHROPIC_API_KEY` set → mode = "anthropic";
398 // otherwise → mode = "none". Operators who want OpenAI / Ollama /
399 // MCP-sampling edit the file post-init. v0.10.0 will retire the
400 // env-var-only fallback path; landing the explicit default during
401 // `solo init` is the migration ramp.
402 let mut cfg = SoloConfig::new(salt, embedder);
403 cfg.llm = Some(default_llm_settings_from_env());
404 cfg.write(&config_path)?;
405
406 Ok(InitOutcome {
407 data_dir,
408 db_path: new_default_db_path,
409 config_path,
410 schema_version,
411 tenants_index_path,
412 tenants_index_schema_version: tenants_index_version,
413 upgraded_from_v071: false,
414 })
415}
416
417/// Open a SQLCipher database, bind the raw key, and set the journal-mode +
418/// foreign-keys pragmas. Used by `init` and exposed for tests.
419pub fn open_sqlcipher(db_path: &Path, key: &KeyMaterial) -> Result<Connection> {
420 let conn = Connection::open(db_path)
421 .map_err(|e| Error::storage(format!("open {}: {e}", db_path.display())))?;
422 // PRAGMA key MUST be the first statement on a fresh connection.
423 // `as_hex()` returns Zeroizing<String>; wrap the formatted PRAGMA in
424 // Zeroizing<String> so the raw key bytes are wiped on drop rather
425 // than lingering in the heap until the allocator reuses the region.
426 let key_pragma: zeroize::Zeroizing<String> = {
427 let hex = key.as_hex();
428 zeroize::Zeroizing::new(format!("PRAGMA key = \"x'{}'\"", &*hex))
429 };
430 conn.execute_batch(&key_pragma)
431 .map_err(|e| Error::storage(format!("PRAGMA key: {e}")))?;
432 // Standard pragmas. journal_mode=wal returns the new mode as a row, so we
433 // use query_row; the others execute fine via execute_batch.
434 let mode: String = conn
435 .query_row("PRAGMA journal_mode = wal", [], |row| row.get(0))
436 .map_err(|e| Error::storage(format!("set journal_mode=wal: {e}")))?;
437 if mode.to_lowercase() != "wal" {
438 return Err(Error::storage(format!(
439 "expected WAL journal mode, got {mode}"
440 )));
441 }
442 conn.execute_batch(
443 "PRAGMA foreign_keys = ON;
444 PRAGMA busy_timeout = 5000;
445 PRAGMA synchronous = NORMAL;",
446 )
447 .map_err(|e| Error::storage(format!("set startup pragmas: {e}")))?;
448 Ok(conn)
449}
450
451fn wipe_solo_owned_files(data_dir: &Path) -> Result<()> {
452 if !data_dir.exists() {
453 return Ok(());
454 }
455 // Root-level files (legacy v0.7.1 + v0.8.0 top-level).
456 for name in SOLO_OWNED_FILES_ROOT {
457 let p = data_dir.join(name);
458 if p.is_file() {
459 std::fs::remove_file(&p)
460 .map_err(|e| Error::storage(format!("remove {}: {e}", p.display())))?;
461 }
462 }
463 // v0.8.0 per-tenant subdir — everything inside, then the directory.
464 // We don't use a recursive remove of arbitrary subdirs (defensive against
465 // operator surgery that might have nested unrelated state under the
466 // data dir); we only touch the explicit `tenants/` subdir Solo owns.
467 let tenants = data_dir.join(TENANTS_SUBDIR);
468 if tenants.is_dir() {
469 for entry in std::fs::read_dir(&tenants)
470 .map_err(|e| Error::storage(format!("read tenants dir {}: {e}", tenants.display())))?
471 {
472 let entry = entry.map_err(|e| {
473 Error::storage(format!("scan tenants dir {}: {e}", tenants.display()))
474 })?;
475 let p = entry.path();
476 if p.is_file() {
477 std::fs::remove_file(&p)
478 .map_err(|e| Error::storage(format!("remove {}: {e}", p.display())))?;
479 }
480 }
481 // Best-effort rmdir — leave the dir if some non-Solo content sneaked in.
482 let _ = std::fs::remove_dir(&tenants);
483 }
484 Ok(())
485}
486
#[cfg(test)]
mod tests {
    use super::*;
    use tempfile::TempDir;

    // v0.9.1 P1 Fix 2: env vars are process-global mutable state, so every
    // test in the `solo-storage` test binary that touches
    // `ANTHROPIC_API_KEY` / `OPENAI_API_KEY` serializes on one shared mutex.
    // It lives in `test_support::LLM_ENV_LOCK` so the sibling module
    // `crate::llm::anthropic::tests` (which mutates the same vars) shares
    // it. When the lock lived here at module scope, `init::tests` and
    // `anthropic::tests` raced and
    // `init_writes_llm_anthropic_when_env_key_present` was flaky under
    // `cargo test --workspace`.
    use crate::test_support::LLM_ENV_LOCK as ENV_LOCK;

    /// Passphrase used by `fixture_params` and by the tests that re-derive
    /// the key afterwards.
    const FIXTURE_PASSPHRASE: &str = "correct horse battery staple";

    /// Current head of the per-tenant migration chain
    /// (`tenants/default.db::schema_migrations`). v0.11.1 raised it to 10
    /// (contradiction lifecycle), after the v0.8.x audit,
    /// principal-attribution, and triples-source migrations. Version 4 lives
    /// in tenants_index.db, not in this chain — the numbering skip is
    /// intentional, see `migration::MIGRATIONS`.
    const TENANT_SCHEMA_HEAD: u32 = 10;

    /// Current head of the tenants_index.db migration chain. v0.9.0 P1
    /// advanced it to 9 (last_accessed column); v0.8.1 P3 had taken it to 8
    /// (quota_bytes); v0.8.0 shipped with 4.
    const TENANTS_INDEX_HEAD: u32 = 9;

    /// Env vars the LLM-default detection tests reset around each run.
    const LLM_ENV_VARS: [&str; 2] = ["ANTHROPIC_API_KEY", "OPENAI_API_KEY"];

    /// Remove every LLM-related env var. Callers must hold `ENV_LOCK`.
    fn clear_llm_env() {
        for var in LLM_ENV_VARS {
            // SAFETY: every caller holds ENV_LOCK, so no other test in this
            // binary that follows the lock protocol is touching these vars.
            unsafe { std::env::remove_var(var) };
        }
    }

    /// Restores a clean LLM env on drop, so a test that sets a key cannot
    /// leak it into the next test. The creator must hold `ENV_LOCK` for the
    /// guard's whole lifetime.
    struct LlmEnvGuard;
    impl Drop for LlmEnvGuard {
        fn drop(&mut self) {
            clear_llm_env();
        }
    }

    /// Start from a clean LLM env and return the guard that cleans up again.
    fn fresh_llm_env() -> LlmEnvGuard {
        clear_llm_env();
        LlmEnvGuard
    }

    fn fixture_params(dir: &Path) -> InitParams {
        InitParams {
            data_dir: dir.to_path_buf(),
            passphrase: Zeroizing::new(FIXTURE_PASSPHRASE.into()),
            force: false,
            embedder: default_embedder(),
        }
    }

    /// Plant a v0.7.1 layout in `dir`: a `solo.config.toml` with a fresh
    /// salt plus a `solo.db` at the root that opens under
    /// `passphrase` + that salt. Returns the path of the legacy DB.
    fn plant_v071_install(dir: &Path, passphrase: &str) -> PathBuf {
        std::fs::create_dir_all(dir).unwrap();
        let salt = KeyMaterial::fresh_salt().unwrap();
        let key = KeyMaterial::derive(passphrase, &salt).unwrap();
        let cfg = SoloConfig::new(salt, default_embedder());
        cfg.write(&dir.join("solo.config.toml")).unwrap();
        let legacy_db = dir.join("solo.db");
        let mut conn = open_sqlcipher(&legacy_db, &key).unwrap();
        migration::run_migrations(&mut conn).unwrap();
        drop(conn);
        legacy_db
    }

    /// Highest version recorded in `schema_migrations` on `conn`.
    fn head_schema_version(conn: &Connection) -> rusqlite::Result<u32> {
        conn.query_row("SELECT MAX(version) FROM schema_migrations", [], |row| {
            row.get(0)
        })
    }

    #[test]
    fn happy_path_creates_db_and_config() {
        let tmp = TempDir::new().unwrap();
        let dir = tmp.path().join("solo-data");
        let outcome = init(fixture_params(&dir)).expect("init should succeed");
        assert_eq!(outcome.data_dir, dir);
        assert!(outcome.db_path.exists(), "default.db must exist");
        assert!(outcome.config_path.exists(), "solo.config.toml must exist");
        assert_eq!(outcome.schema_version, TENANT_SCHEMA_HEAD);
        // v0.8.0 layout: db_path lives under tenants/, not at root.
        assert_eq!(outcome.db_path, dir.join("tenants").join("default.db"));
        // tenants_index.db present + at the current head version.
        assert!(outcome.tenants_index_path.is_file());
        assert_eq!(outcome.tenants_index_schema_version, TENANTS_INDEX_HEAD);
        assert!(!outcome.upgraded_from_v071);
        // Legacy v0.7.1 file must NOT be created on fresh install.
        assert!(!dir.join("solo.db").exists());
        // Lockfile should be cleaned up on the success path (RAII drop).
        assert!(!dir.join("solo.lock").exists(), "lockfile must be removed");
    }

    #[test]
    fn fresh_install_registers_default_tenant_active() {
        let tmp = TempDir::new().unwrap();
        let dir = tmp.path().join("solo-data");
        let outcome = init(fixture_params(&dir)).unwrap();

        // Re-open the tenants_index and confirm exactly one Active default.
        let cfg = SoloConfig::read(&outcome.config_path).unwrap();
        let salt = cfg.salt_bytes().unwrap();
        let key = KeyMaterial::derive(FIXTURE_PASSPHRASE, &salt).unwrap();
        let idx = crate::tenants::TenantsIndex::open(&dir, &key).unwrap();
        let listed = idx.list().unwrap();
        assert_eq!(listed.len(), 1);
        assert_eq!(listed[0].tenant_id, TenantId::default_tenant());
        assert_eq!(listed[0].status, crate::tenants::TenantStatus::Active);
        assert_eq!(listed[0].db_filename, "default.db");
    }

    #[test]
    fn config_round_trips_salt_correctly() {
        let tmp = TempDir::new().unwrap();
        let dir = tmp.path().join("solo-data");
        let outcome = init(fixture_params(&dir)).unwrap();
        let cfg = SoloConfig::read(&outcome.config_path).unwrap();
        let salt = cfg.salt_bytes().unwrap();
        // Re-derive the key from the persisted salt + the same passphrase;
        // opening the DB and reading the schema head must succeed.
        let key = KeyMaterial::derive(FIXTURE_PASSPHRASE, &salt).unwrap();
        let conn = open_sqlcipher(&outcome.db_path, &key).unwrap();
        assert_eq!(head_schema_version(&conn).unwrap(), TENANT_SCHEMA_HEAD);
    }

    #[test]
    #[ignore = "requires SQLCipher: under plain bundled SQLite, PRAGMA key is a no-op so wrong keys silently succeed. Run with the workspace's bundled-sqlcipher-vendored-openssl feature: `cargo test -p solo-storage -- --include-ignored`"]
    fn wrong_passphrase_fails_to_open() {
        let tmp = TempDir::new().unwrap();
        let dir = tmp.path().join("solo-data");
        let outcome = init(fixture_params(&dir)).unwrap();
        let cfg = SoloConfig::read(&outcome.config_path).unwrap();
        let salt = cfg.salt_bytes().unwrap();
        let bad_key = KeyMaterial::derive("WRONG PASSPHRASE", &salt).unwrap();
        // Failing while open_sqlcipher sets its pragmas is acceptable; if
        // the open succeeds, the first real query must fail instead.
        let Ok(conn) = open_sqlcipher(&outcome.db_path, &bad_key) else {
            return;
        };
        assert!(
            head_schema_version(&conn).is_err(),
            "wrong passphrase must fail to read"
        );
    }

    #[test]
    fn second_init_without_force_refuses() {
        let tmp = TempDir::new().unwrap();
        let dir = tmp.path().join("solo-data");
        init(fixture_params(&dir)).unwrap();
        let err = init(fixture_params(&dir)).unwrap_err();
        assert!(
            matches!(err, Error::Conflict(_)),
            "expected Conflict, got {err:?}"
        );
        assert!(err.to_string().contains("already initialized"));
    }

    #[test]
    fn force_wipes_and_re_inits() {
        let tmp = TempDir::new().unwrap();
        let dir = tmp.path().join("solo-data");
        let first = init(fixture_params(&dir)).unwrap();
        let first_cfg = SoloConfig::read(&first.config_path).unwrap();

        let mut params = fixture_params(&dir);
        params.force = true;
        let second = init(params).unwrap();
        let second_cfg = SoloConfig::read(&second.config_path).unwrap();

        // A new salt should have been generated; same passphrase => different
        // derived key. Compare salts directly.
        assert_ne!(first_cfg.salt_hex, second_cfg.salt_hex);
    }

    /// Regression: SOLO_OWNED_FILES had stale HNSW filenames
    /// (`hnsw_episodes.bin`, `.graph`, `.data`, etc.) that didn't match
    /// the current snapshot module's actual output (`.hnsw.data`,
    /// `.hnsw.graph` on the live/_bak/_tmp basenames). `solo init --force`
    /// would skip wiping those files, leaving stale data after re-init.
    /// This test plants snapshot files using the current naming scheme
    /// then verifies --force removes them all.
    #[test]
    fn force_wipes_current_hnsw_snapshot_files() {
        let tmp = TempDir::new().unwrap();
        let dir = tmp.path().join("solo-data");
        let _ = init(fixture_params(&dir)).unwrap();

        // Plant snapshot files using the names snapshot.rs actually writes.
        let planted = [
            "hnsw_episodes.hnsw.data",
            "hnsw_episodes.hnsw.graph",
            "hnsw_episodes_bak.hnsw.data",
            "hnsw_episodes_bak.hnsw.graph",
            "hnsw_episodes_tmp.hnsw.data",
            "hnsw_episodes_tmp.hnsw.graph",
        ];
        for name in &planted {
            std::fs::write(dir.join(name), b"stale snapshot data").unwrap();
        }

        let mut params = fixture_params(&dir);
        params.force = true;
        let _ = init(params).unwrap();

        // All planted files must be gone after --force.
        for name in &planted {
            let p = dir.join(name);
            assert!(
                !p.exists(),
                "{} should have been wiped by --force",
                p.display()
            );
        }
    }

    #[test]
    fn empty_passphrase_rejected() {
        let tmp = TempDir::new().unwrap();
        let mut params = fixture_params(tmp.path());
        params.passphrase.clear();
        let err = init(params).unwrap_err();
        assert!(matches!(err, Error::InvalidInput(_)), "got: {err:?}");
    }

    /// Simulate a v0.7.1 install (`solo.db` + `solo.config.toml` at root,
    /// no `tenants_index.db`), then run `init()` against it. The upgrade
    /// path should kick in: `solo.db` moves into `tenants/default.db`,
    /// the registry is created, and the outcome reports
    /// `upgraded_from_v071 = true`.
    #[test]
    fn init_upgrades_v071_install_in_place() {
        let tmp = TempDir::new().unwrap();
        let dir = tmp.path().join("solo-data");
        let passphrase = "v071-upgrade-passphrase";
        let legacy_db = plant_v071_install(&dir, passphrase);

        // Run init with the same passphrase + force=false.
        let outcome = init(InitParams {
            data_dir: dir.clone(),
            passphrase: Zeroizing::new(passphrase.into()),
            force: false,
            embedder: default_embedder(),
        })
        .unwrap();

        assert!(outcome.upgraded_from_v071);
        assert_eq!(outcome.db_path, dir.join("tenants").join("default.db"));
        assert!(outcome.db_path.is_file());
        assert!(outcome.tenants_index_path.is_file());
        assert_eq!(outcome.tenants_index_schema_version, TENANTS_INDEX_HEAD);
        // Legacy file gone.
        assert!(!legacy_db.exists());
        // The migrated per-tenant DB reports the current chain head.
        assert_eq!(outcome.schema_version, TENANT_SCHEMA_HEAD);
    }

    /// v0.7.1 upgrade with the wrong passphrase must fail with a Storage
    /// error, because the derived key does not decrypt the existing data.
    #[test]
    #[ignore = "requires SQLCipher: under plain bundled SQLite, PRAGMA key is a no-op so wrong keys silently succeed."]
    fn init_v071_upgrade_with_wrong_passphrase_errors() {
        let tmp = TempDir::new().unwrap();
        let dir = tmp.path().join("solo-data");
        // v0.7.1 plant under the GOOD passphrase.
        plant_v071_install(&dir, "right-passphrase");

        // Call init() with the WRONG passphrase.
        let err = init(InitParams {
            data_dir: dir,
            passphrase: Zeroizing::new("wrong-passphrase".into()),
            force: false,
            embedder: default_embedder(),
        })
        .unwrap_err();
        assert!(
            matches!(err, Error::Storage(_)),
            "wrong passphrase must surface as a Storage error, got {err:?}"
        );
    }

    // ----------------------------------------------------------------
    // v0.9.0 P1: init writes an env-detected `[llm]` default
    // ----------------------------------------------------------------

    #[test]
    fn init_writes_llm_none_when_no_env_key_present() {
        let _lock = ENV_LOCK.lock().unwrap_or_else(|p| p.into_inner());
        let _g = fresh_llm_env();
        let tmp = TempDir::new().unwrap();
        let dir = tmp.path().join("solo-data");
        let outcome = init(fixture_params(&dir)).expect("init should succeed");
        let cfg = SoloConfig::read(&outcome.config_path).unwrap();
        assert_eq!(
            cfg.llm,
            Some(LlmSettings::None),
            "no ANTHROPIC_API_KEY in env → init writes [llm] mode = \"none\""
        );
    }

    #[test]
    fn init_writes_llm_anthropic_when_env_key_present() {
        let _lock = ENV_LOCK.lock().unwrap_or_else(|p| p.into_inner());
        let _g = fresh_llm_env();
        // SAFETY: ENV_LOCK held; LlmEnvGuard cleans up on drop.
        unsafe { std::env::set_var("ANTHROPIC_API_KEY", "sk-ant-test-fixture") };
        let tmp = TempDir::new().unwrap();
        let dir = tmp.path().join("solo-data");
        let outcome = init(fixture_params(&dir)).expect("init should succeed");
        let cfg = SoloConfig::read(&outcome.config_path).unwrap();
        match cfg.llm {
            Some(LlmSettings::Anthropic {
                ref api_key_env,
                ref model,
            }) => {
                assert_eq!(api_key_env, "ANTHROPIC_API_KEY");
                assert_eq!(model, "claude-sonnet-4-6");
            }
            other => panic!("expected Anthropic variant from env-detected default, got {other:?}"),
        }
    }

    #[test]
    fn default_llm_settings_from_env_picks_anthropic_when_set() {
        let _lock = ENV_LOCK.lock().unwrap_or_else(|p| p.into_inner());
        let _g = fresh_llm_env();
        // SAFETY: ENV_LOCK held; LlmEnvGuard cleans up on drop.
        unsafe { std::env::set_var("ANTHROPIC_API_KEY", "sk-ant-fixture") };
        match default_llm_settings_from_env() {
            LlmSettings::Anthropic { api_key_env, .. } => {
                assert_eq!(api_key_env, "ANTHROPIC_API_KEY");
            }
            other => panic!("expected Anthropic, got {other:?}"),
        }
    }

    #[test]
    fn default_llm_settings_from_env_returns_none_when_empty_value() {
        let _lock = ENV_LOCK.lock().unwrap_or_else(|p| p.into_inner());
        let _g = fresh_llm_env();
        // Empty value should NOT trigger Anthropic — operators sometimes
        // set vars to "" to mean "leave default" (especially under bash
        // / nix-shell). Mirrors the env_trimmed helper in solo-steward.
        // SAFETY: ENV_LOCK held; LlmEnvGuard cleans up on drop.
        unsafe { std::env::set_var("ANTHROPIC_API_KEY", "") };
        assert_eq!(default_llm_settings_from_env(), LlmSettings::None);
    }

    #[test]
    fn default_llm_settings_from_env_returns_none_when_whitespace_only() {
        let _lock = ENV_LOCK.lock().unwrap_or_else(|p| p.into_inner());
        let _g = fresh_llm_env();
        // The value is trimmed before the emptiness check, so a blank
        // value must behave exactly like an empty one.
        // SAFETY: ENV_LOCK held; LlmEnvGuard cleans up on drop.
        unsafe { std::env::set_var("ANTHROPIC_API_KEY", "  \t ") };
        assert_eq!(default_llm_settings_from_env(), LlmSettings::None);
    }

    #[test]
    fn default_llm_settings_from_env_returns_none_when_unset() {
        let _lock = ENV_LOCK.lock().unwrap_or_else(|p| p.into_inner());
        let _g = fresh_llm_env();
        assert_eq!(default_llm_settings_from_env(), LlmSettings::None);
    }

    #[test]
    fn cloud_sync_path_rejected() {
        // We don't actually create files; validate_data_dir runs first.
        let placeholder = std::env::temp_dir().join("solo-init-cloud-test");
        let mut params = fixture_params(&placeholder);
        // Force a cloud-sync component into the path. Must be absolute on
        // both Unix and Windows so validate_data_dir's absolute-path check
        // doesn't short-circuit before the cloud-sync check we want to
        // exercise.
        #[cfg(windows)]
        let cloud = std::path::PathBuf::from(r"C:\Users\x\Dropbox\solo");
        #[cfg(not(windows))]
        let cloud = std::path::PathBuf::from("/Users/x/Dropbox/solo");
        params.data_dir = cloud;
        let err = init(params).unwrap_err();
        assert!(err.to_string().contains("cloud-sync"), "got: {err}");
    }
}
875}