solo-storage 0.11.5

Solo: SQLite + SQLCipher persistence layer
Documentation
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
831
832
833
834
835
836
837
838
839
840
841
842
843
844
845
846
847
848
849
850
851
852
853
854
855
856
857
858
859
860
861
862
863
864
865
866
867
868
869
870
871
872
873
874
875
// SPDX-License-Identifier: Apache-2.0

//! `solo init`: create a fresh Solo data directory.
//!
//! The orchestrator wires together every primitive in this crate:
//!
//!   1. `path_validation::validate_data_dir` — refuse cloud-sync folders.
//!   2. Detect existing init state.
//!        - If a v0.7.1 single-DB layout is present (`solo.db` at the data
//!          dir root, no `tenants_index.db`), this is an upgrade — invoke
//!          the v0.7.1 → v0.8.0 mass-data-move helper instead of refusing.
//!        - If a v0.8.0 per-tenant layout is already in place
//!          (`tenants_index.db` exists, with at least the default tenant
//!          registered), refuse with Conflict unless `force = true`.
//!        - On `force`, wipe Solo-owned files in the data dir (NOT the dir
//!          itself, in case the user keeps other stuff there) and proceed
//!          to fresh-install creation.
//!   3. Create the data directory.
//!   4. Acquire `solo.lock` (RAII) so a parallel `solo init` or `solo daemon`
//!      can't race us.
//!   5. Generate a fresh 16-byte salt, derive the SQLCipher key via Argon2id.
//!   6. Create `tenants/` subdir + `tenants_index.db` (apply migration 0004)
//!      + per-tenant `tenants/default.db` (apply migrations 0001-0003) +
//!      register the default tenant in the index.
//!   7. Write `solo.config.toml` (salt + embedder identity).
//!   8. Drop the lockfile (RAII — also runs on any error path between steps).
//!
//! On any error after the data dir is created, the partial state on disk is
//! left for inspection. The caller can re-run with `--force` to wipe and
//! retry.
//!
//! ## Layout (v0.8.0)
//!
//! After a successful `init` on a fresh install:
//!
//! ```text
//! <data_dir>/
//!   tenants_index.db          -- registry (SQLCipher; same key as below)
//!   tenants/
//!     default.db              -- per-tenant SQLCipher DB
//!     default.db-wal
//!     default.db-shm
//!   solo.config.toml
//! ```
//!
//! The `solo.db` at the data dir root that v0.7.1 used is no longer created.
//! Existing v0.7.1 data dirs are upgraded into the new layout on the first
//! `solo init`-or-daemon-boot call by the mass-data-move helper.

use rusqlite::Connection;
use solo_core::{Embedder, Error, Result, TenantId};
use std::path::{Path, PathBuf};
use zeroize::Zeroizing;

use crate::{
    config::{EmbedderConfig, LlmSettings, SoloConfig},
    key_material::KeyMaterial,
    lockfile::Lockfile,
    migration,
    path_validation::validate_data_dir,
    tenants::{
        TENANTS_INDEX_FILENAME, TENANTS_SUBDIR, TenantStatus, TenantsIndex, migrate_v071_to_v080,
    },
};

/// Location Solo uses when the caller does not name a data dir: `~/.solo/`.
///
/// The home directory comes from the `dirs` crate (Windows:
/// `%USERPROFILE%`; Unix: `$HOME`). Yields `None` when no home directory
/// can be determined.
pub fn default_data_dir() -> Option<PathBuf> {
    let home = dirs::home_dir()?;
    Some(home.join(".solo"))
}

/// File names at the data dir root that Solo owns.
///
/// `wipe_solo_owned_files` (the `--force` path) deletes each of these when
/// it exists as a regular file. Anything else at the data dir root is left
/// untouched.
///
/// HNSW snapshot filenames are derived from the basenames in
/// `crate::snapshot` (`hnsw_episodes`, `hnsw_episodes_bak`, `hnsw_episodes_tmp`)
/// + the suffixes hnsw_rs's `file_dump` writes (`.hnsw.data`, `.hnsw.graph`).
/// Keep this list in sync with `snapshot::{LIVE_BASENAME, BAK_BASENAME,
/// TMP_BASENAME}` if those ever change.
///
/// **Note**: the v0.7.1 `solo.db` and root-level HNSW snapshots are listed so
/// that a `--force` re-init clears them. The v0.8.0 layout keeps per-tenant
/// files under `<data_dir>/tenants/`; those are NOT named here — they are
/// removed by a single-level scan of that subdirectory in
/// `wipe_solo_owned_files` (regular files only, no recursion).
const SOLO_OWNED_FILES_ROOT: &[&str] = &[
    // v0.7.1 single-DB layout (legacy; only present pre-migration). Listed
    // first so a v0.7.1 install upgraded via mass-data-move clears any
    // stragglers if the upgrade had to be aborted and retried with --force.
    "solo.db",
    "solo.db-wal",
    "solo.db-shm",
    // v0.7.1 HNSW snapshots at root (live + bak + tmp pairs).
    "hnsw_episodes.hnsw.data",
    "hnsw_episodes.hnsw.graph",
    "hnsw_episodes_bak.hnsw.data",
    "hnsw_episodes_bak.hnsw.graph",
    "hnsw_episodes_tmp.hnsw.data",
    "hnsw_episodes_tmp.hnsw.graph",
    // Top-level Solo files (still at root in v0.8.0): the config, its
    // temporary sibling, and the lockfile.
    "solo.config.toml",
    "solo.config.toml.tmp",
    "solo.lock",
    // v0.8.0 tenant registry plus its WAL / shared-memory side files.
    TENANTS_INDEX_FILENAME,
    "tenants_index.db-wal",
    "tenants_index.db-shm",
];

/// `solo init` parameters. Built by the CLI layer.
///
/// `Debug` is implemented by hand rather than derived: `Zeroizing<String>`
/// formats its inner value transparently, so a derived impl would print the
/// plaintext passphrase into any `{:?}` log line or panic message. The
/// manual impl prints a fixed placeholder for that field instead.
#[derive(Clone)]
pub struct InitParams {
    /// Where to put the data dir. Created if missing.
    pub data_dir: PathBuf,
    /// Resolved passphrase, wrapped in `Zeroizing` so the buffer is wiped
    /// when this struct drops. CLI layer reads it via prompt or env var.
    pub passphrase: Zeroizing<String>,
    /// If true, wipe Solo-owned files in `data_dir` before initializing.
    pub force: bool,
    /// Embedder identity to record in the config. For commit 1.1 this is the
    /// BGE-M3 default; commit 1.4 (embedder loader) will produce it from the
    /// loaded model.
    pub embedder: EmbedderConfig,
}

impl std::fmt::Debug for InitParams {
    /// Formats every field except `passphrase`, which is replaced by the
    /// literal `<redacted>` so the secret never reaches diagnostic output.
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        f.debug_struct("InitParams")
            .field("data_dir", &self.data_dir)
            .field("passphrase", &"<redacted>")
            .field("force", &self.force)
            .field("embedder", &self.embedder)
            .finish()
    }
}

/// Parameterless embedder identity for `solo.config.toml`, used when the CLI
/// has not probed a real backend through
/// [`crate::embedder::probe_embedder_config_from_env`].
///
/// Production `solo init` always goes through
/// `probe_embedder_config_from_env`, which chooses between Ollama (probing
/// the real dim) and Stub (32-dim, deterministic). This helper serves test
/// fixtures and downstream callers that need an identity for first-init
/// flows without any arguments: it mirrors whatever
/// `StubEmbedder::default_stub()` reports (name=`stub`, version=`v1`,
/// dim=32) and records the dtype as `f32`.
///
/// Up to v0.5.x this returned the BGE-M3 identity (BAAI/bge-m3, 1024-dim).
/// BGE-M3 was removed in v0.6.0 — see `docs/dev-log/0071-v0.5.x-roadmap.md`
/// Priority 9. Tests that need a deterministic non-stub identity should
/// construct an `EmbedderConfig` literal themselves.
pub fn default_embedder() -> EmbedderConfig {
    let reference = crate::embedder::StubEmbedder::default_stub();

    // Copy the identity out of the stub field by field.
    let name = reference.name().to_string();
    let version = reference.version().to_string();
    let dim = reference.dim() as u32;

    EmbedderConfig {
        name,
        version,
        dim,
        dtype: "f32".into(),
    }
}

/// v0.9.0 P1 (plan BLOCKER 2 resolution): choose the `[llm]` block written
/// into a freshly-initialised data dir by inspecting the process
/// environment.
///
/// Precedence:
///   1. `ANTHROPIC_API_KEY` set to a non-blank value → `Anthropic` variant
///      with `api_key_env = "ANTHROPIC_API_KEY"` and the plan's
///      `claude-sonnet-4-6` default model.
///   2. (A future P1 follow-up may add `OPENAI_API_KEY` here; v0.9.0 P1
///      keeps the surface minimal — operators edit the file if they want
///      OpenAI, Ollama, or MCP-sampling.)
///   3. Anything else → `None` variant. The Steward runs cluster-only.
///
/// A variable that is unset, unreadable, empty, or whitespace-only counts as
/// absent — this guards against shells that export the empty string to mean
/// "leave default".
pub fn default_llm_settings_from_env() -> LlmSettings {
    const ANTHROPIC_KEY_VAR: &str = "ANTHROPIC_API_KEY";

    let anthropic_key_present = match std::env::var(ANTHROPIC_KEY_VAR) {
        Ok(value) => !value.trim().is_empty(),
        Err(_) => false,
    };

    if !anthropic_key_present {
        return LlmSettings::None;
    }

    LlmSettings::Anthropic {
        api_key_env: ANTHROPIC_KEY_VAR.to_string(),
        model: "claude-sonnet-4-6".to_string(),
    }
}

/// Result of a successful [`init`] call, handed back to the CLI layer so it
/// can print human-readable success output.
///
/// Both the fresh-install path and the v0.7.1 in-place upgrade path fill in
/// every field; `upgraded_from_v071` tells the two apart.
#[derive(Debug)]
pub struct InitOutcome {
    /// The data directory that was initialized (as passed in via
    /// `InitParams::data_dir`).
    pub data_dir: PathBuf,
    /// Per-tenant DB path. v0.8.0+: `<data_dir>/tenants/default.db`.
    /// Pre-v0.8.0 callers consumed `<data_dir>/solo.db`; they continue to
    /// work as long as they treat this purely as "the SQLCipher file to
    /// open." P2 retires this field in favour of a `TenantHandle` per
    /// tenant.
    pub db_path: PathBuf,
    /// Path of `solo.config.toml` inside the data dir.
    pub config_path: PathBuf,
    /// Highest applied per-tenant schema version. Equal to the highest
    /// `version` in `tenants/default.db::schema_migrations`. **Not** the
    /// same number as the tenants_index schema; that's tracked via
    /// `tenants_index_schema_version` below.
    pub schema_version: u32,
    /// Path to the new v0.8.0 tenant registry at
    /// `<data_dir>/tenants_index.db`. P2 callers open this to enumerate
    /// the tenants present in the data dir.
    pub tenants_index_path: PathBuf,
    /// Highest applied version in tenants_index.db. As of v0.8.0 this is
    /// `4` (the foundation migration). Tracked separately from the
    /// per-tenant schema chain.
    pub tenants_index_schema_version: u32,
    /// True iff this `init` call upgraded a v0.7.1 layout in place (i.e.
    /// `solo.db` was present at the root, and the mass-data-move helper
    /// ran). False for fresh installs. Used by the CLI to print a
    /// distinct success message.
    pub upgraded_from_v071: bool,
}

/// Run `solo init`. See module docstring for the step list.
///
/// Branches:
///
///   * **Fresh install** (no `solo.db` at root, no `tenants_index.db`) —
///     create the v0.8.0 per-tenant layout.
///   * **v0.7.1 upgrade** (`solo.db` at root, no `tenants_index.db`) — run
///     the mass-data-move helper, then load the existing config (no new
///     salt — the user's existing passphrase + salt must still decrypt).
///   * **Already v0.8.0** (`tenants_index.db` exists) — refuse with
///     Conflict unless `force = true`. With `force`, wipe + re-init.
pub fn init(params: InitParams) -> Result<InitOutcome> {
    let InitParams {
        data_dir,
        passphrase,
        force,
        embedder,
    } = params;

    if passphrase.is_empty() {
        return Err(Error::invalid_input(
            "passphrase must not be empty (Solo uses it to derive the SQLCipher key)",
        ));
    }

    validate_data_dir(&data_dir)?;

    let config_path = data_dir.join("solo.config.toml");
    let lock_path = data_dir.join("solo.lock");
    let tenants_index_path = data_dir.join(TENANTS_INDEX_FILENAME);
    let tenants_dir = data_dir.join(TENANTS_SUBDIR);
    let legacy_db_path = data_dir.join("solo.db");
    let new_default_db_path = tenants_dir.join("default.db");

    // Detect existing layout. Three cases:
    //   - v0.7.1: solo.db at root + (likely) solo.config.toml at root +
    //             no tenants_index.db
    //   - v0.8.0: tenants_index.db at root (regardless of whether
    //             tenants/default.db is present — `pending_migration`
    //             status covers the transient state)
    //   - fresh:  none of the above
    let has_v071_db = legacy_db_path.is_file();
    let has_v080_index = tenants_index_path.is_file();
    let has_config = config_path.is_file();

    // Make sure the parent dir exists before we try to acquire the lockfile.
    std::fs::create_dir_all(&data_dir)
        .map_err(|e| Error::storage(format!("create data dir {}: {e}", data_dir.display())))?;

    let _lock = Lockfile::acquire(&lock_path)?;

    if has_v080_index && !force {
        return Err(Error::conflict(format!(
            "data directory is already initialized (v0.8.0 layout): {}\n\
             Re-run with --force to wipe and re-initialize \
             (DESTRUCTIVE — all stored memories will be lost).",
            data_dir.display()
        )));
    }

    if has_v080_index && force {
        // --force: wipe everything Solo owns + start over with a fresh
        // salt. Equivalent semantics to the pre-v0.8.0 --force path.
        wipe_solo_owned_files(&data_dir)?;
        // Fall through into the fresh-install branch below.
    } else if has_v071_db && !has_v080_index {
        // v0.7.1 → v0.8.0 upgrade. The existing config + key MUST still
        // be valid; refuse if no config is present, because we have no
        // salt to derive the key.
        if !has_config {
            return Err(Error::conflict(format!(
                "data dir has a v0.7.1 solo.db but no solo.config.toml: {}\n\
                 Cannot upgrade in place without the persisted salt. \
                 Either restore the missing config or use --force to wipe.",
                data_dir.display()
            )));
        }
        if force {
            // --force against a v0.7.1 install: still wipe + start over,
            // matching the pre-v0.8.0 semantics. The user explicitly
            // asked for a destructive re-init.
            wipe_solo_owned_files(&data_dir)?;
            // Fall through into fresh-install branch.
        } else {
            // In-place upgrade. Read the existing config to recover the
            // salt; derive the key from the user-supplied passphrase +
            // that salt. The key must successfully decrypt the existing
            // solo.db, otherwise the user typed the wrong passphrase.
            let cfg = SoloConfig::read(&config_path)
                .map_err(|e| Error::storage(format!("read config for v0.7.1 upgrade: {e}")))?;
            let salt = cfg.salt_bytes()?;
            let key = KeyMaterial::derive(&passphrase, &salt)?;

            // Run the mass-data-move helper. Idempotent + crash-recoverable.
            migrate_v071_to_v080(&data_dir, &key)?;

            // Smoke-test: open the migrated DB and read schema_migrations
            // to confirm the same key still decrypts it. If the user
            // typed the wrong passphrase, this surfaces the failure
            // here, not silently when the daemon next boots.
            let conn = open_sqlcipher(&new_default_db_path, &key)?;
            let schema_version: u32 = conn
                .query_row("SELECT MAX(version) FROM schema_migrations", [], |row| {
                    row.get(0)
                })
                .map_err(|e| {
                    Error::storage(format!("verify v0.7.1 upgrade cipher round-trip: {e}"))
                })?;
            drop(conn);

            // tenants_index.db's own migration version — informational
            // only, but the InitOutcome surfaces it.
            let tenants_index_version = {
                let conn = open_sqlcipher(&tenants_index_path, &key)?;
                migration::current_tenants_index_version(&conn)?
            };

            return Ok(InitOutcome {
                data_dir,
                db_path: new_default_db_path,
                config_path,
                schema_version,
                tenants_index_path,
                tenants_index_schema_version: tenants_index_version,
                upgraded_from_v071: true,
            });
        }
    }

    // ---- Fresh install (or post-`force` wipe) ----
    //
    // We arrive here when the data dir has no v0.7.1 / v0.8.0 markers,
    // OR when `--force` wiped them above.
    let salt = KeyMaterial::fresh_salt()?;
    let key = KeyMaterial::derive(&passphrase, &salt)?;

    // Create the per-tenant subdir.
    std::fs::create_dir_all(&tenants_dir).map_err(|e| {
        Error::storage(format!(
            "create tenants subdir {}: {e}",
            tenants_dir.display()
        ))
    })?;

    // Create tenants_index.db + apply migration 0004.
    let mut index = TenantsIndex::open(&data_dir, &key)?;

    // Create the per-tenant default.db + apply migrations 0001-0003.
    let mut tenant_conn = open_sqlcipher(&new_default_db_path, &key)?;
    let schema_version = migration::run_migrations(&mut tenant_conn)?;

    // Cipher round-trip smoke check on the per-tenant DB.
    drop(tenant_conn);
    let conn2 = open_sqlcipher(&new_default_db_path, &key)?;
    let highest: u32 = conn2
        .query_row("SELECT MAX(version) FROM schema_migrations", [], |row| {
            row.get(0)
        })
        .map_err(|e| Error::storage(format!("verify cipher round-trip (default tenant): {e}")))?;
    drop(conn2);
    if highest != schema_version {
        return Err(Error::storage(format!(
            "cipher round-trip read drift (default tenant): wrote {schema_version}, read {highest}"
        )));
    }

    // Register the default tenant. Idempotent against re-runs, but on a
    // fresh install this is the row's first insertion.
    let default_id = TenantId::default_tenant();
    if index.lookup(&default_id)?.is_none() {
        index.register_with_status(
            &default_id,
            "default.db",
            Some("Default tenant"),
            TenantStatus::Active,
        )?;
    }

    // tenants_index.db schema version — for the outcome struct.
    let tenants_index_version = migration::current_tenants_index_version(index.connection())?;
    drop(index);

    // Persist config. v0.9.0 P1: write an env-detected `[llm]` default
    // (BLOCKER 2 resolution): `ANTHROPIC_API_KEY` set → mode = "anthropic";
    // otherwise → mode = "none". Operators who want OpenAI / Ollama /
    // MCP-sampling edit the file post-init. v0.10.0 will retire the
    // env-var-only fallback path; landing the explicit default during
    // `solo init` is the migration ramp.
    let mut cfg = SoloConfig::new(salt, embedder);
    cfg.llm = Some(default_llm_settings_from_env());
    cfg.write(&config_path)?;

    Ok(InitOutcome {
        data_dir,
        db_path: new_default_db_path,
        config_path,
        schema_version,
        tenants_index_path,
        tenants_index_schema_version: tenants_index_version,
        upgraded_from_v071: false,
    })
}

/// Open a SQLCipher database, bind the raw key, and set the journal-mode +
/// foreign-keys pragmas. Used by `init` and exposed for tests.
pub fn open_sqlcipher(db_path: &Path, key: &KeyMaterial) -> Result<Connection> {
    let conn = Connection::open(db_path)
        .map_err(|e| Error::storage(format!("open {}: {e}", db_path.display())))?;
    // PRAGMA key MUST be the first statement on a fresh connection.
    // `as_hex()` returns Zeroizing<String>; wrap the formatted PRAGMA in
    // Zeroizing<String> so the raw key bytes are wiped on drop rather
    // than lingering in the heap until the allocator reuses the region.
    let key_pragma: zeroize::Zeroizing<String> = {
        let hex = key.as_hex();
        zeroize::Zeroizing::new(format!("PRAGMA key = \"x'{}'\"", &*hex))
    };
    conn.execute_batch(&key_pragma)
        .map_err(|e| Error::storage(format!("PRAGMA key: {e}")))?;
    // Standard pragmas. journal_mode=wal returns the new mode as a row, so we
    // use query_row; the others execute fine via execute_batch.
    let mode: String = conn
        .query_row("PRAGMA journal_mode = wal", [], |row| row.get(0))
        .map_err(|e| Error::storage(format!("set journal_mode=wal: {e}")))?;
    if mode.to_lowercase() != "wal" {
        return Err(Error::storage(format!(
            "expected WAL journal mode, got {mode}"
        )));
    }
    conn.execute_batch(
        "PRAGMA foreign_keys = ON;
         PRAGMA busy_timeout = 5000;
         PRAGMA synchronous = NORMAL;",
    )
    .map_err(|e| Error::storage(format!("set startup pragmas: {e}")))?;
    Ok(conn)
}

fn wipe_solo_owned_files(data_dir: &Path) -> Result<()> {
    if !data_dir.exists() {
        return Ok(());
    }
    // Root-level files (legacy v0.7.1 + v0.8.0 top-level).
    for name in SOLO_OWNED_FILES_ROOT {
        let p = data_dir.join(name);
        if p.is_file() {
            std::fs::remove_file(&p)
                .map_err(|e| Error::storage(format!("remove {}: {e}", p.display())))?;
        }
    }
    // v0.8.0 per-tenant subdir — everything inside, then the directory.
    // We don't use a recursive remove of arbitrary subdirs (defensive against
    // operator surgery that might have nested unrelated state under the
    // data dir); we only touch the explicit `tenants/` subdir Solo owns.
    let tenants = data_dir.join(TENANTS_SUBDIR);
    if tenants.is_dir() {
        for entry in std::fs::read_dir(&tenants)
            .map_err(|e| Error::storage(format!("read tenants dir {}: {e}", tenants.display())))?
        {
            let entry = entry.map_err(|e| {
                Error::storage(format!("scan tenants dir {}: {e}", tenants.display()))
            })?;
            let p = entry.path();
            if p.is_file() {
                std::fs::remove_file(&p)
                    .map_err(|e| Error::storage(format!("remove {}: {e}", p.display())))?;
            }
        }
        // Best-effort rmdir — leave the dir if some non-Solo content sneaked in.
        let _ = std::fs::remove_dir(&tenants);
    }
    Ok(())
}

#[cfg(test)]
mod tests {
    use super::*;
    use tempfile::TempDir;

    // v0.9.1 P1 Fix 2: env vars are process-global mutable state and
    // every test that touches `ANTHROPIC_API_KEY` / `OPENAI_API_KEY` in
    // the `solo-storage` test binary serializes on a single shared
    // mutex. Lifted into `test_support::LLM_ENV_LOCK` so the sibling
    // module `crate::llm::anthropic::tests` (which also mutates these
    // vars) shares the same lock. Previously the lock lived here at
    // module scope, so `init::tests` and `anthropic::tests` raced and
    // `init_writes_llm_anthropic_when_env_key_present` was flaky under
    // `cargo test --workspace`.
    use crate::test_support::LLM_ENV_LOCK as ENV_LOCK;

    /// Drop guard that removes the LLM API-key variables from the process
    /// environment when it goes out of scope, so values set during a test
    /// do not outlive it. SAFETY: only create one while holding ENV_LOCK,
    /// so that no other thread is racing on these variables.
    struct LlmEnvGuard;
    impl Drop for LlmEnvGuard {
        fn drop(&mut self) {
            const SCRUBBED: [&str; 2] = ["ANTHROPIC_API_KEY", "OPENAI_API_KEY"];
            SCRUBBED.iter().for_each(|name| {
                // SAFETY: per the contract above, the guard's owner still
                // holds ENV_LOCK while the guard is dropped.
                unsafe { std::env::remove_var(name) }
            });
        }
    }
    /// Removes both LLM API-key variables so the test starts from an empty
    /// baseline, then returns the guard that removes them again on drop.
    /// The caller must already hold ENV_LOCK.
    fn fresh_llm_env() -> LlmEnvGuard {
        let baseline = ["ANTHROPIC_API_KEY", "OPENAI_API_KEY"];
        baseline.iter().for_each(|var| {
            // SAFETY: the caller holds ENV_LOCK (see the contract above).
            unsafe { std::env::remove_var(var) }
        });
        LlmEnvGuard
    }

    /// Builds the `InitParams` shared by the tests: data dir `dir`, the
    /// fixed fixture passphrase, `force` off, and the default embedder.
    fn fixture_params(dir: &Path) -> InitParams {
        let data_dir = dir.to_path_buf();
        let passphrase = Zeroizing::new("correct horse battery staple".into());
        let embedder = default_embedder();
        InitParams {
            data_dir,
            passphrase,
            force: false,
            embedder,
        }
    }

    /// Fresh `init` into a not-yet-existing directory: checks the reported
    /// paths, both schema head versions, and that neither a legacy root
    /// database nor a lockfile is left behind.
    #[test]
    fn happy_path_creates_db_and_config() {
        let tmp = TempDir::new().unwrap();
        let dir = tmp.path().join("solo-data");
        let outcome = init(fixture_params(&dir)).expect("init should succeed");
        assert_eq!(outcome.data_dir, dir);
        assert!(outcome.db_path.exists(), "default.db must exist");
        assert!(outcome.config_path.exists(), "solo.config.toml must exist");
        // Per-tenant chain head: v0.11.1 raises it to version 10
        // (contradiction lifecycle), after the v0.8.x audit,
        // principal-attribution, and triples-source migrations.
        assert_eq!(outcome.schema_version, 10);
        // v0.8.0 layout: db_path lives under tenants/, not at root.
        assert_eq!(outcome.db_path, dir.join("tenants").join("default.db"));
        // tenants_index.db must be present and at the current head version.
        assert!(outcome.tenants_index_path.is_file());
        // tenants_index chain head: v0.9.0 P1 advanced it to 9
        // (last_accessed column, closing v0.8.0 doc-vs-code gap).
        // Previously: v0.8.1 P3 took it to 8 (quota_bytes); v0.8.0 had 4.
        assert_eq!(outcome.tenants_index_schema_version, 9);
        // A fresh install is not an upgrade.
        assert!(!outcome.upgraded_from_v071);
        // Legacy v0.7.1 file must NOT be created on fresh install.
        assert!(!dir.join("solo.db").exists());
        // Lockfile should be cleaned up on the success path (RAII drop).
        assert!(!dir.join("solo.lock").exists(), "lockfile must be removed");
    }

    /// After a fresh `init`, the tenants registry lists exactly one tenant:
    /// the default one, Active, backed by `default.db`.
    #[test]
    fn fresh_install_registers_default_tenant_active() {
        let scratch = TempDir::new().unwrap();
        let data_dir = scratch.path().join("solo-data");
        let report = init(fixture_params(&data_dir)).unwrap();

        // Rebuild the key from the persisted salt and the fixture
        // passphrase, then reopen the registry with it.
        let persisted = SoloConfig::read(&report.config_path).unwrap();
        let salt = persisted.salt_bytes().unwrap();
        let key = KeyMaterial::derive("correct horse battery staple", &salt).unwrap();
        let registry = crate::tenants::TenantsIndex::open(&data_dir, &key).unwrap();

        let tenants = registry.list().unwrap();
        assert_eq!(tenants.len(), 1);
        let sole = &tenants[0];
        assert_eq!(sole.tenant_id, TenantId::default_tenant());
        assert_eq!(sole.status, crate::tenants::TenantStatus::Active);
        assert_eq!(sole.db_filename, "default.db");
    }

    /// The salt written to the config file, combined with the same
    /// passphrase, must yield a key that opens the freshly created database.
    #[test]
    fn config_round_trips_salt_correctly() {
        let scratch = TempDir::new().unwrap();
        let data_dir = scratch.path().join("solo-data");
        let report = init(fixture_params(&data_dir)).unwrap();

        // Only the persisted salt and the fixture passphrase go into the
        // key; opening the DB with it should succeed.
        let persisted = SoloConfig::read(&report.config_path).unwrap();
        let salt = persisted.salt_bytes().unwrap();
        let key = KeyMaterial::derive("correct horse battery staple", &salt).unwrap();
        let conn = open_sqlcipher(&report.db_path, &key).unwrap();

        let head = conn
            .query_row("SELECT MAX(version) FROM schema_migrations", [], |row| {
                row.get::<_, u32>(0)
            })
            .unwrap();
        // The per-tenant chain head is 10 (raised by v0.11.1). Earlier
        // steps on this chain: 5 = audit_events, 6 = principal-attribution
        // columns (v0.8.0 P5+P6), 7 = `triples.source_episode_id` FK for
        // GDPR cascade (v0.8.1 P1). Version 4 lives in tenants_index.db
        // (P1 registry), not here — the numbering skip is intentional, see
        // migration::MIGRATIONS.
        assert_eq!(head, 10);
    }

    /// A key derived from a different passphrase must not be able to read
    /// the database, whether the failure shows up while opening or on the
    /// first real query.
    #[test]
    #[ignore = "requires SQLCipher: under plain bundled SQLite, PRAGMA key is a no-op so wrong keys silently succeed. Run with the workspace's bundled-sqlcipher-vendored-openssl feature: `cargo test -p solo-storage -- --include-ignored`"]
    fn wrong_passphrase_fails_to_open() {
        let scratch = TempDir::new().unwrap();
        let data_dir = scratch.path().join("solo-data");
        let report = init(fixture_params(&data_dir)).unwrap();

        let persisted = SoloConfig::read(&report.config_path).unwrap();
        let salt = persisted.salt_bytes().unwrap();
        let bad_key = KeyMaterial::derive("WRONG PASSPHRASE", &salt).unwrap();

        // open_sqlcipher only issues pragmas, so the decryption failure may
        // not surface until a real query runs. Failing already at the
        // pragma stage is an equally acceptable outcome.
        let Ok(conn) = open_sqlcipher(&report.db_path, &bad_key) else {
            return;
        };
        let probe = conn.query_row("SELECT MAX(version) FROM schema_migrations", [], |row| {
            row.get::<_, u32>(0)
        });
        assert!(probe.is_err(), "wrong passphrase must fail to read");
    }

    /// Running `init` a second time on an initialized directory, without
    /// `force`, must be refused with a Conflict error.
    #[test]
    fn second_init_without_force_refuses() {
        let scratch = TempDir::new().unwrap();
        let data_dir = scratch.path().join("solo-data");

        // The first run sets the directory up; the second must refuse.
        init(fixture_params(&data_dir)).unwrap();
        let err = init(fixture_params(&data_dir)).unwrap_err();

        let is_conflict = matches!(err, Error::Conflict(_));
        assert!(is_conflict, "expected Conflict, got {err:?}");
        assert!(err.to_string().contains("already initialized"));
    }

    /// `init` with `force` on an existing install must start over, which is
    /// observed here through the salt: the re-init writes a different one.
    #[test]
    fn force_wipes_and_re_inits() {
        let scratch = TempDir::new().unwrap();
        let data_dir = scratch.path().join("solo-data");

        // Runs init with the given `force` flag and returns the salt that
        // ended up in the persisted config.
        let salt_hex_after_init = |force: bool| {
            let mut params = fixture_params(&data_dir);
            params.force = force;
            let report = init(params).unwrap();
            let persisted = SoloConfig::read(&report.config_path).unwrap();
            persisted.salt_hex.clone()
        };

        let original_salt = salt_hex_after_init(false);
        let regenerated_salt = salt_hex_after_init(true);

        // Same passphrase but a new salt means a different derived key, so
        // comparing the salts directly is enough.
        assert_ne!(original_salt, regenerated_salt);
    }

    /// Regression test for stale HNSW names in SOLO_OWNED_FILES: the list
    /// used to carry old filenames (`hnsw_episodes.bin`, `.graph`, `.data`,
    /// etc.) instead of what the snapshot module writes today
    /// (`.hnsw.data` / `.hnsw.graph` on the live, `_bak` and `_tmp`
    /// basenames), so `solo init --force` left stale snapshots behind.
    /// The test plants files under the current names and checks that a
    /// forced re-init removes every one of them.
    #[test]
    fn force_wipes_current_hnsw_snapshot_files() {
        let tmp = TempDir::new().unwrap();
        let dir = tmp.path().join("solo-data");
        init(fixture_params(&dir)).unwrap();

        // Snapshot files under the names snapshot.rs actually writes.
        let planted: Vec<_> = [
            "hnsw_episodes.hnsw.data",
            "hnsw_episodes.hnsw.graph",
            "hnsw_episodes_bak.hnsw.data",
            "hnsw_episodes_bak.hnsw.graph",
            "hnsw_episodes_tmp.hnsw.data",
            "hnsw_episodes_tmp.hnsw.graph",
        ]
        .iter()
        .map(|name| dir.join(name))
        .collect();
        for path in &planted {
            std::fs::write(path, b"stale snapshot data").unwrap();
        }

        let mut forced = fixture_params(&dir);
        forced.force = true;
        init(forced).unwrap();

        // None of the planted files may survive --force.
        for path in &planted {
            assert!(
                !path.exists(),
                "{} should have been wiped by --force",
                path.display()
            );
        }
    }

    /// An empty passphrase must be rejected as invalid input.
    #[test]
    fn empty_passphrase_rejected() {
        let scratch = TempDir::new().unwrap();
        let mut params = fixture_params(scratch.path());
        params.passphrase.clear();

        match init(params).unwrap_err() {
            Error::InvalidInput(_) => {}
            other => panic!("got: {other:?}"),
        }
    }

    /// Simulate a v0.7.1 install (`solo.db` + `solo.config.toml` at root,
    /// no `tenants_index.db`), then run `init()` against it. The upgrade
    /// path should kick in: `solo.db` moves into `tenants/default.db`,
    /// the registry is created with the default tenant active, and the
    /// outcome reports `upgraded_from_v071 = true`.
    #[test]
    fn init_upgrades_v071_install_in_place() {
        let tmp = TempDir::new().unwrap();
        let dir = tmp.path().join("solo-data");
        std::fs::create_dir_all(&dir).unwrap();

        // Plant a v0.7.1 layout: persist a SoloConfig with a known salt,
        // write a SQLCipher solo.db at the root that opens under the
        // passphrase + that salt.
        let passphrase = "v071-upgrade-passphrase";
        let salt = KeyMaterial::fresh_salt().unwrap();
        let key = KeyMaterial::derive(passphrase, &salt).unwrap();
        let cfg = SoloConfig::new(salt, default_embedder());
        cfg.write(&dir.join("solo.config.toml")).unwrap();
        let legacy_db = dir.join("solo.db");
        let mut conn = open_sqlcipher(&legacy_db, &key).unwrap();
        migration::run_migrations(&mut conn).unwrap();
        // Close the connection before init() touches the legacy file.
        drop(conn);

        // Run init with the same passphrase + force=false.
        let outcome = init(InitParams {
            data_dir: dir.clone(),
            passphrase: Zeroizing::new(passphrase.into()),
            force: false,
            embedder: default_embedder(),
        })
        .unwrap();

        assert!(outcome.upgraded_from_v071);
        assert_eq!(outcome.db_path, dir.join("tenants").join("default.db"));
        assert!(outcome.db_path.is_file());
        assert!(outcome.tenants_index_path.is_file());
        // v0.9.0 P1 advanced the tenants_index chain to 9
        // (last_accessed column). Previously: 8 (quota_bytes, v0.8.1 P3).
        assert_eq!(outcome.tenants_index_schema_version, 9);
        // Legacy file gone.
        assert!(!legacy_db.exists());
        // An upgraded install ends at the same per-tenant head as a fresh
        // one (currently 10). As of v0.8.1 P1 that meant migrations
        // 0005 + 0006 + 0007 — the audit table, principal-attribution
        // columns, and triples.source_episode_id, all additive.
        // NOTE(review): the later steps up to 10 are presumably additive
        // as well — confirm against migration::MIGRATIONS.
        assert_eq!(outcome.schema_version, 10);
    }

    /// Upgrading a v0.7.1 install with the wrong passphrase must fail with
    /// a Storage error, because the migrated default.db cannot be opened
    /// under the wrong key.
    #[test]
    #[ignore = "requires SQLCipher: under plain bundled SQLite, PRAGMA key is a no-op so wrong keys silently succeed."]
    fn init_v071_upgrade_with_wrong_passphrase_errors() {
        let scratch = TempDir::new().unwrap();
        let data_dir = scratch.path().join("solo-data");
        std::fs::create_dir_all(&data_dir).unwrap();

        // Plant a v0.7.1 layout encrypted under the GOOD passphrase.
        let salt = KeyMaterial::fresh_salt().unwrap();
        let good_key = KeyMaterial::derive("right-passphrase", &salt).unwrap();
        SoloConfig::new(salt, default_embedder())
            .write(&data_dir.join("solo.config.toml"))
            .unwrap();
        {
            // Scoped so the connection is closed before init() runs.
            let mut conn = open_sqlcipher(&data_dir.join("solo.db"), &good_key).unwrap();
            migration::run_migrations(&mut conn).unwrap();
        }

        // Now call init() with the WRONG passphrase. Moving the file may
        // well succeed (a rename does not decrypt anything); the smoke open
        // of the migrated default.db afterwards is what has to fail.
        let attempt = InitParams {
            data_dir,
            passphrase: Zeroizing::new("wrong-passphrase".into()),
            force: false,
            embedder: default_embedder(),
        };
        match init(attempt).unwrap_err() {
            Error::Storage(_) => {}
            other => panic!("wrong passphrase must surface as a Storage error, got {other:?}"),
        }
    }

    // ----------------------------------------------------------------
    // v0.9.0 P1: init writes an env-detected `[llm]` default
    // ----------------------------------------------------------------

    /// With no LLM API key in the environment, `init` persists
    /// `LlmSettings::None` as the `[llm]` default.
    #[test]
    fn init_writes_llm_none_when_no_env_key_present() {
        // Lock first, guard second: the guard is then dropped (and the
        // environment scrubbed) while the lock is still held.
        let _serial = ENV_LOCK.lock().unwrap_or_else(|poisoned| poisoned.into_inner());
        let _env = fresh_llm_env();

        let scratch = TempDir::new().unwrap();
        let data_dir = scratch.path().join("solo-data");
        let report = init(fixture_params(&data_dir)).expect("init should succeed");

        let persisted = SoloConfig::read(&report.config_path).unwrap();
        assert_eq!(
            persisted.llm,
            Some(LlmSettings::None),
            "no ANTHROPIC_API_KEY in env → init writes [llm] mode = \"none\""
        );
    }

    /// With `ANTHROPIC_API_KEY` set, `init` persists the Anthropic variant,
    /// pointing at that variable and at the expected default model.
    #[test]
    fn init_writes_llm_anthropic_when_env_key_present() {
        let _serial = ENV_LOCK.lock().unwrap_or_else(|poisoned| poisoned.into_inner());
        let _env = fresh_llm_env();
        // SAFETY: ENV_LOCK is held, and the guard from fresh_llm_env()
        // removes the variable again when it drops.
        unsafe { std::env::set_var("ANTHROPIC_API_KEY", "sk-ant-test-fixture") };

        let scratch = TempDir::new().unwrap();
        let data_dir = scratch.path().join("solo-data");
        let report = init(fixture_params(&data_dir)).expect("init should succeed");

        let persisted = SoloConfig::read(&report.config_path).unwrap();
        if let Some(LlmSettings::Anthropic { api_key_env, model }) = &persisted.llm {
            assert_eq!(api_key_env, "ANTHROPIC_API_KEY");
            assert_eq!(model, "claude-sonnet-4-6");
        } else {
            panic!(
                "expected Anthropic variant from env-detected default, got {:?}",
                persisted.llm
            );
        }
    }

    /// A non-empty `ANTHROPIC_API_KEY` makes the env-detected default the
    /// Anthropic variant, keyed on that variable name.
    #[test]
    fn default_llm_settings_from_env_picks_anthropic_when_set() {
        let _serial = ENV_LOCK.lock().unwrap_or_else(|poisoned| poisoned.into_inner());
        let _env = fresh_llm_env();
        // SAFETY: ENV_LOCK is held, and the guard from fresh_llm_env()
        // removes the variable again when it drops.
        unsafe { std::env::set_var("ANTHROPIC_API_KEY", "sk-ant-fixture") };

        let detected = default_llm_settings_from_env();
        let LlmSettings::Anthropic { api_key_env, .. } = &detected else {
            panic!("expected Anthropic, got {detected:?}");
        };
        assert_eq!(api_key_env, "ANTHROPIC_API_KEY");
    }

    /// A variable that is set but empty counts as "not configured".
    #[test]
    fn default_llm_settings_from_env_returns_none_when_empty_value() {
        let _serial = ENV_LOCK.lock().unwrap_or_else(|poisoned| poisoned.into_inner());
        let _env = fresh_llm_env();
        // Operators sometimes export a variable as "" to mean "leave the
        // default" (especially under bash / nix-shell), so an empty value
        // must NOT select Anthropic. Mirrors the env_trimmed helper in
        // solo-steward.
        // SAFETY: ENV_LOCK is held, and the guard from fresh_llm_env()
        // removes the variable again when it drops.
        unsafe { std::env::set_var("ANTHROPIC_API_KEY", "") };

        let detected = default_llm_settings_from_env();
        assert_eq!(detected, LlmSettings::None);
    }

    /// With neither API-key variable set, the env-detected default is
    /// `LlmSettings::None`.
    #[test]
    fn default_llm_settings_from_env_returns_none_when_unset() {
        let _serial = ENV_LOCK.lock().unwrap_or_else(|poisoned| poisoned.into_inner());
        // fresh_llm_env() has already removed both variables.
        let _env = fresh_llm_env();

        let detected = default_llm_settings_from_env();
        assert_eq!(detected, LlmSettings::None);
    }

    /// A data dir located inside a cloud-sync folder must be refused, with
    /// an error that mentions "cloud-sync".
    #[test]
    fn cloud_sync_path_rejected() {
        // The path carries a cloud-sync component and is absolute on both
        // Unix and Windows, so validate_data_dir's absolute-path check does
        // not short-circuit before the cloud-sync check under test. No
        // files are created: validate_data_dir runs first.
        let cloud = if cfg!(windows) {
            std::path::PathBuf::from(r"C:\Users\x\Dropbox\solo")
        } else {
            std::path::PathBuf::from("/Users/x/Dropbox/solo")
        };
        let params = fixture_params(&cloud);

        let rejection = init(params).unwrap_err();
        let rendered = rejection.to_string();
        assert!(rendered.contains("cloud-sync"), "got: {rejection}");
    }
}