Skip to main content

sqlite_graphrag/commands/
init.rs

1//! Handler for the `init` CLI subcommand.
2
3use crate::errors::AppError;
4use crate::output;
5use crate::paths::AppPaths;
6use crate::pragmas::{apply_init_pragmas, ensure_wal_mode};
7use crate::storage::connection::open_rw;
8use serde::Serialize;
9
10/// Embedding model choices exposed through `--model`.
11///
12/// Currently only `multilingual-e5-small` is supported. Additional variants
13/// will be added here as new models are integrated; the `value_enum` derive
14/// ensures the CLI rejects unknown strings at parse time rather than at runtime.
15#[derive(Copy, Clone, Debug, PartialEq, Eq, clap::ValueEnum)]
16pub enum EmbeddingModelChoice {
17    #[value(name = "multilingual-e5-small")]
18    MultilingualE5Small,
19}
20
21#[derive(clap::Args)]
22pub struct InitArgs {
23    /// Path to graphrag.sqlite. Defaults to `./graphrag.sqlite` in the current directory.
24    /// Resolution precedence (highest to lowest): `--db` flag > `SQLITE_GRAPHRAG_DB_PATH` env >
25    /// `SQLITE_GRAPHRAG_HOME` env (used as base directory) > cwd.
26    #[arg(long, env = "SQLITE_GRAPHRAG_DB_PATH")]
27    pub db: Option<String>,
28    /// Embedding model identifier. Currently only `multilingual-e5-small` is supported.
29    /// Reserved for future multi-model support; safe to omit.
30    #[arg(long, value_enum)]
31    pub model: Option<EmbeddingModelChoice>,
32    /// Force re-initialization, overwriting any existing schema metadata.
33    /// Use only when the schema is corrupted; loses configuration but preserves data.
34    #[arg(long)]
35    pub force: bool,
36    /// Initial namespace to resolve. Aligned with bilingual docs that mention `init --namespace`.
37    /// When provided, overrides `SQLITE_GRAPHRAG_NAMESPACE`; otherwise resolves via env or fallback `global`.
38    #[arg(long)]
39    pub namespace: Option<String>,
40    #[arg(long, hide = true, help = "No-op; JSON is always emitted on stdout")]
41    pub json: bool,
42}
43
44#[derive(Serialize)]
45struct InitResponse {
46    db_path: String,
47    /// Latest applied migration number from `refinery_schema_history`.
48    /// Emitted as a JSON number for cross-command consistency with `health` and `stats` (since v1.0.35).
49    schema_version: u32,
50    model: String,
51    dim: usize,
52    /// Active namespace resolved during initialisation, aligned with the bilingual docs.
53    namespace: String,
54    status: String,
55    /// Total execution time in milliseconds from handler start to serialisation.
56    elapsed_ms: u64,
57}
58
59pub fn run(args: InitArgs) -> Result<(), AppError> {
60    let start = std::time::Instant::now();
61    let paths = AppPaths::resolve(args.db.as_deref())?;
62    paths.ensure_dirs()?;
63
64    let namespace = crate::namespace::resolve_namespace(args.namespace.as_deref())?;
65
66    let mut conn = open_rw(&paths.db)?;
67
68    apply_init_pragmas(&conn)?;
69
70    crate::migrations::runner()
71        .run(&mut conn)
72        .map_err(|e| AppError::Internal(anyhow::anyhow!("migration failed: {e}")))?;
73
74    conn.execute_batch(&format!(
75        "PRAGMA user_version = {};",
76        crate::constants::SCHEMA_USER_VERSION
77    ))?;
78
79    // Defensive re-assertion: refinery may revert journal_mode during migrations.
80    ensure_wal_mode(&conn)?;
81
82    let schema_version = latest_schema_version(&conn)?;
83
84    conn.execute(
85        "INSERT OR REPLACE INTO schema_meta (key, value) VALUES ('schema_version', ?1)",
86        rusqlite::params![schema_version],
87    )?;
88    conn.execute(
89        "INSERT OR REPLACE INTO schema_meta (key, value) VALUES ('model', 'multilingual-e5-small')",
90        [],
91    )?;
92    conn.execute(
93        "INSERT OR REPLACE INTO schema_meta (key, value) VALUES ('dim', '384')",
94        [],
95    )?;
96    conn.execute(
97        "INSERT OR REPLACE INTO schema_meta (key, value) VALUES ('created_at', CAST(unixepoch() AS TEXT))",
98        [],
99    )?;
100    conn.execute(
101        "INSERT OR REPLACE INTO schema_meta (key, value) VALUES ('sqlite-graphrag_version', ?1)",
102        rusqlite::params![crate::constants::SQLITE_GRAPHRAG_VERSION],
103    )?;
104    // Persist the resolved namespace so downstream tools can inspect it without re-resolving.
105    conn.execute(
106        "INSERT OR REPLACE INTO schema_meta (key, value) VALUES ('namespace_initial', ?1)",
107        rusqlite::params![namespace],
108    )?;
109
110    output::emit_progress_i18n(
111        "Initializing embedding model (may download on first run)...",
112        crate::i18n::validation::runtime_pt::initializing_embedding_model(),
113    );
114
115    let test_emb = crate::daemon::embed_passage_or_local(&paths.models, "smoke test")?;
116
117    output::emit_json(&InitResponse {
118        db_path: paths.db.display().to_string(),
119        schema_version,
120        model: "multilingual-e5-small".to_string(),
121        dim: test_emb.len(),
122        namespace,
123        status: "ok".to_string(),
124        elapsed_ms: start.elapsed().as_millis() as u64,
125    })?;
126
127    Ok(())
128}
129
130fn latest_schema_version(conn: &rusqlite::Connection) -> Result<u32, AppError> {
131    match conn.query_row(
132        "SELECT version FROM refinery_schema_history ORDER BY version DESC LIMIT 1",
133        [],
134        |row| row.get::<_, i64>(0),
135    ) {
136        Ok(version) => Ok(version.max(0) as u32),
137        Err(rusqlite::Error::QueryReturnedNoRows) => Ok(0),
138        Err(err) => Err(AppError::Database(err)),
139    }
140}
141
#[cfg(test)]
mod tests {
    use super::*;

    /// Opens an in-memory database and runs `setup_sql` on it, panicking with
    /// `context` if the batch fails.
    fn memory_db(setup_sql: &str, context: &str) -> rusqlite::Connection {
        let db = rusqlite::Connection::open_in_memory().expect("failed to open in-memory db");
        db.execute_batch(setup_sql).expect(context);
        db
    }

    /// Every field of the response must appear in the JSON with its value.
    #[test]
    fn init_response_serializes_all_fields() {
        let payload = InitResponse {
            db_path: "/tmp/test.sqlite".to_string(),
            schema_version: 6,
            model: "multilingual-e5-small".to_string(),
            dim: 384,
            namespace: "global".to_string(),
            status: "ok".to_string(),
            elapsed_ms: 100,
        };

        let value = serde_json::to_value(&payload).expect("serialization failed");

        assert_eq!(value["db_path"], "/tmp/test.sqlite");
        assert_eq!(value["schema_version"], 6);
        assert_eq!(value["model"], "multilingual-e5-small");
        assert_eq!(value["dim"], 384usize);
        assert_eq!(value["namespace"], "global");
        assert_eq!(value["status"], "ok");
        assert!(value["elapsed_ms"].is_number());
    }

    /// A history table without rows reports version 0.
    #[test]
    fn latest_schema_version_returns_zero_for_empty_db() {
        let db = memory_db(
            "CREATE TABLE refinery_schema_history (version INTEGER NOT NULL);",
            "failed to create table",
        );

        let found = latest_schema_version(&db).expect("latest_schema_version failed");

        assert_eq!(found, 0u32, "empty db must return schema_version 0");
    }

    /// The highest version wins regardless of insertion order.
    #[test]
    fn latest_schema_version_returns_max_version() {
        let db = memory_db(
            "CREATE TABLE refinery_schema_history (version INTEGER NOT NULL);
             INSERT INTO refinery_schema_history VALUES (1);
             INSERT INTO refinery_schema_history VALUES (3);
             INSERT INTO refinery_schema_history VALUES (2);",
            "failed to populate table",
        );

        let found = latest_schema_version(&db).expect("latest_schema_version failed");

        assert_eq!(found, 3u32, "must return the highest version present");
    }

    /// Guards the 384 used by `init` against drift from `EMBEDDING_DIM`.
    #[test]
    fn init_response_dim_aligned_with_constant() {
        assert_eq!(
            crate::constants::EMBEDDING_DIM,
            384,
            "dim must be aligned with EMBEDDING_DIM=384"
        );
    }

    /// A non-default namespace must be serialised with its exact value.
    #[test]
    fn init_response_namespace_aligned_with_schema() {
        let payload = InitResponse {
            db_path: "/tmp/x.sqlite".to_string(),
            schema_version: 6,
            model: "multilingual-e5-small".to_string(),
            dim: 384,
            namespace: "my-project".to_string(),
            status: "ok".to_string(),
            elapsed_ms: 0,
        };

        let value = serde_json::to_value(&payload).expect("serialization failed");

        assert_eq!(value["namespace"], "my-project");
    }
}