Skip to main content

sqlite_graphrag/commands/
init.rs

1//! Handler for the `init` CLI subcommand.
2
3use crate::errors::AppError;
4use crate::output;
5use crate::paths::AppPaths;
6use crate::pragmas::apply_init_pragmas;
7use crate::storage::connection::open_rw;
8use serde::Serialize;
9
10/// Embedding model choices exposed through `--model`.
11///
12/// Currently only `multilingual-e5-small` is supported. Additional variants
13/// will be added here as new models are integrated; the `value_enum` derive
14/// ensures the CLI rejects unknown strings at parse time rather than at runtime.
15#[derive(Copy, Clone, Debug, PartialEq, Eq, clap::ValueEnum)]
16pub enum EmbeddingModelChoice {
17    #[value(name = "multilingual-e5-small")]
18    MultilingualE5Small,
19}
20
21#[derive(clap::Args)]
22pub struct InitArgs {
23    /// Path to graphrag.sqlite. Defaults to `./graphrag.sqlite` in the current directory.
24    /// Resolution precedence (highest to lowest): `--db` flag > `SQLITE_GRAPHRAG_DB_PATH` env >
25    /// `SQLITE_GRAPHRAG_HOME` env (used as base directory) > cwd.
26    #[arg(long, env = "SQLITE_GRAPHRAG_DB_PATH")]
27    pub db: Option<String>,
28    /// Embedding model identifier. Currently only `multilingual-e5-small` is supported.
29    /// Reserved for future multi-model support; safe to omit.
30    #[arg(long, value_enum)]
31    pub model: Option<EmbeddingModelChoice>,
32    /// Force re-initialization, overwriting any existing schema metadata.
33    /// Use only when the schema is corrupted; loses configuration but preserves data.
34    #[arg(long)]
35    pub force: bool,
36    /// Initial namespace to resolve. Aligned with bilingual docs that mention `init --namespace`.
37    /// When provided, overrides `SQLITE_GRAPHRAG_NAMESPACE`; otherwise resolves via env or fallback `global`.
38    #[arg(long)]
39    pub namespace: Option<String>,
40    #[arg(long, help = "No-op; JSON is always emitted on stdout")]
41    pub json: bool,
42}
43
44#[derive(Serialize)]
45struct InitResponse {
46    db_path: String,
47    schema_version: String,
48    model: String,
49    dim: usize,
50    /// Active namespace resolved during initialisation, aligned with the bilingual docs.
51    namespace: String,
52    status: String,
53    /// Total execution time in milliseconds from handler start to serialisation.
54    elapsed_ms: u64,
55}
56
57pub fn run(args: InitArgs) -> Result<(), AppError> {
58    let inicio = std::time::Instant::now();
59    let paths = AppPaths::resolve(args.db.as_deref())?;
60    paths.ensure_dirs()?;
61
62    let namespace = crate::namespace::resolve_namespace(args.namespace.as_deref())?;
63
64    let mut conn = open_rw(&paths.db)?;
65
66    apply_init_pragmas(&conn)?;
67
68    crate::migrations::runner()
69        .run(&mut conn)
70        .map_err(|e| AppError::Internal(anyhow::anyhow!("migration failed: {e}")))?;
71
72    conn.execute_batch(&format!(
73        "PRAGMA user_version = {};",
74        crate::constants::SCHEMA_USER_VERSION
75    ))?;
76
77    let schema_version = latest_schema_version(&conn)?;
78
79    conn.execute(
80        "INSERT OR REPLACE INTO schema_meta (key, value) VALUES ('schema_version', ?1)",
81        rusqlite::params![schema_version],
82    )?;
83    conn.execute(
84        "INSERT OR REPLACE INTO schema_meta (key, value) VALUES ('model', 'multilingual-e5-small')",
85        [],
86    )?;
87    conn.execute(
88        "INSERT OR REPLACE INTO schema_meta (key, value) VALUES ('dim', '384')",
89        [],
90    )?;
91    conn.execute(
92        "INSERT OR REPLACE INTO schema_meta (key, value) VALUES ('created_at', CAST(unixepoch() AS TEXT))",
93        [],
94    )?;
95    conn.execute(
96        "INSERT OR REPLACE INTO schema_meta (key, value) VALUES ('sqlite-graphrag_version', ?1)",
97        rusqlite::params![crate::constants::SQLITE_GRAPHRAG_VERSION],
98    )?;
99    // Persist the resolved namespace so downstream tools can inspect it without re-resolving.
100    conn.execute(
101        "INSERT OR REPLACE INTO schema_meta (key, value) VALUES ('namespace_initial', ?1)",
102        rusqlite::params![namespace],
103    )?;
104
105    output::emit_progress_i18n(
106        "Initializing embedding model (may download on first run)...",
107        "Inicializando modelo de embedding (pode baixar na primeira execução)...",
108    );
109
110    let test_emb = crate::daemon::embed_passage_or_local(&paths.models, "smoke test")?;
111
112    output::emit_json(&InitResponse {
113        db_path: paths.db.display().to_string(),
114        schema_version,
115        model: "multilingual-e5-small".to_string(),
116        dim: test_emb.len(),
117        namespace,
118        status: "ok".to_string(),
119        elapsed_ms: inicio.elapsed().as_millis() as u64,
120    })?;
121
122    Ok(())
123}
124
125fn latest_schema_version(conn: &rusqlite::Connection) -> Result<String, AppError> {
126    match conn.query_row(
127        "SELECT version FROM refinery_schema_history ORDER BY version DESC LIMIT 1",
128        [],
129        |row| row.get::<_, i64>(0),
130    ) {
131        Ok(version) => Ok(version.to_string()),
132        Err(rusqlite::Error::QueryReturnedNoRows) => Ok("0".to_string()),
133        Err(err) => Err(AppError::Database(err)),
134    }
135}
136
#[cfg(test)]
mod tests {
    use super::*;

    /// Opens a fresh in-memory SQLite database for a single test.
    fn open_memory_db() -> rusqlite::Connection {
        rusqlite::Connection::open_in_memory().expect("falha ao abrir banco em memória")
    }

    /// Builds a response with the fixed schema/model/dim/status sample values
    /// shared by the serialization tests.
    fn sample_response(db_path: &str, namespace: &str, elapsed_ms: u64) -> InitResponse {
        InitResponse {
            db_path: db_path.to_string(),
            schema_version: "6".to_string(),
            model: "multilingual-e5-small".to_string(),
            dim: 384,
            namespace: namespace.to_string(),
            status: "ok".to_string(),
            elapsed_ms,
        }
    }

    /// Every field of the response must appear in the serialized JSON.
    #[test]
    fn init_response_serializa_todos_campos() {
        let resp = sample_response("/tmp/test.sqlite", "global", 100);
        let json = serde_json::to_value(&resp).expect("serialização falhou");

        assert_eq!(json["db_path"], "/tmp/test.sqlite");
        assert_eq!(json["schema_version"], "6");
        assert_eq!(json["model"], "multilingual-e5-small");
        assert_eq!(json["dim"], 384usize);
        assert_eq!(json["namespace"], "global");
        assert_eq!(json["status"], "ok");
        assert!(json["elapsed_ms"].is_number());
    }

    /// An empty history table reports schema version "0".
    #[test]
    fn latest_schema_version_retorna_zero_para_banco_vazio() {
        let conn = open_memory_db();
        conn.execute_batch("CREATE TABLE refinery_schema_history (version INTEGER NOT NULL);")
            .expect("falha ao criar tabela");

        let versao = latest_schema_version(&conn).expect("latest_schema_version falhou");
        assert_eq!(versao, "0", "banco vazio deve retornar schema_version '0'");
    }

    /// The largest version wins regardless of insertion order.
    #[test]
    fn latest_schema_version_retorna_versao_maxima() {
        let conn = open_memory_db();
        conn.execute_batch(
            "CREATE TABLE refinery_schema_history (version INTEGER NOT NULL);
             INSERT INTO refinery_schema_history VALUES (1);
             INSERT INTO refinery_schema_history VALUES (3);
             INSERT INTO refinery_schema_history VALUES (2);",
        )
        .expect("falha ao popular tabela");

        let versao = latest_schema_version(&conn).expect("latest_schema_version falhou");
        assert_eq!(versao, "3", "deve retornar a maior versão presente");
    }

    /// Guards the value of the `EMBEDDING_DIM` constant.
    #[test]
    fn init_response_dim_alinhado_com_constante() {
        assert_eq!(
            crate::constants::EMBEDDING_DIM,
            384,
            "dim deve estar alinhado com EMBEDDING_DIM=384"
        );
    }

    /// The namespace field keeps its value through serialization.
    #[test]
    fn init_response_namespace_alinhado_com_schema() {
        let resp = sample_response("/tmp/x.sqlite", "meu-projeto", 0);
        let json = serde_json::to_value(&resp).expect("serialização falhou");
        assert_eq!(json["namespace"], "meu-projeto");
    }
}
211}