Skip to main content

sqlite_graphrag/commands/
init.rs

1use crate::errors::AppError;
2use crate::output;
3use crate::paths::AppPaths;
4use crate::pragmas::apply_init_pragmas;
5use crate::storage::connection::open_rw;
6use serde::Serialize;
7
/// Embedding model choices exposed through `--model`.
///
/// Currently only `multilingual-e5-small` is supported. Additional variants
/// will be added here as new models are integrated; the `clap::ValueEnum` derive
/// ensures the CLI rejects unknown strings at parse time rather than at runtime.
#[derive(Copy, Clone, Debug, PartialEq, Eq, clap::ValueEnum)]
pub enum EmbeddingModelChoice {
    // The explicit `name` fixes the command-line spelling of this variant.
    #[value(name = "multilingual-e5-small")]
    MultilingualE5Small,
}
18
// Command-line arguments for the `init` handler (`run` in this file).
//
// The `///` comments on the fields below are picked up by clap as help text,
// so they are user-facing strings: edit them with the same care as any other
// runtime message.
//
// NOTE(review): `run` in this file only reads `db` and `namespace`. The
// `model`, `force` and `json` fields are parsed but never consulted there.
#[derive(clap::Args)]
pub struct InitArgs {
    /// Path to graphrag.sqlite. Defaults to `./graphrag.sqlite` in the current directory.
    /// Resolution precedence (highest to lowest): `--db` flag > `SQLITE_GRAPHRAG_DB_PATH` env >
    /// `SQLITE_GRAPHRAG_HOME` env (used as base directory) > cwd.
    #[arg(long, env = "SQLITE_GRAPHRAG_DB_PATH")]
    pub db: Option<String>,
    // NOTE(review): not read by `run`; the model name written to `schema_meta` is a fixed literal.
    /// Embedding model identifier. Currently only `multilingual-e5-small` is supported.
    /// Reserved for future multi-model support; safe to omit.
    #[arg(long, value_enum)]
    pub model: Option<EmbeddingModelChoice>,
    // NOTE(review): not read by `run`; metadata is written with `INSERT OR REPLACE`
    // whether or not this flag is set — confirm the intended semantics.
    /// Force re-initialization, overwriting any existing schema metadata.
    /// Use only when the schema is corrupted; loses configuration but preserves data.
    #[arg(long)]
    pub force: bool,
    /// Initial namespace to resolve. Aligned with bilingual docs that mention `init --namespace`.
    /// When provided, overrides `SQLITE_GRAPHRAG_NAMESPACE`; otherwise resolves via env or fallback `global`.
    #[arg(long)]
    pub namespace: Option<String>,
    // Accepted for CLI compatibility only; `run` always emits JSON.
    #[arg(long, help = "No-op; JSON is always emitted on stdout")]
    pub json: bool,
}
41
/// JSON payload emitted by `run` once initialization has finished.
#[derive(Serialize)]
struct InitResponse {
    /// Display form of the resolved database path.
    db_path: String,
    /// Highest migration version found by `latest_schema_version`, as text.
    schema_version: String,
    /// Embedding model name.
    model: String,
    /// Length of the vector returned by the embedding smoke test in `run`.
    dim: usize,
    /// Active namespace resolved during initialization, aligned with the bilingual docs.
    namespace: String,
    /// Outcome marker; `run` sets it to `"ok"`.
    status: String,
    /// Total execution time in milliseconds, from handler start until serialization.
    elapsed_ms: u64,
}
54
55pub fn run(args: InitArgs) -> Result<(), AppError> {
56    let inicio = std::time::Instant::now();
57    let paths = AppPaths::resolve(args.db.as_deref())?;
58    paths.ensure_dirs()?;
59
60    let namespace = crate::namespace::resolve_namespace(args.namespace.as_deref())?;
61
62    let mut conn = open_rw(&paths.db)?;
63
64    apply_init_pragmas(&conn)?;
65
66    crate::migrations::runner()
67        .run(&mut conn)
68        .map_err(|e| AppError::Internal(anyhow::anyhow!("migration failed: {e}")))?;
69
70    conn.execute_batch(&format!(
71        "PRAGMA user_version = {};",
72        crate::constants::SCHEMA_USER_VERSION
73    ))?;
74
75    let schema_version = latest_schema_version(&conn)?;
76
77    conn.execute(
78        "INSERT OR REPLACE INTO schema_meta (key, value) VALUES ('schema_version', ?1)",
79        rusqlite::params![schema_version],
80    )?;
81    conn.execute(
82        "INSERT OR REPLACE INTO schema_meta (key, value) VALUES ('model', 'multilingual-e5-small')",
83        [],
84    )?;
85    conn.execute(
86        "INSERT OR REPLACE INTO schema_meta (key, value) VALUES ('dim', '384')",
87        [],
88    )?;
89    conn.execute(
90        "INSERT OR REPLACE INTO schema_meta (key, value) VALUES ('created_at', CAST(unixepoch() AS TEXT))",
91        [],
92    )?;
93    conn.execute(
94        "INSERT OR REPLACE INTO schema_meta (key, value) VALUES ('sqlite-graphrag_version', ?1)",
95        rusqlite::params![crate::constants::SQLITE_GRAPHRAG_VERSION],
96    )?;
97    // Persist the resolved namespace so downstream tools can inspect it without re-resolving.
98    conn.execute(
99        "INSERT OR REPLACE INTO schema_meta (key, value) VALUES ('namespace_initial', ?1)",
100        rusqlite::params![namespace],
101    )?;
102
103    output::emit_progress_i18n(
104        "Initializing embedding model (may download on first run)...",
105        "Inicializando modelo de embedding (pode baixar na primeira execução)...",
106    );
107
108    let test_emb = crate::daemon::embed_passage_or_local(&paths.models, "smoke test")?;
109
110    output::emit_json(&InitResponse {
111        db_path: paths.db.display().to_string(),
112        schema_version,
113        model: "multilingual-e5-small".to_string(),
114        dim: test_emb.len(),
115        namespace,
116        status: "ok".to_string(),
117        elapsed_ms: inicio.elapsed().as_millis() as u64,
118    })?;
119
120    Ok(())
121}
122
123fn latest_schema_version(conn: &rusqlite::Connection) -> Result<String, AppError> {
124    match conn.query_row(
125        "SELECT version FROM refinery_schema_history ORDER BY version DESC LIMIT 1",
126        [],
127        |row| row.get::<_, i64>(0),
128    ) {
129        Ok(version) => Ok(version.to_string()),
130        Err(rusqlite::Error::QueryReturnedNoRows) => Ok("0".to_string()),
131        Err(err) => Err(AppError::Database(err)),
132    }
133}
134
#[cfg(test)]
mod testes {
    use super::*;

    /// Builds an `InitResponse` fixture; only the fields that vary between tests are parameters.
    fn sample_response(db_path: &str, namespace: &str, elapsed_ms: u64) -> InitResponse {
        InitResponse {
            db_path: db_path.to_string(),
            schema_version: "6".to_string(),
            model: "multilingual-e5-small".to_string(),
            dim: 384,
            namespace: namespace.to_string(),
            status: "ok".to_string(),
            elapsed_ms,
        }
    }

    /// Opens an in-memory database and runs `setup_sql`, panicking with `failure_msg` if it fails.
    fn history_db(setup_sql: &str, failure_msg: &str) -> rusqlite::Connection {
        let conn = rusqlite::Connection::open_in_memory().expect("falha ao abrir banco em memória");
        conn.execute_batch(setup_sql).expect(failure_msg);
        conn
    }

    /// Every field of the response must appear in the serialized JSON with its value.
    #[test]
    fn init_response_serializa_todos_campos() {
        let resp = sample_response("/tmp/test.sqlite", "global", 100);
        let json = serde_json::to_value(&resp).expect("serialização falhou");

        let expected_strings = [
            ("db_path", "/tmp/test.sqlite"),
            ("schema_version", "6"),
            ("model", "multilingual-e5-small"),
            ("namespace", "global"),
            ("status", "ok"),
        ];
        for (key, expected) in expected_strings {
            assert_eq!(json[key], expected);
        }
        assert_eq!(json["dim"], 384usize);
        assert!(json["elapsed_ms"].is_number());
    }

    /// An empty migration history table is reported as version "0".
    #[test]
    fn latest_schema_version_retorna_zero_para_banco_vazio() {
        let conn = history_db(
            "CREATE TABLE refinery_schema_history (version INTEGER NOT NULL);",
            "falha ao criar tabela",
        );

        let found = latest_schema_version(&conn).expect("latest_schema_version falhou");
        assert_eq!(found, "0", "banco vazio deve retornar schema_version '0'");
    }

    /// With rows inserted out of order, the largest version is the one returned.
    #[test]
    fn latest_schema_version_retorna_versao_maxima() {
        let conn = history_db(
            "CREATE TABLE refinery_schema_history (version INTEGER NOT NULL);
             INSERT INTO refinery_schema_history VALUES (1);
             INSERT INTO refinery_schema_history VALUES (3);
             INSERT INTO refinery_schema_history VALUES (2);",
            "falha ao popular tabela",
        );

        let found = latest_schema_version(&conn).expect("latest_schema_version falhou");
        assert_eq!(found, "3", "deve retornar a maior versão presente");
    }

    /// Guards the crate-wide embedding dimension constant against accidental change.
    #[test]
    fn init_response_dim_alinhado_com_constante() {
        assert_eq!(
            crate::constants::EMBEDDING_DIM,
            384,
            "dim deve estar alinhado com EMBEDDING_DIM=384"
        );
    }

    /// The namespace field is serialized with the value it was given.
    #[test]
    fn init_response_namespace_alinhado_com_schema() {
        let resp = sample_response("/tmp/x.sqlite", "meu-projeto", 0);
        let json = serde_json::to_value(&resp).expect("serialização falhou");
        assert_eq!(json["namespace"], "meu-projeto");
    }
}