sqlite_graphrag/commands/
init.rs1use crate::errors::AppError;
4use crate::output;
5use crate::paths::AppPaths;
6use crate::pragmas::{apply_init_pragmas, ensure_wal_mode};
7use crate::storage::connection::open_rw;
8use serde::Serialize;
9
10#[derive(Copy, Clone, Debug, PartialEq, Eq, clap::ValueEnum)]
16pub enum EmbeddingModelChoice {
17 #[value(name = "multilingual-e5-small")]
18 MultilingualE5Small,
19}
20
21#[derive(clap::Args)]
22#[command(after_long_help = "EXAMPLES:\n \
23 # Initialize a new database in the current directory\n \
24 sqlite-graphrag init\n\n \
25 # Initialize with a specific namespace\n \
26 sqlite-graphrag init --namespace my-project\n\n \
27 # Initialize at a custom database path\n \
28 sqlite-graphrag init --db /path/to/graphrag.sqlite")]
29pub struct InitArgs {
30 #[arg(long, env = "SQLITE_GRAPHRAG_DB_PATH")]
34 pub db: Option<String>,
35 #[arg(long, value_enum)]
38 pub model: Option<EmbeddingModelChoice>,
39 #[arg(long)]
42 pub force: bool,
43 #[arg(long)]
46 pub namespace: Option<String>,
47 #[arg(long, hide = true, help = "No-op; JSON is always emitted on stdout")]
48 pub json: bool,
49}
50
51#[derive(Serialize)]
52struct InitResponse {
53 db_path: String,
54 schema_version: u32,
57 model: String,
58 dim: usize,
59 namespace: String,
61 status: String,
62 elapsed_ms: u64,
64}
65
66pub fn run(
67 args: InitArgs,
68 llm_backend: crate::cli::LlmBackendChoice,
69 embedding_backend: crate::cli::EmbeddingBackendChoice,
70) -> Result<(), AppError> {
71 let start = std::time::Instant::now();
72 let paths = AppPaths::resolve(args.db.as_deref())?;
73 paths.ensure_dirs()?;
74
75 let namespace = crate::namespace::resolve_namespace(args.namespace.as_deref())?;
76
77 let mut conn = open_rw(&paths.db)?;
78
79 apply_init_pragmas(&conn)?;
80
81 crate::migrations::runner()
82 .run(&mut conn)
83 .map_err(|e| AppError::Internal(anyhow::anyhow!("migration failed: {e}")))?;
84
85 conn.execute_batch(&format!(
86 "PRAGMA user_version = {};",
87 crate::constants::SCHEMA_USER_VERSION
88 ))?;
89
90 ensure_wal_mode(&conn)?;
92
93 let schema_version = latest_schema_version(&conn)?;
94
95 conn.execute(
96 "INSERT OR REPLACE INTO schema_meta (key, value) VALUES ('schema_version', ?1)",
97 rusqlite::params![schema_version],
98 )?;
99 conn.execute(
100 "INSERT OR REPLACE INTO schema_meta (key, value) VALUES ('model', ?1)",
101 rusqlite::params![crate::constants::SQLITE_GRAPHRAG_VERSION],
102 )?;
103 conn.execute(
108 "INSERT OR IGNORE INTO schema_meta (key, value) VALUES ('dim', ?1)",
109 rusqlite::params![crate::constants::embedding_dim().to_string()],
110 )?;
111 conn.execute(
112 "INSERT OR REPLACE INTO schema_meta (key, value) VALUES ('created_at', CAST(unixepoch() AS TEXT))",
113 [],
114 )?;
115 conn.execute(
116 "INSERT OR REPLACE INTO schema_meta (key, value) VALUES ('sqlite-graphrag_version', ?1)",
117 rusqlite::params![crate::constants::SQLITE_GRAPHRAG_VERSION],
118 )?;
119 conn.execute(
121 "INSERT OR REPLACE INTO schema_meta (key, value) VALUES ('namespace_initial', ?1)",
122 rusqlite::params![namespace],
123 )?;
124
125 output::emit_progress_i18n(
126 "Validating embedding backend...",
127 "Validando backend de embedding...",
128 );
129
130 let (dim, status) = match crate::embedder::embed_passage_with_embedding_choice(
137 &paths.models,
138 "smoke test",
139 embedding_backend,
140 llm_backend,
141 ) {
142 Ok((v, _backend)) => (v.len(), "ok"),
143 Err(crate::errors::AppError::Validation(msg)) => {
144 return Err(crate::errors::AppError::Validation(msg))
145 }
146 Err(e) => {
147 tracing::warn!(target: "init", error = %e, "embedding smoke test failed; init continues without LLM validation");
148 (crate::constants::embedding_dim(), "ok_no_embedding")
149 }
150 };
151
152 output::emit_json(&InitResponse {
153 db_path: paths.db.display().to_string(),
154 schema_version,
155 model: crate::constants::SQLITE_GRAPHRAG_VERSION.to_string(),
156 dim,
157 namespace,
158 status: status.to_string(),
159 elapsed_ms: start.elapsed().as_millis() as u64,
160 })?;
161
162 Ok(())
163}
164
165fn latest_schema_version(conn: &rusqlite::Connection) -> Result<u32, AppError> {
166 match conn.query_row(
167 "SELECT version FROM refinery_schema_history ORDER BY version DESC LIMIT 1",
168 [],
169 |row| row.get::<_, i64>(0),
170 ) {
171 Ok(version) => Ok(version.max(0) as u32),
172 Err(rusqlite::Error::QueryReturnedNoRows) => Ok(0),
173 Err(err) => Err(AppError::Database(err)),
174 }
175}
176
#[cfg(test)]
mod tests {
    use super::*;

    /// Builds a successful response; only the fields that differ between
    /// tests are parameters.
    fn ok_response(db_path: &str, namespace: &str, elapsed_ms: u64) -> InitResponse {
        InitResponse {
            db_path: db_path.to_string(),
            schema_version: 6,
            model: crate::constants::SQLITE_GRAPHRAG_VERSION.to_string(),
            dim: 384,
            namespace: namespace.to_string(),
            status: "ok".to_string(),
            elapsed_ms,
        }
    }

    /// Opens a throwaway in-memory SQLite database.
    fn memory_db() -> rusqlite::Connection {
        rusqlite::Connection::open_in_memory().expect("failed to open in-memory db")
    }

    #[test]
    fn init_response_serializes_all_fields() {
        let resp = ok_response("/tmp/test.sqlite", "global", 100);
        let json = serde_json::to_value(&resp).expect("serialization failed");

        assert_eq!(json["db_path"], "/tmp/test.sqlite");
        assert_eq!(json["schema_version"], 6);
        assert_eq!(json["model"], crate::constants::SQLITE_GRAPHRAG_VERSION);
        assert_eq!(json["dim"], 384usize);
        assert_eq!(json["namespace"], "global");
        assert_eq!(json["status"], "ok");
        assert!(json["elapsed_ms"].is_number());
    }

    #[test]
    fn latest_schema_version_returns_zero_for_empty_db() {
        let conn = memory_db();
        conn.execute_batch("CREATE TABLE refinery_schema_history (version INTEGER NOT NULL);")
            .expect("failed to create table");

        let version = latest_schema_version(&conn).expect("latest_schema_version failed");
        assert_eq!(version, 0u32, "empty db must return schema_version 0");
    }

    #[test]
    fn latest_schema_version_returns_max_version() {
        let conn = memory_db();
        // Rows are inserted out of order so the test depends on the query's
        // ordering, not on insertion order.
        conn.execute_batch(
            "CREATE TABLE refinery_schema_history (version INTEGER NOT NULL);
             INSERT INTO refinery_schema_history VALUES (1);
             INSERT INTO refinery_schema_history VALUES (3);
             INSERT INTO refinery_schema_history VALUES (2);",
        )
        .expect("failed to populate table");

        let version = latest_schema_version(&conn).expect("latest_schema_version failed");
        assert_eq!(version, 3u32, "must return the highest version present");
    }

    #[test]
    fn init_default_dim_is_64() {
        assert_eq!(
            crate::constants::DEFAULT_EMBEDDING_DIM,
            64,
            "default dim must be 64 in the LLM-only build"
        );
    }

    #[test]
    fn init_response_namespace_aligned_with_schema() {
        let resp = ok_response("/tmp/x.sqlite", "my-project", 0);
        let json = serde_json::to_value(&resp).expect("serialization failed");

        assert_eq!(json["namespace"], "my-project");
    }
}