sqlite_graphrag/commands/
init.rs1use crate::errors::AppError;
4use crate::output;
5use crate::paths::AppPaths;
6use crate::pragmas::{apply_init_pragmas, ensure_wal_mode};
7use crate::storage::connection::open_rw;
8use serde::Serialize;
9
10#[derive(Copy, Clone, Debug, PartialEq, Eq, clap::ValueEnum)]
16pub enum EmbeddingModelChoice {
17 #[value(name = "multilingual-e5-small")]
18 MultilingualE5Small,
19}
20
21#[derive(clap::Args)]
22#[command(after_long_help = "EXAMPLES:\n \
23 # Initialize a new database in the current directory\n \
24 sqlite-graphrag init\n\n \
25 # Initialize with a specific namespace\n \
26 sqlite-graphrag init --namespace my-project\n\n \
27 # Initialize at a custom database path\n \
28 sqlite-graphrag init --db /path/to/graphrag.sqlite")]
29pub struct InitArgs {
30 #[arg(long, env = "SQLITE_GRAPHRAG_DB_PATH")]
34 pub db: Option<String>,
35 #[arg(long, value_enum)]
38 pub model: Option<EmbeddingModelChoice>,
39 #[arg(long)]
42 pub force: bool,
43 #[arg(long)]
46 pub namespace: Option<String>,
47 #[arg(long, hide = true, help = "No-op; JSON is always emitted on stdout")]
48 pub json: bool,
49}
50
51#[derive(Serialize)]
52struct InitResponse {
53 db_path: String,
54 schema_version: u32,
57 model: String,
58 dim: usize,
59 namespace: String,
61 status: String,
62 elapsed_ms: u64,
64}
65
66pub fn run(args: InitArgs) -> Result<(), AppError> {
67 let start = std::time::Instant::now();
68 let paths = AppPaths::resolve(args.db.as_deref())?;
69 paths.ensure_dirs()?;
70
71 let namespace = crate::namespace::resolve_namespace(args.namespace.as_deref())?;
72
73 let mut conn = open_rw(&paths.db)?;
74
75 apply_init_pragmas(&conn)?;
76
77 crate::migrations::runner()
78 .run(&mut conn)
79 .map_err(|e| AppError::Internal(anyhow::anyhow!("migration failed: {e}")))?;
80
81 conn.execute_batch(&format!(
82 "PRAGMA user_version = {};",
83 crate::constants::SCHEMA_USER_VERSION
84 ))?;
85
86 ensure_wal_mode(&conn)?;
88
89 let schema_version = latest_schema_version(&conn)?;
90
91 conn.execute(
92 "INSERT OR REPLACE INTO schema_meta (key, value) VALUES ('schema_version', ?1)",
93 rusqlite::params![schema_version],
94 )?;
95 conn.execute(
96 "INSERT OR REPLACE INTO schema_meta (key, value) VALUES ('model', 'multilingual-e5-small')",
97 [],
98 )?;
99 conn.execute(
100 "INSERT OR REPLACE INTO schema_meta (key, value) VALUES ('dim', '384')",
101 [],
102 )?;
103 conn.execute(
104 "INSERT OR REPLACE INTO schema_meta (key, value) VALUES ('created_at', CAST(unixepoch() AS TEXT))",
105 [],
106 )?;
107 conn.execute(
108 "INSERT OR REPLACE INTO schema_meta (key, value) VALUES ('sqlite-graphrag_version', ?1)",
109 rusqlite::params![crate::constants::SQLITE_GRAPHRAG_VERSION],
110 )?;
111 conn.execute(
113 "INSERT OR REPLACE INTO schema_meta (key, value) VALUES ('namespace_initial', ?1)",
114 rusqlite::params![namespace],
115 )?;
116
117 output::emit_progress_i18n(
118 "Initializing embedding model (may download on first run)...",
119 crate::i18n::validation::runtime_pt::initializing_embedding_model(),
120 );
121
122 let test_emb = crate::daemon::embed_passage_or_local(&paths.models, "smoke test")?;
123
124 output::emit_json(&InitResponse {
125 db_path: paths.db.display().to_string(),
126 schema_version,
127 model: "multilingual-e5-small".to_string(),
128 dim: test_emb.len(),
129 namespace,
130 status: "ok".to_string(),
131 elapsed_ms: start.elapsed().as_millis() as u64,
132 })?;
133
134 Ok(())
135}
136
137fn latest_schema_version(conn: &rusqlite::Connection) -> Result<u32, AppError> {
138 match conn.query_row(
139 "SELECT version FROM refinery_schema_history ORDER BY version DESC LIMIT 1",
140 [],
141 |row| row.get::<_, i64>(0),
142 ) {
143 Ok(version) => Ok(version.max(0) as u32),
144 Err(rusqlite::Error::QueryReturnedNoRows) => Ok(0),
145 Err(err) => Err(AppError::Database(err)),
146 }
147}
148
#[cfg(test)]
mod tests {
    use super::*;

    /// Builds a response whose fixed fields match what `run` reports; only the
    /// values that differ between tests are parameters.
    fn sample_response(db_path: &str, namespace: &str, elapsed_ms: u64) -> InitResponse {
        InitResponse {
            db_path: db_path.to_string(),
            schema_version: 6,
            model: "multilingual-e5-small".to_string(),
            dim: 384,
            namespace: namespace.to_string(),
            status: "ok".to_string(),
            elapsed_ms,
        }
    }

    /// Opens a throwaway in-memory SQLite database.
    fn memory_db() -> rusqlite::Connection {
        rusqlite::Connection::open_in_memory().expect("failed to open in-memory db")
    }

    #[test]
    fn init_response_serializes_all_fields() {
        let payload = sample_response("/tmp/test.sqlite", "global", 100);
        let json = serde_json::to_value(&payload).expect("serialization failed");

        assert_eq!(json["db_path"], "/tmp/test.sqlite");
        assert_eq!(json["schema_version"], 6);
        assert_eq!(json["model"], "multilingual-e5-small");
        assert_eq!(json["dim"], 384usize);
        assert_eq!(json["namespace"], "global");
        assert_eq!(json["status"], "ok");
        assert!(json["elapsed_ms"].is_number());
    }

    #[test]
    fn latest_schema_version_returns_zero_for_empty_db() {
        let db = memory_db();
        db.execute_batch("CREATE TABLE refinery_schema_history (version INTEGER NOT NULL);")
            .expect("failed to create table");

        let found = latest_schema_version(&db).expect("latest_schema_version failed");
        assert_eq!(found, 0u32, "empty db must return schema_version 0");
    }

    #[test]
    fn latest_schema_version_returns_max_version() {
        let db = memory_db();
        // Rows are inserted out of order so the test relies on the query's ordering.
        db.execute_batch(
            "CREATE TABLE refinery_schema_history (version INTEGER NOT NULL);
             INSERT INTO refinery_schema_history VALUES (1);
             INSERT INTO refinery_schema_history VALUES (3);
             INSERT INTO refinery_schema_history VALUES (2);",
        )
        .expect("failed to populate table");

        let found = latest_schema_version(&db).expect("latest_schema_version failed");
        assert_eq!(found, 3u32, "must return the highest version present");
    }

    #[test]
    fn init_response_dim_aligned_with_constant() {
        assert_eq!(
            crate::constants::EMBEDDING_DIM,
            384,
            "dim must be aligned with EMBEDDING_DIM=384"
        );
    }

    #[test]
    fn init_response_namespace_aligned_with_schema() {
        let payload = sample_response("/tmp/x.sqlite", "my-project", 0);
        let json = serde_json::to_value(&payload).expect("serialization failed");

        assert_eq!(json["namespace"], "my-project");
    }
}