Skip to main content

sqlite_graphrag/storage/
connection.rs

1//! SQLite connection setup with PRAGMAs and 0600 permissions.
2//!
3//! Opens (or creates) the database file, loads the `sqlite-vec` extension,
4//! applies WAL/journal PRAGMAs, and enforces 0600 file permissions on Unix.
5
6use crate::errors::AppError;
7use crate::paths::AppPaths;
8use crate::pragmas::{apply_connection_pragmas, apply_init_pragmas, ensure_wal_mode};
9use rusqlite::Connection;
10use sqlite_vec::sqlite3_vec_init;
11use std::path::Path;
12use std::sync::OnceLock;
13
14static VEC_EXTENSION_REGISTERED: OnceLock<()> = OnceLock::new();
15
16/// Register sqlite-vec GLOBALLY before any connection is opened.
17///
18/// Idempotent: subsequent calls are no-ops thanks to `OnceLock`. Safe to invoke from
19/// both the binary entry point (`main.rs`) and library helpers like `ensure_db_ready`
20/// so unit tests that exercise CRUD handlers do not need to pre-register the extension.
21pub fn register_vec_extension() {
22    VEC_EXTENSION_REGISTERED.get_or_init(|| {
23        // SAFETY: sqlite3_auto_extension is a C FFI function that registers a callback
24        // invoked when SQLite opens any new connection. Soundness assumptions:
25        // 1. `sqlite3_vec_init` has the exact ABI signature `extern "C" fn(...) -> i32`
26        //    expected by SQLite's auto-extension API (verified by sqlite-vec crate).
27        // 2. The transmute from `*const ()` to the expected fn pointer is valid because
28        //    both have identical layout on supported platforms (Linux, macOS, Windows).
29        // 3. `OnceLock::get_or_init` guarantees this closure runs at most once across
30        //    all threads; the auto-extension list is mutated exactly one time.
31        #[allow(clippy::missing_transmute_annotations)]
32        unsafe {
33            rusqlite::ffi::sqlite3_auto_extension(Some(std::mem::transmute(
34                sqlite3_vec_init as *const (),
35            )));
36        }
37    });
38}
39
40pub fn open_rw(path: &Path) -> Result<Connection, AppError> {
41    let conn = Connection::open(path)?;
42    apply_connection_pragmas(&conn)?;
43    apply_secure_permissions(path);
44    Ok(conn)
45}
46
47pub fn ensure_schema(conn: &mut Connection) -> Result<(), AppError> {
48    crate::migrations::runner()
49        .run(conn)
50        .map_err(|e| AppError::Internal(anyhow::anyhow!("migration failed: {e}")))?;
51    conn.execute_batch(&format!(
52        "PRAGMA user_version = {};",
53        crate::constants::SCHEMA_USER_VERSION
54    ))?;
55    Ok(())
56}
57
58/// Ensures the database file exists and the schema is at the current version.
59///
60/// Behavior:
61/// - DB does not exist: creates the file, applies init PRAGMAs, runs all migrations,
62///   sets `PRAGMA user_version`, and populates `schema_meta` with default values.
63///   Emits `tracing::info!` on creation.
64/// - DB exists with `user_version` below `SCHEMA_USER_VERSION`: runs the remaining
65///   migrations and updates `user_version`. Emits `tracing::warn!` on auto-migration.
66/// - DB exists with `user_version` equal to `SCHEMA_USER_VERSION`: no-op.
67///
68/// This helper unifies the auto-init contract across CRUD handlers so users can run
69/// any subcommand on a fresh directory without invoking `init` first. Idempotent
70/// and safe to call before every handler that needs a ready database.
71pub fn ensure_db_ready(paths: &AppPaths) -> Result<(), AppError> {
72    register_vec_extension();
73    paths.ensure_dirs()?;
74
75    let db_existed = paths.db.exists();
76
77    if !db_existed {
78        tracing::info!(
79            path = %paths.db.display(),
80            schema_version = crate::constants::CURRENT_SCHEMA_VERSION,
81            "creating database (auto-init)"
82        );
83    }
84
85    let mut conn = open_rw(&paths.db)?;
86
87    if !db_existed {
88        apply_init_pragmas(&conn)?;
89    }
90
91    let current_user_version: i64 = conn
92        .query_row("PRAGMA user_version", [], |row| row.get(0))
93        .unwrap_or(0);
94    let target_user_version = crate::constants::SCHEMA_USER_VERSION;
95
96    if current_user_version < target_user_version {
97        if db_existed {
98            tracing::warn!(
99                from = current_user_version,
100                to = target_user_version,
101                path = %paths.db.display(),
102                "auto-migrating database schema"
103            );
104        }
105        crate::migrations::runner()
106            .run(&mut conn)
107            .map_err(|e| AppError::Internal(anyhow::anyhow!("auto-migration failed: {e}")))?;
108        conn.execute_batch(&format!("PRAGMA user_version = {target_user_version};"))?;
109
110        if !db_existed {
111            insert_default_schema_meta(&conn)?;
112        }
113
114        // Defensive re-assertion: refinery's migration runner may open internal
115        // handles that revert journal_mode to delete on some platforms. Re-apply
116        // WAL after migrations to guarantee the documented contract holds for
117        // every command that goes through the auto-init path.
118        ensure_wal_mode(&conn)?;
119    }
120
121    Ok(())
122}
123
124fn insert_default_schema_meta(conn: &Connection) -> Result<(), AppError> {
125    conn.execute(
126        "INSERT OR REPLACE INTO schema_meta (key, value) VALUES ('schema_version', ?1)",
127        rusqlite::params![crate::constants::CURRENT_SCHEMA_VERSION.to_string()],
128    )?;
129    conn.execute(
130        "INSERT OR REPLACE INTO schema_meta (key, value) VALUES ('model', 'multilingual-e5-small')",
131        [],
132    )?;
133    conn.execute(
134        "INSERT OR REPLACE INTO schema_meta (key, value) VALUES ('dim', '384')",
135        [],
136    )?;
137    conn.execute(
138        "INSERT OR REPLACE INTO schema_meta (key, value) VALUES ('created_at', CAST(unixepoch() AS TEXT))",
139        [],
140    )?;
141    conn.execute(
142        "INSERT OR REPLACE INTO schema_meta (key, value) VALUES ('sqlite-graphrag_version', ?1)",
143        rusqlite::params![crate::constants::SQLITE_GRAPHRAG_VERSION],
144    )?;
145    Ok(())
146}
147
/// Applies 600 permissions (owner read/write only) to the SQLite file and its WAL/SHM
/// companion files on Unix to prevent leaking private memories in shared directories
/// (e.g. multi-user /tmp, Dropbox, NFS). No-op on Windows. Failures are silent to avoid
/// blocking the operation when the process does not own the file (e.g. read-only mount).
///
/// Companion paths are built by appending `-wal` / `-shm` to the *complete* database
/// path, which is how SQLite names them. This also covers database paths that have no
/// file extension (e.g. `graphdb` -> `graphdb-wal`).
// `path` is unused on non-Unix targets, where the body compiles to nothing.
#[allow(unused_variables)]
fn apply_secure_permissions(path: &Path) {
    #[cfg(unix)]
    {
        use std::os::unix::fs::PermissionsExt;

        // Append the suffix to the full path instead of rewriting the extension,
        // so extension-less and non-UTF-8 file names are handled correctly.
        let companion = |suffix: &str| {
            let mut name = path.as_os_str().to_os_string();
            name.push(suffix);
            std::path::PathBuf::from(name)
        };

        for file in [path.to_path_buf(), companion("-wal"), companion("-shm")] {
            // `metadata` fails for files that do not exist, which skips them.
            if let Ok(meta) = std::fs::metadata(&file) {
                let mut perms = meta.permissions();
                perms.set_mode(0o600);
                // Best-effort: ignore failures (e.g. file not owned by this process).
                let _ = std::fs::set_permissions(&file, perms);
            }
        }
    }
}
183
184pub fn open_ro(path: &Path) -> Result<Connection, AppError> {
185    let conn = Connection::open_with_flags(
186        path,
187        rusqlite::OpenFlags::SQLITE_OPEN_READ_ONLY | rusqlite::OpenFlags::SQLITE_OPEN_URI,
188    )?;
189    conn.execute_batch("PRAGMA foreign_keys = ON;")?;
190    Ok(conn)
191}