Skip to main content

vldb_sqlite/
runtime.rs

1use crate::db_lock::DatabaseFileLock;
2use crate::tokenizer::ensure_jieba_tokenizer_registered;
3use rusqlite::{Connection, OpenFlags};
4use std::collections::HashMap;
5use std::error::Error;
6use std::io;
7use std::path::{Path, PathBuf};
8use std::sync::{Arc, Mutex};
9
/// Shared error type for library mode and runtime mode.
///
/// A boxed `std::error::Error` trait object that is `Send + Sync + 'static`; the fallible
/// functions in this module return it and rely on `?` to convert concrete errors
/// (for example `io::Error`) into it.
pub type BoxError = Box<dyn Error + Send + Sync + 'static>;
13
/// SQLite pragma configuration used by the library/runtime layer.
///
/// These values are sent to each connection by `apply_sqlite_connection_pragmas`.
#[derive(Debug, Clone)]
pub struct SqlitePragmaOptions {
    /// Requested journal mode (`PRAGMA journal_mode`); not applied to read-only opens.
    pub journal_mode: String,
    /// Requested synchronous mode (`PRAGMA synchronous`); not applied to read-only opens.
    pub synchronous: String,
    /// Whether foreign keys are enabled (`PRAGMA foreign_keys`).
    pub foreign_keys: bool,
    /// Temporary storage mode (`PRAGMA temp_store`).
    pub temp_store: String,
    /// WAL auto-checkpoint page count (`PRAGMA wal_autocheckpoint`); not applied to
    /// read-only opens.
    pub wal_autocheckpoint_pages: u32,
    /// SQLite cache size in KiB; the value is negated before being sent as
    /// `PRAGMA cache_size`.
    pub cache_size_kib: i64,
    /// mmap size in bytes (`PRAGMA mmap_size`).
    pub mmap_size_bytes: u64,
}
40
/// SQLite hardening options used by the library/runtime layer.
#[derive(Debug, Clone)]
pub struct SqliteHardeningOptions {
    /// Whether a database file lock should be enforced.
    /// Paths recognised by `is_special_db_path` never take a lock.
    pub enforce_db_file_lock: bool,
    /// Whether the database should be opened in read-only mode.
    /// When set, the connection is opened with `SQLITE_OPEN_READ_ONLY` and
    /// `PRAGMA query_only` is switched on.
    pub read_only: bool,
    /// Whether SQLite URI filenames are allowed.
    /// When unset, paths starting with `file:` are rejected during path normalization;
    /// when set, `SQLITE_OPEN_URI` is added to the open flags.
    pub allow_uri_filenames: bool,
    /// Whether trusted_schema is enabled (`PRAGMA trusted_schema`).
    pub trusted_schema: bool,
    /// Whether SQLite defensive mode is enabled.
    /// NOTE(review): this is currently sent as `PRAGMA defensive`; confirm that SQLite
    /// actually honours that pragma name.
    pub defensive: bool,
}
61
/// Programmatic open options for a single SQLite database.
///
/// Bundles the connection-level tunables with the pragma and hardening option groups.
#[derive(Debug, Clone)]
pub struct SqliteOpenOptions {
    /// Suggested connection-pool size; currently retained as metadata by the library runtime.
    pub connection_pool_size: usize,
    /// busy_timeout in milliseconds (`PRAGMA busy_timeout`).
    pub busy_timeout_ms: u64,
    /// SQLite pragma options.
    pub pragmas: SqlitePragmaOptions,
    /// SQLite hardening options.
    pub hardening: SqliteHardeningOptions,
}
79
80impl Default for SqlitePragmaOptions {
81    fn default() -> Self {
82        Self {
83            journal_mode: "WAL".to_string(),
84            synchronous: "NORMAL".to_string(),
85            foreign_keys: true,
86            temp_store: "MEMORY".to_string(),
87            wal_autocheckpoint_pages: 1_000,
88            cache_size_kib: 65_536,
89            mmap_size_bytes: 268_435_456,
90        }
91    }
92}
93
94impl Default for SqliteHardeningOptions {
95    fn default() -> Self {
96        Self {
97            enforce_db_file_lock: true,
98            read_only: false,
99            allow_uri_filenames: false,
100            trusted_schema: false,
101            defensive: true,
102        }
103    }
104}
105
106impl Default for SqliteOpenOptions {
107    fn default() -> Self {
108        Self {
109            connection_pool_size: 8,
110            busy_timeout_ms: 5_000,
111            pragmas: SqlitePragmaOptions::default(),
112            hardening: SqliteHardeningOptions::default(),
113        }
114    }
115}
116
/// Pure library multi-database runtime for `vldb-sqlite`.
///
/// Keeps a mutex-protected registry of shared database handles keyed by their
/// normalized path string.
#[derive(Debug)]
pub struct SqliteRuntime {
    // Options used by `open_database` when the caller supplies none.
    default_options: SqliteOpenOptions,
    // Registry of open handles, keyed by the normalized database path.
    databases: Mutex<HashMap<String, Arc<SqliteDatabaseHandle>>>,
}
124
/// A single database handle managed by the runtime.
///
/// The handle stores the path and options needed to open connections on demand; it does
/// not itself hold an open `Connection`.
#[derive(Debug)]
pub struct SqliteDatabaseHandle {
    // Database path as passed to `SqliteDatabaseHandle::new`.
    db_path: String,
    // Options applied to every connection opened through this handle.
    options: SqliteOpenOptions,
    // File lock acquired at construction, if locking was requested and applicable.
    // Presumably released when the handle is dropped; confirm against `DatabaseFileLock`.
    file_lock: Option<DatabaseFileLock>,
}
133
134impl Default for SqliteRuntime {
135    fn default() -> Self {
136        Self::new()
137    }
138}
139
140impl SqliteRuntime {
141    /// 使用默认参数创建一个新的多库 runtime。
142    /// Create a new multi-database runtime with default options.
143    pub fn new() -> Self {
144        Self::with_default_options(SqliteOpenOptions::default())
145    }
146
147    /// 使用给定默认参数创建多库 runtime。
148    /// Create a multi-database runtime with caller-provided default options.
149    pub fn with_default_options(default_options: SqliteOpenOptions) -> Self {
150        Self {
151            default_options,
152            databases: Mutex::new(HashMap::new()),
153        }
154    }
155
156    /// 获取默认打开选项。
157    /// Get the default open options.
158    pub fn default_options(&self) -> &SqliteOpenOptions {
159        &self.default_options
160    }
161
162    /// 打开或复用指定路径的数据库句柄。
163    /// Open or reuse a database handle for the specified path.
164    pub fn open_database(&self, db_path: impl AsRef<str>) -> Result<Arc<SqliteDatabaseHandle>, BoxError> {
165        self.open_database_with_options(db_path, self.default_options.clone())
166    }
167
168    /// 使用显式选项打开或复用指定路径的数据库句柄。
169    /// Open or reuse a database handle for the specified path with explicit options.
170    pub fn open_database_with_options(
171        &self,
172        db_path: impl AsRef<str>,
173        options: SqliteOpenOptions,
174    ) -> Result<Arc<SqliteDatabaseHandle>, BoxError> {
175        let normalized = normalize_db_path(
176            db_path.as_ref(),
177            options.hardening.allow_uri_filenames,
178        )?;
179
180        let mut guard = self
181            .databases
182            .lock()
183            .unwrap_or_else(|poisoned| poisoned.into_inner());
184
185        if let Some(existing) = guard.get(&normalized) {
186            return Ok(Arc::clone(existing));
187        }
188
189        let handle = Arc::new(SqliteDatabaseHandle::new(normalized.clone(), options)?);
190        guard.insert(normalized, Arc::clone(&handle));
191        Ok(handle)
192    }
193
194    /// 获取已缓存的数据库句柄。
195    /// Get an already-cached database handle.
196    pub fn get_database(&self, db_path: impl AsRef<str>) -> Option<Arc<SqliteDatabaseHandle>> {
197        let normalized = normalize_db_path(
198            db_path.as_ref(),
199            self.default_options.hardening.allow_uri_filenames,
200        )
201        .ok()?;
202        self.databases
203            .lock()
204            .ok()
205            .and_then(|guard| guard.get(&normalized).cloned())
206    }
207
208    /// 关闭并移除指定数据库句柄。
209    /// Close and remove a database handle.
210    pub fn close_database(&self, db_path: impl AsRef<str>) -> bool {
211        let normalized = match normalize_db_path(
212            db_path.as_ref(),
213            self.default_options.hardening.allow_uri_filenames,
214        ) {
215            Ok(path) => path,
216            Err(_) => return false,
217        };
218
219        self.databases
220            .lock()
221            .map(|mut guard| guard.remove(&normalized).is_some())
222            .unwrap_or(false)
223    }
224
225    /// 列出当前 runtime 中已注册的数据库路径。
226    /// List database paths currently registered in the runtime.
227    pub fn list_databases(&self) -> Vec<String> {
228        self.databases
229            .lock()
230            .map(|guard| guard.keys().cloned().collect())
231            .unwrap_or_default()
232    }
233
234    /// 返回当前 runtime 中已注册的数据库数量。
235    /// Return the number of databases currently registered in the runtime.
236    pub fn database_count(&self) -> usize {
237        self.databases
238            .lock()
239            .map(|guard| guard.len())
240            .unwrap_or_default()
241    }
242}
243
244impl SqliteDatabaseHandle {
245    /// 创建单库句柄。
246    /// Create a single database handle.
247    pub fn new(db_path: String, options: SqliteOpenOptions) -> Result<Self, BoxError> {
248        if !is_special_db_path(&db_path) && !looks_like_sqlite_uri(&db_path) {
249            if let Some(parent) = Path::new(&db_path).parent() {
250                if !parent.as_os_str().is_empty() {
251                    std::fs::create_dir_all(parent)?;
252                }
253            }
254        }
255
256        let file_lock = if options.hardening.enforce_db_file_lock && !is_special_db_path(&db_path) {
257            Some(DatabaseFileLock::acquire(Path::new(&db_path))?)
258        } else {
259            None
260        };
261
262        Ok(Self {
263            db_path,
264            options,
265            file_lock,
266        })
267    }
268
269    /// 获取数据库路径。
270    /// Get the database path.
271    pub fn db_path(&self) -> &str {
272        &self.db_path
273    }
274
275    /// 获取当前句柄的打开选项。
276    /// Get the current handle's open options.
277    pub fn options(&self) -> &SqliteOpenOptions {
278        &self.options
279    }
280
281    /// 返回关联的锁文件路径(如果有)。
282    /// Return the associated lock-file path, if any.
283    pub fn lock_path(&self) -> Option<&Path> {
284        self.file_lock.as_ref().map(DatabaseFileLock::path)
285    }
286
287    /// 打开一个已按运行时规则初始化的 SQLite 连接。
288    /// Open a SQLite connection initialized with runtime rules.
289    pub fn open_connection(&self) -> Result<Connection, BoxError> {
290        open_sqlite_connection(self.db_path.as_str(), &self.options)
291    }
292}
293
/// Detect special database paths such as in-memory or shared-memory URI paths.
///
/// After trimming surrounding whitespace, the following are treated as special:
/// - the bare `:memory:` name;
/// - a `file:` URI whose path is `:memory:` (for example `file::memory:?cache=shared`);
/// - a `file:` URI carrying a `mode=memory` query parameter.
///
/// `mode=memory` is only recognised as a query parameter, so a file path that merely
/// contains that text (for example a directory named `mode=memory`) is not special.
pub fn is_special_db_path(value: &str) -> bool {
    let trimmed = value.trim();
    if trimmed == ":memory:" {
        return true;
    }

    let Some(uri_body) = trimmed.strip_prefix("file:") else {
        return false;
    };

    // A URI is `path?query#fragment`; the fragment carries no open parameters.
    let without_fragment = uri_body.split('#').next().unwrap_or(uri_body);
    let (path, query) = without_fragment
        .split_once('?')
        .unwrap_or((without_fragment, ""));

    path == ":memory:" || query.split('&').any(|param| param == "mode=memory")
}
300
301/// 按路径与选项打开 SQLite 连接,并自动应用 pragma 与 tokenizer 初始化。
302/// Open a SQLite connection from a path and options, then apply pragmas and tokenizer setup.
303pub fn open_sqlite_connection(
304    db_path: &str,
305    options: &SqliteOpenOptions,
306) -> Result<Connection, BoxError> {
307    let flags = build_sqlite_open_flags(options);
308    let conn = if db_path == ":memory:" {
309        Connection::open_in_memory_with_flags(flags)?
310    } else {
311        Connection::open_with_flags(db_path, flags)?
312    };
313
314    apply_sqlite_connection_pragmas(&conn, db_path, options)?;
315    Ok(conn)
316}
317
318/// 对已打开连接应用运行时级别的 pragma 和 tokenizer 初始化。
319/// Apply runtime-level pragmas and tokenizer initialization to an opened connection.
320pub fn apply_sqlite_connection_pragmas(
321    conn: &Connection,
322    db_path: &str,
323    options: &SqliteOpenOptions,
324) -> Result<(), BoxError> {
325    let mut effective_journal_mode = None;
326    if !options.hardening.read_only {
327        effective_journal_mode = Some(conn.pragma_update_and_check(
328            None,
329            "journal_mode",
330            options.pragmas.journal_mode.as_str(),
331            |row| row.get::<_, String>(0),
332        )?);
333        conn.pragma_update(None, "synchronous", options.pragmas.synchronous.as_str())?;
334        conn.pragma_update(
335            None,
336            "wal_autocheckpoint",
337            options.pragmas.wal_autocheckpoint_pages,
338        )?;
339    }
340
341    conn.pragma_update(
342        None,
343        "busy_timeout",
344        i64::try_from(options.busy_timeout_ms).unwrap_or(i64::MAX),
345    )?;
346    conn.pragma_update(None, "foreign_keys", options.pragmas.foreign_keys)?;
347    conn.pragma_update(None, "temp_store", options.pragmas.temp_store.as_str())?;
348    conn.pragma_update(None, "trusted_schema", options.hardening.trusted_schema)?;
349    conn.pragma_update(None, "defensive", options.hardening.defensive)?;
350    conn.pragma_update(None, "cache_size", -options.pragmas.cache_size_kib)?;
351    conn.pragma_update(
352        None,
353        "mmap_size",
354        i64::try_from(options.pragmas.mmap_size_bytes).unwrap_or(i64::MAX),
355    )?;
356
357    if options.hardening.read_only {
358        conn.pragma_update(None, "query_only", true)?;
359    }
360
361    ensure_requested_wal_mode(conn, db_path, options, effective_journal_mode.as_deref())?;
362    ensure_jieba_tokenizer_registered(conn)?;
363
364    Ok(())
365}
366
367fn ensure_requested_wal_mode(
368    conn: &Connection,
369    db_path: &str,
370    options: &SqliteOpenOptions,
371    effective_journal_mode: Option<&str>,
372) -> Result<(), BoxError> {
373    if !options.pragmas.journal_mode.eq_ignore_ascii_case("WAL") || is_special_db_path(db_path) {
374        return Ok(());
375    }
376
377    let effective_mode = match effective_journal_mode {
378        Some(mode) => mode.trim().to_ascii_uppercase(),
379        None => conn
380            .query_row("PRAGMA journal_mode", [], |row| row.get::<_, String>(0))?
381            .trim()
382            .to_ascii_uppercase(),
383    };
384
385    if effective_mode == "WAL" {
386        return Ok(());
387    }
388
389    Err(Box::new(io::Error::new(
390        io::ErrorKind::InvalidInput,
391        format!(
392            "SQLite journal_mode=WAL was requested for file database {db_path}, but SQLite reported journal_mode={effective_mode}"
393        ),
394    )))
395}
396
397/// 基于运行时打开选项构造 SQLite open flags。
398/// Build SQLite open flags from runtime open options.
399pub fn build_sqlite_open_flags(options: &SqliteOpenOptions) -> OpenFlags {
400    let mut flags = OpenFlags::SQLITE_OPEN_NO_MUTEX;
401    if options.hardening.read_only {
402        flags |= OpenFlags::SQLITE_OPEN_READ_ONLY;
403    } else {
404        flags |= OpenFlags::SQLITE_OPEN_READ_WRITE | OpenFlags::SQLITE_OPEN_CREATE;
405    }
406    if options.hardening.allow_uri_filenames {
407        flags |= OpenFlags::SQLITE_OPEN_URI;
408    }
409    flags
410}
411
412fn normalize_db_path(raw: &str, allow_uri_filenames: bool) -> Result<String, BoxError> {
413    let trimmed = raw.trim();
414    if trimmed.is_empty() {
415        return Err(Box::new(io::Error::new(
416            io::ErrorKind::InvalidInput,
417            "db_path must not be empty",
418        )));
419    }
420
421    if trimmed == ":memory:" {
422        return Ok(trimmed.to_string());
423    }
424
425    if looks_like_sqlite_uri(trimmed) {
426        if !allow_uri_filenames {
427            return Err(Box::new(io::Error::new(
428                io::ErrorKind::InvalidInput,
429                "SQLite URI filenames are disabled for the current library runtime options",
430            )));
431        }
432        return Ok(trimmed.to_string());
433    }
434
435    let path = Path::new(trimmed);
436    let absolute = if path.is_absolute() {
437        path.to_path_buf()
438    } else {
439        std::env::current_dir()?.join(path)
440    };
441
442    Ok(normalize_path_like_string(&absolute))
443}
444
/// Collapse `.` and `..` components of a path purely lexically and return it as a string.
///
/// `.` components are dropped and each `..` removes the previously kept component
/// (it has no effect once nothing further can be popped). Symlinks are not resolved and
/// the filesystem is never touched. Non-UTF-8 parts are converted lossily.
fn normalize_path_like_string(path: &Path) -> String {
    use std::path::Component;

    let collapsed = path
        .components()
        .fold(PathBuf::new(), |mut kept, part| {
            match part {
                Component::CurDir => {}
                Component::ParentDir => {
                    kept.pop();
                }
                Component::Prefix(_) | Component::RootDir | Component::Normal(_) => {
                    kept.push(part.as_os_str());
                }
            }
            kept
        });

    collapsed.to_string_lossy().into_owned()
}
458
/// Report whether `value` begins with the `file:` scheme used by SQLite URI filenames.
///
/// The comparison is case-sensitive and the input is not trimmed.
fn looks_like_sqlite_uri(value: &str) -> bool {
    value.strip_prefix("file:").is_some()
}
462
// Unit tests for special-path detection and for managing several databases through one
// runtime instance.
#[cfg(test)]
mod tests {
    use super::{BoxError, SqliteRuntime, is_special_db_path};
    use crate::tokenizer::{list_custom_words, upsert_custom_word};
    use rusqlite::Connection;
    use std::path::PathBuf;
    use std::time::{SystemTime, UNIX_EPOCH};

    /// Build a database path inside the system temp directory whose file name embeds
    /// `prefix` and the current Unix time in milliseconds.
    fn unique_test_db_path(prefix: &str) -> PathBuf {
        let millis = SystemTime::now()
            .duration_since(UNIX_EPOCH)
            .expect("clock should be after unix epoch")
            .as_millis();
        std::env::temp_dir().join(format!("vldb-sqlite-runtime-{prefix}-{millis}.db"))
    }

    /// The bare `:memory:` name and a `mode=memory` URI are special; a file path is not.
    #[test]
    fn special_db_path_detection_matches_memory_variants() {
        assert!(is_special_db_path(":memory:"));
        assert!(is_special_db_path("file:demo.db?mode=memory&cache=shared"));
        assert!(!is_special_db_path("./data/demo.db"));
    }

    /// Two databases opened through one default runtime stay independent, and the
    /// registry reflects opening and closing them.
    #[test]
    fn runtime_can_manage_multiple_databases_without_config_file() -> Result<(), BoxError> {
        let runtime = SqliteRuntime::new();
        let db_a_path = unique_test_db_path("a");
        let db_b_path = unique_test_db_path("b");
        // Best-effort removal of leftovers; the results are deliberately ignored.
        let _cleanup_a = std::fs::remove_file(&db_a_path);
        let _cleanup_b = std::fs::remove_file(&db_b_path);

        let db_a = runtime.open_database(db_a_path.to_string_lossy())?;
        let db_b = runtime.open_database(db_b_path.to_string_lossy())?;

        // A custom word written to database A is listed there.
        {
            let conn_a = db_a.open_connection()?;
            upsert_custom_word(&conn_a, "田-女士", 42)?;
            let listed = list_custom_words(&conn_a)?;
            assert_eq!(listed.words.len(), 1);
        }

        // Database B does not see the word written to A.
        {
            let conn_b = db_b.open_connection()?;
            let listed = list_custom_words(&conn_b)?;
            assert!(listed.words.is_empty());
            Connection::execute_batch(&conn_b, "CREATE TABLE IF NOT EXISTS marker(id INTEGER);")?;
        }

        // Registry bookkeeping: both handles are cached, closing A leaves only B.
        assert_eq!(runtime.database_count(), 2);
        assert!(runtime.get_database(db_a_path.to_string_lossy()).is_some());
        assert!(runtime.get_database(db_b_path.to_string_lossy()).is_some());
        assert!(runtime.close_database(db_a_path.to_string_lossy()));
        assert_eq!(runtime.database_count(), 1);

        // Best-effort cleanup of the database files and their WAL/SHM side files.
        let _ = std::fs::remove_file(&db_a_path);
        let _ = std::fs::remove_file(&db_b_path);
        let _ = std::fs::remove_file(format!("{}-wal", db_a_path.to_string_lossy()));
        let _ = std::fs::remove_file(format!("{}-shm", db_a_path.to_string_lossy()));
        let _ = std::fs::remove_file(format!("{}-wal", db_b_path.to_string_lossy()));
        let _ = std::fs::remove_file(format!("{}-shm", db_b_path.to_string_lossy()));
        Ok(())
    }
}
525}