1use crate::db_lock::DatabaseFileLock;
2use crate::tokenizer::ensure_jieba_tokenizer_registered;
3use rusqlite::{Connection, OpenFlags};
4use std::collections::HashMap;
5use std::error::Error;
6use std::io;
7use std::path::{Path, PathBuf};
8use std::sync::{Arc, Mutex};
9
/// Boxed, thread-safe (`Send + Sync`) error type returned by the fallible
/// functions in this module.
pub type BoxError = Box<dyn Error + Send + Sync + 'static>;
13
/// Per-connection `PRAGMA` values applied by `apply_sqlite_connection_pragmas`.
#[derive(Debug, Clone)]
pub struct SqlitePragmaOptions {
    /// Value passed to `PRAGMA journal_mode`; only applied to connections that
    /// are not read-only.
    pub journal_mode: String,
    /// Value passed to `PRAGMA synchronous`; only applied to connections that
    /// are not read-only.
    pub synchronous: String,
    /// Value passed to `PRAGMA foreign_keys`.
    pub foreign_keys: bool,
    /// Value passed to `PRAGMA temp_store`.
    pub temp_store: String,
    /// Value passed to `PRAGMA wal_autocheckpoint`; only applied to connections
    /// that are not read-only.
    pub wal_autocheckpoint_pages: u32,
    /// Cache size in KiB. The value is negated before being passed to
    /// `PRAGMA cache_size`.
    pub cache_size_kib: i64,
    /// Value passed to `PRAGMA mmap_size`; values that do not fit in an `i64`
    /// are clamped to `i64::MAX`.
    pub mmap_size_bytes: u64,
}
40
/// Safety-related switches that influence how databases and connections are opened.
#[derive(Debug, Clone)]
pub struct SqliteHardeningOptions {
    /// When `true`, `SqliteDatabaseHandle::new` acquires a `DatabaseFileLock`
    /// for every database path that is not an in-memory ("special") path.
    pub enforce_db_file_lock: bool,
    /// Opens connections with `SQLITE_OPEN_READ_ONLY`, skips the journal-related
    /// pragmas and sets `PRAGMA query_only`.
    pub read_only: bool,
    /// Allows `file:` URI paths: adds `SQLITE_OPEN_URI` to the open flags and
    /// lets path normalization accept URI filenames instead of rejecting them.
    pub allow_uri_filenames: bool,
    /// Value passed to `PRAGMA trusted_schema`.
    pub trusted_schema: bool,
    /// Defensive-mode flag applied to each connection by
    /// `apply_sqlite_connection_pragmas`.
    pub defensive: bool,
}
61
/// Complete set of options used when opening a database and its connections.
#[derive(Debug, Clone)]
pub struct SqliteOpenOptions {
    /// Requested connection pool size.
    // NOTE(review): not read anywhere in this file; presumably consumed by a
    // pooling layer elsewhere in the crate — confirm.
    pub connection_pool_size: usize,
    /// Value passed to `PRAGMA busy_timeout` (milliseconds); values that do not
    /// fit in an `i64` are clamped to `i64::MAX`.
    pub busy_timeout_ms: u64,
    /// Tuning pragmas applied to every connection.
    pub pragmas: SqlitePragmaOptions,
    /// Hardening switches applied when opening databases and connections.
    pub hardening: SqliteHardeningOptions,
}
79
80impl Default for SqlitePragmaOptions {
81 fn default() -> Self {
82 Self {
83 journal_mode: "WAL".to_string(),
84 synchronous: "NORMAL".to_string(),
85 foreign_keys: true,
86 temp_store: "MEMORY".to_string(),
87 wal_autocheckpoint_pages: 1_000,
88 cache_size_kib: 65_536,
89 mmap_size_bytes: 268_435_456,
90 }
91 }
92}
93
94impl Default for SqliteHardeningOptions {
95 fn default() -> Self {
96 Self {
97 enforce_db_file_lock: true,
98 read_only: false,
99 allow_uri_filenames: false,
100 trusted_schema: false,
101 defensive: true,
102 }
103 }
104}
105
106impl Default for SqliteOpenOptions {
107 fn default() -> Self {
108 Self {
109 connection_pool_size: 8,
110 busy_timeout_ms: 5_000,
111 pragmas: SqlitePragmaOptions::default(),
112 hardening: SqliteHardeningOptions::default(),
113 }
114 }
115}
116
/// Registry of opened databases, keyed by their normalized path string.
#[derive(Debug)]
pub struct SqliteRuntime {
    // Options used by `open_database` when the caller does not supply any.
    default_options: SqliteOpenOptions,
    // Shared handles, keyed by the string produced by `normalize_db_path`.
    databases: Mutex<HashMap<String, Arc<SqliteDatabaseHandle>>>,
}
124
/// Handle for one registered database: its path, the options used to open
/// connections to it, and the optional file lock held for it.
#[derive(Debug)]
pub struct SqliteDatabaseHandle {
    // Path (or `:memory:` / `file:` URI) handed to SQLite when opening connections.
    db_path: String,
    // Options applied to every connection opened through this handle.
    options: SqliteOpenOptions,
    // `Some` when a `DatabaseFileLock` was acquired in `new`; kept in the handle
    // so it lives as long as the handle does.
    file_lock: Option<DatabaseFileLock>,
}
133
134impl Default for SqliteRuntime {
135 fn default() -> Self {
136 Self::new()
137 }
138}
139
140impl SqliteRuntime {
141 pub fn new() -> Self {
144 Self::with_default_options(SqliteOpenOptions::default())
145 }
146
147 pub fn with_default_options(default_options: SqliteOpenOptions) -> Self {
150 Self {
151 default_options,
152 databases: Mutex::new(HashMap::new()),
153 }
154 }
155
156 pub fn default_options(&self) -> &SqliteOpenOptions {
159 &self.default_options
160 }
161
162 pub fn open_database(&self, db_path: impl AsRef<str>) -> Result<Arc<SqliteDatabaseHandle>, BoxError> {
165 self.open_database_with_options(db_path, self.default_options.clone())
166 }
167
168 pub fn open_database_with_options(
171 &self,
172 db_path: impl AsRef<str>,
173 options: SqliteOpenOptions,
174 ) -> Result<Arc<SqliteDatabaseHandle>, BoxError> {
175 let normalized = normalize_db_path(
176 db_path.as_ref(),
177 options.hardening.allow_uri_filenames,
178 )?;
179
180 let mut guard = self
181 .databases
182 .lock()
183 .unwrap_or_else(|poisoned| poisoned.into_inner());
184
185 if let Some(existing) = guard.get(&normalized) {
186 return Ok(Arc::clone(existing));
187 }
188
189 let handle = Arc::new(SqliteDatabaseHandle::new(normalized.clone(), options)?);
190 guard.insert(normalized, Arc::clone(&handle));
191 Ok(handle)
192 }
193
194 pub fn get_database(&self, db_path: impl AsRef<str>) -> Option<Arc<SqliteDatabaseHandle>> {
197 let normalized = normalize_db_path(
198 db_path.as_ref(),
199 self.default_options.hardening.allow_uri_filenames,
200 )
201 .ok()?;
202 self.databases
203 .lock()
204 .ok()
205 .and_then(|guard| guard.get(&normalized).cloned())
206 }
207
208 pub fn close_database(&self, db_path: impl AsRef<str>) -> bool {
211 let normalized = match normalize_db_path(
212 db_path.as_ref(),
213 self.default_options.hardening.allow_uri_filenames,
214 ) {
215 Ok(path) => path,
216 Err(_) => return false,
217 };
218
219 self.databases
220 .lock()
221 .map(|mut guard| guard.remove(&normalized).is_some())
222 .unwrap_or(false)
223 }
224
225 pub fn list_databases(&self) -> Vec<String> {
228 self.databases
229 .lock()
230 .map(|guard| guard.keys().cloned().collect())
231 .unwrap_or_default()
232 }
233
234 pub fn database_count(&self) -> usize {
237 self.databases
238 .lock()
239 .map(|guard| guard.len())
240 .unwrap_or_default()
241 }
242}
243
244impl SqliteDatabaseHandle {
245 pub fn new(db_path: String, options: SqliteOpenOptions) -> Result<Self, BoxError> {
248 if !is_special_db_path(&db_path) && !looks_like_sqlite_uri(&db_path) {
249 if let Some(parent) = Path::new(&db_path).parent() {
250 if !parent.as_os_str().is_empty() {
251 std::fs::create_dir_all(parent)?;
252 }
253 }
254 }
255
256 let file_lock = if options.hardening.enforce_db_file_lock && !is_special_db_path(&db_path) {
257 Some(DatabaseFileLock::acquire(Path::new(&db_path))?)
258 } else {
259 None
260 };
261
262 Ok(Self {
263 db_path,
264 options,
265 file_lock,
266 })
267 }
268
269 pub fn db_path(&self) -> &str {
272 &self.db_path
273 }
274
275 pub fn options(&self) -> &SqliteOpenOptions {
278 &self.options
279 }
280
281 pub fn lock_path(&self) -> Option<&Path> {
284 self.file_lock.as_ref().map(DatabaseFileLock::path)
285 }
286
287 pub fn open_connection(&self) -> Result<Connection, BoxError> {
290 open_sqlite_connection(self.db_path.as_str(), &self.options)
291 }
292}
293
/// Returns `true` when `value` names an in-memory SQLite database.
///
/// Recognized forms (surrounding whitespace is ignored):
/// - the plain name `:memory:`;
/// - a `file:` URI whose path is `:memory:` (for example `file::memory:` or
///   `file::memory:?cache=shared`);
/// - a `file:` URI whose query string carries the parameter `mode=memory`.
///
/// The query string is split into its `&`-separated parameters, so a file that
/// merely has `mode=memory` in its name is not treated as in-memory.
pub fn is_special_db_path(value: &str) -> bool {
    let trimmed = value.trim();
    if trimmed == ":memory:" {
        return true;
    }

    let uri_body = match trimmed.strip_prefix("file:") {
        Some(body) => body,
        None => return false,
    };

    // Drop the fragment first; it carries no meaning for the database name.
    let without_fragment = match uri_body.split_once('#') {
        Some((head, _fragment)) => head,
        None => uri_body,
    };
    let (path, query) = without_fragment
        .split_once('?')
        .unwrap_or((without_fragment, ""));

    path == ":memory:" || query.split('&').any(|param| param == "mode=memory")
}
300
301pub fn open_sqlite_connection(
304 db_path: &str,
305 options: &SqliteOpenOptions,
306) -> Result<Connection, BoxError> {
307 let flags = build_sqlite_open_flags(options);
308 let conn = if db_path == ":memory:" {
309 Connection::open_in_memory_with_flags(flags)?
310 } else {
311 Connection::open_with_flags(db_path, flags)?
312 };
313
314 apply_sqlite_connection_pragmas(&conn, db_path, options)?;
315 Ok(conn)
316}
317
318pub fn apply_sqlite_connection_pragmas(
321 conn: &Connection,
322 db_path: &str,
323 options: &SqliteOpenOptions,
324) -> Result<(), BoxError> {
325 let mut effective_journal_mode = None;
326 if !options.hardening.read_only {
327 effective_journal_mode = Some(conn.pragma_update_and_check(
328 None,
329 "journal_mode",
330 options.pragmas.journal_mode.as_str(),
331 |row| row.get::<_, String>(0),
332 )?);
333 conn.pragma_update(None, "synchronous", options.pragmas.synchronous.as_str())?;
334 conn.pragma_update(
335 None,
336 "wal_autocheckpoint",
337 options.pragmas.wal_autocheckpoint_pages,
338 )?;
339 }
340
341 conn.pragma_update(
342 None,
343 "busy_timeout",
344 i64::try_from(options.busy_timeout_ms).unwrap_or(i64::MAX),
345 )?;
346 conn.pragma_update(None, "foreign_keys", options.pragmas.foreign_keys)?;
347 conn.pragma_update(None, "temp_store", options.pragmas.temp_store.as_str())?;
348 conn.pragma_update(None, "trusted_schema", options.hardening.trusted_schema)?;
349 conn.pragma_update(None, "defensive", options.hardening.defensive)?;
350 conn.pragma_update(None, "cache_size", -options.pragmas.cache_size_kib)?;
351 conn.pragma_update(
352 None,
353 "mmap_size",
354 i64::try_from(options.pragmas.mmap_size_bytes).unwrap_or(i64::MAX),
355 )?;
356
357 if options.hardening.read_only {
358 conn.pragma_update(None, "query_only", true)?;
359 }
360
361 ensure_requested_wal_mode(conn, db_path, options, effective_journal_mode.as_deref())?;
362 ensure_jieba_tokenizer_registered(conn)?;
363
364 Ok(())
365}
366
367fn ensure_requested_wal_mode(
368 conn: &Connection,
369 db_path: &str,
370 options: &SqliteOpenOptions,
371 effective_journal_mode: Option<&str>,
372) -> Result<(), BoxError> {
373 if !options.pragmas.journal_mode.eq_ignore_ascii_case("WAL") || is_special_db_path(db_path) {
374 return Ok(());
375 }
376
377 let effective_mode = match effective_journal_mode {
378 Some(mode) => mode.trim().to_ascii_uppercase(),
379 None => conn
380 .query_row("PRAGMA journal_mode", [], |row| row.get::<_, String>(0))?
381 .trim()
382 .to_ascii_uppercase(),
383 };
384
385 if effective_mode == "WAL" {
386 return Ok(());
387 }
388
389 Err(Box::new(io::Error::new(
390 io::ErrorKind::InvalidInput,
391 format!(
392 "SQLite journal_mode=WAL was requested for file database {db_path}, but SQLite reported journal_mode={effective_mode}"
393 ),
394 )))
395}
396
397pub fn build_sqlite_open_flags(options: &SqliteOpenOptions) -> OpenFlags {
400 let mut flags = OpenFlags::SQLITE_OPEN_NO_MUTEX;
401 if options.hardening.read_only {
402 flags |= OpenFlags::SQLITE_OPEN_READ_ONLY;
403 } else {
404 flags |= OpenFlags::SQLITE_OPEN_READ_WRITE | OpenFlags::SQLITE_OPEN_CREATE;
405 }
406 if options.hardening.allow_uri_filenames {
407 flags |= OpenFlags::SQLITE_OPEN_URI;
408 }
409 flags
410}
411
412fn normalize_db_path(raw: &str, allow_uri_filenames: bool) -> Result<String, BoxError> {
413 let trimmed = raw.trim();
414 if trimmed.is_empty() {
415 return Err(Box::new(io::Error::new(
416 io::ErrorKind::InvalidInput,
417 "db_path must not be empty",
418 )));
419 }
420
421 if trimmed == ":memory:" {
422 return Ok(trimmed.to_string());
423 }
424
425 if looks_like_sqlite_uri(trimmed) {
426 if !allow_uri_filenames {
427 return Err(Box::new(io::Error::new(
428 io::ErrorKind::InvalidInput,
429 "SQLite URI filenames are disabled for the current library runtime options",
430 )));
431 }
432 return Ok(trimmed.to_string());
433 }
434
435 let path = Path::new(trimmed);
436 let absolute = if path.is_absolute() {
437 path.to_path_buf()
438 } else {
439 std::env::current_dir()?.join(path)
440 };
441
442 Ok(normalize_path_like_string(&absolute))
443}
444
/// Collapses `.` and `..` segments of `path` lexically (without touching the
/// filesystem) and returns the result as a lossily converted string.
///
/// A `..` segment removes the previously kept segment; when there is nothing
/// left to remove it is simply dropped.
fn normalize_path_like_string(path: &Path) -> String {
    use std::path::Component;

    let collapsed = path
        .components()
        .fold(PathBuf::new(), |mut kept, segment| {
            match segment {
                Component::CurDir => {}
                Component::ParentDir => {
                    kept.pop();
                }
                other => kept.push(other.as_os_str()),
            }
            kept
        });

    collapsed.to_string_lossy().into_owned()
}
458
/// Returns `true` when `value` begins with the (case-sensitive) `file:` scheme
/// prefix.
fn looks_like_sqlite_uri(value: &str) -> bool {
    value.strip_prefix("file:").is_some()
}
462
#[cfg(test)]
mod tests {
    use super::{BoxError, SqliteRuntime, is_special_db_path};
    use crate::tokenizer::{list_custom_words, upsert_custom_word};
    use rusqlite::Connection;
    use std::path::{Path, PathBuf};
    use std::time::{SystemTime, UNIX_EPOCH};

    /// Builds a temp-dir database path that is unique per prefix, process and
    /// timestamp, so concurrently running test processes cannot pick the same
    /// file name.
    fn unique_test_db_path(prefix: &str) -> PathBuf {
        let nanos = SystemTime::now()
            .duration_since(UNIX_EPOCH)
            .expect("clock should be after unix epoch")
            .as_nanos();
        let pid = std::process::id();
        std::env::temp_dir().join(format!("vldb-sqlite-runtime-{prefix}-{pid}-{nanos}.db"))
    }

    /// Best-effort removal of a database file and its WAL/SHM side files.
    fn remove_db_files(db_path: &Path) {
        let base = db_path.to_string_lossy();
        let _ = std::fs::remove_file(db_path);
        for suffix in ["-wal", "-shm"] {
            let _ = std::fs::remove_file(format!("{base}{suffix}"));
        }
    }

    #[test]
    fn special_db_path_detection_matches_memory_variants() {
        assert!(is_special_db_path(":memory:"));
        assert!(is_special_db_path("file:demo.db?mode=memory&cache=shared"));
        assert!(!is_special_db_path("./data/demo.db"));
    }

    #[test]
    fn runtime_can_manage_multiple_databases_without_config_file() -> Result<(), BoxError> {
        let runtime = SqliteRuntime::new();
        let db_a_path = unique_test_db_path("a");
        let db_b_path = unique_test_db_path("b");
        // Start from a clean slate in case an earlier run left files behind.
        remove_db_files(&db_a_path);
        remove_db_files(&db_b_path);

        let db_a = runtime.open_database(db_a_path.to_string_lossy())?;
        let db_b = runtime.open_database(db_b_path.to_string_lossy())?;

        {
            let conn_a = db_a.open_connection()?;
            upsert_custom_word(&conn_a, "田-女士", 42)?;
            let listed = list_custom_words(&conn_a)?;
            assert_eq!(listed.words.len(), 1);
        }

        {
            // The second database must not see the word stored in the first.
            let conn_b = db_b.open_connection()?;
            let listed = list_custom_words(&conn_b)?;
            assert!(listed.words.is_empty());
            Connection::execute_batch(&conn_b, "CREATE TABLE IF NOT EXISTS marker(id INTEGER);")?;
        }

        assert_eq!(runtime.database_count(), 2);
        assert!(runtime.get_database(db_a_path.to_string_lossy()).is_some());
        assert!(runtime.get_database(db_b_path.to_string_lossy()).is_some());
        assert!(runtime.close_database(db_a_path.to_string_lossy()));
        assert_eq!(runtime.database_count(), 1);

        remove_db_files(&db_a_path);
        remove_db_files(&db_b_path);
        Ok(())
    }
}