Skip to main content

victauri_plugin/
database.rs

1#[cfg(feature = "sqlite")]
2use std::path::{Path, PathBuf};
3#[cfg(feature = "sqlite")]
4use std::sync::Arc;
5#[cfg(feature = "sqlite")]
6use std::sync::atomic::{AtomicBool, Ordering};
7#[cfg(feature = "sqlite")]
8use std::time::{Duration, Instant};
9
10/// Arms a wall-clock watchdog that calls `Connection::interrupt()` at the deadline, then stops
11/// and joins it on drop. The `progress_handler` only fires every N VDBE opcodes, so a single
12/// long-running op (a big sort/scan, `quick_check` on a huge DB) can run well past the deadline
13/// before the handler is consulted; the interrupt handle makes the CPU deadline a hard one.
14#[cfg(feature = "sqlite")]
15pub(crate) struct InterruptGuard {
16    done: Arc<AtomicBool>,
17    handle: Option<std::thread::JoinHandle<()>>,
18}
19
20#[cfg(feature = "sqlite")]
21impl InterruptGuard {
22    pub(crate) fn arm(conn: &rusqlite::Connection, deadline: Duration) -> Self {
23        let done = Arc::new(AtomicBool::new(false));
24        let interrupt = conn.get_interrupt_handle();
25        let done_for_thread = done.clone();
26        let poll = Duration::from_millis(25).min(deadline.max(Duration::from_millis(1)));
27        let handle = std::thread::spawn(move || {
28            let start = Instant::now();
29            while start.elapsed() < deadline {
30                if done_for_thread.load(Ordering::Acquire) {
31                    return;
32                }
33                std::thread::sleep(poll);
34            }
35            if !done_for_thread.load(Ordering::Acquire) {
36                interrupt.interrupt();
37            }
38        });
39        Self {
40            done,
41            handle: Some(handle),
42        }
43    }
44}
45
46#[cfg(feature = "sqlite")]
47impl Drop for InterruptGuard {
48    fn drop(&mut self) {
49        self.done.store(true, Ordering::Release);
50        if let Some(h) = self.handle.take() {
51            let _ = h.join();
52        }
53    }
54}
55
56#[cfg(feature = "sqlite")]
57const MAX_ROWS_DEFAULT: usize = 100;
58#[cfg(feature = "sqlite")]
59const MAX_ROWS_LIMIT: usize = 10_000;
60#[cfg(feature = "sqlite")]
61const MAX_QUERY_CELL_BYTES: i32 = 1_048_576;
62#[cfg(feature = "sqlite")]
63const MAX_QUERY_RESULT_BYTES: usize = 5_000_000;
64#[cfg(feature = "sqlite")]
65const MAX_QUERY_SQL_BYTES: usize = 1_000_000;
66#[cfg(feature = "sqlite")]
67const QUERY_TIMEOUT: Duration = Duration::from_secs(5);
68#[cfg(feature = "sqlite")]
69const QUERY_PROGRESS_OPS: i32 = 10_000;
70
71#[cfg(feature = "sqlite")]
72static READ_ONLY_PREFIXES: &[&str] = &["select", "pragma", "explain", "with"];
73
74#[cfg(feature = "sqlite")]
75fn strip_sql_comments(sql: &str) -> String {
76    let mut result = String::with_capacity(sql.len());
77    let bytes = sql.as_bytes();
78    let len = bytes.len();
79    let mut i = 0;
80    while i < len {
81        if i + 1 < len && bytes[i] == b'-' && bytes[i + 1] == b'-' {
82            while i < len && bytes[i] != b'\n' {
83                i += 1;
84            }
85        } else if i + 1 < len && bytes[i] == b'/' && bytes[i + 1] == b'*' {
86            i += 2;
87            while i + 1 < len && !(bytes[i] == b'*' && bytes[i + 1] == b'/') {
88                i += 1;
89            }
90            if i + 1 < len {
91                i += 2;
92            }
93            result.push(' ');
94        } else {
95            result.push(bytes[i] as char);
96            i += 1;
97        }
98    }
99    result
100}
101
102#[cfg(feature = "sqlite")]
103fn is_read_only(sql: &str) -> bool {
104    let cleaned = strip_sql_comments(sql);
105    let trimmed = cleaned.trim_start().to_lowercase();
106    if trimmed.is_empty() {
107        return false;
108    }
109    READ_ONLY_PREFIXES
110        .iter()
111        .any(|prefix| trimmed.starts_with(prefix))
112}
113
114/// Returns true if `sql` is the write form of a PRAGMA (`PRAGMA name = value`).
115///
116/// The read forms (`PRAGMA name`, `PRAGMA name(arg)`) are not flagged. An `=`
117/// is only significant when it appears outside of any quoted string.
118#[cfg(feature = "sqlite")]
119fn is_pragma_write(sql: &str) -> bool {
120    let cleaned = strip_sql_comments(sql);
121    let trimmed = cleaned.trim_start();
122    if !trimmed.to_lowercase().starts_with("pragma") {
123        return false;
124    }
125    let bytes = trimmed.as_bytes();
126    let mut in_single = false;
127    let mut in_double = false;
128    for &b in bytes {
129        match b {
130            b'\'' if !in_double => in_single = !in_single,
131            b'"' if !in_single => in_double = !in_double,
132            b'=' if !in_single && !in_double => return true,
133            _ => {}
134        }
135    }
136    false
137}
138
139/// Read-only / introspection PRAGMAs permitted on the user-facing `query` path.
140///
141/// A positive allowlist (audit C10): even without an `=`, some PRAGMAs have side
142/// effects (`wal_checkpoint`, `optimize`, `incremental_vacuum`, `shrink_memory`,
143/// `wal_checkpoint(TRUNCATE)`). The `READ_ONLY` open flag already blocks real
144/// writes, but allowlisting the PRAGMA name makes the read-only contract explicit
145/// and refuses side-effecting introspection outright.
146#[cfg(feature = "sqlite")]
147static SAFE_PRAGMAS: &[&str] = &[
148    "table_info",
149    "table_xinfo",
150    "table_list",
151    "index_list",
152    "index_info",
153    "index_xinfo",
154    "foreign_key_list",
155    "foreign_key_check",
156    "collation_list",
157    "database_list",
158    "compile_options",
159    "function_list",
160    "module_list",
161    "pragma_list",
162    "journal_mode",
163    "journal_size_limit",
164    "page_count",
165    "page_size",
166    "max_page_count",
167    "schema_version",
168    "user_version",
169    "application_id",
170    "data_version",
171    "freelist_count",
172    "cache_size",
173    "encoding",
174    "auto_vacuum",
175    "busy_timeout",
176    "wal_autocheckpoint",
177    "legacy_file_format",
178    "locking_mode",
179    "secure_delete",
180    "synchronous",
181    "temp_store",
182    "mmap_size",
183    "cache_spill",
184    "cell_size_check",
185    "integrity_check",
186    "quick_check",
187    "stats",
188];
189
190/// Extract the lowercased PRAGMA name from a `PRAGMA [schema.]name ...` statement,
191/// tolerating an optional `schema.` qualifier. Returns `None` for a non-PRAGMA or
192/// a malformed one.
193#[cfg(feature = "sqlite")]
194fn pragma_name(sql: &str) -> Option<String> {
195    let cleaned = strip_sql_comments(sql);
196    let lower = cleaned.trim_start().to_lowercase();
197    let stripped = lower.strip_prefix("pragma")?.trim_start();
198    // Normalize away SQL identifier-quoting so quoted forms like `PRAGMA "main".table_info`
199    // or `PRAGMA [main].table_info` parse to the same name as the bare form (avoids a
200    // false-positive block of a legitimate quoted read PRAGMA).
201    let normalized: String = stripped
202        .chars()
203        .filter(|c| !matches!(c, '"' | '`' | '[' | ']'))
204        .collect();
205    let rest = normalized.trim_start();
206    // Optional `schema.` qualifier: only treat the part before the first '.' as a
207    // schema when it's a bare identifier (no '(', '=', whitespace) — otherwise the
208    // '.' belongs to a quoted arg and `rest` already starts with the name.
209    let after_schema = match rest.split_once('.') {
210        Some((maybe_schema, tail))
211            if !maybe_schema.is_empty()
212                && maybe_schema
213                    .chars()
214                    .all(|c| c.is_alphanumeric() || c == '_') =>
215        {
216            tail
217        }
218        _ => rest,
219    };
220    let name: String = after_schema
221        .trim_start()
222        .chars()
223        .take_while(|c| c.is_alphanumeric() || *c == '_')
224        .collect();
225    if name.is_empty() { None } else { Some(name) }
226}
227
228/// True if `sql` is a PRAGMA whose name is NOT on [`SAFE_PRAGMAS`] (a malformed
229/// PRAGMA is also rejected). Non-PRAGMA statements are not flagged here.
230#[cfg(feature = "sqlite")]
231fn is_disallowed_pragma(sql: &str) -> bool {
232    let cleaned = strip_sql_comments(sql);
233    if !cleaned.trim_start().to_lowercase().starts_with("pragma") {
234        return false;
235    }
236    match pragma_name(sql) {
237        Some(name) => !SAFE_PRAGMAS.contains(&name.as_str()),
238        None => true,
239    }
240}
241
242/// Discover `SQLite` database files in a directory (non-recursive, max depth 2).
243#[cfg(feature = "sqlite")]
244#[must_use]
245pub fn discover_databases(dir: &Path) -> Vec<PathBuf> {
246    let mut results = Vec::new();
247    let Ok(base) = std::fs::canonicalize(dir) else {
248        return results;
249    };
250    discover_recursive(dir, &base, 0, 2, &mut results);
251    results
252}
253
254#[cfg(feature = "sqlite")]
255fn discover_recursive(
256    dir: &Path,
257    base: &Path,
258    depth: u32,
259    max_depth: u32,
260    results: &mut Vec<PathBuf>,
261) {
262    let Ok(entries) = std::fs::read_dir(dir) else {
263        return;
264    };
265    for entry in entries.flatten() {
266        let path = entry.path();
267        if path.is_symlink() {
268            continue;
269        }
270        // Windows junctions/reparse points are not always reported by
271        // `Path::is_symlink`; canonical containment is the real boundary.
272        let Ok(canonical) = std::fs::canonicalize(&path) else {
273            continue;
274        };
275        if !canonical.starts_with(base) {
276            continue;
277        }
278        if path.is_file() {
279            if let Some(ext) = path.extension().and_then(|e| e.to_str())
280                && matches!(ext, "sqlite" | "sqlite3" | "db" | "sdb")
281            {
282                results.push(path);
283            }
284        } else if path.is_dir() && depth < max_depth {
285            discover_recursive(&path, base, depth + 1, max_depth, results);
286        }
287    }
288}
289
290/// File basenames (matched case-insensitively, with or without a `SQLite` extension)
291/// that are browser-engine internal databases, never the application's own DB
292/// (Chromium/WebKit profile stores). Selecting one of these is the audit/red-team
293/// "wrong database" bug: an agent would confidently inspect `WebView` state instead of
294/// the app's data.
295#[cfg(feature = "sqlite")]
296const WEBVIEW_DB_BASENAMES: &[&str] = &[
297    "cookies",
298    "quotamanager",
299    "web data",
300    "history",
301    "favicons",
302    "top sites",
303    "login data",
304    "network action predictor",
305    "transportsecurity",
306    "trust tokens",
307    "sharedstorage",
308    "reporting and ntp",
309    "media history",
310    "affiliation database",
311    "site characteristics database",
312    "webdata",
313];
314
315/// Directory names (matched case-insensitively, anywhere in the path) that belong to a
316/// `WebView`/browser engine's private storage area. Any `.db`/`.sqlite` under one of these
317/// is an engine internal, not the app DB.
318#[cfg(feature = "sqlite")]
319const WEBVIEW_DIR_NAMES: &[&str] = &[
320    "ebwebview",
321    "wkwebview",
322    "webkit",
323    "local storage",
324    "indexeddb",
325    "session storage",
326    "service worker",
327    "gpucache",
328    "code cache",
329    "blob_storage",
330    "shared proto db",
331    "websql",
332];
333
334/// Whether a discovered database path is a `WebView`/browser-engine internal store rather
335/// than the application's own database (audit / red-team "wrong DB" finding).
336#[cfg(feature = "sqlite")]
337#[must_use]
338pub fn is_webview_internal(path: &Path) -> bool {
339    if let Some(name) = path.file_stem().and_then(|n| n.to_str()) {
340        let name = name.to_ascii_lowercase();
341        if WEBVIEW_DB_BASENAMES.iter().any(|n| name == *n) {
342            return true;
343        }
344    }
345    path.components().any(|c| {
346        let seg = c.as_os_str().to_string_lossy().to_ascii_lowercase();
347        WEBVIEW_DIR_NAMES.iter().any(|d| seg == *d)
348    })
349}
350
351/// A discovered database candidate with the metadata needed to disambiguate which DB the
352/// application actually uses.
353#[cfg(feature = "sqlite")]
354#[derive(Debug, Clone)]
355pub struct DbCandidate {
356    /// Absolute path to the discovered database file.
357    pub path: PathBuf,
358    /// File size in bytes (0 if it could not be stat'd).
359    pub size_bytes: u64,
360    /// Whether this is a `WebView`/browser-engine internal store rather than an app DB.
361    pub webview_internal: bool,
362}
363
364/// Classify every database discovered under `dirs`, returning application candidates first
365/// (non-`WebView`, largest by size — the substantial app DB outranks incidental ones) and
366/// `WebView` internals last. De-duplicates paths discovered via overlapping roots.
367#[cfg(feature = "sqlite")]
368#[must_use]
369pub fn classify_databases(dirs: &[PathBuf]) -> Vec<DbCandidate> {
370    let mut seen = std::collections::HashSet::new();
371    let mut candidates: Vec<DbCandidate> = Vec::new();
372    for dir in dirs {
373        for path in discover_databases(dir) {
374            let key = std::fs::canonicalize(&path).unwrap_or_else(|_| path.clone());
375            if !seen.insert(key) {
376                continue;
377            }
378            let size_bytes = std::fs::metadata(&path).map_or(0, |m| m.len());
379            let webview_internal = is_webview_internal(&path);
380            candidates.push(DbCandidate {
381                path,
382                size_bytes,
383                webview_internal,
384            });
385        }
386    }
387    // Application DBs first, then by size descending (larger ⇒ more likely the real DB).
388    candidates.sort_by(|a, b| {
389        a.webview_internal
390            .cmp(&b.webview_internal)
391            .then(b.size_bytes.cmp(&a.size_bytes))
392    });
393    candidates
394}
395
396/// Select the single most likely application database from `dirs`, excluding `WebView`
397/// internals.
398///
399/// # Errors
400/// Returns `Err` with a diagnostic when no application database is found — either no
401/// databases at all, or only `WebView`/browser-engine internal stores (the error lists
402/// the skipped internals so the caller can tell an agent to register the real DB
403/// directory via `db_search_paths` or pass an explicit `path`).
404#[cfg(feature = "sqlite")]
405pub fn select_app_database(dirs: &[PathBuf]) -> Result<PathBuf, String> {
406    let candidates = classify_databases(dirs);
407    if let Some(app) = candidates.iter().find(|c| !c.webview_internal) {
408        return Ok(app.path.clone());
409    }
410    if candidates.is_empty() {
411        let dirs_str = dirs
412            .iter()
413            .map(|d| d.display().to_string())
414            .collect::<Vec<_>>()
415            .join(", ");
416        return Err(format!("no SQLite databases found in: {dirs_str}"));
417    }
418    let internals = candidates
419        .iter()
420        .map(|c| c.path.display().to_string())
421        .collect::<Vec<_>>()
422        .join(", ");
423    Err(format!(
424        "only WebView/browser-engine internal databases were found ({internals}); none looks \
425         like an application database. Register the app's DB directory via \
426         VictauriBuilder::db_search_paths, or pass an explicit `path`."
427    ))
428}
429
430/// Execute a read-only SQL query against a `SQLite` database.
431///
432/// # Errors
433///
434/// Returns an error if the query is not read-only, the database cannot be opened,
435/// or the query fails.
436#[cfg(feature = "sqlite")]
437pub fn query(
438    db_path: &Path,
439    sql: &str,
440    params: &[serde_json::Value],
441    max_rows: Option<usize>,
442) -> Result<serde_json::Value, String> {
443    query_with_limits(
444        db_path,
445        sql,
446        params,
447        max_rows,
448        QUERY_TIMEOUT,
449        MAX_QUERY_RESULT_BYTES,
450    )
451}
452
453#[cfg(feature = "sqlite")]
454fn query_with_limits(
455    db_path: &Path,
456    sql: &str,
457    params: &[serde_json::Value],
458    max_rows: Option<usize>,
459    query_timeout: Duration,
460    max_result_bytes: usize,
461) -> Result<serde_json::Value, String> {
462    if sql.len() > MAX_QUERY_SQL_BYTES {
463        return Err(format!(
464            "query exceeds maximum length ({MAX_QUERY_SQL_BYTES} bytes)"
465        ));
466    }
467    if !is_read_only(sql) {
468        return Err(
469            "only SELECT, PRAGMA, EXPLAIN, and WITH queries are allowed (read-only access)"
470                .to_string(),
471        );
472    }
473
474    // Defence in depth: the connection is opened READ_ONLY (SQLite rejects
475    // actual writes), but explicitly reject the write form of PRAGMA
476    // (`PRAGMA name = value`) so the read-only contract is self-evident and
477    // not solely reliant on the open flags. The read forms `PRAGMA name` and
478    // `PRAGMA name(arg)` remain allowed.
479    if is_pragma_write(sql) {
480        return Err(
481            "PRAGMA writes (PRAGMA name = value) are not allowed (read-only access)".to_string(),
482        );
483    }
484
485    // Positive PRAGMA allowlist (audit C10): reject side-effecting PRAGMAs
486    // (wal_checkpoint, optimize, incremental_vacuum, …) even without an `=`.
487    if is_disallowed_pragma(sql) {
488        return Err(
489            "only read-only introspection PRAGMAs are allowed (e.g. table_info, \
490             integrity_check, page_count); side-effecting PRAGMAs such as \
491             wal_checkpoint/optimize/incremental_vacuum are blocked"
492                .to_string(),
493        );
494    }
495
496    let cleaned = strip_sql_comments(sql);
497    if cleaned.contains(';') {
498        let parts: Vec<&str> = cleaned
499            .split(';')
500            .filter(|s| !s.trim().is_empty())
501            .collect();
502        if parts.len() > 1 {
503            return Err(
504                "stacked queries (multiple statements separated by ;) are not allowed".to_string(),
505            );
506        }
507    }
508
509    let max_rows = max_rows.unwrap_or(MAX_ROWS_DEFAULT).min(MAX_ROWS_LIMIT);
510
511    let conn = rusqlite::Connection::open_with_flags(
512        db_path,
513        rusqlite::OpenFlags::SQLITE_OPEN_READ_ONLY | rusqlite::OpenFlags::SQLITE_OPEN_NO_MUTEX,
514    )
515    .map_err(|e| format!("failed to open database: {e}"))?;
516
517    // Limit lock waits separately from the CPU deadline enforced below.
518    conn.busy_timeout(std::time::Duration::from_secs(5))
519        .map_err(|e| format!("failed to set timeout: {e}"))?;
520
521    // Bound both SQLite's per-value/row allocation and CPU time. `busy_timeout`
522    // only limits lock waits; it does not stop a CPU-heavy recursive query.
523    conn.set_limit(
524        rusqlite::limits::Limit::SQLITE_LIMIT_LENGTH,
525        MAX_QUERY_CELL_BYTES,
526    );
527    conn.set_limit(
528        rusqlite::limits::Limit::SQLITE_LIMIT_SQL_LENGTH,
529        MAX_QUERY_SQL_BYTES as i32,
530    );
531    let started = Instant::now();
532    conn.progress_handler(
533        QUERY_PROGRESS_OPS,
534        Some(move || started.elapsed() >= query_timeout),
535    );
536    // Hard wall-clock backstop: the progress handler under-samples a single long op, so also
537    // arm an interrupt-handle watchdog. Lives until the query completes (drops/joins on every
538    // return path, including `?` errors below).
539    let _interrupt = InterruptGuard::arm(&conn, query_timeout);
540
541    let mut stmt = conn
542        .prepare(sql)
543        .map_err(|e| sqlite_query_error("failed to prepare query", e, query_timeout))?;
544
545    let column_names: Vec<String> = stmt
546        .column_names()
547        .iter()
548        .map(|s| (*s).to_string())
549        .collect();
550    let column_count = column_names.len();
551
552    let sqlite_params: Vec<Box<dyn rusqlite::types::ToSql>> =
553        params.iter().map(json_to_sql).collect();
554    let param_refs: Vec<&dyn rusqlite::types::ToSql> = sqlite_params.iter().map(|b| &**b).collect();
555
556    let mut rows_out: Vec<serde_json::Value> = Vec::new();
557    let mut rows = stmt
558        .query(param_refs.as_slice())
559        .map_err(|e| sqlite_query_error("query execution failed", e, query_timeout))?;
560    let mut result_bytes = serde_json::to_vec(&column_names)
561        .map_err(|e| format!("failed to size query result columns: {e}"))?
562        .len();
563    let mut truncated = false;
564
565    while let Some(row) = rows
566        .next()
567        .map_err(|e| sqlite_query_error("row read failed", e, query_timeout))?
568    {
569        if rows_out.len() >= max_rows {
570            truncated = true;
571            break;
572        }
573        let mut obj = serde_json::Map::new();
574        for (i, col_name) in column_names.iter().enumerate().take(column_count) {
575            let value = row_value_to_json(row, i);
576            obj.insert(col_name.clone(), value);
577        }
578        let row_value = serde_json::Value::Object(obj);
579        let row_bytes = serde_json::to_vec(&row_value)
580            .map_err(|e| format!("failed to size query result row: {e}"))?
581            .len();
582        if result_bytes.saturating_add(row_bytes) > max_result_bytes {
583            truncated = true;
584            break;
585        }
586        result_bytes = result_bytes.saturating_add(row_bytes);
587        rows_out.push(row_value);
588    }
589
590    Ok(serde_json::json!({
591        "columns": column_names,
592        "rows": rows_out,
593        "row_count": rows_out.len(),
594        "truncated": truncated,
595        "max_rows": max_rows,
596        "result_bytes": result_bytes,
597        "max_result_bytes": max_result_bytes,
598    }))
599}
600
601#[cfg(feature = "sqlite")]
602fn sqlite_query_error(context: &str, error: rusqlite::Error, timeout: Duration) -> String {
603    if error.sqlite_error_code() == Some(rusqlite::ffi::ErrorCode::OperationInterrupted) {
604        format!(
605            "{context}: query timed out after {} ms",
606            timeout.as_millis()
607        )
608    } else {
609        format!("{context}: {error}")
610    }
611}
612
613#[cfg(feature = "sqlite")]
614fn json_to_sql(val: &serde_json::Value) -> Box<dyn rusqlite::types::ToSql> {
615    match val {
616        serde_json::Value::Null => Box::new(rusqlite::types::Null),
617        serde_json::Value::Bool(b) => Box::new(*b),
618        serde_json::Value::Number(n) => {
619            if let Some(i) = n.as_i64() {
620                Box::new(i)
621            } else if let Some(f) = n.as_f64() {
622                Box::new(f)
623            } else {
624                Box::new(n.to_string())
625            }
626        }
627        serde_json::Value::String(s) => Box::new(s.clone()),
628        other => Box::new(other.to_string()),
629    }
630}
631
632#[cfg(feature = "sqlite")]
633fn row_value_to_json(row: &rusqlite::Row, idx: usize) -> serde_json::Value {
634    use rusqlite::types::ValueRef;
635    match row.get_ref(idx) {
636        Ok(ValueRef::Null) => serde_json::Value::Null,
637        Ok(ValueRef::Integer(i)) => serde_json::json!(i),
638        Ok(ValueRef::Real(f)) => serde_json::json!(f),
639        Ok(ValueRef::Text(t)) => {
640            let s = String::from_utf8_lossy(t);
641            if let Ok(parsed) = serde_json::from_str::<serde_json::Value>(&s)
642                && (parsed.is_object() || parsed.is_array())
643            {
644                return parsed;
645            }
646            serde_json::Value::String(s.into_owned())
647        }
648        Ok(ValueRef::Blob(b)) => {
649            use base64::Engine;
650            serde_json::json!({
651                "__blob": true,
652                "size": b.len(),
653                "base64": base64::engine::general_purpose::STANDARD.encode(b),
654            })
655        }
656        Err(_) => serde_json::Value::Null,
657    }
658}
659
660#[cfg(all(test, feature = "sqlite"))]
661mod tests {
662    use super::*;
663
664    fn create_test_db() -> (tempfile::NamedTempFile, PathBuf) {
665        let file = tempfile::NamedTempFile::with_suffix(".sqlite").unwrap();
666        let path = file.path().to_path_buf();
667        let conn = rusqlite::Connection::open(&path).unwrap();
668        conn.execute_batch(
669            "CREATE TABLE users (id INTEGER PRIMARY KEY, name TEXT, score REAL);
670             INSERT INTO users VALUES (1, 'Alice', 95.5);
671             INSERT INTO users VALUES (2, 'Bob', 87.0);
672             INSERT INTO users VALUES (3, 'Charlie', 92.3);",
673        )
674        .unwrap();
675        (file, path)
676    }
677
678    #[test]
679    fn select_all_rows() {
680        let (_f, path) = create_test_db();
681        let result = query(&path, "SELECT * FROM users", &[], None).unwrap();
682        assert_eq!(result["row_count"], 3);
683        assert_eq!(
684            result["columns"],
685            serde_json::json!(["id", "name", "score"])
686        );
687        assert_eq!(result["rows"][0]["name"], "Alice");
688        assert_eq!(result["rows"][1]["name"], "Bob");
689    }
690
691    #[test]
692    fn select_with_params() {
693        let (_f, path) = create_test_db();
694        let result = query(
695            &path,
696            "SELECT name FROM users WHERE score > ?",
697            &[serde_json::json!(90.0)],
698            None,
699        )
700        .unwrap();
701        assert_eq!(result["row_count"], 2);
702    }
703
704    #[test]
705    fn max_rows_truncation() {
706        let (_f, path) = create_test_db();
707        let result = query(&path, "SELECT * FROM users", &[], Some(2)).unwrap();
708        assert_eq!(result["row_count"], 2);
709        assert_eq!(result["truncated"], true);
710    }
711
712    #[test]
713    fn exact_max_rows_is_not_truncated() {
714        let (_f, path) = create_test_db();
715        let result = query(&path, "SELECT * FROM users", &[], Some(3)).unwrap();
716        assert_eq!(result["row_count"], 3);
717        assert_eq!(result["truncated"], false);
718    }
719
720    #[test]
721    fn result_byte_limit_truncates_before_unbounded_allocation() {
722        let file = tempfile::NamedTempFile::with_suffix(".sqlite").unwrap();
723        let path = file.path().to_path_buf();
724        let conn = rusqlite::Connection::open(&path).unwrap();
725        conn.execute_batch("CREATE TABLE payloads (data TEXT)")
726            .unwrap();
727        let payload = "x".repeat(120_000);
728        for _ in 0..5 {
729            conn.execute("INSERT INTO payloads VALUES (?)", [&payload])
730                .unwrap();
731        }
732
733        let result = query_with_limits(
734            &path,
735            "SELECT data FROM payloads",
736            &[],
737            None,
738            QUERY_TIMEOUT,
739            250_000,
740        )
741        .unwrap();
742        assert_eq!(result["row_count"], 2);
743        assert_eq!(result["truncated"], true);
744        assert!(result["result_bytes"].as_u64().unwrap() <= 250_000);
745    }
746
747    #[test]
748    fn cpu_heavy_query_times_out() {
749        let (_f, path) = create_test_db();
750        let err = query_with_limits(
751            &path,
752            "WITH RECURSIVE cnt(x) AS (VALUES(1) UNION ALL SELECT x + 1 FROM cnt) \
753             SELECT sum(x) FROM cnt",
754            &[],
755            None,
756            Duration::from_millis(1),
757            MAX_QUERY_RESULT_BYTES,
758        )
759        .unwrap_err();
760        assert!(err.contains("timed out"), "unexpected timeout error: {err}");
761    }
762
763    #[test]
764    fn oversized_sql_is_rejected_before_prepare() {
765        let (_f, path) = create_test_db();
766        let sql = format!("SELECT 1 /*{}*/", "x".repeat(MAX_QUERY_SQL_BYTES));
767        let err = query(&path, &sql, &[], None).unwrap_err();
768        assert!(err.contains("maximum length"), "unexpected error: {err}");
769    }
770
771    #[test]
772    fn rejects_insert() {
773        let (_f, path) = create_test_db();
774        let err = query(
775            &path,
776            "INSERT INTO users VALUES (4, 'Eve', 99.0)",
777            &[],
778            None,
779        )
780        .unwrap_err();
781        assert!(err.contains("read-only"));
782    }
783
784    #[test]
785    fn rejects_delete() {
786        let (_f, path) = create_test_db();
787        let err = query(&path, "DELETE FROM users", &[], None).unwrap_err();
788        assert!(err.contains("read-only"));
789    }
790
791    #[test]
792    fn rejects_drop() {
793        let (_f, path) = create_test_db();
794        let err = query(&path, "DROP TABLE users", &[], None).unwrap_err();
795        assert!(err.contains("read-only"));
796    }
797
798    #[test]
799    fn rejects_update() {
800        let (_f, path) = create_test_db();
801        let err = query(&path, "UPDATE users SET name = 'X'", &[], None).unwrap_err();
802        assert!(err.contains("read-only"));
803    }
804
805    #[test]
806    fn pragma_works() {
807        let (_f, path) = create_test_db();
808        let result = query(&path, "PRAGMA table_info(users)", &[], None).unwrap();
809        assert!(result["row_count"].as_u64().unwrap() >= 3);
810    }
811
812    #[test]
813    fn pragma_read_allowed() {
814        let (_f, path) = create_test_db();
815        assert!(query(&path, "PRAGMA journal_mode", &[], None).is_ok());
816        assert!(query(&path, "PRAGMA user_version", &[], None).is_ok());
817    }
818
819    #[test]
820    fn rejects_side_effecting_pragmas() {
821        // Audit C10: side-effecting PRAGMAs without `=` are blocked by the allowlist.
822        let (_f, path) = create_test_db();
823        for sql in [
824            "PRAGMA wal_checkpoint",
825            "PRAGMA wal_checkpoint(TRUNCATE)",
826            "PRAGMA optimize",
827            "PRAGMA incremental_vacuum",
828            "PRAGMA shrink_memory",
829            "PRAGMA main.wal_checkpoint",
830            "pragma  optimize ",
831        ] {
832            let err = query(&path, sql, &[], None).unwrap_err();
833            assert!(
834                err.contains("read-only introspection PRAGMAs"),
835                "expected allowlist block for: {sql} (got: {err})"
836            );
837        }
838    }
839
840    #[test]
841    fn allows_safe_introspection_pragmas() {
842        let (_f, path) = create_test_db();
843        for sql in [
844            "PRAGMA table_info(users)",
845            "PRAGMA integrity_check",
846            "PRAGMA page_count",
847            "PRAGMA foreign_key_list(users)",
848            "PRAGMA main.table_info(users)",
849        ] {
850            assert!(
851                query(&path, sql, &[], None).is_ok(),
852                "expected ok for: {sql}"
853            );
854        }
855    }
856
857    #[test]
858    fn pragma_name_handles_schema_qualifier_and_args() {
859        assert_eq!(
860            pragma_name("PRAGMA wal_checkpoint").as_deref(),
861            Some("wal_checkpoint")
862        );
863        assert_eq!(
864            pragma_name("PRAGMA main.table_info(users)").as_deref(),
865            Some("table_info")
866        );
867        assert_eq!(
868            pragma_name("PRAGMA table_info(users)").as_deref(),
869            Some("table_info")
870        );
871        // Quoted/bracketed schema qualifiers normalize to the same name (no false block).
872        assert_eq!(
873            pragma_name(r#"PRAGMA "main".table_info(users)"#).as_deref(),
874            Some("table_info")
875        );
876        assert_eq!(
877            pragma_name("PRAGMA [main].wal_checkpoint").as_deref(),
878            Some("wal_checkpoint")
879        );
880        assert_eq!(pragma_name("SELECT 1"), None);
881    }
882
883    #[test]
884    fn quoted_schema_read_pragma_is_allowed_but_quoted_side_effect_blocked() {
885        let (_f, path) = create_test_db();
886        // A legitimate quoted-schema read PRAGMA must not be falsely blocked.
887        assert!(query(&path, r#"PRAGMA "main".table_info(users)"#, &[], None).is_ok());
888        // …but a side-effecting one stays blocked even when quoted.
889        let err = query(&path, "PRAGMA [main].wal_checkpoint", &[], None).unwrap_err();
890        assert!(err.contains("read-only introspection PRAGMAs"));
891    }
892
893    #[test]
894    fn rejects_pragma_write_form() {
895        let (_f, path) = create_test_db();
896        for sql in [
897            "PRAGMA journal_mode=DELETE",
898            "PRAGMA journal_mode = WAL",
899            "PRAGMA user_version=12345",
900            "  pragma  synchronous = 0 ",
901        ] {
902            let err = query(&path, sql, &[], None).unwrap_err();
903            assert!(err.contains("PRAGMA writes"), "expected block for: {sql}");
904        }
905    }
906
907    #[test]
908    fn is_pragma_write_ignores_equals_in_strings() {
909        // A read-form PRAGMA whose argument contains '=' inside quotes is not a write.
910        assert!(!is_pragma_write("PRAGMA table_info('a=b')"));
911        assert!(is_pragma_write("PRAGMA foo = 'a=b'"));
912        assert!(!is_pragma_write("SELECT 1 = 1"));
913    }
914
915    #[test]
916    fn with_cte_works() {
917        let (_f, path) = create_test_db();
918        let result = query(
919            &path,
920            "WITH top AS (SELECT * FROM users WHERE score > 90) SELECT name FROM top",
921            &[],
922            None,
923        )
924        .unwrap();
925        assert_eq!(result["row_count"], 2);
926    }
927
928    #[test]
929    fn nonexistent_db_fails() {
930        let err = query(Path::new("/nonexistent/db.sqlite"), "SELECT 1", &[], None).unwrap_err();
931        assert!(err.contains("failed to open"));
932    }
933
934    #[test]
935    fn json_column_parsed() {
936        let file = tempfile::NamedTempFile::with_suffix(".sqlite").unwrap();
937        let path = file.path().to_path_buf();
938        let conn = rusqlite::Connection::open(&path).unwrap();
939        conn.execute_batch(
940            r#"CREATE TABLE config (key TEXT, value TEXT);
941               INSERT INTO config VALUES ('settings', '{"theme":"dark","lang":"en"}');"#,
942        )
943        .unwrap();
944        let result = query(&path, "SELECT * FROM config", &[], None).unwrap();
945        assert!(result["rows"][0]["value"].is_object());
946        assert_eq!(result["rows"][0]["value"]["theme"], "dark");
947    }
948
949    #[test]
950    fn discover_finds_sqlite_files() {
951        let dir = tempfile::tempdir().unwrap();
952        std::fs::File::create(dir.path().join("app.sqlite")).unwrap();
953        std::fs::File::create(dir.path().join("cache.db")).unwrap();
954        std::fs::File::create(dir.path().join("readme.txt")).unwrap();
955        let sub = dir.path().join("subdir");
956        std::fs::create_dir(&sub).unwrap();
957        std::fs::File::create(sub.join("deep.sqlite3")).unwrap();
958
959        let dbs = discover_databases(dir.path());
960        assert_eq!(dbs.len(), 3);
961    }
962
963    #[cfg(unix)]
964    #[test]
965    fn discover_does_not_follow_directory_symlink_outside_root() {
966        use std::os::unix::fs::symlink;
967
968        let dir = tempfile::tempdir().unwrap();
969        let outside = tempfile::tempdir().unwrap();
970        std::fs::File::create(outside.path().join("outside.db")).unwrap();
971        symlink(outside.path(), dir.path().join("escape")).unwrap();
972
973        assert!(discover_databases(dir.path()).is_empty());
974    }
975
976    #[test]
977    fn rejects_comment_bypass_block() {
978        let (_f, path) = create_test_db();
979        let err = query(&path, "/* sneaky */DELETE FROM users", &[], None).unwrap_err();
980        assert!(err.contains("read-only"));
981    }
982
983    #[test]
984    fn rejects_line_comment_bypass() {
985        let (_f, path) = create_test_db();
986        let err = query(&path, "-- comment\nDELETE FROM users", &[], None).unwrap_err();
987        assert!(err.contains("read-only"));
988    }
989
990    #[test]
991    fn rejects_stacked_queries() {
992        let (_f, path) = create_test_db();
993        let err = query(&path, "SELECT 1; DROP TABLE users", &[], None).unwrap_err();
994        assert!(err.contains("stacked queries"));
995    }
996
997    #[test]
998    fn allows_trailing_semicolon() {
999        let (_f, path) = create_test_db();
1000        let result = query(&path, "SELECT * FROM users;", &[], None).unwrap();
1001        assert_eq!(result["row_count"], 3);
1002    }
1003
1004    #[test]
1005    fn allows_select_with_block_comment() {
1006        let (_f, path) = create_test_db();
1007        let result = query(
1008            &path,
1009            "/* filter */ SELECT name FROM users WHERE id = 1",
1010            &[],
1011            None,
1012        )
1013        .unwrap();
1014        assert_eq!(result["row_count"], 1);
1015        assert_eq!(result["rows"][0]["name"], "Alice");
1016    }
1017
1018    #[test]
1019    fn rejects_empty_query() {
1020        let (_f, path) = create_test_db();
1021        let err = query(&path, "", &[], None).unwrap_err();
1022        assert!(err.contains("read-only"));
1023    }
1024
1025    #[test]
1026    fn rejects_comment_only_query() {
1027        let (_f, path) = create_test_db();
1028        let err = query(&path, "/* just a comment */", &[], None).unwrap_err();
1029        assert!(err.contains("read-only"));
1030    }
1031
1032    #[test]
1033    fn rejects_nested_comment_bypass() {
1034        let (_f, path) = create_test_db();
1035        let err = query(
1036            &path,
1037            "/* outer /* inner */ still comment */ DROP TABLE users",
1038            &[],
1039            None,
1040        )
1041        .unwrap_err();
1042        assert!(err.contains("read-only"));
1043    }
1044
1045    #[test]
1046    fn blob_column_base64() {
1047        let file = tempfile::NamedTempFile::with_suffix(".sqlite").unwrap();
1048        let path = file.path().to_path_buf();
1049        let conn = rusqlite::Connection::open(&path).unwrap();
1050        conn.execute_batch("CREATE TABLE blobs (id INTEGER, data BLOB)")
1051            .unwrap();
1052        conn.execute("INSERT INTO blobs VALUES (1, X'DEADBEEF')", [])
1053            .unwrap();
1054        let result = query(&path, "SELECT * FROM blobs", &[], None).unwrap();
1055        assert!(result["rows"][0]["data"]["__blob"].as_bool().unwrap());
1056        assert_eq!(result["rows"][0]["data"]["size"], 4);
1057    }
1058
1059    // ── WebView-internal exclusion + app-DB selection (audit / red-team "wrong DB") ──
1060
1061    fn write_sqlite(path: &Path, rows: usize) {
1062        if let Some(parent) = path.parent() {
1063            std::fs::create_dir_all(parent).unwrap();
1064        }
1065        let conn = rusqlite::Connection::open(path).unwrap();
1066        conn.execute_batch("CREATE TABLE t (id INTEGER PRIMARY KEY, blob TEXT)")
1067            .unwrap();
1068        for i in 0..rows {
1069            conn.execute("INSERT INTO t (blob) VALUES (?)", [format!("row-{i}")])
1070                .unwrap();
1071        }
1072    }
1073
1074    #[test]
1075    fn flags_webview_internal_stores() {
1076        assert!(is_webview_internal(Path::new(
1077            "/app/EBWebView/Default/Cookies"
1078        )));
1079        assert!(is_webview_internal(Path::new(
1080            "/app/EBWebView/Default/QuotaManager"
1081        )));
1082        assert!(is_webview_internal(Path::new(
1083            "/Users/x/Library/WebKit/IndexedDB/file__0.indexeddb.sqlite3"
1084        )));
1085        assert!(is_webview_internal(Path::new(
1086            "/app/Local Storage/leveldb.db"
1087        )));
1088        assert!(is_webview_internal(Path::new("/app/data/web data")));
1089        // Real application DBs are NOT flagged.
1090        assert!(!is_webview_internal(Path::new("/app/data/4da.db")));
1091        assert!(!is_webview_internal(Path::new("/app/data/app.sqlite")));
1092        assert!(!is_webview_internal(Path::new("/app/notes.db")));
1093    }
1094
1095    #[test]
1096    fn selects_app_db_over_webview_internals() {
1097        // Reproduces the red-team layout: a WebView profile dir full of engine SQLite
1098        // files sitting next to the real (larger) application DB. The selector must pick
1099        // the app DB, never Cookies/QuotaManager.
1100        let dir = tempfile::tempdir().unwrap();
1101        let root = dir.path();
1102        // Engine internals: flagged either by basename (Cookies/QuotaManager) or by living
1103        // under a WebView profile dir (EBWebView). Real Chromium files are extensionless
1104        // (and thus skipped by the extension filter entirely); we give them a recognized
1105        // extension here precisely to prove the denylist also catches the extensioned forms
1106        // (e.g. WebKit `*.indexeddb.sqlite3`).
1107        write_sqlite(&root.join("EBWebView/Default/Cookies.db"), 1);
1108        write_sqlite(&root.join("EBWebView/Default/QuotaManager.sqlite"), 1);
1109        write_sqlite(&root.join("app.sqlite"), 200); // the real app DB (largest)
1110
1111        let selected = select_app_database(&[root.to_path_buf()]).unwrap();
1112        assert_eq!(selected.file_name().unwrap(), "app.sqlite");
1113
1114        let classified = classify_databases(&[root.to_path_buf()]);
1115        assert!(!classified[0].webview_internal, "app DB must rank first");
1116        assert_eq!(classified[0].path.file_name().unwrap(), "app.sqlite");
1117        assert!(
1118            classified.iter().filter(|c| c.webview_internal).count() >= 2,
1119            "Cookies + QuotaManager must be tagged as internal"
1120        );
1121    }
1122
1123    #[test]
1124    fn errors_clearly_when_only_webview_internals_present() {
1125        let dir = tempfile::tempdir().unwrap();
1126        let root = dir.path();
1127        write_sqlite(&root.join("EBWebView/Default/Cookies.db"), 1);
1128        write_sqlite(&root.join("EBWebView/Default/QuotaManager.sqlite"), 1);
1129
1130        let err = select_app_database(&[root.to_path_buf()]).unwrap_err();
1131        assert!(
1132            err.contains("WebView") && err.contains("db_search_paths"),
1133            "error should name the cause and the fix: {err}"
1134        );
1135    }
1136
1137    #[test]
1138    fn larger_app_db_outranks_smaller_one() {
1139        let dir = tempfile::tempdir().unwrap();
1140        let root = dir.path();
1141        write_sqlite(&root.join("small.db"), 1);
1142        write_sqlite(&root.join("big.db"), 500);
1143        let selected = select_app_database(&[root.to_path_buf()]).unwrap();
1144        assert_eq!(selected.file_name().unwrap(), "big.db");
1145    }
1146}