Skip to main content

scitadel_db/sqlite/
mod.rs

1mod annotations;
2mod assessments;
3mod citations;
4mod migrations;
5mod paper_aliases;
6mod paper_state;
7mod paper_tags;
8mod papers;
9mod questions;
10mod searches;
11mod shortlist;
12mod tui_state;
13
14pub use annotations::{SqliteAnnotationRepository, resolve_anchor};
15
16pub use assessments::SqliteAssessmentRepository;
17pub use citations::SqliteCitationRepository;
18pub use migrations::run_migrations;
19pub use paper_aliases::{SOURCE_BIBTEX_IMPORT, SOURCE_REKEY, SqlitePaperAliasRepository};
20pub use paper_state::{PaperState, SqlitePaperStateRepository};
21pub use paper_tags::{SqlitePaperTagRepository, TAG_SOURCE_BIBTEX_IMPORT};
22pub use papers::SqlitePaperRepository;
23pub use questions::SqliteQuestionRepository;
24/// Re-export of `rusqlite::Transaction` so downstream crates (e.g.
25/// `scitadel-mcp`'s bib-import orchestrator in #157) can drive multi-
26/// repo transactions without taking a direct rusqlite dependency.
27pub use rusqlite::Transaction as SqliteTransaction;
28pub use searches::SqliteSearchRepository;
29pub use shortlist::SqliteShortlistRepository;
30pub use tui_state::{SqliteTuiStateRepository, TuiState};
31
32use r2d2::Pool;
33use r2d2_sqlite::SqliteConnectionManager;
34use rusqlite::functions::FunctionFlags;
35use std::path::Path;
36
37use crate::error::DbError;
38
39/// Register `unicode_lower(text)` — a Unicode-aware lowercase scalar
40/// function for SQL queries. SQLite's built-in `LOWER()` is ASCII-only
41/// (`Ü` stays `Ü`), so case-insensitive title comparisons miss
42/// non-ASCII case variants. Registered on every connection at init
43/// time so any pooled handle can use it. (#159)
44fn register_unicode_lower(conn: &rusqlite::Connection) -> rusqlite::Result<()> {
45    conn.create_scalar_function(
46        "unicode_lower",
47        1,
48        FunctionFlags::SQLITE_UTF8 | FunctionFlags::SQLITE_DETERMINISTIC,
49        |ctx| {
50            let s = ctx.get::<Option<String>>(0)?;
51            Ok(s.map(|v| v.to_lowercase()))
52        },
53    )
54}
55
56/// Parse an RFC3339 timestamp string, falling back to now on parse errors.
57pub(crate) fn parse_rfc3339_or_now(s: &str) -> chrono::DateTime<chrono::Utc> {
58    chrono::DateTime::parse_from_rfc3339(s)
59        .map_or_else(|_| chrono::Utc::now(), |dt| dt.with_timezone(&chrono::Utc))
60}
61
62/// Database connection pool with migration support.
63#[derive(Clone)]
64pub struct Database {
65    pool: Pool<SqliteConnectionManager>,
66}
67
68impl Database {
69    /// Open (or create) a database at the given path with WAL mode and FK enforcement.
70    pub fn open(path: &Path) -> Result<Self, DbError> {
71        if let Some(parent) = path.parent() {
72            std::fs::create_dir_all(parent)
73                .map_err(|e| DbError::Migration(format!("failed to create db directory: {e}")))?;
74        }
75
76        let manager = SqliteConnectionManager::file(path).with_init(|conn| {
77            // synchronous=NORMAL is the recommended pairing with WAL: full
78            // sync is unnecessary because WAL replay covers durability,
79            // and NORMAL roughly halves write latency. busy_timeout lets
80            // the cross-process 2-pane workflow ride through transient
81            // writer locks instead of failing reads. (#121)
82            conn.execute_batch(
83                "PRAGMA journal_mode=WAL;
84                     PRAGMA synchronous=NORMAL;
85                     PRAGMA foreign_keys=ON;
86                     PRAGMA busy_timeout=5000;",
87            )?;
88            register_unicode_lower(conn)?;
89            Ok(())
90        });
91
92        let pool = Pool::builder().max_size(4).build(manager)?;
93
94        Ok(Self { pool })
95    }
96
97    /// Open an in-memory database (for testing).
98    pub fn open_in_memory() -> Result<Self, DbError> {
99        let manager = SqliteConnectionManager::memory().with_init(|conn| {
100            conn.execute_batch(
101                "PRAGMA journal_mode=WAL;
102                     PRAGMA foreign_keys=ON;",
103            )?;
104            register_unicode_lower(conn)?;
105            Ok(())
106        });
107
108        let pool = Pool::builder().max_size(1).build(manager)?;
109
110        Ok(Self { pool })
111    }
112
113    /// Run all pending migrations, then backfill any paper rows that
114    /// lack a stable `bibtex_key` (#132). The backfill is idempotent —
115    /// on every subsequent call it's a no-op because every paper
116    /// already has a key. Papers gain keys via `save`/`save_many`
117    /// thereafter, keeping this migrate-call the only place that
118    /// needs to know about the assignment algorithm.
119    pub fn migrate(&self) -> Result<(), DbError> {
120        let conn = self.pool.get()?;
121        run_migrations(&conn)?;
122        drop(conn);
123        self.backfill_bibtex_keys()?;
124        Ok(())
125    }
126
127    /// Walk every paper without a `bibtex_key` and assign one via the
128    /// Better-BibTeX-style algorithm in
129    /// `scitadel_core::bibtex_key::assign_keys`, preserving uniqueness
130    /// against already-assigned keys. Called from `migrate` on upgrade
131    /// from pre-0.6.0 schemas and as a safety net on every startup.
132    fn backfill_bibtex_keys(&self) -> Result<(), DbError> {
133        use scitadel_core::bibtex_key::assign_keys;
134        use std::collections::HashSet;
135
136        let conn = self.pool.get()?;
137
138        // Already-assigned keys become the `taken` set.
139        let mut taken: HashSet<String> = conn
140            .prepare("SELECT bibtex_key FROM papers WHERE bibtex_key IS NOT NULL")?
141            .query_map([], |row| row.get::<_, String>(0))?
142            .filter_map(Result::ok)
143            .collect();
144
145        // Load papers needing a key. Only the minimum columns the
146        // algorithm touches — avoids the full row_to_paper deserialize.
147        let mut stmt =
148            conn.prepare("SELECT id, title, authors, year FROM papers WHERE bibtex_key IS NULL")?;
149        let rows: Vec<(String, String, String, Option<i32>)> = stmt
150            .query_map([], |r| Ok((r.get(0)?, r.get(1)?, r.get(2)?, r.get(3)?)))?
151            .filter_map(Result::ok)
152            .collect();
153        drop(stmt);
154
155        if rows.is_empty() {
156            return Ok(());
157        }
158
159        // Reconstitute a minimal `Paper` per row — authors is a JSON
160        // array; everything else the algorithm needs is in the columns.
161        let papers: Vec<scitadel_core::models::Paper> = rows
162            .iter()
163            .map(|(id, title, authors_json, year)| {
164                let mut p = scitadel_core::models::Paper::new(title);
165                p.id = scitadel_core::models::PaperId::from(id.as_str());
166                p.authors = serde_json::from_str(authors_json).unwrap_or_default();
167                p.year = *year;
168                p
169            })
170            .collect();
171
172        let keys = assign_keys(&papers, &mut taken);
173        for (paper, key) in papers.iter().zip(keys) {
174            conn.execute(
175                "UPDATE papers SET bibtex_key = ?1 WHERE id = ?2",
176                rusqlite::params![key, paper.id.as_str()],
177            )?;
178        }
179        Ok(())
180    }
181
182    /// Get a connection from the pool.
183    pub fn conn(&self) -> Result<r2d2::PooledConnection<SqliteConnectionManager>, DbError> {
184        Ok(self.pool.get()?)
185    }
186
187    /// Create all repository instances sharing this database.
188    pub fn repositories(
189        &self,
190    ) -> (
191        SqlitePaperRepository,
192        SqliteSearchRepository,
193        SqliteQuestionRepository,
194        SqliteAssessmentRepository,
195        SqliteCitationRepository,
196    ) {
197        let db = self.clone();
198        (
199            SqlitePaperRepository::new(db.clone()),
200            SqliteSearchRepository::new(db.clone()),
201            SqliteQuestionRepository::new(db.clone()),
202            SqliteAssessmentRepository::new(db.clone()),
203            SqliteCitationRepository::new(db),
204        )
205    }
206}
207
208#[cfg(test)]
209mod tests {
210    use super::*;
211    use scitadel_core::models::Paper;
212    use scitadel_core::ports::PaperRepository;
213
214    /// Two `Database` instances pointing at the same file simulate the
215    /// 2-pane workflow: TUI process holds one, `scitadel mcp` process
216    /// holds the other. A write through one must surface through the
217    /// other — that's the contract WAL mode buys us. (#121)
218    #[test]
219    fn cross_process_write_visible_within_one_redraw() {
220        let dir = tempfile::tempdir().unwrap();
221        let db_path = dir.path().join("scitadel.db");
222
223        // Process A (e.g. the TUI) opens, migrates, holds open.
224        let db_a = Database::open(&db_path).unwrap();
225        db_a.migrate().unwrap();
226        let (paper_repo_a, _, _, _, _) = db_a.repositories();
227
228        // Process B (e.g. scitadel mcp) opens the same file independently.
229        let db_b = Database::open(&db_path).unwrap();
230        let (paper_repo_b, _, _, _, _) = db_b.repositories();
231
232        // Pre-write sanity: A sees nothing.
233        assert!(paper_repo_a.list_all(10, 0).unwrap().is_empty());
234
235        // B writes.
236        let p = Paper::new("MCP-side write");
237        paper_repo_b.save(&p).unwrap();
238
239        // A must see it on the very next read — no commit barrier, no
240        // sleep, no reconnect. This is the property the 2-pane workflow
241        // depends on.
242        let papers = paper_repo_a.list_all(10, 0).unwrap();
243        assert_eq!(papers.len(), 1, "TUI process must see MCP process's write");
244        assert_eq!(papers[0].title, "MCP-side write");
245    }
246
247    /// Backfill invariant (#132): after `migrate()`, every paper has a
248    /// `bibtex_key`, keys are unique, and re-migrating is a no-op.
249    #[test]
250    fn migrate_backfills_bibtex_keys() {
251        let dir = tempfile::tempdir().unwrap();
252        let db_path = dir.path().join("backfill.db");
253        let db = Database::open(&db_path).unwrap();
254        db.migrate().unwrap();
255
256        // Seed three papers with distinct metadata, omitting bibtex_key.
257        let conn = db.conn().unwrap();
258        for (id, title, authors, year) in [
259            (
260                "p-1",
261                "Attention Is All You Need",
262                r#"["Vaswani, A."]"#,
263                2017,
264            ),
265            ("p-2", "Deep Residual Learning", r#"["Kaiming He"]"#, 2015),
266            ("p-3", "Quantum Computing", r#"["Müller, Hans"]"#, 2023),
267        ] {
268            conn.execute(
269                "INSERT INTO papers (id, title, authors, year, created_at, updated_at)
270                 VALUES (?1, ?2, ?3, ?4, datetime('now'), datetime('now'))",
271                rusqlite::params![id, title, authors, year],
272            )
273            .unwrap();
274        }
275        // Null out the key column (migrate() would have backfilled above).
276        conn.execute("UPDATE papers SET bibtex_key = NULL", [])
277            .unwrap();
278        drop(conn);
279
280        db.migrate().unwrap();
281
282        let conn = db.conn().unwrap();
283        let keys: Vec<String> = conn
284            .prepare("SELECT bibtex_key FROM papers ORDER BY id")
285            .unwrap()
286            .query_map([], |r| r.get::<_, String>(0))
287            .unwrap()
288            .filter_map(Result::ok)
289            .collect();
290        assert_eq!(keys.len(), 3, "every paper got a key");
291        assert!(
292            keys.contains(&"vaswani2017attention".to_string())
293                || keys.contains(&"vaswani2017transformer".to_string())
294                || keys.iter().any(|k| k.starts_with("vaswani2017")),
295            "got: {keys:?}"
296        );
297        // All keys distinct.
298        let unique: std::collections::HashSet<_> = keys.iter().collect();
299        assert_eq!(unique.len(), keys.len());
300
301        // Re-run is a no-op — keys don't change.
302        db.migrate().unwrap();
303        let keys2: Vec<String> = db
304            .conn()
305            .unwrap()
306            .prepare("SELECT bibtex_key FROM papers ORDER BY id")
307            .unwrap()
308            .query_map([], |r| r.get::<_, String>(0))
309            .unwrap()
310            .filter_map(Result::ok)
311            .collect();
312        assert_eq!(keys, keys2, "re-migrate is idempotent");
313    }
314
315    #[test]
316    fn pragma_journal_mode_is_wal_on_disk() {
317        let dir = tempfile::tempdir().unwrap();
318        let db_path = dir.path().join("pragma.db");
319        let db = Database::open(&db_path).unwrap();
320        let conn = db.conn().unwrap();
321        let mode: String = conn
322            .query_row("PRAGMA journal_mode", [], |r| r.get(0))
323            .unwrap();
324        assert_eq!(mode.to_lowercase(), "wal");
325    }
326}