1use crate::Result;
8use rusqlite::Connection;
9
10pub const SCHEMA_VERSION: &str = "2.0.0";
15
16pub fn initialize(conn: &Connection) -> Result<()> {
25 set_pragmas(conn)?;
26 let migrated = migrate_if_needed(conn)?;
27 create_tables(conn)?;
28 if migrated {
29 rebuild_fts(conn)?;
30 }
31 set_schema_version(conn)?;
32 Ok(())
33}
34
35fn migrate_if_needed(conn: &Connection) -> Result<bool> {
43 let has_shadow: bool = conn
45 .query_row(
46 "SELECT COUNT(*) > 0 FROM sqlite_master WHERE type='table' AND name='chunks_fts_content'",
47 [],
48 |row| row.get(0),
49 )
50 .unwrap_or(false);
51
52 if !has_shadow {
53 return Ok(false);
54 }
55
56 conn.execute_batch(
58 "DROP TRIGGER IF EXISTS chunks_ai;
59 DROP TRIGGER IF EXISTS chunks_ad;
60 DROP TRIGGER IF EXISTS chunks_au;
61 DROP TABLE IF EXISTS chunks_fts;",
62 )
63 .map_err(|e| crate::Error::Query(format!("v1→v2 migration: drop old FTS failed: {e}")))?;
64
65 Ok(true)
66}
67
68fn set_pragmas(conn: &Connection) -> Result<()> {
69 conn.execute_batch(
70 "PRAGMA journal_mode = WAL;
71 PRAGMA synchronous = NORMAL;
72 PRAGMA cache_size = -65536;
73 PRAGMA mmap_size = 268435456;
74 PRAGMA foreign_keys = ON;
75 PRAGMA busy_timeout = 5000;",
76 )
77 .map_err(|e| crate::Error::Query(format!("Failed to set pragmas: {e}")))?;
78 Ok(())
79}
80
81fn create_tables(conn: &Connection) -> Result<()> {
82 conn.execute_batch(
83 "CREATE TABLE IF NOT EXISTS documents (
84 id TEXT PRIMARY KEY,
85 title TEXT,
86 source TEXT,
87 content TEXT NOT NULL,
88 metadata TEXT,
89 chunk_count INTEGER DEFAULT 0,
90 created_at INTEGER NOT NULL DEFAULT (unixepoch()),
91 updated_at INTEGER NOT NULL DEFAULT (unixepoch())
92 );
93
94 CREATE TABLE IF NOT EXISTS chunks (
95 id TEXT PRIMARY KEY,
96 doc_id TEXT NOT NULL REFERENCES documents(id) ON DELETE CASCADE,
97 content TEXT NOT NULL,
98 position INTEGER NOT NULL,
99 metadata TEXT,
100 UNIQUE(doc_id, position)
101 );
102
103 CREATE VIRTUAL TABLE IF NOT EXISTS chunks_fts USING fts5(
104 content,
105 content=chunks,
106 content_rowid=rowid,
107 tokenize='porter unicode61'
108 );
109
110 CREATE TRIGGER IF NOT EXISTS chunks_ai AFTER INSERT ON chunks BEGIN
111 INSERT INTO chunks_fts(rowid, content) VALUES (new.rowid, new.content);
112 END;
113
114 CREATE TRIGGER IF NOT EXISTS chunks_ad AFTER DELETE ON chunks BEGIN
115 INSERT INTO chunks_fts(chunks_fts, rowid, content) VALUES('delete', old.rowid, old.content);
116 END;
117
118 CREATE TRIGGER IF NOT EXISTS chunks_au AFTER UPDATE ON chunks BEGIN
119 INSERT INTO chunks_fts(chunks_fts, rowid, content) VALUES('delete', old.rowid, old.content);
120 INSERT INTO chunks_fts(rowid, content) VALUES (new.rowid, new.content);
121 END;
122
123 CREATE TABLE IF NOT EXISTS fingerprints (
124 doc_path TEXT PRIMARY KEY,
125 blake3_hash BLOB NOT NULL,
126 chunk_count INTEGER NOT NULL,
127 indexed_at INTEGER NOT NULL DEFAULT (unixepoch())
128 );
129
130 CREATE TABLE IF NOT EXISTS metadata (
131 key TEXT PRIMARY KEY,
132 value TEXT NOT NULL
133 );",
134 )
135 .map_err(|e| crate::Error::Query(format!("Failed to create tables: {e}")))?;
136 Ok(())
137}
138
139fn rebuild_fts(conn: &Connection) -> Result<()> {
144 conn.execute("INSERT INTO chunks_fts(chunks_fts) VALUES('rebuild')", [])
145 .map_err(|e| crate::Error::Query(format!("FTS5 rebuild failed: {e}")))?;
146 Ok(())
147}
148
149fn set_schema_version(conn: &Connection) -> Result<()> {
150 conn.execute(
151 "INSERT OR REPLACE INTO metadata (key, value) VALUES ('schema_version', ?1)",
152 [SCHEMA_VERSION],
153 )
154 .map_err(|e| crate::Error::Query(format!("Failed to set schema version: {e}")))?;
155 Ok(())
156}
157
158pub fn check_version(conn: &Connection) -> Result<bool> {
160 let version: Option<String> = conn
161 .query_row("SELECT value FROM metadata WHERE key = 'schema_version'", [], |row| row.get(0))
162 .ok();
163 Ok(version.as_deref() == Some(SCHEMA_VERSION))
164}
165
166#[cfg(test)]
167mod tests {
168 use super::*;
169
170 #[test]
171 fn test_initialize_creates_tables() {
172 let conn = Connection::open_in_memory().unwrap();
173 initialize(&conn).unwrap();
174
175 let doc_count: i64 =
177 conn.query_row("SELECT COUNT(*) FROM documents", [], |r| r.get(0)).unwrap();
178 assert_eq!(doc_count, 0);
179
180 let chunk_count: i64 =
181 conn.query_row("SELECT COUNT(*) FROM chunks", [], |r| r.get(0)).unwrap();
182 assert_eq!(chunk_count, 0);
183
184 let fp_count: i64 =
185 conn.query_row("SELECT COUNT(*) FROM fingerprints", [], |r| r.get(0)).unwrap();
186 assert_eq!(fp_count, 0);
187 }
188
189 #[test]
190 fn test_schema_version_set() {
191 let conn = Connection::open_in_memory().unwrap();
192 initialize(&conn).unwrap();
193 assert!(check_version(&conn).unwrap());
194 }
195
196 #[test]
197 fn test_initialize_idempotent() {
198 let conn = Connection::open_in_memory().unwrap();
199 initialize(&conn).unwrap();
200 initialize(&conn).unwrap(); assert!(check_version(&conn).unwrap());
202 }
203
204 #[test]
205 fn test_wal_mode_enabled() {
206 let conn = Connection::open_in_memory().unwrap();
207 initialize(&conn).unwrap();
208 let mode: String = conn.query_row("PRAGMA journal_mode", [], |r| r.get(0)).unwrap();
211 assert!(!mode.is_empty());
212 }
213
214 #[test]
215 fn test_external_content_fts5_no_shadow_content_table() {
216 let conn = Connection::open_in_memory().unwrap();
217 initialize(&conn).unwrap();
218
219 let has_content_table: bool = conn
221 .query_row(
222 "SELECT COUNT(*) > 0 FROM sqlite_master WHERE type='table' AND name='chunks_fts_content'",
223 [],
224 |row| row.get(0),
225 )
226 .unwrap();
227 assert!(!has_content_table, "External content FTS5 should not create chunks_fts_content");
228 }
229
230 #[test]
231 fn test_v1_to_v2_migration() {
232 let conn = Connection::open_in_memory().unwrap();
233
234 set_pragmas(&conn).unwrap();
236 conn.execute_batch(
237 "CREATE TABLE IF NOT EXISTS documents (
238 id TEXT PRIMARY KEY, title TEXT, source TEXT, content TEXT NOT NULL,
239 metadata TEXT, chunk_count INTEGER DEFAULT 0,
240 created_at INTEGER NOT NULL DEFAULT (unixepoch()),
241 updated_at INTEGER NOT NULL DEFAULT (unixepoch())
242 );
243 CREATE TABLE IF NOT EXISTS chunks (
244 id TEXT PRIMARY KEY,
245 doc_id TEXT NOT NULL REFERENCES documents(id) ON DELETE CASCADE,
246 content TEXT NOT NULL, position INTEGER NOT NULL, metadata TEXT,
247 UNIQUE(doc_id, position)
248 );
249 CREATE VIRTUAL TABLE chunks_fts USING fts5(content, tokenize='porter unicode61');
250 CREATE TRIGGER chunks_ai AFTER INSERT ON chunks BEGIN
251 INSERT INTO chunks_fts(rowid, content) VALUES (new.rowid, new.content);
252 END;
253 CREATE TRIGGER chunks_ad AFTER DELETE ON chunks BEGIN
254 DELETE FROM chunks_fts WHERE rowid = old.rowid;
255 END;
256 CREATE TRIGGER chunks_au AFTER UPDATE ON chunks BEGIN
257 DELETE FROM chunks_fts WHERE rowid = old.rowid;
258 INSERT INTO chunks_fts(rowid, content) VALUES (new.rowid, new.content);
259 END;
260 CREATE TABLE IF NOT EXISTS fingerprints (
261 doc_path TEXT PRIMARY KEY, blake3_hash BLOB NOT NULL,
262 chunk_count INTEGER NOT NULL, indexed_at INTEGER NOT NULL DEFAULT (unixepoch())
263 );
264 CREATE TABLE IF NOT EXISTS metadata (key TEXT PRIMARY KEY, value TEXT NOT NULL);
265 INSERT OR REPLACE INTO metadata (key, value) VALUES ('schema_version', '1.0.0');",
266 )
267 .unwrap();
268
269 conn.execute("INSERT INTO documents (id, content) VALUES ('doc1', 'test document')", [])
271 .unwrap();
272 conn.execute(
273 "INSERT INTO chunks (id, doc_id, content, position) VALUES ('c1', 'doc1', 'SIMD vector operations', 0)",
274 [],
275 )
276 .unwrap();
277
278 let has_shadow: bool = conn
280 .query_row(
281 "SELECT COUNT(*) > 0 FROM sqlite_master WHERE type='table' AND name='chunks_fts_content'",
282 [],
283 |row| row.get(0),
284 )
285 .unwrap();
286 assert!(has_shadow, "v1 should have chunks_fts_content shadow table");
287
288 initialize(&conn).unwrap();
290
291 let has_shadow_after: bool = conn
293 .query_row(
294 "SELECT COUNT(*) > 0 FROM sqlite_master WHERE type='table' AND name='chunks_fts_content'",
295 [],
296 |row| row.get(0),
297 )
298 .unwrap();
299 assert!(!has_shadow_after, "v2 should not have chunks_fts_content");
300
301 assert!(check_version(&conn).unwrap());
303
304 let count: i64 = conn
306 .query_row("SELECT COUNT(*) FROM chunks_fts WHERE chunks_fts MATCH 'SIMD'", [], |r| {
307 r.get(0)
308 })
309 .unwrap();
310 assert_eq!(count, 1, "FTS search should find migrated data");
311 }
312
313 #[test]
314 fn test_v2_fresh_install_no_migration() {
315 let conn = Connection::open_in_memory().unwrap();
316 let migrated = migrate_if_needed(&conn).unwrap();
317 assert!(!migrated, "Fresh install should not trigger migration");
318 }
319}