agentroot_core/db/
schema.rs

1//! Database schema and initialization
2
3use crate::error::Result;
4use rusqlite::{params, Connection};
5use std::path::Path;
6
/// Main database handle
///
/// Thin wrapper around a single `rusqlite::Connection`. Sibling modules in
/// this crate access `conn` directly (hence `pub(crate)`) to run their own
/// statements against the schema defined in this file.
pub struct Database {
    // Raw SQLite connection; not thread-safe for concurrent use from
    // multiple threads without external synchronization.
    pub(crate) conn: Connection,
}
11
/// Latest schema version stamped by `initialize`.
/// Bump this together with a new `migrate_to_vN` step wired into `migrate`.
const SCHEMA_VERSION: i32 = 6;
13
/// DDL batch for the full, current-version schema.
///
/// Every statement is `IF NOT EXISTS`-guarded so the batch can be replayed on
/// an existing database. The SQL comments inside the literal document each
/// table; note the literal's contents are executed verbatim, so its text must
/// not be reformatted casually.
const CREATE_TABLES: &str = r#"
-- Content storage (content-addressable by SHA-256 hash)
CREATE TABLE IF NOT EXISTS content (
    hash TEXT PRIMARY KEY,
    doc TEXT NOT NULL,
    created_at TEXT NOT NULL
);

-- Document metadata
CREATE TABLE IF NOT EXISTS documents (
    id INTEGER PRIMARY KEY AUTOINCREMENT,
    collection TEXT NOT NULL,
    path TEXT NOT NULL,
    title TEXT NOT NULL,
    hash TEXT NOT NULL REFERENCES content(hash),
    created_at TEXT NOT NULL,
    modified_at TEXT NOT NULL,
    active INTEGER NOT NULL DEFAULT 1,
    source_type TEXT NOT NULL DEFAULT 'file',
    source_uri TEXT,
    llm_summary TEXT,
    llm_title TEXT,
    llm_keywords TEXT,
    llm_category TEXT,
    llm_intent TEXT,
    llm_concepts TEXT,
    llm_difficulty TEXT,
    llm_queries TEXT,
    llm_metadata_generated_at TEXT,
    llm_model TEXT,
    user_metadata TEXT,
    UNIQUE(collection, path)
);

-- Full-text search index
CREATE VIRTUAL TABLE IF NOT EXISTS documents_fts USING fts5(
    filepath,
    title,
    body,
    llm_summary,
    llm_title,
    llm_keywords,
    llm_intent,
    llm_concepts,
    user_metadata,
    modified_at,
    tokenize='porter unicode61'
);

-- Vector embeddings metadata
CREATE TABLE IF NOT EXISTS content_vectors (
    hash TEXT NOT NULL,
    seq INTEGER NOT NULL,
    pos INTEGER NOT NULL,
    model TEXT NOT NULL,
    chunk_hash TEXT,
    created_at TEXT NOT NULL,
    PRIMARY KEY (hash, seq)
);

-- Model metadata for dimension validation
CREATE TABLE IF NOT EXISTS model_metadata (
    model TEXT PRIMARY KEY,
    dimensions INTEGER NOT NULL,
    created_at TEXT NOT NULL,
    last_used_at TEXT NOT NULL
);

-- Global chunk embeddings cache
CREATE TABLE IF NOT EXISTS chunk_embeddings (
    chunk_hash TEXT NOT NULL,
    model TEXT NOT NULL,
    embedding BLOB NOT NULL,
    created_at TEXT NOT NULL,
    PRIMARY KEY (chunk_hash, model)
);

-- LLM response cache
CREATE TABLE IF NOT EXISTS llm_cache (
    key TEXT PRIMARY KEY,
    value TEXT NOT NULL,
    model TEXT NOT NULL,
    created_at TEXT NOT NULL
);

-- Collections metadata
CREATE TABLE IF NOT EXISTS collections (
    name TEXT PRIMARY KEY,
    path TEXT NOT NULL,
    pattern TEXT NOT NULL DEFAULT '**/*.md',
    created_at TEXT NOT NULL,
    updated_at TEXT NOT NULL,
    provider_type TEXT NOT NULL DEFAULT 'file',
    provider_config TEXT
);

-- Context metadata (hierarchical context for paths)
CREATE TABLE IF NOT EXISTS contexts (
    path TEXT PRIMARY KEY,
    context TEXT NOT NULL,
    created_at TEXT NOT NULL
);

-- Schema version tracking
CREATE TABLE IF NOT EXISTS schema_version (
    version INTEGER PRIMARY KEY
);

-- Indexes
CREATE INDEX IF NOT EXISTS idx_documents_collection ON documents(collection);
CREATE INDEX IF NOT EXISTS idx_documents_hash ON documents(hash);
CREATE INDEX IF NOT EXISTS idx_documents_active ON documents(active);
CREATE INDEX IF NOT EXISTS idx_content_vectors_hash ON content_vectors(hash);
CREATE INDEX IF NOT EXISTS idx_content_vectors_chunk_hash ON content_vectors(chunk_hash);
CREATE INDEX IF NOT EXISTS idx_chunk_embeddings_hash ON chunk_embeddings(chunk_hash);
"#;
130
/// Triggers that keep `documents_fts` in sync with `documents`.
///
/// Only rows with `active = 1` are indexed; the UPDATE trigger deletes and
/// re-inserts unconditionally so deactivation removes a row from FTS and
/// activation adds it. Executed after `CREATE_TABLES` (the triggers reference
/// both `documents_fts` and `content`).
const CREATE_TRIGGERS: &str = r#"
-- Sync FTS on insert (only for active documents)
CREATE TRIGGER IF NOT EXISTS documents_ai
AFTER INSERT ON documents
WHEN new.active = 1
BEGIN
    INSERT INTO documents_fts(rowid, filepath, title, body, llm_summary, llm_title, llm_keywords, llm_intent, llm_concepts, user_metadata, modified_at)
    SELECT
        new.id,
        new.collection || '/' || new.path,
        new.title,
        (SELECT doc FROM content WHERE hash = new.hash),
        new.llm_summary,
        new.llm_title,
        new.llm_keywords,
        new.llm_intent,
        new.llm_concepts,
        new.user_metadata,
        new.modified_at;
END;

-- Sync FTS on update: handle activation/deactivation/content change
CREATE TRIGGER IF NOT EXISTS documents_au
AFTER UPDATE ON documents
BEGIN
    DELETE FROM documents_fts WHERE rowid = old.id;
    INSERT INTO documents_fts(rowid, filepath, title, body, llm_summary, llm_title, llm_keywords, llm_intent, llm_concepts, user_metadata, modified_at)
    SELECT
        new.id,
        new.collection || '/' || new.path,
        new.title,
        (SELECT doc FROM content WHERE hash = new.hash),
        new.llm_summary,
        new.llm_title,
        new.llm_keywords,
        new.llm_intent,
        new.llm_concepts,
        new.user_metadata,
        new.modified_at
    WHERE new.active = 1;
END;

-- Sync FTS on delete
CREATE TRIGGER IF NOT EXISTS documents_ad
AFTER DELETE ON documents
BEGIN
    DELETE FROM documents_fts WHERE rowid = old.id;
END;
"#;
180
181impl Database {
182    /// Open database at path, creating if necessary
183    pub fn open(path: impl AsRef<Path>) -> Result<Self> {
184        let path = path.as_ref();
185        if let Some(parent) = path.parent() {
186            std::fs::create_dir_all(parent)?;
187        }
188
189        let conn = Connection::open(path)?;
190        Ok(Self { conn })
191    }
192
193    /// Open in-memory database (for testing)
194    pub fn open_in_memory() -> Result<Self> {
195        let conn = Connection::open_in_memory()?;
196        Ok(Self { conn })
197    }
198
    /// Initialize database schema
    ///
    /// Idempotent: all DDL is `IF NOT EXISTS`-guarded and migrations are
    /// version-gated, so this is safe to call on every open.
    pub fn initialize(&self) -> Result<()> {
        // Set PRAGMAs for performance: WAL journaling, relaxed fsync,
        // enforced foreign keys, ~64 MB page cache (negative cache_size is
        // KiB), and a 5 s busy timeout for lock contention.
        self.conn.execute_batch(
            "PRAGMA journal_mode = WAL;
             PRAGMA synchronous = NORMAL;
             PRAGMA foreign_keys = ON;
             PRAGMA cache_size = -64000;
             PRAGMA busy_timeout = 5000;",
        )?;

        // Create tables (base schema first; triggers below reference it)
        self.conn.execute_batch(CREATE_TABLES)?;

        // Create triggers
        self.conn.execute_batch(CREATE_TRIGGERS)?;

        // Run migrations to upgrade existing databases (BEFORE setting version).
        // Each migration stamps its own version, so a failure part-way leaves
        // an accurate version row and the remaining steps re-run next open.
        self.migrate()?;

        // Set schema version (after migrations complete)
        self.conn.execute(
            "INSERT OR REPLACE INTO schema_version (version) VALUES (?1)",
            params![SCHEMA_VERSION],
        )?;

        Ok(())
    }
227
228    /// Get current schema version
229    pub fn schema_version(&self) -> Result<Option<i32>> {
230        let version = self
231            .conn
232            .query_row(
233                "SELECT version FROM schema_version ORDER BY version DESC LIMIT 1",
234                [],
235                |row| row.get(0),
236            )
237            .ok();
238        Ok(version)
239    }
240
241    /// Run migrations to upgrade schema to current version
242    pub fn migrate(&self) -> Result<()> {
243        let current = self.schema_version()?.unwrap_or(0);
244
245        if current < 2 {
246            self.migrate_to_v2()?;
247        }
248
249        if current < 3 {
250            self.migrate_to_v3()?;
251        }
252
253        if current < 4 {
254            self.migrate_to_v4()?;
255        }
256
257        if current < 5 {
258            self.migrate_to_v5()?;
259        }
260
261        if current < 6 {
262            self.migrate_to_v6()?;
263        }
264
265        Ok(())
266    }
267
268    fn migrate_to_v2(&self) -> Result<()> {
269        // Add chunk_hash column to content_vectors if not exists
270        let has_chunk_hash: bool = self.conn.query_row(
271            "SELECT COUNT(*) > 0 FROM pragma_table_info('content_vectors') WHERE name = 'chunk_hash'",
272            [],
273            |row| row.get(0),
274        ).unwrap_or(false);
275
276        if !has_chunk_hash {
277            self.conn
278                .execute("ALTER TABLE content_vectors ADD COLUMN chunk_hash TEXT", [])?;
279            self.conn.execute(
280                "CREATE INDEX IF NOT EXISTS idx_content_vectors_chunk_hash ON content_vectors(chunk_hash)",
281                [],
282            )?;
283        }
284
285        // Create model_metadata table
286        self.conn.execute(
287            "CREATE TABLE IF NOT EXISTS model_metadata (
288                model TEXT PRIMARY KEY,
289                dimensions INTEGER NOT NULL,
290                created_at TEXT NOT NULL,
291                last_used_at TEXT NOT NULL
292            )",
293            [],
294        )?;
295
296        // Create chunk_embeddings cache table
297        self.conn.execute(
298            "CREATE TABLE IF NOT EXISTS chunk_embeddings (
299                chunk_hash TEXT NOT NULL,
300                model TEXT NOT NULL,
301                embedding BLOB NOT NULL,
302                created_at TEXT NOT NULL,
303                PRIMARY KEY (chunk_hash, model)
304            )",
305            [],
306        )?;
307        self.conn.execute(
308            "CREATE INDEX IF NOT EXISTS idx_chunk_embeddings_hash ON chunk_embeddings(chunk_hash)",
309            [],
310        )?;
311
312        // Update schema version
313        self.conn.execute(
314            "INSERT OR REPLACE INTO schema_version (version) VALUES (?1)",
315            params![2],
316        )?;
317
318        Ok(())
319    }
320
321    fn migrate_to_v3(&self) -> Result<()> {
322        // Add source_type column to documents if not exists
323        let has_source_type: bool = self
324            .conn
325            .query_row(
326                "SELECT COUNT(*) > 0 FROM pragma_table_info('documents') WHERE name = 'source_type'",
327                [],
328                |row| row.get(0),
329            )
330            .unwrap_or(false);
331
332        if !has_source_type {
333            self.conn.execute(
334                "ALTER TABLE documents ADD COLUMN source_type TEXT NOT NULL DEFAULT 'file'",
335                [],
336            )?;
337        }
338
339        // Add source_uri column to documents if not exists
340        let has_source_uri: bool = self
341            .conn
342            .query_row(
343                "SELECT COUNT(*) > 0 FROM pragma_table_info('documents') WHERE name = 'source_uri'",
344                [],
345                |row| row.get(0),
346            )
347            .unwrap_or(false);
348
349        if !has_source_uri {
350            self.conn
351                .execute("ALTER TABLE documents ADD COLUMN source_uri TEXT", [])?;
352        }
353
354        // Add provider_type column to collections if not exists
355        let has_provider_type: bool = self
356            .conn
357            .query_row(
358                "SELECT COUNT(*) > 0 FROM pragma_table_info('collections') WHERE name = 'provider_type'",
359                [],
360                |row| row.get(0),
361            )
362            .unwrap_or(false);
363
364        if !has_provider_type {
365            self.conn.execute(
366                "ALTER TABLE collections ADD COLUMN provider_type TEXT NOT NULL DEFAULT 'file'",
367                [],
368            )?;
369        }
370
371        // Add provider_config column to collections if not exists
372        let has_provider_config: bool = self
373            .conn
374            .query_row(
375                "SELECT COUNT(*) > 0 FROM pragma_table_info('collections') WHERE name = 'provider_config'",
376                [],
377                |row| row.get(0),
378            )
379            .unwrap_or(false);
380
381        if !has_provider_config {
382            self.conn.execute(
383                "ALTER TABLE collections ADD COLUMN provider_config TEXT",
384                [],
385            )?;
386        }
387
388        // Update schema version
389        self.conn.execute(
390            "INSERT OR REPLACE INTO schema_version (version) VALUES (?1)",
391            params![3],
392        )?;
393
394        Ok(())
395    }
396
397    fn migrate_to_v4(&self) -> Result<()> {
398        // Add LLM metadata columns to documents table
399        let columns_to_add = vec![
400            "llm_summary",
401            "llm_title",
402            "llm_keywords",
403            "llm_category",
404            "llm_intent",
405            "llm_concepts",
406            "llm_difficulty",
407            "llm_queries",
408            "llm_metadata_generated_at",
409            "llm_model",
410        ];
411
412        for column in columns_to_add {
413            let has_column: bool = self
414                .conn
415                .query_row(
416                    "SELECT COUNT(*) > 0 FROM pragma_table_info('documents') WHERE name = ?1",
417                    params![column],
418                    |row| row.get(0),
419                )
420                .unwrap_or(false);
421
422            if !has_column {
423                self.conn.execute(
424                    &format!("ALTER TABLE documents ADD COLUMN {} TEXT", column),
425                    [],
426                )?;
427            }
428        }
429
430        // Rebuild FTS index to include metadata columns
431        // Drop and recreate is the safest approach for FTS5
432        self.conn
433            .execute("DROP TABLE IF EXISTS documents_fts", [])?;
434        self.conn.execute(
435            "CREATE VIRTUAL TABLE documents_fts USING fts5(
436                filepath,
437                title,
438                body,
439                llm_summary,
440                llm_title,
441                llm_keywords,
442                llm_intent,
443                llm_concepts,
444                tokenize='porter unicode61'
445            )",
446            [],
447        )?;
448
449        // Rebuild FTS data from existing documents
450        self.conn.execute(
451            "INSERT INTO documents_fts(rowid, filepath, title, body, llm_summary, llm_title, llm_keywords, llm_intent, llm_concepts)
452             SELECT
453                d.id,
454                d.collection || '/' || d.path,
455                d.title,
456                c.doc,
457                d.llm_summary,
458                d.llm_title,
459                d.llm_keywords,
460                d.llm_intent,
461                d.llm_concepts
462             FROM documents d
463             JOIN content c ON c.hash = d.hash
464             WHERE d.active = 1",
465            [],
466        )?;
467
468        // Recreate triggers with metadata support
469        self.conn
470            .execute("DROP TRIGGER IF EXISTS documents_ai", [])?;
471        self.conn
472            .execute("DROP TRIGGER IF EXISTS documents_au", [])?;
473        self.conn
474            .execute("DROP TRIGGER IF EXISTS documents_ad", [])?;
475
476        self.conn.execute(
477            "CREATE TRIGGER documents_ai
478             AFTER INSERT ON documents
479             WHEN new.active = 1
480             BEGIN
481                 INSERT INTO documents_fts(rowid, filepath, title, body, llm_summary, llm_title, llm_keywords, llm_intent, llm_concepts)
482                 SELECT
483                     new.id,
484                     new.collection || '/' || new.path,
485                     new.title,
486                     (SELECT doc FROM content WHERE hash = new.hash),
487                     new.llm_summary,
488                     new.llm_title,
489                     new.llm_keywords,
490                     new.llm_intent,
491                     new.llm_concepts;
492             END",
493            [],
494        )?;
495
496        self.conn.execute(
497            "CREATE TRIGGER documents_au
498             AFTER UPDATE ON documents
499             BEGIN
500                 DELETE FROM documents_fts WHERE rowid = old.id;
501                 INSERT INTO documents_fts(rowid, filepath, title, body, llm_summary, llm_title, llm_keywords, llm_intent, llm_concepts)
502                 SELECT
503                     new.id,
504                     new.collection || '/' || new.path,
505                     new.title,
506                     (SELECT doc FROM content WHERE hash = new.hash),
507                     new.llm_summary,
508                     new.llm_title,
509                     new.llm_keywords,
510                     new.llm_intent,
511                     new.llm_concepts
512                 WHERE new.active = 1;
513             END",
514            [],
515        )?;
516
517        self.conn.execute(
518            "CREATE TRIGGER documents_ad
519             AFTER DELETE ON documents
520             BEGIN
521                 DELETE FROM documents_fts WHERE rowid = old.id;
522             END",
523            [],
524        )?;
525
526        // Update schema version
527        self.conn.execute(
528            "INSERT OR REPLACE INTO schema_version (version) VALUES (?1)",
529            params![4],
530        )?;
531
532        Ok(())
533    }
534
535    fn migrate_to_v5(&self) -> Result<()> {
536        // Add user_metadata column to documents table
537        let has_user_metadata: bool = self
538            .conn
539            .query_row(
540                "SELECT COUNT(*) > 0 FROM pragma_table_info('documents') WHERE name = 'user_metadata'",
541                [],
542                |row| row.get(0),
543            )
544            .unwrap_or(false);
545
546        if !has_user_metadata {
547            self.conn
548                .execute("ALTER TABLE documents ADD COLUMN user_metadata TEXT", [])?;
549        }
550
551        // Create index on user_metadata for efficient queries
552        self.conn.execute(
553            "CREATE INDEX IF NOT EXISTS idx_documents_user_metadata ON documents(user_metadata)",
554            [],
555        )?;
556
557        // Update schema version
558        self.conn.execute(
559            "INSERT OR REPLACE INTO schema_version (version) VALUES (?1)",
560            params![5],
561        )?;
562
563        Ok(())
564    }
565
    /// v6: rebuild the FTS index to include `user_metadata` and `modified_at`,
    /// making user metadata and timestamps full-text searchable. The sync
    /// triggers are recreated to match the new FTS column set.
    fn migrate_to_v6(&self) -> Result<()> {
        // Rebuild FTS index to include user_metadata and modified_at
        // This makes user metadata and timestamps full-text searchable

        // Drop and recreate FTS table with new columns (FTS5 cannot ALTER
        // its column set in place).
        self.conn
            .execute("DROP TABLE IF EXISTS documents_fts", [])?;

        self.conn.execute(
            "CREATE VIRTUAL TABLE documents_fts USING fts5(
                filepath,
                title,
                body,
                llm_summary,
                llm_title,
                llm_keywords,
                llm_intent,
                llm_concepts,
                user_metadata,
                modified_at,
                tokenize='porter unicode61'
            )",
            [],
        )?;

        // Rebuild FTS data from existing documents (active rows only; the
        // body text comes from the content table via the hash join).
        self.conn.execute(
            "INSERT INTO documents_fts(rowid, filepath, title, body, llm_summary, llm_title, llm_keywords, llm_intent, llm_concepts, user_metadata, modified_at)
             SELECT
                d.id,
                d.collection || '/' || d.path,
                d.title,
                c.doc,
                d.llm_summary,
                d.llm_title,
                d.llm_keywords,
                d.llm_intent,
                d.llm_concepts,
                d.user_metadata,
                d.modified_at
             FROM documents d
             JOIN content c ON c.hash = d.hash
             WHERE d.active = 1",
            [],
        )?;

        // Recreate triggers with user_metadata and modified_at support; the
        // old triggers still target the pre-v6 FTS column set.
        self.conn
            .execute("DROP TRIGGER IF EXISTS documents_ai", [])?;
        self.conn
            .execute("DROP TRIGGER IF EXISTS documents_au", [])?;
        self.conn
            .execute("DROP TRIGGER IF EXISTS documents_ad", [])?;

        self.conn.execute(
            "CREATE TRIGGER documents_ai
             AFTER INSERT ON documents
             WHEN new.active = 1
             BEGIN
                 INSERT INTO documents_fts(rowid, filepath, title, body, llm_summary, llm_title, llm_keywords, llm_intent, llm_concepts, user_metadata, modified_at)
                 SELECT
                     new.id,
                     new.collection || '/' || new.path,
                     new.title,
                     (SELECT doc FROM content WHERE hash = new.hash),
                     new.llm_summary,
                     new.llm_title,
                     new.llm_keywords,
                     new.llm_intent,
                     new.llm_concepts,
                     new.user_metadata,
                     new.modified_at;
             END",
            [],
        )?;

        self.conn.execute(
            "CREATE TRIGGER documents_au
             AFTER UPDATE ON documents
             BEGIN
                 DELETE FROM documents_fts WHERE rowid = old.id;
                 INSERT INTO documents_fts(rowid, filepath, title, body, llm_summary, llm_title, llm_keywords, llm_intent, llm_concepts, user_metadata, modified_at)
                 SELECT
                     new.id,
                     new.collection || '/' || new.path,
                     new.title,
                     (SELECT doc FROM content WHERE hash = new.hash),
                     new.llm_summary,
                     new.llm_title,
                     new.llm_keywords,
                     new.llm_intent,
                     new.llm_concepts,
                     new.user_metadata,
                     new.modified_at
                 WHERE new.active = 1;
             END",
            [],
        )?;

        self.conn.execute(
            "CREATE TRIGGER documents_ad
             AFTER DELETE ON documents
             BEGIN
                 DELETE FROM documents_fts WHERE rowid = old.id;
             END",
            [],
        )?;

        // Update schema version (each step stamps its own version so an
        // interrupted upgrade resumes correctly).
        self.conn.execute(
            "INSERT OR REPLACE INTO schema_version (version) VALUES (?1)",
            params![6],
        )?;

        Ok(())
    }
682}
683
#[cfg(test)]
mod tests {
    use super::*;

    // A freshly initialized database reports the latest schema version.
    #[test]
    fn test_open_in_memory() {
        let db = Database::open_in_memory().unwrap();
        db.initialize().unwrap();
        assert_eq!(db.schema_version().unwrap(), Some(SCHEMA_VERSION));
    }

    // Build a v2-era schema by hand, then verify initialize() migrates it all
    // the way to the current version and adds the v3 source/provider columns.
    #[test]
    fn test_migration_v2_to_v3() {
        let db = Database::open_in_memory().unwrap();

        // Hand-rolled v2 fixture: no source_* / provider_* columns yet.
        db.conn
            .execute_batch(
                "CREATE TABLE collections (
                name TEXT PRIMARY KEY,
                path TEXT NOT NULL,
                pattern TEXT NOT NULL DEFAULT '**/*.md',
                created_at TEXT NOT NULL,
                updated_at TEXT NOT NULL
            );
            CREATE TABLE documents (
                id INTEGER PRIMARY KEY AUTOINCREMENT,
                collection TEXT NOT NULL,
                path TEXT NOT NULL,
                title TEXT NOT NULL,
                hash TEXT NOT NULL,
                created_at TEXT NOT NULL,
                modified_at TEXT NOT NULL,
                active INTEGER NOT NULL DEFAULT 1,
                UNIQUE(collection, path)
            );
            CREATE TABLE schema_version (version INTEGER PRIMARY KEY);
            INSERT INTO schema_version VALUES (2);",
            )
            .unwrap();

        assert_eq!(db.schema_version().unwrap(), Some(2));

        db.initialize().unwrap();

        // Migrations must have run to completion.
        assert_eq!(db.schema_version().unwrap(), Some(6));

        let has_provider_type: bool = db.conn.query_row(
            "SELECT COUNT(*) > 0 FROM pragma_table_info('collections') WHERE name = 'provider_type'",
            [],
            |row| row.get(0),
        ).unwrap();
        assert!(
            has_provider_type,
            "collections should have provider_type column"
        );

        let has_provider_config: bool = db.conn.query_row(
            "SELECT COUNT(*) > 0 FROM pragma_table_info('collections') WHERE name = 'provider_config'",
            [],
            |row| row.get(0),
        ).unwrap();
        assert!(
            has_provider_config,
            "collections should have provider_config column"
        );

        let has_source_type: bool = db.conn.query_row(
            "SELECT COUNT(*) > 0 FROM pragma_table_info('documents') WHERE name = 'source_type'",
            [],
            |row| row.get(0),
        ).unwrap();
        assert!(has_source_type, "documents should have source_type column");

        let has_source_uri: bool = db
            .conn
            .query_row(
                "SELECT COUNT(*) > 0 FROM pragma_table_info('documents') WHERE name = 'source_uri'",
                [],
                |row| row.get(0),
            )
            .unwrap();
        assert!(has_source_uri, "documents should have source_uri column");
    }

    // Build a v3-era schema by hand, then verify initialize() adds every
    // LLM metadata column introduced in v4.
    #[test]
    fn test_migration_v3_to_v4() {
        let db = Database::open_in_memory().unwrap();

        // Hand-rolled v3 fixture: provider/source columns present, no llm_*.
        db.conn
            .execute_batch(
                "CREATE TABLE collections (
                name TEXT PRIMARY KEY,
                path TEXT NOT NULL,
                pattern TEXT NOT NULL DEFAULT '**/*.md',
                created_at TEXT NOT NULL,
                updated_at TEXT NOT NULL,
                provider_type TEXT NOT NULL DEFAULT 'file',
                provider_config TEXT
            );
            CREATE TABLE documents (
                id INTEGER PRIMARY KEY AUTOINCREMENT,
                collection TEXT NOT NULL,
                path TEXT NOT NULL,
                title TEXT NOT NULL,
                hash TEXT NOT NULL,
                created_at TEXT NOT NULL,
                modified_at TEXT NOT NULL,
                active INTEGER NOT NULL DEFAULT 1,
                source_type TEXT NOT NULL DEFAULT 'file',
                source_uri TEXT,
                UNIQUE(collection, path)
            );
            CREATE TABLE schema_version (version INTEGER PRIMARY KEY);
            INSERT INTO schema_version VALUES (3);",
            )
            .unwrap();

        assert_eq!(db.schema_version().unwrap(), Some(3));

        db.initialize().unwrap();

        assert_eq!(db.schema_version().unwrap(), Some(6));

        // Every LLM metadata column added by migrate_to_v4 must exist.
        let metadata_columns = vec![
            "llm_summary",
            "llm_title",
            "llm_keywords",
            "llm_category",
            "llm_intent",
            "llm_concepts",
            "llm_difficulty",
            "llm_queries",
            "llm_metadata_generated_at",
            "llm_model",
        ];

        for column in metadata_columns {
            let has_column: bool = db
                .conn
                .query_row(
                    "SELECT COUNT(*) > 0 FROM pragma_table_info('documents') WHERE name = ?1",
                    params![column],
                    |row| row.get(0),
                )
                .unwrap();
            assert!(has_column, "documents should have {} column", column);
        }
    }

    // Build a v4-era schema (including the v4-shape FTS table) by hand, then
    // verify initialize() adds user_metadata and its index.
    #[test]
    fn test_migration_v4_to_v5() {
        let db = Database::open_in_memory().unwrap();

        // Hand-rolled v4 fixture: llm_* columns present, no user_metadata.
        db.conn
            .execute_batch(
                "CREATE TABLE collections (
                name TEXT PRIMARY KEY,
                path TEXT NOT NULL,
                pattern TEXT NOT NULL DEFAULT '**/*.md',
                created_at TEXT NOT NULL,
                updated_at TEXT NOT NULL,
                provider_type TEXT NOT NULL DEFAULT 'file',
                provider_config TEXT
            );
            CREATE TABLE documents (
                id INTEGER PRIMARY KEY AUTOINCREMENT,
                collection TEXT NOT NULL,
                path TEXT NOT NULL,
                title TEXT NOT NULL,
                hash TEXT NOT NULL,
                created_at TEXT NOT NULL,
                modified_at TEXT NOT NULL,
                active INTEGER NOT NULL DEFAULT 1,
                source_type TEXT NOT NULL DEFAULT 'file',
                source_uri TEXT,
                llm_summary TEXT,
                llm_title TEXT,
                llm_keywords TEXT,
                llm_category TEXT,
                llm_intent TEXT,
                llm_concepts TEXT,
                llm_difficulty TEXT,
                llm_queries TEXT,
                llm_metadata_generated_at TEXT,
                llm_model TEXT,
                UNIQUE(collection, path)
            );
            CREATE TABLE content (
                hash TEXT PRIMARY KEY,
                doc TEXT NOT NULL,
                created_at TEXT NOT NULL
            );
            CREATE VIRTUAL TABLE documents_fts USING fts5(
                filepath,
                title,
                body,
                llm_summary,
                llm_title,
                llm_keywords,
                llm_intent,
                llm_concepts,
                tokenize='porter unicode61'
            );
            CREATE TABLE schema_version (version INTEGER PRIMARY KEY);
            INSERT INTO schema_version VALUES (4);",
            )
            .unwrap();

        assert_eq!(db.schema_version().unwrap(), Some(4));

        db.initialize().unwrap();

        assert_eq!(db.schema_version().unwrap(), Some(6));

        let has_user_metadata: bool = db
            .conn
            .query_row(
                "SELECT COUNT(*) > 0 FROM pragma_table_info('documents') WHERE name = 'user_metadata'",
                [],
                |row| row.get(0),
            )
            .unwrap();
        assert!(
            has_user_metadata,
            "documents should have user_metadata column"
        );

        // The v5 index must also have been created.
        let has_index: bool = db
            .conn
            .query_row(
                "SELECT COUNT(*) > 0 FROM sqlite_master WHERE type = 'index' AND name = 'idx_documents_user_metadata'",
                [],
                |row| row.get(0),
            )
            .unwrap();
        assert!(has_index, "user_metadata should have index");
    }
}