Skip to main content

rlm_rs/storage/
schema.rs

1//! Database schema definitions.
2//!
3//! Contains SQL schema and migration logic for the RLM `SQLite` database.
4
5/// Current schema version.
6pub const CURRENT_SCHEMA_VERSION: u32 = 3;
7
8/// SQL schema for initial database setup.
9pub const SCHEMA_SQL: &str = r"
10-- Schema version tracking
11CREATE TABLE IF NOT EXISTS schema_info (
12    key TEXT PRIMARY KEY,
13    value TEXT NOT NULL
14);
15
16-- RLM Context state
17CREATE TABLE IF NOT EXISTS context (
18    id INTEGER PRIMARY KEY CHECK (id = 1),  -- Singleton
19    data TEXT NOT NULL,  -- JSON serialized Context
20    created_at INTEGER NOT NULL,
21    updated_at INTEGER NOT NULL
22);
23
24-- Buffers (text content containers)
25CREATE TABLE IF NOT EXISTS buffers (
26    id INTEGER PRIMARY KEY AUTOINCREMENT,
27    name TEXT,
28    source_path TEXT,
29    content TEXT NOT NULL,
30    content_type TEXT,
31    content_hash TEXT,
32    size INTEGER NOT NULL,
33    line_count INTEGER,
34    chunk_count INTEGER,
35    created_at INTEGER NOT NULL,
36    updated_at INTEGER NOT NULL
37);
38
39-- Index for buffer lookup by name
40CREATE INDEX IF NOT EXISTS idx_buffers_name ON buffers(name);
41
42-- Index for buffer lookup by hash (deduplication)
43CREATE INDEX IF NOT EXISTS idx_buffers_hash ON buffers(content_hash);
44
45-- Chunks (segments of buffer content)
46CREATE TABLE IF NOT EXISTS chunks (
47    id INTEGER PRIMARY KEY AUTOINCREMENT,
48    buffer_id INTEGER NOT NULL,
49    content TEXT NOT NULL,
50    byte_start INTEGER NOT NULL,
51    byte_end INTEGER NOT NULL,
52    chunk_index INTEGER NOT NULL,
53    strategy TEXT,
54    token_count INTEGER,
55    line_start INTEGER,
56    line_end INTEGER,
57    has_overlap INTEGER NOT NULL DEFAULT 0,
58    content_hash TEXT,
59    custom_metadata TEXT,  -- JSON for extensible metadata
60    created_at INTEGER NOT NULL,
61    FOREIGN KEY (buffer_id) REFERENCES buffers(id) ON DELETE CASCADE
62);
63
64-- Index for chunk lookup by buffer
65CREATE INDEX IF NOT EXISTS idx_chunks_buffer ON chunks(buffer_id);
66
67-- Index for chunk ordering
68CREATE INDEX IF NOT EXISTS idx_chunks_order ON chunks(buffer_id, chunk_index);
69
70-- Metadata key-value store for extensibility
71CREATE TABLE IF NOT EXISTS metadata (
72    key TEXT PRIMARY KEY,
73    value TEXT NOT NULL,
74    created_at INTEGER NOT NULL,
75    updated_at INTEGER NOT NULL
76);
77
78-- Chunk embeddings for semantic search (v2)
79CREATE TABLE IF NOT EXISTS chunk_embeddings (
80    chunk_id INTEGER PRIMARY KEY,
81    embedding BLOB NOT NULL,  -- f32 array serialized as bytes
82    dimensions INTEGER NOT NULL,
83    model_name TEXT,
84    created_at INTEGER NOT NULL,
85    FOREIGN KEY (chunk_id) REFERENCES chunks(id) ON DELETE CASCADE
86);
87
88-- FTS5 virtual table for BM25 full-text search (v2)
89CREATE VIRTUAL TABLE IF NOT EXISTS chunks_fts USING fts5(
90    content,
91    content='chunks',
92    content_rowid='id',
93    tokenize='porter unicode61'
94);
95
96-- Triggers to keep FTS5 index in sync with chunks table (v2)
97CREATE TRIGGER IF NOT EXISTS chunks_ai AFTER INSERT ON chunks BEGIN
98    INSERT INTO chunks_fts(rowid, content) VALUES (new.id, new.content);
99END;
100
101CREATE TRIGGER IF NOT EXISTS chunks_ad AFTER DELETE ON chunks BEGIN
102    INSERT INTO chunks_fts(chunks_fts, rowid, content) VALUES('delete', old.id, old.content);
103END;
104
105CREATE TRIGGER IF NOT EXISTS chunks_au AFTER UPDATE ON chunks BEGIN
106    INSERT INTO chunks_fts(chunks_fts, rowid, content) VALUES('delete', old.id, old.content);
107    INSERT INTO chunks_fts(chunks_fts, rowid, content) VALUES (new.id, new.content);
108END;
109";
110
111/// SQL to check if schema is initialized.
112pub const CHECK_SCHEMA_SQL: &str = r"
113SELECT COUNT(*) FROM sqlite_master
114WHERE type='table' AND name='schema_info';
115";
116
117/// SQL to get schema version.
118pub const GET_VERSION_SQL: &str = r"
119SELECT value FROM schema_info WHERE key = 'version';
120";
121
122/// SQL to set schema version.
123pub const SET_VERSION_SQL: &str = r"
124INSERT OR REPLACE INTO schema_info (key, value) VALUES ('version', ?);
125";
126
127/// Migrations from older schema versions.
128pub struct Migration {
129    /// Version this migration upgrades from.
130    pub from_version: u32,
131    /// Version this migration upgrades to.
132    pub to_version: u32,
133    /// SQL statements to execute.
134    pub sql: &'static str,
135}
136
137/// SQL for v1 to v2 migration (adds embeddings + FTS5).
138const MIGRATION_V1_TO_V2: &str = r"
139-- Chunk embeddings for semantic search
140CREATE TABLE IF NOT EXISTS chunk_embeddings (
141    chunk_id INTEGER PRIMARY KEY,
142    embedding BLOB NOT NULL,
143    dimensions INTEGER NOT NULL,
144    model_name TEXT,
145    created_at INTEGER NOT NULL,
146    FOREIGN KEY (chunk_id) REFERENCES chunks(id) ON DELETE CASCADE
147);
148
149-- FTS5 virtual table for BM25 full-text search
150CREATE VIRTUAL TABLE IF NOT EXISTS chunks_fts USING fts5(
151    content,
152    content='chunks',
153    content_rowid='id',
154    tokenize='porter unicode61'
155);
156
157-- Triggers to keep FTS5 index in sync
158CREATE TRIGGER IF NOT EXISTS chunks_ai AFTER INSERT ON chunks BEGIN
159    INSERT INTO chunks_fts(rowid, content) VALUES (new.id, new.content);
160END;
161
162CREATE TRIGGER IF NOT EXISTS chunks_ad AFTER DELETE ON chunks BEGIN
163    INSERT INTO chunks_fts(chunks_fts, rowid, content) VALUES('delete', old.id, old.content);
164END;
165
166CREATE TRIGGER IF NOT EXISTS chunks_au AFTER UPDATE ON chunks BEGIN
167    INSERT INTO chunks_fts(chunks_fts, rowid, content) VALUES('delete', old.id, old.content);
168    INSERT INTO chunks_fts(chunks_fts, rowid, content) VALUES (new.id, new.content);
169END;
170
171-- Populate FTS5 index from existing chunks
172INSERT INTO chunks_fts(rowid, content) SELECT id, content FROM chunks;
173";
174
175/// SQL for v2 to v3 migration (clear embeddings for BGE-M3 model switch).
176///
177/// BGE-M3 uses 1024 dimensions vs 384 for all-MiniLM-L6-v2.
178/// Existing embeddings are incompatible and must be regenerated.
179const MIGRATION_V2_TO_V3: &str = r"
180-- Clear existing embeddings (incompatible dimensions: 384 -> 1024)
181-- Users must re-run embedding generation after this migration
182DELETE FROM chunk_embeddings;
183";
184
185/// Available migrations.
186pub const MIGRATIONS: &[Migration] = &[
187    Migration {
188        from_version: 1,
189        to_version: 2,
190        sql: MIGRATION_V1_TO_V2,
191    },
192    Migration {
193        from_version: 2,
194        to_version: 3,
195        sql: MIGRATION_V2_TO_V3,
196    },
197];
198
199/// Gets migrations needed to upgrade from a version.
200#[must_use]
201pub fn get_migrations_from(current_version: u32) -> Vec<&'static Migration> {
202    MIGRATIONS
203        .iter()
204        .filter(|m| m.from_version >= current_version && m.to_version <= CURRENT_SCHEMA_VERSION)
205        .collect()
206}
207
208#[cfg(test)]
209mod tests {
210    use super::*;
211
212    #[test]
213    fn test_schema_version() {
214        const _: () = assert!(CURRENT_SCHEMA_VERSION >= 1);
215    }
216
217    #[test]
218    fn test_schema_sql_not_empty() {
219        assert!(!SCHEMA_SQL.is_empty());
220        assert!(SCHEMA_SQL.contains("CREATE TABLE"));
221    }
222
223    #[test]
224    fn test_migrations_ordered() {
225        for migration in MIGRATIONS {
226            assert!(migration.to_version > migration.from_version);
227        }
228    }
229
230    #[test]
231    fn test_get_migrations_from() {
232        let migrations = get_migrations_from(0);
233        // Should return all migrations for fresh install
234        assert!(migrations.len() <= MIGRATIONS.len());
235    }
236}