Skip to main content

keel_core/
sqlite.rs

1use rusqlite::{params, Connection, Result as SqlResult};
2
3use crate::types::{ExternalEndpoint, GraphError, GraphNode, NodeKind};
4
/// Schema version stamped on freshly created databases and used as the
/// upgrade target when migrating older databases.
const SCHEMA_VERSION: u32 = 4;
6
7/// SQLite-backed implementation of the GraphStore trait.
8pub struct SqliteGraphStore {
9    pub(crate) conn: Connection,
10}
11
12impl SqliteGraphStore {
13    /// Open or create a graph database at the given path.
14    pub fn open(path: &str) -> Result<Self, GraphError> {
15        let conn = Connection::open(path)?;
16        Self::set_performance_pragmas(&conn)?;
17        let store = SqliteGraphStore { conn };
18        store.initialize_schema()?;
19        Ok(store)
20    }
21
22    /// Create an in-memory graph database (for testing).
23    pub fn in_memory() -> Result<Self, GraphError> {
24        let conn = Connection::open_in_memory()?;
25        Self::set_performance_pragmas(&conn)?;
26        let store = SqliteGraphStore { conn };
27        store.initialize_schema()?;
28        Ok(store)
29    }
30
31    /// Apply SQLite performance pragmas for faster reads and writes.
32    fn set_performance_pragmas(conn: &Connection) -> Result<(), GraphError> {
33        conn.execute_batch(
34            "
35            PRAGMA journal_mode = WAL;
36            PRAGMA synchronous = NORMAL;
37            PRAGMA cache_size = -8000;
38            PRAGMA temp_store = MEMORY;
39            PRAGMA mmap_size = 268435456;
40            PRAGMA foreign_keys = ON;
41            ",
42        )?;
43        Ok(())
44    }
45
46    /// Temporarily disable foreign key enforcement (for bulk re-map operations).
47    /// Returns the actual FK state after the change (for verification).
48    pub fn set_foreign_keys(&self, enabled: bool) -> Result<bool, GraphError> {
49        let val = if enabled { "ON" } else { "OFF" };
50        self.conn
51            .execute_batch(&format!("PRAGMA foreign_keys = {};", val))?;
52        // Verify the change took effect
53        let actual: i32 = self
54            .conn
55            .pragma_query_value(None, "foreign_keys", |row| row.get(0))
56            .unwrap_or(if enabled { 1 } else { 0 });
57        Ok(actual != 0)
58    }
59
60    fn initialize_schema(&self) -> Result<(), GraphError> {
61        self.conn.execute_batch(
62            "
63            -- Schema version tracking
64            CREATE TABLE IF NOT EXISTS keel_meta (
65                key TEXT PRIMARY KEY,
66                value TEXT NOT NULL
67            );
68
69            -- Nodes
70            CREATE TABLE IF NOT EXISTS nodes (
71                id INTEGER PRIMARY KEY,
72                hash TEXT NOT NULL UNIQUE,
73                kind TEXT NOT NULL CHECK (kind IN ('module', 'class', 'function')),
74                name TEXT NOT NULL,
75                signature TEXT NOT NULL DEFAULT '',
76                file_path TEXT NOT NULL,
77                line_start INTEGER NOT NULL,
78                line_end INTEGER NOT NULL,
79                docstring TEXT,
80                is_public INTEGER NOT NULL DEFAULT 0,
81                type_hints_present INTEGER NOT NULL DEFAULT 0,
82                has_docstring INTEGER NOT NULL DEFAULT 0,
83                module_id INTEGER REFERENCES nodes(id),
84                package TEXT DEFAULT NULL,
85                resolution_tier TEXT NOT NULL DEFAULT '',
86                created_at TEXT NOT NULL DEFAULT (datetime('now')),
87                updated_at TEXT NOT NULL DEFAULT (datetime('now'))
88            );
89            CREATE INDEX IF NOT EXISTS idx_nodes_hash ON nodes(hash);
90            CREATE INDEX IF NOT EXISTS idx_nodes_file ON nodes(file_path);
91            CREATE INDEX IF NOT EXISTS idx_nodes_module ON nodes(module_id);
92            CREATE INDEX IF NOT EXISTS idx_nodes_kind ON nodes(kind);
93            CREATE INDEX IF NOT EXISTS idx_nodes_name_kind ON nodes(name, kind);
94
95            -- Previous hashes for rename tracking
96            CREATE TABLE IF NOT EXISTS previous_hashes (
97                node_id INTEGER NOT NULL REFERENCES nodes(id) ON DELETE CASCADE,
98                hash TEXT NOT NULL,
99                created_at TEXT NOT NULL DEFAULT (datetime('now')),
100                PRIMARY KEY (node_id, hash)
101            );
102
103            -- External endpoints
104            CREATE TABLE IF NOT EXISTS external_endpoints (
105                id INTEGER PRIMARY KEY,
106                node_id INTEGER NOT NULL REFERENCES nodes(id) ON DELETE CASCADE,
107                kind TEXT NOT NULL,
108                method TEXT NOT NULL DEFAULT '',
109                path TEXT NOT NULL,
110                direction TEXT NOT NULL CHECK (direction IN ('serves', 'calls'))
111            );
112            CREATE INDEX IF NOT EXISTS idx_endpoints_node ON external_endpoints(node_id);
113
114            -- Edges
115            CREATE TABLE IF NOT EXISTS edges (
116                id INTEGER PRIMARY KEY,
117                source_id INTEGER NOT NULL REFERENCES nodes(id) ON DELETE CASCADE,
118                target_id INTEGER NOT NULL REFERENCES nodes(id) ON DELETE CASCADE,
119                kind TEXT NOT NULL CHECK (kind IN ('calls', 'imports', 'inherits', 'contains')),
120                confidence REAL NOT NULL DEFAULT 1.0,
121                file_path TEXT NOT NULL,
122                line INTEGER NOT NULL,
123                UNIQUE(source_id, target_id, kind, file_path, line)
124            );
125            CREATE INDEX IF NOT EXISTS idx_edges_source ON edges(source_id);
126            CREATE INDEX IF NOT EXISTS idx_edges_target ON edges(target_id);
127            CREATE INDEX IF NOT EXISTS idx_edges_source_kind ON edges(source_id, kind);
128
129            -- Module profiles
130            CREATE TABLE IF NOT EXISTS module_profiles (
131                module_id INTEGER PRIMARY KEY REFERENCES nodes(id) ON DELETE CASCADE,
132                path TEXT NOT NULL,
133                function_count INTEGER NOT NULL DEFAULT 0,
134                class_count INTEGER NOT NULL DEFAULT 0,
135                line_count INTEGER NOT NULL DEFAULT 0,
136                function_name_prefixes TEXT NOT NULL DEFAULT '[]',
137                primary_types TEXT NOT NULL DEFAULT '[]',
138                import_sources TEXT NOT NULL DEFAULT '[]',
139                export_targets TEXT NOT NULL DEFAULT '[]',
140                external_endpoint_count INTEGER NOT NULL DEFAULT 0,
141                responsibility_keywords TEXT NOT NULL DEFAULT '[]'
142            );
143
144            -- Resolution cache
145            CREATE TABLE IF NOT EXISTS resolution_cache (
146                call_site_hash TEXT PRIMARY KEY,
147                resolved_node_id INTEGER REFERENCES nodes(id),
148                confidence REAL NOT NULL,
149                resolution_tier TEXT NOT NULL,
150                cached_at TEXT NOT NULL DEFAULT (datetime('now'))
151            );
152
153            -- Circuit breaker state
154            CREATE TABLE IF NOT EXISTS circuit_breaker (
155                error_code TEXT NOT NULL,
156                hash TEXT NOT NULL,
157                consecutive_failures INTEGER NOT NULL DEFAULT 0,
158                last_failure_at TEXT NOT NULL DEFAULT (datetime('now')),
159                downgraded INTEGER NOT NULL DEFAULT 0,
160                PRIMARY KEY (error_code, hash)
161            );
162            ",
163        )?;
164
165        // Set schema version if not present (new databases get current version)
166        self.conn.execute(
167            "INSERT OR IGNORE INTO keel_meta (key, value) VALUES ('schema_version', ?1)",
168            params![SCHEMA_VERSION.to_string()],
169        )?;
170
171        // Run migrations for existing databases
172        self.run_migrations()?;
173
174        // Create indexes that depend on columns added by migrations.
175        // These use IF NOT EXISTS so they're safe to run on every open.
176        let _ = self
177            .conn
178            .execute_batch("CREATE INDEX IF NOT EXISTS idx_nodes_package ON nodes(package)");
179
180        Ok(())
181    }
182
183    /// Run schema migrations from current version to SCHEMA_VERSION.
184    fn run_migrations(&self) -> Result<(), GraphError> {
185        let current = self.schema_version()?;
186        if current >= SCHEMA_VERSION {
187            return Ok(());
188        }
189        if current < 2 {
190            self.migrate_v1_to_v2()?;
191        }
192        if current < 3 {
193            self.migrate_v2_to_v3()?;
194        }
195        if current < 4 {
196            self.migrate_v3_to_v4()?;
197        }
198        Ok(())
199    }
200
201    /// Migrate from schema v1 to v2: add resolution_tier to nodes, confidence to edges.
202    fn migrate_v1_to_v2(&self) -> Result<(), GraphError> {
203        // Add resolution_tier column to nodes (ignore if already exists)
204        let _ = self
205            .conn
206            .execute_batch("ALTER TABLE nodes ADD COLUMN resolution_tier TEXT NOT NULL DEFAULT ''");
207        // Add confidence column to edges (ignore if already exists)
208        let _ = self
209            .conn
210            .execute_batch("ALTER TABLE edges ADD COLUMN confidence REAL NOT NULL DEFAULT 1.0");
211        // Update schema version to 2
212        self.conn.execute(
213            "UPDATE keel_meta SET value = '2' WHERE key = 'schema_version'",
214            [],
215        )?;
216        Ok(())
217    }
218
219    /// Migrate from schema v2 to v3: add package column to nodes.
220    fn migrate_v2_to_v3(&self) -> Result<(), GraphError> {
221        let _ = self
222            .conn
223            .execute_batch("ALTER TABLE nodes ADD COLUMN package TEXT DEFAULT NULL");
224        let _ = self
225            .conn
226            .execute_batch("CREATE INDEX IF NOT EXISTS idx_nodes_package ON nodes(package)");
227        self.conn.execute(
228            "UPDATE keel_meta SET value = '3' WHERE key = 'schema_version'",
229            [],
230        )?;
231        Ok(())
232    }
233
234    /// Migrate from schema v3 to v4: extend resolution_cache for Tier 3.
235    fn migrate_v3_to_v4(&self) -> Result<(), GraphError> {
236        let _ = self.conn.execute_batch(
237            "ALTER TABLE resolution_cache ADD COLUMN file_content_hash TEXT DEFAULT NULL",
238        );
239        let _ = self
240            .conn
241            .execute_batch("ALTER TABLE resolution_cache ADD COLUMN target_file TEXT DEFAULT NULL");
242        let _ = self
243            .conn
244            .execute_batch("ALTER TABLE resolution_cache ADD COLUMN target_name TEXT DEFAULT NULL");
245        let _ = self
246            .conn
247            .execute_batch("ALTER TABLE resolution_cache ADD COLUMN provider TEXT DEFAULT NULL");
248        self.conn.execute(
249            "UPDATE keel_meta SET value = '4' WHERE key = 'schema_version'",
250            [],
251        )?;
252        Ok(())
253    }
254
255    /// Get the current schema version.
256    pub fn schema_version(&self) -> Result<u32, GraphError> {
257        let version: String = self.conn.query_row(
258            "SELECT value FROM keel_meta WHERE key = 'schema_version'",
259            [],
260            |row| row.get(0),
261        )?;
262        version
263            .parse()
264            .map_err(|e| GraphError::Internal(format!("Invalid schema version: {}", e)))
265    }
266
267    /// Remove edges whose source or target node no longer exists.
268    pub fn cleanup_orphaned_edges(&self) -> Result<u64, GraphError> {
269        let deleted = self.conn.execute(
270            "DELETE FROM edges WHERE source_id NOT IN (SELECT id FROM nodes) OR target_id NOT IN (SELECT id FROM nodes)",
271            [],
272        )?;
273        Ok(deleted as u64)
274    }
275
276    /// Clear all graph data (nodes, edges, etc.) for a full re-map.
277    /// Preserves schema and metadata.
278    pub fn clear_all(&mut self) -> Result<(), GraphError> {
279        self.conn.execute_batch(
280            "
281            DELETE FROM edges;
282            DELETE FROM resolution_cache;
283            DELETE FROM circuit_breaker;
284            DELETE FROM module_profiles;
285            DELETE FROM external_endpoints;
286            DELETE FROM previous_hashes;
287            DELETE FROM nodes;
288            ",
289        )?;
290        Ok(())
291    }
292
293    /// Convert a SQLite row into a `GraphNode` (without relations loaded).
294    pub(crate) fn row_to_node(row: &rusqlite::Row) -> SqlResult<GraphNode> {
295        let kind_str: String = row.get("kind")?;
296        let kind = match kind_str.as_str() {
297            "module" => NodeKind::Module,
298            "class" => NodeKind::Class,
299            "function" => NodeKind::Function,
300            _ => NodeKind::Function, // fallback
301        };
302        Ok(GraphNode {
303            id: row.get("id")?,
304            hash: row.get("hash")?,
305            kind,
306            name: row.get("name")?,
307            signature: row.get("signature")?,
308            file_path: row.get("file_path")?,
309            line_start: row.get("line_start")?,
310            line_end: row.get("line_end")?,
311            docstring: row.get("docstring")?,
312            is_public: row.get::<_, i32>("is_public")? != 0,
313            type_hints_present: row.get::<_, i32>("type_hints_present")? != 0,
314            has_docstring: row.get::<_, i32>("has_docstring")? != 0,
315            external_endpoints: Vec::new(), // loaded separately
316            previous_hashes: Vec::new(),    // loaded separately
317            module_id: row.get::<_, Option<u64>>("module_id")?.unwrap_or(0),
318            package: row.get::<_, Option<String>>("package").unwrap_or(None),
319        })
320    }
321
322    /// Load all external endpoints associated with a given node.
323    pub(crate) fn load_endpoints(&self, node_id: u64) -> Vec<ExternalEndpoint> {
324        let mut stmt = match self.conn.prepare(
325            "SELECT kind, method, path, direction FROM external_endpoints WHERE node_id = ?1",
326        ) {
327            Ok(s) => s,
328            Err(e) => {
329                eprintln!("[keel] load_endpoints: prepare failed: {e}");
330                return Vec::new();
331            }
332        };
333
334        let result = match stmt.query_map(params![node_id], |row| {
335            Ok(ExternalEndpoint {
336                kind: row.get(0)?,
337                method: row.get(1)?,
338                path: row.get(2)?,
339                direction: row.get(3)?,
340            })
341        }) {
342            Ok(rows) => rows.filter_map(|r| r.ok()).collect(),
343            Err(e) => {
344                eprintln!("[keel] load_endpoints: query failed: {e}");
345                Vec::new()
346            }
347        };
348        result
349    }
350
351    /// Load the most recent previous hashes for a node (up to 3, newest first).
352    pub(crate) fn load_previous_hashes(&self, node_id: u64) -> Vec<String> {
353        let mut stmt = match self.conn.prepare(
354            "SELECT hash FROM previous_hashes WHERE node_id = ?1 ORDER BY created_at DESC LIMIT 3",
355        ) {
356            Ok(s) => s,
357            Err(e) => {
358                eprintln!("[keel] load_previous_hashes: prepare failed: {e}");
359                return Vec::new();
360            }
361        };
362
363        let result = match stmt.query_map(params![node_id], |row| row.get(0)) {
364            Ok(rows) => rows.filter_map(|r| r.ok()).collect(),
365            Err(e) => {
366                eprintln!("[keel] load_previous_hashes: query failed: {e}");
367                Vec::new()
368            }
369        };
370        result
371    }
372
373    /// Attach endpoints and previous hashes to a node, returning the enriched node.
374    pub(crate) fn node_with_relations(&self, mut node: GraphNode) -> GraphNode {
375        node.external_endpoints = self.load_endpoints(node.id);
376        node.previous_hashes = self.load_previous_hashes(node.id);
377        node
378    }
379
380    /// Insert or update module profiles in bulk.
381    /// Uses INSERT ... ON CONFLICT DO UPDATE for upsert semantics.
382    pub fn upsert_module_profiles(
383        &self,
384        profiles: Vec<crate::types::ModuleProfile>,
385    ) -> Result<(), GraphError> {
386        let tx = self.conn.unchecked_transaction()?;
387        {
388            let mut stmt = tx.prepare(
389                "INSERT INTO module_profiles (
390                    module_id, path, function_count, class_count, line_count,
391                    function_name_prefixes, primary_types, import_sources,
392                    export_targets, external_endpoint_count, responsibility_keywords
393                ) VALUES (?1, ?2, ?3, ?4, ?5, ?6, ?7, ?8, ?9, ?10, ?11)
394                ON CONFLICT(module_id) DO UPDATE SET
395                    path = excluded.path,
396                    function_count = excluded.function_count,
397                    class_count = excluded.class_count,
398                    line_count = excluded.line_count,
399                    function_name_prefixes = excluded.function_name_prefixes,
400                    primary_types = excluded.primary_types,
401                    import_sources = excluded.import_sources,
402                    export_targets = excluded.export_targets,
403                    external_endpoint_count = excluded.external_endpoint_count,
404                    responsibility_keywords = excluded.responsibility_keywords",
405            )?;
406            for p in &profiles {
407                let prefixes_json = serde_json::to_string(&p.function_name_prefixes)
408                    .unwrap_or_else(|_| "[]".to_string());
409                let types_json =
410                    serde_json::to_string(&p.primary_types).unwrap_or_else(|_| "[]".to_string());
411                let imports_json =
412                    serde_json::to_string(&p.import_sources).unwrap_or_else(|_| "[]".to_string());
413                let exports_json =
414                    serde_json::to_string(&p.export_targets).unwrap_or_else(|_| "[]".to_string());
415                let keywords_json = serde_json::to_string(&p.responsibility_keywords)
416                    .unwrap_or_else(|_| "[]".to_string());
417                stmt.execute(params![
418                    p.module_id,
419                    p.path,
420                    p.function_count,
421                    p.class_count,
422                    p.line_count,
423                    prefixes_json,
424                    types_json,
425                    imports_json,
426                    exports_json,
427                    p.external_endpoint_count,
428                    keywords_json,
429                ])?;
430            }
431        }
432        tx.commit()?;
433        Ok(())
434    }
435}
436
437#[cfg(test)]
438#[path = "sqlite_tests.rs"]
439mod tests;