Skip to main content

mirage/storage/
sqlite_backend.rs

1//! SQLite backend implementation for mirage storage trait
2//!
3//! Uses direct rusqlite queries to cfg_blocks table for CFG data.
4//!
5//! # Design
6//!
7//! - Implements `StorageTrait` for SQLite databases
8//! - Uses `prepare_cached` for query performance
9//! - Leverages Magellan's cfg_blocks table (schema v7+)
10//! - Provides backend-agnostic `CfgBlockData` from SQL queries
11//!
12//! # Examples
13//!
14//! ```ignore
15//! # use mirage_analyzer::storage::sqlite_backend::SqliteStorage;
16//! # use anyhow::Result;
17//! # fn main() -> Result<()> {
18//! let storage = SqliteStorage::open("/path/to/codegraph.db")?;
19//! let blocks = storage.get_cfg_blocks(123)?;
20//! # Ok(())
21//! # }
22//! ```
23
24use anyhow::{Context, Result};
25use rusqlite::{params, Connection};
26use std::path::Path as StdPath;
27
28use super::{CfgBlockData, StorageTrait};
29use crate::cfg::Path;
30
31/// Convert string from database to PathKind
32fn str_to_path_kind(s: &str) -> Result<crate::cfg::PathKind> {
33    match s {
34        "Normal" => Ok(crate::cfg::PathKind::Normal),
35        "Error" => Ok(crate::cfg::PathKind::Error),
36        "Degenerate" => Ok(crate::cfg::PathKind::Degenerate),
37        "Unreachable" => Ok(crate::cfg::PathKind::Unreachable),
38        _ => anyhow::bail!("Unknown path kind: {}", s),
39    }
40}
41
42/// SQLite backend implementation
43///
44/// Wraps a rusqlite Connection and implements StorageTrait
45/// using direct SQL queries to Magellan's cfg_blocks table.
46#[derive(Debug)]
47pub struct SqliteStorage {
48    conn: Connection,
49}
50
51impl SqliteStorage {
52    /// Open SQLite database at the given path
53    ///
54    /// # Arguments
55    ///
56    /// * `db_path` - Path to the SQLite database file
57    ///
58    /// # Returns
59    ///
60    /// * `Ok(SqliteStorage)` - Storage instance ready for queries
61    /// * `Err(...)` - Error if file cannot be opened
62    ///
63    /// # Examples
64    ///
65    /// ```ignore
66    /// # use mirage_analyzer::storage::sqlite_backend::SqliteStorage;
67    /// # fn main() -> anyhow::Result<()> {
68    /// let storage = SqliteStorage::open("codegraph.db")?;
69    /// # Ok(())
70    /// # }
71    /// ```
72    pub fn open(db_path: &StdPath) -> Result<Self> {
73        let conn = Connection::open(db_path)
74            .map_err(|e| anyhow::anyhow!("Failed to open SQLite database: {}", e))?;
75        Ok(Self { conn })
76    }
77
78    /// Get a reference to the underlying Connection
79    ///
80    /// This is useful for legacy code that needs direct SQL access.
81    pub fn conn(&self) -> &Connection {
82        &self.conn
83    }
84}
85
86impl StorageTrait for SqliteStorage {
87    /// Get CFG blocks for a function from SQLite backend
88    ///
89    /// Queries Magellan's cfg_blocks table for all blocks belonging
90    /// to the given function_id, ordered by block ID.
91    ///
92    /// # Arguments
93    ///
94    /// * `function_id` - ID of the function in graph_entities
95    ///
96    /// # Returns
97    ///
98    /// * `Ok(Vec<CfgBlockData>)` - Vector of CFG block data
99    /// * `Err(...)` - Error if query fails
100    ///
101    /// # Note
102    ///
103    /// - Uses prepare_cached for performance on repeated calls
104    /// - Returns empty Vec if function has no CFG blocks (not an error)
105    fn get_cfg_blocks(&self, function_id: i64) -> Result<Vec<CfgBlockData>> {
106        let mut stmt = self
107            .conn
108            .prepare_cached(
109                "SELECT id, kind, terminator, byte_start, byte_end,
110                    start_line, start_col, end_line, end_col,
111                    coord_x, coord_y, coord_z
112             FROM cfg_blocks
113             WHERE function_id = ?
114             ORDER BY id ASC",
115            )
116            .map_err(|e| anyhow::anyhow!("Failed to prepare cfg_blocks query: {}", e))?;
117
118        let blocks = stmt
119            .query_map(params![function_id], |row| {
120                Ok(CfgBlockData {
121                    id: row.get(0)?,
122                    kind: row.get(1)?,
123                    terminator: row.get(2)?,
124                    byte_start: row.get::<_, Option<i64>>(3)?.unwrap_or(0) as u64,
125                    byte_end: row.get::<_, Option<i64>>(4)?.unwrap_or(0) as u64,
126                    start_line: row.get::<_, Option<i64>>(5)?.unwrap_or(0) as u64,
127                    start_col: row.get::<_, Option<i64>>(6)?.unwrap_or(0) as u64,
128                    end_line: row.get::<_, Option<i64>>(7)?.unwrap_or(0) as u64,
129                    end_col: row.get::<_, Option<i64>>(8)?.unwrap_or(0) as u64,
130                    // 4D spatial coordinates from Magellan's cfg_blocks table
131                    coord_x: row.get::<_, Option<i64>>(9)?.unwrap_or(0),
132                    coord_y: row.get::<_, Option<i64>>(10)?.unwrap_or(0),
133                    coord_z: row.get::<_, Option<i64>>(11)?.unwrap_or(0),
134                })
135            })
136            .map_err(|e| anyhow::anyhow!("Failed to execute cfg_blocks query: {}", e))?
137            .collect::<Result<Vec<_>, _>>()
138            .map_err(|e| anyhow::anyhow!("Failed to collect cfg_blocks rows: {}", e))?;
139
140        Ok(blocks)
141    }
142
143    /// Get entity by ID from SQLite backend
144    ///
145    /// Queries the graph_entities table for the entity with the given ID.
146    ///
147    /// # Arguments
148    ///
149    /// * `entity_id` - ID of the entity
150    ///
151    /// # Returns
152    ///
153    /// * `Some(GraphEntity)` - Entity if found
154    /// * `None` - Entity not found
155    fn get_entity(&self, entity_id: i64) -> Option<sqlitegraph::GraphEntity> {
156        self.conn
157            .query_row(
158                "SELECT id, kind, name, file_path, data
159                 FROM graph_entities
160                 WHERE id = ?",
161                params![entity_id],
162                |row| {
163                    Ok(sqlitegraph::GraphEntity {
164                        id: row.get(0)?,
165                        kind: row.get(1)?,
166                        name: row.get(2)?,
167                        file_path: row.get(3)?,
168                        data: serde_json::from_str(row.get::<_, String>(4)?.as_str())
169                            .unwrap_or_default(),
170                    })
171                },
172            )
173            .ok()
174    }
175
176    /// Get cached paths for a function from SQLite backend
177    ///
178    /// Queries the cfg_paths table for cached enumerated paths.
179    ///
180    /// # Arguments
181    ///
182    /// * `function_id` - ID of the function
183    ///
184    /// # Returns
185    ///
186    /// * `Ok(Some(paths))` - Cached paths if available
187    /// * `Ok(None)` - No cached paths
188    /// * `Err(...)` - Error if query fails
189    fn get_cached_paths(&self, function_id: i64) -> Result<Option<Vec<Path>>> {
190        // Query cfg_paths table for all paths of this function
191        let mut stmt = self
192            .conn
193            .prepare(
194                "SELECT path_id, path_kind, entry_block, exit_block
195             FROM cfg_paths
196             WHERE function_id = ?1",
197            )
198            .context("Failed to prepare cfg_paths query")?;
199
200        let path_rows = stmt
201            .query_map(params![function_id], |row| {
202                Ok((
203                    row.get::<_, String>(0)?,
204                    row.get::<_, String>(1)?,
205                    row.get::<_, i64>(2)?,
206                    row.get::<_, i64>(3)?,
207                ))
208            })
209            .context("Failed to execute cfg_paths query")?;
210
211        let mut paths = Vec::new();
212
213        for path_row in path_rows {
214            let (path_id, kind_str, entry, exit) = path_row?;
215            let kind = str_to_path_kind(&kind_str)
216                .with_context(|| format!("Invalid path kind: {}", kind_str))?;
217
218            // Query cfg_path_elements for blocks in this path
219            let mut elem_stmt = self
220                .conn
221                .prepare(
222                    "SELECT block_id
223                 FROM cfg_path_elements
224                 WHERE path_id = ?1
225                 ORDER BY sequence_order ASC",
226                )
227                .context("Failed to prepare cfg_path_elements query")?;
228
229            let block_rows = elem_stmt
230                .query_map(params![&path_id], |row| row.get::<_, i64>(0))
231                .context("Failed to execute cfg_path_elements query")?;
232
233            let mut blocks = Vec::new();
234            for block_row in block_rows {
235                let block_id: i64 = block_row?;
236                // BlockId in Path is usize, convert from i64
237                blocks.push(block_id as usize);
238            }
239
240            paths.push(Path {
241                path_id,
242                blocks,
243                kind,
244                entry: entry as usize,
245                exit: exit as usize,
246            });
247        }
248
249        if paths.is_empty() {
250            Ok(None)
251        } else {
252            Ok(Some(paths))
253        }
254    }
255
256    fn get_callees(&self, function_id: i64) -> Result<Vec<i64>> {
257        // Magellan schema: functions have CALLER edges to call-site entities,
258        // and call-site entities have CALLS edges to callee functions.
259        let mut stmt = self
260            .conn
261            .prepare(
262                "SELECT DISTINCT g2.to_id
263                 FROM graph_edges g1
264                 JOIN graph_edges g2 ON g1.to_id = g2.from_id
265                 WHERE g1.from_id = ? AND g1.edge_type = 'CALLER'
266                   AND g2.edge_type = 'CALLS'",
267            )
268            .map_err(|e| anyhow::anyhow!("Failed to prepare get_callees query: {}", e))?;
269
270        let callees = stmt
271            .query_map(params![function_id], |row| row.get::<_, i64>(0))
272            .map_err(|e| anyhow::anyhow!("Failed to execute get_callees query: {}", e))?
273            .collect::<Result<Vec<_>, _>>()
274            .map_err(|e| anyhow::anyhow!("Failed to collect callee rows: {}", e))?;
275
276        Ok(callees)
277    }
278}
279
280#[cfg(test)]
281mod tests {
282    use super::*;
283
284    /// Helper to create a test database with cfg_blocks table
285    fn create_test_db() -> tempfile::NamedTempFile {
286        let temp_file = tempfile::NamedTempFile::new().unwrap();
287        let conn = Connection::open(temp_file.path()).unwrap();
288
289        // Create magellan_meta table
290        conn.execute(
291            "CREATE TABLE magellan_meta (
292                id INTEGER PRIMARY KEY CHECK (id = 1),
293                magellan_schema_version INTEGER NOT NULL,
294                sqlitegraph_schema_version INTEGER NOT NULL,
295                created_at INTEGER NOT NULL
296            )",
297            [],
298        )
299        .unwrap();
300
301        conn.execute(
302            "INSERT INTO magellan_meta (id, magellan_schema_version, sqlitegraph_schema_version, created_at)
303             VALUES (1, 7, 3, 0)",
304            [],
305        ).unwrap();
306
307        // Create graph_entities table
308        conn.execute(
309            "CREATE TABLE graph_entities (
310                id INTEGER PRIMARY KEY AUTOINCREMENT,
311                kind TEXT NOT NULL,
312                name TEXT NOT NULL,
313                file_path TEXT,
314                data TEXT NOT NULL
315            )",
316            [],
317        )
318        .unwrap();
319
320        // Create cfg_blocks table
321        conn.execute(
322            "CREATE TABLE cfg_blocks (
323                id INTEGER PRIMARY KEY AUTOINCREMENT,
324                function_id INTEGER NOT NULL,
325                kind TEXT NOT NULL,
326                terminator TEXT NOT NULL,
327                byte_start INTEGER,
328                byte_end INTEGER,
329                start_line INTEGER,
330                start_col INTEGER,
331                end_line INTEGER,
332                end_col INTEGER,
333                coord_x INTEGER DEFAULT 0,
334                coord_y INTEGER DEFAULT 0,
335                coord_z INTEGER DEFAULT 0,
336                FOREIGN KEY (function_id) REFERENCES graph_entities(id)
337            )",
338            [],
339        )
340        .unwrap();
341
342        conn.execute(
343            "CREATE INDEX idx_cfg_blocks_function ON cfg_blocks(function_id)",
344            [],
345        )
346        .unwrap();
347
348        // Create cfg_paths table
349        conn.execute(
350            "CREATE TABLE cfg_paths (
351                path_id TEXT PRIMARY KEY,
352                function_id INTEGER NOT NULL,
353                path_kind TEXT NOT NULL,
354                entry_block INTEGER NOT NULL,
355                exit_block INTEGER NOT NULL,
356                length INTEGER NOT NULL,
357                created_at INTEGER NOT NULL,
358                FOREIGN KEY (function_id) REFERENCES graph_entities(id)
359            )",
360            [],
361        )
362        .unwrap();
363
364        conn.execute(
365            "CREATE INDEX IF NOT EXISTS idx_cfg_paths_function ON cfg_paths(function_id)",
366            [],
367        )
368        .unwrap();
369
370        // Create cfg_path_elements table
371        conn.execute(
372            "CREATE TABLE cfg_path_elements (
373                path_id TEXT NOT NULL,
374                sequence_order INTEGER NOT NULL,
375                block_id INTEGER NOT NULL,
376                PRIMARY KEY (path_id, sequence_order),
377                FOREIGN KEY (path_id) REFERENCES cfg_paths(path_id)
378            )",
379            [],
380        )
381        .unwrap();
382
383        // Insert a test function
384        conn.execute(
385            "INSERT INTO graph_entities (kind, name, file_path, data)
386             VALUES ('Symbol', 'test_function', '/tmp/test.rs', '{\"kind\": \"Function\"}')",
387            [],
388        )
389        .unwrap();
390
391        // Insert test CFG blocks
392        conn.execute(
393            "INSERT INTO cfg_blocks (function_id, kind, terminator, byte_start, byte_end,
394                                   start_line, start_col, end_line, end_col)
395             VALUES (1, 'entry', 'fallthrough', 0, 10, 1, 0, 1, 10),
396                    (1, 'normal', 'conditional', 10, 50, 2, 4, 5, 8),
397                    (1, 'return', 'return', 50, 60, 5, 0, 5, 10)",
398            [],
399        )
400        .unwrap();
401
402        temp_file
403    }
404
405    #[test]
406    fn test_sqlite_storage_open() {
407        let temp_file = create_test_db();
408        let result = SqliteStorage::open(temp_file.path());
409        assert!(result.is_ok(), "Should open test database");
410    }
411
412    #[test]
413    fn test_sqlite_storage_get_cfg_blocks() {
414        let temp_file = create_test_db();
415        let storage = SqliteStorage::open(temp_file.path()).unwrap();
416
417        let blocks = storage.get_cfg_blocks(1).unwrap();
418        assert_eq!(blocks.len(), 3, "Should have 3 CFG blocks");
419
420        // Check first block (entry)
421        assert_eq!(blocks[0].kind, "entry");
422        assert_eq!(blocks[0].terminator, "fallthrough");
423        assert_eq!(blocks[0].byte_start, 0);
424        assert_eq!(blocks[0].byte_end, 10);
425
426        // Check second block (conditional)
427        assert_eq!(blocks[1].kind, "normal");
428        assert_eq!(blocks[1].terminator, "conditional");
429
430        // Check third block (return)
431        assert_eq!(blocks[2].kind, "return");
432        assert_eq!(blocks[2].terminator, "return");
433    }
434
435    #[test]
436    fn test_sqlite_storage_get_cfg_blocks_empty() {
437        let temp_file = create_test_db();
438        let storage = SqliteStorage::open(temp_file.path()).unwrap();
439
440        // Function 999 doesn't exist
441        let blocks = storage.get_cfg_blocks(999).unwrap();
442        assert_eq!(
443            blocks.len(),
444            0,
445            "Should return empty Vec for non-existent function"
446        );
447    }
448
449    #[test]
450    fn test_sqlite_storage_get_entity() {
451        let temp_file = create_test_db();
452        let storage = SqliteStorage::open(temp_file.path()).unwrap();
453
454        let entity = storage.get_entity(1);
455        assert!(entity.is_some(), "Should find entity with ID 1");
456        let entity = entity.unwrap();
457        assert_eq!(entity.id, 1);
458        assert_eq!(entity.kind, "Symbol");
459        assert_eq!(entity.name, "test_function");
460    }
461
462    #[test]
463    fn test_sqlite_storage_get_entity_not_found() {
464        let temp_file = create_test_db();
465        let storage = SqliteStorage::open(temp_file.path()).unwrap();
466
467        let entity = storage.get_entity(999);
468        assert!(
469            entity.is_none(),
470            "Should return None for non-existent entity"
471        );
472    }
473
474    #[test]
475    fn test_sqlite_storage_get_cached_paths_none_when_empty() {
476        let temp_file = create_test_db();
477        let storage = SqliteStorage::open(temp_file.path()).unwrap();
478
479        // No cached paths for function 1
480        let paths = storage.get_cached_paths(1).unwrap();
481        assert!(paths.is_none(), "Should return None when no cached paths");
482    }
483
484    #[test]
485    fn test_sqlite_storage_get_cached_paths_with_data() {
486        let temp_file = create_test_db();
487        let conn = Connection::open(temp_file.path()).unwrap();
488
489        // Insert a test path into cfg_paths
490        conn.execute(
491            "INSERT INTO cfg_paths (path_id, function_id, path_kind, entry_block, exit_block, length, created_at)
492             VALUES ('test_path_123', 1, 'Normal', 100, 102, 3, 1000)",
493            [],
494        ).unwrap();
495
496        // Insert path elements into cfg_path_elements
497        conn.execute(
498            "INSERT INTO cfg_path_elements (path_id, sequence_order, block_id) VALUES
499             ('test_path_123', 0, 100),
500             ('test_path_123', 1, 101),
501             ('test_path_123', 2, 102)",
502            [],
503        )
504        .unwrap();
505
506        let storage = SqliteStorage::open(temp_file.path()).unwrap();
507        let paths = storage.get_cached_paths(1).unwrap();
508
509        assert!(
510            paths.is_some(),
511            "Should return Some when cached paths exist"
512        );
513        let paths = paths.unwrap();
514        assert_eq!(paths.len(), 1, "Should have 1 path");
515
516        let path = &paths[0];
517        assert_eq!(path.path_id, "test_path_123");
518        assert_eq!(path.blocks, vec![100, 101, 102]);
519        assert_eq!(path.kind, crate::cfg::PathKind::Normal);
520        assert_eq!(path.entry, 100);
521        assert_eq!(path.exit, 102);
522    }
523}