Skip to main content

mirage_analyzer/storage/
sqlite_backend.rs

//! SQLite backend implementation of the mirage storage trait.
//!
//! Uses direct rusqlite queries against the `cfg_blocks` table for CFG data.
//!
//! # Design
//!
//! - Implements `StorageTrait` for SQLite databases
//! - Uses `prepare_cached` for query performance
//! - Leverages Magellan's `cfg_blocks` table (schema v7+)
//! - Provides backend-agnostic `CfgBlockData` from SQL queries
//!
//! # Examples
//!
//! ```ignore
//! # use mirage_analyzer::storage::sqlite_backend::SqliteStorage;
//! # use mirage_analyzer::storage::StorageTrait;
//! # use anyhow::Result;
//! # use std::path::Path;
//! # fn main() -> Result<()> {
//! let storage = SqliteStorage::open(Path::new("/path/to/codegraph.db"))?;
//! let blocks = storage.get_cfg_blocks(123)?;
//! # Ok(())
//! # }
//! ```
23
24use anyhow::{Context, Result};
25use rusqlite::{Connection, params};
26use std::path::Path as StdPath;
27
28use super::{CfgBlockData, StorageTrait};
29use crate::cfg::Path;
30
31/// Convert string from database to PathKind
32fn str_to_path_kind(s: &str) -> Result<crate::cfg::PathKind> {
33    match s {
34        "Normal" => Ok(crate::cfg::PathKind::Normal),
35        "Error" => Ok(crate::cfg::PathKind::Error),
36        "Degenerate" => Ok(crate::cfg::PathKind::Degenerate),
37        "Unreachable" => Ok(crate::cfg::PathKind::Unreachable),
38        _ => anyhow::bail!("Unknown path kind: {}", s),
39    }
40}
41
42/// SQLite backend implementation
43///
44/// Wraps a rusqlite Connection and implements StorageTrait
45/// using direct SQL queries to Magellan's cfg_blocks table.
46#[derive(Debug)]
47pub struct SqliteStorage {
48    conn: Connection,
49}
50
51impl SqliteStorage {
52    /// Open SQLite database at the given path
53    ///
54    /// # Arguments
55    ///
56    /// * `db_path` - Path to the SQLite database file
57    ///
58    /// # Returns
59    ///
60    /// * `Ok(SqliteStorage)` - Storage instance ready for queries
61    /// * `Err(...)` - Error if file cannot be opened
62    ///
63    /// # Examples
64    ///
65    /// ```ignore
66    /// # use mirage_analyzer::storage::sqlite_backend::SqliteStorage;
67    /// # fn main() -> anyhow::Result<()> {
68    /// let storage = SqliteStorage::open("codegraph.db")?;
69    /// # Ok(())
70    /// # }
71    /// ```
72    pub fn open(db_path: &StdPath) -> Result<Self> {
73        let conn = Connection::open(db_path)
74            .map_err(|e| anyhow::anyhow!("Failed to open SQLite database: {}", e))?;
75        Ok(Self { conn })
76    }
77
78    /// Get a reference to the underlying Connection
79    ///
80    /// This is useful for legacy code that needs direct SQL access.
81    pub fn conn(&self) -> &Connection {
82        &self.conn
83    }
84}
85
86impl StorageTrait for SqliteStorage {
87    /// Get CFG blocks for a function from SQLite backend
88    ///
89    /// Queries Magellan's cfg_blocks table for all blocks belonging
90    /// to the given function_id, ordered by block ID.
91    ///
92    /// # Arguments
93    ///
94    /// * `function_id` - ID of the function in graph_entities
95    ///
96    /// # Returns
97    ///
98    /// * `Ok(Vec<CfgBlockData>)` - Vector of CFG block data
99    /// * `Err(...)` - Error if query fails
100    ///
101    /// # Note
102    ///
103    /// - Uses prepare_cached for performance on repeated calls
104    /// - Returns empty Vec if function has no CFG blocks (not an error)
105    fn get_cfg_blocks(&self, function_id: i64) -> Result<Vec<CfgBlockData>> {
106        let mut stmt = self.conn.prepare_cached(
107            "SELECT id, kind, terminator, byte_start, byte_end,
108                    start_line, start_col, end_line, end_col
109             FROM cfg_blocks
110             WHERE function_id = ?
111             ORDER BY id ASC"
112        ).map_err(|e| anyhow::anyhow!("Failed to prepare cfg_blocks query: {}", e))?;
113
114        let blocks = stmt.query_map(params![function_id], |row| {
115            Ok(CfgBlockData {
116                id: row.get(0)?,
117                kind: row.get(1)?,
118                terminator: row.get(2)?,
119                byte_start: row.get::<_, Option<i64>>(3)?.unwrap_or(0) as u64,
120                byte_end: row.get::<_, Option<i64>>(4)?.unwrap_or(0) as u64,
121                start_line: row.get::<_, Option<i64>>(5)?.unwrap_or(0) as u64,
122                start_col: row.get::<_, Option<i64>>(6)?.unwrap_or(0) as u64,
123                end_line: row.get::<_, Option<i64>>(7)?.unwrap_or(0) as u64,
124                end_col: row.get::<_, Option<i64>>(8)?.unwrap_or(0) as u64,
125            })
126        })
127        .map_err(|e| anyhow::anyhow!("Failed to execute cfg_blocks query: {}", e))?
128        .collect::<Result<Vec<_>, _>>()
129        .map_err(|e| anyhow::anyhow!("Failed to collect cfg_blocks rows: {}", e))?;
130
131        Ok(blocks)
132    }
133
134    /// Get entity by ID from SQLite backend
135    ///
136    /// Queries the graph_entities table for the entity with the given ID.
137    ///
138    /// # Arguments
139    ///
140    /// * `entity_id` - ID of the entity
141    ///
142    /// # Returns
143    ///
144    /// * `Some(GraphEntity)` - Entity if found
145    /// * `None` - Entity not found
146    fn get_entity(&self, entity_id: i64) -> Option<sqlitegraph::GraphEntity> {
147        self.conn
148            .query_row(
149                "SELECT id, kind, name, file_path, data
150                 FROM graph_entities
151                 WHERE id = ?",
152                params![entity_id],
153                |row| {
154                    Ok(sqlitegraph::GraphEntity {
155                        id: row.get(0)?,
156                        kind: row.get(1)?,
157                        name: row.get(2)?,
158                        file_path: row.get(3)?,
159                        data: serde_json::from_str(row.get::<_, String>(4)?.as_str())
160                            .unwrap_or_default(),
161                    })
162                },
163            )
164            .ok()
165    }
166
167    /// Get cached paths for a function from SQLite backend
168    ///
169    /// Queries the cfg_paths table for cached enumerated paths.
170    ///
171    /// # Arguments
172    ///
173    /// * `function_id` - ID of the function
174    ///
175    /// # Returns
176    ///
177    /// * `Ok(Some(paths))` - Cached paths if available
178    /// * `Ok(None)` - No cached paths
179    /// * `Err(...)` - Error if query fails
180    fn get_cached_paths(&self, function_id: i64) -> Result<Option<Vec<Path>>> {
181        // Query cfg_paths table for all paths of this function
182        let mut stmt = self.conn.prepare(
183            "SELECT path_id, path_kind, entry_block, exit_block
184             FROM cfg_paths
185             WHERE function_id = ?1"
186        ).context("Failed to prepare cfg_paths query")?;
187
188        let path_rows = stmt.query_map(params![function_id], |row| {
189            Ok((
190                row.get::<_, String>(0)?,
191                row.get::<_, String>(1)?,
192                row.get::<_, i64>(2)?,
193                row.get::<_, i64>(3)?,
194            ))
195        }).context("Failed to execute cfg_paths query")?;
196
197        let mut paths = Vec::new();
198
199        for path_row in path_rows {
200            let (path_id, kind_str, entry, exit) = path_row?;
201            let kind = str_to_path_kind(&kind_str)
202                .with_context(|| format!("Invalid path kind: {}", kind_str))?;
203
204            // Query cfg_path_elements for blocks in this path
205            let mut elem_stmt = self.conn.prepare(
206                "SELECT block_id
207                 FROM cfg_path_elements
208                 WHERE path_id = ?1
209                 ORDER BY sequence_order ASC"
210            ).context("Failed to prepare cfg_path_elements query")?;
211
212            let block_rows = elem_stmt.query_map(params![&path_id], |row| {
213                row.get::<_, i64>(0)
214            }).context("Failed to execute cfg_path_elements query")?;
215
216            let mut blocks = Vec::new();
217            for block_row in block_rows {
218                let block_id: i64 = block_row?;
219                // BlockId in Path is usize, convert from i64
220                blocks.push(block_id as usize);
221            }
222
223            paths.push(Path {
224                path_id,
225                blocks,
226                kind,
227                entry: entry as usize,
228                exit: exit as usize,
229            });
230        }
231
232        if paths.is_empty() {
233            Ok(None)
234        } else {
235            Ok(Some(paths))
236        }
237    }
238}
239
#[cfg(test)]
mod tests {
    use super::*;

    /// Helper to create a test database with the Magellan-style schema.
    ///
    /// The database contains one entity (id 1, `test_function`) and three
    /// CFG blocks for it. `cfg_paths` and `cfg_path_elements` exist but are
    /// empty. The returned temp file must be kept alive for as long as the
    /// database is in use.
    fn create_test_db() -> tempfile::NamedTempFile {
        let temp_file = tempfile::NamedTempFile::new().unwrap();
        let conn = Connection::open(temp_file.path()).unwrap();

        // Schema and fixtures are applied as a single batch.
        conn.execute_batch(
            r#"
            CREATE TABLE magellan_meta (
                id INTEGER PRIMARY KEY CHECK (id = 1),
                magellan_schema_version INTEGER NOT NULL,
                sqlitegraph_schema_version INTEGER NOT NULL,
                created_at INTEGER NOT NULL
            );

            INSERT INTO magellan_meta (id, magellan_schema_version, sqlitegraph_schema_version, created_at)
            VALUES (1, 7, 3, 0);

            CREATE TABLE graph_entities (
                id INTEGER PRIMARY KEY AUTOINCREMENT,
                kind TEXT NOT NULL,
                name TEXT NOT NULL,
                file_path TEXT,
                data TEXT NOT NULL
            );

            CREATE TABLE cfg_blocks (
                id INTEGER PRIMARY KEY AUTOINCREMENT,
                function_id INTEGER NOT NULL,
                kind TEXT NOT NULL,
                terminator TEXT NOT NULL,
                byte_start INTEGER,
                byte_end INTEGER,
                start_line INTEGER,
                start_col INTEGER,
                end_line INTEGER,
                end_col INTEGER,
                FOREIGN KEY (function_id) REFERENCES graph_entities(id)
            );

            CREATE INDEX idx_cfg_blocks_function ON cfg_blocks(function_id);

            CREATE TABLE cfg_paths (
                path_id TEXT PRIMARY KEY,
                function_id INTEGER NOT NULL,
                path_kind TEXT NOT NULL,
                entry_block INTEGER NOT NULL,
                exit_block INTEGER NOT NULL,
                length INTEGER NOT NULL,
                created_at INTEGER NOT NULL,
                FOREIGN KEY (function_id) REFERENCES graph_entities(id)
            );

            CREATE INDEX IF NOT EXISTS idx_cfg_paths_function ON cfg_paths(function_id);

            CREATE TABLE cfg_path_elements (
                path_id TEXT NOT NULL,
                sequence_order INTEGER NOT NULL,
                block_id INTEGER NOT NULL,
                PRIMARY KEY (path_id, sequence_order),
                FOREIGN KEY (path_id) REFERENCES cfg_paths(path_id)
            );

            INSERT INTO graph_entities (kind, name, file_path, data)
            VALUES ('Symbol', 'test_function', '/tmp/test.rs', '{"kind": "Function"}');

            INSERT INTO cfg_blocks (function_id, kind, terminator, byte_start, byte_end,
                                    start_line, start_col, end_line, end_col)
            VALUES (1, 'entry', 'fallthrough', 0, 10, 1, 0, 1, 10),
                   (1, 'normal', 'conditional', 10, 50, 2, 4, 5, 8),
                   (1, 'return', 'return', 50, 60, 5, 0, 5, 10);
            "#,
        )
        .unwrap();

        temp_file
    }

    #[test]
    fn test_sqlite_storage_open() {
        let temp_file = create_test_db();
        let result = SqliteStorage::open(temp_file.path());
        assert!(result.is_ok(), "Should open test database");
    }

    #[test]
    fn test_sqlite_storage_get_cfg_blocks() {
        let temp_file = create_test_db();
        let storage = SqliteStorage::open(temp_file.path()).unwrap();

        let blocks = storage.get_cfg_blocks(1).unwrap();
        assert_eq!(blocks.len(), 3, "Should have 3 CFG blocks");

        // Check first block (entry)
        assert_eq!(blocks[0].kind, "entry");
        assert_eq!(blocks[0].terminator, "fallthrough");
        assert_eq!(blocks[0].byte_start, 0);
        assert_eq!(blocks[0].byte_end, 10);

        // Check second block (conditional)
        assert_eq!(blocks[1].kind, "normal");
        assert_eq!(blocks[1].terminator, "conditional");

        // Check third block (return)
        assert_eq!(blocks[2].kind, "return");
        assert_eq!(blocks[2].terminator, "return");
    }

    #[test]
    fn test_sqlite_storage_get_cfg_blocks_empty() {
        let temp_file = create_test_db();
        let storage = SqliteStorage::open(temp_file.path()).unwrap();

        // Function 999 doesn't exist
        let blocks = storage.get_cfg_blocks(999).unwrap();
        assert_eq!(blocks.len(), 0, "Should return empty Vec for non-existent function");
    }

    #[test]
    fn test_sqlite_storage_get_cfg_blocks_null_spans_default_to_zero() {
        let temp_file = create_test_db();
        let conn = Connection::open(temp_file.path()).unwrap();

        // The span columns are nullable; leave all six of them unset.
        conn.execute(
            "INSERT INTO cfg_blocks (function_id, kind, terminator)
             VALUES (1, 'normal', 'goto')",
            [],
        ).unwrap();

        let storage = SqliteStorage::open(temp_file.path()).unwrap();
        let blocks = storage.get_cfg_blocks(1).unwrap();
        assert_eq!(blocks.len(), 4, "Should see the extra block");

        // Rows are ordered by id, so the freshly inserted block is last.
        let last = blocks.last().unwrap();
        assert_eq!(last.terminator, "goto");
        assert_eq!(last.byte_start, 0);
        assert_eq!(last.byte_end, 0);
        assert_eq!(last.start_line, 0);
        assert_eq!(last.start_col, 0);
        assert_eq!(last.end_line, 0);
        assert_eq!(last.end_col, 0);
    }

    #[test]
    fn test_sqlite_storage_get_entity() {
        let temp_file = create_test_db();
        let storage = SqliteStorage::open(temp_file.path()).unwrap();

        let entity = storage.get_entity(1);
        assert!(entity.is_some(), "Should find entity with ID 1");
        let entity = entity.unwrap();
        assert_eq!(entity.id, 1);
        assert_eq!(entity.kind, "Symbol");
        assert_eq!(entity.name, "test_function");
    }

    #[test]
    fn test_sqlite_storage_get_entity_not_found() {
        let temp_file = create_test_db();
        let storage = SqliteStorage::open(temp_file.path()).unwrap();

        let entity = storage.get_entity(999);
        assert!(entity.is_none(), "Should return None for non-existent entity");
    }

    #[test]
    fn test_sqlite_storage_get_cached_paths_none_when_empty() {
        let temp_file = create_test_db();
        let storage = SqliteStorage::open(temp_file.path()).unwrap();

        // No cached paths for function 1
        let paths = storage.get_cached_paths(1).unwrap();
        assert!(paths.is_none(), "Should return None when no cached paths");
    }

    #[test]
    fn test_sqlite_storage_get_cached_paths_with_data() {
        let temp_file = create_test_db();
        let conn = Connection::open(temp_file.path()).unwrap();

        // Insert a test path into cfg_paths
        conn.execute(
            "INSERT INTO cfg_paths (path_id, function_id, path_kind, entry_block, exit_block, length, created_at)
             VALUES ('test_path_123', 1, 'Normal', 100, 102, 3, 1000)",
            [],
        ).unwrap();

        // Insert path elements into cfg_path_elements
        conn.execute(
            "INSERT INTO cfg_path_elements (path_id, sequence_order, block_id) VALUES
             ('test_path_123', 0, 100),
             ('test_path_123', 1, 101),
             ('test_path_123', 2, 102)",
            [],
        ).unwrap();

        let storage = SqliteStorage::open(temp_file.path()).unwrap();
        let paths = storage.get_cached_paths(1).unwrap();

        assert!(paths.is_some(), "Should return Some when cached paths exist");
        let paths = paths.unwrap();
        assert_eq!(paths.len(), 1, "Should have 1 path");

        let path = &paths[0];
        assert_eq!(path.path_id, "test_path_123");
        assert_eq!(path.blocks, vec![100, 101, 102]);
        assert_eq!(path.kind, crate::cfg::PathKind::Normal);
        assert_eq!(path.entry, 100);
        assert_eq!(path.exit, 102);
    }

    #[test]
    fn test_sqlite_storage_get_cached_paths_orders_elements_by_sequence() {
        let temp_file = create_test_db();
        let conn = Connection::open(temp_file.path()).unwrap();

        conn.execute(
            "INSERT INTO cfg_paths (path_id, function_id, path_kind, entry_block, exit_block, length, created_at)
             VALUES ('shuffled', 1, 'Error', 7, 9, 3, 1000)",
            [],
        ).unwrap();

        // Elements are inserted out of order on purpose.
        conn.execute(
            "INSERT INTO cfg_path_elements (path_id, sequence_order, block_id) VALUES
             ('shuffled', 2, 9),
             ('shuffled', 0, 7),
             ('shuffled', 1, 8)",
            [],
        ).unwrap();

        let storage = SqliteStorage::open(temp_file.path()).unwrap();
        let paths = storage.get_cached_paths(1).unwrap().unwrap();

        assert_eq!(paths.len(), 1, "Should have 1 path");
        assert_eq!(paths[0].blocks, vec![7, 8, 9]);
        assert_eq!(paths[0].kind, crate::cfg::PathKind::Error);
    }

    #[test]
    fn test_sqlite_storage_get_cached_paths_unknown_kind_is_error() {
        let temp_file = create_test_db();
        let conn = Connection::open(temp_file.path()).unwrap();

        conn.execute(
            "INSERT INTO cfg_paths (path_id, function_id, path_kind, entry_block, exit_block, length, created_at)
             VALUES ('bad_kind', 1, 'Bogus', 0, 0, 0, 1000)",
            [],
        ).unwrap();

        let storage = SqliteStorage::open(temp_file.path()).unwrap();
        assert!(
            storage.get_cached_paths(1).is_err(),
            "Unknown path kind should be reported as an error"
        );
    }
}