Skip to main content

mirage/storage/
mirage_db.rs

1use anyhow::{Context, Result};
2use rusqlite::{params, Connection, OptionalExtension};
3use sqlitegraph::{open_graph, GraphBackend, GraphConfig, SnapshotId};
4use std::path::Path;
5
6use super::{
7    Backend, BackendFormat, CfgBlockRow, MIN_MAGELLAN_SCHEMA_VERSION, MIRAGE_SCHEMA_VERSION,
8};
9
10#[cfg(feature = "backend-sqlite")]
11use super::schema::{create_schema, migrate_schema};
12
13#[cfg(feature = "backend-sqlite")]
14use super::operations::resolve_function_name_sqlite;
15
16use super::operations::load_cfg_from_rows;
17
/// Database connection wrapper
///
/// Uses Backend enum for CFG queries (Phase 069-02) and GraphBackend for entity queries.
/// This dual-backend approach allows gradual migration from direct Connection usage.
///
/// All handles are created together by `MirageDb::open` from the same path.
pub struct MirageDb {
    /// Storage backend for CFG queries (Phase 069-02)
    /// Wraps SqliteStorage for backend-agnostic CFG access.
    /// Exposed read-only through `storage()`.
    storage: Backend,

    /// Backend-agnostic graph interface for entity queries
    /// Used for entity_ids(), get_node(), kv_get() and other GraphBackend operations.
    /// Exposed read-only through `backend()`.
    graph_backend: Box<dyn GraphBackend>,

    /// Snapshot ID for consistent reads
    /// Captured once in `open()` via `SnapshotId::current()`.
    snapshot_id: SnapshotId,

    /// Path to the database file (for CodeGraph::open in SymbolNavigator)
    db_path: std::path::PathBuf,

    // SQLite-specific connection (only compiled in with the `backend-sqlite` feature)
    // DEPRECATED: Use storage field instead for new code
    // `open()` stores `Some(conn)` after schema validation succeeds.
    #[cfg(feature = "backend-sqlite")]
    conn: Option<Connection>,
}
42
43impl std::fmt::Debug for MirageDb {
44    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
45        f.debug_struct("MirageDb")
46            .field("snapshot_id", &self.snapshot_id)
47            .field("storage", &self.storage)
48            .field("graph_backend", &"<GraphBackend>")
49            .finish()
50    }
51}
52
53impl MirageDb {
54    /// Open database at the given path
55    ///
56    /// This can open:
57    /// - A Mirage database (with mirage_meta table)
58    /// - A Magellan database (extends it with Mirage tables)
59    ///
60    /// Phase 069-02: Uses Backend::detect_and_open() for CFG queries
61    /// and open_graph() for entity queries (GraphBackend).
62    pub fn open<P: AsRef<Path>>(path: P) -> Result<Self> {
63        let path = path.as_ref();
64        if !path.exists() {
65            anyhow::bail!("Database not found: {}", path.display());
66        }
67
68        // Phase 069-02: Use Backend::detect_and_open() for storage layer
69        let storage = Backend::detect_and_open(path).context("Failed to open storage backend")?;
70
71        // Detect backend format from file header for GraphBackend creation
72        let detected_backend =
73            BackendFormat::detect(path).context("Failed to detect backend format")?;
74
75        // Select appropriate GraphConfig based on detected backend
76        let cfg = match detected_backend {
77            BackendFormat::SQLite => GraphConfig::sqlite(),
78            BackendFormat::Unknown => {
79                anyhow::bail!(
80                    "Unknown database format: {}. Cannot determine backend.",
81                    path.display()
82                );
83            }
84        };
85
86        // Use open_graph factory to create GraphBackend for entity queries
87        let graph_backend = open_graph(path, &cfg).context("Failed to open graph database")?;
88
89        let snapshot_id = SnapshotId::current();
90
91        // For SQLite backend, open Connection and validate schema
92        #[cfg(feature = "backend-sqlite")]
93        let conn = {
94            let mut conn = Connection::open(path).context("Failed to open SQLite connection")?;
95            Self::validate_schema_sqlite(&mut conn, path)?;
96            Some(conn)
97        };
98
99        Ok(Self {
100            storage,
101            graph_backend,
102            snapshot_id,
103            db_path: path.to_path_buf(),
104            #[cfg(feature = "backend-sqlite")]
105            conn,
106        })
107    }
108
109    /// Validate database schema for SQLite backend
110    #[cfg(feature = "backend-sqlite")]
111    fn validate_schema_sqlite(conn: &mut Connection, _path: &Path) -> Result<()> {
112        // Check if mirage_meta table exists
113        let mirage_meta_exists: bool = conn
114            .query_row(
115                "SELECT 1 FROM sqlite_master WHERE type='table' AND name='mirage_meta'",
116                [],
117                |row| row.get(0),
118            )
119            .optional()?
120            .unwrap_or(0)
121            == 1;
122
123        // Get Mirage schema version (0 if table doesn't exist)
124        let mirage_version: i32 = if mirage_meta_exists {
125            conn.query_row(
126                "SELECT mirage_schema_version FROM mirage_meta WHERE id = 1",
127                [],
128                |row| row.get(0),
129            )
130            .optional()?
131            .flatten()
132            .unwrap_or(0)
133        } else {
134            0
135        };
136
137        if mirage_version > MIRAGE_SCHEMA_VERSION {
138            anyhow::bail!(
139                "Database schema version {} is newer than supported version {}.
140                 Please update Mirage.",
141                mirage_version,
142                MIRAGE_SCHEMA_VERSION
143            );
144        }
145
146        // Check Magellan schema compatibility
147        let magellan_version: i32 = conn
148            .query_row(
149                "SELECT magellan_schema_version FROM magellan_meta WHERE id = 1",
150                [],
151                |row| row.get(0),
152            )
153            .optional()?
154            .flatten()
155            .unwrap_or(0);
156
157        if magellan_version < MIN_MAGELLAN_SCHEMA_VERSION {
158            anyhow::bail!(
159                "Magellan schema version {} is too old (minimum {}). \
160                 Please update Magellan and run 'magellan watch' to rebuild CFGs.",
161                magellan_version,
162                MIN_MAGELLAN_SCHEMA_VERSION
163            );
164        }
165
166        // Check for cfg_blocks table existence (Magellan v7+)
167        let cfg_blocks_exists: bool = conn
168            .query_row(
169                "SELECT 1 FROM sqlite_master WHERE type='table' AND name='cfg_blocks'",
170                [],
171                |row| row.get(0),
172            )
173            .optional()?
174            .unwrap_or(0)
175            == 1;
176
177        if !cfg_blocks_exists {
178            anyhow::bail!(
179                "CFG blocks table not found. Magellan schema v7+ required. \
180                 Run 'magellan watch' to build CFGs."
181            );
182        }
183
184        // If mirage_meta doesn't exist, this is a pure Magellan database.
185        // Initialize Mirage tables to extend it.
186        if !mirage_meta_exists {
187            create_schema(conn, magellan_version)?;
188        } else if mirage_version < MIRAGE_SCHEMA_VERSION {
189            migrate_schema(conn)?;
190        }
191
192        Ok(())
193    }
194
195    /// Get a reference to the underlying Connection (SQLite backend only)
196    ///
197    /// Phase 069-02: DEPRECATED - Use storage() for CFG queries, backend() for entity queries.
198    #[cfg(feature = "backend-sqlite")]
199    pub fn conn(&self) -> Result<&Connection, anyhow::Error> {
200        self.conn.as_ref().ok_or_else(|| {
201            anyhow::anyhow!(
202                "Direct Connection access deprecated. Use storage() for CFG queries or backend() for entity queries."
203            )
204        })
205    }
206
207    /// Get a mutable reference to the underlying Connection (SQLite backend only)
208    ///
209    /// Phase 069-02: DEPRECATED - Use storage() for CFG queries, backend() for entity queries.
210    #[cfg(feature = "backend-sqlite")]
211    pub fn conn_mut(&mut self) -> Result<&mut Connection, anyhow::Error> {
212        self.conn.as_mut().ok_or_else(|| {
213            anyhow::anyhow!(
214                "Direct Connection access deprecated. Use storage() for CFG queries or backend() for entity queries."
215            )
216        })
217    }
218
219    /// Get a reference to the storage backend for CFG queries
220    ///
221    /// Phase 069-02: Use this to access CFG-specific storage operations
222    /// like get_cfg_blocks(), get_entity(), and get_cached_paths().
223    ///
224    /// This is the preferred way to access CFG data in new code.
225    pub fn storage(&self) -> &Backend {
226        &self.storage
227    }
228
229    /// Get a reference to the backend-agnostic GraphBackend interface
230    ///
231    /// Use this for entity queries (entity_ids, get_node, kv_get, etc.).
232    /// Phase 069-02: This now returns the GraphBackend used for entity queries,
233    /// while storage() provides the Backend enum for CFG queries.
234    pub fn backend(&self) -> &dyn GraphBackend {
235        self.graph_backend.as_ref()
236    }
237
238    /// Check if the database backend is SQLite
239    ///
240    /// This is useful for runtime checks when certain features
241    /// are only available with specific backends (e.g., path caching).
242    #[cfg(feature = "backend-sqlite")]
243    pub fn is_sqlite(&self) -> bool {
244        self.conn.is_some()
245    }
246
247    /// List source documents from graph memory tables
248    pub fn list_source_documents(&self) -> Result<Vec<super::DocumentInfo>> {
249        self.storage.list_source_documents()
250    }
251
252    /// Get database statistics
253    ///
254    /// Note: cfg_edges count is included for backward compatibility but edges
255    /// are now computed in memory from terminator data, not stored.
256    #[cfg(feature = "backend-sqlite")]
257    pub fn status(&self) -> Result<DatabaseStatus> {
258        match self.conn.as_ref() {
259            Some(conn) => {
260                let cfg_blocks: i64 = conn
261                    .query_row("SELECT COUNT(*) FROM cfg_blocks", [], |row| row.get(0))
262                    .unwrap_or(0);
263
264                let cfg_edges: i64 = conn
265                    .query_row("SELECT COUNT(*) FROM cfg_edges", [], |row| row.get(0))
266                    .unwrap_or(0);
267
268                let cfg_paths: i64 = conn
269                    .query_row("SELECT COUNT(*) FROM cfg_paths", [], |row| row.get(0))
270                    .unwrap_or(0);
271
272                let cfg_dominators: i64 = conn
273                    .query_row("SELECT COUNT(*) FROM cfg_dominators", [], |row| row.get(0))
274                    .unwrap_or(0);
275
276                let mirage_schema_version: i32 = conn
277                    .query_row(
278                        "SELECT mirage_schema_version FROM mirage_meta WHERE id = 1",
279                        [],
280                        |row| row.get(0),
281                    )
282                    .unwrap_or(0);
283
284                let magellan_schema_version: i32 = conn
285                    .query_row(
286                        "SELECT magellan_schema_version FROM magellan_meta WHERE id = 1",
287                        [],
288                        |row| row.get(0),
289                    )
290                    .unwrap_or(0);
291
292                #[allow(deprecated)]
293                Ok(DatabaseStatus {
294                    cfg_blocks,
295                    cfg_edges,
296                    cfg_paths,
297                    cfg_dominators,
298                    mirage_schema_version,
299                    magellan_schema_version,
300                })
301            }
302            None => self.status_via_storage(),
303        }
304    }
305
306    /// Helper function to get status via storage backend (for non-SQLite backends)
307    #[cfg(feature = "backend-sqlite")]
308    fn status_via_storage(&self) -> Result<DatabaseStatus> {
309        #[allow(deprecated)]
310        Ok(DatabaseStatus {
311            cfg_blocks: 0,
312            cfg_edges: 0,
313            cfg_paths: 0,
314            cfg_dominators: 0,
315            mirage_schema_version: MIRAGE_SCHEMA_VERSION,
316            magellan_schema_version: MIN_MAGELLAN_SCHEMA_VERSION,
317        })
318    }
319
320    /// Resolve a function name or ID to a function_id (backend-agnostic)
321    ///
322    /// # Examples
323    ///
324    /// ```no_run
325    /// # use mirage_analyzer::storage::MirageDb;
326    /// # fn main() -> anyhow::Result<()> {
327    /// # let db = MirageDb::open("test.db")?;
328    /// let func_id = db.resolve_function_name("123")?;
329    /// let func_id = db.resolve_function_name("my_function")?;
330    /// # Ok(())
331    /// # }
332    /// ```
333    #[cfg(feature = "backend-sqlite")]
334    pub fn resolve_function_name(&self, name_or_id: &str) -> Result<i64> {
335        self.resolve_function_name_with_file(name_or_id, None)
336    }
337
338    /// Resolve a function name or ID to a function_id with optional file filter
339    ///
340    /// Uses magellan's `SymbolNavigator` for name-based resolution, falling back
341    /// to direct SQL for symbol_id hash lookup.
342    #[cfg(feature = "backend-sqlite")]
343    pub fn resolve_function_name_with_file(
344        &self,
345        name_or_id: &str,
346        file_filter: Option<&str>,
347    ) -> Result<i64> {
348        if let Ok(id) = name_or_id.parse::<i64>() {
349            return Ok(id);
350        }
351
352        if let Ok(conn) = self.conn() {
353            if let Ok(id) = resolve_function_name_sqlite(conn, name_or_id, file_filter) {
354                return Ok(id);
355            }
356        }
357
358        let graph = magellan::CodeGraph::open(&self.db_path)
359            .context("Failed to open CodeGraph for symbol resolution")?;
360        let nav = graph.navigator();
361        let resolved = nav
362            .resolve(name_or_id)
363            .context(format!("Symbol resolution failed for '{}'", name_or_id))?;
364
365        let mut candidates: Vec<_> = resolved
366            .into_iter()
367            .filter(|s| s.kind == "Function" || s.kind == "Method")
368            .collect();
369
370        if let Some(file_path) = file_filter {
371            candidates.retain(|s| {
372                s.file_path
373                    .as_deref()
374                    .map(|p| p.contains(file_path))
375                    .unwrap_or(false)
376            });
377        }
378
379        if candidates.is_empty() {
380            anyhow::bail!(
381                "Function '{}' not found in database. Run 'magellan watch' to index functions.",
382                name_or_id
383            );
384        }
385
386        if candidates.len() > 1 {
387            let locations: Vec<String> = candidates
388                .iter()
389                .filter_map(|s| {
390                    s.file_path
391                        .as_deref()
392                        .map(|p| format!("{}:{}", p, s.start_line))
393                })
394                .collect();
395            anyhow::bail!(
396                "Ambiguous function name '{}' matches {} symbols: {}",
397                name_or_id,
398                candidates.len(),
399                locations.join(", ")
400            );
401        }
402
403        Ok(candidates[0].id)
404    }
405
406    /// Load a CFG from the database (backend-agnostic)
407    ///
408    /// For SQLite backend: uses SQL query on cfg_blocks table
409    ///
410    /// # Examples
411    ///
412    /// ```no_run
413    /// # use mirage_analyzer::storage::MirageDb;
414    /// # fn main() -> anyhow::Result<()> {
415    /// # let db = MirageDb::open("test.db")?;
416    /// let cfg = db.load_cfg(123)?;
417    /// # Ok(())
418    /// # }
419    /// ```
420    #[cfg(feature = "backend-sqlite")]
421    pub fn load_cfg(&self, function_id: i64) -> Result<crate::cfg::Cfg> {
422        let blocks = self.storage().get_cfg_blocks(function_id)?;
423
424        if blocks.is_empty() {
425            anyhow::bail!(
426                "No CFG blocks found for function_id {}. Run 'magellan watch' to build CFGs.",
427                function_id
428            );
429        }
430
431        let file_path = self.get_function_file(function_id);
432
433        let block_rows: Vec<CfgBlockRow> = blocks
434            .into_iter()
435            .enumerate()
436            .map(|(idx, b)| {
437                (
438                    idx as i64,
439                    b.kind,
440                    Some(b.terminator),
441                    Some(b.byte_start as i64),
442                    Some(b.byte_end as i64),
443                    Some(b.start_line as i64),
444                    Some(b.start_col as i64),
445                    Some(b.end_line as i64),
446                    Some(b.end_col as i64),
447                    Some(b.coord_x),
448                    Some(b.coord_y),
449                    Some(b.coord_z),
450                    b.cfg_condition,
451                )
452            })
453            .collect();
454
455        let cfg_edges: Vec<(i64, i64, String)> = if let Ok(conn) = self.conn() {
456            match conn.prepare_cached(
457                "SELECT source_idx, target_idx, edge_type
458                 FROM cfg_edges
459                 WHERE function_id = ?
460                 ORDER BY source_idx, target_idx",
461            ) {
462                Ok(mut stmt) => {
463                    match stmt.query_map(params![function_id], |row| {
464                        Ok((row.get(0)?, row.get(1)?, row.get(2)?))
465                    }) {
466                        Ok(rows) => rows.collect::<Result<Vec<_>, _>>().unwrap_or_default(),
467                        Err(_) => vec![],
468                    }
469                }
470                Err(_) => vec![],
471            }
472        } else {
473            vec![]
474        };
475
476        load_cfg_from_rows(
477            block_rows,
478            file_path.map(std::path::PathBuf::from),
479            cfg_edges,
480        )
481    }
482
483    /// Get the function name for a given function_id (backend-agnostic)
484    ///
485    /// For SQLite backend: queries the graph_entities table
486    pub fn get_function_name(&self, function_id: i64) -> Option<String> {
487        let snapshot = SnapshotId::current();
488        self.backend()
489            .get_node(snapshot, function_id)
490            .ok()
491            .and_then(|entity| {
492                if entity.kind == "Symbol"
493                    && entity.data.get("kind").and_then(|v| v.as_str()) == Some("Function")
494                {
495                    Some(entity.name)
496                } else {
497                    None
498                }
499            })
500    }
501
502    /// Get the file path for a given function_id (backend-agnostic)
503    pub fn get_function_file(&self, function_id: i64) -> Option<String> {
504        let snapshot = SnapshotId::current();
505        self.backend()
506            .get_node(snapshot, function_id)
507            .ok()
508            .and_then(|entity| entity.file_path)
509    }
510
511    /// Check if a function has CFG blocks (SQLite backend)
512    #[cfg(feature = "backend-sqlite")]
513    pub fn function_exists(&self, function_id: i64) -> bool {
514        use crate::storage::function_exists;
515        self.conn()
516            .map(|conn| function_exists(conn, function_id))
517            .unwrap_or(false)
518    }
519
520    /// Get the function hash for path caching (SQLite backend)
521    #[cfg(feature = "backend-sqlite")]
522    pub fn get_function_hash(&self, function_id: i64) -> Option<String> {
523        use crate::storage::get_function_hash;
524        self.conn()
525            .map(|conn| get_function_hash(conn, function_id))
526            .ok()
527            .flatten()
528    }
529}
530
/// Database status information
///
/// Produced by `MirageDb::status()`. On the SQLite path each field is read
/// with its own query and is 0 when that query fails.
#[derive(Debug, Clone, serde::Serialize)]
pub struct DatabaseStatus {
    /// Row count of the `cfg_blocks` table.
    pub cfg_blocks: i64,
    /// Row count of the `cfg_edges` table; kept for backward compatibility.
    #[deprecated(note = "Edges are now computed in memory, not stored")]
    pub cfg_edges: i64,
    /// Row count of the `cfg_paths` table.
    pub cfg_paths: i64,
    /// Row count of the `cfg_dominators` table.
    pub cfg_dominators: i64,
    /// `mirage_schema_version` from the `mirage_meta` row with `id = 1`.
    pub mirage_schema_version: i32,
    /// `magellan_schema_version` from the `magellan_meta` row with `id = 1`.
    pub magellan_schema_version: i32,
}
542
// Gated on `backend-sqlite`: the schema validation this test exercises is only
// compiled under that feature, which is the feature name used throughout this
// file.
#[cfg(all(test, feature = "backend-sqlite"))]
mod tests {
    use super::*;
    use rusqlite::Connection;

    /// Opening a database whose Magellan schema version is 6 must fail with a
    /// message that says the schema is too old and points the user at
    /// `magellan watch`.
    #[test]
    fn test_open_database_old_magellan_schema() {
        let db_file = tempfile::NamedTempFile::new().unwrap();
        {
            // Build a minimal Magellan-style database by hand; the connection
            // is dropped at the end of this scope before `MirageDb::open`.
            let conn = Connection::open(db_file.path()).unwrap();
            conn.execute(
                "CREATE TABLE magellan_meta (
                    id INTEGER PRIMARY KEY CHECK (id = 1),
                    magellan_schema_version INTEGER NOT NULL,
                    sqlitegraph_schema_version INTEGER NOT NULL,
                    created_at INTEGER NOT NULL
                )",
                [],
            )
            .unwrap();
            conn.execute(
                "INSERT INTO magellan_meta (id, magellan_schema_version, sqlitegraph_schema_version, created_at)
                 VALUES (1, 6, 3, 0)",
                [],
            ).unwrap();
            conn.execute(
                "CREATE TABLE graph_entities (
                    id INTEGER PRIMARY KEY AUTOINCREMENT,
                    kind TEXT NOT NULL,
                    name TEXT NOT NULL,
                    file_path TEXT,
                    data TEXT NOT NULL
                )",
                [],
            )
            .unwrap();
        }

        let result = MirageDb::open(db_file.path());
        assert!(result.is_err(), "Should fail with old Magellan schema");

        let err_msg = result.unwrap_err().to_string();
        assert!(
            err_msg.contains("too old") || err_msg.contains("minimum"),
            "Error should mention schema too old: {}",
            err_msg
        );
        assert!(
            err_msg.contains("magellan watch"),
            "Error should suggest running magellan watch: {}",
            err_msg
        );
    }
}
596}