Skip to main content

forgekit_core/graph/
mod.rs

1//! Graph module - Symbol and reference queries using sqlitegraph.
2//!
3//! This module provides access to code graph for querying symbols,
4//! finding references, and running graph algorithms.
5
6use crate::error::Result;
7use crate::storage::UnifiedGraphStore;
8use crate::types::{Cycle, CycleMember, Reference, ReferenceKind, Symbol, SymbolId};
9use std::collections::{HashMap, HashSet, VecDeque};
10use std::sync::Arc;
11
12/// Impacted symbol from k-hop impact analysis.
13#[derive(Debug, Clone)]
14pub struct ImpactedSymbol {
15    pub symbol_id: i64,
16    pub name: String,
17    pub kind: String,
18    pub file_path: String,
19    pub hop_distance: u32,
20    pub edge_type: String,
21}
22
23/// Graph module for symbol and reference queries.
24///
25/// # Examples
26///
27/// See crate-level documentation for usage examples.
28#[derive(Clone)]
29pub struct GraphModule {
30    store: Arc<UnifiedGraphStore>,
31}
32
33impl GraphModule {
34    pub(crate) fn new(store: Arc<UnifiedGraphStore>) -> Self {
35        Self { store }
36    }
37
38    /// Get the underlying store for advanced operations
39    pub fn store(&self) -> &UnifiedGraphStore {
40        &self.store
41    }
42
43    /// Finds symbols by name (exact match).
44    ///
45    /// # Arguments
46    ///
47    /// * `name` - The symbol name to search for
48    ///
49    /// # Returns
50    ///
51    /// A vector of matching symbols, or empty if the graph DB does not exist.
52    pub async fn find_symbol(&self, name: &str) -> Result<Vec<Symbol>> {
53        use magellan::CodeGraph;
54
55        let db_path = &self.store.db_path;
56        if !db_path.exists() {
57            return Ok(Vec::new());
58        }
59
60        let graph = CodeGraph::open(db_path).map_err(|e| {
61            crate::error::ForgeError::DatabaseError(format!("Failed to open magellan graph: {}", e))
62        })?;
63
64        let results = graph.search_symbols_by_name(name).map_err(|e| {
65            crate::error::ForgeError::DatabaseError(format!("Symbol search failed: {}", e))
66        })?;
67
68        Ok(results
69            .into_iter()
70            .map(|r| {
71                let file_path = std::path::PathBuf::from(&r.file_path);
72                let line_number = std::fs::read(&file_path)
73                    .map(|content| byte_offset_to_line_number(&content, r.byte_start))
74                    .unwrap_or(0);
75                Symbol {
76                    id: SymbolId(r.entity_id),
77                    name: Arc::from(r.name.clone()),
78                    fully_qualified_name: Arc::from(r.name.clone()),
79                    kind: parse_symbol_kind_str(&r.kind),
80                    language: map_magellan_language(&file_path),
81                    location: crate::types::Location {
82                        file_path,
83                        byte_start: r.byte_start as u32,
84                        byte_end: r.byte_end as u32,
85                        line_number,
86                    },
87                    parent_id: None,
88                    metadata: serde_json::Value::Null,
89                }
90            })
91            .collect())
92    }
93
94    /// Finds a symbol by its stable ID.
95    ///
96    /// # Arguments
97    ///
98    /// * `id` - The symbol identifier
99    ///
100    /// # Returns
101    ///
102    /// The symbol with the given ID
103    pub async fn find_symbol_by_id(&self, id: SymbolId) -> Result<Symbol> {
104        self.store.get_symbol(id).await
105    }
106
107    /// Finds all callers of a symbol by name.
108    ///
109    /// # Arguments
110    ///
111    /// * `name` - The symbol name
112    ///
113    /// # Returns
114    ///
115    /// A vector of call-references to this symbol, or empty if the graph DB does not exist.
116    pub async fn callers_of(&self, name: &str) -> Result<Vec<Reference>> {
117        use magellan::CodeGraph;
118
119        let db_path = &self.store.db_path;
120        if !db_path.exists() {
121            return Ok(Vec::new());
122        }
123
124        let mut graph = CodeGraph::open(db_path).map_err(|e| {
125            crate::error::ForgeError::DatabaseError(format!("Failed to open magellan graph: {}", e))
126        })?;
127
128        let symbols = graph.search_symbols_by_name(name).map_err(|e| {
129            crate::error::ForgeError::DatabaseError(format!("Symbol search failed: {}", e))
130        })?;
131
132        let mut callers = Vec::new();
133        for sym in &symbols {
134            if let Ok(call_facts) = graph.callers_of_symbol(&sym.file_path, name) {
135                for fact in call_facts {
136                    callers.push(Reference {
137                        from: SymbolId(0),
138                        to: SymbolId(0),
139                        from_name: Some(fact.caller.clone()),
140                        to_name: Some(fact.callee.clone()),
141                        kind: ReferenceKind::Call,
142                        location: crate::types::Location {
143                            file_path: fact.file_path.clone(),
144                            byte_start: fact.byte_start as u32,
145                            byte_end: fact.byte_end as u32,
146                            line_number: fact.start_line,
147                        },
148                    });
149                }
150            }
151        }
152
153        Ok(callers)
154    }
155
156    /// Finds all cross-file references to a symbol by FQN.
157    ///
158    /// # Arguments
159    ///
160    /// * `name` - The symbol fully-qualified name
161    ///
162    /// # Returns
163    ///
164    /// A vector of all cross-file references, or empty if the graph DB does not exist.
165    pub async fn references(&self, name: &str) -> Result<Vec<Reference>> {
166        use magellan::{cross_file_references_to, CodeGraph};
167
168        let db_path = &self.store.db_path;
169        if !db_path.exists() {
170            return Ok(Vec::new());
171        }
172
173        let graph = CodeGraph::open(db_path).map_err(|e| {
174            crate::error::ForgeError::DatabaseError(format!("Failed to open magellan graph: {}", e))
175        })?;
176
177        let cross_refs = cross_file_references_to(&graph, name).map_err(|e| {
178            crate::error::ForgeError::DatabaseError(format!("Reference query failed: {}", e))
179        })?;
180
181        Ok(cross_refs
182            .into_iter()
183            .map(|r| Reference {
184                from: SymbolId(0),
185                to: SymbolId(0),
186                from_name: Some(r.from_symbol_id.clone()),
187                to_name: Some(r.to_symbol_id.clone()),
188                kind: ReferenceKind::TypeReference,
189                location: crate::types::Location {
190                    file_path: std::path::PathBuf::from(&r.file_path),
191                    byte_start: r.byte_start as u32,
192                    byte_end: r.byte_end as u32,
193                    line_number: r.line_number,
194                },
195            })
196            .collect())
197    }
198
199    /// Finds all symbols reachable from a given symbol.
200    ///
201    /// Uses BFS traversal to find all symbols that can be reached
202    /// from the starting symbol through the call graph.
203    ///
204    /// # Arguments
205    ///
206    /// * `id` - The starting symbol ID
207    ///
208    /// # Returns
209    ///
210    /// A vector of reachable symbol IDs
211    pub async fn reachable_from(&self, id: SymbolId) -> Result<Vec<SymbolId>> {
212        // Build adjacency list for BFS
213        let mut adjacency: HashMap<SymbolId, Vec<SymbolId>> = HashMap::new();
214
215        // Query references to build the graph
216        let refs = self.store.query_references(id).await?;
217        for reference in &refs {
218            adjacency
219                .entry(reference.from)
220                .or_default()
221                .push(reference.to);
222        }
223
224        // BFS from the starting node
225        let mut visited = HashSet::new();
226        let mut queue = VecDeque::new();
227        let mut reachable = Vec::new();
228
229        queue.push_back(id);
230        visited.insert(id);
231
232        while let Some(current) = queue.pop_front() {
233            if let Some(neighbors) = adjacency.get(&current) {
234                for &neighbor in neighbors {
235                    if visited.insert(neighbor) {
236                        queue.push_back(neighbor);
237                        reachable.push(neighbor);
238                    }
239                }
240            }
241        }
242
243        Ok(reachable)
244    }
245
246    /// Detects cycles in the call graph using SCC condensation.
247    ///
248    /// Supernodes with more than one member represent strongly connected
249    /// components (mutual recursion / call cycles).
250    ///
251    /// # Returns
252    ///
253    /// A vector of detected cycles, or empty if the graph DB does not exist.
254    pub async fn cycles(&self) -> Result<Vec<Cycle>> {
255        use magellan::CodeGraph;
256
257        let db_path = &self.store.db_path;
258        if !db_path.exists() {
259            return Ok(Vec::new());
260        }
261
262        let graph = CodeGraph::open(db_path).map_err(|e| {
263            crate::error::ForgeError::DatabaseError(format!("Failed to open magellan graph: {}", e))
264        })?;
265
266        let report = graph.detect_cycles().map_err(|e| {
267            crate::error::ForgeError::DatabaseError(format!("Cycle detection failed: {}", e))
268        })?;
269
270        Ok(report
271            .cycles
272            .into_iter()
273            .map(|c| Cycle {
274                members: c
275                    .members
276                    .into_iter()
277                    .map(|si| CycleMember {
278                        symbol_id: si.symbol_id,
279                        fqn: si.fqn,
280                        file_path: si.file_path,
281                        kind: si.kind,
282                    })
283                    .collect(),
284            })
285            .collect())
286    }
287
288    /// Returns the number of symbols in the graph.
289    pub async fn symbol_count(&self) -> Result<usize> {
290        self.store.symbol_count().await
291    }
292
293    /// Analyze the impact of changing a symbol.
294    ///
295    /// Performs k-hop traversal to find all symbols that would be affected
296    /// by modifying the given symbol.
297    ///
298    /// # Arguments
299    ///
300    /// * `symbol_name` - The name of the symbol to analyze
301    /// * `max_hops` - Maximum traversal depth (default: 2)
302    ///
303    /// # Returns
304    ///
305    /// A vector of impacted symbols with their hop distance from the target,
306    /// or empty if the graph DB does not exist.
307    pub async fn impact_analysis(
308        &self,
309        symbol_name: &str,
310        max_hops: Option<u32>,
311    ) -> Result<Vec<ImpactedSymbol>> {
312        use magellan::CodeGraph;
313
314        let db_path = &self.store.db_path;
315        if !db_path.exists() {
316            return Ok(Vec::new());
317        }
318
319        let mut graph = CodeGraph::open(db_path).map_err(|e| {
320            crate::error::ForgeError::DatabaseError(format!("Failed to open magellan graph: {}", e))
321        })?;
322
323        let symbols = graph.search_symbols_by_name(symbol_name).map_err(|e| {
324            crate::error::ForgeError::DatabaseError(format!("Symbol search failed: {}", e))
325        })?;
326
327        let start_entity_id = match symbols.first() {
328            Some(s) => s.entity_id,
329            None => return Ok(Vec::new()),
330        };
331
332        let hops = max_hops.unwrap_or(2);
333
334        let mut impacted = Vec::new();
335        let mut visited = HashSet::new();
336        visited.insert(start_entity_id);
337        let mut current_level = vec![start_entity_id];
338
339        for hop in 1..=hops {
340            let mut next_level = Vec::new();
341            for &entity_id in &current_level {
342                if let Ok(sym) = graph.symbol_by_entity_id(entity_id) {
343                    let file = &sym.file_path;
344                    if let Ok(callers) =
345                        graph.callers_of_symbol(file, sym.fqn.as_deref().unwrap_or(&sym.kind))
346                    {
347                        for fact in callers {
348                            let caller_entity = graph
349                                .symbol_id_by_name(
350                                    fact.file_path.to_str().unwrap_or(""),
351                                    &fact.caller,
352                                )
353                                .ok()
354                                .flatten();
355                            if let Some(cid) = caller_entity {
356                                if visited.insert(cid) {
357                                    next_level.push(cid);
358                                    if let Ok(info) = graph.symbol_by_entity_id(cid) {
359                                        impacted.push(ImpactedSymbol {
360                                            symbol_id: cid,
361                                            name: info.fqn.clone().unwrap_or_default(),
362                                            kind: info.kind,
363                                            file_path: info.file_path,
364                                            hop_distance: hop,
365                                            edge_type: "call".to_string(),
366                                        });
367                                    }
368                                }
369                            }
370                        }
371                    }
372                }
373            }
374            current_level = next_level;
375            if current_level.is_empty() {
376                break;
377            }
378        }
379
380        Ok(impacted)
381    }
382
383    /// Indexes the codebase using magellan.
384    ///
385    /// This runs the magellan indexer to extract symbols and references
386    /// from the codebase and populate the graph database.
387    ///
388    /// For Native V3 backend, also indexes cross-file references using
389    /// sqlitegraph directly (a capability SQLite doesn't support).
390    ///
391    /// # Returns
392    ///
393    /// Ok(()) on success, or an error if indexing fails.
394    pub async fn index(&self) -> Result<()> {
395        use magellan::CodeGraph;
396        use std::path::Path;
397
398        let codebase_path = &self.store.codebase_path;
399        let db_path = &self.store.db_path;
400
401        let mut graph = CodeGraph::open(db_path).map_err(|e| {
402            crate::error::ForgeError::DatabaseError(format!("Failed to open magellan graph: {}", e))
403        })?;
404
405        let count = graph
406            .scan_directory(Path::new(codebase_path), None)
407            .map_err(|e| {
408                crate::error::ForgeError::DatabaseError(format!("Failed to scan directory: {}", e))
409            })?;
410
411        tracing::info!("Indexed {} symbols from {}", count, codebase_path.display());
412
413        let _ = graph.rebuild_fts5();
414
415        Self::index_references_recursive(&mut graph, codebase_path, codebase_path).await
416    }
417
418    async fn index_references_recursive(
419        graph: &mut magellan::CodeGraph,
420        codebase_path: &std::path::Path,
421        current_dir: &std::path::Path,
422    ) -> Result<()> {
423        use tokio::fs;
424
425        let mut entries = fs::read_dir(current_dir).await.map_err(|e| {
426            crate::error::ForgeError::DatabaseError(format!("Failed to read dir: {}", e))
427        })?;
428
429        while let Some(entry) = entries.next_entry().await.map_err(|e| {
430            crate::error::ForgeError::DatabaseError(format!("Failed to read entry: {}", e))
431        })? {
432            let path = entry.path();
433            if path.is_dir() {
434                // Recurse into subdirectories
435                Box::pin(Self::index_references_recursive(
436                    graph,
437                    codebase_path,
438                    &path,
439                ))
440                .await?;
441            } else if path.is_file() && path.extension().map(|e| e == "rs").unwrap_or(false) {
442                // Get relative path from codebase root
443                let relative_path = path
444                    .strip_prefix(codebase_path)
445                    .unwrap_or(&path)
446                    .to_string_lossy();
447
448                if let Ok(source) = fs::read_to_string(&path).await {
449                    // Index references using relative path
450                    let _ = graph.index_references(&relative_path, source.as_bytes());
451                    // Index calls using relative path
452                    let _ = graph.index_calls(&relative_path, source.as_bytes());
453                }
454            }
455        }
456
457        Ok(())
458    }
459}
460
461fn parse_symbol_kind_str(kind: &str) -> crate::types::SymbolKind {
462    use crate::types::SymbolKind;
463    match kind {
464        "fn" | "function" => SymbolKind::Function,
465        "method" => SymbolKind::Method,
466        "struct" | "class" => SymbolKind::Struct,
467        "trait" | "interface" => SymbolKind::Trait,
468        "enum" => SymbolKind::Enum,
469        "module" | "namespace" => SymbolKind::Module,
470        "type_alias" | "type" => SymbolKind::TypeAlias,
471        _ => SymbolKind::Function,
472    }
473}
474
475fn map_magellan_language(file_path: &std::path::Path) -> crate::types::Language {
476    use crate::types::Language;
477
478    match file_path.extension().and_then(|e| e.to_str()) {
479        Some("rs") => Language::Rust,
480        Some("py") => Language::Python,
481        Some("c") => Language::C,
482        Some("cpp") | Some("cc") | Some("cxx") => Language::Cpp,
483        Some("java") => Language::Java,
484        Some("js") => Language::JavaScript,
485        Some("ts") => Language::TypeScript,
486        Some("go") => Language::Go,
487        _ => Language::Unknown("other".to_string()),
488    }
489}
490
491/// Returns the 1-indexed line number for a byte offset within file content.
492///
493/// Counts `\n` bytes before `byte_offset` and adds 1. If `byte_offset` exceeds
494/// the content length, returns the last line number.
495pub(crate) fn byte_offset_to_line_number(content: &[u8], byte_offset: usize) -> usize {
496    let clamped = byte_offset.min(content.len());
497    content[..clamped].iter().filter(|&&b| b == b'\n').count() + 1
498}
499
500#[cfg(test)]
501mod tests {
502    use super::*;
503
504    async fn test_forge(dir: &std::path::Path) -> crate::Forge {
505        crate::ForgeBuilder::new()
506            .path(dir)
507            .db_path(dir.join("test-graph.db"))
508            .build()
509            .await
510            .unwrap()
511    }
512
513    #[tokio::test]
514    async fn test_graph_module_creation() {
515        let temp_dir = tempfile::tempdir().unwrap();
516        let forge = test_forge(temp_dir.path()).await;
517        let module = forge.graph();
518        assert_eq!(
519            module.store().db_path,
520            temp_dir.path().join("test-graph.db")
521        );
522    }
523
524    #[tokio::test]
525    async fn test_find_symbol_empty() {
526        let temp_dir = tempfile::tempdir().unwrap();
527        let forge = test_forge(temp_dir.path()).await;
528        let module = forge.graph();
529        let symbols = module.find_symbol("nonexistent").await.unwrap();
530        assert_eq!(symbols.len(), 0);
531    }
532
533    #[tokio::test]
534    async fn test_find_symbol_by_id_not_found() {
535        let temp_dir = tempfile::tempdir().unwrap();
536        let forge = test_forge(temp_dir.path()).await;
537        let module = forge.graph();
538        let result = module.find_symbol_by_id(SymbolId(999)).await;
539        assert!(result.is_err());
540    }
541
542    #[tokio::test]
543    async fn test_callers_of_empty() {
544        let temp_dir = tempfile::tempdir().unwrap();
545        let forge = test_forge(temp_dir.path()).await;
546        let module = forge.graph();
547        let callers = module.callers_of("nonexistent").await.unwrap();
548        assert_eq!(callers.len(), 0);
549    }
550
551    #[test]
552    fn test_byte_offset_to_line_number_first_line() {
553        let content = b"fn foo() {}\nfn bar() {}\n";
554        assert_eq!(byte_offset_to_line_number(content, 0), 1);
555        assert_eq!(byte_offset_to_line_number(content, 5), 1);
556    }
557
558    #[test]
559    fn test_byte_offset_to_line_number_second_line() {
560        let content = b"fn foo() {}\nfn bar() {}\n";
561        // byte 12 is start of "fn bar" (after the \n at byte 11)
562        assert_eq!(byte_offset_to_line_number(content, 12), 2);
563    }
564
565    #[test]
566    fn test_byte_offset_to_line_number_third_line() {
567        let content = b"line1\nline2\nline3\n";
568        assert_eq!(byte_offset_to_line_number(content, 12), 3);
569    }
570
571    #[test]
572    fn test_byte_offset_to_line_number_clamps_to_end() {
573        // Content without trailing newline: "abc\ndef" — 1 newline, so last line is 2
574        let content = b"abc\ndef";
575        assert_eq!(byte_offset_to_line_number(content, 9999), 2);
576    }
577
578    #[test]
579    fn test_byte_offset_to_line_number_empty_content() {
580        assert_eq!(byte_offset_to_line_number(b"", 0), 1);
581    }
582
583    #[tokio::test]
584    async fn test_find_symbol_after_index() {
585        let temp_dir = tempfile::tempdir().unwrap();
586        let src_dir = temp_dir.path().join("src");
587        tokio::fs::create_dir_all(&src_dir).await.unwrap();
588        tokio::fs::write(
589            src_dir.join("lib.rs"),
590            "fn hello() {}\nfn world() -> i32 { 42 }\n",
591        )
592        .await
593        .unwrap();
594
595        let forge = test_forge(temp_dir.path()).await;
596        forge.graph().index().await.unwrap();
597
598        let symbols = forge.graph().find_symbol("hello").await.unwrap();
599        assert!(!symbols.is_empty());
600        assert_eq!(symbols[0].name.as_ref(), "hello");
601    }
602
603    #[tokio::test]
604    async fn test_callers_of_after_index() {
605        let temp_dir = tempfile::tempdir().unwrap();
606        let src_dir = temp_dir.path().join("src");
607        tokio::fs::create_dir_all(&src_dir).await.unwrap();
608        tokio::fs::write(
609            src_dir.join("lib.rs"),
610            "fn helper() -> i32 { 1 }\nfn caller() -> i32 { helper() }\n",
611        )
612        .await
613        .unwrap();
614
615        let forge = test_forge(temp_dir.path()).await;
616        forge.graph().index().await.unwrap();
617
618        let callers = forge.graph().callers_of("helper").await.unwrap();
619        assert!(!callers.is_empty(), "should find caller calling helper");
620    }
621
622    #[tokio::test]
623    async fn test_cycles_detect_mutual_recursion() {
624        let temp_dir = tempfile::tempdir().unwrap();
625        let src_dir = temp_dir.path().join("src");
626        tokio::fs::create_dir_all(&src_dir).await.unwrap();
627        tokio::fs::write(src_dir.join("lib.rs"), "fn a() { b() }\nfn b() { a() }\n")
628            .await
629            .unwrap();
630
631        let forge = test_forge(temp_dir.path()).await;
632        forge.graph().index().await.unwrap();
633
634        let cycles = forge.graph().cycles().await.unwrap();
635        assert!(
636            !cycles.is_empty(),
637            "should detect mutual recursion between a and b"
638        );
639        let cycle = &cycles[0];
640        assert!(cycle.members.len() >= 2);
641        assert!(cycle.members.iter().any(|m| m.fqn.as_deref() == Some("a")));
642        assert!(cycle.members.iter().any(|m| m.fqn.as_deref() == Some("b")));
643    }
644
645    #[tokio::test]
646    async fn test_impact_analysis_after_index() {
647        let temp_dir = tempfile::tempdir().unwrap();
648        let src_dir = temp_dir.path().join("src");
649        tokio::fs::create_dir_all(&src_dir).await.unwrap();
650        tokio::fs::write(
651            src_dir.join("lib.rs"),
652            "fn base() -> i32 { 1 }\nfn mid() -> i32 { base() }\nfn top() -> i32 { mid() }\n",
653        )
654        .await
655        .unwrap();
656
657        let forge = test_forge(temp_dir.path()).await;
658        forge.graph().index().await.unwrap();
659
660        let impacted = forge
661            .graph()
662            .impact_analysis("base", Some(2))
663            .await
664            .unwrap();
665        assert!(
666            !impacted.is_empty(),
667            "base should have mid and/or top as impacted"
668        );
669        let has_correct_hop = impacted.iter().any(|s| s.hop_distance == 1);
670        assert!(has_correct_hop, "at least one symbol should be at hop 1");
671    }
672}