llmcc_core/
graph_builder.rs

1use std::collections::HashSet;
2use std::marker::PhantomData;
3
4pub use crate::block::{BasicBlock, BlockId, BlockKind, BlockRelation};
5use crate::block::{
6    BlockCall, BlockClass, BlockConst, BlockEnum, BlockField, BlockFunc, BlockImpl, BlockRoot,
7    BlockStmt,
8};
9use crate::block_rel::BlockRelationMap;
10use crate::context::{CompileCtxt, CompileUnit};
11use crate::ir::HirNode;
12use crate::lang_def::LanguageTrait;
13use crate::symbol::{SymId, Symbol};
14use crate::visit::HirVisitor;
15
16#[derive(Debug, Clone)]
17pub struct UnitGraph {
18    /// Compile unit this graph belongs to
19    unit_index: usize,
20    /// Root block ID of this unit
21    root: BlockId,
22    /// Edges of this graph unit
23    edges: BlockRelationMap,
24}
25
26impl UnitGraph {
27    pub fn new(unit_index: usize, root: BlockId, edges: BlockRelationMap) -> Self {
28        Self {
29            unit_index,
30            root,
31            edges,
32        }
33    }
34
35    pub fn unit_index(&self) -> usize {
36        self.unit_index
37    }
38
39    pub fn root(&self) -> BlockId {
40        self.root
41    }
42
43    pub fn edges(&self) -> &BlockRelationMap {
44        &self.edges
45    }
46}
47
48#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
49pub struct GraphNode {
50    pub unit_index: usize,
51    pub block_id: BlockId,
52}
53
54/// ProjectGraph represents a complete compilation project with all units and their inter-dependencies.
55///
56/// # Overview
57/// ProjectGraph maintains a collection of per-unit compilation graphs (UnitGraph) and facilitates
58/// cross-unit dependency resolution. It provides efficient multi-dimensional indexing for block
59/// lookups by name, kind, unit, and ID, enabling quick context retrieval for LLM consumption.
60///
61/// # Architecture
62/// The graph consists of:
63/// - **UnitGraphs**: One per compilation unit (file), containing blocks and intra-unit relations
64/// - **Block Indexes**: Multi-dimensional indexes via BlockIndexMaps for O(1) to O(log n) lookups
65/// - **Cross-unit Links**: Dependencies tracked between blocks across different units
66///
67/// # Primary Use Cases
68/// 1. **Symbol Resolution**: Find blocks by name across the entire project
69/// 2. **Context Gathering**: Collect all related blocks for code analysis
70/// 3. **LLM Serialization**: Export graph as text or JSON for LLM model consumption
71/// 4. **Dependency Analysis**: Traverse dependency graphs to understand block relationships
72///
73#[derive(Debug)]
74pub struct ProjectGraph<'tcx> {
75    /// Reference to the compilation context containing all symbols, HIR nodes, and blocks
76    pub cc: &'tcx CompileCtxt<'tcx>,
77    /// Per-unit graphs containing blocks and intra-unit relations
78    units: Vec<UnitGraph>,
79}
80
81impl<'tcx> ProjectGraph<'tcx> {
82    pub fn new(cc: &'tcx CompileCtxt<'tcx>) -> Self {
83        Self {
84            cc,
85            units: Vec::new(),
86        }
87    }
88
89    pub fn add_child(&mut self, graph: UnitGraph) {
90        self.units.push(graph);
91    }
92
93    pub fn link_units(&mut self) {
94        if self.units.is_empty() {
95            return;
96        }
97
98        let mut unresolved = self.cc.unresolve_symbols.borrow_mut();
99
100        unresolved.retain(|symbol_ref| {
101            let target = *symbol_ref;
102            let Some(target_block) = target.block_id() else {
103                return false;
104            };
105
106            let dependents: Vec<SymId> = target.depended.borrow().clone();
107            for dependent_id in dependents {
108                let Some(source_symbol) = self.cc.opt_get_symbol(dependent_id) else {
109                    continue;
110                };
111                let Some(from_block) = source_symbol.block_id() else {
112                    continue;
113                };
114                self.add_cross_edge(
115                    source_symbol.unit_index().unwrap(),
116                    target.unit_index().unwrap(),
117                    from_block,
118                    target_block,
119                );
120            }
121
122            false
123        });
124    }
125
126    pub fn units(&self) -> &[UnitGraph] {
127        &self.units
128    }
129
130    pub fn block_by_name(&self, name: &str) -> Option<GraphNode> {
131        let block_indexes = self.cc.block_indexes.borrow();
132        let matches = block_indexes.find_by_name(name);
133
134        matches.first().map(|(unit_index, _, block_id)| GraphNode {
135            unit_index: *unit_index,
136            block_id: *block_id,
137        })
138    }
139
140    pub fn blocks_by_name(&self, name: &str) -> Vec<GraphNode> {
141        let block_indexes = self.cc.block_indexes.borrow();
142        let matches = block_indexes.find_by_name(name);
143
144        matches
145            .into_iter()
146            .map(|(unit_index, _, block_id)| GraphNode {
147                unit_index,
148                block_id,
149            })
150            .collect()
151    }
152
153    pub fn block_by_name_in(&self, unit_index: usize, name: &str) -> Option<GraphNode> {
154        let block_indexes = self.cc.block_indexes.borrow();
155        let matches = block_indexes.find_by_name(name);
156
157        matches
158            .iter()
159            .find(|(u, _, _)| *u == unit_index)
160            .map(|(_, _, block_id)| GraphNode {
161                unit_index,
162                block_id: *block_id,
163            })
164    }
165
166    pub fn blocks_by_kind(&self, block_kind: BlockKind) -> Vec<GraphNode> {
167        let block_indexes = self.cc.block_indexes.borrow();
168        let matches = block_indexes.find_by_kind(block_kind);
169
170        matches
171            .into_iter()
172            .map(|(unit_index, _, block_id)| GraphNode {
173                unit_index,
174                block_id,
175            })
176            .collect()
177    }
178
179    pub fn blocks_by_kind_in(&self, block_kind: BlockKind, unit_index: usize) -> Vec<GraphNode> {
180        let block_indexes = self.cc.block_indexes.borrow();
181        let block_ids = block_indexes.find_by_kind_and_unit(block_kind, unit_index);
182
183        block_ids
184            .into_iter()
185            .map(|block_id| GraphNode {
186                unit_index,
187                block_id,
188            })
189            .collect()
190    }
191
192    pub fn blocks_in(&self, unit_index: usize) -> Vec<GraphNode> {
193        let block_indexes = self.cc.block_indexes.borrow();
194        let matches = block_indexes.find_by_unit(unit_index);
195
196        matches
197            .into_iter()
198            .map(|(_, _, block_id)| GraphNode {
199                unit_index,
200                block_id,
201            })
202            .collect()
203    }
204
205    pub fn block_info(&self, block_id: BlockId) -> Option<(usize, Option<String>, BlockKind)> {
206        let block_indexes = self.cc.block_indexes.borrow();
207        block_indexes.get_block_info(block_id)
208    }
209
210    pub fn find_related_blocks(
211        &self,
212        node: GraphNode,
213        relations: Vec<BlockRelation>,
214    ) -> Vec<GraphNode> {
215        if node.unit_index >= self.units.len() {
216            return Vec::new();
217        }
218
219        let unit = &self.units[node.unit_index];
220        let mut result = Vec::new();
221
222        for relation in relations {
223            match relation {
224                BlockRelation::DependsOn => {
225                    // Get all blocks that this block depends on
226                    let dependencies = unit
227                        .edges
228                        .get_related(node.block_id, BlockRelation::DependsOn);
229                    for dep_block_id in dependencies {
230                        result.push(GraphNode {
231                            unit_index: node.unit_index,
232                            block_id: dep_block_id,
233                        });
234                    }
235                }
236                BlockRelation::DependedBy => {
237                    let mut seen = HashSet::new();
238
239                    // Direct dependents tracked on this unit (covers cross-unit edges too)
240                    let dependents = unit
241                        .edges
242                        .get_related(node.block_id, BlockRelation::DependedBy);
243                    if !dependents.is_empty() {
244                        let indexes = self.cc.block_indexes.borrow();
245                        for dep_block_id in dependents {
246                            if !seen.insert(dep_block_id) {
247                                continue;
248                            }
249                            if let Some((dep_unit_idx, _, _)) = indexes.get_block_info(dep_block_id)
250                            {
251                                result.push(GraphNode {
252                                    unit_index: dep_unit_idx,
253                                    block_id: dep_block_id,
254                                });
255                            } else {
256                                result.push(GraphNode {
257                                    unit_index: node.unit_index,
258                                    block_id: dep_block_id,
259                                });
260                            }
261                        }
262                    }
263
264                    // Fallback: scan current unit for reverse DependsOn edges
265                    let local_dependents = unit
266                        .edges
267                        .find_reverse_relations(node.block_id, BlockRelation::DependsOn);
268                    for dep_block_id in local_dependents {
269                        if !seen.insert(dep_block_id) {
270                            continue;
271                        }
272                        result.push(GraphNode {
273                            unit_index: node.unit_index,
274                            block_id: dep_block_id,
275                        });
276                    }
277                }
278                BlockRelation::Unknown => {
279                    // Skip unknown relations
280                }
281            }
282        }
283
284        result
285    }
286
287    pub fn find_dpends_blocks_recursive(&self, node: GraphNode) -> HashSet<GraphNode> {
288        let mut visited = HashSet::new();
289        let mut stack = vec![node];
290        let relations = vec![BlockRelation::DependsOn];
291
292        while let Some(current) = stack.pop() {
293            if visited.contains(&current) {
294                continue;
295            }
296            visited.insert(current);
297
298            for related in self.find_related_blocks(current, relations.clone()) {
299                if !visited.contains(&related) {
300                    stack.push(related);
301                }
302            }
303        }
304
305        visited.remove(&node);
306        visited
307    }
308
309    pub fn traverse_bfs<F>(&self, start: GraphNode, mut callback: F)
310    where
311        F: FnMut(GraphNode),
312    {
313        let mut visited = HashSet::new();
314        let mut queue = vec![start];
315        let relations = vec![BlockRelation::DependsOn, BlockRelation::DependedBy];
316
317        while !queue.is_empty() {
318            let current = queue.remove(0);
319            if visited.contains(&current) {
320                continue;
321            }
322            visited.insert(current);
323            callback(current);
324
325            for related in self.find_related_blocks(current, relations.clone()) {
326                if !visited.contains(&related) {
327                    queue.push(related);
328                }
329            }
330        }
331    }
332
333    pub fn traverse_dfs<F>(&self, start: GraphNode, mut callback: F)
334    where
335        F: FnMut(GraphNode),
336    {
337        let mut visited = HashSet::new();
338        self.traverse_dfs_impl(start, &mut visited, &mut callback);
339    }
340
341    fn traverse_dfs_impl<F>(
342        &self,
343        node: GraphNode,
344        visited: &mut HashSet<GraphNode>,
345        callback: &mut F,
346    ) where
347        F: FnMut(GraphNode),
348    {
349        if visited.contains(&node) {
350            return;
351        }
352        visited.insert(node);
353        callback(node);
354
355        let relations = vec![BlockRelation::DependsOn, BlockRelation::DependedBy];
356        for related in self.find_related_blocks(node, relations) {
357            if !visited.contains(&related) {
358                self.traverse_dfs_impl(related, visited, callback);
359            }
360        }
361    }
362
363    pub fn get_block_depends(&self, node: GraphNode) -> HashSet<GraphNode> {
364        if node.unit_index >= self.units.len() {
365            return HashSet::new();
366        }
367
368        let unit = &self.units[node.unit_index];
369        let mut result = HashSet::new();
370        let mut visited = HashSet::new();
371        let mut stack = vec![node.block_id];
372
373        while let Some(current_block) = stack.pop() {
374            if visited.contains(&current_block) {
375                continue;
376            }
377            visited.insert(current_block);
378
379            let dependencies = unit
380                .edges
381                .get_related(current_block, BlockRelation::DependsOn);
382            for dep_block_id in dependencies {
383                if dep_block_id != node.block_id {
384                    result.insert(GraphNode {
385                        unit_index: node.unit_index,
386                        block_id: dep_block_id,
387                    });
388                    stack.push(dep_block_id);
389                }
390            }
391        }
392
393        result
394    }
395
396    pub fn get_block_depended(&self, node: GraphNode) -> HashSet<GraphNode> {
397        if node.unit_index >= self.units.len() {
398            return HashSet::new();
399        }
400
401        let unit = &self.units[node.unit_index];
402        let mut result = HashSet::new();
403        let mut visited = HashSet::new();
404        let mut stack = vec![node.block_id];
405
406        while let Some(current_block) = stack.pop() {
407            if visited.contains(&current_block) {
408                continue;
409            }
410            visited.insert(current_block);
411
412            let dependencies = unit
413                .edges
414                .get_related(current_block, BlockRelation::DependedBy);
415            for dep_block_id in dependencies {
416                if dep_block_id != node.block_id {
417                    result.insert(GraphNode {
418                        unit_index: node.unit_index,
419                        block_id: dep_block_id,
420                    });
421                    stack.push(dep_block_id);
422                }
423            }
424        }
425
426        result
427    }
428
429    fn add_cross_edge(
430        &self,
431        from_idx: usize,
432        to_idx: usize,
433        from_block: BlockId,
434        to_block: BlockId,
435    ) {
436        if from_idx == to_idx {
437            let unit = &self.units[from_idx];
438            if !unit
439                .edges
440                .has_relation(from_block, BlockRelation::DependsOn, to_block)
441            {
442                unit.edges.add_relation(from_block, to_block);
443            }
444            return;
445        }
446
447        let from_unit = &self.units[from_idx];
448        from_unit
449            .edges
450            .add_relation_if_not_exists(from_block, BlockRelation::DependsOn, to_block);
451
452        let to_unit = &self.units[to_idx];
453        to_unit
454            .edges
455            .add_relation_if_not_exists(to_block, BlockRelation::DependedBy, from_block);
456    }
457}
458
459#[derive(Debug)]
460struct GraphBuilder<'tcx, Language> {
461    unit: CompileUnit<'tcx>,
462    root: Option<BlockId>,
463    children_stack: Vec<Vec<BlockId>>,
464    _marker: PhantomData<Language>,
465}
466
467impl<'tcx, Language: LanguageTrait> GraphBuilder<'tcx, Language> {
468    fn new(unit: CompileUnit<'tcx>) -> Self {
469        Self {
470            unit,
471            root: None,
472            children_stack: Vec::new(),
473            _marker: PhantomData,
474        }
475    }
476
477    fn next_id(&self) -> BlockId {
478        self.unit.reserve_block_id()
479    }
480
481    fn create_block(
482        &self,
483        id: BlockId,
484        node: HirNode<'tcx>,
485        kind: BlockKind,
486        parent: Option<BlockId>,
487        children: Vec<BlockId>,
488    ) -> BasicBlock<'tcx> {
489        let arena = &self.unit.cc.block_arena;
490        match kind {
491            BlockKind::Root => {
492                // Extract file_name from HirFile node if available
493                let file_name = node.as_file().map(|file| file.file_path.clone());
494                let block = BlockRoot::from_hir(id, node, parent, children, file_name);
495                BasicBlock::Root(arena.alloc(block))
496            }
497            BlockKind::Func => {
498                let block = BlockFunc::from_hir(id, node, parent, children);
499                BasicBlock::Func(arena.alloc(block))
500            }
501            BlockKind::Class => {
502                let block = BlockClass::from_hir(id, node, parent, children);
503                BasicBlock::Class(arena.alloc(block))
504            }
505            BlockKind::Stmt => {
506                let stmt = BlockStmt::from_hir(id, node, parent, children);
507                BasicBlock::Stmt(arena.alloc(stmt))
508            }
509            BlockKind::Call => {
510                let stmt = BlockCall::from_hir(id, node, parent, children);
511                BasicBlock::Call(arena.alloc(stmt))
512            }
513            BlockKind::Enum => {
514                let enum_ty = BlockEnum::from_hir(id, node, parent, children);
515                BasicBlock::Enum(arena.alloc(enum_ty))
516            }
517            BlockKind::Const => {
518                let stmt = BlockConst::from_hir(id, node, parent, children);
519                BasicBlock::Const(arena.alloc(stmt))
520            }
521            BlockKind::Impl => {
522                let block = BlockImpl::from_hir(id, node, parent, children);
523                BasicBlock::Impl(arena.alloc(block))
524            }
525            BlockKind::Field => {
526                let block = BlockField::from_hir(id, node, parent, children);
527                BasicBlock::Field(arena.alloc(block))
528            }
529            _ => {
530                panic!("unknown block kind: {}", kind)
531            }
532        }
533    }
534
535    fn build_edges(&self, node: HirNode<'tcx>) -> BlockRelationMap {
536        let edges = BlockRelationMap::default();
537        let mut visited = HashSet::new();
538        let mut unresolved = HashSet::new();
539        self.collect_edges(node, &edges, &mut visited, &mut unresolved);
540        edges
541    }
542
543    fn collect_edges(
544        &self,
545        node: HirNode<'tcx>,
546        edges: &BlockRelationMap,
547        visited: &mut HashSet<SymId>,
548        unresolved: &mut HashSet<SymId>,
549    ) {
550        // Try to process symbol dependencies for this node
551        if let Some(scope) = self.unit.opt_get_scope(node.hir_id()) {
552            if let Some(symbol) = scope.symbol() {
553                self.process_symbol(symbol, edges, visited, unresolved);
554            }
555        }
556
557        // Recurse into children
558        for &child_id in node.children() {
559            let child = self.unit.hir_node(child_id);
560            self.collect_edges(child, edges, visited, unresolved);
561        }
562    }
563
564    fn process_symbol(
565        &self,
566        symbol: &'tcx Symbol,
567        edges: &BlockRelationMap,
568        visited: &mut HashSet<SymId>,
569        unresolved: &mut HashSet<SymId>,
570    ) {
571        let symbol_id = symbol.id;
572
573        // Avoid processing the same symbol twice
574        if !visited.insert(symbol_id) {
575            return;
576        }
577
578        let Some(from_block) = symbol.block_id() else {
579            return;
580        };
581
582        for &dep_id in symbol.depends.borrow().iter() {
583            self.link_dependency(dep_id, from_block, edges, unresolved);
584        }
585    }
586    fn link_dependency(
587        &self,
588        dep_id: SymId,
589        from_block: BlockId,
590        edges: &BlockRelationMap,
591        unresolved: &mut HashSet<SymId>,
592    ) {
593        // If target symbol exists and has a block, add the dependency edge
594        if let Some(target_symbol) = self.unit.opt_get_symbol(dep_id) {
595            if let Some(to_block) = target_symbol.block_id() {
596                if !edges.has_relation(from_block, BlockRelation::DependsOn, to_block) {
597                    edges.add_relation(from_block, to_block);
598                }
599                let target_unit = target_symbol.unit_index();
600                if target_unit.is_some()
601                    && target_unit != Some(self.unit.index)
602                    && unresolved.insert(dep_id)
603                {
604                    self.unit.add_unresolved_symbol(target_symbol);
605                }
606                return;
607            }
608
609            // Target symbol exists but block not yet known
610            if unresolved.insert(dep_id) {
611                self.unit.add_unresolved_symbol(target_symbol);
612            }
613            return;
614        }
615
616        // Target symbol not found at all
617        unresolved.insert(dep_id);
618    }
619
620    fn build_block(&mut self, node: HirNode<'tcx>, parent: BlockId, recursive: bool) {
621        let id = self.next_id();
622        let block_kind = Language::block_kind(node.kind_id());
623        assert_ne!(block_kind, BlockKind::Undefined);
624
625        if self.root.is_none() {
626            self.root = Some(id);
627        }
628
629        let children = if recursive {
630            self.children_stack.push(Vec::new());
631            self.visit_children(node, id);
632
633            self.children_stack.pop().unwrap()
634        } else {
635            Vec::new()
636        };
637
638        let block = self.create_block(id, node, block_kind, Some(parent), children);
639        if let Some(scope) = self.unit.opt_get_scope(node.hir_id()) {
640            if let Some(symbol) = scope.symbol() {
641                // Only set the block ID if it hasn't been set before
642                // This prevents impl blocks from overwriting struct block IDs
643                if symbol.block_id().is_none() {
644                    symbol.set_block_id(Some(id));
645                }
646            }
647        }
648        self.unit.insert_block(id, block, parent);
649
650        if let Some(children) = self.children_stack.last_mut() {
651            children.push(id);
652        }
653    }
654}
655
656impl<'tcx, Language: LanguageTrait> HirVisitor<'tcx> for GraphBuilder<'tcx, Language> {
657    fn unit(&self) -> CompileUnit<'tcx> {
658        self.unit
659    }
660
661    fn visit_file(&mut self, node: HirNode<'tcx>, parent: BlockId) {
662        self.children_stack.push(Vec::new());
663        self.build_block(node, parent, true);
664    }
665
666    fn visit_internal(&mut self, node: HirNode<'tcx>, parent: BlockId) {
667        if Language::block_kind(node.kind_id()) != BlockKind::Undefined {
668            self.build_block(node, parent, false);
669        } else {
670            self.visit_children(node, parent);
671        }
672    }
673
674    fn visit_scope(&mut self, node: HirNode<'tcx>, parent: BlockId) {
675        match Language::block_kind(node.kind_id()) {
676            BlockKind::Func
677            | BlockKind::Class
678            | BlockKind::Enum
679            | BlockKind::Const
680            | BlockKind::Impl
681            | BlockKind::Field => self.build_block(node, parent, true),
682            _ => self.visit_children(node, parent),
683        }
684    }
685}
686
687pub fn build_llmcc_graph<'tcx, L: LanguageTrait>(
688    unit: CompileUnit<'tcx>,
689    unit_index: usize,
690) -> Result<UnitGraph, Box<dyn std::error::Error>> {
691    let root_hir = unit
692        .file_start_hir_id()
693        .ok_or("missing file start HIR id")?;
694    let mut builder = GraphBuilder::<L>::new(unit);
695    let root_node = unit.hir_node(root_hir);
696    builder.visit_node(root_node, BlockId::ROOT_PARENT);
697
698    let root_block = builder.root;
699    let root_block = root_block.ok_or("graph builder produced no root")?;
700    let edges = builder.build_edges(root_node);
701    Ok(UnitGraph::new(unit_index, root_block, edges))
702}