llmcc_core/
graph_builder.rs

1use std::collections::{BTreeMap, BTreeSet, HashMap, HashSet};
2use std::marker::PhantomData;
3use std::path::Path;
4
5pub use crate::block::{BasicBlock, BlockId, BlockKind, BlockRelation};
6use crate::block::{
7    BlockCall, BlockClass, BlockConst, BlockEnum, BlockField, BlockFunc, BlockImpl, BlockRoot,
8    BlockStmt,
9};
10use crate::block_rel::BlockRelationMap;
11use crate::context::{CompileCtxt, CompileUnit};
12use crate::ir::HirNode;
13use crate::lang_def::LanguageTrait;
14use crate::pagerank::PageRanker;
15use crate::symbol::{SymId, Symbol};
16use crate::visit::HirVisitor;
17
18const COMPACT_INTERESTING_KINDS: [BlockKind; 2] = [BlockKind::Class, BlockKind::Enum];
19
20#[derive(Debug, Clone)]
21pub struct UnitGraph {
22    /// Compile unit this graph belongs to
23    unit_index: usize,
24    /// Root block ID of this unit
25    root: BlockId,
26    /// Edges of this graph unit
27    edges: BlockRelationMap,
28}
29
30impl UnitGraph {
31    pub fn new(unit_index: usize, root: BlockId, edges: BlockRelationMap) -> Self {
32        Self {
33            unit_index,
34            root,
35            edges,
36        }
37    }
38
39    pub fn unit_index(&self) -> usize {
40        self.unit_index
41    }
42
43    pub fn root(&self) -> BlockId {
44        self.root
45    }
46
47    pub fn edges(&self) -> &BlockRelationMap {
48        &self.edges
49    }
50}
51
52#[derive(Debug, Clone, Copy, Default)]
53pub struct GraphBuildConfig {
54    pub compact: bool,
55}
56
57impl GraphBuildConfig {
58    pub fn compact() -> Self {
59        Self { compact: true }
60    }
61}
62
63#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
64pub struct GraphNode {
65    pub unit_index: usize,
66    pub block_id: BlockId,
67}
68
69/// ProjectGraph represents a complete compilation project with all units and their inter-dependencies.
70///
71/// # Overview
72/// ProjectGraph maintains a collection of per-unit compilation graphs (UnitGraph) and facilitates
73/// cross-unit dependency resolution. It provides efficient multi-dimensional indexing for block
74/// lookups by name, kind, unit, and ID, enabling quick context retrieval for LLM consumption.
75///
76/// # Architecture
77/// The graph consists of:
78/// - **UnitGraphs**: One per compilation unit (file), containing blocks and intra-unit relations
79/// - **Block Indexes**: Multi-dimensional indexes via BlockIndexMaps for O(1) to O(log n) lookups
80/// - **Cross-unit Links**: Dependencies tracked between blocks across different units
81///
82/// # Primary Use Cases
83/// 1. **Symbol Resolution**: Find blocks by name across the entire project
84/// 2. **Context Gathering**: Collect all related blocks for code analysis
85/// 3. **LLM Serialization**: Export graph as text or JSON for LLM model consumption
86/// 4. **Dependency Analysis**: Traverse dependency graphs to understand block relationships
87///
88#[derive(Debug)]
89pub struct ProjectGraph<'tcx> {
90    /// Reference to the compilation context containing all symbols, HIR nodes, and blocks
91    pub cc: &'tcx CompileCtxt<'tcx>,
92    /// Per-unit graphs containing blocks and intra-unit relations
93    units: Vec<UnitGraph>,
94    compact_rank_limit: Option<usize>,
95    pagerank_enabled: bool,
96}
97
98impl<'tcx> ProjectGraph<'tcx> {
99    pub fn new(cc: &'tcx CompileCtxt<'tcx>) -> Self {
100        Self {
101            cc,
102            units: Vec::new(),
103            compact_rank_limit: None,
104            pagerank_enabled: false,
105        }
106    }
107
108    pub fn add_child(&mut self, graph: UnitGraph) {
109        self.units.push(graph);
110    }
111
112    /// Configure the number of PageRank-filtered nodes retained when rendering compact graphs.
113    pub fn set_compact_rank_limit(&mut self, limit: Option<usize>) {
114        self.compact_rank_limit = match limit {
115            Some(0) => None,
116            other => other,
117        };
118        self.pagerank_enabled = self.compact_rank_limit.is_some();
119    }
120
121    pub fn link_units(&mut self) {
122        if self.units.is_empty() {
123            return;
124        }
125
126        let mut unresolved = self.cc.unresolve_symbols.borrow_mut();
127
128        unresolved.retain(|symbol_ref| {
129            let target = *symbol_ref;
130            let Some(target_block) = target.block_id() else {
131                return false;
132            };
133
134            let dependents: Vec<SymId> = target.depended.borrow().clone();
135            for dependent_id in dependents {
136                let Some(source_symbol) = self.cc.opt_get_symbol(dependent_id) else {
137                    continue;
138                };
139                let Some(from_block) = source_symbol.block_id() else {
140                    continue;
141                };
142                self.add_cross_edge(
143                    source_symbol.unit_index().unwrap(),
144                    target.unit_index().unwrap(),
145                    from_block,
146                    target_block,
147                );
148            }
149
150            false
151        });
152    }
153
154    pub fn units(&self) -> &[UnitGraph] {
155        &self.units
156    }
157
158    pub fn unit_graph(&self, unit_index: usize) -> Option<&UnitGraph> {
159        self.units
160            .iter()
161            .find(|unit| unit.unit_index() == unit_index)
162    }
163
164    pub fn block_by_name(&self, name: &str) -> Option<GraphNode> {
165        let block_indexes = self.cc.block_indexes.borrow();
166        let matches = block_indexes.find_by_name(name);
167
168        matches.first().map(|(unit_index, _, block_id)| GraphNode {
169            unit_index: *unit_index,
170            block_id: *block_id,
171        })
172    }
173
174    pub fn blocks_by_name(&self, name: &str) -> Vec<GraphNode> {
175        let block_indexes = self.cc.block_indexes.borrow();
176        let matches = block_indexes.find_by_name(name);
177
178        matches
179            .into_iter()
180            .map(|(unit_index, _, block_id)| GraphNode {
181                unit_index,
182                block_id,
183            })
184            .collect()
185    }
186
187    pub fn render_compact_graph(&self) -> String {
188        self.render_compact_graph_inner(self.compact_rank_limit)
189    }
190
191    fn ranked_block_filter(
192        &self,
193        top_k: Option<usize>,
194        interesting_kinds: &[BlockKind],
195    ) -> Option<HashSet<BlockId>> {
196        let ranked_order = top_k.and_then(|limit| {
197            let ranker = PageRanker::new(self);
198            let mut collected = Vec::new();
199
200            for ranked in ranker.rank() {
201                if interesting_kinds.contains(&ranked.kind) {
202                    collected.push(ranked.node.block_id);
203                }
204                if collected.len() >= limit {
205                    break;
206                }
207            }
208
209            if collected.is_empty() {
210                None
211            } else {
212                Some(collected)
213            }
214        });
215
216        ranked_order.map(|ordered| ordered.into_iter().collect())
217    }
218
219    fn collect_compact_nodes(
220        &self,
221        interesting_kinds: &[BlockKind],
222        ranked_filter: Option<&HashSet<BlockId>>,
223    ) -> Vec<CompactNode> {
224        let block_indexes = self.cc.block_indexes.borrow();
225        block_indexes
226            .block_id_index
227            .iter()
228            .filter_map(|(&block_id, (unit_index, name_opt, kind))| {
229                if !interesting_kinds.contains(kind) {
230                    return None;
231                }
232
233                if let Some(ids) = ranked_filter {
234                    if !ids.contains(&block_id) {
235                        return None;
236                    }
237                }
238
239                let unit = self.cc.compile_unit(*unit_index);
240                let block = unit.bb(block_id);
241                let display_name = name_opt
242                    .clone()
243                    .or_else(|| {
244                        block
245                            .base()
246                            .and_then(|base| base.opt_get_name().map(|s| s.to_string()))
247                    })
248                    .unwrap_or_else(|| format!("{}:{}", kind, block_id.as_u32()));
249
250                let raw_path = unit
251                    .file_path()
252                    .or_else(|| unit.file().path())
253                    .unwrap_or("<unknown>");
254
255                let path = std::fs::canonicalize(raw_path)
256                    .map(|p| p.to_string_lossy().to_string())
257                    .unwrap_or_else(|_| raw_path.to_string());
258
259                let location = block
260                    .opt_node()
261                    .and_then(|node| {
262                        unit.file()
263                            .file
264                            .content
265                            .as_ref()
266                            .map(|bytes| compact_byte_to_line(bytes.as_slice(), node.start_byte()))
267                            .map(|line| format!("{path}:{line}"))
268                    })
269                    .or(Some(path.clone()));
270
271                let group = location
272                    .as_ref()
273                    .map(|loc| extract_group_path(loc))
274                    .unwrap_or_else(|| extract_group_path(&path));
275
276                Some(CompactNode {
277                    block_id,
278                    unit_index: *unit_index,
279                    name: display_name,
280                    location,
281                    group,
282                })
283            })
284            .collect()
285    }
286
287    fn collect_sorted_compact_nodes(&self, top_k: Option<usize>) -> Vec<CompactNode> {
288        let ranked_filter = if self.pagerank_enabled {
289            self.ranked_block_filter(top_k, &COMPACT_INTERESTING_KINDS)
290        } else {
291            None
292        };
293        let mut nodes =
294            self.collect_compact_nodes(&COMPACT_INTERESTING_KINDS, ranked_filter.as_ref());
295        nodes.sort_by(|a, b| a.name.cmp(&b.name));
296        nodes
297    }
298
299    fn collect_compact_edges(
300        &self,
301        nodes: &[CompactNode],
302        node_index: &HashMap<BlockId, usize>,
303    ) -> BTreeSet<(usize, usize)> {
304        let mut edges = BTreeSet::new();
305
306        for node in nodes {
307            let Some(unit_graph) = self.unit_graph(node.unit_index) else {
308                continue;
309            };
310            let from_idx = node_index[&node.block_id];
311
312            let dependencies = unit_graph
313                .edges()
314                .get_related(node.block_id, BlockRelation::DependsOn);
315
316            for dep_block_id in dependencies {
317                if let Some(&to_idx) = node_index.get(&dep_block_id) {
318                    edges.insert((from_idx, to_idx));
319                }
320            }
321        }
322
323        edges
324    }
325
326    pub fn block_by_name_in(&self, unit_index: usize, name: &str) -> Option<GraphNode> {
327        let block_indexes = self.cc.block_indexes.borrow();
328        let matches = block_indexes.find_by_name(name);
329
330        matches
331            .iter()
332            .find(|(u, _, _)| *u == unit_index)
333            .map(|(_, _, block_id)| GraphNode {
334                unit_index,
335                block_id: *block_id,
336            })
337    }
338
339    pub fn blocks_by_kind(&self, block_kind: BlockKind) -> Vec<GraphNode> {
340        let block_indexes = self.cc.block_indexes.borrow();
341        let matches = block_indexes.find_by_kind(block_kind);
342
343        matches
344            .into_iter()
345            .map(|(unit_index, _, block_id)| GraphNode {
346                unit_index,
347                block_id,
348            })
349            .collect()
350    }
351
352    pub fn blocks_by_kind_in(&self, block_kind: BlockKind, unit_index: usize) -> Vec<GraphNode> {
353        let block_indexes = self.cc.block_indexes.borrow();
354        let block_ids = block_indexes.find_by_kind_and_unit(block_kind, unit_index);
355
356        block_ids
357            .into_iter()
358            .map(|block_id| GraphNode {
359                unit_index,
360                block_id,
361            })
362            .collect()
363    }
364
365    pub fn blocks_in(&self, unit_index: usize) -> Vec<GraphNode> {
366        let block_indexes = self.cc.block_indexes.borrow();
367        let matches = block_indexes.find_by_unit(unit_index);
368
369        matches
370            .into_iter()
371            .map(|(_, _, block_id)| GraphNode {
372                unit_index,
373                block_id,
374            })
375            .collect()
376    }
377
378    pub fn block_info(&self, block_id: BlockId) -> Option<(usize, Option<String>, BlockKind)> {
379        let block_indexes = self.cc.block_indexes.borrow();
380        block_indexes.get_block_info(block_id)
381    }
382
383    pub fn find_related_blocks(
384        &self,
385        node: GraphNode,
386        relations: Vec<BlockRelation>,
387    ) -> Vec<GraphNode> {
388        if node.unit_index >= self.units.len() {
389            return Vec::new();
390        }
391
392        let unit = &self.units[node.unit_index];
393        let mut result = Vec::new();
394
395        for relation in relations {
396            match relation {
397                BlockRelation::DependsOn => {
398                    // Get all blocks that this block depends on
399                    let dependencies = unit
400                        .edges
401                        .get_related(node.block_id, BlockRelation::DependsOn);
402                    let block_indexes = self.cc.block_indexes.borrow();
403                    for dep_block_id in dependencies {
404                        let dep_unit_index = block_indexes
405                            .get_block_info(dep_block_id)
406                            .map(|(idx, _, _)| idx)
407                            .unwrap_or(node.unit_index);
408                        result.push(GraphNode {
409                            unit_index: dep_unit_index,
410                            block_id: dep_block_id,
411                        });
412                    }
413                }
414                BlockRelation::DependedBy => {
415                    let mut seen = HashSet::new();
416
417                    // Direct dependents tracked on this unit (covers cross-unit edges too)
418                    let dependents = unit
419                        .edges
420                        .get_related(node.block_id, BlockRelation::DependedBy);
421                    if !dependents.is_empty() {
422                        let indexes = self.cc.block_indexes.borrow();
423                        for dep_block_id in dependents {
424                            if !seen.insert(dep_block_id) {
425                                continue;
426                            }
427                            if let Some((dep_unit_idx, _, _)) = indexes.get_block_info(dep_block_id)
428                            {
429                                result.push(GraphNode {
430                                    unit_index: dep_unit_idx,
431                                    block_id: dep_block_id,
432                                });
433                            } else {
434                                result.push(GraphNode {
435                                    unit_index: node.unit_index,
436                                    block_id: dep_block_id,
437                                });
438                            }
439                        }
440                    }
441
442                    // Fallback: scan current unit for reverse DependsOn edges
443                    let local_dependents = unit
444                        .edges
445                        .find_reverse_relations(node.block_id, BlockRelation::DependsOn);
446                    for dep_block_id in local_dependents {
447                        if !seen.insert(dep_block_id) {
448                            continue;
449                        }
450                        result.push(GraphNode {
451                            unit_index: node.unit_index,
452                            block_id: dep_block_id,
453                        });
454                    }
455                }
456                BlockRelation::Unknown => {
457                    // Skip unknown relations
458                }
459            }
460        }
461
462        result
463    }
464
465    pub fn find_dpends_blocks_recursive(&self, node: GraphNode) -> HashSet<GraphNode> {
466        let mut visited = HashSet::new();
467        let mut stack = vec![node];
468        let relations = vec![BlockRelation::DependsOn];
469
470        while let Some(current) = stack.pop() {
471            if visited.contains(&current) {
472                continue;
473            }
474            visited.insert(current);
475
476            for related in self.find_related_blocks(current, relations.clone()) {
477                if !visited.contains(&related) {
478                    stack.push(related);
479                }
480            }
481        }
482
483        visited.remove(&node);
484        visited
485    }
486
487    pub fn find_depended_blocks_recursive(&self, node: GraphNode) -> HashSet<GraphNode> {
488        let mut visited = HashSet::new();
489        let mut stack = vec![node];
490        let relations = vec![BlockRelation::DependedBy];
491
492        while let Some(current) = stack.pop() {
493            if visited.contains(&current) {
494                continue;
495            }
496            visited.insert(current);
497
498            for related in self.find_related_blocks(current, relations.clone()) {
499                if !visited.contains(&related) {
500                    stack.push(related);
501                }
502            }
503        }
504
505        visited.remove(&node);
506        visited
507    }
508
509    pub fn traverse_bfs<F>(&self, start: GraphNode, mut callback: F)
510    where
511        F: FnMut(GraphNode),
512    {
513        let mut visited = HashSet::new();
514        let mut queue = vec![start];
515        let relations = vec![BlockRelation::DependsOn, BlockRelation::DependedBy];
516
517        while !queue.is_empty() {
518            let current = queue.remove(0);
519            if visited.contains(&current) {
520                continue;
521            }
522            visited.insert(current);
523            callback(current);
524
525            for related in self.find_related_blocks(current, relations.clone()) {
526                if !visited.contains(&related) {
527                    queue.push(related);
528                }
529            }
530        }
531    }
532
533    pub fn traverse_dfs<F>(&self, start: GraphNode, mut callback: F)
534    where
535        F: FnMut(GraphNode),
536    {
537        let mut visited = HashSet::new();
538        self.traverse_dfs_impl(start, &mut visited, &mut callback);
539    }
540
541    fn traverse_dfs_impl<F>(
542        &self,
543        node: GraphNode,
544        visited: &mut HashSet<GraphNode>,
545        callback: &mut F,
546    ) where
547        F: FnMut(GraphNode),
548    {
549        if visited.contains(&node) {
550            return;
551        }
552        visited.insert(node);
553        callback(node);
554
555        let relations = vec![BlockRelation::DependsOn, BlockRelation::DependedBy];
556        for related in self.find_related_blocks(node, relations) {
557            if !visited.contains(&related) {
558                self.traverse_dfs_impl(related, visited, callback);
559            }
560        }
561    }
562
563    pub fn get_block_depends(&self, node: GraphNode) -> HashSet<GraphNode> {
564        if node.unit_index >= self.units.len() {
565            return HashSet::new();
566        }
567
568        let unit = &self.units[node.unit_index];
569        let mut result = HashSet::new();
570        let mut visited = HashSet::new();
571        let mut stack = vec![node.block_id];
572        let block_indexes = self.cc.block_indexes.borrow();
573
574        while let Some(current_block) = stack.pop() {
575            if visited.contains(&current_block) {
576                continue;
577            }
578            visited.insert(current_block);
579
580            let dependencies = unit
581                .edges
582                .get_related(current_block, BlockRelation::DependsOn);
583            for dep_block_id in dependencies {
584                if dep_block_id != node.block_id {
585                    let dep_unit_index = block_indexes
586                        .get_block_info(dep_block_id)
587                        .map(|(idx, _, _)| idx)
588                        .unwrap_or(node.unit_index);
589                    result.insert(GraphNode {
590                        unit_index: dep_unit_index,
591                        block_id: dep_block_id,
592                    });
593                    stack.push(dep_block_id);
594                }
595            }
596        }
597
598        result
599    }
600
601    pub fn get_block_depended(&self, node: GraphNode) -> HashSet<GraphNode> {
602        if node.unit_index >= self.units.len() {
603            return HashSet::new();
604        }
605
606        let unit = &self.units[node.unit_index];
607        let mut result = HashSet::new();
608        let mut visited = HashSet::new();
609        let mut stack = vec![node.block_id];
610        let block_indexes = self.cc.block_indexes.borrow();
611
612        while let Some(current_block) = stack.pop() {
613            if visited.contains(&current_block) {
614                continue;
615            }
616            visited.insert(current_block);
617
618            let dependencies = unit
619                .edges
620                .get_related(current_block, BlockRelation::DependedBy);
621            for dep_block_id in dependencies {
622                if dep_block_id != node.block_id {
623                    let dep_unit_index = block_indexes
624                        .get_block_info(dep_block_id)
625                        .map(|(idx, _, _)| idx)
626                        .unwrap_or(node.unit_index);
627                    result.insert(GraphNode {
628                        unit_index: dep_unit_index,
629                        block_id: dep_block_id,
630                    });
631                    stack.push(dep_block_id);
632                }
633            }
634        }
635
636        result
637    }
638
639    fn render_compact_graph_inner(&self, top_k: Option<usize>) -> String {
640        let nodes = self.collect_sorted_compact_nodes(top_k);
641
642        if nodes.is_empty() {
643            return "digraph CompactProject {\n}\n".to_string();
644        }
645
646        let node_index = build_compact_node_index(&nodes);
647        let edges = self.collect_compact_edges(&nodes, &node_index);
648
649        let pruned = prune_compact_components(&nodes, &edges);
650        if pruned.nodes.is_empty() {
651            return "digraph CompactProject {\n}\n".to_string();
652        }
653
654        let reduced_edges = reduce_transitive_edges(&pruned.nodes, &pruned.edges);
655
656        render_compact_dot(&pruned.nodes, &reduced_edges)
657    }
658
659    fn add_cross_edge(
660        &self,
661        from_idx: usize,
662        to_idx: usize,
663        from_block: BlockId,
664        to_block: BlockId,
665    ) {
666        if from_idx == to_idx {
667            let unit = &self.units[from_idx];
668            if !unit
669                .edges
670                .has_relation(from_block, BlockRelation::DependsOn, to_block)
671            {
672                unit.edges.add_relation(from_block, to_block);
673            }
674            return;
675        }
676
677        let from_unit = &self.units[from_idx];
678        from_unit
679            .edges
680            .add_relation_if_not_exists(from_block, BlockRelation::DependsOn, to_block);
681
682        let to_unit = &self.units[to_idx];
683        to_unit
684            .edges
685            .add_relation_if_not_exists(to_block, BlockRelation::DependedBy, from_block);
686    }
687}
688
689#[derive(Debug)]
690struct GraphBuilder<'tcx, Language> {
691    unit: CompileUnit<'tcx>,
692    root: Option<BlockId>,
693    children_stack: Vec<Vec<BlockId>>,
694    _config: GraphBuildConfig,
695    _marker: PhantomData<Language>,
696}
697
698impl<'tcx, Language: LanguageTrait> GraphBuilder<'tcx, Language> {
699    fn new(unit: CompileUnit<'tcx>, _config: GraphBuildConfig) -> Self {
700        Self {
701            unit,
702            root: None,
703            children_stack: Vec::new(),
704            _config,
705            _marker: PhantomData,
706        }
707    }
708
709    fn next_id(&self) -> BlockId {
710        self.unit.reserve_block_id()
711    }
712
713    fn create_block(
714        &self,
715        id: BlockId,
716        node: HirNode<'tcx>,
717        kind: BlockKind,
718        parent: Option<BlockId>,
719        children: Vec<BlockId>,
720    ) -> BasicBlock<'tcx> {
721        let arena = &self.unit.cc.block_arena;
722        match kind {
723            BlockKind::Root => {
724                // Extract file_name from HirFile node if available
725                let file_name = node.as_file().map(|file| file.file_path.clone());
726                let block = BlockRoot::from_hir(id, node, parent, children, file_name);
727                BasicBlock::Root(arena.alloc(block))
728            }
729            BlockKind::Func => {
730                let block = BlockFunc::from_hir(id, node, parent, children);
731                BasicBlock::Func(arena.alloc(block))
732            }
733            BlockKind::Class => {
734                let block = BlockClass::from_hir(id, node, parent, children);
735                BasicBlock::Class(arena.alloc(block))
736            }
737            BlockKind::Stmt => {
738                let stmt = BlockStmt::from_hir(id, node, parent, children);
739                BasicBlock::Stmt(arena.alloc(stmt))
740            }
741            BlockKind::Call => {
742                let stmt = BlockCall::from_hir(id, node, parent, children);
743                BasicBlock::Call(arena.alloc(stmt))
744            }
745            BlockKind::Enum => {
746                let enum_ty = BlockEnum::from_hir(id, node, parent, children);
747                BasicBlock::Enum(arena.alloc(enum_ty))
748            }
749            BlockKind::Const => {
750                let stmt = BlockConst::from_hir(id, node, parent, children);
751                BasicBlock::Const(arena.alloc(stmt))
752            }
753            BlockKind::Impl => {
754                let block = BlockImpl::from_hir(id, node, parent, children);
755                BasicBlock::Impl(arena.alloc(block))
756            }
757            BlockKind::Field => {
758                let block = BlockField::from_hir(id, node, parent, children);
759                BasicBlock::Field(arena.alloc(block))
760            }
761            _ => {
762                panic!("unknown block kind: {}", kind)
763            }
764        }
765    }
766
767    fn build_edges(&self, node: HirNode<'tcx>) -> BlockRelationMap {
768        let edges = BlockRelationMap::default();
769        let mut visited = HashSet::new();
770        let mut unresolved = HashSet::new();
771        self.collect_edges(node, &edges, &mut visited, &mut unresolved);
772        edges
773    }
774
775    fn collect_edges(
776        &self,
777        node: HirNode<'tcx>,
778        edges: &BlockRelationMap,
779        visited: &mut HashSet<SymId>,
780        unresolved: &mut HashSet<SymId>,
781    ) {
782        // Try to process symbol dependencies for this node
783        if let Some(scope) = self.unit.opt_get_scope(node.hir_id()) {
784            if let Some(symbol) = scope.symbol() {
785                self.process_symbol(symbol, edges, visited, unresolved);
786            }
787        }
788
789        // Recurse into children
790        for &child_id in node.children() {
791            let child = self.unit.hir_node(child_id);
792            self.collect_edges(child, edges, visited, unresolved);
793        }
794    }
795
796    fn process_symbol(
797        &self,
798        symbol: &'tcx Symbol,
799        edges: &BlockRelationMap,
800        visited: &mut HashSet<SymId>,
801        unresolved: &mut HashSet<SymId>,
802    ) {
803        let symbol_id = symbol.id;
804
805        // Avoid processing the same symbol twice
806        if !visited.insert(symbol_id) {
807            return;
808        }
809
810        let Some(from_block) = symbol.block_id() else {
811            return;
812        };
813
814        for &dep_id in symbol.depends.borrow().iter() {
815            self.link_dependency(dep_id, from_block, edges, unresolved);
816        }
817    }
818    fn link_dependency(
819        &self,
820        dep_id: SymId,
821        from_block: BlockId,
822        edges: &BlockRelationMap,
823        unresolved: &mut HashSet<SymId>,
824    ) {
825        // If target symbol exists and has a block, add the dependency edge
826        if let Some(target_symbol) = self.unit.opt_get_symbol(dep_id) {
827            if let Some(to_block) = target_symbol.block_id() {
828                if !edges.has_relation(from_block, BlockRelation::DependsOn, to_block) {
829                    edges.add_relation(from_block, to_block);
830                }
831                let target_unit = target_symbol.unit_index();
832                if target_unit.is_some()
833                    && target_unit != Some(self.unit.index)
834                    && unresolved.insert(dep_id)
835                {
836                    self.unit.add_unresolved_symbol(target_symbol);
837                }
838                return;
839            }
840
841            // Target symbol exists but block not yet known
842            if unresolved.insert(dep_id) {
843                self.unit.add_unresolved_symbol(target_symbol);
844            }
845            return;
846        }
847
848        // Target symbol not found at all
849        unresolved.insert(dep_id);
850    }
851
852    fn build_block(&mut self, node: HirNode<'tcx>, parent: BlockId, recursive: bool) {
853        let id = self.next_id();
854        let block_kind = Language::block_kind(node.kind_id());
855        assert_ne!(block_kind, BlockKind::Undefined);
856
857        if self.root.is_none() {
858            self.root = Some(id);
859        }
860
861        let children = if recursive {
862            self.children_stack.push(Vec::new());
863            self.visit_children(node, id);
864
865            self.children_stack.pop().unwrap()
866        } else {
867            Vec::new()
868        };
869
870        let block = self.create_block(id, node, block_kind, Some(parent), children);
871        if let Some(scope) = self.unit.opt_get_scope(node.hir_id()) {
872            if let Some(symbol) = scope.symbol() {
873                // Only set the block ID if it hasn't been set before
874                // This prevents impl blocks from overwriting struct block IDs
875                if symbol.block_id().is_none() {
876                    symbol.set_block_id(Some(id));
877                }
878            }
879        }
880        self.unit.insert_block(id, block, parent);
881
882        if let Some(children) = self.children_stack.last_mut() {
883            children.push(id);
884        }
885    }
886}
887
888impl<'tcx, Language: LanguageTrait> HirVisitor<'tcx> for GraphBuilder<'tcx, Language> {
889    fn unit(&self) -> CompileUnit<'tcx> {
890        self.unit
891    }
892
893    fn visit_file(&mut self, node: HirNode<'tcx>, parent: BlockId) {
894        self.children_stack.push(Vec::new());
895        self.build_block(node, parent, true);
896    }
897
898    fn visit_internal(&mut self, node: HirNode<'tcx>, parent: BlockId) {
899        let kind = Language::block_kind(node.kind_id());
900        // if self.config.compact {
901        //     if kind == BlockKind::Root {
902        //         self.build_block(node, parent, true);
903        //     } else {
904        //         self.visit_children(node, parent);
905        //     }
906        //     return;
907        // }
908
909        // Non-compact mode: process all defined kinds
910        if kind != BlockKind::Undefined {
911            self.build_block(node, parent, false);
912        } else {
913            self.visit_children(node, parent);
914        }
915    }
916
917    fn visit_scope(&mut self, node: HirNode<'tcx>, parent: BlockId) {
918        let kind = Language::block_kind(node.kind_id());
919        // if self.config.compact {
920        //     // In compact mode, only create blocks for major constructs (Class, Enum, Impl)
921        //     // Skip functions, fields, and scopes to reduce graph size
922        //     match kind {
923        //         BlockKind::Class | BlockKind::Enum => {
924        //             // Build with recursion enabled to capture nested major constructs
925        //             self.build_block(node, parent, true);
926        //         }
927        //         // Skip all other scopes - don't recurse
928        //         _ => {
929        //             // Stop here, do not visit children
930        //             // self.visit_children(node, parent);
931        //         }
932        //     }
933        //     return;
934        // }
935
936        // Non-compact mode: build blocks for all major constructs
937        match kind {
938            BlockKind::Func
939            | BlockKind::Class
940            | BlockKind::Enum
941            | BlockKind::Const
942            | BlockKind::Impl
943            | BlockKind::Field => self.build_block(node, parent, true),
944            _ => self.visit_children(node, parent),
945        }
946    }
947}
948
949pub fn build_llmcc_graph_with_config<'tcx, L: LanguageTrait>(
950    unit: CompileUnit<'tcx>,
951    unit_index: usize,
952    config: GraphBuildConfig,
953) -> Result<UnitGraph, Box<dyn std::error::Error>> {
954    let root_hir = unit
955        .file_start_hir_id()
956        .ok_or("missing file start HIR id")?;
957    let mut builder = GraphBuilder::<L>::new(unit, config);
958    let root_node = unit.hir_node(root_hir);
959    builder.visit_node(root_node, BlockId::ROOT_PARENT);
960
961    let root_block = builder.root;
962    let root_block = root_block.ok_or("graph builder produced no root")?;
963    let edges = builder.build_edges(root_node);
964    Ok(UnitGraph::new(unit_index, root_block, edges))
965}
966
967pub fn build_llmcc_graph<'tcx, L: LanguageTrait>(
968    unit: CompileUnit<'tcx>,
969    unit_index: usize,
970) -> Result<UnitGraph, Box<dyn std::error::Error>> {
971    build_llmcc_graph_with_config::<L>(unit, unit_index, GraphBuildConfig::default())
972}
973
974#[derive(Clone)]
975struct CompactNode {
976    block_id: BlockId,
977    unit_index: usize,
978    name: String,
979    location: Option<String>,
980    group: String,
981}
982
983fn compact_byte_to_line(content: &[u8], byte_pos: usize) -> usize {
984    let clamped = byte_pos.min(content.len());
985    content[..clamped].iter().filter(|&&ch| ch == b'\n').count() + 1
986}
987
988fn escape_dot_label(input: &str) -> String {
989    input
990        .replace('\\', "\\\\")
991        .replace('"', "\\\"")
992        .replace('\n', "\\n")
993}
994
995fn escape_dot_attr(input: &str) -> String {
996    input.replace('\\', "\\\\").replace('"', "\\\"")
997}
998
999fn summarize_location(location: &str) -> (String, String) {
1000    let (path_part, line_part) = location
1001        .rsplit_once(':')
1002        .map(|(path, line)| (path, Some(line)))
1003        .unwrap_or((location, None));
1004
1005    let path = Path::new(path_part);
1006    let components: Vec<_> = path
1007        .components()
1008        .filter_map(|comp| comp.as_os_str().to_str())
1009        .collect();
1010
1011    let start = components.len().saturating_sub(3);
1012    let mut shortened = components[start..].join("/");
1013    if shortened.is_empty() {
1014        shortened = path
1015            .file_name()
1016            .and_then(|name| name.to_str())
1017            .unwrap_or(path_part)
1018            .to_string();
1019    }
1020
1021    let display = if let Some(line) = line_part {
1022        format!("{shortened}:{line}")
1023    } else {
1024        shortened
1025    };
1026
1027    (display, location.to_string())
1028}
1029
1030fn extract_crate_path(location: &str) -> String {
1031    let path = location.split(':').next().unwrap_or(location);
1032    let parts: Vec<&str> = path.split(['/', '\\']).collect();
1033
1034    if let Some(src_idx) = parts.iter().position(|&p| p == "src") {
1035        if src_idx > 0 {
1036            return parts[src_idx - 1].to_string();
1037        }
1038    }
1039
1040    if let Some(filename) = parts.last() {
1041        if !filename.is_empty() {
1042            return filename.split('.').next().unwrap_or("unknown").to_string();
1043        }
1044    }
1045
1046    "unknown".to_string()
1047}
1048
1049fn extract_python_module_path(location: &str) -> String {
1050    const MAX_MODULE_DEPTH: usize = 2;
1051
1052    let path_str = location.split(':').next().unwrap_or(location);
1053    let path = Path::new(path_str);
1054
1055    if path.extension().and_then(|ext| ext.to_str()) != Some("py") {
1056        return extract_crate_path(location);
1057    }
1058
1059    let file_stem = path
1060        .file_stem()
1061        .and_then(|s| s.to_str())
1062        .map(|s| s.to_string());
1063
1064    let mut packages: Vec<String> = Vec::new();
1065    let mut current = path.parent();
1066
1067    while let Some(dir) = current {
1068        let dir_name = match dir.file_name().and_then(|n| n.to_str()) {
1069            Some(name) if !name.is_empty() => name.to_string(),
1070            _ => break,
1071        };
1072
1073        let has_init = dir.join("__init__.py").exists() || dir.join("__init__.pyi").exists();
1074
1075        if has_init {
1076            packages.push(dir_name);
1077        }
1078
1079        current = dir.parent();
1080    }
1081
1082    if packages.is_empty() {
1083        if let Some(stem) = file_stem
1084            .as_ref()
1085            .filter(|stem| stem.as_str() != "__init__")
1086        {
1087            return stem.clone();
1088        }
1089
1090        if let Some(parent_name) = path
1091            .parent()
1092            .and_then(|dir| dir.file_name().and_then(|n| n.to_str()))
1093            .map(|s| s.to_string())
1094        {
1095            return parent_name;
1096        }
1097
1098        return "unknown".to_string();
1099    }
1100
1101    packages.reverse();
1102    if packages.len() > MAX_MODULE_DEPTH {
1103        packages.truncate(MAX_MODULE_DEPTH);
1104    }
1105
1106    packages.join(".")
1107}
1108
1109fn extract_group_path(location: &str) -> String {
1110    let path = location.split(':').next().unwrap_or(location);
1111    if path.ends_with(".py") {
1112        extract_python_module_path(location)
1113    } else {
1114        extract_crate_path(location)
1115    }
1116}
1117
1118fn render_compact_dot(nodes: &[CompactNode], edges: &BTreeSet<(usize, usize)>) -> String {
1119    let mut crate_groups: BTreeMap<String, Vec<usize>> = BTreeMap::new();
1120    for (idx, node) in nodes.iter().enumerate() {
1121        crate_groups
1122            .entry(node.group.clone())
1123            .or_default()
1124            .push(idx);
1125    }
1126
1127    let mut output = String::from("digraph CompactProject {\n");
1128
1129    for (subgraph_counter, (crate_path, node_indices)) in crate_groups.iter().enumerate() {
1130        output.push_str(&format!("  subgraph cluster_{} {{\n", subgraph_counter));
1131        output.push_str(&format!(
1132            "    label=\"{}\";\n",
1133            escape_dot_label(crate_path)
1134        ));
1135        output.push_str("    style=filled;\n");
1136        output.push_str("    color=lightgrey;\n");
1137
1138        for &idx in node_indices {
1139            let node = &nodes[idx];
1140            let label = escape_dot_label(&node.name);
1141            let mut attrs = vec![format!("label=\"{}\"", label)];
1142
1143            if let Some(location) = &node.location {
1144                let (_display, full) = summarize_location(location);
1145                let escaped_full = escape_dot_attr(&full);
1146                attrs.push(format!("full_path=\"{}\"", escaped_full));
1147            }
1148
1149            output.push_str(&format!("    n{} [{}];\n", idx, attrs.join(", ")));
1150        }
1151
1152        output.push_str("  }\n");
1153    }
1154
1155    for &(from, to) in edges {
1156        output.push_str(&format!("  n{} -> n{};\n", from, to));
1157    }
1158
1159    output.push_str("}\n");
1160    output
1161}
1162
1163fn build_compact_node_index(nodes: &[CompactNode]) -> HashMap<BlockId, usize> {
1164    let mut node_index = HashMap::with_capacity(nodes.len());
1165    for (idx, node) in nodes.iter().enumerate() {
1166        node_index.insert(node.block_id, idx);
1167    }
1168    node_index
1169}
1170
1171struct PrunedGraph {
1172    nodes: Vec<CompactNode>,
1173    edges: BTreeSet<(usize, usize)>,
1174}
1175
1176fn prune_compact_components(
1177    nodes: &[CompactNode],
1178    edges: &BTreeSet<(usize, usize)>,
1179) -> PrunedGraph {
1180    if nodes.is_empty() {
1181        return PrunedGraph {
1182            nodes: Vec::new(),
1183            edges: BTreeSet::new(),
1184        };
1185    }
1186
1187    let components = find_connected_components(nodes.len(), edges);
1188    if components.is_empty() {
1189        return PrunedGraph {
1190            nodes: nodes.to_vec(),
1191            edges: edges.clone(),
1192        };
1193    }
1194
1195    let mut retained_indices = HashSet::new();
1196    for component in components {
1197        if component.len() == 1 {
1198            let idx = component[0];
1199            let has_edges = edges.iter().any(|&(from, to)| from == idx || to == idx);
1200            if !has_edges {
1201                continue;
1202            }
1203        }
1204        retained_indices.extend(component);
1205    }
1206
1207    if retained_indices.is_empty() {
1208        return PrunedGraph {
1209            nodes: Vec::new(),
1210            edges: BTreeSet::new(),
1211        };
1212    }
1213
1214    let mut retained_nodes = Vec::new();
1215    let mut old_to_new = HashMap::new();
1216    for (new_idx, old_idx) in retained_indices.iter().enumerate() {
1217        retained_nodes.push(nodes[*old_idx].clone());
1218        old_to_new.insert(*old_idx, new_idx);
1219    }
1220
1221    let mut retained_edges = BTreeSet::new();
1222    for &(from, to) in edges {
1223        if let (Some(&new_from), Some(&new_to)) = (old_to_new.get(&from), old_to_new.get(&to)) {
1224            retained_edges.insert((new_from, new_to));
1225        }
1226    }
1227
1228    PrunedGraph {
1229        nodes: retained_nodes,
1230        edges: retained_edges,
1231    }
1232}
1233
1234fn find_connected_components(
1235    node_count: usize,
1236    edges: &BTreeSet<(usize, usize)>,
1237) -> Vec<Vec<usize>> {
1238    if node_count == 0 {
1239        return Vec::new();
1240    }
1241
1242    let mut graph: HashMap<usize, Vec<usize>> = HashMap::new();
1243    for &(from, to) in edges.iter() {
1244        graph.entry(from).or_default().push(to);
1245        graph.entry(to).or_default().push(from);
1246    }
1247
1248    let mut visited = HashSet::new();
1249    let mut components = Vec::new();
1250
1251    for node in 0..node_count {
1252        if visited.contains(&node) {
1253            continue;
1254        }
1255
1256        let mut component = Vec::new();
1257        let mut stack = vec![node];
1258
1259        while let Some(current) = stack.pop() {
1260            if !visited.insert(current) {
1261                continue;
1262            }
1263
1264            component.push(current);
1265
1266            if let Some(neighbors) = graph.get(&current) {
1267                for &neighbor in neighbors {
1268                    if !visited.contains(&neighbor) {
1269                        stack.push(neighbor);
1270                    }
1271                }
1272            }
1273        }
1274
1275        components.push(component);
1276    }
1277
1278    components
1279}
1280
1281fn reduce_transitive_edges(
1282    nodes: &[CompactNode],
1283    edges: &BTreeSet<(usize, usize)>,
1284) -> BTreeSet<(usize, usize)> {
1285    if nodes.is_empty() {
1286        return BTreeSet::new();
1287    }
1288
1289    let mut adjacency: HashMap<usize, Vec<usize>> = HashMap::new();
1290    for &(from, to) in edges.iter() {
1291        adjacency.entry(from).or_default().push(to);
1292    }
1293
1294    let mut minimal_edges = BTreeSet::new();
1295
1296    for &(from, to) in edges.iter() {
1297        if !has_alternative_path(from, to, &adjacency, (from, to)) {
1298            minimal_edges.insert((from, to));
1299        }
1300    }
1301
1302    minimal_edges
1303}
1304
1305fn has_alternative_path(
1306    start: usize,
1307    target: usize,
1308    adjacency: &HashMap<usize, Vec<usize>>,
1309    edge_to_skip: (usize, usize),
1310) -> bool {
1311    let mut visited = HashSet::new();
1312    let mut stack: Vec<usize> = adjacency
1313        .get(&start)
1314        .into_iter()
1315        .flat_map(|neighbors| neighbors.iter())
1316        .filter_map(|&neighbor| {
1317            if (start, neighbor) == edge_to_skip {
1318                None
1319            } else {
1320                Some(neighbor)
1321            }
1322        })
1323        .collect();
1324
1325    while let Some(current) = stack.pop() {
1326        if !visited.insert(current) {
1327            continue;
1328        }
1329
1330        if current == target {
1331            return true;
1332        }
1333
1334        if let Some(neighbors) = adjacency.get(&current) {
1335            for &neighbor in neighbors {
1336                if (current, neighbor) == edge_to_skip {
1337                    continue;
1338                }
1339                if !visited.contains(&neighbor) {
1340                    stack.push(neighbor);
1341                }
1342            }
1343        }
1344    }
1345
1346    false
1347}