llmcc_core/
graph_builder.rs

1use std::collections::{BTreeMap, BTreeSet, HashMap, HashSet};
2use std::marker::PhantomData;
3use std::path::Path;
4
5pub use crate::block::{BasicBlock, BlockId, BlockKind, BlockRelation};
6use crate::block::{
7    BlockCall, BlockClass, BlockConst, BlockEnum, BlockField, BlockFunc, BlockImpl, BlockRoot,
8    BlockStmt,
9};
10use crate::block_rel::BlockRelationMap;
11use crate::context::{CompileCtxt, CompileUnit};
12use crate::ir::HirNode;
13use crate::lang_def::LanguageTrait;
14use crate::pagerank::{PageRankDirection, PageRanker};
15use crate::symbol::{SymId, Symbol};
16use crate::visit::HirVisitor;
17
18const COMPACT_INTERESTING_KINDS: [BlockKind; 2] = [BlockKind::Class, BlockKind::Enum];
19
20#[derive(Debug, Clone)]
21pub struct UnitGraph {
22    /// Compile unit this graph belongs to
23    unit_index: usize,
24    /// Root block ID of this unit
25    root: BlockId,
26    /// Edges of this graph unit
27    edges: BlockRelationMap,
28}
29
30impl UnitGraph {
31    pub fn new(unit_index: usize, root: BlockId, edges: BlockRelationMap) -> Self {
32        Self {
33            unit_index,
34            root,
35            edges,
36        }
37    }
38
39    pub fn unit_index(&self) -> usize {
40        self.unit_index
41    }
42
43    pub fn root(&self) -> BlockId {
44        self.root
45    }
46
47    pub fn edges(&self) -> &BlockRelationMap {
48        &self.edges
49    }
50}
51
52#[derive(Debug, Clone, Copy)]
53pub struct GraphBuildConfig {
54    pub compact: bool,
55}
56
57impl GraphBuildConfig {
58    pub fn compact() -> Self {
59        Self { compact: true }
60    }
61}
62
63impl Default for GraphBuildConfig {
64    fn default() -> Self {
65        Self { compact: false }
66    }
67}
68
69#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
70pub struct GraphNode {
71    pub unit_index: usize,
72    pub block_id: BlockId,
73}
74
75/// ProjectGraph represents a complete compilation project with all units and their inter-dependencies.
76///
77/// # Overview
78/// ProjectGraph maintains a collection of per-unit compilation graphs (UnitGraph) and facilitates
79/// cross-unit dependency resolution. It provides efficient multi-dimensional indexing for block
80/// lookups by name, kind, unit, and ID, enabling quick context retrieval for LLM consumption.
81///
82/// # Architecture
83/// The graph consists of:
84/// - **UnitGraphs**: One per compilation unit (file), containing blocks and intra-unit relations
85/// - **Block Indexes**: Multi-dimensional indexes via BlockIndexMaps for O(1) to O(log n) lookups
86/// - **Cross-unit Links**: Dependencies tracked between blocks across different units
87///
88/// # Primary Use Cases
89/// 1. **Symbol Resolution**: Find blocks by name across the entire project
90/// 2. **Context Gathering**: Collect all related blocks for code analysis
91/// 3. **LLM Serialization**: Export graph as text or JSON for LLM model consumption
92/// 4. **Dependency Analysis**: Traverse dependency graphs to understand block relationships
93///
94#[derive(Debug)]
95pub struct ProjectGraph<'tcx> {
96    /// Reference to the compilation context containing all symbols, HIR nodes, and blocks
97    pub cc: &'tcx CompileCtxt<'tcx>,
98    /// Per-unit graphs containing blocks and intra-unit relations
99    units: Vec<UnitGraph>,
100    compact_rank_limit: Option<usize>,
101    pagerank_direction: PageRankDirection,
102}
103
104impl<'tcx> ProjectGraph<'tcx> {
105    pub fn new(cc: &'tcx CompileCtxt<'tcx>) -> Self {
106        Self {
107            cc,
108            units: Vec::new(),
109            compact_rank_limit: None,
110            pagerank_direction: PageRankDirection::default(),
111        }
112    }
113
114    pub fn add_child(&mut self, graph: UnitGraph) {
115        self.units.push(graph);
116    }
117
118    /// Configure the number of PageRank-filtered nodes retained when rendering compact graphs.
119    pub fn set_compact_rank_limit(&mut self, limit: Option<usize>) {
120        self.compact_rank_limit = match limit {
121            Some(0) => None,
122            other => other,
123        };
124    }
125
126    /// Configure the PageRank direction for ranking nodes.
127    pub fn set_pagerank_direction(&mut self, direction: PageRankDirection) {
128        self.pagerank_direction = direction;
129    }
130
131    pub fn link_units(&mut self) {
132        if self.units.is_empty() {
133            return;
134        }
135
136        let mut unresolved = self.cc.unresolve_symbols.borrow_mut();
137
138        unresolved.retain(|symbol_ref| {
139            let target = *symbol_ref;
140            let Some(target_block) = target.block_id() else {
141                return false;
142            };
143
144            let dependents: Vec<SymId> = target.depended.borrow().clone();
145            for dependent_id in dependents {
146                let Some(source_symbol) = self.cc.opt_get_symbol(dependent_id) else {
147                    continue;
148                };
149                let Some(from_block) = source_symbol.block_id() else {
150                    continue;
151                };
152                self.add_cross_edge(
153                    source_symbol.unit_index().unwrap(),
154                    target.unit_index().unwrap(),
155                    from_block,
156                    target_block,
157                );
158            }
159
160            false
161        });
162    }
163
164    pub fn units(&self) -> &[UnitGraph] {
165        &self.units
166    }
167
168    pub fn unit_graph(&self, unit_index: usize) -> Option<&UnitGraph> {
169        self.units
170            .iter()
171            .find(|unit| unit.unit_index() == unit_index)
172    }
173
174    pub fn block_by_name(&self, name: &str) -> Option<GraphNode> {
175        let block_indexes = self.cc.block_indexes.borrow();
176        let matches = block_indexes.find_by_name(name);
177
178        matches.first().map(|(unit_index, _, block_id)| GraphNode {
179            unit_index: *unit_index,
180            block_id: *block_id,
181        })
182    }
183
184    pub fn blocks_by_name(&self, name: &str) -> Vec<GraphNode> {
185        let block_indexes = self.cc.block_indexes.borrow();
186        let matches = block_indexes.find_by_name(name);
187
188        matches
189            .into_iter()
190            .map(|(unit_index, _, block_id)| GraphNode {
191                unit_index,
192                block_id,
193            })
194            .collect()
195    }
196
197    pub fn render_compact_graph(&self) -> String {
198        self.render_compact_graph_inner(self.compact_rank_limit)
199    }
200
201    fn ranked_block_filter(
202        &self,
203        top_k: Option<usize>,
204        interesting_kinds: &[BlockKind],
205    ) -> Option<HashSet<BlockId>> {
206        let ranked_order = top_k.and_then(|limit| {
207            let mut ranker = PageRanker::new(self);
208            ranker.config_mut().direction = self.pagerank_direction;
209            let mut collected = Vec::new();
210
211            for ranked in ranker.rank() {
212                if interesting_kinds.contains(&ranked.kind) {
213                    collected.push(ranked.node.block_id);
214                }
215                if collected.len() >= limit {
216                    break;
217                }
218            }
219
220            if collected.is_empty() {
221                None
222            } else {
223                Some(collected)
224            }
225        });
226
227        ranked_order.map(|ordered| ordered.into_iter().collect())
228    }
229
230    fn collect_compact_nodes(
231        &self,
232        interesting_kinds: &[BlockKind],
233        ranked_filter: Option<&HashSet<BlockId>>,
234    ) -> Vec<CompactNode> {
235        let block_indexes = self.cc.block_indexes.borrow();
236        block_indexes
237            .block_id_index
238            .iter()
239            .filter_map(|(&block_id, (unit_index, name_opt, kind))| {
240                if !interesting_kinds.contains(kind) {
241                    return None;
242                }
243
244                if let Some(ids) = ranked_filter {
245                    if !ids.contains(&block_id) {
246                        return None;
247                    }
248                }
249
250                let unit = self.cc.compile_unit(*unit_index);
251                let block = unit.bb(block_id);
252                let display_name = name_opt
253                    .clone()
254                    .or_else(|| {
255                        block
256                            .base()
257                            .and_then(|base| base.opt_get_name().map(|s| s.to_string()))
258                    })
259                    .unwrap_or_else(|| format!("{}:{}", kind, block_id.as_u32()));
260
261                let raw_path = unit
262                    .file_path()
263                    .or_else(|| unit.file().path())
264                    .unwrap_or("<unknown>");
265
266                let path = std::fs::canonicalize(raw_path)
267                    .map(|p| p.to_string_lossy().to_string())
268                    .unwrap_or_else(|_| raw_path.to_string());
269
270                let location = block
271                    .opt_node()
272                    .and_then(|node| {
273                        unit.file()
274                            .file
275                            .content
276                            .as_ref()
277                            .map(|bytes| compact_byte_to_line(bytes.as_slice(), node.start_byte()))
278                            .map(|line| format!("{path}:{line}"))
279                    })
280                    .or(Some(path.clone()));
281
282                let group = location
283                    .as_ref()
284                    .map(|loc| extract_group_path(loc))
285                    .unwrap_or_else(|| extract_group_path(&path));
286
287                Some(CompactNode {
288                    block_id,
289                    unit_index: *unit_index,
290                    name: display_name,
291                    location,
292                    group,
293                })
294            })
295            .collect()
296    }
297
298    fn collect_sorted_compact_nodes(&self, top_k: Option<usize>) -> Vec<CompactNode> {
299        let ranked_filter = self.ranked_block_filter(top_k, &COMPACT_INTERESTING_KINDS);
300        let mut nodes =
301            self.collect_compact_nodes(&COMPACT_INTERESTING_KINDS, ranked_filter.as_ref());
302        nodes.sort_by(|a, b| a.name.cmp(&b.name));
303        nodes
304    }
305
306    fn collect_compact_edges(
307        &self,
308        nodes: &[CompactNode],
309        node_index: &HashMap<BlockId, usize>,
310    ) -> BTreeSet<(usize, usize)> {
311        let mut edges = BTreeSet::new();
312
313        for node in nodes {
314            let Some(unit_graph) = self.unit_graph(node.unit_index) else {
315                continue;
316            };
317            let from_idx = node_index[&node.block_id];
318
319            let dependencies = unit_graph
320                .edges()
321                .get_related(node.block_id, BlockRelation::DependsOn);
322
323            for dep_block_id in dependencies {
324                if let Some(&to_idx) = node_index.get(&dep_block_id) {
325                    edges.insert((from_idx, to_idx));
326                }
327            }
328        }
329
330        edges
331    }
332
333    pub fn block_by_name_in(&self, unit_index: usize, name: &str) -> Option<GraphNode> {
334        let block_indexes = self.cc.block_indexes.borrow();
335        let matches = block_indexes.find_by_name(name);
336
337        matches
338            .iter()
339            .find(|(u, _, _)| *u == unit_index)
340            .map(|(_, _, block_id)| GraphNode {
341                unit_index,
342                block_id: *block_id,
343            })
344    }
345
346    pub fn blocks_by_kind(&self, block_kind: BlockKind) -> Vec<GraphNode> {
347        let block_indexes = self.cc.block_indexes.borrow();
348        let matches = block_indexes.find_by_kind(block_kind);
349
350        matches
351            .into_iter()
352            .map(|(unit_index, _, block_id)| GraphNode {
353                unit_index,
354                block_id,
355            })
356            .collect()
357    }
358
359    pub fn blocks_by_kind_in(&self, block_kind: BlockKind, unit_index: usize) -> Vec<GraphNode> {
360        let block_indexes = self.cc.block_indexes.borrow();
361        let block_ids = block_indexes.find_by_kind_and_unit(block_kind, unit_index);
362
363        block_ids
364            .into_iter()
365            .map(|block_id| GraphNode {
366                unit_index,
367                block_id,
368            })
369            .collect()
370    }
371
372    pub fn blocks_in(&self, unit_index: usize) -> Vec<GraphNode> {
373        let block_indexes = self.cc.block_indexes.borrow();
374        let matches = block_indexes.find_by_unit(unit_index);
375
376        matches
377            .into_iter()
378            .map(|(_, _, block_id)| GraphNode {
379                unit_index,
380                block_id,
381            })
382            .collect()
383    }
384
385    pub fn block_info(&self, block_id: BlockId) -> Option<(usize, Option<String>, BlockKind)> {
386        let block_indexes = self.cc.block_indexes.borrow();
387        block_indexes.get_block_info(block_id)
388    }
389
390    pub fn find_related_blocks(
391        &self,
392        node: GraphNode,
393        relations: Vec<BlockRelation>,
394    ) -> Vec<GraphNode> {
395        if node.unit_index >= self.units.len() {
396            return Vec::new();
397        }
398
399        let unit = &self.units[node.unit_index];
400        let mut result = Vec::new();
401
402        for relation in relations {
403            match relation {
404                BlockRelation::DependsOn => {
405                    // Get all blocks that this block depends on
406                    let dependencies = unit
407                        .edges
408                        .get_related(node.block_id, BlockRelation::DependsOn);
409                    let block_indexes = self.cc.block_indexes.borrow();
410                    for dep_block_id in dependencies {
411                        let dep_unit_index = block_indexes
412                            .get_block_info(dep_block_id)
413                            .map(|(idx, _, _)| idx)
414                            .unwrap_or(node.unit_index);
415                        result.push(GraphNode {
416                            unit_index: dep_unit_index,
417                            block_id: dep_block_id,
418                        });
419                    }
420                }
421                BlockRelation::DependedBy => {
422                    let mut seen = HashSet::new();
423
424                    // Direct dependents tracked on this unit (covers cross-unit edges too)
425                    let dependents = unit
426                        .edges
427                        .get_related(node.block_id, BlockRelation::DependedBy);
428                    if !dependents.is_empty() {
429                        let indexes = self.cc.block_indexes.borrow();
430                        for dep_block_id in dependents {
431                            if !seen.insert(dep_block_id) {
432                                continue;
433                            }
434                            if let Some((dep_unit_idx, _, _)) = indexes.get_block_info(dep_block_id)
435                            {
436                                result.push(GraphNode {
437                                    unit_index: dep_unit_idx,
438                                    block_id: dep_block_id,
439                                });
440                            } else {
441                                result.push(GraphNode {
442                                    unit_index: node.unit_index,
443                                    block_id: dep_block_id,
444                                });
445                            }
446                        }
447                    }
448
449                    // Fallback: scan current unit for reverse DependsOn edges
450                    let local_dependents = unit
451                        .edges
452                        .find_reverse_relations(node.block_id, BlockRelation::DependsOn);
453                    for dep_block_id in local_dependents {
454                        if !seen.insert(dep_block_id) {
455                            continue;
456                        }
457                        result.push(GraphNode {
458                            unit_index: node.unit_index,
459                            block_id: dep_block_id,
460                        });
461                    }
462                }
463                BlockRelation::Unknown => {
464                    // Skip unknown relations
465                }
466            }
467        }
468
469        result
470    }
471
472    pub fn find_dpends_blocks_recursive(&self, node: GraphNode) -> HashSet<GraphNode> {
473        let mut visited = HashSet::new();
474        let mut stack = vec![node];
475        let relations = vec![BlockRelation::DependsOn];
476
477        while let Some(current) = stack.pop() {
478            if visited.contains(&current) {
479                continue;
480            }
481            visited.insert(current);
482
483            for related in self.find_related_blocks(current, relations.clone()) {
484                if !visited.contains(&related) {
485                    stack.push(related);
486                }
487            }
488        }
489
490        visited.remove(&node);
491        visited
492    }
493
494    pub fn traverse_bfs<F>(&self, start: GraphNode, mut callback: F)
495    where
496        F: FnMut(GraphNode),
497    {
498        let mut visited = HashSet::new();
499        let mut queue = vec![start];
500        let relations = vec![BlockRelation::DependsOn, BlockRelation::DependedBy];
501
502        while !queue.is_empty() {
503            let current = queue.remove(0);
504            if visited.contains(&current) {
505                continue;
506            }
507            visited.insert(current);
508            callback(current);
509
510            for related in self.find_related_blocks(current, relations.clone()) {
511                if !visited.contains(&related) {
512                    queue.push(related);
513                }
514            }
515        }
516    }
517
518    pub fn traverse_dfs<F>(&self, start: GraphNode, mut callback: F)
519    where
520        F: FnMut(GraphNode),
521    {
522        let mut visited = HashSet::new();
523        self.traverse_dfs_impl(start, &mut visited, &mut callback);
524    }
525
526    fn traverse_dfs_impl<F>(
527        &self,
528        node: GraphNode,
529        visited: &mut HashSet<GraphNode>,
530        callback: &mut F,
531    ) where
532        F: FnMut(GraphNode),
533    {
534        if visited.contains(&node) {
535            return;
536        }
537        visited.insert(node);
538        callback(node);
539
540        let relations = vec![BlockRelation::DependsOn, BlockRelation::DependedBy];
541        for related in self.find_related_blocks(node, relations) {
542            if !visited.contains(&related) {
543                self.traverse_dfs_impl(related, visited, callback);
544            }
545        }
546    }
547
548    pub fn get_block_depends(&self, node: GraphNode) -> HashSet<GraphNode> {
549        if node.unit_index >= self.units.len() {
550            return HashSet::new();
551        }
552
553        let unit = &self.units[node.unit_index];
554        let mut result = HashSet::new();
555        let mut visited = HashSet::new();
556        let mut stack = vec![node.block_id];
557        let block_indexes = self.cc.block_indexes.borrow();
558
559        while let Some(current_block) = stack.pop() {
560            if visited.contains(&current_block) {
561                continue;
562            }
563            visited.insert(current_block);
564
565            let dependencies = unit
566                .edges
567                .get_related(current_block, BlockRelation::DependsOn);
568            for dep_block_id in dependencies {
569                if dep_block_id != node.block_id {
570                    let dep_unit_index = block_indexes
571                        .get_block_info(dep_block_id)
572                        .map(|(idx, _, _)| idx)
573                        .unwrap_or(node.unit_index);
574                    result.insert(GraphNode {
575                        unit_index: dep_unit_index,
576                        block_id: dep_block_id,
577                    });
578                    stack.push(dep_block_id);
579                }
580            }
581        }
582
583        result
584    }
585
586    pub fn get_block_depended(&self, node: GraphNode) -> HashSet<GraphNode> {
587        if node.unit_index >= self.units.len() {
588            return HashSet::new();
589        }
590
591        let unit = &self.units[node.unit_index];
592        let mut result = HashSet::new();
593        let mut visited = HashSet::new();
594        let mut stack = vec![node.block_id];
595        let block_indexes = self.cc.block_indexes.borrow();
596
597        while let Some(current_block) = stack.pop() {
598            if visited.contains(&current_block) {
599                continue;
600            }
601            visited.insert(current_block);
602
603            let dependencies = unit
604                .edges
605                .get_related(current_block, BlockRelation::DependedBy);
606            for dep_block_id in dependencies {
607                if dep_block_id != node.block_id {
608                    let dep_unit_index = block_indexes
609                        .get_block_info(dep_block_id)
610                        .map(|(idx, _, _)| idx)
611                        .unwrap_or(node.unit_index);
612                    result.insert(GraphNode {
613                        unit_index: dep_unit_index,
614                        block_id: dep_block_id,
615                    });
616                    stack.push(dep_block_id);
617                }
618            }
619        }
620
621        result
622    }
623
624    fn render_compact_graph_inner(&self, top_k: Option<usize>) -> String {
625        let nodes = self.collect_sorted_compact_nodes(top_k);
626
627        if nodes.is_empty() {
628            return "digraph CompactProject {\n}\n".to_string();
629        }
630
631        let node_index = build_compact_node_index(&nodes);
632        let edges = self.collect_compact_edges(&nodes, &node_index);
633
634        let pruned = prune_compact_components(&nodes, &edges);
635        if pruned.nodes.is_empty() {
636            return "digraph CompactProject {\n}\n".to_string();
637        }
638
639        let reduced_edges = reduce_transitive_edges(&pruned.nodes, &pruned.edges);
640
641        render_compact_dot(&pruned.nodes, &reduced_edges)
642    }
643
644    fn add_cross_edge(
645        &self,
646        from_idx: usize,
647        to_idx: usize,
648        from_block: BlockId,
649        to_block: BlockId,
650    ) {
651        if from_idx == to_idx {
652            let unit = &self.units[from_idx];
653            if !unit
654                .edges
655                .has_relation(from_block, BlockRelation::DependsOn, to_block)
656            {
657                unit.edges.add_relation(from_block, to_block);
658            }
659            return;
660        }
661
662        let from_unit = &self.units[from_idx];
663        from_unit
664            .edges
665            .add_relation_if_not_exists(from_block, BlockRelation::DependsOn, to_block);
666
667        let to_unit = &self.units[to_idx];
668        to_unit
669            .edges
670            .add_relation_if_not_exists(to_block, BlockRelation::DependedBy, from_block);
671    }
672}
673
674#[derive(Debug)]
675struct GraphBuilder<'tcx, Language> {
676    unit: CompileUnit<'tcx>,
677    root: Option<BlockId>,
678    children_stack: Vec<Vec<BlockId>>,
679    config: GraphBuildConfig,
680    _marker: PhantomData<Language>,
681}
682
683impl<'tcx, Language: LanguageTrait> GraphBuilder<'tcx, Language> {
684    fn new(unit: CompileUnit<'tcx>, config: GraphBuildConfig) -> Self {
685        Self {
686            unit,
687            root: None,
688            children_stack: Vec::new(),
689            config,
690            _marker: PhantomData,
691        }
692    }
693
694    fn next_id(&self) -> BlockId {
695        self.unit.reserve_block_id()
696    }
697
698    fn create_block(
699        &self,
700        id: BlockId,
701        node: HirNode<'tcx>,
702        kind: BlockKind,
703        parent: Option<BlockId>,
704        children: Vec<BlockId>,
705    ) -> BasicBlock<'tcx> {
706        let arena = &self.unit.cc.block_arena;
707        match kind {
708            BlockKind::Root => {
709                // Extract file_name from HirFile node if available
710                let file_name = node.as_file().map(|file| file.file_path.clone());
711                let block = BlockRoot::from_hir(id, node, parent, children, file_name);
712                BasicBlock::Root(arena.alloc(block))
713            }
714            BlockKind::Func => {
715                let block = BlockFunc::from_hir(id, node, parent, children);
716                BasicBlock::Func(arena.alloc(block))
717            }
718            BlockKind::Class => {
719                let block = BlockClass::from_hir(id, node, parent, children);
720                BasicBlock::Class(arena.alloc(block))
721            }
722            BlockKind::Stmt => {
723                let stmt = BlockStmt::from_hir(id, node, parent, children);
724                BasicBlock::Stmt(arena.alloc(stmt))
725            }
726            BlockKind::Call => {
727                let stmt = BlockCall::from_hir(id, node, parent, children);
728                BasicBlock::Call(arena.alloc(stmt))
729            }
730            BlockKind::Enum => {
731                let enum_ty = BlockEnum::from_hir(id, node, parent, children);
732                BasicBlock::Enum(arena.alloc(enum_ty))
733            }
734            BlockKind::Const => {
735                let stmt = BlockConst::from_hir(id, node, parent, children);
736                BasicBlock::Const(arena.alloc(stmt))
737            }
738            BlockKind::Impl => {
739                let block = BlockImpl::from_hir(id, node, parent, children);
740                BasicBlock::Impl(arena.alloc(block))
741            }
742            BlockKind::Field => {
743                let block = BlockField::from_hir(id, node, parent, children);
744                BasicBlock::Field(arena.alloc(block))
745            }
746            _ => {
747                panic!("unknown block kind: {}", kind)
748            }
749        }
750    }
751
752    fn build_edges(&self, node: HirNode<'tcx>) -> BlockRelationMap {
753        let edges = BlockRelationMap::default();
754        let mut visited = HashSet::new();
755        let mut unresolved = HashSet::new();
756        self.collect_edges(node, &edges, &mut visited, &mut unresolved);
757        edges
758    }
759
760    fn collect_edges(
761        &self,
762        node: HirNode<'tcx>,
763        edges: &BlockRelationMap,
764        visited: &mut HashSet<SymId>,
765        unresolved: &mut HashSet<SymId>,
766    ) {
767        // Try to process symbol dependencies for this node
768        if let Some(scope) = self.unit.opt_get_scope(node.hir_id()) {
769            if let Some(symbol) = scope.symbol() {
770                self.process_symbol(symbol, edges, visited, unresolved);
771            }
772        }
773
774        // Recurse into children
775        for &child_id in node.children() {
776            let child = self.unit.hir_node(child_id);
777            self.collect_edges(child, edges, visited, unresolved);
778        }
779    }
780
781    fn process_symbol(
782        &self,
783        symbol: &'tcx Symbol,
784        edges: &BlockRelationMap,
785        visited: &mut HashSet<SymId>,
786        unresolved: &mut HashSet<SymId>,
787    ) {
788        let symbol_id = symbol.id;
789
790        // Avoid processing the same symbol twice
791        if !visited.insert(symbol_id) {
792            return;
793        }
794
795        let Some(from_block) = symbol.block_id() else {
796            return;
797        };
798
799        for &dep_id in symbol.depends.borrow().iter() {
800            self.link_dependency(dep_id, from_block, edges, unresolved);
801        }
802    }
803    fn link_dependency(
804        &self,
805        dep_id: SymId,
806        from_block: BlockId,
807        edges: &BlockRelationMap,
808        unresolved: &mut HashSet<SymId>,
809    ) {
810        // If target symbol exists and has a block, add the dependency edge
811        if let Some(target_symbol) = self.unit.opt_get_symbol(dep_id) {
812            if let Some(to_block) = target_symbol.block_id() {
813                if !edges.has_relation(from_block, BlockRelation::DependsOn, to_block) {
814                    edges.add_relation(from_block, to_block);
815                }
816                let target_unit = target_symbol.unit_index();
817                if target_unit.is_some()
818                    && target_unit != Some(self.unit.index)
819                    && unresolved.insert(dep_id)
820                {
821                    self.unit.add_unresolved_symbol(target_symbol);
822                }
823                return;
824            }
825
826            // Target symbol exists but block not yet known
827            if unresolved.insert(dep_id) {
828                self.unit.add_unresolved_symbol(target_symbol);
829            }
830            return;
831        }
832
833        // Target symbol not found at all
834        unresolved.insert(dep_id);
835    }
836
837    fn build_block(&mut self, node: HirNode<'tcx>, parent: BlockId, recursive: bool) {
838        let id = self.next_id();
839        let block_kind = Language::block_kind(node.kind_id());
840        assert_ne!(block_kind, BlockKind::Undefined);
841
842        if self.root.is_none() {
843            self.root = Some(id);
844        }
845
846        let children = if recursive {
847            self.children_stack.push(Vec::new());
848            self.visit_children(node, id);
849
850            self.children_stack.pop().unwrap()
851        } else {
852            Vec::new()
853        };
854
855        let block = self.create_block(id, node, block_kind, Some(parent), children);
856        if let Some(scope) = self.unit.opt_get_scope(node.hir_id()) {
857            if let Some(symbol) = scope.symbol() {
858                // Only set the block ID if it hasn't been set before
859                // This prevents impl blocks from overwriting struct block IDs
860                if symbol.block_id().is_none() {
861                    symbol.set_block_id(Some(id));
862                }
863            }
864        }
865        self.unit.insert_block(id, block, parent);
866
867        if let Some(children) = self.children_stack.last_mut() {
868            children.push(id);
869        }
870    }
871}
872
873impl<'tcx, Language: LanguageTrait> HirVisitor<'tcx> for GraphBuilder<'tcx, Language> {
874    fn unit(&self) -> CompileUnit<'tcx> {
875        self.unit
876    }
877
878    fn visit_file(&mut self, node: HirNode<'tcx>, parent: BlockId) {
879        self.children_stack.push(Vec::new());
880        self.build_block(node, parent, true);
881    }
882
883    fn visit_internal(&mut self, node: HirNode<'tcx>, parent: BlockId) {
884        let kind = Language::block_kind(node.kind_id());
885        // if self.config.compact {
886        //     if kind == BlockKind::Root {
887        //         self.build_block(node, parent, true);
888        //     } else {
889        //         self.visit_children(node, parent);
890        //     }
891        //     return;
892        // }
893
894        // Non-compact mode: process all defined kinds
895        if kind != BlockKind::Undefined {
896            self.build_block(node, parent, false);
897        } else {
898            self.visit_children(node, parent);
899        }
900    }
901
902    fn visit_scope(&mut self, node: HirNode<'tcx>, parent: BlockId) {
903        let kind = Language::block_kind(node.kind_id());
904        // if self.config.compact {
905        //     // In compact mode, only create blocks for major constructs (Class, Enum, Impl)
906        //     // Skip functions, fields, and scopes to reduce graph size
907        //     match kind {
908        //         BlockKind::Class | BlockKind::Enum => {
909        //             // Build with recursion enabled to capture nested major constructs
910        //             self.build_block(node, parent, true);
911        //         }
912        //         // Skip all other scopes - don't recurse
913        //         _ => {
914        //             // Stop here, do not visit children
915        //             // self.visit_children(node, parent);
916        //         }
917        //     }
918        //     return;
919        // }
920
921        // Non-compact mode: build blocks for all major constructs
922        match kind {
923            BlockKind::Func
924            | BlockKind::Class
925            | BlockKind::Enum
926            | BlockKind::Const
927            | BlockKind::Impl
928            | BlockKind::Field => self.build_block(node, parent, true),
929            _ => self.visit_children(node, parent),
930        }
931    }
932}
933
934pub fn build_llmcc_graph_with_config<'tcx, L: LanguageTrait>(
935    unit: CompileUnit<'tcx>,
936    unit_index: usize,
937    config: GraphBuildConfig,
938) -> Result<UnitGraph, Box<dyn std::error::Error>> {
939    let root_hir = unit
940        .file_start_hir_id()
941        .ok_or("missing file start HIR id")?;
942    let mut builder = GraphBuilder::<L>::new(unit, config);
943    let root_node = unit.hir_node(root_hir);
944    builder.visit_node(root_node, BlockId::ROOT_PARENT);
945
946    let root_block = builder.root;
947    let root_block = root_block.ok_or("graph builder produced no root")?;
948    let edges = builder.build_edges(root_node);
949    Ok(UnitGraph::new(unit_index, root_block, edges))
950}
951
952pub fn build_llmcc_graph<'tcx, L: LanguageTrait>(
953    unit: CompileUnit<'tcx>,
954    unit_index: usize,
955) -> Result<UnitGraph, Box<dyn std::error::Error>> {
956    build_llmcc_graph_with_config::<L>(unit, unit_index, GraphBuildConfig::default())
957}
958
959#[derive(Clone)]
960struct CompactNode {
961    block_id: BlockId,
962    unit_index: usize,
963    name: String,
964    location: Option<String>,
965    group: String,
966}
967
968fn compact_byte_to_line(content: &[u8], byte_pos: usize) -> usize {
969    let clamped = byte_pos.min(content.len());
970    content[..clamped].iter().filter(|&&ch| ch == b'\n').count() + 1
971}
972
973fn escape_dot_label(input: &str) -> String {
974    input
975        .replace('\\', "\\\\")
976        .replace('"', "\\\"")
977        .replace('\n', "\\n")
978}
979
980fn escape_dot_attr(input: &str) -> String {
981    input.replace('\\', "\\\\").replace('"', "\\\"")
982}
983
984fn summarize_location(location: &str) -> (String, String) {
985    let (path_part, line_part) = location
986        .rsplit_once(':')
987        .map(|(path, line)| (path, Some(line)))
988        .unwrap_or((location, None));
989
990    let path = Path::new(path_part);
991    let components: Vec<_> = path
992        .components()
993        .filter_map(|comp| comp.as_os_str().to_str())
994        .collect();
995
996    let start = components.len().saturating_sub(3);
997    let mut shortened = components[start..].join("/");
998    if shortened.is_empty() {
999        shortened = path
1000            .file_name()
1001            .and_then(|name| name.to_str())
1002            .unwrap_or(path_part)
1003            .to_string();
1004    }
1005
1006    let display = if let Some(line) = line_part {
1007        format!("{shortened}:{line}")
1008    } else {
1009        shortened
1010    };
1011
1012    (display, location.to_string())
1013}
1014
1015fn extract_crate_path(location: &str) -> String {
1016    let path = location.split(':').next().unwrap_or(location);
1017    let parts: Vec<&str> = path.split(['/', '\\']).collect();
1018
1019    if let Some(src_idx) = parts.iter().position(|&p| p == "src") {
1020        if src_idx > 0 {
1021            return parts[src_idx - 1].to_string();
1022        }
1023    }
1024
1025    if let Some(filename) = parts.last() {
1026        if !filename.is_empty() {
1027            return filename.split('.').next().unwrap_or("unknown").to_string();
1028        }
1029    }
1030
1031    "unknown".to_string()
1032}
1033
1034fn extract_python_module_path(location: &str) -> String {
1035    const MAX_MODULE_DEPTH: usize = 2;
1036
1037    let path_str = location.split(':').next().unwrap_or(location);
1038    let path = Path::new(path_str);
1039
1040    if path.extension().and_then(|ext| ext.to_str()) != Some("py") {
1041        return extract_crate_path(location);
1042    }
1043
1044    let file_stem = path
1045        .file_stem()
1046        .and_then(|s| s.to_str())
1047        .map(|s| s.to_string());
1048
1049    let mut packages: Vec<String> = Vec::new();
1050    let mut current = path.parent();
1051
1052    while let Some(dir) = current {
1053        let dir_name = match dir.file_name().and_then(|n| n.to_str()) {
1054            Some(name) if !name.is_empty() => name.to_string(),
1055            _ => break,
1056        };
1057
1058        let has_init = dir.join("__init__.py").exists() || dir.join("__init__.pyi").exists();
1059
1060        if has_init {
1061            packages.push(dir_name);
1062        }
1063
1064        current = dir.parent();
1065    }
1066
1067    if packages.is_empty() {
1068        if let Some(stem) = file_stem
1069            .as_ref()
1070            .filter(|stem| stem.as_str() != "__init__")
1071        {
1072            return stem.clone();
1073        }
1074
1075        if let Some(parent_name) = path
1076            .parent()
1077            .and_then(|dir| dir.file_name().and_then(|n| n.to_str()))
1078            .map(|s| s.to_string())
1079        {
1080            return parent_name;
1081        }
1082
1083        return "unknown".to_string();
1084    }
1085
1086    packages.reverse();
1087    if packages.len() > MAX_MODULE_DEPTH {
1088        packages.truncate(MAX_MODULE_DEPTH);
1089    }
1090
1091    packages.join(".")
1092}
1093
1094fn extract_group_path(location: &str) -> String {
1095    let path = location.split(':').next().unwrap_or(location);
1096    if path.ends_with(".py") {
1097        extract_python_module_path(location)
1098    } else {
1099        extract_crate_path(location)
1100    }
1101}
1102
1103fn render_compact_dot(nodes: &[CompactNode], edges: &BTreeSet<(usize, usize)>) -> String {
1104    let mut crate_groups: BTreeMap<String, Vec<usize>> = BTreeMap::new();
1105    for (idx, node) in nodes.iter().enumerate() {
1106        crate_groups
1107            .entry(node.group.clone())
1108            .or_default()
1109            .push(idx);
1110    }
1111
1112    let mut output = String::from("digraph CompactProject {\n");
1113
1114    let mut subgraph_counter = 0;
1115    for (crate_path, node_indices) in crate_groups.iter() {
1116        output.push_str(&format!("  subgraph cluster_{} {{\n", subgraph_counter));
1117        output.push_str(&format!(
1118            "    label=\"{}\";\n",
1119            escape_dot_label(crate_path)
1120        ));
1121        output.push_str("    style=filled;\n");
1122        output.push_str("    color=lightgrey;\n");
1123
1124        for &idx in node_indices {
1125            let node = &nodes[idx];
1126            let label = escape_dot_label(&node.name);
1127            let mut attrs = vec![format!("label=\"{}\"", label)];
1128
1129            if let Some(location) = &node.location {
1130                let (_display, full) = summarize_location(location);
1131                let escaped_full = escape_dot_attr(&full);
1132                attrs.push(format!("full_path=\"{}\"", escaped_full));
1133            }
1134
1135            output.push_str(&format!("    n{} [{}];\n", idx, attrs.join(", ")));
1136        }
1137
1138        output.push_str("  }\n");
1139        subgraph_counter += 1;
1140    }
1141
1142    for &(from, to) in edges {
1143        output.push_str(&format!("  n{} -> n{};\n", from, to));
1144    }
1145
1146    output.push_str("}\n");
1147    output
1148}
1149
1150fn build_compact_node_index(nodes: &[CompactNode]) -> HashMap<BlockId, usize> {
1151    let mut node_index = HashMap::with_capacity(nodes.len());
1152    for (idx, node) in nodes.iter().enumerate() {
1153        node_index.insert(node.block_id, idx);
1154    }
1155    node_index
1156}
1157
1158struct PrunedGraph {
1159    nodes: Vec<CompactNode>,
1160    edges: BTreeSet<(usize, usize)>,
1161}
1162
1163fn prune_compact_components(
1164    nodes: &[CompactNode],
1165    edges: &BTreeSet<(usize, usize)>,
1166) -> PrunedGraph {
1167    if nodes.is_empty() {
1168        return PrunedGraph {
1169            nodes: Vec::new(),
1170            edges: BTreeSet::new(),
1171        };
1172    }
1173
1174    let components = find_connected_components(nodes.len(), edges);
1175    if components.is_empty() {
1176        return PrunedGraph {
1177            nodes: nodes.to_vec(),
1178            edges: edges.clone(),
1179        };
1180    }
1181
1182    let mut retained_indices = HashSet::new();
1183    for component in components {
1184        if component.len() == 1 {
1185            let idx = component[0];
1186            let has_edges = edges.iter().any(|&(from, to)| from == idx || to == idx);
1187            if !has_edges {
1188                continue;
1189            }
1190        }
1191        retained_indices.extend(component);
1192    }
1193
1194    if retained_indices.is_empty() {
1195        return PrunedGraph {
1196            nodes: Vec::new(),
1197            edges: BTreeSet::new(),
1198        };
1199    }
1200
1201    let mut retained_nodes = Vec::new();
1202    let mut old_to_new = HashMap::new();
1203    for (new_idx, old_idx) in retained_indices.iter().enumerate() {
1204        retained_nodes.push(nodes[*old_idx].clone());
1205        old_to_new.insert(*old_idx, new_idx);
1206    }
1207
1208    let mut retained_edges = BTreeSet::new();
1209    for &(from, to) in edges {
1210        if let (Some(&new_from), Some(&new_to)) = (old_to_new.get(&from), old_to_new.get(&to)) {
1211            retained_edges.insert((new_from, new_to));
1212        }
1213    }
1214
1215    PrunedGraph {
1216        nodes: retained_nodes,
1217        edges: retained_edges,
1218    }
1219}
1220
1221fn find_connected_components(
1222    node_count: usize,
1223    edges: &BTreeSet<(usize, usize)>,
1224) -> Vec<Vec<usize>> {
1225    if node_count == 0 {
1226        return Vec::new();
1227    }
1228
1229    let mut graph: HashMap<usize, Vec<usize>> = HashMap::new();
1230    for &(from, to) in edges.iter() {
1231        graph.entry(from).or_default().push(to);
1232        graph.entry(to).or_default().push(from);
1233    }
1234
1235    let mut visited = HashSet::new();
1236    let mut components = Vec::new();
1237
1238    for node in 0..node_count {
1239        if visited.contains(&node) {
1240            continue;
1241        }
1242
1243        let mut component = Vec::new();
1244        let mut stack = vec![node];
1245
1246        while let Some(current) = stack.pop() {
1247            if !visited.insert(current) {
1248                continue;
1249            }
1250
1251            component.push(current);
1252
1253            if let Some(neighbors) = graph.get(&current) {
1254                for &neighbor in neighbors {
1255                    if !visited.contains(&neighbor) {
1256                        stack.push(neighbor);
1257                    }
1258                }
1259            }
1260        }
1261
1262        components.push(component);
1263    }
1264
1265    components
1266}
1267
1268fn reduce_transitive_edges(
1269    nodes: &[CompactNode],
1270    edges: &BTreeSet<(usize, usize)>,
1271) -> BTreeSet<(usize, usize)> {
1272    if nodes.is_empty() {
1273        return BTreeSet::new();
1274    }
1275
1276    let mut adjacency: HashMap<usize, Vec<usize>> = HashMap::new();
1277    for &(from, to) in edges.iter() {
1278        adjacency.entry(from).or_default().push(to);
1279    }
1280
1281    let mut minimal_edges = BTreeSet::new();
1282
1283    for &(from, to) in edges.iter() {
1284        if !has_alternative_path(from, to, &adjacency, (from, to)) {
1285            minimal_edges.insert((from, to));
1286        }
1287    }
1288
1289    minimal_edges
1290}
1291
1292fn has_alternative_path(
1293    start: usize,
1294    target: usize,
1295    adjacency: &HashMap<usize, Vec<usize>>,
1296    edge_to_skip: (usize, usize),
1297) -> bool {
1298    let mut visited = HashSet::new();
1299    let mut stack: Vec<usize> = adjacency
1300        .get(&start)
1301        .into_iter()
1302        .flat_map(|neighbors| neighbors.iter())
1303        .filter_map(|&neighbor| {
1304            if (start, neighbor) == edge_to_skip {
1305                None
1306            } else {
1307                Some(neighbor)
1308            }
1309        })
1310        .collect();
1311
1312    while let Some(current) = stack.pop() {
1313        if !visited.insert(current) {
1314            continue;
1315        }
1316
1317        if current == target {
1318            return true;
1319        }
1320
1321        if let Some(neighbors) = adjacency.get(&current) {
1322            for &neighbor in neighbors {
1323                if (current, neighbor) == edge_to_skip {
1324                    continue;
1325                }
1326                if !visited.contains(&neighbor) {
1327                    stack.push(neighbor);
1328                }
1329            }
1330        }
1331    }
1332
1333    false
1334}