llmcc_core/
graph_builder.rs

1use std::collections::{BTreeMap, BTreeSet, HashMap, HashSet};
2use std::marker::PhantomData;
3use std::path::Path;
4
5pub use crate::block::{BasicBlock, BlockId, BlockKind, BlockRelation};
6use crate::block::{
7    BlockCall, BlockClass, BlockConst, BlockEnum, BlockField, BlockFunc, BlockImpl, BlockRoot,
8    BlockStmt,
9};
10use crate::block_rel::BlockRelationMap;
11use crate::context::{CompileCtxt, CompileUnit};
12use crate::ir::HirNode;
13use crate::lang_def::LanguageTrait;
14use crate::pagerank::PageRanker;
15use crate::symbol::{SymId, Symbol};
16use crate::visit::HirVisitor;
17
18const COMPACT_INTERESTING_KINDS: [BlockKind; 2] = [BlockKind::Class, BlockKind::Enum];
19
20#[derive(Debug, Clone)]
21pub struct UnitGraph {
22    /// Compile unit this graph belongs to
23    unit_index: usize,
24    /// Root block ID of this unit
25    root: BlockId,
26    /// Edges of this graph unit
27    edges: BlockRelationMap,
28}
29
30impl UnitGraph {
31    pub fn new(unit_index: usize, root: BlockId, edges: BlockRelationMap) -> Self {
32        Self {
33            unit_index,
34            root,
35            edges,
36        }
37    }
38
39    pub fn unit_index(&self) -> usize {
40        self.unit_index
41    }
42
43    pub fn root(&self) -> BlockId {
44        self.root
45    }
46
47    pub fn edges(&self) -> &BlockRelationMap {
48        &self.edges
49    }
50}
51
52#[derive(Debug, Clone, Copy)]
53pub struct GraphBuildConfig {
54    pub compact: bool,
55}
56
57impl GraphBuildConfig {
58    pub fn compact() -> Self {
59        Self { compact: true }
60    }
61}
62
63impl Default for GraphBuildConfig {
64    fn default() -> Self {
65        Self { compact: false }
66    }
67}
68
69#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
70pub struct GraphNode {
71    pub unit_index: usize,
72    pub block_id: BlockId,
73}
74
75/// ProjectGraph represents a complete compilation project with all units and their inter-dependencies.
76///
77/// # Overview
78/// ProjectGraph maintains a collection of per-unit compilation graphs (UnitGraph) and facilitates
79/// cross-unit dependency resolution. It provides efficient multi-dimensional indexing for block
80/// lookups by name, kind, unit, and ID, enabling quick context retrieval for LLM consumption.
81///
82/// # Architecture
83/// The graph consists of:
84/// - **UnitGraphs**: One per compilation unit (file), containing blocks and intra-unit relations
85/// - **Block Indexes**: Multi-dimensional indexes via BlockIndexMaps for O(1) to O(log n) lookups
86/// - **Cross-unit Links**: Dependencies tracked between blocks across different units
87///
88/// # Primary Use Cases
89/// 1. **Symbol Resolution**: Find blocks by name across the entire project
90/// 2. **Context Gathering**: Collect all related blocks for code analysis
91/// 3. **LLM Serialization**: Export graph as text or JSON for LLM model consumption
92/// 4. **Dependency Analysis**: Traverse dependency graphs to understand block relationships
93///
94#[derive(Debug)]
95pub struct ProjectGraph<'tcx> {
96    /// Reference to the compilation context containing all symbols, HIR nodes, and blocks
97    pub cc: &'tcx CompileCtxt<'tcx>,
98    /// Per-unit graphs containing blocks and intra-unit relations
99    units: Vec<UnitGraph>,
100    compact_rank_limit: Option<usize>,
101}
102
103impl<'tcx> ProjectGraph<'tcx> {
104    pub fn new(cc: &'tcx CompileCtxt<'tcx>) -> Self {
105        Self {
106            cc,
107            units: Vec::new(),
108            compact_rank_limit: None,
109        }
110    }
111
112    pub fn add_child(&mut self, graph: UnitGraph) {
113        self.units.push(graph);
114    }
115
116    /// Configure the number of PageRank-filtered nodes retained when rendering compact graphs.
117    pub fn set_compact_rank_limit(&mut self, limit: Option<usize>) {
118        self.compact_rank_limit = match limit {
119            Some(0) => None,
120            other => other,
121        };
122    }
123
124    pub fn link_units(&mut self) {
125        if self.units.is_empty() {
126            return;
127        }
128
129        let mut unresolved = self.cc.unresolve_symbols.borrow_mut();
130
131        unresolved.retain(|symbol_ref| {
132            let target = *symbol_ref;
133            let Some(target_block) = target.block_id() else {
134                return false;
135            };
136
137            let dependents: Vec<SymId> = target.depended.borrow().clone();
138            for dependent_id in dependents {
139                let Some(source_symbol) = self.cc.opt_get_symbol(dependent_id) else {
140                    continue;
141                };
142                let Some(from_block) = source_symbol.block_id() else {
143                    continue;
144                };
145                self.add_cross_edge(
146                    source_symbol.unit_index().unwrap(),
147                    target.unit_index().unwrap(),
148                    from_block,
149                    target_block,
150                );
151            }
152
153            false
154        });
155    }
156
157    pub fn units(&self) -> &[UnitGraph] {
158        &self.units
159    }
160
161    pub fn unit_graph(&self, unit_index: usize) -> Option<&UnitGraph> {
162        self.units
163            .iter()
164            .find(|unit| unit.unit_index() == unit_index)
165    }
166
167    pub fn block_by_name(&self, name: &str) -> Option<GraphNode> {
168        let block_indexes = self.cc.block_indexes.borrow();
169        let matches = block_indexes.find_by_name(name);
170
171        matches.first().map(|(unit_index, _, block_id)| GraphNode {
172            unit_index: *unit_index,
173            block_id: *block_id,
174        })
175    }
176
177    pub fn blocks_by_name(&self, name: &str) -> Vec<GraphNode> {
178        let block_indexes = self.cc.block_indexes.borrow();
179        let matches = block_indexes.find_by_name(name);
180
181        matches
182            .into_iter()
183            .map(|(unit_index, _, block_id)| GraphNode {
184                unit_index,
185                block_id,
186            })
187            .collect()
188    }
189
190    pub fn render_compact_graph(&self) -> String {
191        self.render_compact_graph_inner(self.compact_rank_limit)
192    }
193
194    fn ranked_block_filter(
195        &self,
196        top_k: Option<usize>,
197        interesting_kinds: &[BlockKind],
198    ) -> Option<HashSet<BlockId>> {
199        let ranked_order = top_k.and_then(|limit| {
200            let ranker = PageRanker::new(self);
201            let mut collected = Vec::new();
202
203            for ranked in ranker.rank() {
204                if interesting_kinds.contains(&ranked.kind) {
205                    collected.push(ranked.node.block_id);
206                }
207                if collected.len() >= limit {
208                    break;
209                }
210            }
211
212            if collected.is_empty() {
213                None
214            } else {
215                Some(collected)
216            }
217        });
218
219        ranked_order.map(|ordered| ordered.into_iter().collect())
220    }
221
222    fn collect_compact_nodes(
223        &self,
224        interesting_kinds: &[BlockKind],
225        ranked_filter: Option<&HashSet<BlockId>>,
226    ) -> Vec<CompactNode> {
227        let block_indexes = self.cc.block_indexes.borrow();
228        block_indexes
229            .block_id_index
230            .iter()
231            .filter_map(|(&block_id, (unit_index, name_opt, kind))| {
232                if !interesting_kinds.contains(kind) {
233                    return None;
234                }
235
236                if let Some(ids) = ranked_filter {
237                    if !ids.contains(&block_id) {
238                        return None;
239                    }
240                }
241
242                let unit = self.cc.compile_unit(*unit_index);
243                let block = unit.bb(block_id);
244                let display_name = name_opt
245                    .clone()
246                    .or_else(|| {
247                        block
248                            .base()
249                            .and_then(|base| base.opt_get_name().map(|s| s.to_string()))
250                    })
251                    .unwrap_or_else(|| format!("{}:{}", kind, block_id.as_u32()));
252
253                let raw_path = unit
254                    .file_path()
255                    .or_else(|| unit.file().path())
256                    .unwrap_or("<unknown>");
257
258                let path = std::fs::canonicalize(raw_path)
259                    .map(|p| p.to_string_lossy().to_string())
260                    .unwrap_or_else(|_| raw_path.to_string());
261
262                let location = block
263                    .opt_node()
264                    .and_then(|node| {
265                        unit.file()
266                            .file
267                            .content
268                            .as_ref()
269                            .map(|bytes| compact_byte_to_line(bytes.as_slice(), node.start_byte()))
270                            .map(|line| format!("{path}:{line}"))
271                    })
272                    .or(Some(path.clone()));
273
274                let group = location
275                    .as_ref()
276                    .map(|loc| extract_group_path(loc))
277                    .unwrap_or_else(|| extract_group_path(&path));
278
279                Some(CompactNode {
280                    block_id,
281                    unit_index: *unit_index,
282                    name: display_name,
283                    location,
284                    group,
285                })
286            })
287            .collect()
288    }
289
290    fn collect_sorted_compact_nodes(&self, top_k: Option<usize>) -> Vec<CompactNode> {
291        let ranked_filter = self.ranked_block_filter(top_k, &COMPACT_INTERESTING_KINDS);
292        let mut nodes =
293            self.collect_compact_nodes(&COMPACT_INTERESTING_KINDS, ranked_filter.as_ref());
294        nodes.sort_by(|a, b| a.name.cmp(&b.name));
295        nodes
296    }
297
298    fn collect_compact_edges(
299        &self,
300        nodes: &[CompactNode],
301        node_index: &HashMap<BlockId, usize>,
302    ) -> BTreeSet<(usize, usize)> {
303        let mut edges = BTreeSet::new();
304
305        for node in nodes {
306            let Some(unit_graph) = self.unit_graph(node.unit_index) else {
307                continue;
308            };
309            let from_idx = node_index[&node.block_id];
310
311            let dependencies = unit_graph
312                .edges()
313                .get_related(node.block_id, BlockRelation::DependsOn);
314
315            for dep_block_id in dependencies {
316                if let Some(&to_idx) = node_index.get(&dep_block_id) {
317                    edges.insert((from_idx, to_idx));
318                }
319            }
320        }
321
322        edges
323    }
324
325    pub fn block_by_name_in(&self, unit_index: usize, name: &str) -> Option<GraphNode> {
326        let block_indexes = self.cc.block_indexes.borrow();
327        let matches = block_indexes.find_by_name(name);
328
329        matches
330            .iter()
331            .find(|(u, _, _)| *u == unit_index)
332            .map(|(_, _, block_id)| GraphNode {
333                unit_index,
334                block_id: *block_id,
335            })
336    }
337
338    pub fn blocks_by_kind(&self, block_kind: BlockKind) -> Vec<GraphNode> {
339        let block_indexes = self.cc.block_indexes.borrow();
340        let matches = block_indexes.find_by_kind(block_kind);
341
342        matches
343            .into_iter()
344            .map(|(unit_index, _, block_id)| GraphNode {
345                unit_index,
346                block_id,
347            })
348            .collect()
349    }
350
351    pub fn blocks_by_kind_in(&self, block_kind: BlockKind, unit_index: usize) -> Vec<GraphNode> {
352        let block_indexes = self.cc.block_indexes.borrow();
353        let block_ids = block_indexes.find_by_kind_and_unit(block_kind, unit_index);
354
355        block_ids
356            .into_iter()
357            .map(|block_id| GraphNode {
358                unit_index,
359                block_id,
360            })
361            .collect()
362    }
363
364    pub fn blocks_in(&self, unit_index: usize) -> Vec<GraphNode> {
365        let block_indexes = self.cc.block_indexes.borrow();
366        let matches = block_indexes.find_by_unit(unit_index);
367
368        matches
369            .into_iter()
370            .map(|(_, _, block_id)| GraphNode {
371                unit_index,
372                block_id,
373            })
374            .collect()
375    }
376
377    pub fn block_info(&self, block_id: BlockId) -> Option<(usize, Option<String>, BlockKind)> {
378        let block_indexes = self.cc.block_indexes.borrow();
379        block_indexes.get_block_info(block_id)
380    }
381
382    pub fn find_related_blocks(
383        &self,
384        node: GraphNode,
385        relations: Vec<BlockRelation>,
386    ) -> Vec<GraphNode> {
387        if node.unit_index >= self.units.len() {
388            return Vec::new();
389        }
390
391        let unit = &self.units[node.unit_index];
392        let mut result = Vec::new();
393
394        for relation in relations {
395            match relation {
396                BlockRelation::DependsOn => {
397                    // Get all blocks that this block depends on
398                    let dependencies = unit
399                        .edges
400                        .get_related(node.block_id, BlockRelation::DependsOn);
401                    let block_indexes = self.cc.block_indexes.borrow();
402                    for dep_block_id in dependencies {
403                        let dep_unit_index = block_indexes
404                            .get_block_info(dep_block_id)
405                            .map(|(idx, _, _)| idx)
406                            .unwrap_or(node.unit_index);
407                        result.push(GraphNode {
408                            unit_index: dep_unit_index,
409                            block_id: dep_block_id,
410                        });
411                    }
412                }
413                BlockRelation::DependedBy => {
414                    let mut seen = HashSet::new();
415
416                    // Direct dependents tracked on this unit (covers cross-unit edges too)
417                    let dependents = unit
418                        .edges
419                        .get_related(node.block_id, BlockRelation::DependedBy);
420                    if !dependents.is_empty() {
421                        let indexes = self.cc.block_indexes.borrow();
422                        for dep_block_id in dependents {
423                            if !seen.insert(dep_block_id) {
424                                continue;
425                            }
426                            if let Some((dep_unit_idx, _, _)) = indexes.get_block_info(dep_block_id)
427                            {
428                                result.push(GraphNode {
429                                    unit_index: dep_unit_idx,
430                                    block_id: dep_block_id,
431                                });
432                            } else {
433                                result.push(GraphNode {
434                                    unit_index: node.unit_index,
435                                    block_id: dep_block_id,
436                                });
437                            }
438                        }
439                    }
440
441                    // Fallback: scan current unit for reverse DependsOn edges
442                    let local_dependents = unit
443                        .edges
444                        .find_reverse_relations(node.block_id, BlockRelation::DependsOn);
445                    for dep_block_id in local_dependents {
446                        if !seen.insert(dep_block_id) {
447                            continue;
448                        }
449                        result.push(GraphNode {
450                            unit_index: node.unit_index,
451                            block_id: dep_block_id,
452                        });
453                    }
454                }
455                BlockRelation::Unknown => {
456                    // Skip unknown relations
457                }
458            }
459        }
460
461        result
462    }
463
464    pub fn find_dpends_blocks_recursive(&self, node: GraphNode) -> HashSet<GraphNode> {
465        let mut visited = HashSet::new();
466        let mut stack = vec![node];
467        let relations = vec![BlockRelation::DependsOn];
468
469        while let Some(current) = stack.pop() {
470            if visited.contains(&current) {
471                continue;
472            }
473            visited.insert(current);
474
475            for related in self.find_related_blocks(current, relations.clone()) {
476                if !visited.contains(&related) {
477                    stack.push(related);
478                }
479            }
480        }
481
482        visited.remove(&node);
483        visited
484    }
485
486    pub fn find_depended_blocks_recursive(&self, node: GraphNode) -> HashSet<GraphNode> {
487        let mut visited = HashSet::new();
488        let mut stack = vec![node];
489        let relations = vec![BlockRelation::DependedBy];
490
491        while let Some(current) = stack.pop() {
492            if visited.contains(&current) {
493                continue;
494            }
495            visited.insert(current);
496
497            for related in self.find_related_blocks(current, relations.clone()) {
498                if !visited.contains(&related) {
499                    stack.push(related);
500                }
501            }
502        }
503
504        visited.remove(&node);
505        visited
506    }
507
508    pub fn traverse_bfs<F>(&self, start: GraphNode, mut callback: F)
509    where
510        F: FnMut(GraphNode),
511    {
512        let mut visited = HashSet::new();
513        let mut queue = vec![start];
514        let relations = vec![BlockRelation::DependsOn, BlockRelation::DependedBy];
515
516        while !queue.is_empty() {
517            let current = queue.remove(0);
518            if visited.contains(&current) {
519                continue;
520            }
521            visited.insert(current);
522            callback(current);
523
524            for related in self.find_related_blocks(current, relations.clone()) {
525                if !visited.contains(&related) {
526                    queue.push(related);
527                }
528            }
529        }
530    }
531
532    pub fn traverse_dfs<F>(&self, start: GraphNode, mut callback: F)
533    where
534        F: FnMut(GraphNode),
535    {
536        let mut visited = HashSet::new();
537        self.traverse_dfs_impl(start, &mut visited, &mut callback);
538    }
539
540    fn traverse_dfs_impl<F>(
541        &self,
542        node: GraphNode,
543        visited: &mut HashSet<GraphNode>,
544        callback: &mut F,
545    ) where
546        F: FnMut(GraphNode),
547    {
548        if visited.contains(&node) {
549            return;
550        }
551        visited.insert(node);
552        callback(node);
553
554        let relations = vec![BlockRelation::DependsOn, BlockRelation::DependedBy];
555        for related in self.find_related_blocks(node, relations) {
556            if !visited.contains(&related) {
557                self.traverse_dfs_impl(related, visited, callback);
558            }
559        }
560    }
561
562    pub fn get_block_depends(&self, node: GraphNode) -> HashSet<GraphNode> {
563        if node.unit_index >= self.units.len() {
564            return HashSet::new();
565        }
566
567        let unit = &self.units[node.unit_index];
568        let mut result = HashSet::new();
569        let mut visited = HashSet::new();
570        let mut stack = vec![node.block_id];
571        let block_indexes = self.cc.block_indexes.borrow();
572
573        while let Some(current_block) = stack.pop() {
574            if visited.contains(&current_block) {
575                continue;
576            }
577            visited.insert(current_block);
578
579            let dependencies = unit
580                .edges
581                .get_related(current_block, BlockRelation::DependsOn);
582            for dep_block_id in dependencies {
583                if dep_block_id != node.block_id {
584                    let dep_unit_index = block_indexes
585                        .get_block_info(dep_block_id)
586                        .map(|(idx, _, _)| idx)
587                        .unwrap_or(node.unit_index);
588                    result.insert(GraphNode {
589                        unit_index: dep_unit_index,
590                        block_id: dep_block_id,
591                    });
592                    stack.push(dep_block_id);
593                }
594            }
595        }
596
597        result
598    }
599
600    pub fn get_block_depended(&self, node: GraphNode) -> HashSet<GraphNode> {
601        if node.unit_index >= self.units.len() {
602            return HashSet::new();
603        }
604
605        let unit = &self.units[node.unit_index];
606        let mut result = HashSet::new();
607        let mut visited = HashSet::new();
608        let mut stack = vec![node.block_id];
609        let block_indexes = self.cc.block_indexes.borrow();
610
611        while let Some(current_block) = stack.pop() {
612            if visited.contains(&current_block) {
613                continue;
614            }
615            visited.insert(current_block);
616
617            let dependencies = unit
618                .edges
619                .get_related(current_block, BlockRelation::DependedBy);
620            for dep_block_id in dependencies {
621                if dep_block_id != node.block_id {
622                    let dep_unit_index = block_indexes
623                        .get_block_info(dep_block_id)
624                        .map(|(idx, _, _)| idx)
625                        .unwrap_or(node.unit_index);
626                    result.insert(GraphNode {
627                        unit_index: dep_unit_index,
628                        block_id: dep_block_id,
629                    });
630                    stack.push(dep_block_id);
631                }
632            }
633        }
634
635        result
636    }
637
638    fn render_compact_graph_inner(&self, top_k: Option<usize>) -> String {
639        let nodes = self.collect_sorted_compact_nodes(top_k);
640
641        if nodes.is_empty() {
642            return "digraph CompactProject {\n}\n".to_string();
643        }
644
645        let node_index = build_compact_node_index(&nodes);
646        let edges = self.collect_compact_edges(&nodes, &node_index);
647
648        let pruned = prune_compact_components(&nodes, &edges);
649        if pruned.nodes.is_empty() {
650            return "digraph CompactProject {\n}\n".to_string();
651        }
652
653        let reduced_edges = reduce_transitive_edges(&pruned.nodes, &pruned.edges);
654
655        render_compact_dot(&pruned.nodes, &reduced_edges)
656    }
657
658    fn add_cross_edge(
659        &self,
660        from_idx: usize,
661        to_idx: usize,
662        from_block: BlockId,
663        to_block: BlockId,
664    ) {
665        if from_idx == to_idx {
666            let unit = &self.units[from_idx];
667            if !unit
668                .edges
669                .has_relation(from_block, BlockRelation::DependsOn, to_block)
670            {
671                unit.edges.add_relation(from_block, to_block);
672            }
673            return;
674        }
675
676        let from_unit = &self.units[from_idx];
677        from_unit
678            .edges
679            .add_relation_if_not_exists(from_block, BlockRelation::DependsOn, to_block);
680
681        let to_unit = &self.units[to_idx];
682        to_unit
683            .edges
684            .add_relation_if_not_exists(to_block, BlockRelation::DependedBy, from_block);
685    }
686}
687
688#[derive(Debug)]
689struct GraphBuilder<'tcx, Language> {
690    unit: CompileUnit<'tcx>,
691    root: Option<BlockId>,
692    children_stack: Vec<Vec<BlockId>>,
693    _config: GraphBuildConfig,
694    _marker: PhantomData<Language>,
695}
696
697impl<'tcx, Language: LanguageTrait> GraphBuilder<'tcx, Language> {
698    fn new(unit: CompileUnit<'tcx>, _config: GraphBuildConfig) -> Self {
699        Self {
700            unit,
701            root: None,
702            children_stack: Vec::new(),
703            _config,
704            _marker: PhantomData,
705        }
706    }
707
708    fn next_id(&self) -> BlockId {
709        self.unit.reserve_block_id()
710    }
711
712    fn create_block(
713        &self,
714        id: BlockId,
715        node: HirNode<'tcx>,
716        kind: BlockKind,
717        parent: Option<BlockId>,
718        children: Vec<BlockId>,
719    ) -> BasicBlock<'tcx> {
720        let arena = &self.unit.cc.block_arena;
721        match kind {
722            BlockKind::Root => {
723                // Extract file_name from HirFile node if available
724                let file_name = node.as_file().map(|file| file.file_path.clone());
725                let block = BlockRoot::from_hir(id, node, parent, children, file_name);
726                BasicBlock::Root(arena.alloc(block))
727            }
728            BlockKind::Func => {
729                let block = BlockFunc::from_hir(id, node, parent, children);
730                BasicBlock::Func(arena.alloc(block))
731            }
732            BlockKind::Class => {
733                let block = BlockClass::from_hir(id, node, parent, children);
734                BasicBlock::Class(arena.alloc(block))
735            }
736            BlockKind::Stmt => {
737                let stmt = BlockStmt::from_hir(id, node, parent, children);
738                BasicBlock::Stmt(arena.alloc(stmt))
739            }
740            BlockKind::Call => {
741                let stmt = BlockCall::from_hir(id, node, parent, children);
742                BasicBlock::Call(arena.alloc(stmt))
743            }
744            BlockKind::Enum => {
745                let enum_ty = BlockEnum::from_hir(id, node, parent, children);
746                BasicBlock::Enum(arena.alloc(enum_ty))
747            }
748            BlockKind::Const => {
749                let stmt = BlockConst::from_hir(id, node, parent, children);
750                BasicBlock::Const(arena.alloc(stmt))
751            }
752            BlockKind::Impl => {
753                let block = BlockImpl::from_hir(id, node, parent, children);
754                BasicBlock::Impl(arena.alloc(block))
755            }
756            BlockKind::Field => {
757                let block = BlockField::from_hir(id, node, parent, children);
758                BasicBlock::Field(arena.alloc(block))
759            }
760            _ => {
761                panic!("unknown block kind: {}", kind)
762            }
763        }
764    }
765
766    fn build_edges(&self, node: HirNode<'tcx>) -> BlockRelationMap {
767        let edges = BlockRelationMap::default();
768        let mut visited = HashSet::new();
769        let mut unresolved = HashSet::new();
770        self.collect_edges(node, &edges, &mut visited, &mut unresolved);
771        edges
772    }
773
774    fn collect_edges(
775        &self,
776        node: HirNode<'tcx>,
777        edges: &BlockRelationMap,
778        visited: &mut HashSet<SymId>,
779        unresolved: &mut HashSet<SymId>,
780    ) {
781        // Try to process symbol dependencies for this node
782        if let Some(scope) = self.unit.opt_get_scope(node.hir_id()) {
783            if let Some(symbol) = scope.symbol() {
784                self.process_symbol(symbol, edges, visited, unresolved);
785            }
786        }
787
788        // Recurse into children
789        for &child_id in node.children() {
790            let child = self.unit.hir_node(child_id);
791            self.collect_edges(child, edges, visited, unresolved);
792        }
793    }
794
795    fn process_symbol(
796        &self,
797        symbol: &'tcx Symbol,
798        edges: &BlockRelationMap,
799        visited: &mut HashSet<SymId>,
800        unresolved: &mut HashSet<SymId>,
801    ) {
802        let symbol_id = symbol.id;
803
804        // Avoid processing the same symbol twice
805        if !visited.insert(symbol_id) {
806            return;
807        }
808
809        let Some(from_block) = symbol.block_id() else {
810            return;
811        };
812
813        for &dep_id in symbol.depends.borrow().iter() {
814            self.link_dependency(dep_id, from_block, edges, unresolved);
815        }
816    }
817    fn link_dependency(
818        &self,
819        dep_id: SymId,
820        from_block: BlockId,
821        edges: &BlockRelationMap,
822        unresolved: &mut HashSet<SymId>,
823    ) {
824        // If target symbol exists and has a block, add the dependency edge
825        if let Some(target_symbol) = self.unit.opt_get_symbol(dep_id) {
826            if let Some(to_block) = target_symbol.block_id() {
827                if !edges.has_relation(from_block, BlockRelation::DependsOn, to_block) {
828                    edges.add_relation(from_block, to_block);
829                }
830                let target_unit = target_symbol.unit_index();
831                if target_unit.is_some()
832                    && target_unit != Some(self.unit.index)
833                    && unresolved.insert(dep_id)
834                {
835                    self.unit.add_unresolved_symbol(target_symbol);
836                }
837                return;
838            }
839
840            // Target symbol exists but block not yet known
841            if unresolved.insert(dep_id) {
842                self.unit.add_unresolved_symbol(target_symbol);
843            }
844            return;
845        }
846
847        // Target symbol not found at all
848        unresolved.insert(dep_id);
849    }
850
851    fn build_block(&mut self, node: HirNode<'tcx>, parent: BlockId, recursive: bool) {
852        let id = self.next_id();
853        let block_kind = Language::block_kind(node.kind_id());
854        assert_ne!(block_kind, BlockKind::Undefined);
855
856        if self.root.is_none() {
857            self.root = Some(id);
858        }
859
860        let children = if recursive {
861            self.children_stack.push(Vec::new());
862            self.visit_children(node, id);
863
864            self.children_stack.pop().unwrap()
865        } else {
866            Vec::new()
867        };
868
869        let block = self.create_block(id, node, block_kind, Some(parent), children);
870        if let Some(scope) = self.unit.opt_get_scope(node.hir_id()) {
871            if let Some(symbol) = scope.symbol() {
872                // Only set the block ID if it hasn't been set before
873                // This prevents impl blocks from overwriting struct block IDs
874                if symbol.block_id().is_none() {
875                    symbol.set_block_id(Some(id));
876                }
877            }
878        }
879        self.unit.insert_block(id, block, parent);
880
881        if let Some(children) = self.children_stack.last_mut() {
882            children.push(id);
883        }
884    }
885}
886
887impl<'tcx, Language: LanguageTrait> HirVisitor<'tcx> for GraphBuilder<'tcx, Language> {
888    fn unit(&self) -> CompileUnit<'tcx> {
889        self.unit
890    }
891
892    fn visit_file(&mut self, node: HirNode<'tcx>, parent: BlockId) {
893        self.children_stack.push(Vec::new());
894        self.build_block(node, parent, true);
895    }
896
897    fn visit_internal(&mut self, node: HirNode<'tcx>, parent: BlockId) {
898        let kind = Language::block_kind(node.kind_id());
899        // if self.config.compact {
900        //     if kind == BlockKind::Root {
901        //         self.build_block(node, parent, true);
902        //     } else {
903        //         self.visit_children(node, parent);
904        //     }
905        //     return;
906        // }
907
908        // Non-compact mode: process all defined kinds
909        if kind != BlockKind::Undefined {
910            self.build_block(node, parent, false);
911        } else {
912            self.visit_children(node, parent);
913        }
914    }
915
916    fn visit_scope(&mut self, node: HirNode<'tcx>, parent: BlockId) {
917        let kind = Language::block_kind(node.kind_id());
918        // if self.config.compact {
919        //     // In compact mode, only create blocks for major constructs (Class, Enum, Impl)
920        //     // Skip functions, fields, and scopes to reduce graph size
921        //     match kind {
922        //         BlockKind::Class | BlockKind::Enum => {
923        //             // Build with recursion enabled to capture nested major constructs
924        //             self.build_block(node, parent, true);
925        //         }
926        //         // Skip all other scopes - don't recurse
927        //         _ => {
928        //             // Stop here, do not visit children
929        //             // self.visit_children(node, parent);
930        //         }
931        //     }
932        //     return;
933        // }
934
935        // Non-compact mode: build blocks for all major constructs
936        match kind {
937            BlockKind::Func
938            | BlockKind::Class
939            | BlockKind::Enum
940            | BlockKind::Const
941            | BlockKind::Impl
942            | BlockKind::Field => self.build_block(node, parent, true),
943            _ => self.visit_children(node, parent),
944        }
945    }
946}
947
948pub fn build_llmcc_graph_with_config<'tcx, L: LanguageTrait>(
949    unit: CompileUnit<'tcx>,
950    unit_index: usize,
951    config: GraphBuildConfig,
952) -> Result<UnitGraph, Box<dyn std::error::Error>> {
953    let root_hir = unit
954        .file_start_hir_id()
955        .ok_or("missing file start HIR id")?;
956    let mut builder = GraphBuilder::<L>::new(unit, config);
957    let root_node = unit.hir_node(root_hir);
958    builder.visit_node(root_node, BlockId::ROOT_PARENT);
959
960    let root_block = builder.root;
961    let root_block = root_block.ok_or("graph builder produced no root")?;
962    let edges = builder.build_edges(root_node);
963    Ok(UnitGraph::new(unit_index, root_block, edges))
964}
965
966pub fn build_llmcc_graph<'tcx, L: LanguageTrait>(
967    unit: CompileUnit<'tcx>,
968    unit_index: usize,
969) -> Result<UnitGraph, Box<dyn std::error::Error>> {
970    build_llmcc_graph_with_config::<L>(unit, unit_index, GraphBuildConfig::default())
971}
972
973#[derive(Clone)]
974struct CompactNode {
975    block_id: BlockId,
976    unit_index: usize,
977    name: String,
978    location: Option<String>,
979    group: String,
980}
981
982fn compact_byte_to_line(content: &[u8], byte_pos: usize) -> usize {
983    let clamped = byte_pos.min(content.len());
984    content[..clamped].iter().filter(|&&ch| ch == b'\n').count() + 1
985}
986
987fn escape_dot_label(input: &str) -> String {
988    input
989        .replace('\\', "\\\\")
990        .replace('"', "\\\"")
991        .replace('\n', "\\n")
992}
993
994fn escape_dot_attr(input: &str) -> String {
995    input.replace('\\', "\\\\").replace('"', "\\\"")
996}
997
998fn summarize_location(location: &str) -> (String, String) {
999    let (path_part, line_part) = location
1000        .rsplit_once(':')
1001        .map(|(path, line)| (path, Some(line)))
1002        .unwrap_or((location, None));
1003
1004    let path = Path::new(path_part);
1005    let components: Vec<_> = path
1006        .components()
1007        .filter_map(|comp| comp.as_os_str().to_str())
1008        .collect();
1009
1010    let start = components.len().saturating_sub(3);
1011    let mut shortened = components[start..].join("/");
1012    if shortened.is_empty() {
1013        shortened = path
1014            .file_name()
1015            .and_then(|name| name.to_str())
1016            .unwrap_or(path_part)
1017            .to_string();
1018    }
1019
1020    let display = if let Some(line) = line_part {
1021        format!("{shortened}:{line}")
1022    } else {
1023        shortened
1024    };
1025
1026    (display, location.to_string())
1027}
1028
1029fn extract_crate_path(location: &str) -> String {
1030    let path = location.split(':').next().unwrap_or(location);
1031    let parts: Vec<&str> = path.split(['/', '\\']).collect();
1032
1033    if let Some(src_idx) = parts.iter().position(|&p| p == "src") {
1034        if src_idx > 0 {
1035            return parts[src_idx - 1].to_string();
1036        }
1037    }
1038
1039    if let Some(filename) = parts.last() {
1040        if !filename.is_empty() {
1041            return filename.split('.').next().unwrap_or("unknown").to_string();
1042        }
1043    }
1044
1045    "unknown".to_string()
1046}
1047
1048fn extract_python_module_path(location: &str) -> String {
1049    const MAX_MODULE_DEPTH: usize = 2;
1050
1051    let path_str = location.split(':').next().unwrap_or(location);
1052    let path = Path::new(path_str);
1053
1054    if path.extension().and_then(|ext| ext.to_str()) != Some("py") {
1055        return extract_crate_path(location);
1056    }
1057
1058    let file_stem = path
1059        .file_stem()
1060        .and_then(|s| s.to_str())
1061        .map(|s| s.to_string());
1062
1063    let mut packages: Vec<String> = Vec::new();
1064    let mut current = path.parent();
1065
1066    while let Some(dir) = current {
1067        let dir_name = match dir.file_name().and_then(|n| n.to_str()) {
1068            Some(name) if !name.is_empty() => name.to_string(),
1069            _ => break,
1070        };
1071
1072        let has_init = dir.join("__init__.py").exists() || dir.join("__init__.pyi").exists();
1073
1074        if has_init {
1075            packages.push(dir_name);
1076        }
1077
1078        current = dir.parent();
1079    }
1080
1081    if packages.is_empty() {
1082        if let Some(stem) = file_stem
1083            .as_ref()
1084            .filter(|stem| stem.as_str() != "__init__")
1085        {
1086            return stem.clone();
1087        }
1088
1089        if let Some(parent_name) = path
1090            .parent()
1091            .and_then(|dir| dir.file_name().and_then(|n| n.to_str()))
1092            .map(|s| s.to_string())
1093        {
1094            return parent_name;
1095        }
1096
1097        return "unknown".to_string();
1098    }
1099
1100    packages.reverse();
1101    if packages.len() > MAX_MODULE_DEPTH {
1102        packages.truncate(MAX_MODULE_DEPTH);
1103    }
1104
1105    packages.join(".")
1106}
1107
1108fn extract_group_path(location: &str) -> String {
1109    let path = location.split(':').next().unwrap_or(location);
1110    if path.ends_with(".py") {
1111        extract_python_module_path(location)
1112    } else {
1113        extract_crate_path(location)
1114    }
1115}
1116
1117fn render_compact_dot(nodes: &[CompactNode], edges: &BTreeSet<(usize, usize)>) -> String {
1118    let mut crate_groups: BTreeMap<String, Vec<usize>> = BTreeMap::new();
1119    for (idx, node) in nodes.iter().enumerate() {
1120        crate_groups
1121            .entry(node.group.clone())
1122            .or_default()
1123            .push(idx);
1124    }
1125
1126    let mut output = String::from("digraph CompactProject {\n");
1127
1128    let mut subgraph_counter = 0;
1129    for (crate_path, node_indices) in crate_groups.iter() {
1130        output.push_str(&format!("  subgraph cluster_{} {{\n", subgraph_counter));
1131        output.push_str(&format!(
1132            "    label=\"{}\";\n",
1133            escape_dot_label(crate_path)
1134        ));
1135        output.push_str("    style=filled;\n");
1136        output.push_str("    color=lightgrey;\n");
1137
1138        for &idx in node_indices {
1139            let node = &nodes[idx];
1140            let label = escape_dot_label(&node.name);
1141            let mut attrs = vec![format!("label=\"{}\"", label)];
1142
1143            if let Some(location) = &node.location {
1144                let (_display, full) = summarize_location(location);
1145                let escaped_full = escape_dot_attr(&full);
1146                attrs.push(format!("full_path=\"{}\"", escaped_full));
1147            }
1148
1149            output.push_str(&format!("    n{} [{}];\n", idx, attrs.join(", ")));
1150        }
1151
1152        output.push_str("  }\n");
1153        subgraph_counter += 1;
1154    }
1155
1156    for &(from, to) in edges {
1157        output.push_str(&format!("  n{} -> n{};\n", from, to));
1158    }
1159
1160    output.push_str("}\n");
1161    output
1162}
1163
1164fn build_compact_node_index(nodes: &[CompactNode]) -> HashMap<BlockId, usize> {
1165    let mut node_index = HashMap::with_capacity(nodes.len());
1166    for (idx, node) in nodes.iter().enumerate() {
1167        node_index.insert(node.block_id, idx);
1168    }
1169    node_index
1170}
1171
1172struct PrunedGraph {
1173    nodes: Vec<CompactNode>,
1174    edges: BTreeSet<(usize, usize)>,
1175}
1176
1177fn prune_compact_components(
1178    nodes: &[CompactNode],
1179    edges: &BTreeSet<(usize, usize)>,
1180) -> PrunedGraph {
1181    if nodes.is_empty() {
1182        return PrunedGraph {
1183            nodes: Vec::new(),
1184            edges: BTreeSet::new(),
1185        };
1186    }
1187
1188    let components = find_connected_components(nodes.len(), edges);
1189    if components.is_empty() {
1190        return PrunedGraph {
1191            nodes: nodes.to_vec(),
1192            edges: edges.clone(),
1193        };
1194    }
1195
1196    let mut retained_indices = HashSet::new();
1197    for component in components {
1198        if component.len() == 1 {
1199            let idx = component[0];
1200            let has_edges = edges.iter().any(|&(from, to)| from == idx || to == idx);
1201            if !has_edges {
1202                continue;
1203            }
1204        }
1205        retained_indices.extend(component);
1206    }
1207
1208    if retained_indices.is_empty() {
1209        return PrunedGraph {
1210            nodes: Vec::new(),
1211            edges: BTreeSet::new(),
1212        };
1213    }
1214
1215    let mut retained_nodes = Vec::new();
1216    let mut old_to_new = HashMap::new();
1217    for (new_idx, old_idx) in retained_indices.iter().enumerate() {
1218        retained_nodes.push(nodes[*old_idx].clone());
1219        old_to_new.insert(*old_idx, new_idx);
1220    }
1221
1222    let mut retained_edges = BTreeSet::new();
1223    for &(from, to) in edges {
1224        if let (Some(&new_from), Some(&new_to)) = (old_to_new.get(&from), old_to_new.get(&to)) {
1225            retained_edges.insert((new_from, new_to));
1226        }
1227    }
1228
1229    PrunedGraph {
1230        nodes: retained_nodes,
1231        edges: retained_edges,
1232    }
1233}
1234
1235fn find_connected_components(
1236    node_count: usize,
1237    edges: &BTreeSet<(usize, usize)>,
1238) -> Vec<Vec<usize>> {
1239    if node_count == 0 {
1240        return Vec::new();
1241    }
1242
1243    let mut graph: HashMap<usize, Vec<usize>> = HashMap::new();
1244    for &(from, to) in edges.iter() {
1245        graph.entry(from).or_default().push(to);
1246        graph.entry(to).or_default().push(from);
1247    }
1248
1249    let mut visited = HashSet::new();
1250    let mut components = Vec::new();
1251
1252    for node in 0..node_count {
1253        if visited.contains(&node) {
1254            continue;
1255        }
1256
1257        let mut component = Vec::new();
1258        let mut stack = vec![node];
1259
1260        while let Some(current) = stack.pop() {
1261            if !visited.insert(current) {
1262                continue;
1263            }
1264
1265            component.push(current);
1266
1267            if let Some(neighbors) = graph.get(&current) {
1268                for &neighbor in neighbors {
1269                    if !visited.contains(&neighbor) {
1270                        stack.push(neighbor);
1271                    }
1272                }
1273            }
1274        }
1275
1276        components.push(component);
1277    }
1278
1279    components
1280}
1281
1282fn reduce_transitive_edges(
1283    nodes: &[CompactNode],
1284    edges: &BTreeSet<(usize, usize)>,
1285) -> BTreeSet<(usize, usize)> {
1286    if nodes.is_empty() {
1287        return BTreeSet::new();
1288    }
1289
1290    let mut adjacency: HashMap<usize, Vec<usize>> = HashMap::new();
1291    for &(from, to) in edges.iter() {
1292        adjacency.entry(from).or_default().push(to);
1293    }
1294
1295    let mut minimal_edges = BTreeSet::new();
1296
1297    for &(from, to) in edges.iter() {
1298        if !has_alternative_path(from, to, &adjacency, (from, to)) {
1299            minimal_edges.insert((from, to));
1300        }
1301    }
1302
1303    minimal_edges
1304}
1305
1306fn has_alternative_path(
1307    start: usize,
1308    target: usize,
1309    adjacency: &HashMap<usize, Vec<usize>>,
1310    edge_to_skip: (usize, usize),
1311) -> bool {
1312    let mut visited = HashSet::new();
1313    let mut stack: Vec<usize> = adjacency
1314        .get(&start)
1315        .into_iter()
1316        .flat_map(|neighbors| neighbors.iter())
1317        .filter_map(|&neighbor| {
1318            if (start, neighbor) == edge_to_skip {
1319                None
1320            } else {
1321                Some(neighbor)
1322            }
1323        })
1324        .collect();
1325
1326    while let Some(current) = stack.pop() {
1327        if !visited.insert(current) {
1328            continue;
1329        }
1330
1331        if current == target {
1332            return true;
1333        }
1334
1335        if let Some(neighbors) = adjacency.get(&current) {
1336            for &neighbor in neighbors {
1337                if (current, neighbor) == edge_to_skip {
1338                    continue;
1339                }
1340                if !visited.contains(&neighbor) {
1341                    stack.push(neighbor);
1342                }
1343            }
1344        }
1345    }
1346
1347    false
1348}