llmcc_core/
graph_builder.rs

1use std::collections::{BTreeSet, HashMap, HashSet};
2use std::marker::PhantomData;
3
4pub use crate::block::{BasicBlock, BlockId, BlockKind, BlockRelation};
5use crate::block::{
6    BlockCall, BlockClass, BlockConst, BlockEnum, BlockField, BlockFunc, BlockImpl, BlockRoot,
7    BlockStmt,
8};
9use crate::block_rel::BlockRelationMap;
10use crate::context::{CompileCtxt, CompileUnit};
11use crate::ir::HirNode;
12use crate::lang_def::LanguageTrait;
13use crate::pagerank::{PageRankDirection, PageRanker};
14use crate::symbol::{SymId, Symbol};
15use crate::visit::HirVisitor;
16
17#[derive(Debug, Clone)]
18pub struct UnitGraph {
19    /// Compile unit this graph belongs to
20    unit_index: usize,
21    /// Root block ID of this unit
22    root: BlockId,
23    /// Edges of this graph unit
24    edges: BlockRelationMap,
25}
26
27impl UnitGraph {
28    pub fn new(unit_index: usize, root: BlockId, edges: BlockRelationMap) -> Self {
29        Self {
30            unit_index,
31            root,
32            edges,
33        }
34    }
35
36    pub fn unit_index(&self) -> usize {
37        self.unit_index
38    }
39
40    pub fn root(&self) -> BlockId {
41        self.root
42    }
43
44    pub fn edges(&self) -> &BlockRelationMap {
45        &self.edges
46    }
47}
48
49#[derive(Debug, Clone, Copy)]
50pub struct GraphBuildConfig {
51    pub compact: bool,
52}
53
54impl GraphBuildConfig {
55    pub fn compact() -> Self {
56        Self { compact: true }
57    }
58}
59
60impl Default for GraphBuildConfig {
61    fn default() -> Self {
62        Self { compact: false }
63    }
64}
65
66#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
67pub struct GraphNode {
68    pub unit_index: usize,
69    pub block_id: BlockId,
70}
71
72/// ProjectGraph represents a complete compilation project with all units and their inter-dependencies.
73///
74/// # Overview
75/// ProjectGraph maintains a collection of per-unit compilation graphs (UnitGraph) and facilitates
76/// cross-unit dependency resolution. It provides efficient multi-dimensional indexing for block
77/// lookups by name, kind, unit, and ID, enabling quick context retrieval for LLM consumption.
78///
79/// # Architecture
80/// The graph consists of:
81/// - **UnitGraphs**: One per compilation unit (file), containing blocks and intra-unit relations
82/// - **Block Indexes**: Multi-dimensional indexes via BlockIndexMaps for O(1) to O(log n) lookups
83/// - **Cross-unit Links**: Dependencies tracked between blocks across different units
84///
85/// # Primary Use Cases
86/// 1. **Symbol Resolution**: Find blocks by name across the entire project
87/// 2. **Context Gathering**: Collect all related blocks for code analysis
88/// 3. **LLM Serialization**: Export graph as text or JSON for LLM model consumption
89/// 4. **Dependency Analysis**: Traverse dependency graphs to understand block relationships
90///
91#[derive(Debug)]
92pub struct ProjectGraph<'tcx> {
93    /// Reference to the compilation context containing all symbols, HIR nodes, and blocks
94    pub cc: &'tcx CompileCtxt<'tcx>,
95    /// Per-unit graphs containing blocks and intra-unit relations
96    units: Vec<UnitGraph>,
97    compact_rank_limit: Option<usize>,
98    pagerank_direction: PageRankDirection,
99}
100
101impl<'tcx> ProjectGraph<'tcx> {
102    pub fn new(cc: &'tcx CompileCtxt<'tcx>) -> Self {
103        Self {
104            cc,
105            units: Vec::new(),
106            compact_rank_limit: None,
107            pagerank_direction: PageRankDirection::default(),
108        }
109    }
110
111    pub fn add_child(&mut self, graph: UnitGraph) {
112        self.units.push(graph);
113    }
114
115    /// Configure the number of PageRank-filtered nodes retained when rendering compact graphs.
116    pub fn set_compact_rank_limit(&mut self, limit: Option<usize>) {
117        self.compact_rank_limit = match limit {
118            Some(0) => None,
119            other => other,
120        };
121    }
122
123    /// Configure the PageRank direction for ranking nodes.
124    pub fn set_pagerank_direction(&mut self, direction: PageRankDirection) {
125        self.pagerank_direction = direction;
126    }
127
128    pub fn link_units(&mut self) {
129        if self.units.is_empty() {
130            return;
131        }
132
133        let mut unresolved = self.cc.unresolve_symbols.borrow_mut();
134
135        unresolved.retain(|symbol_ref| {
136            let target = *symbol_ref;
137            let Some(target_block) = target.block_id() else {
138                return false;
139            };
140
141            let dependents: Vec<SymId> = target.depended.borrow().clone();
142            for dependent_id in dependents {
143                let Some(source_symbol) = self.cc.opt_get_symbol(dependent_id) else {
144                    continue;
145                };
146                let Some(from_block) = source_symbol.block_id() else {
147                    continue;
148                };
149                self.add_cross_edge(
150                    source_symbol.unit_index().unwrap(),
151                    target.unit_index().unwrap(),
152                    from_block,
153                    target_block,
154                );
155            }
156
157            false
158        });
159    }
160
161    pub fn units(&self) -> &[UnitGraph] {
162        &self.units
163    }
164
165    pub fn unit_graph(&self, unit_index: usize) -> Option<&UnitGraph> {
166        self.units
167            .iter()
168            .find(|unit| unit.unit_index() == unit_index)
169    }
170
171    pub fn block_by_name(&self, name: &str) -> Option<GraphNode> {
172        let block_indexes = self.cc.block_indexes.borrow();
173        let matches = block_indexes.find_by_name(name);
174
175        matches.first().map(|(unit_index, _, block_id)| GraphNode {
176            unit_index: *unit_index,
177            block_id: *block_id,
178        })
179    }
180
181    pub fn blocks_by_name(&self, name: &str) -> Vec<GraphNode> {
182        let block_indexes = self.cc.block_indexes.borrow();
183        let matches = block_indexes.find_by_name(name);
184
185        matches
186            .into_iter()
187            .map(|(unit_index, _, block_id)| GraphNode {
188                unit_index,
189                block_id,
190            })
191            .collect()
192    }
193
194    pub fn render_compact_graph(&self) -> String {
195        self.render_compact_graph_inner(self.compact_rank_limit)
196    }
197
198    pub fn block_by_name_in(&self, unit_index: usize, name: &str) -> Option<GraphNode> {
199        let block_indexes = self.cc.block_indexes.borrow();
200        let matches = block_indexes.find_by_name(name);
201
202        matches
203            .iter()
204            .find(|(u, _, _)| *u == unit_index)
205            .map(|(_, _, block_id)| GraphNode {
206                unit_index,
207                block_id: *block_id,
208            })
209    }
210
211    pub fn blocks_by_kind(&self, block_kind: BlockKind) -> Vec<GraphNode> {
212        let block_indexes = self.cc.block_indexes.borrow();
213        let matches = block_indexes.find_by_kind(block_kind);
214
215        matches
216            .into_iter()
217            .map(|(unit_index, _, block_id)| GraphNode {
218                unit_index,
219                block_id,
220            })
221            .collect()
222    }
223
224    pub fn blocks_by_kind_in(&self, block_kind: BlockKind, unit_index: usize) -> Vec<GraphNode> {
225        let block_indexes = self.cc.block_indexes.borrow();
226        let block_ids = block_indexes.find_by_kind_and_unit(block_kind, unit_index);
227
228        block_ids
229            .into_iter()
230            .map(|block_id| GraphNode {
231                unit_index,
232                block_id,
233            })
234            .collect()
235    }
236
237    pub fn blocks_in(&self, unit_index: usize) -> Vec<GraphNode> {
238        let block_indexes = self.cc.block_indexes.borrow();
239        let matches = block_indexes.find_by_unit(unit_index);
240
241        matches
242            .into_iter()
243            .map(|(_, _, block_id)| GraphNode {
244                unit_index,
245                block_id,
246            })
247            .collect()
248    }
249
250    pub fn block_info(&self, block_id: BlockId) -> Option<(usize, Option<String>, BlockKind)> {
251        let block_indexes = self.cc.block_indexes.borrow();
252        block_indexes.get_block_info(block_id)
253    }
254
255    pub fn find_related_blocks(
256        &self,
257        node: GraphNode,
258        relations: Vec<BlockRelation>,
259    ) -> Vec<GraphNode> {
260        if node.unit_index >= self.units.len() {
261            return Vec::new();
262        }
263
264        let unit = &self.units[node.unit_index];
265        let mut result = Vec::new();
266
267        for relation in relations {
268            match relation {
269                BlockRelation::DependsOn => {
270                    // Get all blocks that this block depends on
271                    let dependencies = unit
272                        .edges
273                        .get_related(node.block_id, BlockRelation::DependsOn);
274                    for dep_block_id in dependencies {
275                        result.push(GraphNode {
276                            unit_index: node.unit_index,
277                            block_id: dep_block_id,
278                        });
279                    }
280                }
281                BlockRelation::DependedBy => {
282                    let mut seen = HashSet::new();
283
284                    // Direct dependents tracked on this unit (covers cross-unit edges too)
285                    let dependents = unit
286                        .edges
287                        .get_related(node.block_id, BlockRelation::DependedBy);
288                    if !dependents.is_empty() {
289                        let indexes = self.cc.block_indexes.borrow();
290                        for dep_block_id in dependents {
291                            if !seen.insert(dep_block_id) {
292                                continue;
293                            }
294                            if let Some((dep_unit_idx, _, _)) = indexes.get_block_info(dep_block_id)
295                            {
296                                result.push(GraphNode {
297                                    unit_index: dep_unit_idx,
298                                    block_id: dep_block_id,
299                                });
300                            } else {
301                                result.push(GraphNode {
302                                    unit_index: node.unit_index,
303                                    block_id: dep_block_id,
304                                });
305                            }
306                        }
307                    }
308
309                    // Fallback: scan current unit for reverse DependsOn edges
310                    let local_dependents = unit
311                        .edges
312                        .find_reverse_relations(node.block_id, BlockRelation::DependsOn);
313                    for dep_block_id in local_dependents {
314                        if !seen.insert(dep_block_id) {
315                            continue;
316                        }
317                        result.push(GraphNode {
318                            unit_index: node.unit_index,
319                            block_id: dep_block_id,
320                        });
321                    }
322                }
323                BlockRelation::Unknown => {
324                    // Skip unknown relations
325                }
326            }
327        }
328
329        result
330    }
331
332    pub fn find_dpends_blocks_recursive(&self, node: GraphNode) -> HashSet<GraphNode> {
333        let mut visited = HashSet::new();
334        let mut stack = vec![node];
335        let relations = vec![BlockRelation::DependsOn];
336
337        while let Some(current) = stack.pop() {
338            if visited.contains(&current) {
339                continue;
340            }
341            visited.insert(current);
342
343            for related in self.find_related_blocks(current, relations.clone()) {
344                if !visited.contains(&related) {
345                    stack.push(related);
346                }
347            }
348        }
349
350        visited.remove(&node);
351        visited
352    }
353
354    pub fn traverse_bfs<F>(&self, start: GraphNode, mut callback: F)
355    where
356        F: FnMut(GraphNode),
357    {
358        let mut visited = HashSet::new();
359        let mut queue = vec![start];
360        let relations = vec![BlockRelation::DependsOn, BlockRelation::DependedBy];
361
362        while !queue.is_empty() {
363            let current = queue.remove(0);
364            if visited.contains(&current) {
365                continue;
366            }
367            visited.insert(current);
368            callback(current);
369
370            for related in self.find_related_blocks(current, relations.clone()) {
371                if !visited.contains(&related) {
372                    queue.push(related);
373                }
374            }
375        }
376    }
377
378    pub fn traverse_dfs<F>(&self, start: GraphNode, mut callback: F)
379    where
380        F: FnMut(GraphNode),
381    {
382        let mut visited = HashSet::new();
383        self.traverse_dfs_impl(start, &mut visited, &mut callback);
384    }
385
386    fn traverse_dfs_impl<F>(
387        &self,
388        node: GraphNode,
389        visited: &mut HashSet<GraphNode>,
390        callback: &mut F,
391    ) where
392        F: FnMut(GraphNode),
393    {
394        if visited.contains(&node) {
395            return;
396        }
397        visited.insert(node);
398        callback(node);
399
400        let relations = vec![BlockRelation::DependsOn, BlockRelation::DependedBy];
401        for related in self.find_related_blocks(node, relations) {
402            if !visited.contains(&related) {
403                self.traverse_dfs_impl(related, visited, callback);
404            }
405        }
406    }
407
408    pub fn get_block_depends(&self, node: GraphNode) -> HashSet<GraphNode> {
409        if node.unit_index >= self.units.len() {
410            return HashSet::new();
411        }
412
413        let unit = &self.units[node.unit_index];
414        let mut result = HashSet::new();
415        let mut visited = HashSet::new();
416        let mut stack = vec![node.block_id];
417
418        while let Some(current_block) = stack.pop() {
419            if visited.contains(&current_block) {
420                continue;
421            }
422            visited.insert(current_block);
423
424            let dependencies = unit
425                .edges
426                .get_related(current_block, BlockRelation::DependsOn);
427            for dep_block_id in dependencies {
428                if dep_block_id != node.block_id {
429                    result.insert(GraphNode {
430                        unit_index: node.unit_index,
431                        block_id: dep_block_id,
432                    });
433                    stack.push(dep_block_id);
434                }
435            }
436        }
437
438        result
439    }
440
441    pub fn get_block_depended(&self, node: GraphNode) -> HashSet<GraphNode> {
442        if node.unit_index >= self.units.len() {
443            return HashSet::new();
444        }
445
446        let unit = &self.units[node.unit_index];
447        let mut result = HashSet::new();
448        let mut visited = HashSet::new();
449        let mut stack = vec![node.block_id];
450
451        while let Some(current_block) = stack.pop() {
452            if visited.contains(&current_block) {
453                continue;
454            }
455            visited.insert(current_block);
456
457            let dependencies = unit
458                .edges
459                .get_related(current_block, BlockRelation::DependedBy);
460            for dep_block_id in dependencies {
461                if dep_block_id != node.block_id {
462                    result.insert(GraphNode {
463                        unit_index: node.unit_index,
464                        block_id: dep_block_id,
465                    });
466                    stack.push(dep_block_id);
467                }
468            }
469        }
470
471        result
472    }
473
474    fn render_compact_graph_inner(&self, top_k: Option<usize>) -> String {
475        #[derive(Clone)]
476        struct CompactNode {
477            block_id: BlockId,
478            unit_index: usize,
479            kind: BlockKind,
480            name: String,
481            location: Option<String>,
482        }
483
484        fn byte_to_line(content: &[u8], byte_pos: usize) -> usize {
485            let clamped = byte_pos.min(content.len());
486            let mut line = 1;
487            for &ch in &content[..clamped] {
488                if ch == b'\n' {
489                    line += 1;
490                }
491            }
492            line
493        }
494
495        fn escape_label(input: &str) -> String {
496            input
497                .replace('\\', "\\\\")
498                .replace('"', "\\\"")
499                .replace('\n', "\\n")
500        }
501
502        fn extract_crate_path(location: &str) -> String {
503            // Extract crate/module path from file location
504            // Examples:
505            //   /path/to/crate/src/module/file.rs -> crate/module
506            //   /path/to/crate/src/file.rs -> crate
507            //   C:\path\to\crate\src\module\file.rs -> crate/module
508
509            let path = location.split(':').next().unwrap_or(location);
510            let parts: Vec<&str> = path.split(['/', '\\']).collect();
511
512            // Find the "src" directory index
513            if let Some(src_idx) = parts.iter().position(|&p| p == "src") {
514                if src_idx > 0 {
515                    // Get crate name (directory before src)
516                    let crate_name = parts[src_idx - 1];
517
518                    // Get all module directories after src (excluding filename)
519                    let mut result = crate_name.to_string();
520                    for &part in &parts[src_idx + 1..] {
521                        if !part.is_empty() && !part.contains('.') {
522                            result.push('/');
523                            result.push_str(part);
524                        }
525                    }
526                    return result;
527                }
528            }
529
530            // Fallback: try to extract just the filename without extension
531            if let Some(filename) = parts.last() {
532                if !filename.is_empty() {
533                    return filename.split('.').next().unwrap_or("unknown").to_string();
534                }
535            }
536
537            "unknown".to_string()
538        }
539
540        fn strongly_connected_components(adjacency: &[Vec<usize>]) -> Vec<Vec<usize>> {
541            fn strongconnect(
542                v: usize,
543                index: &mut usize,
544                adjacency: &[Vec<usize>],
545                indices: &mut [Option<usize>],
546                lowlink: &mut [usize],
547                stack: &mut Vec<usize>,
548                on_stack: &mut [bool],
549                components: &mut Vec<Vec<usize>>,
550            ) {
551                indices[v] = Some(*index);
552                lowlink[v] = *index;
553                *index += 1;
554                stack.push(v);
555                on_stack[v] = true;
556
557                for &w in &adjacency[v] {
558                    if indices[w].is_none() {
559                        strongconnect(
560                            w, index, adjacency, indices, lowlink, stack, on_stack, components,
561                        );
562                        lowlink[v] = lowlink[v].min(lowlink[w]);
563                    } else if on_stack[w] {
564                        let w_index = indices[w].unwrap();
565                        lowlink[v] = lowlink[v].min(w_index);
566                    }
567                }
568
569                if lowlink[v] == indices[v].unwrap() {
570                    let mut component = Vec::new();
571                    while let Some(w) = stack.pop() {
572                        on_stack[w] = false;
573                        component.push(w);
574                        if w == v {
575                            break;
576                        }
577                    }
578                    components.push(component);
579                }
580            }
581
582            let mut index = 0;
583            let mut stack = Vec::new();
584            let mut indices = vec![None; adjacency.len()];
585            let mut lowlink = vec![0; adjacency.len()];
586            let mut on_stack = vec![false; adjacency.len()];
587            let mut components = Vec::new();
588
589            for v in 0..adjacency.len() {
590                if indices[v].is_none() {
591                    strongconnect(
592                        v,
593                        &mut index,
594                        adjacency,
595                        &mut indices,
596                        &mut lowlink,
597                        &mut stack,
598                        &mut on_stack,
599                        &mut components,
600                    );
601                }
602            }
603
604            components
605        }
606
607        let interesting_kinds = [BlockKind::Class, BlockKind::Enum];
608
609        let ranked_order = top_k.and_then(|limit| {
610            let mut ranker = PageRanker::new(self);
611            // Apply configured PageRank direction
612            ranker.config_mut().direction = self.pagerank_direction;
613            let mut collected = Vec::new();
614
615            for ranked in ranker.rank() {
616                if interesting_kinds.contains(&ranked.kind) {
617                    collected.push(ranked.node.block_id);
618                }
619                if collected.len() >= limit {
620                    break;
621                }
622            }
623
624            if collected.is_empty() {
625                None
626            } else {
627                Some(collected)
628            }
629        });
630
631        let ranked_filter: Option<HashSet<BlockId>> = ranked_order
632            .as_ref()
633            .map(|ordered| ordered.iter().copied().collect());
634
635        let mut nodes: Vec<CompactNode> = {
636            let block_indexes = self.cc.block_indexes.borrow();
637            block_indexes
638                .block_id_index
639                .iter()
640                .filter_map(|(&block_id, (unit_index, name_opt, kind))| {
641                    if !interesting_kinds.contains(kind) {
642                        return None;
643                    }
644
645                    if let Some(ref ranked_ids) = ranked_filter {
646                        if !ranked_ids.contains(&block_id) {
647                            return None;
648                        }
649                    }
650
651                    let unit = self.cc.compile_unit(*unit_index);
652                    let block = unit.bb(block_id);
653                    let display_name = name_opt
654                        .clone()
655                        .or_else(|| {
656                            block
657                                .base()
658                                .and_then(|base| base.opt_get_name().map(|s| s.to_string()))
659                        })
660                        .unwrap_or_else(|| format!("{}:{}", kind, block_id.as_u32()));
661
662                    let path = std::fs::canonicalize(
663                        unit.file_path()
664                            .or_else(|| unit.file().path())
665                            .unwrap_or("<unknown>"),
666                    )
667                    .map(|p| p.to_string_lossy().to_string())
668                    .unwrap_or_else(|_| {
669                        unit.file_path()
670                            .or_else(|| unit.file().path())
671                            .unwrap_or("<unknown>")
672                            .to_string()
673                    });
674                    let location = block
675                        .opt_node()
676                        .and_then(|node| {
677                            unit.file()
678                                .file
679                                .content
680                                .as_ref()
681                                .map(|bytes| byte_to_line(bytes.as_slice(), node.start_byte()))
682                                .map(|line| format!("{path}:{line}"))
683                        })
684                        .or_else(|| Some(path.clone()));
685
686                    Some(CompactNode {
687                        block_id,
688                        unit_index: *unit_index,
689                        kind: *kind,
690                        name: display_name,
691                        location,
692                    })
693                })
694                .collect()
695        };
696
697        nodes.sort_by(|a, b| a.name.cmp(&b.name));
698
699        if nodes.is_empty() {
700            return "digraph CompactProject {\n}\n".to_string();
701        }
702
703        let mut node_index = HashMap::new();
704        for (idx, node) in nodes.iter().enumerate() {
705            node_index.insert(node.block_id, idx);
706        }
707
708        let mut adjacency: Vec<Vec<usize>> = vec![Vec::new(); nodes.len()];
709        for node in &nodes {
710            let Some(unit_graph) = self.unit_graph(node.unit_index) else {
711                continue;
712            };
713            let from_idx = node_index[&node.block_id];
714
715            let dependencies = unit_graph
716                .edges()
717                .get_related(node.block_id, BlockRelation::DependsOn);
718            let mut targets = dependencies
719                .into_iter()
720                .filter_map(|dep_id| node_index.get(&dep_id).copied())
721                .collect::<Vec<_>>();
722
723            targets.sort_unstable();
724            targets.dedup();
725            adjacency[from_idx] = targets;
726        }
727
728        let mut components: Vec<Vec<usize>> = strongly_connected_components(&adjacency)
729            .into_iter()
730            .filter(|component| !component.is_empty())
731            .collect();
732
733        if components.is_empty() {
734            return "digraph CompactProject {\n}\n".to_string();
735        }
736
737        components.sort_by(|a, b| b.len().cmp(&a.len()));
738
739        let target_limit = ranked_order
740            .as_ref()
741            .map(|order| order.len())
742            .unwrap_or_else(|| nodes.len());
743
744        let mut keep = vec![false; nodes.len()];
745        let mut kept = 0usize;
746
747        for component in components.iter().filter(|component| component.len() > 1) {
748            for &idx in component {
749                if !keep[idx] {
750                    keep[idx] = true;
751                    kept += 1;
752                }
753            }
754            if kept >= target_limit {
755                break;
756            }
757        }
758
759        if kept < target_limit {
760            if let Some(order) = ranked_order.as_ref() {
761                for block_id in order {
762                    if let Some(&idx) = node_index.get(block_id) {
763                        if !keep[idx] {
764                            keep[idx] = true;
765                            kept += 1;
766                            if kept >= target_limit {
767                                break;
768                            }
769                        }
770                    }
771                }
772            } else {
773                for idx in 0..nodes.len() {
774                    if !keep[idx] {
775                        keep[idx] = true;
776                        kept += 1;
777                        if kept >= target_limit {
778                            break;
779                        }
780                    }
781                }
782            }
783        }
784
785        if kept == 0 {
786            if let Some(component) = components.first() {
787                for &idx in component {
788                    keep[idx] = true;
789                }
790                kept = component.len();
791            }
792        }
793
794        let mut filtered_nodes = Vec::with_capacity(kept);
795        let mut remap = HashMap::new();
796        for (old_idx, node) in nodes.into_iter().enumerate() {
797            if keep[old_idx] {
798                let new_idx = filtered_nodes.len();
799                remap.insert(old_idx, new_idx);
800                filtered_nodes.push(node);
801            }
802        }
803
804        let nodes = filtered_nodes;
805
806        let mut edges = BTreeSet::new();
807        for (old_idx, neighbours) in adjacency.iter().enumerate() {
808            let Some(&from_idx) = remap.get(&old_idx) else {
809                continue;
810            };
811            for &target in neighbours {
812                if let Some(&to_idx) = remap.get(&target) {
813                    edges.insert((from_idx, to_idx));
814                }
815            }
816        }
817
818        // Remove isolated nodes (nodes with no incoming or outgoing edges in the filtered graph).
819        let mut in_degree = vec![0usize; nodes.len()];
820        let mut out_degree = vec![0usize; nodes.len()];
821        for &(from, to) in &edges {
822            out_degree[from] += 1;
823            in_degree[to] += 1;
824        }
825
826        let final_keep: Vec<bool> = (0..nodes.len())
827            .map(|idx| in_degree[idx] > 0 || out_degree[idx] > 0)
828            .collect();
829
830        let mut final_nodes = Vec::new();
831        let mut final_remap = HashMap::new();
832        for (old_idx, node) in nodes.into_iter().enumerate() {
833            if final_keep[old_idx] {
834                let new_idx = final_nodes.len();
835                final_remap.insert(old_idx, new_idx);
836                final_nodes.push(node);
837            }
838        }
839
840        let nodes = final_nodes;
841
842        let final_edges: BTreeSet<_> = edges
843            .into_iter()
844            .filter_map(|(from, to)| {
845                let new_from = final_remap.get(&from).copied()?;
846                let new_to = final_remap.get(&to).copied()?;
847                Some((new_from, new_to))
848            })
849            .collect();
850        let edges = final_edges;
851
852        // Extract crate/module paths from node locations
853        let mut crate_groups: HashMap<String, Vec<usize>> = HashMap::new();
854        for (idx, node) in nodes.iter().enumerate() {
855            if let Some(location) = &node.location {
856                // Extract crate path from location
857                // Format: /path/to/crate/src/module/file.rs -> crate/module
858                let crate_path = extract_crate_path(location);
859                crate_groups
860                    .entry(crate_path)
861                    .or_insert_with(Vec::new)
862                    .push(idx);
863            }
864        }
865
866        let mut output = String::from("digraph CompactProject {\n");
867
868        // Generate subgraphs for each crate/module group
869        let mut subgraph_counter = 0;
870        for (crate_path, node_indices) in crate_groups.iter() {
871            output.push_str(&format!("  subgraph cluster_{} {{\n", subgraph_counter));
872            output.push_str(&format!("    label=\"{}\";\n", escape_label(crate_path)));
873            output.push_str("    style=filled;\n");
874            output.push_str("    color=lightgrey;\n");
875
876            for &idx in node_indices {
877                let node = &nodes[idx];
878                let mut parts = vec![node.name.clone(), format!("({})", node.kind)];
879                if let Some(location) = &node.location {
880                    parts.push(location.clone());
881                }
882                let label = parts
883                    .into_iter()
884                    .map(|part| escape_label(&part))
885                    .collect::<Vec<_>>()
886                    .join("\\n");
887                output.push_str(&format!("    n{} [label=\"{}\"];\n", idx, label));
888            }
889
890            output.push_str("  }\n");
891            subgraph_counter += 1;
892        }
893
894        for (from, to) in edges {
895            output.push_str(&format!("  n{} -> n{};\n", from, to));
896        }
897        output.push_str("}\n");
898        output
899    }
900
901    fn add_cross_edge(
902        &self,
903        from_idx: usize,
904        to_idx: usize,
905        from_block: BlockId,
906        to_block: BlockId,
907    ) {
908        if from_idx == to_idx {
909            let unit = &self.units[from_idx];
910            if !unit
911                .edges
912                .has_relation(from_block, BlockRelation::DependsOn, to_block)
913            {
914                unit.edges.add_relation(from_block, to_block);
915            }
916            return;
917        }
918
919        let from_unit = &self.units[from_idx];
920        from_unit
921            .edges
922            .add_relation_if_not_exists(from_block, BlockRelation::DependsOn, to_block);
923
924        let to_unit = &self.units[to_idx];
925        to_unit
926            .edges
927            .add_relation_if_not_exists(to_block, BlockRelation::DependedBy, from_block);
928    }
929}
930
931#[derive(Debug)]
932struct GraphBuilder<'tcx, Language> {
933    unit: CompileUnit<'tcx>,
934    root: Option<BlockId>,
935    children_stack: Vec<Vec<BlockId>>,
936    config: GraphBuildConfig,
937    _marker: PhantomData<Language>,
938}
939
940impl<'tcx, Language: LanguageTrait> GraphBuilder<'tcx, Language> {
941    fn new(unit: CompileUnit<'tcx>, config: GraphBuildConfig) -> Self {
942        Self {
943            unit,
944            root: None,
945            children_stack: Vec::new(),
946            config,
947            _marker: PhantomData,
948        }
949    }
950
951    fn next_id(&self) -> BlockId {
952        self.unit.reserve_block_id()
953    }
954
955    fn create_block(
956        &self,
957        id: BlockId,
958        node: HirNode<'tcx>,
959        kind: BlockKind,
960        parent: Option<BlockId>,
961        children: Vec<BlockId>,
962    ) -> BasicBlock<'tcx> {
963        let arena = &self.unit.cc.block_arena;
964        match kind {
965            BlockKind::Root => {
966                // Extract file_name from HirFile node if available
967                let file_name = node.as_file().map(|file| file.file_path.clone());
968                let block = BlockRoot::from_hir(id, node, parent, children, file_name);
969                BasicBlock::Root(arena.alloc(block))
970            }
971            BlockKind::Func => {
972                let block = BlockFunc::from_hir(id, node, parent, children);
973                BasicBlock::Func(arena.alloc(block))
974            }
975            BlockKind::Class => {
976                let block = BlockClass::from_hir(id, node, parent, children);
977                BasicBlock::Class(arena.alloc(block))
978            }
979            BlockKind::Stmt => {
980                let stmt = BlockStmt::from_hir(id, node, parent, children);
981                BasicBlock::Stmt(arena.alloc(stmt))
982            }
983            BlockKind::Call => {
984                let stmt = BlockCall::from_hir(id, node, parent, children);
985                BasicBlock::Call(arena.alloc(stmt))
986            }
987            BlockKind::Enum => {
988                let enum_ty = BlockEnum::from_hir(id, node, parent, children);
989                BasicBlock::Enum(arena.alloc(enum_ty))
990            }
991            BlockKind::Const => {
992                let stmt = BlockConst::from_hir(id, node, parent, children);
993                BasicBlock::Const(arena.alloc(stmt))
994            }
995            BlockKind::Impl => {
996                let block = BlockImpl::from_hir(id, node, parent, children);
997                BasicBlock::Impl(arena.alloc(block))
998            }
999            BlockKind::Field => {
1000                let block = BlockField::from_hir(id, node, parent, children);
1001                BasicBlock::Field(arena.alloc(block))
1002            }
1003            _ => {
1004                panic!("unknown block kind: {}", kind)
1005            }
1006        }
1007    }
1008
1009    fn build_edges(&self, node: HirNode<'tcx>) -> BlockRelationMap {
1010        let edges = BlockRelationMap::default();
1011        let mut visited = HashSet::new();
1012        let mut unresolved = HashSet::new();
1013        self.collect_edges(node, &edges, &mut visited, &mut unresolved);
1014        edges
1015    }
1016
1017    fn collect_edges(
1018        &self,
1019        node: HirNode<'tcx>,
1020        edges: &BlockRelationMap,
1021        visited: &mut HashSet<SymId>,
1022        unresolved: &mut HashSet<SymId>,
1023    ) {
1024        // Try to process symbol dependencies for this node
1025        if let Some(scope) = self.unit.opt_get_scope(node.hir_id()) {
1026            if let Some(symbol) = scope.symbol() {
1027                self.process_symbol(symbol, edges, visited, unresolved);
1028            }
1029        }
1030
1031        // Recurse into children
1032        for &child_id in node.children() {
1033            let child = self.unit.hir_node(child_id);
1034            self.collect_edges(child, edges, visited, unresolved);
1035        }
1036    }
1037
1038    fn process_symbol(
1039        &self,
1040        symbol: &'tcx Symbol,
1041        edges: &BlockRelationMap,
1042        visited: &mut HashSet<SymId>,
1043        unresolved: &mut HashSet<SymId>,
1044    ) {
1045        let symbol_id = symbol.id;
1046
1047        // Avoid processing the same symbol twice
1048        if !visited.insert(symbol_id) {
1049            return;
1050        }
1051
1052        let Some(from_block) = symbol.block_id() else {
1053            return;
1054        };
1055
1056        for &dep_id in symbol.depends.borrow().iter() {
1057            self.link_dependency(dep_id, from_block, edges, unresolved);
1058        }
1059    }
1060    fn link_dependency(
1061        &self,
1062        dep_id: SymId,
1063        from_block: BlockId,
1064        edges: &BlockRelationMap,
1065        unresolved: &mut HashSet<SymId>,
1066    ) {
1067        // If target symbol exists and has a block, add the dependency edge
1068        if let Some(target_symbol) = self.unit.opt_get_symbol(dep_id) {
1069            if let Some(to_block) = target_symbol.block_id() {
1070                if !edges.has_relation(from_block, BlockRelation::DependsOn, to_block) {
1071                    edges.add_relation(from_block, to_block);
1072                }
1073                let target_unit = target_symbol.unit_index();
1074                if target_unit.is_some()
1075                    && target_unit != Some(self.unit.index)
1076                    && unresolved.insert(dep_id)
1077                {
1078                    self.unit.add_unresolved_symbol(target_symbol);
1079                }
1080                return;
1081            }
1082
1083            // Target symbol exists but block not yet known
1084            if unresolved.insert(dep_id) {
1085                self.unit.add_unresolved_symbol(target_symbol);
1086            }
1087            return;
1088        }
1089
1090        // Target symbol not found at all
1091        unresolved.insert(dep_id);
1092    }
1093
1094    fn build_block(&mut self, node: HirNode<'tcx>, parent: BlockId, recursive: bool) {
1095        let id = self.next_id();
1096        let block_kind = Language::block_kind(node.kind_id());
1097        assert_ne!(block_kind, BlockKind::Undefined);
1098
1099        if self.root.is_none() {
1100            self.root = Some(id);
1101        }
1102
1103        let children = if recursive {
1104            self.children_stack.push(Vec::new());
1105            self.visit_children(node, id);
1106
1107            self.children_stack.pop().unwrap()
1108        } else {
1109            Vec::new()
1110        };
1111
1112        let block = self.create_block(id, node, block_kind, Some(parent), children);
1113        if let Some(scope) = self.unit.opt_get_scope(node.hir_id()) {
1114            if let Some(symbol) = scope.symbol() {
1115                // Only set the block ID if it hasn't been set before
1116                // This prevents impl blocks from overwriting struct block IDs
1117                if symbol.block_id().is_none() {
1118                    symbol.set_block_id(Some(id));
1119                }
1120            }
1121        }
1122        self.unit.insert_block(id, block, parent);
1123
1124        if let Some(children) = self.children_stack.last_mut() {
1125            children.push(id);
1126        }
1127    }
1128}
1129
1130impl<'tcx, Language: LanguageTrait> HirVisitor<'tcx> for GraphBuilder<'tcx, Language> {
1131    fn unit(&self) -> CompileUnit<'tcx> {
1132        self.unit
1133    }
1134
1135    fn visit_file(&mut self, node: HirNode<'tcx>, parent: BlockId) {
1136        self.children_stack.push(Vec::new());
1137        self.build_block(node, parent, true);
1138    }
1139
1140    fn visit_internal(&mut self, node: HirNode<'tcx>, parent: BlockId) {
1141        let kind = Language::block_kind(node.kind_id());
1142        if self.config.compact {
1143            if kind == BlockKind::Root {
1144                self.build_block(node, parent, true);
1145            } else {
1146                self.visit_children(node, parent);
1147            }
1148            return;
1149        }
1150
1151        // Non-compact mode: process all defined kinds
1152        if kind != BlockKind::Undefined {
1153            self.build_block(node, parent, false);
1154        } else {
1155            self.visit_children(node, parent);
1156        }
1157    }
1158
1159    fn visit_scope(&mut self, node: HirNode<'tcx>, parent: BlockId) {
1160        let kind = Language::block_kind(node.kind_id());
1161        if self.config.compact {
1162            // In compact mode, only create blocks for major constructs (Class, Enum, Impl)
1163            // Skip functions, fields, and scopes to reduce graph size
1164            match kind {
1165                BlockKind::Class | BlockKind::Enum => {
1166                    // Build with recursion enabled to capture nested major constructs
1167                    self.build_block(node, parent, true);
1168                }
1169                // Skip all other scopes - don't recurse
1170                _ => {
1171                    // Stop here, do not visit children
1172                    // self.visit_children(node, parent);
1173                }
1174            }
1175            return;
1176        }
1177
1178        // Non-compact mode: build blocks for all major constructs
1179        match kind {
1180            BlockKind::Func
1181            | BlockKind::Class
1182            | BlockKind::Enum
1183            | BlockKind::Const
1184            | BlockKind::Impl
1185            | BlockKind::Field => self.build_block(node, parent, true),
1186            _ => self.visit_children(node, parent),
1187        }
1188    }
1189}
1190
1191pub fn build_llmcc_graph_with_config<'tcx, L: LanguageTrait>(
1192    unit: CompileUnit<'tcx>,
1193    unit_index: usize,
1194    config: GraphBuildConfig,
1195) -> Result<UnitGraph, Box<dyn std::error::Error>> {
1196    let root_hir = unit
1197        .file_start_hir_id()
1198        .ok_or("missing file start HIR id")?;
1199    let mut builder = GraphBuilder::<L>::new(unit, config);
1200    let root_node = unit.hir_node(root_hir);
1201    builder.visit_node(root_node, BlockId::ROOT_PARENT);
1202
1203    let root_block = builder.root;
1204    let root_block = root_block.ok_or("graph builder produced no root")?;
1205    let edges = builder.build_edges(root_node);
1206    Ok(UnitGraph::new(unit_index, root_block, edges))
1207}
1208
1209pub fn build_llmcc_graph<'tcx, L: LanguageTrait>(
1210    unit: CompileUnit<'tcx>,
1211    unit_index: usize,
1212) -> Result<UnitGraph, Box<dyn std::error::Error>> {
1213    build_llmcc_graph_with_config::<L>(unit, unit_index, GraphBuildConfig::default())
1214}