llmcc_core/
graph_builder.rs

1use std::collections::{BTreeSet, HashMap, HashSet};
2use std::marker::PhantomData;
3
4pub use crate::block::{BasicBlock, BlockId, BlockKind, BlockRelation};
5use crate::block::{
6    BlockCall, BlockClass, BlockConst, BlockEnum, BlockField, BlockFunc, BlockImpl, BlockRoot,
7    BlockStmt,
8};
9use crate::block_rel::BlockRelationMap;
10use crate::context::{CompileCtxt, CompileUnit};
11use crate::ir::HirNode;
12use crate::lang_def::LanguageTrait;
13use crate::pagerank::{PageRankDirection, PageRanker};
14use crate::symbol::{SymId, Symbol};
15use crate::visit::HirVisitor;
16
17#[derive(Debug, Clone)]
18pub struct UnitGraph {
19    /// Compile unit this graph belongs to
20    unit_index: usize,
21    /// Root block ID of this unit
22    root: BlockId,
23    /// Edges of this graph unit
24    edges: BlockRelationMap,
25}
26
27impl UnitGraph {
28    pub fn new(unit_index: usize, root: BlockId, edges: BlockRelationMap) -> Self {
29        Self {
30            unit_index,
31            root,
32            edges,
33        }
34    }
35
36    pub fn unit_index(&self) -> usize {
37        self.unit_index
38    }
39
40    pub fn root(&self) -> BlockId {
41        self.root
42    }
43
44    pub fn edges(&self) -> &BlockRelationMap {
45        &self.edges
46    }
47}
48
49#[derive(Debug, Clone, Copy)]
50pub struct GraphBuildConfig {
51    pub compact: bool,
52}
53
54impl GraphBuildConfig {
55    pub fn compact() -> Self {
56        Self { compact: true }
57    }
58}
59
60impl Default for GraphBuildConfig {
61    fn default() -> Self {
62        Self { compact: false }
63    }
64}
65
66#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
67pub struct GraphNode {
68    pub unit_index: usize,
69    pub block_id: BlockId,
70}
71
72/// ProjectGraph represents a complete compilation project with all units and their inter-dependencies.
73///
74/// # Overview
75/// ProjectGraph maintains a collection of per-unit compilation graphs (UnitGraph) and facilitates
76/// cross-unit dependency resolution. It provides efficient multi-dimensional indexing for block
77/// lookups by name, kind, unit, and ID, enabling quick context retrieval for LLM consumption.
78///
79/// # Architecture
80/// The graph consists of:
81/// - **UnitGraphs**: One per compilation unit (file), containing blocks and intra-unit relations
82/// - **Block Indexes**: Multi-dimensional indexes via BlockIndexMaps for O(1) to O(log n) lookups
83/// - **Cross-unit Links**: Dependencies tracked between blocks across different units
84///
85/// # Primary Use Cases
86/// 1. **Symbol Resolution**: Find blocks by name across the entire project
87/// 2. **Context Gathering**: Collect all related blocks for code analysis
88/// 3. **LLM Serialization**: Export graph as text or JSON for LLM model consumption
89/// 4. **Dependency Analysis**: Traverse dependency graphs to understand block relationships
90///
91#[derive(Debug)]
92pub struct ProjectGraph<'tcx> {
93    /// Reference to the compilation context containing all symbols, HIR nodes, and blocks
94    pub cc: &'tcx CompileCtxt<'tcx>,
95    /// Per-unit graphs containing blocks and intra-unit relations
96    units: Vec<UnitGraph>,
97    compact_rank_limit: Option<usize>,
98    pagerank_direction: PageRankDirection,
99}
100
101impl<'tcx> ProjectGraph<'tcx> {
102    pub fn new(cc: &'tcx CompileCtxt<'tcx>) -> Self {
103        Self {
104            cc,
105            units: Vec::new(),
106            compact_rank_limit: None,
107            pagerank_direction: PageRankDirection::default(),
108        }
109    }
110
111    pub fn add_child(&mut self, graph: UnitGraph) {
112        self.units.push(graph);
113    }
114
115    /// Configure the number of PageRank-filtered nodes retained when rendering compact graphs.
116    pub fn set_compact_rank_limit(&mut self, limit: Option<usize>) {
117        self.compact_rank_limit = match limit {
118            Some(0) => None,
119            other => other,
120        };
121    }
122
123    /// Configure the PageRank direction for ranking nodes.
124    pub fn set_pagerank_direction(&mut self, direction: PageRankDirection) {
125        self.pagerank_direction = direction;
126    }
127
128    pub fn link_units(&mut self) {
129        if self.units.is_empty() {
130            return;
131        }
132
133        let mut unresolved = self.cc.unresolve_symbols.borrow_mut();
134
135        unresolved.retain(|symbol_ref| {
136            let target = *symbol_ref;
137            let Some(target_block) = target.block_id() else {
138                return false;
139            };
140
141            let dependents: Vec<SymId> = target.depended.borrow().clone();
142            for dependent_id in dependents {
143                let Some(source_symbol) = self.cc.opt_get_symbol(dependent_id) else {
144                    continue;
145                };
146                let Some(from_block) = source_symbol.block_id() else {
147                    continue;
148                };
149                self.add_cross_edge(
150                    source_symbol.unit_index().unwrap(),
151                    target.unit_index().unwrap(),
152                    from_block,
153                    target_block,
154                );
155            }
156
157            false
158        });
159    }
160
161    pub fn units(&self) -> &[UnitGraph] {
162        &self.units
163    }
164
165    pub fn unit_graph(&self, unit_index: usize) -> Option<&UnitGraph> {
166        self.units
167            .iter()
168            .find(|unit| unit.unit_index() == unit_index)
169    }
170
171    pub fn block_by_name(&self, name: &str) -> Option<GraphNode> {
172        let block_indexes = self.cc.block_indexes.borrow();
173        let matches = block_indexes.find_by_name(name);
174
175        matches.first().map(|(unit_index, _, block_id)| GraphNode {
176            unit_index: *unit_index,
177            block_id: *block_id,
178        })
179    }
180
181    pub fn blocks_by_name(&self, name: &str) -> Vec<GraphNode> {
182        let block_indexes = self.cc.block_indexes.borrow();
183        let matches = block_indexes.find_by_name(name);
184
185        matches
186            .into_iter()
187            .map(|(unit_index, _, block_id)| GraphNode {
188                unit_index,
189                block_id,
190            })
191            .collect()
192    }
193
194    pub fn render_compact_graph(&self) -> String {
195        self.render_compact_graph_inner(self.compact_rank_limit)
196    }
197
198    pub fn block_by_name_in(&self, unit_index: usize, name: &str) -> Option<GraphNode> {
199        let block_indexes = self.cc.block_indexes.borrow();
200        let matches = block_indexes.find_by_name(name);
201
202        matches
203            .iter()
204            .find(|(u, _, _)| *u == unit_index)
205            .map(|(_, _, block_id)| GraphNode {
206                unit_index,
207                block_id: *block_id,
208            })
209    }
210
211    pub fn blocks_by_kind(&self, block_kind: BlockKind) -> Vec<GraphNode> {
212        let block_indexes = self.cc.block_indexes.borrow();
213        let matches = block_indexes.find_by_kind(block_kind);
214
215        matches
216            .into_iter()
217            .map(|(unit_index, _, block_id)| GraphNode {
218                unit_index,
219                block_id,
220            })
221            .collect()
222    }
223
224    pub fn blocks_by_kind_in(&self, block_kind: BlockKind, unit_index: usize) -> Vec<GraphNode> {
225        let block_indexes = self.cc.block_indexes.borrow();
226        let block_ids = block_indexes.find_by_kind_and_unit(block_kind, unit_index);
227
228        block_ids
229            .into_iter()
230            .map(|block_id| GraphNode {
231                unit_index,
232                block_id,
233            })
234            .collect()
235    }
236
237    pub fn blocks_in(&self, unit_index: usize) -> Vec<GraphNode> {
238        let block_indexes = self.cc.block_indexes.borrow();
239        let matches = block_indexes.find_by_unit(unit_index);
240
241        matches
242            .into_iter()
243            .map(|(_, _, block_id)| GraphNode {
244                unit_index,
245                block_id,
246            })
247            .collect()
248    }
249
250    pub fn block_info(&self, block_id: BlockId) -> Option<(usize, Option<String>, BlockKind)> {
251        let block_indexes = self.cc.block_indexes.borrow();
252        block_indexes.get_block_info(block_id)
253    }
254
255    pub fn find_related_blocks(
256        &self,
257        node: GraphNode,
258        relations: Vec<BlockRelation>,
259    ) -> Vec<GraphNode> {
260        if node.unit_index >= self.units.len() {
261            return Vec::new();
262        }
263
264        let unit = &self.units[node.unit_index];
265        let mut result = Vec::new();
266
267        for relation in relations {
268            match relation {
269                BlockRelation::DependsOn => {
270                    // Get all blocks that this block depends on
271                    let dependencies = unit
272                        .edges
273                        .get_related(node.block_id, BlockRelation::DependsOn);
274                    for dep_block_id in dependencies {
275                        result.push(GraphNode {
276                            unit_index: node.unit_index,
277                            block_id: dep_block_id,
278                        });
279                    }
280                }
281                BlockRelation::DependedBy => {
282                    let mut seen = HashSet::new();
283
284                    // Direct dependents tracked on this unit (covers cross-unit edges too)
285                    let dependents = unit
286                        .edges
287                        .get_related(node.block_id, BlockRelation::DependedBy);
288                    if !dependents.is_empty() {
289                        let indexes = self.cc.block_indexes.borrow();
290                        for dep_block_id in dependents {
291                            if !seen.insert(dep_block_id) {
292                                continue;
293                            }
294                            if let Some((dep_unit_idx, _, _)) = indexes.get_block_info(dep_block_id)
295                            {
296                                result.push(GraphNode {
297                                    unit_index: dep_unit_idx,
298                                    block_id: dep_block_id,
299                                });
300                            } else {
301                                result.push(GraphNode {
302                                    unit_index: node.unit_index,
303                                    block_id: dep_block_id,
304                                });
305                            }
306                        }
307                    }
308
309                    // Fallback: scan current unit for reverse DependsOn edges
310                    let local_dependents = unit
311                        .edges
312                        .find_reverse_relations(node.block_id, BlockRelation::DependsOn);
313                    for dep_block_id in local_dependents {
314                        if !seen.insert(dep_block_id) {
315                            continue;
316                        }
317                        result.push(GraphNode {
318                            unit_index: node.unit_index,
319                            block_id: dep_block_id,
320                        });
321                    }
322                }
323                BlockRelation::Unknown => {
324                    // Skip unknown relations
325                }
326            }
327        }
328
329        result
330    }
331
332    pub fn find_dpends_blocks_recursive(&self, node: GraphNode) -> HashSet<GraphNode> {
333        let mut visited = HashSet::new();
334        let mut stack = vec![node];
335        let relations = vec![BlockRelation::DependsOn];
336
337        while let Some(current) = stack.pop() {
338            if visited.contains(&current) {
339                continue;
340            }
341            visited.insert(current);
342
343            for related in self.find_related_blocks(current, relations.clone()) {
344                if !visited.contains(&related) {
345                    stack.push(related);
346                }
347            }
348        }
349
350        visited.remove(&node);
351        visited
352    }
353
354    pub fn traverse_bfs<F>(&self, start: GraphNode, mut callback: F)
355    where
356        F: FnMut(GraphNode),
357    {
358        let mut visited = HashSet::new();
359        let mut queue = vec![start];
360        let relations = vec![BlockRelation::DependsOn, BlockRelation::DependedBy];
361
362        while !queue.is_empty() {
363            let current = queue.remove(0);
364            if visited.contains(&current) {
365                continue;
366            }
367            visited.insert(current);
368            callback(current);
369
370            for related in self.find_related_blocks(current, relations.clone()) {
371                if !visited.contains(&related) {
372                    queue.push(related);
373                }
374            }
375        }
376    }
377
378    pub fn traverse_dfs<F>(&self, start: GraphNode, mut callback: F)
379    where
380        F: FnMut(GraphNode),
381    {
382        let mut visited = HashSet::new();
383        self.traverse_dfs_impl(start, &mut visited, &mut callback);
384    }
385
386    fn traverse_dfs_impl<F>(
387        &self,
388        node: GraphNode,
389        visited: &mut HashSet<GraphNode>,
390        callback: &mut F,
391    ) where
392        F: FnMut(GraphNode),
393    {
394        if visited.contains(&node) {
395            return;
396        }
397        visited.insert(node);
398        callback(node);
399
400        let relations = vec![BlockRelation::DependsOn, BlockRelation::DependedBy];
401        for related in self.find_related_blocks(node, relations) {
402            if !visited.contains(&related) {
403                self.traverse_dfs_impl(related, visited, callback);
404            }
405        }
406    }
407
408    pub fn get_block_depends(&self, node: GraphNode) -> HashSet<GraphNode> {
409        if node.unit_index >= self.units.len() {
410            return HashSet::new();
411        }
412
413        let unit = &self.units[node.unit_index];
414        let mut result = HashSet::new();
415        let mut visited = HashSet::new();
416        let mut stack = vec![node.block_id];
417
418        while let Some(current_block) = stack.pop() {
419            if visited.contains(&current_block) {
420                continue;
421            }
422            visited.insert(current_block);
423
424            let dependencies = unit
425                .edges
426                .get_related(current_block, BlockRelation::DependsOn);
427            for dep_block_id in dependencies {
428                if dep_block_id != node.block_id {
429                    result.insert(GraphNode {
430                        unit_index: node.unit_index,
431                        block_id: dep_block_id,
432                    });
433                    stack.push(dep_block_id);
434                }
435            }
436        }
437
438        result
439    }
440
441    pub fn get_block_depended(&self, node: GraphNode) -> HashSet<GraphNode> {
442        if node.unit_index >= self.units.len() {
443            return HashSet::new();
444        }
445
446        let unit = &self.units[node.unit_index];
447        let mut result = HashSet::new();
448        let mut visited = HashSet::new();
449        let mut stack = vec![node.block_id];
450
451        while let Some(current_block) = stack.pop() {
452            if visited.contains(&current_block) {
453                continue;
454            }
455            visited.insert(current_block);
456
457            let dependencies = unit
458                .edges
459                .get_related(current_block, BlockRelation::DependedBy);
460            for dep_block_id in dependencies {
461                if dep_block_id != node.block_id {
462                    result.insert(GraphNode {
463                        unit_index: node.unit_index,
464                        block_id: dep_block_id,
465                    });
466                    stack.push(dep_block_id);
467                }
468            }
469        }
470
471        result
472    }
473
474    fn render_compact_graph_inner(&self, top_k: Option<usize>) -> String {
475        #[derive(Clone)]
476        struct CompactNode {
477            block_id: BlockId,
478            unit_index: usize,
479            kind: BlockKind,
480            name: String,
481            location: Option<String>,
482        }
483
484        fn byte_to_line(content: &[u8], byte_pos: usize) -> usize {
485            let clamped = byte_pos.min(content.len());
486            let mut line = 1;
487            for &ch in &content[..clamped] {
488                if ch == b'\n' {
489                    line += 1;
490                }
491            }
492            line
493        }
494
495        fn escape_label(input: &str) -> String {
496            input
497                .replace('\\', "\\\\")
498                .replace('"', "\\\"")
499                .replace('\n', "\\n")
500        }
501
502        fn extract_crate_path(location: &str) -> String {
503            // Extract crate path from file location so all nodes from the same crate cluster together.
504            // Examples:
505            //   /path/to/crate/src/module/file.rs -> crate
506            //   C:\path\to\crate\src\module\file.rs -> crate
507
508            let path = location.split(':').next().unwrap_or(location);
509            let parts: Vec<&str> = path.split(['/', '\\']).collect();
510
511            // Find the "src" directory index
512            if let Some(src_idx) = parts.iter().position(|&p| p == "src") {
513                if src_idx > 0 {
514                    // The directory before `src` is the crate root.
515                    return parts[src_idx - 1].to_string();
516                }
517            }
518
519            // Fallback: try to extract just the filename without extension
520            if let Some(filename) = parts.last() {
521                if !filename.is_empty() {
522                    return filename.split('.').next().unwrap_or("unknown").to_string();
523                }
524            }
525
526            "unknown".to_string()
527        }
528
529        fn strongly_connected_components(adjacency: &[Vec<usize>]) -> Vec<Vec<usize>> {
530            fn strongconnect(
531                v: usize,
532                index: &mut usize,
533                adjacency: &[Vec<usize>],
534                indices: &mut [Option<usize>],
535                lowlink: &mut [usize],
536                stack: &mut Vec<usize>,
537                on_stack: &mut [bool],
538                components: &mut Vec<Vec<usize>>,
539            ) {
540                indices[v] = Some(*index);
541                lowlink[v] = *index;
542                *index += 1;
543                stack.push(v);
544                on_stack[v] = true;
545
546                for &w in &adjacency[v] {
547                    if indices[w].is_none() {
548                        strongconnect(
549                            w, index, adjacency, indices, lowlink, stack, on_stack, components,
550                        );
551                        lowlink[v] = lowlink[v].min(lowlink[w]);
552                    } else if on_stack[w] {
553                        let w_index = indices[w].unwrap();
554                        lowlink[v] = lowlink[v].min(w_index);
555                    }
556                }
557
558                if lowlink[v] == indices[v].unwrap() {
559                    let mut component = Vec::new();
560                    while let Some(w) = stack.pop() {
561                        on_stack[w] = false;
562                        component.push(w);
563                        if w == v {
564                            break;
565                        }
566                    }
567                    components.push(component);
568                }
569            }
570
571            let mut index = 0;
572            let mut stack = Vec::new();
573            let mut indices = vec![None; adjacency.len()];
574            let mut lowlink = vec![0; adjacency.len()];
575            let mut on_stack = vec![false; adjacency.len()];
576            let mut components = Vec::new();
577
578            for v in 0..adjacency.len() {
579                if indices[v].is_none() {
580                    strongconnect(
581                        v,
582                        &mut index,
583                        adjacency,
584                        &mut indices,
585                        &mut lowlink,
586                        &mut stack,
587                        &mut on_stack,
588                        &mut components,
589                    );
590                }
591            }
592
593            components
594        }
595
596        let interesting_kinds = [BlockKind::Class, BlockKind::Enum];
597
598        let ranked_order = top_k.and_then(|limit| {
599            let mut ranker = PageRanker::new(self);
600            // Apply configured PageRank direction
601            ranker.config_mut().direction = self.pagerank_direction;
602            let mut collected = Vec::new();
603
604            for ranked in ranker.rank() {
605                if interesting_kinds.contains(&ranked.kind) {
606                    collected.push(ranked.node.block_id);
607                }
608                if collected.len() >= limit {
609                    break;
610                }
611            }
612
613            if collected.is_empty() {
614                None
615            } else {
616                Some(collected)
617            }
618        });
619
620        let ranked_filter: Option<HashSet<BlockId>> = ranked_order
621            .as_ref()
622            .map(|ordered| ordered.iter().copied().collect());
623
624        let mut nodes: Vec<CompactNode> = {
625            let block_indexes = self.cc.block_indexes.borrow();
626            block_indexes
627                .block_id_index
628                .iter()
629                .filter_map(|(&block_id, (unit_index, name_opt, kind))| {
630                    if !interesting_kinds.contains(kind) {
631                        return None;
632                    }
633
634                    if let Some(ref ranked_ids) = ranked_filter {
635                        if !ranked_ids.contains(&block_id) {
636                            return None;
637                        }
638                    }
639
640                    let unit = self.cc.compile_unit(*unit_index);
641                    let block = unit.bb(block_id);
642                    let display_name = name_opt
643                        .clone()
644                        .or_else(|| {
645                            block
646                                .base()
647                                .and_then(|base| base.opt_get_name().map(|s| s.to_string()))
648                        })
649                        .unwrap_or_else(|| format!("{}:{}", kind, block_id.as_u32()));
650
651                    let path = std::fs::canonicalize(
652                        unit.file_path()
653                            .or_else(|| unit.file().path())
654                            .unwrap_or("<unknown>"),
655                    )
656                    .map(|p| p.to_string_lossy().to_string())
657                    .unwrap_or_else(|_| {
658                        unit.file_path()
659                            .or_else(|| unit.file().path())
660                            .unwrap_or("<unknown>")
661                            .to_string()
662                    });
663                    let location = block
664                        .opt_node()
665                        .and_then(|node| {
666                            unit.file()
667                                .file
668                                .content
669                                .as_ref()
670                                .map(|bytes| byte_to_line(bytes.as_slice(), node.start_byte()))
671                                .map(|line| format!("{path}:{line}"))
672                        })
673                        .or_else(|| Some(path.clone()));
674
675                    Some(CompactNode {
676                        block_id,
677                        unit_index: *unit_index,
678                        kind: *kind,
679                        name: display_name,
680                        location,
681                    })
682                })
683                .collect()
684        };
685
686        nodes.sort_by(|a, b| a.name.cmp(&b.name));
687
688        if nodes.is_empty() {
689            return "digraph CompactProject {\n}\n".to_string();
690        }
691
692        let mut node_index = HashMap::new();
693        for (idx, node) in nodes.iter().enumerate() {
694            node_index.insert(node.block_id, idx);
695        }
696
697        let mut adjacency: Vec<Vec<usize>> = vec![Vec::new(); nodes.len()];
698        for node in &nodes {
699            let Some(unit_graph) = self.unit_graph(node.unit_index) else {
700                continue;
701            };
702            let from_idx = node_index[&node.block_id];
703
704            let dependencies = unit_graph
705                .edges()
706                .get_related(node.block_id, BlockRelation::DependsOn);
707            let mut targets = dependencies
708                .into_iter()
709                .filter_map(|dep_id| node_index.get(&dep_id).copied())
710                .collect::<Vec<_>>();
711
712            targets.sort_unstable();
713            targets.dedup();
714            adjacency[from_idx] = targets;
715        }
716
717        let mut components: Vec<Vec<usize>> = strongly_connected_components(&adjacency)
718            .into_iter()
719            .filter(|component| !component.is_empty())
720            .collect();
721
722        if components.is_empty() {
723            return "digraph CompactProject {\n}\n".to_string();
724        }
725
726        components.sort_by(|a, b| b.len().cmp(&a.len()));
727
728        let target_limit = ranked_order
729            .as_ref()
730            .map(|order| order.len())
731            .unwrap_or_else(|| nodes.len());
732
733        let mut keep = vec![false; nodes.len()];
734        let mut kept = 0usize;
735
736        for component in components.iter().filter(|component| component.len() > 1) {
737            for &idx in component {
738                if !keep[idx] {
739                    keep[idx] = true;
740                    kept += 1;
741                }
742            }
743            if kept >= target_limit {
744                break;
745            }
746        }
747
748        if kept < target_limit {
749            if let Some(order) = ranked_order.as_ref() {
750                for block_id in order {
751                    if let Some(&idx) = node_index.get(block_id) {
752                        if !keep[idx] {
753                            keep[idx] = true;
754                            kept += 1;
755                            if kept >= target_limit {
756                                break;
757                            }
758                        }
759                    }
760                }
761            } else {
762                for idx in 0..nodes.len() {
763                    if !keep[idx] {
764                        keep[idx] = true;
765                        kept += 1;
766                        if kept >= target_limit {
767                            break;
768                        }
769                    }
770                }
771            }
772        }
773
774        if kept == 0 {
775            if let Some(component) = components.first() {
776                for &idx in component {
777                    keep[idx] = true;
778                }
779                kept = component.len();
780            }
781        }
782
783        let mut filtered_nodes = Vec::with_capacity(kept);
784        let mut remap = HashMap::new();
785        for (old_idx, node) in nodes.into_iter().enumerate() {
786            if keep[old_idx] {
787                let new_idx = filtered_nodes.len();
788                remap.insert(old_idx, new_idx);
789                filtered_nodes.push(node);
790            }
791        }
792
793        let nodes = filtered_nodes;
794
795        let mut edges = BTreeSet::new();
796        for (old_idx, neighbours) in adjacency.iter().enumerate() {
797            let Some(&from_idx) = remap.get(&old_idx) else {
798                continue;
799            };
800            for &target in neighbours {
801                if let Some(&to_idx) = remap.get(&target) {
802                    edges.insert((from_idx, to_idx));
803                }
804            }
805        }
806
807        // Remove isolated nodes (nodes with no incoming or outgoing edges in the filtered graph).
808        let mut in_degree = vec![0usize; nodes.len()];
809        let mut out_degree = vec![0usize; nodes.len()];
810        for &(from, to) in &edges {
811            out_degree[from] += 1;
812            in_degree[to] += 1;
813        }
814
815        let final_keep: Vec<bool> = (0..nodes.len())
816            .map(|idx| in_degree[idx] > 0 || out_degree[idx] > 0)
817            .collect();
818
819        let mut final_nodes = Vec::new();
820        let mut final_remap = HashMap::new();
821        for (old_idx, node) in nodes.into_iter().enumerate() {
822            if final_keep[old_idx] {
823                let new_idx = final_nodes.len();
824                final_remap.insert(old_idx, new_idx);
825                final_nodes.push(node);
826            }
827        }
828
829        let nodes = final_nodes;
830
831        let final_edges: BTreeSet<_> = edges
832            .into_iter()
833            .filter_map(|(from, to)| {
834                let new_from = final_remap.get(&from).copied()?;
835                let new_to = final_remap.get(&to).copied()?;
836                Some((new_from, new_to))
837            })
838            .collect();
839        let edges = final_edges;
840
841        // Extract crate/module paths from node locations
842        let mut crate_groups: HashMap<String, Vec<usize>> = HashMap::new();
843        for (idx, node) in nodes.iter().enumerate() {
844            if let Some(location) = &node.location {
845                // Extract crate path from location
846                // Format: /path/to/crate/src/module/file.rs -> crate/module
847                let crate_path = extract_crate_path(location);
848                crate_groups
849                    .entry(crate_path)
850                    .or_insert_with(Vec::new)
851                    .push(idx);
852            }
853        }
854
855        let mut output = String::from("digraph CompactProject {\n");
856
857        // Generate subgraphs for each crate/module group
858        let mut subgraph_counter = 0;
859        for (crate_path, node_indices) in crate_groups.iter() {
860            output.push_str(&format!("  subgraph cluster_{} {{\n", subgraph_counter));
861            output.push_str(&format!("    label=\"{}\";\n", escape_label(crate_path)));
862            output.push_str("    style=filled;\n");
863            output.push_str("    color=lightgrey;\n");
864
865            for &idx in node_indices {
866                let node = &nodes[idx];
867                let mut parts = vec![node.name.clone(), format!("({})", node.kind)];
868                if let Some(location) = &node.location {
869                    parts.push(location.clone());
870                }
871                let label = parts
872                    .into_iter()
873                    .map(|part| escape_label(&part))
874                    .collect::<Vec<_>>()
875                    .join("\\n");
876                output.push_str(&format!("    n{} [label=\"{}\"];\n", idx, label));
877            }
878
879            output.push_str("  }\n");
880            subgraph_counter += 1;
881        }
882
883        for (from, to) in edges {
884            output.push_str(&format!("  n{} -> n{};\n", from, to));
885        }
886        output.push_str("}\n");
887        output
888    }
889
890    fn add_cross_edge(
891        &self,
892        from_idx: usize,
893        to_idx: usize,
894        from_block: BlockId,
895        to_block: BlockId,
896    ) {
897        if from_idx == to_idx {
898            let unit = &self.units[from_idx];
899            if !unit
900                .edges
901                .has_relation(from_block, BlockRelation::DependsOn, to_block)
902            {
903                unit.edges.add_relation(from_block, to_block);
904            }
905            return;
906        }
907
908        let from_unit = &self.units[from_idx];
909        from_unit
910            .edges
911            .add_relation_if_not_exists(from_block, BlockRelation::DependsOn, to_block);
912
913        let to_unit = &self.units[to_idx];
914        to_unit
915            .edges
916            .add_relation_if_not_exists(to_block, BlockRelation::DependedBy, from_block);
917    }
918}
919
920#[derive(Debug)]
921struct GraphBuilder<'tcx, Language> {
922    unit: CompileUnit<'tcx>,
923    root: Option<BlockId>,
924    children_stack: Vec<Vec<BlockId>>,
925    config: GraphBuildConfig,
926    _marker: PhantomData<Language>,
927}
928
929impl<'tcx, Language: LanguageTrait> GraphBuilder<'tcx, Language> {
930    fn new(unit: CompileUnit<'tcx>, config: GraphBuildConfig) -> Self {
931        Self {
932            unit,
933            root: None,
934            children_stack: Vec::new(),
935            config,
936            _marker: PhantomData,
937        }
938    }
939
940    fn next_id(&self) -> BlockId {
941        self.unit.reserve_block_id()
942    }
943
944    fn create_block(
945        &self,
946        id: BlockId,
947        node: HirNode<'tcx>,
948        kind: BlockKind,
949        parent: Option<BlockId>,
950        children: Vec<BlockId>,
951    ) -> BasicBlock<'tcx> {
952        let arena = &self.unit.cc.block_arena;
953        match kind {
954            BlockKind::Root => {
955                // Extract file_name from HirFile node if available
956                let file_name = node.as_file().map(|file| file.file_path.clone());
957                let block = BlockRoot::from_hir(id, node, parent, children, file_name);
958                BasicBlock::Root(arena.alloc(block))
959            }
960            BlockKind::Func => {
961                let block = BlockFunc::from_hir(id, node, parent, children);
962                BasicBlock::Func(arena.alloc(block))
963            }
964            BlockKind::Class => {
965                let block = BlockClass::from_hir(id, node, parent, children);
966                BasicBlock::Class(arena.alloc(block))
967            }
968            BlockKind::Stmt => {
969                let stmt = BlockStmt::from_hir(id, node, parent, children);
970                BasicBlock::Stmt(arena.alloc(stmt))
971            }
972            BlockKind::Call => {
973                let stmt = BlockCall::from_hir(id, node, parent, children);
974                BasicBlock::Call(arena.alloc(stmt))
975            }
976            BlockKind::Enum => {
977                let enum_ty = BlockEnum::from_hir(id, node, parent, children);
978                BasicBlock::Enum(arena.alloc(enum_ty))
979            }
980            BlockKind::Const => {
981                let stmt = BlockConst::from_hir(id, node, parent, children);
982                BasicBlock::Const(arena.alloc(stmt))
983            }
984            BlockKind::Impl => {
985                let block = BlockImpl::from_hir(id, node, parent, children);
986                BasicBlock::Impl(arena.alloc(block))
987            }
988            BlockKind::Field => {
989                let block = BlockField::from_hir(id, node, parent, children);
990                BasicBlock::Field(arena.alloc(block))
991            }
992            _ => {
993                panic!("unknown block kind: {}", kind)
994            }
995        }
996    }
997
998    fn build_edges(&self, node: HirNode<'tcx>) -> BlockRelationMap {
999        let edges = BlockRelationMap::default();
1000        let mut visited = HashSet::new();
1001        let mut unresolved = HashSet::new();
1002        self.collect_edges(node, &edges, &mut visited, &mut unresolved);
1003        edges
1004    }
1005
1006    fn collect_edges(
1007        &self,
1008        node: HirNode<'tcx>,
1009        edges: &BlockRelationMap,
1010        visited: &mut HashSet<SymId>,
1011        unresolved: &mut HashSet<SymId>,
1012    ) {
1013        // Try to process symbol dependencies for this node
1014        if let Some(scope) = self.unit.opt_get_scope(node.hir_id()) {
1015            if let Some(symbol) = scope.symbol() {
1016                self.process_symbol(symbol, edges, visited, unresolved);
1017            }
1018        }
1019
1020        // Recurse into children
1021        for &child_id in node.children() {
1022            let child = self.unit.hir_node(child_id);
1023            self.collect_edges(child, edges, visited, unresolved);
1024        }
1025    }
1026
1027    fn process_symbol(
1028        &self,
1029        symbol: &'tcx Symbol,
1030        edges: &BlockRelationMap,
1031        visited: &mut HashSet<SymId>,
1032        unresolved: &mut HashSet<SymId>,
1033    ) {
1034        let symbol_id = symbol.id;
1035
1036        // Avoid processing the same symbol twice
1037        if !visited.insert(symbol_id) {
1038            return;
1039        }
1040
1041        let Some(from_block) = symbol.block_id() else {
1042            return;
1043        };
1044
1045        for &dep_id in symbol.depends.borrow().iter() {
1046            self.link_dependency(dep_id, from_block, edges, unresolved);
1047        }
1048    }
1049    fn link_dependency(
1050        &self,
1051        dep_id: SymId,
1052        from_block: BlockId,
1053        edges: &BlockRelationMap,
1054        unresolved: &mut HashSet<SymId>,
1055    ) {
1056        // If target symbol exists and has a block, add the dependency edge
1057        if let Some(target_symbol) = self.unit.opt_get_symbol(dep_id) {
1058            if let Some(to_block) = target_symbol.block_id() {
1059                if !edges.has_relation(from_block, BlockRelation::DependsOn, to_block) {
1060                    edges.add_relation(from_block, to_block);
1061                }
1062                let target_unit = target_symbol.unit_index();
1063                if target_unit.is_some()
1064                    && target_unit != Some(self.unit.index)
1065                    && unresolved.insert(dep_id)
1066                {
1067                    self.unit.add_unresolved_symbol(target_symbol);
1068                }
1069                return;
1070            }
1071
1072            // Target symbol exists but block not yet known
1073            if unresolved.insert(dep_id) {
1074                self.unit.add_unresolved_symbol(target_symbol);
1075            }
1076            return;
1077        }
1078
1079        // Target symbol not found at all
1080        unresolved.insert(dep_id);
1081    }
1082
1083    fn build_block(&mut self, node: HirNode<'tcx>, parent: BlockId, recursive: bool) {
1084        let id = self.next_id();
1085        let block_kind = Language::block_kind(node.kind_id());
1086        assert_ne!(block_kind, BlockKind::Undefined);
1087
1088        if self.root.is_none() {
1089            self.root = Some(id);
1090        }
1091
1092        let children = if recursive {
1093            self.children_stack.push(Vec::new());
1094            self.visit_children(node, id);
1095
1096            self.children_stack.pop().unwrap()
1097        } else {
1098            Vec::new()
1099        };
1100
1101        let block = self.create_block(id, node, block_kind, Some(parent), children);
1102        if let Some(scope) = self.unit.opt_get_scope(node.hir_id()) {
1103            if let Some(symbol) = scope.symbol() {
1104                // Only set the block ID if it hasn't been set before
1105                // This prevents impl blocks from overwriting struct block IDs
1106                if symbol.block_id().is_none() {
1107                    symbol.set_block_id(Some(id));
1108                }
1109            }
1110        }
1111        self.unit.insert_block(id, block, parent);
1112
1113        if let Some(children) = self.children_stack.last_mut() {
1114            children.push(id);
1115        }
1116    }
1117}
1118
1119impl<'tcx, Language: LanguageTrait> HirVisitor<'tcx> for GraphBuilder<'tcx, Language> {
1120    fn unit(&self) -> CompileUnit<'tcx> {
1121        self.unit
1122    }
1123
1124    fn visit_file(&mut self, node: HirNode<'tcx>, parent: BlockId) {
1125        self.children_stack.push(Vec::new());
1126        self.build_block(node, parent, true);
1127    }
1128
1129    fn visit_internal(&mut self, node: HirNode<'tcx>, parent: BlockId) {
1130        let kind = Language::block_kind(node.kind_id());
1131        if self.config.compact {
1132            if kind == BlockKind::Root {
1133                self.build_block(node, parent, true);
1134            } else {
1135                self.visit_children(node, parent);
1136            }
1137            return;
1138        }
1139
1140        // Non-compact mode: process all defined kinds
1141        if kind != BlockKind::Undefined {
1142            self.build_block(node, parent, false);
1143        } else {
1144            self.visit_children(node, parent);
1145        }
1146    }
1147
1148    fn visit_scope(&mut self, node: HirNode<'tcx>, parent: BlockId) {
1149        let kind = Language::block_kind(node.kind_id());
1150        if self.config.compact {
1151            // In compact mode, only create blocks for major constructs (Class, Enum, Impl)
1152            // Skip functions, fields, and scopes to reduce graph size
1153            match kind {
1154                BlockKind::Class | BlockKind::Enum => {
1155                    // Build with recursion enabled to capture nested major constructs
1156                    self.build_block(node, parent, true);
1157                }
1158                // Skip all other scopes - don't recurse
1159                _ => {
1160                    // Stop here, do not visit children
1161                    // self.visit_children(node, parent);
1162                }
1163            }
1164            return;
1165        }
1166
1167        // Non-compact mode: build blocks for all major constructs
1168        match kind {
1169            BlockKind::Func
1170            | BlockKind::Class
1171            | BlockKind::Enum
1172            | BlockKind::Const
1173            | BlockKind::Impl
1174            | BlockKind::Field => self.build_block(node, parent, true),
1175            _ => self.visit_children(node, parent),
1176        }
1177    }
1178}
1179
1180pub fn build_llmcc_graph_with_config<'tcx, L: LanguageTrait>(
1181    unit: CompileUnit<'tcx>,
1182    unit_index: usize,
1183    config: GraphBuildConfig,
1184) -> Result<UnitGraph, Box<dyn std::error::Error>> {
1185    let root_hir = unit
1186        .file_start_hir_id()
1187        .ok_or("missing file start HIR id")?;
1188    let mut builder = GraphBuilder::<L>::new(unit, config);
1189    let root_node = unit.hir_node(root_hir);
1190    builder.visit_node(root_node, BlockId::ROOT_PARENT);
1191
1192    let root_block = builder.root;
1193    let root_block = root_block.ok_or("graph builder produced no root")?;
1194    let edges = builder.build_edges(root_node);
1195    Ok(UnitGraph::new(unit_index, root_block, edges))
1196}
1197
1198pub fn build_llmcc_graph<'tcx, L: LanguageTrait>(
1199    unit: CompileUnit<'tcx>,
1200    unit_index: usize,
1201) -> Result<UnitGraph, Box<dyn std::error::Error>> {
1202    build_llmcc_graph_with_config::<L>(unit, unit_index, GraphBuildConfig::default())
1203}