llmcc_core/
graph_builder.rs

1use std::collections::{BTreeSet, HashMap, HashSet};
2use std::path::Path;
3use std::marker::PhantomData;
4
5pub use crate::block::{BasicBlock, BlockId, BlockKind, BlockRelation};
6use crate::block::{
7    BlockCall, BlockClass, BlockConst, BlockEnum, BlockField, BlockFunc, BlockImpl, BlockRoot,
8    BlockStmt,
9};
10use crate::block_rel::BlockRelationMap;
11use crate::context::{CompileCtxt, CompileUnit};
12use crate::ir::HirNode;
13use crate::lang_def::LanguageTrait;
14use crate::pagerank::{PageRankDirection, PageRanker};
15use crate::symbol::{SymId, Symbol};
16use crate::visit::HirVisitor;
17
18#[derive(Debug, Clone)]
19pub struct UnitGraph {
20    /// Compile unit this graph belongs to
21    unit_index: usize,
22    /// Root block ID of this unit
23    root: BlockId,
24    /// Edges of this graph unit
25    edges: BlockRelationMap,
26}
27
28impl UnitGraph {
29    pub fn new(unit_index: usize, root: BlockId, edges: BlockRelationMap) -> Self {
30        Self {
31            unit_index,
32            root,
33            edges,
34        }
35    }
36
37    pub fn unit_index(&self) -> usize {
38        self.unit_index
39    }
40
41    pub fn root(&self) -> BlockId {
42        self.root
43    }
44
45    pub fn edges(&self) -> &BlockRelationMap {
46        &self.edges
47    }
48}
49
50#[derive(Debug, Clone, Copy)]
51pub struct GraphBuildConfig {
52    pub compact: bool,
53}
54
55impl GraphBuildConfig {
56    pub fn compact() -> Self {
57        Self { compact: true }
58    }
59}
60
61impl Default for GraphBuildConfig {
62    fn default() -> Self {
63        Self { compact: false }
64    }
65}
66
67#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
68pub struct GraphNode {
69    pub unit_index: usize,
70    pub block_id: BlockId,
71}
72
73/// ProjectGraph represents a complete compilation project with all units and their inter-dependencies.
74///
75/// # Overview
76/// ProjectGraph maintains a collection of per-unit compilation graphs (UnitGraph) and facilitates
77/// cross-unit dependency resolution. It provides efficient multi-dimensional indexing for block
78/// lookups by name, kind, unit, and ID, enabling quick context retrieval for LLM consumption.
79///
80/// # Architecture
81/// The graph consists of:
82/// - **UnitGraphs**: One per compilation unit (file), containing blocks and intra-unit relations
83/// - **Block Indexes**: Multi-dimensional indexes via BlockIndexMaps for O(1) to O(log n) lookups
84/// - **Cross-unit Links**: Dependencies tracked between blocks across different units
85///
86/// # Primary Use Cases
87/// 1. **Symbol Resolution**: Find blocks by name across the entire project
88/// 2. **Context Gathering**: Collect all related blocks for code analysis
89/// 3. **LLM Serialization**: Export graph as text or JSON for LLM model consumption
90/// 4. **Dependency Analysis**: Traverse dependency graphs to understand block relationships
91///
92#[derive(Debug)]
93pub struct ProjectGraph<'tcx> {
94    /// Reference to the compilation context containing all symbols, HIR nodes, and blocks
95    pub cc: &'tcx CompileCtxt<'tcx>,
96    /// Per-unit graphs containing blocks and intra-unit relations
97    units: Vec<UnitGraph>,
98    compact_rank_limit: Option<usize>,
99    pagerank_direction: PageRankDirection,
100}
101
102impl<'tcx> ProjectGraph<'tcx> {
103    pub fn new(cc: &'tcx CompileCtxt<'tcx>) -> Self {
104        Self {
105            cc,
106            units: Vec::new(),
107            compact_rank_limit: None,
108            pagerank_direction: PageRankDirection::default(),
109        }
110    }
111
112    pub fn add_child(&mut self, graph: UnitGraph) {
113        self.units.push(graph);
114    }
115
116    /// Configure the number of PageRank-filtered nodes retained when rendering compact graphs.
117    pub fn set_compact_rank_limit(&mut self, limit: Option<usize>) {
118        self.compact_rank_limit = match limit {
119            Some(0) => None,
120            other => other,
121        };
122    }
123
124    /// Configure the PageRank direction for ranking nodes.
125    pub fn set_pagerank_direction(&mut self, direction: PageRankDirection) {
126        self.pagerank_direction = direction;
127    }
128
129    pub fn link_units(&mut self) {
130        if self.units.is_empty() {
131            return;
132        }
133
134        let mut unresolved = self.cc.unresolve_symbols.borrow_mut();
135
136        unresolved.retain(|symbol_ref| {
137            let target = *symbol_ref;
138            let Some(target_block) = target.block_id() else {
139                return false;
140            };
141
142            let dependents: Vec<SymId> = target.depended.borrow().clone();
143            for dependent_id in dependents {
144                let Some(source_symbol) = self.cc.opt_get_symbol(dependent_id) else {
145                    continue;
146                };
147                let Some(from_block) = source_symbol.block_id() else {
148                    continue;
149                };
150                self.add_cross_edge(
151                    source_symbol.unit_index().unwrap(),
152                    target.unit_index().unwrap(),
153                    from_block,
154                    target_block,
155                );
156            }
157
158            false
159        });
160    }
161
162    pub fn units(&self) -> &[UnitGraph] {
163        &self.units
164    }
165
166    pub fn unit_graph(&self, unit_index: usize) -> Option<&UnitGraph> {
167        self.units
168            .iter()
169            .find(|unit| unit.unit_index() == unit_index)
170    }
171
172    pub fn block_by_name(&self, name: &str) -> Option<GraphNode> {
173        let block_indexes = self.cc.block_indexes.borrow();
174        let matches = block_indexes.find_by_name(name);
175
176        matches.first().map(|(unit_index, _, block_id)| GraphNode {
177            unit_index: *unit_index,
178            block_id: *block_id,
179        })
180    }
181
182    pub fn blocks_by_name(&self, name: &str) -> Vec<GraphNode> {
183        let block_indexes = self.cc.block_indexes.borrow();
184        let matches = block_indexes.find_by_name(name);
185
186        matches
187            .into_iter()
188            .map(|(unit_index, _, block_id)| GraphNode {
189                unit_index,
190                block_id,
191            })
192            .collect()
193    }
194
195    pub fn render_compact_graph(&self) -> String {
196        self.render_compact_graph_inner(self.compact_rank_limit)
197    }
198
199    pub fn block_by_name_in(&self, unit_index: usize, name: &str) -> Option<GraphNode> {
200        let block_indexes = self.cc.block_indexes.borrow();
201        let matches = block_indexes.find_by_name(name);
202
203        matches
204            .iter()
205            .find(|(u, _, _)| *u == unit_index)
206            .map(|(_, _, block_id)| GraphNode {
207                unit_index,
208                block_id: *block_id,
209            })
210    }
211
212    pub fn blocks_by_kind(&self, block_kind: BlockKind) -> Vec<GraphNode> {
213        let block_indexes = self.cc.block_indexes.borrow();
214        let matches = block_indexes.find_by_kind(block_kind);
215
216        matches
217            .into_iter()
218            .map(|(unit_index, _, block_id)| GraphNode {
219                unit_index,
220                block_id,
221            })
222            .collect()
223    }
224
225    pub fn blocks_by_kind_in(&self, block_kind: BlockKind, unit_index: usize) -> Vec<GraphNode> {
226        let block_indexes = self.cc.block_indexes.borrow();
227        let block_ids = block_indexes.find_by_kind_and_unit(block_kind, unit_index);
228
229        block_ids
230            .into_iter()
231            .map(|block_id| GraphNode {
232                unit_index,
233                block_id,
234            })
235            .collect()
236    }
237
238    pub fn blocks_in(&self, unit_index: usize) -> Vec<GraphNode> {
239        let block_indexes = self.cc.block_indexes.borrow();
240        let matches = block_indexes.find_by_unit(unit_index);
241
242        matches
243            .into_iter()
244            .map(|(_, _, block_id)| GraphNode {
245                unit_index,
246                block_id,
247            })
248            .collect()
249    }
250
251    pub fn block_info(&self, block_id: BlockId) -> Option<(usize, Option<String>, BlockKind)> {
252        let block_indexes = self.cc.block_indexes.borrow();
253        block_indexes.get_block_info(block_id)
254    }
255
256    pub fn find_related_blocks(
257        &self,
258        node: GraphNode,
259        relations: Vec<BlockRelation>,
260    ) -> Vec<GraphNode> {
261        if node.unit_index >= self.units.len() {
262            return Vec::new();
263        }
264
265        let unit = &self.units[node.unit_index];
266        let mut result = Vec::new();
267
268        for relation in relations {
269            match relation {
270                BlockRelation::DependsOn => {
271                    // Get all blocks that this block depends on
272                    let dependencies = unit
273                        .edges
274                        .get_related(node.block_id, BlockRelation::DependsOn);
275                    for dep_block_id in dependencies {
276                        result.push(GraphNode {
277                            unit_index: node.unit_index,
278                            block_id: dep_block_id,
279                        });
280                    }
281                }
282                BlockRelation::DependedBy => {
283                    let mut seen = HashSet::new();
284
285                    // Direct dependents tracked on this unit (covers cross-unit edges too)
286                    let dependents = unit
287                        .edges
288                        .get_related(node.block_id, BlockRelation::DependedBy);
289                    if !dependents.is_empty() {
290                        let indexes = self.cc.block_indexes.borrow();
291                        for dep_block_id in dependents {
292                            if !seen.insert(dep_block_id) {
293                                continue;
294                            }
295                            if let Some((dep_unit_idx, _, _)) = indexes.get_block_info(dep_block_id)
296                            {
297                                result.push(GraphNode {
298                                    unit_index: dep_unit_idx,
299                                    block_id: dep_block_id,
300                                });
301                            } else {
302                                result.push(GraphNode {
303                                    unit_index: node.unit_index,
304                                    block_id: dep_block_id,
305                                });
306                            }
307                        }
308                    }
309
310                    // Fallback: scan current unit for reverse DependsOn edges
311                    let local_dependents = unit
312                        .edges
313                        .find_reverse_relations(node.block_id, BlockRelation::DependsOn);
314                    for dep_block_id in local_dependents {
315                        if !seen.insert(dep_block_id) {
316                            continue;
317                        }
318                        result.push(GraphNode {
319                            unit_index: node.unit_index,
320                            block_id: dep_block_id,
321                        });
322                    }
323                }
324                BlockRelation::Unknown => {
325                    // Skip unknown relations
326                }
327            }
328        }
329
330        result
331    }
332
333    pub fn find_dpends_blocks_recursive(&self, node: GraphNode) -> HashSet<GraphNode> {
334        let mut visited = HashSet::new();
335        let mut stack = vec![node];
336        let relations = vec![BlockRelation::DependsOn];
337
338        while let Some(current) = stack.pop() {
339            if visited.contains(&current) {
340                continue;
341            }
342            visited.insert(current);
343
344            for related in self.find_related_blocks(current, relations.clone()) {
345                if !visited.contains(&related) {
346                    stack.push(related);
347                }
348            }
349        }
350
351        visited.remove(&node);
352        visited
353    }
354
355    pub fn traverse_bfs<F>(&self, start: GraphNode, mut callback: F)
356    where
357        F: FnMut(GraphNode),
358    {
359        let mut visited = HashSet::new();
360        let mut queue = vec![start];
361        let relations = vec![BlockRelation::DependsOn, BlockRelation::DependedBy];
362
363        while !queue.is_empty() {
364            let current = queue.remove(0);
365            if visited.contains(&current) {
366                continue;
367            }
368            visited.insert(current);
369            callback(current);
370
371            for related in self.find_related_blocks(current, relations.clone()) {
372                if !visited.contains(&related) {
373                    queue.push(related);
374                }
375            }
376        }
377    }
378
379    pub fn traverse_dfs<F>(&self, start: GraphNode, mut callback: F)
380    where
381        F: FnMut(GraphNode),
382    {
383        let mut visited = HashSet::new();
384        self.traverse_dfs_impl(start, &mut visited, &mut callback);
385    }
386
387    fn traverse_dfs_impl<F>(
388        &self,
389        node: GraphNode,
390        visited: &mut HashSet<GraphNode>,
391        callback: &mut F,
392    ) where
393        F: FnMut(GraphNode),
394    {
395        if visited.contains(&node) {
396            return;
397        }
398        visited.insert(node);
399        callback(node);
400
401        let relations = vec![BlockRelation::DependsOn, BlockRelation::DependedBy];
402        for related in self.find_related_blocks(node, relations) {
403            if !visited.contains(&related) {
404                self.traverse_dfs_impl(related, visited, callback);
405            }
406        }
407    }
408
409    pub fn get_block_depends(&self, node: GraphNode) -> HashSet<GraphNode> {
410        if node.unit_index >= self.units.len() {
411            return HashSet::new();
412        }
413
414        let unit = &self.units[node.unit_index];
415        let mut result = HashSet::new();
416        let mut visited = HashSet::new();
417        let mut stack = vec![node.block_id];
418
419        while let Some(current_block) = stack.pop() {
420            if visited.contains(&current_block) {
421                continue;
422            }
423            visited.insert(current_block);
424
425            let dependencies = unit
426                .edges
427                .get_related(current_block, BlockRelation::DependsOn);
428            for dep_block_id in dependencies {
429                if dep_block_id != node.block_id {
430                    result.insert(GraphNode {
431                        unit_index: node.unit_index,
432                        block_id: dep_block_id,
433                    });
434                    stack.push(dep_block_id);
435                }
436            }
437        }
438
439        result
440    }
441
442    pub fn get_block_depended(&self, node: GraphNode) -> HashSet<GraphNode> {
443        if node.unit_index >= self.units.len() {
444            return HashSet::new();
445        }
446
447        let unit = &self.units[node.unit_index];
448        let mut result = HashSet::new();
449        let mut visited = HashSet::new();
450        let mut stack = vec![node.block_id];
451
452        while let Some(current_block) = stack.pop() {
453            if visited.contains(&current_block) {
454                continue;
455            }
456            visited.insert(current_block);
457
458            let dependencies = unit
459                .edges
460                .get_related(current_block, BlockRelation::DependedBy);
461            for dep_block_id in dependencies {
462                if dep_block_id != node.block_id {
463                    result.insert(GraphNode {
464                        unit_index: node.unit_index,
465                        block_id: dep_block_id,
466                    });
467                    stack.push(dep_block_id);
468                }
469            }
470        }
471
472        result
473    }
474
475    fn render_compact_graph_inner(&self, top_k: Option<usize>) -> String {
476        #[derive(Clone)]
477        struct CompactNode {
478            block_id: BlockId,
479            unit_index: usize,
480            // kind: BlockKind,
481            name: String,
482            location: Option<String>,
483        }
484
485        fn byte_to_line(content: &[u8], byte_pos: usize) -> usize {
486            let clamped = byte_pos.min(content.len());
487            let mut line = 1;
488            for &ch in &content[..clamped] {
489                if ch == b'\n' {
490                    line += 1;
491                }
492            }
493            line
494        }
495
496        fn escape_label(input: &str) -> String {
497            input
498                .replace('\\', "\\\\")
499                .replace('"', "\\\"")
500                .replace('\n', "\\n")
501        }
502
503        fn escape_attr(input: &str) -> String {
504            input.replace('\\', "\\\\").replace('"', "\\\"")
505        }
506
507        fn summarize_location(location: &str) -> (String, String) {
508            let (path_part, line_part) = location
509                .rsplit_once(':')
510                .map(|(path, line)| (path, Some(line)))
511                .unwrap_or((location, None));
512
513            let path = Path::new(path_part);
514            let components: Vec<_> = path
515                .components()
516                .filter_map(|comp| comp.as_os_str().to_str())
517                .collect();
518
519            let start = components
520                .len()
521                .saturating_sub(3);
522            let mut shortened = components[start..].join("/");
523            if shortened.is_empty() {
524                shortened = path
525                    .file_name()
526                    .and_then(|name| name.to_str())
527                    .unwrap_or(path_part)
528                    .to_string();
529            }
530
531            let display = if let Some(line) = line_part {
532                format!("{shortened}:{line}")
533            } else {
534                shortened
535            };
536
537            (display, location.to_string())
538        }
539
540        fn extract_crate_path(location: &str) -> String {
541            // Extract crate path from file location so all nodes from the same crate cluster together.
542            // Examples:
543            //   /path/to/crate/src/module/file.rs -> crate
544            //   C:\path\to\crate\src\module\file.rs -> crate
545
546            let path = location.split(':').next().unwrap_or(location);
547            let parts: Vec<&str> = path.split(['/', '\\']).collect();
548
549            // Find the "src" directory index
550            if let Some(src_idx) = parts.iter().position(|&p| p == "src") {
551                if src_idx > 0 {
552                    // The directory before `src` is the crate root.
553                    return parts[src_idx - 1].to_string();
554                }
555            }
556
557            // Fallback: try to extract just the filename without extension
558            if let Some(filename) = parts.last() {
559                if !filename.is_empty() {
560                    return filename.split('.').next().unwrap_or("unknown").to_string();
561                }
562            }
563
564            "unknown".to_string()
565        }
566
567        fn extract_python_module_path(location: &str) -> String {
568            const MAX_MODULE_DEPTH: usize = 2;
569
570            let path_str = location.split(':').next().unwrap_or(location);
571            let path = Path::new(path_str);
572
573            // If the path is not a Python file, fall back to generic handling.
574            if path.extension().and_then(|ext| ext.to_str()) != Some("py") {
575                return extract_crate_path(location);
576            }
577
578            let file_stem = path
579                .file_stem()
580                .and_then(|s| s.to_str())
581                .map(|s| s.to_string());
582
583            let mut packages: Vec<String> = Vec::new();
584            let mut current = path.parent();
585
586            while let Some(dir) = current {
587                let dir_name = match dir.file_name().and_then(|n| n.to_str()) {
588                    Some(name) if !name.is_empty() => name.to_string(),
589                    _ => break,
590                };
591
592                let has_init = dir.join("__init__.py").exists() || dir.join("__init__.pyi").exists();
593
594                if has_init {
595                    packages.push(dir_name);
596                }
597
598                current = dir.parent();
599            }
600
601            if packages.is_empty() {
602                if let Some(stem) = file_stem.as_ref().filter(|stem| stem.as_str() != "__init__") {
603                    return stem.clone();
604                }
605
606                if let Some(parent_name) = path
607                    .parent()
608                    .and_then(|dir| dir.file_name().and_then(|n| n.to_str()))
609                    .map(|s| s.to_string())
610                {
611                    return parent_name;
612                }
613
614                return "unknown".to_string();
615            }
616
617            packages.reverse();
618            if packages.len() > MAX_MODULE_DEPTH {
619                packages.truncate(MAX_MODULE_DEPTH);
620            }
621
622            packages.join(".")
623        }
624
625        fn extract_group_path(location: &str) -> String {
626            let path = location.split(':').next().unwrap_or(location);
627            if path.ends_with(".py") {
628                extract_python_module_path(location)
629            } else {
630                extract_crate_path(location)
631            }
632        }
633
634        fn strongly_connected_components(adjacency: &[Vec<usize>]) -> Vec<Vec<usize>> {
635            fn strongconnect(
636                v: usize,
637                index: &mut usize,
638                adjacency: &[Vec<usize>],
639                indices: &mut [Option<usize>],
640                lowlink: &mut [usize],
641                stack: &mut Vec<usize>,
642                on_stack: &mut [bool],
643                components: &mut Vec<Vec<usize>>,
644            ) {
645                indices[v] = Some(*index);
646                lowlink[v] = *index;
647                *index += 1;
648                stack.push(v);
649                on_stack[v] = true;
650
651                for &w in &adjacency[v] {
652                    if indices[w].is_none() {
653                        strongconnect(
654                            w, index, adjacency, indices, lowlink, stack, on_stack, components,
655                        );
656                        lowlink[v] = lowlink[v].min(lowlink[w]);
657                    } else if on_stack[w] {
658                        let w_index = indices[w].unwrap();
659                        lowlink[v] = lowlink[v].min(w_index);
660                    }
661                }
662
663                if lowlink[v] == indices[v].unwrap() {
664                    let mut component = Vec::new();
665                    while let Some(w) = stack.pop() {
666                        on_stack[w] = false;
667                        component.push(w);
668                        if w == v {
669                            break;
670                        }
671                    }
672                    components.push(component);
673                }
674            }
675
676            let mut index = 0;
677            let mut stack = Vec::new();
678            let mut indices = vec![None; adjacency.len()];
679            let mut lowlink = vec![0; adjacency.len()];
680            let mut on_stack = vec![false; adjacency.len()];
681            let mut components = Vec::new();
682
683            for v in 0..adjacency.len() {
684                if indices[v].is_none() {
685                    strongconnect(
686                        v,
687                        &mut index,
688                        adjacency,
689                        &mut indices,
690                        &mut lowlink,
691                        &mut stack,
692                        &mut on_stack,
693                        &mut components,
694                    );
695                }
696            }
697
698            components
699        }
700
701        let interesting_kinds = [BlockKind::Class, BlockKind::Enum];
702
703        let ranked_order = top_k.and_then(|limit| {
704            let mut ranker = PageRanker::new(self);
705            // Apply configured PageRank direction
706            ranker.config_mut().direction = self.pagerank_direction;
707            let mut collected = Vec::new();
708
709            for ranked in ranker.rank() {
710                if interesting_kinds.contains(&ranked.kind) {
711                    collected.push(ranked.node.block_id);
712                }
713                if collected.len() >= limit {
714                    break;
715                }
716            }
717
718            if collected.is_empty() {
719                None
720            } else {
721                Some(collected)
722            }
723        });
724
725        let ranked_filter: Option<HashSet<BlockId>> = ranked_order
726            .as_ref()
727            .map(|ordered| ordered.iter().copied().collect());
728
729        let mut nodes: Vec<CompactNode> = {
730            let block_indexes = self.cc.block_indexes.borrow();
731            block_indexes
732                .block_id_index
733                .iter()
734                .filter_map(|(&block_id, (unit_index, name_opt, kind))| {
735                    if !interesting_kinds.contains(kind) {
736                        return None;
737                    }
738
739                    if let Some(ref ranked_ids) = ranked_filter {
740                        if !ranked_ids.contains(&block_id) {
741                            return None;
742                        }
743                    }
744
745                    let unit = self.cc.compile_unit(*unit_index);
746                    let block = unit.bb(block_id);
747                    let display_name = name_opt
748                        .clone()
749                        .or_else(|| {
750                            block
751                                .base()
752                                .and_then(|base| base.opt_get_name().map(|s| s.to_string()))
753                        })
754                        .unwrap_or_else(|| format!("{}:{}", kind, block_id.as_u32()));
755
756                    let path = std::fs::canonicalize(
757                        unit.file_path()
758                            .or_else(|| unit.file().path())
759                            .unwrap_or("<unknown>"),
760                    )
761                    .map(|p| p.to_string_lossy().to_string())
762                    .unwrap_or_else(|_| {
763                        unit.file_path()
764                            .or_else(|| unit.file().path())
765                            .unwrap_or("<unknown>")
766                            .to_string()
767                    });
768                    let location = block
769                        .opt_node()
770                        .and_then(|node| {
771                            unit.file()
772                                .file
773                                .content
774                                .as_ref()
775                                .map(|bytes| byte_to_line(bytes.as_slice(), node.start_byte()))
776                                .map(|line| format!("{path}:{line}"))
777                        })
778                        .or_else(|| Some(path.clone()));
779
780                    Some(CompactNode {
781                        block_id,
782                        unit_index: *unit_index,
783                        // kind: *kind,
784                        name: display_name,
785                        location,
786                    })
787                })
788                .collect()
789        };
790
791        nodes.sort_by(|a, b| a.name.cmp(&b.name));
792
793        if nodes.is_empty() {
794            return "digraph CompactProject {\n}\n".to_string();
795        }
796
797        let mut node_index = HashMap::new();
798        for (idx, node) in nodes.iter().enumerate() {
799            node_index.insert(node.block_id, idx);
800        }
801
802        let mut adjacency: Vec<Vec<usize>> = vec![Vec::new(); nodes.len()];
803        for node in &nodes {
804            let Some(unit_graph) = self.unit_graph(node.unit_index) else {
805                continue;
806            };
807            let from_idx = node_index[&node.block_id];
808
809            let dependencies = unit_graph
810                .edges()
811                .get_related(node.block_id, BlockRelation::DependsOn);
812            let mut targets = dependencies
813                .into_iter()
814                .filter_map(|dep_id| node_index.get(&dep_id).copied())
815                .collect::<Vec<_>>();
816
817            targets.sort_unstable();
818            targets.dedup();
819            adjacency[from_idx] = targets;
820        }
821
822        let mut components: Vec<Vec<usize>> = strongly_connected_components(&adjacency)
823            .into_iter()
824            .filter(|component| !component.is_empty())
825            .collect();
826
827        if components.is_empty() {
828            return "digraph CompactProject {\n}\n".to_string();
829        }
830
831        components.sort_by(|a, b| b.len().cmp(&a.len()));
832
833        let target_limit = ranked_order
834            .as_ref()
835            .map(|order| order.len())
836            .unwrap_or_else(|| nodes.len());
837
838        let mut keep = vec![false; nodes.len()];
839        let mut kept = 0usize;
840
841        for component in components.iter().filter(|component| component.len() > 1) {
842            for &idx in component {
843                if !keep[idx] {
844                    keep[idx] = true;
845                    kept += 1;
846                }
847            }
848            if kept >= target_limit {
849                break;
850            }
851        }
852
853        if kept < target_limit {
854            if let Some(order) = ranked_order.as_ref() {
855                for block_id in order {
856                    if let Some(&idx) = node_index.get(block_id) {
857                        if !keep[idx] {
858                            keep[idx] = true;
859                            kept += 1;
860                            if kept >= target_limit {
861                                break;
862                            }
863                        }
864                    }
865                }
866            } else {
867                for idx in 0..nodes.len() {
868                    if !keep[idx] {
869                        keep[idx] = true;
870                        kept += 1;
871                        if kept >= target_limit {
872                            break;
873                        }
874                    }
875                }
876            }
877        }
878
879        if kept == 0 {
880            if let Some(component) = components.first() {
881                for &idx in component {
882                    keep[idx] = true;
883                }
884                kept = component.len();
885            }
886        }
887
888        let mut filtered_nodes = Vec::with_capacity(kept);
889        let mut remap = HashMap::new();
890        for (old_idx, node) in nodes.into_iter().enumerate() {
891            if keep[old_idx] {
892                let new_idx = filtered_nodes.len();
893                remap.insert(old_idx, new_idx);
894                filtered_nodes.push(node);
895            }
896        }
897
898        let nodes = filtered_nodes;
899
900        let mut edges = BTreeSet::new();
901        for (old_idx, neighbours) in adjacency.iter().enumerate() {
902            let Some(&from_idx) = remap.get(&old_idx) else {
903                continue;
904            };
905            for &target in neighbours {
906                if let Some(&to_idx) = remap.get(&target) {
907                    edges.insert((from_idx, to_idx));
908                }
909            }
910        }
911
912        // Remove isolated nodes (nodes with no incoming or outgoing edges in the filtered graph).
913        let mut in_degree = vec![0usize; nodes.len()];
914        let mut out_degree = vec![0usize; nodes.len()];
915        for &(from, to) in &edges {
916            out_degree[from] += 1;
917            in_degree[to] += 1;
918        }
919
920        let mut final_keep: Vec<bool> = (0..nodes.len())
921            .map(|idx| in_degree[idx] > 0 || out_degree[idx] > 0)
922            .collect();
923
924        let mut kept_after_degree = final_keep.iter().filter(|&&keep| keep).count();
925        if kept_after_degree < target_limit {
926            for idx in 0..nodes.len() {
927                if !final_keep[idx] {
928                    final_keep[idx] = true;
929                    kept_after_degree += 1;
930                    if kept_after_degree >= target_limit {
931                        break;
932                    }
933                }
934            }
935        }
936
937        if kept_after_degree == 0 && !nodes.is_empty() {
938            let retain = target_limit.max(1).min(nodes.len());
939            for idx in 0..retain {
940                final_keep[idx] = true;
941            }
942        }
943
944        let mut final_nodes = Vec::new();
945        let mut final_remap = HashMap::new();
946        for (old_idx, node) in nodes.into_iter().enumerate() {
947            if final_keep[old_idx] {
948                let new_idx = final_nodes.len();
949                final_remap.insert(old_idx, new_idx);
950                final_nodes.push(node);
951            }
952        }
953
954        let nodes = final_nodes;
955
956        let final_edges: BTreeSet<_> = edges
957            .into_iter()
958            .filter_map(|(from, to)| {
959                let new_from = final_remap.get(&from).copied()?;
960                let new_to = final_remap.get(&to).copied()?;
961                Some((new_from, new_to))
962            })
963            .collect();
964        let edges = final_edges;
965
966        // Extract crate/module paths from node locations
967        let mut crate_groups: HashMap<String, Vec<usize>> = HashMap::new();
968        for (idx, node) in nodes.iter().enumerate() {
969            if let Some(location) = &node.location {
970                let crate_path = extract_group_path(location);
971                crate_groups
972                    .entry(crate_path)
973                    .or_insert_with(Vec::new)
974                    .push(idx);
975            }
976        }
977
978        let mut output = String::from("digraph CompactProject {\n");
979
980        // Generate subgraphs for each crate/module group
981        let mut subgraph_counter = 0;
982        for (crate_path, node_indices) in crate_groups.iter() {
983            output.push_str(&format!("  subgraph cluster_{} {{\n", subgraph_counter));
984            output.push_str(&format!("    label=\"{}\";\n", escape_label(crate_path)));
985            output.push_str("    style=filled;\n");
986            output.push_str("    color=lightgrey;\n");
987
988            for &idx in node_indices {
989                let node = &nodes[idx];
990                let parts = vec![node.name.clone()];
991                let mut tooltip = None;
992                if let Some(location) = &node.location {
993                    let (_display, full) = summarize_location(location);
994                    tooltip = Some(full);
995                }
996                let label = parts
997                    .into_iter()
998                    .map(|part| escape_label(&part))
999                    .collect::<Vec<_>>()
1000                    .join("\\n");
1001                let mut attrs = vec![format!("label=\"{}\"", label)];
1002                if let Some(full) = tooltip {
1003                    let escaped_full = escape_attr(&full);
1004                    attrs.push(format!("full_path=\"{}\"", escaped_full));
1005                }
1006                output.push_str(&format!("    n{} [{}];\n", idx, attrs.join(", ")));
1007            }
1008
1009            output.push_str("  }\n");
1010            subgraph_counter += 1;
1011        }
1012
1013        for (from, to) in edges {
1014            output.push_str(&format!("  n{} -> n{};\n", from, to));
1015        }
1016        output.push_str("}\n");
1017        output
1018    }
1019
1020    fn add_cross_edge(
1021        &self,
1022        from_idx: usize,
1023        to_idx: usize,
1024        from_block: BlockId,
1025        to_block: BlockId,
1026    ) {
1027        if from_idx == to_idx {
1028            let unit = &self.units[from_idx];
1029            if !unit
1030                .edges
1031                .has_relation(from_block, BlockRelation::DependsOn, to_block)
1032            {
1033                unit.edges.add_relation(from_block, to_block);
1034            }
1035            return;
1036        }
1037
1038        let from_unit = &self.units[from_idx];
1039        from_unit
1040            .edges
1041            .add_relation_if_not_exists(from_block, BlockRelation::DependsOn, to_block);
1042
1043        let to_unit = &self.units[to_idx];
1044        to_unit
1045            .edges
1046            .add_relation_if_not_exists(to_block, BlockRelation::DependedBy, from_block);
1047    }
1048}
1049
1050#[derive(Debug)]
1051struct GraphBuilder<'tcx, Language> {
1052    unit: CompileUnit<'tcx>,
1053    root: Option<BlockId>,
1054    children_stack: Vec<Vec<BlockId>>,
1055    config: GraphBuildConfig,
1056    _marker: PhantomData<Language>,
1057}
1058
1059impl<'tcx, Language: LanguageTrait> GraphBuilder<'tcx, Language> {
1060    fn new(unit: CompileUnit<'tcx>, config: GraphBuildConfig) -> Self {
1061        Self {
1062            unit,
1063            root: None,
1064            children_stack: Vec::new(),
1065            config,
1066            _marker: PhantomData,
1067        }
1068    }
1069
1070    fn next_id(&self) -> BlockId {
1071        self.unit.reserve_block_id()
1072    }
1073
1074    fn create_block(
1075        &self,
1076        id: BlockId,
1077        node: HirNode<'tcx>,
1078        kind: BlockKind,
1079        parent: Option<BlockId>,
1080        children: Vec<BlockId>,
1081    ) -> BasicBlock<'tcx> {
1082        let arena = &self.unit.cc.block_arena;
1083        match kind {
1084            BlockKind::Root => {
1085                // Extract file_name from HirFile node if available
1086                let file_name = node.as_file().map(|file| file.file_path.clone());
1087                let block = BlockRoot::from_hir(id, node, parent, children, file_name);
1088                BasicBlock::Root(arena.alloc(block))
1089            }
1090            BlockKind::Func => {
1091                let block = BlockFunc::from_hir(id, node, parent, children);
1092                BasicBlock::Func(arena.alloc(block))
1093            }
1094            BlockKind::Class => {
1095                let block = BlockClass::from_hir(id, node, parent, children);
1096                BasicBlock::Class(arena.alloc(block))
1097            }
1098            BlockKind::Stmt => {
1099                let stmt = BlockStmt::from_hir(id, node, parent, children);
1100                BasicBlock::Stmt(arena.alloc(stmt))
1101            }
1102            BlockKind::Call => {
1103                let stmt = BlockCall::from_hir(id, node, parent, children);
1104                BasicBlock::Call(arena.alloc(stmt))
1105            }
1106            BlockKind::Enum => {
1107                let enum_ty = BlockEnum::from_hir(id, node, parent, children);
1108                BasicBlock::Enum(arena.alloc(enum_ty))
1109            }
1110            BlockKind::Const => {
1111                let stmt = BlockConst::from_hir(id, node, parent, children);
1112                BasicBlock::Const(arena.alloc(stmt))
1113            }
1114            BlockKind::Impl => {
1115                let block = BlockImpl::from_hir(id, node, parent, children);
1116                BasicBlock::Impl(arena.alloc(block))
1117            }
1118            BlockKind::Field => {
1119                let block = BlockField::from_hir(id, node, parent, children);
1120                BasicBlock::Field(arena.alloc(block))
1121            }
1122            _ => {
1123                panic!("unknown block kind: {}", kind)
1124            }
1125        }
1126    }
1127
1128    fn build_edges(&self, node: HirNode<'tcx>) -> BlockRelationMap {
1129        let edges = BlockRelationMap::default();
1130        let mut visited = HashSet::new();
1131        let mut unresolved = HashSet::new();
1132        self.collect_edges(node, &edges, &mut visited, &mut unresolved);
1133        edges
1134    }
1135
1136    fn collect_edges(
1137        &self,
1138        node: HirNode<'tcx>,
1139        edges: &BlockRelationMap,
1140        visited: &mut HashSet<SymId>,
1141        unresolved: &mut HashSet<SymId>,
1142    ) {
1143        // Try to process symbol dependencies for this node
1144        if let Some(scope) = self.unit.opt_get_scope(node.hir_id()) {
1145            if let Some(symbol) = scope.symbol() {
1146                self.process_symbol(symbol, edges, visited, unresolved);
1147            }
1148        }
1149
1150        // Recurse into children
1151        for &child_id in node.children() {
1152            let child = self.unit.hir_node(child_id);
1153            self.collect_edges(child, edges, visited, unresolved);
1154        }
1155    }
1156
1157    fn process_symbol(
1158        &self,
1159        symbol: &'tcx Symbol,
1160        edges: &BlockRelationMap,
1161        visited: &mut HashSet<SymId>,
1162        unresolved: &mut HashSet<SymId>,
1163    ) {
1164        let symbol_id = symbol.id;
1165
1166        // Avoid processing the same symbol twice
1167        if !visited.insert(symbol_id) {
1168            return;
1169        }
1170
1171        let Some(from_block) = symbol.block_id() else {
1172            return;
1173        };
1174
1175        for &dep_id in symbol.depends.borrow().iter() {
1176            self.link_dependency(dep_id, from_block, edges, unresolved);
1177        }
1178    }
1179    fn link_dependency(
1180        &self,
1181        dep_id: SymId,
1182        from_block: BlockId,
1183        edges: &BlockRelationMap,
1184        unresolved: &mut HashSet<SymId>,
1185    ) {
1186        // If target symbol exists and has a block, add the dependency edge
1187        if let Some(target_symbol) = self.unit.opt_get_symbol(dep_id) {
1188            if let Some(to_block) = target_symbol.block_id() {
1189                if !edges.has_relation(from_block, BlockRelation::DependsOn, to_block) {
1190                    edges.add_relation(from_block, to_block);
1191                }
1192                let target_unit = target_symbol.unit_index();
1193                if target_unit.is_some()
1194                    && target_unit != Some(self.unit.index)
1195                    && unresolved.insert(dep_id)
1196                {
1197                    self.unit.add_unresolved_symbol(target_symbol);
1198                }
1199                return;
1200            }
1201
1202            // Target symbol exists but block not yet known
1203            if unresolved.insert(dep_id) {
1204                self.unit.add_unresolved_symbol(target_symbol);
1205            }
1206            return;
1207        }
1208
1209        // Target symbol not found at all
1210        unresolved.insert(dep_id);
1211    }
1212
1213    fn build_block(&mut self, node: HirNode<'tcx>, parent: BlockId, recursive: bool) {
1214        let id = self.next_id();
1215        let block_kind = Language::block_kind(node.kind_id());
1216        assert_ne!(block_kind, BlockKind::Undefined);
1217
1218        if self.root.is_none() {
1219            self.root = Some(id);
1220        }
1221
1222        let children = if recursive {
1223            self.children_stack.push(Vec::new());
1224            self.visit_children(node, id);
1225
1226            self.children_stack.pop().unwrap()
1227        } else {
1228            Vec::new()
1229        };
1230
1231        let block = self.create_block(id, node, block_kind, Some(parent), children);
1232        if let Some(scope) = self.unit.opt_get_scope(node.hir_id()) {
1233            if let Some(symbol) = scope.symbol() {
1234                // Only set the block ID if it hasn't been set before
1235                // This prevents impl blocks from overwriting struct block IDs
1236                if symbol.block_id().is_none() {
1237                    symbol.set_block_id(Some(id));
1238                }
1239            }
1240        }
1241        self.unit.insert_block(id, block, parent);
1242
1243        if let Some(children) = self.children_stack.last_mut() {
1244            children.push(id);
1245        }
1246    }
1247}
1248
1249impl<'tcx, Language: LanguageTrait> HirVisitor<'tcx> for GraphBuilder<'tcx, Language> {
1250    fn unit(&self) -> CompileUnit<'tcx> {
1251        self.unit
1252    }
1253
1254    fn visit_file(&mut self, node: HirNode<'tcx>, parent: BlockId) {
1255        self.children_stack.push(Vec::new());
1256        self.build_block(node, parent, true);
1257    }
1258
1259    fn visit_internal(&mut self, node: HirNode<'tcx>, parent: BlockId) {
1260        let kind = Language::block_kind(node.kind_id());
1261        if self.config.compact {
1262            if kind == BlockKind::Root {
1263                self.build_block(node, parent, true);
1264            } else {
1265                self.visit_children(node, parent);
1266            }
1267            return;
1268        }
1269
1270        // Non-compact mode: process all defined kinds
1271        if kind != BlockKind::Undefined {
1272            self.build_block(node, parent, false);
1273        } else {
1274            self.visit_children(node, parent);
1275        }
1276    }
1277
1278    fn visit_scope(&mut self, node: HirNode<'tcx>, parent: BlockId) {
1279        let kind = Language::block_kind(node.kind_id());
1280        if self.config.compact {
1281            // In compact mode, only create blocks for major constructs (Class, Enum, Impl)
1282            // Skip functions, fields, and scopes to reduce graph size
1283            match kind {
1284                BlockKind::Class | BlockKind::Enum => {
1285                    // Build with recursion enabled to capture nested major constructs
1286                    self.build_block(node, parent, true);
1287                }
1288                // Skip all other scopes - don't recurse
1289                _ => {
1290                    // Stop here, do not visit children
1291                    // self.visit_children(node, parent);
1292                }
1293            }
1294            return;
1295        }
1296
1297        // Non-compact mode: build blocks for all major constructs
1298        match kind {
1299            BlockKind::Func
1300            | BlockKind::Class
1301            | BlockKind::Enum
1302            | BlockKind::Const
1303            | BlockKind::Impl
1304            | BlockKind::Field => self.build_block(node, parent, true),
1305            _ => self.visit_children(node, parent),
1306        }
1307    }
1308}
1309
1310pub fn build_llmcc_graph_with_config<'tcx, L: LanguageTrait>(
1311    unit: CompileUnit<'tcx>,
1312    unit_index: usize,
1313    config: GraphBuildConfig,
1314) -> Result<UnitGraph, Box<dyn std::error::Error>> {
1315    let root_hir = unit
1316        .file_start_hir_id()
1317        .ok_or("missing file start HIR id")?;
1318    let mut builder = GraphBuilder::<L>::new(unit, config);
1319    let root_node = unit.hir_node(root_hir);
1320    builder.visit_node(root_node, BlockId::ROOT_PARENT);
1321
1322    let root_block = builder.root;
1323    let root_block = root_block.ok_or("graph builder produced no root")?;
1324    let edges = builder.build_edges(root_node);
1325    Ok(UnitGraph::new(unit_index, root_block, edges))
1326}
1327
1328pub fn build_llmcc_graph<'tcx, L: LanguageTrait>(
1329    unit: CompileUnit<'tcx>,
1330    unit_index: usize,
1331) -> Result<UnitGraph, Box<dyn std::error::Error>> {
1332    build_llmcc_graph_with_config::<L>(unit, unit_index, GraphBuildConfig::default())
1333}