context_creator/core/
semantic_graph.rs

1//! Dependency graph coordination for semantic analysis
2//!
3//! This module has been refactored to use separate modules for different responsibilities:
4//! - GraphBuilder: Constructs the dependency graph
5//! - GraphTraverser: Handles graph traversal algorithms
6//! - ParallelAnalyzer: Manages parallel file analysis
7//!
8//! This module now serves as a thin coordination layer that maintains backward compatibility.
9
10use crate::core::cache::FileCache;
11use crate::core::semantic::cycle_detector::{CycleResolution, TarjanCycleDetector};
12use crate::core::semantic::graph_builder::GraphBuilder;
13use crate::core::semantic::graph_traverser::GraphTraverser;
14use crate::core::semantic::parallel_analyzer::{AnalysisOptions, ParallelAnalyzer};
15use crate::core::semantic::SemanticOptions;
16use crate::core::walker::FileInfo;
17use anyhow::Result;
18use std::collections::HashMap;
19use std::path::PathBuf;
20
21/// Performs sophisticated semantic analysis with proper dependency graph traversal
22/// This is the main entry point that maintains backward compatibility
23pub fn perform_semantic_analysis_graph(
24    files: &mut [FileInfo],
25    config: &crate::cli::Config,
26    cache: &FileCache,
27) -> Result<()> {
28    // Skip if no semantic analysis is requested
29    if !config.trace_imports && !config.include_callers && !config.include_types {
30        return Ok(());
31    }
32
33    let semantic_options = SemanticOptions::from_config(config);
34
35    // Detect project root from first file
36    let project_root = if let Some(first_file) = files.first() {
37        detect_project_root(&first_file.path)
38    } else {
39        std::env::current_dir().unwrap_or_else(|_| PathBuf::from("."))
40    };
41
42    // Step 1: Parallel file analysis
43    let analyzer = ParallelAnalyzer::new(cache);
44    let analysis_options = AnalysisOptions {
45        semantic_depth: semantic_options.semantic_depth,
46        trace_imports: semantic_options.trace_imports,
47        include_types: semantic_options.include_types,
48        include_functions: semantic_options.include_callers,
49    };
50
51    let file_paths: Vec<_> = files.iter().map(|f| f.path.clone()).collect();
52    let valid_files: std::collections::HashSet<PathBuf> = files
53        .iter()
54        .map(|f| f.path.canonicalize().unwrap_or_else(|_| f.path.clone()))
55        .collect();
56    let analysis_results =
57        analyzer.analyze_files(&file_paths, &project_root, &analysis_options, &valid_files)?;
58
59    // Step 2: Build dependency graph
60    let builder = GraphBuilder::new();
61    let (mut graph, node_map) = builder.build(files)?;
62
63    // Create path to index mapping
64    let path_to_index: HashMap<PathBuf, usize> = files
65        .iter()
66        .enumerate()
67        .map(|(i, f)| (f.path.clone(), i))
68        .collect();
69
70    // Add edges from analysis results
71    builder.build_edges_from_analysis(&mut graph, &analysis_results, &path_to_index, &node_map);
72
73    // Step 3: Detect and handle cycles
74    let mut cycle_detector = TarjanCycleDetector::new();
75    let cycle_result = cycle_detector.detect_cycles(&graph);
76
77    if cycle_result.has_cycles {
78        // Report all detected cycles
79        eprintln!(
80            "Warning: {} circular dependencies detected:",
81            cycle_result.cycles.len()
82        );
83        for (i, cycle) in cycle_result.cycles.iter().enumerate() {
84            let cycle_num = i + 1;
85            eprintln!("\nCycle {cycle_num}:");
86            for &node_idx in cycle {
87                let node = &graph[node_idx];
88                let path = node.path.display();
89                eprintln!("  - {path}");
90            }
91        }
92        eprintln!(
93            "\nWarning: Processing files in partial order, some dependencies may be incomplete."
94        );
95    }
96
97    // Step 4: Traverse graph and apply results
98    let _traverser = GraphTraverser::new();
99    let resolution = cycle_detector.handle_cycles(&graph, cycle_result.cycles);
100
101    match resolution {
102        CycleResolution::PartialOrder(node_order) => {
103            // Process nodes in the computed order
104            for &node_idx in &node_order {
105                let rich_node = &graph[node_idx];
106                let file_idx = rich_node.file_index;
107
108                // Apply analysis results to files
109                if file_idx < analysis_results.len() {
110                    let result = &analysis_results[file_idx];
111                    let file = &mut files[file_idx];
112
113                    // Update function calls
114                    file.function_calls = result.function_calls.clone();
115
116                    // Update type references
117                    file.type_references = result.type_references.clone();
118
119                    // Update exported functions
120                    file.exported_functions = result.exported_functions.clone();
121                }
122            }
123        }
124        CycleResolution::BreakEdges(_) => {
125            unimplemented!("Edge breaking strategy not yet implemented");
126        }
127        CycleResolution::MergeComponents(_) => {
128            unimplemented!("Component merging strategy not yet implemented");
129        }
130    }
131
132    // Step 5: Apply import relationships
133    apply_import_relationships(files, &analysis_results, &path_to_index);
134
135    // Step 6: Process function calls and type references
136    process_function_calls(files);
137    process_type_references(files);
138
139    Ok(())
140}
141
142/// Detect the project root directory
143fn detect_project_root(start_path: &std::path::Path) -> PathBuf {
144    let mut current = start_path.parent().unwrap_or(start_path);
145
146    // First try to find git root
147    loop {
148        if current.join(".git").exists() {
149            return current.to_path_buf();
150        }
151        if let Some(parent) = current.parent() {
152            current = parent;
153        } else {
154            break;
155        }
156    }
157
158    // Fallback: Look for common project markers
159    current = start_path.parent().unwrap_or(start_path);
160    loop {
161        if current.join("Cargo.toml").exists()
162            || current.join("package.json").exists()
163            || current.join("pyproject.toml").exists()
164            || current.join("setup.py").exists()
165        {
166            return current.to_path_buf();
167        }
168
169        if let Some(parent) = current.parent() {
170            current = parent;
171        } else {
172            break;
173        }
174    }
175
176    // Ultimate fallback
177    start_path.parent().unwrap_or(start_path).to_path_buf()
178}
179
180/// Apply import relationships from analysis results
181fn apply_import_relationships(
182    files: &mut [FileInfo],
183    analysis_results: &[crate::core::semantic::dependency_types::FileAnalysisResult],
184    path_to_index: &HashMap<PathBuf, usize>,
185) {
186    // First pass: collect all imports
187    for result in analysis_results {
188        if result.file_index < files.len() {
189            let file = &mut files[result.file_index];
190
191            // Add resolved imports
192            for (import_path, _) in &result.imports {
193                if !file.imports.contains(import_path) {
194                    file.imports.push(import_path.clone());
195                }
196            }
197        }
198    }
199
200    // Second pass: build reverse dependencies
201    let mut reverse_deps: Vec<(usize, PathBuf)> = Vec::new();
202    for file in files.iter() {
203        for import in &file.imports {
204            if let Some(&imported_idx) = path_to_index.get(import) {
205                reverse_deps.push((imported_idx, file.path.clone()));
206            }
207        }
208    }
209
210    // Build HashSets for existing imported_by for O(1) lookups
211    let mut existing_imported_by: Vec<std::collections::HashSet<PathBuf>> = files
212        .iter()
213        .map(|f| f.imported_by.iter().cloned().collect())
214        .collect();
215
216    // Apply reverse dependencies
217    for (imported_idx, importing_path) in reverse_deps {
218        if imported_idx < files.len()
219            && !existing_imported_by[imported_idx].contains(&importing_path)
220        {
221            files[imported_idx].imported_by.push(importing_path.clone());
222            existing_imported_by[imported_idx].insert(importing_path);
223        }
224    }
225}
226
227/// Process function calls to determine caller relationships
228fn process_function_calls(files: &mut [FileInfo]) {
229    use std::collections::HashMap;
230
231    // Build module name to file index mapping
232    let mut module_to_files: HashMap<String, Vec<usize>> = HashMap::new();
233
234    for (idx, file) in files.iter().enumerate() {
235        if let Some(stem) = file.path.file_stem() {
236            let module_name = stem.to_string_lossy().to_string();
237            module_to_files
238                .entry(module_name.clone())
239                .or_default()
240                .push(idx);
241
242            // Handle index files
243            if stem == "mod" || stem == "index" || stem == "__init__" {
244                if let Some(parent) = file.path.parent() {
245                    if let Some(parent_name) = parent.file_name() {
246                        let parent_module = parent_name.to_string_lossy().to_string();
247                        module_to_files.entry(parent_module).or_default().push(idx);
248                    }
249                }
250            }
251        }
252    }
253
254    // Find caller relationships
255    let mut relationships: Vec<(usize, usize)> = Vec::new();
256
257    for (caller_idx, file) in files.iter().enumerate() {
258        for func_call in &file.function_calls {
259            if let Some(module) = &func_call.module {
260                if let Some(file_indices) = module_to_files.get(module) {
261                    for &called_idx in file_indices {
262                        if called_idx != caller_idx {
263                            relationships.push((caller_idx, called_idx));
264                        }
265                    }
266                }
267            }
268        }
269    }
270
271    // Apply relationships using HashSet for O(1) lookups
272    use std::collections::HashSet;
273
274    // Build HashSets for existing imports/imported_by for O(1) lookups
275    let mut existing_imports: Vec<HashSet<PathBuf>> = files
276        .iter()
277        .map(|f| f.imports.iter().cloned().collect())
278        .collect();
279
280    let mut existing_imported_by: Vec<HashSet<PathBuf>> = files
281        .iter()
282        .map(|f| f.imported_by.iter().cloned().collect())
283        .collect();
284
285    for (caller_idx, called_idx) in relationships {
286        let called_path = files[called_idx].path.clone();
287        if !existing_imports[caller_idx].contains(&called_path) {
288            files[caller_idx].imports.push(called_path.clone());
289            existing_imports[caller_idx].insert(called_path);
290        }
291
292        let caller_path = files[caller_idx].path.clone();
293        if !existing_imported_by[called_idx].contains(&caller_path) {
294            files[called_idx].imported_by.push(caller_path.clone());
295            existing_imported_by[called_idx].insert(caller_path);
296        }
297    }
298}
299
300/// Process type references to determine type relationships
301fn process_type_references(files: &mut [FileInfo]) {
302    use std::collections::HashMap;
303
304    // Build type name to file index mapping
305    let mut type_to_files: HashMap<String, Vec<(usize, PathBuf)>> = HashMap::new();
306
307    for (idx, file) in files.iter().enumerate() {
308        if let Some(stem) = file.path.file_stem() {
309            let type_name = stem.to_string_lossy().to_string();
310
311            // Add capitalized version
312            let capitalized = capitalize_first(&type_name);
313            type_to_files
314                .entry(capitalized)
315                .or_default()
316                .push((idx, file.path.clone()));
317
318            // Add original name
319            type_to_files
320                .entry(type_name)
321                .or_default()
322                .push((idx, file.path.clone()));
323        }
324    }
325
326    // Update type references with definition paths
327    for file in files.iter_mut() {
328        for type_ref in &mut file.type_references {
329            // Skip if already has definition path or is external
330            if type_ref.definition_path.is_some() || type_ref.is_external {
331                continue;
332            }
333
334            // Try to find the definition file
335            if let Some(file_info) = type_to_files.get(&type_ref.name) {
336                // Use the first match that's not the current file
337                for (_def_idx, def_path) in file_info {
338                    if &file.path != def_path {
339                        type_ref.definition_path = Some(def_path.clone());
340                        break;
341                    }
342                }
343            }
344        }
345    }
346
347    // Find type usage relationships
348    let mut relationships: Vec<(usize, usize)> = Vec::new();
349
350    for (user_idx, file) in files.iter().enumerate() {
351        for type_ref in &file.type_references {
352            if let Some(file_info) = type_to_files.get(&type_ref.name) {
353                for &(def_idx, _) in file_info {
354                    if def_idx != user_idx {
355                        relationships.push((user_idx, def_idx));
356                    }
357                }
358            }
359        }
360    }
361
362    // Apply relationships using HashSet for O(1) lookups
363    use std::collections::HashSet;
364
365    // Build HashSets for existing imports/imported_by for O(1) lookups
366    let mut existing_imports: Vec<HashSet<PathBuf>> = files
367        .iter()
368        .map(|f| f.imports.iter().cloned().collect())
369        .collect();
370
371    let mut existing_imported_by: Vec<HashSet<PathBuf>> = files
372        .iter()
373        .map(|f| f.imported_by.iter().cloned().collect())
374        .collect();
375
376    for (user_idx, def_idx) in relationships {
377        let def_path = files[def_idx].path.clone();
378        if !existing_imports[user_idx].contains(&def_path) {
379            files[user_idx].imports.push(def_path.clone());
380            existing_imports[user_idx].insert(def_path);
381        }
382
383        let user_path = files[user_idx].path.clone();
384        if !existing_imported_by[def_idx].contains(&user_path) {
385            files[def_idx].imported_by.push(user_path.clone());
386            existing_imported_by[def_idx].insert(user_path);
387        }
388    }
389}
390
391/// Capitalize the first letter of a string
392fn capitalize_first(s: &str) -> String {
393    let mut chars = s.chars();
394    match chars.next() {
395        None => String::new(),
396        Some(first) => first.to_uppercase().collect::<String>() + chars.as_str(),
397    }
398}