context_creator/core/semantic/
parallel_analyzer.rs

1//! Parallel file analysis module for semantic analysis
2//!
3//! This module is responsible for managing parallel processing of file analysis.
4//! It follows the Single Responsibility Principle by focusing solely on parallelization.
5
6use crate::core::cache::FileCache;
7use crate::core::semantic::analyzer::SemanticContext;
8use crate::core::semantic::dependency_types::{DependencyEdgeType, FileAnalysisResult};
9use crate::core::semantic::{get_analyzer_for_file, get_resolver_for_file};
10use crate::core::semantic_cache::SemanticCache;
11use anyhow::Result;
12use rayon::prelude::*;
13use std::path::{Path, PathBuf};
14use std::sync::{Arc, Mutex};
15use tracing::warn;
16
/// Options controlling what the semantic analysis collects for each file.
///
/// Defaults (via `Default`): depth 3 with imports, types, and function
/// calls all enabled.
#[derive(Debug, Clone)]
pub struct AnalysisOptions {
    /// Maximum depth for semantic analysis
    pub semantic_depth: usize,
    /// Whether to trace imports (drives `process_imports`)
    pub trace_imports: bool,
    /// Whether to include type references in results
    pub include_types: bool,
    /// Whether to include function calls (and exported functions) in results
    pub include_functions: bool,
}
29
30impl Default for AnalysisOptions {
31    fn default() -> Self {
32        Self {
33            semantic_depth: 3,
34            trace_imports: true,
35            include_types: true,
36            include_functions: true,
37        }
38    }
39}
40
/// Parallel analyzer for file processing
pub struct ParallelAnalyzer<'a> {
    /// Shared file-content cache; files are loaded through it rather than
    /// read from disk directly
    cache: &'a FileCache,
    /// Per-file semantic results keyed by path + content hash, so unchanged
    /// files skip re-analysis
    semantic_cache: Arc<SemanticCache>,
    /// Requested rayon thread count; `None` leaves the global pool untouched
    thread_count: Option<usize>,
    /// Analyzer-level options (note: `analyze_files` also takes explicit
    /// per-call options)
    options: AnalysisOptions,
}
48
49impl<'a> ParallelAnalyzer<'a> {
50    /// Create a new ParallelAnalyzer
51    pub fn new(cache: &'a FileCache) -> Self {
52        Self {
53            cache,
54            semantic_cache: Arc::new(SemanticCache::new()),
55            thread_count: None,
56            options: AnalysisOptions::default(),
57        }
58    }
59
60    /// Create a new ParallelAnalyzer with a specific thread count
61    pub fn with_thread_count(cache: &'a FileCache, thread_count: usize) -> Self {
62        Self {
63            cache,
64            semantic_cache: Arc::new(SemanticCache::new()),
65            thread_count: Some(thread_count),
66            options: AnalysisOptions::default(),
67        }
68    }
69
70    /// Create a new ParallelAnalyzer with specific options
71    pub fn with_options(cache: &'a FileCache, options: AnalysisOptions) -> Self {
72        Self {
73            cache,
74            semantic_cache: Arc::new(SemanticCache::new()),
75            thread_count: None,
76            options,
77        }
78    }
79
80    /// Analyze multiple files in parallel
81    pub fn analyze_files(
82        &self,
83        files: &[PathBuf],
84        project_root: &Path,
85        options: &AnalysisOptions,
86        valid_files: &std::collections::HashSet<PathBuf>,
87    ) -> Result<Vec<FileAnalysisResult>> {
88        // Configure thread pool if specified
89        if let Some(count) = self.thread_count {
90            rayon::ThreadPoolBuilder::new()
91                .num_threads(count)
92                .build_global()
93                .ok(); // Ignore error if already initialized
94        }
95
96        // Create error collector
97        let errors = Arc::new(Mutex::new(Vec::new()));
98        let errors_ref = &errors;
99
100        // Analyze files in parallel
101        let results: Vec<FileAnalysisResult> = files
102            .par_iter()
103            .enumerate()
104            .map(|(index, file_path)| {
105                match self.analyze_single_file(index, file_path, project_root, options, valid_files)
106                {
107                    Ok(result) => result,
108                    Err(e) => {
109                        let error_msg = format!("Failed to analyze {}: {}", file_path.display(), e);
110                        errors_ref.lock().unwrap().push(error_msg.clone());
111
112                        // Return a minimal result with error
113                        FileAnalysisResult {
114                            file_index: index,
115                            imports: Vec::new(),
116                            function_calls: Vec::new(),
117                            type_references: Vec::new(),
118                            exported_functions: Vec::new(),
119                            content_hash: None,
120                            error: Some(error_msg),
121                        }
122                    }
123                }
124            })
125            .collect();
126
127        // Print collected errors
128        let error_list = errors.lock().unwrap();
129        for error in error_list.iter() {
130            warn!("{}", error);
131        }
132
133        Ok(results)
134    }
135
136    /// Analyze a single file
137    #[allow(clippy::too_many_arguments)]
138    fn analyze_single_file(
139        &self,
140        file_index: usize,
141        file_path: &Path,
142        project_root: &Path,
143        options: &AnalysisOptions,
144        valid_files: &std::collections::HashSet<PathBuf>,
145    ) -> Result<FileAnalysisResult> {
146        // Get analyzer for the file type
147        let analyzer = match get_analyzer_for_file(file_path)? {
148            Some(analyzer) => analyzer,
149            None => {
150                // No analyzer for this file type - return empty result
151                return Ok(FileAnalysisResult {
152                    file_index,
153                    imports: Vec::new(),
154                    function_calls: Vec::new(),
155                    type_references: Vec::new(),
156                    exported_functions: Vec::new(),
157                    content_hash: Some(self.compute_content_hash(file_path)?),
158                    error: None,
159                });
160            }
161        };
162
163        // Read file content
164        let content = self.cache.get_or_load(file_path)?;
165
166        // Compute content hash
167        let content_hash = {
168            use std::collections::hash_map::DefaultHasher;
169            use std::hash::{Hash, Hasher};
170            let mut hasher = DefaultHasher::new();
171            content.hash(&mut hasher);
172            hasher.finish()
173        };
174
175        // Check semantic cache first
176        let analysis_result =
177            if let Some(cached_result) = self.semantic_cache.get(file_path, content_hash) {
178                // Cache hit - use cached result
179                (*cached_result).clone()
180            } else {
181                // Cache miss - perform analysis
182                // Create semantic context
183                let context = SemanticContext::new(
184                    file_path.to_path_buf(),
185                    project_root.to_path_buf(),
186                    options.semantic_depth,
187                );
188
189                // Perform analysis
190                let result = analyzer.analyze_file(file_path, &content, &context)?;
191
192                // Store in cache
193                self.semantic_cache
194                    .insert(file_path, content_hash, result.clone());
195
196                result
197            };
198
199        // Process imports if enabled
200        let imports = if options.trace_imports {
201            self.process_imports(
202                file_path,
203                project_root,
204                &analysis_result.imports,
205                valid_files,
206            )?
207        } else {
208            Vec::new()
209        };
210
211        // Filter results based on options
212        let function_calls = if options.include_functions {
213            analysis_result.function_calls
214        } else {
215            Vec::new()
216        };
217
218        let type_references = if options.include_types {
219            analysis_result.type_references
220        } else {
221            Vec::new()
222        };
223
224        let exported_functions = if self.options.include_functions {
225            analysis_result.exported_functions
226        } else {
227            Vec::new()
228        };
229
230        Ok(FileAnalysisResult {
231            file_index,
232            imports,
233            function_calls,
234            type_references,
235            exported_functions,
236            content_hash: Some(content_hash),
237            error: None,
238        })
239    }
240
    /// Process imports to create typed edges
    ///
    /// Each import is resolved through the language-specific resolver when
    /// one exists. Failed resolutions fall back to heuristics:
    /// - relative imports (module starting with "."): probe sibling files
    ///   with common JS/TS extensions
    /// - otherwise: keep the import only if the module is an existing
    ///   absolute path
    ///
    /// `_valid_files` is deliberately unused: all resolvable non-external
    /// imports are tracked (not just project files) to support later file
    /// expansion.
    ///
    /// # Returns
    /// Pairs of (resolved path, import edge carrying the imported symbols).
    fn process_imports(
        &self,
        file_path: &Path,
        project_root: &Path,
        imports: &[crate::core::semantic::analyzer::Import],
        _valid_files: &std::collections::HashSet<PathBuf>,
    ) -> Result<Vec<(PathBuf, DependencyEdgeType)>> {
        let mut typed_imports = Vec::new();

        // Get resolver for the file type
        if let Some(resolver) = get_resolver_for_file(file_path)? {
            for import in imports {
                // Debug logging
                tracing::debug!(
                    "Resolving import '{}' with items {:?} from file {}",
                    import.module,
                    import.items,
                    file_path.display()
                );

                // Try to resolve the import
                match resolver.resolve_import(&import.module, file_path, project_root) {
                    Ok(resolved) => {
                        tracing::debug!(
                            "  Resolved to: {} (external: {})",
                            resolved.path.display(),
                            resolved.is_external
                        );
                        // External (third-party) imports are not tracked as edges
                        if !resolved.is_external {
                            // For trace_imports, we want to track ALL imports,
                            // not just those in valid_files, to support file expansion
                            let edge_type = DependencyEdgeType::Import {
                                symbols: import.items.clone(),
                            };
                            typed_imports.push((resolved.path, edge_type));
                        }
                    }
                    Err(e) => {
                        tracing::debug!("  Failed to resolve: {}", e);
                        // For relative imports, try to resolve manually
                        if import.module.starts_with(".") {
                            if let Some(parent) = file_path.parent() {
                                // NOTE(review): trim_start_matches only strips a
                                // leading "./"; "../x" passes through unchanged
                                // and relies on Path::join to handle the "..".
                                let module_base = import.module.trim_start_matches("./");

                                // Try common extensions
                                for ext in &["js", "jsx", "ts", "tsx"] {
                                    let potential_path =
                                        parent.join(format!("{module_base}.{ext}"));

                                    if potential_path.exists() {
                                        let edge_type = DependencyEdgeType::Import {
                                            symbols: import.items.clone(),
                                        };
                                        typed_imports.push((potential_path, edge_type));
                                        // First matching extension wins
                                        break;
                                    }
                                }
                            }
                        } else {
                            // Fallback: For trace_imports, track the import even if unresolved
                            // This allows the file expander to attempt resolution later
                            let fallback_path = PathBuf::from(&import.module);
                            if fallback_path.is_absolute() && fallback_path.exists() {
                                let edge_type = DependencyEdgeType::Import {
                                    symbols: import.items.clone(),
                                };
                                typed_imports.push((fallback_path, edge_type));
                            }
                        }
                    }
                }
            }
        } else {
            // No resolver available - for trace_imports, track absolute paths that exist
            for import in imports {
                let import_path = PathBuf::from(&import.module);
                if import_path.is_absolute() && import_path.exists() {
                    let edge_type = DependencyEdgeType::Import {
                        symbols: import.items.clone(),
                    };
                    typed_imports.push((import_path, edge_type));
                }
            }
        }

        Ok(typed_imports)
    }
329
330    /// Compute content hash for a file
331    fn compute_content_hash(&self, file_path: &Path) -> Result<u64> {
332        let content = self.cache.get_or_load(file_path)?;
333
334        use std::collections::hash_map::DefaultHasher;
335        use std::hash::{Hash, Hasher};
336        let mut hasher = DefaultHasher::new();
337        content.hash(&mut hasher);
338        Ok(hasher.finish())
339    }
340}
341
342#[cfg(test)]
343#[path = "parallel_analyzer_tests.rs"]
344mod tests;