// context_creator/core/semantic/parallel_analyzer.rs

//! Parallel file analysis module for semantic analysis
//!
//! This module is responsible for managing parallel processing of file analysis.
//! It follows the Single Responsibility Principle by focusing solely on parallelization.

use std::path::{Path, PathBuf};
use std::sync::{Arc, Mutex};

use anyhow::Result;
use rayon::prelude::*;

use crate::core::cache::FileCache;
use crate::core::semantic::analyzer::SemanticContext;
use crate::core::semantic::dependency_types::{DependencyEdgeType, FileAnalysisResult};
use crate::core::semantic::{get_analyzer_for_file, get_resolver_for_file};

/// Options for file analysis
///
/// Controls which semantic relationships are collected and how deep the
/// analysis may recurse.
#[derive(Debug, Clone)]
pub struct AnalysisOptions {
    /// Maximum depth for semantic analysis
    pub semantic_depth: usize,
    /// Whether to trace imports
    pub trace_imports: bool,
    /// Whether to include type references
    pub include_types: bool,
    /// Whether to include function calls
    pub include_functions: bool,
}

impl Default for AnalysisOptions {
    /// Defaults: depth 3 with imports, types, and function calls all enabled.
    fn default() -> Self {
        AnalysisOptions {
            semantic_depth: 3,
            trace_imports: true,
            include_types: true,
            include_functions: true,
        }
    }
}
38
39/// Parallel analyzer for file processing
40pub struct ParallelAnalyzer<'a> {
41    cache: &'a FileCache,
42    thread_count: Option<usize>,
43}
44
45impl<'a> ParallelAnalyzer<'a> {
46    /// Create a new ParallelAnalyzer
47    pub fn new(cache: &'a FileCache) -> Self {
48        Self {
49            cache,
50            thread_count: None,
51        }
52    }
53
54    /// Create a new ParallelAnalyzer with a specific thread count
55    pub fn with_thread_count(cache: &'a FileCache, thread_count: usize) -> Self {
56        Self {
57            cache,
58            thread_count: Some(thread_count),
59        }
60    }
61
62    /// Analyze multiple files in parallel
63    pub fn analyze_files(
64        &self,
65        files: &[PathBuf],
66        project_root: &Path,
67        options: &AnalysisOptions,
68        valid_files: &std::collections::HashSet<PathBuf>,
69    ) -> Result<Vec<FileAnalysisResult>> {
70        // Configure thread pool if specified
71        if let Some(count) = self.thread_count {
72            rayon::ThreadPoolBuilder::new()
73                .num_threads(count)
74                .build_global()
75                .ok(); // Ignore error if already initialized
76        }
77
78        // Create error collector
79        let errors = Arc::new(Mutex::new(Vec::new()));
80        let errors_ref = &errors;
81
82        // Analyze files in parallel
83        let results: Vec<FileAnalysisResult> = files
84            .par_iter()
85            .enumerate()
86            .map(|(index, file_path)| {
87                match self.analyze_single_file(index, file_path, project_root, options, valid_files)
88                {
89                    Ok(result) => result,
90                    Err(e) => {
91                        let error_msg = format!("Failed to analyze {}: {}", file_path.display(), e);
92                        errors_ref.lock().unwrap().push(error_msg.clone());
93
94                        // Return a minimal result with error
95                        FileAnalysisResult {
96                            file_index: index,
97                            imports: Vec::new(),
98                            function_calls: Vec::new(),
99                            type_references: Vec::new(),
100                            content_hash: None,
101                            error: Some(error_msg),
102                        }
103                    }
104                }
105            })
106            .collect();
107
108        // Print collected errors
109        let error_list = errors.lock().unwrap();
110        for error in error_list.iter() {
111            eprintln!("Warning: {error}");
112        }
113
114        Ok(results)
115    }
116
117    /// Analyze a single file
118    #[allow(clippy::too_many_arguments)]
119    fn analyze_single_file(
120        &self,
121        file_index: usize,
122        file_path: &Path,
123        project_root: &Path,
124        options: &AnalysisOptions,
125        valid_files: &std::collections::HashSet<PathBuf>,
126    ) -> Result<FileAnalysisResult> {
127        // Get analyzer for the file type
128        let analyzer = match get_analyzer_for_file(file_path)? {
129            Some(analyzer) => analyzer,
130            None => {
131                // No analyzer for this file type - return empty result
132                return Ok(FileAnalysisResult {
133                    file_index,
134                    imports: Vec::new(),
135                    function_calls: Vec::new(),
136                    type_references: Vec::new(),
137                    content_hash: Some(self.compute_content_hash(file_path)?),
138                    error: None,
139                });
140            }
141        };
142
143        // Read file content
144        let content = self.cache.get_or_load(file_path)?;
145
146        // Compute content hash
147        let content_hash = {
148            use std::collections::hash_map::DefaultHasher;
149            use std::hash::{Hash, Hasher};
150            let mut hasher = DefaultHasher::new();
151            content.hash(&mut hasher);
152            hasher.finish()
153        };
154
155        // Create semantic context
156        let context = SemanticContext::new(
157            file_path.to_path_buf(),
158            project_root.to_path_buf(),
159            options.semantic_depth,
160        );
161
162        // Perform analysis
163        let analysis_result = analyzer.analyze_file(file_path, &content, &context)?;
164
165        // Process imports if enabled
166        let imports = if options.trace_imports {
167            self.process_imports(
168                file_path,
169                project_root,
170                &analysis_result.imports,
171                valid_files,
172            )?
173        } else {
174            Vec::new()
175        };
176
177        // Filter results based on options
178        let function_calls = if options.include_functions {
179            analysis_result.function_calls
180        } else {
181            Vec::new()
182        };
183
184        let type_references = if options.include_types {
185            analysis_result.type_references
186        } else {
187            Vec::new()
188        };
189
190        Ok(FileAnalysisResult {
191            file_index,
192            imports,
193            function_calls,
194            type_references,
195            content_hash: Some(content_hash),
196            error: None,
197        })
198    }
199
200    /// Process imports to create typed edges
201    fn process_imports(
202        &self,
203        file_path: &Path,
204        project_root: &Path,
205        imports: &[crate::core::semantic::analyzer::Import],
206        valid_files: &std::collections::HashSet<PathBuf>,
207    ) -> Result<Vec<(PathBuf, DependencyEdgeType)>> {
208        let mut typed_imports = Vec::new();
209
210        // Get resolver for the file type
211        if let Some(resolver) = get_resolver_for_file(file_path)? {
212            for import in imports {
213                // Try to resolve the import
214                match resolver.resolve_import(&import.module, file_path, project_root) {
215                    Ok(resolved) => {
216                        if !resolved.is_external && valid_files.contains(&resolved.path) {
217                            let edge_type = DependencyEdgeType::Import {
218                                symbols: import.items.clone(),
219                            };
220                            typed_imports.push((resolved.path, edge_type));
221                        }
222                    }
223                    Err(_) => {
224                        // For relative imports, try to resolve manually
225                        if import.module.starts_with(".") {
226                            if let Some(parent) = file_path.parent() {
227                                let module_base = import.module.trim_start_matches("./");
228
229                                // Try common extensions
230                                for ext in &["js", "jsx", "ts", "tsx"] {
231                                    let potential_path =
232                                        parent.join(format!("{module_base}.{ext}"));
233
234                                    if valid_files.contains(&potential_path) {
235                                        let edge_type = DependencyEdgeType::Import {
236                                            symbols: import.items.clone(),
237                                        };
238                                        typed_imports.push((potential_path, edge_type));
239                                        break;
240                                    }
241                                }
242                            }
243                        } else {
244                            // Fallback: use module path as-is only if it's in valid files
245                            let fallback_path = PathBuf::from(&import.module);
246                            if valid_files.contains(&fallback_path) {
247                                let edge_type = DependencyEdgeType::Import {
248                                    symbols: import.items.clone(),
249                                };
250                                typed_imports.push((fallback_path, edge_type));
251                            }
252                        }
253                    }
254                }
255            }
256        } else {
257            // No resolver available - use basic import edges only if in valid files
258            for import in imports {
259                let import_path = PathBuf::from(&import.module);
260                if valid_files.contains(&import_path) {
261                    let edge_type = DependencyEdgeType::Import {
262                        symbols: import.items.clone(),
263                    };
264                    typed_imports.push((import_path, edge_type));
265                }
266            }
267        }
268
269        Ok(typed_imports)
270    }
271
272    /// Compute content hash for a file
273    fn compute_content_hash(&self, file_path: &Path) -> Result<u64> {
274        let content = self.cache.get_or_load(file_path)?;
275
276        use std::collections::hash_map::DefaultHasher;
277        use std::hash::{Hash, Hasher};
278        let mut hasher = DefaultHasher::new();
279        content.hash(&mut hasher);
280        Ok(hasher.finish())
281    }
282}

// Unit tests live in a sibling file to keep this module focused on logic.
#[cfg(test)]
#[path = "parallel_analyzer_tests.rs"]
mod tests;