Skip to main content

mir_analyzer/batch/
run.rs

1use super::*;
2
3impl AnalysisSession {
4    /// Run the full batch analysis pipeline on a set of file paths.
5    pub fn analyze_paths(&self, paths: &[PathBuf], opts: &BatchOptions) -> AnalysisResult {
6        let php_version = self.batch_php_version(opts);
7        let mut all_issues = Vec::new();
8        let _t0 = std::time::Instant::now();
9
10        // ---- Load PHP built-in stubs (before definition collection so user code can override)
11        self.load_batch_stubs(php_version);
12        let _t_stubs = _t0.elapsed();
13
14        // ---- Read files in parallel ----------------------------------
15        let parsed_files: Vec<ParsedProjectFile> = paths
16            .par_iter()
17            .filter_map(|path| match std::fs::read_to_string(path) {
18                Ok(src) => {
19                    let file = Arc::from(path.to_string_lossy().as_ref());
20                    Some(ParsedProjectFile::new(file, Arc::from(src)))
21                }
22                Err(e) => {
23                    eprintln!("Cannot read {}: {}", path.display(), e);
24                    None
25                }
26            })
27            .collect();
28        let _t_read = _t0.elapsed();
29
30        let file_data: Vec<(Arc<str>, Arc<str>)> = parsed_files
31            .iter()
32            .map(|parsed| (parsed.file.clone(), parsed.source.clone()))
33            .collect();
34
35        // ---- Pre-analysis invalidation: evict dependents of changed/removed files
36        if let Some(cache) = &self.cache {
37            let mut invalidated: Vec<String> = file_data
38                .par_iter()
39                .filter_map(|(f, src)| {
40                    let h = hash_content(src.as_ref());
41                    if cache.get(f, &h).is_none() {
42                        Some(f.to_string())
43                    } else {
44                        None
45                    }
46                })
47                .collect();
48
49            // Files analyzed in a previous run but now gone from disk: their
50            // dependents hold stale results that still assume the deleted
51            // definitions exist. A file merely absent from this run's path set
52            // (but still on disk) is NOT a deletion — checking disk existence
53            // avoids evicting dependents during partial-path analysis.
54            let current: std::collections::HashSet<&str> =
55                file_data.iter().map(|(f, _)| f.as_ref()).collect();
56            let removed: Vec<String> = cache
57                .cached_files()
58                .into_iter()
59                .filter(|f| !current.contains(f.as_str()) && !std::path::Path::new(f).exists())
60                .collect();
61            for f in &removed {
62                cache.evict(f);
63            }
64            invalidated.extend(removed);
65
66            if !invalidated.is_empty() {
67                cache.evict_with_dependents(&invalidated);
68            }
69        }
70
71        // ---- Register Salsa source inputs for incremental follow-up calls ----
72        {
73            let mut guard = self.db.salsa.write();
74            for parsed in &parsed_files {
75                guard.upsert_source_file(parsed.file.clone(), parsed.source.clone());
76            }
77        }
78        let _t_salsa_reg = _t0.elapsed();
79
80        // ---- Definition collection from the already-parsed AST -------
81        // Returns (FileDefinitions, content_hash, has_hard_parse_errors) so we
82        // can prime the parse cache before the pre-warm loop below.
83        type Pass1Entry = (FileDefinitions, [u8; 32], bool);
84        let file_defs: Vec<Pass1Entry> = parsed_files
85            .par_iter()
86            .map(|parsed| {
87                let content_hash = hash_source(parsed.source());
88                let has_hard_parse_errors = parsed
89                    .errors()
90                    .iter()
91                    .any(crate::parser::is_hard_parse_error);
92                let mut all_issues: Vec<Issue> = parsed
93                    .errors()
94                    .iter()
95                    .filter(|err| !crate::parser::is_spurious_reserved_class_error(err))
96                    .map(|err| {
97                        crate::parser::parse_error_to_issue(
98                            err,
99                            &parsed.file,
100                            parsed.source(),
101                            parsed.source_map(),
102                        )
103                    })
104                    .collect();
105                let collector = crate::collector::DefinitionCollector::new_for_slice(
106                    parsed.file.clone(),
107                    parsed.source(),
108                    parsed.source_map(),
109                );
110                let (mut slice, collector_issues) = collector.collect_slice(parsed.owned());
111                all_issues.extend(collector_issues);
112                mir_codebase::storage::deduplicate_params_in_slice(&mut slice);
113                let defs = FileDefinitions {
114                    slice: Arc::new(slice),
115                    issues: Arc::new(all_issues),
116                };
117                (defs, content_hash, has_hard_parse_errors)
118            })
119            .collect();
120        let _t_collect_defs = _t0.elapsed();
121
122        // Prime the in-process parse cache so the pre-warm loop below avoids
123        // re-parsing every project file through collect_file_definitions.
124        {
125            let guard = self.db.salsa.read();
126            for (defs, hash, has_hard_parse_errors) in &file_defs {
127                if !*has_hard_parse_errors {
128                    guard.prime_parse_cache(*hash, Arc::clone(&defs.slice));
129                }
130            }
131        }
132
133        let mut files_with_parse_errors: HashSet<Arc<str>> = HashSet::default();
134        for (defs, _hash, _hard_err) in file_defs {
135            for issue in defs.issues.iter() {
136                if matches!(issue.kind, mir_issues::IssueKind::ParseError { .. })
137                    && issue.severity == mir_issues::Severity::Error
138                {
139                    files_with_parse_errors.insert(issue.location.file.clone());
140                }
141            }
142            all_issues.extend(Arc::unwrap_or_clone(defs.issues));
143        }
144        let _t_ingest = _t0.elapsed();
145
146        // ---- Pre-warm collect_file_definitions for project files -------------
147        {
148            let db_prewarm = {
149                let guard = self.db.salsa.read();
150                (**guard).clone()
151            };
152            let project_source_files: Vec<SourceFile> = {
153                let guard = self.db.salsa.read();
154                parsed_files
155                    .iter()
156                    .filter_map(|p| (**guard).lookup_source_file(&p.file))
157                    .collect()
158            };
159            project_source_files
160                .into_par_iter()
161                .for_each_with(db_prewarm, |db, sf| {
162                    let _ = collect_file_definitions(db as &dyn MirDatabase, sf);
163                });
164        }
165        let _t_prewarm_ms = (_t0.elapsed() - _t_ingest).as_secs_f64() * 1000.0;
166
167        // Fold the freshly-registered project files into the workspace symbol
168        // index singleton. The singleton may have been built from vendor before
169        // this run (CLI indexes vendor before analyze_paths); since adding files
170        // no longer nulls it, project classes would otherwise be invisible to
171        // find_class_like and reported as false UndefinedClass.
172        self.refresh_workspace_index();
173
174        // ---- Lazy-load unknown classes via PSR-4 ----------------------------
175        let _t_before_lazy = _t0.elapsed();
176        if let Some(psr4) = self.psr4.clone() {
177            self.lazy_load_missing_classes(psr4, php_version, &mut all_issues);
178        }
179        let _t_lazyload_ms = (_t0.elapsed() - _t_before_lazy).as_secs_f64() * 1000.0;
180
181        // ---- Class-level checks ---------------------------------------------
182        let analyzed_file_set: HashSet<Arc<str>> =
183            file_data.iter().map(|(f, _)| f.clone()).collect();
184        let _t_class_analyzer = std::time::Instant::now();
185        {
186            let class_db = {
187                let guard = self.db.salsa.read();
188                (**guard).clone()
189            };
190            let class_issues = crate::class::ClassAnalyzer::with_files(
191                &class_db,
192                analyzed_file_set.clone(),
193                &file_data,
194            )
195            .analyze_all();
196            all_issues.extend(class_issues);
197        }
198        let _t_class_analyzer_ms = _t_class_analyzer.elapsed().as_secs_f64() * 1000.0;
199
200        let _t_class_checks = _t0.elapsed();
201
202        let mut db_main = {
203            let guard = self.db.salsa.read();
204            (**guard).clone()
205        };
206        // All index mutation for the body pass is done (lazy_load_missing_classes
207        // + refresh ran above; lazy_load_from_body_issues runs *after* this pass
208        // on a separate db). Freeze the index on this ephemeral clone so each
209        // find_class_like borrows it instead of cloning the singleton's three
210        // Arcs per call — the per-worker `map_with` clone bumps the refcount once.
211        db_main.freeze_workspace_index();
212
213        // ---- Body analysis: function/method bodies in parallel --------------
214        type BodyResult = (
215            Arc<str>,
216            Vec<Issue>,
217            Vec<crate::symbol::ResolvedSymbol>,
218            Vec<RefLoc>,
219        );
220        let body_results: Vec<BodyResult> = parsed_files
221            .par_iter()
222            .filter(|parsed| !files_with_parse_errors.contains(&parsed.file))
223            .map_with(db_main, |db, parsed| {
224                let driver = BodyAnalyzer::new(&*db as &dyn MirDatabase, php_version);
225                let (issues, symbols) = if let Some(cache) = &self.cache {
226                    let h = hash_content(parsed.source());
227                    if let Some((cached_issues, ref_locs)) = cache.get(&parsed.file, &h) {
228                        // Cache replay: rebuild the file's complete reference
229                        // set straight from the cached tuples — no pending-
230                        // buffer detour.
231                        let locs: Vec<RefLoc> = ref_locs
232                            .iter()
233                            .map(|(symbol, line, col_start, col_end)| RefLoc {
234                                symbol_key: Arc::from(symbol.as_str()),
235                                file: parsed.file.clone(),
236                                line: *line,
237                                col_start: *col_start,
238                                col_end: *col_end,
239                            })
240                            .collect();
241                        return (parsed.file.clone(), cached_issues, Vec::new(), locs);
242                    }
243                    let (issues, symbols) = driver.analyze_bodies(
244                        parsed.owned(),
245                        parsed.file.clone(),
246                        parsed.source(),
247                        parsed.source_map(),
248                    );
249                    let pending = db.take_pending_ref_locs();
250                    let cache_locs = pending
251                        .iter()
252                        .map(|r| (r.symbol_key.to_string(), r.line, r.col_start, r.col_end))
253                        .collect();
254                    cache.put(&parsed.file, h, issues.clone(), cache_locs);
255                    if let Some(cb) = &opts.on_file_done {
256                        cb();
257                    }
258                    let symbols = if opts.skip_symbols {
259                        Vec::new()
260                    } else {
261                        symbols
262                    };
263                    return (parsed.file.clone(), issues, symbols, pending);
264                } else {
265                    driver.analyze_bodies(
266                        parsed.owned(),
267                        parsed.file.clone(),
268                        parsed.source(),
269                        parsed.source_map(),
270                    )
271                };
272                let pending = db.take_pending_ref_locs();
273                if let Some(cb) = &opts.on_file_done {
274                    cb();
275                }
276                // Drop the per-file symbol vec inside the worker when the
277                // consumer opted out — the orchestrator never accumulates.
278                let symbols = if opts.skip_symbols {
279                    Vec::new()
280                } else {
281                    symbols
282                };
283                (parsed.file.clone(), issues, symbols, pending)
284            })
285            .collect();
286
287        let _t_body_analysis = _t0.elapsed();
288
289        // Serial commit with replace semantics: each file's output (or cache
290        // replay) is its complete reference set, so stale entries from a
291        // prior run cannot survive an append.
292        let mut all_symbols = Vec::new();
293        {
294            let guard = self.db.salsa.read();
295            for (file, issues, symbols, ref_locs) in body_results {
296                all_issues.extend(issues);
297                all_symbols.extend(symbols);
298                guard.set_file_reference_locations(file.as_ref(), ref_locs);
299            }
300        }
301
302        // ---- Post-analysis lazy loading: FQCNs used without `use` imports ------
303        if let Some(psr4) = self.psr4.clone() {
304            self.lazy_load_from_body_issues(
305                psr4,
306                php_version,
307                &file_data,
308                &files_with_parse_errors,
309                &mut all_issues,
310                &mut all_symbols,
311                opts.skip_symbols,
312            );
313        }
314
315        // ---- Build reverse dep graph and persist it for the next run ---------
316        // Must run AFTER `commit_reference_locations_batch` (above): the graph's
317        // call-site / instantiation / inferred-return edges are derived from the
318        // committed reference-location map. Built any earlier (the salsa db is
319        // fresh each session) that map is empty, so only structural edges
320        // (parent/interface/trait/declared types) survive — and any dependent
321        // reachable only through a call site or inferred type goes stale.
322        if let Some(cache) = &self.cache {
323            let db_snapshot = {
324                let guard = self.db.salsa.read();
325                (**guard).clone()
326            };
327            let rev = build_reverse_deps(&db_snapshot);
328            cache.set_reverse_deps(rev);
329        }
330
331        // Persist cache hits/misses to disk
332        if let Some(cache) = &self.cache {
333            cache.flush();
334        }
335
336        // ---- Dead-code detection -------------------------------------------
337        if opts.should_run_dead_code() {
338            let salsa = self.snapshot_db();
339            let _t_dead_code = std::time::Instant::now();
340            let dead_code_issues =
341                crate::dead_code::DeadCodeAnalyzer::with_files(&salsa, analyzed_file_set.clone())
342                    .analyze();
343            all_issues.extend(dead_code_issues);
344            if std::env::var("MIR_TIMING").is_ok() {
345                eprintln!(
346                    "[timing] dead_code_analyzer={:.0}ms",
347                    _t_dead_code.elapsed().as_secs_f64() * 1000.0
348                );
349            }
350        }
351
352        let _t_total = _t0.elapsed();
353        if std::env::var("MIR_TIMING").is_ok() {
354            eprintln!(
355                "[timing] stubs={:.0}ms read={:.0}ms salsa_reg={:.0}ms collect_defs={:.0}ms ingest={:.0}ms class_checks={:.0}ms (prewarm={:.0}ms lazy_load={:.0}ms class_analyzer={:.0}ms) body_analysis={:.0}ms total={:.0}ms",
356                _t_stubs.as_secs_f64() * 1000.0,
357                (_t_read - _t_stubs).as_secs_f64() * 1000.0,
358                (_t_salsa_reg - _t_read).as_secs_f64() * 1000.0,
359                (_t_collect_defs - _t_salsa_reg).as_secs_f64() * 1000.0,
360                (_t_ingest - _t_collect_defs).as_secs_f64() * 1000.0,
361                (_t_class_checks - _t_ingest).as_secs_f64() * 1000.0,
362                _t_prewarm_ms,
363                _t_lazyload_ms,
364                _t_class_analyzer_ms,
365                (_t_body_analysis - _t_class_checks).as_secs_f64() * 1000.0,
366                _t_total.as_secs_f64() * 1000.0,
367            );
368        }
369
370        opts.apply(&mut all_issues);
371        let analyzed_files_vec: Vec<Arc<str>> = analyzed_file_set.iter().cloned().collect();
372        self.apply_suppressions_and_emit_unused(&mut all_issues, &analyzed_files_vec);
373        if let Some(dump) = crate::metrics::dump() {
374            eprintln!("{dump}");
375        }
376
377        // ---- Build workspace symbol index singleton -------------------------
378        {
379            let mut guard = self.db.salsa.write();
380            guard.rebuild_workspace_symbol_index();
381        }
382
383        AnalysisResult::build(all_issues, rustc_hash::FxHashMap::default(), all_symbols)
384    }
385    /// Re-analyze a single file (definition collection + body analysis) within the batch context.
386    ///
387    /// Mirrors the old `ProjectAnalyzer::re_analyze_file` cache-aware path.
388    /// Use [`Self::reanalyze_dependents`] for LSP-style per-file flows that
389    /// don't need batch options.
390    pub fn re_analyze_file(
391        &self,
392        file_path: &str,
393        new_content: &str,
394        opts: &BatchOptions,
395    ) -> AnalysisResult {
396        let php_version = self.batch_php_version(opts);
397
398        // Fast path: content unchanged and cache has a valid entry.
399        if let Some(cache) = &self.cache {
400            let h = hash_content(new_content);
401            if let Some((mut issues, ref_locs)) = cache.get(file_path, &h) {
402                let file: Arc<str> = Arc::from(file_path);
403                // Replace semantics: the cached set is the file's complete
404                // reference set, so stale entries from a prior version are
405                // cleared rather than appended over.
406                let locs: Vec<RefLoc> = ref_locs
407                    .iter()
408                    .map(|(symbol, line, col_start, col_end)| RefLoc {
409                        symbol_key: Arc::from(symbol.as_str()),
410                        file: file.clone(),
411                        line: *line,
412                        col_start: *col_start,
413                        col_end: *col_end,
414                    })
415                    .collect();
416                let guard = self.db.salsa.read();
417                guard.set_file_reference_locations(file_path, locs);
418                drop(guard);
419                opts.apply(&mut issues);
420                self.apply_suppressions_and_emit_unused(&mut issues, std::slice::from_ref(&file));
421                return AnalysisResult::build(issues, HashMap::default(), Vec::new());
422            }
423        }
424
425        let file: Arc<str> = Arc::from(file_path);
426
427        {
428            let mut guard = self.db.salsa.write();
429            guard.remove_file_definitions(file_path);
430        }
431
432        let file_defs = {
433            let mut guard = self.db.salsa.write();
434            let salsa_file = guard.upsert_source_file(file.clone(), Arc::from(new_content));
435            collect_file_definitions(&**guard, salsa_file)
436        };
437
438        let mut all_issues: Vec<Issue> = Arc::unwrap_or_clone(file_defs.issues.clone());
439
440        {
441            let mut guard = self.db.salsa.write();
442            if guard.workspace_symbol_index_singleton().is_some() {
443                if let Some(sf) = guard.lookup_source_file(file.as_ref()) {
444                    if guard.file_declarations_changed(sf) {
445                        guard.rebuild_workspace_symbol_index();
446                    }
447                }
448            }
449        }
450
451        let symbols = {
452            let guard = self.db.salsa.write();
453
454            let parsed = php_rs_parser::parse(new_content);
455
456            let has_hard_errors = parsed.errors.iter().any(crate::parser::is_hard_parse_error);
457            if !has_hard_errors {
458                let db_ref: &dyn MirDatabase = &**guard;
459                let driver = BodyAnalyzer::new(db_ref, php_version);
460                let (body_issues, symbols) = driver.analyze_bodies(
461                    &parsed.program,
462                    file.clone(),
463                    new_content,
464                    &parsed.source_map,
465                );
466                all_issues.extend(body_issues);
467                let pending = guard.take_pending_ref_locs();
468                guard.set_file_reference_locations(file.as_ref(), pending);
469                symbols
470            } else {
471                Vec::new()
472            }
473        };
474
475        // Bake inline-suppression marks in *before* caching: suppression is a
476        // pure function of file content (and the cache key hashes content), so
477        // the cached issues should already carry their marks. The cache-hit
478        // branch above replays this file's source without re-registering the
479        // `SourceFile` input, so the db-backed post-filter cannot recompute
480        // marks there — caching the canonical result is what keeps a fresh
481        // process honoring `@mir-ignore` on an unchanged file.
482        mark_suppressed(
483            &mut all_issues,
484            &crate::suppression::SuppressionMap::from_source(new_content),
485        );
486
487        if let Some(cache) = &self.cache {
488            let h = hash_content(new_content);
489            cache.evict_with_dependents(&[file_path.to_string()]);
490            let db = self.snapshot_db();
491            let ref_locs = extract_reference_locations(&db, &file);
492            cache.put(file_path, h, all_issues.clone(), ref_locs);
493        }
494
495        opts.apply(&mut all_issues);
496        AnalysisResult::build(all_issues, HashMap::default(), symbols)
497    }
498
499    /// Collect type definitions only from `paths` into the codebase
500    /// without analyzing method bodies or emitting issues. Used to load
501    /// vendor types.
502    ///
503    /// When a disk-backed cache is attached, per-file `StubSlice` results
504    /// from previous runs are reused on a content-hash match, eliminating
505    /// the parse + definition-collection step. Cache misses run the normal
506    /// pipeline and write back so subsequent runs hit.
507    pub fn collect_definitions(&self, paths: &[PathBuf]) {
508        let _timing = std::env::var("MIR_TIMING").is_ok();
509        let _t0 = std::time::Instant::now();
510
511        let php_v = self.php_version.cache_byte();
512
513        struct FileEntry {
514            file: Arc<str>,
515            src: Arc<str>,
516            hash: [u8; 32],
517            cached: Option<mir_codebase::storage::StubSlice>,
518        }
519        let entries: Vec<FileEntry> = paths
520            .par_iter()
521            .filter_map(|path| {
522                let src = std::fs::read_to_string(path).ok()?;
523                let file: Arc<str> = Arc::from(path.to_string_lossy().as_ref());
524                let src: Arc<str> = Arc::from(src);
525                let hash = hash_source(&src);
526                let cached = self.db.stub_cache.as_ref().and_then(|c| {
527                    let mut slice = c.get(&file, &hash, php_v)?;
528                    prepare_for_ingest(&mut slice);
529                    Some(slice)
530                });
531                Some(FileEntry {
532                    file,
533                    src,
534                    hash,
535                    cached,
536                })
537            })
538            .collect();
539        let _t_read = _t0.elapsed();
540
541        let source_files: Vec<SourceFile> = {
542            let mut guard = self.db.salsa.write();
543            entries
544                .iter()
545                .map(|e| {
546                    guard.upsert_source_file_with_durability(
547                        e.file.clone(),
548                        e.src.clone(),
549                        salsa::Durability::HIGH,
550                    )
551                })
552                .collect()
553        };
554        let _t_reg = _t0.elapsed();
555
556        let db_pass1 = {
557            let guard = self.db.salsa.read();
558            (**guard).clone()
559        };
560        let stub_cache = self.db.stub_cache.clone();
561        let prepared: Vec<mir_codebase::storage::StubSlice> = entries
562            .into_par_iter()
563            .zip(source_files.into_par_iter())
564            .map_with(db_pass1, |db, (mut entry, salsa_file)| {
565                if let Some(slice) = entry.cached.take() {
566                    let slice_arc = Arc::new(slice);
567                    db.parse_cache().insert(entry.hash, Arc::clone(&slice_arc));
568                    return (*slice_arc).clone();
569                }
570                let defs = collect_file_definitions(&*db, salsa_file);
571                if let Some(cache) = stub_cache.as_ref() {
572                    cache.put(&entry.file, &entry.hash, php_v, &defs.slice);
573                }
574                (*defs.slice).clone()
575            })
576            .collect();
577        let _t_collect = _t0.elapsed();
578        drop(prepared);
579        let _t_ingest = _t0.elapsed();
580
581        if _timing {
582            let (hits, misses) = self.stub_cache_stats();
583            eprintln!(
584                "[vendor] read={:.0}ms reg={:.0}ms collect={:.0}ms ingest={:.0}ms total={:.0}ms (cache hits={hits} misses={misses})",
585                _t_read.as_secs_f64() * 1000.0,
586                (_t_reg - _t_read).as_secs_f64() * 1000.0,
587                (_t_collect - _t_reg).as_secs_f64() * 1000.0,
588                (_t_ingest - _t_collect).as_secs_f64() * 1000.0,
589                _t_ingest.as_secs_f64() * 1000.0,
590            );
591        }
592
593        {
594            let mut guard = self.db.salsa.write();
595            guard.rebuild_workspace_symbol_index();
596        }
597
598        crate::collector::print_collector_stats();
599    }
600}