Skip to main content

mir_analyzer/batch/
run.rs

1use super::*;
2
3impl AnalysisSession {
4    /// Run the full batch analysis pipeline on a set of file paths.
5    pub fn analyze_paths(&self, paths: &[PathBuf], opts: &BatchOptions) -> AnalysisResult {
6        let php_version = self.batch_php_version(opts);
7        let mut all_issues = Vec::new();
8        let _t0 = std::time::Instant::now();
9
10        // ---- Load PHP built-in stubs (before definition collection so user code can override)
11        self.load_batch_stubs(php_version);
12        // Index vendor autoload.files (global function/constant helpers such as
13        // Laravel's `confirm()`, `select()`, etc.) before body analysis so
14        // calls to these functions resolve rather than emitting UndefinedFunction.
15        self.ensure_vendor_eager_functions();
16        let _t_stubs = _t0.elapsed();
17
18        // ---- Read files in parallel ----------------------------------
19        let parsed_files: Vec<ParsedProjectFile> = paths
20            .par_iter()
21            .filter_map(|path| match std::fs::read_to_string(path) {
22                Ok(src) => {
23                    let file = Arc::from(path.to_string_lossy().as_ref());
24                    Some(ParsedProjectFile::new(file, Arc::from(src)))
25                }
26                Err(e) => {
27                    eprintln!("Cannot read {}: {}", path.display(), e);
28                    None
29                }
30            })
31            .collect();
32        let _t_read = _t0.elapsed();
33
34        let file_data: Vec<(Arc<str>, Arc<str>)> = parsed_files
35            .iter()
36            .map(|parsed| (parsed.file.clone(), parsed.source.clone()))
37            .collect();
38
39        // ---- Pre-analysis invalidation: evict dependents of changed/removed files
40        if let Some(cache) = &self.cache {
41            let mut invalidated: Vec<String> = file_data
42                .par_iter()
43                .filter_map(|(f, src)| {
44                    let h = hash_content(src.as_ref());
45                    if cache.get(f, &h).is_none() {
46                        Some(f.to_string())
47                    } else {
48                        None
49                    }
50                })
51                .collect();
52
53            // Files analyzed in a previous run but now gone from disk: their
54            // dependents hold stale results that still assume the deleted
55            // definitions exist. A file merely absent from this run's path set
56            // (but still on disk) is NOT a deletion — checking disk existence
57            // avoids evicting dependents during partial-path analysis.
58            let current: std::collections::HashSet<&str> =
59                file_data.iter().map(|(f, _)| f.as_ref()).collect();
60            let removed: Vec<String> = cache
61                .cached_files()
62                .into_iter()
63                .filter(|f| !current.contains(f.as_str()) && !std::path::Path::new(f).exists())
64                .collect();
65            for f in &removed {
66                cache.evict(f);
67            }
68            invalidated.extend(removed);
69
70            if !invalidated.is_empty() {
71                cache.evict_with_dependents(&invalidated);
72            }
73        }
74
75        // ---- Register Salsa source inputs for incremental follow-up calls ----
76        {
77            let mut guard = self.db.salsa.write();
78            for parsed in &parsed_files {
79                guard.upsert_source_file(parsed.file.clone(), parsed.source.clone());
80            }
81        }
82        let _t_salsa_reg = _t0.elapsed();
83
84        // ---- Definition collection from the already-parsed AST -------
85        // Returns (FileDefinitions, content_hash, has_hard_parse_errors) so we
86        // can prime the parse cache before the pre-warm loop below.
87        type Pass1Entry = (FileDefinitions, [u8; 32], bool);
88        let file_defs: Vec<Pass1Entry> = parsed_files
89            .par_iter()
90            .map(|parsed| {
91                let content_hash = hash_source(parsed.source());
92                let has_hard_parse_errors = parsed
93                    .errors()
94                    .iter()
95                    .any(crate::parser::is_hard_parse_error);
96                let mut all_issues: Vec<Issue> = parsed
97                    .errors()
98                    .iter()
99                    .filter(|err| !crate::parser::is_spurious_reserved_class_error(err))
100                    .map(|err| {
101                        crate::parser::parse_error_to_issue(
102                            err,
103                            &parsed.file,
104                            parsed.source(),
105                            parsed.source_map(),
106                        )
107                    })
108                    .collect();
109                let collector = crate::collector::DefinitionCollector::new_for_slice(
110                    parsed.file.clone(),
111                    parsed.source(),
112                    parsed.source_map(),
113                );
114                let (mut slice, collector_issues) = collector.collect_slice(parsed.owned());
115                all_issues.extend(collector_issues);
116                mir_codebase::storage::deduplicate_params_in_slice(&mut slice);
117                let defs = FileDefinitions {
118                    slice: Arc::new(slice),
119                    issues: Arc::new(all_issues),
120                };
121                (defs, content_hash, has_hard_parse_errors)
122            })
123            .collect();
124        let _t_collect_defs = _t0.elapsed();
125
126        // Prime the in-process parse cache so the pre-warm loop below avoids
127        // re-parsing every project file through collect_file_definitions.
128        {
129            let guard = self.db.salsa.read();
130            for (defs, hash, has_hard_parse_errors) in &file_defs {
131                if !*has_hard_parse_errors {
132                    guard.prime_parse_cache(*hash, Arc::clone(&defs.slice));
133                }
134            }
135        }
136
137        let mut files_with_parse_errors: HashSet<Arc<str>> = HashSet::default();
138        for (defs, _hash, _hard_err) in file_defs {
139            for issue in defs.issues.iter() {
140                if matches!(issue.kind, mir_issues::IssueKind::ParseError { .. })
141                    && issue.severity == mir_issues::Severity::Error
142                {
143                    files_with_parse_errors.insert(issue.location.file.clone());
144                }
145            }
146            all_issues.extend(Arc::unwrap_or_clone(defs.issues));
147        }
148        let _t_ingest = _t0.elapsed();
149
150        // ---- Pre-warm collect_file_definitions for project files -------------
151        {
152            let db_prewarm = {
153                let guard = self.db.salsa.read();
154                (**guard).clone()
155            };
156            let project_source_files: Vec<SourceFile> = {
157                let guard = self.db.salsa.read();
158                parsed_files
159                    .iter()
160                    .filter_map(|p| (**guard).lookup_source_file(&p.file))
161                    .collect()
162            };
163            project_source_files
164                .into_par_iter()
165                .for_each_with(db_prewarm, |db, sf| {
166                    let _ = collect_file_definitions(db as &dyn MirDatabase, sf);
167                });
168        }
169        let _t_prewarm_ms = (_t0.elapsed() - _t_ingest).as_secs_f64() * 1000.0;
170
171        // Fold the freshly-registered project files into the workspace symbol
172        // index singleton. The singleton may have been built from vendor before
173        // this run (CLI indexes vendor before analyze_paths); since adding files
174        // no longer nulls it, project classes would otherwise be invisible to
175        // find_class_like and reported as false UndefinedClass.
176        self.refresh_workspace_index();
177
178        // ---- Lazy-load unknown classes via PSR-4 ----------------------------
179        let _t_before_lazy = _t0.elapsed();
180        if let Some(psr4) = self.psr4.clone() {
181            self.lazy_load_missing_classes(psr4, php_version, &mut all_issues);
182        }
183        let _t_lazyload_ms = (_t0.elapsed() - _t_before_lazy).as_secs_f64() * 1000.0;
184
185        // ---- Class-level checks ---------------------------------------------
186        let analyzed_file_set: HashSet<Arc<str>> =
187            file_data.iter().map(|(f, _)| f.clone()).collect();
188        let _t_class_analyzer = std::time::Instant::now();
189        {
190            let class_db = {
191                let guard = self.db.salsa.read();
192                (**guard).clone()
193            };
194            let class_issues = crate::class::ClassAnalyzer::with_files(
195                &class_db,
196                analyzed_file_set.clone(),
197                &file_data,
198            )
199            .analyze_all();
200            all_issues.extend(class_issues);
201        }
202        let _t_class_analyzer_ms = _t_class_analyzer.elapsed().as_secs_f64() * 1000.0;
203
204        let _t_class_checks = _t0.elapsed();
205
206        let mut db_main = {
207            let guard = self.db.salsa.read();
208            (**guard).clone()
209        };
210        // All index mutation for the body pass is done (lazy_load_missing_classes
211        // + refresh ran above; lazy_load_from_body_issues runs *after* this pass
212        // on a separate db). Freeze the index on this ephemeral clone so each
213        // find_class_like borrows it instead of cloning the singleton's three
214        // Arcs per call — the per-worker `map_with` clone bumps the refcount once.
215        db_main.freeze_workspace_index();
216
217        // ---- Body analysis: function/method bodies in parallel --------------
218        type BodyResult = (
219            Arc<str>,
220            Vec<Issue>,
221            Vec<crate::symbol::ResolvedSymbol>,
222            Vec<RefLoc>,
223        );
224        let body_results: Vec<BodyResult> = parsed_files
225            .par_iter()
226            .filter(|parsed| !files_with_parse_errors.contains(&parsed.file))
227            .map_with(db_main, |db, parsed| {
228                let driver = BodyAnalyzer::new(&*db as &dyn MirDatabase, php_version);
229                let (issues, symbols) = if let Some(cache) = &self.cache {
230                    let h = hash_content(parsed.source());
231                    if let Some((cached_issues, ref_locs)) = cache.get(&parsed.file, &h) {
232                        // Cache replay: rebuild the file's complete reference
233                        // set straight from the cached tuples — no pending-
234                        // buffer detour.
235                        let locs: Vec<RefLoc> = ref_locs
236                            .iter()
237                            .map(|(symbol, line, col_start, col_end)| RefLoc {
238                                symbol_key: Arc::from(symbol.as_str()),
239                                file: parsed.file.clone(),
240                                line: *line,
241                                col_start: *col_start,
242                                col_end: *col_end,
243                            })
244                            .collect();
245                        return (parsed.file.clone(), cached_issues, Vec::new(), locs);
246                    }
247                    let (issues, symbols) = driver.analyze_bodies(
248                        parsed.owned(),
249                        parsed.file.clone(),
250                        parsed.source(),
251                        parsed.source_map(),
252                    );
253                    let pending = db.take_pending_ref_locs();
254                    let cache_locs = pending
255                        .iter()
256                        .map(|r| (r.symbol_key.to_string(), r.line, r.col_start, r.col_end))
257                        .collect();
258                    cache.put(&parsed.file, h, issues.clone(), cache_locs);
259                    if let Some(cb) = &opts.on_file_done {
260                        cb();
261                    }
262                    let symbols = if opts.skip_symbols {
263                        Vec::new()
264                    } else {
265                        symbols
266                    };
267                    return (parsed.file.clone(), issues, symbols, pending);
268                } else {
269                    driver.analyze_bodies(
270                        parsed.owned(),
271                        parsed.file.clone(),
272                        parsed.source(),
273                        parsed.source_map(),
274                    )
275                };
276                let pending = db.take_pending_ref_locs();
277                if let Some(cb) = &opts.on_file_done {
278                    cb();
279                }
280                // Drop the per-file symbol vec inside the worker when the
281                // consumer opted out — the orchestrator never accumulates.
282                let symbols = if opts.skip_symbols {
283                    Vec::new()
284                } else {
285                    symbols
286                };
287                (parsed.file.clone(), issues, symbols, pending)
288            })
289            .collect();
290
291        let _t_body_analysis = _t0.elapsed();
292
293        // Serial commit with replace semantics: each file's output (or cache
294        // replay) is its complete reference set, so stale entries from a
295        // prior run cannot survive an append.
296        let mut all_symbols = Vec::new();
297        {
298            let guard = self.db.salsa.read();
299            for (file, issues, symbols, ref_locs) in body_results {
300                all_issues.extend(issues);
301                all_symbols.extend(symbols);
302                guard.set_file_reference_locations(file.as_ref(), ref_locs);
303            }
304        }
305
306        // ---- Post-analysis lazy loading: FQCNs used without `use` imports ------
307        if let Some(psr4) = self.psr4.clone() {
308            self.lazy_load_from_body_issues(
309                psr4,
310                php_version,
311                &file_data,
312                &files_with_parse_errors,
313                &mut all_issues,
314                &mut all_symbols,
315                opts.skip_symbols,
316            );
317        }
318
319        // ---- Build reverse dep graph and persist it for the next run ---------
320        // Must run AFTER `commit_reference_locations_batch` (above): the graph's
321        // call-site / instantiation / inferred-return edges are derived from the
322        // committed reference-location map. Built any earlier (the salsa db is
323        // fresh each session) that map is empty, so only structural edges
324        // (parent/interface/trait/declared types) survive — and any dependent
325        // reachable only through a call site or inferred type goes stale.
326        if let Some(cache) = &self.cache {
327            let db_snapshot = {
328                let guard = self.db.salsa.read();
329                (**guard).clone()
330            };
331            let rev = build_reverse_deps(&db_snapshot);
332            cache.set_reverse_deps(rev);
333        }
334
335        // Persist cache hits/misses to disk
336        if let Some(cache) = &self.cache {
337            cache.flush();
338        }
339
340        // ---- Dead-code detection -------------------------------------------
341        if opts.should_run_dead_code() {
342            let salsa = self.snapshot_db();
343            let _t_dead_code = std::time::Instant::now();
344            let dead_code_issues =
345                crate::dead_code::DeadCodeAnalyzer::with_files(&salsa, analyzed_file_set.clone())
346                    .analyze();
347            all_issues.extend(dead_code_issues);
348            if std::env::var("MIR_TIMING").is_ok() {
349                eprintln!(
350                    "[timing] dead_code_analyzer={:.0}ms",
351                    _t_dead_code.elapsed().as_secs_f64() * 1000.0
352                );
353            }
354        }
355
356        let _t_total = _t0.elapsed();
357        if std::env::var("MIR_TIMING").is_ok() {
358            eprintln!(
359                "[timing] stubs={:.0}ms read={:.0}ms salsa_reg={:.0}ms collect_defs={:.0}ms ingest={:.0}ms class_checks={:.0}ms (prewarm={:.0}ms lazy_load={:.0}ms class_analyzer={:.0}ms) body_analysis={:.0}ms total={:.0}ms",
360                _t_stubs.as_secs_f64() * 1000.0,
361                (_t_read - _t_stubs).as_secs_f64() * 1000.0,
362                (_t_salsa_reg - _t_read).as_secs_f64() * 1000.0,
363                (_t_collect_defs - _t_salsa_reg).as_secs_f64() * 1000.0,
364                (_t_ingest - _t_collect_defs).as_secs_f64() * 1000.0,
365                (_t_class_checks - _t_ingest).as_secs_f64() * 1000.0,
366                _t_prewarm_ms,
367                _t_lazyload_ms,
368                _t_class_analyzer_ms,
369                (_t_body_analysis - _t_class_checks).as_secs_f64() * 1000.0,
370                _t_total.as_secs_f64() * 1000.0,
371            );
372        }
373
374        opts.apply(&mut all_issues);
375        let analyzed_files_vec: Vec<Arc<str>> = analyzed_file_set.iter().cloned().collect();
376        self.apply_suppressions_and_emit_unused(&mut all_issues, &analyzed_files_vec);
377        if let Some(dump) = crate::metrics::dump() {
378            eprintln!("{dump}");
379        }
380
381        // ---- Build workspace symbol index singleton -------------------------
382        {
383            let mut guard = self.db.salsa.write();
384            guard.rebuild_workspace_symbol_index();
385        }
386
387        AnalysisResult::build(all_issues, rustc_hash::FxHashMap::default(), all_symbols)
388    }
389    /// Re-analyze a single file (definition collection + body analysis) within the batch context.
390    ///
391    /// Mirrors the old `ProjectAnalyzer::re_analyze_file` cache-aware path.
392    /// Use [`Self::reanalyze_dependents`] for LSP-style per-file flows that
393    /// don't need batch options.
394    pub fn re_analyze_file(
395        &self,
396        file_path: &str,
397        new_content: &str,
398        opts: &BatchOptions,
399    ) -> AnalysisResult {
400        let php_version = self.batch_php_version(opts);
401
402        // Fast path: content unchanged and cache has a valid entry.
403        if let Some(cache) = &self.cache {
404            let h = hash_content(new_content);
405            if let Some((mut issues, ref_locs)) = cache.get(file_path, &h) {
406                let file: Arc<str> = Arc::from(file_path);
407                // Replace semantics: the cached set is the file's complete
408                // reference set, so stale entries from a prior version are
409                // cleared rather than appended over.
410                let locs: Vec<RefLoc> = ref_locs
411                    .iter()
412                    .map(|(symbol, line, col_start, col_end)| RefLoc {
413                        symbol_key: Arc::from(symbol.as_str()),
414                        file: file.clone(),
415                        line: *line,
416                        col_start: *col_start,
417                        col_end: *col_end,
418                    })
419                    .collect();
420                let guard = self.db.salsa.read();
421                guard.set_file_reference_locations(file_path, locs);
422                drop(guard);
423                opts.apply(&mut issues);
424                self.apply_suppressions_and_emit_unused(&mut issues, std::slice::from_ref(&file));
425                return AnalysisResult::build(issues, HashMap::default(), Vec::new());
426            }
427        }
428
429        let file: Arc<str> = Arc::from(file_path);
430
431        {
432            let mut guard = self.db.salsa.write();
433            guard.remove_file_definitions(file_path);
434        }
435
436        let file_defs = {
437            let mut guard = self.db.salsa.write();
438            let salsa_file = guard.upsert_source_file(file.clone(), Arc::from(new_content));
439            collect_file_definitions(&**guard, salsa_file)
440        };
441
442        let mut all_issues: Vec<Issue> = Arc::unwrap_or_clone(file_defs.issues.clone());
443
444        {
445            let mut guard = self.db.salsa.write();
446            if guard.workspace_symbol_index_singleton().is_some() {
447                if let Some(sf) = guard.lookup_source_file(file.as_ref()) {
448                    if guard.file_declarations_changed(sf) {
449                        guard.rebuild_workspace_symbol_index();
450                    }
451                }
452            }
453        }
454
455        let symbols = {
456            let guard = self.db.salsa.write();
457
458            let parsed = php_rs_parser::parse(new_content);
459
460            let has_hard_errors = parsed.errors.iter().any(crate::parser::is_hard_parse_error);
461            if !has_hard_errors {
462                let db_ref: &dyn MirDatabase = &**guard;
463                let driver = BodyAnalyzer::new(db_ref, php_version);
464                let (body_issues, symbols) = driver.analyze_bodies(
465                    &parsed.program,
466                    file.clone(),
467                    new_content,
468                    &parsed.source_map,
469                );
470                all_issues.extend(body_issues);
471                let pending = guard.take_pending_ref_locs();
472                guard.set_file_reference_locations(file.as_ref(), pending);
473                symbols
474            } else {
475                Vec::new()
476            }
477        };
478
479        // Bake inline-suppression marks in *before* caching: suppression is a
480        // pure function of file content (and the cache key hashes content), so
481        // the cached issues should already carry their marks. The cache-hit
482        // branch above replays this file's source without re-registering the
483        // `SourceFile` input, so the db-backed post-filter cannot recompute
484        // marks there — caching the canonical result is what keeps a fresh
485        // process honoring `@mir-ignore` on an unchanged file.
486        mark_suppressed(
487            &mut all_issues,
488            &crate::suppression::SuppressionMap::from_source(new_content),
489        );
490
491        if let Some(cache) = &self.cache {
492            let h = hash_content(new_content);
493            cache.evict_with_dependents(&[file_path.to_string()]);
494            let db = self.snapshot_db();
495            let ref_locs = extract_reference_locations(&db, &file);
496            cache.put(file_path, h, all_issues.clone(), ref_locs);
497        }
498
499        opts.apply(&mut all_issues);
500        AnalysisResult::build(all_issues, HashMap::default(), symbols)
501    }
502
503    /// Collect type definitions only from `paths` into the codebase
504    /// without analyzing method bodies or emitting issues. Used to load
505    /// vendor types.
506    ///
507    /// When a disk-backed cache is attached, per-file `StubSlice` results
508    /// from previous runs are reused on a content-hash match, eliminating
509    /// the parse + definition-collection step. Cache misses run the normal
510    /// pipeline and write back so subsequent runs hit.
511    pub fn collect_definitions(&self, paths: &[PathBuf]) {
512        let _timing = std::env::var("MIR_TIMING").is_ok();
513        let _t0 = std::time::Instant::now();
514
515        let php_v = self.php_version.cache_byte();
516
517        struct FileEntry {
518            file: Arc<str>,
519            src: Arc<str>,
520            hash: [u8; 32],
521            cached: Option<mir_codebase::storage::StubSlice>,
522        }
523        let entries: Vec<FileEntry> = paths
524            .par_iter()
525            .filter_map(|path| {
526                let src = std::fs::read_to_string(path).ok()?;
527                let file: Arc<str> = Arc::from(path.to_string_lossy().as_ref());
528                let src: Arc<str> = Arc::from(src);
529                let hash = hash_source(&src);
530                let cached = self.db.stub_cache.as_ref().and_then(|c| {
531                    let mut slice = c.get(&file, &hash, php_v)?;
532                    prepare_for_ingest(&mut slice);
533                    Some(slice)
534                });
535                Some(FileEntry {
536                    file,
537                    src,
538                    hash,
539                    cached,
540                })
541            })
542            .collect();
543        let _t_read = _t0.elapsed();
544
545        let source_files: Vec<SourceFile> = {
546            let mut guard = self.db.salsa.write();
547            entries
548                .iter()
549                .map(|e| {
550                    guard.upsert_source_file_with_durability(
551                        e.file.clone(),
552                        e.src.clone(),
553                        salsa::Durability::HIGH,
554                    )
555                })
556                .collect()
557        };
558        let _t_reg = _t0.elapsed();
559
560        let db_pass1 = {
561            let guard = self.db.salsa.read();
562            (**guard).clone()
563        };
564        let stub_cache = self.db.stub_cache.clone();
565        let prepared: Vec<mir_codebase::storage::StubSlice> = entries
566            .into_par_iter()
567            .zip(source_files.into_par_iter())
568            .map_with(db_pass1, |db, (mut entry, salsa_file)| {
569                if let Some(slice) = entry.cached.take() {
570                    let slice_arc = Arc::new(slice);
571                    db.parse_cache().insert(entry.hash, Arc::clone(&slice_arc));
572                    return (*slice_arc).clone();
573                }
574                let defs = collect_file_definitions(&*db, salsa_file);
575                if let Some(cache) = stub_cache.as_ref() {
576                    cache.put(&entry.file, &entry.hash, php_v, &defs.slice);
577                }
578                (*defs.slice).clone()
579            })
580            .collect();
581        let _t_collect = _t0.elapsed();
582        drop(prepared);
583        let _t_ingest = _t0.elapsed();
584
585        if _timing {
586            let (hits, misses) = self.stub_cache_stats();
587            eprintln!(
588                "[vendor] read={:.0}ms reg={:.0}ms collect={:.0}ms ingest={:.0}ms total={:.0}ms (cache hits={hits} misses={misses})",
589                _t_read.as_secs_f64() * 1000.0,
590                (_t_reg - _t_read).as_secs_f64() * 1000.0,
591                (_t_collect - _t_reg).as_secs_f64() * 1000.0,
592                (_t_ingest - _t_collect).as_secs_f64() * 1000.0,
593                _t_ingest.as_secs_f64() * 1000.0,
594            );
595        }
596
597        {
598            let mut guard = self.db.salsa.write();
599            guard.rebuild_workspace_symbol_index();
600        }
601
602        crate::collector::print_collector_stats();
603    }
604}