Skip to main content

mir_analyzer/
batch.rs

1//! Batch-oriented project analysis on [`AnalysisSession`].
2//!
3//! This module hosts the multi-file orchestration that used to live on the
4//! retired `ProjectAnalyzer`: parallel definition collection, lazy class loading, dead-code
5//! sweep, reverse-dependency index, and the [`AnalysisResult`] return type.
6//! Per-file (LSP) entry points stay on `AnalysisSession` itself in
7//! `session.rs`.
8//!
9//! All methods are `impl AnalysisSession`; configuration that's only
10//! meaningful for batch runs (issue suppressions, progress callback, optional
11//! PHP version override) is grouped in [`BatchOptions`] and passed in rather
12//! than stored on the session.
13
14use std::path::{Path, PathBuf};
15use std::sync::Arc;
16
17use rayon::prelude::*;
18use rustc_hash::{FxHashMap as HashMap, FxHashSet as HashSet};
19
20use mir_issues::Issue;
21use mir_types::{Atomic, Type};
22
23use crate::body_analysis::BodyAnalyzer;
24use crate::cache::hash_content;
25use crate::db::{
26    collect_file_definitions, FileDefinitions, MirDatabase, MirDbStorage, RefLoc, SourceFile,
27};
28use crate::php_version::PhpVersion;
29use crate::session::AnalysisSession;
30use crate::stub_cache::{hash_source, prepare_for_ingest};
31
/// Names of the issue kinds produced by [`crate::dead_code::DeadCodeAnalyzer`].
///
/// Dead-code diagnostics are ordinary issue kinds: their names take part in
/// [`BatchOptions::suppressed_issue_kinds`] exactly like any other
/// `IssueKind`. When every name returned here is suppressed, the dead-code
/// pass is not run at all.
pub fn dead_code_issue_kinds() -> &'static [&'static str] {
    static DEAD_CODE_KINDS: [&str; 3] = ["UnusedMethod", "UnusedProperty", "UnusedFunction"];
    &DEAD_CODE_KINDS
}
41
42/// Per-batch options for [`AnalysisSession::analyze_paths`] and friends.
43///
44/// Configuration that only makes sense for full-project (batch) analysis
45/// lives here instead of on [`AnalysisSession`], so the per-file LSP API
46/// isn't bloated with state nothing else reads.
47#[derive(Clone, Default)]
48pub struct BatchOptions {
49    /// Names of `IssueKind` variants to drop from the final result, e.g.
50    /// `["MissingThrowsDocblock", "UnusedMethod"]`. Applied as a final
51    /// post-filter so analyzer internals don't need to know which
52    /// diagnostics the consumer cares about. Empty by default.
53    pub suppressed_issue_kinds: HashSet<String>,
54    /// Called once after each file completes body analysis (progress reporting).
55    pub on_file_done: Option<Arc<dyn Fn() + Send + Sync>>,
56    /// Override the session's configured PHP version for this run. `None`
57    /// uses the session's version.
58    pub php_version_override: Option<PhpVersion>,
59}
60
61impl BatchOptions {
62    pub fn new() -> Self {
63        Self::default()
64    }
65
66    pub fn with_suppressed<I, S>(mut self, kinds: I) -> Self
67    where
68        I: IntoIterator<Item = S>,
69        S: Into<String>,
70    {
71        self.suppressed_issue_kinds = kinds.into_iter().map(Into::into).collect();
72        self
73    }
74
75    pub fn with_progress_callback(mut self, callback: Arc<dyn Fn() + Send + Sync>) -> Self {
76        self.on_file_done = Some(callback);
77        self
78    }
79
80    pub fn with_php_version(mut self, version: PhpVersion) -> Self {
81        self.php_version_override = Some(version);
82        self
83    }
84
85    /// True iff at least one dead-code [`IssueKind`] would be emitted (i.e.
86    /// not all of them are suppressed).
87    fn should_run_dead_code(&self) -> bool {
88        dead_code_issue_kinds()
89            .iter()
90            .any(|k| !self.suppressed_issue_kinds.contains(*k))
91    }
92
93    /// Drop issues whose [`IssueKind::name()`] is listed in
94    /// [`Self::suppressed_issue_kinds`].
95    fn apply(&self, issues: &mut Vec<Issue>) {
96        if self.suppressed_issue_kinds.is_empty() {
97            return;
98        }
99        issues.retain(|i| !self.suppressed_issue_kinds.contains(i.kind.name()));
100    }
101}
102
103struct ParsedProjectFile {
104    file: Arc<str>,
105    source: Arc<str>,
106    parsed: php_rs_parser::ParseResult,
107}
108
109impl ParsedProjectFile {
110    fn new(file: Arc<str>, source: Arc<str>) -> Self {
111        let parsed = php_rs_parser::parse(source.as_ref());
112        Self {
113            file,
114            source,
115            parsed,
116        }
117    }
118
119    fn source(&self) -> &str {
120        self.source.as_ref()
121    }
122
123    fn source_map(&self) -> &php_rs_parser::source_map::SourceMap {
124        &self.parsed.source_map
125    }
126
127    fn errors(&self) -> &[php_rs_parser::diagnostics::ParseError] {
128        &self.parsed.errors
129    }
130
131    fn owned(&self) -> &php_ast::owned::Program {
132        &self.parsed.program
133    }
134}
135
136impl AnalysisSession {
137    /// Cumulative hit / miss counts on the persistent definition cache attached
138    /// to this session. `(0, 0)` when no cache is configured.
139    #[doc(hidden)]
140    pub fn stub_cache_stats(&self) -> (u64, u64) {
141        match self.db.stub_cache.as_deref() {
142            Some(c) => (c.hits(), c.misses()),
143            None => (0, 0),
144        }
145    }
146
147    fn batch_php_version(&self, opts: &BatchOptions) -> PhpVersion {
148        opts.php_version_override.unwrap_or(self.php_version)
149    }
150
151    fn type_exists(&self, fqcn: &str) -> bool {
152        let db = self.snapshot_db();
153        crate::db::class_exists(&db, fqcn)
154    }
155
    /// Thin wrapper that forwards `file`, `src` and `php_version` to
    /// `collect_and_ingest_file` on the session's db handle and returns the
    /// resulting [`FileDefinitions`].
    ///
    /// Used by the lazy-loading helpers for files pulled in on demand.
    fn collect_and_ingest_source(
        &self,
        file: Arc<str>,
        src: &str,
        php_version: PhpVersion,
    ) -> FileDefinitions {
        self.db.collect_and_ingest_file(file, src, php_version)
    }
164
165    /// Load the configured PHP version + built-in stubs + user stubs into
166    /// the shared db. Called by [`Self::analyze_paths`] and
167    /// [`Self::collect_definitions`].
168    fn load_batch_stubs(&self, php_version: PhpVersion) {
169        // Wire the PHP version into the db before any SourceFile inputs are
170        // registered — collect_file_definitions reads it for @since/@removed filtering.
171        {
172            let version_str = Arc::from(php_version.to_string().as_str());
173            self.db.salsa.write().set_php_version(version_str);
174        }
175
176        // Built-in stubs for the configured PHP version.
177        let paths: Vec<&'static str> = crate::stubs::stub_files().iter().map(|&(p, _)| p).collect();
178        self.db.ingest_stub_paths(&paths, php_version);
179
180        // User-configured stubs.
181        self.db
182            .ingest_user_stubs(&self.user_stub_files, &self.user_stub_dirs);
183
184        // Ensure a resolver is configured so pull-path lookups can map
185        // built-in FQCNs to the stub VFS paths registered above.
186        let mut guard = self.db.salsa.write();
187        if guard.current_resolver().is_none() {
188            let resolver: Arc<dyn crate::ClassResolver> = Arc::new(crate::StubClassResolver);
189            guard.set_resolver(Some(resolver));
190        }
191    }
192
    /// Run the full batch analysis pipeline on a set of file paths.
    ///
    /// Phases, in order:
    /// 1. load built-in and user stubs;
    /// 2. read and parse every path in parallel (unreadable files are reported
    ///    on stderr and skipped);
    /// 3. evict cache entries for files whose content hash has no cache hit;
    /// 4. register the sources as Salsa inputs;
    /// 5. collect definitions from the already-parsed ASTs and prime the parse
    ///    cache;
    /// 6. lazy-load referenced classes via PSR-4 (when a PSR-4 map is set);
    /// 7. run class-level checks, then body analysis in parallel;
    /// 8. lazy-load classes reported as undefined by body analysis;
    /// 9. run the dead-code pass unless all of its kinds are suppressed;
    /// 10. apply the suppression filter and rebuild the workspace symbol index.
    ///
    /// Set the `MIR_TIMING` environment variable to print per-phase timings to
    /// stderr.
    pub fn analyze_paths(&self, paths: &[PathBuf], opts: &BatchOptions) -> AnalysisResult {
        let php_version = self.batch_php_version(opts);
        let mut all_issues = Vec::new();
        // Reference point for all `_t_*` timing marks below; each mark is the
        // elapsed time since this instant.
        let _t0 = std::time::Instant::now();

        // ---- Load PHP built-in stubs (before definition collection so user code can override)
        self.load_batch_stubs(php_version);
        let _t_stubs = _t0.elapsed();

        // ---- Read files in parallel ----------------------------------
        // Each readable file is parsed right away by `ParsedProjectFile::new`.
        let parsed_files: Vec<ParsedProjectFile> = paths
            .par_iter()
            .filter_map(|path| match std::fs::read_to_string(path) {
                Ok(src) => {
                    let file = Arc::from(path.to_string_lossy().as_ref());
                    Some(ParsedProjectFile::new(file, Arc::from(src)))
                }
                Err(e) => {
                    eprintln!("Cannot read {}: {}", path.display(), e);
                    None
                }
            })
            .collect();
        let _t_read = _t0.elapsed();

        // (path, source) pairs; cloning the `Arc`s does not copy the text.
        let file_data: Vec<(Arc<str>, Arc<str>)> = parsed_files
            .iter()
            .map(|parsed| (parsed.file.clone(), parsed.source.clone()))
            .collect();

        // ---- Pre-analysis invalidation: evict dependents of changed files ------
        // A file counts as changed when the cache has no entry for its
        // current (path, content hash) pair.
        if let Some(cache) = &self.cache {
            let changed: Vec<String> = file_data
                .par_iter()
                .filter_map(|(f, src)| {
                    let h = hash_content(src.as_ref());
                    if cache.get(f, &h).is_none() {
                        Some(f.to_string())
                    } else {
                        None
                    }
                })
                .collect();
            if !changed.is_empty() {
                cache.evict_with_dependents(&changed);
            }
        }

        // ---- Register Salsa source inputs for incremental follow-up calls ----
        {
            let mut guard = self.db.salsa.write();
            for parsed in &parsed_files {
                guard.upsert_source_file(parsed.file.clone(), parsed.source.clone());
            }
        }
        let _t_salsa_reg = _t0.elapsed();

        // ---- Definition collection from the already-parsed AST -------
        // Returns (FileDefinitions, content_hash, has_hard_parse_errors) so we
        // can prime the parse cache before the pre-warm loop below.
        type Pass1Entry = (FileDefinitions, [u8; 32], bool);
        let file_defs: Vec<Pass1Entry> = parsed_files
            .par_iter()
            .map(|parsed| {
                let content_hash = hash_source(parsed.source());
                let has_hard_parse_errors = parsed
                    .errors()
                    .iter()
                    .any(crate::parser::is_hard_parse_error);
                // Per-file issue list (shadows the outer `all_issues`): parse
                // errors first, then whatever the definition collector reports.
                let mut all_issues: Vec<Issue> = parsed
                    .errors()
                    .iter()
                    .map(|err| {
                        crate::parser::parse_error_to_issue(
                            err,
                            &parsed.file,
                            parsed.source(),
                            parsed.source_map(),
                        )
                    })
                    .collect();
                let collector = crate::collector::DefinitionCollector::new_for_slice(
                    parsed.file.clone(),
                    parsed.source(),
                    parsed.source_map(),
                );
                let (mut slice, collector_issues) = collector.collect_slice(parsed.owned());
                all_issues.extend(collector_issues);
                mir_codebase::storage::deduplicate_params_in_slice(&mut slice);
                let defs = FileDefinitions {
                    slice: Arc::new(slice),
                    issues: Arc::new(all_issues),
                };
                (defs, content_hash, has_hard_parse_errors)
            })
            .collect();
        let _t_collect_defs = _t0.elapsed();

        // Prime the in-process parse cache so the pre-warm loop below avoids
        // re-parsing every project file through collect_file_definitions.
        // Files with hard parse errors are not primed.
        {
            let guard = self.db.salsa.read();
            for (defs, hash, has_hard_parse_errors) in &file_defs {
                if !*has_hard_parse_errors {
                    guard.prime_parse_cache(*hash, Arc::clone(&defs.slice));
                }
            }
        }

        // Files that produced an error-severity `ParseError` issue; body
        // analysis skips them later.
        let mut files_with_parse_errors: HashSet<Arc<str>> = HashSet::default();
        for (defs, _hash, _hard_err) in file_defs {
            for issue in defs.issues.iter() {
                if matches!(issue.kind, mir_issues::IssueKind::ParseError { .. })
                    && issue.severity == mir_issues::Severity::Error
                {
                    files_with_parse_errors.insert(issue.location.file.clone());
                }
            }
            all_issues.extend(Arc::unwrap_or_clone(defs.issues));
        }
        let _t_ingest = _t0.elapsed();

        // ---- Pre-warm collect_file_definitions for project files -------------
        // Runs the query once per file in parallel on clones of the db; the
        // query results themselves are discarded here.
        {
            let db_prewarm = {
                let guard = self.db.salsa.read();
                (**guard).clone()
            };
            let project_source_files: Vec<SourceFile> = {
                let guard = self.db.salsa.read();
                parsed_files
                    .iter()
                    .filter_map(|p| (**guard).lookup_source_file(&p.file))
                    .collect()
            };
            project_source_files
                .into_par_iter()
                .for_each_with(db_prewarm, |db, sf| {
                    let _ = collect_file_definitions(db as &dyn MirDatabase, sf);
                });
        }

        // ---- Lazy-load unknown classes via PSR-4 ----------------------------
        if let Some(psr4) = self.psr4.clone() {
            self.lazy_load_missing_classes(psr4, php_version, &mut all_issues);
        }

        // ---- Build reverse dep graph and persist it for the next run ---------
        if let Some(cache) = &self.cache {
            let db_snapshot = {
                let guard = self.db.salsa.read();
                (**guard).clone()
            };
            let rev = build_reverse_deps(&db_snapshot);
            cache.set_reverse_deps(rev);
        }

        // ---- Class-level checks ---------------------------------------------
        // Runs on a db clone taken after lazy loading.
        let analyzed_file_set: HashSet<Arc<str>> =
            file_data.iter().map(|(f, _)| f.clone()).collect();
        {
            let class_db = {
                let guard = self.db.salsa.read();
                (**guard).clone()
            };
            let class_issues =
                crate::class::ClassAnalyzer::with_files(&class_db, analyzed_file_set, &file_data)
                    .analyze_all();
            all_issues.extend(class_issues);
        }

        let _t_class_checks = _t0.elapsed();

        // Db clone handed to the parallel body-analysis workers via `map_with`.
        let db_main = {
            let guard = self.db.salsa.read();
            (**guard).clone()
        };

        // ---- Body analysis: function/method bodies in parallel --------------
        // Each entry is (issues, resolved symbols, pending reference
        // locations) for one file. Files with parse errors are skipped and do
        // not trigger the progress callback.
        let body_results: Vec<(Vec<Issue>, Vec<crate::symbol::ResolvedSymbol>, Vec<RefLoc>)> =
            parsed_files
                .par_iter()
                .filter(|parsed| !files_with_parse_errors.contains(&parsed.file))
                .map_with(db_main, |db, parsed| {
                    let driver = BodyAnalyzer::new(&*db as &dyn MirDatabase, php_version);
                    let (issues, symbols) = if let Some(cache) = &self.cache {
                        let h = hash_content(parsed.source());
                        if let Some((cached_issues, ref_locs)) = cache.get(&parsed.file, &h) {
                            // Cache hit: reuse the stored issues and replay the
                            // stored reference locations; no symbols are
                            // produced on this path.
                            db.replay_reference_locations(parsed.file.clone(), &ref_locs);
                            (cached_issues, Vec::new())
                        } else {
                            // Cache miss: analyze, store the result in the
                            // cache, report progress and return directly.
                            let (issues, symbols) = driver.analyze_bodies(
                                parsed.owned(),
                                parsed.file.clone(),
                                parsed.source(),
                                parsed.source_map(),
                            );
                            let pending = db.take_pending_ref_locs();
                            let cache_locs = pending
                                .iter()
                                .map(|r| (r.symbol_key.to_string(), r.line, r.col_start, r.col_end))
                                .collect();
                            cache.put(&parsed.file, h, issues.clone(), cache_locs);
                            if let Some(cb) = &opts.on_file_done {
                                cb();
                            }
                            return (issues, symbols, pending);
                        }
                    } else {
                        // No cache configured: always analyze.
                        driver.analyze_bodies(
                            parsed.owned(),
                            parsed.file.clone(),
                            parsed.source(),
                            parsed.source_map(),
                        )
                    };
                    // Shared tail for the cache-hit and no-cache paths.
                    let pending = db.take_pending_ref_locs();
                    if let Some(cb) = &opts.on_file_done {
                        cb();
                    }
                    (issues, symbols, pending)
                })
                .collect();

        let _t_body_analysis = _t0.elapsed();

        // Merge the per-file results serially, then commit every reference
        // location in a single batch call.
        let mut all_ref_locs: Vec<RefLoc> = Vec::new();
        let mut all_symbols = Vec::new();
        for (issues, symbols, ref_locs) in body_results {
            all_issues.extend(issues);
            all_symbols.extend(symbols);
            all_ref_locs.extend(ref_locs);
        }
        {
            let guard = self.db.salsa.read();
            guard.commit_reference_locations_batch(all_ref_locs);
        }

        // ---- Post-analysis lazy loading: FQCNs used without `use` imports ------
        if let Some(psr4) = self.psr4.clone() {
            self.lazy_load_from_body_issues(
                psr4,
                php_version,
                &file_data,
                &files_with_parse_errors,
                &mut all_issues,
                &mut all_symbols,
            );
        }

        // Flush the analysis cache (presumably to disk — see `flush`).
        if let Some(cache) = &self.cache {
            cache.flush();
        }

        // ---- Dead-code detection -------------------------------------------
        // Skipped when every dead-code issue kind is suppressed.
        if opts.should_run_dead_code() {
            let salsa = self.snapshot_db();
            let dead_code_issues = crate::dead_code::DeadCodeAnalyzer::new(&salsa).analyze();
            all_issues.extend(dead_code_issues);
        }

        let _t_total = _t0.elapsed();
        // Each mark is cumulative, so a phase's duration is the difference
        // between its mark and the previous one.
        if std::env::var("MIR_TIMING").is_ok() {
            eprintln!(
                "[timing] stubs={:.0}ms read={:.0}ms salsa_reg={:.0}ms collect_defs={:.0}ms ingest={:.0}ms class_checks={:.0}ms body_analysis={:.0}ms total={:.0}ms",
                _t_stubs.as_secs_f64() * 1000.0,
                (_t_read - _t_stubs).as_secs_f64() * 1000.0,
                (_t_salsa_reg - _t_read).as_secs_f64() * 1000.0,
                (_t_collect_defs - _t_salsa_reg).as_secs_f64() * 1000.0,
                (_t_ingest - _t_collect_defs).as_secs_f64() * 1000.0,
                (_t_class_checks - _t_ingest).as_secs_f64() * 1000.0,
                (_t_body_analysis - _t_class_checks).as_secs_f64() * 1000.0,
                _t_total.as_secs_f64() * 1000.0,
            );
        }

        // Final post-filter: drop issues whose kind is suppressed in `opts`.
        opts.apply(&mut all_issues);
        if let Some(dump) = crate::metrics::dump() {
            eprintln!("{dump}");
        }

        // ---- Build workspace symbol index singleton -------------------------
        {
            let mut guard = self.db.salsa.write();
            guard.rebuild_workspace_symbol_index();
        }

        AnalysisResult::build(all_issues, rustc_hash::FxHashMap::default(), all_symbols)
    }
485
486    fn lazy_load_missing_classes(
487        &self,
488        psr4: Arc<crate::composer::Psr4Map>,
489        php_version: PhpVersion,
490        all_issues: &mut Vec<Issue>,
491    ) {
492        let max_depth = 10;
493        let mut loaded: HashSet<String> = HashSet::default();
494        let mut scanned: HashSet<Arc<str>> = HashSet::default();
495
496        for _ in 0..max_depth {
497            let mut to_load: Vec<(String, PathBuf)> = Vec::new();
498
499            let mut try_queue = |fqcn: &str| {
500                if !self.type_exists(fqcn) && !loaded.contains(fqcn) {
501                    if let Some(path) = psr4.resolve(fqcn) {
502                        to_load.push((fqcn.to_string(), path));
503                    }
504                }
505            };
506
507            let mut candidates: Vec<String> = Vec::new();
508            let import_candidates = {
509                let db_owned = self.snapshot_db();
510                let db = &db_owned;
511                for fqcn in crate::db::workspace_classes(db).iter() {
512                    if scanned.contains(fqcn.as_ref()) {
513                        continue;
514                    }
515                    let here = crate::db::Fqcn::from_str(db, fqcn.as_ref());
516                    let Some(class) = crate::db::find_class_like(db, here) else {
517                        continue;
518                    };
519                    scanned.insert(fqcn.clone());
520                    collect_class_referenced_fqcns(&class, &mut candidates);
521                }
522                db.file_import_snapshots()
523                    .into_iter()
524                    .flat_map(|(_, imports)| {
525                        imports
526                            .values()
527                            .map(|sym| sym.as_str().to_string())
528                            .collect::<Vec<_>>()
529                    })
530                    .collect::<Vec<_>>()
531            };
532            for fqcn in candidates {
533                try_queue(&fqcn);
534            }
535            for fqcn in import_candidates {
536                try_queue(&fqcn);
537            }
538
539            if to_load.is_empty() {
540                break;
541            }
542
543            for (fqcn, path) in to_load {
544                loaded.insert(fqcn);
545                if let Ok(src) = std::fs::read_to_string(&path) {
546                    let file: Arc<str> = Arc::from(path.to_string_lossy().as_ref());
547                    let is_vendor = file.contains("/vendor/") || file.contains("\\vendor\\");
548                    let defs = self.collect_and_ingest_source(file, &src, php_version);
549                    if !is_vendor {
550                        all_issues.extend(Arc::unwrap_or_clone(defs.issues));
551                    }
552                }
553            }
554        }
555    }
556
    /// Resolve `UndefinedClass` issues left by body analysis by loading the
    /// missing classes through the PSR-4 map and re-analyzing affected files.
    ///
    /// Runs for at most 5 rounds. Each round:
    /// 1. queues every `UndefinedClass` name that is still unknown, not yet
    ///    attempted, and resolvable via `psr4`;
    /// 2. ingests the resolved files (their definition issues are discarded),
    ///    then runs [`Self::lazy_load_missing_classes`] to pull in transitive
    ///    dependencies;
    /// 3. re-runs body analysis for files that hold an `UndefinedClass` issue
    ///    whose class now exists, replacing their entries in `all_issues` and
    ///    `all_symbols`.
    ///
    /// The loop ends early when nothing is queued or nothing needs
    /// re-analysis.
    fn lazy_load_from_body_issues(
        &self,
        psr4: Arc<crate::composer::Psr4Map>,
        php_version: PhpVersion,
        file_data: &[(Arc<str>, Arc<str>)],
        files_with_parse_errors: &HashSet<Arc<str>>,
        all_issues: &mut Vec<Issue>,
        all_symbols: &mut Vec<crate::symbol::ResolvedSymbol>,
    ) {
        use mir_issues::IssueKind;

        let max_depth = 5;
        // Class names for which a load was already attempted in any round.
        let mut loaded: HashSet<String> = HashSet::default();

        for _ in 0..max_depth {
            // Keyed by class name, so each name is queued once per round.
            let mut to_load: HashMap<String, PathBuf> = HashMap::default();

            for issue in all_issues.iter() {
                if let IssueKind::UndefinedClass { name } = &issue.kind {
                    if !self.type_exists(name) && !loaded.contains(name) {
                        if let Some(path) = psr4.resolve(name) {
                            to_load.entry(name.clone()).or_insert(path);
                        }
                    }
                }
            }

            if to_load.is_empty() {
                break;
            }

            // Mark before reading so unreadable files are not retried.
            loaded.extend(to_load.keys().cloned());

            for path in to_load.values() {
                if let Ok(src) = std::fs::read_to_string(path) {
                    let file: Arc<str> = Arc::from(path.to_string_lossy().as_ref());
                    // Definition issues of lazily loaded files are dropped.
                    let _ = self.collect_and_ingest_source(file, &src, php_version);
                }
            }

            // Pull in whatever the freshly loaded classes reference.
            self.lazy_load_missing_classes(psr4.clone(), php_version, all_issues);

            // Files holding an `UndefinedClass` issue whose class exists now.
            let files_to_reanalyze: HashSet<Arc<str>> = all_issues
                .iter()
                .filter_map(|i| {
                    if let IssueKind::UndefinedClass { name } = &i.kind {
                        if self.type_exists(name) {
                            return Some(i.location.file.clone());
                        }
                    }
                    None
                })
                .collect();

            if files_to_reanalyze.is_empty() {
                break;
            }

            // NOTE(review): this drops every issue located in a file that is
            // about to be re-analyzed, but only body analysis is re-run below
            // (and files with parse errors are not re-run at all). Confirm
            // that definition-collection and class-level issues for those
            // files are not lost here.
            all_issues.retain(|i| !files_to_reanalyze.contains(&i.location.file));
            all_symbols.retain(|s| !files_to_reanalyze.contains(&s.file));

            // Fresh db clone that includes the classes ingested above.
            let db_full = {
                let guard = self.db.salsa.read();
                (**guard).clone()
            };

            // Re-parse and re-analyze the affected files in parallel; each
            // entry is (issues, symbols, pending reference locations).
            let reanalysis: Vec<(Vec<Issue>, Vec<crate::symbol::ResolvedSymbol>, Vec<RefLoc>)> =
                file_data
                    .par_iter()
                    .filter(|(f, _)| {
                        !files_with_parse_errors.contains(f) && files_to_reanalyze.contains(f)
                    })
                    .map_with(db_full, |db, (file, src)| {
                        let driver = BodyAnalyzer::new(&*db as &dyn MirDatabase, php_version);
                        let parsed = php_rs_parser::parse(src);
                        let (issues, symbols) = driver.analyze_bodies(
                            &parsed.program,
                            file.clone(),
                            src,
                            &parsed.source_map,
                        );
                        let pending = db.take_pending_ref_locs();
                        (issues, symbols, pending)
                    })
                    .collect();

            // Merge serially, then commit the reference locations in one batch.
            let mut reanalysis_ref_locs: Vec<RefLoc> = Vec::new();
            for (issues, symbols, ref_locs) in reanalysis {
                all_issues.extend(issues);
                all_symbols.extend(symbols);
                reanalysis_ref_locs.extend(ref_locs);
            }
            {
                let guard = self.db.salsa.read();
                guard.commit_reference_locations_batch(reanalysis_ref_locs);
            }
        }
    }
655
656    /// Re-analyze a single file (definition collection + body analysis) within the batch context.
657    ///
658    /// Mirrors the old `ProjectAnalyzer::re_analyze_file` cache-aware path.
659    /// Use [`Self::reanalyze_dependents`] for LSP-style per-file flows that
660    /// don't need batch options.
661    pub fn re_analyze_file(
662        &self,
663        file_path: &str,
664        new_content: &str,
665        opts: &BatchOptions,
666    ) -> AnalysisResult {
667        let php_version = self.batch_php_version(opts);
668
669        // Fast path: content unchanged and cache has a valid entry.
670        if let Some(cache) = &self.cache {
671            let h = hash_content(new_content);
672            if let Some((mut issues, ref_locs)) = cache.get(file_path, &h) {
673                let file: Arc<str> = Arc::from(file_path);
674                let guard = self.db.salsa.read();
675                guard.replay_reference_locations(file, &ref_locs);
676                guard.commit_pending_to_maps();
677                opts.apply(&mut issues);
678                return AnalysisResult::build(issues, HashMap::default(), Vec::new());
679            }
680        }
681
682        let file: Arc<str> = Arc::from(file_path);
683
684        {
685            let mut guard = self.db.salsa.write();
686            guard.remove_file_definitions(file_path);
687        }
688
689        let file_defs = {
690            let mut guard = self.db.salsa.write();
691            let salsa_file = guard.upsert_source_file(file.clone(), Arc::from(new_content));
692            collect_file_definitions(&**guard, salsa_file)
693        };
694
695        let mut all_issues: Vec<Issue> = Arc::unwrap_or_clone(file_defs.issues.clone());
696
697        {
698            let mut guard = self.db.salsa.write();
699            if guard.workspace_symbol_index_singleton().is_some() {
700                if let Some(sf) = guard.lookup_source_file(file.as_ref()) {
701                    if guard.file_declarations_changed(sf) {
702                        guard.rebuild_workspace_symbol_index();
703                    }
704                }
705            }
706        }
707
708        let symbols = {
709            let guard = self.db.salsa.write();
710
711            let parsed = php_rs_parser::parse(new_content);
712
713            let has_hard_errors = parsed.errors.iter().any(crate::parser::is_hard_parse_error);
714            if !has_hard_errors {
715                let db_ref: &dyn MirDatabase = &**guard;
716                let driver = BodyAnalyzer::new(db_ref, php_version);
717                let (body_issues, symbols) = driver.analyze_bodies(
718                    &parsed.program,
719                    file.clone(),
720                    new_content,
721                    &parsed.source_map,
722                );
723                all_issues.extend(body_issues);
724                guard.commit_pending_to_maps();
725                symbols
726            } else {
727                Vec::new()
728            }
729        };
730
731        if let Some(cache) = &self.cache {
732            let h = hash_content(new_content);
733            cache.evict_with_dependents(&[file_path.to_string()]);
734            let db = self.snapshot_db();
735            let ref_locs = extract_reference_locations(&db, &file);
736            cache.put(file_path, h, all_issues.clone(), ref_locs);
737        }
738
739        opts.apply(&mut all_issues);
740        AnalysisResult::build(all_issues, HashMap::default(), symbols)
741    }
742
743    /// Collect type definitions only from `paths` into the codebase
744    /// without analyzing method bodies or emitting issues. Used to load
745    /// vendor types.
746    ///
747    /// When a disk-backed cache is attached, per-file `StubSlice` results
748    /// from previous runs are reused on a content-hash match, eliminating
749    /// the parse + definition-collection step. Cache misses run the normal
750    /// pipeline and write back so subsequent runs hit.
751    pub fn collect_definitions(&self, paths: &[PathBuf]) {
752        let _timing = std::env::var("MIR_TIMING").is_ok();
753        let _t0 = std::time::Instant::now();
754
755        let php_v = self.php_version.cache_byte();
756
757        struct FileEntry {
758            file: Arc<str>,
759            src: Arc<str>,
760            hash: [u8; 32],
761            cached: Option<mir_codebase::storage::StubSlice>,
762        }
763        let entries: Vec<FileEntry> = paths
764            .par_iter()
765            .filter_map(|path| {
766                let src = std::fs::read_to_string(path).ok()?;
767                let file: Arc<str> = Arc::from(path.to_string_lossy().as_ref());
768                let src: Arc<str> = Arc::from(src);
769                let hash = hash_source(&src);
770                let cached = self.db.stub_cache.as_ref().and_then(|c| {
771                    let mut slice = c.get(&file, &hash, php_v)?;
772                    prepare_for_ingest(&mut slice);
773                    Some(slice)
774                });
775                Some(FileEntry {
776                    file,
777                    src,
778                    hash,
779                    cached,
780                })
781            })
782            .collect();
783        let _t_read = _t0.elapsed();
784
785        let source_files: Vec<SourceFile> = {
786            let mut guard = self.db.salsa.write();
787            entries
788                .iter()
789                .map(|e| {
790                    guard.upsert_source_file_with_durability(
791                        e.file.clone(),
792                        e.src.clone(),
793                        salsa::Durability::HIGH,
794                    )
795                })
796                .collect()
797        };
798        let _t_reg = _t0.elapsed();
799
800        let db_pass1 = {
801            let guard = self.db.salsa.read();
802            (**guard).clone()
803        };
804        let stub_cache = self.db.stub_cache.clone();
805        let prepared: Vec<mir_codebase::storage::StubSlice> = entries
806            .into_par_iter()
807            .zip(source_files.into_par_iter())
808            .map_with(db_pass1, |db, (mut entry, salsa_file)| {
809                if let Some(slice) = entry.cached.take() {
810                    let slice_arc = Arc::new(slice);
811                    db.parse_cache().insert(entry.hash, Arc::clone(&slice_arc));
812                    return (*slice_arc).clone();
813                }
814                let defs = collect_file_definitions(&*db, salsa_file);
815                if let Some(cache) = stub_cache.as_ref() {
816                    cache.put(&entry.file, &entry.hash, php_v, &defs.slice);
817                }
818                (*defs.slice).clone()
819            })
820            .collect();
821        let _t_collect = _t0.elapsed();
822        drop(prepared);
823        let _t_ingest = _t0.elapsed();
824
825        if _timing {
826            let (hits, misses) = self.stub_cache_stats();
827            eprintln!(
828                "[vendor] read={:.0}ms reg={:.0}ms collect={:.0}ms ingest={:.0}ms total={:.0}ms (cache hits={hits} misses={misses})",
829                _t_read.as_secs_f64() * 1000.0,
830                (_t_reg - _t_read).as_secs_f64() * 1000.0,
831                (_t_collect - _t_reg).as_secs_f64() * 1000.0,
832                (_t_ingest - _t_collect).as_secs_f64() * 1000.0,
833                _t_ingest.as_secs_f64() * 1000.0,
834            );
835        }
836
837        {
838            let mut guard = self.db.salsa.write();
839            guard.rebuild_workspace_symbol_index();
840        }
841
842        crate::collector::print_collector_stats();
843    }
844}
845
846/// Analyze a PHP source string without a real file path. Useful for tests
847/// and single-file LSP mode. Allocates a throwaway db; doesn't touch any
848/// existing session.
849pub fn analyze_source(source: &str) -> AnalysisResult {
850    let php_version = PhpVersion::LATEST;
851    let file: Arc<str> = Arc::from("<source>");
852    let mut db = MirDbStorage::default();
853    db.set_php_version(Arc::from(php_version.to_string().as_str()));
854    crate::stubs::load_stubs_for_version(&mut db, php_version);
855    let salsa_file = SourceFile::new(&db, file.clone(), Arc::from(source));
856    let file_defs = collect_file_definitions(&db, salsa_file);
857    let mut all_issues = Arc::unwrap_or_clone(file_defs.issues);
858    if all_issues.iter().any(|issue| {
859        matches!(issue.kind, mir_issues::IssueKind::ParseError { .. })
860            && issue.severity == mir_issues::Severity::Error
861    }) {
862        return AnalysisResult::build(all_issues, rustc_hash::FxHashMap::default(), Vec::new());
863    }
864    let mut type_envs = rustc_hash::FxHashMap::default();
865    let mut all_symbols = Vec::new();
866    let result = php_rs_parser::parse(source);
867
868    let driver = BodyAnalyzer::new(&db, php_version);
869    all_issues.extend(driver.analyze_bodies_typed(
870        &result.program,
871        file.clone(),
872        source,
873        &result.source_map,
874        &mut type_envs,
875        &mut all_symbols,
876    ));
877    AnalysisResult::build(all_issues, type_envs, all_symbols)
878}
879
880/// Discover all `.php` files under a directory, recursively.
881pub fn discover_files(root: &Path) -> Vec<PathBuf> {
882    if root.is_file() {
883        return vec![root.to_path_buf()];
884    }
885    let mut files = Vec::new();
886    collect_php_files(root, &mut files);
887    files
888}
889
/// Recursively append every `*.php` file below `dir` to `out`.
///
/// Symlinked entries are never followed or collected, directories named
/// `vendor`, `.git`, `node_modules`, `.cache` or `.pnpm-store` are not
/// descended into, and the extension match is case-sensitive. Directories
/// that cannot be listed and entries that fail to read are ignored.
pub(crate) fn collect_php_files(dir: &Path, out: &mut Vec<PathBuf>) {
    const SKIPPED_DIRS: [&str; 5] = ["vendor", ".git", "node_modules", ".cache", ".pnpm-store"];

    let Ok(listing) = std::fs::read_dir(dir) else {
        return;
    };

    for child in listing.flatten() {
        // An entry whose type cannot be determined is treated as "not a symlink".
        let is_symlink = matches!(child.file_type(), Ok(kind) if kind.is_symlink());
        if is_symlink {
            continue;
        }

        let child_path = child.path();
        if !child_path.is_dir() {
            if child_path.extension().and_then(|ext| ext.to_str()) == Some("php") {
                out.push(child_path);
            }
            continue;
        }

        let dir_name = child_path
            .file_name()
            .and_then(|name| name.to_str())
            .unwrap_or("");
        if !SKIPPED_DIRS.contains(&dir_name) {
            collect_php_files(&child_path, out);
        }
    }
}
912
913// ---------------------------------------------------------------------------
914// FQCN reference walk — collects every class-name reference reachable from a
915// ClassLike's signature surface. Used by lazy_load_missing_classes to chase
916// transitive vendor types.
917// ---------------------------------------------------------------------------
918
919fn collect_class_referenced_fqcns(class: &crate::db::ClassLike, out: &mut Vec<String>) {
920    if let Some(p) = class.parent() {
921        out.push(p.to_string());
922    }
923    for i in class.interfaces() {
924        out.push(i.to_string());
925    }
926    for e in class.extends() {
927        out.push(e.to_string());
928    }
929    for t in class.class_traits() {
930        out.push(t.to_string());
931    }
932    for m in class.mixins() {
933        out.push(m.to_string());
934    }
935    for u in class.extends_type_args() {
936        collect_fqcns_in_union(u, out);
937    }
938    for (iface, args) in class.implements_type_args() {
939        out.push(iface.to_string());
940        for u in args {
941            collect_fqcns_in_union(u, out);
942        }
943    }
944    for (_, m) in class.own_methods().iter() {
945        for p in m.params.iter() {
946            if let Some(t) = &p.ty {
947                collect_fqcns_in_union(t, out);
948            }
949        }
950        if let Some(t) = &m.return_type {
951            collect_fqcns_in_union(t, out);
952        }
953        for thrown in m.throws.iter() {
954            out.push(thrown.to_string());
955        }
956    }
957    if let Some(props) = class.own_properties() {
958        for (_, p) in props.iter() {
959            if let Some(t) = &p.ty {
960                collect_fqcns_in_union(t, out);
961            }
962        }
963    }
964    for (_, c) in class.own_constants().iter() {
965        collect_fqcns_in_union(&c.ty, out);
966    }
967}
968
969fn collect_fqcns_in_union(u: &Type, out: &mut Vec<String>) {
970    for atom in u.types.iter() {
971        collect_fqcns_in_atomic(atom, out);
972    }
973}
974
975fn collect_fqcns_in_simple(t: &mir_types::compact::SimpleType, out: &mut Vec<String>) {
976    if let mir_types::compact::SimpleType::Complex(u) = t {
977        collect_fqcns_in_union(u, out);
978    }
979}
980
981fn collect_fqcns_in_atomic(a: &Atomic, out: &mut Vec<String>) {
982    match a {
983        Atomic::TNamedObject { fqcn, type_params } => {
984            out.push(fqcn.to_string());
985            for tp in type_params.iter() {
986                collect_fqcns_in_union(tp, out);
987            }
988        }
989        Atomic::TStaticObject { fqcn } | Atomic::TSelf { fqcn } | Atomic::TParent { fqcn } => {
990            out.push(fqcn.to_string());
991        }
992        Atomic::TLiteralEnumCase { enum_fqcn, .. } => {
993            out.push(enum_fqcn.to_string());
994        }
995        Atomic::TClassString(Some(s)) => {
996            out.push(s.to_string());
997        }
998        Atomic::TArray { key, value } | Atomic::TNonEmptyArray { key, value } => {
999            collect_fqcns_in_union(key, out);
1000            collect_fqcns_in_union(value, out);
1001        }
1002        Atomic::TList { value } | Atomic::TNonEmptyList { value } => {
1003            collect_fqcns_in_union(value, out);
1004        }
1005        Atomic::TKeyedArray { properties, .. } => {
1006            for (_, kp) in properties.iter() {
1007                collect_fqcns_in_union(&kp.ty, out);
1008            }
1009        }
1010        Atomic::TClosure {
1011            params,
1012            return_type,
1013            this_type,
1014        } => {
1015            for p in params {
1016                if let Some(t) = &p.ty {
1017                    collect_fqcns_in_simple(t, out);
1018                }
1019            }
1020            collect_fqcns_in_union(return_type, out);
1021            if let Some(t) = this_type {
1022                collect_fqcns_in_union(t, out);
1023            }
1024        }
1025        Atomic::TCallable {
1026            params,
1027            return_type,
1028        } => {
1029            if let Some(ps) = params {
1030                for p in ps {
1031                    if let Some(t) = &p.ty {
1032                        collect_fqcns_in_simple(t, out);
1033                    }
1034                }
1035            }
1036            if let Some(rt) = return_type {
1037                collect_fqcns_in_union(rt, out);
1038            }
1039        }
1040        Atomic::TIntersection { parts } => {
1041            for p in parts.iter() {
1042                collect_fqcns_in_union(p, out);
1043            }
1044        }
1045        Atomic::TConditional {
1046            param_name: _,
1047            subject,
1048            if_true,
1049            if_false,
1050        } => {
1051            collect_fqcns_in_union(subject, out);
1052            collect_fqcns_in_union(if_true, out);
1053            collect_fqcns_in_union(if_false, out);
1054        }
1055        Atomic::TTemplateParam { as_type, .. } => {
1056            collect_fqcns_in_union(as_type, out);
1057        }
1058        _ => {}
1059    }
1060}
1061
1062fn build_reverse_deps(db: &dyn crate::db::MirDatabase) -> HashMap<String, HashSet<String>> {
1063    let mut reverse: HashMap<String, HashSet<String>> = HashMap::default();
1064
1065    let mut add_edge = |symbol: &str, dependent_file: &str| {
1066        if let Some(defining_file) = db.symbol_defining_file(symbol) {
1067            let def = defining_file.as_ref().to_string();
1068            if def != dependent_file {
1069                reverse
1070                    .entry(def)
1071                    .or_default()
1072                    .insert(dependent_file.to_string());
1073            }
1074        }
1075    };
1076
1077    for (file, imports) in db.file_import_snapshots() {
1078        let file = file.as_ref().to_string();
1079        for fqcn in imports.values() {
1080            add_edge(fqcn.as_str(), &file);
1081        }
1082    }
1083
1084    let extract_named_objects = |union: &mir_types::Type| {
1085        union
1086            .types
1087            .iter()
1088            .filter_map(|atomic| match atomic {
1089                mir_types::atomic::Atomic::TNamedObject { fqcn, .. } => Some(*fqcn),
1090                _ => None,
1091            })
1092            .collect::<Vec<_>>()
1093    };
1094
1095    for fqcn in crate::db::workspace_classes(db).iter() {
1096        let here = crate::db::Fqcn::from_str(db, fqcn.as_ref());
1097        let Some(class) = crate::db::find_class_like(db, here) else {
1098            continue;
1099        };
1100        if class.is_interface() || class.is_trait() || class.is_enum() {
1101            continue;
1102        }
1103        let Some(file) = db
1104            .symbol_defining_file(fqcn.as_ref())
1105            .map(|f| f.as_ref().to_string())
1106            .or_else(|| class.location().map(|l| l.file.as_ref().to_string()))
1107        else {
1108            continue;
1109        };
1110
1111        if let Some(parent) = class.parent() {
1112            add_edge(parent.as_ref(), &file);
1113        }
1114        for iface in class.interfaces().iter() {
1115            add_edge(iface.as_ref(), &file);
1116        }
1117        for tr in class.class_traits().iter() {
1118            add_edge(tr.as_ref(), &file);
1119        }
1120        if let Some(props) = class.own_properties() {
1121            for (_, p) in props.iter() {
1122                if let Some(ty) = &p.ty {
1123                    for named in extract_named_objects(ty) {
1124                        add_edge(named.as_ref(), &file);
1125                    }
1126                }
1127            }
1128        }
1129        for (_, method) in class.own_methods().iter() {
1130            for param in method.params.iter() {
1131                if let Some(ty) = &param.ty {
1132                    for named in extract_named_objects(ty.as_ref()) {
1133                        add_edge(named.as_ref(), &file);
1134                    }
1135                }
1136            }
1137            if let Some(rt) = method.return_type.as_deref() {
1138                for named in extract_named_objects(rt) {
1139                    add_edge(named.as_ref(), &file);
1140                }
1141            }
1142        }
1143    }
1144
1145    for fqn in crate::db::workspace_functions(db).iter() {
1146        let here = crate::db::Fqcn::from_str(db, fqn.as_ref());
1147        let Some(f) = crate::db::find_function(db, here) else {
1148            continue;
1149        };
1150        let Some(file) = db
1151            .symbol_defining_file(fqn.as_ref())
1152            .map(|f| f.as_ref().to_string())
1153            .or_else(|| f.location.as_ref().map(|l| l.file.as_ref().to_string()))
1154        else {
1155            continue;
1156        };
1157
1158        for param in f.params.iter() {
1159            if let Some(ty) = &param.ty {
1160                for named in extract_named_objects(ty.as_ref()) {
1161                    add_edge(named.as_ref(), &file);
1162                }
1163            }
1164        }
1165        if let Some(rt) = f.return_type.as_deref() {
1166            for named in extract_named_objects(rt) {
1167                add_edge(named.as_ref(), &file);
1168            }
1169        }
1170    }
1171
1172    for (ref_file, symbol_key) in db.all_reference_location_pairs() {
1173        let file_str = ref_file.as_ref().to_string();
1174        let lookup: &str = match symbol_key.split_once("::") {
1175            Some((class, _)) => class,
1176            None => &symbol_key,
1177        };
1178        add_edge(lookup, &file_str);
1179    }
1180
1181    reverse
1182}
1183
1184fn extract_reference_locations(
1185    db: &dyn crate::db::MirDatabase,
1186    file: &Arc<str>,
1187) -> Vec<(String, u32, u16, u16)> {
1188    db.extract_file_reference_locations(file.as_ref())
1189        .into_iter()
1190        .map(|(sym, line, col_start, col_end)| (sym.to_string(), line, col_start, col_end))
1191        .collect()
1192}
1193
1194pub struct AnalysisResult {
1195    pub issues: Vec<Issue>,
1196    #[doc(hidden)]
1197    pub type_envs: rustc_hash::FxHashMap<crate::type_env::ScopeId, crate::type_env::TypeEnv>,
1198    /// Per-expression resolved symbols from body analysis, sorted by file path.
1199    pub symbols: Vec<crate::symbol::ResolvedSymbol>,
1200    /// Maps each file path to the contiguous range within `symbols` that
1201    /// belongs to it.
1202    symbols_by_file: HashMap<Arc<str>, std::ops::Range<usize>>,
1203}
1204
1205impl AnalysisResult {
1206    fn build(
1207        issues: Vec<Issue>,
1208        type_envs: rustc_hash::FxHashMap<crate::type_env::ScopeId, crate::type_env::TypeEnv>,
1209        mut symbols: Vec<crate::symbol::ResolvedSymbol>,
1210    ) -> Self {
1211        symbols.sort_unstable_by(|a, b| a.file.as_ref().cmp(b.file.as_ref()));
1212        let mut symbols_by_file: HashMap<Arc<str>, std::ops::Range<usize>> = HashMap::default();
1213        let mut i = 0;
1214        while i < symbols.len() {
1215            let file = Arc::clone(&symbols[i].file);
1216            let start = i;
1217            while i < symbols.len() && symbols[i].file == file {
1218                i += 1;
1219            }
1220            symbols_by_file.insert(file, start..i);
1221        }
1222        Self {
1223            issues,
1224            type_envs,
1225            symbols,
1226            symbols_by_file,
1227        }
1228    }
1229
1230    pub fn error_count(&self) -> usize {
1231        self.issues
1232            .iter()
1233            .filter(|i| i.severity == mir_issues::Severity::Error)
1234            .count()
1235    }
1236
1237    pub fn warning_count(&self) -> usize {
1238        self.issues
1239            .iter()
1240            .filter(|i| i.severity == mir_issues::Severity::Warning)
1241            .count()
1242    }
1243
1244    pub fn issues_by_file(&self) -> HashMap<Arc<str>, Vec<&Issue>> {
1245        let mut map: HashMap<Arc<str>, Vec<&Issue>> = HashMap::default();
1246        for issue in &self.issues {
1247            map.entry(issue.location.file.clone())
1248                .or_default()
1249                .push(issue);
1250        }
1251        map
1252    }
1253
1254    pub fn count_by_severity(&self) -> Vec<(mir_issues::Severity, usize)> {
1255        let mut counts: std::collections::BTreeMap<mir_issues::Severity, usize> =
1256            std::collections::BTreeMap::new();
1257        for issue in &self.issues {
1258            *counts.entry(issue.severity).or_insert(0) += 1;
1259        }
1260        counts.into_iter().collect()
1261    }
1262
1263    pub fn total_issue_count(&self) -> usize {
1264        self.issues.len()
1265    }
1266
1267    pub fn filter_issues<'a, F>(&'a self, predicate: F) -> impl Iterator<Item = &'a Issue>
1268    where
1269        F: Fn(&Issue) -> bool + 'a,
1270    {
1271        self.issues.iter().filter(move |i| predicate(i))
1272    }
1273
1274    pub fn symbol_at(
1275        &self,
1276        file: &str,
1277        byte_offset: u32,
1278    ) -> Option<&crate::symbol::ResolvedSymbol> {
1279        let range = self.symbols_by_file.get(file)?;
1280        self.symbols[range.clone()]
1281            .iter()
1282            .filter(|s| s.span.start <= byte_offset && byte_offset < s.span.end)
1283            .min_by_key(|s| s.span.end - s.span.start)
1284    }
1285}