Skip to main content

mir_analyzer/
batch.rs

1//! Batch-oriented project analysis on [`AnalysisSession`].
2//!
3//! This module hosts the multi-file orchestration that used to live on the
4//! retired `ProjectAnalyzer`: parallel definition collection, lazy class loading, dead-code
5//! sweep, reverse-dependency index, and the [`AnalysisResult`] return type.
6//! Per-file (LSP) entry points stay on `AnalysisSession` itself in
7//! `session.rs`.
8//!
9//! All methods are `impl AnalysisSession`; configuration that's only
10//! meaningful for batch runs (issue suppressions, progress callback, optional
11//! PHP version override) is grouped in [`BatchOptions`] and passed in rather
12//! than stored on the session.
13
14use std::path::{Path, PathBuf};
15use std::sync::Arc;
16
17use rayon::prelude::*;
18use rustc_hash::{FxHashMap as HashMap, FxHashSet as HashSet};
19
20use mir_issues::Issue;
21use mir_types::{Atomic, Type};
22
23use crate::body_analysis::BodyAnalyzer;
24use crate::cache::hash_content;
25use crate::db::{
26    collect_file_definitions, FileDefinitions, MirDatabase, MirDbStorage, RefLoc, SourceFile,
27};
28use crate::php_version::PhpVersion;
29use crate::session::AnalysisSession;
30use crate::stub_cache::{hash_source, prepare_for_ingest};
31
/// Names of the issue kinds produced by [`crate::dead_code::DeadCodeAnalyzer`].
///
/// Dead-code detection is treated as an ordinary group of issue kinds: each
/// name below can appear in [`BatchOptions::suppressed_issue_kinds`] just
/// like any other `IssueKind` name. When all of them are suppressed the
/// dead-code pass is not run at all.
pub fn dead_code_issue_kinds() -> &'static [&'static str] {
    static KINDS: [&str; 4] = [
        "UnusedMethod",
        "UnusedProperty",
        "UnusedFunction",
        "UnusedClass",
    ];
    &KINDS
}
46
47/// Per-batch options for [`AnalysisSession::analyze_paths`] and friends.
48///
49/// Configuration that only makes sense for full-project (batch) analysis
50/// lives here instead of on [`AnalysisSession`], so the per-file LSP API
51/// isn't bloated with state nothing else reads.
52#[derive(Clone, Default)]
53pub struct BatchOptions {
54    /// Names of `IssueKind` variants to drop from the final result, e.g.
55    /// `["MissingThrowsDocblock", "UnusedMethod"]`. Applied as a final
56    /// post-filter so analyzer internals don't need to know which
57    /// diagnostics the consumer cares about. Empty by default.
58    pub suppressed_issue_kinds: HashSet<String>,
59    /// Called once after each file completes body analysis (progress reporting).
60    pub on_file_done: Option<Arc<dyn Fn() + Send + Sync>>,
61    /// Override the session's configured PHP version for this run. `None`
62    /// uses the session's version.
63    pub php_version_override: Option<PhpVersion>,
64    /// Skip collecting per-expression [`crate::symbol::ResolvedSymbol`]s
65    /// into the [`AnalysisResult`]. Defaults to `false` (symbols collected)
66    /// so existing consumers — LSP servers using
67    /// [`AnalysisResult::symbol_at`] for hover/go-to-definition — are
68    /// unaffected. Diagnostics-only consumers (the CLI) opt out: a
69    /// Laravel-scale batch retains ~600k symbols nothing reads.
70    pub skip_symbols: bool,
71}
72
73impl BatchOptions {
74    pub fn new() -> Self {
75        Self::default()
76    }
77
78    pub fn with_suppressed<I, S>(mut self, kinds: I) -> Self
79    where
80        I: IntoIterator<Item = S>,
81        S: Into<String>,
82    {
83        self.suppressed_issue_kinds = kinds.into_iter().map(Into::into).collect();
84        self
85    }
86
87    pub fn with_progress_callback(mut self, callback: Arc<dyn Fn() + Send + Sync>) -> Self {
88        self.on_file_done = Some(callback);
89        self
90    }
91
92    pub fn with_php_version(mut self, version: PhpVersion) -> Self {
93        self.php_version_override = Some(version);
94        self
95    }
96
97    /// Don't collect per-expression symbols into the result (see
98    /// [`Self::skip_symbols`]). For diagnostics-only consumers;
99    /// [`AnalysisResult::symbol_at`] will find nothing on the batch result.
100    pub fn without_symbols(mut self) -> Self {
101        self.skip_symbols = true;
102        self
103    }
104
105    /// True iff at least one dead-code [`IssueKind`] would be emitted (i.e.
106    /// not all of them are suppressed).
107    fn should_run_dead_code(&self) -> bool {
108        dead_code_issue_kinds()
109            .iter()
110            .any(|k| !self.suppressed_issue_kinds.contains(*k))
111    }
112
113    /// Drop issues whose [`IssueKind::name()`] is listed in
114    /// [`Self::suppressed_issue_kinds`].
115    fn apply(&self, issues: &mut Vec<Issue>) {
116        if self.suppressed_issue_kinds.is_empty() {
117            return;
118        }
119        issues.retain(|i| !self.suppressed_issue_kinds.contains(i.kind.name()));
120    }
121}
122
123struct ParsedProjectFile {
124    file: Arc<str>,
125    source: Arc<str>,
126    parsed: php_rs_parser::ParseResult,
127}
128
129impl ParsedProjectFile {
130    fn new(file: Arc<str>, source: Arc<str>) -> Self {
131        let parsed = php_rs_parser::parse(source.as_ref());
132        Self {
133            file,
134            source,
135            parsed,
136        }
137    }
138
139    fn source(&self) -> &str {
140        self.source.as_ref()
141    }
142
143    fn source_map(&self) -> &php_rs_parser::source_map::SourceMap {
144        &self.parsed.source_map
145    }
146
147    fn errors(&self) -> &[php_rs_parser::diagnostics::ParseError] {
148        &self.parsed.errors
149    }
150
151    fn owned(&self) -> &php_ast::owned::Program {
152        &self.parsed.program
153    }
154}
155
156impl AnalysisSession {
157    /// Cumulative hit / miss counts on the persistent definition cache attached
158    /// to this session. `(0, 0)` when no cache is configured.
159    #[doc(hidden)]
160    pub fn stub_cache_stats(&self) -> (u64, u64) {
161        match self.db.stub_cache.as_deref() {
162            Some(c) => (c.hits(), c.misses()),
163            None => (0, 0),
164        }
165    }
166
167    fn batch_php_version(&self, opts: &BatchOptions) -> PhpVersion {
168        opts.php_version_override.unwrap_or(self.php_version)
169    }
170
171    /// Mark issues silenced by inline suppression comments
172    /// (`@mir-ignore`, `@psalm-suppress`, `@phpstan-ignore*`, …) as suppressed.
173    ///
174    /// Runs as a final post-filter over the merged issue list so it applies
175    /// uniformly to every emitting pass — body analysis, the collector, class
176    /// checks and dead-code detection — including diagnostics the per-statement
177    /// `@psalm-suppress` path in `stmt/mod.rs` structurally cannot reach.
178    ///
179    /// Issues are *marked* rather than dropped, mirroring that per-statement
180    /// path and the kind-level `mir.xml` suppress handler; every consumer (CLI,
181    /// WASM, the test harness) already skips [`Issue::suppressed`].
182    /// Apply inline suppressions and then emit `UnusedPsalmSuppress` issues for
183    /// any named `@suppress`/`@psalm-suppress` annotations that matched nothing.
184    ///
185    /// `analyzed_files` must list every file that was analyzed in this batch so
186    /// that files with *zero* existing issues still have their suppression maps
187    /// inspected for unused annotations.
188    fn apply_suppressions_and_emit_unused(
189        &self,
190        issues: &mut Vec<Issue>,
191        analyzed_files: &[Arc<str>],
192    ) {
193        use crate::suppression::SuppressionMap;
194        let db = self.snapshot_db();
195        let mut cache: HashMap<Arc<str>, Option<SuppressionMap>> = HashMap::default();
196        for issue in issues.iter_mut() {
197            if issue.suppressed {
198                continue;
199            }
200            let map = cache.entry(issue.location.file.clone()).or_insert_with(|| {
201                db.lookup_source_file(&issue.location.file)
202                    .map(|sf| SuppressionMap::from_source(&sf.text(&db)))
203            });
204            if let Some(map) = map.as_ref() {
205                if map.is_suppressed(issue.location.line, issue.kind.name(), issue.kind.code()) {
206                    issue.suppressed = true;
207                }
208            }
209        }
210        // Ensure suppression maps are built for every analyzed file, not just
211        // those that already have at least one issue (files with no issues would
212        // otherwise be skipped and their unused suppressions never detected).
213        for file in analyzed_files {
214            cache.entry(file.clone()).or_insert_with(|| {
215                db.lookup_source_file(file)
216                    .map(|sf| SuppressionMap::from_source(&sf.text(&db)))
217            });
218        }
219        // Now emit UnusedPsalmSuppress for each file that has named suppressions.
220        let files: Vec<Arc<str>> = cache
221            .iter()
222            .filter_map(|(f, m)| m.as_ref().map(|_| f.clone()))
223            .collect();
224        let mut new_issues: Vec<Issue> = Vec::new();
225        for file in files {
226            if let Some(Some(map)) = cache.get(&file) {
227                if map.named_suppressions.is_empty() {
228                    continue;
229                }
230                let file_issues: Vec<Issue> = issues
231                    .iter()
232                    .filter(|i| i.location.file == file)
233                    .cloned()
234                    .collect();
235                // Pre-suppressed issues arrived with suppressed=true from the
236                // IssueBuffer mechanism (collector / body analysis). They may be
237                // at a different line than the SuppressionMap target and need
238                // special handling in unused_named.
239                let pre_suppressed: Vec<&Issue> =
240                    file_issues.iter().filter(|i| i.suppressed).collect();
241                // Issues newly suppressed by the SuppressionMap in this pass
242                // arrived with suppressed=false; after the marking loop they
243                // also have suppressed=true. Pass all file issues for exact-line
244                // matching; pre_suppressed enables the docblock-range fallback.
245                let unused = map.unused_named(&file_issues, &pre_suppressed);
246                for (line, kind) in unused {
247                    let loc = mir_types::Location::new(file.clone(), line, line, 0, 0);
248                    let mut issue =
249                        Issue::new(mir_issues::IssueKind::UnusedPsalmSuppress { kind }, loc);
250                    if map.is_suppressed(line, issue.kind.name(), issue.kind.code()) {
251                        issue.suppressed = true;
252                    }
253                    new_issues.push(issue);
254                }
255            }
256        }
257        issues.extend(new_issues);
258    }
259
260    fn type_exists(&self, fqcn: &str) -> bool {
261        let db = self.snapshot_db();
262        crate::db::class_exists(&db, fqcn)
263    }
264
265    fn collect_and_ingest_source(
266        &self,
267        file: Arc<str>,
268        src: &str,
269        php_version: PhpVersion,
270    ) -> FileDefinitions {
271        self.db.collect_and_ingest_file(file, src, php_version)
272    }
273
274    /// Rebuild the workspace symbol index singleton from every registered source
275    /// file. Required in the batch path because `workspace_index` reads the
276    /// maintained singleton, and that singleton is built from vendor *before*
277    /// `analyze_paths` registers project files (and before `lazy_load_*` faults
278    /// in referenced classes). Without refreshing it, `find_class_like` /
279    /// `class_exists` miss every project and lazy-loaded class, yielding false
280    /// `UndefinedClass`. Cheap after the definition caches are warm (no parsing).
281    fn refresh_workspace_index(&self) {
282        let mut guard = self.db.salsa.write();
283        guard.rebuild_workspace_symbol_index();
284    }
285
286    /// Load the configured PHP version + built-in stubs + user stubs into
287    /// the shared db. Called by [`Self::analyze_paths`] and
288    /// [`Self::collect_definitions`].
289    fn load_batch_stubs(&self, php_version: PhpVersion) {
290        // Wire the PHP version into the db before any SourceFile inputs are
291        // registered — collect_file_definitions reads it for @since/@removed filtering.
292        {
293            let version_str = Arc::from(php_version.to_string().as_str());
294            self.db.salsa.write().set_php_version(version_str);
295        }
296
297        // Built-in stubs for the configured PHP version.
298        let paths: Vec<&'static str> = crate::stubs::stub_files().iter().map(|&(p, _)| p).collect();
299        self.db.ingest_stub_paths(&paths, php_version);
300
301        // User-configured stubs.
302        self.db
303            .ingest_user_stubs(&self.user_stub_files, &self.user_stub_dirs);
304
305        // Ensure a resolver is configured so pull-path lookups can map
306        // built-in FQCNs to the stub VFS paths registered above.
307        let mut guard = self.db.salsa.write();
308        if guard.current_resolver().is_none() {
309            let resolver: Arc<dyn crate::ClassResolver> = Arc::new(crate::StubClassResolver);
310            guard.set_resolver(Some(resolver));
311        }
312    }
313
314    /// Run the full batch analysis pipeline on a set of file paths.
315    pub fn analyze_paths(&self, paths: &[PathBuf], opts: &BatchOptions) -> AnalysisResult {
316        let php_version = self.batch_php_version(opts);
317        let mut all_issues = Vec::new();
318        let _t0 = std::time::Instant::now();
319
320        // ---- Load PHP built-in stubs (before definition collection so user code can override)
321        self.load_batch_stubs(php_version);
322        let _t_stubs = _t0.elapsed();
323
324        // ---- Read files in parallel ----------------------------------
325        let parsed_files: Vec<ParsedProjectFile> = paths
326            .par_iter()
327            .filter_map(|path| match std::fs::read_to_string(path) {
328                Ok(src) => {
329                    let file = Arc::from(path.to_string_lossy().as_ref());
330                    Some(ParsedProjectFile::new(file, Arc::from(src)))
331                }
332                Err(e) => {
333                    eprintln!("Cannot read {}: {}", path.display(), e);
334                    None
335                }
336            })
337            .collect();
338        let _t_read = _t0.elapsed();
339
340        let file_data: Vec<(Arc<str>, Arc<str>)> = parsed_files
341            .iter()
342            .map(|parsed| (parsed.file.clone(), parsed.source.clone()))
343            .collect();
344
345        // ---- Pre-analysis invalidation: evict dependents of changed/removed files
346        if let Some(cache) = &self.cache {
347            let mut invalidated: Vec<String> = file_data
348                .par_iter()
349                .filter_map(|(f, src)| {
350                    let h = hash_content(src.as_ref());
351                    if cache.get(f, &h).is_none() {
352                        Some(f.to_string())
353                    } else {
354                        None
355                    }
356                })
357                .collect();
358
359            // Files analyzed in a previous run but now gone from disk: their
360            // dependents hold stale results that still assume the deleted
361            // definitions exist. A file merely absent from this run's path set
362            // (but still on disk) is NOT a deletion — checking disk existence
363            // avoids evicting dependents during partial-path analysis.
364            let current: std::collections::HashSet<&str> =
365                file_data.iter().map(|(f, _)| f.as_ref()).collect();
366            let removed: Vec<String> = cache
367                .cached_files()
368                .into_iter()
369                .filter(|f| !current.contains(f.as_str()) && !std::path::Path::new(f).exists())
370                .collect();
371            for f in &removed {
372                cache.evict(f);
373            }
374            invalidated.extend(removed);
375
376            if !invalidated.is_empty() {
377                cache.evict_with_dependents(&invalidated);
378            }
379        }
380
381        // ---- Register Salsa source inputs for incremental follow-up calls ----
382        {
383            let mut guard = self.db.salsa.write();
384            for parsed in &parsed_files {
385                guard.upsert_source_file(parsed.file.clone(), parsed.source.clone());
386            }
387        }
388        let _t_salsa_reg = _t0.elapsed();
389
390        // ---- Definition collection from the already-parsed AST -------
391        // Returns (FileDefinitions, content_hash, has_hard_parse_errors) so we
392        // can prime the parse cache before the pre-warm loop below.
393        type Pass1Entry = (FileDefinitions, [u8; 32], bool);
394        let file_defs: Vec<Pass1Entry> = parsed_files
395            .par_iter()
396            .map(|parsed| {
397                let content_hash = hash_source(parsed.source());
398                let has_hard_parse_errors = parsed
399                    .errors()
400                    .iter()
401                    .any(crate::parser::is_hard_parse_error);
402                let mut all_issues: Vec<Issue> = parsed
403                    .errors()
404                    .iter()
405                    .map(|err| {
406                        crate::parser::parse_error_to_issue(
407                            err,
408                            &parsed.file,
409                            parsed.source(),
410                            parsed.source_map(),
411                        )
412                    })
413                    .collect();
414                let collector = crate::collector::DefinitionCollector::new_for_slice(
415                    parsed.file.clone(),
416                    parsed.source(),
417                    parsed.source_map(),
418                );
419                let (mut slice, collector_issues) = collector.collect_slice(parsed.owned());
420                all_issues.extend(collector_issues);
421                mir_codebase::storage::deduplicate_params_in_slice(&mut slice);
422                let defs = FileDefinitions {
423                    slice: Arc::new(slice),
424                    issues: Arc::new(all_issues),
425                };
426                (defs, content_hash, has_hard_parse_errors)
427            })
428            .collect();
429        let _t_collect_defs = _t0.elapsed();
430
431        // Prime the in-process parse cache so the pre-warm loop below avoids
432        // re-parsing every project file through collect_file_definitions.
433        {
434            let guard = self.db.salsa.read();
435            for (defs, hash, has_hard_parse_errors) in &file_defs {
436                if !*has_hard_parse_errors {
437                    guard.prime_parse_cache(*hash, Arc::clone(&defs.slice));
438                }
439            }
440        }
441
442        let mut files_with_parse_errors: HashSet<Arc<str>> = HashSet::default();
443        for (defs, _hash, _hard_err) in file_defs {
444            for issue in defs.issues.iter() {
445                if matches!(issue.kind, mir_issues::IssueKind::ParseError { .. })
446                    && issue.severity == mir_issues::Severity::Error
447                {
448                    files_with_parse_errors.insert(issue.location.file.clone());
449                }
450            }
451            all_issues.extend(Arc::unwrap_or_clone(defs.issues));
452        }
453        let _t_ingest = _t0.elapsed();
454
455        // ---- Pre-warm collect_file_definitions for project files -------------
456        {
457            let db_prewarm = {
458                let guard = self.db.salsa.read();
459                (**guard).clone()
460            };
461            let project_source_files: Vec<SourceFile> = {
462                let guard = self.db.salsa.read();
463                parsed_files
464                    .iter()
465                    .filter_map(|p| (**guard).lookup_source_file(&p.file))
466                    .collect()
467            };
468            project_source_files
469                .into_par_iter()
470                .for_each_with(db_prewarm, |db, sf| {
471                    let _ = collect_file_definitions(db as &dyn MirDatabase, sf);
472                });
473        }
474        let _t_prewarm_ms = (_t0.elapsed() - _t_ingest).as_secs_f64() * 1000.0;
475
476        // Fold the freshly-registered project files into the workspace symbol
477        // index singleton. The singleton may have been built from vendor before
478        // this run (CLI indexes vendor before analyze_paths); since adding files
479        // no longer nulls it, project classes would otherwise be invisible to
480        // find_class_like and reported as false UndefinedClass.
481        self.refresh_workspace_index();
482
483        // ---- Lazy-load unknown classes via PSR-4 ----------------------------
484        let _t_before_lazy = _t0.elapsed();
485        if let Some(psr4) = self.psr4.clone() {
486            self.lazy_load_missing_classes(psr4, php_version, &mut all_issues);
487        }
488        let _t_lazyload_ms = (_t0.elapsed() - _t_before_lazy).as_secs_f64() * 1000.0;
489
490        // ---- Class-level checks ---------------------------------------------
491        let analyzed_file_set: HashSet<Arc<str>> =
492            file_data.iter().map(|(f, _)| f.clone()).collect();
493        let _t_class_analyzer = std::time::Instant::now();
494        {
495            let class_db = {
496                let guard = self.db.salsa.read();
497                (**guard).clone()
498            };
499            let class_issues = crate::class::ClassAnalyzer::with_files(
500                &class_db,
501                analyzed_file_set.clone(),
502                &file_data,
503            )
504            .analyze_all();
505            all_issues.extend(class_issues);
506        }
507        let _t_class_analyzer_ms = _t_class_analyzer.elapsed().as_secs_f64() * 1000.0;
508
509        let _t_class_checks = _t0.elapsed();
510
511        let mut db_main = {
512            let guard = self.db.salsa.read();
513            (**guard).clone()
514        };
515        // All index mutation for the body pass is done (lazy_load_missing_classes
516        // + refresh ran above; lazy_load_from_body_issues runs *after* this pass
517        // on a separate db). Freeze the index on this ephemeral clone so each
518        // find_class_like borrows it instead of cloning the singleton's three
519        // Arcs per call — the per-worker `map_with` clone bumps the refcount once.
520        db_main.freeze_workspace_index();
521
522        // ---- Body analysis: function/method bodies in parallel --------------
523        type BodyResult = (
524            Arc<str>,
525            Vec<Issue>,
526            Vec<crate::symbol::ResolvedSymbol>,
527            Vec<RefLoc>,
528        );
529        let body_results: Vec<BodyResult> = parsed_files
530            .par_iter()
531            .filter(|parsed| !files_with_parse_errors.contains(&parsed.file))
532            .map_with(db_main, |db, parsed| {
533                let driver = BodyAnalyzer::new(&*db as &dyn MirDatabase, php_version);
534                let (issues, symbols) = if let Some(cache) = &self.cache {
535                    let h = hash_content(parsed.source());
536                    if let Some((cached_issues, ref_locs)) = cache.get(&parsed.file, &h) {
537                        // Cache replay: rebuild the file's complete reference
538                        // set straight from the cached tuples — no pending-
539                        // buffer detour.
540                        let locs: Vec<RefLoc> = ref_locs
541                            .iter()
542                            .map(|(symbol, line, col_start, col_end)| RefLoc {
543                                symbol_key: Arc::from(symbol.as_str()),
544                                file: parsed.file.clone(),
545                                line: *line,
546                                col_start: *col_start,
547                                col_end: *col_end,
548                            })
549                            .collect();
550                        return (parsed.file.clone(), cached_issues, Vec::new(), locs);
551                    }
552                    let (issues, symbols) = driver.analyze_bodies(
553                        parsed.owned(),
554                        parsed.file.clone(),
555                        parsed.source(),
556                        parsed.source_map(),
557                    );
558                    let pending = db.take_pending_ref_locs();
559                    let cache_locs = pending
560                        .iter()
561                        .map(|r| (r.symbol_key.to_string(), r.line, r.col_start, r.col_end))
562                        .collect();
563                    cache.put(&parsed.file, h, issues.clone(), cache_locs);
564                    if let Some(cb) = &opts.on_file_done {
565                        cb();
566                    }
567                    let symbols = if opts.skip_symbols {
568                        Vec::new()
569                    } else {
570                        symbols
571                    };
572                    return (parsed.file.clone(), issues, symbols, pending);
573                } else {
574                    driver.analyze_bodies(
575                        parsed.owned(),
576                        parsed.file.clone(),
577                        parsed.source(),
578                        parsed.source_map(),
579                    )
580                };
581                let pending = db.take_pending_ref_locs();
582                if let Some(cb) = &opts.on_file_done {
583                    cb();
584                }
585                // Drop the per-file symbol vec inside the worker when the
586                // consumer opted out — the orchestrator never accumulates.
587                let symbols = if opts.skip_symbols {
588                    Vec::new()
589                } else {
590                    symbols
591                };
592                (parsed.file.clone(), issues, symbols, pending)
593            })
594            .collect();
595
596        let _t_body_analysis = _t0.elapsed();
597
598        // Serial commit with replace semantics: each file's output (or cache
599        // replay) is its complete reference set, so stale entries from a
600        // prior run cannot survive an append.
601        let mut all_symbols = Vec::new();
602        {
603            let guard = self.db.salsa.read();
604            for (file, issues, symbols, ref_locs) in body_results {
605                all_issues.extend(issues);
606                all_symbols.extend(symbols);
607                guard.set_file_reference_locations(file.as_ref(), ref_locs);
608            }
609        }
610
611        // ---- Post-analysis lazy loading: FQCNs used without `use` imports ------
612        if let Some(psr4) = self.psr4.clone() {
613            self.lazy_load_from_body_issues(
614                psr4,
615                php_version,
616                &file_data,
617                &files_with_parse_errors,
618                &mut all_issues,
619                &mut all_symbols,
620                opts.skip_symbols,
621            );
622        }
623
624        // ---- Build reverse dep graph and persist it for the next run ---------
625        // Must run AFTER `commit_reference_locations_batch` (above): the graph's
626        // call-site / instantiation / inferred-return edges are derived from the
627        // committed reference-location map. Built any earlier (the salsa db is
628        // fresh each session) that map is empty, so only structural edges
629        // (parent/interface/trait/declared types) survive — and any dependent
630        // reachable only through a call site or inferred type goes stale.
631        if let Some(cache) = &self.cache {
632            let db_snapshot = {
633                let guard = self.db.salsa.read();
634                (**guard).clone()
635            };
636            let rev = build_reverse_deps(&db_snapshot);
637            cache.set_reverse_deps(rev);
638        }
639
640        // Persist cache hits/misses to disk
641        if let Some(cache) = &self.cache {
642            cache.flush();
643        }
644
645        // ---- Dead-code detection -------------------------------------------
646        if opts.should_run_dead_code() {
647            let salsa = self.snapshot_db();
648            let _t_dead_code = std::time::Instant::now();
649            let dead_code_issues =
650                crate::dead_code::DeadCodeAnalyzer::with_files(&salsa, analyzed_file_set.clone())
651                    .analyze();
652            all_issues.extend(dead_code_issues);
653            if std::env::var("MIR_TIMING").is_ok() {
654                eprintln!(
655                    "[timing] dead_code_analyzer={:.0}ms",
656                    _t_dead_code.elapsed().as_secs_f64() * 1000.0
657                );
658            }
659        }
660
661        let _t_total = _t0.elapsed();
662        if std::env::var("MIR_TIMING").is_ok() {
663            eprintln!(
664                "[timing] stubs={:.0}ms read={:.0}ms salsa_reg={:.0}ms collect_defs={:.0}ms ingest={:.0}ms class_checks={:.0}ms (prewarm={:.0}ms lazy_load={:.0}ms class_analyzer={:.0}ms) body_analysis={:.0}ms total={:.0}ms",
665                _t_stubs.as_secs_f64() * 1000.0,
666                (_t_read - _t_stubs).as_secs_f64() * 1000.0,
667                (_t_salsa_reg - _t_read).as_secs_f64() * 1000.0,
668                (_t_collect_defs - _t_salsa_reg).as_secs_f64() * 1000.0,
669                (_t_ingest - _t_collect_defs).as_secs_f64() * 1000.0,
670                (_t_class_checks - _t_ingest).as_secs_f64() * 1000.0,
671                _t_prewarm_ms,
672                _t_lazyload_ms,
673                _t_class_analyzer_ms,
674                (_t_body_analysis - _t_class_checks).as_secs_f64() * 1000.0,
675                _t_total.as_secs_f64() * 1000.0,
676            );
677        }
678
679        opts.apply(&mut all_issues);
680        let analyzed_files_vec: Vec<Arc<str>> = analyzed_file_set.iter().cloned().collect();
681        self.apply_suppressions_and_emit_unused(&mut all_issues, &analyzed_files_vec);
682        if let Some(dump) = crate::metrics::dump() {
683            eprintln!("{dump}");
684        }
685
686        // ---- Build workspace symbol index singleton -------------------------
687        {
688            let mut guard = self.db.salsa.write();
689            guard.rebuild_workspace_symbol_index();
690        }
691
692        AnalysisResult::build(all_issues, rustc_hash::FxHashMap::default(), all_symbols)
693    }
694
695    fn lazy_load_missing_classes(
696        &self,
697        psr4: Arc<crate::composer::Psr4Map>,
698        php_version: PhpVersion,
699        all_issues: &mut Vec<Issue>,
700    ) {
701        let max_depth = 10;
702        let mut loaded: HashSet<String> = HashSet::default();
703        let mut scanned: HashSet<Arc<str>> = HashSet::default();
704
705        for _ in 0..max_depth {
706            let mut to_load: Vec<(String, PathBuf)> = Vec::new();
707
708            let mut try_queue = |fqcn: &str| {
709                if !self.type_exists(fqcn) && !loaded.contains(fqcn) {
710                    if let Some(path) = psr4.resolve(fqcn) {
711                        to_load.push((fqcn.to_string(), path));
712                    }
713                }
714            };
715
716            let mut candidates: Vec<String> = Vec::new();
717            let import_candidates = {
718                let db_owned = self.snapshot_db();
719                let db = &db_owned;
720                for fqcn in crate::db::workspace_classes(db).iter() {
721                    if scanned.contains(fqcn.as_ref()) {
722                        continue;
723                    }
724                    let here = crate::db::Fqcn::from_str(db, fqcn.as_ref());
725                    let Some(class) = crate::db::find_class_like(db, here) else {
726                        continue;
727                    };
728                    scanned.insert(fqcn.clone());
729                    collect_class_referenced_fqcns(&class, &mut candidates);
730                }
731                db.file_import_snapshots()
732                    .into_iter()
733                    .flat_map(|(_, imports)| {
734                        imports
735                            .values()
736                            .map(|sym| sym.as_str().to_string())
737                            .collect::<Vec<_>>()
738                    })
739                    .collect::<Vec<_>>()
740            };
741            for fqcn in candidates {
742                try_queue(&fqcn);
743            }
744            for fqcn in import_candidates {
745                try_queue(&fqcn);
746            }
747
748            if to_load.is_empty() {
749                break;
750            }
751
752            // Mark everything queued as loaded up-front so a file that fails to
753            // read isn't retried on the next depth iteration (matches the serial
754            // behaviour, where `loaded.insert` ran before the read attempt).
755            for (fqcn, _) in &to_load {
756                loaded.insert(fqcn.clone());
757            }
758
759            // Read + parse + ingest the missing classes in parallel. The parse
760            // and definition walk inside `collect_and_ingest_source` already run
761            // off the salsa write lock (it takes the lock only for the brief
762            // input upsert), so fanning the per-file work across the rayon pool
763            // turns this previously-serial phase — the dominant cost on the lazy
764            // path — concurrent. `collect()` on a rayon map preserves input
765            // order, so the resulting issue ordering matches the serial version.
766            let per_file_issues: Vec<Vec<Issue>> = to_load
767                .par_iter()
768                .map(|(_, path)| -> Vec<Issue> {
769                    let Ok(src) = std::fs::read_to_string(path) else {
770                        return Vec::new();
771                    };
772                    let file: Arc<str> = Arc::from(path.to_string_lossy().as_ref());
773                    let is_vendor = file.contains("/vendor/") || file.contains("\\vendor\\");
774                    let defs = self.collect_and_ingest_source(file, &src, php_version);
775                    if is_vendor {
776                        Vec::new()
777                    } else {
778                        Arc::unwrap_or_clone(defs.issues)
779                    }
780                })
781                .collect();
782            for mut issues in per_file_issues {
783                all_issues.append(&mut issues);
784            }
785
786            // Make the just-loaded classes visible to the next iteration's
787            // transitive scan and to the caller's post-lazy-load snapshot.
788            self.refresh_workspace_index();
789        }
790    }
791
    /// Resolve `UndefinedClass` issues produced by body analysis by lazily
    /// loading the missing classes, then re-run body analysis on the files
    /// whose issues may have been false positives.
    ///
    /// Runs at most `max_depth` rounds. Each round:
    /// 1. collects the class names of `UndefinedClass` issues that are still
    ///    unknown and resolvable through `psr4`, and ingests their files;
    /// 2. refreshes the workspace index and chases transitive references via
    ///    [`Self::lazy_load_missing_classes`];
    /// 3. drops every issue and symbol belonging to a file that has an
    ///    `UndefinedClass` issue whose class now exists;
    /// 4. re-analyzes those files' bodies in parallel (taking sources from
    ///    `file_data`, skipping `files_with_parse_errors`) and appends the
    ///    fresh issues, symbols (unless `skip_symbols`) and reference
    ///    locations.
    ///
    /// The loop stops early when nothing new can be loaded or when no file
    /// needs re-analysis.
    #[allow(clippy::too_many_arguments)]
    fn lazy_load_from_body_issues(
        &self,
        psr4: Arc<crate::composer::Psr4Map>,
        php_version: PhpVersion,
        file_data: &[(Arc<str>, Arc<str>)],
        files_with_parse_errors: &HashSet<Arc<str>>,
        all_issues: &mut Vec<Issue>,
        all_symbols: &mut Vec<crate::symbol::ResolvedSymbol>,
        skip_symbols: bool,
    ) {
        use mir_issues::IssueKind;

        let max_depth = 5;
        // Class names already attempted in an earlier round; never retried,
        // even if reading the resolved file failed.
        let mut loaded: HashSet<String> = HashSet::default();

        for _ in 0..max_depth {
            // Keyed by class name, so each missing class is queued once no
            // matter how many issues mention it.
            let mut to_load: HashMap<String, PathBuf> = HashMap::default();

            for issue in all_issues.iter() {
                if let IssueKind::UndefinedClass { name } = &issue.kind {
                    if !self.type_exists(name) && !loaded.contains(name) {
                        if let Some(path) = psr4.resolve(name) {
                            to_load.entry(name.clone()).or_insert(path);
                        }
                    }
                }
            }

            if to_load.is_empty() {
                break;
            }

            loaded.extend(to_load.keys().cloned());

            // Ingest definitions only; the returned definitions (and any
            // issues they carry) are intentionally discarded here.
            for path in to_load.values() {
                if let Ok(src) = std::fs::read_to_string(path) {
                    let file: Arc<str> = Arc::from(path.to_string_lossy().as_ref());
                    let _ = self.collect_and_ingest_source(file, &src, php_version);
                }
            }

            // Make the loaded classes visible to the type_exists() check below
            // (and to the reanalysis snapshot) so resolved files are detected.
            self.refresh_workspace_index();

            self.lazy_load_missing_classes(psr4.clone(), php_version, all_issues);

            // Files holding at least one `UndefinedClass` issue whose class
            // exists now, i.e. files whose previous results are stale.
            let files_to_reanalyze: HashSet<Arc<str>> = all_issues
                .iter()
                .filter_map(|i| {
                    if let IssueKind::UndefinedClass { name } = &i.kind {
                        if self.type_exists(name) {
                            return Some(i.location.file.clone());
                        }
                    }
                    None
                })
                .collect();

            if files_to_reanalyze.is_empty() {
                break;
            }

            // Discard everything previously reported for the stale files; the
            // re-analysis below regenerates it.
            // NOTE(review): a stale file that is absent from `file_data` or
            // listed in `files_with_parse_errors` is filtered out of the
            // re-analysis, so its issues are removed without replacement —
            // confirm that is intended.
            all_issues.retain(|i| !files_to_reanalyze.contains(&i.location.file));
            all_symbols.retain(|s| !files_to_reanalyze.contains(&s.file));

            // Clone the database under a read lock; each rayon worker gets its
            // own clone through `map_with`.
            let db_full = {
                let guard = self.db.salsa.read();
                (**guard).clone()
            };

            let reanalysis: Vec<(Vec<Issue>, Vec<crate::symbol::ResolvedSymbol>, Vec<RefLoc>)> =
                file_data
                    .par_iter()
                    .filter(|(f, _)| {
                        !files_with_parse_errors.contains(f) && files_to_reanalyze.contains(f)
                    })
                    .map_with(db_full, |db, (file, src)| {
                        let driver = BodyAnalyzer::new(&*db as &dyn MirDatabase, php_version);
                        let parsed = php_rs_parser::parse(src);
                        let (issues, symbols) = driver.analyze_bodies(
                            &parsed.program,
                            file.clone(),
                            src,
                            &parsed.source_map,
                        );
                        // Drain the reference locations recorded on this
                        // worker's clone so they travel with the file's result.
                        let pending = db.take_pending_ref_locs();
                        (issues, symbols, pending)
                    })
                    .collect();

            let mut reanalysis_ref_locs: Vec<RefLoc> = Vec::new();
            for (issues, symbols, ref_locs) in reanalysis {
                all_issues.extend(issues);
                if !skip_symbols {
                    all_symbols.extend(symbols);
                }
                reanalysis_ref_locs.extend(ref_locs);
            }
            // Commit all re-analysis reference locations in one batch.
            {
                let guard = self.db.salsa.read();
                guard.commit_reference_locations_batch(reanalysis_ref_locs);
            }
        }
    }
898
    /// Re-analyze a single file (definition collection + body analysis) within the batch context.
    ///
    /// Mirrors the old `ProjectAnalyzer::re_analyze_file` cache-aware path.
    /// Use [`Self::reanalyze_dependents`] for LSP-style per-file flows that
    /// don't need batch options.
    ///
    /// `file_path` identifies the file and `new_content` is its current text.
    /// When a cache is attached and holds an entry for the content hash, the
    /// cached issues and reference locations are replayed without re-running
    /// analysis. Otherwise the file's definitions are re-collected, its bodies
    /// are analyzed (unless it has hard parse errors), and the result is
    /// written back to the cache. `opts` is applied to the returned issues on
    /// both paths.
    pub fn re_analyze_file(
        &self,
        file_path: &str,
        new_content: &str,
        opts: &BatchOptions,
    ) -> AnalysisResult {
        let php_version = self.batch_php_version(opts);

        // Fast path: content unchanged and cache has a valid entry.
        if let Some(cache) = &self.cache {
            let h = hash_content(new_content);
            if let Some((mut issues, ref_locs)) = cache.get(file_path, &h) {
                let file: Arc<str> = Arc::from(file_path);
                // Replace semantics: the cached set is the file's complete
                // reference set, so stale entries from a prior version are
                // cleared rather than appended over.
                let locs: Vec<RefLoc> = ref_locs
                    .iter()
                    .map(|(symbol, line, col_start, col_end)| RefLoc {
                        symbol_key: Arc::from(symbol.as_str()),
                        file: file.clone(),
                        line: *line,
                        col_start: *col_start,
                        col_end: *col_end,
                    })
                    .collect();
                let guard = self.db.salsa.read();
                guard.set_file_reference_locations(file_path, locs);
                drop(guard);
                opts.apply(&mut issues);
                self.apply_suppressions_and_emit_unused(&mut issues, std::slice::from_ref(&file));
                // Cached results carry no symbols or type environments.
                return AnalysisResult::build(issues, HashMap::default(), Vec::new());
            }
        }

        let file: Arc<str> = Arc::from(file_path);

        // Drop the file's previous definitions before registering the new text.
        {
            let mut guard = self.db.salsa.write();
            guard.remove_file_definitions(file_path);
        }

        // Register the new content and collect its definitions.
        let file_defs = {
            let mut guard = self.db.salsa.write();
            let salsa_file = guard.upsert_source_file(file.clone(), Arc::from(new_content));
            collect_file_definitions(&**guard, salsa_file)
        };

        let mut all_issues: Vec<Issue> = Arc::unwrap_or_clone(file_defs.issues.clone());

        // Rebuild the workspace symbol index only when one already exists and
        // this file's declarations actually changed.
        {
            let mut guard = self.db.salsa.write();
            if guard.workspace_symbol_index_singleton().is_some() {
                if let Some(sf) = guard.lookup_source_file(file.as_ref()) {
                    if guard.file_declarations_changed(sf) {
                        guard.rebuild_workspace_symbol_index();
                    }
                }
            }
        }

        let symbols = {
            // NOTE(review): the write lock is held across parsing and body
            // analysis although only shared access is used below; presumably
            // this keeps the pending reference locations exclusive to this
            // file — confirm before narrowing the lock.
            let guard = self.db.salsa.write();

            let parsed = php_rs_parser::parse(new_content);

            // Body analysis is skipped entirely when the file has hard parse
            // errors; only definition-level issues are reported then.
            let has_hard_errors = parsed.errors.iter().any(crate::parser::is_hard_parse_error);
            if !has_hard_errors {
                let db_ref: &dyn MirDatabase = &**guard;
                let driver = BodyAnalyzer::new(db_ref, php_version);
                let (body_issues, symbols) = driver.analyze_bodies(
                    &parsed.program,
                    file.clone(),
                    new_content,
                    &parsed.source_map,
                );
                all_issues.extend(body_issues);
                // Replace this file's reference locations with the ones the
                // analysis just recorded.
                let pending = guard.take_pending_ref_locs();
                guard.set_file_reference_locations(file.as_ref(), pending);
                symbols
            } else {
                Vec::new()
            }
        };

        // Bake inline-suppression marks in *before* caching: suppression is a
        // pure function of file content (and the cache key hashes content), so
        // the cached issues should already carry their marks. The cache-hit
        // branch above replays this file's source without re-registering the
        // `SourceFile` input, so the db-backed post-filter cannot recompute
        // marks there — caching the canonical result is what keeps a fresh
        // process honoring `@mir-ignore` on an unchanged file.
        mark_suppressed(
            &mut all_issues,
            &crate::suppression::SuppressionMap::from_source(new_content),
        );

        if let Some(cache) = &self.cache {
            let h = hash_content(new_content);
            // Invalidate this file and whatever the cache tracks as depending
            // on it, then store the fresh (pre-`opts`) result.
            cache.evict_with_dependents(&[file_path.to_string()]);
            let db = self.snapshot_db();
            let ref_locs = extract_reference_locations(&db, &file);
            cache.put(file_path, h, all_issues.clone(), ref_locs);
        }

        // NOTE(review): unlike the cache-hit branch, this path never calls
        // `apply_suppressions_and_emit_unused`, so the two paths may report
        // different issue sets for identical content — confirm whether that
        // is intended.
        opts.apply(&mut all_issues);
        AnalysisResult::build(all_issues, HashMap::default(), symbols)
    }
1012
1013    /// Collect type definitions only from `paths` into the codebase
1014    /// without analyzing method bodies or emitting issues. Used to load
1015    /// vendor types.
1016    ///
1017    /// When a disk-backed cache is attached, per-file `StubSlice` results
1018    /// from previous runs are reused on a content-hash match, eliminating
1019    /// the parse + definition-collection step. Cache misses run the normal
1020    /// pipeline and write back so subsequent runs hit.
1021    pub fn collect_definitions(&self, paths: &[PathBuf]) {
1022        let _timing = std::env::var("MIR_TIMING").is_ok();
1023        let _t0 = std::time::Instant::now();
1024
1025        let php_v = self.php_version.cache_byte();
1026
1027        struct FileEntry {
1028            file: Arc<str>,
1029            src: Arc<str>,
1030            hash: [u8; 32],
1031            cached: Option<mir_codebase::storage::StubSlice>,
1032        }
1033        let entries: Vec<FileEntry> = paths
1034            .par_iter()
1035            .filter_map(|path| {
1036                let src = std::fs::read_to_string(path).ok()?;
1037                let file: Arc<str> = Arc::from(path.to_string_lossy().as_ref());
1038                let src: Arc<str> = Arc::from(src);
1039                let hash = hash_source(&src);
1040                let cached = self.db.stub_cache.as_ref().and_then(|c| {
1041                    let mut slice = c.get(&file, &hash, php_v)?;
1042                    prepare_for_ingest(&mut slice);
1043                    Some(slice)
1044                });
1045                Some(FileEntry {
1046                    file,
1047                    src,
1048                    hash,
1049                    cached,
1050                })
1051            })
1052            .collect();
1053        let _t_read = _t0.elapsed();
1054
1055        let source_files: Vec<SourceFile> = {
1056            let mut guard = self.db.salsa.write();
1057            entries
1058                .iter()
1059                .map(|e| {
1060                    guard.upsert_source_file_with_durability(
1061                        e.file.clone(),
1062                        e.src.clone(),
1063                        salsa::Durability::HIGH,
1064                    )
1065                })
1066                .collect()
1067        };
1068        let _t_reg = _t0.elapsed();
1069
1070        let db_pass1 = {
1071            let guard = self.db.salsa.read();
1072            (**guard).clone()
1073        };
1074        let stub_cache = self.db.stub_cache.clone();
1075        let prepared: Vec<mir_codebase::storage::StubSlice> = entries
1076            .into_par_iter()
1077            .zip(source_files.into_par_iter())
1078            .map_with(db_pass1, |db, (mut entry, salsa_file)| {
1079                if let Some(slice) = entry.cached.take() {
1080                    let slice_arc = Arc::new(slice);
1081                    db.parse_cache().insert(entry.hash, Arc::clone(&slice_arc));
1082                    return (*slice_arc).clone();
1083                }
1084                let defs = collect_file_definitions(&*db, salsa_file);
1085                if let Some(cache) = stub_cache.as_ref() {
1086                    cache.put(&entry.file, &entry.hash, php_v, &defs.slice);
1087                }
1088                (*defs.slice).clone()
1089            })
1090            .collect();
1091        let _t_collect = _t0.elapsed();
1092        drop(prepared);
1093        let _t_ingest = _t0.elapsed();
1094
1095        if _timing {
1096            let (hits, misses) = self.stub_cache_stats();
1097            eprintln!(
1098                "[vendor] read={:.0}ms reg={:.0}ms collect={:.0}ms ingest={:.0}ms total={:.0}ms (cache hits={hits} misses={misses})",
1099                _t_read.as_secs_f64() * 1000.0,
1100                (_t_reg - _t_read).as_secs_f64() * 1000.0,
1101                (_t_collect - _t_reg).as_secs_f64() * 1000.0,
1102                (_t_ingest - _t_collect).as_secs_f64() * 1000.0,
1103                _t_ingest.as_secs_f64() * 1000.0,
1104            );
1105        }
1106
1107        {
1108            let mut guard = self.db.salsa.write();
1109            guard.rebuild_workspace_symbol_index();
1110        }
1111
1112        crate::collector::print_collector_stats();
1113    }
1114}
1115
1116/// Analyze a PHP source string without a real file path. Useful for tests
1117/// and single-file LSP mode. Allocates a throwaway db; doesn't touch any
1118/// existing session.
1119pub fn analyze_source(source: &str) -> AnalysisResult {
1120    let php_version = PhpVersion::LATEST;
1121    let file: Arc<str> = Arc::from("<source>");
1122    let mut db = MirDbStorage::default();
1123    db.set_php_version(Arc::from(php_version.to_string().as_str()));
1124    crate::stubs::load_stubs_for_version(&mut db, php_version);
1125    let salsa_file = SourceFile::new(&db, file.clone(), Arc::from(source));
1126    let file_defs = collect_file_definitions(&db, salsa_file);
1127    let suppressions = crate::suppression::SuppressionMap::from_source(source);
1128    let mut all_issues = Arc::unwrap_or_clone(file_defs.issues);
1129    if all_issues.iter().any(|issue| {
1130        matches!(issue.kind, mir_issues::IssueKind::ParseError { .. })
1131            && issue.severity == mir_issues::Severity::Error
1132    }) {
1133        mark_suppressed(&mut all_issues, &suppressions);
1134        return AnalysisResult::build(all_issues, rustc_hash::FxHashMap::default(), Vec::new());
1135    }
1136    let mut type_envs = rustc_hash::FxHashMap::default();
1137    let mut all_symbols = Vec::new();
1138    let result = php_rs_parser::parse(source);
1139
1140    let driver = BodyAnalyzer::new(&db, php_version);
1141    all_issues.extend(driver.analyze_bodies_typed(
1142        &result.program,
1143        file.clone(),
1144        source,
1145        &result.source_map,
1146        &mut type_envs,
1147        &mut all_symbols,
1148    ));
1149    mark_suppressed(&mut all_issues, &suppressions);
1150    emit_unused_suppressions(&mut all_issues, &suppressions, &file);
1151    AnalysisResult::build(all_issues, type_envs, all_symbols)
1152}
1153
1154/// Mark issues silenced by a single file's [`SuppressionMap`]. Shared by the
1155/// in-memory [`analyze_source`] entry point, which has the source in hand and
1156/// does not go through the db-backed batch post-filter.
1157fn mark_suppressed(issues: &mut [Issue], suppressions: &crate::suppression::SuppressionMap) {
1158    if suppressions.is_empty() {
1159        return;
1160    }
1161    for issue in issues.iter_mut() {
1162        if !issue.suppressed
1163            && suppressions.is_suppressed(issue.location.line, issue.kind.name(), issue.kind.code())
1164        {
1165            issue.suppressed = true;
1166        }
1167    }
1168}
1169
1170/// Append `UnusedPsalmSuppress` issues for any named `@suppress`/`@psalm-suppress`
1171/// annotations that did not match any issue in `all_issues`. The new issues are
1172/// themselves subject to suppression (so `@suppress UnusedPsalmSuppress` works).
1173fn emit_unused_suppressions(
1174    all_issues: &mut Vec<Issue>,
1175    suppressions: &crate::suppression::SuppressionMap,
1176    file: &std::sync::Arc<str>,
1177) {
1178    let pre_suppressed_cloned: Vec<Issue> = all_issues
1179        .iter()
1180        .filter(|i| i.suppressed)
1181        .cloned()
1182        .collect();
1183    let pre_suppressed: Vec<&Issue> = pre_suppressed_cloned.iter().collect();
1184    let unused = suppressions.unused_named(all_issues, &pre_suppressed);
1185    for (line, kind) in unused {
1186        let loc = mir_types::Location::new(file.clone(), line, line, 0, 0);
1187        let mut issue = Issue::new(mir_issues::IssueKind::UnusedPsalmSuppress { kind }, loc);
1188        if suppressions.is_suppressed(line, issue.kind.name(), issue.kind.code()) {
1189            issue.suppressed = true;
1190        }
1191        all_issues.push(issue);
1192    }
1193}
1194
1195/// Discover all `.php` files under a directory, recursively.
1196pub fn discover_files(root: &Path) -> Vec<PathBuf> {
1197    if root.is_file() {
1198        return vec![root.to_path_buf()];
1199    }
1200    let mut files = Vec::new();
1201    collect_php_files(root, &mut files);
1202    files
1203}
1204
/// Recursively append every `.php` file below `dir` to `out`.
///
/// Symlinks are never followed or reported. Directories named `vendor`,
/// `.git`, `node_modules`, `.cache` or `.pnpm-store` are not descended into.
/// The extension match is case-sensitive. Directories that cannot be listed
/// and entries that cannot be read are skipped silently.
pub(crate) fn collect_php_files(dir: &Path, out: &mut Vec<PathBuf>) {
    const SKIPPED_DIRS: [&str; 5] = ["vendor", ".git", "node_modules", ".cache", ".pnpm-store"];

    let Ok(listing) = std::fs::read_dir(dir) else {
        return;
    };
    for entry in listing.flatten() {
        // An entry whose type cannot be determined is treated as a
        // non-symlink and classified by the checks below.
        let is_symlink = matches!(entry.file_type(), Ok(kind) if kind.is_symlink());
        if is_symlink {
            continue;
        }

        let candidate = entry.path();
        if candidate.is_dir() {
            let dir_name = candidate
                .file_name()
                .and_then(|name| name.to_str())
                .unwrap_or("");
            let skipped = SKIPPED_DIRS.iter().any(|skip| *skip == dir_name);
            if !skipped {
                collect_php_files(&candidate, out);
            }
        } else if matches!(
            candidate.extension().and_then(|ext| ext.to_str()),
            Some("php")
        ) {
            out.push(candidate);
        }
    }
}
1227
1228// ---------------------------------------------------------------------------
1229// FQCN reference walk — collects every class-name reference reachable from a
1230// ClassLike's signature surface. Used by lazy_load_missing_classes to chase
1231// transitive vendor types.
1232// ---------------------------------------------------------------------------
1233
1234pub(crate) fn collect_class_referenced_fqcns(class: &crate::db::ClassLike, out: &mut Vec<String>) {
1235    if let Some(p) = class.parent() {
1236        out.push(p.to_string());
1237    }
1238    for i in class.interfaces() {
1239        out.push(i.to_string());
1240    }
1241    for e in class.extends() {
1242        out.push(e.to_string());
1243    }
1244    for t in class.class_traits() {
1245        out.push(t.to_string());
1246    }
1247    for m in class.mixins() {
1248        out.push(m.to_string());
1249    }
1250    for u in class.extends_type_args() {
1251        collect_fqcns_in_union(u, out);
1252    }
1253    for (iface, args) in class.implements_type_args() {
1254        out.push(iface.to_string());
1255        for u in args {
1256            collect_fqcns_in_union(u, out);
1257        }
1258    }
1259    for (_, m) in class.own_methods().iter() {
1260        for p in m.params.iter() {
1261            if let Some(t) = &p.ty {
1262                collect_fqcns_in_union(t, out);
1263            }
1264        }
1265        if let Some(t) = &m.return_type {
1266            collect_fqcns_in_union(t, out);
1267        }
1268        for thrown in m.throws.iter() {
1269            out.push(thrown.to_string());
1270        }
1271    }
1272    if let Some(props) = class.own_properties() {
1273        for (_, p) in props.iter() {
1274            if let Some(t) = &p.ty {
1275                collect_fqcns_in_union(t, out);
1276            }
1277        }
1278    }
1279    for (_, c) in class.own_constants().iter() {
1280        collect_fqcns_in_union(&c.ty, out);
1281    }
1282}
1283
1284pub(crate) fn collect_fqcns_in_union(u: &Type, out: &mut Vec<String>) {
1285    for atom in u.types.iter() {
1286        collect_fqcns_in_atomic(atom, out);
1287    }
1288}
1289
1290fn collect_fqcns_in_simple(t: &mir_types::compact::SimpleType, out: &mut Vec<String>) {
1291    if let mir_types::compact::SimpleType::Complex(u) = t {
1292        collect_fqcns_in_union(u, out);
1293    }
1294}
1295
1296pub(crate) fn collect_fqcns_in_atomic(a: &Atomic, out: &mut Vec<String>) {
1297    match a {
1298        Atomic::TNamedObject { fqcn, type_params } => {
1299            out.push(fqcn.to_string());
1300            for tp in type_params.iter() {
1301                collect_fqcns_in_union(tp, out);
1302            }
1303        }
1304        Atomic::TStaticObject { fqcn } | Atomic::TSelf { fqcn } | Atomic::TParent { fqcn } => {
1305            out.push(fqcn.to_string());
1306        }
1307        Atomic::TLiteralEnumCase { enum_fqcn, .. } => {
1308            out.push(enum_fqcn.to_string());
1309        }
1310        Atomic::TClassString(Some(s)) => {
1311            out.push(s.to_string());
1312        }
1313        Atomic::TArray { key, value } | Atomic::TNonEmptyArray { key, value } => {
1314            collect_fqcns_in_union(key, out);
1315            collect_fqcns_in_union(value, out);
1316        }
1317        Atomic::TList { value } | Atomic::TNonEmptyList { value } => {
1318            collect_fqcns_in_union(value, out);
1319        }
1320        Atomic::TKeyedArray { properties, .. } => {
1321            for (_, kp) in properties.iter() {
1322                collect_fqcns_in_union(&kp.ty, out);
1323            }
1324        }
1325        Atomic::TClosure {
1326            params,
1327            return_type,
1328            this_type,
1329        } => {
1330            for p in params {
1331                if let Some(t) = &p.ty {
1332                    collect_fqcns_in_simple(t, out);
1333                }
1334            }
1335            collect_fqcns_in_union(return_type, out);
1336            if let Some(t) = this_type {
1337                collect_fqcns_in_union(t, out);
1338            }
1339        }
1340        Atomic::TCallable {
1341            params,
1342            return_type,
1343        } => {
1344            if let Some(ps) = params {
1345                for p in ps {
1346                    if let Some(t) = &p.ty {
1347                        collect_fqcns_in_simple(t, out);
1348                    }
1349                }
1350            }
1351            if let Some(rt) = return_type {
1352                collect_fqcns_in_union(rt, out);
1353            }
1354        }
1355        Atomic::TIntersection { parts } => {
1356            for p in parts.iter() {
1357                collect_fqcns_in_union(p, out);
1358            }
1359        }
1360        Atomic::TConditional {
1361            param_name: _,
1362            subject,
1363            if_true,
1364            if_false,
1365        } => {
1366            collect_fqcns_in_union(subject, out);
1367            collect_fqcns_in_union(if_true, out);
1368            collect_fqcns_in_union(if_false, out);
1369        }
1370        Atomic::TTemplateParam { as_type, .. } => {
1371            collect_fqcns_in_union(as_type, out);
1372        }
1373        _ => {}
1374    }
1375}
1376
/// Build the reverse-dependency index: for each file that defines a symbol,
/// the set of *other* files that reference that symbol.
///
/// Edges are gathered from four sources: per-file import snapshots, class
/// signatures (parent, interfaces, traits, property types, method parameter
/// and return types), function signatures, and recorded reference locations.
/// A reference whose symbol has no known defining file, or whose defining
/// file is the referencing file itself, produces no edge.
fn build_reverse_deps(db: &dyn crate::db::MirDatabase) -> HashMap<String, HashSet<String>> {
    let mut reverse: HashMap<String, HashSet<String>> = HashMap::default();

    // Record "`dependent_file` depends on the file defining `symbol`".
    let mut add_edge = |symbol: &str, dependent_file: &str| {
        if let Some(defining_file) = db.symbol_defining_file(symbol) {
            let def = defining_file.as_ref().to_string();
            if def != dependent_file {
                reverse
                    .entry(def)
                    .or_default()
                    .insert(dependent_file.to_string());
            }
        }
    };

    // 1. Imports: every imported name makes the importing file a dependent.
    for (file, imports) in db.file_import_snapshots() {
        let file = file.as_ref().to_string();
        for fqcn in imports.values() {
            add_edge(fqcn.as_str(), &file);
        }
    }

    // Only the top-level `TNamedObject` members of a union are extracted;
    // names nested inside type parameters, arrays, callables, etc. are not.
    let extract_named_objects = |union: &mir_types::Type| {
        union
            .types
            .iter()
            .filter_map(|atomic| match atomic {
                mir_types::atomic::Atomic::TNamedObject { fqcn, .. } => Some(*fqcn),
                _ => None,
            })
            .collect::<Vec<_>>()
    };

    // 2. Class signatures.
    for fqcn in crate::db::workspace_classes(db).iter() {
        let here = crate::db::Fqcn::from_str(db, fqcn.as_ref());
        let Some(class) = crate::db::find_class_like(db, here) else {
            continue;
        };
        // Interfaces, traits and enums contribute no signature edges here.
        if class.is_interface() || class.is_trait() || class.is_enum() {
            continue;
        }
        // Prefer the db's defining-file record; fall back to the class's own
        // location. Classes with neither are skipped.
        let Some(file) = db
            .symbol_defining_file(fqcn.as_ref())
            .map(|f| f.as_ref().to_string())
            .or_else(|| class.location().map(|l| l.file.as_ref().to_string()))
        else {
            continue;
        };

        if let Some(parent) = class.parent() {
            add_edge(parent.as_ref(), &file);
        }
        for iface in class.interfaces().iter() {
            add_edge(iface.as_ref(), &file);
        }
        for tr in class.class_traits().iter() {
            add_edge(tr.as_ref(), &file);
        }
        if let Some(props) = class.own_properties() {
            for (_, p) in props.iter() {
                if let Some(ty) = &p.ty {
                    for named in extract_named_objects(ty) {
                        add_edge(named.as_ref(), &file);
                    }
                }
            }
        }
        for (_, method) in class.own_methods().iter() {
            for param in method.params.iter() {
                if let Some(ty) = &param.ty {
                    for named in extract_named_objects(ty.as_ref()) {
                        add_edge(named.as_ref(), &file);
                    }
                }
            }
            if let Some(rt) = method.return_type.as_deref() {
                for named in extract_named_objects(rt) {
                    add_edge(named.as_ref(), &file);
                }
            }
        }
    }

    // 3. Free-function signatures, with the same defining-file fallback.
    for fqn in crate::db::workspace_functions(db).iter() {
        let here = crate::db::Fqcn::from_str(db, fqn.as_ref());
        let Some(f) = crate::db::find_function(db, here) else {
            continue;
        };
        let Some(file) = db
            .symbol_defining_file(fqn.as_ref())
            .map(|f| f.as_ref().to_string())
            .or_else(|| f.location.as_ref().map(|l| l.file.as_ref().to_string()))
        else {
            continue;
        };

        for param in f.params.iter() {
            if let Some(ty) = &param.ty {
                for named in extract_named_objects(ty.as_ref()) {
                    add_edge(named.as_ref(), &file);
                }
            }
        }
        if let Some(rt) = f.return_type.as_deref() {
            for named in extract_named_objects(rt) {
                add_edge(named.as_ref(), &file);
            }
        }
    }

    // 4. Recorded reference locations. A key containing `::` is looked up by
    //    the part before the first `::` (presumably the owning class of a
    //    member reference — confirm against the key format); any other key
    //    is looked up as-is.
    for (ref_file, symbol_key) in db.all_reference_location_pairs() {
        let file_str = ref_file.as_ref().to_string();
        let lookup: &str = match symbol_key.split_once("::") {
            Some((class, _)) => class,
            None => &symbol_key,
        };
        add_edge(lookup, &file_str);
    }

    reverse
}
1498
1499fn extract_reference_locations(
1500    db: &dyn crate::db::MirDatabase,
1501    file: &Arc<str>,
1502) -> Vec<(String, u32, u16, u16)> {
1503    db.extract_file_reference_locations(file.as_ref())
1504        .into_iter()
1505        .map(|(sym, line, col_start, col_end)| (sym.to_string(), line, col_start, col_end))
1506        .collect()
1507}
1508
1509pub struct AnalysisResult {
1510    pub issues: Vec<Issue>,
1511    #[doc(hidden)]
1512    pub type_envs: rustc_hash::FxHashMap<crate::type_env::ScopeId, crate::type_env::TypeEnv>,
1513    /// Per-expression resolved symbols from body analysis, sorted by file path.
1514    pub symbols: Vec<crate::symbol::ResolvedSymbol>,
1515    /// Maps each file path to the contiguous range within `symbols` that
1516    /// belongs to it.
1517    symbols_by_file: HashMap<Arc<str>, std::ops::Range<usize>>,
1518}
1519
1520impl AnalysisResult {
1521    fn build(
1522        issues: Vec<Issue>,
1523        type_envs: rustc_hash::FxHashMap<crate::type_env::ScopeId, crate::type_env::TypeEnv>,
1524        mut symbols: Vec<crate::symbol::ResolvedSymbol>,
1525    ) -> Self {
1526        symbols.sort_unstable_by(|a, b| a.file.as_ref().cmp(b.file.as_ref()));
1527        let mut symbols_by_file: HashMap<Arc<str>, std::ops::Range<usize>> = HashMap::default();
1528        let mut i = 0;
1529        while i < symbols.len() {
1530            let file = Arc::clone(&symbols[i].file);
1531            let start = i;
1532            while i < symbols.len() && symbols[i].file == file {
1533                i += 1;
1534            }
1535            symbols_by_file.insert(file, start..i);
1536        }
1537        Self {
1538            issues,
1539            type_envs,
1540            symbols,
1541            symbols_by_file,
1542        }
1543    }
1544
1545    pub fn error_count(&self) -> usize {
1546        self.issues
1547            .iter()
1548            .filter(|i| i.severity == mir_issues::Severity::Error)
1549            .count()
1550    }
1551
1552    pub fn warning_count(&self) -> usize {
1553        self.issues
1554            .iter()
1555            .filter(|i| i.severity == mir_issues::Severity::Warning)
1556            .count()
1557    }
1558
1559    pub fn issues_by_file(&self) -> HashMap<Arc<str>, Vec<&Issue>> {
1560        let mut map: HashMap<Arc<str>, Vec<&Issue>> = HashMap::default();
1561        for issue in &self.issues {
1562            map.entry(issue.location.file.clone())
1563                .or_default()
1564                .push(issue);
1565        }
1566        map
1567    }
1568
1569    pub fn count_by_severity(&self) -> Vec<(mir_issues::Severity, usize)> {
1570        let mut counts: std::collections::BTreeMap<mir_issues::Severity, usize> =
1571            std::collections::BTreeMap::new();
1572        for issue in &self.issues {
1573            *counts.entry(issue.severity).or_insert(0) += 1;
1574        }
1575        counts.into_iter().collect()
1576    }
1577
1578    pub fn total_issue_count(&self) -> usize {
1579        self.issues.len()
1580    }
1581
1582    pub fn filter_issues<'a, F>(&'a self, predicate: F) -> impl Iterator<Item = &'a Issue>
1583    where
1584        F: Fn(&Issue) -> bool + 'a,
1585    {
1586        self.issues.iter().filter(move |i| predicate(i))
1587    }
1588
1589    pub fn symbol_at(
1590        &self,
1591        file: &str,
1592        byte_offset: u32,
1593    ) -> Option<&crate::symbol::ResolvedSymbol> {
1594        let range = self.symbols_by_file.get(file)?;
1595        let symbols = &self.symbols[range.clone()];
1596
1597        // Primary: cursor is on an identifier token.
1598        if let Some(sym) = symbols
1599            .iter()
1600            .filter(|s| s.span.start <= byte_offset && byte_offset < s.span.end)
1601            .min_by_key(|s| s.span.end - s.span.start)
1602        {
1603            return Some(sym);
1604        }
1605
1606        // Fallback: cursor is in a call-expression gap (e.g. the whitespace or
1607        // argument list between two chained method calls).  Match against the
1608        // full expression span recorded for call-like symbols and return the
1609        // innermost (smallest) enclosing call, mirroring what an AST-walk to
1610        // the innermost containing call expression would produce.
1611        symbols
1612            .iter()
1613            .filter(|s| {
1614                s.expr_span
1615                    .is_some_and(|es| es.start <= byte_offset && byte_offset < es.end)
1616            })
1617            .min_by_key(|s| {
1618                let es = s.expr_span.unwrap();
1619                es.end - es.start
1620            })
1621    }
1622}