Skip to main content

mir_analyzer/
batch.rs

1//! Batch-oriented project analysis on [`AnalysisSession`].
2//!
3//! This module hosts the multi-file orchestration that used to live on the
4//! retired `ProjectAnalyzer`: parallel definition collection, lazy class loading, dead-code
5//! sweep, reverse-dependency index, and the [`AnalysisResult`] return type.
6//! Per-file (LSP) entry points stay on `AnalysisSession` itself in
7//! `session.rs`.
8//!
9//! All methods are `impl AnalysisSession`; configuration that's only
10//! meaningful for batch runs (issue suppressions, progress callback, optional
11//! PHP version override) is grouped in [`BatchOptions`] and passed in rather
12//! than stored on the session.
13
14use std::path::{Path, PathBuf};
15use std::sync::Arc;
16
17use rayon::prelude::*;
18use rustc_hash::{FxHashMap as HashMap, FxHashSet as HashSet};
19
20use mir_issues::Issue;
21use mir_types::{Atomic, Type};
22
23use crate::body_analysis::BodyAnalyzer;
24use crate::cache::hash_content;
25use crate::db::{
26    collect_file_definitions, FileDefinitions, MirDatabase, MirDbStorage, RefLoc, SourceFile,
27};
28use crate::php_version::PhpVersion;
29use crate::session::AnalysisSession;
30use crate::stub_cache::{hash_source, prepare_for_ingest};
31
/// Names of the issue kinds produced by [`crate::dead_code::DeadCodeAnalyzer`].
///
/// Dead-code detection is treated as an ordinary group of diagnostics: each
/// name here can appear in [`BatchOptions::suppressed_issue_kinds`] exactly
/// like any other `IssueKind`. When all of them are suppressed the dead-code
/// pass is not run at all.
pub fn dead_code_issue_kinds() -> &'static [&'static str] {
    const DEAD_CODE_KINDS: &[&str] = &["UnusedMethod", "UnusedProperty", "UnusedFunction"];
    DEAD_CODE_KINDS
}
41
42/// Per-batch options for [`AnalysisSession::analyze_paths`] and friends.
43///
44/// Configuration that only makes sense for full-project (batch) analysis
45/// lives here instead of on [`AnalysisSession`], so the per-file LSP API
46/// isn't bloated with state nothing else reads.
47#[derive(Clone, Default)]
48pub struct BatchOptions {
49    /// Names of `IssueKind` variants to drop from the final result, e.g.
50    /// `["MissingThrowsDocblock", "UnusedMethod"]`. Applied as a final
51    /// post-filter so analyzer internals don't need to know which
52    /// diagnostics the consumer cares about. Empty by default.
53    pub suppressed_issue_kinds: HashSet<String>,
54    /// Called once after each file completes body analysis (progress reporting).
55    pub on_file_done: Option<Arc<dyn Fn() + Send + Sync>>,
56    /// Override the session's configured PHP version for this run. `None`
57    /// uses the session's version.
58    pub php_version_override: Option<PhpVersion>,
59    /// Skip collecting per-expression [`crate::symbol::ResolvedSymbol`]s
60    /// into the [`AnalysisResult`]. Defaults to `false` (symbols collected)
61    /// so existing consumers — LSP servers using
62    /// [`AnalysisResult::symbol_at`] for hover/go-to-definition — are
63    /// unaffected. Diagnostics-only consumers (the CLI) opt out: a
64    /// Laravel-scale batch retains ~600k symbols nothing reads.
65    pub skip_symbols: bool,
66}
67
68impl BatchOptions {
69    pub fn new() -> Self {
70        Self::default()
71    }
72
73    pub fn with_suppressed<I, S>(mut self, kinds: I) -> Self
74    where
75        I: IntoIterator<Item = S>,
76        S: Into<String>,
77    {
78        self.suppressed_issue_kinds = kinds.into_iter().map(Into::into).collect();
79        self
80    }
81
82    pub fn with_progress_callback(mut self, callback: Arc<dyn Fn() + Send + Sync>) -> Self {
83        self.on_file_done = Some(callback);
84        self
85    }
86
87    pub fn with_php_version(mut self, version: PhpVersion) -> Self {
88        self.php_version_override = Some(version);
89        self
90    }
91
92    /// Don't collect per-expression symbols into the result (see
93    /// [`Self::skip_symbols`]). For diagnostics-only consumers;
94    /// [`AnalysisResult::symbol_at`] will find nothing on the batch result.
95    pub fn without_symbols(mut self) -> Self {
96        self.skip_symbols = true;
97        self
98    }
99
100    /// True iff at least one dead-code [`IssueKind`] would be emitted (i.e.
101    /// not all of them are suppressed).
102    fn should_run_dead_code(&self) -> bool {
103        dead_code_issue_kinds()
104            .iter()
105            .any(|k| !self.suppressed_issue_kinds.contains(*k))
106    }
107
108    /// Drop issues whose [`IssueKind::name()`] is listed in
109    /// [`Self::suppressed_issue_kinds`].
110    fn apply(&self, issues: &mut Vec<Issue>) {
111        if self.suppressed_issue_kinds.is_empty() {
112            return;
113        }
114        issues.retain(|i| !self.suppressed_issue_kinds.contains(i.kind.name()));
115    }
116}
117
118struct ParsedProjectFile {
119    file: Arc<str>,
120    source: Arc<str>,
121    parsed: php_rs_parser::ParseResult,
122}
123
124impl ParsedProjectFile {
125    fn new(file: Arc<str>, source: Arc<str>) -> Self {
126        let parsed = php_rs_parser::parse(source.as_ref());
127        Self {
128            file,
129            source,
130            parsed,
131        }
132    }
133
134    fn source(&self) -> &str {
135        self.source.as_ref()
136    }
137
138    fn source_map(&self) -> &php_rs_parser::source_map::SourceMap {
139        &self.parsed.source_map
140    }
141
142    fn errors(&self) -> &[php_rs_parser::diagnostics::ParseError] {
143        &self.parsed.errors
144    }
145
146    fn owned(&self) -> &php_ast::owned::Program {
147        &self.parsed.program
148    }
149}
150
151impl AnalysisSession {
152    /// Cumulative hit / miss counts on the persistent definition cache attached
153    /// to this session. `(0, 0)` when no cache is configured.
154    #[doc(hidden)]
155    pub fn stub_cache_stats(&self) -> (u64, u64) {
156        match self.db.stub_cache.as_deref() {
157            Some(c) => (c.hits(), c.misses()),
158            None => (0, 0),
159        }
160    }
161
162    fn batch_php_version(&self, opts: &BatchOptions) -> PhpVersion {
163        opts.php_version_override.unwrap_or(self.php_version)
164    }
165
166    /// Mark issues silenced by inline suppression comments
167    /// (`@mir-ignore`, `@psalm-suppress`, `@phpstan-ignore*`, …) as suppressed.
168    ///
169    /// Runs as a final post-filter over the merged issue list so it applies
170    /// uniformly to every emitting pass — body analysis, the collector, class
171    /// checks and dead-code detection — including diagnostics the per-statement
172    /// `@psalm-suppress` path in `stmt/mod.rs` structurally cannot reach.
173    ///
174    /// Issues are *marked* rather than dropped, mirroring that per-statement
175    /// path and the kind-level `mir.xml` suppress handler; every consumer (CLI,
176    /// WASM, the test harness) already skips [`Issue::suppressed`].
177    fn apply_inline_suppressions(&self, issues: &mut [Issue]) {
178        use crate::suppression::SuppressionMap;
179        if issues.iter().all(|i| i.suppressed) {
180            return;
181        }
182        let db = self.snapshot_db();
183        // One map per distinct file, built lazily; `None` once we know a file
184        // has no source registered or no suppression comments.
185        let mut cache: HashMap<Arc<str>, Option<SuppressionMap>> = HashMap::default();
186        for issue in issues.iter_mut() {
187            if issue.suppressed {
188                continue;
189            }
190            let map = cache.entry(issue.location.file.clone()).or_insert_with(|| {
191                db.lookup_source_file(&issue.location.file)
192                    .map(|sf| SuppressionMap::from_source(&sf.text(&db)))
193                    .filter(|m| !m.is_empty())
194            });
195            if let Some(map) = map.as_ref() {
196                if map.is_suppressed(issue.location.line, issue.kind.name(), issue.kind.code()) {
197                    issue.suppressed = true;
198                }
199            }
200        }
201    }
202
203    fn type_exists(&self, fqcn: &str) -> bool {
204        let db = self.snapshot_db();
205        crate::db::class_exists(&db, fqcn)
206    }
207
208    fn collect_and_ingest_source(
209        &self,
210        file: Arc<str>,
211        src: &str,
212        php_version: PhpVersion,
213    ) -> FileDefinitions {
214        self.db.collect_and_ingest_file(file, src, php_version)
215    }
216
217    /// Rebuild the workspace symbol index singleton from every registered source
218    /// file. Required in the batch path because `workspace_index` reads the
219    /// maintained singleton, and that singleton is built from vendor *before*
220    /// `analyze_paths` registers project files (and before `lazy_load_*` faults
221    /// in referenced classes). Without refreshing it, `find_class_like` /
222    /// `class_exists` miss every project and lazy-loaded class, yielding false
223    /// `UndefinedClass`. Cheap after the definition caches are warm (no parsing).
224    fn refresh_workspace_index(&self) {
225        let mut guard = self.db.salsa.write();
226        guard.rebuild_workspace_symbol_index();
227    }
228
229    /// Load the configured PHP version + built-in stubs + user stubs into
230    /// the shared db. Called by [`Self::analyze_paths`] and
231    /// [`Self::collect_definitions`].
232    fn load_batch_stubs(&self, php_version: PhpVersion) {
233        // Wire the PHP version into the db before any SourceFile inputs are
234        // registered — collect_file_definitions reads it for @since/@removed filtering.
235        {
236            let version_str = Arc::from(php_version.to_string().as_str());
237            self.db.salsa.write().set_php_version(version_str);
238        }
239
240        // Built-in stubs for the configured PHP version.
241        let paths: Vec<&'static str> = crate::stubs::stub_files().iter().map(|&(p, _)| p).collect();
242        self.db.ingest_stub_paths(&paths, php_version);
243
244        // User-configured stubs.
245        self.db
246            .ingest_user_stubs(&self.user_stub_files, &self.user_stub_dirs);
247
248        // Ensure a resolver is configured so pull-path lookups can map
249        // built-in FQCNs to the stub VFS paths registered above.
250        let mut guard = self.db.salsa.write();
251        if guard.current_resolver().is_none() {
252            let resolver: Arc<dyn crate::ClassResolver> = Arc::new(crate::StubClassResolver);
253            guard.set_resolver(Some(resolver));
254        }
255    }
256
257    /// Run the full batch analysis pipeline on a set of file paths.
258    pub fn analyze_paths(&self, paths: &[PathBuf], opts: &BatchOptions) -> AnalysisResult {
259        let php_version = self.batch_php_version(opts);
260        let mut all_issues = Vec::new();
261        let _t0 = std::time::Instant::now();
262
263        // ---- Load PHP built-in stubs (before definition collection so user code can override)
264        self.load_batch_stubs(php_version);
265        let _t_stubs = _t0.elapsed();
266
267        // ---- Read files in parallel ----------------------------------
268        let parsed_files: Vec<ParsedProjectFile> = paths
269            .par_iter()
270            .filter_map(|path| match std::fs::read_to_string(path) {
271                Ok(src) => {
272                    let file = Arc::from(path.to_string_lossy().as_ref());
273                    Some(ParsedProjectFile::new(file, Arc::from(src)))
274                }
275                Err(e) => {
276                    eprintln!("Cannot read {}: {}", path.display(), e);
277                    None
278                }
279            })
280            .collect();
281        let _t_read = _t0.elapsed();
282
283        let file_data: Vec<(Arc<str>, Arc<str>)> = parsed_files
284            .iter()
285            .map(|parsed| (parsed.file.clone(), parsed.source.clone()))
286            .collect();
287
288        // ---- Pre-analysis invalidation: evict dependents of changed/removed files
289        if let Some(cache) = &self.cache {
290            let mut invalidated: Vec<String> = file_data
291                .par_iter()
292                .filter_map(|(f, src)| {
293                    let h = hash_content(src.as_ref());
294                    if cache.get(f, &h).is_none() {
295                        Some(f.to_string())
296                    } else {
297                        None
298                    }
299                })
300                .collect();
301
302            // Files analyzed in a previous run but now gone from disk: their
303            // dependents hold stale results that still assume the deleted
304            // definitions exist. A file merely absent from this run's path set
305            // (but still on disk) is NOT a deletion — checking disk existence
306            // avoids evicting dependents during partial-path analysis.
307            let current: std::collections::HashSet<&str> =
308                file_data.iter().map(|(f, _)| f.as_ref()).collect();
309            let removed: Vec<String> = cache
310                .cached_files()
311                .into_iter()
312                .filter(|f| !current.contains(f.as_str()) && !std::path::Path::new(f).exists())
313                .collect();
314            for f in &removed {
315                cache.evict(f);
316            }
317            invalidated.extend(removed);
318
319            if !invalidated.is_empty() {
320                cache.evict_with_dependents(&invalidated);
321            }
322        }
323
324        // ---- Register Salsa source inputs for incremental follow-up calls ----
325        {
326            let mut guard = self.db.salsa.write();
327            for parsed in &parsed_files {
328                guard.upsert_source_file(parsed.file.clone(), parsed.source.clone());
329            }
330        }
331        let _t_salsa_reg = _t0.elapsed();
332
333        // ---- Definition collection from the already-parsed AST -------
334        // Returns (FileDefinitions, content_hash, has_hard_parse_errors) so we
335        // can prime the parse cache before the pre-warm loop below.
336        type Pass1Entry = (FileDefinitions, [u8; 32], bool);
337        let file_defs: Vec<Pass1Entry> = parsed_files
338            .par_iter()
339            .map(|parsed| {
340                let content_hash = hash_source(parsed.source());
341                let has_hard_parse_errors = parsed
342                    .errors()
343                    .iter()
344                    .any(crate::parser::is_hard_parse_error);
345                let mut all_issues: Vec<Issue> = parsed
346                    .errors()
347                    .iter()
348                    .map(|err| {
349                        crate::parser::parse_error_to_issue(
350                            err,
351                            &parsed.file,
352                            parsed.source(),
353                            parsed.source_map(),
354                        )
355                    })
356                    .collect();
357                let collector = crate::collector::DefinitionCollector::new_for_slice(
358                    parsed.file.clone(),
359                    parsed.source(),
360                    parsed.source_map(),
361                );
362                let (mut slice, collector_issues) = collector.collect_slice(parsed.owned());
363                all_issues.extend(collector_issues);
364                mir_codebase::storage::deduplicate_params_in_slice(&mut slice);
365                let defs = FileDefinitions {
366                    slice: Arc::new(slice),
367                    issues: Arc::new(all_issues),
368                };
369                (defs, content_hash, has_hard_parse_errors)
370            })
371            .collect();
372        let _t_collect_defs = _t0.elapsed();
373
374        // Prime the in-process parse cache so the pre-warm loop below avoids
375        // re-parsing every project file through collect_file_definitions.
376        {
377            let guard = self.db.salsa.read();
378            for (defs, hash, has_hard_parse_errors) in &file_defs {
379                if !*has_hard_parse_errors {
380                    guard.prime_parse_cache(*hash, Arc::clone(&defs.slice));
381                }
382            }
383        }
384
385        let mut files_with_parse_errors: HashSet<Arc<str>> = HashSet::default();
386        for (defs, _hash, _hard_err) in file_defs {
387            for issue in defs.issues.iter() {
388                if matches!(issue.kind, mir_issues::IssueKind::ParseError { .. })
389                    && issue.severity == mir_issues::Severity::Error
390                {
391                    files_with_parse_errors.insert(issue.location.file.clone());
392                }
393            }
394            all_issues.extend(Arc::unwrap_or_clone(defs.issues));
395        }
396        let _t_ingest = _t0.elapsed();
397
398        // ---- Pre-warm collect_file_definitions for project files -------------
399        {
400            let db_prewarm = {
401                let guard = self.db.salsa.read();
402                (**guard).clone()
403            };
404            let project_source_files: Vec<SourceFile> = {
405                let guard = self.db.salsa.read();
406                parsed_files
407                    .iter()
408                    .filter_map(|p| (**guard).lookup_source_file(&p.file))
409                    .collect()
410            };
411            project_source_files
412                .into_par_iter()
413                .for_each_with(db_prewarm, |db, sf| {
414                    let _ = collect_file_definitions(db as &dyn MirDatabase, sf);
415                });
416        }
417        let _t_prewarm_ms = (_t0.elapsed() - _t_ingest).as_secs_f64() * 1000.0;
418
419        // Fold the freshly-registered project files into the workspace symbol
420        // index singleton. The singleton may have been built from vendor before
421        // this run (CLI indexes vendor before analyze_paths); since adding files
422        // no longer nulls it, project classes would otherwise be invisible to
423        // find_class_like and reported as false UndefinedClass.
424        self.refresh_workspace_index();
425
426        // ---- Lazy-load unknown classes via PSR-4 ----------------------------
427        let _t_before_lazy = _t0.elapsed();
428        if let Some(psr4) = self.psr4.clone() {
429            self.lazy_load_missing_classes(psr4, php_version, &mut all_issues);
430        }
431        let _t_lazyload_ms = (_t0.elapsed() - _t_before_lazy).as_secs_f64() * 1000.0;
432
433        // ---- Class-level checks ---------------------------------------------
434        let analyzed_file_set: HashSet<Arc<str>> =
435            file_data.iter().map(|(f, _)| f.clone()).collect();
436        let _t_class_analyzer = std::time::Instant::now();
437        {
438            let class_db = {
439                let guard = self.db.salsa.read();
440                (**guard).clone()
441            };
442            let class_issues = crate::class::ClassAnalyzer::with_files(
443                &class_db,
444                analyzed_file_set.clone(),
445                &file_data,
446            )
447            .analyze_all();
448            all_issues.extend(class_issues);
449        }
450        let _t_class_analyzer_ms = _t_class_analyzer.elapsed().as_secs_f64() * 1000.0;
451
452        let _t_class_checks = _t0.elapsed();
453
454        let mut db_main = {
455            let guard = self.db.salsa.read();
456            (**guard).clone()
457        };
458        // All index mutation for the body pass is done (lazy_load_missing_classes
459        // + refresh ran above; lazy_load_from_body_issues runs *after* this pass
460        // on a separate db). Freeze the index on this ephemeral clone so each
461        // find_class_like borrows it instead of cloning the singleton's three
462        // Arcs per call — the per-worker `map_with` clone bumps the refcount once.
463        db_main.freeze_workspace_index();
464
465        // ---- Body analysis: function/method bodies in parallel --------------
466        type BodyResult = (
467            Arc<str>,
468            Vec<Issue>,
469            Vec<crate::symbol::ResolvedSymbol>,
470            Vec<RefLoc>,
471        );
472        let body_results: Vec<BodyResult> = parsed_files
473            .par_iter()
474            .filter(|parsed| !files_with_parse_errors.contains(&parsed.file))
475            .map_with(db_main, |db, parsed| {
476                let driver = BodyAnalyzer::new(&*db as &dyn MirDatabase, php_version);
477                let (issues, symbols) = if let Some(cache) = &self.cache {
478                    let h = hash_content(parsed.source());
479                    if let Some((cached_issues, ref_locs)) = cache.get(&parsed.file, &h) {
480                        // Cache replay: rebuild the file's complete reference
481                        // set straight from the cached tuples — no pending-
482                        // buffer detour.
483                        let locs: Vec<RefLoc> = ref_locs
484                            .iter()
485                            .map(|(symbol, line, col_start, col_end)| RefLoc {
486                                symbol_key: Arc::from(symbol.as_str()),
487                                file: parsed.file.clone(),
488                                line: *line,
489                                col_start: *col_start,
490                                col_end: *col_end,
491                            })
492                            .collect();
493                        return (parsed.file.clone(), cached_issues, Vec::new(), locs);
494                    }
495                    let (issues, symbols) = driver.analyze_bodies(
496                        parsed.owned(),
497                        parsed.file.clone(),
498                        parsed.source(),
499                        parsed.source_map(),
500                    );
501                    let pending = db.take_pending_ref_locs();
502                    let cache_locs = pending
503                        .iter()
504                        .map(|r| (r.symbol_key.to_string(), r.line, r.col_start, r.col_end))
505                        .collect();
506                    cache.put(&parsed.file, h, issues.clone(), cache_locs);
507                    if let Some(cb) = &opts.on_file_done {
508                        cb();
509                    }
510                    let symbols = if opts.skip_symbols {
511                        Vec::new()
512                    } else {
513                        symbols
514                    };
515                    return (parsed.file.clone(), issues, symbols, pending);
516                } else {
517                    driver.analyze_bodies(
518                        parsed.owned(),
519                        parsed.file.clone(),
520                        parsed.source(),
521                        parsed.source_map(),
522                    )
523                };
524                let pending = db.take_pending_ref_locs();
525                if let Some(cb) = &opts.on_file_done {
526                    cb();
527                }
528                // Drop the per-file symbol vec inside the worker when the
529                // consumer opted out — the orchestrator never accumulates.
530                let symbols = if opts.skip_symbols {
531                    Vec::new()
532                } else {
533                    symbols
534                };
535                (parsed.file.clone(), issues, symbols, pending)
536            })
537            .collect();
538
539        let _t_body_analysis = _t0.elapsed();
540
541        // Serial commit with replace semantics: each file's output (or cache
542        // replay) is its complete reference set, so stale entries from a
543        // prior run cannot survive an append.
544        let mut all_symbols = Vec::new();
545        {
546            let guard = self.db.salsa.read();
547            for (file, issues, symbols, ref_locs) in body_results {
548                all_issues.extend(issues);
549                all_symbols.extend(symbols);
550                guard.set_file_reference_locations(file.as_ref(), ref_locs);
551            }
552        }
553
554        // ---- Post-analysis lazy loading: FQCNs used without `use` imports ------
555        if let Some(psr4) = self.psr4.clone() {
556            self.lazy_load_from_body_issues(
557                psr4,
558                php_version,
559                &file_data,
560                &files_with_parse_errors,
561                &mut all_issues,
562                &mut all_symbols,
563                opts.skip_symbols,
564            );
565        }
566
567        // ---- Build reverse dep graph and persist it for the next run ---------
568        // Must run AFTER `commit_reference_locations_batch` (above): the graph's
569        // call-site / instantiation / inferred-return edges are derived from the
570        // committed reference-location map. Built any earlier (the salsa db is
571        // fresh each session) that map is empty, so only structural edges
572        // (parent/interface/trait/declared types) survive — and any dependent
573        // reachable only through a call site or inferred type goes stale.
574        if let Some(cache) = &self.cache {
575            let db_snapshot = {
576                let guard = self.db.salsa.read();
577                (**guard).clone()
578            };
579            let rev = build_reverse_deps(&db_snapshot);
580            cache.set_reverse_deps(rev);
581        }
582
583        // Persist cache hits/misses to disk
584        if let Some(cache) = &self.cache {
585            cache.flush();
586        }
587
588        // ---- Dead-code detection -------------------------------------------
589        if opts.should_run_dead_code() {
590            let salsa = self.snapshot_db();
591            let _t_dead_code = std::time::Instant::now();
592            let dead_code_issues =
593                crate::dead_code::DeadCodeAnalyzer::with_files(&salsa, analyzed_file_set.clone())
594                    .analyze();
595            all_issues.extend(dead_code_issues);
596            if std::env::var("MIR_TIMING").is_ok() {
597                eprintln!(
598                    "[timing] dead_code_analyzer={:.0}ms",
599                    _t_dead_code.elapsed().as_secs_f64() * 1000.0
600                );
601            }
602        }
603
604        let _t_total = _t0.elapsed();
605        if std::env::var("MIR_TIMING").is_ok() {
606            eprintln!(
607                "[timing] stubs={:.0}ms read={:.0}ms salsa_reg={:.0}ms collect_defs={:.0}ms ingest={:.0}ms class_checks={:.0}ms (prewarm={:.0}ms lazy_load={:.0}ms class_analyzer={:.0}ms) body_analysis={:.0}ms total={:.0}ms",
608                _t_stubs.as_secs_f64() * 1000.0,
609                (_t_read - _t_stubs).as_secs_f64() * 1000.0,
610                (_t_salsa_reg - _t_read).as_secs_f64() * 1000.0,
611                (_t_collect_defs - _t_salsa_reg).as_secs_f64() * 1000.0,
612                (_t_ingest - _t_collect_defs).as_secs_f64() * 1000.0,
613                (_t_class_checks - _t_ingest).as_secs_f64() * 1000.0,
614                _t_prewarm_ms,
615                _t_lazyload_ms,
616                _t_class_analyzer_ms,
617                (_t_body_analysis - _t_class_checks).as_secs_f64() * 1000.0,
618                _t_total.as_secs_f64() * 1000.0,
619            );
620        }
621
622        opts.apply(&mut all_issues);
623        self.apply_inline_suppressions(&mut all_issues);
624        if let Some(dump) = crate::metrics::dump() {
625            eprintln!("{dump}");
626        }
627
628        // ---- Build workspace symbol index singleton -------------------------
629        {
630            let mut guard = self.db.salsa.write();
631            guard.rebuild_workspace_symbol_index();
632        }
633
634        AnalysisResult::build(all_issues, rustc_hash::FxHashMap::default(), all_symbols)
635    }
636
637    fn lazy_load_missing_classes(
638        &self,
639        psr4: Arc<crate::composer::Psr4Map>,
640        php_version: PhpVersion,
641        all_issues: &mut Vec<Issue>,
642    ) {
643        let max_depth = 10;
644        let mut loaded: HashSet<String> = HashSet::default();
645        let mut scanned: HashSet<Arc<str>> = HashSet::default();
646
647        for _ in 0..max_depth {
648            let mut to_load: Vec<(String, PathBuf)> = Vec::new();
649
650            let mut try_queue = |fqcn: &str| {
651                if !self.type_exists(fqcn) && !loaded.contains(fqcn) {
652                    if let Some(path) = psr4.resolve(fqcn) {
653                        to_load.push((fqcn.to_string(), path));
654                    }
655                }
656            };
657
658            let mut candidates: Vec<String> = Vec::new();
659            let import_candidates = {
660                let db_owned = self.snapshot_db();
661                let db = &db_owned;
662                for fqcn in crate::db::workspace_classes(db).iter() {
663                    if scanned.contains(fqcn.as_ref()) {
664                        continue;
665                    }
666                    let here = crate::db::Fqcn::from_str(db, fqcn.as_ref());
667                    let Some(class) = crate::db::find_class_like(db, here) else {
668                        continue;
669                    };
670                    scanned.insert(fqcn.clone());
671                    collect_class_referenced_fqcns(&class, &mut candidates);
672                }
673                db.file_import_snapshots()
674                    .into_iter()
675                    .flat_map(|(_, imports)| {
676                        imports
677                            .values()
678                            .map(|sym| sym.as_str().to_string())
679                            .collect::<Vec<_>>()
680                    })
681                    .collect::<Vec<_>>()
682            };
683            for fqcn in candidates {
684                try_queue(&fqcn);
685            }
686            for fqcn in import_candidates {
687                try_queue(&fqcn);
688            }
689
690            if to_load.is_empty() {
691                break;
692            }
693
694            // Mark everything queued as loaded up-front so a file that fails to
695            // read isn't retried on the next depth iteration (matches the serial
696            // behaviour, where `loaded.insert` ran before the read attempt).
697            for (fqcn, _) in &to_load {
698                loaded.insert(fqcn.clone());
699            }
700
701            // Read + parse + ingest the missing classes in parallel. The parse
702            // and definition walk inside `collect_and_ingest_source` already run
703            // off the salsa write lock (it takes the lock only for the brief
704            // input upsert), so fanning the per-file work across the rayon pool
705            // turns this previously-serial phase — the dominant cost on the lazy
706            // path — concurrent. `collect()` on a rayon map preserves input
707            // order, so the resulting issue ordering matches the serial version.
708            let per_file_issues: Vec<Vec<Issue>> = to_load
709                .par_iter()
710                .map(|(_, path)| -> Vec<Issue> {
711                    let Ok(src) = std::fs::read_to_string(path) else {
712                        return Vec::new();
713                    };
714                    let file: Arc<str> = Arc::from(path.to_string_lossy().as_ref());
715                    let is_vendor = file.contains("/vendor/") || file.contains("\\vendor\\");
716                    let defs = self.collect_and_ingest_source(file, &src, php_version);
717                    if is_vendor {
718                        Vec::new()
719                    } else {
720                        Arc::unwrap_or_clone(defs.issues)
721                    }
722                })
723                .collect();
724            for mut issues in per_file_issues {
725                all_issues.append(&mut issues);
726            }
727
728            // Make the just-loaded classes visible to the next iteration's
729            // transitive scan and to the caller's post-lazy-load snapshot.
730            self.refresh_workspace_index();
731        }
732    }
733
734    #[allow(clippy::too_many_arguments)]
735    fn lazy_load_from_body_issues(
736        &self,
737        psr4: Arc<crate::composer::Psr4Map>,
738        php_version: PhpVersion,
739        file_data: &[(Arc<str>, Arc<str>)],
740        files_with_parse_errors: &HashSet<Arc<str>>,
741        all_issues: &mut Vec<Issue>,
742        all_symbols: &mut Vec<crate::symbol::ResolvedSymbol>,
743        skip_symbols: bool,
744    ) {
745        use mir_issues::IssueKind;
746
747        let max_depth = 5;
748        let mut loaded: HashSet<String> = HashSet::default();
749
750        for _ in 0..max_depth {
751            let mut to_load: HashMap<String, PathBuf> = HashMap::default();
752
753            for issue in all_issues.iter() {
754                if let IssueKind::UndefinedClass { name } = &issue.kind {
755                    if !self.type_exists(name) && !loaded.contains(name) {
756                        if let Some(path) = psr4.resolve(name) {
757                            to_load.entry(name.clone()).or_insert(path);
758                        }
759                    }
760                }
761            }
762
763            if to_load.is_empty() {
764                break;
765            }
766
767            loaded.extend(to_load.keys().cloned());
768
769            for path in to_load.values() {
770                if let Ok(src) = std::fs::read_to_string(path) {
771                    let file: Arc<str> = Arc::from(path.to_string_lossy().as_ref());
772                    let _ = self.collect_and_ingest_source(file, &src, php_version);
773                }
774            }
775
776            // Make the loaded classes visible to the type_exists() check below
777            // (and to the reanalysis snapshot) so resolved files are detected.
778            self.refresh_workspace_index();
779
780            self.lazy_load_missing_classes(psr4.clone(), php_version, all_issues);
781
782            let files_to_reanalyze: HashSet<Arc<str>> = all_issues
783                .iter()
784                .filter_map(|i| {
785                    if let IssueKind::UndefinedClass { name } = &i.kind {
786                        if self.type_exists(name) {
787                            return Some(i.location.file.clone());
788                        }
789                    }
790                    None
791                })
792                .collect();
793
794            if files_to_reanalyze.is_empty() {
795                break;
796            }
797
798            all_issues.retain(|i| !files_to_reanalyze.contains(&i.location.file));
799            all_symbols.retain(|s| !files_to_reanalyze.contains(&s.file));
800
801            let db_full = {
802                let guard = self.db.salsa.read();
803                (**guard).clone()
804            };
805
806            let reanalysis: Vec<(Vec<Issue>, Vec<crate::symbol::ResolvedSymbol>, Vec<RefLoc>)> =
807                file_data
808                    .par_iter()
809                    .filter(|(f, _)| {
810                        !files_with_parse_errors.contains(f) && files_to_reanalyze.contains(f)
811                    })
812                    .map_with(db_full, |db, (file, src)| {
813                        let driver = BodyAnalyzer::new(&*db as &dyn MirDatabase, php_version);
814                        let parsed = php_rs_parser::parse(src);
815                        let (issues, symbols) = driver.analyze_bodies(
816                            &parsed.program,
817                            file.clone(),
818                            src,
819                            &parsed.source_map,
820                        );
821                        let pending = db.take_pending_ref_locs();
822                        (issues, symbols, pending)
823                    })
824                    .collect();
825
826            let mut reanalysis_ref_locs: Vec<RefLoc> = Vec::new();
827            for (issues, symbols, ref_locs) in reanalysis {
828                all_issues.extend(issues);
829                if !skip_symbols {
830                    all_symbols.extend(symbols);
831                }
832                reanalysis_ref_locs.extend(ref_locs);
833            }
834            {
835                let guard = self.db.salsa.read();
836                guard.commit_reference_locations_batch(reanalysis_ref_locs);
837            }
838        }
839    }
840
841    /// Re-analyze a single file (definition collection + body analysis) within the batch context.
842    ///
843    /// Mirrors the old `ProjectAnalyzer::re_analyze_file` cache-aware path.
844    /// Use [`Self::reanalyze_dependents`] for LSP-style per-file flows that
845    /// don't need batch options.
846    pub fn re_analyze_file(
847        &self,
848        file_path: &str,
849        new_content: &str,
850        opts: &BatchOptions,
851    ) -> AnalysisResult {
852        let php_version = self.batch_php_version(opts);
853
854        // Fast path: content unchanged and cache has a valid entry.
855        if let Some(cache) = &self.cache {
856            let h = hash_content(new_content);
857            if let Some((mut issues, ref_locs)) = cache.get(file_path, &h) {
858                let file: Arc<str> = Arc::from(file_path);
859                // Replace semantics: the cached set is the file's complete
860                // reference set, so stale entries from a prior version are
861                // cleared rather than appended over.
862                let locs: Vec<RefLoc> = ref_locs
863                    .iter()
864                    .map(|(symbol, line, col_start, col_end)| RefLoc {
865                        symbol_key: Arc::from(symbol.as_str()),
866                        file: file.clone(),
867                        line: *line,
868                        col_start: *col_start,
869                        col_end: *col_end,
870                    })
871                    .collect();
872                let guard = self.db.salsa.read();
873                guard.set_file_reference_locations(file_path, locs);
874                drop(guard);
875                opts.apply(&mut issues);
876                self.apply_inline_suppressions(&mut issues);
877                return AnalysisResult::build(issues, HashMap::default(), Vec::new());
878            }
879        }
880
881        let file: Arc<str> = Arc::from(file_path);
882
883        {
884            let mut guard = self.db.salsa.write();
885            guard.remove_file_definitions(file_path);
886        }
887
888        let file_defs = {
889            let mut guard = self.db.salsa.write();
890            let salsa_file = guard.upsert_source_file(file.clone(), Arc::from(new_content));
891            collect_file_definitions(&**guard, salsa_file)
892        };
893
894        let mut all_issues: Vec<Issue> = Arc::unwrap_or_clone(file_defs.issues.clone());
895
896        {
897            let mut guard = self.db.salsa.write();
898            if guard.workspace_symbol_index_singleton().is_some() {
899                if let Some(sf) = guard.lookup_source_file(file.as_ref()) {
900                    if guard.file_declarations_changed(sf) {
901                        guard.rebuild_workspace_symbol_index();
902                    }
903                }
904            }
905        }
906
907        let symbols = {
908            let guard = self.db.salsa.write();
909
910            let parsed = php_rs_parser::parse(new_content);
911
912            let has_hard_errors = parsed.errors.iter().any(crate::parser::is_hard_parse_error);
913            if !has_hard_errors {
914                let db_ref: &dyn MirDatabase = &**guard;
915                let driver = BodyAnalyzer::new(db_ref, php_version);
916                let (body_issues, symbols) = driver.analyze_bodies(
917                    &parsed.program,
918                    file.clone(),
919                    new_content,
920                    &parsed.source_map,
921                );
922                all_issues.extend(body_issues);
923                let pending = guard.take_pending_ref_locs();
924                guard.set_file_reference_locations(file.as_ref(), pending);
925                symbols
926            } else {
927                Vec::new()
928            }
929        };
930
931        // Bake inline-suppression marks in *before* caching: suppression is a
932        // pure function of file content (and the cache key hashes content), so
933        // the cached issues should already carry their marks. The cache-hit
934        // branch above replays this file's source without re-registering the
935        // `SourceFile` input, so the db-backed post-filter cannot recompute
936        // marks there — caching the canonical result is what keeps a fresh
937        // process honoring `@mir-ignore` on an unchanged file.
938        mark_suppressed(
939            &mut all_issues,
940            &crate::suppression::SuppressionMap::from_source(new_content),
941        );
942
943        if let Some(cache) = &self.cache {
944            let h = hash_content(new_content);
945            cache.evict_with_dependents(&[file_path.to_string()]);
946            let db = self.snapshot_db();
947            let ref_locs = extract_reference_locations(&db, &file);
948            cache.put(file_path, h, all_issues.clone(), ref_locs);
949        }
950
951        opts.apply(&mut all_issues);
952        AnalysisResult::build(all_issues, HashMap::default(), symbols)
953    }
954
955    /// Collect type definitions only from `paths` into the codebase
956    /// without analyzing method bodies or emitting issues. Used to load
957    /// vendor types.
958    ///
959    /// When a disk-backed cache is attached, per-file `StubSlice` results
960    /// from previous runs are reused on a content-hash match, eliminating
961    /// the parse + definition-collection step. Cache misses run the normal
962    /// pipeline and write back so subsequent runs hit.
963    pub fn collect_definitions(&self, paths: &[PathBuf]) {
964        let _timing = std::env::var("MIR_TIMING").is_ok();
965        let _t0 = std::time::Instant::now();
966
967        let php_v = self.php_version.cache_byte();
968
969        struct FileEntry {
970            file: Arc<str>,
971            src: Arc<str>,
972            hash: [u8; 32],
973            cached: Option<mir_codebase::storage::StubSlice>,
974        }
975        let entries: Vec<FileEntry> = paths
976            .par_iter()
977            .filter_map(|path| {
978                let src = std::fs::read_to_string(path).ok()?;
979                let file: Arc<str> = Arc::from(path.to_string_lossy().as_ref());
980                let src: Arc<str> = Arc::from(src);
981                let hash = hash_source(&src);
982                let cached = self.db.stub_cache.as_ref().and_then(|c| {
983                    let mut slice = c.get(&file, &hash, php_v)?;
984                    prepare_for_ingest(&mut slice);
985                    Some(slice)
986                });
987                Some(FileEntry {
988                    file,
989                    src,
990                    hash,
991                    cached,
992                })
993            })
994            .collect();
995        let _t_read = _t0.elapsed();
996
997        let source_files: Vec<SourceFile> = {
998            let mut guard = self.db.salsa.write();
999            entries
1000                .iter()
1001                .map(|e| {
1002                    guard.upsert_source_file_with_durability(
1003                        e.file.clone(),
1004                        e.src.clone(),
1005                        salsa::Durability::HIGH,
1006                    )
1007                })
1008                .collect()
1009        };
1010        let _t_reg = _t0.elapsed();
1011
1012        let db_pass1 = {
1013            let guard = self.db.salsa.read();
1014            (**guard).clone()
1015        };
1016        let stub_cache = self.db.stub_cache.clone();
1017        let prepared: Vec<mir_codebase::storage::StubSlice> = entries
1018            .into_par_iter()
1019            .zip(source_files.into_par_iter())
1020            .map_with(db_pass1, |db, (mut entry, salsa_file)| {
1021                if let Some(slice) = entry.cached.take() {
1022                    let slice_arc = Arc::new(slice);
1023                    db.parse_cache().insert(entry.hash, Arc::clone(&slice_arc));
1024                    return (*slice_arc).clone();
1025                }
1026                let defs = collect_file_definitions(&*db, salsa_file);
1027                if let Some(cache) = stub_cache.as_ref() {
1028                    cache.put(&entry.file, &entry.hash, php_v, &defs.slice);
1029                }
1030                (*defs.slice).clone()
1031            })
1032            .collect();
1033        let _t_collect = _t0.elapsed();
1034        drop(prepared);
1035        let _t_ingest = _t0.elapsed();
1036
1037        if _timing {
1038            let (hits, misses) = self.stub_cache_stats();
1039            eprintln!(
1040                "[vendor] read={:.0}ms reg={:.0}ms collect={:.0}ms ingest={:.0}ms total={:.0}ms (cache hits={hits} misses={misses})",
1041                _t_read.as_secs_f64() * 1000.0,
1042                (_t_reg - _t_read).as_secs_f64() * 1000.0,
1043                (_t_collect - _t_reg).as_secs_f64() * 1000.0,
1044                (_t_ingest - _t_collect).as_secs_f64() * 1000.0,
1045                _t_ingest.as_secs_f64() * 1000.0,
1046            );
1047        }
1048
1049        {
1050            let mut guard = self.db.salsa.write();
1051            guard.rebuild_workspace_symbol_index();
1052        }
1053
1054        crate::collector::print_collector_stats();
1055    }
1056}
1057
1058/// Analyze a PHP source string without a real file path. Useful for tests
1059/// and single-file LSP mode. Allocates a throwaway db; doesn't touch any
1060/// existing session.
1061pub fn analyze_source(source: &str) -> AnalysisResult {
1062    let php_version = PhpVersion::LATEST;
1063    let file: Arc<str> = Arc::from("<source>");
1064    let mut db = MirDbStorage::default();
1065    db.set_php_version(Arc::from(php_version.to_string().as_str()));
1066    crate::stubs::load_stubs_for_version(&mut db, php_version);
1067    let salsa_file = SourceFile::new(&db, file.clone(), Arc::from(source));
1068    let file_defs = collect_file_definitions(&db, salsa_file);
1069    let suppressions = crate::suppression::SuppressionMap::from_source(source);
1070    let mut all_issues = Arc::unwrap_or_clone(file_defs.issues);
1071    if all_issues.iter().any(|issue| {
1072        matches!(issue.kind, mir_issues::IssueKind::ParseError { .. })
1073            && issue.severity == mir_issues::Severity::Error
1074    }) {
1075        mark_suppressed(&mut all_issues, &suppressions);
1076        return AnalysisResult::build(all_issues, rustc_hash::FxHashMap::default(), Vec::new());
1077    }
1078    let mut type_envs = rustc_hash::FxHashMap::default();
1079    let mut all_symbols = Vec::new();
1080    let result = php_rs_parser::parse(source);
1081
1082    let driver = BodyAnalyzer::new(&db, php_version);
1083    all_issues.extend(driver.analyze_bodies_typed(
1084        &result.program,
1085        file.clone(),
1086        source,
1087        &result.source_map,
1088        &mut type_envs,
1089        &mut all_symbols,
1090    ));
1091    mark_suppressed(&mut all_issues, &suppressions);
1092    AnalysisResult::build(all_issues, type_envs, all_symbols)
1093}
1094
1095/// Mark issues silenced by a single file's [`SuppressionMap`]. Shared by the
1096/// in-memory [`analyze_source`] entry point, which has the source in hand and
1097/// does not go through the db-backed batch post-filter.
1098fn mark_suppressed(issues: &mut [Issue], suppressions: &crate::suppression::SuppressionMap) {
1099    if suppressions.is_empty() {
1100        return;
1101    }
1102    for issue in issues.iter_mut() {
1103        if !issue.suppressed
1104            && suppressions.is_suppressed(issue.location.line, issue.kind.name(), issue.kind.code())
1105        {
1106            issue.suppressed = true;
1107        }
1108    }
1109}
1110
1111/// Discover all `.php` files under a directory, recursively.
1112pub fn discover_files(root: &Path) -> Vec<PathBuf> {
1113    if root.is_file() {
1114        return vec![root.to_path_buf()];
1115    }
1116    let mut files = Vec::new();
1117    collect_php_files(root, &mut files);
1118    files
1119}
1120
/// Recursively append every file with a `php` extension found below `dir`
/// to `out`.
///
/// Symlinks are never followed or collected. Directories named `vendor`,
/// `.git`, `node_modules`, `.cache` or `.pnpm-store` are not descended into.
/// Directories and entries that cannot be read are skipped silently. The
/// extension comparison is case-sensitive.
pub(crate) fn collect_php_files(dir: &Path, out: &mut Vec<PathBuf>) {
    const SKIPPED_DIRS: [&str; 5] = ["vendor", ".git", "node_modules", ".cache", ".pnpm-store"];

    let Ok(listing) = std::fs::read_dir(dir) else {
        return;
    };
    for item in listing.flatten() {
        // An entry whose type cannot be determined is treated as "not a
        // symlink" and falls through to the checks below.
        if matches!(item.file_type(), Ok(kind) if kind.is_symlink()) {
            continue;
        }
        let candidate = item.path();
        if candidate.is_dir() {
            let dir_name = candidate.file_name().and_then(|n| n.to_str()).unwrap_or("");
            if !SKIPPED_DIRS.contains(&dir_name) {
                collect_php_files(&candidate, out);
            }
        } else if candidate.extension().and_then(|ext| ext.to_str()) == Some("php") {
            out.push(candidate);
        }
    }
}
1143
1144// ---------------------------------------------------------------------------
1145// FQCN reference walk — collects every class-name reference reachable from a
1146// ClassLike's signature surface. Used by lazy_load_missing_classes to chase
1147// transitive vendor types.
1148// ---------------------------------------------------------------------------
1149
1150pub(crate) fn collect_class_referenced_fqcns(class: &crate::db::ClassLike, out: &mut Vec<String>) {
1151    if let Some(p) = class.parent() {
1152        out.push(p.to_string());
1153    }
1154    for i in class.interfaces() {
1155        out.push(i.to_string());
1156    }
1157    for e in class.extends() {
1158        out.push(e.to_string());
1159    }
1160    for t in class.class_traits() {
1161        out.push(t.to_string());
1162    }
1163    for m in class.mixins() {
1164        out.push(m.to_string());
1165    }
1166    for u in class.extends_type_args() {
1167        collect_fqcns_in_union(u, out);
1168    }
1169    for (iface, args) in class.implements_type_args() {
1170        out.push(iface.to_string());
1171        for u in args {
1172            collect_fqcns_in_union(u, out);
1173        }
1174    }
1175    for (_, m) in class.own_methods().iter() {
1176        for p in m.params.iter() {
1177            if let Some(t) = &p.ty {
1178                collect_fqcns_in_union(t, out);
1179            }
1180        }
1181        if let Some(t) = &m.return_type {
1182            collect_fqcns_in_union(t, out);
1183        }
1184        for thrown in m.throws.iter() {
1185            out.push(thrown.to_string());
1186        }
1187    }
1188    if let Some(props) = class.own_properties() {
1189        for (_, p) in props.iter() {
1190            if let Some(t) = &p.ty {
1191                collect_fqcns_in_union(t, out);
1192            }
1193        }
1194    }
1195    for (_, c) in class.own_constants().iter() {
1196        collect_fqcns_in_union(&c.ty, out);
1197    }
1198}
1199
1200pub(crate) fn collect_fqcns_in_union(u: &Type, out: &mut Vec<String>) {
1201    for atom in u.types.iter() {
1202        collect_fqcns_in_atomic(atom, out);
1203    }
1204}
1205
1206fn collect_fqcns_in_simple(t: &mir_types::compact::SimpleType, out: &mut Vec<String>) {
1207    if let mir_types::compact::SimpleType::Complex(u) = t {
1208        collect_fqcns_in_union(u, out);
1209    }
1210}
1211
1212pub(crate) fn collect_fqcns_in_atomic(a: &Atomic, out: &mut Vec<String>) {
1213    match a {
1214        Atomic::TNamedObject { fqcn, type_params } => {
1215            out.push(fqcn.to_string());
1216            for tp in type_params.iter() {
1217                collect_fqcns_in_union(tp, out);
1218            }
1219        }
1220        Atomic::TStaticObject { fqcn } | Atomic::TSelf { fqcn } | Atomic::TParent { fqcn } => {
1221            out.push(fqcn.to_string());
1222        }
1223        Atomic::TLiteralEnumCase { enum_fqcn, .. } => {
1224            out.push(enum_fqcn.to_string());
1225        }
1226        Atomic::TClassString(Some(s)) => {
1227            out.push(s.to_string());
1228        }
1229        Atomic::TArray { key, value } | Atomic::TNonEmptyArray { key, value } => {
1230            collect_fqcns_in_union(key, out);
1231            collect_fqcns_in_union(value, out);
1232        }
1233        Atomic::TList { value } | Atomic::TNonEmptyList { value } => {
1234            collect_fqcns_in_union(value, out);
1235        }
1236        Atomic::TKeyedArray { properties, .. } => {
1237            for (_, kp) in properties.iter() {
1238                collect_fqcns_in_union(&kp.ty, out);
1239            }
1240        }
1241        Atomic::TClosure {
1242            params,
1243            return_type,
1244            this_type,
1245        } => {
1246            for p in params {
1247                if let Some(t) = &p.ty {
1248                    collect_fqcns_in_simple(t, out);
1249                }
1250            }
1251            collect_fqcns_in_union(return_type, out);
1252            if let Some(t) = this_type {
1253                collect_fqcns_in_union(t, out);
1254            }
1255        }
1256        Atomic::TCallable {
1257            params,
1258            return_type,
1259        } => {
1260            if let Some(ps) = params {
1261                for p in ps {
1262                    if let Some(t) = &p.ty {
1263                        collect_fqcns_in_simple(t, out);
1264                    }
1265                }
1266            }
1267            if let Some(rt) = return_type {
1268                collect_fqcns_in_union(rt, out);
1269            }
1270        }
1271        Atomic::TIntersection { parts } => {
1272            for p in parts.iter() {
1273                collect_fqcns_in_union(p, out);
1274            }
1275        }
1276        Atomic::TConditional {
1277            param_name: _,
1278            subject,
1279            if_true,
1280            if_false,
1281        } => {
1282            collect_fqcns_in_union(subject, out);
1283            collect_fqcns_in_union(if_true, out);
1284            collect_fqcns_in_union(if_false, out);
1285        }
1286        Atomic::TTemplateParam { as_type, .. } => {
1287            collect_fqcns_in_union(as_type, out);
1288        }
1289        _ => {}
1290    }
1291}
1292
1293fn build_reverse_deps(db: &dyn crate::db::MirDatabase) -> HashMap<String, HashSet<String>> {
1294    let mut reverse: HashMap<String, HashSet<String>> = HashMap::default();
1295
1296    let mut add_edge = |symbol: &str, dependent_file: &str| {
1297        if let Some(defining_file) = db.symbol_defining_file(symbol) {
1298            let def = defining_file.as_ref().to_string();
1299            if def != dependent_file {
1300                reverse
1301                    .entry(def)
1302                    .or_default()
1303                    .insert(dependent_file.to_string());
1304            }
1305        }
1306    };
1307
1308    for (file, imports) in db.file_import_snapshots() {
1309        let file = file.as_ref().to_string();
1310        for fqcn in imports.values() {
1311            add_edge(fqcn.as_str(), &file);
1312        }
1313    }
1314
1315    let extract_named_objects = |union: &mir_types::Type| {
1316        union
1317            .types
1318            .iter()
1319            .filter_map(|atomic| match atomic {
1320                mir_types::atomic::Atomic::TNamedObject { fqcn, .. } => Some(*fqcn),
1321                _ => None,
1322            })
1323            .collect::<Vec<_>>()
1324    };
1325
1326    for fqcn in crate::db::workspace_classes(db).iter() {
1327        let here = crate::db::Fqcn::from_str(db, fqcn.as_ref());
1328        let Some(class) = crate::db::find_class_like(db, here) else {
1329            continue;
1330        };
1331        if class.is_interface() || class.is_trait() || class.is_enum() {
1332            continue;
1333        }
1334        let Some(file) = db
1335            .symbol_defining_file(fqcn.as_ref())
1336            .map(|f| f.as_ref().to_string())
1337            .or_else(|| class.location().map(|l| l.file.as_ref().to_string()))
1338        else {
1339            continue;
1340        };
1341
1342        if let Some(parent) = class.parent() {
1343            add_edge(parent.as_ref(), &file);
1344        }
1345        for iface in class.interfaces().iter() {
1346            add_edge(iface.as_ref(), &file);
1347        }
1348        for tr in class.class_traits().iter() {
1349            add_edge(tr.as_ref(), &file);
1350        }
1351        if let Some(props) = class.own_properties() {
1352            for (_, p) in props.iter() {
1353                if let Some(ty) = &p.ty {
1354                    for named in extract_named_objects(ty) {
1355                        add_edge(named.as_ref(), &file);
1356                    }
1357                }
1358            }
1359        }
1360        for (_, method) in class.own_methods().iter() {
1361            for param in method.params.iter() {
1362                if let Some(ty) = &param.ty {
1363                    for named in extract_named_objects(ty.as_ref()) {
1364                        add_edge(named.as_ref(), &file);
1365                    }
1366                }
1367            }
1368            if let Some(rt) = method.return_type.as_deref() {
1369                for named in extract_named_objects(rt) {
1370                    add_edge(named.as_ref(), &file);
1371                }
1372            }
1373        }
1374    }
1375
1376    for fqn in crate::db::workspace_functions(db).iter() {
1377        let here = crate::db::Fqcn::from_str(db, fqn.as_ref());
1378        let Some(f) = crate::db::find_function(db, here) else {
1379            continue;
1380        };
1381        let Some(file) = db
1382            .symbol_defining_file(fqn.as_ref())
1383            .map(|f| f.as_ref().to_string())
1384            .or_else(|| f.location.as_ref().map(|l| l.file.as_ref().to_string()))
1385        else {
1386            continue;
1387        };
1388
1389        for param in f.params.iter() {
1390            if let Some(ty) = &param.ty {
1391                for named in extract_named_objects(ty.as_ref()) {
1392                    add_edge(named.as_ref(), &file);
1393                }
1394            }
1395        }
1396        if let Some(rt) = f.return_type.as_deref() {
1397            for named in extract_named_objects(rt) {
1398                add_edge(named.as_ref(), &file);
1399            }
1400        }
1401    }
1402
1403    for (ref_file, symbol_key) in db.all_reference_location_pairs() {
1404        let file_str = ref_file.as_ref().to_string();
1405        let lookup: &str = match symbol_key.split_once("::") {
1406            Some((class, _)) => class,
1407            None => &symbol_key,
1408        };
1409        add_edge(lookup, &file_str);
1410    }
1411
1412    reverse
1413}
1414
1415fn extract_reference_locations(
1416    db: &dyn crate::db::MirDatabase,
1417    file: &Arc<str>,
1418) -> Vec<(String, u32, u16, u16)> {
1419    db.extract_file_reference_locations(file.as_ref())
1420        .into_iter()
1421        .map(|(sym, line, col_start, col_end)| (sym.to_string(), line, col_start, col_end))
1422        .collect()
1423}
1424
1425pub struct AnalysisResult {
1426    pub issues: Vec<Issue>,
1427    #[doc(hidden)]
1428    pub type_envs: rustc_hash::FxHashMap<crate::type_env::ScopeId, crate::type_env::TypeEnv>,
1429    /// Per-expression resolved symbols from body analysis, sorted by file path.
1430    pub symbols: Vec<crate::symbol::ResolvedSymbol>,
1431    /// Maps each file path to the contiguous range within `symbols` that
1432    /// belongs to it.
1433    symbols_by_file: HashMap<Arc<str>, std::ops::Range<usize>>,
1434}
1435
1436impl AnalysisResult {
1437    fn build(
1438        issues: Vec<Issue>,
1439        type_envs: rustc_hash::FxHashMap<crate::type_env::ScopeId, crate::type_env::TypeEnv>,
1440        mut symbols: Vec<crate::symbol::ResolvedSymbol>,
1441    ) -> Self {
1442        symbols.sort_unstable_by(|a, b| a.file.as_ref().cmp(b.file.as_ref()));
1443        let mut symbols_by_file: HashMap<Arc<str>, std::ops::Range<usize>> = HashMap::default();
1444        let mut i = 0;
1445        while i < symbols.len() {
1446            let file = Arc::clone(&symbols[i].file);
1447            let start = i;
1448            while i < symbols.len() && symbols[i].file == file {
1449                i += 1;
1450            }
1451            symbols_by_file.insert(file, start..i);
1452        }
1453        Self {
1454            issues,
1455            type_envs,
1456            symbols,
1457            symbols_by_file,
1458        }
1459    }
1460
1461    pub fn error_count(&self) -> usize {
1462        self.issues
1463            .iter()
1464            .filter(|i| i.severity == mir_issues::Severity::Error)
1465            .count()
1466    }
1467
1468    pub fn warning_count(&self) -> usize {
1469        self.issues
1470            .iter()
1471            .filter(|i| i.severity == mir_issues::Severity::Warning)
1472            .count()
1473    }
1474
1475    pub fn issues_by_file(&self) -> HashMap<Arc<str>, Vec<&Issue>> {
1476        let mut map: HashMap<Arc<str>, Vec<&Issue>> = HashMap::default();
1477        for issue in &self.issues {
1478            map.entry(issue.location.file.clone())
1479                .or_default()
1480                .push(issue);
1481        }
1482        map
1483    }
1484
1485    pub fn count_by_severity(&self) -> Vec<(mir_issues::Severity, usize)> {
1486        let mut counts: std::collections::BTreeMap<mir_issues::Severity, usize> =
1487            std::collections::BTreeMap::new();
1488        for issue in &self.issues {
1489            *counts.entry(issue.severity).or_insert(0) += 1;
1490        }
1491        counts.into_iter().collect()
1492    }
1493
1494    pub fn total_issue_count(&self) -> usize {
1495        self.issues.len()
1496    }
1497
1498    pub fn filter_issues<'a, F>(&'a self, predicate: F) -> impl Iterator<Item = &'a Issue>
1499    where
1500        F: Fn(&Issue) -> bool + 'a,
1501    {
1502        self.issues.iter().filter(move |i| predicate(i))
1503    }
1504
1505    pub fn symbol_at(
1506        &self,
1507        file: &str,
1508        byte_offset: u32,
1509    ) -> Option<&crate::symbol::ResolvedSymbol> {
1510        let range = self.symbols_by_file.get(file)?;
1511        let symbols = &self.symbols[range.clone()];
1512
1513        // Primary: cursor is on an identifier token.
1514        if let Some(sym) = symbols
1515            .iter()
1516            .filter(|s| s.span.start <= byte_offset && byte_offset < s.span.end)
1517            .min_by_key(|s| s.span.end - s.span.start)
1518        {
1519            return Some(sym);
1520        }
1521
1522        // Fallback: cursor is in a call-expression gap (e.g. the whitespace or
1523        // argument list between two chained method calls).  Match against the
1524        // full expression span recorded for call-like symbols and return the
1525        // innermost (smallest) enclosing call, mirroring what an AST-walk to
1526        // the innermost containing call expression would produce.
1527        symbols
1528            .iter()
1529            .filter(|s| {
1530                s.expr_span
1531                    .is_some_and(|es| es.start <= byte_offset && byte_offset < es.end)
1532            })
1533            .min_by_key(|s| {
1534                let es = s.expr_span.unwrap();
1535                es.end - es.start
1536            })
1537    }
1538}