Skip to main content

mir_analyzer/
project.rs

1/// Project-level orchestration: file discovery, pass 1, pass 2.
2use std::mem::ManuallyDrop;
3use std::path::{Path, PathBuf};
4use std::sync::Arc;
5
6use rayon::prelude::*;
7
8use std::collections::{HashMap, HashSet};
9
10use crate::cache::{hash_content, AnalysisCache};
11use crate::db::{
12    collect_file_definitions, collect_file_definitions_uncached, FileDefinitions, MirDatabase,
13    MirDb, RefLoc, SourceFile,
14};
15use crate::pass2::{InferredTypes, Pass2Driver};
16use crate::php_version::PhpVersion;
17use crate::shared_db::SharedDb;
18use crate::stub_cache::{hash_source, prepare_for_ingest};
19use mir_issues::Issue;
20
21pub(crate) use crate::pass2::merge_return_types;
22
/// Names of the issue kinds produced by [`crate::dead_code::DeadCodeAnalyzer`].
///
/// These names take part in `suppressed_issue_kinds` exactly like any other
/// issue kind. Once every name returned here is suppressed, the dead-code
/// pass has nothing left to report and is skipped altogether.
pub fn dead_code_issue_kinds() -> &'static [&'static str] {
    const DEAD_CODE_KINDS: &[&str] = &["UnusedMethod", "UnusedProperty", "UnusedFunction"];
    DEAD_CODE_KINDS
}
32
33/// Batch-oriented analyzer: file discovery, parsing, and analysis.
34///
35/// ProjectAnalyzer is the primary entry point for analyzing a project as a whole.
36/// It orchestrates parallel file discovery and parsing, using the same core
37/// analysis engine as [`AnalysisSession`] (salsa database and Pass 2 driver).
38///
39/// **Unified Design:** ProjectAnalyzer and `AnalysisSession` now share the same
40/// database management via [`SharedDb`]. ProjectAnalyzer is the batch API
41/// (all files at once), while `AnalysisSession` is the incremental API (file-by-file).
42/// Both use `Pass2Driver`, the same definition collection logic, and identical
43/// database operations, eliminating code duplication.
44///
45/// [`AnalysisSession`]: crate::session::AnalysisSession
46pub struct ProjectAnalyzer {
47    /// Shared database management (salsa, file registry, stub tracking).
48    /// Extracted to allow code sharing with AnalysisSession.
49    shared_db: Arc<SharedDb>,
50    /// Optional cache — when `Some`, Pass 2 results are read/written per file.
51    cache: Option<AnalysisCache>,
52    /// Called once after each file completes Pass 2 (used for progress reporting).
53    pub on_file_done: Option<Arc<dyn Fn() + Send + Sync>>,
54    /// PSR-4 autoloader mapping from composer.json, if available.
55    pub psr4: Option<Arc<crate::composer::Psr4Map>>,
56    /// Names of `IssueKind` variants to drop from the final result, e.g.
57    /// `["MissingThrowsDocblock", "UnusedMethod"]`. Applied as a final
58    /// post-filter on every `analyze()` return path, so analyzer internals
59    /// don't need to know which diagnostics the consumer cares about.
60    ///
61    /// Defaults to an empty set — nothing is suppressed unless the
62    /// consumer (CLI, test fixture, programmatic caller) adds names. The
63    /// dead-code pass is skipped automatically when every
64    /// [`dead_code_issue_kinds`] entry is in this set.
65    pub suppressed_issue_kinds: std::collections::HashSet<String>,
66    /// Target PHP language version. `None` means "not configured"; resolved to
67    /// `PhpVersion::LATEST` when passed down to `StatementsAnalyzer`.
68    pub php_version: Option<PhpVersion>,
69    /// Additional stub files to parse before analysis (absolute paths).
70    pub stub_files: Vec<PathBuf>,
71    /// Additional stub directories to walk and parse before analysis (absolute paths).
72    pub stub_dirs: Vec<PathBuf>,
73}
74
75struct ParsedProjectFile {
76    file: Arc<str>,
77    source: Arc<str>,
78    parsed: ManuallyDrop<php_rs_parser::ParseResult<'static, 'static>>,
79    arena: ManuallyDrop<Box<bumpalo::Bump>>,
80}
81
82impl ParsedProjectFile {
83    fn new(file: Arc<str>, source: Arc<str>) -> Self {
84        let arena = Box::new(crate::arena::create_parse_arena(source.len()));
85        let parsed = php_rs_parser::parse(&arena, &source);
86        // SAFETY: `parsed` borrows from `arena` and `source`, both owned by this
87        // struct and kept alive until `Drop`. `Drop` manually destroys `parsed`
88        // before releasing either owner, so the widened lifetimes never escape.
89        let parsed = unsafe {
90            std::mem::transmute::<
91                php_rs_parser::ParseResult<'_, '_>,
92                php_rs_parser::ParseResult<'static, 'static>,
93            >(parsed)
94        };
95        Self {
96            file,
97            source,
98            parsed: ManuallyDrop::new(parsed),
99            arena: ManuallyDrop::new(arena),
100        }
101    }
102
103    fn source(&self) -> &str {
104        self.source.as_ref()
105    }
106
107    fn parsed(&self) -> &php_rs_parser::ParseResult<'_, '_> {
108        &self.parsed
109    }
110}
111
112impl Drop for ParsedProjectFile {
113    fn drop(&mut self) {
114        unsafe {
115            ManuallyDrop::drop(&mut self.parsed);
116            ManuallyDrop::drop(&mut self.arena);
117        }
118    }
119}
120
121// SAFETY: after construction the parsed AST and source map are read-only. The
122// bump arena is never mutated again; it only owns backing storage for AST nodes
123// and is dropped after all parallel analysis has completed.
124unsafe impl Send for ParsedProjectFile {}
125unsafe impl Sync for ParsedProjectFile {}
126
127impl ProjectAnalyzer {
128    pub fn new() -> Self {
129        Self {
130            shared_db: Arc::new(SharedDb::new()),
131            cache: None,
132            on_file_done: None,
133            psr4: None,
134            suppressed_issue_kinds: std::collections::HashSet::new(),
135            php_version: None,
136            stub_files: Vec::new(),
137            stub_dirs: Vec::new(),
138        }
139    }
140
141    /// Create a `ProjectAnalyzer` with a disk-backed cache stored under `cache_dir`.
142    pub fn with_cache(cache_dir: &Path) -> Self {
143        Self {
144            shared_db: Arc::new(SharedDb::new().with_cache_dir(cache_dir)),
145            cache: Some(AnalysisCache::open(cache_dir)),
146            on_file_done: None,
147            psr4: None,
148            suppressed_issue_kinds: std::collections::HashSet::new(),
149            php_version: None,
150            stub_files: Vec::new(),
151            stub_dirs: Vec::new(),
152        }
153    }
154
155    /// Enable the disk-backed cache for an already-constructed analyzer.
156    pub fn set_cache_dir(&mut self, cache_dir: &Path) {
157        // Rebuild SharedDb to attach the Pass-1 stub cache. Must be called
158        // before any file is ingested — a previously-populated SharedDb's
159        // state would be silently discarded here, which is almost certainly
160        // a caller bug rather than the intended behavior.
161        debug_assert_eq!(
162            self.shared_db.source_file_count(),
163            0,
164            "ProjectAnalyzer::set_cache_dir must be called before any file is ingested"
165        );
166        self.shared_db = Arc::new(SharedDb::new().with_cache_dir(cache_dir));
167        self.cache = Some(AnalysisCache::open(cache_dir));
168    }
169
170    /// Create a `ProjectAnalyzer` from a project root containing `composer.json`.
171    /// Returns the analyzer (with `psr4` set) and the `Psr4Map` so callers can
172    /// call `map.project_files()` / `map.vendor_files()`.
173    pub fn from_composer(
174        root: &Path,
175    ) -> Result<(Self, crate::composer::Psr4Map), crate::composer::ComposerError> {
176        let map = crate::composer::Psr4Map::from_composer(root)?;
177        let psr4 = Arc::new(map.clone());
178        let analyzer = Self {
179            shared_db: Arc::new(SharedDb::new()),
180            cache: None,
181            on_file_done: None,
182            psr4: Some(psr4),
183            suppressed_issue_kinds: std::collections::HashSet::new(),
184            php_version: None,
185            stub_files: Vec::new(),
186            stub_dirs: Vec::new(),
187        };
188        Ok((analyzer, map))
189    }
190
191    /// Builder method: set the target PHP version.
192    pub fn with_php_version(mut self, version: PhpVersion) -> Self {
193        self.php_version = Some(version);
194        self
195    }
196
197    /// True iff at least one [`IssueKind`] emitted by the dead-code pass is
198    /// not currently suppressed, so it's worth running.
199    fn should_run_dead_code(&self) -> bool {
200        dead_code_issue_kinds()
201            .iter()
202            .any(|k| !self.suppressed_issue_kinds.contains(*k))
203    }
204
205    /// Builder method: set a progress callback invoked once per analyzed file.
206    pub fn with_progress_callback(mut self, callback: Arc<dyn Fn() + Send + Sync>) -> Self {
207        self.on_file_done = Some(callback);
208        self
209    }
210
211    /// Builder method: add user stub files.
212    pub fn with_stub_files(mut self, files: Vec<PathBuf>) -> Self {
213        self.stub_files = files;
214        self
215    }
216
217    /// Builder method: add user stub directories.
218    pub fn with_stub_dirs(mut self, dirs: Vec<PathBuf>) -> Self {
219        self.stub_dirs = dirs;
220        self
221    }
222
223    /// Drop issues whose [`IssueKind::name()`] is listed in
224    /// [`Self::suppressed_issue_kinds`]. Centralized post-filter so analyzer
225    /// internals never need to know what the consumer cares about.
226    fn apply_issue_suppressions(&self, issues: &mut Vec<mir_issues::Issue>) {
227        if self.suppressed_issue_kinds.is_empty() {
228            return;
229        }
230        issues.retain(|i| !self.suppressed_issue_kinds.contains(i.kind.name()));
231    }
232
233    /// Builder method: configure a disk-backed cache at the given directory.
234    pub fn with_cache_dir(mut self, cache_dir: &Path) -> Self {
235        debug_assert_eq!(
236            self.shared_db.source_file_count(),
237            0,
238            "ProjectAnalyzer::with_cache_dir must be called before any file is ingested"
239        );
240        self.shared_db = Arc::new(SharedDb::new().with_cache_dir(cache_dir));
241        self.cache = Some(AnalysisCache::open(cache_dir));
242        self
243    }
244
245    /// Builder method: attach a PSR-4 autoloader map.
246    pub fn with_psr4(mut self, map: Arc<crate::composer::Psr4Map>) -> Self {
247        self.psr4 = Some(map);
248        self
249    }
250
251    /// Resolve the configured PHP version, defaulting to `PhpVersion::LATEST`
252    /// when none has been set.
253    fn resolved_php_version(&self) -> PhpVersion {
254        self.php_version.unwrap_or(PhpVersion::LATEST)
255    }
256
257    /// Cumulative hit / miss counts on the persistent Pass-1 cache attached
258    /// to this analyzer. `(0, 0)` when no cache is configured. Used by
259    /// integration tests and benchmarks to assert the cache actually fires.
260    #[doc(hidden)]
261    pub fn stub_cache_stats(&self) -> (u64, u64) {
262        match self.shared_db.stub_cache.as_deref() {
263            Some(c) => (c.hits(), c.misses()),
264            None => (0, 0),
265        }
266    }
267
268    fn type_exists(&self, fqcn: &str) -> bool {
269        let db = self.snapshot_db();
270        crate::db::type_exists_via_db(&db, fqcn)
271    }
272
273    /// Returns `true` if a function with `fqn` is registered and active.
274    pub fn contains_function(&self, fqn: &str) -> bool {
275        let db = self.snapshot_db();
276        db.lookup_function_node(fqn).is_some_and(|n| n.active(&db))
277    }
278
279    /// Returns `true` if a class / interface / trait / enum is registered.
280    pub fn contains_class(&self, fqcn: &str) -> bool {
281        let db = self.snapshot_db();
282        db.lookup_class_node(fqcn).is_some_and(|n| n.active(&db))
283    }
284
285    /// Returns `true` if `class` has a method named `name` (case-insensitive).
286    pub fn contains_method(&self, class: &str, name: &str) -> bool {
287        let db = self.snapshot_db();
288        let name_lower = name.to_ascii_lowercase();
289        db.lookup_method_node(class, &name_lower)
290            .is_some_and(|n| n.active(&db))
291    }
292
293    /// Acquire a cheap clone of the salsa db for a read-only query.
294    /// The lock is held only for the duration of the clone, so concurrent
295    /// readers never serialize on each other or on writes longer than the
296    /// clone itself.
297    fn snapshot_db(&self) -> MirDb {
298        self.shared_db.snapshot_db()
299    }
300
301    /// Internal: expose the salsa db for unit tests that need a `&dyn MirDatabase`.
302    #[doc(hidden)]
303    pub fn salsa_db_for_test(&self) -> parking_lot::MappedRwLockWriteGuard<'_, MirDb> {
304        let guard = self.shared_db.salsa.write();
305        parking_lot::RwLockWriteGuard::map(guard, |rw| &mut **rw)
306    }
307
308    /// Legacy: look up the source location of a class member by name.
309    ///
310    /// Prefer [`Self::definition_of`] with [`crate::Symbol::method`] etc.
311    #[doc(hidden)]
312    pub fn member_location(
313        &self,
314        fqcn: &str,
315        member_name: &str,
316    ) -> Option<mir_codebase::storage::Location> {
317        let db = self.snapshot_db();
318        crate::db::member_location_via_db(&db, fqcn, member_name)
319    }
320
321    /// Legacy: look up a top-level symbol location.
322    ///
323    /// Prefer [`Self::definition_of`] with [`crate::Symbol`].
324    #[doc(hidden)]
325    pub fn symbol_location(&self, symbol: &str) -> Option<mir_codebase::storage::Location> {
326        let db = self.snapshot_db();
327        db.lookup_class_node(symbol)
328            .filter(|n| n.active(&db))
329            .and_then(|n| n.location(&db))
330            .or_else(|| {
331                db.lookup_function_node(symbol)
332                    .filter(|n| n.active(&db))
333                    .and_then(|n| n.location(&db))
334            })
335    }
336
337    /// Legacy: raw reference locations as `(file, line, col_start, col_end)`.
338    ///
339    /// Prefer [`Self::references_to`] which returns `(Arc<str>, Range)` pairs
340    /// and takes a strongly-typed [`crate::Symbol`].
341    #[doc(hidden)]
342    pub fn reference_locations(&self, symbol: &str) -> Vec<(Arc<str>, u32, u16, u16)> {
343        let db = self.snapshot_db();
344        db.reference_locations(symbol)
345    }
346
347    /// Resolve a symbol to its declaration location.
348    ///
349    /// Mirrors [`crate::AnalysisSession::definition_of`].
350    pub fn definition_of(
351        &self,
352        symbol: &crate::Symbol,
353    ) -> Result<mir_codebase::storage::Location, crate::SymbolLookupError> {
354        let db = self.snapshot_db();
355        match symbol {
356            crate::Symbol::Class(fqcn) => {
357                let node = db
358                    .lookup_class_node(fqcn.as_ref())
359                    .filter(|n| n.active(&db))
360                    .ok_or(crate::SymbolLookupError::NotFound)?;
361                node.location(&db)
362                    .ok_or(crate::SymbolLookupError::NoSourceLocation)
363            }
364            crate::Symbol::Function(fqn) => {
365                let node = db
366                    .lookup_function_node(fqn.as_ref())
367                    .filter(|n| n.active(&db))
368                    .ok_or(crate::SymbolLookupError::NotFound)?;
369                node.location(&db)
370                    .ok_or(crate::SymbolLookupError::NoSourceLocation)
371            }
372            crate::Symbol::Method { class, name }
373            | crate::Symbol::Property { class, name }
374            | crate::Symbol::ClassConstant { class, name } => {
375                crate::db::member_location_via_db(&db, class, name)
376                    .ok_or(crate::SymbolLookupError::NotFound)
377            }
378            crate::Symbol::GlobalConstant(_) => Err(crate::SymbolLookupError::NoSourceLocation),
379        }
380    }
381
382    /// All recorded references to a symbol, as `(file, range)` pairs.
383    ///
384    /// Mirrors [`crate::AnalysisSession::references_to`].
385    pub fn references_to(&self, symbol: &crate::Symbol) -> Vec<(Arc<str>, crate::Range)> {
386        let db = self.snapshot_db();
387        let key = symbol.codebase_key();
388        db.reference_locations(&key)
389            .into_iter()
390            .map(|(file, line, col_start, col_end)| {
391                let range = crate::Range {
392                    start: crate::Position {
393                        line,
394                        column: col_start as u32,
395                    },
396                    end: crate::Position {
397                        line,
398                        column: col_end as u32,
399                    },
400                };
401                (file, range)
402            })
403            .collect()
404    }
405
406    /// Load PHP built-in stubs. Called automatically by `analyze` if not done yet.
407    /// Stubs are filtered against the configured target PHP version (or
408    /// `PhpVersion::LATEST` if none was set).
409    pub fn load_stubs(&self) {
410        let php_version = self.resolved_php_version();
411
412        // Load all built-in stubs for the configured PHP version
413        let paths: Vec<&'static str> = crate::stubs::stub_files().iter().map(|&(p, _)| p).collect();
414        self.shared_db.ingest_stub_paths(&paths, php_version);
415
416        // Load user-configured stubs
417        self.shared_db
418            .ingest_user_stubs(&self.stub_files, &self.stub_dirs);
419    }
420
421    fn collect_and_ingest_source(&self, file: Arc<str>, src: &str) -> FileDefinitions {
422        self.shared_db
423            .collect_and_ingest_file(file, src, self.resolved_php_version())
424    }
425
426    /// Run the full analysis pipeline on a set of file paths.
427    pub fn analyze(&self, paths: &[PathBuf]) -> AnalysisResult {
428        let mut all_issues = Vec::new();
429        let _t0 = std::time::Instant::now();
430
431        // ---- Load PHP built-in stubs (before Pass 1 so user code can override)
432        self.load_stubs();
433        let _t_stubs = _t0.elapsed();
434
435        // ---- Pass 1: read files in parallel ----------------------------------
436        let parsed_files: Vec<ParsedProjectFile> = paths
437            .par_iter()
438            .filter_map(|path| match std::fs::read_to_string(path) {
439                Ok(src) => {
440                    let file = Arc::from(path.to_string_lossy().as_ref());
441                    Some(ParsedProjectFile::new(file, Arc::from(src)))
442                }
443                Err(e) => {
444                    eprintln!("Cannot read {}: {}", path.display(), e);
445                    None
446                }
447            })
448            .collect();
449        let _t_read = _t0.elapsed();
450
451        let file_data: Vec<(Arc<str>, Arc<str>)> = parsed_files
452            .iter()
453            .map(|parsed| (parsed.file.clone(), parsed.source.clone()))
454            .collect();
455
456        // ---- Pre-Pass-2 invalidation: evict dependents of changed files ------
457        if let Some(cache) = &self.cache {
458            let changed: Vec<String> = file_data
459                .par_iter()
460                .filter_map(|(f, src)| {
461                    let h = hash_content(src.as_ref());
462                    if cache.get(f, &h).is_none() {
463                        Some(f.to_string())
464                    } else {
465                        None
466                    }
467                })
468                .collect();
469            if !changed.is_empty() {
470                cache.evict_with_dependents(&changed);
471            }
472        }
473
474        // ---- Register Salsa source inputs for incremental follow-up calls ----
475        {
476            let mut guard = self.shared_db.salsa.write();
477            for parsed in &parsed_files {
478                guard.upsert_source_file(parsed.file.clone(), parsed.source.clone());
479            }
480        }
481        let _t_salsa_reg = _t0.elapsed();
482
483        // ---- Pass 1: definition collection from the already-parsed AST -------
484        let file_defs: Vec<FileDefinitions> = parsed_files
485            .par_iter()
486            .map(|parsed| {
487                let parse_result = parsed.parsed();
488                let mut all_issues: Vec<Issue> = parse_result
489                    .errors
490                    .iter()
491                    .map(|err| {
492                        crate::parser::parse_error_to_issue(
493                            err,
494                            &parsed.file,
495                            parsed.source(),
496                            &parse_result.source_map,
497                        )
498                    })
499                    .collect();
500                let collector = crate::collector::DefinitionCollector::new_for_slice(
501                    parsed.file.clone(),
502                    parsed.source(),
503                    &parse_result.source_map,
504                );
505                let (mut slice, collector_issues) = collector.collect_slice(&parse_result.program);
506                all_issues.extend(collector_issues);
507                mir_codebase::storage::deduplicate_params_in_slice(&mut slice);
508                FileDefinitions {
509                    slice: Arc::new(slice),
510                    issues: Arc::new(all_issues),
511                }
512            })
513            .collect();
514        let _t_pass1 = _t0.elapsed();
515
516        let mut files_with_parse_errors: std::collections::HashSet<Arc<str>> =
517            std::collections::HashSet::new();
518        {
519            let mut guard = self.shared_db.salsa.write();
520            for defs in file_defs {
521                for issue in defs.issues.iter() {
522                    if matches!(issue.kind, mir_issues::IssueKind::ParseError { .. })
523                        && issue.severity == mir_issues::Severity::Error
524                    {
525                        files_with_parse_errors.insert(issue.location.file.clone());
526                    }
527                }
528                guard.ingest_stub_slice(&defs.slice);
529                all_issues.extend(Arc::unwrap_or_clone(defs.issues));
530            }
531        }
532        let _t_ingest = _t0.elapsed();
533
534        // ---- Lazy-load unknown classes via PSR-4 (issue #50) ----------------
535        if let Some(psr4) = &self.psr4 {
536            self.lazy_load_missing_classes(psr4.clone(), &mut all_issues);
537        }
538
539        // ---- Resolve @psalm-import-type declarations now that all Pass 1
540        // classes (including their `type_aliases`) are populated.
541        // ---- Build reverse dep graph and persist it for the next run ---------
542        if let Some(cache) = &self.cache {
543            let db_snapshot = {
544                let guard = self.shared_db.salsa.read();
545                (**guard).clone()
546            };
547            let rev = build_reverse_deps(&db_snapshot);
548            cache.set_reverse_deps(rev);
549        }
550
551        // ---- Class-level checks (M11) ----------------------------------------
552        let analyzed_file_set: std::collections::HashSet<std::sync::Arc<str>> =
553            file_data.iter().map(|(f, _)| f.clone()).collect();
554        {
555            let class_db = {
556                let guard = self.shared_db.salsa.read();
557                (**guard).clone()
558            };
559            let class_issues =
560                crate::class::ClassAnalyzer::with_files(&class_db, analyzed_file_set, &file_data)
561                    .analyze_all();
562            all_issues.extend(class_issues);
563        }
564
565        // ---- Inference pre-sweep: prime inferred return types ----------------
566        // Run an inference-only Pass 2 over each file in parallel using direct
567        // rayon (no Salsa tracked-query overhead per file), collect the results,
568        // then commit them to Salsa INPUT fields.  The full Pass 2 then reads
569        // those fields via O(1) accesses with no lock contention.
570        //
571        // We use `Pass2Driver::new_inference_only` directly rather than the
572        // Salsa-tracked `infer_file_return_types` query so that the batch path
573        // avoids per-file Salsa lock acquisition and memo-table overhead on every
574        // cold start.  `infer_file_return_types` is reserved for the incremental
575        // LSP path (AnalysisSession) where Salsa cache hits across edits matter.
576        //
577        // `map_with` clones `db_priming` once per rayon worker thread (not once
578        // per file as the old `in_place_scope` loop did). For N files on T threads
579        // this reduces clones from N to T.  Results are returned by value and
580        // flattened after `collect()`, replacing the Arc<Mutex<Vec>> accumulator.
581        // All per-thread db clones are dropped when `collect()` returns, so
582        // `commit_inferred_return_types` (which calls Salsa setters that wait for
583        // strong_count == 1) cannot deadlock.
584        {
585            let db_priming = {
586                let guard = self.shared_db.salsa.read();
587                (**guard).clone()
588            };
589            let php_version = self.resolved_php_version();
590            let all_inferred: Vec<InferredTypes> = parsed_files
591                .par_iter()
592                .filter(|parsed| !files_with_parse_errors.contains(&parsed.file))
593                .map_with(db_priming, |db, parsed| {
594                    let driver = Pass2Driver::new_inference_only(
595                        db as &dyn crate::db::MirDatabase,
596                        php_version,
597                    );
598                    let parse_result = parsed.parsed();
599                    driver.analyze_bodies(
600                        &parse_result.program,
601                        parsed.file.clone(),
602                        parsed.source(),
603                        &parse_result.source_map,
604                    );
605                    driver.take_inferred_types()
606                })
607                .collect();
608            // db_priming is consumed by map_with; per-thread clones dropped by collect().
609            let mut functions = Vec::new();
610            let mut methods = Vec::new();
611            for inferred in all_inferred {
612                functions.extend(inferred.functions);
613                methods.extend(inferred.methods);
614            }
615            let mut guard = self.shared_db.salsa.write();
616            guard.commit_inferred_return_types(functions, methods);
617        }
618        let _t_presweep = _t0.elapsed();
619
620        let db_main = {
621            let guard = self.shared_db.salsa.read();
622            (**guard).clone()
623        };
624
625        // ---- Pass 2: analyze function/method bodies in parallel (M14) --------
626        // Each worker db clone has its own `pending_ref_locs` buffer (custom
627        // Clone returns empty).  Workers push reference locations there instead
628        // of into the shared Arc<Mutex<...>> maps, eliminating cross-thread
629        // contention.  After collect() we commit all batches serially in a
630        // single lock acquisition per map.
631        let pass2_results: Vec<(Vec<Issue>, Vec<crate::symbol::ResolvedSymbol>, Vec<RefLoc>)> =
632            parsed_files
633                .par_iter()
634                .filter(|parsed| !files_with_parse_errors.contains(&parsed.file))
635                .map_with(db_main, |db, parsed| {
636                    let driver =
637                        Pass2Driver::new(&*db as &dyn MirDatabase, self.resolved_php_version());
638                    let (issues, symbols) = if let Some(cache) = &self.cache {
639                        let h = hash_content(parsed.source());
640                        if let Some((cached_issues, ref_locs)) = cache.get(&parsed.file, &h) {
641                            db.replay_reference_locations(parsed.file.clone(), &ref_locs);
642                            (cached_issues, Vec::new())
643                        } else {
644                            let parse_result = parsed.parsed();
645                            let (issues, symbols) = driver.analyze_bodies(
646                                &parse_result.program,
647                                parsed.file.clone(),
648                                parsed.source(),
649                                &parse_result.source_map,
650                            );
651                            let pending = db.take_pending_ref_locs();
652                            let cache_locs = pending
653                                .iter()
654                                .map(|r| (r.symbol_key.to_string(), r.line, r.col_start, r.col_end))
655                                .collect();
656                            cache.put(&parsed.file, h, issues.clone(), cache_locs);
657                            if let Some(cb) = &self.on_file_done {
658                                cb();
659                            }
660                            return (issues, symbols, pending);
661                        }
662                    } else {
663                        let parse_result = parsed.parsed();
664                        driver.analyze_bodies(
665                            &parse_result.program,
666                            parsed.file.clone(),
667                            parsed.source(),
668                            &parse_result.source_map,
669                        )
670                    };
671                    let pending = db.take_pending_ref_locs();
672                    if let Some(cb) = &self.on_file_done {
673                        cb();
674                    }
675                    (issues, symbols, pending)
676                })
677                .collect();
678
679        let _t_pass2 = _t0.elapsed();
680
681        // Serial commit: one lock acquisition per map for all files combined.
682        let mut all_ref_locs: Vec<RefLoc> = Vec::new();
683        let mut all_symbols = Vec::new();
684        for (issues, symbols, ref_locs) in pass2_results {
685            all_issues.extend(issues);
686            all_symbols.extend(symbols);
687            all_ref_locs.extend(ref_locs);
688        }
689        {
690            let guard = self.shared_db.salsa.read();
691            guard.commit_reference_locations_batch(all_ref_locs);
692        }
693
694        // ---- Post-Pass-2 lazy loading: FQCNs used without `use` imports ------
695        // FQCNs in function/method bodies aren't visible until Pass 2 runs, so
696        // the pre-Pass-2 lazy load misses them.  We collect UndefinedClass names,
697        // resolve them via PSR-4, load those files, re-finalize, then re-analyze
698        // only the affected files to clear the false positives.
699        if let Some(psr4) = &self.psr4 {
700            self.lazy_load_from_body_issues(
701                psr4.clone(),
702                &file_data,
703                &files_with_parse_errors,
704                &mut all_issues,
705                &mut all_symbols,
706            );
707        }
708
709        // Persist cache hits/misses to disk
710        if let Some(cache) = &self.cache {
711            cache.flush();
712        }
713
714        // ---- Compact the reference index ------------------------------------
715        // ---- Dead-code detection (M18) --------------------------------------
716        if self.should_run_dead_code() {
717            let salsa = self.snapshot_db();
718            let dead_code_issues = crate::dead_code::DeadCodeAnalyzer::new(&salsa).analyze();
719            all_issues.extend(dead_code_issues);
720        }
721
722        let _t_total = _t0.elapsed();
723        if std::env::var("MIR_TIMING").is_ok() {
724            eprintln!(
725                "[timing] stubs={:.0}ms read={:.0}ms salsa_reg={:.0}ms pass1={:.0}ms ingest={:.0}ms presweep={:.0}ms pass2={:.0}ms total={:.0}ms",
726                _t_stubs.as_secs_f64() * 1000.0,
727                (_t_read - _t_stubs).as_secs_f64() * 1000.0,
728                (_t_salsa_reg - _t_read).as_secs_f64() * 1000.0,
729                (_t_pass1 - _t_salsa_reg).as_secs_f64() * 1000.0,
730                (_t_ingest - _t_pass1).as_secs_f64() * 1000.0,
731                (_t_presweep - _t_ingest).as_secs_f64() * 1000.0,
732                (_t_pass2 - _t_presweep).as_secs_f64() * 1000.0,
733                _t_total.as_secs_f64() * 1000.0,
734            );
735        }
736
737        self.apply_issue_suppressions(&mut all_issues);
738        AnalysisResult::build(all_issues, std::collections::HashMap::new(), all_symbols)
739    }
740
741    fn lazy_load_missing_classes(
742        &self,
743        psr4: Arc<crate::composer::Psr4Map>,
744        all_issues: &mut Vec<Issue>,
745    ) {
746        use std::collections::HashSet;
747        use std::sync::Arc;
748
749        let max_depth = 10;
750        let mut loaded: HashSet<String> = HashSet::new();
751        let mut scanned: HashSet<Arc<str>> = HashSet::new();
752
753        for _ in 0..max_depth {
754            let mut to_load: Vec<(String, PathBuf)> = Vec::new();
755
756            let mut try_queue = |fqcn: &str| {
757                if !self.type_exists(fqcn) && !loaded.contains(fqcn) {
758                    if let Some(path) = psr4.resolve(fqcn) {
759                        to_load.push((fqcn.to_string(), path));
760                    }
761                }
762            };
763
764            // Collect inheritance and import candidates. Only scan classes that
765            // haven't been scanned yet (optimization: avoid redundant full scans).
766            let mut inheritance_candidates = Vec::new();
767            let import_candidates = {
768                let db_owned = self.snapshot_db();
769                let db = &db_owned;
770                for fqcn in db.active_class_node_fqcns() {
771                    if scanned.contains(fqcn.as_ref()) {
772                        continue;
773                    }
774                    let Some(node) = db.lookup_class_node(&fqcn) else {
775                        continue;
776                    };
777                    scanned.insert(fqcn.clone());
778                    if node.is_interface(db) {
779                        for parent in node.extends(db).iter() {
780                            inheritance_candidates.push(parent.to_string());
781                        }
782                    } else if node.is_enum(db) {
783                        for iface in node.interfaces(db).iter() {
784                            inheritance_candidates.push(iface.to_string());
785                        }
786                    } else if node.is_trait(db) {
787                        for used in node.traits(db).iter() {
788                            inheritance_candidates.push(used.to_string());
789                        }
790                    } else {
791                        if let Some(parent) = node.parent(db) {
792                            inheritance_candidates.push(parent.to_string());
793                        }
794                        for iface in node.interfaces(db).iter() {
795                            inheritance_candidates.push(iface.to_string());
796                        }
797                    }
798                }
799                db.file_import_snapshots()
800                    .into_iter()
801                    .flat_map(|(_, imports)| imports.into_values())
802                    .collect::<Vec<_>>()
803            };
804            for fqcn in inheritance_candidates {
805                try_queue(&fqcn);
806            }
807
808            // Also lazy-load any type referenced via `use` imports that isn't yet
809            // in the codebase (covers enums and classes used only in type hints or
810            // static calls, which never appear in the inheritance scan above).
811            for fqcn in import_candidates {
812                try_queue(&fqcn);
813            }
814
815            if to_load.is_empty() {
816                break;
817            }
818
819            for (fqcn, path) in to_load {
820                loaded.insert(fqcn);
821                if let Ok(src) = std::fs::read_to_string(&path) {
822                    let file: Arc<str> = Arc::from(path.to_string_lossy().as_ref());
823                    let defs = self.collect_and_ingest_source(file, &src);
824                    all_issues.extend(Arc::unwrap_or_clone(defs.issues));
825                }
826            }
827        }
828    }
829
830    fn lazy_load_from_body_issues(
831        &self,
832        psr4: Arc<crate::composer::Psr4Map>,
833        file_data: &[(Arc<str>, Arc<str>)],
834        files_with_parse_errors: &HashSet<Arc<str>>,
835        all_issues: &mut Vec<Issue>,
836        all_symbols: &mut Vec<crate::symbol::ResolvedSymbol>,
837    ) {
838        use mir_issues::IssueKind;
839
840        let max_depth = 5;
841        let mut loaded: HashSet<String> = HashSet::new();
842
843        for _ in 0..max_depth {
844            // Deduplicate by FQCN: HashMap prevents loading the same class twice
845            // when multiple files share the same UndefinedClass diagnostic.
846            let mut to_load: HashMap<String, PathBuf> = HashMap::new();
847
848            for issue in all_issues.iter() {
849                if let IssueKind::UndefinedClass { name } = &issue.kind {
850                    if !self.type_exists(name) && !loaded.contains(name) {
851                        if let Some(path) = psr4.resolve(name) {
852                            to_load.entry(name.clone()).or_insert(path);
853                        }
854                    }
855                }
856            }
857
858            if to_load.is_empty() {
859                break;
860            }
861
862            loaded.extend(to_load.keys().cloned());
863
864            for path in to_load.values() {
865                if let Ok(src) = std::fs::read_to_string(path) {
866                    let file: Arc<str> = Arc::from(path.to_string_lossy().as_ref());
867                    let _ = self.collect_and_ingest_source(file, &src);
868                }
869            }
870
871            // Load inheritance deps of newly-added types and finalize.
872            // This covers e.g. `class Helper extends \App\Base` where Base is
873            // also not in the initial file set.
874            self.lazy_load_missing_classes(psr4.clone(), all_issues);
875
876            // Re-analyze every file that has an UndefinedClass for a type now
877            // present in the codebase — covers both direct and transitive loads.
878            let files_to_reanalyze: HashSet<Arc<str>> = all_issues
879                .iter()
880                .filter_map(|i| {
881                    if let IssueKind::UndefinedClass { name } = &i.kind {
882                        if self.type_exists(name) {
883                            return Some(i.location.file.clone());
884                        }
885                    }
886                    None
887                })
888                .collect();
889
890            if files_to_reanalyze.is_empty() {
891                break;
892            }
893
894            all_issues.retain(|i| !files_to_reanalyze.contains(&i.location.file));
895            all_symbols.retain(|s| !files_to_reanalyze.contains(&s.file));
896
897            let db_full = {
898                let guard = self.shared_db.salsa.read();
899                (**guard).clone()
900            };
901
902            let reanalysis: Vec<(Vec<Issue>, Vec<crate::symbol::ResolvedSymbol>, Vec<RefLoc>)> =
903                file_data
904                    .par_iter()
905                    .filter(|(f, _)| {
906                        !files_with_parse_errors.contains(f) && files_to_reanalyze.contains(f)
907                    })
908                    .map_with(db_full, |db, (file, src)| {
909                        let driver =
910                            Pass2Driver::new(&*db as &dyn MirDatabase, self.resolved_php_version());
911                        let arena = crate::arena::create_parse_arena(src.len());
912                        let parsed = php_rs_parser::parse(&arena, src);
913                        let (issues, symbols) = driver.analyze_bodies(
914                            &parsed.program,
915                            file.clone(),
916                            src,
917                            &parsed.source_map,
918                        );
919                        let pending = db.take_pending_ref_locs();
920                        (issues, symbols, pending)
921                    })
922                    .collect();
923
924            let mut reanalysis_ref_locs: Vec<RefLoc> = Vec::new();
925            for (issues, symbols, ref_locs) in reanalysis {
926                all_issues.extend(issues);
927                all_symbols.extend(symbols);
928                reanalysis_ref_locs.extend(ref_locs);
929            }
930            {
931                let guard = self.shared_db.salsa.read();
932                guard.commit_reference_locations_batch(reanalysis_ref_locs);
933            }
934        }
935    }
936
937    /// Re-analyze a single file within the existing codebase.
938    ///
939    /// This is the incremental analysis API for LSP:
940    /// 1. Removes old definitions from this file
941    /// 2. Re-runs Pass 1 (definition collection) on the new content
942    /// 3. Resolves any newly-collected `@psalm-import-type` declarations
943    /// 4. Re-runs Pass 2 (body analysis) on this file
944    /// 5. Returns the analysis result for this file only
945    pub fn re_analyze_file(&self, file_path: &str, new_content: &str) -> AnalysisResult {
946        // Fast path: content unchanged and cache has a valid entry — skip full re-analysis.
947        if let Some(cache) = &self.cache {
948            let h = hash_content(new_content);
949            if let Some((mut issues, ref_locs)) = cache.get(file_path, &h) {
950                let file: Arc<str> = Arc::from(file_path);
951                let guard = self.shared_db.salsa.read();
952                guard.replay_reference_locations(file, &ref_locs);
953                guard.commit_pending_to_maps();
954                self.apply_issue_suppressions(&mut issues);
955                return AnalysisResult::build(issues, HashMap::new(), Vec::new());
956            }
957        }
958
959        let file: Arc<str> = Arc::from(file_path);
960
961        {
962            let mut guard = self.shared_db.salsa.write();
963            guard.remove_file_definitions(file_path);
964        }
965
966        // --- Salsa-backed Pass 1: memoized parse + definition collection ------
967        let file_defs = {
968            let mut guard = self.shared_db.salsa.write();
969            let salsa_file = guard.upsert_source_file(file.clone(), Arc::from(new_content));
970            collect_file_definitions(&**guard, salsa_file)
971        };
972
973        let mut all_issues: Vec<Issue> = Arc::unwrap_or_clone(file_defs.issues.clone());
974
975        // --- S2 + Pass 2: hold the Salsa lock for ClassNode upserts and body
976        // analysis so the db reference is live during Pass 2 (S5).
977        let symbols = {
978            let mut guard = self.shared_db.salsa.write();
979
980            guard.ingest_stub_slice(&file_defs.slice);
981
982            // Resolve any newly-collected @psalm-import-type declarations so
983            // Pass 2 reads the imported aliases out of `type_aliases`.
984            // Re-parse in the arena so Pass 2 can walk the AST.
985            let arena = bumpalo::Bump::new();
986            let parsed = php_rs_parser::parse(&arena, new_content);
987
988            let has_hard_errors = parsed.errors.iter().any(crate::parser::is_hard_parse_error);
989            if !has_hard_errors {
990                let db_ref: &dyn MirDatabase = &**guard;
991                let driver = Pass2Driver::new(db_ref, self.resolved_php_version());
992                let (body_issues, symbols) = driver.analyze_bodies(
993                    &parsed.program,
994                    file.clone(),
995                    new_content,
996                    &parsed.source_map,
997                );
998                all_issues.extend(body_issues);
999                guard.commit_pending_to_maps();
1000                symbols
1001            } else {
1002                Vec::new()
1003            }
1004        };
1005
1006        if let Some(cache) = &self.cache {
1007            let h = hash_content(new_content);
1008            cache.evict_with_dependents(&[file_path.to_string()]);
1009            let db = self.snapshot_db();
1010            let ref_locs = extract_reference_locations(&db, &file);
1011            cache.put(file_path, h, all_issues.clone(), ref_locs);
1012        }
1013
1014        self.apply_issue_suppressions(&mut all_issues);
1015        AnalysisResult::build(all_issues, HashMap::new(), symbols)
1016    }
1017
1018    /// Analyze a PHP source string without a real file path.
1019    /// Useful for tests and LSP single-file mode.
1020    pub fn analyze_source(source: &str) -> AnalysisResult {
1021        let analyzer = ProjectAnalyzer::new();
1022        let file: Arc<str> = Arc::from("<source>");
1023        let mut db = MirDb::default();
1024        for slice in crate::stubs::builtin_stub_slices_for_version(analyzer.resolved_php_version())
1025        {
1026            db.ingest_stub_slice(&slice);
1027        }
1028        let salsa_file = SourceFile::new(&db, file.clone(), Arc::from(source));
1029        let file_defs = collect_file_definitions(&db, salsa_file);
1030        db.ingest_stub_slice(&file_defs.slice);
1031        let mut all_issues = Arc::unwrap_or_clone(file_defs.issues);
1032        if all_issues.iter().any(|issue| {
1033            matches!(issue.kind, mir_issues::IssueKind::ParseError { .. })
1034                && issue.severity == mir_issues::Severity::Error
1035        }) {
1036            analyzer.apply_issue_suppressions(&mut all_issues);
1037            return AnalysisResult::build(all_issues, std::collections::HashMap::new(), Vec::new());
1038        }
1039        let mut type_envs = std::collections::HashMap::new();
1040        let mut all_symbols = Vec::new();
1041        let arena = bumpalo::Bump::new();
1042        let result = php_rs_parser::parse(&arena, source);
1043
1044        let driver = Pass2Driver::new(&db, analyzer.resolved_php_version());
1045        all_issues.extend(driver.analyze_bodies_typed(
1046            &result.program,
1047            file.clone(),
1048            source,
1049            &result.source_map,
1050            &mut type_envs,
1051            &mut all_symbols,
1052        ));
1053        analyzer.apply_issue_suppressions(&mut all_issues);
1054        AnalysisResult::build(all_issues, type_envs, all_symbols)
1055    }
1056
1057    /// Discover all `.php` files under a directory, recursively.
1058    pub fn discover_files(root: &Path) -> Vec<PathBuf> {
1059        if root.is_file() {
1060            return vec![root.to_path_buf()];
1061        }
1062        let mut files = Vec::new();
1063        collect_php_files(root, &mut files);
1064        files
1065    }
1066
1067    /// Pass 1 only: collect type definitions from `paths` into the codebase without
1068    /// analyzing method bodies or emitting issues. Used to load vendor types.
1069    ///
1070    /// When [`Self::with_cache`] is enabled, per-file [`StubSlice`] results from
1071    /// previous runs are reused on a content-hash match, eliminating the
1072    /// parse + definition-collection step (which is ~95% of vendor wall-time
1073    /// on Laravel). Cache misses run the normal pipeline and write back so
1074    /// subsequent runs hit.
1075    ///
1076    /// [`StubSlice`]: mir_codebase::storage::StubSlice
1077    pub fn collect_types_only(&self, paths: &[PathBuf]) {
1078        let _timing = std::env::var("MIR_TIMING").is_ok();
1079        let _t0 = std::time::Instant::now();
1080
1081        let php_v = self.resolved_php_version().cache_byte();
1082
1083        // ---- Phase 1: read + try cache, in parallel ------------------------
1084        // Each entry carries either a ready-to-ingest cached slice, or the
1085        // source text + hash for the miss path that runs Pass 1.
1086        struct FileEntry {
1087            file: Arc<str>,
1088            src: Arc<str>,
1089            hash: [u8; 32],
1090            cached: Option<mir_codebase::storage::StubSlice>,
1091        }
1092        let entries: Vec<FileEntry> = paths
1093            .par_iter()
1094            .filter_map(|path| {
1095                let src = std::fs::read_to_string(path).ok()?;
1096                let file: Arc<str> = Arc::from(path.to_string_lossy().as_ref());
1097                let src: Arc<str> = Arc::from(src);
1098                let hash = hash_source(&src);
1099                let cached = self.shared_db.stub_cache.as_ref().and_then(|c| {
1100                    let mut slice = c.get(&file, &hash, php_v)?;
1101                    // Re-run dedup outside the serial ingest section so commit
1102                    // 3018a1d's parallel-dedup win is preserved on cache hits.
1103                    prepare_for_ingest(&mut slice);
1104                    Some(slice)
1105                });
1106                Some(FileEntry {
1107                    file,
1108                    src,
1109                    hash,
1110                    cached,
1111                })
1112            })
1113            .collect();
1114        let _t_read = _t0.elapsed();
1115
1116        // ---- Phase 2: register all SourceFile inputs in salsa --------------
1117        // Lazy-load (e.g. UndefinedClass → vendor file) may later query any of
1118        // these as a salsa input, so we register both hits and misses.
1119        let source_files: Vec<SourceFile> = {
1120            let mut guard = self.shared_db.salsa.write();
1121            entries
1122                .iter()
1123                .map(|e| guard.upsert_source_file(e.file.clone(), e.src.clone()))
1124                .collect()
1125        };
1126        let _t_reg = _t0.elapsed();
1127
1128        // ---- Phase 3: Pass 1 for misses, cache write-back, in parallel -----
1129        let db_pass1 = {
1130            let guard = self.shared_db.salsa.read();
1131            (**guard).clone()
1132        };
1133        let stub_cache = self.shared_db.stub_cache.clone();
1134        // `into_par_iter` so cached slices can be moved (not cloned) into the
1135        // result vec. Cloning 10k StubSlices on warm vendor would burn most
1136        // of the churn-reduction win the cache exists to produce.
1137        let prepared: Vec<mir_codebase::storage::StubSlice> = entries
1138            .into_par_iter()
1139            .zip(source_files.into_par_iter())
1140            .map_with(db_pass1, |db, (mut entry, salsa_file)| {
1141                if let Some(slice) = entry.cached.take() {
1142                    return slice;
1143                }
1144                let defs = collect_file_definitions_uncached(&*db, salsa_file);
1145                let slice = Arc::unwrap_or_clone(defs.slice);
1146                if let Some(cache) = stub_cache.as_ref() {
1147                    cache.put(&entry.file, &entry.hash, php_v, &slice);
1148                }
1149                slice
1150            })
1151            .collect();
1152        let _t_collect = _t0.elapsed();
1153
1154        // ---- Phase 4: serial ingest under the write lock -------------------
1155        let mut guard = self.shared_db.salsa.write();
1156        for slice in &prepared {
1157            guard.ingest_stub_slice(slice);
1158        }
1159        drop(guard);
1160        let _t_ingest = _t0.elapsed();
1161
1162        if _timing {
1163            let (hits, misses) = self.stub_cache_stats();
1164            eprintln!(
1165                "[vendor] read={:.0}ms reg={:.0}ms collect={:.0}ms ingest={:.0}ms total={:.0}ms (cache hits={hits} misses={misses})",
1166                _t_read.as_secs_f64() * 1000.0,
1167                (_t_reg - _t_read).as_secs_f64() * 1000.0,
1168                (_t_collect - _t_reg).as_secs_f64() * 1000.0,
1169                (_t_ingest - _t_collect).as_secs_f64() * 1000.0,
1170                _t_ingest.as_secs_f64() * 1000.0,
1171            );
1172        }
1173
1174        // Print profiling statistics for the collection phase.
1175        crate::collector::print_collector_stats();
1176    }
1177}
1178
1179impl Default for ProjectAnalyzer {
1180    fn default() -> Self {
1181        Self::new()
1182    }
1183}
1184
/// Recursively append every file with a `php` extension below `dir` to `out`.
///
/// - Symlinks (to files or directories) are never followed or collected.
/// - Directories named `vendor`, `.git`, `node_modules`, `.cache` or
///   `.pnpm-store` are skipped entirely.
/// - The extension match is case-sensitive.
/// - Directories and entries that cannot be read are silently ignored.
///
/// Files are pushed in directory-iteration order, which is platform dependent.
pub(crate) fn collect_php_files(dir: &Path, out: &mut Vec<PathBuf>) {
    const SKIPPED_DIRS: [&str; 5] = ["vendor", ".git", "node_modules", ".cache", ".pnpm-store"];

    let Ok(entries) = std::fs::read_dir(dir) else {
        return;
    };

    for entry in entries.flatten() {
        if matches!(entry.file_type(), Ok(kind) if kind.is_symlink()) {
            continue;
        }

        let path = entry.path();
        if !path.is_dir() {
            if path.extension().and_then(|ext| ext.to_str()) == Some("php") {
                out.push(path);
            }
            continue;
        }

        let dir_name = path.file_name().and_then(|n| n.to_str()).unwrap_or("");
        if !SKIPPED_DIRS.contains(&dir_name) {
            collect_php_files(&path, out);
        }
    }
}
1207
1208// build_reverse_deps
1209
1210fn build_reverse_deps(db: &dyn crate::db::MirDatabase) -> HashMap<String, HashSet<String>> {
1211    let mut reverse: HashMap<String, HashSet<String>> = HashMap::new();
1212
1213    let mut add_edge = |symbol: &str, dependent_file: &str| {
1214        if let Some(defining_file) = db.symbol_defining_file(symbol) {
1215            let def = defining_file.as_ref().to_string();
1216            if def != dependent_file {
1217                reverse
1218                    .entry(def)
1219                    .or_default()
1220                    .insert(dependent_file.to_string());
1221            }
1222        }
1223    };
1224
1225    for (file, imports) in db.file_import_snapshots() {
1226        let file = file.as_ref().to_string();
1227        for fqcn in imports.values() {
1228            add_edge(fqcn, &file);
1229        }
1230    }
1231
1232    let extract_named_objects = |union: &mir_types::Union| {
1233        union
1234            .types
1235            .iter()
1236            .filter_map(|atomic| match atomic {
1237                mir_types::atomic::Atomic::TNamedObject { fqcn, .. } => Some(fqcn.clone()),
1238                _ => None,
1239            })
1240            .collect::<Vec<_>>()
1241    };
1242
1243    for fqcn in db.active_class_node_fqcns() {
1244        // Only true classes contribute class-direction edges in this loop.
1245        // Interface / trait / enum edges are not currently emitted here —
1246        // this function only ever read classes.
1247        let kind = match crate::db::class_kind_via_db(db, fqcn.as_ref()) {
1248            Some(k) if !k.is_interface && !k.is_trait && !k.is_enum => k,
1249            _ => continue,
1250        };
1251        let _ = kind;
1252        let Some(file) = db
1253            .symbol_defining_file(fqcn.as_ref())
1254            .map(|f| f.as_ref().to_string())
1255        else {
1256            continue;
1257        };
1258
1259        let Some(node) = db.lookup_class_node(fqcn.as_ref()) else {
1260            continue;
1261        };
1262        if let Some(parent) = node.parent(db) {
1263            add_edge(parent.as_ref(), &file);
1264        }
1265        for iface in node.interfaces(db).iter() {
1266            add_edge(iface.as_ref(), &file);
1267        }
1268        for tr in node.traits(db).iter() {
1269            add_edge(tr.as_ref(), &file);
1270        }
1271
1272        // Add types from properties
1273        for prop in db.class_own_properties(fqcn.as_ref()).iter() {
1274            if let Some(ty) = prop.ty(db) {
1275                for named in extract_named_objects(&ty) {
1276                    add_edge(named.as_ref(), &file);
1277                }
1278            }
1279        }
1280
1281        // Add types from methods
1282        for method in db.class_own_methods(fqcn.as_ref()).iter() {
1283            // Parameter types
1284            for param in method.params(db).iter() {
1285                if let Some(ty) = &param.ty {
1286                    for named in extract_named_objects(ty.as_ref()) {
1287                        add_edge(named.as_ref(), &file);
1288                    }
1289                }
1290            }
1291            // Return type
1292            if let Some(rt) = method.return_type(db) {
1293                for named in extract_named_objects(rt.as_ref()) {
1294                    add_edge(named.as_ref(), &file);
1295                }
1296            }
1297        }
1298    }
1299
1300    // Add types from global functions
1301    for fqn in db.active_function_node_fqns() {
1302        let Some(node) = db.lookup_function_node(fqn.as_ref()) else {
1303            continue;
1304        };
1305        let Some(file) = db
1306            .symbol_defining_file(fqn.as_ref())
1307            .map(|f| f.as_ref().to_string())
1308        else {
1309            continue;
1310        };
1311
1312        // Parameter types
1313        for param in node.params(db).iter() {
1314            if let Some(ty) = &param.ty {
1315                for named in extract_named_objects(ty.as_ref()) {
1316                    add_edge(named.as_ref(), &file);
1317                }
1318            }
1319        }
1320        // Return type
1321        if let Some(rt) = node.return_type(db) {
1322            for named in extract_named_objects(rt.as_ref()) {
1323                add_edge(named.as_ref(), &file);
1324            }
1325        }
1326    }
1327
1328    // Also wire in bare-FQN references from Pass 2 (new \Foo(), \Foo::method(), \foo())
1329    // that do not appear in use-import statements.
1330    for (ref_file, symbol_key) in db.all_reference_location_pairs() {
1331        let file_str = ref_file.as_ref().to_string();
1332        let lookup: &str = match symbol_key.split_once("::") {
1333            Some((class, _)) => class,
1334            None => &symbol_key,
1335        };
1336        add_edge(lookup, &file_str);
1337    }
1338
1339    reverse
1340}
1341
1342fn extract_reference_locations(
1343    db: &dyn crate::db::MirDatabase,
1344    file: &Arc<str>,
1345) -> Vec<(String, u32, u16, u16)> {
1346    db.extract_file_reference_locations(file.as_ref())
1347        .into_iter()
1348        .map(|(sym, line, col_start, col_end)| (sym.to_string(), line, col_start, col_end))
1349        .collect()
1350}
1351
1352pub struct AnalysisResult {
1353    pub issues: Vec<Issue>,
1354    #[doc(hidden)]
1355    pub type_envs: std::collections::HashMap<crate::type_env::ScopeId, crate::type_env::TypeEnv>,
1356    /// Per-expression resolved symbols from Pass 2, sorted by file path.
1357    pub symbols: Vec<crate::symbol::ResolvedSymbol>,
1358    /// Maps each file path to the contiguous range within `symbols` that belongs
1359    /// to it. Built once after analysis; allows `symbol_at` to scan only the
1360    /// relevant file's slice rather than the entire codebase-wide vector.
1361    symbols_by_file: HashMap<Arc<str>, std::ops::Range<usize>>,
1362}
1363
1364impl AnalysisResult {
1365    fn build(
1366        issues: Vec<Issue>,
1367        type_envs: std::collections::HashMap<crate::type_env::ScopeId, crate::type_env::TypeEnv>,
1368        mut symbols: Vec<crate::symbol::ResolvedSymbol>,
1369    ) -> Self {
1370        symbols.sort_unstable_by(|a, b| a.file.as_ref().cmp(b.file.as_ref()));
1371        let mut symbols_by_file: HashMap<Arc<str>, std::ops::Range<usize>> = HashMap::new();
1372        let mut i = 0;
1373        while i < symbols.len() {
1374            let file = Arc::clone(&symbols[i].file);
1375            let start = i;
1376            while i < symbols.len() && symbols[i].file == file {
1377                i += 1;
1378            }
1379            symbols_by_file.insert(file, start..i);
1380        }
1381        Self {
1382            issues,
1383            type_envs,
1384            symbols,
1385            symbols_by_file,
1386        }
1387    }
1388}
1389
1390impl AnalysisResult {
1391    pub fn error_count(&self) -> usize {
1392        self.issues
1393            .iter()
1394            .filter(|i| i.severity == mir_issues::Severity::Error)
1395            .count()
1396    }
1397
1398    pub fn warning_count(&self) -> usize {
1399        self.issues
1400            .iter()
1401            .filter(|i| i.severity == mir_issues::Severity::Warning)
1402            .count()
1403    }
1404
1405    /// Group issues by source file.
1406    pub fn issues_by_file(&self) -> HashMap<std::sync::Arc<str>, Vec<&Issue>> {
1407        let mut map: HashMap<std::sync::Arc<str>, Vec<&Issue>> = HashMap::new();
1408        for issue in &self.issues {
1409            map.entry(issue.location.file.clone())
1410                .or_default()
1411                .push(issue);
1412        }
1413        map
1414    }
1415
1416    /// Count issues by severity. Returned as `(severity, count)` pairs sorted
1417    /// by severity (Info, Warning, Error).
1418    pub fn count_by_severity(&self) -> Vec<(mir_issues::Severity, usize)> {
1419        let mut counts: std::collections::BTreeMap<mir_issues::Severity, usize> =
1420            std::collections::BTreeMap::new();
1421        for issue in &self.issues {
1422            *counts.entry(issue.severity).or_insert(0) += 1;
1423        }
1424        counts.into_iter().collect()
1425    }
1426
1427    /// Total number of issues across all severities and files.
1428    pub fn total_issue_count(&self) -> usize {
1429        self.issues.len()
1430    }
1431
1432    /// Iterator of issues matching `predicate`. Useful for filtering by
1433    /// severity, kind, or file without materializing intermediate vectors.
1434    pub fn filter_issues<'a, F>(&'a self, predicate: F) -> impl Iterator<Item = &'a Issue>
1435    where
1436        F: Fn(&Issue) -> bool + 'a,
1437    {
1438        self.issues.iter().filter(move |i| predicate(i))
1439    }
1440
1441    /// Return the innermost resolved symbol whose span contains `byte_offset`
1442    /// in `file`, or `None` if no symbol was recorded at that position.
1443    pub fn symbol_at(
1444        &self,
1445        file: &str,
1446        byte_offset: u32,
1447    ) -> Option<&crate::symbol::ResolvedSymbol> {
1448        let range = self.symbols_by_file.get(file)?;
1449        self.symbols[range.clone()]
1450            .iter()
1451            .filter(|s| s.span.start <= byte_offset && byte_offset < s.span.end)
1452            .min_by_key(|s| s.span.end - s.span.start)
1453    }
1454}