mir_analyzer/project.rs

/// Project-level orchestration: file discovery, pass 1, pass 2.
use std::mem::ManuallyDrop;
use std::path::{Path, PathBuf};
use std::sync::Arc;

use rayon::prelude::*;

use std::collections::{HashMap, HashSet};

use crate::cache::{hash_content, AnalysisCache};
use crate::db::{
    collect_file_definitions, collect_file_definitions_uncached, FileDefinitions, MirDatabase,
    MirDb, RefLoc, SourceFile,
};
use crate::pass2::{InferredTypes, Pass2Driver};
use crate::php_version::PhpVersion;
use crate::shared_db::SharedDb;
use crate::stub_cache::{hash_source, prepare_for_ingest};
use mir_issues::Issue;

pub(crate) use crate::pass2::merge_return_types;

/// Batch-oriented analyzer: file discovery, parsing, and analysis.
///
/// ProjectAnalyzer is the primary entry point for analyzing a project as a whole.
/// It orchestrates parallel file discovery and parsing, using the same core
/// analysis engine as [`AnalysisSession`] (salsa database and Pass 2 driver).
///
/// **Unified Design:** ProjectAnalyzer and `AnalysisSession` now share the same
/// database management via [`SharedDb`]. ProjectAnalyzer is the batch API
/// (all files at once), while `AnalysisSession` is the incremental API (file-by-file).
/// Both use `Pass2Driver`, the same definition collection logic, and identical
/// database operations, eliminating code duplication.
///
/// [`AnalysisSession`]: crate::session::AnalysisSession
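///
/// # Example
///
/// A minimal batch-analysis sketch (the project path is hypothetical, and the
/// import path assumes `ProjectAnalyzer` is re-exported from the crate root):
///
/// ```ignore
/// use std::path::Path;
/// use mir_analyzer::ProjectAnalyzer;
///
/// let files = ProjectAnalyzer::discover_files(Path::new("src"));
/// let analyzer = ProjectAnalyzer::new();
/// let result = analyzer.analyze(&files);
/// println!("{} errors, {} warnings", result.error_count(), result.warning_count());
/// ```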
pub struct ProjectAnalyzer {
    /// Shared database management (salsa, file registry, stub tracking).
    /// Extracted to allow code sharing with AnalysisSession.
    shared_db: Arc<SharedDb>,
    /// Optional cache — when `Some`, Pass 2 results are read/written per file.
    cache: Option<AnalysisCache>,
    /// Called once after each file completes Pass 2 (used for progress reporting).
    pub on_file_done: Option<Arc<dyn Fn() + Send + Sync>>,
    /// PSR-4 autoloader mapping from composer.json, if available.
    pub psr4: Option<Arc<crate::composer::Psr4Map>>,
    /// When true, run dead code detection at the end of analysis.
    pub find_dead_code: bool,
    /// Target PHP language version. `None` means "not configured"; resolved to
    /// `PhpVersion::LATEST` when passed down to `StatementsAnalyzer`.
    pub php_version: Option<PhpVersion>,
    /// Additional stub files to parse before analysis (absolute paths).
    pub stub_files: Vec<PathBuf>,
    /// Additional stub directories to walk and parse before analysis (absolute paths).
    pub stub_dirs: Vec<PathBuf>,
}

struct ParsedProjectFile {
    file: Arc<str>,
    source: Arc<str>,
    parsed: ManuallyDrop<php_rs_parser::ParseResult<'static, 'static>>,
    arena: ManuallyDrop<Box<bumpalo::Bump>>,
}

impl ParsedProjectFile {
    fn new(file: Arc<str>, source: Arc<str>) -> Self {
        let arena = Box::new(crate::arena::create_parse_arena(source.len()));
        let parsed = php_rs_parser::parse(&arena, &source);
        // SAFETY: `parsed` borrows from `arena` and `source`, both owned by this
        // struct and kept alive until `Drop`. `Drop` manually destroys `parsed`
        // before releasing either owner, so the widened lifetimes never escape.
        let parsed = unsafe {
            std::mem::transmute::<
                php_rs_parser::ParseResult<'_, '_>,
                php_rs_parser::ParseResult<'static, 'static>,
            >(parsed)
        };
        Self {
            file,
            source,
            parsed: ManuallyDrop::new(parsed),
            arena: ManuallyDrop::new(arena),
        }
    }

    fn source(&self) -> &str {
        self.source.as_ref()
    }

    fn parsed(&self) -> &php_rs_parser::ParseResult<'_, '_> {
        &self.parsed
    }
}

impl Drop for ParsedProjectFile {
    fn drop(&mut self) {
        unsafe {
            ManuallyDrop::drop(&mut self.parsed);
            ManuallyDrop::drop(&mut self.arena);
        }
    }
}

// SAFETY: after construction the parsed AST and source map are read-only. The
// bump arena is never mutated again; it only owns backing storage for AST nodes
// and is dropped after all parallel analysis has completed.
unsafe impl Send for ParsedProjectFile {}
unsafe impl Sync for ParsedProjectFile {}

impl ProjectAnalyzer {
    pub fn new() -> Self {
        Self {
            shared_db: Arc::new(SharedDb::new()),
            cache: None,
            on_file_done: None,
            psr4: None,
            find_dead_code: false,
            php_version: None,
            stub_files: Vec::new(),
            stub_dirs: Vec::new(),
        }
    }

    /// Create a `ProjectAnalyzer` with a disk-backed cache stored under `cache_dir`.
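    ///
    /// # Example
    ///
    /// A minimal sketch; the cache directory is an arbitrary, hypothetical path:
    ///
    /// ```ignore
    /// use std::path::Path;
    ///
    /// let analyzer = ProjectAnalyzer::with_cache(Path::new(".mir-cache"));
    /// ```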
    pub fn with_cache(cache_dir: &Path) -> Self {
        Self {
            shared_db: Arc::new(SharedDb::new().with_cache_dir(cache_dir)),
            cache: Some(AnalysisCache::open(cache_dir)),
            on_file_done: None,
            psr4: None,
            find_dead_code: false,
            php_version: None,
            stub_files: Vec::new(),
            stub_dirs: Vec::new(),
        }
    }

    /// Enable the disk-backed cache for an already-constructed analyzer.
    pub fn set_cache_dir(&mut self, cache_dir: &Path) {
        // Rebuild SharedDb to attach the Pass-1 stub cache. Must be called
        // before any file is ingested — a previously-populated SharedDb's
        // state would be silently discarded here, which is almost certainly
        // a caller bug rather than the intended behavior.
        debug_assert_eq!(
            self.shared_db.source_file_count(),
            0,
            "ProjectAnalyzer::set_cache_dir must be called before any file is ingested"
        );
        self.shared_db = Arc::new(SharedDb::new().with_cache_dir(cache_dir));
        self.cache = Some(AnalysisCache::open(cache_dir));
    }

    /// Create a `ProjectAnalyzer` from a project root containing `composer.json`.
    /// Returns the analyzer (with `psr4` set) and the `Psr4Map` so callers can
    /// call `map.project_files()` / `map.vendor_files()`.
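    ///
    /// # Example
    ///
    /// A typical composer-driven flow, sketched under the assumption that
    /// `project_files()` and `vendor_files()` both return `Vec<PathBuf>`:
    ///
    /// ```ignore
    /// use std::path::Path;
    ///
    /// let (analyzer, map) = ProjectAnalyzer::from_composer(Path::new("."))?;
    /// analyzer.collect_types_only(&map.vendor_files()); // Pass 1 only for vendor code
    /// let result = analyzer.analyze(&map.project_files());
    /// ```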
    pub fn from_composer(
        root: &Path,
    ) -> Result<(Self, crate::composer::Psr4Map), crate::composer::ComposerError> {
        let map = crate::composer::Psr4Map::from_composer(root)?;
        let psr4 = Arc::new(map.clone());
        let analyzer = Self {
            shared_db: Arc::new(SharedDb::new()),
            cache: None,
            on_file_done: None,
            psr4: Some(psr4),
            find_dead_code: false,
            php_version: None,
            stub_files: Vec::new(),
            stub_dirs: Vec::new(),
        };
        Ok((analyzer, map))
    }

    /// Builder method: set the target PHP version.
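    ///
    /// # Example
    ///
    /// Builder calls chain; a sketch using the version constant this module
    /// already relies on as the default:
    ///
    /// ```ignore
    /// let analyzer = ProjectAnalyzer::new()
    ///     .with_php_version(PhpVersion::LATEST)
    ///     .with_dead_code(true);
    /// ```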
    pub fn with_php_version(mut self, version: PhpVersion) -> Self {
        self.php_version = Some(version);
        self
    }

    /// Builder method: enable dead-code detection at the end of analysis.
    pub fn with_dead_code(mut self, enabled: bool) -> Self {
        self.find_dead_code = enabled;
        self
    }

    /// Builder method: set a progress callback invoked once per analyzed file.
    pub fn with_progress_callback(mut self, callback: Arc<dyn Fn() + Send + Sync>) -> Self {
        self.on_file_done = Some(callback);
        self
    }

    /// Builder method: add user stub files.
    pub fn with_stub_files(mut self, files: Vec<PathBuf>) -> Self {
        self.stub_files = files;
        self
    }

    /// Builder method: add user stub directories.
    pub fn with_stub_dirs(mut self, dirs: Vec<PathBuf>) -> Self {
        self.stub_dirs = dirs;
        self
    }

    /// Builder method: configure a disk-backed cache at the given directory.
    pub fn with_cache_dir(mut self, cache_dir: &Path) -> Self {
        debug_assert_eq!(
            self.shared_db.source_file_count(),
            0,
            "ProjectAnalyzer::with_cache_dir must be called before any file is ingested"
        );
        self.shared_db = Arc::new(SharedDb::new().with_cache_dir(cache_dir));
        self.cache = Some(AnalysisCache::open(cache_dir));
        self
    }

    /// Builder method: attach a PSR-4 autoloader map.
    pub fn with_psr4(mut self, map: Arc<crate::composer::Psr4Map>) -> Self {
        self.psr4 = Some(map);
        self
    }

    /// Resolve the configured PHP version, defaulting to `PhpVersion::LATEST`
    /// when none has been set.
    fn resolved_php_version(&self) -> PhpVersion {
        self.php_version.unwrap_or(PhpVersion::LATEST)
    }

    /// Cumulative hit / miss counts on the persistent Pass-1 cache attached
    /// to this analyzer. `(0, 0)` when no cache is configured. Used by
    /// integration tests and benchmarks to assert the cache actually fires.
    #[doc(hidden)]
    pub fn stub_cache_stats(&self) -> (u64, u64) {
        match self.shared_db.stub_cache.as_deref() {
            Some(c) => (c.hits(), c.misses()),
            None => (0, 0),
        }
    }

    fn type_exists(&self, fqcn: &str) -> bool {
        let db = self.snapshot_db();
        crate::db::type_exists_via_db(&db, fqcn)
    }

    /// Returns `true` if a function with `fqn` is registered and active.
    pub fn contains_function(&self, fqn: &str) -> bool {
        let db = self.snapshot_db();
        db.lookup_function_node(fqn).is_some_and(|n| n.active(&db))
    }

    /// Returns `true` if a class / interface / trait / enum is registered.
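    ///
    /// # Example
    ///
    /// A sketch; the exact FQCN format expected here (leading `\`, casing) is
    /// an assumption:
    ///
    /// ```ignore
    /// let analyzer = ProjectAnalyzer::new();
    /// analyzer.re_analyze_file("src/Foo.php", "<?php class Foo {}");
    /// assert!(analyzer.contains_class("Foo"));
    /// ```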
    pub fn contains_class(&self, fqcn: &str) -> bool {
        let db = self.snapshot_db();
        db.lookup_class_node(fqcn).is_some_and(|n| n.active(&db))
    }

    /// Returns `true` if `class` has a method named `name` (case-insensitive).
    pub fn contains_method(&self, class: &str, name: &str) -> bool {
        let db = self.snapshot_db();
        let name_lower = name.to_ascii_lowercase();
        db.lookup_method_node(class, &name_lower)
            .is_some_and(|n| n.active(&db))
    }

    /// Acquire a cheap clone of the salsa db for a read-only query.
    /// The lock is held only for the duration of the clone, so concurrent
    /// readers contend with each other and with writers only for as long as
    /// the clone takes, never for the duration of the query itself.
    fn snapshot_db(&self) -> MirDb {
        self.shared_db.snapshot_db()
    }

    /// Internal: expose the salsa db for unit tests that need a `&dyn MirDatabase`.
    #[doc(hidden)]
    pub fn salsa_db_for_test(&self) -> parking_lot::MappedRwLockWriteGuard<'_, MirDb> {
        let guard = self.shared_db.salsa.write();
        parking_lot::RwLockWriteGuard::map(guard, |rw| &mut **rw)
    }

    /// Legacy: look up the source location of a class member by name.
    ///
    /// Prefer [`Self::definition_of`] with [`crate::Symbol::method`] etc.
    #[doc(hidden)]
    pub fn member_location(
        &self,
        fqcn: &str,
        member_name: &str,
    ) -> Option<mir_codebase::storage::Location> {
        let db = self.snapshot_db();
        crate::db::member_location_via_db(&db, fqcn, member_name)
    }

    /// Legacy: look up a top-level symbol location.
    ///
    /// Prefer [`Self::definition_of`] with [`crate::Symbol`].
    #[doc(hidden)]
    pub fn symbol_location(&self, symbol: &str) -> Option<mir_codebase::storage::Location> {
        let db = self.snapshot_db();
        db.lookup_class_node(symbol)
            .filter(|n| n.active(&db))
            .and_then(|n| n.location(&db))
            .or_else(|| {
                db.lookup_function_node(symbol)
                    .filter(|n| n.active(&db))
                    .and_then(|n| n.location(&db))
            })
    }

    /// Legacy: raw reference locations as `(file, line, col_start, col_end)`.
    ///
    /// Prefer [`Self::references_to`] which returns `(Arc<str>, Range)` pairs
    /// and takes a strongly-typed [`crate::Symbol`].
    #[doc(hidden)]
    pub fn reference_locations(&self, symbol: &str) -> Vec<(Arc<str>, u32, u16, u16)> {
        let db = self.snapshot_db();
        db.reference_locations(symbol)
    }

    /// Resolve a symbol to its declaration location.
    ///
    /// Mirrors [`crate::AnalysisSession::definition_of`].
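    ///
    /// # Example
    ///
    /// A sketch; the `Symbol::Class` payload is assumed to be constructible
    /// from a `&str` via `.into()`, and the FQCN format is an assumption:
    ///
    /// ```ignore
    /// let analyzer = ProjectAnalyzer::new();
    /// // ... after analyzer.analyze(&paths) ...
    /// let symbol = crate::Symbol::Class("App\\Model\\User".into());
    /// let location = analyzer.definition_of(&symbol)?;
    /// ```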
    pub fn definition_of(
        &self,
        symbol: &crate::Symbol,
    ) -> Result<mir_codebase::storage::Location, crate::SymbolLookupError> {
        let db = self.snapshot_db();
        match symbol {
            crate::Symbol::Class(fqcn) => {
                let node = db
                    .lookup_class_node(fqcn.as_ref())
                    .filter(|n| n.active(&db))
                    .ok_or(crate::SymbolLookupError::NotFound)?;
                node.location(&db)
                    .ok_or(crate::SymbolLookupError::NoSourceLocation)
            }
            crate::Symbol::Function(fqn) => {
                let node = db
                    .lookup_function_node(fqn.as_ref())
                    .filter(|n| n.active(&db))
                    .ok_or(crate::SymbolLookupError::NotFound)?;
                node.location(&db)
                    .ok_or(crate::SymbolLookupError::NoSourceLocation)
            }
            crate::Symbol::Method { class, name }
            | crate::Symbol::Property { class, name }
            | crate::Symbol::ClassConstant { class, name } => {
                crate::db::member_location_via_db(&db, class, name)
                    .ok_or(crate::SymbolLookupError::NotFound)
            }
            crate::Symbol::GlobalConstant(_) => Err(crate::SymbolLookupError::NoSourceLocation),
        }
    }

    /// All recorded references to a symbol, as `(file, range)` pairs.
    ///
    /// Mirrors [`crate::AnalysisSession::references_to`].
    pub fn references_to(&self, symbol: &crate::Symbol) -> Vec<(Arc<str>, crate::Range)> {
        let db = self.snapshot_db();
        let key = symbol.codebase_key();
        db.reference_locations(&key)
            .into_iter()
            .map(|(file, line, col_start, col_end)| {
                let range = crate::Range {
                    start: crate::Position {
                        line,
                        column: col_start as u32,
                    },
                    end: crate::Position {
                        line,
                        column: col_end as u32,
                    },
                };
                (file, range)
            })
            .collect()
    }

    /// Load PHP built-in stubs. Called automatically by `analyze` if not done yet.
    /// Stubs are filtered against the configured target PHP version (or
    /// `PhpVersion::LATEST` if none was set).
    pub fn load_stubs(&self) {
        let php_version = self.resolved_php_version();

        // Load all built-in stubs for the configured PHP version
        let paths: Vec<&'static str> = crate::stubs::stub_files().iter().map(|&(p, _)| p).collect();
        self.shared_db.ingest_stub_paths(&paths, php_version);

        // Load user-configured stubs
        self.shared_db
            .ingest_user_stubs(&self.stub_files, &self.stub_dirs);
    }

    fn collect_and_ingest_source(&self, file: Arc<str>, src: &str) -> FileDefinitions {
        self.shared_db
            .collect_and_ingest_file(file, src, self.resolved_php_version())
    }

    /// Run the full analysis pipeline on a set of file paths.
    pub fn analyze(&self, paths: &[PathBuf]) -> AnalysisResult {
        let mut all_issues = Vec::new();
        let _t0 = std::time::Instant::now();

        // ---- Load PHP built-in stubs (before Pass 1 so user code can override)
        self.load_stubs();
        let _t_stubs = _t0.elapsed();

        // ---- Pass 1: read files in parallel ----------------------------------
        let parsed_files: Vec<ParsedProjectFile> = paths
            .par_iter()
            .filter_map(|path| match std::fs::read_to_string(path) {
                Ok(src) => {
                    let file = Arc::from(path.to_string_lossy().as_ref());
                    Some(ParsedProjectFile::new(file, Arc::from(src)))
                }
                Err(e) => {
                    eprintln!("Cannot read {}: {}", path.display(), e);
                    None
                }
            })
            .collect();
        let _t_read = _t0.elapsed();

        let file_data: Vec<(Arc<str>, Arc<str>)> = parsed_files
            .iter()
            .map(|parsed| (parsed.file.clone(), parsed.source.clone()))
            .collect();

        // ---- Pre-Pass-2 invalidation: evict dependents of changed files ------
        if let Some(cache) = &self.cache {
            let changed: Vec<String> = file_data
                .par_iter()
                .filter_map(|(f, src)| {
                    let h = hash_content(src.as_ref());
                    if cache.get(f, &h).is_none() {
                        Some(f.to_string())
                    } else {
                        None
                    }
                })
                .collect();
            if !changed.is_empty() {
                cache.evict_with_dependents(&changed);
            }
        }

        // ---- Register Salsa source inputs for incremental follow-up calls ----
        {
            let mut guard = self.shared_db.salsa.write();
            for parsed in &parsed_files {
                guard.upsert_source_file(parsed.file.clone(), parsed.source.clone());
            }
        }
        let _t_salsa_reg = _t0.elapsed();

        // ---- Pass 1: definition collection from the already-parsed AST -------
        let file_defs: Vec<FileDefinitions> = parsed_files
            .par_iter()
            .map(|parsed| {
                let parse_result = parsed.parsed();
                let mut all_issues: Vec<Issue> = parse_result
                    .errors
                    .iter()
                    .map(|err| {
                        Issue::new(
                            mir_issues::IssueKind::ParseError {
                                message: err.to_string(),
                            },
                            mir_issues::Location {
                                file: parsed.file.clone(),
                                line: 1,
                                line_end: 1,
                                col_start: 0,
                                col_end: 0,
                            },
                        )
                    })
                    .collect();
                let collector = crate::collector::DefinitionCollector::new_for_slice(
                    parsed.file.clone(),
                    parsed.source(),
                    &parse_result.source_map,
                );
                let (mut slice, collector_issues) = collector.collect_slice(&parse_result.program);
                all_issues.extend(collector_issues);
                mir_codebase::storage::deduplicate_params_in_slice(&mut slice);
                FileDefinitions {
                    slice: Arc::new(slice),
                    issues: Arc::new(all_issues),
                }
            })
            .collect();
        let _t_pass1 = _t0.elapsed();

        let mut files_with_parse_errors: std::collections::HashSet<Arc<str>> =
            std::collections::HashSet::new();
        {
            let mut guard = self.shared_db.salsa.write();
            for defs in file_defs {
                for issue in defs.issues.iter() {
                    if matches!(issue.kind, mir_issues::IssueKind::ParseError { .. }) {
                        files_with_parse_errors.insert(issue.location.file.clone());
                    }
                }
                guard.ingest_stub_slice(&defs.slice);
                all_issues.extend(Arc::unwrap_or_clone(defs.issues));
            }
        }
        let _t_ingest = _t0.elapsed();

        // ---- Lazy-load unknown classes via PSR-4 (issue #50) ----------------
        if let Some(psr4) = &self.psr4 {
            self.lazy_load_missing_classes(psr4.clone(), &mut all_issues);
        }

        // ---- Resolve @psalm-import-type declarations now that all Pass 1
        // classes (including their `type_aliases`) are populated.
        // ---- Build reverse dep graph and persist it for the next run ---------
        if let Some(cache) = &self.cache {
            let db_snapshot = {
                let guard = self.shared_db.salsa.read();
                (**guard).clone()
            };
            let rev = build_reverse_deps(&db_snapshot);
            cache.set_reverse_deps(rev);
        }

        // ---- Class-level checks (M11) ----------------------------------------
        let analyzed_file_set: std::collections::HashSet<std::sync::Arc<str>> =
            file_data.iter().map(|(f, _)| f.clone()).collect();
        {
            let class_db = {
                let guard = self.shared_db.salsa.read();
                (**guard).clone()
            };
            let class_issues =
                crate::class::ClassAnalyzer::with_files(&class_db, analyzed_file_set, &file_data)
                    .analyze_all();
            all_issues.extend(class_issues);
        }

        // ---- Inference pre-sweep: prime inferred return types ----------------
        // Run an inference-only Pass 2 over each file in parallel using direct
        // rayon (no Salsa tracked-query overhead per file), collect the results,
        // then commit them to Salsa INPUT fields.  The full Pass 2 then reads
        // those fields via O(1) accesses with no lock contention.
        //
        // We use `Pass2Driver::new_inference_only` directly rather than the
        // Salsa-tracked `infer_file_return_types` query so that the batch path
        // avoids per-file Salsa lock acquisition and memo-table overhead on every
        // cold start.  `infer_file_return_types` is reserved for the incremental
        // LSP path (AnalysisSession) where Salsa cache hits across edits matter.
        //
        // `map_with` clones `db_priming` once per rayon worker thread (not once
        // per file as the old `in_place_scope` loop did). For N files on T threads
        // this reduces clones from N to T.  Results are returned by value and
        // flattened after `collect()`, replacing the Arc<Mutex<Vec>> accumulator.
        // All per-thread db clones are dropped when `collect()` returns, so
        // `commit_inferred_return_types` (which calls Salsa setters that wait for
        // strong_count == 1) cannot deadlock.
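        //
        // Schematically, the per-worker cloning follows this generic rayon
        // pattern (an illustration only, not the exact call below):
        //
        //     let out: Vec<_> = items.par_iter()
        //         .map_with(expensive_state, |state, item| work(state, item))
        //         .collect(); // `expensive_state` is cloned once per worker, not per item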
        {
            let db_priming = {
                let guard = self.shared_db.salsa.read();
                (**guard).clone()
            };
            let php_version = self.resolved_php_version();
            let all_inferred: Vec<InferredTypes> = parsed_files
                .par_iter()
                .filter(|parsed| !files_with_parse_errors.contains(&parsed.file))
                .map_with(db_priming, |db, parsed| {
                    let driver = Pass2Driver::new_inference_only(
                        db as &dyn crate::db::MirDatabase,
                        php_version,
                    );
                    let parse_result = parsed.parsed();
                    driver.analyze_bodies(
                        &parse_result.program,
                        parsed.file.clone(),
                        parsed.source(),
                        &parse_result.source_map,
                    );
                    driver.take_inferred_types()
                })
                .collect();
            // db_priming is consumed by map_with; per-thread clones dropped by collect().
            let mut functions = Vec::new();
            let mut methods = Vec::new();
            for inferred in all_inferred {
                functions.extend(inferred.functions);
                methods.extend(inferred.methods);
            }
            let mut guard = self.shared_db.salsa.write();
            guard.commit_inferred_return_types(functions, methods);
        }
        let _t_presweep = _t0.elapsed();

        let db_main = {
            let guard = self.shared_db.salsa.read();
            (**guard).clone()
        };

        // ---- Pass 2: analyze function/method bodies in parallel (M14) --------
        // Each worker db clone has its own `pending_ref_locs` buffer (custom
        // Clone returns empty).  Workers push reference locations there instead
        // of into the shared Arc<Mutex<...>> maps, eliminating cross-thread
        // contention.  After collect() we commit all batches serially in a
        // single lock acquisition per map.
        let pass2_results: Vec<(Vec<Issue>, Vec<crate::symbol::ResolvedSymbol>, Vec<RefLoc>)> =
            parsed_files
                .par_iter()
                .filter(|parsed| !files_with_parse_errors.contains(&parsed.file))
                .map_with(db_main, |db, parsed| {
                    let driver =
                        Pass2Driver::new(&*db as &dyn MirDatabase, self.resolved_php_version());
                    let (issues, symbols) = if let Some(cache) = &self.cache {
                        let h = hash_content(parsed.source());
                        if let Some((cached_issues, ref_locs)) = cache.get(&parsed.file, &h) {
                            db.replay_reference_locations(parsed.file.clone(), &ref_locs);
                            (cached_issues, Vec::new())
                        } else {
                            let parse_result = parsed.parsed();
                            let (issues, symbols) = driver.analyze_bodies(
                                &parse_result.program,
                                parsed.file.clone(),
                                parsed.source(),
                                &parse_result.source_map,
                            );
                            let pending = db.take_pending_ref_locs();
                            let cache_locs = pending
                                .iter()
                                .map(|r| (r.symbol_key.to_string(), r.line, r.col_start, r.col_end))
                                .collect();
                            cache.put(&parsed.file, h, issues.clone(), cache_locs);
                            if let Some(cb) = &self.on_file_done {
                                cb();
                            }
                            return (issues, symbols, pending);
                        }
                    } else {
                        let parse_result = parsed.parsed();
                        driver.analyze_bodies(
                            &parse_result.program,
                            parsed.file.clone(),
                            parsed.source(),
                            &parse_result.source_map,
                        )
                    };
                    let pending = db.take_pending_ref_locs();
                    if let Some(cb) = &self.on_file_done {
                        cb();
                    }
                    (issues, symbols, pending)
                })
                .collect();

        let _t_pass2 = _t0.elapsed();

        // Serial commit: one lock acquisition per map for all files combined.
        let mut all_ref_locs: Vec<RefLoc> = Vec::new();
        let mut all_symbols = Vec::new();
        for (issues, symbols, ref_locs) in pass2_results {
            all_issues.extend(issues);
            all_symbols.extend(symbols);
            all_ref_locs.extend(ref_locs);
        }
        {
            let guard = self.shared_db.salsa.read();
            guard.commit_reference_locations_batch(all_ref_locs);
        }

        // ---- Post-Pass-2 lazy loading: FQCNs used without `use` imports ------
        // FQCNs in function/method bodies aren't visible until Pass 2 runs, so
        // the pre-Pass-2 lazy load misses them.  We collect UndefinedClass names,
        // resolve them via PSR-4, load those files, re-finalize, then re-analyze
        // only the affected files to clear the false positives.
        if let Some(psr4) = &self.psr4 {
            self.lazy_load_from_body_issues(
                psr4.clone(),
                &file_data,
                &files_with_parse_errors,
                &mut all_issues,
                &mut all_symbols,
            );
        }

        // Persist cache hits/misses to disk
        if let Some(cache) = &self.cache {
            cache.flush();
        }

        // ---- Compact the reference index ------------------------------------
        // ---- Dead-code detection (M18) --------------------------------------
        if self.find_dead_code {
            let salsa = self.snapshot_db();
            let dead_code_issues = crate::dead_code::DeadCodeAnalyzer::new(&salsa).analyze();
            all_issues.extend(dead_code_issues);
        }

        let _t_total = _t0.elapsed();
        if std::env::var("MIR_TIMING").is_ok() {
            eprintln!(
                "[timing] stubs={:.0}ms read={:.0}ms salsa_reg={:.0}ms pass1={:.0}ms ingest={:.0}ms presweep={:.0}ms pass2={:.0}ms total={:.0}ms",
                _t_stubs.as_secs_f64() * 1000.0,
                (_t_read - _t_stubs).as_secs_f64() * 1000.0,
                (_t_salsa_reg - _t_read).as_secs_f64() * 1000.0,
                (_t_pass1 - _t_salsa_reg).as_secs_f64() * 1000.0,
                (_t_ingest - _t_pass1).as_secs_f64() * 1000.0,
                (_t_presweep - _t_ingest).as_secs_f64() * 1000.0,
                (_t_pass2 - _t_presweep).as_secs_f64() * 1000.0,
                _t_total.as_secs_f64() * 1000.0,
            );
        }

        AnalysisResult::build(all_issues, std::collections::HashMap::new(), all_symbols)
    }

    fn lazy_load_missing_classes(
        &self,
        psr4: Arc<crate::composer::Psr4Map>,
        all_issues: &mut Vec<Issue>,
    ) {
        use std::collections::HashSet;
        use std::sync::Arc;

        let max_depth = 10;
        let mut loaded: HashSet<String> = HashSet::new();
        let mut scanned: HashSet<Arc<str>> = HashSet::new();

        for _ in 0..max_depth {
            let mut to_load: Vec<(String, PathBuf)> = Vec::new();

            let mut try_queue = |fqcn: &str| {
                if !self.type_exists(fqcn) && !loaded.contains(fqcn) {
                    if let Some(path) = psr4.resolve(fqcn) {
                        to_load.push((fqcn.to_string(), path));
                    }
                }
            };

            // Collect inheritance and import candidates. Only scan classes that
            // haven't been scanned yet (optimization: avoid redundant full scans).
            let mut inheritance_candidates = Vec::new();
            let import_candidates = {
                let db_owned = self.snapshot_db();
                let db = &db_owned;
                for fqcn in db.active_class_node_fqcns() {
                    if scanned.contains(fqcn.as_ref()) {
                        continue;
                    }
                    let Some(node) = db.lookup_class_node(&fqcn) else {
                        continue;
                    };
                    scanned.insert(fqcn.clone());
                    if node.is_interface(db) {
                        for parent in node.extends(db).iter() {
                            inheritance_candidates.push(parent.to_string());
                        }
                    } else if node.is_enum(db) {
                        for iface in node.interfaces(db).iter() {
                            inheritance_candidates.push(iface.to_string());
                        }
                    } else if node.is_trait(db) {
                        for used in node.traits(db).iter() {
                            inheritance_candidates.push(used.to_string());
                        }
                    } else {
                        if let Some(parent) = node.parent(db) {
                            inheritance_candidates.push(parent.to_string());
                        }
                        for iface in node.interfaces(db).iter() {
                            inheritance_candidates.push(iface.to_string());
                        }
                    }
                }
                db.file_import_snapshots()
                    .into_iter()
                    .flat_map(|(_, imports)| imports.into_values())
                    .collect::<Vec<_>>()
            };
            for fqcn in inheritance_candidates {
                try_queue(&fqcn);
            }

            // Also lazy-load any type referenced via `use` imports that isn't yet
            // in the codebase (covers enums and classes used only in type hints or
            // static calls, which never appear in the inheritance scan above).
            for fqcn in import_candidates {
                try_queue(&fqcn);
            }

            if to_load.is_empty() {
                break;
            }

            for (fqcn, path) in to_load {
                loaded.insert(fqcn);
                if let Ok(src) = std::fs::read_to_string(&path) {
                    let file: Arc<str> = Arc::from(path.to_string_lossy().as_ref());
                    let defs = self.collect_and_ingest_source(file, &src);
                    all_issues.extend(Arc::unwrap_or_clone(defs.issues));
                }
            }
        }
    }

    fn lazy_load_from_body_issues(
        &self,
        psr4: Arc<crate::composer::Psr4Map>,
        file_data: &[(Arc<str>, Arc<str>)],
        files_with_parse_errors: &HashSet<Arc<str>>,
        all_issues: &mut Vec<Issue>,
        all_symbols: &mut Vec<crate::symbol::ResolvedSymbol>,
    ) {
        use mir_issues::IssueKind;

        let max_depth = 5;
        let mut loaded: HashSet<String> = HashSet::new();

        for _ in 0..max_depth {
            // Deduplicate by FQCN: HashMap prevents loading the same class twice
            // when multiple files share the same UndefinedClass diagnostic.
            let mut to_load: HashMap<String, PathBuf> = HashMap::new();

            for issue in all_issues.iter() {
                if let IssueKind::UndefinedClass { name } = &issue.kind {
                    if !self.type_exists(name) && !loaded.contains(name) {
                        if let Some(path) = psr4.resolve(name) {
                            to_load.entry(name.clone()).or_insert(path);
                        }
                    }
                }
            }

            if to_load.is_empty() {
                break;
            }

            loaded.extend(to_load.keys().cloned());

            for path in to_load.values() {
                if let Ok(src) = std::fs::read_to_string(path) {
                    let file: Arc<str> = Arc::from(path.to_string_lossy().as_ref());
                    let _ = self.collect_and_ingest_source(file, &src);
                }
            }

            // Load inheritance deps of newly-added types and finalize.
            // This covers e.g. `class Helper extends \App\Base` where Base is
            // also not in the initial file set.
            self.lazy_load_missing_classes(psr4.clone(), all_issues);

            // Re-analyze every file that has an UndefinedClass for a type now
            // present in the codebase — covers both direct and transitive loads.
            let files_to_reanalyze: HashSet<Arc<str>> = all_issues
                .iter()
                .filter_map(|i| {
                    if let IssueKind::UndefinedClass { name } = &i.kind {
                        if self.type_exists(name) {
                            return Some(i.location.file.clone());
                        }
                    }
                    None
                })
                .collect();

            if files_to_reanalyze.is_empty() {
                break;
            }

            all_issues.retain(|i| !files_to_reanalyze.contains(&i.location.file));
            all_symbols.retain(|s| !files_to_reanalyze.contains(&s.file));

            let db_full = {
                let guard = self.shared_db.salsa.read();
                (**guard).clone()
            };

            let reanalysis: Vec<(Vec<Issue>, Vec<crate::symbol::ResolvedSymbol>, Vec<RefLoc>)> =
                file_data
                    .par_iter()
                    .filter(|(f, _)| {
                        !files_with_parse_errors.contains(f) && files_to_reanalyze.contains(f)
                    })
                    .map_with(db_full, |db, (file, src)| {
                        let driver =
                            Pass2Driver::new(&*db as &dyn MirDatabase, self.resolved_php_version());
                        let arena = crate::arena::create_parse_arena(src.len());
                        let parsed = php_rs_parser::parse(&arena, src);
                        let (issues, symbols) = driver.analyze_bodies(
                            &parsed.program,
                            file.clone(),
                            src,
                            &parsed.source_map,
                        );
                        let pending = db.take_pending_ref_locs();
                        (issues, symbols, pending)
                    })
                    .collect();

            let mut reanalysis_ref_locs: Vec<RefLoc> = Vec::new();
            for (issues, symbols, ref_locs) in reanalysis {
                all_issues.extend(issues);
                all_symbols.extend(symbols);
                reanalysis_ref_locs.extend(ref_locs);
            }
            {
                let guard = self.shared_db.salsa.read();
                guard.commit_reference_locations_batch(reanalysis_ref_locs);
            }
        }
    }

    /// Re-analyze a single file within the existing codebase.
    ///
    /// This is the incremental analysis API for LSP:
    /// 1. Removes old definitions from this file
    /// 2. Re-runs Pass 1 (definition collection) on the new content
    /// 3. Resolves any newly-collected `@psalm-import-type` declarations
    /// 4. Re-runs Pass 2 (body analysis) on this file
    /// 5. Returns the analysis result for this file only
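    ///
    /// # Example
    ///
    /// A sketch of the LSP-style loop (file path and content are hypothetical);
    /// see the list above for what each call does internally:
    ///
    /// ```ignore
    /// let analyzer = ProjectAnalyzer::new();
    /// // ... initial analyzer.analyze(&paths) ...
    /// let result = analyzer.re_analyze_file("src/Foo.php", "<?php class Foo { }");
    /// println!("{} issues in this file", result.total_issue_count());
    /// ```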
    pub fn re_analyze_file(&self, file_path: &str, new_content: &str) -> AnalysisResult {
        // Fast path: content unchanged and cache has a valid entry — skip full re-analysis.
        if let Some(cache) = &self.cache {
            let h = hash_content(new_content);
            if let Some((issues, ref_locs)) = cache.get(file_path, &h) {
                let file: Arc<str> = Arc::from(file_path);
                let guard = self.shared_db.salsa.read();
                guard.replay_reference_locations(file, &ref_locs);
                guard.commit_pending_to_maps();
                return AnalysisResult::build(issues, HashMap::new(), Vec::new());
            }
        }

        let file: Arc<str> = Arc::from(file_path);

        {
            let mut guard = self.shared_db.salsa.write();
            guard.remove_file_definitions(file_path);
        }

        // --- Salsa-backed Pass 1: memoized parse + definition collection ------
        let file_defs = {
            let mut guard = self.shared_db.salsa.write();
            let salsa_file = guard.upsert_source_file(file.clone(), Arc::from(new_content));
            collect_file_definitions(&**guard, salsa_file)
        };

        let mut all_issues: Vec<Issue> = Arc::unwrap_or_clone(file_defs.issues.clone());

        // --- S2 + Pass 2: hold the Salsa lock for ClassNode upserts and body
        // analysis so the db reference is live during Pass 2 (S5).
        let symbols = {
            let mut guard = self.shared_db.salsa.write();

            guard.ingest_stub_slice(&file_defs.slice);

            // Resolve any newly-collected @psalm-import-type declarations so
            // Pass 2 reads the imported aliases out of `type_aliases`.
            // Re-parse in the arena so Pass 2 can walk the AST.
            let arena = bumpalo::Bump::new();
            let parsed = php_rs_parser::parse(&arena, new_content);

            if parsed.errors.is_empty() {
                let db_ref: &dyn MirDatabase = &**guard;
                let driver = Pass2Driver::new(db_ref, self.resolved_php_version());
                let (body_issues, symbols) = driver.analyze_bodies(
                    &parsed.program,
                    file.clone(),
                    new_content,
                    &parsed.source_map,
                );
                all_issues.extend(body_issues);
                guard.commit_pending_to_maps();
                symbols
            } else {
                Vec::new()
            }
        };

        if let Some(cache) = &self.cache {
            let h = hash_content(new_content);
            cache.evict_with_dependents(&[file_path.to_string()]);
            let db = self.snapshot_db();
            let ref_locs = extract_reference_locations(&db, &file);
            cache.put(file_path, h, all_issues.clone(), ref_locs);
        }

        AnalysisResult::build(all_issues, HashMap::new(), symbols)
    }

    /// Analyze a PHP source string without a real file path.
    /// Useful for tests and LSP single-file mode.
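    ///
    /// # Example
    ///
    /// A sketch; no claim is made about which specific issues this snippet triggers:
    ///
    /// ```ignore
    /// let result = ProjectAnalyzer::analyze_source("<?php echo strlen('abc');");
    /// println!("{} issues", result.total_issue_count());
    /// ```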
    pub fn analyze_source(source: &str) -> AnalysisResult {
        let analyzer = ProjectAnalyzer::new();
        let file: Arc<str> = Arc::from("<source>");
        let mut db = MirDb::default();
        for slice in crate::stubs::builtin_stub_slices_for_version(analyzer.resolved_php_version())
        {
            db.ingest_stub_slice(&slice);
        }
        let salsa_file = SourceFile::new(&db, file.clone(), Arc::from(source));
        let file_defs = collect_file_definitions(&db, salsa_file);
        db.ingest_stub_slice(&file_defs.slice);
        let mut all_issues = Arc::unwrap_or_clone(file_defs.issues);
        if all_issues
            .iter()
            .any(|issue| matches!(issue.kind, mir_issues::IssueKind::ParseError { .. }))
        {
            return AnalysisResult::build(all_issues, std::collections::HashMap::new(), Vec::new());
        }
        let mut type_envs = std::collections::HashMap::new();
        let mut all_symbols = Vec::new();
        let arena = bumpalo::Bump::new();
        let result = php_rs_parser::parse(&arena, source);

        let driver = Pass2Driver::new(&db, analyzer.resolved_php_version());
        all_issues.extend(driver.analyze_bodies_typed(
            &result.program,
            file.clone(),
            source,
            &result.source_map,
            &mut type_envs,
            &mut all_symbols,
        ));
        AnalysisResult::build(all_issues, type_envs, all_symbols)
    }

    /// Discover all `.php` files under a directory, recursively.
    pub fn discover_files(root: &Path) -> Vec<PathBuf> {
        if root.is_file() {
            return vec![root.to_path_buf()];
        }
        let mut files = Vec::new();
        collect_php_files(root, &mut files);
        files
    }

    /// Pass 1 only: collect type definitions from `paths` into the codebase without
    /// analyzing method bodies or emitting issues. Used to load vendor types.
    ///
    /// When [`Self::with_cache`] is enabled, per-file [`StubSlice`] results from
    /// previous runs are reused on a content-hash match, eliminating the
    /// parse + definition-collection step (which is ~95% of vendor wall-time
    /// on Laravel). Cache misses run the normal pipeline and write back so
    /// subsequent runs hit.
    ///
    /// [`StubSlice`]: mir_codebase::storage::StubSlice
    pub fn collect_types_only(&self, paths: &[PathBuf]) {
        let _timing = std::env::var("MIR_TIMING").is_ok();
        let _t0 = std::time::Instant::now();

        let php_v = self.resolved_php_version().cache_byte();

        // ---- Phase 1: read + try cache, in parallel ------------------------
        // Each entry carries either a ready-to-ingest cached slice, or the
        // source text + hash for the miss path that runs Pass 1.
        struct FileEntry {
            file: Arc<str>,
            src: Arc<str>,
            hash: [u8; 32],
            cached: Option<mir_codebase::storage::StubSlice>,
        }
        let entries: Vec<FileEntry> = paths
            .par_iter()
            .filter_map(|path| {
                let src = std::fs::read_to_string(path).ok()?;
                let file: Arc<str> = Arc::from(path.to_string_lossy().as_ref());
                let src: Arc<str> = Arc::from(src);
                let hash = hash_source(&src);
                let cached = self.shared_db.stub_cache.as_ref().and_then(|c| {
                    let mut slice = c.get(&file, &hash, php_v)?;
                    // Re-run dedup outside the serial ingest section so commit
                    // 3018a1d's parallel-dedup win is preserved on cache hits.
                    prepare_for_ingest(&mut slice);
                    Some(slice)
                });
                Some(FileEntry {
                    file,
                    src,
                    hash,
                    cached,
                })
            })
            .collect();
        let _t_read = _t0.elapsed();

        // ---- Phase 2: register all SourceFile inputs in salsa --------------
        // Lazy-load (e.g. UndefinedClass → vendor file) may later query any of
        // these as a salsa input, so we register both hits and misses.
        let source_files: Vec<SourceFile> = {
            let mut guard = self.shared_db.salsa.write();
            entries
                .iter()
                .map(|e| guard.upsert_source_file(e.file.clone(), e.src.clone()))
                .collect()
        };
        let _t_reg = _t0.elapsed();

        // ---- Phase 3: Pass 1 for misses, cache write-back, in parallel -----
        let db_pass1 = {
            let guard = self.shared_db.salsa.read();
            (**guard).clone()
        };
        let stub_cache = self.shared_db.stub_cache.clone();
        // `into_par_iter` so cached slices can be moved (not cloned) into the
        // result vec. Cloning 10k StubSlices on warm vendor would burn most
        // of the churn-reduction win the cache exists to produce.
        let prepared: Vec<mir_codebase::storage::StubSlice> = entries
            .into_par_iter()
            .zip(source_files.into_par_iter())
            .map_with(db_pass1, |db, (mut entry, salsa_file)| {
                if let Some(slice) = entry.cached.take() {
                    return slice;
                }
                let defs = collect_file_definitions_uncached(&*db, salsa_file);
                let slice = Arc::unwrap_or_clone(defs.slice);
                if let Some(cache) = stub_cache.as_ref() {
                    cache.put(&entry.file, &entry.hash, php_v, &slice);
                }
                slice
            })
            .collect();
        let _t_collect = _t0.elapsed();

        // ---- Phase 4: serial ingest under the write lock -------------------
        let mut guard = self.shared_db.salsa.write();
        for slice in &prepared {
            guard.ingest_stub_slice(slice);
        }
        drop(guard);
        let _t_ingest = _t0.elapsed();

        if _timing {
            let (hits, misses) = self.stub_cache_stats();
            eprintln!(
                "[vendor] read={:.0}ms reg={:.0}ms collect={:.0}ms ingest={:.0}ms total={:.0}ms (cache hits={hits} misses={misses})",
                _t_read.as_secs_f64() * 1000.0,
                (_t_reg - _t_read).as_secs_f64() * 1000.0,
                (_t_collect - _t_reg).as_secs_f64() * 1000.0,
                (_t_ingest - _t_collect).as_secs_f64() * 1000.0,
                _t_ingest.as_secs_f64() * 1000.0,
            );
        }

        // Print profiling statistics for the collection phase.
        crate::collector::print_collector_stats();
    }
}

impl Default for ProjectAnalyzer {
    fn default() -> Self {
        Self::new()
    }
}

pub(crate) fn collect_php_files(dir: &Path, out: &mut Vec<PathBuf>) {
    if let Ok(entries) = std::fs::read_dir(dir) {
        for entry in entries.flatten() {
            if entry.file_type().map(|ft| ft.is_symlink()).unwrap_or(false) {
                continue;
            }
            let path = entry.path();
            if path.is_dir() {
                let name = path.file_name().and_then(|n| n.to_str()).unwrap_or("");
                if matches!(
                    name,
                    "vendor" | ".git" | "node_modules" | ".cache" | ".pnpm-store"
                ) {
                    continue;
                }
                collect_php_files(&path, out);
            } else if path.extension().and_then(|e| e.to_str()) == Some("php") {
                out.push(path);
            }
        }
    }
}

// Build the reverse dependency graph: defining file -> files that depend on it
// (via imports, inheritance, signature types, and bare-FQN references).

fn build_reverse_deps(db: &dyn crate::db::MirDatabase) -> HashMap<String, HashSet<String>> {
    let mut reverse: HashMap<String, HashSet<String>> = HashMap::new();

    let mut add_edge = |symbol: &str, dependent_file: &str| {
        if let Some(defining_file) = db.symbol_defining_file(symbol) {
            let def = defining_file.as_ref().to_string();
            if def != dependent_file {
                reverse
                    .entry(def)
                    .or_default()
                    .insert(dependent_file.to_string());
            }
        }
    };

    for (file, imports) in db.file_import_snapshots() {
        let file = file.as_ref().to_string();
        for fqcn in imports.values() {
            add_edge(fqcn, &file);
        }
    }

    let extract_named_objects = |union: &mir_types::Union| {
        union
            .types
            .iter()
            .filter_map(|atomic| match atomic {
                mir_types::atomic::Atomic::TNamedObject { fqcn, .. } => Some(fqcn.clone()),
                _ => None,
            })
            .collect::<Vec<_>>()
    };

    for fqcn in db.active_class_node_fqcns() {
        // Only concrete classes contribute edges in this loop; interface,
        // trait, and enum declarations are skipped here, which matches what
        // this function has historically read.
        let kind = match crate::db::class_kind_via_db(db, fqcn.as_ref()) {
            Some(k) if !k.is_interface && !k.is_trait && !k.is_enum => k,
            _ => continue,
        };
        let _ = kind;
        let Some(file) = db
            .symbol_defining_file(fqcn.as_ref())
            .map(|f| f.as_ref().to_string())
        else {
            continue;
        };

        let Some(node) = db.lookup_class_node(fqcn.as_ref()) else {
            continue;
        };
        if let Some(parent) = node.parent(db) {
            add_edge(parent.as_ref(), &file);
        }
        for iface in node.interfaces(db).iter() {
            add_edge(iface.as_ref(), &file);
        }
        for tr in node.traits(db).iter() {
            add_edge(tr.as_ref(), &file);
        }

        // Add types from properties
        for prop in db.class_own_properties(fqcn.as_ref()).iter() {
            if let Some(ty) = prop.ty(db) {
                for named in extract_named_objects(&ty) {
                    add_edge(named.as_ref(), &file);
                }
            }
        }

        // Add types from methods
        for method in db.class_own_methods(fqcn.as_ref()).iter() {
            // Parameter types
            for param in method.params(db).iter() {
                if let Some(ty) = &param.ty {
                    for named in extract_named_objects(ty.as_ref()) {
                        add_edge(named.as_ref(), &file);
                    }
                }
            }
            // Return type
            if let Some(rt) = method.return_type(db) {
                for named in extract_named_objects(rt.as_ref()) {
                    add_edge(named.as_ref(), &file);
                }
            }
        }
    }

    // Add types from global functions
    for fqn in db.active_function_node_fqns() {
        let Some(node) = db.lookup_function_node(fqn.as_ref()) else {
            continue;
        };
        let Some(file) = db
            .symbol_defining_file(fqn.as_ref())
            .map(|f| f.as_ref().to_string())
        else {
            continue;
        };

        // Parameter types
        for param in node.params(db).iter() {
            if let Some(ty) = &param.ty {
                for named in extract_named_objects(ty.as_ref()) {
                    add_edge(named.as_ref(), &file);
                }
            }
        }
        // Return type
        if let Some(rt) = node.return_type(db) {
            for named in extract_named_objects(rt.as_ref()) {
                add_edge(named.as_ref(), &file);
            }
        }
    }

    // Also wire in bare-FQN references from Pass 2 (new \Foo(), \Foo::method(), \foo())
    // that do not appear in use-import statements.
    for (ref_file, symbol_key) in db.all_reference_location_pairs() {
        let file_str = ref_file.as_ref().to_string();
        let lookup: &str = match symbol_key.split_once("::") {
            Some((class, _)) => class,
            None => &symbol_key,
        };
        add_edge(lookup, &file_str);
    }

    reverse
}

fn extract_reference_locations(
    db: &dyn crate::db::MirDatabase,
    file: &Arc<str>,
) -> Vec<(String, u32, u16, u16)> {
    db.extract_file_reference_locations(file.as_ref())
        .into_iter()
        .map(|(sym, line, col_start, col_end)| (sym.to_string(), line, col_start, col_end))
        .collect()
}

pub struct AnalysisResult {
    pub issues: Vec<Issue>,
    #[doc(hidden)]
    pub type_envs: std::collections::HashMap<crate::type_env::ScopeId, crate::type_env::TypeEnv>,
    /// Per-expression resolved symbols from Pass 2, sorted by file path.
    pub symbols: Vec<crate::symbol::ResolvedSymbol>,
    /// Maps each file path to the contiguous range within `symbols` that belongs
    /// to it. Built once after analysis; allows `symbol_at` to scan only the
    /// relevant file's slice rather than the entire codebase-wide vector.
    symbols_by_file: HashMap<Arc<str>, std::ops::Range<usize>>,
}

impl AnalysisResult {
    fn build(
        issues: Vec<Issue>,
        type_envs: std::collections::HashMap<crate::type_env::ScopeId, crate::type_env::TypeEnv>,
        mut symbols: Vec<crate::symbol::ResolvedSymbol>,
    ) -> Self {
        symbols.sort_unstable_by(|a, b| a.file.as_ref().cmp(b.file.as_ref()));
        let mut symbols_by_file: HashMap<Arc<str>, std::ops::Range<usize>> = HashMap::new();
        let mut i = 0;
        while i < symbols.len() {
            let file = Arc::clone(&symbols[i].file);
            let start = i;
            while i < symbols.len() && symbols[i].file == file {
                i += 1;
            }
            symbols_by_file.insert(file, start..i);
        }
        Self {
            issues,
            type_envs,
            symbols,
            symbols_by_file,
        }
    }
}

impl AnalysisResult {
    pub fn error_count(&self) -> usize {
        self.issues
            .iter()
            .filter(|i| i.severity == mir_issues::Severity::Error)
            .count()
    }

    pub fn warning_count(&self) -> usize {
        self.issues
            .iter()
            .filter(|i| i.severity == mir_issues::Severity::Warning)
            .count()
    }

    /// Group issues by source file.
    pub fn issues_by_file(&self) -> HashMap<std::sync::Arc<str>, Vec<&Issue>> {
        let mut map: HashMap<std::sync::Arc<str>, Vec<&Issue>> = HashMap::new();
        for issue in &self.issues {
            map.entry(issue.location.file.clone())
                .or_default()
                .push(issue);
        }
        map
    }

    /// Count issues by severity. Returned as `(severity, count)` pairs sorted
    /// by severity (Info, Warning, Error).
    pub fn count_by_severity(&self) -> Vec<(mir_issues::Severity, usize)> {
        let mut counts: std::collections::BTreeMap<mir_issues::Severity, usize> =
            std::collections::BTreeMap::new();
        for issue in &self.issues {
            *counts.entry(issue.severity).or_insert(0) += 1;
        }
        counts.into_iter().collect()
    }

    /// Total number of issues across all severities and files.
    pub fn total_issue_count(&self) -> usize {
        self.issues.len()
    }

    /// Iterator of issues matching `predicate`. Useful for filtering by
    /// severity, kind, or file without materializing intermediate vectors.
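    ///
    /// # Example
    ///
    /// Count only errors (a sketch; `result` is any `AnalysisResult`):
    ///
    /// ```ignore
    /// let errors = result
    ///     .filter_issues(|i| i.severity == mir_issues::Severity::Error)
    ///     .count();
    /// ```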
    pub fn filter_issues<'a, F>(&'a self, predicate: F) -> impl Iterator<Item = &'a Issue>
    where
        F: Fn(&Issue) -> bool + 'a,
    {
        self.issues.iter().filter(move |i| predicate(i))
    }

    /// Return the innermost resolved symbol whose span contains `byte_offset`
    /// in `file`, or `None` if no symbol was recorded at that position.
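    ///
    /// # Example
    ///
    /// A sketch; the file path and byte offset are hypothetical:
    ///
    /// ```ignore
    /// if let Some(symbol) = result.symbol_at("src/Foo.php", 120) {
    ///     // e.g. feed `symbol` into a go-to-definition response
    /// }
    /// ```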
    pub fn symbol_at(
        &self,
        file: &str,
        byte_offset: u32,
    ) -> Option<&crate::symbol::ResolvedSymbol> {
        let range = self.symbols_by_file.get(file)?;
        self.symbols[range.clone()]
            .iter()
            .filter(|s| s.span.start <= byte_offset && byte_offset < s.span.end)
            .min_by_key(|s| s.span.end - s.span.start)
    }
1422}