Skip to main content

mir_analyzer/
project.rs

//! Project-level orchestration: file discovery, pass 1, pass 2.
2use std::mem::ManuallyDrop;
3use std::path::{Path, PathBuf};
4use std::sync::Arc;
5
6use rayon::prelude::*;
7
8use std::collections::{HashMap, HashSet};
9
10use crate::cache::{hash_content, AnalysisCache};
11use crate::db::{
12    collect_file_definitions, collect_file_definitions_uncached, FileDefinitions, MirDatabase,
13    MirDb, RefLoc, SourceFile,
14};
15use crate::pass2::{InferredTypes, Pass2Driver};
16use crate::php_version::PhpVersion;
17use crate::shared_db::SharedDb;
18use mir_issues::Issue;
19
20pub(crate) use crate::pass2::merge_return_types;
21
/// Batch-oriented analyzer: file discovery, parsing, and analysis.
///
/// ProjectAnalyzer is the primary entry point for analyzing a project as a whole.
/// It orchestrates parallel file discovery and parsing, using the same core
/// analysis engine as [`AnalysisSession`] (salsa database and Pass 2 driver).
///
/// **Unified Design:** ProjectAnalyzer and `AnalysisSession` now share the same
/// database management via [`SharedDb`]. ProjectAnalyzer is the batch API
/// (all files at once), while `AnalysisSession` is the incremental API (file-by-file).
/// Both use `Pass2Driver`, the same definition collection logic, and identical
/// database operations, eliminating code duplication.
///
/// The public fields can be set directly or through the `with_*` builder
/// methods on this type.
///
/// [`AnalysisSession`]: crate::session::AnalysisSession
pub struct ProjectAnalyzer {
    /// Shared database management (salsa, file registry, stub tracking).
    /// Extracted to allow code sharing with AnalysisSession.
    shared_db: Arc<SharedDb>,
    /// Optional cache — when `Some`, Pass 2 results are read/written per file.
    cache: Option<AnalysisCache>,
    /// Called once after each file completes Pass 2 (used for progress reporting).
    /// Invoked from parallel worker threads, hence the `Send + Sync` bound.
    pub on_file_done: Option<Arc<dyn Fn() + Send + Sync>>,
    /// PSR-4 autoloader mapping from composer.json, if available.
    /// When `Some`, `analyze` lazy-loads unknown classes through it.
    pub psr4: Option<Arc<crate::composer::Psr4Map>>,
    /// When true, run dead code detection at the end of analysis.
    pub find_dead_code: bool,
    /// Target PHP language version. `None` means "not configured"; resolved to
    /// `PhpVersion::LATEST` when passed down to `StatementsAnalyzer`.
    pub php_version: Option<PhpVersion>,
    /// Additional stub files to parse before analysis (absolute paths).
    pub stub_files: Vec<PathBuf>,
    /// Additional stub directories to walk and parse before analysis (absolute paths).
    pub stub_dirs: Vec<PathBuf>,
}
55
/// One source file bundled with its parsed AST and the arena backing that AST.
///
/// This is a self-referential bundle: `parsed` borrows from both `arena` and
/// `source`, with its lifetimes widened to `'static` in [`ParsedProjectFile::new`].
/// `parsed` and `arena` are wrapped in `ManuallyDrop` so that the `Drop` impl
/// can destroy them in an explicit order (`parsed` first).
struct ParsedProjectFile {
    /// File label; `analyze` builds it from the lossy string form of the path.
    file: Arc<str>,
    /// Full source text that was handed to the parser.
    source: Arc<str>,
    /// Parse result whose real lifetimes are tied to `arena` and `source`.
    parsed: ManuallyDrop<php_rs_parser::ParseResult<'static, 'static>>,
    /// Arena passed to the parser; boxed so its address stays stable when
    /// this struct is moved.
    arena: ManuallyDrop<Box<bumpalo::Bump>>,
}
62
63impl ParsedProjectFile {
64    fn new(file: Arc<str>, source: Arc<str>) -> Self {
65        let arena = Box::new(crate::arena::create_parse_arena(source.len()));
66        let parsed = php_rs_parser::parse(&arena, &source);
67        // SAFETY: `parsed` borrows from `arena` and `source`, both owned by this
68        // struct and kept alive until `Drop`. `Drop` manually destroys `parsed`
69        // before releasing either owner, so the widened lifetimes never escape.
70        let parsed = unsafe {
71            std::mem::transmute::<
72                php_rs_parser::ParseResult<'_, '_>,
73                php_rs_parser::ParseResult<'static, 'static>,
74            >(parsed)
75        };
76        Self {
77            file,
78            source,
79            parsed: ManuallyDrop::new(parsed),
80            arena: ManuallyDrop::new(arena),
81        }
82    }
83
84    fn source(&self) -> &str {
85        self.source.as_ref()
86    }
87
88    fn parsed(&self) -> &php_rs_parser::ParseResult<'_, '_> {
89        &self.parsed
90    }
91}
92
/// Tears down the self-referential pair in dependency order.
impl Drop for ParsedProjectFile {
    fn drop(&mut self) {
        // SAFETY: both fields are `ManuallyDrop` and this is the only place
        // that drops them, so each is dropped exactly once. `parsed` borrows
        // from `arena`, so it is destroyed first. `source` (also borrowed by
        // `parsed`) is a regular field and is released only after this
        // function returns.
        unsafe {
            ManuallyDrop::drop(&mut self.parsed);
            ManuallyDrop::drop(&mut self.arena);
        }
    }
}
101
// SAFETY: after construction the parsed AST and source map are read-only. The
// bump arena is never mutated again; it only owns backing storage for AST nodes
// and is dropped after all parallel analysis has completed.
// NOTE(review): this relies on no code path allocating into the arena or
// mutating the parse result through a shared reference — confirm whenever the
// parser or arena crate is upgraded.
unsafe impl Send for ParsedProjectFile {}
unsafe impl Sync for ParsedProjectFile {}
107
108impl ProjectAnalyzer {
109    pub fn new() -> Self {
110        Self {
111            shared_db: Arc::new(SharedDb::new()),
112            cache: None,
113            on_file_done: None,
114            psr4: None,
115            find_dead_code: false,
116            php_version: None,
117            stub_files: Vec::new(),
118            stub_dirs: Vec::new(),
119        }
120    }
121
122    /// Create a `ProjectAnalyzer` with a disk-backed cache stored under `cache_dir`.
123    pub fn with_cache(cache_dir: &Path) -> Self {
124        Self {
125            shared_db: Arc::new(SharedDb::new()),
126            cache: Some(AnalysisCache::open(cache_dir)),
127            on_file_done: None,
128            psr4: None,
129            find_dead_code: false,
130            php_version: None,
131            stub_files: Vec::new(),
132            stub_dirs: Vec::new(),
133        }
134    }
135
136    /// Enable the disk-backed cache for an already-constructed analyzer.
137    pub fn set_cache_dir(&mut self, cache_dir: &Path) {
138        self.cache = Some(AnalysisCache::open(cache_dir));
139    }
140
141    /// Create a `ProjectAnalyzer` from a project root containing `composer.json`.
142    /// Returns the analyzer (with `psr4` set) and the `Psr4Map` so callers can
143    /// call `map.project_files()` / `map.vendor_files()`.
144    pub fn from_composer(
145        root: &Path,
146    ) -> Result<(Self, crate::composer::Psr4Map), crate::composer::ComposerError> {
147        let map = crate::composer::Psr4Map::from_composer(root)?;
148        let psr4 = Arc::new(map.clone());
149        let analyzer = Self {
150            shared_db: Arc::new(SharedDb::new()),
151            cache: None,
152            on_file_done: None,
153            psr4: Some(psr4),
154            find_dead_code: false,
155            php_version: None,
156            stub_files: Vec::new(),
157            stub_dirs: Vec::new(),
158        };
159        Ok((analyzer, map))
160    }
161
162    /// Builder method: set the target PHP version.
163    pub fn with_php_version(mut self, version: PhpVersion) -> Self {
164        self.php_version = Some(version);
165        self
166    }
167
168    /// Builder method: enable dead-code detection at the end of analysis.
169    pub fn with_dead_code(mut self, enabled: bool) -> Self {
170        self.find_dead_code = enabled;
171        self
172    }
173
174    /// Builder method: set a progress callback invoked once per analyzed file.
175    pub fn with_progress_callback(mut self, callback: Arc<dyn Fn() + Send + Sync>) -> Self {
176        self.on_file_done = Some(callback);
177        self
178    }
179
180    /// Builder method: add user stub files.
181    pub fn with_stub_files(mut self, files: Vec<PathBuf>) -> Self {
182        self.stub_files = files;
183        self
184    }
185
186    /// Builder method: add user stub directories.
187    pub fn with_stub_dirs(mut self, dirs: Vec<PathBuf>) -> Self {
188        self.stub_dirs = dirs;
189        self
190    }
191
192    /// Builder method: configure a disk-backed cache at the given directory.
193    pub fn with_cache_dir(mut self, cache_dir: &Path) -> Self {
194        self.cache = Some(AnalysisCache::open(cache_dir));
195        self
196    }
197
198    /// Builder method: attach a PSR-4 autoloader map.
199    pub fn with_psr4(mut self, map: Arc<crate::composer::Psr4Map>) -> Self {
200        self.psr4 = Some(map);
201        self
202    }
203
204    /// Resolve the configured PHP version, defaulting to `PhpVersion::LATEST`
205    /// when none has been set.
206    fn resolved_php_version(&self) -> PhpVersion {
207        self.php_version.unwrap_or(PhpVersion::LATEST)
208    }
209
210    fn type_exists(&self, fqcn: &str) -> bool {
211        let db = self.snapshot_db();
212        crate::db::type_exists_via_db(&db, fqcn)
213    }
214
215    /// Returns `true` if a function with `fqn` is registered and active.
216    pub fn contains_function(&self, fqn: &str) -> bool {
217        let db = self.snapshot_db();
218        db.lookup_function_node(fqn).is_some_and(|n| n.active(&db))
219    }
220
221    /// Returns `true` if a class / interface / trait / enum is registered.
222    pub fn contains_class(&self, fqcn: &str) -> bool {
223        let db = self.snapshot_db();
224        db.lookup_class_node(fqcn).is_some_and(|n| n.active(&db))
225    }
226
227    /// Returns `true` if `class` has a method named `name` (case-insensitive).
228    pub fn contains_method(&self, class: &str, name: &str) -> bool {
229        let db = self.snapshot_db();
230        let name_lower = name.to_ascii_lowercase();
231        db.lookup_method_node(class, &name_lower)
232            .is_some_and(|n| n.active(&db))
233    }
234
235    /// Acquire a cheap clone of the salsa db for a read-only query.
236    /// The lock is held only for the duration of the clone, so concurrent
237    /// readers never serialize on each other or on writes longer than the
238    /// clone itself.
239    fn snapshot_db(&self) -> MirDb {
240        self.shared_db.snapshot_db()
241    }
242
243    /// Internal: expose the salsa db for unit tests that need a `&dyn MirDatabase`.
244    #[doc(hidden)]
245    pub fn salsa_db_for_test(&self) -> parking_lot::MappedRwLockWriteGuard<'_, MirDb> {
246        let guard = self.shared_db.salsa.write();
247        parking_lot::RwLockWriteGuard::map(guard, |rw| &mut **rw)
248    }
249
250    /// Legacy: look up the source location of a class member by name.
251    ///
252    /// Prefer [`Self::definition_of`] with [`crate::Symbol::method`] etc.
253    #[doc(hidden)]
254    pub fn member_location(
255        &self,
256        fqcn: &str,
257        member_name: &str,
258    ) -> Option<mir_codebase::storage::Location> {
259        let db = self.snapshot_db();
260        crate::db::member_location_via_db(&db, fqcn, member_name)
261    }
262
263    /// Legacy: look up a top-level symbol location.
264    ///
265    /// Prefer [`Self::definition_of`] with [`crate::Symbol`].
266    #[doc(hidden)]
267    pub fn symbol_location(&self, symbol: &str) -> Option<mir_codebase::storage::Location> {
268        let db = self.snapshot_db();
269        db.lookup_class_node(symbol)
270            .filter(|n| n.active(&db))
271            .and_then(|n| n.location(&db))
272            .or_else(|| {
273                db.lookup_function_node(symbol)
274                    .filter(|n| n.active(&db))
275                    .and_then(|n| n.location(&db))
276            })
277    }
278
279    /// Legacy: raw reference locations as `(file, line, col_start, col_end)`.
280    ///
281    /// Prefer [`Self::references_to`] which returns `(Arc<str>, Range)` pairs
282    /// and takes a strongly-typed [`crate::Symbol`].
283    #[doc(hidden)]
284    pub fn reference_locations(&self, symbol: &str) -> Vec<(Arc<str>, u32, u16, u16)> {
285        let db = self.snapshot_db();
286        db.reference_locations(symbol)
287    }
288
289    /// Resolve a symbol to its declaration location.
290    ///
291    /// Mirrors [`crate::AnalysisSession::definition_of`].
292    pub fn definition_of(
293        &self,
294        symbol: &crate::Symbol,
295    ) -> Result<mir_codebase::storage::Location, crate::SymbolLookupError> {
296        let db = self.snapshot_db();
297        match symbol {
298            crate::Symbol::Class(fqcn) => {
299                let node = db
300                    .lookup_class_node(fqcn.as_ref())
301                    .filter(|n| n.active(&db))
302                    .ok_or(crate::SymbolLookupError::NotFound)?;
303                node.location(&db)
304                    .ok_or(crate::SymbolLookupError::NoSourceLocation)
305            }
306            crate::Symbol::Function(fqn) => {
307                let node = db
308                    .lookup_function_node(fqn.as_ref())
309                    .filter(|n| n.active(&db))
310                    .ok_or(crate::SymbolLookupError::NotFound)?;
311                node.location(&db)
312                    .ok_or(crate::SymbolLookupError::NoSourceLocation)
313            }
314            crate::Symbol::Method { class, name }
315            | crate::Symbol::Property { class, name }
316            | crate::Symbol::ClassConstant { class, name } => {
317                crate::db::member_location_via_db(&db, class, name)
318                    .ok_or(crate::SymbolLookupError::NotFound)
319            }
320            crate::Symbol::GlobalConstant(_) => Err(crate::SymbolLookupError::NoSourceLocation),
321        }
322    }
323
324    /// All recorded references to a symbol, as `(file, range)` pairs.
325    ///
326    /// Mirrors [`crate::AnalysisSession::references_to`].
327    pub fn references_to(&self, symbol: &crate::Symbol) -> Vec<(Arc<str>, crate::Range)> {
328        let db = self.snapshot_db();
329        let key = symbol.codebase_key();
330        db.reference_locations(&key)
331            .into_iter()
332            .map(|(file, line, col_start, col_end)| {
333                let range = crate::Range {
334                    start: crate::Position {
335                        line,
336                        column: col_start as u32,
337                    },
338                    end: crate::Position {
339                        line,
340                        column: col_end as u32,
341                    },
342                };
343                (file, range)
344            })
345            .collect()
346    }
347
348    /// Load PHP built-in stubs. Called automatically by `analyze` if not done yet.
349    /// Stubs are filtered against the configured target PHP version (or
350    /// `PhpVersion::LATEST` if none was set).
351    pub fn load_stubs(&self) {
352        let php_version = self.resolved_php_version();
353
354        // Load all built-in stubs for the configured PHP version
355        let paths: Vec<&'static str> = crate::stubs::stub_files().iter().map(|&(p, _)| p).collect();
356        self.shared_db.ingest_stub_paths(&paths, php_version);
357
358        // Load user-configured stubs
359        self.shared_db
360            .ingest_user_stubs(&self.stub_files, &self.stub_dirs);
361    }
362
363    fn collect_and_ingest_source(&self, file: Arc<str>, src: &str) -> FileDefinitions {
364        self.shared_db.collect_and_ingest_file(file, src)
365    }
366
    /// Run the full analysis pipeline on a set of file paths.
    ///
    /// Stages, in order:
    /// 1. Load built-in and user stubs.
    /// 2. Read and parse every path in parallel. Unreadable files are reported
    ///    on stderr and skipped.
    /// 3. With a cache configured: every file whose content hash has no cache
    ///    entry is passed to `evict_with_dependents`.
    /// 4. Register the sources with the salsa db, collect definitions from the
    ///    parsed ASTs (Pass 1) and ingest them.
    /// 5. With a PSR-4 map: lazy-load classes that are referenced but unknown.
    /// 6. With a cache configured: build and store the reverse-dependency graph.
    /// 7. Class-level checks.
    /// 8. Inference pre-sweep, then Pass 2 body analysis, both in parallel.
    /// 9. Commit reference locations, run the post-Pass-2 lazy load, flush the
    ///    cache, and optionally run dead-code detection.
    ///
    /// Files that produced a parse error still contribute their definitions
    /// but are excluded from the pre-sweep and from Pass 2.
    ///
    /// When the `MIR_TIMING` environment variable is set, per-stage timings
    /// are printed to stderr.
    pub fn analyze(&self, paths: &[PathBuf]) -> AnalysisResult {
        let mut all_issues = Vec::new();
        let _t0 = std::time::Instant::now();

        // ---- Load PHP built-in stubs (before Pass 1 so user code can override)
        self.load_stubs();
        let _t_stubs = _t0.elapsed();

        // ---- Pass 1: read and parse files in parallel -------------------------
        // `ParsedProjectFile::new` parses immediately, so this stage covers
        // both I/O and parsing.
        let parsed_files: Vec<ParsedProjectFile> = paths
            .par_iter()
            .filter_map(|path| match std::fs::read_to_string(path) {
                Ok(src) => {
                    let file = Arc::from(path.to_string_lossy().as_ref());
                    Some(ParsedProjectFile::new(file, Arc::from(src)))
                }
                Err(e) => {
                    eprintln!("Cannot read {}: {}", path.display(), e);
                    None
                }
            })
            .collect();
        let _t_read = _t0.elapsed();

        // (file, source) pairs; the `Arc` clones share the underlying text.
        let file_data: Vec<(Arc<str>, Arc<str>)> = parsed_files
            .iter()
            .map(|parsed| (parsed.file.clone(), parsed.source.clone()))
            .collect();

        // ---- Pre-Pass-2 invalidation: evict dependents of changed files ------
        // A file counts as changed when the cache has no entry for its
        // current content hash.
        if let Some(cache) = &self.cache {
            let changed: Vec<String> = file_data
                .par_iter()
                .filter_map(|(f, src)| {
                    let h = hash_content(src.as_ref());
                    if cache.get(f, &h).is_none() {
                        Some(f.to_string())
                    } else {
                        None
                    }
                })
                .collect();
            if !changed.is_empty() {
                cache.evict_with_dependents(&changed);
            }
        }

        // ---- Register Salsa source inputs for incremental follow-up calls ----
        // The write guard is scoped so it is released before the parallel
        // definition collection below.
        {
            let mut guard = self.shared_db.salsa.write();
            for parsed in &parsed_files {
                guard.upsert_source_file(parsed.file.clone(), parsed.source.clone());
            }
        }
        let _t_salsa_reg = _t0.elapsed();

        // ---- Pass 1: definition collection from the already-parsed AST -------
        let file_defs: Vec<FileDefinitions> = parsed_files
            .par_iter()
            .map(|parsed| {
                let parse_result = parsed.parsed();
                // Parse errors are reported at a fixed location (line 1,
                // column 0) of the file rather than at the error's position.
                let mut all_issues: Vec<Issue> = parse_result
                    .errors
                    .iter()
                    .map(|err| {
                        Issue::new(
                            mir_issues::IssueKind::ParseError {
                                message: err.to_string(),
                            },
                            mir_issues::Location {
                                file: parsed.file.clone(),
                                line: 1,
                                line_end: 1,
                                col_start: 0,
                                col_end: 0,
                            },
                        )
                    })
                    .collect();
                let collector = crate::collector::DefinitionCollector::new_for_slice(
                    parsed.file.clone(),
                    parsed.source(),
                    &parse_result.source_map,
                );
                let (mut slice, collector_issues) = collector.collect_slice(&parse_result.program);
                all_issues.extend(collector_issues);
                mir_codebase::storage::deduplicate_params_in_slice(&mut slice);
                FileDefinitions {
                    slice: Arc::new(slice),
                    issues: Arc::new(all_issues),
                }
            })
            .collect();
        let _t_pass1 = _t0.elapsed();

        // Ingest all slices under one write lock, and remember which files had
        // parse errors so the pre-sweep and Pass 2 can skip them.
        let mut files_with_parse_errors: std::collections::HashSet<Arc<str>> =
            std::collections::HashSet::new();
        {
            let mut guard = self.shared_db.salsa.write();
            for defs in file_defs {
                for issue in defs.issues.iter() {
                    if matches!(issue.kind, mir_issues::IssueKind::ParseError { .. }) {
                        files_with_parse_errors.insert(issue.location.file.clone());
                    }
                }
                guard.ingest_stub_slice(&defs.slice);
                all_issues.extend(Arc::unwrap_or_clone(defs.issues));
            }
        }
        let _t_ingest = _t0.elapsed();

        // ---- Lazy-load unknown classes via PSR-4 (issue #50) ----------------
        if let Some(psr4) = &self.psr4 {
            self.lazy_load_missing_classes(psr4.clone(), &mut all_issues);
        }

        // NOTE(review): a section header announcing `@psalm-import-type`
        // resolution sat here, but no such step is performed at this point in
        // this function — confirm whether it now happens elsewhere.
        // ---- Build reverse dep graph and persist it for the next run ---------
        if let Some(cache) = &self.cache {
            let db_snapshot = {
                let guard = self.shared_db.salsa.read();
                (**guard).clone()
            };
            let rev = build_reverse_deps(&db_snapshot);
            cache.set_reverse_deps(rev);
        }

        // ---- Class-level checks (M11) ----------------------------------------
        let analyzed_file_set: std::collections::HashSet<std::sync::Arc<str>> =
            file_data.iter().map(|(f, _)| f.clone()).collect();
        {
            // The db clone is scoped so it is dropped before the write locks
            // taken further down.
            let class_db = {
                let guard = self.shared_db.salsa.read();
                (**guard).clone()
            };
            let class_issues =
                crate::class::ClassAnalyzer::with_files(&class_db, analyzed_file_set, &file_data)
                    .analyze_all();
            all_issues.extend(class_issues);
        }

        // ---- Inference pre-sweep: prime inferred return types ----------------
        // Run an inference-only Pass 2 over each file in parallel using direct
        // rayon (no Salsa tracked-query overhead per file), collect the results,
        // then commit them to Salsa INPUT fields.  The full Pass 2 then reads
        // those fields via O(1) accesses with no lock contention.
        //
        // We use `Pass2Driver::new_inference_only` directly rather than the
        // Salsa-tracked `infer_file_return_types` query so that the batch path
        // avoids per-file Salsa lock acquisition and memo-table overhead on every
        // cold start.  `infer_file_return_types` is reserved for the incremental
        // LSP path (AnalysisSession) where Salsa cache hits across edits matter.
        //
        // `map_with` clones `db_priming` once per rayon worker thread (not once
        // per file as the old `in_place_scope` loop did). For N files on T threads
        // this reduces clones from N to T.  Results are returned by value and
        // flattened after `collect()`, replacing the Arc<Mutex<Vec>> accumulator.
        // All per-thread db clones are dropped when `collect()` returns, so
        // `commit_inferred_return_types` (which calls Salsa setters that wait for
        // strong_count == 1) cannot deadlock.
        {
            let db_priming = {
                let guard = self.shared_db.salsa.read();
                (**guard).clone()
            };
            let php_version = self.resolved_php_version();
            let all_inferred: Vec<InferredTypes> = parsed_files
                .par_iter()
                .filter(|parsed| !files_with_parse_errors.contains(&parsed.file))
                .map_with(db_priming, |db, parsed| {
                    let driver = Pass2Driver::new_inference_only(
                        db as &dyn crate::db::MirDatabase,
                        php_version,
                    );
                    let parse_result = parsed.parsed();
                    // Issues and symbols from this run are discarded; only
                    // the inferred types are kept.
                    driver.analyze_bodies(
                        &parse_result.program,
                        parsed.file.clone(),
                        parsed.source(),
                        &parse_result.source_map,
                    );
                    driver.take_inferred_types()
                })
                .collect();
            // db_priming is consumed by map_with; per-thread clones dropped by collect().
            let mut functions = Vec::new();
            let mut methods = Vec::new();
            for inferred in all_inferred {
                functions.extend(inferred.functions);
                methods.extend(inferred.methods);
            }
            let mut guard = self.shared_db.salsa.write();
            guard.commit_inferred_return_types(functions, methods);
        }
        let _t_presweep = _t0.elapsed();

        // Fresh clone taken after the commit above, so Pass 2 workers see the
        // inferred return types.
        let db_main = {
            let guard = self.shared_db.salsa.read();
            (**guard).clone()
        };

        // ---- Pass 2: analyze function/method bodies in parallel (M14) --------
        // Each worker db clone has its own `pending_ref_locs` buffer (custom
        // Clone returns empty).  Workers push reference locations there instead
        // of into the shared Arc<Mutex<...>> maps, eliminating cross-thread
        // contention.  After collect() we commit all batches serially in a
        // single lock acquisition per map.
        let pass2_results: Vec<(Vec<Issue>, Vec<crate::symbol::ResolvedSymbol>, Vec<RefLoc>)> =
            parsed_files
                .par_iter()
                .filter(|parsed| !files_with_parse_errors.contains(&parsed.file))
                .map_with(db_main, |db, parsed| {
                    let driver =
                        Pass2Driver::new(&*db as &dyn MirDatabase, self.resolved_php_version());
                    let (issues, symbols) = if let Some(cache) = &self.cache {
                        let h = hash_content(parsed.source());
                        if let Some((cached_issues, ref_locs)) = cache.get(&parsed.file, &h) {
                            // Cache hit: replay the stored reference locations
                            // and reuse the stored issues. No symbols are
                            // produced for this file.
                            db.replay_reference_locations(parsed.file.clone(), &ref_locs);
                            (cached_issues, Vec::new())
                        } else {
                            // Cache miss: analyze, store the result, and
                            // return early. The progress callback is invoked
                            // here because the shared tail below is skipped.
                            let parse_result = parsed.parsed();
                            let (issues, symbols) = driver.analyze_bodies(
                                &parse_result.program,
                                parsed.file.clone(),
                                parsed.source(),
                                &parse_result.source_map,
                            );
                            let pending = db.take_pending_ref_locs();
                            let cache_locs = pending
                                .iter()
                                .map(|r| (r.symbol_key.to_string(), r.line, r.col_start, r.col_end))
                                .collect();
                            cache.put(&parsed.file, h, issues.clone(), cache_locs);
                            if let Some(cb) = &self.on_file_done {
                                cb();
                            }
                            return (issues, symbols, pending);
                        }
                    } else {
                        // No cache configured: always analyze.
                        let parse_result = parsed.parsed();
                        driver.analyze_bodies(
                            &parse_result.program,
                            parsed.file.clone(),
                            parsed.source(),
                            &parse_result.source_map,
                        )
                    };
                    // Shared tail for the cache-hit and no-cache paths.
                    let pending = db.take_pending_ref_locs();
                    if let Some(cb) = &self.on_file_done {
                        cb();
                    }
                    (issues, symbols, pending)
                })
                .collect();

        let _t_pass2 = _t0.elapsed();

        // Serial commit: one lock acquisition per map for all files combined.
        let mut all_ref_locs: Vec<RefLoc> = Vec::new();
        let mut all_symbols = Vec::new();
        for (issues, symbols, ref_locs) in pass2_results {
            all_issues.extend(issues);
            all_symbols.extend(symbols);
            all_ref_locs.extend(ref_locs);
        }
        {
            let guard = self.shared_db.salsa.read();
            guard.commit_reference_locations_batch(all_ref_locs);
        }

        // ---- Post-Pass-2 lazy loading: FQCNs used without `use` imports ------
        // FQCNs in function/method bodies aren't visible until Pass 2 runs, so
        // the pre-Pass-2 lazy load misses them.  We collect UndefinedClass names,
        // resolve them via PSR-4, load those files, re-finalize, then re-analyze
        // only the affected files to clear the false positives.
        if let Some(psr4) = &self.psr4 {
            self.lazy_load_from_body_issues(
                psr4.clone(),
                &file_data,
                &files_with_parse_errors,
                &mut all_issues,
                &mut all_symbols,
            );
        }

        // Persist cache hits/misses to disk
        if let Some(cache) = &self.cache {
            cache.flush();
        }

        // NOTE(review): a "Compact the reference index" section header sat
        // here with no code beneath it — confirm whether that step was
        // removed or moved.
        // ---- Dead-code detection (M18) --------------------------------------
        if self.find_dead_code {
            let salsa = self.snapshot_db();
            let dead_code_issues = crate::dead_code::DeadCodeAnalyzer::new(&salsa).analyze();
            all_issues.extend(dead_code_issues);
        }

        // Timings are cumulative since `_t0`; the report prints per-stage
        // differences. Stages without their own timestamp are folded into the
        // following measured stage (e.g. `presweep` also covers lazy loading,
        // reverse deps and class checks).
        let _t_total = _t0.elapsed();
        if std::env::var("MIR_TIMING").is_ok() {
            eprintln!(
                "[timing] stubs={:.0}ms read={:.0}ms salsa_reg={:.0}ms pass1={:.0}ms ingest={:.0}ms presweep={:.0}ms pass2={:.0}ms total={:.0}ms",
                _t_stubs.as_secs_f64() * 1000.0,
                (_t_read - _t_stubs).as_secs_f64() * 1000.0,
                (_t_salsa_reg - _t_read).as_secs_f64() * 1000.0,
                (_t_pass1 - _t_salsa_reg).as_secs_f64() * 1000.0,
                (_t_ingest - _t_pass1).as_secs_f64() * 1000.0,
                (_t_presweep - _t_ingest).as_secs_f64() * 1000.0,
                (_t_pass2 - _t_presweep).as_secs_f64() * 1000.0,
                _t_total.as_secs_f64() * 1000.0,
            );
        }

        AnalysisResult::build(all_issues, std::collections::HashMap::new(), all_symbols)
    }
684
685    fn lazy_load_missing_classes(
686        &self,
687        psr4: Arc<crate::composer::Psr4Map>,
688        all_issues: &mut Vec<Issue>,
689    ) {
690        use std::collections::HashSet;
691        use std::sync::Arc;
692
693        let max_depth = 10;
694        let mut loaded: HashSet<String> = HashSet::new();
695        let mut scanned: HashSet<Arc<str>> = HashSet::new();
696
697        for _ in 0..max_depth {
698            let mut to_load: Vec<(String, PathBuf)> = Vec::new();
699
700            let mut try_queue = |fqcn: &str| {
701                if !self.type_exists(fqcn) && !loaded.contains(fqcn) {
702                    if let Some(path) = psr4.resolve(fqcn) {
703                        to_load.push((fqcn.to_string(), path));
704                    }
705                }
706            };
707
708            // Collect inheritance and import candidates. Only scan classes that
709            // haven't been scanned yet (optimization: avoid redundant full scans).
710            let mut inheritance_candidates = Vec::new();
711            let import_candidates = {
712                let db_owned = self.snapshot_db();
713                let db = &db_owned;
714                for fqcn in db.active_class_node_fqcns() {
715                    if scanned.contains(fqcn.as_ref()) {
716                        continue;
717                    }
718                    let Some(node) = db.lookup_class_node(&fqcn) else {
719                        continue;
720                    };
721                    scanned.insert(fqcn.clone());
722                    if node.is_interface(db) {
723                        for parent in node.extends(db).iter() {
724                            inheritance_candidates.push(parent.to_string());
725                        }
726                    } else if node.is_enum(db) {
727                        for iface in node.interfaces(db).iter() {
728                            inheritance_candidates.push(iface.to_string());
729                        }
730                    } else if node.is_trait(db) {
731                        for used in node.traits(db).iter() {
732                            inheritance_candidates.push(used.to_string());
733                        }
734                    } else {
735                        if let Some(parent) = node.parent(db) {
736                            inheritance_candidates.push(parent.to_string());
737                        }
738                        for iface in node.interfaces(db).iter() {
739                            inheritance_candidates.push(iface.to_string());
740                        }
741                    }
742                }
743                db.file_import_snapshots()
744                    .into_iter()
745                    .flat_map(|(_, imports)| imports.into_values())
746                    .collect::<Vec<_>>()
747            };
748            for fqcn in inheritance_candidates {
749                try_queue(&fqcn);
750            }
751
752            // Also lazy-load any type referenced via `use` imports that isn't yet
753            // in the codebase (covers enums and classes used only in type hints or
754            // static calls, which never appear in the inheritance scan above).
755            for fqcn in import_candidates {
756                try_queue(&fqcn);
757            }
758
759            if to_load.is_empty() {
760                break;
761            }
762
763            for (fqcn, path) in to_load {
764                loaded.insert(fqcn);
765                if let Ok(src) = std::fs::read_to_string(&path) {
766                    let file: Arc<str> = Arc::from(path.to_string_lossy().as_ref());
767                    let defs = self.collect_and_ingest_source(file, &src);
768                    all_issues.extend(Arc::unwrap_or_clone(defs.issues));
769                }
770            }
771        }
772    }
773
774    fn lazy_load_from_body_issues(
775        &self,
776        psr4: Arc<crate::composer::Psr4Map>,
777        file_data: &[(Arc<str>, Arc<str>)],
778        files_with_parse_errors: &HashSet<Arc<str>>,
779        all_issues: &mut Vec<Issue>,
780        all_symbols: &mut Vec<crate::symbol::ResolvedSymbol>,
781    ) {
782        use mir_issues::IssueKind;
783
784        let max_depth = 5;
785        let mut loaded: HashSet<String> = HashSet::new();
786
787        for _ in 0..max_depth {
788            // Deduplicate by FQCN: HashMap prevents loading the same class twice
789            // when multiple files share the same UndefinedClass diagnostic.
790            let mut to_load: HashMap<String, PathBuf> = HashMap::new();
791
792            for issue in all_issues.iter() {
793                if let IssueKind::UndefinedClass { name } = &issue.kind {
794                    if !self.type_exists(name) && !loaded.contains(name) {
795                        if let Some(path) = psr4.resolve(name) {
796                            to_load.entry(name.clone()).or_insert(path);
797                        }
798                    }
799                }
800            }
801
802            if to_load.is_empty() {
803                break;
804            }
805
806            loaded.extend(to_load.keys().cloned());
807
808            for path in to_load.values() {
809                if let Ok(src) = std::fs::read_to_string(path) {
810                    let file: Arc<str> = Arc::from(path.to_string_lossy().as_ref());
811                    let _ = self.collect_and_ingest_source(file, &src);
812                }
813            }
814
815            // Load inheritance deps of newly-added types and finalize.
816            // This covers e.g. `class Helper extends \App\Base` where Base is
817            // also not in the initial file set.
818            self.lazy_load_missing_classes(psr4.clone(), all_issues);
819
820            // Re-analyze every file that has an UndefinedClass for a type now
821            // present in the codebase — covers both direct and transitive loads.
822            let files_to_reanalyze: HashSet<Arc<str>> = all_issues
823                .iter()
824                .filter_map(|i| {
825                    if let IssueKind::UndefinedClass { name } = &i.kind {
826                        if self.type_exists(name) {
827                            return Some(i.location.file.clone());
828                        }
829                    }
830                    None
831                })
832                .collect();
833
834            if files_to_reanalyze.is_empty() {
835                break;
836            }
837
838            all_issues.retain(|i| !files_to_reanalyze.contains(&i.location.file));
839            all_symbols.retain(|s| !files_to_reanalyze.contains(&s.file));
840
841            let db_full = {
842                let guard = self.shared_db.salsa.read();
843                (**guard).clone()
844            };
845
846            let reanalysis: Vec<(Vec<Issue>, Vec<crate::symbol::ResolvedSymbol>, Vec<RefLoc>)> =
847                file_data
848                    .par_iter()
849                    .filter(|(f, _)| {
850                        !files_with_parse_errors.contains(f) && files_to_reanalyze.contains(f)
851                    })
852                    .map_with(db_full, |db, (file, src)| {
853                        let driver =
854                            Pass2Driver::new(&*db as &dyn MirDatabase, self.resolved_php_version());
855                        let arena = crate::arena::create_parse_arena(src.len());
856                        let parsed = php_rs_parser::parse(&arena, src);
857                        let (issues, symbols) = driver.analyze_bodies(
858                            &parsed.program,
859                            file.clone(),
860                            src,
861                            &parsed.source_map,
862                        );
863                        let pending = db.take_pending_ref_locs();
864                        (issues, symbols, pending)
865                    })
866                    .collect();
867
868            let mut reanalysis_ref_locs: Vec<RefLoc> = Vec::new();
869            for (issues, symbols, ref_locs) in reanalysis {
870                all_issues.extend(issues);
871                all_symbols.extend(symbols);
872                reanalysis_ref_locs.extend(ref_locs);
873            }
874            {
875                let guard = self.shared_db.salsa.read();
876                guard.commit_reference_locations_batch(reanalysis_ref_locs);
877            }
878        }
879    }
880
881    /// Re-analyze a single file within the existing codebase.
882    ///
883    /// This is the incremental analysis API for LSP:
884    /// 1. Removes old definitions from this file
885    /// 2. Re-runs Pass 1 (definition collection) on the new content
886    /// 3. Resolves any newly-collected `@psalm-import-type` declarations
887    /// 4. Re-runs Pass 2 (body analysis) on this file
888    /// 5. Returns the analysis result for this file only
889    pub fn re_analyze_file(&self, file_path: &str, new_content: &str) -> AnalysisResult {
890        // Fast path: content unchanged and cache has a valid entry — skip full re-analysis.
891        if let Some(cache) = &self.cache {
892            let h = hash_content(new_content);
893            if let Some((issues, ref_locs)) = cache.get(file_path, &h) {
894                let file: Arc<str> = Arc::from(file_path);
895                let guard = self.shared_db.salsa.read();
896                guard.replay_reference_locations(file, &ref_locs);
897                guard.commit_pending_to_maps();
898                return AnalysisResult::build(issues, HashMap::new(), Vec::new());
899            }
900        }
901
902        let file: Arc<str> = Arc::from(file_path);
903
904        {
905            let mut guard = self.shared_db.salsa.write();
906            guard.remove_file_definitions(file_path);
907        }
908
909        // --- Salsa-backed Pass 1: memoized parse + definition collection ------
910        let file_defs = {
911            let mut guard = self.shared_db.salsa.write();
912            let salsa_file = guard.upsert_source_file(file.clone(), Arc::from(new_content));
913            collect_file_definitions(&**guard, salsa_file)
914        };
915
916        let mut all_issues: Vec<Issue> = Arc::unwrap_or_clone(file_defs.issues.clone());
917
918        // --- S2 + Pass 2: hold the Salsa lock for ClassNode upserts and body
919        // analysis so the db reference is live during Pass 2 (S5).
920        let symbols = {
921            let mut guard = self.shared_db.salsa.write();
922
923            guard.ingest_stub_slice(&file_defs.slice);
924
925            // Resolve any newly-collected @psalm-import-type declarations so
926            // Pass 2 reads the imported aliases out of `type_aliases`.
927            // Re-parse in the arena so Pass 2 can walk the AST.
928            let arena = bumpalo::Bump::new();
929            let parsed = php_rs_parser::parse(&arena, new_content);
930
931            if parsed.errors.is_empty() {
932                let db_ref: &dyn MirDatabase = &**guard;
933                let driver = Pass2Driver::new(db_ref, self.resolved_php_version());
934                let (body_issues, symbols) = driver.analyze_bodies(
935                    &parsed.program,
936                    file.clone(),
937                    new_content,
938                    &parsed.source_map,
939                );
940                all_issues.extend(body_issues);
941                guard.commit_pending_to_maps();
942                symbols
943            } else {
944                Vec::new()
945            }
946        };
947
948        if let Some(cache) = &self.cache {
949            let h = hash_content(new_content);
950            cache.evict_with_dependents(&[file_path.to_string()]);
951            let db = self.snapshot_db();
952            let ref_locs = extract_reference_locations(&db, &file);
953            cache.put(file_path, h, all_issues.clone(), ref_locs);
954        }
955
956        AnalysisResult::build(all_issues, HashMap::new(), symbols)
957    }
958
959    /// Analyze a PHP source string without a real file path.
960    /// Useful for tests and LSP single-file mode.
961    pub fn analyze_source(source: &str) -> AnalysisResult {
962        let analyzer = ProjectAnalyzer::new();
963        let file: Arc<str> = Arc::from("<source>");
964        let mut db = MirDb::default();
965        for slice in crate::stubs::builtin_stub_slices_for_version(analyzer.resolved_php_version())
966        {
967            db.ingest_stub_slice(&slice);
968        }
969        let salsa_file = SourceFile::new(&db, file.clone(), Arc::from(source));
970        let file_defs = collect_file_definitions(&db, salsa_file);
971        db.ingest_stub_slice(&file_defs.slice);
972        let mut all_issues = Arc::unwrap_or_clone(file_defs.issues);
973        if all_issues
974            .iter()
975            .any(|issue| matches!(issue.kind, mir_issues::IssueKind::ParseError { .. }))
976        {
977            return AnalysisResult::build(all_issues, std::collections::HashMap::new(), Vec::new());
978        }
979        let mut type_envs = std::collections::HashMap::new();
980        let mut all_symbols = Vec::new();
981        let arena = bumpalo::Bump::new();
982        let result = php_rs_parser::parse(&arena, source);
983
984        let driver = Pass2Driver::new(&db, analyzer.resolved_php_version());
985        all_issues.extend(driver.analyze_bodies_typed(
986            &result.program,
987            file.clone(),
988            source,
989            &result.source_map,
990            &mut type_envs,
991            &mut all_symbols,
992        ));
993        AnalysisResult::build(all_issues, type_envs, all_symbols)
994    }
995
996    /// Discover all `.php` files under a directory, recursively.
997    pub fn discover_files(root: &Path) -> Vec<PathBuf> {
998        if root.is_file() {
999            return vec![root.to_path_buf()];
1000        }
1001        let mut files = Vec::new();
1002        collect_php_files(root, &mut files);
1003        files
1004    }
1005
1006    /// Pass 1 only: collect type definitions from `paths` into the codebase without
1007    /// analyzing method bodies or emitting issues. Used to load vendor types.
1008    pub fn collect_types_only(&self, paths: &[PathBuf]) {
1009        let _timing = std::env::var("MIR_TIMING").is_ok();
1010        let _t0 = std::time::Instant::now();
1011
1012        let file_data: Vec<(Arc<str>, Arc<str>)> = paths
1013            .par_iter()
1014            .filter_map(|path| {
1015                let src = std::fs::read_to_string(path).ok()?;
1016                Some((
1017                    Arc::from(path.to_string_lossy().as_ref()),
1018                    Arc::<str>::from(src),
1019                ))
1020            })
1021            .collect();
1022        let _t_read = _t0.elapsed();
1023
1024        let source_files: Vec<SourceFile> = {
1025            let mut guard = self.shared_db.salsa.write();
1026            file_data
1027                .iter()
1028                .map(|(file, src)| guard.upsert_source_file(file.clone(), src.clone()))
1029                .collect()
1030        };
1031        let _t_reg = _t0.elapsed();
1032
1033        let db_pass1 = {
1034            let guard = self.shared_db.salsa.read();
1035            (**guard).clone()
1036        };
1037
1038        let file_defs: Vec<FileDefinitions> = source_files
1039            .par_iter()
1040            .map_with(db_pass1, |db, salsa_file| {
1041                collect_file_definitions_uncached(&*db, *salsa_file)
1042            })
1043            .collect();
1044        let _t_collect = _t0.elapsed();
1045
1046        let mut guard = self.shared_db.salsa.write();
1047        for defs in file_defs {
1048            guard.ingest_stub_slice(&defs.slice);
1049        }
1050        drop(guard);
1051        let _t_ingest = _t0.elapsed();
1052
1053        if _timing {
1054            eprintln!(
1055                "[vendor] read={:.0}ms reg={:.0}ms collect={:.0}ms ingest={:.0}ms total={:.0}ms",
1056                _t_read.as_secs_f64() * 1000.0,
1057                (_t_reg - _t_read).as_secs_f64() * 1000.0,
1058                (_t_collect - _t_reg).as_secs_f64() * 1000.0,
1059                (_t_ingest - _t_collect).as_secs_f64() * 1000.0,
1060                _t_ingest.as_secs_f64() * 1000.0,
1061            );
1062        }
1063
1064        // Print profiling statistics for the collection phase.
1065        crate::collector::print_collector_stats();
1066    }
1067}
1068
1069impl Default for ProjectAnalyzer {
1070    fn default() -> Self {
1071        Self::new()
1072    }
1073}
1074
/// Recursively appends every file with a `php` extension under `dir` to `out`.
///
/// Entries reported as symbolic links are skipped, and directories named
/// `vendor`, `.git`, `node_modules`, `.cache` or `.pnpm-store` are not
/// descended into. Directories and entries that cannot be read are ignored
/// rather than reported. The order of `out` follows the directory listing
/// order of the platform.
pub(crate) fn collect_php_files(dir: &Path, out: &mut Vec<PathBuf>) {
    const SKIPPED_DIRS: [&str; 5] = ["vendor", ".git", "node_modules", ".cache", ".pnpm-store"];

    let Ok(entries) = std::fs::read_dir(dir) else {
        return;
    };
    for entry in entries.flatten() {
        let path = entry.path();
        // Reuse the entry's own file type for both the symlink and the
        // directory check instead of issuing a second metadata lookup through
        // `Path::is_dir` for every entry.
        let is_dir = match entry.file_type() {
            Ok(kind) if kind.is_symlink() => continue,
            Ok(kind) => kind.is_dir(),
            // File type unavailable: fall back to a metadata lookup.
            Err(_) => path.is_dir(),
        };
        if is_dir {
            let name = path.file_name().and_then(|n| n.to_str()).unwrap_or("");
            if !SKIPPED_DIRS.contains(&name) {
                collect_php_files(&path, out);
            }
        } else if path.extension().and_then(|e| e.to_str()) == Some("php") {
            out.push(path);
        }
    }
}
1097
1098// build_reverse_deps
1099
1100fn build_reverse_deps(db: &dyn crate::db::MirDatabase) -> HashMap<String, HashSet<String>> {
1101    let mut reverse: HashMap<String, HashSet<String>> = HashMap::new();
1102
1103    let mut add_edge = |symbol: &str, dependent_file: &str| {
1104        if let Some(defining_file) = db.symbol_defining_file(symbol) {
1105            let def = defining_file.as_ref().to_string();
1106            if def != dependent_file {
1107                reverse
1108                    .entry(def)
1109                    .or_default()
1110                    .insert(dependent_file.to_string());
1111            }
1112        }
1113    };
1114
1115    for (file, imports) in db.file_import_snapshots() {
1116        let file = file.as_ref().to_string();
1117        for fqcn in imports.values() {
1118            add_edge(fqcn, &file);
1119        }
1120    }
1121
1122    let extract_named_objects = |union: &mir_types::Union| {
1123        union
1124            .types
1125            .iter()
1126            .filter_map(|atomic| match atomic {
1127                mir_types::atomic::Atomic::TNamedObject { fqcn, .. } => Some(fqcn.clone()),
1128                _ => None,
1129            })
1130            .collect::<Vec<_>>()
1131    };
1132
1133    for fqcn in db.active_class_node_fqcns() {
1134        // Only true classes contribute class-direction edges in this loop.
1135        // Interface / trait / enum edges are not currently emitted here —
1136        // this function only ever read classes.
1137        let kind = match crate::db::class_kind_via_db(db, fqcn.as_ref()) {
1138            Some(k) if !k.is_interface && !k.is_trait && !k.is_enum => k,
1139            _ => continue,
1140        };
1141        let _ = kind;
1142        let Some(file) = db
1143            .symbol_defining_file(fqcn.as_ref())
1144            .map(|f| f.as_ref().to_string())
1145        else {
1146            continue;
1147        };
1148
1149        let Some(node) = db.lookup_class_node(fqcn.as_ref()) else {
1150            continue;
1151        };
1152        if let Some(parent) = node.parent(db) {
1153            add_edge(parent.as_ref(), &file);
1154        }
1155        for iface in node.interfaces(db).iter() {
1156            add_edge(iface.as_ref(), &file);
1157        }
1158        for tr in node.traits(db).iter() {
1159            add_edge(tr.as_ref(), &file);
1160        }
1161
1162        // Add types from properties
1163        for prop in db.class_own_properties(fqcn.as_ref()).iter() {
1164            if let Some(ty) = prop.ty(db) {
1165                for named in extract_named_objects(&ty) {
1166                    add_edge(named.as_ref(), &file);
1167                }
1168            }
1169        }
1170
1171        // Add types from methods
1172        for method in db.class_own_methods(fqcn.as_ref()).iter() {
1173            // Parameter types
1174            for param in method.params(db).iter() {
1175                if let Some(ty) = &param.ty {
1176                    for named in extract_named_objects(ty.as_ref()) {
1177                        add_edge(named.as_ref(), &file);
1178                    }
1179                }
1180            }
1181            // Return type
1182            if let Some(rt) = method.return_type(db) {
1183                for named in extract_named_objects(rt.as_ref()) {
1184                    add_edge(named.as_ref(), &file);
1185                }
1186            }
1187        }
1188    }
1189
1190    // Add types from global functions
1191    for fqn in db.active_function_node_fqns() {
1192        let Some(node) = db.lookup_function_node(fqn.as_ref()) else {
1193            continue;
1194        };
1195        let Some(file) = db
1196            .symbol_defining_file(fqn.as_ref())
1197            .map(|f| f.as_ref().to_string())
1198        else {
1199            continue;
1200        };
1201
1202        // Parameter types
1203        for param in node.params(db).iter() {
1204            if let Some(ty) = &param.ty {
1205                for named in extract_named_objects(ty.as_ref()) {
1206                    add_edge(named.as_ref(), &file);
1207                }
1208            }
1209        }
1210        // Return type
1211        if let Some(rt) = node.return_type(db) {
1212            for named in extract_named_objects(rt.as_ref()) {
1213                add_edge(named.as_ref(), &file);
1214            }
1215        }
1216    }
1217
1218    // Also wire in bare-FQN references from Pass 2 (new \Foo(), \Foo::method(), \foo())
1219    // that do not appear in use-import statements.
1220    for (ref_file, symbol_key) in db.all_reference_location_pairs() {
1221        let file_str = ref_file.as_ref().to_string();
1222        let lookup: &str = match symbol_key.split_once("::") {
1223            Some((class, _)) => class,
1224            None => &symbol_key,
1225        };
1226        add_edge(lookup, &file_str);
1227    }
1228
1229    reverse
1230}
1231
1232fn extract_reference_locations(
1233    db: &dyn crate::db::MirDatabase,
1234    file: &Arc<str>,
1235) -> Vec<(String, u32, u16, u16)> {
1236    db.extract_file_reference_locations(file.as_ref())
1237        .into_iter()
1238        .map(|(sym, line, col_start, col_end)| (sym.to_string(), line, col_start, col_end))
1239        .collect()
1240}
1241
1242pub struct AnalysisResult {
1243    pub issues: Vec<Issue>,
1244    #[doc(hidden)]
1245    pub type_envs: std::collections::HashMap<crate::type_env::ScopeId, crate::type_env::TypeEnv>,
1246    /// Per-expression resolved symbols from Pass 2, sorted by file path.
1247    pub symbols: Vec<crate::symbol::ResolvedSymbol>,
1248    /// Maps each file path to the contiguous range within `symbols` that belongs
1249    /// to it. Built once after analysis; allows `symbol_at` to scan only the
1250    /// relevant file's slice rather than the entire codebase-wide vector.
1251    symbols_by_file: HashMap<Arc<str>, std::ops::Range<usize>>,
1252}
1253
1254impl AnalysisResult {
1255    fn build(
1256        issues: Vec<Issue>,
1257        type_envs: std::collections::HashMap<crate::type_env::ScopeId, crate::type_env::TypeEnv>,
1258        mut symbols: Vec<crate::symbol::ResolvedSymbol>,
1259    ) -> Self {
1260        symbols.sort_unstable_by(|a, b| a.file.as_ref().cmp(b.file.as_ref()));
1261        let mut symbols_by_file: HashMap<Arc<str>, std::ops::Range<usize>> = HashMap::new();
1262        let mut i = 0;
1263        while i < symbols.len() {
1264            let file = Arc::clone(&symbols[i].file);
1265            let start = i;
1266            while i < symbols.len() && symbols[i].file == file {
1267                i += 1;
1268            }
1269            symbols_by_file.insert(file, start..i);
1270        }
1271        Self {
1272            issues,
1273            type_envs,
1274            symbols,
1275            symbols_by_file,
1276        }
1277    }
1278}
1279
1280impl AnalysisResult {
1281    pub fn error_count(&self) -> usize {
1282        self.issues
1283            .iter()
1284            .filter(|i| i.severity == mir_issues::Severity::Error)
1285            .count()
1286    }
1287
1288    pub fn warning_count(&self) -> usize {
1289        self.issues
1290            .iter()
1291            .filter(|i| i.severity == mir_issues::Severity::Warning)
1292            .count()
1293    }
1294
1295    /// Group issues by source file.
1296    pub fn issues_by_file(&self) -> HashMap<std::sync::Arc<str>, Vec<&Issue>> {
1297        let mut map: HashMap<std::sync::Arc<str>, Vec<&Issue>> = HashMap::new();
1298        for issue in &self.issues {
1299            map.entry(issue.location.file.clone())
1300                .or_default()
1301                .push(issue);
1302        }
1303        map
1304    }
1305
1306    /// Count issues by severity. Returned as `(severity, count)` pairs sorted
1307    /// by severity (Info, Warning, Error).
1308    pub fn count_by_severity(&self) -> Vec<(mir_issues::Severity, usize)> {
1309        let mut counts: std::collections::BTreeMap<mir_issues::Severity, usize> =
1310            std::collections::BTreeMap::new();
1311        for issue in &self.issues {
1312            *counts.entry(issue.severity).or_insert(0) += 1;
1313        }
1314        counts.into_iter().collect()
1315    }
1316
1317    /// Total number of issues across all severities and files.
1318    pub fn total_issue_count(&self) -> usize {
1319        self.issues.len()
1320    }
1321
1322    /// Iterator of issues matching `predicate`. Useful for filtering by
1323    /// severity, kind, or file without materializing intermediate vectors.
1324    pub fn filter_issues<'a, F>(&'a self, predicate: F) -> impl Iterator<Item = &'a Issue>
1325    where
1326        F: Fn(&Issue) -> bool + 'a,
1327    {
1328        self.issues.iter().filter(move |i| predicate(i))
1329    }
1330
1331    /// Return the innermost resolved symbol whose span contains `byte_offset`
1332    /// in `file`, or `None` if no symbol was recorded at that position.
1333    pub fn symbol_at(
1334        &self,
1335        file: &str,
1336        byte_offset: u32,
1337    ) -> Option<&crate::symbol::ResolvedSymbol> {
1338        let range = self.symbols_by_file.get(file)?;
1339        self.symbols[range.clone()]
1340            .iter()
1341            .filter(|s| s.span.start <= byte_offset && byte_offset < s.span.end)
1342            .min_by_key(|s| s.span.end - s.span.start)
1343    }
1344}