Skip to main content

mir_analyzer/
project.rs

1/// Project-level orchestration: file discovery, pass 1, pass 2.
2use std::mem::ManuallyDrop;
3use std::path::{Path, PathBuf};
4use std::sync::Arc;
5
6use rayon::prelude::*;
7
8use std::collections::{HashMap, HashSet};
9
10use crate::cache::{hash_content, AnalysisCache};
11use crate::db::{
12    collect_file_definitions, collect_file_definitions_uncached, FileDefinitions, MirDatabase,
13    MirDb, SourceFile,
14};
15use crate::pass2::Pass2Driver;
16use crate::php_version::PhpVersion;
17use crate::shared_db::SharedDb;
18use mir_issues::Issue;
19
20pub(crate) use crate::pass2::merge_return_types;
21
22/// Batch-oriented analyzer: file discovery, parsing, and analysis.
23///
24/// ProjectAnalyzer is the primary entry point for analyzing a project as a whole.
25/// It orchestrates parallel file discovery and parsing, using the same core
26/// analysis engine as [`AnalysisSession`] (salsa database and Pass 2 driver).
27///
28/// **Unified Design:** ProjectAnalyzer and `AnalysisSession` now share the same
29/// database management via [`SharedDb`]. ProjectAnalyzer is the batch API
30/// (all files at once), while `AnalysisSession` is the incremental API (file-by-file).
31/// Both use `Pass2Driver`, the same definition collection logic, and identical
32/// database operations, eliminating code duplication.
33///
34/// [`AnalysisSession`]: crate::session::AnalysisSession
35pub struct ProjectAnalyzer {
36    /// Shared database management (salsa, file registry, stub tracking).
37    /// Extracted to allow code sharing with AnalysisSession.
38    shared_db: Arc<SharedDb>,
39    /// Optional cache — when `Some`, Pass 2 results are read/written per file.
40    cache: Option<AnalysisCache>,
41    /// Called once after each file completes Pass 2 (used for progress reporting).
42    pub on_file_done: Option<Arc<dyn Fn() + Send + Sync>>,
43    /// PSR-4 autoloader mapping from composer.json, if available.
44    pub psr4: Option<Arc<crate::composer::Psr4Map>>,
45    /// When true, run dead code detection at the end of analysis.
46    pub find_dead_code: bool,
47    /// Target PHP language version. `None` means "not configured"; resolved to
48    /// `PhpVersion::LATEST` when passed down to `StatementsAnalyzer`.
49    pub php_version: Option<PhpVersion>,
50    /// Additional stub files to parse before analysis (absolute paths).
51    pub stub_files: Vec<PathBuf>,
52    /// Additional stub directories to walk and parse before analysis (absolute paths).
53    pub stub_dirs: Vec<PathBuf>,
54}
55
56struct ParsedProjectFile {
57    file: Arc<str>,
58    source: Arc<str>,
59    parsed: ManuallyDrop<php_rs_parser::ParseResult<'static, 'static>>,
60    arena: ManuallyDrop<Box<bumpalo::Bump>>,
61}
62
63impl ParsedProjectFile {
64    fn new(file: Arc<str>, source: Arc<str>) -> Self {
65        let arena = Box::new(crate::arena::create_parse_arena(source.len()));
66        let parsed = php_rs_parser::parse(&arena, &source);
67        // SAFETY: `parsed` borrows from `arena` and `source`, both owned by this
68        // struct and kept alive until `Drop`. `Drop` manually destroys `parsed`
69        // before releasing either owner, so the widened lifetimes never escape.
70        let parsed = unsafe {
71            std::mem::transmute::<
72                php_rs_parser::ParseResult<'_, '_>,
73                php_rs_parser::ParseResult<'static, 'static>,
74            >(parsed)
75        };
76        Self {
77            file,
78            source,
79            parsed: ManuallyDrop::new(parsed),
80            arena: ManuallyDrop::new(arena),
81        }
82    }
83
84    fn source(&self) -> &str {
85        self.source.as_ref()
86    }
87
88    fn parsed(&self) -> &php_rs_parser::ParseResult<'_, '_> {
89        &self.parsed
90    }
91}
92
93impl Drop for ParsedProjectFile {
94    fn drop(&mut self) {
95        unsafe {
96            ManuallyDrop::drop(&mut self.parsed);
97            ManuallyDrop::drop(&mut self.arena);
98        }
99    }
100}
101
102// SAFETY: after construction the parsed AST and source map are read-only. The
103// bump arena is never mutated again; it only owns backing storage for AST nodes
104// and is dropped after all parallel analysis has completed.
105unsafe impl Send for ParsedProjectFile {}
106unsafe impl Sync for ParsedProjectFile {}
107
108impl ProjectAnalyzer {
109    pub fn new() -> Self {
110        Self {
111            shared_db: Arc::new(SharedDb::new()),
112            cache: None,
113            on_file_done: None,
114            psr4: None,
115            find_dead_code: false,
116            php_version: None,
117            stub_files: Vec::new(),
118            stub_dirs: Vec::new(),
119        }
120    }
121
122    /// Create a `ProjectAnalyzer` with a disk-backed cache stored under `cache_dir`.
123    pub fn with_cache(cache_dir: &Path) -> Self {
124        Self {
125            shared_db: Arc::new(SharedDb::new()),
126            cache: Some(AnalysisCache::open(cache_dir)),
127            on_file_done: None,
128            psr4: None,
129            find_dead_code: false,
130            php_version: None,
131            stub_files: Vec::new(),
132            stub_dirs: Vec::new(),
133        }
134    }
135
136    /// Enable the disk-backed cache for an already-constructed analyzer.
137    pub fn set_cache_dir(&mut self, cache_dir: &Path) {
138        self.cache = Some(AnalysisCache::open(cache_dir));
139    }
140
141    /// Create a `ProjectAnalyzer` from a project root containing `composer.json`.
142    /// Returns the analyzer (with `psr4` set) and the `Psr4Map` so callers can
143    /// call `map.project_files()` / `map.vendor_files()`.
144    pub fn from_composer(
145        root: &Path,
146    ) -> Result<(Self, crate::composer::Psr4Map), crate::composer::ComposerError> {
147        let map = crate::composer::Psr4Map::from_composer(root)?;
148        let psr4 = Arc::new(map.clone());
149        let analyzer = Self {
150            shared_db: Arc::new(SharedDb::new()),
151            cache: None,
152            on_file_done: None,
153            psr4: Some(psr4),
154            find_dead_code: false,
155            php_version: None,
156            stub_files: Vec::new(),
157            stub_dirs: Vec::new(),
158        };
159        Ok((analyzer, map))
160    }
161
162    /// Builder method: set the target PHP version.
163    pub fn with_php_version(mut self, version: PhpVersion) -> Self {
164        self.php_version = Some(version);
165        self
166    }
167
168    /// Builder method: enable dead-code detection at the end of analysis.
169    pub fn with_dead_code(mut self, enabled: bool) -> Self {
170        self.find_dead_code = enabled;
171        self
172    }
173
174    /// Builder method: set a progress callback invoked once per analyzed file.
175    pub fn with_progress_callback(mut self, callback: Arc<dyn Fn() + Send + Sync>) -> Self {
176        self.on_file_done = Some(callback);
177        self
178    }
179
180    /// Builder method: add user stub files.
181    pub fn with_stub_files(mut self, files: Vec<PathBuf>) -> Self {
182        self.stub_files = files;
183        self
184    }
185
186    /// Builder method: add user stub directories.
187    pub fn with_stub_dirs(mut self, dirs: Vec<PathBuf>) -> Self {
188        self.stub_dirs = dirs;
189        self
190    }
191
192    /// Builder method: configure a disk-backed cache at the given directory.
193    pub fn with_cache_dir(mut self, cache_dir: &Path) -> Self {
194        self.cache = Some(AnalysisCache::open(cache_dir));
195        self
196    }
197
198    /// Builder method: attach a PSR-4 autoloader map.
199    pub fn with_psr4(mut self, map: Arc<crate::composer::Psr4Map>) -> Self {
200        self.psr4 = Some(map);
201        self
202    }
203
204    /// Resolve the configured PHP version, defaulting to `PhpVersion::LATEST`
205    /// when none has been set.
206    fn resolved_php_version(&self) -> PhpVersion {
207        self.php_version.unwrap_or(PhpVersion::LATEST)
208    }
209
210    fn type_exists(&self, fqcn: &str) -> bool {
211        let db = self.snapshot_db();
212        crate::db::type_exists_via_db(&db, fqcn)
213    }
214
215    /// Returns `true` if a function with `fqn` is registered and active.
216    pub fn contains_function(&self, fqn: &str) -> bool {
217        let db = self.snapshot_db();
218        db.lookup_function_node(fqn).is_some_and(|n| n.active(&db))
219    }
220
221    /// Returns `true` if a class / interface / trait / enum is registered.
222    pub fn contains_class(&self, fqcn: &str) -> bool {
223        let db = self.snapshot_db();
224        db.lookup_class_node(fqcn).is_some_and(|n| n.active(&db))
225    }
226
227    /// Returns `true` if `class` has a method named `name` (case-insensitive).
228    pub fn contains_method(&self, class: &str, name: &str) -> bool {
229        let db = self.snapshot_db();
230        let name_lower = name.to_ascii_lowercase();
231        db.lookup_method_node(class, &name_lower)
232            .is_some_and(|n| n.active(&db))
233    }
234
235    /// Acquire a cheap clone of the salsa db for a read-only query.
236    /// The lock is held only for the duration of the clone, so concurrent
237    /// readers never serialize on each other or on writes longer than the
238    /// clone itself.
239    fn snapshot_db(&self) -> MirDb {
240        self.shared_db.snapshot_db()
241    }
242
243    /// Internal: expose the salsa Mutex for unit tests that need a `&dyn MirDatabase`.
244    #[doc(hidden)]
245    pub fn salsa_db_for_test(&self) -> &parking_lot::Mutex<MirDb> {
246        &self.shared_db.salsa
247    }
248
249    /// Legacy: look up the source location of a class member by name.
250    ///
251    /// Prefer [`Self::definition_of`] with [`crate::Symbol::method`] etc.
252    #[doc(hidden)]
253    pub fn member_location(
254        &self,
255        fqcn: &str,
256        member_name: &str,
257    ) -> Option<mir_codebase::storage::Location> {
258        let db = self.snapshot_db();
259        crate::db::member_location_via_db(&db, fqcn, member_name)
260    }
261
262    /// Legacy: look up a top-level symbol location.
263    ///
264    /// Prefer [`Self::definition_of`] with [`crate::Symbol`].
265    #[doc(hidden)]
266    pub fn symbol_location(&self, symbol: &str) -> Option<mir_codebase::storage::Location> {
267        let db = self.snapshot_db();
268        db.lookup_class_node(symbol)
269            .filter(|n| n.active(&db))
270            .and_then(|n| n.location(&db))
271            .or_else(|| {
272                db.lookup_function_node(symbol)
273                    .filter(|n| n.active(&db))
274                    .and_then(|n| n.location(&db))
275            })
276    }
277
278    /// Legacy: raw reference locations as `(file, line, col_start, col_end)`.
279    ///
280    /// Prefer [`Self::references_to`] which returns `(Arc<str>, Range)` pairs
281    /// and takes a strongly-typed [`crate::Symbol`].
282    #[doc(hidden)]
283    pub fn reference_locations(&self, symbol: &str) -> Vec<(Arc<str>, u32, u16, u16)> {
284        let db = self.snapshot_db();
285        db.reference_locations(symbol)
286    }
287
288    /// Resolve a symbol to its declaration location.
289    ///
290    /// Mirrors [`crate::AnalysisSession::definition_of`].
291    pub fn definition_of(
292        &self,
293        symbol: &crate::Symbol,
294    ) -> Result<mir_codebase::storage::Location, crate::SymbolLookupError> {
295        let db = self.snapshot_db();
296        match symbol {
297            crate::Symbol::Class(fqcn) => {
298                let node = db
299                    .lookup_class_node(fqcn.as_ref())
300                    .filter(|n| n.active(&db))
301                    .ok_or(crate::SymbolLookupError::NotFound)?;
302                node.location(&db)
303                    .ok_or(crate::SymbolLookupError::NoSourceLocation)
304            }
305            crate::Symbol::Function(fqn) => {
306                let node = db
307                    .lookup_function_node(fqn.as_ref())
308                    .filter(|n| n.active(&db))
309                    .ok_or(crate::SymbolLookupError::NotFound)?;
310                node.location(&db)
311                    .ok_or(crate::SymbolLookupError::NoSourceLocation)
312            }
313            crate::Symbol::Method { class, name }
314            | crate::Symbol::Property { class, name }
315            | crate::Symbol::ClassConstant { class, name } => {
316                crate::db::member_location_via_db(&db, class, name)
317                    .ok_or(crate::SymbolLookupError::NotFound)
318            }
319            crate::Symbol::GlobalConstant(_) => Err(crate::SymbolLookupError::NoSourceLocation),
320        }
321    }
322
323    /// All recorded references to a symbol, as `(file, range)` pairs.
324    ///
325    /// Mirrors [`crate::AnalysisSession::references_to`].
326    pub fn references_to(&self, symbol: &crate::Symbol) -> Vec<(Arc<str>, crate::Range)> {
327        let db = self.snapshot_db();
328        let key = symbol.codebase_key();
329        db.reference_locations(&key)
330            .into_iter()
331            .map(|(file, line, col_start, col_end)| {
332                let range = crate::Range {
333                    start: crate::Position {
334                        line,
335                        column: col_start as u32,
336                    },
337                    end: crate::Position {
338                        line,
339                        column: col_end as u32,
340                    },
341                };
342                (file, range)
343            })
344            .collect()
345    }
346
347    /// Load PHP built-in stubs. Called automatically by `analyze` if not done yet.
348    /// Stubs are filtered against the configured target PHP version (or
349    /// `PhpVersion::LATEST` if none was set).
350    pub fn load_stubs(&self) {
351        let php_version = self.resolved_php_version();
352
353        // Load all built-in stubs for the configured PHP version
354        let paths: Vec<&'static str> = crate::stubs::stub_files().iter().map(|&(p, _)| p).collect();
355        self.shared_db.ingest_stub_paths(&paths, php_version);
356
357        // Load user-configured stubs
358        self.shared_db
359            .ingest_user_stubs(&self.stub_files, &self.stub_dirs);
360    }
361
362    fn collect_and_ingest_source(&self, file: Arc<str>, src: &str) -> FileDefinitions {
363        self.shared_db.collect_and_ingest_file(file, src)
364    }
365
366    /// Run the full analysis pipeline on a set of file paths.
    ///
    /// Stages, in order:
    /// 1. load the built-in and user stubs;
    /// 2. read and parse every path in parallel (a file that cannot be read is
    ///    reported on stderr and skipped);
    /// 3. when a cache is configured, evict the files that have no cache hit
    ///    for their current content hash, together with their dependents;
    /// 4. register the sources with the salsa database, collect Pass 1
    ///    definitions, and ingest them;
    /// 5. PSR-4 lazy loading, reverse-dependency persistence, and class-level
    ///    checks;
    /// 6. an inference-only sweep whose inferred return types are committed to
    ///    the database;
    /// 7. Pass 2 body analysis (served from the cache on a hit), post-Pass-2
    ///    lazy loading, a cache flush, and optional dead-code detection.
    ///
    /// Files that produced a `ParseError` issue in Pass 1 are left out of the
    /// inference sweep and of Pass 2.
    ///
    /// Returns the collected issues and resolved symbols as an `AnalysisResult`.
367    pub fn analyze(&self, paths: &[PathBuf]) -> AnalysisResult {
368        let mut all_issues = Vec::new();
369
370        // ---- Load PHP built-in stubs (before Pass 1 so user code can override)
371        self.load_stubs();
372
373        // ---- Pass 1: read files in parallel ----------------------------------
        // Each readable file is parsed right away; the AST is kept alive in
        // `parsed_files` and reused by every later stage.
374        let parsed_files: Vec<ParsedProjectFile> = paths
375            .par_iter()
376            .filter_map(|path| match std::fs::read_to_string(path) {
377                Ok(src) => {
378                    let file = Arc::from(path.to_string_lossy().as_ref());
379                    Some(ParsedProjectFile::new(file, Arc::from(src)))
380                }
381                Err(e) => {
382                    eprintln!("Cannot read {}: {}", path.display(), e);
383                    None
384                }
385            })
386            .collect();
387
        // (path, source) pairs: used for cache invalidation, the class-level
        // checks, and the post-Pass-2 lazy loader.
388        let file_data: Vec<(Arc<str>, Arc<str>)> = parsed_files
389            .iter()
390            .map(|parsed| (parsed.file.clone(), parsed.source.clone()))
391            .collect();
392
393        // ---- Pre-Pass-2 invalidation: evict dependents of changed files ------
        // A file counts as changed when the cache has no entry for its current
        // content hash.
394        if let Some(cache) = &self.cache {
395            let changed: Vec<String> = file_data
396                .par_iter()
397                .filter_map(|(f, src)| {
398                    let h = hash_content(src.as_ref());
399                    if cache.get(f, &h).is_none() {
400                        Some(f.to_string())
401                    } else {
402                        None
403                    }
404                })
405                .collect();
406            if !changed.is_empty() {
407                cache.evict_with_dependents(&changed);
408            }
409        }
410
411        // ---- Register Salsa source inputs for incremental follow-up calls ----
412        {
413            let mut guard = self.shared_db.salsa.lock();
414            for parsed in &parsed_files {
415                guard.upsert_source_file(parsed.file.clone(), parsed.source.clone());
416            }
417        }
418
419        // ---- Pass 1: definition collection from the already-parsed AST -------
420        let file_defs: Vec<FileDefinitions> = parsed_files
421            .par_iter()
422            .map(|parsed| {
423                let parse_result = parsed.parsed();
                // NOTE(review): every parse error is reported at line 1,
                // column 0 of the file rather than at the error's own position.
424                let mut all_issues: Vec<Issue> = parse_result
425                    .errors
426                    .iter()
427                    .map(|err| {
428                        Issue::new(
429                            mir_issues::IssueKind::ParseError {
430                                message: err.to_string(),
431                            },
432                            mir_issues::Location {
433                                file: parsed.file.clone(),
434                                line: 1,
435                                line_end: 1,
436                                col_start: 0,
437                                col_end: 0,
438                            },
439                        )
440                    })
441                    .collect();
442                let collector = crate::collector::DefinitionCollector::new_for_slice(
443                    parsed.file.clone(),
444                    parsed.source(),
445                    &parse_result.source_map,
446                );
447                let (slice, collector_issues) = collector.collect_slice(&parse_result.program);
448                all_issues.extend(collector_issues);
449                FileDefinitions {
450                    slice: Arc::new(slice),
451                    issues: Arc::new(all_issues),
452                }
453            })
454            .collect();
455
        // Ingest the collected definitions under one lock, remembering which
        // files carry a parse error so the body-analysis stages can skip them.
456        let mut files_with_parse_errors: std::collections::HashSet<Arc<str>> =
457            std::collections::HashSet::new();
458        {
459            let mut guard = self.shared_db.salsa.lock();
460            for defs in file_defs {
461                for issue in defs.issues.iter() {
462                    if matches!(issue.kind, mir_issues::IssueKind::ParseError { .. }) {
463                        files_with_parse_errors.insert(issue.location.file.clone());
464                    }
465                }
466                guard.ingest_stub_slice(&defs.slice);
467                all_issues.extend(Arc::unwrap_or_clone(defs.issues));
468            }
469        }
470
471        // ---- Lazy-load unknown classes via PSR-4 (issue #50) ----------------
472        if let Some(psr4) = &self.psr4 {
473            self.lazy_load_missing_classes(psr4.clone(), &mut all_issues);
474        }
475
476        // NOTE(review): this spot was labelled as the place where
477        // `@psalm-import-type` declarations get resolved, but no such step
        // appears in this function — confirm whether it now lives elsewhere.
478        // ---- Build reverse dep graph and persist it for the next run ---------
479        if let Some(cache) = &self.cache {
480            let db_snapshot = {
481                let guard = self.shared_db.salsa.lock();
482                guard.clone()
483            };
484            let rev = build_reverse_deps(&db_snapshot);
485            cache.set_reverse_deps(rev);
486        }
487
488        // ---- Class-level checks (M11) ----------------------------------------
489        let analyzed_file_set: std::collections::HashSet<std::sync::Arc<str>> =
490            file_data.iter().map(|(f, _)| f.clone()).collect();
491        {
492            let class_db = {
493                let guard = self.shared_db.salsa.lock();
494                guard.clone()
495            };
496            let class_issues =
497                crate::class::ClassAnalyzer::with_files(&class_db, analyzed_file_set, &file_data)
498                    .analyze_all();
499            all_issues.extend(class_issues);
500        }
501
502        // ---- Inference pre-sweep: prime inferred return types ----------------
503        // Run an inference-only Pass 2 over each file in parallel using direct
504        // rayon (no Salsa tracked-query overhead per file), collect the results,
505        // then commit them to Salsa INPUT fields.  The full Pass 2 then reads
506        // those fields via O(1) accesses with no lock contention.
507        //
508        // We use `Pass2Driver::new_inference_only` directly rather than the
509        // Salsa-tracked `infer_file_return_types` query so that the batch path
510        // avoids per-file Salsa lock acquisition and memo-table overhead on every
511        // cold start.  `infer_file_return_types` is reserved for the incremental
512        // LSP path (AnalysisSession) where Salsa cache hits across edits matter.
513        {
514            let db_priming = {
515                let guard = self.shared_db.salsa.lock();
516                guard.clone()
517            };
518            let php_version = self.resolved_php_version();
519            let functions = std::sync::Arc::new(std::sync::Mutex::new(Vec::new()));
520            let methods = std::sync::Arc::new(std::sync::Mutex::new(Vec::new()));
521            rayon::in_place_scope(|s| {
522                for parsed in &parsed_files {
523                    if files_with_parse_errors.contains(&parsed.file) {
524                        continue;
525                    }
                    // Every task gets its own database clone and its own
                    // handles to the two shared result vectors.
526                    let db = db_priming.clone();
527                    let functions = std::sync::Arc::clone(&functions);
528                    let methods = std::sync::Arc::clone(&methods);
529                    s.spawn(move |_| {
530                        let driver = Pass2Driver::new_inference_only(
531                            &db as &dyn crate::db::MirDatabase,
532                            php_version,
533                        );
534                        let parse_result = parsed.parsed();
535                        driver.analyze_bodies(
536                            &parse_result.program,
537                            parsed.file.clone(),
538                            parsed.source(),
539                            &parse_result.source_map,
540                        );
541                        let inferred = driver.take_inferred_types();
542                        functions.lock().unwrap().extend(inferred.functions);
543                        methods.lock().unwrap().extend(inferred.methods);
544                    });
545                }
546            });
547            // Drop db_priming before committing: commit_inferred_return_types calls
548            // salsa setters which go through cancel_others, which waits until the
549            // storage strong-count drops to 1. db_priming is a sibling clone that
550            // keeps the count at 2, causing a deadlock if it outlives the commit.
551            drop(db_priming);
            // If another handle to a vector were still alive, `try_unwrap`
            // would fail and that vector would silently fall back to empty.
552            let functions = std::sync::Arc::try_unwrap(functions)
553                .map(|m| m.into_inner().unwrap())
554                .unwrap_or_default();
555            let methods = std::sync::Arc::try_unwrap(methods)
556                .map(|m| m.into_inner().unwrap())
557                .unwrap_or_default();
558            let mut guard = self.shared_db.salsa.lock();
559            guard.commit_inferred_return_types(functions, methods);
560        }
561
        // Database clone taken after the commit above; handed to `map_with`
        // for the Pass 2 workers.
562        let db_main = {
563            let guard = self.shared_db.salsa.lock();
564            guard.clone()
565        };
566
567        // ---- Pass 2: analyze function/method bodies in parallel (M14) --------
568        let pass2_results: Vec<(Vec<Issue>, Vec<crate::symbol::ResolvedSymbol>)> = parsed_files
569            .par_iter()
570            .filter(|parsed| !files_with_parse_errors.contains(&parsed.file))
571            .map_with(db_main, |db, parsed| {
572                let driver =
573                    Pass2Driver::new(&*db as &dyn MirDatabase, self.resolved_php_version());
574                let result = if let Some(cache) = &self.cache {
575                    let h = hash_content(parsed.source());
576                    if let Some((cached_issues, ref_locs)) = cache.get(&parsed.file, &h) {
                        // Cache hit: reuse the stored issues and replay the
                        // stored reference locations. No symbols are returned
                        // for this file.
577                        db.replay_reference_locations(parsed.file.clone(), &ref_locs);
578                        (cached_issues, Vec::new())
579                    } else {
                        // Cache miss: analyze the bodies, then store the issues
                        // and the reference locations for the next run.
580                        let parse_result = parsed.parsed();
581                        let (issues, symbols) = driver.analyze_bodies(
582                            &parse_result.program,
583                            parsed.file.clone(),
584                            parsed.source(),
585                            &parse_result.source_map,
586                        );
587                        let ref_locs = extract_reference_locations(&*db, &parsed.file);
588                        cache.put(&parsed.file, h, issues.clone(), ref_locs);
589                        (issues, symbols)
590                    }
591                } else {
592                    let parse_result = parsed.parsed();
593                    driver.analyze_bodies(
594                        &parse_result.program,
595                        parsed.file.clone(),
596                        parsed.source(),
597                        &parse_result.source_map,
598                    )
599                };
                // Progress callback: called once per file, cache hit or not.
600                if let Some(cb) = &self.on_file_done {
601                    cb();
602                }
603                result
604            })
605            .collect();
606
607        let mut all_symbols = Vec::new();
608        for (issues, symbols) in pass2_results {
609            all_issues.extend(issues);
610            all_symbols.extend(symbols);
611        }
612
613        // ---- Post-Pass-2 lazy loading: FQCNs used without `use` imports ------
614        // FQCNs in function/method bodies aren't visible until Pass 2 runs, so
615        // the pre-Pass-2 lazy load misses them.  We collect UndefinedClass names,
616        // resolve them via PSR-4, load those files, re-finalize, then re-analyze
617        // only the affected files to clear the false positives.
618        if let Some(psr4) = &self.psr4 {
619            self.lazy_load_from_body_issues(
620                psr4.clone(),
621                &file_data,
622                &files_with_parse_errors,
623                &mut all_issues,
624                &mut all_symbols,
625            );
626        }
627
628        // Persist cache hits/misses to disk
629        if let Some(cache) = &self.cache {
630            cache.flush();
631        }
632
633        // NOTE(review): a "compact the reference index" step was labelled here,
        // but no code for it remains in this function.
634        // ---- Dead-code detection (M18) --------------------------------------
        // Unlike the stages above, this runs on the locked database itself
        // rather than on a clone; the lock is released before the issues are
        // merged.
635        if self.find_dead_code {
636            let salsa = self.shared_db.salsa.lock();
637            let dead_code_issues = crate::dead_code::DeadCodeAnalyzer::new(&*salsa).analyze();
638            drop(salsa);
639            all_issues.extend(dead_code_issues);
640        }
641
642        AnalysisResult::build(all_issues, std::collections::HashMap::new(), all_symbols)
643    }
644
645    fn lazy_load_missing_classes(
646        &self,
647        psr4: Arc<crate::composer::Psr4Map>,
648        all_issues: &mut Vec<Issue>,
649    ) {
650        use std::collections::HashSet;
651        use std::sync::Arc;
652
653        let max_depth = 10;
654        let mut loaded: HashSet<String> = HashSet::new();
655        let mut scanned: HashSet<Arc<str>> = HashSet::new();
656
657        for _ in 0..max_depth {
658            let mut to_load: Vec<(String, PathBuf)> = Vec::new();
659
660            let mut try_queue = |fqcn: &str| {
661                if !self.type_exists(fqcn) && !loaded.contains(fqcn) {
662                    if let Some(path) = psr4.resolve(fqcn) {
663                        to_load.push((fqcn.to_string(), path));
664                    }
665                }
666            };
667
668            // Collect inheritance and import candidates. Only scan classes that
669            // haven't been scanned yet (optimization: avoid redundant full scans).
670            let mut inheritance_candidates = Vec::new();
671            let import_candidates = {
672                let guard = self.shared_db.salsa.lock();
673                let db = &*guard;
674                for fqcn in db.active_class_node_fqcns() {
675                    if scanned.contains(fqcn.as_ref()) {
676                        continue;
677                    }
678                    let Some(node) = db.lookup_class_node(&fqcn) else {
679                        continue;
680                    };
681                    scanned.insert(fqcn.clone());
682                    if node.is_interface(db) {
683                        for parent in node.extends(db).iter() {
684                            inheritance_candidates.push(parent.to_string());
685                        }
686                    } else if node.is_enum(db) {
687                        for iface in node.interfaces(db).iter() {
688                            inheritance_candidates.push(iface.to_string());
689                        }
690                    } else if node.is_trait(db) {
691                        for used in node.traits(db).iter() {
692                            inheritance_candidates.push(used.to_string());
693                        }
694                    } else {
695                        if let Some(parent) = node.parent(db) {
696                            inheritance_candidates.push(parent.to_string());
697                        }
698                        for iface in node.interfaces(db).iter() {
699                            inheritance_candidates.push(iface.to_string());
700                        }
701                    }
702                }
703                db.file_import_snapshots()
704                    .into_iter()
705                    .flat_map(|(_, imports)| imports.into_values())
706                    .collect::<Vec<_>>()
707            };
708            for fqcn in inheritance_candidates {
709                try_queue(&fqcn);
710            }
711
712            // Also lazy-load any type referenced via `use` imports that isn't yet
713            // in the codebase (covers enums and classes used only in type hints or
714            // static calls, which never appear in the inheritance scan above).
715            for fqcn in import_candidates {
716                try_queue(&fqcn);
717            }
718
719            if to_load.is_empty() {
720                break;
721            }
722
723            for (fqcn, path) in to_load {
724                loaded.insert(fqcn);
725                if let Ok(src) = std::fs::read_to_string(&path) {
726                    let file: Arc<str> = Arc::from(path.to_string_lossy().as_ref());
727                    let defs = self.collect_and_ingest_source(file, &src);
728                    all_issues.extend(Arc::unwrap_or_clone(defs.issues));
729                }
730            }
731        }
732    }
733
734    fn lazy_load_from_body_issues(
735        &self,
736        psr4: Arc<crate::composer::Psr4Map>,
737        file_data: &[(Arc<str>, Arc<str>)],
738        files_with_parse_errors: &HashSet<Arc<str>>,
739        all_issues: &mut Vec<Issue>,
740        all_symbols: &mut Vec<crate::symbol::ResolvedSymbol>,
741    ) {
742        use mir_issues::IssueKind;
743
744        let max_depth = 5;
745        let mut loaded: HashSet<String> = HashSet::new();
746
747        for _ in 0..max_depth {
748            // Deduplicate by FQCN: HashMap prevents loading the same class twice
749            // when multiple files share the same UndefinedClass diagnostic.
750            let mut to_load: HashMap<String, PathBuf> = HashMap::new();
751
752            for issue in all_issues.iter() {
753                if let IssueKind::UndefinedClass { name } = &issue.kind {
754                    if !self.type_exists(name) && !loaded.contains(name) {
755                        if let Some(path) = psr4.resolve(name) {
756                            to_load.entry(name.clone()).or_insert(path);
757                        }
758                    }
759                }
760            }
761
762            if to_load.is_empty() {
763                break;
764            }
765
766            loaded.extend(to_load.keys().cloned());
767
768            for path in to_load.values() {
769                if let Ok(src) = std::fs::read_to_string(path) {
770                    let file: Arc<str> = Arc::from(path.to_string_lossy().as_ref());
771                    let _ = self.collect_and_ingest_source(file, &src);
772                }
773            }
774
775            // Load inheritance deps of newly-added types and finalize.
776            // This covers e.g. `class Helper extends \App\Base` where Base is
777            // also not in the initial file set.
778            self.lazy_load_missing_classes(psr4.clone(), all_issues);
779
780            // Re-analyze every file that has an UndefinedClass for a type now
781            // present in the codebase — covers both direct and transitive loads.
782            let files_to_reanalyze: HashSet<Arc<str>> = all_issues
783                .iter()
784                .filter_map(|i| {
785                    if let IssueKind::UndefinedClass { name } = &i.kind {
786                        if self.type_exists(name) {
787                            return Some(i.location.file.clone());
788                        }
789                    }
790                    None
791                })
792                .collect();
793
794            if files_to_reanalyze.is_empty() {
795                break;
796            }
797
798            all_issues.retain(|i| !files_to_reanalyze.contains(&i.location.file));
799            all_symbols.retain(|s| !files_to_reanalyze.contains(&s.file));
800
801            let db_full = {
802                let guard = self.shared_db.salsa.lock();
803                guard.clone()
804            };
805
806            let reanalysis: Vec<(Vec<Issue>, Vec<crate::symbol::ResolvedSymbol>)> = file_data
807                .par_iter()
808                .filter(|(f, _)| {
809                    !files_with_parse_errors.contains(f) && files_to_reanalyze.contains(f)
810                })
811                .map_with(db_full, |db, (file, src)| {
812                    let driver =
813                        Pass2Driver::new(&*db as &dyn MirDatabase, self.resolved_php_version());
814                    let arena = crate::arena::create_parse_arena(src.len());
815                    let parsed = php_rs_parser::parse(&arena, src);
816                    driver.analyze_bodies(&parsed.program, file.clone(), src, &parsed.source_map)
817                })
818                .collect();
819
820            for (issues, symbols) in reanalysis {
821                all_issues.extend(issues);
822                all_symbols.extend(symbols);
823            }
824        }
825    }
826
827    /// Re-analyze a single file within the existing codebase.
828    ///
829    /// This is the incremental analysis API for LSP:
830    /// 1. Removes old definitions from this file
831    /// 2. Re-runs Pass 1 (definition collection) on the new content
832    /// 3. Resolves any newly-collected `@psalm-import-type` declarations
833    /// 4. Re-runs Pass 2 (body analysis) on this file
834    /// 5. Returns the analysis result for this file only
835    pub fn re_analyze_file(&self, file_path: &str, new_content: &str) -> AnalysisResult {
836        // Fast path: content unchanged and cache has a valid entry — skip full re-analysis.
837        if let Some(cache) = &self.cache {
838            let h = hash_content(new_content);
839            if let Some((issues, ref_locs)) = cache.get(file_path, &h) {
840                let file: Arc<str> = Arc::from(file_path);
841                let guard = self.shared_db.salsa.lock();
842                guard.replay_reference_locations(file, &ref_locs);
843                return AnalysisResult::build(issues, HashMap::new(), Vec::new());
844            }
845        }
846
847        let file: Arc<str> = Arc::from(file_path);
848
849        {
850            let mut guard = self.shared_db.salsa.lock();
851            guard.remove_file_definitions(file_path);
852        }
853
854        // --- Salsa-backed Pass 1: memoized parse + definition collection ------
855        let file_defs = {
856            let mut guard = self.shared_db.salsa.lock();
857            let salsa_file = guard.upsert_source_file(file.clone(), Arc::from(new_content));
858            collect_file_definitions(&*guard, salsa_file)
859        };
860
861        let mut all_issues: Vec<Issue> = Arc::unwrap_or_clone(file_defs.issues.clone());
862
863        // --- S2 + Pass 2: hold the Salsa lock for ClassNode upserts and body
864        // analysis so the db reference is live during Pass 2 (S5).
865        let symbols = {
866            let mut guard = self.shared_db.salsa.lock();
867
868            guard.ingest_stub_slice(&file_defs.slice);
869
870            // Resolve any newly-collected @psalm-import-type declarations so
871            // Pass 2 reads the imported aliases out of `type_aliases`.
872            // Re-parse in the arena so Pass 2 can walk the AST.
873            let arena = bumpalo::Bump::new();
874            let parsed = php_rs_parser::parse(&arena, new_content);
875
876            if parsed.errors.is_empty() {
877                let db_ref: &dyn MirDatabase = &*guard;
878                let driver = Pass2Driver::new(db_ref, self.resolved_php_version());
879                let (body_issues, symbols) = driver.analyze_bodies(
880                    &parsed.program,
881                    file.clone(),
882                    new_content,
883                    &parsed.source_map,
884                );
885                all_issues.extend(body_issues);
886                symbols
887            } else {
888                Vec::new()
889            }
890        };
891
892        if let Some(cache) = &self.cache {
893            let h = hash_content(new_content);
894            cache.evict_with_dependents(&[file_path.to_string()]);
895            let guard = self.shared_db.salsa.lock();
896            let ref_locs = extract_reference_locations(&*guard, &file);
897            cache.put(file_path, h, all_issues.clone(), ref_locs);
898        }
899
900        AnalysisResult::build(all_issues, HashMap::new(), symbols)
901    }
902
903    /// Analyze a PHP source string without a real file path.
904    /// Useful for tests and LSP single-file mode.
905    pub fn analyze_source(source: &str) -> AnalysisResult {
906        let analyzer = ProjectAnalyzer::new();
907        let file: Arc<str> = Arc::from("<source>");
908        let mut db = MirDb::default();
909        for slice in crate::stubs::builtin_stub_slices_for_version(analyzer.resolved_php_version())
910        {
911            db.ingest_stub_slice(&slice);
912        }
913        let salsa_file = SourceFile::new(&db, file.clone(), Arc::from(source));
914        let file_defs = collect_file_definitions(&db, salsa_file);
915        db.ingest_stub_slice(&file_defs.slice);
916        let mut all_issues = Arc::unwrap_or_clone(file_defs.issues);
917        if all_issues
918            .iter()
919            .any(|issue| matches!(issue.kind, mir_issues::IssueKind::ParseError { .. }))
920        {
921            return AnalysisResult::build(all_issues, std::collections::HashMap::new(), Vec::new());
922        }
923        let mut type_envs = std::collections::HashMap::new();
924        let mut all_symbols = Vec::new();
925        let arena = bumpalo::Bump::new();
926        let result = php_rs_parser::parse(&arena, source);
927
928        let driver = Pass2Driver::new(&db, analyzer.resolved_php_version());
929        all_issues.extend(driver.analyze_bodies_typed(
930            &result.program,
931            file.clone(),
932            source,
933            &result.source_map,
934            &mut type_envs,
935            &mut all_symbols,
936        ));
937        AnalysisResult::build(all_issues, type_envs, all_symbols)
938    }
939
940    /// Discover all `.php` files under a directory, recursively.
941    pub fn discover_files(root: &Path) -> Vec<PathBuf> {
942        if root.is_file() {
943            return vec![root.to_path_buf()];
944        }
945        let mut files = Vec::new();
946        collect_php_files(root, &mut files);
947        files
948    }
949
950    /// Pass 1 only: collect type definitions from `paths` into the codebase without
951    /// analyzing method bodies or emitting issues. Used to load vendor types.
952    pub fn collect_types_only(&self, paths: &[PathBuf]) {
953        let file_data: Vec<(Arc<str>, Arc<str>)> = paths
954            .par_iter()
955            .filter_map(|path| {
956                let src = std::fs::read_to_string(path).ok()?;
957                Some((
958                    Arc::from(path.to_string_lossy().as_ref()),
959                    Arc::<str>::from(src),
960                ))
961            })
962            .collect();
963
964        let source_files: Vec<SourceFile> = {
965            let mut guard = self.shared_db.salsa.lock();
966            file_data
967                .iter()
968                .map(|(file, src)| guard.upsert_source_file(file.clone(), src.clone()))
969                .collect()
970        };
971
972        let db_pass1 = {
973            let guard = self.shared_db.salsa.lock();
974            guard.clone()
975        };
976
977        let file_defs: Vec<FileDefinitions> = source_files
978            .par_iter()
979            .map_with(db_pass1, |db, salsa_file| {
980                collect_file_definitions_uncached(&*db, *salsa_file)
981            })
982            .collect();
983
984        let mut guard = self.shared_db.salsa.lock();
985        for defs in file_defs {
986            guard.ingest_stub_slice(&defs.slice);
987        }
988        drop(guard);
989
990        // Print profiling statistics for the collection phase.
991        crate::collector::print_collector_stats();
992    }
993}
994
995impl Default for ProjectAnalyzer {
996    fn default() -> Self {
997        Self::new()
998    }
999}
1000
/// Recursively append every file with a `php` extension found under `dir`
/// to `out`.
///
/// Symlinked entries are skipped, as are directories named `vendor`, `.git`,
/// `node_modules`, `.cache` and `.pnpm-store`. The extension match is
/// case-sensitive. Directories that cannot be read and entries that fail to
/// load are ignored silently.
pub(crate) fn collect_php_files(dir: &Path, out: &mut Vec<PathBuf>) {
    const SKIPPED_DIRS: [&str; 5] = ["vendor", ".git", "node_modules", ".cache", ".pnpm-store"];

    let Ok(entries) = std::fs::read_dir(dir) else {
        return;
    };

    for entry in entries.flatten() {
        // An entry whose type cannot be determined is not treated as a symlink.
        let is_symlink = matches!(entry.file_type(), Ok(ft) if ft.is_symlink());
        if is_symlink {
            continue;
        }

        let path = entry.path();
        if path.is_dir() {
            let dir_name = path.file_name().and_then(|n| n.to_str()).unwrap_or("");
            if !SKIPPED_DIRS.contains(&dir_name) {
                collect_php_files(&path, out);
            }
        } else if path.extension().is_some_and(|ext| ext == "php") {
            out.push(path);
        }
    }
}
1023
1024// build_reverse_deps
1025
1026fn build_reverse_deps(db: &dyn crate::db::MirDatabase) -> HashMap<String, HashSet<String>> {
1027    let mut reverse: HashMap<String, HashSet<String>> = HashMap::new();
1028
1029    let mut add_edge = |symbol: &str, dependent_file: &str| {
1030        if let Some(defining_file) = db.symbol_defining_file(symbol) {
1031            let def = defining_file.as_ref().to_string();
1032            if def != dependent_file {
1033                reverse
1034                    .entry(def)
1035                    .or_default()
1036                    .insert(dependent_file.to_string());
1037            }
1038        }
1039    };
1040
1041    for (file, imports) in db.file_import_snapshots() {
1042        let file = file.as_ref().to_string();
1043        for fqcn in imports.values() {
1044            add_edge(fqcn, &file);
1045        }
1046    }
1047
1048    for fqcn in db.active_class_node_fqcns() {
1049        // Only true classes contribute class-direction edges in this loop.
1050        // Interface / trait / enum edges are not currently emitted here —
1051        // this function only ever read classes.
1052        let kind = match crate::db::class_kind_via_db(db, fqcn.as_ref()) {
1053            Some(k) if !k.is_interface && !k.is_trait && !k.is_enum => k,
1054            _ => continue,
1055        };
1056        let _ = kind;
1057        let Some(file) = db
1058            .symbol_defining_file(fqcn.as_ref())
1059            .map(|f| f.as_ref().to_string())
1060        else {
1061            continue;
1062        };
1063
1064        let Some(node) = db.lookup_class_node(fqcn.as_ref()) else {
1065            continue;
1066        };
1067        if let Some(parent) = node.parent(db) {
1068            add_edge(parent.as_ref(), &file);
1069        }
1070        for iface in node.interfaces(db).iter() {
1071            add_edge(iface.as_ref(), &file);
1072        }
1073        for tr in node.traits(db).iter() {
1074            add_edge(tr.as_ref(), &file);
1075        }
1076    }
1077
1078    // Also wire in bare-FQN references from Pass 2 (new \Foo(), \Foo::method(), \foo())
1079    // that do not appear in use-import statements.
1080    for (ref_file, symbol_key) in db.all_reference_location_pairs() {
1081        let file_str = ref_file.as_ref().to_string();
1082        let lookup: &str = match symbol_key.split_once("::") {
1083            Some((class, _)) => class,
1084            None => &symbol_key,
1085        };
1086        add_edge(lookup, &file_str);
1087    }
1088
1089    reverse
1090}
1091
1092fn extract_reference_locations(
1093    db: &dyn crate::db::MirDatabase,
1094    file: &Arc<str>,
1095) -> Vec<(String, u32, u16, u16)> {
1096    db.extract_file_reference_locations(file.as_ref())
1097        .into_iter()
1098        .map(|(sym, line, col_start, col_end)| (sym.to_string(), line, col_start, col_end))
1099        .collect()
1100}
1101
1102pub struct AnalysisResult {
1103    pub issues: Vec<Issue>,
1104    #[doc(hidden)]
1105    pub type_envs: std::collections::HashMap<crate::type_env::ScopeId, crate::type_env::TypeEnv>,
1106    /// Per-expression resolved symbols from Pass 2, sorted by file path.
1107    pub symbols: Vec<crate::symbol::ResolvedSymbol>,
1108    /// Maps each file path to the contiguous range within `symbols` that belongs
1109    /// to it. Built once after analysis; allows `symbol_at` to scan only the
1110    /// relevant file's slice rather than the entire codebase-wide vector.
1111    symbols_by_file: HashMap<Arc<str>, std::ops::Range<usize>>,
1112}
1113
1114impl AnalysisResult {
1115    fn build(
1116        issues: Vec<Issue>,
1117        type_envs: std::collections::HashMap<crate::type_env::ScopeId, crate::type_env::TypeEnv>,
1118        mut symbols: Vec<crate::symbol::ResolvedSymbol>,
1119    ) -> Self {
1120        symbols.sort_unstable_by(|a, b| a.file.as_ref().cmp(b.file.as_ref()));
1121        let mut symbols_by_file: HashMap<Arc<str>, std::ops::Range<usize>> = HashMap::new();
1122        let mut i = 0;
1123        while i < symbols.len() {
1124            let file = Arc::clone(&symbols[i].file);
1125            let start = i;
1126            while i < symbols.len() && symbols[i].file == file {
1127                i += 1;
1128            }
1129            symbols_by_file.insert(file, start..i);
1130        }
1131        Self {
1132            issues,
1133            type_envs,
1134            symbols,
1135            symbols_by_file,
1136        }
1137    }
1138}
1139
1140impl AnalysisResult {
1141    pub fn error_count(&self) -> usize {
1142        self.issues
1143            .iter()
1144            .filter(|i| i.severity == mir_issues::Severity::Error)
1145            .count()
1146    }
1147
1148    pub fn warning_count(&self) -> usize {
1149        self.issues
1150            .iter()
1151            .filter(|i| i.severity == mir_issues::Severity::Warning)
1152            .count()
1153    }
1154
1155    /// Group issues by source file.
1156    pub fn issues_by_file(&self) -> HashMap<std::sync::Arc<str>, Vec<&Issue>> {
1157        let mut map: HashMap<std::sync::Arc<str>, Vec<&Issue>> = HashMap::new();
1158        for issue in &self.issues {
1159            map.entry(issue.location.file.clone())
1160                .or_default()
1161                .push(issue);
1162        }
1163        map
1164    }
1165
1166    /// Count issues by severity. Returned as `(severity, count)` pairs sorted
1167    /// by severity (Info, Warning, Error).
1168    pub fn count_by_severity(&self) -> Vec<(mir_issues::Severity, usize)> {
1169        let mut counts: std::collections::BTreeMap<mir_issues::Severity, usize> =
1170            std::collections::BTreeMap::new();
1171        for issue in &self.issues {
1172            *counts.entry(issue.severity).or_insert(0) += 1;
1173        }
1174        counts.into_iter().collect()
1175    }
1176
1177    /// Total number of issues across all severities and files.
1178    pub fn total_issue_count(&self) -> usize {
1179        self.issues.len()
1180    }
1181
1182    /// Iterator of issues matching `predicate`. Useful for filtering by
1183    /// severity, kind, or file without materializing intermediate vectors.
1184    pub fn filter_issues<'a, F>(&'a self, predicate: F) -> impl Iterator<Item = &'a Issue>
1185    where
1186        F: Fn(&Issue) -> bool + 'a,
1187    {
1188        self.issues.iter().filter(move |i| predicate(i))
1189    }
1190
1191    /// Return the innermost resolved symbol whose span contains `byte_offset`
1192    /// in `file`, or `None` if no symbol was recorded at that position.
1193    pub fn symbol_at(
1194        &self,
1195        file: &str,
1196        byte_offset: u32,
1197    ) -> Option<&crate::symbol::ResolvedSymbol> {
1198        let range = self.symbols_by_file.get(file)?;
1199        self.symbols[range.clone()]
1200            .iter()
1201            .filter(|s| s.span.start <= byte_offset && byte_offset < s.span.end)
1202            .min_by_key(|s| s.span.end - s.span.start)
1203    }
1204}