Skip to main content

mir_analyzer/
project.rs

1/// Project-level orchestration: file discovery, pass 1, pass 2.
2use std::mem::ManuallyDrop;
3use std::path::{Path, PathBuf};
4use std::sync::Arc;
5
6use parking_lot::Mutex;
7
8use rayon::prelude::*;
9
10use std::collections::{HashMap, HashSet};
11
12use crate::cache::{hash_content, AnalysisCache};
13use crate::db::{
14    collect_file_definitions, collect_file_definitions_uncached, FileDefinitions, MirDatabase,
15    MirDb, SourceFile,
16};
17use crate::pass2::Pass2Driver;
18use crate::php_version::PhpVersion;
19use mir_issues::Issue;
20use mir_types::Union;
21use salsa::Setter as _;
22
23pub use crate::pass2::merge_return_types;
24
/// Orchestrates whole-project analysis: stub loading, Pass 1 (definition
/// collection), PSR-4 lazy loading, and Pass 2 (body analysis).
pub struct ProjectAnalyzer {
    /// Optional cache — when `Some`, Pass 2 results are read/written per file.
    pub cache: Option<AnalysisCache>,
    /// Called once after each file completes Pass 2 (used for progress reporting).
    pub on_file_done: Option<Arc<dyn Fn() + Send + Sync>>,
    /// PSR-4 autoloader mapping from composer.json, if available.
    pub psr4: Option<Arc<crate::composer::Psr4Map>>,
    /// Whether stubs have already been loaded (to avoid double-loading).
    /// Flipped with a `SeqCst` swap in `load_stubs`, so only one caller ingests.
    stubs_loaded: std::sync::atomic::AtomicBool,
    /// When true, run dead code detection at the end of analysis.
    pub find_dead_code: bool,
    /// Target PHP language version. `None` means "not configured"; resolved to
    /// `PhpVersion::LATEST` when passed down to `StatementsAnalyzer`.
    pub php_version: Option<PhpVersion>,
    /// Additional stub files to parse before analysis (absolute paths).
    pub stub_files: Vec<PathBuf>,
    /// Additional stub directories to walk and parse before analysis (absolute paths).
    pub stub_dirs: Vec<PathBuf>,
    /// Salsa database for incremental Pass-1 memoization, paired with the map
    /// from file path to its registered salsa `SourceFile` input.
    /// `MirDb` is `Send` but `!Sync` (thread-local query state); `Mutex`
    /// provides the `Sync` bound rayon requires without needing `T: Sync`.
    salsa: Mutex<(MirDb, HashMap<Arc<str>, SourceFile>)>,
}
48
/// A parsed file bundled with the storage its AST borrows from: the bump
/// arena and the source string are owned by the struct itself, making this a
/// self-referential bundle.
///
/// Destruction order is enforced manually in `Drop` via `ManuallyDrop`:
/// `parsed` must be destroyed before `arena` (and `source`) that back it.
struct ParsedProjectFile {
    file: Arc<str>,
    source: Arc<str>,
    // Lifetimes are widened to `'static` in `new`; the AST really borrows
    // from `arena` and `source` below.
    parsed: ManuallyDrop<php_rs_parser::ParseResult<'static, 'static>>,
    arena: ManuallyDrop<Box<bumpalo::Bump>>,
}
55
impl ParsedProjectFile {
    /// Parse `source` into a fresh arena owned by the returned struct.
    ///
    /// The arena is boxed so its heap address stays stable when the struct
    /// moves; `Arc<str>` likewise keeps the source bytes at a fixed address.
    fn new(file: Arc<str>, source: Arc<str>) -> Self {
        let arena = Box::new(crate::arena::create_parse_arena(source.len()));
        let parsed = php_rs_parser::parse(&arena, &source);
        // SAFETY: `parsed` borrows from `arena` and `source`, both owned by this
        // struct and kept alive until `Drop`. `Drop` manually destroys `parsed`
        // before releasing either owner, so the widened lifetimes never escape.
        let parsed = unsafe {
            std::mem::transmute::<
                php_rs_parser::ParseResult<'_, '_>,
                php_rs_parser::ParseResult<'static, 'static>,
            >(parsed)
        };
        Self {
            file,
            source,
            parsed: ManuallyDrop::new(parsed),
            arena: ManuallyDrop::new(arena),
        }
    }

    /// The raw source text of this file.
    fn source(&self) -> &str {
        self.source.as_ref()
    }

    /// The parse result. The elided return lifetimes re-narrow the faked
    /// `'static` to `&self`, so callers can never outlive this owner.
    fn parsed(&self) -> &php_rs_parser::ParseResult<'_, '_> {
        &self.parsed
    }
}
85
impl Drop for ParsedProjectFile {
    fn drop(&mut self) {
        // SAFETY: `parsed` borrows from `arena` (and `source`), so it must be
        // destroyed first. Each `ManuallyDrop` is dropped exactly once, here;
        // `source` is dropped afterwards by the compiler-generated glue.
        unsafe {
            ManuallyDrop::drop(&mut self.parsed);
            ManuallyDrop::drop(&mut self.arena);
        }
    }
}
94
// SAFETY: after construction the parsed AST and source map are read-only. The
// bump arena is never mutated again; it only owns backing storage for AST nodes
// and is dropped after all parallel analysis has completed.
// NOTE(review): this also asserts the parser's AST nodes hide no interior
// mutability (Cell/RefCell) — re-confirm when upgrading php_rs_parser.
unsafe impl Send for ParsedProjectFile {}
unsafe impl Sync for ParsedProjectFile {}
100
101impl ProjectAnalyzer {
102    pub fn new() -> Self {
103        Self {
104            cache: None,
105            on_file_done: None,
106            psr4: None,
107            stubs_loaded: std::sync::atomic::AtomicBool::new(false),
108            find_dead_code: false,
109            php_version: None,
110            stub_files: Vec::new(),
111            stub_dirs: Vec::new(),
112            salsa: Mutex::new((MirDb::default(), HashMap::new())),
113        }
114    }
115
116    /// Create a `ProjectAnalyzer` with a disk-backed cache stored under `cache_dir`.
117    pub fn with_cache(cache_dir: &Path) -> Self {
118        Self {
119            cache: Some(AnalysisCache::open(cache_dir)),
120            on_file_done: None,
121            psr4: None,
122            stubs_loaded: std::sync::atomic::AtomicBool::new(false),
123            find_dead_code: false,
124            php_version: None,
125            stub_files: Vec::new(),
126            stub_dirs: Vec::new(),
127            salsa: Mutex::new((MirDb::default(), HashMap::new())),
128        }
129    }
130
131    /// Create a `ProjectAnalyzer` from a project root containing `composer.json`.
132    /// Returns the analyzer (with `psr4` set) and the `Psr4Map` so callers can
133    /// call `map.project_files()` / `map.vendor_files()`.
134    pub fn from_composer(
135        root: &Path,
136    ) -> Result<(Self, crate::composer::Psr4Map), crate::composer::ComposerError> {
137        let map = crate::composer::Psr4Map::from_composer(root)?;
138        let psr4 = Arc::new(map.clone());
139        let analyzer = Self {
140            cache: None,
141            on_file_done: None,
142            psr4: Some(psr4),
143            stubs_loaded: std::sync::atomic::AtomicBool::new(false),
144            find_dead_code: false,
145            php_version: None,
146            stub_files: Vec::new(),
147            stub_dirs: Vec::new(),
148            salsa: Mutex::new((MirDb::default(), HashMap::new())),
149        };
150        Ok((analyzer, map))
151    }
152
153    /// Set the target PHP version.
154    pub fn with_php_version(mut self, version: PhpVersion) -> Self {
155        self.php_version = Some(version);
156        self
157    }
158
159    /// Resolve the configured PHP version, defaulting to `PhpVersion::LATEST`
160    /// when none has been set.
161    fn resolved_php_version(&self) -> PhpVersion {
162        self.php_version.unwrap_or(PhpVersion::LATEST)
163    }
164
165    fn type_exists(&self, fqcn: &str) -> bool {
166        let db = self.snapshot_db();
167        crate::db::type_exists_via_db(&db, fqcn)
168    }
169
170    /// Acquire a cheap clone of the salsa db for a read-only query.
171    /// The lock is held only for the duration of the clone, so concurrent
172    /// readers never serialize on each other or on writes longer than the
173    /// clone itself.
174    fn snapshot_db(&self) -> MirDb {
175        let guard = self.salsa.lock();
176        guard.0.clone()
177    }
178
    /// Internal: expose the salsa Mutex for unit tests that need a `&dyn MirDatabase`.
    ///
    /// Hidden from docs on purpose: production code should go through
    /// `snapshot_db()` so the lock is held only long enough to clone.
    #[doc(hidden)]
    pub fn salsa_db_for_test(&self) -> &Mutex<(MirDb, HashMap<Arc<str>, SourceFile>)> {
        &self.salsa
    }
184
185    /// Look up the source location of a class member (method, property, or
186    /// class constant / enum case) by walking the inheritance chain through
187    /// the salsa db.  Returns `None` if no member with that name exists, or
188    /// if the member has no recorded location.
189    pub fn member_location(
190        &self,
191        fqcn: &str,
192        member_name: &str,
193    ) -> Option<mir_codebase::storage::Location> {
194        let db = self.snapshot_db();
195        crate::db::member_location_via_db(&db, fqcn, member_name)
196    }
197
198    pub fn symbol_location(&self, symbol: &str) -> Option<mir_codebase::storage::Location> {
199        let db = self.snapshot_db();
200        db.lookup_class_node(symbol)
201            .filter(|n| n.active(&db))
202            .and_then(|n| n.location(&db))
203            .or_else(|| {
204                db.lookup_function_node(symbol)
205                    .filter(|n| n.active(&db))
206                    .and_then(|n| n.location(&db))
207            })
208    }
209
210    pub fn reference_locations(&self, symbol: &str) -> Vec<(Arc<str>, u32, u16, u16)> {
211        let db = self.snapshot_db();
212        db.reference_locations(symbol)
213    }
214
215    /// Load PHP built-in stubs. Called automatically by `analyze` if not done yet.
216    /// Stubs are filtered against the configured target PHP version (or
217    /// `PhpVersion::LATEST` if none was set).
218    pub fn load_stubs(&self) {
219        if !self
220            .stubs_loaded
221            .swap(true, std::sync::atomic::Ordering::SeqCst)
222        {
223            let php_version = self.resolved_php_version();
224
225            // Parallelize built-in stub parsing.
226            let builtin_slices = crate::stubs::builtin_stub_slices_for_version(php_version);
227
228            // Parallelize user stub parsing (parallelization in user_stub_slices()).
229            let user_slices = crate::stubs::user_stub_slices(&self.stub_files, &self.stub_dirs);
230
231            // Lock once and ingest all slices together.
232            let mut guard = self.salsa.lock();
233            for slice in builtin_slices {
234                guard.0.ingest_stub_slice(&slice);
235            }
236            for slice in user_slices {
237                guard.0.ingest_stub_slice(&slice);
238            }
239        }
240    }
241
    /// Pass 1 for a single lazily-loaded source: register (or update) the
    /// salsa input for `file`, run the memoized definition-collection query,
    /// then ingest the resulting slice into the canonical db. Returns the
    /// collected definitions so the caller can surface their issues.
    fn collect_and_ingest_source(&self, file: Arc<str>, src: &str) -> FileDefinitions {
        let file_defs = {
            let mut guard = self.salsa.lock();
            let (ref mut db, ref mut files) = *guard;
            let salsa_file = match files.get(&file) {
                Some(&sf) => {
                    // Only touch the input when the text actually changed, so
                    // an unchanged file keeps its memoized Pass 1 result.
                    if sf.text(db).as_ref() != src {
                        sf.set_text(db).to(Arc::from(src));
                    }
                    sf
                }
                None => {
                    let sf = SourceFile::new(db, file.clone(), Arc::from(src));
                    files.insert(file.clone(), sf);
                    sf
                }
            };
            collect_file_definitions(db, salsa_file)
        };

        // NOTE(review): the lock is deliberately dropped and re-acquired here,
        // presumably so nothing produced by `collect_file_definitions` is
        // still borrowed when `ingest_stub_slice` mutates the db — confirm
        // against the salsa `cancel_others` constraints documented in
        // `analyze`.
        {
            let mut guard = self.salsa.lock();
            guard.0.ingest_stub_slice(&file_defs.slice);
        }
        file_defs
    }
268
269    /// Run the full analysis pipeline on a set of file paths.
270    pub fn analyze(&self, paths: &[PathBuf]) -> AnalysisResult {
271        let mut all_issues = Vec::new();
272
273        // ---- Load PHP built-in stubs (before Pass 1 so user code can override)
274        self.load_stubs();
275
276        // ---- Pass 1: read files in parallel ----------------------------------
277        let parsed_files: Vec<ParsedProjectFile> = paths
278            .par_iter()
279            .filter_map(|path| match std::fs::read_to_string(path) {
280                Ok(src) => {
281                    let file = Arc::from(path.to_string_lossy().as_ref());
282                    Some(ParsedProjectFile::new(file, Arc::from(src)))
283                }
284                Err(e) => {
285                    eprintln!("Cannot read {}: {}", path.display(), e);
286                    None
287                }
288            })
289            .collect();
290
291        let file_data: Vec<(Arc<str>, Arc<str>)> = parsed_files
292            .iter()
293            .map(|parsed| (parsed.file.clone(), parsed.source.clone()))
294            .collect();
295
296        // ---- Pre-Pass-2 invalidation: evict dependents of changed files ------
297        if let Some(cache) = &self.cache {
298            let changed: Vec<String> = file_data
299                .par_iter()
300                .filter_map(|(f, src)| {
301                    let h = hash_content(src.as_ref());
302                    if cache.get(f, &h).is_none() {
303                        Some(f.to_string())
304                    } else {
305                        None
306                    }
307                })
308                .collect();
309            if !changed.is_empty() {
310                cache.evict_with_dependents(&changed);
311            }
312        }
313
314        // ---- Register Salsa source inputs for incremental follow-up calls ----
315        {
316            let mut guard = self.salsa.lock();
317            let (ref mut db, ref mut files) = *guard;
318            for parsed in &parsed_files {
319                match files.get(parsed.file.as_ref()) {
320                    Some(&sf) => {
321                        if sf.text(db).as_ref() != parsed.source() {
322                            sf.set_text(db).to(parsed.source.clone());
323                        }
324                    }
325                    None => {
326                        let file_cloned = parsed.file.clone();
327                        let sf = SourceFile::new(db, file_cloned.clone(), parsed.source.clone());
328                        files.insert(file_cloned, sf);
329                    }
330                }
331            }
332        }
333
334        // ---- Pass 1: definition collection from the already-parsed AST -------
335        let file_defs: Vec<FileDefinitions> = parsed_files
336            .par_iter()
337            .map(|parsed| {
338                let parse_result = parsed.parsed();
339                let mut all_issues: Vec<Issue> = parse_result
340                    .errors
341                    .iter()
342                    .map(|err| {
343                        Issue::new(
344                            mir_issues::IssueKind::ParseError {
345                                message: err.to_string(),
346                            },
347                            mir_issues::Location {
348                                file: parsed.file.clone(),
349                                line: 1,
350                                line_end: 1,
351                                col_start: 0,
352                                col_end: 0,
353                            },
354                        )
355                    })
356                    .collect();
357                let collector = crate::collector::DefinitionCollector::new_for_slice(
358                    parsed.file.clone(),
359                    parsed.source(),
360                    &parse_result.source_map,
361                );
362                let (slice, collector_issues) = collector.collect_slice(&parse_result.program);
363                all_issues.extend(collector_issues);
364                FileDefinitions {
365                    slice: Arc::new(slice),
366                    issues: Arc::new(all_issues),
367                }
368            })
369            .collect();
370
371        let mut files_with_parse_errors: std::collections::HashSet<Arc<str>> =
372            std::collections::HashSet::new();
373        let mut files_needing_inference: std::collections::HashSet<Arc<str>> =
374            std::collections::HashSet::new();
375        {
376            let mut guard = self.salsa.lock();
377            let (ref mut db, _) = *guard;
378            for defs in file_defs {
379                for issue in defs.issues.iter() {
380                    if matches!(issue.kind, mir_issues::IssueKind::ParseError { .. }) {
381                        files_with_parse_errors.insert(issue.location.file.clone());
382                    }
383                }
384                if stub_slice_needs_inference(&defs.slice) {
385                    if let Some(file) = defs.slice.file.as_ref() {
386                        files_needing_inference.insert(file.clone());
387                    }
388                }
389                db.ingest_stub_slice(&defs.slice);
390                all_issues.extend(Arc::unwrap_or_clone(defs.issues));
391            }
392        }
393
394        // ---- Lazy-load unknown classes via PSR-4 (issue #50) ----------------
395        if let Some(psr4) = &self.psr4 {
396            self.lazy_load_missing_classes(psr4.clone(), &mut all_issues);
397        }
398
399        // ---- Resolve @psalm-import-type declarations now that all Pass 1
400        // classes (including their `type_aliases`) are populated.
401        // ---- Build reverse dep graph and persist it for the next run ---------
402        if let Some(cache) = &self.cache {
403            let db_snapshot = {
404                let guard = self.salsa.lock();
405                guard.0.clone()
406            };
407            let rev = build_reverse_deps(&db_snapshot);
408            cache.set_reverse_deps(rev);
409        }
410
411        // ---- Class-level checks (M11) ----------------------------------------
412        // `class_db` is scoped tightly: it must be dropped before the priming
413        // sweep's `commit_inferred_return_types` call below, otherwise the
414        // setter's `Storage::cancel_others` blocks waiting for this clone's
415        // Arc to drop (strong-count==1 invariant).
416        let analyzed_file_set: std::collections::HashSet<std::sync::Arc<str>> =
417            file_data.iter().map(|(f, _)| f.clone()).collect();
418        {
419            let class_db = {
420                let guard = self.salsa.lock();
421                guard.0.clone()
422            };
423            let class_issues =
424                crate::class::ClassAnalyzer::with_files(&class_db, analyzed_file_set, &file_data)
425                    .analyze_all();
426            all_issues.extend(class_issues);
427        }
428
429        // ---- S5-PR10b: clone the salsa db once per parallel sweep so each
430        // rayon worker gets its own clone (Salsa databases are `Send` but
431        // `!Sync`; cloning shares the underlying memoization storage).
432        let db_priming = {
433            let guard = self.salsa.lock();
434            guard.0.clone()
435        };
436
437        // ---- Pass 2 priming: populate inferred_return_type for all functions  --
438        // Run a first inference-only sweep so that cross-file inferred return
439        // types are available before the issue-emitting pass below (G6).
440        //
441        // Inferred types are collected into a thread-safe buffer during the
442        // parallel sweep and committed to the Salsa db serially after the sweep
443        // returns. Using `rayon::in_place_scope` ensures all worker threads and
444        // their thread-local Salsa state drop before we commit to the canonical db.
445        let filtered_parsed: Vec<_> = parsed_files
446            .par_iter()
447            .filter(|parsed| {
448                !files_with_parse_errors.contains(&parsed.file)
449                    && files_needing_inference.contains(&parsed.file)
450            })
451            .collect();
452
453        let (functions, methods) =
454            run_inference_sweep(db_priming, filtered_parsed, self.resolved_php_version());
455
456        {
457            let mut guard = self.salsa.lock();
458            guard.0.commit_inferred_return_types(functions, methods);
459        }
460
461        let db_main = {
462            let guard = self.salsa.lock();
463            guard.0.clone()
464        };
465
466        // ---- Pass 2: analyze function/method bodies in parallel (M14) --------
467        let pass2_results: Vec<(Vec<Issue>, Vec<crate::symbol::ResolvedSymbol>)> = parsed_files
468            .par_iter()
469            .filter(|parsed| !files_with_parse_errors.contains(&parsed.file))
470            .map_with(db_main, |db, parsed| {
471                let driver =
472                    Pass2Driver::new(&*db as &dyn MirDatabase, self.resolved_php_version());
473                let result = if let Some(cache) = &self.cache {
474                    let h = hash_content(parsed.source());
475                    if let Some((cached_issues, ref_locs)) = cache.get(&parsed.file, &h) {
476                        db.replay_reference_locations(parsed.file.clone(), &ref_locs);
477                        (cached_issues, Vec::new())
478                    } else {
479                        let parse_result = parsed.parsed();
480                        let (issues, symbols) = driver.analyze_bodies(
481                            &parse_result.program,
482                            parsed.file.clone(),
483                            parsed.source(),
484                            &parse_result.source_map,
485                        );
486                        let ref_locs = extract_reference_locations(&*db, &parsed.file);
487                        cache.put(&parsed.file, h, issues.clone(), ref_locs);
488                        (issues, symbols)
489                    }
490                } else {
491                    let parse_result = parsed.parsed();
492                    driver.analyze_bodies(
493                        &parse_result.program,
494                        parsed.file.clone(),
495                        parsed.source(),
496                        &parse_result.source_map,
497                    )
498                };
499                if let Some(cb) = &self.on_file_done {
500                    cb();
501                }
502                result
503            })
504            .collect();
505
506        let mut all_symbols = Vec::new();
507        for (issues, symbols) in pass2_results {
508            all_issues.extend(issues);
509            all_symbols.extend(symbols);
510        }
511
512        // ---- Post-Pass-2 lazy loading: FQCNs used without `use` imports ------
513        // FQCNs in function/method bodies aren't visible until Pass 2 runs, so
514        // the pre-Pass-2 lazy load misses them.  We collect UndefinedClass names,
515        // resolve them via PSR-4, load those files, re-finalize, then re-analyze
516        // only the affected files to clear the false positives.
517        if let Some(psr4) = &self.psr4 {
518            self.lazy_load_from_body_issues(
519                psr4.clone(),
520                &file_data,
521                &files_with_parse_errors,
522                &mut all_issues,
523                &mut all_symbols,
524            );
525        }
526
527        // Persist cache hits/misses to disk
528        if let Some(cache) = &self.cache {
529            cache.flush();
530        }
531
532        // ---- Compact the reference index ------------------------------------
533        // ---- Dead-code detection (M18) --------------------------------------
534        if self.find_dead_code {
535            let salsa = self.salsa.lock();
536            let dead_code_issues = crate::dead_code::DeadCodeAnalyzer::new(&salsa.0).analyze();
537            drop(salsa);
538            all_issues.extend(dead_code_issues);
539        }
540
541        AnalysisResult::build(all_issues, std::collections::HashMap::new(), all_symbols)
542    }
543
544    fn lazy_load_missing_classes(
545        &self,
546        psr4: Arc<crate::composer::Psr4Map>,
547        all_issues: &mut Vec<Issue>,
548    ) {
549        use std::collections::HashSet;
550
551        let max_depth = 10;
552        let mut loaded: HashSet<String> = HashSet::new();
553
554        for _ in 0..max_depth {
555            let mut to_load: Vec<(String, PathBuf)> = Vec::new();
556
557            let mut try_queue = |fqcn: &str| {
558                if !self.type_exists(fqcn) && !loaded.contains(fqcn) {
559                    if let Some(path) = psr4.resolve(fqcn) {
560                        to_load.push((fqcn.to_string(), path));
561                    }
562                }
563            };
564
565            // Drive the inheritance scan from already-ingested `ClassNode`s.
566            let mut inheritance_candidates = Vec::new();
567            let import_candidates = {
568                let guard = self.salsa.lock();
569                let db = &guard.0;
570                for fqcn in db.active_class_node_fqcns() {
571                    let Some(node) = db.lookup_class_node(&fqcn) else {
572                        continue;
573                    };
574                    if node.is_interface(db) {
575                        for parent in node.extends(db).iter() {
576                            inheritance_candidates.push(parent.to_string());
577                        }
578                    } else if node.is_enum(db) {
579                        for iface in node.interfaces(db).iter() {
580                            inheritance_candidates.push(iface.to_string());
581                        }
582                    } else if node.is_trait(db) {
583                        for used in node.traits(db).iter() {
584                            inheritance_candidates.push(used.to_string());
585                        }
586                    } else {
587                        if let Some(parent) = node.parent(db) {
588                            inheritance_candidates.push(parent.to_string());
589                        }
590                        for iface in node.interfaces(db).iter() {
591                            inheritance_candidates.push(iface.to_string());
592                        }
593                    }
594                }
595                db.file_import_snapshots()
596                    .into_iter()
597                    .flat_map(|(_, imports)| imports.into_values())
598                    .collect::<Vec<_>>()
599            };
600            for fqcn in inheritance_candidates {
601                try_queue(&fqcn);
602            }
603
604            // Also lazy-load any type referenced via `use` imports that isn't yet
605            // in the codebase (covers enums and classes used only in type hints or
606            // static calls, which never appear in the inheritance scan above).
607            for fqcn in import_candidates {
608                try_queue(&fqcn);
609            }
610
611            if to_load.is_empty() {
612                break;
613            }
614
615            for (fqcn, path) in to_load {
616                loaded.insert(fqcn);
617                if let Ok(src) = std::fs::read_to_string(&path) {
618                    let file: Arc<str> = Arc::from(path.to_string_lossy().as_ref());
619                    let defs = self.collect_and_ingest_source(file, &src);
620                    all_issues.extend(Arc::unwrap_or_clone(defs.issues));
621                }
622            }
623        }
624    }
625
    /// Post-Pass-2 lazy loading driven by `UndefinedClass` diagnostics.
    ///
    /// Repeats up to `max_depth` rounds: resolve undefined FQCNs via PSR-4,
    /// ingest the resolved files (plus their inheritance deps), then re-run
    /// Pass 2 on only the files whose `UndefinedClass` issues now resolve,
    /// replacing their entries in `all_issues` / `all_symbols`.
    fn lazy_load_from_body_issues(
        &self,
        psr4: Arc<crate::composer::Psr4Map>,
        file_data: &[(Arc<str>, Arc<str>)],
        files_with_parse_errors: &HashSet<Arc<str>>,
        all_issues: &mut Vec<Issue>,
        all_symbols: &mut Vec<crate::symbol::ResolvedSymbol>,
    ) {
        use mir_issues::IssueKind;

        let max_depth = 5;
        let mut loaded: HashSet<String> = HashSet::new();

        for _ in 0..max_depth {
            // Deduplicate by FQCN: HashMap prevents loading the same class twice
            // when multiple files share the same UndefinedClass diagnostic.
            let mut to_load: HashMap<String, PathBuf> = HashMap::new();

            for issue in all_issues.iter() {
                if let IssueKind::UndefinedClass { name } = &issue.kind {
                    if !self.type_exists(name) && !loaded.contains(name) {
                        if let Some(path) = psr4.resolve(name) {
                            to_load.entry(name.clone()).or_insert(path);
                        }
                    }
                }
            }

            if to_load.is_empty() {
                break;
            }

            loaded.extend(to_load.keys().cloned());

            for path in to_load.values() {
                if let Ok(src) = std::fs::read_to_string(path) {
                    let file: Arc<str> = Arc::from(path.to_string_lossy().as_ref());
                    // Issues from these files are intentionally discarded:
                    // they were not in the analyzed set requested by the caller.
                    let _ = self.collect_and_ingest_source(file, &src);
                }
            }

            // Load inheritance deps of newly-added types and finalize.
            // This covers e.g. `class Helper extends \App\Base` where Base is
            // also not in the initial file set.
            self.lazy_load_missing_classes(psr4.clone(), all_issues);

            // Re-analyze every file that has an UndefinedClass for a type now
            // present in the codebase — covers both direct and transitive loads.
            let files_to_reanalyze: HashSet<Arc<str>> = all_issues
                .iter()
                .filter_map(|i| {
                    if let IssueKind::UndefinedClass { name } = &i.kind {
                        if self.type_exists(name) {
                            return Some(i.location.file.clone());
                        }
                    }
                    None
                })
                .collect();

            if files_to_reanalyze.is_empty() {
                break;
            }

            // Drop the stale results for the files being re-analyzed; the
            // fresh Pass 2 below re-adds them.
            all_issues.retain(|i| !files_to_reanalyze.contains(&i.location.file));
            all_symbols.retain(|s| !files_to_reanalyze.contains(&s.file));

            // Two-phase reanalysis to avoid the salsa `cancel_others` deadlock:
            //
            // Phase 1: parallel inference-only Pass 2 on a cloned db. The
            //   priming clone is consumed by `gather_inferred_types`, so all
            //   per-thread db handles are dropped before we touch the canonical
            //   db.
            // Phase 1.5: single-threaded commit of the inferred return types.
            // Phase 2: parallel full Pass 2 emits the actual issues + symbols.
            //
            // The previous in-line per-file commit (commit while a `db` clone
            // was still alive in `map_with`) deadlocked salsa: `cancel_others`
            // waits for outstanding storage references and the local clone is
            // exactly one such reference.
            let sweep: Vec<(Arc<str>, Arc<str>)> = file_data
                .iter()
                .filter(|(f, _)| {
                    !files_with_parse_errors.contains(f) && files_to_reanalyze.contains(f)
                })
                .cloned()
                .collect();

            let (inferred_fns, inferred_methods) = crate::session::gather_inferred_types(
                {
                    let guard = self.salsa.lock();
                    guard.0.clone()
                },
                &sweep,
                self.resolved_php_version(),
            );

            {
                let mut guard_db = self.salsa.lock();
                guard_db
                    .0
                    .commit_inferred_return_types(inferred_fns, inferred_methods);
            }

            let db_full = {
                let guard = self.salsa.lock();
                guard.0.clone()
            };

            let reanalysis: Vec<(Vec<Issue>, Vec<crate::symbol::ResolvedSymbol>)> = file_data
                .par_iter()
                .filter(|(f, _)| {
                    !files_with_parse_errors.contains(f) && files_to_reanalyze.contains(f)
                })
                .map_with(db_full, |db, (file, src)| {
                    let driver =
                        Pass2Driver::new(&*db as &dyn MirDatabase, self.resolved_php_version());
                    let arena = crate::arena::create_parse_arena(src.len());
                    let parsed = php_rs_parser::parse(&arena, src);
                    driver.analyze_bodies(&parsed.program, file.clone(), src, &parsed.source_map)
                })
                .collect();

            for (issues, symbols) in reanalysis {
                all_issues.extend(issues);
                all_symbols.extend(symbols);
            }
        }
    }
755
    /// Re-analyze a single file within the existing codebase.
    ///
    /// This is the incremental analysis API for LSP:
    /// 1. Removes old definitions from this file
    /// 2. Re-runs Pass 1 (definition collection) on the new content
    /// 3. Resolves any newly-collected `@psalm-import-type` declarations
    /// 4. Re-runs Pass 2 (body analysis) on this file
    /// 5. Returns the analysis result for this file only
    ///
    /// The returned [`AnalysisResult`] never carries type environments
    /// (`type_envs` is always empty from this entry point); on the cache
    /// fast path it also carries no resolved symbols.
    pub fn re_analyze_file(&self, file_path: &str, new_content: &str) -> AnalysisResult {
        // Fast path: content unchanged and cache has a valid entry — skip full re-analysis.
        if let Some(cache) = &self.cache {
            let h = hash_content(new_content);
            if let Some((issues, ref_locs)) = cache.get(file_path, &h) {
                let file: Arc<str> = Arc::from(file_path);
                // Replay the cached reference locations into the db so
                // cross-file lookups stay consistent without re-running Pass 2.
                let guard = self.salsa.lock();
                guard.0.replay_reference_locations(file, &ref_locs);
                return AnalysisResult::build(issues, HashMap::new(), Vec::new());
            }
        }

        let file: Arc<str> = Arc::from(file_path);

        // Drop this file's old Pass-1 definitions before re-collecting so
        // stale symbols do not linger in the database.
        {
            let mut guard = self.salsa.lock();
            let (ref mut db, _) = *guard;
            db.remove_file_definitions(file_path);
        }

        // --- Salsa-backed Pass 1: memoized parse + definition collection ------
        let file_defs = {
            let mut guard = self.salsa.lock();
            let (ref mut db, ref mut files) = *guard;
            // Reuse the existing salsa input for this path if we have one,
            // otherwise register a fresh `SourceFile`.
            let salsa_file = match files.get(&file) {
                Some(&sf) => {
                    // NOTE(review): text is set unconditionally here (unlike
                    // `collect_types_only`, which skips unchanged text); the
                    // cache fast path above usually absorbs the no-change case.
                    sf.set_text(db).to(Arc::from(new_content));
                    sf
                }
                None => {
                    let sf = SourceFile::new(db, file.clone(), Arc::from(new_content));
                    files.insert(file.clone(), sf);
                    sf
                }
            };
            collect_file_definitions(db, salsa_file)
        };

        // Pass-1 issues (parse errors, definition-level problems) seed the result.
        let mut all_issues: Vec<Issue> = Arc::unwrap_or_clone(file_defs.issues.clone());

        // --- S2 + Pass 2: hold the Salsa lock for ClassNode upserts and body
        // analysis so the db reference is live during Pass 2 (S5).
        let symbols = {
            let mut guard = self.salsa.lock();
            let (ref mut db, _) = *guard;

            db.ingest_stub_slice(&file_defs.slice);

            // Resolve any newly-collected @psalm-import-type declarations so
            // Pass 2 reads the imported aliases out of `type_aliases`.
            // Re-parse in the arena so Pass 2 can walk the AST.
            let arena = bumpalo::Bump::new();
            let parsed = php_rs_parser::parse(&arena, new_content);

            if parsed.errors.is_empty() {
                // First sweep: inference-only Pass 2 fills in missing return
                // types, which are committed before the reporting sweep.
                let db_ref: &dyn MirDatabase = db;
                let driver = Pass2Driver::new_inference_only(db_ref, self.resolved_php_version());
                driver.analyze_bodies(
                    &parsed.program,
                    file.clone(),
                    new_content,
                    &parsed.source_map,
                );
                let inferred = driver.take_inferred_types();
                db.commit_inferred_return_types(inferred.functions, inferred.methods);

                // Second sweep: full Pass 2 emits issues and resolved symbols.
                let db_ref: &dyn MirDatabase = db;
                let driver = Pass2Driver::new(db_ref, self.resolved_php_version());
                let (body_issues, symbols) = driver.analyze_bodies(
                    &parsed.program,
                    file.clone(),
                    new_content,
                    &parsed.source_map,
                );
                all_issues.extend(body_issues);
                symbols
            } else {
                // Parse failed: skip body analysis; the parse errors are
                // presumably already reported via the Pass-1 issues above.
                Vec::new()
            }
        };

        // Refresh the persistent cache: evict this file plus its dependents,
        // then store the fresh result keyed by content hash.
        if let Some(cache) = &self.cache {
            let h = hash_content(new_content);
            cache.evict_with_dependents(&[file_path.to_string()]);
            let guard = self.salsa.lock();
            let ref_locs = extract_reference_locations(&guard.0, &file);
            cache.put(file_path, h, all_issues.clone(), ref_locs);
        }

        AnalysisResult::build(all_issues, HashMap::new(), symbols)
    }
855
856    /// Analyze a PHP source string without a real file path.
857    /// Useful for tests and LSP single-file mode.
858    pub fn analyze_source(source: &str) -> AnalysisResult {
859        let analyzer = ProjectAnalyzer::new();
860        let file: Arc<str> = Arc::from("<source>");
861        let mut db = MirDb::default();
862        for slice in crate::stubs::builtin_stub_slices_for_version(analyzer.resolved_php_version())
863        {
864            db.ingest_stub_slice(&slice);
865        }
866        let salsa_file = SourceFile::new(&db, file.clone(), Arc::from(source));
867        let file_defs = collect_file_definitions(&db, salsa_file);
868        db.ingest_stub_slice(&file_defs.slice);
869        let mut all_issues = Arc::unwrap_or_clone(file_defs.issues);
870        if all_issues
871            .iter()
872            .any(|issue| matches!(issue.kind, mir_issues::IssueKind::ParseError { .. }))
873        {
874            return AnalysisResult::build(all_issues, std::collections::HashMap::new(), Vec::new());
875        }
876        let mut type_envs = std::collections::HashMap::new();
877        let mut all_symbols = Vec::new();
878        let arena = bumpalo::Bump::new();
879        let result = php_rs_parser::parse(&arena, source);
880
881        let driver = Pass2Driver::new_inference_only(&db, analyzer.resolved_php_version());
882        driver.analyze_bodies(&result.program, file.clone(), source, &result.source_map);
883        let inferred = driver.take_inferred_types();
884        db.commit_inferred_return_types(inferred.functions, inferred.methods);
885
886        let driver = Pass2Driver::new(&db, analyzer.resolved_php_version());
887        all_issues.extend(driver.analyze_bodies_typed(
888            &result.program,
889            file.clone(),
890            source,
891            &result.source_map,
892            &mut type_envs,
893            &mut all_symbols,
894        ));
895        AnalysisResult::build(all_issues, type_envs, all_symbols)
896    }
897
898    /// Discover all `.php` files under a directory, recursively.
899    pub fn discover_files(root: &Path) -> Vec<PathBuf> {
900        if root.is_file() {
901            return vec![root.to_path_buf()];
902        }
903        let mut files = Vec::new();
904        collect_php_files(root, &mut files);
905        files
906    }
907
908    /// Pass 1 only: collect type definitions from `paths` into the codebase without
909    /// analyzing method bodies or emitting issues. Used to load vendor types.
910    pub fn collect_types_only(&self, paths: &[PathBuf]) {
911        let file_data: Vec<(Arc<str>, Arc<str>)> = paths
912            .par_iter()
913            .filter_map(|path| {
914                let src = std::fs::read_to_string(path).ok()?;
915                Some((
916                    Arc::from(path.to_string_lossy().as_ref()),
917                    Arc::<str>::from(src),
918                ))
919            })
920            .collect();
921
922        let source_files: Vec<SourceFile> = {
923            let mut guard = self.salsa.lock();
924            let (ref mut db, ref mut files) = *guard;
925            file_data
926                .iter()
927                .map(|(file, src)| match files.get(file) {
928                    Some(&sf) => {
929                        if sf.text(db).as_ref() != src.as_ref() {
930                            sf.set_text(db).to(src.clone());
931                        }
932                        sf
933                    }
934                    None => {
935                        let file_cloned = file.clone();
936                        let sf = SourceFile::new(db, file_cloned.clone(), src.clone());
937                        files.insert(file_cloned, sf);
938                        sf
939                    }
940                })
941                .collect()
942        };
943
944        let db_pass1 = {
945            let guard = self.salsa.lock();
946            guard.0.clone()
947        };
948
949        let file_defs: Vec<FileDefinitions> = source_files
950            .par_iter()
951            .map_with(db_pass1, |db, salsa_file| {
952                collect_file_definitions_uncached(&*db, *salsa_file)
953            })
954            .collect();
955
956        let mut guard = self.salsa.lock();
957        let (ref mut db, _) = *guard;
958        for defs in file_defs {
959            db.ingest_stub_slice(&defs.slice);
960        }
961        drop(guard);
962
963        // Print profiling statistics for the collection phase.
964        crate::collector::print_collector_stats();
965    }
966}
967
968impl Default for ProjectAnalyzer {
969    fn default() -> Self {
970        Self::new()
971    }
972}
973
974// Helper: Inference sweep with rayon::in_place_scope
975
976#[allow(clippy::type_complexity)]
977fn run_inference_sweep(
978    db_priming: MirDb,
979    parsed_files: Vec<&ParsedProjectFile>,
980    php_version: PhpVersion,
981) -> (Vec<(Arc<str>, Union)>, Vec<(Arc<str>, Arc<str>, Union)>) {
982    let functions = Arc::new(Mutex::new(Vec::new()));
983    let methods = Arc::new(Mutex::new(Vec::new()));
984
985    rayon::in_place_scope(|s| {
986        for parsed in parsed_files {
987            let db = db_priming.clone();
988            let functions = Arc::clone(&functions);
989            let methods = Arc::clone(&methods);
990
991            s.spawn(move |_| {
992                let driver = Pass2Driver::new_inference_only(&db as &dyn MirDatabase, php_version);
993                let parse_result = parsed.parsed();
994                driver.analyze_bodies(
995                    &parse_result.program,
996                    parsed.file.clone(),
997                    parsed.source(),
998                    &parse_result.source_map,
999                );
1000
1001                let inferred = driver.take_inferred_types();
1002                {
1003                    let mut funcs = functions.lock();
1004                    funcs.extend(inferred.functions);
1005                }
1006                {
1007                    let mut meths = methods.lock();
1008                    meths.extend(inferred.methods);
1009                }
1010            });
1011        }
1012    });
1013
1014    let functions = Arc::try_unwrap(functions)
1015        .map(|mutex| mutex.into_inner())
1016        .unwrap_or_else(|arc| arc.lock().clone());
1017    let methods = Arc::try_unwrap(methods)
1018        .map(|mutex| mutex.into_inner())
1019        .unwrap_or_else(|arc| arc.lock().clone());
1020
1021    (functions, methods)
1022}
1023
1024fn stub_slice_needs_inference(slice: &mir_codebase::storage::StubSlice) -> bool {
1025    slice
1026        .functions
1027        .iter()
1028        .any(|func| func.return_type.is_none())
1029        || slice.classes.iter().any(|class| {
1030            class
1031                .own_methods
1032                .values()
1033                .any(|method| !method.is_abstract && method.return_type.is_none())
1034        })
1035        || slice.traits.iter().any(|tr| {
1036            tr.own_methods
1037                .values()
1038                .any(|method| !method.is_abstract && method.return_type.is_none())
1039        })
1040        || slice.enums.iter().any(|en| {
1041            en.own_methods
1042                .values()
1043                .any(|method| !method.is_abstract && method.return_type.is_none())
1044        })
1045}
1046
/// Recursively push every `.php` file under `dir` onto `out`.
///
/// Symlinks are never followed (directories and files alike), well-known
/// dependency/VCS directories are pruned, and unreadable directories are
/// silently skipped.
pub(crate) fn collect_php_files(dir: &Path, out: &mut Vec<PathBuf>) {
    /// Directory names that are never descended into.
    const SKIP_DIRS: [&str; 5] = ["vendor", ".git", "node_modules", ".cache", ".pnpm-store"];

    let Ok(entries) = std::fs::read_dir(dir) else {
        return;
    };
    for entry in entries.flatten() {
        // Skip symlinks entirely to avoid cycles and surprise traversals.
        let is_symlink = entry.file_type().map(|ft| ft.is_symlink()).unwrap_or(false);
        if is_symlink {
            continue;
        }
        let path = entry.path();
        if path.is_dir() {
            let dir_name = path.file_name().and_then(|n| n.to_str()).unwrap_or("");
            if !SKIP_DIRS.contains(&dir_name) {
                collect_php_files(&path, out);
            }
        } else if path.extension().and_then(|e| e.to_str()) == Some("php") {
            out.push(path);
        }
    }
}
1069
1070// build_reverse_deps
1071
1072fn build_reverse_deps(db: &dyn crate::db::MirDatabase) -> HashMap<String, HashSet<String>> {
1073    let mut reverse: HashMap<String, HashSet<String>> = HashMap::new();
1074
1075    let mut add_edge = |symbol: &str, dependent_file: &str| {
1076        if let Some(defining_file) = db.symbol_defining_file(symbol) {
1077            let def = defining_file.as_ref().to_string();
1078            if def != dependent_file {
1079                reverse
1080                    .entry(def)
1081                    .or_default()
1082                    .insert(dependent_file.to_string());
1083            }
1084        }
1085    };
1086
1087    for (file, imports) in db.file_import_snapshots() {
1088        let file = file.as_ref().to_string();
1089        for fqcn in imports.values() {
1090            add_edge(fqcn, &file);
1091        }
1092    }
1093
1094    for fqcn in db.active_class_node_fqcns() {
1095        // Only true classes contribute class-direction edges in this loop.
1096        // Interface / trait / enum edges are not currently emitted here —
1097        // this function only ever read classes.
1098        let kind = match crate::db::class_kind_via_db(db, fqcn.as_ref()) {
1099            Some(k) if !k.is_interface && !k.is_trait && !k.is_enum => k,
1100            _ => continue,
1101        };
1102        let _ = kind;
1103        let Some(file) = db
1104            .symbol_defining_file(fqcn.as_ref())
1105            .map(|f| f.as_ref().to_string())
1106        else {
1107            continue;
1108        };
1109
1110        let Some(node) = db.lookup_class_node(fqcn.as_ref()) else {
1111            continue;
1112        };
1113        if let Some(parent) = node.parent(db) {
1114            add_edge(parent.as_ref(), &file);
1115        }
1116        for iface in node.interfaces(db).iter() {
1117            add_edge(iface.as_ref(), &file);
1118        }
1119        for tr in node.traits(db).iter() {
1120            add_edge(tr.as_ref(), &file);
1121        }
1122    }
1123
1124    reverse
1125}
1126
1127fn extract_reference_locations(
1128    db: &dyn crate::db::MirDatabase,
1129    file: &Arc<str>,
1130) -> Vec<(String, u32, u16, u16)> {
1131    db.extract_file_reference_locations(file.as_ref())
1132        .into_iter()
1133        .map(|(sym, line, col_start, col_end)| (sym.to_string(), line, col_start, col_end))
1134        .collect()
1135}
1136
/// The outcome of an analysis run: issues plus the symbol and type data
/// consumed by callers (CLI reporting, LSP lookups).
pub struct AnalysisResult {
    /// All issues produced by Pass 1 and Pass 2.
    pub issues: Vec<Issue>,
    /// Per-scope type environments. May be empty depending on the entry
    /// point: `analyze_source` populates it, while `re_analyze_file` always
    /// returns it empty.
    pub type_envs: std::collections::HashMap<crate::type_env::ScopeId, crate::type_env::TypeEnv>,
    /// Per-expression resolved symbols from Pass 2, sorted by file path.
    pub symbols: Vec<crate::symbol::ResolvedSymbol>,
    /// Maps each file path to the contiguous range within `symbols` that belongs
    /// to it. Built once after analysis; allows `symbol_at` to scan only the
    /// relevant file's slice rather than the entire codebase-wide vector.
    symbols_by_file: HashMap<Arc<str>, std::ops::Range<usize>>,
}
1147
1148impl AnalysisResult {
1149    fn build(
1150        issues: Vec<Issue>,
1151        type_envs: std::collections::HashMap<crate::type_env::ScopeId, crate::type_env::TypeEnv>,
1152        mut symbols: Vec<crate::symbol::ResolvedSymbol>,
1153    ) -> Self {
1154        symbols.sort_unstable_by(|a, b| a.file.as_ref().cmp(b.file.as_ref()));
1155        let mut symbols_by_file: HashMap<Arc<str>, std::ops::Range<usize>> = HashMap::new();
1156        let mut i = 0;
1157        while i < symbols.len() {
1158            let file = Arc::clone(&symbols[i].file);
1159            let start = i;
1160            while i < symbols.len() && symbols[i].file == file {
1161                i += 1;
1162            }
1163            symbols_by_file.insert(file, start..i);
1164        }
1165        Self {
1166            issues,
1167            type_envs,
1168            symbols,
1169            symbols_by_file,
1170        }
1171    }
1172}
1173
1174impl AnalysisResult {
1175    pub fn error_count(&self) -> usize {
1176        self.issues
1177            .iter()
1178            .filter(|i| i.severity == mir_issues::Severity::Error)
1179            .count()
1180    }
1181
1182    pub fn warning_count(&self) -> usize {
1183        self.issues
1184            .iter()
1185            .filter(|i| i.severity == mir_issues::Severity::Warning)
1186            .count()
1187    }
1188
1189    /// Group issues by source file.
1190    pub fn issues_by_file(&self) -> HashMap<std::sync::Arc<str>, Vec<&Issue>> {
1191        let mut map: HashMap<std::sync::Arc<str>, Vec<&Issue>> = HashMap::new();
1192        for issue in &self.issues {
1193            map.entry(issue.location.file.clone())
1194                .or_default()
1195                .push(issue);
1196        }
1197        map
1198    }
1199
1200    /// Return the innermost resolved symbol whose span contains `byte_offset`
1201    /// in `file`, or `None` if no symbol was recorded at that position.
1202    pub fn symbol_at(
1203        &self,
1204        file: &str,
1205        byte_offset: u32,
1206    ) -> Option<&crate::symbol::ResolvedSymbol> {
1207        let range = self.symbols_by_file.get(file)?;
1208        self.symbols[range.clone()]
1209            .iter()
1210            .filter(|s| s.span.start <= byte_offset && byte_offset < s.span.end)
1211            .min_by_key(|s| s.span.end - s.span.start)
1212    }
1213}