// mir_analyzer/project.rs
1/// Project-level orchestration: file discovery, pass 1, pass 2.
2use std::mem::ManuallyDrop;
3use std::path::{Path, PathBuf};
4use std::sync::Arc;
5
6use rayon::prelude::*;
7
8use std::collections::{HashMap, HashSet};
9
10use crate::cache::{hash_content, AnalysisCache};
11use crate::db::{
12    collect_file_definitions, collect_file_definitions_uncached, FileDefinitions, MirDatabase,
13    MirDb, SourceFile,
14};
15use crate::pass2::Pass2Driver;
16use crate::php_version::PhpVersion;
17use mir_issues::Issue;
18use mir_types::Union;
19use salsa::Setter as _;
20
21pub use crate::pass2::merge_return_types;
22
/// Project-wide analysis driver: owns the optional disk cache, the PSR-4
/// autoloader mapping, stub-loading state, and the Salsa database used for
/// incremental Pass-1 memoization.
pub struct ProjectAnalyzer {
    /// Optional cache — when `Some`, Pass 2 results are read/written per file.
    pub cache: Option<AnalysisCache>,
    /// Called once after each file completes Pass 2 (used for progress reporting).
    pub on_file_done: Option<Arc<dyn Fn() + Send + Sync>>,
    /// PSR-4 autoloader mapping from composer.json, if available.
    pub psr4: Option<Arc<crate::composer::Psr4Map>>,
    /// Whether stubs have already been loaded (to avoid double-loading).
    /// Flipped via `swap` in `load_stubs`, so at most one caller performs the load.
    stubs_loaded: std::sync::atomic::AtomicBool,
    /// When true, run dead code detection at the end of analysis.
    pub find_dead_code: bool,
    /// Target PHP language version. `None` means "not configured"; resolved to
    /// `PhpVersion::LATEST` when passed down to `StatementsAnalyzer`.
    pub php_version: Option<PhpVersion>,
    /// Additional stub files to parse before analysis (absolute paths).
    pub stub_files: Vec<PathBuf>,
    /// Additional stub directories to walk and parse before analysis (absolute paths).
    pub stub_dirs: Vec<PathBuf>,
    /// Salsa database for incremental Pass-1 memoization, paired with the map
    /// of registered `SourceFile` inputs keyed by file path.
    /// `MirDb` is `Send` but `!Sync` (thread-local query state); `Mutex`
    /// provides the `Sync` bound rayon requires without needing `T: Sync`.
    salsa: std::sync::Mutex<(MirDb, HashMap<Arc<str>, SourceFile>)>,
}
46
/// A parsed PHP file whose AST borrows from its own `arena` and `source`.
/// The `'static` lifetimes on `parsed` are a manually-maintained fiction:
/// see `ParsedProjectFile::new` and the `Drop` impl for the invariants.
struct ParsedProjectFile {
    /// Path of the file, shared across passes.
    file: Arc<str>,
    /// Full source text; `parsed` borrows from it.
    source: Arc<str>,
    /// Parse result with lifetimes widened to `'static`; must be destroyed
    /// before `arena` and `source` (enforced by the manual `Drop`).
    parsed: ManuallyDrop<php_rs_parser::ParseResult<'static, 'static>>,
    /// Bump arena owning the AST's backing storage; dropped last.
    arena: ManuallyDrop<Box<bumpalo::Bump>>,
}
53
impl ParsedProjectFile {
    /// Parse `source` into a self-contained bundle that owns both the arena
    /// and the source the AST borrows from.
    ///
    /// The arena is boxed, so moving the struct never relocates the memory
    /// the widened references point into.
    fn new(file: Arc<str>, source: Arc<str>) -> Self {
        let arena = Box::new(bumpalo::Bump::new());
        let parsed = php_rs_parser::parse(&arena, &source);
        // SAFETY: `parsed` borrows from `arena` and `source`, both owned by this
        // struct and kept alive until `Drop`. `Drop` manually destroys `parsed`
        // before releasing either owner, so the widened lifetimes never escape.
        let parsed = unsafe {
            std::mem::transmute::<
                php_rs_parser::ParseResult<'_, '_>,
                php_rs_parser::ParseResult<'static, 'static>,
            >(parsed)
        };
        Self {
            file,
            source,
            parsed: ManuallyDrop::new(parsed),
            arena: ManuallyDrop::new(arena),
        }
    }

    /// Borrow the raw source text.
    fn source(&self) -> &str {
        self.source.as_ref()
    }

    /// Borrow the parse result, re-narrowing the `'static` lifetimes to `&self`.
    fn parsed(&self) -> &php_rs_parser::ParseResult<'_, '_> {
        &self.parsed
    }
}
83
impl Drop for ParsedProjectFile {
    fn drop(&mut self) {
        // SAFETY: each `ManuallyDrop` is dropped exactly once, here. `parsed`
        // must be destroyed first because it borrows from `arena` (and
        // `source`); releasing the arena before the AST would leave the
        // parse result dangling.
        unsafe {
            ManuallyDrop::drop(&mut self.parsed);
            ManuallyDrop::drop(&mut self.arena);
        }
    }
}
92
// SAFETY: after construction the parsed AST and source map are read-only. The
// bump arena is never mutated again; it only owns backing storage for AST nodes
// and is dropped after all parallel analysis has completed. Both impls are
// needed so rayon workers can share `&ParsedProjectFile` across threads.
unsafe impl Send for ParsedProjectFile {}
unsafe impl Sync for ParsedProjectFile {}
98
99impl ProjectAnalyzer {
    /// Create a `ProjectAnalyzer` with no cache, no PSR-4 map, and default
    /// settings; the Salsa database starts empty.
    pub fn new() -> Self {
        Self {
            cache: None,
            on_file_done: None,
            psr4: None,
            stubs_loaded: std::sync::atomic::AtomicBool::new(false),
            find_dead_code: false,
            php_version: None,
            stub_files: Vec::new(),
            stub_dirs: Vec::new(),
            salsa: std::sync::Mutex::new((MirDb::default(), HashMap::new())),
        }
    }
113
114    /// Create a `ProjectAnalyzer` with a disk-backed cache stored under `cache_dir`.
115    pub fn with_cache(cache_dir: &Path) -> Self {
116        Self {
117            cache: Some(AnalysisCache::open(cache_dir)),
118            on_file_done: None,
119            psr4: None,
120            stubs_loaded: std::sync::atomic::AtomicBool::new(false),
121            find_dead_code: false,
122            php_version: None,
123            stub_files: Vec::new(),
124            stub_dirs: Vec::new(),
125            salsa: std::sync::Mutex::new((MirDb::default(), HashMap::new())),
126        }
127    }
128
129    /// Create a `ProjectAnalyzer` from a project root containing `composer.json`.
130    /// Returns the analyzer (with `psr4` set) and the `Psr4Map` so callers can
131    /// call `map.project_files()` / `map.vendor_files()`.
132    pub fn from_composer(
133        root: &Path,
134    ) -> Result<(Self, crate::composer::Psr4Map), crate::composer::ComposerError> {
135        let map = crate::composer::Psr4Map::from_composer(root)?;
136        let psr4 = Arc::new(map.clone());
137        let analyzer = Self {
138            cache: None,
139            on_file_done: None,
140            psr4: Some(psr4),
141            stubs_loaded: std::sync::atomic::AtomicBool::new(false),
142            find_dead_code: false,
143            php_version: None,
144            stub_files: Vec::new(),
145            stub_dirs: Vec::new(),
146            salsa: std::sync::Mutex::new((MirDb::default(), HashMap::new())),
147        };
148        Ok((analyzer, map))
149    }
150
151    /// Set the target PHP version.
152    pub fn with_php_version(mut self, version: PhpVersion) -> Self {
153        self.php_version = Some(version);
154        self
155    }
156
157    /// Resolve the configured PHP version, defaulting to `PhpVersion::LATEST`
158    /// when none has been set.
159    fn resolved_php_version(&self) -> PhpVersion {
160        self.php_version.unwrap_or(PhpVersion::LATEST)
161    }
162
163    fn type_exists(&self, fqcn: &str) -> bool {
164        let db = self.snapshot_db();
165        crate::db::type_exists_via_db(&db, fqcn)
166    }
167
168    /// Acquire a cheap clone of the salsa db for a read-only query.
169    /// The lock is held only for the duration of the clone, so concurrent
170    /// readers never serialize on each other or on writes longer than the
171    /// clone itself.
172    fn snapshot_db(&self) -> MirDb {
173        let guard = self.salsa.lock().expect("salsa lock poisoned");
174        guard.0.clone()
175    }
176
    /// Internal: expose the salsa Mutex for unit tests that need a `&dyn MirDatabase`.
    /// Hidden from rustdoc; not part of the supported public API.
    #[doc(hidden)]
    pub fn salsa_db_for_test(&self) -> &std::sync::Mutex<(MirDb, HashMap<Arc<str>, SourceFile>)> {
        &self.salsa
    }
182
183    /// Look up the source location of a class member (method, property, or
184    /// class constant / enum case) by walking the inheritance chain through
185    /// the salsa db.  Returns `None` if no member with that name exists, or
186    /// if the member has no recorded location.
187    pub fn member_location(
188        &self,
189        fqcn: &str,
190        member_name: &str,
191    ) -> Option<mir_codebase::storage::Location> {
192        let db = self.snapshot_db();
193        crate::db::member_location_via_db(&db, fqcn, member_name)
194    }
195
196    pub fn symbol_location(&self, symbol: &str) -> Option<mir_codebase::storage::Location> {
197        let db = self.snapshot_db();
198        db.lookup_class_node(symbol)
199            .filter(|n| n.active(&db))
200            .and_then(|n| n.location(&db))
201            .or_else(|| {
202                db.lookup_function_node(symbol)
203                    .filter(|n| n.active(&db))
204                    .and_then(|n| n.location(&db))
205            })
206    }
207
208    pub fn reference_locations(&self, symbol: &str) -> Vec<(Arc<str>, u32, u16, u16)> {
209        let db = self.snapshot_db();
210        db.reference_locations(symbol)
211    }
212
    /// Load PHP built-in stubs. Called automatically by `analyze` if not done yet.
    /// Stubs are filtered against the configured target PHP version (or
    /// `PhpVersion::LATEST` if none was set).
    ///
    /// The atomic `swap` guarantees at most one caller performs the load.
    /// NOTE(review): a concurrent second caller returns immediately, possibly
    /// before the first finishes ingesting — confirm callers don't rely on
    /// stubs being fully loaded when this returns.
    pub fn load_stubs(&self) {
        if !self
            .stubs_loaded
            .swap(true, std::sync::atomic::Ordering::SeqCst)
        {
            let php_version = self.resolved_php_version();
            // Built-in stubs are sliced in parallel; each worker takes the
            // salsa lock only for its own ingest, keeping hold times short.
            crate::stubs::stub_files()
                .par_iter()
                .for_each(|(filename, content)| {
                    let slice =
                        crate::stubs::stub_slice_from_source(filename, content, Some(php_version));
                    let mut guard = self.salsa.lock().expect("salsa lock poisoned");
                    guard.0.ingest_stub_slice(&slice);
                });

            // User-supplied stub files/dirs are ingested serially after the
            // built-ins (the `for_each` above completes first), under a
            // single lock acquisition.
            let mut guard = self.salsa.lock().expect("salsa lock poisoned");
            for slice in crate::stubs::user_stub_slices(&self.stub_files, &self.stub_dirs) {
                guard.0.ingest_stub_slice(&slice);
            }
        }
    }
237
    /// Pass 1 for a single lazily-loaded source: register (or update) the
    /// Salsa `SourceFile` input, run the memoized definition-collection
    /// query, then ingest the resulting slice into the canonical db.
    ///
    /// NOTE(review): the two separate lock scopes keep hold times short, but
    /// between them another thread can observe the file registered yet not
    /// ingested — confirm that window is acceptable.
    fn collect_and_ingest_source(&self, file: Arc<str>, src: &str) -> FileDefinitions {
        let file_defs = {
            let mut guard = self.salsa.lock().expect("salsa lock poisoned");
            let (ref mut db, ref mut files) = *guard;
            // Reuse the existing SourceFile input when present so Salsa can
            // memoize across calls; only write `text` when it actually changed.
            let salsa_file = match files.get(&file) {
                Some(&sf) => {
                    if sf.text(db).as_ref() != src {
                        sf.set_text(db).to(Arc::from(src));
                    }
                    sf
                }
                None => {
                    let sf = SourceFile::new(db, file.clone(), Arc::from(src));
                    files.insert(file.clone(), sf);
                    sf
                }
            };
            collect_file_definitions(db, salsa_file)
        };

        {
            let mut guard = self.salsa.lock().expect("salsa lock poisoned");
            guard.0.ingest_stub_slice(&file_defs.slice);
        }
        file_defs
    }
264
265    /// Run the full analysis pipeline on a set of file paths.
266    pub fn analyze(&self, paths: &[PathBuf]) -> AnalysisResult {
267        let mut all_issues = Vec::new();
268
269        // ---- Load PHP built-in stubs (before Pass 1 so user code can override)
270        self.load_stubs();
271
272        // ---- Pass 1: read files in parallel ----------------------------------
273        let parsed_files: Vec<ParsedProjectFile> = paths
274            .par_iter()
275            .filter_map(|path| match std::fs::read_to_string(path) {
276                Ok(src) => {
277                    let file = Arc::from(path.to_string_lossy().as_ref());
278                    Some(ParsedProjectFile::new(file, Arc::from(src)))
279                }
280                Err(e) => {
281                    eprintln!("Cannot read {}: {}", path.display(), e);
282                    None
283                }
284            })
285            .collect();
286
287        let file_data: Vec<(Arc<str>, Arc<str>)> = parsed_files
288            .iter()
289            .map(|parsed| (parsed.file.clone(), parsed.source.clone()))
290            .collect();
291
292        // ---- Pre-Pass-2 invalidation: evict dependents of changed files ------
293        if let Some(cache) = &self.cache {
294            let changed: Vec<String> = file_data
295                .par_iter()
296                .filter_map(|(f, src)| {
297                    let h = hash_content(src.as_ref());
298                    if cache.get(f, &h).is_none() {
299                        Some(f.to_string())
300                    } else {
301                        None
302                    }
303                })
304                .collect();
305            if !changed.is_empty() {
306                cache.evict_with_dependents(&changed);
307            }
308        }
309
310        // ---- Register Salsa source inputs for incremental follow-up calls ----
311        {
312            let mut guard = self.salsa.lock().expect("salsa lock poisoned");
313            let (ref mut db, ref mut files) = *guard;
314            for parsed in &parsed_files {
315                match files.get(parsed.file.as_ref()) {
316                    Some(&sf) => {
317                        if sf.text(db).as_ref() != parsed.source() {
318                            sf.set_text(db).to(parsed.source.clone());
319                        }
320                    }
321                    None => {
322                        let sf = SourceFile::new(db, parsed.file.clone(), parsed.source.clone());
323                        files.insert(parsed.file.clone(), sf);
324                    }
325                }
326            }
327        }
328
329        // ---- Pass 1: definition collection from the already-parsed AST -------
330        let file_defs: Vec<FileDefinitions> = parsed_files
331            .par_iter()
332            .map(|parsed| {
333                let parse_result = parsed.parsed();
334                let mut all_issues: Vec<Issue> = parse_result
335                    .errors
336                    .iter()
337                    .map(|err| {
338                        Issue::new(
339                            mir_issues::IssueKind::ParseError {
340                                message: err.to_string(),
341                            },
342                            mir_issues::Location {
343                                file: parsed.file.clone(),
344                                line: 1,
345                                line_end: 1,
346                                col_start: 0,
347                                col_end: 0,
348                            },
349                        )
350                    })
351                    .collect();
352                let collector = crate::collector::DefinitionCollector::new_for_slice(
353                    parsed.file.clone(),
354                    parsed.source(),
355                    &parse_result.source_map,
356                );
357                let (slice, collector_issues) = collector.collect_slice(&parse_result.program);
358                all_issues.extend(collector_issues);
359                FileDefinitions {
360                    slice: Arc::new(slice),
361                    issues: Arc::new(all_issues),
362                }
363            })
364            .collect();
365
366        let mut files_with_parse_errors: std::collections::HashSet<Arc<str>> =
367            std::collections::HashSet::new();
368        let mut files_needing_inference: std::collections::HashSet<Arc<str>> =
369            std::collections::HashSet::new();
370        {
371            let mut guard = self.salsa.lock().expect("salsa lock poisoned");
372            let (ref mut db, _) = *guard;
373            for defs in file_defs {
374                for issue in defs.issues.iter() {
375                    if matches!(issue.kind, mir_issues::IssueKind::ParseError { .. }) {
376                        files_with_parse_errors.insert(issue.location.file.clone());
377                    }
378                }
379                if stub_slice_needs_inference(&defs.slice) {
380                    if let Some(file) = defs.slice.file.as_ref() {
381                        files_needing_inference.insert(file.clone());
382                    }
383                }
384                db.ingest_stub_slice(&defs.slice);
385                all_issues.extend(Arc::unwrap_or_clone(defs.issues));
386            }
387        }
388
389        // ---- Lazy-load unknown classes via PSR-4 (issue #50) ----------------
390        if let Some(psr4) = &self.psr4 {
391            self.lazy_load_missing_classes(psr4.clone(), &mut all_issues);
392        }
393
394        // ---- Resolve @psalm-import-type declarations now that all Pass 1
395        // classes (including their `type_aliases`) are populated.
396        // ---- Build reverse dep graph and persist it for the next run ---------
397        if let Some(cache) = &self.cache {
398            let db_snapshot = {
399                let guard = self.salsa.lock().expect("salsa lock poisoned");
400                guard.0.clone()
401            };
402            let rev = build_reverse_deps(&db_snapshot);
403            cache.set_reverse_deps(rev);
404        }
405
406        // ---- Class-level checks (M11) ----------------------------------------
407        // `class_db` is scoped tightly: it must be dropped before the priming
408        // sweep's `commit_inferred_return_types` call below, otherwise the
409        // setter's `Storage::cancel_others` blocks waiting for this clone's
410        // Arc to drop (strong-count==1 invariant).
411        let analyzed_file_set: std::collections::HashSet<std::sync::Arc<str>> =
412            file_data.iter().map(|(f, _)| f.clone()).collect();
413        {
414            let class_db = {
415                let guard = self.salsa.lock().expect("salsa lock poisoned");
416                guard.0.clone()
417            };
418            let class_issues =
419                crate::class::ClassAnalyzer::with_files(&class_db, analyzed_file_set, &file_data)
420                    .analyze_all();
421            all_issues.extend(class_issues);
422        }
423
424        // ---- S5-PR10b: clone the salsa db once per parallel sweep so each
425        // rayon worker gets its own clone (Salsa databases are `Send` but
426        // `!Sync`; cloning shares the underlying memoization storage).
427        let db_priming = {
428            let guard = self.salsa.lock().expect("salsa lock poisoned");
429            guard.0.clone()
430        };
431
432        // ---- Pass 2 priming: populate inferred_return_type for all functions  --
433        // Run a first inference-only sweep so that cross-file inferred return
434        // types are available before the issue-emitting pass below (G6).
435        //
436        // Inferred types are collected into a thread-safe buffer during the
437        // parallel sweep and committed to the Salsa db serially after the sweep
438        // returns. Using `rayon::in_place_scope` ensures all worker threads and
439        // their thread-local Salsa state drop before we commit to the canonical db.
440        let filtered_parsed: Vec<_> = parsed_files
441            .par_iter()
442            .filter(|parsed| {
443                !files_with_parse_errors.contains(&parsed.file)
444                    && files_needing_inference.contains(&parsed.file)
445            })
446            .collect();
447
448        let (functions, methods) =
449            run_inference_sweep(db_priming, filtered_parsed, self.resolved_php_version());
450
451        {
452            let mut guard = self.salsa.lock().expect("salsa lock poisoned");
453            guard.0.commit_inferred_return_types(functions, methods);
454        }
455
456        let db_main = {
457            let guard = self.salsa.lock().expect("salsa lock poisoned");
458            guard.0.clone()
459        };
460
461        // ---- Pass 2: analyze function/method bodies in parallel (M14) --------
462        let pass2_results: Vec<(Vec<Issue>, Vec<crate::symbol::ResolvedSymbol>)> = parsed_files
463            .par_iter()
464            .filter(|parsed| !files_with_parse_errors.contains(&parsed.file))
465            .map_with(db_main, |db, parsed| {
466                let driver =
467                    Pass2Driver::new(&*db as &dyn MirDatabase, self.resolved_php_version());
468                let result = if let Some(cache) = &self.cache {
469                    let h = hash_content(parsed.source());
470                    if let Some((cached_issues, ref_locs)) = cache.get(&parsed.file, &h) {
471                        db.replay_reference_locations(parsed.file.clone(), &ref_locs);
472                        (cached_issues, Vec::new())
473                    } else {
474                        let parse_result = parsed.parsed();
475                        let (issues, symbols) = driver.analyze_bodies(
476                            &parse_result.program,
477                            parsed.file.clone(),
478                            parsed.source(),
479                            &parse_result.source_map,
480                        );
481                        let ref_locs = extract_reference_locations(&*db, &parsed.file);
482                        cache.put(&parsed.file, h, issues.clone(), ref_locs);
483                        (issues, symbols)
484                    }
485                } else {
486                    let parse_result = parsed.parsed();
487                    driver.analyze_bodies(
488                        &parse_result.program,
489                        parsed.file.clone(),
490                        parsed.source(),
491                        &parse_result.source_map,
492                    )
493                };
494                if let Some(cb) = &self.on_file_done {
495                    cb();
496                }
497                result
498            })
499            .collect();
500
501        let mut all_symbols = Vec::new();
502        for (issues, symbols) in pass2_results {
503            all_issues.extend(issues);
504            all_symbols.extend(symbols);
505        }
506
507        // ---- Post-Pass-2 lazy loading: FQCNs used without `use` imports ------
508        // FQCNs in function/method bodies aren't visible until Pass 2 runs, so
509        // the pre-Pass-2 lazy load misses them.  We collect UndefinedClass names,
510        // resolve them via PSR-4, load those files, re-finalize, then re-analyze
511        // only the affected files to clear the false positives.
512        if let Some(psr4) = &self.psr4 {
513            self.lazy_load_from_body_issues(
514                psr4.clone(),
515                &file_data,
516                &files_with_parse_errors,
517                &mut all_issues,
518                &mut all_symbols,
519            );
520        }
521
522        // Persist cache hits/misses to disk
523        if let Some(cache) = &self.cache {
524            cache.flush();
525        }
526
527        // ---- Compact the reference index ------------------------------------
528        // ---- Dead-code detection (M18) --------------------------------------
529        if self.find_dead_code {
530            let salsa = self.salsa.lock().unwrap();
531            let dead_code_issues = crate::dead_code::DeadCodeAnalyzer::new(&salsa.0).analyze();
532            drop(salsa);
533            all_issues.extend(dead_code_issues);
534        }
535
536        AnalysisResult::build(all_issues, std::collections::HashMap::new(), all_symbols)
537    }
538
539    fn lazy_load_missing_classes(
540        &self,
541        psr4: Arc<crate::composer::Psr4Map>,
542        all_issues: &mut Vec<Issue>,
543    ) {
544        use std::collections::HashSet;
545
546        let max_depth = 10;
547        let mut loaded: HashSet<String> = HashSet::new();
548
549        for _ in 0..max_depth {
550            let mut to_load: Vec<(String, PathBuf)> = Vec::new();
551
552            let mut try_queue = |fqcn: &str| {
553                if !self.type_exists(fqcn) && !loaded.contains(fqcn) {
554                    if let Some(path) = psr4.resolve(fqcn) {
555                        to_load.push((fqcn.to_string(), path));
556                    }
557                }
558            };
559
560            // Drive the inheritance scan from already-ingested `ClassNode`s.
561            let mut inheritance_candidates = Vec::new();
562            let import_candidates = {
563                let guard = self.salsa.lock().expect("salsa lock poisoned");
564                let db = &guard.0;
565                for fqcn in db.active_class_node_fqcns() {
566                    let Some(node) = db.lookup_class_node(&fqcn) else {
567                        continue;
568                    };
569                    if node.is_interface(db) {
570                        for parent in node.extends(db).iter() {
571                            inheritance_candidates.push(parent.to_string());
572                        }
573                    } else if node.is_enum(db) {
574                        for iface in node.interfaces(db).iter() {
575                            inheritance_candidates.push(iface.to_string());
576                        }
577                    } else if node.is_trait(db) {
578                        for used in node.traits(db).iter() {
579                            inheritance_candidates.push(used.to_string());
580                        }
581                    } else {
582                        if let Some(parent) = node.parent(db) {
583                            inheritance_candidates.push(parent.to_string());
584                        }
585                        for iface in node.interfaces(db).iter() {
586                            inheritance_candidates.push(iface.to_string());
587                        }
588                    }
589                }
590                db.file_import_snapshots()
591                    .into_iter()
592                    .flat_map(|(_, imports)| imports.into_values())
593                    .collect::<Vec<_>>()
594            };
595            for fqcn in inheritance_candidates {
596                try_queue(&fqcn);
597            }
598
599            // Also lazy-load any type referenced via `use` imports that isn't yet
600            // in the codebase (covers enums and classes used only in type hints or
601            // static calls, which never appear in the inheritance scan above).
602            for fqcn in import_candidates {
603                try_queue(&fqcn);
604            }
605
606            if to_load.is_empty() {
607                break;
608            }
609
610            for (fqcn, path) in to_load {
611                loaded.insert(fqcn);
612                if let Ok(src) = std::fs::read_to_string(&path) {
613                    let file: Arc<str> = Arc::from(path.to_string_lossy().as_ref());
614                    let defs = self.collect_and_ingest_source(file, &src);
615                    all_issues.extend(Arc::unwrap_or_clone(defs.issues));
616                }
617            }
618        }
619    }
620
    /// Post-Pass-2 lazy loading driven by `UndefinedClass` diagnostics.
    ///
    /// FQCNs referenced only inside function/method bodies are invisible to
    /// the pre-Pass-2 inheritance/import scan. Each round: resolve every
    /// still-undefined name via PSR-4, ingest those files (and their
    /// inheritance deps), then re-run Pass 2 on the files whose diagnostics
    /// are now stale. Bounded by `max_depth` rounds.
    fn lazy_load_from_body_issues(
        &self,
        psr4: Arc<crate::composer::Psr4Map>,
        file_data: &[(Arc<str>, Arc<str>)],
        files_with_parse_errors: &HashSet<Arc<str>>,
        all_issues: &mut Vec<Issue>,
        all_symbols: &mut Vec<crate::symbol::ResolvedSymbol>,
    ) {
        use mir_issues::IssueKind;

        let max_depth = 5;
        let mut loaded: HashSet<String> = HashSet::new();

        for _ in 0..max_depth {
            // Deduplicate by FQCN: HashMap prevents loading the same class twice
            // when multiple files share the same UndefinedClass diagnostic.
            let mut to_load: HashMap<String, PathBuf> = HashMap::new();

            for issue in all_issues.iter() {
                if let IssueKind::UndefinedClass { name } = &issue.kind {
                    if !self.type_exists(name) && !loaded.contains(name) {
                        if let Some(path) = psr4.resolve(name) {
                            to_load.entry(name.clone()).or_insert(path);
                        }
                    }
                }
            }

            if to_load.is_empty() {
                break;
            }

            loaded.extend(to_load.keys().cloned());

            for path in to_load.values() {
                if let Ok(src) = std::fs::read_to_string(path) {
                    let file: Arc<str> = Arc::from(path.to_string_lossy().as_ref());
                    // NOTE(review): issues from the lazily-loaded file are
                    // discarded here, unlike in `lazy_load_missing_classes`
                    // which extends `all_issues` — confirm this is intentional.
                    let _ = self.collect_and_ingest_source(file, &src);
                }
            }

            // Load inheritance deps of newly-added types and finalize.
            // This covers e.g. `class Helper extends \App\Base` where Base is
            // also not in the initial file set.
            self.lazy_load_missing_classes(psr4.clone(), all_issues);

            // Re-analyze every file that has an UndefinedClass for a type now
            // present in the codebase — covers both direct and transitive loads.
            let files_to_reanalyze: HashSet<Arc<str>> = all_issues
                .iter()
                .filter_map(|i| {
                    if let IssueKind::UndefinedClass { name } = &i.kind {
                        if self.type_exists(name) {
                            return Some(i.location.file.clone());
                        }
                    }
                    None
                })
                .collect();

            if files_to_reanalyze.is_empty() {
                break;
            }

            // Drop stale issues/symbols for the files about to be redone.
            all_issues.retain(|i| !files_to_reanalyze.contains(&i.location.file));
            all_symbols.retain(|s| !files_to_reanalyze.contains(&s.file));

            // Two-phase reanalysis to avoid the salsa `cancel_others` deadlock:
            //
            // Phase 1: parallel inference-only Pass 2 on a cloned db. The
            //   priming clone is consumed by `gather_inferred_types`, so all
            //   per-thread db handles are dropped before we touch the canonical
            //   db.
            // Phase 1.5: single-threaded commit of the inferred return types.
            // Phase 2: parallel full Pass 2 emits the actual issues + symbols.
            //
            // The previous in-line per-file commit (commit while a `db` clone
            // was still alive in `map_with`) deadlocked salsa: `cancel_others`
            // waits for outstanding storage references and the local clone is
            // exactly one such reference.
            let sweep: Vec<(Arc<str>, Arc<str>)> = file_data
                .iter()
                .filter(|(f, _)| {
                    !files_with_parse_errors.contains(f) && files_to_reanalyze.contains(f)
                })
                .cloned()
                .collect();

            let (inferred_fns, inferred_methods) = crate::session::gather_inferred_types(
                {
                    let guard = self.salsa.lock().expect("salsa lock poisoned");
                    guard.0.clone()
                },
                &sweep,
                self.resolved_php_version(),
            );

            {
                let mut guard_db = self.salsa.lock().expect("salsa lock poisoned");
                guard_db
                    .0
                    .commit_inferred_return_types(inferred_fns, inferred_methods);
            }

            let db_full = {
                let guard = self.salsa.lock().expect("salsa lock poisoned");
                guard.0.clone()
            };

            let reanalysis: Vec<(Vec<Issue>, Vec<crate::symbol::ResolvedSymbol>)> = file_data
                .par_iter()
                .filter(|(f, _)| {
                    !files_with_parse_errors.contains(f) && files_to_reanalyze.contains(f)
                })
                .map_with(db_full, |db, (file, src)| {
                    let driver =
                        Pass2Driver::new(&*db as &dyn MirDatabase, self.resolved_php_version());
                    let arena = bumpalo::Bump::new();
                    let parsed = php_rs_parser::parse(&arena, src);
                    driver.analyze_bodies(&parsed.program, file.clone(), src, &parsed.source_map)
                })
                .collect();

            for (issues, symbols) in reanalysis {
                all_issues.extend(issues);
                all_symbols.extend(symbols);
            }
        }
    }
750
    /// Re-analyze a single file within the existing codebase.
    ///
    /// This is the incremental analysis API for LSP:
    /// 1. Removes old definitions from this file
    /// 2. Re-runs Pass 1 (definition collection) on the new content
    /// 3. Resolves any newly-collected `@psalm-import-type` declarations
    /// 4. Re-runs Pass 2 (body analysis) on this file
    /// 5. Returns the analysis result for this file only
    ///
    /// The returned result never carries type environments (`type_envs` is
    /// always empty here), and on the cache fast path the symbol list is
    /// empty as well — the cache stores only issues and reference locations.
    pub fn re_analyze_file(&self, file_path: &str, new_content: &str) -> AnalysisResult {
        // Fast path: content unchanged and cache has a valid entry — skip full re-analysis.
        if let Some(cache) = &self.cache {
            let h = hash_content(new_content);
            if let Some((issues, ref_locs)) = cache.get(file_path, &h) {
                let file: Arc<str> = Arc::from(file_path);
                // Replay the cached reference locations into the db so this
                // file's references are not lost when its analysis is skipped.
                let guard = self.salsa.lock().expect("salsa lock poisoned");
                guard.0.replay_reference_locations(file, &ref_locs);
                return AnalysisResult::build(issues, HashMap::new(), Vec::new());
            }
        }

        let file: Arc<str> = Arc::from(file_path);

        // Step 1: drop the file's previous definitions before re-collecting.
        {
            let mut guard = self.salsa.lock().expect("salsa lock poisoned");
            let (ref mut db, _) = *guard;
            db.remove_file_definitions(file_path);
        }

        // --- Salsa-backed Pass 1: memoized parse + definition collection ------
        let file_defs = {
            let mut guard = self.salsa.lock().expect("salsa lock poisoned");
            let (ref mut db, ref mut files) = *guard;
            // Reuse the existing salsa input when this path was seen before
            // (setting its text invalidates dependent queries); otherwise
            // create a fresh input and register it in the path map.
            let salsa_file = match files.get(&file) {
                Some(&sf) => {
                    sf.set_text(db).to(Arc::from(new_content));
                    sf
                }
                None => {
                    let sf = SourceFile::new(db, file.clone(), Arc::from(new_content));
                    files.insert(file.clone(), sf);
                    sf
                }
            };
            collect_file_definitions(db, salsa_file)
        };

        // Pass-1 issues (e.g. parse errors) seed the final issue list.
        let mut all_issues: Vec<Issue> = Arc::unwrap_or_clone(file_defs.issues.clone());

        // --- S2 + Pass 2: hold the Salsa lock for ClassNode upserts and body
        // analysis so the db reference is live during Pass 2 (S5).
        let symbols = {
            let mut guard = self.salsa.lock().expect("salsa lock poisoned");
            let (ref mut db, _) = *guard;

            db.ingest_stub_slice(&file_defs.slice);

            // Resolve any newly-collected @psalm-import-type declarations so
            // Pass 2 reads the imported aliases out of `type_aliases`.
            // Re-parse in the arena so Pass 2 can walk the AST.
            let arena = bumpalo::Bump::new();
            let parsed = php_rs_parser::parse(&arena, new_content);

            // Body analysis is skipped entirely when the parse failed; the
            // corresponding issues were already collected by Pass 1 above.
            if parsed.errors.is_empty() {
                // First an inference-only sweep computes undeclared return
                // types and commits them before the real pass reads them…
                let db_ref: &dyn MirDatabase = db;
                let driver = Pass2Driver::new_inference_only(db_ref, self.resolved_php_version());
                driver.analyze_bodies(
                    &parsed.program,
                    file.clone(),
                    new_content,
                    &parsed.source_map,
                );
                let inferred = driver.take_inferred_types();
                db.commit_inferred_return_types(inferred.functions, inferred.methods);

                // …then the full pass emits issues and resolved symbols.
                let db_ref: &dyn MirDatabase = db;
                let driver = Pass2Driver::new(db_ref, self.resolved_php_version());
                let (body_issues, symbols) = driver.analyze_bodies(
                    &parsed.program,
                    file.clone(),
                    new_content,
                    &parsed.source_map,
                );
                all_issues.extend(body_issues);
                symbols
            } else {
                Vec::new()
            }
        };

        // Refresh the cache: evict this file (and its dependents, which may
        // now be stale) and store the new issues + reference locations so
        // the fast path above can hit next time.
        if let Some(cache) = &self.cache {
            let h = hash_content(new_content);
            cache.evict_with_dependents(&[file_path.to_string()]);
            let guard = self.salsa.lock().expect("salsa lock poisoned");
            let ref_locs = extract_reference_locations(&guard.0, &file);
            cache.put(file_path, h, all_issues.clone(), ref_locs);
        }

        AnalysisResult::build(all_issues, HashMap::new(), symbols)
    }
850
851    /// Analyze a PHP source string without a real file path.
852    /// Useful for tests and LSP single-file mode.
853    pub fn analyze_source(source: &str) -> AnalysisResult {
854        let analyzer = ProjectAnalyzer::new();
855        let file: Arc<str> = Arc::from("<source>");
856        let mut db = MirDb::default();
857        for slice in crate::stubs::builtin_stub_slices_for_version(analyzer.resolved_php_version())
858        {
859            db.ingest_stub_slice(&slice);
860        }
861        let salsa_file = SourceFile::new(&db, file.clone(), Arc::from(source));
862        let file_defs = collect_file_definitions(&db, salsa_file);
863        db.ingest_stub_slice(&file_defs.slice);
864        let mut all_issues = Arc::unwrap_or_clone(file_defs.issues);
865        if all_issues
866            .iter()
867            .any(|issue| matches!(issue.kind, mir_issues::IssueKind::ParseError { .. }))
868        {
869            return AnalysisResult::build(all_issues, std::collections::HashMap::new(), Vec::new());
870        }
871        let mut type_envs = std::collections::HashMap::new();
872        let mut all_symbols = Vec::new();
873        let arena = bumpalo::Bump::new();
874        let result = php_rs_parser::parse(&arena, source);
875
876        let driver = Pass2Driver::new_inference_only(&db, analyzer.resolved_php_version());
877        driver.analyze_bodies(&result.program, file.clone(), source, &result.source_map);
878        let inferred = driver.take_inferred_types();
879        db.commit_inferred_return_types(inferred.functions, inferred.methods);
880
881        let driver = Pass2Driver::new(&db, analyzer.resolved_php_version());
882        all_issues.extend(driver.analyze_bodies_typed(
883            &result.program,
884            file.clone(),
885            source,
886            &result.source_map,
887            &mut type_envs,
888            &mut all_symbols,
889        ));
890        AnalysisResult::build(all_issues, type_envs, all_symbols)
891    }
892
893    /// Discover all `.php` files under a directory, recursively.
894    pub fn discover_files(root: &Path) -> Vec<PathBuf> {
895        if root.is_file() {
896            return vec![root.to_path_buf()];
897        }
898        let mut files = Vec::new();
899        collect_php_files(root, &mut files);
900        files
901    }
902
903    /// Pass 1 only: collect type definitions from `paths` into the codebase without
904    /// analyzing method bodies or emitting issues. Used to load vendor types.
905    pub fn collect_types_only(&self, paths: &[PathBuf]) {
906        let file_data: Vec<(Arc<str>, Arc<str>)> = paths
907            .par_iter()
908            .filter_map(|path| {
909                let src = std::fs::read_to_string(path).ok()?;
910                Some((
911                    Arc::from(path.to_string_lossy().as_ref()),
912                    Arc::<str>::from(src),
913                ))
914            })
915            .collect();
916
917        let source_files: Vec<SourceFile> = {
918            let mut guard = self.salsa.lock().expect("salsa lock poisoned");
919            let (ref mut db, ref mut files) = *guard;
920            file_data
921                .iter()
922                .map(|(file, src)| match files.get(file) {
923                    Some(&sf) => {
924                        if sf.text(db).as_ref() != src.as_ref() {
925                            sf.set_text(db).to(src.clone());
926                        }
927                        sf
928                    }
929                    None => {
930                        let sf = SourceFile::new(db, file.clone(), src.clone());
931                        files.insert(file.clone(), sf);
932                        sf
933                    }
934                })
935                .collect()
936        };
937
938        let db_pass1 = {
939            let guard = self.salsa.lock().expect("salsa lock poisoned");
940            guard.0.clone()
941        };
942
943        let file_defs: Vec<FileDefinitions> = source_files
944            .par_iter()
945            .map_with(db_pass1, |db, salsa_file| {
946                collect_file_definitions_uncached(&*db, *salsa_file)
947            })
948            .collect();
949
950        let mut guard = self.salsa.lock().expect("salsa lock poisoned");
951        let (ref mut db, _) = *guard;
952        for defs in file_defs {
953            db.ingest_stub_slice(&defs.slice);
954        }
955        drop(guard);
956
957        // Print profiling statistics for the collection phase.
958        crate::collector::print_collector_stats();
959    }
960}
961
962impl Default for ProjectAnalyzer {
963    fn default() -> Self {
964        Self::new()
965    }
966}
967
968// Helper: Inference sweep with rayon::in_place_scope
969
970#[allow(clippy::type_complexity)]
971fn run_inference_sweep(
972    db_priming: MirDb,
973    parsed_files: Vec<&ParsedProjectFile>,
974    php_version: PhpVersion,
975) -> (Vec<(Arc<str>, Union)>, Vec<(Arc<str>, Arc<str>, Union)>) {
976    let functions = Arc::new(std::sync::Mutex::new(Vec::new()));
977    let methods = Arc::new(std::sync::Mutex::new(Vec::new()));
978
979    rayon::in_place_scope(|s| {
980        for parsed in parsed_files {
981            let db = db_priming.clone();
982            let functions = Arc::clone(&functions);
983            let methods = Arc::clone(&methods);
984
985            s.spawn(move |_| {
986                let driver = Pass2Driver::new_inference_only(&db as &dyn MirDatabase, php_version);
987                let parse_result = parsed.parsed();
988                driver.analyze_bodies(
989                    &parse_result.program,
990                    parsed.file.clone(),
991                    parsed.source(),
992                    &parse_result.source_map,
993                );
994
995                let inferred = driver.take_inferred_types();
996                if let Ok(mut funcs) = functions.lock() {
997                    funcs.extend(inferred.functions);
998                }
999                if let Ok(mut meths) = methods.lock() {
1000                    meths.extend(inferred.methods);
1001                }
1002            });
1003        }
1004    });
1005
1006    let functions = Arc::try_unwrap(functions)
1007        .map(|mutex| mutex.into_inner().unwrap_or_default())
1008        .unwrap_or_else(|arc| arc.lock().unwrap().clone());
1009    let methods = Arc::try_unwrap(methods)
1010        .map(|mutex| mutex.into_inner().unwrap_or_default())
1011        .unwrap_or_else(|arc| arc.lock().unwrap().clone());
1012
1013    (functions, methods)
1014}
1015
1016fn stub_slice_needs_inference(slice: &mir_codebase::storage::StubSlice) -> bool {
1017    slice
1018        .functions
1019        .iter()
1020        .any(|func| func.return_type.is_none())
1021        || slice.classes.iter().any(|class| {
1022            class
1023                .own_methods
1024                .values()
1025                .any(|method| !method.is_abstract && method.return_type.is_none())
1026        })
1027        || slice.traits.iter().any(|tr| {
1028            tr.own_methods
1029                .values()
1030                .any(|method| !method.is_abstract && method.return_type.is_none())
1031        })
1032        || slice.enums.iter().any(|en| {
1033            en.own_methods
1034                .values()
1035                .any(|method| !method.is_abstract && method.return_type.is_none())
1036        })
1037}
1038
/// Recursively push every `.php` file under `dir` onto `out`.
///
/// Symlinked entries are never followed, and well-known dependency/VCS
/// directories are pruned. Unreadable directories are silently skipped.
pub(crate) fn collect_php_files(dir: &Path, out: &mut Vec<PathBuf>) {
    // Directory names that are never descended into.
    const SKIP_DIRS: [&str; 5] = ["vendor", ".git", "node_modules", ".cache", ".pnpm-store"];

    let Ok(entries) = std::fs::read_dir(dir) else {
        return;
    };
    for entry in entries.flatten() {
        let is_symlink = entry.file_type().map_or(false, |ft| ft.is_symlink());
        if is_symlink {
            continue;
        }
        let path = entry.path();
        if path.is_dir() {
            let skip = path
                .file_name()
                .and_then(|n| n.to_str())
                .map_or(false, |name| SKIP_DIRS.contains(&name));
            if !skip {
                collect_php_files(&path, out);
            }
        } else if matches!(path.extension().and_then(|e| e.to_str()), Some("php")) {
            out.push(path);
        }
    }
}
1061
1062// build_reverse_deps
1063
1064fn build_reverse_deps(db: &dyn crate::db::MirDatabase) -> HashMap<String, HashSet<String>> {
1065    let mut reverse: HashMap<String, HashSet<String>> = HashMap::new();
1066
1067    let mut add_edge = |symbol: &str, dependent_file: &str| {
1068        if let Some(defining_file) = db.symbol_defining_file(symbol) {
1069            let def = defining_file.as_ref().to_string();
1070            if def != dependent_file {
1071                reverse
1072                    .entry(def)
1073                    .or_default()
1074                    .insert(dependent_file.to_string());
1075            }
1076        }
1077    };
1078
1079    for (file, imports) in db.file_import_snapshots() {
1080        let file = file.as_ref().to_string();
1081        for fqcn in imports.values() {
1082            add_edge(fqcn, &file);
1083        }
1084    }
1085
1086    for fqcn in db.active_class_node_fqcns() {
1087        // Only true classes contribute class-direction edges in this loop.
1088        // Interface / trait / enum edges are not currently emitted here —
1089        // this function only ever read classes.
1090        let kind = match crate::db::class_kind_via_db(db, fqcn.as_ref()) {
1091            Some(k) if !k.is_interface && !k.is_trait && !k.is_enum => k,
1092            _ => continue,
1093        };
1094        let _ = kind;
1095        let Some(file) = db
1096            .symbol_defining_file(fqcn.as_ref())
1097            .map(|f| f.as_ref().to_string())
1098        else {
1099            continue;
1100        };
1101
1102        let Some(node) = db.lookup_class_node(fqcn.as_ref()) else {
1103            continue;
1104        };
1105        if let Some(parent) = node.parent(db) {
1106            add_edge(parent.as_ref(), &file);
1107        }
1108        for iface in node.interfaces(db).iter() {
1109            add_edge(iface.as_ref(), &file);
1110        }
1111        for tr in node.traits(db).iter() {
1112            add_edge(tr.as_ref(), &file);
1113        }
1114    }
1115
1116    reverse
1117}
1118
1119fn extract_reference_locations(
1120    db: &dyn crate::db::MirDatabase,
1121    file: &Arc<str>,
1122) -> Vec<(String, u32, u16, u16)> {
1123    db.extract_file_reference_locations(file.as_ref())
1124        .into_iter()
1125        .map(|(sym, line, col_start, col_end)| (sym.to_string(), line, col_start, col_end))
1126        .collect()
1127}
1128
/// Aggregated output of an analysis run (whole-project or single-file).
pub struct AnalysisResult {
    /// All issues collected during Pass 1 and Pass 2.
    pub issues: Vec<Issue>,
    /// Per-scope type environments filled by Pass 2's typed body analysis.
    /// Empty for code paths that don't request them (e.g. `re_analyze_file`).
    pub type_envs: std::collections::HashMap<crate::type_env::ScopeId, crate::type_env::TypeEnv>,
    /// Per-expression resolved symbols from Pass 2, sorted by file path.
    pub symbols: Vec<crate::symbol::ResolvedSymbol>,
    /// Maps each file path to the contiguous range within `symbols` that belongs
    /// to it. Built once after analysis; allows `symbol_at` to scan only the
    /// relevant file's slice rather than the entire codebase-wide vector.
    symbols_by_file: HashMap<Arc<str>, std::ops::Range<usize>>,
}
1139
1140impl AnalysisResult {
1141    fn build(
1142        issues: Vec<Issue>,
1143        type_envs: std::collections::HashMap<crate::type_env::ScopeId, crate::type_env::TypeEnv>,
1144        mut symbols: Vec<crate::symbol::ResolvedSymbol>,
1145    ) -> Self {
1146        symbols.sort_unstable_by(|a, b| a.file.as_ref().cmp(b.file.as_ref()));
1147        let mut symbols_by_file: HashMap<Arc<str>, std::ops::Range<usize>> = HashMap::new();
1148        let mut i = 0;
1149        while i < symbols.len() {
1150            let file = Arc::clone(&symbols[i].file);
1151            let start = i;
1152            while i < symbols.len() && symbols[i].file == file {
1153                i += 1;
1154            }
1155            symbols_by_file.insert(file, start..i);
1156        }
1157        Self {
1158            issues,
1159            type_envs,
1160            symbols,
1161            symbols_by_file,
1162        }
1163    }
1164}
1165
1166impl AnalysisResult {
1167    pub fn error_count(&self) -> usize {
1168        self.issues
1169            .iter()
1170            .filter(|i| i.severity == mir_issues::Severity::Error)
1171            .count()
1172    }
1173
1174    pub fn warning_count(&self) -> usize {
1175        self.issues
1176            .iter()
1177            .filter(|i| i.severity == mir_issues::Severity::Warning)
1178            .count()
1179    }
1180
1181    /// Group issues by source file.
1182    pub fn issues_by_file(&self) -> HashMap<std::sync::Arc<str>, Vec<&Issue>> {
1183        let mut map: HashMap<std::sync::Arc<str>, Vec<&Issue>> = HashMap::new();
1184        for issue in &self.issues {
1185            map.entry(issue.location.file.clone())
1186                .or_default()
1187                .push(issue);
1188        }
1189        map
1190    }
1191
1192    /// Return the innermost resolved symbol whose span contains `byte_offset`
1193    /// in `file`, or `None` if no symbol was recorded at that position.
1194    pub fn symbol_at(
1195        &self,
1196        file: &str,
1197        byte_offset: u32,
1198    ) -> Option<&crate::symbol::ResolvedSymbol> {
1199        let range = self.symbols_by_file.get(file)?;
1200        self.symbols[range.clone()]
1201            .iter()
1202            .filter(|s| s.span.start <= byte_offset && byte_offset < s.span.end)
1203            .min_by_key(|s| s.span.end - s.span.start)
1204    }
1205}