// mir_analyzer/project.rs
1/// Project-level orchestration: file discovery, pass 1, pass 2.
2use std::mem::ManuallyDrop;
3use std::path::{Path, PathBuf};
4use std::sync::Arc;
5
6use rayon::prelude::*;
7
8use std::collections::{HashMap, HashSet};
9
10use crate::cache::{hash_content, AnalysisCache};
11use crate::db::{
12    collect_file_definitions, collect_file_definitions_uncached, FileDefinitions, MirDatabase,
13    MirDb, SourceFile,
14};
15use crate::pass2::Pass2Driver;
16use crate::php_version::PhpVersion;
17use mir_issues::Issue;
18use mir_types::Union;
19use salsa::Setter as _;
20
21pub use crate::pass2::merge_return_types;
22
/// Two-pass, whole-project analyzer.
///
/// Pass 1 collects per-file definitions (classes, functions, constants) and
/// ingests them into the Salsa database; Pass 2 analyzes function/method
/// bodies in parallel via rayon. All shared mutable state funnels through
/// the `salsa` mutex below.
pub struct ProjectAnalyzer {
    /// Optional cache — when `Some`, Pass 2 results are read/written per file.
    pub cache: Option<AnalysisCache>,
    /// Called once after each file completes Pass 2 (used for progress reporting).
    pub on_file_done: Option<Arc<dyn Fn() + Send + Sync>>,
    /// PSR-4 autoloader mapping from composer.json, if available.
    pub psr4: Option<Arc<crate::composer::Psr4Map>>,
    /// Whether stubs have already been loaded (to avoid double-loading).
    stubs_loaded: std::sync::atomic::AtomicBool,
    /// When true, run dead code detection at the end of analysis.
    pub find_dead_code: bool,
    /// Target PHP language version. `None` means "not configured"; resolved to
    /// `PhpVersion::LATEST` when passed down to `StatementsAnalyzer`.
    pub php_version: Option<PhpVersion>,
    /// Additional stub files to parse before analysis (absolute paths).
    pub stub_files: Vec<PathBuf>,
    /// Additional stub directories to walk and parse before analysis (absolute paths).
    pub stub_dirs: Vec<PathBuf>,
    /// Salsa database for incremental Pass-1 memoization.
    /// `MirDb` is `Send` but `!Sync` (thread-local query state); `Mutex`
    /// provides the `Sync` bound rayon requires without needing `T: Sync`.
    /// The `HashMap` half maps file path -> registered `SourceFile` input.
    salsa: std::sync::Mutex<(MirDb, HashMap<Arc<str>, SourceFile>)>,
}
46
/// A parsed file bundled with everything its AST borrows from — a
/// self-referential struct. The `'static` lifetimes on `parsed` are a lie
/// told via `transmute` in `new()`; the manual `Drop` impl (not field
/// order) is what keeps this sound, by destroying `parsed` before its
/// owners are released.
struct ParsedProjectFile {
    /// Path of the file, as handed to `analyze`.
    file: Arc<str>,
    /// Full file contents; the heap data stays put when the `Arc` moves.
    source: Arc<str>,
    /// AST + source map borrowing from `arena` and `source`.
    parsed: ManuallyDrop<php_rs_parser::ParseResult<'static, 'static>>,
    /// Bump arena owning the AST's backing storage; boxed so its address is
    /// stable when this struct moves.
    arena: ManuallyDrop<Box<bumpalo::Bump>>,
}
53
impl ParsedProjectFile {
    /// Parse `source` into an owned, movable bundle of (path, source, AST,
    /// arena). The parse result borrows from the bump arena and the source
    /// string; both sit behind stable heap pointers (`Box` / `Arc`), so
    /// moving the struct does not invalidate those borrows.
    fn new(file: Arc<str>, source: Arc<str>) -> Self {
        let arena = Box::new(bumpalo::Bump::new());
        let parsed = php_rs_parser::parse(&arena, &source);
        // SAFETY: `parsed` borrows from `arena` and `source`, both owned by this
        // struct and kept alive until `Drop`. `Drop` manually destroys `parsed`
        // before releasing either owner, so the widened lifetimes never escape.
        let parsed = unsafe {
            std::mem::transmute::<
                php_rs_parser::ParseResult<'_, '_>,
                php_rs_parser::ParseResult<'static, 'static>,
            >(parsed)
        };
        Self {
            file,
            source,
            parsed: ManuallyDrop::new(parsed),
            arena: ManuallyDrop::new(arena),
        }
    }

    /// The raw source text of this file.
    fn source(&self) -> &str {
        self.source.as_ref()
    }

    /// The parsed AST + source map. The return type re-shrinks the internal
    /// `'static` lifetimes to the borrow of `self`, so the transmuted
    /// lifetimes cannot leak to callers.
    fn parsed(&self) -> &php_rs_parser::ParseResult<'_, '_> {
        &self.parsed
    }
}
83
impl Drop for ParsedProjectFile {
    fn drop(&mut self) {
        // SAFETY: each `ManuallyDrop` field is dropped exactly once, here.
        // Order matters: `parsed` (the borrower) must be destroyed before
        // `arena` (which owns its backing storage) is released; `source` is
        // dropped afterwards by the compiler as an ordinary field.
        unsafe {
            ManuallyDrop::drop(&mut self.parsed);
            ManuallyDrop::drop(&mut self.arena);
        }
    }
}
92
// SAFETY: after construction the parsed AST and source map are read-only. The
// bump arena is never mutated again; it only owns backing storage for AST nodes
// and is dropped after all parallel analysis has completed.
// (These impls are needed because `bumpalo::Bump` is `!Sync`, which blocks the
// auto-derived `Sync`; sharing is sound only under the read-only regime above.)
unsafe impl Send for ParsedProjectFile {}
unsafe impl Sync for ParsedProjectFile {}
98
99impl ProjectAnalyzer {
100    pub fn new() -> Self {
101        Self {
102            cache: None,
103            on_file_done: None,
104            psr4: None,
105            stubs_loaded: std::sync::atomic::AtomicBool::new(false),
106            find_dead_code: false,
107            php_version: None,
108            stub_files: Vec::new(),
109            stub_dirs: Vec::new(),
110            salsa: std::sync::Mutex::new((MirDb::default(), HashMap::new())),
111        }
112    }
113
114    /// Create a `ProjectAnalyzer` with a disk-backed cache stored under `cache_dir`.
115    pub fn with_cache(cache_dir: &Path) -> Self {
116        Self {
117            cache: Some(AnalysisCache::open(cache_dir)),
118            on_file_done: None,
119            psr4: None,
120            stubs_loaded: std::sync::atomic::AtomicBool::new(false),
121            find_dead_code: false,
122            php_version: None,
123            stub_files: Vec::new(),
124            stub_dirs: Vec::new(),
125            salsa: std::sync::Mutex::new((MirDb::default(), HashMap::new())),
126        }
127    }
128
129    /// Create a `ProjectAnalyzer` from a project root containing `composer.json`.
130    /// Returns the analyzer (with `psr4` set) and the `Psr4Map` so callers can
131    /// call `map.project_files()` / `map.vendor_files()`.
132    pub fn from_composer(
133        root: &Path,
134    ) -> Result<(Self, crate::composer::Psr4Map), crate::composer::ComposerError> {
135        let map = crate::composer::Psr4Map::from_composer(root)?;
136        let psr4 = Arc::new(map.clone());
137        let analyzer = Self {
138            cache: None,
139            on_file_done: None,
140            psr4: Some(psr4),
141            stubs_loaded: std::sync::atomic::AtomicBool::new(false),
142            find_dead_code: false,
143            php_version: None,
144            stub_files: Vec::new(),
145            stub_dirs: Vec::new(),
146            salsa: std::sync::Mutex::new((MirDb::default(), HashMap::new())),
147        };
148        Ok((analyzer, map))
149    }
150
151    /// Set the target PHP version.
152    pub fn with_php_version(mut self, version: PhpVersion) -> Self {
153        self.php_version = Some(version);
154        self
155    }
156
157    /// Resolve the configured PHP version, defaulting to `PhpVersion::LATEST`
158    /// when none has been set.
159    fn resolved_php_version(&self) -> PhpVersion {
160        self.php_version.unwrap_or(PhpVersion::LATEST)
161    }
162
163    fn type_exists(&self, fqcn: &str) -> bool {
164        let guard = self.salsa.lock().expect("salsa lock poisoned");
165        crate::db::type_exists_via_db(&guard.0, fqcn)
166    }
167
    /// Internal: expose the salsa Mutex for unit tests that need a `&dyn MirDatabase`.
    ///
    /// The tuple is `(db, path -> SourceFile registry)`; tests lock it and
    /// borrow the `MirDb` half. Hidden from rustdoc — not part of the public API.
    #[doc(hidden)]
    pub fn salsa_db_for_test(&self) -> &std::sync::Mutex<(MirDb, HashMap<Arc<str>, SourceFile>)> {
        &self.salsa
    }
173
174    /// Look up the source location of a class member (method, property, or
175    /// class constant / enum case) by walking the inheritance chain through
176    /// the salsa db.  Returns `None` if no member with that name exists, or
177    /// if the member has no recorded location.
178    pub fn member_location(
179        &self,
180        fqcn: &str,
181        member_name: &str,
182    ) -> Option<mir_codebase::storage::Location> {
183        let guard = self.salsa.lock().expect("salsa lock poisoned");
184        crate::db::member_location_via_db(&guard.0, fqcn, member_name)
185    }
186
187    pub fn symbol_location(&self, symbol: &str) -> Option<mir_codebase::storage::Location> {
188        let guard = self.salsa.lock().expect("salsa lock poisoned");
189        let db = &guard.0;
190        db.lookup_class_node(symbol)
191            .filter(|n| n.active(db))
192            .and_then(|n| n.location(db))
193            .or_else(|| {
194                db.lookup_function_node(symbol)
195                    .filter(|n| n.active(db))
196                    .and_then(|n| n.location(db))
197            })
198    }
199
200    pub fn reference_locations(&self, symbol: &str) -> Vec<(Arc<str>, u32, u16, u16)> {
201        let guard = self.salsa.lock().expect("salsa lock poisoned");
202        guard.0.reference_locations(symbol)
203    }
204
    /// Load PHP built-in stubs. Called automatically by `analyze` if not done yet.
    /// Stubs are filtered against the configured target PHP version (or
    /// `PhpVersion::LATEST` if none was set).
    ///
    /// NOTE(review): the `swap` gate means only the first caller does the
    /// work; a concurrent second caller sees `true` and returns immediately,
    /// possibly before the first caller has finished ingesting — confirm
    /// callers don't race `load_stubs` against `analyze` on another thread.
    pub fn load_stubs(&self) {
        // `swap` atomically flips the flag and reports the old value, so
        // exactly one caller enters the loading branch.
        if !self
            .stubs_loaded
            .swap(true, std::sync::atomic::Ordering::SeqCst)
        {
            let php_version = self.resolved_php_version();
            // Parse stub sources in parallel; ingestion is serialized by the
            // salsa mutex (one short lock per stub file).
            crate::stubs::stub_files()
                .par_iter()
                .for_each(|(filename, content)| {
                    let slice =
                        crate::stubs::stub_slice_from_source(filename, content, Some(php_version));
                    let mut guard = self.salsa.lock().expect("salsa lock poisoned");
                    guard.0.ingest_stub_slice(&slice);
                });

            // User-supplied stubs (files + walked directories) are ingested
            // after the built-ins, under a single lock acquisition.
            let mut guard = self.salsa.lock().expect("salsa lock poisoned");
            for slice in crate::stubs::user_stub_slices(&self.stub_files, &self.stub_dirs) {
                guard.0.ingest_stub_slice(&slice);
            }
        }
    }
229
230    fn collect_and_ingest_source(&self, file: Arc<str>, src: &str) -> FileDefinitions {
231        let file_defs = {
232            let mut guard = self.salsa.lock().expect("salsa lock poisoned");
233            let (ref mut db, ref mut files) = *guard;
234            let salsa_file = match files.get(&file) {
235                Some(&sf) => {
236                    if sf.text(db).as_ref() != src {
237                        sf.set_text(db).to(Arc::from(src));
238                    }
239                    sf
240                }
241                None => {
242                    let sf = SourceFile::new(db, file.clone(), Arc::from(src));
243                    files.insert(file.clone(), sf);
244                    sf
245                }
246            };
247            collect_file_definitions(db, salsa_file)
248        };
249
250        {
251            let mut guard = self.salsa.lock().expect("salsa lock poisoned");
252            guard.0.ingest_stub_slice(&file_defs.slice);
253        }
254        file_defs
255    }
256
    /// Run the full analysis pipeline on a set of file paths.
    ///
    /// Pipeline order: load stubs → parallel read+parse → cache invalidation →
    /// register Salsa inputs → Pass 1 (definitions) → PSR-4 lazy loading →
    /// class-level checks → inference priming sweep → Pass 2 (bodies) →
    /// post-Pass-2 lazy loading → optional dead-code detection.
    ///
    /// NOTE: the lock/clone choreography around the Salsa db (see the scoped
    /// blocks below) is load-bearing — clones must drop before setters run.
    pub fn analyze(&self, paths: &[PathBuf]) -> AnalysisResult {
        let mut all_issues = Vec::new();

        // ---- Load PHP built-in stubs (before Pass 1 so user code can override)
        self.load_stubs();

        // ---- Pass 1: read files in parallel ----------------------------------
        // Unreadable files are reported to stderr and skipped, not fatal.
        let parsed_files: Vec<ParsedProjectFile> = paths
            .par_iter()
            .filter_map(|path| match std::fs::read_to_string(path) {
                Ok(src) => {
                    let file = Arc::from(path.to_string_lossy().as_ref());
                    Some(ParsedProjectFile::new(file, Arc::from(src)))
                }
                Err(e) => {
                    eprintln!("Cannot read {}: {}", path.display(), e);
                    None
                }
            })
            .collect();

        // (path, source) pairs reused by later sweeps without re-reading disk.
        let file_data: Vec<(Arc<str>, Arc<str>)> = parsed_files
            .iter()
            .map(|parsed| (parsed.file.clone(), parsed.source.clone()))
            .collect();

        // ---- Pre-Pass-2 invalidation: evict dependents of changed files ------
        // A file is "changed" when the cache has no entry for its current hash.
        if let Some(cache) = &self.cache {
            let changed: Vec<String> = file_data
                .par_iter()
                .filter_map(|(f, src)| {
                    let h = hash_content(src.as_ref());
                    if cache.get(f, &h).is_none() {
                        Some(f.to_string())
                    } else {
                        None
                    }
                })
                .collect();
            if !changed.is_empty() {
                cache.evict_with_dependents(&changed);
            }
        }

        // ---- Register Salsa source inputs for incremental follow-up calls ----
        {
            let mut guard = self.salsa.lock().expect("salsa lock poisoned");
            let (ref mut db, ref mut files) = *guard;
            for parsed in &parsed_files {
                match files.get(parsed.file.as_ref()) {
                    Some(&sf) => {
                        // Only touch the input on real content change, so
                        // unchanged files keep their memoized Pass-1 results.
                        if sf.text(db).as_ref() != parsed.source() {
                            sf.set_text(db).to(parsed.source.clone());
                        }
                    }
                    None => {
                        let sf = SourceFile::new(db, parsed.file.clone(), parsed.source.clone());
                        files.insert(parsed.file.clone(), sf);
                    }
                }
            }
        }

        // ---- Pass 1: definition collection from the already-parsed AST -------
        let file_defs: Vec<FileDefinitions> = parsed_files
            .par_iter()
            .map(|parsed| {
                let parse_result = parsed.parsed();
                // Shadows the outer accumulator: this is the per-file issue
                // list, seeded with one ParseError issue per parser error.
                let mut all_issues: Vec<Issue> = parse_result
                    .errors
                    .iter()
                    .map(|err| {
                        Issue::new(
                            mir_issues::IssueKind::ParseError {
                                message: err.to_string(),
                            },
                            mir_issues::Location {
                                file: parsed.file.clone(),
                                line: 1,
                                line_end: 1,
                                col_start: 0,
                                col_end: 0,
                            },
                        )
                    })
                    .collect();
                let collector = crate::collector::DefinitionCollector::new_for_slice(
                    parsed.file.clone(),
                    parsed.source(),
                    &parse_result.source_map,
                );
                let (slice, collector_issues) = collector.collect_slice(&parse_result.program);
                all_issues.extend(collector_issues);
                FileDefinitions {
                    slice: Arc::new(slice),
                    issues: Arc::new(all_issues),
                }
            })
            .collect();

        // Files with parse errors are excluded from Pass 2; files whose slice
        // needs inference feed the priming sweep below.
        let mut files_with_parse_errors: std::collections::HashSet<Arc<str>> =
            std::collections::HashSet::new();
        let mut files_needing_inference: std::collections::HashSet<Arc<str>> =
            std::collections::HashSet::new();
        {
            let mut guard = self.salsa.lock().expect("salsa lock poisoned");
            let (ref mut db, _) = *guard;
            for defs in file_defs {
                for issue in defs.issues.iter() {
                    if matches!(issue.kind, mir_issues::IssueKind::ParseError { .. }) {
                        files_with_parse_errors.insert(issue.location.file.clone());
                    }
                }
                if stub_slice_needs_inference(&defs.slice) {
                    if let Some(file) = defs.slice.file.as_ref() {
                        files_needing_inference.insert(file.clone());
                    }
                }
                db.ingest_stub_slice(&defs.slice);
                all_issues.extend(Arc::unwrap_or_clone(defs.issues));
            }
        }

        // ---- Lazy-load unknown classes via PSR-4 (issue #50) ----------------
        if let Some(psr4) = &self.psr4 {
            self.lazy_load_missing_classes(psr4.clone(), &mut all_issues);
        }

        // ---- Resolve @psalm-import-type declarations now that all Pass 1
        // classes (including their `type_aliases`) are populated.
        // ---- Build reverse dep graph and persist it for the next run ---------
        if let Some(cache) = &self.cache {
            let db_snapshot = {
                let guard = self.salsa.lock().expect("salsa lock poisoned");
                guard.0.clone()
            };
            let rev = build_reverse_deps(&db_snapshot);
            cache.set_reverse_deps(rev);
        }

        // ---- Class-level checks (M11) ----------------------------------------
        // `class_db` is scoped tightly: it must be dropped before the priming
        // sweep's `commit_inferred_return_types` call below, otherwise the
        // setter's `Storage::cancel_others` blocks waiting for this clone's
        // Arc to drop (strong-count==1 invariant).
        let analyzed_file_set: std::collections::HashSet<std::sync::Arc<str>> =
            file_data.iter().map(|(f, _)| f.clone()).collect();
        {
            let class_db = {
                let guard = self.salsa.lock().expect("salsa lock poisoned");
                guard.0.clone()
            };
            let class_issues =
                crate::class::ClassAnalyzer::with_files(&class_db, analyzed_file_set, &file_data)
                    .analyze_all();
            all_issues.extend(class_issues);
        }

        // ---- S5-PR10b: clone the salsa db once per parallel sweep so each
        // rayon worker gets its own clone (Salsa databases are `Send` but
        // `!Sync`; cloning shares the underlying memoization storage).
        let db_priming = {
            let guard = self.salsa.lock().expect("salsa lock poisoned");
            guard.0.clone()
        };

        // ---- Pass 2 priming: populate inferred_return_type for all functions  --
        // Run a first inference-only sweep so that cross-file inferred return
        // types are available before the issue-emitting pass below (G6).
        //
        // Inferred types are collected into a thread-safe buffer during the
        // parallel sweep and committed to the Salsa db serially after the sweep
        // returns. Using `rayon::in_place_scope` ensures all worker threads and
        // their thread-local Salsa state drop before we commit to the canonical db.
        let filtered_parsed: Vec<_> = parsed_files
            .par_iter()
            .filter(|parsed| {
                !files_with_parse_errors.contains(&parsed.file)
                    && files_needing_inference.contains(&parsed.file)
            })
            .collect();

        let (functions, methods) =
            run_inference_sweep(db_priming, filtered_parsed, self.resolved_php_version());

        {
            let mut guard = self.salsa.lock().expect("salsa lock poisoned");
            guard.0.commit_inferred_return_types(functions, methods);
        }

        let db_main = {
            let guard = self.salsa.lock().expect("salsa lock poisoned");
            guard.0.clone()
        };

        // ---- Pass 2: analyze function/method bodies in parallel (M14) --------
        let pass2_results: Vec<(Vec<Issue>, Vec<crate::symbol::ResolvedSymbol>)> = parsed_files
            .par_iter()
            .filter(|parsed| !files_with_parse_errors.contains(&parsed.file))
            .map_with(db_main, |db, parsed| {
                let driver =
                    Pass2Driver::new(&*db as &dyn MirDatabase, self.resolved_php_version());
                let result = if let Some(cache) = &self.cache {
                    let h = hash_content(parsed.source());
                    if let Some((cached_issues, ref_locs)) = cache.get(&parsed.file, &h) {
                        // Cache hit: replay reference locations and reuse the
                        // stored issues; resolved symbols are not cached.
                        db.replay_reference_locations(parsed.file.clone(), &ref_locs);
                        (cached_issues, Vec::new())
                    } else {
                        let parse_result = parsed.parsed();
                        let (issues, symbols) = driver.analyze_bodies(
                            &parse_result.program,
                            parsed.file.clone(),
                            parsed.source(),
                            &parse_result.source_map,
                        );
                        let ref_locs = extract_reference_locations(&*db, &parsed.file);
                        cache.put(&parsed.file, h, issues.clone(), ref_locs);
                        (issues, symbols)
                    }
                } else {
                    let parse_result = parsed.parsed();
                    driver.analyze_bodies(
                        &parse_result.program,
                        parsed.file.clone(),
                        parsed.source(),
                        &parse_result.source_map,
                    )
                };
                // Progress callback fires for cache hits and misses alike.
                if let Some(cb) = &self.on_file_done {
                    cb();
                }
                result
            })
            .collect();

        let mut all_symbols = Vec::new();
        for (issues, symbols) in pass2_results {
            all_issues.extend(issues);
            all_symbols.extend(symbols);
        }

        // ---- Post-Pass-2 lazy loading: FQCNs used without `use` imports ------
        // FQCNs in function/method bodies aren't visible until Pass 2 runs, so
        // the pre-Pass-2 lazy load misses them.  We collect UndefinedClass names,
        // resolve them via PSR-4, load those files, re-finalize, then re-analyze
        // only the affected files to clear the false positives.
        if let Some(psr4) = &self.psr4 {
            self.lazy_load_from_body_issues(
                psr4.clone(),
                &file_data,
                &files_with_parse_errors,
                &mut all_issues,
                &mut all_symbols,
            );
        }

        // Persist cache hits/misses to disk
        if let Some(cache) = &self.cache {
            cache.flush();
        }

        // ---- Compact the reference index ------------------------------------
        // ---- Dead-code detection (M18) --------------------------------------
        if self.find_dead_code {
            let salsa = self.salsa.lock().unwrap();
            let dead_code_issues = crate::dead_code::DeadCodeAnalyzer::new(&salsa.0).analyze();
            drop(salsa);
            all_issues.extend(dead_code_issues);
        }

        AnalysisResult::build(all_issues, std::collections::HashMap::new(), all_symbols)
    }
530
531    fn lazy_load_missing_classes(
532        &self,
533        psr4: Arc<crate::composer::Psr4Map>,
534        all_issues: &mut Vec<Issue>,
535    ) {
536        use std::collections::HashSet;
537
538        let max_depth = 10;
539        let mut loaded: HashSet<String> = HashSet::new();
540
541        for _ in 0..max_depth {
542            let mut to_load: Vec<(String, PathBuf)> = Vec::new();
543
544            let mut try_queue = |fqcn: &str| {
545                if !self.type_exists(fqcn) && !loaded.contains(fqcn) {
546                    if let Some(path) = psr4.resolve(fqcn) {
547                        to_load.push((fqcn.to_string(), path));
548                    }
549                }
550            };
551
552            // Drive the inheritance scan from already-ingested `ClassNode`s.
553            let mut inheritance_candidates = Vec::new();
554            let import_candidates = {
555                let guard = self.salsa.lock().expect("salsa lock poisoned");
556                let db = &guard.0;
557                for fqcn in db.active_class_node_fqcns() {
558                    let Some(node) = db.lookup_class_node(&fqcn) else {
559                        continue;
560                    };
561                    if node.is_interface(db) {
562                        for parent in node.extends(db).iter() {
563                            inheritance_candidates.push(parent.to_string());
564                        }
565                    } else if node.is_enum(db) {
566                        for iface in node.interfaces(db).iter() {
567                            inheritance_candidates.push(iface.to_string());
568                        }
569                    } else if node.is_trait(db) {
570                        for used in node.traits(db).iter() {
571                            inheritance_candidates.push(used.to_string());
572                        }
573                    } else {
574                        if let Some(parent) = node.parent(db) {
575                            inheritance_candidates.push(parent.to_string());
576                        }
577                        for iface in node.interfaces(db).iter() {
578                            inheritance_candidates.push(iface.to_string());
579                        }
580                    }
581                }
582                db.file_import_snapshots()
583                    .into_iter()
584                    .flat_map(|(_, imports)| imports.into_values())
585                    .collect::<Vec<_>>()
586            };
587            for fqcn in inheritance_candidates {
588                try_queue(&fqcn);
589            }
590
591            // Also lazy-load any type referenced via `use` imports that isn't yet
592            // in the codebase (covers enums and classes used only in type hints or
593            // static calls, which never appear in the inheritance scan above).
594            for fqcn in import_candidates {
595                try_queue(&fqcn);
596            }
597
598            if to_load.is_empty() {
599                break;
600            }
601
602            for (fqcn, path) in to_load {
603                loaded.insert(fqcn);
604                if let Ok(src) = std::fs::read_to_string(&path) {
605                    let file: Arc<str> = Arc::from(path.to_string_lossy().as_ref());
606                    let defs = self.collect_and_ingest_source(file, &src);
607                    all_issues.extend(Arc::unwrap_or_clone(defs.issues));
608                }
609            }
610        }
611    }
612
    /// Load classes discovered only through `UndefinedClass` diagnostics from
    /// Pass 2 bodies, then re-run Pass 2 on the files those diagnostics came
    /// from to clear the false positives. Bounded fixed-point loop like
    /// `lazy_load_missing_classes`, but driven by issues instead of the
    /// inheritance graph.
    fn lazy_load_from_body_issues(
        &self,
        psr4: Arc<crate::composer::Psr4Map>,
        file_data: &[(Arc<str>, Arc<str>)],
        files_with_parse_errors: &HashSet<Arc<str>>,
        all_issues: &mut Vec<Issue>,
        all_symbols: &mut Vec<crate::symbol::ResolvedSymbol>,
    ) {
        use mir_issues::IssueKind;

        let max_depth = 5;
        let mut loaded: HashSet<String> = HashSet::new();

        for _ in 0..max_depth {
            // Deduplicate by FQCN: HashMap prevents loading the same class twice
            // when multiple files share the same UndefinedClass diagnostic.
            let mut to_load: HashMap<String, PathBuf> = HashMap::new();

            for issue in all_issues.iter() {
                if let IssueKind::UndefinedClass { name } = &issue.kind {
                    if !self.type_exists(name) && !loaded.contains(name) {
                        if let Some(path) = psr4.resolve(name) {
                            to_load.entry(name.clone()).or_insert(path);
                        }
                    }
                }
            }

            if to_load.is_empty() {
                break;
            }

            loaded.extend(to_load.keys().cloned());

            // Ingest the newly-resolved files; their own issues are discarded
            // here (only the re-analysis below reports on these files).
            for path in to_load.values() {
                if let Ok(src) = std::fs::read_to_string(path) {
                    let file: Arc<str> = Arc::from(path.to_string_lossy().as_ref());
                    let _ = self.collect_and_ingest_source(file, &src);
                }
            }

            // Load inheritance deps of newly-added types and finalize.
            // This covers e.g. `class Helper extends \App\Base` where Base is
            // also not in the initial file set.
            self.lazy_load_missing_classes(psr4.clone(), all_issues);

            // Re-analyze every file that has an UndefinedClass for a type now
            // present in the codebase — covers both direct and transitive loads.
            let files_to_reanalyze: HashSet<Arc<str>> = all_issues
                .iter()
                .filter_map(|i| {
                    if let IssueKind::UndefinedClass { name } = &i.kind {
                        if self.type_exists(name) {
                            return Some(i.location.file.clone());
                        }
                    }
                    None
                })
                .collect();

            if files_to_reanalyze.is_empty() {
                break;
            }

            // Drop all stale results for the files about to be re-analyzed.
            all_issues.retain(|i| !files_to_reanalyze.contains(&i.location.file));
            all_symbols.retain(|s| !files_to_reanalyze.contains(&s.file));

            let db_reanalysis = {
                let guard = self.salsa.lock().expect("salsa lock poisoned");
                guard.0.clone()
            };

            // Lazy-loaded files re-run Pass 2 to pick up the just-loaded
            // definitions; collect inferred return types for a serial commit
            // For lazy-loaded files, we run the priming sweep inline during reanalysis.
            let reanalysis: Vec<(Vec<Issue>, Vec<crate::symbol::ResolvedSymbol>)> = file_data
                .par_iter()
                .filter(|(f, _)| {
                    !files_with_parse_errors.contains(f) && files_to_reanalyze.contains(f)
                })
                .map_with(db_reanalysis, |db, (file, src)| {
                    // First run is inference-only; its return value is
                    // discarded on purpose — it exists to fill the driver's
                    // inferred-type buffer.
                    let driver = Pass2Driver::new_inference_only(
                        &*db as &dyn MirDatabase,
                        self.resolved_php_version(),
                    );
                    let arena = bumpalo::Bump::new();
                    let parsed = php_rs_parser::parse(&arena, src);
                    driver.analyze_bodies(&parsed.program, file.clone(), src, &parsed.source_map);

                    // Commit inferred types under the salsa mutex from inside
                    // the rayon worker; the lock serializes these commits.
                    let inferred = driver.take_inferred_types();
                    let mut guard_db = self.salsa.lock().expect("salsa lock poisoned");
                    guard_db
                        .0
                        .commit_inferred_return_types(inferred.functions, inferred.methods);
                    drop(guard_db);

                    // Second run emits the real issues/symbols, re-parsing
                    // into a fresh arena.
                    let driver =
                        Pass2Driver::new(&*db as &dyn MirDatabase, self.resolved_php_version());
                    let arena = bumpalo::Bump::new();
                    let parsed = php_rs_parser::parse(&arena, src);
                    driver.analyze_bodies(&parsed.program, file.clone(), src, &parsed.source_map)
                })
                .collect();

            for (issues, symbols) in reanalysis {
                all_issues.extend(issues);
                all_symbols.extend(symbols);
            }
        }
    }
723
    /// Re-analyze a single file within the existing codebase.
    ///
    /// This is the incremental analysis API for LSP:
    /// 1. Removes old definitions from this file
    /// 2. Re-runs Pass 1 (definition collection) on the new content
    /// 3. Resolves any newly-collected `@psalm-import-type` declarations
    /// 4. Re-runs Pass 2 (body analysis) on this file
    /// 5. Returns the analysis result for this file only
    ///
    /// Locking: the Salsa db lives behind `self.salsa` (a `Mutex`); this method
    /// takes and releases that lock several times, so concurrent callers may
    /// observe intermediate states between the numbered steps above.
    pub fn re_analyze_file(&self, file_path: &str, new_content: &str) -> AnalysisResult {
        // Fast path: content unchanged and cache has a valid entry — skip full re-analysis.
        if let Some(cache) = &self.cache {
            let h = hash_content(new_content);
            if let Some((issues, ref_locs)) = cache.get(file_path, &h) {
                let file: Arc<str> = Arc::from(file_path);
                let guard = self.salsa.lock().expect("salsa lock poisoned");
                // Restore this file's cached symbol-reference locations into
                // the db so reference queries work without re-running Pass 2.
                guard.0.replay_reference_locations(file, &ref_locs);
                // Cached results carry no type environments or symbols.
                return AnalysisResult::build(issues, HashMap::new(), Vec::new());
            }
        }

        let file: Arc<str> = Arc::from(file_path);

        // Step 1: drop this file's previous definitions before re-collecting,
        // so renamed or deleted symbols don't linger in the codebase.
        {
            let mut guard = self.salsa.lock().expect("salsa lock poisoned");
            let (ref mut db, _) = *guard;
            db.remove_file_definitions(file_path);
        }

        // --- Salsa-backed Pass 1: memoized parse + definition collection ------
        let file_defs = {
            let mut guard = self.salsa.lock().expect("salsa lock poisoned");
            let (ref mut db, ref mut files) = *guard;
            // Reuse the existing Salsa input for this path (setting its text
            // invalidates dependent queries), or register a new input on
            // first sight of the file.
            let salsa_file = match files.get(&file) {
                Some(&sf) => {
                    sf.set_text(db).to(Arc::from(new_content));
                    sf
                }
                None => {
                    let sf = SourceFile::new(db, file.clone(), Arc::from(new_content));
                    files.insert(file.clone(), sf);
                    sf
                }
            };
            collect_file_definitions(db, salsa_file)
        };

        // Pass 1 issues (parse errors etc.) seed the final issue list.
        let mut all_issues: Vec<Issue> = Arc::unwrap_or_clone(file_defs.issues.clone());

        // --- S2 + Pass 2: hold the Salsa lock for ClassNode upserts and body
        // analysis so the db reference is live during Pass 2 (S5).
        let symbols = {
            let mut guard = self.salsa.lock().expect("salsa lock poisoned");
            let (ref mut db, _) = *guard;

            db.ingest_stub_slice(&file_defs.slice);

            // Resolve any newly-collected @psalm-import-type declarations so
            // Pass 2 reads the imported aliases out of `type_aliases`.
            // Re-parse in the arena so Pass 2 can walk the AST.
            let arena = bumpalo::Bump::new();
            let parsed = php_rs_parser::parse(&arena, new_content);

            if parsed.errors.is_empty() {
                // First sweep: inference-only driver computes return types for
                // bodies without declared ones, then commits them to the db...
                let db_ref: &dyn MirDatabase = db;
                let driver = Pass2Driver::new_inference_only(db_ref, self.resolved_php_version());
                driver.analyze_bodies(
                    &parsed.program,
                    file.clone(),
                    new_content,
                    &parsed.source_map,
                );
                let inferred = driver.take_inferred_types();
                db.commit_inferred_return_types(inferred.functions, inferred.methods);

                // ...second sweep: the full driver produces issues + symbols.
                let db_ref: &dyn MirDatabase = db;
                let driver = Pass2Driver::new(db_ref, self.resolved_php_version());
                let (body_issues, symbols) = driver.analyze_bodies(
                    &parsed.program,
                    file.clone(),
                    new_content,
                    &parsed.source_map,
                );
                all_issues.extend(body_issues);
                symbols
            } else {
                // Parse failed: skip body analysis. The parse errors were
                // already reported through Pass 1's issues above.
                Vec::new()
            }
        };

        // Persist the fresh result, evicting anything that depended on the
        // previous version of this file.
        if let Some(cache) = &self.cache {
            let h = hash_content(new_content);
            cache.evict_with_dependents(&[file_path.to_string()]);
            let guard = self.salsa.lock().expect("salsa lock poisoned");
            let ref_locs = extract_reference_locations(&guard.0, &file);
            cache.put(file_path, h, all_issues.clone(), ref_locs);
        }

        // No type environments are retained on this path.
        AnalysisResult::build(all_issues, HashMap::new(), symbols)
    }
823
824    /// Analyze a PHP source string without a real file path.
825    /// Useful for tests and LSP single-file mode.
826    pub fn analyze_source(source: &str) -> AnalysisResult {
827        let analyzer = ProjectAnalyzer::new();
828        let file: Arc<str> = Arc::from("<source>");
829        let mut db = MirDb::default();
830        for slice in crate::stubs::builtin_stub_slices_for_version(analyzer.resolved_php_version())
831        {
832            db.ingest_stub_slice(&slice);
833        }
834        let salsa_file = SourceFile::new(&db, file.clone(), Arc::from(source));
835        let file_defs = collect_file_definitions(&db, salsa_file);
836        db.ingest_stub_slice(&file_defs.slice);
837        let mut all_issues = Arc::unwrap_or_clone(file_defs.issues);
838        if all_issues
839            .iter()
840            .any(|issue| matches!(issue.kind, mir_issues::IssueKind::ParseError { .. }))
841        {
842            return AnalysisResult::build(all_issues, std::collections::HashMap::new(), Vec::new());
843        }
844        let mut type_envs = std::collections::HashMap::new();
845        let mut all_symbols = Vec::new();
846        let arena = bumpalo::Bump::new();
847        let result = php_rs_parser::parse(&arena, source);
848
849        let driver = Pass2Driver::new_inference_only(&db, analyzer.resolved_php_version());
850        driver.analyze_bodies(&result.program, file.clone(), source, &result.source_map);
851        let inferred = driver.take_inferred_types();
852        db.commit_inferred_return_types(inferred.functions, inferred.methods);
853
854        let driver = Pass2Driver::new(&db, analyzer.resolved_php_version());
855        all_issues.extend(driver.analyze_bodies_typed(
856            &result.program,
857            file.clone(),
858            source,
859            &result.source_map,
860            &mut type_envs,
861            &mut all_symbols,
862        ));
863        AnalysisResult::build(all_issues, type_envs, all_symbols)
864    }
865
866    /// Discover all `.php` files under a directory, recursively.
867    pub fn discover_files(root: &Path) -> Vec<PathBuf> {
868        if root.is_file() {
869            return vec![root.to_path_buf()];
870        }
871        let mut files = Vec::new();
872        collect_php_files(root, &mut files);
873        files
874    }
875
    /// Pass 1 only: collect type definitions from `paths` into the codebase without
    /// analyzing method bodies or emitting issues. Used to load vendor types.
    pub fn collect_types_only(&self, paths: &[PathBuf]) {
        // Read every file up front, in parallel; unreadable files are skipped.
        let file_data: Vec<(Arc<str>, Arc<str>)> = paths
            .par_iter()
            .filter_map(|path| {
                let src = std::fs::read_to_string(path).ok()?;
                Some((
                    Arc::from(path.to_string_lossy().as_ref()),
                    Arc::<str>::from(src),
                ))
            })
            .collect();

        // Register (or update) each file as a Salsa input, under the lock.
        let source_files: Vec<SourceFile> = {
            let mut guard = self.salsa.lock().expect("salsa lock poisoned");
            let (ref mut db, ref mut files) = *guard;
            file_data
                .iter()
                .map(|(file, src)| match files.get(file) {
                    Some(&sf) => {
                        // Only touch the input when the text actually changed,
                        // preserving Salsa memoization for unchanged files.
                        if sf.text(db).as_ref() != src.as_ref() {
                            sf.set_text(db).to(src.clone());
                        }
                        sf
                    }
                    None => {
                        let sf = SourceFile::new(db, file.clone(), src.clone());
                        files.insert(file.clone(), sf);
                        sf
                    }
                })
                .collect()
        };

        // Snapshot the db so rayon workers can query without holding the lock.
        let db_pass1 = {
            let guard = self.salsa.lock().expect("salsa lock poisoned");
            guard.0.clone()
        };

        // Pass 1 in parallel; `map_with` hands each worker its own clone of
        // the snapshot (MirDb is Send but !Sync — see the `salsa` field docs).
        let file_defs: Vec<FileDefinitions> = source_files
            .par_iter()
            .map_with(db_pass1, |db, salsa_file| {
                collect_file_definitions_uncached(&*db, *salsa_file)
            })
            .collect();

        // Merge the collected definition slices back into the shared db.
        let mut guard = self.salsa.lock().expect("salsa lock poisoned");
        let (ref mut db, _) = *guard;
        for defs in file_defs {
            db.ingest_stub_slice(&defs.slice);
        }
        drop(guard);

        // Print profiling statistics for the collection phase.
        crate::collector::print_collector_stats();
    }
933}
934
935impl Default for ProjectAnalyzer {
936    fn default() -> Self {
937        Self::new()
938    }
939}
940
941// Helper: Inference sweep with rayon::in_place_scope
942
943#[allow(clippy::type_complexity)]
944fn run_inference_sweep(
945    db_priming: MirDb,
946    parsed_files: Vec<&ParsedProjectFile>,
947    php_version: PhpVersion,
948) -> (Vec<(Arc<str>, Union)>, Vec<(Arc<str>, Arc<str>, Union)>) {
949    let functions = Arc::new(std::sync::Mutex::new(Vec::new()));
950    let methods = Arc::new(std::sync::Mutex::new(Vec::new()));
951
952    rayon::in_place_scope(|s| {
953        for parsed in parsed_files {
954            let db = db_priming.clone();
955            let functions = Arc::clone(&functions);
956            let methods = Arc::clone(&methods);
957
958            s.spawn(move |_| {
959                let driver = Pass2Driver::new_inference_only(&db as &dyn MirDatabase, php_version);
960                let parse_result = parsed.parsed();
961                driver.analyze_bodies(
962                    &parse_result.program,
963                    parsed.file.clone(),
964                    parsed.source(),
965                    &parse_result.source_map,
966                );
967
968                let inferred = driver.take_inferred_types();
969                if let Ok(mut funcs) = functions.lock() {
970                    funcs.extend(inferred.functions);
971                }
972                if let Ok(mut meths) = methods.lock() {
973                    meths.extend(inferred.methods);
974                }
975            });
976        }
977    });
978
979    let functions = Arc::try_unwrap(functions)
980        .map(|mutex| mutex.into_inner().unwrap_or_default())
981        .unwrap_or_else(|arc| arc.lock().unwrap().clone());
982    let methods = Arc::try_unwrap(methods)
983        .map(|mutex| mutex.into_inner().unwrap_or_default())
984        .unwrap_or_else(|arc| arc.lock().unwrap().clone());
985
986    (functions, methods)
987}
988
989fn stub_slice_needs_inference(slice: &mir_codebase::storage::StubSlice) -> bool {
990    slice
991        .functions
992        .iter()
993        .any(|func| func.return_type.is_none())
994        || slice.classes.iter().any(|class| {
995            class
996                .own_methods
997                .values()
998                .any(|method| !method.is_abstract && method.return_type.is_none())
999        })
1000        || slice.traits.iter().any(|tr| {
1001            tr.own_methods
1002                .values()
1003                .any(|method| !method.is_abstract && method.return_type.is_none())
1004        })
1005        || slice.enums.iter().any(|en| {
1006            en.own_methods
1007                .values()
1008                .any(|method| !method.is_abstract && method.return_type.is_none())
1009        })
1010}
1011
/// Recursively append every `.php` file under `dir` to `out`.
///
/// Symlinks are skipped entirely (avoids cycles and duplicate visits), as are
/// well-known dependency/VCS/cache directories. Unreadable directories are
/// silently ignored (best-effort walk).
pub(crate) fn collect_php_files(dir: &Path, out: &mut Vec<PathBuf>) {
    let Ok(entries) = std::fs::read_dir(dir) else {
        return;
    };
    for entry in entries.flatten() {
        let is_symlink = entry.file_type().map(|ft| ft.is_symlink()).unwrap_or(false);
        if is_symlink {
            continue;
        }
        let path = entry.path();
        if path.is_dir() {
            let dir_name = path.file_name().and_then(|n| n.to_str()).unwrap_or("");
            let skip = matches!(
                dir_name,
                "vendor" | ".git" | "node_modules" | ".cache" | ".pnpm-store"
            );
            if !skip {
                collect_php_files(&path, out);
            }
        } else if path.extension().and_then(|e| e.to_str()) == Some("php") {
            out.push(path);
        }
    }
}
1034
1035// build_reverse_deps
1036
1037fn build_reverse_deps(db: &dyn crate::db::MirDatabase) -> HashMap<String, HashSet<String>> {
1038    let mut reverse: HashMap<String, HashSet<String>> = HashMap::new();
1039
1040    let mut add_edge = |symbol: &str, dependent_file: &str| {
1041        if let Some(defining_file) = db.symbol_defining_file(symbol) {
1042            let def = defining_file.as_ref().to_string();
1043            if def != dependent_file {
1044                reverse
1045                    .entry(def)
1046                    .or_default()
1047                    .insert(dependent_file.to_string());
1048            }
1049        }
1050    };
1051
1052    for (file, imports) in db.file_import_snapshots() {
1053        let file = file.as_ref().to_string();
1054        for fqcn in imports.values() {
1055            add_edge(fqcn, &file);
1056        }
1057    }
1058
1059    for fqcn in db.active_class_node_fqcns() {
1060        // Only true classes contribute class-direction edges in this loop.
1061        // Interface / trait / enum edges are not currently emitted here —
1062        // this function only ever read classes.
1063        let kind = match crate::db::class_kind_via_db(db, fqcn.as_ref()) {
1064            Some(k) if !k.is_interface && !k.is_trait && !k.is_enum => k,
1065            _ => continue,
1066        };
1067        let _ = kind;
1068        let Some(file) = db
1069            .symbol_defining_file(fqcn.as_ref())
1070            .map(|f| f.as_ref().to_string())
1071        else {
1072            continue;
1073        };
1074
1075        let Some(node) = db.lookup_class_node(fqcn.as_ref()) else {
1076            continue;
1077        };
1078        if let Some(parent) = node.parent(db) {
1079            add_edge(parent.as_ref(), &file);
1080        }
1081        for iface in node.interfaces(db).iter() {
1082            add_edge(iface.as_ref(), &file);
1083        }
1084        for tr in node.traits(db).iter() {
1085            add_edge(tr.as_ref(), &file);
1086        }
1087    }
1088
1089    reverse
1090}
1091
1092fn extract_reference_locations(
1093    db: &dyn crate::db::MirDatabase,
1094    file: &Arc<str>,
1095) -> Vec<(String, u32, u16, u16)> {
1096    db.extract_file_reference_locations(file.as_ref())
1097        .into_iter()
1098        .map(|(sym, line, col_start, col_end)| (sym.to_string(), line, col_start, col_end))
1099        .collect()
1100}
1101
/// The outcome of analyzing a file, a source string, or a whole project.
pub struct AnalysisResult {
    /// All issues produced by Pass 1 and Pass 2.
    pub issues: Vec<Issue>,
    /// Per-scope type environments from Pass 2. Populated by `analyze_source`;
    /// other entry points (e.g. `re_analyze_file`) pass an empty map.
    pub type_envs: std::collections::HashMap<crate::type_env::ScopeId, crate::type_env::TypeEnv>,
    /// Per-expression resolved symbols from Pass 2, sorted by file path.
    pub symbols: Vec<crate::symbol::ResolvedSymbol>,
    /// Maps each file path to the contiguous range within `symbols` that belongs
    /// to it. Built once after analysis; allows `symbol_at` to scan only the
    /// relevant file's slice rather than the entire codebase-wide vector.
    symbols_by_file: HashMap<Arc<str>, std::ops::Range<usize>>,
}
1112
1113impl AnalysisResult {
1114    fn build(
1115        issues: Vec<Issue>,
1116        type_envs: std::collections::HashMap<crate::type_env::ScopeId, crate::type_env::TypeEnv>,
1117        mut symbols: Vec<crate::symbol::ResolvedSymbol>,
1118    ) -> Self {
1119        symbols.sort_unstable_by(|a, b| a.file.as_ref().cmp(b.file.as_ref()));
1120        let mut symbols_by_file: HashMap<Arc<str>, std::ops::Range<usize>> = HashMap::new();
1121        let mut i = 0;
1122        while i < symbols.len() {
1123            let file = Arc::clone(&symbols[i].file);
1124            let start = i;
1125            while i < symbols.len() && symbols[i].file == file {
1126                i += 1;
1127            }
1128            symbols_by_file.insert(file, start..i);
1129        }
1130        Self {
1131            issues,
1132            type_envs,
1133            symbols,
1134            symbols_by_file,
1135        }
1136    }
1137}
1138
1139impl AnalysisResult {
1140    pub fn error_count(&self) -> usize {
1141        self.issues
1142            .iter()
1143            .filter(|i| i.severity == mir_issues::Severity::Error)
1144            .count()
1145    }
1146
1147    pub fn warning_count(&self) -> usize {
1148        self.issues
1149            .iter()
1150            .filter(|i| i.severity == mir_issues::Severity::Warning)
1151            .count()
1152    }
1153
1154    /// Group issues by source file.
1155    pub fn issues_by_file(&self) -> HashMap<std::sync::Arc<str>, Vec<&Issue>> {
1156        let mut map: HashMap<std::sync::Arc<str>, Vec<&Issue>> = HashMap::new();
1157        for issue in &self.issues {
1158            map.entry(issue.location.file.clone())
1159                .or_default()
1160                .push(issue);
1161        }
1162        map
1163    }
1164
1165    /// Return the innermost resolved symbol whose span contains `byte_offset`
1166    /// in `file`, or `None` if no symbol was recorded at that position.
1167    pub fn symbol_at(
1168        &self,
1169        file: &str,
1170        byte_offset: u32,
1171    ) -> Option<&crate::symbol::ResolvedSymbol> {
1172        let range = self.symbols_by_file.get(file)?;
1173        self.symbols[range.clone()]
1174            .iter()
1175            .filter(|s| s.span.start <= byte_offset && byte_offset < s.span.end)
1176            .min_by_key(|s| s.span.end - s.span.start)
1177    }
1178}