Skip to main content

mir_analyzer/
project.rs

1/// Project-level orchestration: file discovery, pass 1, pass 2.
2use std::mem::ManuallyDrop;
3use std::path::{Path, PathBuf};
4use std::sync::Arc;
5
6use rayon::prelude::*;
7
8use std::collections::{HashMap, HashSet};
9
10use crate::cache::{hash_content, AnalysisCache};
11use crate::db::{
12    collect_file_definitions, collect_file_definitions_uncached, FileDefinitions, MirDatabase,
13    MirDb, SourceFile,
14};
15use crate::pass2::Pass2Driver;
16use crate::php_version::PhpVersion;
17use mir_issues::Issue;
18use salsa::Setter as _;
19
20// Re-exports for downstream callers in this crate.
21pub use crate::pass2::merge_return_types;
22
23// ---------------------------------------------------------------------------
24// ProjectAnalyzer
25// ---------------------------------------------------------------------------
26
/// Orchestrates whole-project analysis: stub loading, Pass 1 (definition
/// collection), PSR-4 lazy loading, and parallel Pass 2 (body analysis).
pub struct ProjectAnalyzer {
    /// Optional cache — when `Some`, Pass 2 results are read/written per file.
    pub cache: Option<AnalysisCache>,
    /// Called once after each file completes Pass 2 (used for progress reporting).
    pub on_file_done: Option<Arc<dyn Fn() + Send + Sync>>,
    /// PSR-4 autoloader mapping from composer.json, if available.
    pub psr4: Option<Arc<crate::composer::Psr4Map>>,
    /// Whether stubs have already been loaded (to avoid double-loading).
    stubs_loaded: std::sync::atomic::AtomicBool,
    /// When true, run dead code detection at the end of analysis.
    pub find_dead_code: bool,
    /// Target PHP language version. `None` means "not configured"; resolved to
    /// `PhpVersion::LATEST` when passed down to `StatementsAnalyzer`.
    pub php_version: Option<PhpVersion>,
    /// Additional stub files to parse before analysis (absolute paths).
    pub stub_files: Vec<PathBuf>,
    /// Additional stub directories to walk and parse before analysis (absolute paths).
    pub stub_dirs: Vec<PathBuf>,
    /// Salsa database for incremental Pass-1 memoization, paired with the map
    /// of registered `SourceFile` inputs keyed by file path.
    /// `MirDb` is `Send` but `!Sync` (thread-local query state); `Mutex`
    /// provides the `Sync` bound rayon requires without needing `T: Sync`.
    salsa: std::sync::Mutex<(MirDb, HashMap<Arc<str>, SourceFile>)>,
}
50
/// A parsed file bundling the AST together with the source text and bump
/// arena that back it, so the whole unit can be moved across threads.
///
/// `parsed` borrows from both `arena` and `source`; its lifetimes are widened
/// to `'static` at construction, and the fields are `ManuallyDrop` so the
/// `Drop` impl can destroy `parsed` before releasing its owners.
struct ParsedProjectFile {
    file: Arc<str>,
    source: Arc<str>,
    // Drop order matters: `parsed` must be destroyed before `arena`/`source`.
    parsed: ManuallyDrop<php_rs_parser::ParseResult<'static, 'static>>,
    arena: ManuallyDrop<Box<bumpalo::Bump>>,
}
57
impl ParsedProjectFile {
    /// Parse `source` into a self-contained bundle: the bump arena that backs
    /// the AST is boxed (stable address) and stored alongside the result.
    fn new(file: Arc<str>, source: Arc<str>) -> Self {
        let arena = Box::new(bumpalo::Bump::new());
        let parsed = php_rs_parser::parse(&arena, &source);
        // SAFETY: `parsed` borrows from `arena` and `source`, both owned by this
        // struct and kept alive until `Drop`. `Drop` manually destroys `parsed`
        // before releasing either owner, so the widened lifetimes never escape.
        let parsed = unsafe {
            std::mem::transmute::<
                php_rs_parser::ParseResult<'_, '_>,
                php_rs_parser::ParseResult<'static, 'static>,
            >(parsed)
        };
        Self {
            file,
            source,
            parsed: ManuallyDrop::new(parsed),
            arena: ManuallyDrop::new(arena),
        }
    }

    /// Borrow the original source text.
    fn source(&self) -> &str {
        self.source.as_ref()
    }

    /// Borrow the parse result. The elided output lifetimes are tied to
    /// `&self`, so callers never observe the internal `'static` widening.
    fn parsed(&self) -> &php_rs_parser::ParseResult<'_, '_> {
        &self.parsed
    }
}
87
impl Drop for ParsedProjectFile {
    fn drop(&mut self) {
        // SAFETY: both fields are `ManuallyDrop` and dropped exactly once,
        // here. `parsed` is destroyed first so its borrows of `arena` and
        // `source` never outlive their owners; neither field is used again.
        unsafe {
            ManuallyDrop::drop(&mut self.parsed);
            ManuallyDrop::drop(&mut self.arena);
        }
    }
}
96
// SAFETY: after construction the parsed AST and source map are read-only. The
// bump arena is never mutated again; it only owns backing storage for AST nodes
// and is dropped after all parallel analysis has completed. The manual impls
// exist presumably because the arena (and/or the AST it backs) is not `Sync`
// on its own — TODO(review): confirm against `bumpalo`/parser type definitions.
unsafe impl Send for ParsedProjectFile {}
unsafe impl Sync for ParsedProjectFile {}
102
103impl ProjectAnalyzer {
104    pub fn new() -> Self {
105        Self {
106            cache: None,
107            on_file_done: None,
108            psr4: None,
109            stubs_loaded: std::sync::atomic::AtomicBool::new(false),
110            find_dead_code: false,
111            php_version: None,
112            stub_files: Vec::new(),
113            stub_dirs: Vec::new(),
114            salsa: std::sync::Mutex::new((MirDb::default(), HashMap::new())),
115        }
116    }
117
118    /// Create a `ProjectAnalyzer` with a disk-backed cache stored under `cache_dir`.
119    pub fn with_cache(cache_dir: &Path) -> Self {
120        Self {
121            cache: Some(AnalysisCache::open(cache_dir)),
122            on_file_done: None,
123            psr4: None,
124            stubs_loaded: std::sync::atomic::AtomicBool::new(false),
125            find_dead_code: false,
126            php_version: None,
127            stub_files: Vec::new(),
128            stub_dirs: Vec::new(),
129            salsa: std::sync::Mutex::new((MirDb::default(), HashMap::new())),
130        }
131    }
132
133    /// Create a `ProjectAnalyzer` from a project root containing `composer.json`.
134    /// Returns the analyzer (with `psr4` set) and the `Psr4Map` so callers can
135    /// call `map.project_files()` / `map.vendor_files()`.
136    pub fn from_composer(
137        root: &Path,
138    ) -> Result<(Self, crate::composer::Psr4Map), crate::composer::ComposerError> {
139        let map = crate::composer::Psr4Map::from_composer(root)?;
140        let psr4 = Arc::new(map.clone());
141        let analyzer = Self {
142            cache: None,
143            on_file_done: None,
144            psr4: Some(psr4),
145            stubs_loaded: std::sync::atomic::AtomicBool::new(false),
146            find_dead_code: false,
147            php_version: None,
148            stub_files: Vec::new(),
149            stub_dirs: Vec::new(),
150            salsa: std::sync::Mutex::new((MirDb::default(), HashMap::new())),
151        };
152        Ok((analyzer, map))
153    }
154
155    /// Set the target PHP version.
156    pub fn with_php_version(mut self, version: PhpVersion) -> Self {
157        self.php_version = Some(version);
158        self
159    }
160
161    /// Resolve the configured PHP version, defaulting to `PhpVersion::LATEST`
162    /// when none has been set.
163    fn resolved_php_version(&self) -> PhpVersion {
164        self.php_version.unwrap_or(PhpVersion::LATEST)
165    }
166
167    fn type_exists(&self, fqcn: &str) -> bool {
168        let guard = self.salsa.lock().expect("salsa lock poisoned");
169        crate::db::type_exists_via_db(&guard.0, fqcn)
170    }
171
172    /// Internal: expose the salsa Mutex for unit tests that need a `&dyn MirDatabase`.
173    #[doc(hidden)]
174    pub fn salsa_db_for_test(&self) -> &std::sync::Mutex<(MirDb, HashMap<Arc<str>, SourceFile>)> {
175        &self.salsa
176    }
177
178    /// Look up the source location of a class member (method, property, or
179    /// class constant / enum case) by walking the inheritance chain through
180    /// the salsa db.  Returns `None` if no member with that name exists, or
181    /// if the member has no recorded location.
182    pub fn member_location(
183        &self,
184        fqcn: &str,
185        member_name: &str,
186    ) -> Option<mir_codebase::storage::Location> {
187        let guard = self.salsa.lock().expect("salsa lock poisoned");
188        crate::db::member_location_via_db(&guard.0, fqcn, member_name)
189    }
190
191    pub fn symbol_location(&self, symbol: &str) -> Option<mir_codebase::storage::Location> {
192        let guard = self.salsa.lock().expect("salsa lock poisoned");
193        let db = &guard.0;
194        db.lookup_class_node(symbol)
195            .filter(|n| n.active(db))
196            .and_then(|n| n.location(db))
197            .or_else(|| {
198                db.lookup_function_node(symbol)
199                    .filter(|n| n.active(db))
200                    .and_then(|n| n.location(db))
201            })
202    }
203
204    pub fn reference_locations(&self, symbol: &str) -> Vec<(Arc<str>, u32, u16, u16)> {
205        let guard = self.salsa.lock().expect("salsa lock poisoned");
206        guard.0.reference_locations(symbol)
207    }
208
209    /// Load PHP built-in stubs. Called automatically by `analyze` if not done yet.
210    /// Stubs are filtered against the configured target PHP version (or
211    /// `PhpVersion::LATEST` if none was set).
212    pub fn load_stubs(&self) {
213        if !self
214            .stubs_loaded
215            .swap(true, std::sync::atomic::Ordering::SeqCst)
216        {
217            let php_version = self.resolved_php_version();
218            crate::stubs::stub_files()
219                .par_iter()
220                .for_each(|(filename, content)| {
221                    let slice =
222                        crate::stubs::stub_slice_from_source(filename, content, Some(php_version));
223                    let mut guard = self.salsa.lock().expect("salsa lock poisoned");
224                    guard.0.ingest_stub_slice(&slice);
225                });
226
227            let mut guard = self.salsa.lock().expect("salsa lock poisoned");
228            for slice in crate::stubs::user_stub_slices(&self.stub_files, &self.stub_dirs) {
229                guard.0.ingest_stub_slice(&slice);
230            }
231        }
232    }
233
234    fn collect_and_ingest_source(&self, file: Arc<str>, src: &str) -> FileDefinitions {
235        let file_defs = {
236            let mut guard = self.salsa.lock().expect("salsa lock poisoned");
237            let (ref mut db, ref mut files) = *guard;
238            let salsa_file = match files.get(&file) {
239                Some(&sf) => {
240                    if sf.text(db).as_ref() != src {
241                        sf.set_text(db).to(Arc::from(src));
242                    }
243                    sf
244                }
245                None => {
246                    let sf = SourceFile::new(db, file.clone(), Arc::from(src));
247                    files.insert(file.clone(), sf);
248                    sf
249                }
250            };
251            collect_file_definitions(db, salsa_file)
252        };
253
254        {
255            let mut guard = self.salsa.lock().expect("salsa lock poisoned");
256            guard.0.ingest_stub_slice(&file_defs.slice);
257        }
258        file_defs
259    }
260
261    /// Run the full analysis pipeline on a set of file paths.
262    pub fn analyze(&self, paths: &[PathBuf]) -> AnalysisResult {
263        let mut all_issues = Vec::new();
264
265        // ---- Load PHP built-in stubs (before Pass 1 so user code can override)
266        self.load_stubs();
267
268        // ---- Pass 1: read files in parallel ----------------------------------
269        let parsed_files: Vec<ParsedProjectFile> = paths
270            .par_iter()
271            .filter_map(|path| match std::fs::read_to_string(path) {
272                Ok(src) => {
273                    let file = Arc::from(path.to_string_lossy().as_ref());
274                    Some(ParsedProjectFile::new(file, Arc::from(src)))
275                }
276                Err(e) => {
277                    eprintln!("Cannot read {}: {}", path.display(), e);
278                    None
279                }
280            })
281            .collect();
282
283        let file_data: Vec<(Arc<str>, Arc<str>)> = parsed_files
284            .iter()
285            .map(|parsed| (parsed.file.clone(), parsed.source.clone()))
286            .collect();
287
288        // ---- Pre-Pass-2 invalidation: evict dependents of changed files ------
289        if let Some(cache) = &self.cache {
290            let changed: Vec<String> = file_data
291                .par_iter()
292                .filter_map(|(f, src)| {
293                    let h = hash_content(src.as_ref());
294                    if cache.get(f, &h).is_none() {
295                        Some(f.to_string())
296                    } else {
297                        None
298                    }
299                })
300                .collect();
301            if !changed.is_empty() {
302                cache.evict_with_dependents(&changed);
303            }
304        }
305
306        // ---- Register Salsa source inputs for incremental follow-up calls ----
307        {
308            let mut guard = self.salsa.lock().expect("salsa lock poisoned");
309            let (ref mut db, ref mut files) = *guard;
310            for parsed in &parsed_files {
311                match files.get(parsed.file.as_ref()) {
312                    Some(&sf) => {
313                        if sf.text(db).as_ref() != parsed.source() {
314                            sf.set_text(db).to(parsed.source.clone());
315                        }
316                    }
317                    None => {
318                        let sf = SourceFile::new(db, parsed.file.clone(), parsed.source.clone());
319                        files.insert(parsed.file.clone(), sf);
320                    }
321                }
322            }
323        }
324
325        // ---- Pass 1: definition collection from the already-parsed AST -------
326        let file_defs: Vec<FileDefinitions> = parsed_files
327            .par_iter()
328            .map(|parsed| {
329                let parse_result = parsed.parsed();
330                let mut all_issues: Vec<Issue> = parse_result
331                    .errors
332                    .iter()
333                    .map(|err| {
334                        Issue::new(
335                            mir_issues::IssueKind::ParseError {
336                                message: err.to_string(),
337                            },
338                            mir_issues::Location {
339                                file: parsed.file.clone(),
340                                line: 1,
341                                line_end: 1,
342                                col_start: 0,
343                                col_end: 0,
344                            },
345                        )
346                    })
347                    .collect();
348                let collector = crate::collector::DefinitionCollector::new_for_slice(
349                    parsed.file.clone(),
350                    parsed.source(),
351                    &parse_result.source_map,
352                );
353                let (slice, collector_issues) = collector.collect_slice(&parse_result.program);
354                all_issues.extend(collector_issues);
355                FileDefinitions {
356                    slice: Arc::new(slice),
357                    issues: Arc::new(all_issues),
358                }
359            })
360            .collect();
361
362        let mut files_with_parse_errors: std::collections::HashSet<Arc<str>> =
363            std::collections::HashSet::new();
364        let mut files_needing_inference: std::collections::HashSet<Arc<str>> =
365            std::collections::HashSet::new();
366        {
367            let mut guard = self.salsa.lock().expect("salsa lock poisoned");
368            let (ref mut db, _) = *guard;
369            for defs in file_defs {
370                for issue in defs.issues.iter() {
371                    if matches!(issue.kind, mir_issues::IssueKind::ParseError { .. }) {
372                        files_with_parse_errors.insert(issue.location.file.clone());
373                    }
374                }
375                if stub_slice_needs_inference(&defs.slice) {
376                    if let Some(file) = defs.slice.file.as_ref() {
377                        files_needing_inference.insert(file.clone());
378                    }
379                }
380                db.ingest_stub_slice(&defs.slice);
381                all_issues.extend(Arc::unwrap_or_clone(defs.issues));
382            }
383        }
384
385        // ---- Lazy-load unknown classes via PSR-4 (issue #50) ----------------
386        if let Some(psr4) = &self.psr4 {
387            self.lazy_load_missing_classes(psr4.clone(), &mut all_issues);
388        }
389
390        // ---- Resolve @psalm-import-type declarations now that all Pass 1
391        // classes (including their `type_aliases`) are populated.
392        // ---- Build reverse dep graph and persist it for the next run ---------
393        if let Some(cache) = &self.cache {
394            let db_snapshot = {
395                let guard = self.salsa.lock().expect("salsa lock poisoned");
396                guard.0.clone()
397            };
398            let rev = build_reverse_deps(&db_snapshot);
399            cache.set_reverse_deps(rev);
400        }
401
402        // ---- Class-level checks (M11) ----------------------------------------
403        // `class_db` is scoped tightly: it must be dropped before the priming
404        // sweep's `commit_inferred_return_types` call below, otherwise the
405        // setter's `Storage::cancel_others` blocks waiting for this clone's
406        // Arc to drop (strong-count==1 invariant).
407        let analyzed_file_set: std::collections::HashSet<std::sync::Arc<str>> =
408            file_data.iter().map(|(f, _)| f.clone()).collect();
409        {
410            let class_db = {
411                let guard = self.salsa.lock().expect("salsa lock poisoned");
412                guard.0.clone()
413            };
414            let class_issues =
415                crate::class::ClassAnalyzer::with_files(&class_db, analyzed_file_set, &file_data)
416                    .analyze_all();
417            all_issues.extend(class_issues);
418        }
419
420        // ---- S5-PR10b: clone the salsa db once per parallel sweep so each
421        // rayon worker gets its own clone (Salsa databases are `Send` but
422        // `!Sync`; cloning shares the underlying memoization storage).
423        let db_priming = {
424            let guard = self.salsa.lock().expect("salsa lock poisoned");
425            guard.0.clone()
426        };
427
428        // ---- Pass 2 priming: populate inferred_return_type for all functions  --
429        // Run a first inference-only sweep so that cross-file inferred return
430        // types are available before the issue-emitting pass below (G6).
431        //
432        // Inferred types are also collected into a thread-safe buffer here and
433        // committed to the Salsa db serially after the sweep returns.  Writing
434        // setters from inside `for_each_with` would deadlock against
435        // `Storage::cancel_others` (which waits for sibling worker clones to
436        // drop); the post-sweep commit runs against the canonical db with
437        // strong-count==1.  See `crate::db::InferredReturnTypes`.
438        let inferred_buffer = crate::db::InferredReturnTypes::new();
439        parsed_files
440            .par_iter()
441            .filter(|parsed| {
442                !files_with_parse_errors.contains(&parsed.file)
443                    && files_needing_inference.contains(&parsed.file)
444            })
445            .for_each_with(db_priming, |db, parsed| {
446                let driver = Pass2Driver::new_inference_only(
447                    &*db as &dyn MirDatabase,
448                    self.resolved_php_version(),
449                )
450                .with_inferred_buffer(&inferred_buffer);
451                let parse_result = parsed.parsed();
452                driver.analyze_bodies(
453                    &parse_result.program,
454                    parsed.file.clone(),
455                    parsed.source(),
456                    &parse_result.source_map,
457                );
458            });
459
460        // Sweep clones are dropped — commit inferred types into the Salsa db.
461        {
462            let mut guard = self.salsa.lock().expect("salsa lock poisoned");
463            guard.0.commit_inferred_return_types(&inferred_buffer);
464        }
465
466        let db_main = {
467            let guard = self.salsa.lock().expect("salsa lock poisoned");
468            guard.0.clone()
469        };
470
471        // ---- Pass 2: analyze function/method bodies in parallel (M14) --------
472        let pass2_results: Vec<(Vec<Issue>, Vec<crate::symbol::ResolvedSymbol>)> = parsed_files
473            .par_iter()
474            .filter(|parsed| !files_with_parse_errors.contains(&parsed.file))
475            .map_with(db_main, |db, parsed| {
476                let driver =
477                    Pass2Driver::new(&*db as &dyn MirDatabase, self.resolved_php_version());
478                let result = if let Some(cache) = &self.cache {
479                    let h = hash_content(parsed.source());
480                    if let Some((cached_issues, ref_locs)) = cache.get(&parsed.file, &h) {
481                        db.replay_reference_locations(parsed.file.clone(), &ref_locs);
482                        (cached_issues, Vec::new())
483                    } else {
484                        let parse_result = parsed.parsed();
485                        let (issues, symbols) = driver.analyze_bodies(
486                            &parse_result.program,
487                            parsed.file.clone(),
488                            parsed.source(),
489                            &parse_result.source_map,
490                        );
491                        let ref_locs = extract_reference_locations(&*db, &parsed.file);
492                        cache.put(&parsed.file, h, issues.clone(), ref_locs);
493                        (issues, symbols)
494                    }
495                } else {
496                    let parse_result = parsed.parsed();
497                    driver.analyze_bodies(
498                        &parse_result.program,
499                        parsed.file.clone(),
500                        parsed.source(),
501                        &parse_result.source_map,
502                    )
503                };
504                if let Some(cb) = &self.on_file_done {
505                    cb();
506                }
507                result
508            })
509            .collect();
510
511        let mut all_symbols = Vec::new();
512        for (issues, symbols) in pass2_results {
513            all_issues.extend(issues);
514            all_symbols.extend(symbols);
515        }
516
517        // ---- Post-Pass-2 lazy loading: FQCNs used without `use` imports ------
518        // FQCNs in function/method bodies aren't visible until Pass 2 runs, so
519        // the pre-Pass-2 lazy load misses them.  We collect UndefinedClass names,
520        // resolve them via PSR-4, load those files, re-finalize, then re-analyze
521        // only the affected files to clear the false positives.
522        if let Some(psr4) = &self.psr4 {
523            self.lazy_load_from_body_issues(
524                psr4.clone(),
525                &file_data,
526                &files_with_parse_errors,
527                &mut all_issues,
528                &mut all_symbols,
529            );
530        }
531
532        // Persist cache hits/misses to disk
533        if let Some(cache) = &self.cache {
534            cache.flush();
535        }
536
537        // ---- Compact the reference index ------------------------------------
538        // ---- Dead-code detection (M18) --------------------------------------
539        if self.find_dead_code {
540            let salsa = self.salsa.lock().unwrap();
541            let dead_code_issues = crate::dead_code::DeadCodeAnalyzer::new(&salsa.0).analyze();
542            drop(salsa);
543            all_issues.extend(dead_code_issues);
544        }
545
546        AnalysisResult::build(all_issues, std::collections::HashMap::new(), all_symbols)
547    }
548
549    fn lazy_load_missing_classes(
550        &self,
551        psr4: Arc<crate::composer::Psr4Map>,
552        all_issues: &mut Vec<Issue>,
553    ) {
554        use std::collections::HashSet;
555
556        let max_depth = 10;
557        let mut loaded: HashSet<String> = HashSet::new();
558
559        for _ in 0..max_depth {
560            let mut to_load: Vec<(String, PathBuf)> = Vec::new();
561
562            let mut try_queue = |fqcn: &str| {
563                if !self.type_exists(fqcn) && !loaded.contains(fqcn) {
564                    if let Some(path) = psr4.resolve(fqcn) {
565                        to_load.push((fqcn.to_string(), path));
566                    }
567                }
568            };
569
570            // Drive the inheritance scan from already-ingested `ClassNode`s.
571            let mut inheritance_candidates = Vec::new();
572            let import_candidates = {
573                let guard = self.salsa.lock().expect("salsa lock poisoned");
574                let db = &guard.0;
575                for fqcn in db.active_class_node_fqcns() {
576                    let Some(node) = db.lookup_class_node(&fqcn) else {
577                        continue;
578                    };
579                    if node.is_interface(db) {
580                        for parent in node.extends(db).iter() {
581                            inheritance_candidates.push(parent.to_string());
582                        }
583                    } else if node.is_enum(db) {
584                        for iface in node.interfaces(db).iter() {
585                            inheritance_candidates.push(iface.to_string());
586                        }
587                    } else if node.is_trait(db) {
588                        for used in node.traits(db).iter() {
589                            inheritance_candidates.push(used.to_string());
590                        }
591                    } else {
592                        if let Some(parent) = node.parent(db) {
593                            inheritance_candidates.push(parent.to_string());
594                        }
595                        for iface in node.interfaces(db).iter() {
596                            inheritance_candidates.push(iface.to_string());
597                        }
598                    }
599                }
600                db.file_import_snapshots()
601                    .into_iter()
602                    .flat_map(|(_, imports)| imports.into_values())
603                    .collect::<Vec<_>>()
604            };
605            for fqcn in inheritance_candidates {
606                try_queue(&fqcn);
607            }
608
609            // Also lazy-load any type referenced via `use` imports that isn't yet
610            // in the codebase (covers enums and classes used only in type hints or
611            // static calls, which never appear in the inheritance scan above).
612            for fqcn in import_candidates {
613                try_queue(&fqcn);
614            }
615
616            if to_load.is_empty() {
617                break;
618            }
619
620            for (fqcn, path) in to_load {
621                loaded.insert(fqcn);
622                if let Ok(src) = std::fs::read_to_string(&path) {
623                    let file: Arc<str> = Arc::from(path.to_string_lossy().as_ref());
624                    let defs = self.collect_and_ingest_source(file, &src);
625                    all_issues.extend(Arc::unwrap_or_clone(defs.issues));
626                }
627            }
628        }
629    }
630
    /// Post-Pass-2 lazy loading driven by body-level `UndefinedClass`
    /// diagnostics: resolve the missing FQCNs via PSR-4, ingest those files
    /// (and, transitively, their inheritance deps), then re-run Pass 2 on only
    /// the files whose diagnostics can now be cleared. Bounded to `max_depth`
    /// rounds to guarantee termination.
    fn lazy_load_from_body_issues(
        &self,
        psr4: Arc<crate::composer::Psr4Map>,
        file_data: &[(Arc<str>, Arc<str>)],
        files_with_parse_errors: &HashSet<Arc<str>>,
        all_issues: &mut Vec<Issue>,
        all_symbols: &mut Vec<crate::symbol::ResolvedSymbol>,
    ) {
        use mir_issues::IssueKind;

        let max_depth = 5;
        let mut loaded: HashSet<String> = HashSet::new();

        for _ in 0..max_depth {
            // Deduplicate by FQCN: HashMap prevents loading the same class twice
            // when multiple files share the same UndefinedClass diagnostic.
            let mut to_load: HashMap<String, PathBuf> = HashMap::new();

            for issue in all_issues.iter() {
                if let IssueKind::UndefinedClass { name } = &issue.kind {
                    if !self.type_exists(name) && !loaded.contains(name) {
                        if let Some(path) = psr4.resolve(name) {
                            to_load.entry(name.clone()).or_insert(path);
                        }
                    }
                }
            }

            if to_load.is_empty() {
                break;
            }

            loaded.extend(to_load.keys().cloned());

            // Ingest the newly-resolved files; their Pass-1 issues are
            // intentionally discarded here (only definitions are needed).
            for path in to_load.values() {
                if let Ok(src) = std::fs::read_to_string(path) {
                    let file: Arc<str> = Arc::from(path.to_string_lossy().as_ref());
                    let _ = self.collect_and_ingest_source(file, &src);
                }
            }

            // Load inheritance deps of newly-added types and finalize.
            // This covers e.g. `class Helper extends \App\Base` where Base is
            // also not in the initial file set.
            self.lazy_load_missing_classes(psr4.clone(), all_issues);

            // Re-analyze every file that has an UndefinedClass for a type now
            // present in the codebase — covers both direct and transitive loads.
            let files_to_reanalyze: HashSet<Arc<str>> = all_issues
                .iter()
                .filter_map(|i| {
                    if let IssueKind::UndefinedClass { name } = &i.kind {
                        if self.type_exists(name) {
                            return Some(i.location.file.clone());
                        }
                    }
                    None
                })
                .collect();

            if files_to_reanalyze.is_empty() {
                break;
            }

            // Drop stale issues/symbols for the files about to be re-analyzed;
            // the fresh Pass-2 run below repopulates them.
            all_issues.retain(|i| !files_to_reanalyze.contains(&i.location.file));
            all_symbols.retain(|s| !files_to_reanalyze.contains(&s.file));

            let db_reanalysis = {
                let guard = self.salsa.lock().expect("salsa lock poisoned");
                guard.0.clone()
            };

            // Lazy-loaded files re-run Pass 2 to pick up the just-loaded
            // definitions; collect inferred return types for a serial commit
            // after the parallel sweep returns (same buffer-and-commit
            // pattern as the main batch priming sweep).
            let inferred_buffer = crate::db::InferredReturnTypes::new();
            let reanalysis: Vec<(Vec<Issue>, Vec<crate::symbol::ResolvedSymbol>)> = file_data
                .par_iter()
                .filter(|(f, _)| {
                    !files_with_parse_errors.contains(f) && files_to_reanalyze.contains(f)
                })
                .map_with(db_reanalysis, |db, (file, src)| {
                    let driver =
                        Pass2Driver::new(&*db as &dyn MirDatabase, self.resolved_php_version())
                            .with_inferred_buffer(&inferred_buffer);
                    let arena = bumpalo::Bump::new();
                    let parsed = php_rs_parser::parse(&arena, src);
                    driver.analyze_bodies(&parsed.program, file.clone(), src, &parsed.source_map)
                })
                .collect();

            {
                let mut guard = self.salsa.lock().expect("salsa lock poisoned");
                guard.0.commit_inferred_return_types(&inferred_buffer);
            }

            for (issues, symbols) in reanalysis {
                all_issues.extend(issues);
                all_symbols.extend(symbols);
            }
        }
    }
734
    /// Re-analyze a single file within the existing codebase.
    ///
    /// This is the incremental analysis API for LSP:
    /// 1. Removes old definitions from this file
    /// 2. Re-runs Pass 1 (definition collection) on the new content
    /// 3. Resolves any newly-collected `@psalm-import-type` declarations
    /// 4. Re-runs Pass 2 (body analysis) on this file
    /// 5. Returns the analysis result for this file only
    ///
    /// The returned result carries issues and resolved symbols for
    /// `file_path` only; `type_envs` is always empty on this path.
    pub fn re_analyze_file(&self, file_path: &str, new_content: &str) -> AnalysisResult {
        // Fast path: content unchanged and cache has a valid entry — skip full re-analysis.
        if let Some(cache) = &self.cache {
            let h = hash_content(new_content);
            if let Some((issues, ref_locs)) = cache.get(file_path, &h) {
                let file: Arc<str> = Arc::from(file_path);
                // Pass 2 (which would normally record reference locations)
                // is skipped here, so replay the cached locations into the db.
                let guard = self.salsa.lock().expect("salsa lock poisoned");
                guard.0.replay_reference_locations(file, &ref_locs);
                return AnalysisResult::build(issues, HashMap::new(), Vec::new());
            }
        }

        let file: Arc<str> = Arc::from(file_path);

        // Step 1: drop this file's stale definitions before re-collecting.
        {
            let mut guard = self.salsa.lock().expect("salsa lock poisoned");
            let (ref mut db, _) = *guard;
            db.remove_file_definitions(file_path);
        }

        // --- Salsa-backed Pass 1: memoized parse + definition collection ------
        let file_defs = {
            let mut guard = self.salsa.lock().expect("salsa lock poisoned");
            let (ref mut db, ref mut files) = *guard;
            // Upsert the Salsa `SourceFile` input; setting new text on an
            // existing input invalidates downstream memoized queries.
            let salsa_file = match files.get(&file) {
                Some(&sf) => {
                    sf.set_text(db).to(Arc::from(new_content));
                    sf
                }
                None => {
                    let sf = SourceFile::new(db, file.clone(), Arc::from(new_content));
                    files.insert(file.clone(), sf);
                    sf
                }
            };
            collect_file_definitions(db, salsa_file)
        };

        // Pass-1 issues (e.g. parse errors) seed the result; Pass-2 body
        // issues are appended below.
        let mut all_issues: Vec<Issue> = Arc::unwrap_or_clone(file_defs.issues.clone());

        // --- S2 + Pass 2: hold the Salsa lock for ClassNode upserts and body
        // analysis so the db reference is live during Pass 2 (S5).
        let symbols = {
            let mut guard = self.salsa.lock().expect("salsa lock poisoned");
            let (ref mut db, _) = *guard;

            db.ingest_stub_slice(&file_defs.slice);

            // Resolve any newly-collected @psalm-import-type declarations so
            // Pass 2 reads the imported aliases out of `type_aliases`.
            // Re-parse in the arena so Pass 2 can walk the AST.
            let arena = bumpalo::Bump::new();
            let parsed = php_rs_parser::parse(&arena, new_content);

            // Pass 2 only runs on files that parsed cleanly; on parse errors
            // the Pass-1 issues are returned with no symbols.
            if parsed.errors.is_empty() {
                // Priming sweep: populate inferred_return_type for this file's functions
                // before the issue-emitting pass so within-file cross-function calls see
                // the correct inferred return type rather than None.  The buffer +
                // commit pattern is overkill for the single-threaded LSP path but kept
                // for symmetry with the parallel batch path (and so the analyzer's
                // Salsa node reads see the inferred values).
                let inferred_buffer = crate::db::InferredReturnTypes::new();
                {
                    let db_ref: &dyn MirDatabase = db;
                    Pass2Driver::new_inference_only(db_ref, self.resolved_php_version())
                        .with_inferred_buffer(&inferred_buffer)
                        .analyze_bodies(
                            &parsed.program,
                            file.clone(),
                            new_content,
                            &parsed.source_map,
                        );
                }
                db.commit_inferred_return_types(&inferred_buffer);

                // Issue-emitting pass over the same arena-parsed AST.
                let db_ref: &dyn MirDatabase = db;
                let driver = Pass2Driver::new(db_ref, self.resolved_php_version());
                let (body_issues, symbols) = driver.analyze_bodies(
                    &parsed.program,
                    file.clone(),
                    new_content,
                    &parsed.source_map,
                );
                all_issues.extend(body_issues);
                symbols
            } else {
                Vec::new()
            }
        };

        // Refresh the cache: evict this file plus dependents, then store the
        // fresh issues and reference locations under the new content hash.
        if let Some(cache) = &self.cache {
            let h = hash_content(new_content);
            cache.evict_with_dependents(&[file_path.to_string()]);
            let guard = self.salsa.lock().expect("salsa lock poisoned");
            let ref_locs = extract_reference_locations(&guard.0, &file);
            cache.put(file_path, h, all_issues.clone(), ref_locs);
        }

        AnalysisResult::build(all_issues, HashMap::new(), symbols)
    }
843
844    /// Analyze a PHP source string without a real file path.
845    /// Useful for tests and LSP single-file mode.
846    pub fn analyze_source(source: &str) -> AnalysisResult {
847        let analyzer = ProjectAnalyzer::new();
848        let file: Arc<str> = Arc::from("<source>");
849        let mut db = MirDb::default();
850        for slice in crate::stubs::builtin_stub_slices_for_version(analyzer.resolved_php_version())
851        {
852            db.ingest_stub_slice(&slice);
853        }
854        let salsa_file = SourceFile::new(&db, file.clone(), Arc::from(source));
855        let file_defs = collect_file_definitions(&db, salsa_file);
856        db.ingest_stub_slice(&file_defs.slice);
857        let mut all_issues = Arc::unwrap_or_clone(file_defs.issues);
858        if all_issues
859            .iter()
860            .any(|issue| matches!(issue.kind, mir_issues::IssueKind::ParseError { .. }))
861        {
862            return AnalysisResult::build(all_issues, std::collections::HashMap::new(), Vec::new());
863        }
864        let mut type_envs = std::collections::HashMap::new();
865        let mut all_symbols = Vec::new();
866        let arena = bumpalo::Bump::new();
867        let result = php_rs_parser::parse(&arena, source);
868
869        // Priming sweep: populate inferred_return_type on FunctionNode /
870        // MethodNode before the issue-emitting pass so call sites see the
871        // inferred values.  Single-threaded — no buffer / commit dance
872        // needed in principle, but we use the same pattern for symmetry
873        // with the parallel batch path.
874        let inferred_buffer = crate::db::InferredReturnTypes::new();
875        Pass2Driver::new_inference_only(&db, analyzer.resolved_php_version())
876            .with_inferred_buffer(&inferred_buffer)
877            .analyze_bodies(&result.program, file.clone(), source, &result.source_map);
878        db.commit_inferred_return_types(&inferred_buffer);
879
880        let driver = Pass2Driver::new(&db, analyzer.resolved_php_version());
881        all_issues.extend(driver.analyze_bodies_typed(
882            &result.program,
883            file.clone(),
884            source,
885            &result.source_map,
886            &mut type_envs,
887            &mut all_symbols,
888        ));
889        AnalysisResult::build(all_issues, type_envs, all_symbols)
890    }
891
892    /// Discover all `.php` files under a directory, recursively.
893    pub fn discover_files(root: &Path) -> Vec<PathBuf> {
894        if root.is_file() {
895            return vec![root.to_path_buf()];
896        }
897        let mut files = Vec::new();
898        collect_php_files(root, &mut files);
899        files
900    }
901
    /// Pass 1 only: collect type definitions from `paths` into the codebase without
    /// analyzing method bodies or emitting issues. Used to load vendor types.
    ///
    /// Any Pass-1 issues produced during collection are discarded here —
    /// only the definition slices are ingested into the database.
    pub fn collect_types_only(&self, paths: &[PathBuf]) {
        // Read every file in parallel; files that fail to read are skipped.
        let file_data: Vec<(Arc<str>, Arc<str>)> = paths
            .par_iter()
            .filter_map(|path| {
                let src = std::fs::read_to_string(path).ok()?;
                Some((
                    Arc::from(path.to_string_lossy().as_ref()),
                    Arc::<str>::from(src),
                ))
            })
            .collect();

        // Upsert Salsa `SourceFile` inputs serially under the lock. Skipping
        // `set_text` when the content is unchanged leaves existing memoized
        // results valid.
        let source_files: Vec<SourceFile> = {
            let mut guard = self.salsa.lock().expect("salsa lock poisoned");
            let (ref mut db, ref mut files) = *guard;
            file_data
                .iter()
                .map(|(file, src)| match files.get(file) {
                    Some(&sf) => {
                        if sf.text(db).as_ref() != src.as_ref() {
                            sf.set_text(db).to(src.clone());
                        }
                        sf
                    }
                    None => {
                        let sf = SourceFile::new(db, file.clone(), src.clone());
                        files.insert(file.clone(), sf);
                        sf
                    }
                })
                .collect()
        };

        // Take a db handle outside the lock for the parallel sweep;
        // `map_with` below clones it once per rayon worker.
        let db_pass1 = {
            let guard = self.salsa.lock().expect("salsa lock poisoned");
            guard.0.clone()
        };

        // Parallel Pass 1. NOTE(review): the *uncached* collection variant is
        // used here, unlike `re_analyze_file` — presumably to keep vendor
        // files out of the memo tables; confirm before changing.
        let file_defs: Vec<FileDefinitions> = source_files
            .par_iter()
            .map_with(db_pass1, |db, salsa_file| {
                collect_file_definitions_uncached(&*db, *salsa_file)
            })
            .collect();

        // Serial commit: ingest every collected slice back under the lock.
        let mut guard = self.salsa.lock().expect("salsa lock poisoned");
        let (ref mut db, _) = *guard;
        for defs in file_defs {
            db.ingest_stub_slice(&defs.slice);
        }
        drop(guard);

        // Print profiling statistics for the collection phase.
        crate::collector::print_collector_stats();
    }
959}
960
impl Default for ProjectAnalyzer {
    /// Delegates to [`ProjectAnalyzer::new`] so `Default` and `new` can
    /// never drift apart.
    fn default() -> Self {
        Self::new()
    }
}
966
967// ---------------------------------------------------------------------------
968
969fn stub_slice_needs_inference(slice: &mir_codebase::storage::StubSlice) -> bool {
970    slice
971        .functions
972        .iter()
973        .any(|func| func.return_type.is_none())
974        || slice.classes.iter().any(|class| {
975            class
976                .own_methods
977                .values()
978                .any(|method| !method.is_abstract && method.return_type.is_none())
979        })
980        || slice.traits.iter().any(|tr| {
981            tr.own_methods
982                .values()
983                .any(|method| !method.is_abstract && method.return_type.is_none())
984        })
985        || slice.enums.iter().any(|en| {
986            en.own_methods
987                .values()
988                .any(|method| !method.is_abstract && method.return_type.is_none())
989        })
990}
991
992// ---------------------------------------------------------------------------
993
/// Recursively walk `dir`, appending every `.php` file found to `out`.
///
/// Symlinked entries are skipped entirely (which also rules out symlink
/// cycles), as are well-known dependency / VCS / cache directories.
/// Unreadable directories are silently ignored (best-effort walk).
pub(crate) fn collect_php_files(dir: &Path, out: &mut Vec<PathBuf>) {
    /// Directory names we never descend into.
    const SKIP_DIRS: [&str; 5] = ["vendor", ".git", "node_modules", ".cache", ".pnpm-store"];

    let Ok(entries) = std::fs::read_dir(dir) else {
        return;
    };
    for entry in entries.flatten() {
        // `DirEntry::file_type` does not follow symlinks, so this filters
        // both symlinked files and symlinked directories.
        let is_symlink = entry.file_type().map(|ft| ft.is_symlink()).unwrap_or(false);
        if is_symlink {
            continue;
        }
        let path = entry.path();
        if path.is_dir() {
            let dir_name = path.file_name().and_then(|n| n.to_str()).unwrap_or("");
            if !SKIP_DIRS.contains(&dir_name) {
                collect_php_files(&path, out);
            }
        } else if path.extension().and_then(|e| e.to_str()) == Some("php") {
            out.push(path);
        }
    }
}
1016
1017// ---------------------------------------------------------------------------
1018// build_reverse_deps
1019// ---------------------------------------------------------------------------
1020
1021fn build_reverse_deps(db: &dyn crate::db::MirDatabase) -> HashMap<String, HashSet<String>> {
1022    let mut reverse: HashMap<String, HashSet<String>> = HashMap::new();
1023
1024    let mut add_edge = |symbol: &str, dependent_file: &str| {
1025        if let Some(defining_file) = db.symbol_defining_file(symbol) {
1026            let def = defining_file.as_ref().to_string();
1027            if def != dependent_file {
1028                reverse
1029                    .entry(def)
1030                    .or_default()
1031                    .insert(dependent_file.to_string());
1032            }
1033        }
1034    };
1035
1036    for (file, imports) in db.file_import_snapshots() {
1037        let file = file.as_ref().to_string();
1038        for fqcn in imports.values() {
1039            add_edge(fqcn, &file);
1040        }
1041    }
1042
1043    for fqcn in db.active_class_node_fqcns() {
1044        // Only true classes contribute class-direction edges in this loop.
1045        // Interface / trait / enum edges are not currently emitted here —
1046        // this function only ever read classes.
1047        let kind = match crate::db::class_kind_via_db(db, fqcn.as_ref()) {
1048            Some(k) if !k.is_interface && !k.is_trait && !k.is_enum => k,
1049            _ => continue,
1050        };
1051        let _ = kind;
1052        let Some(file) = db
1053            .symbol_defining_file(fqcn.as_ref())
1054            .map(|f| f.as_ref().to_string())
1055        else {
1056            continue;
1057        };
1058
1059        let Some(node) = db.lookup_class_node(fqcn.as_ref()) else {
1060            continue;
1061        };
1062        if let Some(parent) = node.parent(db) {
1063            add_edge(parent.as_ref(), &file);
1064        }
1065        for iface in node.interfaces(db).iter() {
1066            add_edge(iface.as_ref(), &file);
1067        }
1068        for tr in node.traits(db).iter() {
1069            add_edge(tr.as_ref(), &file);
1070        }
1071    }
1072
1073    reverse
1074}
1075
1076// ---------------------------------------------------------------------------
1077
1078fn extract_reference_locations(
1079    db: &dyn crate::db::MirDatabase,
1080    file: &Arc<str>,
1081) -> Vec<(String, u32, u16, u16)> {
1082    db.extract_file_reference_locations(file.as_ref())
1083        .into_iter()
1084        .map(|(sym, line, col_start, col_end)| (sym.to_string(), line, col_start, col_end))
1085        .collect()
1086}
1087
1088// ---------------------------------------------------------------------------
1089// AnalysisResult
1090// ---------------------------------------------------------------------------
1091
/// The outcome of analyzing one file, one source string, or a whole project.
pub struct AnalysisResult {
    /// All issues emitted (Pass 1 definition issues plus Pass 2 body issues).
    pub issues: Vec<Issue>,
    /// Per-scope type environments. Populated by `analyze_source`; the batch
    /// and LSP entry points in this file pass an empty map.
    pub type_envs: std::collections::HashMap<crate::type_env::ScopeId, crate::type_env::TypeEnv>,
    /// Per-expression resolved symbols from Pass 2, sorted by file path.
    pub symbols: Vec<crate::symbol::ResolvedSymbol>,
    /// Maps each file path to the contiguous range within `symbols` that belongs
    /// to it. Built once after analysis; allows `symbol_at` to scan only the
    /// relevant file's slice rather than the entire codebase-wide vector.
    symbols_by_file: HashMap<Arc<str>, std::ops::Range<usize>>,
}
1102
1103impl AnalysisResult {
1104    fn build(
1105        issues: Vec<Issue>,
1106        type_envs: std::collections::HashMap<crate::type_env::ScopeId, crate::type_env::TypeEnv>,
1107        mut symbols: Vec<crate::symbol::ResolvedSymbol>,
1108    ) -> Self {
1109        symbols.sort_unstable_by(|a, b| a.file.as_ref().cmp(b.file.as_ref()));
1110        let mut symbols_by_file: HashMap<Arc<str>, std::ops::Range<usize>> = HashMap::new();
1111        let mut i = 0;
1112        while i < symbols.len() {
1113            let file = Arc::clone(&symbols[i].file);
1114            let start = i;
1115            while i < symbols.len() && symbols[i].file == file {
1116                i += 1;
1117            }
1118            symbols_by_file.insert(file, start..i);
1119        }
1120        Self {
1121            issues,
1122            type_envs,
1123            symbols,
1124            symbols_by_file,
1125        }
1126    }
1127}
1128
1129impl AnalysisResult {
1130    pub fn error_count(&self) -> usize {
1131        self.issues
1132            .iter()
1133            .filter(|i| i.severity == mir_issues::Severity::Error)
1134            .count()
1135    }
1136
1137    pub fn warning_count(&self) -> usize {
1138        self.issues
1139            .iter()
1140            .filter(|i| i.severity == mir_issues::Severity::Warning)
1141            .count()
1142    }
1143
1144    /// Group issues by source file.
1145    pub fn issues_by_file(&self) -> HashMap<std::sync::Arc<str>, Vec<&Issue>> {
1146        let mut map: HashMap<std::sync::Arc<str>, Vec<&Issue>> = HashMap::new();
1147        for issue in &self.issues {
1148            map.entry(issue.location.file.clone())
1149                .or_default()
1150                .push(issue);
1151        }
1152        map
1153    }
1154
1155    /// Return the innermost resolved symbol whose span contains `byte_offset`
1156    /// in `file`, or `None` if no symbol was recorded at that position.
1157    pub fn symbol_at(
1158        &self,
1159        file: &str,
1160        byte_offset: u32,
1161    ) -> Option<&crate::symbol::ResolvedSymbol> {
1162        let range = self.symbols_by_file.get(file)?;
1163        self.symbols[range.clone()]
1164            .iter()
1165            .filter(|s| s.span.start <= byte_offset && byte_offset < s.span.end)
1166            .min_by_key(|s| s.span.end - s.span.start)
1167    }
1168}