Skip to main content

mir_analyzer/
project.rs

//! Project-level orchestration: file discovery, pass 1, pass 2.
2use std::mem::ManuallyDrop;
3use std::path::{Path, PathBuf};
4use std::sync::Arc;
5
6use rayon::prelude::*;
7
8use std::collections::{HashMap, HashSet};
9
10use crate::cache::{hash_content, AnalysisCache};
11use crate::db::{collect_file_definitions, FileDefinitions, MirDatabase, MirDb, SourceFile};
12use crate::pass2::Pass2Driver;
13use crate::php_version::PhpVersion;
14use mir_issues::Issue;
15use salsa::Setter as _;
16
17// Re-exports for downstream callers in this crate.
18pub use crate::pass2::merge_return_types;
19
20// ---------------------------------------------------------------------------
21// ProjectAnalyzer
22// ---------------------------------------------------------------------------
23
/// Entry point for analyzing a set of PHP files.
///
/// Holds the configuration (cache, PSR-4 map, PHP version, extra stubs) and
/// the Salsa database that the analysis passes read from and write to.
pub struct ProjectAnalyzer {
    /// Optional cache — when `Some`, Pass 2 results are read/written per file.
    pub cache: Option<AnalysisCache>,
    /// Called once after each file completes Pass 2 (used for progress reporting).
    pub on_file_done: Option<Arc<dyn Fn() + Send + Sync>>,
    /// PSR-4 autoloader mapping from composer.json, if available.
    pub psr4: Option<Arc<crate::composer::Psr4Map>>,
    /// Whether stubs have already been loaded (to avoid double-loading).
    stubs_loaded: std::sync::atomic::AtomicBool,
    /// When true, run dead code detection at the end of analysis.
    pub find_dead_code: bool,
    /// Target PHP language version. `None` means "not configured"; resolved to
    /// `PhpVersion::LATEST` when passed down to `StatementsAnalyzer`.
    pub php_version: Option<PhpVersion>,
    /// Additional stub files to parse before analysis (absolute paths).
    pub stub_files: Vec<PathBuf>,
    /// Additional stub directories to walk and parse before analysis (absolute paths).
    pub stub_dirs: Vec<PathBuf>,
    /// Salsa database for incremental Pass-1 memoization, paired with the
    /// map from file path to the `SourceFile` input registered for it.
    /// `MirDb` is `Send` but `!Sync` (thread-local query state); `Mutex`
    /// provides the `Sync` bound rayon requires without needing `T: Sync`.
    salsa: std::sync::Mutex<(MirDb, HashMap<Arc<str>, SourceFile>)>,
}
47
/// A source file bundled with its parsed AST.
///
/// `parsed` borrows from both `arena` and `source`; its lifetimes are widened
/// to `'static` in `new`, so it must never be handed out with those lifetimes
/// and must be destroyed before `arena` and `source` (see the `Drop` impl).
struct ParsedProjectFile {
    /// Path of the file, as passed to `new`.
    file: Arc<str>,
    /// Full source text the AST was parsed from.
    source: Arc<str>,
    /// Parse result whose borrows point into `arena` and `source`.
    parsed: ManuallyDrop<php_rs_parser::ParseResult<'static, 'static>>,
    /// Bump arena passed to the parser; boxed so its address stays stable
    /// when this struct is moved.
    arena: ManuallyDrop<Box<bumpalo::Bump>>,
}
54
impl ParsedProjectFile {
    /// Parse `source` into a fresh bump arena and bundle the result with the
    /// arena and the source text it borrows from.
    fn new(file: Arc<str>, source: Arc<str>) -> Self {
        // The arena is boxed so the `Bump` itself lives on the heap and does
        // not move when `Self` is moved.
        let arena = Box::new(bumpalo::Bump::new());
        let parsed = php_rs_parser::parse(&arena, &source);
        // SAFETY: `parsed` borrows from `arena` and `source`, both owned by this
        // struct and kept alive until `Drop`. `Drop` manually destroys `parsed`
        // before releasing either owner, so the widened lifetimes never escape.
        let parsed = unsafe {
            std::mem::transmute::<
                php_rs_parser::ParseResult<'_, '_>,
                php_rs_parser::ParseResult<'static, 'static>,
            >(parsed)
        };
        Self {
            file,
            source,
            parsed: ManuallyDrop::new(parsed),
            arena: ManuallyDrop::new(arena),
        }
    }

    /// The source text this file was parsed from.
    fn source(&self) -> &str {
        self.source.as_ref()
    }

    /// The parse result, with its lifetimes narrowed back to the borrow of
    /// `self` so callers cannot observe the internal `'static` widening.
    fn parsed(&self) -> &php_rs_parser::ParseResult<'_, '_> {
        &self.parsed
    }
}
84
impl Drop for ParsedProjectFile {
    /// Destroy the parse result before the arena it borrows from.
    fn drop(&mut self) {
        // SAFETY: both fields are `ManuallyDrop` and are dropped exactly once,
        // here, and never accessed afterwards. `parsed` goes first because it
        // borrows from `arena`; `source` is an ordinary field and is dropped
        // automatically after this function returns.
        unsafe {
            ManuallyDrop::drop(&mut self.parsed);
            ManuallyDrop::drop(&mut self.arena);
        }
    }
}
93
// SAFETY: after construction the parsed AST and source map are read-only. The
// bump arena is never mutated again; it only owns backing storage for AST nodes
// and is dropped after all parallel analysis has completed.
// NOTE(review): `bumpalo::Bump` is `!Sync` because allocation goes through a
// shared reference; this impl is only sound as long as nothing allocates
// through `arena` after `new` returns — confirm when touching this type.
unsafe impl Send for ParsedProjectFile {}
unsafe impl Sync for ParsedProjectFile {}
99
100impl ProjectAnalyzer {
101    pub fn new() -> Self {
102        Self {
103            cache: None,
104            on_file_done: None,
105            psr4: None,
106            stubs_loaded: std::sync::atomic::AtomicBool::new(false),
107            find_dead_code: false,
108            php_version: None,
109            stub_files: Vec::new(),
110            stub_dirs: Vec::new(),
111            salsa: std::sync::Mutex::new((MirDb::default(), HashMap::new())),
112        }
113    }
114
115    /// Create a `ProjectAnalyzer` with a disk-backed cache stored under `cache_dir`.
116    pub fn with_cache(cache_dir: &Path) -> Self {
117        Self {
118            cache: Some(AnalysisCache::open(cache_dir)),
119            on_file_done: None,
120            psr4: None,
121            stubs_loaded: std::sync::atomic::AtomicBool::new(false),
122            find_dead_code: false,
123            php_version: None,
124            stub_files: Vec::new(),
125            stub_dirs: Vec::new(),
126            salsa: std::sync::Mutex::new((MirDb::default(), HashMap::new())),
127        }
128    }
129
130    /// Create a `ProjectAnalyzer` from a project root containing `composer.json`.
131    /// Returns the analyzer (with `psr4` set) and the `Psr4Map` so callers can
132    /// call `map.project_files()` / `map.vendor_files()`.
133    pub fn from_composer(
134        root: &Path,
135    ) -> Result<(Self, crate::composer::Psr4Map), crate::composer::ComposerError> {
136        let map = crate::composer::Psr4Map::from_composer(root)?;
137        let psr4 = Arc::new(map.clone());
138        let analyzer = Self {
139            cache: None,
140            on_file_done: None,
141            psr4: Some(psr4),
142            stubs_loaded: std::sync::atomic::AtomicBool::new(false),
143            find_dead_code: false,
144            php_version: None,
145            stub_files: Vec::new(),
146            stub_dirs: Vec::new(),
147            salsa: std::sync::Mutex::new((MirDb::default(), HashMap::new())),
148        };
149        Ok((analyzer, map))
150    }
151
    /// Builder-style setter: record `version` as the target PHP version and
    /// hand the analyzer back.
    pub fn with_php_version(mut self, version: PhpVersion) -> Self {
        self.php_version = Some(version);
        self
    }
157
158    /// Resolve the configured PHP version, defaulting to `PhpVersion::LATEST`
159    /// when none has been set.
160    fn resolved_php_version(&self) -> PhpVersion {
161        self.php_version.unwrap_or(PhpVersion::LATEST)
162    }
163
164    fn type_exists(&self, fqcn: &str) -> bool {
165        let guard = self.salsa.lock().expect("salsa lock poisoned");
166        crate::db::type_exists_via_db(&guard.0, fqcn)
167    }
168
    /// Internal: expose the salsa Mutex for unit tests that need a `&dyn MirDatabase`.
    ///
    /// The guarded tuple is the database plus the path -> `SourceFile` map;
    /// callers are responsible for locking it themselves.
    #[doc(hidden)]
    pub fn salsa_db_for_test(&self) -> &std::sync::Mutex<(MirDb, HashMap<Arc<str>, SourceFile>)> {
        &self.salsa
    }
174
175    /// Look up the source location of a class member (method, property, or
176    /// class constant / enum case) by walking the inheritance chain through
177    /// the salsa db.  Returns `None` if no member with that name exists, or
178    /// if the member has no recorded location.
179    pub fn member_location(
180        &self,
181        fqcn: &str,
182        member_name: &str,
183    ) -> Option<mir_codebase::storage::Location> {
184        let guard = self.salsa.lock().expect("salsa lock poisoned");
185        crate::db::member_location_via_db(&guard.0, fqcn, member_name)
186    }
187
188    pub fn symbol_location(&self, symbol: &str) -> Option<mir_codebase::storage::Location> {
189        let guard = self.salsa.lock().expect("salsa lock poisoned");
190        let db = &guard.0;
191        db.lookup_class_node(symbol)
192            .filter(|n| n.active(db))
193            .and_then(|n| n.location(db))
194            .or_else(|| {
195                db.lookup_function_node(symbol)
196                    .filter(|n| n.active(db))
197                    .and_then(|n| n.location(db))
198            })
199    }
200
201    pub fn reference_locations(&self, symbol: &str) -> Vec<(Arc<str>, u32, u16, u16)> {
202        let guard = self.salsa.lock().expect("salsa lock poisoned");
203        guard.0.reference_locations(symbol)
204    }
205
206    /// Load PHP built-in stubs. Called automatically by `analyze` if not done yet.
207    /// Stubs are filtered against the configured target PHP version (or
208    /// `PhpVersion::LATEST` if none was set).
209    pub fn load_stubs(&self) {
210        if !self
211            .stubs_loaded
212            .swap(true, std::sync::atomic::Ordering::SeqCst)
213        {
214            let php_version = self.resolved_php_version();
215            crate::stubs::stub_files()
216                .par_iter()
217                .for_each(|(filename, content)| {
218                    let slice =
219                        crate::stubs::stub_slice_from_source(filename, content, Some(php_version));
220                    let mut guard = self.salsa.lock().expect("salsa lock poisoned");
221                    guard.0.ingest_stub_slice(&slice);
222                });
223
224            let mut guard = self.salsa.lock().expect("salsa lock poisoned");
225            for slice in crate::stubs::user_stub_slices(&self.stub_files, &self.stub_dirs) {
226                guard.0.ingest_stub_slice(&slice);
227            }
228        }
229    }
230
231    fn collect_and_ingest_source(&self, file: Arc<str>, src: &str) -> FileDefinitions {
232        let file_defs = {
233            let mut guard = self.salsa.lock().expect("salsa lock poisoned");
234            let (ref mut db, ref mut files) = *guard;
235            let salsa_file = match files.get(&file) {
236                Some(&sf) => {
237                    if sf.text(db).as_ref() != src {
238                        sf.set_text(db).to(Arc::from(src));
239                    }
240                    sf
241                }
242                None => {
243                    let sf = SourceFile::new(db, file.clone(), Arc::from(src));
244                    files.insert(file.clone(), sf);
245                    sf
246                }
247            };
248            collect_file_definitions(db, salsa_file)
249        };
250
251        {
252            let mut guard = self.salsa.lock().expect("salsa lock poisoned");
253            guard.0.ingest_stub_slice(&file_defs.slice);
254        }
255        file_defs
256    }
257
    /// Run the full analysis pipeline on a set of file paths.
    ///
    /// Stages, in order:
    /// 1. load stubs (no-op after the first call);
    /// 2. read and parse every path in parallel — unreadable files are
    ///    reported on stderr and skipped;
    /// 3. when a cache is configured, evict changed files and their dependents;
    /// 4. register the sources as Salsa inputs and collect definitions (Pass 1);
    /// 5. lazy-load missing classes through PSR-4, if a map is configured;
    /// 6. class-level checks;
    /// 7. an inference-only priming sweep followed by the issue-emitting
    ///    Pass 2 sweep — files with parse errors are excluded from both;
    /// 8. post-Pass-2 lazy loading, cache flush, and optional dead-code
    ///    detection.
    ///
    /// The order of the db clones and setter calls below matters; see the
    /// inline comments before rearranging anything.
    pub fn analyze(&self, paths: &[PathBuf]) -> AnalysisResult {
        let mut all_issues = Vec::new();

        // ---- Load PHP built-in stubs (before Pass 1 so user code can override)
        self.load_stubs();

        // ---- Pass 1: read files in parallel ----------------------------------
        let parsed_files: Vec<ParsedProjectFile> = paths
            .par_iter()
            .filter_map(|path| match std::fs::read_to_string(path) {
                Ok(src) => {
                    let file = Arc::from(path.to_string_lossy().as_ref());
                    Some(ParsedProjectFile::new(file, Arc::from(src)))
                }
                Err(e) => {
                    eprintln!("Cannot read {}: {}", path.display(), e);
                    None
                }
            })
            .collect();

        // (path, source) pairs for the stages that do not need the AST.
        let file_data: Vec<(Arc<str>, Arc<str>)> = parsed_files
            .iter()
            .map(|parsed| (parsed.file.clone(), parsed.source.clone()))
            .collect();

        // ---- Pre-Pass-2 invalidation: evict dependents of changed files ------
        // A file counts as changed when the cache has no entry for its
        // current content hash.
        if let Some(cache) = &self.cache {
            let changed: Vec<String> = file_data
                .par_iter()
                .filter_map(|(f, src)| {
                    let h = hash_content(src.as_ref());
                    if cache.get(f, &h).is_none() {
                        Some(f.to_string())
                    } else {
                        None
                    }
                })
                .collect();
            if !changed.is_empty() {
                cache.evict_with_dependents(&changed);
            }
        }

        // ---- Register Salsa source inputs for incremental follow-up calls ----
        {
            let mut guard = self.salsa.lock().expect("salsa lock poisoned");
            let (ref mut db, ref mut files) = *guard;
            for parsed in &parsed_files {
                match files.get(parsed.file.as_ref()) {
                    Some(&sf) => {
                        // Skip the setter when the text is unchanged.
                        if sf.text(db).as_ref() != parsed.source() {
                            sf.set_text(db).to(parsed.source.clone());
                        }
                    }
                    None => {
                        let sf = SourceFile::new(db, parsed.file.clone(), parsed.source.clone());
                        files.insert(parsed.file.clone(), sf);
                    }
                }
            }
        }

        // ---- Pass 1: definition collection from the already-parsed AST -------
        let file_defs: Vec<FileDefinitions> = parsed_files
            .par_iter()
            .map(|parsed| {
                let parse_result = parsed.parsed();
                // Per-file issue list; shadows the outer `all_issues` on purpose.
                // Every parse error is reported at line 1, column 0 of the
                // file — the parser error's own position is not used here.
                let mut all_issues: Vec<Issue> = parse_result
                    .errors
                    .iter()
                    .map(|err| {
                        Issue::new(
                            mir_issues::IssueKind::ParseError {
                                message: err.to_string(),
                            },
                            mir_issues::Location {
                                file: parsed.file.clone(),
                                line: 1,
                                line_end: 1,
                                col_start: 0,
                                col_end: 0,
                            },
                        )
                    })
                    .collect();
                let collector = crate::collector::DefinitionCollector::new_for_slice(
                    parsed.file.clone(),
                    parsed.source(),
                    &parse_result.source_map,
                );
                let (slice, collector_issues) = collector.collect_slice(&parse_result.program);
                all_issues.extend(collector_issues);
                FileDefinitions {
                    slice: Arc::new(slice),
                    issues: Arc::new(all_issues),
                }
            })
            .collect();

        // Serially ingest the collected slices and note which files must be
        // skipped (parse errors) or primed (need return-type inference).
        let mut files_with_parse_errors: std::collections::HashSet<Arc<str>> =
            std::collections::HashSet::new();
        let mut files_needing_inference: std::collections::HashSet<Arc<str>> =
            std::collections::HashSet::new();
        {
            let mut guard = self.salsa.lock().expect("salsa lock poisoned");
            let (ref mut db, _) = *guard;
            for defs in file_defs {
                for issue in defs.issues.iter() {
                    if matches!(issue.kind, mir_issues::IssueKind::ParseError { .. }) {
                        files_with_parse_errors.insert(issue.location.file.clone());
                    }
                }
                if stub_slice_needs_inference(&defs.slice) {
                    if let Some(file) = defs.slice.file.as_ref() {
                        files_needing_inference.insert(file.clone());
                    }
                }
                db.ingest_stub_slice(&defs.slice);
                all_issues.extend(Arc::unwrap_or_clone(defs.issues));
            }
        }

        // ---- Lazy-load unknown classes via PSR-4 (issue #50) ----------------
        if let Some(psr4) = &self.psr4 {
            self.lazy_load_missing_classes(psr4.clone(), &mut all_issues);
        }

        // NOTE(review): a "resolve @psalm-import-type declarations" step was
        // announced here, but no code in this function performs it — confirm
        // whether it now happens elsewhere.
        // ---- Build reverse dep graph and persist it for the next run ---------
        if let Some(cache) = &self.cache {
            let db_snapshot = {
                let guard = self.salsa.lock().expect("salsa lock poisoned");
                guard.0.clone()
            };
            let rev = build_reverse_deps(&db_snapshot);
            cache.set_reverse_deps(rev);
        }

        // ---- Class-level checks (M11) ----------------------------------------
        // `class_db` is scoped tightly: it must be dropped before the priming
        // sweep's `commit_inferred_return_types` call below, otherwise the
        // setter's `Storage::cancel_others` blocks waiting for this clone's
        // Arc to drop (strong-count==1 invariant).
        let analyzed_file_set: std::collections::HashSet<std::sync::Arc<str>> =
            file_data.iter().map(|(f, _)| f.clone()).collect();
        {
            let class_db = {
                let guard = self.salsa.lock().expect("salsa lock poisoned");
                guard.0.clone()
            };
            let class_issues =
                crate::class::ClassAnalyzer::with_files(&class_db, analyzed_file_set, &file_data)
                    .analyze_all();
            all_issues.extend(class_issues);
        }

        // ---- S5-PR10b: clone the salsa db once per parallel sweep so each
        // rayon worker gets its own clone (Salsa databases are `Send` but
        // `!Sync`; cloning shares the underlying memoization storage).
        let db_priming = {
            let guard = self.salsa.lock().expect("salsa lock poisoned");
            guard.0.clone()
        };

        // ---- Pass 2 priming: populate inferred_return_type for all functions  --
        // Run a first inference-only sweep so that cross-file inferred return
        // types are available before the issue-emitting pass below (G6).
        //
        // Inferred types are also collected into a thread-safe buffer here and
        // committed to the Salsa db serially after the sweep returns.  Writing
        // setters from inside `for_each_with` would deadlock against
        // `Storage::cancel_others` (which waits for sibling worker clones to
        // drop); the post-sweep commit runs against the canonical db with
        // strong-count==1.  See `crate::db::InferredReturnTypes`.
        let inferred_buffer = crate::db::InferredReturnTypes::new();
        parsed_files
            .par_iter()
            .filter(|parsed| {
                !files_with_parse_errors.contains(&parsed.file)
                    && files_needing_inference.contains(&parsed.file)
            })
            .for_each_with(db_priming, |db, parsed| {
                let driver = Pass2Driver::new_inference_only(
                    &*db as &dyn MirDatabase,
                    self.resolved_php_version(),
                )
                .with_inferred_buffer(&inferred_buffer);
                let parse_result = parsed.parsed();
                // The return value is discarded: this sweep only feeds
                // `inferred_buffer`.
                driver.analyze_bodies(
                    &parse_result.program,
                    parsed.file.clone(),
                    parsed.source(),
                    &parse_result.source_map,
                );
            });

        // Sweep clones are dropped — commit inferred types into the Salsa db.
        {
            let mut guard = self.salsa.lock().expect("salsa lock poisoned");
            guard.0.commit_inferred_return_types(&inferred_buffer);
        }

        // Fresh clone taken after the commit so Pass 2 sees the inferred types.
        let db_main = {
            let guard = self.salsa.lock().expect("salsa lock poisoned");
            guard.0.clone()
        };

        // ---- Pass 2: analyze function/method bodies in parallel (M14) --------
        let pass2_results: Vec<(Vec<Issue>, Vec<crate::symbol::ResolvedSymbol>)> = parsed_files
            .par_iter()
            .filter(|parsed| !files_with_parse_errors.contains(&parsed.file))
            .map_with(db_main, |db, parsed| {
                let driver =
                    Pass2Driver::new(&*db as &dyn MirDatabase, self.resolved_php_version());
                let result = if let Some(cache) = &self.cache {
                    let h = hash_content(parsed.source());
                    if let Some((cached_issues, ref_locs)) = cache.get(&parsed.file, &h) {
                        // Cache hit: replay the stored reference locations and
                        // reuse the stored issues; no symbols are produced.
                        db.replay_reference_locations(parsed.file.clone(), &ref_locs);
                        (cached_issues, Vec::new())
                    } else {
                        // Cache miss: analyze, then store issues and the
                        // file's reference locations for the next run.
                        let parse_result = parsed.parsed();
                        let (issues, symbols) = driver.analyze_bodies(
                            &parse_result.program,
                            parsed.file.clone(),
                            parsed.source(),
                            &parse_result.source_map,
                        );
                        let ref_locs = extract_reference_locations(&*db, &parsed.file);
                        cache.put(&parsed.file, h, issues.clone(), ref_locs);
                        (issues, symbols)
                    }
                } else {
                    let parse_result = parsed.parsed();
                    driver.analyze_bodies(
                        &parse_result.program,
                        parsed.file.clone(),
                        parsed.source(),
                        &parse_result.source_map,
                    )
                };
                // Progress callback fires for cache hits and misses alike.
                if let Some(cb) = &self.on_file_done {
                    cb();
                }
                result
            })
            .collect();

        let mut all_symbols = Vec::new();
        for (issues, symbols) in pass2_results {
            all_issues.extend(issues);
            all_symbols.extend(symbols);
        }

        // ---- Post-Pass-2 lazy loading: FQCNs used without `use` imports ------
        // FQCNs in function/method bodies aren't visible until Pass 2 runs, so
        // the pre-Pass-2 lazy load misses them.  We collect UndefinedClass names,
        // resolve them via PSR-4, load those files, re-finalize, then re-analyze
        // only the affected files to clear the false positives.
        if let Some(psr4) = &self.psr4 {
            self.lazy_load_from_body_issues(
                psr4.clone(),
                &file_data,
                &files_with_parse_errors,
                &mut all_issues,
                &mut all_symbols,
            );
        }

        // Persist cache hits/misses to disk
        if let Some(cache) = &self.cache {
            cache.flush();
        }

        // NOTE(review): a "compact the reference index" step was announced
        // here, but no code in this function performs it.
        // ---- Dead-code detection (M18) --------------------------------------
        if self.find_dead_code {
            let salsa = self.salsa.lock().unwrap();
            let dead_code_issues = crate::dead_code::DeadCodeAnalyzer::new(&salsa.0).analyze();
            drop(salsa);
            all_issues.extend(dead_code_issues);
        }

        AnalysisResult::build(all_issues, std::collections::HashMap::new(), all_symbols)
    }
545
546    fn lazy_load_missing_classes(
547        &self,
548        psr4: Arc<crate::composer::Psr4Map>,
549        all_issues: &mut Vec<Issue>,
550    ) {
551        use std::collections::HashSet;
552
553        let max_depth = 10;
554        let mut loaded: HashSet<String> = HashSet::new();
555
556        for _ in 0..max_depth {
557            let mut to_load: Vec<(String, PathBuf)> = Vec::new();
558
559            let mut try_queue = |fqcn: &str| {
560                if !self.type_exists(fqcn) && !loaded.contains(fqcn) {
561                    if let Some(path) = psr4.resolve(fqcn) {
562                        to_load.push((fqcn.to_string(), path));
563                    }
564                }
565            };
566
567            // Drive the inheritance scan from already-ingested `ClassNode`s.
568            let mut inheritance_candidates = Vec::new();
569            let import_candidates = {
570                let guard = self.salsa.lock().expect("salsa lock poisoned");
571                let db = &guard.0;
572                for fqcn in db.active_class_node_fqcns() {
573                    let Some(node) = db.lookup_class_node(&fqcn) else {
574                        continue;
575                    };
576                    if node.is_interface(db) {
577                        for parent in node.extends(db).iter() {
578                            inheritance_candidates.push(parent.to_string());
579                        }
580                    } else if node.is_enum(db) {
581                        for iface in node.interfaces(db).iter() {
582                            inheritance_candidates.push(iface.to_string());
583                        }
584                    } else if node.is_trait(db) {
585                        for used in node.traits(db).iter() {
586                            inheritance_candidates.push(used.to_string());
587                        }
588                    } else {
589                        if let Some(parent) = node.parent(db) {
590                            inheritance_candidates.push(parent.to_string());
591                        }
592                        for iface in node.interfaces(db).iter() {
593                            inheritance_candidates.push(iface.to_string());
594                        }
595                    }
596                }
597                db.file_import_snapshots()
598                    .into_iter()
599                    .flat_map(|(_, imports)| imports.into_values())
600                    .collect::<Vec<_>>()
601            };
602            for fqcn in inheritance_candidates {
603                try_queue(&fqcn);
604            }
605
606            // Also lazy-load any type referenced via `use` imports that isn't yet
607            // in the codebase (covers enums and classes used only in type hints or
608            // static calls, which never appear in the inheritance scan above).
609            for fqcn in import_candidates {
610                try_queue(&fqcn);
611            }
612
613            if to_load.is_empty() {
614                break;
615            }
616
617            for (fqcn, path) in to_load {
618                loaded.insert(fqcn);
619                if let Ok(src) = std::fs::read_to_string(&path) {
620                    let file: Arc<str> = Arc::from(path.to_string_lossy().as_ref());
621                    let defs = self.collect_and_ingest_source(file, &src);
622                    all_issues.extend(Arc::unwrap_or_clone(defs.issues));
623                }
624            }
625        }
626    }
627
628    fn lazy_load_from_body_issues(
629        &self,
630        psr4: Arc<crate::composer::Psr4Map>,
631        file_data: &[(Arc<str>, Arc<str>)],
632        files_with_parse_errors: &HashSet<Arc<str>>,
633        all_issues: &mut Vec<Issue>,
634        all_symbols: &mut Vec<crate::symbol::ResolvedSymbol>,
635    ) {
636        use mir_issues::IssueKind;
637
638        let max_depth = 5;
639        let mut loaded: HashSet<String> = HashSet::new();
640
641        for _ in 0..max_depth {
642            // Deduplicate by FQCN: HashMap prevents loading the same class twice
643            // when multiple files share the same UndefinedClass diagnostic.
644            let mut to_load: HashMap<String, PathBuf> = HashMap::new();
645
646            for issue in all_issues.iter() {
647                if let IssueKind::UndefinedClass { name } = &issue.kind {
648                    if !self.type_exists(name) && !loaded.contains(name) {
649                        if let Some(path) = psr4.resolve(name) {
650                            to_load.entry(name.clone()).or_insert(path);
651                        }
652                    }
653                }
654            }
655
656            if to_load.is_empty() {
657                break;
658            }
659
660            loaded.extend(to_load.keys().cloned());
661
662            for path in to_load.values() {
663                if let Ok(src) = std::fs::read_to_string(path) {
664                    let file: Arc<str> = Arc::from(path.to_string_lossy().as_ref());
665                    let _ = self.collect_and_ingest_source(file, &src);
666                }
667            }
668
669            // Load inheritance deps of newly-added types and finalize.
670            // This covers e.g. `class Helper extends \App\Base` where Base is
671            // also not in the initial file set.
672            self.lazy_load_missing_classes(psr4.clone(), all_issues);
673
674            // Re-analyze every file that has an UndefinedClass for a type now
675            // present in the codebase — covers both direct and transitive loads.
676            let files_to_reanalyze: HashSet<Arc<str>> = all_issues
677                .iter()
678                .filter_map(|i| {
679                    if let IssueKind::UndefinedClass { name } = &i.kind {
680                        if self.type_exists(name) {
681                            return Some(i.location.file.clone());
682                        }
683                    }
684                    None
685                })
686                .collect();
687
688            if files_to_reanalyze.is_empty() {
689                break;
690            }
691
692            all_issues.retain(|i| !files_to_reanalyze.contains(&i.location.file));
693            all_symbols.retain(|s| !files_to_reanalyze.contains(&s.file));
694
695            let db_reanalysis = {
696                let guard = self.salsa.lock().expect("salsa lock poisoned");
697                guard.0.clone()
698            };
699
700            // Lazy-loaded files re-run Pass 2 to pick up the just-loaded
701            // definitions; collect inferred return types for a serial commit
702            // after the parallel sweep returns (same buffer-and-commit
703            // pattern as the main batch priming sweep).
704            let inferred_buffer = crate::db::InferredReturnTypes::new();
705            let reanalysis: Vec<(Vec<Issue>, Vec<crate::symbol::ResolvedSymbol>)> = file_data
706                .par_iter()
707                .filter(|(f, _)| {
708                    !files_with_parse_errors.contains(f) && files_to_reanalyze.contains(f)
709                })
710                .map_with(db_reanalysis, |db, (file, src)| {
711                    let driver =
712                        Pass2Driver::new(&*db as &dyn MirDatabase, self.resolved_php_version())
713                            .with_inferred_buffer(&inferred_buffer);
714                    let arena = bumpalo::Bump::new();
715                    let parsed = php_rs_parser::parse(&arena, src);
716                    driver.analyze_bodies(&parsed.program, file.clone(), src, &parsed.source_map)
717                })
718                .collect();
719
720            {
721                let mut guard = self.salsa.lock().expect("salsa lock poisoned");
722                guard.0.commit_inferred_return_types(&inferred_buffer);
723            }
724
725            for (issues, symbols) in reanalysis {
726                all_issues.extend(issues);
727                all_symbols.extend(symbols);
728            }
729        }
730    }
731
732    /// Re-analyze a single file within the existing codebase.
733    ///
734    /// This is the incremental analysis API for LSP:
735    /// 1. Removes old definitions from this file
736    /// 2. Re-runs Pass 1 (definition collection) on the new content
737    /// 3. Resolves any newly-collected `@psalm-import-type` declarations
738    /// 4. Re-runs Pass 2 (body analysis) on this file
739    /// 5. Returns the analysis result for this file only
740    pub fn re_analyze_file(&self, file_path: &str, new_content: &str) -> AnalysisResult {
741        // Fast path: content unchanged and cache has a valid entry — skip full re-analysis.
742        if let Some(cache) = &self.cache {
743            let h = hash_content(new_content);
744            if let Some((issues, ref_locs)) = cache.get(file_path, &h) {
745                let file: Arc<str> = Arc::from(file_path);
746                let guard = self.salsa.lock().expect("salsa lock poisoned");
747                guard.0.replay_reference_locations(file, &ref_locs);
748                return AnalysisResult::build(issues, HashMap::new(), Vec::new());
749            }
750        }
751
752        let file: Arc<str> = Arc::from(file_path);
753
754        {
755            let mut guard = self.salsa.lock().expect("salsa lock poisoned");
756            let (ref mut db, _) = *guard;
757            db.remove_file_definitions(file_path);
758        }
759
760        // --- Salsa-backed Pass 1: memoized parse + definition collection ------
761        let file_defs = {
762            let mut guard = self.salsa.lock().expect("salsa lock poisoned");
763            let (ref mut db, ref mut files) = *guard;
764            let salsa_file = match files.get(&file) {
765                Some(&sf) => {
766                    sf.set_text(db).to(Arc::from(new_content));
767                    sf
768                }
769                None => {
770                    let sf = SourceFile::new(db, file.clone(), Arc::from(new_content));
771                    files.insert(file.clone(), sf);
772                    sf
773                }
774            };
775            collect_file_definitions(db, salsa_file)
776        };
777
778        let mut all_issues: Vec<Issue> = Arc::unwrap_or_clone(file_defs.issues.clone());
779
780        // --- S2 + Pass 2: hold the Salsa lock for ClassNode upserts and body
781        // analysis so the db reference is live during Pass 2 (S5).
782        let symbols = {
783            let mut guard = self.salsa.lock().expect("salsa lock poisoned");
784            let (ref mut db, _) = *guard;
785
786            db.ingest_stub_slice(&file_defs.slice);
787
788            // Resolve any newly-collected @psalm-import-type declarations so
789            // Pass 2 reads the imported aliases out of `type_aliases`.
790            // Re-parse in the arena so Pass 2 can walk the AST.
791            let arena = bumpalo::Bump::new();
792            let parsed = php_rs_parser::parse(&arena, new_content);
793
794            if parsed.errors.is_empty() {
795                // Priming sweep: populate inferred_return_type for this file's functions
796                // before the issue-emitting pass so within-file cross-function calls see
797                // the correct inferred return type rather than None.  The buffer +
798                // commit pattern is overkill for the single-threaded LSP path but kept
799                // for symmetry with the parallel batch path (and so the analyzer's
800                // Salsa node reads see the inferred values).
801                let inferred_buffer = crate::db::InferredReturnTypes::new();
802                {
803                    let db_ref: &dyn MirDatabase = db;
804                    Pass2Driver::new_inference_only(db_ref, self.resolved_php_version())
805                        .with_inferred_buffer(&inferred_buffer)
806                        .analyze_bodies(
807                            &parsed.program,
808                            file.clone(),
809                            new_content,
810                            &parsed.source_map,
811                        );
812                }
813                db.commit_inferred_return_types(&inferred_buffer);
814
815                let db_ref: &dyn MirDatabase = db;
816                let driver = Pass2Driver::new(db_ref, self.resolved_php_version());
817                let (body_issues, symbols) = driver.analyze_bodies(
818                    &parsed.program,
819                    file.clone(),
820                    new_content,
821                    &parsed.source_map,
822                );
823                all_issues.extend(body_issues);
824                symbols
825            } else {
826                Vec::new()
827            }
828        };
829
830        if let Some(cache) = &self.cache {
831            let h = hash_content(new_content);
832            cache.evict_with_dependents(&[file_path.to_string()]);
833            let guard = self.salsa.lock().expect("salsa lock poisoned");
834            let ref_locs = extract_reference_locations(&guard.0, &file);
835            cache.put(file_path, h, all_issues.clone(), ref_locs);
836        }
837
838        AnalysisResult::build(all_issues, HashMap::new(), symbols)
839    }
840
841    /// Analyze a PHP source string without a real file path.
842    /// Useful for tests and LSP single-file mode.
843    pub fn analyze_source(source: &str) -> AnalysisResult {
844        let analyzer = ProjectAnalyzer::new();
845        let file: Arc<str> = Arc::from("<source>");
846        let mut db = MirDb::default();
847        for slice in crate::stubs::builtin_stub_slices_for_version(analyzer.resolved_php_version())
848        {
849            db.ingest_stub_slice(&slice);
850        }
851        let salsa_file = SourceFile::new(&db, file.clone(), Arc::from(source));
852        let file_defs = collect_file_definitions(&db, salsa_file);
853        db.ingest_stub_slice(&file_defs.slice);
854        let mut all_issues = Arc::unwrap_or_clone(file_defs.issues);
855        if all_issues
856            .iter()
857            .any(|issue| matches!(issue.kind, mir_issues::IssueKind::ParseError { .. }))
858        {
859            return AnalysisResult::build(all_issues, std::collections::HashMap::new(), Vec::new());
860        }
861        let mut type_envs = std::collections::HashMap::new();
862        let mut all_symbols = Vec::new();
863        let arena = bumpalo::Bump::new();
864        let result = php_rs_parser::parse(&arena, source);
865
866        // Priming sweep: populate inferred_return_type on FunctionNode /
867        // MethodNode before the issue-emitting pass so call sites see the
868        // inferred values.  Single-threaded — no buffer / commit dance
869        // needed in principle, but we use the same pattern for symmetry
870        // with the parallel batch path.
871        let inferred_buffer = crate::db::InferredReturnTypes::new();
872        Pass2Driver::new_inference_only(&db, analyzer.resolved_php_version())
873            .with_inferred_buffer(&inferred_buffer)
874            .analyze_bodies(&result.program, file.clone(), source, &result.source_map);
875        db.commit_inferred_return_types(&inferred_buffer);
876
877        let driver = Pass2Driver::new(&db, analyzer.resolved_php_version());
878        all_issues.extend(driver.analyze_bodies_typed(
879            &result.program,
880            file.clone(),
881            source,
882            &result.source_map,
883            &mut type_envs,
884            &mut all_symbols,
885        ));
886        AnalysisResult::build(all_issues, type_envs, all_symbols)
887    }
888
889    /// Discover all `.php` files under a directory, recursively.
890    pub fn discover_files(root: &Path) -> Vec<PathBuf> {
891        if root.is_file() {
892            return vec![root.to_path_buf()];
893        }
894        let mut files = Vec::new();
895        collect_php_files(root, &mut files);
896        files
897    }
898
899    /// Pass 1 only: collect type definitions from `paths` into the codebase without
900    /// analyzing method bodies or emitting issues. Used to load vendor types.
901    pub fn collect_types_only(&self, paths: &[PathBuf]) {
902        let file_data: Vec<(Arc<str>, Arc<str>)> = paths
903            .par_iter()
904            .filter_map(|path| {
905                let src = std::fs::read_to_string(path).ok()?;
906                Some((
907                    Arc::from(path.to_string_lossy().as_ref()),
908                    Arc::<str>::from(src),
909                ))
910            })
911            .collect();
912
913        let source_files: Vec<SourceFile> = {
914            let mut guard = self.salsa.lock().expect("salsa lock poisoned");
915            let (ref mut db, ref mut files) = *guard;
916            file_data
917                .iter()
918                .map(|(file, src)| match files.get(file) {
919                    Some(&sf) => {
920                        if sf.text(db).as_ref() != src.as_ref() {
921                            sf.set_text(db).to(src.clone());
922                        }
923                        sf
924                    }
925                    None => {
926                        let sf = SourceFile::new(db, file.clone(), src.clone());
927                        files.insert(file.clone(), sf);
928                        sf
929                    }
930                })
931                .collect()
932        };
933
934        let db_pass1 = {
935            let guard = self.salsa.lock().expect("salsa lock poisoned");
936            guard.0.clone()
937        };
938        let file_defs: Vec<FileDefinitions> = source_files
939            .par_iter()
940            .map_with(db_pass1, |db, salsa_file| {
941                collect_file_definitions(&*db, *salsa_file)
942            })
943            .collect();
944
945        let mut guard = self.salsa.lock().expect("salsa lock poisoned");
946        let (ref mut db, _) = *guard;
947        for defs in file_defs {
948            db.ingest_stub_slice(&defs.slice);
949        }
950    }
951}
952
953impl Default for ProjectAnalyzer {
954    fn default() -> Self {
955        Self::new()
956    }
957}
958
959// ---------------------------------------------------------------------------
960
961fn stub_slice_needs_inference(slice: &mir_codebase::storage::StubSlice) -> bool {
962    slice
963        .functions
964        .iter()
965        .any(|func| func.return_type.is_none())
966        || slice.classes.iter().any(|class| {
967            class
968                .own_methods
969                .values()
970                .any(|method| !method.is_abstract && method.return_type.is_none())
971        })
972        || slice.traits.iter().any(|tr| {
973            tr.own_methods
974                .values()
975                .any(|method| !method.is_abstract && method.return_type.is_none())
976        })
977        || slice.enums.iter().any(|en| {
978            en.own_methods
979                .values()
980                .any(|method| !method.is_abstract && method.return_type.is_none())
981        })
982}
983
984// ---------------------------------------------------------------------------
985
/// Recursively appends every file with a `php` extension under `dir` to `out`.
///
/// Symlinked entries are skipped, and the directories `vendor`, `.git`,
/// `node_modules`, `.cache` and `.pnpm-store` are not descended into.
/// Directories that cannot be read, and entries that fail to enumerate, are
/// ignored. The extension match is case-sensitive.
pub(crate) fn collect_php_files(dir: &Path, out: &mut Vec<PathBuf>) {
    const SKIPPED_DIRS: [&str; 5] = ["vendor", ".git", "node_modules", ".cache", ".pnpm-store"];

    let Ok(entries) = std::fs::read_dir(dir) else {
        return;
    };

    for entry in entries.flatten() {
        // An entry whose type cannot be determined is treated as "not a symlink".
        let is_symlink = matches!(entry.file_type(), Ok(kind) if kind.is_symlink());
        if is_symlink {
            continue;
        }

        let path = entry.path();
        if !path.is_dir() {
            if path.extension().and_then(|ext| ext.to_str()) == Some("php") {
                out.push(path);
            }
            continue;
        }

        // Non-UTF-8 directory names compare as "" and are therefore descended into.
        let dir_name = path.file_name().and_then(|n| n.to_str()).unwrap_or("");
        if !SKIPPED_DIRS.contains(&dir_name) {
            collect_php_files(&path, out);
        }
    }
}
1008
1009// ---------------------------------------------------------------------------
1010// build_reverse_deps
1011// ---------------------------------------------------------------------------
1012
1013fn build_reverse_deps(db: &dyn crate::db::MirDatabase) -> HashMap<String, HashSet<String>> {
1014    let mut reverse: HashMap<String, HashSet<String>> = HashMap::new();
1015
1016    let mut add_edge = |symbol: &str, dependent_file: &str| {
1017        if let Some(defining_file) = db.symbol_defining_file(symbol) {
1018            let def = defining_file.as_ref().to_string();
1019            if def != dependent_file {
1020                reverse
1021                    .entry(def)
1022                    .or_default()
1023                    .insert(dependent_file.to_string());
1024            }
1025        }
1026    };
1027
1028    for (file, imports) in db.file_import_snapshots() {
1029        let file = file.as_ref().to_string();
1030        for fqcn in imports.values() {
1031            add_edge(fqcn, &file);
1032        }
1033    }
1034
1035    for fqcn in db.active_class_node_fqcns() {
1036        // Only true classes contribute class-direction edges in this loop.
1037        // Interface / trait / enum edges are not currently emitted here —
1038        // this function only ever read classes.
1039        let kind = match crate::db::class_kind_via_db(db, fqcn.as_ref()) {
1040            Some(k) if !k.is_interface && !k.is_trait && !k.is_enum => k,
1041            _ => continue,
1042        };
1043        let _ = kind;
1044        let Some(file) = db
1045            .symbol_defining_file(fqcn.as_ref())
1046            .map(|f| f.as_ref().to_string())
1047        else {
1048            continue;
1049        };
1050
1051        let Some(node) = db.lookup_class_node(fqcn.as_ref()) else {
1052            continue;
1053        };
1054        if let Some(parent) = node.parent(db) {
1055            add_edge(parent.as_ref(), &file);
1056        }
1057        for iface in node.interfaces(db).iter() {
1058            add_edge(iface.as_ref(), &file);
1059        }
1060        for tr in node.traits(db).iter() {
1061            add_edge(tr.as_ref(), &file);
1062        }
1063    }
1064
1065    reverse
1066}
1067
1068// ---------------------------------------------------------------------------
1069
1070fn extract_reference_locations(
1071    db: &dyn crate::db::MirDatabase,
1072    file: &Arc<str>,
1073) -> Vec<(String, u32, u16, u16)> {
1074    db.extract_file_reference_locations(file.as_ref())
1075        .into_iter()
1076        .map(|(sym, line, col_start, col_end)| (sym.to_string(), line, col_start, col_end))
1077        .collect()
1078}
1079
1080// ---------------------------------------------------------------------------
1081// AnalysisResult
1082// ---------------------------------------------------------------------------
1083
1084pub struct AnalysisResult {
1085    pub issues: Vec<Issue>,
1086    pub type_envs: std::collections::HashMap<crate::type_env::ScopeId, crate::type_env::TypeEnv>,
1087    /// Per-expression resolved symbols from Pass 2, sorted by file path.
1088    pub symbols: Vec<crate::symbol::ResolvedSymbol>,
1089    /// Maps each file path to the contiguous range within `symbols` that belongs
1090    /// to it. Built once after analysis; allows `symbol_at` to scan only the
1091    /// relevant file's slice rather than the entire codebase-wide vector.
1092    symbols_by_file: HashMap<Arc<str>, std::ops::Range<usize>>,
1093}
1094
1095impl AnalysisResult {
1096    fn build(
1097        issues: Vec<Issue>,
1098        type_envs: std::collections::HashMap<crate::type_env::ScopeId, crate::type_env::TypeEnv>,
1099        mut symbols: Vec<crate::symbol::ResolvedSymbol>,
1100    ) -> Self {
1101        symbols.sort_unstable_by(|a, b| a.file.as_ref().cmp(b.file.as_ref()));
1102        let mut symbols_by_file: HashMap<Arc<str>, std::ops::Range<usize>> = HashMap::new();
1103        let mut i = 0;
1104        while i < symbols.len() {
1105            let file = Arc::clone(&symbols[i].file);
1106            let start = i;
1107            while i < symbols.len() && symbols[i].file == file {
1108                i += 1;
1109            }
1110            symbols_by_file.insert(file, start..i);
1111        }
1112        Self {
1113            issues,
1114            type_envs,
1115            symbols,
1116            symbols_by_file,
1117        }
1118    }
1119}
1120
1121impl AnalysisResult {
1122    pub fn error_count(&self) -> usize {
1123        self.issues
1124            .iter()
1125            .filter(|i| i.severity == mir_issues::Severity::Error)
1126            .count()
1127    }
1128
1129    pub fn warning_count(&self) -> usize {
1130        self.issues
1131            .iter()
1132            .filter(|i| i.severity == mir_issues::Severity::Warning)
1133            .count()
1134    }
1135
1136    /// Group issues by source file.
1137    pub fn issues_by_file(&self) -> HashMap<std::sync::Arc<str>, Vec<&Issue>> {
1138        let mut map: HashMap<std::sync::Arc<str>, Vec<&Issue>> = HashMap::new();
1139        for issue in &self.issues {
1140            map.entry(issue.location.file.clone())
1141                .or_default()
1142                .push(issue);
1143        }
1144        map
1145    }
1146
1147    /// Return the innermost resolved symbol whose span contains `byte_offset`
1148    /// in `file`, or `None` if no symbol was recorded at that position.
1149    pub fn symbol_at(
1150        &self,
1151        file: &str,
1152        byte_offset: u32,
1153    ) -> Option<&crate::symbol::ResolvedSymbol> {
1154        let range = self.symbols_by_file.get(file)?;
1155        self.symbols[range.clone()]
1156            .iter()
1157            .filter(|s| s.span.start <= byte_offset && byte_offset < s.span.end)
1158            .min_by_key(|s| s.span.end - s.span.start)
1159    }
1160}