Skip to main content

mir_analyzer/
project.rs

//! Project-level orchestration: file discovery, Pass 1, and Pass 2.
2use std::path::{Path, PathBuf};
3use std::sync::Arc;
4
5use rayon::prelude::*;
6
7use std::collections::{HashMap, HashSet};
8
9use crate::cache::{hash_content, AnalysisCache};
10use crate::db::{collect_file_definitions, MirDatabase, MirDb, SourceFile};
11use crate::pass2::Pass2Driver;
12use crate::php_version::PhpVersion;
13use mir_codebase::Codebase;
14use mir_issues::Issue;
15use salsa::Setter as _;
16
17use crate::collector::DefinitionCollector;
18
19// Re-exports for downstream callers in this crate.
20pub use crate::pass2::merge_return_types;
21
22// ---------------------------------------------------------------------------
23// ProjectAnalyzer
24// ---------------------------------------------------------------------------
25
26pub struct ProjectAnalyzer {
27    pub codebase: Arc<Codebase>,
28    /// Optional cache — when `Some`, Pass 2 results are read/written per file.
29    pub cache: Option<AnalysisCache>,
30    /// Called once after each file completes Pass 2 (used for progress reporting).
31    pub on_file_done: Option<Arc<dyn Fn() + Send + Sync>>,
32    /// PSR-4 autoloader mapping from composer.json, if available.
33    pub psr4: Option<Arc<crate::composer::Psr4Map>>,
34    /// Whether stubs have already been loaded (to avoid double-loading).
35    stubs_loaded: std::sync::atomic::AtomicBool,
36    /// When true, run dead code detection at the end of analysis.
37    pub find_dead_code: bool,
38    /// Target PHP language version. `None` means "not configured"; resolved to
39    /// `PhpVersion::LATEST` when passed down to `StatementsAnalyzer`.
40    pub php_version: Option<PhpVersion>,
41    /// Additional stub files to parse before analysis (absolute paths).
42    pub stub_files: Vec<PathBuf>,
43    /// Additional stub directories to walk and parse before analysis (absolute paths).
44    pub stub_dirs: Vec<PathBuf>,
45    /// Salsa database for incremental Pass-1 memoization.
46    /// `MirDb` is `Send` but `!Sync` (thread-local query state); `Mutex`
47    /// provides the `Sync` bound rayon requires without needing `T: Sync`.
48    salsa: std::sync::Mutex<(MirDb, HashMap<Arc<str>, SourceFile>)>,
49}
50
51impl ProjectAnalyzer {
52    pub fn new() -> Self {
53        Self {
54            codebase: Arc::new(Codebase::new()),
55            cache: None,
56            on_file_done: None,
57            psr4: None,
58            stubs_loaded: std::sync::atomic::AtomicBool::new(false),
59            find_dead_code: false,
60            php_version: None,
61            stub_files: Vec::new(),
62            stub_dirs: Vec::new(),
63            salsa: std::sync::Mutex::new((MirDb::default(), HashMap::new())),
64        }
65    }
66
67    /// Create a `ProjectAnalyzer` with a disk-backed cache stored under `cache_dir`.
68    pub fn with_cache(cache_dir: &Path) -> Self {
69        Self {
70            codebase: Arc::new(Codebase::new()),
71            cache: Some(AnalysisCache::open(cache_dir)),
72            on_file_done: None,
73            psr4: None,
74            stubs_loaded: std::sync::atomic::AtomicBool::new(false),
75            find_dead_code: false,
76            php_version: None,
77            stub_files: Vec::new(),
78            stub_dirs: Vec::new(),
79            salsa: std::sync::Mutex::new((MirDb::default(), HashMap::new())),
80        }
81    }
82
83    /// Create a `ProjectAnalyzer` from a project root containing `composer.json`.
84    /// Returns the analyzer (with `psr4` set) and the `Psr4Map` so callers can
85    /// call `map.project_files()` / `map.vendor_files()`.
86    pub fn from_composer(
87        root: &Path,
88    ) -> Result<(Self, crate::composer::Psr4Map), crate::composer::ComposerError> {
89        let map = crate::composer::Psr4Map::from_composer(root)?;
90        let psr4 = Arc::new(map.clone());
91        let analyzer = Self {
92            codebase: Arc::new(Codebase::new()),
93            cache: None,
94            on_file_done: None,
95            psr4: Some(psr4),
96            stubs_loaded: std::sync::atomic::AtomicBool::new(false),
97            find_dead_code: false,
98            php_version: None,
99            stub_files: Vec::new(),
100            stub_dirs: Vec::new(),
101            salsa: std::sync::Mutex::new((MirDb::default(), HashMap::new())),
102        };
103        Ok((analyzer, map))
104    }
105
106    /// Set the target PHP version.
107    pub fn with_php_version(mut self, version: PhpVersion) -> Self {
108        self.php_version = Some(version);
109        self
110    }
111
112    /// Resolve the configured PHP version, defaulting to `PhpVersion::LATEST`
113    /// when none has been set.
114    fn resolved_php_version(&self) -> PhpVersion {
115        self.php_version.unwrap_or(PhpVersion::LATEST)
116    }
117
118    /// Expose codebase for external use (e.g., pre-loading stubs from CLI).
119    pub fn codebase(&self) -> &Arc<Codebase> {
120        &self.codebase
121    }
122
123    /// Internal: expose the salsa Mutex for unit tests that need a `&dyn MirDatabase`.
124    #[doc(hidden)]
125    pub fn salsa_db_for_test(&self) -> &std::sync::Mutex<(MirDb, HashMap<Arc<str>, SourceFile>)> {
126        &self.salsa
127    }
128
129    /// Look up the source location of a class member (method, property, or
130    /// class constant / enum case) by walking the inheritance chain through
131    /// the salsa db.  Returns `None` if no member with that name exists, or
132    /// if the member has no recorded location.
133    pub fn member_location(
134        &self,
135        fqcn: &str,
136        member_name: &str,
137    ) -> Option<mir_codebase::storage::Location> {
138        let guard = self.salsa.lock().expect("salsa lock poisoned");
139        crate::db::member_location_via_db(&guard.0, fqcn, member_name)
140    }
141
142    /// Load PHP built-in stubs. Called automatically by `analyze` if not done yet.
143    /// Stubs are filtered against the configured target PHP version (or
144    /// `PhpVersion::LATEST` if none was set).
145    pub fn load_stubs(&self) {
146        if !self
147            .stubs_loaded
148            .swap(true, std::sync::atomic::Ordering::SeqCst)
149        {
150            crate::stubs::load_stubs_for_version(&self.codebase, self.resolved_php_version());
151            crate::stubs::load_user_stubs(&self.codebase, &self.stub_files, &self.stub_dirs);
152            // S5-PR8: mirror the loaded stubs into the Salsa db so
153            // `type_exists_via_db` / `class_kind_via_db` / `class_template_params_via_db`
154            // see them.
155            let mut guard = self.salsa.lock().expect("salsa lock poisoned");
156            guard.0.ingest_codebase(&self.codebase);
157        }
158    }
159
160    /// Run the full analysis pipeline on a set of file paths.
161    pub fn analyze(&self, paths: &[PathBuf]) -> AnalysisResult {
162        let mut all_issues = Vec::new();
163        let mut parse_errors = Vec::new();
164
165        // ---- Load PHP built-in stubs (before Pass 1 so user code can override)
166        self.load_stubs();
167
168        // ---- Pass 1: read files in parallel ----------------------------------
169        let file_data: Vec<(Arc<str>, String)> = paths
170            .par_iter()
171            .filter_map(|path| match std::fs::read_to_string(path) {
172                Ok(src) => Some((Arc::from(path.to_string_lossy().as_ref()), src)),
173                Err(e) => {
174                    eprintln!("Cannot read {}: {}", path.display(), e);
175                    None
176                }
177            })
178            .collect();
179
180        // ---- Pre-Pass-2 invalidation: evict dependents of changed files ------
181        if let Some(cache) = &self.cache {
182            let changed: Vec<String> = file_data
183                .par_iter()
184                .filter_map(|(f, src)| {
185                    let h = hash_content(src);
186                    if cache.get(f, &h).is_none() {
187                        Some(f.to_string())
188                    } else {
189                        None
190                    }
191                })
192                .collect();
193            if !changed.is_empty() {
194                cache.evict_with_dependents(&changed);
195            }
196        }
197
198        // ---- Pass 1: combined pre-index + definition collection (parallel) -----
199        let pass1_results: Vec<(Vec<Issue>, Vec<Issue>)> = file_data
200            .par_iter()
201            .map(|(file, src)| {
202                use php_ast::ast::StmtKind;
203                let arena = bumpalo::Bump::new();
204                let result = php_rs_parser::parse(&arena, src);
205
206                // --- Pre-index: build FQCN index, file imports, and namespaces ---
207                let mut current_namespace: Option<String> = None;
208                let mut imports: std::collections::HashMap<String, String> =
209                    std::collections::HashMap::new();
210                let mut file_ns_set = false;
211
212                let index_stmts =
213                    |stmts: &[php_ast::ast::Stmt<'_, '_>],
214                     ns: Option<&str>,
215                     imports: &mut std::collections::HashMap<String, String>| {
216                        for stmt in stmts.iter() {
217                            match &stmt.kind {
218                                StmtKind::Use(use_decl) => {
219                                    for item in use_decl.uses.iter() {
220                                        let full_name = crate::parser::name_to_string(&item.name)
221                                            .trim_start_matches('\\')
222                                            .to_string();
223                                        let alias = item.alias.unwrap_or_else(|| {
224                                            full_name.rsplit('\\').next().unwrap_or(&full_name)
225                                        });
226                                        imports.insert(alias.to_string(), full_name);
227                                    }
228                                }
229                                StmtKind::Class(decl) => {
230                                    if let Some(n) = decl.name {
231                                        let fqcn = match ns {
232                                            Some(ns) => format!("{ns}\\{n}"),
233                                            None => n.to_string(),
234                                        };
235                                        self.codebase
236                                            .known_symbols
237                                            .insert(Arc::from(fqcn.as_str()));
238                                    }
239                                }
240                                StmtKind::Interface(decl) => {
241                                    let fqcn = match ns {
242                                        Some(ns) => format!("{}\\{}", ns, decl.name),
243                                        None => decl.name.to_string(),
244                                    };
245                                    self.codebase.known_symbols.insert(Arc::from(fqcn.as_str()));
246                                }
247                                StmtKind::Trait(decl) => {
248                                    let fqcn = match ns {
249                                        Some(ns) => format!("{}\\{}", ns, decl.name),
250                                        None => decl.name.to_string(),
251                                    };
252                                    self.codebase.known_symbols.insert(Arc::from(fqcn.as_str()));
253                                }
254                                StmtKind::Enum(decl) => {
255                                    let fqcn = match ns {
256                                        Some(ns) => format!("{}\\{}", ns, decl.name),
257                                        None => decl.name.to_string(),
258                                    };
259                                    self.codebase.known_symbols.insert(Arc::from(fqcn.as_str()));
260                                }
261                                StmtKind::Function(decl) => {
262                                    let fqn = match ns {
263                                        Some(ns) => format!("{}\\{}", ns, decl.name),
264                                        None => decl.name.to_string(),
265                                    };
266                                    self.codebase.known_symbols.insert(Arc::from(fqn.as_str()));
267                                }
268                                _ => {}
269                            }
270                        }
271                    };
272
273                for stmt in result.program.stmts.iter() {
274                    match &stmt.kind {
275                        StmtKind::Namespace(ns) => {
276                            current_namespace =
277                                ns.name.as_ref().map(|n| crate::parser::name_to_string(n));
278                            if !file_ns_set {
279                                if let Some(ref ns_str) = current_namespace {
280                                    self.codebase
281                                        .file_namespaces
282                                        .insert(file.clone(), ns_str.clone());
283                                    file_ns_set = true;
284                                }
285                            }
286                            if let php_ast::ast::NamespaceBody::Braced(inner_stmts) = &ns.body {
287                                index_stmts(
288                                    inner_stmts,
289                                    current_namespace.as_deref(),
290                                    &mut imports,
291                                );
292                            }
293                        }
294                        _ => index_stmts(
295                            std::slice::from_ref(stmt),
296                            current_namespace.as_deref(),
297                            &mut imports,
298                        ),
299                    }
300                }
301
302                if !imports.is_empty() {
303                    self.codebase.file_imports.insert(file.clone(), imports);
304                }
305
306                // --- Parse errors ---
307                let file_parse_errors: Vec<Issue> = result
308                    .errors
309                    .iter()
310                    .map(|err| {
311                        Issue::new(
312                            mir_issues::IssueKind::ParseError {
313                                message: err.to_string(),
314                            },
315                            mir_issues::Location {
316                                file: file.clone(),
317                                line: 1,
318                                line_end: 1,
319                                col_start: 0,
320                                col_end: 0,
321                            },
322                        )
323                    })
324                    .collect();
325
326                // --- Definition collection ---
327                let collector =
328                    DefinitionCollector::new(&self.codebase, file.clone(), src, &result.source_map);
329                let issues = collector.collect(&result.program);
330
331                (file_parse_errors, issues)
332            })
333            .collect();
334
335        let mut files_with_parse_errors: std::collections::HashSet<Arc<str>> =
336            std::collections::HashSet::new();
337        for (file_parse_errors, issues) in pass1_results {
338            for issue in &file_parse_errors {
339                files_with_parse_errors.insert(issue.location.file.clone());
340            }
341            parse_errors.extend(file_parse_errors);
342            all_issues.extend(issues);
343        }
344
345        all_issues.extend(parse_errors);
346
347        // ---- Lazy-load unknown classes via PSR-4 (issue #50) ----------------
348        if let Some(psr4) = &self.psr4 {
349            self.lazy_load_missing_classes(psr4.clone(), &mut all_issues);
350        }
351
352        // ---- Resolve @psalm-import-type declarations now that all Pass 1
353        // classes (including their `type_aliases`) are populated.
354        self.codebase.resolve_pending_import_types();
355
356        // ---- S5-PR9: mirror Pass 1 + lazy-loaded definitions into the Salsa
357        // db.  Today the batch Pass 2 driver still passes `db: None`, so this
358        // is preparatory — the db is populated and ready for the per-helper
359        // fallback removal that follows once `Pass2Driver` is wired with a
360        // shared db reference.
361        {
362            let mut guard = self.salsa.lock().expect("salsa lock poisoned");
363            guard.0.ingest_codebase(&self.codebase);
364        }
365
366        // ---- Build reverse dep graph and persist it for the next run ---------
367        if let Some(cache) = &self.cache {
368            let db_snapshot = {
369                let guard = self.salsa.lock().expect("salsa lock poisoned");
370                guard.0.clone()
371            };
372            let rev = build_reverse_deps(&self.codebase, &db_snapshot);
373            cache.set_reverse_deps(rev);
374        }
375
376        // ---- Class-level checks (M11) ----------------------------------------
377        // `class_db` is scoped tightly: it must be dropped before the priming
378        // sweep's `commit_inferred_return_types` call below, otherwise the
379        // setter's `Storage::cancel_others` blocks waiting for this clone's
380        // Arc to drop (strong-count==1 invariant).
381        let analyzed_file_set: std::collections::HashSet<std::sync::Arc<str>> =
382            file_data.iter().map(|(f, _)| f.clone()).collect();
383        {
384            let class_db = {
385                let guard = self.salsa.lock().expect("salsa lock poisoned");
386                guard.0.clone()
387            };
388            let class_issues = crate::class::ClassAnalyzer::with_files(
389                &self.codebase,
390                &class_db,
391                analyzed_file_set,
392                &file_data,
393            )
394            .analyze_all();
395            all_issues.extend(class_issues);
396        }
397
398        // ---- S5-PR10b: clone the salsa db once per parallel sweep so each
399        // rayon worker gets its own clone (Salsa databases are `Send` but
400        // `!Sync`; cloning shares the underlying memoization storage).
401        let db_priming = {
402            let guard = self.salsa.lock().expect("salsa lock poisoned");
403            guard.0.clone()
404        };
405
406        // ---- Pass 2 priming: populate inferred_return_type for all functions  --
407        // Run a first inference-only sweep so that cross-file inferred return
408        // types are available before the issue-emitting pass below (G6).
409        //
410        // Inferred types are also collected into a thread-safe buffer here and
411        // committed to the Salsa db serially after the sweep returns.  Writing
412        // setters from inside `for_each_with` would deadlock against
413        // `Storage::cancel_others` (which waits for sibling worker clones to
414        // drop); the post-sweep commit runs against the canonical db with
415        // strong-count==1.  See `crate::db::InferredReturnTypes`.
416        let inferred_buffer = crate::db::InferredReturnTypes::new();
417        file_data
418            .par_iter()
419            .filter(|(file, _)| !files_with_parse_errors.contains(file))
420            .for_each_with(db_priming, |db, (file, src)| {
421                let driver = Pass2Driver::new_inference_only(
422                    &self.codebase,
423                    &*db as &dyn MirDatabase,
424                    self.resolved_php_version(),
425                )
426                .with_inferred_buffer(&inferred_buffer);
427                let arena = bumpalo::Bump::new();
428                let parsed = php_rs_parser::parse(&arena, src);
429                driver.analyze_bodies(&parsed.program, file.clone(), src, &parsed.source_map);
430            });
431
432        // Sweep clones are dropped — commit inferred types into the Salsa db.
433        {
434            let mut guard = self.salsa.lock().expect("salsa lock poisoned");
435            guard.0.commit_inferred_return_types(&inferred_buffer);
436        }
437
438        let db_main = {
439            let guard = self.salsa.lock().expect("salsa lock poisoned");
440            guard.0.clone()
441        };
442
443        // ---- Pass 2: analyze function/method bodies in parallel (M14) --------
444        let pass2_results: Vec<(Vec<Issue>, Vec<crate::symbol::ResolvedSymbol>)> = file_data
445            .par_iter()
446            .filter(|(file, _)| !files_with_parse_errors.contains(file))
447            .map_with(db_main, |db, (file, src)| {
448                let driver = Pass2Driver::new(
449                    &self.codebase,
450                    &*db as &dyn MirDatabase,
451                    self.resolved_php_version(),
452                );
453                let result = if let Some(cache) = &self.cache {
454                    let h = hash_content(src);
455                    if let Some((cached_issues, ref_locs)) = cache.get(file, &h) {
456                        self.codebase
457                            .replay_reference_locations(file.clone(), &ref_locs);
458                        (cached_issues, Vec::new())
459                    } else {
460                        let arena = bumpalo::Bump::new();
461                        let parsed = php_rs_parser::parse(&arena, src);
462                        let (issues, symbols) = driver.analyze_bodies(
463                            &parsed.program,
464                            file.clone(),
465                            src,
466                            &parsed.source_map,
467                        );
468                        let ref_locs = extract_reference_locations(&self.codebase, file);
469                        cache.put(file, h, issues.clone(), ref_locs);
470                        (issues, symbols)
471                    }
472                } else {
473                    let arena = bumpalo::Bump::new();
474                    let parsed = php_rs_parser::parse(&arena, src);
475                    driver.analyze_bodies(&parsed.program, file.clone(), src, &parsed.source_map)
476                };
477                if let Some(cb) = &self.on_file_done {
478                    cb();
479                }
480                result
481            })
482            .collect();
483
484        let mut all_symbols = Vec::new();
485        for (issues, symbols) in pass2_results {
486            all_issues.extend(issues);
487            all_symbols.extend(symbols);
488        }
489
490        // ---- Post-Pass-2 lazy loading: FQCNs used without `use` imports ------
491        // FQCNs in function/method bodies aren't visible until Pass 2 runs, so
492        // the pre-Pass-2 lazy load misses them.  We collect UndefinedClass names,
493        // resolve them via PSR-4, load those files, re-finalize, then re-analyze
494        // only the affected files to clear the false positives.
495        if let Some(psr4) = &self.psr4 {
496            self.lazy_load_from_body_issues(
497                psr4.clone(),
498                &file_data,
499                &files_with_parse_errors,
500                &mut all_issues,
501                &mut all_symbols,
502            );
503        }
504
505        // Persist cache hits/misses to disk
506        if let Some(cache) = &self.cache {
507            cache.flush();
508        }
509
510        // ---- Compact the reference index ------------------------------------
511        self.codebase.compact_reference_index();
512
513        // ---- Dead-code detection (M18) --------------------------------------
514        if self.find_dead_code {
515            let salsa = self.salsa.lock().unwrap();
516            let dead_code_issues =
517                crate::dead_code::DeadCodeAnalyzer::new(&self.codebase, &salsa.0).analyze();
518            drop(salsa);
519            all_issues.extend(dead_code_issues);
520        }
521
522        AnalysisResult::build(all_issues, std::collections::HashMap::new(), all_symbols)
523    }
524
525    fn lazy_load_missing_classes(
526        &self,
527        psr4: Arc<crate::composer::Psr4Map>,
528        all_issues: &mut Vec<Issue>,
529    ) {
530        use std::collections::HashSet;
531
532        let max_depth = 10;
533        let mut loaded: HashSet<String> = HashSet::new();
534
535        for _ in 0..max_depth {
536            let mut to_load: Vec<(String, PathBuf)> = Vec::new();
537
538            let mut try_queue = |fqcn: &str| {
539                if !self.codebase.type_exists(fqcn) && !loaded.contains(fqcn) {
540                    if let Some(path) = psr4.resolve(fqcn) {
541                        to_load.push((fqcn.to_string(), path));
542                    }
543                }
544            };
545
546            // Mirror everything collected so far (initial Pass 1 plus any
547            // classes loaded by previous iterations of this loop) into the
548            // salsa db, then drive the inheritance scan from `ClassNode`s.
549            {
550                let mut guard = self.salsa.lock().expect("salsa lock poisoned");
551                guard.0.ingest_codebase(&self.codebase);
552                let db = &guard.0;
553                for fqcn in db.active_class_node_fqcns() {
554                    let Some(node) = db.lookup_class_node(&fqcn) else {
555                        continue;
556                    };
557                    if node.is_interface(db) {
558                        for parent in node.extends(db).iter() {
559                            try_queue(parent.as_ref());
560                        }
561                    } else if node.is_enum(db) {
562                        for iface in node.interfaces(db).iter() {
563                            try_queue(iface.as_ref());
564                        }
565                    } else if node.is_trait(db) {
566                        for used in node.traits(db).iter() {
567                            try_queue(used.as_ref());
568                        }
569                    } else {
570                        if let Some(parent) = node.parent(db) {
571                            try_queue(parent.as_ref());
572                        }
573                        for iface in node.interfaces(db).iter() {
574                            try_queue(iface.as_ref());
575                        }
576                    }
577                }
578            }
579
580            // Also lazy-load any type referenced via `use` imports that isn't yet
581            // in the codebase (covers enums and classes used only in type hints or
582            // static calls, which never appear in the inheritance scan above).
583            for entry in self.codebase.file_imports.iter() {
584                for fqcn in entry.value().values() {
585                    try_queue(fqcn.as_str());
586                }
587            }
588
589            if to_load.is_empty() {
590                break;
591            }
592
593            for (fqcn, path) in to_load {
594                loaded.insert(fqcn);
595                if let Ok(src) = std::fs::read_to_string(&path) {
596                    let file: Arc<str> = Arc::from(path.to_string_lossy().as_ref());
597                    let arena = bumpalo::Bump::new();
598                    let result = php_rs_parser::parse(&arena, &src);
599                    let collector = crate::collector::DefinitionCollector::new(
600                        &self.codebase,
601                        file,
602                        &src,
603                        &result.source_map,
604                    );
605                    let issues = collector.collect(&result.program);
606                    all_issues.extend(issues);
607                }
608            }
609
610            self.codebase.resolve_pending_import_types();
611        }
612    }
613
614    fn lazy_load_from_body_issues(
615        &self,
616        psr4: Arc<crate::composer::Psr4Map>,
617        file_data: &[(Arc<str>, String)],
618        files_with_parse_errors: &HashSet<Arc<str>>,
619        all_issues: &mut Vec<Issue>,
620        all_symbols: &mut Vec<crate::symbol::ResolvedSymbol>,
621    ) {
622        use mir_issues::IssueKind;
623
624        let max_depth = 5;
625        let mut loaded: HashSet<String> = HashSet::new();
626
627        for _ in 0..max_depth {
628            // Deduplicate by FQCN: HashMap prevents loading the same class twice
629            // when multiple files share the same UndefinedClass diagnostic.
630            let mut to_load: HashMap<String, PathBuf> = HashMap::new();
631
632            for issue in all_issues.iter() {
633                if let IssueKind::UndefinedClass { name } = &issue.kind {
634                    if !self.codebase.type_exists(name) && !loaded.contains(name) {
635                        if let Some(path) = psr4.resolve(name) {
636                            to_load.entry(name.clone()).or_insert(path);
637                        }
638                    }
639                }
640            }
641
642            if to_load.is_empty() {
643                break;
644            }
645
646            loaded.extend(to_load.keys().cloned());
647
648            for path in to_load.values() {
649                if let Ok(src) = std::fs::read_to_string(path) {
650                    let file: Arc<str> = Arc::from(path.to_string_lossy().as_ref());
651                    let arena = bumpalo::Bump::new();
652                    let result = php_rs_parser::parse(&arena, &src);
653                    let collector = crate::collector::DefinitionCollector::new(
654                        &self.codebase,
655                        file,
656                        &src,
657                        &result.source_map,
658                    );
659                    let _ = collector.collect(&result.program);
660                }
661            }
662
663            // Load inheritance deps of newly-added types and finalize.
664            // This covers e.g. `class Helper extends \App\Base` where Base is
665            // also not in the initial file set.
666            self.lazy_load_missing_classes(psr4.clone(), all_issues);
667
668            // Re-analyze every file that has an UndefinedClass for a type now
669            // present in the codebase — covers both direct and transitive loads.
670            let files_to_reanalyze: HashSet<Arc<str>> = all_issues
671                .iter()
672                .filter_map(|i| {
673                    if let IssueKind::UndefinedClass { name } = &i.kind {
674                        if self.codebase.type_exists(name) {
675                            return Some(i.location.file.clone());
676                        }
677                    }
678                    None
679                })
680                .collect();
681
682            if files_to_reanalyze.is_empty() {
683                break;
684            }
685
686            all_issues.retain(|i| !files_to_reanalyze.contains(&i.location.file));
687            all_symbols.retain(|s| !files_to_reanalyze.contains(&s.file));
688
689            // S5-PR11a: mirror newly-loaded definitions into the salsa db
690            // before re-analyzing, so the cloned db each rayon worker
691            // receives sees them.
692            let db_reanalysis = {
693                let mut guard = self.salsa.lock().expect("salsa lock poisoned");
694                guard.0.ingest_codebase(&self.codebase);
695                guard.0.clone()
696            };
697
698            // Lazy-loaded files re-run Pass 2 to pick up the just-loaded
699            // definitions; collect inferred return types for a serial commit
700            // after the parallel sweep returns (same buffer-and-commit
701            // pattern as the main batch priming sweep).
702            let inferred_buffer = crate::db::InferredReturnTypes::new();
703            let reanalysis: Vec<(Vec<Issue>, Vec<crate::symbol::ResolvedSymbol>)> = file_data
704                .par_iter()
705                .filter(|(f, _)| {
706                    !files_with_parse_errors.contains(f) && files_to_reanalyze.contains(f)
707                })
708                .map_with(db_reanalysis, |db, (file, src)| {
709                    let driver = Pass2Driver::new(
710                        &self.codebase,
711                        &*db as &dyn MirDatabase,
712                        self.resolved_php_version(),
713                    )
714                    .with_inferred_buffer(&inferred_buffer);
715                    let arena = bumpalo::Bump::new();
716                    let parsed = php_rs_parser::parse(&arena, src);
717                    driver.analyze_bodies(&parsed.program, file.clone(), src, &parsed.source_map)
718                })
719                .collect();
720
721            {
722                let mut guard = self.salsa.lock().expect("salsa lock poisoned");
723                guard.0.commit_inferred_return_types(&inferred_buffer);
724            }
725
726            for (issues, symbols) in reanalysis {
727                all_issues.extend(issues);
728                all_symbols.extend(symbols);
729            }
730        }
731    }
732
733    /// Re-analyze a single file within the existing codebase.
734    ///
735    /// This is the incremental analysis API for LSP:
736    /// 1. Removes old definitions from this file
737    /// 2. Re-runs Pass 1 (definition collection) on the new content
738    /// 3. Resolves any newly-collected `@psalm-import-type` declarations
739    /// 4. Re-runs Pass 2 (body analysis) on this file
740    /// 5. Returns the analysis result for this file only
741    pub fn re_analyze_file(&self, file_path: &str, new_content: &str) -> AnalysisResult {
742        // Fast path: content unchanged and cache has a valid entry — skip full re-analysis.
743        if let Some(cache) = &self.cache {
744            let h = hash_content(new_content);
745            if let Some((issues, ref_locs)) = cache.get(file_path, &h) {
746                let file: Arc<str> = Arc::from(file_path);
747                self.codebase.replay_reference_locations(file, &ref_locs);
748                return AnalysisResult::build(issues, HashMap::new(), Vec::new());
749            }
750        }
751
752        let file: Arc<str> = Arc::from(file_path);
753
754        // Collect FQCNs defined in this file before removal so the
755        // corresponding salsa nodes can be deactivated below.
756        let old_fqcns: Vec<Arc<str>> = self
757            .codebase
758            .symbol_to_file
759            .iter()
760            .filter(|e| e.value().as_ref() == file_path)
761            .map(|e| e.key().clone())
762            .collect();
763
764        // Mark removed classes, functions, methods, properties, and constants inactive.
765        {
766            let mut guard = self.salsa.lock().expect("salsa lock poisoned");
767            let (ref mut db, _) = *guard;
768            for fqcn in &old_fqcns {
769                db.deactivate_class_node(fqcn);
770                db.deactivate_function_node(fqcn);
771                db.deactivate_class_methods(fqcn);
772                db.deactivate_class_properties(fqcn);
773                db.deactivate_class_constants(fqcn);
774            }
775        }
776
777        self.codebase.remove_file_definitions(file_path);
778
779        // --- Salsa-backed Pass 1: memoized parse + definition collection ------
780        let file_defs = {
781            let mut guard = self.salsa.lock().expect("salsa lock poisoned");
782            let (ref mut db, ref mut files) = *guard;
783            let salsa_file = match files.get(&file) {
784                Some(&sf) => {
785                    sf.set_text(db).to(Arc::from(new_content));
786                    sf
787                }
788                None => {
789                    let sf = SourceFile::new(db, file.clone(), Arc::from(new_content));
790                    files.insert(file.clone(), sf);
791                    sf
792                }
793            };
794            collect_file_definitions(db, salsa_file)
795        };
796
797        let mut all_issues: Vec<Issue> = (*file_defs.issues).clone();
798        self.codebase.inject_stub_slice((*file_defs.slice).clone());
799
800        // --- S2 + Pass 2: hold the Salsa lock for ClassNode upserts and body
801        // analysis so the db reference is live during Pass 2 (S5).
802        let symbols = {
803            let mut guard = self.salsa.lock().expect("salsa lock poisoned");
804            let (ref mut db, _) = *guard;
805
806            // --- S2 + S5-PR5a: Upsert ClassNodes for all type kinds.  Traits and
807            // enums are registered with empty ancestor data — `class_ancestors`
808            // returns empty for them, matching `Codebase::ensure_finalized`.
809            for cls in &file_defs.slice.classes {
810                db.upsert_class_node(crate::db::ClassNodeFields {
811                    is_abstract: cls.is_abstract,
812                    parent: cls.parent.clone(),
813                    interfaces: Arc::from(cls.interfaces.as_slice()),
814                    traits: Arc::from(cls.traits.as_slice()),
815                    template_params: Arc::from(cls.template_params.as_slice()),
816                    mixins: Arc::from(cls.mixins.as_slice()),
817                    deprecated: cls.deprecated.clone(),
818                    is_final: cls.is_final,
819                    is_readonly: cls.is_readonly,
820                    location: cls.location.clone(),
821                    extends_type_args: Arc::from(cls.extends_type_args.as_slice()),
822                    implements_type_args: Arc::from(
823                        cls.implements_type_args
824                            .iter()
825                            .map(|(iface, args)| (iface.clone(), Arc::from(args.as_slice())))
826                            .collect::<Vec<_>>(),
827                    ),
828                    ..crate::db::ClassNodeFields::for_class(cls.fqcn.clone())
829                });
830            }
831            for iface in &file_defs.slice.interfaces {
832                db.upsert_class_node(crate::db::ClassNodeFields {
833                    extends: Arc::from(iface.extends.as_slice()),
834                    template_params: Arc::from(iface.template_params.as_slice()),
835                    location: iface.location.clone(),
836                    ..crate::db::ClassNodeFields::for_interface(iface.fqcn.clone())
837                });
838            }
839            for tr in &file_defs.slice.traits {
840                db.upsert_class_node(crate::db::ClassNodeFields {
841                    traits: Arc::from(tr.traits.as_slice()),
842                    template_params: Arc::from(tr.template_params.as_slice()),
843                    require_extends: Arc::from(tr.require_extends.as_slice()),
844                    require_implements: Arc::from(tr.require_implements.as_slice()),
845                    location: tr.location.clone(),
846                    ..crate::db::ClassNodeFields::for_trait(tr.fqcn.clone())
847                });
848            }
849            for en in &file_defs.slice.enums {
850                db.upsert_class_node(crate::db::ClassNodeFields {
851                    interfaces: Arc::from(en.interfaces.as_slice()),
852                    is_backed_enum: en.scalar_type.is_some(),
853                    enum_scalar_type: en.scalar_type.clone(),
854                    location: en.location.clone(),
855                    ..crate::db::ClassNodeFields::for_enum(en.fqcn.clone())
856                });
857            }
858
859            // --- S5-PR2: Upsert FunctionNodes ------------------------------------
860            for func in &file_defs.slice.functions {
861                db.upsert_function_node(func);
862            }
863
864            // --- S5-PR47: Upsert GlobalConstantNodes ------------------------------
865            for (fqn, ty) in &file_defs.slice.constants {
866                db.upsert_global_constant_node(fqn.clone(), ty.clone());
867            }
868
869            // --- S5-PR3: Upsert MethodNodes for all type members ------------------
870            for cls in &file_defs.slice.classes {
871                for method in cls.own_methods.values() {
872                    db.upsert_method_node(method);
873                }
874            }
875            for iface in &file_defs.slice.interfaces {
876                for method in iface.own_methods.values() {
877                    db.upsert_method_node(method);
878                }
879            }
880            for tr in &file_defs.slice.traits {
881                for method in tr.own_methods.values() {
882                    db.upsert_method_node(method);
883                }
884            }
885            for en in &file_defs.slice.enums {
886                for method in en.own_methods.values() {
887                    db.upsert_method_node(method);
888                }
889            }
890
891            // --- S5-PR4: Upsert PropertyNodes and ClassConstantNodes --------------
892            for cls in &file_defs.slice.classes {
893                for prop in cls.own_properties.values() {
894                    db.upsert_property_node(&cls.fqcn, prop);
895                }
896                for constant in cls.own_constants.values() {
897                    db.upsert_class_constant_node(&cls.fqcn, constant);
898                }
899            }
900            for iface in &file_defs.slice.interfaces {
901                for constant in iface.own_constants.values() {
902                    db.upsert_class_constant_node(&iface.fqcn, constant);
903                }
904            }
905            for tr in &file_defs.slice.traits {
906                for prop in tr.own_properties.values() {
907                    db.upsert_property_node(&tr.fqcn, prop);
908                }
909                for constant in tr.own_constants.values() {
910                    db.upsert_class_constant_node(&tr.fqcn, constant);
911                }
912            }
913            for en in &file_defs.slice.enums {
914                for constant in en.own_constants.values() {
915                    db.upsert_class_constant_node(&en.fqcn, constant);
916                }
917            }
918
919            // Resolve any newly-collected @psalm-import-type declarations so
920            // Pass 2 reads the imported aliases out of `type_aliases`.
921            self.codebase.resolve_pending_import_types();
922
923            // Re-parse in the arena so Pass 2 can walk the AST.
924            let arena = bumpalo::Bump::new();
925            let parsed = php_rs_parser::parse(&arena, new_content);
926
927            if parsed.errors.is_empty() {
928                // Priming sweep: populate inferred_return_type for this file's functions
929                // before the issue-emitting pass so within-file cross-function calls see
930                // the correct inferred return type rather than None.  The buffer +
931                // commit pattern is overkill for the single-threaded LSP path but kept
932                // for symmetry with the parallel batch path (and so the analyzer's
933                // Salsa node reads see the inferred values).
934                let inferred_buffer = crate::db::InferredReturnTypes::new();
935                {
936                    let db_ref: &dyn MirDatabase = db;
937                    Pass2Driver::new_inference_only(
938                        &self.codebase,
939                        db_ref,
940                        self.resolved_php_version(),
941                    )
942                    .with_inferred_buffer(&inferred_buffer)
943                    .analyze_bodies(
944                        &parsed.program,
945                        file.clone(),
946                        new_content,
947                        &parsed.source_map,
948                    );
949                }
950                db.commit_inferred_return_types(&inferred_buffer);
951
952                let db_ref: &dyn MirDatabase = db;
953                let driver = Pass2Driver::new(&self.codebase, db_ref, self.resolved_php_version());
954                let (body_issues, symbols) = driver.analyze_bodies(
955                    &parsed.program,
956                    file.clone(),
957                    new_content,
958                    &parsed.source_map,
959                );
960                all_issues.extend(body_issues);
961                symbols
962            } else {
963                Vec::new()
964            }
965        };
966
967        if let Some(cache) = &self.cache {
968            let h = hash_content(new_content);
969            cache.evict_with_dependents(&[file_path.to_string()]);
970            let ref_locs = extract_reference_locations(&self.codebase, &file);
971            cache.put(file_path, h, all_issues.clone(), ref_locs);
972        }
973
974        AnalysisResult::build(all_issues, HashMap::new(), symbols)
975    }
976
977    /// Analyze a PHP source string without a real file path.
978    /// Useful for tests and LSP single-file mode.
979    pub fn analyze_source(source: &str) -> AnalysisResult {
980        use crate::collector::DefinitionCollector;
981        let analyzer = ProjectAnalyzer::new();
982        analyzer.load_stubs();
983        let file: Arc<str> = Arc::from("<source>");
984        let arena = bumpalo::Bump::new();
985        let result = php_rs_parser::parse(&arena, source);
986        let mut all_issues = Vec::new();
987        for err in &result.errors {
988            all_issues.push(Issue::new(
989                mir_issues::IssueKind::ParseError {
990                    message: err.to_string(),
991                },
992                mir_issues::Location {
993                    file: file.clone(),
994                    line: 1,
995                    line_end: 1,
996                    col_start: 0,
997                    col_end: 0,
998                },
999            ));
1000        }
1001        if !result.errors.is_empty() {
1002            return AnalysisResult::build(all_issues, std::collections::HashMap::new(), Vec::new());
1003        }
1004        let collector =
1005            DefinitionCollector::new(&analyzer.codebase, file.clone(), source, &result.source_map);
1006        all_issues.extend(collector.collect(&result.program));
1007        analyzer.codebase.resolve_pending_import_types();
1008        let mut type_envs = std::collections::HashMap::new();
1009        let mut all_symbols = Vec::new();
1010        // Build a db that mirrors the just-collected definitions so the
1011        // analyzers' db reads see them.
1012        let mut db = MirDb::default();
1013        db.ingest_codebase(&analyzer.codebase);
1014
1015        // Priming sweep: populate inferred_return_type on FunctionNode /
1016        // MethodNode before the issue-emitting pass so call sites see the
1017        // inferred values.  Single-threaded — no buffer / commit dance
1018        // needed in principle, but we use the same pattern for symmetry
1019        // and so the read-side fallback to `Codebase` can be dropped.
1020        let inferred_buffer = crate::db::InferredReturnTypes::new();
1021        Pass2Driver::new_inference_only(&analyzer.codebase, &db, analyzer.resolved_php_version())
1022            .with_inferred_buffer(&inferred_buffer)
1023            .analyze_bodies(&result.program, file.clone(), source, &result.source_map);
1024        db.commit_inferred_return_types(&inferred_buffer);
1025
1026        let driver = Pass2Driver::new(&analyzer.codebase, &db, analyzer.resolved_php_version());
1027        all_issues.extend(driver.analyze_bodies_typed(
1028            &result.program,
1029            file.clone(),
1030            source,
1031            &result.source_map,
1032            &mut type_envs,
1033            &mut all_symbols,
1034        ));
1035        AnalysisResult::build(all_issues, type_envs, all_symbols)
1036    }
1037
1038    /// Discover all `.php` files under a directory, recursively.
1039    pub fn discover_files(root: &Path) -> Vec<PathBuf> {
1040        if root.is_file() {
1041            return vec![root.to_path_buf()];
1042        }
1043        let mut files = Vec::new();
1044        collect_php_files(root, &mut files);
1045        files
1046    }
1047
1048    /// Pass 1 only: collect type definitions from `paths` into the codebase without
1049    /// analyzing method bodies or emitting issues. Used to load vendor types.
1050    pub fn collect_types_only(&self, paths: &[PathBuf]) {
1051        paths.par_iter().for_each(|path| {
1052            let Ok(src) = std::fs::read_to_string(path) else {
1053                return;
1054            };
1055            let file: Arc<str> = Arc::from(path.to_string_lossy().as_ref());
1056            let arena = bumpalo::Bump::new();
1057            let result = php_rs_parser::parse(&arena, &src);
1058            let collector =
1059                DefinitionCollector::new(&self.codebase, file, &src, &result.source_map);
1060            let _ = collector.collect(&result.program);
1061        });
1062    }
1063}
1064
1065impl Default for ProjectAnalyzer {
1066    fn default() -> Self {
1067        Self::new()
1068    }
1069}
1070
1071// ---------------------------------------------------------------------------
1072
/// Recursively append every file with a `php` extension found under `dir`
/// to `out`.
///
/// * Symbolic links (to files or directories) are skipped.
/// * Directories named `vendor`, `.git`, `node_modules`, `.cache` or
///   `.pnpm-store` are not descended into.
/// * Unreadable directories and unreadable entries are silently ignored.
///
/// The order of the collected paths follows `std::fs::read_dir` and is
/// therefore platform-dependent.
pub(crate) fn collect_php_files(dir: &Path, out: &mut Vec<PathBuf>) {
    let Ok(entries) = std::fs::read_dir(dir) else {
        return;
    };

    for entry in entries.flatten() {
        // `DirEntry::file_type` does not follow symlinks, so one lookup is
        // enough to answer both "is this a symlink?" and "is this a directory?".
        let file_type = entry.file_type().ok();
        if matches!(file_type, Some(ft) if ft.is_symlink()) {
            continue;
        }

        let path = entry.path();
        // Only fall back to a fresh metadata lookup when the entry's type
        // could not be determined.
        let is_dir = match file_type {
            Some(ft) => ft.is_dir(),
            None => path.is_dir(),
        };

        if is_dir {
            let name = path.file_name().and_then(|n| n.to_str()).unwrap_or("");
            let skipped = matches!(
                name,
                "vendor" | ".git" | "node_modules" | ".cache" | ".pnpm-store"
            );
            if !skipped {
                collect_php_files(&path, out);
            }
        } else if path.extension().and_then(|e| e.to_str()) == Some("php") {
            out.push(path);
        }
    }
}
1095
1096// ---------------------------------------------------------------------------
1097// build_reverse_deps
1098// ---------------------------------------------------------------------------
1099
1100fn build_reverse_deps(
1101    codebase: &Codebase,
1102    db: &dyn crate::db::MirDatabase,
1103) -> HashMap<String, HashSet<String>> {
1104    let mut reverse: HashMap<String, HashSet<String>> = HashMap::new();
1105
1106    let mut add_edge = |symbol: &str, dependent_file: &str| {
1107        if let Some(defining_file) = codebase.symbol_to_file.get(symbol) {
1108            let def = defining_file.as_ref().to_string();
1109            if def != dependent_file {
1110                reverse
1111                    .entry(def)
1112                    .or_default()
1113                    .insert(dependent_file.to_string());
1114            }
1115        }
1116    };
1117
1118    for entry in codebase.file_imports.iter() {
1119        let file = entry.key().as_ref().to_string();
1120        for fqcn in entry.value().values() {
1121            add_edge(fqcn, &file);
1122        }
1123    }
1124
1125    for fqcn in db.active_class_node_fqcns() {
1126        // Match `Codebase::classes` semantics: only true classes contribute
1127        // class-direction edges in this loop.  Interface / trait / enum edges
1128        // are handled by their own dedicated codebase iterators elsewhere if
1129        // needed (none currently — this function only ever read classes).
1130        let kind = match crate::db::class_kind_via_db(db, fqcn.as_ref()) {
1131            Some(k) if !k.is_interface && !k.is_trait && !k.is_enum => k,
1132            _ => continue,
1133        };
1134        let _ = kind;
1135        let Some(file) = codebase
1136            .symbol_to_file
1137            .get(fqcn.as_ref())
1138            .map(|f| f.as_ref().to_string())
1139        else {
1140            continue;
1141        };
1142
1143        let Some(node) = db.lookup_class_node(fqcn.as_ref()) else {
1144            continue;
1145        };
1146        if let Some(parent) = node.parent(db) {
1147            add_edge(parent.as_ref(), &file);
1148        }
1149        for iface in node.interfaces(db).iter() {
1150            add_edge(iface.as_ref(), &file);
1151        }
1152        for tr in node.traits(db).iter() {
1153            add_edge(tr.as_ref(), &file);
1154        }
1155    }
1156
1157    reverse
1158}
1159
1160// ---------------------------------------------------------------------------
1161
1162fn extract_reference_locations(
1163    codebase: &Codebase,
1164    file: &Arc<str>,
1165) -> Vec<(String, u32, u16, u16)> {
1166    codebase
1167        .extract_file_reference_locations(file.as_ref())
1168        .into_iter()
1169        .map(|(sym, line, col_start, col_end)| (sym.to_string(), line, col_start, col_end))
1170        .collect()
1171}
1172
1173// ---------------------------------------------------------------------------
1174// AnalysisResult
1175// ---------------------------------------------------------------------------
1176
1177pub struct AnalysisResult {
1178    pub issues: Vec<Issue>,
1179    pub type_envs: std::collections::HashMap<crate::type_env::ScopeId, crate::type_env::TypeEnv>,
1180    /// Per-expression resolved symbols from Pass 2, sorted by file path.
1181    pub symbols: Vec<crate::symbol::ResolvedSymbol>,
1182    /// Maps each file path to the contiguous range within `symbols` that belongs
1183    /// to it. Built once after analysis; allows `symbol_at` to scan only the
1184    /// relevant file's slice rather than the entire codebase-wide vector.
1185    symbols_by_file: HashMap<Arc<str>, std::ops::Range<usize>>,
1186}
1187
1188impl AnalysisResult {
1189    fn build(
1190        issues: Vec<Issue>,
1191        type_envs: std::collections::HashMap<crate::type_env::ScopeId, crate::type_env::TypeEnv>,
1192        mut symbols: Vec<crate::symbol::ResolvedSymbol>,
1193    ) -> Self {
1194        symbols.sort_unstable_by(|a, b| a.file.as_ref().cmp(b.file.as_ref()));
1195        let mut symbols_by_file: HashMap<Arc<str>, std::ops::Range<usize>> = HashMap::new();
1196        let mut i = 0;
1197        while i < symbols.len() {
1198            let file = Arc::clone(&symbols[i].file);
1199            let start = i;
1200            while i < symbols.len() && symbols[i].file == file {
1201                i += 1;
1202            }
1203            symbols_by_file.insert(file, start..i);
1204        }
1205        Self {
1206            issues,
1207            type_envs,
1208            symbols,
1209            symbols_by_file,
1210        }
1211    }
1212}
1213
1214impl AnalysisResult {
1215    pub fn error_count(&self) -> usize {
1216        self.issues
1217            .iter()
1218            .filter(|i| i.severity == mir_issues::Severity::Error)
1219            .count()
1220    }
1221
1222    pub fn warning_count(&self) -> usize {
1223        self.issues
1224            .iter()
1225            .filter(|i| i.severity == mir_issues::Severity::Warning)
1226            .count()
1227    }
1228
1229    /// Group issues by source file.
1230    pub fn issues_by_file(&self) -> HashMap<std::sync::Arc<str>, Vec<&Issue>> {
1231        let mut map: HashMap<std::sync::Arc<str>, Vec<&Issue>> = HashMap::new();
1232        for issue in &self.issues {
1233            map.entry(issue.location.file.clone())
1234                .or_default()
1235                .push(issue);
1236        }
1237        map
1238    }
1239
1240    /// Return the innermost resolved symbol whose span contains `byte_offset`
1241    /// in `file`, or `None` if no symbol was recorded at that position.
1242    pub fn symbol_at(
1243        &self,
1244        file: &str,
1245        byte_offset: u32,
1246    ) -> Option<&crate::symbol::ResolvedSymbol> {
1247        let range = self.symbols_by_file.get(file)?;
1248        self.symbols[range.clone()]
1249            .iter()
1250            .filter(|s| s.span.start <= byte_offset && byte_offset < s.span.end)
1251            .min_by_key(|s| s.span.end - s.span.start)
1252    }
1253}