Skip to main content

mir_analyzer/
project.rs

//! Project-level orchestration: file discovery, pass 1, pass 2.
2use std::path::{Path, PathBuf};
3use std::sync::Arc;
4
5use rayon::prelude::*;
6
7use std::collections::{HashMap, HashSet};
8
9use crate::cache::{hash_content, AnalysisCache};
10use mir_codebase::Codebase;
11use mir_issues::Issue;
12use mir_types::Union;
13
14use crate::collector::DefinitionCollector;
15
16// ---------------------------------------------------------------------------
17// ProjectAnalyzer
18// ---------------------------------------------------------------------------
19
/// Project-wide analyzer: owns the shared [`Codebase`] plus the optional
/// per-file result cache, and exposes the analysis entry points
/// (`analyze`, `re_analyze_file`, `analyze_source`).
pub struct ProjectAnalyzer {
    /// Shared symbol store; definition collection writes into it and body
    /// analysis reads from it.
    pub codebase: Arc<Codebase>,
    /// Optional cache — when `Some`, Pass 2 results are read/written per file.
    pub cache: Option<AnalysisCache>,
    /// Called once after each file completes Pass 2 (used for progress reporting).
    pub on_file_done: Option<Arc<dyn Fn() + Send + Sync>>,
    /// PSR-4 autoloader mapping from composer.json, if available.
    pub psr4: Option<Arc<crate::composer::Psr4Map>>,
    /// Whether stubs have already been loaded (to avoid double-loading).
    /// Atomic so that `load_stubs` can flip it through `&self`.
    stubs_loaded: std::sync::atomic::AtomicBool,
    /// When true, run dead code detection at the end of analysis.
    pub find_dead_code: bool,
}
33
34impl ProjectAnalyzer {
35    pub fn new() -> Self {
36        Self {
37            codebase: Arc::new(Codebase::new()),
38            cache: None,
39            on_file_done: None,
40            psr4: None,
41            stubs_loaded: std::sync::atomic::AtomicBool::new(false),
42            find_dead_code: false,
43        }
44    }
45
46    /// Create a `ProjectAnalyzer` with a disk-backed cache stored under `cache_dir`.
47    pub fn with_cache(cache_dir: &Path) -> Self {
48        Self {
49            codebase: Arc::new(Codebase::new()),
50            cache: Some(AnalysisCache::open(cache_dir)),
51            on_file_done: None,
52            psr4: None,
53            stubs_loaded: std::sync::atomic::AtomicBool::new(false),
54            find_dead_code: false,
55        }
56    }
57
58    /// Create a `ProjectAnalyzer` from a project root containing `composer.json`.
59    /// Returns the analyzer (with `psr4` set) and the `Psr4Map` so callers can
60    /// call `map.project_files()` / `map.vendor_files()`.
61    pub fn from_composer(
62        root: &Path,
63    ) -> Result<(Self, crate::composer::Psr4Map), crate::composer::ComposerError> {
64        let map = crate::composer::Psr4Map::from_composer(root)?;
65        let psr4 = Arc::new(map.clone());
66        let analyzer = Self {
67            codebase: Arc::new(Codebase::new()),
68            cache: None,
69            on_file_done: None,
70            psr4: Some(psr4),
71            stubs_loaded: std::sync::atomic::AtomicBool::new(false),
72            find_dead_code: false,
73        };
74        Ok((analyzer, map))
75    }
76
    /// Expose codebase for external use (e.g., pre-loading stubs from CLI).
    ///
    /// Returns a borrow of the analyzer's own `Arc`; no new handle is created here.
    pub fn codebase(&self) -> &Arc<Codebase> {
        &self.codebase
    }
81
82    /// Load PHP built-in stubs. Called automatically by `analyze` if not done yet.
83    pub fn load_stubs(&self) {
84        if !self
85            .stubs_loaded
86            .swap(true, std::sync::atomic::Ordering::SeqCst)
87        {
88            crate::stubs::load_stubs(&self.codebase);
89        }
90    }
91
92    /// Run the full analysis pipeline on a set of file paths.
93    pub fn analyze(&self, paths: &[PathBuf]) -> AnalysisResult {
94        let mut all_issues = Vec::new();
95        let mut parse_errors = Vec::new();
96
97        // ---- Load PHP built-in stubs (before Pass 1 so user code can override)
98        self.load_stubs();
99
100        // ---- Pre-Pass-2 invalidation: evict dependents of changed files ------
101        // Uses the reverse dep graph persisted from the previous run.
102        if let Some(cache) = &self.cache {
103            let changed: Vec<String> = paths
104                .iter()
105                .filter_map(|p| {
106                    let path_str = p.to_string_lossy().into_owned();
107                    let content = std::fs::read_to_string(p).ok()?;
108                    let h = hash_content(&content);
109                    if cache.get(&path_str, &h).is_none() {
110                        Some(path_str)
111                    } else {
112                        None
113                    }
114                })
115                .collect();
116            if !changed.is_empty() {
117                cache.evict_with_dependents(&changed);
118            }
119        }
120
121        // ---- Pass 1: read files in parallel ----------------------------------
122        let file_data: Vec<(Arc<str>, String)> = paths
123            .par_iter()
124            .filter_map(|path| match std::fs::read_to_string(path) {
125                Ok(src) => Some((Arc::from(path.to_string_lossy().as_ref()), src)),
126                Err(e) => {
127                    eprintln!("Cannot read {}: {}", path.display(), e);
128                    None
129                }
130            })
131            .collect();
132
133        // ---- Pass 1: combined pre-index + definition collection (parallel) -----
134        // Parse each file once; both the FQCN/namespace/import index and the full
135        // definition collection run in the same rayon closure, eliminating the
136        // second sequential parse of every file. DashMap handles concurrent writes.
137        let pass1_results: Vec<(Vec<Issue>, Vec<Issue>)> = file_data
138            .par_iter()
139            .map(|(file, src)| {
140                use php_ast::ast::StmtKind;
141                let arena = bumpalo::Bump::new();
142                let result = php_rs_parser::parse(&arena, src);
143
144                // --- Pre-index: build FQCN index, file imports, and namespaces ---
145                let mut current_namespace: Option<String> = None;
146                let mut imports: std::collections::HashMap<String, String> =
147                    std::collections::HashMap::new();
148                let mut file_ns_set = false;
149
150                // Index a flat list of stmts under a given namespace prefix.
151                let index_stmts =
152                    |stmts: &[php_ast::ast::Stmt<'_, '_>],
153                     ns: Option<&str>,
154                     imports: &mut std::collections::HashMap<String, String>| {
155                        for stmt in stmts.iter() {
156                            match &stmt.kind {
157                                StmtKind::Use(use_decl) => {
158                                    for item in use_decl.uses.iter() {
159                                        let full_name = crate::parser::name_to_string(&item.name);
160                                        let alias = item.alias.unwrap_or_else(|| {
161                                            full_name.rsplit('\\').next().unwrap_or(&full_name)
162                                        });
163                                        imports.insert(alias.to_string(), full_name);
164                                    }
165                                }
166                                StmtKind::Class(decl) => {
167                                    if let Some(n) = decl.name {
168                                        let fqcn = match ns {
169                                            Some(ns) => format!("{}\\{}", ns, n),
170                                            None => n.to_string(),
171                                        };
172                                        self.codebase
173                                            .known_symbols
174                                            .insert(Arc::from(fqcn.as_str()));
175                                    }
176                                }
177                                StmtKind::Interface(decl) => {
178                                    let fqcn = match ns {
179                                        Some(ns) => format!("{}\\{}", ns, decl.name),
180                                        None => decl.name.to_string(),
181                                    };
182                                    self.codebase.known_symbols.insert(Arc::from(fqcn.as_str()));
183                                }
184                                StmtKind::Trait(decl) => {
185                                    let fqcn = match ns {
186                                        Some(ns) => format!("{}\\{}", ns, decl.name),
187                                        None => decl.name.to_string(),
188                                    };
189                                    self.codebase.known_symbols.insert(Arc::from(fqcn.as_str()));
190                                }
191                                StmtKind::Enum(decl) => {
192                                    let fqcn = match ns {
193                                        Some(ns) => format!("{}\\{}", ns, decl.name),
194                                        None => decl.name.to_string(),
195                                    };
196                                    self.codebase.known_symbols.insert(Arc::from(fqcn.as_str()));
197                                }
198                                StmtKind::Function(decl) => {
199                                    let fqn = match ns {
200                                        Some(ns) => format!("{}\\{}", ns, decl.name),
201                                        None => decl.name.to_string(),
202                                    };
203                                    self.codebase.known_symbols.insert(Arc::from(fqn.as_str()));
204                                }
205                                _ => {}
206                            }
207                        }
208                    };
209
210                for stmt in result.program.stmts.iter() {
211                    match &stmt.kind {
212                        StmtKind::Namespace(ns) => {
213                            current_namespace =
214                                ns.name.as_ref().map(|n| crate::parser::name_to_string(n));
215                            if !file_ns_set {
216                                if let Some(ref ns_str) = current_namespace {
217                                    self.codebase
218                                        .file_namespaces
219                                        .insert(file.clone(), ns_str.clone());
220                                    file_ns_set = true;
221                                }
222                            }
223                            // Bracketed namespace: walk inner stmts for Use/Class/etc.
224                            if let php_ast::ast::NamespaceBody::Braced(inner_stmts) = &ns.body {
225                                index_stmts(
226                                    inner_stmts,
227                                    current_namespace.as_deref(),
228                                    &mut imports,
229                                );
230                            }
231                        }
232                        _ => index_stmts(
233                            std::slice::from_ref(stmt),
234                            current_namespace.as_deref(),
235                            &mut imports,
236                        ),
237                    }
238                }
239
240                if !imports.is_empty() {
241                    self.codebase.file_imports.insert(file.clone(), imports);
242                }
243
244                // --- Parse errors ---
245                let file_parse_errors: Vec<Issue> = result
246                    .errors
247                    .iter()
248                    .map(|err| {
249                        Issue::new(
250                            mir_issues::IssueKind::ParseError {
251                                message: err.to_string(),
252                            },
253                            mir_issues::Location {
254                                file: file.clone(),
255                                line: 1,
256                                col_start: 0,
257                                col_end: 0,
258                            },
259                        )
260                    })
261                    .collect();
262
263                // --- Definition collection ---
264                let collector =
265                    DefinitionCollector::new(&self.codebase, file.clone(), src, &result.source_map);
266                let issues = collector.collect(&result.program);
267
268                (file_parse_errors, issues)
269            })
270            .collect();
271
272        for (file_parse_errors, issues) in pass1_results {
273            parse_errors.extend(file_parse_errors);
274            all_issues.extend(issues);
275        }
276
277        all_issues.extend(parse_errors);
278
279        // ---- Finalize codebase (resolve inheritance, build dispatch tables) --
280        self.codebase.finalize();
281
282        // ---- Lazy-load unknown classes via PSR-4 (issue #50) ----------------
283        if let Some(psr4) = &self.psr4 {
284            self.lazy_load_missing_classes(psr4.clone(), &mut all_issues);
285        }
286
287        // ---- Build reverse dep graph and persist it for the next run ---------
288        if let Some(cache) = &self.cache {
289            let rev = build_reverse_deps(&self.codebase);
290            cache.set_reverse_deps(rev);
291        }
292
293        // ---- Class-level checks (M11) ----------------------------------------
294        let analyzed_file_set: std::collections::HashSet<std::sync::Arc<str>> =
295            file_data.iter().map(|(f, _)| f.clone()).collect();
296        let class_issues =
297            crate::class::ClassAnalyzer::with_files(&self.codebase, analyzed_file_set, &file_data)
298                .analyze_all();
299        all_issues.extend(class_issues);
300
301        // ---- Pass 2: analyze function/method bodies in parallel (M14) --------
302        // Each file is analyzed independently; arena + parse happen inside the
303        // rayon closure so there is no cross-thread borrow.
304        // When a cache is present, files whose content hash matches a stored
305        // entry skip re-analysis entirely (M17).
306        let pass2_results: Vec<(Vec<Issue>, Vec<crate::symbol::ResolvedSymbol>)> = file_data
307            .par_iter()
308            .map(|(file, src)| {
309                // Cache lookup
310                let result = if let Some(cache) = &self.cache {
311                    let h = hash_content(src);
312                    if let Some((cached_issues, ref_locs)) = cache.get(file, &h) {
313                        // Hit — replay reference locations so symbol_reference_locations
314                        // is populated without re-running analyze_bodies.
315                        self.codebase
316                            .replay_reference_locations(file.clone(), &ref_locs);
317                        (cached_issues, Vec::new())
318                    } else {
319                        // Miss — analyze and store
320                        let arena = bumpalo::Bump::new();
321                        let parsed = php_rs_parser::parse(&arena, src);
322                        let (issues, symbols) = self.analyze_bodies(
323                            &parsed.program,
324                            file.clone(),
325                            src,
326                            &parsed.source_map,
327                        );
328                        let ref_locs = extract_reference_locations(&self.codebase, file);
329                        cache.put(file, h, issues.clone(), ref_locs);
330                        (issues, symbols)
331                    }
332                } else {
333                    let arena = bumpalo::Bump::new();
334                    let parsed = php_rs_parser::parse(&arena, src);
335                    self.analyze_bodies(&parsed.program, file.clone(), src, &parsed.source_map)
336                };
337                if let Some(cb) = &self.on_file_done {
338                    cb();
339                }
340                result
341            })
342            .collect();
343
344        let mut all_symbols = Vec::new();
345        for (issues, symbols) in pass2_results {
346            all_issues.extend(issues);
347            all_symbols.extend(symbols);
348        }
349
350        // Persist cache hits/misses to disk
351        if let Some(cache) = &self.cache {
352            cache.flush();
353        }
354
355        // ---- Compact the reference index ------------------------------------
356        // Convert build-phase DashMaps into a CSR structure, freeing the
357        // per-entry HashMap/HashSet overhead accumulated during Pass 2.
358        self.codebase.compact_reference_index();
359
360        // ---- Dead-code detection (M18) --------------------------------------
361        if self.find_dead_code {
362            let dead_code_issues =
363                crate::dead_code::DeadCodeAnalyzer::new(&self.codebase).analyze();
364            all_issues.extend(dead_code_issues);
365        }
366
367        AnalysisResult {
368            issues: all_issues,
369            type_envs: std::collections::HashMap::new(),
370            symbols: all_symbols,
371        }
372    }
373
374    /// Lazily load class definitions for referenced-but-unknown FQCNs via PSR-4.
375    ///
376    /// After Pass 1 and `codebase.finalize()`, some classes referenced as parents
377    /// or interfaces may not be in the codebase (they weren't in the initial file
378    /// list). This method iterates up to `max_depth` times, each time resolving
379    /// unknown parent/interface FQCNs via the PSR-4 map, running Pass 1 on those
380    /// files, and re-finalizing the codebase. The loop stops when no new files
381    /// are discovered.
382    fn lazy_load_missing_classes(
383        &self,
384        psr4: Arc<crate::composer::Psr4Map>,
385        all_issues: &mut Vec<Issue>,
386    ) {
387        use std::collections::HashSet;
388
389        let max_depth = 10; // prevent infinite chains
390        let mut loaded: HashSet<String> = HashSet::new();
391
392        for _ in 0..max_depth {
393            // Collect all referenced FQCNs that aren't in the codebase
394            let mut to_load: Vec<(String, PathBuf)> = Vec::new();
395
396            for entry in self.codebase.classes.iter() {
397                let cls = entry.value();
398
399                // Check parent class
400                if let Some(parent) = &cls.parent {
401                    let fqcn = parent.as_ref();
402                    if !self.codebase.classes.contains_key(fqcn) && !loaded.contains(fqcn) {
403                        if let Some(path) = psr4.resolve(fqcn) {
404                            to_load.push((fqcn.to_string(), path));
405                        }
406                    }
407                }
408
409                // Check interfaces
410                for iface in &cls.interfaces {
411                    let fqcn = iface.as_ref();
412                    if !self.codebase.classes.contains_key(fqcn)
413                        && !self.codebase.interfaces.contains_key(fqcn)
414                        && !loaded.contains(fqcn)
415                    {
416                        if let Some(path) = psr4.resolve(fqcn) {
417                            to_load.push((fqcn.to_string(), path));
418                        }
419                    }
420                }
421            }
422
423            if to_load.is_empty() {
424                break;
425            }
426
427            // Load each discovered file (Pass 1 only)
428            for (fqcn, path) in to_load {
429                loaded.insert(fqcn);
430                if let Ok(src) = std::fs::read_to_string(&path) {
431                    let file: Arc<str> = Arc::from(path.to_string_lossy().as_ref());
432                    let arena = bumpalo::Bump::new();
433                    let result = php_rs_parser::parse(&arena, &src);
434                    let collector = crate::collector::DefinitionCollector::new(
435                        &self.codebase,
436                        file,
437                        &src,
438                        &result.source_map,
439                    );
440                    let issues = collector.collect(&result.program);
441                    all_issues.extend(issues);
442                }
443            }
444
445            // Re-finalize to include newly loaded classes in the inheritance graph.
446            // Must reset the flag first so finalize() isn't a no-op.
447            self.codebase.invalidate_finalization();
448            self.codebase.finalize();
449        }
450    }
451
452    /// Re-analyze a single file within the existing codebase.
453    ///
454    /// This is the incremental analysis API for LSP:
455    /// 1. Removes old definitions from this file
456    /// 2. Re-runs Pass 1 (definition collection) on the new content
457    /// 3. Re-finalizes the codebase (rebuilds inheritance)
458    /// 4. Re-runs Pass 2 (body analysis) on this file
459    /// 5. Returns the analysis result for this file only
460    pub fn re_analyze_file(&self, file_path: &str, new_content: &str) -> AnalysisResult {
461        // 1. Remove old definitions from this file
462        self.codebase.remove_file_definitions(file_path);
463
464        // 2. Parse new content and run Pass 1
465        let file: Arc<str> = Arc::from(file_path);
466        let arena = bumpalo::Bump::new();
467        let parsed = php_rs_parser::parse(&arena, new_content);
468
469        let mut all_issues = Vec::new();
470
471        // Collect parse errors
472        for err in &parsed.errors {
473            all_issues.push(Issue::new(
474                mir_issues::IssueKind::ParseError {
475                    message: err.to_string(),
476                },
477                mir_issues::Location {
478                    file: file.clone(),
479                    line: 1,
480                    col_start: 0,
481                    col_end: 0,
482                },
483            ));
484        }
485
486        let collector = DefinitionCollector::new(
487            &self.codebase,
488            file.clone(),
489            new_content,
490            &parsed.source_map,
491        );
492        all_issues.extend(collector.collect(&parsed.program));
493
494        // 3. Re-finalize (invalidation already done by remove_file_definitions)
495        self.codebase.finalize();
496
497        // 4. Run Pass 2 on this file
498        let (body_issues, symbols) = self.analyze_bodies(
499            &parsed.program,
500            file.clone(),
501            new_content,
502            &parsed.source_map,
503        );
504        all_issues.extend(body_issues);
505
506        // 5. Update cache if present
507        if let Some(cache) = &self.cache {
508            let h = hash_content(new_content);
509            cache.evict_with_dependents(&[file_path.to_string()]);
510            let ref_locs = extract_reference_locations(&self.codebase, &file);
511            cache.put(file_path, h, all_issues.clone(), ref_locs);
512        }
513
514        AnalysisResult {
515            issues: all_issues,
516            type_envs: HashMap::new(),
517            symbols,
518        }
519    }
520
521    /// Analyze a PHP source string without a real file path.
522    /// Useful for tests and LSP single-file mode.
523    pub fn analyze_source(source: &str) -> AnalysisResult {
524        use crate::collector::DefinitionCollector;
525        let analyzer = ProjectAnalyzer::new();
526        analyzer.load_stubs();
527        let file: Arc<str> = Arc::from("<source>");
528        let arena = bumpalo::Bump::new();
529        let result = php_rs_parser::parse(&arena, source);
530        let mut all_issues = Vec::new();
531        let collector =
532            DefinitionCollector::new(&analyzer.codebase, file.clone(), source, &result.source_map);
533        all_issues.extend(collector.collect(&result.program));
534        analyzer.codebase.finalize();
535        let mut type_envs = std::collections::HashMap::new();
536        let mut all_symbols = Vec::new();
537        all_issues.extend(analyzer.analyze_bodies_typed(
538            &result.program,
539            file.clone(),
540            source,
541            &result.source_map,
542            &mut type_envs,
543            &mut all_symbols,
544        ));
545        AnalysisResult {
546            issues: all_issues,
547            type_envs,
548            symbols: all_symbols,
549        }
550    }
551
552    /// Pass 2: walk all function/method bodies in one file, return issues, and
553    /// write inferred return types back to the codebase.
554    fn analyze_bodies<'arena, 'src>(
555        &self,
556        program: &php_ast::ast::Program<'arena, 'src>,
557        file: Arc<str>,
558        source: &str,
559        source_map: &php_rs_parser::source_map::SourceMap,
560    ) -> (Vec<mir_issues::Issue>, Vec<crate::symbol::ResolvedSymbol>) {
561        use php_ast::ast::StmtKind;
562
563        let mut all_issues = Vec::new();
564        let mut all_symbols = Vec::new();
565
566        for stmt in program.stmts.iter() {
567            match &stmt.kind {
568                StmtKind::Function(decl) => {
569                    self.analyze_fn_decl(
570                        decl,
571                        &file,
572                        source,
573                        source_map,
574                        &mut all_issues,
575                        &mut all_symbols,
576                    );
577                }
578                StmtKind::Class(decl) => {
579                    self.analyze_class_decl(
580                        decl,
581                        &file,
582                        source,
583                        source_map,
584                        &mut all_issues,
585                        &mut all_symbols,
586                    );
587                }
588                StmtKind::Enum(decl) => {
589                    self.analyze_enum_decl(decl, &file, source, source_map, &mut all_issues);
590                }
591                StmtKind::Namespace(ns) => {
592                    if let php_ast::ast::NamespaceBody::Braced(stmts) = &ns.body {
593                        for inner in stmts.iter() {
594                            match &inner.kind {
595                                StmtKind::Function(decl) => {
596                                    self.analyze_fn_decl(
597                                        decl,
598                                        &file,
599                                        source,
600                                        source_map,
601                                        &mut all_issues,
602                                        &mut all_symbols,
603                                    );
604                                }
605                                StmtKind::Class(decl) => {
606                                    self.analyze_class_decl(
607                                        decl,
608                                        &file,
609                                        source,
610                                        source_map,
611                                        &mut all_issues,
612                                        &mut all_symbols,
613                                    );
614                                }
615                                StmtKind::Enum(decl) => {
616                                    self.analyze_enum_decl(
617                                        decl,
618                                        &file,
619                                        source,
620                                        source_map,
621                                        &mut all_issues,
622                                    );
623                                }
624                                _ => {}
625                            }
626                        }
627                    }
628                }
629                _ => {}
630            }
631        }
632
633        (all_issues, all_symbols)
634    }
635
636    /// Analyze a single function declaration body and collect issues + inferred return type.
637    #[allow(clippy::too_many_arguments)]
638    fn analyze_fn_decl<'arena, 'src>(
639        &self,
640        decl: &php_ast::ast::FunctionDecl<'arena, 'src>,
641        file: &Arc<str>,
642        source: &str,
643        source_map: &php_rs_parser::source_map::SourceMap,
644        all_issues: &mut Vec<mir_issues::Issue>,
645        all_symbols: &mut Vec<crate::symbol::ResolvedSymbol>,
646    ) {
647        let fn_name = decl.name;
648        let body = &decl.body;
649        // Check parameter and return type hints for undefined classes.
650        for param in decl.params.iter() {
651            if let Some(hint) = &param.type_hint {
652                check_type_hint_classes(hint, &self.codebase, file, source, source_map, all_issues);
653            }
654        }
655        if let Some(hint) = &decl.return_type {
656            check_type_hint_classes(hint, &self.codebase, file, source, source_map, all_issues);
657        }
658        use crate::context::Context;
659        use crate::stmt::StatementsAnalyzer;
660        use mir_issues::IssueBuffer;
661
662        // Resolve function name using the file's namespace (handles namespaced functions)
663        let resolved_fn = self.codebase.resolve_class_name(file.as_ref(), fn_name);
664        let func_opt: Option<mir_codebase::storage::FunctionStorage> = self
665            .codebase
666            .functions
667            .get(resolved_fn.as_str())
668            .map(|r| r.clone())
669            .or_else(|| self.codebase.functions.get(fn_name).map(|r| r.clone()))
670            .or_else(|| {
671                self.codebase
672                    .functions
673                    .iter()
674                    .find(|e| e.short_name.as_ref() == fn_name)
675                    .map(|e| e.value().clone())
676            });
677
678        let fqn = func_opt.as_ref().map(|f| f.fqn.clone());
679        // Always use the codebase entry when its params match the AST (same count + names).
680        // This covers the common case and preserves docblock-enriched types.
681        // When names differ (two files define the same unnamespaced function), fall back to
682        // the AST params so param variables are always in scope for this file's body.
683        let (params, return_ty): (Vec<mir_codebase::FnParam>, _) = match &func_opt {
684            Some(f)
685                if f.params.len() == decl.params.len()
686                    && f.params
687                        .iter()
688                        .zip(decl.params.iter())
689                        .all(|(cp, ap)| cp.name.as_ref() == ap.name) =>
690            {
691                (f.params.clone(), f.return_type.clone())
692            }
693            _ => {
694                let ast_params = decl
695                    .params
696                    .iter()
697                    .map(|p| mir_codebase::FnParam {
698                        name: Arc::from(p.name),
699                        ty: None,
700                        default: p.default.as_ref().map(|_| mir_types::Union::mixed()),
701                        is_variadic: p.variadic,
702                        is_byref: p.by_ref,
703                        is_optional: p.default.is_some() || p.variadic,
704                    })
705                    .collect();
706                (ast_params, None)
707            }
708        };
709
710        let mut ctx = Context::for_function(&params, return_ty, None, None, None, false, true);
711        let mut buf = IssueBuffer::new();
712        let mut sa = StatementsAnalyzer::new(
713            &self.codebase,
714            file.clone(),
715            source,
716            source_map,
717            &mut buf,
718            all_symbols,
719        );
720        sa.analyze_stmts(body, &mut ctx);
721        let inferred = merge_return_types(&sa.return_types);
722        drop(sa);
723
724        emit_unused_params(&params, &ctx, "", file, all_issues);
725        emit_unused_variables(&ctx, file, all_issues);
726        all_issues.extend(buf.into_issues());
727
728        if let Some(fqn) = fqn {
729            if let Some(mut func) = self.codebase.functions.get_mut(fqn.as_ref()) {
730                func.inferred_return_type = Some(inferred);
731            }
732        }
733    }
734
735    /// Analyze all method bodies on a class declaration and collect issues + inferred return types.
736    #[allow(clippy::too_many_arguments)]
737    fn analyze_class_decl<'arena, 'src>(
738        &self,
739        decl: &php_ast::ast::ClassDecl<'arena, 'src>,
740        file: &Arc<str>,
741        source: &str,
742        source_map: &php_rs_parser::source_map::SourceMap,
743        all_issues: &mut Vec<mir_issues::Issue>,
744        all_symbols: &mut Vec<crate::symbol::ResolvedSymbol>,
745    ) {
746        use crate::context::Context;
747        use crate::stmt::StatementsAnalyzer;
748        use mir_issues::IssueBuffer;
749
750        let class_name = decl.name.unwrap_or("<anonymous>");
751        // Resolve the FQCN using the file's namespace/imports — avoids ambiguity
752        // when multiple classes share the same short name across namespaces.
753        let resolved = self.codebase.resolve_class_name(file.as_ref(), class_name);
754        let fqcn: &str = &resolved;
755        let parent_fqcn = self
756            .codebase
757            .classes
758            .get(fqcn)
759            .and_then(|c| c.parent.clone());
760
761        for member in decl.members.iter() {
762            let php_ast::ast::ClassMemberKind::Method(method) = &member.kind else {
763                continue;
764            };
765
766            // Check parameter and return type hints for undefined classes (even abstract methods).
767            for param in method.params.iter() {
768                if let Some(hint) = &param.type_hint {
769                    check_type_hint_classes(
770                        hint,
771                        &self.codebase,
772                        file,
773                        source,
774                        source_map,
775                        all_issues,
776                    );
777                }
778            }
779            if let Some(hint) = &method.return_type {
780                check_type_hint_classes(hint, &self.codebase, file, source, source_map, all_issues);
781            }
782
783            let Some(body) = &method.body else { continue };
784
785            let method_storage = self.codebase.get_method(fqcn, method.name);
786            let (params, return_ty) = method_storage
787                .as_ref()
788                .map(|m| (m.params.clone(), m.return_type.clone()))
789                .unwrap_or_default();
790
791            let is_ctor = method.name == "__construct";
792            let mut ctx = Context::for_method(
793                &params,
794                return_ty,
795                Some(Arc::from(fqcn)),
796                parent_fqcn.clone(),
797                Some(Arc::from(fqcn)),
798                false,
799                is_ctor,
800                method.is_static,
801            );
802
803            let mut buf = IssueBuffer::new();
804            let mut sa = StatementsAnalyzer::new(
805                &self.codebase,
806                file.clone(),
807                source,
808                source_map,
809                &mut buf,
810                all_symbols,
811            );
812            sa.analyze_stmts(body, &mut ctx);
813            let inferred = merge_return_types(&sa.return_types);
814            drop(sa);
815
816            emit_unused_params(&params, &ctx, method.name, file, all_issues);
817            emit_unused_variables(&ctx, file, all_issues);
818            all_issues.extend(buf.into_issues());
819
820            if let Some(mut cls) = self.codebase.classes.get_mut(fqcn) {
821                if let Some(m) = cls.own_methods.get_mut(method.name) {
822                    m.inferred_return_type = Some(inferred);
823                }
824            }
825        }
826    }
827
828    /// Like `analyze_bodies` but also populates `type_envs` with per-scope type environments.
829    #[allow(clippy::too_many_arguments)]
830    fn analyze_bodies_typed<'arena, 'src>(
831        &self,
832        program: &php_ast::ast::Program<'arena, 'src>,
833        file: Arc<str>,
834        source: &str,
835        source_map: &php_rs_parser::source_map::SourceMap,
836        type_envs: &mut std::collections::HashMap<
837            crate::type_env::ScopeId,
838            crate::type_env::TypeEnv,
839        >,
840        all_symbols: &mut Vec<crate::symbol::ResolvedSymbol>,
841    ) -> Vec<mir_issues::Issue> {
842        use php_ast::ast::StmtKind;
843        let mut all_issues = Vec::new();
844        for stmt in program.stmts.iter() {
845            match &stmt.kind {
846                StmtKind::Function(decl) => {
847                    self.analyze_fn_decl_typed(
848                        decl,
849                        &file,
850                        source,
851                        source_map,
852                        &mut all_issues,
853                        type_envs,
854                        all_symbols,
855                    );
856                }
857                StmtKind::Class(decl) => {
858                    self.analyze_class_decl_typed(
859                        decl,
860                        &file,
861                        source,
862                        source_map,
863                        &mut all_issues,
864                        type_envs,
865                        all_symbols,
866                    );
867                }
868                StmtKind::Enum(decl) => {
869                    self.analyze_enum_decl(decl, &file, source, source_map, &mut all_issues);
870                }
871                StmtKind::Namespace(ns) => {
872                    if let php_ast::ast::NamespaceBody::Braced(stmts) = &ns.body {
873                        for inner in stmts.iter() {
874                            match &inner.kind {
875                                StmtKind::Function(decl) => {
876                                    self.analyze_fn_decl_typed(
877                                        decl,
878                                        &file,
879                                        source,
880                                        source_map,
881                                        &mut all_issues,
882                                        type_envs,
883                                        all_symbols,
884                                    );
885                                }
886                                StmtKind::Class(decl) => {
887                                    self.analyze_class_decl_typed(
888                                        decl,
889                                        &file,
890                                        source,
891                                        source_map,
892                                        &mut all_issues,
893                                        type_envs,
894                                        all_symbols,
895                                    );
896                                }
897                                StmtKind::Enum(decl) => {
898                                    self.analyze_enum_decl(
899                                        decl,
900                                        &file,
901                                        source,
902                                        source_map,
903                                        &mut all_issues,
904                                    );
905                                }
906                                _ => {}
907                            }
908                        }
909                    }
910                }
911                _ => {}
912            }
913        }
914        all_issues
915    }
916
917    /// Like `analyze_fn_decl` but also captures a `TypeEnv` for the function scope.
918    #[allow(clippy::too_many_arguments)]
919    fn analyze_fn_decl_typed<'arena, 'src>(
920        &self,
921        decl: &php_ast::ast::FunctionDecl<'arena, 'src>,
922        file: &Arc<str>,
923        source: &str,
924        source_map: &php_rs_parser::source_map::SourceMap,
925        all_issues: &mut Vec<mir_issues::Issue>,
926        type_envs: &mut std::collections::HashMap<
927            crate::type_env::ScopeId,
928            crate::type_env::TypeEnv,
929        >,
930        all_symbols: &mut Vec<crate::symbol::ResolvedSymbol>,
931    ) {
932        use crate::context::Context;
933        use crate::stmt::StatementsAnalyzer;
934        use mir_issues::IssueBuffer;
935
936        let fn_name = decl.name;
937        let body = &decl.body;
938
939        for param in decl.params.iter() {
940            if let Some(hint) = &param.type_hint {
941                check_type_hint_classes(hint, &self.codebase, file, source, source_map, all_issues);
942            }
943        }
944        if let Some(hint) = &decl.return_type {
945            check_type_hint_classes(hint, &self.codebase, file, source, source_map, all_issues);
946        }
947
948        let resolved_fn = self.codebase.resolve_class_name(file.as_ref(), fn_name);
949        let func_opt: Option<mir_codebase::storage::FunctionStorage> = self
950            .codebase
951            .functions
952            .get(resolved_fn.as_str())
953            .map(|r| r.clone())
954            .or_else(|| self.codebase.functions.get(fn_name).map(|r| r.clone()))
955            .or_else(|| {
956                self.codebase
957                    .functions
958                    .iter()
959                    .find(|e| e.short_name.as_ref() == fn_name)
960                    .map(|e| e.value().clone())
961            });
962
963        let fqn = func_opt.as_ref().map(|f| f.fqn.clone());
964        let (params, return_ty): (Vec<mir_codebase::FnParam>, _) = match &func_opt {
965            Some(f)
966                if f.params.len() == decl.params.len()
967                    && f.params
968                        .iter()
969                        .zip(decl.params.iter())
970                        .all(|(cp, ap)| cp.name.as_ref() == ap.name) =>
971            {
972                (f.params.clone(), f.return_type.clone())
973            }
974            _ => {
975                let ast_params = decl
976                    .params
977                    .iter()
978                    .map(|p| mir_codebase::FnParam {
979                        name: Arc::from(p.name),
980                        ty: None,
981                        default: p.default.as_ref().map(|_| mir_types::Union::mixed()),
982                        is_variadic: p.variadic,
983                        is_byref: p.by_ref,
984                        is_optional: p.default.is_some() || p.variadic,
985                    })
986                    .collect();
987                (ast_params, None)
988            }
989        };
990
991        let mut ctx = Context::for_function(&params, return_ty, None, None, None, false, true);
992        let mut buf = IssueBuffer::new();
993        let mut sa = StatementsAnalyzer::new(
994            &self.codebase,
995            file.clone(),
996            source,
997            source_map,
998            &mut buf,
999            all_symbols,
1000        );
1001        sa.analyze_stmts(body, &mut ctx);
1002        let inferred = merge_return_types(&sa.return_types);
1003        drop(sa);
1004
1005        // Capture TypeEnv for this scope
1006        let scope_name = fqn.clone().unwrap_or_else(|| Arc::from(fn_name));
1007        type_envs.insert(
1008            crate::type_env::ScopeId::Function {
1009                file: file.clone(),
1010                name: scope_name,
1011            },
1012            crate::type_env::TypeEnv::new(ctx.vars.clone()),
1013        );
1014
1015        emit_unused_params(&params, &ctx, "", file, all_issues);
1016        emit_unused_variables(&ctx, file, all_issues);
1017        all_issues.extend(buf.into_issues());
1018
1019        if let Some(fqn) = fqn {
1020            if let Some(mut func) = self.codebase.functions.get_mut(fqn.as_ref()) {
1021                func.inferred_return_type = Some(inferred);
1022            }
1023        }
1024    }
1025
1026    /// Like `analyze_class_decl` but also captures a `TypeEnv` per method scope.
1027    #[allow(clippy::too_many_arguments)]
1028    fn analyze_class_decl_typed<'arena, 'src>(
1029        &self,
1030        decl: &php_ast::ast::ClassDecl<'arena, 'src>,
1031        file: &Arc<str>,
1032        source: &str,
1033        source_map: &php_rs_parser::source_map::SourceMap,
1034        all_issues: &mut Vec<mir_issues::Issue>,
1035        type_envs: &mut std::collections::HashMap<
1036            crate::type_env::ScopeId,
1037            crate::type_env::TypeEnv,
1038        >,
1039        all_symbols: &mut Vec<crate::symbol::ResolvedSymbol>,
1040    ) {
1041        use crate::context::Context;
1042        use crate::stmt::StatementsAnalyzer;
1043        use mir_issues::IssueBuffer;
1044
1045        let class_name = decl.name.unwrap_or("<anonymous>");
1046        let resolved = self.codebase.resolve_class_name(file.as_ref(), class_name);
1047        let fqcn: &str = &resolved;
1048        let parent_fqcn = self
1049            .codebase
1050            .classes
1051            .get(fqcn)
1052            .and_then(|c| c.parent.clone());
1053
1054        for member in decl.members.iter() {
1055            let php_ast::ast::ClassMemberKind::Method(method) = &member.kind else {
1056                continue;
1057            };
1058
1059            for param in method.params.iter() {
1060                if let Some(hint) = &param.type_hint {
1061                    check_type_hint_classes(
1062                        hint,
1063                        &self.codebase,
1064                        file,
1065                        source,
1066                        source_map,
1067                        all_issues,
1068                    );
1069                }
1070            }
1071            if let Some(hint) = &method.return_type {
1072                check_type_hint_classes(hint, &self.codebase, file, source, source_map, all_issues);
1073            }
1074
1075            let Some(body) = &method.body else { continue };
1076
1077            let method_storage = self.codebase.get_method(fqcn, method.name);
1078            let (params, return_ty) = method_storage
1079                .as_ref()
1080                .map(|m| (m.params.clone(), m.return_type.clone()))
1081                .unwrap_or_default();
1082
1083            let is_ctor = method.name == "__construct";
1084            let mut ctx = Context::for_method(
1085                &params,
1086                return_ty,
1087                Some(Arc::from(fqcn)),
1088                parent_fqcn.clone(),
1089                Some(Arc::from(fqcn)),
1090                false,
1091                is_ctor,
1092                method.is_static,
1093            );
1094
1095            let mut buf = IssueBuffer::new();
1096            let mut sa = StatementsAnalyzer::new(
1097                &self.codebase,
1098                file.clone(),
1099                source,
1100                source_map,
1101                &mut buf,
1102                all_symbols,
1103            );
1104            sa.analyze_stmts(body, &mut ctx);
1105            let inferred = merge_return_types(&sa.return_types);
1106            drop(sa);
1107
1108            // Capture TypeEnv for this method scope
1109            type_envs.insert(
1110                crate::type_env::ScopeId::Method {
1111                    class: Arc::from(fqcn),
1112                    method: Arc::from(method.name),
1113                },
1114                crate::type_env::TypeEnv::new(ctx.vars.clone()),
1115            );
1116
1117            emit_unused_params(&params, &ctx, method.name, file, all_issues);
1118            emit_unused_variables(&ctx, file, all_issues);
1119            all_issues.extend(buf.into_issues());
1120
1121            if let Some(mut cls) = self.codebase.classes.get_mut(fqcn) {
1122                if let Some(m) = cls.own_methods.get_mut(method.name) {
1123                    m.inferred_return_type = Some(inferred);
1124                }
1125            }
1126        }
1127    }
1128
1129    /// Discover all `.php` files under a directory, recursively.
1130    pub fn discover_files(root: &Path) -> Vec<PathBuf> {
1131        if root.is_file() {
1132            return vec![root.to_path_buf()];
1133        }
1134        let mut files = Vec::new();
1135        collect_php_files(root, &mut files);
1136        files
1137    }
1138
1139    /// Pass 1 only: collect type definitions from `paths` into the codebase without
1140    /// analyzing method bodies or emitting issues. Used to load vendor types.
1141    pub fn collect_types_only(&self, paths: &[PathBuf]) {
1142        paths.par_iter().for_each(|path| {
1143            let Ok(src) = std::fs::read_to_string(path) else {
1144                return;
1145            };
1146            let file: Arc<str> = Arc::from(path.to_string_lossy().as_ref());
1147            let arena = bumpalo::Bump::new();
1148            let result = php_rs_parser::parse(&arena, &src);
1149            let collector =
1150                DefinitionCollector::new(&self.codebase, file, &src, &result.source_map);
1151            // Ignore any issues emitted during vendor collection
1152            let _ = collector.collect(&result.program);
1153        });
1154    }
1155
1156    /// Check type hints in enum methods for undefined classes.
1157    #[allow(clippy::too_many_arguments)]
1158    fn analyze_enum_decl<'arena, 'src>(
1159        &self,
1160        decl: &php_ast::ast::EnumDecl<'arena, 'src>,
1161        file: &Arc<str>,
1162        source: &str,
1163        source_map: &php_rs_parser::source_map::SourceMap,
1164        all_issues: &mut Vec<mir_issues::Issue>,
1165    ) {
1166        use php_ast::ast::EnumMemberKind;
1167        for member in decl.members.iter() {
1168            let EnumMemberKind::Method(method) = &member.kind else {
1169                continue;
1170            };
1171            for param in method.params.iter() {
1172                if let Some(hint) = &param.type_hint {
1173                    check_type_hint_classes(
1174                        hint,
1175                        &self.codebase,
1176                        file,
1177                        source,
1178                        source_map,
1179                        all_issues,
1180                    );
1181                }
1182            }
1183            if let Some(hint) = &method.return_type {
1184                check_type_hint_classes(hint, &self.codebase, file, source, source_map, all_issues);
1185            }
1186        }
1187    }
1188}
1189
1190impl Default for ProjectAnalyzer {
1191    fn default() -> Self {
1192        Self::new()
1193    }
1194}
1195
1196// ---------------------------------------------------------------------------
1197// Offset to char-count column conversion
1198// ---------------------------------------------------------------------------
1199
1200/// Convert a byte offset to a Unicode char-count column on a given line.
1201/// Returns (line, col) where col is a 0-based Unicode code-point count.
1202fn offset_to_line_col(
1203    source: &str,
1204    offset: u32,
1205    source_map: &php_rs_parser::source_map::SourceMap,
1206) -> (u32, u16) {
1207    let lc = source_map.offset_to_line_col(offset);
1208    let line = lc.line + 1;
1209
1210    let byte_offset = offset as usize;
1211    let line_start_byte = if byte_offset == 0 {
1212        0
1213    } else {
1214        source[..byte_offset]
1215            .rfind('\n')
1216            .map(|p| p + 1)
1217            .unwrap_or(0)
1218    };
1219
1220    let col = source[line_start_byte..byte_offset].chars().count() as u16;
1221
1222    (line, col)
1223}
1224
1225// ---------------------------------------------------------------------------
1226// Type-hint class existence checker
1227// ---------------------------------------------------------------------------
1228
1229/// Walk a `TypeHint` AST node and emit `UndefinedClass` for any named class
1230/// that does not exist in the codebase.  Skips PHP built-in type keywords.
1231fn check_type_hint_classes<'arena, 'src>(
1232    hint: &php_ast::ast::TypeHint<'arena, 'src>,
1233    codebase: &Codebase,
1234    file: &Arc<str>,
1235    source: &str,
1236    source_map: &php_rs_parser::source_map::SourceMap,
1237    issues: &mut Vec<mir_issues::Issue>,
1238) {
1239    use php_ast::ast::TypeHintKind;
1240    match &hint.kind {
1241        TypeHintKind::Named(name) => {
1242            let name_str = crate::parser::name_to_string(name);
1243            // Skip built-in pseudo-types that are not real classes.
1244            if is_pseudo_type(&name_str) {
1245                return;
1246            }
1247            let resolved = codebase.resolve_class_name(file.as_ref(), &name_str);
1248            if !codebase.type_exists(&resolved) {
1249                let (line, col_start) = offset_to_line_col(source, hint.span.start, source_map);
1250                let col_end = if hint.span.start < hint.span.end {
1251                    let (_end_line, end_col) =
1252                        offset_to_line_col(source, hint.span.end, source_map);
1253                    end_col
1254                } else {
1255                    col_start
1256                };
1257                issues.push(
1258                    mir_issues::Issue::new(
1259                        mir_issues::IssueKind::UndefinedClass { name: resolved },
1260                        mir_issues::Location {
1261                            file: file.clone(),
1262                            line,
1263                            col_start,
1264                            col_end: col_end.max(col_start + 1),
1265                        },
1266                    )
1267                    .with_snippet(crate::parser::span_text(source, hint.span).unwrap_or_default()),
1268                );
1269            }
1270        }
1271        TypeHintKind::Nullable(inner) => {
1272            check_type_hint_classes(inner, codebase, file, source, source_map, issues);
1273        }
1274        TypeHintKind::Union(parts) | TypeHintKind::Intersection(parts) => {
1275            for part in parts.iter() {
1276                check_type_hint_classes(part, codebase, file, source, source_map, issues);
1277            }
1278        }
1279        TypeHintKind::Keyword(_, _) => {} // built-in keyword, always valid
1280    }
1281}
1282
/// Returns true for names that are PHP pseudo-types / special identifiers, not
/// real classes.
///
/// The comparison is case-insensitive: `name` is lowercased before being looked
/// up in the table below.
fn is_pseudo_type(name: &str) -> bool {
    const PSEUDO_TYPES: [&str; 12] = [
        "self", "static", "parent", "null", "true", "false", "never", "void", "mixed", "object",
        "callable", "iterable",
    ];

    let lowered = name.to_lowercase();
    PSEUDO_TYPES.contains(&lowered.as_str())
}
1302
/// Magic methods whose parameters are passed by the PHP runtime, not user call sites.
///
/// `emit_unused_params` reports nothing for a method whose name appears in
/// this list.
const MAGIC_METHODS_WITH_RUNTIME_PARAMS: &[&str] = &[
    "__get",
    "__set",
    "__call",
    "__callStatic",
    "__isset",
    "__unset",
];
1312
1313/// Emit `UnusedParam` issues for params that were never read in `ctx`.
1314/// Skips magic methods whose parameters are passed by the PHP runtime.
1315fn emit_unused_params(
1316    params: &[mir_codebase::FnParam],
1317    ctx: &crate::context::Context,
1318    method_name: &str,
1319    file: &Arc<str>,
1320    issues: &mut Vec<mir_issues::Issue>,
1321) {
1322    if MAGIC_METHODS_WITH_RUNTIME_PARAMS.contains(&method_name) {
1323        return;
1324    }
1325    for p in params {
1326        let name = p.name.as_ref().trim_start_matches('$');
1327        if !ctx.read_vars.contains(name) {
1328            issues.push(
1329                mir_issues::Issue::new(
1330                    mir_issues::IssueKind::UnusedParam {
1331                        name: name.to_string(),
1332                    },
1333                    mir_issues::Location {
1334                        file: file.clone(),
1335                        line: 1,
1336                        col_start: 0,
1337                        col_end: 0,
1338                    },
1339                )
1340                .with_snippet(format!("${}", name)),
1341            );
1342        }
1343    }
1344}
1345
1346fn emit_unused_variables(
1347    ctx: &crate::context::Context,
1348    file: &Arc<str>,
1349    issues: &mut Vec<mir_issues::Issue>,
1350) {
1351    // Superglobals are always "used" — skip them
1352    const SUPERGLOBALS: &[&str] = &[
1353        "_SERVER", "_GET", "_POST", "_REQUEST", "_SESSION", "_COOKIE", "_FILES", "_ENV", "GLOBALS",
1354    ];
1355    for name in &ctx.assigned_vars {
1356        if ctx.param_names.contains(name) {
1357            continue;
1358        }
1359        if SUPERGLOBALS.contains(&name.as_str()) {
1360            continue;
1361        }
1362        // $this is implicitly used whenever the method accesses properties or
1363        // calls other methods — never report it as unused.
1364        if name == "this" {
1365            continue;
1366        }
1367        if name.starts_with('_') {
1368            continue;
1369        }
1370        if !ctx.read_vars.contains(name) {
1371            issues.push(mir_issues::Issue::new(
1372                mir_issues::IssueKind::UnusedVariable { name: name.clone() },
1373                mir_issues::Location {
1374                    file: file.clone(),
1375                    line: 1,
1376                    col_start: 0,
1377                    col_end: 0,
1378                },
1379            ));
1380        }
1381    }
1382}
1383
1384/// Merge a list of return types into a single `Union`.
1385/// Returns `void` if the list is empty.
1386pub fn merge_return_types(return_types: &[Union]) -> Union {
1387    if return_types.is_empty() {
1388        return Union::single(mir_types::Atomic::TVoid);
1389    }
1390    return_types
1391        .iter()
1392        .fold(Union::empty(), |acc, t| Union::merge(&acc, t))
1393}
1394
/// Recursively append every file with a `php` extension under `dir` to `out`.
///
/// Symlinks are never followed, and directories named `vendor`, `.git`,
/// `node_modules`, `.cache` or `.pnpm-store` are not descended into. A
/// directory that cannot be read contributes nothing.
pub(crate) fn collect_php_files(dir: &Path, out: &mut Vec<PathBuf>) {
    // Directory names that are skipped entirely.
    const SKIPPED_DIRS: [&str; 5] = ["vendor", ".git", "node_modules", ".cache", ".pnpm-store"];

    let Ok(entries) = std::fs::read_dir(dir) else {
        return;
    };

    for entry in entries.flatten() {
        // Skip symlinks — they can form cycles (e.g. .pnpm-store)
        let is_symlink = matches!(entry.file_type(), Ok(kind) if kind.is_symlink());
        if is_symlink {
            continue;
        }

        let path = entry.path();
        if !path.is_dir() {
            if path.extension().and_then(|ext| ext.to_str()) == Some("php") {
                out.push(path);
            }
            continue;
        }

        let dir_name = path.file_name().and_then(|n| n.to_str()).unwrap_or("");
        if !SKIPPED_DIRS.contains(&dir_name) {
            collect_php_files(&path, out);
        }
    }
}
1418
1419// ---------------------------------------------------------------------------
1420// AnalysisResult
1421// ---------------------------------------------------------------------------
1422
1423// ---------------------------------------------------------------------------
1424// build_reverse_deps
1425// ---------------------------------------------------------------------------
1426
1427/// Build a reverse dependency graph from the codebase after Pass 1.
1428///
1429/// Returns a map: `defining_file → {files that depend on it}`.
1430///
1431/// Dependency edges captured (all derivable from Pass 1 data):
1432/// - `use` imports  (`file_imports`)
1433/// - `extends` / `implements` / trait `use` from `ClassStorage`
1434fn build_reverse_deps(codebase: &Codebase) -> HashMap<String, HashSet<String>> {
1435    let mut reverse: HashMap<String, HashSet<String>> = HashMap::new();
1436
1437    // Helper: record edge "defining_file → dependent_file"
1438    let mut add_edge = |symbol: &str, dependent_file: &str| {
1439        if let Some(defining_file) = codebase.symbol_to_file.get(symbol) {
1440            let def = defining_file.as_ref().to_string();
1441            if def != dependent_file {
1442                reverse
1443                    .entry(def)
1444                    .or_default()
1445                    .insert(dependent_file.to_string());
1446            }
1447        }
1448    };
1449
1450    // use-import edges
1451    for entry in codebase.file_imports.iter() {
1452        let file = entry.key().as_ref().to_string();
1453        for fqcn in entry.value().values() {
1454            add_edge(fqcn, &file);
1455        }
1456    }
1457
1458    // extends / implements / trait edges from ClassStorage
1459    for entry in codebase.classes.iter() {
1460        let defining = {
1461            let fqcn = entry.key().as_ref();
1462            codebase
1463                .symbol_to_file
1464                .get(fqcn)
1465                .map(|f| f.as_ref().to_string())
1466        };
1467        let Some(file) = defining else { continue };
1468
1469        let cls = entry.value();
1470        if let Some(ref parent) = cls.parent {
1471            add_edge(parent.as_ref(), &file);
1472        }
1473        for iface in &cls.interfaces {
1474            add_edge(iface.as_ref(), &file);
1475        }
1476        for tr in &cls.traits {
1477            add_edge(tr.as_ref(), &file);
1478        }
1479    }
1480
1481    reverse
1482}
1483
1484// ---------------------------------------------------------------------------
1485
1486/// Extract the reference locations recorded for `file` from the codebase into
1487/// a flat `Vec<(symbol_key, start, end)>` suitable for caching.
1488fn extract_reference_locations(codebase: &Codebase, file: &Arc<str>) -> Vec<(String, u32, u32)> {
1489    codebase
1490        .extract_file_reference_locations(file.as_ref())
1491        .into_iter()
1492        .map(|(sym, start, end)| (sym.to_string(), start, end))
1493        .collect()
1494}
1495
1496// ---------------------------------------------------------------------------
1497
1498pub struct AnalysisResult {
1499    pub issues: Vec<Issue>,
1500    pub type_envs: std::collections::HashMap<crate::type_env::ScopeId, crate::type_env::TypeEnv>,
1501    /// Per-expression resolved symbols from Pass 2.
1502    pub symbols: Vec<crate::symbol::ResolvedSymbol>,
1503}
1504
1505impl AnalysisResult {
1506    pub fn error_count(&self) -> usize {
1507        self.issues
1508            .iter()
1509            .filter(|i| i.severity == mir_issues::Severity::Error)
1510            .count()
1511    }
1512
1513    pub fn warning_count(&self) -> usize {
1514        self.issues
1515            .iter()
1516            .filter(|i| i.severity == mir_issues::Severity::Warning)
1517            .count()
1518    }
1519
1520    /// Group issues by source file.
1521    ///
1522    /// Returns a map from absolute file path to the slice of issues that belong
1523    /// to that file. Useful for LSP `textDocument/publishDiagnostics`, which
1524    /// pushes diagnostics per document.
1525    pub fn issues_by_file(&self) -> HashMap<std::sync::Arc<str>, Vec<&Issue>> {
1526        let mut map: HashMap<std::sync::Arc<str>, Vec<&Issue>> = HashMap::new();
1527        for issue in &self.issues {
1528            map.entry(issue.location.file.clone())
1529                .or_default()
1530                .push(issue);
1531        }
1532        map
1533    }
1534
1535    /// Return the innermost resolved symbol whose span contains `byte_offset`
1536    /// in `file`, or `None` if no symbol was recorded at that position.
1537    ///
1538    /// When multiple symbols overlap (e.g. a method call whose span contains a
1539    /// property access span), the one with the smallest span is returned so the
1540    /// caller gets the most specific symbol at the cursor.
1541    ///
1542    /// Typical use: LSP `textDocument/references` and `textDocument/hover`.
1543    pub fn symbol_at(
1544        &self,
1545        file: &str,
1546        byte_offset: u32,
1547    ) -> Option<&crate::symbol::ResolvedSymbol> {
1548        self.symbols
1549            .iter()
1550            .filter(|s| {
1551                s.file.as_ref() == file && s.span.start <= byte_offset && byte_offset < s.span.end
1552            })
1553            .min_by_key(|s| s.span.end - s.span.start)
1554    }
1555}