Skip to main content

mir_analyzer/
project.rs

1/// Project-level orchestration: file discovery, pass 1, pass 2.
2use std::path::{Path, PathBuf};
3use std::sync::Arc;
4
5use rayon::prelude::*;
6
7use std::collections::{HashMap, HashSet};
8
9use crate::cache::{hash_content, AnalysisCache};
10use crate::php_version::PhpVersion;
11use mir_codebase::Codebase;
12use mir_issues::Issue;
13use mir_types::Union;
14
15use crate::collector::DefinitionCollector;
16
17// ---------------------------------------------------------------------------
18// ProjectAnalyzer
19// ---------------------------------------------------------------------------
20
21pub struct ProjectAnalyzer {
22    pub codebase: Arc<Codebase>,
23    /// Optional cache — when `Some`, Pass 2 results are read/written per file.
24    pub cache: Option<AnalysisCache>,
25    /// Called once after each file completes Pass 2 (used for progress reporting).
26    pub on_file_done: Option<Arc<dyn Fn() + Send + Sync>>,
27    /// PSR-4 autoloader mapping from composer.json, if available.
28    pub psr4: Option<Arc<crate::composer::Psr4Map>>,
29    /// Whether stubs have already been loaded (to avoid double-loading).
30    stubs_loaded: std::sync::atomic::AtomicBool,
31    /// When true, run dead code detection at the end of analysis.
32    pub find_dead_code: bool,
33    /// Target PHP language version. `None` means "not configured"; resolved to
34    /// `PhpVersion::LATEST` when passed down to `StatementsAnalyzer`.
35    pub php_version: Option<PhpVersion>,
36}
37
38impl ProjectAnalyzer {
39    pub fn new() -> Self {
40        Self {
41            codebase: Arc::new(Codebase::new()),
42            cache: None,
43            on_file_done: None,
44            psr4: None,
45            stubs_loaded: std::sync::atomic::AtomicBool::new(false),
46            find_dead_code: false,
47            php_version: None,
48        }
49    }
50
51    /// Create a `ProjectAnalyzer` with a disk-backed cache stored under `cache_dir`.
52    pub fn with_cache(cache_dir: &Path) -> Self {
53        Self {
54            codebase: Arc::new(Codebase::new()),
55            cache: Some(AnalysisCache::open(cache_dir)),
56            on_file_done: None,
57            psr4: None,
58            stubs_loaded: std::sync::atomic::AtomicBool::new(false),
59            find_dead_code: false,
60            php_version: None,
61        }
62    }
63
64    /// Create a `ProjectAnalyzer` from a project root containing `composer.json`.
65    /// Returns the analyzer (with `psr4` set) and the `Psr4Map` so callers can
66    /// call `map.project_files()` / `map.vendor_files()`.
67    pub fn from_composer(
68        root: &Path,
69    ) -> Result<(Self, crate::composer::Psr4Map), crate::composer::ComposerError> {
70        let map = crate::composer::Psr4Map::from_composer(root)?;
71        let psr4 = Arc::new(map.clone());
72        let analyzer = Self {
73            codebase: Arc::new(Codebase::new()),
74            cache: None,
75            on_file_done: None,
76            psr4: Some(psr4),
77            stubs_loaded: std::sync::atomic::AtomicBool::new(false),
78            find_dead_code: false,
79            php_version: None,
80        };
81        Ok((analyzer, map))
82    }
83
84    /// Set the target PHP version.
85    pub fn with_php_version(mut self, version: PhpVersion) -> Self {
86        self.php_version = Some(version);
87        self
88    }
89
90    /// Resolve the configured PHP version, defaulting to `PhpVersion::LATEST`
91    /// when none has been set.
92    fn resolved_php_version(&self) -> PhpVersion {
93        self.php_version.unwrap_or(PhpVersion::LATEST)
94    }
95
96    /// Expose codebase for external use (e.g., pre-loading stubs from CLI).
97    pub fn codebase(&self) -> &Arc<Codebase> {
98        &self.codebase
99    }
100
101    /// Load PHP built-in stubs. Called automatically by `analyze` if not done yet.
102    pub fn load_stubs(&self) {
103        if !self
104            .stubs_loaded
105            .swap(true, std::sync::atomic::Ordering::SeqCst)
106        {
107            crate::stubs::load_stubs(&self.codebase);
108        }
109    }
110
111    /// Run the full analysis pipeline on a set of file paths.
112    pub fn analyze(&self, paths: &[PathBuf]) -> AnalysisResult {
113        let mut all_issues = Vec::new();
114        let mut parse_errors = Vec::new();
115
116        // ---- Load PHP built-in stubs (before Pass 1 so user code can override)
117        self.load_stubs();
118
119        // ---- Pass 1: read files in parallel ----------------------------------
120        let file_data: Vec<(Arc<str>, String)> = paths
121            .par_iter()
122            .filter_map(|path| match std::fs::read_to_string(path) {
123                Ok(src) => Some((Arc::from(path.to_string_lossy().as_ref()), src)),
124                Err(e) => {
125                    eprintln!("Cannot read {}: {}", path.display(), e);
126                    None
127                }
128            })
129            .collect();
130
131        // ---- Pre-Pass-2 invalidation: evict dependents of changed files ------
132        // Uses the reverse dep graph persisted from the previous run. Hashes are
133        // recomputed inline inside Pass 2; avoiding a shared HashMap + global
134        // sync barrier keeps Pass 2's parallel pipeline unblocked.
135        if let Some(cache) = &self.cache {
136            let changed: Vec<String> = file_data
137                .par_iter()
138                .filter_map(|(f, src)| {
139                    let h = hash_content(src);
140                    if cache.get(f, &h).is_none() {
141                        Some(f.to_string())
142                    } else {
143                        None
144                    }
145                })
146                .collect();
147            if !changed.is_empty() {
148                cache.evict_with_dependents(&changed);
149            }
150        }
151
152        // ---- Pass 1: combined pre-index + definition collection (parallel) -----
153        // Parse each file once; both the FQCN/namespace/import index and the full
154        // definition collection run in the same rayon closure, eliminating the
155        // second sequential parse of every file. DashMap handles concurrent writes.
156        let pass1_results: Vec<(Vec<Issue>, Vec<Issue>)> = file_data
157            .par_iter()
158            .map(|(file, src)| {
159                use php_ast::ast::StmtKind;
160                let arena = bumpalo::Bump::new();
161                let result = php_rs_parser::parse(&arena, src);
162
163                // --- Pre-index: build FQCN index, file imports, and namespaces ---
164                let mut current_namespace: Option<String> = None;
165                let mut imports: std::collections::HashMap<String, String> =
166                    std::collections::HashMap::new();
167                let mut file_ns_set = false;
168
169                // Index a flat list of stmts under a given namespace prefix.
170                let index_stmts =
171                    |stmts: &[php_ast::ast::Stmt<'_, '_>],
172                     ns: Option<&str>,
173                     imports: &mut std::collections::HashMap<String, String>| {
174                        for stmt in stmts.iter() {
175                            match &stmt.kind {
176                                StmtKind::Use(use_decl) => {
177                                    for item in use_decl.uses.iter() {
178                                        let full_name = crate::parser::name_to_string(&item.name)
179                                            .trim_start_matches('\\')
180                                            .to_string();
181                                        let alias = item.alias.unwrap_or_else(|| {
182                                            full_name.rsplit('\\').next().unwrap_or(&full_name)
183                                        });
184                                        imports.insert(alias.to_string(), full_name);
185                                    }
186                                }
187                                StmtKind::Class(decl) => {
188                                    if let Some(n) = decl.name {
189                                        let fqcn = match ns {
190                                            Some(ns) => format!("{}\\{}", ns, n),
191                                            None => n.to_string(),
192                                        };
193                                        self.codebase
194                                            .known_symbols
195                                            .insert(Arc::from(fqcn.as_str()));
196                                    }
197                                }
198                                StmtKind::Interface(decl) => {
199                                    let fqcn = match ns {
200                                        Some(ns) => format!("{}\\{}", ns, decl.name),
201                                        None => decl.name.to_string(),
202                                    };
203                                    self.codebase.known_symbols.insert(Arc::from(fqcn.as_str()));
204                                }
205                                StmtKind::Trait(decl) => {
206                                    let fqcn = match ns {
207                                        Some(ns) => format!("{}\\{}", ns, decl.name),
208                                        None => decl.name.to_string(),
209                                    };
210                                    self.codebase.known_symbols.insert(Arc::from(fqcn.as_str()));
211                                }
212                                StmtKind::Enum(decl) => {
213                                    let fqcn = match ns {
214                                        Some(ns) => format!("{}\\{}", ns, decl.name),
215                                        None => decl.name.to_string(),
216                                    };
217                                    self.codebase.known_symbols.insert(Arc::from(fqcn.as_str()));
218                                }
219                                StmtKind::Function(decl) => {
220                                    let fqn = match ns {
221                                        Some(ns) => format!("{}\\{}", ns, decl.name),
222                                        None => decl.name.to_string(),
223                                    };
224                                    self.codebase.known_symbols.insert(Arc::from(fqn.as_str()));
225                                }
226                                _ => {}
227                            }
228                        }
229                    };
230
231                for stmt in result.program.stmts.iter() {
232                    match &stmt.kind {
233                        StmtKind::Namespace(ns) => {
234                            current_namespace =
235                                ns.name.as_ref().map(|n| crate::parser::name_to_string(n));
236                            if !file_ns_set {
237                                if let Some(ref ns_str) = current_namespace {
238                                    self.codebase
239                                        .file_namespaces
240                                        .insert(file.clone(), ns_str.clone());
241                                    file_ns_set = true;
242                                }
243                            }
244                            // Bracketed namespace: walk inner stmts for Use/Class/etc.
245                            if let php_ast::ast::NamespaceBody::Braced(inner_stmts) = &ns.body {
246                                index_stmts(
247                                    inner_stmts,
248                                    current_namespace.as_deref(),
249                                    &mut imports,
250                                );
251                            }
252                        }
253                        _ => index_stmts(
254                            std::slice::from_ref(stmt),
255                            current_namespace.as_deref(),
256                            &mut imports,
257                        ),
258                    }
259                }
260
261                if !imports.is_empty() {
262                    self.codebase.file_imports.insert(file.clone(), imports);
263                }
264
265                // --- Parse errors ---
266                let file_parse_errors: Vec<Issue> = result
267                    .errors
268                    .iter()
269                    .map(|err| {
270                        Issue::new(
271                            mir_issues::IssueKind::ParseError {
272                                message: err.to_string(),
273                            },
274                            mir_issues::Location {
275                                file: file.clone(),
276                                line: 1,
277                                col_start: 0,
278                                col_end: 0,
279                            },
280                        )
281                    })
282                    .collect();
283
284                // --- Definition collection ---
285                let collector =
286                    DefinitionCollector::new(&self.codebase, file.clone(), src, &result.source_map);
287                let issues = collector.collect(&result.program);
288
289                (file_parse_errors, issues)
290            })
291            .collect();
292
293        for (file_parse_errors, issues) in pass1_results {
294            parse_errors.extend(file_parse_errors);
295            all_issues.extend(issues);
296        }
297
298        all_issues.extend(parse_errors);
299
300        // ---- Finalize codebase (resolve inheritance, build dispatch tables) --
301        self.codebase.finalize();
302
303        // ---- Lazy-load unknown classes via PSR-4 (issue #50) ----------------
304        if let Some(psr4) = &self.psr4 {
305            self.lazy_load_missing_classes(psr4.clone(), &mut all_issues);
306        }
307
308        // ---- Build reverse dep graph and persist it for the next run ---------
309        if let Some(cache) = &self.cache {
310            let rev = build_reverse_deps(&self.codebase);
311            cache.set_reverse_deps(rev);
312        }
313
314        // ---- Class-level checks (M11) ----------------------------------------
315        let analyzed_file_set: std::collections::HashSet<std::sync::Arc<str>> =
316            file_data.iter().map(|(f, _)| f.clone()).collect();
317        let class_issues =
318            crate::class::ClassAnalyzer::with_files(&self.codebase, analyzed_file_set, &file_data)
319                .analyze_all();
320        all_issues.extend(class_issues);
321
322        // ---- Pass 2: analyze function/method bodies in parallel (M14) --------
323        // Each file is analyzed independently; arena + parse happen inside the
324        // rayon closure so there is no cross-thread borrow.
325        // When a cache is present, files whose content hash matches a stored
326        // entry skip re-analysis entirely (M17).
327        let pass2_results: Vec<(Vec<Issue>, Vec<crate::symbol::ResolvedSymbol>)> = file_data
328            .par_iter()
329            .map(|(file, src)| {
330                // Cache lookup
331                let result = if let Some(cache) = &self.cache {
332                    let h = hash_content(src);
333                    if let Some((cached_issues, ref_locs)) = cache.get(file, &h) {
334                        // Hit — replay reference locations so symbol_reference_locations
335                        // is populated without re-running analyze_bodies.
336                        self.codebase
337                            .replay_reference_locations(file.clone(), &ref_locs);
338                        (cached_issues, Vec::new())
339                    } else {
340                        // Miss — analyze and store
341                        let arena = bumpalo::Bump::new();
342                        let parsed = php_rs_parser::parse(&arena, src);
343                        let (issues, symbols) = self.analyze_bodies(
344                            &parsed.program,
345                            file.clone(),
346                            src,
347                            &parsed.source_map,
348                        );
349                        let ref_locs = extract_reference_locations(&self.codebase, file);
350                        cache.put(file, h, issues.clone(), ref_locs);
351                        (issues, symbols)
352                    }
353                } else {
354                    let arena = bumpalo::Bump::new();
355                    let parsed = php_rs_parser::parse(&arena, src);
356                    self.analyze_bodies(&parsed.program, file.clone(), src, &parsed.source_map)
357                };
358                if let Some(cb) = &self.on_file_done {
359                    cb();
360                }
361                result
362            })
363            .collect();
364
365        let mut all_symbols = Vec::new();
366        for (issues, symbols) in pass2_results {
367            all_issues.extend(issues);
368            all_symbols.extend(symbols);
369        }
370
371        // Persist cache hits/misses to disk
372        if let Some(cache) = &self.cache {
373            cache.flush();
374        }
375
376        // ---- Compact the reference index ------------------------------------
377        // Convert build-phase DashMaps into a CSR structure, freeing the
378        // per-entry HashMap/HashSet overhead accumulated during Pass 2.
379        self.codebase.compact_reference_index();
380
381        // ---- Dead-code detection (M18) --------------------------------------
382        if self.find_dead_code {
383            let dead_code_issues =
384                crate::dead_code::DeadCodeAnalyzer::new(&self.codebase).analyze();
385            all_issues.extend(dead_code_issues);
386        }
387
388        AnalysisResult::build(all_issues, std::collections::HashMap::new(), all_symbols)
389    }
390
391    /// Lazily load class definitions for referenced-but-unknown FQCNs via PSR-4.
392    ///
393    /// After Pass 1 and `codebase.finalize()`, some classes referenced as parents
394    /// or interfaces may not be in the codebase (they weren't in the initial file
395    /// list). This method iterates up to `max_depth` times, each time resolving
396    /// unknown parent/interface FQCNs via the PSR-4 map, running Pass 1 on those
397    /// files, and re-finalizing the codebase. The loop stops when no new files
398    /// are discovered.
399    fn lazy_load_missing_classes(
400        &self,
401        psr4: Arc<crate::composer::Psr4Map>,
402        all_issues: &mut Vec<Issue>,
403    ) {
404        use std::collections::HashSet;
405
406        let max_depth = 10; // prevent infinite chains
407        let mut loaded: HashSet<String> = HashSet::new();
408
409        for _ in 0..max_depth {
410            // Collect all referenced FQCNs that aren't in the codebase
411            let mut to_load: Vec<(String, PathBuf)> = Vec::new();
412
413            for entry in self.codebase.classes.iter() {
414                let cls = entry.value();
415
416                // Check parent class
417                if let Some(parent) = &cls.parent {
418                    let fqcn = parent.as_ref();
419                    if !self.codebase.classes.contains_key(fqcn) && !loaded.contains(fqcn) {
420                        if let Some(path) = psr4.resolve(fqcn) {
421                            to_load.push((fqcn.to_string(), path));
422                        }
423                    }
424                }
425
426                // Check interfaces
427                for iface in &cls.interfaces {
428                    let fqcn = iface.as_ref();
429                    if !self.codebase.classes.contains_key(fqcn)
430                        && !self.codebase.interfaces.contains_key(fqcn)
431                        && !loaded.contains(fqcn)
432                    {
433                        if let Some(path) = psr4.resolve(fqcn) {
434                            to_load.push((fqcn.to_string(), path));
435                        }
436                    }
437                }
438            }
439
440            if to_load.is_empty() {
441                break;
442            }
443
444            // Load each discovered file (Pass 1 only)
445            for (fqcn, path) in to_load {
446                loaded.insert(fqcn);
447                if let Ok(src) = std::fs::read_to_string(&path) {
448                    let file: Arc<str> = Arc::from(path.to_string_lossy().as_ref());
449                    let arena = bumpalo::Bump::new();
450                    let result = php_rs_parser::parse(&arena, &src);
451                    let collector = crate::collector::DefinitionCollector::new(
452                        &self.codebase,
453                        file,
454                        &src,
455                        &result.source_map,
456                    );
457                    let issues = collector.collect(&result.program);
458                    all_issues.extend(issues);
459                }
460            }
461
462            // Re-finalize to include newly loaded classes in the inheritance graph.
463            // Must reset the flag first so finalize() isn't a no-op.
464            self.codebase.invalidate_finalization();
465            self.codebase.finalize();
466        }
467    }
468
469    /// Re-analyze a single file within the existing codebase.
470    ///
471    /// This is the incremental analysis API for LSP:
472    /// 1. Removes old definitions from this file
473    /// 2. Re-runs Pass 1 (definition collection) on the new content
474    /// 3. Re-finalizes the codebase (rebuilds inheritance)
475    /// 4. Re-runs Pass 2 (body analysis) on this file
476    /// 5. Returns the analysis result for this file only
477    pub fn re_analyze_file(&self, file_path: &str, new_content: &str) -> AnalysisResult {
478        // Fast path: content unchanged and cache has a valid entry — skip full re-analysis.
479        if let Some(cache) = &self.cache {
480            let h = hash_content(new_content);
481            if let Some((issues, ref_locs)) = cache.get(file_path, &h) {
482                let file: Arc<str> = Arc::from(file_path);
483                self.codebase.replay_reference_locations(file, &ref_locs);
484                return AnalysisResult::build(issues, HashMap::new(), Vec::new());
485            }
486        }
487
488        // 1. Snapshot inheritance structure before removing old definitions.
489        //    This lets us skip finalize() later if only method bodies changed.
490        let structural_snapshot = self.codebase.file_structural_snapshot(file_path);
491
492        // 2. Remove old definitions from this file
493        self.codebase.remove_file_definitions(file_path);
494
495        // 3. Parse new content and run Pass 1
496        let file: Arc<str> = Arc::from(file_path);
497        let arena = bumpalo::Bump::new();
498        let parsed = php_rs_parser::parse(&arena, new_content);
499
500        let mut all_issues = Vec::new();
501
502        // Collect parse errors
503        for err in &parsed.errors {
504            all_issues.push(Issue::new(
505                mir_issues::IssueKind::ParseError {
506                    message: err.to_string(),
507                },
508                mir_issues::Location {
509                    file: file.clone(),
510                    line: 1,
511                    col_start: 0,
512                    col_end: 0,
513                },
514            ));
515        }
516
517        let collector = DefinitionCollector::new(
518            &self.codebase,
519            file.clone(),
520            new_content,
521            &parsed.source_map,
522        );
523        all_issues.extend(collector.collect(&parsed.program));
524
525        // 4. Re-finalize, or skip if only method bodies changed.
526        //    finalize() rebuilds all_parents for every class/interface in the
527        //    codebase by walking the full inheritance graph — this is expensive.
528        //    If the inheritance structure of this file is unchanged (same parent,
529        //    interfaces, traits), restore all_parents from the snapshot and skip
530        //    the full walk.
531        if self
532            .codebase
533            .structural_unchanged_after_pass1(file_path, &structural_snapshot)
534        {
535            self.codebase
536                .restore_all_parents(file_path, &structural_snapshot);
537        } else {
538            self.codebase.finalize();
539        }
540
541        // 5. Run Pass 2 on this file
542        let (body_issues, symbols) = self.analyze_bodies(
543            &parsed.program,
544            file.clone(),
545            new_content,
546            &parsed.source_map,
547        );
548        all_issues.extend(body_issues);
549
550        // 6. Update cache if present
551        if let Some(cache) = &self.cache {
552            let h = hash_content(new_content);
553            cache.evict_with_dependents(&[file_path.to_string()]);
554            let ref_locs = extract_reference_locations(&self.codebase, &file);
555            cache.put(file_path, h, all_issues.clone(), ref_locs);
556        }
557
558        AnalysisResult::build(all_issues, HashMap::new(), symbols)
559    }
560
561    /// Analyze a PHP source string without a real file path.
562    /// Useful for tests and LSP single-file mode.
563    pub fn analyze_source(source: &str) -> AnalysisResult {
564        use crate::collector::DefinitionCollector;
565        let analyzer = ProjectAnalyzer::new();
566        analyzer.load_stubs();
567        let file: Arc<str> = Arc::from("<source>");
568        let arena = bumpalo::Bump::new();
569        let result = php_rs_parser::parse(&arena, source);
570        let mut all_issues = Vec::new();
571        let collector =
572            DefinitionCollector::new(&analyzer.codebase, file.clone(), source, &result.source_map);
573        all_issues.extend(collector.collect(&result.program));
574        analyzer.codebase.finalize();
575        let mut type_envs = std::collections::HashMap::new();
576        let mut all_symbols = Vec::new();
577        all_issues.extend(analyzer.analyze_bodies_typed(
578            &result.program,
579            file.clone(),
580            source,
581            &result.source_map,
582            &mut type_envs,
583            &mut all_symbols,
584        ));
585        AnalysisResult::build(all_issues, type_envs, all_symbols)
586    }
587
588    /// Pass 2: walk all function/method bodies in one file, return issues, and
589    /// write inferred return types back to the codebase.
590    fn analyze_bodies<'arena, 'src>(
591        &self,
592        program: &php_ast::ast::Program<'arena, 'src>,
593        file: Arc<str>,
594        source: &str,
595        source_map: &php_rs_parser::source_map::SourceMap,
596    ) -> (Vec<mir_issues::Issue>, Vec<crate::symbol::ResolvedSymbol>) {
597        use php_ast::ast::StmtKind;
598
599        let mut all_issues = Vec::new();
600        let mut all_symbols = Vec::new();
601
602        for stmt in program.stmts.iter() {
603            match &stmt.kind {
604                StmtKind::Function(decl) => {
605                    self.analyze_fn_decl(
606                        decl,
607                        &file,
608                        source,
609                        source_map,
610                        &mut all_issues,
611                        &mut all_symbols,
612                    );
613                }
614                StmtKind::Class(decl) => {
615                    self.analyze_class_decl(
616                        decl,
617                        &file,
618                        source,
619                        source_map,
620                        &mut all_issues,
621                        &mut all_symbols,
622                    );
623                }
624                StmtKind::Enum(decl) => {
625                    self.analyze_enum_decl(decl, &file, source, source_map, &mut all_issues);
626                }
627                StmtKind::Interface(decl) => {
628                    self.analyze_interface_decl(decl, &file, source, source_map, &mut all_issues);
629                }
630                StmtKind::Namespace(ns) => {
631                    if let php_ast::ast::NamespaceBody::Braced(stmts) = &ns.body {
632                        for inner in stmts.iter() {
633                            match &inner.kind {
634                                StmtKind::Function(decl) => {
635                                    self.analyze_fn_decl(
636                                        decl,
637                                        &file,
638                                        source,
639                                        source_map,
640                                        &mut all_issues,
641                                        &mut all_symbols,
642                                    );
643                                }
644                                StmtKind::Class(decl) => {
645                                    self.analyze_class_decl(
646                                        decl,
647                                        &file,
648                                        source,
649                                        source_map,
650                                        &mut all_issues,
651                                        &mut all_symbols,
652                                    );
653                                }
654                                StmtKind::Enum(decl) => {
655                                    self.analyze_enum_decl(
656                                        decl,
657                                        &file,
658                                        source,
659                                        source_map,
660                                        &mut all_issues,
661                                    );
662                                }
663                                StmtKind::Interface(decl) => {
664                                    self.analyze_interface_decl(
665                                        decl,
666                                        &file,
667                                        source,
668                                        source_map,
669                                        &mut all_issues,
670                                    );
671                                }
672                                _ => {}
673                            }
674                        }
675                    }
676                }
677                _ => {}
678            }
679        }
680
681        // Analyze top-level executable statements in global scope.
682        {
683            use crate::context::Context;
684            use crate::stmt::StatementsAnalyzer;
685            use mir_issues::IssueBuffer;
686
687            let mut ctx = Context::new();
688            let mut buf = IssueBuffer::new();
689            let mut sa = StatementsAnalyzer::new(
690                &self.codebase,
691                file.clone(),
692                source,
693                source_map,
694                &mut buf,
695                &mut all_symbols,
696                self.resolved_php_version(),
697            );
698            for stmt in program.stmts.iter() {
699                match &stmt.kind {
700                    StmtKind::Function(_)
701                    | StmtKind::Class(_)
702                    | StmtKind::Enum(_)
703                    | StmtKind::Interface(_)
704                    | StmtKind::Trait(_)
705                    | StmtKind::Namespace(_)
706                    | StmtKind::Use(_)
707                    | StmtKind::Declare(_) => {}
708                    _ => sa.analyze_stmt(stmt, &mut ctx),
709                }
710            }
711            drop(sa);
712            all_issues.extend(buf.into_issues());
713        }
714
715        (all_issues, all_symbols)
716    }
717
718    /// Analyze a single function declaration body and collect issues + inferred return type.
719    #[allow(clippy::too_many_arguments)]
720    fn analyze_fn_decl<'arena, 'src>(
721        &self,
722        decl: &php_ast::ast::FunctionDecl<'arena, 'src>,
723        file: &Arc<str>,
724        source: &str,
725        source_map: &php_rs_parser::source_map::SourceMap,
726        all_issues: &mut Vec<mir_issues::Issue>,
727        all_symbols: &mut Vec<crate::symbol::ResolvedSymbol>,
728    ) {
729        let fn_name = decl.name;
730        let body = &decl.body;
731        // Check parameter and return type hints for undefined classes.
732        for param in decl.params.iter() {
733            if let Some(hint) = &param.type_hint {
734                check_type_hint_classes(hint, &self.codebase, file, source, source_map, all_issues);
735            }
736        }
737        if let Some(hint) = &decl.return_type {
738            check_type_hint_classes(hint, &self.codebase, file, source, source_map, all_issues);
739        }
740        use crate::context::Context;
741        use crate::stmt::StatementsAnalyzer;
742        use mir_issues::IssueBuffer;
743
744        // Resolve function name using the file's namespace (handles namespaced functions)
745        let resolved_fn = self.codebase.resolve_class_name(file.as_ref(), fn_name);
746        let func_opt: Option<mir_codebase::storage::FunctionStorage> = self
747            .codebase
748            .functions
749            .get(resolved_fn.as_str())
750            .map(|r| r.clone())
751            .or_else(|| self.codebase.functions.get(fn_name).map(|r| r.clone()))
752            .or_else(|| {
753                self.codebase
754                    .functions
755                    .iter()
756                    .find(|e| e.short_name.as_ref() == fn_name)
757                    .map(|e| e.value().clone())
758            });
759
760        let fqn = func_opt.as_ref().map(|f| f.fqn.clone());
761        // Always use the codebase entry when its params match the AST (same count + names).
762        // This covers the common case and preserves docblock-enriched types.
763        // When names differ (two files define the same unnamespaced function), fall back to
764        // the AST params so param variables are always in scope for this file's body.
765        let (params, return_ty): (Vec<mir_codebase::FnParam>, _) = match &func_opt {
766            Some(f)
767                if f.params.len() == decl.params.len()
768                    && f.params
769                        .iter()
770                        .zip(decl.params.iter())
771                        .all(|(cp, ap)| cp.name.as_ref() == ap.name) =>
772            {
773                (f.params.clone(), f.return_type.clone())
774            }
775            _ => {
776                let ast_params = decl
777                    .params
778                    .iter()
779                    .map(|p| mir_codebase::FnParam {
780                        name: Arc::from(p.name),
781                        ty: None,
782                        default: p.default.as_ref().map(|_| mir_types::Union::mixed()),
783                        is_variadic: p.variadic,
784                        is_byref: p.by_ref,
785                        is_optional: p.default.is_some() || p.variadic,
786                    })
787                    .collect();
788                (ast_params, None)
789            }
790        };
791
792        let mut ctx = Context::for_function(&params, return_ty, None, None, None, false, true);
793        let mut buf = IssueBuffer::new();
794        let mut sa = StatementsAnalyzer::new(
795            &self.codebase,
796            file.clone(),
797            source,
798            source_map,
799            &mut buf,
800            all_symbols,
801            self.resolved_php_version(),
802        );
803        sa.analyze_stmts(body, &mut ctx);
804        let inferred = merge_return_types(&sa.return_types);
805        drop(sa);
806
807        emit_unused_params(&params, &ctx, "", file, all_issues);
808        emit_unused_variables(&ctx, file, all_issues);
809        all_issues.extend(buf.into_issues());
810
811        if let Some(fqn) = fqn {
812            if let Some(mut func) = self.codebase.functions.get_mut(fqn.as_ref()) {
813                func.inferred_return_type = Some(inferred);
814            }
815        }
816    }
817
818    /// Analyze all method bodies on a class declaration and collect issues + inferred return types.
819    #[allow(clippy::too_many_arguments)]
820    fn analyze_class_decl<'arena, 'src>(
821        &self,
822        decl: &php_ast::ast::ClassDecl<'arena, 'src>,
823        file: &Arc<str>,
824        source: &str,
825        source_map: &php_rs_parser::source_map::SourceMap,
826        all_issues: &mut Vec<mir_issues::Issue>,
827        all_symbols: &mut Vec<crate::symbol::ResolvedSymbol>,
828    ) {
829        use crate::context::Context;
830        use crate::stmt::StatementsAnalyzer;
831        use mir_issues::IssueBuffer;
832
833        let class_name = decl.name.unwrap_or("<anonymous>");
834        // Resolve the FQCN using the file's namespace/imports — avoids ambiguity
835        // when multiple classes share the same short name across namespaces.
836        let resolved = self.codebase.resolve_class_name(file.as_ref(), class_name);
837        let fqcn: &str = &resolved;
838        let parent_fqcn = self
839            .codebase
840            .classes
841            .get(fqcn)
842            .and_then(|c| c.parent.clone());
843
844        if let Some(parent) = &decl.extends {
845            check_name_class(parent, &self.codebase, file, source, source_map, all_issues);
846        }
847        for iface in decl.implements.iter() {
848            check_name_class(iface, &self.codebase, file, source, source_map, all_issues);
849        }
850
851        for member in decl.members.iter() {
852            if let php_ast::ast::ClassMemberKind::Property(prop) = &member.kind {
853                if let Some(hint) = &prop.type_hint {
854                    check_type_hint_classes(
855                        hint,
856                        &self.codebase,
857                        file,
858                        source,
859                        source_map,
860                        all_issues,
861                    );
862                }
863                continue;
864            }
865            let php_ast::ast::ClassMemberKind::Method(method) = &member.kind else {
866                continue;
867            };
868
869            // Check parameter and return type hints for undefined classes (even abstract methods).
870            for param in method.params.iter() {
871                if let Some(hint) = &param.type_hint {
872                    check_type_hint_classes(
873                        hint,
874                        &self.codebase,
875                        file,
876                        source,
877                        source_map,
878                        all_issues,
879                    );
880                }
881            }
882            if let Some(hint) = &method.return_type {
883                check_type_hint_classes(hint, &self.codebase, file, source, source_map, all_issues);
884            }
885
886            let Some(body) = &method.body else { continue };
887
888            let (params, return_ty) = self
889                .codebase
890                .get_method(fqcn, method.name)
891                .as_deref()
892                .map(|m| (m.params.clone(), m.return_type.clone()))
893                .unwrap_or_default();
894
895            let is_ctor = method.name == "__construct";
896            let mut ctx = Context::for_method(
897                &params,
898                return_ty,
899                Some(Arc::from(fqcn)),
900                parent_fqcn.clone(),
901                Some(Arc::from(fqcn)),
902                false,
903                is_ctor,
904                method.is_static,
905            );
906
907            let mut buf = IssueBuffer::new();
908            let mut sa = StatementsAnalyzer::new(
909                &self.codebase,
910                file.clone(),
911                source,
912                source_map,
913                &mut buf,
914                all_symbols,
915                self.resolved_php_version(),
916            );
917            sa.analyze_stmts(body, &mut ctx);
918            let inferred = merge_return_types(&sa.return_types);
919            drop(sa);
920
921            emit_unused_params(&params, &ctx, method.name, file, all_issues);
922            emit_unused_variables(&ctx, file, all_issues);
923            all_issues.extend(buf.into_issues());
924
925            if let Some(mut cls) = self.codebase.classes.get_mut(fqcn) {
926                if let Some(m) = cls.own_methods.get_mut(method.name) {
927                    Arc::make_mut(m).inferred_return_type = Some(inferred);
928                }
929            }
930        }
931    }
932
933    /// Like `analyze_bodies` but also populates `type_envs` with per-scope type environments.
934    #[allow(clippy::too_many_arguments)]
935    fn analyze_bodies_typed<'arena, 'src>(
936        &self,
937        program: &php_ast::ast::Program<'arena, 'src>,
938        file: Arc<str>,
939        source: &str,
940        source_map: &php_rs_parser::source_map::SourceMap,
941        type_envs: &mut std::collections::HashMap<
942            crate::type_env::ScopeId,
943            crate::type_env::TypeEnv,
944        >,
945        all_symbols: &mut Vec<crate::symbol::ResolvedSymbol>,
946    ) -> Vec<mir_issues::Issue> {
947        use php_ast::ast::StmtKind;
948        let mut all_issues = Vec::new();
949        for stmt in program.stmts.iter() {
950            match &stmt.kind {
951                StmtKind::Function(decl) => {
952                    self.analyze_fn_decl_typed(
953                        decl,
954                        &file,
955                        source,
956                        source_map,
957                        &mut all_issues,
958                        type_envs,
959                        all_symbols,
960                    );
961                }
962                StmtKind::Class(decl) => {
963                    self.analyze_class_decl_typed(
964                        decl,
965                        &file,
966                        source,
967                        source_map,
968                        &mut all_issues,
969                        type_envs,
970                        all_symbols,
971                    );
972                }
973                StmtKind::Enum(decl) => {
974                    self.analyze_enum_decl(decl, &file, source, source_map, &mut all_issues);
975                }
976                StmtKind::Interface(decl) => {
977                    self.analyze_interface_decl(decl, &file, source, source_map, &mut all_issues);
978                }
979                StmtKind::Namespace(ns) => {
980                    if let php_ast::ast::NamespaceBody::Braced(stmts) = &ns.body {
981                        for inner in stmts.iter() {
982                            match &inner.kind {
983                                StmtKind::Function(decl) => {
984                                    self.analyze_fn_decl_typed(
985                                        decl,
986                                        &file,
987                                        source,
988                                        source_map,
989                                        &mut all_issues,
990                                        type_envs,
991                                        all_symbols,
992                                    );
993                                }
994                                StmtKind::Class(decl) => {
995                                    self.analyze_class_decl_typed(
996                                        decl,
997                                        &file,
998                                        source,
999                                        source_map,
1000                                        &mut all_issues,
1001                                        type_envs,
1002                                        all_symbols,
1003                                    );
1004                                }
1005                                StmtKind::Enum(decl) => {
1006                                    self.analyze_enum_decl(
1007                                        decl,
1008                                        &file,
1009                                        source,
1010                                        source_map,
1011                                        &mut all_issues,
1012                                    );
1013                                }
1014                                StmtKind::Interface(decl) => {
1015                                    self.analyze_interface_decl(
1016                                        decl,
1017                                        &file,
1018                                        source,
1019                                        source_map,
1020                                        &mut all_issues,
1021                                    );
1022                                }
1023                                _ => {}
1024                            }
1025                        }
1026                    }
1027                }
1028                _ => {}
1029            }
1030        }
1031
1032        // Analyze top-level executable statements in global scope (e.g. function calls
1033        // outside any function/class body). Declaration nodes are skipped since they
1034        // were already handled above.
1035        {
1036            use crate::context::Context;
1037            use crate::stmt::StatementsAnalyzer;
1038            use mir_issues::IssueBuffer;
1039
1040            let mut ctx = Context::new();
1041            let mut buf = IssueBuffer::new();
1042            let mut sa = StatementsAnalyzer::new(
1043                &self.codebase,
1044                file.clone(),
1045                source,
1046                source_map,
1047                &mut buf,
1048                all_symbols,
1049                self.resolved_php_version(),
1050            );
1051            for stmt in program.stmts.iter() {
1052                match &stmt.kind {
1053                    StmtKind::Function(_)
1054                    | StmtKind::Class(_)
1055                    | StmtKind::Enum(_)
1056                    | StmtKind::Interface(_)
1057                    | StmtKind::Trait(_)
1058                    | StmtKind::Namespace(_)
1059                    | StmtKind::Use(_)
1060                    | StmtKind::Declare(_) => {}
1061                    _ => sa.analyze_stmt(stmt, &mut ctx),
1062                }
1063            }
1064            drop(sa);
1065            all_issues.extend(buf.into_issues());
1066        }
1067
1068        all_issues
1069    }
1070
1071    /// Like `analyze_fn_decl` but also captures a `TypeEnv` for the function scope.
1072    #[allow(clippy::too_many_arguments)]
1073    fn analyze_fn_decl_typed<'arena, 'src>(
1074        &self,
1075        decl: &php_ast::ast::FunctionDecl<'arena, 'src>,
1076        file: &Arc<str>,
1077        source: &str,
1078        source_map: &php_rs_parser::source_map::SourceMap,
1079        all_issues: &mut Vec<mir_issues::Issue>,
1080        type_envs: &mut std::collections::HashMap<
1081            crate::type_env::ScopeId,
1082            crate::type_env::TypeEnv,
1083        >,
1084        all_symbols: &mut Vec<crate::symbol::ResolvedSymbol>,
1085    ) {
1086        use crate::context::Context;
1087        use crate::stmt::StatementsAnalyzer;
1088        use mir_issues::IssueBuffer;
1089
1090        let fn_name = decl.name;
1091        let body = &decl.body;
1092
1093        for param in decl.params.iter() {
1094            if let Some(hint) = &param.type_hint {
1095                check_type_hint_classes(hint, &self.codebase, file, source, source_map, all_issues);
1096            }
1097        }
1098        if let Some(hint) = &decl.return_type {
1099            check_type_hint_classes(hint, &self.codebase, file, source, source_map, all_issues);
1100        }
1101
1102        let resolved_fn = self.codebase.resolve_class_name(file.as_ref(), fn_name);
1103        let func_opt: Option<mir_codebase::storage::FunctionStorage> = self
1104            .codebase
1105            .functions
1106            .get(resolved_fn.as_str())
1107            .map(|r| r.clone())
1108            .or_else(|| self.codebase.functions.get(fn_name).map(|r| r.clone()))
1109            .or_else(|| {
1110                self.codebase
1111                    .functions
1112                    .iter()
1113                    .find(|e| e.short_name.as_ref() == fn_name)
1114                    .map(|e| e.value().clone())
1115            });
1116
1117        let fqn = func_opt.as_ref().map(|f| f.fqn.clone());
1118        let (params, return_ty): (Vec<mir_codebase::FnParam>, _) = match &func_opt {
1119            Some(f)
1120                if f.params.len() == decl.params.len()
1121                    && f.params
1122                        .iter()
1123                        .zip(decl.params.iter())
1124                        .all(|(cp, ap)| cp.name.as_ref() == ap.name) =>
1125            {
1126                (f.params.clone(), f.return_type.clone())
1127            }
1128            _ => {
1129                let ast_params = decl
1130                    .params
1131                    .iter()
1132                    .map(|p| mir_codebase::FnParam {
1133                        name: Arc::from(p.name),
1134                        ty: None,
1135                        default: p.default.as_ref().map(|_| mir_types::Union::mixed()),
1136                        is_variadic: p.variadic,
1137                        is_byref: p.by_ref,
1138                        is_optional: p.default.is_some() || p.variadic,
1139                    })
1140                    .collect();
1141                (ast_params, None)
1142            }
1143        };
1144
1145        let mut ctx = Context::for_function(&params, return_ty, None, None, None, false, true);
1146        let mut buf = IssueBuffer::new();
1147        let mut sa = StatementsAnalyzer::new(
1148            &self.codebase,
1149            file.clone(),
1150            source,
1151            source_map,
1152            &mut buf,
1153            all_symbols,
1154            self.resolved_php_version(),
1155        );
1156        sa.analyze_stmts(body, &mut ctx);
1157        let inferred = merge_return_types(&sa.return_types);
1158        drop(sa);
1159
1160        // Capture TypeEnv for this scope
1161        let scope_name = fqn.clone().unwrap_or_else(|| Arc::from(fn_name));
1162        type_envs.insert(
1163            crate::type_env::ScopeId::Function {
1164                file: file.clone(),
1165                name: scope_name,
1166            },
1167            crate::type_env::TypeEnv::new(ctx.vars.clone()),
1168        );
1169
1170        emit_unused_params(&params, &ctx, "", file, all_issues);
1171        emit_unused_variables(&ctx, file, all_issues);
1172        all_issues.extend(buf.into_issues());
1173
1174        if let Some(fqn) = fqn {
1175            if let Some(mut func) = self.codebase.functions.get_mut(fqn.as_ref()) {
1176                func.inferred_return_type = Some(inferred);
1177            }
1178        }
1179    }
1180
1181    /// Like `analyze_class_decl` but also captures a `TypeEnv` per method scope.
1182    #[allow(clippy::too_many_arguments)]
1183    fn analyze_class_decl_typed<'arena, 'src>(
1184        &self,
1185        decl: &php_ast::ast::ClassDecl<'arena, 'src>,
1186        file: &Arc<str>,
1187        source: &str,
1188        source_map: &php_rs_parser::source_map::SourceMap,
1189        all_issues: &mut Vec<mir_issues::Issue>,
1190        type_envs: &mut std::collections::HashMap<
1191            crate::type_env::ScopeId,
1192            crate::type_env::TypeEnv,
1193        >,
1194        all_symbols: &mut Vec<crate::symbol::ResolvedSymbol>,
1195    ) {
1196        use crate::context::Context;
1197        use crate::stmt::StatementsAnalyzer;
1198        use mir_issues::IssueBuffer;
1199
1200        let class_name = decl.name.unwrap_or("<anonymous>");
1201        let resolved = self.codebase.resolve_class_name(file.as_ref(), class_name);
1202        let fqcn: &str = &resolved;
1203        let parent_fqcn = self
1204            .codebase
1205            .classes
1206            .get(fqcn)
1207            .and_then(|c| c.parent.clone());
1208
1209        if let Some(parent) = &decl.extends {
1210            check_name_class(parent, &self.codebase, file, source, source_map, all_issues);
1211        }
1212        for iface in decl.implements.iter() {
1213            check_name_class(iface, &self.codebase, file, source, source_map, all_issues);
1214        }
1215
1216        for member in decl.members.iter() {
1217            if let php_ast::ast::ClassMemberKind::Property(prop) = &member.kind {
1218                if let Some(hint) = &prop.type_hint {
1219                    check_type_hint_classes(
1220                        hint,
1221                        &self.codebase,
1222                        file,
1223                        source,
1224                        source_map,
1225                        all_issues,
1226                    );
1227                }
1228                continue;
1229            }
1230            let php_ast::ast::ClassMemberKind::Method(method) = &member.kind else {
1231                continue;
1232            };
1233
1234            for param in method.params.iter() {
1235                if let Some(hint) = &param.type_hint {
1236                    check_type_hint_classes(
1237                        hint,
1238                        &self.codebase,
1239                        file,
1240                        source,
1241                        source_map,
1242                        all_issues,
1243                    );
1244                }
1245            }
1246            if let Some(hint) = &method.return_type {
1247                check_type_hint_classes(hint, &self.codebase, file, source, source_map, all_issues);
1248            }
1249
1250            let Some(body) = &method.body else { continue };
1251
1252            let (params, return_ty) = self
1253                .codebase
1254                .get_method(fqcn, method.name)
1255                .as_deref()
1256                .map(|m| (m.params.clone(), m.return_type.clone()))
1257                .unwrap_or_default();
1258
1259            let is_ctor = method.name == "__construct";
1260            let mut ctx = Context::for_method(
1261                &params,
1262                return_ty,
1263                Some(Arc::from(fqcn)),
1264                parent_fqcn.clone(),
1265                Some(Arc::from(fqcn)),
1266                false,
1267                is_ctor,
1268                method.is_static,
1269            );
1270
1271            let mut buf = IssueBuffer::new();
1272            let mut sa = StatementsAnalyzer::new(
1273                &self.codebase,
1274                file.clone(),
1275                source,
1276                source_map,
1277                &mut buf,
1278                all_symbols,
1279                self.resolved_php_version(),
1280            );
1281            sa.analyze_stmts(body, &mut ctx);
1282            let inferred = merge_return_types(&sa.return_types);
1283            drop(sa);
1284
1285            // Capture TypeEnv for this method scope
1286            type_envs.insert(
1287                crate::type_env::ScopeId::Method {
1288                    class: Arc::from(fqcn),
1289                    method: Arc::from(method.name),
1290                },
1291                crate::type_env::TypeEnv::new(ctx.vars.clone()),
1292            );
1293
1294            emit_unused_params(&params, &ctx, method.name, file, all_issues);
1295            emit_unused_variables(&ctx, file, all_issues);
1296            all_issues.extend(buf.into_issues());
1297
1298            if let Some(mut cls) = self.codebase.classes.get_mut(fqcn) {
1299                if let Some(m) = cls.own_methods.get_mut(method.name) {
1300                    Arc::make_mut(m).inferred_return_type = Some(inferred);
1301                }
1302            }
1303        }
1304    }
1305
1306    /// Discover all `.php` files under a directory, recursively.
1307    pub fn discover_files(root: &Path) -> Vec<PathBuf> {
1308        if root.is_file() {
1309            return vec![root.to_path_buf()];
1310        }
1311        let mut files = Vec::new();
1312        collect_php_files(root, &mut files);
1313        files
1314    }
1315
1316    /// Pass 1 only: collect type definitions from `paths` into the codebase without
1317    /// analyzing method bodies or emitting issues. Used to load vendor types.
1318    pub fn collect_types_only(&self, paths: &[PathBuf]) {
1319        paths.par_iter().for_each(|path| {
1320            let Ok(src) = std::fs::read_to_string(path) else {
1321                return;
1322            };
1323            let file: Arc<str> = Arc::from(path.to_string_lossy().as_ref());
1324            let arena = bumpalo::Bump::new();
1325            let result = php_rs_parser::parse(&arena, &src);
1326            let collector =
1327                DefinitionCollector::new(&self.codebase, file, &src, &result.source_map);
1328            // Ignore any issues emitted during vendor collection
1329            let _ = collector.collect(&result.program);
1330        });
1331    }
1332
1333    /// Check type hints in enum methods for undefined classes.
1334    #[allow(clippy::too_many_arguments)]
1335    fn analyze_enum_decl<'arena, 'src>(
1336        &self,
1337        decl: &php_ast::ast::EnumDecl<'arena, 'src>,
1338        file: &Arc<str>,
1339        source: &str,
1340        source_map: &php_rs_parser::source_map::SourceMap,
1341        all_issues: &mut Vec<mir_issues::Issue>,
1342    ) {
1343        use php_ast::ast::EnumMemberKind;
1344        for iface in decl.implements.iter() {
1345            check_name_class(iface, &self.codebase, file, source, source_map, all_issues);
1346        }
1347        for member in decl.members.iter() {
1348            let EnumMemberKind::Method(method) = &member.kind else {
1349                continue;
1350            };
1351            for param in method.params.iter() {
1352                if let Some(hint) = &param.type_hint {
1353                    check_type_hint_classes(
1354                        hint,
1355                        &self.codebase,
1356                        file,
1357                        source,
1358                        source_map,
1359                        all_issues,
1360                    );
1361                }
1362            }
1363            if let Some(hint) = &method.return_type {
1364                check_type_hint_classes(hint, &self.codebase, file, source, source_map, all_issues);
1365            }
1366        }
1367    }
1368
1369    /// Check extends clauses in interface declarations for undefined types.
1370    fn analyze_interface_decl<'arena, 'src>(
1371        &self,
1372        decl: &php_ast::ast::InterfaceDecl<'arena, 'src>,
1373        file: &Arc<str>,
1374        source: &str,
1375        source_map: &php_rs_parser::source_map::SourceMap,
1376        all_issues: &mut Vec<mir_issues::Issue>,
1377    ) {
1378        use php_ast::ast::ClassMemberKind;
1379        for parent in decl.extends.iter() {
1380            check_name_class(parent, &self.codebase, file, source, source_map, all_issues);
1381        }
1382        for member in decl.members.iter() {
1383            let ClassMemberKind::Method(method) = &member.kind else {
1384                continue;
1385            };
1386            for param in method.params.iter() {
1387                if let Some(hint) = &param.type_hint {
1388                    check_type_hint_classes(
1389                        hint,
1390                        &self.codebase,
1391                        file,
1392                        source,
1393                        source_map,
1394                        all_issues,
1395                    );
1396                }
1397            }
1398            if let Some(hint) = &method.return_type {
1399                check_type_hint_classes(hint, &self.codebase, file, source, source_map, all_issues);
1400            }
1401        }
1402    }
1403}
1404
1405impl Default for ProjectAnalyzer {
1406    fn default() -> Self {
1407        Self::new()
1408    }
1409}
1410
1411// ---------------------------------------------------------------------------
1412// Offset to char-count column conversion
1413// ---------------------------------------------------------------------------
1414
1415/// Convert a byte offset to a Unicode char-count column on a given line.
1416/// Returns (line, col) where col is a 0-based Unicode code-point count.
1417fn offset_to_line_col(
1418    source: &str,
1419    offset: u32,
1420    source_map: &php_rs_parser::source_map::SourceMap,
1421) -> (u32, u16) {
1422    let lc = source_map.offset_to_line_col(offset);
1423    let line = lc.line + 1;
1424
1425    let byte_offset = offset as usize;
1426    let line_start_byte = if byte_offset == 0 {
1427        0
1428    } else {
1429        source[..byte_offset]
1430            .rfind('\n')
1431            .map(|p| p + 1)
1432            .unwrap_or(0)
1433    };
1434
1435    let col = source[line_start_byte..byte_offset].chars().count() as u16;
1436
1437    (line, col)
1438}
1439
1440// ---------------------------------------------------------------------------
1441// Type-hint class existence checker
1442// ---------------------------------------------------------------------------
1443
1444/// Walk a `TypeHint` AST node and emit `UndefinedClass` for any named class
1445/// that does not exist in the codebase.  Skips PHP built-in type keywords.
1446fn check_type_hint_classes<'arena, 'src>(
1447    hint: &php_ast::ast::TypeHint<'arena, 'src>,
1448    codebase: &Codebase,
1449    file: &Arc<str>,
1450    source: &str,
1451    source_map: &php_rs_parser::source_map::SourceMap,
1452    issues: &mut Vec<mir_issues::Issue>,
1453) {
1454    use php_ast::ast::TypeHintKind;
1455    match &hint.kind {
1456        TypeHintKind::Named(name) => {
1457            let name_str = crate::parser::name_to_string(name);
1458            // Skip built-in pseudo-types that are not real classes.
1459            if is_pseudo_type(&name_str) {
1460                return;
1461            }
1462            let resolved = codebase.resolve_class_name(file.as_ref(), &name_str);
1463            if !codebase.type_exists(&resolved) {
1464                let (line, col_start) = offset_to_line_col(source, hint.span.start, source_map);
1465                let col_end = if hint.span.start < hint.span.end {
1466                    let (_end_line, end_col) =
1467                        offset_to_line_col(source, hint.span.end, source_map);
1468                    end_col
1469                } else {
1470                    col_start
1471                };
1472                issues.push(
1473                    mir_issues::Issue::new(
1474                        mir_issues::IssueKind::UndefinedClass { name: resolved },
1475                        mir_issues::Location {
1476                            file: file.clone(),
1477                            line,
1478                            col_start,
1479                            col_end: col_end.max(col_start + 1),
1480                        },
1481                    )
1482                    .with_snippet(crate::parser::span_text(source, hint.span).unwrap_or_default()),
1483                );
1484            }
1485        }
1486        TypeHintKind::Nullable(inner) => {
1487            check_type_hint_classes(inner, codebase, file, source, source_map, issues);
1488        }
1489        TypeHintKind::Union(parts) | TypeHintKind::Intersection(parts) => {
1490            for part in parts.iter() {
1491                check_type_hint_classes(part, codebase, file, source, source_map, issues);
1492            }
1493        }
1494        TypeHintKind::Keyword(_, _) => {} // built-in keyword, always valid
1495    }
1496}
1497
1498/// Check a single `Name` AST node from an `extends` or `implements` clause and
1499/// emit `UndefinedClass` if the named type is not in the codebase.
1500fn check_name_class(
1501    name: &php_ast::ast::Name<'_, '_>,
1502    codebase: &Codebase,
1503    file: &Arc<str>,
1504    source: &str,
1505    source_map: &php_rs_parser::source_map::SourceMap,
1506    issues: &mut Vec<mir_issues::Issue>,
1507) {
1508    let name_str = crate::parser::name_to_string(name);
1509    let resolved = codebase.resolve_class_name(file.as_ref(), &name_str);
1510    if !codebase.type_exists(&resolved) {
1511        let span = name.span();
1512        let (line, col_start) = offset_to_line_col(source, span.start, source_map);
1513        let (_, col_end) = offset_to_line_col(source, span.end, source_map);
1514        issues.push(
1515            mir_issues::Issue::new(
1516                mir_issues::IssueKind::UndefinedClass { name: resolved },
1517                mir_issues::Location {
1518                    file: file.clone(),
1519                    line,
1520                    col_start,
1521                    col_end: col_end.max(col_start + 1),
1522                },
1523            )
1524            .with_snippet(crate::parser::span_text(source, span).unwrap_or_default()),
1525        );
1526    }
1527}
1528
/// Returns `true` for names that are PHP pseudo-types / special identifiers
/// rather than real classes (`self`, `static`, `parent`, `null`, `true`,
/// `false`, `never`, `void`, `mixed`, `object`, `callable`, `iterable`).
///
/// The comparison ignores ASCII case and does not allocate; the whole name
/// must match, so qualified names or longer identifiers that merely contain a
/// keyword are not pseudo-types.
fn is_pseudo_type(name: &str) -> bool {
    const PSEUDO_TYPES: [&str; 12] = [
        "self", "static", "parent", "null", "true", "false", "never", "void", "mixed", "object",
        "callable", "iterable",
    ];
    PSEUDO_TYPES
        .iter()
        .any(|keyword| name.eq_ignore_ascii_case(keyword))
}
1548
1549/// Magic methods whose parameters are passed by the PHP runtime, not user call sites.
1550const MAGIC_METHODS_WITH_RUNTIME_PARAMS: &[&str] = &[
1551    "__get",
1552    "__set",
1553    "__call",
1554    "__callStatic",
1555    "__isset",
1556    "__unset",
1557];
1558
1559/// Emit `UnusedParam` issues for params that were never read in `ctx`.
1560/// Skips magic methods whose parameters are passed by the PHP runtime.
1561fn emit_unused_params(
1562    params: &[mir_codebase::FnParam],
1563    ctx: &crate::context::Context,
1564    method_name: &str,
1565    file: &Arc<str>,
1566    issues: &mut Vec<mir_issues::Issue>,
1567) {
1568    if MAGIC_METHODS_WITH_RUNTIME_PARAMS.contains(&method_name) {
1569        return;
1570    }
1571    for p in params {
1572        let name = p.name.as_ref().trim_start_matches('$');
1573        if !ctx.read_vars.contains(name) {
1574            issues.push(
1575                mir_issues::Issue::new(
1576                    mir_issues::IssueKind::UnusedParam {
1577                        name: name.to_string(),
1578                    },
1579                    mir_issues::Location {
1580                        file: file.clone(),
1581                        line: 1,
1582                        col_start: 0,
1583                        col_end: 0,
1584                    },
1585                )
1586                .with_snippet(format!("${}", name)),
1587            );
1588        }
1589    }
1590}
1591
1592fn emit_unused_variables(
1593    ctx: &crate::context::Context,
1594    file: &Arc<str>,
1595    issues: &mut Vec<mir_issues::Issue>,
1596) {
1597    // Superglobals are always "used" — skip them
1598    const SUPERGLOBALS: &[&str] = &[
1599        "_SERVER", "_GET", "_POST", "_REQUEST", "_SESSION", "_COOKIE", "_FILES", "_ENV", "GLOBALS",
1600    ];
1601    for name in &ctx.assigned_vars {
1602        if ctx.param_names.contains(name) {
1603            continue;
1604        }
1605        if SUPERGLOBALS.contains(&name.as_str()) {
1606            continue;
1607        }
1608        // $this is implicitly used whenever the method accesses properties or
1609        // calls other methods — never report it as unused.
1610        if name == "this" {
1611            continue;
1612        }
1613        if name.starts_with('_') {
1614            continue;
1615        }
1616        if !ctx.read_vars.contains(name) {
1617            issues.push(mir_issues::Issue::new(
1618                mir_issues::IssueKind::UnusedVariable { name: name.clone() },
1619                mir_issues::Location {
1620                    file: file.clone(),
1621                    line: 1,
1622                    col_start: 0,
1623                    col_end: 0,
1624                },
1625            ));
1626        }
1627    }
1628}
1629
1630/// Merge a list of return types into a single `Union`.
1631/// Returns `void` if the list is empty.
1632pub fn merge_return_types(return_types: &[Union]) -> Union {
1633    if return_types.is_empty() {
1634        return Union::single(mir_types::Atomic::TVoid);
1635    }
1636    return_types
1637        .iter()
1638        .fold(Union::empty(), |acc, t| Union::merge(&acc, t))
1639}
1640
/// Recursively gather every file with a `php` extension below `dir` into `out`.
///
/// Directories that cannot be read are silently skipped, as are unreadable
/// entries. Symlinks are never followed, and the directories `vendor`, `.git`,
/// `node_modules`, `.cache` and `.pnpm-store` are not descended into.
pub(crate) fn collect_php_files(dir: &Path, out: &mut Vec<PathBuf>) {
    const SKIPPED_DIRS: [&str; 5] = ["vendor", ".git", "node_modules", ".cache", ".pnpm-store"];

    let Ok(listing) = std::fs::read_dir(dir) else {
        return;
    };
    for child in listing.flatten() {
        // Symlinks can form cycles (e.g. .pnpm-store), so ignore them entirely.
        let is_symlink = child
            .file_type()
            .map(|kind| kind.is_symlink())
            .unwrap_or(false);
        if is_symlink {
            continue;
        }

        let child_path = child.path();
        if !child_path.is_dir() {
            if child_path.extension().and_then(|ext| ext.to_str()) == Some("php") {
                out.push(child_path);
            }
            continue;
        }

        let dir_name = child_path
            .file_name()
            .and_then(|n| n.to_str())
            .unwrap_or("");
        if !SKIPPED_DIRS.contains(&dir_name) {
            collect_php_files(&child_path, out);
        }
    }
}
1664
1665// ---------------------------------------------------------------------------
1666// AnalysisResult
1667// ---------------------------------------------------------------------------
1668
1669// ---------------------------------------------------------------------------
1670// build_reverse_deps
1671// ---------------------------------------------------------------------------
1672
1673/// Build a reverse dependency graph from the codebase after Pass 1.
1674///
1675/// Returns a map: `defining_file → {files that depend on it}`.
1676///
1677/// Dependency edges captured (all derivable from Pass 1 data):
1678/// - `use` imports  (`file_imports`)
1679/// - `extends` / `implements` / trait `use` from `ClassStorage`
1680fn build_reverse_deps(codebase: &Codebase) -> HashMap<String, HashSet<String>> {
1681    let mut reverse: HashMap<String, HashSet<String>> = HashMap::new();
1682
1683    // Helper: record edge "defining_file → dependent_file"
1684    let mut add_edge = |symbol: &str, dependent_file: &str| {
1685        if let Some(defining_file) = codebase.symbol_to_file.get(symbol) {
1686            let def = defining_file.as_ref().to_string();
1687            if def != dependent_file {
1688                reverse
1689                    .entry(def)
1690                    .or_default()
1691                    .insert(dependent_file.to_string());
1692            }
1693        }
1694    };
1695
1696    // use-import edges
1697    for entry in codebase.file_imports.iter() {
1698        let file = entry.key().as_ref().to_string();
1699        for fqcn in entry.value().values() {
1700            add_edge(fqcn, &file);
1701        }
1702    }
1703
1704    // extends / implements / trait edges from ClassStorage
1705    for entry in codebase.classes.iter() {
1706        let defining = {
1707            let fqcn = entry.key().as_ref();
1708            codebase
1709                .symbol_to_file
1710                .get(fqcn)
1711                .map(|f| f.as_ref().to_string())
1712        };
1713        let Some(file) = defining else { continue };
1714
1715        let cls = entry.value();
1716        if let Some(ref parent) = cls.parent {
1717            add_edge(parent.as_ref(), &file);
1718        }
1719        for iface in &cls.interfaces {
1720            add_edge(iface.as_ref(), &file);
1721        }
1722        for tr in &cls.traits {
1723            add_edge(tr.as_ref(), &file);
1724        }
1725    }
1726
1727    reverse
1728}
1729
1730// ---------------------------------------------------------------------------
1731
1732/// Extract the reference locations recorded for `file` from the codebase into
1733/// a flat `Vec<(symbol_key, start, end)>` suitable for caching.
1734fn extract_reference_locations(codebase: &Codebase, file: &Arc<str>) -> Vec<(String, u32, u32)> {
1735    codebase
1736        .extract_file_reference_locations(file.as_ref())
1737        .into_iter()
1738        .map(|(sym, start, end)| (sym.to_string(), start, end))
1739        .collect()
1740}
1741
1742// ---------------------------------------------------------------------------
1743
1744pub struct AnalysisResult {
1745    pub issues: Vec<Issue>,
1746    pub type_envs: std::collections::HashMap<crate::type_env::ScopeId, crate::type_env::TypeEnv>,
1747    /// Per-expression resolved symbols from Pass 2, sorted by file path.
1748    pub symbols: Vec<crate::symbol::ResolvedSymbol>,
1749    /// Maps each file path to the contiguous range within `symbols` that belongs
1750    /// to it. Built once after analysis; allows `symbol_at` to scan only the
1751    /// relevant file's slice rather than the entire codebase-wide vector.
1752    symbols_by_file: HashMap<Arc<str>, std::ops::Range<usize>>,
1753}
1754
1755impl AnalysisResult {
1756    fn build(
1757        issues: Vec<Issue>,
1758        type_envs: std::collections::HashMap<crate::type_env::ScopeId, crate::type_env::TypeEnv>,
1759        mut symbols: Vec<crate::symbol::ResolvedSymbol>,
1760    ) -> Self {
1761        // Sort by file so each file's symbols form a contiguous slice.
1762        symbols.sort_unstable_by(|a, b| a.file.as_ref().cmp(b.file.as_ref()));
1763        let mut symbols_by_file: HashMap<Arc<str>, std::ops::Range<usize>> = HashMap::new();
1764        let mut i = 0;
1765        while i < symbols.len() {
1766            let file = Arc::clone(&symbols[i].file);
1767            let start = i;
1768            while i < symbols.len() && symbols[i].file == file {
1769                i += 1;
1770            }
1771            symbols_by_file.insert(file, start..i);
1772        }
1773        Self {
1774            issues,
1775            type_envs,
1776            symbols,
1777            symbols_by_file,
1778        }
1779    }
1780}
1781
1782impl AnalysisResult {
1783    pub fn error_count(&self) -> usize {
1784        self.issues
1785            .iter()
1786            .filter(|i| i.severity == mir_issues::Severity::Error)
1787            .count()
1788    }
1789
1790    pub fn warning_count(&self) -> usize {
1791        self.issues
1792            .iter()
1793            .filter(|i| i.severity == mir_issues::Severity::Warning)
1794            .count()
1795    }
1796
1797    /// Group issues by source file.
1798    ///
1799    /// Returns a map from absolute file path to the slice of issues that belong
1800    /// to that file. Useful for LSP `textDocument/publishDiagnostics`, which
1801    /// pushes diagnostics per document.
1802    pub fn issues_by_file(&self) -> HashMap<std::sync::Arc<str>, Vec<&Issue>> {
1803        let mut map: HashMap<std::sync::Arc<str>, Vec<&Issue>> = HashMap::new();
1804        for issue in &self.issues {
1805            map.entry(issue.location.file.clone())
1806                .or_default()
1807                .push(issue);
1808        }
1809        map
1810    }
1811
1812    /// Return the innermost resolved symbol whose span contains `byte_offset`
1813    /// in `file`, or `None` if no symbol was recorded at that position.
1814    ///
1815    /// When multiple symbols overlap (e.g. a method call whose span contains a
1816    /// property access span), the one with the smallest span is returned so the
1817    /// caller gets the most specific symbol at the cursor.
1818    ///
1819    /// Typical use: LSP `textDocument/references` and `textDocument/hover`.
1820    pub fn symbol_at(
1821        &self,
1822        file: &str,
1823        byte_offset: u32,
1824    ) -> Option<&crate::symbol::ResolvedSymbol> {
1825        let range = self.symbols_by_file.get(file)?;
1826        self.symbols[range.clone()]
1827            .iter()
1828            .filter(|s| s.span.start <= byte_offset && byte_offset < s.span.end)
1829            .min_by_key(|s| s.span.end - s.span.start)
1830    }
1831}