Skip to main content

mir_analyzer/
project.rs

1/// Project-level orchestration: file discovery, pass 1, pass 2.
2use std::path::{Path, PathBuf};
3use std::sync::Arc;
4
5use rayon::prelude::*;
6
7use std::collections::{HashMap, HashSet};
8
9use crate::cache::{hash_content, AnalysisCache};
10use crate::php_version::PhpVersion;
11use mir_codebase::Codebase;
12use mir_issues::Issue;
13use mir_types::Union;
14
15use crate::collector::DefinitionCollector;
16
17// ---------------------------------------------------------------------------
18// ProjectAnalyzer
19// ---------------------------------------------------------------------------
20
21pub struct ProjectAnalyzer {
22    pub codebase: Arc<Codebase>,
23    /// Optional cache — when `Some`, Pass 2 results are read/written per file.
24    pub cache: Option<AnalysisCache>,
25    /// Called once after each file completes Pass 2 (used for progress reporting).
26    pub on_file_done: Option<Arc<dyn Fn() + Send + Sync>>,
27    /// PSR-4 autoloader mapping from composer.json, if available.
28    pub psr4: Option<Arc<crate::composer::Psr4Map>>,
29    /// Whether stubs have already been loaded (to avoid double-loading).
30    stubs_loaded: std::sync::atomic::AtomicBool,
31    /// When true, run dead code detection at the end of analysis.
32    pub find_dead_code: bool,
33    /// Target PHP language version. Used for version-conditional decisions
34    /// such as stub filtering.
35    pub php_version: PhpVersion,
36}
37
38impl ProjectAnalyzer {
39    pub fn new() -> Self {
40        Self {
41            codebase: Arc::new(Codebase::new()),
42            cache: None,
43            on_file_done: None,
44            psr4: None,
45            stubs_loaded: std::sync::atomic::AtomicBool::new(false),
46            find_dead_code: false,
47            php_version: PhpVersion::default(),
48        }
49    }
50
51    /// Create a `ProjectAnalyzer` with a disk-backed cache stored under `cache_dir`.
52    pub fn with_cache(cache_dir: &Path) -> Self {
53        Self {
54            codebase: Arc::new(Codebase::new()),
55            cache: Some(AnalysisCache::open(cache_dir)),
56            on_file_done: None,
57            psr4: None,
58            stubs_loaded: std::sync::atomic::AtomicBool::new(false),
59            find_dead_code: false,
60            php_version: PhpVersion::default(),
61        }
62    }
63
64    /// Create a `ProjectAnalyzer` from a project root containing `composer.json`.
65    /// Returns the analyzer (with `psr4` set) and the `Psr4Map` so callers can
66    /// call `map.project_files()` / `map.vendor_files()`.
67    pub fn from_composer(
68        root: &Path,
69    ) -> Result<(Self, crate::composer::Psr4Map), crate::composer::ComposerError> {
70        let map = crate::composer::Psr4Map::from_composer(root)?;
71        let psr4 = Arc::new(map.clone());
72        let analyzer = Self {
73            codebase: Arc::new(Codebase::new()),
74            cache: None,
75            on_file_done: None,
76            psr4: Some(psr4),
77            stubs_loaded: std::sync::atomic::AtomicBool::new(false),
78            find_dead_code: false,
79            php_version: PhpVersion::default(),
80        };
81        Ok((analyzer, map))
82    }
83
84    /// Set the target PHP version.
85    pub fn with_php_version(mut self, version: PhpVersion) -> Self {
86        self.php_version = version;
87        self
88    }
89
90    /// Expose codebase for external use (e.g., pre-loading stubs from CLI).
91    pub fn codebase(&self) -> &Arc<Codebase> {
92        &self.codebase
93    }
94
95    /// Load PHP built-in stubs. Called automatically by `analyze` if not done yet.
96    pub fn load_stubs(&self) {
97        if !self
98            .stubs_loaded
99            .swap(true, std::sync::atomic::Ordering::SeqCst)
100        {
101            crate::stubs::load_stubs(&self.codebase);
102        }
103    }
104
105    /// Run the full analysis pipeline on a set of file paths.
106    pub fn analyze(&self, paths: &[PathBuf]) -> AnalysisResult {
107        let mut all_issues = Vec::new();
108        let mut parse_errors = Vec::new();
109
110        // ---- Load PHP built-in stubs (before Pass 1 so user code can override)
111        self.load_stubs();
112
113        // ---- Pass 1: read files in parallel ----------------------------------
114        let file_data: Vec<(Arc<str>, String)> = paths
115            .par_iter()
116            .filter_map(|path| match std::fs::read_to_string(path) {
117                Ok(src) => Some((Arc::from(path.to_string_lossy().as_ref()), src)),
118                Err(e) => {
119                    eprintln!("Cannot read {}: {}", path.display(), e);
120                    None
121                }
122            })
123            .collect();
124
125        // ---- Pre-Pass-2 invalidation: evict dependents of changed files ------
126        // Uses the reverse dep graph persisted from the previous run. Hashes are
127        // recomputed inline inside Pass 2; avoiding a shared HashMap + global
128        // sync barrier keeps Pass 2's parallel pipeline unblocked.
129        if let Some(cache) = &self.cache {
130            let changed: Vec<String> = file_data
131                .par_iter()
132                .filter_map(|(f, src)| {
133                    let h = hash_content(src);
134                    if cache.get(f, &h).is_none() {
135                        Some(f.to_string())
136                    } else {
137                        None
138                    }
139                })
140                .collect();
141            if !changed.is_empty() {
142                cache.evict_with_dependents(&changed);
143            }
144        }
145
146        // ---- Pass 1: combined pre-index + definition collection (parallel) -----
147        // Parse each file once; both the FQCN/namespace/import index and the full
148        // definition collection run in the same rayon closure, eliminating the
149        // second sequential parse of every file. DashMap handles concurrent writes.
150        let pass1_results: Vec<(Vec<Issue>, Vec<Issue>)> = file_data
151            .par_iter()
152            .map(|(file, src)| {
153                use php_ast::ast::StmtKind;
154                let arena = bumpalo::Bump::new();
155                let result = php_rs_parser::parse(&arena, src);
156
157                // --- Pre-index: build FQCN index, file imports, and namespaces ---
158                let mut current_namespace: Option<String> = None;
159                let mut imports: std::collections::HashMap<String, String> =
160                    std::collections::HashMap::new();
161                let mut file_ns_set = false;
162
163                // Index a flat list of stmts under a given namespace prefix.
164                let index_stmts =
165                    |stmts: &[php_ast::ast::Stmt<'_, '_>],
166                     ns: Option<&str>,
167                     imports: &mut std::collections::HashMap<String, String>| {
168                        for stmt in stmts.iter() {
169                            match &stmt.kind {
170                                StmtKind::Use(use_decl) => {
171                                    for item in use_decl.uses.iter() {
172                                        let full_name = crate::parser::name_to_string(&item.name)
173                                            .trim_start_matches('\\')
174                                            .to_string();
175                                        let alias = item.alias.unwrap_or_else(|| {
176                                            full_name.rsplit('\\').next().unwrap_or(&full_name)
177                                        });
178                                        imports.insert(alias.to_string(), full_name);
179                                    }
180                                }
181                                StmtKind::Class(decl) => {
182                                    if let Some(n) = decl.name {
183                                        let fqcn = match ns {
184                                            Some(ns) => format!("{}\\{}", ns, n),
185                                            None => n.to_string(),
186                                        };
187                                        self.codebase
188                                            .known_symbols
189                                            .insert(Arc::from(fqcn.as_str()));
190                                    }
191                                }
192                                StmtKind::Interface(decl) => {
193                                    let fqcn = match ns {
194                                        Some(ns) => format!("{}\\{}", ns, decl.name),
195                                        None => decl.name.to_string(),
196                                    };
197                                    self.codebase.known_symbols.insert(Arc::from(fqcn.as_str()));
198                                }
199                                StmtKind::Trait(decl) => {
200                                    let fqcn = match ns {
201                                        Some(ns) => format!("{}\\{}", ns, decl.name),
202                                        None => decl.name.to_string(),
203                                    };
204                                    self.codebase.known_symbols.insert(Arc::from(fqcn.as_str()));
205                                }
206                                StmtKind::Enum(decl) => {
207                                    let fqcn = match ns {
208                                        Some(ns) => format!("{}\\{}", ns, decl.name),
209                                        None => decl.name.to_string(),
210                                    };
211                                    self.codebase.known_symbols.insert(Arc::from(fqcn.as_str()));
212                                }
213                                StmtKind::Function(decl) => {
214                                    let fqn = match ns {
215                                        Some(ns) => format!("{}\\{}", ns, decl.name),
216                                        None => decl.name.to_string(),
217                                    };
218                                    self.codebase.known_symbols.insert(Arc::from(fqn.as_str()));
219                                }
220                                _ => {}
221                            }
222                        }
223                    };
224
225                for stmt in result.program.stmts.iter() {
226                    match &stmt.kind {
227                        StmtKind::Namespace(ns) => {
228                            current_namespace =
229                                ns.name.as_ref().map(|n| crate::parser::name_to_string(n));
230                            if !file_ns_set {
231                                if let Some(ref ns_str) = current_namespace {
232                                    self.codebase
233                                        .file_namespaces
234                                        .insert(file.clone(), ns_str.clone());
235                                    file_ns_set = true;
236                                }
237                            }
238                            // Bracketed namespace: walk inner stmts for Use/Class/etc.
239                            if let php_ast::ast::NamespaceBody::Braced(inner_stmts) = &ns.body {
240                                index_stmts(
241                                    inner_stmts,
242                                    current_namespace.as_deref(),
243                                    &mut imports,
244                                );
245                            }
246                        }
247                        _ => index_stmts(
248                            std::slice::from_ref(stmt),
249                            current_namespace.as_deref(),
250                            &mut imports,
251                        ),
252                    }
253                }
254
255                if !imports.is_empty() {
256                    self.codebase.file_imports.insert(file.clone(), imports);
257                }
258
259                // --- Parse errors ---
260                let file_parse_errors: Vec<Issue> = result
261                    .errors
262                    .iter()
263                    .map(|err| {
264                        Issue::new(
265                            mir_issues::IssueKind::ParseError {
266                                message: err.to_string(),
267                            },
268                            mir_issues::Location {
269                                file: file.clone(),
270                                line: 1,
271                                col_start: 0,
272                                col_end: 0,
273                            },
274                        )
275                    })
276                    .collect();
277
278                // --- Definition collection ---
279                let collector =
280                    DefinitionCollector::new(&self.codebase, file.clone(), src, &result.source_map);
281                let issues = collector.collect(&result.program);
282
283                (file_parse_errors, issues)
284            })
285            .collect();
286
287        for (file_parse_errors, issues) in pass1_results {
288            parse_errors.extend(file_parse_errors);
289            all_issues.extend(issues);
290        }
291
292        all_issues.extend(parse_errors);
293
294        // ---- Finalize codebase (resolve inheritance, build dispatch tables) --
295        self.codebase.finalize();
296
297        // ---- Lazy-load unknown classes via PSR-4 (issue #50) ----------------
298        if let Some(psr4) = &self.psr4 {
299            self.lazy_load_missing_classes(psr4.clone(), &mut all_issues);
300        }
301
302        // ---- Build reverse dep graph and persist it for the next run ---------
303        if let Some(cache) = &self.cache {
304            let rev = build_reverse_deps(&self.codebase);
305            cache.set_reverse_deps(rev);
306        }
307
308        // ---- Class-level checks (M11) ----------------------------------------
309        let analyzed_file_set: std::collections::HashSet<std::sync::Arc<str>> =
310            file_data.iter().map(|(f, _)| f.clone()).collect();
311        let class_issues =
312            crate::class::ClassAnalyzer::with_files(&self.codebase, analyzed_file_set, &file_data)
313                .analyze_all();
314        all_issues.extend(class_issues);
315
316        // ---- Pass 2: analyze function/method bodies in parallel (M14) --------
317        // Each file is analyzed independently; arena + parse happen inside the
318        // rayon closure so there is no cross-thread borrow.
319        // When a cache is present, files whose content hash matches a stored
320        // entry skip re-analysis entirely (M17).
321        let pass2_results: Vec<(Vec<Issue>, Vec<crate::symbol::ResolvedSymbol>)> = file_data
322            .par_iter()
323            .map(|(file, src)| {
324                // Cache lookup
325                let result = if let Some(cache) = &self.cache {
326                    let h = hash_content(src);
327                    if let Some((cached_issues, ref_locs)) = cache.get(file, &h) {
328                        // Hit — replay reference locations so symbol_reference_locations
329                        // is populated without re-running analyze_bodies.
330                        self.codebase
331                            .replay_reference_locations(file.clone(), &ref_locs);
332                        (cached_issues, Vec::new())
333                    } else {
334                        // Miss — analyze and store
335                        let arena = bumpalo::Bump::new();
336                        let parsed = php_rs_parser::parse(&arena, src);
337                        let (issues, symbols) = self.analyze_bodies(
338                            &parsed.program,
339                            file.clone(),
340                            src,
341                            &parsed.source_map,
342                        );
343                        let ref_locs = extract_reference_locations(&self.codebase, file);
344                        cache.put(file, h, issues.clone(), ref_locs);
345                        (issues, symbols)
346                    }
347                } else {
348                    let arena = bumpalo::Bump::new();
349                    let parsed = php_rs_parser::parse(&arena, src);
350                    self.analyze_bodies(&parsed.program, file.clone(), src, &parsed.source_map)
351                };
352                if let Some(cb) = &self.on_file_done {
353                    cb();
354                }
355                result
356            })
357            .collect();
358
359        let mut all_symbols = Vec::new();
360        for (issues, symbols) in pass2_results {
361            all_issues.extend(issues);
362            all_symbols.extend(symbols);
363        }
364
365        // Persist cache hits/misses to disk
366        if let Some(cache) = &self.cache {
367            cache.flush();
368        }
369
370        // ---- Compact the reference index ------------------------------------
371        // Convert build-phase DashMaps into a CSR structure, freeing the
372        // per-entry HashMap/HashSet overhead accumulated during Pass 2.
373        self.codebase.compact_reference_index();
374
375        // ---- Dead-code detection (M18) --------------------------------------
376        if self.find_dead_code {
377            let dead_code_issues =
378                crate::dead_code::DeadCodeAnalyzer::new(&self.codebase).analyze();
379            all_issues.extend(dead_code_issues);
380        }
381
382        AnalysisResult::build(all_issues, std::collections::HashMap::new(), all_symbols)
383    }
384
385    /// Lazily load class definitions for referenced-but-unknown FQCNs via PSR-4.
386    ///
387    /// After Pass 1 and `codebase.finalize()`, some classes referenced as parents
388    /// or interfaces may not be in the codebase (they weren't in the initial file
389    /// list). This method iterates up to `max_depth` times, each time resolving
390    /// unknown parent/interface FQCNs via the PSR-4 map, running Pass 1 on those
391    /// files, and re-finalizing the codebase. The loop stops when no new files
392    /// are discovered.
393    fn lazy_load_missing_classes(
394        &self,
395        psr4: Arc<crate::composer::Psr4Map>,
396        all_issues: &mut Vec<Issue>,
397    ) {
398        use std::collections::HashSet;
399
400        let max_depth = 10; // prevent infinite chains
401        let mut loaded: HashSet<String> = HashSet::new();
402
403        for _ in 0..max_depth {
404            // Collect all referenced FQCNs that aren't in the codebase
405            let mut to_load: Vec<(String, PathBuf)> = Vec::new();
406
407            for entry in self.codebase.classes.iter() {
408                let cls = entry.value();
409
410                // Check parent class
411                if let Some(parent) = &cls.parent {
412                    let fqcn = parent.as_ref();
413                    if !self.codebase.classes.contains_key(fqcn) && !loaded.contains(fqcn) {
414                        if let Some(path) = psr4.resolve(fqcn) {
415                            to_load.push((fqcn.to_string(), path));
416                        }
417                    }
418                }
419
420                // Check interfaces
421                for iface in &cls.interfaces {
422                    let fqcn = iface.as_ref();
423                    if !self.codebase.classes.contains_key(fqcn)
424                        && !self.codebase.interfaces.contains_key(fqcn)
425                        && !loaded.contains(fqcn)
426                    {
427                        if let Some(path) = psr4.resolve(fqcn) {
428                            to_load.push((fqcn.to_string(), path));
429                        }
430                    }
431                }
432            }
433
434            if to_load.is_empty() {
435                break;
436            }
437
438            // Load each discovered file (Pass 1 only)
439            for (fqcn, path) in to_load {
440                loaded.insert(fqcn);
441                if let Ok(src) = std::fs::read_to_string(&path) {
442                    let file: Arc<str> = Arc::from(path.to_string_lossy().as_ref());
443                    let arena = bumpalo::Bump::new();
444                    let result = php_rs_parser::parse(&arena, &src);
445                    let collector = crate::collector::DefinitionCollector::new(
446                        &self.codebase,
447                        file,
448                        &src,
449                        &result.source_map,
450                    );
451                    let issues = collector.collect(&result.program);
452                    all_issues.extend(issues);
453                }
454            }
455
456            // Re-finalize to include newly loaded classes in the inheritance graph.
457            // Must reset the flag first so finalize() isn't a no-op.
458            self.codebase.invalidate_finalization();
459            self.codebase.finalize();
460        }
461    }
462
463    /// Re-analyze a single file within the existing codebase.
464    ///
465    /// This is the incremental analysis API for LSP:
466    /// 1. Removes old definitions from this file
467    /// 2. Re-runs Pass 1 (definition collection) on the new content
468    /// 3. Re-finalizes the codebase (rebuilds inheritance)
469    /// 4. Re-runs Pass 2 (body analysis) on this file
470    /// 5. Returns the analysis result for this file only
471    pub fn re_analyze_file(&self, file_path: &str, new_content: &str) -> AnalysisResult {
472        // Fast path: content unchanged and cache has a valid entry — skip full re-analysis.
473        if let Some(cache) = &self.cache {
474            let h = hash_content(new_content);
475            if let Some((issues, ref_locs)) = cache.get(file_path, &h) {
476                let file: Arc<str> = Arc::from(file_path);
477                self.codebase.replay_reference_locations(file, &ref_locs);
478                return AnalysisResult::build(issues, HashMap::new(), Vec::new());
479            }
480        }
481
482        // 1. Snapshot inheritance structure before removing old definitions.
483        //    This lets us skip finalize() later if only method bodies changed.
484        let structural_snapshot = self.codebase.file_structural_snapshot(file_path);
485
486        // 2. Remove old definitions from this file
487        self.codebase.remove_file_definitions(file_path);
488
489        // 3. Parse new content and run Pass 1
490        let file: Arc<str> = Arc::from(file_path);
491        let arena = bumpalo::Bump::new();
492        let parsed = php_rs_parser::parse(&arena, new_content);
493
494        let mut all_issues = Vec::new();
495
496        // Collect parse errors
497        for err in &parsed.errors {
498            all_issues.push(Issue::new(
499                mir_issues::IssueKind::ParseError {
500                    message: err.to_string(),
501                },
502                mir_issues::Location {
503                    file: file.clone(),
504                    line: 1,
505                    col_start: 0,
506                    col_end: 0,
507                },
508            ));
509        }
510
511        let collector = DefinitionCollector::new(
512            &self.codebase,
513            file.clone(),
514            new_content,
515            &parsed.source_map,
516        );
517        all_issues.extend(collector.collect(&parsed.program));
518
519        // 4. Re-finalize, or skip if only method bodies changed.
520        //    finalize() rebuilds all_parents for every class/interface in the
521        //    codebase by walking the full inheritance graph — this is expensive.
522        //    If the inheritance structure of this file is unchanged (same parent,
523        //    interfaces, traits), restore all_parents from the snapshot and skip
524        //    the full walk.
525        if self
526            .codebase
527            .structural_unchanged_after_pass1(file_path, &structural_snapshot)
528        {
529            self.codebase
530                .restore_all_parents(file_path, &structural_snapshot);
531        } else {
532            self.codebase.finalize();
533        }
534
535        // 5. Run Pass 2 on this file
536        let (body_issues, symbols) = self.analyze_bodies(
537            &parsed.program,
538            file.clone(),
539            new_content,
540            &parsed.source_map,
541        );
542        all_issues.extend(body_issues);
543
544        // 6. Update cache if present
545        if let Some(cache) = &self.cache {
546            let h = hash_content(new_content);
547            cache.evict_with_dependents(&[file_path.to_string()]);
548            let ref_locs = extract_reference_locations(&self.codebase, &file);
549            cache.put(file_path, h, all_issues.clone(), ref_locs);
550        }
551
552        AnalysisResult::build(all_issues, HashMap::new(), symbols)
553    }
554
555    /// Analyze a PHP source string without a real file path.
556    /// Useful for tests and LSP single-file mode.
557    pub fn analyze_source(source: &str) -> AnalysisResult {
558        use crate::collector::DefinitionCollector;
559        let analyzer = ProjectAnalyzer::new();
560        analyzer.load_stubs();
561        let file: Arc<str> = Arc::from("<source>");
562        let arena = bumpalo::Bump::new();
563        let result = php_rs_parser::parse(&arena, source);
564        let mut all_issues = Vec::new();
565        let collector =
566            DefinitionCollector::new(&analyzer.codebase, file.clone(), source, &result.source_map);
567        all_issues.extend(collector.collect(&result.program));
568        analyzer.codebase.finalize();
569        let mut type_envs = std::collections::HashMap::new();
570        let mut all_symbols = Vec::new();
571        all_issues.extend(analyzer.analyze_bodies_typed(
572            &result.program,
573            file.clone(),
574            source,
575            &result.source_map,
576            &mut type_envs,
577            &mut all_symbols,
578        ));
579        AnalysisResult::build(all_issues, type_envs, all_symbols)
580    }
581
582    /// Pass 2: walk all function/method bodies in one file, return issues, and
583    /// write inferred return types back to the codebase.
584    fn analyze_bodies<'arena, 'src>(
585        &self,
586        program: &php_ast::ast::Program<'arena, 'src>,
587        file: Arc<str>,
588        source: &str,
589        source_map: &php_rs_parser::source_map::SourceMap,
590    ) -> (Vec<mir_issues::Issue>, Vec<crate::symbol::ResolvedSymbol>) {
591        use php_ast::ast::StmtKind;
592
593        let mut all_issues = Vec::new();
594        let mut all_symbols = Vec::new();
595
596        for stmt in program.stmts.iter() {
597            match &stmt.kind {
598                StmtKind::Function(decl) => {
599                    self.analyze_fn_decl(
600                        decl,
601                        &file,
602                        source,
603                        source_map,
604                        &mut all_issues,
605                        &mut all_symbols,
606                    );
607                }
608                StmtKind::Class(decl) => {
609                    self.analyze_class_decl(
610                        decl,
611                        &file,
612                        source,
613                        source_map,
614                        &mut all_issues,
615                        &mut all_symbols,
616                    );
617                }
618                StmtKind::Enum(decl) => {
619                    self.analyze_enum_decl(decl, &file, source, source_map, &mut all_issues);
620                }
621                StmtKind::Interface(decl) => {
622                    self.analyze_interface_decl(decl, &file, source, source_map, &mut all_issues);
623                }
624                StmtKind::Namespace(ns) => {
625                    if let php_ast::ast::NamespaceBody::Braced(stmts) = &ns.body {
626                        for inner in stmts.iter() {
627                            match &inner.kind {
628                                StmtKind::Function(decl) => {
629                                    self.analyze_fn_decl(
630                                        decl,
631                                        &file,
632                                        source,
633                                        source_map,
634                                        &mut all_issues,
635                                        &mut all_symbols,
636                                    );
637                                }
638                                StmtKind::Class(decl) => {
639                                    self.analyze_class_decl(
640                                        decl,
641                                        &file,
642                                        source,
643                                        source_map,
644                                        &mut all_issues,
645                                        &mut all_symbols,
646                                    );
647                                }
648                                StmtKind::Enum(decl) => {
649                                    self.analyze_enum_decl(
650                                        decl,
651                                        &file,
652                                        source,
653                                        source_map,
654                                        &mut all_issues,
655                                    );
656                                }
657                                StmtKind::Interface(decl) => {
658                                    self.analyze_interface_decl(
659                                        decl,
660                                        &file,
661                                        source,
662                                        source_map,
663                                        &mut all_issues,
664                                    );
665                                }
666                                _ => {}
667                            }
668                        }
669                    }
670                }
671                _ => {}
672            }
673        }
674
675        // Analyze top-level executable statements in global scope.
676        {
677            use crate::context::Context;
678            use crate::stmt::StatementsAnalyzer;
679            use mir_issues::IssueBuffer;
680
681            let mut ctx = Context::new();
682            let mut buf = IssueBuffer::new();
683            let mut sa = StatementsAnalyzer::new(
684                &self.codebase,
685                file.clone(),
686                source,
687                source_map,
688                &mut buf,
689                &mut all_symbols,
690            );
691            for stmt in program.stmts.iter() {
692                match &stmt.kind {
693                    StmtKind::Function(_)
694                    | StmtKind::Class(_)
695                    | StmtKind::Enum(_)
696                    | StmtKind::Interface(_)
697                    | StmtKind::Trait(_)
698                    | StmtKind::Namespace(_)
699                    | StmtKind::Use(_)
700                    | StmtKind::Declare(_) => {}
701                    _ => sa.analyze_stmt(stmt, &mut ctx),
702                }
703            }
704            drop(sa);
705            all_issues.extend(buf.into_issues());
706        }
707
708        (all_issues, all_symbols)
709    }
710
711    /// Analyze a single function declaration body and collect issues + inferred return type.
712    #[allow(clippy::too_many_arguments)]
713    fn analyze_fn_decl<'arena, 'src>(
714        &self,
715        decl: &php_ast::ast::FunctionDecl<'arena, 'src>,
716        file: &Arc<str>,
717        source: &str,
718        source_map: &php_rs_parser::source_map::SourceMap,
719        all_issues: &mut Vec<mir_issues::Issue>,
720        all_symbols: &mut Vec<crate::symbol::ResolvedSymbol>,
721    ) {
722        let fn_name = decl.name;
723        let body = &decl.body;
724        // Check parameter and return type hints for undefined classes.
725        for param in decl.params.iter() {
726            if let Some(hint) = &param.type_hint {
727                check_type_hint_classes(hint, &self.codebase, file, source, source_map, all_issues);
728            }
729        }
730        if let Some(hint) = &decl.return_type {
731            check_type_hint_classes(hint, &self.codebase, file, source, source_map, all_issues);
732        }
733        use crate::context::Context;
734        use crate::stmt::StatementsAnalyzer;
735        use mir_issues::IssueBuffer;
736
737        // Resolve function name using the file's namespace (handles namespaced functions)
738        let resolved_fn = self.codebase.resolve_class_name(file.as_ref(), fn_name);
739        let func_opt: Option<mir_codebase::storage::FunctionStorage> = self
740            .codebase
741            .functions
742            .get(resolved_fn.as_str())
743            .map(|r| r.clone())
744            .or_else(|| self.codebase.functions.get(fn_name).map(|r| r.clone()))
745            .or_else(|| {
746                self.codebase
747                    .functions
748                    .iter()
749                    .find(|e| e.short_name.as_ref() == fn_name)
750                    .map(|e| e.value().clone())
751            });
752
753        let fqn = func_opt.as_ref().map(|f| f.fqn.clone());
754        // Always use the codebase entry when its params match the AST (same count + names).
755        // This covers the common case and preserves docblock-enriched types.
756        // When names differ (two files define the same unnamespaced function), fall back to
757        // the AST params so param variables are always in scope for this file's body.
758        let (params, return_ty): (Vec<mir_codebase::FnParam>, _) = match &func_opt {
759            Some(f)
760                if f.params.len() == decl.params.len()
761                    && f.params
762                        .iter()
763                        .zip(decl.params.iter())
764                        .all(|(cp, ap)| cp.name.as_ref() == ap.name) =>
765            {
766                (f.params.clone(), f.return_type.clone())
767            }
768            _ => {
769                let ast_params = decl
770                    .params
771                    .iter()
772                    .map(|p| mir_codebase::FnParam {
773                        name: Arc::from(p.name),
774                        ty: None,
775                        default: p.default.as_ref().map(|_| mir_types::Union::mixed()),
776                        is_variadic: p.variadic,
777                        is_byref: p.by_ref,
778                        is_optional: p.default.is_some() || p.variadic,
779                    })
780                    .collect();
781                (ast_params, None)
782            }
783        };
784
785        let mut ctx = Context::for_function(&params, return_ty, None, None, None, false, true);
786        let mut buf = IssueBuffer::new();
787        let mut sa = StatementsAnalyzer::new(
788            &self.codebase,
789            file.clone(),
790            source,
791            source_map,
792            &mut buf,
793            all_symbols,
794        );
795        sa.analyze_stmts(body, &mut ctx);
796        let inferred = merge_return_types(&sa.return_types);
797        drop(sa);
798
799        emit_unused_params(&params, &ctx, "", file, all_issues);
800        emit_unused_variables(&ctx, file, all_issues);
801        all_issues.extend(buf.into_issues());
802
803        if let Some(fqn) = fqn {
804            if let Some(mut func) = self.codebase.functions.get_mut(fqn.as_ref()) {
805                func.inferred_return_type = Some(inferred);
806            }
807        }
808    }
809
810    /// Analyze all method bodies on a class declaration and collect issues + inferred return types.
811    #[allow(clippy::too_many_arguments)]
812    fn analyze_class_decl<'arena, 'src>(
813        &self,
814        decl: &php_ast::ast::ClassDecl<'arena, 'src>,
815        file: &Arc<str>,
816        source: &str,
817        source_map: &php_rs_parser::source_map::SourceMap,
818        all_issues: &mut Vec<mir_issues::Issue>,
819        all_symbols: &mut Vec<crate::symbol::ResolvedSymbol>,
820    ) {
821        use crate::context::Context;
822        use crate::stmt::StatementsAnalyzer;
823        use mir_issues::IssueBuffer;
824
825        let class_name = decl.name.unwrap_or("<anonymous>");
826        // Resolve the FQCN using the file's namespace/imports — avoids ambiguity
827        // when multiple classes share the same short name across namespaces.
828        let resolved = self.codebase.resolve_class_name(file.as_ref(), class_name);
829        let fqcn: &str = &resolved;
830        let parent_fqcn = self
831            .codebase
832            .classes
833            .get(fqcn)
834            .and_then(|c| c.parent.clone());
835
836        if let Some(parent) = &decl.extends {
837            check_name_class(parent, &self.codebase, file, source, source_map, all_issues);
838        }
839        for iface in decl.implements.iter() {
840            check_name_class(iface, &self.codebase, file, source, source_map, all_issues);
841        }
842
843        for member in decl.members.iter() {
844            let php_ast::ast::ClassMemberKind::Method(method) = &member.kind else {
845                continue;
846            };
847
848            // Check parameter and return type hints for undefined classes (even abstract methods).
849            for param in method.params.iter() {
850                if let Some(hint) = &param.type_hint {
851                    check_type_hint_classes(
852                        hint,
853                        &self.codebase,
854                        file,
855                        source,
856                        source_map,
857                        all_issues,
858                    );
859                }
860            }
861            if let Some(hint) = &method.return_type {
862                check_type_hint_classes(hint, &self.codebase, file, source, source_map, all_issues);
863            }
864
865            let Some(body) = &method.body else { continue };
866
867            let (params, return_ty) = self
868                .codebase
869                .get_method(fqcn, method.name)
870                .as_deref()
871                .map(|m| (m.params.clone(), m.return_type.clone()))
872                .unwrap_or_default();
873
874            let is_ctor = method.name == "__construct";
875            let mut ctx = Context::for_method(
876                &params,
877                return_ty,
878                Some(Arc::from(fqcn)),
879                parent_fqcn.clone(),
880                Some(Arc::from(fqcn)),
881                false,
882                is_ctor,
883                method.is_static,
884            );
885
886            let mut buf = IssueBuffer::new();
887            let mut sa = StatementsAnalyzer::new(
888                &self.codebase,
889                file.clone(),
890                source,
891                source_map,
892                &mut buf,
893                all_symbols,
894            );
895            sa.analyze_stmts(body, &mut ctx);
896            let inferred = merge_return_types(&sa.return_types);
897            drop(sa);
898
899            emit_unused_params(&params, &ctx, method.name, file, all_issues);
900            emit_unused_variables(&ctx, file, all_issues);
901            all_issues.extend(buf.into_issues());
902
903            if let Some(mut cls) = self.codebase.classes.get_mut(fqcn) {
904                if let Some(m) = cls.own_methods.get_mut(method.name) {
905                    Arc::make_mut(m).inferred_return_type = Some(inferred);
906                }
907            }
908        }
909    }
910
911    /// Like `analyze_bodies` but also populates `type_envs` with per-scope type environments.
912    #[allow(clippy::too_many_arguments)]
913    fn analyze_bodies_typed<'arena, 'src>(
914        &self,
915        program: &php_ast::ast::Program<'arena, 'src>,
916        file: Arc<str>,
917        source: &str,
918        source_map: &php_rs_parser::source_map::SourceMap,
919        type_envs: &mut std::collections::HashMap<
920            crate::type_env::ScopeId,
921            crate::type_env::TypeEnv,
922        >,
923        all_symbols: &mut Vec<crate::symbol::ResolvedSymbol>,
924    ) -> Vec<mir_issues::Issue> {
925        use php_ast::ast::StmtKind;
926        let mut all_issues = Vec::new();
927        for stmt in program.stmts.iter() {
928            match &stmt.kind {
929                StmtKind::Function(decl) => {
930                    self.analyze_fn_decl_typed(
931                        decl,
932                        &file,
933                        source,
934                        source_map,
935                        &mut all_issues,
936                        type_envs,
937                        all_symbols,
938                    );
939                }
940                StmtKind::Class(decl) => {
941                    self.analyze_class_decl_typed(
942                        decl,
943                        &file,
944                        source,
945                        source_map,
946                        &mut all_issues,
947                        type_envs,
948                        all_symbols,
949                    );
950                }
951                StmtKind::Enum(decl) => {
952                    self.analyze_enum_decl(decl, &file, source, source_map, &mut all_issues);
953                }
954                StmtKind::Interface(decl) => {
955                    self.analyze_interface_decl(decl, &file, source, source_map, &mut all_issues);
956                }
957                StmtKind::Namespace(ns) => {
958                    if let php_ast::ast::NamespaceBody::Braced(stmts) = &ns.body {
959                        for inner in stmts.iter() {
960                            match &inner.kind {
961                                StmtKind::Function(decl) => {
962                                    self.analyze_fn_decl_typed(
963                                        decl,
964                                        &file,
965                                        source,
966                                        source_map,
967                                        &mut all_issues,
968                                        type_envs,
969                                        all_symbols,
970                                    );
971                                }
972                                StmtKind::Class(decl) => {
973                                    self.analyze_class_decl_typed(
974                                        decl,
975                                        &file,
976                                        source,
977                                        source_map,
978                                        &mut all_issues,
979                                        type_envs,
980                                        all_symbols,
981                                    );
982                                }
983                                StmtKind::Enum(decl) => {
984                                    self.analyze_enum_decl(
985                                        decl,
986                                        &file,
987                                        source,
988                                        source_map,
989                                        &mut all_issues,
990                                    );
991                                }
992                                StmtKind::Interface(decl) => {
993                                    self.analyze_interface_decl(
994                                        decl,
995                                        &file,
996                                        source,
997                                        source_map,
998                                        &mut all_issues,
999                                    );
1000                                }
1001                                _ => {}
1002                            }
1003                        }
1004                    }
1005                }
1006                _ => {}
1007            }
1008        }
1009
1010        // Analyze top-level executable statements in global scope (e.g. function calls
1011        // outside any function/class body). Declaration nodes are skipped since they
1012        // were already handled above.
1013        {
1014            use crate::context::Context;
1015            use crate::stmt::StatementsAnalyzer;
1016            use mir_issues::IssueBuffer;
1017
1018            let mut ctx = Context::new();
1019            let mut buf = IssueBuffer::new();
1020            let mut sa = StatementsAnalyzer::new(
1021                &self.codebase,
1022                file.clone(),
1023                source,
1024                source_map,
1025                &mut buf,
1026                all_symbols,
1027            );
1028            for stmt in program.stmts.iter() {
1029                match &stmt.kind {
1030                    StmtKind::Function(_)
1031                    | StmtKind::Class(_)
1032                    | StmtKind::Enum(_)
1033                    | StmtKind::Interface(_)
1034                    | StmtKind::Trait(_)
1035                    | StmtKind::Namespace(_)
1036                    | StmtKind::Use(_)
1037                    | StmtKind::Declare(_) => {}
1038                    _ => sa.analyze_stmt(stmt, &mut ctx),
1039                }
1040            }
1041            drop(sa);
1042            all_issues.extend(buf.into_issues());
1043        }
1044
1045        all_issues
1046    }
1047
1048    /// Like `analyze_fn_decl` but also captures a `TypeEnv` for the function scope.
1049    #[allow(clippy::too_many_arguments)]
1050    fn analyze_fn_decl_typed<'arena, 'src>(
1051        &self,
1052        decl: &php_ast::ast::FunctionDecl<'arena, 'src>,
1053        file: &Arc<str>,
1054        source: &str,
1055        source_map: &php_rs_parser::source_map::SourceMap,
1056        all_issues: &mut Vec<mir_issues::Issue>,
1057        type_envs: &mut std::collections::HashMap<
1058            crate::type_env::ScopeId,
1059            crate::type_env::TypeEnv,
1060        >,
1061        all_symbols: &mut Vec<crate::symbol::ResolvedSymbol>,
1062    ) {
1063        use crate::context::Context;
1064        use crate::stmt::StatementsAnalyzer;
1065        use mir_issues::IssueBuffer;
1066
1067        let fn_name = decl.name;
1068        let body = &decl.body;
1069
1070        for param in decl.params.iter() {
1071            if let Some(hint) = &param.type_hint {
1072                check_type_hint_classes(hint, &self.codebase, file, source, source_map, all_issues);
1073            }
1074        }
1075        if let Some(hint) = &decl.return_type {
1076            check_type_hint_classes(hint, &self.codebase, file, source, source_map, all_issues);
1077        }
1078
1079        let resolved_fn = self.codebase.resolve_class_name(file.as_ref(), fn_name);
1080        let func_opt: Option<mir_codebase::storage::FunctionStorage> = self
1081            .codebase
1082            .functions
1083            .get(resolved_fn.as_str())
1084            .map(|r| r.clone())
1085            .or_else(|| self.codebase.functions.get(fn_name).map(|r| r.clone()))
1086            .or_else(|| {
1087                self.codebase
1088                    .functions
1089                    .iter()
1090                    .find(|e| e.short_name.as_ref() == fn_name)
1091                    .map(|e| e.value().clone())
1092            });
1093
1094        let fqn = func_opt.as_ref().map(|f| f.fqn.clone());
1095        let (params, return_ty): (Vec<mir_codebase::FnParam>, _) = match &func_opt {
1096            Some(f)
1097                if f.params.len() == decl.params.len()
1098                    && f.params
1099                        .iter()
1100                        .zip(decl.params.iter())
1101                        .all(|(cp, ap)| cp.name.as_ref() == ap.name) =>
1102            {
1103                (f.params.clone(), f.return_type.clone())
1104            }
1105            _ => {
1106                let ast_params = decl
1107                    .params
1108                    .iter()
1109                    .map(|p| mir_codebase::FnParam {
1110                        name: Arc::from(p.name),
1111                        ty: None,
1112                        default: p.default.as_ref().map(|_| mir_types::Union::mixed()),
1113                        is_variadic: p.variadic,
1114                        is_byref: p.by_ref,
1115                        is_optional: p.default.is_some() || p.variadic,
1116                    })
1117                    .collect();
1118                (ast_params, None)
1119            }
1120        };
1121
1122        let mut ctx = Context::for_function(&params, return_ty, None, None, None, false, true);
1123        let mut buf = IssueBuffer::new();
1124        let mut sa = StatementsAnalyzer::new(
1125            &self.codebase,
1126            file.clone(),
1127            source,
1128            source_map,
1129            &mut buf,
1130            all_symbols,
1131        );
1132        sa.analyze_stmts(body, &mut ctx);
1133        let inferred = merge_return_types(&sa.return_types);
1134        drop(sa);
1135
1136        // Capture TypeEnv for this scope
1137        let scope_name = fqn.clone().unwrap_or_else(|| Arc::from(fn_name));
1138        type_envs.insert(
1139            crate::type_env::ScopeId::Function {
1140                file: file.clone(),
1141                name: scope_name,
1142            },
1143            crate::type_env::TypeEnv::new(ctx.vars.clone()),
1144        );
1145
1146        emit_unused_params(&params, &ctx, "", file, all_issues);
1147        emit_unused_variables(&ctx, file, all_issues);
1148        all_issues.extend(buf.into_issues());
1149
1150        if let Some(fqn) = fqn {
1151            if let Some(mut func) = self.codebase.functions.get_mut(fqn.as_ref()) {
1152                func.inferred_return_type = Some(inferred);
1153            }
1154        }
1155    }
1156
1157    /// Like `analyze_class_decl` but also captures a `TypeEnv` per method scope.
1158    #[allow(clippy::too_many_arguments)]
1159    fn analyze_class_decl_typed<'arena, 'src>(
1160        &self,
1161        decl: &php_ast::ast::ClassDecl<'arena, 'src>,
1162        file: &Arc<str>,
1163        source: &str,
1164        source_map: &php_rs_parser::source_map::SourceMap,
1165        all_issues: &mut Vec<mir_issues::Issue>,
1166        type_envs: &mut std::collections::HashMap<
1167            crate::type_env::ScopeId,
1168            crate::type_env::TypeEnv,
1169        >,
1170        all_symbols: &mut Vec<crate::symbol::ResolvedSymbol>,
1171    ) {
1172        use crate::context::Context;
1173        use crate::stmt::StatementsAnalyzer;
1174        use mir_issues::IssueBuffer;
1175
1176        let class_name = decl.name.unwrap_or("<anonymous>");
1177        let resolved = self.codebase.resolve_class_name(file.as_ref(), class_name);
1178        let fqcn: &str = &resolved;
1179        let parent_fqcn = self
1180            .codebase
1181            .classes
1182            .get(fqcn)
1183            .and_then(|c| c.parent.clone());
1184
1185        if let Some(parent) = &decl.extends {
1186            check_name_class(parent, &self.codebase, file, source, source_map, all_issues);
1187        }
1188        for iface in decl.implements.iter() {
1189            check_name_class(iface, &self.codebase, file, source, source_map, all_issues);
1190        }
1191
1192        for member in decl.members.iter() {
1193            let php_ast::ast::ClassMemberKind::Method(method) = &member.kind else {
1194                continue;
1195            };
1196
1197            for param in method.params.iter() {
1198                if let Some(hint) = &param.type_hint {
1199                    check_type_hint_classes(
1200                        hint,
1201                        &self.codebase,
1202                        file,
1203                        source,
1204                        source_map,
1205                        all_issues,
1206                    );
1207                }
1208            }
1209            if let Some(hint) = &method.return_type {
1210                check_type_hint_classes(hint, &self.codebase, file, source, source_map, all_issues);
1211            }
1212
1213            let Some(body) = &method.body else { continue };
1214
1215            let (params, return_ty) = self
1216                .codebase
1217                .get_method(fqcn, method.name)
1218                .as_deref()
1219                .map(|m| (m.params.clone(), m.return_type.clone()))
1220                .unwrap_or_default();
1221
1222            let is_ctor = method.name == "__construct";
1223            let mut ctx = Context::for_method(
1224                &params,
1225                return_ty,
1226                Some(Arc::from(fqcn)),
1227                parent_fqcn.clone(),
1228                Some(Arc::from(fqcn)),
1229                false,
1230                is_ctor,
1231                method.is_static,
1232            );
1233
1234            let mut buf = IssueBuffer::new();
1235            let mut sa = StatementsAnalyzer::new(
1236                &self.codebase,
1237                file.clone(),
1238                source,
1239                source_map,
1240                &mut buf,
1241                all_symbols,
1242            );
1243            sa.analyze_stmts(body, &mut ctx);
1244            let inferred = merge_return_types(&sa.return_types);
1245            drop(sa);
1246
1247            // Capture TypeEnv for this method scope
1248            type_envs.insert(
1249                crate::type_env::ScopeId::Method {
1250                    class: Arc::from(fqcn),
1251                    method: Arc::from(method.name),
1252                },
1253                crate::type_env::TypeEnv::new(ctx.vars.clone()),
1254            );
1255
1256            emit_unused_params(&params, &ctx, method.name, file, all_issues);
1257            emit_unused_variables(&ctx, file, all_issues);
1258            all_issues.extend(buf.into_issues());
1259
1260            if let Some(mut cls) = self.codebase.classes.get_mut(fqcn) {
1261                if let Some(m) = cls.own_methods.get_mut(method.name) {
1262                    Arc::make_mut(m).inferred_return_type = Some(inferred);
1263                }
1264            }
1265        }
1266    }
1267
1268    /// Discover all `.php` files under a directory, recursively.
1269    pub fn discover_files(root: &Path) -> Vec<PathBuf> {
1270        if root.is_file() {
1271            return vec![root.to_path_buf()];
1272        }
1273        let mut files = Vec::new();
1274        collect_php_files(root, &mut files);
1275        files
1276    }
1277
1278    /// Pass 1 only: collect type definitions from `paths` into the codebase without
1279    /// analyzing method bodies or emitting issues. Used to load vendor types.
1280    pub fn collect_types_only(&self, paths: &[PathBuf]) {
1281        paths.par_iter().for_each(|path| {
1282            let Ok(src) = std::fs::read_to_string(path) else {
1283                return;
1284            };
1285            let file: Arc<str> = Arc::from(path.to_string_lossy().as_ref());
1286            let arena = bumpalo::Bump::new();
1287            let result = php_rs_parser::parse(&arena, &src);
1288            let collector =
1289                DefinitionCollector::new(&self.codebase, file, &src, &result.source_map);
1290            // Ignore any issues emitted during vendor collection
1291            let _ = collector.collect(&result.program);
1292        });
1293    }
1294
1295    /// Check type hints in enum methods for undefined classes.
1296    #[allow(clippy::too_many_arguments)]
1297    fn analyze_enum_decl<'arena, 'src>(
1298        &self,
1299        decl: &php_ast::ast::EnumDecl<'arena, 'src>,
1300        file: &Arc<str>,
1301        source: &str,
1302        source_map: &php_rs_parser::source_map::SourceMap,
1303        all_issues: &mut Vec<mir_issues::Issue>,
1304    ) {
1305        use php_ast::ast::EnumMemberKind;
1306        for iface in decl.implements.iter() {
1307            check_name_class(iface, &self.codebase, file, source, source_map, all_issues);
1308        }
1309        for member in decl.members.iter() {
1310            let EnumMemberKind::Method(method) = &member.kind else {
1311                continue;
1312            };
1313            for param in method.params.iter() {
1314                if let Some(hint) = &param.type_hint {
1315                    check_type_hint_classes(
1316                        hint,
1317                        &self.codebase,
1318                        file,
1319                        source,
1320                        source_map,
1321                        all_issues,
1322                    );
1323                }
1324            }
1325            if let Some(hint) = &method.return_type {
1326                check_type_hint_classes(hint, &self.codebase, file, source, source_map, all_issues);
1327            }
1328        }
1329    }
1330
1331    /// Check extends clauses in interface declarations for undefined types.
1332    fn analyze_interface_decl<'arena, 'src>(
1333        &self,
1334        decl: &php_ast::ast::InterfaceDecl<'arena, 'src>,
1335        file: &Arc<str>,
1336        source: &str,
1337        source_map: &php_rs_parser::source_map::SourceMap,
1338        all_issues: &mut Vec<mir_issues::Issue>,
1339    ) {
1340        use php_ast::ast::ClassMemberKind;
1341        for parent in decl.extends.iter() {
1342            check_name_class(parent, &self.codebase, file, source, source_map, all_issues);
1343        }
1344        for member in decl.members.iter() {
1345            let ClassMemberKind::Method(method) = &member.kind else {
1346                continue;
1347            };
1348            for param in method.params.iter() {
1349                if let Some(hint) = &param.type_hint {
1350                    check_type_hint_classes(
1351                        hint,
1352                        &self.codebase,
1353                        file,
1354                        source,
1355                        source_map,
1356                        all_issues,
1357                    );
1358                }
1359            }
1360            if let Some(hint) = &method.return_type {
1361                check_type_hint_classes(hint, &self.codebase, file, source, source_map, all_issues);
1362            }
1363        }
1364    }
1365}
1366
1367impl Default for ProjectAnalyzer {
1368    fn default() -> Self {
1369        Self::new()
1370    }
1371}
1372
1373// ---------------------------------------------------------------------------
1374// Offset to char-count column conversion
1375// ---------------------------------------------------------------------------
1376
1377/// Convert a byte offset to a Unicode char-count column on a given line.
1378/// Returns (line, col) where col is a 0-based Unicode code-point count.
1379fn offset_to_line_col(
1380    source: &str,
1381    offset: u32,
1382    source_map: &php_rs_parser::source_map::SourceMap,
1383) -> (u32, u16) {
1384    let lc = source_map.offset_to_line_col(offset);
1385    let line = lc.line + 1;
1386
1387    let byte_offset = offset as usize;
1388    let line_start_byte = if byte_offset == 0 {
1389        0
1390    } else {
1391        source[..byte_offset]
1392            .rfind('\n')
1393            .map(|p| p + 1)
1394            .unwrap_or(0)
1395    };
1396
1397    let col = source[line_start_byte..byte_offset].chars().count() as u16;
1398
1399    (line, col)
1400}
1401
1402// ---------------------------------------------------------------------------
1403// Type-hint class existence checker
1404// ---------------------------------------------------------------------------
1405
1406/// Walk a `TypeHint` AST node and emit `UndefinedClass` for any named class
1407/// that does not exist in the codebase.  Skips PHP built-in type keywords.
1408fn check_type_hint_classes<'arena, 'src>(
1409    hint: &php_ast::ast::TypeHint<'arena, 'src>,
1410    codebase: &Codebase,
1411    file: &Arc<str>,
1412    source: &str,
1413    source_map: &php_rs_parser::source_map::SourceMap,
1414    issues: &mut Vec<mir_issues::Issue>,
1415) {
1416    use php_ast::ast::TypeHintKind;
1417    match &hint.kind {
1418        TypeHintKind::Named(name) => {
1419            let name_str = crate::parser::name_to_string(name);
1420            // Skip built-in pseudo-types that are not real classes.
1421            if is_pseudo_type(&name_str) {
1422                return;
1423            }
1424            let resolved = codebase.resolve_class_name(file.as_ref(), &name_str);
1425            if !codebase.type_exists(&resolved) {
1426                let (line, col_start) = offset_to_line_col(source, hint.span.start, source_map);
1427                let col_end = if hint.span.start < hint.span.end {
1428                    let (_end_line, end_col) =
1429                        offset_to_line_col(source, hint.span.end, source_map);
1430                    end_col
1431                } else {
1432                    col_start
1433                };
1434                issues.push(
1435                    mir_issues::Issue::new(
1436                        mir_issues::IssueKind::UndefinedClass { name: resolved },
1437                        mir_issues::Location {
1438                            file: file.clone(),
1439                            line,
1440                            col_start,
1441                            col_end: col_end.max(col_start + 1),
1442                        },
1443                    )
1444                    .with_snippet(crate::parser::span_text(source, hint.span).unwrap_or_default()),
1445                );
1446            }
1447        }
1448        TypeHintKind::Nullable(inner) => {
1449            check_type_hint_classes(inner, codebase, file, source, source_map, issues);
1450        }
1451        TypeHintKind::Union(parts) | TypeHintKind::Intersection(parts) => {
1452            for part in parts.iter() {
1453                check_type_hint_classes(part, codebase, file, source, source_map, issues);
1454            }
1455        }
1456        TypeHintKind::Keyword(_, _) => {} // built-in keyword, always valid
1457    }
1458}
1459
1460/// Check a single `Name` AST node from an `extends` or `implements` clause and
1461/// emit `UndefinedClass` if the named type is not in the codebase.
1462fn check_name_class(
1463    name: &php_ast::ast::Name<'_, '_>,
1464    codebase: &Codebase,
1465    file: &Arc<str>,
1466    source: &str,
1467    source_map: &php_rs_parser::source_map::SourceMap,
1468    issues: &mut Vec<mir_issues::Issue>,
1469) {
1470    let name_str = crate::parser::name_to_string(name);
1471    let resolved = codebase.resolve_class_name(file.as_ref(), &name_str);
1472    if !codebase.type_exists(&resolved) {
1473        let span = name.span();
1474        let (line, col_start) = offset_to_line_col(source, span.start, source_map);
1475        let (_, col_end) = offset_to_line_col(source, span.end, source_map);
1476        issues.push(
1477            mir_issues::Issue::new(
1478                mir_issues::IssueKind::UndefinedClass { name: resolved },
1479                mir_issues::Location {
1480                    file: file.clone(),
1481                    line,
1482                    col_start,
1483                    col_end: col_end.max(col_start + 1),
1484                },
1485            )
1486            .with_snippet(crate::parser::span_text(source, span).unwrap_or_default()),
1487        );
1488    }
1489}
1490
/// Returns true for names that are PHP pseudo-types / special identifiers, not
/// real classes.
///
/// The comparison is ASCII case-insensitive (`Self`, `NULL`, `Mixed` all
/// match). Every keyword is pure ASCII, so `eq_ignore_ascii_case` is used to
/// avoid allocating a lowercased copy of `name` on every call.
fn is_pseudo_type(name: &str) -> bool {
    const PSEUDO_TYPES: [&str; 12] = [
        "self", "static", "parent", "null", "true", "false", "never", "void", "mixed", "object",
        "callable", "iterable",
    ];
    PSEUDO_TYPES
        .iter()
        .any(|keyword| name.eq_ignore_ascii_case(keyword))
}
1510
1511/// Magic methods whose parameters are passed by the PHP runtime, not user call sites.
1512const MAGIC_METHODS_WITH_RUNTIME_PARAMS: &[&str] = &[
1513    "__get",
1514    "__set",
1515    "__call",
1516    "__callStatic",
1517    "__isset",
1518    "__unset",
1519];
1520
1521/// Emit `UnusedParam` issues for params that were never read in `ctx`.
1522/// Skips magic methods whose parameters are passed by the PHP runtime.
1523fn emit_unused_params(
1524    params: &[mir_codebase::FnParam],
1525    ctx: &crate::context::Context,
1526    method_name: &str,
1527    file: &Arc<str>,
1528    issues: &mut Vec<mir_issues::Issue>,
1529) {
1530    if MAGIC_METHODS_WITH_RUNTIME_PARAMS.contains(&method_name) {
1531        return;
1532    }
1533    for p in params {
1534        let name = p.name.as_ref().trim_start_matches('$');
1535        if !ctx.read_vars.contains(name) {
1536            issues.push(
1537                mir_issues::Issue::new(
1538                    mir_issues::IssueKind::UnusedParam {
1539                        name: name.to_string(),
1540                    },
1541                    mir_issues::Location {
1542                        file: file.clone(),
1543                        line: 1,
1544                        col_start: 0,
1545                        col_end: 0,
1546                    },
1547                )
1548                .with_snippet(format!("${}", name)),
1549            );
1550        }
1551    }
1552}
1553
1554fn emit_unused_variables(
1555    ctx: &crate::context::Context,
1556    file: &Arc<str>,
1557    issues: &mut Vec<mir_issues::Issue>,
1558) {
1559    // Superglobals are always "used" — skip them
1560    const SUPERGLOBALS: &[&str] = &[
1561        "_SERVER", "_GET", "_POST", "_REQUEST", "_SESSION", "_COOKIE", "_FILES", "_ENV", "GLOBALS",
1562    ];
1563    for name in &ctx.assigned_vars {
1564        if ctx.param_names.contains(name) {
1565            continue;
1566        }
1567        if SUPERGLOBALS.contains(&name.as_str()) {
1568            continue;
1569        }
1570        // $this is implicitly used whenever the method accesses properties or
1571        // calls other methods — never report it as unused.
1572        if name == "this" {
1573            continue;
1574        }
1575        if name.starts_with('_') {
1576            continue;
1577        }
1578        if !ctx.read_vars.contains(name) {
1579            issues.push(mir_issues::Issue::new(
1580                mir_issues::IssueKind::UnusedVariable { name: name.clone() },
1581                mir_issues::Location {
1582                    file: file.clone(),
1583                    line: 1,
1584                    col_start: 0,
1585                    col_end: 0,
1586                },
1587            ));
1588        }
1589    }
1590}
1591
1592/// Merge a list of return types into a single `Union`.
1593/// Returns `void` if the list is empty.
1594pub fn merge_return_types(return_types: &[Union]) -> Union {
1595    if return_types.is_empty() {
1596        return Union::single(mir_types::Atomic::TVoid);
1597    }
1598    return_types
1599        .iter()
1600        .fold(Union::empty(), |acc, t| Union::merge(&acc, t))
1601}
1602
/// Recursively gather every `*.php` file below `dir` into `out`.
///
/// Symlinks are never followed, and directories named `vendor`, `.git`,
/// `node_modules`, `.cache` or `.pnpm-store` are not descended into.
/// Unreadable directories and entries are silently ignored (best effort).
pub(crate) fn collect_php_files(dir: &Path, out: &mut Vec<PathBuf>) {
    const SKIPPED_DIRS: [&str; 5] = ["vendor", ".git", "node_modules", ".cache", ".pnpm-store"];

    let Ok(listing) = std::fs::read_dir(dir) else {
        return;
    };

    for item in listing.flatten() {
        // Symlinks can form cycles (e.g. .pnpm-store), so they are skipped outright.
        if matches!(item.file_type(), Ok(kind) if kind.is_symlink()) {
            continue;
        }

        let candidate = item.path();
        if candidate.is_dir() {
            let dir_name = candidate
                .file_name()
                .and_then(|n| n.to_str())
                .unwrap_or("");
            if !SKIPPED_DIRS.contains(&dir_name) {
                collect_php_files(&candidate, out);
            }
        } else if candidate.extension().and_then(|ext| ext.to_str()) == Some("php") {
            out.push(candidate);
        }
    }
}
1626
1627// ---------------------------------------------------------------------------
1628// AnalysisResult
1629// ---------------------------------------------------------------------------
1630
1631// ---------------------------------------------------------------------------
1632// build_reverse_deps
1633// ---------------------------------------------------------------------------
1634
1635/// Build a reverse dependency graph from the codebase after Pass 1.
1636///
1637/// Returns a map: `defining_file → {files that depend on it}`.
1638///
1639/// Dependency edges captured (all derivable from Pass 1 data):
1640/// - `use` imports  (`file_imports`)
1641/// - `extends` / `implements` / trait `use` from `ClassStorage`
1642fn build_reverse_deps(codebase: &Codebase) -> HashMap<String, HashSet<String>> {
1643    let mut reverse: HashMap<String, HashSet<String>> = HashMap::new();
1644
1645    // Helper: record edge "defining_file → dependent_file"
1646    let mut add_edge = |symbol: &str, dependent_file: &str| {
1647        if let Some(defining_file) = codebase.symbol_to_file.get(symbol) {
1648            let def = defining_file.as_ref().to_string();
1649            if def != dependent_file {
1650                reverse
1651                    .entry(def)
1652                    .or_default()
1653                    .insert(dependent_file.to_string());
1654            }
1655        }
1656    };
1657
1658    // use-import edges
1659    for entry in codebase.file_imports.iter() {
1660        let file = entry.key().as_ref().to_string();
1661        for fqcn in entry.value().values() {
1662            add_edge(fqcn, &file);
1663        }
1664    }
1665
1666    // extends / implements / trait edges from ClassStorage
1667    for entry in codebase.classes.iter() {
1668        let defining = {
1669            let fqcn = entry.key().as_ref();
1670            codebase
1671                .symbol_to_file
1672                .get(fqcn)
1673                .map(|f| f.as_ref().to_string())
1674        };
1675        let Some(file) = defining else { continue };
1676
1677        let cls = entry.value();
1678        if let Some(ref parent) = cls.parent {
1679            add_edge(parent.as_ref(), &file);
1680        }
1681        for iface in &cls.interfaces {
1682            add_edge(iface.as_ref(), &file);
1683        }
1684        for tr in &cls.traits {
1685            add_edge(tr.as_ref(), &file);
1686        }
1687    }
1688
1689    reverse
1690}
1691
1692// ---------------------------------------------------------------------------
1693
1694/// Extract the reference locations recorded for `file` from the codebase into
1695/// a flat `Vec<(symbol_key, start, end)>` suitable for caching.
1696fn extract_reference_locations(codebase: &Codebase, file: &Arc<str>) -> Vec<(String, u32, u32)> {
1697    codebase
1698        .extract_file_reference_locations(file.as_ref())
1699        .into_iter()
1700        .map(|(sym, start, end)| (sym.to_string(), start, end))
1701        .collect()
1702}
1703
1704// ---------------------------------------------------------------------------
1705
1706pub struct AnalysisResult {
1707    pub issues: Vec<Issue>,
1708    pub type_envs: std::collections::HashMap<crate::type_env::ScopeId, crate::type_env::TypeEnv>,
1709    /// Per-expression resolved symbols from Pass 2, sorted by file path.
1710    pub symbols: Vec<crate::symbol::ResolvedSymbol>,
1711    /// Maps each file path to the contiguous range within `symbols` that belongs
1712    /// to it. Built once after analysis; allows `symbol_at` to scan only the
1713    /// relevant file's slice rather than the entire codebase-wide vector.
1714    symbols_by_file: HashMap<Arc<str>, std::ops::Range<usize>>,
1715}
1716
1717impl AnalysisResult {
1718    fn build(
1719        issues: Vec<Issue>,
1720        type_envs: std::collections::HashMap<crate::type_env::ScopeId, crate::type_env::TypeEnv>,
1721        mut symbols: Vec<crate::symbol::ResolvedSymbol>,
1722    ) -> Self {
1723        // Sort by file so each file's symbols form a contiguous slice.
1724        symbols.sort_unstable_by(|a, b| a.file.as_ref().cmp(b.file.as_ref()));
1725        let mut symbols_by_file: HashMap<Arc<str>, std::ops::Range<usize>> = HashMap::new();
1726        let mut i = 0;
1727        while i < symbols.len() {
1728            let file = Arc::clone(&symbols[i].file);
1729            let start = i;
1730            while i < symbols.len() && symbols[i].file == file {
1731                i += 1;
1732            }
1733            symbols_by_file.insert(file, start..i);
1734        }
1735        Self {
1736            issues,
1737            type_envs,
1738            symbols,
1739            symbols_by_file,
1740        }
1741    }
1742}
1743
1744impl AnalysisResult {
1745    pub fn error_count(&self) -> usize {
1746        self.issues
1747            .iter()
1748            .filter(|i| i.severity == mir_issues::Severity::Error)
1749            .count()
1750    }
1751
1752    pub fn warning_count(&self) -> usize {
1753        self.issues
1754            .iter()
1755            .filter(|i| i.severity == mir_issues::Severity::Warning)
1756            .count()
1757    }
1758
1759    /// Group issues by source file.
1760    ///
1761    /// Returns a map from absolute file path to the slice of issues that belong
1762    /// to that file. Useful for LSP `textDocument/publishDiagnostics`, which
1763    /// pushes diagnostics per document.
1764    pub fn issues_by_file(&self) -> HashMap<std::sync::Arc<str>, Vec<&Issue>> {
1765        let mut map: HashMap<std::sync::Arc<str>, Vec<&Issue>> = HashMap::new();
1766        for issue in &self.issues {
1767            map.entry(issue.location.file.clone())
1768                .or_default()
1769                .push(issue);
1770        }
1771        map
1772    }
1773
1774    /// Return the innermost resolved symbol whose span contains `byte_offset`
1775    /// in `file`, or `None` if no symbol was recorded at that position.
1776    ///
1777    /// When multiple symbols overlap (e.g. a method call whose span contains a
1778    /// property access span), the one with the smallest span is returned so the
1779    /// caller gets the most specific symbol at the cursor.
1780    ///
1781    /// Typical use: LSP `textDocument/references` and `textDocument/hover`.
1782    pub fn symbol_at(
1783        &self,
1784        file: &str,
1785        byte_offset: u32,
1786    ) -> Option<&crate::symbol::ResolvedSymbol> {
1787        let range = self.symbols_by_file.get(file)?;
1788        self.symbols[range.clone()]
1789            .iter()
1790            .filter(|s| s.span.start <= byte_offset && byte_offset < s.span.end)
1791            .min_by_key(|s| s.span.end - s.span.start)
1792    }
1793}