Skip to main content

mir_analyzer/
project.rs

//! Project-level orchestration: file discovery, pass 1, pass 2.
2use std::path::{Path, PathBuf};
3use std::sync::Arc;
4
5use rayon::prelude::*;
6
7use std::collections::{HashMap, HashSet};
8
9use crate::cache::{hash_content, AnalysisCache};
10use mir_codebase::Codebase;
11use mir_issues::Issue;
12use mir_types::Union;
13
14use crate::collector::DefinitionCollector;
15
16// ---------------------------------------------------------------------------
17// ProjectAnalyzer
18// ---------------------------------------------------------------------------
19
/// Drives analysis of a set of PHP files: owns the shared codebase plus the
/// optional cache, progress callback and PSR-4 map consulted by the passes.
pub struct ProjectAnalyzer {
    /// Shared symbol store. It is handed by reference to the definition
    /// collector (Pass 1), the class-level checks and the body analysis (Pass 2).
    pub codebase: Arc<Codebase>,
    /// Optional cache — when `Some`, Pass 2 results are read/written per file.
    pub cache: Option<AnalysisCache>,
    /// Called once after each file completes Pass 2 (used for progress reporting).
    pub on_file_done: Option<Arc<dyn Fn() + Send + Sync>>,
    /// PSR-4 autoloader mapping from composer.json, if available.
    pub psr4: Option<Arc<crate::composer::Psr4Map>>,
    /// Whether stubs have already been loaded (to avoid double-loading).
    /// Atomic so that `load_stubs` can flip it through `&self`.
    stubs_loaded: std::sync::atomic::AtomicBool,
    /// When true, run dead code detection at the end of analysis.
    pub find_dead_code: bool,
}
33
34impl ProjectAnalyzer {
35    pub fn new() -> Self {
36        Self {
37            codebase: Arc::new(Codebase::new()),
38            cache: None,
39            on_file_done: None,
40            psr4: None,
41            stubs_loaded: std::sync::atomic::AtomicBool::new(false),
42            find_dead_code: false,
43        }
44    }
45
46    /// Create a `ProjectAnalyzer` with a disk-backed cache stored under `cache_dir`.
47    pub fn with_cache(cache_dir: &Path) -> Self {
48        Self {
49            codebase: Arc::new(Codebase::new()),
50            cache: Some(AnalysisCache::open(cache_dir)),
51            on_file_done: None,
52            psr4: None,
53            stubs_loaded: std::sync::atomic::AtomicBool::new(false),
54            find_dead_code: false,
55        }
56    }
57
58    /// Create a `ProjectAnalyzer` from a project root containing `composer.json`.
59    /// Returns the analyzer (with `psr4` set) and the `Psr4Map` so callers can
60    /// call `map.project_files()` / `map.vendor_files()`.
61    pub fn from_composer(
62        root: &Path,
63    ) -> Result<(Self, crate::composer::Psr4Map), crate::composer::ComposerError> {
64        let map = crate::composer::Psr4Map::from_composer(root)?;
65        let psr4 = Arc::new(map.clone());
66        let analyzer = Self {
67            codebase: Arc::new(Codebase::new()),
68            cache: None,
69            on_file_done: None,
70            psr4: Some(psr4),
71            stubs_loaded: std::sync::atomic::AtomicBool::new(false),
72            find_dead_code: false,
73        };
74        Ok((analyzer, map))
75    }
76
77    /// Expose codebase for external use (e.g., pre-loading stubs from CLI).
78    pub fn codebase(&self) -> &Arc<Codebase> {
79        &self.codebase
80    }
81
82    /// Load PHP built-in stubs. Called automatically by `analyze` if not done yet.
83    pub fn load_stubs(&self) {
84        if !self
85            .stubs_loaded
86            .swap(true, std::sync::atomic::Ordering::SeqCst)
87        {
88            crate::stubs::load_stubs(&self.codebase);
89        }
90    }
91
92    /// Run the full analysis pipeline on a set of file paths.
93    pub fn analyze(&self, paths: &[PathBuf]) -> AnalysisResult {
94        let mut all_issues = Vec::new();
95        let mut parse_errors = Vec::new();
96
97        // ---- Load PHP built-in stubs (before Pass 1 so user code can override)
98        self.load_stubs();
99
100        // ---- Pre-Pass-2 invalidation: evict dependents of changed files ------
101        // Uses the reverse dep graph persisted from the previous run.
102        if let Some(cache) = &self.cache {
103            let changed: Vec<String> = paths
104                .iter()
105                .filter_map(|p| {
106                    let path_str = p.to_string_lossy().into_owned();
107                    let content = std::fs::read_to_string(p).ok()?;
108                    let h = hash_content(&content);
109                    if cache.get(&path_str, &h).is_none() {
110                        Some(path_str)
111                    } else {
112                        None
113                    }
114                })
115                .collect();
116            if !changed.is_empty() {
117                cache.evict_with_dependents(&changed);
118            }
119        }
120
121        // ---- Pass 1: read files in parallel ----------------------------------
122        let file_data: Vec<(Arc<str>, String)> = paths
123            .par_iter()
124            .filter_map(|path| match std::fs::read_to_string(path) {
125                Ok(src) => Some((Arc::from(path.to_string_lossy().as_ref()), src)),
126                Err(e) => {
127                    eprintln!("Cannot read {}: {}", path.display(), e);
128                    None
129                }
130            })
131            .collect();
132
133        // ---- Pass 1: combined pre-index + definition collection (parallel) -----
134        // Parse each file once; both the FQCN/namespace/import index and the full
135        // definition collection run in the same rayon closure, eliminating the
136        // second sequential parse of every file. DashMap handles concurrent writes.
137        let pass1_results: Vec<(Vec<Issue>, Vec<Issue>)> = file_data
138            .par_iter()
139            .map(|(file, src)| {
140                use php_ast::ast::StmtKind;
141                let arena = bumpalo::Bump::new();
142                let result = php_rs_parser::parse(&arena, src);
143
144                // --- Pre-index: build FQCN index, file imports, and namespaces ---
145                let mut current_namespace: Option<String> = None;
146                let mut imports: std::collections::HashMap<String, String> =
147                    std::collections::HashMap::new();
148                let mut file_ns_set = false;
149
150                // Index a flat list of stmts under a given namespace prefix.
151                let index_stmts =
152                    |stmts: &[php_ast::ast::Stmt<'_, '_>],
153                     ns: Option<&str>,
154                     imports: &mut std::collections::HashMap<String, String>| {
155                        for stmt in stmts.iter() {
156                            match &stmt.kind {
157                                StmtKind::Use(use_decl) => {
158                                    for item in use_decl.uses.iter() {
159                                        let full_name = crate::parser::name_to_string(&item.name);
160                                        let alias = item.alias.unwrap_or_else(|| {
161                                            full_name.rsplit('\\').next().unwrap_or(&full_name)
162                                        });
163                                        imports.insert(alias.to_string(), full_name);
164                                    }
165                                }
166                                StmtKind::Class(decl) => {
167                                    if let Some(n) = decl.name {
168                                        let fqcn = match ns {
169                                            Some(ns) => format!("{}\\{}", ns, n),
170                                            None => n.to_string(),
171                                        };
172                                        self.codebase
173                                            .known_symbols
174                                            .insert(Arc::from(fqcn.as_str()));
175                                    }
176                                }
177                                StmtKind::Interface(decl) => {
178                                    let fqcn = match ns {
179                                        Some(ns) => format!("{}\\{}", ns, decl.name),
180                                        None => decl.name.to_string(),
181                                    };
182                                    self.codebase.known_symbols.insert(Arc::from(fqcn.as_str()));
183                                }
184                                StmtKind::Trait(decl) => {
185                                    let fqcn = match ns {
186                                        Some(ns) => format!("{}\\{}", ns, decl.name),
187                                        None => decl.name.to_string(),
188                                    };
189                                    self.codebase.known_symbols.insert(Arc::from(fqcn.as_str()));
190                                }
191                                StmtKind::Enum(decl) => {
192                                    let fqcn = match ns {
193                                        Some(ns) => format!("{}\\{}", ns, decl.name),
194                                        None => decl.name.to_string(),
195                                    };
196                                    self.codebase.known_symbols.insert(Arc::from(fqcn.as_str()));
197                                }
198                                StmtKind::Function(decl) => {
199                                    let fqn = match ns {
200                                        Some(ns) => format!("{}\\{}", ns, decl.name),
201                                        None => decl.name.to_string(),
202                                    };
203                                    self.codebase.known_symbols.insert(Arc::from(fqn.as_str()));
204                                }
205                                _ => {}
206                            }
207                        }
208                    };
209
210                for stmt in result.program.stmts.iter() {
211                    match &stmt.kind {
212                        StmtKind::Namespace(ns) => {
213                            current_namespace =
214                                ns.name.as_ref().map(|n| crate::parser::name_to_string(n));
215                            if !file_ns_set {
216                                if let Some(ref ns_str) = current_namespace {
217                                    self.codebase
218                                        .file_namespaces
219                                        .insert(file.clone(), ns_str.clone());
220                                    file_ns_set = true;
221                                }
222                            }
223                            // Bracketed namespace: walk inner stmts for Use/Class/etc.
224                            if let php_ast::ast::NamespaceBody::Braced(inner_stmts) = &ns.body {
225                                index_stmts(
226                                    inner_stmts,
227                                    current_namespace.as_deref(),
228                                    &mut imports,
229                                );
230                            }
231                        }
232                        _ => index_stmts(
233                            std::slice::from_ref(stmt),
234                            current_namespace.as_deref(),
235                            &mut imports,
236                        ),
237                    }
238                }
239
240                if !imports.is_empty() {
241                    self.codebase.file_imports.insert(file.clone(), imports);
242                }
243
244                // --- Parse errors ---
245                let file_parse_errors: Vec<Issue> = result
246                    .errors
247                    .iter()
248                    .map(|err| {
249                        Issue::new(
250                            mir_issues::IssueKind::ParseError {
251                                message: err.to_string(),
252                            },
253                            mir_issues::Location {
254                                file: file.clone(),
255                                line: 1,
256                                col_start: 0,
257                                col_end: 0,
258                            },
259                        )
260                    })
261                    .collect();
262
263                // --- Definition collection ---
264                let collector =
265                    DefinitionCollector::new(&self.codebase, file.clone(), src, &result.source_map);
266                let issues = collector.collect(&result.program);
267
268                (file_parse_errors, issues)
269            })
270            .collect();
271
272        for (file_parse_errors, issues) in pass1_results {
273            parse_errors.extend(file_parse_errors);
274            all_issues.extend(issues);
275        }
276
277        all_issues.extend(parse_errors);
278
279        // ---- Finalize codebase (resolve inheritance, build dispatch tables) --
280        self.codebase.finalize();
281
282        // ---- Lazy-load unknown classes via PSR-4 (issue #50) ----------------
283        if let Some(psr4) = &self.psr4 {
284            self.lazy_load_missing_classes(psr4.clone(), &mut all_issues);
285        }
286
287        // ---- Build reverse dep graph and persist it for the next run ---------
288        if let Some(cache) = &self.cache {
289            let rev = build_reverse_deps(&self.codebase);
290            cache.set_reverse_deps(rev);
291        }
292
293        // ---- Class-level checks (M11) ----------------------------------------
294        let analyzed_file_set: std::collections::HashSet<std::sync::Arc<str>> =
295            file_data.iter().map(|(f, _)| f.clone()).collect();
296        let class_issues =
297            crate::class::ClassAnalyzer::with_files(&self.codebase, analyzed_file_set, &file_data)
298                .analyze_all();
299        all_issues.extend(class_issues);
300
301        // ---- Pass 2: analyze function/method bodies in parallel (M14) --------
302        // Each file is analyzed independently; arena + parse happen inside the
303        // rayon closure so there is no cross-thread borrow.
304        // When a cache is present, files whose content hash matches a stored
305        // entry skip re-analysis entirely (M17).
306        let pass2_results: Vec<(Vec<Issue>, Vec<crate::symbol::ResolvedSymbol>)> = file_data
307            .par_iter()
308            .map(|(file, src)| {
309                // Cache lookup
310                let result = if let Some(cache) = &self.cache {
311                    let h = hash_content(src);
312                    if let Some((cached_issues, ref_locs)) = cache.get(file, &h) {
313                        // Hit — replay reference locations so symbol_reference_locations
314                        // is populated without re-running analyze_bodies.
315                        self.codebase
316                            .replay_reference_locations(file.clone(), &ref_locs);
317                        (cached_issues, Vec::new())
318                    } else {
319                        // Miss — analyze and store
320                        let arena = bumpalo::Bump::new();
321                        let parsed = php_rs_parser::parse(&arena, src);
322                        let (issues, symbols) = self.analyze_bodies(
323                            &parsed.program,
324                            file.clone(),
325                            src,
326                            &parsed.source_map,
327                        );
328                        let ref_locs = extract_reference_locations(&self.codebase, file);
329                        cache.put(file, h, issues.clone(), ref_locs);
330                        (issues, symbols)
331                    }
332                } else {
333                    let arena = bumpalo::Bump::new();
334                    let parsed = php_rs_parser::parse(&arena, src);
335                    self.analyze_bodies(&parsed.program, file.clone(), src, &parsed.source_map)
336                };
337                if let Some(cb) = &self.on_file_done {
338                    cb();
339                }
340                result
341            })
342            .collect();
343
344        let mut all_symbols = Vec::new();
345        for (issues, symbols) in pass2_results {
346            all_issues.extend(issues);
347            all_symbols.extend(symbols);
348        }
349
350        // Persist cache hits/misses to disk
351        if let Some(cache) = &self.cache {
352            cache.flush();
353        }
354
355        // ---- Compact the reference index ------------------------------------
356        // Convert build-phase DashMaps into a CSR structure, freeing the
357        // per-entry HashMap/HashSet overhead accumulated during Pass 2.
358        self.codebase.compact_reference_index();
359
360        // ---- Dead-code detection (M18) --------------------------------------
361        if self.find_dead_code {
362            let dead_code_issues =
363                crate::dead_code::DeadCodeAnalyzer::new(&self.codebase).analyze();
364            all_issues.extend(dead_code_issues);
365        }
366
367        AnalysisResult {
368            issues: all_issues,
369            type_envs: std::collections::HashMap::new(),
370            symbols: all_symbols,
371        }
372    }
373
374    /// Lazily load class definitions for referenced-but-unknown FQCNs via PSR-4.
375    ///
376    /// After Pass 1 and `codebase.finalize()`, some classes referenced as parents
377    /// or interfaces may not be in the codebase (they weren't in the initial file
378    /// list). This method iterates up to `max_depth` times, each time resolving
379    /// unknown parent/interface FQCNs via the PSR-4 map, running Pass 1 on those
380    /// files, and re-finalizing the codebase. The loop stops when no new files
381    /// are discovered.
382    fn lazy_load_missing_classes(
383        &self,
384        psr4: Arc<crate::composer::Psr4Map>,
385        all_issues: &mut Vec<Issue>,
386    ) {
387        use std::collections::HashSet;
388
389        let max_depth = 10; // prevent infinite chains
390        let mut loaded: HashSet<String> = HashSet::new();
391
392        for _ in 0..max_depth {
393            // Collect all referenced FQCNs that aren't in the codebase
394            let mut to_load: Vec<(String, PathBuf)> = Vec::new();
395
396            for entry in self.codebase.classes.iter() {
397                let cls = entry.value();
398
399                // Check parent class
400                if let Some(parent) = &cls.parent {
401                    let fqcn = parent.as_ref();
402                    if !self.codebase.classes.contains_key(fqcn) && !loaded.contains(fqcn) {
403                        if let Some(path) = psr4.resolve(fqcn) {
404                            to_load.push((fqcn.to_string(), path));
405                        }
406                    }
407                }
408
409                // Check interfaces
410                for iface in &cls.interfaces {
411                    let fqcn = iface.as_ref();
412                    if !self.codebase.classes.contains_key(fqcn)
413                        && !self.codebase.interfaces.contains_key(fqcn)
414                        && !loaded.contains(fqcn)
415                    {
416                        if let Some(path) = psr4.resolve(fqcn) {
417                            to_load.push((fqcn.to_string(), path));
418                        }
419                    }
420                }
421            }
422
423            if to_load.is_empty() {
424                break;
425            }
426
427            // Load each discovered file (Pass 1 only)
428            for (fqcn, path) in to_load {
429                loaded.insert(fqcn);
430                if let Ok(src) = std::fs::read_to_string(&path) {
431                    let file: Arc<str> = Arc::from(path.to_string_lossy().as_ref());
432                    let arena = bumpalo::Bump::new();
433                    let result = php_rs_parser::parse(&arena, &src);
434                    let collector = crate::collector::DefinitionCollector::new(
435                        &self.codebase,
436                        file,
437                        &src,
438                        &result.source_map,
439                    );
440                    let issues = collector.collect(&result.program);
441                    all_issues.extend(issues);
442                }
443            }
444
445            // Re-finalize to include newly loaded classes in the inheritance graph.
446            // Must reset the flag first so finalize() isn't a no-op.
447            self.codebase.invalidate_finalization();
448            self.codebase.finalize();
449        }
450    }
451
452    /// Re-analyze a single file within the existing codebase.
453    ///
454    /// This is the incremental analysis API for LSP:
455    /// 1. Removes old definitions from this file
456    /// 2. Re-runs Pass 1 (definition collection) on the new content
457    /// 3. Re-finalizes the codebase (rebuilds inheritance)
458    /// 4. Re-runs Pass 2 (body analysis) on this file
459    /// 5. Returns the analysis result for this file only
460    pub fn re_analyze_file(&self, file_path: &str, new_content: &str) -> AnalysisResult {
461        // Fast path: content unchanged and cache has a valid entry — skip full re-analysis.
462        if let Some(cache) = &self.cache {
463            let h = hash_content(new_content);
464            if let Some((issues, ref_locs)) = cache.get(file_path, &h) {
465                let file: Arc<str> = Arc::from(file_path);
466                self.codebase.replay_reference_locations(file, &ref_locs);
467                return AnalysisResult {
468                    issues,
469                    type_envs: HashMap::new(),
470                    symbols: Default::default(),
471                };
472            }
473        }
474
475        // 1. Snapshot inheritance structure before removing old definitions.
476        //    This lets us skip finalize() later if only method bodies changed.
477        let structural_snapshot = self.codebase.file_structural_snapshot(file_path);
478
479        // 2. Remove old definitions from this file
480        self.codebase.remove_file_definitions(file_path);
481
482        // 3. Parse new content and run Pass 1
483        let file: Arc<str> = Arc::from(file_path);
484        let arena = bumpalo::Bump::new();
485        let parsed = php_rs_parser::parse(&arena, new_content);
486
487        let mut all_issues = Vec::new();
488
489        // Collect parse errors
490        for err in &parsed.errors {
491            all_issues.push(Issue::new(
492                mir_issues::IssueKind::ParseError {
493                    message: err.to_string(),
494                },
495                mir_issues::Location {
496                    file: file.clone(),
497                    line: 1,
498                    col_start: 0,
499                    col_end: 0,
500                },
501            ));
502        }
503
504        let collector = DefinitionCollector::new(
505            &self.codebase,
506            file.clone(),
507            new_content,
508            &parsed.source_map,
509        );
510        all_issues.extend(collector.collect(&parsed.program));
511
512        // 4. Re-finalize, or skip if only method bodies changed.
513        //    finalize() rebuilds all_parents for every class/interface in the
514        //    codebase by walking the full inheritance graph — this is expensive.
515        //    If the inheritance structure of this file is unchanged (same parent,
516        //    interfaces, traits), restore all_parents from the snapshot and skip
517        //    the full walk.
518        if self
519            .codebase
520            .structural_unchanged_after_pass1(file_path, &structural_snapshot)
521        {
522            self.codebase
523                .restore_all_parents(file_path, &structural_snapshot);
524        } else {
525            self.codebase.finalize();
526        }
527
528        // 5. Run Pass 2 on this file
529        let (body_issues, symbols) = self.analyze_bodies(
530            &parsed.program,
531            file.clone(),
532            new_content,
533            &parsed.source_map,
534        );
535        all_issues.extend(body_issues);
536
537        // 6. Update cache if present
538        if let Some(cache) = &self.cache {
539            let h = hash_content(new_content);
540            cache.evict_with_dependents(&[file_path.to_string()]);
541            let ref_locs = extract_reference_locations(&self.codebase, &file);
542            cache.put(file_path, h, all_issues.clone(), ref_locs);
543        }
544
545        AnalysisResult {
546            issues: all_issues,
547            type_envs: HashMap::new(),
548            symbols,
549        }
550    }
551
552    /// Analyze a PHP source string without a real file path.
553    /// Useful for tests and LSP single-file mode.
554    pub fn analyze_source(source: &str) -> AnalysisResult {
555        use crate::collector::DefinitionCollector;
556        let analyzer = ProjectAnalyzer::new();
557        analyzer.load_stubs();
558        let file: Arc<str> = Arc::from("<source>");
559        let arena = bumpalo::Bump::new();
560        let result = php_rs_parser::parse(&arena, source);
561        let mut all_issues = Vec::new();
562        let collector =
563            DefinitionCollector::new(&analyzer.codebase, file.clone(), source, &result.source_map);
564        all_issues.extend(collector.collect(&result.program));
565        analyzer.codebase.finalize();
566        let mut type_envs = std::collections::HashMap::new();
567        let mut all_symbols = Vec::new();
568        all_issues.extend(analyzer.analyze_bodies_typed(
569            &result.program,
570            file.clone(),
571            source,
572            &result.source_map,
573            &mut type_envs,
574            &mut all_symbols,
575        ));
576        AnalysisResult {
577            issues: all_issues,
578            type_envs,
579            symbols: all_symbols,
580        }
581    }
582
583    /// Pass 2: walk all function/method bodies in one file, return issues, and
584    /// write inferred return types back to the codebase.
585    fn analyze_bodies<'arena, 'src>(
586        &self,
587        program: &php_ast::ast::Program<'arena, 'src>,
588        file: Arc<str>,
589        source: &str,
590        source_map: &php_rs_parser::source_map::SourceMap,
591    ) -> (Vec<mir_issues::Issue>, Vec<crate::symbol::ResolvedSymbol>) {
592        use php_ast::ast::StmtKind;
593
594        let mut all_issues = Vec::new();
595        let mut all_symbols = Vec::new();
596
597        for stmt in program.stmts.iter() {
598            match &stmt.kind {
599                StmtKind::Function(decl) => {
600                    self.analyze_fn_decl(
601                        decl,
602                        &file,
603                        source,
604                        source_map,
605                        &mut all_issues,
606                        &mut all_symbols,
607                    );
608                }
609                StmtKind::Class(decl) => {
610                    self.analyze_class_decl(
611                        decl,
612                        &file,
613                        source,
614                        source_map,
615                        &mut all_issues,
616                        &mut all_symbols,
617                    );
618                }
619                StmtKind::Enum(decl) => {
620                    self.analyze_enum_decl(decl, &file, source, source_map, &mut all_issues);
621                }
622                StmtKind::Namespace(ns) => {
623                    if let php_ast::ast::NamespaceBody::Braced(stmts) = &ns.body {
624                        for inner in stmts.iter() {
625                            match &inner.kind {
626                                StmtKind::Function(decl) => {
627                                    self.analyze_fn_decl(
628                                        decl,
629                                        &file,
630                                        source,
631                                        source_map,
632                                        &mut all_issues,
633                                        &mut all_symbols,
634                                    );
635                                }
636                                StmtKind::Class(decl) => {
637                                    self.analyze_class_decl(
638                                        decl,
639                                        &file,
640                                        source,
641                                        source_map,
642                                        &mut all_issues,
643                                        &mut all_symbols,
644                                    );
645                                }
646                                StmtKind::Enum(decl) => {
647                                    self.analyze_enum_decl(
648                                        decl,
649                                        &file,
650                                        source,
651                                        source_map,
652                                        &mut all_issues,
653                                    );
654                                }
655                                _ => {}
656                            }
657                        }
658                    }
659                }
660                _ => {}
661            }
662        }
663
664        (all_issues, all_symbols)
665    }
666
667    /// Analyze a single function declaration body and collect issues + inferred return type.
668    #[allow(clippy::too_many_arguments)]
669    fn analyze_fn_decl<'arena, 'src>(
670        &self,
671        decl: &php_ast::ast::FunctionDecl<'arena, 'src>,
672        file: &Arc<str>,
673        source: &str,
674        source_map: &php_rs_parser::source_map::SourceMap,
675        all_issues: &mut Vec<mir_issues::Issue>,
676        all_symbols: &mut Vec<crate::symbol::ResolvedSymbol>,
677    ) {
678        let fn_name = decl.name;
679        let body = &decl.body;
680        // Check parameter and return type hints for undefined classes.
681        for param in decl.params.iter() {
682            if let Some(hint) = &param.type_hint {
683                check_type_hint_classes(hint, &self.codebase, file, source, source_map, all_issues);
684            }
685        }
686        if let Some(hint) = &decl.return_type {
687            check_type_hint_classes(hint, &self.codebase, file, source, source_map, all_issues);
688        }
689        use crate::context::Context;
690        use crate::stmt::StatementsAnalyzer;
691        use mir_issues::IssueBuffer;
692
693        // Resolve function name using the file's namespace (handles namespaced functions)
694        let resolved_fn = self.codebase.resolve_class_name(file.as_ref(), fn_name);
695        let func_opt: Option<mir_codebase::storage::FunctionStorage> = self
696            .codebase
697            .functions
698            .get(resolved_fn.as_str())
699            .map(|r| r.clone())
700            .or_else(|| self.codebase.functions.get(fn_name).map(|r| r.clone()))
701            .or_else(|| {
702                self.codebase
703                    .functions
704                    .iter()
705                    .find(|e| e.short_name.as_ref() == fn_name)
706                    .map(|e| e.value().clone())
707            });
708
709        let fqn = func_opt.as_ref().map(|f| f.fqn.clone());
710        // Always use the codebase entry when its params match the AST (same count + names).
711        // This covers the common case and preserves docblock-enriched types.
712        // When names differ (two files define the same unnamespaced function), fall back to
713        // the AST params so param variables are always in scope for this file's body.
714        let (params, return_ty): (Vec<mir_codebase::FnParam>, _) = match &func_opt {
715            Some(f)
716                if f.params.len() == decl.params.len()
717                    && f.params
718                        .iter()
719                        .zip(decl.params.iter())
720                        .all(|(cp, ap)| cp.name.as_ref() == ap.name) =>
721            {
722                (f.params.clone(), f.return_type.clone())
723            }
724            _ => {
725                let ast_params = decl
726                    .params
727                    .iter()
728                    .map(|p| mir_codebase::FnParam {
729                        name: Arc::from(p.name),
730                        ty: None,
731                        default: p.default.as_ref().map(|_| mir_types::Union::mixed()),
732                        is_variadic: p.variadic,
733                        is_byref: p.by_ref,
734                        is_optional: p.default.is_some() || p.variadic,
735                    })
736                    .collect();
737                (ast_params, None)
738            }
739        };
740
741        let mut ctx = Context::for_function(&params, return_ty, None, None, None, false, true);
742        let mut buf = IssueBuffer::new();
743        let mut sa = StatementsAnalyzer::new(
744            &self.codebase,
745            file.clone(),
746            source,
747            source_map,
748            &mut buf,
749            all_symbols,
750        );
751        sa.analyze_stmts(body, &mut ctx);
752        let inferred = merge_return_types(&sa.return_types);
753        drop(sa);
754
755        emit_unused_params(&params, &ctx, "", file, all_issues);
756        emit_unused_variables(&ctx, file, all_issues);
757        all_issues.extend(buf.into_issues());
758
759        if let Some(fqn) = fqn {
760            if let Some(mut func) = self.codebase.functions.get_mut(fqn.as_ref()) {
761                func.inferred_return_type = Some(inferred);
762            }
763        }
764    }
765
766    /// Analyze all method bodies on a class declaration and collect issues + inferred return types.
767    #[allow(clippy::too_many_arguments)]
768    fn analyze_class_decl<'arena, 'src>(
769        &self,
770        decl: &php_ast::ast::ClassDecl<'arena, 'src>,
771        file: &Arc<str>,
772        source: &str,
773        source_map: &php_rs_parser::source_map::SourceMap,
774        all_issues: &mut Vec<mir_issues::Issue>,
775        all_symbols: &mut Vec<crate::symbol::ResolvedSymbol>,
776    ) {
777        use crate::context::Context;
778        use crate::stmt::StatementsAnalyzer;
779        use mir_issues::IssueBuffer;
780
781        let class_name = decl.name.unwrap_or("<anonymous>");
782        // Resolve the FQCN using the file's namespace/imports — avoids ambiguity
783        // when multiple classes share the same short name across namespaces.
784        let resolved = self.codebase.resolve_class_name(file.as_ref(), class_name);
785        let fqcn: &str = &resolved;
786        let parent_fqcn = self
787            .codebase
788            .classes
789            .get(fqcn)
790            .and_then(|c| c.parent.clone());
791
792        for member in decl.members.iter() {
793            let php_ast::ast::ClassMemberKind::Method(method) = &member.kind else {
794                continue;
795            };
796
797            // Check parameter and return type hints for undefined classes (even abstract methods).
798            for param in method.params.iter() {
799                if let Some(hint) = &param.type_hint {
800                    check_type_hint_classes(
801                        hint,
802                        &self.codebase,
803                        file,
804                        source,
805                        source_map,
806                        all_issues,
807                    );
808                }
809            }
810            if let Some(hint) = &method.return_type {
811                check_type_hint_classes(hint, &self.codebase, file, source, source_map, all_issues);
812            }
813
814            let Some(body) = &method.body else { continue };
815
816            let (params, return_ty) = self
817                .codebase
818                .get_method(fqcn, method.name)
819                .as_deref()
820                .map(|m| (m.params.clone(), m.return_type.clone()))
821                .unwrap_or_default();
822
823            let is_ctor = method.name == "__construct";
824            let mut ctx = Context::for_method(
825                &params,
826                return_ty,
827                Some(Arc::from(fqcn)),
828                parent_fqcn.clone(),
829                Some(Arc::from(fqcn)),
830                false,
831                is_ctor,
832                method.is_static,
833            );
834
835            let mut buf = IssueBuffer::new();
836            let mut sa = StatementsAnalyzer::new(
837                &self.codebase,
838                file.clone(),
839                source,
840                source_map,
841                &mut buf,
842                all_symbols,
843            );
844            sa.analyze_stmts(body, &mut ctx);
845            let inferred = merge_return_types(&sa.return_types);
846            drop(sa);
847
848            emit_unused_params(&params, &ctx, method.name, file, all_issues);
849            emit_unused_variables(&ctx, file, all_issues);
850            all_issues.extend(buf.into_issues());
851
852            if let Some(mut cls) = self.codebase.classes.get_mut(fqcn) {
853                if let Some(m) = cls.own_methods.get_mut(method.name) {
854                    Arc::make_mut(m).inferred_return_type = Some(inferred);
855                }
856            }
857        }
858    }
859
860    /// Like `analyze_bodies` but also populates `type_envs` with per-scope type environments.
861    #[allow(clippy::too_many_arguments)]
862    fn analyze_bodies_typed<'arena, 'src>(
863        &self,
864        program: &php_ast::ast::Program<'arena, 'src>,
865        file: Arc<str>,
866        source: &str,
867        source_map: &php_rs_parser::source_map::SourceMap,
868        type_envs: &mut std::collections::HashMap<
869            crate::type_env::ScopeId,
870            crate::type_env::TypeEnv,
871        >,
872        all_symbols: &mut Vec<crate::symbol::ResolvedSymbol>,
873    ) -> Vec<mir_issues::Issue> {
874        use php_ast::ast::StmtKind;
875        let mut all_issues = Vec::new();
876        for stmt in program.stmts.iter() {
877            match &stmt.kind {
878                StmtKind::Function(decl) => {
879                    self.analyze_fn_decl_typed(
880                        decl,
881                        &file,
882                        source,
883                        source_map,
884                        &mut all_issues,
885                        type_envs,
886                        all_symbols,
887                    );
888                }
889                StmtKind::Class(decl) => {
890                    self.analyze_class_decl_typed(
891                        decl,
892                        &file,
893                        source,
894                        source_map,
895                        &mut all_issues,
896                        type_envs,
897                        all_symbols,
898                    );
899                }
900                StmtKind::Enum(decl) => {
901                    self.analyze_enum_decl(decl, &file, source, source_map, &mut all_issues);
902                }
903                StmtKind::Namespace(ns) => {
904                    if let php_ast::ast::NamespaceBody::Braced(stmts) = &ns.body {
905                        for inner in stmts.iter() {
906                            match &inner.kind {
907                                StmtKind::Function(decl) => {
908                                    self.analyze_fn_decl_typed(
909                                        decl,
910                                        &file,
911                                        source,
912                                        source_map,
913                                        &mut all_issues,
914                                        type_envs,
915                                        all_symbols,
916                                    );
917                                }
918                                StmtKind::Class(decl) => {
919                                    self.analyze_class_decl_typed(
920                                        decl,
921                                        &file,
922                                        source,
923                                        source_map,
924                                        &mut all_issues,
925                                        type_envs,
926                                        all_symbols,
927                                    );
928                                }
929                                StmtKind::Enum(decl) => {
930                                    self.analyze_enum_decl(
931                                        decl,
932                                        &file,
933                                        source,
934                                        source_map,
935                                        &mut all_issues,
936                                    );
937                                }
938                                _ => {}
939                            }
940                        }
941                    }
942                }
943                _ => {}
944            }
945        }
946        all_issues
947    }
948
949    /// Like `analyze_fn_decl` but also captures a `TypeEnv` for the function scope.
950    #[allow(clippy::too_many_arguments)]
951    fn analyze_fn_decl_typed<'arena, 'src>(
952        &self,
953        decl: &php_ast::ast::FunctionDecl<'arena, 'src>,
954        file: &Arc<str>,
955        source: &str,
956        source_map: &php_rs_parser::source_map::SourceMap,
957        all_issues: &mut Vec<mir_issues::Issue>,
958        type_envs: &mut std::collections::HashMap<
959            crate::type_env::ScopeId,
960            crate::type_env::TypeEnv,
961        >,
962        all_symbols: &mut Vec<crate::symbol::ResolvedSymbol>,
963    ) {
964        use crate::context::Context;
965        use crate::stmt::StatementsAnalyzer;
966        use mir_issues::IssueBuffer;
967
968        let fn_name = decl.name;
969        let body = &decl.body;
970
971        for param in decl.params.iter() {
972            if let Some(hint) = &param.type_hint {
973                check_type_hint_classes(hint, &self.codebase, file, source, source_map, all_issues);
974            }
975        }
976        if let Some(hint) = &decl.return_type {
977            check_type_hint_classes(hint, &self.codebase, file, source, source_map, all_issues);
978        }
979
980        let resolved_fn = self.codebase.resolve_class_name(file.as_ref(), fn_name);
981        let func_opt: Option<mir_codebase::storage::FunctionStorage> = self
982            .codebase
983            .functions
984            .get(resolved_fn.as_str())
985            .map(|r| r.clone())
986            .or_else(|| self.codebase.functions.get(fn_name).map(|r| r.clone()))
987            .or_else(|| {
988                self.codebase
989                    .functions
990                    .iter()
991                    .find(|e| e.short_name.as_ref() == fn_name)
992                    .map(|e| e.value().clone())
993            });
994
995        let fqn = func_opt.as_ref().map(|f| f.fqn.clone());
996        let (params, return_ty): (Vec<mir_codebase::FnParam>, _) = match &func_opt {
997            Some(f)
998                if f.params.len() == decl.params.len()
999                    && f.params
1000                        .iter()
1001                        .zip(decl.params.iter())
1002                        .all(|(cp, ap)| cp.name.as_ref() == ap.name) =>
1003            {
1004                (f.params.clone(), f.return_type.clone())
1005            }
1006            _ => {
1007                let ast_params = decl
1008                    .params
1009                    .iter()
1010                    .map(|p| mir_codebase::FnParam {
1011                        name: Arc::from(p.name),
1012                        ty: None,
1013                        default: p.default.as_ref().map(|_| mir_types::Union::mixed()),
1014                        is_variadic: p.variadic,
1015                        is_byref: p.by_ref,
1016                        is_optional: p.default.is_some() || p.variadic,
1017                    })
1018                    .collect();
1019                (ast_params, None)
1020            }
1021        };
1022
1023        let mut ctx = Context::for_function(&params, return_ty, None, None, None, false, true);
1024        let mut buf = IssueBuffer::new();
1025        let mut sa = StatementsAnalyzer::new(
1026            &self.codebase,
1027            file.clone(),
1028            source,
1029            source_map,
1030            &mut buf,
1031            all_symbols,
1032        );
1033        sa.analyze_stmts(body, &mut ctx);
1034        let inferred = merge_return_types(&sa.return_types);
1035        drop(sa);
1036
1037        // Capture TypeEnv for this scope
1038        let scope_name = fqn.clone().unwrap_or_else(|| Arc::from(fn_name));
1039        type_envs.insert(
1040            crate::type_env::ScopeId::Function {
1041                file: file.clone(),
1042                name: scope_name,
1043            },
1044            crate::type_env::TypeEnv::new(ctx.vars.clone()),
1045        );
1046
1047        emit_unused_params(&params, &ctx, "", file, all_issues);
1048        emit_unused_variables(&ctx, file, all_issues);
1049        all_issues.extend(buf.into_issues());
1050
1051        if let Some(fqn) = fqn {
1052            if let Some(mut func) = self.codebase.functions.get_mut(fqn.as_ref()) {
1053                func.inferred_return_type = Some(inferred);
1054            }
1055        }
1056    }
1057
1058    /// Like `analyze_class_decl` but also captures a `TypeEnv` per method scope.
1059    #[allow(clippy::too_many_arguments)]
1060    fn analyze_class_decl_typed<'arena, 'src>(
1061        &self,
1062        decl: &php_ast::ast::ClassDecl<'arena, 'src>,
1063        file: &Arc<str>,
1064        source: &str,
1065        source_map: &php_rs_parser::source_map::SourceMap,
1066        all_issues: &mut Vec<mir_issues::Issue>,
1067        type_envs: &mut std::collections::HashMap<
1068            crate::type_env::ScopeId,
1069            crate::type_env::TypeEnv,
1070        >,
1071        all_symbols: &mut Vec<crate::symbol::ResolvedSymbol>,
1072    ) {
1073        use crate::context::Context;
1074        use crate::stmt::StatementsAnalyzer;
1075        use mir_issues::IssueBuffer;
1076
1077        let class_name = decl.name.unwrap_or("<anonymous>");
1078        let resolved = self.codebase.resolve_class_name(file.as_ref(), class_name);
1079        let fqcn: &str = &resolved;
1080        let parent_fqcn = self
1081            .codebase
1082            .classes
1083            .get(fqcn)
1084            .and_then(|c| c.parent.clone());
1085
1086        for member in decl.members.iter() {
1087            let php_ast::ast::ClassMemberKind::Method(method) = &member.kind else {
1088                continue;
1089            };
1090
1091            for param in method.params.iter() {
1092                if let Some(hint) = &param.type_hint {
1093                    check_type_hint_classes(
1094                        hint,
1095                        &self.codebase,
1096                        file,
1097                        source,
1098                        source_map,
1099                        all_issues,
1100                    );
1101                }
1102            }
1103            if let Some(hint) = &method.return_type {
1104                check_type_hint_classes(hint, &self.codebase, file, source, source_map, all_issues);
1105            }
1106
1107            let Some(body) = &method.body else { continue };
1108
1109            let (params, return_ty) = self
1110                .codebase
1111                .get_method(fqcn, method.name)
1112                .as_deref()
1113                .map(|m| (m.params.clone(), m.return_type.clone()))
1114                .unwrap_or_default();
1115
1116            let is_ctor = method.name == "__construct";
1117            let mut ctx = Context::for_method(
1118                &params,
1119                return_ty,
1120                Some(Arc::from(fqcn)),
1121                parent_fqcn.clone(),
1122                Some(Arc::from(fqcn)),
1123                false,
1124                is_ctor,
1125                method.is_static,
1126            );
1127
1128            let mut buf = IssueBuffer::new();
1129            let mut sa = StatementsAnalyzer::new(
1130                &self.codebase,
1131                file.clone(),
1132                source,
1133                source_map,
1134                &mut buf,
1135                all_symbols,
1136            );
1137            sa.analyze_stmts(body, &mut ctx);
1138            let inferred = merge_return_types(&sa.return_types);
1139            drop(sa);
1140
1141            // Capture TypeEnv for this method scope
1142            type_envs.insert(
1143                crate::type_env::ScopeId::Method {
1144                    class: Arc::from(fqcn),
1145                    method: Arc::from(method.name),
1146                },
1147                crate::type_env::TypeEnv::new(ctx.vars.clone()),
1148            );
1149
1150            emit_unused_params(&params, &ctx, method.name, file, all_issues);
1151            emit_unused_variables(&ctx, file, all_issues);
1152            all_issues.extend(buf.into_issues());
1153
1154            if let Some(mut cls) = self.codebase.classes.get_mut(fqcn) {
1155                if let Some(m) = cls.own_methods.get_mut(method.name) {
1156                    Arc::make_mut(m).inferred_return_type = Some(inferred);
1157                }
1158            }
1159        }
1160    }
1161
1162    /// Discover all `.php` files under a directory, recursively.
1163    pub fn discover_files(root: &Path) -> Vec<PathBuf> {
1164        if root.is_file() {
1165            return vec![root.to_path_buf()];
1166        }
1167        let mut files = Vec::new();
1168        collect_php_files(root, &mut files);
1169        files
1170    }
1171
1172    /// Pass 1 only: collect type definitions from `paths` into the codebase without
1173    /// analyzing method bodies or emitting issues. Used to load vendor types.
1174    pub fn collect_types_only(&self, paths: &[PathBuf]) {
1175        paths.par_iter().for_each(|path| {
1176            let Ok(src) = std::fs::read_to_string(path) else {
1177                return;
1178            };
1179            let file: Arc<str> = Arc::from(path.to_string_lossy().as_ref());
1180            let arena = bumpalo::Bump::new();
1181            let result = php_rs_parser::parse(&arena, &src);
1182            let collector =
1183                DefinitionCollector::new(&self.codebase, file, &src, &result.source_map);
1184            // Ignore any issues emitted during vendor collection
1185            let _ = collector.collect(&result.program);
1186        });
1187    }
1188
1189    /// Check type hints in enum methods for undefined classes.
1190    #[allow(clippy::too_many_arguments)]
1191    fn analyze_enum_decl<'arena, 'src>(
1192        &self,
1193        decl: &php_ast::ast::EnumDecl<'arena, 'src>,
1194        file: &Arc<str>,
1195        source: &str,
1196        source_map: &php_rs_parser::source_map::SourceMap,
1197        all_issues: &mut Vec<mir_issues::Issue>,
1198    ) {
1199        use php_ast::ast::EnumMemberKind;
1200        for member in decl.members.iter() {
1201            let EnumMemberKind::Method(method) = &member.kind else {
1202                continue;
1203            };
1204            for param in method.params.iter() {
1205                if let Some(hint) = &param.type_hint {
1206                    check_type_hint_classes(
1207                        hint,
1208                        &self.codebase,
1209                        file,
1210                        source,
1211                        source_map,
1212                        all_issues,
1213                    );
1214                }
1215            }
1216            if let Some(hint) = &method.return_type {
1217                check_type_hint_classes(hint, &self.codebase, file, source, source_map, all_issues);
1218            }
1219        }
1220    }
1221}
1222
1223impl Default for ProjectAnalyzer {
1224    fn default() -> Self {
1225        Self::new()
1226    }
1227}
1228
1229// ---------------------------------------------------------------------------
1230// Offset to char-count column conversion
1231// ---------------------------------------------------------------------------
1232
1233/// Convert a byte offset to a Unicode char-count column on a given line.
1234/// Returns (line, col) where col is a 0-based Unicode code-point count.
1235fn offset_to_line_col(
1236    source: &str,
1237    offset: u32,
1238    source_map: &php_rs_parser::source_map::SourceMap,
1239) -> (u32, u16) {
1240    let lc = source_map.offset_to_line_col(offset);
1241    let line = lc.line + 1;
1242
1243    let byte_offset = offset as usize;
1244    let line_start_byte = if byte_offset == 0 {
1245        0
1246    } else {
1247        source[..byte_offset]
1248            .rfind('\n')
1249            .map(|p| p + 1)
1250            .unwrap_or(0)
1251    };
1252
1253    let col = source[line_start_byte..byte_offset].chars().count() as u16;
1254
1255    (line, col)
1256}
1257
1258// ---------------------------------------------------------------------------
1259// Type-hint class existence checker
1260// ---------------------------------------------------------------------------
1261
1262/// Walk a `TypeHint` AST node and emit `UndefinedClass` for any named class
1263/// that does not exist in the codebase.  Skips PHP built-in type keywords.
1264fn check_type_hint_classes<'arena, 'src>(
1265    hint: &php_ast::ast::TypeHint<'arena, 'src>,
1266    codebase: &Codebase,
1267    file: &Arc<str>,
1268    source: &str,
1269    source_map: &php_rs_parser::source_map::SourceMap,
1270    issues: &mut Vec<mir_issues::Issue>,
1271) {
1272    use php_ast::ast::TypeHintKind;
1273    match &hint.kind {
1274        TypeHintKind::Named(name) => {
1275            let name_str = crate::parser::name_to_string(name);
1276            // Skip built-in pseudo-types that are not real classes.
1277            if is_pseudo_type(&name_str) {
1278                return;
1279            }
1280            let resolved = codebase.resolve_class_name(file.as_ref(), &name_str);
1281            if !codebase.type_exists(&resolved) {
1282                let (line, col_start) = offset_to_line_col(source, hint.span.start, source_map);
1283                let col_end = if hint.span.start < hint.span.end {
1284                    let (_end_line, end_col) =
1285                        offset_to_line_col(source, hint.span.end, source_map);
1286                    end_col
1287                } else {
1288                    col_start
1289                };
1290                issues.push(
1291                    mir_issues::Issue::new(
1292                        mir_issues::IssueKind::UndefinedClass { name: resolved },
1293                        mir_issues::Location {
1294                            file: file.clone(),
1295                            line,
1296                            col_start,
1297                            col_end: col_end.max(col_start + 1),
1298                        },
1299                    )
1300                    .with_snippet(crate::parser::span_text(source, hint.span).unwrap_or_default()),
1301                );
1302            }
1303        }
1304        TypeHintKind::Nullable(inner) => {
1305            check_type_hint_classes(inner, codebase, file, source, source_map, issues);
1306        }
1307        TypeHintKind::Union(parts) | TypeHintKind::Intersection(parts) => {
1308            for part in parts.iter() {
1309                check_type_hint_classes(part, codebase, file, source, source_map, issues);
1310            }
1311        }
1312        TypeHintKind::Keyword(_, _) => {} // built-in keyword, always valid
1313    }
1314}
1315
/// Returns true for names that are PHP pseudo-types / special identifiers, not
/// real classes.
///
/// The comparison ignores letter case, so `Self`, `STATIC` and `Null` all match.
fn is_pseudo_type(name: &str) -> bool {
    const PSEUDO_TYPES: [&str; 12] = [
        "self", "static", "parent", "null", "true", "false", "never", "void", "mixed", "object",
        "callable", "iterable",
    ];
    PSEUDO_TYPES
        .iter()
        .any(|keyword| name.eq_ignore_ascii_case(keyword))
}
1335
/// Magic methods whose parameters are passed by the PHP runtime, not user call sites.
///
/// `emit_unused_params` returns without reporting anything when the method name
/// it is given appears in this list.
const MAGIC_METHODS_WITH_RUNTIME_PARAMS: &[&str] = &[
    "__get",
    "__set",
    "__call",
    "__callStatic",
    "__isset",
    "__unset",
];
1345
1346/// Emit `UnusedParam` issues for params that were never read in `ctx`.
1347/// Skips magic methods whose parameters are passed by the PHP runtime.
1348fn emit_unused_params(
1349    params: &[mir_codebase::FnParam],
1350    ctx: &crate::context::Context,
1351    method_name: &str,
1352    file: &Arc<str>,
1353    issues: &mut Vec<mir_issues::Issue>,
1354) {
1355    if MAGIC_METHODS_WITH_RUNTIME_PARAMS.contains(&method_name) {
1356        return;
1357    }
1358    for p in params {
1359        let name = p.name.as_ref().trim_start_matches('$');
1360        if !ctx.read_vars.contains(name) {
1361            issues.push(
1362                mir_issues::Issue::new(
1363                    mir_issues::IssueKind::UnusedParam {
1364                        name: name.to_string(),
1365                    },
1366                    mir_issues::Location {
1367                        file: file.clone(),
1368                        line: 1,
1369                        col_start: 0,
1370                        col_end: 0,
1371                    },
1372                )
1373                .with_snippet(format!("${}", name)),
1374            );
1375        }
1376    }
1377}
1378
1379fn emit_unused_variables(
1380    ctx: &crate::context::Context,
1381    file: &Arc<str>,
1382    issues: &mut Vec<mir_issues::Issue>,
1383) {
1384    // Superglobals are always "used" — skip them
1385    const SUPERGLOBALS: &[&str] = &[
1386        "_SERVER", "_GET", "_POST", "_REQUEST", "_SESSION", "_COOKIE", "_FILES", "_ENV", "GLOBALS",
1387    ];
1388    for name in &ctx.assigned_vars {
1389        if ctx.param_names.contains(name) {
1390            continue;
1391        }
1392        if SUPERGLOBALS.contains(&name.as_str()) {
1393            continue;
1394        }
1395        // $this is implicitly used whenever the method accesses properties or
1396        // calls other methods — never report it as unused.
1397        if name == "this" {
1398            continue;
1399        }
1400        if name.starts_with('_') {
1401            continue;
1402        }
1403        if !ctx.read_vars.contains(name) {
1404            issues.push(mir_issues::Issue::new(
1405                mir_issues::IssueKind::UnusedVariable { name: name.clone() },
1406                mir_issues::Location {
1407                    file: file.clone(),
1408                    line: 1,
1409                    col_start: 0,
1410                    col_end: 0,
1411                },
1412            ));
1413        }
1414    }
1415}
1416
1417/// Merge a list of return types into a single `Union`.
1418/// Returns `void` if the list is empty.
1419pub fn merge_return_types(return_types: &[Union]) -> Union {
1420    if return_types.is_empty() {
1421        return Union::single(mir_types::Atomic::TVoid);
1422    }
1423    return_types
1424        .iter()
1425        .fold(Union::empty(), |acc, t| Union::merge(&acc, t))
1426}
1427
/// Recursively append every file with a `php` extension under `dir` to `out`.
///
/// Symlinks are never followed, and directories named `vendor`, `.git`,
/// `node_modules`, `.cache` or `.pnpm-store` are not descended into. A directory
/// that cannot be read, and directory entries that fail to load, are skipped
/// silently.
pub(crate) fn collect_php_files(dir: &Path, out: &mut Vec<PathBuf>) {
    const SKIPPED_DIRS: [&str; 5] = ["vendor", ".git", "node_modules", ".cache", ".pnpm-store"];

    let Ok(entries) = std::fs::read_dir(dir) else {
        return;
    };

    for entry in entries.flatten() {
        // Skip symlinks — they can form cycles (e.g. .pnpm-store)
        let is_symlink = matches!(entry.file_type(), Ok(ft) if ft.is_symlink());
        if is_symlink {
            continue;
        }

        let path = entry.path();
        if !path.is_dir() {
            let is_php = path.extension().and_then(|ext| ext.to_str()) == Some("php");
            if is_php {
                out.push(path);
            }
            continue;
        }

        let dir_name = path.file_name().and_then(|n| n.to_str()).unwrap_or("");
        if !SKIPPED_DIRS.contains(&dir_name) {
            collect_php_files(&path, out);
        }
    }
}
1451
1452// ---------------------------------------------------------------------------
1453// AnalysisResult
1454// ---------------------------------------------------------------------------
1455
1456// ---------------------------------------------------------------------------
1457// build_reverse_deps
1458// ---------------------------------------------------------------------------
1459
1460/// Build a reverse dependency graph from the codebase after Pass 1.
1461///
1462/// Returns a map: `defining_file → {files that depend on it}`.
1463///
1464/// Dependency edges captured (all derivable from Pass 1 data):
1465/// - `use` imports  (`file_imports`)
1466/// - `extends` / `implements` / trait `use` from `ClassStorage`
1467fn build_reverse_deps(codebase: &Codebase) -> HashMap<String, HashSet<String>> {
1468    let mut reverse: HashMap<String, HashSet<String>> = HashMap::new();
1469
1470    // Helper: record edge "defining_file → dependent_file"
1471    let mut add_edge = |symbol: &str, dependent_file: &str| {
1472        if let Some(defining_file) = codebase.symbol_to_file.get(symbol) {
1473            let def = defining_file.as_ref().to_string();
1474            if def != dependent_file {
1475                reverse
1476                    .entry(def)
1477                    .or_default()
1478                    .insert(dependent_file.to_string());
1479            }
1480        }
1481    };
1482
1483    // use-import edges
1484    for entry in codebase.file_imports.iter() {
1485        let file = entry.key().as_ref().to_string();
1486        for fqcn in entry.value().values() {
1487            add_edge(fqcn, &file);
1488        }
1489    }
1490
1491    // extends / implements / trait edges from ClassStorage
1492    for entry in codebase.classes.iter() {
1493        let defining = {
1494            let fqcn = entry.key().as_ref();
1495            codebase
1496                .symbol_to_file
1497                .get(fqcn)
1498                .map(|f| f.as_ref().to_string())
1499        };
1500        let Some(file) = defining else { continue };
1501
1502        let cls = entry.value();
1503        if let Some(ref parent) = cls.parent {
1504            add_edge(parent.as_ref(), &file);
1505        }
1506        for iface in &cls.interfaces {
1507            add_edge(iface.as_ref(), &file);
1508        }
1509        for tr in &cls.traits {
1510            add_edge(tr.as_ref(), &file);
1511        }
1512    }
1513
1514    reverse
1515}
1516
1517// ---------------------------------------------------------------------------
1518
1519/// Extract the reference locations recorded for `file` from the codebase into
1520/// a flat `Vec<(symbol_key, start, end)>` suitable for caching.
1521fn extract_reference_locations(codebase: &Codebase, file: &Arc<str>) -> Vec<(String, u32, u32)> {
1522    codebase
1523        .extract_file_reference_locations(file.as_ref())
1524        .into_iter()
1525        .map(|(sym, start, end)| (sym.to_string(), start, end))
1526        .collect()
1527}
1528
1529// ---------------------------------------------------------------------------
1530
1531pub struct AnalysisResult {
1532    pub issues: Vec<Issue>,
1533    pub type_envs: std::collections::HashMap<crate::type_env::ScopeId, crate::type_env::TypeEnv>,
1534    /// Per-expression resolved symbols from Pass 2.
1535    pub symbols: Vec<crate::symbol::ResolvedSymbol>,
1536}
1537
1538impl AnalysisResult {
1539    pub fn error_count(&self) -> usize {
1540        self.issues
1541            .iter()
1542            .filter(|i| i.severity == mir_issues::Severity::Error)
1543            .count()
1544    }
1545
1546    pub fn warning_count(&self) -> usize {
1547        self.issues
1548            .iter()
1549            .filter(|i| i.severity == mir_issues::Severity::Warning)
1550            .count()
1551    }
1552
1553    /// Group issues by source file.
1554    ///
1555    /// Returns a map from absolute file path to the slice of issues that belong
1556    /// to that file. Useful for LSP `textDocument/publishDiagnostics`, which
1557    /// pushes diagnostics per document.
1558    pub fn issues_by_file(&self) -> HashMap<std::sync::Arc<str>, Vec<&Issue>> {
1559        let mut map: HashMap<std::sync::Arc<str>, Vec<&Issue>> = HashMap::new();
1560        for issue in &self.issues {
1561            map.entry(issue.location.file.clone())
1562                .or_default()
1563                .push(issue);
1564        }
1565        map
1566    }
1567
1568    /// Return the innermost resolved symbol whose span contains `byte_offset`
1569    /// in `file`, or `None` if no symbol was recorded at that position.
1570    ///
1571    /// When multiple symbols overlap (e.g. a method call whose span contains a
1572    /// property access span), the one with the smallest span is returned so the
1573    /// caller gets the most specific symbol at the cursor.
1574    ///
1575    /// Typical use: LSP `textDocument/references` and `textDocument/hover`.
1576    pub fn symbol_at(
1577        &self,
1578        file: &str,
1579        byte_offset: u32,
1580    ) -> Option<&crate::symbol::ResolvedSymbol> {
1581        self.symbols
1582            .iter()
1583            .filter(|s| {
1584                s.file.as_ref() == file && s.span.start <= byte_offset && byte_offset < s.span.end
1585            })
1586            .min_by_key(|s| s.span.end - s.span.start)
1587    }
1588}