Skip to main content

mir_analyzer/
project.rs

1/// Project-level orchestration: file discovery, pass 1, pass 2.
2use std::path::{Path, PathBuf};
3use std::sync::Arc;
4
5use rayon::prelude::*;
6
7use std::collections::{HashMap, HashSet};
8
9use crate::cache::{hash_content, AnalysisCache};
10use mir_codebase::Codebase;
11use mir_issues::Issue;
12use mir_types::Union;
13
14use crate::collector::DefinitionCollector;
15
16// ---------------------------------------------------------------------------
17// ProjectAnalyzer
18// ---------------------------------------------------------------------------
19
20pub struct ProjectAnalyzer {
21    pub codebase: Arc<Codebase>,
22    /// Optional cache — when `Some`, Pass 2 results are read/written per file.
23    pub cache: Option<AnalysisCache>,
24    /// Called once after each file completes Pass 2 (used for progress reporting).
25    pub on_file_done: Option<Arc<dyn Fn() + Send + Sync>>,
26    /// PSR-4 autoloader mapping from composer.json, if available.
27    pub psr4: Option<Arc<crate::composer::Psr4Map>>,
28    /// Whether stubs have already been loaded (to avoid double-loading).
29    stubs_loaded: std::sync::atomic::AtomicBool,
30    /// When true, run dead code detection at the end of analysis.
31    pub find_dead_code: bool,
32}
33
34impl ProjectAnalyzer {
35    pub fn new() -> Self {
36        Self {
37            codebase: Arc::new(Codebase::new()),
38            cache: None,
39            on_file_done: None,
40            psr4: None,
41            stubs_loaded: std::sync::atomic::AtomicBool::new(false),
42            find_dead_code: false,
43        }
44    }
45
46    /// Create a `ProjectAnalyzer` with a disk-backed cache stored under `cache_dir`.
47    pub fn with_cache(cache_dir: &Path) -> Self {
48        Self {
49            codebase: Arc::new(Codebase::new()),
50            cache: Some(AnalysisCache::open(cache_dir)),
51            on_file_done: None,
52            psr4: None,
53            stubs_loaded: std::sync::atomic::AtomicBool::new(false),
54            find_dead_code: false,
55        }
56    }
57
58    /// Create a `ProjectAnalyzer` from a project root containing `composer.json`.
59    /// Returns the analyzer (with `psr4` set) and the `Psr4Map` so callers can
60    /// call `map.project_files()` / `map.vendor_files()`.
61    pub fn from_composer(
62        root: &Path,
63    ) -> Result<(Self, crate::composer::Psr4Map), crate::composer::ComposerError> {
64        let map = crate::composer::Psr4Map::from_composer(root)?;
65        let psr4 = Arc::new(map.clone());
66        let analyzer = Self {
67            codebase: Arc::new(Codebase::new()),
68            cache: None,
69            on_file_done: None,
70            psr4: Some(psr4),
71            stubs_loaded: std::sync::atomic::AtomicBool::new(false),
72            find_dead_code: false,
73        };
74        Ok((analyzer, map))
75    }
76
77    /// Expose codebase for external use (e.g., pre-loading stubs from CLI).
78    pub fn codebase(&self) -> &Arc<Codebase> {
79        &self.codebase
80    }
81
82    /// Load PHP built-in stubs. Called automatically by `analyze` if not done yet.
83    pub fn load_stubs(&self) {
84        if !self
85            .stubs_loaded
86            .swap(true, std::sync::atomic::Ordering::SeqCst)
87        {
88            crate::stubs::load_stubs(&self.codebase);
89        }
90    }
91
92    /// Run the full analysis pipeline on a set of file paths.
93    pub fn analyze(&self, paths: &[PathBuf]) -> AnalysisResult {
94        let mut all_issues = Vec::new();
95        let mut parse_errors = Vec::new();
96
97        // ---- Load PHP built-in stubs (before Pass 1 so user code can override)
98        self.load_stubs();
99
100        // ---- Pre-Pass-2 invalidation: evict dependents of changed files ------
101        // Uses the reverse dep graph persisted from the previous run.
102        if let Some(cache) = &self.cache {
103            let changed: Vec<String> = paths
104                .iter()
105                .filter_map(|p| {
106                    let path_str = p.to_string_lossy().into_owned();
107                    let content = std::fs::read_to_string(p).ok()?;
108                    let h = hash_content(&content);
109                    if cache.get(&path_str, &h).is_none() {
110                        Some(path_str)
111                    } else {
112                        None
113                    }
114                })
115                .collect();
116            if !changed.is_empty() {
117                cache.evict_with_dependents(&changed);
118            }
119        }
120
121        // ---- Pass 1: read files in parallel ----------------------------------
122        let file_data: Vec<(Arc<str>, String)> = paths
123            .par_iter()
124            .filter_map(|path| match std::fs::read_to_string(path) {
125                Ok(src) => Some((Arc::from(path.to_string_lossy().as_ref()), src)),
126                Err(e) => {
127                    eprintln!("Cannot read {}: {}", path.display(), e);
128                    None
129                }
130            })
131            .collect();
132
133        // ---- Pre-index pass: use SymbolTable to build FQCN index & file imports ---
134        // SymbolTable is lightweight (no type inference) so we run it in parallel.
135        file_data.par_iter().for_each(|(file, src)| {
136            let arena = bumpalo::Bump::new();
137            let result = php_rs_parser::parse(&arena, src);
138            let table = php_ast::symbol_table::SymbolTable::build(&result.program);
139
140            // Populate known_symbols with all top-level FQCNs
141            for sym in table.symbols() {
142                if sym.parent.is_none() {
143                    self.codebase
144                        .known_symbols
145                        .insert(Arc::from(sym.fqn.as_str()));
146                }
147            }
148
149            // Populate file_imports from SymbolTable imports
150            let mut imports = std::collections::HashMap::new();
151            for imp in table.imports() {
152                imports.insert(imp.local_name().to_string(), imp.name.to_string());
153            }
154            if !imports.is_empty() {
155                self.codebase.file_imports.insert(file.clone(), imports);
156            }
157
158            // Populate file_namespaces from top-level symbol FQNs
159            // (infer namespace from the first namespaced symbol)
160            for sym in table.symbols() {
161                if sym.parent.is_none() {
162                    if let Some(pos) = sym.fqn.rfind('\\') {
163                        let ns = &sym.fqn[..pos];
164                        self.codebase
165                            .file_namespaces
166                            .insert(file.clone(), ns.to_string());
167                        break;
168                    }
169                }
170            }
171        });
172
173        // ---- Pass 1: definition collection (sequential) -------------------------
174        // DashMap handles concurrent writes, but sequential avoids contention.
175        for (file, src) in &file_data {
176            let arena = bumpalo::Bump::new();
177            let result = php_rs_parser::parse(&arena, src);
178
179            for err in &result.errors {
180                let msg: String = err.to_string();
181                parse_errors.push(Issue::new(
182                    mir_issues::IssueKind::ParseError { message: msg },
183                    mir_issues::Location {
184                        file: file.clone(),
185                        line: 1,
186                        col_start: 0,
187                        col_end: 0,
188                    },
189                ));
190            }
191
192            let collector =
193                DefinitionCollector::new(&self.codebase, file.clone(), src, &result.source_map);
194            let issues = collector.collect(&result.program);
195            all_issues.extend(issues);
196        }
197
198        all_issues.extend(parse_errors);
199
200        // ---- Finalize codebase (resolve inheritance, build dispatch tables) --
201        self.codebase.finalize();
202
203        // ---- Lazy-load unknown classes via PSR-4 (issue #50) ----------------
204        if let Some(psr4) = &self.psr4 {
205            self.lazy_load_missing_classes(psr4.clone(), &mut all_issues);
206        }
207
208        // ---- Build reverse dep graph and persist it for the next run ---------
209        if let Some(cache) = &self.cache {
210            let rev = build_reverse_deps(&self.codebase);
211            cache.set_reverse_deps(rev);
212        }
213
214        // ---- Class-level checks (M11) ----------------------------------------
215        let analyzed_file_set: std::collections::HashSet<std::sync::Arc<str>> =
216            file_data.iter().map(|(f, _)| f.clone()).collect();
217        let class_issues =
218            crate::class::ClassAnalyzer::with_files(&self.codebase, analyzed_file_set, &file_data)
219                .analyze_all();
220        all_issues.extend(class_issues);
221
222        // ---- Pass 2: analyze function/method bodies in parallel (M14) --------
223        // Each file is analyzed independently; arena + parse happen inside the
224        // rayon closure so there is no cross-thread borrow.
225        // When a cache is present, files whose content hash matches a stored
226        // entry skip re-analysis entirely (M17).
227        let pass2_results: Vec<(Vec<Issue>, Vec<crate::symbol::ResolvedSymbol>)> = file_data
228            .par_iter()
229            .map(|(file, src)| {
230                // Cache lookup
231                let result = if let Some(cache) = &self.cache {
232                    let h = hash_content(src);
233                    if let Some(cached) = cache.get(file, &h) {
234                        (cached, Vec::new())
235                    } else {
236                        // Miss — analyze and store
237                        let arena = bumpalo::Bump::new();
238                        let parsed = php_rs_parser::parse(&arena, src);
239                        let (issues, symbols) = self.analyze_bodies(
240                            &parsed.program,
241                            file.clone(),
242                            src,
243                            &parsed.source_map,
244                        );
245                        cache.put(file, h, issues.clone());
246                        (issues, symbols)
247                    }
248                } else {
249                    let arena = bumpalo::Bump::new();
250                    let parsed = php_rs_parser::parse(&arena, src);
251                    self.analyze_bodies(&parsed.program, file.clone(), src, &parsed.source_map)
252                };
253                if let Some(cb) = &self.on_file_done {
254                    cb();
255                }
256                result
257            })
258            .collect();
259
260        let mut all_symbols = Vec::new();
261        for (issues, symbols) in pass2_results {
262            all_issues.extend(issues);
263            all_symbols.extend(symbols);
264        }
265
266        // Persist cache hits/misses to disk
267        if let Some(cache) = &self.cache {
268            cache.flush();
269        }
270
271        // ---- Dead-code detection (M18) --------------------------------------
272        if self.find_dead_code {
273            let dead_code_issues =
274                crate::dead_code::DeadCodeAnalyzer::new(&self.codebase).analyze();
275            all_issues.extend(dead_code_issues);
276        }
277
278        AnalysisResult {
279            issues: all_issues,
280            type_envs: std::collections::HashMap::new(),
281            symbols: all_symbols,
282        }
283    }
284
285    /// Lazily load class definitions for referenced-but-unknown FQCNs via PSR-4.
286    ///
287    /// After Pass 1 and `codebase.finalize()`, some classes referenced as parents
288    /// or interfaces may not be in the codebase (they weren't in the initial file
289    /// list). This method iterates up to `max_depth` times, each time resolving
290    /// unknown parent/interface FQCNs via the PSR-4 map, running Pass 1 on those
291    /// files, and re-finalizing the codebase. The loop stops when no new files
292    /// are discovered.
293    fn lazy_load_missing_classes(
294        &self,
295        psr4: Arc<crate::composer::Psr4Map>,
296        all_issues: &mut Vec<Issue>,
297    ) {
298        use std::collections::HashSet;
299
300        let max_depth = 10; // prevent infinite chains
301        let mut loaded: HashSet<String> = HashSet::new();
302
303        for _ in 0..max_depth {
304            // Collect all referenced FQCNs that aren't in the codebase
305            let mut to_load: Vec<(String, PathBuf)> = Vec::new();
306
307            for entry in self.codebase.classes.iter() {
308                let cls = entry.value();
309
310                // Check parent class
311                if let Some(parent) = &cls.parent {
312                    let fqcn = parent.as_ref();
313                    if !self.codebase.classes.contains_key(fqcn) && !loaded.contains(fqcn) {
314                        if let Some(path) = psr4.resolve(fqcn) {
315                            to_load.push((fqcn.to_string(), path));
316                        }
317                    }
318                }
319
320                // Check interfaces
321                for iface in &cls.interfaces {
322                    let fqcn = iface.as_ref();
323                    if !self.codebase.classes.contains_key(fqcn)
324                        && !self.codebase.interfaces.contains_key(fqcn)
325                        && !loaded.contains(fqcn)
326                    {
327                        if let Some(path) = psr4.resolve(fqcn) {
328                            to_load.push((fqcn.to_string(), path));
329                        }
330                    }
331                }
332            }
333
334            if to_load.is_empty() {
335                break;
336            }
337
338            // Load each discovered file (Pass 1 only)
339            for (fqcn, path) in to_load {
340                loaded.insert(fqcn);
341                if let Ok(src) = std::fs::read_to_string(&path) {
342                    let file: Arc<str> = Arc::from(path.to_string_lossy().as_ref());
343                    let arena = bumpalo::Bump::new();
344                    let result = php_rs_parser::parse(&arena, &src);
345                    let collector = crate::collector::DefinitionCollector::new(
346                        &self.codebase,
347                        file,
348                        &src,
349                        &result.source_map,
350                    );
351                    let issues = collector.collect(&result.program);
352                    all_issues.extend(issues);
353                }
354            }
355
356            // Re-finalize to include newly loaded classes in the inheritance graph.
357            // Must reset the flag first so finalize() isn't a no-op.
358            self.codebase.invalidate_finalization();
359            self.codebase.finalize();
360        }
361    }
362
363    /// Re-analyze a single file within the existing codebase.
364    ///
365    /// This is the incremental analysis API for LSP:
366    /// 1. Removes old definitions from this file
367    /// 2. Re-runs Pass 1 (definition collection) on the new content
368    /// 3. Re-finalizes the codebase (rebuilds inheritance)
369    /// 4. Re-runs Pass 2 (body analysis) on this file
370    /// 5. Returns the analysis result for this file only
371    pub fn re_analyze_file(&self, file_path: &str, new_content: &str) -> AnalysisResult {
372        // 1. Remove old definitions from this file
373        self.codebase.remove_file_definitions(file_path);
374
375        // 2. Parse new content and run Pass 1
376        let file: Arc<str> = Arc::from(file_path);
377        let arena = bumpalo::Bump::new();
378        let parsed = php_rs_parser::parse(&arena, new_content);
379
380        let mut all_issues = Vec::new();
381
382        // Collect parse errors
383        for err in &parsed.errors {
384            all_issues.push(Issue::new(
385                mir_issues::IssueKind::ParseError {
386                    message: err.to_string(),
387                },
388                mir_issues::Location {
389                    file: file.clone(),
390                    line: 1,
391                    col_start: 0,
392                    col_end: 0,
393                },
394            ));
395        }
396
397        let collector = DefinitionCollector::new(
398            &self.codebase,
399            file.clone(),
400            new_content,
401            &parsed.source_map,
402        );
403        all_issues.extend(collector.collect(&parsed.program));
404
405        // 3. Re-finalize (invalidation already done by remove_file_definitions)
406        self.codebase.finalize();
407
408        // 4. Run Pass 2 on this file
409        let (body_issues, symbols) = self.analyze_bodies(
410            &parsed.program,
411            file.clone(),
412            new_content,
413            &parsed.source_map,
414        );
415        all_issues.extend(body_issues);
416
417        // 5. Update cache if present
418        if let Some(cache) = &self.cache {
419            let h = hash_content(new_content);
420            cache.evict_with_dependents(&[file_path.to_string()]);
421            cache.put(file_path, h, all_issues.clone());
422        }
423
424        AnalysisResult {
425            issues: all_issues,
426            type_envs: HashMap::new(),
427            symbols,
428        }
429    }
430
431    /// Analyze a PHP source string without a real file path.
432    /// Useful for tests and LSP single-file mode.
433    pub fn analyze_source(source: &str) -> AnalysisResult {
434        use crate::collector::DefinitionCollector;
435        let analyzer = ProjectAnalyzer::new();
436        analyzer.load_stubs();
437        let file: Arc<str> = Arc::from("<source>");
438        let arena = bumpalo::Bump::new();
439        let result = php_rs_parser::parse(&arena, source);
440        let mut all_issues = Vec::new();
441        let collector =
442            DefinitionCollector::new(&analyzer.codebase, file.clone(), source, &result.source_map);
443        all_issues.extend(collector.collect(&result.program));
444        analyzer.codebase.finalize();
445        let mut type_envs = std::collections::HashMap::new();
446        let mut all_symbols = Vec::new();
447        all_issues.extend(analyzer.analyze_bodies_typed(
448            &result.program,
449            file.clone(),
450            source,
451            &result.source_map,
452            &mut type_envs,
453            &mut all_symbols,
454        ));
455        AnalysisResult {
456            issues: all_issues,
457            type_envs,
458            symbols: all_symbols,
459        }
460    }
461
462    /// Pass 2: walk all function/method bodies in one file, return issues, and
463    /// write inferred return types back to the codebase.
464    fn analyze_bodies<'arena, 'src>(
465        &self,
466        program: &php_ast::ast::Program<'arena, 'src>,
467        file: Arc<str>,
468        source: &str,
469        source_map: &php_ast::source_map::SourceMap,
470    ) -> (Vec<mir_issues::Issue>, Vec<crate::symbol::ResolvedSymbol>) {
471        use php_ast::ast::StmtKind;
472
473        let mut all_issues = Vec::new();
474        let mut all_symbols = Vec::new();
475
476        for stmt in program.stmts.iter() {
477            match &stmt.kind {
478                StmtKind::Function(decl) => {
479                    self.analyze_fn_decl(
480                        decl,
481                        &file,
482                        source,
483                        source_map,
484                        &mut all_issues,
485                        &mut all_symbols,
486                    );
487                }
488                StmtKind::Class(decl) => {
489                    self.analyze_class_decl(
490                        decl,
491                        &file,
492                        source,
493                        source_map,
494                        &mut all_issues,
495                        &mut all_symbols,
496                    );
497                }
498                StmtKind::Enum(decl) => {
499                    self.analyze_enum_decl(decl, &file, source, source_map, &mut all_issues);
500                }
501                StmtKind::Namespace(ns) => {
502                    if let php_ast::ast::NamespaceBody::Braced(stmts) = &ns.body {
503                        for inner in stmts.iter() {
504                            match &inner.kind {
505                                StmtKind::Function(decl) => {
506                                    self.analyze_fn_decl(
507                                        decl,
508                                        &file,
509                                        source,
510                                        source_map,
511                                        &mut all_issues,
512                                        &mut all_symbols,
513                                    );
514                                }
515                                StmtKind::Class(decl) => {
516                                    self.analyze_class_decl(
517                                        decl,
518                                        &file,
519                                        source,
520                                        source_map,
521                                        &mut all_issues,
522                                        &mut all_symbols,
523                                    );
524                                }
525                                StmtKind::Enum(decl) => {
526                                    self.analyze_enum_decl(
527                                        decl,
528                                        &file,
529                                        source,
530                                        source_map,
531                                        &mut all_issues,
532                                    );
533                                }
534                                _ => {}
535                            }
536                        }
537                    }
538                }
539                _ => {}
540            }
541        }
542
543        (all_issues, all_symbols)
544    }
545
546    /// Analyze a single function declaration body and collect issues + inferred return type.
547    #[allow(clippy::too_many_arguments)]
548    fn analyze_fn_decl<'arena, 'src>(
549        &self,
550        decl: &php_ast::ast::FunctionDecl<'arena, 'src>,
551        file: &Arc<str>,
552        source: &str,
553        source_map: &php_ast::source_map::SourceMap,
554        all_issues: &mut Vec<mir_issues::Issue>,
555        all_symbols: &mut Vec<crate::symbol::ResolvedSymbol>,
556    ) {
557        let fn_name = decl.name;
558        let body = &decl.body;
559        // Check parameter and return type hints for undefined classes.
560        for param in decl.params.iter() {
561            if let Some(hint) = &param.type_hint {
562                check_type_hint_classes(hint, &self.codebase, file, source, source_map, all_issues);
563            }
564        }
565        if let Some(hint) = &decl.return_type {
566            check_type_hint_classes(hint, &self.codebase, file, source, source_map, all_issues);
567        }
568        use crate::context::Context;
569        use crate::stmt::StatementsAnalyzer;
570        use mir_issues::IssueBuffer;
571
572        // Resolve function name using the file's namespace (handles namespaced functions)
573        let resolved_fn = self.codebase.resolve_class_name(file.as_ref(), fn_name);
574        let func_opt: Option<mir_codebase::storage::FunctionStorage> = self
575            .codebase
576            .functions
577            .get(resolved_fn.as_str())
578            .map(|r| r.clone())
579            .or_else(|| self.codebase.functions.get(fn_name).map(|r| r.clone()))
580            .or_else(|| {
581                self.codebase
582                    .functions
583                    .iter()
584                    .find(|e| e.short_name.as_ref() == fn_name)
585                    .map(|e| e.value().clone())
586            });
587
588        let fqn = func_opt.as_ref().map(|f| f.fqn.clone());
589        // Always use the codebase entry when its params match the AST (same count + names).
590        // This covers the common case and preserves docblock-enriched types.
591        // When names differ (two files define the same unnamespaced function), fall back to
592        // the AST params so param variables are always in scope for this file's body.
593        let (params, return_ty): (Vec<mir_codebase::FnParam>, _) = match &func_opt {
594            Some(f)
595                if f.params.len() == decl.params.len()
596                    && f.params
597                        .iter()
598                        .zip(decl.params.iter())
599                        .all(|(cp, ap)| cp.name.as_ref() == ap.name) =>
600            {
601                (f.params.clone(), f.return_type.clone())
602            }
603            _ => {
604                let ast_params = decl
605                    .params
606                    .iter()
607                    .map(|p| mir_codebase::FnParam {
608                        name: Arc::from(p.name),
609                        ty: None,
610                        default: p.default.as_ref().map(|_| mir_types::Union::mixed()),
611                        is_variadic: p.variadic,
612                        is_byref: p.by_ref,
613                        is_optional: p.default.is_some() || p.variadic,
614                    })
615                    .collect();
616                (ast_params, None)
617            }
618        };
619
620        let mut ctx = Context::for_function(&params, return_ty, None, None, None, false);
621        let mut buf = IssueBuffer::new();
622        let mut sa = StatementsAnalyzer::new(
623            &self.codebase,
624            file.clone(),
625            source,
626            source_map,
627            &mut buf,
628            all_symbols,
629        );
630        sa.analyze_stmts(body, &mut ctx);
631        let inferred = merge_return_types(&sa.return_types);
632        drop(sa);
633
634        emit_unused_params(&params, &ctx, false, file, all_issues);
635        emit_unused_variables(&ctx, file, all_issues);
636        all_issues.extend(buf.into_issues());
637
638        if let Some(fqn) = fqn {
639            if let Some(mut func) = self.codebase.functions.get_mut(fqn.as_ref()) {
640                func.inferred_return_type = Some(inferred);
641            }
642        }
643    }
644
645    /// Analyze all method bodies on a class declaration and collect issues + inferred return types.
646    #[allow(clippy::too_many_arguments)]
647    fn analyze_class_decl<'arena, 'src>(
648        &self,
649        decl: &php_ast::ast::ClassDecl<'arena, 'src>,
650        file: &Arc<str>,
651        source: &str,
652        source_map: &php_ast::source_map::SourceMap,
653        all_issues: &mut Vec<mir_issues::Issue>,
654        all_symbols: &mut Vec<crate::symbol::ResolvedSymbol>,
655    ) {
656        use crate::context::Context;
657        use crate::stmt::StatementsAnalyzer;
658        use mir_issues::IssueBuffer;
659
660        let class_name = decl.name.unwrap_or("<anonymous>");
661        // Resolve the FQCN using the file's namespace/imports — avoids ambiguity
662        // when multiple classes share the same short name across namespaces.
663        let resolved = self.codebase.resolve_class_name(file.as_ref(), class_name);
664        let fqcn: &str = &resolved;
665        let parent_fqcn = self
666            .codebase
667            .classes
668            .get(fqcn)
669            .and_then(|c| c.parent.clone());
670
671        for member in decl.members.iter() {
672            let php_ast::ast::ClassMemberKind::Method(method) = &member.kind else {
673                continue;
674            };
675
676            // Check parameter and return type hints for undefined classes (even abstract methods).
677            for param in method.params.iter() {
678                if let Some(hint) = &param.type_hint {
679                    check_type_hint_classes(
680                        hint,
681                        &self.codebase,
682                        file,
683                        source,
684                        source_map,
685                        all_issues,
686                    );
687                }
688            }
689            if let Some(hint) = &method.return_type {
690                check_type_hint_classes(hint, &self.codebase, file, source, source_map, all_issues);
691            }
692
693            let Some(body) = &method.body else { continue };
694
695            let method_storage = self.codebase.get_method(fqcn, method.name);
696            let (params, return_ty) = method_storage
697                .as_ref()
698                .map(|m| (m.params.clone(), m.return_type.clone()))
699                .unwrap_or_default();
700
701            let is_ctor = method.name == "__construct";
702            let mut ctx = Context::for_method(
703                &params,
704                return_ty,
705                Some(Arc::from(fqcn)),
706                parent_fqcn.clone(),
707                Some(Arc::from(fqcn)),
708                false,
709                is_ctor,
710            );
711
712            let mut buf = IssueBuffer::new();
713            let mut sa = StatementsAnalyzer::new(
714                &self.codebase,
715                file.clone(),
716                source,
717                source_map,
718                &mut buf,
719                all_symbols,
720            );
721            sa.analyze_stmts(body, &mut ctx);
722            let inferred = merge_return_types(&sa.return_types);
723            drop(sa);
724
725            emit_unused_params(&params, &ctx, is_ctor, file, all_issues);
726            emit_unused_variables(&ctx, file, all_issues);
727            all_issues.extend(buf.into_issues());
728
729            if let Some(mut cls) = self.codebase.classes.get_mut(fqcn) {
730                if let Some(m) = cls.own_methods.get_mut(method.name) {
731                    m.inferred_return_type = Some(inferred);
732                }
733            }
734        }
735    }
736
737    /// Like `analyze_bodies` but also populates `type_envs` with per-scope type environments.
738    #[allow(clippy::too_many_arguments)]
739    fn analyze_bodies_typed<'arena, 'src>(
740        &self,
741        program: &php_ast::ast::Program<'arena, 'src>,
742        file: Arc<str>,
743        source: &str,
744        source_map: &php_ast::source_map::SourceMap,
745        type_envs: &mut std::collections::HashMap<
746            crate::type_env::ScopeId,
747            crate::type_env::TypeEnv,
748        >,
749        all_symbols: &mut Vec<crate::symbol::ResolvedSymbol>,
750    ) -> Vec<mir_issues::Issue> {
751        use php_ast::ast::StmtKind;
752        let mut all_issues = Vec::new();
753        for stmt in program.stmts.iter() {
754            match &stmt.kind {
755                StmtKind::Function(decl) => {
756                    self.analyze_fn_decl_typed(
757                        decl,
758                        &file,
759                        source,
760                        source_map,
761                        &mut all_issues,
762                        type_envs,
763                        all_symbols,
764                    );
765                }
766                StmtKind::Class(decl) => {
767                    self.analyze_class_decl_typed(
768                        decl,
769                        &file,
770                        source,
771                        source_map,
772                        &mut all_issues,
773                        type_envs,
774                        all_symbols,
775                    );
776                }
777                StmtKind::Enum(decl) => {
778                    self.analyze_enum_decl(decl, &file, source, source_map, &mut all_issues);
779                }
780                StmtKind::Namespace(ns) => {
781                    if let php_ast::ast::NamespaceBody::Braced(stmts) = &ns.body {
782                        for inner in stmts.iter() {
783                            match &inner.kind {
784                                StmtKind::Function(decl) => {
785                                    self.analyze_fn_decl_typed(
786                                        decl,
787                                        &file,
788                                        source,
789                                        source_map,
790                                        &mut all_issues,
791                                        type_envs,
792                                        all_symbols,
793                                    );
794                                }
795                                StmtKind::Class(decl) => {
796                                    self.analyze_class_decl_typed(
797                                        decl,
798                                        &file,
799                                        source,
800                                        source_map,
801                                        &mut all_issues,
802                                        type_envs,
803                                        all_symbols,
804                                    );
805                                }
806                                StmtKind::Enum(decl) => {
807                                    self.analyze_enum_decl(
808                                        decl,
809                                        &file,
810                                        source,
811                                        source_map,
812                                        &mut all_issues,
813                                    );
814                                }
815                                _ => {}
816                            }
817                        }
818                    }
819                }
820                _ => {}
821            }
822        }
823        all_issues
824    }
825
826    /// Like `analyze_fn_decl` but also captures a `TypeEnv` for the function scope.
827    #[allow(clippy::too_many_arguments)]
828    fn analyze_fn_decl_typed<'arena, 'src>(
829        &self,
830        decl: &php_ast::ast::FunctionDecl<'arena, 'src>,
831        file: &Arc<str>,
832        source: &str,
833        source_map: &php_ast::source_map::SourceMap,
834        all_issues: &mut Vec<mir_issues::Issue>,
835        type_envs: &mut std::collections::HashMap<
836            crate::type_env::ScopeId,
837            crate::type_env::TypeEnv,
838        >,
839        all_symbols: &mut Vec<crate::symbol::ResolvedSymbol>,
840    ) {
841        use crate::context::Context;
842        use crate::stmt::StatementsAnalyzer;
843        use mir_issues::IssueBuffer;
844
845        let fn_name = decl.name;
846        let body = &decl.body;
847
848        for param in decl.params.iter() {
849            if let Some(hint) = &param.type_hint {
850                check_type_hint_classes(hint, &self.codebase, file, source, source_map, all_issues);
851            }
852        }
853        if let Some(hint) = &decl.return_type {
854            check_type_hint_classes(hint, &self.codebase, file, source, source_map, all_issues);
855        }
856
857        let resolved_fn = self.codebase.resolve_class_name(file.as_ref(), fn_name);
858        let func_opt: Option<mir_codebase::storage::FunctionStorage> = self
859            .codebase
860            .functions
861            .get(resolved_fn.as_str())
862            .map(|r| r.clone())
863            .or_else(|| self.codebase.functions.get(fn_name).map(|r| r.clone()))
864            .or_else(|| {
865                self.codebase
866                    .functions
867                    .iter()
868                    .find(|e| e.short_name.as_ref() == fn_name)
869                    .map(|e| e.value().clone())
870            });
871
872        let fqn = func_opt.as_ref().map(|f| f.fqn.clone());
873        let (params, return_ty): (Vec<mir_codebase::FnParam>, _) = match &func_opt {
874            Some(f)
875                if f.params.len() == decl.params.len()
876                    && f.params
877                        .iter()
878                        .zip(decl.params.iter())
879                        .all(|(cp, ap)| cp.name.as_ref() == ap.name) =>
880            {
881                (f.params.clone(), f.return_type.clone())
882            }
883            _ => {
884                let ast_params = decl
885                    .params
886                    .iter()
887                    .map(|p| mir_codebase::FnParam {
888                        name: Arc::from(p.name),
889                        ty: None,
890                        default: p.default.as_ref().map(|_| mir_types::Union::mixed()),
891                        is_variadic: p.variadic,
892                        is_byref: p.by_ref,
893                        is_optional: p.default.is_some() || p.variadic,
894                    })
895                    .collect();
896                (ast_params, None)
897            }
898        };
899
900        let mut ctx = Context::for_function(&params, return_ty, None, None, None, false);
901        let mut buf = IssueBuffer::new();
902        let mut sa = StatementsAnalyzer::new(
903            &self.codebase,
904            file.clone(),
905            source,
906            source_map,
907            &mut buf,
908            all_symbols,
909        );
910        sa.analyze_stmts(body, &mut ctx);
911        let inferred = merge_return_types(&sa.return_types);
912        drop(sa);
913
914        // Capture TypeEnv for this scope
915        let scope_name = fqn.clone().unwrap_or_else(|| Arc::from(fn_name));
916        type_envs.insert(
917            crate::type_env::ScopeId::Function {
918                file: file.clone(),
919                name: scope_name,
920            },
921            crate::type_env::TypeEnv::new(ctx.vars.clone()),
922        );
923
924        emit_unused_params(&params, &ctx, false, file, all_issues);
925        emit_unused_variables(&ctx, file, all_issues);
926        all_issues.extend(buf.into_issues());
927
928        if let Some(fqn) = fqn {
929            if let Some(mut func) = self.codebase.functions.get_mut(fqn.as_ref()) {
930                func.inferred_return_type = Some(inferred);
931            }
932        }
933    }
934
935    /// Like `analyze_class_decl` but also captures a `TypeEnv` per method scope.
936    #[allow(clippy::too_many_arguments)]
937    fn analyze_class_decl_typed<'arena, 'src>(
938        &self,
939        decl: &php_ast::ast::ClassDecl<'arena, 'src>,
940        file: &Arc<str>,
941        source: &str,
942        source_map: &php_ast::source_map::SourceMap,
943        all_issues: &mut Vec<mir_issues::Issue>,
944        type_envs: &mut std::collections::HashMap<
945            crate::type_env::ScopeId,
946            crate::type_env::TypeEnv,
947        >,
948        all_symbols: &mut Vec<crate::symbol::ResolvedSymbol>,
949    ) {
950        use crate::context::Context;
951        use crate::stmt::StatementsAnalyzer;
952        use mir_issues::IssueBuffer;
953
954        let class_name = decl.name.unwrap_or("<anonymous>");
955        let resolved = self.codebase.resolve_class_name(file.as_ref(), class_name);
956        let fqcn: &str = &resolved;
957        let parent_fqcn = self
958            .codebase
959            .classes
960            .get(fqcn)
961            .and_then(|c| c.parent.clone());
962
963        for member in decl.members.iter() {
964            let php_ast::ast::ClassMemberKind::Method(method) = &member.kind else {
965                continue;
966            };
967
968            for param in method.params.iter() {
969                if let Some(hint) = &param.type_hint {
970                    check_type_hint_classes(
971                        hint,
972                        &self.codebase,
973                        file,
974                        source,
975                        source_map,
976                        all_issues,
977                    );
978                }
979            }
980            if let Some(hint) = &method.return_type {
981                check_type_hint_classes(hint, &self.codebase, file, source, source_map, all_issues);
982            }
983
984            let Some(body) = &method.body else { continue };
985
986            let method_storage = self.codebase.get_method(fqcn, method.name);
987            let (params, return_ty) = method_storage
988                .as_ref()
989                .map(|m| (m.params.clone(), m.return_type.clone()))
990                .unwrap_or_default();
991
992            let is_ctor = method.name == "__construct";
993            let mut ctx = Context::for_method(
994                &params,
995                return_ty,
996                Some(Arc::from(fqcn)),
997                parent_fqcn.clone(),
998                Some(Arc::from(fqcn)),
999                false,
1000                is_ctor,
1001            );
1002
1003            let mut buf = IssueBuffer::new();
1004            let mut sa = StatementsAnalyzer::new(
1005                &self.codebase,
1006                file.clone(),
1007                source,
1008                source_map,
1009                &mut buf,
1010                all_symbols,
1011            );
1012            sa.analyze_stmts(body, &mut ctx);
1013            let inferred = merge_return_types(&sa.return_types);
1014            drop(sa);
1015
1016            // Capture TypeEnv for this method scope
1017            type_envs.insert(
1018                crate::type_env::ScopeId::Method {
1019                    class: Arc::from(fqcn),
1020                    method: Arc::from(method.name),
1021                },
1022                crate::type_env::TypeEnv::new(ctx.vars.clone()),
1023            );
1024
1025            emit_unused_params(&params, &ctx, is_ctor, file, all_issues);
1026            emit_unused_variables(&ctx, file, all_issues);
1027            all_issues.extend(buf.into_issues());
1028
1029            if let Some(mut cls) = self.codebase.classes.get_mut(fqcn) {
1030                if let Some(m) = cls.own_methods.get_mut(method.name) {
1031                    m.inferred_return_type = Some(inferred);
1032                }
1033            }
1034        }
1035    }
1036
1037    /// Discover all `.php` files under a directory, recursively.
1038    pub fn discover_files(root: &Path) -> Vec<PathBuf> {
1039        if root.is_file() {
1040            return vec![root.to_path_buf()];
1041        }
1042        let mut files = Vec::new();
1043        collect_php_files(root, &mut files);
1044        files
1045    }
1046
1047    /// Pass 1 only: collect type definitions from `paths` into the codebase without
1048    /// analyzing method bodies or emitting issues. Used to load vendor types.
1049    pub fn collect_types_only(&self, paths: &[PathBuf]) {
1050        let file_data: Vec<(Arc<str>, String)> = paths
1051            .par_iter()
1052            .filter_map(|path| {
1053                std::fs::read_to_string(path)
1054                    .ok()
1055                    .map(|src| (Arc::from(path.to_string_lossy().as_ref()), src))
1056            })
1057            .collect();
1058
1059        for (file, src) in &file_data {
1060            let arena = bumpalo::Bump::new();
1061            let result = php_rs_parser::parse(&arena, src);
1062            let collector =
1063                DefinitionCollector::new(&self.codebase, file.clone(), src, &result.source_map);
1064            // Ignore any issues emitted during vendor collection
1065            let _ = collector.collect(&result.program);
1066        }
1067    }
1068
1069    /// Check type hints in enum methods for undefined classes.
1070    #[allow(clippy::too_many_arguments)]
1071    fn analyze_enum_decl<'arena, 'src>(
1072        &self,
1073        decl: &php_ast::ast::EnumDecl<'arena, 'src>,
1074        file: &Arc<str>,
1075        source: &str,
1076        source_map: &php_ast::source_map::SourceMap,
1077        all_issues: &mut Vec<mir_issues::Issue>,
1078    ) {
1079        use php_ast::ast::EnumMemberKind;
1080        for member in decl.members.iter() {
1081            let EnumMemberKind::Method(method) = &member.kind else {
1082                continue;
1083            };
1084            for param in method.params.iter() {
1085                if let Some(hint) = &param.type_hint {
1086                    check_type_hint_classes(
1087                        hint,
1088                        &self.codebase,
1089                        file,
1090                        source,
1091                        source_map,
1092                        all_issues,
1093                    );
1094                }
1095            }
1096            if let Some(hint) = &method.return_type {
1097                check_type_hint_classes(hint, &self.codebase, file, source, source_map, all_issues);
1098            }
1099        }
1100    }
1101}
1102
1103impl Default for ProjectAnalyzer {
1104    fn default() -> Self {
1105        Self::new()
1106    }
1107}
1108
1109// ---------------------------------------------------------------------------
1110// Type-hint class existence checker
1111// ---------------------------------------------------------------------------
1112
1113/// Walk a `TypeHint` AST node and emit `UndefinedClass` for any named class
1114/// that does not exist in the codebase.  Skips PHP built-in type keywords.
1115fn check_type_hint_classes<'arena, 'src>(
1116    hint: &php_ast::ast::TypeHint<'arena, 'src>,
1117    codebase: &Codebase,
1118    file: &Arc<str>,
1119    source: &str,
1120    source_map: &php_ast::source_map::SourceMap,
1121    issues: &mut Vec<mir_issues::Issue>,
1122) {
1123    use php_ast::ast::TypeHintKind;
1124    match &hint.kind {
1125        TypeHintKind::Named(name) => {
1126            let name_str = crate::parser::name_to_string(name);
1127            // Skip built-in pseudo-types that are not real classes.
1128            if is_pseudo_type(&name_str) {
1129                return;
1130            }
1131            let resolved = codebase.resolve_class_name(file.as_ref(), &name_str);
1132            if !codebase.type_exists(&resolved) {
1133                let lc = source_map.offset_to_line_col(hint.span.start);
1134                let (line, col) = (lc.line + 1, lc.col as u16);
1135                issues.push(
1136                    mir_issues::Issue::new(
1137                        mir_issues::IssueKind::UndefinedClass { name: resolved },
1138                        mir_issues::Location {
1139                            file: file.clone(),
1140                            line,
1141                            col_start: col,
1142                            col_end: col,
1143                        },
1144                    )
1145                    .with_snippet(crate::parser::span_text(source, hint.span).unwrap_or_default()),
1146                );
1147            }
1148        }
1149        TypeHintKind::Nullable(inner) => {
1150            check_type_hint_classes(inner, codebase, file, source, source_map, issues);
1151        }
1152        TypeHintKind::Union(parts) | TypeHintKind::Intersection(parts) => {
1153            for part in parts.iter() {
1154                check_type_hint_classes(part, codebase, file, source, source_map, issues);
1155            }
1156        }
1157        TypeHintKind::Keyword(_, _) => {} // built-in keyword, always valid
1158    }
1159}
1160
/// Returns true for names that are PHP pseudo-types / special identifiers, not
/// real classes.
///
/// The comparison is case-insensitive: `name` is lowercased before it is looked up in the
/// fixed list below. The whole name must match; prefixes and qualified forms do not.
fn is_pseudo_type(name: &str) -> bool {
    const PSEUDO_TYPES: [&str; 12] = [
        "self", "static", "parent", "null", "true", "false", "never", "void", "mixed", "object",
        "callable", "iterable",
    ];
    let lowered = name.to_lowercase();
    PSEUDO_TYPES.contains(&lowered.as_str())
}
1180
1181/// Emit `UnusedParam` issues for params that were never read in `ctx`.
1182/// Skips variadic params, `_`-prefixed names, and constructors.
1183fn emit_unused_params(
1184    params: &[mir_codebase::FnParam],
1185    ctx: &crate::context::Context,
1186    is_ctor: bool,
1187    file: &Arc<str>,
1188    issues: &mut Vec<mir_issues::Issue>,
1189) {
1190    if is_ctor {
1191        return;
1192    }
1193    for p in params {
1194        if p.is_variadic {
1195            continue;
1196        }
1197        let name = p.name.as_ref().trim_start_matches('$');
1198        if name.starts_with('_') {
1199            continue;
1200        }
1201        if !ctx.read_vars.contains(name) {
1202            issues.push(
1203                mir_issues::Issue::new(
1204                    mir_issues::IssueKind::UnusedParam {
1205                        name: name.to_string(),
1206                    },
1207                    mir_issues::Location {
1208                        file: file.clone(),
1209                        line: 1,
1210                        col_start: 0,
1211                        col_end: 0,
1212                    },
1213                )
1214                .with_snippet(format!("${}", name)),
1215            );
1216        }
1217    }
1218}
1219
1220fn emit_unused_variables(
1221    ctx: &crate::context::Context,
1222    file: &Arc<str>,
1223    issues: &mut Vec<mir_issues::Issue>,
1224) {
1225    // Superglobals are always "used" — skip them
1226    const SUPERGLOBALS: &[&str] = &[
1227        "_SERVER", "_GET", "_POST", "_REQUEST", "_SESSION", "_COOKIE", "_FILES", "_ENV", "GLOBALS",
1228    ];
1229    for name in &ctx.assigned_vars {
1230        if ctx.param_names.contains(name) {
1231            continue;
1232        }
1233        if SUPERGLOBALS.contains(&name.as_str()) {
1234            continue;
1235        }
1236        if name.starts_with('_') {
1237            continue;
1238        }
1239        if !ctx.read_vars.contains(name) {
1240            issues.push(mir_issues::Issue::new(
1241                mir_issues::IssueKind::UnusedVariable { name: name.clone() },
1242                mir_issues::Location {
1243                    file: file.clone(),
1244                    line: 1,
1245                    col_start: 0,
1246                    col_end: 0,
1247                },
1248            ));
1249        }
1250    }
1251}
1252
1253/// Merge a list of return types into a single `Union`.
1254/// Returns `void` if the list is empty.
1255pub fn merge_return_types(return_types: &[Union]) -> Union {
1256    if return_types.is_empty() {
1257        return Union::single(mir_types::Atomic::TVoid);
1258    }
1259    return_types
1260        .iter()
1261        .fold(Union::empty(), |acc, t| Union::merge(&acc, t))
1262}
1263
/// Recursively appends every file under `dir` whose extension is exactly `php` to `out`.
///
/// Symlinks are never followed or collected, and directories named `vendor`, `.git`,
/// `node_modules`, `.cache` or `.pnpm-store` are not descended into. Directories that
/// cannot be read and entries that fail to enumerate are skipped silently.
pub(crate) fn collect_php_files(dir: &Path, out: &mut Vec<PathBuf>) {
    const SKIPPED_DIRS: [&str; 5] = ["vendor", ".git", "node_modules", ".cache", ".pnpm-store"];

    let Ok(entries) = std::fs::read_dir(dir) else {
        return;
    };

    for entry in entries.flatten() {
        // Skip symlinks — they can form cycles (e.g. .pnpm-store)
        if matches!(entry.file_type(), Ok(kind) if kind.is_symlink()) {
            continue;
        }

        let path = entry.path();
        if !path.is_dir() {
            if path.extension().and_then(|ext| ext.to_str()) == Some("php") {
                out.push(path);
            }
            continue;
        }

        // Directory names that are not valid UTF-8 compare as "" and are descended into.
        let dir_name = path.file_name().and_then(|n| n.to_str()).unwrap_or("");
        if !SKIPPED_DIRS.contains(&dir_name) {
            collect_php_files(&path, out);
        }
    }
}
1287
// ---------------------------------------------------------------------------
// build_reverse_deps
// ---------------------------------------------------------------------------
1295
1296/// Build a reverse dependency graph from the codebase after Pass 1.
1297///
1298/// Returns a map: `defining_file → {files that depend on it}`.
1299///
1300/// Dependency edges captured (all derivable from Pass 1 data):
1301/// - `use` imports  (`file_imports`)
1302/// - `extends` / `implements` / trait `use` from `ClassStorage`
1303fn build_reverse_deps(codebase: &Codebase) -> HashMap<String, HashSet<String>> {
1304    let mut reverse: HashMap<String, HashSet<String>> = HashMap::new();
1305
1306    // Helper: record edge "defining_file → dependent_file"
1307    let mut add_edge = |symbol: &str, dependent_file: &str| {
1308        if let Some(defining_file) = codebase.symbol_to_file.get(symbol) {
1309            let def = defining_file.as_ref().to_string();
1310            if def != dependent_file {
1311                reverse
1312                    .entry(def)
1313                    .or_default()
1314                    .insert(dependent_file.to_string());
1315            }
1316        }
1317    };
1318
1319    // use-import edges
1320    for entry in codebase.file_imports.iter() {
1321        let file = entry.key().as_ref().to_string();
1322        for fqcn in entry.value().values() {
1323            add_edge(fqcn, &file);
1324        }
1325    }
1326
1327    // extends / implements / trait edges from ClassStorage
1328    for entry in codebase.classes.iter() {
1329        let defining = {
1330            let fqcn = entry.key().as_ref();
1331            codebase
1332                .symbol_to_file
1333                .get(fqcn)
1334                .map(|f| f.as_ref().to_string())
1335        };
1336        let Some(file) = defining else { continue };
1337
1338        let cls = entry.value();
1339        if let Some(ref parent) = cls.parent {
1340            add_edge(parent.as_ref(), &file);
1341        }
1342        for iface in &cls.interfaces {
1343            add_edge(iface.as_ref(), &file);
1344        }
1345        for tr in &cls.traits {
1346            add_edge(tr.as_ref(), &file);
1347        }
1348    }
1349
1350    reverse
1351}
1352
// ---------------------------------------------------------------------------
// AnalysisResult
// ---------------------------------------------------------------------------
1354
1355pub struct AnalysisResult {
1356    pub issues: Vec<Issue>,
1357    pub type_envs: std::collections::HashMap<crate::type_env::ScopeId, crate::type_env::TypeEnv>,
1358    /// Per-expression resolved symbols from Pass 2.
1359    pub symbols: Vec<crate::symbol::ResolvedSymbol>,
1360}
1361
1362impl AnalysisResult {
1363    pub fn error_count(&self) -> usize {
1364        self.issues
1365            .iter()
1366            .filter(|i| i.severity == mir_issues::Severity::Error)
1367            .count()
1368    }
1369
1370    pub fn warning_count(&self) -> usize {
1371        self.issues
1372            .iter()
1373            .filter(|i| i.severity == mir_issues::Severity::Warning)
1374            .count()
1375    }
1376}