Skip to main content

mir_analyzer/
project.rs

//! Project-level orchestration: file discovery, pass 1, pass 2.
2use std::path::{Path, PathBuf};
3use std::sync::Arc;
4
5use rayon::prelude::*;
6
7use std::collections::{HashMap, HashSet};
8
9use crate::cache::{hash_content, AnalysisCache};
10use mir_codebase::Codebase;
11use mir_issues::Issue;
12use mir_types::Union;
13
14use crate::collector::DefinitionCollector;
15
16// ---------------------------------------------------------------------------
17// ProjectAnalyzer
18// ---------------------------------------------------------------------------
19
20pub struct ProjectAnalyzer {
21    pub codebase: Arc<Codebase>,
22    /// Optional cache — when `Some`, Pass 2 results are read/written per file.
23    pub cache: Option<AnalysisCache>,
24    /// Called once after each file completes Pass 2 (used for progress reporting).
25    pub on_file_done: Option<Arc<dyn Fn() + Send + Sync>>,
26    /// PSR-4 autoloader mapping from composer.json, if available.
27    pub psr4: Option<Arc<crate::composer::Psr4Map>>,
28    /// Whether stubs have already been loaded (to avoid double-loading).
29    stubs_loaded: std::sync::atomic::AtomicBool,
30    /// When true, run dead code detection at the end of analysis.
31    pub find_dead_code: bool,
32}
33
34impl ProjectAnalyzer {
35    pub fn new() -> Self {
36        Self {
37            codebase: Arc::new(Codebase::new()),
38            cache: None,
39            on_file_done: None,
40            psr4: None,
41            stubs_loaded: std::sync::atomic::AtomicBool::new(false),
42            find_dead_code: false,
43        }
44    }
45
46    /// Create a `ProjectAnalyzer` with a disk-backed cache stored under `cache_dir`.
47    pub fn with_cache(cache_dir: &Path) -> Self {
48        Self {
49            codebase: Arc::new(Codebase::new()),
50            cache: Some(AnalysisCache::open(cache_dir)),
51            on_file_done: None,
52            psr4: None,
53            stubs_loaded: std::sync::atomic::AtomicBool::new(false),
54            find_dead_code: false,
55        }
56    }
57
58    /// Create a `ProjectAnalyzer` from a project root containing `composer.json`.
59    /// Returns the analyzer (with `psr4` set) and the `Psr4Map` so callers can
60    /// call `map.project_files()` / `map.vendor_files()`.
61    pub fn from_composer(
62        root: &Path,
63    ) -> Result<(Self, crate::composer::Psr4Map), crate::composer::ComposerError> {
64        let map = crate::composer::Psr4Map::from_composer(root)?;
65        let psr4 = Arc::new(map.clone());
66        let analyzer = Self {
67            codebase: Arc::new(Codebase::new()),
68            cache: None,
69            on_file_done: None,
70            psr4: Some(psr4),
71            stubs_loaded: std::sync::atomic::AtomicBool::new(false),
72            find_dead_code: false,
73        };
74        Ok((analyzer, map))
75    }
76
77    /// Expose codebase for external use (e.g., pre-loading stubs from CLI).
78    pub fn codebase(&self) -> &Arc<Codebase> {
79        &self.codebase
80    }
81
82    /// Load PHP built-in stubs. Called automatically by `analyze` if not done yet.
83    pub fn load_stubs(&self) {
84        if !self
85            .stubs_loaded
86            .swap(true, std::sync::atomic::Ordering::SeqCst)
87        {
88            crate::stubs::load_stubs(&self.codebase);
89        }
90    }
91
92    /// Run the full analysis pipeline on a set of file paths.
93    pub fn analyze(&self, paths: &[PathBuf]) -> AnalysisResult {
94        let mut all_issues = Vec::new();
95        let mut parse_errors = Vec::new();
96
97        // ---- Load PHP built-in stubs (before Pass 1 so user code can override)
98        self.load_stubs();
99
100        // ---- Pre-Pass-2 invalidation: evict dependents of changed files ------
101        // Uses the reverse dep graph persisted from the previous run.
102        if let Some(cache) = &self.cache {
103            let changed: Vec<String> = paths
104                .iter()
105                .filter_map(|p| {
106                    let path_str = p.to_string_lossy().into_owned();
107                    let content = std::fs::read_to_string(p).ok()?;
108                    let h = hash_content(&content);
109                    if cache.get(&path_str, &h).is_none() {
110                        Some(path_str)
111                    } else {
112                        None
113                    }
114                })
115                .collect();
116            if !changed.is_empty() {
117                cache.evict_with_dependents(&changed);
118            }
119        }
120
121        // ---- Pass 1: read files in parallel ----------------------------------
122        let file_data: Vec<(Arc<str>, String)> = paths
123            .par_iter()
124            .filter_map(|path| match std::fs::read_to_string(path) {
125                Ok(src) => Some((Arc::from(path.to_string_lossy().as_ref()), src)),
126                Err(e) => {
127                    eprintln!("Cannot read {}: {}", path.display(), e);
128                    None
129                }
130            })
131            .collect();
132
133        // ---- Pre-index pass: use SymbolTable to build FQCN index & file imports ---
134        // SymbolTable is lightweight (no type inference) so we run it in parallel.
135        file_data.par_iter().for_each(|(file, src)| {
136            let arena = bumpalo::Bump::new();
137            let result = php_rs_parser::parse(&arena, src);
138            let table = php_ast::symbol_table::SymbolTable::build(&result.program);
139
140            // Populate known_symbols with all top-level FQCNs
141            for sym in table.symbols() {
142                if sym.parent.is_none() {
143                    self.codebase
144                        .known_symbols
145                        .insert(Arc::from(sym.fqn.as_str()));
146                }
147            }
148
149            // Populate file_imports from SymbolTable imports
150            let mut imports = std::collections::HashMap::new();
151            for imp in table.imports() {
152                imports.insert(imp.local_name().to_string(), imp.name.to_string());
153            }
154            if !imports.is_empty() {
155                self.codebase.file_imports.insert(file.clone(), imports);
156            }
157
158            // Populate file_namespaces from top-level symbol FQNs
159            // (infer namespace from the first namespaced symbol)
160            for sym in table.symbols() {
161                if sym.parent.is_none() {
162                    if let Some(pos) = sym.fqn.rfind('\\') {
163                        let ns = &sym.fqn[..pos];
164                        self.codebase
165                            .file_namespaces
166                            .insert(file.clone(), ns.to_string());
167                        break;
168                    }
169                }
170            }
171        });
172
173        // ---- Pass 1: definition collection (sequential) -------------------------
174        // DashMap handles concurrent writes, but sequential avoids contention.
175        for (file, src) in &file_data {
176            let arena = bumpalo::Bump::new();
177            let result = php_rs_parser::parse(&arena, src);
178
179            for err in &result.errors {
180                let msg: String = err.to_string();
181                parse_errors.push(Issue::new(
182                    mir_issues::IssueKind::ParseError { message: msg },
183                    mir_issues::Location {
184                        file: file.clone(),
185                        line: 1,
186                        col_start: 0,
187                        col_end: 0,
188                    },
189                ));
190            }
191
192            let collector = DefinitionCollector::new(&self.codebase, file.clone(), src);
193            let issues = collector.collect(&result.program);
194            all_issues.extend(issues);
195        }
196
197        all_issues.extend(parse_errors);
198
199        // ---- Finalize codebase (resolve inheritance, build dispatch tables) --
200        self.codebase.finalize();
201
202        // ---- Lazy-load unknown classes via PSR-4 (issue #50) ----------------
203        if let Some(psr4) = &self.psr4 {
204            self.lazy_load_missing_classes(psr4.clone(), &mut all_issues);
205        }
206
207        // ---- Build reverse dep graph and persist it for the next run ---------
208        if let Some(cache) = &self.cache {
209            let rev = build_reverse_deps(&self.codebase);
210            cache.set_reverse_deps(rev);
211        }
212
213        // ---- Class-level checks (M11) ----------------------------------------
214        let analyzed_file_set: std::collections::HashSet<std::sync::Arc<str>> =
215            file_data.iter().map(|(f, _)| f.clone()).collect();
216        let class_issues =
217            crate::class::ClassAnalyzer::with_files(&self.codebase, analyzed_file_set)
218                .analyze_all();
219        all_issues.extend(class_issues);
220
221        // ---- Pass 2: analyze function/method bodies in parallel (M14) --------
222        // Each file is analyzed independently; arena + parse happen inside the
223        // rayon closure so there is no cross-thread borrow.
224        // When a cache is present, files whose content hash matches a stored
225        // entry skip re-analysis entirely (M17).
226        let pass2_results: Vec<(Vec<Issue>, Vec<crate::symbol::ResolvedSymbol>)> = file_data
227            .par_iter()
228            .map(|(file, src)| {
229                // Cache lookup
230                let result = if let Some(cache) = &self.cache {
231                    let h = hash_content(src);
232                    if let Some(cached) = cache.get(file, &h) {
233                        (cached, Vec::new())
234                    } else {
235                        // Miss — analyze and store
236                        let arena = bumpalo::Bump::new();
237                        let parsed = php_rs_parser::parse(&arena, src);
238                        let (issues, symbols) =
239                            self.analyze_bodies(&parsed.program, file.clone(), src);
240                        cache.put(file, h, issues.clone());
241                        (issues, symbols)
242                    }
243                } else {
244                    let arena = bumpalo::Bump::new();
245                    let parsed = php_rs_parser::parse(&arena, src);
246                    self.analyze_bodies(&parsed.program, file.clone(), src)
247                };
248                if let Some(cb) = &self.on_file_done {
249                    cb();
250                }
251                result
252            })
253            .collect();
254
255        let mut all_symbols = Vec::new();
256        for (issues, symbols) in pass2_results {
257            all_issues.extend(issues);
258            all_symbols.extend(symbols);
259        }
260
261        // Persist cache hits/misses to disk
262        if let Some(cache) = &self.cache {
263            cache.flush();
264        }
265
266        // ---- Dead-code detection (M18) --------------------------------------
267        if self.find_dead_code {
268            let dead_code_issues =
269                crate::dead_code::DeadCodeAnalyzer::new(&self.codebase).analyze();
270            all_issues.extend(dead_code_issues);
271        }
272
273        AnalysisResult {
274            issues: all_issues,
275            type_envs: std::collections::HashMap::new(),
276            symbols: all_symbols,
277        }
278    }
279
280    /// Lazily load class definitions for referenced-but-unknown FQCNs via PSR-4.
281    ///
282    /// After Pass 1 and `codebase.finalize()`, some classes referenced as parents
283    /// or interfaces may not be in the codebase (they weren't in the initial file
284    /// list). This method iterates up to `max_depth` times, each time resolving
285    /// unknown parent/interface FQCNs via the PSR-4 map, running Pass 1 on those
286    /// files, and re-finalizing the codebase. The loop stops when no new files
287    /// are discovered.
288    fn lazy_load_missing_classes(
289        &self,
290        psr4: Arc<crate::composer::Psr4Map>,
291        all_issues: &mut Vec<Issue>,
292    ) {
293        use std::collections::HashSet;
294
295        let max_depth = 10; // prevent infinite chains
296        let mut loaded: HashSet<String> = HashSet::new();
297
298        for _ in 0..max_depth {
299            // Collect all referenced FQCNs that aren't in the codebase
300            let mut to_load: Vec<(String, PathBuf)> = Vec::new();
301
302            for entry in self.codebase.classes.iter() {
303                let cls = entry.value();
304
305                // Check parent class
306                if let Some(parent) = &cls.parent {
307                    let fqcn = parent.as_ref();
308                    if !self.codebase.classes.contains_key(fqcn) && !loaded.contains(fqcn) {
309                        if let Some(path) = psr4.resolve(fqcn) {
310                            to_load.push((fqcn.to_string(), path));
311                        }
312                    }
313                }
314
315                // Check interfaces
316                for iface in &cls.interfaces {
317                    let fqcn = iface.as_ref();
318                    if !self.codebase.classes.contains_key(fqcn)
319                        && !self.codebase.interfaces.contains_key(fqcn)
320                        && !loaded.contains(fqcn)
321                    {
322                        if let Some(path) = psr4.resolve(fqcn) {
323                            to_load.push((fqcn.to_string(), path));
324                        }
325                    }
326                }
327            }
328
329            if to_load.is_empty() {
330                break;
331            }
332
333            // Load each discovered file (Pass 1 only)
334            for (fqcn, path) in to_load {
335                loaded.insert(fqcn);
336                if let Ok(src) = std::fs::read_to_string(&path) {
337                    let file: Arc<str> = Arc::from(path.to_string_lossy().as_ref());
338                    let arena = bumpalo::Bump::new();
339                    let result = php_rs_parser::parse(&arena, &src);
340                    let collector =
341                        crate::collector::DefinitionCollector::new(&self.codebase, file, &src);
342                    let issues = collector.collect(&result.program);
343                    all_issues.extend(issues);
344                }
345            }
346
347            // Re-finalize to include newly loaded classes in the inheritance graph.
348            // Must reset the flag first so finalize() isn't a no-op.
349            self.codebase.invalidate_finalization();
350            self.codebase.finalize();
351        }
352    }
353
354    /// Re-analyze a single file within the existing codebase.
355    ///
356    /// This is the incremental analysis API for LSP:
357    /// 1. Removes old definitions from this file
358    /// 2. Re-runs Pass 1 (definition collection) on the new content
359    /// 3. Re-finalizes the codebase (rebuilds inheritance)
360    /// 4. Re-runs Pass 2 (body analysis) on this file
361    /// 5. Returns the analysis result for this file only
362    pub fn re_analyze_file(&self, file_path: &str, new_content: &str) -> AnalysisResult {
363        // 1. Remove old definitions from this file
364        self.codebase.remove_file_definitions(file_path);
365
366        // 2. Parse new content and run Pass 1
367        let file: Arc<str> = Arc::from(file_path);
368        let arena = bumpalo::Bump::new();
369        let parsed = php_rs_parser::parse(&arena, new_content);
370
371        let mut all_issues = Vec::new();
372
373        // Collect parse errors
374        for err in &parsed.errors {
375            all_issues.push(Issue::new(
376                mir_issues::IssueKind::ParseError {
377                    message: err.to_string(),
378                },
379                mir_issues::Location {
380                    file: file.clone(),
381                    line: 1,
382                    col_start: 0,
383                    col_end: 0,
384                },
385            ));
386        }
387
388        let collector = DefinitionCollector::new(&self.codebase, file.clone(), new_content);
389        all_issues.extend(collector.collect(&parsed.program));
390
391        // 3. Re-finalize (invalidation already done by remove_file_definitions)
392        self.codebase.finalize();
393
394        // 4. Run Pass 2 on this file
395        let (body_issues, symbols) =
396            self.analyze_bodies(&parsed.program, file.clone(), new_content);
397        all_issues.extend(body_issues);
398
399        // 5. Update cache if present
400        if let Some(cache) = &self.cache {
401            let h = hash_content(new_content);
402            cache.evict_with_dependents(&[file_path.to_string()]);
403            cache.put(file_path, h, all_issues.clone());
404        }
405
406        AnalysisResult {
407            issues: all_issues,
408            type_envs: HashMap::new(),
409            symbols,
410        }
411    }
412
413    /// Analyze a PHP source string without a real file path.
414    /// Useful for tests and LSP single-file mode.
415    pub fn analyze_source(source: &str) -> AnalysisResult {
416        use crate::collector::DefinitionCollector;
417        let analyzer = ProjectAnalyzer::new();
418        analyzer.load_stubs();
419        let file: Arc<str> = Arc::from("<source>");
420        let arena = bumpalo::Bump::new();
421        let result = php_rs_parser::parse(&arena, source);
422        let mut all_issues = Vec::new();
423        let collector = DefinitionCollector::new(&analyzer.codebase, file.clone(), source);
424        all_issues.extend(collector.collect(&result.program));
425        analyzer.codebase.finalize();
426        let mut type_envs = std::collections::HashMap::new();
427        let mut all_symbols = Vec::new();
428        all_issues.extend(analyzer.analyze_bodies_typed(
429            &result.program,
430            file.clone(),
431            source,
432            &mut type_envs,
433            &mut all_symbols,
434        ));
435        AnalysisResult {
436            issues: all_issues,
437            type_envs,
438            symbols: all_symbols,
439        }
440    }
441
442    /// Pass 2: walk all function/method bodies in one file, return issues, and
443    /// write inferred return types back to the codebase.
444    fn analyze_bodies<'arena, 'src>(
445        &self,
446        program: &php_ast::ast::Program<'arena, 'src>,
447        file: Arc<str>,
448        source: &str,
449    ) -> (Vec<mir_issues::Issue>, Vec<crate::symbol::ResolvedSymbol>) {
450        use php_ast::ast::StmtKind;
451
452        let mut all_issues = Vec::new();
453        let mut all_symbols = Vec::new();
454
455        for stmt in program.stmts.iter() {
456            match &stmt.kind {
457                StmtKind::Function(decl) => {
458                    self.analyze_fn_decl(decl, &file, source, &mut all_issues, &mut all_symbols);
459                }
460                StmtKind::Class(decl) => {
461                    self.analyze_class_decl(decl, &file, source, &mut all_issues, &mut all_symbols);
462                }
463                StmtKind::Enum(decl) => {
464                    self.analyze_enum_decl(decl, &file, source, &mut all_issues);
465                }
466                StmtKind::Namespace(ns) => {
467                    if let php_ast::ast::NamespaceBody::Braced(stmts) = &ns.body {
468                        for inner in stmts.iter() {
469                            match &inner.kind {
470                                StmtKind::Function(decl) => {
471                                    self.analyze_fn_decl(
472                                        decl,
473                                        &file,
474                                        source,
475                                        &mut all_issues,
476                                        &mut all_symbols,
477                                    );
478                                }
479                                StmtKind::Class(decl) => {
480                                    self.analyze_class_decl(
481                                        decl,
482                                        &file,
483                                        source,
484                                        &mut all_issues,
485                                        &mut all_symbols,
486                                    );
487                                }
488                                StmtKind::Enum(decl) => {
489                                    self.analyze_enum_decl(decl, &file, source, &mut all_issues);
490                                }
491                                _ => {}
492                            }
493                        }
494                    }
495                }
496                _ => {}
497            }
498        }
499
500        (all_issues, all_symbols)
501    }
502
503    /// Analyze a single function declaration body and collect issues + inferred return type.
504    fn analyze_fn_decl<'arena, 'src>(
505        &self,
506        decl: &php_ast::ast::FunctionDecl<'arena, 'src>,
507        file: &Arc<str>,
508        source: &str,
509        all_issues: &mut Vec<mir_issues::Issue>,
510        all_symbols: &mut Vec<crate::symbol::ResolvedSymbol>,
511    ) {
512        let fn_name = decl.name;
513        let body = &decl.body;
514        // Check parameter and return type hints for undefined classes.
515        for param in decl.params.iter() {
516            if let Some(hint) = &param.type_hint {
517                check_type_hint_classes(hint, &self.codebase, file, source, all_issues);
518            }
519        }
520        if let Some(hint) = &decl.return_type {
521            check_type_hint_classes(hint, &self.codebase, file, source, all_issues);
522        }
523        use crate::context::Context;
524        use crate::stmt::StatementsAnalyzer;
525        use mir_issues::IssueBuffer;
526
527        // Resolve function name using the file's namespace (handles namespaced functions)
528        let resolved_fn = self.codebase.resolve_class_name(file.as_ref(), fn_name);
529        let func_opt: Option<mir_codebase::storage::FunctionStorage> = self
530            .codebase
531            .functions
532            .get(resolved_fn.as_str())
533            .map(|r| r.clone())
534            .or_else(|| self.codebase.functions.get(fn_name).map(|r| r.clone()))
535            .or_else(|| {
536                self.codebase
537                    .functions
538                    .iter()
539                    .find(|e| e.short_name.as_ref() == fn_name)
540                    .map(|e| e.value().clone())
541            });
542
543        let fqn = func_opt.as_ref().map(|f| f.fqn.clone());
544        // Always use the codebase entry when its params match the AST (same count + names).
545        // This covers the common case and preserves docblock-enriched types.
546        // When names differ (two files define the same unnamespaced function), fall back to
547        // the AST params so param variables are always in scope for this file's body.
548        let (params, return_ty): (Vec<mir_codebase::FnParam>, _) = match &func_opt {
549            Some(f)
550                if f.params.len() == decl.params.len()
551                    && f.params
552                        .iter()
553                        .zip(decl.params.iter())
554                        .all(|(cp, ap)| cp.name.as_ref() == ap.name) =>
555            {
556                (f.params.clone(), f.return_type.clone())
557            }
558            _ => {
559                let ast_params = decl
560                    .params
561                    .iter()
562                    .map(|p| mir_codebase::FnParam {
563                        name: Arc::from(p.name),
564                        ty: None,
565                        default: p.default.as_ref().map(|_| mir_types::Union::mixed()),
566                        is_variadic: p.variadic,
567                        is_byref: p.by_ref,
568                        is_optional: p.default.is_some() || p.variadic,
569                    })
570                    .collect();
571                (ast_params, None)
572            }
573        };
574
575        let mut ctx = Context::for_function(&params, return_ty, None, None, None, false);
576        let mut buf = IssueBuffer::new();
577        let sm = php_ast::source_map::SourceMap::new(source);
578        let mut sa = StatementsAnalyzer::new(
579            &self.codebase,
580            file.clone(),
581            source,
582            &sm,
583            &mut buf,
584            all_symbols,
585        );
586        sa.analyze_stmts(body, &mut ctx);
587        let inferred = merge_return_types(&sa.return_types);
588        drop(sa);
589
590        emit_unused_params(&params, &ctx, false, file, all_issues);
591        emit_unused_variables(&ctx, file, all_issues);
592        all_issues.extend(buf.into_issues());
593
594        if let Some(fqn) = fqn {
595            if let Some(mut func) = self.codebase.functions.get_mut(fqn.as_ref()) {
596                func.inferred_return_type = Some(inferred);
597            }
598        }
599    }
600
601    /// Analyze all method bodies on a class declaration and collect issues + inferred return types.
602    fn analyze_class_decl<'arena, 'src>(
603        &self,
604        decl: &php_ast::ast::ClassDecl<'arena, 'src>,
605        file: &Arc<str>,
606        source: &str,
607        all_issues: &mut Vec<mir_issues::Issue>,
608        all_symbols: &mut Vec<crate::symbol::ResolvedSymbol>,
609    ) {
610        use crate::context::Context;
611        use crate::stmt::StatementsAnalyzer;
612        use mir_issues::IssueBuffer;
613
614        let class_name = decl.name.unwrap_or("<anonymous>");
615        // Resolve the FQCN using the file's namespace/imports — avoids ambiguity
616        // when multiple classes share the same short name across namespaces.
617        let resolved = self.codebase.resolve_class_name(file.as_ref(), class_name);
618        let fqcn: &str = &resolved;
619        let parent_fqcn = self
620            .codebase
621            .classes
622            .get(fqcn)
623            .and_then(|c| c.parent.clone());
624
625        for member in decl.members.iter() {
626            let php_ast::ast::ClassMemberKind::Method(method) = &member.kind else {
627                continue;
628            };
629
630            // Check parameter and return type hints for undefined classes (even abstract methods).
631            for param in method.params.iter() {
632                if let Some(hint) = &param.type_hint {
633                    check_type_hint_classes(hint, &self.codebase, file, source, all_issues);
634                }
635            }
636            if let Some(hint) = &method.return_type {
637                check_type_hint_classes(hint, &self.codebase, file, source, all_issues);
638            }
639
640            let Some(body) = &method.body else { continue };
641
642            let method_storage = self.codebase.get_method(fqcn, method.name);
643            let (params, return_ty) = method_storage
644                .as_ref()
645                .map(|m| (m.params.clone(), m.return_type.clone()))
646                .unwrap_or_default();
647
648            let is_ctor = method.name == "__construct";
649            let mut ctx = Context::for_method(
650                &params,
651                return_ty,
652                Some(Arc::from(fqcn)),
653                parent_fqcn.clone(),
654                Some(Arc::from(fqcn)),
655                false,
656                is_ctor,
657            );
658
659            let mut buf = IssueBuffer::new();
660            let sm = php_ast::source_map::SourceMap::new(source);
661            let mut sa = StatementsAnalyzer::new(
662                &self.codebase,
663                file.clone(),
664                source,
665                &sm,
666                &mut buf,
667                all_symbols,
668            );
669            sa.analyze_stmts(body, &mut ctx);
670            let inferred = merge_return_types(&sa.return_types);
671            drop(sa);
672
673            emit_unused_params(&params, &ctx, is_ctor, file, all_issues);
674            emit_unused_variables(&ctx, file, all_issues);
675            all_issues.extend(buf.into_issues());
676
677            if let Some(mut cls) = self.codebase.classes.get_mut(fqcn) {
678                if let Some(m) = cls.own_methods.get_mut(method.name) {
679                    m.inferred_return_type = Some(inferred);
680                }
681            }
682        }
683    }
684
685    /// Like `analyze_bodies` but also populates `type_envs` with per-scope type environments.
686    fn analyze_bodies_typed<'arena, 'src>(
687        &self,
688        program: &php_ast::ast::Program<'arena, 'src>,
689        file: Arc<str>,
690        source: &str,
691        type_envs: &mut std::collections::HashMap<
692            crate::type_env::ScopeId,
693            crate::type_env::TypeEnv,
694        >,
695        all_symbols: &mut Vec<crate::symbol::ResolvedSymbol>,
696    ) -> Vec<mir_issues::Issue> {
697        use php_ast::ast::StmtKind;
698        let mut all_issues = Vec::new();
699        for stmt in program.stmts.iter() {
700            match &stmt.kind {
701                StmtKind::Function(decl) => {
702                    self.analyze_fn_decl_typed(
703                        decl,
704                        &file,
705                        source,
706                        &mut all_issues,
707                        type_envs,
708                        all_symbols,
709                    );
710                }
711                StmtKind::Class(decl) => {
712                    self.analyze_class_decl_typed(
713                        decl,
714                        &file,
715                        source,
716                        &mut all_issues,
717                        type_envs,
718                        all_symbols,
719                    );
720                }
721                StmtKind::Enum(decl) => {
722                    self.analyze_enum_decl(decl, &file, source, &mut all_issues);
723                }
724                StmtKind::Namespace(ns) => {
725                    if let php_ast::ast::NamespaceBody::Braced(stmts) = &ns.body {
726                        for inner in stmts.iter() {
727                            match &inner.kind {
728                                StmtKind::Function(decl) => {
729                                    self.analyze_fn_decl_typed(
730                                        decl,
731                                        &file,
732                                        source,
733                                        &mut all_issues,
734                                        type_envs,
735                                        all_symbols,
736                                    );
737                                }
738                                StmtKind::Class(decl) => {
739                                    self.analyze_class_decl_typed(
740                                        decl,
741                                        &file,
742                                        source,
743                                        &mut all_issues,
744                                        type_envs,
745                                        all_symbols,
746                                    );
747                                }
748                                StmtKind::Enum(decl) => {
749                                    self.analyze_enum_decl(decl, &file, source, &mut all_issues);
750                                }
751                                _ => {}
752                            }
753                        }
754                    }
755                }
756                _ => {}
757            }
758        }
759        all_issues
760    }
761
762    /// Like `analyze_fn_decl` but also captures a `TypeEnv` for the function scope.
763    fn analyze_fn_decl_typed<'arena, 'src>(
764        &self,
765        decl: &php_ast::ast::FunctionDecl<'arena, 'src>,
766        file: &Arc<str>,
767        source: &str,
768        all_issues: &mut Vec<mir_issues::Issue>,
769        type_envs: &mut std::collections::HashMap<
770            crate::type_env::ScopeId,
771            crate::type_env::TypeEnv,
772        >,
773        all_symbols: &mut Vec<crate::symbol::ResolvedSymbol>,
774    ) {
775        use crate::context::Context;
776        use crate::stmt::StatementsAnalyzer;
777        use mir_issues::IssueBuffer;
778
779        let fn_name = decl.name;
780        let body = &decl.body;
781
782        for param in decl.params.iter() {
783            if let Some(hint) = &param.type_hint {
784                check_type_hint_classes(hint, &self.codebase, file, source, all_issues);
785            }
786        }
787        if let Some(hint) = &decl.return_type {
788            check_type_hint_classes(hint, &self.codebase, file, source, all_issues);
789        }
790
791        let resolved_fn = self.codebase.resolve_class_name(file.as_ref(), fn_name);
792        let func_opt: Option<mir_codebase::storage::FunctionStorage> = self
793            .codebase
794            .functions
795            .get(resolved_fn.as_str())
796            .map(|r| r.clone())
797            .or_else(|| self.codebase.functions.get(fn_name).map(|r| r.clone()))
798            .or_else(|| {
799                self.codebase
800                    .functions
801                    .iter()
802                    .find(|e| e.short_name.as_ref() == fn_name)
803                    .map(|e| e.value().clone())
804            });
805
806        let fqn = func_opt.as_ref().map(|f| f.fqn.clone());
807        let (params, return_ty): (Vec<mir_codebase::FnParam>, _) = match &func_opt {
808            Some(f)
809                if f.params.len() == decl.params.len()
810                    && f.params
811                        .iter()
812                        .zip(decl.params.iter())
813                        .all(|(cp, ap)| cp.name.as_ref() == ap.name) =>
814            {
815                (f.params.clone(), f.return_type.clone())
816            }
817            _ => {
818                let ast_params = decl
819                    .params
820                    .iter()
821                    .map(|p| mir_codebase::FnParam {
822                        name: Arc::from(p.name),
823                        ty: None,
824                        default: p.default.as_ref().map(|_| mir_types::Union::mixed()),
825                        is_variadic: p.variadic,
826                        is_byref: p.by_ref,
827                        is_optional: p.default.is_some() || p.variadic,
828                    })
829                    .collect();
830                (ast_params, None)
831            }
832        };
833
834        let mut ctx = Context::for_function(&params, return_ty, None, None, None, false);
835        let mut buf = IssueBuffer::new();
836        let sm = php_ast::source_map::SourceMap::new(source);
837        let mut sa = StatementsAnalyzer::new(
838            &self.codebase,
839            file.clone(),
840            source,
841            &sm,
842            &mut buf,
843            all_symbols,
844        );
845        sa.analyze_stmts(body, &mut ctx);
846        let inferred = merge_return_types(&sa.return_types);
847        drop(sa);
848
849        // Capture TypeEnv for this scope
850        let scope_name = fqn.clone().unwrap_or_else(|| Arc::from(fn_name));
851        type_envs.insert(
852            crate::type_env::ScopeId::Function {
853                file: file.clone(),
854                name: scope_name,
855            },
856            crate::type_env::TypeEnv::new(ctx.vars.clone()),
857        );
858
859        emit_unused_params(&params, &ctx, false, file, all_issues);
860        emit_unused_variables(&ctx, file, all_issues);
861        all_issues.extend(buf.into_issues());
862
863        if let Some(fqn) = fqn {
864            if let Some(mut func) = self.codebase.functions.get_mut(fqn.as_ref()) {
865                func.inferred_return_type = Some(inferred);
866            }
867        }
868    }
869
870    /// Like `analyze_class_decl` but also captures a `TypeEnv` per method scope.
871    fn analyze_class_decl_typed<'arena, 'src>(
872        &self,
873        decl: &php_ast::ast::ClassDecl<'arena, 'src>,
874        file: &Arc<str>,
875        source: &str,
876        all_issues: &mut Vec<mir_issues::Issue>,
877        type_envs: &mut std::collections::HashMap<
878            crate::type_env::ScopeId,
879            crate::type_env::TypeEnv,
880        >,
881        all_symbols: &mut Vec<crate::symbol::ResolvedSymbol>,
882    ) {
883        use crate::context::Context;
884        use crate::stmt::StatementsAnalyzer;
885        use mir_issues::IssueBuffer;
886
887        let class_name = decl.name.unwrap_or("<anonymous>");
888        let resolved = self.codebase.resolve_class_name(file.as_ref(), class_name);
889        let fqcn: &str = &resolved;
890        let parent_fqcn = self
891            .codebase
892            .classes
893            .get(fqcn)
894            .and_then(|c| c.parent.clone());
895
896        for member in decl.members.iter() {
897            let php_ast::ast::ClassMemberKind::Method(method) = &member.kind else {
898                continue;
899            };
900
901            for param in method.params.iter() {
902                if let Some(hint) = &param.type_hint {
903                    check_type_hint_classes(hint, &self.codebase, file, source, all_issues);
904                }
905            }
906            if let Some(hint) = &method.return_type {
907                check_type_hint_classes(hint, &self.codebase, file, source, all_issues);
908            }
909
910            let Some(body) = &method.body else { continue };
911
912            let method_storage = self.codebase.get_method(fqcn, method.name);
913            let (params, return_ty) = method_storage
914                .as_ref()
915                .map(|m| (m.params.clone(), m.return_type.clone()))
916                .unwrap_or_default();
917
918            let is_ctor = method.name == "__construct";
919            let mut ctx = Context::for_method(
920                &params,
921                return_ty,
922                Some(Arc::from(fqcn)),
923                parent_fqcn.clone(),
924                Some(Arc::from(fqcn)),
925                false,
926                is_ctor,
927            );
928
929            let mut buf = IssueBuffer::new();
930            let sm = php_ast::source_map::SourceMap::new(source);
931            let mut sa = StatementsAnalyzer::new(
932                &self.codebase,
933                file.clone(),
934                source,
935                &sm,
936                &mut buf,
937                all_symbols,
938            );
939            sa.analyze_stmts(body, &mut ctx);
940            let inferred = merge_return_types(&sa.return_types);
941            drop(sa);
942
943            // Capture TypeEnv for this method scope
944            type_envs.insert(
945                crate::type_env::ScopeId::Method {
946                    class: Arc::from(fqcn),
947                    method: Arc::from(method.name),
948                },
949                crate::type_env::TypeEnv::new(ctx.vars.clone()),
950            );
951
952            emit_unused_params(&params, &ctx, is_ctor, file, all_issues);
953            emit_unused_variables(&ctx, file, all_issues);
954            all_issues.extend(buf.into_issues());
955
956            if let Some(mut cls) = self.codebase.classes.get_mut(fqcn) {
957                if let Some(m) = cls.own_methods.get_mut(method.name) {
958                    m.inferred_return_type = Some(inferred);
959                }
960            }
961        }
962    }
963
964    /// Discover all `.php` files under a directory, recursively.
965    pub fn discover_files(root: &Path) -> Vec<PathBuf> {
966        if root.is_file() {
967            return vec![root.to_path_buf()];
968        }
969        let mut files = Vec::new();
970        collect_php_files(root, &mut files);
971        files
972    }
973
974    /// Pass 1 only: collect type definitions from `paths` into the codebase without
975    /// analyzing method bodies or emitting issues. Used to load vendor types.
976    pub fn collect_types_only(&self, paths: &[PathBuf]) {
977        let file_data: Vec<(Arc<str>, String)> = paths
978            .par_iter()
979            .filter_map(|path| {
980                std::fs::read_to_string(path)
981                    .ok()
982                    .map(|src| (Arc::from(path.to_string_lossy().as_ref()), src))
983            })
984            .collect();
985
986        for (file, src) in &file_data {
987            let arena = bumpalo::Bump::new();
988            let result = php_rs_parser::parse(&arena, src);
989            let collector = DefinitionCollector::new(&self.codebase, file.clone(), src);
990            // Ignore any issues emitted during vendor collection
991            let _ = collector.collect(&result.program);
992        }
993    }
994
995    /// Check type hints in enum methods for undefined classes.
996    fn analyze_enum_decl<'arena, 'src>(
997        &self,
998        decl: &php_ast::ast::EnumDecl<'arena, 'src>,
999        file: &Arc<str>,
1000        source: &str,
1001        all_issues: &mut Vec<mir_issues::Issue>,
1002    ) {
1003        use php_ast::ast::EnumMemberKind;
1004        for member in decl.members.iter() {
1005            let EnumMemberKind::Method(method) = &member.kind else {
1006                continue;
1007            };
1008            for param in method.params.iter() {
1009                if let Some(hint) = &param.type_hint {
1010                    check_type_hint_classes(hint, &self.codebase, file, source, all_issues);
1011                }
1012            }
1013            if let Some(hint) = &method.return_type {
1014                check_type_hint_classes(hint, &self.codebase, file, source, all_issues);
1015            }
1016        }
1017    }
1018}
1019
1020impl Default for ProjectAnalyzer {
1021    fn default() -> Self {
1022        Self::new()
1023    }
1024}
1025
1026// ---------------------------------------------------------------------------
1027// Type-hint class existence checker
1028// ---------------------------------------------------------------------------
1029
1030/// Walk a `TypeHint` AST node and emit `UndefinedClass` for any named class
1031/// that does not exist in the codebase.  Skips PHP built-in type keywords.
1032fn check_type_hint_classes<'arena, 'src>(
1033    hint: &php_ast::ast::TypeHint<'arena, 'src>,
1034    codebase: &Codebase,
1035    file: &Arc<str>,
1036    source: &str,
1037    issues: &mut Vec<mir_issues::Issue>,
1038) {
1039    use php_ast::ast::TypeHintKind;
1040    match &hint.kind {
1041        TypeHintKind::Named(name) => {
1042            let name_str = crate::parser::name_to_string(name);
1043            // Skip built-in pseudo-types that are not real classes.
1044            if is_pseudo_type(&name_str) {
1045                return;
1046            }
1047            let resolved = codebase.resolve_class_name(file.as_ref(), &name_str);
1048            if !codebase.type_exists(&resolved) {
1049                let sm = php_ast::source_map::SourceMap::new(source);
1050                let lc = sm.offset_to_line_col(hint.span.start);
1051                let (line, col) = (lc.line + 1, lc.col as u16);
1052                issues.push(
1053                    mir_issues::Issue::new(
1054                        mir_issues::IssueKind::UndefinedClass { name: resolved },
1055                        mir_issues::Location {
1056                            file: file.clone(),
1057                            line,
1058                            col_start: col,
1059                            col_end: col,
1060                        },
1061                    )
1062                    .with_snippet(crate::parser::span_text(source, hint.span).unwrap_or_default()),
1063                );
1064            }
1065        }
1066        TypeHintKind::Nullable(inner) => {
1067            check_type_hint_classes(inner, codebase, file, source, issues);
1068        }
1069        TypeHintKind::Union(parts) | TypeHintKind::Intersection(parts) => {
1070            for part in parts.iter() {
1071                check_type_hint_classes(part, codebase, file, source, issues);
1072            }
1073        }
1074        TypeHintKind::Keyword(_, _) => {} // built-in keyword, always valid
1075    }
1076}
1077
/// Returns true for names that are PHP pseudo-types / special identifiers, not
/// real classes.
///
/// The comparison ignores ASCII case (`Self`, `STATIC`, … all match) and does
/// not allocate.
fn is_pseudo_type(name: &str) -> bool {
    const PSEUDO_TYPES: [&str; 12] = [
        "self", "static", "parent", "null", "true", "false", "never", "void", "mixed", "object",
        "callable", "iterable",
    ];
    PSEUDO_TYPES
        .iter()
        .any(|pseudo| name.eq_ignore_ascii_case(pseudo))
}
1097
1098/// Emit `UnusedParam` issues for params that were never read in `ctx`.
1099/// Skips variadic params, `_`-prefixed names, and constructors.
1100fn emit_unused_params(
1101    params: &[mir_codebase::FnParam],
1102    ctx: &crate::context::Context,
1103    is_ctor: bool,
1104    file: &Arc<str>,
1105    issues: &mut Vec<mir_issues::Issue>,
1106) {
1107    if is_ctor {
1108        return;
1109    }
1110    for p in params {
1111        if p.is_variadic {
1112            continue;
1113        }
1114        let name = p.name.as_ref().trim_start_matches('$');
1115        if name.starts_with('_') {
1116            continue;
1117        }
1118        if !ctx.read_vars.contains(name) {
1119            issues.push(
1120                mir_issues::Issue::new(
1121                    mir_issues::IssueKind::UnusedParam {
1122                        name: name.to_string(),
1123                    },
1124                    mir_issues::Location {
1125                        file: file.clone(),
1126                        line: 1,
1127                        col_start: 0,
1128                        col_end: 0,
1129                    },
1130                )
1131                .with_snippet(format!("${}", name)),
1132            );
1133        }
1134    }
1135}
1136
1137fn emit_unused_variables(
1138    ctx: &crate::context::Context,
1139    file: &Arc<str>,
1140    issues: &mut Vec<mir_issues::Issue>,
1141) {
1142    // Superglobals are always "used" — skip them
1143    const SUPERGLOBALS: &[&str] = &[
1144        "_SERVER", "_GET", "_POST", "_REQUEST", "_SESSION", "_COOKIE", "_FILES", "_ENV", "GLOBALS",
1145    ];
1146    for name in &ctx.assigned_vars {
1147        if ctx.param_names.contains(name) {
1148            continue;
1149        }
1150        if SUPERGLOBALS.contains(&name.as_str()) {
1151            continue;
1152        }
1153        if name.starts_with('_') {
1154            continue;
1155        }
1156        if !ctx.read_vars.contains(name) {
1157            issues.push(mir_issues::Issue::new(
1158                mir_issues::IssueKind::UnusedVariable { name: name.clone() },
1159                mir_issues::Location {
1160                    file: file.clone(),
1161                    line: 1,
1162                    col_start: 0,
1163                    col_end: 0,
1164                },
1165            ));
1166        }
1167    }
1168}
1169
1170/// Merge a list of return types into a single `Union`.
1171/// Returns `void` if the list is empty.
1172pub fn merge_return_types(return_types: &[Union]) -> Union {
1173    if return_types.is_empty() {
1174        return Union::single(mir_types::Atomic::TVoid);
1175    }
1176    return_types
1177        .iter()
1178        .fold(Union::empty(), |acc, t| Union::merge(&acc, t))
1179}
1180
/// Recursively append every file under `dir` whose extension is exactly `php`
/// to `out`.
///
/// Symlinks are never followed, and directories named `vendor`, `.git`,
/// `node_modules`, `.cache` or `.pnpm-store` are not entered. A directory
/// that cannot be read contributes nothing; unreadable entries are skipped.
pub(crate) fn collect_php_files(dir: &Path, out: &mut Vec<PathBuf>) {
    const SKIPPED_DIRS: [&str; 5] = ["vendor", ".git", "node_modules", ".cache", ".pnpm-store"];

    let Ok(entries) = std::fs::read_dir(dir) else {
        return;
    };

    for entry in entries.flatten() {
        // Skip symlinks — they can form cycles (e.g. .pnpm-store)
        if matches!(entry.file_type(), Ok(ft) if ft.is_symlink()) {
            continue;
        }

        let path = entry.path();
        if !path.is_dir() {
            if path.extension().and_then(|ext| ext.to_str()) == Some("php") {
                out.push(path);
            }
            continue;
        }

        let dir_name = path.file_name().and_then(|n| n.to_str()).unwrap_or("");
        if !SKIPPED_DIRS.contains(&dir_name) {
            collect_php_files(&path, out);
        }
    }
}
1204
1205// ---------------------------------------------------------------------------
1206// AnalysisResult
1207// ---------------------------------------------------------------------------
1208
1209// ---------------------------------------------------------------------------
1210// build_reverse_deps
1211// ---------------------------------------------------------------------------
1212
1213/// Build a reverse dependency graph from the codebase after Pass 1.
1214///
1215/// Returns a map: `defining_file → {files that depend on it}`.
1216///
1217/// Dependency edges captured (all derivable from Pass 1 data):
1218/// - `use` imports  (`file_imports`)
1219/// - `extends` / `implements` / trait `use` from `ClassStorage`
1220fn build_reverse_deps(codebase: &Codebase) -> HashMap<String, HashSet<String>> {
1221    let mut reverse: HashMap<String, HashSet<String>> = HashMap::new();
1222
1223    // Helper: record edge "defining_file → dependent_file"
1224    let mut add_edge = |symbol: &str, dependent_file: &str| {
1225        if let Some(defining_file) = codebase.symbol_to_file.get(symbol) {
1226            let def = defining_file.as_ref().to_string();
1227            if def != dependent_file {
1228                reverse
1229                    .entry(def)
1230                    .or_default()
1231                    .insert(dependent_file.to_string());
1232            }
1233        }
1234    };
1235
1236    // use-import edges
1237    for entry in codebase.file_imports.iter() {
1238        let file = entry.key().as_ref().to_string();
1239        for fqcn in entry.value().values() {
1240            add_edge(fqcn, &file);
1241        }
1242    }
1243
1244    // extends / implements / trait edges from ClassStorage
1245    for entry in codebase.classes.iter() {
1246        let defining = {
1247            let fqcn = entry.key().as_ref();
1248            codebase
1249                .symbol_to_file
1250                .get(fqcn)
1251                .map(|f| f.as_ref().to_string())
1252        };
1253        let Some(file) = defining else { continue };
1254
1255        let cls = entry.value();
1256        if let Some(ref parent) = cls.parent {
1257            add_edge(parent.as_ref(), &file);
1258        }
1259        for iface in &cls.interfaces {
1260            add_edge(iface.as_ref(), &file);
1261        }
1262        for tr in &cls.traits {
1263            add_edge(tr.as_ref(), &file);
1264        }
1265    }
1266
1267    reverse
1268}
1269
1270// ---------------------------------------------------------------------------
1271
1272pub struct AnalysisResult {
1273    pub issues: Vec<Issue>,
1274    pub type_envs: std::collections::HashMap<crate::type_env::ScopeId, crate::type_env::TypeEnv>,
1275    /// Per-expression resolved symbols from Pass 2.
1276    pub symbols: Vec<crate::symbol::ResolvedSymbol>,
1277}
1278
1279impl AnalysisResult {
1280    pub fn error_count(&self) -> usize {
1281        self.issues
1282            .iter()
1283            .filter(|i| i.severity == mir_issues::Severity::Error)
1284            .count()
1285    }
1286
1287    pub fn warning_count(&self) -> usize {
1288        self.issues
1289            .iter()
1290            .filter(|i| i.severity == mir_issues::Severity::Warning)
1291            .count()
1292    }
1293}