Skip to main content

mir_analyzer/
project.rs

//! Project-level orchestration: file discovery, pass 1, pass 2.
2use std::path::{Path, PathBuf};
3use std::sync::Arc;
4
5use rayon::prelude::*;
6
7use std::collections::{HashMap, HashSet};
8
9use crate::cache::{hash_content, AnalysisCache};
10use mir_codebase::Codebase;
11use mir_issues::Issue;
12use mir_types::Union;
13
14use crate::collector::DefinitionCollector;
15
16// ---------------------------------------------------------------------------
17// ProjectAnalyzer
18// ---------------------------------------------------------------------------
19
/// Drives analysis of a PHP project: holds the shared [`Codebase`] together
/// with the optional cache, progress callback, and PSR-4 map consulted by
/// the analysis passes.
pub struct ProjectAnalyzer {
    /// Shared symbol store that the passes populate and query.
    pub codebase: Arc<Codebase>,
    /// Optional cache — when `Some`, Pass 2 results are read/written per file.
    pub cache: Option<AnalysisCache>,
    /// Called once after each file completes Pass 2 (used for progress reporting).
    pub on_file_done: Option<Arc<dyn Fn() + Send + Sync>>,
    /// PSR-4 autoloader mapping from composer.json, if available.
    pub psr4: Option<Arc<crate::composer::Psr4Map>>,
    /// Whether stubs have already been loaded (to avoid double-loading).
    stubs_loaded: std::sync::atomic::AtomicBool,
    /// When true, run dead code detection at the end of analysis.
    pub find_dead_code: bool,
}
33
34impl ProjectAnalyzer {
35    pub fn new() -> Self {
36        Self {
37            codebase: Arc::new(Codebase::new()),
38            cache: None,
39            on_file_done: None,
40            psr4: None,
41            stubs_loaded: std::sync::atomic::AtomicBool::new(false),
42            find_dead_code: false,
43        }
44    }
45
46    /// Create a `ProjectAnalyzer` with a disk-backed cache stored under `cache_dir`.
47    pub fn with_cache(cache_dir: &Path) -> Self {
48        Self {
49            codebase: Arc::new(Codebase::new()),
50            cache: Some(AnalysisCache::open(cache_dir)),
51            on_file_done: None,
52            psr4: None,
53            stubs_loaded: std::sync::atomic::AtomicBool::new(false),
54            find_dead_code: false,
55        }
56    }
57
58    /// Create a `ProjectAnalyzer` from a project root containing `composer.json`.
59    /// Returns the analyzer (with `psr4` set) and the `Psr4Map` so callers can
60    /// call `map.project_files()` / `map.vendor_files()`.
61    pub fn from_composer(
62        root: &Path,
63    ) -> Result<(Self, crate::composer::Psr4Map), crate::composer::ComposerError> {
64        let map = crate::composer::Psr4Map::from_composer(root)?;
65        let psr4 = Arc::new(map.clone());
66        let analyzer = Self {
67            codebase: Arc::new(Codebase::new()),
68            cache: None,
69            on_file_done: None,
70            psr4: Some(psr4),
71            stubs_loaded: std::sync::atomic::AtomicBool::new(false),
72            find_dead_code: false,
73        };
74        Ok((analyzer, map))
75    }
76
77    /// Expose codebase for external use (e.g., pre-loading stubs from CLI).
78    pub fn codebase(&self) -> &Arc<Codebase> {
79        &self.codebase
80    }
81
82    /// Load PHP built-in stubs. Called automatically by `analyze` if not done yet.
83    pub fn load_stubs(&self) {
84        if !self
85            .stubs_loaded
86            .swap(true, std::sync::atomic::Ordering::SeqCst)
87        {
88            crate::stubs::load_stubs(&self.codebase);
89        }
90    }
91
92    /// Run the full analysis pipeline on a set of file paths.
93    pub fn analyze(&self, paths: &[PathBuf]) -> AnalysisResult {
94        let mut all_issues = Vec::new();
95        let mut parse_errors = Vec::new();
96
97        // ---- Load PHP built-in stubs (before Pass 1 so user code can override)
98        self.load_stubs();
99
100        // ---- Pre-Pass-2 invalidation: evict dependents of changed files ------
101        // Uses the reverse dep graph persisted from the previous run.
102        if let Some(cache) = &self.cache {
103            let changed: Vec<String> = paths
104                .iter()
105                .filter_map(|p| {
106                    let path_str = p.to_string_lossy().into_owned();
107                    let content = std::fs::read_to_string(p).ok()?;
108                    let h = hash_content(&content);
109                    if cache.get(&path_str, &h).is_none() {
110                        Some(path_str)
111                    } else {
112                        None
113                    }
114                })
115                .collect();
116            if !changed.is_empty() {
117                cache.evict_with_dependents(&changed);
118            }
119        }
120
121        // ---- Pass 1: read files in parallel ----------------------------------
122        let file_data: Vec<(Arc<str>, String)> = paths
123            .par_iter()
124            .filter_map(|path| match std::fs::read_to_string(path) {
125                Ok(src) => Some((Arc::from(path.to_string_lossy().as_ref()), src)),
126                Err(e) => {
127                    eprintln!("Cannot read {}: {}", path.display(), e);
128                    None
129                }
130            })
131            .collect();
132
133        // ---- Pre-index pass: walk the AST to build FQCN index, file imports, and namespaces ---
134        file_data.par_iter().for_each(|(file, src)| {
135            use php_ast::ast::StmtKind;
136            let arena = bumpalo::Bump::new();
137            let result = php_rs_parser::parse(&arena, src);
138
139            let mut current_namespace: Option<String> = None;
140            let mut imports: std::collections::HashMap<String, String> =
141                std::collections::HashMap::new();
142            let mut file_ns_set = false;
143
144            // Index a flat list of stmts under a given namespace prefix.
145            let index_stmts =
146                |stmts: &[php_ast::ast::Stmt<'_, '_>],
147                 ns: Option<&str>,
148                 imports: &mut std::collections::HashMap<String, String>| {
149                    for stmt in stmts.iter() {
150                        match &stmt.kind {
151                            StmtKind::Use(use_decl) => {
152                                for item in use_decl.uses.iter() {
153                                    let full_name = crate::parser::name_to_string(&item.name);
154                                    let alias = item.alias.unwrap_or_else(|| {
155                                        full_name.rsplit('\\').next().unwrap_or(&full_name)
156                                    });
157                                    imports.insert(alias.to_string(), full_name);
158                                }
159                            }
160                            StmtKind::Class(decl) => {
161                                if let Some(n) = decl.name {
162                                    let fqcn = match ns {
163                                        Some(ns) => format!("{}\\{}", ns, n),
164                                        None => n.to_string(),
165                                    };
166                                    self.codebase.known_symbols.insert(Arc::from(fqcn.as_str()));
167                                }
168                            }
169                            StmtKind::Interface(decl) => {
170                                let fqcn = match ns {
171                                    Some(ns) => format!("{}\\{}", ns, decl.name),
172                                    None => decl.name.to_string(),
173                                };
174                                self.codebase.known_symbols.insert(Arc::from(fqcn.as_str()));
175                            }
176                            StmtKind::Trait(decl) => {
177                                let fqcn = match ns {
178                                    Some(ns) => format!("{}\\{}", ns, decl.name),
179                                    None => decl.name.to_string(),
180                                };
181                                self.codebase.known_symbols.insert(Arc::from(fqcn.as_str()));
182                            }
183                            StmtKind::Enum(decl) => {
184                                let fqcn = match ns {
185                                    Some(ns) => format!("{}\\{}", ns, decl.name),
186                                    None => decl.name.to_string(),
187                                };
188                                self.codebase.known_symbols.insert(Arc::from(fqcn.as_str()));
189                            }
190                            StmtKind::Function(decl) => {
191                                let fqn = match ns {
192                                    Some(ns) => format!("{}\\{}", ns, decl.name),
193                                    None => decl.name.to_string(),
194                                };
195                                self.codebase.known_symbols.insert(Arc::from(fqn.as_str()));
196                            }
197                            _ => {}
198                        }
199                    }
200                };
201
202            for stmt in result.program.stmts.iter() {
203                match &stmt.kind {
204                    StmtKind::Namespace(ns) => {
205                        current_namespace =
206                            ns.name.as_ref().map(|n| crate::parser::name_to_string(n));
207                        if !file_ns_set {
208                            if let Some(ref ns_str) = current_namespace {
209                                self.codebase
210                                    .file_namespaces
211                                    .insert(file.clone(), ns_str.clone());
212                                file_ns_set = true;
213                            }
214                        }
215                        // Bracketed namespace: walk inner stmts for Use/Class/etc.
216                        if let php_ast::ast::NamespaceBody::Braced(inner_stmts) = &ns.body {
217                            index_stmts(inner_stmts, current_namespace.as_deref(), &mut imports);
218                        }
219                    }
220                    _ => index_stmts(
221                        std::slice::from_ref(stmt),
222                        current_namespace.as_deref(),
223                        &mut imports,
224                    ),
225                }
226            }
227
228            if !imports.is_empty() {
229                self.codebase.file_imports.insert(file.clone(), imports);
230            }
231        });
232
233        // ---- Pass 1: definition collection (sequential) -------------------------
234        // DashMap handles concurrent writes, but sequential avoids contention.
235        for (file, src) in &file_data {
236            let arena = bumpalo::Bump::new();
237            let result = php_rs_parser::parse(&arena, src);
238
239            for err in &result.errors {
240                let msg: String = err.to_string();
241                parse_errors.push(Issue::new(
242                    mir_issues::IssueKind::ParseError { message: msg },
243                    mir_issues::Location {
244                        file: file.clone(),
245                        line: 1,
246                        col_start: 0,
247                        col_end: 0,
248                    },
249                ));
250            }
251
252            let collector =
253                DefinitionCollector::new(&self.codebase, file.clone(), src, &result.source_map);
254            let issues = collector.collect(&result.program);
255            all_issues.extend(issues);
256        }
257
258        all_issues.extend(parse_errors);
259
260        // ---- Finalize codebase (resolve inheritance, build dispatch tables) --
261        self.codebase.finalize();
262
263        // ---- Lazy-load unknown classes via PSR-4 (issue #50) ----------------
264        if let Some(psr4) = &self.psr4 {
265            self.lazy_load_missing_classes(psr4.clone(), &mut all_issues);
266        }
267
268        // ---- Build reverse dep graph and persist it for the next run ---------
269        if let Some(cache) = &self.cache {
270            let rev = build_reverse_deps(&self.codebase);
271            cache.set_reverse_deps(rev);
272        }
273
274        // ---- Class-level checks (M11) ----------------------------------------
275        let analyzed_file_set: std::collections::HashSet<std::sync::Arc<str>> =
276            file_data.iter().map(|(f, _)| f.clone()).collect();
277        let class_issues =
278            crate::class::ClassAnalyzer::with_files(&self.codebase, analyzed_file_set, &file_data)
279                .analyze_all();
280        all_issues.extend(class_issues);
281
282        // ---- Pass 2: analyze function/method bodies in parallel (M14) --------
283        // Each file is analyzed independently; arena + parse happen inside the
284        // rayon closure so there is no cross-thread borrow.
285        // When a cache is present, files whose content hash matches a stored
286        // entry skip re-analysis entirely (M17).
287        let pass2_results: Vec<(Vec<Issue>, Vec<crate::symbol::ResolvedSymbol>)> = file_data
288            .par_iter()
289            .map(|(file, src)| {
290                // Cache lookup
291                let result = if let Some(cache) = &self.cache {
292                    let h = hash_content(src);
293                    if let Some((cached_issues, ref_locs)) = cache.get(file, &h) {
294                        // Hit — replay reference locations so symbol_reference_locations
295                        // is populated without re-running analyze_bodies.
296                        self.codebase
297                            .replay_reference_locations(file.clone(), &ref_locs);
298                        (cached_issues, Vec::new())
299                    } else {
300                        // Miss — analyze and store
301                        let arena = bumpalo::Bump::new();
302                        let parsed = php_rs_parser::parse(&arena, src);
303                        let (issues, symbols) = self.analyze_bodies(
304                            &parsed.program,
305                            file.clone(),
306                            src,
307                            &parsed.source_map,
308                        );
309                        let ref_locs = extract_reference_locations(&self.codebase, file);
310                        cache.put(file, h, issues.clone(), ref_locs);
311                        (issues, symbols)
312                    }
313                } else {
314                    let arena = bumpalo::Bump::new();
315                    let parsed = php_rs_parser::parse(&arena, src);
316                    self.analyze_bodies(&parsed.program, file.clone(), src, &parsed.source_map)
317                };
318                if let Some(cb) = &self.on_file_done {
319                    cb();
320                }
321                result
322            })
323            .collect();
324
325        let mut all_symbols = Vec::new();
326        for (issues, symbols) in pass2_results {
327            all_issues.extend(issues);
328            all_symbols.extend(symbols);
329        }
330
331        // Persist cache hits/misses to disk
332        if let Some(cache) = &self.cache {
333            cache.flush();
334        }
335
336        // ---- Dead-code detection (M18) --------------------------------------
337        if self.find_dead_code {
338            let dead_code_issues =
339                crate::dead_code::DeadCodeAnalyzer::new(&self.codebase).analyze();
340            all_issues.extend(dead_code_issues);
341        }
342
343        AnalysisResult {
344            issues: all_issues,
345            type_envs: std::collections::HashMap::new(),
346            symbols: all_symbols,
347        }
348    }
349
350    /// Lazily load class definitions for referenced-but-unknown FQCNs via PSR-4.
351    ///
352    /// After Pass 1 and `codebase.finalize()`, some classes referenced as parents
353    /// or interfaces may not be in the codebase (they weren't in the initial file
354    /// list). This method iterates up to `max_depth` times, each time resolving
355    /// unknown parent/interface FQCNs via the PSR-4 map, running Pass 1 on those
356    /// files, and re-finalizing the codebase. The loop stops when no new files
357    /// are discovered.
358    fn lazy_load_missing_classes(
359        &self,
360        psr4: Arc<crate::composer::Psr4Map>,
361        all_issues: &mut Vec<Issue>,
362    ) {
363        use std::collections::HashSet;
364
365        let max_depth = 10; // prevent infinite chains
366        let mut loaded: HashSet<String> = HashSet::new();
367
368        for _ in 0..max_depth {
369            // Collect all referenced FQCNs that aren't in the codebase
370            let mut to_load: Vec<(String, PathBuf)> = Vec::new();
371
372            for entry in self.codebase.classes.iter() {
373                let cls = entry.value();
374
375                // Check parent class
376                if let Some(parent) = &cls.parent {
377                    let fqcn = parent.as_ref();
378                    if !self.codebase.classes.contains_key(fqcn) && !loaded.contains(fqcn) {
379                        if let Some(path) = psr4.resolve(fqcn) {
380                            to_load.push((fqcn.to_string(), path));
381                        }
382                    }
383                }
384
385                // Check interfaces
386                for iface in &cls.interfaces {
387                    let fqcn = iface.as_ref();
388                    if !self.codebase.classes.contains_key(fqcn)
389                        && !self.codebase.interfaces.contains_key(fqcn)
390                        && !loaded.contains(fqcn)
391                    {
392                        if let Some(path) = psr4.resolve(fqcn) {
393                            to_load.push((fqcn.to_string(), path));
394                        }
395                    }
396                }
397            }
398
399            if to_load.is_empty() {
400                break;
401            }
402
403            // Load each discovered file (Pass 1 only)
404            for (fqcn, path) in to_load {
405                loaded.insert(fqcn);
406                if let Ok(src) = std::fs::read_to_string(&path) {
407                    let file: Arc<str> = Arc::from(path.to_string_lossy().as_ref());
408                    let arena = bumpalo::Bump::new();
409                    let result = php_rs_parser::parse(&arena, &src);
410                    let collector = crate::collector::DefinitionCollector::new(
411                        &self.codebase,
412                        file,
413                        &src,
414                        &result.source_map,
415                    );
416                    let issues = collector.collect(&result.program);
417                    all_issues.extend(issues);
418                }
419            }
420
421            // Re-finalize to include newly loaded classes in the inheritance graph.
422            // Must reset the flag first so finalize() isn't a no-op.
423            self.codebase.invalidate_finalization();
424            self.codebase.finalize();
425        }
426    }
427
428    /// Re-analyze a single file within the existing codebase.
429    ///
430    /// This is the incremental analysis API for LSP:
431    /// 1. Removes old definitions from this file
432    /// 2. Re-runs Pass 1 (definition collection) on the new content
433    /// 3. Re-finalizes the codebase (rebuilds inheritance)
434    /// 4. Re-runs Pass 2 (body analysis) on this file
435    /// 5. Returns the analysis result for this file only
436    pub fn re_analyze_file(&self, file_path: &str, new_content: &str) -> AnalysisResult {
437        // 1. Remove old definitions from this file
438        self.codebase.remove_file_definitions(file_path);
439
440        // 2. Parse new content and run Pass 1
441        let file: Arc<str> = Arc::from(file_path);
442        let arena = bumpalo::Bump::new();
443        let parsed = php_rs_parser::parse(&arena, new_content);
444
445        let mut all_issues = Vec::new();
446
447        // Collect parse errors
448        for err in &parsed.errors {
449            all_issues.push(Issue::new(
450                mir_issues::IssueKind::ParseError {
451                    message: err.to_string(),
452                },
453                mir_issues::Location {
454                    file: file.clone(),
455                    line: 1,
456                    col_start: 0,
457                    col_end: 0,
458                },
459            ));
460        }
461
462        let collector = DefinitionCollector::new(
463            &self.codebase,
464            file.clone(),
465            new_content,
466            &parsed.source_map,
467        );
468        all_issues.extend(collector.collect(&parsed.program));
469
470        // 3. Re-finalize (invalidation already done by remove_file_definitions)
471        self.codebase.finalize();
472
473        // 4. Run Pass 2 on this file
474        let (body_issues, symbols) = self.analyze_bodies(
475            &parsed.program,
476            file.clone(),
477            new_content,
478            &parsed.source_map,
479        );
480        all_issues.extend(body_issues);
481
482        // 5. Update cache if present
483        if let Some(cache) = &self.cache {
484            let h = hash_content(new_content);
485            cache.evict_with_dependents(&[file_path.to_string()]);
486            let ref_locs = extract_reference_locations(&self.codebase, &file);
487            cache.put(file_path, h, all_issues.clone(), ref_locs);
488        }
489
490        AnalysisResult {
491            issues: all_issues,
492            type_envs: HashMap::new(),
493            symbols,
494        }
495    }
496
497    /// Analyze a PHP source string without a real file path.
498    /// Useful for tests and LSP single-file mode.
499    pub fn analyze_source(source: &str) -> AnalysisResult {
500        use crate::collector::DefinitionCollector;
501        let analyzer = ProjectAnalyzer::new();
502        analyzer.load_stubs();
503        let file: Arc<str> = Arc::from("<source>");
504        let arena = bumpalo::Bump::new();
505        let result = php_rs_parser::parse(&arena, source);
506        let mut all_issues = Vec::new();
507        let collector =
508            DefinitionCollector::new(&analyzer.codebase, file.clone(), source, &result.source_map);
509        all_issues.extend(collector.collect(&result.program));
510        analyzer.codebase.finalize();
511        let mut type_envs = std::collections::HashMap::new();
512        let mut all_symbols = Vec::new();
513        all_issues.extend(analyzer.analyze_bodies_typed(
514            &result.program,
515            file.clone(),
516            source,
517            &result.source_map,
518            &mut type_envs,
519            &mut all_symbols,
520        ));
521        AnalysisResult {
522            issues: all_issues,
523            type_envs,
524            symbols: all_symbols,
525        }
526    }
527
528    /// Pass 2: walk all function/method bodies in one file, return issues, and
529    /// write inferred return types back to the codebase.
530    fn analyze_bodies<'arena, 'src>(
531        &self,
532        program: &php_ast::ast::Program<'arena, 'src>,
533        file: Arc<str>,
534        source: &str,
535        source_map: &php_rs_parser::source_map::SourceMap,
536    ) -> (Vec<mir_issues::Issue>, Vec<crate::symbol::ResolvedSymbol>) {
537        use php_ast::ast::StmtKind;
538
539        let mut all_issues = Vec::new();
540        let mut all_symbols = Vec::new();
541
542        for stmt in program.stmts.iter() {
543            match &stmt.kind {
544                StmtKind::Function(decl) => {
545                    self.analyze_fn_decl(
546                        decl,
547                        &file,
548                        source,
549                        source_map,
550                        &mut all_issues,
551                        &mut all_symbols,
552                    );
553                }
554                StmtKind::Class(decl) => {
555                    self.analyze_class_decl(
556                        decl,
557                        &file,
558                        source,
559                        source_map,
560                        &mut all_issues,
561                        &mut all_symbols,
562                    );
563                }
564                StmtKind::Enum(decl) => {
565                    self.analyze_enum_decl(decl, &file, source, source_map, &mut all_issues);
566                }
567                StmtKind::Namespace(ns) => {
568                    if let php_ast::ast::NamespaceBody::Braced(stmts) = &ns.body {
569                        for inner in stmts.iter() {
570                            match &inner.kind {
571                                StmtKind::Function(decl) => {
572                                    self.analyze_fn_decl(
573                                        decl,
574                                        &file,
575                                        source,
576                                        source_map,
577                                        &mut all_issues,
578                                        &mut all_symbols,
579                                    );
580                                }
581                                StmtKind::Class(decl) => {
582                                    self.analyze_class_decl(
583                                        decl,
584                                        &file,
585                                        source,
586                                        source_map,
587                                        &mut all_issues,
588                                        &mut all_symbols,
589                                    );
590                                }
591                                StmtKind::Enum(decl) => {
592                                    self.analyze_enum_decl(
593                                        decl,
594                                        &file,
595                                        source,
596                                        source_map,
597                                        &mut all_issues,
598                                    );
599                                }
600                                _ => {}
601                            }
602                        }
603                    }
604                }
605                _ => {}
606            }
607        }
608
609        (all_issues, all_symbols)
610    }
611
612    /// Analyze a single function declaration body and collect issues + inferred return type.
613    #[allow(clippy::too_many_arguments)]
614    fn analyze_fn_decl<'arena, 'src>(
615        &self,
616        decl: &php_ast::ast::FunctionDecl<'arena, 'src>,
617        file: &Arc<str>,
618        source: &str,
619        source_map: &php_rs_parser::source_map::SourceMap,
620        all_issues: &mut Vec<mir_issues::Issue>,
621        all_symbols: &mut Vec<crate::symbol::ResolvedSymbol>,
622    ) {
623        let fn_name = decl.name;
624        let body = &decl.body;
625        // Check parameter and return type hints for undefined classes.
626        for param in decl.params.iter() {
627            if let Some(hint) = &param.type_hint {
628                check_type_hint_classes(hint, &self.codebase, file, source, source_map, all_issues);
629            }
630        }
631        if let Some(hint) = &decl.return_type {
632            check_type_hint_classes(hint, &self.codebase, file, source, source_map, all_issues);
633        }
634        use crate::context::Context;
635        use crate::stmt::StatementsAnalyzer;
636        use mir_issues::IssueBuffer;
637
638        // Resolve function name using the file's namespace (handles namespaced functions)
639        let resolved_fn = self.codebase.resolve_class_name(file.as_ref(), fn_name);
640        let func_opt: Option<mir_codebase::storage::FunctionStorage> = self
641            .codebase
642            .functions
643            .get(resolved_fn.as_str())
644            .map(|r| r.clone())
645            .or_else(|| self.codebase.functions.get(fn_name).map(|r| r.clone()))
646            .or_else(|| {
647                self.codebase
648                    .functions
649                    .iter()
650                    .find(|e| e.short_name.as_ref() == fn_name)
651                    .map(|e| e.value().clone())
652            });
653
654        let fqn = func_opt.as_ref().map(|f| f.fqn.clone());
655        // Always use the codebase entry when its params match the AST (same count + names).
656        // This covers the common case and preserves docblock-enriched types.
657        // When names differ (two files define the same unnamespaced function), fall back to
658        // the AST params so param variables are always in scope for this file's body.
659        let (params, return_ty): (Vec<mir_codebase::FnParam>, _) = match &func_opt {
660            Some(f)
661                if f.params.len() == decl.params.len()
662                    && f.params
663                        .iter()
664                        .zip(decl.params.iter())
665                        .all(|(cp, ap)| cp.name.as_ref() == ap.name) =>
666            {
667                (f.params.clone(), f.return_type.clone())
668            }
669            _ => {
670                let ast_params = decl
671                    .params
672                    .iter()
673                    .map(|p| mir_codebase::FnParam {
674                        name: Arc::from(p.name),
675                        ty: None,
676                        default: p.default.as_ref().map(|_| mir_types::Union::mixed()),
677                        is_variadic: p.variadic,
678                        is_byref: p.by_ref,
679                        is_optional: p.default.is_some() || p.variadic,
680                    })
681                    .collect();
682                (ast_params, None)
683            }
684        };
685
686        let mut ctx = Context::for_function(&params, return_ty, None, None, None, false, true);
687        let mut buf = IssueBuffer::new();
688        let mut sa = StatementsAnalyzer::new(
689            &self.codebase,
690            file.clone(),
691            source,
692            source_map,
693            &mut buf,
694            all_symbols,
695        );
696        sa.analyze_stmts(body, &mut ctx);
697        let inferred = merge_return_types(&sa.return_types);
698        drop(sa);
699
700        emit_unused_params(&params, &ctx, "", file, all_issues);
701        emit_unused_variables(&ctx, file, all_issues);
702        all_issues.extend(buf.into_issues());
703
704        if let Some(fqn) = fqn {
705            if let Some(mut func) = self.codebase.functions.get_mut(fqn.as_ref()) {
706                func.inferred_return_type = Some(inferred);
707            }
708        }
709    }
710
711    /// Analyze all method bodies on a class declaration and collect issues + inferred return types.
712    #[allow(clippy::too_many_arguments)]
713    fn analyze_class_decl<'arena, 'src>(
714        &self,
715        decl: &php_ast::ast::ClassDecl<'arena, 'src>,
716        file: &Arc<str>,
717        source: &str,
718        source_map: &php_rs_parser::source_map::SourceMap,
719        all_issues: &mut Vec<mir_issues::Issue>,
720        all_symbols: &mut Vec<crate::symbol::ResolvedSymbol>,
721    ) {
722        use crate::context::Context;
723        use crate::stmt::StatementsAnalyzer;
724        use mir_issues::IssueBuffer;
725
726        let class_name = decl.name.unwrap_or("<anonymous>");
727        // Resolve the FQCN using the file's namespace/imports — avoids ambiguity
728        // when multiple classes share the same short name across namespaces.
729        let resolved = self.codebase.resolve_class_name(file.as_ref(), class_name);
730        let fqcn: &str = &resolved;
731        let parent_fqcn = self
732            .codebase
733            .classes
734            .get(fqcn)
735            .and_then(|c| c.parent.clone());
736
737        for member in decl.members.iter() {
738            let php_ast::ast::ClassMemberKind::Method(method) = &member.kind else {
739                continue;
740            };
741
742            // Check parameter and return type hints for undefined classes (even abstract methods).
743            for param in method.params.iter() {
744                if let Some(hint) = &param.type_hint {
745                    check_type_hint_classes(
746                        hint,
747                        &self.codebase,
748                        file,
749                        source,
750                        source_map,
751                        all_issues,
752                    );
753                }
754            }
755            if let Some(hint) = &method.return_type {
756                check_type_hint_classes(hint, &self.codebase, file, source, source_map, all_issues);
757            }
758
759            let Some(body) = &method.body else { continue };
760
761            let method_storage = self.codebase.get_method(fqcn, method.name);
762            let (params, return_ty) = method_storage
763                .as_ref()
764                .map(|m| (m.params.clone(), m.return_type.clone()))
765                .unwrap_or_default();
766
767            let is_ctor = method.name == "__construct";
768            let mut ctx = Context::for_method(
769                &params,
770                return_ty,
771                Some(Arc::from(fqcn)),
772                parent_fqcn.clone(),
773                Some(Arc::from(fqcn)),
774                false,
775                is_ctor,
776                method.is_static,
777            );
778
779            let mut buf = IssueBuffer::new();
780            let mut sa = StatementsAnalyzer::new(
781                &self.codebase,
782                file.clone(),
783                source,
784                source_map,
785                &mut buf,
786                all_symbols,
787            );
788            sa.analyze_stmts(body, &mut ctx);
789            let inferred = merge_return_types(&sa.return_types);
790            drop(sa);
791
792            emit_unused_params(&params, &ctx, method.name, file, all_issues);
793            emit_unused_variables(&ctx, file, all_issues);
794            all_issues.extend(buf.into_issues());
795
796            if let Some(mut cls) = self.codebase.classes.get_mut(fqcn) {
797                if let Some(m) = cls.own_methods.get_mut(method.name) {
798                    m.inferred_return_type = Some(inferred);
799                }
800            }
801        }
802    }
803
804    /// Like `analyze_bodies` but also populates `type_envs` with per-scope type environments.
805    #[allow(clippy::too_many_arguments)]
806    fn analyze_bodies_typed<'arena, 'src>(
807        &self,
808        program: &php_ast::ast::Program<'arena, 'src>,
809        file: Arc<str>,
810        source: &str,
811        source_map: &php_rs_parser::source_map::SourceMap,
812        type_envs: &mut std::collections::HashMap<
813            crate::type_env::ScopeId,
814            crate::type_env::TypeEnv,
815        >,
816        all_symbols: &mut Vec<crate::symbol::ResolvedSymbol>,
817    ) -> Vec<mir_issues::Issue> {
818        use php_ast::ast::StmtKind;
819        let mut all_issues = Vec::new();
820        for stmt in program.stmts.iter() {
821            match &stmt.kind {
822                StmtKind::Function(decl) => {
823                    self.analyze_fn_decl_typed(
824                        decl,
825                        &file,
826                        source,
827                        source_map,
828                        &mut all_issues,
829                        type_envs,
830                        all_symbols,
831                    );
832                }
833                StmtKind::Class(decl) => {
834                    self.analyze_class_decl_typed(
835                        decl,
836                        &file,
837                        source,
838                        source_map,
839                        &mut all_issues,
840                        type_envs,
841                        all_symbols,
842                    );
843                }
844                StmtKind::Enum(decl) => {
845                    self.analyze_enum_decl(decl, &file, source, source_map, &mut all_issues);
846                }
847                StmtKind::Namespace(ns) => {
848                    if let php_ast::ast::NamespaceBody::Braced(stmts) = &ns.body {
849                        for inner in stmts.iter() {
850                            match &inner.kind {
851                                StmtKind::Function(decl) => {
852                                    self.analyze_fn_decl_typed(
853                                        decl,
854                                        &file,
855                                        source,
856                                        source_map,
857                                        &mut all_issues,
858                                        type_envs,
859                                        all_symbols,
860                                    );
861                                }
862                                StmtKind::Class(decl) => {
863                                    self.analyze_class_decl_typed(
864                                        decl,
865                                        &file,
866                                        source,
867                                        source_map,
868                                        &mut all_issues,
869                                        type_envs,
870                                        all_symbols,
871                                    );
872                                }
873                                StmtKind::Enum(decl) => {
874                                    self.analyze_enum_decl(
875                                        decl,
876                                        &file,
877                                        source,
878                                        source_map,
879                                        &mut all_issues,
880                                    );
881                                }
882                                _ => {}
883                            }
884                        }
885                    }
886                }
887                _ => {}
888            }
889        }
890        all_issues
891    }
892
893    /// Like `analyze_fn_decl` but also captures a `TypeEnv` for the function scope.
894    #[allow(clippy::too_many_arguments)]
895    fn analyze_fn_decl_typed<'arena, 'src>(
896        &self,
897        decl: &php_ast::ast::FunctionDecl<'arena, 'src>,
898        file: &Arc<str>,
899        source: &str,
900        source_map: &php_rs_parser::source_map::SourceMap,
901        all_issues: &mut Vec<mir_issues::Issue>,
902        type_envs: &mut std::collections::HashMap<
903            crate::type_env::ScopeId,
904            crate::type_env::TypeEnv,
905        >,
906        all_symbols: &mut Vec<crate::symbol::ResolvedSymbol>,
907    ) {
908        use crate::context::Context;
909        use crate::stmt::StatementsAnalyzer;
910        use mir_issues::IssueBuffer;
911
912        let fn_name = decl.name;
913        let body = &decl.body;
914
915        for param in decl.params.iter() {
916            if let Some(hint) = &param.type_hint {
917                check_type_hint_classes(hint, &self.codebase, file, source, source_map, all_issues);
918            }
919        }
920        if let Some(hint) = &decl.return_type {
921            check_type_hint_classes(hint, &self.codebase, file, source, source_map, all_issues);
922        }
923
924        let resolved_fn = self.codebase.resolve_class_name(file.as_ref(), fn_name);
925        let func_opt: Option<mir_codebase::storage::FunctionStorage> = self
926            .codebase
927            .functions
928            .get(resolved_fn.as_str())
929            .map(|r| r.clone())
930            .or_else(|| self.codebase.functions.get(fn_name).map(|r| r.clone()))
931            .or_else(|| {
932                self.codebase
933                    .functions
934                    .iter()
935                    .find(|e| e.short_name.as_ref() == fn_name)
936                    .map(|e| e.value().clone())
937            });
938
939        let fqn = func_opt.as_ref().map(|f| f.fqn.clone());
940        let (params, return_ty): (Vec<mir_codebase::FnParam>, _) = match &func_opt {
941            Some(f)
942                if f.params.len() == decl.params.len()
943                    && f.params
944                        .iter()
945                        .zip(decl.params.iter())
946                        .all(|(cp, ap)| cp.name.as_ref() == ap.name) =>
947            {
948                (f.params.clone(), f.return_type.clone())
949            }
950            _ => {
951                let ast_params = decl
952                    .params
953                    .iter()
954                    .map(|p| mir_codebase::FnParam {
955                        name: Arc::from(p.name),
956                        ty: None,
957                        default: p.default.as_ref().map(|_| mir_types::Union::mixed()),
958                        is_variadic: p.variadic,
959                        is_byref: p.by_ref,
960                        is_optional: p.default.is_some() || p.variadic,
961                    })
962                    .collect();
963                (ast_params, None)
964            }
965        };
966
967        let mut ctx = Context::for_function(&params, return_ty, None, None, None, false, true);
968        let mut buf = IssueBuffer::new();
969        let mut sa = StatementsAnalyzer::new(
970            &self.codebase,
971            file.clone(),
972            source,
973            source_map,
974            &mut buf,
975            all_symbols,
976        );
977        sa.analyze_stmts(body, &mut ctx);
978        let inferred = merge_return_types(&sa.return_types);
979        drop(sa);
980
981        // Capture TypeEnv for this scope
982        let scope_name = fqn.clone().unwrap_or_else(|| Arc::from(fn_name));
983        type_envs.insert(
984            crate::type_env::ScopeId::Function {
985                file: file.clone(),
986                name: scope_name,
987            },
988            crate::type_env::TypeEnv::new(ctx.vars.clone()),
989        );
990
991        emit_unused_params(&params, &ctx, "", file, all_issues);
992        emit_unused_variables(&ctx, file, all_issues);
993        all_issues.extend(buf.into_issues());
994
995        if let Some(fqn) = fqn {
996            if let Some(mut func) = self.codebase.functions.get_mut(fqn.as_ref()) {
997                func.inferred_return_type = Some(inferred);
998            }
999        }
1000    }
1001
1002    /// Like `analyze_class_decl` but also captures a `TypeEnv` per method scope.
1003    #[allow(clippy::too_many_arguments)]
1004    fn analyze_class_decl_typed<'arena, 'src>(
1005        &self,
1006        decl: &php_ast::ast::ClassDecl<'arena, 'src>,
1007        file: &Arc<str>,
1008        source: &str,
1009        source_map: &php_rs_parser::source_map::SourceMap,
1010        all_issues: &mut Vec<mir_issues::Issue>,
1011        type_envs: &mut std::collections::HashMap<
1012            crate::type_env::ScopeId,
1013            crate::type_env::TypeEnv,
1014        >,
1015        all_symbols: &mut Vec<crate::symbol::ResolvedSymbol>,
1016    ) {
1017        use crate::context::Context;
1018        use crate::stmt::StatementsAnalyzer;
1019        use mir_issues::IssueBuffer;
1020
1021        let class_name = decl.name.unwrap_or("<anonymous>");
1022        let resolved = self.codebase.resolve_class_name(file.as_ref(), class_name);
1023        let fqcn: &str = &resolved;
1024        let parent_fqcn = self
1025            .codebase
1026            .classes
1027            .get(fqcn)
1028            .and_then(|c| c.parent.clone());
1029
1030        for member in decl.members.iter() {
1031            let php_ast::ast::ClassMemberKind::Method(method) = &member.kind else {
1032                continue;
1033            };
1034
1035            for param in method.params.iter() {
1036                if let Some(hint) = &param.type_hint {
1037                    check_type_hint_classes(
1038                        hint,
1039                        &self.codebase,
1040                        file,
1041                        source,
1042                        source_map,
1043                        all_issues,
1044                    );
1045                }
1046            }
1047            if let Some(hint) = &method.return_type {
1048                check_type_hint_classes(hint, &self.codebase, file, source, source_map, all_issues);
1049            }
1050
1051            let Some(body) = &method.body else { continue };
1052
1053            let method_storage = self.codebase.get_method(fqcn, method.name);
1054            let (params, return_ty) = method_storage
1055                .as_ref()
1056                .map(|m| (m.params.clone(), m.return_type.clone()))
1057                .unwrap_or_default();
1058
1059            let is_ctor = method.name == "__construct";
1060            let mut ctx = Context::for_method(
1061                &params,
1062                return_ty,
1063                Some(Arc::from(fqcn)),
1064                parent_fqcn.clone(),
1065                Some(Arc::from(fqcn)),
1066                false,
1067                is_ctor,
1068                method.is_static,
1069            );
1070
1071            let mut buf = IssueBuffer::new();
1072            let mut sa = StatementsAnalyzer::new(
1073                &self.codebase,
1074                file.clone(),
1075                source,
1076                source_map,
1077                &mut buf,
1078                all_symbols,
1079            );
1080            sa.analyze_stmts(body, &mut ctx);
1081            let inferred = merge_return_types(&sa.return_types);
1082            drop(sa);
1083
1084            // Capture TypeEnv for this method scope
1085            type_envs.insert(
1086                crate::type_env::ScopeId::Method {
1087                    class: Arc::from(fqcn),
1088                    method: Arc::from(method.name),
1089                },
1090                crate::type_env::TypeEnv::new(ctx.vars.clone()),
1091            );
1092
1093            emit_unused_params(&params, &ctx, method.name, file, all_issues);
1094            emit_unused_variables(&ctx, file, all_issues);
1095            all_issues.extend(buf.into_issues());
1096
1097            if let Some(mut cls) = self.codebase.classes.get_mut(fqcn) {
1098                if let Some(m) = cls.own_methods.get_mut(method.name) {
1099                    m.inferred_return_type = Some(inferred);
1100                }
1101            }
1102        }
1103    }
1104
1105    /// Discover all `.php` files under a directory, recursively.
1106    pub fn discover_files(root: &Path) -> Vec<PathBuf> {
1107        if root.is_file() {
1108            return vec![root.to_path_buf()];
1109        }
1110        let mut files = Vec::new();
1111        collect_php_files(root, &mut files);
1112        files
1113    }
1114
1115    /// Pass 1 only: collect type definitions from `paths` into the codebase without
1116    /// analyzing method bodies or emitting issues. Used to load vendor types.
1117    pub fn collect_types_only(&self, paths: &[PathBuf]) {
1118        let file_data: Vec<(Arc<str>, String)> = paths
1119            .par_iter()
1120            .filter_map(|path| {
1121                std::fs::read_to_string(path)
1122                    .ok()
1123                    .map(|src| (Arc::from(path.to_string_lossy().as_ref()), src))
1124            })
1125            .collect();
1126
1127        for (file, src) in &file_data {
1128            let arena = bumpalo::Bump::new();
1129            let result = php_rs_parser::parse(&arena, src);
1130            let collector =
1131                DefinitionCollector::new(&self.codebase, file.clone(), src, &result.source_map);
1132            // Ignore any issues emitted during vendor collection
1133            let _ = collector.collect(&result.program);
1134        }
1135    }
1136
1137    /// Check type hints in enum methods for undefined classes.
1138    #[allow(clippy::too_many_arguments)]
1139    fn analyze_enum_decl<'arena, 'src>(
1140        &self,
1141        decl: &php_ast::ast::EnumDecl<'arena, 'src>,
1142        file: &Arc<str>,
1143        source: &str,
1144        source_map: &php_rs_parser::source_map::SourceMap,
1145        all_issues: &mut Vec<mir_issues::Issue>,
1146    ) {
1147        use php_ast::ast::EnumMemberKind;
1148        for member in decl.members.iter() {
1149            let EnumMemberKind::Method(method) = &member.kind else {
1150                continue;
1151            };
1152            for param in method.params.iter() {
1153                if let Some(hint) = &param.type_hint {
1154                    check_type_hint_classes(
1155                        hint,
1156                        &self.codebase,
1157                        file,
1158                        source,
1159                        source_map,
1160                        all_issues,
1161                    );
1162                }
1163            }
1164            if let Some(hint) = &method.return_type {
1165                check_type_hint_classes(hint, &self.codebase, file, source, source_map, all_issues);
1166            }
1167        }
1168    }
1169}
1170
1171impl Default for ProjectAnalyzer {
1172    fn default() -> Self {
1173        Self::new()
1174    }
1175}
1176
1177// ---------------------------------------------------------------------------
1178// UTF-16 offset conversion utility
1179// ---------------------------------------------------------------------------
1180
1181/// Convert a byte offset to a UTF-16 column on a given line.
1182/// Returns (line, col_utf16) where col is 0-based UTF-16 code unit count.
1183fn offset_to_line_col_utf16(
1184    source: &str,
1185    offset: u32,
1186    source_map: &php_rs_parser::source_map::SourceMap,
1187) -> (u32, u16) {
1188    let lc = source_map.offset_to_line_col(offset);
1189    let line = lc.line + 1;
1190
1191    // Find the start of the line containing this offset
1192    let byte_offset = offset as usize;
1193    let line_start_byte = if byte_offset == 0 {
1194        0
1195    } else {
1196        // Find the position after the last newline before this offset
1197        source[..byte_offset]
1198            .rfind('\n')
1199            .map(|p| p + 1)
1200            .unwrap_or(0)
1201    };
1202
1203    // Count UTF-16 code units from line start to the offset
1204    let col_utf16 = source[line_start_byte..byte_offset]
1205        .chars()
1206        .map(|c| c.len_utf16() as u16)
1207        .sum();
1208
1209    (line, col_utf16)
1210}
1211
1212// ---------------------------------------------------------------------------
1213// Type-hint class existence checker
1214// ---------------------------------------------------------------------------
1215
1216/// Walk a `TypeHint` AST node and emit `UndefinedClass` for any named class
1217/// that does not exist in the codebase.  Skips PHP built-in type keywords.
1218fn check_type_hint_classes<'arena, 'src>(
1219    hint: &php_ast::ast::TypeHint<'arena, 'src>,
1220    codebase: &Codebase,
1221    file: &Arc<str>,
1222    source: &str,
1223    source_map: &php_rs_parser::source_map::SourceMap,
1224    issues: &mut Vec<mir_issues::Issue>,
1225) {
1226    use php_ast::ast::TypeHintKind;
1227    match &hint.kind {
1228        TypeHintKind::Named(name) => {
1229            let name_str = crate::parser::name_to_string(name);
1230            // Skip built-in pseudo-types that are not real classes.
1231            if is_pseudo_type(&name_str) {
1232                return;
1233            }
1234            let resolved = codebase.resolve_class_name(file.as_ref(), &name_str);
1235            if !codebase.type_exists(&resolved) {
1236                let (line, col_start) =
1237                    offset_to_line_col_utf16(source, hint.span.start, source_map);
1238                let col_end = if hint.span.start < hint.span.end {
1239                    let (_end_line, end_col) =
1240                        offset_to_line_col_utf16(source, hint.span.end, source_map);
1241                    end_col
1242                } else {
1243                    col_start
1244                };
1245                issues.push(
1246                    mir_issues::Issue::new(
1247                        mir_issues::IssueKind::UndefinedClass { name: resolved },
1248                        mir_issues::Location {
1249                            file: file.clone(),
1250                            line,
1251                            col_start,
1252                            col_end: col_end.max(col_start + 1),
1253                        },
1254                    )
1255                    .with_snippet(crate::parser::span_text(source, hint.span).unwrap_or_default()),
1256                );
1257            }
1258        }
1259        TypeHintKind::Nullable(inner) => {
1260            check_type_hint_classes(inner, codebase, file, source, source_map, issues);
1261        }
1262        TypeHintKind::Union(parts) | TypeHintKind::Intersection(parts) => {
1263            for part in parts.iter() {
1264                check_type_hint_classes(part, codebase, file, source, source_map, issues);
1265            }
1266        }
1267        TypeHintKind::Keyword(_, _) => {} // built-in keyword, always valid
1268    }
1269}
1270
/// Returns true for names that are PHP pseudo-types / special identifiers, not
/// real classes. The comparison ignores letter case.
fn is_pseudo_type(name: &str) -> bool {
    const PSEUDO_TYPES: [&str; 12] = [
        "self", "static", "parent", "null", "true", "false", "never", "void", "mixed", "object",
        "callable", "iterable",
    ];
    PSEUDO_TYPES
        .iter()
        .any(|keyword| name.eq_ignore_ascii_case(keyword))
}
1290
1291/// Magic methods whose parameters are passed by the PHP runtime, not user call sites.
1292const MAGIC_METHODS_WITH_RUNTIME_PARAMS: &[&str] = &[
1293    "__get",
1294    "__set",
1295    "__call",
1296    "__callStatic",
1297    "__isset",
1298    "__unset",
1299];
1300
1301/// Emit `UnusedParam` issues for params that were never read in `ctx`.
1302/// Skips magic methods whose parameters are passed by the PHP runtime.
1303fn emit_unused_params(
1304    params: &[mir_codebase::FnParam],
1305    ctx: &crate::context::Context,
1306    method_name: &str,
1307    file: &Arc<str>,
1308    issues: &mut Vec<mir_issues::Issue>,
1309) {
1310    if MAGIC_METHODS_WITH_RUNTIME_PARAMS.contains(&method_name) {
1311        return;
1312    }
1313    for p in params {
1314        let name = p.name.as_ref().trim_start_matches('$');
1315        if !ctx.read_vars.contains(name) {
1316            issues.push(
1317                mir_issues::Issue::new(
1318                    mir_issues::IssueKind::UnusedParam {
1319                        name: name.to_string(),
1320                    },
1321                    mir_issues::Location {
1322                        file: file.clone(),
1323                        line: 1,
1324                        col_start: 0,
1325                        col_end: 0,
1326                    },
1327                )
1328                .with_snippet(format!("${}", name)),
1329            );
1330        }
1331    }
1332}
1333
1334fn emit_unused_variables(
1335    ctx: &crate::context::Context,
1336    file: &Arc<str>,
1337    issues: &mut Vec<mir_issues::Issue>,
1338) {
1339    // Superglobals are always "used" — skip them
1340    const SUPERGLOBALS: &[&str] = &[
1341        "_SERVER", "_GET", "_POST", "_REQUEST", "_SESSION", "_COOKIE", "_FILES", "_ENV", "GLOBALS",
1342    ];
1343    for name in &ctx.assigned_vars {
1344        if ctx.param_names.contains(name) {
1345            continue;
1346        }
1347        if SUPERGLOBALS.contains(&name.as_str()) {
1348            continue;
1349        }
1350        // $this is implicitly used whenever the method accesses properties or
1351        // calls other methods — never report it as unused.
1352        if name == "this" {
1353            continue;
1354        }
1355        if name.starts_with('_') {
1356            continue;
1357        }
1358        if !ctx.read_vars.contains(name) {
1359            issues.push(mir_issues::Issue::new(
1360                mir_issues::IssueKind::UnusedVariable { name: name.clone() },
1361                mir_issues::Location {
1362                    file: file.clone(),
1363                    line: 1,
1364                    col_start: 0,
1365                    col_end: 0,
1366                },
1367            ));
1368        }
1369    }
1370}
1371
1372/// Merge a list of return types into a single `Union`.
1373/// Returns `void` if the list is empty.
1374pub fn merge_return_types(return_types: &[Union]) -> Union {
1375    if return_types.is_empty() {
1376        return Union::single(mir_types::Atomic::TVoid);
1377    }
1378    return_types
1379        .iter()
1380        .fold(Union::empty(), |acc, t| Union::merge(&acc, t))
1381}
1382
/// Recursively append every file with a `php` extension under `dir` to `out`.
///
/// Symlinks are skipped, as are directories named `vendor`, `.git`,
/// `node_modules`, `.cache` and `.pnpm-store`. Directories and entries that
/// cannot be read are ignored.
pub(crate) fn collect_php_files(dir: &Path, out: &mut Vec<PathBuf>) {
    const SKIPPED_DIRS: [&str; 5] = ["vendor", ".git", "node_modules", ".cache", ".pnpm-store"];

    let Ok(listing) = std::fs::read_dir(dir) else {
        return;
    };

    for item in listing.flatten() {
        // Skip symlinks — they can form cycles (e.g. .pnpm-store)
        let is_link = match item.file_type() {
            Ok(kind) => kind.is_symlink(),
            Err(_) => false,
        };
        if is_link {
            continue;
        }

        let candidate = item.path();
        if candidate.is_dir() {
            let dir_name = candidate
                .file_name()
                .and_then(|n| n.to_str())
                .unwrap_or("");
            if !SKIPPED_DIRS.contains(&dir_name) {
                collect_php_files(&candidate, out);
            }
        } else if candidate.extension().and_then(|e| e.to_str()) == Some("php") {
            out.push(candidate);
        }
    }
}
1406
1407// ---------------------------------------------------------------------------
1408// AnalysisResult
1409// ---------------------------------------------------------------------------
1410
1411// ---------------------------------------------------------------------------
1412// build_reverse_deps
1413// ---------------------------------------------------------------------------
1414
1415/// Build a reverse dependency graph from the codebase after Pass 1.
1416///
1417/// Returns a map: `defining_file → {files that depend on it}`.
1418///
1419/// Dependency edges captured (all derivable from Pass 1 data):
1420/// - `use` imports  (`file_imports`)
1421/// - `extends` / `implements` / trait `use` from `ClassStorage`
1422fn build_reverse_deps(codebase: &Codebase) -> HashMap<String, HashSet<String>> {
1423    let mut reverse: HashMap<String, HashSet<String>> = HashMap::new();
1424
1425    // Helper: record edge "defining_file → dependent_file"
1426    let mut add_edge = |symbol: &str, dependent_file: &str| {
1427        if let Some(defining_file) = codebase.symbol_to_file.get(symbol) {
1428            let def = defining_file.as_ref().to_string();
1429            if def != dependent_file {
1430                reverse
1431                    .entry(def)
1432                    .or_default()
1433                    .insert(dependent_file.to_string());
1434            }
1435        }
1436    };
1437
1438    // use-import edges
1439    for entry in codebase.file_imports.iter() {
1440        let file = entry.key().as_ref().to_string();
1441        for fqcn in entry.value().values() {
1442            add_edge(fqcn, &file);
1443        }
1444    }
1445
1446    // extends / implements / trait edges from ClassStorage
1447    for entry in codebase.classes.iter() {
1448        let defining = {
1449            let fqcn = entry.key().as_ref();
1450            codebase
1451                .symbol_to_file
1452                .get(fqcn)
1453                .map(|f| f.as_ref().to_string())
1454        };
1455        let Some(file) = defining else { continue };
1456
1457        let cls = entry.value();
1458        if let Some(ref parent) = cls.parent {
1459            add_edge(parent.as_ref(), &file);
1460        }
1461        for iface in &cls.interfaces {
1462            add_edge(iface.as_ref(), &file);
1463        }
1464        for tr in &cls.traits {
1465            add_edge(tr.as_ref(), &file);
1466        }
1467    }
1468
1469    reverse
1470}
1471
1472// ---------------------------------------------------------------------------
1473
1474/// Extract the reference locations recorded for `file` from the codebase into
1475/// a flat `Vec<(symbol_key, start, end)>` suitable for caching.
1476fn extract_reference_locations(codebase: &Codebase, file: &Arc<str>) -> Vec<(String, u32, u32)> {
1477    let Some(symbol_keys) = codebase.file_symbol_references.get(file.as_ref()) else {
1478        return Vec::new();
1479    };
1480    let mut out = Vec::new();
1481    for key in symbol_keys.iter() {
1482        let Some(by_file) = codebase.symbol_reference_locations.get(key.as_ref()) else {
1483            continue;
1484        };
1485        let Some(spans) = by_file.get(file.as_ref()) else {
1486            continue;
1487        };
1488        for &(s, e) in spans.iter() {
1489            out.push((key.to_string(), s, e));
1490        }
1491    }
1492    out
1493}
1494
1495// ---------------------------------------------------------------------------
1496
1497pub struct AnalysisResult {
1498    pub issues: Vec<Issue>,
1499    pub type_envs: std::collections::HashMap<crate::type_env::ScopeId, crate::type_env::TypeEnv>,
1500    /// Per-expression resolved symbols from Pass 2.
1501    pub symbols: Vec<crate::symbol::ResolvedSymbol>,
1502}
1503
1504impl AnalysisResult {
1505    pub fn error_count(&self) -> usize {
1506        self.issues
1507            .iter()
1508            .filter(|i| i.severity == mir_issues::Severity::Error)
1509            .count()
1510    }
1511
1512    pub fn warning_count(&self) -> usize {
1513        self.issues
1514            .iter()
1515            .filter(|i| i.severity == mir_issues::Severity::Warning)
1516            .count()
1517    }
1518
1519    /// Group issues by source file.
1520    ///
1521    /// Returns a map from absolute file path to the slice of issues that belong
1522    /// to that file. Useful for LSP `textDocument/publishDiagnostics`, which
1523    /// pushes diagnostics per document.
1524    pub fn issues_by_file(&self) -> HashMap<std::sync::Arc<str>, Vec<&Issue>> {
1525        let mut map: HashMap<std::sync::Arc<str>, Vec<&Issue>> = HashMap::new();
1526        for issue in &self.issues {
1527            map.entry(issue.location.file.clone())
1528                .or_default()
1529                .push(issue);
1530        }
1531        map
1532    }
1533
1534    /// Return the innermost resolved symbol whose span contains `byte_offset`
1535    /// in `file`, or `None` if no symbol was recorded at that position.
1536    ///
1537    /// When multiple symbols overlap (e.g. a method call whose span contains a
1538    /// property access span), the one with the smallest span is returned so the
1539    /// caller gets the most specific symbol at the cursor.
1540    ///
1541    /// Typical use: LSP `textDocument/references` and `textDocument/hover`.
1542    pub fn symbol_at(
1543        &self,
1544        file: &str,
1545        byte_offset: u32,
1546    ) -> Option<&crate::symbol::ResolvedSymbol> {
1547        self.symbols
1548            .iter()
1549            .filter(|s| {
1550                s.file.as_ref() == file && s.span.start <= byte_offset && byte_offset < s.span.end
1551            })
1552            .min_by_key(|s| s.span.end - s.span.start)
1553    }
1554}