Skip to main content

mir_analyzer/
project.rs

1/// Project-level orchestration: file discovery, pass 1, pass 2.
2use std::path::{Path, PathBuf};
3use std::sync::Arc;
4
5use rayon::prelude::*;
6
7use std::collections::{HashMap, HashSet};
8
9use crate::cache::{hash_content, AnalysisCache};
10use crate::pass2::Pass2Driver;
11use crate::php_version::PhpVersion;
12use mir_codebase::Codebase;
13use mir_issues::Issue;
14
15use crate::collector::DefinitionCollector;
16
17// Re-exports for downstream callers in this crate.
18pub use crate::pass2::merge_return_types;
19
20// ---------------------------------------------------------------------------
21// ProjectAnalyzer
22// ---------------------------------------------------------------------------
23
/// Orchestrates analysis of a set of PHP files against one shared [`Codebase`].
///
/// Build one with [`ProjectAnalyzer::new`], [`ProjectAnalyzer::with_cache`] or
/// [`ProjectAnalyzer::from_composer`], adjust the public fields as needed, then
/// call `analyze` (whole project) or `re_analyze_file` (single file).
pub struct ProjectAnalyzer {
    /// Symbol store shared by every pass: definition collection writes into it
    /// and body analysis reads from it.
    pub codebase: Arc<Codebase>,
    /// Optional cache — when `Some`, Pass 2 results are read/written per file.
    pub cache: Option<AnalysisCache>,
    /// Called once after each file completes Pass 2 (used for progress reporting).
    /// It is invoked from inside the parallel Pass 2 iteration, hence the
    /// `Send + Sync` bound; files skipped because of parse errors do not trigger it.
    pub on_file_done: Option<Arc<dyn Fn() + Send + Sync>>,
    /// PSR-4 autoloader mapping from composer.json, if available.
    /// When set, `analyze` uses it to lazy-load referenced but unknown types.
    pub psr4: Option<Arc<crate::composer::Psr4Map>>,
    /// Whether stubs have already been loaded (to avoid double-loading).
    /// Flipped to `true` by the first `load_stubs` call.
    stubs_loaded: std::sync::atomic::AtomicBool,
    /// When true, run dead code detection at the end of analysis.
    pub find_dead_code: bool,
    /// Target PHP language version. `None` means "not configured"; it is
    /// resolved to `PhpVersion::LATEST` wherever a concrete version is needed.
    pub php_version: Option<PhpVersion>,
    /// Additional stub files to parse before analysis (absolute paths).
    pub stub_files: Vec<PathBuf>,
    /// Additional stub directories to walk and parse before analysis (absolute paths).
    pub stub_dirs: Vec<PathBuf>,
}
44
45impl ProjectAnalyzer {
46    pub fn new() -> Self {
47        Self {
48            codebase: Arc::new(Codebase::new()),
49            cache: None,
50            on_file_done: None,
51            psr4: None,
52            stubs_loaded: std::sync::atomic::AtomicBool::new(false),
53            find_dead_code: false,
54            php_version: None,
55            stub_files: Vec::new(),
56            stub_dirs: Vec::new(),
57        }
58    }
59
60    /// Create a `ProjectAnalyzer` with a disk-backed cache stored under `cache_dir`.
61    pub fn with_cache(cache_dir: &Path) -> Self {
62        Self {
63            codebase: Arc::new(Codebase::new()),
64            cache: Some(AnalysisCache::open(cache_dir)),
65            on_file_done: None,
66            psr4: None,
67            stubs_loaded: std::sync::atomic::AtomicBool::new(false),
68            find_dead_code: false,
69            php_version: None,
70            stub_files: Vec::new(),
71            stub_dirs: Vec::new(),
72        }
73    }
74
75    /// Create a `ProjectAnalyzer` from a project root containing `composer.json`.
76    /// Returns the analyzer (with `psr4` set) and the `Psr4Map` so callers can
77    /// call `map.project_files()` / `map.vendor_files()`.
78    pub fn from_composer(
79        root: &Path,
80    ) -> Result<(Self, crate::composer::Psr4Map), crate::composer::ComposerError> {
81        let map = crate::composer::Psr4Map::from_composer(root)?;
82        let psr4 = Arc::new(map.clone());
83        let analyzer = Self {
84            codebase: Arc::new(Codebase::new()),
85            cache: None,
86            on_file_done: None,
87            psr4: Some(psr4),
88            stubs_loaded: std::sync::atomic::AtomicBool::new(false),
89            find_dead_code: false,
90            php_version: None,
91            stub_files: Vec::new(),
92            stub_dirs: Vec::new(),
93        };
94        Ok((analyzer, map))
95    }
96
97    /// Set the target PHP version.
98    pub fn with_php_version(mut self, version: PhpVersion) -> Self {
99        self.php_version = Some(version);
100        self
101    }
102
103    /// Resolve the configured PHP version, defaulting to `PhpVersion::LATEST`
104    /// when none has been set.
105    fn resolved_php_version(&self) -> PhpVersion {
106        self.php_version.unwrap_or(PhpVersion::LATEST)
107    }
108
109    /// Expose codebase for external use (e.g., pre-loading stubs from CLI).
110    pub fn codebase(&self) -> &Arc<Codebase> {
111        &self.codebase
112    }
113
114    /// Load PHP built-in stubs. Called automatically by `analyze` if not done yet.
115    /// Stubs are filtered against the configured target PHP version (or
116    /// `PhpVersion::LATEST` if none was set).
117    pub fn load_stubs(&self) {
118        if !self
119            .stubs_loaded
120            .swap(true, std::sync::atomic::Ordering::SeqCst)
121        {
122            crate::stubs::load_stubs_for_version(&self.codebase, self.resolved_php_version());
123            crate::stubs::load_user_stubs(&self.codebase, &self.stub_files, &self.stub_dirs);
124        }
125    }
126
127    /// Run the full analysis pipeline on a set of file paths.
128    pub fn analyze(&self, paths: &[PathBuf]) -> AnalysisResult {
129        let mut all_issues = Vec::new();
130        let mut parse_errors = Vec::new();
131
132        // ---- Load PHP built-in stubs (before Pass 1 so user code can override)
133        self.load_stubs();
134
135        // ---- Pass 1: read files in parallel ----------------------------------
136        let file_data: Vec<(Arc<str>, String)> = paths
137            .par_iter()
138            .filter_map(|path| match std::fs::read_to_string(path) {
139                Ok(src) => Some((Arc::from(path.to_string_lossy().as_ref()), src)),
140                Err(e) => {
141                    eprintln!("Cannot read {}: {}", path.display(), e);
142                    None
143                }
144            })
145            .collect();
146
147        // ---- Pre-Pass-2 invalidation: evict dependents of changed files ------
148        if let Some(cache) = &self.cache {
149            let changed: Vec<String> = file_data
150                .par_iter()
151                .filter_map(|(f, src)| {
152                    let h = hash_content(src);
153                    if cache.get(f, &h).is_none() {
154                        Some(f.to_string())
155                    } else {
156                        None
157                    }
158                })
159                .collect();
160            if !changed.is_empty() {
161                cache.evict_with_dependents(&changed);
162            }
163        }
164
165        // ---- Pass 1: combined pre-index + definition collection (parallel) -----
166        let pass1_results: Vec<(Vec<Issue>, Vec<Issue>)> = file_data
167            .par_iter()
168            .map(|(file, src)| {
169                use php_ast::ast::StmtKind;
170                let arena = bumpalo::Bump::new();
171                let result = php_rs_parser::parse(&arena, src);
172
173                // --- Pre-index: build FQCN index, file imports, and namespaces ---
174                let mut current_namespace: Option<String> = None;
175                let mut imports: std::collections::HashMap<String, String> =
176                    std::collections::HashMap::new();
177                let mut file_ns_set = false;
178
179                let index_stmts =
180                    |stmts: &[php_ast::ast::Stmt<'_, '_>],
181                     ns: Option<&str>,
182                     imports: &mut std::collections::HashMap<String, String>| {
183                        for stmt in stmts.iter() {
184                            match &stmt.kind {
185                                StmtKind::Use(use_decl) => {
186                                    for item in use_decl.uses.iter() {
187                                        let full_name = crate::parser::name_to_string(&item.name)
188                                            .trim_start_matches('\\')
189                                            .to_string();
190                                        let alias = item.alias.unwrap_or_else(|| {
191                                            full_name.rsplit('\\').next().unwrap_or(&full_name)
192                                        });
193                                        imports.insert(alias.to_string(), full_name);
194                                    }
195                                }
196                                StmtKind::Class(decl) => {
197                                    if let Some(n) = decl.name {
198                                        let fqcn = match ns {
199                                            Some(ns) => format!("{ns}\\{n}"),
200                                            None => n.to_string(),
201                                        };
202                                        self.codebase
203                                            .known_symbols
204                                            .insert(Arc::from(fqcn.as_str()));
205                                    }
206                                }
207                                StmtKind::Interface(decl) => {
208                                    let fqcn = match ns {
209                                        Some(ns) => format!("{}\\{}", ns, decl.name),
210                                        None => decl.name.to_string(),
211                                    };
212                                    self.codebase.known_symbols.insert(Arc::from(fqcn.as_str()));
213                                }
214                                StmtKind::Trait(decl) => {
215                                    let fqcn = match ns {
216                                        Some(ns) => format!("{}\\{}", ns, decl.name),
217                                        None => decl.name.to_string(),
218                                    };
219                                    self.codebase.known_symbols.insert(Arc::from(fqcn.as_str()));
220                                }
221                                StmtKind::Enum(decl) => {
222                                    let fqcn = match ns {
223                                        Some(ns) => format!("{}\\{}", ns, decl.name),
224                                        None => decl.name.to_string(),
225                                    };
226                                    self.codebase.known_symbols.insert(Arc::from(fqcn.as_str()));
227                                }
228                                StmtKind::Function(decl) => {
229                                    let fqn = match ns {
230                                        Some(ns) => format!("{}\\{}", ns, decl.name),
231                                        None => decl.name.to_string(),
232                                    };
233                                    self.codebase.known_symbols.insert(Arc::from(fqn.as_str()));
234                                }
235                                _ => {}
236                            }
237                        }
238                    };
239
240                for stmt in result.program.stmts.iter() {
241                    match &stmt.kind {
242                        StmtKind::Namespace(ns) => {
243                            current_namespace =
244                                ns.name.as_ref().map(|n| crate::parser::name_to_string(n));
245                            if !file_ns_set {
246                                if let Some(ref ns_str) = current_namespace {
247                                    self.codebase
248                                        .file_namespaces
249                                        .insert(file.clone(), ns_str.clone());
250                                    file_ns_set = true;
251                                }
252                            }
253                            if let php_ast::ast::NamespaceBody::Braced(inner_stmts) = &ns.body {
254                                index_stmts(
255                                    inner_stmts,
256                                    current_namespace.as_deref(),
257                                    &mut imports,
258                                );
259                            }
260                        }
261                        _ => index_stmts(
262                            std::slice::from_ref(stmt),
263                            current_namespace.as_deref(),
264                            &mut imports,
265                        ),
266                    }
267                }
268
269                if !imports.is_empty() {
270                    self.codebase.file_imports.insert(file.clone(), imports);
271                }
272
273                // --- Parse errors ---
274                let file_parse_errors: Vec<Issue> = result
275                    .errors
276                    .iter()
277                    .map(|err| {
278                        Issue::new(
279                            mir_issues::IssueKind::ParseError {
280                                message: err.to_string(),
281                            },
282                            mir_issues::Location {
283                                file: file.clone(),
284                                line: 1,
285                                line_end: 1,
286                                col_start: 0,
287                                col_end: 0,
288                            },
289                        )
290                    })
291                    .collect();
292
293                // --- Definition collection ---
294                let collector =
295                    DefinitionCollector::new(&self.codebase, file.clone(), src, &result.source_map);
296                let issues = collector.collect(&result.program);
297
298                (file_parse_errors, issues)
299            })
300            .collect();
301
302        let mut files_with_parse_errors: std::collections::HashSet<Arc<str>> =
303            std::collections::HashSet::new();
304        for (file_parse_errors, issues) in pass1_results {
305            for issue in &file_parse_errors {
306                files_with_parse_errors.insert(issue.location.file.clone());
307            }
308            parse_errors.extend(file_parse_errors);
309            all_issues.extend(issues);
310        }
311
312        all_issues.extend(parse_errors);
313
314        // ---- Lazy-load unknown classes via PSR-4 (issue #50) ----------------
315        if let Some(psr4) = &self.psr4 {
316            self.lazy_load_missing_classes(psr4.clone(), &mut all_issues);
317        }
318
319        // ---- Build reverse dep graph and persist it for the next run ---------
320        if let Some(cache) = &self.cache {
321            let rev = build_reverse_deps(&self.codebase);
322            cache.set_reverse_deps(rev);
323        }
324
325        // ---- Class-level checks (M11) ----------------------------------------
326        let analyzed_file_set: std::collections::HashSet<std::sync::Arc<str>> =
327            file_data.iter().map(|(f, _)| f.clone()).collect();
328        let class_issues =
329            crate::class::ClassAnalyzer::with_files(&self.codebase, analyzed_file_set, &file_data)
330                .analyze_all();
331        all_issues.extend(class_issues);
332
333        // ---- Pass 2 priming: populate inferred_return_type for all functions  --
334        // Run a first inference-only sweep so that cross-file inferred return
335        // types are available before the issue-emitting pass below (G6).
336        file_data
337            .par_iter()
338            .filter(|(file, _)| !files_with_parse_errors.contains(file))
339            .for_each(|(file, src)| {
340                let driver =
341                    Pass2Driver::new_inference_only(&self.codebase, self.resolved_php_version());
342                let arena = bumpalo::Bump::new();
343                let parsed = php_rs_parser::parse(&arena, src);
344                driver.analyze_bodies(&parsed.program, file.clone(), src, &parsed.source_map);
345            });
346
347        // ---- Pass 2: analyze function/method bodies in parallel (M14) --------
348        let pass2_results: Vec<(Vec<Issue>, Vec<crate::symbol::ResolvedSymbol>)> = file_data
349            .par_iter()
350            .filter(|(file, _)| !files_with_parse_errors.contains(file))
351            .map(|(file, src)| {
352                let driver = Pass2Driver::new(&self.codebase, self.resolved_php_version());
353                let result = if let Some(cache) = &self.cache {
354                    let h = hash_content(src);
355                    if let Some((cached_issues, ref_locs)) = cache.get(file, &h) {
356                        self.codebase
357                            .replay_reference_locations(file.clone(), &ref_locs);
358                        (cached_issues, Vec::new())
359                    } else {
360                        let arena = bumpalo::Bump::new();
361                        let parsed = php_rs_parser::parse(&arena, src);
362                        let (issues, symbols) = driver.analyze_bodies(
363                            &parsed.program,
364                            file.clone(),
365                            src,
366                            &parsed.source_map,
367                        );
368                        let ref_locs = extract_reference_locations(&self.codebase, file);
369                        cache.put(file, h, issues.clone(), ref_locs);
370                        (issues, symbols)
371                    }
372                } else {
373                    let arena = bumpalo::Bump::new();
374                    let parsed = php_rs_parser::parse(&arena, src);
375                    driver.analyze_bodies(&parsed.program, file.clone(), src, &parsed.source_map)
376                };
377                if let Some(cb) = &self.on_file_done {
378                    cb();
379                }
380                result
381            })
382            .collect();
383
384        let mut all_symbols = Vec::new();
385        for (issues, symbols) in pass2_results {
386            all_issues.extend(issues);
387            all_symbols.extend(symbols);
388        }
389
390        // Persist cache hits/misses to disk
391        if let Some(cache) = &self.cache {
392            cache.flush();
393        }
394
395        // ---- Compact the reference index ------------------------------------
396        self.codebase.compact_reference_index();
397
398        // ---- Dead-code detection (M18) --------------------------------------
399        if self.find_dead_code {
400            let dead_code_issues =
401                crate::dead_code::DeadCodeAnalyzer::new(&self.codebase).analyze();
402            all_issues.extend(dead_code_issues);
403        }
404
405        AnalysisResult::build(all_issues, std::collections::HashMap::new(), all_symbols)
406    }
407
408    fn lazy_load_missing_classes(
409        &self,
410        psr4: Arc<crate::composer::Psr4Map>,
411        all_issues: &mut Vec<Issue>,
412    ) {
413        use std::collections::HashSet;
414
415        let max_depth = 10;
416        let mut loaded: HashSet<String> = HashSet::new();
417
418        for _ in 0..max_depth {
419            let mut to_load: Vec<(String, PathBuf)> = Vec::new();
420
421            let mut try_queue = |fqcn: &str| {
422                if !self.codebase.type_exists(fqcn) && !loaded.contains(fqcn) {
423                    if let Some(path) = psr4.resolve(fqcn) {
424                        to_load.push((fqcn.to_string(), path));
425                    }
426                }
427            };
428
429            for entry in self.codebase.classes.iter() {
430                let cls = entry.value();
431                if let Some(parent) = &cls.parent {
432                    try_queue(parent.as_ref());
433                }
434                for iface in &cls.interfaces {
435                    try_queue(iface.as_ref());
436                }
437            }
438
439            for entry in self.codebase.interfaces.iter() {
440                for parent in &entry.value().extends {
441                    try_queue(parent.as_ref());
442                }
443            }
444
445            for entry in self.codebase.enums.iter() {
446                for iface in &entry.value().interfaces {
447                    try_queue(iface.as_ref());
448                }
449            }
450
451            for entry in self.codebase.traits.iter() {
452                for used in &entry.value().traits {
453                    try_queue(used.as_ref());
454                }
455            }
456
457            // Also lazy-load any type referenced via `use` imports that isn't yet
458            // in the codebase (covers enums and classes used only in type hints or
459            // static calls, which never appear in the inheritance scan above).
460            for entry in self.codebase.file_imports.iter() {
461                for fqcn in entry.value().values() {
462                    try_queue(fqcn.as_str());
463                }
464            }
465
466            if to_load.is_empty() {
467                break;
468            }
469
470            for (fqcn, path) in to_load {
471                loaded.insert(fqcn);
472                if let Ok(src) = std::fs::read_to_string(&path) {
473                    let file: Arc<str> = Arc::from(path.to_string_lossy().as_ref());
474                    let arena = bumpalo::Bump::new();
475                    let result = php_rs_parser::parse(&arena, &src);
476                    let collector = crate::collector::DefinitionCollector::new(
477                        &self.codebase,
478                        file,
479                        &src,
480                        &result.source_map,
481                    );
482                    let issues = collector.collect(&result.program);
483                    all_issues.extend(issues);
484                }
485            }
486
487            self.codebase.invalidate_finalization();
488            self.codebase.finalize();
489        }
490    }
491
492    /// Re-analyze a single file within the existing codebase.
493    ///
494    /// This is the incremental analysis API for LSP:
495    /// 1. Removes old definitions from this file
496    /// 2. Re-runs Pass 1 (definition collection) on the new content
497    /// 3. Re-finalizes the codebase (rebuilds inheritance)
498    /// 4. Re-runs Pass 2 (body analysis) on this file
499    /// 5. Returns the analysis result for this file only
500    pub fn re_analyze_file(&self, file_path: &str, new_content: &str) -> AnalysisResult {
501        // Fast path: content unchanged and cache has a valid entry — skip full re-analysis.
502        if let Some(cache) = &self.cache {
503            let h = hash_content(new_content);
504            if let Some((issues, ref_locs)) = cache.get(file_path, &h) {
505                let file: Arc<str> = Arc::from(file_path);
506                self.codebase.replay_reference_locations(file, &ref_locs);
507                return AnalysisResult::build(issues, HashMap::new(), Vec::new());
508            }
509        }
510
511        let structural_snapshot = self.codebase.file_structural_snapshot(file_path);
512        self.codebase.remove_file_definitions(file_path);
513
514        let file: Arc<str> = Arc::from(file_path);
515        let arena = bumpalo::Bump::new();
516        let parsed = php_rs_parser::parse(&arena, new_content);
517
518        let mut all_issues = Vec::new();
519
520        for err in &parsed.errors {
521            all_issues.push(Issue::new(
522                mir_issues::IssueKind::ParseError {
523                    message: err.to_string(),
524                },
525                mir_issues::Location {
526                    file: file.clone(),
527                    line: 1,
528                    line_end: 1,
529                    col_start: 0,
530                    col_end: 0,
531                },
532            ));
533        }
534
535        let collector = DefinitionCollector::new(
536            &self.codebase,
537            file.clone(),
538            new_content,
539            &parsed.source_map,
540        );
541        all_issues.extend(collector.collect(&parsed.program));
542
543        if self
544            .codebase
545            .structural_unchanged_after_pass1(file_path, &structural_snapshot)
546        {
547            self.codebase
548                .restore_all_parents(file_path, &structural_snapshot);
549        } else {
550            self.codebase.finalize();
551        }
552
553        let symbols = if parsed.errors.is_empty() {
554            let driver = Pass2Driver::new(&self.codebase, self.resolved_php_version());
555            let (body_issues, symbols) = driver.analyze_bodies(
556                &parsed.program,
557                file.clone(),
558                new_content,
559                &parsed.source_map,
560            );
561            all_issues.extend(body_issues);
562            symbols
563        } else {
564            Vec::new()
565        };
566
567        if let Some(cache) = &self.cache {
568            let h = hash_content(new_content);
569            cache.evict_with_dependents(&[file_path.to_string()]);
570            let ref_locs = extract_reference_locations(&self.codebase, &file);
571            cache.put(file_path, h, all_issues.clone(), ref_locs);
572        }
573
574        AnalysisResult::build(all_issues, HashMap::new(), symbols)
575    }
576
577    /// Analyze a PHP source string without a real file path.
578    /// Useful for tests and LSP single-file mode.
579    pub fn analyze_source(source: &str) -> AnalysisResult {
580        use crate::collector::DefinitionCollector;
581        let analyzer = ProjectAnalyzer::new();
582        analyzer.load_stubs();
583        let file: Arc<str> = Arc::from("<source>");
584        let arena = bumpalo::Bump::new();
585        let result = php_rs_parser::parse(&arena, source);
586        let mut all_issues = Vec::new();
587        for err in &result.errors {
588            all_issues.push(Issue::new(
589                mir_issues::IssueKind::ParseError {
590                    message: err.to_string(),
591                },
592                mir_issues::Location {
593                    file: file.clone(),
594                    line: 1,
595                    line_end: 1,
596                    col_start: 0,
597                    col_end: 0,
598                },
599            ));
600        }
601        if !result.errors.is_empty() {
602            return AnalysisResult::build(all_issues, std::collections::HashMap::new(), Vec::new());
603        }
604        let collector =
605            DefinitionCollector::new(&analyzer.codebase, file.clone(), source, &result.source_map);
606        all_issues.extend(collector.collect(&result.program));
607        analyzer.codebase.finalize();
608        let mut type_envs = std::collections::HashMap::new();
609        let mut all_symbols = Vec::new();
610        let driver = Pass2Driver::new(&analyzer.codebase, analyzer.resolved_php_version());
611        all_issues.extend(driver.analyze_bodies_typed(
612            &result.program,
613            file.clone(),
614            source,
615            &result.source_map,
616            &mut type_envs,
617            &mut all_symbols,
618        ));
619        AnalysisResult::build(all_issues, type_envs, all_symbols)
620    }
621
622    /// Discover all `.php` files under a directory, recursively.
623    pub fn discover_files(root: &Path) -> Vec<PathBuf> {
624        if root.is_file() {
625            return vec![root.to_path_buf()];
626        }
627        let mut files = Vec::new();
628        collect_php_files(root, &mut files);
629        files
630    }
631
632    /// Pass 1 only: collect type definitions from `paths` into the codebase without
633    /// analyzing method bodies or emitting issues. Used to load vendor types.
634    pub fn collect_types_only(&self, paths: &[PathBuf]) {
635        paths.par_iter().for_each(|path| {
636            let Ok(src) = std::fs::read_to_string(path) else {
637                return;
638            };
639            let file: Arc<str> = Arc::from(path.to_string_lossy().as_ref());
640            let arena = bumpalo::Bump::new();
641            let result = php_rs_parser::parse(&arena, &src);
642            let collector =
643                DefinitionCollector::new(&self.codebase, file, &src, &result.source_map);
644            let _ = collector.collect(&result.program);
645        });
646    }
647}
648
649impl Default for ProjectAnalyzer {
650    fn default() -> Self {
651        Self::new()
652    }
653}
654
655// ---------------------------------------------------------------------------
656
/// Recursively append every `*.php` file below `dir` to `out`.
///
/// - Symbolic links (to files or directories) are never followed or reported.
/// - Directories named `vendor`, `.git`, `node_modules`, `.cache` and
///   `.pnpm-store` are skipped entirely.
/// - The extension match is exact (`php`, lowercase).
/// - Directories that cannot be listed and entries that cannot be read are
///   skipped silently.
///
/// Entries are appended in the order the operating system lists them.
pub(crate) fn collect_php_files(dir: &Path, out: &mut Vec<PathBuf>) {
    let Ok(entries) = std::fs::read_dir(dir) else {
        return;
    };

    for entry in entries.flatten() {
        let path = entry.path();

        // One `file_type()` lookup answers both "is it a symlink?" and "is it
        // a directory?", so no second metadata query per entry is needed.
        // `file_type()` does not follow symlinks, and symlinks are skipped, so
        // the answer matches a `path.is_dir()` check on the remaining entries.
        let is_dir = match entry.file_type() {
            Ok(kind) if kind.is_symlink() => continue,
            Ok(kind) => kind.is_dir(),
            // Lookup failed: fall back to querying the path itself.
            Err(_) => path.is_dir(),
        };

        if is_dir {
            let name = path.file_name().and_then(|n| n.to_str()).unwrap_or("");
            let skipped = matches!(
                name,
                "vendor" | ".git" | "node_modules" | ".cache" | ".pnpm-store"
            );
            if !skipped {
                collect_php_files(&path, out);
            }
        } else if path.extension().and_then(|e| e.to_str()) == Some("php") {
            out.push(path);
        }
    }
}
679
680// ---------------------------------------------------------------------------
681// build_reverse_deps
682// ---------------------------------------------------------------------------
683
684fn build_reverse_deps(codebase: &Codebase) -> HashMap<String, HashSet<String>> {
685    let mut reverse: HashMap<String, HashSet<String>> = HashMap::new();
686
687    let mut add_edge = |symbol: &str, dependent_file: &str| {
688        if let Some(defining_file) = codebase.symbol_to_file.get(symbol) {
689            let def = defining_file.as_ref().to_string();
690            if def != dependent_file {
691                reverse
692                    .entry(def)
693                    .or_default()
694                    .insert(dependent_file.to_string());
695            }
696        }
697    };
698
699    for entry in codebase.file_imports.iter() {
700        let file = entry.key().as_ref().to_string();
701        for fqcn in entry.value().values() {
702            add_edge(fqcn, &file);
703        }
704    }
705
706    for entry in codebase.classes.iter() {
707        let defining = {
708            let fqcn = entry.key().as_ref();
709            codebase
710                .symbol_to_file
711                .get(fqcn)
712                .map(|f| f.as_ref().to_string())
713        };
714        let Some(file) = defining else { continue };
715
716        let cls = entry.value();
717        if let Some(ref parent) = cls.parent {
718            add_edge(parent.as_ref(), &file);
719        }
720        for iface in &cls.interfaces {
721            add_edge(iface.as_ref(), &file);
722        }
723        for tr in &cls.traits {
724            add_edge(tr.as_ref(), &file);
725        }
726    }
727
728    reverse
729}
730
731// ---------------------------------------------------------------------------
732
733fn extract_reference_locations(
734    codebase: &Codebase,
735    file: &Arc<str>,
736) -> Vec<(String, u32, u16, u16)> {
737    codebase
738        .extract_file_reference_locations(file.as_ref())
739        .into_iter()
740        .map(|(sym, line, col_start, col_end)| (sym.to_string(), line, col_start, col_end))
741        .collect()
742}
743
744// ---------------------------------------------------------------------------
745// AnalysisResult
746// ---------------------------------------------------------------------------
747
/// Outcome of an analysis run: the reported issues plus the data needed for
/// position-based symbol lookups.
pub struct AnalysisResult {
    /// Every issue reported by the run.
    pub issues: Vec<Issue>,
    /// Type environments keyed by scope. Only `analyze_source` fills this map;
    /// `analyze` and `re_analyze_file` leave it empty.
    pub type_envs: std::collections::HashMap<crate::type_env::ScopeId, crate::type_env::TypeEnv>,
    /// Per-expression resolved symbols from Pass 2, sorted by file path.
    pub symbols: Vec<crate::symbol::ResolvedSymbol>,
    /// Maps each file path to the contiguous range within `symbols` that belongs
    /// to it. Built once after analysis; allows `symbol_at` to scan only the
    /// relevant file's slice rather than the entire codebase-wide vector.
    symbols_by_file: HashMap<Arc<str>, std::ops::Range<usize>>,
}
758
759impl AnalysisResult {
760    fn build(
761        issues: Vec<Issue>,
762        type_envs: std::collections::HashMap<crate::type_env::ScopeId, crate::type_env::TypeEnv>,
763        mut symbols: Vec<crate::symbol::ResolvedSymbol>,
764    ) -> Self {
765        symbols.sort_unstable_by(|a, b| a.file.as_ref().cmp(b.file.as_ref()));
766        let mut symbols_by_file: HashMap<Arc<str>, std::ops::Range<usize>> = HashMap::new();
767        let mut i = 0;
768        while i < symbols.len() {
769            let file = Arc::clone(&symbols[i].file);
770            let start = i;
771            while i < symbols.len() && symbols[i].file == file {
772                i += 1;
773            }
774            symbols_by_file.insert(file, start..i);
775        }
776        Self {
777            issues,
778            type_envs,
779            symbols,
780            symbols_by_file,
781        }
782    }
783}
784
785impl AnalysisResult {
786    pub fn error_count(&self) -> usize {
787        self.issues
788            .iter()
789            .filter(|i| i.severity == mir_issues::Severity::Error)
790            .count()
791    }
792
793    pub fn warning_count(&self) -> usize {
794        self.issues
795            .iter()
796            .filter(|i| i.severity == mir_issues::Severity::Warning)
797            .count()
798    }
799
800    /// Group issues by source file.
801    pub fn issues_by_file(&self) -> HashMap<std::sync::Arc<str>, Vec<&Issue>> {
802        let mut map: HashMap<std::sync::Arc<str>, Vec<&Issue>> = HashMap::new();
803        for issue in &self.issues {
804            map.entry(issue.location.file.clone())
805                .or_default()
806                .push(issue);
807        }
808        map
809    }
810
811    /// Return the innermost resolved symbol whose span contains `byte_offset`
812    /// in `file`, or `None` if no symbol was recorded at that position.
813    pub fn symbol_at(
814        &self,
815        file: &str,
816        byte_offset: u32,
817    ) -> Option<&crate::symbol::ResolvedSymbol> {
818        let range = self.symbols_by_file.get(file)?;
819        self.symbols[range.clone()]
820            .iter()
821            .filter(|s| s.span.start <= byte_offset && byte_offset < s.span.end)
822            .min_by_key(|s| s.span.end - s.span.start)
823    }
824}