1use std::path::{Path, PathBuf};
3use std::sync::Arc;
4
5use rayon::prelude::*;
6
7use std::collections::{HashMap, HashSet};
8
9use crate::cache::{hash_content, AnalysisCache};
10use crate::pass2::Pass2Driver;
11use crate::php_version::PhpVersion;
12use mir_codebase::Codebase;
13use mir_issues::Issue;
14
15use crate::collector::DefinitionCollector;
16
17pub use crate::pass2::merge_return_types;
19
20pub struct ProjectAnalyzer {
25 pub codebase: Arc<Codebase>,
26 pub cache: Option<AnalysisCache>,
28 pub on_file_done: Option<Arc<dyn Fn() + Send + Sync>>,
30 pub psr4: Option<Arc<crate::composer::Psr4Map>>,
32 stubs_loaded: std::sync::atomic::AtomicBool,
34 pub find_dead_code: bool,
36 pub php_version: Option<PhpVersion>,
39 pub stub_files: Vec<PathBuf>,
41 pub stub_dirs: Vec<PathBuf>,
43}
44
45impl ProjectAnalyzer {
46 pub fn new() -> Self {
47 Self {
48 codebase: Arc::new(Codebase::new()),
49 cache: None,
50 on_file_done: None,
51 psr4: None,
52 stubs_loaded: std::sync::atomic::AtomicBool::new(false),
53 find_dead_code: false,
54 php_version: None,
55 stub_files: Vec::new(),
56 stub_dirs: Vec::new(),
57 }
58 }
59
60 pub fn with_cache(cache_dir: &Path) -> Self {
62 Self {
63 codebase: Arc::new(Codebase::new()),
64 cache: Some(AnalysisCache::open(cache_dir)),
65 on_file_done: None,
66 psr4: None,
67 stubs_loaded: std::sync::atomic::AtomicBool::new(false),
68 find_dead_code: false,
69 php_version: None,
70 stub_files: Vec::new(),
71 stub_dirs: Vec::new(),
72 }
73 }
74
75 pub fn from_composer(
79 root: &Path,
80 ) -> Result<(Self, crate::composer::Psr4Map), crate::composer::ComposerError> {
81 let map = crate::composer::Psr4Map::from_composer(root)?;
82 let psr4 = Arc::new(map.clone());
83 let analyzer = Self {
84 codebase: Arc::new(Codebase::new()),
85 cache: None,
86 on_file_done: None,
87 psr4: Some(psr4),
88 stubs_loaded: std::sync::atomic::AtomicBool::new(false),
89 find_dead_code: false,
90 php_version: None,
91 stub_files: Vec::new(),
92 stub_dirs: Vec::new(),
93 };
94 Ok((analyzer, map))
95 }
96
97 pub fn with_php_version(mut self, version: PhpVersion) -> Self {
99 self.php_version = Some(version);
100 self
101 }
102
103 fn resolved_php_version(&self) -> PhpVersion {
106 self.php_version.unwrap_or(PhpVersion::LATEST)
107 }
108
109 pub fn codebase(&self) -> &Arc<Codebase> {
111 &self.codebase
112 }
113
114 pub fn load_stubs(&self) {
118 if !self
119 .stubs_loaded
120 .swap(true, std::sync::atomic::Ordering::SeqCst)
121 {
122 crate::stubs::load_stubs_for_version(&self.codebase, self.resolved_php_version());
123 crate::stubs::load_user_stubs(&self.codebase, &self.stub_files, &self.stub_dirs);
124 }
125 }
126
127 pub fn analyze(&self, paths: &[PathBuf]) -> AnalysisResult {
129 let mut all_issues = Vec::new();
130 let mut parse_errors = Vec::new();
131
132 self.load_stubs();
134
135 let file_data: Vec<(Arc<str>, String)> = paths
137 .par_iter()
138 .filter_map(|path| match std::fs::read_to_string(path) {
139 Ok(src) => Some((Arc::from(path.to_string_lossy().as_ref()), src)),
140 Err(e) => {
141 eprintln!("Cannot read {}: {}", path.display(), e);
142 None
143 }
144 })
145 .collect();
146
147 if let Some(cache) = &self.cache {
149 let changed: Vec<String> = file_data
150 .par_iter()
151 .filter_map(|(f, src)| {
152 let h = hash_content(src);
153 if cache.get(f, &h).is_none() {
154 Some(f.to_string())
155 } else {
156 None
157 }
158 })
159 .collect();
160 if !changed.is_empty() {
161 cache.evict_with_dependents(&changed);
162 }
163 }
164
165 let pass1_results: Vec<(Vec<Issue>, Vec<Issue>)> = file_data
167 .par_iter()
168 .map(|(file, src)| {
169 use php_ast::ast::StmtKind;
170 let arena = bumpalo::Bump::new();
171 let result = php_rs_parser::parse(&arena, src);
172
173 let mut current_namespace: Option<String> = None;
175 let mut imports: std::collections::HashMap<String, String> =
176 std::collections::HashMap::new();
177 let mut file_ns_set = false;
178
179 let index_stmts =
180 |stmts: &[php_ast::ast::Stmt<'_, '_>],
181 ns: Option<&str>,
182 imports: &mut std::collections::HashMap<String, String>| {
183 for stmt in stmts.iter() {
184 match &stmt.kind {
185 StmtKind::Use(use_decl) => {
186 for item in use_decl.uses.iter() {
187 let full_name = crate::parser::name_to_string(&item.name)
188 .trim_start_matches('\\')
189 .to_string();
190 let alias = item.alias.unwrap_or_else(|| {
191 full_name.rsplit('\\').next().unwrap_or(&full_name)
192 });
193 imports.insert(alias.to_string(), full_name);
194 }
195 }
196 StmtKind::Class(decl) => {
197 if let Some(n) = decl.name {
198 let fqcn = match ns {
199 Some(ns) => format!("{ns}\\{n}"),
200 None => n.to_string(),
201 };
202 self.codebase
203 .known_symbols
204 .insert(Arc::from(fqcn.as_str()));
205 }
206 }
207 StmtKind::Interface(decl) => {
208 let fqcn = match ns {
209 Some(ns) => format!("{}\\{}", ns, decl.name),
210 None => decl.name.to_string(),
211 };
212 self.codebase.known_symbols.insert(Arc::from(fqcn.as_str()));
213 }
214 StmtKind::Trait(decl) => {
215 let fqcn = match ns {
216 Some(ns) => format!("{}\\{}", ns, decl.name),
217 None => decl.name.to_string(),
218 };
219 self.codebase.known_symbols.insert(Arc::from(fqcn.as_str()));
220 }
221 StmtKind::Enum(decl) => {
222 let fqcn = match ns {
223 Some(ns) => format!("{}\\{}", ns, decl.name),
224 None => decl.name.to_string(),
225 };
226 self.codebase.known_symbols.insert(Arc::from(fqcn.as_str()));
227 }
228 StmtKind::Function(decl) => {
229 let fqn = match ns {
230 Some(ns) => format!("{}\\{}", ns, decl.name),
231 None => decl.name.to_string(),
232 };
233 self.codebase.known_symbols.insert(Arc::from(fqn.as_str()));
234 }
235 _ => {}
236 }
237 }
238 };
239
240 for stmt in result.program.stmts.iter() {
241 match &stmt.kind {
242 StmtKind::Namespace(ns) => {
243 current_namespace =
244 ns.name.as_ref().map(|n| crate::parser::name_to_string(n));
245 if !file_ns_set {
246 if let Some(ref ns_str) = current_namespace {
247 self.codebase
248 .file_namespaces
249 .insert(file.clone(), ns_str.clone());
250 file_ns_set = true;
251 }
252 }
253 if let php_ast::ast::NamespaceBody::Braced(inner_stmts) = &ns.body {
254 index_stmts(
255 inner_stmts,
256 current_namespace.as_deref(),
257 &mut imports,
258 );
259 }
260 }
261 _ => index_stmts(
262 std::slice::from_ref(stmt),
263 current_namespace.as_deref(),
264 &mut imports,
265 ),
266 }
267 }
268
269 if !imports.is_empty() {
270 self.codebase.file_imports.insert(file.clone(), imports);
271 }
272
273 let file_parse_errors: Vec<Issue> = result
275 .errors
276 .iter()
277 .map(|err| {
278 Issue::new(
279 mir_issues::IssueKind::ParseError {
280 message: err.to_string(),
281 },
282 mir_issues::Location {
283 file: file.clone(),
284 line: 1,
285 line_end: 1,
286 col_start: 0,
287 col_end: 0,
288 },
289 )
290 })
291 .collect();
292
293 let collector =
295 DefinitionCollector::new(&self.codebase, file.clone(), src, &result.source_map);
296 let issues = collector.collect(&result.program);
297
298 (file_parse_errors, issues)
299 })
300 .collect();
301
302 let mut files_with_parse_errors: std::collections::HashSet<Arc<str>> =
303 std::collections::HashSet::new();
304 for (file_parse_errors, issues) in pass1_results {
305 for issue in &file_parse_errors {
306 files_with_parse_errors.insert(issue.location.file.clone());
307 }
308 parse_errors.extend(file_parse_errors);
309 all_issues.extend(issues);
310 }
311
312 all_issues.extend(parse_errors);
313
314 self.codebase.finalize();
316
317 if let Some(psr4) = &self.psr4 {
319 self.lazy_load_missing_classes(psr4.clone(), &mut all_issues);
320 }
321
322 if let Some(cache) = &self.cache {
324 let rev = build_reverse_deps(&self.codebase);
325 cache.set_reverse_deps(rev);
326 }
327
328 let analyzed_file_set: std::collections::HashSet<std::sync::Arc<str>> =
330 file_data.iter().map(|(f, _)| f.clone()).collect();
331 let class_issues =
332 crate::class::ClassAnalyzer::with_files(&self.codebase, analyzed_file_set, &file_data)
333 .analyze_all();
334 all_issues.extend(class_issues);
335
336 let pass2_results: Vec<(Vec<Issue>, Vec<crate::symbol::ResolvedSymbol>)> = file_data
338 .par_iter()
339 .filter(|(file, _)| !files_with_parse_errors.contains(file))
340 .map(|(file, src)| {
341 let driver = Pass2Driver::new(&self.codebase, self.resolved_php_version());
342 let result = if let Some(cache) = &self.cache {
343 let h = hash_content(src);
344 if let Some((cached_issues, ref_locs)) = cache.get(file, &h) {
345 self.codebase
346 .replay_reference_locations(file.clone(), &ref_locs);
347 (cached_issues, Vec::new())
348 } else {
349 let arena = bumpalo::Bump::new();
350 let parsed = php_rs_parser::parse(&arena, src);
351 let (issues, symbols) = driver.analyze_bodies(
352 &parsed.program,
353 file.clone(),
354 src,
355 &parsed.source_map,
356 );
357 let ref_locs = extract_reference_locations(&self.codebase, file);
358 cache.put(file, h, issues.clone(), ref_locs);
359 (issues, symbols)
360 }
361 } else {
362 let arena = bumpalo::Bump::new();
363 let parsed = php_rs_parser::parse(&arena, src);
364 driver.analyze_bodies(&parsed.program, file.clone(), src, &parsed.source_map)
365 };
366 if let Some(cb) = &self.on_file_done {
367 cb();
368 }
369 result
370 })
371 .collect();
372
373 let mut all_symbols = Vec::new();
374 for (issues, symbols) in pass2_results {
375 all_issues.extend(issues);
376 all_symbols.extend(symbols);
377 }
378
379 if let Some(cache) = &self.cache {
381 cache.flush();
382 }
383
384 self.codebase.compact_reference_index();
386
387 if self.find_dead_code {
389 let dead_code_issues =
390 crate::dead_code::DeadCodeAnalyzer::new(&self.codebase).analyze();
391 all_issues.extend(dead_code_issues);
392 }
393
394 AnalysisResult::build(all_issues, std::collections::HashMap::new(), all_symbols)
395 }
396
397 fn lazy_load_missing_classes(
398 &self,
399 psr4: Arc<crate::composer::Psr4Map>,
400 all_issues: &mut Vec<Issue>,
401 ) {
402 use std::collections::HashSet;
403
404 let max_depth = 10;
405 let mut loaded: HashSet<String> = HashSet::new();
406
407 for _ in 0..max_depth {
408 let mut to_load: Vec<(String, PathBuf)> = Vec::new();
409
410 let mut try_queue = |fqcn: &str| {
411 if !self.codebase.type_exists(fqcn) && !loaded.contains(fqcn) {
412 if let Some(path) = psr4.resolve(fqcn) {
413 to_load.push((fqcn.to_string(), path));
414 }
415 }
416 };
417
418 for entry in self.codebase.classes.iter() {
419 let cls = entry.value();
420 if let Some(parent) = &cls.parent {
421 try_queue(parent.as_ref());
422 }
423 for iface in &cls.interfaces {
424 try_queue(iface.as_ref());
425 }
426 }
427
428 for entry in self.codebase.interfaces.iter() {
429 for parent in &entry.value().extends {
430 try_queue(parent.as_ref());
431 }
432 }
433
434 for entry in self.codebase.enums.iter() {
435 for iface in &entry.value().interfaces {
436 try_queue(iface.as_ref());
437 }
438 }
439
440 for entry in self.codebase.traits.iter() {
441 for used in &entry.value().traits {
442 try_queue(used.as_ref());
443 }
444 }
445
446 for entry in self.codebase.file_imports.iter() {
450 for fqcn in entry.value().values() {
451 try_queue(fqcn.as_str());
452 }
453 }
454
455 if to_load.is_empty() {
456 break;
457 }
458
459 for (fqcn, path) in to_load {
460 loaded.insert(fqcn);
461 if let Ok(src) = std::fs::read_to_string(&path) {
462 let file: Arc<str> = Arc::from(path.to_string_lossy().as_ref());
463 let arena = bumpalo::Bump::new();
464 let result = php_rs_parser::parse(&arena, &src);
465 let collector = crate::collector::DefinitionCollector::new(
466 &self.codebase,
467 file,
468 &src,
469 &result.source_map,
470 );
471 let issues = collector.collect(&result.program);
472 all_issues.extend(issues);
473 }
474 }
475
476 self.codebase.invalidate_finalization();
477 self.codebase.finalize();
478 }
479 }
480
481 pub fn re_analyze_file(&self, file_path: &str, new_content: &str) -> AnalysisResult {
490 if let Some(cache) = &self.cache {
492 let h = hash_content(new_content);
493 if let Some((issues, ref_locs)) = cache.get(file_path, &h) {
494 let file: Arc<str> = Arc::from(file_path);
495 self.codebase.replay_reference_locations(file, &ref_locs);
496 return AnalysisResult::build(issues, HashMap::new(), Vec::new());
497 }
498 }
499
500 let structural_snapshot = self.codebase.file_structural_snapshot(file_path);
501 self.codebase.remove_file_definitions(file_path);
502
503 let file: Arc<str> = Arc::from(file_path);
504 let arena = bumpalo::Bump::new();
505 let parsed = php_rs_parser::parse(&arena, new_content);
506
507 let mut all_issues = Vec::new();
508
509 for err in &parsed.errors {
510 all_issues.push(Issue::new(
511 mir_issues::IssueKind::ParseError {
512 message: err.to_string(),
513 },
514 mir_issues::Location {
515 file: file.clone(),
516 line: 1,
517 line_end: 1,
518 col_start: 0,
519 col_end: 0,
520 },
521 ));
522 }
523
524 let collector = DefinitionCollector::new(
525 &self.codebase,
526 file.clone(),
527 new_content,
528 &parsed.source_map,
529 );
530 all_issues.extend(collector.collect(&parsed.program));
531
532 if self
533 .codebase
534 .structural_unchanged_after_pass1(file_path, &structural_snapshot)
535 {
536 self.codebase
537 .restore_all_parents(file_path, &structural_snapshot);
538 } else {
539 self.codebase.finalize();
540 }
541
542 let symbols = if parsed.errors.is_empty() {
543 let driver = Pass2Driver::new(&self.codebase, self.resolved_php_version());
544 let (body_issues, symbols) = driver.analyze_bodies(
545 &parsed.program,
546 file.clone(),
547 new_content,
548 &parsed.source_map,
549 );
550 all_issues.extend(body_issues);
551 symbols
552 } else {
553 Vec::new()
554 };
555
556 if let Some(cache) = &self.cache {
557 let h = hash_content(new_content);
558 cache.evict_with_dependents(&[file_path.to_string()]);
559 let ref_locs = extract_reference_locations(&self.codebase, &file);
560 cache.put(file_path, h, all_issues.clone(), ref_locs);
561 }
562
563 AnalysisResult::build(all_issues, HashMap::new(), symbols)
564 }
565
566 pub fn analyze_source(source: &str) -> AnalysisResult {
569 use crate::collector::DefinitionCollector;
570 let analyzer = ProjectAnalyzer::new();
571 analyzer.load_stubs();
572 let file: Arc<str> = Arc::from("<source>");
573 let arena = bumpalo::Bump::new();
574 let result = php_rs_parser::parse(&arena, source);
575 let mut all_issues = Vec::new();
576 for err in &result.errors {
577 all_issues.push(Issue::new(
578 mir_issues::IssueKind::ParseError {
579 message: err.to_string(),
580 },
581 mir_issues::Location {
582 file: file.clone(),
583 line: 1,
584 line_end: 1,
585 col_start: 0,
586 col_end: 0,
587 },
588 ));
589 }
590 if !result.errors.is_empty() {
591 return AnalysisResult::build(all_issues, std::collections::HashMap::new(), Vec::new());
592 }
593 let collector =
594 DefinitionCollector::new(&analyzer.codebase, file.clone(), source, &result.source_map);
595 all_issues.extend(collector.collect(&result.program));
596 analyzer.codebase.finalize();
597 let mut type_envs = std::collections::HashMap::new();
598 let mut all_symbols = Vec::new();
599 let driver = Pass2Driver::new(&analyzer.codebase, analyzer.resolved_php_version());
600 all_issues.extend(driver.analyze_bodies_typed(
601 &result.program,
602 file.clone(),
603 source,
604 &result.source_map,
605 &mut type_envs,
606 &mut all_symbols,
607 ));
608 AnalysisResult::build(all_issues, type_envs, all_symbols)
609 }
610
611 pub fn discover_files(root: &Path) -> Vec<PathBuf> {
613 if root.is_file() {
614 return vec![root.to_path_buf()];
615 }
616 let mut files = Vec::new();
617 collect_php_files(root, &mut files);
618 files
619 }
620
621 pub fn collect_types_only(&self, paths: &[PathBuf]) {
624 paths.par_iter().for_each(|path| {
625 let Ok(src) = std::fs::read_to_string(path) else {
626 return;
627 };
628 let file: Arc<str> = Arc::from(path.to_string_lossy().as_ref());
629 let arena = bumpalo::Bump::new();
630 let result = php_rs_parser::parse(&arena, &src);
631 let collector =
632 DefinitionCollector::new(&self.codebase, file, &src, &result.source_map);
633 let _ = collector.collect(&result.program);
634 });
635 }
636}
637
638impl Default for ProjectAnalyzer {
639 fn default() -> Self {
640 Self::new()
641 }
642}
643
/// Recursively appends every `*.php` file below `dir` to `out`.
///
/// Symlinks are never followed or reported. Directories named `vendor`,
/// `.git`, `node_modules`, `.cache` or `.pnpm-store` are not descended into.
/// The extension match is case-sensitive. Directories that cannot be read and
/// entries that fail to enumerate are skipped silently. Results are in
/// `read_dir` order and are not sorted.
pub(crate) fn collect_php_files(dir: &Path, out: &mut Vec<PathBuf>) {
    /// Directory names that are never descended into.
    const SKIPPED_DIRS: [&str; 5] = ["vendor", ".git", "node_modules", ".cache", ".pnpm-store"];

    let Ok(entries) = std::fs::read_dir(dir) else {
        return;
    };

    for entry in entries.flatten() {
        // An entry whose type cannot be determined is treated as a non-symlink.
        let is_symlink = matches!(entry.file_type(), Ok(ft) if ft.is_symlink());
        if is_symlink {
            continue;
        }

        let path = entry.path();
        if !path.is_dir() {
            if path.extension().and_then(|ext| ext.to_str()) == Some("php") {
                out.push(path);
            }
            continue;
        }

        let dir_name = path.file_name().and_then(|n| n.to_str()).unwrap_or("");
        if !SKIPPED_DIRS.contains(&dir_name) {
            collect_php_files(&path, out);
        }
    }
}
668
669fn build_reverse_deps(codebase: &Codebase) -> HashMap<String, HashSet<String>> {
674 let mut reverse: HashMap<String, HashSet<String>> = HashMap::new();
675
676 let mut add_edge = |symbol: &str, dependent_file: &str| {
677 if let Some(defining_file) = codebase.symbol_to_file.get(symbol) {
678 let def = defining_file.as_ref().to_string();
679 if def != dependent_file {
680 reverse
681 .entry(def)
682 .or_default()
683 .insert(dependent_file.to_string());
684 }
685 }
686 };
687
688 for entry in codebase.file_imports.iter() {
689 let file = entry.key().as_ref().to_string();
690 for fqcn in entry.value().values() {
691 add_edge(fqcn, &file);
692 }
693 }
694
695 for entry in codebase.classes.iter() {
696 let defining = {
697 let fqcn = entry.key().as_ref();
698 codebase
699 .symbol_to_file
700 .get(fqcn)
701 .map(|f| f.as_ref().to_string())
702 };
703 let Some(file) = defining else { continue };
704
705 let cls = entry.value();
706 if let Some(ref parent) = cls.parent {
707 add_edge(parent.as_ref(), &file);
708 }
709 for iface in &cls.interfaces {
710 add_edge(iface.as_ref(), &file);
711 }
712 for tr in &cls.traits {
713 add_edge(tr.as_ref(), &file);
714 }
715 }
716
717 reverse
718}
719
720fn extract_reference_locations(
723 codebase: &Codebase,
724 file: &Arc<str>,
725) -> Vec<(String, u32, u16, u16)> {
726 codebase
727 .extract_file_reference_locations(file.as_ref())
728 .into_iter()
729 .map(|(sym, line, col_start, col_end)| (sym.to_string(), line, col_start, col_end))
730 .collect()
731}
732
733pub struct AnalysisResult {
738 pub issues: Vec<Issue>,
739 pub type_envs: std::collections::HashMap<crate::type_env::ScopeId, crate::type_env::TypeEnv>,
740 pub symbols: Vec<crate::symbol::ResolvedSymbol>,
742 symbols_by_file: HashMap<Arc<str>, std::ops::Range<usize>>,
746}
747
748impl AnalysisResult {
749 fn build(
750 issues: Vec<Issue>,
751 type_envs: std::collections::HashMap<crate::type_env::ScopeId, crate::type_env::TypeEnv>,
752 mut symbols: Vec<crate::symbol::ResolvedSymbol>,
753 ) -> Self {
754 symbols.sort_unstable_by(|a, b| a.file.as_ref().cmp(b.file.as_ref()));
755 let mut symbols_by_file: HashMap<Arc<str>, std::ops::Range<usize>> = HashMap::new();
756 let mut i = 0;
757 while i < symbols.len() {
758 let file = Arc::clone(&symbols[i].file);
759 let start = i;
760 while i < symbols.len() && symbols[i].file == file {
761 i += 1;
762 }
763 symbols_by_file.insert(file, start..i);
764 }
765 Self {
766 issues,
767 type_envs,
768 symbols,
769 symbols_by_file,
770 }
771 }
772}
773
774impl AnalysisResult {
775 pub fn error_count(&self) -> usize {
776 self.issues
777 .iter()
778 .filter(|i| i.severity == mir_issues::Severity::Error)
779 .count()
780 }
781
782 pub fn warning_count(&self) -> usize {
783 self.issues
784 .iter()
785 .filter(|i| i.severity == mir_issues::Severity::Warning)
786 .count()
787 }
788
789 pub fn issues_by_file(&self) -> HashMap<std::sync::Arc<str>, Vec<&Issue>> {
791 let mut map: HashMap<std::sync::Arc<str>, Vec<&Issue>> = HashMap::new();
792 for issue in &self.issues {
793 map.entry(issue.location.file.clone())
794 .or_default()
795 .push(issue);
796 }
797 map
798 }
799
800 pub fn symbol_at(
803 &self,
804 file: &str,
805 byte_offset: u32,
806 ) -> Option<&crate::symbol::ResolvedSymbol> {
807 let range = self.symbols_by_file.get(file)?;
808 self.symbols[range.clone()]
809 .iter()
810 .filter(|s| s.span.start <= byte_offset && byte_offset < s.span.end)
811 .min_by_key(|s| s.span.end - s.span.start)
812 }
813}