1use std::path::{Path, PathBuf};
3use std::sync::Arc;
4
5use rayon::prelude::*;
6
7use std::collections::{HashMap, HashSet};
8
9use crate::cache::{hash_content, AnalysisCache};
10use crate::pass2::Pass2Driver;
11use crate::php_version::PhpVersion;
12use mir_codebase::Codebase;
13use mir_issues::Issue;
14
15use crate::collector::DefinitionCollector;
16
17pub use crate::pass2::merge_return_types;
19
/// Orchestrates multi-pass analysis of a PHP project: pass 1 collects
/// definitions, pass 2 analyzes function/method bodies.
pub struct ProjectAnalyzer {
    /// Shared store of everything collected so far (definitions, known
    /// symbols, per-file namespaces/imports, reference index).
    pub codebase: Arc<Codebase>,
    /// Optional on-disk cache of per-file pass-2 results, keyed by content hash.
    pub cache: Option<AnalysisCache>,
    /// Progress callback invoked once per file completed in pass 2.
    pub on_file_done: Option<Arc<dyn Fn() + Send + Sync>>,
    /// PSR-4 autoload map used to lazily load referenced-but-missing classes.
    pub psr4: Option<Arc<crate::composer::Psr4Map>>,
    /// Set by the first `load_stubs` call; makes stub loading idempotent.
    stubs_loaded: std::sync::atomic::AtomicBool,
    /// When true, `analyze` additionally runs the dead-code analyzer.
    pub find_dead_code: bool,
    /// PHP version to analyze against; `None` means `PhpVersion::LATEST`.
    pub php_version: Option<PhpVersion>,
    /// Extra stub files loaded alongside the built-in stubs.
    pub stub_files: Vec<PathBuf>,
    /// Extra directories scanned for user stubs.
    pub stub_dirs: Vec<PathBuf>,
}
44
45impl ProjectAnalyzer {
46 pub fn new() -> Self {
47 Self {
48 codebase: Arc::new(Codebase::new()),
49 cache: None,
50 on_file_done: None,
51 psr4: None,
52 stubs_loaded: std::sync::atomic::AtomicBool::new(false),
53 find_dead_code: false,
54 php_version: None,
55 stub_files: Vec::new(),
56 stub_dirs: Vec::new(),
57 }
58 }
59
60 pub fn with_cache(cache_dir: &Path) -> Self {
62 Self {
63 codebase: Arc::new(Codebase::new()),
64 cache: Some(AnalysisCache::open(cache_dir)),
65 on_file_done: None,
66 psr4: None,
67 stubs_loaded: std::sync::atomic::AtomicBool::new(false),
68 find_dead_code: false,
69 php_version: None,
70 stub_files: Vec::new(),
71 stub_dirs: Vec::new(),
72 }
73 }
74
75 pub fn from_composer(
79 root: &Path,
80 ) -> Result<(Self, crate::composer::Psr4Map), crate::composer::ComposerError> {
81 let map = crate::composer::Psr4Map::from_composer(root)?;
82 let psr4 = Arc::new(map.clone());
83 let analyzer = Self {
84 codebase: Arc::new(Codebase::new()),
85 cache: None,
86 on_file_done: None,
87 psr4: Some(psr4),
88 stubs_loaded: std::sync::atomic::AtomicBool::new(false),
89 find_dead_code: false,
90 php_version: None,
91 stub_files: Vec::new(),
92 stub_dirs: Vec::new(),
93 };
94 Ok((analyzer, map))
95 }
96
97 pub fn with_php_version(mut self, version: PhpVersion) -> Self {
99 self.php_version = Some(version);
100 self
101 }
102
103 fn resolved_php_version(&self) -> PhpVersion {
106 self.php_version.unwrap_or(PhpVersion::LATEST)
107 }
108
    /// Accessor for the shared codebase populated during analysis.
    pub fn codebase(&self) -> &Arc<Codebase> {
        &self.codebase
    }
113
114 pub fn load_stubs(&self) {
118 if !self
119 .stubs_loaded
120 .swap(true, std::sync::atomic::Ordering::SeqCst)
121 {
122 crate::stubs::load_stubs_for_version(&self.codebase, self.resolved_php_version());
123 crate::stubs::load_user_stubs(&self.codebase, &self.stub_files, &self.stub_dirs);
124 }
125 }
126
    /// Runs the full two-pass analysis over `paths` and returns all issues.
    ///
    /// Pass 1 (parallel): parse each file, index declared symbol names,
    /// namespaces and `use` imports, and collect definitions. Pass 2
    /// (parallel): analyze function/method bodies, served from the on-disk
    /// cache when present. Files that fail to parse are excluded from pass 2.
    pub fn analyze(&self, paths: &[PathBuf]) -> AnalysisResult {
        let mut all_issues = Vec::new();
        let mut parse_errors = Vec::new();

        self.load_stubs();

        // Read all files up front (in parallel). Unreadable files are
        // reported to stderr and skipped entirely.
        let file_data: Vec<(Arc<str>, String)> = paths
            .par_iter()
            .filter_map(|path| match std::fs::read_to_string(path) {
                Ok(src) => Some((Arc::from(path.to_string_lossy().as_ref()), src)),
                Err(e) => {
                    eprintln!("Cannot read {}: {}", path.display(), e);
                    None
                }
            })
            .collect();

        // Cache invalidation: any file whose content hash misses the cache is
        // considered changed and is evicted together with its dependents.
        if let Some(cache) = &self.cache {
            let changed: Vec<String> = file_data
                .par_iter()
                .filter_map(|(f, src)| {
                    let h = hash_content(src);
                    if cache.get(f, &h).is_none() {
                        Some(f.to_string())
                    } else {
                        None
                    }
                })
                .collect();
            if !changed.is_empty() {
                cache.evict_with_dependents(&changed);
            }
        }

        // Pass 1: per file, register top-level symbol names / namespaces /
        // imports, then collect definitions. Returns (parse errors, issues).
        let pass1_results: Vec<(Vec<Issue>, Vec<Issue>)> = file_data
            .par_iter()
            .map(|(file, src)| {
                use php_ast::ast::StmtKind;
                let arena = bumpalo::Bump::new();
                let result = php_rs_parser::parse(&arena, src);

                let mut current_namespace: Option<String> = None;
                let mut imports: std::collections::HashMap<String, String> =
                    std::collections::HashMap::new();
                let mut file_ns_set = false;

                // Registers the names declared by `stmts` (classes,
                // interfaces, traits, enums, functions) under namespace `ns`,
                // and records `use` aliases into `imports`.
                let index_stmts =
                    |stmts: &[php_ast::ast::Stmt<'_, '_>],
                     ns: Option<&str>,
                     imports: &mut std::collections::HashMap<String, String>| {
                        for stmt in stmts.iter() {
                            match &stmt.kind {
                                StmtKind::Use(use_decl) => {
                                    for item in use_decl.uses.iter() {
                                        let full_name = crate::parser::name_to_string(&item.name)
                                            .trim_start_matches('\\')
                                            .to_string();
                                        // Default alias is the last path segment.
                                        let alias = item.alias.unwrap_or_else(|| {
                                            full_name.rsplit('\\').next().unwrap_or(&full_name)
                                        });
                                        imports.insert(alias.to_string(), full_name);
                                    }
                                }
                                StmtKind::Class(decl) => {
                                    // Anonymous classes (no name) are not indexed.
                                    if let Some(n) = decl.name {
                                        let fqcn = match ns {
                                            Some(ns) => format!("{ns}\\{n}"),
                                            None => n.to_string(),
                                        };
                                        self.codebase
                                            .known_symbols
                                            .insert(Arc::from(fqcn.as_str()));
                                    }
                                }
                                StmtKind::Interface(decl) => {
                                    let fqcn = match ns {
                                        Some(ns) => format!("{}\\{}", ns, decl.name),
                                        None => decl.name.to_string(),
                                    };
                                    self.codebase.known_symbols.insert(Arc::from(fqcn.as_str()));
                                }
                                StmtKind::Trait(decl) => {
                                    let fqcn = match ns {
                                        Some(ns) => format!("{}\\{}", ns, decl.name),
                                        None => decl.name.to_string(),
                                    };
                                    self.codebase.known_symbols.insert(Arc::from(fqcn.as_str()));
                                }
                                StmtKind::Enum(decl) => {
                                    let fqcn = match ns {
                                        Some(ns) => format!("{}\\{}", ns, decl.name),
                                        None => decl.name.to_string(),
                                    };
                                    self.codebase.known_symbols.insert(Arc::from(fqcn.as_str()));
                                }
                                StmtKind::Function(decl) => {
                                    let fqn = match ns {
                                        Some(ns) => format!("{}\\{}", ns, decl.name),
                                        None => decl.name.to_string(),
                                    };
                                    self.codebase.known_symbols.insert(Arc::from(fqn.as_str()));
                                }
                                _ => {}
                            }
                        }
                    };

                // Walk top-level statements: namespace declarations switch the
                // current namespace; everything else is indexed directly.
                for stmt in result.program.stmts.iter() {
                    match &stmt.kind {
                        StmtKind::Namespace(ns) => {
                            current_namespace =
                                ns.name.as_ref().map(|n| crate::parser::name_to_string(n));
                            // Only the file's first namespace is recorded.
                            if !file_ns_set {
                                if let Some(ref ns_str) = current_namespace {
                                    self.codebase
                                        .file_namespaces
                                        .insert(file.clone(), ns_str.clone());
                                    file_ns_set = true;
                                }
                            }
                            if let php_ast::ast::NamespaceBody::Braced(inner_stmts) = &ns.body {
                                index_stmts(
                                    inner_stmts,
                                    current_namespace.as_deref(),
                                    &mut imports,
                                );
                            }
                        }
                        _ => index_stmts(
                            std::slice::from_ref(stmt),
                            current_namespace.as_deref(),
                            &mut imports,
                        ),
                    }
                }

                if !imports.is_empty() {
                    self.codebase.file_imports.insert(file.clone(), imports);
                }

                // Parse errors are surfaced as issues pinned to line 1.
                let file_parse_errors: Vec<Issue> = result
                    .errors
                    .iter()
                    .map(|err| {
                        Issue::new(
                            mir_issues::IssueKind::ParseError {
                                message: err.to_string(),
                            },
                            mir_issues::Location {
                                file: file.clone(),
                                line: 1,
                                line_end: 1,
                                col_start: 0,
                                col_end: 0,
                            },
                        )
                    })
                    .collect();

                let collector =
                    DefinitionCollector::new(&self.codebase, file.clone(), src, &result.source_map);
                let issues = collector.collect(&result.program);

                (file_parse_errors, issues)
            })
            .collect();

        // Files that produced parse errors are excluded from pass 2 below.
        let mut files_with_parse_errors: std::collections::HashSet<Arc<str>> =
            std::collections::HashSet::new();
        for (file_parse_errors, issues) in pass1_results {
            for issue in &file_parse_errors {
                files_with_parse_errors.insert(issue.location.file.clone());
            }
            parse_errors.extend(file_parse_errors);
            all_issues.extend(issues);
        }

        all_issues.extend(parse_errors);

        // Resolve classes referenced but not defined in `paths` via PSR-4.
        if let Some(psr4) = &self.psr4 {
            self.lazy_load_missing_classes(psr4.clone(), &mut all_issues);
        }

        // The reverse dependency map drives `evict_with_dependents` next run.
        if let Some(cache) = &self.cache {
            let rev = build_reverse_deps(&self.codebase);
            cache.set_reverse_deps(rev);
        }

        let analyzed_file_set: std::collections::HashSet<std::sync::Arc<str>> =
            file_data.iter().map(|(f, _)| f.clone()).collect();
        let class_issues =
            crate::class::ClassAnalyzer::with_files(&self.codebase, analyzed_file_set, &file_data)
                .analyze_all();
        all_issues.extend(class_issues);

        // Inference-only pre-pass so inferred return types are available to
        // every file before the diagnostic pass below.
        file_data
            .par_iter()
            .filter(|(file, _)| !files_with_parse_errors.contains(file))
            .for_each(|(file, src)| {
                let driver =
                    Pass2Driver::new_inference_only(&self.codebase, self.resolved_php_version());
                let arena = bumpalo::Bump::new();
                let parsed = php_rs_parser::parse(&arena, src);
                driver.analyze_bodies(&parsed.program, file.clone(), src, &parsed.source_map);
            });

        // Pass 2: full body analysis, served from the cache when the content
        // hash matches.
        let pass2_results: Vec<(Vec<Issue>, Vec<crate::symbol::ResolvedSymbol>)> = file_data
            .par_iter()
            .filter(|(file, _)| !files_with_parse_errors.contains(file))
            .map(|(file, src)| {
                let driver = Pass2Driver::new(&self.codebase, self.resolved_php_version());
                let result = if let Some(cache) = &self.cache {
                    let h = hash_content(src);
                    if let Some((cached_issues, ref_locs)) = cache.get(file, &h) {
                        // Cache hit: reuse the issues, but replay reference
                        // locations so the reference index stays complete.
                        self.codebase
                            .replay_reference_locations(file.clone(), &ref_locs);
                        (cached_issues, Vec::new())
                    } else {
                        let arena = bumpalo::Bump::new();
                        let parsed = php_rs_parser::parse(&arena, src);
                        let (issues, symbols) = driver.analyze_bodies(
                            &parsed.program,
                            file.clone(),
                            src,
                            &parsed.source_map,
                        );
                        let ref_locs = extract_reference_locations(&self.codebase, file);
                        cache.put(file, h, issues.clone(), ref_locs);
                        (issues, symbols)
                    }
                } else {
                    let arena = bumpalo::Bump::new();
                    let parsed = php_rs_parser::parse(&arena, src);
                    driver.analyze_bodies(&parsed.program, file.clone(), src, &parsed.source_map)
                };
                // Progress callback: once per analyzed file (hit or miss).
                if let Some(cb) = &self.on_file_done {
                    cb();
                }
                result
            })
            .collect();

        let mut all_symbols = Vec::new();
        for (issues, symbols) in pass2_results {
            all_issues.extend(issues);
            all_symbols.extend(symbols);
        }

        if let Some(cache) = &self.cache {
            cache.flush();
        }

        self.codebase.compact_reference_index();

        if self.find_dead_code {
            let dead_code_issues =
                crate::dead_code::DeadCodeAnalyzer::new(&self.codebase).analyze();
            all_issues.extend(dead_code_issues);
        }

        AnalysisResult::build(all_issues, std::collections::HashMap::new(), all_symbols)
    }
407
    /// Resolves types that are referenced (as parents, interfaces, traits,
    /// or imports) but not yet defined, by loading their files through the
    /// PSR-4 map. Iterates to a fixed point — loading a class can surface new
    /// references (e.g. its own parent) — bounded by `max_depth` iterations.
    fn lazy_load_missing_classes(
        &self,
        psr4: Arc<crate::composer::Psr4Map>,
        all_issues: &mut Vec<Issue>,
    ) {
        use std::collections::HashSet;

        // Bound on fixed-point iterations (effectively: inheritance depth).
        let max_depth = 10;
        // FQCNs we already attempted, so failed loads are not retried.
        let mut loaded: HashSet<String> = HashSet::new();

        for _ in 0..max_depth {
            let mut to_load: Vec<(String, PathBuf)> = Vec::new();

            // Queue `fqcn` when it is unknown, not yet attempted, and
            // resolvable to a path through PSR-4.
            let mut try_queue = |fqcn: &str| {
                if !self.codebase.type_exists(fqcn) && !loaded.contains(fqcn) {
                    if let Some(path) = psr4.resolve(fqcn) {
                        to_load.push((fqcn.to_string(), path));
                    }
                }
            };

            // Scan every structural position where a type can be referenced.
            for entry in self.codebase.classes.iter() {
                let cls = entry.value();
                if let Some(parent) = &cls.parent {
                    try_queue(parent.as_ref());
                }
                for iface in &cls.interfaces {
                    try_queue(iface.as_ref());
                }
            }

            for entry in self.codebase.interfaces.iter() {
                for parent in &entry.value().extends {
                    try_queue(parent.as_ref());
                }
            }

            for entry in self.codebase.enums.iter() {
                for iface in &entry.value().interfaces {
                    try_queue(iface.as_ref());
                }
            }

            for entry in self.codebase.traits.iter() {
                for used in &entry.value().traits {
                    try_queue(used.as_ref());
                }
            }

            // Also chase `use` imports so type references in bodies resolve.
            for entry in self.codebase.file_imports.iter() {
                for fqcn in entry.value().values() {
                    try_queue(fqcn.as_str());
                }
            }

            // Fixed point reached: nothing new to load.
            if to_load.is_empty() {
                break;
            }

            for (fqcn, path) in to_load {
                // Mark as attempted even when the read fails below, so the
                // same FQCN is not re-queued on every iteration.
                loaded.insert(fqcn);
                if let Ok(src) = std::fs::read_to_string(&path) {
                    let file: Arc<str> = Arc::from(path.to_string_lossy().as_ref());
                    let arena = bumpalo::Bump::new();
                    let result = php_rs_parser::parse(&arena, &src);
                    let collector = crate::collector::DefinitionCollector::new(
                        &self.codebase,
                        file,
                        &src,
                        &result.source_map,
                    );
                    let issues = collector.collect(&result.program);
                    all_issues.extend(issues);
                }
            }

            // Newly loaded definitions invalidate previously finalized state,
            // and the next iteration needs up-to-date `type_exists` answers.
            self.codebase.invalidate_finalization();
            self.codebase.finalize();
        }
    }
491
    /// Re-analyzes a single file with `new_content` (e.g. after an editor
    /// change) and returns the issues for that file.
    ///
    /// Fast path: a content-hash cache hit replays the cached reference
    /// locations and returns the cached issues without parsing. Otherwise the
    /// file's old definitions are removed, pass 1 and pass 2 re-run for this
    /// file only, and the cache entry is refreshed.
    pub fn re_analyze_file(&self, file_path: &str, new_content: &str) -> AnalysisResult {
        if let Some(cache) = &self.cache {
            let h = hash_content(new_content);
            if let Some((issues, ref_locs)) = cache.get(file_path, &h) {
                let file: Arc<str> = Arc::from(file_path);
                self.codebase.replay_reference_locations(file, &ref_locs);
                return AnalysisResult::build(issues, HashMap::new(), Vec::new());
            }
        }

        // Snapshot structural info before removal so a structurally-unchanged
        // edit can skip the global re-finalize below.
        let structural_snapshot = self.codebase.file_structural_snapshot(file_path);
        self.codebase.remove_file_definitions(file_path);

        let file: Arc<str> = Arc::from(file_path);
        let arena = bumpalo::Bump::new();
        let parsed = php_rs_parser::parse(&arena, new_content);

        let mut all_issues = Vec::new();

        // Parse errors are surfaced as issues pinned to line 1.
        for err in &parsed.errors {
            all_issues.push(Issue::new(
                mir_issues::IssueKind::ParseError {
                    message: err.to_string(),
                },
                mir_issues::Location {
                    file: file.clone(),
                    line: 1,
                    line_end: 1,
                    col_start: 0,
                    col_end: 0,
                },
            ));
        }

        let collector = DefinitionCollector::new(
            &self.codebase,
            file.clone(),
            new_content,
            &parsed.source_map,
        );
        all_issues.extend(collector.collect(&parsed.program));

        // Structurally unchanged: restore inheritance links cheaply instead
        // of paying for a full finalize pass.
        if self
            .codebase
            .structural_unchanged_after_pass1(file_path, &structural_snapshot)
        {
            self.codebase
                .restore_all_parents(file_path, &structural_snapshot);
        } else {
            self.codebase.finalize();
        }

        // Pass 2 only runs when the file parsed cleanly.
        let symbols = if parsed.errors.is_empty() {
            let driver = Pass2Driver::new(&self.codebase, self.resolved_php_version());
            let (body_issues, symbols) = driver.analyze_bodies(
                &parsed.program,
                file.clone(),
                new_content,
                &parsed.source_map,
            );
            all_issues.extend(body_issues);
            symbols
        } else {
            Vec::new()
        };

        if let Some(cache) = &self.cache {
            let h = hash_content(new_content);
            // Evict this file and its dependents, then store the fresh result.
            cache.evict_with_dependents(&[file_path.to_string()]);
            let ref_locs = extract_reference_locations(&self.codebase, &file);
            cache.put(file_path, h, all_issues.clone(), ref_locs);
        }

        AnalysisResult::build(all_issues, HashMap::new(), symbols)
    }
576
577 pub fn analyze_source(source: &str) -> AnalysisResult {
580 use crate::collector::DefinitionCollector;
581 let analyzer = ProjectAnalyzer::new();
582 analyzer.load_stubs();
583 let file: Arc<str> = Arc::from("<source>");
584 let arena = bumpalo::Bump::new();
585 let result = php_rs_parser::parse(&arena, source);
586 let mut all_issues = Vec::new();
587 for err in &result.errors {
588 all_issues.push(Issue::new(
589 mir_issues::IssueKind::ParseError {
590 message: err.to_string(),
591 },
592 mir_issues::Location {
593 file: file.clone(),
594 line: 1,
595 line_end: 1,
596 col_start: 0,
597 col_end: 0,
598 },
599 ));
600 }
601 if !result.errors.is_empty() {
602 return AnalysisResult::build(all_issues, std::collections::HashMap::new(), Vec::new());
603 }
604 let collector =
605 DefinitionCollector::new(&analyzer.codebase, file.clone(), source, &result.source_map);
606 all_issues.extend(collector.collect(&result.program));
607 analyzer.codebase.finalize();
608 let mut type_envs = std::collections::HashMap::new();
609 let mut all_symbols = Vec::new();
610 let driver = Pass2Driver::new(&analyzer.codebase, analyzer.resolved_php_version());
611 all_issues.extend(driver.analyze_bodies_typed(
612 &result.program,
613 file.clone(),
614 source,
615 &result.source_map,
616 &mut type_envs,
617 &mut all_symbols,
618 ));
619 AnalysisResult::build(all_issues, type_envs, all_symbols)
620 }
621
622 pub fn discover_files(root: &Path) -> Vec<PathBuf> {
624 if root.is_file() {
625 return vec![root.to_path_buf()];
626 }
627 let mut files = Vec::new();
628 collect_php_files(root, &mut files);
629 files
630 }
631
632 pub fn collect_types_only(&self, paths: &[PathBuf]) {
635 paths.par_iter().for_each(|path| {
636 let Ok(src) = std::fs::read_to_string(path) else {
637 return;
638 };
639 let file: Arc<str> = Arc::from(path.to_string_lossy().as_ref());
640 let arena = bumpalo::Bump::new();
641 let result = php_rs_parser::parse(&arena, &src);
642 let collector =
643 DefinitionCollector::new(&self.codebase, file, &src, &result.source_map);
644 let _ = collector.collect(&result.program);
645 });
646 }
647}
648
649impl Default for ProjectAnalyzer {
650 fn default() -> Self {
651 Self::new()
652 }
653}
654
/// Recursively gathers `.php` files under `dir` into `out`.
///
/// Symlinks are skipped entirely (avoiding cycles), as are well-known
/// dependency/VCS directories. Unreadable directories are silently ignored.
pub(crate) fn collect_php_files(dir: &Path, out: &mut Vec<PathBuf>) {
    // Directory names never descended into.
    const SKIPPED_DIRS: [&str; 5] = ["vendor", ".git", "node_modules", ".cache", ".pnpm-store"];

    let Ok(entries) = std::fs::read_dir(dir) else {
        return;
    };
    for entry in entries.flatten() {
        // `file_type()` does not follow links, so this catches both file and
        // directory symlinks.
        if entry.file_type().map(|ft| ft.is_symlink()).unwrap_or(false) {
            continue;
        }
        let path = entry.path();
        if path.is_dir() {
            let name = path.file_name().and_then(|n| n.to_str()).unwrap_or("");
            if !SKIPPED_DIRS.contains(&name) {
                collect_php_files(&path, out);
            }
        } else if path.extension().and_then(|e| e.to_str()) == Some("php") {
            out.push(path);
        }
    }
}
679
/// Builds the reverse dependency map used for cache invalidation:
/// defining-file -> set of files that use a symbol defined there.
///
/// Edges come from two sources: per-file `use` imports, and class structure
/// (parent, interfaces, traits). Self-edges are omitted.
fn build_reverse_deps(codebase: &Codebase) -> HashMap<String, HashSet<String>> {
    let mut reverse: HashMap<String, HashSet<String>> = HashMap::new();

    // Records "dependent_file depends on whichever file defines `symbol`".
    // Symbols with no known defining file contribute no edge.
    let mut add_edge = |symbol: &str, dependent_file: &str| {
        if let Some(defining_file) = codebase.symbol_to_file.get(symbol) {
            let def = defining_file.as_ref().to_string();
            if def != dependent_file {
                reverse
                    .entry(def)
                    .or_default()
                    .insert(dependent_file.to_string());
            }
        }
    };

    for entry in codebase.file_imports.iter() {
        let file = entry.key().as_ref().to_string();
        for fqcn in entry.value().values() {
            add_edge(fqcn, &file);
        }
    }

    for entry in codebase.classes.iter() {
        // The inner block limits the lifetime of the lookup's guard before
        // `add_edge` performs further lookups on the same map.
        let defining = {
            let fqcn = entry.key().as_ref();
            codebase
                .symbol_to_file
                .get(fqcn)
                .map(|f| f.as_ref().to_string())
        };
        let Some(file) = defining else { continue };

        let cls = entry.value();
        if let Some(ref parent) = cls.parent {
            add_edge(parent.as_ref(), &file);
        }
        for iface in &cls.interfaces {
            add_edge(iface.as_ref(), &file);
        }
        for tr in &cls.traits {
            add_edge(tr.as_ref(), &file);
        }
    }

    reverse
}
730
731fn extract_reference_locations(
734 codebase: &Codebase,
735 file: &Arc<str>,
736) -> Vec<(String, u32, u16, u16)> {
737 codebase
738 .extract_file_reference_locations(file.as_ref())
739 .into_iter()
740 .map(|(sym, line, col_start, col_end)| (sym.to_string(), line, col_start, col_end))
741 .collect()
742}
743
/// The outcome of an analysis run.
pub struct AnalysisResult {
    /// Every issue found, including parse errors.
    pub issues: Vec<Issue>,
    /// Per-scope type environments; in this file only `analyze_source`
    /// populates these — the project entry points leave the map empty.
    pub type_envs: std::collections::HashMap<crate::type_env::ScopeId, crate::type_env::TypeEnv>,
    /// Resolved symbols, sorted by file (see `build`).
    pub symbols: Vec<crate::symbol::ResolvedSymbol>,
    /// Index range into `symbols` for each file, enabling fast per-file lookup.
    symbols_by_file: HashMap<Arc<str>, std::ops::Range<usize>>,
}
758
759impl AnalysisResult {
760 fn build(
761 issues: Vec<Issue>,
762 type_envs: std::collections::HashMap<crate::type_env::ScopeId, crate::type_env::TypeEnv>,
763 mut symbols: Vec<crate::symbol::ResolvedSymbol>,
764 ) -> Self {
765 symbols.sort_unstable_by(|a, b| a.file.as_ref().cmp(b.file.as_ref()));
766 let mut symbols_by_file: HashMap<Arc<str>, std::ops::Range<usize>> = HashMap::new();
767 let mut i = 0;
768 while i < symbols.len() {
769 let file = Arc::clone(&symbols[i].file);
770 let start = i;
771 while i < symbols.len() && symbols[i].file == file {
772 i += 1;
773 }
774 symbols_by_file.insert(file, start..i);
775 }
776 Self {
777 issues,
778 type_envs,
779 symbols,
780 symbols_by_file,
781 }
782 }
783}
784
785impl AnalysisResult {
786 pub fn error_count(&self) -> usize {
787 self.issues
788 .iter()
789 .filter(|i| i.severity == mir_issues::Severity::Error)
790 .count()
791 }
792
793 pub fn warning_count(&self) -> usize {
794 self.issues
795 .iter()
796 .filter(|i| i.severity == mir_issues::Severity::Warning)
797 .count()
798 }
799
800 pub fn issues_by_file(&self) -> HashMap<std::sync::Arc<str>, Vec<&Issue>> {
802 let mut map: HashMap<std::sync::Arc<str>, Vec<&Issue>> = HashMap::new();
803 for issue in &self.issues {
804 map.entry(issue.location.file.clone())
805 .or_default()
806 .push(issue);
807 }
808 map
809 }
810
811 pub fn symbol_at(
814 &self,
815 file: &str,
816 byte_offset: u32,
817 ) -> Option<&crate::symbol::ResolvedSymbol> {
818 let range = self.symbols_by_file.get(file)?;
819 self.symbols[range.clone()]
820 .iter()
821 .filter(|s| s.span.start <= byte_offset && byte_offset < s.span.end)
822 .min_by_key(|s| s.span.end - s.span.start)
823 }
824}