1use std::path::{Path, PathBuf};
3use std::sync::Arc;
4
5use rayon::prelude::*;
6
7use std::collections::{HashMap, HashSet};
8
9use crate::cache::{hash_content, AnalysisCache};
10use crate::pass2::Pass2Driver;
11use crate::php_version::PhpVersion;
12use mir_codebase::Codebase;
13use mir_issues::Issue;
14
15use crate::collector::DefinitionCollector;
16
17pub use crate::pass2::merge_return_types;
19
/// Drives analysis of a PHP project: owns the shared [`Codebase`] plus the
/// options that control a run.
pub struct ProjectAnalyzer {
    /// Symbol table shared by all analysis passes.
    pub codebase: Arc<Codebase>,
    /// Optional result cache; `None` disables caching.
    pub cache: Option<AnalysisCache>,
    /// Callback invoked once for each file that goes through pass 2 in `analyze`.
    pub on_file_done: Option<Arc<dyn Fn() + Send + Sync>>,
    /// PSR-4 map used by `analyze` to lazily load classes that are referenced
    /// as parents/interfaces but were not among the analyzed files.
    pub psr4: Option<Arc<crate::composer::Psr4Map>>,
    /// Set by the first `load_stubs` call so that later calls are no-ops.
    stubs_loaded: std::sync::atomic::AtomicBool,
    /// When `true`, `analyze` also runs the dead-code analyzer.
    pub find_dead_code: bool,
    /// Target PHP version; `None` falls back to `PhpVersion::LATEST`.
    pub php_version: Option<PhpVersion>,
}
40
41impl ProjectAnalyzer {
42 pub fn new() -> Self {
43 Self {
44 codebase: Arc::new(Codebase::new()),
45 cache: None,
46 on_file_done: None,
47 psr4: None,
48 stubs_loaded: std::sync::atomic::AtomicBool::new(false),
49 find_dead_code: false,
50 php_version: None,
51 }
52 }
53
54 pub fn with_cache(cache_dir: &Path) -> Self {
56 Self {
57 codebase: Arc::new(Codebase::new()),
58 cache: Some(AnalysisCache::open(cache_dir)),
59 on_file_done: None,
60 psr4: None,
61 stubs_loaded: std::sync::atomic::AtomicBool::new(false),
62 find_dead_code: false,
63 php_version: None,
64 }
65 }
66
67 pub fn from_composer(
71 root: &Path,
72 ) -> Result<(Self, crate::composer::Psr4Map), crate::composer::ComposerError> {
73 let map = crate::composer::Psr4Map::from_composer(root)?;
74 let psr4 = Arc::new(map.clone());
75 let analyzer = Self {
76 codebase: Arc::new(Codebase::new()),
77 cache: None,
78 on_file_done: None,
79 psr4: Some(psr4),
80 stubs_loaded: std::sync::atomic::AtomicBool::new(false),
81 find_dead_code: false,
82 php_version: None,
83 };
84 Ok((analyzer, map))
85 }
86
    /// Builder-style setter: pins the PHP version used for analysis and
    /// returns the analyzer.
    pub fn with_php_version(mut self, version: PhpVersion) -> Self {
        self.php_version = Some(version);
        self
    }
92
    /// Returns the configured PHP version, or `PhpVersion::LATEST` when none was set.
    fn resolved_php_version(&self) -> PhpVersion {
        self.php_version.unwrap_or(PhpVersion::LATEST)
    }
98
    /// Returns a reference to the shared codebase handle.
    pub fn codebase(&self) -> &Arc<Codebase> {
        &self.codebase
    }
103
104 pub fn load_stubs(&self) {
106 if !self
107 .stubs_loaded
108 .swap(true, std::sync::atomic::Ordering::SeqCst)
109 {
110 crate::stubs::load_stubs(&self.codebase);
111 }
112 }
113
    /// Runs the full analysis pipeline over `paths` and returns the combined result.
    ///
    /// Stages, in order:
    /// 1. load stubs and read every file (unreadable files are reported on
    ///    stderr and skipped);
    /// 2. when a cache is configured, evict entries (and their dependents) for
    ///    files whose current content hash has no cached result;
    /// 3. pass 1, in parallel: parse each file, pre-index declared symbols,
    ///    imports and the file namespace, then collect definitions;
    /// 4. finalize the codebase, lazily load missing classes through the PSR-4
    ///    map if one is set, and refresh the cache's reverse dependencies;
    /// 5. class-level analysis;
    /// 6. pass 2, in parallel: body analysis for every file without parse
    ///    errors, served from the cache when possible;
    /// 7. optional dead-code analysis.
    ///
    /// The returned result always carries an empty `type_envs` map.
    pub fn analyze(&self, paths: &[PathBuf]) -> AnalysisResult {
        let mut all_issues = Vec::new();
        let mut parse_errors = Vec::new();

        self.load_stubs();

        // Read all sources up front; each entry is (path as shared string, contents).
        let file_data: Vec<(Arc<str>, String)> = paths
            .par_iter()
            .filter_map(|path| match std::fs::read_to_string(path) {
                Ok(src) => Some((Arc::from(path.to_string_lossy().as_ref()), src)),
                Err(e) => {
                    eprintln!("Cannot read {}: {}", path.display(), e);
                    None
                }
            })
            .collect();

        if let Some(cache) = &self.cache {
            // A file counts as changed when the cache has no entry for its
            // current content hash.
            let changed: Vec<String> = file_data
                .par_iter()
                .filter_map(|(f, src)| {
                    let h = hash_content(src);
                    if cache.get(f, &h).is_none() {
                        Some(f.to_string())
                    } else {
                        None
                    }
                })
                .collect();
            if !changed.is_empty() {
                cache.evict_with_dependents(&changed);
            }
        }

        // Pass 1: per file, returns (parse-error issues, definition-collection issues).
        let pass1_results: Vec<(Vec<Issue>, Vec<Issue>)> = file_data
            .par_iter()
            .map(|(file, src)| {
                use php_ast::ast::StmtKind;
                let arena = bumpalo::Bump::new();
                let result = php_rs_parser::parse(&arena, src);

                let mut current_namespace: Option<String> = None;
                let mut imports: std::collections::HashMap<String, String> =
                    std::collections::HashMap::new();
                let mut file_ns_set = false;

                // Records `use` imports (alias -> full name) and registers the
                // fully-qualified names of classes, interfaces, traits, enums
                // and functions declared directly in `stmts`.
                let index_stmts =
                    |stmts: &[php_ast::ast::Stmt<'_, '_>],
                     ns: Option<&str>,
                     imports: &mut std::collections::HashMap<String, String>| {
                        for stmt in stmts.iter() {
                            match &stmt.kind {
                                StmtKind::Use(use_decl) => {
                                    for item in use_decl.uses.iter() {
                                        let full_name = crate::parser::name_to_string(&item.name)
                                            .trim_start_matches('\\')
                                            .to_string();
                                        // Without an explicit alias, the last
                                        // name segment is the alias.
                                        let alias = item.alias.unwrap_or_else(|| {
                                            full_name.rsplit('\\').next().unwrap_or(&full_name)
                                        });
                                        imports.insert(alias.to_string(), full_name);
                                    }
                                }
                                StmtKind::Class(decl) => {
                                    if let Some(n) = decl.name {
                                        let fqcn = match ns {
                                            Some(ns) => format!("{ns}\\{n}"),
                                            None => n.to_string(),
                                        };
                                        self.codebase
                                            .known_symbols
                                            .insert(Arc::from(fqcn.as_str()));
                                    }
                                }
                                StmtKind::Interface(decl) => {
                                    let fqcn = match ns {
                                        Some(ns) => format!("{}\\{}", ns, decl.name),
                                        None => decl.name.to_string(),
                                    };
                                    self.codebase.known_symbols.insert(Arc::from(fqcn.as_str()));
                                }
                                StmtKind::Trait(decl) => {
                                    let fqcn = match ns {
                                        Some(ns) => format!("{}\\{}", ns, decl.name),
                                        None => decl.name.to_string(),
                                    };
                                    self.codebase.known_symbols.insert(Arc::from(fqcn.as_str()));
                                }
                                StmtKind::Enum(decl) => {
                                    let fqcn = match ns {
                                        Some(ns) => format!("{}\\{}", ns, decl.name),
                                        None => decl.name.to_string(),
                                    };
                                    self.codebase.known_symbols.insert(Arc::from(fqcn.as_str()));
                                }
                                StmtKind::Function(decl) => {
                                    let fqn = match ns {
                                        Some(ns) => format!("{}\\{}", ns, decl.name),
                                        None => decl.name.to_string(),
                                    };
                                    self.codebase.known_symbols.insert(Arc::from(fqn.as_str()));
                                }
                                _ => {}
                            }
                        }
                    };

                for stmt in result.program.stmts.iter() {
                    match &stmt.kind {
                        StmtKind::Namespace(ns) => {
                            current_namespace =
                                ns.name.as_ref().map(|n| crate::parser::name_to_string(n));
                            // Only the first named namespace is recorded as
                            // the file's namespace.
                            if !file_ns_set {
                                if let Some(ref ns_str) = current_namespace {
                                    self.codebase
                                        .file_namespaces
                                        .insert(file.clone(), ns_str.clone());
                                    file_ns_set = true;
                                }
                            }
                            // Braced namespaces carry their own statements;
                            // other namespace bodies fall through to the
                            // top-level arm below on later iterations.
                            if let php_ast::ast::NamespaceBody::Braced(inner_stmts) = &ns.body {
                                index_stmts(
                                    inner_stmts,
                                    current_namespace.as_deref(),
                                    &mut imports,
                                );
                            }
                        }
                        _ => index_stmts(
                            std::slice::from_ref(stmt),
                            current_namespace.as_deref(),
                            &mut imports,
                        ),
                    }
                }

                if !imports.is_empty() {
                    self.codebase.file_imports.insert(file.clone(), imports);
                }

                // Parse errors are reported with a placeholder location
                // (line 1, column 0).
                let file_parse_errors: Vec<Issue> = result
                    .errors
                    .iter()
                    .map(|err| {
                        Issue::new(
                            mir_issues::IssueKind::ParseError {
                                message: err.to_string(),
                            },
                            mir_issues::Location {
                                file: file.clone(),
                                line: 1,
                                line_end: 1,
                                col_start: 0,
                                col_end: 0,
                            },
                        )
                    })
                    .collect();

                let collector =
                    DefinitionCollector::new(&self.codebase, file.clone(), src, &result.source_map);
                let issues = collector.collect(&result.program);

                (file_parse_errors, issues)
            })
            .collect();

        // Remember which files failed to parse so pass 2 can skip them.
        let mut files_with_parse_errors: std::collections::HashSet<Arc<str>> =
            std::collections::HashSet::new();
        for (file_parse_errors, issues) in pass1_results {
            for issue in &file_parse_errors {
                files_with_parse_errors.insert(issue.location.file.clone());
            }
            parse_errors.extend(file_parse_errors);
            all_issues.extend(issues);
        }

        all_issues.extend(parse_errors);

        self.codebase.finalize();

        if let Some(psr4) = &self.psr4 {
            self.lazy_load_missing_classes(psr4.clone(), &mut all_issues);
        }

        if let Some(cache) = &self.cache {
            let rev = build_reverse_deps(&self.codebase);
            cache.set_reverse_deps(rev);
        }

        let analyzed_file_set: std::collections::HashSet<std::sync::Arc<str>> =
            file_data.iter().map(|(f, _)| f.clone()).collect();
        let class_issues =
            crate::class::ClassAnalyzer::with_files(&self.codebase, analyzed_file_set, &file_data)
                .analyze_all();
        all_issues.extend(class_issues);

        // Pass 2: per file, returns (body issues, resolved symbols). Cache
        // hits replay the stored reference locations and yield no symbols.
        let pass2_results: Vec<(Vec<Issue>, Vec<crate::symbol::ResolvedSymbol>)> = file_data
            .par_iter()
            .filter(|(file, _)| !files_with_parse_errors.contains(file))
            .map(|(file, src)| {
                let driver = Pass2Driver::new(&self.codebase, self.resolved_php_version());
                let result = if let Some(cache) = &self.cache {
                    let h = hash_content(src);
                    if let Some((cached_issues, ref_locs)) = cache.get(file, &h) {
                        self.codebase
                            .replay_reference_locations(file.clone(), &ref_locs);
                        (cached_issues, Vec::new())
                    } else {
                        let arena = bumpalo::Bump::new();
                        let parsed = php_rs_parser::parse(&arena, src);
                        let (issues, symbols) = driver.analyze_bodies(
                            &parsed.program,
                            file.clone(),
                            src,
                            &parsed.source_map,
                        );
                        let ref_locs = extract_reference_locations(&self.codebase, file);
                        cache.put(file, h, issues.clone(), ref_locs);
                        (issues, symbols)
                    }
                } else {
                    let arena = bumpalo::Bump::new();
                    let parsed = php_rs_parser::parse(&arena, src);
                    driver.analyze_bodies(&parsed.program, file.clone(), src, &parsed.source_map)
                };
                // Progress callback: fires only for files that reach pass 2.
                if let Some(cb) = &self.on_file_done {
                    cb();
                }
                result
            })
            .collect();

        let mut all_symbols = Vec::new();
        for (issues, symbols) in pass2_results {
            all_issues.extend(issues);
            all_symbols.extend(symbols);
        }

        if let Some(cache) = &self.cache {
            cache.flush();
        }

        self.codebase.compact_reference_index();

        if self.find_dead_code {
            let dead_code_issues =
                crate::dead_code::DeadCodeAnalyzer::new(&self.codebase).analyze();
            all_issues.extend(dead_code_issues);
        }

        AnalysisResult::build(all_issues, std::collections::HashMap::new(), all_symbols)
    }
383
384 fn lazy_load_missing_classes(
385 &self,
386 psr4: Arc<crate::composer::Psr4Map>,
387 all_issues: &mut Vec<Issue>,
388 ) {
389 use std::collections::HashSet;
390
391 let max_depth = 10;
392 let mut loaded: HashSet<String> = HashSet::new();
393
394 for _ in 0..max_depth {
395 let mut to_load: Vec<(String, PathBuf)> = Vec::new();
396
397 for entry in self.codebase.classes.iter() {
398 let cls = entry.value();
399
400 if let Some(parent) = &cls.parent {
401 let fqcn = parent.as_ref();
402 if !self.codebase.classes.contains_key(fqcn) && !loaded.contains(fqcn) {
403 if let Some(path) = psr4.resolve(fqcn) {
404 to_load.push((fqcn.to_string(), path));
405 }
406 }
407 }
408
409 for iface in &cls.interfaces {
410 let fqcn = iface.as_ref();
411 if !self.codebase.classes.contains_key(fqcn)
412 && !self.codebase.interfaces.contains_key(fqcn)
413 && !loaded.contains(fqcn)
414 {
415 if let Some(path) = psr4.resolve(fqcn) {
416 to_load.push((fqcn.to_string(), path));
417 }
418 }
419 }
420 }
421
422 if to_load.is_empty() {
423 break;
424 }
425
426 for (fqcn, path) in to_load {
427 loaded.insert(fqcn);
428 if let Ok(src) = std::fs::read_to_string(&path) {
429 let file: Arc<str> = Arc::from(path.to_string_lossy().as_ref());
430 let arena = bumpalo::Bump::new();
431 let result = php_rs_parser::parse(&arena, &src);
432 let collector = crate::collector::DefinitionCollector::new(
433 &self.codebase,
434 file,
435 &src,
436 &result.source_map,
437 );
438 let issues = collector.collect(&result.program);
439 all_issues.extend(issues);
440 }
441 }
442
443 self.codebase.invalidate_finalization();
444 self.codebase.finalize();
445 }
446 }
447
448 pub fn re_analyze_file(&self, file_path: &str, new_content: &str) -> AnalysisResult {
457 if let Some(cache) = &self.cache {
459 let h = hash_content(new_content);
460 if let Some((issues, ref_locs)) = cache.get(file_path, &h) {
461 let file: Arc<str> = Arc::from(file_path);
462 self.codebase.replay_reference_locations(file, &ref_locs);
463 return AnalysisResult::build(issues, HashMap::new(), Vec::new());
464 }
465 }
466
467 let structural_snapshot = self.codebase.file_structural_snapshot(file_path);
468 self.codebase.remove_file_definitions(file_path);
469
470 let file: Arc<str> = Arc::from(file_path);
471 let arena = bumpalo::Bump::new();
472 let parsed = php_rs_parser::parse(&arena, new_content);
473
474 let mut all_issues = Vec::new();
475
476 for err in &parsed.errors {
477 all_issues.push(Issue::new(
478 mir_issues::IssueKind::ParseError {
479 message: err.to_string(),
480 },
481 mir_issues::Location {
482 file: file.clone(),
483 line: 1,
484 line_end: 1,
485 col_start: 0,
486 col_end: 0,
487 },
488 ));
489 }
490
491 let collector = DefinitionCollector::new(
492 &self.codebase,
493 file.clone(),
494 new_content,
495 &parsed.source_map,
496 );
497 all_issues.extend(collector.collect(&parsed.program));
498
499 if self
500 .codebase
501 .structural_unchanged_after_pass1(file_path, &structural_snapshot)
502 {
503 self.codebase
504 .restore_all_parents(file_path, &structural_snapshot);
505 } else {
506 self.codebase.finalize();
507 }
508
509 let symbols = if parsed.errors.is_empty() {
510 let driver = Pass2Driver::new(&self.codebase, self.resolved_php_version());
511 let (body_issues, symbols) = driver.analyze_bodies(
512 &parsed.program,
513 file.clone(),
514 new_content,
515 &parsed.source_map,
516 );
517 all_issues.extend(body_issues);
518 symbols
519 } else {
520 Vec::new()
521 };
522
523 if let Some(cache) = &self.cache {
524 let h = hash_content(new_content);
525 cache.evict_with_dependents(&[file_path.to_string()]);
526 let ref_locs = extract_reference_locations(&self.codebase, &file);
527 cache.put(file_path, h, all_issues.clone(), ref_locs);
528 }
529
530 AnalysisResult::build(all_issues, HashMap::new(), symbols)
531 }
532
533 pub fn analyze_source(source: &str) -> AnalysisResult {
536 use crate::collector::DefinitionCollector;
537 let analyzer = ProjectAnalyzer::new();
538 analyzer.load_stubs();
539 let file: Arc<str> = Arc::from("<source>");
540 let arena = bumpalo::Bump::new();
541 let result = php_rs_parser::parse(&arena, source);
542 let mut all_issues = Vec::new();
543 for err in &result.errors {
544 all_issues.push(Issue::new(
545 mir_issues::IssueKind::ParseError {
546 message: err.to_string(),
547 },
548 mir_issues::Location {
549 file: file.clone(),
550 line: 1,
551 line_end: 1,
552 col_start: 0,
553 col_end: 0,
554 },
555 ));
556 }
557 if !result.errors.is_empty() {
558 return AnalysisResult::build(all_issues, std::collections::HashMap::new(), Vec::new());
559 }
560 let collector =
561 DefinitionCollector::new(&analyzer.codebase, file.clone(), source, &result.source_map);
562 all_issues.extend(collector.collect(&result.program));
563 analyzer.codebase.finalize();
564 let mut type_envs = std::collections::HashMap::new();
565 let mut all_symbols = Vec::new();
566 let driver = Pass2Driver::new(&analyzer.codebase, analyzer.resolved_php_version());
567 all_issues.extend(driver.analyze_bodies_typed(
568 &result.program,
569 file.clone(),
570 source,
571 &result.source_map,
572 &mut type_envs,
573 &mut all_symbols,
574 ));
575 AnalysisResult::build(all_issues, type_envs, all_symbols)
576 }
577
578 pub fn discover_files(root: &Path) -> Vec<PathBuf> {
580 if root.is_file() {
581 return vec![root.to_path_buf()];
582 }
583 let mut files = Vec::new();
584 collect_php_files(root, &mut files);
585 files
586 }
587
588 pub fn collect_types_only(&self, paths: &[PathBuf]) {
591 paths.par_iter().for_each(|path| {
592 let Ok(src) = std::fs::read_to_string(path) else {
593 return;
594 };
595 let file: Arc<str> = Arc::from(path.to_string_lossy().as_ref());
596 let arena = bumpalo::Bump::new();
597 let result = php_rs_parser::parse(&arena, &src);
598 let collector =
599 DefinitionCollector::new(&self.codebase, file, &src, &result.source_map);
600 let _ = collector.collect(&result.program);
601 });
602 }
603}
604
/// `Default` is equivalent to [`ProjectAnalyzer::new`].
impl Default for ProjectAnalyzer {
    fn default() -> Self {
        Self::new()
    }
}
610
/// Recursively appends every `*.php` file under `dir` to `out`.
///
/// Symlinks are not followed, and the directories `vendor`, `.git`,
/// `node_modules`, `.cache` and `.pnpm-store` are skipped at any depth.
/// Directories that cannot be read are ignored. Entries are appended in the
/// order the filesystem yields them.
pub(crate) fn collect_php_files(dir: &Path, out: &mut Vec<PathBuf>) {
    const SKIPPED_DIRS: [&str; 5] = ["vendor", ".git", "node_modules", ".cache", ".pnpm-store"];

    let Ok(entries) = std::fs::read_dir(dir) else {
        return;
    };

    for entry in entries.flatten() {
        // If the file type cannot be determined, treat the entry as a
        // non-symlink and let the checks below decide.
        let is_symlink = matches!(entry.file_type(), Ok(ft) if ft.is_symlink());
        if is_symlink {
            continue;
        }

        let path = entry.path();
        if !path.is_dir() {
            let is_php = path.extension().and_then(|ext| ext.to_str()) == Some("php");
            if is_php {
                out.push(path);
            }
            continue;
        }

        let dir_name = path.file_name().and_then(|n| n.to_str()).unwrap_or("");
        if !SKIPPED_DIRS.contains(&dir_name) {
            collect_php_files(&path, out);
        }
    }
}
635
636fn build_reverse_deps(codebase: &Codebase) -> HashMap<String, HashSet<String>> {
641 let mut reverse: HashMap<String, HashSet<String>> = HashMap::new();
642
643 let mut add_edge = |symbol: &str, dependent_file: &str| {
644 if let Some(defining_file) = codebase.symbol_to_file.get(symbol) {
645 let def = defining_file.as_ref().to_string();
646 if def != dependent_file {
647 reverse
648 .entry(def)
649 .or_default()
650 .insert(dependent_file.to_string());
651 }
652 }
653 };
654
655 for entry in codebase.file_imports.iter() {
656 let file = entry.key().as_ref().to_string();
657 for fqcn in entry.value().values() {
658 add_edge(fqcn, &file);
659 }
660 }
661
662 for entry in codebase.classes.iter() {
663 let defining = {
664 let fqcn = entry.key().as_ref();
665 codebase
666 .symbol_to_file
667 .get(fqcn)
668 .map(|f| f.as_ref().to_string())
669 };
670 let Some(file) = defining else { continue };
671
672 let cls = entry.value();
673 if let Some(ref parent) = cls.parent {
674 add_edge(parent.as_ref(), &file);
675 }
676 for iface in &cls.interfaces {
677 add_edge(iface.as_ref(), &file);
678 }
679 for tr in &cls.traits {
680 add_edge(tr.as_ref(), &file);
681 }
682 }
683
684 reverse
685}
686
687fn extract_reference_locations(codebase: &Codebase, file: &Arc<str>) -> Vec<(String, u32, u32)> {
690 codebase
691 .extract_file_reference_locations(file.as_ref())
692 .into_iter()
693 .map(|(sym, start, end)| (sym.to_string(), start, end))
694 .collect()
695}
696
/// Outcome of an analysis run.
pub struct AnalysisResult {
    /// Every issue reported by the run.
    pub issues: Vec<Issue>,
    /// Type environments keyed by scope.
    pub type_envs: std::collections::HashMap<crate::type_env::ScopeId, crate::type_env::TypeEnv>,
    /// Resolved symbols, sorted by file so that each file's symbols are contiguous.
    pub symbols: Vec<crate::symbol::ResolvedSymbol>,
    /// For each file, the index range of its symbols inside `symbols`.
    symbols_by_file: HashMap<Arc<str>, std::ops::Range<usize>>,
}
711
712impl AnalysisResult {
713 fn build(
714 issues: Vec<Issue>,
715 type_envs: std::collections::HashMap<crate::type_env::ScopeId, crate::type_env::TypeEnv>,
716 mut symbols: Vec<crate::symbol::ResolvedSymbol>,
717 ) -> Self {
718 symbols.sort_unstable_by(|a, b| a.file.as_ref().cmp(b.file.as_ref()));
719 let mut symbols_by_file: HashMap<Arc<str>, std::ops::Range<usize>> = HashMap::new();
720 let mut i = 0;
721 while i < symbols.len() {
722 let file = Arc::clone(&symbols[i].file);
723 let start = i;
724 while i < symbols.len() && symbols[i].file == file {
725 i += 1;
726 }
727 symbols_by_file.insert(file, start..i);
728 }
729 Self {
730 issues,
731 type_envs,
732 symbols,
733 symbols_by_file,
734 }
735 }
736}
737
738impl AnalysisResult {
739 pub fn error_count(&self) -> usize {
740 self.issues
741 .iter()
742 .filter(|i| i.severity == mir_issues::Severity::Error)
743 .count()
744 }
745
746 pub fn warning_count(&self) -> usize {
747 self.issues
748 .iter()
749 .filter(|i| i.severity == mir_issues::Severity::Warning)
750 .count()
751 }
752
753 pub fn issues_by_file(&self) -> HashMap<std::sync::Arc<str>, Vec<&Issue>> {
755 let mut map: HashMap<std::sync::Arc<str>, Vec<&Issue>> = HashMap::new();
756 for issue in &self.issues {
757 map.entry(issue.location.file.clone())
758 .or_default()
759 .push(issue);
760 }
761 map
762 }
763
764 pub fn symbol_at(
767 &self,
768 file: &str,
769 byte_offset: u32,
770 ) -> Option<&crate::symbol::ResolvedSymbol> {
771 let range = self.symbols_by_file.get(file)?;
772 self.symbols[range.clone()]
773 .iter()
774 .filter(|s| s.span.start <= byte_offset && byte_offset < s.span.end)
775 .min_by_key(|s| s.span.end - s.span.start)
776 }
777}