1use std::path::{Path, PathBuf};
3use std::sync::Arc;
4
5use rayon::prelude::*;
6
7use std::collections::{HashMap, HashSet};
8
9use crate::cache::{hash_content, AnalysisCache};
10use crate::pass2::Pass2Driver;
11use crate::php_version::PhpVersion;
12use mir_codebase::Codebase;
13use mir_issues::Issue;
14
15use crate::collector::DefinitionCollector;
16
17pub use crate::pass2::merge_return_types;
19
/// Drives analysis of a set of PHP source files against one shared [`Codebase`].
pub struct ProjectAnalyzer {
    /// Shared symbol store that every analysis stage reads from and writes to.
    pub codebase: Arc<Codebase>,
    /// Optional result cache; when `None`, every file is analyzed from scratch.
    pub cache: Option<AnalysisCache>,
    /// Optional callback invoked after each file finishes body analysis in `analyze`.
    pub on_file_done: Option<Arc<dyn Fn() + Send + Sync>>,
    /// Optional PSR-4 map; when set, `analyze` uses it to load missing parent
    /// classes and interfaces from disk.
    pub psr4: Option<Arc<crate::composer::Psr4Map>>,
    /// Flipped to `true` by the first `load_stubs` call so stubs load at most once.
    stubs_loaded: std::sync::atomic::AtomicBool,
    /// When `true`, `analyze` additionally runs the dead-code analyzer.
    pub find_dead_code: bool,
    /// Target PHP version; `None` falls back to `PhpVersion::LATEST`.
    pub php_version: Option<PhpVersion>,
}
40
41impl ProjectAnalyzer {
42 pub fn new() -> Self {
43 Self {
44 codebase: Arc::new(Codebase::new()),
45 cache: None,
46 on_file_done: None,
47 psr4: None,
48 stubs_loaded: std::sync::atomic::AtomicBool::new(false),
49 find_dead_code: false,
50 php_version: None,
51 }
52 }
53
54 pub fn with_cache(cache_dir: &Path) -> Self {
56 Self {
57 codebase: Arc::new(Codebase::new()),
58 cache: Some(AnalysisCache::open(cache_dir)),
59 on_file_done: None,
60 psr4: None,
61 stubs_loaded: std::sync::atomic::AtomicBool::new(false),
62 find_dead_code: false,
63 php_version: None,
64 }
65 }
66
67 pub fn from_composer(
71 root: &Path,
72 ) -> Result<(Self, crate::composer::Psr4Map), crate::composer::ComposerError> {
73 let map = crate::composer::Psr4Map::from_composer(root)?;
74 let psr4 = Arc::new(map.clone());
75 let analyzer = Self {
76 codebase: Arc::new(Codebase::new()),
77 cache: None,
78 on_file_done: None,
79 psr4: Some(psr4),
80 stubs_loaded: std::sync::atomic::AtomicBool::new(false),
81 find_dead_code: false,
82 php_version: None,
83 };
84 Ok((analyzer, map))
85 }
86
87 pub fn with_php_version(mut self, version: PhpVersion) -> Self {
89 self.php_version = Some(version);
90 self
91 }
92
93 fn resolved_php_version(&self) -> PhpVersion {
96 self.php_version.unwrap_or(PhpVersion::LATEST)
97 }
98
99 pub fn codebase(&self) -> &Arc<Codebase> {
101 &self.codebase
102 }
103
104 pub fn load_stubs(&self) {
108 if !self
109 .stubs_loaded
110 .swap(true, std::sync::atomic::Ordering::SeqCst)
111 {
112 crate::stubs::load_stubs_for_version(&self.codebase, self.resolved_php_version());
113 }
114 }
115
/// Runs the full analysis pipeline over `paths` and returns all issues and
/// resolved symbols.
///
/// Stages, in order:
/// 1. Load stubs (first call only) and read every file in parallel.
/// 2. With a cache: evict entries (and their dependents) for files whose
///    current content hash has no cache entry.
/// 3. Pass 1 (parallel): parse each file, index namespaces, `use` imports and
///    declared symbol names, turn parse errors into issues, and collect definitions.
/// 4. Finalize the codebase, optionally lazy-load missing ancestors via PSR-4,
///    and store reverse dependencies in the cache.
/// 5. Run class-level analysis.
/// 6. Pass 2 (parallel): analyze bodies of files that parsed without errors,
///    using the cache when present.
/// 7. Flush the cache, compact the reference index, and optionally run
///    dead-code detection.
///
/// The returned result always has an empty `type_envs` map.
pub fn analyze(&self, paths: &[PathBuf]) -> AnalysisResult {
    let mut all_issues = Vec::new();
    let mut parse_errors = Vec::new();

    self.load_stubs();

    // Read all files up front. Unreadable files are reported on stderr and
    // dropped from the analysis set.
    let file_data: Vec<(Arc<str>, String)> = paths
        .par_iter()
        .filter_map(|path| match std::fs::read_to_string(path) {
            Ok(src) => Some((Arc::from(path.to_string_lossy().as_ref()), src)),
            Err(e) => {
                eprintln!("Cannot read {}: {}", path.display(), e);
                None
            }
        })
        .collect();

    // Cache invalidation: a file counts as changed when the cache has no entry
    // for its current content hash.
    if let Some(cache) = &self.cache {
        let changed: Vec<String> = file_data
            .par_iter()
            .filter_map(|(f, src)| {
                let h = hash_content(src);
                if cache.get(f, &h).is_none() {
                    Some(f.to_string())
                } else {
                    None
                }
            })
            .collect();
        if !changed.is_empty() {
            cache.evict_with_dependents(&changed);
        }
    }

    // Pass 1: each file yields (parse-error issues, definition-collection issues).
    let pass1_results: Vec<(Vec<Issue>, Vec<Issue>)> = file_data
        .par_iter()
        .map(|(file, src)| {
            use php_ast::ast::StmtKind;
            let arena = bumpalo::Bump::new();
            let result = php_rs_parser::parse(&arena, src);

            let mut current_namespace: Option<String> = None;
            // One import map per file; it is not reset when the namespace changes.
            let mut imports: std::collections::HashMap<String, String> =
                std::collections::HashMap::new();
            let mut file_ns_set = false;

            // Indexes the given statements only (no recursion into nested
            // statements): records `use` aliases in `imports` and registers
            // the fully-qualified names of declared classes, interfaces,
            // traits, enums and functions in `known_symbols`.
            let index_stmts =
                |stmts: &[php_ast::ast::Stmt<'_, '_>],
                 ns: Option<&str>,
                 imports: &mut std::collections::HashMap<String, String>| {
                    for stmt in stmts.iter() {
                        match &stmt.kind {
                            StmtKind::Use(use_decl) => {
                                for item in use_decl.uses.iter() {
                                    let full_name = crate::parser::name_to_string(&item.name)
                                        .trim_start_matches('\\')
                                        .to_string();
                                    // Without an explicit alias, the last
                                    // name segment is the alias.
                                    let alias = item.alias.unwrap_or_else(|| {
                                        full_name.rsplit('\\').next().unwrap_or(&full_name)
                                    });
                                    imports.insert(alias.to_string(), full_name);
                                }
                            }
                            StmtKind::Class(decl) => {
                                // Class declarations without a name are not registered.
                                if let Some(n) = decl.name {
                                    let fqcn = match ns {
                                        Some(ns) => format!("{ns}\\{n}"),
                                        None => n.to_string(),
                                    };
                                    self.codebase
                                        .known_symbols
                                        .insert(Arc::from(fqcn.as_str()));
                                }
                            }
                            StmtKind::Interface(decl) => {
                                let fqcn = match ns {
                                    Some(ns) => format!("{}\\{}", ns, decl.name),
                                    None => decl.name.to_string(),
                                };
                                self.codebase.known_symbols.insert(Arc::from(fqcn.as_str()));
                            }
                            StmtKind::Trait(decl) => {
                                let fqcn = match ns {
                                    Some(ns) => format!("{}\\{}", ns, decl.name),
                                    None => decl.name.to_string(),
                                };
                                self.codebase.known_symbols.insert(Arc::from(fqcn.as_str()));
                            }
                            StmtKind::Enum(decl) => {
                                let fqcn = match ns {
                                    Some(ns) => format!("{}\\{}", ns, decl.name),
                                    None => decl.name.to_string(),
                                };
                                self.codebase.known_symbols.insert(Arc::from(fqcn.as_str()));
                            }
                            StmtKind::Function(decl) => {
                                let fqn = match ns {
                                    Some(ns) => format!("{}\\{}", ns, decl.name),
                                    None => decl.name.to_string(),
                                };
                                self.codebase.known_symbols.insert(Arc::from(fqn.as_str()));
                            }
                            _ => {}
                        }
                    }
                };

            for stmt in result.program.stmts.iter() {
                match &stmt.kind {
                    StmtKind::Namespace(ns) => {
                        // The namespace stays current for the following
                        // top-level statements until another one replaces it.
                        current_namespace =
                            ns.name.as_ref().map(|n| crate::parser::name_to_string(n));
                        // Only the first named namespace is recorded as the
                        // file's namespace.
                        if !file_ns_set {
                            if let Some(ref ns_str) = current_namespace {
                                self.codebase
                                    .file_namespaces
                                    .insert(file.clone(), ns_str.clone());
                                file_ns_set = true;
                            }
                        }
                        // Braced namespaces carry their statements inside the node.
                        if let php_ast::ast::NamespaceBody::Braced(inner_stmts) = &ns.body {
                            index_stmts(
                                inner_stmts,
                                current_namespace.as_deref(),
                                &mut imports,
                            );
                        }
                    }
                    _ => index_stmts(
                        std::slice::from_ref(stmt),
                        current_namespace.as_deref(),
                        &mut imports,
                    ),
                }
            }

            if !imports.is_empty() {
                self.codebase.file_imports.insert(file.clone(), imports);
            }

            // Parse errors are reported at a fixed location (line 1, column 0).
            let file_parse_errors: Vec<Issue> = result
                .errors
                .iter()
                .map(|err| {
                    Issue::new(
                        mir_issues::IssueKind::ParseError {
                            message: err.to_string(),
                        },
                        mir_issues::Location {
                            file: file.clone(),
                            line: 1,
                            line_end: 1,
                            col_start: 0,
                            col_end: 0,
                        },
                    )
                })
                .collect();

            // Definitions are collected even when the file has parse errors.
            let collector =
                DefinitionCollector::new(&self.codebase, file.clone(), src, &result.source_map);
            let issues = collector.collect(&result.program);

            (file_parse_errors, issues)
        })
        .collect();

    // Remember which files failed to parse so pass 2 can skip them.
    let mut files_with_parse_errors: std::collections::HashSet<Arc<str>> =
        std::collections::HashSet::new();
    for (file_parse_errors, issues) in pass1_results {
        for issue in &file_parse_errors {
            files_with_parse_errors.insert(issue.location.file.clone());
        }
        parse_errors.extend(file_parse_errors);
        all_issues.extend(issues);
    }

    // Parse errors are appended after all definition-collection issues.
    all_issues.extend(parse_errors);

    self.codebase.finalize();

    if let Some(psr4) = &self.psr4 {
        self.lazy_load_missing_classes(psr4.clone(), &mut all_issues);
    }

    if let Some(cache) = &self.cache {
        let rev = build_reverse_deps(&self.codebase);
        cache.set_reverse_deps(rev);
    }

    let analyzed_file_set: std::collections::HashSet<std::sync::Arc<str>> =
        file_data.iter().map(|(f, _)| f.clone()).collect();
    let class_issues =
        crate::class::ClassAnalyzer::with_files(&self.codebase, analyzed_file_set, &file_data)
            .analyze_all();
    all_issues.extend(class_issues);

    // Pass 2: body analysis, skipping files that had parse errors.
    // NOTE(review): `on_file_done` therefore never fires for files with parse
    // errors or unreadable files — confirm progress reporting accounts for that.
    let pass2_results: Vec<(Vec<Issue>, Vec<crate::symbol::ResolvedSymbol>)> = file_data
        .par_iter()
        .filter(|(file, _)| !files_with_parse_errors.contains(file))
        .map(|(file, src)| {
            let driver = Pass2Driver::new(&self.codebase, self.resolved_php_version());
            let result = if let Some(cache) = &self.cache {
                let h = hash_content(src);
                if let Some((cached_issues, ref_locs)) = cache.get(file, &h) {
                    // Cache hit: replay the stored reference locations and
                    // reuse the stored issues. No symbols are produced.
                    self.codebase
                        .replay_reference_locations(file.clone(), &ref_locs);
                    (cached_issues, Vec::new())
                } else {
                    let arena = bumpalo::Bump::new();
                    let parsed = php_rs_parser::parse(&arena, src);
                    let (issues, symbols) = driver.analyze_bodies(
                        &parsed.program,
                        file.clone(),
                        src,
                        &parsed.source_map,
                    );
                    let ref_locs = extract_reference_locations(&self.codebase, file);
                    // Only body-analysis issues are cached here; pass-1 issues
                    // are regenerated on every run.
                    // NOTE(review): `re_analyze_file` stores pass-1 and body
                    // issues together under the same cache, so an entry
                    // written there may be double-reported on a hit here — confirm.
                    cache.put(file, h, issues.clone(), ref_locs);
                    (issues, symbols)
                }
            } else {
                let arena = bumpalo::Bump::new();
                let parsed = php_rs_parser::parse(&arena, src);
                driver.analyze_bodies(&parsed.program, file.clone(), src, &parsed.source_map)
            };
            if let Some(cb) = &self.on_file_done {
                cb();
            }
            result
        })
        .collect();

    let mut all_symbols = Vec::new();
    for (issues, symbols) in pass2_results {
        all_issues.extend(issues);
        all_symbols.extend(symbols);
    }

    if let Some(cache) = &self.cache {
        cache.flush();
    }

    self.codebase.compact_reference_index();

    if self.find_dead_code {
        let dead_code_issues =
            crate::dead_code::DeadCodeAnalyzer::new(&self.codebase).analyze();
        all_issues.extend(dead_code_issues);
    }

    AnalysisResult::build(all_issues, std::collections::HashMap::new(), all_symbols)
}
385
386 fn lazy_load_missing_classes(
387 &self,
388 psr4: Arc<crate::composer::Psr4Map>,
389 all_issues: &mut Vec<Issue>,
390 ) {
391 use std::collections::HashSet;
392
393 let max_depth = 10;
394 let mut loaded: HashSet<String> = HashSet::new();
395
396 for _ in 0..max_depth {
397 let mut to_load: Vec<(String, PathBuf)> = Vec::new();
398
399 for entry in self.codebase.classes.iter() {
400 let cls = entry.value();
401
402 if let Some(parent) = &cls.parent {
403 let fqcn = parent.as_ref();
404 if !self.codebase.classes.contains_key(fqcn) && !loaded.contains(fqcn) {
405 if let Some(path) = psr4.resolve(fqcn) {
406 to_load.push((fqcn.to_string(), path));
407 }
408 }
409 }
410
411 for iface in &cls.interfaces {
412 let fqcn = iface.as_ref();
413 if !self.codebase.classes.contains_key(fqcn)
414 && !self.codebase.interfaces.contains_key(fqcn)
415 && !loaded.contains(fqcn)
416 {
417 if let Some(path) = psr4.resolve(fqcn) {
418 to_load.push((fqcn.to_string(), path));
419 }
420 }
421 }
422 }
423
424 if to_load.is_empty() {
425 break;
426 }
427
428 for (fqcn, path) in to_load {
429 loaded.insert(fqcn);
430 if let Ok(src) = std::fs::read_to_string(&path) {
431 let file: Arc<str> = Arc::from(path.to_string_lossy().as_ref());
432 let arena = bumpalo::Bump::new();
433 let result = php_rs_parser::parse(&arena, &src);
434 let collector = crate::collector::DefinitionCollector::new(
435 &self.codebase,
436 file,
437 &src,
438 &result.source_map,
439 );
440 let issues = collector.collect(&result.program);
441 all_issues.extend(issues);
442 }
443 }
444
445 self.codebase.invalidate_finalization();
446 self.codebase.finalize();
447 }
448 }
449
/// Re-analyzes a single file whose content is now `new_content` and returns
/// the issues and symbols for that file only.
///
/// With a cache entry matching the content hash, the stored reference
/// locations are replayed and the stored issues are returned with no symbols;
/// definitions in the codebase are left untouched. Otherwise the file's
/// definitions are removed and re-collected, and body analysis runs only when
/// the file parses without errors.
pub fn re_analyze_file(&self, file_path: &str, new_content: &str) -> AnalysisResult {
    // Fast path: identical content was analyzed before.
    if let Some(cache) = &self.cache {
        let h = hash_content(new_content);
        if let Some((issues, ref_locs)) = cache.get(file_path, &h) {
            let file: Arc<str> = Arc::from(file_path);
            self.codebase.replay_reference_locations(file, &ref_locs);
            return AnalysisResult::build(issues, HashMap::new(), Vec::new());
        }
    }

    // Snapshot first: it is compared against the re-collected state below.
    let structural_snapshot = self.codebase.file_structural_snapshot(file_path);
    self.codebase.remove_file_definitions(file_path);

    let file: Arc<str> = Arc::from(file_path);
    let arena = bumpalo::Bump::new();
    let parsed = php_rs_parser::parse(&arena, new_content);

    let mut all_issues = Vec::new();

    // Parse errors are reported at a fixed location (line 1, column 0).
    for err in &parsed.errors {
        all_issues.push(Issue::new(
            mir_issues::IssueKind::ParseError {
                message: err.to_string(),
            },
            mir_issues::Location {
                file: file.clone(),
                line: 1,
                line_end: 1,
                col_start: 0,
                col_end: 0,
            },
        ));
    }

    // Definitions are re-collected even when the file has parse errors.
    let collector = DefinitionCollector::new(
        &self.codebase,
        file.clone(),
        new_content,
        &parsed.source_map,
    );
    all_issues.extend(collector.collect(&parsed.program));

    // When the codebase reports the file's structure as unchanged relative to
    // the snapshot, restore from the snapshot instead of running `finalize`
    // (presumably the cheaper path — confirm against `Codebase`).
    if self
        .codebase
        .structural_unchanged_after_pass1(file_path, &structural_snapshot)
    {
        self.codebase
            .restore_all_parents(file_path, &structural_snapshot);
    } else {
        self.codebase.finalize();
    }

    // Body analysis is skipped for files with parse errors.
    let symbols = if parsed.errors.is_empty() {
        let driver = Pass2Driver::new(&self.codebase, self.resolved_php_version());
        let (body_issues, symbols) = driver.analyze_bodies(
            &parsed.program,
            file.clone(),
            new_content,
            &parsed.source_map,
        );
        all_issues.extend(body_issues);
        symbols
    } else {
        Vec::new()
    };

    if let Some(cache) = &self.cache {
        let h = hash_content(new_content);
        // Evict this file and its dependents before storing the fresh entry.
        cache.evict_with_dependents(&[file_path.to_string()]);
        let ref_locs = extract_reference_locations(&self.codebase, &file);
        // NOTE(review): this stores parse-error, definition and body issues
        // together, whereas `analyze` stores only body issues and re-adds
        // pass-1 issues on a hit; the two writers look inconsistent — confirm.
        // NOTE(review): unlike `analyze`, `cache.flush()` is not called here.
        cache.put(file_path, h, all_issues.clone(), ref_locs);
    }

    AnalysisResult::build(all_issues, HashMap::new(), symbols)
}
534
535 pub fn analyze_source(source: &str) -> AnalysisResult {
538 use crate::collector::DefinitionCollector;
539 let analyzer = ProjectAnalyzer::new();
540 analyzer.load_stubs();
541 let file: Arc<str> = Arc::from("<source>");
542 let arena = bumpalo::Bump::new();
543 let result = php_rs_parser::parse(&arena, source);
544 let mut all_issues = Vec::new();
545 for err in &result.errors {
546 all_issues.push(Issue::new(
547 mir_issues::IssueKind::ParseError {
548 message: err.to_string(),
549 },
550 mir_issues::Location {
551 file: file.clone(),
552 line: 1,
553 line_end: 1,
554 col_start: 0,
555 col_end: 0,
556 },
557 ));
558 }
559 if !result.errors.is_empty() {
560 return AnalysisResult::build(all_issues, std::collections::HashMap::new(), Vec::new());
561 }
562 let collector =
563 DefinitionCollector::new(&analyzer.codebase, file.clone(), source, &result.source_map);
564 all_issues.extend(collector.collect(&result.program));
565 analyzer.codebase.finalize();
566 let mut type_envs = std::collections::HashMap::new();
567 let mut all_symbols = Vec::new();
568 let driver = Pass2Driver::new(&analyzer.codebase, analyzer.resolved_php_version());
569 all_issues.extend(driver.analyze_bodies_typed(
570 &result.program,
571 file.clone(),
572 source,
573 &result.source_map,
574 &mut type_envs,
575 &mut all_symbols,
576 ));
577 AnalysisResult::build(all_issues, type_envs, all_symbols)
578 }
579
580 pub fn discover_files(root: &Path) -> Vec<PathBuf> {
582 if root.is_file() {
583 return vec![root.to_path_buf()];
584 }
585 let mut files = Vec::new();
586 collect_php_files(root, &mut files);
587 files
588 }
589
590 pub fn collect_types_only(&self, paths: &[PathBuf]) {
593 paths.par_iter().for_each(|path| {
594 let Ok(src) = std::fs::read_to_string(path) else {
595 return;
596 };
597 let file: Arc<str> = Arc::from(path.to_string_lossy().as_ref());
598 let arena = bumpalo::Bump::new();
599 let result = php_rs_parser::parse(&arena, &src);
600 let collector =
601 DefinitionCollector::new(&self.codebase, file, &src, &result.source_map);
602 let _ = collector.collect(&result.program);
603 });
604 }
605}
606
607impl Default for ProjectAnalyzer {
608 fn default() -> Self {
609 Self::new()
610 }
611}
612
/// Recursively appends every `.php` file under `dir` to `out`.
///
/// - Symlinks are never followed or reported.
/// - Directories named `vendor`, `.git`, `node_modules`, `.cache` or
///   `.pnpm-store` are skipped entirely.
/// - The extension match is case-sensitive (`php` only).
/// - A directory that cannot be read and entries that fail to enumerate are
///   skipped silently.
///
/// Entries are appended in the order the operating system returns them.
pub(crate) fn collect_php_files(dir: &Path, out: &mut Vec<PathBuf>) {
    let Ok(entries) = std::fs::read_dir(dir) else {
        return;
    };
    for entry in entries.flatten() {
        // Fetch the entry type once and reuse it for both the symlink check
        // and the directory check, instead of stat-ing the path a second time.
        let file_type = entry.file_type().ok();
        if file_type.map_or(false, |ft| ft.is_symlink()) {
            continue;
        }
        let path = entry.path();
        let is_dir = match file_type {
            Some(ft) => ft.is_dir(),
            // The entry type could not be read; fall back to asking the path.
            None => path.is_dir(),
        };
        if is_dir {
            let name = path.file_name().and_then(|n| n.to_str()).unwrap_or("");
            if matches!(
                name,
                "vendor" | ".git" | "node_modules" | ".cache" | ".pnpm-store"
            ) {
                continue;
            }
            collect_php_files(&path, out);
        } else if path.extension().and_then(|e| e.to_str()) == Some("php") {
            out.push(path);
        }
    }
}
637
638fn build_reverse_deps(codebase: &Codebase) -> HashMap<String, HashSet<String>> {
643 let mut reverse: HashMap<String, HashSet<String>> = HashMap::new();
644
645 let mut add_edge = |symbol: &str, dependent_file: &str| {
646 if let Some(defining_file) = codebase.symbol_to_file.get(symbol) {
647 let def = defining_file.as_ref().to_string();
648 if def != dependent_file {
649 reverse
650 .entry(def)
651 .or_default()
652 .insert(dependent_file.to_string());
653 }
654 }
655 };
656
657 for entry in codebase.file_imports.iter() {
658 let file = entry.key().as_ref().to_string();
659 for fqcn in entry.value().values() {
660 add_edge(fqcn, &file);
661 }
662 }
663
664 for entry in codebase.classes.iter() {
665 let defining = {
666 let fqcn = entry.key().as_ref();
667 codebase
668 .symbol_to_file
669 .get(fqcn)
670 .map(|f| f.as_ref().to_string())
671 };
672 let Some(file) = defining else { continue };
673
674 let cls = entry.value();
675 if let Some(ref parent) = cls.parent {
676 add_edge(parent.as_ref(), &file);
677 }
678 for iface in &cls.interfaces {
679 add_edge(iface.as_ref(), &file);
680 }
681 for tr in &cls.traits {
682 add_edge(tr.as_ref(), &file);
683 }
684 }
685
686 reverse
687}
688
689fn extract_reference_locations(codebase: &Codebase, file: &Arc<str>) -> Vec<(String, u32, u32)> {
692 codebase
693 .extract_file_reference_locations(file.as_ref())
694 .into_iter()
695 .map(|(sym, start, end)| (sym.to_string(), start, end))
696 .collect()
697}
698
/// Outcome of an analysis run.
pub struct AnalysisResult {
    /// Every issue reported by the run.
    pub issues: Vec<Issue>,
    /// Type environments keyed by scope; only `analyze_source` fills this, the
    /// other entry points leave it empty.
    pub type_envs: std::collections::HashMap<crate::type_env::ScopeId, crate::type_env::TypeEnv>,
    /// Resolved symbols, sorted by file so each file's symbols are contiguous.
    pub symbols: Vec<crate::symbol::ResolvedSymbol>,
    /// For each file, the index range of its symbols within `symbols`.
    symbols_by_file: HashMap<Arc<str>, std::ops::Range<usize>>,
}
713
714impl AnalysisResult {
715 fn build(
716 issues: Vec<Issue>,
717 type_envs: std::collections::HashMap<crate::type_env::ScopeId, crate::type_env::TypeEnv>,
718 mut symbols: Vec<crate::symbol::ResolvedSymbol>,
719 ) -> Self {
720 symbols.sort_unstable_by(|a, b| a.file.as_ref().cmp(b.file.as_ref()));
721 let mut symbols_by_file: HashMap<Arc<str>, std::ops::Range<usize>> = HashMap::new();
722 let mut i = 0;
723 while i < symbols.len() {
724 let file = Arc::clone(&symbols[i].file);
725 let start = i;
726 while i < symbols.len() && symbols[i].file == file {
727 i += 1;
728 }
729 symbols_by_file.insert(file, start..i);
730 }
731 Self {
732 issues,
733 type_envs,
734 symbols,
735 symbols_by_file,
736 }
737 }
738}
739
740impl AnalysisResult {
741 pub fn error_count(&self) -> usize {
742 self.issues
743 .iter()
744 .filter(|i| i.severity == mir_issues::Severity::Error)
745 .count()
746 }
747
748 pub fn warning_count(&self) -> usize {
749 self.issues
750 .iter()
751 .filter(|i| i.severity == mir_issues::Severity::Warning)
752 .count()
753 }
754
755 pub fn issues_by_file(&self) -> HashMap<std::sync::Arc<str>, Vec<&Issue>> {
757 let mut map: HashMap<std::sync::Arc<str>, Vec<&Issue>> = HashMap::new();
758 for issue in &self.issues {
759 map.entry(issue.location.file.clone())
760 .or_default()
761 .push(issue);
762 }
763 map
764 }
765
766 pub fn symbol_at(
769 &self,
770 file: &str,
771 byte_offset: u32,
772 ) -> Option<&crate::symbol::ResolvedSymbol> {
773 let range = self.symbols_by_file.get(file)?;
774 self.symbols[range.clone()]
775 .iter()
776 .filter(|s| s.span.start <= byte_offset && byte_offset < s.span.end)
777 .min_by_key(|s| s.span.end - s.span.start)
778 }
779}