1use std::path::{Path, PathBuf};
3use std::sync::Arc;
4
5use rayon::prelude::*;
6
7use std::collections::{HashMap, HashSet};
8
9use crate::cache::{hash_content, AnalysisCache};
10use crate::pass2::Pass2Driver;
11use crate::php_version::PhpVersion;
12use mir_codebase::Codebase;
13use mir_issues::Issue;
14
15use crate::collector::DefinitionCollector;
16
17pub use crate::pass2::merge_return_types;
19
/// Top-level driver for analysing a PHP project.
///
/// Owns the shared [`Codebase`] that every pass reads and writes, together
/// with the optional collaborators (cache, PSR-4 map, progress callback) and
/// the switches that tune a run.
pub struct ProjectAnalyzer {
    /// Shared symbol store filled by definition collection and read by the later passes.
    pub codebase: Arc<Codebase>,
    /// Result cache consulted by `analyze` and `re_analyze_file`; `None` disables caching.
    pub cache: Option<AnalysisCache>,
    /// Callback invoked once per file after its pass-2 step in `analyze`.
    pub on_file_done: Option<Arc<dyn Fn() + Send + Sync>>,
    /// PSR-4 map; when set, `analyze` uses it to load parents/interfaces that
    /// are referenced but not part of the analysed file set.
    pub psr4: Option<Arc<crate::composer::Psr4Map>>,
    /// Flipped to `true` by the first `load_stubs` call so stubs are loaded at most once.
    stubs_loaded: std::sync::atomic::AtomicBool,
    /// When `true`, `analyze` appends the issues produced by the dead-code analyzer.
    pub find_dead_code: bool,
    /// PHP version handed to pass 2; `None` resolves to `PhpVersion::LATEST`.
    pub php_version: Option<PhpVersion>,
}
40
41impl ProjectAnalyzer {
42 pub fn new() -> Self {
43 Self {
44 codebase: Arc::new(Codebase::new()),
45 cache: None,
46 on_file_done: None,
47 psr4: None,
48 stubs_loaded: std::sync::atomic::AtomicBool::new(false),
49 find_dead_code: false,
50 php_version: None,
51 }
52 }
53
54 pub fn with_cache(cache_dir: &Path) -> Self {
56 Self {
57 codebase: Arc::new(Codebase::new()),
58 cache: Some(AnalysisCache::open(cache_dir)),
59 on_file_done: None,
60 psr4: None,
61 stubs_loaded: std::sync::atomic::AtomicBool::new(false),
62 find_dead_code: false,
63 php_version: None,
64 }
65 }
66
67 pub fn from_composer(
71 root: &Path,
72 ) -> Result<(Self, crate::composer::Psr4Map), crate::composer::ComposerError> {
73 let map = crate::composer::Psr4Map::from_composer(root)?;
74 let psr4 = Arc::new(map.clone());
75 let analyzer = Self {
76 codebase: Arc::new(Codebase::new()),
77 cache: None,
78 on_file_done: None,
79 psr4: Some(psr4),
80 stubs_loaded: std::sync::atomic::AtomicBool::new(false),
81 find_dead_code: false,
82 php_version: None,
83 };
84 Ok((analyzer, map))
85 }
86
87 pub fn with_php_version(mut self, version: PhpVersion) -> Self {
89 self.php_version = Some(version);
90 self
91 }
92
93 fn resolved_php_version(&self) -> PhpVersion {
96 self.php_version.unwrap_or(PhpVersion::LATEST)
97 }
98
99 pub fn codebase(&self) -> &Arc<Codebase> {
101 &self.codebase
102 }
103
104 pub fn load_stubs(&self) {
106 if !self
107 .stubs_loaded
108 .swap(true, std::sync::atomic::Ordering::SeqCst)
109 {
110 crate::stubs::load_stubs(&self.codebase);
111 }
112 }
113
    /// Runs the whole analysis pipeline over `paths` and returns everything it found.
    ///
    /// Stages, in order:
    /// 1. load stubs (once per analyzer) and read every file in parallel;
    ///    unreadable files are reported on stderr and skipped;
    /// 2. with a cache configured, pass every file that has no cache entry for
    ///    its current content hash to `cache.evict_with_dependents`;
    /// 3. pass 1 (parallel): parse each file, register declared symbol names,
    ///    the file's namespace and its `use` imports, and collect definitions;
    /// 4. finalize the codebase, then load missing parents/interfaces through
    ///    the PSR-4 map when one is set;
    /// 5. class-level analysis;
    /// 6. pass 2 (parallel): analyse bodies, reusing cached results when available;
    /// 7. flush the cache, compact the reference index and, if enabled, run
    ///    dead-code analysis.
    ///
    /// The returned result always carries an empty `type_envs` map.
    pub fn analyze(&self, paths: &[PathBuf]) -> AnalysisResult {
        let mut all_issues = Vec::new();
        let mut parse_errors = Vec::new();

        self.load_stubs();

        // Read all files in parallel. A file that cannot be read is reported on
        // stderr and dropped from the run. Each entry is (path as string, source).
        let file_data: Vec<(Arc<str>, String)> = paths
            .par_iter()
            .filter_map(|path| match std::fs::read_to_string(path) {
                Ok(src) => Some((Arc::from(path.to_string_lossy().as_ref()), src)),
                Err(e) => {
                    eprintln!("Cannot read {}: {}", path.display(), e);
                    None
                }
            })
            .collect();

        if let Some(cache) = &self.cache {
            // A file counts as changed when the cache has no entry for its
            // current content hash.
            let changed: Vec<String> = file_data
                .par_iter()
                .filter_map(|(f, src)| {
                    let h = hash_content(src);
                    if cache.get(f, &h).is_none() {
                        Some(f.to_string())
                    } else {
                        None
                    }
                })
                .collect();
            if !changed.is_empty() {
                cache.evict_with_dependents(&changed);
            }
        }

        // Pass 1: per file, produce (parse-error issues, definition-collection issues).
        let pass1_results: Vec<(Vec<Issue>, Vec<Issue>)> = file_data
            .par_iter()
            .map(|(file, src)| {
                use php_ast::ast::StmtKind;
                let arena = bumpalo::Bump::new();
                let result = php_rs_parser::parse(&arena, src);

                let mut current_namespace: Option<String> = None;
                let mut imports: std::collections::HashMap<String, String> =
                    std::collections::HashMap::new();
                let mut file_ns_set = false;

                // Registers the names declared by `stmts` in `known_symbols`
                // (prefixed with `ns` and a backslash when a namespace is given)
                // and records `use` imports as alias -> full name.
                let index_stmts =
                    |stmts: &[php_ast::ast::Stmt<'_, '_>],
                     ns: Option<&str>,
                     imports: &mut std::collections::HashMap<String, String>| {
                        for stmt in stmts.iter() {
                            match &stmt.kind {
                                StmtKind::Use(use_decl) => {
                                    for item in use_decl.uses.iter() {
                                        let full_name = crate::parser::name_to_string(&item.name)
                                            .trim_start_matches('\\')
                                            .to_string();
                                        // Without an explicit alias, the last
                                        // backslash-separated segment is the alias.
                                        let alias = item.alias.unwrap_or_else(|| {
                                            full_name.rsplit('\\').next().unwrap_or(&full_name)
                                        });
                                        imports.insert(alias.to_string(), full_name);
                                    }
                                }
                                StmtKind::Class(decl) => {
                                    // Class declarations without a name are not registered.
                                    if let Some(n) = decl.name {
                                        let fqcn = match ns {
                                            Some(ns) => format!("{}\\{}", ns, n),
                                            None => n.to_string(),
                                        };
                                        self.codebase
                                            .known_symbols
                                            .insert(Arc::from(fqcn.as_str()));
                                    }
                                }
                                StmtKind::Interface(decl) => {
                                    let fqcn = match ns {
                                        Some(ns) => format!("{}\\{}", ns, decl.name),
                                        None => decl.name.to_string(),
                                    };
                                    self.codebase.known_symbols.insert(Arc::from(fqcn.as_str()));
                                }
                                StmtKind::Trait(decl) => {
                                    let fqcn = match ns {
                                        Some(ns) => format!("{}\\{}", ns, decl.name),
                                        None => decl.name.to_string(),
                                    };
                                    self.codebase.known_symbols.insert(Arc::from(fqcn.as_str()));
                                }
                                StmtKind::Enum(decl) => {
                                    let fqcn = match ns {
                                        Some(ns) => format!("{}\\{}", ns, decl.name),
                                        None => decl.name.to_string(),
                                    };
                                    self.codebase.known_symbols.insert(Arc::from(fqcn.as_str()));
                                }
                                StmtKind::Function(decl) => {
                                    let fqn = match ns {
                                        Some(ns) => format!("{}\\{}", ns, decl.name),
                                        None => decl.name.to_string(),
                                    };
                                    self.codebase.known_symbols.insert(Arc::from(fqn.as_str()));
                                }
                                _ => {}
                            }
                        }
                    };

                for stmt in result.program.stmts.iter() {
                    match &stmt.kind {
                        StmtKind::Namespace(ns) => {
                            current_namespace =
                                ns.name.as_ref().map(|n| crate::parser::name_to_string(n));
                            // Only the first named namespace encountered is
                            // recorded as the file's namespace.
                            if !file_ns_set {
                                if let Some(ref ns_str) = current_namespace {
                                    self.codebase
                                        .file_namespaces
                                        .insert(file.clone(), ns_str.clone());
                                    file_ns_set = true;
                                }
                            }
                            // Statements nested in a braced namespace body are
                            // indexed here; any other top-level statement goes
                            // through the catch-all arm below with the most
                            // recently seen namespace.
                            if let php_ast::ast::NamespaceBody::Braced(inner_stmts) = &ns.body {
                                index_stmts(
                                    inner_stmts,
                                    current_namespace.as_deref(),
                                    &mut imports,
                                );
                            }
                        }
                        _ => index_stmts(
                            std::slice::from_ref(stmt),
                            current_namespace.as_deref(),
                            &mut imports,
                        ),
                    }
                }

                if !imports.is_empty() {
                    self.codebase.file_imports.insert(file.clone(), imports);
                }

                // NOTE: every parse error is pinned to line 1, column 0 rather
                // than to a position taken from the error itself.
                let file_parse_errors: Vec<Issue> = result
                    .errors
                    .iter()
                    .map(|err| {
                        Issue::new(
                            mir_issues::IssueKind::ParseError {
                                message: err.to_string(),
                            },
                            mir_issues::Location {
                                file: file.clone(),
                                line: 1,
                                col_start: 0,
                                col_end: 0,
                            },
                        )
                    })
                    .collect();

                let collector =
                    DefinitionCollector::new(&self.codebase, file.clone(), src, &result.source_map);
                let issues = collector.collect(&result.program);

                (file_parse_errors, issues)
            })
            .collect();

        for (file_parse_errors, issues) in pass1_results {
            parse_errors.extend(file_parse_errors);
            all_issues.extend(issues);
        }

        // Parse errors are appended after all definition-collection issues.
        all_issues.extend(parse_errors);

        self.codebase.finalize();

        if let Some(psr4) = &self.psr4 {
            // Pull in parents/interfaces that live outside the analysed file set.
            self.lazy_load_missing_classes(psr4.clone(), &mut all_issues);
        }

        if let Some(cache) = &self.cache {
            // Hand the cache the defining-file -> dependent-files map built
            // from the now-finalized codebase.
            let rev = build_reverse_deps(&self.codebase);
            cache.set_reverse_deps(rev);
        }

        let analyzed_file_set: std::collections::HashSet<std::sync::Arc<str>> =
            file_data.iter().map(|(f, _)| f.clone()).collect();
        let class_issues =
            crate::class::ClassAnalyzer::with_files(&self.codebase, analyzed_file_set, &file_data)
                .analyze_all();
        all_issues.extend(class_issues);

        // Pass 2: per file, produce (body issues, resolved symbols).
        let pass2_results: Vec<(Vec<Issue>, Vec<crate::symbol::ResolvedSymbol>)> = file_data
            .par_iter()
            .map(|(file, src)| {
                let driver = Pass2Driver::new(&self.codebase, self.resolved_php_version());
                let result = if let Some(cache) = &self.cache {
                    let h = hash_content(src);
                    if let Some((cached_issues, ref_locs)) = cache.get(file, &h) {
                        // Cache hit: reuse the stored issues and replay the
                        // stored reference locations. No symbols are produced
                        // for this file.
                        self.codebase
                            .replay_reference_locations(file.clone(), &ref_locs);
                        (cached_issues, Vec::new())
                    } else {
                        // Cache miss: parse again, analyse, then store the
                        // issues together with the file's reference locations.
                        let arena = bumpalo::Bump::new();
                        let parsed = php_rs_parser::parse(&arena, src);
                        let (issues, symbols) = driver.analyze_bodies(
                            &parsed.program,
                            file.clone(),
                            src,
                            &parsed.source_map,
                        );
                        let ref_locs = extract_reference_locations(&self.codebase, file);
                        cache.put(file, h, issues.clone(), ref_locs);
                        (issues, symbols)
                    }
                } else {
                    let arena = bumpalo::Bump::new();
                    let parsed = php_rs_parser::parse(&arena, src);
                    driver.analyze_bodies(&parsed.program, file.clone(), src, &parsed.source_map)
                };
                // The progress callback fires for every file, cached or not.
                if let Some(cb) = &self.on_file_done {
                    cb();
                }
                result
            })
            .collect();

        let mut all_symbols = Vec::new();
        for (issues, symbols) in pass2_results {
            all_issues.extend(issues);
            all_symbols.extend(symbols);
        }

        if let Some(cache) = &self.cache {
            cache.flush();
        }

        self.codebase.compact_reference_index();

        if self.find_dead_code {
            let dead_code_issues =
                crate::dead_code::DeadCodeAnalyzer::new(&self.codebase).analyze();
            all_issues.extend(dead_code_issues);
        }

        // Type environments are not collected on this path, hence the empty map.
        AnalysisResult::build(all_issues, std::collections::HashMap::new(), all_symbols)
    }
376
377 fn lazy_load_missing_classes(
378 &self,
379 psr4: Arc<crate::composer::Psr4Map>,
380 all_issues: &mut Vec<Issue>,
381 ) {
382 use std::collections::HashSet;
383
384 let max_depth = 10;
385 let mut loaded: HashSet<String> = HashSet::new();
386
387 for _ in 0..max_depth {
388 let mut to_load: Vec<(String, PathBuf)> = Vec::new();
389
390 for entry in self.codebase.classes.iter() {
391 let cls = entry.value();
392
393 if let Some(parent) = &cls.parent {
394 let fqcn = parent.as_ref();
395 if !self.codebase.classes.contains_key(fqcn) && !loaded.contains(fqcn) {
396 if let Some(path) = psr4.resolve(fqcn) {
397 to_load.push((fqcn.to_string(), path));
398 }
399 }
400 }
401
402 for iface in &cls.interfaces {
403 let fqcn = iface.as_ref();
404 if !self.codebase.classes.contains_key(fqcn)
405 && !self.codebase.interfaces.contains_key(fqcn)
406 && !loaded.contains(fqcn)
407 {
408 if let Some(path) = psr4.resolve(fqcn) {
409 to_load.push((fqcn.to_string(), path));
410 }
411 }
412 }
413 }
414
415 if to_load.is_empty() {
416 break;
417 }
418
419 for (fqcn, path) in to_load {
420 loaded.insert(fqcn);
421 if let Ok(src) = std::fs::read_to_string(&path) {
422 let file: Arc<str> = Arc::from(path.to_string_lossy().as_ref());
423 let arena = bumpalo::Bump::new();
424 let result = php_rs_parser::parse(&arena, &src);
425 let collector = crate::collector::DefinitionCollector::new(
426 &self.codebase,
427 file,
428 &src,
429 &result.source_map,
430 );
431 let issues = collector.collect(&result.program);
432 all_issues.extend(issues);
433 }
434 }
435
436 self.codebase.invalidate_finalization();
437 self.codebase.finalize();
438 }
439 }
440
441 pub fn re_analyze_file(&self, file_path: &str, new_content: &str) -> AnalysisResult {
450 if let Some(cache) = &self.cache {
452 let h = hash_content(new_content);
453 if let Some((issues, ref_locs)) = cache.get(file_path, &h) {
454 let file: Arc<str> = Arc::from(file_path);
455 self.codebase.replay_reference_locations(file, &ref_locs);
456 return AnalysisResult::build(issues, HashMap::new(), Vec::new());
457 }
458 }
459
460 let structural_snapshot = self.codebase.file_structural_snapshot(file_path);
461 self.codebase.remove_file_definitions(file_path);
462
463 let file: Arc<str> = Arc::from(file_path);
464 let arena = bumpalo::Bump::new();
465 let parsed = php_rs_parser::parse(&arena, new_content);
466
467 let mut all_issues = Vec::new();
468
469 for err in &parsed.errors {
470 all_issues.push(Issue::new(
471 mir_issues::IssueKind::ParseError {
472 message: err.to_string(),
473 },
474 mir_issues::Location {
475 file: file.clone(),
476 line: 1,
477 col_start: 0,
478 col_end: 0,
479 },
480 ));
481 }
482
483 let collector = DefinitionCollector::new(
484 &self.codebase,
485 file.clone(),
486 new_content,
487 &parsed.source_map,
488 );
489 all_issues.extend(collector.collect(&parsed.program));
490
491 if self
492 .codebase
493 .structural_unchanged_after_pass1(file_path, &structural_snapshot)
494 {
495 self.codebase
496 .restore_all_parents(file_path, &structural_snapshot);
497 } else {
498 self.codebase.finalize();
499 }
500
501 let driver = Pass2Driver::new(&self.codebase, self.resolved_php_version());
502 let (body_issues, symbols) = driver.analyze_bodies(
503 &parsed.program,
504 file.clone(),
505 new_content,
506 &parsed.source_map,
507 );
508 all_issues.extend(body_issues);
509
510 if let Some(cache) = &self.cache {
511 let h = hash_content(new_content);
512 cache.evict_with_dependents(&[file_path.to_string()]);
513 let ref_locs = extract_reference_locations(&self.codebase, &file);
514 cache.put(file_path, h, all_issues.clone(), ref_locs);
515 }
516
517 AnalysisResult::build(all_issues, HashMap::new(), symbols)
518 }
519
520 pub fn analyze_source(source: &str) -> AnalysisResult {
523 use crate::collector::DefinitionCollector;
524 let analyzer = ProjectAnalyzer::new();
525 analyzer.load_stubs();
526 let file: Arc<str> = Arc::from("<source>");
527 let arena = bumpalo::Bump::new();
528 let result = php_rs_parser::parse(&arena, source);
529 let mut all_issues = Vec::new();
530 let collector =
531 DefinitionCollector::new(&analyzer.codebase, file.clone(), source, &result.source_map);
532 all_issues.extend(collector.collect(&result.program));
533 analyzer.codebase.finalize();
534 let mut type_envs = std::collections::HashMap::new();
535 let mut all_symbols = Vec::new();
536 let driver = Pass2Driver::new(&analyzer.codebase, analyzer.resolved_php_version());
537 all_issues.extend(driver.analyze_bodies_typed(
538 &result.program,
539 file.clone(),
540 source,
541 &result.source_map,
542 &mut type_envs,
543 &mut all_symbols,
544 ));
545 AnalysisResult::build(all_issues, type_envs, all_symbols)
546 }
547
548 pub fn discover_files(root: &Path) -> Vec<PathBuf> {
550 if root.is_file() {
551 return vec![root.to_path_buf()];
552 }
553 let mut files = Vec::new();
554 collect_php_files(root, &mut files);
555 files
556 }
557
558 pub fn collect_types_only(&self, paths: &[PathBuf]) {
561 paths.par_iter().for_each(|path| {
562 let Ok(src) = std::fs::read_to_string(path) else {
563 return;
564 };
565 let file: Arc<str> = Arc::from(path.to_string_lossy().as_ref());
566 let arena = bumpalo::Bump::new();
567 let result = php_rs_parser::parse(&arena, &src);
568 let collector =
569 DefinitionCollector::new(&self.codebase, file, &src, &result.source_map);
570 let _ = collector.collect(&result.program);
571 });
572 }
573}
574
575impl Default for ProjectAnalyzer {
576 fn default() -> Self {
577 Self::new()
578 }
579}
580
/// Recursively appends every `*.php` file below `dir` to `out`.
///
/// * Symbolic links (to files or directories) are never followed.
/// * Directories named `vendor`, `.git`, `node_modules`, `.cache` or
///   `.pnpm-store` are skipped entirely.
/// * The extension match is case-sensitive (`php` only).
/// * Directories that cannot be read are skipped silently.
///
/// Entries are visited in file-name order within each directory, so the
/// resulting list is the same on every platform and filesystem;
/// `std::fs::read_dir` on its own yields entries in an unspecified order.
pub(crate) fn collect_php_files(dir: &Path, out: &mut Vec<PathBuf>) {
    const SKIPPED_DIRS: [&str; 5] = ["vendor", ".git", "node_modules", ".cache", ".pnpm-store"];

    let Ok(read_dir) = std::fs::read_dir(dir) else {
        return;
    };

    // Entries that fail to be read are dropped, matching the best-effort
    // behaviour of the directory read itself.
    let mut entries: Vec<std::fs::DirEntry> = read_dir.flatten().collect();
    entries.sort_by_cached_key(|entry| entry.file_name());

    for entry in entries {
        if entry.file_type().map(|ft| ft.is_symlink()).unwrap_or(false) {
            continue;
        }
        let path = entry.path();
        if path.is_dir() {
            let name = path.file_name().and_then(|n| n.to_str()).unwrap_or("");
            if SKIPPED_DIRS.contains(&name) {
                continue;
            }
            collect_php_files(&path, out);
        } else if path.extension().and_then(|e| e.to_str()) == Some("php") {
            out.push(path);
        }
    }
}
605
606fn build_reverse_deps(codebase: &Codebase) -> HashMap<String, HashSet<String>> {
611 let mut reverse: HashMap<String, HashSet<String>> = HashMap::new();
612
613 let mut add_edge = |symbol: &str, dependent_file: &str| {
614 if let Some(defining_file) = codebase.symbol_to_file.get(symbol) {
615 let def = defining_file.as_ref().to_string();
616 if def != dependent_file {
617 reverse
618 .entry(def)
619 .or_default()
620 .insert(dependent_file.to_string());
621 }
622 }
623 };
624
625 for entry in codebase.file_imports.iter() {
626 let file = entry.key().as_ref().to_string();
627 for fqcn in entry.value().values() {
628 add_edge(fqcn, &file);
629 }
630 }
631
632 for entry in codebase.classes.iter() {
633 let defining = {
634 let fqcn = entry.key().as_ref();
635 codebase
636 .symbol_to_file
637 .get(fqcn)
638 .map(|f| f.as_ref().to_string())
639 };
640 let Some(file) = defining else { continue };
641
642 let cls = entry.value();
643 if let Some(ref parent) = cls.parent {
644 add_edge(parent.as_ref(), &file);
645 }
646 for iface in &cls.interfaces {
647 add_edge(iface.as_ref(), &file);
648 }
649 for tr in &cls.traits {
650 add_edge(tr.as_ref(), &file);
651 }
652 }
653
654 reverse
655}
656
657fn extract_reference_locations(codebase: &Codebase, file: &Arc<str>) -> Vec<(String, u32, u32)> {
660 codebase
661 .extract_file_reference_locations(file.as_ref())
662 .into_iter()
663 .map(|(sym, start, end)| (sym.to_string(), start, end))
664 .collect()
665}
666
667pub struct AnalysisResult {
672 pub issues: Vec<Issue>,
673 pub type_envs: std::collections::HashMap<crate::type_env::ScopeId, crate::type_env::TypeEnv>,
674 pub symbols: Vec<crate::symbol::ResolvedSymbol>,
676 symbols_by_file: HashMap<Arc<str>, std::ops::Range<usize>>,
680}
681
682impl AnalysisResult {
683 fn build(
684 issues: Vec<Issue>,
685 type_envs: std::collections::HashMap<crate::type_env::ScopeId, crate::type_env::TypeEnv>,
686 mut symbols: Vec<crate::symbol::ResolvedSymbol>,
687 ) -> Self {
688 symbols.sort_unstable_by(|a, b| a.file.as_ref().cmp(b.file.as_ref()));
689 let mut symbols_by_file: HashMap<Arc<str>, std::ops::Range<usize>> = HashMap::new();
690 let mut i = 0;
691 while i < symbols.len() {
692 let file = Arc::clone(&symbols[i].file);
693 let start = i;
694 while i < symbols.len() && symbols[i].file == file {
695 i += 1;
696 }
697 symbols_by_file.insert(file, start..i);
698 }
699 Self {
700 issues,
701 type_envs,
702 symbols,
703 symbols_by_file,
704 }
705 }
706}
707
708impl AnalysisResult {
709 pub fn error_count(&self) -> usize {
710 self.issues
711 .iter()
712 .filter(|i| i.severity == mir_issues::Severity::Error)
713 .count()
714 }
715
716 pub fn warning_count(&self) -> usize {
717 self.issues
718 .iter()
719 .filter(|i| i.severity == mir_issues::Severity::Warning)
720 .count()
721 }
722
723 pub fn issues_by_file(&self) -> HashMap<std::sync::Arc<str>, Vec<&Issue>> {
725 let mut map: HashMap<std::sync::Arc<str>, Vec<&Issue>> = HashMap::new();
726 for issue in &self.issues {
727 map.entry(issue.location.file.clone())
728 .or_default()
729 .push(issue);
730 }
731 map
732 }
733
734 pub fn symbol_at(
737 &self,
738 file: &str,
739 byte_offset: u32,
740 ) -> Option<&crate::symbol::ResolvedSymbol> {
741 let range = self.symbols_by_file.get(file)?;
742 self.symbols[range.clone()]
743 .iter()
744 .filter(|s| s.span.start <= byte_offset && byte_offset < s.span.end)
745 .min_by_key(|s| s.span.end - s.span.start)
746 }
747}