1use std::path::{Path, PathBuf};
3use std::sync::Arc;
4
5use rayon::prelude::*;
6
7use std::collections::{HashMap, HashSet};
8
9use crate::cache::{hash_content, AnalysisCache};
10use mir_codebase::Codebase;
11use mir_issues::Issue;
12use mir_types::Union;
13
14use crate::collector::DefinitionCollector;
15
/// Drives analysis of a set of PHP files against a shared [`Codebase`].
pub struct ProjectAnalyzer {
    /// Shared codebase that definition collection writes into and body analysis reads from.
    pub codebase: Arc<Codebase>,
    /// Optional analysis cache; when present, `analyze` and `re_analyze_file` consult it
    /// before analyzing a file and store fresh results into it.
    pub cache: Option<AnalysisCache>,
    /// Optional callback invoked once per file after its body-analysis step in `analyze`.
    pub on_file_done: Option<Arc<dyn Fn() + Send + Sync>>,
    /// Optional PSR-4 map; when present, `analyze` uses it to load classes that are
    /// referenced as parents/interfaces but not yet known to the codebase.
    pub psr4: Option<Arc<crate::composer::Psr4Map>>,
    /// Set to `true` by the first `load_stubs` call so later calls do nothing.
    stubs_loaded: std::sync::atomic::AtomicBool,
    /// When `true`, `analyze` additionally runs the dead-code analyzer.
    pub find_dead_code: bool,
}
33
34impl ProjectAnalyzer {
35 pub fn new() -> Self {
36 Self {
37 codebase: Arc::new(Codebase::new()),
38 cache: None,
39 on_file_done: None,
40 psr4: None,
41 stubs_loaded: std::sync::atomic::AtomicBool::new(false),
42 find_dead_code: false,
43 }
44 }
45
46 pub fn with_cache(cache_dir: &Path) -> Self {
48 Self {
49 codebase: Arc::new(Codebase::new()),
50 cache: Some(AnalysisCache::open(cache_dir)),
51 on_file_done: None,
52 psr4: None,
53 stubs_loaded: std::sync::atomic::AtomicBool::new(false),
54 find_dead_code: false,
55 }
56 }
57
58 pub fn from_composer(
62 root: &Path,
63 ) -> Result<(Self, crate::composer::Psr4Map), crate::composer::ComposerError> {
64 let map = crate::composer::Psr4Map::from_composer(root)?;
65 let psr4 = Arc::new(map.clone());
66 let analyzer = Self {
67 codebase: Arc::new(Codebase::new()),
68 cache: None,
69 on_file_done: None,
70 psr4: Some(psr4),
71 stubs_loaded: std::sync::atomic::AtomicBool::new(false),
72 find_dead_code: false,
73 };
74 Ok((analyzer, map))
75 }
76
    /// Returns a reference to the shared codebase handle.
    pub fn codebase(&self) -> &Arc<Codebase> {
        &self.codebase
    }
81
82 pub fn load_stubs(&self) {
84 if !self
85 .stubs_loaded
86 .swap(true, std::sync::atomic::Ordering::SeqCst)
87 {
88 crate::stubs::load_stubs(&self.codebase);
89 }
90 }
91
    /// Analyzes the given files and returns every issue found.
    ///
    /// Steps, in order:
    /// 1. Load stubs (once per analyzer).
    /// 2. If a cache is configured, evict entries (and their dependents) for files
    ///    whose current content hash has no cache entry.
    /// 3. Read all files in parallel; unreadable files are reported on stderr and skipped.
    /// 4. Pass 1 (parallel): parse each file, index declared symbols, the file's first
    ///    namespace and its `use` imports, and collect definitions.
    /// 5. Finalize the codebase, lazily load missing classes via PSR-4 if configured,
    ///    and store reverse dependencies in the cache if configured.
    /// 6. Run class-level analysis.
    /// 7. Pass 2 (parallel): analyze bodies, reusing cached results where available.
    /// 8. Flush the cache, compact the reference index and optionally run dead-code analysis.
    ///
    /// The returned `type_envs` map is always empty here. Files served from the cache
    /// contribute no entries to `symbols`.
    pub fn analyze(&self, paths: &[PathBuf]) -> AnalysisResult {
        let mut all_issues = Vec::new();
        let mut parse_errors = Vec::new();

        self.load_stubs();

        // Cache invalidation: any readable file whose (path, content hash) pair is not
        // in the cache is treated as changed and evicted together with its dependents.
        if let Some(cache) = &self.cache {
            let changed: Vec<String> = paths
                .iter()
                .filter_map(|p| {
                    let path_str = p.to_string_lossy().into_owned();
                    // Unreadable files are ignored here; they are reported when read below.
                    let content = std::fs::read_to_string(p).ok()?;
                    let h = hash_content(&content);
                    if cache.get(&path_str, &h).is_none() {
                        Some(path_str)
                    } else {
                        None
                    }
                })
                .collect();
            if !changed.is_empty() {
                cache.evict_with_dependents(&changed);
            }
        }

        // Read every file in parallel, keeping (file name, source) pairs for both passes.
        let file_data: Vec<(Arc<str>, String)> = paths
            .par_iter()
            .filter_map(|path| match std::fs::read_to_string(path) {
                Ok(src) => Some((Arc::from(path.to_string_lossy().as_ref()), src)),
                Err(e) => {
                    eprintln!("Cannot read {}: {}", path.display(), e);
                    None
                }
            })
            .collect();

        // Pass 1: per file, returns (parse errors, definition-collection issues).
        let pass1_results: Vec<(Vec<Issue>, Vec<Issue>)> = file_data
            .par_iter()
            .map(|(file, src)| {
                use php_ast::ast::StmtKind;
                let arena = bumpalo::Bump::new();
                let result = php_rs_parser::parse(&arena, src);

                let mut current_namespace: Option<String> = None;
                let mut imports: std::collections::HashMap<String, String> =
                    std::collections::HashMap::new();
                // Only the first named namespace of a file is recorded in `file_namespaces`.
                let mut file_ns_set = false;

                // Registers `use` imports (alias -> full name) and the fully-qualified
                // names of classes, interfaces, traits, enums and functions declared
                // directly in `stmts` under namespace `ns`.
                let index_stmts =
                    |stmts: &[php_ast::ast::Stmt<'_, '_>],
                     ns: Option<&str>,
                     imports: &mut std::collections::HashMap<String, String>| {
                        for stmt in stmts.iter() {
                            match &stmt.kind {
                                StmtKind::Use(use_decl) => {
                                    for item in use_decl.uses.iter() {
                                        let full_name = crate::parser::name_to_string(&item.name);
                                        // Without an explicit alias, the last name segment is used.
                                        let alias = item.alias.unwrap_or_else(|| {
                                            full_name.rsplit('\\').next().unwrap_or(&full_name)
                                        });
                                        imports.insert(alias.to_string(), full_name);
                                    }
                                }
                                StmtKind::Class(decl) => {
                                    // Classes without a name are not indexed.
                                    if let Some(n) = decl.name {
                                        let fqcn = match ns {
                                            Some(ns) => format!("{}\\{}", ns, n),
                                            None => n.to_string(),
                                        };
                                        self.codebase
                                            .known_symbols
                                            .insert(Arc::from(fqcn.as_str()));
                                    }
                                }
                                StmtKind::Interface(decl) => {
                                    let fqcn = match ns {
                                        Some(ns) => format!("{}\\{}", ns, decl.name),
                                        None => decl.name.to_string(),
                                    };
                                    self.codebase.known_symbols.insert(Arc::from(fqcn.as_str()));
                                }
                                StmtKind::Trait(decl) => {
                                    let fqcn = match ns {
                                        Some(ns) => format!("{}\\{}", ns, decl.name),
                                        None => decl.name.to_string(),
                                    };
                                    self.codebase.known_symbols.insert(Arc::from(fqcn.as_str()));
                                }
                                StmtKind::Enum(decl) => {
                                    let fqcn = match ns {
                                        Some(ns) => format!("{}\\{}", ns, decl.name),
                                        None => decl.name.to_string(),
                                    };
                                    self.codebase.known_symbols.insert(Arc::from(fqcn.as_str()));
                                }
                                StmtKind::Function(decl) => {
                                    let fqn = match ns {
                                        Some(ns) => format!("{}\\{}", ns, decl.name),
                                        None => decl.name.to_string(),
                                    };
                                    self.codebase.known_symbols.insert(Arc::from(fqn.as_str()));
                                }
                                _ => {}
                            }
                        }
                    };

                for stmt in result.program.stmts.iter() {
                    match &stmt.kind {
                        StmtKind::Namespace(ns) => {
                            // A namespace statement switches the namespace applied to the
                            // top-level statements that follow it.
                            current_namespace =
                                ns.name.as_ref().map(|n| crate::parser::name_to_string(n));
                            if !file_ns_set {
                                if let Some(ref ns_str) = current_namespace {
                                    self.codebase
                                        .file_namespaces
                                        .insert(file.clone(), ns_str.clone());
                                    file_ns_set = true;
                                }
                            }
                            // Braced namespaces carry their own statements; index those too.
                            if let php_ast::ast::NamespaceBody::Braced(inner_stmts) = &ns.body {
                                index_stmts(
                                    inner_stmts,
                                    current_namespace.as_deref(),
                                    &mut imports,
                                );
                            }
                        }
                        _ => index_stmts(
                            std::slice::from_ref(stmt),
                            current_namespace.as_deref(),
                            &mut imports,
                        ),
                    }
                }

                if !imports.is_empty() {
                    self.codebase.file_imports.insert(file.clone(), imports);
                }

                // Parse errors are all reported at line 1, column 0 of the file.
                let file_parse_errors: Vec<Issue> = result
                    .errors
                    .iter()
                    .map(|err| {
                        Issue::new(
                            mir_issues::IssueKind::ParseError {
                                message: err.to_string(),
                            },
                            mir_issues::Location {
                                file: file.clone(),
                                line: 1,
                                col_start: 0,
                                col_end: 0,
                            },
                        )
                    })
                    .collect();

                let collector =
                    DefinitionCollector::new(&self.codebase, file.clone(), src, &result.source_map);
                let issues = collector.collect(&result.program);

                (file_parse_errors, issues)
            })
            .collect();

        for (file_parse_errors, issues) in pass1_results {
            parse_errors.extend(file_parse_errors);
            all_issues.extend(issues);
        }

        // Parse errors are appended after all definition-collection issues.
        all_issues.extend(parse_errors);

        self.codebase.finalize();

        if let Some(psr4) = &self.psr4 {
            self.lazy_load_missing_classes(psr4.clone(), &mut all_issues);
        }

        if let Some(cache) = &self.cache {
            let rev = build_reverse_deps(&self.codebase);
            cache.set_reverse_deps(rev);
        }

        // Class-level analysis is given the set of files that were actually read.
        let analyzed_file_set: std::collections::HashSet<std::sync::Arc<str>> =
            file_data.iter().map(|(f, _)| f.clone()).collect();
        let class_issues =
            crate::class::ClassAnalyzer::with_files(&self.codebase, analyzed_file_set, &file_data)
                .analyze_all();
        all_issues.extend(class_issues);

        // Pass 2: per file, returns (body issues, resolved symbols).
        let pass2_results: Vec<(Vec<Issue>, Vec<crate::symbol::ResolvedSymbol>)> = file_data
            .par_iter()
            .map(|(file, src)| {
                let result = if let Some(cache) = &self.cache {
                    let h = hash_content(src);
                    if let Some((cached_issues, ref_locs)) = cache.get(file, &h) {
                        // Cache hit: replay the stored reference locations instead of
                        // re-analyzing; no symbols are produced for this file.
                        self.codebase
                            .replay_reference_locations(file.clone(), &ref_locs);
                        (cached_issues, Vec::new())
                    } else {
                        // Cache miss: re-parse, analyze, and store the result.
                        let arena = bumpalo::Bump::new();
                        let parsed = php_rs_parser::parse(&arena, src);
                        let (issues, symbols) = self.analyze_bodies(
                            &parsed.program,
                            file.clone(),
                            src,
                            &parsed.source_map,
                        );
                        let ref_locs = extract_reference_locations(&self.codebase, file);
                        cache.put(file, h, issues.clone(), ref_locs);
                        (issues, symbols)
                    }
                } else {
                    let arena = bumpalo::Bump::new();
                    let parsed = php_rs_parser::parse(&arena, src);
                    self.analyze_bodies(&parsed.program, file.clone(), src, &parsed.source_map)
                };
                // Progress notification, fired for cached and freshly analyzed files alike.
                if let Some(cb) = &self.on_file_done {
                    cb();
                }
                result
            })
            .collect();

        let mut all_symbols = Vec::new();
        for (issues, symbols) in pass2_results {
            all_issues.extend(issues);
            all_symbols.extend(symbols);
        }

        if let Some(cache) = &self.cache {
            cache.flush();
        }

        self.codebase.compact_reference_index();

        if self.find_dead_code {
            let dead_code_issues =
                crate::dead_code::DeadCodeAnalyzer::new(&self.codebase).analyze();
            all_issues.extend(dead_code_issues);
        }

        AnalysisResult {
            issues: all_issues,
            type_envs: std::collections::HashMap::new(),
            symbols: all_symbols,
        }
    }
373
374 fn lazy_load_missing_classes(
383 &self,
384 psr4: Arc<crate::composer::Psr4Map>,
385 all_issues: &mut Vec<Issue>,
386 ) {
387 use std::collections::HashSet;
388
389 let max_depth = 10; let mut loaded: HashSet<String> = HashSet::new();
391
392 for _ in 0..max_depth {
393 let mut to_load: Vec<(String, PathBuf)> = Vec::new();
395
396 for entry in self.codebase.classes.iter() {
397 let cls = entry.value();
398
399 if let Some(parent) = &cls.parent {
401 let fqcn = parent.as_ref();
402 if !self.codebase.classes.contains_key(fqcn) && !loaded.contains(fqcn) {
403 if let Some(path) = psr4.resolve(fqcn) {
404 to_load.push((fqcn.to_string(), path));
405 }
406 }
407 }
408
409 for iface in &cls.interfaces {
411 let fqcn = iface.as_ref();
412 if !self.codebase.classes.contains_key(fqcn)
413 && !self.codebase.interfaces.contains_key(fqcn)
414 && !loaded.contains(fqcn)
415 {
416 if let Some(path) = psr4.resolve(fqcn) {
417 to_load.push((fqcn.to_string(), path));
418 }
419 }
420 }
421 }
422
423 if to_load.is_empty() {
424 break;
425 }
426
427 for (fqcn, path) in to_load {
429 loaded.insert(fqcn);
430 if let Ok(src) = std::fs::read_to_string(&path) {
431 let file: Arc<str> = Arc::from(path.to_string_lossy().as_ref());
432 let arena = bumpalo::Bump::new();
433 let result = php_rs_parser::parse(&arena, &src);
434 let collector = crate::collector::DefinitionCollector::new(
435 &self.codebase,
436 file,
437 &src,
438 &result.source_map,
439 );
440 let issues = collector.collect(&result.program);
441 all_issues.extend(issues);
442 }
443 }
444
445 self.codebase.invalidate_finalization();
448 self.codebase.finalize();
449 }
450 }
451
    /// Re-analyzes a single file with new content and returns its issues and symbols.
    ///
    /// If a cache is configured and already holds a result for this path and content
    /// hash, the cached issues are returned (with no symbols) after replaying the
    /// stored reference locations. Otherwise the file's previous definitions are
    /// removed, definitions are re-collected from `new_content`, bodies are analyzed,
    /// and the fresh result is written to the cache.
    ///
    /// The returned `type_envs` map is always empty.
    pub fn re_analyze_file(&self, file_path: &str, new_content: &str) -> AnalysisResult {
        // Fast path: identical content was analyzed before.
        if let Some(cache) = &self.cache {
            let h = hash_content(new_content);
            if let Some((issues, ref_locs)) = cache.get(file_path, &h) {
                let file: Arc<str> = Arc::from(file_path);
                self.codebase.replay_reference_locations(file, &ref_locs);
                return AnalysisResult {
                    issues,
                    type_envs: HashMap::new(),
                    symbols: Default::default(),
                };
            }
        }

        // Snapshot taken before the old definitions are removed, so it can be compared
        // against the state after re-collection.
        let structural_snapshot = self.codebase.file_structural_snapshot(file_path);

        self.codebase.remove_file_definitions(file_path);

        let file: Arc<str> = Arc::from(file_path);
        let arena = bumpalo::Bump::new();
        let parsed = php_rs_parser::parse(&arena, new_content);

        let mut all_issues = Vec::new();

        // Parse errors are all reported at line 1, column 0 of the file.
        for err in &parsed.errors {
            all_issues.push(Issue::new(
                mir_issues::IssueKind::ParseError {
                    message: err.to_string(),
                },
                mir_issues::Location {
                    file: file.clone(),
                    line: 1,
                    col_start: 0,
                    col_end: 0,
                },
            ));
        }

        let collector = DefinitionCollector::new(
            &self.codebase,
            file.clone(),
            new_content,
            &parsed.source_map,
        );
        all_issues.extend(collector.collect(&parsed.program));

        // If the codebase reports the file as structurally unchanged, restore from the
        // snapshot instead of running a full finalize.
        if self
            .codebase
            .structural_unchanged_after_pass1(file_path, &structural_snapshot)
        {
            self.codebase
                .restore_all_parents(file_path, &structural_snapshot);
        } else {
            self.codebase.finalize();
        }

        let (body_issues, symbols) = self.analyze_bodies(
            &parsed.program,
            file.clone(),
            new_content,
            &parsed.source_map,
        );
        all_issues.extend(body_issues);

        // Replace the cache entry for this file, evicting it and its dependents first.
        if let Some(cache) = &self.cache {
            let h = hash_content(new_content);
            cache.evict_with_dependents(&[file_path.to_string()]);
            let ref_locs = extract_reference_locations(&self.codebase, &file);
            cache.put(file_path, h, all_issues.clone(), ref_locs);
        }

        AnalysisResult {
            issues: all_issues,
            type_envs: HashMap::new(),
            symbols,
        }
    }
551
552 pub fn analyze_source(source: &str) -> AnalysisResult {
555 use crate::collector::DefinitionCollector;
556 let analyzer = ProjectAnalyzer::new();
557 analyzer.load_stubs();
558 let file: Arc<str> = Arc::from("<source>");
559 let arena = bumpalo::Bump::new();
560 let result = php_rs_parser::parse(&arena, source);
561 let mut all_issues = Vec::new();
562 let collector =
563 DefinitionCollector::new(&analyzer.codebase, file.clone(), source, &result.source_map);
564 all_issues.extend(collector.collect(&result.program));
565 analyzer.codebase.finalize();
566 let mut type_envs = std::collections::HashMap::new();
567 let mut all_symbols = Vec::new();
568 all_issues.extend(analyzer.analyze_bodies_typed(
569 &result.program,
570 file.clone(),
571 source,
572 &result.source_map,
573 &mut type_envs,
574 &mut all_symbols,
575 ));
576 AnalysisResult {
577 issues: all_issues,
578 type_envs,
579 symbols: all_symbols,
580 }
581 }
582
583 fn analyze_bodies<'arena, 'src>(
586 &self,
587 program: &php_ast::ast::Program<'arena, 'src>,
588 file: Arc<str>,
589 source: &str,
590 source_map: &php_rs_parser::source_map::SourceMap,
591 ) -> (Vec<mir_issues::Issue>, Vec<crate::symbol::ResolvedSymbol>) {
592 use php_ast::ast::StmtKind;
593
594 let mut all_issues = Vec::new();
595 let mut all_symbols = Vec::new();
596
597 for stmt in program.stmts.iter() {
598 match &stmt.kind {
599 StmtKind::Function(decl) => {
600 self.analyze_fn_decl(
601 decl,
602 &file,
603 source,
604 source_map,
605 &mut all_issues,
606 &mut all_symbols,
607 );
608 }
609 StmtKind::Class(decl) => {
610 self.analyze_class_decl(
611 decl,
612 &file,
613 source,
614 source_map,
615 &mut all_issues,
616 &mut all_symbols,
617 );
618 }
619 StmtKind::Enum(decl) => {
620 self.analyze_enum_decl(decl, &file, source, source_map, &mut all_issues);
621 }
622 StmtKind::Interface(decl) => {
623 self.analyze_interface_decl(decl, &file, source, source_map, &mut all_issues);
624 }
625 StmtKind::Namespace(ns) => {
626 if let php_ast::ast::NamespaceBody::Braced(stmts) = &ns.body {
627 for inner in stmts.iter() {
628 match &inner.kind {
629 StmtKind::Function(decl) => {
630 self.analyze_fn_decl(
631 decl,
632 &file,
633 source,
634 source_map,
635 &mut all_issues,
636 &mut all_symbols,
637 );
638 }
639 StmtKind::Class(decl) => {
640 self.analyze_class_decl(
641 decl,
642 &file,
643 source,
644 source_map,
645 &mut all_issues,
646 &mut all_symbols,
647 );
648 }
649 StmtKind::Enum(decl) => {
650 self.analyze_enum_decl(
651 decl,
652 &file,
653 source,
654 source_map,
655 &mut all_issues,
656 );
657 }
658 StmtKind::Interface(decl) => {
659 self.analyze_interface_decl(
660 decl,
661 &file,
662 source,
663 source_map,
664 &mut all_issues,
665 );
666 }
667 _ => {}
668 }
669 }
670 }
671 }
672 _ => {}
673 }
674 }
675
676 (all_issues, all_symbols)
677 }
678
679 #[allow(clippy::too_many_arguments)]
681 fn analyze_fn_decl<'arena, 'src>(
682 &self,
683 decl: &php_ast::ast::FunctionDecl<'arena, 'src>,
684 file: &Arc<str>,
685 source: &str,
686 source_map: &php_rs_parser::source_map::SourceMap,
687 all_issues: &mut Vec<mir_issues::Issue>,
688 all_symbols: &mut Vec<crate::symbol::ResolvedSymbol>,
689 ) {
690 let fn_name = decl.name;
691 let body = &decl.body;
692 for param in decl.params.iter() {
694 if let Some(hint) = ¶m.type_hint {
695 check_type_hint_classes(hint, &self.codebase, file, source, source_map, all_issues);
696 }
697 }
698 if let Some(hint) = &decl.return_type {
699 check_type_hint_classes(hint, &self.codebase, file, source, source_map, all_issues);
700 }
701 use crate::context::Context;
702 use crate::stmt::StatementsAnalyzer;
703 use mir_issues::IssueBuffer;
704
705 let resolved_fn = self.codebase.resolve_class_name(file.as_ref(), fn_name);
707 let func_opt: Option<mir_codebase::storage::FunctionStorage> = self
708 .codebase
709 .functions
710 .get(resolved_fn.as_str())
711 .map(|r| r.clone())
712 .or_else(|| self.codebase.functions.get(fn_name).map(|r| r.clone()))
713 .or_else(|| {
714 self.codebase
715 .functions
716 .iter()
717 .find(|e| e.short_name.as_ref() == fn_name)
718 .map(|e| e.value().clone())
719 });
720
721 let fqn = func_opt.as_ref().map(|f| f.fqn.clone());
722 let (params, return_ty): (Vec<mir_codebase::FnParam>, _) = match &func_opt {
727 Some(f)
728 if f.params.len() == decl.params.len()
729 && f.params
730 .iter()
731 .zip(decl.params.iter())
732 .all(|(cp, ap)| cp.name.as_ref() == ap.name) =>
733 {
734 (f.params.clone(), f.return_type.clone())
735 }
736 _ => {
737 let ast_params = decl
738 .params
739 .iter()
740 .map(|p| mir_codebase::FnParam {
741 name: Arc::from(p.name),
742 ty: None,
743 default: p.default.as_ref().map(|_| mir_types::Union::mixed()),
744 is_variadic: p.variadic,
745 is_byref: p.by_ref,
746 is_optional: p.default.is_some() || p.variadic,
747 })
748 .collect();
749 (ast_params, None)
750 }
751 };
752
753 let mut ctx = Context::for_function(¶ms, return_ty, None, None, None, false, true);
754 let mut buf = IssueBuffer::new();
755 let mut sa = StatementsAnalyzer::new(
756 &self.codebase,
757 file.clone(),
758 source,
759 source_map,
760 &mut buf,
761 all_symbols,
762 );
763 sa.analyze_stmts(body, &mut ctx);
764 let inferred = merge_return_types(&sa.return_types);
765 drop(sa);
766
767 emit_unused_params(¶ms, &ctx, "", file, all_issues);
768 emit_unused_variables(&ctx, file, all_issues);
769 all_issues.extend(buf.into_issues());
770
771 if let Some(fqn) = fqn {
772 if let Some(mut func) = self.codebase.functions.get_mut(fqn.as_ref()) {
773 func.inferred_return_type = Some(inferred);
774 }
775 }
776 }
777
778 #[allow(clippy::too_many_arguments)]
780 fn analyze_class_decl<'arena, 'src>(
781 &self,
782 decl: &php_ast::ast::ClassDecl<'arena, 'src>,
783 file: &Arc<str>,
784 source: &str,
785 source_map: &php_rs_parser::source_map::SourceMap,
786 all_issues: &mut Vec<mir_issues::Issue>,
787 all_symbols: &mut Vec<crate::symbol::ResolvedSymbol>,
788 ) {
789 use crate::context::Context;
790 use crate::stmt::StatementsAnalyzer;
791 use mir_issues::IssueBuffer;
792
793 let class_name = decl.name.unwrap_or("<anonymous>");
794 let resolved = self.codebase.resolve_class_name(file.as_ref(), class_name);
797 let fqcn: &str = &resolved;
798 let parent_fqcn = self
799 .codebase
800 .classes
801 .get(fqcn)
802 .and_then(|c| c.parent.clone());
803
804 if let Some(parent) = &decl.extends {
805 check_name_class(parent, &self.codebase, file, source, source_map, all_issues);
806 }
807 for iface in decl.implements.iter() {
808 check_name_class(iface, &self.codebase, file, source, source_map, all_issues);
809 }
810
811 for member in decl.members.iter() {
812 let php_ast::ast::ClassMemberKind::Method(method) = &member.kind else {
813 continue;
814 };
815
816 for param in method.params.iter() {
818 if let Some(hint) = ¶m.type_hint {
819 check_type_hint_classes(
820 hint,
821 &self.codebase,
822 file,
823 source,
824 source_map,
825 all_issues,
826 );
827 }
828 }
829 if let Some(hint) = &method.return_type {
830 check_type_hint_classes(hint, &self.codebase, file, source, source_map, all_issues);
831 }
832
833 let Some(body) = &method.body else { continue };
834
835 let (params, return_ty) = self
836 .codebase
837 .get_method(fqcn, method.name)
838 .as_deref()
839 .map(|m| (m.params.clone(), m.return_type.clone()))
840 .unwrap_or_default();
841
842 let is_ctor = method.name == "__construct";
843 let mut ctx = Context::for_method(
844 ¶ms,
845 return_ty,
846 Some(Arc::from(fqcn)),
847 parent_fqcn.clone(),
848 Some(Arc::from(fqcn)),
849 false,
850 is_ctor,
851 method.is_static,
852 );
853
854 let mut buf = IssueBuffer::new();
855 let mut sa = StatementsAnalyzer::new(
856 &self.codebase,
857 file.clone(),
858 source,
859 source_map,
860 &mut buf,
861 all_symbols,
862 );
863 sa.analyze_stmts(body, &mut ctx);
864 let inferred = merge_return_types(&sa.return_types);
865 drop(sa);
866
867 emit_unused_params(¶ms, &ctx, method.name, file, all_issues);
868 emit_unused_variables(&ctx, file, all_issues);
869 all_issues.extend(buf.into_issues());
870
871 if let Some(mut cls) = self.codebase.classes.get_mut(fqcn) {
872 if let Some(m) = cls.own_methods.get_mut(method.name) {
873 Arc::make_mut(m).inferred_return_type = Some(inferred);
874 }
875 }
876 }
877 }
878
879 #[allow(clippy::too_many_arguments)]
881 fn analyze_bodies_typed<'arena, 'src>(
882 &self,
883 program: &php_ast::ast::Program<'arena, 'src>,
884 file: Arc<str>,
885 source: &str,
886 source_map: &php_rs_parser::source_map::SourceMap,
887 type_envs: &mut std::collections::HashMap<
888 crate::type_env::ScopeId,
889 crate::type_env::TypeEnv,
890 >,
891 all_symbols: &mut Vec<crate::symbol::ResolvedSymbol>,
892 ) -> Vec<mir_issues::Issue> {
893 use php_ast::ast::StmtKind;
894 let mut all_issues = Vec::new();
895 for stmt in program.stmts.iter() {
896 match &stmt.kind {
897 StmtKind::Function(decl) => {
898 self.analyze_fn_decl_typed(
899 decl,
900 &file,
901 source,
902 source_map,
903 &mut all_issues,
904 type_envs,
905 all_symbols,
906 );
907 }
908 StmtKind::Class(decl) => {
909 self.analyze_class_decl_typed(
910 decl,
911 &file,
912 source,
913 source_map,
914 &mut all_issues,
915 type_envs,
916 all_symbols,
917 );
918 }
919 StmtKind::Enum(decl) => {
920 self.analyze_enum_decl(decl, &file, source, source_map, &mut all_issues);
921 }
922 StmtKind::Interface(decl) => {
923 self.analyze_interface_decl(decl, &file, source, source_map, &mut all_issues);
924 }
925 StmtKind::Namespace(ns) => {
926 if let php_ast::ast::NamespaceBody::Braced(stmts) = &ns.body {
927 for inner in stmts.iter() {
928 match &inner.kind {
929 StmtKind::Function(decl) => {
930 self.analyze_fn_decl_typed(
931 decl,
932 &file,
933 source,
934 source_map,
935 &mut all_issues,
936 type_envs,
937 all_symbols,
938 );
939 }
940 StmtKind::Class(decl) => {
941 self.analyze_class_decl_typed(
942 decl,
943 &file,
944 source,
945 source_map,
946 &mut all_issues,
947 type_envs,
948 all_symbols,
949 );
950 }
951 StmtKind::Enum(decl) => {
952 self.analyze_enum_decl(
953 decl,
954 &file,
955 source,
956 source_map,
957 &mut all_issues,
958 );
959 }
960 StmtKind::Interface(decl) => {
961 self.analyze_interface_decl(
962 decl,
963 &file,
964 source,
965 source_map,
966 &mut all_issues,
967 );
968 }
969 _ => {}
970 }
971 }
972 }
973 }
974 _ => {}
975 }
976 }
977 all_issues
978 }
979
980 #[allow(clippy::too_many_arguments)]
982 fn analyze_fn_decl_typed<'arena, 'src>(
983 &self,
984 decl: &php_ast::ast::FunctionDecl<'arena, 'src>,
985 file: &Arc<str>,
986 source: &str,
987 source_map: &php_rs_parser::source_map::SourceMap,
988 all_issues: &mut Vec<mir_issues::Issue>,
989 type_envs: &mut std::collections::HashMap<
990 crate::type_env::ScopeId,
991 crate::type_env::TypeEnv,
992 >,
993 all_symbols: &mut Vec<crate::symbol::ResolvedSymbol>,
994 ) {
995 use crate::context::Context;
996 use crate::stmt::StatementsAnalyzer;
997 use mir_issues::IssueBuffer;
998
999 let fn_name = decl.name;
1000 let body = &decl.body;
1001
1002 for param in decl.params.iter() {
1003 if let Some(hint) = ¶m.type_hint {
1004 check_type_hint_classes(hint, &self.codebase, file, source, source_map, all_issues);
1005 }
1006 }
1007 if let Some(hint) = &decl.return_type {
1008 check_type_hint_classes(hint, &self.codebase, file, source, source_map, all_issues);
1009 }
1010
1011 let resolved_fn = self.codebase.resolve_class_name(file.as_ref(), fn_name);
1012 let func_opt: Option<mir_codebase::storage::FunctionStorage> = self
1013 .codebase
1014 .functions
1015 .get(resolved_fn.as_str())
1016 .map(|r| r.clone())
1017 .or_else(|| self.codebase.functions.get(fn_name).map(|r| r.clone()))
1018 .or_else(|| {
1019 self.codebase
1020 .functions
1021 .iter()
1022 .find(|e| e.short_name.as_ref() == fn_name)
1023 .map(|e| e.value().clone())
1024 });
1025
1026 let fqn = func_opt.as_ref().map(|f| f.fqn.clone());
1027 let (params, return_ty): (Vec<mir_codebase::FnParam>, _) = match &func_opt {
1028 Some(f)
1029 if f.params.len() == decl.params.len()
1030 && f.params
1031 .iter()
1032 .zip(decl.params.iter())
1033 .all(|(cp, ap)| cp.name.as_ref() == ap.name) =>
1034 {
1035 (f.params.clone(), f.return_type.clone())
1036 }
1037 _ => {
1038 let ast_params = decl
1039 .params
1040 .iter()
1041 .map(|p| mir_codebase::FnParam {
1042 name: Arc::from(p.name),
1043 ty: None,
1044 default: p.default.as_ref().map(|_| mir_types::Union::mixed()),
1045 is_variadic: p.variadic,
1046 is_byref: p.by_ref,
1047 is_optional: p.default.is_some() || p.variadic,
1048 })
1049 .collect();
1050 (ast_params, None)
1051 }
1052 };
1053
1054 let mut ctx = Context::for_function(¶ms, return_ty, None, None, None, false, true);
1055 let mut buf = IssueBuffer::new();
1056 let mut sa = StatementsAnalyzer::new(
1057 &self.codebase,
1058 file.clone(),
1059 source,
1060 source_map,
1061 &mut buf,
1062 all_symbols,
1063 );
1064 sa.analyze_stmts(body, &mut ctx);
1065 let inferred = merge_return_types(&sa.return_types);
1066 drop(sa);
1067
1068 let scope_name = fqn.clone().unwrap_or_else(|| Arc::from(fn_name));
1070 type_envs.insert(
1071 crate::type_env::ScopeId::Function {
1072 file: file.clone(),
1073 name: scope_name,
1074 },
1075 crate::type_env::TypeEnv::new(ctx.vars.clone()),
1076 );
1077
1078 emit_unused_params(¶ms, &ctx, "", file, all_issues);
1079 emit_unused_variables(&ctx, file, all_issues);
1080 all_issues.extend(buf.into_issues());
1081
1082 if let Some(fqn) = fqn {
1083 if let Some(mut func) = self.codebase.functions.get_mut(fqn.as_ref()) {
1084 func.inferred_return_type = Some(inferred);
1085 }
1086 }
1087 }
1088
1089 #[allow(clippy::too_many_arguments)]
1091 fn analyze_class_decl_typed<'arena, 'src>(
1092 &self,
1093 decl: &php_ast::ast::ClassDecl<'arena, 'src>,
1094 file: &Arc<str>,
1095 source: &str,
1096 source_map: &php_rs_parser::source_map::SourceMap,
1097 all_issues: &mut Vec<mir_issues::Issue>,
1098 type_envs: &mut std::collections::HashMap<
1099 crate::type_env::ScopeId,
1100 crate::type_env::TypeEnv,
1101 >,
1102 all_symbols: &mut Vec<crate::symbol::ResolvedSymbol>,
1103 ) {
1104 use crate::context::Context;
1105 use crate::stmt::StatementsAnalyzer;
1106 use mir_issues::IssueBuffer;
1107
1108 let class_name = decl.name.unwrap_or("<anonymous>");
1109 let resolved = self.codebase.resolve_class_name(file.as_ref(), class_name);
1110 let fqcn: &str = &resolved;
1111 let parent_fqcn = self
1112 .codebase
1113 .classes
1114 .get(fqcn)
1115 .and_then(|c| c.parent.clone());
1116
1117 if let Some(parent) = &decl.extends {
1118 check_name_class(parent, &self.codebase, file, source, source_map, all_issues);
1119 }
1120 for iface in decl.implements.iter() {
1121 check_name_class(iface, &self.codebase, file, source, source_map, all_issues);
1122 }
1123
1124 for member in decl.members.iter() {
1125 let php_ast::ast::ClassMemberKind::Method(method) = &member.kind else {
1126 continue;
1127 };
1128
1129 for param in method.params.iter() {
1130 if let Some(hint) = ¶m.type_hint {
1131 check_type_hint_classes(
1132 hint,
1133 &self.codebase,
1134 file,
1135 source,
1136 source_map,
1137 all_issues,
1138 );
1139 }
1140 }
1141 if let Some(hint) = &method.return_type {
1142 check_type_hint_classes(hint, &self.codebase, file, source, source_map, all_issues);
1143 }
1144
1145 let Some(body) = &method.body else { continue };
1146
1147 let (params, return_ty) = self
1148 .codebase
1149 .get_method(fqcn, method.name)
1150 .as_deref()
1151 .map(|m| (m.params.clone(), m.return_type.clone()))
1152 .unwrap_or_default();
1153
1154 let is_ctor = method.name == "__construct";
1155 let mut ctx = Context::for_method(
1156 ¶ms,
1157 return_ty,
1158 Some(Arc::from(fqcn)),
1159 parent_fqcn.clone(),
1160 Some(Arc::from(fqcn)),
1161 false,
1162 is_ctor,
1163 method.is_static,
1164 );
1165
1166 let mut buf = IssueBuffer::new();
1167 let mut sa = StatementsAnalyzer::new(
1168 &self.codebase,
1169 file.clone(),
1170 source,
1171 source_map,
1172 &mut buf,
1173 all_symbols,
1174 );
1175 sa.analyze_stmts(body, &mut ctx);
1176 let inferred = merge_return_types(&sa.return_types);
1177 drop(sa);
1178
1179 type_envs.insert(
1181 crate::type_env::ScopeId::Method {
1182 class: Arc::from(fqcn),
1183 method: Arc::from(method.name),
1184 },
1185 crate::type_env::TypeEnv::new(ctx.vars.clone()),
1186 );
1187
1188 emit_unused_params(¶ms, &ctx, method.name, file, all_issues);
1189 emit_unused_variables(&ctx, file, all_issues);
1190 all_issues.extend(buf.into_issues());
1191
1192 if let Some(mut cls) = self.codebase.classes.get_mut(fqcn) {
1193 if let Some(m) = cls.own_methods.get_mut(method.name) {
1194 Arc::make_mut(m).inferred_return_type = Some(inferred);
1195 }
1196 }
1197 }
1198 }
1199
1200 pub fn discover_files(root: &Path) -> Vec<PathBuf> {
1202 if root.is_file() {
1203 return vec![root.to_path_buf()];
1204 }
1205 let mut files = Vec::new();
1206 collect_php_files(root, &mut files);
1207 files
1208 }
1209
1210 pub fn collect_types_only(&self, paths: &[PathBuf]) {
1213 paths.par_iter().for_each(|path| {
1214 let Ok(src) = std::fs::read_to_string(path) else {
1215 return;
1216 };
1217 let file: Arc<str> = Arc::from(path.to_string_lossy().as_ref());
1218 let arena = bumpalo::Bump::new();
1219 let result = php_rs_parser::parse(&arena, &src);
1220 let collector =
1221 DefinitionCollector::new(&self.codebase, file, &src, &result.source_map);
1222 let _ = collector.collect(&result.program);
1224 });
1225 }
1226
1227 #[allow(clippy::too_many_arguments)]
1229 fn analyze_enum_decl<'arena, 'src>(
1230 &self,
1231 decl: &php_ast::ast::EnumDecl<'arena, 'src>,
1232 file: &Arc<str>,
1233 source: &str,
1234 source_map: &php_rs_parser::source_map::SourceMap,
1235 all_issues: &mut Vec<mir_issues::Issue>,
1236 ) {
1237 use php_ast::ast::EnumMemberKind;
1238 for iface in decl.implements.iter() {
1239 check_name_class(iface, &self.codebase, file, source, source_map, all_issues);
1240 }
1241 for member in decl.members.iter() {
1242 let EnumMemberKind::Method(method) = &member.kind else {
1243 continue;
1244 };
1245 for param in method.params.iter() {
1246 if let Some(hint) = ¶m.type_hint {
1247 check_type_hint_classes(
1248 hint,
1249 &self.codebase,
1250 file,
1251 source,
1252 source_map,
1253 all_issues,
1254 );
1255 }
1256 }
1257 if let Some(hint) = &method.return_type {
1258 check_type_hint_classes(hint, &self.codebase, file, source, source_map, all_issues);
1259 }
1260 }
1261 }
1262
1263 fn analyze_interface_decl<'arena, 'src>(
1265 &self,
1266 decl: &php_ast::ast::InterfaceDecl<'arena, 'src>,
1267 file: &Arc<str>,
1268 source: &str,
1269 source_map: &php_rs_parser::source_map::SourceMap,
1270 all_issues: &mut Vec<mir_issues::Issue>,
1271 ) {
1272 use php_ast::ast::ClassMemberKind;
1273 for parent in decl.extends.iter() {
1274 check_name_class(parent, &self.codebase, file, source, source_map, all_issues);
1275 }
1276 for member in decl.members.iter() {
1277 let ClassMemberKind::Method(method) = &member.kind else {
1278 continue;
1279 };
1280 for param in method.params.iter() {
1281 if let Some(hint) = ¶m.type_hint {
1282 check_type_hint_classes(
1283 hint,
1284 &self.codebase,
1285 file,
1286 source,
1287 source_map,
1288 all_issues,
1289 );
1290 }
1291 }
1292 if let Some(hint) = &method.return_type {
1293 check_type_hint_classes(hint, &self.codebase, file, source, source_map, all_issues);
1294 }
1295 }
1296 }
1297}
1298
/// The default analyzer is the one produced by [`ProjectAnalyzer::new`].
impl Default for ProjectAnalyzer {
    fn default() -> Self {
        Self::new()
    }
}
1304
1305fn offset_to_line_col(
1312 source: &str,
1313 offset: u32,
1314 source_map: &php_rs_parser::source_map::SourceMap,
1315) -> (u32, u16) {
1316 let lc = source_map.offset_to_line_col(offset);
1317 let line = lc.line + 1;
1318
1319 let byte_offset = offset as usize;
1320 let line_start_byte = if byte_offset == 0 {
1321 0
1322 } else {
1323 source[..byte_offset]
1324 .rfind('\n')
1325 .map(|p| p + 1)
1326 .unwrap_or(0)
1327 };
1328
1329 let col = source[line_start_byte..byte_offset].chars().count() as u16;
1330
1331 (line, col)
1332}
1333
1334fn check_type_hint_classes<'arena, 'src>(
1341 hint: &php_ast::ast::TypeHint<'arena, 'src>,
1342 codebase: &Codebase,
1343 file: &Arc<str>,
1344 source: &str,
1345 source_map: &php_rs_parser::source_map::SourceMap,
1346 issues: &mut Vec<mir_issues::Issue>,
1347) {
1348 use php_ast::ast::TypeHintKind;
1349 match &hint.kind {
1350 TypeHintKind::Named(name) => {
1351 let name_str = crate::parser::name_to_string(name);
1352 if is_pseudo_type(&name_str) {
1354 return;
1355 }
1356 let resolved = codebase.resolve_class_name(file.as_ref(), &name_str);
1357 if !codebase.type_exists(&resolved) {
1358 let (line, col_start) = offset_to_line_col(source, hint.span.start, source_map);
1359 let col_end = if hint.span.start < hint.span.end {
1360 let (_end_line, end_col) =
1361 offset_to_line_col(source, hint.span.end, source_map);
1362 end_col
1363 } else {
1364 col_start
1365 };
1366 issues.push(
1367 mir_issues::Issue::new(
1368 mir_issues::IssueKind::UndefinedClass { name: resolved },
1369 mir_issues::Location {
1370 file: file.clone(),
1371 line,
1372 col_start,
1373 col_end: col_end.max(col_start + 1),
1374 },
1375 )
1376 .with_snippet(crate::parser::span_text(source, hint.span).unwrap_or_default()),
1377 );
1378 }
1379 }
1380 TypeHintKind::Nullable(inner) => {
1381 check_type_hint_classes(inner, codebase, file, source, source_map, issues);
1382 }
1383 TypeHintKind::Union(parts) | TypeHintKind::Intersection(parts) => {
1384 for part in parts.iter() {
1385 check_type_hint_classes(part, codebase, file, source, source_map, issues);
1386 }
1387 }
1388 TypeHintKind::Keyword(_, _) => {} }
1390}
1391
1392fn check_name_class(
1395 name: &php_ast::ast::Name<'_, '_>,
1396 codebase: &Codebase,
1397 file: &Arc<str>,
1398 source: &str,
1399 source_map: &php_rs_parser::source_map::SourceMap,
1400 issues: &mut Vec<mir_issues::Issue>,
1401) {
1402 let name_str = crate::parser::name_to_string(name);
1403 let resolved = codebase.resolve_class_name(file.as_ref(), &name_str);
1404 if !codebase.type_exists(&resolved) {
1405 let span = name.span();
1406 let (line, col_start) = offset_to_line_col(source, span.start, source_map);
1407 let (_, col_end) = offset_to_line_col(source, span.end, source_map);
1408 issues.push(
1409 mir_issues::Issue::new(
1410 mir_issues::IssueKind::UndefinedClass { name: resolved },
1411 mir_issues::Location {
1412 file: file.clone(),
1413 line,
1414 col_start,
1415 col_end: col_end.max(col_start + 1),
1416 },
1417 )
1418 .with_snippet(crate::parser::span_text(source, span).unwrap_or_default()),
1419 );
1420 }
1421}
1422
/// Returns `true` when `name` is one of the built-in pseudo-type names that
/// must not be looked up as a class (`self`, `static`, `void`, `mixed`, ...).
///
/// The comparison ignores ASCII case and does not allocate. Every entry in
/// the list is plain ASCII, so this accepts exactly the same inputs as
/// lowercasing `name` first and comparing.
fn is_pseudo_type(name: &str) -> bool {
    const PSEUDO_TYPES: [&str; 12] = [
        "self", "static", "parent", "null", "true", "false", "never", "void", "mixed", "object",
        "callable", "iterable",
    ];
    PSEUDO_TYPES
        .iter()
        .any(|pseudo| name.eq_ignore_ascii_case(pseudo))
}
1442
1443const MAGIC_METHODS_WITH_RUNTIME_PARAMS: &[&str] = &[
1445 "__get",
1446 "__set",
1447 "__call",
1448 "__callStatic",
1449 "__isset",
1450 "__unset",
1451];
1452
1453fn emit_unused_params(
1456 params: &[mir_codebase::FnParam],
1457 ctx: &crate::context::Context,
1458 method_name: &str,
1459 file: &Arc<str>,
1460 issues: &mut Vec<mir_issues::Issue>,
1461) {
1462 if MAGIC_METHODS_WITH_RUNTIME_PARAMS.contains(&method_name) {
1463 return;
1464 }
1465 for p in params {
1466 let name = p.name.as_ref().trim_start_matches('$');
1467 if !ctx.read_vars.contains(name) {
1468 issues.push(
1469 mir_issues::Issue::new(
1470 mir_issues::IssueKind::UnusedParam {
1471 name: name.to_string(),
1472 },
1473 mir_issues::Location {
1474 file: file.clone(),
1475 line: 1,
1476 col_start: 0,
1477 col_end: 0,
1478 },
1479 )
1480 .with_snippet(format!("${}", name)),
1481 );
1482 }
1483 }
1484}
1485
1486fn emit_unused_variables(
1487 ctx: &crate::context::Context,
1488 file: &Arc<str>,
1489 issues: &mut Vec<mir_issues::Issue>,
1490) {
1491 const SUPERGLOBALS: &[&str] = &[
1493 "_SERVER", "_GET", "_POST", "_REQUEST", "_SESSION", "_COOKIE", "_FILES", "_ENV", "GLOBALS",
1494 ];
1495 for name in &ctx.assigned_vars {
1496 if ctx.param_names.contains(name) {
1497 continue;
1498 }
1499 if SUPERGLOBALS.contains(&name.as_str()) {
1500 continue;
1501 }
1502 if name == "this" {
1505 continue;
1506 }
1507 if name.starts_with('_') {
1508 continue;
1509 }
1510 if !ctx.read_vars.contains(name) {
1511 issues.push(mir_issues::Issue::new(
1512 mir_issues::IssueKind::UnusedVariable { name: name.clone() },
1513 mir_issues::Location {
1514 file: file.clone(),
1515 line: 1,
1516 col_start: 0,
1517 col_end: 0,
1518 },
1519 ));
1520 }
1521 }
1522}
1523
1524pub fn merge_return_types(return_types: &[Union]) -> Union {
1527 if return_types.is_empty() {
1528 return Union::single(mir_types::Atomic::TVoid);
1529 }
1530 return_types
1531 .iter()
1532 .fold(Union::empty(), |acc, t| Union::merge(&acc, t))
1533}
1534
/// Recursively appends every `*.php` file found under `dir` to `out`.
///
/// Symlinked entries are skipped, as are directories named `vendor`, `.git`,
/// `node_modules`, `.cache` and `.pnpm-store`. A directory that cannot be
/// read, and individual entries that fail to read, are ignored silently.
pub(crate) fn collect_php_files(dir: &Path, out: &mut Vec<PathBuf>) {
    const SKIPPED_DIRS: [&str; 5] = ["vendor", ".git", "node_modules", ".cache", ".pnpm-store"];

    let Ok(entries) = std::fs::read_dir(dir) else {
        return;
    };

    for entry in entries.flatten() {
        // An entry whose type cannot be determined is treated as not a symlink.
        let is_symlink = entry
            .file_type()
            .map(|kind| kind.is_symlink())
            .unwrap_or(false);
        if is_symlink {
            continue;
        }

        let path = entry.path();
        if path.is_dir() {
            let dir_name = path.file_name().and_then(|n| n.to_str()).unwrap_or("");
            if !SKIPPED_DIRS.contains(&dir_name) {
                collect_php_files(&path, out);
            }
        } else if path.extension().and_then(|ext| ext.to_str()) == Some("php") {
            out.push(path);
        }
    }
}
1558
1559fn build_reverse_deps(codebase: &Codebase) -> HashMap<String, HashSet<String>> {
1575 let mut reverse: HashMap<String, HashSet<String>> = HashMap::new();
1576
1577 let mut add_edge = |symbol: &str, dependent_file: &str| {
1579 if let Some(defining_file) = codebase.symbol_to_file.get(symbol) {
1580 let def = defining_file.as_ref().to_string();
1581 if def != dependent_file {
1582 reverse
1583 .entry(def)
1584 .or_default()
1585 .insert(dependent_file.to_string());
1586 }
1587 }
1588 };
1589
1590 for entry in codebase.file_imports.iter() {
1592 let file = entry.key().as_ref().to_string();
1593 for fqcn in entry.value().values() {
1594 add_edge(fqcn, &file);
1595 }
1596 }
1597
1598 for entry in codebase.classes.iter() {
1600 let defining = {
1601 let fqcn = entry.key().as_ref();
1602 codebase
1603 .symbol_to_file
1604 .get(fqcn)
1605 .map(|f| f.as_ref().to_string())
1606 };
1607 let Some(file) = defining else { continue };
1608
1609 let cls = entry.value();
1610 if let Some(ref parent) = cls.parent {
1611 add_edge(parent.as_ref(), &file);
1612 }
1613 for iface in &cls.interfaces {
1614 add_edge(iface.as_ref(), &file);
1615 }
1616 for tr in &cls.traits {
1617 add_edge(tr.as_ref(), &file);
1618 }
1619 }
1620
1621 reverse
1622}
1623
1624fn extract_reference_locations(codebase: &Codebase, file: &Arc<str>) -> Vec<(String, u32, u32)> {
1629 codebase
1630 .extract_file_reference_locations(file.as_ref())
1631 .into_iter()
1632 .map(|(sym, start, end)| (sym.to_string(), start, end))
1633 .collect()
1634}
1635
/// Output of an analysis run, as returned by `ProjectAnalyzer::analyze`.
pub struct AnalysisResult {
    /// Issues found; counted by `error_count` / `warning_count` and grouped
    /// per file by `issues_by_file`.
    pub issues: Vec<Issue>,
    /// Type environments keyed by scope id.
    pub type_envs: std::collections::HashMap<crate::type_env::ScopeId, crate::type_env::TypeEnv>,
    /// Resolved symbols; searched by `symbol_at`.
    pub symbols: Vec<crate::symbol::ResolvedSymbol>,
}
1644
1645impl AnalysisResult {
1646 pub fn error_count(&self) -> usize {
1647 self.issues
1648 .iter()
1649 .filter(|i| i.severity == mir_issues::Severity::Error)
1650 .count()
1651 }
1652
1653 pub fn warning_count(&self) -> usize {
1654 self.issues
1655 .iter()
1656 .filter(|i| i.severity == mir_issues::Severity::Warning)
1657 .count()
1658 }
1659
1660 pub fn issues_by_file(&self) -> HashMap<std::sync::Arc<str>, Vec<&Issue>> {
1666 let mut map: HashMap<std::sync::Arc<str>, Vec<&Issue>> = HashMap::new();
1667 for issue in &self.issues {
1668 map.entry(issue.location.file.clone())
1669 .or_default()
1670 .push(issue);
1671 }
1672 map
1673 }
1674
1675 pub fn symbol_at(
1684 &self,
1685 file: &str,
1686 byte_offset: u32,
1687 ) -> Option<&crate::symbol::ResolvedSymbol> {
1688 self.symbols
1689 .iter()
1690 .filter(|s| {
1691 s.file.as_ref() == file && s.span.start <= byte_offset && byte_offset < s.span.end
1692 })
1693 .min_by_key(|s| s.span.end - s.span.start)
1694 }
1695}