1use std::path::{Path, PathBuf};
3use std::sync::Arc;
4
5use rayon::prelude::*;
6
7use std::collections::{HashMap, HashSet};
8
9use crate::cache::{hash_content, AnalysisCache};
10use mir_codebase::Codebase;
11use mir_issues::Issue;
12use mir_types::Union;
13
14use crate::collector::DefinitionCollector;
15
/// Entry point for analyzing PHP sources: owns the shared codebase plus the
/// optional collaborators (cache, PSR-4 map, progress callback) used by `analyze`.
pub struct ProjectAnalyzer {
 /// Shared symbol store that the collection and body-analysis passes read and write.
 pub codebase: Arc<Codebase>,
 /// Optional result cache; `with_cache` sets it, the other constructors leave it `None`.
 pub cache: Option<AnalysisCache>,
 /// Optional callback invoked once per file after its body-analysis pass in `analyze`.
 pub on_file_done: Option<Arc<dyn Fn() + Send + Sync>>,
 /// Optional PSR-4 map; when present, `analyze` uses it to load referenced
 /// parent classes and interfaces that are missing from the codebase.
 pub psr4: Option<Arc<crate::composer::Psr4Map>>,
 /// Set to `true` by the first `load_stubs` call; later calls see the flag and do nothing.
 stubs_loaded: std::sync::atomic::AtomicBool,
 /// When `true`, `analyze` also runs the dead-code analyzer and appends its issues.
 pub find_dead_code: bool,
}
33
34impl ProjectAnalyzer {
35 pub fn new() -> Self {
36 Self {
37 codebase: Arc::new(Codebase::new()),
38 cache: None,
39 on_file_done: None,
40 psr4: None,
41 stubs_loaded: std::sync::atomic::AtomicBool::new(false),
42 find_dead_code: false,
43 }
44 }
45
46 pub fn with_cache(cache_dir: &Path) -> Self {
48 Self {
49 codebase: Arc::new(Codebase::new()),
50 cache: Some(AnalysisCache::open(cache_dir)),
51 on_file_done: None,
52 psr4: None,
53 stubs_loaded: std::sync::atomic::AtomicBool::new(false),
54 find_dead_code: false,
55 }
56 }
57
58 pub fn from_composer(
62 root: &Path,
63 ) -> Result<(Self, crate::composer::Psr4Map), crate::composer::ComposerError> {
64 let map = crate::composer::Psr4Map::from_composer(root)?;
65 let psr4 = Arc::new(map.clone());
66 let analyzer = Self {
67 codebase: Arc::new(Codebase::new()),
68 cache: None,
69 on_file_done: None,
70 psr4: Some(psr4),
71 stubs_loaded: std::sync::atomic::AtomicBool::new(false),
72 find_dead_code: false,
73 };
74 Ok((analyzer, map))
75 }
76
 /// Returns a reference to the shared codebase handle held by this analyzer.
 pub fn codebase(&self) -> &Arc<Codebase> {
 &self.codebase
 }
81
82 pub fn load_stubs(&self) {
84 if !self
85 .stubs_loaded
86 .swap(true, std::sync::atomic::Ordering::SeqCst)
87 {
88 crate::stubs::load_stubs(&self.codebase);
89 }
90 }
91
92 pub fn analyze(&self, paths: &[PathBuf]) -> AnalysisResult {
94 let mut all_issues = Vec::new();
95 let mut parse_errors = Vec::new();
96
97 self.load_stubs();
99
100 if let Some(cache) = &self.cache {
103 let changed: Vec<String> = paths
104 .iter()
105 .filter_map(|p| {
106 let path_str = p.to_string_lossy().into_owned();
107 let content = std::fs::read_to_string(p).ok()?;
108 let h = hash_content(&content);
109 if cache.get(&path_str, &h).is_none() {
110 Some(path_str)
111 } else {
112 None
113 }
114 })
115 .collect();
116 if !changed.is_empty() {
117 cache.evict_with_dependents(&changed);
118 }
119 }
120
121 let file_data: Vec<(Arc<str>, String)> = paths
123 .par_iter()
124 .filter_map(|path| match std::fs::read_to_string(path) {
125 Ok(src) => Some((Arc::from(path.to_string_lossy().as_ref()), src)),
126 Err(e) => {
127 eprintln!("Cannot read {}: {}", path.display(), e);
128 None
129 }
130 })
131 .collect();
132
133 let pass1_results: Vec<(Vec<Issue>, Vec<Issue>)> = file_data
138 .par_iter()
139 .map(|(file, src)| {
140 use php_ast::ast::StmtKind;
141 let arena = bumpalo::Bump::new();
142 let result = php_rs_parser::parse(&arena, src);
143
144 let mut current_namespace: Option<String> = None;
146 let mut imports: std::collections::HashMap<String, String> =
147 std::collections::HashMap::new();
148 let mut file_ns_set = false;
149
150 let index_stmts =
152 |stmts: &[php_ast::ast::Stmt<'_, '_>],
153 ns: Option<&str>,
154 imports: &mut std::collections::HashMap<String, String>| {
155 for stmt in stmts.iter() {
156 match &stmt.kind {
157 StmtKind::Use(use_decl) => {
158 for item in use_decl.uses.iter() {
159 let full_name = crate::parser::name_to_string(&item.name);
160 let alias = item.alias.unwrap_or_else(|| {
161 full_name.rsplit('\\').next().unwrap_or(&full_name)
162 });
163 imports.insert(alias.to_string(), full_name);
164 }
165 }
166 StmtKind::Class(decl) => {
167 if let Some(n) = decl.name {
168 let fqcn = match ns {
169 Some(ns) => format!("{}\\{}", ns, n),
170 None => n.to_string(),
171 };
172 self.codebase
173 .known_symbols
174 .insert(Arc::from(fqcn.as_str()));
175 }
176 }
177 StmtKind::Interface(decl) => {
178 let fqcn = match ns {
179 Some(ns) => format!("{}\\{}", ns, decl.name),
180 None => decl.name.to_string(),
181 };
182 self.codebase.known_symbols.insert(Arc::from(fqcn.as_str()));
183 }
184 StmtKind::Trait(decl) => {
185 let fqcn = match ns {
186 Some(ns) => format!("{}\\{}", ns, decl.name),
187 None => decl.name.to_string(),
188 };
189 self.codebase.known_symbols.insert(Arc::from(fqcn.as_str()));
190 }
191 StmtKind::Enum(decl) => {
192 let fqcn = match ns {
193 Some(ns) => format!("{}\\{}", ns, decl.name),
194 None => decl.name.to_string(),
195 };
196 self.codebase.known_symbols.insert(Arc::from(fqcn.as_str()));
197 }
198 StmtKind::Function(decl) => {
199 let fqn = match ns {
200 Some(ns) => format!("{}\\{}", ns, decl.name),
201 None => decl.name.to_string(),
202 };
203 self.codebase.known_symbols.insert(Arc::from(fqn.as_str()));
204 }
205 _ => {}
206 }
207 }
208 };
209
210 for stmt in result.program.stmts.iter() {
211 match &stmt.kind {
212 StmtKind::Namespace(ns) => {
213 current_namespace =
214 ns.name.as_ref().map(|n| crate::parser::name_to_string(n));
215 if !file_ns_set {
216 if let Some(ref ns_str) = current_namespace {
217 self.codebase
218 .file_namespaces
219 .insert(file.clone(), ns_str.clone());
220 file_ns_set = true;
221 }
222 }
223 if let php_ast::ast::NamespaceBody::Braced(inner_stmts) = &ns.body {
225 index_stmts(
226 inner_stmts,
227 current_namespace.as_deref(),
228 &mut imports,
229 );
230 }
231 }
232 _ => index_stmts(
233 std::slice::from_ref(stmt),
234 current_namespace.as_deref(),
235 &mut imports,
236 ),
237 }
238 }
239
240 if !imports.is_empty() {
241 self.codebase.file_imports.insert(file.clone(), imports);
242 }
243
244 let file_parse_errors: Vec<Issue> = result
246 .errors
247 .iter()
248 .map(|err| {
249 Issue::new(
250 mir_issues::IssueKind::ParseError {
251 message: err.to_string(),
252 },
253 mir_issues::Location {
254 file: file.clone(),
255 line: 1,
256 col_start: 0,
257 col_end: 0,
258 },
259 )
260 })
261 .collect();
262
263 let collector =
265 DefinitionCollector::new(&self.codebase, file.clone(), src, &result.source_map);
266 let issues = collector.collect(&result.program);
267
268 (file_parse_errors, issues)
269 })
270 .collect();
271
272 for (file_parse_errors, issues) in pass1_results {
273 parse_errors.extend(file_parse_errors);
274 all_issues.extend(issues);
275 }
276
277 all_issues.extend(parse_errors);
278
279 self.codebase.finalize();
281
282 if let Some(psr4) = &self.psr4 {
284 self.lazy_load_missing_classes(psr4.clone(), &mut all_issues);
285 }
286
287 if let Some(cache) = &self.cache {
289 let rev = build_reverse_deps(&self.codebase);
290 cache.set_reverse_deps(rev);
291 }
292
293 let analyzed_file_set: std::collections::HashSet<std::sync::Arc<str>> =
295 file_data.iter().map(|(f, _)| f.clone()).collect();
296 let class_issues =
297 crate::class::ClassAnalyzer::with_files(&self.codebase, analyzed_file_set, &file_data)
298 .analyze_all();
299 all_issues.extend(class_issues);
300
301 let pass2_results: Vec<(Vec<Issue>, Vec<crate::symbol::ResolvedSymbol>)> = file_data
307 .par_iter()
308 .map(|(file, src)| {
309 let result = if let Some(cache) = &self.cache {
311 let h = hash_content(src);
312 if let Some((cached_issues, ref_locs)) = cache.get(file, &h) {
313 self.codebase
316 .replay_reference_locations(file.clone(), &ref_locs);
317 (cached_issues, Vec::new())
318 } else {
319 let arena = bumpalo::Bump::new();
321 let parsed = php_rs_parser::parse(&arena, src);
322 let (issues, symbols) = self.analyze_bodies(
323 &parsed.program,
324 file.clone(),
325 src,
326 &parsed.source_map,
327 );
328 let ref_locs = extract_reference_locations(&self.codebase, file);
329 cache.put(file, h, issues.clone(), ref_locs);
330 (issues, symbols)
331 }
332 } else {
333 let arena = bumpalo::Bump::new();
334 let parsed = php_rs_parser::parse(&arena, src);
335 self.analyze_bodies(&parsed.program, file.clone(), src, &parsed.source_map)
336 };
337 if let Some(cb) = &self.on_file_done {
338 cb();
339 }
340 result
341 })
342 .collect();
343
344 let mut all_symbols = Vec::new();
345 for (issues, symbols) in pass2_results {
346 all_issues.extend(issues);
347 all_symbols.extend(symbols);
348 }
349
350 if let Some(cache) = &self.cache {
352 cache.flush();
353 }
354
355 self.codebase.compact_reference_index();
359
360 if self.find_dead_code {
362 let dead_code_issues =
363 crate::dead_code::DeadCodeAnalyzer::new(&self.codebase).analyze();
364 all_issues.extend(dead_code_issues);
365 }
366
367 AnalysisResult {
368 issues: all_issues,
369 type_envs: std::collections::HashMap::new(),
370 symbols: all_symbols,
371 }
372 }
373
374 fn lazy_load_missing_classes(
383 &self,
384 psr4: Arc<crate::composer::Psr4Map>,
385 all_issues: &mut Vec<Issue>,
386 ) {
387 use std::collections::HashSet;
388
389 let max_depth = 10; let mut loaded: HashSet<String> = HashSet::new();
391
392 for _ in 0..max_depth {
393 let mut to_load: Vec<(String, PathBuf)> = Vec::new();
395
396 for entry in self.codebase.classes.iter() {
397 let cls = entry.value();
398
399 if let Some(parent) = &cls.parent {
401 let fqcn = parent.as_ref();
402 if !self.codebase.classes.contains_key(fqcn) && !loaded.contains(fqcn) {
403 if let Some(path) = psr4.resolve(fqcn) {
404 to_load.push((fqcn.to_string(), path));
405 }
406 }
407 }
408
409 for iface in &cls.interfaces {
411 let fqcn = iface.as_ref();
412 if !self.codebase.classes.contains_key(fqcn)
413 && !self.codebase.interfaces.contains_key(fqcn)
414 && !loaded.contains(fqcn)
415 {
416 if let Some(path) = psr4.resolve(fqcn) {
417 to_load.push((fqcn.to_string(), path));
418 }
419 }
420 }
421 }
422
423 if to_load.is_empty() {
424 break;
425 }
426
427 for (fqcn, path) in to_load {
429 loaded.insert(fqcn);
430 if let Ok(src) = std::fs::read_to_string(&path) {
431 let file: Arc<str> = Arc::from(path.to_string_lossy().as_ref());
432 let arena = bumpalo::Bump::new();
433 let result = php_rs_parser::parse(&arena, &src);
434 let collector = crate::collector::DefinitionCollector::new(
435 &self.codebase,
436 file,
437 &src,
438 &result.source_map,
439 );
440 let issues = collector.collect(&result.program);
441 all_issues.extend(issues);
442 }
443 }
444
445 self.codebase.invalidate_finalization();
448 self.codebase.finalize();
449 }
450 }
451
452 pub fn re_analyze_file(&self, file_path: &str, new_content: &str) -> AnalysisResult {
461 if let Some(cache) = &self.cache {
463 let h = hash_content(new_content);
464 if let Some((issues, ref_locs)) = cache.get(file_path, &h) {
465 let file: Arc<str> = Arc::from(file_path);
466 self.codebase.replay_reference_locations(file, &ref_locs);
467 return AnalysisResult {
468 issues,
469 type_envs: HashMap::new(),
470 symbols: Default::default(),
471 };
472 }
473 }
474
475 let structural_snapshot = self.codebase.file_structural_snapshot(file_path);
478
479 self.codebase.remove_file_definitions(file_path);
481
482 let file: Arc<str> = Arc::from(file_path);
484 let arena = bumpalo::Bump::new();
485 let parsed = php_rs_parser::parse(&arena, new_content);
486
487 let mut all_issues = Vec::new();
488
489 for err in &parsed.errors {
491 all_issues.push(Issue::new(
492 mir_issues::IssueKind::ParseError {
493 message: err.to_string(),
494 },
495 mir_issues::Location {
496 file: file.clone(),
497 line: 1,
498 col_start: 0,
499 col_end: 0,
500 },
501 ));
502 }
503
504 let collector = DefinitionCollector::new(
505 &self.codebase,
506 file.clone(),
507 new_content,
508 &parsed.source_map,
509 );
510 all_issues.extend(collector.collect(&parsed.program));
511
512 if self
519 .codebase
520 .structural_unchanged_after_pass1(file_path, &structural_snapshot)
521 {
522 self.codebase
523 .restore_all_parents(file_path, &structural_snapshot);
524 } else {
525 self.codebase.finalize();
526 }
527
528 let (body_issues, symbols) = self.analyze_bodies(
530 &parsed.program,
531 file.clone(),
532 new_content,
533 &parsed.source_map,
534 );
535 all_issues.extend(body_issues);
536
537 if let Some(cache) = &self.cache {
539 let h = hash_content(new_content);
540 cache.evict_with_dependents(&[file_path.to_string()]);
541 let ref_locs = extract_reference_locations(&self.codebase, &file);
542 cache.put(file_path, h, all_issues.clone(), ref_locs);
543 }
544
545 AnalysisResult {
546 issues: all_issues,
547 type_envs: HashMap::new(),
548 symbols,
549 }
550 }
551
552 pub fn analyze_source(source: &str) -> AnalysisResult {
555 use crate::collector::DefinitionCollector;
556 let analyzer = ProjectAnalyzer::new();
557 analyzer.load_stubs();
558 let file: Arc<str> = Arc::from("<source>");
559 let arena = bumpalo::Bump::new();
560 let result = php_rs_parser::parse(&arena, source);
561 let mut all_issues = Vec::new();
562 let collector =
563 DefinitionCollector::new(&analyzer.codebase, file.clone(), source, &result.source_map);
564 all_issues.extend(collector.collect(&result.program));
565 analyzer.codebase.finalize();
566 let mut type_envs = std::collections::HashMap::new();
567 let mut all_symbols = Vec::new();
568 all_issues.extend(analyzer.analyze_bodies_typed(
569 &result.program,
570 file.clone(),
571 source,
572 &result.source_map,
573 &mut type_envs,
574 &mut all_symbols,
575 ));
576 AnalysisResult {
577 issues: all_issues,
578 type_envs,
579 symbols: all_symbols,
580 }
581 }
582
583 fn analyze_bodies<'arena, 'src>(
586 &self,
587 program: &php_ast::ast::Program<'arena, 'src>,
588 file: Arc<str>,
589 source: &str,
590 source_map: &php_rs_parser::source_map::SourceMap,
591 ) -> (Vec<mir_issues::Issue>, Vec<crate::symbol::ResolvedSymbol>) {
592 use php_ast::ast::StmtKind;
593
594 let mut all_issues = Vec::new();
595 let mut all_symbols = Vec::new();
596
597 for stmt in program.stmts.iter() {
598 match &stmt.kind {
599 StmtKind::Function(decl) => {
600 self.analyze_fn_decl(
601 decl,
602 &file,
603 source,
604 source_map,
605 &mut all_issues,
606 &mut all_symbols,
607 );
608 }
609 StmtKind::Class(decl) => {
610 self.analyze_class_decl(
611 decl,
612 &file,
613 source,
614 source_map,
615 &mut all_issues,
616 &mut all_symbols,
617 );
618 }
619 StmtKind::Enum(decl) => {
620 self.analyze_enum_decl(decl, &file, source, source_map, &mut all_issues);
621 }
622 StmtKind::Namespace(ns) => {
623 if let php_ast::ast::NamespaceBody::Braced(stmts) = &ns.body {
624 for inner in stmts.iter() {
625 match &inner.kind {
626 StmtKind::Function(decl) => {
627 self.analyze_fn_decl(
628 decl,
629 &file,
630 source,
631 source_map,
632 &mut all_issues,
633 &mut all_symbols,
634 );
635 }
636 StmtKind::Class(decl) => {
637 self.analyze_class_decl(
638 decl,
639 &file,
640 source,
641 source_map,
642 &mut all_issues,
643 &mut all_symbols,
644 );
645 }
646 StmtKind::Enum(decl) => {
647 self.analyze_enum_decl(
648 decl,
649 &file,
650 source,
651 source_map,
652 &mut all_issues,
653 );
654 }
655 _ => {}
656 }
657 }
658 }
659 }
660 _ => {}
661 }
662 }
663
664 (all_issues, all_symbols)
665 }
666
667 #[allow(clippy::too_many_arguments)]
669 fn analyze_fn_decl<'arena, 'src>(
670 &self,
671 decl: &php_ast::ast::FunctionDecl<'arena, 'src>,
672 file: &Arc<str>,
673 source: &str,
674 source_map: &php_rs_parser::source_map::SourceMap,
675 all_issues: &mut Vec<mir_issues::Issue>,
676 all_symbols: &mut Vec<crate::symbol::ResolvedSymbol>,
677 ) {
678 let fn_name = decl.name;
679 let body = &decl.body;
680 for param in decl.params.iter() {
682 if let Some(hint) = ¶m.type_hint {
683 check_type_hint_classes(hint, &self.codebase, file, source, source_map, all_issues);
684 }
685 }
686 if let Some(hint) = &decl.return_type {
687 check_type_hint_classes(hint, &self.codebase, file, source, source_map, all_issues);
688 }
689 use crate::context::Context;
690 use crate::stmt::StatementsAnalyzer;
691 use mir_issues::IssueBuffer;
692
693 let resolved_fn = self.codebase.resolve_class_name(file.as_ref(), fn_name);
695 let func_opt: Option<mir_codebase::storage::FunctionStorage> = self
696 .codebase
697 .functions
698 .get(resolved_fn.as_str())
699 .map(|r| r.clone())
700 .or_else(|| self.codebase.functions.get(fn_name).map(|r| r.clone()))
701 .or_else(|| {
702 self.codebase
703 .functions
704 .iter()
705 .find(|e| e.short_name.as_ref() == fn_name)
706 .map(|e| e.value().clone())
707 });
708
709 let fqn = func_opt.as_ref().map(|f| f.fqn.clone());
710 let (params, return_ty): (Vec<mir_codebase::FnParam>, _) = match &func_opt {
715 Some(f)
716 if f.params.len() == decl.params.len()
717 && f.params
718 .iter()
719 .zip(decl.params.iter())
720 .all(|(cp, ap)| cp.name.as_ref() == ap.name) =>
721 {
722 (f.params.clone(), f.return_type.clone())
723 }
724 _ => {
725 let ast_params = decl
726 .params
727 .iter()
728 .map(|p| mir_codebase::FnParam {
729 name: Arc::from(p.name),
730 ty: None,
731 default: p.default.as_ref().map(|_| mir_types::Union::mixed()),
732 is_variadic: p.variadic,
733 is_byref: p.by_ref,
734 is_optional: p.default.is_some() || p.variadic,
735 })
736 .collect();
737 (ast_params, None)
738 }
739 };
740
741 let mut ctx = Context::for_function(¶ms, return_ty, None, None, None, false, true);
742 let mut buf = IssueBuffer::new();
743 let mut sa = StatementsAnalyzer::new(
744 &self.codebase,
745 file.clone(),
746 source,
747 source_map,
748 &mut buf,
749 all_symbols,
750 );
751 sa.analyze_stmts(body, &mut ctx);
752 let inferred = merge_return_types(&sa.return_types);
753 drop(sa);
754
755 emit_unused_params(¶ms, &ctx, "", file, all_issues);
756 emit_unused_variables(&ctx, file, all_issues);
757 all_issues.extend(buf.into_issues());
758
759 if let Some(fqn) = fqn {
760 if let Some(mut func) = self.codebase.functions.get_mut(fqn.as_ref()) {
761 func.inferred_return_type = Some(inferred);
762 }
763 }
764 }
765
766 #[allow(clippy::too_many_arguments)]
768 fn analyze_class_decl<'arena, 'src>(
769 &self,
770 decl: &php_ast::ast::ClassDecl<'arena, 'src>,
771 file: &Arc<str>,
772 source: &str,
773 source_map: &php_rs_parser::source_map::SourceMap,
774 all_issues: &mut Vec<mir_issues::Issue>,
775 all_symbols: &mut Vec<crate::symbol::ResolvedSymbol>,
776 ) {
777 use crate::context::Context;
778 use crate::stmt::StatementsAnalyzer;
779 use mir_issues::IssueBuffer;
780
781 let class_name = decl.name.unwrap_or("<anonymous>");
782 let resolved = self.codebase.resolve_class_name(file.as_ref(), class_name);
785 let fqcn: &str = &resolved;
786 let parent_fqcn = self
787 .codebase
788 .classes
789 .get(fqcn)
790 .and_then(|c| c.parent.clone());
791
792 for member in decl.members.iter() {
793 let php_ast::ast::ClassMemberKind::Method(method) = &member.kind else {
794 continue;
795 };
796
797 for param in method.params.iter() {
799 if let Some(hint) = ¶m.type_hint {
800 check_type_hint_classes(
801 hint,
802 &self.codebase,
803 file,
804 source,
805 source_map,
806 all_issues,
807 );
808 }
809 }
810 if let Some(hint) = &method.return_type {
811 check_type_hint_classes(hint, &self.codebase, file, source, source_map, all_issues);
812 }
813
814 let Some(body) = &method.body else { continue };
815
816 let (params, return_ty) = self
817 .codebase
818 .get_method(fqcn, method.name)
819 .as_deref()
820 .map(|m| (m.params.clone(), m.return_type.clone()))
821 .unwrap_or_default();
822
823 let is_ctor = method.name == "__construct";
824 let mut ctx = Context::for_method(
825 ¶ms,
826 return_ty,
827 Some(Arc::from(fqcn)),
828 parent_fqcn.clone(),
829 Some(Arc::from(fqcn)),
830 false,
831 is_ctor,
832 method.is_static,
833 );
834
835 let mut buf = IssueBuffer::new();
836 let mut sa = StatementsAnalyzer::new(
837 &self.codebase,
838 file.clone(),
839 source,
840 source_map,
841 &mut buf,
842 all_symbols,
843 );
844 sa.analyze_stmts(body, &mut ctx);
845 let inferred = merge_return_types(&sa.return_types);
846 drop(sa);
847
848 emit_unused_params(¶ms, &ctx, method.name, file, all_issues);
849 emit_unused_variables(&ctx, file, all_issues);
850 all_issues.extend(buf.into_issues());
851
852 if let Some(mut cls) = self.codebase.classes.get_mut(fqcn) {
853 if let Some(m) = cls.own_methods.get_mut(method.name) {
854 Arc::make_mut(m).inferred_return_type = Some(inferred);
855 }
856 }
857 }
858 }
859
860 #[allow(clippy::too_many_arguments)]
862 fn analyze_bodies_typed<'arena, 'src>(
863 &self,
864 program: &php_ast::ast::Program<'arena, 'src>,
865 file: Arc<str>,
866 source: &str,
867 source_map: &php_rs_parser::source_map::SourceMap,
868 type_envs: &mut std::collections::HashMap<
869 crate::type_env::ScopeId,
870 crate::type_env::TypeEnv,
871 >,
872 all_symbols: &mut Vec<crate::symbol::ResolvedSymbol>,
873 ) -> Vec<mir_issues::Issue> {
874 use php_ast::ast::StmtKind;
875 let mut all_issues = Vec::new();
876 for stmt in program.stmts.iter() {
877 match &stmt.kind {
878 StmtKind::Function(decl) => {
879 self.analyze_fn_decl_typed(
880 decl,
881 &file,
882 source,
883 source_map,
884 &mut all_issues,
885 type_envs,
886 all_symbols,
887 );
888 }
889 StmtKind::Class(decl) => {
890 self.analyze_class_decl_typed(
891 decl,
892 &file,
893 source,
894 source_map,
895 &mut all_issues,
896 type_envs,
897 all_symbols,
898 );
899 }
900 StmtKind::Enum(decl) => {
901 self.analyze_enum_decl(decl, &file, source, source_map, &mut all_issues);
902 }
903 StmtKind::Namespace(ns) => {
904 if let php_ast::ast::NamespaceBody::Braced(stmts) = &ns.body {
905 for inner in stmts.iter() {
906 match &inner.kind {
907 StmtKind::Function(decl) => {
908 self.analyze_fn_decl_typed(
909 decl,
910 &file,
911 source,
912 source_map,
913 &mut all_issues,
914 type_envs,
915 all_symbols,
916 );
917 }
918 StmtKind::Class(decl) => {
919 self.analyze_class_decl_typed(
920 decl,
921 &file,
922 source,
923 source_map,
924 &mut all_issues,
925 type_envs,
926 all_symbols,
927 );
928 }
929 StmtKind::Enum(decl) => {
930 self.analyze_enum_decl(
931 decl,
932 &file,
933 source,
934 source_map,
935 &mut all_issues,
936 );
937 }
938 _ => {}
939 }
940 }
941 }
942 }
943 _ => {}
944 }
945 }
946 all_issues
947 }
948
949 #[allow(clippy::too_many_arguments)]
951 fn analyze_fn_decl_typed<'arena, 'src>(
952 &self,
953 decl: &php_ast::ast::FunctionDecl<'arena, 'src>,
954 file: &Arc<str>,
955 source: &str,
956 source_map: &php_rs_parser::source_map::SourceMap,
957 all_issues: &mut Vec<mir_issues::Issue>,
958 type_envs: &mut std::collections::HashMap<
959 crate::type_env::ScopeId,
960 crate::type_env::TypeEnv,
961 >,
962 all_symbols: &mut Vec<crate::symbol::ResolvedSymbol>,
963 ) {
964 use crate::context::Context;
965 use crate::stmt::StatementsAnalyzer;
966 use mir_issues::IssueBuffer;
967
968 let fn_name = decl.name;
969 let body = &decl.body;
970
971 for param in decl.params.iter() {
972 if let Some(hint) = ¶m.type_hint {
973 check_type_hint_classes(hint, &self.codebase, file, source, source_map, all_issues);
974 }
975 }
976 if let Some(hint) = &decl.return_type {
977 check_type_hint_classes(hint, &self.codebase, file, source, source_map, all_issues);
978 }
979
980 let resolved_fn = self.codebase.resolve_class_name(file.as_ref(), fn_name);
981 let func_opt: Option<mir_codebase::storage::FunctionStorage> = self
982 .codebase
983 .functions
984 .get(resolved_fn.as_str())
985 .map(|r| r.clone())
986 .or_else(|| self.codebase.functions.get(fn_name).map(|r| r.clone()))
987 .or_else(|| {
988 self.codebase
989 .functions
990 .iter()
991 .find(|e| e.short_name.as_ref() == fn_name)
992 .map(|e| e.value().clone())
993 });
994
995 let fqn = func_opt.as_ref().map(|f| f.fqn.clone());
996 let (params, return_ty): (Vec<mir_codebase::FnParam>, _) = match &func_opt {
997 Some(f)
998 if f.params.len() == decl.params.len()
999 && f.params
1000 .iter()
1001 .zip(decl.params.iter())
1002 .all(|(cp, ap)| cp.name.as_ref() == ap.name) =>
1003 {
1004 (f.params.clone(), f.return_type.clone())
1005 }
1006 _ => {
1007 let ast_params = decl
1008 .params
1009 .iter()
1010 .map(|p| mir_codebase::FnParam {
1011 name: Arc::from(p.name),
1012 ty: None,
1013 default: p.default.as_ref().map(|_| mir_types::Union::mixed()),
1014 is_variadic: p.variadic,
1015 is_byref: p.by_ref,
1016 is_optional: p.default.is_some() || p.variadic,
1017 })
1018 .collect();
1019 (ast_params, None)
1020 }
1021 };
1022
1023 let mut ctx = Context::for_function(¶ms, return_ty, None, None, None, false, true);
1024 let mut buf = IssueBuffer::new();
1025 let mut sa = StatementsAnalyzer::new(
1026 &self.codebase,
1027 file.clone(),
1028 source,
1029 source_map,
1030 &mut buf,
1031 all_symbols,
1032 );
1033 sa.analyze_stmts(body, &mut ctx);
1034 let inferred = merge_return_types(&sa.return_types);
1035 drop(sa);
1036
1037 let scope_name = fqn.clone().unwrap_or_else(|| Arc::from(fn_name));
1039 type_envs.insert(
1040 crate::type_env::ScopeId::Function {
1041 file: file.clone(),
1042 name: scope_name,
1043 },
1044 crate::type_env::TypeEnv::new(ctx.vars.clone()),
1045 );
1046
1047 emit_unused_params(¶ms, &ctx, "", file, all_issues);
1048 emit_unused_variables(&ctx, file, all_issues);
1049 all_issues.extend(buf.into_issues());
1050
1051 if let Some(fqn) = fqn {
1052 if let Some(mut func) = self.codebase.functions.get_mut(fqn.as_ref()) {
1053 func.inferred_return_type = Some(inferred);
1054 }
1055 }
1056 }
1057
1058 #[allow(clippy::too_many_arguments)]
1060 fn analyze_class_decl_typed<'arena, 'src>(
1061 &self,
1062 decl: &php_ast::ast::ClassDecl<'arena, 'src>,
1063 file: &Arc<str>,
1064 source: &str,
1065 source_map: &php_rs_parser::source_map::SourceMap,
1066 all_issues: &mut Vec<mir_issues::Issue>,
1067 type_envs: &mut std::collections::HashMap<
1068 crate::type_env::ScopeId,
1069 crate::type_env::TypeEnv,
1070 >,
1071 all_symbols: &mut Vec<crate::symbol::ResolvedSymbol>,
1072 ) {
1073 use crate::context::Context;
1074 use crate::stmt::StatementsAnalyzer;
1075 use mir_issues::IssueBuffer;
1076
1077 let class_name = decl.name.unwrap_or("<anonymous>");
1078 let resolved = self.codebase.resolve_class_name(file.as_ref(), class_name);
1079 let fqcn: &str = &resolved;
1080 let parent_fqcn = self
1081 .codebase
1082 .classes
1083 .get(fqcn)
1084 .and_then(|c| c.parent.clone());
1085
1086 for member in decl.members.iter() {
1087 let php_ast::ast::ClassMemberKind::Method(method) = &member.kind else {
1088 continue;
1089 };
1090
1091 for param in method.params.iter() {
1092 if let Some(hint) = ¶m.type_hint {
1093 check_type_hint_classes(
1094 hint,
1095 &self.codebase,
1096 file,
1097 source,
1098 source_map,
1099 all_issues,
1100 );
1101 }
1102 }
1103 if let Some(hint) = &method.return_type {
1104 check_type_hint_classes(hint, &self.codebase, file, source, source_map, all_issues);
1105 }
1106
1107 let Some(body) = &method.body else { continue };
1108
1109 let (params, return_ty) = self
1110 .codebase
1111 .get_method(fqcn, method.name)
1112 .as_deref()
1113 .map(|m| (m.params.clone(), m.return_type.clone()))
1114 .unwrap_or_default();
1115
1116 let is_ctor = method.name == "__construct";
1117 let mut ctx = Context::for_method(
1118 ¶ms,
1119 return_ty,
1120 Some(Arc::from(fqcn)),
1121 parent_fqcn.clone(),
1122 Some(Arc::from(fqcn)),
1123 false,
1124 is_ctor,
1125 method.is_static,
1126 );
1127
1128 let mut buf = IssueBuffer::new();
1129 let mut sa = StatementsAnalyzer::new(
1130 &self.codebase,
1131 file.clone(),
1132 source,
1133 source_map,
1134 &mut buf,
1135 all_symbols,
1136 );
1137 sa.analyze_stmts(body, &mut ctx);
1138 let inferred = merge_return_types(&sa.return_types);
1139 drop(sa);
1140
1141 type_envs.insert(
1143 crate::type_env::ScopeId::Method {
1144 class: Arc::from(fqcn),
1145 method: Arc::from(method.name),
1146 },
1147 crate::type_env::TypeEnv::new(ctx.vars.clone()),
1148 );
1149
1150 emit_unused_params(¶ms, &ctx, method.name, file, all_issues);
1151 emit_unused_variables(&ctx, file, all_issues);
1152 all_issues.extend(buf.into_issues());
1153
1154 if let Some(mut cls) = self.codebase.classes.get_mut(fqcn) {
1155 if let Some(m) = cls.own_methods.get_mut(method.name) {
1156 Arc::make_mut(m).inferred_return_type = Some(inferred);
1157 }
1158 }
1159 }
1160 }
1161
1162 pub fn discover_files(root: &Path) -> Vec<PathBuf> {
1164 if root.is_file() {
1165 return vec![root.to_path_buf()];
1166 }
1167 let mut files = Vec::new();
1168 collect_php_files(root, &mut files);
1169 files
1170 }
1171
1172 pub fn collect_types_only(&self, paths: &[PathBuf]) {
1175 paths.par_iter().for_each(|path| {
1176 let Ok(src) = std::fs::read_to_string(path) else {
1177 return;
1178 };
1179 let file: Arc<str> = Arc::from(path.to_string_lossy().as_ref());
1180 let arena = bumpalo::Bump::new();
1181 let result = php_rs_parser::parse(&arena, &src);
1182 let collector =
1183 DefinitionCollector::new(&self.codebase, file, &src, &result.source_map);
1184 let _ = collector.collect(&result.program);
1186 });
1187 }
1188
1189 #[allow(clippy::too_many_arguments)]
1191 fn analyze_enum_decl<'arena, 'src>(
1192 &self,
1193 decl: &php_ast::ast::EnumDecl<'arena, 'src>,
1194 file: &Arc<str>,
1195 source: &str,
1196 source_map: &php_rs_parser::source_map::SourceMap,
1197 all_issues: &mut Vec<mir_issues::Issue>,
1198 ) {
1199 use php_ast::ast::EnumMemberKind;
1200 for member in decl.members.iter() {
1201 let EnumMemberKind::Method(method) = &member.kind else {
1202 continue;
1203 };
1204 for param in method.params.iter() {
1205 if let Some(hint) = ¶m.type_hint {
1206 check_type_hint_classes(
1207 hint,
1208 &self.codebase,
1209 file,
1210 source,
1211 source_map,
1212 all_issues,
1213 );
1214 }
1215 }
1216 if let Some(hint) = &method.return_type {
1217 check_type_hint_classes(hint, &self.codebase, file, source, source_map, all_issues);
1218 }
1219 }
1220 }
1221}
1222
/// `Default` delegates to `ProjectAnalyzer::new`.
impl Default for ProjectAnalyzer {
 fn default() -> Self {
 Self::new()
 }
}
1228
1229fn offset_to_line_col(
1236 source: &str,
1237 offset: u32,
1238 source_map: &php_rs_parser::source_map::SourceMap,
1239) -> (u32, u16) {
1240 let lc = source_map.offset_to_line_col(offset);
1241 let line = lc.line + 1;
1242
1243 let byte_offset = offset as usize;
1244 let line_start_byte = if byte_offset == 0 {
1245 0
1246 } else {
1247 source[..byte_offset]
1248 .rfind('\n')
1249 .map(|p| p + 1)
1250 .unwrap_or(0)
1251 };
1252
1253 let col = source[line_start_byte..byte_offset].chars().count() as u16;
1254
1255 (line, col)
1256}
1257
1258fn check_type_hint_classes<'arena, 'src>(
1265 hint: &php_ast::ast::TypeHint<'arena, 'src>,
1266 codebase: &Codebase,
1267 file: &Arc<str>,
1268 source: &str,
1269 source_map: &php_rs_parser::source_map::SourceMap,
1270 issues: &mut Vec<mir_issues::Issue>,
1271) {
1272 use php_ast::ast::TypeHintKind;
1273 match &hint.kind {
1274 TypeHintKind::Named(name) => {
1275 let name_str = crate::parser::name_to_string(name);
1276 if is_pseudo_type(&name_str) {
1278 return;
1279 }
1280 let resolved = codebase.resolve_class_name(file.as_ref(), &name_str);
1281 if !codebase.type_exists(&resolved) {
1282 let (line, col_start) = offset_to_line_col(source, hint.span.start, source_map);
1283 let col_end = if hint.span.start < hint.span.end {
1284 let (_end_line, end_col) =
1285 offset_to_line_col(source, hint.span.end, source_map);
1286 end_col
1287 } else {
1288 col_start
1289 };
1290 issues.push(
1291 mir_issues::Issue::new(
1292 mir_issues::IssueKind::UndefinedClass { name: resolved },
1293 mir_issues::Location {
1294 file: file.clone(),
1295 line,
1296 col_start,
1297 col_end: col_end.max(col_start + 1),
1298 },
1299 )
1300 .with_snippet(crate::parser::span_text(source, hint.span).unwrap_or_default()),
1301 );
1302 }
1303 }
1304 TypeHintKind::Nullable(inner) => {
1305 check_type_hint_classes(inner, codebase, file, source, source_map, issues);
1306 }
1307 TypeHintKind::Union(parts) | TypeHintKind::Intersection(parts) => {
1308 for part in parts.iter() {
1309 check_type_hint_classes(part, codebase, file, source, source_map, issues);
1310 }
1311 }
1312 TypeHintKind::Keyword(_, _) => {} }
1314}
1315
/// Returns `true` when `name`, compared case-insensitively, is one of the
/// type names that never refer to a user-defined class (`self`, `static`,
/// `parent`, `null`, `true`, `false`, `never`, `void`, `mixed`, `object`,
/// `callable`, `iterable`).
fn is_pseudo_type(name: &str) -> bool {
    const PSEUDO_TYPES: [&str; 12] = [
        "self", "static", "parent", "null", "true", "false", "never", "void", "mixed", "object",
        "callable", "iterable",
    ];

    let lowered = name.to_lowercase();
    PSEUDO_TYPES.contains(&lowered.as_str())
}
1335
/// Method names for which `emit_unused_params` reports no unused parameters.
///
/// NOTE(review): presumably these are the PHP magic methods whose parameter
/// lists are dictated by the engine rather than the author — confirm.
const MAGIC_METHODS_WITH_RUNTIME_PARAMS: &[&str] = &[
 "__get",
 "__set",
 "__call",
 "__callStatic",
 "__isset",
 "__unset",
];
1345
1346fn emit_unused_params(
1349 params: &[mir_codebase::FnParam],
1350 ctx: &crate::context::Context,
1351 method_name: &str,
1352 file: &Arc<str>,
1353 issues: &mut Vec<mir_issues::Issue>,
1354) {
1355 if MAGIC_METHODS_WITH_RUNTIME_PARAMS.contains(&method_name) {
1356 return;
1357 }
1358 for p in params {
1359 let name = p.name.as_ref().trim_start_matches('$');
1360 if !ctx.read_vars.contains(name) {
1361 issues.push(
1362 mir_issues::Issue::new(
1363 mir_issues::IssueKind::UnusedParam {
1364 name: name.to_string(),
1365 },
1366 mir_issues::Location {
1367 file: file.clone(),
1368 line: 1,
1369 col_start: 0,
1370 col_end: 0,
1371 },
1372 )
1373 .with_snippet(format!("${}", name)),
1374 );
1375 }
1376 }
1377}
1378
1379fn emit_unused_variables(
1380 ctx: &crate::context::Context,
1381 file: &Arc<str>,
1382 issues: &mut Vec<mir_issues::Issue>,
1383) {
1384 const SUPERGLOBALS: &[&str] = &[
1386 "_SERVER", "_GET", "_POST", "_REQUEST", "_SESSION", "_COOKIE", "_FILES", "_ENV", "GLOBALS",
1387 ];
1388 for name in &ctx.assigned_vars {
1389 if ctx.param_names.contains(name) {
1390 continue;
1391 }
1392 if SUPERGLOBALS.contains(&name.as_str()) {
1393 continue;
1394 }
1395 if name == "this" {
1398 continue;
1399 }
1400 if name.starts_with('_') {
1401 continue;
1402 }
1403 if !ctx.read_vars.contains(name) {
1404 issues.push(mir_issues::Issue::new(
1405 mir_issues::IssueKind::UnusedVariable { name: name.clone() },
1406 mir_issues::Location {
1407 file: file.clone(),
1408 line: 1,
1409 col_start: 0,
1410 col_end: 0,
1411 },
1412 ));
1413 }
1414 }
1415}
1416
1417pub fn merge_return_types(return_types: &[Union]) -> Union {
1420 if return_types.is_empty() {
1421 return Union::single(mir_types::Atomic::TVoid);
1422 }
1423 return_types
1424 .iter()
1425 .fold(Union::empty(), |acc, t| Union::merge(&acc, t))
1426}
1427
/// Recursively appends every `*.php` file under `dir` to `out`.
///
/// * Symbolic links (to files or directories) are skipped.
/// * Directories named `vendor`, `.git`, `node_modules`, `.cache` or
///   `.pnpm-store` are not descended into.
/// * Directories that cannot be read, and entries that fail to enumerate,
///   are silently ignored — the walk is best-effort.
///
/// Files are appended in the order the operating system lists them; no
/// sorting is applied.
pub(crate) fn collect_php_files(dir: &Path, out: &mut Vec<PathBuf>) {
    const SKIPPED_DIRS: &[&str] = &["vendor", ".git", "node_modules", ".cache", ".pnpm-store"];

    let Ok(entries) = std::fs::read_dir(dir) else {
        return;
    };

    for entry in entries.flatten() {
        let path = entry.path();

        // Query the entry type once and reuse it for both the symlink and the
        // directory test instead of inspecting every path a second time. Only
        // when the entry type is unavailable do we fall back to asking the
        // path itself.
        let is_dir = match entry.file_type() {
            Ok(file_type) if file_type.is_symlink() => continue,
            Ok(file_type) => file_type.is_dir(),
            Err(_) => path.is_dir(),
        };

        if is_dir {
            // A directory name that is not valid UTF-8 is treated as "" and
            // therefore never matches the skip list.
            let name = path.file_name().and_then(|n| n.to_str()).unwrap_or("");
            if !SKIPPED_DIRS.contains(&name) {
                collect_php_files(&path, out);
            }
        } else if path.extension().and_then(|e| e.to_str()) == Some("php") {
            out.push(path);
        }
    }
}
1451
1452fn build_reverse_deps(codebase: &Codebase) -> HashMap<String, HashSet<String>> {
1468 let mut reverse: HashMap<String, HashSet<String>> = HashMap::new();
1469
1470 let mut add_edge = |symbol: &str, dependent_file: &str| {
1472 if let Some(defining_file) = codebase.symbol_to_file.get(symbol) {
1473 let def = defining_file.as_ref().to_string();
1474 if def != dependent_file {
1475 reverse
1476 .entry(def)
1477 .or_default()
1478 .insert(dependent_file.to_string());
1479 }
1480 }
1481 };
1482
1483 for entry in codebase.file_imports.iter() {
1485 let file = entry.key().as_ref().to_string();
1486 for fqcn in entry.value().values() {
1487 add_edge(fqcn, &file);
1488 }
1489 }
1490
1491 for entry in codebase.classes.iter() {
1493 let defining = {
1494 let fqcn = entry.key().as_ref();
1495 codebase
1496 .symbol_to_file
1497 .get(fqcn)
1498 .map(|f| f.as_ref().to_string())
1499 };
1500 let Some(file) = defining else { continue };
1501
1502 let cls = entry.value();
1503 if let Some(ref parent) = cls.parent {
1504 add_edge(parent.as_ref(), &file);
1505 }
1506 for iface in &cls.interfaces {
1507 add_edge(iface.as_ref(), &file);
1508 }
1509 for tr in &cls.traits {
1510 add_edge(tr.as_ref(), &file);
1511 }
1512 }
1513
1514 reverse
1515}
1516
1517fn extract_reference_locations(codebase: &Codebase, file: &Arc<str>) -> Vec<(String, u32, u32)> {
1522 codebase
1523 .extract_file_reference_locations(file.as_ref())
1524 .into_iter()
1525 .map(|(sym, start, end)| (sym.to_string(), start, end))
1526 .collect()
1527}
1528
/// Result bundle returned by an analysis run (see `ProjectAnalyzer::analyze`).
pub struct AnalysisResult {
    /// All issues collected by the run; see `issues_by_file` for a per-file view.
    pub issues: Vec<Issue>,
    /// Type environments keyed by scope identifier.
    pub type_envs: std::collections::HashMap<crate::type_env::ScopeId, crate::type_env::TypeEnv>,
    /// Resolved symbols; `symbol_at` searches this list by file and byte offset.
    pub symbols: Vec<crate::symbol::ResolvedSymbol>,
}
1537
1538impl AnalysisResult {
1539 pub fn error_count(&self) -> usize {
1540 self.issues
1541 .iter()
1542 .filter(|i| i.severity == mir_issues::Severity::Error)
1543 .count()
1544 }
1545
1546 pub fn warning_count(&self) -> usize {
1547 self.issues
1548 .iter()
1549 .filter(|i| i.severity == mir_issues::Severity::Warning)
1550 .count()
1551 }
1552
1553 pub fn issues_by_file(&self) -> HashMap<std::sync::Arc<str>, Vec<&Issue>> {
1559 let mut map: HashMap<std::sync::Arc<str>, Vec<&Issue>> = HashMap::new();
1560 for issue in &self.issues {
1561 map.entry(issue.location.file.clone())
1562 .or_default()
1563 .push(issue);
1564 }
1565 map
1566 }
1567
1568 pub fn symbol_at(
1577 &self,
1578 file: &str,
1579 byte_offset: u32,
1580 ) -> Option<&crate::symbol::ResolvedSymbol> {
1581 self.symbols
1582 .iter()
1583 .filter(|s| {
1584 s.file.as_ref() == file && s.span.start <= byte_offset && byte_offset < s.span.end
1585 })
1586 .min_by_key(|s| s.span.end - s.span.start)
1587 }
1588}