1use std::path::{Path, PathBuf};
3use std::sync::Arc;
4
5use rayon::prelude::*;
6
7use std::collections::{HashMap, HashSet};
8
9use crate::cache::{hash_content, AnalysisCache};
10use mir_codebase::Codebase;
11use mir_issues::Issue;
12use mir_types::Union;
13
14use crate::collector::DefinitionCollector;
15
/// Entry point for analyzing PHP sources: owns the shared [`Codebase`] plus the
/// optional collaborators consulted by [`ProjectAnalyzer::analyze`].
pub struct ProjectAnalyzer {
    /// Shared definition store that every analysis pass reads and updates.
    pub codebase: Arc<Codebase>,
    /// Optional result cache; when `Some`, `analyze` reuses cached per-file
    /// results for files whose content hash is already cached.
    pub cache: Option<AnalysisCache>,
    /// Optional callback invoked once for each file processed by the
    /// body-analysis pass of `analyze`.
    pub on_file_done: Option<Arc<dyn Fn() + Send + Sync>>,
    /// Optional PSR-4 map used to locate files of referenced but not yet
    /// loaded parent classes and interfaces.
    pub psr4: Option<Arc<crate::composer::Psr4Map>>,
    /// Flipped to `true` by the first `load_stubs` call so later calls are no-ops.
    stubs_loaded: std::sync::atomic::AtomicBool,
    /// When `true`, `analyze` additionally runs the dead-code analyzer.
    pub find_dead_code: bool,
}
33
34impl ProjectAnalyzer {
35 pub fn new() -> Self {
36 Self {
37 codebase: Arc::new(Codebase::new()),
38 cache: None,
39 on_file_done: None,
40 psr4: None,
41 stubs_loaded: std::sync::atomic::AtomicBool::new(false),
42 find_dead_code: false,
43 }
44 }
45
46 pub fn with_cache(cache_dir: &Path) -> Self {
48 Self {
49 codebase: Arc::new(Codebase::new()),
50 cache: Some(AnalysisCache::open(cache_dir)),
51 on_file_done: None,
52 psr4: None,
53 stubs_loaded: std::sync::atomic::AtomicBool::new(false),
54 find_dead_code: false,
55 }
56 }
57
58 pub fn from_composer(
62 root: &Path,
63 ) -> Result<(Self, crate::composer::Psr4Map), crate::composer::ComposerError> {
64 let map = crate::composer::Psr4Map::from_composer(root)?;
65 let psr4 = Arc::new(map.clone());
66 let analyzer = Self {
67 codebase: Arc::new(Codebase::new()),
68 cache: None,
69 on_file_done: None,
70 psr4: Some(psr4),
71 stubs_loaded: std::sync::atomic::AtomicBool::new(false),
72 find_dead_code: false,
73 };
74 Ok((analyzer, map))
75 }
76
    /// Returns a reference to the shared codebase handle held by this analyzer.
    pub fn codebase(&self) -> &Arc<Codebase> {
        &self.codebase
    }
81
82 pub fn load_stubs(&self) {
84 if !self
85 .stubs_loaded
86 .swap(true, std::sync::atomic::Ordering::SeqCst)
87 {
88 crate::stubs::load_stubs(&self.codebase);
89 }
90 }
91
92 pub fn analyze(&self, paths: &[PathBuf]) -> AnalysisResult {
94 let mut all_issues = Vec::new();
95 let mut parse_errors = Vec::new();
96
97 self.load_stubs();
99
100 if let Some(cache) = &self.cache {
103 let changed: Vec<String> = paths
104 .iter()
105 .filter_map(|p| {
106 let path_str = p.to_string_lossy().into_owned();
107 let content = std::fs::read_to_string(p).ok()?;
108 let h = hash_content(&content);
109 if cache.get(&path_str, &h).is_none() {
110 Some(path_str)
111 } else {
112 None
113 }
114 })
115 .collect();
116 if !changed.is_empty() {
117 cache.evict_with_dependents(&changed);
118 }
119 }
120
121 let file_data: Vec<(Arc<str>, String)> = paths
123 .par_iter()
124 .filter_map(|path| match std::fs::read_to_string(path) {
125 Ok(src) => Some((Arc::from(path.to_string_lossy().as_ref()), src)),
126 Err(e) => {
127 eprintln!("Cannot read {}: {}", path.display(), e);
128 None
129 }
130 })
131 .collect();
132
133 file_data.par_iter().for_each(|(file, src)| {
135 use php_ast::ast::StmtKind;
136 let arena = bumpalo::Bump::new();
137 let result = php_rs_parser::parse(&arena, src);
138
139 let mut current_namespace: Option<String> = None;
140 let mut imports: std::collections::HashMap<String, String> =
141 std::collections::HashMap::new();
142 let mut file_ns_set = false;
143
144 let index_stmts =
146 |stmts: &[php_ast::ast::Stmt<'_, '_>],
147 ns: Option<&str>,
148 imports: &mut std::collections::HashMap<String, String>| {
149 for stmt in stmts.iter() {
150 match &stmt.kind {
151 StmtKind::Use(use_decl) => {
152 for item in use_decl.uses.iter() {
153 let full_name = crate::parser::name_to_string(&item.name);
154 let alias = item.alias.unwrap_or_else(|| {
155 full_name.rsplit('\\').next().unwrap_or(&full_name)
156 });
157 imports.insert(alias.to_string(), full_name);
158 }
159 }
160 StmtKind::Class(decl) => {
161 if let Some(n) = decl.name {
162 let fqcn = match ns {
163 Some(ns) => format!("{}\\{}", ns, n),
164 None => n.to_string(),
165 };
166 self.codebase.known_symbols.insert(Arc::from(fqcn.as_str()));
167 }
168 }
169 StmtKind::Interface(decl) => {
170 let fqcn = match ns {
171 Some(ns) => format!("{}\\{}", ns, decl.name),
172 None => decl.name.to_string(),
173 };
174 self.codebase.known_symbols.insert(Arc::from(fqcn.as_str()));
175 }
176 StmtKind::Trait(decl) => {
177 let fqcn = match ns {
178 Some(ns) => format!("{}\\{}", ns, decl.name),
179 None => decl.name.to_string(),
180 };
181 self.codebase.known_symbols.insert(Arc::from(fqcn.as_str()));
182 }
183 StmtKind::Enum(decl) => {
184 let fqcn = match ns {
185 Some(ns) => format!("{}\\{}", ns, decl.name),
186 None => decl.name.to_string(),
187 };
188 self.codebase.known_symbols.insert(Arc::from(fqcn.as_str()));
189 }
190 StmtKind::Function(decl) => {
191 let fqn = match ns {
192 Some(ns) => format!("{}\\{}", ns, decl.name),
193 None => decl.name.to_string(),
194 };
195 self.codebase.known_symbols.insert(Arc::from(fqn.as_str()));
196 }
197 _ => {}
198 }
199 }
200 };
201
202 for stmt in result.program.stmts.iter() {
203 match &stmt.kind {
204 StmtKind::Namespace(ns) => {
205 current_namespace =
206 ns.name.as_ref().map(|n| crate::parser::name_to_string(n));
207 if !file_ns_set {
208 if let Some(ref ns_str) = current_namespace {
209 self.codebase
210 .file_namespaces
211 .insert(file.clone(), ns_str.clone());
212 file_ns_set = true;
213 }
214 }
215 if let php_ast::ast::NamespaceBody::Braced(inner_stmts) = &ns.body {
217 index_stmts(inner_stmts, current_namespace.as_deref(), &mut imports);
218 }
219 }
220 _ => index_stmts(
221 std::slice::from_ref(stmt),
222 current_namespace.as_deref(),
223 &mut imports,
224 ),
225 }
226 }
227
228 if !imports.is_empty() {
229 self.codebase.file_imports.insert(file.clone(), imports);
230 }
231 });
232
233 for (file, src) in &file_data {
236 let arena = bumpalo::Bump::new();
237 let result = php_rs_parser::parse(&arena, src);
238
239 for err in &result.errors {
240 let msg: String = err.to_string();
241 parse_errors.push(Issue::new(
242 mir_issues::IssueKind::ParseError { message: msg },
243 mir_issues::Location {
244 file: file.clone(),
245 line: 1,
246 col_start: 0,
247 col_end: 0,
248 },
249 ));
250 }
251
252 let collector =
253 DefinitionCollector::new(&self.codebase, file.clone(), src, &result.source_map);
254 let issues = collector.collect(&result.program);
255 all_issues.extend(issues);
256 }
257
258 all_issues.extend(parse_errors);
259
260 self.codebase.finalize();
262
263 if let Some(psr4) = &self.psr4 {
265 self.lazy_load_missing_classes(psr4.clone(), &mut all_issues);
266 }
267
268 if let Some(cache) = &self.cache {
270 let rev = build_reverse_deps(&self.codebase);
271 cache.set_reverse_deps(rev);
272 }
273
274 let analyzed_file_set: std::collections::HashSet<std::sync::Arc<str>> =
276 file_data.iter().map(|(f, _)| f.clone()).collect();
277 let class_issues =
278 crate::class::ClassAnalyzer::with_files(&self.codebase, analyzed_file_set, &file_data)
279 .analyze_all();
280 all_issues.extend(class_issues);
281
282 let pass2_results: Vec<(Vec<Issue>, Vec<crate::symbol::ResolvedSymbol>)> = file_data
288 .par_iter()
289 .map(|(file, src)| {
290 let result = if let Some(cache) = &self.cache {
292 let h = hash_content(src);
293 if let Some((cached_issues, ref_locs)) = cache.get(file, &h) {
294 self.codebase
297 .replay_reference_locations(file.clone(), &ref_locs);
298 (cached_issues, Vec::new())
299 } else {
300 let arena = bumpalo::Bump::new();
302 let parsed = php_rs_parser::parse(&arena, src);
303 let (issues, symbols) = self.analyze_bodies(
304 &parsed.program,
305 file.clone(),
306 src,
307 &parsed.source_map,
308 );
309 let ref_locs = extract_reference_locations(&self.codebase, file);
310 cache.put(file, h, issues.clone(), ref_locs);
311 (issues, symbols)
312 }
313 } else {
314 let arena = bumpalo::Bump::new();
315 let parsed = php_rs_parser::parse(&arena, src);
316 self.analyze_bodies(&parsed.program, file.clone(), src, &parsed.source_map)
317 };
318 if let Some(cb) = &self.on_file_done {
319 cb();
320 }
321 result
322 })
323 .collect();
324
325 let mut all_symbols = Vec::new();
326 for (issues, symbols) in pass2_results {
327 all_issues.extend(issues);
328 all_symbols.extend(symbols);
329 }
330
331 if let Some(cache) = &self.cache {
333 cache.flush();
334 }
335
336 if self.find_dead_code {
338 let dead_code_issues =
339 crate::dead_code::DeadCodeAnalyzer::new(&self.codebase).analyze();
340 all_issues.extend(dead_code_issues);
341 }
342
343 AnalysisResult {
344 issues: all_issues,
345 type_envs: std::collections::HashMap::new(),
346 symbols: all_symbols,
347 }
348 }
349
350 fn lazy_load_missing_classes(
359 &self,
360 psr4: Arc<crate::composer::Psr4Map>,
361 all_issues: &mut Vec<Issue>,
362 ) {
363 use std::collections::HashSet;
364
365 let max_depth = 10; let mut loaded: HashSet<String> = HashSet::new();
367
368 for _ in 0..max_depth {
369 let mut to_load: Vec<(String, PathBuf)> = Vec::new();
371
372 for entry in self.codebase.classes.iter() {
373 let cls = entry.value();
374
375 if let Some(parent) = &cls.parent {
377 let fqcn = parent.as_ref();
378 if !self.codebase.classes.contains_key(fqcn) && !loaded.contains(fqcn) {
379 if let Some(path) = psr4.resolve(fqcn) {
380 to_load.push((fqcn.to_string(), path));
381 }
382 }
383 }
384
385 for iface in &cls.interfaces {
387 let fqcn = iface.as_ref();
388 if !self.codebase.classes.contains_key(fqcn)
389 && !self.codebase.interfaces.contains_key(fqcn)
390 && !loaded.contains(fqcn)
391 {
392 if let Some(path) = psr4.resolve(fqcn) {
393 to_load.push((fqcn.to_string(), path));
394 }
395 }
396 }
397 }
398
399 if to_load.is_empty() {
400 break;
401 }
402
403 for (fqcn, path) in to_load {
405 loaded.insert(fqcn);
406 if let Ok(src) = std::fs::read_to_string(&path) {
407 let file: Arc<str> = Arc::from(path.to_string_lossy().as_ref());
408 let arena = bumpalo::Bump::new();
409 let result = php_rs_parser::parse(&arena, &src);
410 let collector = crate::collector::DefinitionCollector::new(
411 &self.codebase,
412 file,
413 &src,
414 &result.source_map,
415 );
416 let issues = collector.collect(&result.program);
417 all_issues.extend(issues);
418 }
419 }
420
421 self.codebase.invalidate_finalization();
424 self.codebase.finalize();
425 }
426 }
427
428 pub fn re_analyze_file(&self, file_path: &str, new_content: &str) -> AnalysisResult {
437 self.codebase.remove_file_definitions(file_path);
439
440 let file: Arc<str> = Arc::from(file_path);
442 let arena = bumpalo::Bump::new();
443 let parsed = php_rs_parser::parse(&arena, new_content);
444
445 let mut all_issues = Vec::new();
446
447 for err in &parsed.errors {
449 all_issues.push(Issue::new(
450 mir_issues::IssueKind::ParseError {
451 message: err.to_string(),
452 },
453 mir_issues::Location {
454 file: file.clone(),
455 line: 1,
456 col_start: 0,
457 col_end: 0,
458 },
459 ));
460 }
461
462 let collector = DefinitionCollector::new(
463 &self.codebase,
464 file.clone(),
465 new_content,
466 &parsed.source_map,
467 );
468 all_issues.extend(collector.collect(&parsed.program));
469
470 self.codebase.finalize();
472
473 let (body_issues, symbols) = self.analyze_bodies(
475 &parsed.program,
476 file.clone(),
477 new_content,
478 &parsed.source_map,
479 );
480 all_issues.extend(body_issues);
481
482 if let Some(cache) = &self.cache {
484 let h = hash_content(new_content);
485 cache.evict_with_dependents(&[file_path.to_string()]);
486 let ref_locs = extract_reference_locations(&self.codebase, &file);
487 cache.put(file_path, h, all_issues.clone(), ref_locs);
488 }
489
490 AnalysisResult {
491 issues: all_issues,
492 type_envs: HashMap::new(),
493 symbols,
494 }
495 }
496
497 pub fn analyze_source(source: &str) -> AnalysisResult {
500 use crate::collector::DefinitionCollector;
501 let analyzer = ProjectAnalyzer::new();
502 analyzer.load_stubs();
503 let file: Arc<str> = Arc::from("<source>");
504 let arena = bumpalo::Bump::new();
505 let result = php_rs_parser::parse(&arena, source);
506 let mut all_issues = Vec::new();
507 let collector =
508 DefinitionCollector::new(&analyzer.codebase, file.clone(), source, &result.source_map);
509 all_issues.extend(collector.collect(&result.program));
510 analyzer.codebase.finalize();
511 let mut type_envs = std::collections::HashMap::new();
512 let mut all_symbols = Vec::new();
513 all_issues.extend(analyzer.analyze_bodies_typed(
514 &result.program,
515 file.clone(),
516 source,
517 &result.source_map,
518 &mut type_envs,
519 &mut all_symbols,
520 ));
521 AnalysisResult {
522 issues: all_issues,
523 type_envs,
524 symbols: all_symbols,
525 }
526 }
527
528 fn analyze_bodies<'arena, 'src>(
531 &self,
532 program: &php_ast::ast::Program<'arena, 'src>,
533 file: Arc<str>,
534 source: &str,
535 source_map: &php_rs_parser::source_map::SourceMap,
536 ) -> (Vec<mir_issues::Issue>, Vec<crate::symbol::ResolvedSymbol>) {
537 use php_ast::ast::StmtKind;
538
539 let mut all_issues = Vec::new();
540 let mut all_symbols = Vec::new();
541
542 for stmt in program.stmts.iter() {
543 match &stmt.kind {
544 StmtKind::Function(decl) => {
545 self.analyze_fn_decl(
546 decl,
547 &file,
548 source,
549 source_map,
550 &mut all_issues,
551 &mut all_symbols,
552 );
553 }
554 StmtKind::Class(decl) => {
555 self.analyze_class_decl(
556 decl,
557 &file,
558 source,
559 source_map,
560 &mut all_issues,
561 &mut all_symbols,
562 );
563 }
564 StmtKind::Enum(decl) => {
565 self.analyze_enum_decl(decl, &file, source, source_map, &mut all_issues);
566 }
567 StmtKind::Namespace(ns) => {
568 if let php_ast::ast::NamespaceBody::Braced(stmts) = &ns.body {
569 for inner in stmts.iter() {
570 match &inner.kind {
571 StmtKind::Function(decl) => {
572 self.analyze_fn_decl(
573 decl,
574 &file,
575 source,
576 source_map,
577 &mut all_issues,
578 &mut all_symbols,
579 );
580 }
581 StmtKind::Class(decl) => {
582 self.analyze_class_decl(
583 decl,
584 &file,
585 source,
586 source_map,
587 &mut all_issues,
588 &mut all_symbols,
589 );
590 }
591 StmtKind::Enum(decl) => {
592 self.analyze_enum_decl(
593 decl,
594 &file,
595 source,
596 source_map,
597 &mut all_issues,
598 );
599 }
600 _ => {}
601 }
602 }
603 }
604 }
605 _ => {}
606 }
607 }
608
609 (all_issues, all_symbols)
610 }
611
612 #[allow(clippy::too_many_arguments)]
614 fn analyze_fn_decl<'arena, 'src>(
615 &self,
616 decl: &php_ast::ast::FunctionDecl<'arena, 'src>,
617 file: &Arc<str>,
618 source: &str,
619 source_map: &php_rs_parser::source_map::SourceMap,
620 all_issues: &mut Vec<mir_issues::Issue>,
621 all_symbols: &mut Vec<crate::symbol::ResolvedSymbol>,
622 ) {
623 let fn_name = decl.name;
624 let body = &decl.body;
625 for param in decl.params.iter() {
627 if let Some(hint) = ¶m.type_hint {
628 check_type_hint_classes(hint, &self.codebase, file, source, source_map, all_issues);
629 }
630 }
631 if let Some(hint) = &decl.return_type {
632 check_type_hint_classes(hint, &self.codebase, file, source, source_map, all_issues);
633 }
634 use crate::context::Context;
635 use crate::stmt::StatementsAnalyzer;
636 use mir_issues::IssueBuffer;
637
638 let resolved_fn = self.codebase.resolve_class_name(file.as_ref(), fn_name);
640 let func_opt: Option<mir_codebase::storage::FunctionStorage> = self
641 .codebase
642 .functions
643 .get(resolved_fn.as_str())
644 .map(|r| r.clone())
645 .or_else(|| self.codebase.functions.get(fn_name).map(|r| r.clone()))
646 .or_else(|| {
647 self.codebase
648 .functions
649 .iter()
650 .find(|e| e.short_name.as_ref() == fn_name)
651 .map(|e| e.value().clone())
652 });
653
654 let fqn = func_opt.as_ref().map(|f| f.fqn.clone());
655 let (params, return_ty): (Vec<mir_codebase::FnParam>, _) = match &func_opt {
660 Some(f)
661 if f.params.len() == decl.params.len()
662 && f.params
663 .iter()
664 .zip(decl.params.iter())
665 .all(|(cp, ap)| cp.name.as_ref() == ap.name) =>
666 {
667 (f.params.clone(), f.return_type.clone())
668 }
669 _ => {
670 let ast_params = decl
671 .params
672 .iter()
673 .map(|p| mir_codebase::FnParam {
674 name: Arc::from(p.name),
675 ty: None,
676 default: p.default.as_ref().map(|_| mir_types::Union::mixed()),
677 is_variadic: p.variadic,
678 is_byref: p.by_ref,
679 is_optional: p.default.is_some() || p.variadic,
680 })
681 .collect();
682 (ast_params, None)
683 }
684 };
685
686 let mut ctx = Context::for_function(¶ms, return_ty, None, None, None, false, true);
687 let mut buf = IssueBuffer::new();
688 let mut sa = StatementsAnalyzer::new(
689 &self.codebase,
690 file.clone(),
691 source,
692 source_map,
693 &mut buf,
694 all_symbols,
695 );
696 sa.analyze_stmts(body, &mut ctx);
697 let inferred = merge_return_types(&sa.return_types);
698 drop(sa);
699
700 emit_unused_params(¶ms, &ctx, "", file, all_issues);
701 emit_unused_variables(&ctx, file, all_issues);
702 all_issues.extend(buf.into_issues());
703
704 if let Some(fqn) = fqn {
705 if let Some(mut func) = self.codebase.functions.get_mut(fqn.as_ref()) {
706 func.inferred_return_type = Some(inferred);
707 }
708 }
709 }
710
711 #[allow(clippy::too_many_arguments)]
713 fn analyze_class_decl<'arena, 'src>(
714 &self,
715 decl: &php_ast::ast::ClassDecl<'arena, 'src>,
716 file: &Arc<str>,
717 source: &str,
718 source_map: &php_rs_parser::source_map::SourceMap,
719 all_issues: &mut Vec<mir_issues::Issue>,
720 all_symbols: &mut Vec<crate::symbol::ResolvedSymbol>,
721 ) {
722 use crate::context::Context;
723 use crate::stmt::StatementsAnalyzer;
724 use mir_issues::IssueBuffer;
725
726 let class_name = decl.name.unwrap_or("<anonymous>");
727 let resolved = self.codebase.resolve_class_name(file.as_ref(), class_name);
730 let fqcn: &str = &resolved;
731 let parent_fqcn = self
732 .codebase
733 .classes
734 .get(fqcn)
735 .and_then(|c| c.parent.clone());
736
737 for member in decl.members.iter() {
738 let php_ast::ast::ClassMemberKind::Method(method) = &member.kind else {
739 continue;
740 };
741
742 for param in method.params.iter() {
744 if let Some(hint) = ¶m.type_hint {
745 check_type_hint_classes(
746 hint,
747 &self.codebase,
748 file,
749 source,
750 source_map,
751 all_issues,
752 );
753 }
754 }
755 if let Some(hint) = &method.return_type {
756 check_type_hint_classes(hint, &self.codebase, file, source, source_map, all_issues);
757 }
758
759 let Some(body) = &method.body else { continue };
760
761 let method_storage = self.codebase.get_method(fqcn, method.name);
762 let (params, return_ty) = method_storage
763 .as_ref()
764 .map(|m| (m.params.clone(), m.return_type.clone()))
765 .unwrap_or_default();
766
767 let is_ctor = method.name == "__construct";
768 let mut ctx = Context::for_method(
769 ¶ms,
770 return_ty,
771 Some(Arc::from(fqcn)),
772 parent_fqcn.clone(),
773 Some(Arc::from(fqcn)),
774 false,
775 is_ctor,
776 method.is_static,
777 );
778
779 let mut buf = IssueBuffer::new();
780 let mut sa = StatementsAnalyzer::new(
781 &self.codebase,
782 file.clone(),
783 source,
784 source_map,
785 &mut buf,
786 all_symbols,
787 );
788 sa.analyze_stmts(body, &mut ctx);
789 let inferred = merge_return_types(&sa.return_types);
790 drop(sa);
791
792 emit_unused_params(¶ms, &ctx, method.name, file, all_issues);
793 emit_unused_variables(&ctx, file, all_issues);
794 all_issues.extend(buf.into_issues());
795
796 if let Some(mut cls) = self.codebase.classes.get_mut(fqcn) {
797 if let Some(m) = cls.own_methods.get_mut(method.name) {
798 m.inferred_return_type = Some(inferred);
799 }
800 }
801 }
802 }
803
804 #[allow(clippy::too_many_arguments)]
806 fn analyze_bodies_typed<'arena, 'src>(
807 &self,
808 program: &php_ast::ast::Program<'arena, 'src>,
809 file: Arc<str>,
810 source: &str,
811 source_map: &php_rs_parser::source_map::SourceMap,
812 type_envs: &mut std::collections::HashMap<
813 crate::type_env::ScopeId,
814 crate::type_env::TypeEnv,
815 >,
816 all_symbols: &mut Vec<crate::symbol::ResolvedSymbol>,
817 ) -> Vec<mir_issues::Issue> {
818 use php_ast::ast::StmtKind;
819 let mut all_issues = Vec::new();
820 for stmt in program.stmts.iter() {
821 match &stmt.kind {
822 StmtKind::Function(decl) => {
823 self.analyze_fn_decl_typed(
824 decl,
825 &file,
826 source,
827 source_map,
828 &mut all_issues,
829 type_envs,
830 all_symbols,
831 );
832 }
833 StmtKind::Class(decl) => {
834 self.analyze_class_decl_typed(
835 decl,
836 &file,
837 source,
838 source_map,
839 &mut all_issues,
840 type_envs,
841 all_symbols,
842 );
843 }
844 StmtKind::Enum(decl) => {
845 self.analyze_enum_decl(decl, &file, source, source_map, &mut all_issues);
846 }
847 StmtKind::Namespace(ns) => {
848 if let php_ast::ast::NamespaceBody::Braced(stmts) = &ns.body {
849 for inner in stmts.iter() {
850 match &inner.kind {
851 StmtKind::Function(decl) => {
852 self.analyze_fn_decl_typed(
853 decl,
854 &file,
855 source,
856 source_map,
857 &mut all_issues,
858 type_envs,
859 all_symbols,
860 );
861 }
862 StmtKind::Class(decl) => {
863 self.analyze_class_decl_typed(
864 decl,
865 &file,
866 source,
867 source_map,
868 &mut all_issues,
869 type_envs,
870 all_symbols,
871 );
872 }
873 StmtKind::Enum(decl) => {
874 self.analyze_enum_decl(
875 decl,
876 &file,
877 source,
878 source_map,
879 &mut all_issues,
880 );
881 }
882 _ => {}
883 }
884 }
885 }
886 }
887 _ => {}
888 }
889 }
890 all_issues
891 }
892
893 #[allow(clippy::too_many_arguments)]
895 fn analyze_fn_decl_typed<'arena, 'src>(
896 &self,
897 decl: &php_ast::ast::FunctionDecl<'arena, 'src>,
898 file: &Arc<str>,
899 source: &str,
900 source_map: &php_rs_parser::source_map::SourceMap,
901 all_issues: &mut Vec<mir_issues::Issue>,
902 type_envs: &mut std::collections::HashMap<
903 crate::type_env::ScopeId,
904 crate::type_env::TypeEnv,
905 >,
906 all_symbols: &mut Vec<crate::symbol::ResolvedSymbol>,
907 ) {
908 use crate::context::Context;
909 use crate::stmt::StatementsAnalyzer;
910 use mir_issues::IssueBuffer;
911
912 let fn_name = decl.name;
913 let body = &decl.body;
914
915 for param in decl.params.iter() {
916 if let Some(hint) = ¶m.type_hint {
917 check_type_hint_classes(hint, &self.codebase, file, source, source_map, all_issues);
918 }
919 }
920 if let Some(hint) = &decl.return_type {
921 check_type_hint_classes(hint, &self.codebase, file, source, source_map, all_issues);
922 }
923
924 let resolved_fn = self.codebase.resolve_class_name(file.as_ref(), fn_name);
925 let func_opt: Option<mir_codebase::storage::FunctionStorage> = self
926 .codebase
927 .functions
928 .get(resolved_fn.as_str())
929 .map(|r| r.clone())
930 .or_else(|| self.codebase.functions.get(fn_name).map(|r| r.clone()))
931 .or_else(|| {
932 self.codebase
933 .functions
934 .iter()
935 .find(|e| e.short_name.as_ref() == fn_name)
936 .map(|e| e.value().clone())
937 });
938
939 let fqn = func_opt.as_ref().map(|f| f.fqn.clone());
940 let (params, return_ty): (Vec<mir_codebase::FnParam>, _) = match &func_opt {
941 Some(f)
942 if f.params.len() == decl.params.len()
943 && f.params
944 .iter()
945 .zip(decl.params.iter())
946 .all(|(cp, ap)| cp.name.as_ref() == ap.name) =>
947 {
948 (f.params.clone(), f.return_type.clone())
949 }
950 _ => {
951 let ast_params = decl
952 .params
953 .iter()
954 .map(|p| mir_codebase::FnParam {
955 name: Arc::from(p.name),
956 ty: None,
957 default: p.default.as_ref().map(|_| mir_types::Union::mixed()),
958 is_variadic: p.variadic,
959 is_byref: p.by_ref,
960 is_optional: p.default.is_some() || p.variadic,
961 })
962 .collect();
963 (ast_params, None)
964 }
965 };
966
967 let mut ctx = Context::for_function(¶ms, return_ty, None, None, None, false, true);
968 let mut buf = IssueBuffer::new();
969 let mut sa = StatementsAnalyzer::new(
970 &self.codebase,
971 file.clone(),
972 source,
973 source_map,
974 &mut buf,
975 all_symbols,
976 );
977 sa.analyze_stmts(body, &mut ctx);
978 let inferred = merge_return_types(&sa.return_types);
979 drop(sa);
980
981 let scope_name = fqn.clone().unwrap_or_else(|| Arc::from(fn_name));
983 type_envs.insert(
984 crate::type_env::ScopeId::Function {
985 file: file.clone(),
986 name: scope_name,
987 },
988 crate::type_env::TypeEnv::new(ctx.vars.clone()),
989 );
990
991 emit_unused_params(¶ms, &ctx, "", file, all_issues);
992 emit_unused_variables(&ctx, file, all_issues);
993 all_issues.extend(buf.into_issues());
994
995 if let Some(fqn) = fqn {
996 if let Some(mut func) = self.codebase.functions.get_mut(fqn.as_ref()) {
997 func.inferred_return_type = Some(inferred);
998 }
999 }
1000 }
1001
1002 #[allow(clippy::too_many_arguments)]
1004 fn analyze_class_decl_typed<'arena, 'src>(
1005 &self,
1006 decl: &php_ast::ast::ClassDecl<'arena, 'src>,
1007 file: &Arc<str>,
1008 source: &str,
1009 source_map: &php_rs_parser::source_map::SourceMap,
1010 all_issues: &mut Vec<mir_issues::Issue>,
1011 type_envs: &mut std::collections::HashMap<
1012 crate::type_env::ScopeId,
1013 crate::type_env::TypeEnv,
1014 >,
1015 all_symbols: &mut Vec<crate::symbol::ResolvedSymbol>,
1016 ) {
1017 use crate::context::Context;
1018 use crate::stmt::StatementsAnalyzer;
1019 use mir_issues::IssueBuffer;
1020
1021 let class_name = decl.name.unwrap_or("<anonymous>");
1022 let resolved = self.codebase.resolve_class_name(file.as_ref(), class_name);
1023 let fqcn: &str = &resolved;
1024 let parent_fqcn = self
1025 .codebase
1026 .classes
1027 .get(fqcn)
1028 .and_then(|c| c.parent.clone());
1029
1030 for member in decl.members.iter() {
1031 let php_ast::ast::ClassMemberKind::Method(method) = &member.kind else {
1032 continue;
1033 };
1034
1035 for param in method.params.iter() {
1036 if let Some(hint) = ¶m.type_hint {
1037 check_type_hint_classes(
1038 hint,
1039 &self.codebase,
1040 file,
1041 source,
1042 source_map,
1043 all_issues,
1044 );
1045 }
1046 }
1047 if let Some(hint) = &method.return_type {
1048 check_type_hint_classes(hint, &self.codebase, file, source, source_map, all_issues);
1049 }
1050
1051 let Some(body) = &method.body else { continue };
1052
1053 let method_storage = self.codebase.get_method(fqcn, method.name);
1054 let (params, return_ty) = method_storage
1055 .as_ref()
1056 .map(|m| (m.params.clone(), m.return_type.clone()))
1057 .unwrap_or_default();
1058
1059 let is_ctor = method.name == "__construct";
1060 let mut ctx = Context::for_method(
1061 ¶ms,
1062 return_ty,
1063 Some(Arc::from(fqcn)),
1064 parent_fqcn.clone(),
1065 Some(Arc::from(fqcn)),
1066 false,
1067 is_ctor,
1068 method.is_static,
1069 );
1070
1071 let mut buf = IssueBuffer::new();
1072 let mut sa = StatementsAnalyzer::new(
1073 &self.codebase,
1074 file.clone(),
1075 source,
1076 source_map,
1077 &mut buf,
1078 all_symbols,
1079 );
1080 sa.analyze_stmts(body, &mut ctx);
1081 let inferred = merge_return_types(&sa.return_types);
1082 drop(sa);
1083
1084 type_envs.insert(
1086 crate::type_env::ScopeId::Method {
1087 class: Arc::from(fqcn),
1088 method: Arc::from(method.name),
1089 },
1090 crate::type_env::TypeEnv::new(ctx.vars.clone()),
1091 );
1092
1093 emit_unused_params(¶ms, &ctx, method.name, file, all_issues);
1094 emit_unused_variables(&ctx, file, all_issues);
1095 all_issues.extend(buf.into_issues());
1096
1097 if let Some(mut cls) = self.codebase.classes.get_mut(fqcn) {
1098 if let Some(m) = cls.own_methods.get_mut(method.name) {
1099 m.inferred_return_type = Some(inferred);
1100 }
1101 }
1102 }
1103 }
1104
1105 pub fn discover_files(root: &Path) -> Vec<PathBuf> {
1107 if root.is_file() {
1108 return vec![root.to_path_buf()];
1109 }
1110 let mut files = Vec::new();
1111 collect_php_files(root, &mut files);
1112 files
1113 }
1114
1115 pub fn collect_types_only(&self, paths: &[PathBuf]) {
1118 let file_data: Vec<(Arc<str>, String)> = paths
1119 .par_iter()
1120 .filter_map(|path| {
1121 std::fs::read_to_string(path)
1122 .ok()
1123 .map(|src| (Arc::from(path.to_string_lossy().as_ref()), src))
1124 })
1125 .collect();
1126
1127 for (file, src) in &file_data {
1128 let arena = bumpalo::Bump::new();
1129 let result = php_rs_parser::parse(&arena, src);
1130 let collector =
1131 DefinitionCollector::new(&self.codebase, file.clone(), src, &result.source_map);
1132 let _ = collector.collect(&result.program);
1134 }
1135 }
1136
1137 #[allow(clippy::too_many_arguments)]
1139 fn analyze_enum_decl<'arena, 'src>(
1140 &self,
1141 decl: &php_ast::ast::EnumDecl<'arena, 'src>,
1142 file: &Arc<str>,
1143 source: &str,
1144 source_map: &php_rs_parser::source_map::SourceMap,
1145 all_issues: &mut Vec<mir_issues::Issue>,
1146 ) {
1147 use php_ast::ast::EnumMemberKind;
1148 for member in decl.members.iter() {
1149 let EnumMemberKind::Method(method) = &member.kind else {
1150 continue;
1151 };
1152 for param in method.params.iter() {
1153 if let Some(hint) = ¶m.type_hint {
1154 check_type_hint_classes(
1155 hint,
1156 &self.codebase,
1157 file,
1158 source,
1159 source_map,
1160 all_issues,
1161 );
1162 }
1163 }
1164 if let Some(hint) = &method.return_type {
1165 check_type_hint_classes(hint, &self.codebase, file, source, source_map, all_issues);
1166 }
1167 }
1168 }
1169}
1170
/// `Default` is equivalent to [`ProjectAnalyzer::new`].
impl Default for ProjectAnalyzer {
    /// Returns `ProjectAnalyzer::new()`.
    fn default() -> Self {
        Self::new()
    }
}
1176
1177fn offset_to_line_col_utf16(
1184 source: &str,
1185 offset: u32,
1186 source_map: &php_rs_parser::source_map::SourceMap,
1187) -> (u32, u16) {
1188 let lc = source_map.offset_to_line_col(offset);
1189 let line = lc.line + 1;
1190
1191 let byte_offset = offset as usize;
1193 let line_start_byte = if byte_offset == 0 {
1194 0
1195 } else {
1196 source[..byte_offset]
1198 .rfind('\n')
1199 .map(|p| p + 1)
1200 .unwrap_or(0)
1201 };
1202
1203 let col_utf16 = source[line_start_byte..byte_offset]
1205 .chars()
1206 .map(|c| c.len_utf16() as u16)
1207 .sum();
1208
1209 (line, col_utf16)
1210}
1211
1212fn check_type_hint_classes<'arena, 'src>(
1219 hint: &php_ast::ast::TypeHint<'arena, 'src>,
1220 codebase: &Codebase,
1221 file: &Arc<str>,
1222 source: &str,
1223 source_map: &php_rs_parser::source_map::SourceMap,
1224 issues: &mut Vec<mir_issues::Issue>,
1225) {
1226 use php_ast::ast::TypeHintKind;
1227 match &hint.kind {
1228 TypeHintKind::Named(name) => {
1229 let name_str = crate::parser::name_to_string(name);
1230 if is_pseudo_type(&name_str) {
1232 return;
1233 }
1234 let resolved = codebase.resolve_class_name(file.as_ref(), &name_str);
1235 if !codebase.type_exists(&resolved) {
1236 let (line, col_start) =
1237 offset_to_line_col_utf16(source, hint.span.start, source_map);
1238 let col_end = if hint.span.start < hint.span.end {
1239 let (_end_line, end_col) =
1240 offset_to_line_col_utf16(source, hint.span.end, source_map);
1241 end_col
1242 } else {
1243 col_start
1244 };
1245 issues.push(
1246 mir_issues::Issue::new(
1247 mir_issues::IssueKind::UndefinedClass { name: resolved },
1248 mir_issues::Location {
1249 file: file.clone(),
1250 line,
1251 col_start,
1252 col_end: col_end.max(col_start + 1),
1253 },
1254 )
1255 .with_snippet(crate::parser::span_text(source, hint.span).unwrap_or_default()),
1256 );
1257 }
1258 }
1259 TypeHintKind::Nullable(inner) => {
1260 check_type_hint_classes(inner, codebase, file, source, source_map, issues);
1261 }
1262 TypeHintKind::Union(parts) | TypeHintKind::Intersection(parts) => {
1263 for part in parts.iter() {
1264 check_type_hint_classes(part, codebase, file, source, source_map, issues);
1265 }
1266 }
1267 TypeHintKind::Keyword(_, _) => {} }
1269}
1270
/// Returns `true` when `name` is one of the reserved type names that never
/// refer to a user-defined class (`self`, `static`, `void`, ...).
///
/// The comparison ignores ASCII case and does not allocate.
fn is_pseudo_type(name: &str) -> bool {
    const PSEUDO_TYPES: [&str; 12] = [
        "self", "static", "parent", "null", "true", "false", "never", "void", "mixed", "object",
        "callable", "iterable",
    ];
    PSEUDO_TYPES
        .iter()
        .any(|pseudo| name.eq_ignore_ascii_case(pseudo))
}
1290
/// Magic method names for which `emit_unused_params` never reports unused
/// parameters.
// NOTE(review): presumably exempt because PHP dictates these signatures —
// confirm the intent with the author.
const MAGIC_METHODS_WITH_RUNTIME_PARAMS: &[&str] = &[
    "__get",
    "__set",
    "__call",
    "__callStatic",
    "__isset",
    "__unset",
];
1300
1301fn emit_unused_params(
1304 params: &[mir_codebase::FnParam],
1305 ctx: &crate::context::Context,
1306 method_name: &str,
1307 file: &Arc<str>,
1308 issues: &mut Vec<mir_issues::Issue>,
1309) {
1310 if MAGIC_METHODS_WITH_RUNTIME_PARAMS.contains(&method_name) {
1311 return;
1312 }
1313 for p in params {
1314 let name = p.name.as_ref().trim_start_matches('$');
1315 if !ctx.read_vars.contains(name) {
1316 issues.push(
1317 mir_issues::Issue::new(
1318 mir_issues::IssueKind::UnusedParam {
1319 name: name.to_string(),
1320 },
1321 mir_issues::Location {
1322 file: file.clone(),
1323 line: 1,
1324 col_start: 0,
1325 col_end: 0,
1326 },
1327 )
1328 .with_snippet(format!("${}", name)),
1329 );
1330 }
1331 }
1332}
1333
1334fn emit_unused_variables(
1335 ctx: &crate::context::Context,
1336 file: &Arc<str>,
1337 issues: &mut Vec<mir_issues::Issue>,
1338) {
1339 const SUPERGLOBALS: &[&str] = &[
1341 "_SERVER", "_GET", "_POST", "_REQUEST", "_SESSION", "_COOKIE", "_FILES", "_ENV", "GLOBALS",
1342 ];
1343 for name in &ctx.assigned_vars {
1344 if ctx.param_names.contains(name) {
1345 continue;
1346 }
1347 if SUPERGLOBALS.contains(&name.as_str()) {
1348 continue;
1349 }
1350 if name == "this" {
1353 continue;
1354 }
1355 if name.starts_with('_') {
1356 continue;
1357 }
1358 if !ctx.read_vars.contains(name) {
1359 issues.push(mir_issues::Issue::new(
1360 mir_issues::IssueKind::UnusedVariable { name: name.clone() },
1361 mir_issues::Location {
1362 file: file.clone(),
1363 line: 1,
1364 col_start: 0,
1365 col_end: 0,
1366 },
1367 ));
1368 }
1369 }
1370}
1371
1372pub fn merge_return_types(return_types: &[Union]) -> Union {
1375 if return_types.is_empty() {
1376 return Union::single(mir_types::Atomic::TVoid);
1377 }
1378 return_types
1379 .iter()
1380 .fold(Union::empty(), |acc, t| Union::merge(&acc, t))
1381}
1382
/// Recursively appends every `*.php` file found under `dir` to `out`.
///
/// * Directories that cannot be read, and entries that fail to load, are
///   silently skipped.
/// * Entries reported as symlinks are never followed or collected.
/// * Directories named `vendor`, `.git`, `node_modules`, `.cache` or
///   `.pnpm-store` are not descended into.
/// * Only the exact extension `php` matches.
///
/// Results are pushed in the order `read_dir` yields them.
pub(crate) fn collect_php_files(dir: &Path, out: &mut Vec<PathBuf>) {
    const SKIPPED_DIRS: [&str; 5] = ["vendor", ".git", "node_modules", ".cache", ".pnpm-store"];

    let listing = match std::fs::read_dir(dir) {
        Ok(listing) => listing,
        Err(_) => return,
    };

    for item in listing.flatten() {
        // An entry whose type cannot be determined is treated as a non-symlink.
        if matches!(item.file_type(), Ok(kind) if kind.is_symlink()) {
            continue;
        }

        let candidate = item.path();
        if candidate.is_dir() {
            let dir_name = candidate
                .file_name()
                .and_then(|n| n.to_str())
                .unwrap_or("");
            if !SKIPPED_DIRS.contains(&dir_name) {
                collect_php_files(&candidate, out);
            }
            continue;
        }

        if matches!(candidate.extension().and_then(|e| e.to_str()), Some("php")) {
            out.push(candidate);
        }
    }
}
1406
1407fn build_reverse_deps(codebase: &Codebase) -> HashMap<String, HashSet<String>> {
1423 let mut reverse: HashMap<String, HashSet<String>> = HashMap::new();
1424
1425 let mut add_edge = |symbol: &str, dependent_file: &str| {
1427 if let Some(defining_file) = codebase.symbol_to_file.get(symbol) {
1428 let def = defining_file.as_ref().to_string();
1429 if def != dependent_file {
1430 reverse
1431 .entry(def)
1432 .or_default()
1433 .insert(dependent_file.to_string());
1434 }
1435 }
1436 };
1437
1438 for entry in codebase.file_imports.iter() {
1440 let file = entry.key().as_ref().to_string();
1441 for fqcn in entry.value().values() {
1442 add_edge(fqcn, &file);
1443 }
1444 }
1445
1446 for entry in codebase.classes.iter() {
1448 let defining = {
1449 let fqcn = entry.key().as_ref();
1450 codebase
1451 .symbol_to_file
1452 .get(fqcn)
1453 .map(|f| f.as_ref().to_string())
1454 };
1455 let Some(file) = defining else { continue };
1456
1457 let cls = entry.value();
1458 if let Some(ref parent) = cls.parent {
1459 add_edge(parent.as_ref(), &file);
1460 }
1461 for iface in &cls.interfaces {
1462 add_edge(iface.as_ref(), &file);
1463 }
1464 for tr in &cls.traits {
1465 add_edge(tr.as_ref(), &file);
1466 }
1467 }
1468
1469 reverse
1470}
1471
1472fn extract_reference_locations(codebase: &Codebase, file: &Arc<str>) -> Vec<(String, u32, u32)> {
1477 let Some(symbol_keys) = codebase.file_symbol_references.get(file.as_ref()) else {
1478 return Vec::new();
1479 };
1480 let mut out = Vec::new();
1481 for key in symbol_keys.iter() {
1482 let Some(by_file) = codebase.symbol_reference_locations.get(key.as_ref()) else {
1483 continue;
1484 };
1485 let Some(spans) = by_file.get(file.as_ref()) else {
1486 continue;
1487 };
1488 for &(s, e) in spans.iter() {
1489 out.push((key.to_string(), s, e));
1490 }
1491 }
1492 out
1493}
1494
1495pub struct AnalysisResult {
1498 pub issues: Vec<Issue>,
1499 pub type_envs: std::collections::HashMap<crate::type_env::ScopeId, crate::type_env::TypeEnv>,
1500 pub symbols: Vec<crate::symbol::ResolvedSymbol>,
1502}
1503
1504impl AnalysisResult {
1505 pub fn error_count(&self) -> usize {
1506 self.issues
1507 .iter()
1508 .filter(|i| i.severity == mir_issues::Severity::Error)
1509 .count()
1510 }
1511
1512 pub fn warning_count(&self) -> usize {
1513 self.issues
1514 .iter()
1515 .filter(|i| i.severity == mir_issues::Severity::Warning)
1516 .count()
1517 }
1518
1519 pub fn issues_by_file(&self) -> HashMap<std::sync::Arc<str>, Vec<&Issue>> {
1525 let mut map: HashMap<std::sync::Arc<str>, Vec<&Issue>> = HashMap::new();
1526 for issue in &self.issues {
1527 map.entry(issue.location.file.clone())
1528 .or_default()
1529 .push(issue);
1530 }
1531 map
1532 }
1533
1534 pub fn symbol_at(
1543 &self,
1544 file: &str,
1545 byte_offset: u32,
1546 ) -> Option<&crate::symbol::ResolvedSymbol> {
1547 self.symbols
1548 .iter()
1549 .filter(|s| {
1550 s.file.as_ref() == file && s.span.start <= byte_offset && byte_offset < s.span.end
1551 })
1552 .min_by_key(|s| s.span.end - s.span.start)
1553 }
1554}