Skip to main content

mir_analyzer/
lib.rs

use rustc_hash::{FxHashMap, FxHashSet};
2
3pub(crate) mod analyzer_db;
4pub(crate) mod attributes;
5pub mod batch;
6pub(crate) mod body_analysis;
7#[doc(hidden)]
8pub mod cache;
9pub(crate) mod call;
10pub(crate) mod class;
11pub(crate) mod collector;
12#[doc(hidden)]
13pub mod db;
14pub(crate) mod dead_code;
15pub(crate) mod diagnostics;
16pub(crate) mod expr;
17pub mod file_analyzer;
18pub(crate) mod flow_state;
19pub(crate) mod generic;
20pub mod indexing;
21#[doc(hidden)]
22pub mod metrics;
23pub(crate) mod narrowing;
24#[doc(hidden)]
25pub mod parse_cache;
26#[doc(hidden)]
27pub mod parser;
28pub mod php_version;
29pub mod prelude;
30pub mod session;
31pub mod source_provider;
32pub(crate) mod stmt;
33#[doc(hidden)]
34pub mod stub_cache;
35#[doc(hidden)]
36pub mod stubs;
37pub(crate) mod subtype;
38pub mod suppression;
39pub(crate) mod taint;
40pub(crate) mod type_env;
41
42pub use batch::{
43    analyze_source, dead_code_issue_kinds, discover_files, AnalysisResult, BatchOptions,
44};
45pub use file_analyzer::{BatchFileAnalyzer, FileAnalysis, FileAnalyzer, ParsedFile};
46pub use indexing::{IndexBatchOutcome, IndexCancel, IndexParallelism};
47pub use parser::type_from_hint::type_from_hint;
48pub use parser::{DocblockParser, ParsedDocblock};
49pub use php_version::{ParsePhpVersionError, PhpVersion};
50pub use session::AnalysisSession;
51pub use source_provider::{FsSourceProvider, SourceProvider};
52pub use stubs::{
53    is_builtin_function, stub_files, stub_path_for_class, ChainedClassResolver, StubClassResolver,
54    StubVfs,
55};
56
57// ============================================================================
58// Analysis entry points
59// ============================================================================
60//
61// `AnalysisSession` is the single analysis engine. It supports two usage modes:
62//
63// - Batch (CLI, CI, bulk analysis): use `analyze_paths` / `BatchOptions` to
64//   run definition collection and body analysis over many files in parallel.
65//
66// - Incremental (LSP, watch mode): ingest files as they change; per-file
67//   results come from `FileAnalyzer::analyze`. Builder-style configuration
68//   (`with_cache`, `with_psr4`, …).
69//
70// The two phases of analysis are:
71//   1. Definition collection — discovers classes, functions, constants in a
72//      file and registers them in the salsa database.
73//   2. Body analysis (`BodyAnalyzer`) — walks function/method bodies,
74//      inferring types and emitting issues.
75
/// A single point in a source file.
///
/// `line` is 1-based; `column` is 0-based and counted in Unicode code points.
/// The derived ordering compares `line` first and `column` second, so
/// positions sort in reading order.
#[derive(Clone, Copy, Debug, PartialEq, Eq, PartialOrd, Ord, Hash)]
pub struct Position {
    /// 1-based line number.
    pub line: u32,
    /// 0-based column, in code points.
    pub column: u32,
}
82
83/// A range in source code: start and end positions.
84#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
85pub struct Range {
86    pub start: Position,
87    pub end: Position,
88}
89
/// Typed identifier for a code entity the analyzer can resolve.
///
/// This takes the place of the earlier stringly-typed `&str` keys. Because
/// PHP dispatches methods case-insensitively, [`Name::method`] lowercases the
/// method name when the value is built, so callers passing mixed-case names
/// do not silently get empty results.
#[derive(Clone, Debug, PartialEq, Eq, Hash)]
pub enum Name {
    /// A class, interface, trait, or enum (FQCN).
    Class(std::sync::Arc<str>),
    /// A global function (FQN).
    Function(std::sync::Arc<str>),
    /// An instance or static method.
    Method {
        class: std::sync::Arc<str>,
        name: std::sync::Arc<str>,
    },
    /// A class property.
    Property {
        class: std::sync::Arc<str>,
        name: std::sync::Arc<str>,
    },
    /// A class / interface / enum constant.
    ClassConstant {
        class: std::sync::Arc<str>,
        name: std::sync::Arc<str>,
    },
    /// A global constant.
    GlobalConstant(std::sync::Arc<str>),
}

impl Name {
    /// Build a method symbol for `class`, storing `name` ASCII-lowercased
    /// (PHP method names are case-insensitive).
    pub fn method(class: impl Into<std::sync::Arc<str>>, name: &str) -> Self {
        let class = class.into();
        let name: std::sync::Arc<str> = name.to_ascii_lowercase().into();
        Self::Method { class, name }
    }

    /// Build a class symbol from its fully-qualified name.
    pub fn class(fqcn: impl Into<std::sync::Arc<str>>) -> Self {
        Self::Class(fqcn.into())
    }

    /// Build a global-function symbol from its fully-qualified name.
    pub fn function(fqn: impl Into<std::sync::Arc<str>>) -> Self {
        Self::Function(fqn.into())
    }

    /// Build a property symbol. The property name is stored as given.
    pub fn property(
        class: impl Into<std::sync::Arc<str>>,
        name: impl Into<std::sync::Arc<str>>,
    ) -> Self {
        let class = class.into();
        let name = name.into();
        Self::Property { class, name }
    }

    /// Build a class-constant symbol. The constant name is stored as given.
    pub fn class_constant(
        class: impl Into<std::sync::Arc<str>>,
        name: impl Into<std::sync::Arc<str>>,
    ) -> Self {
        let class = class.into();
        let name = name.into();
        Self::ClassConstant { class, name }
    }

    /// Build a global-constant symbol from its fully-qualified name.
    pub fn global_constant(fqn: impl Into<std::sync::Arc<str>>) -> Self {
        Self::GlobalConstant(fqn.into())
    }

    /// The codebase lookup key for this symbol (used internally for the
    /// reference-locations index). Stable across releases.
    ///
    /// Top-level symbols yield their name unchanged; class members yield
    /// `"{class}::{name}"`.
    pub fn codebase_key(&self) -> String {
        match self {
            Self::Class(path) | Self::Function(path) | Self::GlobalConstant(path) => {
                path.to_string()
            }
            Self::Method { class, name }
            | Self::Property { class, name }
            | Self::ClassConstant { class, name } => format!("{class}::{name}"),
        }
    }
}
181
/// Why a symbol lookup produced no location.
#[derive(Clone, Debug, PartialEq, Eq)]
pub enum SymbolLookupError {
    /// The codebase contains no symbol with the requested name.
    NotFound,
    /// The symbol is known, but no source location was recorded for it
    /// (e.g. a stub-only declaration without a span).
    NoSourceLocation,
}
191
/// Result of an [`AnalysisSession::load_class`] attempt.
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
pub enum LoadOutcome {
    /// The symbol was already in the session, so nothing had to be done.
    AlreadyLoaded,
    /// The configured [`ClassResolver`] located the symbol and its defining
    /// file was ingested.
    Loaded,
    /// Loading failed: no resolver is configured, the resolver could not map
    /// the FQCN to a file, or the resolved file could not be read / did not
    /// define the requested symbol.
    NotResolvable,
}

impl LoadOutcome {
    /// Whether the symbol is present in the session after the attempt,
    /// regardless of whether it was already there or freshly loaded.
    pub fn is_loaded(self) -> bool {
        // Exhaustive on purpose: a new variant must decide which side it is on.
        match self {
            Self::AlreadyLoaded | Self::Loaded => true,
            Self::NotResolvable => false,
        }
    }
}
213
/// Strategy object that maps a fully-qualified class name to the file
/// expected to define it.
///
/// The analyzer does not scan `vendor/` or the filesystem by itself; when it
/// needs a symbol it consults a `ClassResolver`. `mir_analyzer::Psr4Map` is
/// the built-in implementation for Composer-based projects. Projects with
/// other conventions (WordPress, Drupal, a custom autoloader, a
/// workspace-walk index) provide their own implementation.
///
/// Implementors must be `Send + Sync`.
pub trait ClassResolver: Send + Sync {
    /// Return the path of the file that defines `fqcn`, or `None` when it
    /// cannot be resolved — in which case the analyzer falls back to
    /// emitting `UndefinedClass`.
    fn resolve(&self, fqcn: &str) -> Option<std::path::PathBuf>;
}
226
227impl ClassResolver for composer::Psr4Map {
228    fn resolve(&self, fqcn: &str) -> Option<std::path::PathBuf> {
229        composer::Psr4Map::resolve(self, fqcn)
230    }
231}
232
233impl std::fmt::Display for SymbolLookupError {
234    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
235        match self {
236            SymbolLookupError::NotFound => write!(f, "symbol not found"),
237            SymbolLookupError::NoSourceLocation => write!(f, "symbol has no source location"),
238        }
239    }
240}
241
242impl std::error::Error for SymbolLookupError {}
243
244/// Hover information for a symbol at a source location.
245/// Includes the inferred type, optional docstring, and location of definition.
246#[derive(Debug, Clone)]
247pub struct HoverInfo {
248    /// Inferred type of the symbol.
249    pub ty: Type,
250    /// Docstring / documentation comment for the symbol (if available).
251    pub docstring: Option<String>,
252    /// Source location of the symbol's definition.
253    pub definition: Option<mir_types::Location>,
254}
255
/// File dependency graph: tracks which files depend on which other files.
/// Used for incremental invalidation in LSP servers and build systems.
///
/// Edges are stored in both directions so that forward ("what does this file
/// need?") and reverse ("who needs this file?") queries are both map lookups.
#[derive(Debug, Clone)]
pub struct DependencyGraph {
    /// Direct dependencies: file → [files it depends on]
    dependencies: FxHashMap<String, Vec<String>>,
    /// Reverse dependencies: file → [files that depend on it]
    dependents: FxHashMap<String, Vec<String>>,
}

impl DependencyGraph {
    /// Files that `file` directly depends on (imports, parent classes, interfaces, traits).
    ///
    /// Returns an empty slice when `file` has no recorded edges.
    pub fn dependencies_of(&self, file: &str) -> &[String] {
        Self::neighbors(&self.dependencies, file)
    }

    /// Files that directly depend on `file` (reverse edge).
    ///
    /// Returns an empty slice when `file` has no recorded edges.
    pub fn dependents_of(&self, file: &str) -> &[String] {
        Self::neighbors(&self.dependents, file)
    }

    /// All files transitively depended upon by `file` (including indirect).
    ///
    /// Each file appears at most once and `file` itself is never included,
    /// even when the graph contains cycles. The order is the traversal's
    /// discovery order and should not be relied upon.
    pub fn transitive_dependencies(&self, file: &str) -> Vec<String> {
        Self::reachable(&self.dependencies, file)
    }

    /// All files that transitively depend on `file` (reverse transitive).
    ///
    /// Each file appears at most once and `file` itself is never included,
    /// even when the graph contains cycles. The order is the traversal's
    /// discovery order and should not be relied upon.
    pub fn transitive_dependents(&self, file: &str) -> Vec<String> {
        Self::reachable(&self.dependents, file)
    }

    /// Direct neighbours of `file` in `edges`, or an empty slice if absent.
    fn neighbors<'a>(edges: &'a FxHashMap<String, Vec<String>>, file: &str) -> &'a [String] {
        edges.get(file).map(Vec::as_slice).unwrap_or_default()
    }

    /// Every node reachable from `file` by following `edges`, excluding
    /// `file` itself, with no duplicates.
    fn reachable(edges: &FxHashMap<String, Vec<String>>, file: &str) -> Vec<String> {
        // Nodes are marked as seen when they are *discovered*, not when they
        // are popped: a node reachable through several paths (a diamond, or a
        // repeated edge) is then queued and reported exactly once.
        let mut seen: FxHashSet<&str> = FxHashSet::default();
        seen.insert(file);
        let mut pending: Vec<&str> = vec![file];
        let mut result = Vec::new();

        while let Some(current) = pending.pop() {
            for next in Self::neighbors(edges, current) {
                if seen.insert(next.as_str()) {
                    pending.push(next.as_str());
                    result.push(next.clone());
                }
            }
        }
        result
    }
}
323
324pub mod symbol;
325pub use mir_codebase::storage::{FnParam, TemplateParam, Visibility};
326pub use mir_issues::{Issue, IssueKind, Severity};
327pub use mir_types::Type;
328
329/// Convert a parser [`php_ast::Span`] (byte-offset range) into a
330/// [`mir_types::Location`] (file path + 1-based line range +
331/// 0-based codepoint columns) using `source` and the parser's `source_map`.
332///
333/// This is the canonical way for consumers to translate body-analysis result spans
334/// (e.g. [`crate::symbol::ResolvedSymbol::span`]) into source locations they
335/// can hand to their own protocol layer. Consumers that need different
336/// position semantics (LSP UTF-16 code units, byte offsets, etc.) translate
337/// from this `Location` rather than re-implementing the column math.
338pub fn location_from_span(
339    span: php_ast::Span,
340    file: std::sync::Arc<str>,
341    source: &str,
342    source_map: &php_rs_parser::source_map::SourceMap,
343) -> mir_types::Location {
344    let (line, col_start) = diagnostics::offset_to_line_col(source, span.start, source_map);
345    let (line_end, col_end) = if span.start < span.end {
346        diagnostics::offset_to_line_col(source, span.end, source_map)
347    } else {
348        (line, col_start)
349    };
350    mir_types::Location {
351        file,
352        line,
353        line_end,
354        col_start,
355        col_end: col_end.max(col_start.saturating_add(1)),
356    }
357}
358pub use symbol::{DeclarationKind, DocumentSymbol, ReferenceKind, ResolvedSymbol};
359
360pub mod composer;
361pub use composer::{ComposerError, Psr4Map};
362pub use type_env::ScopeId;
363
364#[doc(hidden)]
365pub mod test_utils;