Skip to main content

mir_analyzer/
lib.rs

1use rustc_hash::FxHashMap;
2
3pub(crate) mod analyzer_db;
4pub mod batch;
5pub(crate) mod body_analysis;
6#[doc(hidden)]
7pub mod cache;
8pub(crate) mod call;
9pub(crate) mod class;
10pub(crate) mod collector;
11#[doc(hidden)]
12pub mod db;
13pub(crate) mod dead_code;
14pub(crate) mod diagnostics;
15pub(crate) mod expr;
16pub mod file_analyzer;
17pub(crate) mod flow_state;
18pub(crate) mod generic;
19#[doc(hidden)]
20pub mod metrics;
21pub(crate) mod narrowing;
22#[doc(hidden)]
23pub mod parser;
24pub mod php_version;
25pub mod prelude;
26pub mod session;
27pub mod source_provider;
28pub(crate) mod stmt;
29#[doc(hidden)]
30pub mod stub_cache;
31#[doc(hidden)]
32pub mod stubs;
33pub(crate) mod subtype;
34pub(crate) mod taint;
35pub(crate) mod type_env;
36
37pub use batch::{
38    analyze_source, dead_code_issue_kinds, discover_files, AnalysisResult, BatchOptions,
39};
40pub use file_analyzer::{BatchFileAnalyzer, FileAnalysis, FileAnalyzer, ParsedFile};
41pub use parser::type_from_hint::type_from_hint;
42pub use parser::{DocblockParser, ParsedDocblock};
43pub use php_version::{ParsePhpVersionError, PhpVersion};
44pub use session::AnalysisSession;
45pub use source_provider::{FsSourceProvider, SourceProvider};
46pub use stubs::{
47    is_builtin_function, stub_files, stub_path_for_class, ChainedClassResolver, StubClassResolver,
48    StubVfs,
49};
50
51// ============================================================================
52// Analysis entry points
53// ============================================================================
54//
55// `AnalysisSession` is the single analysis engine. It supports two usage modes:
56//
57// - Batch (CLI, CI, bulk analysis): use `analyze_paths` / `BatchOptions` to
58//   run definition collection and body analysis over many files in parallel.
59//
60// - Incremental (LSP, watch mode): ingest files as they change; per-file
61//   results come from `FileAnalyzer::analyze`. Builder-style configuration
62//   (`with_cache`, `with_psr4`, …).
63//
64// The two phases of analysis are:
65//   1. Definition collection — discovers classes, functions, constants in a
66//      file and registers them in the salsa database.
67//   2. Body analysis (`BodyAnalyzer`) — walks function/method bodies,
68//      inferring types and emitting issues.
69
/// A single point in a source file.
///
/// Lines are counted from 1; columns are counted from 0 and measured in
/// codepoints. The derived ordering compares `line` first and `column`
/// second, so positions sort in reading order.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, PartialOrd, Ord)]
pub struct Position {
    /// 1-based line number.
    pub line: u32,
    /// 0-based column, in codepoints.
    pub column: u32,
}
76
77/// A range in source code: start and end positions.
78#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
79pub struct Range {
80    pub start: Position,
81    pub end: Position,
82}
83
/// A typed identifier for a code entity that the analyzer can resolve.
///
/// This supersedes the earlier stringly-typed `&str` keys. Prefer the
/// constructor functions on this type: [`Name::method`] lowercases the method
/// name because PHP method dispatch is case-insensitive, which avoids the
/// class of bugs where a mixed-case name silently yields empty results.
///
/// Note that building a `Name::Method { .. }` value directly skips that
/// normalization.
#[derive(Debug, Clone, PartialEq, Eq, Hash)]
pub enum Name {
    /// A class, interface, trait, or enum, identified by its FQCN.
    Class(std::sync::Arc<str>),
    /// A global function, identified by its FQN.
    Function(std::sync::Arc<str>),
    /// An instance or static method of `class`.
    Method {
        class: std::sync::Arc<str>,
        name: std::sync::Arc<str>,
    },
    /// A property of `class`.
    Property {
        class: std::sync::Arc<str>,
        name: std::sync::Arc<str>,
    },
    /// A constant declared on a class, interface, or enum.
    ClassConstant {
        class: std::sync::Arc<str>,
        name: std::sync::Arc<str>,
    },
    /// A global constant, identified by its FQN.
    GlobalConstant(std::sync::Arc<str>),
}

impl Name {
    /// Builds a method symbol.
    ///
    /// `name` is ASCII-lowercased before it is stored, since PHP method
    /// names are case-insensitive; `class` is stored as given.
    pub fn method(class: impl Into<std::sync::Arc<str>>, name: &str) -> Self {
        let class = class.into();
        let name = std::sync::Arc::<str>::from(name.to_ascii_lowercase());
        Self::Method { class, name }
    }

    /// Builds a class symbol from its FQCN.
    pub fn class(fqcn: impl Into<std::sync::Arc<str>>) -> Self {
        Self::Class(fqcn.into())
    }

    /// Builds a global-function symbol from its FQN.
    pub fn function(fqn: impl Into<std::sync::Arc<str>>) -> Self {
        Self::Function(fqn.into())
    }

    /// Builds a property symbol. Neither part is case-normalized.
    pub fn property(
        class: impl Into<std::sync::Arc<str>>,
        name: impl Into<std::sync::Arc<str>>,
    ) -> Self {
        let class = class.into();
        let name = name.into();
        Self::Property { class, name }
    }

    /// Builds a class-constant symbol. Neither part is case-normalized.
    pub fn class_constant(
        class: impl Into<std::sync::Arc<str>>,
        name: impl Into<std::sync::Arc<str>>,
    ) -> Self {
        let class = class.into();
        let name = name.into();
        Self::ClassConstant { class, name }
    }

    /// Builds a global-constant symbol from its FQN.
    pub fn global_constant(fqn: impl Into<std::sync::Arc<str>>) -> Self {
        Self::GlobalConstant(fqn.into())
    }

    /// Returns the codebase lookup key for this symbol (used internally for
    /// the reference-locations index). Stable across releases.
    ///
    /// Free-standing symbols map to their bare name; class members map to
    /// `Class::member`. Methods, properties, and class constants all share
    /// that one member format.
    pub fn codebase_key(&self) -> String {
        match self {
            Self::Class(fq) | Self::Function(fq) | Self::GlobalConstant(fq) => fq.to_string(),
            Self::Method { class, name }
            | Self::Property { class, name }
            | Self::ClassConstant { class, name } => format!("{class}::{name}"),
        }
    }
}
175
/// Why a symbol lookup failed to produce a location.
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum SymbolLookupError {
    /// The codebase contains no such symbol.
    NotFound,
    /// The symbol is known, but no source location was recorded for it
    /// (for example, a stub-only declaration without a span).
    NoSourceLocation,
}
185
/// Result of an [`AnalysisSession::load_class`] attempt.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum LoadOutcome {
    /// The session already contained the symbol, so nothing was done.
    AlreadyLoaded,
    /// The configured [`ClassResolver`] located the symbol and its defining
    /// file was ingested.
    Loaded,
    /// The symbol could not be brought in: no resolver is configured, the
    /// resolver had no file for the FQCN, or the resolved file was unreadable
    /// or did not define the requested symbol.
    NotResolvable,
}

impl LoadOutcome {
    /// Reports whether the symbol is present in the session after the
    /// attempt, regardless of whether it was already there or was loaded
    /// just now.
    pub fn is_loaded(self) -> bool {
        match self {
            LoadOutcome::AlreadyLoaded | LoadOutcome::Loaded => true,
            LoadOutcome::NotResolvable => false,
        }
    }
}
207
/// Strategy for locating the file that should define a fully-qualified
/// class name.
///
/// The analyzer never walks `vendor/` or touches the filesystem on its own;
/// whenever it needs a symbol it asks a `ClassResolver` instead.
///
/// `mir_analyzer::Psr4Map` is the built-in implementation for Composer-based
/// projects. Projects with other conventions (WordPress, Drupal, a custom
/// autoloader, a workspace-walk index) provide their own implementation.
pub trait ClassResolver: Send + Sync {
    /// Maps `fqcn` to the path of the file that defines it.
    ///
    /// Returning `None` makes the analyzer fall back to emitting
    /// `UndefinedClass`.
    fn resolve(&self, fqcn: &str) -> Option<std::path::PathBuf>;
}
220
/// Lets the Composer PSR-4 map be used wherever a [`ClassResolver`] is
/// expected.
impl ClassResolver for composer::Psr4Map {
    /// Delegates to `composer::Psr4Map::resolve`.
    fn resolve(&self, fqcn: &str) -> Option<std::path::PathBuf> {
        // NOTE(review): this fully-qualified call presumably targets an inherent
        // `Psr4Map::resolve` defined in `composer` (not visible here). If no such
        // inherent method exists the path would resolve back to this trait method
        // and recurse — confirm before renaming or removing the inherent method.
        composer::Psr4Map::resolve(self, fqcn)
    }
}
226
227impl std::fmt::Display for SymbolLookupError {
228    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
229        match self {
230            SymbolLookupError::NotFound => write!(f, "symbol not found"),
231            SymbolLookupError::NoSourceLocation => write!(f, "symbol has no source location"),
232        }
233    }
234}
235
236impl std::error::Error for SymbolLookupError {}
237
238/// Hover information for a symbol at a source location.
239/// Includes the inferred type, optional docstring, and location of definition.
240#[derive(Debug, Clone)]
241pub struct HoverInfo {
242    /// Inferred type of the symbol.
243    pub ty: Type,
244    /// Docstring / documentation comment for the symbol (if available).
245    pub docstring: Option<String>,
246    /// Source location of the symbol's definition.
247    pub definition: Option<mir_types::Location>,
248}
249
250/// File dependency graph: tracks which files depend on which other files.
251/// Used for incremental invalidation in LSP servers and build systems.
252#[derive(Debug, Clone)]
253pub struct DependencyGraph {
254    /// Direct dependencies: file → [files it depends on]
255    dependencies: FxHashMap<String, Vec<String>>,
256    /// Reverse dependencies: file → [files that depend on it]
257    dependents: FxHashMap<String, Vec<String>>,
258}
259
260impl DependencyGraph {
261    /// Files that `file` directly depends on (imports, parent classes, interfaces, traits).
262    pub fn dependencies_of(&self, file: &str) -> &[String] {
263        self.dependencies
264            .get(file)
265            .map(|v| v.as_slice())
266            .unwrap_or(&[])
267    }
268
269    /// Files that directly depend on `file` (reverse edge).
270    pub fn dependents_of(&self, file: &str) -> &[String] {
271        self.dependents
272            .get(file)
273            .map(|v| v.as_slice())
274            .unwrap_or(&[])
275    }
276
277    /// All files transitively depended upon by `file` (including indirect).
278    pub fn transitive_dependencies(&self, file: &str) -> Vec<String> {
279        let mut visited = rustc_hash::FxHashSet::default();
280        let mut queue = vec![file.to_string()];
281        let mut result = Vec::new();
282
283        while let Some(current) = queue.pop() {
284            if !visited.insert(current.clone()) {
285                continue;
286            }
287            for dep in self.dependencies_of(&current) {
288                if !visited.contains(dep) {
289                    queue.push(dep.clone());
290                    result.push(dep.clone());
291                }
292            }
293        }
294        result
295    }
296
297    /// All files that transitively depend on `file` (reverse transitive).
298    pub fn transitive_dependents(&self, file: &str) -> Vec<String> {
299        let mut visited = rustc_hash::FxHashSet::default();
300        let mut queue = vec![file.to_string()];
301        let mut result = Vec::new();
302
303        while let Some(current) = queue.pop() {
304            if !visited.insert(current.clone()) {
305                continue;
306            }
307            for dep in self.dependents_of(&current) {
308                if !visited.contains(dep) {
309                    queue.push(dep.clone());
310                    result.push(dep.clone());
311                }
312            }
313        }
314        result
315    }
316}
317
318pub mod symbol;
319pub use mir_codebase::storage::{FnParam, TemplateParam, Visibility};
320pub use mir_issues::{Issue, IssueKind, Severity};
321pub use mir_types::Type;
322
323/// Convert a parser [`php_ast::Span`] (byte-offset range) into a
324/// [`mir_types::Location`] (file path + 1-based line range +
325/// 0-based codepoint columns) using `source` and the parser's `source_map`.
326///
327/// This is the canonical way for consumers to translate body-analysis result spans
328/// (e.g. [`crate::symbol::ResolvedSymbol::span`]) into source locations they
329/// can hand to their own protocol layer. Consumers that need different
330/// position semantics (LSP UTF-16 code units, byte offsets, etc.) translate
331/// from this `Location` rather than re-implementing the column math.
332pub fn location_from_span(
333    span: php_ast::Span,
334    file: std::sync::Arc<str>,
335    source: &str,
336    source_map: &php_rs_parser::source_map::SourceMap,
337) -> mir_types::Location {
338    let (line, col_start) = diagnostics::offset_to_line_col(source, span.start, source_map);
339    let (line_end, col_end) = if span.start < span.end {
340        diagnostics::offset_to_line_col(source, span.end, source_map)
341    } else {
342        (line, col_start)
343    };
344    mir_types::Location {
345        file,
346        line,
347        line_end,
348        col_start,
349        col_end: col_end.max(col_start.saturating_add(1)),
350    }
351}
352pub use symbol::{DeclarationKind, DocumentSymbol, ReferenceKind, ResolvedSymbol};
353
354pub mod composer;
355pub use composer::{ComposerError, Psr4Map};
356pub use type_env::ScopeId;
357
358#[doc(hidden)]
359pub mod test_utils;