Skip to main content

mir_analyzer/
lib.rs

1use rustc_hash::FxHashMap;
2
3pub(crate) mod analyzer_db;
4pub mod batch;
5pub(crate) mod body_analysis;
6#[doc(hidden)]
7pub mod cache;
8pub(crate) mod call;
9pub(crate) mod class;
10pub(crate) mod collector;
11#[doc(hidden)]
12pub mod db;
13pub(crate) mod dead_code;
14pub(crate) mod diagnostics;
15pub(crate) mod expr;
16pub mod file_analyzer;
17pub(crate) mod flow_state;
18pub(crate) mod generic;
19#[doc(hidden)]
20pub mod metrics;
21pub(crate) mod narrowing;
22#[doc(hidden)]
23pub mod parser;
24pub mod php_version;
25pub mod prelude;
26pub mod session;
27pub mod source_provider;
28pub(crate) mod stmt;
29#[doc(hidden)]
30pub mod stub_cache;
31#[doc(hidden)]
32pub mod stubs;
33pub(crate) mod subtype;
34pub mod suppression;
35pub(crate) mod taint;
36pub(crate) mod type_env;
37
38pub use batch::{
39    analyze_source, dead_code_issue_kinds, discover_files, AnalysisResult, BatchOptions,
40};
41pub use file_analyzer::{BatchFileAnalyzer, FileAnalysis, FileAnalyzer, ParsedFile};
42pub use parser::type_from_hint::type_from_hint;
43pub use parser::{DocblockParser, ParsedDocblock};
44pub use php_version::{ParsePhpVersionError, PhpVersion};
45pub use session::AnalysisSession;
46pub use source_provider::{FsSourceProvider, SourceProvider};
47pub use stubs::{
48    is_builtin_function, stub_files, stub_path_for_class, ChainedClassResolver, StubClassResolver,
49    StubVfs,
50};
51
52// ============================================================================
53// Analysis entry points
54// ============================================================================
55//
56// `AnalysisSession` is the single analysis engine. It supports two usage modes:
57//
58// - Batch (CLI, CI, bulk analysis): use `analyze_paths` / `BatchOptions` to
59//   run definition collection and body analysis over many files in parallel.
60//
// - Incremental (LSP, watch mode): ingest files as they change; per-file
//   results come from `FileAnalyzer::analyze`. Configuration is
//   builder-style (`with_cache`, `with_psr4`, …).
64//
65// The two phases of analysis are:
66//   1. Definition collection — discovers classes, functions, constants in a
67//      file and registers them in the salsa database.
68//   2. Body analysis (`BodyAnalyzer`) — walks function/method bodies,
69//      inferring types and emitting issues.
70
/// A position in source code: 1-based line, 0-based codepoint column.
///
/// Ordering is derived, so positions compare by `line` first and then by
/// `column`.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, PartialOrd, Ord)]
pub struct Position {
    /// Line number, counted from 1.
    pub line: u32,
    /// Column within the line, counted from 0 in Unicode code points.
    pub column: u32,
}
77
/// A range in source code: start and end positions.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
pub struct Range {
    /// Position at which the range begins.
    pub start: Position,
    /// Position at which the range ends.
    // NOTE(review): whether `end` is inclusive or exclusive is not stated
    // in this file — confirm against the producers of `Range`.
    pub end: Position,
}
84
/// Typed identifier for a code entity the analyzer can resolve.
///
/// Supersedes the earlier stringly-typed `&str` keys. Because PHP method
/// dispatch is case-insensitive, [`Name::method`] lowercases the method name
/// when the value is built, so callers passing mixed-case names still hit
/// the same entry instead of silently getting empty results.
#[derive(Debug, Clone, PartialEq, Eq, Hash)]
pub enum Name {
    /// A class, interface, trait, or enum, identified by its FQCN.
    Class(std::sync::Arc<str>),
    /// A global function, identified by its FQN.
    Function(std::sync::Arc<str>),
    /// An instance or static method of `class`.
    Method {
        class: std::sync::Arc<str>,
        name: std::sync::Arc<str>,
    },
    /// A property of `class`.
    Property {
        class: std::sync::Arc<str>,
        name: std::sync::Arc<str>,
    },
    /// A constant declared on a class, interface, or enum.
    ClassConstant {
        class: std::sync::Arc<str>,
        name: std::sync::Arc<str>,
    },
    /// A global constant, identified by its FQN.
    GlobalConstant(std::sync::Arc<str>),
}

impl Name {
    /// Builds a method symbol.
    ///
    /// `name` is ASCII-lowercased (PHP methods are case-insensitive);
    /// `class` is stored exactly as given.
    pub fn method(class: impl Into<std::sync::Arc<str>>, name: &str) -> Self {
        let class = class.into();
        let name: std::sync::Arc<str> = name.to_ascii_lowercase().into();
        Self::Method { class, name }
    }

    /// Builds a class symbol from its FQCN.
    pub fn class(fqcn: impl Into<std::sync::Arc<str>>) -> Self {
        Self::Class(fqcn.into())
    }

    /// Builds a function symbol from its FQN.
    pub fn function(fqn: impl Into<std::sync::Arc<str>>) -> Self {
        Self::Function(fqn.into())
    }

    /// Builds a property symbol; both parts are stored exactly as given.
    pub fn property(
        class: impl Into<std::sync::Arc<str>>,
        name: impl Into<std::sync::Arc<str>>,
    ) -> Self {
        let class = class.into();
        let name = name.into();
        Self::Property { class, name }
    }

    /// Builds a class-constant symbol; both parts are stored exactly as given.
    pub fn class_constant(
        class: impl Into<std::sync::Arc<str>>,
        name: impl Into<std::sync::Arc<str>>,
    ) -> Self {
        let class = class.into();
        let name = name.into();
        Self::ClassConstant { class, name }
    }

    /// Builds a global-constant symbol from its FQN.
    pub fn global_constant(fqn: impl Into<std::sync::Arc<str>>) -> Self {
        Self::GlobalConstant(fqn.into())
    }

    /// The codebase lookup key for this symbol (used internally for the
    /// reference-locations index). Stable across releases.
    ///
    /// Single-part symbols yield their name unchanged; member symbols yield
    /// `Class::member`. Methods, properties, and class constants all share
    /// that shape, so members of different kinds with the same class and
    /// name produce the same key.
    pub fn codebase_key(&self) -> String {
        match self {
            Self::Class(fq) | Self::Function(fq) | Self::GlobalConstant(fq) => fq.to_string(),
            Self::Method { class, name }
            | Self::Property { class, name }
            | Self::ClassConstant { class, name } => format!("{class}::{name}"),
        }
    }
}
176
/// Reason a symbol lookup did not return a location.
///
/// Implements [`std::fmt::Display`] and [`std::error::Error`], so it can be
/// propagated with `?` into boxed or wrapped error types.
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum SymbolLookupError {
    /// No such symbol exists in the codebase.
    NotFound,
    /// The symbol exists but has no recorded source location (e.g. a
    /// stub-only declaration without a span).
    NoSourceLocation,
}
186
/// Outcome of an [`AnalysisSession::load_class`] attempt.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum LoadOutcome {
    /// The symbol was already present in the session; no work performed.
    AlreadyLoaded,
    /// The configured [`ClassResolver`] resolved the symbol and the
    /// defining file was ingested.
    Loaded,
    /// No resolver is configured, the resolver could not map the FQCN to a
    /// file, or the resolved file could not be read / did not define the
    /// requested symbol.
    NotResolvable,
}

impl LoadOutcome {
    /// Reports whether the symbol is present in the session after the
    /// attempt: `true` for both [`LoadOutcome::AlreadyLoaded`] and
    /// [`LoadOutcome::Loaded`], `false` for [`LoadOutcome::NotResolvable`].
    pub fn is_loaded(self) -> bool {
        // Spelled out per variant so a newly added variant must be
        // classified here explicitly.
        match self {
            Self::AlreadyLoaded | Self::Loaded => true,
            Self::NotResolvable => false,
        }
    }
}
208
/// Pluggable strategy for mapping a fully-qualified class name to the file
/// that should define it. The analyzer never touches `vendor/` or the
/// filesystem on its own — it asks a `ClassResolver` when a symbol is needed.
///
/// `mir_analyzer::Psr4Map` is the built-in implementation for Composer-based
/// projects. Consumers with non-Composer conventions (WordPress, Drupal, a
/// custom autoloader, a workspace-walk index) supply their own.
///
/// Implementations must be `Send + Sync`, as required by the trait bound.
pub trait ClassResolver: Send + Sync {
    /// Resolve `fqcn` to the file that defines it. Returning `None` causes
    /// the analyzer to fall back to emitting `UndefinedClass`.
    fn resolve(&self, fqcn: &str) -> Option<std::path::PathBuf>;
}
221
/// Makes the Composer PSR-4 map usable wherever a [`ClassResolver`] is
/// expected.
impl ClassResolver for composer::Psr4Map {
    fn resolve(&self, fqcn: &str) -> Option<std::path::PathBuf> {
        // Forwards to `Psr4Map::resolve` using a fully-qualified path.
        // NOTE(review): this presumably binds to an inherent
        // `Psr4Map::resolve` (inherent items take precedence over trait
        // items in path resolution); if no inherent method exists the call
        // would recurse into this impl — confirm in `composer`.
        composer::Psr4Map::resolve(self, fqcn)
    }
}
227
228impl std::fmt::Display for SymbolLookupError {
229    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
230        match self {
231            SymbolLookupError::NotFound => write!(f, "symbol not found"),
232            SymbolLookupError::NoSourceLocation => write!(f, "symbol has no source location"),
233        }
234    }
235}
236
// Marker impl: the type already provides `Debug` and `Display`, and it wraps
// no underlying cause, so the default `Error` methods are used as-is.
impl std::error::Error for SymbolLookupError {}
238
/// Hover information for a symbol at a source location.
/// Includes the inferred type, optional docstring, and location of definition.
#[derive(Debug, Clone)]
pub struct HoverInfo {
    /// Inferred type of the symbol.
    pub ty: Type,
    /// Docstring / documentation comment for the symbol (if available).
    pub docstring: Option<String>,
    /// Source location of the symbol's definition, or `None` when no
    /// definition location is available.
    pub definition: Option<mir_types::Location>,
}
250
251/// File dependency graph: tracks which files depend on which other files.
252/// Used for incremental invalidation in LSP servers and build systems.
253#[derive(Debug, Clone)]
254pub struct DependencyGraph {
255    /// Direct dependencies: file → [files it depends on]
256    dependencies: FxHashMap<String, Vec<String>>,
257    /// Reverse dependencies: file → [files that depend on it]
258    dependents: FxHashMap<String, Vec<String>>,
259}
260
261impl DependencyGraph {
262    /// Files that `file` directly depends on (imports, parent classes, interfaces, traits).
263    pub fn dependencies_of(&self, file: &str) -> &[String] {
264        self.dependencies
265            .get(file)
266            .map(|v| v.as_slice())
267            .unwrap_or(&[])
268    }
269
270    /// Files that directly depend on `file` (reverse edge).
271    pub fn dependents_of(&self, file: &str) -> &[String] {
272        self.dependents
273            .get(file)
274            .map(|v| v.as_slice())
275            .unwrap_or(&[])
276    }
277
278    /// All files transitively depended upon by `file` (including indirect).
279    pub fn transitive_dependencies(&self, file: &str) -> Vec<String> {
280        let mut visited = rustc_hash::FxHashSet::default();
281        let mut queue = vec![file.to_string()];
282        let mut result = Vec::new();
283
284        while let Some(current) = queue.pop() {
285            if !visited.insert(current.clone()) {
286                continue;
287            }
288            for dep in self.dependencies_of(&current) {
289                if !visited.contains(dep) {
290                    queue.push(dep.clone());
291                    result.push(dep.clone());
292                }
293            }
294        }
295        result
296    }
297
298    /// All files that transitively depend on `file` (reverse transitive).
299    pub fn transitive_dependents(&self, file: &str) -> Vec<String> {
300        let mut visited = rustc_hash::FxHashSet::default();
301        let mut queue = vec![file.to_string()];
302        let mut result = Vec::new();
303
304        while let Some(current) = queue.pop() {
305            if !visited.insert(current.clone()) {
306                continue;
307            }
308            for dep in self.dependents_of(&current) {
309                if !visited.contains(dep) {
310                    queue.push(dep.clone());
311                    result.push(dep.clone());
312                }
313            }
314        }
315        result
316    }
317}
318
319pub mod symbol;
320pub use mir_codebase::storage::{FnParam, TemplateParam, Visibility};
321pub use mir_issues::{Issue, IssueKind, Severity};
322pub use mir_types::Type;
323
324/// Convert a parser [`php_ast::Span`] (byte-offset range) into a
325/// [`mir_types::Location`] (file path + 1-based line range +
326/// 0-based codepoint columns) using `source` and the parser's `source_map`.
327///
328/// This is the canonical way for consumers to translate body-analysis result spans
329/// (e.g. [`crate::symbol::ResolvedSymbol::span`]) into source locations they
330/// can hand to their own protocol layer. Consumers that need different
331/// position semantics (LSP UTF-16 code units, byte offsets, etc.) translate
332/// from this `Location` rather than re-implementing the column math.
333pub fn location_from_span(
334    span: php_ast::Span,
335    file: std::sync::Arc<str>,
336    source: &str,
337    source_map: &php_rs_parser::source_map::SourceMap,
338) -> mir_types::Location {
339    let (line, col_start) = diagnostics::offset_to_line_col(source, span.start, source_map);
340    let (line_end, col_end) = if span.start < span.end {
341        diagnostics::offset_to_line_col(source, span.end, source_map)
342    } else {
343        (line, col_start)
344    };
345    mir_types::Location {
346        file,
347        line,
348        line_end,
349        col_start,
350        col_end: col_end.max(col_start.saturating_add(1)),
351    }
352}
353pub use symbol::{DeclarationKind, DocumentSymbol, ReferenceKind, ResolvedSymbol};
354
355pub mod composer;
356pub use composer::{ComposerError, Psr4Map};
357pub use type_env::ScopeId;
358
359#[doc(hidden)]
360pub mod test_utils;