Skip to main content

mir_analyzer/
lib.rs

1use rustc_hash::FxHashMap;
2
3pub(crate) mod analyzer_db;
4pub mod batch;
5pub(crate) mod body_analysis;
6#[doc(hidden)]
7pub mod cache;
8pub(crate) mod call;
9pub(crate) mod class;
10pub(crate) mod collector;
11#[doc(hidden)]
12pub mod db;
13pub(crate) mod dead_code;
14pub(crate) mod diagnostics;
15pub(crate) mod expr;
16pub mod file_analyzer;
17pub(crate) mod flow_state;
18pub(crate) mod generic;
19pub mod indexing;
20#[doc(hidden)]
21pub mod metrics;
22pub(crate) mod narrowing;
23#[doc(hidden)]
24pub mod parse_cache;
25#[doc(hidden)]
26pub mod parser;
27pub mod php_version;
28pub mod prelude;
29pub mod session;
30pub mod source_provider;
31pub(crate) mod stmt;
32#[doc(hidden)]
33pub mod stub_cache;
34#[doc(hidden)]
35pub mod stubs;
36pub(crate) mod subtype;
37pub mod suppression;
38pub(crate) mod taint;
39pub(crate) mod type_env;
40
41pub use batch::{
42    analyze_source, dead_code_issue_kinds, discover_files, AnalysisResult, BatchOptions,
43};
44pub use file_analyzer::{BatchFileAnalyzer, FileAnalysis, FileAnalyzer, ParsedFile};
45pub use indexing::{IndexBatchOutcome, IndexCancel, IndexParallelism};
46pub use parser::type_from_hint::type_from_hint;
47pub use parser::{DocblockParser, ParsedDocblock};
48pub use php_version::{ParsePhpVersionError, PhpVersion};
49pub use session::AnalysisSession;
50pub use source_provider::{FsSourceProvider, SourceProvider};
51pub use stubs::{
52    is_builtin_function, stub_files, stub_path_for_class, ChainedClassResolver, StubClassResolver,
53    StubVfs,
54};
55
56// ============================================================================
57// Analysis entry points
58// ============================================================================
59//
60// `AnalysisSession` is the single analysis engine. It supports two usage modes:
61//
62// - Batch (CLI, CI, bulk analysis): use `analyze_paths` / `BatchOptions` to
63//   run definition collection and body analysis over many files in parallel.
64//
65// - Incremental (LSP, watch mode): ingest files as they change; per-file
66//   results come from `FileAnalyzer::analyze`. Builder-style configuration
67//   (`with_cache`, `with_psr4`, …).
68//
69// The two phases of analysis are:
70//   1. Definition collection — discovers classes, functions, constants in a
71//      file and registers them in the salsa database.
72//   2. Body analysis (`BodyAnalyzer`) — walks function/method bodies,
73//      inferring types and emitting issues.
74
/// A single point in a source file.
///
/// Lines are counted from 1; columns are counted from 0 in Unicode
/// codepoints. The derived ordering compares `line` first and `column`
/// second, following field declaration order.
#[derive(Clone, Copy, Debug, Eq, Hash, Ord, PartialEq, PartialOrd)]
pub struct Position {
    /// 1-based line number.
    pub line: u32,
    /// 0-based column, measured in codepoints.
    pub column: u32,
}
81
82/// A range in source code: start and end positions.
83#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
84pub struct Range {
85    pub start: Position,
86    pub end: Position,
87}
88
/// Typed handle identifying a code entity the analyzer can resolve.
///
/// Supersedes the earlier stringly-typed `&str` keys. Prefer the constructor
/// functions on this type over building variants by hand: [`Name::method`]
/// lowercases the method name because PHP method dispatch is
/// case-insensitive, so mixed-case input cannot silently miss a lookup.
#[derive(Clone, Debug, Eq, Hash, PartialEq)]
pub enum Name {
    /// A class, interface, trait, or enum, identified by its FQCN.
    Class(std::sync::Arc<str>),
    /// A global function, identified by its FQN.
    Function(std::sync::Arc<str>),
    /// An instance or static method of `class`.
    Method {
        class: std::sync::Arc<str>,
        name: std::sync::Arc<str>,
    },
    /// A property declared on `class`.
    Property {
        class: std::sync::Arc<str>,
        name: std::sync::Arc<str>,
    },
    /// A constant declared on a class, interface, or enum.
    ClassConstant {
        class: std::sync::Arc<str>,
        name: std::sync::Arc<str>,
    },
    /// A global constant, identified by its FQN.
    GlobalConstant(std::sync::Arc<str>),
}

impl Name {
    /// Build a method symbol.
    ///
    /// `name` is ASCII-lowercased before being stored, so `"DoThing"` and
    /// `"dothing"` produce equal symbols. `class` is stored unchanged.
    pub fn method(class: impl Into<std::sync::Arc<str>>, name: &str) -> Self {
        let class = class.into();
        let name: std::sync::Arc<str> = name.to_ascii_lowercase().into();
        Self::Method { class, name }
    }

    /// Build a class symbol from its fully-qualified name.
    pub fn class(fqcn: impl Into<std::sync::Arc<str>>) -> Self {
        Self::Class(fqcn.into())
    }

    /// Build a global-function symbol from its fully-qualified name.
    pub fn function(fqn: impl Into<std::sync::Arc<str>>) -> Self {
        Self::Function(fqn.into())
    }

    /// Build a property symbol. Neither argument is case-normalized.
    pub fn property(
        class: impl Into<std::sync::Arc<str>>,
        name: impl Into<std::sync::Arc<str>>,
    ) -> Self {
        let class = class.into();
        let name = name.into();
        Self::Property { class, name }
    }

    /// Build a class-constant symbol. Neither argument is case-normalized.
    pub fn class_constant(
        class: impl Into<std::sync::Arc<str>>,
        name: impl Into<std::sync::Arc<str>>,
    ) -> Self {
        let class = class.into();
        let name = name.into();
        Self::ClassConstant { class, name }
    }

    /// Build a global-constant symbol from its fully-qualified name.
    pub fn global_constant(fqn: impl Into<std::sync::Arc<str>>) -> Self {
        Self::GlobalConstant(fqn.into())
    }

    /// The codebase lookup key for this symbol (used internally for the
    /// reference-locations index). Stable across releases.
    ///
    /// Free-standing symbols use their bare name. Every class member kind
    /// (method, property, class constant) is rendered as `Class::member`,
    /// so members of different kinds with the same name share a key.
    pub fn codebase_key(&self) -> String {
        match self {
            Self::Class(bare) | Self::Function(bare) | Self::GlobalConstant(bare) => {
                bare.to_string()
            }
            Self::Method { class, name }
            | Self::Property { class, name }
            | Self::ClassConstant { class, name } => format!("{class}::{name}"),
        }
    }
}
180
/// Why a symbol lookup failed to produce a location.
#[derive(Clone, Debug, Eq, PartialEq)]
pub enum SymbolLookupError {
    /// The codebase contains no symbol with the requested name.
    NotFound,
    /// The symbol is known, but no source location was recorded for it
    /// (e.g. a stub-only declaration without a span).
    NoSourceLocation,
}
190
/// Result of an [`AnalysisSession::load_class`] attempt.
#[derive(Clone, Copy, Debug, Eq, PartialEq)]
pub enum LoadOutcome {
    /// The symbol was already in the session, so nothing had to be done.
    AlreadyLoaded,
    /// The configured [`ClassResolver`] located the symbol and its defining
    /// file was ingested.
    Loaded,
    /// Loading failed: no resolver is configured, the resolver could not map
    /// the FQCN to a file, or the resolved file could not be read / did not
    /// define the requested symbol.
    NotResolvable,
}

impl LoadOutcome {
    /// Whether the symbol is present in the session after the attempt,
    /// regardless of whether it was found there or freshly loaded.
    pub fn is_loaded(self) -> bool {
        // Exhaustive on purpose: a new variant must decide which side it is on.
        match self {
            Self::AlreadyLoaded | Self::Loaded => true,
            Self::NotResolvable => false,
        }
    }
}
212
/// Strategy object that maps a fully-qualified class name to the file
/// expected to define it.
///
/// The analyzer does not scan `vendor/` or the filesystem by itself; when a
/// symbol is needed it consults a `ClassResolver`.
///
/// `mir_analyzer::Psr4Map` is the built-in implementation for Composer-based
/// projects. Projects with other conventions (WordPress, Drupal, a custom
/// autoloader, a workspace-walk index) provide their own implementation.
pub trait ClassResolver: Send + Sync {
    /// Map `fqcn` to the path of its defining file.
    ///
    /// Return `None` when the class cannot be located; the analyzer then
    /// falls back to emitting `UndefinedClass`.
    fn resolve(&self, fqcn: &str) -> Option<std::path::PathBuf>;
}
225
226impl ClassResolver for composer::Psr4Map {
227    fn resolve(&self, fqcn: &str) -> Option<std::path::PathBuf> {
228        composer::Psr4Map::resolve(self, fqcn)
229    }
230}
231
232impl std::fmt::Display for SymbolLookupError {
233    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
234        match self {
235            SymbolLookupError::NotFound => write!(f, "symbol not found"),
236            SymbolLookupError::NoSourceLocation => write!(f, "symbol has no source location"),
237        }
238    }
239}
240
241impl std::error::Error for SymbolLookupError {}
242
243/// Hover information for a symbol at a source location.
244/// Includes the inferred type, optional docstring, and location of definition.
245#[derive(Debug, Clone)]
246pub struct HoverInfo {
247    /// Inferred type of the symbol.
248    pub ty: Type,
249    /// Docstring / documentation comment for the symbol (if available).
250    pub docstring: Option<String>,
251    /// Source location of the symbol's definition.
252    pub definition: Option<mir_types::Location>,
253}
254
255/// File dependency graph: tracks which files depend on which other files.
256/// Used for incremental invalidation in LSP servers and build systems.
257#[derive(Debug, Clone)]
258pub struct DependencyGraph {
259    /// Direct dependencies: file → [files it depends on]
260    dependencies: FxHashMap<String, Vec<String>>,
261    /// Reverse dependencies: file → [files that depend on it]
262    dependents: FxHashMap<String, Vec<String>>,
263}
264
265impl DependencyGraph {
266    /// Files that `file` directly depends on (imports, parent classes, interfaces, traits).
267    pub fn dependencies_of(&self, file: &str) -> &[String] {
268        self.dependencies
269            .get(file)
270            .map(|v| v.as_slice())
271            .unwrap_or(&[])
272    }
273
274    /// Files that directly depend on `file` (reverse edge).
275    pub fn dependents_of(&self, file: &str) -> &[String] {
276        self.dependents
277            .get(file)
278            .map(|v| v.as_slice())
279            .unwrap_or(&[])
280    }
281
282    /// All files transitively depended upon by `file` (including indirect).
283    pub fn transitive_dependencies(&self, file: &str) -> Vec<String> {
284        let mut visited = rustc_hash::FxHashSet::default();
285        let mut queue = vec![file.to_string()];
286        let mut result = Vec::new();
287
288        while let Some(current) = queue.pop() {
289            if !visited.insert(current.clone()) {
290                continue;
291            }
292            for dep in self.dependencies_of(&current) {
293                if !visited.contains(dep) {
294                    queue.push(dep.clone());
295                    result.push(dep.clone());
296                }
297            }
298        }
299        result
300    }
301
302    /// All files that transitively depend on `file` (reverse transitive).
303    pub fn transitive_dependents(&self, file: &str) -> Vec<String> {
304        let mut visited = rustc_hash::FxHashSet::default();
305        let mut queue = vec![file.to_string()];
306        let mut result = Vec::new();
307
308        while let Some(current) = queue.pop() {
309            if !visited.insert(current.clone()) {
310                continue;
311            }
312            for dep in self.dependents_of(&current) {
313                if !visited.contains(dep) {
314                    queue.push(dep.clone());
315                    result.push(dep.clone());
316                }
317            }
318        }
319        result
320    }
321}
322
323pub mod symbol;
324pub use mir_codebase::storage::{FnParam, TemplateParam, Visibility};
325pub use mir_issues::{Issue, IssueKind, Severity};
326pub use mir_types::Type;
327
328/// Convert a parser [`php_ast::Span`] (byte-offset range) into a
329/// [`mir_types::Location`] (file path + 1-based line range +
330/// 0-based codepoint columns) using `source` and the parser's `source_map`.
331///
332/// This is the canonical way for consumers to translate body-analysis result spans
333/// (e.g. [`crate::symbol::ResolvedSymbol::span`]) into source locations they
334/// can hand to their own protocol layer. Consumers that need different
335/// position semantics (LSP UTF-16 code units, byte offsets, etc.) translate
336/// from this `Location` rather than re-implementing the column math.
337pub fn location_from_span(
338    span: php_ast::Span,
339    file: std::sync::Arc<str>,
340    source: &str,
341    source_map: &php_rs_parser::source_map::SourceMap,
342) -> mir_types::Location {
343    let (line, col_start) = diagnostics::offset_to_line_col(source, span.start, source_map);
344    let (line_end, col_end) = if span.start < span.end {
345        diagnostics::offset_to_line_col(source, span.end, source_map)
346    } else {
347        (line, col_start)
348    };
349    mir_types::Location {
350        file,
351        line,
352        line_end,
353        col_start,
354        col_end: col_end.max(col_start.saturating_add(1)),
355    }
356}
357pub use symbol::{DeclarationKind, DocumentSymbol, ReferenceKind, ResolvedSymbol};
358
359pub mod composer;
360pub use composer::{ComposerError, Psr4Map};
361pub use type_env::ScopeId;
362
363#[doc(hidden)]
364pub mod test_utils;