Skip to main content

mir_analyzer/
lib.rs

1pub(crate) mod arena;
2#[doc(hidden)]
3pub mod cache;
4pub(crate) mod call;
5pub(crate) mod class;
6pub(crate) mod collector;
7pub(crate) mod context;
8#[doc(hidden)]
9pub mod db;
10pub(crate) mod dead_code;
11pub(crate) mod diagnostics;
12pub(crate) mod expr;
13pub mod file_analyzer;
14pub(crate) mod generic;
15pub(crate) mod narrowing;
16#[doc(hidden)]
17pub mod parser;
18pub(crate) mod pass2;
19pub mod php_version;
20pub mod project;
21pub mod session;
22pub(crate) mod shared_db;
23pub(crate) mod stmt;
24#[doc(hidden)]
25pub mod stubs;
26pub(crate) mod taint;
27pub(crate) mod type_env;
28
29pub use file_analyzer::{BatchFileAnalyzer, FileAnalysis, FileAnalyzer, ParsedFile};
30pub use parser::type_from_hint::type_from_hint;
31pub use parser::{DocblockParser, ParsedDocblock};
32pub use php_version::{ParsePhpVersionError, PhpVersion};
33pub use project::{AnalysisResult, ProjectAnalyzer};
34pub use session::AnalysisSession;
35pub use stubs::{is_builtin_function, stub_files, StubVfs};
36
37// ============================================================================
38// API Unification: ProjectAnalyzer and AnalysisSession
39// ============================================================================
40//
41// `ProjectAnalyzer` (batch-oriented) and `AnalysisSession` (incremental) are
42// now unified under a single analysis engine. Both share the same Salsa database,
43// definition collection, and Pass 2 type inference logic. The difference is
44// ownership model and parallelization strategy:
45//
46// - `ProjectAnalyzer`: Owns the database and all files; analyzes them in parallel.
47//   Best for CLI, CI, and bulk analysis. Configuration via public fields before
48//   calling `analyze()`.
49//
50// - `AnalysisSession`: Incremental file-by-file analysis; clients ingest files
51//   as they change. Best for LSP servers and watch modes. Configuration via
52//   builder pattern (with_cache, with_psr4, etc.).
53//
54// New code should prefer `AnalysisSession` for flexibility; `ProjectAnalyzer`
55// is maintained for backward compatibility with batch workflows.
56
/// A single point in a source file.
///
/// `line` counts from 1; `column` counts from 0 and is measured in Unicode
/// codepoints. The derived ordering compares `line` first and `column`
/// second, so positions sort in reading order.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, PartialOrd, Ord)]
pub struct Position {
    /// 1-based line number.
    pub line: u32,
    /// 0-based column, in codepoints.
    pub column: u32,
}
63
64/// A range in source code: start and end positions.
65#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
66pub struct Range {
67    pub start: Position,
68    pub end: Position,
69}
70
/// Typed identifier for a code entity the analyzer knows how to resolve.
///
/// This supersedes the earlier stringly-typed `&str` keys. PHP dispatches
/// methods case-insensitively, so [`Symbol::method`] lowercases the method
/// name when the symbol is built; callers passing mixed-case names therefore
/// still hit the same entry instead of silently getting empty results.
#[derive(Debug, Clone, PartialEq, Eq, Hash)]
pub enum Symbol {
    /// A class, interface, trait, or enum, identified by its FQCN.
    Class(std::sync::Arc<str>),
    /// A global function, identified by its FQN.
    Function(std::sync::Arc<str>),
    /// An instance or static method on `class`.
    Method {
        class: std::sync::Arc<str>,
        name: std::sync::Arc<str>,
    },
    /// A property declared on `class`.
    Property {
        class: std::sync::Arc<str>,
        name: std::sync::Arc<str>,
    },
    /// A constant declared on a class, interface, or enum.
    ClassConstant {
        class: std::sync::Arc<str>,
        name: std::sync::Arc<str>,
    },
    /// A global constant, identified by its FQN.
    GlobalConstant(std::sync::Arc<str>),
}

impl Symbol {
    /// Build a [`Symbol::Method`].
    ///
    /// `name` is ASCII-lowercased because PHP method names are
    /// case-insensitive; `class` is stored exactly as given.
    pub fn method(class: impl Into<std::sync::Arc<str>>, name: &str) -> Self {
        let normalized: std::sync::Arc<str> = name.to_ascii_lowercase().into();
        Self::Method {
            class: class.into(),
            name: normalized,
        }
    }

    /// Build a [`Symbol::Class`] from a fully-qualified class name.
    pub fn class(fqcn: impl Into<std::sync::Arc<str>>) -> Self {
        Self::Class(fqcn.into())
    }

    /// Build a [`Symbol::Function`] from a fully-qualified function name.
    pub fn function(fqn: impl Into<std::sync::Arc<str>>) -> Self {
        Self::Function(fqn.into())
    }

    /// Build a [`Symbol::Property`]. Neither argument is normalized.
    pub fn property(
        class: impl Into<std::sync::Arc<str>>,
        name: impl Into<std::sync::Arc<str>>,
    ) -> Self {
        let (class, name) = (class.into(), name.into());
        Self::Property { class, name }
    }

    /// Build a [`Symbol::ClassConstant`]. Neither argument is normalized.
    pub fn class_constant(
        class: impl Into<std::sync::Arc<str>>,
        name: impl Into<std::sync::Arc<str>>,
    ) -> Self {
        let (class, name) = (class.into(), name.into());
        Self::ClassConstant { class, name }
    }

    /// Build a [`Symbol::GlobalConstant`] from a fully-qualified name.
    pub fn global_constant(fqn: impl Into<std::sync::Arc<str>>) -> Self {
        Self::GlobalConstant(fqn.into())
    }

    /// The codebase lookup key for this symbol (used internally for the
    /// reference-locations index). Stable across releases.
    ///
    /// Free-standing symbols use their name verbatim; member symbols use
    /// `class::name`. Methods, properties, and class constants share that
    /// member format, so members of different kinds with the same class and
    /// name produce the same key.
    pub fn codebase_key(&self) -> String {
        match self {
            Self::Class(name) | Self::Function(name) | Self::GlobalConstant(name) => {
                name.to_string()
            }
            Self::Method { class, name }
            | Self::Property { class, name }
            | Self::ClassConstant { class, name } => format!("{class}::{name}"),
        }
    }
}
162
/// Why a symbol lookup failed to produce a location.
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum SymbolLookupError {
    /// The codebase contains no such symbol.
    NotFound,
    /// The symbol is known, but no source location was recorded for it
    /// (for example a stub-only declaration that carries no span).
    NoSourceLocation,
}
172
/// What happened when the session tried to lazy-load a symbol.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum LazyLoadOutcome {
    /// Nothing to do: the session already had the symbol.
    AlreadyLoaded,
    /// The configured [`ClassResolver`] located the defining file and that
    /// file was ingested.
    Loaded,
    /// The symbol could not be loaded: there is no resolver, the resolver
    /// had no file for the FQCN, or the file it named could not be read or
    /// did not define the requested symbol.
    NotResolvable,
}
186
/// Strategy object that maps a fully-qualified class name to the file
/// expected to define it.
///
/// The analyzer does not scan `vendor/` or walk the filesystem by itself;
/// whenever it needs a symbol it has not seen, it consults a `ClassResolver`.
///
/// `mir_analyzer::Psr4Map` is the built-in implementation for Composer-based
/// projects. Projects with other conventions (WordPress, Drupal, a custom
/// autoloader, a workspace-walk index) provide their own implementation.
pub trait ClassResolver: Send + Sync {
    /// Return the path of the file that defines `fqcn`.
    ///
    /// `None` means the class cannot be located, in which case the analyzer
    /// falls back to emitting `UndefinedClass`.
    fn resolve(&self, fqcn: &str) -> Option<std::path::PathBuf>;
}
199
200impl ClassResolver for composer::Psr4Map {
201    fn resolve(&self, fqcn: &str) -> Option<std::path::PathBuf> {
202        composer::Psr4Map::resolve(self, fqcn)
203    }
204}
205
206impl std::fmt::Display for SymbolLookupError {
207    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
208        match self {
209            SymbolLookupError::NotFound => write!(f, "symbol not found"),
210            SymbolLookupError::NoSourceLocation => write!(f, "symbol has no source location"),
211        }
212    }
213}
214
215impl std::error::Error for SymbolLookupError {}
216
217/// Hover information for a symbol at a source location.
218/// Includes the inferred type, optional docstring, and location of definition.
219#[derive(Debug, Clone)]
220pub struct HoverInfo {
221    /// Inferred type of the symbol.
222    pub ty: Type,
223    /// Docstring / documentation comment for the symbol (if available).
224    pub docstring: Option<String>,
225    /// Source location of the symbol's definition.
226    pub definition: Option<mir_codebase::storage::Location>,
227}
228
/// File dependency graph: tracks which files depend on which other files.
/// Used for incremental invalidation in LSP servers and build systems.
#[derive(Debug, Clone)]
pub struct DependencyGraph {
    /// Direct dependencies: file → [files it depends on]
    dependencies: std::collections::HashMap<String, Vec<String>>,
    /// Reverse dependencies: file → [files that depend on it]
    dependents: std::collections::HashMap<String, Vec<String>>,
}

impl DependencyGraph {
    /// Files that `file` directly depends on (imports, parent classes, interfaces, traits).
    ///
    /// Returns an empty slice when `file` has no recorded edges.
    pub fn dependencies_of(&self, file: &str) -> &[String] {
        self.dependencies
            .get(file)
            .map(Vec::as_slice)
            .unwrap_or_default()
    }

    /// Files that directly depend on `file` (reverse edge).
    ///
    /// Returns an empty slice when `file` has no recorded edges.
    pub fn dependents_of(&self, file: &str) -> &[String] {
        self.dependents
            .get(file)
            .map(Vec::as_slice)
            .unwrap_or_default()
    }

    /// All files transitively depended upon by `file` (including indirect).
    ///
    /// Each file appears at most once, and `file` itself is never included,
    /// even when it takes part in a dependency cycle.
    pub fn transitive_dependencies(&self, file: &str) -> Vec<String> {
        Self::reachable_from(&self.dependencies, file)
    }

    /// All files that transitively depend on `file` (reverse transitive).
    ///
    /// Each file appears at most once, and `file` itself is never included,
    /// even when it takes part in a dependency cycle.
    pub fn transitive_dependents(&self, file: &str) -> Vec<String> {
        Self::reachable_from(&self.dependents, file)
    }

    /// Collect every node reachable from `start` by following `edges`,
    /// excluding `start`, in discovery order.
    fn reachable_from(
        edges: &std::collections::HashMap<String, Vec<String>>,
        start: &str,
    ) -> Vec<String> {
        let mut seen: std::collections::HashSet<&str> = std::collections::HashSet::new();
        // Seeding `seen` with the start node keeps it out of the result when
        // a cycle leads back to it.
        seen.insert(start);

        let mut pending: Vec<&str> = vec![start];
        let mut reached = Vec::new();

        while let Some(current) = pending.pop() {
            if let Some(neighbours) = edges.get(current) {
                for next in neighbours {
                    // Mark a node as seen when it is first discovered, not
                    // when it is popped. Otherwise a node reachable by two
                    // paths (or listed twice) is reported more than once.
                    if seen.insert(next.as_str()) {
                        pending.push(next.as_str());
                        reached.push(next.clone());
                    }
                }
            }
        }
        reached
    }
}
296
297pub mod symbol;
298pub use mir_codebase::storage::{FnParam, TemplateParam, Visibility};
299pub use mir_issues::{Issue, IssueKind, Location, Severity};
300pub use mir_types::Union as Type;
301
302/// Convert a parser [`php_ast::Span`] (byte-offset range) into a
303/// [`mir_codebase::storage::Location`] (file path + 1-based line range +
304/// 0-based codepoint columns) using `source` and the parser's `source_map`.
305///
306/// This is the canonical way for consumers to translate Pass-2 result spans
307/// (e.g. [`crate::symbol::ResolvedSymbol::span`]) into source locations they
308/// can hand to their own protocol layer. Consumers that need different
309/// position semantics (LSP UTF-16 code units, byte offsets, etc.) translate
310/// from this `Location` rather than re-implementing the column math.
311pub fn location_from_span(
312    span: php_ast::Span,
313    file: std::sync::Arc<str>,
314    source: &str,
315    source_map: &php_rs_parser::source_map::SourceMap,
316) -> mir_codebase::storage::Location {
317    let (line, col_start) = diagnostics::offset_to_line_col(source, span.start, source_map);
318    let (line_end, col_end) = if span.start < span.end {
319        diagnostics::offset_to_line_col(source, span.end, source_map)
320    } else {
321        (line, col_start)
322    };
323    mir_codebase::storage::Location {
324        file,
325        line,
326        line_end,
327        col_start,
328        col_end: col_end.max(col_start.saturating_add(1)),
329    }
330}
331pub use symbol::{DocumentSymbol, DocumentSymbolKind, ResolvedSymbol, SymbolKind};
332
333pub mod composer;
334pub use composer::{ComposerError, Psr4Map};
335pub use type_env::ScopeId;
336
337#[doc(hidden)]
338pub mod test_utils;