Skip to main content

mir_codebase/
storage.rs

1use std::sync::Arc;
2
3use indexmap::IndexMap;
4use mir_types::Union;
5use serde::{Deserialize, Serialize};
6
7// ---------------------------------------------------------------------------
8// Interned common types for deduplication
9// ---------------------------------------------------------------------------
10
11/// Interned Union types for common parameter/property types.
12/// Deduplicates allocations when thousands of parameters share types like `string`, `int`, etc.
13mod interned_types {
14    use super::*;
15    use std::sync::OnceLock;
16
17    fn intern_string() -> Arc<Union> {
18        Arc::new(Union::string())
19    }
20
21    fn intern_int() -> Arc<Union> {
22        Arc::new(Union::int())
23    }
24
25    fn intern_float() -> Arc<Union> {
26        Arc::new(Union::float())
27    }
28
29    fn intern_bool() -> Arc<Union> {
30        Arc::new(Union::bool())
31    }
32
33    fn intern_mixed() -> Arc<Union> {
34        Arc::new(Union::mixed())
35    }
36
37    fn intern_null() -> Arc<Union> {
38        Arc::new(Union::null())
39    }
40
41    fn intern_void() -> Arc<Union> {
42        Arc::new(Union::void())
43    }
44
45    static STRING: OnceLock<Arc<Union>> = OnceLock::new();
46    static INT: OnceLock<Arc<Union>> = OnceLock::new();
47    static FLOAT: OnceLock<Arc<Union>> = OnceLock::new();
48    static BOOL: OnceLock<Arc<Union>> = OnceLock::new();
49    static MIXED: OnceLock<Arc<Union>> = OnceLock::new();
50    static NULL: OnceLock<Arc<Union>> = OnceLock::new();
51    static VOID: OnceLock<Arc<Union>> = OnceLock::new();
52
53    pub fn string() -> Arc<Union> {
54        STRING.get_or_init(intern_string).clone()
55    }
56
57    pub fn int() -> Arc<Union> {
58        INT.get_or_init(intern_int).clone()
59    }
60
61    pub fn float() -> Arc<Union> {
62        FLOAT.get_or_init(intern_float).clone()
63    }
64
65    pub fn bool() -> Arc<Union> {
66        BOOL.get_or_init(intern_bool).clone()
67    }
68
69    pub fn mixed() -> Arc<Union> {
70        MIXED.get_or_init(intern_mixed).clone()
71    }
72
73    pub fn null() -> Arc<Union> {
74        NULL.get_or_init(intern_null).clone()
75    }
76
77    pub fn void() -> Arc<Union> {
78        VOID.get_or_init(intern_void).clone()
79    }
80
81    /// Try to intern a Union if it matches a common type, otherwise wrap in Arc.
82    pub fn intern_or_wrap(union: Union) -> Arc<Union> {
83        // Check if this is a single-atomic type that we intern
84        if union.types.len() == 1 && !union.possibly_undefined && !union.from_docblock {
85            match &union.types[0] {
86                mir_types::Atomic::TString => return string(),
87                mir_types::Atomic::TInt => return int(),
88                mir_types::Atomic::TFloat => return float(),
89                mir_types::Atomic::TBool => return bool(),
90                mir_types::Atomic::TMixed => return mixed(),
91                mir_types::Atomic::TNull => return null(),
92                mir_types::Atomic::TVoid => return void(),
93                _ => {}
94            }
95        }
96        Arc::new(union)
97    }
98}
99
100// ---------------------------------------------------------------------------
101// Shared primitives
102// ---------------------------------------------------------------------------
103
/// Member visibility, declared from most to least permissive.
///
/// The derived `PartialOrd`/`Ord` follow declaration order, so
/// `Public < Protected < Private`. `Visibility::is_at_least` relies on that
/// ordering — do not reorder the variants.
#[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Serialize, Deserialize)]
pub enum Visibility {
    Public,
    Protected,
    Private,
}
110
111impl Visibility {
112    pub fn is_at_least(&self, required: Visibility) -> bool {
113        *self <= required
114    }
115}
116
117impl std::fmt::Display for Visibility {
118    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
119        match self {
120            Visibility::Public => write!(f, "public"),
121            Visibility::Protected => write!(f, "protected"),
122            Visibility::Private => write!(f, "private"),
123        }
124    }
125}
126
/// A template (generic) type parameter declared on a class or function.
#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
pub struct TemplateParam {
    /// Name of the template parameter.
    pub name: Arc<str>,
    /// Optional bound on the parameter.
    // NOTE(review): presumably the upper bound from `@template T of X` — confirm.
    pub bound: Option<Union>,
    /// The entity (class or function FQN) that declared this template param.
    pub defining_entity: Arc<str>,
    /// Declared variance of the parameter (see `mir_types::Variance`).
    pub variance: mir_types::Variance,
}
135
/// A single function/method parameter.
///
/// `PartialEq`/`Eq` are derived while `Hash` is implemented by hand further
/// down in this file; the two must stay consistent because parameter lists are
/// used as hash-map keys by the param-list deduplication cache.
#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
pub struct FnParam {
    /// Parameter name.
    pub name: Arc<str>,
    /// Parameter type. Stored as `Option<Arc<Union>>` to enable deduplication of
    /// common types across parameters. Many parameters share types like `string`,
    /// `int`, `bool`, etc., so interning via Arc saves allocations.
    ///
    /// On the wire this is a plain `Option<Union>`; the serde helpers convert
    /// to and from the `Arc` form.
    #[serde(
        deserialize_with = "deserialize_param_type",
        serialize_with = "serialize_param_type"
    )]
    pub ty: Option<Arc<Union>>,
    /// Whether this parameter has a default value. During analysis, defaults are
    /// never used for their value — only for marking parameters as optional.
    pub has_default: bool,
    /// Whether the parameter is variadic.
    pub is_variadic: bool,
    /// Whether the parameter is taken by reference.
    pub is_byref: bool,
    // NOTE(review): how `is_optional` differs from `has_default` is not visible
    // in this file — confirm against the collector that fills these in.
    pub is_optional: bool,
}
154
155impl std::hash::Hash for FnParam {
156    fn hash<H: std::hash::Hasher>(&self, state: &mut H) {
157        self.name.hash(state);
158        self.has_default.hash(state);
159        self.is_variadic.hash(state);
160        self.is_byref.hash(state);
161        self.is_optional.hash(state);
162        // Hash the type value (not the Arc pointer) so that two FnParams with
163        // equal types (PartialEq) always produce the same hash, even when they
164        // are backed by different Arc allocations.
165        self.ty.as_deref().hash(state);
166    }
167}
168
169// Serde helpers to transparently convert between Option<Union> and Option<Arc<Union>>
170fn deserialize_param_type<'de, D>(deserializer: D) -> Result<Option<Arc<Union>>, D::Error>
171where
172    D: serde::Deserializer<'de>,
173{
174    Option::<Union>::deserialize(deserializer).map(|opt| opt.map(interned_types::intern_or_wrap))
175}
176
177fn serialize_param_type<S>(value: &Option<Arc<Union>>, serializer: S) -> Result<S::Ok, S::Error>
178where
179    S: serde::Serializer,
180{
181    let opt = value.as_ref().map(|arc| (**arc).clone());
182    opt.serialize(serializer)
183}
184
185fn deserialize_return_type<'de, D>(deserializer: D) -> Result<Option<Arc<Union>>, D::Error>
186where
187    D: serde::Deserializer<'de>,
188{
189    Option::<Union>::deserialize(deserializer).map(|opt| opt.map(interned_types::intern_or_wrap))
190}
191
192fn serialize_return_type<S>(value: &Option<Arc<Union>>, serializer: S) -> Result<S::Ok, S::Error>
193where
194    S: serde::Serializer,
195{
196    let opt = value.as_ref().map(|arc| (**arc).clone());
197    opt.serialize(serializer)
198}
199
200fn deserialize_params<'de, D>(deserializer: D) -> Result<Arc<[FnParam]>, D::Error>
201where
202    D: serde::Deserializer<'de>,
203{
204    Vec::<FnParam>::deserialize(deserializer).map(|v| Arc::from(v.into_boxed_slice()))
205}
206
207fn serialize_params<S>(value: &Arc<[FnParam]>, serializer: S) -> Result<S::Ok, S::Error>
208where
209    S: serde::Serializer,
210{
211    value.as_ref().serialize(serializer)
212}
213
214/// Helper to wrap Option<Union> in interned Arc<Union>.
215pub fn wrap_param_type(ty: Option<Union>) -> Option<Arc<Union>> {
216    ty.map(interned_types::intern_or_wrap)
217}
218
219/// Helper to wrap return type Option<Union> in interned Arc<Union>.
220pub fn wrap_return_type(ty: Option<Union>) -> Option<Arc<Union>> {
221    ty.map(interned_types::intern_or_wrap)
222}
223
224// ---------------------------------------------------------------------------
225// Location — file + pre-computed line/col span
226// ---------------------------------------------------------------------------
227
/// Declaration location.
///
/// Columns are 0-based Unicode scalar value (code-point) counts, equivalent to
/// LSP `utf-32` position encoding. Convert to UTF-16 code units at the LSP
/// boundary for clients that do not advertise `utf-32` support.
///
/// Note the mixed bases: lines are 1-based, columns are 0-based.
#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
pub struct Location {
    /// File containing the declaration.
    // NOTE(review): whether this is a filesystem path or a URI is not visible here.
    pub file: Arc<str>,
    /// 1-based start line.
    pub line: u32,
    /// 1-based end line (inclusive). Equal to `line` for single-line spans.
    pub line_end: u32,
    /// 0-based Unicode code-point column of the span start.
    pub col_start: u16,
    /// 0-based Unicode code-point column of the span end (exclusive).
    pub col_end: u16,
}
245
246impl Location {
247    pub fn new(file: Arc<str>, line: u32, line_end: u32, col_start: u16, col_end: u16) -> Self {
248        Self {
249            file,
250            line,
251            line_end,
252            col_start,
253            col_end,
254        }
255    }
256}
257
258// ---------------------------------------------------------------------------
259// Assertion — `@psalm-assert`, `@psalm-assert-if-true`, etc.
260// ---------------------------------------------------------------------------
261
/// Which flavour of docblock assertion an [`Assertion`] represents.
#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
pub enum AssertionKind {
    /// `@psalm-assert`.
    Assert,
    /// `@psalm-assert-if-true`.
    AssertIfTrue,
    // NOTE(review): presumably `@psalm-assert-if-false` — confirm against the docblock parser.
    AssertIfFalse,
}
268
/// A single type assertion attached to a function or method.
#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
pub struct Assertion {
    /// Which assertion tag this came from.
    pub kind: AssertionKind,
    // NOTE(review): presumably the name of the parameter being asserted on — confirm.
    pub param: Arc<str>,
    /// The asserted type.
    pub ty: Union,
}
275
276// ---------------------------------------------------------------------------
277// MethodStorage
278// ---------------------------------------------------------------------------
279
/// Collected signature and docblock metadata for a single method.
///
/// `params` and `return_type` go through custom serde helpers so they can be
/// held behind shared `Arc`s in memory while serializing as plain values.
#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
pub struct MethodStorage {
    /// Method name.
    pub name: Arc<str>,
    // NOTE(review): presumably the FQCN of the class-like that declares the method — confirm.
    pub fqcn: Arc<str>,
    /// Parameter list, held as a shared slice so identical lists can point at
    /// one allocation (see `deduplicate_params_in_slice`).
    #[serde(
        deserialize_with = "deserialize_params",
        serialize_with = "serialize_params"
    )]
    pub params: Arc<[FnParam]>,
    /// Type from annotation (`@return` / native type hint). `None` means unannotated.
    /// Stored as `Option<Arc<Union>>` to enable deduplication of common return types
    /// (e.g., `void`, `string`, `mixed`, `bool`) across thousands of methods.
    #[serde(
        deserialize_with = "deserialize_return_type",
        serialize_with = "serialize_return_type"
    )]
    pub return_type: Option<Arc<Union>>,
    /// Type inferred from body analysis (filled in during pass 2).
    pub inferred_return_type: Option<Union>,
    pub visibility: Visibility,
    pub is_static: bool,
    pub is_abstract: bool,
    pub is_final: bool,
    pub is_constructor: bool,
    /// Template parameters declared on the method itself.
    pub template_params: Vec<TemplateParam>,
    /// Docblock type assertions attached to this method.
    pub assertions: Vec<Assertion>,
    // NOTE(review): presumably exception class names from `@throws` — confirm.
    pub throws: Vec<Arc<str>>,
    // NOTE(review): presumably `Some(message)` when marked `@deprecated` — confirm.
    pub deprecated: Option<Arc<str>>,
    pub is_internal: bool,
    pub is_pure: bool,
    /// Where the method is declared, when known.
    pub location: Option<Location>,
    /// Plain-text description from the docblock (text before `@tag` lines).
    /// Used for hover info.
    #[serde(default)]
    pub docstring: Option<Arc<str>>,
}
316
317impl MethodStorage {
318    pub fn effective_return_type(&self) -> Option<&Union> {
319        self.return_type
320            .as_deref()
321            .or(self.inferred_return_type.as_ref())
322    }
323}
324
325// ---------------------------------------------------------------------------
326// PropertyStorage
327// ---------------------------------------------------------------------------
328
/// Collected metadata for a single property.
#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
pub struct PropertyStorage {
    /// Property name.
    pub name: Arc<str>,
    // NOTE(review): presumably the declared/annotated type, `None` when unannotated — confirm.
    pub ty: Option<Union>,
    // NOTE(review): presumably filled in by analysis, mirroring
    // `MethodStorage::inferred_return_type` — confirm.
    pub inferred_ty: Option<Union>,
    pub visibility: Visibility,
    pub is_static: bool,
    pub is_readonly: bool,
    // NOTE(review): presumably the type of the default value expression — confirm.
    pub default: Option<Union>,
    /// Where the property is declared, when known.
    pub location: Option<Location>,
}
340
341// ---------------------------------------------------------------------------
342// ConstantStorage
343// ---------------------------------------------------------------------------
344
/// Collected metadata for a single class-like constant.
#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
pub struct ConstantStorage {
    /// Constant name.
    pub name: Arc<str>,
    /// Type of the constant.
    pub ty: Union,
    // NOTE(review): presumably `None` when no visibility modifier was written — confirm.
    pub visibility: Option<Visibility>,
    /// Defaults to `false` when absent from serialized data.
    #[serde(default)]
    pub is_final: bool,
    /// Where the constant is declared, when known.
    pub location: Option<Location>,
}
354
355// ---------------------------------------------------------------------------
356// ClassStorage
357// ---------------------------------------------------------------------------
358
/// Definition data for one class: identity, ancestry, own members, generics
/// and docblock-derived flags.
///
/// Only the class's *own* members are stored here; the comment in
/// `get_method` notes that inherited method resolution lives in
/// `db::lookup_method_in_chain`.
#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
pub struct ClassStorage {
    /// Fully-qualified class name.
    pub fqcn: Arc<str>,
    pub short_name: Arc<str>,
    /// Parent class name, if any.
    pub parent: Option<Arc<str>>,
    pub interfaces: Vec<Arc<str>>,
    pub traits: Vec<Arc<str>>,
    /// Methods declared directly on this class, keyed by name. `get_method`
    /// expects callers to look them up by lowercase name.
    pub own_methods: IndexMap<Arc<str>, Arc<MethodStorage>>,
    /// Properties declared directly on this class, keyed by name.
    pub own_properties: IndexMap<Arc<str>, PropertyStorage>,
    /// Constants declared directly on this class, keyed by name.
    pub own_constants: IndexMap<Arc<str>, ConstantStorage>,
    // NOTE(review): presumably class names from `@mixin` — confirm.
    #[serde(default)]
    pub mixins: Vec<Arc<str>>,
    pub template_params: Vec<TemplateParam>,
    /// Type arguments from `@extends ParentClass<T1, T2>` — maps parent's template params to concrete types.
    pub extends_type_args: Vec<Union>,
    /// Type arguments from `@implements Interface<T1, T2>`.
    #[serde(default)]
    pub implements_type_args: Vec<(Arc<str>, Vec<Union>)>,
    pub is_abstract: bool,
    pub is_final: bool,
    pub is_readonly: bool,
    pub deprecated: Option<Arc<str>>,
    pub is_internal: bool,
    pub location: Option<Location>,
    /// Type aliases declared on this class via `@psalm-type` / `@phpstan-type`.
    #[serde(default)]
    pub type_aliases: std::collections::HashMap<Arc<str>, Union>,
    /// Raw import-type declarations (`(local_name, original_name, from_class)`) — resolved during finalization.
    #[serde(default)]
    pub pending_import_types: Vec<(Arc<str>, Arc<str>, Arc<str>)>,
}
390
391impl ClassStorage {
392    pub fn get_method(&self, name: &str) -> Option<&MethodStorage> {
393        // PHP method names are case-insensitive; caller should pass lowercase name.
394        // Only searches own_methods — inherited method resolution is done by
395        // `db::lookup_method_in_chain`.
396        self.own_methods.get(name).map(Arc::as_ref).or_else(|| {
397            self.own_methods
398                .iter()
399                .find(|(k, _)| k.as_ref().eq_ignore_ascii_case(name))
400                .map(|(_, v)| v.as_ref())
401        })
402    }
403
404    pub fn get_property(&self, name: &str) -> Option<&PropertyStorage> {
405        self.own_properties.get(name)
406    }
407}
408
409// ---------------------------------------------------------------------------
410// InterfaceStorage
411// ---------------------------------------------------------------------------
412
/// Definition data for one interface.
#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
pub struct InterfaceStorage {
    /// Fully-qualified interface name.
    pub fqcn: Arc<str>,
    pub short_name: Arc<str>,
    /// Names of the interfaces this interface extends.
    pub extends: Vec<Arc<str>>,
    /// Methods declared directly on this interface, keyed by name.
    pub own_methods: IndexMap<Arc<str>, Arc<MethodStorage>>,
    /// Constants declared directly on this interface, keyed by name.
    pub own_constants: IndexMap<Arc<str>, ConstantStorage>,
    pub template_params: Vec<TemplateParam>,
    /// Where the interface is declared, when known.
    pub location: Option<Location>,
}
423
424// ---------------------------------------------------------------------------
425// TraitStorage
426// ---------------------------------------------------------------------------
427
/// Definition data for one trait.
#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
pub struct TraitStorage {
    /// Fully-qualified trait name.
    pub fqcn: Arc<str>,
    pub short_name: Arc<str>,
    /// Methods declared directly in this trait, keyed by name.
    pub own_methods: IndexMap<Arc<str>, Arc<MethodStorage>>,
    /// Properties declared directly in this trait, keyed by name.
    pub own_properties: IndexMap<Arc<str>, PropertyStorage>,
    /// Constants declared directly in this trait, keyed by name.
    pub own_constants: IndexMap<Arc<str>, ConstantStorage>,
    pub template_params: Vec<TemplateParam>,
    /// Traits used by this trait (`use OtherTrait;` inside a trait body).
    pub traits: Vec<Arc<str>>,
    /// Where the trait is declared, when known.
    pub location: Option<Location>,
    /// `@psalm-require-extends` / `@phpstan-require-extends` — FQCNs that using classes must extend.
    #[serde(default)]
    pub require_extends: Vec<Arc<str>>,
    /// `@psalm-require-implements` / `@phpstan-require-implements` — FQCNs that using classes must implement.
    #[serde(default)]
    pub require_implements: Vec<Arc<str>>,
}
446
447// ---------------------------------------------------------------------------
448// EnumStorage
449// ---------------------------------------------------------------------------
450
/// A single case of an enum.
#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
pub struct EnumCaseStorage {
    /// Case name.
    pub name: Arc<str>,
    // NOTE(review): presumably the backing value's type for backed enums and
    // `None` for pure enums — confirm.
    pub value: Option<Union>,
    /// Where the case is declared, when known.
    pub location: Option<Location>,
}
457
/// Definition data for one enum.
#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
pub struct EnumStorage {
    /// Fully-qualified enum name.
    pub fqcn: Arc<str>,
    pub short_name: Arc<str>,
    // NOTE(review): presumably the backing scalar type (`int`/`string`) of a
    // backed enum, `None` for pure enums — confirm.
    pub scalar_type: Option<Union>,
    pub interfaces: Vec<Arc<str>>,
    /// Enum cases keyed by case name, in insertion order.
    pub cases: IndexMap<Arc<str>, EnumCaseStorage>,
    /// Methods declared directly on this enum, keyed by name.
    pub own_methods: IndexMap<Arc<str>, Arc<MethodStorage>>,
    /// Constants declared directly on this enum, keyed by name.
    pub own_constants: IndexMap<Arc<str>, ConstantStorage>,
    /// Where the enum is declared, when known.
    pub location: Option<Location>,
}
469
470// ---------------------------------------------------------------------------
471// FunctionStorage
472// ---------------------------------------------------------------------------
473
/// Collected signature and docblock metadata for a free function.
///
/// `params` and `return_type` go through custom serde helpers so they can be
/// held behind shared `Arc`s in memory while serializing as plain values.
#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
pub struct FunctionStorage {
    /// Fully-qualified function name.
    pub fqn: Arc<str>,
    pub short_name: Arc<str>,
    /// Parameter list, held as a shared slice so identical lists can point at
    /// one allocation (see `deduplicate_params_in_slice`).
    #[serde(
        deserialize_with = "deserialize_params",
        serialize_with = "serialize_params"
    )]
    pub params: Arc<[FnParam]>,
    /// Type from annotation (`@return` / native type hint). `None` means unannotated.
    /// Stored as `Option<Arc<Union>>` to enable deduplication of common return types.
    #[serde(
        deserialize_with = "deserialize_return_type",
        serialize_with = "serialize_return_type"
    )]
    pub return_type: Option<Arc<Union>>,
    /// Fallback used by `effective_return_type` when `return_type` is `None`.
    // NOTE(review): presumably filled in by body analysis, as on `MethodStorage` — confirm.
    pub inferred_return_type: Option<Union>,
    pub template_params: Vec<TemplateParam>,
    /// Docblock type assertions attached to this function.
    pub assertions: Vec<Assertion>,
    pub throws: Vec<Arc<str>>,
    pub deprecated: Option<Arc<str>>,
    pub is_pure: bool,
    /// Where the function is declared, when known.
    pub location: Option<Location>,
    /// Plain-text description from the docblock (text before `@tag` lines).
    /// Used for hover info.
    #[serde(default)]
    pub docstring: Option<Arc<str>>,
}
502
503impl FunctionStorage {
504    pub fn effective_return_type(&self) -> Option<&Union> {
505        self.return_type
506            .as_deref()
507            .or(self.inferred_return_type.as_ref())
508    }
509}
510
511// ---------------------------------------------------------------------------
512// StubSlice — serializable bundle of definitions from one extension's stubs
513// ---------------------------------------------------------------------------
514
/// A snapshot of all PHP definitions contributed by a single stub file set.
///
/// Produced by `mir-stubs-gen` at code-generation time and deserialized at
/// runtime to ingest definitions into the salsa db via
/// `MirDatabase::ingest_stub_slice`.
///
/// Most newer fields carry `#[serde(default)]`, so serialized data that lacks
/// them still deserializes, with those fields left empty.
#[derive(Debug, Clone, Default, serde::Serialize, serde::Deserialize)]
pub struct StubSlice {
    /// Class definitions in this slice.
    pub classes: Vec<ClassStorage>,
    /// Interface definitions in this slice.
    pub interfaces: Vec<InterfaceStorage>,
    /// Trait definitions in this slice.
    pub traits: Vec<TraitStorage>,
    /// Enum definitions in this slice.
    pub enums: Vec<EnumStorage>,
    /// Free-function definitions in this slice.
    pub functions: Vec<FunctionStorage>,
    /// Global constants as `(name, type)` pairs.
    #[serde(default)]
    pub constants: Vec<(Arc<str>, Union)>,
    /// Source file this slice was collected from. `None` for bundled stub slices
    /// that were pre-computed and are not tied to a specific on-disk file.
    #[serde(default)]
    pub file: Option<Arc<str>>,
    /// Types of `@var`-annotated global variables collected from this file.
    /// Populated by `DefinitionCollector`; ingested into the salsa db's
    /// `global_vars` table by `ingest_stub_slice` when `file` is `Some`.
    #[serde(default)]
    pub global_vars: Vec<(Arc<str>, Union)>,
    /// The first namespace declared in this file (e.g. `"App\\Service"`).
    /// Populated by `DefinitionCollector`; ingested into the salsa db's
    /// `file_namespaces` table by `ingest_stub_slice` when `file` is `Some`.
    #[serde(default)]
    pub namespace: Option<Arc<str>>,
    /// `use` alias map for this file: alias → FQCN.
    /// Populated by `DefinitionCollector`; ingested into the salsa db's
    /// `file_imports` table by `ingest_stub_slice` when `file` is `Some`.
    #[serde(default)]
    pub imports: std::collections::HashMap<String, String>,
    /// Set to `true` after `deduplicate_params_in_slice` has run on this slice.
    /// `ingest_stub_slice` skips the clone+re-dedup when this flag is set.
    ///
    /// Never serialized (`#[serde(skip)]`), so a freshly deserialized slice
    /// always starts with `false`.
    #[serde(skip)]
    pub is_deduped: bool,
}
553
554// ---------------------------------------------------------------------------
555// Param list deduplication
556// ---------------------------------------------------------------------------
557
558use rustc_hash::FxHashMap;
559use std::sync::Mutex;
560
/// Maps a parameter list (compared and hashed by value) to the canonical shared
/// allocation for that list. Wrapped in a `Mutex` because it lives in a
/// process-wide static.
type ParamCache = Mutex<FxHashMap<Vec<FnParam>, Arc<[FnParam]>>>;

/// Global cache of canonical Arc<[FnParam]> instances for deduplication.
/// Shared across all StubSlices to deduplicate vendor code with millions of
/// methods that often have identical parameter lists.
///
/// Lazily initialised on first use by `deduplicate_params_in_slice`. Nothing in
/// this file removes entries, so the cache only grows.
static PARAM_DEDUP_CACHE: std::sync::OnceLock<ParamCache> = std::sync::OnceLock::new();
567
568/// Deduplicate parameter lists across all methods and functions in a StubSlice.
569/// Many PHP framework methods share identical parameter lists (e.g., thousands
570/// of `(string $arg, array $opts)` signatures). This function groups identical
571/// param lists globally (across all slices processed so far) and replaces them
572/// with Arc<[FnParam]> pointers to shared allocations.
573///
574/// Expected memory savings: 100–150 MiB on cold start (vendor collection).
575pub fn deduplicate_params_in_slice(slice: &mut StubSlice) {
576    let cache: &ParamCache = PARAM_DEDUP_CACHE.get_or_init(|| Mutex::new(FxHashMap::default()));
577    let mut canonical_params = cache.lock().unwrap();
578
579    let mut deduplicate = |params: &mut Arc<[FnParam]>| {
580        if let Some(existing) = canonical_params.get(params.as_ref()) {
581            *params = existing.clone();
582        } else {
583            canonical_params.insert(params.as_ref().to_vec(), params.clone());
584        }
585    };
586
587    // Deduplicate method params in all classes
588    for cls in &mut slice.classes {
589        for method in cls.own_methods.values_mut() {
590            deduplicate(&mut Arc::make_mut(method).params);
591        }
592    }
593
594    // Deduplicate method params in all interfaces
595    for iface in &mut slice.interfaces {
596        for method in iface.own_methods.values_mut() {
597            deduplicate(&mut Arc::make_mut(method).params);
598        }
599    }
600
601    // Deduplicate method params in all traits
602    for tr in &mut slice.traits {
603        for method in tr.own_methods.values_mut() {
604            deduplicate(&mut Arc::make_mut(method).params);
605        }
606    }
607
608    // Deduplicate method params in all enums
609    for en in &mut slice.enums {
610        for method in en.own_methods.values_mut() {
611            deduplicate(&mut Arc::make_mut(method).params);
612        }
613    }
614
615    // Deduplicate function params
616    for func in &mut slice.functions {
617        deduplicate(&mut func.params);
618    }
619    slice.is_deduped = true;
620}