Skip to main content

mir_codebase/
storage.rs

1use std::sync::Arc;
2
3use indexmap::IndexMap;
4use mir_types::Union;
5use serde::{Deserialize, Serialize};
6
7// ---------------------------------------------------------------------------
8// Interned common types for deduplication
9// ---------------------------------------------------------------------------
10
11/// Interned Union types for common parameter/property types.
12/// Deduplicates allocations when thousands of parameters share types like `string`, `int`, etc.
13mod interned_types {
14    use super::*;
15    use std::sync::OnceLock;
16
17    fn intern_string() -> Arc<Union> {
18        Arc::new(Union::string())
19    }
20
21    fn intern_int() -> Arc<Union> {
22        Arc::new(Union::int())
23    }
24
25    fn intern_float() -> Arc<Union> {
26        Arc::new(Union::float())
27    }
28
29    fn intern_bool() -> Arc<Union> {
30        Arc::new(Union::bool())
31    }
32
33    fn intern_mixed() -> Arc<Union> {
34        Arc::new(Union::mixed())
35    }
36
37    fn intern_null() -> Arc<Union> {
38        Arc::new(Union::null())
39    }
40
41    fn intern_void() -> Arc<Union> {
42        Arc::new(Union::void())
43    }
44
45    static STRING: OnceLock<Arc<Union>> = OnceLock::new();
46    static INT: OnceLock<Arc<Union>> = OnceLock::new();
47    static FLOAT: OnceLock<Arc<Union>> = OnceLock::new();
48    static BOOL: OnceLock<Arc<Union>> = OnceLock::new();
49    static MIXED: OnceLock<Arc<Union>> = OnceLock::new();
50    static NULL: OnceLock<Arc<Union>> = OnceLock::new();
51    static VOID: OnceLock<Arc<Union>> = OnceLock::new();
52
53    pub fn string() -> Arc<Union> {
54        STRING.get_or_init(intern_string).clone()
55    }
56
57    pub fn int() -> Arc<Union> {
58        INT.get_or_init(intern_int).clone()
59    }
60
61    pub fn float() -> Arc<Union> {
62        FLOAT.get_or_init(intern_float).clone()
63    }
64
65    pub fn bool() -> Arc<Union> {
66        BOOL.get_or_init(intern_bool).clone()
67    }
68
69    pub fn mixed() -> Arc<Union> {
70        MIXED.get_or_init(intern_mixed).clone()
71    }
72
73    pub fn null() -> Arc<Union> {
74        NULL.get_or_init(intern_null).clone()
75    }
76
77    pub fn void() -> Arc<Union> {
78        VOID.get_or_init(intern_void).clone()
79    }
80
81    /// Try to intern a Union if it matches a common type, otherwise wrap in Arc.
82    pub fn intern_or_wrap(union: Union) -> Arc<Union> {
83        // Check if this is a single-atomic type that we intern
84        if union.types.len() == 1 && !union.possibly_undefined && !union.from_docblock {
85            match &union.types[0] {
86                mir_types::Atomic::TString => return string(),
87                mir_types::Atomic::TInt => return int(),
88                mir_types::Atomic::TFloat => return float(),
89                mir_types::Atomic::TBool => return bool(),
90                mir_types::Atomic::TMixed => return mixed(),
91                mir_types::Atomic::TNull => return null(),
92                mir_types::Atomic::TVoid => return void(),
93                _ => {}
94            }
95        }
96        Arc::new(union)
97    }
98}
99
100// ---------------------------------------------------------------------------
101// Shared primitives
102// ---------------------------------------------------------------------------
103
/// Member visibility level.
///
/// Variant order is significant: the derived `PartialOrd`/`Ord` follow declaration
/// order, so `Public < Protected < Private`. `Visibility::is_at_least` depends on
/// that ordering — do not reorder the variants.
#[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Serialize, Deserialize)]
pub enum Visibility {
    Public,
    Protected,
    Private,
}
110
111impl Visibility {
112    pub fn is_at_least(&self, required: Visibility) -> bool {
113        *self <= required
114    }
115}
116
117impl std::fmt::Display for Visibility {
118    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
119        match self {
120            Visibility::Public => write!(f, "public"),
121            Visibility::Protected => write!(f, "protected"),
122            Visibility::Private => write!(f, "private"),
123        }
124    }
125}
126
/// A template (generic) parameter declared by a class or function.
#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
pub struct TemplateParam {
    /// Name of the template parameter.
    pub name: Arc<str>,
    /// Optional type bound; presumably `None` means the parameter is unbounded —
    /// TODO confirm against the collector.
    pub bound: Option<Union>,
    /// The entity (class or function FQN) that declared this template param.
    pub defining_entity: Arc<str>,
    /// Variance of the parameter, as defined by `mir_types::Variance`.
    pub variance: mir_types::Variance,
}
135
/// A single parameter of a function or method signature.
///
/// `PartialEq` is derived, so two parameters compare equal when all fields —
/// including the pointed-to `Union` of `ty` — are equal by value.
#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
pub struct FnParam {
    /// Parameter name.
    pub name: Arc<str>,
    /// Parameter type. Stored as `Option<Arc<Union>>` to enable deduplication of
    /// common types across parameters. Many parameters share types like `string`,
    /// `int`, `bool`, etc., so interning via Arc saves allocations.
    ///
    /// (De)serialized through `deserialize_param_type` / `serialize_param_type`,
    /// which convert to and from a plain `Option<Union>`.
    #[serde(
        deserialize_with = "deserialize_param_type",
        serialize_with = "serialize_param_type"
    )]
    pub ty: Option<Arc<Union>>,
    /// Whether this parameter has a default value. During analysis, defaults are
    /// never used for their value — only for marking parameters as optional.
    pub has_default: bool,
    /// Variadic flag (presumably a `...$args` parameter — set by the collector).
    pub is_variadic: bool,
    /// By-reference flag (presumably a `&$arg` parameter — set by the collector).
    pub is_byref: bool,
    /// Optional flag; how it relates to `has_default` is decided by the code that
    /// builds this struct and is not visible here.
    pub is_optional: bool,
}
154
155impl std::hash::Hash for FnParam {
156    fn hash<H: std::hash::Hasher>(&self, state: &mut H) {
157        self.name.hash(state);
158        self.has_default.hash(state);
159        self.is_variadic.hash(state);
160        self.is_byref.hash(state);
161        self.is_optional.hash(state);
162        if let Some(ty) = &self.ty {
163            // Hash the Arc pointer address. Since interned types reuse Arc allocations,
164            // parameters with the same type will have the same pointer.
165            (Arc::as_ptr(ty) as usize).hash(state);
166        } else {
167            0u8.hash(state);
168        }
169    }
170}
171
172// Serde helpers to transparently convert between Option<Union> and Option<Arc<Union>>
173fn deserialize_param_type<'de, D>(deserializer: D) -> Result<Option<Arc<Union>>, D::Error>
174where
175    D: serde::Deserializer<'de>,
176{
177    Option::<Union>::deserialize(deserializer).map(|opt| opt.map(interned_types::intern_or_wrap))
178}
179
180fn serialize_param_type<S>(value: &Option<Arc<Union>>, serializer: S) -> Result<S::Ok, S::Error>
181where
182    S: serde::Serializer,
183{
184    let opt = value.as_ref().map(|arc| (**arc).clone());
185    opt.serialize(serializer)
186}
187
188fn deserialize_return_type<'de, D>(deserializer: D) -> Result<Option<Arc<Union>>, D::Error>
189where
190    D: serde::Deserializer<'de>,
191{
192    Option::<Union>::deserialize(deserializer).map(|opt| opt.map(interned_types::intern_or_wrap))
193}
194
195fn serialize_return_type<S>(value: &Option<Arc<Union>>, serializer: S) -> Result<S::Ok, S::Error>
196where
197    S: serde::Serializer,
198{
199    let opt = value.as_ref().map(|arc| (**arc).clone());
200    opt.serialize(serializer)
201}
202
203fn deserialize_params<'de, D>(deserializer: D) -> Result<Arc<[FnParam]>, D::Error>
204where
205    D: serde::Deserializer<'de>,
206{
207    Vec::<FnParam>::deserialize(deserializer).map(|v| Arc::from(v.into_boxed_slice()))
208}
209
210fn serialize_params<S>(value: &Arc<[FnParam]>, serializer: S) -> Result<S::Ok, S::Error>
211where
212    S: serde::Serializer,
213{
214    value.as_ref().serialize(serializer)
215}
216
217/// Helper to wrap Option<Union> in interned Arc<Union>.
218pub fn wrap_param_type(ty: Option<Union>) -> Option<Arc<Union>> {
219    ty.map(interned_types::intern_or_wrap)
220}
221
222/// Helper to wrap return type Option<Union> in interned Arc<Union>.
223pub fn wrap_return_type(ty: Option<Union>) -> Option<Arc<Union>> {
224    ty.map(interned_types::intern_or_wrap)
225}
226
227// ---------------------------------------------------------------------------
228// Location — file + pre-computed line/col span
229// ---------------------------------------------------------------------------
230
/// Declaration location.
///
/// Columns are 0-based Unicode scalar value (code-point) counts, equivalent to
/// LSP `utf-32` position encoding. Convert to UTF-16 code units at the LSP
/// boundary for clients that do not advertise `utf-32` support.
///
/// Note the mixed bases: lines are 1-based, columns are 0-based.
#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
pub struct Location {
    /// File the span belongs to (whether this is a path or a URI is not visible
    /// here — confirm with the producer).
    pub file: Arc<str>,
    /// 1-based start line.
    pub line: u32,
    /// 1-based end line (inclusive). Equal to `line` for single-line spans.
    pub line_end: u32,
    /// 0-based Unicode code-point column of the span start.
    pub col_start: u16,
    /// 0-based Unicode code-point column of the span end (exclusive).
    pub col_end: u16,
}
248
249impl Location {
250    pub fn new(file: Arc<str>, line: u32, line_end: u32, col_start: u16, col_end: u16) -> Self {
251        Self {
252            file,
253            line,
254            line_end,
255            col_start,
256            col_end,
257        }
258    }
259}
260
261// ---------------------------------------------------------------------------
262// Assertion — `@psalm-assert`, `@psalm-assert-if-true`, etc.
263// ---------------------------------------------------------------------------
264
/// Flavour of a docblock assertion annotation.
///
/// NOTE(review): variants presumably correspond to `@psalm-assert`,
/// `@psalm-assert-if-true` and `@psalm-assert-if-false` respectively — confirm
/// against the docblock parser.
#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
pub enum AssertionKind {
    Assert,
    AssertIfTrue,
    AssertIfFalse,
}
271
/// A single type assertion attached to a function or method.
#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
pub struct Assertion {
    /// Which assertion flavour this is.
    pub kind: AssertionKind,
    /// Name of the parameter the assertion is about (whether the leading `$` is
    /// included is not visible here).
    pub param: Arc<str>,
    /// The asserted type.
    pub ty: Union,
}
278
279// ---------------------------------------------------------------------------
280// MethodStorage
281// ---------------------------------------------------------------------------
282
/// Everything recorded about one method declaration.
///
/// Field order is part of the serialized layout for non-self-describing formats;
/// do not reorder fields without regenerating serialized data.
#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
pub struct MethodStorage {
    /// Method name.
    pub name: Arc<str>,
    /// Fully-qualified class name associated with this method (presumably the
    /// declaring class — confirm with the collector).
    pub fqcn: Arc<str>,
    /// Parameter list, stored behind an `Arc` so identical lists can share one
    /// allocation. (De)serialized as a plain sequence via
    /// `deserialize_params` / `serialize_params`.
    #[serde(
        deserialize_with = "deserialize_params",
        serialize_with = "serialize_params"
    )]
    pub params: Arc<[FnParam]>,
    /// Type from annotation (`@return` / native type hint). `None` means unannotated.
    /// Stored as `Option<Arc<Union>>` to enable deduplication of common return types
    /// (e.g., `void`, `string`, `mixed`, `bool`) across thousands of methods.
    #[serde(
        deserialize_with = "deserialize_return_type",
        serialize_with = "serialize_return_type"
    )]
    pub return_type: Option<Arc<Union>>,
    /// Type inferred from body analysis (filled in during pass 2).
    pub inferred_return_type: Option<Union>,
    /// Declared visibility.
    pub visibility: Visibility,
    pub is_static: bool,
    pub is_abstract: bool,
    pub is_final: bool,
    pub is_constructor: bool,
    /// Template parameters attached to this method.
    pub template_params: Vec<TemplateParam>,
    /// Type assertions attached to this method.
    pub assertions: Vec<Assertion>,
    /// Names recorded as thrown (presumably exception class names from `@throws`
    /// — confirm).
    pub throws: Vec<Arc<str>>,
    /// `Some` when deprecated; the string presumably carries the deprecation
    /// message — confirm.
    pub deprecated: Option<Arc<str>>,
    pub is_internal: bool,
    pub is_pure: bool,
    /// Where the method is declared, when known.
    pub location: Option<Location>,
    /// Plain-text description from the docblock (text before `@tag` lines).
    /// Used for hover info.
    #[serde(default)]
    pub docstring: Option<Arc<str>>,
}
319
320impl MethodStorage {
321    pub fn effective_return_type(&self) -> Option<&Union> {
322        self.return_type
323            .as_deref()
324            .or(self.inferred_return_type.as_ref())
325    }
326}
327
328// ---------------------------------------------------------------------------
329// PropertyStorage
330// ---------------------------------------------------------------------------
331
/// Everything recorded about one property declaration.
#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
pub struct PropertyStorage {
    /// Property name (the key used by `ClassStorage::own_properties`).
    pub name: Arc<str>,
    /// Declared type, if any.
    pub ty: Option<Union>,
    /// Inferred type, if any (how/when it is filled in is not visible here).
    pub inferred_ty: Option<Union>,
    /// Declared visibility.
    pub visibility: Visibility,
    pub is_static: bool,
    pub is_readonly: bool,
    /// NOTE(review): presumably the type of the default value rather than the
    /// value itself — confirm with the collector.
    pub default: Option<Union>,
    /// Where the property is declared, when known.
    pub location: Option<Location>,
}
343
344// ---------------------------------------------------------------------------
345// ConstantStorage
346// ---------------------------------------------------------------------------
347
/// Everything recorded about one constant declaration.
#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
pub struct ConstantStorage {
    /// Constant name.
    pub name: Arc<str>,
    /// Type of the constant.
    pub ty: Union,
    /// Visibility, when one is recorded (`None` presumably means no explicit
    /// modifier — confirm).
    pub visibility: Option<Visibility>,
    /// Defaults to `false` when absent from serialized data.
    #[serde(default)]
    pub is_final: bool,
    /// Where the constant is declared, when known.
    pub location: Option<Location>,
}
357
358// ---------------------------------------------------------------------------
359// ClassStorage
360// ---------------------------------------------------------------------------
361
/// Everything recorded about one class declaration.
///
/// Only the class's *own* members are stored here; `get_method` documents that
/// inherited lookup is handled elsewhere. Fields marked `#[serde(default)]` fall
/// back to their empty default when missing from serialized data.
#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
pub struct ClassStorage {
    /// Fully-qualified class name.
    pub fqcn: Arc<str>,
    /// Short name (presumably the FQCN without its namespace — confirm).
    pub short_name: Arc<str>,
    /// Parent class, if any.
    pub parent: Option<Arc<str>>,
    /// Interface names recorded for this class.
    pub interfaces: Vec<Arc<str>>,
    /// Trait names recorded for this class.
    pub traits: Vec<Arc<str>>,
    /// Methods declared on this class, in insertion order, keyed by name
    /// (`get_method` expects callers to pass a lowercase name).
    pub own_methods: IndexMap<Arc<str>, Arc<MethodStorage>>,
    /// Properties declared on this class, in insertion order, keyed by name.
    pub own_properties: IndexMap<Arc<str>, PropertyStorage>,
    /// Constants declared on this class, in insertion order, keyed by name.
    pub own_constants: IndexMap<Arc<str>, ConstantStorage>,
    /// Mixin class names (presumably from `@mixin` — confirm).
    #[serde(default)]
    pub mixins: Vec<Arc<str>>,
    /// Template parameters declared on this class.
    pub template_params: Vec<TemplateParam>,
    /// Type arguments from `@extends ParentClass<T1, T2>` — maps parent's template params to concrete types.
    pub extends_type_args: Vec<Union>,
    /// Type arguments from `@implements Interface<T1, T2>`.
    #[serde(default)]
    pub implements_type_args: Vec<(Arc<str>, Vec<Union>)>,
    pub is_abstract: bool,
    pub is_final: bool,
    pub is_readonly: bool,
    /// `Some` when deprecated; the string presumably carries the deprecation
    /// message — confirm.
    pub deprecated: Option<Arc<str>>,
    pub is_internal: bool,
    /// Where the class is declared, when known.
    pub location: Option<Location>,
    /// Type aliases declared on this class via `@psalm-type` / `@phpstan-type`.
    #[serde(default)]
    pub type_aliases: std::collections::HashMap<Arc<str>, Union>,
    /// Raw import-type declarations (`(local_name, original_name, from_class)`) — resolved during finalization.
    #[serde(default)]
    pub pending_import_types: Vec<(Arc<str>, Arc<str>, Arc<str>)>,
}
393
394impl ClassStorage {
395    pub fn get_method(&self, name: &str) -> Option<&MethodStorage> {
396        // PHP method names are case-insensitive; caller should pass lowercase name.
397        // Only searches own_methods — inherited method resolution is done by
398        // `db::lookup_method_in_chain`.
399        self.own_methods.get(name).map(Arc::as_ref).or_else(|| {
400            self.own_methods
401                .iter()
402                .find(|(k, _)| k.as_ref().eq_ignore_ascii_case(name))
403                .map(|(_, v)| v.as_ref())
404        })
405    }
406
407    pub fn get_property(&self, name: &str) -> Option<&PropertyStorage> {
408        self.own_properties.get(name)
409    }
410}
411
412// ---------------------------------------------------------------------------
413// InterfaceStorage
414// ---------------------------------------------------------------------------
415
/// Everything recorded about one interface declaration.
#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
pub struct InterfaceStorage {
    /// Fully-qualified interface name.
    pub fqcn: Arc<str>,
    /// Short name (presumably the FQCN without its namespace — confirm).
    pub short_name: Arc<str>,
    /// Names of the interfaces this interface extends.
    pub extends: Vec<Arc<str>>,
    /// Methods declared on this interface, in insertion order, keyed by name.
    pub own_methods: IndexMap<Arc<str>, Arc<MethodStorage>>,
    /// Constants declared on this interface, in insertion order, keyed by name.
    pub own_constants: IndexMap<Arc<str>, ConstantStorage>,
    /// Template parameters declared on this interface.
    pub template_params: Vec<TemplateParam>,
    /// Where the interface is declared, when known.
    pub location: Option<Location>,
}
426
427// ---------------------------------------------------------------------------
428// TraitStorage
429// ---------------------------------------------------------------------------
430
/// Everything recorded about one trait declaration.
#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
pub struct TraitStorage {
    /// Fully-qualified trait name.
    pub fqcn: Arc<str>,
    /// Short name (presumably the FQCN without its namespace — confirm).
    pub short_name: Arc<str>,
    /// Methods declared on this trait, in insertion order, keyed by name.
    pub own_methods: IndexMap<Arc<str>, Arc<MethodStorage>>,
    /// Properties declared on this trait, in insertion order, keyed by name.
    pub own_properties: IndexMap<Arc<str>, PropertyStorage>,
    /// Constants declared on this trait, in insertion order, keyed by name.
    pub own_constants: IndexMap<Arc<str>, ConstantStorage>,
    /// Template parameters declared on this trait.
    pub template_params: Vec<TemplateParam>,
    /// Traits used by this trait (`use OtherTrait;` inside a trait body).
    pub traits: Vec<Arc<str>>,
    /// Where the trait is declared, when known.
    pub location: Option<Location>,
    /// `@psalm-require-extends` / `@phpstan-require-extends` — FQCNs that using classes must extend.
    #[serde(default)]
    pub require_extends: Vec<Arc<str>>,
    /// `@psalm-require-implements` / `@phpstan-require-implements` — FQCNs that using classes must implement.
    #[serde(default)]
    pub require_implements: Vec<Arc<str>>,
}
449
450// ---------------------------------------------------------------------------
451// EnumStorage
452// ---------------------------------------------------------------------------
453
/// One case of an enum.
#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
pub struct EnumCaseStorage {
    /// Case name.
    pub name: Arc<str>,
    /// Value associated with the case, if any (presumably the backing value's
    /// type for backed enums, `None` otherwise — confirm).
    pub value: Option<Union>,
    /// Where the case is declared, when known.
    pub location: Option<Location>,
}
460
/// Everything recorded about one enum declaration.
#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
pub struct EnumStorage {
    /// Fully-qualified enum name.
    pub fqcn: Arc<str>,
    /// Short name (presumably the FQCN without its namespace — confirm).
    pub short_name: Arc<str>,
    /// Scalar type of the enum, if any (presumably the backing type of a backed
    /// enum — confirm).
    pub scalar_type: Option<Union>,
    /// Interface names recorded for this enum.
    pub interfaces: Vec<Arc<str>>,
    /// Cases in insertion order, keyed by name.
    pub cases: IndexMap<Arc<str>, EnumCaseStorage>,
    /// Methods declared on this enum, in insertion order, keyed by name.
    pub own_methods: IndexMap<Arc<str>, Arc<MethodStorage>>,
    /// Constants declared on this enum, in insertion order, keyed by name.
    pub own_constants: IndexMap<Arc<str>, ConstantStorage>,
    /// Where the enum is declared, when known.
    pub location: Option<Location>,
}
472
473// ---------------------------------------------------------------------------
474// FunctionStorage
475// ---------------------------------------------------------------------------
476
/// Everything recorded about one free function declaration.
///
/// Field order is part of the serialized layout for non-self-describing formats;
/// do not reorder fields without regenerating serialized data.
#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
pub struct FunctionStorage {
    /// Fully-qualified function name.
    pub fqn: Arc<str>,
    /// Short name (presumably the FQN without its namespace — confirm).
    pub short_name: Arc<str>,
    /// Parameter list, stored behind an `Arc` so identical lists can share one
    /// allocation. (De)serialized as a plain sequence via
    /// `deserialize_params` / `serialize_params`.
    #[serde(
        deserialize_with = "deserialize_params",
        serialize_with = "serialize_params"
    )]
    pub params: Arc<[FnParam]>,
    /// Type from annotation (`@return` / native type hint). `None` means unannotated.
    /// Stored as `Option<Arc<Union>>` to enable deduplication of common return types.
    #[serde(
        deserialize_with = "deserialize_return_type",
        serialize_with = "serialize_return_type"
    )]
    pub return_type: Option<Arc<Union>>,
    /// Inferred return type, if any; `effective_return_type` falls back to it
    /// when `return_type` is `None`.
    pub inferred_return_type: Option<Union>,
    /// Template parameters attached to this function.
    pub template_params: Vec<TemplateParam>,
    /// Type assertions attached to this function.
    pub assertions: Vec<Assertion>,
    /// Names recorded as thrown (presumably exception class names from `@throws`
    /// — confirm).
    pub throws: Vec<Arc<str>>,
    /// `Some` when deprecated; the string presumably carries the deprecation
    /// message — confirm.
    pub deprecated: Option<Arc<str>>,
    pub is_pure: bool,
    /// Where the function is declared, when known.
    pub location: Option<Location>,
    /// Plain-text description from the docblock (text before `@tag` lines).
    /// Used for hover info.
    #[serde(default)]
    pub docstring: Option<Arc<str>>,
}
505
506impl FunctionStorage {
507    pub fn effective_return_type(&self) -> Option<&Union> {
508        self.return_type
509            .as_deref()
510            .or(self.inferred_return_type.as_ref())
511    }
512}
513
514// ---------------------------------------------------------------------------
515// StubSlice — serializable bundle of definitions from one extension's stubs
516// ---------------------------------------------------------------------------
517
/// A snapshot of all PHP definitions contributed by a single stub file set.
///
/// Produced by `mir-stubs-gen` at code-generation time and deserialized at
/// runtime to ingest definitions into the salsa db via
/// `MirDatabase::ingest_stub_slice`.
///
/// Fields marked `#[serde(default)]` fall back to their empty default when they
/// are missing from serialized data.
#[derive(Debug, Clone, Default, serde::Serialize, serde::Deserialize)]
pub struct StubSlice {
    /// Class definitions in this slice.
    pub classes: Vec<ClassStorage>,
    /// Interface definitions in this slice.
    pub interfaces: Vec<InterfaceStorage>,
    /// Trait definitions in this slice.
    pub traits: Vec<TraitStorage>,
    /// Enum definitions in this slice.
    pub enums: Vec<EnumStorage>,
    /// Free function definitions in this slice.
    pub functions: Vec<FunctionStorage>,
    /// Constants as `(name, type)` pairs (presumably global constants — confirm).
    #[serde(default)]
    pub constants: Vec<(Arc<str>, Union)>,
    /// Source file this slice was collected from. `None` for bundled stub slices
    /// that were pre-computed and are not tied to a specific on-disk file.
    #[serde(default)]
    pub file: Option<Arc<str>>,
    /// Types of `@var`-annotated global variables collected from this file.
    /// Populated by `DefinitionCollector`; ingested into the salsa db's
    /// `global_vars` table by `ingest_stub_slice` when `file` is `Some`.
    #[serde(default)]
    pub global_vars: Vec<(Arc<str>, Union)>,
    /// The first namespace declared in this file (e.g. `"App\\Service"`).
    /// Populated by `DefinitionCollector`; ingested into the salsa db's
    /// `file_namespaces` table by `ingest_stub_slice` when `file` is `Some`.
    #[serde(default)]
    pub namespace: Option<Arc<str>>,
    /// `use` alias map for this file: alias → FQCN.
    /// Populated by `DefinitionCollector`; ingested into the salsa db's
    /// `file_imports` table by `ingest_stub_slice` when `file` is `Some`.
    #[serde(default)]
    pub imports: std::collections::HashMap<String, String>,
}
552
553// ---------------------------------------------------------------------------
554// Param list deduplication
555// ---------------------------------------------------------------------------
556
557use std::sync::Mutex;
558
559/// Global cache of canonical Arc<[FnParam]> instances for deduplication.
560/// Shared across all StubSlices to deduplicate vendor code with millions of
561/// methods that often have identical parameter lists.
562static PARAM_DEDUP_CACHE: std::sync::OnceLock<Mutex<Vec<Arc<[FnParam]>>>> =
563    std::sync::OnceLock::new();
564
565/// Deduplicate parameter lists across all methods and functions in a StubSlice.
566/// Many PHP framework methods share identical parameter lists (e.g., thousands
567/// of `(string $arg, array $opts)` signatures). This function groups identical
568/// param lists globally (across all slices processed so far) and replaces them
569/// with Arc<[FnParam]> pointers to shared allocations.
570///
571/// Expected memory savings: 100–150 MiB on cold start (vendor collection).
572pub fn deduplicate_params_in_slice(slice: &mut StubSlice) {
573    let cache = PARAM_DEDUP_CACHE.get_or_init(|| Mutex::new(Vec::new()));
574    let mut canonical_params = cache.lock().unwrap();
575
576    // Helper to find or insert a param list in the global cache
577    let mut deduplicate = |params: &mut Arc<[FnParam]>| {
578        // Check if this param list already exists in our global cache
579        for existing in canonical_params.iter() {
580            if existing.as_ref() == params.as_ref() {
581                // Found a match, replace with the cached Arc
582                *params = existing.clone();
583                return;
584            }
585        }
586        // Not found, add this as a new canonical param list
587        canonical_params.push(params.clone());
588    };
589
590    // Deduplicate method params in all classes
591    for cls in &mut slice.classes {
592        for method in cls.own_methods.values_mut() {
593            deduplicate(&mut Arc::make_mut(method).params);
594        }
595    }
596
597    // Deduplicate method params in all interfaces
598    for iface in &mut slice.interfaces {
599        for method in iface.own_methods.values_mut() {
600            deduplicate(&mut Arc::make_mut(method).params);
601        }
602    }
603
604    // Deduplicate method params in all traits
605    for tr in &mut slice.traits {
606        for method in tr.own_methods.values_mut() {
607            deduplicate(&mut Arc::make_mut(method).params);
608        }
609    }
610
611    // Deduplicate method params in all enums
612    for en in &mut slice.enums {
613        for method in en.own_methods.values_mut() {
614            deduplicate(&mut Arc::make_mut(method).params);
615        }
616    }
617
618    // Deduplicate function params
619    for func in &mut slice.functions {
620        deduplicate(&mut func.params);
621    }
622}