Skip to main content

mir_codebase/
storage.rs

1use std::sync::Arc;
2
3use indexmap::IndexMap;
4use mir_types::Union;
5use serde::{Deserialize, Serialize};
6
7// ---------------------------------------------------------------------------
8// Interned common types for deduplication
9// ---------------------------------------------------------------------------
10
11/// Interned Union types for common parameter/property types.
12/// Deduplicates allocations when thousands of parameters share types like `string`, `int`, etc.
13mod interned_types {
14    use super::*;
15    use std::sync::OnceLock;
16
17    fn intern_string() -> Arc<Union> {
18        Arc::new(Union::string())
19    }
20
21    fn intern_int() -> Arc<Union> {
22        Arc::new(Union::int())
23    }
24
25    fn intern_float() -> Arc<Union> {
26        Arc::new(Union::float())
27    }
28
29    fn intern_bool() -> Arc<Union> {
30        Arc::new(Union::bool())
31    }
32
33    fn intern_mixed() -> Arc<Union> {
34        Arc::new(Union::mixed())
35    }
36
37    fn intern_null() -> Arc<Union> {
38        Arc::new(Union::null())
39    }
40
41    fn intern_void() -> Arc<Union> {
42        Arc::new(Union::void())
43    }
44
45    static STRING: OnceLock<Arc<Union>> = OnceLock::new();
46    static INT: OnceLock<Arc<Union>> = OnceLock::new();
47    static FLOAT: OnceLock<Arc<Union>> = OnceLock::new();
48    static BOOL: OnceLock<Arc<Union>> = OnceLock::new();
49    static MIXED: OnceLock<Arc<Union>> = OnceLock::new();
50    static NULL: OnceLock<Arc<Union>> = OnceLock::new();
51    static VOID: OnceLock<Arc<Union>> = OnceLock::new();
52
53    pub fn string() -> Arc<Union> {
54        STRING.get_or_init(intern_string).clone()
55    }
56
57    pub fn int() -> Arc<Union> {
58        INT.get_or_init(intern_int).clone()
59    }
60
61    pub fn float() -> Arc<Union> {
62        FLOAT.get_or_init(intern_float).clone()
63    }
64
65    pub fn bool() -> Arc<Union> {
66        BOOL.get_or_init(intern_bool).clone()
67    }
68
69    pub fn mixed() -> Arc<Union> {
70        MIXED.get_or_init(intern_mixed).clone()
71    }
72
73    pub fn null() -> Arc<Union> {
74        NULL.get_or_init(intern_null).clone()
75    }
76
77    pub fn void() -> Arc<Union> {
78        VOID.get_or_init(intern_void).clone()
79    }
80
81    /// Try to intern a Union if it matches a common type, otherwise wrap in Arc.
82    pub fn intern_or_wrap(union: Union) -> Arc<Union> {
83        // Check if this is a single-atomic type that we intern
84        if union.types.len() == 1 && !union.possibly_undefined && !union.from_docblock {
85            match &union.types[0] {
86                mir_types::Atomic::TString => return string(),
87                mir_types::Atomic::TInt => return int(),
88                mir_types::Atomic::TFloat => return float(),
89                mir_types::Atomic::TBool => return bool(),
90                mir_types::Atomic::TMixed => return mixed(),
91                mir_types::Atomic::TNull => return null(),
92                mir_types::Atomic::TVoid => return void(),
93                _ => {}
94            }
95        }
96        Arc::new(union)
97    }
98}
99
100// ---------------------------------------------------------------------------
101// Shared primitives
102// ---------------------------------------------------------------------------
103
104#[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Serialize, Deserialize)]
105pub enum Visibility {
106    Public,
107    Protected,
108    Private,
109}
110
111impl Visibility {
112    pub fn is_at_least(&self, required: Visibility) -> bool {
113        *self <= required
114    }
115}
116
117impl std::fmt::Display for Visibility {
118    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
119        match self {
120            Visibility::Public => write!(f, "public"),
121            Visibility::Protected => write!(f, "protected"),
122            Visibility::Private => write!(f, "private"),
123        }
124    }
125}
126
127#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
128pub struct TemplateParam {
129    pub name: Arc<str>,
130    pub bound: Option<Union>,
131    /// The entity (class or function FQN) that declared this template param.
132    pub defining_entity: Arc<str>,
133    pub variance: mir_types::Variance,
134}
135
136#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
137pub struct FnParam {
138    pub name: Arc<str>,
139    /// Parameter type. Stored as `Option<Arc<Union>>` to enable deduplication of
140    /// common types across parameters. Many parameters share types like `string`,
141    /// `int`, `bool`, etc., so interning via Arc saves allocations.
142    #[serde(
143        deserialize_with = "deserialize_param_type",
144        serialize_with = "serialize_param_type"
145    )]
146    pub ty: Option<Arc<Union>>,
147    /// Whether this parameter has a default value. During analysis, defaults are
148    /// never used for their value — only for marking parameters as optional.
149    pub has_default: bool,
150    pub is_variadic: bool,
151    pub is_byref: bool,
152    pub is_optional: bool,
153}
154
155impl std::hash::Hash for FnParam {
156    fn hash<H: std::hash::Hasher>(&self, state: &mut H) {
157        self.name.hash(state);
158        self.has_default.hash(state);
159        self.is_variadic.hash(state);
160        self.is_byref.hash(state);
161        self.is_optional.hash(state);
162        if let Some(ty) = &self.ty {
163            // Hash the Arc pointer address. Since interned types reuse Arc allocations,
164            // parameters with the same type will have the same pointer.
165            (Arc::as_ptr(ty) as usize).hash(state);
166        } else {
167            0u8.hash(state);
168        }
169    }
170}
171
172// Serde helpers to transparently convert between Option<Union> and Option<Arc<Union>>
173fn deserialize_param_type<'de, D>(deserializer: D) -> Result<Option<Arc<Union>>, D::Error>
174where
175    D: serde::Deserializer<'de>,
176{
177    Option::<Union>::deserialize(deserializer).map(|opt| opt.map(interned_types::intern_or_wrap))
178}
179
180fn serialize_param_type<S>(value: &Option<Arc<Union>>, serializer: S) -> Result<S::Ok, S::Error>
181where
182    S: serde::Serializer,
183{
184    let opt = value.as_ref().map(|arc| (**arc).clone());
185    opt.serialize(serializer)
186}
187
188fn deserialize_return_type<'de, D>(deserializer: D) -> Result<Option<Arc<Union>>, D::Error>
189where
190    D: serde::Deserializer<'de>,
191{
192    Option::<Union>::deserialize(deserializer).map(|opt| opt.map(interned_types::intern_or_wrap))
193}
194
195fn serialize_return_type<S>(value: &Option<Arc<Union>>, serializer: S) -> Result<S::Ok, S::Error>
196where
197    S: serde::Serializer,
198{
199    let opt = value.as_ref().map(|arc| (**arc).clone());
200    opt.serialize(serializer)
201}
202
203fn deserialize_params<'de, D>(deserializer: D) -> Result<Arc<[FnParam]>, D::Error>
204where
205    D: serde::Deserializer<'de>,
206{
207    Vec::<FnParam>::deserialize(deserializer).map(|v| Arc::from(v.into_boxed_slice()))
208}
209
210fn serialize_params<S>(value: &Arc<[FnParam]>, serializer: S) -> Result<S::Ok, S::Error>
211where
212    S: serde::Serializer,
213{
214    value.as_ref().serialize(serializer)
215}
216
217/// Helper to wrap Option<Union> in interned Arc<Union>.
218pub fn wrap_param_type(ty: Option<Union>) -> Option<Arc<Union>> {
219    ty.map(interned_types::intern_or_wrap)
220}
221
222/// Helper to wrap return type Option<Union> in interned Arc<Union>.
223pub fn wrap_return_type(ty: Option<Union>) -> Option<Arc<Union>> {
224    ty.map(interned_types::intern_or_wrap)
225}
226
227// ---------------------------------------------------------------------------
228// Location — file + pre-computed line/col span
229// ---------------------------------------------------------------------------
230
231/// Declaration location.
232///
233/// Columns are 0-based Unicode scalar value (code-point) counts, equivalent to
234/// LSP `utf-32` position encoding. Convert to UTF-16 code units at the LSP
235/// boundary for clients that do not advertise `utf-32` support.
236#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
237pub struct Location {
238    pub file: Arc<str>,
239    /// 1-based start line.
240    pub line: u32,
241    /// 1-based end line (inclusive). Equal to `line` for single-line spans.
242    pub line_end: u32,
243    /// 0-based Unicode code-point column of the span start.
244    pub col_start: u16,
245    /// 0-based Unicode code-point column of the span end (exclusive).
246    pub col_end: u16,
247}
248
249impl Location {
250    pub fn new(file: Arc<str>, line: u32, line_end: u32, col_start: u16, col_end: u16) -> Self {
251        Self {
252            file,
253            line,
254            line_end,
255            col_start,
256            col_end,
257        }
258    }
259}
260
261// ---------------------------------------------------------------------------
262// Assertion — `@psalm-assert`, `@psalm-assert-if-true`, etc.
263// ---------------------------------------------------------------------------
264
265#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
266pub enum AssertionKind {
267    Assert,
268    AssertIfTrue,
269    AssertIfFalse,
270}
271
272#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
273pub struct Assertion {
274    pub kind: AssertionKind,
275    pub param: Arc<str>,
276    pub ty: Union,
277}
278
279// ---------------------------------------------------------------------------
280// MethodStorage
281// ---------------------------------------------------------------------------
282
283#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
284pub struct MethodStorage {
285    pub name: Arc<str>,
286    pub fqcn: Arc<str>,
287    #[serde(
288        deserialize_with = "deserialize_params",
289        serialize_with = "serialize_params"
290    )]
291    pub params: Arc<[FnParam]>,
292    /// Type from annotation (`@return` / native type hint). `None` means unannotated.
293    /// Stored as `Option<Arc<Union>>` to enable deduplication of common return types
294    /// (e.g., `void`, `string`, `mixed`, `bool`) across thousands of methods.
295    #[serde(
296        deserialize_with = "deserialize_return_type",
297        serialize_with = "serialize_return_type"
298    )]
299    pub return_type: Option<Arc<Union>>,
300    /// Type inferred from body analysis (filled in during pass 2).
301    pub inferred_return_type: Option<Union>,
302    pub visibility: Visibility,
303    pub is_static: bool,
304    pub is_abstract: bool,
305    pub is_final: bool,
306    pub is_constructor: bool,
307    pub template_params: Vec<TemplateParam>,
308    pub assertions: Vec<Assertion>,
309    pub throws: Vec<Arc<str>>,
310    pub deprecated: Option<Arc<str>>,
311    pub is_internal: bool,
312    pub is_pure: bool,
313    pub location: Option<Location>,
314}
315
316impl MethodStorage {
317    pub fn effective_return_type(&self) -> Option<&Union> {
318        self.return_type
319            .as_deref()
320            .or(self.inferred_return_type.as_ref())
321    }
322}
323
324// ---------------------------------------------------------------------------
325// PropertyStorage
326// ---------------------------------------------------------------------------
327
328#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
329pub struct PropertyStorage {
330    pub name: Arc<str>,
331    pub ty: Option<Union>,
332    pub inferred_ty: Option<Union>,
333    pub visibility: Visibility,
334    pub is_static: bool,
335    pub is_readonly: bool,
336    pub default: Option<Union>,
337    pub location: Option<Location>,
338}
339
340// ---------------------------------------------------------------------------
341// ConstantStorage
342// ---------------------------------------------------------------------------
343
344#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
345pub struct ConstantStorage {
346    pub name: Arc<str>,
347    pub ty: Union,
348    pub visibility: Option<Visibility>,
349    #[serde(default)]
350    pub is_final: bool,
351    pub location: Option<Location>,
352}
353
354// ---------------------------------------------------------------------------
355// ClassStorage
356// ---------------------------------------------------------------------------
357
358#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
359pub struct ClassStorage {
360    pub fqcn: Arc<str>,
361    pub short_name: Arc<str>,
362    pub parent: Option<Arc<str>>,
363    pub interfaces: Vec<Arc<str>>,
364    pub traits: Vec<Arc<str>>,
365    pub own_methods: IndexMap<Arc<str>, Arc<MethodStorage>>,
366    pub own_properties: IndexMap<Arc<str>, PropertyStorage>,
367    pub own_constants: IndexMap<Arc<str>, ConstantStorage>,
368    #[serde(default)]
369    pub mixins: Vec<Arc<str>>,
370    pub template_params: Vec<TemplateParam>,
371    /// Type arguments from `@extends ParentClass<T1, T2>` — maps parent's template params to concrete types.
372    pub extends_type_args: Vec<Union>,
373    /// Type arguments from `@implements Interface<T1, T2>`.
374    #[serde(default)]
375    pub implements_type_args: Vec<(Arc<str>, Vec<Union>)>,
376    pub is_abstract: bool,
377    pub is_final: bool,
378    pub is_readonly: bool,
379    pub deprecated: Option<Arc<str>>,
380    pub is_internal: bool,
381    pub location: Option<Location>,
382    /// Type aliases declared on this class via `@psalm-type` / `@phpstan-type`.
383    #[serde(default)]
384    pub type_aliases: std::collections::HashMap<Arc<str>, Union>,
385    /// Raw import-type declarations (`(local_name, original_name, from_class)`) — resolved during finalization.
386    #[serde(default)]
387    pub pending_import_types: Vec<(Arc<str>, Arc<str>, Arc<str>)>,
388}
389
390impl ClassStorage {
391    pub fn get_method(&self, name: &str) -> Option<&MethodStorage> {
392        // PHP method names are case-insensitive; caller should pass lowercase name.
393        // Only searches own_methods — inherited method resolution is done by
394        // `db::lookup_method_in_chain`.
395        self.own_methods.get(name).map(Arc::as_ref).or_else(|| {
396            self.own_methods
397                .iter()
398                .find(|(k, _)| k.as_ref().eq_ignore_ascii_case(name))
399                .map(|(_, v)| v.as_ref())
400        })
401    }
402
403    pub fn get_property(&self, name: &str) -> Option<&PropertyStorage> {
404        self.own_properties.get(name)
405    }
406}
407
408// ---------------------------------------------------------------------------
409// InterfaceStorage
410// ---------------------------------------------------------------------------
411
412#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
413pub struct InterfaceStorage {
414    pub fqcn: Arc<str>,
415    pub short_name: Arc<str>,
416    pub extends: Vec<Arc<str>>,
417    pub own_methods: IndexMap<Arc<str>, Arc<MethodStorage>>,
418    pub own_constants: IndexMap<Arc<str>, ConstantStorage>,
419    pub template_params: Vec<TemplateParam>,
420    pub location: Option<Location>,
421}
422
423// ---------------------------------------------------------------------------
424// TraitStorage
425// ---------------------------------------------------------------------------
426
427#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
428pub struct TraitStorage {
429    pub fqcn: Arc<str>,
430    pub short_name: Arc<str>,
431    pub own_methods: IndexMap<Arc<str>, Arc<MethodStorage>>,
432    pub own_properties: IndexMap<Arc<str>, PropertyStorage>,
433    pub own_constants: IndexMap<Arc<str>, ConstantStorage>,
434    pub template_params: Vec<TemplateParam>,
435    /// Traits used by this trait (`use OtherTrait;` inside a trait body).
436    pub traits: Vec<Arc<str>>,
437    pub location: Option<Location>,
438    /// `@psalm-require-extends` / `@phpstan-require-extends` — FQCNs that using classes must extend.
439    #[serde(default)]
440    pub require_extends: Vec<Arc<str>>,
441    /// `@psalm-require-implements` / `@phpstan-require-implements` — FQCNs that using classes must implement.
442    #[serde(default)]
443    pub require_implements: Vec<Arc<str>>,
444}
445
446// ---------------------------------------------------------------------------
447// EnumStorage
448// ---------------------------------------------------------------------------
449
450#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
451pub struct EnumCaseStorage {
452    pub name: Arc<str>,
453    pub value: Option<Union>,
454    pub location: Option<Location>,
455}
456
457#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
458pub struct EnumStorage {
459    pub fqcn: Arc<str>,
460    pub short_name: Arc<str>,
461    pub scalar_type: Option<Union>,
462    pub interfaces: Vec<Arc<str>>,
463    pub cases: IndexMap<Arc<str>, EnumCaseStorage>,
464    pub own_methods: IndexMap<Arc<str>, Arc<MethodStorage>>,
465    pub own_constants: IndexMap<Arc<str>, ConstantStorage>,
466    pub location: Option<Location>,
467}
468
469// ---------------------------------------------------------------------------
470// FunctionStorage
471// ---------------------------------------------------------------------------
472
473#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
474pub struct FunctionStorage {
475    pub fqn: Arc<str>,
476    pub short_name: Arc<str>,
477    #[serde(
478        deserialize_with = "deserialize_params",
479        serialize_with = "serialize_params"
480    )]
481    pub params: Arc<[FnParam]>,
482    /// Type from annotation (`@return` / native type hint). `None` means unannotated.
483    /// Stored as `Option<Arc<Union>>` to enable deduplication of common return types.
484    #[serde(
485        deserialize_with = "deserialize_return_type",
486        serialize_with = "serialize_return_type"
487    )]
488    pub return_type: Option<Arc<Union>>,
489    pub inferred_return_type: Option<Union>,
490    pub template_params: Vec<TemplateParam>,
491    pub assertions: Vec<Assertion>,
492    pub throws: Vec<Arc<str>>,
493    pub deprecated: Option<Arc<str>>,
494    pub is_pure: bool,
495    pub location: Option<Location>,
496}
497
498impl FunctionStorage {
499    pub fn effective_return_type(&self) -> Option<&Union> {
500        self.return_type
501            .as_deref()
502            .or(self.inferred_return_type.as_ref())
503    }
504}
505
506// ---------------------------------------------------------------------------
507// StubSlice — serializable bundle of definitions from one extension's stubs
508// ---------------------------------------------------------------------------
509
510/// A snapshot of all PHP definitions contributed by a single stub file set.
511///
512/// Produced by `mir-stubs-gen` at code-generation time and deserialized at
513/// runtime to ingest definitions into the salsa db via
514/// `MirDatabase::ingest_stub_slice`.
515#[derive(Debug, Clone, Default, serde::Serialize, serde::Deserialize)]
516pub struct StubSlice {
517    pub classes: Vec<ClassStorage>,
518    pub interfaces: Vec<InterfaceStorage>,
519    pub traits: Vec<TraitStorage>,
520    pub enums: Vec<EnumStorage>,
521    pub functions: Vec<FunctionStorage>,
522    #[serde(default)]
523    pub constants: Vec<(Arc<str>, Union)>,
524    /// Source file this slice was collected from. `None` for bundled stub slices
525    /// that were pre-computed and are not tied to a specific on-disk file.
526    #[serde(default)]
527    pub file: Option<Arc<str>>,
528    /// Types of `@var`-annotated global variables collected from this file.
529    /// Populated by `DefinitionCollector`; ingested into the salsa db's
530    /// `global_vars` table by `ingest_stub_slice` when `file` is `Some`.
531    #[serde(default)]
532    pub global_vars: Vec<(Arc<str>, Union)>,
533    /// The first namespace declared in this file (e.g. `"App\\Service"`).
534    /// Populated by `DefinitionCollector`; ingested into the salsa db's
535    /// `file_namespaces` table by `ingest_stub_slice` when `file` is `Some`.
536    #[serde(default)]
537    pub namespace: Option<Arc<str>>,
538    /// `use` alias map for this file: alias → FQCN.
539    /// Populated by `DefinitionCollector`; ingested into the salsa db's
540    /// `file_imports` table by `ingest_stub_slice` when `file` is `Some`.
541    #[serde(default)]
542    pub imports: std::collections::HashMap<String, String>,
543}
544
545// ---------------------------------------------------------------------------
546// Param list deduplication
547// ---------------------------------------------------------------------------
548
549use std::sync::Mutex;
550
551/// Global cache of canonical Arc<[FnParam]> instances for deduplication.
552/// Shared across all StubSlices to deduplicate vendor code with millions of
553/// methods that often have identical parameter lists.
554static PARAM_DEDUP_CACHE: std::sync::OnceLock<Mutex<Vec<Arc<[FnParam]>>>> =
555    std::sync::OnceLock::new();
556
557/// Deduplicate parameter lists across all methods and functions in a StubSlice.
558/// Many PHP framework methods share identical parameter lists (e.g., thousands
559/// of `(string $arg, array $opts)` signatures). This function groups identical
560/// param lists globally (across all slices processed so far) and replaces them
561/// with Arc<[FnParam]> pointers to shared allocations.
562///
563/// Expected memory savings: 100–150 MiB on cold start (vendor collection).
564pub fn deduplicate_params_in_slice(slice: &mut StubSlice) {
565    let cache = PARAM_DEDUP_CACHE.get_or_init(|| Mutex::new(Vec::new()));
566    let mut canonical_params = cache.lock().expect("param dedup cache poisoned");
567
568    // Helper to find or insert a param list in the global cache
569    let mut deduplicate = |params: &mut Arc<[FnParam]>| {
570        // Check if this param list already exists in our global cache
571        for existing in canonical_params.iter() {
572            if existing.as_ref() == params.as_ref() {
573                // Found a match, replace with the cached Arc
574                *params = existing.clone();
575                return;
576            }
577        }
578        // Not found, add this as a new canonical param list
579        canonical_params.push(params.clone());
580    };
581
582    // Deduplicate method params in all classes
583    for cls in &mut slice.classes {
584        for method in cls.own_methods.values_mut() {
585            deduplicate(&mut Arc::make_mut(method).params);
586        }
587    }
588
589    // Deduplicate method params in all interfaces
590    for iface in &mut slice.interfaces {
591        for method in iface.own_methods.values_mut() {
592            deduplicate(&mut Arc::make_mut(method).params);
593        }
594    }
595
596    // Deduplicate method params in all traits
597    for tr in &mut slice.traits {
598        for method in tr.own_methods.values_mut() {
599            deduplicate(&mut Arc::make_mut(method).params);
600        }
601    }
602
603    // Deduplicate method params in all enums
604    for en in &mut slice.enums {
605        for method in en.own_methods.values_mut() {
606            deduplicate(&mut Arc::make_mut(method).params);
607        }
608    }
609
610    // Deduplicate function params
611    for func in &mut slice.functions {
612        deduplicate(&mut func.params);
613    }
614}