Skip to main content

mir_codebase/
storage.rs

1use std::sync::Arc;
2
3use indexmap::IndexMap;
4use mir_types::{Location, Name, Type};
5use rustc_hash::FxHashMap;
6use serde::{Deserialize, Serialize};
7
8// ---------------------------------------------------------------------------
9// Interned common types for deduplication
10// ---------------------------------------------------------------------------
11
12/// Interned Type types for common parameter/property types.
13/// Deduplicates allocations when thousands of parameters share types like `string`, `int`, etc.
14mod interned_types {
15    use super::*;
16    use std::sync::OnceLock;
17
18    fn intern_string() -> Arc<Type> {
19        Arc::new(Type::string())
20    }
21
22    fn intern_int() -> Arc<Type> {
23        Arc::new(Type::int())
24    }
25
26    fn intern_float() -> Arc<Type> {
27        Arc::new(Type::float())
28    }
29
30    fn intern_bool() -> Arc<Type> {
31        Arc::new(Type::bool())
32    }
33
34    fn intern_mixed() -> Arc<Type> {
35        Arc::new(Type::mixed())
36    }
37
38    fn intern_null() -> Arc<Type> {
39        Arc::new(Type::null())
40    }
41
42    fn intern_void() -> Arc<Type> {
43        Arc::new(Type::void())
44    }
45
46    static STRING: OnceLock<Arc<Type>> = OnceLock::new();
47    static INT: OnceLock<Arc<Type>> = OnceLock::new();
48    static FLOAT: OnceLock<Arc<Type>> = OnceLock::new();
49    static BOOL: OnceLock<Arc<Type>> = OnceLock::new();
50    static MIXED: OnceLock<Arc<Type>> = OnceLock::new();
51    static NULL: OnceLock<Arc<Type>> = OnceLock::new();
52    static VOID: OnceLock<Arc<Type>> = OnceLock::new();
53
54    pub fn string() -> Arc<Type> {
55        STRING.get_or_init(intern_string).clone()
56    }
57
58    pub fn int() -> Arc<Type> {
59        INT.get_or_init(intern_int).clone()
60    }
61
62    pub fn float() -> Arc<Type> {
63        FLOAT.get_or_init(intern_float).clone()
64    }
65
66    pub fn bool() -> Arc<Type> {
67        BOOL.get_or_init(intern_bool).clone()
68    }
69
70    pub fn mixed() -> Arc<Type> {
71        MIXED.get_or_init(intern_mixed).clone()
72    }
73
74    pub fn null() -> Arc<Type> {
75        NULL.get_or_init(intern_null).clone()
76    }
77
78    pub fn void() -> Arc<Type> {
79        VOID.get_or_init(intern_void).clone()
80    }
81
82    /// Global content-keyed `Arc<Type>` interner. Any structurally-identical
83    /// Type is shared as a single Arc across the session.
84    ///
85    /// Why: PHP codebases re-declare a small set of type shapes thousands of
86    /// times — `string|null` return types, `int` params, `array<string, mixed>`
87    /// property types. Without interning, each declaration allocates its own
88    /// `Arc<Type>` plus the inline `SmallVec<[Atomic; 2]>` and any boxed
89    /// `Atomic` payloads. With interning, only the first occurrence allocates.
90    ///
91    /// Trade-off: every `intern_or_wrap` call hashes + does one DashMap lookup.
92    /// Hashing a `Type` is cheap (SmallVec, small atomics) — measured cost is
93    /// well below the alloc-savings benefit on real workloads.
94    static GLOBAL_UNION_INTERN: std::sync::OnceLock<dashmap::DashMap<Type, Arc<Type>>> =
95        std::sync::OnceLock::new();
96
97    fn global_intern_table() -> &'static dashmap::DashMap<Type, Arc<Type>> {
98        GLOBAL_UNION_INTERN.get_or_init(dashmap::DashMap::default)
99    }
100
101    /// Try to intern a Type if it matches a common type, otherwise wrap in Arc.
102    pub fn intern_or_wrap(union: Type) -> Arc<Type> {
103        // Fast path 1: single-atomic scalar — covered by `OnceLock` constants.
104        // Avoids any DashMap traffic for the most common case.
105        if union.types.len() == 1 && !union.possibly_undefined && !union.from_docblock {
106            match &union.types[0] {
107                mir_types::Atomic::TString => return string(),
108                mir_types::Atomic::TInt => return int(),
109                mir_types::Atomic::TFloat => return float(),
110                mir_types::Atomic::TBool => return bool(),
111                mir_types::Atomic::TMixed => return mixed(),
112                mir_types::Atomic::TNull => return null(),
113                mir_types::Atomic::TVoid => return void(),
114                _ => {}
115            }
116        }
117        // Fast path 2: empty Type — also a common case (e.g. unresolved
118        // return type). Don't pollute the intern table with these.
119        if union.types.is_empty() {
120            return Arc::new(union);
121        }
122        // Global path: dedup against any previously-seen identical Type.
123        let table = global_intern_table();
124        if let Some(existing) = table.get(&union) {
125            return Arc::clone(existing.value());
126        }
127        let arc = Arc::new(union.clone());
128        // `insert` semantics: if a parallel thread beat us, its Arc wins.
129        // The lookup-before-insert race is benign — both Arcs are content-
130        // equal — but we still want to share the canonical one going forward.
131        match table.entry(union) {
132            dashmap::mapref::entry::Entry::Occupied(o) => Arc::clone(o.get()),
133            dashmap::mapref::entry::Entry::Vacant(v) => {
134                v.insert(Arc::clone(&arc));
135                arc
136            }
137        }
138    }
139}
140
141// ---------------------------------------------------------------------------
142// Shared primitives
143// ---------------------------------------------------------------------------
144
/// Member visibility level.
///
/// Variant order is significant: the derived `PartialOrd`/`Ord` rank the
/// variants in declaration order (`Public < Protected < Private`), and
/// `Visibility::is_at_least` compares with that ordering.
#[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Serialize, Deserialize)]
pub enum Visibility {
    /// Rendered as `public` by the `Display` impl.
    Public,
    /// Rendered as `protected` by the `Display` impl.
    Protected,
    /// Rendered as `private` by the `Display` impl.
    Private,
}
151
152impl Visibility {
153    pub fn is_at_least(&self, required: Visibility) -> bool {
154        *self <= required
155    }
156}
157
158impl std::fmt::Display for Visibility {
159    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
160        match self {
161            Visibility::Public => write!(f, "public"),
162            Visibility::Protected => write!(f, "protected"),
163            Visibility::Private => write!(f, "private"),
164        }
165    }
166}
167
168fn serialize_template_bound<S>(value: &Option<Arc<Type>>, serializer: S) -> Result<S::Ok, S::Error>
169where
170    S: serde::Serializer,
171{
172    value.as_deref().serialize(serializer)
173}
174
175fn deserialize_template_bound<'de, D>(deserializer: D) -> Result<Option<Arc<Type>>, D::Error>
176where
177    D: serde::Deserializer<'de>,
178{
179    Option::<Type>::deserialize(deserializer).map(|opt| opt.map(interned_types::intern_or_wrap))
180}
181
/// A template (generic) type parameter declared on a class-like or function,
/// e.g. via `@template T of Traversable`.
#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
pub struct TemplateParam {
    /// Name of the template parameter.
    pub name: Name,
    /// Declared upper bound, e.g. `@template T of Traversable`.
    /// Stored as `Option<Arc<Type>>` so common bounds (e.g. `object`, `mixed`)
    /// are deduplicated across all template params via the global intern table.
    #[serde(
        serialize_with = "serialize_template_bound",
        deserialize_with = "deserialize_template_bound"
    )]
    pub bound: Option<Arc<Type>>,
    /// The entity (class or function FQN) that declared this template param.
    pub defining_entity: Name,
    /// Variance of the parameter, as modelled by `mir_types::Variance`.
    pub variance: mir_types::Variance,
}
197
/// One parameter of a function or method signature.
///
/// `Hash` is implemented by hand (see the `impl std::hash::Hash for FnParam`
/// in this file) so that the type is hashed by value rather than by `Arc`
/// pointer, keeping it consistent with the derived `PartialEq`.
#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
pub struct FnParam {
    /// Parameter name.
    pub name: Name,
    /// Parameter type. Stored as `Option<Arc<Type>>` to enable deduplication of
    /// common types across parameters. Many parameters share types like `string`,
    /// `int`, `bool`, etc., so interning via Arc saves allocations.
    #[serde(
        deserialize_with = "deserialize_param_type",
        serialize_with = "serialize_param_type"
    )]
    pub ty: Option<Arc<Type>>,
    /// Whether this parameter has a default value. During analysis, defaults are
    /// never used for their value — only for marking parameters as optional.
    pub has_default: bool,
    /// Presumably set for variadic (`...$args`) parameters — TODO confirm against the collector.
    pub is_variadic: bool,
    /// Presumably set for by-reference (`&$arg`) parameters — TODO confirm against the collector.
    pub is_byref: bool,
    /// NOTE(review): how this differs from `has_default` is not visible in this file.
    pub is_optional: bool,
}
216
217impl std::hash::Hash for FnParam {
218    fn hash<H: std::hash::Hasher>(&self, state: &mut H) {
219        self.name.hash(state);
220        self.has_default.hash(state);
221        self.is_variadic.hash(state);
222        self.is_byref.hash(state);
223        self.is_optional.hash(state);
224        // Hash the type value (not the Arc pointer) so that two FnParams with
225        // equal types (PartialEq) always produce the same hash, even when they
226        // are backed by different Arc allocations.
227        self.ty.as_deref().hash(state);
228    }
229}
230
231// Serde helpers to transparently convert between Option<Type> and Option<Arc<Type>>
232fn deserialize_param_type<'de, D>(deserializer: D) -> Result<Option<Arc<Type>>, D::Error>
233where
234    D: serde::Deserializer<'de>,
235{
236    Option::<Type>::deserialize(deserializer).map(|opt| opt.map(interned_types::intern_or_wrap))
237}
238
239fn serialize_param_type<S>(value: &Option<Arc<Type>>, serializer: S) -> Result<S::Ok, S::Error>
240where
241    S: serde::Serializer,
242{
243    let opt = value.as_ref().map(|arc| (**arc).clone());
244    opt.serialize(serializer)
245}
246
247fn deserialize_return_type<'de, D>(deserializer: D) -> Result<Option<Arc<Type>>, D::Error>
248where
249    D: serde::Deserializer<'de>,
250{
251    Option::<Type>::deserialize(deserializer).map(|opt| opt.map(interned_types::intern_or_wrap))
252}
253
254fn serialize_return_type<S>(value: &Option<Arc<Type>>, serializer: S) -> Result<S::Ok, S::Error>
255where
256    S: serde::Serializer,
257{
258    let opt = value.as_ref().map(|arc| (**arc).clone());
259    opt.serialize(serializer)
260}
261
262fn deserialize_params<'de, D>(deserializer: D) -> Result<Arc<[FnParam]>, D::Error>
263where
264    D: serde::Deserializer<'de>,
265{
266    Vec::<FnParam>::deserialize(deserializer).map(|v| Arc::from(v.into_boxed_slice()))
267}
268
269fn default_imports() -> Arc<FxHashMap<Name, Name>> {
270    Arc::new(FxHashMap::default())
271}
272
273/// Deserialize imports map. Supports both new (Name-keyed) and legacy
274/// (String-keyed) on-disk formats — older `cache.bin` files have plain
275/// `HashMap<String, String>`. Either way, we intern at load time so the
276/// in-memory representation is always `Arc<FxHashMap<Name, Name>>`.
277fn deserialize_imports<'de, D>(deserializer: D) -> Result<Arc<FxHashMap<Name, Name>>, D::Error>
278where
279    D: serde::Deserializer<'de>,
280{
281    let raw = FxHashMap::<String, String>::deserialize(deserializer)?;
282    let mut out: FxHashMap<Name, Name> =
283        FxHashMap::with_capacity_and_hasher(raw.len(), Default::default());
284    for (k, v) in raw {
285        out.insert(Name::new(&k), Name::new(&v));
286    }
287    Ok(Arc::new(out))
288}
289
290/// Serialize imports as the legacy `HashMap<String, String>` shape so disk
291/// caches written by this version remain compatible with readers that haven't
292/// been recompiled yet (and vice-versa).
293fn serialize_imports<S>(
294    value: &Arc<FxHashMap<Name, Name>>,
295    serializer: S,
296) -> Result<S::Ok, S::Error>
297where
298    S: serde::Serializer,
299{
300    use serde::ser::SerializeMap;
301    let mut map = serializer.serialize_map(Some(value.len()))?;
302    for (k, v) in value.iter() {
303        map.serialize_entry(k.as_str(), v.as_str())?;
304    }
305    map.end()
306}
307
308fn serialize_params<S>(value: &Arc<[FnParam]>, serializer: S) -> Result<S::Ok, S::Error>
309where
310    S: serde::Serializer,
311{
312    value.as_ref().serialize(serializer)
313}
314
315/// Helper to wrap Option<Type> in interned Arc<Type>.
316pub fn wrap_param_type(ty: Option<Type>) -> Option<Arc<Type>> {
317    ty.map(interned_types::intern_or_wrap)
318}
319
320/// Helper to wrap return type Option<Type> in interned Arc<Type>.
321pub fn wrap_return_type(ty: Option<Type>) -> Option<Arc<Type>> {
322    ty.map(interned_types::intern_or_wrap)
323}
324
325/// Helper to wrap a `PropertyDef` type field (`ty`/`inferred_ty`/`default`) in
326/// an interned `Arc<Type>`, deduplicating common property types via the global
327/// pool. See [`PropertyDef`].
328pub fn wrap_property_type(ty: Option<Type>) -> Option<Arc<Type>> {
329    ty.map(interned_types::intern_or_wrap)
330}
331
332/// Helper to wrap a `TemplateParam.bound` in an interned `Arc<Type>`.
333pub fn wrap_template_bound(ty: Option<Type>) -> Option<Arc<Type>> {
334    ty.map(interned_types::intern_or_wrap)
335}
336
337/// Wrap a variable type in an interned `Arc<Type>`. Use instead of
338/// `Arc::new(ty)` at `FlowState::set_var` and parameter-init sites so that
339/// common scalars (string, int, bool, null, mixed) share a static Arc rather
340/// than allocating a fresh one per assignment.
341pub fn wrap_var_type(ty: Type) -> Arc<Type> {
342    interned_types::intern_or_wrap(ty)
343}
344
345// ---------------------------------------------------------------------------
346// Assertion — `@psalm-assert`, `@psalm-assert-if-true`, etc.
347// ---------------------------------------------------------------------------
348
/// Which flavour of assertion annotation an [`Assertion`] came from
/// (`@psalm-assert`, `@psalm-assert-if-true`, etc.).
#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
pub enum AssertionKind {
    /// Presumably the unconditional `@psalm-assert` form — TODO confirm against the docblock parser.
    Assert,
    /// Presumably the `@psalm-assert-if-true` form — TODO confirm against the docblock parser.
    AssertIfTrue,
    /// Presumably the `@psalm-assert-if-false` form — TODO confirm against the docblock parser.
    AssertIfFalse,
}
355
/// A single type assertion attached to a function or method
/// (see `MethodDef::assertions` / `FunctionDef::assertions`).
#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
pub struct Assertion {
    /// Flavour of the assertion.
    pub kind: AssertionKind,
    /// Name of the parameter the assertion applies to.
    /// NOTE(review): whether the leading `$` is included is not visible here.
    pub param: Arc<str>,
    /// The asserted type. Stored inline, not interned.
    pub ty: Type,
}
362
363// ---------------------------------------------------------------------------
364// MethodDef
365// ---------------------------------------------------------------------------
366
/// Definition of a single method on a class-like (class, interface, trait or enum).
///
/// Fields marked `#[serde(default)]` fall back to their `Default` value when
/// absent from the serialized input.
#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
pub struct MethodDef {
    /// Method name.
    pub name: Arc<str>,
    /// Presumably the FQCN of the class-like that declares the method — TODO confirm.
    pub fqcn: Arc<str>,
    /// Parameter list, held as a shared slice so identical lists can be
    /// deduplicated (see `deduplicate_params_in_slice`).
    #[serde(
        deserialize_with = "deserialize_params",
        serialize_with = "serialize_params"
    )]
    pub params: Arc<[FnParam]>,
    /// Type from annotation (`@return` / native type hint). `None` means unannotated.
    /// Stored as `Option<Arc<Type>>` to enable deduplication of common return types
    /// (e.g., `void`, `string`, `mixed`, `bool`) across thousands of methods.
    #[serde(
        deserialize_with = "deserialize_return_type",
        serialize_with = "serialize_return_type"
    )]
    pub return_type: Option<Arc<Type>>,
    /// Type inferred from body analysis. Stored as `Option<Arc<Type>>` (8 B) rather
    /// than inline `Option<Type>` (176 B, no niche) — inference is now demand-driven
    /// via salsa (`inferred_*_return_type_demand`), so this field is a rarely/never
    /// populated fallback; shrinking it saves ~168 B on every MethodDef.
    #[serde(
        deserialize_with = "deserialize_return_type",
        serialize_with = "serialize_return_type"
    )]
    pub inferred_return_type: Option<Arc<Type>>,
    /// Declared visibility of the method.
    pub visibility: Visibility,
    /// Presumably true for `static` methods — TODO confirm against the collector.
    pub is_static: bool,
    /// Presumably true for `abstract` methods — TODO confirm against the collector.
    pub is_abstract: bool,
    /// Presumably true for `final` methods — TODO confirm against the collector.
    pub is_final: bool,
    /// Presumably true when this method is the constructor — TODO confirm against the collector.
    pub is_constructor: bool,
    /// Template parameters declared on the method itself.
    pub template_params: Vec<TemplateParam>,
    /// Type assertions attached to the method (`@psalm-assert` family).
    pub assertions: Vec<Assertion>,
    /// Presumably exception class names from `@throws` annotations — TODO confirm.
    pub throws: Vec<Arc<str>>,
    /// Presumably the `@deprecated` annotation text, `None` when absent — TODO confirm.
    pub deprecated: Option<Arc<str>>,
    /// Presumably set by an `@internal` annotation — TODO confirm.
    pub is_internal: bool,
    /// Presumably set by a purity annotation — TODO confirm.
    pub is_pure: bool,
    /// `@no-named-arguments` — callers must not use named argument syntax.
    #[serde(default)]
    pub no_named_arguments: bool,
    /// True when the method has the `#[Override]` PHP attribute.
    #[serde(default)]
    pub is_override: bool,
    /// Source location of the declaration, when known.
    pub location: Option<Location>,
    /// Plain-text description from the docblock (text before `@tag` lines).
    /// Used for hover info.
    #[serde(default)]
    pub docstring: Option<Arc<str>>,
    /// True for methods added via `@method` docblock annotations. Virtual
    /// methods must not be required as concrete interface implementations.
    #[serde(default)]
    pub is_virtual: bool,
    /// Parameters declared as taint sinks via `@taint-sink <kind> $param`.
    /// Each entry is `(param_name_without_dollar, sink_kind_string)`.
    #[serde(default)]
    pub taint_sink_params: Vec<(Arc<str>, Arc<str>)>,
    /// `@if-this-is Type` — the resolved constraint a receiver's type must
    /// satisfy for this method to be callable. `None` when absent.
    #[serde(default)]
    pub if_this_is: Option<Arc<Type>>,
}
428
429impl MethodDef {
430    pub fn effective_return_type(&self) -> Option<&Type> {
431        self.return_type
432            .as_deref()
433            .or(self.inferred_return_type.as_deref())
434    }
435}
436
437// ---------------------------------------------------------------------------
438// PropertyDef
439// ---------------------------------------------------------------------------
440
/// Definition of a single property on a class-like.
#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
pub struct PropertyDef {
    /// Property name.
    /// NOTE(review): whether the leading `$` is included is not visible here.
    pub name: Arc<str>,
    /// Declared/inferred/default types. Stored as `Option<Arc<Type>>` (8 B)
    /// rather than inline `Option<Type>` (176 B, no niche) and interned via the
    /// global pool on construction/deserialization — common property types
    /// (`string`, `int`, a shared class type) dedup to one allocation. Mirrors
    /// `FnParam::ty`. On-disk format is unchanged (the serde helpers (de)serialize
    /// the inner `Type` transparently).
    #[serde(
        deserialize_with = "deserialize_param_type",
        serialize_with = "serialize_param_type"
    )]
    pub ty: Option<Arc<Type>>,
    /// Inferred type of the property; same storage scheme as `ty`.
    #[serde(
        deserialize_with = "deserialize_param_type",
        serialize_with = "serialize_param_type"
    )]
    pub inferred_ty: Option<Arc<Type>>,
    /// Declared visibility of the property.
    pub visibility: Visibility,
    /// Presumably true for `static` properties — TODO confirm against the collector.
    pub is_static: bool,
    /// Presumably true for `readonly` properties — TODO confirm against the collector.
    pub is_readonly: bool,
    /// Type of the default value; same storage scheme as `ty`.
    #[serde(
        deserialize_with = "deserialize_param_type",
        serialize_with = "serialize_param_type"
    )]
    pub default: Option<Arc<Type>>,
    /// Source location of the declaration, when known.
    pub location: Option<Location>,
    /// `@deprecated` docblock annotation, if present.
    #[serde(default)]
    pub deprecated: Option<Arc<str>>,
    /// True when the property declares a PHP native type hint (`public int $x`).
    /// A property typed only via a `@var` docblock (or untyped entirely) is
    /// `false`: PHP gives such a property an implicit `null` default, so it is
    /// never "uninitialized" (no MissingConstructor) and accepts `null` on
    /// assignment regardless of the advisory docblock type.
    #[serde(default)]
    pub has_native_type: bool,
}
480
481// ---------------------------------------------------------------------------
482// ConstantDef
483// ---------------------------------------------------------------------------
484
/// Definition of a constant declared on a class-like.
#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
pub struct ConstantDef {
    /// Constant name.
    pub name: Arc<str>,
    /// Type of the constant. Stored inline, not interned.
    pub ty: Type,
    /// Declared visibility.
    /// NOTE(review): the meaning of `None` (no modifier written?) is not visible here.
    pub visibility: Option<Visibility>,
    /// Presumably true for `final` constants — TODO confirm against the collector.
    #[serde(default)]
    pub is_final: bool,
    /// Source location of the declaration, when known.
    pub location: Option<Location>,
    /// `@deprecated` docblock annotation, if present.
    #[serde(default)]
    pub deprecated: Option<Arc<str>>,
}
497
498// ---------------------------------------------------------------------------
499// ClassDef
500// ---------------------------------------------------------------------------
501
/// Definition of a class: its own (non-inherited) members plus the names of
/// the parent, interfaces, traits and mixins it refers to.
#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
pub struct ClassDef {
    /// Fully-qualified class name.
    pub fqcn: Arc<str>,
    /// Presumably the unqualified class name — TODO confirm.
    pub short_name: Arc<str>,
    /// Parent class name, `None` when the class has no parent.
    pub parent: Option<Arc<str>>,
    /// Names of interfaces listed on this class.
    pub interfaces: Vec<Arc<str>>,
    /// Names of traits used by this class (see `trait_use_locations`, which is parallel to this list).
    pub traits: Vec<Arc<str>>,
    /// Methods declared directly on this class, in insertion order.
    /// `get_method` expects lowercase keys for its fast path.
    pub own_methods: IndexMap<Arc<str>, Arc<MethodDef>>,
    /// Properties declared directly on this class, keyed by name.
    pub own_properties: IndexMap<Arc<str>, PropertyDef>,
    /// Constants declared directly on this class, keyed by name.
    pub own_constants: IndexMap<Arc<str>, ConstantDef>,
    /// Presumably class names from `@mixin` annotations — TODO confirm.
    #[serde(default)]
    pub mixins: Vec<Arc<str>>,
    /// Template parameters declared on the class.
    pub template_params: Vec<TemplateParam>,
    /// Type arguments from `@extends ParentClass<T1, T2>` — maps parent's template params to concrete types.
    pub extends_type_args: Vec<Type>,
    /// Type arguments from `@implements Interface<T1, T2>`.
    #[serde(default)]
    pub implements_type_args: Vec<(Arc<str>, Vec<Type>)>,
    /// Presumably true for `abstract` classes — TODO confirm against the collector.
    pub is_abstract: bool,
    /// Presumably true for `final` classes — TODO confirm against the collector.
    pub is_final: bool,
    /// Presumably true for `readonly` classes — TODO confirm against the collector.
    pub is_readonly: bool,
    /// Presumably the `@deprecated` annotation text, `None` when absent — TODO confirm.
    pub deprecated: Option<Arc<str>>,
    /// Presumably set by an `@internal` annotation — TODO confirm.
    pub is_internal: bool,
    /// Attribute target flags if this class has `#[Attribute]` annotation.
    /// `None` = not an attribute class. The value is a bitmask of PHP's
    /// `Attribute::TARGET_*` constants (e.g. `Attribute::TARGET_CLASS = 1`).
    #[serde(default)]
    pub attribute_flags: Option<i64>,
    /// Source location of the declaration, when known.
    pub location: Option<Location>,
    /// Per-`use` statement locations for each used trait: `(fqcn, location)` in
    /// declaration order, parallel to `traits`.  Absent from older serialized
    /// slices; defaults to empty.
    #[serde(default)]
    pub trait_use_locations: Vec<(Arc<str>, Location)>,
    /// Type aliases declared on this class via `@psalm-type` / `@phpstan-type`.
    #[serde(default)]
    pub type_aliases: FxHashMap<Arc<str>, Type>,
    /// Raw import-type declarations (`(local_name, original_name, from_class)`) — resolved during finalization.
    #[serde(default)]
    pub pending_import_types: Vec<(Arc<str>, Arc<str>, Arc<str>)>,
    /// Trait precedence exclusions from `insteadof` declarations in this class's `use` blocks.
    /// Maps method_name_lowercase → list of trait FQCNs whose version of the method is excluded.
    /// E.g. `use A, B { B::hello insteadof A; }` stores `"hello" → ["A"]`.
    #[serde(default)]
    pub trait_insteadof: IndexMap<Arc<str>, Vec<Arc<str>>>,
}
548
549impl ClassDef {
550    pub fn get_method(&self, name: &str) -> Option<&MethodDef> {
551        // PHP method names are case-insensitive; caller should pass lowercase name.
552        // Only searches own_methods — inherited method resolution is done by
553        // `db::lookup_method_in_chain`.
554        self.own_methods.get(name).map(Arc::as_ref).or_else(|| {
555            self.own_methods
556                .iter()
557                .find(|(k, _)| k.as_ref().eq_ignore_ascii_case(name))
558                .map(|(_, v)| v.as_ref())
559        })
560    }
561
562    pub fn get_property(&self, name: &str) -> Option<&PropertyDef> {
563        self.own_properties.get(name)
564    }
565}
566
567// ---------------------------------------------------------------------------
568// InterfaceDef
569// ---------------------------------------------------------------------------
570
/// Definition of an interface and the members declared directly on it.
#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
pub struct InterfaceDef {
    /// Fully-qualified interface name.
    pub fqcn: Arc<str>,
    /// Presumably the unqualified interface name — TODO confirm.
    pub short_name: Arc<str>,
    /// Names of the interfaces this interface extends.
    pub extends: Vec<Arc<str>>,
    /// Methods declared directly on this interface, keyed by name.
    pub own_methods: IndexMap<Arc<str>, Arc<MethodDef>>,
    /// Constants declared directly on this interface, keyed by name.
    pub own_constants: IndexMap<Arc<str>, ConstantDef>,
    /// Template parameters declared on the interface.
    pub template_params: Vec<TemplateParam>,
    /// Source location of the declaration, when known.
    pub location: Option<Location>,
    /// `@deprecated` docblock annotation, if present.
    #[serde(default)]
    pub deprecated: Option<Arc<str>>,
    /// Properties declared via `@property*` docblock annotations on the interface.
    #[serde(default)]
    pub own_properties: IndexMap<Arc<str>, PropertyDef>,
    /// `@seal-properties` / `@psalm-seal-properties` — disallows undeclared property access.
    #[serde(default)]
    pub seal_properties: bool,
}
590
591// ---------------------------------------------------------------------------
592// TraitDef
593// ---------------------------------------------------------------------------
594
/// Definition of a trait and the members declared directly on it.
#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
pub struct TraitDef {
    /// Fully-qualified trait name.
    pub fqcn: Arc<str>,
    /// Presumably the unqualified trait name — TODO confirm.
    pub short_name: Arc<str>,
    /// Methods declared directly on this trait, keyed by name.
    pub own_methods: IndexMap<Arc<str>, Arc<MethodDef>>,
    /// Properties declared directly on this trait, keyed by name.
    pub own_properties: IndexMap<Arc<str>, PropertyDef>,
    /// Constants declared directly on this trait, keyed by name.
    pub own_constants: IndexMap<Arc<str>, ConstantDef>,
    /// Template parameters declared on the trait.
    pub template_params: Vec<TemplateParam>,
    /// Traits used by this trait (`use OtherTrait;` inside a trait body).
    pub traits: Vec<Arc<str>>,
    /// Source location of the declaration, when known.
    pub location: Option<Location>,
    /// `@psalm-require-extends` / `@phpstan-require-extends` — FQCNs that using classes must extend.
    #[serde(default)]
    pub require_extends: Vec<Arc<str>>,
    /// `@psalm-require-implements` / `@phpstan-require-implements` — FQCNs that using classes must implement.
    #[serde(default)]
    pub require_implements: Vec<Arc<str>>,
    /// `@deprecated` docblock annotation, if present.
    #[serde(default)]
    pub deprecated: Option<Arc<str>>,
}
616
617// ---------------------------------------------------------------------------
618// EnumDef
619// ---------------------------------------------------------------------------
620
/// Definition of a single enum case.
#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
pub struct EnumCaseDef {
    /// Case name.
    pub name: Arc<str>,
    /// Presumably the type of the backing value for backed enums, `None` otherwise — TODO confirm.
    pub value: Option<Type>,
    /// Source location of the declaration, when known.
    pub location: Option<Location>,
    /// `@deprecated` docblock annotation, if present.
    #[serde(default)]
    pub deprecated: Option<Arc<str>>,
}
630
/// Definition of an enum: its cases plus the methods and constants declared on it.
#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
pub struct EnumDef {
    /// Fully-qualified enum name.
    pub fqcn: Arc<str>,
    /// Presumably the unqualified enum name — TODO confirm.
    pub short_name: Arc<str>,
    /// Presumably the backing scalar type of a backed enum, `None` for pure enums — TODO confirm.
    pub scalar_type: Option<Type>,
    /// Names of interfaces listed on this enum.
    pub interfaces: Vec<Arc<str>>,
    /// Enum cases, keyed by case name, in insertion order.
    pub cases: IndexMap<Arc<str>, EnumCaseDef>,
    /// Methods declared directly on this enum, keyed by name.
    pub own_methods: IndexMap<Arc<str>, Arc<MethodDef>>,
    /// Constants declared directly on this enum, keyed by name.
    pub own_constants: IndexMap<Arc<str>, ConstantDef>,
    /// Source location of the declaration, when known.
    pub location: Option<Location>,
}
642
643// ---------------------------------------------------------------------------
644// FunctionDef
645// ---------------------------------------------------------------------------
646
/// Definition of a free (non-method) function.
#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
pub struct FunctionDef {
    /// Fully-qualified function name.
    pub fqn: Arc<str>,
    /// Presumably the unqualified function name — TODO confirm.
    pub short_name: Arc<str>,
    /// Parameter list, held as a shared slice so identical lists can be
    /// deduplicated (see `deduplicate_params_in_slice`).
    #[serde(
        deserialize_with = "deserialize_params",
        serialize_with = "serialize_params"
    )]
    pub params: Arc<[FnParam]>,
    /// Type from annotation (`@return` / native type hint). `None` means unannotated.
    /// Stored as `Option<Arc<Type>>` to enable deduplication of common return types.
    #[serde(
        deserialize_with = "deserialize_return_type",
        serialize_with = "serialize_return_type"
    )]
    pub return_type: Option<Arc<Type>>,
    /// See `MethodDef::inferred_return_type` — `Option<Arc<Type>>` (8 B) for the
    /// same demand-driven-inference reason.
    #[serde(
        deserialize_with = "deserialize_return_type",
        serialize_with = "serialize_return_type"
    )]
    pub inferred_return_type: Option<Arc<Type>>,
    /// Template parameters declared on the function.
    pub template_params: Vec<TemplateParam>,
    /// Type assertions attached to the function (`@psalm-assert` family).
    pub assertions: Vec<Assertion>,
    /// Presumably exception class names from `@throws` annotations — TODO confirm.
    pub throws: Vec<Arc<str>>,
    /// Presumably the `@deprecated` annotation text, `None` when absent — TODO confirm.
    pub deprecated: Option<Arc<str>>,
    /// Presumably set by a purity annotation — TODO confirm.
    pub is_pure: bool,
    /// `@no-named-arguments` — callers must not use named argument syntax.
    #[serde(default)]
    pub no_named_arguments: bool,
    /// Source location of the declaration, when known.
    pub location: Option<Location>,
    /// Plain-text description from the docblock (text before `@tag` lines).
    /// Used for hover info.
    #[serde(default)]
    pub docstring: Option<Arc<str>>,
    /// Parameters declared as taint sinks via `@taint-sink <kind> $param`.
    /// Each entry is `(param_name_without_dollar, sink_kind_string)`.
    #[serde(default)]
    pub taint_sink_params: Vec<(Arc<str>, Arc<str>)>,
}
688
689impl FunctionDef {
690    pub fn effective_return_type(&self) -> Option<&Type> {
691        self.return_type
692            .as_deref()
693            .or(self.inferred_return_type.as_deref())
694    }
695}
696
697// ---------------------------------------------------------------------------
698// StubSlice — serializable bundle of definitions from one extension's stubs
699// ---------------------------------------------------------------------------
700
/// A snapshot of all PHP definitions contributed by a single stub file set.
///
/// Produced by `mir-stubs-gen` at code-generation time and deserialized at
/// runtime to ingest definitions into the salsa db via
/// `MirDatabase::ingest_stub_slice`.
#[derive(Debug, Clone, Default, PartialEq, serde::Serialize, serde::Deserialize)]
pub struct StubSlice {
    /// Class definitions in this slice.
    pub classes: Vec<Arc<ClassDef>>,
    /// Interface definitions in this slice.
    pub interfaces: Vec<Arc<InterfaceDef>>,
    /// Trait definitions in this slice.
    pub traits: Vec<Arc<TraitDef>>,
    /// Enum definitions in this slice.
    pub enums: Vec<Arc<EnumDef>>,
    /// Free-function definitions in this slice.
    pub functions: Vec<Arc<FunctionDef>>,
    /// Global constants as `(name, type)` pairs.
    #[serde(default)]
    pub constants: Vec<(Arc<str>, Type)>,
    /// Source file this slice was collected from. `None` for bundled stub slices
    /// that were pre-computed and are not tied to a specific on-disk file.
    #[serde(default)]
    pub file: Option<Arc<str>>,
    /// Types of `@var`-annotated global variables collected from this file.
    /// Populated by `DefinitionCollector`; ingested into the salsa db's
    /// `global_vars` table by `ingest_stub_slice` when `file` is `Some`.
    #[serde(default)]
    pub global_vars: Vec<(Arc<str>, Type)>,
    /// The first namespace declared in this file (e.g. `"App\\Service"`).
    /// Populated by `DefinitionCollector`; ingested into the salsa db's
    /// `file_namespaces` table by `ingest_stub_slice` when `file` is `Some`.
    #[serde(default)]
    pub namespace: Option<Arc<str>>,
    /// `use` alias map for this file: alias → FQCN.
    ///
    /// Stored as `Arc<FxHashMap<Name, Name>>` so that `file_imports()`
    /// returns a cheap Arc clone instead of deep-cloning the map on every
    /// `resolve_name` call (which fires once per symbol reference in
    /// Pass 2). `Name` keys/values shrink each entry from ~108 bytes
    /// (two `String` headers + two heap allocs averaging ~30 chars) to
    /// 16 bytes (two `Ustr` u64 handles); the global ustr interner holds
    /// one copy of each unique alias / FQCN string for the whole session.
    ///
    /// Serialized as a string-to-string map (see `serialize_imports` /
    /// `deserialize_imports`); defaults to an empty map when absent.
    #[serde(
        deserialize_with = "deserialize_imports",
        serialize_with = "serialize_imports"
    )]
    #[serde(default = "default_imports")]
    pub imports: Arc<FxHashMap<Name, Name>>,
    /// Set to `true` after `deduplicate_params_in_slice` has run on this slice.
    /// `ingest_stub_slice` skips the clone+re-dedup when this flag is set.
    /// Never serialized (`#[serde(skip)]`), so a freshly deserialized slice
    /// always starts with `false`.
    #[serde(skip)]
    pub is_deduped: bool,
}
749
750// ---------------------------------------------------------------------------
751// Param list deduplication
752// ---------------------------------------------------------------------------
753
754use std::sync::Mutex;
755
/// Mutex-guarded map from a parameter list's contents to the canonical shared
/// `Arc<[FnParam]>` holding those contents.
type ParamCache = Mutex<FxHashMap<Vec<FnParam>, Arc<[FnParam]>>>;
757
/// Global cache of canonical Arc<[FnParam]> instances for deduplication.
/// Shared across all StubSlices to deduplicate vendor code with millions of
/// methods that often have identical parameter lists.
///
/// Initialized lazily by `deduplicate_params_in_slice`. Nothing in this file
/// removes entries, so the cache only grows during the session.
static PARAM_DEDUP_CACHE: std::sync::OnceLock<ParamCache> = std::sync::OnceLock::new();
762
763/// Deduplicate parameter lists across all methods and functions in a StubSlice.
764/// Many PHP framework methods share identical parameter lists (e.g., thousands
765/// of `(string $arg, array $opts)` signatures). This function groups identical
766/// param lists globally (across all slices processed so far) and replaces them
767/// with Arc<[FnParam]> pointers to shared allocations.
768///
769/// Expected memory savings: 100–150 MiB on cold start (vendor collection).
770pub fn deduplicate_params_in_slice(slice: &mut StubSlice) {
771    let cache: &ParamCache = PARAM_DEDUP_CACHE.get_or_init(|| Mutex::new(FxHashMap::default()));
772    let mut canonical_params = cache.lock().unwrap();
773
774    let mut deduplicate = |params: &mut Arc<[FnParam]>| {
775        if let Some(existing) = canonical_params.get(params.as_ref()) {
776            *params = existing.clone();
777        } else {
778            canonical_params.insert(params.as_ref().to_vec(), params.clone());
779        }
780    };
781
782    // Deduplicate method params in all classes
783    for cls in &mut slice.classes {
784        for method in Arc::make_mut(cls).own_methods.values_mut() {
785            deduplicate(&mut Arc::make_mut(method).params);
786        }
787    }
788
789    // Deduplicate method params in all interfaces
790    for iface in &mut slice.interfaces {
791        for method in Arc::make_mut(iface).own_methods.values_mut() {
792            deduplicate(&mut Arc::make_mut(method).params);
793        }
794    }
795
796    // Deduplicate method params in all traits
797    for tr in &mut slice.traits {
798        for method in Arc::make_mut(tr).own_methods.values_mut() {
799            deduplicate(&mut Arc::make_mut(method).params);
800        }
801    }
802
803    // Deduplicate method params in all enums
804    for en in &mut slice.enums {
805        for method in Arc::make_mut(en).own_methods.values_mut() {
806            deduplicate(&mut Arc::make_mut(method).params);
807        }
808    }
809
810    // Deduplicate function params
811    for func in &mut slice.functions {
812        deduplicate(&mut Arc::make_mut(func).params);
813    }
814    slice.is_deduped = true;
815}