Skip to main content

mir_analyzer/
context.rs

//! Analysis context — carries type state through statement/expression analysis.
2use std::collections::{HashMap, HashSet};
3use std::sync::Arc;
4
5use indexmap::IndexMap;
6use mir_types::Union;
7
8// ---------------------------------------------------------------------------
9// Context
10// ---------------------------------------------------------------------------
11
/// Flow-sensitive analysis state: variable types and assignment facts,
/// together with the class/function scope the analysed code sits in.
///
/// [`Context::fork`] clones this state for a branch and
/// [`Context::merge_branches`] recombines branch states at a join point.
/// The accessor methods (`get_var`, `set_var`, ...) store and look up
/// variable names without the leading `$`.
#[derive(Debug, Clone)]
pub struct Context {
    /// Types of variables at this point in execution, keyed by variable name.
    pub vars: IndexMap<String, Union>,

    /// Variables that are definitely assigned at this point.
    pub assigned_vars: HashSet<String>,

    /// Variables that *might* be assigned (e.g. only in one if branch).
    pub possibly_assigned_vars: HashSet<String>,

    /// The class in whose body we are analysing (`self`); `None` when there is none.
    pub self_fqcn: Option<Arc<str>>,

    /// The parent class (`parent`); `None` when there is none.
    pub parent_fqcn: Option<Arc<str>>,

    /// Late-static-binding class (`static`); `None` when there is none.
    pub static_fqcn: Option<Arc<str>>,

    /// Declared return type for the current function/method, if any.
    pub fn_return_type: Option<Union>,

    /// Declared exception types for the current function/method (@throws).
    pub fn_declared_throws: Arc<[Arc<str>]>,

    /// Whether we are currently inside a loop.
    pub inside_loop: bool,

    /// Whether we are currently inside a finally block.
    pub inside_finally: bool,

    /// Whether we are inside a constructor.
    pub inside_constructor: bool,

    /// Whether `strict_types=1` is declared for this file.
    pub strict_types: bool,

    /// Variables that carry tainted (user-controlled) values at this point.
    /// Used by taint analysis (M19).
    pub tainted_vars: HashSet<String>,

    /// Variables that have been read at least once in this scope.
    /// Used by UnusedParam detection (M18).
    pub read_vars: HashSet<String>,

    /// Names of function/method parameters in this scope (stripped of `$`).
    /// Used to exclude parameters from UnusedVariable detection.
    pub param_names: HashSet<String>,

    /// Names of by-reference parameters in this scope (stripped of `$`).
    /// Assigning to these is externally observable, so it counts as usage.
    pub byref_param_names: HashSet<String>,

    /// Whether every execution path through this context has diverged
    /// (returned, thrown, or exited). Used to detect "all catch branches
    /// return" so that variables assigned only in the try body are
    /// considered definitely assigned after the try/catch.
    pub diverges: bool,

    /// Pre-converted `(line, col_start, line_end, col_end)` span of the first
    /// assignment to each variable. Used to emit accurate locations for
    /// UnusedVariable / UnusedParam.
    pub var_locations: HashMap<String, (u32, u16, u32, u16)>,

    /// Names of template parameters in the current function/method.
    /// Used during type narrowing to correctly handle generic template variables.
    pub template_param_names: HashSet<String>,
}
80
81impl Context {
82    pub fn new() -> Self {
83        let mut ctx = Self {
84            vars: IndexMap::new(),
85            assigned_vars: HashSet::new(),
86            possibly_assigned_vars: HashSet::new(),
87            self_fqcn: None,
88            parent_fqcn: None,
89            static_fqcn: None,
90            fn_return_type: None,
91            fn_declared_throws: Arc::from([]),
92            inside_loop: false,
93            inside_finally: false,
94            inside_constructor: false,
95            strict_types: false,
96            tainted_vars: HashSet::new(),
97            read_vars: HashSet::new(),
98            param_names: HashSet::new(),
99            byref_param_names: HashSet::new(),
100            diverges: false,
101            var_locations: HashMap::new(),
102            template_param_names: HashSet::new(),
103        };
104        // PHP superglobals — always in scope in any context
105        for sg in &[
106            "_SERVER", "_GET", "_POST", "_REQUEST", "_SESSION", "_COOKIE", "_FILES", "_ENV",
107            "GLOBALS",
108        ] {
109            ctx.vars.insert(sg.to_string(), mir_types::Union::mixed());
110            ctx.assigned_vars.insert(sg.to_string());
111        }
112        ctx
113    }
114
115    /// Create a context seeded with the given parameters.
116    #[allow(clippy::too_many_arguments)]
117    pub fn for_function(
118        params: &[mir_codebase::FnParam],
119        return_type: Option<Union>,
120        declared_throws: Arc<[Arc<str>]>,
121        self_fqcn: Option<Arc<str>>,
122        parent_fqcn: Option<Arc<str>>,
123        static_fqcn: Option<Arc<str>>,
124        strict_types: bool,
125        is_static: bool,
126    ) -> Self {
127        Self::for_method(
128            params,
129            return_type,
130            declared_throws,
131            self_fqcn,
132            parent_fqcn,
133            static_fqcn,
134            strict_types,
135            false,
136            is_static,
137        )
138    }
139
140    /// Like `for_function` but also sets `inside_constructor`.
141    #[allow(clippy::too_many_arguments)]
142    pub fn for_method(
143        params: &[mir_codebase::FnParam],
144        return_type: Option<Union>,
145        declared_throws: Arc<[Arc<str>]>,
146        self_fqcn: Option<Arc<str>>,
147        parent_fqcn: Option<Arc<str>>,
148        static_fqcn: Option<Arc<str>>,
149        strict_types: bool,
150        inside_constructor: bool,
151        is_static: bool,
152    ) -> Self {
153        Self::for_method_with_templates(
154            params,
155            return_type,
156            declared_throws,
157            self_fqcn,
158            parent_fqcn,
159            static_fqcn,
160            strict_types,
161            inside_constructor,
162            is_static,
163            None,
164        )
165    }
166
167    /// Like `for_method` but also accepts template parameters.
168    #[allow(clippy::too_many_arguments)]
169    pub fn for_method_with_templates(
170        params: &[mir_codebase::FnParam],
171        return_type: Option<Union>,
172        declared_throws: Arc<[Arc<str>]>,
173        self_fqcn: Option<Arc<str>>,
174        parent_fqcn: Option<Arc<str>>,
175        static_fqcn: Option<Arc<str>>,
176        strict_types: bool,
177        inside_constructor: bool,
178        is_static: bool,
179        template_params: Option<&[mir_codebase::TemplateParam]>,
180    ) -> Self {
181        let mut ctx = Self::new();
182        ctx.fn_return_type = return_type;
183        ctx.fn_declared_throws = declared_throws;
184        ctx.self_fqcn = self_fqcn.clone();
185        ctx.parent_fqcn = parent_fqcn;
186        ctx.static_fqcn = static_fqcn;
187        ctx.strict_types = strict_types;
188        ctx.inside_constructor = inside_constructor;
189
190        // Build a map of template names to their bounds for parameter type resolution
191        let mut template_bounds_map: std::collections::HashMap<String, Union> =
192            std::collections::HashMap::new();
193        if let Some(templates) = template_params {
194            for tp in templates {
195                ctx.template_param_names.insert(tp.name.to_string());
196                if let Some(bound) = &tp.bound {
197                    template_bounds_map.insert(tp.name.to_string(), bound.clone());
198                }
199            }
200        }
201
202        for p in params {
203            let mut elem_ty =
204                p.ty.as_ref()
205                    .map(|arc| (**arc).clone())
206                    .unwrap_or_else(Union::mixed);
207
208            // Resolve template references to their bounds
209            // If the parameter type is a bare unqualified name matching a template parameter,
210            // replace it with the template's bound
211            if elem_ty.types.len() == 1 {
212                if let mir_types::Atomic::TNamedObject { fqcn, type_params } = &elem_ty.types[0] {
213                    if type_params.is_empty() && !fqcn.contains('\\') {
214                        if let Some(bound) = template_bounds_map.get(fqcn.as_ref()) {
215                            elem_ty = bound.clone();
216                        }
217                    }
218                }
219            }
220
221            // Variadic params like `Type ...$name` are accessed as `list<Type>` in the body.
222            // If the docblock already provides a list/array collection type, don't double-wrap.
223            let ty = if p.is_variadic {
224                let already_collection = elem_ty.types.iter().any(|a| {
225                    matches!(
226                        a,
227                        mir_types::Atomic::TList { .. }
228                            | mir_types::Atomic::TNonEmptyList { .. }
229                            | mir_types::Atomic::TArray { .. }
230                            | mir_types::Atomic::TNonEmptyArray { .. }
231                    )
232                });
233                if already_collection {
234                    elem_ty
235                } else {
236                    mir_types::Union::single(mir_types::Atomic::TList {
237                        value: Box::new(elem_ty),
238                    })
239                }
240            } else {
241                elem_ty
242            };
243            let name = p.name.as_ref().trim_start_matches('$').to_string();
244            ctx.vars.insert(name.clone(), ty);
245            ctx.assigned_vars.insert(name.clone());
246            ctx.param_names.insert(name.clone());
247            if p.is_byref {
248                ctx.byref_param_names.insert(name);
249            }
250        }
251
252        // Inject $this for non-static methods so that $this->method() can be
253        // resolved without hitting the mixed-receiver early-return guard.
254        if !is_static {
255            if let Some(fqcn) = self_fqcn {
256                let this_ty = mir_types::Union::single(mir_types::Atomic::TNamedObject {
257                    fqcn,
258                    type_params: vec![],
259                });
260                ctx.vars.insert("this".to_string(), this_ty);
261                ctx.assigned_vars.insert("this".to_string());
262            }
263        }
264
265        ctx
266    }
267
268    /// Get the type of a variable. Returns `mixed` if not found.
269    pub fn get_var(&self, name: &str) -> Union {
270        let name = name.trim_start_matches('$');
271        self.vars.get(name).cloned().unwrap_or_else(Union::mixed)
272    }
273
274    /// Set the type of a variable and mark it as assigned.
275    pub fn set_var(&mut self, name: impl Into<String>, ty: Union) {
276        let name: String = name.into();
277        let name = name.trim_start_matches('$').to_string();
278        self.vars.insert(name.clone(), ty);
279        self.assigned_vars.insert(name);
280    }
281
282    /// Check if a variable is definitely in scope.
283    pub fn var_is_defined(&self, name: &str) -> bool {
284        let name = name.trim_start_matches('$');
285        self.assigned_vars.contains(name)
286    }
287
288    /// Check if a variable might be defined (but not certainly).
289    pub fn var_possibly_defined(&self, name: &str) -> bool {
290        let name = name.trim_start_matches('$');
291        self.assigned_vars.contains(name) || self.possibly_assigned_vars.contains(name)
292    }
293
294    /// Mark a variable as carrying tainted (user-controlled) data.
295    pub fn taint_var(&mut self, name: &str) {
296        let name = name.trim_start_matches('$').to_string();
297        self.tainted_vars.insert(name);
298    }
299
300    /// Returns true if the variable is known to carry tainted data.
301    pub fn is_tainted(&self, name: &str) -> bool {
302        let name = name.trim_start_matches('$');
303        self.tainted_vars.contains(name)
304    }
305
306    /// Record the location of the first assignment to a variable (first-write-wins).
307    pub fn record_var_location(
308        &mut self,
309        name: &str,
310        line: u32,
311        col_start: u16,
312        line_end: u32,
313        col_end: u16,
314    ) {
315        let name = name.trim_start_matches('$');
316        self.var_locations
317            .entry(name.to_string())
318            .or_insert((line, col_start, line_end, col_end));
319    }
320
321    /// Remove a variable from the context (after `unset`).
322    pub fn unset_var(&mut self, name: &str) {
323        let name = name.trim_start_matches('$');
324        self.vars.shift_remove(name);
325        self.assigned_vars.remove(name);
326        self.possibly_assigned_vars.remove(name);
327    }
328
329    /// Fork this context for a branch (e.g. the `if` branch).
330    pub fn fork(&self) -> Context {
331        self.clone()
332    }
333
334    /// Merge two branch contexts at a join point (e.g. end of if/else).
335    ///
336    /// - vars present in both: merged union of types
337    /// - vars present in only one branch: marked `possibly_undefined`
338    /// - pre-existing vars from before the branch: preserved
339    pub fn merge_branches(pre: &Context, if_ctx: Context, else_ctx: Option<Context>) -> Context {
340        let else_ctx = else_ctx.unwrap_or_else(|| pre.clone());
341
342        // If the then-branch always diverges, the code after the if runs only
343        // in the else-branch — use that as the result directly.
344        if if_ctx.diverges && !else_ctx.diverges {
345            let mut result = else_ctx;
346            result.diverges = false;
347            return result;
348        }
349        // If the else-branch always diverges, code after the if runs only
350        // in the then-branch.
351        if else_ctx.diverges && !if_ctx.diverges {
352            let mut result = if_ctx;
353            result.diverges = false;
354            return result;
355        }
356        // If both diverge, the code after the if is unreachable.
357        if if_ctx.diverges && else_ctx.diverges {
358            let mut result = pre.clone();
359            result.diverges = true;
360            return result;
361        }
362
363        let mut result = pre.clone();
364
365        // Collect all variable names from both branch contexts
366        let all_names: HashSet<&String> = if_ctx.vars.keys().chain(else_ctx.vars.keys()).collect();
367
368        for name in all_names {
369            let in_if = if_ctx.assigned_vars.contains(name);
370            let in_else = else_ctx.assigned_vars.contains(name);
371            let in_pre = pre.assigned_vars.contains(name);
372
373            let ty_if = if_ctx.vars.get(name);
374            let ty_else = else_ctx.vars.get(name);
375
376            match (ty_if, ty_else) {
377                (Some(a), Some(b)) => {
378                    let merged = Union::merge(a, b);
379                    result.vars.insert(name.clone(), merged);
380                    if in_if && in_else {
381                        result.assigned_vars.insert(name.clone());
382                    } else {
383                        result.possibly_assigned_vars.insert(name.clone());
384                    }
385                }
386                (Some(a), None) => {
387                    if in_pre {
388                        // var existed before: merge with pre type
389                        let pre_ty = pre.vars.get(name).cloned().unwrap_or_else(Union::mixed);
390                        let merged = Union::merge(a, &pre_ty);
391                        result.vars.insert(name.clone(), merged);
392                        result.assigned_vars.insert(name.clone());
393                    } else {
394                        // only assigned in if branch
395                        let ty = a.clone().possibly_undefined();
396                        result.vars.insert(name.clone(), ty);
397                        result.possibly_assigned_vars.insert(name.clone());
398                    }
399                }
400                (None, Some(b)) => {
401                    if in_pre {
402                        let pre_ty = pre.vars.get(name).cloned().unwrap_or_else(Union::mixed);
403                        let merged = Union::merge(&pre_ty, b);
404                        result.vars.insert(name.clone(), merged);
405                        result.assigned_vars.insert(name.clone());
406                    } else {
407                        let ty = b.clone().possibly_undefined();
408                        result.vars.insert(name.clone(), ty);
409                        result.possibly_assigned_vars.insert(name.clone());
410                    }
411                }
412                (None, None) => {}
413            }
414        }
415
416        // Taint: conservative union — if either branch taints a var, it stays tainted
417        for name in if_ctx
418            .tainted_vars
419            .iter()
420            .chain(else_ctx.tainted_vars.iter())
421        {
422            result.tainted_vars.insert(name.clone());
423        }
424
425        // Read vars: union — if either branch reads a var, it counts as read
426        for name in if_ctx.read_vars.iter().chain(else_ctx.read_vars.iter()) {
427            result.read_vars.insert(name.clone());
428        }
429
430        // Var locations: keep the earliest known span for each variable
431        for (name, loc) in if_ctx
432            .var_locations
433            .iter()
434            .chain(else_ctx.var_locations.iter())
435        {
436            result.var_locations.entry(name.clone()).or_insert(*loc);
437        }
438
439        // After merging branches, the merged context does not diverge
440        // (at least one path through the merge reaches the next statement).
441        result.diverges = false;
442
443        result
444    }
445}
446
447impl Default for Context {
448    fn default() -> Self {
449        Self::new()
450    }
451}