Skip to main content

mir_analyzer/
context.rs

//! Analysis context — carries type state through statement/expression analysis.
2use std::collections::{HashMap, HashSet};
3use std::sync::Arc;
4
5use indexmap::IndexMap;
6use mir_types::Union;
7
8// ---------------------------------------------------------------------------
9// Context
10// ---------------------------------------------------------------------------
11
12#[derive(Debug, Clone)]
13pub struct Context {
14    /// Types of variables at this point in execution.
15    pub vars: IndexMap<String, Union>,
16
17    /// Variables that are definitely assigned at this point.
18    pub assigned_vars: HashSet<String>,
19
20    /// Variables that *might* be assigned (e.g. only in one if branch).
21    pub possibly_assigned_vars: HashSet<String>,
22
23    /// The class in whose body we are analysing (`self`).
24    pub self_fqcn: Option<Arc<str>>,
25
26    /// The parent class (`parent`).
27    pub parent_fqcn: Option<Arc<str>>,
28
29    /// Late-static-binding class (`static`).
30    pub static_fqcn: Option<Arc<str>>,
31
32    /// Declared return type for the current function/method.
33    pub fn_return_type: Option<Union>,
34
35    /// Whether we are currently inside a loop.
36    pub inside_loop: bool,
37
38    /// Whether we are currently inside a finally block.
39    pub inside_finally: bool,
40
41    /// Whether we are inside a constructor.
42    pub inside_constructor: bool,
43
44    /// Whether `strict_types=1` is declared for this file.
45    pub strict_types: bool,
46
47    /// Variables that carry tainted (user-controlled) values at this point.
48    /// Used by taint analysis (M19).
49    pub tainted_vars: HashSet<String>,
50
51    /// Variables that have been read at least once in this scope.
52    /// Used by UnusedParam detection (M18).
53    pub read_vars: HashSet<String>,
54
55    /// Names of function/method parameters in this scope (stripped of `$`).
56    /// Used to exclude parameters from UnusedVariable detection.
57    pub param_names: HashSet<String>,
58
59    /// Names of by-reference parameters in this scope (stripped of `$`).
60    /// Assigning to these is externally observable, so it counts as usage.
61    pub byref_param_names: HashSet<String>,
62
63    /// Whether every execution path through this context has diverged
64    /// (returned, thrown, or exited). Used to detect "all catch branches
65    /// return" so that variables assigned only in the try body are
66    /// considered definitely assigned after the try/catch.
67    pub diverges: bool,
68
69    /// Pre-converted (line, col_start, line_end, col_end) of the first assignment
70    /// to each variable. Used to emit accurate locations for UnusedVariable / UnusedParam.
71    pub var_locations: HashMap<String, (u32, u16, u32, u16)>,
72
73    /// Names of template parameters in the current function/method.
74    /// Used during type narrowing to correctly handle generic template variables.
75    pub template_param_names: HashSet<String>,
76}
77
78impl Context {
79    pub fn new() -> Self {
80        let mut ctx = Self {
81            vars: IndexMap::new(),
82            assigned_vars: HashSet::new(),
83            possibly_assigned_vars: HashSet::new(),
84            self_fqcn: None,
85            parent_fqcn: None,
86            static_fqcn: None,
87            fn_return_type: None,
88            inside_loop: false,
89            inside_finally: false,
90            inside_constructor: false,
91            strict_types: false,
92            tainted_vars: HashSet::new(),
93            read_vars: HashSet::new(),
94            param_names: HashSet::new(),
95            byref_param_names: HashSet::new(),
96            diverges: false,
97            var_locations: HashMap::new(),
98            template_param_names: HashSet::new(),
99        };
100        // PHP superglobals — always in scope in any context
101        for sg in &[
102            "_SERVER", "_GET", "_POST", "_REQUEST", "_SESSION", "_COOKIE", "_FILES", "_ENV",
103            "GLOBALS",
104        ] {
105            ctx.vars.insert(sg.to_string(), mir_types::Union::mixed());
106            ctx.assigned_vars.insert(sg.to_string());
107        }
108        ctx
109    }
110
111    /// Create a context seeded with the given parameters.
112    pub fn for_function(
113        params: &[mir_codebase::FnParam],
114        return_type: Option<Union>,
115        self_fqcn: Option<Arc<str>>,
116        parent_fqcn: Option<Arc<str>>,
117        static_fqcn: Option<Arc<str>>,
118        strict_types: bool,
119        is_static: bool,
120    ) -> Self {
121        Self::for_method(
122            params,
123            return_type,
124            self_fqcn,
125            parent_fqcn,
126            static_fqcn,
127            strict_types,
128            false,
129            is_static,
130        )
131    }
132
133    /// Like `for_function` but also sets `inside_constructor`.
134    #[allow(clippy::too_many_arguments)]
135    pub fn for_method(
136        params: &[mir_codebase::FnParam],
137        return_type: Option<Union>,
138        self_fqcn: Option<Arc<str>>,
139        parent_fqcn: Option<Arc<str>>,
140        static_fqcn: Option<Arc<str>>,
141        strict_types: bool,
142        inside_constructor: bool,
143        is_static: bool,
144    ) -> Self {
145        Self::for_method_with_templates(
146            params,
147            return_type,
148            self_fqcn,
149            parent_fqcn,
150            static_fqcn,
151            strict_types,
152            inside_constructor,
153            is_static,
154            None,
155        )
156    }
157
158    /// Like `for_method` but also accepts template parameters.
159    #[allow(clippy::too_many_arguments)]
160    pub fn for_method_with_templates(
161        params: &[mir_codebase::FnParam],
162        return_type: Option<Union>,
163        self_fqcn: Option<Arc<str>>,
164        parent_fqcn: Option<Arc<str>>,
165        static_fqcn: Option<Arc<str>>,
166        strict_types: bool,
167        inside_constructor: bool,
168        is_static: bool,
169        template_params: Option<&[mir_codebase::TemplateParam]>,
170    ) -> Self {
171        let mut ctx = Self::new();
172        ctx.fn_return_type = return_type;
173        ctx.self_fqcn = self_fqcn.clone();
174        ctx.parent_fqcn = parent_fqcn;
175        ctx.static_fqcn = static_fqcn;
176        ctx.strict_types = strict_types;
177        ctx.inside_constructor = inside_constructor;
178
179        // Build a map of template names to their bounds for parameter type resolution
180        let mut template_bounds_map: std::collections::HashMap<String, Union> =
181            std::collections::HashMap::new();
182        if let Some(templates) = template_params {
183            for tp in templates {
184                ctx.template_param_names.insert(tp.name.to_string());
185                if let Some(bound) = &tp.bound {
186                    template_bounds_map.insert(tp.name.to_string(), bound.clone());
187                }
188            }
189        }
190
191        for p in params {
192            let mut elem_ty =
193                p.ty.as_ref()
194                    .map(|arc| (**arc).clone())
195                    .unwrap_or_else(Union::mixed);
196
197            // Resolve template references to their bounds
198            // If the parameter type is a bare unqualified name matching a template parameter,
199            // replace it with the template's bound
200            if elem_ty.types.len() == 1 {
201                if let mir_types::Atomic::TNamedObject { fqcn, type_params } = &elem_ty.types[0] {
202                    if type_params.is_empty() && !fqcn.contains('\\') {
203                        if let Some(bound) = template_bounds_map.get(fqcn.as_ref()) {
204                            elem_ty = bound.clone();
205                        }
206                    }
207                }
208            }
209
210            // Variadic params like `Type ...$name` are accessed as `list<Type>` in the body.
211            // If the docblock already provides a list/array collection type, don't double-wrap.
212            let ty = if p.is_variadic {
213                let already_collection = elem_ty.types.iter().any(|a| {
214                    matches!(
215                        a,
216                        mir_types::Atomic::TList { .. }
217                            | mir_types::Atomic::TNonEmptyList { .. }
218                            | mir_types::Atomic::TArray { .. }
219                            | mir_types::Atomic::TNonEmptyArray { .. }
220                    )
221                });
222                if already_collection {
223                    elem_ty
224                } else {
225                    mir_types::Union::single(mir_types::Atomic::TList {
226                        value: Box::new(elem_ty),
227                    })
228                }
229            } else {
230                elem_ty
231            };
232            let name = p.name.as_ref().trim_start_matches('$').to_string();
233            ctx.vars.insert(name.clone(), ty);
234            ctx.assigned_vars.insert(name.clone());
235            ctx.param_names.insert(name.clone());
236            if p.is_byref {
237                ctx.byref_param_names.insert(name);
238            }
239        }
240
241        // Inject $this for non-static methods so that $this->method() can be
242        // resolved without hitting the mixed-receiver early-return guard.
243        if !is_static {
244            if let Some(fqcn) = self_fqcn {
245                let this_ty = mir_types::Union::single(mir_types::Atomic::TNamedObject {
246                    fqcn,
247                    type_params: vec![],
248                });
249                ctx.vars.insert("this".to_string(), this_ty);
250                ctx.assigned_vars.insert("this".to_string());
251            }
252        }
253
254        ctx
255    }
256
257    /// Get the type of a variable. Returns `mixed` if not found.
258    pub fn get_var(&self, name: &str) -> Union {
259        let name = name.trim_start_matches('$');
260        self.vars.get(name).cloned().unwrap_or_else(Union::mixed)
261    }
262
263    /// Set the type of a variable and mark it as assigned.
264    pub fn set_var(&mut self, name: impl Into<String>, ty: Union) {
265        let name: String = name.into();
266        let name = name.trim_start_matches('$').to_string();
267        self.vars.insert(name.clone(), ty);
268        self.assigned_vars.insert(name);
269    }
270
271    /// Check if a variable is definitely in scope.
272    pub fn var_is_defined(&self, name: &str) -> bool {
273        let name = name.trim_start_matches('$');
274        self.assigned_vars.contains(name)
275    }
276
277    /// Check if a variable might be defined (but not certainly).
278    pub fn var_possibly_defined(&self, name: &str) -> bool {
279        let name = name.trim_start_matches('$');
280        self.assigned_vars.contains(name) || self.possibly_assigned_vars.contains(name)
281    }
282
283    /// Mark a variable as carrying tainted (user-controlled) data.
284    pub fn taint_var(&mut self, name: &str) {
285        let name = name.trim_start_matches('$').to_string();
286        self.tainted_vars.insert(name);
287    }
288
289    /// Returns true if the variable is known to carry tainted data.
290    pub fn is_tainted(&self, name: &str) -> bool {
291        let name = name.trim_start_matches('$');
292        self.tainted_vars.contains(name)
293    }
294
295    /// Record the location of the first assignment to a variable (first-write-wins).
296    pub fn record_var_location(
297        &mut self,
298        name: &str,
299        line: u32,
300        col_start: u16,
301        line_end: u32,
302        col_end: u16,
303    ) {
304        let name = name.trim_start_matches('$');
305        self.var_locations
306            .entry(name.to_string())
307            .or_insert((line, col_start, line_end, col_end));
308    }
309
310    /// Remove a variable from the context (after `unset`).
311    pub fn unset_var(&mut self, name: &str) {
312        let name = name.trim_start_matches('$');
313        self.vars.shift_remove(name);
314        self.assigned_vars.remove(name);
315        self.possibly_assigned_vars.remove(name);
316    }
317
318    /// Fork this context for a branch (e.g. the `if` branch).
319    pub fn fork(&self) -> Context {
320        self.clone()
321    }
322
323    /// Merge two branch contexts at a join point (e.g. end of if/else).
324    ///
325    /// - vars present in both: merged union of types
326    /// - vars present in only one branch: marked `possibly_undefined`
327    /// - pre-existing vars from before the branch: preserved
328    pub fn merge_branches(pre: &Context, if_ctx: Context, else_ctx: Option<Context>) -> Context {
329        let else_ctx = else_ctx.unwrap_or_else(|| pre.clone());
330
331        // If the then-branch always diverges, the code after the if runs only
332        // in the else-branch — use that as the result directly.
333        if if_ctx.diverges && !else_ctx.diverges {
334            let mut result = else_ctx;
335            result.diverges = false;
336            return result;
337        }
338        // If the else-branch always diverges, code after the if runs only
339        // in the then-branch.
340        if else_ctx.diverges && !if_ctx.diverges {
341            let mut result = if_ctx;
342            result.diverges = false;
343            return result;
344        }
345        // If both diverge, the code after the if is unreachable.
346        if if_ctx.diverges && else_ctx.diverges {
347            let mut result = pre.clone();
348            result.diverges = true;
349            return result;
350        }
351
352        let mut result = pre.clone();
353
354        // Collect all variable names from both branch contexts
355        let all_names: HashSet<&String> = if_ctx.vars.keys().chain(else_ctx.vars.keys()).collect();
356
357        for name in all_names {
358            let in_if = if_ctx.assigned_vars.contains(name);
359            let in_else = else_ctx.assigned_vars.contains(name);
360            let in_pre = pre.assigned_vars.contains(name);
361
362            let ty_if = if_ctx.vars.get(name);
363            let ty_else = else_ctx.vars.get(name);
364
365            match (ty_if, ty_else) {
366                (Some(a), Some(b)) => {
367                    let merged = Union::merge(a, b);
368                    result.vars.insert(name.clone(), merged);
369                    if in_if && in_else {
370                        result.assigned_vars.insert(name.clone());
371                    } else {
372                        result.possibly_assigned_vars.insert(name.clone());
373                    }
374                }
375                (Some(a), None) => {
376                    if in_pre {
377                        // var existed before: merge with pre type
378                        let pre_ty = pre.vars.get(name).cloned().unwrap_or_else(Union::mixed);
379                        let merged = Union::merge(a, &pre_ty);
380                        result.vars.insert(name.clone(), merged);
381                        result.assigned_vars.insert(name.clone());
382                    } else {
383                        // only assigned in if branch
384                        let ty = a.clone().possibly_undefined();
385                        result.vars.insert(name.clone(), ty);
386                        result.possibly_assigned_vars.insert(name.clone());
387                    }
388                }
389                (None, Some(b)) => {
390                    if in_pre {
391                        let pre_ty = pre.vars.get(name).cloned().unwrap_or_else(Union::mixed);
392                        let merged = Union::merge(&pre_ty, b);
393                        result.vars.insert(name.clone(), merged);
394                        result.assigned_vars.insert(name.clone());
395                    } else {
396                        let ty = b.clone().possibly_undefined();
397                        result.vars.insert(name.clone(), ty);
398                        result.possibly_assigned_vars.insert(name.clone());
399                    }
400                }
401                (None, None) => {}
402            }
403        }
404
405        // Taint: conservative union — if either branch taints a var, it stays tainted
406        for name in if_ctx
407            .tainted_vars
408            .iter()
409            .chain(else_ctx.tainted_vars.iter())
410        {
411            result.tainted_vars.insert(name.clone());
412        }
413
414        // Read vars: union — if either branch reads a var, it counts as read
415        for name in if_ctx.read_vars.iter().chain(else_ctx.read_vars.iter()) {
416            result.read_vars.insert(name.clone());
417        }
418
419        // Var locations: keep the earliest known span for each variable
420        for (name, loc) in if_ctx
421            .var_locations
422            .iter()
423            .chain(else_ctx.var_locations.iter())
424        {
425            result.var_locations.entry(name.clone()).or_insert(*loc);
426        }
427
428        // After merging branches, the merged context does not diverge
429        // (at least one path through the merge reaches the next statement).
430        result.diverges = false;
431
432        result
433    }
434}
435
436impl Default for Context {
437    fn default() -> Self {
438        Self::new()
439    }
440}