Skip to main content

mir_analyzer/
context.rs

//! Analysis context — carries type state through statement/expression analysis.
2use std::collections::{HashMap, HashSet};
3use std::sync::Arc;
4
5use indexmap::IndexMap;
6use mir_types::Union;
7
8// ---------------------------------------------------------------------------
9// Context
10// ---------------------------------------------------------------------------
11
/// Flow-sensitive state carried through statement/expression analysis.
///
/// A `Context` is cloned per branch (`fork`) and recombined at join points
/// (`merge_branches`). The helper methods on this type normalise variable
/// names by stripping any leading `$` before touching the maps and sets below.
#[derive(Debug, Clone)]
pub struct Context {
    /// Types of variables at this point in execution.
    /// Insertion-ordered map from variable name to its current type.
    pub vars: IndexMap<String, Union>,

    /// Variables that are definitely assigned at this point.
    pub assigned_vars: HashSet<String>,

    /// Variables that *might* be assigned (e.g. only in one if branch).
    pub possibly_assigned_vars: HashSet<String>,

    /// The class in whose body we are analysing (`self`).
    /// `None` when no class scope was supplied.
    pub self_fqcn: Option<Arc<str>>,

    /// The parent class (`parent`).
    pub parent_fqcn: Option<Arc<str>>,

    /// Late-static-binding class (`static`).
    pub static_fqcn: Option<Arc<str>>,

    /// Declared return type for the current function/method.
    /// `None` when no return type was supplied.
    pub fn_return_type: Option<Union>,

    /// Whether we are currently inside a loop.
    pub inside_loop: bool,

    /// Whether we are currently inside a finally block.
    pub inside_finally: bool,

    /// Whether we are inside a constructor.
    pub inside_constructor: bool,

    /// Whether `strict_types=1` is declared for this file.
    pub strict_types: bool,

    /// Variables that carry tainted (user-controlled) values at this point.
    /// Used by taint analysis (M19).
    pub tainted_vars: HashSet<String>,

    /// Variables that have been read at least once in this scope.
    /// Used by UnusedParam detection (M18).
    pub read_vars: HashSet<String>,

    /// Names of function/method parameters in this scope (stripped of `$`).
    /// Used to exclude parameters from UnusedVariable detection.
    pub param_names: HashSet<String>,

    /// Names of by-reference parameters in this scope (stripped of `$`).
    /// Assigning to these is externally observable, so it counts as usage.
    pub byref_param_names: HashSet<String>,

    /// Whether every execution path through this context has diverged
    /// (returned, thrown, or exited). Used to detect "all catch branches
    /// return" so that variables assigned only in the try body are
    /// considered definitely assigned after the try/catch.
    pub diverges: bool,

    /// Pre-converted (line, col_start, line_end, col_end) of the first assignment
    /// to each variable. Used to emit accurate locations for UnusedVariable / UnusedParam.
    /// Entries are first-write-wins: a later assignment does not replace the span.
    pub var_locations: HashMap<String, (u32, u16, u32, u16)>,
}
73
74impl Context {
75    pub fn new() -> Self {
76        let mut ctx = Self {
77            vars: IndexMap::new(),
78            assigned_vars: HashSet::new(),
79            possibly_assigned_vars: HashSet::new(),
80            self_fqcn: None,
81            parent_fqcn: None,
82            static_fqcn: None,
83            fn_return_type: None,
84            inside_loop: false,
85            inside_finally: false,
86            inside_constructor: false,
87            strict_types: false,
88            tainted_vars: HashSet::new(),
89            read_vars: HashSet::new(),
90            param_names: HashSet::new(),
91            byref_param_names: HashSet::new(),
92            diverges: false,
93            var_locations: HashMap::new(),
94        };
95        // PHP superglobals — always in scope in any context
96        for sg in &[
97            "_SERVER", "_GET", "_POST", "_REQUEST", "_SESSION", "_COOKIE", "_FILES", "_ENV",
98            "GLOBALS",
99        ] {
100            ctx.vars.insert(sg.to_string(), mir_types::Union::mixed());
101            ctx.assigned_vars.insert(sg.to_string());
102        }
103        ctx
104    }
105
106    /// Create a context seeded with the given parameters.
107    pub fn for_function(
108        params: &[mir_codebase::FnParam],
109        return_type: Option<Union>,
110        self_fqcn: Option<Arc<str>>,
111        parent_fqcn: Option<Arc<str>>,
112        static_fqcn: Option<Arc<str>>,
113        strict_types: bool,
114        is_static: bool,
115    ) -> Self {
116        Self::for_method(
117            params,
118            return_type,
119            self_fqcn,
120            parent_fqcn,
121            static_fqcn,
122            strict_types,
123            false,
124            is_static,
125        )
126    }
127
128    /// Like `for_function` but also sets `inside_constructor`.
129    #[allow(clippy::too_many_arguments)]
130    pub fn for_method(
131        params: &[mir_codebase::FnParam],
132        return_type: Option<Union>,
133        self_fqcn: Option<Arc<str>>,
134        parent_fqcn: Option<Arc<str>>,
135        static_fqcn: Option<Arc<str>>,
136        strict_types: bool,
137        inside_constructor: bool,
138        is_static: bool,
139    ) -> Self {
140        let mut ctx = Self::new();
141        ctx.fn_return_type = return_type;
142        ctx.self_fqcn = self_fqcn.clone();
143        ctx.parent_fqcn = parent_fqcn;
144        ctx.static_fqcn = static_fqcn;
145        ctx.strict_types = strict_types;
146        ctx.inside_constructor = inside_constructor;
147
148        for p in params {
149            let elem_ty =
150                p.ty.as_ref()
151                    .map(|arc| (**arc).clone())
152                    .unwrap_or_else(Union::mixed);
153            // Variadic params like `Type ...$name` are accessed as `list<Type>` in the body.
154            // If the docblock already provides a list/array collection type, don't double-wrap.
155            let ty = if p.is_variadic {
156                let already_collection = elem_ty.types.iter().any(|a| {
157                    matches!(
158                        a,
159                        mir_types::Atomic::TList { .. }
160                            | mir_types::Atomic::TNonEmptyList { .. }
161                            | mir_types::Atomic::TArray { .. }
162                            | mir_types::Atomic::TNonEmptyArray { .. }
163                    )
164                });
165                if already_collection {
166                    elem_ty
167                } else {
168                    mir_types::Union::single(mir_types::Atomic::TList {
169                        value: Box::new(elem_ty),
170                    })
171                }
172            } else {
173                elem_ty
174            };
175            let name = p.name.as_ref().trim_start_matches('$').to_string();
176            ctx.vars.insert(name.clone(), ty);
177            ctx.assigned_vars.insert(name.clone());
178            ctx.param_names.insert(name.clone());
179            if p.is_byref {
180                ctx.byref_param_names.insert(name);
181            }
182        }
183
184        // Inject $this for non-static methods so that $this->method() can be
185        // resolved without hitting the mixed-receiver early-return guard.
186        if !is_static {
187            if let Some(fqcn) = self_fqcn {
188                let this_ty = mir_types::Union::single(mir_types::Atomic::TNamedObject {
189                    fqcn,
190                    type_params: vec![],
191                });
192                ctx.vars.insert("this".to_string(), this_ty);
193                ctx.assigned_vars.insert("this".to_string());
194            }
195        }
196
197        ctx
198    }
199
200    /// Get the type of a variable. Returns `mixed` if not found.
201    pub fn get_var(&self, name: &str) -> Union {
202        let name = name.trim_start_matches('$');
203        self.vars.get(name).cloned().unwrap_or_else(Union::mixed)
204    }
205
206    /// Set the type of a variable and mark it as assigned.
207    pub fn set_var(&mut self, name: impl Into<String>, ty: Union) {
208        let name: String = name.into();
209        let name = name.trim_start_matches('$').to_string();
210        self.vars.insert(name.clone(), ty);
211        self.assigned_vars.insert(name);
212    }
213
214    /// Check if a variable is definitely in scope.
215    pub fn var_is_defined(&self, name: &str) -> bool {
216        let name = name.trim_start_matches('$');
217        self.assigned_vars.contains(name)
218    }
219
220    /// Check if a variable might be defined (but not certainly).
221    pub fn var_possibly_defined(&self, name: &str) -> bool {
222        let name = name.trim_start_matches('$');
223        self.assigned_vars.contains(name) || self.possibly_assigned_vars.contains(name)
224    }
225
226    /// Mark a variable as carrying tainted (user-controlled) data.
227    pub fn taint_var(&mut self, name: &str) {
228        let name = name.trim_start_matches('$').to_string();
229        self.tainted_vars.insert(name);
230    }
231
232    /// Returns true if the variable is known to carry tainted data.
233    pub fn is_tainted(&self, name: &str) -> bool {
234        let name = name.trim_start_matches('$');
235        self.tainted_vars.contains(name)
236    }
237
238    /// Record the location of the first assignment to a variable (first-write-wins).
239    pub fn record_var_location(
240        &mut self,
241        name: &str,
242        line: u32,
243        col_start: u16,
244        line_end: u32,
245        col_end: u16,
246    ) {
247        let name = name.trim_start_matches('$');
248        self.var_locations
249            .entry(name.to_string())
250            .or_insert((line, col_start, line_end, col_end));
251    }
252
253    /// Remove a variable from the context (after `unset`).
254    pub fn unset_var(&mut self, name: &str) {
255        let name = name.trim_start_matches('$');
256        self.vars.shift_remove(name);
257        self.assigned_vars.remove(name);
258        self.possibly_assigned_vars.remove(name);
259    }
260
261    /// Fork this context for a branch (e.g. the `if` branch).
262    pub fn fork(&self) -> Context {
263        self.clone()
264    }
265
266    /// Merge two branch contexts at a join point (e.g. end of if/else).
267    ///
268    /// - vars present in both: merged union of types
269    /// - vars present in only one branch: marked `possibly_undefined`
270    /// - pre-existing vars from before the branch: preserved
271    pub fn merge_branches(pre: &Context, if_ctx: Context, else_ctx: Option<Context>) -> Context {
272        let else_ctx = else_ctx.unwrap_or_else(|| pre.clone());
273
274        // If the then-branch always diverges, the code after the if runs only
275        // in the else-branch — use that as the result directly.
276        if if_ctx.diverges && !else_ctx.diverges {
277            let mut result = else_ctx;
278            result.diverges = false;
279            return result;
280        }
281        // If the else-branch always diverges, code after the if runs only
282        // in the then-branch.
283        if else_ctx.diverges && !if_ctx.diverges {
284            let mut result = if_ctx;
285            result.diverges = false;
286            return result;
287        }
288        // If both diverge, the code after the if is unreachable.
289        if if_ctx.diverges && else_ctx.diverges {
290            let mut result = pre.clone();
291            result.diverges = true;
292            return result;
293        }
294
295        let mut result = pre.clone();
296
297        // Collect all variable names from both branch contexts
298        let all_names: HashSet<&String> = if_ctx.vars.keys().chain(else_ctx.vars.keys()).collect();
299
300        for name in all_names {
301            let in_if = if_ctx.assigned_vars.contains(name);
302            let in_else = else_ctx.assigned_vars.contains(name);
303            let in_pre = pre.assigned_vars.contains(name);
304
305            let ty_if = if_ctx.vars.get(name);
306            let ty_else = else_ctx.vars.get(name);
307
308            match (ty_if, ty_else) {
309                (Some(a), Some(b)) => {
310                    let merged = Union::merge(a, b);
311                    result.vars.insert(name.clone(), merged);
312                    if in_if && in_else {
313                        result.assigned_vars.insert(name.clone());
314                    } else {
315                        result.possibly_assigned_vars.insert(name.clone());
316                    }
317                }
318                (Some(a), None) => {
319                    if in_pre {
320                        // var existed before: merge with pre type
321                        let pre_ty = pre.vars.get(name).cloned().unwrap_or_else(Union::mixed);
322                        let merged = Union::merge(a, &pre_ty);
323                        result.vars.insert(name.clone(), merged);
324                        result.assigned_vars.insert(name.clone());
325                    } else {
326                        // only assigned in if branch
327                        let ty = a.clone().possibly_undefined();
328                        result.vars.insert(name.clone(), ty);
329                        result.possibly_assigned_vars.insert(name.clone());
330                    }
331                }
332                (None, Some(b)) => {
333                    if in_pre {
334                        let pre_ty = pre.vars.get(name).cloned().unwrap_or_else(Union::mixed);
335                        let merged = Union::merge(&pre_ty, b);
336                        result.vars.insert(name.clone(), merged);
337                        result.assigned_vars.insert(name.clone());
338                    } else {
339                        let ty = b.clone().possibly_undefined();
340                        result.vars.insert(name.clone(), ty);
341                        result.possibly_assigned_vars.insert(name.clone());
342                    }
343                }
344                (None, None) => {}
345            }
346        }
347
348        // Taint: conservative union — if either branch taints a var, it stays tainted
349        for name in if_ctx
350            .tainted_vars
351            .iter()
352            .chain(else_ctx.tainted_vars.iter())
353        {
354            result.tainted_vars.insert(name.clone());
355        }
356
357        // Read vars: union — if either branch reads a var, it counts as read
358        for name in if_ctx.read_vars.iter().chain(else_ctx.read_vars.iter()) {
359            result.read_vars.insert(name.clone());
360        }
361
362        // Var locations: keep the earliest known span for each variable
363        for (name, loc) in if_ctx
364            .var_locations
365            .iter()
366            .chain(else_ctx.var_locations.iter())
367        {
368            result.var_locations.entry(name.clone()).or_insert(*loc);
369        }
370
371        // After merging branches, the merged context does not diverge
372        // (at least one path through the merge reaches the next statement).
373        result.diverges = false;
374
375        result
376    }
377}
378
379impl Default for Context {
380    fn default() -> Self {
381        Self::new()
382    }
383}