Skip to main content

mir_analyzer/
context.rs

//! Analysis context — carries type state through statement/expression analysis.
2use std::collections::HashSet;
3use std::sync::Arc;
4
5use indexmap::IndexMap;
6use mir_types::Union;
7
8// ---------------------------------------------------------------------------
9// Context
10// ---------------------------------------------------------------------------
11
12#[derive(Debug, Clone)]
13pub struct Context {
14    /// Types of variables at this point in execution.
15    pub vars: IndexMap<String, Union>,
16
17    /// Variables that are definitely assigned at this point.
18    pub assigned_vars: HashSet<String>,
19
20    /// Variables that *might* be assigned (e.g. only in one if branch).
21    pub possibly_assigned_vars: HashSet<String>,
22
23    /// The class in whose body we are analysing (`self`).
24    pub self_fqcn: Option<Arc<str>>,
25
26    /// The parent class (`parent`).
27    pub parent_fqcn: Option<Arc<str>>,
28
29    /// Late-static-binding class (`static`).
30    pub static_fqcn: Option<Arc<str>>,
31
32    /// Declared return type for the current function/method.
33    pub fn_return_type: Option<Union>,
34
35    /// Whether we are currently inside a loop.
36    pub inside_loop: bool,
37
38    /// Whether we are currently inside a finally block.
39    pub inside_finally: bool,
40
41    /// Whether we are inside a constructor.
42    pub inside_constructor: bool,
43
44    /// Whether `strict_types=1` is declared for this file.
45    pub strict_types: bool,
46
47    /// Variables that carry tainted (user-controlled) values at this point.
48    /// Used by taint analysis (M19).
49    pub tainted_vars: HashSet<String>,
50
51    /// Variables that have been read at least once in this scope.
52    /// Used by UnusedParam detection (M18).
53    pub read_vars: HashSet<String>,
54
55    /// Names of function/method parameters in this scope (stripped of `$`).
56    /// Used to exclude parameters from UnusedVariable detection.
57    pub param_names: HashSet<String>,
58
59    /// Names of by-reference parameters in this scope (stripped of `$`).
60    /// Assigning to these is externally observable, so it counts as usage.
61    pub byref_param_names: HashSet<String>,
62
63    /// Whether every execution path through this context has diverged
64    /// (returned, thrown, or exited). Used to detect "all catch branches
65    /// return" so that variables assigned only in the try body are
66    /// considered definitely assigned after the try/catch.
67    pub diverges: bool,
68}
69
70impl Context {
71    pub fn new() -> Self {
72        let mut ctx = Self {
73            vars: IndexMap::new(),
74            assigned_vars: HashSet::new(),
75            possibly_assigned_vars: HashSet::new(),
76            self_fqcn: None,
77            parent_fqcn: None,
78            static_fqcn: None,
79            fn_return_type: None,
80            inside_loop: false,
81            inside_finally: false,
82            inside_constructor: false,
83            strict_types: false,
84            tainted_vars: HashSet::new(),
85            read_vars: HashSet::new(),
86            param_names: HashSet::new(),
87            byref_param_names: HashSet::new(),
88            diverges: false,
89        };
90        // PHP superglobals — always in scope in any context
91        for sg in &[
92            "_SERVER", "_GET", "_POST", "_REQUEST", "_SESSION", "_COOKIE", "_FILES", "_ENV",
93            "GLOBALS",
94        ] {
95            ctx.vars.insert(sg.to_string(), mir_types::Union::mixed());
96            ctx.assigned_vars.insert(sg.to_string());
97        }
98        ctx
99    }
100
101    /// Create a context seeded with the given parameters.
102    pub fn for_function(
103        params: &[mir_codebase::FnParam],
104        return_type: Option<Union>,
105        self_fqcn: Option<Arc<str>>,
106        parent_fqcn: Option<Arc<str>>,
107        static_fqcn: Option<Arc<str>>,
108        strict_types: bool,
109        is_static: bool,
110    ) -> Self {
111        Self::for_method(
112            params,
113            return_type,
114            self_fqcn,
115            parent_fqcn,
116            static_fqcn,
117            strict_types,
118            false,
119            is_static,
120        )
121    }
122
123    /// Like `for_function` but also sets `inside_constructor`.
124    #[allow(clippy::too_many_arguments)]
125    pub fn for_method(
126        params: &[mir_codebase::FnParam],
127        return_type: Option<Union>,
128        self_fqcn: Option<Arc<str>>,
129        parent_fqcn: Option<Arc<str>>,
130        static_fqcn: Option<Arc<str>>,
131        strict_types: bool,
132        inside_constructor: bool,
133        is_static: bool,
134    ) -> Self {
135        let mut ctx = Self::new();
136        ctx.fn_return_type = return_type;
137        ctx.self_fqcn = self_fqcn.clone();
138        ctx.parent_fqcn = parent_fqcn;
139        ctx.static_fqcn = static_fqcn;
140        ctx.strict_types = strict_types;
141        ctx.inside_constructor = inside_constructor;
142
143        for p in params {
144            let elem_ty = p.ty.clone().unwrap_or_else(Union::mixed);
145            // Variadic params like `Type ...$name` are accessed as `list<Type>` in the body.
146            // If the docblock already provides a list/array collection type, don't double-wrap.
147            let ty = if p.is_variadic {
148                let already_collection = elem_ty.types.iter().any(|a| {
149                    matches!(
150                        a,
151                        mir_types::Atomic::TList { .. }
152                            | mir_types::Atomic::TNonEmptyList { .. }
153                            | mir_types::Atomic::TArray { .. }
154                            | mir_types::Atomic::TNonEmptyArray { .. }
155                    )
156                });
157                if already_collection {
158                    elem_ty
159                } else {
160                    mir_types::Union::single(mir_types::Atomic::TList {
161                        value: Box::new(elem_ty),
162                    })
163                }
164            } else {
165                elem_ty
166            };
167            let name = p.name.as_ref().trim_start_matches('$').to_string();
168            ctx.vars.insert(name.clone(), ty);
169            ctx.assigned_vars.insert(name.clone());
170            ctx.param_names.insert(name.clone());
171            if p.is_byref {
172                ctx.byref_param_names.insert(name);
173            }
174        }
175
176        // Inject $this for non-static methods so that $this->method() can be
177        // resolved without hitting the mixed-receiver early-return guard.
178        if !is_static {
179            if let Some(fqcn) = self_fqcn {
180                let this_ty = mir_types::Union::single(mir_types::Atomic::TNamedObject {
181                    fqcn,
182                    type_params: vec![],
183                });
184                ctx.vars.insert("this".to_string(), this_ty);
185                ctx.assigned_vars.insert("this".to_string());
186            }
187        }
188
189        ctx
190    }
191
192    /// Get the type of a variable. Returns `mixed` if not found.
193    pub fn get_var(&self, name: &str) -> Union {
194        let name = name.trim_start_matches('$');
195        self.vars.get(name).cloned().unwrap_or_else(Union::mixed)
196    }
197
198    /// Set the type of a variable and mark it as assigned.
199    pub fn set_var(&mut self, name: impl Into<String>, ty: Union) {
200        let name: String = name.into();
201        let name = name.trim_start_matches('$').to_string();
202        self.vars.insert(name.clone(), ty);
203        self.assigned_vars.insert(name);
204    }
205
206    /// Check if a variable is definitely in scope.
207    pub fn var_is_defined(&self, name: &str) -> bool {
208        let name = name.trim_start_matches('$');
209        self.assigned_vars.contains(name)
210    }
211
212    /// Check if a variable might be defined (but not certainly).
213    pub fn var_possibly_defined(&self, name: &str) -> bool {
214        let name = name.trim_start_matches('$');
215        self.assigned_vars.contains(name) || self.possibly_assigned_vars.contains(name)
216    }
217
218    /// Mark a variable as carrying tainted (user-controlled) data.
219    pub fn taint_var(&mut self, name: &str) {
220        let name = name.trim_start_matches('$').to_string();
221        self.tainted_vars.insert(name);
222    }
223
224    /// Returns true if the variable is known to carry tainted data.
225    pub fn is_tainted(&self, name: &str) -> bool {
226        let name = name.trim_start_matches('$');
227        self.tainted_vars.contains(name)
228    }
229
230    /// Remove a variable from the context (after `unset`).
231    pub fn unset_var(&mut self, name: &str) {
232        let name = name.trim_start_matches('$');
233        self.vars.shift_remove(name);
234        self.assigned_vars.remove(name);
235        self.possibly_assigned_vars.remove(name);
236    }
237
238    /// Fork this context for a branch (e.g. the `if` branch).
239    pub fn fork(&self) -> Context {
240        self.clone()
241    }
242
243    /// Merge two branch contexts at a join point (e.g. end of if/else).
244    ///
245    /// - vars present in both: merged union of types
246    /// - vars present in only one branch: marked `possibly_undefined`
247    /// - pre-existing vars from before the branch: preserved
248    pub fn merge_branches(pre: &Context, if_ctx: Context, else_ctx: Option<Context>) -> Context {
249        let else_ctx = else_ctx.unwrap_or_else(|| pre.clone());
250
251        // If the then-branch always diverges, the code after the if runs only
252        // in the else-branch — use that as the result directly.
253        if if_ctx.diverges && !else_ctx.diverges {
254            let mut result = else_ctx;
255            result.diverges = false;
256            return result;
257        }
258        // If the else-branch always diverges, code after the if runs only
259        // in the then-branch.
260        if else_ctx.diverges && !if_ctx.diverges {
261            let mut result = if_ctx;
262            result.diverges = false;
263            return result;
264        }
265        // If both diverge, the code after the if is unreachable.
266        if if_ctx.diverges && else_ctx.diverges {
267            let mut result = pre.clone();
268            result.diverges = true;
269            return result;
270        }
271
272        let mut result = pre.clone();
273
274        // Collect all variable names from both branch contexts
275        let all_names: HashSet<&String> = if_ctx.vars.keys().chain(else_ctx.vars.keys()).collect();
276
277        for name in all_names {
278            let in_if = if_ctx.assigned_vars.contains(name);
279            let in_else = else_ctx.assigned_vars.contains(name);
280            let in_pre = pre.assigned_vars.contains(name);
281
282            let ty_if = if_ctx.vars.get(name);
283            let ty_else = else_ctx.vars.get(name);
284
285            match (ty_if, ty_else) {
286                (Some(a), Some(b)) => {
287                    let merged = Union::merge(a, b);
288                    result.vars.insert(name.clone(), merged);
289                    if in_if && in_else {
290                        result.assigned_vars.insert(name.clone());
291                    } else {
292                        result.possibly_assigned_vars.insert(name.clone());
293                    }
294                }
295                (Some(a), None) => {
296                    if in_pre {
297                        // var existed before: merge with pre type
298                        let pre_ty = pre.vars.get(name).cloned().unwrap_or_else(Union::mixed);
299                        let merged = Union::merge(a, &pre_ty);
300                        result.vars.insert(name.clone(), merged);
301                        result.assigned_vars.insert(name.clone());
302                    } else {
303                        // only assigned in if branch
304                        let ty = a.clone().possibly_undefined();
305                        result.vars.insert(name.clone(), ty);
306                        result.possibly_assigned_vars.insert(name.clone());
307                    }
308                }
309                (None, Some(b)) => {
310                    if in_pre {
311                        let pre_ty = pre.vars.get(name).cloned().unwrap_or_else(Union::mixed);
312                        let merged = Union::merge(&pre_ty, b);
313                        result.vars.insert(name.clone(), merged);
314                        result.assigned_vars.insert(name.clone());
315                    } else {
316                        let ty = b.clone().possibly_undefined();
317                        result.vars.insert(name.clone(), ty);
318                        result.possibly_assigned_vars.insert(name.clone());
319                    }
320                }
321                (None, None) => {}
322            }
323        }
324
325        // Taint: conservative union — if either branch taints a var, it stays tainted
326        for name in if_ctx
327            .tainted_vars
328            .iter()
329            .chain(else_ctx.tainted_vars.iter())
330        {
331            result.tainted_vars.insert(name.clone());
332        }
333
334        // Read vars: union — if either branch reads a var, it counts as read
335        for name in if_ctx.read_vars.iter().chain(else_ctx.read_vars.iter()) {
336            result.read_vars.insert(name.clone());
337        }
338
339        // After merging branches, the merged context does not diverge
340        // (at least one path through the merge reaches the next statement).
341        result.diverges = false;
342
343        result
344    }
345}
346
347impl Default for Context {
348    fn default() -> Self {
349        Self::new()
350    }
351}