Skip to main content

mir_analyzer/
context.rs

//! Analysis context — carries type state through statement/expression analysis.
2use std::collections::HashSet;
3use std::sync::Arc;
4
5use indexmap::IndexMap;
6use mir_types::Union;
7
8// ---------------------------------------------------------------------------
9// Context
10// ---------------------------------------------------------------------------
11
12#[derive(Debug, Clone)]
13pub struct Context {
14    /// Types of variables at this point in execution.
15    pub vars: IndexMap<String, Union>,
16
17    /// Variables that are definitely assigned at this point.
18    pub assigned_vars: HashSet<String>,
19
20    /// Variables that *might* be assigned (e.g. only in one if branch).
21    pub possibly_assigned_vars: HashSet<String>,
22
23    /// The class in whose body we are analysing (`self`).
24    pub self_fqcn: Option<Arc<str>>,
25
26    /// The parent class (`parent`).
27    pub parent_fqcn: Option<Arc<str>>,
28
29    /// Late-static-binding class (`static`).
30    pub static_fqcn: Option<Arc<str>>,
31
32    /// Declared return type for the current function/method.
33    pub fn_return_type: Option<Union>,
34
35    /// Whether we are currently inside a loop.
36    pub inside_loop: bool,
37
38    /// Whether we are currently inside a finally block.
39    pub inside_finally: bool,
40
41    /// Whether we are inside a constructor.
42    pub inside_constructor: bool,
43
44    /// Whether `strict_types=1` is declared for this file.
45    pub strict_types: bool,
46
47    /// Variables that carry tainted (user-controlled) values at this point.
48    /// Used by taint analysis (M19).
49    pub tainted_vars: HashSet<String>,
50
51    /// Variables that have been read at least once in this scope.
52    /// Used by UnusedParam detection (M18).
53    pub read_vars: HashSet<String>,
54
55    /// Names of function/method parameters in this scope (stripped of `$`).
56    /// Used to exclude parameters from UnusedVariable detection.
57    pub param_names: HashSet<String>,
58
59    /// Whether every execution path through this context has diverged
60    /// (returned, thrown, or exited). Used to detect "all catch branches
61    /// return" so that variables assigned only in the try body are
62    /// considered definitely assigned after the try/catch.
63    pub diverges: bool,
64}
65
66impl Context {
67    pub fn new() -> Self {
68        let mut ctx = Self {
69            vars: IndexMap::new(),
70            assigned_vars: HashSet::new(),
71            possibly_assigned_vars: HashSet::new(),
72            self_fqcn: None,
73            parent_fqcn: None,
74            static_fqcn: None,
75            fn_return_type: None,
76            inside_loop: false,
77            inside_finally: false,
78            inside_constructor: false,
79            strict_types: false,
80            tainted_vars: HashSet::new(),
81            read_vars: HashSet::new(),
82            param_names: HashSet::new(),
83            diverges: false,
84        };
85        // PHP superglobals — always in scope in any context
86        for sg in &[
87            "_SERVER", "_GET", "_POST", "_REQUEST", "_SESSION", "_COOKIE", "_FILES", "_ENV",
88            "GLOBALS",
89        ] {
90            ctx.vars.insert(sg.to_string(), mir_types::Union::mixed());
91            ctx.assigned_vars.insert(sg.to_string());
92        }
93        ctx
94    }
95
96    /// Create a context seeded with the given parameters.
97    pub fn for_function(
98        params: &[mir_codebase::FnParam],
99        return_type: Option<Union>,
100        self_fqcn: Option<Arc<str>>,
101        parent_fqcn: Option<Arc<str>>,
102        static_fqcn: Option<Arc<str>>,
103        strict_types: bool,
104    ) -> Self {
105        Self::for_method(
106            params,
107            return_type,
108            self_fqcn,
109            parent_fqcn,
110            static_fqcn,
111            strict_types,
112            false,
113        )
114    }
115
116    /// Like `for_function` but also sets `inside_constructor`.
117    pub fn for_method(
118        params: &[mir_codebase::FnParam],
119        return_type: Option<Union>,
120        self_fqcn: Option<Arc<str>>,
121        parent_fqcn: Option<Arc<str>>,
122        static_fqcn: Option<Arc<str>>,
123        strict_types: bool,
124        inside_constructor: bool,
125    ) -> Self {
126        let mut ctx = Self::new();
127        ctx.fn_return_type = return_type;
128        ctx.self_fqcn = self_fqcn;
129        ctx.parent_fqcn = parent_fqcn;
130        ctx.static_fqcn = static_fqcn;
131        ctx.strict_types = strict_types;
132        ctx.inside_constructor = inside_constructor;
133
134        for p in params {
135            let elem_ty = p.ty.clone().unwrap_or_else(Union::mixed);
136            // Variadic params like `Type ...$name` are accessed as `list<Type>` in the body.
137            // If the docblock already provides a list/array collection type, don't double-wrap.
138            let ty = if p.is_variadic {
139                let already_collection = elem_ty.types.iter().any(|a| {
140                    matches!(
141                        a,
142                        mir_types::Atomic::TList { .. }
143                            | mir_types::Atomic::TNonEmptyList { .. }
144                            | mir_types::Atomic::TArray { .. }
145                            | mir_types::Atomic::TNonEmptyArray { .. }
146                    )
147                });
148                if already_collection {
149                    elem_ty
150                } else {
151                    mir_types::Union::single(mir_types::Atomic::TList {
152                        value: Box::new(elem_ty),
153                    })
154                }
155            } else {
156                elem_ty
157            };
158            let name = p.name.as_ref().trim_start_matches('$').to_string();
159            ctx.vars.insert(name.clone(), ty);
160            ctx.assigned_vars.insert(name.clone());
161            ctx.param_names.insert(name);
162        }
163        ctx
164    }
165
166    /// Get the type of a variable. Returns `mixed` if not found.
167    pub fn get_var(&self, name: &str) -> Union {
168        let name = name.trim_start_matches('$');
169        self.vars.get(name).cloned().unwrap_or_else(Union::mixed)
170    }
171
172    /// Set the type of a variable and mark it as assigned.
173    pub fn set_var(&mut self, name: impl Into<String>, ty: Union) {
174        let name: String = name.into();
175        let name = name.trim_start_matches('$').to_string();
176        self.vars.insert(name.clone(), ty);
177        self.assigned_vars.insert(name);
178    }
179
180    /// Check if a variable is definitely in scope.
181    pub fn var_is_defined(&self, name: &str) -> bool {
182        let name = name.trim_start_matches('$');
183        self.assigned_vars.contains(name)
184    }
185
186    /// Check if a variable might be defined (but not certainly).
187    pub fn var_possibly_defined(&self, name: &str) -> bool {
188        let name = name.trim_start_matches('$');
189        self.assigned_vars.contains(name) || self.possibly_assigned_vars.contains(name)
190    }
191
192    /// Mark a variable as carrying tainted (user-controlled) data.
193    pub fn taint_var(&mut self, name: &str) {
194        let name = name.trim_start_matches('$').to_string();
195        self.tainted_vars.insert(name);
196    }
197
198    /// Returns true if the variable is known to carry tainted data.
199    pub fn is_tainted(&self, name: &str) -> bool {
200        let name = name.trim_start_matches('$');
201        self.tainted_vars.contains(name)
202    }
203
204    /// Remove a variable from the context (after `unset`).
205    pub fn unset_var(&mut self, name: &str) {
206        let name = name.trim_start_matches('$');
207        self.vars.shift_remove(name);
208        self.assigned_vars.remove(name);
209        self.possibly_assigned_vars.remove(name);
210    }
211
212    /// Fork this context for a branch (e.g. the `if` branch).
213    pub fn fork(&self) -> Context {
214        self.clone()
215    }
216
217    /// Merge two branch contexts at a join point (e.g. end of if/else).
218    ///
219    /// - vars present in both: merged union of types
220    /// - vars present in only one branch: marked `possibly_undefined`
221    /// - pre-existing vars from before the branch: preserved
222    pub fn merge_branches(pre: &Context, if_ctx: Context, else_ctx: Option<Context>) -> Context {
223        let else_ctx = else_ctx.unwrap_or_else(|| pre.clone());
224
225        // If the then-branch always diverges, the code after the if runs only
226        // in the else-branch — use that as the result directly.
227        if if_ctx.diverges && !else_ctx.diverges {
228            let mut result = else_ctx;
229            result.diverges = false;
230            return result;
231        }
232        // If the else-branch always diverges, code after the if runs only
233        // in the then-branch.
234        if else_ctx.diverges && !if_ctx.diverges {
235            let mut result = if_ctx;
236            result.diverges = false;
237            return result;
238        }
239        // If both diverge, the code after the if is unreachable.
240        if if_ctx.diverges && else_ctx.diverges {
241            let mut result = pre.clone();
242            result.diverges = true;
243            return result;
244        }
245
246        let mut result = pre.clone();
247
248        // Collect all variable names from both branch contexts
249        let all_names: HashSet<&String> = if_ctx.vars.keys().chain(else_ctx.vars.keys()).collect();
250
251        for name in all_names {
252            let in_if = if_ctx.assigned_vars.contains(name);
253            let in_else = else_ctx.assigned_vars.contains(name);
254            let in_pre = pre.assigned_vars.contains(name);
255
256            let ty_if = if_ctx.vars.get(name);
257            let ty_else = else_ctx.vars.get(name);
258
259            match (ty_if, ty_else) {
260                (Some(a), Some(b)) => {
261                    let merged = Union::merge(a, b);
262                    result.vars.insert(name.clone(), merged);
263                    if in_if && in_else {
264                        result.assigned_vars.insert(name.clone());
265                    } else {
266                        result.possibly_assigned_vars.insert(name.clone());
267                    }
268                }
269                (Some(a), None) => {
270                    if in_pre {
271                        // var existed before: merge with pre type
272                        let pre_ty = pre.vars.get(name).cloned().unwrap_or_else(Union::mixed);
273                        let merged = Union::merge(a, &pre_ty);
274                        result.vars.insert(name.clone(), merged);
275                        result.assigned_vars.insert(name.clone());
276                    } else {
277                        // only assigned in if branch
278                        let ty = a.clone().possibly_undefined();
279                        result.vars.insert(name.clone(), ty);
280                        result.possibly_assigned_vars.insert(name.clone());
281                    }
282                }
283                (None, Some(b)) => {
284                    if in_pre {
285                        let pre_ty = pre.vars.get(name).cloned().unwrap_or_else(Union::mixed);
286                        let merged = Union::merge(&pre_ty, b);
287                        result.vars.insert(name.clone(), merged);
288                        result.assigned_vars.insert(name.clone());
289                    } else {
290                        let ty = b.clone().possibly_undefined();
291                        result.vars.insert(name.clone(), ty);
292                        result.possibly_assigned_vars.insert(name.clone());
293                    }
294                }
295                (None, None) => {}
296            }
297        }
298
299        // Taint: conservative union — if either branch taints a var, it stays tainted
300        for name in if_ctx
301            .tainted_vars
302            .iter()
303            .chain(else_ctx.tainted_vars.iter())
304        {
305            result.tainted_vars.insert(name.clone());
306        }
307
308        // Read vars: union — if either branch reads a var, it counts as read
309        for name in if_ctx.read_vars.iter().chain(else_ctx.read_vars.iter()) {
310            result.read_vars.insert(name.clone());
311        }
312
313        // After merging branches, the merged context does not diverge
314        // (at least one path through the merge reaches the next statement).
315        result.diverges = false;
316
317        result
318    }
319}
320
321impl Default for Context {
322    fn default() -> Self {
323        Self::new()
324    }
325}