Skip to main content

mir_analyzer/
context.rs

//! Analysis context — carries type state through statement/expression analysis.
2use std::collections::HashSet;
3use std::sync::Arc;
4
5use indexmap::IndexMap;
6use mir_types::Union;
7
// ---------------------------------------------------------------------------
// Context
// ---------------------------------------------------------------------------
11
12#[derive(Debug, Clone)]
13pub struct Context {
14    /// Types of variables at this point in execution.
15    pub vars: IndexMap<String, Union>,
16
17    /// Variables that are definitely assigned at this point.
18    pub assigned_vars: HashSet<String>,
19
20    /// Variables that *might* be assigned (e.g. only in one if branch).
21    pub possibly_assigned_vars: HashSet<String>,
22
23    /// The class in whose body we are analysing (`self`).
24    pub self_fqcn: Option<Arc<str>>,
25
26    /// The parent class (`parent`).
27    pub parent_fqcn: Option<Arc<str>>,
28
29    /// Late-static-binding class (`static`).
30    pub static_fqcn: Option<Arc<str>>,
31
32    /// Declared return type for the current function/method.
33    pub fn_return_type: Option<Union>,
34
35    /// Whether we are currently inside a loop.
36    pub inside_loop: bool,
37
38    /// Whether we are currently inside a finally block.
39    pub inside_finally: bool,
40
41    /// Whether we are inside a constructor.
42    pub inside_constructor: bool,
43
44    /// Whether `strict_types=1` is declared for this file.
45    pub strict_types: bool,
46
47    /// Variables that carry tainted (user-controlled) values at this point.
48    /// Used by taint analysis (M19).
49    pub tainted_vars: HashSet<String>,
50
51    /// Variables that have been read at least once in this scope.
52    /// Used by UnusedParam detection (M18).
53    pub read_vars: HashSet<String>,
54
55    /// Names of function/method parameters in this scope (stripped of `$`).
56    /// Used to exclude parameters from UnusedVariable detection.
57    pub param_names: HashSet<String>,
58
59    /// Whether every execution path through this context has diverged
60    /// (returned, thrown, or exited). Used to detect "all catch branches
61    /// return" so that variables assigned only in the try body are
62    /// considered definitely assigned after the try/catch.
63    pub diverges: bool,
64}
65
66impl Context {
67    pub fn new() -> Self {
68        let mut ctx = Self {
69            vars: IndexMap::new(),
70            assigned_vars: HashSet::new(),
71            possibly_assigned_vars: HashSet::new(),
72            self_fqcn: None,
73            parent_fqcn: None,
74            static_fqcn: None,
75            fn_return_type: None,
76            inside_loop: false,
77            inside_finally: false,
78            inside_constructor: false,
79            strict_types: false,
80            tainted_vars: HashSet::new(),
81            read_vars: HashSet::new(),
82            param_names: HashSet::new(),
83            diverges: false,
84        };
85        // PHP superglobals — always in scope in any context
86        for sg in &[
87            "_SERVER", "_GET", "_POST", "_REQUEST", "_SESSION", "_COOKIE", "_FILES", "_ENV",
88            "GLOBALS",
89        ] {
90            ctx.vars.insert(sg.to_string(), mir_types::Union::mixed());
91            ctx.assigned_vars.insert(sg.to_string());
92        }
93        ctx
94    }
95
96    /// Create a context seeded with the given parameters.
97    pub fn for_function(
98        params: &[mir_codebase::FnParam],
99        return_type: Option<Union>,
100        self_fqcn: Option<Arc<str>>,
101        parent_fqcn: Option<Arc<str>>,
102        static_fqcn: Option<Arc<str>>,
103        strict_types: bool,
104        is_static: bool,
105    ) -> Self {
106        Self::for_method(
107            params,
108            return_type,
109            self_fqcn,
110            parent_fqcn,
111            static_fqcn,
112            strict_types,
113            false,
114            is_static,
115        )
116    }
117
118    /// Like `for_function` but also sets `inside_constructor`.
119    #[allow(clippy::too_many_arguments)]
120    pub fn for_method(
121        params: &[mir_codebase::FnParam],
122        return_type: Option<Union>,
123        self_fqcn: Option<Arc<str>>,
124        parent_fqcn: Option<Arc<str>>,
125        static_fqcn: Option<Arc<str>>,
126        strict_types: bool,
127        inside_constructor: bool,
128        is_static: bool,
129    ) -> Self {
130        let mut ctx = Self::new();
131        ctx.fn_return_type = return_type;
132        ctx.self_fqcn = self_fqcn.clone();
133        ctx.parent_fqcn = parent_fqcn;
134        ctx.static_fqcn = static_fqcn;
135        ctx.strict_types = strict_types;
136        ctx.inside_constructor = inside_constructor;
137
138        for p in params {
139            let elem_ty = p.ty.clone().unwrap_or_else(Union::mixed);
140            // Variadic params like `Type ...$name` are accessed as `list<Type>` in the body.
141            // If the docblock already provides a list/array collection type, don't double-wrap.
142            let ty = if p.is_variadic {
143                let already_collection = elem_ty.types.iter().any(|a| {
144                    matches!(
145                        a,
146                        mir_types::Atomic::TList { .. }
147                            | mir_types::Atomic::TNonEmptyList { .. }
148                            | mir_types::Atomic::TArray { .. }
149                            | mir_types::Atomic::TNonEmptyArray { .. }
150                    )
151                });
152                if already_collection {
153                    elem_ty
154                } else {
155                    mir_types::Union::single(mir_types::Atomic::TList {
156                        value: Box::new(elem_ty),
157                    })
158                }
159            } else {
160                elem_ty
161            };
162            let name = p.name.as_ref().trim_start_matches('$').to_string();
163            ctx.vars.insert(name.clone(), ty);
164            ctx.assigned_vars.insert(name.clone());
165            ctx.param_names.insert(name);
166        }
167
168        // Inject $this for non-static methods so that $this->method() can be
169        // resolved without hitting the mixed-receiver early-return guard.
170        if !is_static {
171            if let Some(fqcn) = self_fqcn {
172                let this_ty = mir_types::Union::single(mir_types::Atomic::TNamedObject {
173                    fqcn,
174                    type_params: vec![],
175                });
176                ctx.vars.insert("this".to_string(), this_ty);
177                ctx.assigned_vars.insert("this".to_string());
178            }
179        }
180
181        ctx
182    }
183
184    /// Get the type of a variable. Returns `mixed` if not found.
185    pub fn get_var(&self, name: &str) -> Union {
186        let name = name.trim_start_matches('$');
187        self.vars.get(name).cloned().unwrap_or_else(Union::mixed)
188    }
189
190    /// Set the type of a variable and mark it as assigned.
191    pub fn set_var(&mut self, name: impl Into<String>, ty: Union) {
192        let name: String = name.into();
193        let name = name.trim_start_matches('$').to_string();
194        self.vars.insert(name.clone(), ty);
195        self.assigned_vars.insert(name);
196    }
197
198    /// Check if a variable is definitely in scope.
199    pub fn var_is_defined(&self, name: &str) -> bool {
200        let name = name.trim_start_matches('$');
201        self.assigned_vars.contains(name)
202    }
203
204    /// Check if a variable might be defined (but not certainly).
205    pub fn var_possibly_defined(&self, name: &str) -> bool {
206        let name = name.trim_start_matches('$');
207        self.assigned_vars.contains(name) || self.possibly_assigned_vars.contains(name)
208    }
209
210    /// Mark a variable as carrying tainted (user-controlled) data.
211    pub fn taint_var(&mut self, name: &str) {
212        let name = name.trim_start_matches('$').to_string();
213        self.tainted_vars.insert(name);
214    }
215
216    /// Returns true if the variable is known to carry tainted data.
217    pub fn is_tainted(&self, name: &str) -> bool {
218        let name = name.trim_start_matches('$');
219        self.tainted_vars.contains(name)
220    }
221
222    /// Remove a variable from the context (after `unset`).
223    pub fn unset_var(&mut self, name: &str) {
224        let name = name.trim_start_matches('$');
225        self.vars.shift_remove(name);
226        self.assigned_vars.remove(name);
227        self.possibly_assigned_vars.remove(name);
228    }
229
230    /// Fork this context for a branch (e.g. the `if` branch).
231    pub fn fork(&self) -> Context {
232        self.clone()
233    }
234
235    /// Merge two branch contexts at a join point (e.g. end of if/else).
236    ///
237    /// - vars present in both: merged union of types
238    /// - vars present in only one branch: marked `possibly_undefined`
239    /// - pre-existing vars from before the branch: preserved
240    pub fn merge_branches(pre: &Context, if_ctx: Context, else_ctx: Option<Context>) -> Context {
241        let else_ctx = else_ctx.unwrap_or_else(|| pre.clone());
242
243        // If the then-branch always diverges, the code after the if runs only
244        // in the else-branch — use that as the result directly.
245        if if_ctx.diverges && !else_ctx.diverges {
246            let mut result = else_ctx;
247            result.diverges = false;
248            return result;
249        }
250        // If the else-branch always diverges, code after the if runs only
251        // in the then-branch.
252        if else_ctx.diverges && !if_ctx.diverges {
253            let mut result = if_ctx;
254            result.diverges = false;
255            return result;
256        }
257        // If both diverge, the code after the if is unreachable.
258        if if_ctx.diverges && else_ctx.diverges {
259            let mut result = pre.clone();
260            result.diverges = true;
261            return result;
262        }
263
264        let mut result = pre.clone();
265
266        // Collect all variable names from both branch contexts
267        let all_names: HashSet<&String> = if_ctx.vars.keys().chain(else_ctx.vars.keys()).collect();
268
269        for name in all_names {
270            let in_if = if_ctx.assigned_vars.contains(name);
271            let in_else = else_ctx.assigned_vars.contains(name);
272            let in_pre = pre.assigned_vars.contains(name);
273
274            let ty_if = if_ctx.vars.get(name);
275            let ty_else = else_ctx.vars.get(name);
276
277            match (ty_if, ty_else) {
278                (Some(a), Some(b)) => {
279                    let merged = Union::merge(a, b);
280                    result.vars.insert(name.clone(), merged);
281                    if in_if && in_else {
282                        result.assigned_vars.insert(name.clone());
283                    } else {
284                        result.possibly_assigned_vars.insert(name.clone());
285                    }
286                }
287                (Some(a), None) => {
288                    if in_pre {
289                        // var existed before: merge with pre type
290                        let pre_ty = pre.vars.get(name).cloned().unwrap_or_else(Union::mixed);
291                        let merged = Union::merge(a, &pre_ty);
292                        result.vars.insert(name.clone(), merged);
293                        result.assigned_vars.insert(name.clone());
294                    } else {
295                        // only assigned in if branch
296                        let ty = a.clone().possibly_undefined();
297                        result.vars.insert(name.clone(), ty);
298                        result.possibly_assigned_vars.insert(name.clone());
299                    }
300                }
301                (None, Some(b)) => {
302                    if in_pre {
303                        let pre_ty = pre.vars.get(name).cloned().unwrap_or_else(Union::mixed);
304                        let merged = Union::merge(&pre_ty, b);
305                        result.vars.insert(name.clone(), merged);
306                        result.assigned_vars.insert(name.clone());
307                    } else {
308                        let ty = b.clone().possibly_undefined();
309                        result.vars.insert(name.clone(), ty);
310                        result.possibly_assigned_vars.insert(name.clone());
311                    }
312                }
313                (None, None) => {}
314            }
315        }
316
317        // Taint: conservative union — if either branch taints a var, it stays tainted
318        for name in if_ctx
319            .tainted_vars
320            .iter()
321            .chain(else_ctx.tainted_vars.iter())
322        {
323            result.tainted_vars.insert(name.clone());
324        }
325
326        // Read vars: union — if either branch reads a var, it counts as read
327        for name in if_ctx.read_vars.iter().chain(else_ctx.read_vars.iter()) {
328            result.read_vars.insert(name.clone());
329        }
330
331        // After merging branches, the merged context does not diverge
332        // (at least one path through the merge reaches the next statement).
333        result.diverges = false;
334
335        result
336    }
337}
338
339impl Default for Context {
340    fn default() -> Self {
341        Self::new()
342    }
343}