Skip to main content

mir_analyzer/
context.rs

//! Analysis context — carries type state through statement/expression analysis.
2use std::collections::{HashMap, HashSet};
3use std::sync::Arc;
4
5use indexmap::IndexMap;
6use mir_types::Union;
7
8// ---------------------------------------------------------------------------
9// Context
10// ---------------------------------------------------------------------------
11
12#[derive(Debug, Clone)]
13pub struct Context {
14    /// Types of variables at this point in execution.
15    pub vars: IndexMap<String, Union>,
16
17    /// Variables that are definitely assigned at this point.
18    pub assigned_vars: HashSet<String>,
19
20    /// Variables that *might* be assigned (e.g. only in one if branch).
21    pub possibly_assigned_vars: HashSet<String>,
22
23    /// The class in whose body we are analysing (`self`).
24    pub self_fqcn: Option<Arc<str>>,
25
26    /// The parent class (`parent`).
27    pub parent_fqcn: Option<Arc<str>>,
28
29    /// Late-static-binding class (`static`).
30    pub static_fqcn: Option<Arc<str>>,
31
32    /// Declared return type for the current function/method.
33    pub fn_return_type: Option<Union>,
34
35    /// Whether we are currently inside a loop.
36    pub inside_loop: bool,
37
38    /// Whether we are currently inside a finally block.
39    pub inside_finally: bool,
40
41    /// Whether we are inside a constructor.
42    pub inside_constructor: bool,
43
44    /// Whether `strict_types=1` is declared for this file.
45    pub strict_types: bool,
46
47    /// Variables that carry tainted (user-controlled) values at this point.
48    /// Used by taint analysis (M19).
49    pub tainted_vars: HashSet<String>,
50
51    /// Variables that have been read at least once in this scope.
52    /// Used by UnusedParam detection (M18).
53    pub read_vars: HashSet<String>,
54
55    /// Names of function/method parameters in this scope (stripped of `$`).
56    /// Used to exclude parameters from UnusedVariable detection.
57    pub param_names: HashSet<String>,
58
59    /// Names of by-reference parameters in this scope (stripped of `$`).
60    /// Assigning to these is externally observable, so it counts as usage.
61    pub byref_param_names: HashSet<String>,
62
63    /// Whether every execution path through this context has diverged
64    /// (returned, thrown, or exited). Used to detect "all catch branches
65    /// return" so that variables assigned only in the try body are
66    /// considered definitely assigned after the try/catch.
67    pub diverges: bool,
68
69    /// Pre-converted (line, col_start, line_end, col_end) of the first assignment
70    /// to each variable. Used to emit accurate locations for UnusedVariable / UnusedParam.
71    pub var_locations: HashMap<String, (u32, u16, u32, u16)>,
72}
73
74impl Context {
75    pub fn new() -> Self {
76        let mut ctx = Self {
77            vars: IndexMap::new(),
78            assigned_vars: HashSet::new(),
79            possibly_assigned_vars: HashSet::new(),
80            self_fqcn: None,
81            parent_fqcn: None,
82            static_fqcn: None,
83            fn_return_type: None,
84            inside_loop: false,
85            inside_finally: false,
86            inside_constructor: false,
87            strict_types: false,
88            tainted_vars: HashSet::new(),
89            read_vars: HashSet::new(),
90            param_names: HashSet::new(),
91            byref_param_names: HashSet::new(),
92            diverges: false,
93            var_locations: HashMap::new(),
94        };
95        // PHP superglobals — always in scope in any context
96        for sg in &[
97            "_SERVER", "_GET", "_POST", "_REQUEST", "_SESSION", "_COOKIE", "_FILES", "_ENV",
98            "GLOBALS",
99        ] {
100            ctx.vars.insert(sg.to_string(), mir_types::Union::mixed());
101            ctx.assigned_vars.insert(sg.to_string());
102        }
103        ctx
104    }
105
106    /// Create a context seeded with the given parameters.
107    pub fn for_function(
108        params: &[mir_codebase::FnParam],
109        return_type: Option<Union>,
110        self_fqcn: Option<Arc<str>>,
111        parent_fqcn: Option<Arc<str>>,
112        static_fqcn: Option<Arc<str>>,
113        strict_types: bool,
114        is_static: bool,
115    ) -> Self {
116        Self::for_method(
117            params,
118            return_type,
119            self_fqcn,
120            parent_fqcn,
121            static_fqcn,
122            strict_types,
123            false,
124            is_static,
125        )
126    }
127
128    /// Like `for_function` but also sets `inside_constructor`.
129    #[allow(clippy::too_many_arguments)]
130    pub fn for_method(
131        params: &[mir_codebase::FnParam],
132        return_type: Option<Union>,
133        self_fqcn: Option<Arc<str>>,
134        parent_fqcn: Option<Arc<str>>,
135        static_fqcn: Option<Arc<str>>,
136        strict_types: bool,
137        inside_constructor: bool,
138        is_static: bool,
139    ) -> Self {
140        let mut ctx = Self::new();
141        ctx.fn_return_type = return_type;
142        ctx.self_fqcn = self_fqcn.clone();
143        ctx.parent_fqcn = parent_fqcn;
144        ctx.static_fqcn = static_fqcn;
145        ctx.strict_types = strict_types;
146        ctx.inside_constructor = inside_constructor;
147
148        for p in params {
149            let elem_ty = p.ty.clone().unwrap_or_else(Union::mixed);
150            // Variadic params like `Type ...$name` are accessed as `list<Type>` in the body.
151            // If the docblock already provides a list/array collection type, don't double-wrap.
152            let ty = if p.is_variadic {
153                let already_collection = elem_ty.types.iter().any(|a| {
154                    matches!(
155                        a,
156                        mir_types::Atomic::TList { .. }
157                            | mir_types::Atomic::TNonEmptyList { .. }
158                            | mir_types::Atomic::TArray { .. }
159                            | mir_types::Atomic::TNonEmptyArray { .. }
160                    )
161                });
162                if already_collection {
163                    elem_ty
164                } else {
165                    mir_types::Union::single(mir_types::Atomic::TList {
166                        value: Box::new(elem_ty),
167                    })
168                }
169            } else {
170                elem_ty
171            };
172            let name = p.name.as_ref().trim_start_matches('$').to_string();
173            ctx.vars.insert(name.clone(), ty);
174            ctx.assigned_vars.insert(name.clone());
175            ctx.param_names.insert(name.clone());
176            if p.is_byref {
177                ctx.byref_param_names.insert(name);
178            }
179        }
180
181        // Inject $this for non-static methods so that $this->method() can be
182        // resolved without hitting the mixed-receiver early-return guard.
183        if !is_static {
184            if let Some(fqcn) = self_fqcn {
185                let this_ty = mir_types::Union::single(mir_types::Atomic::TNamedObject {
186                    fqcn,
187                    type_params: vec![],
188                });
189                ctx.vars.insert("this".to_string(), this_ty);
190                ctx.assigned_vars.insert("this".to_string());
191            }
192        }
193
194        ctx
195    }
196
197    /// Get the type of a variable. Returns `mixed` if not found.
198    pub fn get_var(&self, name: &str) -> Union {
199        let name = name.trim_start_matches('$');
200        self.vars.get(name).cloned().unwrap_or_else(Union::mixed)
201    }
202
203    /// Set the type of a variable and mark it as assigned.
204    pub fn set_var(&mut self, name: impl Into<String>, ty: Union) {
205        let name: String = name.into();
206        let name = name.trim_start_matches('$').to_string();
207        self.vars.insert(name.clone(), ty);
208        self.assigned_vars.insert(name);
209    }
210
211    /// Check if a variable is definitely in scope.
212    pub fn var_is_defined(&self, name: &str) -> bool {
213        let name = name.trim_start_matches('$');
214        self.assigned_vars.contains(name)
215    }
216
217    /// Check if a variable might be defined (but not certainly).
218    pub fn var_possibly_defined(&self, name: &str) -> bool {
219        let name = name.trim_start_matches('$');
220        self.assigned_vars.contains(name) || self.possibly_assigned_vars.contains(name)
221    }
222
223    /// Mark a variable as carrying tainted (user-controlled) data.
224    pub fn taint_var(&mut self, name: &str) {
225        let name = name.trim_start_matches('$').to_string();
226        self.tainted_vars.insert(name);
227    }
228
229    /// Returns true if the variable is known to carry tainted data.
230    pub fn is_tainted(&self, name: &str) -> bool {
231        let name = name.trim_start_matches('$');
232        self.tainted_vars.contains(name)
233    }
234
235    /// Record the location of the first assignment to a variable (first-write-wins).
236    pub fn record_var_location(
237        &mut self,
238        name: &str,
239        line: u32,
240        col_start: u16,
241        line_end: u32,
242        col_end: u16,
243    ) {
244        let name = name.trim_start_matches('$');
245        self.var_locations
246            .entry(name.to_string())
247            .or_insert((line, col_start, line_end, col_end));
248    }
249
250    /// Remove a variable from the context (after `unset`).
251    pub fn unset_var(&mut self, name: &str) {
252        let name = name.trim_start_matches('$');
253        self.vars.shift_remove(name);
254        self.assigned_vars.remove(name);
255        self.possibly_assigned_vars.remove(name);
256    }
257
258    /// Fork this context for a branch (e.g. the `if` branch).
259    pub fn fork(&self) -> Context {
260        self.clone()
261    }
262
263    /// Merge two branch contexts at a join point (e.g. end of if/else).
264    ///
265    /// - vars present in both: merged union of types
266    /// - vars present in only one branch: marked `possibly_undefined`
267    /// - pre-existing vars from before the branch: preserved
268    pub fn merge_branches(pre: &Context, if_ctx: Context, else_ctx: Option<Context>) -> Context {
269        let else_ctx = else_ctx.unwrap_or_else(|| pre.clone());
270
271        // If the then-branch always diverges, the code after the if runs only
272        // in the else-branch — use that as the result directly.
273        if if_ctx.diverges && !else_ctx.diverges {
274            let mut result = else_ctx;
275            result.diverges = false;
276            return result;
277        }
278        // If the else-branch always diverges, code after the if runs only
279        // in the then-branch.
280        if else_ctx.diverges && !if_ctx.diverges {
281            let mut result = if_ctx;
282            result.diverges = false;
283            return result;
284        }
285        // If both diverge, the code after the if is unreachable.
286        if if_ctx.diverges && else_ctx.diverges {
287            let mut result = pre.clone();
288            result.diverges = true;
289            return result;
290        }
291
292        let mut result = pre.clone();
293
294        // Collect all variable names from both branch contexts
295        let all_names: HashSet<&String> = if_ctx.vars.keys().chain(else_ctx.vars.keys()).collect();
296
297        for name in all_names {
298            let in_if = if_ctx.assigned_vars.contains(name);
299            let in_else = else_ctx.assigned_vars.contains(name);
300            let in_pre = pre.assigned_vars.contains(name);
301
302            let ty_if = if_ctx.vars.get(name);
303            let ty_else = else_ctx.vars.get(name);
304
305            match (ty_if, ty_else) {
306                (Some(a), Some(b)) => {
307                    let merged = Union::merge(a, b);
308                    result.vars.insert(name.clone(), merged);
309                    if in_if && in_else {
310                        result.assigned_vars.insert(name.clone());
311                    } else {
312                        result.possibly_assigned_vars.insert(name.clone());
313                    }
314                }
315                (Some(a), None) => {
316                    if in_pre {
317                        // var existed before: merge with pre type
318                        let pre_ty = pre.vars.get(name).cloned().unwrap_or_else(Union::mixed);
319                        let merged = Union::merge(a, &pre_ty);
320                        result.vars.insert(name.clone(), merged);
321                        result.assigned_vars.insert(name.clone());
322                    } else {
323                        // only assigned in if branch
324                        let ty = a.clone().possibly_undefined();
325                        result.vars.insert(name.clone(), ty);
326                        result.possibly_assigned_vars.insert(name.clone());
327                    }
328                }
329                (None, Some(b)) => {
330                    if in_pre {
331                        let pre_ty = pre.vars.get(name).cloned().unwrap_or_else(Union::mixed);
332                        let merged = Union::merge(&pre_ty, b);
333                        result.vars.insert(name.clone(), merged);
334                        result.assigned_vars.insert(name.clone());
335                    } else {
336                        let ty = b.clone().possibly_undefined();
337                        result.vars.insert(name.clone(), ty);
338                        result.possibly_assigned_vars.insert(name.clone());
339                    }
340                }
341                (None, None) => {}
342            }
343        }
344
345        // Taint: conservative union — if either branch taints a var, it stays tainted
346        for name in if_ctx
347            .tainted_vars
348            .iter()
349            .chain(else_ctx.tainted_vars.iter())
350        {
351            result.tainted_vars.insert(name.clone());
352        }
353
354        // Read vars: union — if either branch reads a var, it counts as read
355        for name in if_ctx.read_vars.iter().chain(else_ctx.read_vars.iter()) {
356            result.read_vars.insert(name.clone());
357        }
358
359        // Var locations: keep the earliest known span for each variable
360        for (name, loc) in if_ctx
361            .var_locations
362            .iter()
363            .chain(else_ctx.var_locations.iter())
364        {
365            result.var_locations.entry(name.clone()).or_insert(*loc);
366        }
367
368        // After merging branches, the merged context does not diverge
369        // (at least one path through the merge reaches the next statement).
370        result.diverges = false;
371
372        result
373    }
374}
375
376impl Default for Context {
377    fn default() -> Self {
378        Self::new()
379    }
380}