//! Expression analyzer — infers the `Union` type of any PHP expression.
//!
//! File: `mir_analyzer/expr/mod.rs`.
2use std::sync::Arc;
3
4use php_ast::ast::ExprKind;
5
6use mir_issues::{Issue, IssueBuffer, IssueKind, Location, Severity};
7use mir_types::{Atomic, Union};
8
9use crate::call::CallAnalyzer;
10use crate::context::Context;
11use crate::db::MirDatabase;
12use crate::php_version::PhpVersion;
13use crate::symbol::{ResolvedSymbol, SymbolKind};
14
15mod arrays;
16mod assignment;
17mod binary;
18mod casts;
19mod closures;
20mod conditional;
21mod helpers;
22mod intrinsics;
23mod literals;
24mod objects;
25mod unary;
26mod variables;
27
28pub use helpers::{extract_destructure_vars, extract_simple_var, infer_arithmetic};
29
30// ---------------------------------------------------------------------------
31// ExpressionAnalyzer
32// ---------------------------------------------------------------------------
33
34pub struct ExpressionAnalyzer<'a> {
35    pub db: &'a dyn MirDatabase,
36    pub file: Arc<str>,
37    pub source: &'a str,
38    pub source_map: &'a php_rs_parser::source_map::SourceMap,
39    pub issues: &'a mut IssueBuffer,
40    pub symbols: &'a mut Vec<ResolvedSymbol>,
41    pub php_version: PhpVersion,
42    /// When true, skip all reference-tracking side-effects (used by the
43    /// inference priming pass so reference locations aren't double-counted).
44    pub inference_only: bool,
45}
46
47impl<'a> ExpressionAnalyzer<'a> {
48    #[allow(clippy::too_many_arguments)]
49    pub fn new(
50        db: &'a dyn MirDatabase,
51        file: Arc<str>,
52        source: &'a str,
53        source_map: &'a php_rs_parser::source_map::SourceMap,
54        issues: &'a mut IssueBuffer,
55        symbols: &'a mut Vec<ResolvedSymbol>,
56        php_version: PhpVersion,
57        inference_only: bool,
58    ) -> Self {
59        Self {
60            db,
61            file,
62            source,
63            source_map,
64            issues,
65            symbols,
66            php_version,
67            inference_only,
68        }
69    }
70
71    /// Record a resolved symbol.
72    pub fn record_symbol(&mut self, span: php_ast::Span, kind: SymbolKind, resolved_type: Union) {
73        self.symbols.push(ResolvedSymbol {
74            file: self.file.clone(),
75            span,
76            kind,
77            resolved_type,
78        });
79    }
80
81    pub fn analyze<'arena, 'src>(
82        &mut self,
83        expr: &php_ast::ast::Expr<'arena, 'src>,
84        ctx: &mut Context,
85    ) -> Union {
86        match &expr.kind {
87            // --- Literals ---------------------------------------------------
88            ExprKind::Int(_)
89            | ExprKind::Float(_)
90            | ExprKind::String(_)
91            | ExprKind::Bool(_)
92            | ExprKind::Null => literals::analyze(&expr.kind),
93
94            ExprKind::InterpolatedString(parts) | ExprKind::Heredoc { parts, .. } => {
95                for part in parts.iter() {
96                    if let php_ast::StringPart::Expr(e) = part {
97                        self.analyze(e, ctx);
98                    }
99                }
100                Union::single(Atomic::TString)
101            }
102            ExprKind::Nowdoc { .. } => Union::single(Atomic::TString),
103            ExprKind::ShellExec(_) => Union::single(Atomic::TString),
104
105            // --- Variables --------------------------------------------------
106            ExprKind::Variable(name) => self.analyze_variable(name, expr, ctx),
107            ExprKind::VariableVariable(inner) => self.analyze_variable_variable(inner, ctx),
108            ExprKind::Identifier(name) => self.analyze_identifier(name, expr, ctx),
109
110            // --- Assignment -------------------------------------------------
111            ExprKind::Assign(a) => self.analyze_assign(a, expr.span, ctx),
112
113            // --- Binary operations ------------------------------------------
114            ExprKind::Binary(b) => self.analyze_binary_expr(b, expr.span, ctx),
115
116            // --- Unary ------------------------------------------------------
117            ExprKind::UnaryPrefix(u) => self.analyze_unary_prefix(u, ctx),
118            ExprKind::UnaryPostfix(u) => self.analyze_unary_postfix(u, ctx),
119
120            // --- Ternary / null coalesce ------------------------------------
121            ExprKind::Ternary(t) => self.analyze_ternary(t, ctx),
122            ExprKind::NullCoalesce(nc) => self.analyze_null_coalesce(nc, ctx),
123
124            // --- Casts ------------------------------------------------------
125            ExprKind::Cast(kind, inner) => self.analyze_cast(kind, inner, ctx),
126
127            // --- Error suppression ------------------------------------------
128            ExprKind::ErrorSuppress(inner) => self.analyze(inner, ctx),
129
130            // --- Parenthesized ----------------------------------------------
131            ExprKind::Parenthesized(inner) => self.analyze(inner, ctx),
132
133            // --- Array literals ---------------------------------------------
134            ExprKind::Array(elements) => self.analyze_array(elements, ctx),
135
136            // --- Array access -----------------------------------------------
137            ExprKind::ArrayAccess(aa) => self.analyze_array_access(aa, expr, ctx),
138
139            // --- isset / empty ----------------------------------------------
140            ExprKind::Isset(exprs) => {
141                for e in exprs.iter() {
142                    self.analyze(e, ctx);
143                }
144                Union::single(Atomic::TBool)
145            }
146            ExprKind::Empty(inner) => {
147                self.analyze(inner, ctx);
148                Union::single(Atomic::TBool)
149            }
150
151            // --- print ------------------------------------------------------
152            ExprKind::Print(inner) => {
153                self.analyze(inner, ctx);
154                Union::single(Atomic::TLiteralInt(1))
155            }
156
157            // --- clone ------------------------------------------------------
158            ExprKind::Clone(inner) => self.analyze(inner, ctx),
159            ExprKind::CloneWith(inner, _props) => self.analyze(inner, ctx),
160
161            // --- new ClassName(...) ----------------------------------------
162            ExprKind::New(n) => self.analyze_new(n, expr.span, ctx),
163
164            ExprKind::AnonymousClass(_) => Union::single(Atomic::TObject),
165
166            // --- Property access -------------------------------------------
167            ExprKind::PropertyAccess(pa) => self.analyze_property_access(pa, expr.span, ctx),
168
169            ExprKind::NullsafePropertyAccess(pa) => self.analyze_nullsafe_property_access(pa, ctx),
170
171            ExprKind::StaticPropertyAccess(spa) => self.analyze_static_property_access(spa),
172
173            ExprKind::ClassConstAccess(cca) => self.analyze_class_const_access(cca, expr.span),
174
175            ExprKind::ClassConstAccessDynamic { .. } => Union::mixed(),
176            ExprKind::StaticPropertyAccessDynamic { .. } => Union::mixed(),
177
178            // --- Method calls ----------------------------------------------
179            ExprKind::MethodCall(mc) => {
180                CallAnalyzer::analyze_method_call(self, mc, ctx, expr.span, false)
181            }
182
183            ExprKind::NullsafeMethodCall(mc) => {
184                CallAnalyzer::analyze_method_call(self, mc, ctx, expr.span, true)
185            }
186
187            ExprKind::StaticMethodCall(smc) => {
188                CallAnalyzer::analyze_static_method_call(self, smc, ctx, expr.span)
189            }
190
191            ExprKind::StaticDynMethodCall(smc) => {
192                CallAnalyzer::analyze_static_dyn_method_call(self, smc, ctx)
193            }
194
195            // --- Function calls --------------------------------------------
196            ExprKind::FunctionCall(fc) => {
197                CallAnalyzer::analyze_function_call(self, fc, ctx, expr.span)
198            }
199
200            // --- Closures / arrow functions --------------------------------
201            ExprKind::Closure(c) => self.analyze_closure(c, ctx),
202
203            ExprKind::ArrowFunction(af) => self.analyze_arrow_function(af, ctx),
204
205            ExprKind::CallableCreate(_) => Union::single(Atomic::TCallable {
206                params: None,
207                return_type: None,
208            }),
209
210            // --- Match expression ------------------------------------------
211            ExprKind::Match(m) => self.analyze_match(m, ctx),
212
213            // --- Throw as expression (PHP 8) --------------------------------
214            ExprKind::ThrowExpr(e) => {
215                self.analyze(e, ctx);
216                Union::single(Atomic::TNever)
217            }
218
219            // --- Yield -----------------------------------------------------
220            ExprKind::Yield(y) => self.analyze_yield(y, ctx),
221
222            // --- Magic constants -------------------------------------------
223            ExprKind::MagicConst(kind) => ExpressionAnalyzer::analyze_magic_const(kind),
224
225            // --- Include/require --------------------------------------------
226            ExprKind::Include(_, inner) => {
227                self.analyze(inner, ctx);
228                Union::mixed()
229            }
230
231            // --- Eval -------------------------------------------------------
232            ExprKind::Eval(inner) => {
233                self.analyze(inner, ctx);
234                Union::mixed()
235            }
236
237            // --- Exit -------------------------------------------------------
238            ExprKind::Exit(opt) => {
239                if let Some(e) = opt {
240                    self.analyze(e, ctx);
241                }
242                ctx.diverges = true;
243                Union::single(Atomic::TNever)
244            }
245
246            // --- Error node (parse error placeholder) ----------------------
247            ExprKind::Error => Union::mixed(),
248
249            // --- Omitted array slot (e.g. [, $b] destructuring) ------------
250            ExprKind::Omit => Union::single(Atomic::TNull),
251        }
252    }
253
254    // -----------------------------------------------------------------------
255    // Issue emission
256    // -----------------------------------------------------------------------
257
258    /// Convert a byte offset to a Unicode char-count column on a given line.
259    /// Returns (line, col) where col is a 0-based Unicode code-point count.
260    fn offset_to_line_col(&self, offset: u32) -> (u32, u16) {
261        let lc = self.source_map.offset_to_line_col(offset);
262        let line = lc.line + 1;
263
264        let byte_offset = offset as usize;
265        let line_start_byte = if byte_offset == 0 {
266            0
267        } else {
268            self.source[..byte_offset]
269                .rfind('\n')
270                .map(|p| p + 1)
271                .unwrap_or(0)
272        };
273
274        let col = self.source[line_start_byte..byte_offset].chars().count() as u16;
275
276        (line, col)
277    }
278
279    /// Convert an AST span to `(line, col_start, col_end)` for reference recording.
280    pub(crate) fn span_to_ref_loc(&self, span: php_ast::Span) -> (u32, u16, u16) {
281        let (line, col_start) = self.offset_to_line_col(span.start);
282        let end_off = (span.end as usize).min(self.source.len());
283        let end_line_start = self.source[..end_off]
284            .rfind('\n')
285            .map(|p| p + 1)
286            .unwrap_or(0);
287        let col_end = self.source[end_line_start..end_off].chars().count() as u16;
288        (line, col_start, col_end)
289    }
290
291    /// Walk a type hint and emit `UndefinedClass` for any named type not in the codebase.
292    fn check_type_hint(&mut self, hint: &php_ast::ast::TypeHint<'_, '_>) {
293        use php_ast::ast::TypeHintKind;
294        match &hint.kind {
295            TypeHintKind::Named(name) => {
296                let name_str = crate::parser::name_to_string(name);
297                if matches!(
298                    name_str.to_lowercase().as_str(),
299                    "self"
300                        | "static"
301                        | "parent"
302                        | "null"
303                        | "true"
304                        | "false"
305                        | "never"
306                        | "void"
307                        | "mixed"
308                        | "object"
309                        | "callable"
310                        | "iterable"
311                ) {
312                    return;
313                }
314                let resolved = crate::db::resolve_name_via_db(self.db, &self.file, &name_str);
315                if !crate::db::type_exists_via_db(self.db, &resolved) {
316                    self.emit(
317                        IssueKind::UndefinedClass { name: resolved },
318                        Severity::Error,
319                        hint.span,
320                    );
321                }
322            }
323            TypeHintKind::Nullable(inner) => self.check_type_hint(inner),
324            TypeHintKind::Union(parts) | TypeHintKind::Intersection(parts) => {
325                for part in parts.iter() {
326                    self.check_type_hint(part);
327                }
328            }
329            TypeHintKind::Keyword(_, _) => {}
330        }
331    }
332
333    pub fn emit(&mut self, kind: IssueKind, severity: Severity, span: php_ast::Span) {
334        let (line, col_start) = self.offset_to_line_col(span.start);
335
336        let (line_end, col_end) = if span.start < span.end {
337            let (end_line, end_col) = self.offset_to_line_col(span.end);
338            (end_line, end_col)
339        } else {
340            (line, col_start)
341        };
342
343        let mut issue = Issue::new(
344            kind,
345            Location {
346                file: self.file.clone(),
347                line,
348                line_end,
349                col_start,
350                col_end: col_end.max(col_start + 1),
351            },
352        );
353        issue.severity = severity;
354        // Store the source snippet for baseline matching.
355        if span.start < span.end {
356            let s = span.start as usize;
357            let e = (span.end as usize).min(self.source.len());
358            if let Some(text) = self.source.get(s..e) {
359                let trimmed = text.trim();
360                if !trimmed.is_empty() {
361                    issue.snippet = Some(trimmed.to_string());
362                }
363            }
364        }
365        self.issues.add(issue);
366    }
367}
368
369// ---------------------------------------------------------------------------
370// Tests
371// ---------------------------------------------------------------------------
372
#[cfg(test)]
mod tests {
    /// Parse `source` and return the parser's `SourceMap` for it.
    fn source_map_for(source: &str) -> php_rs_parser::source_map::SourceMap {
        let arena = bumpalo::Bump::new();
        // Bind the parse result to a local so it is dropped before `arena`.
        let parsed = php_rs_parser::parse(&arena, source);
        parsed.source_map
    }

    /// Compute `(line, col)` for a byte `offset` in `source`, with `line`
    /// 1-based and `col` a 0-based Unicode code-point count.
    ///
    /// This re-implements the analyzer's private offset-to-column conversion
    /// rather than calling it, so these tests pin the algorithm, not the
    /// method itself.
    fn line_col_at(source: &str, offset: u32) -> (u32, u16) {
        let map = source_map_for(source);
        let line = map.offset_to_line_col(offset).line + 1;

        let before = &source[..offset as usize];
        let line_start = before.rfind('\n').map_or(0, |newline| newline + 1);
        let col = before[line_start..].chars().count() as u16;

        (line, col)
    }

    #[test]
    fn col_conversion_simple_ascii() {
        let source = "<?php\n$var = 123;";

        // Byte 6 is '$', the first character of line 2.
        assert_eq!(line_col_at(source, 6), (2, 0));
        // Byte 7 is 'v', one column further.
        assert_eq!(line_col_at(source, 7), (2, 1));
    }

    #[test]
    fn col_conversion_different_lines() {
        // Line 1: <?php     (bytes 0-4, newline at 5)
        // Line 2: $x = 1;   (bytes 6-12, newline at 13)
        // Line 3: $y = 2;   (bytes 14-20)
        let source = "<?php\n$x = 1;\n$y = 2;";

        let line_starts = [(0u32, 1u32), (6, 2), (14, 3)];
        for (offset, expected_line) in line_starts {
            assert_eq!(line_col_at(source, offset), (expected_line, 0));
        }
    }

    #[test]
    fn col_conversion_accented_characters() {
        // 'é' occupies 2 UTF-8 bytes but is a single Unicode char.
        // Line 2 bytes: '$'=6, 'c'=7, 'a'=8, 'f'=9, 'é'=10..=11
        let source = "<?php\n$café = 1;";

        // 'f' at byte 9 is char column 3.
        assert_eq!(line_col_at(source, 9), (2, 3));
        // 'é' starts at byte 10, char column 4.
        assert_eq!(line_col_at(source, 10), (2, 4));
    }

    #[test]
    fn col_conversion_emoji_counts_as_one_char() {
        // 🎉 (U+1F389) is 4 UTF-8 bytes and 2 UTF-16 units, but 1 Unicode char,
        // so the 'x' right after it sits at column 7, not 8.
        // Line 2 chars: $=0 y=1 ' '=2 '='=3 ' '=4 '"'=5 🎉=6 x=7
        let source = "<?php\n$y = \"🎉x\";";

        let x_offset = source.find("🎉").unwrap() + "🎉".len();

        assert_eq!(line_col_at(source, x_offset as u32), (2, 7));
    }

    #[test]
    fn col_conversion_emoji_start_position() {
        // Line 2 chars: $=0 y=1 ' '=2 '='=3 ' '=4 '"'=5 🎉=6
        let source = "<?php\n$y = \"🎉\";";

        let quote_offset = source.find('"').unwrap();
        let emoji_offset = quote_offset + 1; // the emoji starts right after the quote

        // The opening quote is the 6th char on line 2 (0-based column 5).
        assert_eq!(line_col_at(source, quote_offset as u32), (2, 5));
        // The emoji immediately follows it.
        assert_eq!(line_col_at(source, emoji_offset as u32), (2, 6));
    }

    #[test]
    fn col_end_minimum_width() {
        // Mirrors the widening arithmetic used when a span is empty
        // (span.start == span.end): the end column must be at least one past
        // the start column.
        let col_start = 0u16;
        let col_end = 0u16;

        let widened = col_end.max(col_start + 1);

        assert_eq!(widened, 1, "col_end should be at least col_start + 1");
    }

    #[test]
    fn col_conversion_multiline_span() {
        // A span that opens on line 2 and closes on line 5:
        //   Line 1: <?php
        //   Line 2: $x = [
        //   Line 3:   'a',
        //   Line 4:   'b'
        //   Line 5: ];
        let source = "<?php\n$x = [\n  'a',\n  'b'\n];";

        let open = source.find('[').unwrap();
        let (open_line, _open_col) = line_col_at(source, open as u32);
        assert_eq!(open_line, 2);

        // The closing bracket is the first character of line 5.
        let close = source.rfind(']').unwrap();
        assert_eq!(line_col_at(source, close as u32), (5, 0));
    }

    #[test]
    fn col_end_handles_emoji_in_span() {
        let source = "<?php\n$greeting = \"Hello 🎉\";";

        let hello_offset = source.find("Hello").unwrap();
        let emoji_offset = source.find('🎉').unwrap();

        // 'H' follows the 13 chars of `$greeting = "`.
        assert_eq!(line_col_at(source, hello_offset as u32), (2, 13));
        // The emoji follows "Hello " (13 + 5 + 1 = 19).
        assert_eq!(line_col_at(source, emoji_offset as u32), (2, 19));
    }
}