Skip to main content

mir_analyzer/expr/
mod.rs

//! Expression analyzer — infers the `Union` type of any PHP expression.
2use std::sync::Arc;
3
4use php_ast::ast::ExprKind;
5
6use mir_issues::{Issue, IssueBuffer, IssueKind, Location, Severity};
7use mir_types::{Atomic, Union};
8
9use crate::call::CallAnalyzer;
10use crate::context::Context;
11use crate::db::MirDatabase;
12use crate::php_version::PhpVersion;
13use crate::symbol::{ResolvedSymbol, SymbolKind};
14
15mod arrays;
16mod assignment;
17mod binary;
18mod casts;
19mod closures;
20mod conditional;
21mod helpers;
22mod intrinsics;
23mod literals;
24mod objects;
25mod unary;
26mod variables;
27
28pub use helpers::{extract_destructure_vars, extract_simple_var, infer_arithmetic};
29
30// ---------------------------------------------------------------------------
31// ExpressionAnalyzer
32// ---------------------------------------------------------------------------
33
34pub struct ExpressionAnalyzer<'a> {
35    pub db: &'a dyn MirDatabase,
36    pub file: Arc<str>,
37    pub source: &'a str,
38    pub source_map: &'a php_rs_parser::source_map::SourceMap,
39    pub issues: &'a mut IssueBuffer,
40    pub symbols: &'a mut Vec<ResolvedSymbol>,
41    pub php_version: PhpVersion,
42    /// When true, skip all reference-tracking side-effects (used by the
43    /// inference priming pass so reference locations aren't double-counted).
44    pub inference_only: bool,
45}
46
47impl<'a> ExpressionAnalyzer<'a> {
48    #[allow(clippy::too_many_arguments)]
49    pub fn new(
50        db: &'a dyn MirDatabase,
51        file: Arc<str>,
52        source: &'a str,
53        source_map: &'a php_rs_parser::source_map::SourceMap,
54        issues: &'a mut IssueBuffer,
55        symbols: &'a mut Vec<ResolvedSymbol>,
56        php_version: PhpVersion,
57        inference_only: bool,
58    ) -> Self {
59        Self {
60            db,
61            file,
62            source,
63            source_map,
64            issues,
65            symbols,
66            php_version,
67            inference_only,
68        }
69    }
70
71    /// Record a resolved symbol.
72    pub fn record_symbol(&mut self, span: php_ast::Span, kind: SymbolKind, resolved_type: Union) {
73        self.symbols.push(ResolvedSymbol {
74            file: self.file.clone(),
75            span,
76            kind,
77            resolved_type,
78        });
79    }
80
81    pub fn analyze<'arena, 'src>(
82        &mut self,
83        expr: &php_ast::ast::Expr<'arena, 'src>,
84        ctx: &mut Context,
85    ) -> Union {
86        match &expr.kind {
87            // --- Literals ---------------------------------------------------
88            ExprKind::Int(_)
89            | ExprKind::Float(_)
90            | ExprKind::String(_)
91            | ExprKind::Bool(_)
92            | ExprKind::Null => literals::analyze(&expr.kind),
93
94            ExprKind::InterpolatedString(parts) | ExprKind::Heredoc { parts, .. } => {
95                for part in parts.iter() {
96                    if let php_ast::StringPart::Expr(e) = part {
97                        let expr_ty = self.analyze(e, ctx);
98                        self.check_interpolation_implicit_to_string_cast(&expr_ty, e.span);
99                    }
100                }
101                Union::single(Atomic::TString)
102            }
103            ExprKind::Nowdoc { .. } => Union::single(Atomic::TString),
104            ExprKind::ShellExec(_) => Union::single(Atomic::TString),
105
106            // --- Variables --------------------------------------------------
107            ExprKind::Variable(name) => self.analyze_variable(name, expr, ctx),
108            ExprKind::VariableVariable(inner) => self.analyze_variable_variable(inner, ctx),
109            ExprKind::Identifier(name) => self.analyze_identifier(name, expr, ctx),
110
111            // --- Assignment -------------------------------------------------
112            ExprKind::Assign(a) => self.analyze_assign(a, expr.span, ctx),
113
114            // --- Binary operations ------------------------------------------
115            ExprKind::Binary(b) => self.analyze_binary_expr(b, expr.span, ctx),
116
117            // --- Unary ------------------------------------------------------
118            ExprKind::UnaryPrefix(u) => self.analyze_unary_prefix(u, ctx),
119            ExprKind::UnaryPostfix(u) => self.analyze_unary_postfix(u, ctx),
120
121            // --- Ternary / null coalesce ------------------------------------
122            ExprKind::Ternary(t) => self.analyze_ternary(t, ctx),
123            ExprKind::NullCoalesce(nc) => self.analyze_null_coalesce(nc, ctx),
124
125            // --- Casts ------------------------------------------------------
126            ExprKind::Cast(kind, inner) => self.analyze_cast(kind, inner, ctx),
127
128            // --- Error suppression ------------------------------------------
129            ExprKind::ErrorSuppress(inner) => self.analyze(inner, ctx),
130
131            // --- Parenthesized ----------------------------------------------
132            ExprKind::Parenthesized(inner) => self.analyze(inner, ctx),
133
134            // --- Array literals ---------------------------------------------
135            ExprKind::Array(elements) => self.analyze_array(elements, ctx),
136
137            // --- Array access -----------------------------------------------
138            ExprKind::ArrayAccess(aa) => self.analyze_array_access(aa, expr, ctx),
139
140            // --- isset / empty ----------------------------------------------
141            ExprKind::Isset(exprs) => {
142                for e in exprs.iter() {
143                    self.analyze(e, ctx);
144                }
145                Union::single(Atomic::TBool)
146            }
147            ExprKind::Empty(inner) => {
148                self.analyze(inner, ctx);
149                Union::single(Atomic::TBool)
150            }
151
152            // --- print ------------------------------------------------------
153            ExprKind::Print(inner) => {
154                let expr_ty = self.analyze(inner, ctx);
155                self.check_interpolation_implicit_to_string_cast(&expr_ty, inner.span);
156                Union::single(Atomic::TLiteralInt(1))
157            }
158
159            // --- clone ------------------------------------------------------
160            ExprKind::Clone(inner) => {
161                let ty = self.analyze(inner, ctx);
162                if ty.is_mixed() {
163                    self.emit(IssueKind::MixedClone, Severity::Info, expr.span);
164                }
165                ty
166            }
167            ExprKind::CloneWith(inner, _props) => {
168                let ty = self.analyze(inner, ctx);
169                if ty.is_mixed() {
170                    self.emit(IssueKind::MixedClone, Severity::Info, expr.span);
171                }
172                ty
173            }
174
175            // --- new ClassName(...) ----------------------------------------
176            ExprKind::New(n) => self.analyze_new(n, expr.span, ctx),
177
178            ExprKind::AnonymousClass(_) => Union::single(Atomic::TObject),
179
180            // --- Property access -------------------------------------------
181            ExprKind::PropertyAccess(pa) => self.analyze_property_access(pa, expr.span, ctx),
182
183            ExprKind::NullsafePropertyAccess(pa) => self.analyze_nullsafe_property_access(pa, ctx),
184
185            ExprKind::StaticPropertyAccess(spa) => self.analyze_static_property_access(spa),
186
187            ExprKind::ClassConstAccess(cca) => self.analyze_class_const_access(cca, expr.span),
188
189            ExprKind::ClassConstAccessDynamic { .. } => Union::mixed(),
190            ExprKind::StaticPropertyAccessDynamic { .. } => Union::mixed(),
191
192            // --- Method calls ----------------------------------------------
193            ExprKind::MethodCall(mc) => {
194                CallAnalyzer::analyze_method_call(self, mc, ctx, expr.span, false)
195            }
196
197            ExprKind::NullsafeMethodCall(mc) => {
198                CallAnalyzer::analyze_method_call(self, mc, ctx, expr.span, true)
199            }
200
201            ExprKind::StaticMethodCall(smc) => {
202                CallAnalyzer::analyze_static_method_call(self, smc, ctx, expr.span)
203            }
204
205            ExprKind::StaticDynMethodCall(smc) => {
206                CallAnalyzer::analyze_static_dyn_method_call(self, smc, ctx)
207            }
208
209            // --- Function calls --------------------------------------------
210            ExprKind::FunctionCall(fc) => {
211                CallAnalyzer::analyze_function_call(self, fc, ctx, expr.span)
212            }
213
214            // --- Closures / arrow functions --------------------------------
215            ExprKind::Closure(c) => self.analyze_closure(c, ctx),
216
217            ExprKind::ArrowFunction(af) => self.analyze_arrow_function(af, ctx),
218
219            ExprKind::CallableCreate(_) => Union::single(Atomic::TCallable {
220                params: None,
221                return_type: None,
222            }),
223
224            // --- Match expression ------------------------------------------
225            ExprKind::Match(m) => self.analyze_match(m, ctx),
226
227            // --- Throw as expression (PHP 8) --------------------------------
228            ExprKind::ThrowExpr(e) => {
229                self.analyze(e, ctx);
230                Union::single(Atomic::TNever)
231            }
232
233            // --- Yield -----------------------------------------------------
234            ExprKind::Yield(y) => self.analyze_yield(y, ctx),
235
236            // --- Magic constants -------------------------------------------
237            ExprKind::MagicConst(kind) => ExpressionAnalyzer::analyze_magic_const(kind),
238
239            // --- Include/require --------------------------------------------
240            ExprKind::Include(_, inner) => {
241                self.analyze(inner, ctx);
242                Union::mixed()
243            }
244
245            // --- Eval -------------------------------------------------------
246            ExprKind::Eval(inner) => {
247                self.analyze(inner, ctx);
248                Union::mixed()
249            }
250
251            // --- Exit -------------------------------------------------------
252            ExprKind::Exit(opt) => {
253                if let Some(e) = opt {
254                    self.analyze(e, ctx);
255                }
256                ctx.diverges = true;
257                Union::single(Atomic::TNever)
258            }
259
260            // --- Error node (parse error placeholder) ----------------------
261            ExprKind::Error => Union::mixed(),
262
263            // --- Omitted array slot (e.g. [, $b] destructuring) ------------
264            ExprKind::Omit => Union::single(Atomic::TNull),
265        }
266    }
267
268    // -----------------------------------------------------------------------
269    // Issue emission
270    // -----------------------------------------------------------------------
271
272    /// Convert a byte offset to a Unicode char-count column on a given line.
273    /// Returns (line, col) where col is a 0-based Unicode code-point count.
274    fn offset_to_line_col(&self, offset: u32) -> (u32, u16) {
275        let lc = self.source_map.offset_to_line_col(offset);
276        let line = lc.line + 1;
277
278        let byte_offset = offset as usize;
279        let line_start_byte = if byte_offset == 0 {
280            0
281        } else {
282            self.source[..byte_offset]
283                .rfind('\n')
284                .map(|p| p + 1)
285                .unwrap_or(0)
286        };
287
288        let col = self.source[line_start_byte..byte_offset].chars().count() as u16;
289
290        (line, col)
291    }
292
293    /// Convert an AST span to `(line, col_start, col_end)` for reference recording.
294    pub(crate) fn span_to_ref_loc(&self, span: php_ast::Span) -> (u32, u16, u16) {
295        let (line, col_start) = self.offset_to_line_col(span.start);
296        let end_off = (span.end as usize).min(self.source.len());
297        let end_line_start = self.source[..end_off]
298            .rfind('\n')
299            .map(|p| p + 1)
300            .unwrap_or(0);
301        let col_end = self.source[end_line_start..end_off].chars().count() as u16;
302        (line, col_start, col_end)
303    }
304
305    /// Walk a type hint and emit `UndefinedClass` for any named type not in the codebase.
306    fn check_type_hint(&mut self, hint: &php_ast::ast::TypeHint<'_, '_>) {
307        use php_ast::ast::TypeHintKind;
308        match &hint.kind {
309            TypeHintKind::Named(name) => {
310                let name_str = crate::parser::name_to_string(name);
311                if matches!(
312                    name_str.to_lowercase().as_str(),
313                    "self"
314                        | "static"
315                        | "parent"
316                        | "null"
317                        | "true"
318                        | "false"
319                        | "never"
320                        | "void"
321                        | "mixed"
322                        | "object"
323                        | "callable"
324                        | "iterable"
325                ) {
326                    return;
327                }
328                let resolved = crate::db::resolve_name_via_db(self.db, &self.file, &name_str);
329                if !crate::db::type_exists_via_db(self.db, &resolved) {
330                    self.emit(
331                        IssueKind::UndefinedClass { name: resolved },
332                        Severity::Error,
333                        hint.span,
334                    );
335                }
336            }
337            TypeHintKind::Nullable(inner) => self.check_type_hint(inner),
338            TypeHintKind::Union(parts) | TypeHintKind::Intersection(parts) => {
339                for part in parts.iter() {
340                    self.check_type_hint(part);
341                }
342            }
343            TypeHintKind::Keyword(_, _) => {}
344        }
345    }
346
347    pub fn emit(&mut self, kind: IssueKind, severity: Severity, span: php_ast::Span) {
348        let (line, col_start) = self.offset_to_line_col(span.start);
349
350        let (line_end, col_end) = if span.start < span.end {
351            let (end_line, end_col) = self.offset_to_line_col(span.end);
352            (end_line, end_col)
353        } else {
354            (line, col_start)
355        };
356
357        let mut issue = Issue::new(
358            kind,
359            Location {
360                file: self.file.clone(),
361                line,
362                line_end,
363                col_start,
364                col_end: col_end.max(col_start + 1),
365            },
366        );
367        issue.severity = severity;
368        // Store the source snippet for baseline matching.
369        if span.start < span.end {
370            let s = span.start as usize;
371            let e = (span.end as usize).min(self.source.len());
372            if let Some(text) = self.source.get(s..e) {
373                let trimmed = text.trim();
374                if !trimmed.is_empty() {
375                    issue.snippet = Some(trimmed.to_string());
376                }
377            }
378        }
379        self.issues.add(issue);
380    }
381
382    fn check_interpolation_implicit_to_string_cast(&mut self, ty: &Union, span: php_ast::Span) {
383        for atomic in &ty.types {
384            if let Atomic::TNamedObject { fqcn, .. } = atomic {
385                let fqcn_str = fqcn.as_ref();
386                if crate::db::lookup_method_in_chain(self.db, fqcn_str, "__toString").is_none()
387                    && !crate::db::extends_or_implements_via_db(self.db, fqcn_str, "Stringable")
388                {
389                    self.emit(
390                        IssueKind::ImplicitToStringCast {
391                            class: fqcn_str.to_string(),
392                        },
393                        Severity::Warning,
394                        span,
395                    );
396                }
397            }
398        }
399    }
400}
401
402// ---------------------------------------------------------------------------
403// Tests
404// ---------------------------------------------------------------------------
405
#[cfg(test)]
mod tests {
    /// Parse `source` and return only the resulting `SourceMap`.
    fn create_source_map(source: &str) -> php_rs_parser::source_map::SourceMap {
        let arena = bumpalo::Bump::new();
        let parsed = php_rs_parser::parse(&arena, source);
        parsed.source_map
    }

    /// Stand-alone copy of the analyzer's offset → (line, column) conversion:
    /// the line comes from the source map plus one, the column is the number
    /// of Unicode code points since the start of that line.
    fn test_offset_conversion(source: &str, offset: u32) -> (u32, u16) {
        let map = create_source_map(source);
        let line = map.offset_to_line_col(offset).line + 1;

        let upto = offset as usize;
        let line_start = match source[..upto].rfind('\n') {
            Some(newline) => newline + 1,
            None => 0,
        };
        let col = source[line_start..upto].chars().count() as u16;

        (line, col)
    }

    #[test]
    fn col_conversion_simple_ascii() {
        let source = "<?php\n$var = 123;";

        // Byte 6 is '$', the first character of line 2.
        assert_eq!(test_offset_conversion(source, 6), (2, 0));

        // Byte 7 is 'v', one column further along.
        assert_eq!(test_offset_conversion(source, 7), (2, 1));
    }

    #[test]
    fn col_conversion_different_lines() {
        // Line 1: <?php     (bytes 0-4, newline at 5)
        // Line 2: $x = 1;   (bytes 6-12, newline at 13)
        // Line 3: $y = 2;   (bytes 14-20)
        let source = "<?php\n$x = 1;\n$y = 2;";

        // Each line's first byte maps to column 0 of that line.
        assert_eq!(test_offset_conversion(source, 0), (1, 0));
        assert_eq!(test_offset_conversion(source, 6), (2, 0));
        assert_eq!(test_offset_conversion(source, 14), (3, 0));
    }

    #[test]
    fn col_conversion_accented_characters() {
        // 'é' occupies two UTF-8 bytes but is a single Unicode char.
        // Line 2 bytes: '$'=6, 'c'=7, 'a'=8, 'f'=9, 'é'=10..=11
        let source = "<?php\n$café = 1;";

        // 'f' at byte 9 is the fourth character (col 3).
        assert_eq!(test_offset_conversion(source, 9), (2, 3));

        // 'é' starts at byte 10 and is the fifth character (col 4).
        assert_eq!(test_offset_conversion(source, 10), (2, 4));
    }

    #[test]
    fn col_conversion_emoji_counts_as_one_char() {
        // 🎉 (U+1F389) takes 4 UTF-8 bytes and 2 UTF-16 units but is one char,
        // so the character after it sits at col 7 rather than col 8.
        // Line 2 chars: '$'=0 'y'=1 ' '=2 '='=3 ' '=4 '"'=5 🎉=6 'x'=7
        let source = "<?php\n$y = \"🎉x\";";

        let emoji_start = source.find("🎉").unwrap();
        let x_offset = emoji_start + "🎉".len();

        let (line, col) = test_offset_conversion(source, x_offset as u32);
        assert_eq!(line, 2);
        assert_eq!(col, 7);
    }

    #[test]
    fn col_conversion_emoji_start_position() {
        // Line 2 chars: '$'=0 'y'=1 ' '=2 '='=3 ' '=4 '"'=5 🎉=6
        let source = "<?php\n$y = \"🎉\";";

        let quote_offset = source.find('"').unwrap();
        // The emoji begins on the byte right after the opening quote.
        let emoji_offset = quote_offset + 1;

        // The opening quote is the sixth character of line 2 (col 5).
        assert_eq!(test_offset_conversion(source, quote_offset as u32), (2, 5));

        // The emoji follows it directly.
        assert_eq!(test_offset_conversion(source, emoji_offset as u32), (2, 6));
    }

    #[test]
    fn col_end_minimum_width() {
        // A zero-width span yields col_end == col_start; the reported end
        // column must still be at least one past the start.
        let col_start = 0u16;
        let col_end = 0u16;

        let effective_col_end = col_end.max(col_start + 1);

        assert_eq!(
            effective_col_end, 1,
            "col_end should be at least col_start + 1"
        );
    }

    #[test]
    fn col_conversion_multiline_span() {
        // A span that opens on line 2 and closes on line 5:
        //   Line 1: <?php
        //   Line 2: $x = [
        //   Line 3:   'a',
        //   Line 4:   'b'
        //   Line 5: ];
        let source = "<?php\n$x = [\n  'a',\n  'b'\n];";

        let open_offset = source.find('[').unwrap();
        let (open_line, _open_col) = test_offset_conversion(source, open_offset as u32);
        assert_eq!(open_line, 2);

        // The closing bracket is the first character of line 5.
        let close_offset = source.rfind(']').unwrap();
        assert_eq!(test_offset_conversion(source, close_offset as u32), (5, 0));
    }

    #[test]
    fn col_end_handles_emoji_in_span() {
        let source = "<?php\n$greeting = \"Hello 🎉\";";

        let hello_offset = source.find("Hello").unwrap();
        let emoji_offset = source.find('🎉').unwrap();

        // 'H' comes right after the 13 characters of `$greeting = "`.
        assert_eq!(test_offset_conversion(source, hello_offset as u32), (2, 13));

        // The emoji comes after "Hello " (13 + 5 + 1 = 19 characters).
        assert_eq!(test_offset_conversion(source, emoji_offset as u32), (2, 19));
    }
}