Skip to main content

mir_analyzer/expr/
mod.rs

//! Expression analyzer — infers the `Union` type of any PHP expression.
2use std::sync::Arc;
3
4use php_ast::ast::ExprKind;
5
6use mir_issues::{Issue, IssueBuffer, IssueKind, Location, Severity};
7use mir_types::{Atomic, Union};
8
9use crate::call::CallAnalyzer;
10use crate::context::Context;
11use crate::db::MirDatabase;
12use crate::php_version::PhpVersion;
13use crate::symbol::{ResolvedSymbol, SymbolKind};
14
15mod arrays;
16mod assignment;
17mod binary;
18mod casts;
19mod closures;
20mod conditional;
21mod helpers;
22mod intrinsics;
23mod literals;
24mod objects;
25mod unary;
26mod variables;
27
28pub use helpers::{extract_destructure_vars, extract_simple_var, infer_arithmetic};
29
30// ---------------------------------------------------------------------------
31// ExpressionAnalyzer
32// ---------------------------------------------------------------------------
33
/// State needed to analyze the expressions of a single file.
///
/// All heavy data (database, source text, output buffers) is borrowed for
/// the lifetime `'a`; the analyzer itself owns only the file name, the PHP
/// version, and the `inference_only` flag.
pub struct ExpressionAnalyzer<'a> {
    /// Database handle handed to the `crate::db` lookup helpers.
    pub db: &'a dyn MirDatabase,
    /// Name of the file under analysis (presumably its path — confirm with
    /// callers); cloned into every recorded symbol and issue location.
    pub file: Arc<str>,
    /// Source text of the file; sliced to compute columns and issue snippets.
    pub source: &'a str,
    /// Parser source map, used to turn byte offsets into line numbers.
    pub source_map: &'a php_rs_parser::source_map::SourceMap,
    /// Buffer that receives every issue produced by `emit`.
    pub issues: &'a mut IssueBuffer,
    /// List that `record_symbol` appends resolved symbols to.
    pub symbols: &'a mut Vec<ResolvedSymbol>,
    /// PHP version setting. Not read in this file's visible code —
    /// presumably consulted by the submodules; confirm there.
    pub php_version: PhpVersion,
    /// When true, skip all reference-tracking side-effects (used by the
    /// inference priming pass so reference locations aren't double-counted).
    pub inference_only: bool,
}
46
47impl<'a> ExpressionAnalyzer<'a> {
    /// Build an analyzer from its parts.
    ///
    /// Every argument is stored unchanged in the field of the same name; no
    /// other work is done here.
    // One parameter per struct field, which exceeds clippy's argument limit.
    #[allow(clippy::too_many_arguments)]
    pub fn new(
        db: &'a dyn MirDatabase,
        file: Arc<str>,
        source: &'a str,
        source_map: &'a php_rs_parser::source_map::SourceMap,
        issues: &'a mut IssueBuffer,
        symbols: &'a mut Vec<ResolvedSymbol>,
        php_version: PhpVersion,
        inference_only: bool,
    ) -> Self {
        Self {
            db,
            file,
            source,
            source_map,
            issues,
            symbols,
            php_version,
            inference_only,
        }
    }
70
71    /// Record a resolved symbol.
72    pub fn record_symbol(&mut self, span: php_ast::Span, kind: SymbolKind, resolved_type: Union) {
73        self.symbols.push(ResolvedSymbol {
74            file: self.file.clone(),
75            span,
76            kind,
77            resolved_type,
78        });
79    }
80
81    pub fn analyze<'arena, 'src>(
82        &mut self,
83        expr: &php_ast::ast::Expr<'arena, 'src>,
84        ctx: &mut Context,
85    ) -> Union {
86        match &expr.kind {
87            // --- Literals ---------------------------------------------------
88            ExprKind::Int(_)
89            | ExprKind::Float(_)
90            | ExprKind::String(_)
91            | ExprKind::Bool(_)
92            | ExprKind::Null => literals::analyze(&expr.kind),
93
94            ExprKind::InterpolatedString(parts) | ExprKind::Heredoc { parts, .. } => {
95                for part in parts.iter() {
96                    if let php_ast::StringPart::Expr(e) = part {
97                        let expr_ty = self.analyze(e, ctx);
98                        self.check_interpolation_implicit_to_string_cast(&expr_ty, e.span);
99                    }
100                }
101                Union::single(Atomic::TString)
102            }
103            ExprKind::Nowdoc { .. } => Union::single(Atomic::TString),
104            ExprKind::ShellExec(_) => Union::single(Atomic::TString),
105
106            // --- Variables --------------------------------------------------
107            ExprKind::Variable(name) => self.analyze_variable(name, expr, ctx),
108            ExprKind::VariableVariable(inner) => self.analyze_variable_variable(inner, ctx),
109            ExprKind::Identifier(name) => self.analyze_identifier(name, expr, ctx),
110
111            // --- Assignment -------------------------------------------------
112            ExprKind::Assign(a) => self.analyze_assign(a, expr.span, ctx),
113
114            // --- Binary operations ------------------------------------------
115            ExprKind::Binary(b) => self.analyze_binary_expr(b, expr.span, ctx),
116
117            // --- Unary ------------------------------------------------------
118            ExprKind::UnaryPrefix(u) => self.analyze_unary_prefix(u, ctx),
119            ExprKind::UnaryPostfix(u) => self.analyze_unary_postfix(u, ctx),
120
121            // --- Ternary / null coalesce ------------------------------------
122            ExprKind::Ternary(t) => self.analyze_ternary(t, ctx),
123            ExprKind::NullCoalesce(nc) => self.analyze_null_coalesce(nc, ctx),
124
125            // --- Casts ------------------------------------------------------
126            ExprKind::Cast(kind, inner) => self.analyze_cast(kind, inner, ctx),
127
128            // --- Error suppression ------------------------------------------
129            ExprKind::ErrorSuppress(inner) => self.analyze(inner, ctx),
130
131            // --- Parenthesized ----------------------------------------------
132            ExprKind::Parenthesized(inner) => self.analyze(inner, ctx),
133
134            // --- Array literals ---------------------------------------------
135            ExprKind::Array(elements) => self.analyze_array(elements, ctx),
136
137            // --- Array access -----------------------------------------------
138            ExprKind::ArrayAccess(aa) => self.analyze_array_access(aa, expr, ctx),
139
140            // --- isset / empty ----------------------------------------------
141            ExprKind::Isset(exprs) => {
142                for e in exprs.iter() {
143                    self.analyze(e, ctx);
144                }
145                Union::single(Atomic::TBool)
146            }
147            ExprKind::Empty(inner) => {
148                self.analyze(inner, ctx);
149                Union::single(Atomic::TBool)
150            }
151
152            // --- print ------------------------------------------------------
153            ExprKind::Print(inner) => {
154                let expr_ty = self.analyze(inner, ctx);
155                self.check_interpolation_implicit_to_string_cast(&expr_ty, inner.span);
156                Union::single(Atomic::TLiteralInt(1))
157            }
158
159            // --- clone ------------------------------------------------------
160            ExprKind::Clone(inner) => self.analyze(inner, ctx),
161            ExprKind::CloneWith(inner, _props) => self.analyze(inner, ctx),
162
163            // --- new ClassName(...) ----------------------------------------
164            ExprKind::New(n) => self.analyze_new(n, expr.span, ctx),
165
166            ExprKind::AnonymousClass(_) => Union::single(Atomic::TObject),
167
168            // --- Property access -------------------------------------------
169            ExprKind::PropertyAccess(pa) => self.analyze_property_access(pa, expr.span, ctx),
170
171            ExprKind::NullsafePropertyAccess(pa) => self.analyze_nullsafe_property_access(pa, ctx),
172
173            ExprKind::StaticPropertyAccess(spa) => self.analyze_static_property_access(spa),
174
175            ExprKind::ClassConstAccess(cca) => self.analyze_class_const_access(cca, expr.span),
176
177            ExprKind::ClassConstAccessDynamic { .. } => Union::mixed(),
178            ExprKind::StaticPropertyAccessDynamic { .. } => Union::mixed(),
179
180            // --- Method calls ----------------------------------------------
181            ExprKind::MethodCall(mc) => {
182                CallAnalyzer::analyze_method_call(self, mc, ctx, expr.span, false)
183            }
184
185            ExprKind::NullsafeMethodCall(mc) => {
186                CallAnalyzer::analyze_method_call(self, mc, ctx, expr.span, true)
187            }
188
189            ExprKind::StaticMethodCall(smc) => {
190                CallAnalyzer::analyze_static_method_call(self, smc, ctx, expr.span)
191            }
192
193            ExprKind::StaticDynMethodCall(smc) => {
194                CallAnalyzer::analyze_static_dyn_method_call(self, smc, ctx)
195            }
196
197            // --- Function calls --------------------------------------------
198            ExprKind::FunctionCall(fc) => {
199                CallAnalyzer::analyze_function_call(self, fc, ctx, expr.span)
200            }
201
202            // --- Closures / arrow functions --------------------------------
203            ExprKind::Closure(c) => self.analyze_closure(c, ctx),
204
205            ExprKind::ArrowFunction(af) => self.analyze_arrow_function(af, ctx),
206
207            ExprKind::CallableCreate(_) => Union::single(Atomic::TCallable {
208                params: None,
209                return_type: None,
210            }),
211
212            // --- Match expression ------------------------------------------
213            ExprKind::Match(m) => self.analyze_match(m, ctx),
214
215            // --- Throw as expression (PHP 8) --------------------------------
216            ExprKind::ThrowExpr(e) => {
217                self.analyze(e, ctx);
218                Union::single(Atomic::TNever)
219            }
220
221            // --- Yield -----------------------------------------------------
222            ExprKind::Yield(y) => self.analyze_yield(y, ctx),
223
224            // --- Magic constants -------------------------------------------
225            ExprKind::MagicConst(kind) => ExpressionAnalyzer::analyze_magic_const(kind),
226
227            // --- Include/require --------------------------------------------
228            ExprKind::Include(_, inner) => {
229                self.analyze(inner, ctx);
230                Union::mixed()
231            }
232
233            // --- Eval -------------------------------------------------------
234            ExprKind::Eval(inner) => {
235                self.analyze(inner, ctx);
236                Union::mixed()
237            }
238
239            // --- Exit -------------------------------------------------------
240            ExprKind::Exit(opt) => {
241                if let Some(e) = opt {
242                    self.analyze(e, ctx);
243                }
244                ctx.diverges = true;
245                Union::single(Atomic::TNever)
246            }
247
248            // --- Error node (parse error placeholder) ----------------------
249            ExprKind::Error => Union::mixed(),
250
251            // --- Omitted array slot (e.g. [, $b] destructuring) ------------
252            ExprKind::Omit => Union::single(Atomic::TNull),
253        }
254    }
255
256    // -----------------------------------------------------------------------
257    // Issue emission
258    // -----------------------------------------------------------------------
259
260    /// Convert a byte offset to a Unicode char-count column on a given line.
261    /// Returns (line, col) where col is a 0-based Unicode code-point count.
262    fn offset_to_line_col(&self, offset: u32) -> (u32, u16) {
263        let lc = self.source_map.offset_to_line_col(offset);
264        let line = lc.line + 1;
265
266        let byte_offset = offset as usize;
267        let line_start_byte = if byte_offset == 0 {
268            0
269        } else {
270            self.source[..byte_offset]
271                .rfind('\n')
272                .map(|p| p + 1)
273                .unwrap_or(0)
274        };
275
276        let col = self.source[line_start_byte..byte_offset].chars().count() as u16;
277
278        (line, col)
279    }
280
281    /// Convert an AST span to `(line, col_start, col_end)` for reference recording.
282    pub(crate) fn span_to_ref_loc(&self, span: php_ast::Span) -> (u32, u16, u16) {
283        let (line, col_start) = self.offset_to_line_col(span.start);
284        let end_off = (span.end as usize).min(self.source.len());
285        let end_line_start = self.source[..end_off]
286            .rfind('\n')
287            .map(|p| p + 1)
288            .unwrap_or(0);
289        let col_end = self.source[end_line_start..end_off].chars().count() as u16;
290        (line, col_start, col_end)
291    }
292
293    /// Walk a type hint and emit `UndefinedClass` for any named type not in the codebase.
294    fn check_type_hint(&mut self, hint: &php_ast::ast::TypeHint<'_, '_>) {
295        use php_ast::ast::TypeHintKind;
296        match &hint.kind {
297            TypeHintKind::Named(name) => {
298                let name_str = crate::parser::name_to_string(name);
299                if matches!(
300                    name_str.to_lowercase().as_str(),
301                    "self"
302                        | "static"
303                        | "parent"
304                        | "null"
305                        | "true"
306                        | "false"
307                        | "never"
308                        | "void"
309                        | "mixed"
310                        | "object"
311                        | "callable"
312                        | "iterable"
313                ) {
314                    return;
315                }
316                let resolved = crate::db::resolve_name_via_db(self.db, &self.file, &name_str);
317                if !crate::db::type_exists_via_db(self.db, &resolved) {
318                    self.emit(
319                        IssueKind::UndefinedClass { name: resolved },
320                        Severity::Error,
321                        hint.span,
322                    );
323                }
324            }
325            TypeHintKind::Nullable(inner) => self.check_type_hint(inner),
326            TypeHintKind::Union(parts) | TypeHintKind::Intersection(parts) => {
327                for part in parts.iter() {
328                    self.check_type_hint(part);
329                }
330            }
331            TypeHintKind::Keyword(_, _) => {}
332        }
333    }
334
335    pub fn emit(&mut self, kind: IssueKind, severity: Severity, span: php_ast::Span) {
336        let (line, col_start) = self.offset_to_line_col(span.start);
337
338        let (line_end, col_end) = if span.start < span.end {
339            let (end_line, end_col) = self.offset_to_line_col(span.end);
340            (end_line, end_col)
341        } else {
342            (line, col_start)
343        };
344
345        let mut issue = Issue::new(
346            kind,
347            Location {
348                file: self.file.clone(),
349                line,
350                line_end,
351                col_start,
352                col_end: col_end.max(col_start + 1),
353            },
354        );
355        issue.severity = severity;
356        // Store the source snippet for baseline matching.
357        if span.start < span.end {
358            let s = span.start as usize;
359            let e = (span.end as usize).min(self.source.len());
360            if let Some(text) = self.source.get(s..e) {
361                let trimmed = text.trim();
362                if !trimmed.is_empty() {
363                    issue.snippet = Some(trimmed.to_string());
364                }
365            }
366        }
367        self.issues.add(issue);
368    }
369
370    fn check_interpolation_implicit_to_string_cast(&mut self, ty: &Union, span: php_ast::Span) {
371        for atomic in &ty.types {
372            if let Atomic::TNamedObject { fqcn, .. } = atomic {
373                let fqcn_str = fqcn.as_ref();
374                if crate::db::lookup_method_in_chain(self.db, fqcn_str, "__toString").is_none()
375                    && !crate::db::extends_or_implements_via_db(self.db, fqcn_str, "Stringable")
376                {
377                    self.emit(
378                        IssueKind::ImplicitToStringCast {
379                            class: fqcn_str.to_string(),
380                        },
381                        Severity::Warning,
382                        span,
383                    );
384                }
385            }
386        }
387    }
388}
389
390// ---------------------------------------------------------------------------
391// Tests
392// ---------------------------------------------------------------------------
393
394#[cfg(test)]
395mod tests {
396    /// Helper to create a SourceMap from PHP source code
397    fn create_source_map(source: &str) -> php_rs_parser::source_map::SourceMap {
398        let bump = bumpalo::Bump::new();
399        let result = php_rs_parser::parse(&bump, source);
400        result.source_map
401    }
402
403    /// Helper to test offset_to_line_col conversion (Unicode char-count columns).
404    fn test_offset_conversion(source: &str, offset: u32) -> (u32, u16) {
405        let source_map = create_source_map(source);
406        let lc = source_map.offset_to_line_col(offset);
407        let line = lc.line + 1;
408
409        let byte_offset = offset as usize;
410        let line_start_byte = if byte_offset == 0 {
411            0
412        } else {
413            source[..byte_offset]
414                .rfind('\n')
415                .map(|p| p + 1)
416                .unwrap_or(0)
417        };
418
419        let col = source[line_start_byte..byte_offset].chars().count() as u16;
420
421        (line, col)
422    }
423
424    #[test]
425    fn col_conversion_simple_ascii() {
426        let source = "<?php\n$var = 123;";
427
428        // '$' on line 2, column 0
429        let (line, col) = test_offset_conversion(source, 6);
430        assert_eq!(line, 2);
431        assert_eq!(col, 0);
432
433        // 'v' on line 2, column 1
434        let (line, col) = test_offset_conversion(source, 7);
435        assert_eq!(line, 2);
436        assert_eq!(col, 1);
437    }
438
439    #[test]
440    fn col_conversion_different_lines() {
441        let source = "<?php\n$x = 1;\n$y = 2;";
442        // Line 1: <?php     (bytes 0-4, newline at 5)
443        // Line 2: $x = 1;  (bytes 6-12, newline at 13)
444        // Line 3: $y = 2;  (bytes 14-20)
445
446        let (line, col) = test_offset_conversion(source, 0);
447        assert_eq!((line, col), (1, 0));
448
449        let (line, col) = test_offset_conversion(source, 6);
450        assert_eq!((line, col), (2, 0));
451
452        let (line, col) = test_offset_conversion(source, 14);
453        assert_eq!((line, col), (3, 0));
454    }
455
456    #[test]
457    fn col_conversion_accented_characters() {
458        // é is 2 UTF-8 bytes but 1 Unicode char (and 1 UTF-16 unit — same result either way)
459        let source = "<?php\n$café = 1;";
460        // Line 2: $ c a f é ...
461        // bytes:  6 7 8 9 10(2 bytes)
462
463        // 'f' at byte 9 → char col 3
464        let (line, col) = test_offset_conversion(source, 9);
465        assert_eq!((line, col), (2, 3));
466
467        // 'é' at byte 10 → char col 4
468        let (line, col) = test_offset_conversion(source, 10);
469        assert_eq!((line, col), (2, 4));
470    }
471
472    #[test]
473    fn col_conversion_emoji_counts_as_one_char() {
474        // 🎉 (U+1F389) is 4 UTF-8 bytes and 2 UTF-16 units, but 1 Unicode char.
475        // A char after the emoji must land at col 7, not col 8.
476        let source = "<?php\n$y = \"🎉x\";";
477        // Line 2: $ y   =   " 🎉 x " ;
478        // chars:  0 1 2 3 4 5  6  7 8 9
479
480        let emoji_start = source.find("🎉").unwrap();
481        let after_emoji = emoji_start + "🎉".len(); // skip 4 bytes
482
483        // position at 'x' (right after the emoji)
484        let (line, col) = test_offset_conversion(source, after_emoji as u32);
485        assert_eq!(line, 2);
486        assert_eq!(col, 7); // emoji counts as 1, not 2
487    }
488
489    #[test]
490    fn col_conversion_emoji_start_position() {
491        // The opening quote is at col 5; the emoji immediately follows at col 6.
492        let source = "<?php\n$y = \"🎉\";";
493        // Line 2: $ y   =   " 🎉 " ;
494        // chars:  0 1 2 3 4 5  6  7 8
495
496        let quote_pos = source.find('"').unwrap();
497        let emoji_pos = quote_pos + 1; // byte after opening quote = emoji start
498
499        let (line, col) = test_offset_conversion(source, quote_pos as u32);
500        assert_eq!(line, 2);
501        assert_eq!(col, 5); // '"' is the 6th char on line 2 (0-based: col 5)
502
503        let (line, col) = test_offset_conversion(source, emoji_pos as u32);
504        assert_eq!(line, 2);
505        assert_eq!(col, 6); // emoji follows the quote
506    }
507
508    #[test]
509    fn col_end_minimum_width() {
510        // Ensure col_end is at least col_start + 1 (1 character minimum)
511        let col_start = 0u16;
512        let col_end = 0u16; // Would happen if span.start == span.end
513        let effective_col_end = col_end.max(col_start + 1);
514
515        assert_eq!(
516            effective_col_end, 1,
517            "col_end should be at least col_start + 1"
518        );
519    }
520
521    #[test]
522    fn col_conversion_multiline_span() {
523        // Test span that starts on one line and ends on another
524        let source = "<?php\n$x = [\n  'a',\n  'b'\n];";
525        //           Line 1: <?php
526        //           Line 2: $x = [
527        //           Line 3:   'a',
528        //           Line 4:   'b'
529        //           Line 5: ];
530
531        // Start of array bracket on line 2
532        let bracket_open = source.find('[').unwrap();
533        let (line_start, _col_start) = test_offset_conversion(source, bracket_open as u32);
534        assert_eq!(line_start, 2);
535
536        // End of array bracket on line 5
537        let bracket_close = source.rfind(']').unwrap();
538        let (line_end, col_end) = test_offset_conversion(source, bracket_close as u32);
539        assert_eq!(line_end, 5);
540        assert_eq!(col_end, 0); // ']' is at column 0 on line 5
541    }
542
543    #[test]
544    fn col_end_handles_emoji_in_span() {
545        // Test that col_end correctly handles emoji spanning
546        let source = "<?php\n$greeting = \"Hello 🎉\";";
547
548        // Find emoji position
549        let emoji_pos = source.find('🎉').unwrap();
550        let hello_pos = source.find("Hello").unwrap();
551
552        // Column at "Hello" on line 2
553        let (line, col) = test_offset_conversion(source, hello_pos as u32);
554        assert_eq!(line, 2);
555        assert_eq!(col, 13); // Position of 'H' after "$greeting = \""
556
557        // Column at emoji
558        let (line, col) = test_offset_conversion(source, emoji_pos as u32);
559        assert_eq!(line, 2);
560        // Should be after "Hello " (13 + 5 + 1 = 19 chars)
561        assert_eq!(col, 19);
562    }
563}