Skip to main content

mir_analyzer/
expr.rs

//! Expression analyzer — infers the `Union` type of any PHP expression.
2use std::sync::Arc;
3
4use php_ast::ast::{
5    AssignOp, BinaryOp, CastKind, ExprKind, MagicConstKind, UnaryPostfixOp, UnaryPrefixOp,
6};
7
8use mir_codebase::Codebase;
9use mir_issues::{Issue, IssueBuffer, IssueKind, Location, Severity};
10use mir_types::{Atomic, Union};
11
12use crate::call::CallAnalyzer;
13use crate::context::Context;
14use crate::symbol::{ResolvedSymbol, SymbolKind};
15
16// ---------------------------------------------------------------------------
17// ExpressionAnalyzer
18// ---------------------------------------------------------------------------
19
20pub struct ExpressionAnalyzer<'a> {
21    pub codebase: &'a Codebase,
22    pub file: Arc<str>,
23    pub source: &'a str,
24    pub source_map: &'a php_rs_parser::source_map::SourceMap,
25    pub issues: &'a mut IssueBuffer,
26    pub symbols: &'a mut Vec<ResolvedSymbol>,
27}
28
29impl<'a> ExpressionAnalyzer<'a> {
30    pub fn new(
31        codebase: &'a Codebase,
32        file: Arc<str>,
33        source: &'a str,
34        source_map: &'a php_rs_parser::source_map::SourceMap,
35        issues: &'a mut IssueBuffer,
36        symbols: &'a mut Vec<ResolvedSymbol>,
37    ) -> Self {
38        Self {
39            codebase,
40            file,
41            source,
42            source_map,
43            issues,
44            symbols,
45        }
46    }
47
48    /// Record a resolved symbol.
49    pub fn record_symbol(&mut self, span: php_ast::Span, kind: SymbolKind, resolved_type: Union) {
50        self.symbols.push(ResolvedSymbol {
51            span,
52            kind,
53            resolved_type,
54        });
55    }
56
57    pub fn analyze<'arena, 'src>(
58        &mut self,
59        expr: &php_ast::ast::Expr<'arena, 'src>,
60        ctx: &mut Context,
61    ) -> Union {
62        match &expr.kind {
63            // --- Literals ---------------------------------------------------
64            ExprKind::Int(n) => Union::single(Atomic::TLiteralInt(*n)),
65            ExprKind::Float(f) => {
66                let bits = f.to_bits();
67                Union::single(Atomic::TLiteralFloat(
68                    (bits >> 32) as i64,
69                    (bits & 0xFFFF_FFFF) as i64,
70                ))
71            }
72            ExprKind::String(s) => Union::single(Atomic::TLiteralString((*s).into())),
73            ExprKind::Bool(b) => {
74                if *b {
75                    Union::single(Atomic::TTrue)
76                } else {
77                    Union::single(Atomic::TFalse)
78                }
79            }
80            ExprKind::Null => Union::single(Atomic::TNull),
81
82            // Interpolated strings always produce TString
83            ExprKind::InterpolatedString(parts) | ExprKind::Heredoc { parts, .. } => {
84                for part in parts.iter() {
85                    if let php_ast::StringPart::Expr(e) = part {
86                        self.analyze(e, ctx);
87                    }
88                }
89                Union::single(Atomic::TString)
90            }
91
92            ExprKind::Nowdoc { .. } => Union::single(Atomic::TString),
93            ExprKind::ShellExec(_) => Union::single(Atomic::TString),
94
95            // --- Variables --------------------------------------------------
96            ExprKind::Variable(name) => {
97                let name_str = name.as_str().trim_start_matches('$');
98                if !ctx.var_is_defined(name_str) {
99                    if ctx.var_possibly_defined(name_str) {
100                        self.emit(
101                            IssueKind::PossiblyUndefinedVariable {
102                                name: name_str.to_string(),
103                            },
104                            Severity::Info,
105                            expr.span,
106                        );
107                    } else if name_str != "this" {
108                        self.emit(
109                            IssueKind::UndefinedVariable {
110                                name: name_str.to_string(),
111                            },
112                            Severity::Error,
113                            expr.span,
114                        );
115                    }
116                }
117                ctx.read_vars.insert(name_str.to_string());
118                let ty = ctx.get_var(name_str);
119                self.record_symbol(
120                    expr.span,
121                    SymbolKind::Variable(name_str.to_string()),
122                    ty.clone(),
123                );
124                ty
125            }
126
127            ExprKind::VariableVariable(_) => Union::mixed(), // $$x — unknowable
128
129            ExprKind::Identifier(_name) => {
130                // Bare identifier used as value (e.g. class constant, global const)
131                Union::mixed()
132            }
133
134            // --- Assignment -------------------------------------------------
135            ExprKind::Assign(a) => {
136                let rhs_tainted = crate::taint::is_expr_tainted(a.value, ctx);
137                let rhs_ty = self.analyze(a.value, ctx);
138                match a.op {
139                    AssignOp::Assign => {
140                        self.assign_to_target(a.target, rhs_ty.clone(), ctx, expr.span);
141                        // Propagate taint: if RHS is tainted, taint LHS variable (M19)
142                        if rhs_tainted {
143                            if let ExprKind::Variable(name) = &a.target.kind {
144                                ctx.taint_var(name.as_ref());
145                            }
146                        }
147                        rhs_ty
148                    }
149                    AssignOp::Concat => {
150                        // .= always produces string
151                        if let Some(var_name) = extract_simple_var(a.target) {
152                            ctx.set_var(&var_name, Union::single(Atomic::TString));
153                        }
154                        Union::single(Atomic::TString)
155                    }
156                    AssignOp::Plus
157                    | AssignOp::Minus
158                    | AssignOp::Mul
159                    | AssignOp::Div
160                    | AssignOp::Mod
161                    | AssignOp::Pow => {
162                        let lhs_ty = self.analyze(a.target, ctx);
163                        let result_ty = infer_arithmetic(&lhs_ty, &rhs_ty);
164                        if let Some(var_name) = extract_simple_var(a.target) {
165                            ctx.set_var(&var_name, result_ty.clone());
166                        }
167                        result_ty
168                    }
169                    AssignOp::Coalesce => {
170                        // ??= — assign only if null
171                        let lhs_ty = self.analyze(a.target, ctx);
172                        let merged = Union::merge(&lhs_ty.remove_null(), &rhs_ty);
173                        if let Some(var_name) = extract_simple_var(a.target) {
174                            ctx.set_var(&var_name, merged.clone());
175                        }
176                        merged
177                    }
178                    _ => {
179                        if let Some(var_name) = extract_simple_var(a.target) {
180                            ctx.set_var(&var_name, Union::mixed());
181                        }
182                        Union::mixed()
183                    }
184                }
185            }
186
187            // --- Binary operations ------------------------------------------
188            ExprKind::Binary(b) => self.analyze_binary(b, expr.span, ctx),
189
190            // --- Unary ------------------------------------------------------
191            ExprKind::UnaryPrefix(u) => {
192                let operand_ty = self.analyze(u.operand, ctx);
193                match u.op {
194                    UnaryPrefixOp::BooleanNot => Union::single(Atomic::TBool),
195                    UnaryPrefixOp::Negate => {
196                        if operand_ty.contains(|t| t.is_int()) {
197                            Union::single(Atomic::TInt)
198                        } else {
199                            Union::single(Atomic::TFloat)
200                        }
201                    }
202                    UnaryPrefixOp::Plus => operand_ty,
203                    UnaryPrefixOp::BitwiseNot => Union::single(Atomic::TInt),
204                    UnaryPrefixOp::PreIncrement | UnaryPrefixOp::PreDecrement => {
205                        // ++$x / --$x: increment and return new value
206                        if let Some(var_name) = extract_simple_var(u.operand) {
207                            let ty = ctx.get_var(&var_name);
208                            let new_ty = if ty.contains(|t| {
209                                matches!(t, Atomic::TFloat | Atomic::TLiteralFloat(..))
210                            }) {
211                                Union::single(Atomic::TFloat)
212                            } else {
213                                Union::single(Atomic::TInt)
214                            };
215                            ctx.set_var(&var_name, new_ty.clone());
216                            new_ty
217                        } else {
218                            Union::single(Atomic::TInt)
219                        }
220                    }
221                }
222            }
223
224            ExprKind::UnaryPostfix(u) => {
225                let operand_ty = self.analyze(u.operand, ctx);
226                // $x++ / $x-- returns original value, but mutates variable
227                match u.op {
228                    UnaryPostfixOp::PostIncrement | UnaryPostfixOp::PostDecrement => {
229                        if let Some(var_name) = extract_simple_var(u.operand) {
230                            let new_ty = if operand_ty.contains(|t| {
231                                matches!(t, Atomic::TFloat | Atomic::TLiteralFloat(..))
232                            }) {
233                                Union::single(Atomic::TFloat)
234                            } else {
235                                Union::single(Atomic::TInt)
236                            };
237                            ctx.set_var(&var_name, new_ty);
238                        }
239                        operand_ty // returns original value
240                    }
241                }
242            }
243
244            // --- Ternary / null coalesce ------------------------------------
245            ExprKind::Ternary(t) => {
246                let cond_ty = self.analyze(t.condition, ctx);
247                match &t.then_expr {
248                    Some(then_expr) => {
249                        let mut then_ctx = ctx.fork();
250                        crate::narrowing::narrow_from_condition(
251                            t.condition,
252                            &mut then_ctx,
253                            true,
254                            self.codebase,
255                            &self.file,
256                        );
257                        let then_ty =
258                            self.with_ctx(&mut then_ctx, |ea, c| ea.analyze(then_expr, c));
259
260                        let mut else_ctx = ctx.fork();
261                        crate::narrowing::narrow_from_condition(
262                            t.condition,
263                            &mut else_ctx,
264                            false,
265                            self.codebase,
266                            &self.file,
267                        );
268                        let else_ty =
269                            self.with_ctx(&mut else_ctx, |ea, c| ea.analyze(t.else_expr, c));
270
271                        // Propagate variable reads from both branches
272                        for name in then_ctx.read_vars.iter().chain(else_ctx.read_vars.iter()) {
273                            ctx.read_vars.insert(name.clone());
274                        }
275
276                        Union::merge(&then_ty, &else_ty)
277                    }
278                    None => {
279                        // $x ?: $y — short ternary: if $x truthy, return $x; else return $y
280                        let else_ty = self.analyze(t.else_expr, ctx);
281                        let truthy_ty = cond_ty.narrow_to_truthy();
282                        if truthy_ty.is_empty() {
283                            else_ty
284                        } else {
285                            Union::merge(&truthy_ty, &else_ty)
286                        }
287                    }
288                }
289            }
290
291            ExprKind::NullCoalesce(nc) => {
292                let left_ty = self.analyze(nc.left, ctx);
293                let right_ty = self.analyze(nc.right, ctx);
294                // result = remove_null(left) | right
295                let non_null_left = left_ty.remove_null();
296                if non_null_left.is_empty() {
297                    right_ty
298                } else {
299                    Union::merge(&non_null_left, &right_ty)
300                }
301            }
302
303            // --- Casts ------------------------------------------------------
304            ExprKind::Cast(kind, inner) => {
305                let _inner_ty = self.analyze(inner, ctx);
306                match kind {
307                    CastKind::Int => Union::single(Atomic::TInt),
308                    CastKind::Float => Union::single(Atomic::TFloat),
309                    CastKind::String => Union::single(Atomic::TString),
310                    CastKind::Bool => Union::single(Atomic::TBool),
311                    CastKind::Array => Union::single(Atomic::TArray {
312                        key: Box::new(Union::single(Atomic::TMixed)),
313                        value: Box::new(Union::mixed()),
314                    }),
315                    CastKind::Object => Union::single(Atomic::TObject),
316                    CastKind::Unset | CastKind::Void => Union::single(Atomic::TNull),
317                }
318            }
319
320            // --- Error suppression ------------------------------------------
321            ExprKind::ErrorSuppress(inner) => self.analyze(inner, ctx),
322
323            // --- Parenthesized ----------------------------------------------
324            ExprKind::Parenthesized(inner) => self.analyze(inner, ctx),
325
326            // --- Array literals ---------------------------------------------
327            ExprKind::Array(elements) => {
328                use mir_types::atomic::{ArrayKey, KeyedProperty};
329
330                if elements.is_empty() {
331                    return Union::single(Atomic::TKeyedArray {
332                        properties: indexmap::IndexMap::new(),
333                        is_open: false,
334                        is_list: true,
335                    });
336                }
337
338                // Try to build a TKeyedArray when all keys are literal strings/ints
339                // (or no keys — pure list). Fall back to TArray on spread or dynamic keys.
340                let mut keyed_props: indexmap::IndexMap<ArrayKey, KeyedProperty> =
341                    indexmap::IndexMap::new();
342                let mut is_list = true;
343                let mut can_be_keyed = true;
344                let mut next_int_key: i64 = 0;
345
346                for elem in elements.iter() {
347                    if elem.unpack {
348                        self.analyze(&elem.value, ctx);
349                        can_be_keyed = false;
350                        break;
351                    }
352                    let value_ty = self.analyze(&elem.value, ctx);
353                    let array_key = if let Some(key_expr) = &elem.key {
354                        is_list = false;
355                        let key_ty = self.analyze(key_expr, ctx);
356                        // Only build keyed array if key is a string or int literal
357                        match key_ty.types.as_slice() {
358                            [Atomic::TLiteralString(s)] => ArrayKey::String(s.clone()),
359                            [Atomic::TLiteralInt(i)] => {
360                                next_int_key = *i + 1;
361                                ArrayKey::Int(*i)
362                            }
363                            _ => {
364                                can_be_keyed = false;
365                                break;
366                            }
367                        }
368                    } else {
369                        let k = ArrayKey::Int(next_int_key);
370                        next_int_key += 1;
371                        k
372                    };
373                    keyed_props.insert(
374                        array_key,
375                        KeyedProperty {
376                            ty: value_ty,
377                            optional: false,
378                        },
379                    );
380                }
381
382                if can_be_keyed {
383                    return Union::single(Atomic::TKeyedArray {
384                        properties: keyed_props,
385                        is_open: false,
386                        is_list,
387                    });
388                }
389
390                // Fallback: generic TArray — re-evaluate elements to build merged types
391                let mut all_value_types = Union::empty();
392                let mut key_union = Union::empty();
393                let mut has_unpack = false;
394                for elem in elements.iter() {
395                    let value_ty = self.analyze(&elem.value, ctx);
396                    if elem.unpack {
397                        has_unpack = true;
398                    } else {
399                        all_value_types = Union::merge(&all_value_types, &value_ty);
400                        if let Some(key_expr) = &elem.key {
401                            let key_ty = self.analyze(key_expr, ctx);
402                            key_union = Union::merge(&key_union, &key_ty);
403                        } else {
404                            key_union.add_type(Atomic::TInt);
405                        }
406                    }
407                }
408                if has_unpack {
409                    return Union::single(Atomic::TArray {
410                        key: Box::new(Union::single(Atomic::TMixed)),
411                        value: Box::new(Union::mixed()),
412                    });
413                }
414                if key_union.is_empty() {
415                    key_union.add_type(Atomic::TInt);
416                }
417                Union::single(Atomic::TArray {
418                    key: Box::new(key_union),
419                    value: Box::new(all_value_types),
420                })
421            }
422
423            // --- Array access -----------------------------------------------
424            ExprKind::ArrayAccess(aa) => {
425                let arr_ty = self.analyze(aa.array, ctx);
426
427                // Analyze the index expression for variable read tracking
428                if let Some(idx) = &aa.index {
429                    self.analyze(idx, ctx);
430                }
431
432                // Check for null access
433                if arr_ty.contains(|t| matches!(t, Atomic::TNull)) && arr_ty.is_single() {
434                    self.emit(IssueKind::NullArrayAccess, Severity::Error, expr.span);
435                    return Union::mixed();
436                }
437                if arr_ty.is_nullable() {
438                    self.emit(
439                        IssueKind::PossiblyNullArrayAccess,
440                        Severity::Info,
441                        expr.span,
442                    );
443                }
444
445                // Determine the key being accessed (if it's a literal)
446                let literal_key: Option<mir_types::atomic::ArrayKey> =
447                    aa.index.as_ref().and_then(|idx| match &idx.kind {
448                        ExprKind::String(s) => {
449                            Some(mir_types::atomic::ArrayKey::String(Arc::from(&**s)))
450                        }
451                        ExprKind::Int(i) => Some(mir_types::atomic::ArrayKey::Int(*i)),
452                        _ => None,
453                    });
454
455                // Infer element type
456                for atomic in &arr_ty.types {
457                    match atomic {
458                        Atomic::TKeyedArray { properties, .. } => {
459                            // If we know the key, look it up precisely
460                            if let Some(ref key) = literal_key {
461                                if let Some(prop) = properties.get(key) {
462                                    return prop.ty.clone();
463                                }
464                            }
465                            // Unknown key — return union of all value types
466                            let mut result = Union::empty();
467                            for prop in properties.values() {
468                                result = Union::merge(&result, &prop.ty);
469                            }
470                            return if result.types.is_empty() {
471                                Union::mixed()
472                            } else {
473                                result
474                            };
475                        }
476                        Atomic::TArray { value, .. } | Atomic::TNonEmptyArray { value, .. } => {
477                            return *value.clone();
478                        }
479                        Atomic::TList { value } | Atomic::TNonEmptyList { value } => {
480                            return *value.clone();
481                        }
482                        Atomic::TString | Atomic::TLiteralString(_) => {
483                            return Union::single(Atomic::TString);
484                        }
485                        _ => {}
486                    }
487                }
488                Union::mixed()
489            }
490
491            // --- isset / empty ----------------------------------------------
492            ExprKind::Isset(exprs) => {
493                for e in exprs.iter() {
494                    self.analyze(e, ctx);
495                }
496                Union::single(Atomic::TBool)
497            }
498            ExprKind::Empty(inner) => {
499                self.analyze(inner, ctx);
500                Union::single(Atomic::TBool)
501            }
502
503            // --- print ------------------------------------------------------
504            ExprKind::Print(inner) => {
505                self.analyze(inner, ctx);
506                Union::single(Atomic::TLiteralInt(1))
507            }
508
509            // --- clone ------------------------------------------------------
510            ExprKind::Clone(inner) => self.analyze(inner, ctx),
511            ExprKind::CloneWith(inner, _props) => self.analyze(inner, ctx),
512
513            // --- new ClassName(...) ----------------------------------------
514            ExprKind::New(n) => {
515                // Evaluate args first (needed for taint / type check)
516                let arg_types: Vec<Union> = n
517                    .args
518                    .iter()
519                    .map(|a| {
520                        let ty = self.analyze(&a.value, ctx);
521                        if a.unpack {
522                            crate::call::spread_element_type(&ty)
523                        } else {
524                            ty
525                        }
526                    })
527                    .collect();
528                let arg_spans: Vec<php_ast::Span> = n.args.iter().map(|a| a.span).collect();
529                let arg_names: Vec<Option<String>> = n
530                    .args
531                    .iter()
532                    .map(|a| a.name.as_ref().map(|nm| nm.to_string()))
533                    .collect();
534
535                let class_ty = match &n.class.kind {
536                    ExprKind::Identifier(name) => {
537                        let resolved = self.codebase.resolve_class_name(&self.file, name.as_ref());
538                        // `self`, `static`, `parent` resolve to the current class — use ctx
539                        let fqcn: Arc<str> = match resolved.as_str() {
540                            "self" | "static" => ctx
541                                .self_fqcn
542                                .clone()
543                                .or_else(|| ctx.static_fqcn.clone())
544                                .unwrap_or_else(|| Arc::from(resolved.as_str())),
545                            "parent" => ctx
546                                .parent_fqcn
547                                .clone()
548                                .unwrap_or_else(|| Arc::from(resolved.as_str())),
549                            _ => Arc::from(resolved.as_str()),
550                        };
551                        if !matches!(resolved.as_str(), "self" | "static" | "parent")
552                            && !self.codebase.type_exists(&fqcn)
553                        {
554                            self.emit(
555                                IssueKind::UndefinedClass {
556                                    name: resolved.clone(),
557                                },
558                                Severity::Error,
559                                n.class.span,
560                            );
561                        } else if self.codebase.type_exists(&fqcn) {
562                            // Check constructor arguments
563                            if let Some(ctor) = self.codebase.get_method(&fqcn, "__construct") {
564                                crate::call::check_constructor_args(
565                                    self,
566                                    &fqcn,
567                                    crate::call::CheckArgsParams {
568                                        fn_name: "__construct",
569                                        params: &ctor.params,
570                                        arg_types: &arg_types,
571                                        arg_spans: &arg_spans,
572                                        arg_names: &arg_names,
573                                        call_span: expr.span,
574                                        has_spread: n.args.iter().any(|a| a.unpack),
575                                    },
576                                );
577                            }
578                        }
579                        let ty = Union::single(Atomic::TNamedObject {
580                            fqcn: fqcn.clone(),
581                            type_params: vec![],
582                        });
583                        self.record_symbol(
584                            n.class.span,
585                            SymbolKind::ClassReference(fqcn),
586                            ty.clone(),
587                        );
588                        ty
589                    }
590                    _ => {
591                        self.analyze(n.class, ctx);
592                        Union::single(Atomic::TObject)
593                    }
594                };
595                class_ty
596            }
597
598            ExprKind::AnonymousClass(_) => Union::single(Atomic::TObject),
599
600            // --- Property access -------------------------------------------
601            ExprKind::PropertyAccess(pa) => {
602                let obj_ty = self.analyze(pa.object, ctx);
603                let prop_name = extract_string_from_expr(pa.property)
604                    .unwrap_or_else(|| "<dynamic>".to_string());
605
606                if obj_ty.contains(|t| matches!(t, Atomic::TNull)) && obj_ty.is_single() {
607                    self.emit(
608                        IssueKind::NullPropertyFetch {
609                            property: prop_name.clone(),
610                        },
611                        Severity::Error,
612                        expr.span,
613                    );
614                    return Union::mixed();
615                }
616                if obj_ty.is_nullable() {
617                    self.emit(
618                        IssueKind::PossiblyNullPropertyFetch {
619                            property: prop_name.clone(),
620                        },
621                        Severity::Info,
622                        expr.span,
623                    );
624                }
625
626                // Dynamic property access ($obj->$varName) — can't resolve statically.
627                if prop_name == "<dynamic>" {
628                    return Union::mixed();
629                }
630                let resolved = self.resolve_property_type(&obj_ty, &prop_name, expr.span);
631                // Record property access symbol for each named object in the receiver type
632                for atomic in &obj_ty.types {
633                    if let Atomic::TNamedObject { fqcn, .. } = atomic {
634                        self.record_symbol(
635                            expr.span,
636                            SymbolKind::PropertyAccess {
637                                class: fqcn.clone(),
638                                property: Arc::from(prop_name.as_str()),
639                            },
640                            resolved.clone(),
641                        );
642                        break;
643                    }
644                }
645                resolved
646            }
647
648            ExprKind::NullsafePropertyAccess(pa) => {
649                let obj_ty = self.analyze(pa.object, ctx);
650                let prop_name = extract_string_from_expr(pa.property)
651                    .unwrap_or_else(|| "<dynamic>".to_string());
652                if prop_name == "<dynamic>" {
653                    return Union::mixed();
654                }
655                // ?-> strips null from receiver
656                let non_null_ty = obj_ty.remove_null();
657                let mut prop_ty = self.resolve_property_type(&non_null_ty, &prop_name, expr.span);
658                prop_ty.add_type(Atomic::TNull); // result is nullable because receiver may be null
659                prop_ty
660            }
661
662            ExprKind::StaticPropertyAccess(_spa) => {
663                // Class::$prop
664                Union::mixed()
665            }
666
667            ExprKind::ClassConstAccess(cca) => {
668                // Foo::CONST or Foo::class
669                if cca.member.as_ref() == "class" {
670                    // Resolve the class name so Foo::class gives the correct FQCN string
671                    let fqcn = if let ExprKind::Identifier(id) = &cca.class.kind {
672                        let resolved = self.codebase.resolve_class_name(&self.file, id.as_ref());
673                        Some(Arc::from(resolved.as_str()))
674                    } else {
675                        None
676                    };
677                    return Union::single(Atomic::TClassString(fqcn));
678                }
679                Union::mixed()
680            }
681
682            ExprKind::ClassConstAccessDynamic { .. } => Union::mixed(),
683            ExprKind::StaticPropertyAccessDynamic { .. } => Union::mixed(),
684
685            // --- Method calls ----------------------------------------------
686            ExprKind::MethodCall(mc) => {
687                CallAnalyzer::analyze_method_call(self, mc, ctx, expr.span, false)
688            }
689
690            ExprKind::NullsafeMethodCall(mc) => {
691                CallAnalyzer::analyze_method_call(self, mc, ctx, expr.span, true)
692            }
693
694            ExprKind::StaticMethodCall(smc) => {
695                CallAnalyzer::analyze_static_method_call(self, smc, ctx, expr.span)
696            }
697
698            // --- Function calls --------------------------------------------
699            ExprKind::FunctionCall(fc) => {
700                CallAnalyzer::analyze_function_call(self, fc, ctx, expr.span)
701            }
702
703            // --- Closures / arrow functions --------------------------------
704            ExprKind::Closure(c) => {
705                let params = ast_params_to_fn_params_resolved(
706                    &c.params,
707                    ctx.self_fqcn.as_deref(),
708                    self.codebase,
709                    &self.file,
710                );
711                let return_ty_hint = c
712                    .return_type
713                    .as_ref()
714                    .map(|h| crate::parser::type_from_hint(h, ctx.self_fqcn.as_deref()))
715                    .map(|u| resolve_named_objects_in_union(u, self.codebase, &self.file));
716
717                // Build closure context — capture declared use-vars from outer scope
718                // Note: is_static only prevents $this binding; self_fqcn is still accessible
719                // for resolving `self::` references and private/protected visibility checks.
720                let mut closure_ctx = crate::context::Context::for_function(
721                    &params,
722                    return_ty_hint.clone(),
723                    ctx.self_fqcn.clone(),
724                    ctx.parent_fqcn.clone(),
725                    ctx.static_fqcn.clone(),
726                    ctx.strict_types,
727                );
728                for use_var in c.use_vars.iter() {
729                    let name = use_var.name.trim_start_matches('$');
730                    closure_ctx.set_var(name, ctx.get_var(name));
731                    if ctx.is_tainted(name) {
732                        closure_ctx.taint_var(name);
733                    }
734                }
735
736                // Analyze closure body, collecting issues into the same buffer
737                let inferred_return = {
738                    let mut sa = crate::stmt::StatementsAnalyzer::new(
739                        self.codebase,
740                        self.file.clone(),
741                        self.source,
742                        self.source_map,
743                        self.issues,
744                        self.symbols,
745                    );
746                    sa.analyze_stmts(&c.body, &mut closure_ctx);
747                    let ret = crate::project::merge_return_types(&sa.return_types);
748                    drop(sa);
749                    ret
750                };
751
752                // Propagate variable reads from closure back to outer scope
753                for name in &closure_ctx.read_vars {
754                    ctx.read_vars.insert(name.clone());
755                }
756
757                let return_ty = return_ty_hint.unwrap_or(inferred_return);
758                let closure_params: Vec<mir_types::atomic::FnParam> = params
759                    .iter()
760                    .map(|p| mir_types::atomic::FnParam {
761                        name: p.name.clone(),
762                        ty: p.ty.clone(),
763                        default: p.default.clone(),
764                        is_variadic: p.is_variadic,
765                        is_byref: p.is_byref,
766                        is_optional: p.is_optional,
767                    })
768                    .collect();
769
770                Union::single(Atomic::TClosure {
771                    params: closure_params,
772                    return_type: Box::new(return_ty),
773                    this_type: ctx.self_fqcn.clone().map(|f| {
774                        Box::new(Union::single(Atomic::TNamedObject {
775                            fqcn: f,
776                            type_params: vec![],
777                        }))
778                    }),
779                })
780            }
781
782            ExprKind::ArrowFunction(af) => {
783                let params = ast_params_to_fn_params_resolved(
784                    &af.params,
785                    ctx.self_fqcn.as_deref(),
786                    self.codebase,
787                    &self.file,
788                );
789                let return_ty_hint = af
790                    .return_type
791                    .as_ref()
792                    .map(|h| crate::parser::type_from_hint(h, ctx.self_fqcn.as_deref()))
793                    .map(|u| resolve_named_objects_in_union(u, self.codebase, &self.file));
794
795                // Arrow functions implicitly capture the outer scope by value
796                // Note: is_static only prevents $this binding; self_fqcn is still accessible
797                // for resolving `self::` references and private/protected visibility checks.
798                let mut arrow_ctx = crate::context::Context::for_function(
799                    &params,
800                    return_ty_hint.clone(),
801                    ctx.self_fqcn.clone(),
802                    ctx.parent_fqcn.clone(),
803                    ctx.static_fqcn.clone(),
804                    ctx.strict_types,
805                );
806                // Copy outer vars into arrow context (implicit capture)
807                for (name, ty) in &ctx.vars {
808                    if !arrow_ctx.vars.contains_key(name) {
809                        arrow_ctx.set_var(name, ty.clone());
810                    }
811                }
812
813                // Analyze single-expression body
814                let inferred_return = self.analyze(af.body, &mut arrow_ctx);
815
816                // Propagate variable reads from arrow function back to outer scope
817                for name in &arrow_ctx.read_vars {
818                    ctx.read_vars.insert(name.clone());
819                }
820
821                let return_ty = return_ty_hint.unwrap_or(inferred_return);
822                let closure_params: Vec<mir_types::atomic::FnParam> = params
823                    .iter()
824                    .map(|p| mir_types::atomic::FnParam {
825                        name: p.name.clone(),
826                        ty: p.ty.clone(),
827                        default: p.default.clone(),
828                        is_variadic: p.is_variadic,
829                        is_byref: p.is_byref,
830                        is_optional: p.is_optional,
831                    })
832                    .collect();
833
834                Union::single(Atomic::TClosure {
835                    params: closure_params,
836                    return_type: Box::new(return_ty),
837                    this_type: if af.is_static {
838                        None
839                    } else {
840                        ctx.self_fqcn.clone().map(|f| {
841                            Box::new(Union::single(Atomic::TNamedObject {
842                                fqcn: f,
843                                type_params: vec![],
844                            }))
845                        })
846                    },
847                })
848            }
849
850            ExprKind::CallableCreate(_) => Union::single(Atomic::TCallable {
851                params: None,
852                return_type: None,
853            }),
854
855            // --- Match expression ------------------------------------------
856            ExprKind::Match(m) => {
857                let subject_ty = self.analyze(m.subject, ctx);
858                // Extract the variable name of the subject for narrowing
859                let subject_var = match &m.subject.kind {
860                    ExprKind::Variable(name) => {
861                        Some(name.as_str().trim_start_matches('$').to_string())
862                    }
863                    _ => None,
864                };
865
866                let mut result = Union::empty();
867                for arm in m.arms.iter() {
868                    // Fork context for each arm so arms don't bleed into each other
869                    let mut arm_ctx = ctx.fork();
870
871                    // Narrow the subject variable in this arm's context
872                    if let (Some(var), Some(conditions)) = (&subject_var, &arm.conditions) {
873                        // Build a union of all condition types for this arm
874                        let mut arm_ty = Union::empty();
875                        for cond in conditions.iter() {
876                            let cond_ty = self.analyze(cond, ctx);
877                            arm_ty = Union::merge(&arm_ty, &cond_ty);
878                        }
879                        // Intersect subject type with the arm condition types
880                        if !arm_ty.is_empty() && !arm_ty.is_mixed() {
881                            // Narrow to the matched literal/type if possible
882                            let narrowed = subject_ty.intersect_with(&arm_ty);
883                            if !narrowed.is_empty() {
884                                arm_ctx.set_var(var, narrowed);
885                            }
886                        }
887                    }
888
889                    // For `match(true) { $x instanceof Y => ... }` patterns:
890                    // narrow from each condition expression even when subject is not a simple var.
891                    if let Some(conditions) = &arm.conditions {
892                        for cond in conditions.iter() {
893                            crate::narrowing::narrow_from_condition(
894                                cond,
895                                &mut arm_ctx,
896                                true,
897                                self.codebase,
898                                &self.file,
899                            );
900                        }
901                    }
902
903                    let arm_body_ty = self.analyze(&arm.body, &mut arm_ctx);
904                    result = Union::merge(&result, &arm_body_ty);
905
906                    // Propagate variable reads from arm back to outer scope
907                    for name in &arm_ctx.read_vars {
908                        ctx.read_vars.insert(name.clone());
909                    }
910                }
911                if result.is_empty() {
912                    Union::mixed()
913                } else {
914                    result
915                }
916            }
917
918            // --- Throw as expression (PHP 8) --------------------------------
919            ExprKind::ThrowExpr(e) => {
920                self.analyze(e, ctx);
921                Union::single(Atomic::TNever)
922            }
923
924            // --- Yield -----------------------------------------------------
925            ExprKind::Yield(y) => {
926                if let Some(key) = &y.key {
927                    self.analyze(key, ctx);
928                }
929                if let Some(value) = &y.value {
930                    self.analyze(value, ctx);
931                }
932                Union::mixed()
933            }
934
935            // --- Magic constants -------------------------------------------
936            ExprKind::MagicConst(kind) => match kind {
937                MagicConstKind::Line => Union::single(Atomic::TInt),
938                MagicConstKind::File
939                | MagicConstKind::Dir
940                | MagicConstKind::Function
941                | MagicConstKind::Class
942                | MagicConstKind::Method
943                | MagicConstKind::Namespace
944                | MagicConstKind::Trait
945                | MagicConstKind::Property => Union::single(Atomic::TString),
946            },
947
948            // --- Include/require --------------------------------------------
949            ExprKind::Include(_, inner) => {
950                self.analyze(inner, ctx);
951                Union::mixed()
952            }
953
954            // --- Eval -------------------------------------------------------
955            ExprKind::Eval(inner) => {
956                self.analyze(inner, ctx);
957                Union::mixed()
958            }
959
960            // --- Exit -------------------------------------------------------
961            ExprKind::Exit(opt) => {
962                if let Some(e) = opt {
963                    self.analyze(e, ctx);
964                }
965                Union::single(Atomic::TNever)
966            }
967
968            // --- Error node (parse error placeholder) ----------------------
969            ExprKind::Error => Union::mixed(),
970
971            // --- Omitted array slot (e.g. [, $b] destructuring) ------------
972            ExprKind::Omit => Union::single(Atomic::TNull),
973        }
974    }
975
976    // -----------------------------------------------------------------------
977    // Binary operations
978    // -----------------------------------------------------------------------
979
980    fn analyze_binary<'arena, 'src>(
981        &mut self,
982        b: &php_ast::ast::BinaryExpr<'arena, 'src>,
983        _span: php_ast::Span,
984        ctx: &mut Context,
985    ) -> Union {
986        // Short-circuit operators: narrow the context for the right operand based on
987        // the left operand's truthiness (just like the then/else branches of an if).
988        // We evaluate the right side in a forked context so that the narrowing
989        // (e.g. `instanceof`) applies to method/property calls on the right side
990        // without permanently mutating the caller's context.
991        use php_ast::ast::BinaryOp as B;
992        if matches!(
993            b.op,
994            B::BooleanAnd | B::LogicalAnd | B::BooleanOr | B::LogicalOr
995        ) {
996            let _left_ty = self.analyze(b.left, ctx);
997            let mut right_ctx = ctx.fork();
998            let is_and = matches!(b.op, B::BooleanAnd | B::LogicalAnd);
999            crate::narrowing::narrow_from_condition(
1000                b.left,
1001                &mut right_ctx,
1002                is_and,
1003                self.codebase,
1004                &self.file,
1005            );
1006            // If narrowing made the right side statically unreachable, skip it
1007            // (e.g. `$x === null || $x->method()` — right is dead when $x is only null).
1008            if !right_ctx.diverges {
1009                let _right_ty = self.analyze(b.right, &mut right_ctx);
1010            }
1011            // Propagate read-var tracking and any new variable assignments back.
1012            // New assignments from the right side are only "possibly" made (short-circuit),
1013            // so mark them in possibly_assigned_vars but not assigned_vars.
1014            for v in right_ctx.read_vars {
1015                ctx.read_vars.insert(v.clone());
1016            }
1017            for (name, ty) in &right_ctx.vars {
1018                if !ctx.vars.contains_key(name.as_str()) {
1019                    // Variable first assigned in the right side — possibly assigned
1020                    ctx.vars.insert(name.clone(), ty.clone());
1021                    ctx.possibly_assigned_vars.insert(name.clone());
1022                }
1023            }
1024            return Union::single(Atomic::TBool);
1025        }
1026
1027        let left_ty = self.analyze(b.left, ctx);
1028        let right_ty = self.analyze(b.right, ctx);
1029
1030        match b.op {
1031            // Arithmetic
1032            BinaryOp::Add
1033            | BinaryOp::Sub
1034            | BinaryOp::Mul
1035            | BinaryOp::Div
1036            | BinaryOp::Mod
1037            | BinaryOp::Pow => infer_arithmetic(&left_ty, &right_ty),
1038
1039            // String concatenation
1040            BinaryOp::Concat => Union::single(Atomic::TString),
1041
1042            // Comparisons always return bool
1043            BinaryOp::Equal
1044            | BinaryOp::NotEqual
1045            | BinaryOp::Identical
1046            | BinaryOp::NotIdentical
1047            | BinaryOp::Less
1048            | BinaryOp::Greater
1049            | BinaryOp::LessOrEqual
1050            | BinaryOp::GreaterOrEqual => Union::single(Atomic::TBool),
1051
1052            BinaryOp::Instanceof => {
1053                // Check that the class on the right side of `instanceof` exists.
1054                if let ExprKind::Identifier(name) = &b.right.kind {
1055                    let resolved = self.codebase.resolve_class_name(&self.file, name.as_ref());
1056                    let fqcn: std::sync::Arc<str> = std::sync::Arc::from(resolved.as_str());
1057                    if !matches!(resolved.as_str(), "self" | "static" | "parent")
1058                        && !self.codebase.type_exists(&fqcn)
1059                    {
1060                        self.emit(
1061                            IssueKind::UndefinedClass { name: resolved },
1062                            Severity::Error,
1063                            b.right.span,
1064                        );
1065                    }
1066                }
1067                Union::single(Atomic::TBool)
1068            }
1069
1070            // Spaceship returns -1|0|1
1071            BinaryOp::Spaceship => Union::single(Atomic::TIntRange {
1072                min: Some(-1),
1073                max: Some(1),
1074            }),
1075
1076            // Logical
1077            BinaryOp::BooleanAnd
1078            | BinaryOp::BooleanOr
1079            | BinaryOp::LogicalAnd
1080            | BinaryOp::LogicalOr
1081            | BinaryOp::LogicalXor => Union::single(Atomic::TBool),
1082
1083            // Bitwise
1084            BinaryOp::BitwiseAnd
1085            | BinaryOp::BitwiseOr
1086            | BinaryOp::BitwiseXor
1087            | BinaryOp::ShiftLeft
1088            | BinaryOp::ShiftRight => Union::single(Atomic::TInt),
1089
1090            // Pipe (FirstClassCallable-style) — rare
1091            BinaryOp::Pipe => right_ty,
1092        }
1093    }
1094
1095    // -----------------------------------------------------------------------
1096    // Property resolution
1097    // -----------------------------------------------------------------------
1098
1099    fn resolve_property_type(
1100        &mut self,
1101        obj_ty: &Union,
1102        prop_name: &str,
1103        span: php_ast::Span,
1104    ) -> Union {
1105        for atomic in &obj_ty.types {
1106            match atomic {
1107                Atomic::TNamedObject { fqcn, .. } => {
1108                    if self.codebase.classes.contains_key(fqcn.as_ref()) {
1109                        if let Some(prop) = self.codebase.get_property(fqcn.as_ref(), prop_name) {
1110                            // Record reference for dead-code detection (M18)
1111                            self.codebase.mark_property_referenced(fqcn, prop_name);
1112                            return prop.ty.clone().unwrap_or_else(Union::mixed);
1113                        }
1114                        // Only emit UndefinedProperty if all ancestors are known and no __get magic.
1115                        if !self.codebase.has_unknown_ancestor(fqcn.as_ref())
1116                            && !self.codebase.has_magic_get(fqcn.as_ref())
1117                        {
1118                            self.emit(
1119                                IssueKind::UndefinedProperty {
1120                                    class: fqcn.to_string(),
1121                                    property: prop_name.to_string(),
1122                                },
1123                                Severity::Warning,
1124                                span,
1125                            );
1126                        }
1127                        return Union::mixed();
1128                    }
1129                    // Class not in codebase (external/vendor) — skip silently.
1130                }
1131                Atomic::TMixed => return Union::mixed(),
1132                _ => {}
1133            }
1134        }
1135        Union::mixed()
1136    }
1137
1138    // -----------------------------------------------------------------------
1139    // Assignment helpers
1140    // -----------------------------------------------------------------------
1141
1142    fn assign_to_target<'arena, 'src>(
1143        &mut self,
1144        target: &php_ast::ast::Expr<'arena, 'src>,
1145        ty: Union,
1146        ctx: &mut Context,
1147        span: php_ast::Span,
1148    ) {
1149        match &target.kind {
1150            ExprKind::Variable(name) => {
1151                let name_str = name.as_str().trim_start_matches('$').to_string();
1152                ctx.set_var(name_str, ty);
1153            }
1154            ExprKind::Array(elements) => {
1155                // [$a, $b] = $arr  — destructuring
1156                // If the RHS can be false/null (e.g. unpack() returns array|false),
1157                // the destructuring may fail → PossiblyInvalidArrayAccess.
1158                let has_non_array = ty.contains(|a| matches!(a, Atomic::TFalse | Atomic::TNull));
1159                let has_array = ty.contains(|a| {
1160                    matches!(
1161                        a,
1162                        Atomic::TArray { .. }
1163                            | Atomic::TList { .. }
1164                            | Atomic::TNonEmptyArray { .. }
1165                            | Atomic::TNonEmptyList { .. }
1166                            | Atomic::TKeyedArray { .. }
1167                    )
1168                });
1169                if has_non_array && has_array {
1170                    let actual = format!("{}", ty);
1171                    self.emit(
1172                        IssueKind::PossiblyInvalidArrayOffset {
1173                            expected: "array".to_string(),
1174                            actual,
1175                        },
1176                        Severity::Warning,
1177                        span,
1178                    );
1179                }
1180
1181                // Extract the element value type from the RHS array type (if known).
1182                let value_ty: Union = ty
1183                    .types
1184                    .iter()
1185                    .find_map(|a| match a {
1186                        Atomic::TArray { value, .. }
1187                        | Atomic::TList { value }
1188                        | Atomic::TNonEmptyArray { value, .. }
1189                        | Atomic::TNonEmptyList { value } => Some(*value.clone()),
1190                        _ => None,
1191                    })
1192                    .unwrap_or_else(Union::mixed);
1193
1194                for elem in elements.iter() {
1195                    self.assign_to_target(&elem.value, value_ty.clone(), ctx, span);
1196                }
1197            }
1198            ExprKind::PropertyAccess(pa) => {
1199                // Check readonly (M19 readonly enforcement)
1200                let obj_ty = self.analyze(pa.object, ctx);
1201                if let Some(prop_name) = extract_string_from_expr(pa.property) {
1202                    for atomic in &obj_ty.types {
1203                        if let Atomic::TNamedObject { fqcn, .. } = atomic {
1204                            if let Some(cls) = self.codebase.classes.get(fqcn.as_ref()) {
1205                                if let Some(prop) = cls.get_property(&prop_name) {
1206                                    if prop.is_readonly && !ctx.inside_constructor {
1207                                        self.emit(
1208                                            IssueKind::ReadonlyPropertyAssignment {
1209                                                class: fqcn.to_string(),
1210                                                property: prop_name.clone(),
1211                                            },
1212                                            Severity::Error,
1213                                            span,
1214                                        );
1215                                    }
1216                                }
1217                            }
1218                        }
1219                    }
1220                }
1221            }
1222            ExprKind::StaticPropertyAccess(_) => {
1223                // static property assignment — could add readonly check here too
1224            }
1225            ExprKind::ArrayAccess(aa) => {
1226                // $arr[$k] = v  — PHP auto-initialises $arr as an array if undefined.
1227                // Analyze the index expression for variable read tracking.
1228                if let Some(idx) = &aa.index {
1229                    self.analyze(idx, ctx);
1230                }
1231                // Walk the base to find the root variable and update its type to include
1232                // the new value, so loop analysis can widen correctly.
1233                let mut base = aa.array;
1234                loop {
1235                    match &base.kind {
1236                        ExprKind::Variable(name) => {
1237                            let name_str = name.as_str().trim_start_matches('$');
1238                            if !ctx.var_is_defined(name_str) {
1239                                ctx.vars.insert(
1240                                    name_str.to_string(),
1241                                    Union::single(Atomic::TArray {
1242                                        key: Box::new(Union::mixed()),
1243                                        value: Box::new(ty.clone()),
1244                                    }),
1245                                );
1246                                ctx.assigned_vars.insert(name_str.to_string());
1247                            } else {
1248                                // Widen the existing array type to include the new value type.
1249                                // This ensures loop analysis can see the type change and widen properly.
1250                                let current = ctx.get_var(name_str);
1251                                let updated = widen_array_with_value(&current, &ty);
1252                                ctx.set_var(name_str, updated);
1253                            }
1254                            break;
1255                        }
1256                        ExprKind::ArrayAccess(inner) => {
1257                            if let Some(idx) = &inner.index {
1258                                self.analyze(idx, ctx);
1259                            }
1260                            base = inner.array;
1261                        }
1262                        _ => break,
1263                    }
1264                }
1265            }
1266            _ => {}
1267        }
1268    }
1269
1270    // -----------------------------------------------------------------------
1271    // Issue emission
1272    // -----------------------------------------------------------------------
1273
1274    /// Convert a byte offset to a UTF-16 column on a given line.
1275    /// Returns (line, col_utf16) where col is 0-based UTF-16 code unit count.
1276    fn offset_to_line_col_utf16(&self, offset: u32) -> (u32, u16) {
1277        let lc = self.source_map.offset_to_line_col(offset);
1278        let line = lc.line + 1;
1279
1280        // Find the start of the line containing this offset
1281        let byte_offset = offset as usize;
1282        let line_start_byte = if byte_offset == 0 {
1283            0
1284        } else {
1285            // Find the position after the last newline before this offset
1286            self.source[..byte_offset]
1287                .rfind('\n')
1288                .map(|p| p + 1)
1289                .unwrap_or(0)
1290        };
1291
1292        // Count UTF-16 code units from line start to the offset
1293        let col_utf16 = self.source[line_start_byte..byte_offset]
1294            .chars()
1295            .map(|c| c.len_utf16() as u16)
1296            .sum();
1297
1298        (line, col_utf16)
1299    }
1300
1301    pub fn emit(&mut self, kind: IssueKind, severity: Severity, span: php_ast::Span) {
1302        let (line, col_start) = self.offset_to_line_col_utf16(span.start);
1303
1304        // Calculate col_end: if span.end is on the same line, use its UTF-16 column;
1305        // otherwise use col_start (single-line range for diagnostics)
1306        let col_end = if span.start < span.end {
1307            let (_end_line, end_col) = self.offset_to_line_col_utf16(span.end);
1308            end_col
1309        } else {
1310            col_start
1311        };
1312
1313        let mut issue = Issue::new(
1314            kind,
1315            Location {
1316                file: self.file.clone(),
1317                line,
1318                col_start,
1319                col_end: col_end.max(col_start + 1),
1320            },
1321        );
1322        issue.severity = severity;
1323        // Store the source snippet for baseline matching.
1324        if span.start < span.end {
1325            let s = span.start as usize;
1326            let e = (span.end as usize).min(self.source.len());
1327            if let Some(text) = self.source.get(s..e) {
1328                let trimmed = text.trim();
1329                if !trimmed.is_empty() {
1330                    issue.snippet = Some(trimmed.to_string());
1331                }
1332            }
1333        }
1334        self.issues.add(issue);
1335    }
1336
1337    // Helper to call a closure with a mutable context reference while holding &mut self.
1338    fn with_ctx<F, R>(&mut self, ctx: &mut Context, f: F) -> R
1339    where
1340        F: FnOnce(&mut ExpressionAnalyzer<'a>, &mut Context) -> R,
1341    {
1342        f(self, ctx)
1343    }
1344}
1345
1346// ---------------------------------------------------------------------------
1347// Free functions
1348// ---------------------------------------------------------------------------
1349
1350/// Widen an array type to include a new element value type.
1351/// Used when `$arr[$k] = $val` is analyzed — updates the array's value type
1352/// so loop analysis can detect the change and widen properly.
1353fn widen_array_with_value(current: &Union, new_value: &Union) -> Union {
1354    let mut result = Union::empty();
1355    result.possibly_undefined = current.possibly_undefined;
1356    result.from_docblock = current.from_docblock;
1357    let mut found_array = false;
1358    for atomic in &current.types {
1359        match atomic {
1360            Atomic::TKeyedArray { properties, .. } => {
1361                // Merge all existing keyed values with the new value type, converting to TArray
1362                let mut all_values = new_value.clone();
1363                for prop in properties.values() {
1364                    all_values = Union::merge(&all_values, &prop.ty);
1365                }
1366                result.add_type(Atomic::TArray {
1367                    key: Box::new(Union::mixed()),
1368                    value: Box::new(all_values),
1369                });
1370                found_array = true;
1371            }
1372            Atomic::TArray { key, value } => {
1373                let merged = Union::merge(value, new_value);
1374                result.add_type(Atomic::TArray {
1375                    key: key.clone(),
1376                    value: Box::new(merged),
1377                });
1378                found_array = true;
1379            }
1380            Atomic::TList { value } | Atomic::TNonEmptyList { value } => {
1381                let merged = Union::merge(value, new_value);
1382                result.add_type(Atomic::TList {
1383                    value: Box::new(merged),
1384                });
1385                found_array = true;
1386            }
1387            Atomic::TMixed => {
1388                return Union::mixed();
1389            }
1390            other => {
1391                result.add_type(other.clone());
1392            }
1393        }
1394    }
1395    if !found_array {
1396        // Current type has no array component — don't introduce one.
1397        // (e.g. typed object; return the original type unchanged.)
1398        return current.clone();
1399    }
1400    result
1401}
1402
1403pub fn infer_arithmetic(left: &Union, right: &Union) -> Union {
1404    // If either operand is mixed, result is mixed (could be numeric or array addition)
1405    if left.is_mixed() || right.is_mixed() {
1406        return Union::mixed();
1407    }
1408
1409    // PHP array union: array + array → array (union of keys)
1410    let left_is_array = left.contains(|t| {
1411        matches!(
1412            t,
1413            Atomic::TArray { .. }
1414                | Atomic::TNonEmptyArray { .. }
1415                | Atomic::TList { .. }
1416                | Atomic::TNonEmptyList { .. }
1417                | Atomic::TKeyedArray { .. }
1418        )
1419    });
1420    let right_is_array = right.contains(|t| {
1421        matches!(
1422            t,
1423            Atomic::TArray { .. }
1424                | Atomic::TNonEmptyArray { .. }
1425                | Atomic::TList { .. }
1426                | Atomic::TNonEmptyList { .. }
1427                | Atomic::TKeyedArray { .. }
1428        )
1429    });
1430    if left_is_array || right_is_array {
1431        // Merge the two array types (simplified: return mixed array)
1432        let merged_left = if left_is_array {
1433            left.clone()
1434        } else {
1435            Union::single(Atomic::TArray {
1436                key: Box::new(Union::single(Atomic::TMixed)),
1437                value: Box::new(Union::mixed()),
1438            })
1439        };
1440        return merged_left;
1441    }
1442
1443    let left_is_float = left.contains(|t| matches!(t, Atomic::TFloat | Atomic::TLiteralFloat(..)));
1444    let right_is_float =
1445        right.contains(|t| matches!(t, Atomic::TFloat | Atomic::TLiteralFloat(..)));
1446    if left_is_float || right_is_float {
1447        Union::single(Atomic::TFloat)
1448    } else if left.contains(|t| t.is_int()) && right.contains(|t| t.is_int()) {
1449        Union::single(Atomic::TInt)
1450    } else {
1451        // Could be int or float (e.g. mixed + int)
1452        let mut u = Union::empty();
1453        u.add_type(Atomic::TInt);
1454        u.add_type(Atomic::TFloat);
1455        u
1456    }
1457}
1458
1459pub fn extract_simple_var<'arena, 'src>(expr: &php_ast::ast::Expr<'arena, 'src>) -> Option<String> {
1460    match &expr.kind {
1461        ExprKind::Variable(name) => Some(name.as_str().trim_start_matches('$').to_string()),
1462        ExprKind::Parenthesized(inner) => extract_simple_var(inner),
1463        _ => None,
1464    }
1465}
1466
1467/// Extract all variable names from a list/array destructure pattern.
1468/// e.g. `[$a, $b]` or `list($a, $b)` → `["a", "b"]`
1469/// Returns an empty vec if the expression is not a destructure.
1470pub fn extract_destructure_vars<'arena, 'src>(
1471    expr: &php_ast::ast::Expr<'arena, 'src>,
1472) -> Vec<String> {
1473    match &expr.kind {
1474        ExprKind::Array(elements) => {
1475            let mut vars = vec![];
1476            for elem in elements.iter() {
1477                // Nested destructure or simple variable
1478                let sub = extract_destructure_vars(&elem.value);
1479                if sub.is_empty() {
1480                    if let Some(v) = extract_simple_var(&elem.value) {
1481                        vars.push(v);
1482                    }
1483                } else {
1484                    vars.extend(sub);
1485                }
1486            }
1487            vars
1488        }
1489        _ => vec![],
1490    }
1491}
1492
1493/// Like `ast_params_to_fn_params` but resolves type names through the file's import table.
1494fn ast_params_to_fn_params_resolved<'arena, 'src>(
1495    params: &php_ast::ast::ArenaVec<'arena, php_ast::ast::Param<'arena, 'src>>,
1496    self_fqcn: Option<&str>,
1497    codebase: &mir_codebase::Codebase,
1498    file: &str,
1499) -> Vec<mir_codebase::FnParam> {
1500    params
1501        .iter()
1502        .map(|p| {
1503            let ty = p
1504                .type_hint
1505                .as_ref()
1506                .map(|h| crate::parser::type_from_hint(h, self_fqcn))
1507                .map(|u| resolve_named_objects_in_union(u, codebase, file));
1508            mir_codebase::FnParam {
1509                name: p.name.trim_start_matches('$').into(),
1510                ty,
1511                default: p.default.as_ref().map(|_| Union::mixed()),
1512                is_variadic: p.variadic,
1513                is_byref: p.by_ref,
1514                is_optional: p.default.is_some() || p.variadic,
1515            }
1516        })
1517        .collect()
1518}
1519
1520/// Resolve TNamedObject fqcns in a union through the file's import table.
1521fn resolve_named_objects_in_union(
1522    union: Union,
1523    codebase: &mir_codebase::Codebase,
1524    file: &str,
1525) -> Union {
1526    use mir_types::Atomic;
1527    let from_docblock = union.from_docblock;
1528    let possibly_undefined = union.possibly_undefined;
1529    let types: Vec<Atomic> = union
1530        .types
1531        .into_iter()
1532        .map(|a| match a {
1533            Atomic::TNamedObject { fqcn, type_params } => {
1534                let resolved = codebase.resolve_class_name(file, fqcn.as_ref());
1535                Atomic::TNamedObject {
1536                    fqcn: resolved.into(),
1537                    type_params,
1538                }
1539            }
1540            other => other,
1541        })
1542        .collect();
1543    let mut result = Union::from_vec(types);
1544    result.from_docblock = from_docblock;
1545    result.possibly_undefined = possibly_undefined;
1546    result
1547}
1548
1549fn extract_string_from_expr<'arena, 'src>(
1550    expr: &php_ast::ast::Expr<'arena, 'src>,
1551) -> Option<String> {
1552    match &expr.kind {
1553        ExprKind::Identifier(s) => Some(s.trim_start_matches('$').to_string()),
1554        // Variable in property position means dynamic access ($obj->$prop) — not a literal name.
1555        ExprKind::Variable(_) => None,
1556        ExprKind::String(s) => Some(s.to_string()),
1557        _ => None,
1558    }
1559}
1560
#[cfg(test)]
mod tests {
    /// Parse `source` and return the parser's `SourceMap` for it.
    fn create_source_map(source: &str) -> php_rs_parser::source_map::SourceMap {
        let bump = bumpalo::Bump::new();
        let result = php_rs_parser::parse(&bump, source);
        result.source_map
    }

    /// Convert a byte `offset` in `source` to `(line, column)`.
    ///
    /// The 1-based line comes from the parser's `SourceMap`. The column is
    /// computed here as the number of UTF-16 code units between the start of
    /// that line and `offset`. `offset` must lie on a `char` boundary, since
    /// `source` is sliced at it.
    fn test_offset_conversion(source: &str, offset: u32) -> (u32, u16) {
        let source_map = create_source_map(source);
        let line = source_map.offset_to_line_col(offset).line + 1;

        let byte_offset = offset as usize;
        // The line starts right after the last newline before `offset`, or at
        // byte 0 when there is none (this also covers `offset == 0`).
        let line_start_byte = source[..byte_offset]
            .rfind('\n')
            .map(|newline| newline + 1)
            .unwrap_or(0);

        let col_utf16 = source[line_start_byte..byte_offset]
            .chars()
            .map(|c| c.len_utf16() as u16)
            .sum();

        (line, col_utf16)
    }

    #[test]
    fn utf16_conversion_simple_ascii() {
        // Simple ASCII: `<?php` is bytes 0-4, the newline is byte 5, and
        // `$var = 123;` starts at byte 6.
        let source = "<?php\n$var = 123;";

        // Position of '$' on line 2 should be column 0 (byte 6)
        let (line, col) = test_offset_conversion(source, 6);
        assert_eq!(line, 2);
        assert_eq!(col, 0);

        // Position of 'v' should be column 1 (byte 7)
        let (line, col) = test_offset_conversion(source, 7);
        assert_eq!(line, 2);
        assert_eq!(col, 1);
    }

    #[test]
    fn utf16_conversion_emoji_utf16_units() {
        // An emoji outside the BMP is 4 bytes in UTF-8 but 2 UTF-16 code units.
        let source = "<?php\n$x = 1;\n$y = \"🎉\";";

        let quote_pos = source.find('"').unwrap();
        let emoji_pos = source.find('🎉').unwrap();
        assert_eq!(emoji_pos, quote_pos + 1); // Emoji directly follows the opening quote
        let after_emoji = emoji_pos + '🎉'.len_utf8();

        // Position of the opening quote: `$y = ` (5 units) precedes it
        let (line, quote_col) = test_offset_conversion(source, quote_pos as u32);
        assert_eq!(line, 3);
        assert_eq!(quote_col, 5);

        // Position at emoji start: one more unit for the quote
        let (line, emoji_col) = test_offset_conversion(source, emoji_pos as u32);
        assert_eq!(line, 3);
        assert_eq!(emoji_col, quote_col + 1);

        // Position right after the emoji: it must have advanced the column by
        // 2 UTF-16 units, not by 1 (chars) or 4 (bytes).
        let (line, after_col) = test_offset_conversion(source, after_emoji as u32);
        assert_eq!(line, 3);
        assert_eq!(after_col, emoji_col + 2);
    }

    #[test]
    fn utf16_conversion_different_lines() {
        let source = "<?php\n$x = 1;\n$y = 2;";
        //          Line 1: <?php (bytes 0-4, newline at 5)
        //          Line 2: $x = 1; (bytes 6-12, newline at 13)
        //          Line 3: $y = 2; (bytes 14-20)

        // Position on line 1, byte 0
        let (line, col) = test_offset_conversion(source, 0);
        assert_eq!(line, 1);
        assert_eq!(col, 0);

        // Position on line 2, byte 6 (first char after newline)
        let (line, col) = test_offset_conversion(source, 6);
        assert_eq!(line, 2);
        assert_eq!(col, 0);

        // Position on line 3, byte 14 (first char after second newline)
        let (line, col) = test_offset_conversion(source, 14);
        assert_eq!(line, 3);
        assert_eq!(col, 0); // '$' is the first character on line 3
    }

    #[test]
    fn utf16_conversion_accented_characters() {
        // Accented characters: é is 2 bytes in UTF-8 but 1 UTF-16 code unit.
        // Line 2 starts at byte 6: '$'=6, 'c'=7, 'a'=8, 'f'=9, 'é'=10-11, ' '=12.
        let source = "<?php\n$café = 1;";

        // Position at 'f' (byte 9)
        let (line, col) = test_offset_conversion(source, 9);
        assert_eq!(line, 2);
        assert_eq!(col, 3); // `$`, `c`, `a` precede 'f'

        // Position at 'é' (byte 10, start of é which is 2 bytes)
        let (line, col) = test_offset_conversion(source, 10);
        assert_eq!(line, 2);
        assert_eq!(col, 4); // `$`, `c`, `a`, `f` = 4 UTF-16 units

        // Position right after 'é' (byte 12): two bytes consumed, one unit added
        let (line, col) = test_offset_conversion(source, 12);
        assert_eq!(line, 2);
        assert_eq!(col, 5);
    }

    #[test]
    fn col_end_minimum_width() {
        // Ensure col_end is at least col_start + 1 (1 character minimum).
        // NOTE(review): this exercises the clamp expression inline rather than
        // calling analyzer code — confirm it mirrors the production logic.
        let col_start = 0u16;
        let col_end = 0u16; // Would happen if span.start == span.end
        let effective_col_end = col_end.max(col_start + 1);

        assert_eq!(
            effective_col_end, 1,
            "col_end should be at least col_start + 1"
        );
    }

    #[test]
    fn utf16_conversion_multiline_span() {
        // Test span that starts on one line and ends on another
        let source = "<?php\n$x = [\n  'a',\n  'b'\n];";
        //           Line 1: <?php
        //           Line 2: $x = [
        //           Line 3:   'a',
        //           Line 4:   'b'
        //           Line 5: ];

        // Start of array bracket on line 2
        let bracket_open = source.find('[').unwrap();
        let (line_start, _col_start) = test_offset_conversion(source, bracket_open as u32);
        assert_eq!(line_start, 2);

        // End of array bracket on line 5
        let bracket_close = source.rfind(']').unwrap();
        let (line_end, col_end) = test_offset_conversion(source, bracket_close as u32);
        assert_eq!(line_end, 5);
        assert_eq!(col_end, 0); // ']' is at column 0 on line 5
    }

    #[test]
    fn col_end_handles_emoji_in_span() {
        // A span ending after an emoji must count it as 2 UTF-16 units
        let source = "<?php\n$greeting = \"Hello 🎉\";";

        // Find emoji position
        let emoji_pos = source.find('🎉').unwrap();
        let hello_pos = source.find("Hello").unwrap();
        let after_emoji = emoji_pos + '🎉'.len_utf8();

        // Column at "Hello" on line 2
        let (line, col) = test_offset_conversion(source, hello_pos as u32);
        assert_eq!(line, 2);
        assert_eq!(col, 13); // Position of 'H' after `$greeting = "`

        // Column at emoji
        let (line, col) = test_offset_conversion(source, emoji_pos as u32);
        assert_eq!(line, 2);
        // Should be after "Hello " (13 + 5 + 1 = 19 UTF-16 units)
        assert_eq!(col, 19);

        // Column at the closing quote, i.e. the end of a span covering the emoji
        let (line, col) = test_offset_conversion(source, after_emoji as u32);
        assert_eq!(line, 2);
        assert_eq!(col, 21); // 19 + 2 UTF-16 units for the emoji
    }
}