mir-analyzer 0.17.2

Analysis engine for the mir PHP static analyzer
Documentation
use php_ast::ast::{ExprKind, FunctionCallExpr};
use php_ast::Span;

use std::sync::Arc;

use mir_codebase::storage::{Assertion, AssertionKind, FnParam, TemplateParam};
use mir_issues::{IssueKind, Severity};
use mir_types::{Atomic, Union};

use crate::context::Context;
use crate::expr::ExpressionAnalyzer;
use crate::generic::{check_template_bounds, infer_template_bindings};
use crate::symbol::SymbolKind;
use crate::taint::{classify_sink, is_expr_tainted, SinkKind};

use super::args::{
    check_args, expr_can_be_passed_by_reference, spread_element_type, CheckArgsParams,
};
use super::CallAnalyzer;

struct ResolvedFn {
    fqn: std::sync::Arc<str>,
    deprecated: Option<std::sync::Arc<str>>,
    params: Vec<FnParam>,
    template_params: Vec<TemplateParam>,
    assertions: Vec<Assertion>,
    return_ty_raw: Union,
}

/// Looks up `fqn` in the analyzer's database and, when an active function
/// node exists for it, copies out everything the call analysis needs.
///
/// Returns `None` when no node is registered under `fqn` or the node is not
/// active.
fn resolve_fn(ea: &ExpressionAnalyzer<'_>, fqn: &str) -> Option<ResolvedFn> {
    let db = ea.db;
    let node = match db.lookup_function_node(fqn) {
        Some(found) if found.active(db) => found,
        _ => return None,
    };

    // `inferred_return_type` is the priming-sweep-derived type, published
    // on `FunctionNode` via `MirDb::commit_inferred_return_types` after
    // each priming sweep returns.  Every entry path (batch `analyze`,
    // `re_analyze_file`, lazy-load reanalysis sweep, `analyze_source`)
    // runs a priming-sweep + commit before the issue-emitting pass.
    let inferred = node.inferred_return_type(db);

    // Preference order: declared return type, then the inferred one, then `mixed`.
    let return_ty_raw = match node.return_type(db) {
        Some(declared) => declared,
        None => inferred.unwrap_or_else(Union::mixed),
    };

    let fqn = node.fqn(db);
    let deprecated = node.deprecated(db);
    let params = node.params(db).to_vec();
    let template_params = node.template_params(db).to_vec();
    let assertions = node.assertions(db).to_vec();

    Some(ResolvedFn {
        fqn,
        deprecated,
        params,
        template_params,
        assertions,
        return_ty_raw,
    })
}

impl CallAnalyzer {
    pub fn analyze_function_call<'a, 'arena, 'src>(
        ea: &mut ExpressionAnalyzer<'a>,
        call: &FunctionCallExpr<'arena, 'src>,
        ctx: &mut Context,
        span: Span,
    ) -> Union {
        let fn_name = match &call.name.kind {
            ExprKind::Identifier(name) => (*name).to_string(),
            _ => {
                let callee_ty = ea.analyze(call.name, ctx);
                for arg in call.args.iter() {
                    ea.analyze(&arg.value, ctx);
                }
                for atomic in &callee_ty.types {
                    match atomic {
                        Atomic::TClosure { return_type, .. } => return *return_type.clone(),
                        Atomic::TCallable {
                            return_type: Some(rt),
                            ..
                        } => return *rt.clone(),
                        _ => {}
                    }
                }
                return Union::mixed();
            }
        };

        // Taint sink check (M19): before evaluating args so we can inspect raw exprs
        if let Some(sink_kind) = classify_sink(&fn_name) {
            for arg in call.args.iter() {
                if is_expr_tainted(&arg.value, ctx) {
                    let issue_kind = match sink_kind {
                        SinkKind::Html => IssueKind::TaintedHtml,
                        SinkKind::Sql => IssueKind::TaintedSql,
                        SinkKind::Shell => IssueKind::TaintedShell,
                    };
                    ea.emit(issue_kind, Severity::Error, span);
                    break;
                }
            }
        }

        // PHP resolves `foo()` as `\App\Ns\foo` first, then `\foo` if not found.
        // A leading `\` means explicit global namespace.
        let fn_name = fn_name
            .strip_prefix('\\')
            .map(|s: &str| s.to_string())
            .unwrap_or(fn_name);
        let resolved_fn_name: String = {
            let imports = ea.db.file_imports(&ea.file);
            let qualified = if let Some(imported) = imports.get(fn_name.as_str()) {
                imported.clone()
            } else if fn_name.contains('\\') {
                crate::db::resolve_name_via_db(ea.db, &ea.file, &fn_name)
            } else if let Some(ns) = ea.db.file_namespace(&ea.file) {
                format!("{}\\{}", ns, fn_name)
            } else {
                fn_name.clone()
            };
            let fn_exists = |name: &str| -> bool {
                let db = ea.db;
                db.lookup_function_node(name).is_some_and(|n| n.active(db))
            };
            if fn_exists(qualified.as_str()) {
                qualified
            } else if fn_exists(fn_name.as_str()) {
                fn_name.clone()
            } else {
                qualified
            }
        };

        // Pre-mark by-reference parameter variables as defined BEFORE evaluating args
        if let Some(resolved) = resolve_fn(ea, resolved_fn_name.as_str()) {
            for (i, param) in resolved.params.iter().enumerate() {
                if param.is_byref {
                    if param.is_variadic {
                        for arg in call.args.iter().skip(i) {
                            if let ExprKind::Variable(name) = &arg.value.kind {
                                let var_name = name.as_str().trim_start_matches('$');
                                if !ctx.var_is_defined(var_name) {
                                    ctx.set_var(var_name, Union::mixed());
                                }
                            }
                        }
                    } else if let Some(arg) = call.args.get(i) {
                        if let ExprKind::Variable(name) = &arg.value.kind {
                            let var_name = name.as_str().trim_start_matches('$');
                            if !ctx.var_is_defined(var_name) {
                                ctx.set_var(var_name, Union::mixed());
                            }
                        }
                    }
                }
            }
        }

        let arg_types: Vec<Union> = call
            .args
            .iter()
            .map(|arg| {
                let ty = ea.analyze(&arg.value, ctx);
                if arg.unpack {
                    spread_element_type(&ty)
                } else {
                    ty
                }
            })
            .collect();

        // When call_user_func / call_user_func_array is called with a bare string
        // literal as the callable argument, treat that string as a direct FQN
        // reference so the named function is not flagged as dead code.
        // Note: 'helper' always resolves to \helper (global) — no namespace
        // fallback applies to runtime callable strings.
        if matches!(
            resolved_fn_name.as_str(),
            "call_user_func" | "call_user_func_array"
        ) {
            if let Some(arg) = call.args.first() {
                if let ExprKind::String(name) = &arg.value.kind {
                    let fqn = name.strip_prefix('\\').unwrap_or(name);
                    if let Some(node) = ea.db.lookup_function_node(fqn).filter(|n| n.active(ea.db))
                    {
                        if !ea.inference_only {
                            let (line, col_start, col_end) = ea.span_to_ref_loc(arg.span);
                            ea.db.record_reference_location(crate::db::RefLoc {
                                symbol_key: Arc::from(node.fqn(ea.db).as_ref()),
                                file: ea.file.clone(),
                                line,
                                col_start,
                                col_end,
                            });
                        }
                    }
                }
            }
        }

        // compact() reads variables by string name at runtime; mark each string-literal arg as read
        if fn_name == "compact" {
            for arg in call.args.iter() {
                if let ExprKind::String(name) = &arg.value.kind {
                    ctx.read_vars.insert((*name).to_string());
                }
            }
        }

        if let Some(resolved) = resolve_fn(ea, resolved_fn_name.as_str()) {
            if !ea.inference_only {
                let (line, col_start, col_end) = ea.span_to_ref_loc(call.name.span);
                ea.db.record_reference_location(crate::db::RefLoc {
                    symbol_key: resolved.fqn.clone(),
                    file: ea.file.clone(),
                    line,
                    col_start,
                    col_end,
                });
            }
            let deprecated = resolved.deprecated;
            let params = resolved.params;
            let template_params = resolved.template_params;
            let return_ty_raw = resolved.return_ty_raw;

            if let Some(msg) = deprecated {
                ea.emit(
                    IssueKind::DeprecatedCall {
                        name: resolved_fn_name.clone(),
                        message: Some(msg).filter(|m| !m.is_empty()),
                    },
                    Severity::Info,
                    span,
                );
            }

            check_args(
                ea,
                CheckArgsParams {
                    fn_name: &fn_name,
                    params: &params,
                    arg_types: &arg_types,
                    arg_spans: &call.args.iter().map(|a| a.span).collect::<Vec<_>>(),
                    arg_names: &call
                        .args
                        .iter()
                        .map(|a| a.name.as_ref().map(|n| n.to_string_repr().into_owned()))
                        .collect::<Vec<_>>(),
                    arg_can_be_byref: &call
                        .args
                        .iter()
                        .map(|a| expr_can_be_passed_by_reference(&a.value))
                        .collect::<Vec<_>>(),
                    call_span: span,
                    has_spread: call.args.iter().any(|a| a.unpack),
                },
            );

            for (i, param) in params.iter().enumerate() {
                if param.is_byref {
                    if param.is_variadic {
                        for arg in call.args.iter().skip(i) {
                            if let ExprKind::Variable(name) = &arg.value.kind {
                                let var_name = name.as_str().trim_start_matches('$');
                                ctx.set_var(var_name, Union::mixed());
                            }
                        }
                    } else if let Some(arg) = call.args.get(i) {
                        if let ExprKind::Variable(name) = &arg.value.kind {
                            let var_name = name.as_str().trim_start_matches('$');
                            ctx.set_var(var_name, Union::mixed());
                        }
                    }
                }
            }

            let template_bindings = if !template_params.is_empty() {
                let bindings = infer_template_bindings(&template_params, &params, &arg_types);
                for (name, inferred, bound) in check_template_bounds(&bindings, &template_params) {
                    ea.emit(
                        IssueKind::InvalidTemplateParam {
                            name: name.to_string(),
                            expected_bound: format!("{bound}"),
                            actual: format!("{inferred}"),
                        },
                        Severity::Error,
                        span,
                    );
                }
                Some(bindings)
            } else {
                None
            };

            for assertion in resolved
                .assertions
                .iter()
                .filter(|a| a.kind == AssertionKind::Assert)
            {
                if let Some(index) = params.iter().position(|p| p.name == assertion.param) {
                    if let Some(arg) = call.args.get(index) {
                        if let ExprKind::Variable(name) = &arg.value.kind {
                            let asserted_ty = match &template_bindings {
                                Some(b) => assertion.ty.substitute_templates(b),
                                None => assertion.ty.clone(),
                            };
                            ctx.set_var(name.as_str().trim_start_matches('$'), asserted_ty);
                        }
                    }
                }
            }

            let return_ty = match &template_bindings {
                Some(bindings) => return_ty_raw.substitute_templates(bindings),
                None => return_ty_raw,
            };

            ea.record_symbol(
                call.name.span,
                SymbolKind::FunctionCall(resolved.fqn.clone()),
                return_ty.clone(),
            );
            return return_ty;
        }

        ea.emit(
            IssueKind::UndefinedFunction { name: fn_name },
            Severity::Error,
            span,
        );
        Union::mixed()
    }
}