echidna 0.9.0 - Docs.rs

//! Bytecode opcodes for the bytecode tape.
//!
//! Each opcode represents an elementary operation. The [`eval_forward`] and
//! [`reverse_partials`] functions evaluate / differentiate a single opcode.

use num_traits::Float;

use crate::kernels;

/// Sentinel used in `arg_indices[1]` for unary ops (the second argument slot is unused).
pub const UNUSED: u32 = u32::MAX;

/// Elementary operation codes for the bytecode tape.
///
/// Fits in a `u8` (44 variants). Binary ops use both `arg_indices` slots;
/// unary ops use slot 0 only (slot 1 = [`UNUSED`], except for [`OpCode::Powi`]
/// which stores the `i32` exponent reinterpreted as `u32` in slot 1).
#[repr(u8)]
#[derive(Clone, Copy, Debug, PartialEq, Eq, Hash)]
#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))]
pub enum OpCode {
    // ── Structural ──
    /// Input variable (leaf node).
    Input,
    /// Scalar constant.
    Const,

    // ── Binary arithmetic ──
    /// Addition.
    Add,
    /// Subtraction.
    Sub,
    /// Multiplication.
    Mul,
    /// Division.
    Div,
    /// Remainder.
    Rem,
    /// Floating-point power.
    Powf,
    /// Two-argument arctangent.
    Atan2,
    /// Euclidean distance.
    Hypot,
    /// Maximum of two values.
    Max,
    /// Minimum of two values.
    Min,

    // ── Unary ──
    /// Negation.
    Neg,
    /// Reciprocal (1/x).
    Recip,
    /// Square root.
    Sqrt,
    /// Cube root.
    Cbrt,
    /// Integer power. Exponent stored in `arg_indices[1]` as `exp as u32`.
    Powi,

    // ── Exp / Log ──
    /// Natural exponential (e^x).
    Exp,
    /// Base-2 exponential (2^x).
    Exp2,
    /// e^x - 1, accurate near zero.
    ExpM1,
    /// Natural logarithm.
    Ln,
    /// Base-2 logarithm.
    Log2,
    /// Base-10 logarithm.
    Log10,
    /// ln(1+x), accurate near zero.
    Ln1p,

    // ── Trig ──
    /// Sine.
    Sin,
    /// Cosine.
    Cos,
    /// Tangent.
    Tan,
    /// Arcsine.
    Asin,
    /// Arccosine.
    Acos,
    /// Arctangent.
    Atan,

    // ── Hyperbolic ──
    /// Hyperbolic sine.
    Sinh,
    /// Hyperbolic cosine.
    Cosh,
    /// Hyperbolic tangent.
    Tanh,
    /// Inverse hyperbolic sine.
    Asinh,
    /// Inverse hyperbolic cosine.
    Acosh,
    /// Inverse hyperbolic tangent.
    Atanh,

    // ── Misc ──
    /// Absolute value.
    Abs,
    /// Zero derivative but needed for re-evaluation.
    Signum,
    /// Zero derivative but needed for re-evaluation.
    Floor,
    /// Zero derivative but needed for re-evaluation.
    Ceil,
    /// Zero derivative but needed for re-evaluation.
    Round,
    /// Zero derivative but needed for re-evaluation.
    Trunc,
    /// Fractional part.
    Fract,

    // ── Custom ──
    /// User-registered custom operation. The callback index is stored in
    /// `arg_indices[1]` (for unary ops) or in a side table (for binary ops).
    Custom,
}

/// Returns true if this opcode is a nonsmooth operation.
///
/// Includes both operations with nontrivial subdifferentials (`Abs`, `Min`,
/// `Max` — where the two sides of the kink have different derivatives) and
/// step-function operations (`Signum`, `Floor`, `Ceil`, `Round`, `Trunc`,
/// `Fract` — where both sides have zero derivative, or in `Fract`'s case
/// identical derivative, but the value is discontinuous).
///
/// All nine ops are tracked for kink proximity detection via
/// [`NonsmoothInfo::active_kinks`](crate::nonsmooth::NonsmoothInfo::active_kinks).
/// Use [`has_nontrivial_subdifferential`] to distinguish the subset that
/// contributes distinct limiting Jacobians for Clarke enumeration.
#[inline]
#[must_use]
pub fn is_nonsmooth(op: OpCode) -> bool {
    matches!(
        op,
        OpCode::Abs
            | OpCode::Min
            | OpCode::Max
            | OpCode::Signum
            | OpCode::Floor
            | OpCode::Ceil
            | OpCode::Round
            | OpCode::Trunc
            | OpCode::Fract
    )
}

/// Returns true if forced branch choices produce distinct partial derivatives.
///
/// For `Abs`, `Min`, `Max`, the two sides of the kink have different derivatives
/// (e.g., `abs` has slope +1 vs −1). For step functions (`Signum`, `Floor`, `Ceil`,
/// `Round`, `Trunc`), both sides have zero derivative — forced branches produce
/// identical partials, so enumerating them in Clarke Jacobian adds cost with no
/// information.
#[inline]
#[must_use]
pub fn has_nontrivial_subdifferential(op: OpCode) -> bool {
    matches!(op, OpCode::Abs | OpCode::Min | OpCode::Max)
}

/// Compute reverse-mode partials with a forced branch choice for nonsmooth ops.
///
/// For nonsmooth ops, uses the given `sign` to select which branch's derivative
/// to return, regardless of the actual operand values. For all other ops,
/// delegates to [`reverse_partials`].
///
/// - `Abs`: `sign >= 0` → `(+1, 0)`, `sign < 0` → `(-1, 0)`
/// - `Max`: `sign >= 0` → `(1, 0)` (first arg wins), `sign < 0` → `(0, 1)`
/// - `Min`: `sign >= 0` → `(1, 0)` (first arg wins), `sign < 0` → `(0, 1)`
/// - `Signum`, `Floor`, `Ceil`, `Round`, `Trunc`: `(0, 0)` regardless of sign
/// - `Fract`: `(1, 0)` regardless of sign (derivative is 1 on both sides)
#[inline]
pub fn forced_reverse_partials<T: Float>(op: OpCode, a: T, b: T, r: T, sign: i8) -> (T, T) {
    let zero = T::zero();
    let one = T::one();
    match op {
        OpCode::Abs => {
            if sign >= 0 {
                (one, zero)
            } else {
                (-one, zero)
            }
        }
        OpCode::Max | OpCode::Min => {
            if sign >= 0 {
                (one, zero)
            } else {
                (zero, one)
            }
        }
        OpCode::Signum | OpCode::Floor | OpCode::Ceil | OpCode::Round | OpCode::Trunc => {
            (zero, zero)
        }
        OpCode::Fract => {
            // fract(x) = x - floor(x); derivative is 1 on both sides of the
            // integer discontinuity.
            (one, zero)
        }
        _ => reverse_partials(op, a, b, r),
    }
}

/// Evaluate a single opcode in the forward direction.
///
/// Generic over `T: Float` so Phase 3 can call it with `Dual<F>`.
///
/// For binary ops, `a` and `b` are the two operand values.
/// For unary ops, `a` is the operand value and `b` is ignored
/// (except [`OpCode::Powi`] where `b` bits encode the `i32` exponent).
#[inline]
pub fn eval_forward<T: Float>(op: OpCode, a: T, b: T) -> T {
    match op {
        OpCode::Input | OpCode::Const => {
            // values are already set during tape setup
            unreachable!("Input/Const should not be re-evaluated via eval_forward")
        }

        // Binary arithmetic
        OpCode::Add => a + b,
        OpCode::Sub => a - b,
        OpCode::Mul => a * b,
        OpCode::Div => a / b,
        OpCode::Rem => a % b,
        OpCode::Powf => a.powf(b),
        OpCode::Atan2 => a.atan2(b),
        OpCode::Hypot => a.hypot(b),
        OpCode::Max => {
            if a >= b || b.is_nan() {
                a
            } else {
                b
            }
        }
        OpCode::Min => {
            if a <= b || b.is_nan() {
                a
            } else {
                b
            }
        }

        // Unary
        OpCode::Neg => -a,
        OpCode::Recip => a.recip(),
        OpCode::Sqrt => a.sqrt(),
        OpCode::Cbrt => a.cbrt(),
        OpCode::Powi => {
            let exp = powi_exp_decode_raw(b.to_u32().unwrap_or(0));
            a.powi(exp)
        }

        // Exp/Log
        OpCode::Exp => a.exp(),
        OpCode::Exp2 => a.exp2(),
        OpCode::ExpM1 => a.exp_m1(),
        OpCode::Ln => a.ln(),
        OpCode::Log2 => a.log2(),
        OpCode::Log10 => a.log10(),
        OpCode::Ln1p => a.ln_1p(),

        // Trig
        OpCode::Sin => a.sin(),
        OpCode::Cos => a.cos(),
        OpCode::Tan => a.tan(),
        OpCode::Asin => a.asin(),
        OpCode::Acos => a.acos(),
        OpCode::Atan => a.atan(),

        // Hyperbolic
        OpCode::Sinh => a.sinh(),
        OpCode::Cosh => a.cosh(),
        OpCode::Tanh => a.tanh(),
        OpCode::Asinh => a.asinh(),
        OpCode::Acosh => a.acosh(),
        OpCode::Atanh => a.atanh(),

        // Misc
        OpCode::Abs => a.abs(),
        OpCode::Signum => a.signum(),
        OpCode::Floor => a.floor(),
        OpCode::Ceil => a.ceil(),
        OpCode::Round => a.round(),
        OpCode::Trunc => a.trunc(),
        OpCode::Fract => a.fract(),

        OpCode::Custom => unreachable!("Custom ops are dispatched separately in the tape"),
    }
}

/// Compute reverse-mode partial derivatives for a single opcode.
///
/// Returns `(∂result/∂arg0, ∂result/∂arg1)`.
/// For unary ops the second partial is `T::zero()`.
///
/// `a`, `b` are the operand values (at recording or re-evaluation time).
/// `r` is the result value.
///
/// Generic over `T: Float` so Phase 3 can call it with `Dual<F>` for
/// forward-over-reverse.
#[inline]
pub fn reverse_partials<T: Float>(op: OpCode, a: T, b: T, r: T) -> (T, T) {
    let zero = T::zero();
    let one = T::one();
    match op {
        OpCode::Input | OpCode::Const => (zero, zero),

        // Binary
        OpCode::Add => (one, one),
        OpCode::Sub => (one, -one),
        OpCode::Mul => (b, a),
        OpCode::Div => {
            // d/da(a/b) = 1/b, d/db(a/b) = -a/b² = -(a/b)/b = -r/b
            // Using -r/b avoids squaring 1/b which overflows when |b| < ~1e-154
            let inv = one / b;
            (inv, -r * inv)
        }
        // a % b = a - b*trunc(a/b). Treating trunc as piecewise-constant (zero derivative a.e.):
        // d/da = 1, d/db = -trunc(a/b). Matches Rust's truncation-based remainder.
        OpCode::Rem => (one, -(a / b).trunc()),
        OpCode::Powf => {
            // d/da a^b = b * a^(b-1)
            // d/db a^b = a^b * ln(a)  (→ 0 when a^b = 0, since lim_{a→0+} a^b*ln(a) = 0 for b>0)
            if b == zero {
                // d/da(a^0) = 0, d/db(a^b)|_{b=0} = a^0 * ln(a) = ln(a) for a > 0
                let db = if a > zero { a.ln() } else { zero };
                (zero, db)
            } else {
                let da = if a == zero || r == zero || !a.is_finite() || !r.is_finite() {
                    // Direct powf avoids 0/0 when a=0, catches underflow when
                    // r=a^b underflows to 0 but a!=0, and avoids Inf/Inf = NaN
                    // when either a or r is infinite (e.g., Inf^2 = Inf, whose
                    // true derivative at Inf is Inf, not NaN).
                    b * a.powf(b - one)
                } else {
                    b * r / a
                };
                // For `a <= 0`, `a.ln()` is NaN (stdlib real-valued). A finite
                // `r` at `a < 0` means `b` was integer (else `r = NaN`); the
                // derivative w.r.t. `b` at an integer `b` is not classically
                // defined, and the convention here is 0 — matching the
                // forward-mode `Dual::powf` constant-integer fast path. This
                // also prevents NaN from contaminating a live tape slot for
                // `b` on reverse sweep.
                let db = if r == zero || a <= zero {
                    zero
                } else {
                    r * a.ln()
                };
                (da, db)
            }
        }
        // Atan2 and Hypot delegate to `kernels` so the overflow-safe factoring
        // (a/h)/h pattern stays the single source of truth across the AD types.
        OpCode::Atan2 => kernels::atan2_partials(a, b),
        OpCode::Hypot => kernels::hypot_partials(a, b, r),
        OpCode::Max => {
            if a >= b || b.is_nan() {
                (one, zero)
            } else {
                (zero, one)
            }
        }
        OpCode::Min => {
            if a <= b || b.is_nan() {
                (one, zero)
            } else {
                (zero, one)
            }
        }

        // Unary
        OpCode::Neg => (-one, zero),
        OpCode::Recip => {
            // d/da (1/a) = -1/a²
            let inv = one / a;
            (-inv * inv, zero)
        }
        // sqrt'(x) = 1/(2√x) and cbrt'(x) = 1/(3x^{2/3}) are mathematically
        // singular at x=0, producing IEEE inf. This is the correct answer.
        OpCode::Sqrt => {
            let two = one + one;
            (one / (two * r), zero)
        }
        OpCode::Cbrt => {
            let three = T::from(3.0).unwrap();
            (one / (three * r * r), zero)
        }
        OpCode::Powi => {
            let exp = powi_exp_decode_raw(b.to_u32().unwrap_or(0));
            if exp == 0 {
                (zero, zero) // d/dx(x^0) = 0
            } else if exp == i32::MIN {
                // exp - 1 would overflow i32; use r/a to avoid precision loss
                let n = T::from(exp).unwrap();
                (n * r / a, zero)
            } else {
                let n = T::from(exp).unwrap();
                (n * a.powi(exp - 1), zero)
            }
        }

        // Exp/Log. Domain-restricted ops emit a NaN partial strictly
        // outside their valid interval (a < 0 for Ln/Log2/Log10, a < -1
        // for Ln1p, |a| > 1 for Atanh) so a caller that supplied
        // out-of-domain inputs sees `(NaN, 0)` instead of a numerically
        // plausible but semantically meaningless finite partial
        // (e.g. `1/-1 = -1` for `Ln` at a = -1). Boundary values
        // (a = 0 for Ln, a = -1 for Ln1p, |a| = 1 for Atanh) are left
        // to the IEEE arithmetic — the partial evaluates to ±Inf, which
        // is the correct one-sided derivative limit.
        OpCode::Exp => (r, zero), // d/da e^a = e^a = r
        OpCode::Exp2 => (r * T::ln(T::from(2.0).unwrap()), zero),
        OpCode::ExpM1 => (r + one, zero), // d/da (e^a - 1) = e^a = r+1
        OpCode::Ln => {
            if a >= zero {
                (one / a, zero)
            } else {
                (T::nan(), zero)
            }
        }
        OpCode::Log2 => {
            if a >= zero {
                (one / (a * T::ln(T::from(2.0).unwrap())), zero)
            } else {
                (T::nan(), zero)
            }
        }
        OpCode::Log10 => {
            if a >= zero {
                (one / (a * T::ln(T::from(10.0).unwrap())), zero)
            } else {
                (T::nan(), zero)
            }
        }
        OpCode::Ln1p => {
            if a >= -one {
                (one / (one + a), zero)
            } else {
                (T::nan(), zero)
            }
        }

        // Trig
        OpCode::Sin => (a.cos(), zero),
        OpCode::Cos => (-a.sin(), zero),
        OpCode::Tan => {
            let c = a.cos();
            (one / (c * c), zero)
        }
        OpCode::Asin => (one / ((one - a) * (one + a)).sqrt(), zero),
        OpCode::Acos => (-one / ((one - a) * (one + a)).sqrt(), zero),
        OpCode::Atan => (kernels::atan_deriv(a), zero),

        // Hyperbolic
        OpCode::Sinh => (a.cosh(), zero),
        OpCode::Cosh => (a.sinh(), zero),
        OpCode::Tanh => {
            let c = a.cosh();
            (one / (c * c), zero)
        }
        OpCode::Asinh => (kernels::asinh_deriv(a), zero),
        OpCode::Acosh => (kernels::acosh_deriv(a), zero),
        OpCode::Atanh => {
            if a >= -one && a <= one {
                (one / ((one - a) * (one + a)), zero)
            } else {
                (T::nan(), zero)
            }
        }

        // Misc. `Abs` uses the symmetric midpoint 0 of the Clarke
        // subdifferential [-1, 1] at the kink; relying on `a.signum()`
        // leaks the sign bit of ±0 (`signum(+0) = +0`, `signum(-0) = -0`),
        // which makes algebraically equivalent tape positions report
        // different subgradients depending on how the zero was produced.
        OpCode::Abs => {
            if a == zero {
                (zero, zero)
            } else {
                (a.signum(), zero)
            }
        }
        OpCode::Signum | OpCode::Floor | OpCode::Ceil | OpCode::Round | OpCode::Trunc => {
            (zero, zero)
        }
        OpCode::Fract => (one, zero),

        OpCode::Custom => unreachable!("Custom ops are dispatched separately in the tape"),
    }
}

/// Decode a `powi` exponent directly from the raw `u32` in `arg_indices[1]`.
///
/// Avoids the float round-trip of `powi_exp_decode`, which silently fails
/// for f32 when the u32 encoding exceeds 2^24 (any negative exponent).
#[inline]
#[must_use]
pub fn powi_exp_decode_raw(b_idx: u32) -> i32 {
    b_idx as i32
}

/// Encode a `powi` exponent as a value that can be stored in `arg_indices[1]`.
///
/// This is a bit-preserving reinterpretation (`i32 as u32`), NOT a numeric
/// conversion. The round-trip `powi_exp_decode_raw(powi_exp_encode(n)) == n`
/// holds for all i32 values including negatives and `i32::MIN`.
#[inline]
#[must_use]
pub fn powi_exp_encode(exp: i32) -> u32 {
    exp as u32
}