blackscholes 0.24.0

//
// This source code resides at www.jaeckel.org/LetsBeRational.7z .
//
// ======================================================================================
// Copyright © 2013-2017 Peter Jäckel.
//
// Permission to use, copy, modify, and distribute this software is freely granted,
// provided that this notice is preserved.
//
// WARRANTY DISCLAIMER
// The Software is provided "as is" without warranty of any kind, either express or implied,
// including without limitation any implied warranties of condition, uninterrupted use,
// merchantability, fitness for a particular purpose, or non-infringement.
// ======================================================================================
//

#include "lets_be_rational.h"

// To cross-compile on a command line, you could just use something like
//
//   i686-w64-mingw32-g++ -w -fpermissive -shared -DNDEBUG -O3 erf_cody.cpp rationalcubic.cpp normaldistribution.cpp lets_be_rational.cpp xlcall.cpp excel_registration.cpp xlcall32.lib -o lets_be_rational.xll -static-libstdc++ -static-libgcc -s
//
// To compile into a shared library on non-Windows systems, you could try
//
//   g++ -fPIC -shared -DNDEBUG -Ofast erf_cody.cpp rationalcubic.cpp normaldistribution.cpp lets_be_rational.cpp -o lets_be_rational.so
//

#if defined(_MSC_VER)
#define NOMINMAX // to suppress MSVC's definitions of min() and max()
// These four pragmas are the equivalent to /fp:fast.
#pragma float_control(except, off)
#pragma float_control(precise, off)
#pragma fp_contract(on)
#pragma fenv_access(off)
#endif

#include "normaldistribution.h"
#include "rationalcubic.h"
#include <algorithm>
#include <cmath>
#include <float.h>
#if defined(_WIN32) || defined(_WIN64)
#include <windows.h>
#endif

#define TWO_PI 6.283185307179586476925286766559005768394338798750
#define SQRT_PI_OVER_TWO 1.253314137315500251207882642405522626503493370305 // sqrt(pi/2) to avoid misinterpretation.
#define SQRT_THREE 1.732050807568877293527446341505872366942805253810
#define SQRT_ONE_OVER_THREE 0.577350269189625764509148780501957455647601751270
#define TWO_PI_OVER_SQRT_TWENTY_SEVEN 1.209199576156145233729385505094770488189377498728 // 2*pi/sqrt(27)
#define PI_OVER_SIX 0.523598775598298873077107230546583814032861566563

namespace {
static const double SQRT_DBL_EPSILON = sqrt(DBL_EPSILON);
static const double FOURTH_ROOT_DBL_EPSILON = sqrt(SQRT_DBL_EPSILON);
static const double EIGHTH_ROOT_DBL_EPSILON = sqrt(FOURTH_ROOT_DBL_EPSILON);
static const double SIXTEENTH_ROOT_DBL_EPSILON = sqrt(EIGHTH_ROOT_DBL_EPSILON);
static const double SQRT_DBL_MIN = sqrt(DBL_MIN);
static const double SQRT_DBL_MAX = sqrt(DBL_MAX);

// Set this to 0 if you want positive results for (positive) denormalised inputs, else to DBL_MIN.
// Note that you cannot achieve full machine accuracy from denormalised inputs!
static const double DENORMALISATION_CUTOFF = 0;

static const double VOLATILITY_VALUE_TO_SIGNAL_PRICE_IS_BELOW_INTRINSIC = -DBL_MAX;
static const double VOLATILITY_VALUE_TO_SIGNAL_PRICE_IS_ABOVE_MAXIMUM = DBL_MAX;

inline bool is_below_horizon(double x) {
    return fabs(x) < DENORMALISATION_CUTOFF;
} // This weeds out denormalised (a.k.a. 'subnormal') numbers.

// See https://www.kernel.org/doc/Documentation/atomic_ops.txt for further details on this simplistic implementation of an atomic flag that is *not* volatile.
typedef struct {
#if defined(_MSC_VER) || defined(_WIN32) || defined(_WIN64)
    long data;
#else
    int data;
#endif
} atomic_t;

static atomic_t implied_volatility_maximum_iterations = {2}; // (DBL_DIG*20)/3 ≈ 100 . Only needed when the iteration effectively alternates Householder/Halley/Newton steps and binary nesting due to roundoff truncation.

#ifdef ENABLE_SWITCHING_THE_OUTPUT_TO_ITERATION_COUNT
static atomic_t implied_volatility_output_type = {0};
inline double implied_volatility_output(int count, double volatility) {
    return implied_volatility_output_type.data > 0 ? count : volatility;
}
#else
inline double implied_volatility_output(int count, double volatility) {
    return volatility;
}
#endif

#ifdef ENABLE_CHANGING_THE_HOUSEHOLDER_METHOD_ORDER
static atomic_t implied_volatility_householder_method_order = {4};
inline double householder_factor(double newton, double halley, double hh3) {
    return implied_volatility_householder_method_order.data > 3 ? (1 + 0.5 * halley * newton) / (1 + newton * (halley + hh3 * newton / 6)) : (implied_volatility_householder_method_order.data > 2 ? 1 / (1 + 0.5 * halley * newton) : 1);
}
#else
inline double householder_factor(double newton, double halley, double hh3) {
    return (1 + 0.5 * halley * newton) / (1 + newton * (halley + hh3 * newton / 6));
}
#endif

} // namespace

EXPORT_EXTERN_C double set_implied_volatility_maximum_iterations(double t) {
    int i = (int)t;
    if (i >= 0) {
#if defined(_MSC_VER) || defined(_WIN32) || defined(_WIN64)
        InterlockedExchange(&(implied_volatility_maximum_iterations.data), i);
#elif defined(__x86__) || defined(__x86_64__) || defined(__arm64__)
        implied_volatility_maximum_iterations.data = i;
#else
#error Atomic operations not implemented for this platform.
#endif
    }
    return implied_volatility_maximum_iterations.data;
}

#ifdef ENABLE_SWITCHING_THE_OUTPUT_TO_ITERATION_COUNT
EXPORT_EXTERN_C double set_implied_volatility_output_type(double t) {
    int i = (int)t;
#if defined(_MSC_VER) || defined(_WIN32) || defined(_WIN64)
    InterlockedExchange(&(implied_volatility_output_type.data), i);
#elif defined(__x86__) || defined(__x86_64__) || defined(__arm64__)
    implied_volatility_output_type.data = i;
#else
#error Atomic operations not implemented for this platform.
#endif
    return implied_volatility_output_type.data;
}
#endif

#ifdef ENABLE_CHANGING_THE_HOUSEHOLDER_METHOD_ORDER
EXPORT_EXTERN_C double set_implied_volatility_householder_method_order(double t) {
    int i = (int)t;
    if (i >= 0) {
#if defined(_MSC_VER) || defined(_WIN32) || defined(_WIN64)
        InterlockedExchange(&(implied_volatility_householder_method_order.data), i);
#elif defined(__x86__) || defined(__x86_64__) || defined(__arm64__)
        implied_volatility_householder_method_order.data = i;
#else
#error Atomic operations not implemented for this platform.
#endif
    }
    return implied_volatility_householder_method_order.data;
}
#endif

double normalised_intrinsic(double x, double q /* q=±1 */) {
    if (q * x <= 0)
        return 0;
    const double x2 = x * x;
    if (x2 < 98 * FOURTH_ROOT_DBL_EPSILON) // The factor 98 is computed from last coefficient: √√92897280 = 98.1749
        return fabs(std::max((q < 0 ? -1 : 1) * x * (1 + x2 * ((1.0 / 24.0) + x2 * ((1.0 / 1920.0) + x2 * ((1.0 / 322560.0) + (1.0 / 92897280.0) * x2)))), 0.0));
    const double b_max = exp(0.5 * x), one_over_b_max = 1 / b_max;
    return fabs(std::max((q < 0 ? -1 : 1) * (b_max - one_over_b_max), 0.));
}

double normalised_intrinsic_call(double x) {
    return normalised_intrinsic(x, 1);
}

// Asymptotic expansion of
//
//              b  =  Φ(h+t)·exp(x/2) - Φ(h-t)·exp(-x/2)
// with
//              h  =  x/s   and   t  =  s/2
// which makes
//              b  =  Φ(h+t)·exp(h·t) - Φ(h-t)·exp(-h·t)
//
//                    exp(-(h²+t²)/2)
//                 =  ---------------  ·  [ Y(h+t) - Y(h-t) ]
//                        √(2π)
// with
//           Y(z) := Φ(z)/φ(z)
//
// for large negative (t-|h|) by the aid of Abramowitz & Stegun (26.2.12) where Φ(z) = φ(z)/|z|·[1-1/z^2+...].
// We define
//                     r
//         A(h,t) :=  --- · [ Y(h+t) - Y(h-t) ]
//                     t
//
// with r := (h+t)·(h-t) and give an expansion for A(h,t) in q:=(h/r)² expressed in terms of e:=(t/h)² .
double asymptotic_expansion_of_normalised_black_call(double h, double t) {
    const double e = (t / h) * (t / h), r = ((h + t) * (h - t)), q = (h / r) * (h / r);
    // 17th order asymptotic expansion of A(h,t) in q, sufficient for Φ(h) [and thus y(h)] to have relative accuracy of 1.64E-16 for h <= η  with  η:=-10.
    const double asymptotic_expansion_sum = (2.0 + q * (-6.0E0 - 2.0 * e + 3.0 * q * (1.0E1 + e * (2.0E1 + 2.0 * e) + 5.0 * q * (-1.4E1 + e * (-7.0E1 + e * (-4.2E1 - 2.0 * e)) + 7.0 * q * (1.8E1 + e * (1.68E2 + e * (2.52E2 + e * (7.2E1 + 2.0 * e))) + 9.0 * q * (-2.2E1 + e * (-3.3E2 + e * (-9.24E2 + e * (-6.6E2 + e * (-1.1E2 - 2.0 * e)))) + 1.1E1 * q * (2.6E1 + e * (5.72E2 + e * (2.574E3 + e * (3.432E3 + e * (1.43E3 + e * (1.56E2 + 2.0 * e))))) + 1.3E1 * q * (-3.0E1 + e * (-9.1E2 + e * (-6.006E3 + e * (-1.287E4 + e * (-1.001E4 + e * (-2.73E3 + e * (-2.1E2 - 2.0 * e)))))) + 1.5E1 * q * (3.4E1 + e * (1.36E3 + e * (1.2376E4 + e * (3.8896E4 + e * (4.862E4 + e * (2.4752E4 + e * (4.76E3 + e * (2.72E2 + 2.0 * e))))))) + 1.7E1 * q * (-3.8E1 + e * (-1.938E3 + e * (-2.3256E4 + e * (-1.00776E5 + e * (-1.84756E5 + e * (-1.51164E5 + e * (-5.4264E4 + e * (-7.752E3 + e * (-3.42E2 - 2.0 * e)))))))) + 1.9E1 * q * (4.2E1 + e * (2.66E3 + e * (4.0698E4 + e * (2.3256E5 + e * (5.8786E5 + e * (7.05432E5 + e * (4.0698E5 + e * (1.08528E5 + e * (1.197E4 + e * (4.2E2 + 2.0 * e))))))))) + 2.1E1 * q * (-4.6E1 + e * (-3.542E3 + e * (-6.7298E4 + e * (-4.90314E5 + e * (-1.63438E6 + e * (-2.704156E6 + e * (-2.288132E6 + e * (-9.80628E5 + e * (-2.01894E5 + e * (-1.771E4 + e * (-5.06E2 - 2.0 * e)))))))))) + 2.3E1 * q * (5.0E1 + e * (4.6E3 + e * (1.0626E5 + e * (9.614E5 + e * (4.08595E6 + e * (8.9148E6 + e * (1.04006E7 + e * (6.53752E6 + e * (2.16315E6 + e * (3.542E5 + e * (2.53E4 + e * (6.0E2 + 2.0 * e))))))))))) + 2.5E1 * q * (-5.4E1 + e * (-5.85E3 + e * (-1.6146E5 + e * (-1.77606E6 + e * (-9.37365E6 + e * (-2.607579E7 + e * (-4.01166E7 + e * (-3.476772E7 + e * (-1.687257E7 + e * (-4.44015E6 + e * (-5.9202E5 + e * (-3.51E4 + e * (-7.02E2 - 2.0 * e)))))))))))) + 2.7E1 * q * (5.8E1 + e * (7.308E3 + e * (2.3751E5 + e * (3.12156E6 + e * (2.003001E7 + e * (6.919458E7 + e * (1.3572783E8 + e * (1.5511752E8 + e * (1.0379187E8 + e * (4.006002E7 + e * (8.58429E6 + e * (9.5004E5 + e * (4.7502E4 + e * (8.12E2 + 2.0 * e))))))))))))) + 2.9E1 * q * (-6.2E1 + e * (-8.99E3 + e * (-3.39822E5 + e * (-5.25915E6 + e * (-4.032015E7 + e * (-1.6934463E8 + e * (-4.1250615E8 + e * (-6.0108039E8 + e * (-5.3036505E8 + e * (-2.8224105E8 + e * (-8.870433E7 + e * (-1.577745E7 + e * (-1.472562E6 + e * (-6.293E4 + e * (-9.3E2 - 2.0 * e)))))))))))))) + 3.1E1 * q * (6.6E1 + e * (1.0912E4 + e * (4.74672E5 + e * (8.544096E6 + e * (7.71342E7 + e * (3.8707344E8 + e * (1.14633288E9 + e * (2.07431664E9 + e * (2.33360622E9 + e * (1.6376184E9 + e * (7.0963464E8 + e * (1.8512208E8 + e * (2.7768312E7 + e * (2.215136E6 + e * (8.184E4 + e * (1.056E3 + 2.0 * e))))))))))))))) + 3.3E1 * (-7.0E1 + e * (-1.309E4 + e * (-6.49264E5 + e * (-1.344904E7 + e * (-1.4121492E8 + e * (-8.344518E8 + e * (-2.9526756E9 + e * (-6.49588632E9 + e * (-9.0751353E9 + e * (-8.1198579E9 + e * (-4.6399188E9 + e * (-1.6689036E9 + e * (-3.67158792E8 + e * (-4.707164E7 + e * (-3.24632E6 + e * (-1.0472E5 + e * (-1.19E3 - 2.0 * e))))))))))))))))) * q)))))))))))))))));
    const double b = ONE_OVER_SQRT_TWO_PI * exp((-0.5 * (h * h + t * t))) * (t / r) * asymptotic_expansion_sum;
    return fabs(std::max(b, 0.));
}

namespace { /* η */
static const double asymptotic_expansion_accuracy_threshold = -10;
}

double normalised_black_call_using_erfcx(double h, double t) {
    // Given h = x/s and t = s/2, the normalised Black function can be written as
    //
    //     b(x,s)  =  Φ(x/s+s/2)·exp(x/2)  -   Φ(x/s-s/2)·exp(-x/2)
    //             =  Φ(h+t)·exp(h·t)      -   Φ(h-t)·exp(-h·t) .                     (*)
    //
    // It is mentioned in section 4 (and discussion of figures 2 and 3) of George Marsaglia's article "Evaluating the
    // Normal Distribution" (available at http://www.jstatsoft.org/v11/a05/paper) that the error of any cumulative normal
    // function Φ(z) is dominated by the hardware (or compiler implementation) accuracy of exp(-z²/2) which is not
    // reliably more than 14 digits when z is large. The accuracy of Φ(z) typically starts coming down to 14 digits when
    // z is around -8. For the (normalised) Black function, as above in (*), this means that we are subtracting two terms
    // that are each products of terms with about 14 digits of accuracy. The net result, in each of the products, is even
    // less accuracy, and then we are taking the difference of these terms, resulting in even less accuracy. When we are
    // using the asymptotic expansion asymptotic_expansion_of_normalised_black_call() invoked in the second branch at the
    // beginning of this function, we are using only *one* exponential instead of 4, and this improves accuracy. It
    // actually improves it a bit more than you would expect from the above logic, namely, almost the full two missing
    // digits (in 64 bit IEEE floating point).  Unfortunately, going higher order in the asymptotic expansion will not
    // enable us to gain more accuracy (by extending the range in which we could use the expansion) since the asymptotic
    // expansion, being a divergent series, can never gain 16 digits of accuracy for z=-8 or just below. The best you can
    // get is about 15 digits (just), for about 35 terms in the series (26.2.12), which would result in an prohibitively
    // long expression in function asymptotic expansion asymptotic_expansion_of_normalised_black_call(). In this last branch,
    // here, we therefore take a different tack as follows.
    //     The "scaled complementary error function" is defined as erfcx(z) = exp(z²)·erfc(z). Cody's implementation of this
    // function as published in "Rational Chebyshev approximations for the error function", W. J. Cody, Math. Comp., 1969, pp.
    // 631-638, uses rational functions that theoretically approximates erfcx(x) to at least 18 significant decimal digits,
    // *without* the use of the exponential function when x>4, which translates to about z<-5.66 in Φ(z). To make use of it,
    // we write
    //             Φ(z) = exp(-z²/2)·erfcx(-z/√2)/2
    //
    // to transform the normalised black function to
    //
    //   b   =  ½ · exp(-½(h²+t²)) · [ erfcx(-(h+t)/√2) -  erfcx(-(h-t)/√2) ]
    //
    // which now involves only one exponential, instead of three, when |h|+|t| > 5.66 , and the difference inside the
    // square bracket is between the evaluation of two rational functions, which, typically, according to Marsaglia,
    // retains the full 16 digits of accuracy (or just a little less than that).
    //
    const double b = 0.5 * exp(-0.5 * (h * h + t * t)) * (erfcx_cody(-ONE_OVER_SQRT_TWO * (h + t)) - erfcx_cody(-ONE_OVER_SQRT_TWO * (h - t)));
    return fabs(std::max(b, 0.0));
}

// Calculation of
//
//              b  =  Φ(h+t)·exp(h·t) - Φ(h-t)·exp(-h·t)
//
//                    exp(-(h²+t²)/2)
//                 =  --------------- ·  [ Y(h+t) - Y(h-t) ]
//                        √(2π)
// with
//           Y(z) := Φ(z)/φ(z)
//
// using an expansion of Y(h+t)-Y(h-t) for small t to twelvth order in t.
// Theoretically accurate to (better than) precision  ε = 2.23E-16  when  h<=0  and  t < τ  with  τ := 2·ε^(1/16) ≈ 0.21.
// The main bottleneck for precision is the coefficient a:=1+h·Y(h) when |h|>1 .
double small_t_expansion_of_normalised_black_call(double h, double t) {
    // Y(h) := Φ(h)/φ(h) = √(π/2)·erfcx(-h/√2)
    // a := 1+h·Y(h)  --- Note that due to h<0, and h·Y(h) -> -1 (from above) as h -> -∞, we also have that a>0 and a -> 0 as h -> -∞
    // w := t² , h2 := h²
    const double a = 1 + h * (0.5 * SQRT_TWO_PI) * erfcx_cody(-ONE_OVER_SQRT_TWO * h), w = t * t, h2 = h * h;
    const double expansion = 2 * t * (a + w * ((-1 + 3 * a + a * h2) / 6 + w * ((-7 + 15 * a + h2 * (-1 + 10 * a + a * h2)) / 120 + w * ((-57 + 105 * a + h2 * (-18 + 105 * a + h2 * (-1 + 21 * a + a * h2))) / 5040 + w * ((-561 + 945 * a + h2 * (-285 + 1260 * a + h2 * (-33 + 378 * a + h2 * (-1 + 36 * a + a * h2)))) / 362880 + w * ((-6555 + 10395 * a + h2 * (-4680 + 17325 * a + h2 * (-840 + 6930 * a + h2 * (-52 + 990 * a + h2 * (-1 + 55 * a + a * h2))))) / 39916800 + ((-89055 + 135135 * a + h2 * (-82845 + 270270 * a + h2 * (-20370 + 135135 * a + h2 * (-1926 + 25740 * a + h2 * (-75 + 2145 * a + h2 * (-1 + 78 * a + a * h2)))))) * w) / 6227020800.0))))));
    const double b = ONE_OVER_SQRT_TWO_PI * exp((-0.5 * (h * h + t * t))) * expansion;
    return fabs(std::max(b, 0.0));
}

namespace { /* τ */
static const double small_t_expansion_of_normalised_black_threshold = 2 * SIXTEENTH_ROOT_DBL_EPSILON;
}

//     b(x,s)  =  Φ(x/s+s/2)·exp(x/2)  -   Φ(x/s-s/2)·exp(-x/2)
//             =  Φ(h+t)·exp(x/2)      -   Φ(h-t)·exp(-x/2)
// with
//              h  =  x/s   and   t  =  s/2
double normalised_black_call_using_norm_cdf(double x, double s) {
    const double h = x / s, t = 0.5 * s, b_max = exp(0.5 * x), b = norm_cdf(h + t) * b_max - norm_cdf(h - t) / b_max;
    return fabs(std::max(b, 0.0));
}

//
// Introduced on 2017-02-18
//
//     b(x,s)  =  Φ(x/s+s/2)·exp(x/2)  -   Φ(x/s-s/2)·exp(-x/2)
//             =  Φ(h+t)·exp(x/2)      -   Φ(h-t)·exp(-x/2)
//             =  ½ · exp(-u²-v²) · [ erfcx(u-v) -  erfcx(u+v) ]
//             =  ½ · [ exp(x/2)·erfc(u-v)     -  exp(-x/2)·erfc(u+v)    ]
//             =  ½ · [ exp(x/2)·erfc(u-v)     -  exp(-u²-v²)·erfcx(u+v) ]
//             =  ½ · [ exp(-u²-v²)·erfcx(u-v) -  exp(-x/2)·erfc(u+v)    ]
// with
//              h  =  x/s ,       t  =  s/2 ,
// and
//              u  = -h/√2  and   v  =  t/√2 .
//
// Cody's erfc() and erfcx() functions each, for some values of their argument, involve the evaluation
// of the exponential function exp(). The normalised Black function requires additional evaluation(s)
// of the exponential function irrespective of which of the above formulations is used. However, the total
// number of exponential function evaluations can be minimised by a judicious choice of one of the above
// formulations depending on the input values and the branch logic in Cody's erfc() and erfcx().
//
double normalised_black_call_with_optimal_use_of_codys_functions(double x, double s) {
    const double codys_threshold = 0.46875, h = x / s, t = 0.5 * s, q1 = -ONE_OVER_SQRT_TWO * (h + t), q2 = -ONE_OVER_SQRT_TWO * (h - t);
    double two_b;
    if (q1 < codys_threshold)
        if (q2 < codys_threshold)
            two_b = exp(0.5 * x) * erfc_cody(q1) - exp(-0.5 * x) * erfc_cody(q2);
        else
            two_b = exp(0.5 * x) * erfc_cody(q1) - exp(-0.5 * (h * h + t * t)) * erfcx_cody(q2);
    else if (q2 < codys_threshold)
        two_b = exp(-0.5 * (h * h + t * t)) * erfcx_cody(q1) - exp(-0.5 * x) * erfc_cody(q2);
    else
        two_b = exp(-0.5 * (h * h + t * t)) * (erfcx_cody(q1) - erfcx_cody(q2));
    return fabs(std::max(0.5 * two_b, 0.0));
}

EXPORT_EXTERN_C double normalised_black_call(double x, double s) {
    if (x > 0)
        return normalised_intrinsic_call(x) + normalised_black_call(-x, s); // In the money.
    if (s <= fabs(x) * DENORMALISATION_CUTOFF)
        return normalised_intrinsic_call(x); // sigma=0 -> intrinsic value.
    // Denote h := x/s and t := s/2.
    // We evaluate the condition |h|>|η|, i.e., h<η  &&  t < τ+|h|-|η|  avoiding any divisions by s , where η = asymptotic_expansion_accuracy_threshold  and τ = small_t_expansion_of_normalised_black_threshold .
    if (x < s * asymptotic_expansion_accuracy_threshold && 0.5 * s * s + x < s * (small_t_expansion_of_normalised_black_threshold + asymptotic_expansion_accuracy_threshold))
        return asymptotic_expansion_of_normalised_black_call(x / s, 0.5 * s);
    if (0.5 * s < small_t_expansion_of_normalised_black_threshold)
        return small_t_expansion_of_normalised_black_call(x / s, 0.5 * s);
#ifdef DO_NOT_OPTIMISE_NORMALISED_BLACK_IN_REGIONS_3_AND_4_FOR_CODYS_FUNCTIONS
    // When b is more than, say, about 85% of b_max=exp(x/2), then b is dominated by the first of the two terms in the Black formula, and we retain more accuracy by not attempting to combine the two terms in any way.
    // We evaluate the condition h+t>0.85  avoiding any divisions by s.
    if (x + 0.5 * s * s > s * 0.85)
        return normalised_black_call_using_norm_cdf(x, s);
    return normalised_black_call_using_erfcx(x / s, 0.5 * s);
#else
    return normalised_black_call_with_optimal_use_of_codys_functions(x, s);
#endif
}

inline double square(double x) {
    return x * x;
}

EXPORT_EXTERN_C double normalised_vega(double x, double s) {
    const double ax = fabs(x);
    return (ax <= 0) ? ONE_OVER_SQRT_TWO_PI * exp(-0.125 * s * s) : ((s <= 0 || s <= ax * SQRT_DBL_MIN) ? 0 : ONE_OVER_SQRT_TWO_PI * exp(-0.5 * (square(x / s) + square(0.5 * s))));
}

EXPORT_EXTERN_C double normalised_black(double x, double s, double q /* q=±1 */) {
    return normalised_black_call(q < 0 ? -x : x, s); /* Reciprocal-strike call-put equivalence */
}

EXPORT_EXTERN_C double black(double F, double K, double sigma, double T, double q /* q=±1 */) {
    const double intrinsic = fabs(std::max((q < 0 ? K - F : F - K), 0.0));
    // Map in-the-money to out-of-the-money
    if (q * (F - K) > 0)
        return intrinsic + black(F, K, sigma, T, -q);
    return std::max(intrinsic, (sqrt(F) * sqrt(K)) * normalised_black(log(F / K), sigma * sqrt(T), q));
}

#ifdef COMPUTE_LOWER_MAP_DERIVATIVES_INDIVIDUALLY
double f_lower_map(const double x, const double s) {
    if (is_below_horizon(x))
        return 0;
    if (is_below_horizon(s))
        return 0;
    const double z = SQRT_ONE_OVER_THREE * fabs(x) / s, Phi = norm_cdf(-z);
    return TWO_PI_OVER_SQRT_TWENTY_SEVEN * fabs(x) * (Phi * Phi * Phi);
}
double d_f_lower_map_d_beta(const double x, const double s) {
    if (is_below_horizon(s))
        return 1;
    const double z = SQRT_ONE_OVER_THREE * fabs(x) / s, y = z * z, Phi = norm_cdf(-z);
    return TWO_PI * y * (Phi * Phi) * exp(y + 0.125 * s * s);
}
double d2_f_lower_map_d_beta2(const double x, const double s) {
    const double ax = fabs(x), z = SQRT_ONE_OVER_THREE * ax / s, y = z * z, s2 = s * s, Phi = norm_cdf(-z), phi = norm_pdf(z);
    return PI_OVER_SIX * y / (s2 * s) * Phi * (8 * SQRT_THREE * s * ax + (3 * s2 * (s2 - 8) - 8 * x * x) * Phi / phi) * exp(2 * y + 0.25 * s2);
}
void compute_f_lower_map_and_first_two_derivatives(const double x, const double s, double& f, double& fp, double& fpp) {
    f = f_lower_map(x, s);
    fp = d_f_lower_map_d_beta(x, s);
    fpp = d2_f_lower_map_d_beta2(x, s);
}
#else
void compute_f_lower_map_and_first_two_derivatives(const double x, const double s, double& f, double& fp, double& fpp) {
    const double ax = fabs(x), z = SQRT_ONE_OVER_THREE * ax / s, y = z * z, s2 = s * s, Phi = norm_cdf(-z), phi = norm_pdf(z);
    fpp = PI_OVER_SIX * y / (s2 * s) * Phi * (8 * SQRT_THREE * s * ax + (3 * s2 * (s2 - 8) - 8 * x * x) * Phi / phi) * exp(2 * y + 0.25 * s2);
    if (is_below_horizon(s)) {
        fp = 1;
        f = 0;
    } else {
        const double Phi2 = Phi * Phi;
        fp = TWO_PI * y * Phi2 * exp(y + 0.125 * s * s);
        if (is_below_horizon(x))
            f = 0;
        else
            f = TWO_PI_OVER_SQRT_TWENTY_SEVEN * ax * (Phi2 * Phi);
    }
}
#endif

double inverse_f_lower_map(const double x, const double f) {
    return is_below_horizon(f) ? 0 : fabs(x / (SQRT_THREE * inverse_norm_cdf(std::pow(f / (TWO_PI_OVER_SQRT_TWENTY_SEVEN * fabs(x)), 1. / 3.))));
}

#ifdef COMPUTE_UPPER_MAP_DERIVATIVES_INDIVIDUALLY
double f_upper_map(const double s) {
    return norm_cdf(-0.5 * s);
}
double d_f_upper_map_d_beta(const double x, const double s) {
    return is_below_horizon(x) ? -0.5 : -0.5 * exp(0.5 * square(x / s));
}
double d2_f_upper_map_d_beta2(const double x, const double s) {
    if (is_below_horizon(x))
        return 0;
    const double w = square(x / s);
    return SQRT_PI_OVER_TWO * exp(w + 0.125 * s * s) * w / s;
}
void compute_f_upper_map_and_first_two_derivatives(const double x, const double s, double& f, double& fp, double& fpp) {
    f = f_upper_map(s);
    fp = d_f_upper_map_d_beta(x, s);
    fpp = d2_f_upper_map_d_beta2(x, s);
}
#else
void compute_f_upper_map_and_first_two_derivatives(const double x, const double s, double& f, double& fp, double& fpp) {
    f = norm_cdf(-0.5 * s);
    if (is_below_horizon(x)) {
        fp = -0.5;
        fpp = 0;
    } else {
        const double w = square(x / s);
        fp = -0.5 * exp(0.5 * w);
        fpp = SQRT_PI_OVER_TWO * exp(w + 0.125 * s * s) * w / s;
    }
}
#endif

double inverse_f_upper_map(double f) {
    return -2. * inverse_norm_cdf(f);
}

// See http://en.wikipedia.org/wiki/Householder%27s_method for a detailed explanation of the third order Householder iteration.
//
// Given the objective function g(s) whose root x such that 0 = g(s) we seek, iterate
//
//     s_n+1  =  s_n  -  (g/g') · [ 1 - (g''/g')·(g/g') ] / [ 1 - (g/g')·( (g''/g') - (g'''/g')·(g/g')/6 ) ]
//
// Denoting  newton:=-(g/g'), halley:=(g''/g'), and hh3:=(g'''/g'), this reads
//
//     s_n+1  =  s_n  +  newton · [ 1 + halley·newton/2 ] / [ 1 + newton·( halley + hh3·newton/6 ) ]
//
//
// NOTE that this function returns 0 when beta<intrinsic without any safety checks.
//
double unchecked_normalised_implied_volatility_from_a_transformed_rational_guess_with_limited_iterations(double beta, double x, double q /* q=±1 */, int N) {
    // Subtract intrinsic.
    if (q * x > 0) {
        beta = fabs(std::max(beta - normalised_intrinsic(x, q), 0.));
        q = -q;
    }
    // Map puts to calls
    if (q < 0) {
        x = -x;
        q = -q;
    }
    if (beta <= 0) // For negative or zero prices we return 0.
        return implied_volatility_output(0, 0);
    if (beta < DENORMALISATION_CUTOFF) // For positive but denormalised (a.k.a. 'subnormal') prices, we return 0 since it would be impossible to converge to full machine accuracy anyway.
        return implied_volatility_output(0, 0);
    const double b_max = exp(0.5 * x);
    if (beta >= b_max)
        return implied_volatility_output(0, VOLATILITY_VALUE_TO_SIGNAL_PRICE_IS_ABOVE_MAXIMUM);
    int iterations = 0, direction_reversal_count = 0;
    double f = -DBL_MAX, s = -DBL_MAX, ds = s, ds_previous = 0, s_left = DBL_MIN, s_right = DBL_MAX;
    // The temptation is great to use the optimised form b_c = exp(x/2)/2-exp(-x/2)·Phi(sqrt(-2·x)) but that would require implementing all of the above types of round-off and over/underflow handling for this expression, too.
    const double s_c = sqrt(fabs(2 * x)), b_c = normalised_black_call(x, s_c), v_c = normalised_vega(x, s_c);
    // Four branches.
    if (beta < b_c) {
        const double s_l = s_c - b_c / v_c, b_l = normalised_black_call(x, s_l);
        if (beta < b_l) {
            double f_lower_map_l, d_f_lower_map_l_d_beta, d2_f_lower_map_l_d_beta2;
            compute_f_lower_map_and_first_two_derivatives(x, s_l, f_lower_map_l, d_f_lower_map_l_d_beta, d2_f_lower_map_l_d_beta2);
            const double r_ll = convex_rational_cubic_control_parameter_to_fit_second_derivative_at_right_side(0., b_l, 0., f_lower_map_l, 1., d_f_lower_map_l_d_beta, d2_f_lower_map_l_d_beta2, true);
            f = rational_cubic_interpolation(beta, 0., b_l, 0., f_lower_map_l, 1., d_f_lower_map_l_d_beta, r_ll);
            if (!(f > 0)) { // This can happen due to roundoff truncation for extreme values such as |x|>500.
                // We switch to quadratic interpolation using f(0)≡0, f(b_l), and f'(0)≡1 to specify the quadratic.
                const double t = beta / b_l;
                f = (f_lower_map_l * t + b_l * (1 - t)) * t;
            }
            s = inverse_f_lower_map(x, f);
            s_right = s_l;
            //
            // In this branch, which comprises the lowest segment, the objective function is
            //     g(s) = 1/ln(b(x,s)) - 1/ln(beta)
            //          ≡ 1/ln(b(s)) - 1/ln(beta)
            // This makes
            //              g'               =   -b'/(b·ln(b)²)
            //              newton = -g/g'   =   (ln(beta)-ln(b))·ln(b)/ln(beta)·b/b'
            //              halley = g''/g'  =   b''/b'  -  b'/b·(1+2/ln(b))
            //              hh3    = g'''/g' =   b'''/b' +  2(b'/b)²·(1+3/ln(b)·(1+1/ln(b)))  -  3(b''/b)·(1+2/ln(b))
            //
            // The Householder(3) iteration is
            //     s_n+1  =  s_n  +  newton · [ 1 + halley·newton/2 ] / [ 1 + newton·( halley + hh3·newton/6 ) ]
            //
            for (; iterations < N && fabs(ds) > DBL_EPSILON * s; ++iterations) {
                if (ds * ds_previous < 0)
                    ++direction_reversal_count;
                if (iterations > 0 && (3 == direction_reversal_count || !(s > s_left && s < s_right))) {
                    // If looping inefficently, or the forecast step takes us outside the bracket, or onto its edges, switch to binary nesting.
                    // NOTE that this can only really happen for very extreme values of |x|, such as |x| = |ln(F/K)| > 500.
                    s = 0.5 * (s_left + s_right);
                    if (s_right - s_left <= DBL_EPSILON * s)
                        break;
                    direction_reversal_count = 0;
                    ds = 0;
                }
                ds_previous = ds;
                const double b = normalised_black_call(x, s), bp = normalised_vega(x, s);
                if (b > beta && s < s_right)
                    s_right = s;
                else if (b < beta && s > s_left)
                    s_left = s;        // Tighten the bracket if applicable.
                if (b <= 0 || bp <= 0) // Numerical underflow. Switch to binary nesting for this iteration.
                    ds = 0.5 * (s_left + s_right) - s;
                else {
                    const double ln_b = log(b), ln_beta = log(beta), bpob = bp / b, h = x / s, b_halley = h * h / s - s / 4, newton = (ln_beta - ln_b) * ln_b / ln_beta / bpob, halley = b_halley - bpob * (1 + 2 / ln_b);
                    const double b_hh3 = b_halley * b_halley - 3 * square(h / s) - 0.25, hh3 = b_hh3 + 2 * square(bpob) * (1 + 3 / ln_b * (1 + 1 / ln_b)) - 3 * b_halley * bpob * (1 + 2 / ln_b);
                    ds = newton * householder_factor(newton, halley, hh3);
                }
                s += ds = std::max(-0.5 * s, ds);
            }
            return implied_volatility_output(iterations, s);
        } else {
            const double v_l = normalised_vega(x, s_l), r_lm = convex_rational_cubic_control_parameter_to_fit_second_derivative_at_right_side(b_l, b_c, s_l, s_c, 1 / v_l, 1 / v_c, 0.0, false);
            s = rational_cubic_interpolation(beta, b_l, b_c, s_l, s_c, 1 / v_l, 1 / v_c, r_lm);
            s_left = s_l;
            s_right = s_c;
        }
    } else {
        const double s_h = v_c > DBL_MIN ? s_c + (b_max - b_c) / v_c : s_c, b_h = normalised_black_call(x, s_h);
        if (beta <= b_h) {
            const double v_h = normalised_vega(x, s_h), r_hm = convex_rational_cubic_control_parameter_to_fit_second_derivative_at_left_side(b_c, b_h, s_c, s_h, 1 / v_c, 1 / v_h, 0.0, false);
            s = rational_cubic_interpolation(beta, b_c, b_h, s_c, s_h, 1 / v_c, 1 / v_h, r_hm);
            s_left = s_c;
            s_right = s_h;
        } else {
            double f_upper_map_h, d_f_upper_map_h_d_beta, d2_f_upper_map_h_d_beta2;
            compute_f_upper_map_and_first_two_derivatives(x, s_h, f_upper_map_h, d_f_upper_map_h_d_beta, d2_f_upper_map_h_d_beta2);
            if (d2_f_upper_map_h_d_beta2 > -SQRT_DBL_MAX && d2_f_upper_map_h_d_beta2 < SQRT_DBL_MAX) {
                const double r_hh = convex_rational_cubic_control_parameter_to_fit_second_derivative_at_left_side(b_h, b_max, f_upper_map_h, 0., d_f_upper_map_h_d_beta, -0.5, d2_f_upper_map_h_d_beta2, true);
                f = rational_cubic_interpolation(beta, b_h, b_max, f_upper_map_h, 0., d_f_upper_map_h_d_beta, -0.5, r_hh);
            }
            if (f <= 0) {
                const double h = b_max - b_h, t = (beta - b_h) / h;
                f = (f_upper_map_h * (1 - t) + 0.5 * h * t) * (1 - t); // We switch to quadratic interpolation using f(b_h), f(b_max)≡0, and f'(b_max)≡-1/2 to specify the quadratic.
            }
            s = inverse_f_upper_map(f);
            s_left = s_h;
            if (beta > 0.5 * b_max) { // Else we better drop through and let the objective function be g(s) = b(x,s)-beta.
                //
                // In this branch, which comprises the upper segment, the objective function is
                //     g(s) = ln(b_max-beta)-ln(b_max-b(x,s))
                //          ≡ ln((b_max-beta)/(b_max-b(s)))
                // This makes
                //              g'               =   b'/(b_max-b)
                //              newton = -g/g'   =   ln((b_max-b)/(b_max-beta))·(b_max-b)/b'
                //              halley = g''/g'  =   b''/b'  +  b'/(b_max-b)
                //              hh3    = g'''/g' =   b'''/b' +  g'·(2g'+3b''/b')
                // and the iteration is
                //     s_n+1  =  s_n  +  newton · [ 1 + halley·newton/2 ] / [ 1 + newton·( halley + hh3·newton/6 ) ].
                //
                for (; iterations < N && fabs(ds) > DBL_EPSILON * s; ++iterations) {
                    if (ds * ds_previous < 0)
                        ++direction_reversal_count;
                    if (iterations > 0 && (3 == direction_reversal_count || !(s > s_left && s < s_right))) {
                        // If looping inefficently, or the forecast step takes us outside the bracket, or onto its edges, switch to binary nesting.
                        // NOTE that this can only really happen for very extreme values of |x|, such as |x| = |ln(F/K)| > 500.
                        s = 0.5 * (s_left + s_right);
                        if (s_right - s_left <= DBL_EPSILON * s)
                            break;
                        direction_reversal_count = 0;
                        ds = 0;
                    }
                    ds_previous = ds;
                    const double b = normalised_black_call(x, s), bp = normalised_vega(x, s);
                    if (b > beta && s < s_right)
                        s_right = s;
                    else if (b < beta && s > s_left)
                        s_left = s;                  // Tighten the bracket if applicable.
                    if (b >= b_max || bp <= DBL_MIN) // Numerical underflow. Switch to binary nesting for this iteration.
                        ds = 0.5 * (s_left + s_right) - s;
                    else {
                        const double b_max_minus_b = b_max - b, g = log((b_max - beta) / b_max_minus_b), gp = bp / b_max_minus_b;
                        const double b_halley = square(x / s) / s - s / 4, b_hh3 = b_halley * b_halley - 3 * square(x / (s * s)) - 0.25;
                        const double newton = -g / gp, halley = b_halley + gp, hh3 = b_hh3 + gp * (2 * gp + 3 * b_halley);
                        ds = newton * householder_factor(newton, halley, hh3);
                    }
                    s += ds = std::max(-0.5 * s, ds);
                }
                return implied_volatility_output(iterations, s);
            }
        }
    }
    // In this branch, which comprises the two middle segments, the objective function is g(s) = b(x,s)-beta, or g(s) = b(s) - beta, for short.
    // This makes
    //              newton = -g/g'   =  -(b-beta)/b'
    //              halley = g''/g'  =    b''/b'    =  x²/s³-s/4
    //              hh3    = g'''/g' =    b'''/b'   =  halley² - 3·(x/s²)² - 1/4
    // and the iteration is
    //     s_n+1  =  s_n  +  newton · [ 1 + halley·newton/2 ] / [ 1 + newton·( halley + hh3·newton/6 ) ].
    //
    for (; iterations < N && fabs(ds) > DBL_EPSILON * s; ++iterations) {
        if (ds * ds_previous < 0)
            ++direction_reversal_count;
        if (iterations > 0 && (3 == direction_reversal_count || !(s > s_left && s < s_right))) {
            // If looping inefficently, or the forecast step takes us outside the bracket, or onto its edges, switch to binary nesting.
            // NOTE that this can only really happen for very extreme values of |x|, such as |x| = |ln(F/K)| > 500.
            s = 0.5 * (s_left + s_right);
            if (s_right - s_left <= DBL_EPSILON * s)
                break;
            direction_reversal_count = 0;
            ds = 0;
        }
        ds_previous = ds;
        const double b = normalised_black_call(x, s), bp = normalised_vega(x, s);
        if (b > beta && s < s_right)
            s_right = s;
        else if (b < beta && s > s_left)
            s_left = s; // Tighten the bracket if applicable.
        const double newton = (beta - b) / bp, halley = square(x / s) / s - s / 4, hh3 = halley * halley - 3 * square(x / (s * s)) - 0.25;
        s += ds = std::max(-0.5 * s, newton * householder_factor(newton, halley, hh3));
    }
    return implied_volatility_output(iterations, s);
}

EXPORT_EXTERN_C double implied_volatility_from_a_transformed_rational_guess_with_limited_iterations(double price, double F, double K, double T, double q /* q=±1 */, int N) {
    const double intrinsic = fabs(std::max((q < 0 ? K - F : F - K), 0.0));
    if (price < intrinsic)
        return implied_volatility_output(0, VOLATILITY_VALUE_TO_SIGNAL_PRICE_IS_BELOW_INTRINSIC);
    const double max_price = (q < 0 ? K : F);
    if (price >= max_price)
        return implied_volatility_output(0, VOLATILITY_VALUE_TO_SIGNAL_PRICE_IS_ABOVE_MAXIMUM);
    const double x = log(F / K);
    // Map in-the-money to out-of-the-money
    if (q * x > 0) {
        price = fabs(std::max(price - intrinsic, 0.0));
        q = -q;
    }
    return unchecked_normalised_implied_volatility_from_a_transformed_rational_guess_with_limited_iterations(price / (sqrt(F) * sqrt(K)), x, q, N) / sqrt(T);
}

EXPORT_EXTERN_C double implied_volatility_from_a_transformed_rational_guess(double price, double F, double K, double T, double q /* q=±1 */) {
    return implied_volatility_from_a_transformed_rational_guess_with_limited_iterations(price, F, K, T, q, implied_volatility_maximum_iterations.data);
}

EXPORT_EXTERN_C double normalised_implied_volatility_from_a_transformed_rational_guess_with_limited_iterations(double beta, double x, double q /* q=±1 */, int N) {
    // Map in-the-money to out-of-the-money
    if (q * x > 0) {
        beta -= normalised_intrinsic(x, q);
        q = -q;
    }
    if (beta < 0)
        return implied_volatility_output(0, VOLATILITY_VALUE_TO_SIGNAL_PRICE_IS_BELOW_INTRINSIC);
    return unchecked_normalised_implied_volatility_from_a_transformed_rational_guess_with_limited_iterations(beta, x, q, N);
}

EXPORT_EXTERN_C double normalised_implied_volatility_from_a_transformed_rational_guess(double beta, double x, double q /* q=±1 */) {
    return normalised_implied_volatility_from_a_transformed_rational_guess_with_limited_iterations(beta, x, q, implied_volatility_maximum_iterations.data);
}