blackscholes 0.24.0

Black-Scholes option pricing model calculator
Documentation
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
//
// This source code resides at www.jaeckel.org/LetsBeRational.7z .
//
// ======================================================================================
// Copyright © 2013-2017 Peter Jäckel.
//
// Permission to use, copy, modify, and distribute this software is freely granted,
// provided that this notice is preserved.
//
// WARRANTY DISCLAIMER
// The Software is provided "as is" without warranty of any kind, either express or implied,
// including without limitation any implied warranties of condition, uninterrupted use,
// merchantability, fitness for a particular purpose, or non-infringement.
// ======================================================================================
//

#include "lets_be_rational.h"

// To cross-compile on a command line, you could just use something like
//
//   i686-w64-mingw32-g++ -w -fpermissive -shared -DNDEBUG -O3 erf_cody.cpp rationalcubic.cpp normaldistribution.cpp lets_be_rational.cpp xlcall.cpp excel_registration.cpp xlcall32.lib -o lets_be_rational.xll -static-libstdc++ -static-libgcc -s
//
// To compile into a shared library on non-Windows systems, you could try
//
//   g++ -fPIC -shared -DNDEBUG -Ofast erf_cody.cpp rationalcubic.cpp normaldistribution.cpp lets_be_rational.cpp -o lets_be_rational.so
//

#if defined(_MSC_VER)
#define NOMINMAX // to suppress MSVC's definitions of min() and max()
// These four pragmas are the equivalent to /fp:fast.
#pragma float_control(except, off)
#pragma float_control(precise, off)
#pragma fp_contract(on)
#pragma fenv_access(off)
#endif

#include "normaldistribution.h"
#include "rationalcubic.h"
#include <algorithm>
#include <cmath>
#include <float.h>
#if defined(_WIN32) || defined(_WIN64)
#include <windows.h>
#endif

#define TWO_PI 6.283185307179586476925286766559005768394338798750
#define SQRT_PI_OVER_TWO 1.253314137315500251207882642405522626503493370305 // sqrt(pi/2) to avoid misinterpretation.
#define SQRT_THREE 1.732050807568877293527446341505872366942805253810
#define SQRT_ONE_OVER_THREE 0.577350269189625764509148780501957455647601751270
#define TWO_PI_OVER_SQRT_TWENTY_SEVEN 1.209199576156145233729385505094770488189377498728 // 2*pi/sqrt(27)
#define PI_OVER_SIX 0.523598775598298873077107230546583814032861566563

namespace {
static const double SQRT_DBL_EPSILON = sqrt(DBL_EPSILON);
static const double FOURTH_ROOT_DBL_EPSILON = sqrt(SQRT_DBL_EPSILON);
static const double EIGHTH_ROOT_DBL_EPSILON = sqrt(FOURTH_ROOT_DBL_EPSILON);
static const double SIXTEENTH_ROOT_DBL_EPSILON = sqrt(EIGHTH_ROOT_DBL_EPSILON);
static const double SQRT_DBL_MIN = sqrt(DBL_MIN);
static const double SQRT_DBL_MAX = sqrt(DBL_MAX);

// Set this to 0 if you want positive results for (positive) denormalised inputs, else to DBL_MIN.
// Note that you cannot achieve full machine accuracy from denormalised inputs!
static const double DENORMALISATION_CUTOFF = 0;

static const double VOLATILITY_VALUE_TO_SIGNAL_PRICE_IS_BELOW_INTRINSIC = -DBL_MAX;
static const double VOLATILITY_VALUE_TO_SIGNAL_PRICE_IS_ABOVE_MAXIMUM = DBL_MAX;

inline bool is_below_horizon(double x) {
    return fabs(x) < DENORMALISATION_CUTOFF;
} // This weeds out denormalised (a.k.a. 'subnormal') numbers.

// See https://www.kernel.org/doc/Documentation/atomic_ops.txt for further details on this simplistic implementation of an atomic flag that is *not* volatile.
typedef struct {
#if defined(_MSC_VER) || defined(_WIN32) || defined(_WIN64)
    long data;
#else
    int data;
#endif
} atomic_t;

static atomic_t implied_volatility_maximum_iterations = {2}; // (DBL_DIG*20)/3 ≈ 100 . Only needed when the iteration effectively alternates Householder/Halley/Newton steps and binary nesting due to roundoff truncation.

#ifdef ENABLE_SWITCHING_THE_OUTPUT_TO_ITERATION_COUNT
static atomic_t implied_volatility_output_type = {0};
inline double implied_volatility_output(int count, double volatility) {
    return implied_volatility_output_type.data > 0 ? count : volatility;
}
#else
inline double implied_volatility_output(int count, double volatility) {
    return volatility;
}
#endif

#ifdef ENABLE_CHANGING_THE_HOUSEHOLDER_METHOD_ORDER
static atomic_t implied_volatility_householder_method_order = {4};
inline double householder_factor(double newton, double halley, double hh3) {
    return implied_volatility_householder_method_order.data > 3 ? (1 + 0.5 * halley * newton) / (1 + newton * (halley + hh3 * newton / 6)) : (implied_volatility_householder_method_order.data > 2 ? 1 / (1 + 0.5 * halley * newton) : 1);
}
#else
inline double householder_factor(double newton, double halley, double hh3) {
    return (1 + 0.5 * halley * newton) / (1 + newton * (halley + hh3 * newton / 6));
}
#endif

} // namespace

EXPORT_EXTERN_C double set_implied_volatility_maximum_iterations(double t) {
    int i = (int)t;
    if (i >= 0) {
#if defined(_MSC_VER) || defined(_WIN32) || defined(_WIN64)
        InterlockedExchange(&(implied_volatility_maximum_iterations.data), i);
#elif defined(__x86__) || defined(__x86_64__) || defined(__arm64__)
        implied_volatility_maximum_iterations.data = i;
#else
#error Atomic operations not implemented for this platform.
#endif
    }
    return implied_volatility_maximum_iterations.data;
}

#ifdef ENABLE_SWITCHING_THE_OUTPUT_TO_ITERATION_COUNT
EXPORT_EXTERN_C double set_implied_volatility_output_type(double t) {
    int i = (int)t;
#if defined(_MSC_VER) || defined(_WIN32) || defined(_WIN64)
    InterlockedExchange(&(implied_volatility_output_type.data), i);
#elif defined(__x86__) || defined(__x86_64__) || defined(__arm64__)
    implied_volatility_output_type.data = i;
#else
#error Atomic operations not implemented for this platform.
#endif
    return implied_volatility_output_type.data;
}
#endif

#ifdef ENABLE_CHANGING_THE_HOUSEHOLDER_METHOD_ORDER
EXPORT_EXTERN_C double set_implied_volatility_householder_method_order(double t) {
    int i = (int)t;
    if (i >= 0) {
#if defined(_MSC_VER) || defined(_WIN32) || defined(_WIN64)
        InterlockedExchange(&(implied_volatility_householder_method_order.data), i);
#elif defined(__x86__) || defined(__x86_64__) || defined(__arm64__)
        implied_volatility_householder_method_order.data = i;
#else
#error Atomic operations not implemented for this platform.
#endif
    }
    return implied_volatility_householder_method_order.data;
}
#endif

double normalised_intrinsic(double x, double q /* q=±1 */) {
    if (q * x <= 0)
        return 0;
    const double x2 = x * x;
    if (x2 < 98 * FOURTH_ROOT_DBL_EPSILON) // The factor 98 is computed from last coefficient: √√92897280 = 98.1749
        return fabs(std::max((q < 0 ? -1 : 1) * x * (1 + x2 * ((1.0 / 24.0) + x2 * ((1.0 / 1920.0) + x2 * ((1.0 / 322560.0) + (1.0 / 92897280.0) * x2)))), 0.0));
    const double b_max = exp(0.5 * x), one_over_b_max = 1 / b_max;
    return fabs(std::max((q < 0 ? -1 : 1) * (b_max - one_over_b_max), 0.));
}

double normalised_intrinsic_call(double x) {
    return normalised_intrinsic(x, 1);
}

// Asymptotic expansion of
//
//              b  =  Φ(h+t)·exp(x/2) - Φ(h-t)·exp(-x/2)
// with
//              h  =  x/s   and   t  =  s/2
// which makes
//              b  =  Φ(h+t)·exp(h·t) - Φ(h-t)·exp(-h·t)
//
//                    exp(-(h²+t²)/2)
//                 =  ---------------  ·  [ Y(h+t) - Y(h-t) ]
//                        √(2π)
// with
//           Y(z) := Φ(z)/φ(z)
//
// for large negative (t-|h|) by the aid of Abramowitz & Stegun (26.2.12) where Φ(z) = φ(z)/|z|·[1-1/z^2+...].
// We define
//                     r
//         A(h,t) :=  --- · [ Y(h+t) - Y(h-t) ]
//                     t
//
// with r := (h+t)·(h-t) and give an expansion for A(h,t) in q:=(h/r)² expressed in terms of e:=(t/h)² .
double asymptotic_expansion_of_normalised_black_call(double h, double t) {
    const double e = (t / h) * (t / h), r = ((h + t) * (h - t)), q = (h / r) * (h / r);
    // 17th order asymptotic expansion of A(h,t) in q, sufficient for Φ(h) [and thus y(h)] to have relative accuracy of 1.64E-16 for h <= η  with  η:=-10.
    const double asymptotic_expansion_sum = (2.0 + q * (-6.0E0 - 2.0 * e + 3.0 * q * (1.0E1 + e * (2.0E1 + 2.0 * e) + 5.0 * q * (-1.4E1 + e * (-7.0E1 + e * (-4.2E1 - 2.0 * e)) + 7.0 * q * (1.8E1 + e * (1.68E2 + e * (2.52E2 + e * (7.2E1 + 2.0 * e))) + 9.0 * q * (-2.2E1 + e * (-3.3E2 + e * (-9.24E2 + e * (-6.6E2 + e * (-1.1E2 - 2.0 * e)))) + 1.1E1 * q * (2.6E1 + e * (5.72E2 + e * (2.574E3 + e * (3.432E3 + e * (1.43E3 + e * (1.56E2 + 2.0 * e))))) + 1.3E1 * q * (-3.0E1 + e * (-9.1E2 + e * (-6.006E3 + e * (-1.287E4 + e * (-1.001E4 + e * (-2.73E3 + e * (-2.1E2 - 2.0 * e)))))) + 1.5E1 * q * (3.4E1 + e * (1.36E3 + e * (1.2376E4 + e * (3.8896E4 + e * (4.862E4 + e * (2.4752E4 + e * (4.76E3 + e * (2.72E2 + 2.0 * e))))))) + 1.7E1 * q * (-3.8E1 + e * (-1.938E3 + e * (-2.3256E4 + e * (-1.00776E5 + e * (-1.84756E5 + e * (-1.51164E5 + e * (-5.4264E4 + e * (-7.752E3 + e * (-3.42E2 - 2.0 * e)))))))) + 1.9E1 * q * (4.2E1 + e * (2.66E3 + e * (4.0698E4 + e * (2.3256E5 + e * (5.8786E5 + e * (7.05432E5 + e * (4.0698E5 + e * (1.08528E5 + e * (1.197E4 + e * (4.2E2 + 2.0 * e))))))))) + 2.1E1 * q * (-4.6E1 + e * (-3.542E3 + e * (-6.7298E4 + e * (-4.90314E5 + e * (-1.63438E6 + e * (-2.704156E6 + e * (-2.288132E6 + e * (-9.80628E5 + e * (-2.01894E5 + e * (-1.771E4 + e * (-5.06E2 - 2.0 * e)))))))))) + 2.3E1 * q * (5.0E1 + e * (4.6E3 + e * (1.0626E5 + e * (9.614E5 + e * (4.08595E6 + e * (8.9148E6 + e * (1.04006E7 + e * (6.53752E6 + e * (2.16315E6 + e * (3.542E5 + e * (2.53E4 + e * (6.0E2 + 2.0 * e))))))))))) + 2.5E1 * q * (-5.4E1 + e * (-5.85E3 + e * (-1.6146E5 + e * (-1.77606E6 + e * (-9.37365E6 + e * (-2.607579E7 + e * (-4.01166E7 + e * (-3.476772E7 + e * (-1.687257E7 + e * (-4.44015E6 + e * (-5.9202E5 + e * (-3.51E4 + e * (-7.02E2 - 2.0 * e)))))))))))) + 2.7E1 * q * (5.8E1 + e * (7.308E3 + e * (2.3751E5 + e * (3.12156E6 + e * (2.003001E7 + e * (6.919458E7 + e * (1.3572783E8 + e * (1.5511752E8 + e * (1.0379187E8 + e * (4.006002E7 + e * (8.58429E6 + e * (9.5004E5 + e * (4.7502E4 + e * (8.12E2 + 2.0 * e))))))))))))) + 2.9E1 * q * (-6.2E1 + e * (-8.99E3 + e * (-3.39822E5 + e * (-5.25915E6 + e * (-4.032015E7 + e * (-1.6934463E8 + e * (-4.1250615E8 + e * (-6.0108039E8 + e * (-5.3036505E8 + e * (-2.8224105E8 + e * (-8.870433E7 + e * (-1.577745E7 + e * (-1.472562E6 + e * (-6.293E4 + e * (-9.3E2 - 2.0 * e)))))))))))))) + 3.1E1 * q * (6.6E1 + e * (1.0912E4 + e * (4.74672E5 + e * (8.544096E6 + e * (7.71342E7 + e * (3.8707344E8 + e * (1.14633288E9 + e * (2.07431664E9 + e * (2.33360622E9 + e * (1.6376184E9 + e * (7.0963464E8 + e * (1.8512208E8 + e * (2.7768312E7 + e * (2.215136E6 + e * (8.184E4 + e * (1.056E3 + 2.0 * e))))))))))))))) + 3.3E1 * (-7.0E1 + e * (-1.309E4 + e * (-6.49264E5 + e * (-1.344904E7 + e * (-1.4121492E8 + e * (-8.344518E8 + e * (-2.9526756E9 + e * (-6.49588632E9 + e * (-9.0751353E9 + e * (-8.1198579E9 + e * (-4.6399188E9 + e * (-1.6689036E9 + e * (-3.67158792E8 + e * (-4.707164E7 + e * (-3.24632E6 + e * (-1.0472E5 + e * (-1.19E3 - 2.0 * e))))))))))))))))) * q)))))))))))))))));
    const double b = ONE_OVER_SQRT_TWO_PI * exp((-0.5 * (h * h + t * t))) * (t / r) * asymptotic_expansion_sum;
    return fabs(std::max(b, 0.));
}

namespace { /* η */
static const double asymptotic_expansion_accuracy_threshold = -10;
}

double normalised_black_call_using_erfcx(double h, double t) {
    // Given h = x/s and t = s/2, the normalised Black function can be written as
    //
    //     b(x,s)  =  Φ(x/s+s/2)·exp(x/2)  -   Φ(x/s-s/2)·exp(-x/2)
    //             =  Φ(h+t)·exp(h·t)      -   Φ(h-t)·exp(-h·t) .                     (*)
    //
    // It is mentioned in section 4 (and discussion of figures 2 and 3) of George Marsaglia's article "Evaluating the
    // Normal Distribution" (available at http://www.jstatsoft.org/v11/a05/paper) that the error of any cumulative normal
    // function Φ(z) is dominated by the hardware (or compiler implementation) accuracy of exp(-z²/2) which is not
    // reliably more than 14 digits when z is large. The accuracy of Φ(z) typically starts coming down to 14 digits when
    // z is around -8. For the (normalised) Black function, as above in (*), this means that we are subtracting two terms
    // that are each products of terms with about 14 digits of accuracy. The net result, in each of the products, is even
    // less accuracy, and then we are taking the difference of these terms, resulting in even less accuracy. When we are
    // using the asymptotic expansion asymptotic_expansion_of_normalised_black_call() invoked in the second branch at the
    // beginning of this function, we are using only *one* exponential instead of 4, and this improves accuracy. It
    // actually improves it a bit more than you would expect from the above logic, namely, almost the full two missing
    // digits (in 64 bit IEEE floating point).  Unfortunately, going higher order in the asymptotic expansion will not
    // enable us to gain more accuracy (by extending the range in which we could use the expansion) since the asymptotic
    // expansion, being a divergent series, can never gain 16 digits of accuracy for z=-8 or just below. The best you can
    // get is about 15 digits (just), for about 35 terms in the series (26.2.12), which would result in an prohibitively
    // long expression in function asymptotic expansion asymptotic_expansion_of_normalised_black_call(). In this last branch,
    // here, we therefore take a different tack as follows.
    //     The "scaled complementary error function" is defined as erfcx(z) = exp(z²)·erfc(z). Cody's implementation of this
    // function as published in "Rational Chebyshev approximations for the error function", W. J. Cody, Math. Comp., 1969, pp.
    // 631-638, uses rational functions that theoretically approximates erfcx(x) to at least 18 significant decimal digits,
    // *without* the use of the exponential function when x>4, which translates to about z<-5.66 in Φ(z). To make use of it,
    // we write
    //             Φ(z) = exp(-z²/2)·erfcx(-z/√2)/2
    //
    // to transform the normalised black function to
    //
    //   b   =  ½ · exp(-½(h²+t²)) · [ erfcx(-(h+t)/√2) -  erfcx(-(h-t)/√2) ]
    //
    // which now involves only one exponential, instead of three, when |h|+|t| > 5.66 , and the difference inside the
    // square bracket is between the evaluation of two rational functions, which, typically, according to Marsaglia,
    // retains the full 16 digits of accuracy (or just a little less than that).
    //
    const double b = 0.5 * exp(-0.5 * (h * h + t * t)) * (erfcx_cody(-ONE_OVER_SQRT_TWO * (h + t)) - erfcx_cody(-ONE_OVER_SQRT_TWO * (h - t)));
    return fabs(std::max(b, 0.0));
}

// Calculation of
//
//              b  =  Φ(h+t)·exp(h·t) - Φ(h-t)·exp(-h·t)
//
//                    exp(-(h²+t²)/2)
//                 =  --------------- ·  [ Y(h+t) - Y(h-t) ]
//                        √(2π)
// with
//           Y(z) := Φ(z)/φ(z)
//
// using an expansion of Y(h+t)-Y(h-t) for small t to twelvth order in t.
// Theoretically accurate to (better than) precision  ε = 2.23E-16  when  h<=0  and  t < τ  with  τ := 2·ε^(1/16) ≈ 0.21.
// The main bottleneck for precision is the coefficient a:=1+h·Y(h) when |h|>1 .
double small_t_expansion_of_normalised_black_call(double h, double t) {
    // Y(h) := Φ(h)/φ(h) = √(π/2)·erfcx(-h/√2)
    // a := 1+h·Y(h)  --- Note that due to h<0, and h·Y(h) -> -1 (from above) as h -> -∞, we also have that a>0 and a -> 0 as h -> -∞
    // w := t² , h2 := h²
    const double a = 1 + h * (0.5 * SQRT_TWO_PI) * erfcx_cody(-ONE_OVER_SQRT_TWO * h), w = t * t, h2 = h * h;
    const double expansion = 2 * t * (a + w * ((-1 + 3 * a + a * h2) / 6 + w * ((-7 + 15 * a + h2 * (-1 + 10 * a + a * h2)) / 120 + w * ((-57 + 105 * a + h2 * (-18 + 105 * a + h2 * (-1 + 21 * a + a * h2))) / 5040 + w * ((-561 + 945 * a + h2 * (-285 + 1260 * a + h2 * (-33 + 378 * a + h2 * (-1 + 36 * a + a * h2)))) / 362880 + w * ((-6555 + 10395 * a + h2 * (-4680 + 17325 * a + h2 * (-840 + 6930 * a + h2 * (-52 + 990 * a + h2 * (-1 + 55 * a + a * h2))))) / 39916800 + ((-89055 + 135135 * a + h2 * (-82845 + 270270 * a + h2 * (-20370 + 135135 * a + h2 * (-1926 + 25740 * a + h2 * (-75 + 2145 * a + h2 * (-1 + 78 * a + a * h2)))))) * w) / 6227020800.0))))));
    const double b = ONE_OVER_SQRT_TWO_PI * exp((-0.5 * (h * h + t * t))) * expansion;
    return fabs(std::max(b, 0.0));
}

namespace { /* τ */
static const double small_t_expansion_of_normalised_black_threshold = 2 * SIXTEENTH_ROOT_DBL_EPSILON;
}

//     b(x,s)  =  Φ(x/s+s/2)·exp(x/2)  -   Φ(x/s-s/2)·exp(-x/2)
//             =  Φ(h+t)·exp(x/2)      -   Φ(h-t)·exp(-x/2)
// with
//              h  =  x/s   and   t  =  s/2
double normalised_black_call_using_norm_cdf(double x, double s) {
    const double h = x / s, t = 0.5 * s, b_max = exp(0.5 * x), b = norm_cdf(h + t) * b_max - norm_cdf(h - t) / b_max;
    return fabs(std::max(b, 0.0));
}

//
// Introduced on 2017-02-18
//
//     b(x,s)  =  Φ(x/s+s/2)·exp(x/2)  -   Φ(x/s-s/2)·exp(-x/2)
//             =  Φ(h+t)·exp(x/2)      -   Φ(h-t)·exp(-x/2)
//             =  ½ · exp(-u²-v²) · [ erfcx(u-v) -  erfcx(u+v) ]
//             =  ½ · [ exp(x/2)·erfc(u-v)     -  exp(-x/2)·erfc(u+v)    ]
//             =  ½ · [ exp(x/2)·erfc(u-v)     -  exp(-u²-v²)·erfcx(u+v) ]
//             =  ½ · [ exp(-u²-v²)·erfcx(u-v) -  exp(-x/2)·erfc(u+v)    ]
// with
//              h  =  x/s ,       t  =  s/2 ,
// and
//              u  = -h/√2  and   v  =  t/√2 .
//
// Cody's erfc() and erfcx() functions each, for some values of their argument, involve the evaluation
// of the exponential function exp(). The normalised Black function requires additional evaluation(s)
// of the exponential function irrespective of which of the above formulations is used. However, the total
// number of exponential function evaluations can be minimised by a judicious choice of one of the above
// formulations depending on the input values and the branch logic in Cody's erfc() and erfcx().
//
double normalised_black_call_with_optimal_use_of_codys_functions(double x, double s) {
    const double codys_threshold = 0.46875, h = x / s, t = 0.5 * s, q1 = -ONE_OVER_SQRT_TWO * (h + t), q2 = -ONE_OVER_SQRT_TWO * (h - t);
    double two_b;
    if (q1 < codys_threshold)
        if (q2 < codys_threshold)
            two_b = exp(0.5 * x) * erfc_cody(q1) - exp(-0.5 * x) * erfc_cody(q2);
        else
            two_b = exp(0.5 * x) * erfc_cody(q1) - exp(-0.5 * (h * h + t * t)) * erfcx_cody(q2);
    else if (q2 < codys_threshold)
        two_b = exp(-0.5 * (h * h + t * t)) * erfcx_cody(q1) - exp(-0.5 * x) * erfc_cody(q2);
    else
        two_b = exp(-0.5 * (h * h + t * t)) * (erfcx_cody(q1) - erfcx_cody(q2));
    return fabs(std::max(0.5 * two_b, 0.0));
}

EXPORT_EXTERN_C double normalised_black_call(double x, double s) {
    if (x > 0)
        return normalised_intrinsic_call(x) + normalised_black_call(-x, s); // In the money.
    if (s <= fabs(x) * DENORMALISATION_CUTOFF)
        return normalised_intrinsic_call(x); // sigma=0 -> intrinsic value.
    // Denote h := x/s and t := s/2.
    // We evaluate the condition |h|>|η|, i.e., h<η  &&  t < τ+|h|-|η|  avoiding any divisions by s , where η = asymptotic_expansion_accuracy_threshold  and τ = small_t_expansion_of_normalised_black_threshold .
    if (x < s * asymptotic_expansion_accuracy_threshold && 0.5 * s * s + x < s * (small_t_expansion_of_normalised_black_threshold + asymptotic_expansion_accuracy_threshold))
        return asymptotic_expansion_of_normalised_black_call(x / s, 0.5 * s);
    if (0.5 * s < small_t_expansion_of_normalised_black_threshold)
        return small_t_expansion_of_normalised_black_call(x / s, 0.5 * s);
#ifdef DO_NOT_OPTIMISE_NORMALISED_BLACK_IN_REGIONS_3_AND_4_FOR_CODYS_FUNCTIONS
    // When b is more than, say, about 85% of b_max=exp(x/2), then b is dominated by the first of the two terms in the Black formula, and we retain more accuracy by not attempting to combine the two terms in any way.
    // We evaluate the condition h+t>0.85  avoiding any divisions by s.
    if (x + 0.5 * s * s > s * 0.85)
        return normalised_black_call_using_norm_cdf(x, s);
    return normalised_black_call_using_erfcx(x / s, 0.5 * s);
#else
    return normalised_black_call_with_optimal_use_of_codys_functions(x, s);
#endif
}

inline double square(double x) {
    return x * x;
}

EXPORT_EXTERN_C double normalised_vega(double x, double s) {
    const double ax = fabs(x);
    return (ax <= 0) ? ONE_OVER_SQRT_TWO_PI * exp(-0.125 * s * s) : ((s <= 0 || s <= ax * SQRT_DBL_MIN) ? 0 : ONE_OVER_SQRT_TWO_PI * exp(-0.5 * (square(x / s) + square(0.5 * s))));
}

EXPORT_EXTERN_C double normalised_black(double x, double s, double q /* q=±1 */) {
    return normalised_black_call(q < 0 ? -x : x, s); /* Reciprocal-strike call-put equivalence */
}

EXPORT_EXTERN_C double black(double F, double K, double sigma, double T, double q /* q=±1 */) {
    const double intrinsic = fabs(std::max((q < 0 ? K - F : F - K), 0.0));
    // Map in-the-money to out-of-the-money
    if (q * (F - K) > 0)
        return intrinsic + black(F, K, sigma, T, -q);
    return std::max(intrinsic, (sqrt(F) * sqrt(K)) * normalised_black(log(F / K), sigma * sqrt(T), q));
}

#ifdef COMPUTE_LOWER_MAP_DERIVATIVES_INDIVIDUALLY
double f_lower_map(const double x, const double s) {
    if (is_below_horizon(x))
        return 0;
    if (is_below_horizon(s))
        return 0;
    const double z = SQRT_ONE_OVER_THREE * fabs(x) / s, Phi = norm_cdf(-z);
    return TWO_PI_OVER_SQRT_TWENTY_SEVEN * fabs(x) * (Phi * Phi * Phi);
}
double d_f_lower_map_d_beta(const double x, const double s) {
    if (is_below_horizon(s))
        return 1;
    const double z = SQRT_ONE_OVER_THREE * fabs(x) / s, y = z * z, Phi = norm_cdf(-z);
    return TWO_PI * y * (Phi * Phi) * exp(y + 0.125 * s * s);
}
double d2_f_lower_map_d_beta2(const double x, const double s) {
    const double ax = fabs(x), z = SQRT_ONE_OVER_THREE * ax / s, y = z * z, s2 = s * s, Phi = norm_cdf(-z), phi = norm_pdf(z);
    return PI_OVER_SIX * y / (s2 * s) * Phi * (8 * SQRT_THREE * s * ax + (3 * s2 * (s2 - 8) - 8 * x * x) * Phi / phi) * exp(2 * y + 0.25 * s2);
}
void compute_f_lower_map_and_first_two_derivatives(const double x, const double s, double& f, double& fp, double& fpp) {
    f = f_lower_map(x, s);
    fp = d_f_lower_map_d_beta(x, s);
    fpp = d2_f_lower_map_d_beta2(x, s);
}
#else
void compute_f_lower_map_and_first_two_derivatives(const double x, const double s, double& f, double& fp, double& fpp) {
    const double ax = fabs(x), z = SQRT_ONE_OVER_THREE * ax / s, y = z * z, s2 = s * s, Phi = norm_cdf(-z), phi = norm_pdf(z);
    fpp = PI_OVER_SIX * y / (s2 * s) * Phi * (8 * SQRT_THREE * s * ax + (3 * s2 * (s2 - 8) - 8 * x * x) * Phi / phi) * exp(2 * y + 0.25 * s2);
    if (is_below_horizon(s)) {
        fp = 1;
        f = 0;
    } else {
        const double Phi2 = Phi * Phi;
        fp = TWO_PI * y * Phi2 * exp(y + 0.125 * s * s);
        if (is_below_horizon(x))
            f = 0;
        else
            f = TWO_PI_OVER_SQRT_TWENTY_SEVEN * ax * (Phi2 * Phi);
    }
}
#endif

double inverse_f_lower_map(const double x, const double f) {
    return is_below_horizon(f) ? 0 : fabs(x / (SQRT_THREE * inverse_norm_cdf(std::pow(f / (TWO_PI_OVER_SQRT_TWENTY_SEVEN * fabs(x)), 1. / 3.))));
}

#ifdef COMPUTE_UPPER_MAP_DERIVATIVES_INDIVIDUALLY
double f_upper_map(const double s) {
    return norm_cdf(-0.5 * s);
}
double d_f_upper_map_d_beta(const double x, const double s) {
    return is_below_horizon(x) ? -0.5 : -0.5 * exp(0.5 * square(x / s));
}
double d2_f_upper_map_d_beta2(const double x, const double s) {
    if (is_below_horizon(x))
        return 0;
    const double w = square(x / s);
    return SQRT_PI_OVER_TWO * exp(w + 0.125 * s * s) * w / s;
}
void compute_f_upper_map_and_first_two_derivatives(const double x, const double s, double& f, double& fp, double& fpp) {
    f = f_upper_map(s);
    fp = d_f_upper_map_d_beta(x, s);
    fpp = d2_f_upper_map_d_beta2(x, s);
}
#else
void compute_f_upper_map_and_first_two_derivatives(const double x, const double s, double& f, double& fp, double& fpp) {
    f = norm_cdf(-0.5 * s);
    if (is_below_horizon(x)) {
        fp = -0.5;
        fpp = 0;
    } else {
        const double w = square(x / s);
        fp = -0.5 * exp(0.5 * w);
        fpp = SQRT_PI_OVER_TWO * exp(w + 0.125 * s * s) * w / s;
    }
}
#endif

double inverse_f_upper_map(double f) {
    return -2. * inverse_norm_cdf(f);
}

// See http://en.wikipedia.org/wiki/Householder%27s_method for a detailed explanation of the third order Householder iteration.
//
// Given the objective function g(s) whose root x such that 0 = g(s) we seek, iterate
//
//     s_n+1  =  s_n  -  (g/g') · [ 1 - (g''/g')·(g/g') ] / [ 1 - (g/g')·( (g''/g') - (g'''/g')·(g/g')/6 ) ]
//
// Denoting  newton:=-(g/g'), halley:=(g''/g'), and hh3:=(g'''/g'), this reads
//
//     s_n+1  =  s_n  +  newton · [ 1 + halley·newton/2 ] / [ 1 + newton·( halley + hh3·newton/6 ) ]
//
//
// NOTE that this function returns 0 when beta<intrinsic without any safety checks.
//
double unchecked_normalised_implied_volatility_from_a_transformed_rational_guess_with_limited_iterations(double beta, double x, double q /* q=±1 */, int N) {
    // Subtract intrinsic.
    if (q * x > 0) {
        beta = fabs(std::max(beta - normalised_intrinsic(x, q), 0.));
        q = -q;
    }
    // Map puts to calls
    if (q < 0) {
        x = -x;
        q = -q;
    }
    if (beta <= 0) // For negative or zero prices we return 0.
        return implied_volatility_output(0, 0);
    if (beta < DENORMALISATION_CUTOFF) // For positive but denormalised (a.k.a. 'subnormal') prices, we return 0 since it would be impossible to converge to full machine accuracy anyway.
        return implied_volatility_output(0, 0);
    const double b_max = exp(0.5 * x);
    if (beta >= b_max)
        return implied_volatility_output(0, VOLATILITY_VALUE_TO_SIGNAL_PRICE_IS_ABOVE_MAXIMUM);
    int iterations = 0, direction_reversal_count = 0;
    double f = -DBL_MAX, s = -DBL_MAX, ds = s, ds_previous = 0, s_left = DBL_MIN, s_right = DBL_MAX;
    // The temptation is great to use the optimised form b_c = exp(x/2)/2-exp(-x/2)·Phi(sqrt(-2·x)) but that would require implementing all of the above types of round-off and over/underflow handling for this expression, too.
    const double s_c = sqrt(fabs(2 * x)), b_c = normalised_black_call(x, s_c), v_c = normalised_vega(x, s_c);
    // Four branches.
    if (beta < b_c) {
        const double s_l = s_c - b_c / v_c, b_l = normalised_black_call(x, s_l);
        if (beta < b_l) {
            double f_lower_map_l, d_f_lower_map_l_d_beta, d2_f_lower_map_l_d_beta2;
            compute_f_lower_map_and_first_two_derivatives(x, s_l, f_lower_map_l, d_f_lower_map_l_d_beta, d2_f_lower_map_l_d_beta2);
            const double r_ll = convex_rational_cubic_control_parameter_to_fit_second_derivative_at_right_side(0., b_l, 0., f_lower_map_l, 1., d_f_lower_map_l_d_beta, d2_f_lower_map_l_d_beta2, true);
            f = rational_cubic_interpolation(beta, 0., b_l, 0., f_lower_map_l, 1., d_f_lower_map_l_d_beta, r_ll);
            if (!(f > 0)) { // This can happen due to roundoff truncation for extreme values such as |x|>500.
                // We switch to quadratic interpolation using f(0)≡0, f(b_l), and f'(0)≡1 to specify the quadratic.
                const double t = beta / b_l;
                f = (f_lower_map_l * t + b_l * (1 - t)) * t;
            }
            s = inverse_f_lower_map(x, f);
            s_right = s_l;
            //
            // In this branch, which comprises the lowest segment, the objective function is
            //     g(s) = 1/ln(b(x,s)) - 1/ln(beta)
            //          ≡ 1/ln(b(s)) - 1/ln(beta)
            // This makes
            //              g'               =   -b'/(b·ln(b)²)
            //              newton = -g/g'   =   (ln(beta)-ln(b))·ln(b)/ln(beta)·b/b'
            //              halley = g''/g'  =   b''/b'  -  b'/b·(1+2/ln(b))
            //              hh3    = g'''/g' =   b'''/b' +  2(b'/b)²·(1+3/ln(b)·(1+1/ln(b)))  -  3(b''/b)·(1+2/ln(b))
            //
            // The Householder(3) iteration is
            //     s_n+1  =  s_n  +  newton · [ 1 + halley·newton/2 ] / [ 1 + newton·( halley + hh3·newton/6 ) ]
            //
            for (; iterations < N && fabs(ds) > DBL_EPSILON * s; ++iterations) {
                if (ds * ds_previous < 0)
                    ++direction_reversal_count;
                if (iterations > 0 && (3 == direction_reversal_count || !(s > s_left && s < s_right))) {
                    // If looping inefficently, or the forecast step takes us outside the bracket, or onto its edges, switch to binary nesting.
                    // NOTE that this can only really happen for very extreme values of |x|, such as |x| = |ln(F/K)| > 500.
                    s = 0.5 * (s_left + s_right);
                    if (s_right - s_left <= DBL_EPSILON * s)
                        break;
                    direction_reversal_count = 0;
                    ds = 0;
                }
                ds_previous = ds;
                const double b = normalised_black_call(x, s), bp = normalised_vega(x, s);
                if (b > beta && s < s_right)
                    s_right = s;
                else if (b < beta && s > s_left)
                    s_left = s;        // Tighten the bracket if applicable.
                if (b <= 0 || bp <= 0) // Numerical underflow. Switch to binary nesting for this iteration.
                    ds = 0.5 * (s_left + s_right) - s;
                else {
                    const double ln_b = log(b), ln_beta = log(beta), bpob = bp / b, h = x / s, b_halley = h * h / s - s / 4, newton = (ln_beta - ln_b) * ln_b / ln_beta / bpob, halley = b_halley - bpob * (1 + 2 / ln_b);
                    const double b_hh3 = b_halley * b_halley - 3 * square(h / s) - 0.25, hh3 = b_hh3 + 2 * square(bpob) * (1 + 3 / ln_b * (1 + 1 / ln_b)) - 3 * b_halley * bpob * (1 + 2 / ln_b);
                    ds = newton * householder_factor(newton, halley, hh3);
                }
                s += ds = std::max(-0.5 * s, ds);
            }
            return implied_volatility_output(iterations, s);
        } else {
            const double v_l = normalised_vega(x, s_l), r_lm = convex_rational_cubic_control_parameter_to_fit_second_derivative_at_right_side(b_l, b_c, s_l, s_c, 1 / v_l, 1 / v_c, 0.0, false);
            s = rational_cubic_interpolation(beta, b_l, b_c, s_l, s_c, 1 / v_l, 1 / v_c, r_lm);
            s_left = s_l;
            s_right = s_c;
        }
    } else {
        const double s_h = v_c > DBL_MIN ? s_c + (b_max - b_c) / v_c : s_c, b_h = normalised_black_call(x, s_h);
        if (beta <= b_h) {
            const double v_h = normalised_vega(x, s_h), r_hm = convex_rational_cubic_control_parameter_to_fit_second_derivative_at_left_side(b_c, b_h, s_c, s_h, 1 / v_c, 1 / v_h, 0.0, false);
            s = rational_cubic_interpolation(beta, b_c, b_h, s_c, s_h, 1 / v_c, 1 / v_h, r_hm);
            s_left = s_c;
            s_right = s_h;
        } else {
            double f_upper_map_h, d_f_upper_map_h_d_beta, d2_f_upper_map_h_d_beta2;
            compute_f_upper_map_and_first_two_derivatives(x, s_h, f_upper_map_h, d_f_upper_map_h_d_beta, d2_f_upper_map_h_d_beta2);
            if (d2_f_upper_map_h_d_beta2 > -SQRT_DBL_MAX && d2_f_upper_map_h_d_beta2 < SQRT_DBL_MAX) {
                const double r_hh = convex_rational_cubic_control_parameter_to_fit_second_derivative_at_left_side(b_h, b_max, f_upper_map_h, 0., d_f_upper_map_h_d_beta, -0.5, d2_f_upper_map_h_d_beta2, true);
                f = rational_cubic_interpolation(beta, b_h, b_max, f_upper_map_h, 0., d_f_upper_map_h_d_beta, -0.5, r_hh);
            }
            if (f <= 0) {
                const double h = b_max - b_h, t = (beta - b_h) / h;
                f = (f_upper_map_h * (1 - t) + 0.5 * h * t) * (1 - t); // We switch to quadratic interpolation using f(b_h), f(b_max)≡0, and f'(b_max)≡-1/2 to specify the quadratic.
            }
            s = inverse_f_upper_map(f);
            s_left = s_h;
            if (beta > 0.5 * b_max) { // Else we better drop through and let the objective function be g(s) = b(x,s)-beta.
                //
                // In this branch, which comprises the upper segment, the objective function is
                //     g(s) = ln(b_max-beta)-ln(b_max-b(x,s))
                //          ≡ ln((b_max-beta)/(b_max-b(s)))
                // This makes
                //              g'               =   b'/(b_max-b)
                //              newton = -g/g'   =   ln((b_max-b)/(b_max-beta))·(b_max-b)/b'
                //              halley = g''/g'  =   b''/b'  +  b'/(b_max-b)
                //              hh3    = g'''/g' =   b'''/b' +  g'·(2g'+3b''/b')
                // and the iteration is
                //     s_n+1  =  s_n  +  newton · [ 1 + halley·newton/2 ] / [ 1 + newton·( halley + hh3·newton/6 ) ].
                //
                for (; iterations < N && fabs(ds) > DBL_EPSILON * s; ++iterations) {
                    if (ds * ds_previous < 0)
                        ++direction_reversal_count;
                    if (iterations > 0 && (3 == direction_reversal_count || !(s > s_left && s < s_right))) {
                        // If looping inefficently, or the forecast step takes us outside the bracket, or onto its edges, switch to binary nesting.
                        // NOTE that this can only really happen for very extreme values of |x|, such as |x| = |ln(F/K)| > 500.
                        s = 0.5 * (s_left + s_right);
                        if (s_right - s_left <= DBL_EPSILON * s)
                            break;
                        direction_reversal_count = 0;
                        ds = 0;
                    }
                    ds_previous = ds;
                    const double b = normalised_black_call(x, s), bp = normalised_vega(x, s);
                    if (b > beta && s < s_right)
                        s_right = s;
                    else if (b < beta && s > s_left)
                        s_left = s;                  // Tighten the bracket if applicable.
                    if (b >= b_max || bp <= DBL_MIN) // Numerical underflow. Switch to binary nesting for this iteration.
                        ds = 0.5 * (s_left + s_right) - s;
                    else {
                        const double b_max_minus_b = b_max - b, g = log((b_max - beta) / b_max_minus_b), gp = bp / b_max_minus_b;
                        const double b_halley = square(x / s) / s - s / 4, b_hh3 = b_halley * b_halley - 3 * square(x / (s * s)) - 0.25;
                        const double newton = -g / gp, halley = b_halley + gp, hh3 = b_hh3 + gp * (2 * gp + 3 * b_halley);
                        ds = newton * householder_factor(newton, halley, hh3);
                    }
                    s += ds = std::max(-0.5 * s, ds);
                }
                return implied_volatility_output(iterations, s);
            }
        }
    }
    // In this branch, which comprises the two middle segments, the objective function is g(s) = b(x,s)-beta, or g(s) = b(s) - beta, for short.
    // This makes
    //              newton = -g/g'   =  -(b-beta)/b'
    //              halley = g''/g'  =    b''/b'    =  x²/s³-s/4
    //              hh3    = g'''/g' =    b'''/b'   =  halley² - 3·(x/s²)² - 1/4
    // and the iteration is
    //     s_n+1  =  s_n  +  newton · [ 1 + halley·newton/2 ] / [ 1 + newton·( halley + hh3·newton/6 ) ].
    //
    for (; iterations < N && fabs(ds) > DBL_EPSILON * s; ++iterations) {
        if (ds * ds_previous < 0)
            ++direction_reversal_count;
        if (iterations > 0 && (3 == direction_reversal_count || !(s > s_left && s < s_right))) {
            // If looping inefficently, or the forecast step takes us outside the bracket, or onto its edges, switch to binary nesting.
            // NOTE that this can only really happen for very extreme values of |x|, such as |x| = |ln(F/K)| > 500.
            s = 0.5 * (s_left + s_right);
            if (s_right - s_left <= DBL_EPSILON * s)
                break;
            direction_reversal_count = 0;
            ds = 0;
        }
        ds_previous = ds;
        const double b = normalised_black_call(x, s), bp = normalised_vega(x, s);
        if (b > beta && s < s_right)
            s_right = s;
        else if (b < beta && s > s_left)
            s_left = s; // Tighten the bracket if applicable.
        const double newton = (beta - b) / bp, halley = square(x / s) / s - s / 4, hh3 = halley * halley - 3 * square(x / (s * s)) - 0.25;
        s += ds = std::max(-0.5 * s, newton * householder_factor(newton, halley, hh3));
    }
    return implied_volatility_output(iterations, s);
}

EXPORT_EXTERN_C double implied_volatility_from_a_transformed_rational_guess_with_limited_iterations(double price, double F, double K, double T, double q /* q=±1 */, int N) {
    const double intrinsic = fabs(std::max((q < 0 ? K - F : F - K), 0.0));
    if (price < intrinsic)
        return implied_volatility_output(0, VOLATILITY_VALUE_TO_SIGNAL_PRICE_IS_BELOW_INTRINSIC);
    const double max_price = (q < 0 ? K : F);
    if (price >= max_price)
        return implied_volatility_output(0, VOLATILITY_VALUE_TO_SIGNAL_PRICE_IS_ABOVE_MAXIMUM);
    const double x = log(F / K);
    // Map in-the-money to out-of-the-money
    if (q * x > 0) {
        price = fabs(std::max(price - intrinsic, 0.0));
        q = -q;
    }
    return unchecked_normalised_implied_volatility_from_a_transformed_rational_guess_with_limited_iterations(price / (sqrt(F) * sqrt(K)), x, q, N) / sqrt(T);
}

EXPORT_EXTERN_C double implied_volatility_from_a_transformed_rational_guess(double price, double F, double K, double T, double q /* q=±1 */) {
    return implied_volatility_from_a_transformed_rational_guess_with_limited_iterations(price, F, K, T, q, implied_volatility_maximum_iterations.data);
}

EXPORT_EXTERN_C double normalised_implied_volatility_from_a_transformed_rational_guess_with_limited_iterations(double beta, double x, double q /* q=±1 */, int N) {
    // Map in-the-money to out-of-the-money
    if (q * x > 0) {
        beta -= normalised_intrinsic(x, q);
        q = -q;
    }
    if (beta < 0)
        return implied_volatility_output(0, VOLATILITY_VALUE_TO_SIGNAL_PRICE_IS_BELOW_INTRINSIC);
    return unchecked_normalised_implied_volatility_from_a_transformed_rational_guess_with_limited_iterations(beta, x, q, N);
}

EXPORT_EXTERN_C double normalised_implied_volatility_from_a_transformed_rational_guess(double beta, double x, double q /* q=±1 */) {
    return normalised_implied_volatility_from_a_transformed_rational_guess_with_limited_iterations(beta, x, q, implied_volatility_maximum_iterations.data);
}