atelier_quant 0.0.12

Quantitative Finance Tools & Models for the atelier-rs engine
//! Maximum Likelihood Estimation for the univariate Hawkes process.
//!
//! # Model
//!
//! Univariate Hawkes process with exponential kernel:
//!
//! $$
//!   \lambda(t) = \mu + \alpha \sum_{t_j < t} e^{-\beta(t - t_j)}
//! $$
//!
//! # Log-Likelihood
//!
//! $$
//!   \ell(\mu, \alpha, \beta) = \sum_i \ln \lambda(t_i)
//!     - \mu T
//!     - \frac{\alpha}{\beta} \sum_i \bigl(1 - e^{-\beta(T - t_i)}\bigr)
//! $$
//!
//! Evaluated in $O(n)$ via the auxiliary recursion:
//!
//! $$
//!   A_i = e^{-\beta(t_i - t_{i-1})} \cdot (1 + A_{i-1}), \quad A_0 = 0
//! $$
//!
//! # Optimization
//!
//! Gradient ascent with analytical gradients, Armijo backtracking line
//! search, and parameter projection to maintain the feasibility set
//! $\mu > 0,\; \alpha > 0,\; \beta > 0,\; \alpha / \beta < 1$.
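//!
//! # Example
//!
//! A minimal usage sketch with the default optimizer settings; the import
//! path is illustrative and depends on how this module is re-exported, and
//! the event times are made up.
//!
//! ```ignore
//! use atelier_quant::estimate_hawkes_mle;
//! use atelier_quant::HawkesEstimationConfig;
//!
//! // Sorted event times in a consistent unit (here: seconds).
//! let events = vec![0.1, 0.5, 0.6, 1.3, 1.4, 1.45, 2.2, 3.0];
//!
//! let config = HawkesEstimationConfig::default();
//! let fit = estimate_hawkes_mle(&events, &config).expect("estimation failed");
//!
//! // α/β < 1 indicates a stationary (sub-critical) process.
//! println!(
//!     "mu = {}, alpha = {}, beta = {}, branching ratio = {}",
//!     fit.mu, fit.alpha, fit.beta, fit.branching_ratio
//! );
//! ```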

use serde::{Deserialize, Serialize};

use super::errors::HawkesError;

// ── Configuration ────────────────────────────────────────────────────

/// Configuration for the Hawkes MLE optimizer.
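///
/// # Example
///
/// A sketch of overriding individual fields while keeping the remaining
/// defaults via struct-update syntax:
///
/// ```ignore
/// let config = HawkesEstimationConfig {
///     tol: 1e-4,
///     max_iter: 5_000,
///     ..Default::default()
/// };
/// ```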
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct HawkesEstimationConfig {
    /// Maximum number of gradient ascent iterations.
    pub max_iter: usize,

    /// Convergence tolerance on the gradient infinity-norm.
    pub tol: f64,

    /// Initial step size for line search.
    pub learning_rate: f64,

    /// Optional initial parameter guess `(μ₀, α₀, β₀)`.
    /// If `None`, a heuristic based on the data is used.
    pub initial_params: Option<(f64, f64, f64)>,
}

impl Default for HawkesEstimationConfig {
    fn default() -> Self {
        Self {
            max_iter: 1_000,
            tol: 1e-3,
            learning_rate: 1e-3,
            initial_params: None,
        }
    }
}

// ── Result ───────────────────────────────────────────────────────────

/// Output of the Hawkes MLE estimation.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct HawkesEstimationResult {
    /// Estimated baseline intensity.
    pub mu: f64,
    /// Estimated excitation factor.
    pub alpha: f64,
    /// Estimated decay rate.
    pub beta: f64,
    /// Log-likelihood at the optimum.
    pub log_likelihood: f64,
    /// Number of iterations executed.
    pub iterations: usize,
    /// Whether the optimizer converged within tolerance.
    pub converged: bool,
    /// Branching ratio α/β (must be < 1 for stationarity).
    pub branching_ratio: f64,
    /// Akaike Information Criterion: 2k − 2ℓ (k=3).
    pub aic: f64,
    /// Bayesian Information Criterion: k·ln(n) − 2ℓ.
    pub bic: f64,
}

// ── Core functions ───────────────────────────────────────────────────

/// Minimum parameter value to avoid log(0) and division by zero.
const PARAM_EPS: f64 = 1e-12;

/// Maximum branching ratio allowed during optimization.
const MAX_BRANCHING: f64 = 0.9999;

/// Compute the log-likelihood of a univariate Hawkes process with
/// exponential kernel, given event times and parameters.
///
/// Uses the O(n) recursive formulation.
///
/// # Arguments
///
/// * `mu` - Baseline intensity (> 0).
/// * `alpha` - Excitation factor (> 0).
/// * `beta` - Decay rate (> 0).
/// * `events` - Sorted event times (in any consistent time unit).
///
/// # Returns
///
/// The log-likelihood value, or `f64::NEG_INFINITY` if any intensity is ≤ 0.
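///
/// # Example
///
/// A small sketch (parameter values and event times are illustrative;
/// assumes `log_likelihood` is in scope):
///
/// ```ignore
/// let events = [0.2, 0.9, 1.1, 2.5];
/// // With μ, α, β all positive, every intensity λ(tᵢ) ≥ μ > 0,
/// // so the log-likelihood is finite.
/// let ll = log_likelihood(0.5, 0.3, 1.5, &events);
/// assert!(ll.is_finite());
/// ```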
pub fn log_likelihood(mu: f64, alpha: f64, beta: f64, events: &[f64]) -> f64 {
    let n = events.len();
    if n == 0 {
        return 0.0;
    }

    let t_max = events[n - 1];
    let mut ll = -(mu * t_max);

    // Recursive auxiliary A_i
    let mut a = 0.0_f64;

    for i in 0..n {
        if i > 0 {
            let dt = events[i] - events[i - 1];
            a = (-beta * dt).exp() * (1.0 + a);
        }

        let intensity = mu + alpha * a;
        if intensity <= 0.0 {
            return f64::NEG_INFINITY;
        }
        ll += intensity.ln();

        // Compensator contribution for this event
        let remaining = t_max - events[i];
        ll -= (alpha / beta) * (1.0 - (-beta * remaining).exp());
    }

    ll
}

/// Compute the analytical gradient of the log-likelihood.
///
/// Returns `(∂ℓ/∂μ, ∂ℓ/∂α, ∂ℓ/∂β)`.
///
/// Uses the recursive auxiliaries:
///   A(i) = exp(-β·Δt) · (1 + A(i-1))
///   B(i) = -Δt · exp(-β·Δt) · (1 + A(i-1)) + exp(-β·Δt) · B(i-1)
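///
/// # Example
///
/// A sketch of a central finite-difference check on `∂ℓ/∂μ`; the step
/// size and parameter values are illustrative, and `gradient` /
/// `log_likelihood` are assumed to be in scope.
///
/// ```ignore
/// let events = [0.2, 0.9, 1.1, 2.5];
/// let (mu, alpha, beta) = (0.5, 0.3, 1.5);
///
/// let (d_mu, _d_alpha, _d_beta) = gradient(mu, alpha, beta, &events);
///
/// // Central difference approximation of ∂ℓ/∂μ.
/// let h = 1e-6;
/// let fd = (log_likelihood(mu + h, alpha, beta, &events)
///     - log_likelihood(mu - h, alpha, beta, &events)) / (2.0 * h);
///
/// assert!((d_mu - fd).abs() < 1e-4);
/// ```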
pub fn gradient(mu: f64, alpha: f64, beta: f64, events: &[f64]) -> (f64, f64, f64) {
    let n = events.len();
    if n == 0 {
        return (0.0, 0.0, 0.0);
    }

    let t_max = events[n - 1];

    let mut dl_dmu = -t_max;
    let mut dl_dalpha = 0.0_f64;
    let mut dl_dbeta = 0.0_f64;

    let mut a = 0.0_f64; // A(i) recursion
    let mut b = 0.0_f64; // B(i) recursion (∂A/∂β)

    for i in 0..n {
        if i > 0 {
            let dt = events[i] - events[i - 1];
            let exp_neg = (-beta * dt).exp();
            let one_plus_a = 1.0 + a;

            // B must be computed before A is updated
            //   B_i = −Δt·exp(−β·Δt)·(1+A_{i−1}) + exp(−β·Δt)·B_{i−1}
            b = -dt * exp_neg * one_plus_a + exp_neg * b;
            a = exp_neg * one_plus_a;
        }

        let intensity = mu + alpha * a;
        if intensity <= PARAM_EPS {
            continue; // skip degenerate points
        }
        let inv_lambda = 1.0 / intensity;

        // ∂ℓ/∂μ += 1/λ(tᵢ)
        dl_dmu += inv_lambda;

        // ∂ℓ/∂α += A(i)/λ(tᵢ)
        dl_dalpha += a * inv_lambda;

        // ∂ℓ/∂β += α·B(i)/λ(tᵢ)
        dl_dbeta += alpha * b * inv_lambda;

        // Compensator gradient contributions
        let remaining = t_max - events[i];
        let exp_rem = (-beta * remaining).exp();

        // ∂/∂α of compensator term: -(1/β)(1 - exp(-β·r))
        dl_dalpha -= (1.0 / beta) * (1.0 - exp_rem);

        // ∂/∂β of compensator term:
        //   (α/β²)(1 - exp(-β·r)) - (α/β)·r·exp(-β·r)
        dl_dbeta += (alpha / (beta * beta)) * (1.0 - exp_rem);
        dl_dbeta -= (alpha / beta) * remaining * exp_rem;
    }

    (dl_dmu, dl_dalpha, dl_dbeta)
}

/// Project parameters onto the feasible set.
///
/// Ensures μ > 0, α > 0, β > 0, and α/β < MAX_BRANCHING.
#[inline]
pub fn project(mu: &mut f64, alpha: &mut f64, beta: &mut f64) {
    *mu = mu.max(PARAM_EPS);
    *alpha = alpha.max(PARAM_EPS);
    *beta = beta.max(PARAM_EPS);

    // Enforce stationarity: α/β < MAX_BRANCHING
    if *alpha / *beta >= MAX_BRANCHING {
        *alpha = MAX_BRANCHING * *beta;
    }
}

/// Infinity-norm of a 3-vector.
#[inline]
pub fn grad_norm(g: &(f64, f64, f64)) -> f64 {
    g.0.abs().max(g.1.abs()).max(g.2.abs())
}

/// Estimate Hawkes process parameters via Maximum Likelihood.
///
/// Uses projected gradient ascent with Armijo backtracking line search.
///
/// # Arguments
///
/// * `events` - Sorted event times in any consistent unit (e.g. seconds).
///   Must contain at least 2 events.
/// * `config` - Optimizer configuration (iterations, tolerance, etc.).
///
/// # Returns
///
/// `HawkesEstimationResult` with fitted parameters and diagnostics.
///
/// # Errors
///
/// - `HawkesError::InvalidEventTimes` if there are fewer than 2 events,
///   they are not strictly increasing, or any value is non-finite.
/// - `HawkesError::NotConverged` if the gradient norm exceeds `tol` after `max_iter`.
/// - `HawkesError::EstimationFailed` on numerical failure (NaN log-likelihood).
///
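/// # Example
///
/// A sketch with an explicit initial guess; the event times and starting
/// point are illustrative, and printing the error assumes `HawkesError`
/// implements `Debug`.
///
/// ```ignore
/// let events = vec![0.1, 0.5, 0.6, 1.3, 1.4, 1.45, 2.2, 3.0];
/// let config = HawkesEstimationConfig {
///     initial_params: Some((1.0, 0.5, 2.0)),
///     tol: 1e-4,
///     ..Default::default()
/// };
///
/// match estimate_hawkes_mle(&events, &config) {
///     Ok(fit) => println!("branching ratio = {}", fit.branching_ratio),
///     Err(e) => eprintln!("estimation failed: {:?}", e),
/// }
/// ```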
pub fn estimate_hawkes_mle(
    events: &[f64],
    config: &HawkesEstimationConfig,
) -> Result<HawkesEstimationResult, HawkesError> {
    // ── Validate input ───────────────────────────────────────────────
    validate_events(events)?;

    let n = events.len();
    let t_max = events[n - 1];
    let t_min = events[0];
    let duration = t_max - t_min;

    if duration <= 0.0 {
        return Err(HawkesError::InvalidEventTimes(
            "All events have the same timestamp".into(),
        ));
    }

    // ── Initial parameters ───────────────────────────────────────────
    let (mut mu, mut alpha, mut beta) = match config.initial_params {
        Some((m, a, b)) => (m, a, b),
        None => {
            // Heuristic that adapts to the time-scale of the data.
            //
            // The empirical rate is λ̂ = n/T.  For a stationary Hawkes
            // process the long-run event rate is μ/(1 − α/β), so we pick
            // a baseline and branching ratio that reproduce λ̂:
            //
            //   μ₀  = 0.8 · λ̂       (80 % attributed to baseline)
            //   β₀  = 1 / mean_ia    (decay ≈ one interarrival)
            //   α₀  = 0.2 · β₀       (branching ratio = 0.2)
            //
            // Then μ₀ / (1 − α₀/β₀) = λ̂, and α₀/β₀ = 0.2 sits well inside
            // the stationarity region regardless of the time unit
            // (ms, s, μs, …).
            let mean_ia = duration / (n as f64 - 1.0);
            let lambda_hat = n as f64 / duration;
            let mu0 = 0.8 * lambda_hat;
            let beta0 = 1.0 / mean_ia;
            let alpha0 = 0.2 * beta0;
            (mu0, alpha0, beta0)
        }
    };

    project(&mut mu, &mut alpha, &mut beta);

    let mut ll = log_likelihood(mu, alpha, beta, events);
    let mut lr = config.learning_rate;
    let mut converged = false;
    let mut iterations = 0;

    // ── Gradient ascent with Armijo line search ──────────────────────
    //
    // We use a *normalised* search direction: d = g / ||g||_inf.
    // This decouples the step size from the gradient magnitude, so the
    // same learning_rate works whether n=10 or n=100_000.

    for iter in 0..config.max_iter {
        iterations = iter + 1;

        let g = gradient(mu, alpha, beta, events);
        let gn = grad_norm(&g);

        if gn < config.tol {
            converged = true;
            break;
        }

        // Normalised direction: each component in [-1, 1]
        let inv_gn = 1.0 / gn;
        let d = (g.0 * inv_gn, g.1 * inv_gn, g.2 * inv_gn);

        // Armijo backtracking line search
        let c1 = 1e-4; // sufficient decrease constant
        let mut step = lr;
        let mut found_step = false;

        for _ in 0..30 {
            let mut mu_new = mu + step * d.0;
            let mut alpha_new = alpha + step * d.1;
            let mut beta_new = beta + step * d.2;
            project(&mut mu_new, &mut alpha_new, &mut beta_new);

            let ll_new = log_likelihood(mu_new, alpha_new, beta_new, events);

            // Armijo sufficient-increase condition on the projected step:
            //   f(x_new) ≥ f(x) + c1 · gᵀ(x_new − x)
            let directional =
                g.0 * (mu_new - mu) + g.1 * (alpha_new - alpha) + g.2 * (beta_new - beta);

            if ll_new.is_finite() && ll_new >= ll + c1 * directional {
                mu = mu_new;
                alpha = alpha_new;
                beta = beta_new;
                ll = ll_new;
                found_step = true;

                // Adaptive step size: grow cautiously on success
                lr = (step * 1.2).min(1.0);
                break;
            }

            step *= 0.5;
        }

        if !found_step {
            // Line search failed — shrink global learning rate and retry
            lr *= 0.1;
            if lr < 1e-15 {
                // Effectively stuck
                break;
            }
        }

        if !ll.is_finite() {
            return Err(HawkesError::EstimationFailed(
                "Log-likelihood became NaN/Inf during optimization".into(),
            ));
        }
    }

    // ── Package results ──────────────────────────────────────────────
    let branching_ratio = alpha / beta;
    let k = 3.0_f64; // number of parameters
    let n_f = n as f64;
    let aic = 2.0 * k - 2.0 * ll;
    let bic = k * n_f.ln() - 2.0 * ll;

    if !converged {
        let final_grad = gradient(mu, alpha, beta, events);
        let gn = grad_norm(&final_grad);

        // If gradient is reasonably small, consider it "close enough"
        if gn < config.tol {
            converged = true;
        } else {
            return Err(HawkesError::NotConverged {
                iterations,
                gradient_norm: gn,
            });
        }
    }

    Ok(HawkesEstimationResult {
        mu,
        alpha,
        beta,
        log_likelihood: ll,
        iterations,
        converged,
        branching_ratio,
        aic,
        bic,
    })
}

/// Compute the A_i and B_i recursion arrays for testing/diagnostics.
///
/// Returns `(a_values, b_values)` where:
/// - `a_values[i]` = $\sum_{j < i} e^{-\beta(t_i - t_j)}$ (the Ozaki recursion)
/// - `b_values[i]` = $\partial A_i / \partial \beta$
///
/// This is exposed for testing the recursion against brute-force computation.
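///
/// # Example
///
/// A sketch comparing the recursion against a direct double sum; the
/// event times and tolerance are illustrative, and the function is
/// assumed to be in scope.
///
/// ```ignore
/// let events = [0.2, 0.9, 1.1, 2.5];
/// let beta = 1.5;
/// let (a_vals, _b_vals) = compute_recursion_arrays(beta, &events);
///
/// // Direct O(n²) evaluation of A_i = Σ_{j<i} exp(-β (t_i - t_j)).
/// for i in 0..events.len() {
///     let direct: f64 = (0..i)
///         .map(|j| (-beta * (events[i] - events[j])).exp())
///         .sum();
///     assert!((a_vals[i] - direct).abs() < 1e-12);
/// }
/// ```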
pub fn compute_recursion_arrays(beta: f64, events: &[f64]) -> (Vec<f64>, Vec<f64>) {
    let n = events.len();
    let mut a_vals = Vec::with_capacity(n);
    let mut b_vals = Vec::with_capacity(n);

    let mut a = 0.0_f64;
    let mut b = 0.0_f64;

    for i in 0..n {
        if i > 0 {
            let dt = events[i] - events[i - 1];
            let exp_neg = (-beta * dt).exp();
            let one_plus_a = 1.0 + a;
            b = -dt * exp_neg * one_plus_a + exp_neg * b;
            a = exp_neg * one_plus_a;
        }
        a_vals.push(a);
        b_vals.push(b);
    }

    (a_vals, b_vals)
}

/// Validate event times for MLE input.
///
/// Ensures at least 2 events, all finite, and strictly increasing.
pub fn validate_events(events: &[f64]) -> Result<(), HawkesError> {
    if events.len() < 2 {
        return Err(HawkesError::InvalidEventTimes(
            "Need at least 2 events for estimation".into(),
        ));
    }

    for (i, &t) in events.iter().enumerate() {
        if !t.is_finite() {
            return Err(HawkesError::InvalidEventTimes(format!(
                "Event at index {} is not finite: {}",
                i, t
            )));
        }
    }

    for i in 1..events.len() {
        if events[i] <= events[i - 1] {
            return Err(HawkesError::InvalidEventTimes(format!(
                "Events not strictly increasing at index {}: {} <= {}",
                i,
                events[i],
                events[i - 1]
            )));
        }
    }

    Ok(())
}

/// Compute the compensator (integrated intensity) over `[0, T]`.
///
/// $$
///   \Lambda(T) = \mu T + \frac{\alpha}{\beta} \sum_{t_i < T}
///     \bigl(1 - e^{-\beta(T - t_i)}\bigr)
/// $$
///
/// Useful for time-rescaling residual analysis: if the model is correctly
/// specified, the transformed inter-event intervals
/// $\Lambda(t_{i+1}) - \Lambda(t_i)$ should be i.i.d. Exp(1).
pub fn compensator(mu: f64, alpha: f64, beta: f64, events: &[f64], t: f64) -> f64 {
    let mut comp = mu * t;

    for &ti in events {
        if ti >= t {
            break;
        }
        comp += (alpha / beta) * (1.0 - (-beta * (t - ti)).exp());
    }

    comp
}

/// Compute time-rescaling residuals for goodness-of-fit assessment.
///
/// Returns the transformed inter-event intervals
/// `Λ(t_{i+1}) − Λ(t_i)` which should be i.i.d. Exp(1) if the
/// model is correctly specified (time-rescaling theorem).
///
/// # Arguments
///
/// * `mu`, `alpha`, `beta` - Fitted Hawkes parameters.
/// * `events` - The event times used for fitting.
///
/// # Returns
///
/// A vector of rescaled intervals (length `n-1`).
pub fn time_rescaling_residuals(
    mu: f64,
    alpha: f64,
    beta: f64,
    events: &[f64],
) -> Vec<f64> {
    let n = events.len();
    if n < 2 {
        return vec![];
    }

    let mut residuals = Vec::with_capacity(n - 1);

    // Compute compensator at each event time
    let mut comp_prev = compensator(mu, alpha, beta, events, events[0]);

    for i in 1..n {
        let comp_curr = compensator(mu, alpha, beta, events, events[i]);
        residuals.push(comp_curr - comp_prev);
        comp_prev = comp_curr;
    }

    residuals
}
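
// ── Tests ────────────────────────────────────────────────────────────
//
// A few sanity checks sketched against the functions defined above; the
// event times and tolerances are illustrative, not calibrated fixtures.
#[cfg(test)]
mod tests {
    use super::*;

    #[test]
    fn validate_rejects_unsorted_events() {
        // Strictly increasing times are required.
        assert!(validate_events(&[0.0, 2.0, 1.0]).is_err());
        assert!(validate_events(&[0.0, 1.0, 2.0]).is_ok());
    }

    #[test]
    fn log_likelihood_matches_brute_force() {
        // Brute-force O(n²) evaluation of the same log-likelihood.
        let (mu, alpha, beta) = (0.5_f64, 0.3_f64, 1.5_f64);
        let events = [0.2, 0.9, 1.1, 2.5];
        let t_max = *events.last().unwrap();

        let mut ll_brute = -(mu * t_max);
        for i in 0..events.len() {
            let excitation: f64 = (0..i)
                .map(|j| (-beta * (events[i] - events[j])).exp())
                .sum();
            ll_brute += (mu + alpha * excitation).ln();
            ll_brute -= (alpha / beta) * (1.0 - (-beta * (t_max - events[i])).exp());
        }

        let ll = log_likelihood(mu, alpha, beta, &events);
        assert!((ll - ll_brute).abs() < 1e-10);
    }

    #[test]
    fn residuals_have_expected_length() {
        let events = [0.2, 0.9, 1.1, 2.5];
        let res = time_rescaling_residuals(0.5, 0.3, 1.5, &events);
        assert_eq!(res.len(), events.len() - 1);
    }
}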