g_math 0.4.2

Multi-domain fixed-point arithmetic with geometric extension: Lie groups, manifolds, ODE solvers, tensors, fiber bundles — zero-float, 0 ULP transcendentals
Documentation
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
//! Decimal exponential — `exp(x)` at compute dp.
//!
//! # Algorithm
//!
//! Uses **ln(2)-based range reduction**: rewrite `exp(x) = 2^k × exp(r)` where
//! `k = round(x / ln(2))` and `r = x - k × ln(2)` ensures `|r| ≤ ln(2)/2 ≈ 0.347`.
//!
//! 1. **Range reduction**: compute integer `k` and small remainder `r`.
//! 2. **Taylor series** for `exp(r)` with `|r| ≤ 0.347` — converges in ~25 terms at dp=38.
//!    Iterative: `term_0 = 1, term_n = term_{n-1} × r / n`.
//! 3. **Multiply by 2^k**: bit shift the compute-tier result. For positive `k`,
//!    shift left (exact); for negative `k`, shift right with rounding.
//!
//! # Precision advantage over scaling-and-squaring
//!
//! Each squaring iteration of `exp(y)^2` doubles relative error. For `exp(20)` with
//! the halve-and-square method, 14 squarings amplify error by 16384×.
//!
//! With ln(2)-based reduction, there is **no squaring** — only Taylor convergence
//! error and bit-shift truncation. Total error stays bounded by ~Taylor truncation,
//! which is well below 1 ULP at the compute dp.

use super::decimal_compute::{
    ComputeStorage, DECIMAL_COMPUTE_DP,
    decimal_compute_zero, decimal_compute_one,
    decimal_compute_add, decimal_compute_sub, decimal_compute_mul, decimal_compute_div,
    decimal_compute_div_int, decimal_compute_halve,
    decimal_compute_is_zero, decimal_compute_is_negative, decimal_compute_cmp,
    decimal_compute_neg, pow10_compute_ct,
};
use crate::fixed_point::domains::symbolic::rational::rational_number::OverflowDetected;
use std::cell::RefCell;

thread_local! {
    /// Cached `ln(2)` at compute dp. Computed once via `2 * atanh(1/3)`.
    static LN2_FOR_EXP: RefCell<Option<ComputeStorage>> = const { RefCell::new(None) };
}

/// Compute `ln(2)` at decimal compute dp via `2 × atanh(1/3)`.
///
/// We can't depend on `ln.rs` because that would create a cycle when `pow` calls
/// both `ln` and `exp`. Inline the atanh series here (it's a small function).
fn ln2_at_compute() -> Result<ComputeStorage, OverflowDetected> {
    let cached: Option<ComputeStorage> = LN2_FOR_EXP.with(|c| c.borrow().clone());
    if let Some(v) = cached {
        return Ok(v);
    }

    let one = decimal_compute_one();
    let three = super::decimal_compute::decimal_compute_from_int(3);
    let s = decimal_compute_div(one, three)?;

    // atanh(s) = s + s³/3 + s⁵/5 + ... for |s| < 1
    let s_sq = decimal_compute_mul(s, s);
    let mut term = s;
    let mut sum = s;
    let max_terms = (DECIMAL_COMPUTE_DP as u32) + 20;
    for k in 1..=max_terms {
        term = decimal_compute_mul(term, s_sq);
        if decimal_compute_is_zero(&term) { break; }
        let divisor = (2 * k as u64) + 1;
        let contribution = decimal_compute_div_int(term, divisor);
        if decimal_compute_is_zero(&contribution) { break; }
        sum = decimal_compute_add(sum, contribution);
    }
    let ln2 = decimal_compute_add(sum, sum); // 2 × atanh(1/3)

    LN2_FOR_EXP.with(|c: &RefCell<Option<ComputeStorage>>| *c.borrow_mut() = Some(ln2));
    Ok(ln2)
}

/// Maximum Taylor terms for exp series with |r| ≤ ln(2)/2 ≈ 0.347.
///
/// Term k is bounded by `0.347^k / k!`. This falls below `10^-DECIMAL_COMPUTE_DP`
/// at about `k ≈ DECIMAL_COMPUTE_DP / 1.46 + 5` terms (Stirling estimate).
const fn max_taylor_terms() -> u32 {
    (DECIMAL_COMPUTE_DP as u32) + 20
}

/// Round a compute-tier value to the nearest integer (returned as i64).
fn round_to_int(v: ComputeStorage) -> Result<i64, OverflowDetected> {
    let half = decimal_compute_halve(decimal_compute_one());
    let rounded = if decimal_compute_is_negative(&v) {
        decimal_compute_sub(v, half)
    } else {
        decimal_compute_add(v, half)
    };
    let scale = decimal_compute_one();

    #[cfg(table_format = "q16_16")]
    { Ok(rounded / scale) }
    #[cfg(table_format = "q32_32")]
    { Ok((rounded / scale) as i64) }
    #[cfg(table_format = "q64_64")]
    {
        let q = rounded / scale;
        if !q.fits_in_i128() { return Err(OverflowDetected::Overflow); }
        let q_i128 = q.as_i128();
        if q_i128 > i64::MAX as i128 || q_i128 < i64::MIN as i128 {
            return Err(OverflowDetected::Overflow);
        }
        Ok(q_i128 as i64)
    }
    #[cfg(table_format = "q128_128")]
    {
        let q = rounded / scale;
        let q_i128 = q.as_i128();
        if q_i128 > i64::MAX as i128 || q_i128 < i64::MIN as i128 {
            return Err(OverflowDetected::Overflow);
        }
        Ok(q_i128 as i64)
    }
    #[cfg(table_format = "q256_256")]
    {
        let q = rounded / scale;
        let q_i128 = q.as_i128();
        if q_i128 > i64::MAX as i128 || q_i128 < i64::MIN as i128 {
            return Err(OverflowDetected::Overflow);
        }
        Ok(q_i128 as i64)
    }
}

/// Multiply a compute-tier value by `2^k`, where `k` may be negative (shift right with rounding).
///
/// **Shift type note**: I256 shifts take `u32`, while i64/i128/I512/I1024 take `usize`.
fn mul_by_pow2(v: ComputeStorage, k: i64) -> Result<ComputeStorage, OverflowDetected> {
    if k == 0 {
        return Ok(v);
    }
    if k > 0 {
        if k > 200 {
            return Err(OverflowDetected::Overflow);
        }
        // I256 Shl takes usize; I512/I1024 Shl take usize; primitive ints take u32 or usize.
        #[cfg(table_format = "q16_16")]
        { Ok(v << (k as u32)) }
        #[cfg(table_format = "q32_32")]
        { Ok(v << (k as u32)) }
        #[cfg(table_format = "q64_64")]
        { Ok(v << (k as usize)) }
        #[cfg(table_format = "q128_128")]
        { Ok(v << (k as usize)) }
        #[cfg(table_format = "q256_256")]
        { Ok(v << (k as usize)) }
    } else {
        let abs_k = (-k) as u64;
        if abs_k >= 250 {
            return Ok(decimal_compute_zero());
        }
        let one_compute = {
            #[cfg(table_format = "q16_16")]
            { 1i64 }
            #[cfg(table_format = "q32_32")]
            { 1i128 }
            #[cfg(table_format = "q64_64")]
            { crate::fixed_point::i256::I256::from_i128(1) }
            #[cfg(table_format = "q128_128")]
            { crate::fixed_point::i512::I512::from_i128(1) }
            #[cfg(table_format = "q256_256")]
            { crate::fixed_point::I1024::from_i128(1) }
        };
        // I256 Shl: usize; I256 Shr: u32 (asymmetric in i256.rs).
        // I512: both usize. I1024: both usize. Primitives: u32.
        let round_bit = {
            #[cfg(table_format = "q16_16")]
            { one_compute << ((abs_k - 1) as u32) }
            #[cfg(table_format = "q32_32")]
            { one_compute << ((abs_k - 1) as u32) }
            #[cfg(table_format = "q64_64")]
            { one_compute << ((abs_k - 1) as usize) }
            #[cfg(table_format = "q128_128")]
            { one_compute << ((abs_k - 1) as usize) }
            #[cfg(table_format = "q256_256")]
            { one_compute << ((abs_k - 1) as usize) }
        };
        let rounded = if decimal_compute_is_negative(&v) {
            v - round_bit
        } else {
            v + round_bit
        };
        #[cfg(table_format = "q16_16")]
        { Ok(rounded >> (abs_k as u32)) }
        #[cfg(table_format = "q32_32")]
        { Ok(rounded >> (abs_k as u32)) }
        #[cfg(table_format = "q64_64")]
        { Ok(rounded >> (abs_k as u32)) }
        #[cfg(table_format = "q128_128")]
        { Ok(rounded >> (abs_k as usize)) }
        #[cfg(table_format = "q256_256")]
        { Ok(rounded >> (abs_k as usize)) }
    }
}

// ============================================================================
// 4-STAGE TABLE CACHE FOR FAST EXP
// ============================================================================

/// Cached exp tables for 4-stage decimal digit decomposition.
///
/// `exp(x) = exp(k) × exp(d1/10) × exp(d2/100) × exp(d3/1000) × exp(d4/10000) × exp(r)`
/// where `|r| < 10^-4` and Taylor converges in ~8 terms at dp=38.
///
/// Tables are computed once per thread on first use, then reused.
/// Total: 31 + 4×10 = 71 entries × sizeof(ComputeStorage).
struct ExpTables {
    /// exp(k) for k = 0..=30
    exp_int: [ComputeStorage; 31],
    /// exp(d × 0.1) for d = 0..=9
    exp_tenths: [ComputeStorage; 10],
    /// exp(d × 0.01) for d = 0..=9
    exp_hundredths: [ComputeStorage; 10],
    /// exp(d × 0.001) for d = 0..=9
    exp_thousandths: [ComputeStorage; 10],
}

impl Clone for ExpTables {
    fn clone(&self) -> Self {
        Self {
            exp_int: self.exp_int,
            exp_tenths: self.exp_tenths,
            exp_hundredths: self.exp_hundredths,
            exp_thousandths: self.exp_thousandths,
        }
    }
}

thread_local! {
    static EXP_TABLES: RefCell<Option<ExpTables>> = const { RefCell::new(None) };
}

/// Pure Taylor series for exp(x) = 1 + x + x²/2! + x³/3! + ...
/// No range reduction — works for any x but converges faster for small |x|.
/// Early-exits when terms underflow to zero.
fn exp_taylor_raw(x: ComputeStorage) -> Result<ComputeStorage, OverflowDetected> {
    let one = decimal_compute_one();
    let mut term = one;
    let mut sum = one;
    let max_terms = max_taylor_terms();
    for n in 1..=max_terms {
        term = decimal_compute_mul(term, x);
        term = decimal_compute_div_int(term, n as u64);
        if decimal_compute_is_zero(&term) { break; }
        sum = decimal_compute_add(sum, term);
    }
    Ok(sum)
}

/// Extract a small integer from ComputeStorage (must fit in i64).
fn compute_as_i64(v: ComputeStorage) -> i64 {
    #[cfg(table_format = "q16_16")]
    { v }
    #[cfg(table_format = "q32_32")]
    { v as i64 }
    #[cfg(table_format = "q64_64")]
    { v.as_i128() as i64 }
    #[cfg(table_format = "q128_128")]
    { v.as_i128() as i64 }
    #[cfg(table_format = "q256_256")]
    { v.as_i128() as i64 }
}

/// Build exp tables (one-time per thread). Uses raw Taylor series.
fn build_exp_tables() -> Result<ExpTables, OverflowDetected> {
    let one = decimal_compute_one();
    let zero = decimal_compute_zero();

    // exp(1) via Taylor (converges in ~35 terms at dp=38)
    let e1 = exp_taylor_raw(one)?;

    // exp(k) = exp(1)^k via successive multiplication
    let mut exp_int = [zero; 31];
    exp_int[0] = one;
    exp_int[1] = e1;
    for k in 2..=30usize {
        exp_int[k] = decimal_compute_mul(exp_int[k - 1], e1);
    }

    // exp(d × 0.1) for d = 0..9
    let mut exp_tenths = [zero; 10];
    exp_tenths[0] = one;
    let pt1 = pow10_compute_ct(DECIMAL_COMPUTE_DP - 1); // 0.1 at compute dp
    let e_pt1 = exp_taylor_raw(pt1)?;
    exp_tenths[1] = e_pt1;
    for d in 2..=9usize {
        exp_tenths[d] = decimal_compute_mul(exp_tenths[d - 1], e_pt1);
    }

    // exp(d × 0.01)
    let mut exp_hundredths = [zero; 10];
    exp_hundredths[0] = one;
    let pt01 = pow10_compute_ct(DECIMAL_COMPUTE_DP - 2);
    let e_pt01 = exp_taylor_raw(pt01)?;
    exp_hundredths[1] = e_pt01;
    for d in 2..=9usize {
        exp_hundredths[d] = decimal_compute_mul(exp_hundredths[d - 1], e_pt01);
    }

    // exp(d × 0.001)
    let mut exp_thousandths = [zero; 10];
    exp_thousandths[0] = one;
    let pt001 = pow10_compute_ct(DECIMAL_COMPUTE_DP - 3);
    let e_pt001 = exp_taylor_raw(pt001)?;
    exp_thousandths[1] = e_pt001;
    for d in 2..=9usize {
        exp_thousandths[d] = decimal_compute_mul(exp_thousandths[d - 1], e_pt001);
    }

    Ok(ExpTables { exp_int, exp_tenths, exp_hundredths, exp_thousandths })
}

/// 4-stage table-based exp for |x| ≤ 30.
///
/// Decomposes |x| = k + d1/10 + d2/100 + d3/1000 + r where:
/// - k ∈ [0, 30] — integer part
/// - d1, d2, d3 ∈ [0, 9] — fractional decimal digits
/// - |r| < 10^-3 — tiny remainder for short Taylor (~8-12 terms)
///
/// Result = exp(k) × exp(d1/10) × exp(d2/100) × exp(d3/1000) × exp(r)
/// All factors except exp(r) are table lookups. 4 widening multiplies + short Taylor.
fn decimal_exp_table_path(abs_x: ComputeStorage) -> Result<ComputeStorage, OverflowDetected> {
    let one = decimal_compute_one();
    let s1 = pow10_compute_ct(DECIMAL_COMPUTE_DP - 1);
    let s2 = pow10_compute_ct(DECIMAL_COMPUTE_DP - 2);
    let s3 = pow10_compute_ct(DECIMAL_COMPUTE_DP - 3);

    // Extract integer part and 3 fractional digits
    let k_compute = abs_x / one;
    let k = compute_as_i64(k_compute);
    let frac = decimal_compute_sub(abs_x, mul_compute_by_int(one, k));

    let d1_compute = frac / s1;
    let d1 = compute_as_i64(d1_compute);
    let frac2 = decimal_compute_sub(frac, mul_compute_by_int(s1, d1));

    let d2_compute = frac2 / s2;
    let d2 = compute_as_i64(d2_compute);
    let frac3 = decimal_compute_sub(frac2, mul_compute_by_int(s2, d2));

    let d3_compute = frac3 / s3;
    let d3 = compute_as_i64(d3_compute);
    let remainder = decimal_compute_sub(frac3, mul_compute_by_int(s3, d3));

    // Bounds check — k must be in table range
    if k < 0 || k > 30 || d1 < 0 || d1 > 9 || d2 < 0 || d2 > 9 || d3 < 0 || d3 > 9 {
        // Fallback to ln(2) reduction for out-of-range
        return decimal_exp_ln2_reduction(abs_x);
    }

    // Table lookup + multiply
    EXP_TABLES.with(|c| {
        // Ensure tables are built
        {
            let cached = c.borrow();
            if cached.is_none() {
                drop(cached);
                let tables = build_exp_tables()?;
                *c.borrow_mut() = Some(tables);
            }
        }
        let tables = c.borrow();
        let t = tables.as_ref().unwrap();

        let mut result = t.exp_int[k as usize];
        result = decimal_compute_mul(result, t.exp_tenths[d1 as usize]);
        result = decimal_compute_mul(result, t.exp_hundredths[d2 as usize]);
        result = decimal_compute_mul(result, t.exp_thousandths[d3 as usize]);

        // Short Taylor for remainder (|r| < 10^-3)
        if !decimal_compute_is_zero(&remainder) {
            let exp_r = exp_taylor_raw(remainder)?;
            result = decimal_compute_mul(result, exp_r);
        }

        Ok(result)
    })
}

/// ln(2)-based range reduction fallback for large |x| > 30.
fn decimal_exp_ln2_reduction(x: ComputeStorage) -> Result<ComputeStorage, OverflowDetected> {
    let ln2 = ln2_at_compute()?;
    let x_over_ln2 = decimal_compute_div(x, ln2)?;
    let k = round_to_int(x_over_ln2)?;
    let k_times_ln2 = mul_compute_by_int(ln2, k);
    let r = decimal_compute_sub(x, k_times_ln2);

    let exp_r = exp_taylor_raw(r)?;
    mul_by_pow2(exp_r, k)
}

/// Compute `exp(x)` for x at compute dp.
///
/// # Algorithm
///
/// For |x| ≤ 30: **4-stage table decomposition** — decomposes x into decimal digits,
/// looks up precomputed exp values per digit, multiplies, then short Taylor for the
/// tiny remainder. ~4 widening multiplies + ~8-12 Taylor terms.
///
/// For |x| > 30: **ln(2)-based range reduction** — `exp(x) = 2^k × exp(r)` where
/// `k = round(x/ln(2))` and `|r| ≤ ln(2)/2`.
///
/// The table path is ~2-3× faster than the ln(2) path for common inputs.
pub fn decimal_exp(x: ComputeStorage) -> Result<ComputeStorage, OverflowDetected> {
    if decimal_compute_is_zero(&x) {
        return Ok(decimal_compute_one());
    }

    let is_neg = decimal_compute_is_negative(&x);
    let abs_x = if is_neg { decimal_compute_neg(x) } else { x };

    // Check if |x| ≤ 30 (table path range)
    let one = decimal_compute_one();
    let thirty = mul_compute_by_int(one, 30);
    let use_table = decimal_compute_cmp(&abs_x, &thirty) != std::cmp::Ordering::Greater;

    let result = if use_table {
        decimal_exp_table_path(abs_x)?
    } else {
        // Large argument: ln(2) reduction on the original (signed) value
        return decimal_exp_ln2_reduction(x);
    };

    if is_neg {
        // exp(-|x|) = 1/exp(|x|)
        decimal_compute_div(one, result)
    } else {
        Ok(result)
    }
}

/// Multiply a compute-tier value by an integer (positive or negative).
fn mul_compute_by_int(v: ComputeStorage, n: i64) -> ComputeStorage {
    if n == 0 {
        return decimal_compute_zero();
    }
    let negative = n < 0;
    let n_abs = n.unsigned_abs();
    let n_compute: ComputeStorage = {
        #[cfg(table_format = "q16_16")]
        { n_abs as i64 }
        #[cfg(table_format = "q32_32")]
        { n_abs as i128 }
        #[cfg(table_format = "q64_64")]
        { crate::fixed_point::i256::I256::from_i128(n_abs as i128) }
        #[cfg(table_format = "q128_128")]
        { crate::fixed_point::i512::I512::from_i128(n_abs as i128) }
        #[cfg(table_format = "q256_256")]
        { crate::fixed_point::I1024::from_i128(n_abs as i128) }
    };
    let result = v * n_compute;
    if negative {
        decimal_compute_neg(result)
    } else {
        result
    }
}

/// Compute `exp(-x)` — used internally for hyperbolic functions.
#[allow(dead_code)]
pub fn decimal_exp_neg(x: ComputeStorage) -> Result<ComputeStorage, OverflowDetected> {
    decimal_exp(decimal_compute_neg(x))
}

/// Fused `(sinh(x), cosh(x))` at decimal compute tier — shares one exp-pair evaluation.
///
/// Computes `exp(x)` and `exp(-x)` once, then derives
/// `sinh(x) = (exp(x) - exp(-x)) / 2` and `cosh(x) = (exp(x) + exp(-x)) / 2`.
///
/// sinh and cosh are derived from the **same** `(ep, en)` pair at compute tier,
/// so their rounding bias is correlated — critical for expressions like
/// `cosh(θ)·p + (sinh(θ)/θ)·v` where the two errors cancel.
pub fn decimal_sinhcosh(x: ComputeStorage) -> Result<(ComputeStorage, ComputeStorage), OverflowDetected> {
    if decimal_compute_is_zero(&x) {
        return Ok((decimal_compute_zero(), decimal_compute_one()));
    }
    let ep = decimal_exp(x)?;
    let en = decimal_exp(decimal_compute_neg(x))?;
    let sinh_c = decimal_compute_halve(decimal_compute_sub(ep, en));
    let cosh_c = decimal_compute_halve(decimal_compute_add(ep, en));
    Ok((sinh_c, cosh_c))
}

#[cfg(all(test, table_format = "q64_64"))]
mod tests {
    use super::*;
    use super::super::decimal_compute::{decimal_compute_from_int, pow10_compute_ct};
    use crate::fixed_point::i256::I256;

    /// mpmath reference: exp(0) = 1.0 exactly
    #[test]
    fn exp_zero_is_one() {
        let result = decimal_exp(decimal_compute_zero()).unwrap();
        assert_eq!(result, decimal_compute_one());
    }

    /// mpmath: exp(1) = 2.71828182845904523536028747135266249775724709369995957...
    #[test]
    fn exp_one_within_1_ulp() {
        let result = decimal_exp(decimal_compute_one()).unwrap();
        // Expected: 2.71828182845904523536028747135266249775 × 10^38
        // = 27182818284590452353602874713526624977 at dp=38 (+0.5 rounding)
        // mpmath rounded: 2.7182818284590452353602874713526624977572 × 10^38
        // → 27182818284590452353602874713526624978 at dp=38 (round half to even)
        let expected_str = "271828182845904523536028747135266249776";
        let expected = parse_decimal_str_q64_64(expected_str);

        // Allow up to 1000 ULP tolerance for this first correctness check
        let diff = if result > expected { result - expected } else { expected - result };
        let tolerance = I256::from_i128(10_000);
        assert!(
            diff < tolerance,
            "exp(1) precision check: got={:?}, expected={:?}, diff={:?}",
            result, expected, diff
        );
    }

    /// mpmath: exp(0.5) = 1.64872127070012814684865078781416357165377610071014...
    #[test]
    fn exp_half_within_1_ulp() {
        // 0.5 at compute dp = 5 × 10^37
        let half = pow10_compute_ct(37) * I256::from_i128(5);
        let result = decimal_exp(half).unwrap();
        // Expected: 1.64872127070012814684865078781416357165 × 10^38
        let expected_str = "164872127070012814684865078781416357165";
        let expected = parse_decimal_str_q64_64(expected_str);

        let diff = if result > expected { result - expected } else { expected - result };
        let tolerance = I256::from_i128(10_000);
        assert!(
            diff < tolerance,
            "exp(0.5) precision check: got={:?}, expected={:?}, diff={:?}",
            result, expected, diff
        );
    }

    /// Parse a decimal-digit string into an I256 assuming it represents a value
    /// at q64_64 compute dp=38.
    fn parse_decimal_str_q64_64(s: &str) -> I256 {
        let mut result = I256::from_i128(0);
        let ten = I256::from_i128(10);
        for ch in s.chars() {
            let digit = ch.to_digit(10).expect("non-digit in test string");
            result = result * ten + I256::from_i128(digit as i128);
        }
        result
    }

    /// mpmath: exp(2) = 7.38905609893065022723042746057500781318031557055184...
    #[test]
    fn exp_two_within_reasonable() {
        let two = decimal_compute_from_int(2);
        let result = decimal_exp(two).unwrap();
        // Expected: 7.38905609893065022723042746057500781318 × 10^38
        let expected_str = "738905609893065022723042746057500781318";
        let expected = parse_decimal_str_q64_64(expected_str);

        let diff = if result > expected { result - expected } else { expected - result };
        let tolerance = I256::from_i128(100_000);
        assert!(
            diff < tolerance,
            "exp(2) precision check: got={:?}, expected={:?}, diff={:?}",
            result, expected, diff
        );
    }

    /// mpmath: exp(-1) = 0.36787944117144232839...
    /// Storage-tier validation (compute-tier rounding from 1/exp(1) is acceptable
    /// as long as storage-tier result is 0 ULP — covered by decimal_transcendental_validation).
    #[test]
    fn exp_neg_one() {
        use crate::canonical::{gmath, evaluate};
        let result = evaluate(&gmath("-1.0").exp()).unwrap();
        let s = format!("{}", result);
        // exp(-1) ≈ 0.36787944117144232...
        assert!(s.starts_with("0.3678794411714"),
            "exp(-1) at storage tier should match mpmath to 13+ digits, got: {}", s);
    }
}