1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
use *;
pub const SURVIVAL_ROW_PARALLEL_THRESHOLD: usize = 256;
pub const SURVIVAL_ROW_PARALLEL_CHUNK: usize = 64;
/// Relative slack tolerating round-off when checking a represented
/// nonnegativity / linear-inequality constraint (`value < -tol·max(1, |scale|)`
/// rejects). A coefficient or slack that is negative only by this much is
/// floating-point noise about an active constraint, not a real violation.
pub const CONSTRAINT_NONNEGATIVITY_REL_TOL: f64 = 1e-10;
/// Absolute feasibility tolerance of the monotone time-derivative cone that the
/// DOWNSTREAM consumers actually enforce — the active-set QP entry gate
/// (`check_linear_feasibility`) and the cone projection
/// (`project_onto_linear_constraints`) both certify feasibility to this `1e-8`
/// (gam#1108/#797). The strictly-interior projection lands with ~1e-6 of margin,
/// so a converged iterate clears this gate with room, but accumulated round-off
/// over an inner solve can leave a binding guard row at slack ~-1e-9..-1e-8 —
/// numerically AT the boundary, not a real violation.
///
/// The post-update sanity check ([`validate_linear_constraints`]) must therefore
/// accept any β the consumer gate accepts: its tolerance is floored at this
/// value so it never rejects a round-off-feasible iterate the rest of the
/// pipeline treats as feasible. Before #1569 it used only the much stricter
/// `CONSTRAINT_NONNEGATIVITY_REL_TOL` (1e-10·scale); once the spectrum-branch
/// α-crush bypass (gam#1569) let the aggressive heteroscedastic survival-LS solve
/// run to its final inner refit, that refit's cone-projected β routinely landed
/// at slack ~-6.6e-9 — feasible to the 1e-8 gate but a hard error at 1e-10 — so
/// the otherwise-converged fit failed on a pure numerical-precision mismatch.
pub const MONOTONE_CONE_FEASIBILITY_GATE_TOL: f64 = 1e-8;
/// Maximum number of Dykstra alternating-projection sweeps when projecting an
/// initial coefficient guess onto the represented linear inequality
/// constraints. The projection converges geometrically; this caps the rare
/// near-degenerate constraint set and keeps the warm-start best-effort.
pub const DYKSTRA_PROJECTION_MAX_SWEEPS: usize = 100;
/// Absolute feasibility tolerance at which the Dykstra projection sweep is
/// declared converged (max constraint violation below this stops the loop).
pub const DYKSTRA_PROJECTION_TOL: f64 = 1e-10;
/// Squared-row-norm floor below which a constraint row is treated as
/// structurally empty and skipped during Dykstra projection (avoids dividing
/// the projection step by a vanishing normal).
pub const DYKSTRA_ROW_DEGENERACY_FLOOR: f64 = 1e-18;
/// Relative tolerance (× the largest |eigenvalue|) for accepting a covariance
/// block as positive semidefinite, floored by an absolute value so an
/// all-tiny-eigenvalue block is not rejected on pure round-off. Eigenvalues
/// below `-tol` flag a genuine indefinite block.
pub const PSD_EIGENVALUE_REL_TOL: f64 = 1e-12;
pub const PSD_EIGENVALUE_ABS_FLOOR: f64 = 1e-14;
/// Levenberg damping schedule for the direct parametric-AFT Newton solve. When
/// the Hessian is not Cholesky-factorizable, damping starts at
/// `INITIAL × max(1, ‖diag H‖∞)`, grows by `GROWTH` per failed factorization,
/// and the solve aborts once it would exceed `MAX × max(1, ‖diag H‖∞)` (the
/// Hessian is then numerically unsalvageable). All three scale with the
/// Hessian's diagonal magnitude so the schedule is units-invariant.
pub const LEVENBERG_INITIAL_DAMPING_REL: f64 = 1e-8;
pub const LEVENBERG_DAMPING_GROWTH: f64 = 10.0;
pub const LEVENBERG_MAX_DAMPING_REL: f64 = 1e8;
/// Relative floor for the coupled-survival SCALE-COUPLED block trust-region
/// metric (issue #1569). The free scale predictor `η_σ` enters the likelihood
/// through the standardized index `u = inv_sigma·(h − η_t)` with
/// `inv_sigma = exp(−η_σ)`, so `∂u/∂η_t = −inv_sigma`: the LOCATION (threshold)
/// and LOG-σ channels carry an `exp(−η_σ)` factor in their gradient and an
/// `exp(−2 η_σ)` factor in their likelihood-Hessian diagonal
/// `Σ_r exp(−2 η_σ,r) X_{rj}²` (the flexible time baseline `h` does NOT — its
/// `∂u/∂h = 1` is scale-free). When `exp(−η_σ)` is large on a few rows the metric
/// is dominated by them for the coefficients they load on, while a coefficient
/// loading mostly on large-σ rows is metric-STARVED. The affine-covariant
/// Moré–Sorensen step then over-reaches on the starved coordinates and the inner
/// trust loop grinds. We floor every scale-coupled metric entry at this fraction
/// of the block's MAXIMUM metric entry, capping the `exp(−η_σ)`-induced condition
/// number so no coordinate is starved. The floor is derived ENTIRELY from the
/// scale-coupled diagonal (no knob, no flag): it scales with `exp(−2 η_σ)`
/// automatically and self-vanishes at the KKT fixed point (it shapes the step
/// norm only, never the converged β). `1e-6` keeps the floor six orders below the
/// block's dominant curvature so a well-conditioned block (constant or mild
/// scale) sees a byte-identical metric, while a sharply heteroscedastic block has
/// its metric dynamic range capped at `1e6`.
pub const SCALE_COUPLED_TRUST_METRIC_FLOOR_REL: f64 = 1e-6;
/// Outer (smoothing-parameter) loop budget for the blockwise location-scale
/// fit: at most this many outer iterations, stopping once the outer relative
/// change falls below the tolerance. The dead-flat time-smoothing ridge of the
/// constant-scale case is what makes a finite cap necessary.
pub const BLOCKWISE_OUTER_MAX_ITER: usize = 60;
pub const BLOCKWISE_OUTER_TOL: f64 = 1e-5;
/// Lower bound on the gradient tolerance handed to the reduced parametric-AFT
/// direct MLE. The inner-solve tolerance can be configured arbitrarily small;
/// flooring it here keeps the Newton stopping test above the noise of the
/// log-likelihood gradient evaluation.
pub const REDUCED_AFT_GRAD_TOL_FLOOR: f64 = 1e-8;
/// Relative ridge added to the normal-equations diagonal of the structural
/// time-coefficient warm-start least squares (× the largest diagonal of XᵀX,
/// floored at 1). Stabilizes the best-effort guess against a rank-deficient
/// derivative design without materially biasing it.
pub const STRUCTURAL_GUESS_RIDGE_REL: f64 = 1e-6;
/// Floor on the exit age when forming the `1/age` structural-derivative target
/// for the time warm-start, guarding against a divide-by-zero at age 0.
pub const STRUCTURAL_GUESS_AGE_FLOOR: f64 = 1e-9;
/// Target byte budget for one row-chunk when streaming a design matrix's
/// trailing columns into a dense buffer. The per-chunk row count is derived as
/// `BUDGET / (p · sizeof(f64))`, so wide designs use proportionally fewer rows
/// per chunk and the working set stays near this size regardless of `p`.
pub const ROW_CHUNK_BYTE_BUDGET: usize = 8 * 1024 * 1024;
/// Relative floor on the monotonicity-guard round-off slack: the compensated
/// subtraction's low-part residual is the primary slack estimate, but this
/// `1e-12 × (1 + ‖state‖∞)` term remains as a floor for moderate-magnitude
/// inputs where the residual underestimates accumulated error.
pub const MONOTONICITY_GUARD_SLACK_REL: f64 = 1e-12;
/// Location-scale guard policy: a degenerate `guard == 0` (a bare
/// non-negativity request on `q'(t)`) is admissible here, and feasibility of
/// coefficient-free rows uses the family's historical absolute slack.
pub const LOCATION_SCALE_GUARD_POLICY: GuardConstraintPolicy = GuardConstraintPolicy ;
pub const DENSE_WEIGHTED_CROSSPROD_PARALLEL_FLOP_THRESHOLD: u64 = 200_000;
pub const DENSE_ROW_SCALE_PARALLEL_ELEM_THRESHOLD: usize = 100_000;
pub const DENSE_ROW_CHUNKS_PER_THREAD: usize = 4;