1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
//! Named constants replacing magic numbers throughout scry-learn.
//!
//! Grouped by purpose. Each constant documents where it is used.
//! All tolerances/thresholds are `f64` unless they bound a size/count,
//! in which case they are `usize`.
// ─── Near-zero thresholds ────────────────────────────────────────────────────
/// Threshold below which a value is treated as effectively zero.
/// Used for: column norm checks (lasso, elastic_net), probability clamping
/// (gradient boosting log-loss), PCA variance guards, Jacobi element skipping,
/// isotonic regression interpolation.
pub const NEAR_ZERO: f64 = 1e-15;
/// Threshold for singular matrix detection in Gauss-Jordan elimination
/// and Newton-Raphson Hessian denominators.
pub const SINGULAR_THRESHOLD: f64 = 1e-12;
/// Hessian regularization constant for Platt scaling Newton steps.
pub const PLATT_HESSIAN_REG: f64 = 1e-12;
/// Singular determinant threshold for the Platt scaling 2×2 Newton system;
/// determinants below this are treated as singular.
pub const PLATT_SINGULAR_DET: f64 = 1e-20;
/// Minimum step size for Platt scaling Newton line search; the search stops
/// backtracking once the step shrinks below this.
pub const PLATT_MIN_STEP: f64 = 1e-10;
/// Convergence threshold for Platt scaling parameter updates.
pub const PLATT_CONVERGENCE: f64 = 1e-9;
/// Probability clamping bound for gradient boosting prior/leaf values:
/// probabilities are kept within [this, 1 − this] to keep log-odds finite.
pub const GBT_PROB_CLAMP: f64 = 1e-7;
// ─── Convergence tolerances ──────────────────────────────────────────────────
/// Default convergence tolerance for coordinate descent (Lasso, ElasticNet,
/// LinearSVC, LinearSVR).
pub const DEFAULT_TOL: f64 = 1e-4;
/// Stricter convergence tolerance for L-BFGS and logistic regression.
pub const STRICT_TOL: f64 = 1e-6;
/// Jacobi eigendecomposition convergence tolerance (off-diagonal Frobenius norm).
pub const JACOBI_TOL: f64 = 1e-12;
/// Maximum Jacobi sweeps in PCA eigendecomposition before giving up.
pub const JACOBI_MAX_SWEEPS: usize = 100;
// ─── Line search constants ───────────────────────────────────────────────────
/// Armijo sufficient decrease constant (c₁ in the Wolfe conditions).
pub const ARMIJO_C: f64 = 1e-4;
/// Strong Wolfe curvature condition constant (c₂).
pub const WOLFE_C2: f64 = 0.9;
/// Backtracking factor for line search step reduction (step is halved
/// each failed iteration).
pub const LINE_SEARCH_BACKTRACK: f64 = 0.5;
/// Maximum line search iterations before giving up.
pub const LINE_SEARCH_MAX_ITER: usize = 20;
/// Steepest descent fallback step size scaling factor.
pub const STEEPEST_DESCENT_SCALE: f64 = 0.01;
// ─── L-BFGS constants ────────────────────────────────────────────────────────
/// Curvature condition threshold for L-BFGS history update.
/// Only add a correction pair if s·y > this value (keeps the implicit
/// inverse-Hessian approximation positive definite).
pub const LBFGS_CURVATURE_THRESH: f64 = 1e-16;
// ─── Optimizer defaults ──────────────────────────────────────────────────────
/// Default momentum coefficient for SGD with momentum.
pub const SGD_MOMENTUM: f64 = 0.9;
/// Default Adam first moment decay rate (β₁).
pub const ADAM_BETA1: f64 = 0.9;
/// Default Adam second moment decay rate (β₂).
pub const ADAM_BETA2: f64 = 0.999;
/// Default Adam numerical stability constant (ε).
pub const ADAM_EPSILON: f64 = 1e-8;
// ─── Pegasos (SVM) constants ─────────────────────────────────────────────────
/// Learning rate decay constant for Pegasos SGD: lr = 1/(C * (1 + DECAY * epoch)).
pub const PEGASOS_LR_DECAY: f64 = 0.01;
// ─── SMO (Kernel SVM) constants ──────────────────────────────────────────────
/// Default convergence tolerance for SMO (KKT violation threshold).
/// Matches sklearn's `SVC(tol=1e-3)` default.
pub const SMO_TOL: f64 = 1e-3;
/// Threshold for treating SMO box constraint bounds L and H as equal.
/// When |L − H| < this value the pair is skipped.
pub const SMO_BOUNDS_EQ: f64 = 1e-8;
/// Minimum change in alpha_j to accept an SMO step.
/// Steps smaller than this are treated as no-ops.
pub const SMO_ALPHA_CHANGE_THRESH: f64 = 1e-5;
/// Alpha threshold for identifying support vectors.
/// Points with alpha > this value are retained after SMO.
pub const SV_ALPHA_THRESH: f64 = 1e-8;
// ─── Parallelism thresholds ──────────────────────────────────────────────────
/// Minimum n×m product to enable rayon parallelism in logistic regression
/// feature gradient computation. Below this, rayon spawn overhead (~2-5 µs)
/// exceeds the parallel speedup.
pub const LOGREG_PAR_THRESHOLD: usize = 5_000;
/// Minimum query×train×features product to parallelize KNN brute-force predict.
pub const KNN_PAR_THRESHOLD: usize = 10_000;
/// Minimum n×k product to parallelize K-Means assignment step.
pub const KMEANS_PAR_THRESHOLD: usize = 5_000;
/// Minimum n² kernel matrix size to parallelize SVM kernel computation.
pub const SVM_KERNEL_PAR_THRESHOLD: usize = 10_000;