// ngboost_rs/scores/mod.rs

use ndarray::{Array1, Array2, Array3};
use ndarray_linalg::{Inverse, Solve, SVD};
3
/// A marker trait for scoring rules.
///
/// Implemented by zero-sized marker types (e.g. `LogScore`, `CRPScore`).
/// Distributions opt into a rule by implementing [`Scorable<S>`] (or
/// [`CensoredScorable<S>`] for survival data) for the corresponding marker.
pub trait Score: 'static + Clone + Copy + Default {}
6
/// The Logarithmic Score (or Log-Likelihood).
///
/// Zero-sized marker type; the actual scoring logic lives in the
/// distribution's [`Scorable<LogScore>`] implementation.
#[derive(Clone, Copy, Default)]
pub struct LogScore;
impl Score for LogScore {}
11
/// The Continuous Ranked Probability Score.
///
/// Zero-sized marker type; the actual scoring logic lives in the
/// distribution's [`Scorable<CRPScore>`] implementation.
#[derive(Clone, Copy, Default)]
pub struct CRPScore;
impl Score for CRPScore {}
16
/// Censored version of LogScore for survival analysis.
///
/// Zero-sized marker type; pair with [`CensoredScorable`] implementations
/// that score [`SurvivalData`] (event indicator + time) observations.
#[derive(Clone, Copy, Default)]
pub struct LogScoreCensored;
impl Score for LogScoreCensored {}
21
/// Censored version of CRPScore for survival analysis.
///
/// Zero-sized marker type; pair with [`CensoredScorable`] implementations
/// that score [`SurvivalData`] (event indicator + time) observations.
#[derive(Clone, Copy, Default)]
pub struct CRPScoreCensored;
impl Score for CRPScoreCensored {}
26
/// Survival data structure containing event indicators and times.
///
/// NOTE(review): `event` and `time` are presumably parallel arrays (one
/// entry per observation, equal length) — none of the constructors enforce
/// this; confirm at call sites.
#[derive(Debug, Clone)]
pub struct SurvivalData {
    /// Event indicator: true if event occurred, false if censored
    pub event: Array1<bool>,
    /// Time to event or censoring
    pub time: Array1<f64>,
}
35
36impl SurvivalData {
37    /// Create new survival data from event indicators and times.
38    pub fn new(event: Array1<bool>, time: Array1<f64>) -> Self {
39        SurvivalData { event, time }
40    }
41
42    /// Create from separate arrays (converting f64 event to bool).
43    pub fn from_arrays(time: &Array1<f64>, event: &Array1<f64>) -> Self {
44        let event_bool = event.mapv(|e| e > 0.5);
45        SurvivalData {
46            event: event_bool,
47            time: time.clone(),
48        }
49    }
50
51    /// Create survival data assuming all events are observed (no censoring).
52    pub fn uncensored(time: Array1<f64>) -> Self {
53        let event = Array1::from_elem(time.len(), true);
54        SurvivalData { event, time }
55    }
56
57    /// Get the number of observations.
58    pub fn len(&self) -> usize {
59        self.time.len()
60    }
61
62    /// Check if empty.
63    pub fn is_empty(&self) -> bool {
64        self.time.is_empty()
65    }
66}
67
68/// A trait for censored scoring rules used in survival analysis.
69pub trait CensoredScorable<S: Score> {
70    /// Calculates the censored score for each observation.
71    fn censored_score(&self, y: &SurvivalData) -> Array1<f64>;
72
73    /// Calculates the gradient of the censored score with respect to the distribution's parameters.
74    fn censored_d_score(&self, y: &SurvivalData) -> Array2<f64>;
75
76    /// Calculates the Riemannian metric tensor for censored data.
77    fn censored_metric(&self) -> Array3<f64>;
78
79    /// Calculates the total censored score, averaged over all observations.
80    fn total_censored_score(&self, y: &SurvivalData, sample_weight: Option<&Array1<f64>>) -> f64 {
81        let scores = self.censored_score(y);
82        if let Some(weights) = sample_weight {
83            (scores * weights).sum() / weights.sum()
84        } else {
85            scores.mean().unwrap_or(0.0)
86        }
87    }
88
89    /// Calculates the gradient for censored data, optionally the natural gradient.
90    fn censored_grad(&self, y: &SurvivalData, natural: bool) -> Array2<f64>
91    where
92        Self: Sized,
93    {
94        let grad = self.censored_d_score(y);
95        if !natural {
96            return grad;
97        }
98
99        let metric = self.censored_metric();
100        let n_obs = grad.nrows();
101        let mut natural_grad = Array2::zeros(grad.raw_dim());
102
103        for i in 0..n_obs {
104            let g_i = grad.row(i).to_owned();
105            let metric_i = metric.index_axis(ndarray::Axis(0), i).to_owned();
106
107            // Try direct solve first
108            if let Ok(ng_i) = metric_i.solve_into(g_i.clone()) {
109                if ng_i.iter().all(|&v| v.is_finite()) {
110                    natural_grad.row_mut(i).assign(&ng_i);
111                    continue;
112                }
113            }
114
115            // Fall back to inverse
116            if let Ok(inv_metric_i) = metric_i.inv() {
117                let result = inv_metric_i.dot(&grad.row(i));
118                if result.iter().all(|&v| v.is_finite()) {
119                    natural_grad.row_mut(i).assign(&result);
120                    continue;
121                }
122            }
123
124            // Fall back to pseudo-inverse
125            if let Some(pinv_metric_i) = pinv(&metric_i) {
126                let result = pinv_metric_i.dot(&grad.row(i));
127                if result.iter().all(|&v| v.is_finite()) {
128                    natural_grad.row_mut(i).assign(&result);
129                    continue;
130                }
131            }
132
133            // Last resort: use regular gradient
134            natural_grad.row_mut(i).assign(&(&grad.row(i) * 0.99));
135        }
136        natural_grad
137    }
138}
139
140/// Compute the natural gradient with optional Tikhonov regularization.
141/// This function adds `reg * I` to the metric before solving, which stabilizes
142/// the solution for ill-conditioned Fisher Information Matrices.
143///
144/// # Arguments
145/// * `grad` - The standard gradient (n_obs x n_params)
146/// * `metric` - The Fisher Information Matrix (n_obs x n_params x n_params)
147/// * `reg` - Tikhonov regularization parameter (0.0 to disable)
148///
149/// # Returns
150/// The natural gradient (n_obs x n_params)
151pub fn natural_gradient_regularized(
152    grad: &Array2<f64>,
153    metric: &Array3<f64>,
154    reg: f64,
155) -> Array2<f64> {
156    let n_obs = grad.nrows();
157    let n_params = grad.ncols();
158    let mut natural_grad = Array2::zeros(grad.raw_dim());
159
160    for i in 0..n_obs {
161        let g_i = grad.row(i).to_owned();
162        let mut metric_i = metric.index_axis(ndarray::Axis(0), i).to_owned();
163
164        // Apply Tikhonov regularization: F_reg = F + reg * I
165        if reg > 0.0 {
166            for j in 0..n_params {
167                metric_i[[j, j]] += reg;
168            }
169        }
170
171        // Try direct solve first (fastest)
172        if let Ok(ng_i) = metric_i.solve_into(g_i.clone()) {
173            if ng_i.iter().all(|&v| v.is_finite()) {
174                natural_grad.row_mut(i).assign(&ng_i);
175                continue;
176            }
177        }
178
179        // Fall back to inverse
180        if let Ok(inv_metric_i) = metric_i.inv() {
181            let result = inv_metric_i.dot(&grad.row(i));
182            if result.iter().all(|&v| v.is_finite()) {
183                natural_grad.row_mut(i).assign(&result);
184                continue;
185            }
186        }
187
188        // Fall back to pseudo-inverse
189        if let Some(pinv_metric_i) = pinv(&metric_i) {
190            let result = pinv_metric_i.dot(&grad.row(i));
191            if result.iter().all(|&v| v.is_finite()) {
192                natural_grad.row_mut(i).assign(&result);
193                continue;
194            }
195        }
196
197        // Last resort: use regular gradient with small damping
198        natural_grad.row_mut(i).assign(&(&grad.row(i) * 0.99));
199    }
200    natural_grad
201}
202
203/// Compute the Moore-Penrose pseudo-inverse of a matrix using SVD.
204/// This matches numpy's np.linalg.pinv behavior with default rcond.
205fn pinv(matrix: &Array2<f64>) -> Option<Array2<f64>> {
206    // Use default rcond like numpy (machine epsilon times max dimension)
207    let rcond = 1e-15; // This matches numpy's default behavior for typical matrices
208
209    // Perform SVD: A = U * S * V^T
210    let (u, s, vt) = matrix.svd(true, true).ok()?;
211    let u = u?;
212    let vt = vt?;
213
214    // Determine the cutoff for small singular values
215    let max_sv = s.iter().cloned().fold(0.0_f64, f64::max);
216    let cutoff = rcond * max_sv;
217
218    // Compute S^+ (pseudo-inverse of singular values)
219    let s_pinv: Array1<f64> = s.mapv(|sv| if sv > cutoff { 1.0 / sv } else { 0.0 });
220
221    // Compute A^+ = V * S^+ * U^T
222    // First compute S^+ * U^T by scaling rows of U^T
223    let n = s_pinv.len();
224    let mut result = Array2::zeros((vt.ncols(), u.nrows()));
225
226    for i in 0..n {
227        for j in 0..u.nrows() {
228            for k in 0..vt.ncols() {
229                result[[k, j]] += vt[[i, k]] * s_pinv[i] * u[[j, i]];
230            }
231        }
232    }
233
234    Some(result)
235}
236
237/// A trait that connects a Distribution to a Score.
238pub trait Scorable<S: Score> {
239    /// Calculates the score for each observation.
240    fn score(&self, y: &Array1<f64>) -> Array1<f64>;
241
242    /// Calculates the gradient of the score with respect to the distribution's parameters.
243    fn d_score(&self, y: &Array1<f64>) -> Array2<f64>;
244
245    /// Calculates the Riemannian metric tensor of the score for each observation.
246    fn metric(&self) -> Array3<f64>;
247
248    /// Calculates the total score, averaged over all observations.
249    fn total_score(&self, y: &Array1<f64>, sample_weight: Option<&Array1<f64>>) -> f64 {
250        let scores = self.score(y);
251        if let Some(weights) = sample_weight {
252            (scores * weights).sum() / weights.sum()
253        } else {
254            scores.mean().unwrap_or(0.0)
255        }
256    }
257
258    /// Calculates the gradient, optionally the natural gradient.
259    /// Uses the same fallback strategy as Python's NGBoost:
260    /// 1. Try to solve the linear system directly
261    /// 2. Fall back to matrix inverse
262    /// 3. Fall back to pseudo-inverse (pinv) for singular/ill-conditioned matrices
263    fn grad(&self, y: &Array1<f64>, natural: bool) -> Array2<f64> {
264        let grad = self.d_score(y);
265        if !natural {
266            return grad;
267        }
268
269        let metric = self.metric();
270        let n_obs = grad.nrows();
271        let mut natural_grad = Array2::zeros(grad.raw_dim());
272
273        for i in 0..n_obs {
274            let g_i = grad.row(i).to_owned();
275            let metric_i = metric.index_axis(ndarray::Axis(0), i).to_owned();
276
277            // Try direct solve first (fastest) - matches Python's np.linalg.solve
278            if let Ok(ng_i) = metric_i.solve_into(g_i.clone()) {
279                // Check if solution is reasonable
280                if ng_i.iter().all(|&v| v.is_finite()) {
281                    natural_grad.row_mut(i).assign(&ng_i);
282                    continue;
283                }
284            }
285
286            // Fall back to inverse
287            if let Ok(inv_metric_i) = metric_i.inv() {
288                let result = inv_metric_i.dot(&grad.row(i));
289                if result.iter().all(|&v| v.is_finite()) {
290                    natural_grad.row_mut(i).assign(&result);
291                    continue;
292                }
293            }
294
295            // Fall back to pseudo-inverse (matches Python's np.linalg.pinv)
296            if let Some(pinv_metric_i) = pinv(&metric_i) {
297                let result = pinv_metric_i.dot(&grad.row(i));
298                if result.iter().all(|&v| v.is_finite()) {
299                    natural_grad.row_mut(i).assign(&result);
300                    continue;
301                }
302            }
303
304            // Last resort: use regular gradient (should rarely happen)
305            // Add small regularization to avoid numerical issues
306            natural_grad.row_mut(i).assign(&(&grad.row(i) * 0.99));
307        }
308        natural_grad
309    }
310}