ngboost_rs/dist/
lognormal.rs

1use crate::dist::{Distribution, DistributionMethods, RegressionDistn};
2use crate::scores::{
3    CRPScore, CRPScoreCensored, CensoredScorable, LogScore, LogScoreCensored, Scorable,
4    SurvivalData,
5};
6use ndarray::{array, Array1, Array2, Array3};
7use rand::prelude::*;
8use statrs::distribution::{
9    Continuous, ContinuousCDF, LogNormal as LogNormalDist, Normal as NormalDist,
10};
11
12/// The LogNormal distribution.
13#[derive(Debug, Clone)]
14pub struct LogNormal {
15    pub loc: Array1<f64>,
16    pub scale: Array1<f64>,
17    _params: Array2<f64>,
18}
19
20impl Distribution for LogNormal {
21    fn from_params(params: &Array2<f64>) -> Self {
22        let loc = params.column(0).to_owned();
23        let scale = params.column(1).mapv(f64::exp);
24        LogNormal {
25            loc,
26            scale,
27            _params: params.clone(),
28        }
29    }
30
31    fn fit(y: &Array1<f64>) -> Array1<f64> {
32        if y.is_empty() {
33            return array![0.0, 0.0];
34        }
35        let log_y: Array1<f64> = y.mapv(|v| v.max(1e-9).ln());
36        let mean = log_y.mean().unwrap_or(0.0);
37        let std_dev = log_y.std(0.0);
38        array![mean, std_dev.max(1e-6).ln()]
39    }
40
41    fn n_params(&self) -> usize {
42        2
43    }
44
45    fn predict(&self) -> Array1<f64> {
46        // Mean of lognormal is exp(loc + scale^2 / 2)
47        (&self.loc + &(&self.scale.mapv(|s| s.powi(2)) / 2.0)).mapv(f64::exp)
48    }
49
50    fn params(&self) -> &Array2<f64> {
51        &self._params
52    }
53}
54
55impl RegressionDistn for LogNormal {}
56
57impl DistributionMethods for LogNormal {
58    fn mean(&self) -> Array1<f64> {
59        // Mean of lognormal is exp(mu + sigma^2 / 2)
60        (&self.loc + &(&self.scale.mapv(|s| s.powi(2)) / 2.0)).mapv(f64::exp)
61    }
62
63    fn variance(&self) -> Array1<f64> {
64        // Var of lognormal is (exp(sigma^2) - 1) * exp(2*mu + sigma^2)
65        let sigma_sq = self.scale.mapv(|s| s.powi(2));
66        let exp_sigma_sq = sigma_sq.mapv(f64::exp);
67        let two_mu_plus_sigma_sq = 2.0 * &self.loc + &sigma_sq;
68        (&exp_sigma_sq - 1.0) * two_mu_plus_sigma_sq.mapv(f64::exp)
69    }
70
71    fn std(&self) -> Array1<f64> {
72        self.variance().mapv(f64::sqrt)
73    }
74
75    fn pdf(&self, y: &Array1<f64>) -> Array1<f64> {
76        let mut result = Array1::zeros(y.len());
77        for i in 0..y.len() {
78            if let Ok(d) = LogNormalDist::new(self.loc[i], self.scale[i]) {
79                result[i] = d.pdf(y[i]);
80            }
81        }
82        result
83    }
84
85    fn logpdf(&self, y: &Array1<f64>) -> Array1<f64> {
86        let mut result = Array1::zeros(y.len());
87        for i in 0..y.len() {
88            if let Ok(d) = LogNormalDist::new(self.loc[i], self.scale[i]) {
89                result[i] = d.ln_pdf(y[i]);
90            }
91        }
92        result
93    }
94
95    fn cdf(&self, y: &Array1<f64>) -> Array1<f64> {
96        let mut result = Array1::zeros(y.len());
97        for i in 0..y.len() {
98            if let Ok(d) = LogNormalDist::new(self.loc[i], self.scale[i]) {
99                result[i] = d.cdf(y[i]);
100            }
101        }
102        result
103    }
104
105    fn ppf(&self, q: &Array1<f64>) -> Array1<f64> {
106        let mut result = Array1::zeros(q.len());
107        for i in 0..q.len() {
108            if let Ok(d) = LogNormalDist::new(self.loc[i], self.scale[i]) {
109                result[i] = d.inverse_cdf(q[i]);
110            }
111        }
112        result
113    }
114
115    fn sample(&self, n_samples: usize) -> Array2<f64> {
116        let n_obs = self.loc.len();
117        let mut samples = Array2::zeros((n_samples, n_obs));
118        let mut rng = rand::rng();
119
120        for i in 0..n_obs {
121            if let Ok(d) = LogNormalDist::new(self.loc[i], self.scale[i]) {
122                for s in 0..n_samples {
123                    let u: f64 = rng.random();
124                    samples[[s, i]] = d.inverse_cdf(u);
125                }
126            }
127        }
128        samples
129    }
130
131    fn median(&self) -> Array1<f64> {
132        // Median of lognormal is exp(mu)
133        self.loc.mapv(f64::exp)
134    }
135
136    fn mode(&self) -> Array1<f64> {
137        // Mode of lognormal is exp(mu - sigma^2)
138        (&self.loc - &self.scale.mapv(|s| s.powi(2))).mapv(f64::exp)
139    }
140}
141
142impl Scorable<LogScore> for LogNormal {
143    fn score(&self, y: &Array1<f64>) -> Array1<f64> {
144        let mut scores = Array1::zeros(y.len());
145        for (i, &y_i) in y.iter().enumerate() {
146            let d = LogNormalDist::new(self.loc[i], self.scale[i]).unwrap();
147            scores[i] = -d.ln_pdf(y_i);
148        }
149        scores
150    }
151
152    fn d_score(&self, y: &Array1<f64>) -> Array2<f64> {
153        let n_obs = y.len();
154        let mut d_params = Array2::zeros((n_obs, 2));
155
156        let log_y = y.mapv(|v| v.ln());
157        let err = &self.loc - &log_y;
158        let var = self.scale.mapv(|s| s.powi(2));
159
160        // d/d(loc)
161        d_params.column_mut(0).assign(&(&err / &var));
162
163        // d/d(log(scale))
164        let term2 = (&err * &err) / &var;
165        d_params.column_mut(1).assign(&(1.0 - term2));
166
167        d_params
168    }
169
170    fn metric(&self) -> Array3<f64> {
171        let n_obs = self.loc.len();
172        let mut fi = Array3::zeros((n_obs, 2, 2));
173        let var = self.scale.mapv(|s| s.powi(2));
174
175        for i in 0..n_obs {
176            fi[[i, 0, 0]] = 1.0 / var[i];
177            fi[[i, 1, 1]] = 2.0;
178        }
179
180        fi
181    }
182}
183
184impl Scorable<CRPScore> for LogNormal {
185    fn score(&self, y: &Array1<f64>) -> Array1<f64> {
186        let std_normal = NormalDist::new(0.0, 1.0).unwrap();
187        let sqrt_pi = std::f64::consts::PI.sqrt();
188
189        let mut scores = Array1::zeros(y.len());
190        for i in 0..y.len() {
191            let log_y = y[i].ln();
192            let z = (log_y - self.loc[i]) / self.scale[i];
193            let pdf_z = std_normal.pdf(z);
194            let cdf_z = std_normal.cdf(z);
195            scores[i] = self.scale[i] * (z * (2.0 * cdf_z - 1.0) + 2.0 * pdf_z - 1.0 / sqrt_pi);
196        }
197        scores
198    }
199
200    fn d_score(&self, y: &Array1<f64>) -> Array2<f64> {
201        let std_normal = NormalDist::new(0.0, 1.0).unwrap();
202        let n_obs = y.len();
203        let mut d_params = Array2::zeros((n_obs, 2));
204
205        for i in 0..n_obs {
206            let log_y = y[i].ln();
207            let z = (log_y - self.loc[i]) / self.scale[i];
208            let cdf_z = std_normal.cdf(z);
209
210            // d/d(loc)
211            d_params[[i, 0]] = -(2.0 * cdf_z - 1.0);
212
213            // d/d(log(scale))
214            let pdf_z = std_normal.pdf(z);
215            let sqrt_pi = std::f64::consts::PI.sqrt();
216            let score_i = self.scale[i] * (z * (2.0 * cdf_z - 1.0) + 2.0 * pdf_z - 1.0 / sqrt_pi);
217            d_params[[i, 1]] = score_i + (log_y - self.loc[i]) * d_params[[i, 0]];
218        }
219
220        d_params
221    }
222
223    fn metric(&self) -> Array3<f64> {
224        let sqrt_pi = std::f64::consts::PI.sqrt();
225        let n_obs = self.loc.len();
226        let mut fi = Array3::zeros((n_obs, 2, 2));
227        let var = self.scale.mapv(|s| s.powi(2));
228
229        for i in 0..n_obs {
230            fi[[i, 0, 0]] = 2.0;
231            fi[[i, 1, 1]] = var[i];
232        }
233
234        // Scale by 1/(2*sqrt(pi))
235        fi.mapv_inplace(|x| x / (2.0 * sqrt_pi));
236        fi
237    }
238}
239
240// ============================================================================
241// Censored LogScore for survival analysis
242// ============================================================================
243
244impl CensoredScorable<LogScoreCensored> for LogNormal {
245    fn censored_score(&self, y: &SurvivalData) -> Array1<f64> {
246        let eps = 1e-5;
247        let mut scores = Array1::zeros(y.len());
248
249        for i in 0..y.len() {
250            let t = y.time[i];
251            let e = y.event[i];
252            let d = LogNormalDist::new(self.loc[i], self.scale[i]).unwrap();
253
254            if e {
255                // Uncensored: -log(pdf(t))
256                scores[i] = -d.ln_pdf(t);
257            } else {
258                // Censored: -log(1 - cdf(t))
259                let survival = 1.0 - d.cdf(t) + eps;
260                scores[i] = -survival.ln();
261            }
262        }
263        scores
264    }
265
266    fn censored_d_score(&self, y: &SurvivalData) -> Array2<f64> {
267        let eps = 1e-5;
268        let std_normal = NormalDist::new(0.0, 1.0).unwrap();
269        let n_obs = y.len();
270        let mut d_params = Array2::zeros((n_obs, 2));
271
272        for i in 0..n_obs {
273            let t = y.time[i];
274            let e = y.event[i];
275            let log_t = t.ln();
276            let z = (log_t - self.loc[i]) / self.scale[i];
277            let var = self.scale[i].powi(2);
278            let d = LogNormalDist::new(self.loc[i], self.scale[i]).unwrap();
279
280            if e {
281                // Uncensored gradient (same as regular LogScore)
282                d_params[[i, 0]] = (self.loc[i] - log_t) / var;
283                d_params[[i, 1]] = 1.0 - ((self.loc[i] - log_t).powi(2)) / var;
284            } else {
285                // Censored gradient
286                let survival = 1.0 - d.cdf(t) + eps;
287                let norm_pdf = std_normal.pdf(z);
288
289                d_params[[i, 0]] = -norm_pdf / (self.scale[i] * survival);
290                d_params[[i, 1]] = -z * norm_pdf / survival;
291            }
292        }
293        d_params
294    }
295
296    fn censored_metric(&self) -> Array3<f64> {
297        // Use the same metric as uncensored LogScore (Fisher Information)
298        let eps = 1e-5;
299        let n_obs = self.loc.len();
300        let mut fi = Array3::zeros((n_obs, 2, 2));
301        let var = self.scale.mapv(|s| s.powi(2));
302
303        for i in 0..n_obs {
304            fi[[i, 0, 0]] = 1.0 / (var[i] + eps);
305            fi[[i, 1, 1]] = 2.0;
306        }
307
308        fi
309    }
310}
311
312// ============================================================================
313// Censored CRPScore for survival analysis
314// ============================================================================
315
316impl CensoredScorable<CRPScoreCensored> for LogNormal {
317    fn censored_score(&self, y: &SurvivalData) -> Array1<f64> {
318        let std_normal = NormalDist::new(0.0, 1.0).unwrap();
319        let sqrt_pi = std::f64::consts::PI.sqrt();
320        let sqrt_2 = 2.0_f64.sqrt();
321
322        let mut scores = Array1::zeros(y.len());
323
324        for i in 0..y.len() {
325            let t = y.time[i];
326            let e = y.event[i];
327            let log_t = t.ln();
328            let z = (log_t - self.loc[i]) / self.scale[i];
329            let cdf_z = std_normal.cdf(z);
330            let pdf_z = std_normal.pdf(z);
331
332            if e {
333                // Uncensored CRPS (same as regular)
334                scores[i] = self.scale[i] * (z * (2.0 * cdf_z - 1.0) + 2.0 * pdf_z - 1.0 / sqrt_pi);
335            } else {
336                // Censored CRPS
337                let cdf_sqrt2_z = std_normal.cdf(sqrt_2 * z);
338                scores[i] = self.scale[i]
339                    * (z * cdf_z.powi(2) + 2.0 * cdf_z * pdf_z - cdf_sqrt2_z / sqrt_pi);
340            }
341        }
342        scores
343    }
344
345    fn censored_d_score(&self, y: &SurvivalData) -> Array2<f64> {
346        let std_normal = NormalDist::new(0.0, 1.0).unwrap();
347        let sqrt_pi = std::f64::consts::PI.sqrt();
348        let sqrt_2 = 2.0_f64.sqrt();
349        let sqrt_2_over_pi = (2.0 / std::f64::consts::PI).sqrt();
350
351        let n_obs = y.len();
352        let mut d_params = Array2::zeros((n_obs, 2));
353
354        for i in 0..n_obs {
355            let t = y.time[i];
356            let e = y.event[i];
357            let log_t = t.ln();
358            let z = (log_t - self.loc[i]) / self.scale[i];
359            let cdf_z = std_normal.cdf(z);
360            let pdf_z = std_normal.pdf(z);
361            let pdf_sqrt2_z = std_normal.pdf(sqrt_2 * z);
362
363            if e {
364                // Uncensored gradient
365                d_params[[i, 0]] = -(2.0 * cdf_z - 1.0);
366            } else {
367                // Censored gradient
368                d_params[[i, 0]] = -(cdf_z.powi(2) + 2.0 * z * cdf_z * pdf_z + 2.0 * pdf_z.powi(2)
369                    - 2.0 * cdf_z * pdf_z.powi(2)
370                    - sqrt_2_over_pi * pdf_sqrt2_z);
371            }
372
373            // d/d(log(scale)) = score + (log_t - loc) * d/d(loc)
374            let score_i = if e {
375                self.scale[i] * (z * (2.0 * cdf_z - 1.0) + 2.0 * pdf_z - 1.0 / sqrt_pi)
376            } else {
377                let cdf_sqrt2_z = std_normal.cdf(sqrt_2 * z);
378                self.scale[i] * (z * cdf_z.powi(2) + 2.0 * cdf_z * pdf_z - cdf_sqrt2_z / sqrt_pi)
379            };
380            d_params[[i, 1]] = score_i + (log_t - self.loc[i]) * d_params[[i, 0]];
381        }
382
383        d_params
384    }
385
386    fn censored_metric(&self) -> Array3<f64> {
387        let sqrt_pi = std::f64::consts::PI.sqrt();
388        let n_obs = self.loc.len();
389        let mut fi = Array3::zeros((n_obs, 2, 2));
390
391        for i in 0..n_obs {
392            fi[[i, 0, 0]] = 2.0;
393            fi[[i, 1, 1]] = self.scale[i].powi(2);
394        }
395
396        fi.mapv_inplace(|x| x / (2.0 * sqrt_pi));
397        fi
398    }
399}
400
401#[cfg(test)]
402mod tests {
403    use super::*;
404    use approx::assert_relative_eq;
405
406    #[test]
407    fn test_lognormal_distribution_methods() {
408        let params = Array2::from_shape_vec((2, 2), vec![0.0, 0.0, 1.0, 0.5_f64.ln()]).unwrap();
409        let dist = LogNormal::from_params(&params);
410
411        // Test mean: exp(mu + sigma^2/2)
412        let mean = dist.mean();
413        assert_relative_eq!(mean[0], (0.5_f64).exp(), epsilon = 1e-6);
414
415        // Test median: exp(mu)
416        let median = dist.median();
417        assert_relative_eq!(median[0], 1.0, epsilon = 1e-10);
418        assert_relative_eq!(median[1], std::f64::consts::E, epsilon = 1e-10);
419    }
420
421    #[test]
422    fn test_lognormal_cdf_ppf() {
423        let params = Array2::from_shape_vec((1, 2), vec![0.0, 0.0]).unwrap();
424        let dist = LogNormal::from_params(&params);
425
426        // CDF at median should be 0.5
427        let y = Array1::from_vec(vec![1.0]); // exp(0) = 1 is the median
428        let cdf = dist.cdf(&y);
429        assert_relative_eq!(cdf[0], 0.5, epsilon = 1e-10);
430
431        // PPF at 0.5 should return median
432        let q = Array1::from_vec(vec![0.5]);
433        let ppf = dist.ppf(&q);
434        assert_relative_eq!(ppf[0], 1.0, epsilon = 1e-10);
435    }
436
437    #[test]
438    fn test_lognormal_sample() {
439        let params = Array2::from_shape_vec((1, 2), vec![1.0, 0.5_f64.ln()]).unwrap();
440        let dist = LogNormal::from_params(&params);
441
442        let samples = dist.sample(1000);
443        assert_eq!(samples.shape(), &[1000, 1]);
444
445        // All samples should be positive (lognormal is always positive)
446        assert!(samples.iter().all(|&x| x > 0.0));
447
448        // Check that sample median is close to exp(mu)
449        let mut sample_vec: Vec<f64> = samples.column(0).to_vec();
450        sample_vec.sort_by(|a, b| a.partial_cmp(b).unwrap());
451        let sample_median = sample_vec[500];
452        let expected_median = std::f64::consts::E; // exp(1.0)
453        assert!((sample_median - expected_median).abs() / expected_median < 0.15);
454    }
455
456    #[test]
457    fn test_lognormal_fit() {
458        let y = Array1::from_vec(vec![1.0, 2.0, 3.0, 4.0, 5.0]);
459        let params = LogNormal::fit(&y);
460        assert_eq!(params.len(), 2);
461        // Should fit log-mean and log-std
462    }
463
464    #[test]
465    fn test_lognormal_survival_function() {
466        let params = Array2::from_shape_vec((1, 2), vec![0.0, 0.0]).unwrap();
467        let dist = LogNormal::from_params(&params);
468
469        // SF at median should be 0.5
470        let y = Array1::from_vec(vec![1.0]);
471        let sf = dist.sf(&y);
472        assert_relative_eq!(sf[0], 0.5, epsilon = 1e-10);
473    }
474}