Skip to main content

plotkit_core/charts/
violin.rs

1//! Violin plot builder methods and kernel density estimation.
2//!
3//! Provides a fluent builder API for configuring [`ViolinArtist`] instances,
4//! along with a Gaussian kernel density estimation (KDE) function used to
5//! compute the shape of each violin.
6
7use crate::artist::ViolinArtist;
8use crate::primitives::Color;
9
10/// Computes the linear-interpolation percentile of a sorted slice.
11///
12/// Uses the same interpolation method as NumPy's default (`linear`).
13/// `p` must be in the range [0.0, 1.0].
14fn percentile(sorted: &[f64], p: f64) -> f64 {
15    assert!(!sorted.is_empty(), "percentile requires non-empty data");
16    if sorted.len() == 1 {
17        return sorted[0];
18    }
19    let idx = p * (sorted.len() - 1) as f64;
20    let lo = idx.floor() as usize;
21    let hi = lo + 1;
22    let frac = idx - lo as f64;
23    if hi >= sorted.len() {
24        sorted[sorted.len() - 1]
25    } else {
26        sorted[lo] * (1.0 - frac) + sorted[hi] * frac
27    }
28}
29
30/// Computes the standard deviation of a sorted slice of finite values.
31///
32/// Returns 0.0 for slices with fewer than 2 elements.
33fn std_dev(sorted: &[f64]) -> f64 {
34    let n = sorted.len();
35    if n < 2 {
36        return 0.0;
37    }
38    let mean: f64 = sorted.iter().sum::<f64>() / n as f64;
39    let variance = sorted.iter().map(|&v| (v - mean).powi(2)).sum::<f64>() / n as f64;
40    variance.sqrt()
41}
42
43/// Computes the bandwidth for kernel density estimation using Silverman's rule.
44///
45/// The bandwidth is calculated as:
46/// `h = 0.9 * min(std, IQR / 1.34) * n^(-1/5)`
47///
48/// Falls back to a small positive value when the data has zero spread
49/// (e.g. all identical values or a single data point).
50pub fn silverman_bandwidth(sorted: &[f64]) -> f64 {
51    let n = sorted.len();
52    if n < 2 {
53        return 1.0;
54    }
55    let sd = std_dev(sorted);
56    let q1 = percentile(sorted, 0.25);
57    let q3 = percentile(sorted, 0.75);
58    let iqr = q3 - q1;
59
60    let spread = if sd > 0.0 && iqr > 0.0 {
61        sd.min(iqr / 1.34)
62    } else if sd > 0.0 {
63        sd
64    } else if iqr > 0.0 {
65        iqr / 1.34
66    } else {
67        // All values identical -- use a small positive bandwidth.
68        return 1.0;
69    };
70
71    0.9 * spread * (n as f64).powf(-0.2)
72}
73
74/// Evaluates a Gaussian kernel density estimate at a set of points.
75///
76/// For each point in `eval_points`, the density is estimated by summing
77/// Gaussian kernels centered on each data value, scaled by `bandwidth`.
78///
79/// # Arguments
80/// * `data` - The source data values.
81/// * `bandwidth` - The smoothing bandwidth (standard deviation of each kernel).
82/// * `eval_points` - The x-values at which to evaluate the density.
83///
84/// # Returns
85/// A vector of density values, one per eval point.
86pub fn gaussian_kde(data: &[f64], bandwidth: f64, eval_points: &[f64]) -> Vec<f64> {
87    let n = data.len() as f64;
88    let inv_bw = 1.0 / bandwidth;
89    let norm = 1.0 / (n * bandwidth * (2.0 * std::f64::consts::PI).sqrt());
90
91    eval_points
92        .iter()
93        .map(|&x| {
94            let sum: f64 = data
95                .iter()
96                .map(|&xi| {
97                    let u = (x - xi) * inv_bw;
98                    (-0.5 * u * u).exp()
99                })
100                .sum();
101            sum * norm
102        })
103        .collect()
104}
105
106impl ViolinArtist {
107    /// Sets the fill color.
108    ///
109    /// Applies the given [`Color`] to every violin rendered by this artist.
110    pub fn color(&mut self, color: Color) -> &mut Self {
111        self.color = color;
112        self
113    }
114
115    /// Sets the legend label.
116    ///
117    /// When a legend is displayed on the figure, this label will appear
118    /// next to the color swatch for this violin plot.
119    pub fn label(&mut self, label: &str) -> &mut Self {
120        self.label = Some(label.to_string());
121        self
122    }
123
124    /// Sets the opacity.
125    ///
126    /// The value is clamped to the range [0.0, 1.0], where 0.0 is fully
127    /// transparent and 1.0 is fully opaque.
128    pub fn alpha(&mut self, alpha: f64) -> &mut Self {
129        self.alpha = alpha.clamp(0.0, 1.0);
130        self
131    }
132
133    /// Sets the maximum width of each violin shape.
134    ///
135    /// The value is clamped to [0.1, 2.0]. This controls the visual width
136    /// of the widest part of the violin.
137    pub fn widths(&mut self, widths: f64) -> &mut Self {
138        self.widths = widths.clamp(0.1, 2.0);
139        self
140    }
141
142    /// Sets the x-positions for each violin.
143    ///
144    /// When `None` (the default), violins are placed at 1.0, 2.0, 3.0, etc.
145    pub fn positions(&mut self, positions: Vec<f64>) -> &mut Self {
146        self.positions = Some(positions);
147        self
148    }
149
150    /// Controls whether the median line is drawn.
151    ///
152    /// When `true` (the default), a horizontal line is drawn at the median
153    /// of each dataset inside the violin shape.
154    pub fn show_median(&mut self, show: bool) -> &mut Self {
155        self.show_median = show;
156        self
157    }
158
159    /// Controls whether Q1/Q3 quartile lines are drawn.
160    ///
161    /// When `true` (the default), thin horizontal lines are drawn at the
162    /// first and third quartiles inside the violin shape.
163    pub fn show_quartiles(&mut self, show: bool) -> &mut Self {
164        self.show_quartiles = show;
165        self
166    }
167
168    /// Sets the KDE bandwidth manually.
169    ///
170    /// Overrides the automatically computed Silverman's rule bandwidth.
171    /// A value of 0.0 or negative resets to automatic bandwidth selection.
172    pub fn bw_method(&mut self, bw: f64) -> &mut Self {
173        self.bw_method = bw;
174        self
175    }
176
177    /// Returns the x-position for a given violin index.
178    ///
179    /// Uses custom positions if set, otherwise defaults to 1-based indexing.
180    pub fn position_for(&self, index: usize) -> f64 {
181        self.positions
182            .as_ref()
183            .and_then(|p| p.get(index).copied())
184            .unwrap_or(index as f64 + 1.0)
185    }
186
187    /// Computes the data-space bounding box `(xmin, xmax, ymin, ymax)`.
188    ///
189    /// The x-bounds span from the leftmost position minus half the max width
190    /// to the rightmost position plus half the max width. The y-bounds span
191    /// from the minimum data value to the maximum data value across all
192    /// datasets. Falls back to `(0.0, 1.0, 0.0, 1.0)` when there are no
193    /// datasets.
194    pub fn data_bounds(&self) -> (f64, f64, f64, f64) {
195        if self.datasets.is_empty() {
196            return (0.0, 1.0, 0.0, 1.0);
197        }
198
199        let half_w = self.widths / 2.0;
200        let mut xmin = f64::INFINITY;
201        let mut xmax = f64::NEG_INFINITY;
202        let mut ymin = f64::INFINITY;
203        let mut ymax = f64::NEG_INFINITY;
204
205        for (i, dataset) in self.datasets.iter().enumerate() {
206            let pos = self.position_for(i);
207            xmin = xmin.min(pos - half_w);
208            xmax = xmax.max(pos + half_w);
209            for &v in dataset {
210                if v.is_finite() {
211                    ymin = ymin.min(v);
212                    ymax = ymax.max(v);
213                }
214            }
215        }
216
217        if !xmin.is_finite() {
218            xmin = 0.0;
219        }
220        if !xmax.is_finite() {
221            xmax = 1.0;
222        }
223        if !ymin.is_finite() {
224            ymin = 0.0;
225        }
226        if !ymax.is_finite() {
227            ymax = 1.0;
228        }
229
230        (xmin, xmax, ymin, ymax)
231    }
232}
233
234// ---------------------------------------------------------------------------
235// Tests
236// ---------------------------------------------------------------------------
237
238#[cfg(test)]
239mod tests {
240    use super::*;
241
242    /// Helper to create a sample ViolinArtist for tests.
243    fn sample_violin() -> ViolinArtist {
244        ViolinArtist {
245            datasets: vec![vec![1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0]],
246            positions: None,
247            widths: 0.7,
248            show_median: true,
249            show_quartiles: true,
250            color: Color::TAB_BLUE,
251            alpha: 0.7,
252            label: None,
253            bw_method: 0.0,
254        }
255    }
256
257    #[test]
258    fn silverman_bandwidth_basic() {
259        let sorted = vec![1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0];
260        let bw = silverman_bandwidth(&sorted);
261        assert!(bw > 0.0, "bandwidth must be positive");
262        assert!(bw < 10.0, "bandwidth should be reasonable");
263    }
264
265    #[test]
266    fn silverman_bandwidth_single_value() {
267        let bw = silverman_bandwidth(&[42.0]);
268        assert!((bw - 1.0).abs() < f64::EPSILON, "single value should return fallback bandwidth");
269    }
270
271    #[test]
272    fn silverman_bandwidth_identical_values() {
273        let sorted = vec![5.0, 5.0, 5.0, 5.0, 5.0];
274        let bw = silverman_bandwidth(&sorted);
275        assert!((bw - 1.0).abs() < f64::EPSILON, "identical values should return fallback bandwidth");
276    }
277
278    #[test]
279    fn gaussian_kde_basic() {
280        let data = vec![1.0, 2.0, 3.0, 4.0, 5.0];
281        let bw = 1.0;
282        let eval_points: Vec<f64> = (0..11).map(|i| i as f64 * 0.5).collect();
283        let density = gaussian_kde(&data, bw, &eval_points);
284        assert_eq!(density.len(), eval_points.len());
285        // All density values should be non-negative.
286        for &d in &density {
287            assert!(d >= 0.0, "density must be non-negative");
288        }
289    }
290
291    #[test]
292    fn gaussian_kde_integrates_roughly_to_one() {
293        let data: Vec<f64> = (0..100).map(|i| i as f64 * 0.1).collect();
294        let bw = silverman_bandwidth(&data);
295        // Evaluate on a fine grid covering the data range with margin.
296        let n_points = 1000;
297        let lo = -2.0;
298        let hi = 12.0;
299        let step = (hi - lo) / n_points as f64;
300        let eval_points: Vec<f64> = (0..=n_points).map(|i| lo + i as f64 * step).collect();
301        let density = gaussian_kde(&data, bw, &eval_points);
302        let integral: f64 = density.iter().sum::<f64>() * step;
303        assert!(
304            (integral - 1.0).abs() < 0.1,
305            "KDE integral should be approximately 1.0, got {integral}"
306        );
307    }
308
309    #[test]
310    fn gaussian_kde_single_point() {
311        let data = vec![5.0];
312        let bw = 1.0;
313        let density = gaussian_kde(&data, bw, &[5.0]);
314        // Peak should be at the data point.
315        assert!(density[0] > 0.0);
316    }
317
318    #[test]
319    fn gaussian_kde_symmetry() {
320        let data = vec![0.0];
321        let bw = 1.0;
322        let d_left = gaussian_kde(&data, bw, &[-1.0]);
323        let d_right = gaussian_kde(&data, bw, &[1.0]);
324        assert!(
325            (d_left[0] - d_right[0]).abs() < 1e-10,
326            "KDE should be symmetric around single data point"
327        );
328    }
329
330    #[test]
331    fn data_bounds_single_dataset() {
332        let artist = sample_violin();
333        let (xmin, xmax, ymin, ymax) = artist.data_bounds();
334        // Default position for index 0 is 1.0, half width is 0.35.
335        assert!((xmin - 0.65).abs() < f64::EPSILON);
336        assert!((xmax - 1.35).abs() < f64::EPSILON);
337        assert!((ymin - 1.0).abs() < f64::EPSILON);
338        assert!((ymax - 10.0).abs() < f64::EPSILON);
339    }
340
341    #[test]
342    fn data_bounds_multiple_datasets() {
343        let artist = ViolinArtist {
344            datasets: vec![
345                vec![1.0, 2.0, 3.0],
346                vec![10.0, 20.0, 30.0],
347            ],
348            positions: None,
349            widths: 0.7,
350            show_median: true,
351            show_quartiles: true,
352            color: Color::TAB_BLUE,
353            alpha: 0.7,
354            label: None,
355            bw_method: 0.0,
356        };
357        let (xmin, xmax, ymin, ymax) = artist.data_bounds();
358        // Position 0 -> 1.0, position 1 -> 2.0
359        assert!((xmin - 0.65).abs() < f64::EPSILON);
360        assert!((xmax - 2.35).abs() < f64::EPSILON);
361        assert!((ymin - 1.0).abs() < f64::EPSILON);
362        assert!((ymax - 30.0).abs() < f64::EPSILON);
363    }
364
365    #[test]
366    fn data_bounds_custom_positions() {
367        let artist = ViolinArtist {
368            datasets: vec![vec![1.0, 2.0], vec![3.0, 4.0]],
369            positions: Some(vec![5.0, 10.0]),
370            widths: 1.0,
371            show_median: true,
372            show_quartiles: true,
373            color: Color::TAB_BLUE,
374            alpha: 0.7,
375            label: None,
376            bw_method: 0.0,
377        };
378        let (xmin, xmax, _ymin, _ymax) = artist.data_bounds();
379        assert!((xmin - 4.5).abs() < f64::EPSILON);
380        assert!((xmax - 10.5).abs() < f64::EPSILON);
381    }
382
383    #[test]
384    fn data_bounds_empty() {
385        let artist = ViolinArtist {
386            datasets: vec![],
387            positions: None,
388            widths: 0.7,
389            show_median: true,
390            show_quartiles: true,
391            color: Color::TAB_BLUE,
392            alpha: 0.7,
393            label: None,
394            bw_method: 0.0,
395        };
396        assert_eq!(artist.data_bounds(), (0.0, 1.0, 0.0, 1.0));
397    }
398
399    #[test]
400    fn data_bounds_nan_filtered() {
401        let artist = ViolinArtist {
402            datasets: vec![vec![f64::NAN, 1.0, 5.0, f64::NAN]],
403            positions: None,
404            widths: 0.7,
405            show_median: true,
406            show_quartiles: true,
407            color: Color::TAB_BLUE,
408            alpha: 0.7,
409            label: None,
410            bw_method: 0.0,
411        };
412        let (_xmin, _xmax, ymin, ymax) = artist.data_bounds();
413        assert!((ymin - 1.0).abs() < f64::EPSILON);
414        assert!((ymax - 5.0).abs() < f64::EPSILON);
415    }
416
417    #[test]
418    fn builder_color() {
419        let mut artist = sample_violin();
420        artist.color(Color::TAB_RED);
421        assert_eq!(artist.color, Color::TAB_RED);
422    }
423
424    #[test]
425    fn builder_label() {
426        let mut artist = sample_violin();
427        artist.label("my violin");
428        assert_eq!(artist.label.as_deref(), Some("my violin"));
429    }
430
431    #[test]
432    fn builder_alpha() {
433        let mut artist = sample_violin();
434        artist.alpha(0.5);
435        assert!((artist.alpha - 0.5).abs() < f64::EPSILON);
436    }
437
438    #[test]
439    fn builder_alpha_clamped() {
440        let mut artist = sample_violin();
441        artist.alpha(2.0);
442        assert!((artist.alpha - 1.0).abs() < f64::EPSILON);
443        artist.alpha(-1.0);
444        assert!((artist.alpha - 0.0).abs() < f64::EPSILON);
445    }
446
447    #[test]
448    fn builder_widths() {
449        let mut artist = sample_violin();
450        artist.widths(0.5);
451        assert!((artist.widths - 0.5).abs() < f64::EPSILON);
452    }
453
454    #[test]
455    fn builder_widths_clamped() {
456        let mut artist = sample_violin();
457        artist.widths(0.01);
458        assert!((artist.widths - 0.1).abs() < f64::EPSILON);
459        artist.widths(5.0);
460        assert!((artist.widths - 2.0).abs() < f64::EPSILON);
461    }
462
463    #[test]
464    fn builder_show_median() {
465        let mut artist = sample_violin();
466        artist.show_median(false);
467        assert!(!artist.show_median);
468    }
469
470    #[test]
471    fn builder_show_quartiles() {
472        let mut artist = sample_violin();
473        artist.show_quartiles(false);
474        assert!(!artist.show_quartiles);
475    }
476
477    #[test]
478    fn builder_bw_method() {
479        let mut artist = sample_violin();
480        artist.bw_method(0.5);
481        assert!((artist.bw_method - 0.5).abs() < f64::EPSILON);
482    }
483
484    #[test]
485    fn builder_positions() {
486        let mut artist = sample_violin();
487        artist.positions(vec![2.0, 4.0, 6.0]);
488        assert_eq!(artist.positions, Some(vec![2.0, 4.0, 6.0]));
489    }
490
491    #[test]
492    fn position_for_default() {
493        let artist = sample_violin();
494        assert!((artist.position_for(0) - 1.0).abs() < f64::EPSILON);
495        assert!((artist.position_for(2) - 3.0).abs() < f64::EPSILON);
496    }
497
498    #[test]
499    fn position_for_custom() {
500        let mut artist = sample_violin();
501        artist.positions(vec![10.0, 20.0]);
502        assert!((artist.position_for(0) - 10.0).abs() < f64::EPSILON);
503        assert!((artist.position_for(1) - 20.0).abs() < f64::EPSILON);
504        // Beyond custom positions, falls back to default.
505        assert!((artist.position_for(2) - 3.0).abs() < f64::EPSILON);
506    }
507
508    #[test]
509    fn percentile_basic() {
510        let data = [1.0, 2.0, 3.0, 4.0];
511        assert!((percentile(&data, 0.5) - 2.5).abs() < 1e-10);
512    }
513
514    #[test]
515    fn std_dev_basic() {
516        let data = vec![2.0, 4.0, 4.0, 4.0, 5.0, 5.0, 7.0, 9.0];
517        let sd = std_dev(&data);
518        assert!((sd - 2.0).abs() < 1e-10);
519    }
520
521    #[test]
522    fn std_dev_single() {
523        assert!((std_dev(&[5.0]) - 0.0).abs() < f64::EPSILON);
524    }
525}