Skip to main content

plotkit_core/charts/
violin.rs

1//! Violin plot builder methods and kernel density estimation.
2//!
3//! Provides a fluent builder API for configuring [`ViolinArtist`] instances,
4//! along with a Gaussian kernel density estimation (KDE) function used to
5//! compute the shape of each violin.
6
7use crate::artist::ViolinArtist;
8use crate::primitives::Color;
9
10/// Computes the linear-interpolation percentile of a sorted slice.
11///
12/// Uses the same interpolation method as NumPy's default (`linear`).
13/// `p` must be in the range [0.0, 1.0].
14fn percentile(sorted: &[f64], p: f64) -> f64 {
15    assert!(!sorted.is_empty(), "percentile requires non-empty data");
16    if sorted.len() == 1 {
17        return sorted[0];
18    }
19    let idx = p * (sorted.len() - 1) as f64;
20    let lo = idx.floor() as usize;
21    let hi = lo + 1;
22    let frac = idx - lo as f64;
23    if hi >= sorted.len() {
24        sorted[sorted.len() - 1]
25    } else {
26        sorted[lo] * (1.0 - frac) + sorted[hi] * frac
27    }
28}
29
30/// Computes the standard deviation of a sorted slice of finite values.
31///
32/// Returns 0.0 for slices with fewer than 2 elements.
33fn std_dev(sorted: &[f64]) -> f64 {
34    let n = sorted.len();
35    if n < 2 {
36        return 0.0;
37    }
38    let mean: f64 = sorted.iter().sum::<f64>() / n as f64;
39    let variance = sorted.iter().map(|&v| (v - mean).powi(2)).sum::<f64>() / n as f64;
40    variance.sqrt()
41}
42
43/// Computes the bandwidth for kernel density estimation using Silverman's rule.
44///
45/// The bandwidth is calculated as:
46/// `h = 0.9 * min(std, IQR / 1.34) * n^(-1/5)`
47///
48/// Falls back to a small positive value when the data has zero spread
49/// (e.g. all identical values or a single data point).
50pub fn silverman_bandwidth(sorted: &[f64]) -> f64 {
51    let n = sorted.len();
52    if n < 2 {
53        return 1.0;
54    }
55    let sd = std_dev(sorted);
56    let q1 = percentile(sorted, 0.25);
57    let q3 = percentile(sorted, 0.75);
58    let iqr = q3 - q1;
59
60    let spread = if sd > 0.0 && iqr > 0.0 {
61        sd.min(iqr / 1.34)
62    } else if sd > 0.0 {
63        sd
64    } else if iqr > 0.0 {
65        iqr / 1.34
66    } else {
67        // All values identical -- use a small positive bandwidth.
68        return 1.0;
69    };
70
71    0.9 * spread * (n as f64).powf(-0.2)
72}
73
74/// Evaluates a Gaussian kernel density estimate at a set of points.
75///
76/// For each point in `eval_points`, the density is estimated by summing
77/// Gaussian kernels centered on each data value, scaled by `bandwidth`.
78///
79/// # Arguments
80/// * `data` - The source data values.
81/// * `bandwidth` - The smoothing bandwidth (standard deviation of each kernel).
82/// * `eval_points` - The x-values at which to evaluate the density.
83///
84/// # Returns
85/// A vector of density values, one per eval point.
86pub fn gaussian_kde(data: &[f64], bandwidth: f64, eval_points: &[f64]) -> Vec<f64> {
87    let n = data.len() as f64;
88    let inv_bw = 1.0 / bandwidth;
89    let norm = 1.0 / (n * bandwidth * (2.0 * std::f64::consts::PI).sqrt());
90
91    eval_points
92        .iter()
93        .map(|&x| {
94            let sum: f64 = data
95                .iter()
96                .map(|&xi| {
97                    let u = (x - xi) * inv_bw;
98                    (-0.5 * u * u).exp()
99                })
100                .sum();
101            sum * norm
102        })
103        .collect()
104}
105
106impl ViolinArtist {
107    /// Sets the fill color.
108    ///
109    /// Applies the given [`Color`] to every violin rendered by this artist.
110    pub fn color(&mut self, color: Color) -> &mut Self {
111        self.color = color;
112        self
113    }
114
115    /// Sets the legend label.
116    ///
117    /// When a legend is displayed on the figure, this label will appear
118    /// next to the color swatch for this violin plot.
119    pub fn label(&mut self, label: &str) -> &mut Self {
120        self.label = Some(label.to_string());
121        self
122    }
123
124    /// Sets the opacity.
125    ///
126    /// The value is clamped to the range [0.0, 1.0], where 0.0 is fully
127    /// transparent and 1.0 is fully opaque.
128    pub fn alpha(&mut self, alpha: f64) -> &mut Self {
129        self.alpha = alpha.clamp(0.0, 1.0);
130        self
131    }
132
133    /// Sets the maximum width of each violin shape.
134    ///
135    /// The value is clamped to [0.1, 2.0]. This controls the visual width
136    /// of the widest part of the violin.
137    pub fn widths(&mut self, widths: f64) -> &mut Self {
138        self.widths = widths.clamp(0.1, 2.0);
139        self
140    }
141
142    /// Sets the x-positions for each violin.
143    ///
144    /// When `None` (the default), violins are placed at 1.0, 2.0, 3.0, etc.
145    pub fn positions(&mut self, positions: Vec<f64>) -> &mut Self {
146        self.positions = Some(positions);
147        self
148    }
149
150    /// Controls whether the median line is drawn.
151    ///
152    /// When `true` (the default), a horizontal line is drawn at the median
153    /// of each dataset inside the violin shape.
154    pub fn show_median(&mut self, show: bool) -> &mut Self {
155        self.show_median = show;
156        self
157    }
158
159    /// Controls whether Q1/Q3 quartile lines are drawn.
160    ///
161    /// When `true` (the default), thin horizontal lines are drawn at the
162    /// first and third quartiles inside the violin shape.
163    pub fn show_quartiles(&mut self, show: bool) -> &mut Self {
164        self.show_quartiles = show;
165        self
166    }
167
168    /// Sets the KDE bandwidth manually.
169    ///
170    /// Overrides the automatically computed Silverman's rule bandwidth.
171    /// A value of 0.0 or negative resets to automatic bandwidth selection.
172    pub fn bw_method(&mut self, bw: f64) -> &mut Self {
173        self.bw_method = bw;
174        self
175    }
176
177    /// Returns the x-position for a given violin index.
178    ///
179    /// Uses custom positions if set, otherwise defaults to 1-based indexing.
180    pub fn position_for(&self, index: usize) -> f64 {
181        self.positions
182            .as_ref()
183            .and_then(|p| p.get(index).copied())
184            .unwrap_or(index as f64 + 1.0)
185    }
186
187    /// Computes the data-space bounding box `(xmin, xmax, ymin, ymax)`.
188    ///
189    /// The x-bounds span from the leftmost position minus half the max width
190    /// to the rightmost position plus half the max width. The y-bounds span
191    /// from the minimum data value to the maximum data value across all
192    /// datasets. Falls back to `(0.0, 1.0, 0.0, 1.0)` when there are no
193    /// datasets.
194    pub fn data_bounds(&self) -> (f64, f64, f64, f64) {
195        if self.datasets.is_empty() {
196            return (0.0, 1.0, 0.0, 1.0);
197        }
198
199        let half_w = self.widths / 2.0;
200        let mut xmin = f64::INFINITY;
201        let mut xmax = f64::NEG_INFINITY;
202        let mut ymin = f64::INFINITY;
203        let mut ymax = f64::NEG_INFINITY;
204
205        for (i, dataset) in self.datasets.iter().enumerate() {
206            let pos = self.position_for(i);
207            xmin = xmin.min(pos - half_w);
208            xmax = xmax.max(pos + half_w);
209            for &v in dataset {
210                if v.is_finite() {
211                    ymin = ymin.min(v);
212                    ymax = ymax.max(v);
213                }
214            }
215        }
216
217        if !xmin.is_finite() {
218            xmin = 0.0;
219        }
220        if !xmax.is_finite() {
221            xmax = 1.0;
222        }
223        if !ymin.is_finite() {
224            ymin = 0.0;
225        }
226        if !ymax.is_finite() {
227            ymax = 1.0;
228        }
229
230        (xmin, xmax, ymin, ymax)
231    }
232}
233
234// ---------------------------------------------------------------------------
235// Tests
236// ---------------------------------------------------------------------------
237
238#[cfg(test)]
239mod tests {
240    use super::*;
241
242    /// Helper to create a sample ViolinArtist for tests.
243    fn sample_violin() -> ViolinArtist {
244        ViolinArtist {
245            datasets: vec![vec![1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0]],
246            positions: None,
247            widths: 0.7,
248            show_median: true,
249            show_quartiles: true,
250            color: Color::TAB_BLUE,
251            alpha: 0.7,
252            label: None,
253            bw_method: 0.0,
254        }
255    }
256
257    #[test]
258    fn silverman_bandwidth_basic() {
259        let sorted = vec![1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0];
260        let bw = silverman_bandwidth(&sorted);
261        assert!(bw > 0.0, "bandwidth must be positive");
262        assert!(bw < 10.0, "bandwidth should be reasonable");
263    }
264
265    #[test]
266    fn silverman_bandwidth_single_value() {
267        let bw = silverman_bandwidth(&[42.0]);
268        assert!(
269            (bw - 1.0).abs() < f64::EPSILON,
270            "single value should return fallback bandwidth"
271        );
272    }
273
274    #[test]
275    fn silverman_bandwidth_identical_values() {
276        let sorted = vec![5.0, 5.0, 5.0, 5.0, 5.0];
277        let bw = silverman_bandwidth(&sorted);
278        assert!(
279            (bw - 1.0).abs() < f64::EPSILON,
280            "identical values should return fallback bandwidth"
281        );
282    }
283
284    #[test]
285    fn gaussian_kde_basic() {
286        let data = vec![1.0, 2.0, 3.0, 4.0, 5.0];
287        let bw = 1.0;
288        let eval_points: Vec<f64> = (0..11).map(|i| i as f64 * 0.5).collect();
289        let density = gaussian_kde(&data, bw, &eval_points);
290        assert_eq!(density.len(), eval_points.len());
291        // All density values should be non-negative.
292        for &d in &density {
293            assert!(d >= 0.0, "density must be non-negative");
294        }
295    }
296
297    #[test]
298    fn gaussian_kde_integrates_roughly_to_one() {
299        let data: Vec<f64> = (0..100).map(|i| i as f64 * 0.1).collect();
300        let bw = silverman_bandwidth(&data);
301        // Evaluate on a fine grid covering the data range with margin.
302        let n_points = 1000;
303        let lo = -2.0;
304        let hi = 12.0;
305        let step = (hi - lo) / n_points as f64;
306        let eval_points: Vec<f64> = (0..=n_points).map(|i| lo + i as f64 * step).collect();
307        let density = gaussian_kde(&data, bw, &eval_points);
308        let integral: f64 = density.iter().sum::<f64>() * step;
309        assert!(
310            (integral - 1.0).abs() < 0.1,
311            "KDE integral should be approximately 1.0, got {integral}"
312        );
313    }
314
315    #[test]
316    fn gaussian_kde_single_point() {
317        let data = vec![5.0];
318        let bw = 1.0;
319        let density = gaussian_kde(&data, bw, &[5.0]);
320        // Peak should be at the data point.
321        assert!(density[0] > 0.0);
322    }
323
324    #[test]
325    fn gaussian_kde_symmetry() {
326        let data = vec![0.0];
327        let bw = 1.0;
328        let d_left = gaussian_kde(&data, bw, &[-1.0]);
329        let d_right = gaussian_kde(&data, bw, &[1.0]);
330        assert!(
331            (d_left[0] - d_right[0]).abs() < 1e-10,
332            "KDE should be symmetric around single data point"
333        );
334    }
335
336    #[test]
337    fn data_bounds_single_dataset() {
338        let artist = sample_violin();
339        let (xmin, xmax, ymin, ymax) = artist.data_bounds();
340        // Default position for index 0 is 1.0, half width is 0.35.
341        assert!((xmin - 0.65).abs() < f64::EPSILON);
342        assert!((xmax - 1.35).abs() < f64::EPSILON);
343        assert!((ymin - 1.0).abs() < f64::EPSILON);
344        assert!((ymax - 10.0).abs() < f64::EPSILON);
345    }
346
347    #[test]
348    fn data_bounds_multiple_datasets() {
349        let artist = ViolinArtist {
350            datasets: vec![vec![1.0, 2.0, 3.0], vec![10.0, 20.0, 30.0]],
351            positions: None,
352            widths: 0.7,
353            show_median: true,
354            show_quartiles: true,
355            color: Color::TAB_BLUE,
356            alpha: 0.7,
357            label: None,
358            bw_method: 0.0,
359        };
360        let (xmin, xmax, ymin, ymax) = artist.data_bounds();
361        // Position 0 -> 1.0, position 1 -> 2.0
362        assert!((xmin - 0.65).abs() < f64::EPSILON);
363        assert!((xmax - 2.35).abs() < f64::EPSILON);
364        assert!((ymin - 1.0).abs() < f64::EPSILON);
365        assert!((ymax - 30.0).abs() < f64::EPSILON);
366    }
367
368    #[test]
369    fn data_bounds_custom_positions() {
370        let artist = ViolinArtist {
371            datasets: vec![vec![1.0, 2.0], vec![3.0, 4.0]],
372            positions: Some(vec![5.0, 10.0]),
373            widths: 1.0,
374            show_median: true,
375            show_quartiles: true,
376            color: Color::TAB_BLUE,
377            alpha: 0.7,
378            label: None,
379            bw_method: 0.0,
380        };
381        let (xmin, xmax, _ymin, _ymax) = artist.data_bounds();
382        assert!((xmin - 4.5).abs() < f64::EPSILON);
383        assert!((xmax - 10.5).abs() < f64::EPSILON);
384    }
385
386    #[test]
387    fn data_bounds_empty() {
388        let artist = ViolinArtist {
389            datasets: vec![],
390            positions: None,
391            widths: 0.7,
392            show_median: true,
393            show_quartiles: true,
394            color: Color::TAB_BLUE,
395            alpha: 0.7,
396            label: None,
397            bw_method: 0.0,
398        };
399        assert_eq!(artist.data_bounds(), (0.0, 1.0, 0.0, 1.0));
400    }
401
402    #[test]
403    fn data_bounds_nan_filtered() {
404        let artist = ViolinArtist {
405            datasets: vec![vec![f64::NAN, 1.0, 5.0, f64::NAN]],
406            positions: None,
407            widths: 0.7,
408            show_median: true,
409            show_quartiles: true,
410            color: Color::TAB_BLUE,
411            alpha: 0.7,
412            label: None,
413            bw_method: 0.0,
414        };
415        let (_xmin, _xmax, ymin, ymax) = artist.data_bounds();
416        assert!((ymin - 1.0).abs() < f64::EPSILON);
417        assert!((ymax - 5.0).abs() < f64::EPSILON);
418    }
419
420    #[test]
421    fn builder_color() {
422        let mut artist = sample_violin();
423        artist.color(Color::TAB_RED);
424        assert_eq!(artist.color, Color::TAB_RED);
425    }
426
427    #[test]
428    fn builder_label() {
429        let mut artist = sample_violin();
430        artist.label("my violin");
431        assert_eq!(artist.label.as_deref(), Some("my violin"));
432    }
433
434    #[test]
435    fn builder_alpha() {
436        let mut artist = sample_violin();
437        artist.alpha(0.5);
438        assert!((artist.alpha - 0.5).abs() < f64::EPSILON);
439    }
440
441    #[test]
442    fn builder_alpha_clamped() {
443        let mut artist = sample_violin();
444        artist.alpha(2.0);
445        assert!((artist.alpha - 1.0).abs() < f64::EPSILON);
446        artist.alpha(-1.0);
447        assert!((artist.alpha - 0.0).abs() < f64::EPSILON);
448    }
449
450    #[test]
451    fn builder_widths() {
452        let mut artist = sample_violin();
453        artist.widths(0.5);
454        assert!((artist.widths - 0.5).abs() < f64::EPSILON);
455    }
456
457    #[test]
458    fn builder_widths_clamped() {
459        let mut artist = sample_violin();
460        artist.widths(0.01);
461        assert!((artist.widths - 0.1).abs() < f64::EPSILON);
462        artist.widths(5.0);
463        assert!((artist.widths - 2.0).abs() < f64::EPSILON);
464    }
465
466    #[test]
467    fn builder_show_median() {
468        let mut artist = sample_violin();
469        artist.show_median(false);
470        assert!(!artist.show_median);
471    }
472
473    #[test]
474    fn builder_show_quartiles() {
475        let mut artist = sample_violin();
476        artist.show_quartiles(false);
477        assert!(!artist.show_quartiles);
478    }
479
480    #[test]
481    fn builder_bw_method() {
482        let mut artist = sample_violin();
483        artist.bw_method(0.5);
484        assert!((artist.bw_method - 0.5).abs() < f64::EPSILON);
485    }
486
487    #[test]
488    fn builder_positions() {
489        let mut artist = sample_violin();
490        artist.positions(vec![2.0, 4.0, 6.0]);
491        assert_eq!(artist.positions, Some(vec![2.0, 4.0, 6.0]));
492    }
493
494    #[test]
495    fn position_for_default() {
496        let artist = sample_violin();
497        assert!((artist.position_for(0) - 1.0).abs() < f64::EPSILON);
498        assert!((artist.position_for(2) - 3.0).abs() < f64::EPSILON);
499    }
500
501    #[test]
502    fn position_for_custom() {
503        let mut artist = sample_violin();
504        artist.positions(vec![10.0, 20.0]);
505        assert!((artist.position_for(0) - 10.0).abs() < f64::EPSILON);
506        assert!((artist.position_for(1) - 20.0).abs() < f64::EPSILON);
507        // Beyond custom positions, falls back to default.
508        assert!((artist.position_for(2) - 3.0).abs() < f64::EPSILON);
509    }
510
511    #[test]
512    fn percentile_basic() {
513        let data = [1.0, 2.0, 3.0, 4.0];
514        assert!((percentile(&data, 0.5) - 2.5).abs() < 1e-10);
515    }
516
517    #[test]
518    fn std_dev_basic() {
519        let data = vec![2.0, 4.0, 4.0, 4.0, 5.0, 5.0, 7.0, 9.0];
520        let sd = std_dev(&data);
521        assert!((sd - 2.0).abs() < 1e-10);
522    }
523
524    #[test]
525    fn std_dev_single() {
526        assert!((std_dev(&[5.0]) - 0.0).abs() < f64::EPSILON);
527    }
528}