gpui_px/
boxplot.rs

1//! Box plot - Plotly Express style API.
2//!
3//! Box plots display the distribution of data based on quartiles, showing:
4//! - The median (Q2) as a line
5//! - The interquartile range (IQR) as a box from Q1 to Q3
6//! - Whiskers extending to 1.5×IQR or data min/max
7//! - Outliers as individual points
8
9use crate::error::ChartError;
10use crate::{
11    DEFAULT_COLOR, DEFAULT_HEIGHT, DEFAULT_PADDING_FRACTION, DEFAULT_TITLE_FONT_SIZE,
12    DEFAULT_WIDTH, ScaleType, TITLE_AREA_HEIGHT, extent_padded, validate_data_array,
13    validate_data_length, validate_dimensions, validate_positive,
14};
15use d3rs::axis::{AxisConfig, DefaultAxisTheme, render_axis};
16use d3rs::color::D3Color;
17use d3rs::grid::{GridConfig, render_grid};
18use d3rs::scale::{LinearScale, LogScale, Scale};
19use d3rs::text::{VectorFontConfig, render_vector_text};
20use gpui::prelude::*;
21use gpui::*;
22
/// Summary statistics describing one box of a box plot.
#[derive(Debug, Clone)]
pub struct BoxStats {
    /// Horizontal position of the box (centre of its x bin).
    pub x: f64,
    /// Lower quartile, Q1 (25th percentile).
    pub q1: f64,
    /// Median, Q2 (50th percentile).
    pub q2: f64,
    /// Upper quartile, Q3 (75th percentile).
    pub q3: f64,
    /// Value at which the lower whisker ends.
    pub whisker_low: f64,
    /// Value at which the upper whisker ends.
    pub whisker_high: f64,
    /// Data points lying below the lower whisker.
    pub outliers_low: Vec<f64>,
    /// Data points lying above the upper whisker.
    pub outliers_high: Vec<f64>,
}
43
44impl BoxStats {
45    /// Calculate box statistics from a sorted slice of values
46    fn from_sorted(x: f64, sorted_values: &[f64]) -> Option<Self> {
47        if sorted_values.is_empty() {
48            return None;
49        }
50
51        let n = sorted_values.len();
52
53        // Calculate quartiles using linear interpolation
54        let q1 = percentile(sorted_values, 0.25);
55        let q2 = percentile(sorted_values, 0.50);
56        let q3 = percentile(sorted_values, 0.75);
57
58        let iqr = q3 - q1;
59        let whisker_low_limit = q1 - 1.5 * iqr;
60        let whisker_high_limit = q3 + 1.5 * iqr;
61
62        // Find actual whisker positions (closest data points within limits)
63        let whisker_low = sorted_values
64            .iter()
65            .copied()
66            .find(|&v| v >= whisker_low_limit)
67            .unwrap_or(sorted_values[0]);
68
69        let whisker_high = sorted_values
70            .iter()
71            .copied()
72            .rev()
73            .find(|&v| v <= whisker_high_limit)
74            .unwrap_or(sorted_values[n - 1]);
75
76        // Collect outliers
77        let outliers_low: Vec<f64> = sorted_values
78            .iter()
79            .copied()
80            .filter(|&v| v < whisker_low)
81            .collect();
82
83        let outliers_high: Vec<f64> = sorted_values
84            .iter()
85            .copied()
86            .filter(|&v| v > whisker_high)
87            .collect();
88
89        Some(BoxStats {
90            x,
91            q1,
92            q2,
93            q3,
94            whisker_low,
95            whisker_high,
96            outliers_low,
97            outliers_high,
98        })
99    }
100}
101
/// Linearly interpolated percentile of an ascending-sorted slice.
///
/// `p` is a fraction (`0.25` for the first quartile). An empty slice yields
/// `0.0` and a single-element slice yields that element. Positions that fall
/// outside the slice resolve to the nearest end element.
fn percentile(sorted: &[f64], p: f64) -> f64 {
    match sorted {
        [] => 0.0,
        [only] => *only,
        _ => {
            let last = sorted.len() - 1;
            // Fractional position of the requested percentile within the slice
            let position = p * last as f64;
            let lo = position.floor() as usize;
            let hi = position.ceil() as usize;

            if lo == hi || hi > last {
                // Exact hit on a sample, or position beyond the last sample
                sorted[lo.min(last)]
            } else {
                let weight = position - lo as f64;
                sorted[lo] * (1.0 - weight) + sorted[hi] * weight
            }
        }
    }
}
123
124/// Box plot builder.
125#[derive(Debug, Clone)]
126pub struct BoxPlotChart {
127    x: Vec<f64>,
128    y: Vec<f64>,
129    title: Option<String>,
130    box_color: u32,
131    median_color: u32,
132    whisker_color: u32,
133    outlier_color: u32,
134    box_opacity: f32,
135    box_width: f32,
136    stroke_width: f32,
137    outlier_radius: f32,
138    num_bins: Option<usize>,
139    width: f32,
140    height: f32,
141    x_scale_type: ScaleType,
142    y_scale_type: ScaleType,
143}
144
145impl BoxPlotChart {
146    /// Set chart title (rendered at top of chart).
147    pub fn title(mut self, title: impl Into<String>) -> Self {
148        self.title = Some(title.into());
149        self
150    }
151
152    /// Set box fill color as 24-bit RGB hex value (format: 0xRRGGBB).
153    pub fn box_color(mut self, hex: u32) -> Self {
154        self.box_color = hex;
155        self
156    }
157
158    /// Set median line color.
159    pub fn median_color(mut self, hex: u32) -> Self {
160        self.median_color = hex;
161        self
162    }
163
164    /// Set whisker line color.
165    pub fn whisker_color(mut self, hex: u32) -> Self {
166        self.whisker_color = hex;
167        self
168    }
169
170    /// Set outlier point color.
171    pub fn outlier_color(mut self, hex: u32) -> Self {
172        self.outlier_color = hex;
173        self
174    }
175
176    /// Set box opacity (0.0 - 1.0).
177    pub fn box_opacity(mut self, opacity: f32) -> Self {
178        self.box_opacity = opacity.clamp(0.0, 1.0);
179        self
180    }
181
182    /// Set box width in pixels.
183    pub fn box_width(mut self, width: f32) -> Self {
184        self.box_width = width;
185        self
186    }
187
188    /// Set stroke width for median and whisker lines.
189    pub fn stroke_width(mut self, width: f32) -> Self {
190        self.stroke_width = width;
191        self
192    }
193
194    /// Set outlier point radius.
195    pub fn outlier_radius(mut self, radius: f32) -> Self {
196        self.outlier_radius = radius;
197        self
198    }
199
200    /// Set the number of bins for grouping data.
201    /// If not set, automatically calculated based on chart width.
202    pub fn bins(mut self, n: usize) -> Self {
203        self.num_bins = Some(n);
204        self
205    }
206
207    /// Set chart dimensions.
208    pub fn size(mut self, width: f32, height: f32) -> Self {
209        self.width = width;
210        self.height = height;
211        self
212    }
213
214    /// Set X-axis scale type (linear or log).
215    pub fn x_scale(mut self, scale: ScaleType) -> Self {
216        self.x_scale_type = scale;
217        self
218    }
219
220    /// Set Y-axis scale type (linear or log).
221    pub fn y_scale(mut self, scale: ScaleType) -> Self {
222        self.y_scale_type = scale;
223        self
224    }
225
226    /// Build and validate the chart, returning renderable element.
227    pub fn build(self) -> Result<impl IntoElement, ChartError> {
228        // Validate inputs
229        validate_data_array(&self.x, "x")?;
230        validate_data_array(&self.y, "y")?;
231        validate_data_length(self.x.len(), self.y.len(), "x", "y")?;
232        validate_dimensions(self.width, self.height)?;
233
234        // Validate positive values for log scale
235        if self.x_scale_type == ScaleType::Log {
236            validate_positive(&self.x, "x")?;
237        }
238        if self.y_scale_type == ScaleType::Log {
239            validate_positive(&self.y, "y")?;
240        }
241
242        // Define margins
243        let margin_left = 60.0;
244        let margin_bottom = 30.0;
245        let margin_top = 10.0;
246        let margin_right = 20.0;
247
248        // Calculate plot area
249        let title_height = if self.title.is_some() {
250            TITLE_AREA_HEIGHT
251        } else {
252            0.0
253        };
254
255        let plot_width = (self.width as f64 - margin_left - margin_right).max(0.0);
256        let plot_height =
257            (self.height as f64 - title_height as f64 - margin_top - margin_bottom).max(0.0);
258
259        // Calculate domains
260        let (x_min, x_max) = extent_padded(&self.x, DEFAULT_PADDING_FRACTION);
261        let (y_min, y_max) = extent_padded(&self.y, DEFAULT_PADDING_FRACTION);
262
263        // Calculate number of bins
264        let num_bins = self
265            .num_bins
266            .unwrap_or_else(|| (plot_width / 40.0).max(3.0) as usize);
267
268        // Bin the data
269        let boxes = self.calculate_boxes(x_min, x_max, num_bins);
270
271        // Build based on scale types
272        let chart_content =
273            self.render_chart(&boxes, x_min, x_max, y_min, y_max, plot_width, plot_height);
274
275        // Build container with optional title
276        let mut container = div()
277            .w(px(self.width))
278            .h(px(self.height))
279            .relative()
280            .flex()
281            .flex_col();
282
283        // Add title if present
284        if let Some(title) = &self.title {
285            let font_config =
286                VectorFontConfig::horizontal(DEFAULT_TITLE_FONT_SIZE, hsla(0.0, 0.0, 0.2, 1.0));
287            container = container.child(
288                div()
289                    .w_full()
290                    .h(px(title_height))
291                    .flex()
292                    .justify_center()
293                    .items_center()
294                    .child(render_vector_text(title, &font_config)),
295            );
296        }
297
298        // Add chart content
299        container = container.child(div().relative().child(chart_content));
300
301        Ok(container)
302    }
303
304    /// Calculate box statistics for each bin
305    fn calculate_boxes(&self, x_min: f64, x_max: f64, num_bins: usize) -> Vec<BoxStats> {
306        let bin_width = (x_max - x_min) / num_bins as f64;
307
308        // Group data points by bin
309        let mut bins: Vec<Vec<f64>> = vec![Vec::new(); num_bins];
310
311        for (&x, &y) in self.x.iter().zip(self.y.iter()) {
312            let bin_idx = ((x - x_min) / bin_width).floor() as usize;
313            let bin_idx = bin_idx.min(num_bins - 1);
314            bins[bin_idx].push(y);
315        }
316
317        // Calculate statistics for each non-empty bin
318        bins.iter_mut()
319            .enumerate()
320            .filter_map(|(i, bin)| {
321                if bin.is_empty() {
322                    return None;
323                }
324                bin.sort_by(|a, b| a.partial_cmp(b).unwrap());
325                let x_center = x_min + (i as f64 + 0.5) * bin_width;
326                BoxStats::from_sorted(x_center, bin)
327            })
328            .collect()
329    }
330
331    /// Render the chart content
332    fn render_chart(
333        &self,
334        boxes: &[BoxStats],
335        x_min: f64,
336        x_max: f64,
337        y_min: f64,
338        y_max: f64,
339        plot_width: f64,
340        plot_height: f64,
341    ) -> AnyElement {
342        let theme = DefaultAxisTheme;
343
344        match (self.x_scale_type, self.y_scale_type) {
345            (ScaleType::Linear, ScaleType::Linear) => {
346                let x_scale = LinearScale::new()
347                    .domain(x_min, x_max)
348                    .range(0.0, plot_width);
349                let y_scale = LinearScale::new()
350                    .domain(y_min, y_max)
351                    .range(plot_height, 0.0);
352
353                self.render_with_scales(&x_scale, &y_scale, boxes, plot_width, plot_height, &theme)
354            }
355            (ScaleType::Log, ScaleType::Linear) => {
356                let x_scale = LogScale::new()
357                    .domain(x_min.max(1e-10), x_max)
358                    .range(0.0, plot_width);
359                let y_scale = LinearScale::new()
360                    .domain(y_min, y_max)
361                    .range(plot_height, 0.0);
362
363                self.render_with_scales(&x_scale, &y_scale, boxes, plot_width, plot_height, &theme)
364            }
365            (ScaleType::Linear, ScaleType::Log) => {
366                let x_scale = LinearScale::new()
367                    .domain(x_min, x_max)
368                    .range(0.0, plot_width);
369                let y_scale = LogScale::new()
370                    .domain(y_min.max(1e-10), y_max)
371                    .range(plot_height, 0.0);
372
373                self.render_with_scales(&x_scale, &y_scale, boxes, plot_width, plot_height, &theme)
374            }
375            (ScaleType::Log, ScaleType::Log) => {
376                let x_scale = LogScale::new()
377                    .domain(x_min.max(1e-10), x_max)
378                    .range(0.0, plot_width);
379                let y_scale = LogScale::new()
380                    .domain(y_min.max(1e-10), y_max)
381                    .range(plot_height, 0.0);
382
383                self.render_with_scales(&x_scale, &y_scale, boxes, plot_width, plot_height, &theme)
384            }
385        }
386    }
387
388    /// Render with specific scale types
389    fn render_with_scales<XS, YS>(
390        &self,
391        x_scale: &XS,
392        y_scale: &YS,
393        boxes: &[BoxStats],
394        plot_width: f64,
395        plot_height: f64,
396        theme: &DefaultAxisTheme,
397    ) -> AnyElement
398    where
399        XS: Scale<f64, f64>,
400        YS: Scale<f64, f64>,
401    {
402        let box_color = D3Color::from_hex(self.box_color).to_rgba();
403        let median_color = D3Color::from_hex(self.median_color).to_rgba();
404        let whisker_color = D3Color::from_hex(self.whisker_color).to_rgba();
405        let outlier_color = D3Color::from_hex(self.outlier_color).to_rgba();
406
407        // Render all boxes
408        let box_elements: Vec<AnyElement> = boxes
409            .iter()
410            .flat_map(|stats| {
411                let x_px = x_scale.scale(stats.x) as f32;
412                let half_width = self.box_width / 2.0;
413
414                let q1_px = y_scale.scale(stats.q1) as f32;
415                let q2_px = y_scale.scale(stats.q2) as f32;
416                let q3_px = y_scale.scale(stats.q3) as f32;
417                let whisker_low_px = y_scale.scale(stats.whisker_low) as f32;
418                let whisker_high_px = y_scale.scale(stats.whisker_high) as f32;
419
420                let box_top = q3_px.min(q1_px);
421                let box_bottom = q3_px.max(q1_px);
422                let box_height = (box_bottom - box_top).max(1.0);
423
424                let mut elements: Vec<AnyElement> = Vec::new();
425
426                // Whisker line (vertical line from low to high)
427                elements.push(
428                    div()
429                        .absolute()
430                        .left(px(x_px - 0.5))
431                        .top(px(whisker_high_px.min(whisker_low_px)))
432                        .w(px(self.stroke_width))
433                        .h(px((whisker_low_px - whisker_high_px).abs().max(1.0)))
434                        .bg(whisker_color)
435                        .into_any_element(),
436                );
437
438                // Lower whisker cap (horizontal line)
439                elements.push(
440                    div()
441                        .absolute()
442                        .left(px(x_px - half_width * 0.5))
443                        .top(px(whisker_low_px - self.stroke_width / 2.0))
444                        .w(px(half_width))
445                        .h(px(self.stroke_width))
446                        .bg(whisker_color)
447                        .into_any_element(),
448                );
449
450                // Upper whisker cap (horizontal line)
451                elements.push(
452                    div()
453                        .absolute()
454                        .left(px(x_px - half_width * 0.5))
455                        .top(px(whisker_high_px - self.stroke_width / 2.0))
456                        .w(px(half_width))
457                        .h(px(self.stroke_width))
458                        .bg(whisker_color)
459                        .into_any_element(),
460                );
461
462                // Box (IQR)
463                elements.push(
464                    div()
465                        .absolute()
466                        .left(px(x_px - half_width))
467                        .top(px(box_top))
468                        .w(px(self.box_width))
469                        .h(px(box_height))
470                        .bg(box_color)
471                        .opacity(self.box_opacity)
472                        .border_1()
473                        .border_color(whisker_color)
474                        .into_any_element(),
475                );
476
477                // Median line
478                elements.push(
479                    div()
480                        .absolute()
481                        .left(px(x_px - half_width))
482                        .top(px(q2_px - self.stroke_width))
483                        .w(px(self.box_width))
484                        .h(px(self.stroke_width * 2.0))
485                        .bg(median_color)
486                        .into_any_element(),
487                );
488
489                // Outliers
490                for &outlier in &stats.outliers_low {
491                    let y_px = y_scale.scale(outlier) as f32;
492                    elements.push(
493                        div()
494                            .absolute()
495                            .left(px(x_px - self.outlier_radius))
496                            .top(px(y_px - self.outlier_radius))
497                            .w(px(self.outlier_radius * 2.0))
498                            .h(px(self.outlier_radius * 2.0))
499                            .rounded_full()
500                            .bg(outlier_color)
501                            .opacity(0.7)
502                            .into_any_element(),
503                    );
504                }
505
506                for &outlier in &stats.outliers_high {
507                    let y_px = y_scale.scale(outlier) as f32;
508                    elements.push(
509                        div()
510                            .absolute()
511                            .left(px(x_px - self.outlier_radius))
512                            .top(px(y_px - self.outlier_radius))
513                            .w(px(self.outlier_radius * 2.0))
514                            .h(px(self.outlier_radius * 2.0))
515                            .rounded_full()
516                            .bg(outlier_color)
517                            .opacity(0.7)
518                            .into_any_element(),
519                    );
520                }
521
522                elements
523            })
524            .collect();
525
526        div()
527            .flex()
528            .child(render_axis(
529                y_scale,
530                &AxisConfig::left(),
531                plot_height as f32,
532                theme,
533            ))
534            .child(
535                div()
536                    .flex()
537                    .flex_col()
538                    .child(
539                        div()
540                            .w(px(plot_width as f32))
541                            .h(px(plot_height as f32))
542                            .relative()
543                            .bg(rgb(0xf8f8f8))
544                            .child(render_grid(
545                                x_scale,
546                                y_scale,
547                                &GridConfig::default(),
548                                plot_width as f32,
549                                plot_height as f32,
550                                theme,
551                            ))
552                            .children(box_elements),
553                    )
554                    .child(render_axis(
555                        x_scale,
556                        &AxisConfig::bottom(),
557                        plot_width as f32,
558                        theme,
559                    )),
560            )
561            .into_any_element()
562    }
563}
564
565/// Create a box plot from x and y data.
566///
567/// The data is binned by x values, and for each bin, box-and-whisker statistics
568/// are calculated from the y values.
569///
570/// # Example
571///
572/// ```rust,no_run
573/// use gpui_px::boxplot;
574///
575/// // Generate some sample data
576/// let x: Vec<f64> = (0..100).map(|i| (i / 10) as f64).collect();
577/// let y: Vec<f64> = x.iter().map(|&xi| xi * 2.0 + rand::random::<f64>() * 10.0).collect();
578///
579/// let chart = boxplot(&x, &y)
580///     .title("Distribution by Group")
581///     .box_color(0xdddddd)
582///     .median_color(0x000000)
583///     .build()?;
584/// # Ok::<(), gpui_px::ChartError>(())
585/// ```
586pub fn boxplot(x: &[f64], y: &[f64]) -> BoxPlotChart {
587    BoxPlotChart {
588        x: x.to_vec(),
589        y: y.to_vec(),
590        title: None,
591        box_color: 0xdddddd,
592        median_color: 0x000000,
593        whisker_color: 0x333333,
594        outlier_color: DEFAULT_COLOR,
595        box_opacity: 1.0,
596        box_width: 20.0,
597        stroke_width: 2.0,
598        outlier_radius: 3.0,
599        num_bins: None,
600        width: DEFAULT_WIDTH,
601        height: DEFAULT_HEIGHT,
602        x_scale_type: ScaleType::Linear,
603        y_scale_type: ScaleType::Linear,
604    }
605}
606
#[cfg(test)]
mod tests {
    use super::*;

    #[test]
    fn test_percentile_basic() {
        let values = [1.0, 2.0, 3.0, 4.0, 5.0];
        assert!((percentile(&values, 0.0) - 1.0).abs() < 1e-10);
        assert!((percentile(&values, 0.5) - 3.0).abs() < 1e-10);
        assert!((percentile(&values, 1.0) - 5.0).abs() < 1e-10);
    }

    #[test]
    fn test_percentile_quartiles() {
        let values = [1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0];
        let q1 = percentile(&values, 0.25);
        let q2 = percentile(&values, 0.50);
        let q3 = percentile(&values, 0.75);
        assert!((q2 - 6.0).abs() < 1e-10); // Median
        assert!(q1 < q2);
        assert!(q2 < q3);
    }

    #[test]
    fn test_percentile_edge_cases() {
        // Empty and single-element slices short-circuit
        assert_eq!(percentile(&[], 0.5), 0.0);
        assert_eq!(percentile(&[7.0], 0.9), 7.0);
        // Interpolation between two samples
        assert!((percentile(&[0.0, 10.0], 0.25) - 2.5).abs() < 1e-10);
    }

    #[test]
    fn test_box_stats_empty() {
        assert!(BoxStats::from_sorted(0.0, &[]).is_none());
    }

    #[test]
    fn test_box_stats_from_sorted() {
        let values = [1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0];
        let stats = BoxStats::from_sorted(5.0, &values).unwrap();

        assert!((stats.x - 5.0).abs() < 1e-10);
        assert!(stats.q1 < stats.q2);
        assert!(stats.q2 < stats.q3);
        assert!(stats.whisker_low <= stats.q1);
        assert!(stats.whisker_high >= stats.q3);
    }

    #[test]
    fn test_box_stats_with_outliers() {
        // Q1 = 10.75, Q3 = 14.25, IQR = 3.5, so the fences are 5.5 and 19.5
        let values = [1.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 100.0];
        let stats = BoxStats::from_sorted(0.0, &values).unwrap();

        // 1.0 and 100.0 are both outside the fences
        assert_eq!(stats.outliers_low, vec![1.0]);
        assert_eq!(stats.outliers_high, vec![100.0]);
        // Whiskers stop at the most extreme samples inside the fences
        assert_eq!(stats.whisker_low, 10.0);
        assert_eq!(stats.whisker_high, 15.0);
    }

    #[test]
    fn test_boxplot_empty_data() {
        let result = boxplot(&[], &[]).build();
        assert!(matches!(result, Err(ChartError::EmptyData { .. })));
    }

    #[test]
    fn test_boxplot_mismatched_lengths() {
        let x = vec![1.0, 2.0, 3.0];
        let y = vec![1.0, 2.0];
        let result = boxplot(&x, &y).build();
        assert!(matches!(result, Err(ChartError::DataLengthMismatch { .. })));
    }

    #[test]
    fn test_boxplot_successful_build() {
        let x: Vec<f64> = (0..100).map(|i| (i / 10) as f64).collect();
        let y: Vec<f64> = x.iter().map(|&xi| xi * 2.0).collect();

        let result = boxplot(&x, &y)
            .title("Test Box Plot")
            .box_color(0xcccccc)
            .build();
        assert!(result.is_ok());
    }

    #[test]
    fn test_boxplot_builder_chain() {
        let x = vec![1.0, 2.0, 3.0, 4.0, 5.0];
        let y = vec![10.0, 20.0, 15.0, 25.0, 30.0];

        let result = boxplot(&x, &y)
            .title("My Box Plot")
            .box_color(0xdddddd)
            .median_color(0xff0000)
            .whisker_color(0x333333)
            .outlier_color(0x0000ff)
            .box_opacity(0.8)
            .box_width(25.0)
            .stroke_width(3.0)
            .outlier_radius(4.0)
            .bins(5)
            .size(800.0, 600.0)
            .build();
        assert!(result.is_ok());
    }

    #[test]
    fn test_boxplot_log_scale_positive_values() {
        let x = vec![10.0, 100.0, 1000.0, 10000.0];
        let y = vec![1.0, 10.0, 100.0, 1000.0];

        let result = boxplot(&x, &y)
            .x_scale(ScaleType::Log)
            .y_scale(ScaleType::Log)
            .build();
        assert!(result.is_ok());
    }

    #[test]
    fn test_boxplot_log_scale_negative_values() {
        let x = vec![-1.0, 2.0, 3.0];
        let y = vec![1.0, 2.0, 3.0];

        let result = boxplot(&x, &y).x_scale(ScaleType::Log).build();
        assert!(matches!(result, Err(ChartError::InvalidData { .. })));
    }
}