Skip to main content

datui_lib/
chart_data.rs

1//! Prepare chart data from LazyFrame: select x/y columns, collect, and convert to (f64, f64) points.
2//! All prepare_* and collect_* functions take a `row_limit` to cap materialized rows (default from config).
3
4use chrono::{DateTime, NaiveDate, NaiveTime, Utc};
5use color_eyre::Result;
6use polars::datatypes::{DataType, TimeUnit};
7use polars::prelude::*;
8use std::f64::consts::PI;
9
/// Describes how x-axis numeric values map to temporal types for label formatting.
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
pub enum XAxisTemporalKind {
    /// Plain numeric x values; labels use the generic numeric formatter.
    Numeric,
    /// x = days since Unix epoch (f64).
    Date,
    /// x = microseconds since epoch.
    DatetimeUs,
    /// x = milliseconds since epoch.
    DatetimeMs,
    /// x = nanoseconds since epoch.
    DatetimeNs,
    /// x = nanoseconds since midnight.
    Time,
}
20
21fn x_axis_temporal_kind(dtype: &DataType) -> XAxisTemporalKind {
22    match dtype {
23        DataType::Date => XAxisTemporalKind::Date,
24        DataType::Datetime(unit, _) => match unit {
25            TimeUnit::Nanoseconds => XAxisTemporalKind::DatetimeNs,
26            TimeUnit::Microseconds => XAxisTemporalKind::DatetimeUs,
27            TimeUnit::Milliseconds => XAxisTemporalKind::DatetimeMs,
28        },
29        DataType::Time => XAxisTemporalKind::Time,
30        _ => XAxisTemporalKind::Numeric,
31    }
32}
33
34/// Returns the x-axis temporal kind for a column from the schema (for axis label formatting when no data is loaded yet).
35pub fn x_axis_temporal_kind_for_column(schema: &Schema, x_column: &str) -> XAxisTemporalKind {
36    schema
37        .get(x_column)
38        .map(x_axis_temporal_kind)
39        .unwrap_or(XAxisTemporalKind::Numeric)
40}
41
/// Format a numeric axis tick (y-axis or generic numeric x-axis).
///
/// Magnitudes of at least 1e6, and non-zero magnitudes below 1e-2, use scientific notation with
/// two decimals; everything else (including zero) uses fixed notation with two decimals.
pub fn format_axis_label(v: f64) -> String {
    let magnitude = v.abs();
    let needs_exponent = magnitude >= 1e6 || (v != 0.0 && magnitude < 1e-2);
    if needs_exponent {
        return format!("{:.2e}", v);
    }
    format!("{:.2}", v)
}
50
51/// Format x-axis tick: dates/datetimes/times when kind is temporal, else numeric. Used by chart widget and export.
52pub fn format_x_axis_label(v: f64, kind: XAxisTemporalKind) -> String {
53    match kind {
54        XAxisTemporalKind::Numeric => format_axis_label(v),
55        XAxisTemporalKind::Date => {
56            const UNIX_EPOCH_CE_DAYS: i32 = 719_163;
57            let days = v.trunc() as i32;
58            match NaiveDate::from_num_days_from_ce_opt(UNIX_EPOCH_CE_DAYS.saturating_add(days)) {
59                Some(d) => d.format("%Y-%m-%d").to_string(),
60                None => format_axis_label(v),
61            }
62        }
63        XAxisTemporalKind::DatetimeUs => DateTime::from_timestamp_micros(v.trunc() as i64)
64            .map(|dt: DateTime<Utc>| dt.format("%Y-%m-%d %H:%M").to_string())
65            .unwrap_or_else(|| format_axis_label(v)),
66        XAxisTemporalKind::DatetimeMs => DateTime::from_timestamp_millis(v.trunc() as i64)
67            .map(|dt: DateTime<Utc>| dt.format("%Y-%m-%d %H:%M").to_string())
68            .unwrap_or_else(|| format_axis_label(v)),
69        XAxisTemporalKind::DatetimeNs => {
70            let millis = (v.trunc() as i64) / 1_000_000;
71            DateTime::from_timestamp_millis(millis)
72                .map(|dt: DateTime<Utc>| dt.format("%Y-%m-%d %H:%M").to_string())
73                .unwrap_or_else(|| format_axis_label(v))
74        }
75        XAxisTemporalKind::Time => {
76            let nsecs = v.trunc() as u64;
77            let secs = (nsecs / 1_000_000_000) as u32;
78            let subsec = (nsecs % 1_000_000_000) as u32;
79            match NaiveTime::from_num_seconds_from_midnight_opt(secs, subsec) {
80                Some(t) => t.format("%H:%M:%S").to_string(),
81                None => format_axis_label(v),
82            }
83        }
84    }
85}
86
87/// Result of loading only the x column: min/max for axis bounds and temporal kind.
88pub struct ChartXRangeResult {
89    pub x_min: f64,
90    pub x_max: f64,
91    pub x_axis_kind: XAxisTemporalKind,
92}
93
94/// Loads only the x column and returns its min/max (for axis display when no y is selected).
95/// Limits to `row_limit` rows then scans for min/max (single column materialized).
96pub fn prepare_chart_x_range(
97    lf: &LazyFrame,
98    schema: &Schema,
99    x_column: &str,
100    row_limit: usize,
101) -> Result<ChartXRangeResult> {
102    let x_dtype = schema
103        .get(x_column)
104        .ok_or_else(|| color_eyre::eyre::eyre!("x column '{}' not in schema", x_column))?;
105
106    let x_axis_kind = x_axis_temporal_kind(x_dtype);
107    let x_expr: Expr = match x_dtype {
108        DataType::Datetime(_, _) | DataType::Date | DataType::Time => {
109            col(x_column).cast(DataType::Int64)
110        }
111        _ => col(x_column).cast(DataType::Float64),
112    };
113
114    let df = lf
115        .clone()
116        .select([x_expr])
117        .drop_nulls(None)
118        .slice(0, row_limit as u32)
119        .collect()?;
120
121    let n_rows = df.height();
122    if n_rows == 0 {
123        return Ok(ChartXRangeResult {
124            x_min: 0.0,
125            x_max: 1.0,
126            x_axis_kind,
127        });
128    }
129
130    let x_series = df.column(x_column)?;
131    let x_f64 = match x_series.dtype() {
132        DataType::Int64 => x_series.cast(&DataType::Float64)?,
133        _ => x_series.clone(),
134    };
135    let x_f64 = x_f64.f64()?;
136
137    let mut x_min = f64::INFINITY;
138    let mut x_max = f64::NEG_INFINITY;
139    for i in 0..n_rows {
140        if let Some(v) = x_f64.get(i) {
141            if v.is_finite() {
142                x_min = x_min.min(v);
143                x_max = x_max.max(v);
144            }
145        }
146    }
147
148    let (x_min, x_max) = if x_max >= x_min {
149        (x_min, x_max)
150    } else {
151        (0.0, 1.0)
152    };
153
154    Ok(ChartXRangeResult {
155        x_min,
156        x_max,
157        x_axis_kind,
158    })
159}
160
161/// Result of preparing chart data: series points and x-axis kind for label formatting.
162pub struct ChartDataResult {
163    pub series: Vec<Vec<(f64, f64)>>,
164    pub x_axis_kind: XAxisTemporalKind,
165}
166
/// Histogram bin (center and count).
#[derive(Clone, Debug, PartialEq)]
pub struct HistogramBin {
    /// x position of the middle of the bin.
    pub center: f64,
    /// Number of values that fell into the bin (stored as f64).
    pub count: f64,
}

/// Histogram data for a single column.
#[derive(Clone, Debug, PartialEq)]
pub struct HistogramData {
    /// Name of the column the histogram was built from.
    pub column: String,
    /// Bins in ascending x order.
    pub bins: Vec<HistogramBin>,
    /// Lower x-axis bound.
    pub x_min: f64,
    /// Upper x-axis bound.
    pub x_max: f64,
    /// Largest bin count (0.0 when there are no bins).
    pub max_count: f64,
}
183
/// A single KDE curve: the column it was estimated from and its sampled `(x, density)` points.
#[derive(Clone, Debug, PartialEq)]
pub struct KdeSeries {
    /// Name of the source column.
    pub name: String,
    /// Sampled curve points as `(x, density)`.
    pub points: Vec<(f64, f64)>,
}

/// KDE curves for one or more columns, plus the shared axis bounds.
#[derive(Clone, Debug, PartialEq)]
pub struct KdeData {
    /// One curve per column that had data.
    pub series: Vec<KdeSeries>,
    /// Lower x-axis bound across all curves.
    pub x_min: f64,
    /// Upper x-axis bound across all curves.
    pub x_max: f64,
    /// Upper y-axis bound (largest density across all curves).
    pub y_max: f64,
}
198
/// Box plot stats for a column.
#[derive(Clone, Debug, PartialEq)]
pub struct BoxPlotStats {
    /// Name of the source column.
    pub name: String,
    /// Smallest value.
    pub min: f64,
    /// First quartile (25th percentile).
    pub q1: f64,
    /// Median (50th percentile).
    pub median: f64,
    /// Third quartile (75th percentile).
    pub q3: f64,
    /// Largest value.
    pub max: f64,
}

/// Box plot stats for one or more columns, plus the shared y-axis bounds.
#[derive(Clone, Debug, PartialEq)]
pub struct BoxPlotData {
    /// One entry per column that had data.
    pub stats: Vec<BoxPlotStats>,
    /// Lower y-axis bound across all columns.
    pub y_min: f64,
    /// Upper y-axis bound across all columns.
    pub y_max: f64,
}
216
/// Heatmap data for two numeric columns.
#[derive(Clone, Debug, PartialEq)]
pub struct HeatmapData {
    /// Name of the column binned along the x axis.
    pub x_column: String,
    /// Name of the column binned along the y axis.
    pub y_column: String,
    /// Lower x-axis bound.
    pub x_min: f64,
    /// Upper x-axis bound.
    pub x_max: f64,
    /// Lower y-axis bound.
    pub y_min: f64,
    /// Upper y-axis bound.
    pub y_max: f64,
    /// Number of bins along x.
    pub x_bins: usize,
    /// Number of bins along y.
    pub y_bins: usize,
    /// Per-cell counts, indexed as `counts[y_bin][x_bin]`.
    pub counts: Vec<Vec<f64>>,
    /// Largest cell count.
    pub max_count: f64,
}
231
232/// Prepares chart data from the current LazyFrame.
233/// Returns series data and x-axis kind. X is cast to f64 (temporal types as ordinal).
234/// Drops nulls and limits to `row_limit` rows.
235pub fn prepare_chart_data(
236    lf: &LazyFrame,
237    schema: &Schema,
238    x_column: &str,
239    y_columns: &[String],
240    row_limit: usize,
241) -> Result<ChartDataResult> {
242    if y_columns.is_empty() {
243        return Ok(ChartDataResult {
244            series: Vec::new(),
245            x_axis_kind: XAxisTemporalKind::Numeric,
246        });
247    }
248
249    let x_dtype = schema
250        .get(x_column)
251        .ok_or_else(|| color_eyre::eyre::eyre!("x column '{}' not in schema", x_column))?;
252
253    let x_axis_kind = x_axis_temporal_kind(x_dtype);
254
255    // X expr: cast to Float64; for Date/Datetime/Time cast to Int64 (ordinal), then cast to f64 after collect.
256    let x_expr: Expr = match x_dtype {
257        DataType::Datetime(_, _) | DataType::Date | DataType::Time => {
258            col(x_column).cast(DataType::Int64)
259        }
260        _ => col(x_column).cast(DataType::Float64),
261    };
262
263    let mut select_exprs = vec![x_expr];
264    for y in y_columns {
265        select_exprs.push(col(y.as_str()).cast(DataType::Float64));
266    }
267
268    let df = lf
269        .clone()
270        .select(select_exprs)
271        .drop_nulls(None)
272        .slice(0, row_limit as u32)
273        .collect()?;
274
275    let n_rows = df.height();
276    if n_rows == 0 {
277        return Ok(ChartDataResult {
278            series: vec![vec![]; y_columns.len()],
279            x_axis_kind,
280        });
281    }
282
283    let x_series = df.column(x_column)?;
284    let x_f64 = match x_series.dtype() {
285        DataType::Int64 => x_series.cast(&DataType::Float64)?,
286        _ => x_series.clone(),
287    };
288    let x_f64 = x_f64.f64()?;
289
290    let mut series_per_y: Vec<Vec<(f64, f64)>> = vec![Vec::with_capacity(n_rows); y_columns.len()];
291
292    for (yi, y_name) in y_columns.iter().enumerate() {
293        let y_series = df.column(y_name.as_str())?.f64()?;
294        for i in 0..n_rows {
295            let x_val = x_f64.get(i).unwrap_or(0.0);
296            let y_val = y_series.get(i).unwrap_or(0.0);
297            if x_val.is_finite() && y_val.is_finite() {
298                series_per_y[yi].push((x_val, y_val));
299            }
300        }
301    }
302
303    Ok(ChartDataResult {
304        series: series_per_y,
305        x_axis_kind,
306    })
307}
308
309fn collect_numeric_values(lf: &LazyFrame, column: &str, row_limit: usize) -> Result<Vec<f64>> {
310    let df = lf
311        .clone()
312        .select([col(column).cast(DataType::Float64)])
313        .drop_nulls(None)
314        .slice(0, row_limit as u32)
315        .collect()?;
316    let series = df.column(column)?.f64()?;
317    let mut values = Vec::with_capacity(series.len());
318    for i in 0..series.len() {
319        if let Some(v) = series.get(i) {
320            if v.is_finite() {
321                values.push(v);
322            }
323        }
324    }
325    Ok(values)
326}
327
328/// Collect multiple numeric columns in a single scan. Returns one Vec<f64> per column (finite values only).
329fn collect_numeric_columns(
330    lf: &LazyFrame,
331    columns: &[&str],
332    row_limit: usize,
333) -> Result<Vec<Vec<f64>>> {
334    if columns.is_empty() {
335        return Ok(Vec::new());
336    }
337    let select_exprs: Vec<Expr> = columns
338        .iter()
339        .map(|c| col(*c).cast(DataType::Float64))
340        .collect();
341    let df = lf
342        .clone()
343        .select(select_exprs)
344        .drop_nulls(None)
345        .slice(0, row_limit as u32)
346        .collect()?;
347    let mut out = Vec::with_capacity(columns.len());
348    for col_name in columns {
349        let series = df.column(col_name)?.f64()?;
350        let mut values = Vec::with_capacity(series.len());
351        for i in 0..series.len() {
352            if let Some(v) = series.get(i) {
353                if v.is_finite() {
354                    values.push(v);
355                }
356            }
357        }
358        out.push(values);
359    }
360    Ok(out)
361}
362
363fn collect_numeric_pairs(
364    lf: &LazyFrame,
365    x_column: &str,
366    y_column: &str,
367    row_limit: usize,
368) -> Result<Vec<(f64, f64)>> {
369    let df = lf
370        .clone()
371        .select([
372            col(x_column).cast(DataType::Float64),
373            col(y_column).cast(DataType::Float64),
374        ])
375        .drop_nulls(None)
376        .slice(0, row_limit as u32)
377        .collect()?;
378    let x_series = df.column(x_column)?.f64()?;
379    let y_series = df.column(y_column)?.f64()?;
380    let mut values = Vec::with_capacity(df.height());
381    for i in 0..df.height() {
382        let x_val = x_series.get(i).unwrap_or(0.0);
383        let y_val = y_series.get(i).unwrap_or(0.0);
384        if x_val.is_finite() && y_val.is_finite() {
385            values.push((x_val, y_val));
386        }
387    }
388    Ok(values)
389}
390
391/// Prepare histogram data for a numeric column.
392pub fn prepare_histogram_data(
393    lf: &LazyFrame,
394    column: &str,
395    bins: usize,
396    row_limit: usize,
397) -> Result<HistogramData> {
398    let mut values = collect_numeric_values(lf, column, row_limit)?;
399    if values.is_empty() {
400        return Ok(HistogramData {
401            column: column.to_string(),
402            bins: Vec::new(),
403            x_min: 0.0,
404            x_max: 1.0,
405            max_count: 0.0,
406        });
407    }
408    values.sort_by(|a, b| a.partial_cmp(b).unwrap_or(std::cmp::Ordering::Equal));
409    let (x_min, x_max) = match (values.first(), values.last()) {
410        (Some(a), Some(b)) => (*a, *b),
411        _ => {
412            return Ok(HistogramData {
413                column: column.to_string(),
414                bins: vec![],
415                x_min: 0.0,
416                x_max: 1.0,
417                max_count: 0.0,
418            });
419        }
420    };
421    let range = (x_max - x_min).abs();
422    let bin_count = bins.max(1);
423    if range <= f64::EPSILON {
424        return Ok(HistogramData {
425            column: column.to_string(),
426            bins: vec![HistogramBin {
427                center: x_min,
428                count: values.len() as f64,
429            }],
430            x_min: x_min - 0.5,
431            x_max: x_max + 0.5,
432            max_count: values.len() as f64,
433        });
434    }
435    let bin_width = range / bin_count as f64;
436    let mut counts = vec![0.0_f64; bin_count];
437    for v in values {
438        let mut idx = ((v - x_min) / bin_width).floor() as isize;
439        if idx < 0 {
440            idx = 0;
441        }
442        if idx as usize >= bin_count {
443            idx = bin_count.saturating_sub(1) as isize;
444        }
445        counts[idx as usize] += 1.0;
446    }
447    let bins: Vec<HistogramBin> = counts
448        .iter()
449        .enumerate()
450        .map(|(i, count)| HistogramBin {
451            center: x_min + (i as f64 + 0.5) * bin_width,
452            count: *count,
453        })
454        .collect();
455    let max_count = counts.iter().cloned().fold(0.0_f64, |a, b| a.max(b));
456    Ok(HistogramData {
457        column: column.to_string(),
458        bins,
459        x_min,
460        x_max,
461        max_count,
462    })
463}
464
/// Linearly interpolated quantile of an ascending-sorted slice.
///
/// `q` is clamped to `0.0..=1.0`. An empty slice yields 0.0 and a single element yields itself.
fn quantile(sorted: &[f64], q: f64) -> f64 {
    match sorted {
        [] => 0.0,
        [only] => *only,
        _ => {
            // Fractional rank into the slice; interpolate between its two neighbours.
            let rank = q.clamp(0.0, 1.0) * (sorted.len() as f64 - 1.0);
            let below = rank.floor() as usize;
            let above = rank.ceil() as usize;
            if below == above {
                return sorted[below];
            }
            let fraction = rank - below as f64;
            sorted[below] + (sorted[above] - sorted[below]) * fraction
        }
    }
}
485
486/// Prepare box plot stats for one or more numeric columns. Uses a single collect for all columns.
487pub fn prepare_box_plot_data<T: AsRef<str>>(
488    lf: &LazyFrame,
489    columns: &[T],
490    row_limit: usize,
491) -> Result<BoxPlotData> {
492    if columns.is_empty() {
493        return Ok(BoxPlotData {
494            stats: Vec::new(),
495            y_min: 0.0,
496            y_max: 1.0,
497        });
498    }
499    let col_refs: Vec<&str> = columns.iter().map(|c| c.as_ref()).collect();
500    let columns_values = collect_numeric_columns(lf, &col_refs, row_limit)?;
501    let mut stats = Vec::new();
502    let mut y_min = f64::INFINITY;
503    let mut y_max = f64::NEG_INFINITY;
504    for (column, mut values) in col_refs.iter().zip(columns_values) {
505        if values.is_empty() {
506            continue;
507        }
508        values.sort_by(|a, b| a.partial_cmp(b).unwrap_or(std::cmp::Ordering::Equal));
509        let (min, max) = match (values.first(), values.last()) {
510            (Some(a), Some(b)) => (*a, *b),
511            _ => continue,
512        };
513        let q1 = quantile(&values, 0.25);
514        let median = quantile(&values, 0.5);
515        let q3 = quantile(&values, 0.75);
516        y_min = y_min.min(min);
517        y_max = y_max.max(max);
518        stats.push(BoxPlotStats {
519            name: (*column).to_string(),
520            min,
521            q1,
522            median,
523            q3,
524            max,
525        });
526    }
527    if stats.is_empty() {
528        return Ok(BoxPlotData {
529            stats,
530            y_min: 0.0,
531            y_max: 1.0,
532        });
533    }
534    if y_max <= y_min {
535        y_max = y_min + 1.0;
536    }
537    Ok(BoxPlotData {
538        stats,
539        y_min,
540        y_max,
541    })
542}
543
/// Base KDE bandwidth: `1.06 * std * n^(-1/5)`, using the population standard deviation.
///
/// Falls back to 1.0 when there are fewer than two values or the standard deviation is at most
/// `f64::EPSILON`.
fn kde_bandwidth(values: &[f64]) -> f64 {
    const FALLBACK: f64 = 1.0;

    if values.len() < 2 {
        return FALLBACK;
    }
    let count = values.len() as f64;
    let mean = values.iter().sum::<f64>() / count;
    let variance = values.iter().map(|v| (v - mean).powi(2)).sum::<f64>() / count;
    let sigma = variance.sqrt();
    if sigma <= f64::EPSILON {
        FALLBACK
    } else {
        1.06 * sigma * count.powf(-0.2)
    }
}
557
558/// Prepare KDE data for one or more numeric columns. Uses a single collect for all columns.
559pub fn prepare_kde_data<T: AsRef<str>>(
560    lf: &LazyFrame,
561    columns: &[T],
562    bandwidth_factor: f64,
563    row_limit: usize,
564) -> Result<KdeData> {
565    if columns.is_empty() {
566        return Ok(KdeData {
567            series: Vec::new(),
568            x_min: 0.0,
569            x_max: 1.0,
570            y_max: 1.0,
571        });
572    }
573    let col_refs: Vec<&str> = columns.iter().map(|c| c.as_ref()).collect();
574    let columns_values = collect_numeric_columns(lf, &col_refs, row_limit)?;
575    let mut series = Vec::new();
576    let mut all_x_min = f64::INFINITY;
577    let mut all_x_max = f64::NEG_INFINITY;
578    let mut all_y_max = f64::NEG_INFINITY;
579    for (column, mut values) in col_refs.iter().zip(columns_values) {
580        if values.is_empty() {
581            continue;
582        }
583        values.sort_by(|a, b| a.partial_cmp(b).unwrap_or(std::cmp::Ordering::Equal));
584        let (min, max) = match (values.first(), values.last()) {
585            (Some(a), Some(b)) => (*a, *b),
586            _ => continue,
587        };
588        let base_bw = kde_bandwidth(&values);
589        let bandwidth = (base_bw * bandwidth_factor).max(f64::EPSILON);
590        let x_start = min - 3.0 * bandwidth;
591        let x_end = max + 3.0 * bandwidth;
592        let samples = 200_usize;
593        let step = (x_end - x_start) / (samples.saturating_sub(1).max(1) as f64);
594        let inv = 1.0 / ((values.len() as f64) * bandwidth * (2.0 * PI).sqrt());
595        let mut points = Vec::with_capacity(samples);
596        for i in 0..samples {
597            let x = x_start + i as f64 * step;
598            let mut sum = 0.0;
599            for &v in &values {
600                let u = (x - v) / bandwidth;
601                sum += (-0.5 * u * u).exp();
602            }
603            let y = inv * sum;
604            all_y_max = all_y_max.max(y);
605            points.push((x, y));
606        }
607        all_x_min = all_x_min.min(x_start);
608        all_x_max = all_x_max.max(x_end);
609        series.push(KdeSeries {
610            name: (*column).to_string(),
611            points,
612        });
613    }
614    if series.is_empty() {
615        return Ok(KdeData {
616            series,
617            x_min: 0.0,
618            x_max: 1.0,
619            y_max: 1.0,
620        });
621    }
622    if all_x_max <= all_x_min {
623        all_x_max = all_x_min + 1.0;
624    }
625    if all_y_max <= 0.0 {
626        all_y_max = 1.0;
627    }
628    Ok(KdeData {
629        series,
630        x_min: all_x_min,
631        x_max: all_x_max,
632        y_max: all_y_max,
633    })
634}
635
636/// Prepare heatmap data for two numeric columns.
637pub fn prepare_heatmap_data(
638    lf: &LazyFrame,
639    x_column: &str,
640    y_column: &str,
641    bins: usize,
642    row_limit: usize,
643) -> Result<HeatmapData> {
644    let pairs = collect_numeric_pairs(lf, x_column, y_column, row_limit)?;
645    if pairs.is_empty() {
646        return Ok(HeatmapData {
647            x_column: x_column.to_string(),
648            y_column: y_column.to_string(),
649            x_min: 0.0,
650            x_max: 1.0,
651            y_min: 0.0,
652            y_max: 1.0,
653            x_bins: bins,
654            y_bins: bins,
655            counts: vec![vec![0.0; bins.max(1)]; bins.max(1)],
656            max_count: 0.0,
657        });
658    }
659    let mut x_min = f64::INFINITY;
660    let mut x_max = f64::NEG_INFINITY;
661    let mut y_min = f64::INFINITY;
662    let mut y_max = f64::NEG_INFINITY;
663    for (x, y) in &pairs {
664        x_min = x_min.min(*x);
665        x_max = x_max.max(*x);
666        y_min = y_min.min(*y);
667        y_max = y_max.max(*y);
668    }
669    if x_max <= x_min {
670        x_max = x_min + 1.0;
671    }
672    if y_max <= y_min {
673        y_max = y_min + 1.0;
674    }
675    let x_bins = bins.max(1);
676    let y_bins = bins.max(1);
677    let mut counts = vec![vec![0.0_f64; x_bins]; y_bins];
678    let x_range = x_max - x_min;
679    let y_range = y_max - y_min;
680    for (x, y) in pairs {
681        let mut xi = ((x - x_min) / x_range * x_bins as f64).floor() as isize;
682        let mut yi = ((y - y_min) / y_range * y_bins as f64).floor() as isize;
683        if xi < 0 {
684            xi = 0;
685        }
686        if yi < 0 {
687            yi = 0;
688        }
689        if xi as usize >= x_bins {
690            xi = x_bins.saturating_sub(1) as isize;
691        }
692        if yi as usize >= y_bins {
693            yi = y_bins.saturating_sub(1) as isize;
694        }
695        counts[yi as usize][xi as usize] += 1.0;
696    }
697    let max_count = counts
698        .iter()
699        .flat_map(|row| row.iter())
700        .cloned()
701        .fold(0.0_f64, |a, b| a.max(b));
702    Ok(HeatmapData {
703        x_column: x_column.to_string(),
704        y_column: y_column.to_string(),
705        x_min,
706        x_max,
707        y_min,
708        y_max,
709        x_bins,
710        y_bins,
711        counts,
712        max_count,
713    })
714}
715
#[cfg(test)]
mod tests {
    use super::{prepare_chart_data, ChartDataResult, XAxisTemporalKind};
    use polars::prelude::*;

    /// Row limit shared by every test; far above the size of any fixture.
    const ROW_LIMIT: usize = 10_000;

    /// Resolves the frame's schema and forwards to `prepare_chart_data`.
    fn chart(lf: &LazyFrame, x: &str, ys: &[&str]) -> color_eyre::Result<ChartDataResult> {
        let schema = lf.clone().collect_schema().unwrap();
        let y_columns: Vec<String> = ys.iter().map(|name| (*name).to_owned()).collect();
        prepare_chart_data(lf, schema.as_ref(), x, &y_columns, ROW_LIMIT)
    }

    #[test]
    fn prepare_empty_y_columns() {
        let frame = df!("x" => &[1.0_f64, 2.0], "y" => &[10.0, 20.0])
            .unwrap()
            .lazy();
        let out = chart(&frame, "x", &[]).unwrap();
        assert!(out.series.is_empty());
        assert_eq!(out.x_axis_kind, XAxisTemporalKind::Numeric);
    }

    #[test]
    fn prepare_small_data() {
        let frame = df!(
            "x" => &[1.0_f64, 2.0, 3.0],
            "a" => &[10.0_f64, 20.0, 30.0],
            "b" => &[100.0_f64, 200.0, 300.0]
        )
        .unwrap()
        .lazy();
        let out = chart(&frame, "x", &["a", "b"]).unwrap();
        assert_eq!(out.series.len(), 2);
        assert_eq!(out.series[0], vec![(1.0, 10.0), (2.0, 20.0), (3.0, 30.0)]);
        assert_eq!(
            out.series[1],
            vec![(1.0, 100.0), (2.0, 200.0), (3.0, 300.0)]
        );
        assert_eq!(out.x_axis_kind, XAxisTemporalKind::Numeric);
    }

    #[test]
    fn prepare_skips_nan() {
        let frame = df!(
            "x" => &[1.0_f64, 2.0, 3.0],
            "y" => &[10.0_f64, f64::NAN, 30.0]
        )
        .unwrap()
        .lazy();
        let out = chart(&frame, "x", &["y"]).unwrap();
        assert_eq!(out.series[0].len(), 2);
        assert_eq!(out.series[0], vec![(1.0, 10.0), (3.0, 30.0)]);
    }

    #[test]
    fn prepare_missing_x_column_errors() {
        let frame = df!("x" => &[1.0_f64], "y" => &[2.0_f64]).unwrap().lazy();
        assert!(chart(&frame, "missing", &["y"]).is_err());
    }
}
779
#[cfg(test)]
mod x_range_tests {
    use super::{prepare_chart_x_range, ChartXRangeResult, XAxisTemporalKind};
    use polars::prelude::*;

    /// Resolves the frame's schema and computes the range of column "x" with a generous limit.
    fn x_range(lf: &LazyFrame) -> ChartXRangeResult {
        let schema = lf.clone().collect_schema().unwrap();
        prepare_chart_x_range(lf, schema.as_ref(), "x", 10_000).unwrap()
    }

    #[test]
    fn prepare_x_range_numeric() {
        let frame = df!("x" => &[10.0_f64, 20.0, 5.0, 30.0]).unwrap().lazy();
        let range = x_range(&frame);
        assert_eq!(range.x_min, 5.0);
        assert_eq!(range.x_max, 30.0);
        assert_eq!(range.x_axis_kind, XAxisTemporalKind::Numeric);
    }

    #[test]
    fn prepare_x_range_empty_returns_placeholder() {
        // Slicing to zero rows keeps the schema but yields no data.
        let frame = df!("x" => &[1.0_f64]).unwrap().lazy().slice(0, 0);
        let range = x_range(&frame);
        assert_eq!(range.x_min, 0.0);
        assert_eq!(range.x_max, 1.0);
    }
}
803}