qdplot/
lib.rs

1use clap::ValueEnum;
2use std::collections::HashMap;
3use std::error::Error;
4use std::fmt;
5use std::fmt::{Display, Formatter};
6use std::iter::zip;
7use std::num::ParseFloatError;
8
/// Extra space kept around the data when the canvas ranges are computed,
/// expressed as a fraction of the data span (it multiplies `x_max - x_min` and
/// `y_max - y_min` in `Canvas::set_x_range` / `Canvas::set_y_range`).
///
/// With `0.0`, only the cell-sized padding added by those functions remains.
const MARGIN: f64 = 0.0;
10
/// Failures raised while drawing on a canvas.
#[derive(Debug)]
pub enum CanvasError {
    /// A write was attempted outside of the drawable area; the payload
    /// describes the offending position or value.
    OutOfRange(String),
    /// There is nothing to plot.
    NoData,
}

impl Display for CanvasError {
    /// Write the user-facing message, prefixed with `Canvas Error:`.
    fn fmt(&self, f: &mut Formatter) -> fmt::Result {
        match self {
            Self::NoData => f.write_str("Canvas Error: No Data"),
            Self::OutOfRange(details) => write!(f, "Canvas Error: Out of range {details}"),
        }
    }
}

impl Error for CanvasError {}
29
/// Failures raised while building a dataset.
#[derive(Debug)]
pub enum DatasetError {
    /// The input holds no data at all.
    NoData,
    /// A value could not be interpreted; the payload carries the reason.
    InvalidData(String),
}

impl Display for DatasetError {
    /// Write the user-facing message, prefixed with `Data Error:`.
    fn fmt(&self, f: &mut Formatter) -> fmt::Result {
        match self {
            Self::InvalidData(reason) => write!(f, "Data Error: Invalid Data: {reason}"),
            Self::NoData => f.write_str("Data Error: No data"),
        }
    }
}

impl Error for DatasetError {}

impl From<ParseFloatError> for DatasetError {
    /// Turn a float parsing failure into [`DatasetError::InvalidData`],
    /// keeping the parser message as payload.
    fn from(err: ParseFloatError) -> Self {
        DatasetError::InvalidData(err.to_string())
    }
}
54
55/// Where to plot
56#[derive(Default, Debug)]
57pub struct Canvas {
58    /// Vec<line: Vec<u8>>
59    cells: Vec<Vec<u8>>,
60    width: usize,
61    height: usize,
62    x_range: (f64, f64),
63    y_range: (f64, f64),
64}
65
66impl Canvas {
67    pub fn new() -> Self {
68        Self::from_size(25, 80)
69    }
70
71    fn from_size(height: usize, width: usize) -> Self {
72        Self {
73            cells: (0..height).map(|_| vec![b' '; width]).collect(),
74            width,
75            height,
76            x_range: (0.0, 0.0),
77            y_range: (0.0, 0.0),
78        }
79    }
80
81    /// Remove drawing
82    pub fn clear(&mut self) {
83        self.cells = (0..self.height).map(|_| vec![b' '; self.width]).collect()
84    }
85
86    fn set_x_range(&mut self, x_min: f64, x_max: f64) {
87        assert!(x_min < x_max);
88        let delta = x_max - x_min;
89        let x_range = (x_min - MARGIN * delta, x_max + MARGIN * delta);
90        let cell_width = (x_range.1 - x_range.0) / self.width as f64;
91        self.x_range = (
92            x_min - (MARGIN * delta) - cell_width,
93            x_max + (MARGIN * delta) + cell_width,
94        );
95    }
96
97    fn set_y_range(&mut self, y_min: f64, y_max: f64) {
98        assert!(y_min < y_max);
99        let delta = y_max - y_min;
100        let y_range = (y_min - MARGIN * delta, y_max + MARGIN * delta);
101        let cell_width = (y_range.1 - y_range.0) / self.height as f64;
102        self.y_range = (
103            y_min - MARGIN * delta - 2.0 * cell_width,
104            y_max + MARGIN * delta,
105        );
106    }
107
108    /// Put a specific value in a specific cell
109    fn set_cell(&mut self, line: usize, column: usize, value: u8) -> Result<(), CanvasError> {
110        if let Some(cell) = self
111            .cells
112            .get_mut(line)
113            .unwrap_or(&mut Vec::new())
114            .get_mut(column)
115        {
116            *cell = value;
117            Ok(())
118        } else {
119            Err(CanvasError::OutOfRange(format!(
120                "try to write in ({}, {}) (Canvas size: ({}, {}))",
121                line, column, &self.height, &self.width
122            )))
123        }
124    }
125
126    fn get_mut_cell(&mut self, line: usize, column: usize) -> Option<&mut u8> {
127        todo!()
128    }
129
130    /// Put a specific value with specific coordinates in the canvas
131    fn draw_value(&mut self, x: f64, y: f64, value: u8) -> Result<(), CanvasError> {
132        self.set_cell(
133            self.height - get_cell(y, self.y_range.0, self.y_range.1, self.height)?,
134            get_cell(x, self.x_range.0, self.x_range.1, self.width)?,
135            value,
136        )
137    }
138
139    fn get_mut_value(&mut self, x: f64, y: f64) -> Option<&mut u8> {
140        let offset = get_cell(y, self.y_range.0, self.y_range.1, self.height).ok()?;
141        let line = self.height - offset;
142        let column = get_cell(x, self.x_range.0, self.x_range.1, self.width).ok()?;
143        self.get_mut_cell(line, column)
144    }
145
146    /// Draw axes
147    fn draw_axes(&mut self) -> Result<(), CanvasError> {
148        let y_axis_location = match get_cell(0.0, self.x_range.0, self.x_range.1, self.width) {
149            Ok(u) => u,
150            _ => {
151                if self.x_range.1 < 0.0 {
152                    self.width - 1
153                } else {
154                    0
155                }
156            }
157        };
158        let x_axis_location = match get_cell(0.0, self.y_range.0, self.y_range.1, self.height) {
159            Ok(u) => u,
160            _ => {
161                if self.y_range.1 < 0.0 {
162                    0
163                } else {
164                    self.height - 1
165                }
166            }
167        };
168        for cell in 0..self.width {
169            let c = match (cell as i32 - y_axis_location as i32) % 5 {
170                0 => b'+',
171                _ => b'-',
172            };
173            self.set_cell(x_axis_location, cell, c)?;
174        }
175        for cell in 0..self.height {
176            let c = match (cell as i32 - x_axis_location as i32) % 5 {
177                0 => b'+',
178                _ => b'|',
179            };
180            self.set_cell(cell, y_axis_location, c)?;
181        }
182        self.set_cell(x_axis_location, y_axis_location, b'+')?;
183        Ok(())
184    }
185}
186
187/// Get cell coordinate to write to
188fn get_cell(x: f64, x_min: f64, x_max: f64, width: usize) -> Result<usize, CanvasError> {
189    assert!(x_max > x_min);
190    if x < x_min || x > x_max {
191        Err(CanvasError::OutOfRange(format!("{x_min} < {x} < {x_max}")))
192    } else {
193        Ok(((width - 1) as f64 / (x_max - x_min) * (x - x_min)).round() as usize)
194    }
195}
196
197impl Display for Canvas {
198    fn fmt(&self, f: &mut Formatter) -> Result<(), std::fmt::Error> {
199        for line in &self.cells {
200            writeln!(
201                f,
202                "{}",
203                line.iter().map(|&c| { c as char }).collect::<String>()
204            )?;
205        }
206        Ok(())
207    }
208}
209
210#[derive(Debug, Default, Copy, Clone, ValueEnum)]
211pub enum PlotKind {
212    /// Points
213    #[default]
214    Point,
215
216    /// Boxplot, highliting quantiles and outliers
217    Boxplot,
218
219    /// Cumulative distribution function
220    CDF,
221
222    /// Histogram
223    Histogram,
224}
225
226impl Display for PlotKind {
227    fn fmt(&self, f: &mut Formatter) -> fmt::Result {
228        match self {
229            PlotKind::Point => write!(f, "point"),
230            PlotKind::Boxplot => write!(f, "boxplot"),
231            PlotKind::CDF => write!(f, "cdf"),
232            PlotKind::Histogram => write!(f, "histogram"),
233        }
234    }
235}
236
237#[derive(Debug, Default, Clone, PartialEq)]
238pub struct Quantiles {
239    min: f64,
240    q1: f64,
241    q2: f64,
242    q3: f64,
243    max: f64,
244    outliers: Vec<f64>,
245}
246
247impl Quantiles {
248    pub fn from_slice(input: &[f64]) -> Self {
249        let inter_quartiles = 1.5;
250        let mut x: Vec<f64> = input.iter().filter(|a| !a.is_nan()).copied().collect();
251        assert!(
252            !x.is_empty(),
253            "not enough valid values in input ({input:?})"
254        );
255        x.sort_by(|a, b| a.partial_cmp(b).unwrap());
256        let [q1, q2, q3] =
257            [0.25, 0.5, 0.75].map(|q| get_value(x.as_slice(), get_index(q, x.len())).unwrap());
258        let lower = q2 - inter_quartiles * (q3 - q1);
259        let upper = q2 + inter_quartiles * (q3 - q1);
260        Self {
261            min: x
262                .iter()
263                .filter(|&a| *a > lower)
264                .copied()
265                .min_by(|a, b| a.partial_cmp(b).unwrap())
266                .unwrap(),
267            q1,
268            q2,
269            q3,
270            max: x
271                .iter()
272                .filter(|&a| *a < upper)
273                .copied()
274                .max_by(|a, b| a.partial_cmp(b).unwrap())
275                .unwrap(),
276            outliers: x
277                .iter()
278                .filter(|&a| *a < lower || *a > upper)
279                .copied()
280                .collect(),
281        }
282    }
283
284    /// Draw a horizontal boxplot on the canvas from lines height to height+3
285    pub fn draw_into(&self, canvas: &mut Canvas, height: usize) -> Result<(), CanvasError> {
286        assert!(canvas.height >= height + 3);
287        let [min, q1, q2, q3, max] = [self.min, self.q1, self.q2, self.q3, self.max]
288            .map(|x| get_cell(x, canvas.x_range.0, canvas.x_range.1, canvas.width));
289        let outliers = self
290            .outliers
291            .iter()
292            .map(|&x| get_cell(x, canvas.x_range.0, canvas.x_range.1, canvas.width))
293            .collect::<Vec<_>>();
294
295        let (q1, q2, q3) = (q1?, q2?, q3?);
296        let (min, max) = (min?, max?);
297        for x in (min + 1)..q1 {
298            canvas.set_cell(height + 1, x, b'-')?;
299        }
300        for x in (q3 + 1)..max {
301            canvas.set_cell(height + 1, x, b'-')?;
302        }
303        for x in outliers {
304            canvas.set_cell(height + 1, x?, b'+')?;
305        }
306        for x in q1..q3 {
307            canvas.set_cell(height, x, b'-')?;
308            canvas.set_cell(height + 2, x, b'-')?;
309        }
310        for x in [min, q1, q2, q3, max] {
311            canvas.set_cell(height + 1, x, b'|')?;
312        }
313        Ok(())
314    }
315}
316
317#[derive(Debug, Default)]
318pub struct CDF {
319    steps: Vec<(f64, f64)>,
320}
321
322impl CDF {
323    pub fn from_vec(input: Vec<f64>) -> Self {
324        let step = 1.0 / (input.len() as f64);
325        let mut steps: Vec<(f64, f64)> = Vec::new();
326        let mut input: Vec<f64> = input.iter().filter(|y| !y.is_nan()).copied().collect();
327        input.sort_by(|a, b| a.partial_cmp(b).unwrap());
328        let mut cur = 0.0;
329        for y in input {
330            cur += step;
331            if let Some(point) = steps.iter_mut().find(|elt| elt.0 == y) {
332                point.1 = cur;
333            } else {
334                steps.push((y, cur));
335            }
336        }
337        Self { steps }
338    }
339
340    pub fn draw_into(&self, canvas: &mut Canvas, symbole: u8) -> Result<(), CanvasError> {
341        let delta = (canvas.x_range.1 - canvas.x_range.0) / canvas.width as f64;
342        for c in 0..=canvas.width {
343            let x = canvas.x_range.0 + delta * c as f64;
344            let y = self.get_value(x);
345            canvas.draw_value(x, y, symbole)?;
346        }
347        Ok(())
348    }
349
350    /// Get the value of the CDF evaluted on x
351    fn get_value(&self, x: f64) -> f64 {
352        let mut y = 0.0;
353        for p in &self.steps {
354            if p.0 < x {
355                y = p.1;
356            }
357        }
358        y
359    }
360}
361
362#[derive(Debug, Default)]
363pub struct Histogram {
364    /// Bins boundaries
365    bins: Vec<f64>,
366    /// number of sample per bins
367    values: Vec<usize>,
368}
369
370impl Histogram {
371    pub fn from_vec(input: Vec<f64>) -> Self {
372        let bin_nb = 10;
373        if input.is_empty() {
374            return Self::default();
375        }
376        let first = input[0];
377        let (x_min, x_max) = input
378            .iter()
379            .copied()
380            .fold((first, first), |(mi, ma), x| (x.min(mi), x.max(ma)));
381        let x_max = x_max + 0.001 * (x_max - x_min);
382        let mut hist = Histogram::default();
383        hist.reset_bins(x_min, x_max, bin_nb);
384        hist.add_values(&input);
385        hist
386    }
387
388    /// get bin number into which the value should go.
389    fn get_bin(&self, x: f64) -> Option<usize> {
390        if x.is_nan() {
391            return None;
392        }
393        if let Some(first) = self.bins.first() {
394            if &x < first || &x > self.bins.last().expect("at least one item") {
395                None
396            } else {
397                for (idx, b) in self.bins.iter().skip(1).enumerate() {
398                    if x < *b {
399                        return Some(idx);
400                    }
401                }
402                unreachable!()
403            }
404        } else {
405            None
406        }
407    }
408
409    pub fn draw_into(&self, canvas: &mut Canvas, label: u8) -> Result<(), CanvasError> {
410        let step = (canvas.x_range.1 - canvas.x_range.0) / (canvas.width as f64);
411        let start = canvas.x_range.0;
412        let xs = (0..canvas.width).map(|a| start + a as f64 * step);
413        for x in xs {
414            canvas.draw_value(x, self.get_value(x).unwrap(), label)?
415        }
416        Ok(())
417    }
418
419    /// Get the value of the histogram at specific value
420    /// Return None if the Historgram is not initialized
421    fn get_value(&self, x: f64) -> Option<f64> {
422        if self.bins.is_empty() || self.values.is_empty() {
423            return None;
424        }
425        if let Some(b) = self.get_bin(x) {
426            self.values.get(b).map(|&x| x as f64)
427        } else {
428            Some(0.0)
429        }
430    }
431
432    /// Get the normalized value of the histogram at specific value
433    fn get_frequency(&self, x: f64) -> Option<f64> {
434        let nb = self.values.iter().sum::<usize>() as f64;
435        self.get_value(x).map(|x| x / nb)
436    }
437
438    /// Compute bins boundaries.
439    fn reset_bins(&mut self, x_min: f64, x_max: f64, bin_nb: usize) {
440        if bin_nb == 0 {
441            panic!("bin_nb should not be 0");
442        }
443        let bin_size = (x_max - x_min) / (bin_nb as f64);
444        self.bins = (0..=bin_nb).map(|x| x_min + x as f64 * bin_size).collect();
445        self.values = vec![0; bin_nb];
446    }
447
448    fn add_values(&mut self, input: &[f64]) {
449        for &x in input.iter() {
450            let idx = self.get_bin(x).unwrap();
451            *self.values.get_mut(idx).unwrap() += 1;
452        }
453    }
454}
455
456#[derive(Debug, Default)]
457pub struct DataSet {
458    /// label: list of points
459    dataset: HashMap<String, Vec<(f64, f64)>>,
460}
461
462impl DataSet {
463    /// Build the dataset from the content of a csv file
464    ///
465    /// the content looks like
466    /// ```plaintext
467    ///      , A , B , "C"
468    ///  -1  , 0 , 1 , 3
469    ///  -5  , 1 , -2, 4
470    /// ```
471    pub fn from_csv(content: &str) -> Result<Self, DatasetError> {
472        let sep = ',';
473        let mut dataset = Self::default();
474        let mut lines = content.lines();
475        let headers: Vec<_> = lines
476            .next()
477            .ok_or(DatasetError::NoData)?
478            .split(sep)
479            .map(|l| String::from(l.replace('"', "").trim()))
480            .skip(1)
481            .collect();
482        for line in lines {
483            let mut values = line
484                .split(sep)
485                .map(|l| String::from(l.replace('"', "").trim()));
486            let x = values
487                .next()
488                .expect("first column (indexes) should exist")
489                .parse()?;
490            for (label, y) in zip(headers.clone(), values) {
491                dataset
492                    .dataset
493                    .entry(label)
494                    .or_default()
495                    .push((x, y.parse()?));
496            }
497        }
498        Ok(dataset)
499    }
500
501    pub fn add_points(&mut self, dataset: String, points: Vec<(f64, f64)>) {
502        self.dataset
503            .entry(dataset)
504            .or_default()
505            .extend(points.iter())
506    }
507
508    pub fn draw_into(&self, canvas: &mut Canvas, kind: PlotKind) -> Result<(), CanvasError> {
509        match kind {
510            PlotKind::Point => self.draw_point(canvas),
511            PlotKind::Boxplot => self.draw_boxplot(canvas),
512            PlotKind::CDF => self.draw_cdf(canvas),
513            PlotKind::Histogram => self.draw_histogram(canvas),
514        }
515    }
516
517    fn draw_point(&self, canvas: &mut Canvas) -> Result<(), CanvasError> {
518        // TODO check if range already set
519        self.reset_canvas_range(canvas)?;
520        canvas.draw_axes()?;
521
522        // TODO add labels
523        for (label, points) in self.dataset.iter() {
524            // TODO: use correct labels
525            let l = label.bytes().next().unwrap();
526            for point in points {
527                if point.0.is_nan() || point.1.is_nan() {
528                    continue;
529                }
530                canvas.draw_value(point.0, point.1, l)?;
531            }
532        }
533        Ok(())
534    }
535
536    fn draw_boxplot(&self, canvas: &mut Canvas) -> Result<(), CanvasError> {
537        // TODO set canvas size
538        let mut height = 0;
539        for dataset in self.dataset.values() {
540            let q = Quantiles::from_slice(&dataset.iter().map(|x| x.1).collect::<Vec<_>>());
541            q.draw_into(canvas, height)?;
542            height += 4
543        }
544        Ok(())
545    }
546
547    fn draw_cdf(&self, canvas: &mut Canvas) -> Result<(), CanvasError> {
548        // TODO: set canvas size
549        canvas.y_range = (-0.1, 1.1);
550        canvas.draw_axes()?;
551        for (label, data) in &self.dataset {
552            let cdf = CDF::from_vec(data.iter().map(|x| x.1).collect());
553            cdf.draw_into(
554                canvas,
555                label.bytes().next().expect("label should not be empty"),
556            )?
557        }
558        Ok(())
559    }
560
561    fn draw_histogram(&self, canvas: &mut Canvas) -> Result<(), CanvasError> {
562        let hists: HashMap<String, Histogram> = self
563            .dataset
564            .iter()
565            .map(|(label, dataset)| {
566                (
567                    label.clone(),
568                    Histogram::from_vec(
569                        dataset
570                            .iter()
571                            .map(|x| x.1)
572                            .filter(|x| !x.is_nan())
573                            .collect(),
574                    ),
575                )
576            })
577            .collect();
578
579        // set canvas ranges
580        let (x_min, x_max) = hists
581            .values()
582            .map(|h| {
583                (
584                    *h.bins.first().expect("dataset should not be empty"),
585                    *h.bins.last().unwrap(),
586                )
587            })
588            .reduce(|(a, b), (c, d)| (a.min(c), b.max(d)))
589            .unwrap();
590        let y_max = hists
591            .values()
592            .map(|h| h.values.clone().into_iter().fold(0, |acc, x| acc.max(x)))
593            .reduce(|acc, b| acc.max(b))
594            .unwrap() as f64;
595        let y_min = -y_max / 20.0;
596        canvas.x_range = (x_min, x_max);
597        canvas.y_range = (y_min, y_max);
598
599        for (l, h) in hists.iter() {
600            h.draw_into(
601                canvas,
602                l.bytes()
603                    .next()
604                    .expect("label should be at least one letter long"),
605            )?
606        }
607        Ok(())
608    }
609
610    fn reset_canvas_range(&self, canvas: &mut Canvas) -> Result<(), CanvasError> {
611        let mut points = self.dataset.values().flatten();
612        let first = points.next().ok_or(CanvasError::NoData)?;
613        let (x_min, x_max, y_min, y_max) = points.fold(
614            (first.0, first.0, first.1, first.1),
615            |(x0, x1, y0, y1), p| (x0.min(p.0), x1.max(p.0), y0.min(p.1), y1.max(p.1)),
616        );
617        canvas.set_x_range(x_min, x_max);
618        canvas.set_y_range(y_min, y_max);
619        Ok(())
620    }
621
622    /// Get quantiles for each dataset
623    fn get_quantiles(&self) -> HashMap<String, Option<Quantiles>> {
624        todo!()
625    }
626
627    /// Get cumulative distribution for each dataset
628    /// Return points where the distribution changes
629    fn get_cumulatives(&self) -> HashMap<String, Option<Vec<(f64, f64)>>> {
630        todo!()
631    }
632}
633
/// Position of a quantile in a sample of `length` values: `quantile`
/// multiplied by `length`, which is usually not an integer.
fn get_index(quantile: f64, length: usize) -> f64 {
    let size = length as f64;
    quantile * size
}
637
/// Get value at specific non-integer index
///
/// Return a weighted sum of previous and next values
/// The nearest from an index, the most weight this index has
///
/// Returns `None` when `idx` is not within `[0, x.len() - 1]`: empty slice,
/// negative index, index beyond the last value or NaN.
fn get_value(x: &[f64], idx: f64) -> Option<f64> {
    let last = x.len().checked_sub(1)?;
    // NaN is not contained in any range.
    if !(0.0..=last as f64).contains(&idx) {
        return None;
    }
    let i = idx.floor() as usize;
    let f = idx.fract();
    if f == 0.0 {
        // Exact index: the next value, if any, must not be involved (a null
        // weight would turn an infinite neighbour into NaN).
        return Some(x[i]);
    }
    // `idx` is not an integer and is at most `last`, hence `i + 1 <= last`.
    Some((1.0 - f) * x[i] + f * x[i + 1])
}
654
#[cfg(test)]
mod tests {
    use super::*;

    /// `get_value` interpolates between neighbours and rejects the indexes
    /// which are beyond the last value.
    #[test]
    fn value_getter() {
        let pair = [-1.0, 1.0];
        for (idx, expected) in [(0.0, -1.0), (1.0, 1.0), (0.5, 0.0), (0.25, -0.5)] {
            assert_eq!(get_value(&pair, idx).unwrap(), expected);
        }

        let triple = [-1.0, 1.0, 2.0];
        assert!(get_value(&triple, 2.1).is_none());
        for (idx, expected) in [(0.0, -1.0), (1.0, 1.0), (0.25, -0.5), (0.5, 0.0)] {
            assert_eq!(get_value(&triple, idx).unwrap(), expected);
        }

        let empty: [f64; 0] = [];
        assert!(get_value(&empty, 0.25).is_none());
    }

    /// Quantiles of an unsorted sample without outlier.
    #[test]
    fn quantiles() {
        let expected = Quantiles {
            min: 0.0,
            q1: 1.25,
            q2: 2.5,
            q3: 3.75,
            max: 4.0,
            outliers: Vec::new(),
        };
        assert_eq!(Quantiles::from_slice(&[1.0, 3.0, 4.0, 0.0, 2.0]), expected);
    }

    /// One series is created per labelled column.
    #[test]
    fn dataset_csv() {
        let text = r#"
         , A , B , "C"
        -1  , 0 , 1 , 3
        -5  , 1 , -2, 4
    "#
        .trim();
        let parsed = DataSet::from_csv(text).unwrap();
        assert_eq!(parsed.dataset.len(), 3);
    }

    /// An uninitialized histogram has no value.
    #[test]
    fn hist_empty() {
        assert!(Histogram::default().get_value(0.0).is_none());
    }

    /// Counts are read from the bin holding the requested value, and are
    /// null outside of the bins.
    #[test]
    fn hist_values() {
        let hist = Histogram::from_vec(vec![-1.0, 0.0, 0.0, 0.1, 0.2, 10.0]);
        for (x, count) in [(0.0, 4.0), (11.0, 0.0), (5.0, 0.0), (1.0, 1.0)] {
            assert_eq!(hist.get_value(x), Some(count));
        }
    }
}