1use clap::ValueEnum;
2use std::collections::HashMap;
3use std::error::Error;
4use std::fmt;
5use std::fmt::{Display, Formatter};
6use std::iter::zip;
7use std::num::ParseFloatError;
8
/// Fraction of the data span added as extra padding on each side of an axis
/// range by `Canvas::set_x_range` and `Canvas::set_y_range`.
///
/// It is currently `0.0`, so those setters only add their cell-width padding.
const MARGIN: f64 = 0.0;
10
/// Errors reported while drawing on a `Canvas`.
#[derive(Debug)]
pub enum CanvasError {
    /// A value or cell position is outside the canvas; the payload describes it.
    OutOfRange(String),
    /// There is nothing to draw.
    NoData,
}

impl Display for CanvasError {
    /// Writes the message prefixed with `Canvas Error: `.
    fn fmt(&self, f: &mut Formatter) -> fmt::Result {
        match self {
            CanvasError::NoData => f.write_str("Canvas Error: No Data"),
            CanvasError::OutOfRange(details) => {
                write!(f, "Canvas Error: Out of range {details}")
            }
        }
    }
}

impl Error for CanvasError {}
29
/// Errors reported while building a `DataSet`.
#[derive(Debug)]
pub enum DatasetError {
    /// The input contained no data at all.
    NoData,
    /// A piece of the input could not be interpreted; the payload explains why.
    InvalidData(String),
}

impl Display for DatasetError {
    /// Writes the message prefixed with `Data Error: `.
    fn fmt(&self, f: &mut Formatter) -> fmt::Result {
        match self {
            DatasetError::InvalidData(reason) => {
                write!(f, "Data Error: Invalid Data: {reason}")
            }
            DatasetError::NoData => f.write_str("Data Error: No data"),
        }
    }
}

impl Error for DatasetError {}

impl From<ParseFloatError> for DatasetError {
    /// Wraps a float parsing failure, keeping its message as the payload.
    fn from(err: ParseFloatError) -> DatasetError {
        let reason = err.to_string();
        DatasetError::InvalidData(reason)
    }
}
54
55#[derive(Default, Debug)]
57pub struct Canvas {
58 cells: Vec<Vec<u8>>,
60 width: usize,
61 height: usize,
62 x_range: (f64, f64),
63 y_range: (f64, f64),
64}
65
66impl Canvas {
67 pub fn new() -> Self {
68 Self::from_size(25, 80)
69 }
70
71 fn from_size(height: usize, width: usize) -> Self {
72 Self {
73 cells: (0..height).map(|_| vec![b' '; width]).collect(),
74 width,
75 height,
76 x_range: (0.0, 0.0),
77 y_range: (0.0, 0.0),
78 }
79 }
80
81 pub fn clear(&mut self) {
83 self.cells = (0..self.height).map(|_| vec![b' '; self.width]).collect()
84 }
85
86 fn set_x_range(&mut self, x_min: f64, x_max: f64) {
87 assert!(x_min < x_max);
88 let delta = x_max - x_min;
89 let x_range = (x_min - MARGIN * delta, x_max + MARGIN * delta);
90 let cell_width = (x_range.1 - x_range.0) / self.width as f64;
91 self.x_range = (
92 x_min - (MARGIN * delta) - cell_width,
93 x_max + (MARGIN * delta) + cell_width,
94 );
95 }
96
97 fn set_y_range(&mut self, y_min: f64, y_max: f64) {
98 assert!(y_min < y_max);
99 let delta = y_max - y_min;
100 let y_range = (y_min - MARGIN * delta, y_max + MARGIN * delta);
101 let cell_width = (y_range.1 - y_range.0) / self.height as f64;
102 self.y_range = (
103 y_min - MARGIN * delta - 2.0 * cell_width,
104 y_max + MARGIN * delta,
105 );
106 }
107
108 fn set_cell(&mut self, line: usize, column: usize, value: u8) -> Result<(), CanvasError> {
110 if let Some(cell) = self
111 .cells
112 .get_mut(line)
113 .unwrap_or(&mut Vec::new())
114 .get_mut(column)
115 {
116 *cell = value;
117 Ok(())
118 } else {
119 Err(CanvasError::OutOfRange(format!(
120 "try to write in ({}, {}) (Canvas size: ({}, {}))",
121 line, column, &self.height, &self.width
122 )))
123 }
124 }
125
126 fn get_mut_cell(&mut self, line: usize, column: usize) -> Option<&mut u8> {
127 todo!()
128 }
129
130 fn draw_value(&mut self, x: f64, y: f64, value: u8) -> Result<(), CanvasError> {
132 self.set_cell(
133 self.height - get_cell(y, self.y_range.0, self.y_range.1, self.height)?,
134 get_cell(x, self.x_range.0, self.x_range.1, self.width)?,
135 value,
136 )
137 }
138
139 fn get_mut_value(&mut self, x: f64, y: f64) -> Option<&mut u8> {
140 let offset = get_cell(y, self.y_range.0, self.y_range.1, self.height).ok()?;
141 let line = self.height - offset;
142 let column = get_cell(x, self.x_range.0, self.x_range.1, self.width).ok()?;
143 self.get_mut_cell(line, column)
144 }
145
146 fn draw_axes(&mut self) -> Result<(), CanvasError> {
148 let y_axis_location = match get_cell(0.0, self.x_range.0, self.x_range.1, self.width) {
149 Ok(u) => u,
150 _ => {
151 if self.x_range.1 < 0.0 {
152 self.width - 1
153 } else {
154 0
155 }
156 }
157 };
158 let x_axis_location = match get_cell(0.0, self.y_range.0, self.y_range.1, self.height) {
159 Ok(u) => u,
160 _ => {
161 if self.y_range.1 < 0.0 {
162 0
163 } else {
164 self.height - 1
165 }
166 }
167 };
168 for cell in 0..self.width {
169 let c = match (cell as i32 - y_axis_location as i32) % 5 {
170 0 => b'+',
171 _ => b'-',
172 };
173 self.set_cell(x_axis_location, cell, c)?;
174 }
175 for cell in 0..self.height {
176 let c = match (cell as i32 - x_axis_location as i32) % 5 {
177 0 => b'+',
178 _ => b'|',
179 };
180 self.set_cell(cell, y_axis_location, c)?;
181 }
182 self.set_cell(x_axis_location, y_axis_location, b'+')?;
183 Ok(())
184 }
185}
186
187fn get_cell(x: f64, x_min: f64, x_max: f64, width: usize) -> Result<usize, CanvasError> {
189 assert!(x_max > x_min);
190 if x < x_min || x > x_max {
191 Err(CanvasError::OutOfRange(format!("{x_min} < {x} < {x_max}")))
192 } else {
193 Ok(((width - 1) as f64 / (x_max - x_min) * (x - x_min)).round() as usize)
194 }
195}
196
197impl Display for Canvas {
198 fn fmt(&self, f: &mut Formatter) -> Result<(), std::fmt::Error> {
199 for line in &self.cells {
200 writeln!(
201 f,
202 "{}",
203 line.iter().map(|&c| { c as char }).collect::<String>()
204 )?;
205 }
206 Ok(())
207 }
208}
209
210#[derive(Debug, Default, Copy, Clone, ValueEnum)]
211pub enum PlotKind {
212 #[default]
214 Point,
215
216 Boxplot,
218
219 CDF,
221
222 Histogram,
224}
225
226impl Display for PlotKind {
227 fn fmt(&self, f: &mut Formatter) -> fmt::Result {
228 match self {
229 PlotKind::Point => write!(f, "point"),
230 PlotKind::Boxplot => write!(f, "boxplot"),
231 PlotKind::CDF => write!(f, "cdf"),
232 PlotKind::Histogram => write!(f, "histogram"),
233 }
234 }
235}
236
237#[derive(Debug, Default, Clone, PartialEq)]
238pub struct Quantiles {
239 min: f64,
240 q1: f64,
241 q2: f64,
242 q3: f64,
243 max: f64,
244 outliers: Vec<f64>,
245}
246
247impl Quantiles {
248 pub fn from_slice(input: &[f64]) -> Self {
249 let inter_quartiles = 1.5;
250 let mut x: Vec<f64> = input.iter().filter(|a| !a.is_nan()).copied().collect();
251 assert!(
252 !x.is_empty(),
253 "not enough valid values in input ({input:?})"
254 );
255 x.sort_by(|a, b| a.partial_cmp(b).unwrap());
256 let [q1, q2, q3] =
257 [0.25, 0.5, 0.75].map(|q| get_value(x.as_slice(), get_index(q, x.len())).unwrap());
258 let lower = q2 - inter_quartiles * (q3 - q1);
259 let upper = q2 + inter_quartiles * (q3 - q1);
260 Self {
261 min: x
262 .iter()
263 .filter(|&a| *a > lower)
264 .copied()
265 .min_by(|a, b| a.partial_cmp(b).unwrap())
266 .unwrap(),
267 q1,
268 q2,
269 q3,
270 max: x
271 .iter()
272 .filter(|&a| *a < upper)
273 .copied()
274 .max_by(|a, b| a.partial_cmp(b).unwrap())
275 .unwrap(),
276 outliers: x
277 .iter()
278 .filter(|&a| *a < lower || *a > upper)
279 .copied()
280 .collect(),
281 }
282 }
283
284 pub fn draw_into(&self, canvas: &mut Canvas, height: usize) -> Result<(), CanvasError> {
286 assert!(canvas.height >= height + 3);
287 let [min, q1, q2, q3, max] = [self.min, self.q1, self.q2, self.q3, self.max]
288 .map(|x| get_cell(x, canvas.x_range.0, canvas.x_range.1, canvas.width));
289 let outliers = self
290 .outliers
291 .iter()
292 .map(|&x| get_cell(x, canvas.x_range.0, canvas.x_range.1, canvas.width))
293 .collect::<Vec<_>>();
294
295 let (q1, q2, q3) = (q1?, q2?, q3?);
296 let (min, max) = (min?, max?);
297 for x in (min + 1)..q1 {
298 canvas.set_cell(height + 1, x, b'-')?;
299 }
300 for x in (q3 + 1)..max {
301 canvas.set_cell(height + 1, x, b'-')?;
302 }
303 for x in outliers {
304 canvas.set_cell(height + 1, x?, b'+')?;
305 }
306 for x in q1..q3 {
307 canvas.set_cell(height, x, b'-')?;
308 canvas.set_cell(height + 2, x, b'-')?;
309 }
310 for x in [min, q1, q2, q3, max] {
311 canvas.set_cell(height + 1, x, b'|')?;
312 }
313 Ok(())
314 }
315}
316
317#[derive(Debug, Default)]
318pub struct CDF {
319 steps: Vec<(f64, f64)>,
320}
321
322impl CDF {
323 pub fn from_vec(input: Vec<f64>) -> Self {
324 let step = 1.0 / (input.len() as f64);
325 let mut steps: Vec<(f64, f64)> = Vec::new();
326 let mut input: Vec<f64> = input.iter().filter(|y| !y.is_nan()).copied().collect();
327 input.sort_by(|a, b| a.partial_cmp(b).unwrap());
328 let mut cur = 0.0;
329 for y in input {
330 cur += step;
331 if let Some(point) = steps.iter_mut().find(|elt| elt.0 == y) {
332 point.1 = cur;
333 } else {
334 steps.push((y, cur));
335 }
336 }
337 Self { steps }
338 }
339
340 pub fn draw_into(&self, canvas: &mut Canvas, symbole: u8) -> Result<(), CanvasError> {
341 let delta = (canvas.x_range.1 - canvas.x_range.0) / canvas.width as f64;
342 for c in 0..=canvas.width {
343 let x = canvas.x_range.0 + delta * c as f64;
344 let y = self.get_value(x);
345 canvas.draw_value(x, y, symbole)?;
346 }
347 Ok(())
348 }
349
350 fn get_value(&self, x: f64) -> f64 {
352 let mut y = 0.0;
353 for p in &self.steps {
354 if p.0 < x {
355 y = p.1;
356 }
357 }
358 y
359 }
360}
361
362#[derive(Debug, Default)]
363pub struct Histogram {
364 bins: Vec<f64>,
366 values: Vec<usize>,
368}
369
370impl Histogram {
371 pub fn from_vec(input: Vec<f64>) -> Self {
372 let bin_nb = 10;
373 if input.is_empty() {
374 return Self::default();
375 }
376 let first = input[0];
377 let (x_min, x_max) = input
378 .iter()
379 .copied()
380 .fold((first, first), |(mi, ma), x| (x.min(mi), x.max(ma)));
381 let x_max = x_max + 0.001 * (x_max - x_min);
382 let mut hist = Histogram::default();
383 hist.reset_bins(x_min, x_max, bin_nb);
384 hist.add_values(&input);
385 hist
386 }
387
388 fn get_bin(&self, x: f64) -> Option<usize> {
390 if x.is_nan() {
391 return None;
392 }
393 if let Some(first) = self.bins.first() {
394 if &x < first || &x > self.bins.last().expect("at least one item") {
395 None
396 } else {
397 for (idx, b) in self.bins.iter().skip(1).enumerate() {
398 if x < *b {
399 return Some(idx);
400 }
401 }
402 unreachable!()
403 }
404 } else {
405 None
406 }
407 }
408
409 pub fn draw_into(&self, canvas: &mut Canvas, label: u8) -> Result<(), CanvasError> {
410 let step = (canvas.x_range.1 - canvas.x_range.0) / (canvas.width as f64);
411 let start = canvas.x_range.0;
412 let xs = (0..canvas.width).map(|a| start + a as f64 * step);
413 for x in xs {
414 canvas.draw_value(x, self.get_value(x).unwrap(), label)?
415 }
416 Ok(())
417 }
418
419 fn get_value(&self, x: f64) -> Option<f64> {
422 if self.bins.is_empty() || self.values.is_empty() {
423 return None;
424 }
425 if let Some(b) = self.get_bin(x) {
426 self.values.get(b).map(|&x| x as f64)
427 } else {
428 Some(0.0)
429 }
430 }
431
432 fn get_frequency(&self, x: f64) -> Option<f64> {
434 let nb = self.values.iter().sum::<usize>() as f64;
435 self.get_value(x).map(|x| x / nb)
436 }
437
438 fn reset_bins(&mut self, x_min: f64, x_max: f64, bin_nb: usize) {
440 if bin_nb == 0 {
441 panic!("bin_nb should not be 0");
442 }
443 let bin_size = (x_max - x_min) / (bin_nb as f64);
444 self.bins = (0..=bin_nb).map(|x| x_min + x as f64 * bin_size).collect();
445 self.values = vec![0; bin_nb];
446 }
447
448 fn add_values(&mut self, input: &[f64]) {
449 for &x in input.iter() {
450 let idx = self.get_bin(x).unwrap();
451 *self.values.get_mut(idx).unwrap() += 1;
452 }
453 }
454}
455
456#[derive(Debug, Default)]
457pub struct DataSet {
458 dataset: HashMap<String, Vec<(f64, f64)>>,
460}
461
462impl DataSet {
463 pub fn from_csv(content: &str) -> Result<Self, DatasetError> {
472 let sep = ',';
473 let mut dataset = Self::default();
474 let mut lines = content.lines();
475 let headers: Vec<_> = lines
476 .next()
477 .ok_or(DatasetError::NoData)?
478 .split(sep)
479 .map(|l| String::from(l.replace('"', "").trim()))
480 .skip(1)
481 .collect();
482 for line in lines {
483 let mut values = line
484 .split(sep)
485 .map(|l| String::from(l.replace('"', "").trim()));
486 let x = values
487 .next()
488 .expect("first column (indexes) should exist")
489 .parse()?;
490 for (label, y) in zip(headers.clone(), values) {
491 dataset
492 .dataset
493 .entry(label)
494 .or_default()
495 .push((x, y.parse()?));
496 }
497 }
498 Ok(dataset)
499 }
500
501 pub fn add_points(&mut self, dataset: String, points: Vec<(f64, f64)>) {
502 self.dataset
503 .entry(dataset)
504 .or_default()
505 .extend(points.iter())
506 }
507
508 pub fn draw_into(&self, canvas: &mut Canvas, kind: PlotKind) -> Result<(), CanvasError> {
509 match kind {
510 PlotKind::Point => self.draw_point(canvas),
511 PlotKind::Boxplot => self.draw_boxplot(canvas),
512 PlotKind::CDF => self.draw_cdf(canvas),
513 PlotKind::Histogram => self.draw_histogram(canvas),
514 }
515 }
516
517 fn draw_point(&self, canvas: &mut Canvas) -> Result<(), CanvasError> {
518 self.reset_canvas_range(canvas)?;
520 canvas.draw_axes()?;
521
522 for (label, points) in self.dataset.iter() {
524 let l = label.bytes().next().unwrap();
526 for point in points {
527 if point.0.is_nan() || point.1.is_nan() {
528 continue;
529 }
530 canvas.draw_value(point.0, point.1, l)?;
531 }
532 }
533 Ok(())
534 }
535
536 fn draw_boxplot(&self, canvas: &mut Canvas) -> Result<(), CanvasError> {
537 let mut height = 0;
539 for dataset in self.dataset.values() {
540 let q = Quantiles::from_slice(&dataset.iter().map(|x| x.1).collect::<Vec<_>>());
541 q.draw_into(canvas, height)?;
542 height += 4
543 }
544 Ok(())
545 }
546
547 fn draw_cdf(&self, canvas: &mut Canvas) -> Result<(), CanvasError> {
548 canvas.y_range = (-0.1, 1.1);
550 canvas.draw_axes()?;
551 for (label, data) in &self.dataset {
552 let cdf = CDF::from_vec(data.iter().map(|x| x.1).collect());
553 cdf.draw_into(
554 canvas,
555 label.bytes().next().expect("label should not be empty"),
556 )?
557 }
558 Ok(())
559 }
560
561 fn draw_histogram(&self, canvas: &mut Canvas) -> Result<(), CanvasError> {
562 let hists: HashMap<String, Histogram> = self
563 .dataset
564 .iter()
565 .map(|(label, dataset)| {
566 (
567 label.clone(),
568 Histogram::from_vec(
569 dataset
570 .iter()
571 .map(|x| x.1)
572 .filter(|x| !x.is_nan())
573 .collect(),
574 ),
575 )
576 })
577 .collect();
578
579 let (x_min, x_max) = hists
581 .values()
582 .map(|h| {
583 (
584 *h.bins.first().expect("dataset should not be empty"),
585 *h.bins.last().unwrap(),
586 )
587 })
588 .reduce(|(a, b), (c, d)| (a.min(c), b.max(d)))
589 .unwrap();
590 let y_max = hists
591 .values()
592 .map(|h| h.values.clone().into_iter().fold(0, |acc, x| acc.max(x)))
593 .reduce(|acc, b| acc.max(b))
594 .unwrap() as f64;
595 let y_min = -y_max / 20.0;
596 canvas.x_range = (x_min, x_max);
597 canvas.y_range = (y_min, y_max);
598
599 for (l, h) in hists.iter() {
600 h.draw_into(
601 canvas,
602 l.bytes()
603 .next()
604 .expect("label should be at least one letter long"),
605 )?
606 }
607 Ok(())
608 }
609
610 fn reset_canvas_range(&self, canvas: &mut Canvas) -> Result<(), CanvasError> {
611 let mut points = self.dataset.values().flatten();
612 let first = points.next().ok_or(CanvasError::NoData)?;
613 let (x_min, x_max, y_min, y_max) = points.fold(
614 (first.0, first.0, first.1, first.1),
615 |(x0, x1, y0, y1), p| (x0.min(p.0), x1.max(p.0), y0.min(p.1), y1.max(p.1)),
616 );
617 canvas.set_x_range(x_min, x_max);
618 canvas.set_y_range(y_min, y_max);
619 Ok(())
620 }
621
622 fn get_quantiles(&self) -> HashMap<String, Option<Quantiles>> {
624 todo!()
625 }
626
627 fn get_cumulatives(&self) -> HashMap<String, Option<Vec<(f64, f64)>>> {
630 todo!()
631 }
632}
633
/// Fractional position of `quantile` within a sequence of `length` items,
/// computed as `quantile * length`.
fn get_index(quantile: f64, length: usize) -> f64 {
    let item_count = length as f64;
    quantile * item_count
}
637
/// Reads `x` at the fractional index `idx`, interpolating linearly between
/// the two neighbouring elements.
///
/// Returns `None` when `x` is empty or when `idx` is NaN, negative, or past
/// the last element.
fn get_value(x: &[f64], idx: f64) -> Option<f64> {
    // NaN and negative indices used to slip through the range check and then
    // panic or extrapolate; reject them up front.
    if idx.is_nan() || idx < 0.0 || idx + 1.0 > x.len() as f64 {
        return None;
    }
    let f = idx.fract();
    let i = idx.floor() as usize;
    let lower = *x.get(i)?;
    Some(match x.get(i + 1) {
        Some(&upper) => (1.0 - f) * lower + f * upper,
        // `idx` points at the last element: nothing to interpolate with.
        None => lower,
    })
}
654
#[cfg(test)]
mod tests {
    use super::*;

    /// `get_value` interpolates between neighbours and rejects indices past
    /// the end of the slice.
    #[test]
    fn value_getter() {
        let pair = [-1.0, 1.0];
        for (idx, expected) in [(0.0, -1.0), (1.0, 1.0), (0.5, 0.0), (0.25, -0.5)] {
            assert_eq!(get_value(&pair, idx), Some(expected));
        }

        let triple = [-1.0, 1.0, 2.0];
        assert_eq!(get_value(&triple, 2.1), None);
        for (idx, expected) in [(0.0, -1.0), (1.0, 1.0), (0.25, -0.5), (0.5, 0.0)] {
            assert_eq!(get_value(&triple, idx), Some(expected));
        }

        let empty: [f64; 0] = [];
        assert_eq!(get_value(&empty, 0.25), None);
    }

    /// Quartiles and whisker ends of five evenly spaced values.
    #[test]
    fn quantiles() {
        let expected = Quantiles {
            min: 0.0,
            q1: 1.25,
            q2: 2.5,
            q3: 3.75,
            max: 4.0,
            outliers: Vec::new(),
        };
        let computed = Quantiles::from_slice(&[1.0, 3.0, 4.0, 0.0, 2.0]);
        assert_eq!(computed, expected);
    }

    /// A CSV with an index column and three value columns yields three series.
    #[test]
    fn dataset_csv() {
        let text = r#"
        , A , B , "C"
        -1 , 0 , 1 , 3
        -5 , 1 , -2, 4
        "#
        .trim();
        let dataset = DataSet::from_csv(text).unwrap();
        assert_eq!(dataset.dataset.len(), 3);
    }

    /// An empty histogram has no value anywhere.
    #[test]
    fn hist_empty() {
        assert_eq!(Histogram::default().get_value(0.0), None);
    }

    /// Counts are read per bin; positions outside the bins count as zero.
    #[test]
    fn hist_values() {
        let hist = Histogram::from_vec(vec![-1.0, 0.0, 0.0, 0.1, 0.2, 10.0]);
        for (x, count) in [(0.0, 4.0), (11.0, 0.0), (5.0, 0.0), (1.0, 1.0)] {
            assert_eq!(hist.get_value(x), Some(count));
        }
    }
}