1
  2
  3
  4
  5
  6
  7
  8
  9
 10
 11
 12
 13
 14
 15
 16
 17
 18
 19
 20
 21
 22
 23
 24
 25
 26
 27
 28
 29
 30
 31
 32
 33
 34
 35
 36
 37
 38
 39
 40
 41
 42
 43
 44
 45
 46
 47
 48
 49
 50
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
// Copyright Claudio Mattera 2021.
// Distributed under the MIT License or Apache 2.0 License at your option.
// See accompanying files License-MIT.txt and License-Apache-2.0, or online at
// https://opensource.org/licenses/MIT
// https://opensource.org/licenses/Apache-2.0

//! Dummy dataframe implementation

use std::collections::HashMap;
use std::convert::TryFrom;
use std::fmt;

use chrono::{DateTime, Utc};

use rinfluxdb_types::{DataFrameError, Value};

/// Column type
///
/// A column is a homogeneous vector of values: each variant wraps a `Vec`
/// holding a single element type.
#[derive(Clone, Debug, PartialEq)]
pub enum Column {
    /// A column of floating point values (`f64`)
    Float(Vec<f64>),

    /// A column of signed integer values (`i64`)
    Integer(Vec<i64>),

    /// A column of unsigned integer values (`u64`)
    UnsignedInteger(Vec<u64>),

    /// A column of string values (owned `String`s)
    String(Vec<String>),

    /// A column of boolean values
    Boolean(Vec<bool>),

    /// A column of datetime values in the UTC timezone
    Timestamp(Vec<DateTime<Utc>>),
}

impl Column {
    /// Write the cell at row `index` of this column to `f`.
    ///
    /// The cell is right-aligned in a field of 16 characters followed by two
    /// spaces, matching the right-aligned column headers written by the
    /// dataframe's `Display` implementation.
    ///
    /// If the column has no element at `index` (i.e. the column is shorter
    /// than the index it is displayed against), a blank cell of the same
    /// width is written instead of panicking: a `Display` implementation
    /// should never abort the program because of malformed data.
    ///
    /// # Errors
    ///
    /// Returns an error only if writing to the underlying formatter fails.
    fn display_index(&self, index: usize, f: &mut fmt::Formatter) -> fmt::Result {
        // Shared cell writer: checked access plus a uniform layout for every
        // element type.
        fn write_cell<T: fmt::Display>(
            values: &[T],
            index: usize,
            f: &mut fmt::Formatter,
        ) -> fmt::Result {
            match values.get(index) {
                Some(value) => write!(f, "{:>16}  ", value),
                // Keep the table layout intact even when the cell is missing.
                None => write!(f, "{:>16}  ", ""),
            }
        }

        match self {
            Column::Float(values) => write_cell(values, index, f),
            Column::Integer(values) => write_cell(values, index, f),
            Column::UnsignedInteger(values) => write_cell(values, index, f),
            Column::String(values) => write_cell(values, index, f),
            Column::Boolean(values) => write_cell(values, index, f),
            Column::Timestamp(values) => write_cell(values, index, f),
        }
    }
}

/// A time-indexed dataframe
///
/// A dataframe contains multiple named columns indexed by the same index.
///
/// Columns are stored in a `HashMap` keyed by column name, so they carry no
/// inherent ordering.
#[derive(Clone, Debug)]
pub struct DataFrame {
    /// Name of the dataframe
    // NOTE(review): presumably the measurement/table the data came from — confirm against callers.
    name: String,
    /// Timestamps labelling the rows; row `i` of every column is displayed next to `index[i]`
    index: Vec<DateTime<Utc>>,
    /// Column data, keyed by column name
    columns: HashMap<String, Column>,
}

impl fmt::Display for DataFrame {
    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
        write!(f, "{:>23}  ", "datetime")?;
        for column in self.columns.keys() {
            write!(f, "{:>16}  ", column)?;
        }
        write!(f, "\n-----------------------  ")?;
        for _column in self.columns.keys() {
            write!(f, "----------------  ")?;
        }
        writeln!(f)?;

        for (i, index) in self.index.iter().enumerate() {
            write!(f, "{:>23}  ", index)?;
            for column in self.columns.values() {
                column.display_index(i, f)?;
            }
            writeln!(f)?;
        }

        Ok(())
    }
}

impl TryFrom<(String, Vec<DateTime<Utc>>, HashMap<String, Vec<Value>>)> for DataFrame {
    type Error = DataFrameError;

    /// Build a dataframe from a name, an index and a map of raw columns.
    ///
    /// The element type of each column is decided by the variant of its
    /// first value; every value of the column is then converted with the
    /// corresponding `Value::into_*` method.
    ///
    /// # Errors
    ///
    /// Returns `DataFrameError::Creation` when
    ///
    /// * a column is empty, since its type cannot be determined, or
    /// * a column does not have exactly as many values as the index has
    ///   entries; such a frame could not be displayed row by row.
    fn try_from(
        (name, index, columns): (String, Vec<DateTime<Utc>>, HashMap<String, Vec<Value>>),
    ) -> Result<Self, Self::Error> {
        // Every column must provide exactly one value per index entry.
        let rows = index.len();

        let columns: HashMap<String, Result<Column, Self::Error>> = columns
            .into_iter()
            .map(|(name, column)| {
                let column = if column.len() != rows {
                    Err(DataFrameError::Creation)
                } else {
                    // The first value decides the type of the whole column.
                    match column.first() {
                        Some(Value::Float(_)) => Ok(Column::Float(
                            column
                                .into_iter()
                                .map(|element| element.into_f64())
                                .collect(),
                        )),
                        Some(Value::Integer(_)) => Ok(Column::Integer(
                            column
                                .into_iter()
                                .map(|element| element.into_i64())
                                .collect(),
                        )),
                        Some(Value::UnsignedInteger(_)) => Ok(Column::UnsignedInteger(
                            column
                                .into_iter()
                                .map(|element| element.into_u64())
                                .collect(),
                        )),
                        Some(Value::String(_)) => Ok(Column::String(
                            column
                                .into_iter()
                                .map(|element| element.into_string())
                                .collect(),
                        )),
                        Some(Value::Boolean(_)) => Ok(Column::Boolean(
                            column
                                .into_iter()
                                .map(|element| element.into_boolean())
                                .collect(),
                        )),
                        Some(Value::Timestamp(_)) => Ok(Column::Timestamp(
                            column
                                .into_iter()
                                .map(|element| element.into_timestamp())
                                .collect(),
                        )),
                        // An empty column carries no type information.
                        None => Err(DataFrameError::Creation),
                    }
                };
                (name, column)
            })
            .collect();

        // Fail as a whole if any single column could not be built.
        let columns = flatten_map(columns)?;

        Ok(Self {
            name,
            index,
            columns,
        })
    }
}

/// Turn a map of results into a result of a map.
///
/// Returns `Ok` with every key mapped to its unwrapped value when all entries
/// are `Ok`. Otherwise returns the first `Err` met while iterating over the
/// map; the remaining entries are dropped.
fn flatten_map<K, V, E>(map: HashMap<K, Result<V, E>>) -> Result<HashMap<K, V>, E>
where
    K: Eq + std::hash::Hash,
    E: std::error::Error,
{
    // Collecting an iterator of `Result<(K, V), E>` into `Result<HashMap, E>`
    // stops at the first error, just like an explicit fallible fold.
    map.into_iter()
        .map(|(key, outcome)| outcome.map(|value| (key, value)))
        .collect()
}