use serde::{Deserialize, Serialize};
/// Type tag stored in a [`Field`] to describe what kind of data it holds.
///
/// Each variant shares its name with one of the payload-carrying variants of
/// [`Value`]. NOTE(review): the visible code never checks that a row's values
/// agree with the declared type; confirm whether callers rely on that.
#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
pub enum DataType {
/// Same name as [`Value::Int64`], which carries an `i64`.
Int64,
/// Same name as [`Value::Float64`], which carries an `f64`.
Float64,
/// Same name as [`Value::Bool`], which carries a `bool`.
Bool,
/// Same name as [`Value::Utf8`], which carries a `String`.
Utf8,
}
/// A named, typed entry of a [`Schema`].
#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
pub struct Field {
/// Name of the field; this is what [`Schema::index_of`] compares against.
pub name: String,
/// Declared type tag for the field.
pub data_type: DataType,
}
impl Field {
pub fn new(name: impl Into<String>, data_type: DataType) -> Self {
Self {
name: name.into(),
data_type,
}
}
}
/// An ordered list of [`Field`]s.
///
/// Positions in `fields` are what [`Schema::index_of`] reports. The visible
/// constructor does not reject duplicate names.
#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
pub struct Schema {
/// The fields, in declaration order.
pub fields: Vec<Field>,
}
impl Schema {
pub fn new(fields: Vec<Field>) -> Self {
Self { fields }
}
pub fn field_names(&self) -> impl Iterator<Item = &str> {
self.fields.iter().map(|f| f.name.as_str())
}
pub fn index_of(&self, name: &str) -> Option<usize> {
self.fields.iter().position(|f| f.name == name)
}
}
/// A single value stored in a [`DataSet`] row.
///
/// `Eq` is not derived here because the `Float64` variant holds an `f64`,
/// which only implements `PartialEq`.
#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
pub enum Value {
/// A value with no payload.
Null,
/// A 64-bit signed integer.
Int64(i64),
/// A 64-bit floating-point number.
Float64(f64),
/// A boolean.
Bool(bool),
/// An owned string.
Utf8(String),
}
/// A [`Schema`] together with rows of [`Value`]s.
///
/// [`DataSet::map_rows`] requires every row it produces to have exactly
/// `schema.fields.len()` values; [`DataSet::new`] performs no such check on
/// the rows it is given.
#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
pub struct DataSet {
/// Schema associated with the rows.
pub schema: Schema,
/// Row data; each inner `Vec` is one row.
pub rows: Vec<Vec<Value>>,
}
impl DataSet {
pub fn new(schema: Schema, rows: Vec<Vec<Value>>) -> Self {
Self { schema, rows }
}
pub fn row_count(&self) -> usize {
self.rows.len()
}
pub fn filter_rows<F>(&self, mut predicate: F) -> Self
where
F: FnMut(&[Value]) -> bool,
{
let rows = self
.rows
.iter()
.filter(|row| predicate(row.as_slice()))
.cloned()
.collect();
Self {
schema: self.schema.clone(),
rows,
}
}
pub fn map_rows<F>(&self, mut mapper: F) -> Self
where
F: FnMut(&[Value]) -> Vec<Value>,
{
let expected_len = self.schema.fields.len();
let rows = self
.rows
.iter()
.map(|row| {
let out = mapper(row.as_slice());
assert!(
out.len() == expected_len,
"mapped row length {} does not match schema length {}",
out.len(),
expected_len
);
out
})
.collect();
Self {
schema: self.schema.clone(),
rows,
}
}
pub fn reduce_rows<A, F>(&self, init: A, mut reducer: F) -> A
where
F: FnMut(A, &[Value]) -> A,
{
self.rows
.iter()
.fold(init, |acc, row| reducer(acc, row.as_slice()))
}
}