locustdb 0.3.4

Embeddable high-performance analytics database.
Documentation
use std::result::Result;

/// Parsed description of every column in an input, in declaration order.
///
/// Produced by [`Schema::parse`] from a comma-separated schema string.
#[derive(Debug, Clone, PartialEq, Eq, Hash)]
pub struct Schema {
    /// Column names, present only when the schema string named its columns.
    ///
    /// `Schema::parse` yields `None` when no column carried a `name:` prefix,
    /// and otherwise guarantees one name per entry in `column_schemas`.
    pub column_names: Option<Vec<String>>,
    /// Type (and optional transformation) of each column, one entry per
    /// comma-separated segment of the schema string.
    pub column_schemas: Vec<ColumnSchema>,
}

/// Description of a single column: its type plus an optional transformation.
///
/// Built from a `type` or `type.transformation` specification string.
#[derive(Debug, Clone, PartialEq, Eq, Hash)]
pub struct ColumnSchema {
    /// The column's declared type (or `ColumnType::Drop` for an empty spec).
    pub types: ColumnType,
    /// Transformation named after the `.` in the spec, if one was recognized.
    pub transformation: Option<ColumnTransformation>,
}

/// Type declared for a column in a schema string.
///
/// The enum carries no data, so it is `Copy` and can be passed by value.
#[derive(Clone, Copy, Eq, PartialEq, Hash, Debug)]
pub enum ColumnType {
    /// Selected by `string` or `s`.
    String,
    /// Selected by `integer`, `int` or `i`.
    Integer,
    /// Selected by `nstring` or `ns`.
    NullableString,
    /// Selected by `ninteger`, `nint` or `ni`.
    NullableInteger,
    /// Selected by an empty type specification.
    // NOTE(review): presumably the column is skipped during ingestion — confirm
    // against the code that consumes `Schema`.
    Drop,
}

/// Optional transformation attached to a column after the `.` in its spec.
///
/// The enum carries no data, so it is `Copy` and can be passed by value.
#[derive(Clone, Copy, Eq, PartialEq, Hash, Debug)]
pub enum ColumnTransformation {
    /// Selected by the suffix `100`.
    // NOTE(review): presumably scales the parsed value by 100 — confirm in the
    // ingestion code.
    Multiply100,
    /// Selected by the suffix `1000`.
    // NOTE(review): presumably scales the parsed value by 1000 — confirm in the
    // ingestion code.
    Multiply1000,
    /// Selected by the suffix `date`.
    Date,
}


impl Schema {
    /// Parses a comma-separated schema string into a [`Schema`].
    ///
    /// Each comma-separated column is either `spec` or `name:spec`, where
    /// `spec` is handed to `ColumnSchema::parse`. An empty column (for example
    /// the middle entry of `"i,,s"`) becomes a dropped column.
    ///
    /// # Errors
    ///
    /// Returns an error message when
    /// * a column contains more than one `:`,
    /// * a column spec is rejected by `ColumnSchema::parse`, or
    /// * only some of the columns are named.
    pub fn parse(s: &str) -> Result<Schema, String> {
        let mut column_names = Vec::new();
        let mut column_schemas = Vec::new();
        for column in s.split(',') {
            // `split` always yields at least one segment, so an empty column
            // shows up as the single segment `""`, never as an empty list.
            let segments = column.split(':').collect::<Vec<_>>();
            let schema = match segments.as_slice() {
                [""] => ColumnSchema::drop_column(),
                [spec] => ColumnSchema::parse(spec)?,
                [name, spec] => {
                    column_names.push(name.to_string());
                    ColumnSchema::parse(spec)?
                }
                _ => return Err(format!("Expected at most one `:` in {}.", column)),
            };
            column_schemas.push(schema);
        }
        // Names are all-or-nothing: a partial list could not be matched up
        // with the columns it belongs to.
        if !column_names.is_empty() && column_names.len() != column_schemas.len() {
            return Err("Must specify names for all columns, or for none.".to_string());
        }
        Ok(Schema {
            column_names: if column_names.is_empty() { None } else { Some(column_names) },
            column_schemas,
        })
    }
}

impl ColumnSchema {
    /// Parses a single column specification of the form `type` or
    /// `type.transformation`.
    ///
    /// Recognized types are `integer`/`int`/`i`, `ninteger`/`nint`/`ni`,
    /// `string`/`s`, `nstring`/`ns`, and the empty string (a dropped column).
    /// Recognized transformations are `date`, `100` and `1000`; a missing or
    /// empty transformation yields `None`.
    ///
    /// # Errors
    ///
    /// Returns an error message when the spec contains more than one `.`, the
    /// type is not recognized, or a non-empty transformation is not recognized.
    fn parse(s: &str) -> Result<ColumnSchema, String> {
        let mut segments = s.split('.');
        // `split` always yields at least one segment; the fallbacks only make
        // the "no transformation given" case explicit.
        let stype = segments.next().unwrap_or("");
        let stransform = segments.next().unwrap_or("");
        if segments.next().is_some() {
            return Err(format!("Expected at most one `.` in {}.", s));
        }
        let types = match stype {
            "integer" | "int" | "i" => ColumnType::Integer,
            "ninteger" | "nint" | "ni" => ColumnType::NullableInteger,
            "string" | "s" => ColumnType::String,
            "nstring" | "ns" => ColumnType::NullableString,
            "" => ColumnType::Drop,
            _ => return Err(format!("Unrecognized type {}.", s)),
        };
        let transformation = match stransform {
            "" => None,
            "date" => Some(ColumnTransformation::Date),
            "100" => Some(ColumnTransformation::Multiply100),
            "1000" => Some(ColumnTransformation::Multiply1000),
            // A misspelled transformation used to be discarded silently, so the
            // column was accepted without it; report it instead.
            _ => {
                return Err(format!(
                    "Unrecognized transformation `{}` in {}.",
                    stransform, s
                ))
            }
        };
        Ok(ColumnSchema {
            types,
            transformation,
        })
    }

    /// Returns the schema of a dropped column: type `Drop`, no transformation.
    fn drop_column() -> ColumnSchema {
        ColumnSchema {
            types: ColumnType::Drop,
            transformation: None,
        }
    }
}

#[cfg(test)]
mod tests {
    use crate::ingest::nyc_taxi_data::nyc_schema;

    use super::*;

    /// Unnamed columns parse into the expected types, and the bundled NYC taxi
    /// schema string is accepted by the parser.
    #[test]
    fn test_parse_schema() {
        let expected = Ok(Schema {
            column_names: None,
            column_schemas: vec![
                ColumnSchema { types: ColumnType::Integer, transformation: None },
                ColumnSchema { types: ColumnType::NullableString, transformation: None },
                ColumnSchema { types: ColumnType::String, transformation: None },
                ColumnSchema { types: ColumnType::NullableInteger, transformation: None },
            ],
        });
        let actual = Schema::parse("i,ns,string,nint");
        assert_eq!(expected, actual);

        // Parse once, and give `assert!` a format string: a pre-built `String`
        // as the message is a non-literal panic message, which is rejected in
        // the 2021 edition.
        let nyc = Schema::parse(&nyc_schema());
        assert!(nyc.is_ok(), "{:?}", nyc);
    }

    /// `name:type` columns populate `column_names` in declaration order.
    #[test]
    fn test_parse_named_schema() {
        let expected = Ok(Schema {
            column_names: Some(vec!["a".to_string(), "b".to_string()]),
            column_schemas: vec![
                ColumnSchema { types: ColumnType::Integer, transformation: None },
                ColumnSchema {
                    types: ColumnType::String,
                    transformation: Some(ColumnTransformation::Date),
                },
            ],
        });
        assert_eq!(expected, Schema::parse("a:i,b:s.date"));
    }

    /// Naming only some columns, or using more than one `:`, is an error.
    #[test]
    fn test_parse_schema_errors() {
        assert!(Schema::parse("a:i,s").is_err());
        assert!(Schema::parse("a:b:i").is_err());
        assert!(Schema::parse("i,float").is_err());
    }
}