use std::fmt;
use std::ops::Index;
use chrono::{NaiveDate, NaiveDateTime, NaiveTime};
use crate::error::{Error, Result};
use super::format::{Format, FormatParseError};
use super::iter::{ColumnNames, IntoIter, Iter, IterMut};
use super::newtypes::{DomainCode, Label, VariableName};
/// A named collection of equally long [`Column`]s.
///
/// The constructors in this file check that every column has the same number
/// of values and cache that count in `nrows`.
#[derive(Debug, Clone, PartialEq)]
pub struct Dataset {
/// Code identifying the domain this dataset belongs to.
domain_code: DomainCode,
/// Optional descriptive label; `None` until one is supplied.
dataset_label: Option<Label>,
/// The columns, in insertion order.
columns: Vec<Column>,
/// Cached row count: the length of the first column, or 0 when there are no columns.
nrows: usize,
}
impl Dataset {
    /// Builds a dataset from a domain code and a list of columns.
    ///
    /// The row count is taken from the first column (zero when `columns` is
    /// empty) and the dataset label starts out unset.
    ///
    /// # Errors
    ///
    /// Returns [`Error::ColumnLengthMismatch`] describing the first column
    /// whose length differs from the length of the first column.
    #[must_use = "this returns a Result that should be handled"]
    pub fn new(domain_code: impl Into<DomainCode>, columns: Vec<Column>) -> Result<Self> {
        let nrows = match columns.first() {
            Some(first) => first.len(),
            None => 0,
        };

        // Report the earliest offender, in column order.
        if let Some(offender) = columns.iter().find(|candidate| candidate.len() != nrows) {
            return Err(Error::ColumnLengthMismatch {
                column_name: offender.name().to_string(),
                actual: offender.len(),
                expected: nrows,
            });
        }

        Ok(Self {
            domain_code: domain_code.into(),
            dataset_label: None,
            columns,
            nrows,
        })
    }

    /// Builds a dataset exactly like [`Dataset::new`] and attaches a label.
    ///
    /// # Errors
    ///
    /// Fails under the same conditions as [`Dataset::new`]; the label is only
    /// converted and stored once validation has succeeded.
    pub fn with_label(
        domain_code: impl Into<DomainCode>,
        dataset_label: impl Into<Label>,
        columns: Vec<Column>,
    ) -> Result<Self> {
        Self::new(domain_code, columns).map(|mut built| {
            built.dataset_label = Some(dataset_label.into());
            built
        })
    }

    /// Number of columns in the dataset.
    #[must_use]
    pub fn ncols(&self) -> usize {
        self.columns.len()
    }

    /// `true` when the dataset has zero rows, which includes a dataset
    /// constructed without any columns.
    #[must_use]
    pub fn is_empty(&self) -> bool {
        self.nrows == 0
    }

    /// The domain code as a string slice.
    #[must_use]
    pub fn domain_code(&self) -> &str {
        self.domain_code.as_str()
    }

    /// The dataset label, if one has been set.
    #[must_use]
    pub fn dataset_label(&self) -> Option<&str> {
        self.dataset_label.as_ref().map(|label| label.as_str())
    }

    /// Sets the dataset label, replacing any previous one.
    pub fn set_label(&mut self, label: impl Into<Label>) {
        self.dataset_label = Some(label.into());
    }

    /// All columns, in insertion order.
    #[must_use]
    pub fn columns(&self) -> &[Column] {
        &self.columns
    }

    /// Number of rows shared by every column.
    #[must_use]
    pub fn nrows(&self) -> usize {
        self.nrows
    }

    /// Iterator over the columns, built from a shared borrow.
    #[must_use]
    pub fn iter(&self) -> Iter<'_> {
        Iter::new(&self.columns)
    }

    /// Iterator over the columns, built from an exclusive borrow.
    ///
    /// NOTE(review): nothing in this impl re-checks column lengths after
    /// mutation, so callers replacing a column wholesale should keep its
    /// length equal to [`Dataset::nrows`].
    #[must_use]
    pub fn iter_mut(&mut self) -> IterMut<'_> {
        IterMut::new(&mut self.columns)
    }

    /// Iterator over column names, built from the column list.
    #[must_use]
    pub fn column_names(&self) -> ColumnNames<'_> {
        ColumnNames::new(&self.columns)
    }

    /// Looks up the first column whose name equals `name` exactly.
    ///
    /// Returns `None` when no column matches.
    #[must_use]
    pub fn column(&self, name: &str) -> Option<&Column> {
        self.columns
            .iter()
            .find(|candidate| candidate.name() == name)
    }
}
impl IntoIterator for Dataset {
type Item = Column;
type IntoIter = IntoIter;
fn into_iter(self) -> Self::IntoIter {
IntoIter::new(self.columns)
}
}
impl<'a> IntoIterator for &'a Dataset {
type Item = &'a Column;
type IntoIter = Iter<'a>;
fn into_iter(self) -> Self::IntoIter {
self.iter()
}
}
impl<'a> IntoIterator for &'a mut Dataset {
type Item = &'a mut Column;
type IntoIter = IterMut<'a>;
fn into_iter(self) -> Self::IntoIter {
self.iter_mut()
}
}
/// Positional column access: `dataset[0]`.
impl Index<usize> for Dataset {
    type Output = Column;

    /// Returns the column at position `index`.
    ///
    /// # Panics
    ///
    /// Panics when `index` is out of bounds, exactly as slice indexing does.
    fn index(&self, index: usize) -> &Self::Output {
        let all_columns = self.columns.as_slice();
        &all_columns[index]
    }
}
/// Column access by name: `dataset["USUBJID"]`.
impl Index<&str> for Dataset {
    type Output = Column;

    /// Returns the first column whose name equals `name`.
    ///
    /// Use [`Dataset::column`] for a non-panicking lookup.
    ///
    /// # Panics
    ///
    /// Panics with `no column named '<name>'` when no column matches.
    fn index(&self, name: &str) -> &Self::Output {
        match self.column(name) {
            Some(found) => found,
            None => panic!("no column named '{}'", name),
        }
    }
}
/// Appends columns to an existing dataset.
impl Extend<Column> for Dataset {
    /// Pushes every column produced by `iter` onto the dataset.
    ///
    /// When the dataset has no columns yet, the first incoming column fixes
    /// the row count; every later column must match it.
    ///
    /// # Panics
    ///
    /// Panics when an incoming column's length differs from the dataset's row
    /// count. Columns accepted before the offending one remain in the dataset.
    fn extend<T: IntoIterator<Item = Column>>(&mut self, iter: T) {
        for incoming in iter {
            let adopts_row_count = self.columns.is_empty() && self.nrows == 0;
            if adopts_row_count {
                self.nrows = incoming.len();
            } else {
                assert!(
                    incoming.len() == self.nrows,
                    "column '{}' has length {} but dataset has {} rows",
                    incoming.name(),
                    incoming.len(),
                    self.nrows
                );
            }
            self.columns.push(incoming);
        }
    }
}
/// One-line summary such as `AE [2 rows × 3 cols]`.
impl fmt::Display for Dataset {
    /// Writes the domain code followed by the row and column counts.
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        let code = self.domain_code();
        let rows = self.nrows();
        let cols = self.ncols();
        write!(f, "{} [{} rows × {} cols]", code, rows, cols)
    }
}
/// A single named column: its values plus optional descriptive metadata.
///
/// All metadata fields start as `None` and are filled in through the
/// `with_*` builder methods.
#[derive(Debug, Clone, PartialEq)]
pub struct Column {
/// Variable name of the column.
name: VariableName,
/// Optional role classification.
role: Option<VariableRole>,
/// The column's values.
data: ColumnData,
/// Optional descriptive label.
label: Option<Label>,
/// Optional output format.
format: Option<Format>,
/// Optional input format.
informat: Option<Format>,
/// Optional explicitly requested length (distinct from the number of values in `data`).
length: Option<usize>,
}
impl Column {
    /// Creates a column from a name and its values, with no role, label,
    /// format, informat or explicit length.
    #[must_use]
    pub fn new(name: impl Into<VariableName>, data: ColumnData) -> Self {
        let name = name.into();
        Self {
            name,
            data,
            role: None,
            label: None,
            format: None,
            informat: None,
            length: None,
        }
    }

    /// Creates a column like [`Column::new`] but with `role` already set.
    #[must_use]
    pub fn with_role(name: impl Into<VariableName>, role: VariableRole, data: ColumnData) -> Self {
        Self {
            role: Some(role),
            ..Self::new(name, data)
        }
    }

    /// Builder: sets the descriptive label.
    #[must_use]
    pub fn with_label(self, label: impl Into<Label>) -> Self {
        Self {
            label: Some(label.into()),
            ..self
        }
    }

    /// Builder: sets the format.
    #[must_use]
    pub fn with_format(self, format: Format) -> Self {
        Self {
            format: Some(format),
            ..self
        }
    }

    /// Builder: parses `format` with [`Format::parse`] and sets the result as
    /// the column's format.
    ///
    /// # Errors
    ///
    /// Returns the [`FormatParseError`] produced by the parser; the column is
    /// consumed in that case.
    pub fn with_format_str(self, format: &str) -> std::result::Result<Self, FormatParseError> {
        let parsed = Format::parse(format)?;
        Ok(self.with_format(parsed))
    }

    /// Builder: sets the informat.
    #[must_use]
    pub fn with_informat(self, informat: Format) -> Self {
        Self {
            informat: Some(informat),
            ..self
        }
    }

    /// Builder: records an explicit length for the column.
    ///
    /// This is independent of [`Column::len`], which counts values.
    #[must_use]
    pub fn with_length(self, length: usize) -> Self {
        Self {
            length: Some(length),
            ..self
        }
    }

    /// Number of values (rows) held by the column.
    #[must_use]
    pub fn len(&self) -> usize {
        self.data.len()
    }

    /// `true` when the column holds no values.
    #[must_use]
    pub fn is_empty(&self) -> bool {
        self.data.is_empty()
    }

    /// The column's variable name.
    #[must_use]
    pub fn name(&self) -> &str {
        self.name.as_str()
    }

    /// The column's role, if one was assigned.
    #[must_use]
    pub fn role(&self) -> Option<VariableRole> {
        self.role
    }

    /// The column's label, if one was set.
    #[must_use]
    pub fn label(&self) -> Option<&str> {
        self.label.as_ref().map(|label| label.as_str())
    }

    /// The column's format, if one was set.
    #[must_use]
    pub fn format(&self) -> Option<&Format> {
        self.format.as_ref()
    }

    /// The column's informat, if one was set.
    #[must_use]
    pub fn informat(&self) -> Option<&Format> {
        self.informat.as_ref()
    }

    /// The length recorded through [`Column::with_length`], if any.
    #[must_use]
    pub fn explicit_length(&self) -> Option<usize> {
        self.length
    }

    /// Borrow of the underlying values.
    #[must_use]
    pub fn data(&self) -> &ColumnData {
        &self.data
    }

    /// `true` when the underlying data is one of the numeric variants
    /// (see [`ColumnData::is_numeric`]).
    #[must_use]
    pub fn is_numeric(&self) -> bool {
        self.data.is_numeric()
    }

    /// `true` when the underlying data is one of the character variants
    /// (see [`ColumnData::is_character`]).
    #[must_use]
    pub fn is_character(&self) -> bool {
        self.data.is_character()
    }
}
/// Short description such as `AGE (F64)`.
impl fmt::Display for Column {
    /// Writes the column name followed by the storage variant in parentheses.
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        let variant_label = match self.data() {
            ColumnData::F64(_) => "F64",
            ColumnData::I64(_) => "I64",
            ColumnData::Bool(_) => "Bool",
            ColumnData::String(_) => "String",
            ColumnData::Bytes(_) => "Bytes",
            ColumnData::Date(_) => "Date",
            ColumnData::DateTime(_) => "DateTime",
            ColumnData::Time(_) => "Time",
        };
        write!(f, "{} ({})", self.name(), variant_label)
    }
}
/// Typed storage for a column's values.
///
/// Every variant stores one `Option` per row; presumably `None` marks a
/// missing value — confirm against the readers/writers that consume this type.
/// The enum is `#[non_exhaustive]`, so code outside this crate must include a
/// wildcard arm when matching on it.
#[derive(Debug, Clone, PartialEq)]
#[non_exhaustive]
pub enum ColumnData {
/// 64-bit floating point values.
F64(Vec<Option<f64>>),
/// 64-bit signed integer values.
I64(Vec<Option<i64>>),
/// Boolean values.
Bool(Vec<Option<bool>>),
/// Owned UTF-8 strings.
String(Vec<Option<String>>),
/// Raw byte strings.
Bytes(Vec<Option<Vec<u8>>>),
/// Calendar dates without a time zone.
Date(Vec<Option<NaiveDate>>),
/// Date-times without a time zone.
DateTime(Vec<Option<NaiveDateTime>>),
/// Times of day without a time zone.
Time(Vec<Option<NaiveTime>>),
}
impl ColumnData {
    /// Number of values stored, counting `None` entries.
    #[must_use]
    pub fn len(&self) -> usize {
        match self {
            Self::F64(values) => values.len(),
            Self::I64(values) => values.len(),
            Self::Bool(values) => values.len(),
            Self::String(values) => values.len(),
            Self::Bytes(values) => values.len(),
            Self::Date(values) => values.len(),
            Self::DateTime(values) => values.len(),
            Self::Time(values) => values.len(),
        }
    }

    /// `true` when no values are stored.
    #[must_use]
    pub fn is_empty(&self) -> bool {
        self.len() == 0
    }

    /// `true` for the variants classified as numeric: floats, integers,
    /// booleans and the date/time family.
    #[must_use]
    pub fn is_numeric(&self) -> bool {
        // Spelled out per variant so adding a variant forces a decision here.
        match self {
            Self::F64(_)
            | Self::I64(_)
            | Self::Bool(_)
            | Self::Date(_)
            | Self::DateTime(_)
            | Self::Time(_) => true,
            Self::String(_) | Self::Bytes(_) => false,
        }
    }

    /// `true` for the variants classified as character data: strings and
    /// byte strings.
    #[must_use]
    pub fn is_character(&self) -> bool {
        match self {
            Self::String(_) | Self::Bytes(_) => true,
            Self::F64(_)
            | Self::I64(_)
            | Self::Bool(_)
            | Self::Date(_)
            | Self::DateTime(_)
            | Self::Time(_) => false,
        }
    }
}
/// Compact description such as `F64(3)`: variant name and value count.
impl fmt::Display for ColumnData {
    /// Writes the variant name immediately followed by the length in
    /// parentheses.
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        let variant_label = match self {
            Self::F64(_) => "F64",
            Self::I64(_) => "I64",
            Self::Bool(_) => "Bool",
            Self::String(_) => "String",
            Self::Bytes(_) => "Bytes",
            Self::Date(_) => "Date",
            Self::DateTime(_) => "DateTime",
            Self::Time(_) => "Time",
        };
        write!(f, "{}({})", variant_label, self.len())
    }
}
/// Wraps every float in `Some`, producing a [`ColumnData::F64`] with no
/// `None` entries.
impl From<Vec<f64>> for ColumnData {
    fn from(values: Vec<f64>) -> Self {
        let wrapped: Vec<Option<f64>> = values.into_iter().map(Some).collect();
        Self::F64(wrapped)
    }
}
/// Wraps every integer in `Some`, producing a [`ColumnData::I64`] with no
/// `None` entries.
impl From<Vec<i64>> for ColumnData {
    fn from(values: Vec<i64>) -> Self {
        let wrapped: Vec<Option<i64>> = values.into_iter().map(Some).collect();
        Self::I64(wrapped)
    }
}
/// Losslessly widens each `i32` to `i64` and wraps it in `Some`, producing a
/// [`ColumnData::I64`].
impl From<Vec<i32>> for ColumnData {
    fn from(values: Vec<i32>) -> Self {
        let widened: Vec<Option<i64>> = values.into_iter().map(i64::from).map(Some).collect();
        Self::I64(widened)
    }
}
/// Wraps every flag in `Some`, producing a [`ColumnData::Bool`] with no
/// `None` entries.
impl From<Vec<bool>> for ColumnData {
    fn from(values: Vec<bool>) -> Self {
        let wrapped: Vec<Option<bool>> = values.into_iter().map(Some).collect();
        Self::Bool(wrapped)
    }
}
/// Takes ownership of the strings and wraps each in `Some`, producing a
/// [`ColumnData::String`] with no `None` entries.
impl From<Vec<String>> for ColumnData {
    fn from(values: Vec<String>) -> Self {
        let wrapped: Vec<Option<String>> = values.into_iter().map(Some).collect();
        Self::String(wrapped)
    }
}
/// Copies each borrowed string into an owned `String` wrapped in `Some`,
/// producing a [`ColumnData::String`].
impl From<Vec<&str>> for ColumnData {
    fn from(values: Vec<&str>) -> Self {
        let owned: Vec<Option<String>> = values
            .into_iter()
            .map(|text| Some(String::from(text)))
            .collect();
        Self::String(owned)
    }
}
/// Wraps every date in `Some`, producing a [`ColumnData::Date`] with no
/// `None` entries.
impl From<Vec<NaiveDate>> for ColumnData {
    fn from(values: Vec<NaiveDate>) -> Self {
        let wrapped: Vec<Option<NaiveDate>> = values.into_iter().map(Some).collect();
        Self::Date(wrapped)
    }
}
/// Wraps every date-time in `Some`, producing a [`ColumnData::DateTime`] with
/// no `None` entries.
impl From<Vec<NaiveDateTime>> for ColumnData {
    fn from(values: Vec<NaiveDateTime>) -> Self {
        let wrapped: Vec<Option<NaiveDateTime>> = values.into_iter().map(Some).collect();
        Self::DateTime(wrapped)
    }
}
/// Wraps every time of day in `Some`, producing a [`ColumnData::Time`] with
/// no `None` entries.
impl From<Vec<NaiveTime>> for ColumnData {
    fn from(values: Vec<NaiveTime>) -> Self {
        let wrapped: Vec<Option<NaiveTime>> = values.into_iter().map(Some).collect();
        Self::Time(wrapped)
    }
}
/// Role classification that can be attached to a column.
///
/// The enum is `#[non_exhaustive]`: code outside this crate must include a
/// wildcard arm when matching, so new roles can be added without a breaking
/// change. With the `serde` feature enabled it also derives
/// `Serialize`/`Deserialize`.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))]
#[non_exhaustive]
pub enum VariableRole {
    /// Identifier role.
    Identifier,
    /// Topic role.
    Topic,
    /// Timing role.
    Timing,
    /// Qualifier role.
    Qualifier,
    /// Rule role.
    Rule,
}

impl VariableRole {
    /// Returns the role's canonical name, identical to the variant name.
    #[must_use]
    pub const fn as_str(&self) -> &'static str {
        match self {
            Self::Identifier => "Identifier",
            Self::Topic => "Topic",
            Self::Timing => "Timing",
            Self::Qualifier => "Qualifier",
            Self::Rule => "Rule",
        }
    }
}

/// Displays the role as its canonical name (see [`VariableRole::as_str`]).
impl std::fmt::Display for VariableRole {
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        // `pad` rather than `write_str`: it applies the caller's width, fill,
        // alignment and precision flags, so `{:<12}` pads as it would for a
        // plain `&str`. Plain `{}` output is unchanged.
        f.pad(self.as_str())
    }
}
#[cfg(test)]
mod tests {
    use super::*;

    /// Two equally long columns form a valid 2×2 dataset.
    #[test]
    fn test_domain_dataset_new() {
        let numeric = Column::new("A", ColumnData::F64(vec![Some(1.0), Some(2.0)]));
        let text = Column::new(
            "B",
            ColumnData::String(vec![Some("x".into()), Some("y".into())]),
        );

        let dataset = Dataset::new("AE", vec![numeric, text]).unwrap();

        assert_eq!(dataset.nrows(), 2);
        assert_eq!(dataset.ncols(), 2);
    }

    /// Columns of different lengths are rejected by the constructor.
    #[test]
    fn test_column_length_mismatch() {
        let short = Column::new("A", ColumnData::F64(vec![Some(1.0)]));
        let long = Column::new(
            "B",
            ColumnData::String(vec![Some("x".into()), Some("y".into())]),
        );

        let outcome = Dataset::new("AE", vec![short, long]);

        assert!(outcome.is_err());
    }

    /// Every variant is classified as either numeric or character.
    #[test]
    fn test_column_data_types() {
        let numeric_variants = [
            ColumnData::F64(vec![]),
            ColumnData::I64(vec![]),
            ColumnData::Bool(vec![]),
            ColumnData::Date(vec![]),
            ColumnData::DateTime(vec![]),
            ColumnData::Time(vec![]),
        ];
        for variant in &numeric_variants {
            assert!(variant.is_numeric());
        }

        let character_variants = [ColumnData::String(vec![]), ColumnData::Bytes(vec![])];
        for variant in &character_variants {
            assert!(variant.is_character());
        }
    }

    /// The `From<Vec<_>>` conversions keep the length and pick the expected
    /// numeric/character classification.
    #[test]
    fn test_column_data_from_conversions() {
        let from_f64: ColumnData = vec![1.0, 2.0, 3.0].into();
        assert_eq!(from_f64.len(), 3);
        assert!(from_f64.is_numeric());

        let from_i64: ColumnData = vec![1i64, 2, 3].into();
        assert_eq!(from_i64.len(), 3);
        assert!(from_i64.is_numeric());

        let from_i32: ColumnData = vec![1i32, 2, 3].into();
        assert_eq!(from_i32.len(), 3);
        assert!(from_i32.is_numeric());

        let from_bool: ColumnData = vec![true, false, true].into();
        assert_eq!(from_bool.len(), 3);
        assert!(from_bool.is_numeric());

        let from_strings: ColumnData = vec!["a".to_string(), "b".to_string()].into();
        assert_eq!(from_strings.len(), 2);
        assert!(from_strings.is_character());

        let from_strs: ColumnData = vec!["a", "b", "c"].into();
        assert_eq!(from_strs.len(), 3);
        assert!(from_strs.is_character());
    }
}