use std::collections::HashSet;
use error::Result;
use parser::Parser;
use super::FlatIter;
use super::column::Column;
use super::value::Value;
/// A named relation whose data is stored column by column.
///
/// All columns are expected to hold the same number of values; that shared
/// length is cached in `n_rows`.
#[derive(Debug, Clone, PartialEq)]
pub struct DataSet {
/// Name of the relation.
relation: String,
/// The columns of the data set, in declaration order.
columns: Vec<Column>,
/// Number of rows, i.e. the number of values held by each column.
n_rows: usize,
}
impl DataSet {
/// Creates a data set named `relation` from already populated `columns`.
///
/// The row count is taken from the first column; a data set without any
/// columns has zero rows.
///
/// # Panics
///
/// Panics if any column holds a different number of values than the first
/// column. The panic message names the offending column and both lengths.
pub fn new(relation: &str, columns: Vec<Column>) -> DataSet {
    let n_rows = columns.first().map_or(0, Column::len);
    // The first column defines the expected length, so only the remaining
    // columns need to be checked against it.
    for column in columns.iter().skip(1) {
        assert!(
            column.len() == n_rows,
            "column `{}` has {} rows but the first column has {}",
            column.name(),
            column.len(),
            n_rows
        );
    }
    DataSet {
        relation: relation.to_owned(),
        columns,
        n_rows,
    }
}
/// Returns the relation name of this data set.
pub fn name(&self) -> &str {
    self.relation.as_str()
}
/// Parses a data set from its textual representation.
///
/// The header is parsed first and one column is created per header
/// attribute. Afterwards rows are read until the end of the input: every
/// row supplies one value per column, with a column delimiter between
/// neighbouring values and a row delimiter at the end. Empty stretches
/// before the first row and after each row are skipped.
///
/// # Errors
///
/// Returns the first error reported while parsing the header, creating a
/// column from an attribute, parsing a value, or parsing a delimiter.
pub fn from_str(input: &str) -> Result<Self> {
    let mut parser = Parser::new(input);
    let header = parser.parse_header()?;

    let mut columns = Vec::new();
    for attr in header.attrs.into_iter() {
        let column = Column::from_attr(attr)?;
        columns.push(column);
    }

    let mut n_rows = 0;
    parser.skip_empty();
    while !parser.is_eof() {
        for (position, col) in columns.iter_mut().enumerate() {
            // A delimiter separates values, so none precedes the first one.
            if position > 0 {
                parser.parse_column_delimiter()?;
            }
            col.parse_value(&mut parser)?;
        }
        parser.parse_row_delimiter()?;
        parser.skip_empty();
        n_rows += 1;
    }

    Ok(DataSet {
        relation: header.name,
        columns,
        n_rows,
    })
}
/// Returns the number of rows in the data set.
pub fn n_rows(&self) -> usize {
self.n_rows
}
/// Returns the number of columns in the data set.
pub fn n_cols(&self) -> usize {
self.columns.len()
}
/// Returns a boxed iterator over the names of all columns, in column order.
pub fn col_names<'a>(&'a self) -> Box<'a + Iterator<Item = &'a str>> {
    Box::new(self.columns.iter().map(|column| column.name()))
}
pub fn col_name<'a>(&'a self, idx: usize) -> &str {
self.columns[idx].name()
}
/// Gathers the value at row `idx` from every column, in column order.
pub fn row(&self, idx: usize) -> Vec<Value> {
    let mut values = Vec::with_capacity(self.columns.len());
    for column in &self.columns {
        values.push(column.item(idx));
    }
    values
}
/// Returns a reference to the column at position `idx`.
///
/// # Panics
///
/// Panics if `idx` is not a valid column index.
pub fn col(&self, idx: usize) -> &Column {
&self.columns[idx]
}
/// Returns the first column whose name equals `col`.
///
/// # Panics
///
/// Panics with `unknown column: <name>` if no column has that name.
pub fn col_by_name(&self, col: &str) -> &Column {
    match self.columns.iter().find(|candidate| candidate.name() == col) {
        Some(found) => found,
        None => panic!("unknown column: {}", col),
    }
}
/// Returns the value stored at row `row` of the column at position `col`.
///
/// # Panics
///
/// Panics if `col` is not a valid column index.
pub fn item(&self, row: usize, col: usize) -> Value {
    self.columns[col].item(row)
}
/// Returns the value stored at row `row` of the column named `col`.
///
/// The type parameter `T` appears neither in the arguments nor in the
/// return type, so it cannot be inferred and callers have to spell it out
/// explicitly. NOTE(review): it looks like a leftover; removing it would be
/// a breaking change for existing callers.
///
/// # Panics
///
/// Panics if no column is named `col`.
pub fn item_by_name<T>(&self, row: usize, col: &str) -> Value {
    let column = self.col_by_name(col);
    column.item(row)
}
/// Creates a `FlatIter` over this data set.
///
/// NOTE(review): presumably this yields the individual values one after
/// another; confirm against the `FlatIter` implementation.
pub fn flat_iter(&self) -> FlatIter {
FlatIter::new(self)
}
pub fn split(self, names: HashSet<&str>) -> (Self, Self) {
let mut a = DataSet {
relation: self.relation.clone(),
columns: Vec::new(),
n_rows: self.n_rows,
};
let mut b = DataSet {
relation: self.relation.clone(),
columns: Vec::new(),
n_rows: self.n_rows,
};
for col in self.columns {
if names.contains(col.name()) {
b.columns.push(col);
} else {
a.columns.push(col);
}
}
(a, b)
}
pub fn split_one(self, column: &str) -> (Self, Self) {
let mut a = DataSet {
relation: self.relation.clone(),
columns: Vec::new(),
n_rows: self.n_rows,
};
let mut b = DataSet {
relation: self.relation.clone(),
columns: Vec::new(),
n_rows: self.n_rows,
};
for col in self.columns {
if column == col.name() {
b.columns.push(col);
} else {
a.columns.push(col);
}
}
(a, b)
}
}