use std::path::Path;
use crate::error::MattenDataError;
use crate::table::{CellValue, Table};
impl Table {
    /// Parses CSV text into a [`Table`].
    ///
    /// The first record is taken as the header row, and every header name is
    /// trimmed of surrounding whitespace. Each following record becomes one
    /// row whose cells are converted with `parse_cell`.
    ///
    /// # Errors
    ///
    /// * [`MattenDataError::EmptyInput`] if `input` is empty or
    ///   whitespace-only, or if the header record contains no fields.
    /// * [`MattenDataError::Csv`] if the CSV reader reports an error, or if a
    ///   header name is empty after trimming (the message carries the 1-based
    ///   column index).
    /// * [`MattenDataError::DuplicateColumn`] if two header names are equal
    ///   after trimming.
    /// * [`MattenDataError::RaggedRow`] if a data record does not have the
    ///   same number of fields as the header. `row` is the 1-based line
    ///   number the CSV reader recorded for that record.
    pub fn from_csv_str(input: &str) -> Result<Table, MattenDataError> {
        if input.trim().is_empty() {
            return Err(MattenDataError::EmptyInput);
        }

        // `flexible(true)` stops the reader from rejecting records of
        // differing lengths itself, so the mismatch can be reported as a
        // `RaggedRow` further down.
        let mut reader = ::csv::ReaderBuilder::new()
            .has_headers(true)
            .flexible(true)
            .from_reader(input.as_bytes());

        let headers: Vec<String> = reader
            .headers()
            .map_err(|e| MattenDataError::Csv {
                message: e.to_string(),
            })?
            .iter()
            .map(|h| h.trim().to_string())
            .collect();
        if headers.is_empty() {
            return Err(MattenDataError::EmptyInput);
        }

        // All names are checked for emptiness before any duplicate check, so
        // an empty name is always reported in preference to a duplicate.
        if let Some(i) = headers.iter().position(|name| name.is_empty()) {
            return Err(MattenDataError::Csv {
                message: format!("header column {} is empty", i + 1),
            });
        }

        // Report the left-most name that appears again later in the header.
        for (i, name) in headers.iter().enumerate() {
            if headers[i + 1..].contains(name) {
                return Err(MattenDataError::DuplicateColumn { name: name.clone() });
            }
        }

        let n = headers.len();
        let mut rows: Vec<Vec<CellValue>> = Vec::new();
        for (idx, record) in reader.records().enumerate() {
            let record = record.map_err(|e| MattenDataError::Csv {
                message: e.to_string(),
            })?;
            // Records without any fields carry no data; skip them.
            if record.is_empty() {
                continue;
            }
            if record.len() != n {
                // Use the reader's own position tracking rather than
                // `idx + 2`: the reader skips blank lines and quoted fields
                // may span several lines, so the record index does not map
                // one-to-one onto input lines. The old estimate is kept only
                // as a fallback when no position is attached.
                // NOTE(review): the reader appears to stamp the position
                // before skipping blank lines directly ahead of a record, so
                // the line can point at such a blank line -- confirm.
                let line = record
                    .position()
                    .and_then(|pos| {
                        <usize as std::convert::TryFrom<u64>>::try_from(pos.line()).ok()
                    })
                    .unwrap_or(idx + 2);
                return Err(MattenDataError::RaggedRow {
                    row: line,
                    expected: n,
                    actual: record.len(),
                });
            }
            rows.push(record.iter().map(parse_cell).collect());
        }

        Ok(Table::from_parts(headers, rows))
    }

    /// Reads the file at `path` as UTF-8 text and parses it with
    /// [`Table::from_csv_str`].
    ///
    /// # Errors
    ///
    /// * [`MattenDataError::Io`] (carrying `path` and the underlying I/O
    ///   error) if the file cannot be read into a string.
    /// * Any error returned by [`Table::from_csv_str`] for the file contents.
    pub fn from_csv_path<P: AsRef<Path>>(path: P) -> Result<Table, MattenDataError> {
        let path = path.as_ref();
        let content = std::fs::read_to_string(path).map_err(|source| MattenDataError::Io {
            path: path.to_path_buf(),
            source,
        })?;
        Table::from_csv_str(&content)
    }
}
/// Converts one raw CSV field into a typed [`CellValue`].
///
/// Surrounding whitespace is ignored. The trimmed text is classified as:
///
/// 1. empty -> `Missing`
/// 2. parses as `i64` -> `Int`
/// 3. parses as `f64` -> `Float` (this includes spellings that
///    `f64::from_str` accepts, such as `inf` and `NaN`)
/// 4. exactly `true` / `false` (lowercase) -> `Bool`
/// 5. anything else -> `Text` holding the trimmed text
fn parse_cell(raw: &str) -> CellValue {
    match raw.trim() {
        "" => CellValue::Missing,
        // The boolean literals never parse as numbers, so matching them
        // before the numeric attempts yields the same classification.
        "true" => CellValue::Bool(true),
        "false" => CellValue::Bool(false),
        text => text
            .parse::<i64>()
            .map(CellValue::Int)
            .or_else(|_| text.parse::<f64>().map(CellValue::Float))
            .unwrap_or_else(|_| CellValue::Text(text.to_string())),
    }
}