use crate::{TimeSeries, TimeSeriesError};
#[cfg(feature = "csv-import")]
use std::path::Path;
#[cfg(feature = "csv-import")]
use std::fs::File;
#[cfg(feature = "csv-import")]
use std::io::{BufRead, BufReader};
/// Settings that control how delimited text is turned into a `TimeSeries`.
///
/// Column indices are zero-based positions within a row after it has been
/// split on `delimiter`.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct CsvImportOptions {
    /// When `true`, the first line of the input is skipped before rows are parsed.
    pub has_header: bool,
    /// Column holding the timestamp. When `None`, each row's zero-based
    /// position among the data rows is used as its timestamp instead.
    pub timestamp_column: Option<usize>,
    /// Column holding the observed value.
    pub value_column: usize,
    /// Character that separates the fields of a row.
    pub delimiter: char,
    /// Marker for a missing observation: a value field that equals this
    /// string, after the field has been trimmed, is imported as `None`.
    pub missing_value: String,
}
impl Default for CsvImportOptions {
fn default() -> Self {
Self {
has_header: true,
timestamp_column: Some(0),
value_column: 1,
delimiter: ',',
missing_value: String::new(),
}
}
}
impl<T> TimeSeries<T>
where
    T: std::str::FromStr + Copy + From<f64>,
{
    /// Builds a series from the delimited text file at `path`.
    ///
    /// Each line after the optional header is parsed the same way as in
    /// `from_csv_string`: numeric fields are trimmed, parsed as `f64` and
    /// converted with `T::from`.
    ///
    /// # Errors
    ///
    /// Returns `TimeSeriesError::EmptyData` when the file cannot be opened, a
    /// data line cannot be read, a row has no field at a configured column, a
    /// numeric field is not a valid `f64`, or no data rows are present.
    /// Any error from `TimeSeries::new` is passed through.
    #[cfg(feature = "csv-import")]
    pub fn from_csv<P: AsRef<Path>>(
        path: P,
        options: CsvImportOptions,
    ) -> Result<Self, TimeSeriesError> {
        let file = File::open(path).map_err(|_| TimeSeriesError::EmptyData)?;
        let mut lines = BufReader::new(file).lines();
        if options.has_header {
            // The header is never inspected, so its read result is dropped with it.
            lines.next();
        }

        let mut timestamps: Vec<T> = Vec::new();
        let mut values: Vec<Option<T>> = Vec::new();
        for (idx, line) in lines.enumerate() {
            let line = line.map_err(|_| TimeSeriesError::EmptyData)?;
            let (timestamp, value) = parse_csv_record::<T>(idx, &line, &options)?;
            timestamps.push(timestamp);
            values.push(value);
        }

        if timestamps.is_empty() {
            return Err(TimeSeriesError::EmptyData);
        }
        TimeSeries::new(timestamps, values)
    }

    /// Builds a series from delimited text held in memory.
    ///
    /// The first line is skipped when `options.has_header` is set. Every
    /// remaining line is split on `options.delimiter`; the timestamp comes
    /// from `options.timestamp_column` (or the row's zero-based position when
    /// that is `None`) and the value from `options.value_column`. A value
    /// field equal to `options.missing_value` after trimming becomes `None`.
    ///
    /// # Errors
    ///
    /// Returns `TimeSeriesError::EmptyData` when a row has no field at a
    /// configured column, a numeric field is not a valid `f64`, or no data
    /// rows are present. Any error from `TimeSeries::new` is passed through.
    pub fn from_csv_string(
        csv_data: &str,
        options: CsvImportOptions,
    ) -> Result<Self, TimeSeriesError> {
        let mut lines = csv_data.lines();
        if options.has_header {
            lines.next();
        }

        let mut timestamps: Vec<T> = Vec::new();
        let mut values: Vec<Option<T>> = Vec::new();
        for (idx, line) in lines.enumerate() {
            let (timestamp, value) = parse_csv_record::<T>(idx, line, &options)?;
            timestamps.push(timestamp);
            values.push(value);
        }

        if timestamps.is_empty() {
            return Err(TimeSeriesError::EmptyData);
        }
        TimeSeries::new(timestamps, values)
    }
}

/// Parses one data row into its `(timestamp, value)` pair.
///
/// `idx` is the row's zero-based position among the data rows and is used as
/// the timestamp when no timestamp column is configured. Both the timestamp
/// and the value field are trimmed before parsing, because the `f64` parser
/// rejects surrounding whitespace.
fn parse_csv_record<T>(
    idx: usize,
    line: &str,
    options: &CsvImportOptions,
) -> Result<(T, Option<T>), TimeSeriesError>
where
    T: From<f64>,
{
    // Looks up a single trimmed field; a row that is too short is an error.
    let field = |column: usize| {
        line.split(options.delimiter)
            .nth(column)
            .map(str::trim)
            .ok_or(TimeSeriesError::EmptyData)
    };

    let timestamp = match options.timestamp_column {
        Some(column) => parse_csv_number(field(column)?)?,
        None => T::from(idx as f64),
    };

    let raw_value = field(options.value_column)?;
    let value = if raw_value == options.missing_value {
        None
    } else {
        Some(parse_csv_number(raw_value)?)
    };

    Ok((timestamp, value))
}

/// Parses an already-trimmed field as `f64` and converts it into `T`.
fn parse_csv_number<T>(field: &str) -> Result<T, TimeSeriesError>
where
    T: From<f64>,
{
    field
        .parse::<f64>()
        .map(T::from)
        .map_err(|_| TimeSeriesError::EmptyData)
}
#[cfg(test)]
mod tests {
    use super::*;

    /// A header followed by three well-formed rows yields three points.
    #[test]
    fn test_csv_string_import() {
        let input = "timestamp,value\n0,1.0\n1,2.0\n2,3.0";
        let opts = CsvImportOptions::default();

        let parsed = TimeSeries::<f64>::from_csv_string(input, opts);
        let series = parsed.unwrap();

        assert_eq!(series.len(), 3);
    }

    /// An empty value field is kept as a row whose value is `None`.
    #[test]
    fn test_csv_with_missing() {
        let input = "timestamp,value\n0,1.0\n1,\n2,3.0";
        // Spelled out in full so the test does not depend on the defaults.
        let opts = CsvImportOptions {
            delimiter: ',',
            has_header: true,
            missing_value: String::new(),
            timestamp_column: Some(0),
            value_column: 1,
        };

        let parsed = TimeSeries::<f64>::from_csv_string(input, opts);
        let series = parsed.unwrap();

        assert_eq!(series.len(), 3);
        assert!(series.values[1].is_none());
    }
}