use crate::error::OhlcvError;
use chrono::NaiveDate;
use rust_decimal::Decimal;
use rust_decimal::prelude::FromStr;
use serde::{Deserialize, Serialize};
use std::fs::File;
use std::io::{BufRead, BufReader};
use std::path::Path;
use zip::ZipArchive;
/// One OHLCV (open / high / low / close / volume) record read from a
/// `;`-separated CSV row.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct OhlcvCandle {
/// Calendar date of the record; the reader parses it from the first CSV
/// column using the `%d/%m/%Y` layout.
pub date: NaiveDate,
/// Time column, stored exactly as it appears in the CSV (not parsed).
pub time: String,
/// Opening price.
pub open: Decimal,
/// Highest price.
pub high: Decimal,
/// Lowest price.
pub low: Decimal,
/// Closing price.
pub close: Decimal,
/// Volume, parsed from the last CSV column as an unsigned integer.
pub volume: u64,
}
pub fn read_ohlcv_from_zip(
zip_path: &str,
start_date: Option<&str>,
end_date: Option<&str>,
) -> Result<Vec<OhlcvCandle>, OhlcvError> {
let start = if let Some(start_str) = start_date {
Some(NaiveDate::parse_from_str(start_str, "%d/%m/%Y")?)
} else {
None
};
let end = if let Some(end_str) = end_date {
Some(NaiveDate::parse_from_str(end_str, "%d/%m/%Y")?)
} else {
None
};
if let (Some(start_date), Some(end_date)) = (&start, &end)
&& start_date > end_date
{
return Err(OhlcvError::InvalidParameter {
reason: format!("Start date {start_date} is after end date {end_date}"),
});
}
let file = File::open(Path::new(zip_path))?;
let mut archive = ZipArchive::new(file)?;
let mut csv_index = None;
for i in 0..archive.len() {
let file = archive.by_index(i)?;
if file.name().ends_with(".csv") {
csv_index = Some(i);
break;
}
}
let csv_index = csv_index.ok_or(OhlcvError::OtherError {
reason: "No CSV file found in ZIP archive".to_string(),
})?;
let file = archive.by_index(csv_index)?;
let reader = BufReader::new(file);
let mut candles = Vec::new();
for (line_num, line_result) in reader.lines().enumerate() {
if line_num == 0
&& line_result
.as_ref()
.is_ok_and(|l| l.contains("date") || l.contains("Date"))
{
continue;
}
let line = line_result?;
let parts: Vec<&str> = line.split(';').collect();
if parts.len() != 7 {
return Err(OhlcvError::CsvError {
reason: format!(
"Invalid CSV format at line {}: expected 7 fields, got {}",
line_num + 1,
parts.len()
),
});
}
let date = NaiveDate::parse_from_str(parts[0], "%d/%m/%Y")?;
if (start.is_some() && date < start.unwrap()) || (end.is_some() && date > end.unwrap()) {
continue;
}
let candle = OhlcvCandle {
date,
time: parts[1].to_string(),
open: Decimal::from_str(parts[2])?,
high: Decimal::from_str(parts[3])?,
low: Decimal::from_str(parts[4])?,
close: Decimal::from_str(parts[5])?,
volume: parts[6].parse::<u64>().map_err(|e| OhlcvError::CsvError {
reason: format!("Invalid volume at line {}: {}", line_num + 1, e),
})?,
};
candles.push(candle);
}
Ok(candles)
}
/// Async wrapper around [`read_ohlcv_from_zip`].
///
/// The synchronous reader is executed through `tokio::task::spawn_blocking`,
/// so the arguments are taken by value and moved into the spawned closure.
///
/// # Errors
///
/// Returns the reader's own error unchanged, or `OhlcvError::OtherError` when
/// awaiting the spawned task itself fails.
#[cfg(feature = "async")]
pub async fn read_ohlcv_from_zip_async(
    zip_path: String,
    start_date: Option<String>,
    end_date: Option<String>,
) -> Result<Vec<OhlcvCandle>, OhlcvError> {
    let handle = tokio::task::spawn_blocking(move || {
        let from = start_date.as_deref();
        let until = end_date.as_deref();
        read_ohlcv_from_zip(zip_path.as_str(), from, until)
    });

    match handle.await {
        Ok(outcome) => outcome,
        Err(join_error) => Err(OhlcvError::OtherError {
            reason: format!("Async task error: {}", join_error),
        }),
    }
}
#[cfg(test)]
mod ohlcv_tests {
use super::*;
use chrono::NaiveDate;
use mockall::predicate::*;
use rust_decimal_macros::dec;
use std::io::Write;
use tempfile::NamedTempFile;
use zip::{ZipWriter, write::FileOptions};
fn create_test_zip(data: &str) -> Result<(String, NamedTempFile), OhlcvError> {
let temp_file = NamedTempFile::new().map_err(|e| OhlcvError::IoError {
reason: e.to_string(),
})?;
let mut zip = ZipWriter::new(File::create(temp_file.path())?);
let options: FileOptions<'_, ()> =
FileOptions::default().compression_method(zip::CompressionMethod::Stored);
zip.start_file("test_data.csv", options)
.map_err(|e| OhlcvError::ZipError {
reason: e.to_string(),
})?;
zip.write_all(data.as_bytes())
.map_err(|e| OhlcvError::IoError {
reason: e.to_string(),
})?;
zip.finish().map_err(|e| OhlcvError::ZipError {
reason: e.to_string(),
})?;
Ok((temp_file.path().to_string_lossy().to_string(), temp_file))
}
/// A bounded range returns the matching rows with every field parsed.
#[test]
fn test_read_ohlcv_valid_data() -> Result<(), OhlcvError> {
    let rows = "date;time;open;high;low;close;volume\n\
                01/01/2022;10:00:00;100.0;110.0;95.0;105.0;5000\n\
                02/01/2022;10:00:00;105.0;112.0;104.0;110.0;6000\n\
                03/01/2022;10:00:00;110.0;115.0;108.0;114.0;7000";
    let (archive_path, _guard) = create_test_zip(rows)?;

    let candles = read_ohlcv_from_zip(&archive_path, Some("01/01/2022"), Some("02/01/2022"))?;
    assert_eq!(candles.len(), 2, "Should return exactly 2 candles");

    // (day of January 2022, open, high, low, close, volume)
    let expected = [
        (1, dec!(100.0), dec!(110.0), dec!(95.0), dec!(105.0), 5000),
        (2, dec!(105.0), dec!(112.0), dec!(104.0), dec!(110.0), 6000),
    ];
    for (candle, (day, open, high, low, close, volume)) in candles.iter().zip(expected) {
        assert_eq!(
            candle.date,
            NaiveDate::from_ymd_opt(2022, 1, day).unwrap()
        );
        assert_eq!(candle.time, "10:00:00");
        assert_eq!(candle.open, open);
        assert_eq!(candle.high, high);
        assert_eq!(candle.low, low);
        assert_eq!(candle.close, close);
        assert_eq!(candle.volume, volume);
    }
    Ok(())
}
/// A CSV without a header line is read from its very first row.
#[test]
fn test_read_ohlcv_without_header() -> Result<(), OhlcvError> {
    let rows = "01/01/2022;10:00:00;100.0;110.0;95.0;105.0;5000\n\
                02/01/2022;10:00:00;105.0;112.0;104.0;110.0;6000";
    let (archive_path, _guard) = create_test_zip(rows)?;

    let found = read_ohlcv_from_zip(&archive_path, Some("01/01/2022"), Some("02/01/2022"))?;

    assert_eq!(
        found.len(),
        2,
        "Should return exactly 2 candles even without header"
    );
    Ok(())
}
/// A start date later than the end date is rejected as `InvalidParameter`.
#[test]
fn test_read_ohlcv_invalid_date_range() {
    let rows = "date;time;open;high;low;close;volume\n\
                01/01/2022;10:00:00;100.0;110.0;95.0;105.0;5000";
    let (archive_path, _guard) = create_test_zip(rows).unwrap();

    let outcome = read_ohlcv_from_zip(&archive_path, Some("02/01/2022"), Some("01/01/2022"));

    assert!(
        outcome.is_err(),
        "Should return an error for invalid date range"
    );
    match outcome {
        Err(OhlcvError::InvalidParameter { reason }) => assert!(
            reason.contains("Start date"),
            "Error should mention start date being after end date"
        ),
        _ => panic!("Expected InvalidParameter error"),
    }
}
/// Opening a path that does not exist surfaces as `IoError`.
#[test]
fn test_read_ohlcv_nonexistent_file() {
    let outcome = read_ohlcv_from_zip(
        "nonexistent_file.zip",
        Some("01/01/2022"),
        Some("31/12/2022"),
    );

    assert!(
        outcome.is_err(),
        "Should return an error for nonexistent file"
    );
    match outcome {
        Err(OhlcvError::IoError { .. }) => {}
        _ => panic!("Expected IoError"),
    }
}
/// A row with too few fields is reported as `CsvError` naming the expected count.
#[test]
fn test_read_ohlcv_invalid_csv_format() -> Result<(), OhlcvError> {
    let rows = "date;time;open;high;low;close\n\
                01/01/2022;10:00:00;100.0;110.0;95.0;105.0";
    let (archive_path, _guard) = create_test_zip(rows)?;

    let outcome = read_ohlcv_from_zip(&archive_path, Some("01/01/2022"), Some("31/12/2022"));

    assert!(
        outcome.is_err(),
        "Should return an error for invalid CSV format"
    );
    match outcome {
        Err(OhlcvError::CsvError { reason }) => assert!(
            reason.contains("expected 7 fields"),
            "Error should mention expected field count"
        ),
        _ => panic!("Expected CsvError"),
    }
    Ok(())
}
/// A non-numeric price column yields `DecimalParseError`.
#[test]
fn test_read_ohlcv_invalid_decimal() -> Result<(), OhlcvError> {
    let rows = "date;time;open;high;low;close;volume\n\
                01/01/2022;10:00:00;not_a_number;110.0;95.0;105.0;5000";
    let (archive_path, _guard) = create_test_zip(rows)?;

    let outcome = read_ohlcv_from_zip(&archive_path, Some("01/01/2022"), Some("31/12/2022"));

    assert!(
        outcome.is_err(),
        "Should return an error for invalid decimal"
    );
    assert!(
        matches!(outcome, Err(OhlcvError::DecimalParseError { .. })),
        "Expected DecimalParseError"
    );
    Ok(())
}
/// A non-numeric volume column yields `CsvError`.
#[test]
fn test_read_ohlcv_invalid_volume() -> Result<(), OhlcvError> {
    let rows = "date;time;open;high;low;close;volume\n\
                01/01/2022;10:00:00;100.0;110.0;95.0;105.0;not_a_number";
    let (archive_path, _guard) = create_test_zip(rows)?;

    let outcome = read_ohlcv_from_zip(&archive_path, Some("01/01/2022"), Some("31/12/2022"));

    assert!(outcome.is_err(), "Should return an error for invalid volume");
    match outcome {
        Err(OhlcvError::CsvError { .. }) => {}
        _ => panic!("Expected CsvError"),
    }
    Ok(())
}
/// A row date that is not in `DD/MM/YYYY` form yields `DateParseError`.
#[test]
fn test_read_ohlcv_invalid_date_format() -> Result<(), OhlcvError> {
    let rows = "date;time;open;high;low;close;volume\n\
                2022-01-01;10:00:00;100.0;110.0;95.0;105.0;5000";
    let (archive_path, _guard) = create_test_zip(rows)?;

    let outcome = read_ohlcv_from_zip(&archive_path, Some("01/01/2022"), Some("31/12/2022"));

    assert!(
        outcome.is_err(),
        "Should return an error for invalid date format"
    );
    assert!(
        matches!(outcome, Err(OhlcvError::DateParseError { .. })),
        "Expected DateParseError"
    );
    Ok(())
}
/// An empty CSV entry produces an empty result rather than an error.
#[test]
fn test_read_ohlcv_empty_file() -> Result<(), OhlcvError> {
    let (archive_path, _guard) = create_test_zip("")?;

    let found = read_ohlcv_from_zip(&archive_path, Some("01/01/2022"), Some("31/12/2022"))?;

    assert_eq!(
        found.len(),
        0,
        "Should return empty vector for empty file"
    );
    Ok(())
}
/// A range that lies entirely after the data returns no candles.
#[test]
fn test_read_ohlcv_no_matching_dates() -> Result<(), OhlcvError> {
    let rows = "date;time;open;high;low;close;volume\n\
                01/01/2022;10:00:00;100.0;110.0;95.0;105.0;5000\n\
                02/01/2022;10:00:00;105.0;112.0;104.0;110.0;6000";
    let (archive_path, _guard) = create_test_zip(rows)?;

    let found = read_ohlcv_from_zip(&archive_path, Some("03/01/2022"), Some("04/01/2022"))?;

    assert_eq!(
        found.len(),
        0,
        "Should return empty vector when no dates match"
    );
    Ok(())
}
/// Only the rows inside the requested range are returned, in file order.
#[test]
fn test_read_ohlcv_partial_matches() -> Result<(), OhlcvError> {
    let rows = "date;time;open;high;low;close;volume\n\
                01/01/2022;10:00:00;100.0;110.0;95.0;105.0;5000\n\
                02/01/2022;10:00:00;105.0;112.0;104.0;110.0;6000\n\
                03/01/2022;10:00:00;110.0;115.0;108.0;114.0;7000";
    let (archive_path, _guard) = create_test_zip(rows)?;

    let found = read_ohlcv_from_zip(&archive_path, Some("02/01/2022"), Some("03/01/2022"))?;

    assert_eq!(found.len(), 2, "Should return exactly 2 candles");
    for (candle, day) in found.iter().zip([2, 3]) {
        assert_eq!(
            candle.date,
            NaiveDate::from_ymd_opt(2022, 1, day).unwrap()
        );
    }
    Ok(())
}
/// Every `OhlcvError` variant renders with its expected `Display` prefix.
#[test]
fn test_ohlcv_error_display() {
    let reason = || "test reason".to_string();

    let cases = [
        (
            OhlcvError::IoError { reason: reason() },
            "IO error: test reason",
        ),
        (
            OhlcvError::ZipError { reason: reason() },
            "ZIP error: test reason",
        ),
        (
            OhlcvError::CsvError { reason: reason() },
            "CSV error: test reason",
        ),
        (
            OhlcvError::DateParseError { reason: reason() },
            "Date parse error: test reason",
        ),
        (
            OhlcvError::DecimalParseError { reason: reason() },
            "Decimal parse error: test reason",
        ),
        (
            OhlcvError::InvalidParameter { reason: reason() },
            "Invalid parameter: test reason",
        ),
        (
            OhlcvError::OtherError { reason: reason() },
            "Error: test reason",
        ),
    ];

    for (error, rendered) in cases {
        assert_eq!(format!("{error}"), rendered);
    }
}
/// The `From` conversions map each foreign error onto the matching variant.
#[test]
fn test_read_ohlcv_error_conversions() {
    // std::io::Error -> IoError
    let from_io: OhlcvError =
        std::io::Error::new(std::io::ErrorKind::NotFound, "file not found").into();
    assert!(matches!(from_io, OhlcvError::IoError { .. }));

    // zip::result::ZipError -> ZipError
    let from_zip: OhlcvError = zip::result::ZipError::FileNotFound.into();
    assert!(matches!(from_zip, OhlcvError::ZipError { .. }));

    // chrono parse failure -> DateParseError
    let bad_date = NaiveDate::parse_from_str("invalid date", "%d/%m/%Y");
    assert!(bad_date.is_err());
    let from_date = OhlcvError::from(bad_date.unwrap_err());
    assert!(matches!(from_date, OhlcvError::DateParseError { .. }));

    // rust_decimal parse failure -> DecimalParseError
    let bad_decimal = Decimal::from_str("not a number");
    assert!(bad_decimal.is_err());
    let from_decimal = OhlcvError::from(bad_decimal.unwrap_err());
    assert!(matches!(from_decimal, OhlcvError::DecimalParseError { .. }));
}
/// A candle survives a JSON serialize / deserialize round trip field by field.
#[test]
fn test_ohlcv_struct_serialization() {
    let original = OhlcvCandle {
        date: NaiveDate::from_ymd_opt(2022, 1, 1).unwrap(),
        time: "10:00:00".to_string(),
        open: dec!(100.0),
        high: dec!(110.0),
        low: dec!(95.0),
        close: dec!(105.0),
        volume: 5000,
    };

    let encoded = serde_json::to_string(&original).unwrap();
    let restored: OhlcvCandle = serde_json::from_str(&encoded).unwrap();

    assert_eq!(original.date, restored.date);
    assert_eq!(original.time, restored.time);
    assert_eq!(original.open, restored.open);
    assert_eq!(original.high, restored.high);
    assert_eq!(original.low, restored.low);
    assert_eq!(original.close, restored.close);
    assert_eq!(original.volume, restored.volume);
}
/// With no bounds at all, every row of the file is returned.
#[test]
fn test_read_ohlcv_all_data_no_dates() -> Result<(), OhlcvError> {
    let rows = "date;time;open;high;low;close;volume\n\
                01/01/2022;10:00:00;100.0;110.0;95.0;105.0;5000\n\
                02/01/2022;10:00:00;105.0;112.0;104.0;110.0;6000\n\
                03/01/2022;10:00:00;110.0;115.0;108.0;114.0;7000";
    let (archive_path, _guard) = create_test_zip(rows)?;

    let found = read_ohlcv_from_zip(&archive_path, None, None)?;

    assert_eq!(
        found.len(),
        3,
        "Should return all 3 candles when no dates specified"
    );
    for (candle, day) in found.iter().zip([1, 2, 3]) {
        assert_eq!(
            candle.date,
            NaiveDate::from_ymd_opt(2022, 1, day).unwrap()
        );
    }
    Ok(())
}
/// With only a start bound, rows from that date onwards are returned.
#[test]
fn test_read_ohlcv_only_start_date() -> Result<(), OhlcvError> {
    let rows = "date;time;open;high;low;close;volume\n\
                01/01/2022;10:00:00;100.0;110.0;95.0;105.0;5000\n\
                02/01/2022;10:00:00;105.0;112.0;104.0;110.0;6000\n\
                03/01/2022;10:00:00;110.0;115.0;108.0;114.0;7000";
    let (archive_path, _guard) = create_test_zip(rows)?;

    let found = read_ohlcv_from_zip(&archive_path, Some("02/01/2022"), None)?;

    assert_eq!(
        found.len(),
        2,
        "Should return 2 candles from start date onwards"
    );
    for (candle, day) in found.iter().zip([2, 3]) {
        assert_eq!(
            candle.date,
            NaiveDate::from_ymd_opt(2022, 1, day).unwrap()
        );
    }
    Ok(())
}
/// With only an end bound, rows up to and including that date are returned.
#[test]
fn test_read_ohlcv_only_end_date() -> Result<(), OhlcvError> {
    let rows = "date;time;open;high;low;close;volume\n\
                01/01/2022;10:00:00;100.0;110.0;95.0;105.0;5000\n\
                02/01/2022;10:00:00;105.0;112.0;104.0;110.0;6000\n\
                03/01/2022;10:00:00;110.0;115.0;108.0;114.0;7000";
    let (archive_path, _guard) = create_test_zip(rows)?;

    let found = read_ohlcv_from_zip(&archive_path, None, Some("02/01/2022"))?;

    assert_eq!(found.len(), 2, "Should return 2 candles up to end date");
    for (candle, day) in found.iter().zip([1, 2]) {
        assert_eq!(
            candle.date,
            NaiveDate::from_ymd_opt(2022, 1, day).unwrap()
        );
    }
    Ok(())
}
/// A start bound that is not `DD/MM/YYYY` is rejected with `DateParseError`.
#[test]
fn test_read_ohlcv_invalid_start_date_format() {
    let rows = "date;time;open;high;low;close;volume\n\
                01/01/2022;10:00:00;100.0;110.0;95.0;105.0;5000";
    let (archive_path, _guard) = create_test_zip(rows).unwrap();

    let outcome = read_ohlcv_from_zip(&archive_path, Some("2022-01-01"), None);

    assert!(
        outcome.is_err(),
        "Should return an error for invalid start date format"
    );
    assert!(
        matches!(outcome, Err(OhlcvError::DateParseError { .. })),
        "Expected DateParseError"
    );
}
/// An end bound that is not `DD/MM/YYYY` is rejected with `DateParseError`.
#[test]
fn test_read_ohlcv_invalid_end_date_format() {
    let rows = "date;time;open;high;low;close;volume\n\
                01/01/2022;10:00:00;100.0;110.0;95.0;105.0;5000";
    let (archive_path, _guard) = create_test_zip(rows).unwrap();

    let outcome = read_ohlcv_from_zip(&archive_path, None, Some("2022-01-01"));

    assert!(
        outcome.is_err(),
        "Should return an error for invalid end date format"
    );
    match outcome {
        Err(OhlcvError::DateParseError { .. }) => {}
        _ => panic!("Expected DateParseError"),
    }
}
/// A lone start bound later than every row returns no candles.
#[test]
fn test_read_ohlcv_no_matching_dates_with_only_start_date() -> Result<(), OhlcvError> {
    let rows = "date;time;open;high;low;close;volume\n\
                01/01/2022;10:00:00;100.0;110.0;95.0;105.0;5000\n\
                02/01/2022;10:00:00;105.0;112.0;104.0;110.0;6000";
    let (archive_path, _guard) = create_test_zip(rows)?;

    let found = read_ohlcv_from_zip(&archive_path, Some("03/01/2022"), None)?;

    assert_eq!(
        found.len(),
        0,
        "Should return empty vector when no dates match the start date criteria"
    );
    Ok(())
}
/// A lone end bound earlier than every row returns no candles.
#[test]
fn test_read_ohlcv_no_matching_dates_with_only_end_date() -> Result<(), OhlcvError> {
    let rows = "date;time;open;high;low;close;volume\n\
                01/01/2022;10:00:00;100.0;110.0;95.0;105.0;5000\n\
                02/01/2022;10:00:00;105.0;112.0;104.0;110.0;6000";
    let (archive_path, _guard) = create_test_zip(rows)?;

    let found = read_ohlcv_from_zip(&archive_path, None, Some("31/12/2021"))?;

    assert_eq!(
        found.len(),
        0,
        "Should return empty vector when no dates match the end date criteria"
    );
    Ok(())
}
}