use std::path::Path;
use polars::prelude::DataFrame;
use crate::error::{DataLoadError, Result};
use crate::file_type::{detect_file_type, FileType};
use crate::options::LoadOptions;
/// Configurable entry point for loading tabular data into a [`DataFrame`].
///
/// Wraps a single [`LoadOptions`] value that is passed by reference to the
/// format-specific loaders; the `with_*` builder methods adjust individual
/// options.
#[derive(Clone, Debug, Default)]
pub struct DataLoader {
    /// Options forwarded to the CSV / Excel loaders on every load call.
    options: LoadOptions,
}
impl DataLoader {
#[must_use]
pub fn new() -> Self {
Self::default()
}
#[must_use]
pub const fn with_options(options: LoadOptions) -> Self {
Self { options }
}
#[must_use]
pub fn with_delimiter(mut self, delimiter: crate::delimiter::Delimiter) -> Self {
self.options.delimiter = delimiter;
self
}
#[must_use]
pub const fn with_header(mut self, has_header: bool) -> Self {
self.options.has_header = has_header;
self
}
#[must_use]
pub const fn with_skip_rows(mut self, skip_rows: usize) -> Self {
self.options.skip_rows = skip_rows;
self
}
#[must_use]
pub const fn with_max_rows(mut self, max_rows: Option<usize>) -> Self {
self.options.max_rows = max_rows;
self
}
#[must_use]
pub const fn with_sheet_index(mut self, index: usize) -> Self {
self.options.sheet_index = Some(index);
self
}
#[must_use]
pub fn with_sheet_name(mut self, name: impl Into<String>) -> Self {
self.options.sheet_name = Some(name.into());
self
}
#[must_use]
pub const fn with_infer_schema(mut self, infer: bool) -> Self {
self.options.infer_schema = infer;
self
}
#[must_use]
pub const fn options(&self) -> &LoadOptions {
&self.options
}
pub fn load_file(&self, path: &Path) -> Result<DataFrame> {
let content = std::fs::read(path)?;
let filename = path
.file_name()
.and_then(|s| s.to_str())
.ok_or_else(|| DataLoadError::UnsupportedFileType("invalid path".into()))?;
self.load_bytes(&content, filename)
}
pub fn load_bytes(&self, content: &[u8], filename: &str) -> Result<DataFrame> {
let file_type = detect_file_type(content, filename)
.ok_or_else(|| DataLoadError::UnsupportedFileType(filename.to_string()))?;
match file_type {
FileType::Csv => crate::csv::load_csv(content, &self.options),
#[cfg(feature = "excel")]
FileType::Excel => crate::excel::load_excel(content, &self.options),
#[cfg(not(feature = "excel"))]
FileType::Excel => Err(DataLoadError::UnsupportedFileType(
"Excel support requires the 'excel' feature".into(),
)),
}
}
}
/// Loads the file at `path` using a [`DataLoader`] with default options.
///
/// # Errors
///
/// Returns whatever error [`DataLoader::load_file`] produces.
pub fn load_file(path: &Path) -> Result<DataFrame> {
    let loader = DataLoader::new();
    loader.load_file(path)
}
/// Loads in-memory `content` using a [`DataLoader`] with default options.
///
/// `filename` is passed through to the loader's file-type detection.
///
/// # Errors
///
/// Returns whatever error [`DataLoader::load_bytes`] produces.
pub fn load_bytes(content: &[u8], filename: &str) -> Result<DataFrame> {
    let loader = DataLoader::new();
    loader.load_bytes(content, filename)
}
#[cfg(test)]
mod tests {
    use super::*;
    use crate::Delimiter;

    /// A freshly constructed loader auto-detects the delimiter and expects a header.
    #[test]
    fn test_dataloader_default() {
        let loader = DataLoader::new();
        let opts = loader.options();

        assert_eq!(opts.delimiter, Delimiter::Auto);
        assert!(opts.has_header);
    }

    /// Each builder call overrides exactly the option it is named after.
    #[test]
    fn test_dataloader_builder() {
        let configured = DataLoader::new()
            .with_delimiter(Delimiter::Semicolon)
            .with_header(false)
            .with_skip_rows(2)
            .with_max_rows(Some(100));
        let opts = configured.options();

        assert_eq!(opts.delimiter, Delimiter::Semicolon);
        assert!(!opts.has_header);
        assert_eq!(opts.skip_rows, 2);
        assert_eq!(opts.max_rows, Some(100));
    }

    /// CSV bytes with a header and two records yield a 2x2 frame.
    #[test]
    fn test_load_bytes_csv() {
        let csv = b"name,value\nfoo,1\nbar,2";
        let loader = DataLoader::new();

        let frame = loader.load_bytes(csv, "test.csv").unwrap();

        assert_eq!(frame.shape(), (2, 2));
    }

    /// Input that is not a recognised type is rejected as unsupported.
    #[test]
    fn test_load_bytes_unsupported() {
        let json = b"{}";
        let loader = DataLoader::new();

        let outcome = loader.load_bytes(json, "test.json");

        assert!(matches!(
            outcome,
            Err(DataLoadError::UnsupportedFileType(_))
        ));
    }

    /// The free `load_bytes` function loads CSV bytes without an explicit loader.
    #[test]
    fn test_convenience_function() {
        let csv = b"x,y\n1,2";

        let frame = load_bytes(csv, "data.csv").unwrap();

        assert_eq!(frame.shape(), (1, 2));
    }
}