pub use alimentar::{ArrowDataset, CsvOptions, DataLoader, Dataset, JsonOptions};
pub use alimentar::{Error as AlimentarError, Result as AlimentarResult};
pub use alimentar::{RecordBatch, Schema, SchemaRef};
pub use alimentar::transform::{
Cast, Chain, Drop, FillNull, FillStrategy, Filter, Map, NormMethod, Normalize, Rename, Select,
Skip, Sort, SortOrder, Take, Transform, Unique,
};
pub use alimentar::{ColumnQuality, QualityChecker, QualityIssue, QualityProfile, QualityReport};
pub use alimentar::{ColumnDrift, DriftDetector, DriftReport, DriftSeverity, DriftTest};
pub use alimentar::DatasetSplit;
pub use alimentar::{
FederatedSplitCoordinator, FederatedSplitStrategy, GlobalSplitReport, NodeSplitInstruction,
NodeSplitManifest, NodeSummary, SplitQualityIssue,
};
/// Loads an [`ArrowDataset`] from the Parquet file at `path`.
///
/// This is a convenience wrapper: `path` is forwarded unchanged to
/// [`ArrowDataset::from_parquet`] and its result is returned as-is.
///
/// # Errors
///
/// Returns whatever error [`ArrowDataset::from_parquet`] reports for `path`.
pub fn load_parquet(path: &str) -> AlimentarResult<ArrowDataset> {
    ArrowDataset::from_parquet(path)
}
/// Loads an [`ArrowDataset`] from the CSV file at `path`.
///
/// This is a convenience wrapper: `path` is forwarded unchanged to
/// [`ArrowDataset::from_csv`] and its result is returned as-is.
///
/// # Errors
///
/// Returns whatever error [`ArrowDataset::from_csv`] reports for `path`.
pub fn load_csv(path: &str) -> AlimentarResult<ArrowDataset> {
    ArrowDataset::from_csv(path)
}
/// Loads an [`ArrowDataset`] from the JSON file at `path`.
///
/// This is a convenience wrapper: `path` is forwarded unchanged to
/// [`ArrowDataset::from_json`] and its result is returned as-is.
///
/// # Errors
///
/// Returns whatever error [`ArrowDataset::from_json`] reports for `path`.
pub fn load_json(path: &str) -> AlimentarResult<ArrowDataset> {
    ArrowDataset::from_json(path)
}
/// Wraps `dataset` in a [`DataLoader`] configured with this module's defaults.
///
/// The loader is built with a batch size of 32 and `shuffle(true)`.
#[must_use]
pub fn create_loader(dataset: ArrowDataset) -> DataLoader<ArrowDataset> {
    // Batch size applied when the caller does not choose one.
    const DEFAULT_BATCH_SIZE: usize = 32;

    let loader = DataLoader::new(dataset);
    loader.batch_size(DEFAULT_BATCH_SIZE).shuffle(true)
}
/// Wraps `dataset` in a [`DataLoader`] that uses the caller-supplied `batch_size`.
///
/// Only the batch size is configured here; `shuffle` is not called, so the
/// loader keeps whatever shuffle setting `DataLoader::new` starts with.
#[must_use]
pub fn create_loader_with_batch_size(
    dataset: ArrowDataset,
    batch_size: usize,
) -> DataLoader<ArrowDataset> {
    let loader = DataLoader::new(dataset);
    loader.batch_size(batch_size)
}
#[cfg(test)]
mod tests {
    use super::*;
    use std::io::Write;
    use tempfile::NamedTempFile;

    /// Creates a temp file holding a two-line CSV (`header`, then `row`) and
    /// flushes it so the loaders can read it back from disk.
    ///
    /// The returned handle must stay alive for as long as the path is used.
    fn two_line_csv(header: &str, row: &str) -> NamedTempFile {
        let mut file = NamedTempFile::new().expect("create temp file");
        writeln!(file, "{}", header).expect("write header");
        writeln!(file, "{}", row).expect("write row");
        file.flush().expect("flush file");
        file
    }

    /// Compile-time check that both loader constructors keep their signatures.
    #[test]
    fn test_dataloader_batch_size() {
        let default_ctor: fn(ArrowDataset) -> DataLoader<ArrowDataset> = create_loader;
        let sized_ctor: fn(ArrowDataset, usize) -> DataLoader<ArrowDataset> =
            create_loader_with_batch_size;
        let _ = (default_ctor, sized_ctor);
    }

    /// `CsvOptions` is re-exported and exposes a `default` constructor.
    #[test]
    fn test_csv_options_exists() {
        let _make_default: fn() -> CsvOptions = CsvOptions::default;
    }

    /// A missing Parquet file is reported as an error.
    #[test]
    fn test_load_parquet_file_not_found() {
        assert!(load_parquet("/nonexistent/path/to/file.parquet").is_err());
    }

    /// A missing CSV file is reported as an error.
    #[test]
    fn test_load_csv_file_not_found() {
        assert!(load_csv("/nonexistent/path/to/file.csv").is_err());
    }

    /// A missing JSON file is reported as an error.
    #[test]
    fn test_load_json_file_not_found() {
        assert!(load_json("/nonexistent/path/to/file.json").is_err());
    }

    /// A small, well-formed CSV loads into a non-empty dataset.
    #[test]
    fn test_load_csv_valid_file() {
        let csv_file = two_line_csv("name,value", "test,42");
        let csv_path = csv_file.path().to_str().expect("path to str");

        let result = load_csv(csv_path);
        assert!(result.is_ok());

        let dataset = result.expect("dataset");
        assert!(dataset.len() > 0);
    }

    /// Calling `load_json` on a one-object JSON file must not panic.
    #[test]
    fn test_load_json_valid_file() {
        let mut json_file = NamedTempFile::with_suffix(".json").expect("create temp file");
        writeln!(json_file, r#"{{"name": "test", "value": 42}}"#).expect("write json");
        json_file.flush().expect("flush file");

        // NOTE(review): the outcome is discarded, so this only exercises the
        // call path; confirm whether an `is_ok()` assertion was intended.
        let _outcome = load_json(json_file.path().to_str().expect("path to str"));
    }

    /// `create_loader` accepts a dataset loaded from a real file.
    #[test]
    fn test_create_loader_with_dataset() {
        let csv_file = two_line_csv("col1,col2", "a,1");
        let csv_path = csv_file.path().to_str().expect("path");
        let dataset = load_csv(csv_path).expect("load csv");

        let _loader = create_loader(dataset);
    }

    /// `create_loader_with_batch_size` accepts a dataset and a custom batch size.
    #[test]
    fn test_create_loader_with_custom_batch_size() {
        let csv_file = two_line_csv("col1,col2", "a,1");
        let csv_path = csv_file.path().to_str().expect("path");
        let dataset = load_csv(csv_path).expect("load csv");

        let _loader = create_loader_with_batch_size(dataset, 64);
    }

    /// `JsonOptions` is re-exported and exposes a `default` constructor.
    #[test]
    fn test_json_options_exists() {
        let _make_default: fn() -> JsonOptions = JsonOptions::default;
    }

    /// `QualityChecker` is nameable through this module's re-exports.
    #[test]
    fn test_quality_checker_accessible() {
        fn _accepts_checker(_: QualityChecker) {}
    }

    /// `DriftDetector` is nameable through this module's re-exports.
    #[test]
    fn test_drift_detector_accessible() {
        fn _accepts_detector(_: DriftDetector) {}
    }
}
#[cfg(test)]
mod property_tests {
    use proptest::prelude::*;

    proptest! {
        // Run 100 generated cases; every other setting stays at its default.
        #![proptest_config(ProptestConfig { cases: 100, ..ProptestConfig::default() })]

        // Batch sizes are drawn from 1..1000, so each generated value is at least 1.
        // NOTE(review): this only re-checks the generator's own range and does not
        // call any loader code; confirm whether a stronger property was intended.
        #[test]
        fn prop_batch_size_positive(batch_size in 1usize..1000) {
            prop_assert!(batch_size >= 1);
        }
    }
}