// dataload 0.1.1
//
// A flexible data loading library for CSV and Excel files with automatic delimiter detection.
// Documentation
//! # dataload
//!
//! A flexible library for loading CSV and Excel files into Polars DataFrames.
//!
//! `dataload` provides automatic file type detection, intelligent delimiter detection
//! for CSV files, and a builder-pattern API for customizing how files are loaded.
//!
//! ## Features
//!
//! - **Automatic file type detection** via magic bytes and file extensions
//! - **Smart delimiter detection** for CSV files (comma, tab, semicolon, pipe)
//! - **Excel support** for xlsx, xls, xlsm, xlsb, and ods formats
//! - **Builder-pattern API** for flexible configuration
//! - **Zero-copy where possible** using Polars DataFrames
//!
//! ## Quick Start
//!
//! ```
//! use dataload::DataLoader;
//!
//! // Load from bytes with automatic detection
//! let csv_data = b"name,age,city\nAlice,30,NYC\nBob,25,LA";
//! let df = DataLoader::new()
//!     .load_bytes(csv_data, "people.csv")?;
//!
//! assert_eq!(df.shape(), (2, 3));
//! # Ok::<(), dataload::DataLoadError>(())
//! ```
//!
//! ## Loading Files
//!
//! ```no_run
//! use dataload::{DataLoader, load_file};
//! use std::path::Path;
//!
//! // Using the convenience function
//! let df = load_file(Path::new("data.csv"))?;
//!
//! // Or with custom options
//! let df = DataLoader::new()
//!     .with_header(false)
//!     .with_skip_rows(1)
//!     .load_file(Path::new("data.csv"))?;
//! # Ok::<(), dataload::DataLoadError>(())
//! ```
//!
//! ## Custom Delimiters
//!
//! ```
//! use dataload::{DataLoader, Delimiter};
//!
//! // Force a specific delimiter
//! let tsv_data = b"col1\tcol2\n1\t2";
//! let df = DataLoader::new()
//!     .with_delimiter(Delimiter::Tab)
//!     .load_bytes(tsv_data, "data.tsv")?;
//! # Ok::<(), dataload::DataLoadError>(())
//! ```
//!
//! ## Excel Files
//!
//! Excel support requires the `excel` feature (enabled by default):
//!
//! ```no_run
//! use dataload::DataLoader;
//! use std::path::Path;
//!
//! let df = DataLoader::new()
//!     .with_sheet_name("Sales")
//!     .load_file(Path::new("report.xlsx"))?;
//! # Ok::<(), dataload::DataLoadError>(())
//! ```
//!
//! ## Feature Flags
//!
//! - `csv` (default): CSV/TSV file support
//! - `excel` (default): Excel file support (xlsx, xls, etc.)
//!
//! To disable Excel support and reduce dependencies:
//!
//! ```toml
//! [dependencies]
//! dataload = { version = "0.1", default-features = false, features = ["csv"] }
//! ```

// Crate-wide lint configuration: warn on undocumented public items and enable
// both the default and the pedantic Clippy lint groups.
#![warn(missing_docs)]
#![warn(clippy::all)]
#![warn(clippy::pedantic)]
// Opt out of one pedantic lint.
// NOTE(review): presumably needed because re-exported names such as
// `loader::DataLoader` and `file_type::FileType` repeat their module names —
// confirm this is still the reason.
#![allow(clippy::module_name_repetitions)]

// `csv` is the only module declared `pub`; the remaining modules are private,
// so their items are reachable by users only through re-exports.
// NOTE(review): the crate docs list a `csv` feature flag, but neither this
// module declaration nor the `load_csv_with_fallback` re-export below is
// `cfg`-gated here — confirm whether the gating happens elsewhere.
pub mod csv;
mod delimiter;
mod error;
// The Excel backend is compiled only when the `excel` feature is enabled.
#[cfg(feature = "excel")]
mod excel;
mod file_type;
mod loader;
mod options;

// Re-export main types at crate root for ergonomic imports
// (e.g. `dataload::DataLoader` rather than a path through a private module).
pub use delimiter::Delimiter;
pub use error::{DataLoadError, Result};
pub use file_type::FileType;
pub use loader::{load_bytes, load_file, DataLoader};
pub use options::LoadOptions;


// CSV utilities: also reachable as `dataload::csv::load_csv_with_fallback`
// because the `csv` module itself is public.
pub use csv::load_csv_with_fallback;

// Re-export polars DataFrame for convenience, so callers can name the result
// type as `dataload::DataFrame`.
pub use polars::prelude::DataFrame;

// Excel-specific re-exports; gated on the same feature as the `excel` module.
#[cfg(feature = "excel")]
pub use excel::list_sheets;

/// Prelude module for convenient imports.
///
/// ```
/// use dataload::prelude::*;
/// ```
pub mod prelude {
    pub use crate::csv::load_csv_with_fallback;
    pub use crate::delimiter::Delimiter;
    pub use crate::error::{DataLoadError, Result};
    pub use crate::loader::{load_bytes, load_file, DataLoader};
    pub use crate::options::LoadOptions;
    pub use polars::prelude::DataFrame;
}

#[cfg(test)]
mod tests {
    use super::*;

    /// The prelude glob must expose the builder, delimiter, and options types.
    #[test]
    fn test_prelude_imports() {
        use crate::prelude::*;

        let _ = DataLoader::new();
        let _ = Delimiter::Auto;
        let _ = LoadOptions::new();
    }

    /// Comma-separated input with a header row yields the expected shape and
    /// column names.
    #[test]
    fn test_basic_csv_loading() {
        let input = b"a,b,c\n1,2,3\n4,5,6";
        let outcome = load_bytes(input, "test.csv");
        let frame = outcome.unwrap();
        assert_eq!(frame.shape(), (2, 3));
        assert_eq!(frame.get_column_names(), &["a", "b", "c"]);
    }

    /// Tab-separated input loads through a default loader without an explicit
    /// delimiter being configured.
    #[test]
    fn test_tab_separated() {
        let input = b"x\ty\tz\n10\t20\t30";
        let outcome = DataLoader::new().load_bytes(input, "test.tsv");
        let frame = outcome.unwrap();
        assert_eq!(frame.shape(), (1, 3));
    }

    /// Semicolon-separated input in a `.csv` file is split into three columns.
    #[test]
    fn test_semicolon_separated() {
        let input = b"col1;col2;col3\na;b;c\nd;e;f";
        let outcome = load_bytes(input, "test.csv");
        let frame = outcome.unwrap();
        assert_eq!(frame.shape(), (2, 3));
    }

    /// With headers disabled, the first line is counted as a data row.
    #[test]
    fn test_no_header() {
        let input = b"1,2,3\n4,5,6";
        let outcome = DataLoader::new()
            .with_header(false)
            .load_bytes(input, "test.csv");
        let frame = outcome.unwrap();
        assert_eq!(frame.shape(), (2, 3));
    }

    /// A row limit of two truncates five data rows down to two.
    #[test]
    fn test_max_rows() {
        let input = b"h1,h2\n1,2\n3,4\n5,6\n7,8\n9,10";
        let outcome = DataLoader::new()
            .with_max_rows(Some(2))
            .load_bytes(input, "test.csv");
        let frame = outcome.unwrap();
        assert_eq!(frame.shape(), (2, 2));
    }

    /// A `.json` file name is rejected with `UnsupportedFileType`.
    #[test]
    fn test_unsupported_file_type() {
        let outcome = load_bytes(b"{}", "test.json");
        assert!(matches!(
            outcome,
            Err(DataLoadError::UnsupportedFileType(_))
        ));
    }
}