dataload 0.1.1

A flexible data loading library for CSV and Excel files with automatic delimiter detection
Documentation
//! Configuration options for data loading operations.

use crate::delimiter::Delimiter;

/// Options that control how a data file is loaded.
///
/// Every field is public, and the `with_*` methods provide a chainable
/// builder-style API on top of the default values.
///
/// Two option sets compare equal (`==`) when all of their fields are equal.
///
/// # Examples
///
/// ```
/// use dataload::{LoadOptions, Delimiter};
///
/// // Default options
/// let opts = LoadOptions::default();
///
/// // Custom options
/// let opts = LoadOptions::new()
///     .with_delimiter(Delimiter::Tab)
///     .with_header(false)
///     .with_skip_rows(1);
/// ```
#[derive(Debug, Clone, PartialEq)]
pub struct LoadOptions {
    /// Delimiter used when parsing CSV input.
    pub delimiter: Delimiter,

    /// `true` when the first row holds column headers.
    pub has_header: bool,

    /// How many rows to skip from the start of the input.
    pub skip_rows: usize,

    /// Upper bound on the number of rows to read; `None` reads all rows.
    pub max_rows: Option<usize>,

    /// Excel only: 0-indexed sheet to read.
    /// `None` means read the first sheet.
    pub sheet_index: Option<usize>,

    /// Excel only: name of the sheet to read.
    /// Takes precedence over `sheet_index` if both are set.
    pub sheet_name: Option<String>,

    /// Whether column data types should be inferred.
    pub infer_schema: bool,

    /// Number of rows used for schema inference; `None` means use all rows.
    pub infer_schema_length: Option<usize>,
}

impl Default for LoadOptions {
    /// Returns the same option set as [`LoadOptions::new`].
    fn default() -> Self {
        Self::new()
    }
}

impl LoadOptions {
    /// Creates a new `LoadOptions` with default values.
    ///
    /// Defaults: automatic delimiter detection, first row treated as a header,
    /// no skipped rows, no row limit, no explicit sheet selection, and schema
    /// inference enabled using 1000 rows.
    ///
    /// This is a `const fn`, so together with the `const` builder methods an
    /// option set can be assembled in a `const` context.
    #[must_use]
    pub const fn new() -> Self {
        // The defaults live here (not in `Default::default`) because trait
        // methods cannot be called from a `const fn`; `Default` delegates to us.
        Self {
            delimiter: Delimiter::Auto,
            has_header: true,
            skip_rows: 0,
            max_rows: None,
            sheet_index: None,
            sheet_name: None,
            infer_schema: true,
            infer_schema_length: Some(1000),
        }
    }

    /// Sets the delimiter for CSV parsing.
    ///
    /// Use `Delimiter::Auto` (default) for automatic detection.
    #[must_use]
    pub const fn with_delimiter(mut self, delimiter: Delimiter) -> Self {
        self.delimiter = delimiter;
        self
    }

    /// Sets whether the first row contains headers.
    ///
    /// Default is `true`.
    #[must_use]
    pub const fn with_header(mut self, has_header: bool) -> Self {
        self.has_header = has_header;
        self
    }

    /// Sets the number of rows to skip from the start of the file.
    ///
    /// This is applied before header detection, so if `skip_rows = 1`
    /// and `has_header = true`, the second row becomes the header.
    ///
    /// Default is `0`.
    #[must_use]
    pub const fn with_skip_rows(mut self, skip_rows: usize) -> Self {
        self.skip_rows = skip_rows;
        self
    }

    /// Sets the maximum number of rows to read.
    ///
    /// `None` means read all rows (default).
    #[must_use]
    pub const fn with_max_rows(mut self, max_rows: Option<usize>) -> Self {
        self.max_rows = max_rows;
        self
    }

    /// Sets the sheet index to read from Excel files (0-indexed).
    ///
    /// A sheet name set via [`LoadOptions::with_sheet_name`] is left untouched
    /// and takes precedence over the index if both are set.
    #[must_use]
    pub const fn with_sheet_index(mut self, index: usize) -> Self {
        self.sheet_index = Some(index);
        self
    }

    /// Sets the sheet name to read from Excel files.
    ///
    /// Takes precedence over `sheet_index` if both are set. Any previously
    /// set sheet index is kept as is.
    #[must_use]
    pub fn with_sheet_name(mut self, name: impl Into<String>) -> Self {
        // Not `const`: the `Into<String>` conversion is a trait call.
        self.sheet_name = Some(name.into());
        self
    }

    /// Sets whether to infer data types for columns.
    ///
    /// Default is `true`.
    #[must_use]
    pub const fn with_infer_schema(mut self, infer: bool) -> Self {
        self.infer_schema = infer;
        self
    }

    /// Sets the number of rows to use for schema inference.
    ///
    /// `None` means use all rows. Default is `Some(1000)`.
    #[must_use]
    pub const fn with_infer_schema_length(mut self, length: Option<usize>) -> Self {
        self.infer_schema_length = length;
        self
    }
}

#[cfg(test)]
mod tests {
    use super::*;

    /// `Default` must yield every documented default, including the sheet
    /// selection and schema-inference length.
    #[test]
    fn test_default_options() {
        let opts = LoadOptions::default();
        assert_eq!(opts.delimiter, Delimiter::Auto);
        assert!(opts.has_header);
        assert_eq!(opts.skip_rows, 0);
        assert_eq!(opts.max_rows, None);
        assert_eq!(opts.sheet_index, None);
        assert_eq!(opts.sheet_name, None);
        assert!(opts.infer_schema);
        assert_eq!(opts.infer_schema_length, Some(1000));
    }

    /// `new()` and `default()` must describe the same option set.
    #[test]
    fn test_new_matches_default() {
        // Compared through `Debug` so the test does not depend on
        // `LoadOptions` implementing `PartialEq`.
        assert_eq!(
            format!("{:?}", LoadOptions::new()),
            format!("{:?}", LoadOptions::default())
        );
    }

    /// Chained builder calls each update their own field.
    #[test]
    fn test_builder_chain() {
        let opts = LoadOptions::new()
            .with_delimiter(Delimiter::Tab)
            .with_header(false)
            .with_skip_rows(2)
            .with_max_rows(Some(100))
            .with_sheet_name("Sheet2");

        assert_eq!(opts.delimiter, Delimiter::Tab);
        assert!(!opts.has_header);
        assert_eq!(opts.skip_rows, 2);
        assert_eq!(opts.max_rows, Some(100));
        assert_eq!(opts.sheet_name, Some("Sheet2".to_string()));

        // Fields that were never touched keep their defaults.
        assert_eq!(opts.sheet_index, None);
        assert!(opts.infer_schema);
        assert_eq!(opts.infer_schema_length, Some(1000));
    }

    /// Covers the builders the chain test above does not reach.
    #[test]
    fn test_sheet_index_and_schema_builders() {
        let opts = LoadOptions::new()
            .with_sheet_index(3)
            .with_infer_schema(false)
            .with_infer_schema_length(None);

        assert_eq!(opts.sheet_index, Some(3));
        assert!(!opts.infer_schema);
        assert_eq!(opts.infer_schema_length, None);

        // Unrelated fields stay at their defaults.
        assert_eq!(opts.sheet_name, None);
        assert!(opts.has_header);
        assert_eq!(opts.skip_rows, 0);
    }

    /// Setting a sheet name must not clear a previously set sheet index;
    /// both values are stored and the reader decides precedence.
    #[test]
    fn test_sheet_name_keeps_sheet_index() {
        let opts = LoadOptions::new()
            .with_sheet_index(1)
            .with_sheet_name(String::from("Data"));

        assert_eq!(opts.sheet_index, Some(1));
        assert_eq!(opts.sheet_name.as_deref(), Some("Data"));
    }

    /// A later builder call overrides an earlier one for the same field.
    #[test]
    fn test_later_call_overrides_earlier() {
        let opts = LoadOptions::new()
            .with_max_rows(Some(5))
            .with_max_rows(None)
            .with_skip_rows(4)
            .with_skip_rows(1);

        assert_eq!(opts.max_rows, None);
        assert_eq!(opts.skip_rows, 1);
    }
}