use std::path::PathBuf;
use polars_core::prelude::*;
use polars_io::csv::{CsvEncoding, NullValues};
#[cfg(feature = "parquet")]
use polars_io::parquet::ParquetCompression;
use polars_io::RowCount;
#[cfg(feature = "dynamic_groupby")]
use polars_time::{DynamicGroupOptions, RollingGroupOptions};
#[cfg(feature = "serde")]
use serde::{Deserialize, Serialize};
use crate::prelude::Expr;
/// Integer type of the `file_counter` fields carried by the scan option
/// structs in this file.
pub type FileCount = u32;
/// Settings bag for parsing CSV input.
///
/// Plain data: nothing in this file reads these fields. The per-field notes
/// are read off the field names and types and should be confirmed against the
/// CSV reader that consumes them.
#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct CsvParserOptions {
    /// Field separator byte.
    pub delimiter: u8,
    /// Byte that introduces a comment, if any.
    pub comment_char: Option<u8>,
    /// Quoting byte, if any.
    pub quote_char: Option<u8>,
    /// End-of-line byte.
    pub eol_char: u8,
    /// Whether the input starts with a header row.
    pub has_header: bool,
    /// Number of leading rows to skip.
    pub skip_rows: usize,
    /// Optional cap on the number of rows to read.
    pub n_rows: Option<usize>,
    /// Optional, shared list of column names to keep.
    pub with_columns: Option<Arc<Vec<String>>>,
    /// Low-memory mode switch.
    pub low_memory: bool,
    /// Error-tolerance switch.
    pub ignore_errors: bool,
    /// Caching switch.
    pub cache: bool,
    /// Optional description of the values that should be read as null.
    pub null_values: Option<NullValues>,
    /// Rechunking switch.
    pub rechunk: bool,
    /// Text encoding of the input.
    pub encoding: CsvEncoding,
    /// Optional row-count column specification.
    pub row_count: Option<RowCount>,
    /// Date-parsing switch.
    pub parse_dates: bool,
    /// Counter of type [`FileCount`]; its semantics are defined by the consumer.
    pub file_counter: FileCount,
}
/// Settings bag for scanning Parquet input; only compiled with the `parquet`
/// feature.
///
/// Plain data: nothing in this file reads these fields. The per-field notes
/// are read off the field names and types and should be confirmed against the
/// Parquet reader that consumes them.
#[cfg(feature = "parquet")]
#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct ParquetOptions {
    /// Optional cap on the number of rows to read.
    pub n_rows: Option<usize>,
    /// Optional, shared list of column names to keep.
    pub with_columns: Option<Arc<Vec<String>>>,
    /// Caching switch.
    pub cache: bool,
    /// Parallelisation strategy handed to the Parquet reader.
    pub parallel: polars_io::parquet::ParallelStrategy,
    /// Rechunking switch.
    pub rechunk: bool,
    /// Optional row-count column specification.
    pub row_count: Option<RowCount>,
    /// Counter of type [`FileCount`]; its semantics are defined by the consumer.
    pub file_counter: FileCount,
    /// Low-memory mode switch.
    pub low_memory: bool,
}
/// Settings bag for writing Parquet output; only compiled with the `parquet`
/// feature.
///
/// The struct is `Copy`, so it can be passed around by value. The per-field
/// notes are read off the field names and types and should be confirmed
/// against the Parquet writer that consumes them.
#[cfg(feature = "parquet")]
#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub struct ParquetWriteOptions {
    /// Compression setting for the written data.
    pub compression: ParquetCompression,
    /// Statistics switch.
    pub statistics: bool,
    /// Optional row-group size.
    pub row_group_size: Option<usize>,
    /// Optional data page size limit.
    pub data_pagesize_limit: Option<usize>,
    /// Order-preservation switch.
    pub maintain_order: bool,
}
#[derive(Clone, Debug)]
#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
pub struct IpcScanOptions {
pub n_rows: Option<usize>,
pub with_columns: Option<Arc<Vec<String>>>,
pub cache: bool,
pub row_count: Option<RowCount>,
pub rechunk: bool,
pub memmap: bool,
}
/// IPC scan settings plus a `file_counter`.
///
/// Carries the same fields as [`IpcScanOptions`] and is produced from it via
/// `From`, which initialises `file_counter` to its default value.
///
/// The per-field notes are read off the field names and types and should be
/// confirmed against the IPC reader that consumes them.
#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct IpcScanOptionsInner {
    /// Optional cap on the number of rows to read.
    pub n_rows: Option<usize>,
    /// Optional, shared list of column names to keep.
    pub with_columns: Option<Arc<Vec<String>>>,
    /// Caching switch.
    pub cache: bool,
    /// Optional row-count column specification.
    pub row_count: Option<RowCount>,
    /// Rechunking switch.
    pub rechunk: bool,
    /// Counter of type [`FileCount`]; its semantics are defined by the consumer.
    pub file_counter: FileCount,
    /// Memory-mapping switch.
    pub memmap: bool,
}
impl From<IpcScanOptions> for IpcScanOptionsInner {
fn from(options: IpcScanOptions) -> Self {
Self {
n_rows: options.n_rows,
with_columns: options.with_columns,
cache: options.cache,
row_count: options.row_count,
rechunk: options.rechunk,
file_counter: Default::default(),
memmap: options.memmap,
}
}
}
/// Settings bag for a union operation.
///
/// The struct is `Copy` and `Default`. Nothing in this file reads these
/// fields; the per-field notes are read off the field names and types and
/// should be confirmed against the code that consumes them.
#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
#[derive(Debug, Default, Clone, Copy, PartialEq, Eq)]
pub struct UnionOptions {
    /// Slice switch.
    pub slice: bool,
    /// Signed slice offset.
    pub slice_offset: i64,
    /// Slice length.
    pub slice_len: IdxSize,
    /// Parallel-execution switch.
    pub parallel: bool,
    /// NOTE(review): the meaning of the two components is not evident from
    /// this file — confirm against the consumer.
    pub rows: (Option<usize>, usize),
    /// Flag whose name suggests the inputs come from a partitioned dataset.
    pub from_partitioned_ds: bool,
}
/// Settings bag for a group-by operation.
///
/// The `dynamic` and `rolling` fields only exist when the
/// `dynamic_groupby` feature is enabled; without it the struct holds just
/// `slice`.
#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct GroupbyOptions {
    /// Optional dynamic grouping options.
    #[cfg(feature = "dynamic_groupby")]
    pub dynamic: Option<DynamicGroupOptions>,
    /// Optional rolling grouping options.
    #[cfg(feature = "dynamic_groupby")]
    pub rolling: Option<RollingGroupOptions>,
    /// Optional pair of a signed and an unsigned integer; presumably
    /// `(offset, length)` — confirm against the consumer.
    pub slice: Option<(i64, usize)>,
}
/// Settings bag for a distinct/unique operation.
///
/// Nothing in this file reads these fields; the per-field notes are read off
/// the field names and types and should be confirmed against the consumer.
#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct DistinctOptions {
    /// Optional, shared list of column names to consider.
    pub subset: Option<Arc<Vec<String>>>,
    /// Order-preservation switch.
    pub maintain_order: bool,
    /// Which duplicate to keep.
    pub keep_strategy: UniqueKeepStrategy,
}
/// Three-way mode selector stored in `FunctionOptions::collect_groups`.
///
/// `FunctionOptions::default()` picks [`ApplyOptions::ApplyGroups`].
///
/// NOTE(review): what each mode does is decided by the code that matches on
/// this enum, which is outside this file.
#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum ApplyOptions {
    /// Group-wise mode (by name).
    ApplyGroups,
    /// List mode (by name).
    ApplyList,
    /// Flat mode (by name).
    ApplyFlat,
}
/// Settings bag for a window expression; currently a single flag.
#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub struct WindowOptions {
    /// Explode switch; its effect is defined by the window implementation
    /// outside this file.
    pub explode: bool,
}
/// Settings bag attached to a function expression.
///
/// The struct is `Copy`; see the `Default` impl for the baseline values.
/// The per-field notes are read off the field names and types and should be
/// confirmed against the code that consumes them.
#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub struct FunctionOptions {
    /// Mode selector of type [`ApplyOptions`].
    pub collect_groups: ApplyOptions,
    /// Wildcard-expansion switch for the function inputs.
    pub input_wildcard_expansion: bool,
    /// Auto-explode switch.
    pub auto_explode: bool,
    /// Static format string.
    ///
    /// With serde enabled this field is serialized but skipped when
    /// deserializing, so a deserialized value gets the field type's default
    /// (the empty string).
    #[cfg_attr(feature = "serde", serde(skip_deserializing))]
    pub fmt_str: &'static str,
    /// Supertype-cast switch.
    pub cast_to_supertypes: bool,
    /// Rename-permission switch.
    pub allow_rename: bool,
}
impl Default for FunctionOptions {
    /// Baseline options: `collect_groups` is [`ApplyOptions::ApplyGroups`],
    /// every boolean switch is off and `fmt_str` is empty.
    fn default() -> Self {
        let collect_groups = ApplyOptions::ApplyGroups;
        let fmt_str = "";

        Self {
            collect_groups,
            input_wildcard_expansion: false,
            auto_explode: false,
            fmt_str,
            cast_to_supertypes: false,
            allow_rename: false,
        }
    }
}
/// Settings bag for a user-defined function in a logical plan.
///
/// Unlike most option structs in this file it has no serde derives.
/// The per-field notes are read off the field names and should be confirmed
/// against the optimizer code that consumes them.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub struct LogicalPlanUdfOptions {
    /// Switch named after predicate pushdown.
    pub predicate_pd: bool,
    /// Switch named after projection pushdown.
    pub projection_pd: bool,
    /// Static format string.
    pub fmt_str: &'static str,
}
/// Arguments for a sort operation.
///
/// The per-field notes are read off the field names and types and should be
/// confirmed against the sort implementation that consumes them.
#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct SortArguments {
    /// List of reverse-order flags; presumably one per sort key.
    pub reverse: Vec<bool>,
    /// Whether nulls are placed last.
    pub nulls_last: bool,
    /// Optional pair of a signed and an unsigned integer; presumably
    /// `(offset, length)` — confirm against the consumer.
    pub slice: Option<(i64, usize)>,
}
/// Settings bag for a Python-backed scan; only compiled with the `python`
/// feature.
///
/// The per-field notes are read off the field names and types and should be
/// confirmed against the Python scan executor that consumes them.
#[cfg(feature = "python")]
#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
#[derive(Debug, Default, Clone, PartialEq, Eq)]
pub struct PythonOptions {
    /// Scan function as raw bytes; presumably a serialized Python callable.
    pub scan_fn: Vec<u8>,
    /// Schema of the scan.
    pub schema: SchemaRef,
    /// Optional output schema.
    pub output_schema: Option<SchemaRef>,
    /// Optional, shared list of column names to keep.
    pub with_columns: Option<Arc<Vec<String>>>,
    /// Flag named after pyarrow.
    pub pyarrow: bool,
    /// Optional predicate in string form.
    pub predicate: Option<String>,
}
/// Settings bag for an anonymous (user-provided) scan.
///
/// The per-field notes are read off the field names and types and should be
/// confirmed against the scan implementation that consumes them.
#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
#[derive(Debug, Default, Clone, PartialEq, Eq)]
pub struct AnonymousScanOptions {
    /// Schema of the scan.
    pub schema: SchemaRef,
    /// Optional output schema.
    pub output_schema: Option<SchemaRef>,
    /// Optional number of leading rows to skip.
    pub skip_rows: Option<usize>,
    /// Optional cap on the number of rows to read.
    pub n_rows: Option<usize>,
    /// Optional, shared list of column names to keep.
    pub with_columns: Option<Arc<Vec<String>>>,
    /// Optional predicate expression.
    pub predicate: Option<Expr>,
    /// Static format string.
    pub fmt_str: &'static str,
}
/// Settings bag for a file sink: a path together with a file type.
#[derive(Debug, Clone)]
#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
pub struct FileSinkOptions {
    /// Shared path; presumably the destination written to by the sink.
    pub path: Arc<PathBuf>,
    /// File type selector; `FileType` is the unit type when the `parquet`
    /// feature is disabled.
    pub file_type: FileType,
}
/// File type selector used by `FileSinkOptions`; this enum form is only
/// compiled with the `parquet` feature.
#[cfg(feature = "parquet")]
#[derive(Debug, Clone)]
#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
pub enum FileType {
    /// Parquet, carrying its write options.
    Parquet(ParquetWriteOptions),
}
/// With the `parquet` feature disabled this file defines no file type
/// variants, so `FileType` is the unit type and `FileSinkOptions::file_type`
/// carries no information.
#[cfg(not(feature = "parquet"))]
pub type FileType = ();