
robin_sparkless/lib.rs

//! Robin Sparkless - A Rust DataFrame library with PySpark-like API
//!
//! This library provides a PySpark-compatible API. The **root crate** is engine-agnostic:
//! it depends on [robin-sparkless-core](https://docs.rs/robin-sparkless-core) (types, expression IR, config)
//! and one backend, currently **robin-sparkless-polars**, which uses [Polars](https://www.pola.rs/)
//! for execution. The public API exposes engine-agnostic expression types where possible.
//!
//! # Expression APIs
//!
//! - **ExprIr (engine-agnostic):** Use [`col`], [`lit_i64`], [`lit_str`], [`when`], [`gt`], [`eq`], etc.
//!   from the crate root (re-exported from `robin_sparkless_core`). These build an [`ExprIr`] tree.
//!   Use [`DataFrame::filter_expr_ir`], [`DataFrame::select_expr_ir`], [`DataFrame::with_column_expr_ir`],
//!   [`DataFrame::collect_rows`], and [`GroupedData::agg_expr_ir`] with `&ExprIr` / `&[ExprIr]`.
//!   Collect returns [`CollectedRows`] (JSON-like rows). Prefer this for new code and embeddings;
//!   see the sketch after this list.
//!
//! - **Column / Expr (Polars-backed):** Use [`prelude`] or `robin_sparkless::functions::{col, lit_i64, ...}`
//!   for the full PySpark-like API that returns [`Column`] and uses Polars [`Expr`]. Use
//!   [`DataFrame::filter`], [`DataFrame::with_column`], [`DataFrame::select_exprs`], etc.
//!   with those types. Still supported for compatibility and advanced use.
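//!
//! A minimal sketch of the ExprIr path, for illustration only; the exact argument
//! types of `col`, `gt`, and `lit_i64`, and the `age` column, are assumptions here
//! rather than confirmed signatures:
//!
//! ```ignore
//! use robin_sparkless::{col, gt, lit_i64, ExprIr};
//!
//! // `df` is an existing DataFrame. Build an engine-agnostic predicate: age > 21.
//! let pred: ExprIr = gt(col("age"), lit_i64(21));
//!
//! // Apply it through the ExprIr-based methods and collect JSON-like rows.
//! let filtered = df.filter_expr_ir(&pred)?;
//! let rows = filtered.collect_rows()?;
//! ```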
//!
//! # Getting started and embedding
//!
//! For application code and embedding, use the [prelude]: `use robin_sparkless::prelude::*`.
//! For a minimal FFI surface, use [prelude::embed]. For engine-agnostic expressions, use the
//! root re-exports (`col`, `lit_i64`, `gt`, etc.) and the `*_expr_ir` / `collect_rows` methods.
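//!
//! A minimal sketch of the prelude-based (Polars-backed) path; whether `col` here
//! returns a [`Column`] with a PySpark-style `gt` method is an assumption for
//! illustration, not a confirmed signature:
//!
//! ```ignore
//! use robin_sparkless::prelude::*;
//!
//! // `df` is an existing DataFrame; filter it with a Column expression.
//! let filtered = df.filter(col("age").gt(lit_i64(21)))?;
//! ```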
//!
//! # Panics and errors
//!
//! Some functions panic when used with invalid or empty inputs (e.g. calling
//! `when(cond).otherwise(val)` without `.then()`, or passing no columns to
//! `format_string`, `elt`, `concat`, `coalesce`, or `named_struct` in Rust).
//! In Rust, `create_map` and `array` return `Result` for empty input instead of
//! panicking. From Python, empty columns for `coalesce`, `format_string`,
//! `printf`, and `named_struct` raise `ValueError`. See the documentation for
//! each function for details.
//!
//! # API stability
//!
//! While the crate is in the 0.x series, we follow [semver](https://semver.org/) but may introduce
//! breaking changes in minor releases (e.g. 0.1 → 0.2) until 1.0. For behavioral caveats and
//! intentional differences from PySpark, see the [repository documentation](https://github.com/eddiethedean/robin-sparkless/blob/main/docs/PYSPARK_DIFFERENCES.md).

#![allow(clippy::collapsible_if)]
#![allow(clippy::let_and_return)]

pub mod config;
pub mod dataframe;
pub mod prelude;
pub mod schema;
pub mod session;
pub mod traits;

// Re-export backend modules (column, functions, etc.) for internal use and backward compat.
pub use robin_sparkless_polars::{column, error, functions, type_coercion};

pub use robin_sparkless_polars::functions::{SortOrder, *};
/// Plan execution; use [`execute_plan`] for root session/DataFrame. Re-exports plan error types.
pub mod plan {
    pub use crate::execute_plan;
    pub use robin_sparkless_polars::plan::{PlanError, PlanExprError};
}
// Engine-agnostic types and expression IR from core (no Polars in public API).
pub use robin_sparkless_core::engine::CollectedRows;
pub use robin_sparkless_core::expr::{
    ExprIr, LiteralValue, WhenBuilder, WhenThenBuilder, alias, and_, between, call, col, count, eq,
    ge, gt, is_in, is_null, le, lit_bool, lit_f64, lit_i32, lit_i64, lit_null, lit_str, lt, max,
    mean, min, ne, not_, or_, sum, when,
};
pub use robin_sparkless_core::{DataType, EngineError, StructField, StructType};
pub use robin_sparkless_polars::{
    Column, Expr, PolarsError, RustUdf, StructTypePolarsExt, UdfRegistry, broadcast, expression,
    schema_from_json,
};

// Root-owned entry-point types (delegate to robin-sparkless-polars).
pub use dataframe::{
    CubeRollupData, DataFrame, DataFrameNa, DataFrameStat, DataFrameWriter, GroupedData, JoinType,
    PivotedGroupedData, SaveMode, SelectItem, WriteFormat, WriteMode,
};
pub use session::{DataFrameReader, SparkSession, SparkSessionBuilder};

// Root-owned traits (work with root DataFrame/SparkSession); plan re-export.
pub use robin_sparkless_polars::plan::{PlanError, PlanExprError};
pub use traits::{FromRobinDf, IntoRobinDf};

/// Execute a logical plan; returns root-owned [`DataFrame`].
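///
/// # Example (sketch)
///
/// A minimal sketch only; the exact JSON shapes expected for `data`, `schema`, and
/// the plan nodes are assumptions for illustration, not a documented format:
///
/// ```ignore
/// use robin_sparkless::execute_plan;
/// use serde_json::json;
///
/// // `session` is an existing SparkSession.
/// let data = vec![vec![json!(1), json!("a")], vec![json!(2), json!("b")]];
/// let schema = vec![
///     ("id".to_string(), "bigint".to_string()),   // hypothetical type names
///     ("name".to_string(), "string".to_string()),
/// ];
/// let plan: Vec<serde_json::Value> = vec![];      // hypothetical: no plan steps
/// let df = execute_plan(&session, data, schema, &plan)?;
/// ```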
pub fn execute_plan(
    session: &SparkSession,
    data: Vec<Vec<serde_json::Value>>,
    schema: Vec<(String, String)>,
    plan: &[serde_json::Value],
) -> Result<DataFrame, PlanError> {
    robin_sparkless_polars::plan::execute_plan(&session.0, data, schema, plan).map(DataFrame)
}

pub use config::SparklessConfig;

/// Convert PolarsError to EngineError (for APIs that still return PolarsError).
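///
/// A minimal sketch of adapting a Polars-backed result, assuming `res` is some
/// `Result<_, PolarsError>` returned by one of those APIs:
///
/// ```ignore
/// let adapted = res.map_err(robin_sparkless::to_engine_error);
/// ```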
pub fn to_engine_error(e: PolarsError) -> EngineError {
    robin_sparkless_polars::polars_to_core_error(e)
}

#[cfg(feature = "sql")]
pub mod sql {
    //! SQL parsing and execution; returns root-owned DataFrame.
    use crate::dataframe::DataFrame;
    use crate::session::SparkSession;
    use robin_sparkless_polars::PolarsError;

    pub use robin_sparkless_polars::sql::{Statement, execute_sql, parse_sql};

    /// Execute SQL and return root-owned DataFrame.
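    ///
    /// A minimal sketch; the `people` table and how it was registered with the
    /// session are hypothetical here:
    ///
    /// ```ignore
    /// let df = execute_sql_root(&session, "SELECT id, name FROM people WHERE id > 1")?;
    /// ```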
    pub fn execute_sql_root(session: &SparkSession, query: &str) -> Result<DataFrame, PolarsError> {
        robin_sparkless_polars::sql::execute_sql(&session.0, query).map(DataFrame)
    }
}

#[cfg(feature = "delta")]
pub mod delta {
    //! Delta Lake read/write; returns root-owned DataFrame where applicable.
    use crate::dataframe::DataFrame;
    use robin_sparkless_polars::PolarsError;
    use std::path::Path;

    pub use robin_sparkless_polars::delta::{read_delta, read_delta_with_version, write_delta};

    /// Read Delta table; returns root-owned DataFrame.
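    ///
    /// A minimal sketch (the table path is hypothetical):
    ///
    /// ```ignore
    /// let df = read_delta_root("/tmp/delta/events", false)?;
    /// ```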
    pub fn read_delta_root(
        path: impl AsRef<Path>,
        case_sensitive: bool,
    ) -> Result<DataFrame, PolarsError> {
        robin_sparkless_polars::delta::read_delta(path, case_sensitive).map(DataFrame)
    }

    /// Read Delta table at version; returns root-owned DataFrame.
    pub fn read_delta_with_version_root(
        path: impl AsRef<Path>,
        version: Option<i64>,
        case_sensitive: bool,
    ) -> Result<DataFrame, PolarsError> {
        robin_sparkless_polars::delta::read_delta_with_version(path, version, case_sensitive)
            .map(DataFrame)
    }
}