robin_sparkless/lib.rs
1//! Robin Sparkless - A Rust DataFrame library with PySpark-like API
2//!
3//! This library provides a PySpark-compatible API. The **root crate** is engine-agnostic:
4//! it depends on [robin-sparkless-core](https://docs.rs/robin-sparkless-core) (types, expression IR, config)
5//! and one backend—currently **robin-sparkless-polars**, which uses [Polars](https://www.pola.rs/)
6//! for execution. The public API exposes engine-agnostic expression types where possible.
7//!
8//! # Expression APIs
9//!
10//! - **ExprIr (engine-agnostic):** Use [`col`], [`lit_i64`], [`lit_str`], [`when`], [`gt`], [`eq`], etc.
11//! from the crate root (re-exported from `robin_sparkless_core`). These build an [`ExprIr`] tree.
12//! Use [`DataFrame::filter_expr_ir`], [`DataFrame::select_expr_ir`], [`DataFrame::with_column_expr_ir`],
13//! [`DataFrame::collect_rows`], and [`GroupedData::agg_expr_ir`] with `&ExprIr` / `&[ExprIr]`.
14//! Collect returns [`CollectedRows`] (JSON-like rows). Prefer this for new code and embeddings.
15//!
16//! - **Column / Expr (Polars-backed):** Use [`prelude`] or `robin_sparkless::functions::{col, lit_i64, ...}`
17//! for the full PySpark-like API that returns [`Column`] and uses Polars [`Expr`]. Use
18//! [`DataFrame::filter`], [`DataFrame::with_column`], [`DataFrame::select_exprs`], etc.
19//! with those types. Still supported for compatibility and advanced use.
20//!
21//! # Getting started and embedding
22//!
23//! For application code and embedding, use the [prelude]: `use robin_sparkless::prelude::*`.
24//! For a minimal FFI surface, use [prelude::embed]. For engine-agnostic expressions, use the
25//! root re-exports (`col`, `lit_i64`, `gt`, etc.) and the `*_expr_ir` / `collect_rows` methods.
26//!
27//! # Panics and errors
28//!
29//! Some functions panic when used with invalid or empty inputs (e.g. calling
30//! `when(cond).otherwise(val)` without `.then()`, or passing no columns to
31//! `format_string`, `elt`, `concat`, `coalesce`, or `named_struct` in Rust).
32//! In Rust, `create_map` and `array` return `Result` for empty input instead of
33//! panicking. From Python, empty columns for `coalesce`, `format_string`,
34//! `printf`, and `named_struct` raise `ValueError`. See the documentation for
35//! each function for details.
36//!
37//! # API stability
38//!
39//! While the crate is in the 0.x series, we follow [semver](https://semver.org/) but may introduce
40//! breaking changes in minor releases (e.g. 0.1 → 0.2) until 1.0. For behavioral caveats and
41//! intentional differences from PySpark, see the [repository documentation](https://github.com/eddiethedean/robin-sparkless/blob/main/docs/PYSPARK_DIFFERENCES.md).
42
43#![allow(clippy::collapsible_if)]
44#![allow(clippy::let_and_return)]
45
46pub mod config;
47pub mod dataframe;
48pub mod prelude;
49pub mod schema;
50pub mod session;
51pub mod traits;
52
53// Re-export backend modules (column, functions, etc.) for internal use and backward compat.
54pub use robin_sparkless_polars::{column, error, functions, type_coercion};
55
56pub use robin_sparkless_polars::functions::{SortOrder, *};
57/// Plan execution; use [`execute_plan`] for root session/DataFrame. Re-exports plan error types.
58pub mod plan {
59 pub use crate::execute_plan;
60 pub use robin_sparkless_polars::plan::{PlanError, PlanExprError};
61}
62// Engine-agnostic types and expression IR from core (no Polars in public API).
63pub use robin_sparkless_core::engine::CollectedRows;
64pub use robin_sparkless_core::expr::{
65 ExprIr, LiteralValue, WhenBuilder, WhenThenBuilder, alias, and_, between, call, col, count, eq,
66 ge, gt, is_in, is_null, le, lit_bool, lit_f64, lit_i32, lit_i64, lit_null, lit_str, lt, max,
67 mean, min, ne, not_, or_, sum, when,
68};
69pub use robin_sparkless_core::{DataType, EngineError, StructField, StructType};
70pub use robin_sparkless_polars::{
71 Column, Expr, PolarsError, RustUdf, StructTypePolarsExt, UdfRegistry, broadcast, expression,
72 schema_from_json,
73};
74
75// Root-owned entry-point types (delegate to robin-sparkless-polars).
76pub use dataframe::{
77 CubeRollupData, DataFrame, DataFrameNa, DataFrameStat, DataFrameWriter, GroupedData, JoinType,
78 PivotedGroupedData, SaveMode, SelectItem, WriteFormat, WriteMode,
79};
80pub use session::{DataFrameReader, SparkSession, SparkSessionBuilder};
81
82// Root-owned traits (work with root DataFrame/SparkSession); plan re-export.
83pub use robin_sparkless_polars::plan::{PlanError, PlanExprError};
84pub use traits::{FromRobinDf, IntoRobinDf};
85
86/// Execute a logical plan; returns root-owned [`DataFrame`].
87pub fn execute_plan(
88 session: &SparkSession,
89 data: Vec<Vec<serde_json::Value>>,
90 schema: Vec<(String, String)>,
91 plan: &[serde_json::Value],
92) -> Result<DataFrame, PlanError> {
93 robin_sparkless_polars::plan::execute_plan(&session.0, data, schema, plan).map(DataFrame)
94}
95
96pub use config::SparklessConfig;
97
/// Convert PolarsError to EngineError (for APIs that still return PolarsError).
///
/// Thin adapter around `robin_sparkless_polars::polars_to_core_error`, exposed here
/// so callers of the root crate can normalize backend errors into the
/// engine-agnostic [`EngineError`] without importing the backend crate directly.
pub fn to_engine_error(e: PolarsError) -> EngineError {
    robin_sparkless_polars::polars_to_core_error(e)
}
102
#[cfg(feature = "sql")]
pub mod sql {
    //! SQL parsing and execution; returns root-owned DataFrame.
    use crate::dataframe::DataFrame;
    use crate::session::SparkSession;
    use robin_sparkless_polars::PolarsError;

    pub use robin_sparkless_polars::sql::{Statement, execute_sql, parse_sql};

    /// Execute SQL and return root-owned DataFrame.
    ///
    /// # Errors
    ///
    /// Propagates any [`PolarsError`] raised by the backend while parsing or
    /// running `query`.
    pub fn execute_sql_root(session: &SparkSession, query: &str) -> Result<DataFrame, PolarsError> {
        // Execute on the backend session, then wrap in the root-owned newtype.
        let frame = robin_sparkless_polars::sql::execute_sql(&session.0, query)?;
        Ok(DataFrame(frame))
    }
}
117
#[cfg(feature = "delta")]
pub mod delta {
    //! Delta Lake read/write; returns root-owned DataFrame where applicable.
    use crate::dataframe::DataFrame;
    use robin_sparkless_polars::PolarsError;
    use std::path::Path;

    pub use robin_sparkless_polars::delta::{read_delta, read_delta_with_version, write_delta};

    /// Read Delta table; returns root-owned DataFrame.
    ///
    /// # Errors
    ///
    /// Propagates any [`PolarsError`] reported by the backend reader.
    pub fn read_delta_root(
        path: impl AsRef<Path>,
        case_sensitive: bool,
    ) -> Result<DataFrame, PolarsError> {
        // Wrap the backend frame in the root newtype so callers stay on crate types.
        let inner = robin_sparkless_polars::delta::read_delta(path, case_sensitive)?;
        Ok(DataFrame(inner))
    }

    /// Read Delta table at version; returns root-owned DataFrame.
    ///
    /// Passing `None` for `version` defers the choice of version to the backend reader.
    ///
    /// # Errors
    ///
    /// Propagates any [`PolarsError`] reported by the backend reader.
    pub fn read_delta_with_version_root(
        path: impl AsRef<Path>,
        version: Option<i64>,
        case_sensitive: bool,
    ) -> Result<DataFrame, PolarsError> {
        let inner =
            robin_sparkless_polars::delta::read_delta_with_version(path, version, case_sensitive)?;
        Ok(DataFrame(inner))
    }
}