datafusion_spark/lib.rs
1// Licensed to the Apache Software Foundation (ASF) under one
2// or more contributor license agreements. See the NOTICE file
3// distributed with this work for additional information
4// regarding copyright ownership. The ASF licenses this file
5// to you under the Apache License, Version 2.0 (the
6// "License"); you may not use this file except in compliance
7// with the License. You may obtain a copy of the License at
8//
9// http://www.apache.org/licenses/LICENSE-2.0
10//
11// Unless required by applicable law or agreed to in writing,
12// software distributed under the License is distributed on an
13// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14// KIND, either express or implied. See the License for the
15// specific language governing permissions and limitations
16// under the License.
17
18#![doc(
19 html_logo_url = "https://raw.githubusercontent.com/apache/datafusion/19fe44cf2f30cbdd63d4a4f52c74055163c6cc38/docs/logos/standalone_logo/logo_original.svg",
20 html_favicon_url = "https://raw.githubusercontent.com/apache/datafusion/19fe44cf2f30cbdd63d4a4f52c74055163c6cc38/docs/logos/standalone_logo/logo_original.svg"
21)]
22#![cfg_attr(docsrs, feature(doc_auto_cfg))]
23// Make cheap clones clear: https://github.com/apache/datafusion/issues/11143
24#![deny(clippy::clone_on_ref_ptr)]
25
26//! Spark Expression packages for [DataFusion].
27//!
28//! This crate contains a collection of various Spark function packages for DataFusion,
29//! implemented using the extension API.
30//!
31//! [DataFusion]: https://crates.io/crates/datafusion
32//!
33//!
34//! # Available Function Packages
35//! See the list of [modules](#modules) in this crate for available packages.
36//!
37//! # Example: using all function packages
38//!
39//! You can register all the functions in all packages using the [`register_all`]
40//! function as shown below.
41//!
42//! ```
43//! # use datafusion_execution::FunctionRegistry;
44//! # use datafusion_expr::{ScalarUDF, AggregateUDF, WindowUDF};
45//! # use datafusion_expr::planner::ExprPlanner;
46//! # use datafusion_common::Result;
47//! # use std::collections::HashSet;
48//! # use std::sync::Arc;
//! # // Note: We can't use a real SessionContext here because the
//! # // `datafusion_spark` crate does not depend on the `datafusion` crate,
//! # // so we use a dummy SessionContext that implements just enough of the API
52//! # struct SessionContext {}
53//! # impl FunctionRegistry for SessionContext {
54//! # fn register_udf(&mut self, _udf: Arc<ScalarUDF>) -> Result<Option<Arc<ScalarUDF>>> { Ok (None) }
55//! # fn udfs(&self) -> HashSet<String> { unimplemented!() }
56//! # fn udf(&self, _name: &str) -> Result<Arc<ScalarUDF>> { unimplemented!() }
57//! # fn udaf(&self, name: &str) -> Result<Arc<AggregateUDF>> {unimplemented!() }
58//! # fn udwf(&self, name: &str) -> Result<Arc<WindowUDF>> { unimplemented!() }
59//! # fn expr_planners(&self) -> Vec<Arc<dyn ExprPlanner>> { unimplemented!() }
60//! # }
61//! # impl SessionContext {
62//! # fn new() -> Self { SessionContext {} }
63//! # async fn sql(&mut self, _query: &str) -> Result<()> { Ok(()) }
64//! # }
65//! #
66//! # async fn stub() -> Result<()> {
67//! // Create a new session context
68//! let mut ctx = SessionContext::new();
69//! // register all spark functions with the context
70//! datafusion_spark::register_all(&mut ctx)?;
71//! // run a query. Note the `sha2` function is now available which
72//! // has Spark semantics
73//! let df = ctx.sql("SELECT sha2('The input String', 256)").await?;
74//! # Ok(())
75//! # }
76//! ```
77//!
78//! # Example: calling a specific function in Rust
79//!
//! Each package also exports an `expr_fn` submodule that creates [`Expr`]s for
//! invoking functions from Rust using a fluent style. For example, to invoke the
//! `sha2` function, you can use the following code:
83//!
84//! ```rust
85//! # use datafusion_expr::{col, lit};
86//! use datafusion_spark::expr_fn::sha2;
87//! // Create the expression `sha2(my_data, 256)`
88//! let expr = sha2(col("my_data"), lit(256));
//! ```
//!
//! [`Expr`]: datafusion_expr::Expr
92
93pub mod function;
94
95use datafusion_catalog::TableFunction;
96use datafusion_common::Result;
97use datafusion_execution::FunctionRegistry;
98use datafusion_expr::{AggregateUDF, ScalarUDF, WindowUDF};
99use log::debug;
100use std::sync::Arc;
101
102/// Fluent-style API for creating `Expr`s
103#[allow(unused)]
104pub mod expr_fn {
105 pub use super::function::aggregate::expr_fn::*;
106 pub use super::function::array::expr_fn::*;
107 pub use super::function::bitwise::expr_fn::*;
108 pub use super::function::collection::expr_fn::*;
109 pub use super::function::conditional::expr_fn::*;
110 pub use super::function::conversion::expr_fn::*;
111 pub use super::function::csv::expr_fn::*;
112 pub use super::function::datetime::expr_fn::*;
113 pub use super::function::generator::expr_fn::*;
114 pub use super::function::hash::expr_fn::*;
115 pub use super::function::json::expr_fn::*;
116 pub use super::function::lambda::expr_fn::*;
117 pub use super::function::map::expr_fn::*;
118 pub use super::function::math::expr_fn::*;
119 pub use super::function::misc::expr_fn::*;
120 pub use super::function::predicate::expr_fn::*;
121 pub use super::function::r#struct::expr_fn::*;
122 pub use super::function::string::expr_fn::*;
123 pub use super::function::table::expr_fn::*;
124 pub use super::function::url::expr_fn::*;
125 pub use super::function::window::expr_fn::*;
126 pub use super::function::xml::expr_fn::*;
127}
128
129/// Returns all default scalar functions
130pub fn all_default_scalar_functions() -> Vec<Arc<ScalarUDF>> {
131 function::array::functions()
132 .into_iter()
133 .chain(function::bitwise::functions())
134 .chain(function::collection::functions())
135 .chain(function::conditional::functions())
136 .chain(function::conversion::functions())
137 .chain(function::csv::functions())
138 .chain(function::datetime::functions())
139 .chain(function::generator::functions())
140 .chain(function::hash::functions())
141 .chain(function::json::functions())
142 .chain(function::lambda::functions())
143 .chain(function::map::functions())
144 .chain(function::math::functions())
145 .chain(function::misc::functions())
146 .chain(function::predicate::functions())
147 .chain(function::string::functions())
148 .chain(function::r#struct::functions())
149 .chain(function::url::functions())
150 .chain(function::xml::functions())
151 .collect::<Vec<_>>()
152}
153
154/// Returns all default aggregate functions
155pub fn all_default_aggregate_functions() -> Vec<Arc<AggregateUDF>> {
156 function::aggregate::functions()
157}
158
159/// Returns all default window functions
160pub fn all_default_window_functions() -> Vec<Arc<WindowUDF>> {
161 function::window::functions()
162}
163
164/// Returns all default table functions
165pub fn all_default_table_functions() -> Vec<Arc<TableFunction>> {
166 function::table::functions()
167}
168
169/// Registers all enabled packages with a [`FunctionRegistry`]
170pub fn register_all(registry: &mut dyn FunctionRegistry) -> Result<()> {
171 let scalar_functions: Vec<Arc<ScalarUDF>> = all_default_scalar_functions();
172 scalar_functions.into_iter().try_for_each(|udf| {
173 let existing_udf = registry.register_udf(udf)?;
174 if let Some(existing_udf) = existing_udf {
175 debug!("Overwrite existing UDF: {}", existing_udf.name());
176 }
177 Ok(()) as Result<()>
178 })?;
179
180 let aggregate_functions: Vec<Arc<AggregateUDF>> = all_default_aggregate_functions();
181 aggregate_functions.into_iter().try_for_each(|udf| {
182 let existing_udaf = registry.register_udaf(udf)?;
183 if let Some(existing_udaf) = existing_udaf {
184 debug!("Overwrite existing UDAF: {}", existing_udaf.name());
185 }
186 Ok(()) as Result<()>
187 })?;
188
189 let window_functions: Vec<Arc<WindowUDF>> = all_default_window_functions();
190 window_functions.into_iter().try_for_each(|udf| {
191 let existing_udwf = registry.register_udwf(udf)?;
192 if let Some(existing_udwf) = existing_udwf {
193 debug!("Overwrite existing UDWF: {}", existing_udwf.name());
194 }
195 Ok(()) as Result<()>
196 })?;
197
198 Ok(())
199}