1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364
//! # Builders to create expressions and schemas
//!
//! This module contains utilities for programmatically creating expressions, schemas,
//! and types.
//!
//! ## Overview
//!
//! To create expressions, first create a schema, then create an expression builder,
//! add the expressions you want, and finally build a message. Here is an example that
//! creates an ExtendedExpression message containing the single expression `location.x + 3`.
//!
//! ```
//! use substrait_expr::builder::schema::SchemaBuildersExt;
//! use substrait_expr::helpers::schema::SchemaInfo;
//! use substrait_expr::helpers::types;
//! use substrait_expr::{
//! builder::{BuilderParams, ExpressionsBuilder},
//! functions::functions_arithmetic::FunctionsArithmeticExt,
//! helpers::literals::literal,
//! };
//!
//! let schema = SchemaInfo::new_full()
//! .field("score", types::i32(false))
//! .nested("location", false, |builder| {
//! builder
//! .field("x", types::fp32(false))
//! .field("y", types::fp64(true))
//! })
//! .build();
//!
//! let builder = ExpressionsBuilder::new(schema, BuilderParams::default());
//!
//! builder
//! .add_expression(
//! "sum",
//! builder
//! .functions()
//! .add(
//! builder.fields().resolve_by_name("location.x").unwrap(),
//! literal(3.0_f32),
//! )
//! .build()
//! .unwrap(),
//! )
//! .unwrap();
//!
//! let expressions = builder.build();
//! ```
//!
//! ## Creating a Schema
//!
//! Before you can create any expressions you will need a schema. There are four
//! different kinds of schemas: unknown (empty), names only, types only, and full. Which
//! one you create depends on how much information you know about the input fields.
//! For more information see the docs on [schema resolution](crate#schema-resolution).
//!
//! Creating an empty schema is simple.
//!
//! ```
//! # use substrait_expr::helpers::schema::EmptySchema;
//! # use substrait_expr::helpers::schema::SchemaInfo;
//!
//! let schema = SchemaInfo::Empty(EmptySchema::default());
//! ```
//!
//! The rest of the schema types have builders.
//!
//! ```
//! use substrait_expr::builder::schema::SchemaBuildersExt;
//! use substrait_expr::helpers::schema::SchemaInfo;
//! use substrait_expr::helpers::types;
//!
//! // Constructing a schema for
//! // {
//! // "score": fp32?,
//! // "location": {
//! // "x": fp64,
//! // "y": fp64
//! // }
//! // }
//!
//! // Names only
//! let schema = SchemaInfo::new_names()
//! .field("score")
//! .nested("location", |builder| builder.field("x").field("y"));
//!
//! // Types only
//! let schema = SchemaInfo::new_types()
//! .field(types::fp32(true))
//! .nested(false, |builder| {
//! builder.field(types::fp64(false)).field(types::fp64(false))
//! })
//! .build();
//!
//! // Full schema
//! // TODO
//! ```
//!
//! If you need to use *parameterized types* or *user defined types* then you
//! can use the schema builder to create those as well. This works because
//! every schema builder also has a type registry that gets passed through to
//! the created schema.
//!
//! ```
//! use substrait_expr::builder::schema::SchemaBuildersExt;
//! use substrait_expr::helpers::schema::SchemaInfo;
//!
//! let builder = SchemaInfo::new_types();
//! let complex_number = builder
//! .types()
//! .user_defined("https://imaginary.com/types", "complex-number");
//! let schema = builder.field(complex_number.with_nullability(true)).build();
//! ```
//!
//! There are also utility macros for creating schemas. These are mainly
//! used in unit tests since they require you to know the fields in the schema at
//! compile time.
//!
//! ```
//! use substrait_expr::macros::names_schema;
//!
//! // Names only
//! let schema = names_schema!({
//! score: {},
//! location: {
//! x: {},
//! y: {}
//! }
//! });
//!
//! // Types only
//! // TODO
//!
//! // Full
//! // TODO
//! ```
//!
//! ## Creating Expressions
//!
//! Once you have a schema you can create an expression builder and start creating
//! expressions. One important thing to note is that expressions in Substrait
//! cannot stand alone. They must either be part of a Plan or part of an
//! ExtendedExpression. An ExtendedExpression is a collection of expressions plus
//! schema/type/function metadata. This is what the expression builder creates.
//!
//! There is an example above covering the entire process.
//!
//! ### Referencing fields
//!
//! To reference a field in the schema you can use
//! [crate::builder::ExpressionsBuilder::fields].
//!
//! You can reference fields by name:
//!
//! ```
//! # use substrait_expr::helpers::schema::EmptySchema;
//! # use substrait_expr::helpers::schema::SchemaInfo;
//! # use substrait_expr::builder::{BuilderParams, ExpressionsBuilder};
//!
//! # let schema = SchemaInfo::Empty(EmptySchema::default());
//! # let builder = ExpressionsBuilder::new(schema, BuilderParams::new_loose());
//! let reference = builder.fields().resolve_by_name("location.x").unwrap();
//! ```
//!
//! The syntax for referencing fields by name is fairly simplistic. The `.`
//! character will choose a subfield. To choose a list item you can use `[]`.
//!
//! ```
//! # use substrait_expr::helpers::schema::EmptySchema;
//! # use substrait_expr::helpers::schema::SchemaInfo;
//! # use substrait_expr::builder::{BuilderParams, ExpressionsBuilder};
//!
//! # let schema = SchemaInfo::Empty(EmptySchema::default());
//! # let builder = ExpressionsBuilder::new(schema, BuilderParams::new_loose());
//! let list_item = builder.fields().resolve_by_name("genres[3]").unwrap();
//! ```
//!
//! If you have a map column and the map key is a string, then you can also
//! reference an entry with `[]`.
//!
//! ```
//! # use substrait_expr::helpers::schema::EmptySchema;
//! # use substrait_expr::helpers::schema::SchemaInfo;
//! # use substrait_expr::builder::{BuilderParams, ExpressionsBuilder};
//!
//! # let schema = SchemaInfo::Empty(EmptySchema::default());
//! # let builder = ExpressionsBuilder::new(schema, BuilderParams::new_loose());
//! let map_item = builder.fields().resolve_by_name("metadata[size]").unwrap();
//! ```
use std::cell::RefCell;
use substrait::proto::expression_reference::ExprType;
use substrait::proto::{Expression, ExpressionReference, ExtendedExpression};
use crate::error::{Result, SubstraitExprError};
use crate::helpers::expr::ExpressionExt;
use crate::helpers::schema::SchemaInfo;
use crate::helpers::types::TypeExt;
use self::functions::FunctionsBuilder;
use self::schema::RefBuilder;
pub mod functions;
pub mod schema;
pub mod types;
/// Options handed to [`ExpressionsBuilder::new`] that control how permissive the
/// builder is.
///
/// [`BuilderParams::default`] turns every allowance off, while
/// [`BuilderParams::new_loose`] turns every allowance on. Individual flags can be
/// overridden with struct-update syntax, e.g.
/// `BuilderParams { allow_unknown_types: true, ..Default::default() }`.
#[derive(Debug, Clone, Default)]
pub struct BuilderParams {
    /// NOTE(review): presumably lets a field be referenced by name even when the name
    /// cannot be resolved against the schema up front -- confirm against `RefBuilder`.
    pub allow_late_name_lookup: bool,
    /// NOTE(review): the code that consumes this flag is not visible from this module;
    /// presumably it relaxes type checking -- TODO confirm.
    pub allow_loose_types: bool,
    /// NOTE(review): presumably permits expressions whose type is unknown. The unit
    /// test in this module expects field references into a names-only schema to be
    /// rejected when this is `false`.
    pub allow_unknown_types: bool,
}

impl BuilderParams {
    /// Creates parameters with every allowance enabled.
    pub fn new_loose() -> Self {
        Self {
            allow_late_name_lookup: true,
            allow_loose_types: true,
            allow_unknown_types: true,
        }
    }
}
/// An expression together with the names assigned to its output.
///
/// Instances are created through `NamedExpression::try_new`, which checks that the
/// number of names matches the number of types in the expression's output type.
struct NamedExpression {
/// The expression itself.
expr: Expression,
/// The names given to the expression's output.
output_names: Vec<String>,
}
impl NamedExpression {
    /// Pairs `expr` with `output_names` after checking that the two agree.
    ///
    /// The output type of `expr` is resolved against `schema`, and the number of
    /// types it reports must equal the number of names supplied.
    ///
    /// # Errors
    ///
    /// Propagates any error raised while resolving the output type, and returns
    /// [`SubstraitExprError::InvalidInput`] when the two counts differ.
    fn try_new(expr: Expression, output_names: Vec<String>, schema: &SchemaInfo) -> Result<Self> {
        let num_types = expr.output_type(schema)?.num_types();
        let num_names = output_names.len();
        // Widen both counts to u64 instead of narrowing the name count with `as u32`:
        // a narrowing cast silently wraps, which could make a mismatched pair look equal.
        if u64::from(num_types) != num_names as u64 {
            // The message deliberately avoids "only": there may be too many names
            // as well as too few.
            return Err(SubstraitExprError::InvalidInput(format!(
                "An expression was given that returns {} types but {} names were given",
                num_types, num_names
            )));
        }
        Ok(Self { expr, output_names })
    }
}
/// A builder object to create expressions
///
/// Note that the output of this builder is not an "Expression" message. Expression is not
/// a top-level message in the Substrait specification because an expression
/// references a schema and various extension metadata. Instead, the top level message is
/// ExtendedExpression, which holds a collection of expressions. If you only need to serialize
/// a single expression then you can create an ExtendedExpression that contains a single expression.
///
/// The accumulated expressions live in a `RefCell` so that `add_expression` can append
/// to them through a shared `&self` reference.
pub struct ExpressionsBuilder {
/// The schema that field references, functions, and expression output types are
/// resolved against; it is also emitted as the `base_schema` of the built message.
schema: SchemaInfo,
/// The parameters this builder was created with; they are passed on to the
/// `RefBuilder` returned by `fields`.
params: BuilderParams,
/// The expressions added so far, in insertion order.
expressions: RefCell<Vec<NamedExpression>>,
}
/// Conversion into the list of output names attached to an expression.
///
/// Implemented for a single name (`&str` or `String`), which becomes a one-element
/// list, and for a ready-made `Vec<String>`, which is used as-is.
pub trait IntoExprOutputNames {
    /// Consumes `self` and returns the output names.
    fn into_names(self) -> Vec<String>;
}

/// A borrowed string is copied into a one-element list of names.
impl IntoExprOutputNames for &str {
    fn into_names(self) -> Vec<String> {
        let name = String::from(self);
        vec![name]
    }
}

/// An owned string becomes the sole entry of the list of names.
impl IntoExprOutputNames for String {
    fn into_names(self) -> Vec<String> {
        let name = self;
        vec![name]
    }
}

/// A vector of names is passed through untouched.
impl IntoExprOutputNames for Vec<String> {
    fn into_names(self) -> Vec<String> {
        let names = self;
        names
    }
}
impl ExpressionsBuilder {
pub fn new(schema: SchemaInfo, params: BuilderParams) -> Self {
Self {
schema,
params,
expressions: RefCell::new(Vec::new()),
}
}
pub fn fields(&self) -> RefBuilder {
RefBuilder::new(&self.schema, &self.params, self.functions())
}
pub fn functions(&self) -> FunctionsBuilder {
FunctionsBuilder::new(&self.schema)
}
pub fn add_expression(
&self,
output_names: impl IntoExprOutputNames,
expression: Expression,
) -> Result<&Self> {
let mut expressions = self.expressions.borrow_mut();
expressions.push(NamedExpression::try_new(
expression,
output_names.into_names(),
&self.schema,
)?);
Ok(self)
}
pub fn build(self) -> ExtendedExpression {
let (extension_uris, extensions) = self.schema.extensions_registry().to_substrait();
let referred_expr = self
.expressions
.into_inner()
.into_iter()
.map(|named_expr| ExpressionReference {
output_names: named_expr.output_names,
expr_type: Some(ExprType::Expression(named_expr.expr)),
})
.collect::<Vec<_>>();
ExtendedExpression {
version: Some(substrait::version::version_with_producer("substrait-expr")),
extension_uris,
extensions,
advanced_extensions: None,
expected_type_urls: Vec::new(),
base_schema: Some(self.schema.to_substrait()),
referred_expr,
}
}
}
#[cfg(test)]
mod tests {
    use super::*;
    // The `names_schema!` expansion refers to the crate as `substrait_expr`.
    use crate as substrait_expr;
    use substrait_expr_macros::names_schema;

    /// With unknown types disallowed, referencing a field of a names-only schema
    /// must fail, both by name and through the field builder.
    #[test]
    fn prevent_unknown_types_via_unknown_field_ref() {
        let schema = names_schema!({
            x: {}
        });
        let strict = BuilderParams {
            allow_unknown_types: false,
            ..BuilderParams::default()
        };
        let builder = ExpressionsBuilder::new(schema, strict);

        let by_name = builder.fields().resolve_by_name("x");
        assert!(by_name.is_err());

        assert!(builder.fields().field_builder().field("x").is_err());
    }
}