Trait datafusion::logical_expr::ScalarUDFImpl

source ·

/// Trait for implementing ScalarUDF; exposes the full API for user defined scalar functions.
pub trait ScalarUDFImpl: Debug + Send + Sync {
    // Required methods
    /// Returns this object as an `Any` trait object.
    fn as_any(&self) -> &(dyn Any + 'static);
    /// Returns this function's name.
    fn name(&self) -> &str;
    /// Returns the function's `Signature` (accepted input types and `Volatility`).
    fn signature(&self) -> &Signature;
    /// What `DataType` will be returned by this function, given the types of the arguments.
    /// Not called by DataFusion when `return_type_from_exprs` is implemented.
    fn return_type(
        &self,
        arg_types: &[DataType]
    ) -> Result<DataType, DataFusionError>;
    /// Invokes the function on a slice of `ColumnarValue` (scalar or array) arguments.
    /// Functions that take no arguments should use `invoke_no_args` instead.
    fn invoke(
        &self,
        _args: &[ColumnarValue]
    ) -> Result<ColumnarValue, DataFusionError>;

    // Provided methods
    /// What `DataType` will be returned by this function, given the arguments themselves.
    /// By default this calls `return_type` with the types of each argument.
    fn return_type_from_exprs(
        &self,
        _args: &[Expr],
        _schema: &dyn ExprSchema,
        arg_types: &[DataType]
    ) -> Result<DataType, DataFusionError> { ... }
    /// Invokes the function without arguments; the number of rows is provided instead.
    fn invoke_no_args(
        &self,
        _number_rows: usize
    ) -> Result<ColumnarValue, DataFusionError> { ... }
    /// Returns any aliases (alternate names) for this function; defaults to no aliases.
    fn aliases(&self) -> &[String] { ... }
    /// Specifies monotonicity behaviors for user defined scalar functions.
    fn monotonicity(&self) -> Result<Option<Vec<Option<bool>>>, DataFusionError> { ... }
    /// Optionally applies per-UDF simplification / rewrite rules; the default does nothing.
    /// If the function cannot be simplified, the arguments MUST be returned unmodified.
    fn simplify(
        &self,
        args: Vec<Expr>,
        _info: &dyn SimplifyInfo
    ) -> Result<ExprSimplifyResult, DataFusionError> { ... }
    /// Returns true if some subexpressions may not be evaluated; setting this prevents
    /// certain optimizations such as common subexpression elimination.
    fn short_circuits(&self) -> bool { ... }
}

Expand description

Trait for implementing ScalarUDF.

This trait exposes the full API for implementing user defined functions and can be used to implement any function.

See advanced_udf.rs for a full example with complete implementation and ScalarUDF for other available options.

§Basic Example

#[derive(Debug)]
struct AddOne {
  signature: Signature
};

impl AddOne {
    /// Creates an `AddOne` with a uniform signature built from one argument,
    /// the type list `[Int32]`, and `Volatility::Immutable`.
    fn new() -> Self {
        let signature = Signature::uniform(1, vec![DataType::Int32], Volatility::Immutable);
        Self { signature }
    }
}

/// Implement the ScalarUDFImpl trait for AddOne
impl ScalarUDFImpl for AddOne {
    fn as_any(&self) -> &dyn Any {
        self
    }

    fn name(&self) -> &str {
        "add_one"
    }

    fn signature(&self) -> &Signature {
        &self.signature
    }

    /// Returns `Int32`, or a planning error when the first argument type is
    /// missing or is not `Int32`.
    fn return_type(&self, args: &[DataType]) -> Result<DataType> {
        if !matches!(args.first(), Some(&DataType::Int32)) {
            return plan_err!("add_one only accepts Int32 arguments");
        }
        Ok(DataType::Int32)
    }

    // The actual implementation would add one to the argument.
    // The parameter is underscore-prefixed (as in the trait declaration)
    // because this stub does not read it.
    fn invoke(&self, _args: &[ColumnarValue]) -> Result<ColumnarValue> {
        unimplemented!()
    }
}

// Wrap the implementation in a ScalarUDF
let udf_impl = AddOne::new();
let add_one = ScalarUDF::from(udf_impl);

// Build the expression `add_one(a)` by calling the UDF on column "a"
let call_args = vec![col("a")];
let expr = add_one.call(call_args);

Required Methods§

source

fn as_any(&self) -> &(dyn Any + 'static)

Returns this object as an Any trait object

source

fn name(&self) -> &str

Returns this function’s name

source

fn signature(&self) -> &Signature

Returns the function’s Signature for information about what input types are accepted and the function’s Volatility.

source

fn return_type( &self, arg_types: &[DataType] ) -> Result<DataType, DataFusionError>

What DataType will be returned by this function, given the types of the arguments.

§Notes

If you provide an implementation for Self::return_type_from_exprs, DataFusion will not call return_type (this function). In this case it is recommended to return DataFusionError::Internal.

source

fn invoke( &self, _args: &[ColumnarValue] ) -> Result<ColumnarValue, DataFusionError>

Invoke the function on args, returning the appropriate result

The function will be invoked with a slice of ColumnarValue arguments (each either a scalar or an array).

If the function does not take any arguments, please use invoke_no_args instead and return not_impl_err for this function.

§Performance

For the best performance, the implementations of invoke should handle the common case when one or more of their arguments are constant values (aka ColumnarValue::Scalar).

ColumnarValue::values_to_arrays can be used to convert the arguments to arrays, which will likely result in simpler code, but will be slower.

Provided Methods§

source

fn return_type_from_exprs( &self, _args: &[Expr], _schema: &dyn ExprSchema, arg_types: &[DataType] ) -> Result<DataType, DataFusionError>

What DataType will be returned by this function, given the arguments?

Note most UDFs should implement Self::return_type and not this function. The output type for most functions only depends on the types of their inputs (e.g. sqrt(f32) is always f32).

By default, this function calls Self::return_type with the types of each argument.

This method can be overridden for functions that return different types based on the values of their arguments.

For example, the following two function calls get the same argument types (something and a Utf8 string) but return different types based on the value of the second argument:

arrow_cast(x, 'Int16') --> Int16
arrow_cast(x, 'Float32') --> Float32

§Notes:

This function must consistently return the same type for the same logical input even if the input is simplified (e.g. it must return the same type for ('foo' | 'bar') as it does for ('foobar')).

source

fn invoke_no_args( &self, _number_rows: usize ) -> Result<ColumnarValue, DataFusionError>

Invoke the function without args, returning the appropriate result. Instead of arguments, the number of rows is provided.

source

fn aliases(&self) -> &[String]

Returns any aliases (alternate names) for this function.

Aliases can be used to invoke the same function using different names. For example in some databases now() and current_timestamp() are aliases for the same function. This behavior can be obtained by returning current_timestamp as an alias for the now function.

Note: aliases should only include names other than Self::name. Defaults to [] (no aliases)

source

fn monotonicity(&self) -> Result<Option<Vec<Option<bool>>>, DataFusionError>

This function specifies monotonicity behaviors for User defined scalar functions.

source

fn simplify( &self, args: Vec<Expr>, _info: &dyn SimplifyInfo ) -> Result<ExprSimplifyResult, DataFusionError>

Optionally apply per-UDF simplification / rewrite rules.

This can be used to apply function-specific simplification rules during optimization (e.g. arrow_cast --> Expr::Cast). The default implementation does nothing.

Note that DataFusion handles simplifying arguments and “constant folding” (replacing a function call with constant arguments such as my_add(1,2) --> 3 ). Thus, there is no need to implement such optimizations manually for specific UDFs.

§Arguments

‘args’: The arguments of the function
‘info’: Additional information (a SimplifyInfo) made available for the simplification; this is the _info parameter in the signature

§Returns

ExprSimplifyResult indicating the result of the simplification. NOTE: if the function cannot be simplified, the arguments MUST be returned unmodified.

source

fn short_circuits(&self) -> bool

Returns true if some of this expr’s subexpressions may not be evaluated, and thus any side effects (like divide by zero) may not be encountered. Setting this to true prevents certain optimizations such as common subexpression elimination.

Trait datafusion::logical_expr::ScalarUDFImpl

§Basic Example

Required Methods§

fn as_any(&self) -> &(dyn Any + 'static)

fn name(&self) -> &str

fn signature(&self) -> &Signature

fn return_type( &self, arg_types: &[DataType] ) -> Result<DataType, DataFusionError>

§Notes

fn invoke( &self, _args: &[ColumnarValue] ) -> Result<ColumnarValue, DataFusionError>

§Performance

Provided Methods§

fn return_type_from_exprs( &self, _args: &[Expr], _schema: &dyn ExprSchema, arg_types: &[DataType] ) -> Result<DataType, DataFusionError>

§Notes:

fn invoke_no_args( &self, _number_rows: usize ) -> Result<ColumnarValue, DataFusionError>

fn aliases(&self) -> &[String]

fn monotonicity(&self) -> Result<Option<Vec<Option<bool>>>, DataFusionError>

fn simplify( &self, args: Vec<Expr>, _info: &dyn SimplifyInfo ) -> Result<ExprSimplifyResult, DataFusionError>

§Arguments

§Returns

fn short_circuits(&self) -> bool

Implementors§

impl ScalarUDFImpl for ArrowCastFunc

impl ScalarUDFImpl for ArrowTypeOfFunc

impl ScalarUDFImpl for CoalesceFunc

impl ScalarUDFImpl for GetFieldFunc

impl ScalarUDFImpl for NamedStructFunc

impl ScalarUDFImpl for NullIfFunc

impl ScalarUDFImpl for NVL2Func

impl ScalarUDFImpl for NVLFunc

impl ScalarUDFImpl for StructFunc

impl ScalarUDFImpl for DigestFunc

impl ScalarUDFImpl for Md5Func

impl ScalarUDFImpl for SHA224Func

impl ScalarUDFImpl for SHA256Func

impl ScalarUDFImpl for SHA384Func

impl ScalarUDFImpl for SHA512Func

impl ScalarUDFImpl for CurrentDateFunc

impl ScalarUDFImpl for CurrentTimeFunc

impl ScalarUDFImpl for DateBinFunc

impl ScalarUDFImpl for DatePartFunc

impl ScalarUDFImpl for DateTruncFunc

impl ScalarUDFImpl for FromUnixtimeFunc

impl ScalarUDFImpl for MakeDateFunc

impl ScalarUDFImpl for NowFunc

impl ScalarUDFImpl for ToCharFunc

impl ScalarUDFImpl for ToDateFunc

impl ScalarUDFImpl for ToTimestampFunc

impl ScalarUDFImpl for ToTimestampMicrosFunc

impl ScalarUDFImpl for ToTimestampMillisFunc

impl ScalarUDFImpl for ToTimestampNanosFunc

impl ScalarUDFImpl for ToTimestampSecondsFunc

impl ScalarUDFImpl for ToUnixtimeFunc

impl ScalarUDFImpl for DecodeFunc

impl ScalarUDFImpl for EncodeFunc

impl ScalarUDFImpl for AbsFunc

impl ScalarUDFImpl for CotFunc

impl ScalarUDFImpl for FactorialFunc

impl ScalarUDFImpl for GcdFunc

impl ScalarUDFImpl for IsZeroFunc

impl ScalarUDFImpl for LcmFunc

impl ScalarUDFImpl for LogFunc

impl ScalarUDFImpl for IsNanFunc

impl ScalarUDFImpl for NanvlFunc

impl ScalarUDFImpl for PiFunc

impl ScalarUDFImpl for PowerFunc

impl ScalarUDFImpl for RandomFunc

impl ScalarUDFImpl for RoundFunc

impl ScalarUDFImpl for TruncFunc

impl ScalarUDFImpl for RegexpLikeFunc

impl ScalarUDFImpl for RegexpMatchFunc

impl ScalarUDFImpl for RegexpReplaceFunc

impl ScalarUDFImpl for AsciiFunc

impl ScalarUDFImpl for BitLengthFunc

impl ScalarUDFImpl for BTrimFunc

impl ScalarUDFImpl for ChrFunc

impl ScalarUDFImpl for ConcatFunc

impl ScalarUDFImpl for ConcatWsFunc

impl ScalarUDFImpl for EndsWithFunc

impl ScalarUDFImpl for InitcapFunc

impl ScalarUDFImpl for LevenshteinFunc

Trait datafusion::logical_expr::ScalarUDFImpl