1
  2
  3
  4
  5
  6
  7
  8
  9
 10
 11
 12
 13
 14
 15
 16
 17
 18
 19
 20
 21
 22
 23
 24
 25
 26
 27
 28
 29
 30
 31
 32
 33
 34
 35
 36
 37
 38
 39
 40
 41
 42
 43
 44
 45
 46
 47
 48
 49
 50
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements.  See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership.  The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License.  You may obtain a copy of the License at
//
//   http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied.  See the License for the
// specific language governing permissions and limitations
// under the License.

//! UDF support

use arrow::array::ArrayRef;
use arrow::datatypes::{DataType, Field, Schema};

use crate::error::Result;
use crate::execution::physical_plan::PhysicalExpr;

use arrow::record_batch::RecordBatch;
use std::sync::Arc;

/// Signature shared by every scalar user-defined function (UDF).
///
/// A scalar UDF is a reference-counted closure that receives a slice of
/// argument arrays and returns either a single result array or an error.
/// The `Send + Sync` bounds allow the same closure to be shared across
/// threads.
pub type ScalarUdf = Arc<dyn Fn(&[ArrayRef]) -> Result<ArrayRef> + Send + Sync>;

/// Scalar UDF Expression
///
/// Bundles a scalar UDF's name, declared argument fields, return type and
/// implementation closure. Cloning copies the name, argument fields and
/// return type; the implementation itself is behind an `Arc`, so clones
/// share the same closure.
#[derive(Clone)]
pub struct ScalarFunction {
    /// Function name
    pub name: String,
    /// Function argument meta-data (one `Field` per declared argument)
    pub args: Vec<Field>,
    /// Return type
    pub return_type: DataType,
    /// UDF implementation
    pub fun: ScalarUdf,
}

impl ScalarFunction {
    /// Create a new ScalarFunction
    pub fn new(
        name: &str,
        args: Vec<Field>,
        return_type: DataType,
        fun: ScalarUdf,
    ) -> Self {
        Self {
            name: name.to_owned(),
            args,
            return_type,
            fun,
        }
    }
}

/// Scalar UDF Physical Expression
///
/// Pairs a UDF implementation with the physical expressions that produce
/// its arguments, so the function can be evaluated against a record batch.
pub struct ScalarFunctionExpr {
    /// Name reported by `PhysicalExpr::name`
    name: String,
    /// UDF implementation.
    // NOTE(review): `ScalarUdf` is already an `Arc`, so the extra `Box` looks
    // redundant; it is kept because it is part of `new`'s public signature.
    fun: Box<ScalarUdf>,
    /// Expressions evaluated, in order, to produce the UDF's input arrays
    args: Vec<Arc<dyn PhysicalExpr>>,
    /// Data type reported by `PhysicalExpr::data_type`
    return_type: DataType,
}

impl ScalarFunctionExpr {
    /// Create a new Scalar function
    pub fn new(
        name: &str,
        fun: Box<ScalarUdf>,
        args: Vec<Arc<dyn PhysicalExpr>>,
        return_type: &DataType,
    ) -> Self {
        Self {
            name: name.to_owned(),
            fun,
            args,
            return_type: return_type.clone(),
        }
    }
}

impl PhysicalExpr for ScalarFunctionExpr {
    fn name(&self) -> String {
        self.name.clone()
    }

    fn data_type(&self, _input_schema: &Schema) -> Result<DataType> {
        Ok(self.return_type.clone())
    }

    fn nullable(&self, _input_schema: &Schema) -> Result<bool> {
        Ok(true)
    }

    fn evaluate(&self, batch: &RecordBatch) -> Result<ArrayRef> {
        // evaluate the arguments
        let inputs = self
            .args
            .iter()
            .map(|e| e.evaluate(batch))
            .collect::<Result<Vec<_>>>()?;

        // evaluate the function
        let fun = self.fun.as_ref();
        (fun)(&inputs)
    }
}