datafusion_comet_spark_expr/string_funcs/
substring.rs

1// Licensed to the Apache Software Foundation (ASF) under one
2// or more contributor license agreements.  See the NOTICE file
3// distributed with this work for additional information
4// regarding copyright ownership.  The ASF licenses this file
5// to you under the Apache License, Version 2.0 (the
6// "License"); you may not use this file except in compliance
7// with the License.  You may obtain a copy of the License at
8//
9//   http://www.apache.org/licenses/LICENSE-2.0
10//
11// Unless required by applicable law or agreed to in writing,
12// software distributed under the License is distributed on an
13// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14// KIND, either express or implied.  See the License for the
15// specific language governing permissions and limitations
16// under the License.
17
18#![allow(deprecated)]
19
20use crate::kernels::strings::substring;
21use arrow::datatypes::{DataType, Schema};
22use arrow::record_batch::RecordBatch;
23use datafusion::common::DataFusionError;
24use datafusion::logical_expr::ColumnarValue;
25use datafusion::physical_expr::PhysicalExpr;
26use std::{
27    any::Any,
28    fmt::{Display, Formatter},
29    hash::Hash,
30    sync::Arc,
31};
32
33#[derive(Debug, Eq)]
34pub struct SubstringExpr {
35    pub child: Arc<dyn PhysicalExpr>,
36    pub start: i64,
37    pub len: u64,
38}
39
40impl Hash for SubstringExpr {
41    fn hash<H: std::hash::Hasher>(&self, state: &mut H) {
42        self.child.hash(state);
43        self.start.hash(state);
44        self.len.hash(state);
45    }
46}
47
48impl PartialEq for SubstringExpr {
49    fn eq(&self, other: &Self) -> bool {
50        self.child.eq(&other.child) && self.start.eq(&other.start) && self.len.eq(&other.len)
51    }
52}
53
54impl SubstringExpr {
55    pub fn new(child: Arc<dyn PhysicalExpr>, start: i64, len: u64) -> Self {
56        Self { child, start, len }
57    }
58}
59
60impl Display for SubstringExpr {
61    fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
62        write!(
63            f,
64            "StringSpace [start: {}, len: {}, child: {}]",
65            self.start, self.len, self.child
66        )
67    }
68}
69
70impl PhysicalExpr for SubstringExpr {
71    fn as_any(&self) -> &dyn Any {
72        self
73    }
74
75    fn fmt_sql(&self, _: &mut Formatter<'_>) -> std::fmt::Result {
76        unimplemented!()
77    }
78
79    fn data_type(&self, input_schema: &Schema) -> datafusion::common::Result<DataType> {
80        self.child.data_type(input_schema)
81    }
82
83    fn nullable(&self, _: &Schema) -> datafusion::common::Result<bool> {
84        Ok(true)
85    }
86
87    fn evaluate(&self, batch: &RecordBatch) -> datafusion::common::Result<ColumnarValue> {
88        let arg = self.child.evaluate(batch)?;
89        match arg {
90            ColumnarValue::Array(array) => {
91                let result = substring(&array, self.start, self.len)?;
92
93                Ok(ColumnarValue::Array(result))
94            }
95            _ => Err(DataFusionError::Execution(
96                "Substring(scalar) should be fold in Spark JVM side.".to_string(),
97            )),
98        }
99    }
100
101    fn children(&self) -> Vec<&Arc<dyn PhysicalExpr>> {
102        vec![&self.child]
103    }
104
105    fn with_new_children(
106        self: Arc<Self>,
107        children: Vec<Arc<dyn PhysicalExpr>>,
108    ) -> datafusion::common::Result<Arc<dyn PhysicalExpr>> {
109        Ok(Arc::new(SubstringExpr::new(
110            Arc::clone(&children[0]),
111            self.start,
112            self.len,
113        )))
114    }
115}