datafusion_comet_spark_expr/string_funcs/
substring.rs1#![allow(deprecated)]
19
20use crate::kernels::strings::substring;
21use arrow::datatypes::{DataType, Schema};
22use arrow::record_batch::RecordBatch;
23use datafusion::common::DataFusionError;
24use datafusion::logical_expr::ColumnarValue;
25use datafusion::physical_expr::PhysicalExpr;
26use std::{
27 any::Any,
28 fmt::{Display, Formatter},
29 hash::Hash,
30 sync::Arc,
31};
32
33#[derive(Debug, Eq)]
34pub struct SubstringExpr {
35 pub child: Arc<dyn PhysicalExpr>,
36 pub start: i64,
37 pub len: u64,
38}
39
40impl Hash for SubstringExpr {
41 fn hash<H: std::hash::Hasher>(&self, state: &mut H) {
42 self.child.hash(state);
43 self.start.hash(state);
44 self.len.hash(state);
45 }
46}
47
48impl PartialEq for SubstringExpr {
49 fn eq(&self, other: &Self) -> bool {
50 self.child.eq(&other.child) && self.start.eq(&other.start) && self.len.eq(&other.len)
51 }
52}
53
54impl SubstringExpr {
55 pub fn new(child: Arc<dyn PhysicalExpr>, start: i64, len: u64) -> Self {
56 Self { child, start, len }
57 }
58}
59
60impl Display for SubstringExpr {
61 fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
62 write!(
63 f,
64 "StringSpace [start: {}, len: {}, child: {}]",
65 self.start, self.len, self.child
66 )
67 }
68}
69
70impl PhysicalExpr for SubstringExpr {
71 fn as_any(&self) -> &dyn Any {
72 self
73 }
74
75 fn fmt_sql(&self, _: &mut Formatter<'_>) -> std::fmt::Result {
76 unimplemented!()
77 }
78
79 fn data_type(&self, input_schema: &Schema) -> datafusion::common::Result<DataType> {
80 self.child.data_type(input_schema)
81 }
82
83 fn nullable(&self, _: &Schema) -> datafusion::common::Result<bool> {
84 Ok(true)
85 }
86
87 fn evaluate(&self, batch: &RecordBatch) -> datafusion::common::Result<ColumnarValue> {
88 let arg = self.child.evaluate(batch)?;
89 match arg {
90 ColumnarValue::Array(array) => {
91 let result = substring(&array, self.start, self.len)?;
92
93 Ok(ColumnarValue::Array(result))
94 }
95 _ => Err(DataFusionError::Execution(
96 "Substring(scalar) should be fold in Spark JVM side.".to_string(),
97 )),
98 }
99 }
100
101 fn children(&self) -> Vec<&Arc<dyn PhysicalExpr>> {
102 vec![&self.child]
103 }
104
105 fn with_new_children(
106 self: Arc<Self>,
107 children: Vec<Arc<dyn PhysicalExpr>>,
108 ) -> datafusion::common::Result<Arc<dyn PhysicalExpr>> {
109 Ok(Arc::new(SubstringExpr::new(
110 Arc::clone(&children[0]),
111 self.start,
112 self.len,
113 )))
114 }
115}