datafusion_expr/
async_udf.rs1use crate::{ReturnFieldArgs, ScalarFunctionArgs, ScalarUDF, ScalarUDFImpl};
19use arrow::datatypes::{DataType, FieldRef};
20use async_trait::async_trait;
21use datafusion_common::error::Result;
22use datafusion_common::internal_err;
23use datafusion_expr_common::columnar_value::ColumnarValue;
24use datafusion_expr_common::signature::Signature;
25use std::any::Any;
26use std::fmt::{Debug, Display};
27use std::hash::{Hash, Hasher};
28use std::sync::Arc;
29
30#[async_trait]
37pub trait AsyncScalarUDFImpl: ScalarUDFImpl {
38 fn ideal_batch_size(&self) -> Option<usize> {
43 None
44 }
45
46 async fn invoke_async_with_args(
48 &self,
49 args: ScalarFunctionArgs,
50 ) -> Result<ColumnarValue>;
51}
52
53#[derive(Debug)]
58pub struct AsyncScalarUDF {
59 inner: Arc<dyn AsyncScalarUDFImpl>,
60}
61
62impl PartialEq for AsyncScalarUDF {
63 fn eq(&self, other: &Self) -> bool {
64 let Self { inner } = self;
66 inner.as_ref().dyn_eq(other.inner.as_ref() as &dyn Any)
67 }
68}
69impl Eq for AsyncScalarUDF {}
70
71impl Hash for AsyncScalarUDF {
72 fn hash<H: Hasher>(&self, state: &mut H) {
73 let Self { inner } = self;
75 inner.dyn_hash(state);
76 }
77}
78
79impl AsyncScalarUDF {
80 pub fn new(inner: Arc<dyn AsyncScalarUDFImpl>) -> Self {
81 Self { inner }
82 }
83
84 pub fn ideal_batch_size(&self) -> Option<usize> {
86 self.inner.ideal_batch_size()
87 }
88
89 pub fn into_scalar_udf(self) -> ScalarUDF {
92 ScalarUDF::new_from_impl(self)
93 }
94
95 pub async fn invoke_async_with_args(
97 &self,
98 args: ScalarFunctionArgs,
99 ) -> Result<ColumnarValue> {
100 self.inner.invoke_async_with_args(args).await
101 }
102}
103
104impl ScalarUDFImpl for AsyncScalarUDF {
105 fn name(&self) -> &str {
106 self.inner.name()
107 }
108
109 fn signature(&self) -> &Signature {
110 self.inner.signature()
111 }
112
113 fn return_type(&self, arg_types: &[DataType]) -> Result<DataType> {
114 self.inner.return_type(arg_types)
115 }
116
117 fn return_field_from_args(&self, args: ReturnFieldArgs) -> Result<FieldRef> {
118 self.inner.return_field_from_args(args)
119 }
120
121 fn invoke_with_args(&self, _args: ScalarFunctionArgs) -> Result<ColumnarValue> {
122 internal_err!("async functions should not be called directly")
123 }
124}
125
126impl Display for AsyncScalarUDF {
127 fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result {
128 write!(f, "AsyncScalarUDF: {}", self.inner.name())
129 }
130}
131
#[cfg(test)]
mod tests {
    use std::{
        hash::{DefaultHasher, Hash, Hasher},
        sync::Arc,
    };

    use arrow::datatypes::DataType;
    use async_trait::async_trait;
    use datafusion_common::error::Result;
    use datafusion_expr_common::{columnar_value::ColumnarValue, signature::Signature};

    use crate::{
        ScalarFunctionArgs, ScalarUDFImpl,
        async_udf::{AsyncScalarUDF, AsyncScalarUDFImpl},
    };

    // Declares a stub async UDF type called `$name` with a single `a: i32`
    // field. Only the derived `PartialEq`/`Eq`/`Hash` matter for the test
    // below; every trait method is left as `todo!()` and is never invoked.
    macro_rules! stub_async_udf {
        ($name:ident) => {
            #[derive(Debug, PartialEq, Eq, Hash, Clone)]
            struct $name {
                a: i32,
            }

            impl ScalarUDFImpl for $name {
                fn name(&self) -> &str {
                    todo!()
                }

                fn signature(&self) -> &Signature {
                    todo!()
                }

                fn return_type(&self, _arg_types: &[DataType]) -> Result<DataType> {
                    todo!()
                }

                fn invoke_with_args(
                    &self,
                    _args: ScalarFunctionArgs,
                ) -> Result<ColumnarValue> {
                    todo!()
                }
            }

            #[async_trait]
            impl AsyncScalarUDFImpl for $name {
                async fn invoke_async_with_args(
                    &self,
                    _args: ScalarFunctionArgs,
                ) -> Result<ColumnarValue> {
                    todo!()
                }
            }
        };
    }

    // Two structurally identical but distinct types.
    stub_async_udf!(TestAsyncUDFImpl1);
    stub_async_udf!(TestAsyncUDFImpl2);

    /// Hashes `value` with a fresh `DefaultHasher` and returns the digest.
    fn hash<T: Hash>(value: &T) -> u64 {
        let mut hasher = DefaultHasher::new();
        value.hash(&mut hasher);
        hasher.finish()
    }

    /// Puts `udf` into its own `Arc` and wraps it in an `AsyncScalarUDF`.
    fn wrap(udf: impl AsyncScalarUDFImpl + 'static) -> AsyncScalarUDF {
        AsyncScalarUDF::new(Arc::new(udf))
    }

    #[test]
    fn test_async_udf_partial_eq_and_hash() {
        // Both wrappers share one and the same `Arc`.
        let shared = Arc::new(TestAsyncUDFImpl1 { a: 1 });
        let lhs =
            AsyncScalarUDF::new(Arc::clone(&shared) as Arc<dyn AsyncScalarUDFImpl>);
        let rhs = AsyncScalarUDF::new(shared);
        assert_eq!(lhs, rhs);
        assert_eq!(hash(&lhs), hash(&rhs));

        // Separate allocations holding equal values of the same type.
        let lhs = wrap(TestAsyncUDFImpl1 { a: 1 });
        let rhs = wrap(TestAsyncUDFImpl1 { a: 1 });
        assert_eq!(lhs, rhs);
        assert_eq!(hash(&lhs), hash(&rhs));

        // Same type, different field value.
        let lhs = wrap(TestAsyncUDFImpl1 { a: 1 });
        let rhs = wrap(TestAsyncUDFImpl1 { a: 2 });
        assert_ne!(lhs, rhs);
        assert_ne!(hash(&lhs), hash(&rhs));

        // Same field value, different concrete type.
        let lhs = wrap(TestAsyncUDFImpl1 { a: 1 });
        let rhs = wrap(TestAsyncUDFImpl2 { a: 1 });
        assert_ne!(lhs, rhs);
        assert_ne!(hash(&lhs), hash(&rhs));
    }
}