datafusion_spark/function/string/
ascii.rs1use arrow::array::{ArrayAccessor, ArrayIter, ArrayRef, AsArray, Int32Array};
19use arrow::datatypes::DataType;
20use arrow::error::ArrowError;
21use datafusion_common::{internal_err, plan_err, Result};
22use datafusion_expr::ColumnarValue;
23use datafusion_expr::{ScalarFunctionArgs, ScalarUDFImpl, Signature, Volatility};
24use datafusion_functions::utils::make_scalar_function;
25use std::any::Any;
26use std::sync::Arc;
27
28#[derive(Debug, PartialEq, Eq, Hash)]
30pub struct SparkAscii {
31 signature: Signature,
32 aliases: Vec<String>,
33}
34
35impl Default for SparkAscii {
36 fn default() -> Self {
37 Self::new()
38 }
39}
40
41impl SparkAscii {
42 pub fn new() -> Self {
43 Self {
44 signature: Signature::user_defined(Volatility::Immutable),
45 aliases: vec![],
46 }
47 }
48}
49
50impl ScalarUDFImpl for SparkAscii {
51 fn as_any(&self) -> &dyn Any {
52 self
53 }
54
55 fn name(&self) -> &str {
56 "ascii"
57 }
58
59 fn signature(&self) -> &Signature {
60 &self.signature
61 }
62
63 fn return_type(&self, _arg_types: &[DataType]) -> Result<DataType> {
64 Ok(DataType::Int32)
65 }
66
67 fn invoke_with_args(&self, args: ScalarFunctionArgs) -> Result<ColumnarValue> {
68 make_scalar_function(ascii, vec![])(&args.args)
69 }
70
71 fn aliases(&self) -> &[String] {
72 &self.aliases
73 }
74
75 fn coerce_types(&self, arg_types: &[DataType]) -> Result<Vec<DataType>> {
76 if arg_types.len() != 1 {
77 return plan_err!(
78 "The {} function requires 1 argument, but got {}.",
79 self.name(),
80 arg_types.len()
81 );
82 }
83 Ok(vec![DataType::Utf8])
84 }
85}
86
87fn calculate_ascii<'a, V>(array: V) -> Result<ArrayRef, ArrowError>
88where
89 V: ArrayAccessor<Item = &'a str>,
90{
91 let iter = ArrayIter::new(array);
92 let result = iter
93 .map(|string| {
94 string.map(|s| {
95 let mut chars = s.chars();
96 chars.next().map_or(0, |v| v as i32)
97 })
98 })
99 .collect::<Int32Array>();
100
101 Ok(Arc::new(result) as ArrayRef)
102}
103
104pub fn ascii(args: &[ArrayRef]) -> Result<ArrayRef> {
106 match args[0].data_type() {
107 DataType::Utf8 => {
108 let string_array = args[0].as_string::<i32>();
109 Ok(calculate_ascii(string_array)?)
110 }
111 DataType::LargeUtf8 => {
112 let string_array = args[0].as_string::<i64>();
113 Ok(calculate_ascii(string_array)?)
114 }
115 DataType::Utf8View => {
116 let string_array = args[0].as_string_view();
117 Ok(calculate_ascii(string_array)?)
118 }
119 _ => internal_err!("Unsupported data type"),
120 }
121}
122
#[cfg(test)]
mod tests {
    use crate::function::string::ascii::SparkAscii;
    use crate::function::utils::test::test_scalar_function;
    use arrow::array::{Array, Int32Array};
    use arrow::datatypes::DataType::Int32;
    use datafusion_common::{Result, ScalarValue};
    use datafusion_expr::{ColumnarValue, ScalarUDFImpl};

    /// Invokes `SparkAscii` on the same scalar input wrapped as `Utf8`,
    /// `LargeUtf8` and `Utf8View`, checking each against `$EXPECTED`.
    macro_rules! test_ascii_string_invoke {
        ($INPUT:expr, $EXPECTED:expr) => {
            test_scalar_function!(
                SparkAscii::new(),
                vec![ColumnarValue::Scalar(ScalarValue::Utf8($INPUT))],
                $EXPECTED,
                i32,
                Int32,
                Int32Array
            );

            test_scalar_function!(
                SparkAscii::new(),
                vec![ColumnarValue::Scalar(ScalarValue::LargeUtf8($INPUT))],
                $EXPECTED,
                i32,
                Int32,
                Int32Array
            );

            test_scalar_function!(
                SparkAscii::new(),
                vec![ColumnarValue::Scalar(ScalarValue::Utf8View($INPUT))],
                $EXPECTED,
                i32,
                Int32,
                Int32Array
            );
        };
    }

    #[test]
    fn test_ascii_invoke() -> Result<()> {
        // Single ASCII characters, including control characters.
        test_ascii_string_invoke!(Some(String::from("x")), Ok(Some(120)));
        test_ascii_string_invoke!(Some(String::from("a")), Ok(Some(97)));
        test_ascii_string_invoke!(Some(String::from("\n")), Ok(Some(10)));
        test_ascii_string_invoke!(Some(String::from("\t")), Ok(Some(9)));

        // Empty string maps to 0; NULL stays NULL.
        test_ascii_string_invoke!(Some(String::from("")), Ok(Some(0)));
        test_ascii_string_invoke!(None, Ok(None));

        // Only the first character of a longer string is considered.
        test_ascii_string_invoke!(Some(String::from("abc")), Ok(Some(97)));

        // Multi-byte first characters yield the code point, not the first byte.
        test_ascii_string_invoke!(Some(String::from("\u{e9}")), Ok(Some(233)));
        test_ascii_string_invoke!(Some(String::from("\u{1F4AF}")), Ok(Some(128175)));

        Ok(())
    }
}