datafusion_spark/function/string/
ascii.rs

1// Licensed to the Apache Software Foundation (ASF) under one
2// or more contributor license agreements.  See the NOTICE file
3// distributed with this work for additional information
4// regarding copyright ownership.  The ASF licenses this file
5// to you under the Apache License, Version 2.0 (the
6// "License"); you may not use this file except in compliance
7// with the License.  You may obtain a copy of the License at
8//
9//   http://www.apache.org/licenses/LICENSE-2.0
10//
11// Unless required by applicable law or agreed to in writing,
12// software distributed under the License is distributed on an
13// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14// KIND, either express or implied.  See the License for the
15// specific language governing permissions and limitations
16// under the License.
17
18use arrow::array::{ArrayAccessor, ArrayIter, ArrayRef, AsArray, Int32Array};
19use arrow::datatypes::DataType;
20use arrow::error::ArrowError;
21use datafusion_common::{internal_err, plan_err, Result};
22use datafusion_expr::ColumnarValue;
23use datafusion_expr::{ScalarFunctionArgs, ScalarUDFImpl, Signature, Volatility};
24use datafusion_functions::utils::make_scalar_function;
25use std::any::Any;
26use std::sync::Arc;
27
/// Scalar UDF implementation registered under the name `ascii`.
///
/// Reference for the function this mirrors:
/// <https://spark.apache.org/docs/latest/api/sql/index.html#ascii>
#[derive(Debug, PartialEq, Eq, Hash)]
pub struct SparkAscii {
    /// Signature handed out by `ScalarUDFImpl::signature`.
    signature: Signature,
    /// Alternative names handed out by `ScalarUDFImpl::aliases`.
    aliases: Vec<String>,
}
34
35impl Default for SparkAscii {
36    fn default() -> Self {
37        Self::new()
38    }
39}
40
41impl SparkAscii {
42    pub fn new() -> Self {
43        Self {
44            signature: Signature::user_defined(Volatility::Immutable),
45            aliases: vec![],
46        }
47    }
48}
49
50impl ScalarUDFImpl for SparkAscii {
51    fn as_any(&self) -> &dyn Any {
52        self
53    }
54
55    fn name(&self) -> &str {
56        "ascii"
57    }
58
59    fn signature(&self) -> &Signature {
60        &self.signature
61    }
62
63    fn return_type(&self, _arg_types: &[DataType]) -> Result<DataType> {
64        Ok(DataType::Int32)
65    }
66
67    fn invoke_with_args(&self, args: ScalarFunctionArgs) -> Result<ColumnarValue> {
68        make_scalar_function(ascii, vec![])(&args.args)
69    }
70
71    fn aliases(&self) -> &[String] {
72        &self.aliases
73    }
74
75    fn coerce_types(&self, arg_types: &[DataType]) -> Result<Vec<DataType>> {
76        if arg_types.len() != 1 {
77            return plan_err!(
78                "The {} function requires 1 argument, but got {}.",
79                self.name(),
80                arg_types.len()
81            );
82        }
83        Ok(vec![DataType::Utf8])
84    }
85}
86
87fn calculate_ascii<'a, V>(array: V) -> Result<ArrayRef, ArrowError>
88where
89    V: ArrayAccessor<Item = &'a str>,
90{
91    let iter = ArrayIter::new(array);
92    let result = iter
93        .map(|string| {
94            string.map(|s| {
95                let mut chars = s.chars();
96                chars.next().map_or(0, |v| v as i32)
97            })
98        })
99        .collect::<Int32Array>();
100
101    Ok(Arc::new(result) as ArrayRef)
102}
103
104/// Returns the numeric code of the first character of the argument.
105pub fn ascii(args: &[ArrayRef]) -> Result<ArrayRef> {
106    match args[0].data_type() {
107        DataType::Utf8 => {
108            let string_array = args[0].as_string::<i32>();
109            Ok(calculate_ascii(string_array)?)
110        }
111        DataType::LargeUtf8 => {
112            let string_array = args[0].as_string::<i64>();
113            Ok(calculate_ascii(string_array)?)
114        }
115        DataType::Utf8View => {
116            let string_array = args[0].as_string_view();
117            Ok(calculate_ascii(string_array)?)
118        }
119        _ => internal_err!("Unsupported data type"),
120    }
121}
122
#[cfg(test)]
mod tests {
    use crate::function::string::ascii::SparkAscii;
    use crate::function::utils::test::test_scalar_function;
    use arrow::array::{Array, Int32Array};
    use arrow::datatypes::DataType::Int32;
    use datafusion_common::{Result, ScalarValue};
    use datafusion_expr::{ColumnarValue, ScalarUDFImpl};

    /// Invokes `SparkAscii` with the same optional string wrapped as a
    /// `Utf8`, `LargeUtf8` and `Utf8View` scalar and checks each result
    /// against `$EXPECTED`.
    macro_rules! test_ascii_string_invoke {
        ($INPUT:expr, $EXPECTED:expr) => {
            test_scalar_function!(
                SparkAscii::new(),
                vec![ColumnarValue::Scalar(ScalarValue::Utf8($INPUT))],
                $EXPECTED,
                i32,
                Int32,
                Int32Array
            );

            test_scalar_function!(
                SparkAscii::new(),
                vec![ColumnarValue::Scalar(ScalarValue::LargeUtf8($INPUT))],
                $EXPECTED,
                i32,
                Int32,
                Int32Array
            );

            test_scalar_function!(
                SparkAscii::new(),
                vec![ColumnarValue::Scalar(ScalarValue::Utf8View($INPUT))],
                $EXPECTED,
                i32,
                Int32,
                Int32Array
            );
        };
    }

    #[test]
    fn test_ascii_invoke() -> Result<()> {
        // Single ASCII characters, including control characters.
        test_ascii_string_invoke!(Some(String::from("x")), Ok(Some(120)));
        test_ascii_string_invoke!(Some(String::from("a")), Ok(Some(97)));
        test_ascii_string_invoke!(Some(String::from("\n")), Ok(Some(10)));
        test_ascii_string_invoke!(Some(String::from("\t")), Ok(Some(9)));

        // Only the first character of a longer string is considered.
        test_ascii_string_invoke!(Some(String::from("abc")), Ok(Some(97)));

        // Non-ASCII input yields the code point of the first character,
        // both inside (U+00E9) and outside (U+1F600) the BMP.
        test_ascii_string_invoke!(Some(String::from("\u{e9}t\u{e9}")), Ok(Some(0xE9)));
        test_ascii_string_invoke!(Some(String::from("\u{1F600}")), Ok(Some(0x1F600)));

        // Empty string maps to 0; null stays null.
        test_ascii_string_invoke!(Some(String::from("")), Ok(Some(0)));
        test_ascii_string_invoke!(None, Ok(None));

        Ok(())
    }
}