datafusion_functions/string/
ascii.rs1use arrow::array::{ArrayRef, AsArray, Int32Array, StringArrayType};
19use arrow::datatypes::DataType;
20use arrow::error::ArrowError;
21use datafusion_common::types::logical_string;
22use datafusion_common::utils::take_function_args;
23use datafusion_common::{Result, ScalarValue, internal_err};
24use datafusion_expr::{ColumnarValue, Documentation, TypeSignatureClass};
25use datafusion_expr::{ScalarFunctionArgs, ScalarUDFImpl, Signature, Volatility};
26use datafusion_expr_common::signature::Coercion;
27use datafusion_macros::user_doc;
28use std::any::Any;
29use std::sync::Arc;
30
31#[user_doc(
32 doc_section(label = "String Functions"),
33 description = "Returns the first Unicode scalar value of a string.",
34 syntax_example = "ascii(str)",
35 sql_example = r#"```sql
36> select ascii('abc');
37+--------------------+
38| ascii(Utf8("abc")) |
39+--------------------+
40| 97 |
41+--------------------+
42> select ascii('🚀');
43+-------------------+
44| ascii(Utf8("🚀")) |
45+-------------------+
46| 128640 |
47+-------------------+
48```"#,
49 standard_argument(name = "str", prefix = "String"),
50 related_udf(name = "chr")
51)]
52#[derive(Debug, PartialEq, Eq, Hash)]
53pub struct AsciiFunc {
54 signature: Signature,
55}
56
57impl Default for AsciiFunc {
58 fn default() -> Self {
59 Self::new()
60 }
61}
62
63impl AsciiFunc {
64 pub fn new() -> Self {
65 Self {
66 signature: Signature::coercible(
67 vec![Coercion::new_exact(TypeSignatureClass::Native(
68 logical_string(),
69 ))],
70 Volatility::Immutable,
71 ),
72 }
73 }
74}
75
76impl ScalarUDFImpl for AsciiFunc {
77 fn as_any(&self) -> &dyn Any {
78 self
79 }
80
81 fn name(&self) -> &str {
82 "ascii"
83 }
84
85 fn signature(&self) -> &Signature {
86 &self.signature
87 }
88
89 fn return_type(&self, _arg_types: &[DataType]) -> Result<DataType> {
90 Ok(DataType::Int32)
91 }
92
93 fn invoke_with_args(&self, args: ScalarFunctionArgs) -> Result<ColumnarValue> {
94 let [arg] = take_function_args(self.name(), args.args)?;
95
96 match arg {
97 ColumnarValue::Scalar(scalar) => {
98 if scalar.is_null() {
99 return Ok(ColumnarValue::Scalar(ScalarValue::Int32(None)));
100 }
101
102 match scalar {
103 ScalarValue::Utf8(Some(s))
104 | ScalarValue::LargeUtf8(Some(s))
105 | ScalarValue::Utf8View(Some(s)) => {
106 let result = s.chars().next().map_or(0, |c| c as i32);
107 Ok(ColumnarValue::Scalar(ScalarValue::Int32(Some(result))))
108 }
109 _ => {
110 internal_err!(
111 "Unexpected data type {:?} for function ascii",
112 scalar.data_type()
113 )
114 }
115 }
116 }
117 ColumnarValue::Array(array) => Ok(ColumnarValue::Array(ascii(&[array])?)),
118 }
119 }
120
121 fn documentation(&self) -> Option<&Documentation> {
122 self.doc()
123 }
124}
125
126fn calculate_ascii<'a, V>(array: &V) -> Result<ArrayRef, ArrowError>
127where
128 V: StringArrayType<'a, Item = &'a str>,
129{
130 let values: Vec<_> = (0..array.len())
131 .map(|i| {
132 if array.is_null(i) {
133 0
134 } else {
135 let s = array.value(i);
136 s.chars().next().map_or(0, |c| c as i32)
137 }
138 })
139 .collect();
140
141 let array = Int32Array::new(values.into(), array.nulls().cloned());
142
143 Ok(Arc::new(array))
144}
145
146pub fn ascii(args: &[ArrayRef]) -> Result<ArrayRef> {
148 match args[0].data_type() {
149 DataType::Utf8 => {
150 let string_array = args[0].as_string::<i32>();
151 Ok(calculate_ascii(&string_array)?)
152 }
153 DataType::LargeUtf8 => {
154 let string_array = args[0].as_string::<i64>();
155 Ok(calculate_ascii(&string_array)?)
156 }
157 DataType::Utf8View => {
158 let string_array = args[0].as_string_view();
159 Ok(calculate_ascii(&string_array)?)
160 }
161 _ => internal_err!("Unsupported data type"),
162 }
163}
164
#[cfg(test)]
mod tests {
    use crate::string::ascii::{AsciiFunc, ascii};
    use crate::utils::test::test_function;
    use arrow::array::{
        Array, ArrayRef, AsArray, Int32Array, LargeStringArray, StringArray,
        StringViewArray,
    };
    use arrow::datatypes::DataType::Int32;
    use arrow::datatypes::Int32Type;
    use datafusion_common::{Result, ScalarValue};
    use datafusion_expr::{ColumnarValue, ScalarUDFImpl};
    use std::sync::Arc;

    /// Runs one scalar case against all three string scalar variants
    /// (`Utf8`, `LargeUtf8`, `Utf8View`).
    macro_rules! test_ascii {
        ($INPUT:expr, $EXPECTED:expr) => {
            test_function!(
                AsciiFunc::new(),
                vec![ColumnarValue::Scalar(ScalarValue::Utf8($INPUT))],
                $EXPECTED,
                i32,
                Int32,
                Int32Array
            );

            test_function!(
                AsciiFunc::new(),
                vec![ColumnarValue::Scalar(ScalarValue::LargeUtf8($INPUT))],
                $EXPECTED,
                i32,
                Int32,
                Int32Array
            );

            test_function!(
                AsciiFunc::new(),
                vec![ColumnarValue::Scalar(ScalarValue::Utf8View($INPUT))],
                $EXPECTED,
                i32,
                Int32,
                Int32Array
            );
        };
    }

    #[test]
    fn test_functions() -> Result<()> {
        test_ascii!(Some(String::from("x")), Ok(Some(120)));
        test_ascii!(Some(String::from("a")), Ok(Some(97)));
        test_ascii!(Some(String::from("")), Ok(Some(0)));
        test_ascii!(Some(String::from("🚀")), Ok(Some(128640)));
        test_ascii!(Some(String::from("\n")), Ok(Some(10)));
        test_ascii!(Some(String::from("\t")), Ok(Some(9)));
        test_ascii!(None, Ok(None));
        Ok(())
    }

    /// Feeds `input` through the array kernel and checks both the values and
    /// the null positions of the result.
    fn check_array_kernel(input: ArrayRef) -> Result<()> {
        let result = ascii(&[input])?;
        let expected = Int32Array::from(vec![Some(120), None, Some(0), Some(128640)]);
        assert_eq!(result.as_primitive::<Int32Type>(), &expected);
        Ok(())
    }

    // The scalar cases above pass only scalar arguments, so the array kernel
    // is exercised directly here for every supported string layout.
    #[test]
    fn test_array_inputs() -> Result<()> {
        let rows = vec![Some("xyz"), None, Some(""), Some("🚀")];

        check_array_kernel(Arc::new(StringArray::from(rows.clone())))?;
        check_array_kernel(Arc::new(LargeStringArray::from(rows.clone())))?;
        check_array_kernel(Arc::new(StringViewArray::from(rows)))?;
        Ok(())
    }

    #[test]
    fn test_unsupported_array_type() {
        let input: ArrayRef = Arc::new(Int32Array::from(vec![1]));
        assert!(ascii(&[input]).is_err());
    }
}