datafusion_functions/string/
ascii.rs

1// Licensed to the Apache Software Foundation (ASF) under one
2// or more contributor license agreements.  See the NOTICE file
3// distributed with this work for additional information
4// regarding copyright ownership.  The ASF licenses this file
5// to you under the Apache License, Version 2.0 (the
6// "License"); you may not use this file except in compliance
7// with the License.  You may obtain a copy of the License at
8//
9//   http://www.apache.org/licenses/LICENSE-2.0
10//
11// Unless required by applicable law or agreed to in writing,
12// software distributed under the License is distributed on an
13// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14// KIND, either express or implied.  See the License for the
15// specific language governing permissions and limitations
16// under the License.
17
18use crate::utils::make_scalar_function;
19use arrow::array::{ArrayRef, AsArray, Int32Array, StringArrayType};
20use arrow::datatypes::DataType;
21use arrow::error::ArrowError;
22use datafusion_common::types::logical_string;
23use datafusion_common::{internal_err, Result};
24use datafusion_expr::{ColumnarValue, Documentation, TypeSignatureClass};
25use datafusion_expr::{ScalarFunctionArgs, ScalarUDFImpl, Signature, Volatility};
26use datafusion_expr_common::signature::Coercion;
27use datafusion_macros::user_doc;
28use std::any::Any;
29use std::sync::Arc;
30
31#[user_doc(
32    doc_section(label = "String Functions"),
33    description = "Returns the first Unicode scalar value of a string.",
34    syntax_example = "ascii(str)",
35    sql_example = r#"```sql
36> select ascii('abc');
37+--------------------+
38| ascii(Utf8("abc")) |
39+--------------------+
40| 97                 |
41+--------------------+
42> select ascii('🚀');
43+-------------------+
44| ascii(Utf8("🚀")) |
45+-------------------+
46| 128640            |
47+-------------------+
48```"#,
49    standard_argument(name = "str", prefix = "String"),
50    related_udf(name = "chr")
51)]
52#[derive(Debug, PartialEq, Eq, Hash)]
53pub struct AsciiFunc {
54    signature: Signature,
55}
56
57impl Default for AsciiFunc {
58    fn default() -> Self {
59        Self::new()
60    }
61}
62
63impl AsciiFunc {
64    pub fn new() -> Self {
65        Self {
66            signature: Signature::coercible(
67                vec![Coercion::new_exact(TypeSignatureClass::Native(
68                    logical_string(),
69                ))],
70                Volatility::Immutable,
71            ),
72        }
73    }
74}
75
76impl ScalarUDFImpl for AsciiFunc {
77    fn as_any(&self) -> &dyn Any {
78        self
79    }
80
81    fn name(&self) -> &str {
82        "ascii"
83    }
84
85    fn signature(&self) -> &Signature {
86        &self.signature
87    }
88
89    fn return_type(&self, _arg_types: &[DataType]) -> Result<DataType> {
90        Ok(DataType::Int32)
91    }
92
93    fn invoke_with_args(&self, args: ScalarFunctionArgs) -> Result<ColumnarValue> {
94        make_scalar_function(ascii, vec![])(&args.args)
95    }
96
97    fn documentation(&self) -> Option<&Documentation> {
98        self.doc()
99    }
100}
101
102fn calculate_ascii<'a, V>(array: V) -> Result<ArrayRef, ArrowError>
103where
104    V: StringArrayType<'a, Item = &'a str>,
105{
106    let values: Vec<_> = (0..array.len())
107        .map(|i| {
108            if array.is_null(i) {
109                0
110            } else {
111                let s = array.value(i);
112                s.chars().next().map_or(0, |c| c as i32)
113            }
114        })
115        .collect();
116
117    let array = Int32Array::new(values.into(), array.nulls().cloned());
118
119    Ok(Arc::new(array))
120}
121
122/// Returns the numeric code of the first character of the argument.
123pub fn ascii(args: &[ArrayRef]) -> Result<ArrayRef> {
124    match args[0].data_type() {
125        DataType::Utf8 => {
126            let string_array = args[0].as_string::<i32>();
127            Ok(calculate_ascii(string_array)?)
128        }
129        DataType::LargeUtf8 => {
130            let string_array = args[0].as_string::<i64>();
131            Ok(calculate_ascii(string_array)?)
132        }
133        DataType::Utf8View => {
134            let string_array = args[0].as_string_view();
135            Ok(calculate_ascii(string_array)?)
136        }
137        _ => internal_err!("Unsupported data type"),
138    }
139}
140
#[cfg(test)]
mod tests {
    use super::AsciiFunc;
    use crate::utils::test::test_function;
    use arrow::array::{Array, Int32Array};
    use arrow::datatypes::DataType::Int32;
    use datafusion_common::{Result, ScalarValue};
    use datafusion_expr::{ColumnarValue, ScalarUDFImpl};

    /// Runs one `ascii` scalar test case against each string scalar variant
    /// (`Utf8`, `LargeUtf8`, `Utf8View`).
    macro_rules! test_ascii {
        // Internal rule: a single check for one `ScalarValue` variant.
        (@variant $VARIANT:ident, $INPUT:expr, $EXPECTED:expr) => {
            test_function!(
                AsciiFunc::new(),
                vec![ColumnarValue::Scalar(ScalarValue::$VARIANT($INPUT))],
                $EXPECTED,
                i32,
                Int32,
                Int32Array
            );
        };
        ($INPUT:expr, $EXPECTED:expr) => {
            test_ascii!(@variant Utf8, $INPUT, $EXPECTED);
            test_ascii!(@variant LargeUtf8, $INPUT, $EXPECTED);
            test_ascii!(@variant Utf8View, $INPUT, $EXPECTED);
        };
    }

    #[test]
    fn test_functions() -> Result<()> {
        // Plain ASCII letters.
        test_ascii!(Some(String::from("x")), Ok(Some(120)));
        test_ascii!(Some(String::from("a")), Ok(Some(97)));
        // Empty string maps to 0.
        test_ascii!(Some(String::from("")), Ok(Some(0)));
        // Code point outside the ASCII range.
        test_ascii!(Some(String::from("🚀")), Ok(Some(128640)));
        // Control characters.
        test_ascii!(Some(String::from("\n")), Ok(Some(10)));
        test_ascii!(Some(String::from("\t")), Ok(Some(9)));
        // Null input stays null.
        test_ascii!(None, Ok(None));
        Ok(())
    }
}