Skip to main content

datafusion_functions/string/
ascii.rs

1// Licensed to the Apache Software Foundation (ASF) under one
2// or more contributor license agreements.  See the NOTICE file
3// distributed with this work for additional information
4// regarding copyright ownership.  The ASF licenses this file
5// to you under the Apache License, Version 2.0 (the
6// "License"); you may not use this file except in compliance
7// with the License.  You may obtain a copy of the License at
8//
9//   http://www.apache.org/licenses/LICENSE-2.0
10//
11// Unless required by applicable law or agreed to in writing,
12// software distributed under the License is distributed on an
13// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14// KIND, either express or implied.  See the License for the
15// specific language governing permissions and limitations
16// under the License.
17
18use arrow::array::{ArrayRef, AsArray, Int32Array, StringArrayType};
19use arrow::datatypes::DataType;
20use arrow::error::ArrowError;
21use datafusion_common::types::logical_string;
22use datafusion_common::utils::take_function_args;
23use datafusion_common::{Result, ScalarValue, internal_err};
24use datafusion_expr::{ColumnarValue, Documentation, TypeSignatureClass};
25use datafusion_expr::{ScalarFunctionArgs, ScalarUDFImpl, Signature, Volatility};
26use datafusion_expr_common::signature::Coercion;
27use datafusion_macros::user_doc;
28use std::any::Any;
29use std::sync::Arc;
30
/// Scalar UDF backing the SQL `ascii(str)` function.
///
/// The user-facing documentation (description, syntax and SQL examples) is
/// supplied through the `user_doc` attribute below.
#[user_doc(
    doc_section(label = "String Functions"),
    description = "Returns the first Unicode scalar value of a string.",
    syntax_example = "ascii(str)",
    sql_example = r#"```sql
> select ascii('abc');
+--------------------+
| ascii(Utf8("abc")) |
+--------------------+
| 97                 |
+--------------------+
> select ascii('🚀');
+-------------------+
| ascii(Utf8("🚀")) |
+-------------------+
| 128640            |
+-------------------+
```"#,
    standard_argument(name = "str", prefix = "String"),
    related_udf(name = "chr")
)]
#[derive(Debug, PartialEq, Eq, Hash)]
pub struct AsciiFunc {
    // Argument signature reported to callers through `ScalarUDFImpl::signature`.
    signature: Signature,
}
56
57impl Default for AsciiFunc {
58    fn default() -> Self {
59        Self::new()
60    }
61}
62
63impl AsciiFunc {
64    pub fn new() -> Self {
65        Self {
66            signature: Signature::coercible(
67                vec![Coercion::new_exact(TypeSignatureClass::Native(
68                    logical_string(),
69                ))],
70                Volatility::Immutable,
71            ),
72        }
73    }
74}
75
76impl ScalarUDFImpl for AsciiFunc {
77    fn as_any(&self) -> &dyn Any {
78        self
79    }
80
81    fn name(&self) -> &str {
82        "ascii"
83    }
84
85    fn signature(&self) -> &Signature {
86        &self.signature
87    }
88
89    fn return_type(&self, _arg_types: &[DataType]) -> Result<DataType> {
90        Ok(DataType::Int32)
91    }
92
93    fn invoke_with_args(&self, args: ScalarFunctionArgs) -> Result<ColumnarValue> {
94        let [arg] = take_function_args(self.name(), args.args)?;
95
96        match arg {
97            ColumnarValue::Scalar(scalar) => {
98                if scalar.is_null() {
99                    return Ok(ColumnarValue::Scalar(ScalarValue::Int32(None)));
100                }
101
102                match scalar {
103                    ScalarValue::Utf8(Some(s))
104                    | ScalarValue::LargeUtf8(Some(s))
105                    | ScalarValue::Utf8View(Some(s)) => {
106                        let result = s.chars().next().map_or(0, |c| c as i32);
107                        Ok(ColumnarValue::Scalar(ScalarValue::Int32(Some(result))))
108                    }
109                    _ => {
110                        internal_err!(
111                            "Unexpected data type {:?} for function ascii",
112                            scalar.data_type()
113                        )
114                    }
115                }
116            }
117            ColumnarValue::Array(array) => Ok(ColumnarValue::Array(ascii(&[array])?)),
118        }
119    }
120
121    fn documentation(&self) -> Option<&Documentation> {
122        self.doc()
123    }
124}
125
126fn calculate_ascii<'a, V>(array: &V) -> Result<ArrayRef, ArrowError>
127where
128    V: StringArrayType<'a, Item = &'a str>,
129{
130    let values: Vec<_> = (0..array.len())
131        .map(|i| {
132            if array.is_null(i) {
133                0
134            } else {
135                let s = array.value(i);
136                s.chars().next().map_or(0, |c| c as i32)
137            }
138        })
139        .collect();
140
141    let array = Int32Array::new(values.into(), array.nulls().cloned());
142
143    Ok(Arc::new(array))
144}
145
146/// Returns the numeric code of the first character of the argument.
147pub fn ascii(args: &[ArrayRef]) -> Result<ArrayRef> {
148    match args[0].data_type() {
149        DataType::Utf8 => {
150            let string_array = args[0].as_string::<i32>();
151            Ok(calculate_ascii(&string_array)?)
152        }
153        DataType::LargeUtf8 => {
154            let string_array = args[0].as_string::<i64>();
155            Ok(calculate_ascii(&string_array)?)
156        }
157        DataType::Utf8View => {
158            let string_array = args[0].as_string_view();
159            Ok(calculate_ascii(&string_array)?)
160        }
161        _ => internal_err!("Unsupported data type"),
162    }
163}
164
// Unit tests for the scalar path of `AsciiFunc`.
#[cfg(test)]
mod tests {
    use crate::string::ascii::AsciiFunc;
    use crate::utils::test::test_function;
    // NOTE(review): several of these imports are not named directly in this
    // module; presumably they are needed by the `test_function!` expansion —
    // confirm before removing any.
    use arrow::array::{Array, Int32Array};
    use arrow::datatypes::DataType::Int32;
    use datafusion_common::{Result, ScalarValue};
    use datafusion_expr::{ColumnarValue, ScalarUDFImpl};

    // Checks one (input, expected) pair three times, wrapping the same
    // `Option<String>` input as a `Utf8`, `LargeUtf8` and `Utf8View` scalar.
    macro_rules! test_ascii {
        ($INPUT:expr, $EXPECTED:expr) => {
            test_function!(
                AsciiFunc::new(),
                vec![ColumnarValue::Scalar(ScalarValue::Utf8($INPUT))],
                $EXPECTED,
                i32,
                Int32,
                Int32Array
            );

            test_function!(
                AsciiFunc::new(),
                vec![ColumnarValue::Scalar(ScalarValue::LargeUtf8($INPUT))],
                $EXPECTED,
                i32,
                Int32,
                Int32Array
            );

            test_function!(
                AsciiFunc::new(),
                vec![ColumnarValue::Scalar(ScalarValue::Utf8View($INPUT))],
                $EXPECTED,
                i32,
                Int32,
                Int32Array
            );
        };
    }

    // Covers plain ASCII letters, the empty string (expected 0), a
    // multi-byte character, control characters and a null input.
    #[test]
    fn test_functions() -> Result<()> {
        test_ascii!(Some(String::from("x")), Ok(Some(120)));
        test_ascii!(Some(String::from("a")), Ok(Some(97)));
        test_ascii!(Some(String::from("")), Ok(Some(0)));
        test_ascii!(Some(String::from("🚀")), Ok(Some(128640)));
        test_ascii!(Some(String::from("\n")), Ok(Some(10)));
        test_ascii!(Some(String::from("\t")), Ok(Some(9)));
        test_ascii!(None, Ok(None));
        Ok(())
    }
}