Skip to main content

datafusion_functions/string/
chr.rs

1// Licensed to the Apache Software Foundation (ASF) under one
2// or more contributor license agreements.  See the NOTICE file
3// distributed with this work for additional information
4// regarding copyright ownership.  The ASF licenses this file
5// to you under the Apache License, Version 2.0 (the
6// "License"); you may not use this file except in compliance
7// with the License.  You may obtain a copy of the License at
8//
9//   http://www.apache.org/licenses/LICENSE-2.0
10//
11// Unless required by applicable law or agreed to in writing,
12// software distributed under the License is distributed on an
13// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14// KIND, either express or implied.  See the License for the
15// specific language governing permissions and limitations
16// under the License.
17
18use std::any::Any;
19use std::sync::Arc;
20
21use arrow::array::{ArrayRef, GenericStringBuilder, Int64Array};
22use arrow::datatypes::DataType;
23use arrow::datatypes::DataType::Int64;
24use arrow::datatypes::DataType::Utf8;
25
26use datafusion_common::cast::as_int64_array;
27use datafusion_common::utils::take_function_args;
28use datafusion_common::{Result, ScalarValue, exec_err, internal_err};
29use datafusion_expr::{ColumnarValue, Documentation, Volatility};
30use datafusion_expr::{ScalarFunctionArgs, ScalarUDFImpl, Signature};
31use datafusion_macros::user_doc;
32
33/// Returns the character with the given code.
34/// chr(65) = 'A'
35fn chr_array(integer_array: &Int64Array) -> Result<ArrayRef> {
36    let mut builder = GenericStringBuilder::<i32>::with_capacity(
37        integer_array.len(),
38        // 1 byte per character, assuming that is the common case
39        integer_array.len(),
40    );
41
42    let mut buf = [0u8; 4];
43
44    for integer in integer_array {
45        match integer {
46            Some(integer) => {
47                if let Ok(u) = u32::try_from(integer)
48                    && let Some(c) = core::char::from_u32(u)
49                {
50                    builder.append_value(c.encode_utf8(&mut buf));
51                    continue;
52                }
53
54                return exec_err!("invalid Unicode scalar value: {integer}");
55            }
56            None => builder.append_null(),
57        }
58    }
59
60    Ok(Arc::new(builder.finish()) as ArrayRef)
61}
62
63#[user_doc(
64    doc_section(label = "String Functions"),
65    description = "Returns a string containing the character with the specified Unicode scalar value.",
66    syntax_example = "chr(expression)",
67    sql_example = r#"```sql
68> select chr(128640);
69+--------------------+
70| chr(Int64(128640)) |
71+--------------------+
72| 🚀                 |
73+--------------------+
74```"#,
75    standard_argument(name = "expression", prefix = "String"),
76    related_udf(name = "ascii")
77)]
78#[derive(Debug, PartialEq, Eq, Hash)]
79pub struct ChrFunc {
80    signature: Signature,
81}
82
83impl Default for ChrFunc {
84    fn default() -> Self {
85        Self::new()
86    }
87}
88
89impl ChrFunc {
90    pub fn new() -> Self {
91        Self {
92            signature: Signature::uniform(1, vec![Int64], Volatility::Immutable),
93        }
94    }
95}
96
97impl ScalarUDFImpl for ChrFunc {
98    fn as_any(&self) -> &dyn Any {
99        self
100    }
101
102    fn name(&self) -> &str {
103        "chr"
104    }
105
106    fn signature(&self) -> &Signature {
107        &self.signature
108    }
109
110    fn return_type(&self, _arg_types: &[DataType]) -> Result<DataType> {
111        Ok(Utf8)
112    }
113
114    fn invoke_with_args(&self, args: ScalarFunctionArgs) -> Result<ColumnarValue> {
115        let [arg] = take_function_args(self.name(), args.args)?;
116
117        match arg {
118            ColumnarValue::Scalar(ScalarValue::Int64(Some(code_point))) => {
119                if let Ok(u) = u32::try_from(code_point)
120                    && let Some(c) = core::char::from_u32(u)
121                {
122                    Ok(ColumnarValue::Scalar(ScalarValue::Utf8(Some(
123                        c.to_string(),
124                    ))))
125                } else {
126                    exec_err!("invalid Unicode scalar value: {code_point}")
127                }
128            }
129            ColumnarValue::Scalar(ScalarValue::Int64(None)) => {
130                Ok(ColumnarValue::Scalar(ScalarValue::Utf8(None)))
131            }
132            ColumnarValue::Array(array) => {
133                let integer_array = as_int64_array(&array)?;
134                Ok(ColumnarValue::Array(chr_array(integer_array)?))
135            }
136            other => internal_err!(
137                "Unexpected data type {:?} for function chr",
138                other.data_type()
139            ),
140        }
141    }
142
143    fn documentation(&self) -> Option<&Documentation> {
144        self.doc()
145    }
146}
147
#[cfg(test)]
mod tests {
    use super::*;

    use arrow::array::{Array, Int64Array, StringArray};
    use arrow::datatypes::Field;
    use datafusion_common::assert_contains;
    use datafusion_common::config::ConfigOptions;
    use datafusion_expr::{ColumnarValue, ScalarFunctionArgs, ScalarUDFImpl};

    /// Calls `chr` on a single argument, supplying the field metadata and
    /// default configuration that `invoke_with_args` expects.
    fn invoke_chr(arg: ColumnarValue, number_rows: usize) -> Result<ColumnarValue> {
        let call = ScalarFunctionArgs {
            args: vec![arg],
            arg_fields: vec![Field::new("a", Int64, true).into()],
            number_rows,
            return_field: Field::new("f", Utf8, true).into(),
            config_options: Arc::new(ConfigOptions::default()),
        };
        ChrFunc::new().invoke_with_args(call)
    }

    #[test]
    fn test_chr_normal() {
        // Each input code point is paired with the string `value(i)` is
        // expected to return; the null row reads back as an empty string.
        let cases: [(Option<i64>, &str); 12] = [
            (Some(0), "\u{0000}"),
            (Some(65), "A"),
            (Some(66), "B"),
            (Some(67), "C"),
            (Some(128640), "🚀"),
            (Some(8364), "€"),
            (Some(945), "α"),
            (None, ""),
            (Some(32), " "),
            (Some(10), "\n"),
            (Some(9), "\t"),
            // 0x10FFFF is the largest Unicode code point.
            (Some(0x10FFFF), "\u{10ffff}"),
        ];

        let codes: Vec<Option<i64>> = cases.iter().map(|(code, _)| *code).collect();
        let input = Arc::new(Int64Array::from(codes));

        let result = invoke_chr(ColumnarValue::Array(input), cases.len()).unwrap();
        let arr = match result {
            ColumnarValue::Array(arr) => arr,
            _ => panic!("Expected array"),
        };
        let string_array = arr.as_any().downcast_ref::<StringArray>().unwrap();

        assert_eq!(string_array.len(), cases.len());
        for (row, (_, want)) in cases.iter().enumerate() {
            assert_eq!(string_array.value(row), *want);
        }
    }

    #[test]
    fn test_chr_error() {
        // Inputs containing an invalid code point, with the error text each
        // one must produce.
        let cases: [(Vec<i64>, &str); 6] = [
            (
                vec![i64::MAX],
                "invalid Unicode scalar value: 9223372036854775807",
            ),
            (
                vec![0x10FFFF + 1],
                "invalid Unicode scalar value: 1114112",
            ),
            (vec![0xD800 + 1], "invalid Unicode scalar value: 55297"),
            (
                vec![i64::MIN + 2i64],
                "invalid Unicode scalar value: -9223372036854775806",
            ),
            (vec![-1], "invalid Unicode scalar value: -1"),
            (vec![65, -1, 66], "invalid Unicode scalar value: -1"),
        ];

        for (codes, expected_message) in cases {
            let number_rows = codes.len();
            let input = Arc::new(Int64Array::from(codes));
            let result = invoke_chr(ColumnarValue::Array(input), number_rows);
            assert!(result.is_err());
            assert_contains!(result.err().unwrap().to_string(), expected_message);
        }
    }

    #[test]
    fn test_chr_empty() {
        let no_codes: Vec<i64> = Vec::new();
        let input = Arc::new(Int64Array::from(no_codes));

        let result = invoke_chr(ColumnarValue::Array(input), 0).unwrap();
        let arr = match result {
            ColumnarValue::Array(arr) => arr,
            _ => panic!("Expected array"),
        };
        let string_array = arr.as_any().downcast_ref::<StringArray>().unwrap();
        assert_eq!(string_array.len(), 0);
    }

    #[test]
    fn test_chr_scalar() {
        let arg = ColumnarValue::Scalar(ScalarValue::Int64(Some(65)));
        let result = invoke_chr(arg, 1).unwrap();

        if let ColumnarValue::Scalar(ScalarValue::Utf8(Some(s))) = &result {
            assert_eq!(s.as_str(), "A");
        } else {
            panic!("Unexpected result: {result:?}");
        }
    }

    #[test]
    fn test_chr_scalar_null() {
        let arg = ColumnarValue::Scalar(ScalarValue::Int64(None));
        let result = invoke_chr(arg, 1).unwrap();

        if !matches!(result, ColumnarValue::Scalar(ScalarValue::Utf8(None))) {
            panic!("Unexpected result: {result:?}");
        }
    }
}