datafusion_functions/string/
chr.rs

1// Licensed to the Apache Software Foundation (ASF) under one
2// or more contributor license agreements.  See the NOTICE file
3// distributed with this work for additional information
4// regarding copyright ownership.  The ASF licenses this file
5// to you under the Apache License, Version 2.0 (the
6// "License"); you may not use this file except in compliance
7// with the License.  You may obtain a copy of the License at
8//
9//   http://www.apache.org/licenses/LICENSE-2.0
10//
11// Unless required by applicable law or agreed to in writing,
12// software distributed under the License is distributed on an
13// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14// KIND, either express or implied.  See the License for the
15// specific language governing permissions and limitations
16// under the License.
17
18use std::any::Any;
19use std::sync::Arc;
20
21use arrow::array::ArrayRef;
22use arrow::array::GenericStringBuilder;
23use arrow::datatypes::DataType;
24use arrow::datatypes::DataType::Int64;
25use arrow::datatypes::DataType::Utf8;
26
27use crate::utils::make_scalar_function;
28use datafusion_common::cast::as_int64_array;
29use datafusion_common::{exec_err, Result};
30use datafusion_expr::{ColumnarValue, Documentation, Volatility};
31use datafusion_expr::{ScalarFunctionArgs, ScalarUDFImpl, Signature};
32use datafusion_macros::user_doc;
33
34/// Returns the character with the given code.
35/// chr(65) = 'A'
36pub fn chr(args: &[ArrayRef]) -> Result<ArrayRef> {
37    let integer_array = as_int64_array(&args[0])?;
38
39    let mut builder = GenericStringBuilder::<i32>::with_capacity(
40        integer_array.len(),
41        // 1 byte per character, assuming that is the common case
42        integer_array.len(),
43    );
44
45    let mut buf = [0u8; 4];
46
47    for integer in integer_array {
48        match integer {
49            Some(integer) => {
50                if let Ok(u) = u32::try_from(integer) {
51                    if let Some(c) = core::char::from_u32(u) {
52                        builder.append_value(c.encode_utf8(&mut buf));
53                        continue;
54                    }
55                }
56
57                return exec_err!("invalid Unicode scalar value: {integer}");
58            }
59            None => {
60                builder.append_null();
61            }
62        }
63    }
64
65    let result = builder.finish();
66
67    Ok(Arc::new(result) as ArrayRef)
68}
69
70#[user_doc(
71    doc_section(label = "String Functions"),
72    description = "Returns a string containing the character with the specified Unicode scalar value.",
73    syntax_example = "chr(expression)",
74    sql_example = r#"```sql
75> select chr(128640);
76+--------------------+
77| chr(Int64(128640)) |
78+--------------------+
79| 🚀                 |
80+--------------------+
81```"#,
82    standard_argument(name = "expression", prefix = "String"),
83    related_udf(name = "ascii")
84)]
85#[derive(Debug, PartialEq, Eq, Hash)]
86pub struct ChrFunc {
87    signature: Signature,
88}
89
90impl Default for ChrFunc {
91    fn default() -> Self {
92        Self::new()
93    }
94}
95
96impl ChrFunc {
97    pub fn new() -> Self {
98        Self {
99            signature: Signature::uniform(1, vec![Int64], Volatility::Immutable),
100        }
101    }
102}
103
104impl ScalarUDFImpl for ChrFunc {
105    fn as_any(&self) -> &dyn Any {
106        self
107    }
108
109    fn name(&self) -> &str {
110        "chr"
111    }
112
113    fn signature(&self) -> &Signature {
114        &self.signature
115    }
116
117    fn return_type(&self, _arg_types: &[DataType]) -> Result<DataType> {
118        Ok(Utf8)
119    }
120
121    fn invoke_with_args(&self, args: ScalarFunctionArgs) -> Result<ColumnarValue> {
122        make_scalar_function(chr, vec![])(&args.args)
123    }
124
125    fn documentation(&self) -> Option<&Documentation> {
126        self.doc()
127    }
128}
129
#[cfg(test)]
mod tests {
    use super::*;
    use arrow::array::{Array, Int64Array, StringArray};
    use datafusion_common::assert_contains;

    /// Valid code points map to their characters and a NULL input stays NULL.
    #[test]
    fn test_chr_normal() {
        let input = Arc::new(Int64Array::from(vec![
            Some(0),        // NUL character (U+0000), not SQL NULL
            Some(65),       // A
            Some(66),       // B
            Some(67),       // C
            Some(128640),   // 🚀
            Some(8364),     // €
            Some(945),      // α
            None,           // SQL NULL
            Some(32),       // space
            Some(10),       // newline
            Some(9),        // tab
            Some(0x10FFFF), // 0x10FFFF, the largest Unicode code point
        ]));
        let result = chr(&[input]).unwrap();
        let string_array = result.as_any().downcast_ref::<StringArray>().unwrap();
        let expected = [
            Some("\u{0000}"),
            Some("A"),
            Some("B"),
            Some("C"),
            Some("🚀"),
            Some("€"),
            Some("α"),
            None,
            Some(" "),
            Some("\n"),
            Some("\t"),
            Some("\u{10ffff}"),
        ];

        assert_eq!(string_array.len(), expected.len());
        for (i, e) in expected.iter().enumerate() {
            match e {
                Some(s) => {
                    assert!(string_array.is_valid(i), "row {i} should not be NULL");
                    assert_eq!(string_array.value(i), *s, "row {i}");
                }
                // `value(i)` of a null slot reads as "", so nullness has to be
                // checked explicitly to tell NULL apart from an empty string.
                None => assert!(string_array.is_null(i), "row {i} should be NULL"),
            }
        }
    }

    /// Every input that is not a Unicode scalar value is rejected with an
    /// error naming the offending value.
    #[test]
    fn test_chr_error() {
        let cases: [(Vec<i64>, &str); 6] = [
            // invalid Unicode code points (too large)
            (
                vec![i64::MAX],
                "invalid Unicode scalar value: 9223372036854775807",
            ),
            // invalid Unicode code points (too large) case 2
            (
                vec![0x10FFFF + 1],
                "invalid Unicode scalar value: 1114112",
            ),
            // invalid Unicode code points (surrogate code point)
            // link: <https://learn.microsoft.com/en-us/globalization/encoding/unicode-standard#surrogate-pairs>
            (vec![0xD800 + 1], "invalid Unicode scalar value: 55297"),
            // negative input; would be 2 if truncated to u32
            (
                vec![i64::MIN + 2i64],
                "invalid Unicode scalar value: -9223372036854775806",
            ),
            // negative input case 2
            (vec![-1], "invalid Unicode scalar value: -1"),
            // one error with valid values after: A, -1, B
            (vec![65, -1, 66], "invalid Unicode scalar value: -1"),
        ];

        for (values, expected_message) in cases {
            let input = Arc::new(Int64Array::from(values));
            let result = chr(&[input]);
            assert!(result.is_err());
            assert_contains!(result.err().unwrap().to_string(), expected_message);
        }
    }

    /// An empty input array yields an empty output array.
    #[test]
    fn test_chr_empty() {
        // empty input array
        let input = Arc::new(Int64Array::from(Vec::<i64>::new()));
        let result = chr(&[input]).unwrap();
        let string_array = result.as_any().downcast_ref::<StringArray>().unwrap();
        assert_eq!(string_array.len(), 0);
    }
}
241}