datafusion_functions/string/
chr.rs1use std::any::Any;
19use std::sync::Arc;
20
21use arrow::array::{ArrayRef, GenericStringBuilder, Int64Array};
22use arrow::datatypes::DataType;
23use arrow::datatypes::DataType::Int64;
24use arrow::datatypes::DataType::Utf8;
25
26use datafusion_common::cast::as_int64_array;
27use datafusion_common::utils::take_function_args;
28use datafusion_common::{Result, ScalarValue, exec_err, internal_err};
29use datafusion_expr::{ColumnarValue, Documentation, Volatility};
30use datafusion_expr::{ScalarFunctionArgs, ScalarUDFImpl, Signature};
31use datafusion_macros::user_doc;
32
33fn chr_array(integer_array: &Int64Array) -> Result<ArrayRef> {
36 let mut builder = GenericStringBuilder::<i32>::with_capacity(
37 integer_array.len(),
38 integer_array.len(),
40 );
41
42 let mut buf = [0u8; 4];
43
44 for integer in integer_array {
45 match integer {
46 Some(integer) => {
47 if let Ok(u) = u32::try_from(integer)
48 && let Some(c) = core::char::from_u32(u)
49 {
50 builder.append_value(c.encode_utf8(&mut buf));
51 continue;
52 }
53
54 return exec_err!("invalid Unicode scalar value: {integer}");
55 }
56 None => builder.append_null(),
57 }
58 }
59
60 Ok(Arc::new(builder.finish()) as ArrayRef)
61}
62
// User-facing documentation metadata for `chr`, consumed by the `user_doc`
// attribute macro.
// NOTE(review): `standard_argument` uses prefix = "String", while the
// signature built in `ChrFunc::new` declares the argument as `Int64` —
// confirm the generated argument description reads as intended.
#[user_doc(
    doc_section(label = "String Functions"),
    description = "Returns a string containing the character with the specified Unicode scalar value.",
    syntax_example = "chr(expression)",
    sql_example = r#"```sql
> select chr(128640);
+--------------------+
| chr(Int64(128640)) |
+--------------------+
| 🚀 |
+--------------------+
```"#,
    standard_argument(name = "expression", prefix = "String"),
    related_udf(name = "ascii")
)]
/// Scalar UDF `chr`: maps an integer code point to the string holding the
/// corresponding character.
#[derive(Debug, PartialEq, Eq, Hash)]
pub struct ChrFunc {
    /// Argument signature reported through `ScalarUDFImpl::signature`.
    signature: Signature,
}
82
83impl Default for ChrFunc {
84 fn default() -> Self {
85 Self::new()
86 }
87}
88
89impl ChrFunc {
90 pub fn new() -> Self {
91 Self {
92 signature: Signature::uniform(1, vec![Int64], Volatility::Immutable),
93 }
94 }
95}
96
97impl ScalarUDFImpl for ChrFunc {
98 fn as_any(&self) -> &dyn Any {
99 self
100 }
101
102 fn name(&self) -> &str {
103 "chr"
104 }
105
106 fn signature(&self) -> &Signature {
107 &self.signature
108 }
109
110 fn return_type(&self, _arg_types: &[DataType]) -> Result<DataType> {
111 Ok(Utf8)
112 }
113
114 fn invoke_with_args(&self, args: ScalarFunctionArgs) -> Result<ColumnarValue> {
115 let [arg] = take_function_args(self.name(), args.args)?;
116
117 match arg {
118 ColumnarValue::Scalar(ScalarValue::Int64(Some(code_point))) => {
119 if let Ok(u) = u32::try_from(code_point)
120 && let Some(c) = core::char::from_u32(u)
121 {
122 Ok(ColumnarValue::Scalar(ScalarValue::Utf8(Some(
123 c.to_string(),
124 ))))
125 } else {
126 exec_err!("invalid Unicode scalar value: {code_point}")
127 }
128 }
129 ColumnarValue::Scalar(ScalarValue::Int64(None)) => {
130 Ok(ColumnarValue::Scalar(ScalarValue::Utf8(None)))
131 }
132 ColumnarValue::Array(array) => {
133 let integer_array = as_int64_array(&array)?;
134 Ok(ColumnarValue::Array(chr_array(integer_array)?))
135 }
136 other => internal_err!(
137 "Unexpected data type {:?} for function chr",
138 other.data_type()
139 ),
140 }
141 }
142
143 fn documentation(&self) -> Option<&Documentation> {
144 self.doc()
145 }
146}
147
#[cfg(test)]
mod tests {
    use super::*;

    use arrow::array::{Array, Int64Array, StringArray};
    use arrow::datatypes::Field;
    use datafusion_common::assert_contains;
    use datafusion_common::config::ConfigOptions;
    use datafusion_expr::{ColumnarValue, ScalarFunctionArgs, ScalarUDFImpl};

    /// Invokes `chr` with a single argument, filling in the field metadata and
    /// default configuration that `ScalarFunctionArgs` requires.
    fn invoke_chr(arg: ColumnarValue, number_rows: usize) -> Result<ColumnarValue> {
        ChrFunc::new().invoke_with_args(ScalarFunctionArgs {
            args: vec![arg],
            arg_fields: vec![Field::new("a", Int64, true).into()],
            number_rows,
            return_field: Field::new("f", Utf8, true).into(),
            config_options: Arc::new(ConfigOptions::default()),
        })
    }

    /// Asserts that evaluating `chr` over `values` fails and that the error
    /// text contains `expected_message`.
    fn assert_invalid_code_points(values: Vec<i64>, expected_message: &str) {
        let number_rows = values.len();
        let input = Arc::new(Int64Array::from(values));
        let result = invoke_chr(ColumnarValue::Array(input), number_rows);
        assert!(result.is_err());
        assert_contains!(result.err().unwrap().to_string(), expected_message);
    }

    #[test]
    fn test_chr_normal() {
        // (input code point, expected output); `None` means SQL NULL.
        let cases: [(Option<i64>, Option<&str>); 12] = [
            (Some(0), Some("\u{0000}")),
            (Some(65), Some("A")),
            (Some(66), Some("B")),
            (Some(67), Some("C")),
            (Some(128640), Some("🚀")),
            (Some(8364), Some("€")),
            (Some(945), Some("α")),
            (None, None),
            (Some(32), Some(" ")),
            (Some(10), Some("\n")),
            (Some(9), Some("\t")),
            (Some(0x10FFFF), Some("\u{10ffff}")),
        ];

        let codes: Vec<Option<i64>> = cases.iter().map(|(code, _)| *code).collect();
        let input = Arc::new(Int64Array::from(codes));

        let result = invoke_chr(ColumnarValue::Array(input), cases.len()).unwrap();
        let ColumnarValue::Array(arr) = result else {
            panic!("Expected array");
        };
        let string_array = arr.as_any().downcast_ref::<StringArray>().unwrap();

        assert_eq!(string_array.len(), cases.len());
        for (i, (_, expected)) in cases.iter().enumerate() {
            match expected {
                Some(text) => {
                    assert!(string_array.is_valid(i), "row {i} should not be null");
                    assert_eq!(string_array.value(i), *text);
                }
                // `value(i)` is "" for a null slot, so comparing values alone
                // cannot tell NULL from an empty string; check validity.
                None => assert!(string_array.is_null(i), "row {i} should be null"),
            }
        }
    }

    #[test]
    fn test_chr_error() {
        // Far above the Unicode range.
        assert_invalid_code_points(
            vec![i64::MAX],
            "invalid Unicode scalar value: 9223372036854775807",
        );

        // One past the largest code point.
        assert_invalid_code_points(
            vec![0x10FFFF + 1],
            "invalid Unicode scalar value: 1114112",
        );

        // Inside the surrogate range.
        assert_invalid_code_points(
            vec![0xD800 + 1],
            "invalid Unicode scalar value: 55297",
        );

        // Large negative value.
        assert_invalid_code_points(
            vec![i64::MIN + 2i64],
            "invalid Unicode scalar value: -9223372036854775806",
        );

        // Small negative value.
        assert_invalid_code_points(vec![-1], "invalid Unicode scalar value: -1");

        // A single invalid element fails the whole batch.
        assert_invalid_code_points(
            vec![65, -1, 66],
            "invalid Unicode scalar value: -1",
        );
    }

    #[test]
    fn test_chr_empty() {
        let input = Arc::new(Int64Array::from(Vec::<i64>::new()));
        let result = invoke_chr(ColumnarValue::Array(input), 0).unwrap();
        let ColumnarValue::Array(arr) = result else {
            panic!("Expected array");
        };
        let string_array = arr.as_any().downcast_ref::<StringArray>().unwrap();
        assert_eq!(string_array.len(), 0);
    }

    #[test]
    fn test_chr_scalar() {
        let result =
            invoke_chr(ColumnarValue::Scalar(ScalarValue::Int64(Some(65))), 1).unwrap();

        match result {
            ColumnarValue::Scalar(ScalarValue::Utf8(Some(s))) => {
                assert_eq!(s, "A");
            }
            other => panic!("Unexpected result: {other:?}"),
        }
    }

    #[test]
    fn test_chr_scalar_null() {
        let result =
            invoke_chr(ColumnarValue::Scalar(ScalarValue::Int64(None)), 1).unwrap();

        match result {
            ColumnarValue::Scalar(ScalarValue::Utf8(None)) => {}
            other => panic!("Unexpected result: {other:?}"),
        }
    }

    #[test]
    fn test_chr_scalar_error() {
        // The scalar path validates code points separately from the array path.
        let result =
            invoke_chr(ColumnarValue::Scalar(ScalarValue::Int64(Some(-1))), 1);
        assert!(result.is_err());
        assert_contains!(
            result.err().unwrap().to_string(),
            "invalid Unicode scalar value: -1"
        );
    }
}