datafusion_functions/string/
upper.rs1use crate::string::common::to_upper;
19use arrow::datatypes::DataType;
20use datafusion_common::Result;
21use datafusion_common::types::logical_string;
22use datafusion_expr::{
23 Coercion, ColumnarValue, Documentation, ScalarFunctionArgs, ScalarUDFImpl, Signature,
24 TypeSignatureClass, Volatility,
25};
26use datafusion_macros::user_doc;
27
28#[user_doc(
29 doc_section(label = "String Functions"),
30 description = "Converts a string to upper-case.",
31 syntax_example = "upper(str)",
32 sql_example = r#"```sql
33> select upper('dataFusion');
34+---------------------------+
35| upper(Utf8("dataFusion")) |
36+---------------------------+
37| DATAFUSION |
38+---------------------------+
39```"#,
40 standard_argument(name = "str", prefix = "String"),
41 related_udf(name = "initcap"),
42 related_udf(name = "lower")
43)]
44#[derive(Debug, PartialEq, Eq, Hash)]
45pub struct UpperFunc {
46 signature: Signature,
47}
48
49impl Default for UpperFunc {
50 fn default() -> Self {
51 Self::new()
52 }
53}
54
55impl UpperFunc {
56 pub fn new() -> Self {
57 Self {
58 signature: Signature::coercible(
59 vec![Coercion::new_exact(TypeSignatureClass::Native(
60 logical_string(),
61 ))],
62 Volatility::Immutable,
63 ),
64 }
65 }
66}
67
68impl ScalarUDFImpl for UpperFunc {
69 fn name(&self) -> &str {
70 "upper"
71 }
72
73 fn signature(&self) -> &Signature {
74 &self.signature
75 }
76
77 fn return_type(&self, arg_types: &[DataType]) -> Result<DataType> {
78 Ok(arg_types[0].clone())
79 }
80
81 fn invoke_with_args(&self, args: ScalarFunctionArgs) -> Result<ColumnarValue> {
82 to_upper(&args.args, "upper")
83 }
84
85 fn documentation(&self) -> Option<&Documentation> {
86 self.doc()
87 }
88}
89
#[cfg(test)]
mod tests {
    use super::*;
    use arrow::array::{Array, ArrayRef, StringArray, StringViewArray};
    use arrow::datatypes::Field;
    use datafusion_common::config::ConfigOptions;
    use std::sync::Arc;

    /// Builds a `Utf8` array from optional string slices.
    fn utf8(values: Vec<Option<&str>>) -> ArrayRef {
        Arc::new(StringArray::from(values))
    }

    /// Builds a `Utf8View` array from optional string slices.
    fn utf8_view(values: Vec<Option<&str>>) -> ArrayRef {
        Arc::new(StringViewArray::from(values))
    }

    /// Runs `upper` on a single array argument and returns the resulting array.
    ///
    /// The argument and return fields both use the input's data type.
    fn invoke_upper(input: ArrayRef) -> Result<ArrayRef> {
        let row_count = input.len();
        let input_type = input.data_type().clone();
        let arg_field = Field::new("a", input_type.clone(), true);
        let return_field = Field::new("f", input_type, true);

        let call = ScalarFunctionArgs {
            number_rows: row_count,
            args: vec![ColumnarValue::Array(input)],
            arg_fields: vec![arg_field.into()],
            return_field: return_field.into(),
            config_options: Arc::new(ConfigOptions::default()),
        };

        let outcome = UpperFunc::new().invoke_with_args(call)?;
        // An array argument is expected to yield an array result.
        let ColumnarValue::Array(output) = outcome else {
            unreachable!("upper")
        };
        Ok(output)
    }

    /// Asserts that `upper` maps `input` to exactly `expected`.
    fn check_upper(input: ArrayRef, expected: ArrayRef) -> Result<()> {
        let actual = invoke_upper(input)?;
        assert_eq!(&expected, &actual);
        Ok(())
    }

    #[test]
    fn upper_maybe_optimization() -> Result<()> {
        // The first row is non-ASCII; the remaining rows are ASCII or empty.
        let input = utf8(vec![
            Some("农历新年"),
            None,
            Some("datafusion"),
            Some("0123456789"),
            Some(""),
        ]);
        let expected = utf8(vec![
            Some("农历新年"),
            None,
            Some("DATAFUSION"),
            Some("0123456789"),
            Some(""),
        ]);

        check_upper(input, expected)
    }

    #[test]
    fn upper_full_optimization() -> Result<()> {
        // Every non-null row is pure ASCII.
        let input = utf8(vec![
            Some("arrow"),
            None,
            Some("datafusion"),
            Some("0123456789"),
            Some(""),
        ]);
        let expected = utf8(vec![
            Some("ARROW"),
            None,
            Some("DATAFUSION"),
            Some("0123456789"),
            Some(""),
        ]);

        check_upper(input, expected)
    }

    #[test]
    fn upper_partial_optimization() -> Result<()> {
        let input = utf8(vec![
            Some("arrow"),
            None,
            Some("datafusion"),
            Some("@_"),
            Some("0123456789"),
            Some(""),
            Some("\t\n"),
            Some("ὀδυσσεύς"),
            // "ß" upper-cases to "SS", so the value gains a character.
            Some("tschüß"),
            // Upper-casing this letter changes its UTF-8 byte length.
            Some("ⱦ"),
            Some("农历新年"),
        ]);
        let expected = utf8(vec![
            Some("ARROW"),
            None,
            Some("DATAFUSION"),
            Some("@_"),
            Some("0123456789"),
            Some(""),
            Some("\t\n"),
            Some("ὈΔΥΣΣΕΎΣ"),
            Some("TSCHÜSS"),
            Some("Ⱦ"),
            Some("农历新年"),
        ]);

        check_upper(input, expected)
    }

    #[test]
    fn upper_utf8view() -> Result<()> {
        let input = utf8_view(vec![Some("arrow"), None, Some("tschüß")]);
        let expected = utf8_view(vec![Some("ARROW"), None, Some("TSCHÜSS")]);

        check_upper(input, expected)
    }

    #[test]
    fn upper_ascii_utf8view() -> Result<()> {
        // ASCII-only rows: a mix of values of at most 12 bytes and longer ones.
        let input = utf8_view(vec![
            Some("arrow"),
            None,
            Some("hello world 123"),
            Some(""),
            Some("0123456789"),
            Some("datafusion is cool"),
        ]);
        let expected = utf8_view(vec![
            Some("ARROW"),
            None,
            Some("HELLO WORLD 123"),
            Some(""),
            Some("0123456789"),
            Some("DATAFUSION IS COOL"),
        ]);

        check_upper(input, expected)
    }

    #[test]
    fn upper_sliced_ascii_utf8view() -> Result<()> {
        // Row 0 holds non-ASCII text but lies outside the slice taken below,
        // so only the two ASCII rows in the middle are passed to `upper`.
        let parent = utf8_view(vec![
            Some("农历新年long enough for buffer"),
            Some("hello world 123"),
            Some("datafusion rocks!"),
            Some("zzzzzzzzzzzzzzzz"),
        ]);
        let window = parent.slice(1, 2);

        let result = invoke_upper(window)?;
        let result_sv = result.as_any().downcast_ref::<StringViewArray>().unwrap();

        let expected = StringViewArray::from(vec![
            Some("HELLO WORLD 123"),
            Some("DATAFUSION ROCKS!"),
        ]);
        assert_eq!(result_sv, &expected);

        // 32 = 15 + 17: the single output buffer holds just the sliced rows,
        // not the data of the parent array.
        let buffers = result_sv.data_buffers();
        assert_eq!(buffers.len(), 1);
        assert_eq!(buffers[0].len(), 32);
        Ok(())
    }

    #[test]
    fn upper_utf8view_inline_only_no_buffers() -> Result<()> {
        // Every value is at most 12 bytes long; the longest is exactly 12.
        let input = utf8_view(vec![
            Some("hello"),
            None,
            Some(""),
            Some("0123456789AB"),
        ]);

        let result = invoke_upper(input)?;
        let result_sv = result.as_any().downcast_ref::<StringViewArray>().unwrap();

        let expected = StringViewArray::from(vec![
            Some("HELLO"),
            None,
            Some(""),
            Some("0123456789AB"),
        ]);
        assert_eq!(result_sv, &expected);
        assert_eq!(
            result_sv.data_buffers().len(),
            0,
            "inline-only Utf8View should produce no data buffers"
        );
        Ok(())
    }

    #[test]
    fn upper_utf8view_long_packs_tight() -> Result<()> {
        // Three values exceed 12 bytes (15, 17 and 19 bytes); "abc" and the
        // null contribute nothing to the expected buffer length.
        let input = utf8_view(vec![
            Some("hello world 123"),
            Some("abc"),
            None,
            Some("datafusion rocks!"),
            Some("another long string"),
        ]);

        let result = invoke_upper(input)?;
        let result_sv = result.as_any().downcast_ref::<StringViewArray>().unwrap();

        let expected = StringViewArray::from(vec![
            Some("HELLO WORLD 123"),
            Some("ABC"),
            None,
            Some("DATAFUSION ROCKS!"),
            Some("ANOTHER LONG STRING"),
        ]);
        assert_eq!(result_sv, &expected);

        let buffers = result_sv.data_buffers();
        assert_eq!(buffers.len(), 1);
        assert_eq!(buffers[0].len(), 15 + 17 + 19);
        Ok(())
    }

    #[test]
    fn upper_utf8view_splits_into_multiple_buffers() -> Result<()> {
        // 40 values of 500 bytes each, 20_000 bytes in total. Presumably this
        // exceeds the size of one output data buffer (TODO confirm against
        // the `to_upper` implementation).
        const STR_LEN: usize = 500;
        const N: usize = 40;

        let rows: Vec<Option<String>> = vec![Some("x".repeat(STR_LEN)); N];
        let input = Arc::new(StringViewArray::from(rows)) as ArrayRef;

        let result = invoke_upper(input)?;
        let result_sv = result.as_any().downcast_ref::<StringViewArray>().unwrap();

        let upper_value = "X".repeat(STR_LEN);
        let expected: Vec<Option<&str>> = vec![Some(upper_value.as_str()); N];
        assert_eq!(result_sv, &StringViewArray::from(expected));

        let buffers = result_sv.data_buffers();
        assert!(
            buffers.len() >= 2,
            "expected the output to span more than one data buffer, got {}",
            buffers.len()
        );

        // Across all buffers, the output holds exactly the bytes of the values.
        let total_bytes: usize = buffers.iter().map(|buffer| buffer.len()).sum();
        assert_eq!(total_bytes, N * STR_LEN);
        Ok(())
    }

    #[test]
    fn upper_sliced_utf8() -> Result<()> {
        let parent = utf8(vec![
            Some("aaaaaaaa"),
            Some("hello"),
            Some("world"),
            Some(""),
            Some("zzzzzzzz"),
        ]);
        // Keep rows 1..=3, dropping the first and last rows of the parent.
        let window = parent.slice(1, 3);

        let result = invoke_upper(window)?;
        let result_sa = result.as_any().downcast_ref::<StringArray>().unwrap();

        let expected = StringArray::from(vec![Some("HELLO"), Some("WORLD"), Some("")]);
        assert_eq!(result_sa, &expected);

        // 10 = 5 + 5 + 0: the value bytes cover only the sliced rows.
        assert_eq!(result_sa.value_data().len(), 10);
        Ok(())
    }
}