Skip to main content

datafusion_functions/string/
concat_ws.rs

1// Licensed to the Apache Software Foundation (ASF) under one
2// or more contributor license agreements.  See the NOTICE file
3// distributed with this work for additional information
4// regarding copyright ownership.  The ASF licenses this file
5// to you under the Apache License, Version 2.0 (the
6// "License"); you may not use this file except in compliance
7// with the License.  You may obtain a copy of the License at
8//
9//   http://www.apache.org/licenses/LICENSE-2.0
10//
11// Unless required by applicable law or agreed to in writing,
12// software distributed under the License is distributed on an
13// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14// KIND, either express or implied.  See the License for the
15// specific language governing permissions and limitations
16// under the License.
17
18use arrow::array::Array;
19use std::sync::Arc;
20
21use arrow::datatypes::DataType;
22
23use crate::string::concat;
24use crate::string::concat::simplify_concat;
25use crate::string::concat_ws;
26use crate::strings::{
27    ColumnarValueRef, ConcatLargeStringBuilder, ConcatStringBuilder,
28    ConcatStringViewBuilder,
29};
30use datafusion_common::cast::{
31    as_large_string_array, as_string_array, as_string_view_array,
32};
33use datafusion_common::{Result, ScalarValue, exec_err, internal_err, plan_err};
34use datafusion_expr::expr::ScalarFunction;
35use datafusion_expr::simplify::{ExprSimplifyResult, SimplifyContext};
36use datafusion_expr::{ColumnarValue, Documentation, Expr, Volatility, lit};
37use datafusion_expr::{ScalarFunctionArgs, ScalarUDFImpl, Signature};
38use datafusion_macros::user_doc;
39
40#[user_doc(
41    doc_section(label = "String Functions"),
42    description = "Concatenates multiple strings together with a specified separator.",
43    syntax_example = "concat_ws(separator, str[, ..., str_n])",
44    sql_example = r#"```sql
45> select concat_ws('_', 'data', 'fusion');
46+--------------------------------------------------+
47| concat_ws(Utf8("_"),Utf8("data"),Utf8("fusion")) |
48+--------------------------------------------------+
49| data_fusion                                      |
50+--------------------------------------------------+
51```"#,
52    argument(
53        name = "separator",
54        description = "Separator to insert between concatenated strings."
55    ),
56    argument(
57        name = "str",
58        description = "String expression to operate on. Can be a constant, column, or function, and any combination of operators."
59    ),
60    argument(
61        name = "str_n",
62        description = "Subsequent string expressions to concatenate."
63    ),
64    related_udf(name = "concat")
65)]
66#[derive(Debug, PartialEq, Eq, Hash)]
67pub struct ConcatWsFunc {
68    signature: Signature,
69}
70
71impl Default for ConcatWsFunc {
72    fn default() -> Self {
73        ConcatWsFunc::new()
74    }
75}
76
77impl ConcatWsFunc {
78    pub fn new() -> Self {
79        use DataType::*;
80        Self {
81            signature: Signature::variadic(
82                vec![Utf8View, Utf8, LargeUtf8],
83                Volatility::Immutable,
84            ),
85        }
86    }
87}
88
89impl ScalarUDFImpl for ConcatWsFunc {
90    fn name(&self) -> &str {
91        "concat_ws"
92    }
93
94    fn signature(&self) -> &Signature {
95        &self.signature
96    }
97
98    /// Match the return type to the input types to avoid unnecessary casts. On
99    /// mixed inputs, prefer Utf8View; prefer LargeUtf8 over Utf8 to avoid
100    /// potential overflow on LargeUtf8 input.
101    fn return_type(&self, arg_types: &[DataType]) -> Result<DataType> {
102        use DataType::*;
103        if arg_types.contains(&Utf8View) {
104            Ok(Utf8View)
105        } else if arg_types.contains(&LargeUtf8) {
106            Ok(LargeUtf8)
107        } else {
108            Ok(Utf8)
109        }
110    }
111
112    /// Concatenates all but the first argument, with separators. The first
113    /// argument is used as the separator string, and should not be NULL. Other
114    /// NULL arguments are ignored.
115    /// concat_ws(',', 'abcde', 2, NULL, 22) = 'abcde,2,22'
116    fn invoke_with_args(&self, args: ScalarFunctionArgs) -> Result<ColumnarValue> {
117        let ScalarFunctionArgs { args, .. } = args;
118
119        if args.len() < 2 {
120            return exec_err!(
121                "concat_ws was called with {} arguments. It requires at least 2.",
122                args.len()
123            );
124        }
125
126        let return_datatype = if args.iter().any(|c| c.data_type() == DataType::Utf8View)
127        {
128            DataType::Utf8View
129        } else if args.iter().any(|c| c.data_type() == DataType::LargeUtf8) {
130            DataType::LargeUtf8
131        } else {
132            DataType::Utf8
133        };
134
135        let array_len = args.iter().find_map(|x| match x {
136            ColumnarValue::Array(array) => Some(array.len()),
137            _ => None,
138        });
139
140        // Scalar
141        if array_len.is_none() {
142            let ColumnarValue::Scalar(scalar) = &args[0] else {
143                unreachable!()
144            };
145            let sep = match scalar.try_as_str() {
146                Some(Some(s)) => s,
147                Some(None) => {
148                    // null literal string
149                    return match return_datatype {
150                        DataType::Utf8View => {
151                            Ok(ColumnarValue::Scalar(ScalarValue::Utf8View(None)))
152                        }
153                        DataType::LargeUtf8 => {
154                            Ok(ColumnarValue::Scalar(ScalarValue::LargeUtf8(None)))
155                        }
156                        _ => Ok(ColumnarValue::Scalar(ScalarValue::Utf8(None))),
157                    };
158                }
159                None => return internal_err!("Expected string literal, got {scalar:?}"),
160            };
161
162            let mut values = Vec::with_capacity(args.len() - 1);
163            for arg in &args[1..] {
164                let ColumnarValue::Scalar(scalar) = arg else {
165                    unreachable!()
166                };
167
168                match scalar.try_as_str() {
169                    Some(Some(v)) => values.push(v),
170                    Some(None) => {} // null literal string
171                    None => {
172                        return internal_err!("Expected string literal, got {scalar:?}");
173                    }
174                }
175            }
176            let result = values.join(sep);
177
178            return match return_datatype {
179                DataType::Utf8View => {
180                    Ok(ColumnarValue::Scalar(ScalarValue::Utf8View(Some(result))))
181                }
182                DataType::LargeUtf8 => {
183                    Ok(ColumnarValue::Scalar(ScalarValue::LargeUtf8(Some(result))))
184                }
185                _ => Ok(ColumnarValue::Scalar(ScalarValue::Utf8(Some(result)))),
186            };
187        }
188
189        // Array
190        let len = array_len.unwrap();
191        let mut data_size = 0;
192
193        // parse sep
194        let sep = match &args[0] {
195            ColumnarValue::Scalar(scalar) => match scalar.try_as_str() {
196                Some(Some(s)) => {
197                    data_size += s.len() * len * (args.len() - 2); // estimate
198                    ColumnarValueRef::Scalar(s.as_bytes())
199                }
200                Some(None) => {
201                    return match return_datatype {
202                        DataType::Utf8View => {
203                            Ok(ColumnarValue::Scalar(ScalarValue::Utf8View(None)))
204                        }
205                        DataType::LargeUtf8 => {
206                            Ok(ColumnarValue::Scalar(ScalarValue::LargeUtf8(None)))
207                        }
208                        _ => Ok(ColumnarValue::Scalar(ScalarValue::Utf8(None))),
209                    };
210                }
211                None => {
212                    return internal_err!("Expected string separator, got {scalar:?}");
213                }
214            },
215            ColumnarValue::Array(array) => match array.data_type() {
216                DataType::Utf8 => {
217                    let string_array = as_string_array(array)?;
218                    data_size += string_array.values().len() * (args.len() - 2);
219                    if array.is_nullable() {
220                        ColumnarValueRef::NullableArray(string_array)
221                    } else {
222                        ColumnarValueRef::NonNullableArray(string_array)
223                    }
224                }
225                DataType::LargeUtf8 => {
226                    let string_array = as_large_string_array(array)?;
227                    data_size += string_array.values().len() * (args.len() - 2);
228                    if array.is_nullable() {
229                        ColumnarValueRef::NullableLargeStringArray(string_array)
230                    } else {
231                        ColumnarValueRef::NonNullableLargeStringArray(string_array)
232                    }
233                }
234                DataType::Utf8View => {
235                    let string_array = as_string_view_array(array)?;
236                    data_size +=
237                        string_array.total_buffer_bytes_used() * (args.len() - 2);
238                    if array.is_nullable() {
239                        ColumnarValueRef::NullableStringViewArray(string_array)
240                    } else {
241                        ColumnarValueRef::NonNullableStringViewArray(string_array)
242                    }
243                }
244                other => {
245                    return plan_err!(
246                        "Input was {other} which is not a supported datatype for concat_ws separator"
247                    );
248                }
249            },
250        };
251
252        let mut columns = Vec::with_capacity(args.len() - 1);
253        for arg in &args[1..] {
254            match arg {
255                ColumnarValue::Scalar(ScalarValue::Utf8(maybe_value))
256                | ColumnarValue::Scalar(ScalarValue::LargeUtf8(maybe_value))
257                | ColumnarValue::Scalar(ScalarValue::Utf8View(maybe_value)) => {
258                    if let Some(s) = maybe_value {
259                        data_size += s.len() * len;
260                        columns.push(ColumnarValueRef::Scalar(s.as_bytes()));
261                    }
262                }
263                ColumnarValue::Array(array) => {
264                    match array.data_type() {
265                        DataType::Utf8 => {
266                            let string_array = as_string_array(array)?;
267
268                            data_size += string_array.values().len();
269                            let column = if array.is_nullable() {
270                                ColumnarValueRef::NullableArray(string_array)
271                            } else {
272                                ColumnarValueRef::NonNullableArray(string_array)
273                            };
274                            columns.push(column);
275                        }
276                        DataType::LargeUtf8 => {
277                            let string_array = as_large_string_array(array)?;
278
279                            data_size += string_array.values().len();
280                            let column = if array.is_nullable() {
281                                ColumnarValueRef::NullableLargeStringArray(string_array)
282                            } else {
283                                ColumnarValueRef::NonNullableLargeStringArray(
284                                    string_array,
285                                )
286                            };
287                            columns.push(column);
288                        }
289                        DataType::Utf8View => {
290                            let string_array = as_string_view_array(array)?;
291
292                            // This is an estimate; in particular, it will
293                            // undercount arrays of short strings (<= 12 bytes).
294                            data_size += string_array.total_buffer_bytes_used();
295                            let column = if array.is_nullable() {
296                                ColumnarValueRef::NullableStringViewArray(string_array)
297                            } else {
298                                ColumnarValueRef::NonNullableStringViewArray(string_array)
299                            };
300                            columns.push(column);
301                        }
302                        other => {
303                            return plan_err!(
304                                "Input was {other} which is not a supported datatype for concat_ws function."
305                            );
306                        }
307                    };
308                }
309                _ => unreachable!(),
310            }
311        }
312
313        match return_datatype {
314            DataType::Utf8View => {
315                let mut builder = ConcatStringViewBuilder::with_capacity(len, data_size);
316                for i in 0..len {
317                    if !sep.is_valid(i) {
318                        builder.append_offset()?;
319                        continue;
320                    }
321                    let mut first = true;
322                    for column in &columns {
323                        if column.is_valid(i) {
324                            if !first {
325                                builder.write::<false>(&sep, i);
326                            }
327                            builder.write::<false>(column, i);
328                            first = false;
329                        }
330                    }
331                    builder.append_offset()?;
332                }
333                Ok(ColumnarValue::Array(Arc::new(builder.finish(sep.nulls())?)))
334            }
335            DataType::LargeUtf8 => {
336                let mut builder = ConcatLargeStringBuilder::with_capacity(len, data_size);
337                for i in 0..len {
338                    if !sep.is_valid(i) {
339                        builder.append_offset()?;
340                        continue;
341                    }
342                    let mut first = true;
343                    for column in &columns {
344                        if column.is_valid(i) {
345                            if !first {
346                                builder.write::<false>(&sep, i);
347                            }
348                            builder.write::<false>(column, i);
349                            first = false;
350                        }
351                    }
352                    builder.append_offset()?;
353                }
354                Ok(ColumnarValue::Array(Arc::new(builder.finish(sep.nulls())?)))
355            }
356            _ => {
357                let mut builder = ConcatStringBuilder::with_capacity(len, data_size);
358                for i in 0..len {
359                    if !sep.is_valid(i) {
360                        builder.append_offset()?;
361                        continue;
362                    }
363                    let mut first = true;
364                    for column in &columns {
365                        if column.is_valid(i) {
366                            if !first {
367                                builder.write::<false>(&sep, i);
368                            }
369                            builder.write::<false>(column, i);
370                            first = false;
371                        }
372                    }
373                    builder.append_offset()?;
374                }
375                Ok(ColumnarValue::Array(Arc::new(builder.finish(sep.nulls())?)))
376            }
377        }
378    }
379
380    /// Simply the `concat_ws` function by
381    /// 1. folding to `null` if the delimiter is null
382    /// 2. filtering out `null` arguments
383    /// 3. using `concat` to replace `concat_ws` if the delimiter is an empty string
384    /// 4. concatenating contiguous literals if the delimiter is a literal.
385    fn simplify(
386        &self,
387        args: Vec<Expr>,
388        _info: &SimplifyContext,
389    ) -> Result<ExprSimplifyResult> {
390        match &args[..] {
391            [delimiter, vals @ ..] => simplify_concat_ws(delimiter, vals),
392            _ => Ok(ExprSimplifyResult::Original(args)),
393        }
394    }
395
396    fn documentation(&self) -> Option<&Documentation> {
397        self.doc()
398    }
399}
400
401fn simplify_concat_ws(delimiter: &Expr, args: &[Expr]) -> Result<ExprSimplifyResult> {
402    // Preserve the delimiter's string type for any new literals produced
403    // during simplification.
404    let delimiter_type = match delimiter {
405        Expr::Literal(v, _) => v.data_type(),
406        _ => DataType::Utf8,
407    };
408
409    let typed_lit = |s: String| -> Expr {
410        match delimiter_type {
411            DataType::LargeUtf8 => lit(ScalarValue::LargeUtf8(Some(s))),
412            DataType::Utf8View => lit(ScalarValue::Utf8View(Some(s))),
413            _ => lit(s),
414        }
415    };
416
417    match delimiter {
418        Expr::Literal(
419            ScalarValue::Utf8(delimiter)
420            | ScalarValue::LargeUtf8(delimiter)
421            | ScalarValue::Utf8View(delimiter),
422            _,
423        ) => {
424            match delimiter {
425                // When the delimiter is the empty string, replace `concat_ws`
426                // with `concat`
427                Some(delimiter) if delimiter.is_empty() => {
428                    match simplify_concat(args.to_vec())? {
429                        ExprSimplifyResult::Original(_) => {
430                            Ok(ExprSimplifyResult::Simplified(Expr::ScalarFunction(
431                                ScalarFunction {
432                                    func: concat(),
433                                    args: args.to_vec(),
434                                },
435                            )))
436                        }
437                        expr => Ok(expr),
438                    }
439                }
440                Some(delimiter) => {
441                    let mut new_args = Vec::with_capacity(args.len());
442                    new_args.push(typed_lit(delimiter.to_string()));
443                    let mut contiguous_scalar = None;
444                    for arg in args {
445                        match arg {
446                            // filter out null args
447                            Expr::Literal(
448                                ScalarValue::Utf8(None)
449                                | ScalarValue::LargeUtf8(None)
450                                | ScalarValue::Utf8View(None),
451                                _,
452                            ) => {}
453                            Expr::Literal(
454                                ScalarValue::Utf8(Some(v))
455                                | ScalarValue::LargeUtf8(Some(v))
456                                | ScalarValue::Utf8View(Some(v)),
457                                _,
458                            ) => match contiguous_scalar {
459                                None => contiguous_scalar = Some(v.to_string()),
460                                Some(mut pre) => {
461                                    pre += delimiter;
462                                    pre += v;
463                                    contiguous_scalar = Some(pre)
464                                }
465                            },
466                            Expr::Literal(s, _) => {
467                                return internal_err!(
468                                    "The scalar {s} should be casted to string type during the type coercion."
469                                );
470                            }
471                            // If the arg is not a literal, we should first push the current `contiguous_scalar`
472                            // to the `new_args` and reset it to None.
473                            // Then pushing this arg to the `new_args`.
474                            arg => {
475                                if let Some(val) = contiguous_scalar {
476                                    new_args.push(typed_lit(val));
477                                }
478                                new_args.push(arg.clone());
479                                contiguous_scalar = None;
480                            }
481                        }
482                    }
483                    if let Some(val) = contiguous_scalar {
484                        new_args.push(typed_lit(val));
485                    }
486
487                    Ok(ExprSimplifyResult::Simplified(Expr::ScalarFunction(
488                        ScalarFunction {
489                            func: concat_ws(),
490                            args: new_args,
491                        },
492                    )))
493                }
494                // If the delimiter is null, then the value of the whole expression is null.
495                None => {
496                    let null_scalar = match delimiter_type {
497                        DataType::LargeUtf8 => ScalarValue::LargeUtf8(None),
498                        DataType::Utf8View => ScalarValue::Utf8View(None),
499                        _ => ScalarValue::Utf8(None),
500                    };
501                    Ok(ExprSimplifyResult::Simplified(Expr::Literal(
502                        null_scalar,
503                        None,
504                    )))
505                }
506            }
507        }
508        Expr::Literal(d, _) => internal_err!(
509            "The scalar {d} should be casted to string type during the type coercion."
510        ),
511        _ => {
512            let mut args = args
513                .iter()
514                .filter(|&x| !is_null(x))
515                .cloned()
516                .collect::<Vec<Expr>>();
517            args.insert(0, delimiter.clone());
518            Ok(ExprSimplifyResult::Original(args))
519        }
520    }
521}
522
523fn is_null(expr: &Expr) -> bool {
524    match expr {
525        Expr::Literal(v, _) => v.is_null(),
526        _ => false,
527    }
528}
529
530#[cfg(test)]
531mod tests {
532    use std::sync::Arc;
533
534    use crate::string::concat_ws::ConcatWsFunc;
535    use arrow::array::{Array, ArrayRef, LargeStringArray, StringArray, StringViewArray};
536    use arrow::datatypes::DataType::{LargeUtf8, Utf8, Utf8View};
537    use arrow::datatypes::Field;
538    use datafusion_common::Result;
539    use datafusion_common::ScalarValue;
540    use datafusion_common::config::ConfigOptions;
541    use datafusion_expr::{ColumnarValue, ScalarFunctionArgs, ScalarUDFImpl};
542
543    use crate::utils::test::test_function;
544
545    #[test]
546    fn test_functions() -> Result<()> {
547        test_function!(
548            ConcatWsFunc::new(),
549            vec![
550                ColumnarValue::Scalar(ScalarValue::from("|")),
551                ColumnarValue::Scalar(ScalarValue::from("aa")),
552                ColumnarValue::Scalar(ScalarValue::from("bb")),
553                ColumnarValue::Scalar(ScalarValue::from("cc")),
554            ],
555            Ok(Some("aa|bb|cc")),
556            &str,
557            Utf8,
558            StringArray
559        );
560        test_function!(
561            ConcatWsFunc::new(),
562            vec![
563                ColumnarValue::Scalar(ScalarValue::from("|")),
564                ColumnarValue::Scalar(ScalarValue::Utf8(None)),
565            ],
566            Ok(Some("")),
567            &str,
568            Utf8,
569            StringArray
570        );
571        test_function!(
572            ConcatWsFunc::new(),
573            vec![
574                ColumnarValue::Scalar(ScalarValue::Utf8(None)),
575                ColumnarValue::Scalar(ScalarValue::from("aa")),
576                ColumnarValue::Scalar(ScalarValue::from("bb")),
577                ColumnarValue::Scalar(ScalarValue::from("cc")),
578            ],
579            Ok(None),
580            &str,
581            Utf8,
582            StringArray
583        );
584        test_function!(
585            ConcatWsFunc::new(),
586            vec![
587                ColumnarValue::Scalar(ScalarValue::from("|")),
588                ColumnarValue::Scalar(ScalarValue::from("aa")),
589                ColumnarValue::Scalar(ScalarValue::Utf8(None)),
590                ColumnarValue::Scalar(ScalarValue::from("cc")),
591            ],
592            Ok(Some("aa|cc")),
593            &str,
594            Utf8,
595            StringArray
596        );
597
598        Ok(())
599    }
600
601    #[test]
602    fn concat_ws() -> Result<()> {
603        // sep is scalar
604        let c0 = ColumnarValue::Scalar(ScalarValue::Utf8(Some(",".to_string())));
605        let c1 =
606            ColumnarValue::Array(Arc::new(StringArray::from(vec!["foo", "bar", "baz"])));
607        let c2 = ColumnarValue::Array(Arc::new(StringArray::from(vec![
608            Some("x"),
609            None,
610            Some("z"),
611        ])));
612
613        let arg_fields = vec![
614            Field::new("a", Utf8, true).into(),
615            Field::new("a", Utf8, true).into(),
616            Field::new("a", Utf8, true).into(),
617        ];
618        let args = ScalarFunctionArgs {
619            args: vec![c0, c1, c2],
620            arg_fields,
621            number_rows: 3,
622            return_field: Field::new("f", Utf8, true).into(),
623            config_options: Arc::new(ConfigOptions::default()),
624        };
625
626        let result = ConcatWsFunc::new().invoke_with_args(args)?;
627        let expected =
628            Arc::new(StringArray::from(vec!["foo,x", "bar", "baz,z"])) as ArrayRef;
629        match &result {
630            ColumnarValue::Array(array) => {
631                assert_eq!(&expected, array);
632            }
633            _ => panic!(),
634        }
635
636        // sep is nullable array
637        let c0 = ColumnarValue::Array(Arc::new(StringArray::from(vec![
638            Some(","),
639            None,
640            Some("+"),
641        ])));
642        let c1 =
643            ColumnarValue::Array(Arc::new(StringArray::from(vec!["foo", "bar", "baz"])));
644        let c2 = ColumnarValue::Array(Arc::new(StringArray::from(vec![
645            Some("x"),
646            Some("y"),
647            Some("z"),
648        ])));
649
650        let arg_fields = vec![
651            Field::new("a", Utf8, true).into(),
652            Field::new("a", Utf8, true).into(),
653            Field::new("a", Utf8, true).into(),
654        ];
655        let args = ScalarFunctionArgs {
656            args: vec![c0, c1, c2],
657            arg_fields,
658            number_rows: 3,
659            return_field: Field::new("f", Utf8, true).into(),
660            config_options: Arc::new(ConfigOptions::default()),
661        };
662
663        let result = ConcatWsFunc::new().invoke_with_args(args)?;
664        let expected =
665            Arc::new(StringArray::from(vec![Some("foo,x"), None, Some("baz+z")]))
666                as ArrayRef;
667        match &result {
668            ColumnarValue::Array(array) => {
669                assert_eq!(&expected, array);
670            }
671            _ => panic!(),
672        }
673
674        Ok(())
675    }
676
677    #[test]
678    fn concat_ws_utf8view_scalar_separator() -> Result<()> {
679        let c0 = ColumnarValue::Scalar(ScalarValue::Utf8View(Some(",".to_string())));
680        let c1 =
681            ColumnarValue::Array(Arc::new(StringArray::from(vec!["foo", "bar", "baz"])));
682        let c2 = ColumnarValue::Array(Arc::new(StringArray::from(vec![
683            Some("x"),
684            None,
685            Some("z"),
686        ])));
687
688        let arg_fields = vec![
689            Field::new("a", Utf8View, true).into(),
690            Field::new("a", Utf8, true).into(),
691            Field::new("a", Utf8, true).into(),
692        ];
693        let args = ScalarFunctionArgs {
694            args: vec![c0, c1, c2],
695            arg_fields,
696            number_rows: 3,
697            return_field: Field::new("f", Utf8View, true).into(),
698            config_options: Arc::new(ConfigOptions::default()),
699        };
700
701        let result = ConcatWsFunc::new().invoke_with_args(args)?;
702        let expected =
703            Arc::new(StringViewArray::from(vec!["foo,x", "bar", "baz,z"])) as ArrayRef;
704        match &result {
705            ColumnarValue::Array(array) => {
706                assert_eq!(&expected, array);
707            }
708            _ => panic!("Expected array result"),
709        }
710
711        Ok(())
712    }
713
714    #[test]
715    fn concat_ws_largeutf8_scalar_separator() -> Result<()> {
716        let c0 = ColumnarValue::Scalar(ScalarValue::LargeUtf8(Some(",".to_string())));
717        let c1 =
718            ColumnarValue::Array(Arc::new(StringArray::from(vec!["foo", "bar", "baz"])));
719        let c2 = ColumnarValue::Array(Arc::new(StringArray::from(vec![
720            Some("x"),
721            None,
722            Some("z"),
723        ])));
724
725        let arg_fields = vec![
726            Field::new("a", LargeUtf8, true).into(),
727            Field::new("a", Utf8, true).into(),
728            Field::new("a", Utf8, true).into(),
729        ];
730        let args = ScalarFunctionArgs {
731            args: vec![c0, c1, c2],
732            arg_fields,
733            number_rows: 3,
734            return_field: Field::new("f", LargeUtf8, true).into(),
735            config_options: Arc::new(ConfigOptions::default()),
736        };
737
738        let result = ConcatWsFunc::new().invoke_with_args(args)?;
739        let expected =
740            Arc::new(LargeStringArray::from(vec!["foo,x", "bar", "baz,z"])) as ArrayRef;
741        match &result {
742            ColumnarValue::Array(array) => {
743                assert_eq!(&expected, array);
744            }
745            _ => panic!("Expected array result"),
746        }
747
748        Ok(())
749    }
750
751    #[test]
752    fn concat_ws_utf8view_nullable_separator() -> Result<()> {
753        let c0 = ColumnarValue::Array(Arc::new(StringViewArray::from(vec![
754            Some(","),
755            None,
756            Some("+"),
757        ])));
758        let c1 = ColumnarValue::Array(Arc::new(StringViewArray::from(vec![
759            "foo", "bar", "baz",
760        ])));
761        let c2 = ColumnarValue::Array(Arc::new(StringViewArray::from(vec![
762            Some("x"),
763            Some("y"),
764            Some("z"),
765        ])));
766
767        let arg_fields = vec![
768            Field::new("a", Utf8View, true).into(),
769            Field::new("a", Utf8View, true).into(),
770            Field::new("a", Utf8View, true).into(),
771        ];
772        let args = ScalarFunctionArgs {
773            args: vec![c0, c1, c2],
774            arg_fields,
775            number_rows: 3,
776            return_field: Field::new("f", Utf8View, true).into(),
777            config_options: Arc::new(ConfigOptions::default()),
778        };
779
780        let result = ConcatWsFunc::new().invoke_with_args(args)?;
781        let expected = Arc::new(StringViewArray::from(vec![
782            Some("foo,x"),
783            None,
784            Some("baz+z"),
785        ])) as ArrayRef;
786        match &result {
787            ColumnarValue::Array(array) => {
788                assert_eq!(&expected, array);
789            }
790            _ => panic!("Expected array result"),
791        }
792
793        Ok(())
794    }
795
796    #[test]
797    fn concat_ws_largeutf8_arrays() -> Result<()> {
798        let c0 = ColumnarValue::Scalar(ScalarValue::LargeUtf8(Some(",".to_string())));
799        let c1 = ColumnarValue::Array(Arc::new(LargeStringArray::from(vec![
800            "foo", "bar", "baz",
801        ])));
802        let c2 = ColumnarValue::Array(Arc::new(LargeStringArray::from(vec![
803            Some("x"),
804            None,
805            Some("z"),
806        ])));
807
808        let arg_fields = vec![
809            Field::new("a", LargeUtf8, true).into(),
810            Field::new("a", LargeUtf8, true).into(),
811            Field::new("a", LargeUtf8, true).into(),
812        ];
813        let args = ScalarFunctionArgs {
814            args: vec![c0, c1, c2],
815            arg_fields,
816            number_rows: 3,
817            return_field: Field::new("f", LargeUtf8, true).into(),
818            config_options: Arc::new(ConfigOptions::default()),
819        };
820
821        let result = ConcatWsFunc::new().invoke_with_args(args)?;
822        let expected =
823            Arc::new(LargeStringArray::from(vec!["foo,x", "bar", "baz,z"])) as ArrayRef;
824        match &result {
825            ColumnarValue::Array(array) => {
826                assert_eq!(&expected, array);
827            }
828            _ => panic!("Expected array result"),
829        }
830
831        Ok(())
832    }
833
834    #[test]
835    fn concat_ws_utf8view_null_separator() -> Result<()> {
836        // All-scalar path: null Utf8View separator should return Utf8View(None)
837        let c0 = ColumnarValue::Scalar(ScalarValue::Utf8View(None));
838        let c1 = ColumnarValue::Scalar(ScalarValue::Utf8View(Some("aa".to_string())));
839        let c2 = ColumnarValue::Scalar(ScalarValue::Utf8View(Some("bb".to_string())));
840
841        let arg_fields = vec![
842            Field::new("a", Utf8View, true).into(),
843            Field::new("a", Utf8View, true).into(),
844            Field::new("a", Utf8View, true).into(),
845        ];
846        let args = ScalarFunctionArgs {
847            args: vec![c0, c1, c2],
848            arg_fields,
849            number_rows: 1,
850            return_field: Field::new("f", Utf8View, true).into(),
851            config_options: Arc::new(ConfigOptions::default()),
852        };
853
854        let result = ConcatWsFunc::new().invoke_with_args(args)?;
855        match result {
856            ColumnarValue::Scalar(ScalarValue::Utf8View(None)) => {}
857            other => panic!("Expected Utf8View(None), got {other:?}"),
858        }
859
860        // Array path: null Utf8View scalar separator with array args
861        let c0 = ColumnarValue::Scalar(ScalarValue::Utf8View(None));
862        let c1 =
863            ColumnarValue::Array(Arc::new(StringViewArray::from(vec!["foo", "bar"])));
864
865        let arg_fields = vec![
866            Field::new("a", Utf8View, true).into(),
867            Field::new("a", Utf8View, true).into(),
868        ];
869        let args = ScalarFunctionArgs {
870            args: vec![c0, c1],
871            arg_fields,
872            number_rows: 2,
873            return_field: Field::new("f", Utf8View, true).into(),
874            config_options: Arc::new(ConfigOptions::default()),
875        };
876
877        let result = ConcatWsFunc::new().invoke_with_args(args)?;
878        match result {
879            ColumnarValue::Scalar(ScalarValue::Utf8View(None)) => {}
880            other => panic!("Expected Utf8View(None), got {other:?}"),
881        }
882
883        Ok(())
884    }
885
886    #[test]
887    fn concat_ws_largeutf8_null_separator() -> Result<()> {
888        // All-scalar path: null LargeUtf8 separator should return LargeUtf8(None)
889        let c0 = ColumnarValue::Scalar(ScalarValue::LargeUtf8(None));
890        let c1 = ColumnarValue::Scalar(ScalarValue::LargeUtf8(Some("aa".to_string())));
891        let c2 = ColumnarValue::Scalar(ScalarValue::LargeUtf8(Some("bb".to_string())));
892
893        let arg_fields = vec![
894            Field::new("a", LargeUtf8, true).into(),
895            Field::new("a", LargeUtf8, true).into(),
896            Field::new("a", LargeUtf8, true).into(),
897        ];
898        let args = ScalarFunctionArgs {
899            args: vec![c0, c1, c2],
900            arg_fields,
901            number_rows: 1,
902            return_field: Field::new("f", LargeUtf8, true).into(),
903            config_options: Arc::new(ConfigOptions::default()),
904        };
905
906        let result = ConcatWsFunc::new().invoke_with_args(args)?;
907        match result {
908            ColumnarValue::Scalar(ScalarValue::LargeUtf8(None)) => {}
909            other => panic!("Expected LargeUtf8(None), got {other:?}"),
910        }
911
912        // Array path: null LargeUtf8 scalar separator with array args
913        let c0 = ColumnarValue::Scalar(ScalarValue::LargeUtf8(None));
914        let c1 =
915            ColumnarValue::Array(Arc::new(LargeStringArray::from(vec!["foo", "bar"])));
916
917        let arg_fields = vec![
918            Field::new("a", LargeUtf8, true).into(),
919            Field::new("a", LargeUtf8, true).into(),
920        ];
921        let args = ScalarFunctionArgs {
922            args: vec![c0, c1],
923            arg_fields,
924            number_rows: 2,
925            return_field: Field::new("f", LargeUtf8, true).into(),
926            config_options: Arc::new(ConfigOptions::default()),
927        };
928
929        let result = ConcatWsFunc::new().invoke_with_args(args)?;
930        match result {
931            ColumnarValue::Scalar(ScalarValue::LargeUtf8(None)) => {}
932            other => panic!("Expected LargeUtf8(None), got {other:?}"),
933        }
934
935        Ok(())
936    }
937}