datafusion_functions/regex/
regexpreplace.rs

1// Licensed to the Apache Software Foundation (ASF) under one
2// or more contributor license agreements.  See the NOTICE file
3// distributed with this work for additional information
4// regarding copyright ownership.  The ASF licenses this file
5// to you under the Apache License, Version 2.0 (the
6// "License"); you may not use this file except in compliance
7// with the License.  You may obtain a copy of the License at
8//
9//   http://www.apache.org/licenses/LICENSE-2.0
10//
11// Unless required by applicable law or agreed to in writing,
12// software distributed under the License is distributed on an
13// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14// KIND, either express or implied.  See the License for the
15// specific language governing permissions and limitations
16// under the License.
17
18//! Regex expressions
19use arrow::array::ArrayDataBuilder;
20use arrow::array::BufferBuilder;
21use arrow::array::GenericStringArray;
22use arrow::array::StringViewBuilder;
23use arrow::array::{Array, ArrayRef, OffsetSizeTrait};
24use arrow::array::{ArrayAccessor, StringViewArray};
25use arrow::array::{ArrayIter, AsArray, new_null_array};
26use arrow::datatypes::DataType;
27use datafusion_common::ScalarValue;
28use datafusion_common::cast::{
29    as_large_string_array, as_string_array, as_string_view_array,
30};
31use datafusion_common::exec_err;
32use datafusion_common::plan_err;
33use datafusion_common::{
34    DataFusionError, Result, cast::as_generic_string_array, internal_err,
35};
36use datafusion_expr::ColumnarValue;
37use datafusion_expr::TypeSignature;
38use datafusion_expr::function::Hint;
39use datafusion_expr::{Documentation, ScalarUDFImpl, Signature, Volatility};
40use datafusion_macros::user_doc;
41use regex::Regex;
42use std::any::Any;
43use std::collections::HashMap;
44use std::sync::{Arc, LazyLock};
45
/// Scalar UDF implementing the SQL `regexp_replace(str, regexp, replacement[, flags])`
/// function.
///
/// The user-facing documentation (description, SQL examples and argument
/// descriptions) is declared in the `user_doc` attribute below.
#[user_doc(
    doc_section(label = "Regular Expression Functions"),
    description = "Replaces substrings in a string that match a [regular expression](https://docs.rs/regex/latest/regex/#syntax).",
    syntax_example = "regexp_replace(str, regexp, replacement[, flags])",
    sql_example = r#"```sql
> select regexp_replace('foobarbaz', 'b(..)', 'X\\1Y', 'g');
+------------------------------------------------------------------------+
| regexp_replace(Utf8("foobarbaz"),Utf8("b(..)"),Utf8("X\1Y"),Utf8("g")) |
+------------------------------------------------------------------------+
| fooXarYXazY                                                            |
+------------------------------------------------------------------------+
SELECT regexp_replace('aBc', '(b|d)', 'Ab\\1a', 'i');
+-------------------------------------------------------------------+
| regexp_replace(Utf8("aBc"),Utf8("(b|d)"),Utf8("Ab\1a"),Utf8("i")) |
+-------------------------------------------------------------------+
| aAbBac                                                            |
+-------------------------------------------------------------------+
```
Additional examples can be found [here](https://github.com/apache/datafusion/blob/main/datafusion-examples/examples/builtin_functions/regexp.rs)
"#,
    standard_argument(name = "str", prefix = "String"),
    argument(
        name = "regexp",
        description = "Regular expression to match against.
  Can be a constant, column, or function."
    ),
    argument(
        name = "replacement",
        description = "Replacement string expression to operate on. Can be a constant, column, or function, and any combination of operators."
    ),
    argument(
        name = "flags",
        description = r#"Optional regular expression flags that control the behavior of the regular expression. The following flags are supported:
- **g**: (global) Search globally and don't return after the first match
- **i**: case-insensitive: letters match both upper and lower case
- **m**: multi-line mode: ^ and $ match begin/end of line
- **s**: allow . to match \n
- **R**: enables CRLF mode: when multi-line mode is enabled, \r\n is used
- **U**: swap the meaning of x* and x*?"#
    )
)]
#[derive(Debug, PartialEq, Eq, Hash)]
pub struct RegexpReplaceFunc {
    // Accepted argument signatures; built once in `RegexpReplaceFunc::new`
    // and handed out by `ScalarUDFImpl::signature`.
    signature: Signature,
}
// `Default` simply delegates to `RegexpReplaceFunc::new`, so both construction
// paths yield the same signature.
impl Default for RegexpReplaceFunc {
    /// Returns the same value as [`RegexpReplaceFunc::new`].
    fn default() -> Self {
        Self::new()
    }
}
96
97impl RegexpReplaceFunc {
98    pub fn new() -> Self {
99        use DataType::*;
100        use TypeSignature::*;
101        Self {
102            signature: Signature::one_of(
103                vec![
104                    Uniform(3, vec![Utf8View, LargeUtf8, Utf8]),
105                    Uniform(4, vec![Utf8View, LargeUtf8, Utf8]),
106                ],
107                Volatility::Immutable,
108            ),
109        }
110    }
111}
112
113impl ScalarUDFImpl for RegexpReplaceFunc {
114    fn as_any(&self) -> &dyn Any {
115        self
116    }
117
118    fn name(&self) -> &str {
119        "regexp_replace"
120    }
121
122    fn signature(&self) -> &Signature {
123        &self.signature
124    }
125
126    fn return_type(&self, arg_types: &[DataType]) -> Result<DataType> {
127        use DataType::*;
128        Ok(match &arg_types[0] {
129            LargeUtf8 | LargeBinary => LargeUtf8,
130            Utf8 | Binary => Utf8,
131            Utf8View | BinaryView => Utf8View,
132            Null => Null,
133            Dictionary(_, t) => match **t {
134                LargeUtf8 | LargeBinary => LargeUtf8,
135                Utf8 | Binary => Utf8,
136                Null => Null,
137                _ => {
138                    return plan_err!(
139                        "the regexp_replace can only accept strings but got {:?}",
140                        **t
141                    );
142                }
143            },
144            other => {
145                return plan_err!(
146                    "The regexp_replace function can only accept strings. Got {other}"
147                );
148            }
149        })
150    }
151
152    fn invoke_with_args(
153        &self,
154        args: datafusion_expr::ScalarFunctionArgs,
155    ) -> Result<ColumnarValue> {
156        let args = &args.args;
157
158        let len = args
159            .iter()
160            .fold(Option::<usize>::None, |acc, arg| match arg {
161                ColumnarValue::Scalar(_) => acc,
162                ColumnarValue::Array(a) => Some(a.len()),
163            });
164
165        let is_scalar = len.is_none();
166        let result = regexp_replace_func(args);
167        if is_scalar {
168            // If all inputs are scalar, keeps output as scalar
169            let result = result.and_then(|arr| ScalarValue::try_from_array(&arr, 0));
170            result.map(ColumnarValue::Scalar)
171        } else {
172            result.map(ColumnarValue::Array)
173        }
174    }
175
176    fn documentation(&self) -> Option<&Documentation> {
177        self.doc()
178    }
179}
180
181fn regexp_replace_func(args: &[ColumnarValue]) -> Result<ArrayRef> {
182    match args[0].data_type() {
183        DataType::Utf8 => specialize_regexp_replace::<i32>(args),
184        DataType::LargeUtf8 => specialize_regexp_replace::<i64>(args),
185        DataType::Utf8View => specialize_regexp_replace::<i32>(args),
186        other => {
187            internal_err!("Unsupported data type {other:?} for function regexp_replace")
188        }
189    }
190}
191
192/// replace POSIX capture groups (like \1) with Rust Regex group (like ${1})
193/// used by regexp_replace
194fn regex_replace_posix_groups(replacement: &str) -> String {
195    static CAPTURE_GROUPS_RE_LOCK: LazyLock<Regex> =
196        LazyLock::new(|| Regex::new(r"(\\)(\d*)").unwrap());
197    CAPTURE_GROUPS_RE_LOCK
198        .replace_all(replacement, "$${$2}")
199        .into_owned()
200}
201
202/// Replaces substring(s) matching a PCRE-like regular expression.
203///
204/// The full list of supported features and syntax can be found at
205/// <https://docs.rs/regex/latest/regex/#syntax>
206///
207/// Supported flags with the addition of 'g' can be found at
208/// <https://docs.rs/regex/latest/regex/#grouping-and-flags>
209///
210/// # Examples
211///
212/// ```ignore
213/// # use datafusion::prelude::*;
214/// # use datafusion::error::Result;
215/// # #[tokio::main]
216/// # async fn main() -> Result<()> {
217/// let ctx = SessionContext::new();
218/// let df = ctx.read_csv("tests/data/regex.csv", CsvReadOptions::new()).await?;
219///
220/// // use the regexp_replace function to replace substring(s) without flags
221/// let df = df.with_column(
222///     "a",
223///     regexp_replace(vec![col("values"), col("patterns"), col("replacement")])
224/// )?;
225/// // use the regexp_replace function to replace substring(s) with flags
226/// let df = df.with_column(
227///     "b",
228///     regexp_replace(vec![col("values"), col("patterns"), col("replacement"), col("flags")]),
229/// )?;
230///
231/// // literals can be used as well
232/// let df = df.with_column(
233///     "c",
234///     regexp_replace(vec![lit("foobarbequebaz"), lit("(bar)(beque)"), lit(r"\2")]),
235/// )?;
236///
237/// df.show().await?;
238///
239/// # Ok(())
240/// # }
241/// ```
242pub fn regexp_replace<'a, T: OffsetSizeTrait, U>(
243    string_array: U,
244    pattern_array: U,
245    replacement_array: U,
246    flags_array: Option<U>,
247) -> Result<ArrayRef>
248where
249    U: ArrayAccessor<Item = &'a str>,
250{
251    // Default implementation for regexp_replace, assumes all args are arrays
252    // and args is a sequence of 3 or 4 elements.
253
254    // creating Regex is expensive so create hashmap for memoization
255    let mut patterns: HashMap<String, Regex> = HashMap::new();
256
257    let datatype = string_array.data_type().to_owned();
258
259    let string_array_iter = ArrayIter::new(string_array);
260    let pattern_array_iter = ArrayIter::new(pattern_array);
261    let replacement_array_iter = ArrayIter::new(replacement_array);
262
263    match flags_array {
264        None => {
265            let result_iter = string_array_iter
266                .zip(pattern_array_iter)
267                .zip(replacement_array_iter)
268                .map(|((string, pattern), replacement)| {
269                    match (string, pattern, replacement) {
270                        (Some(string), Some(pattern), Some(replacement)) => {
271                            let replacement = regex_replace_posix_groups(replacement);
272                            // if patterns hashmap already has regexp then use else create and return
273                            let re = match patterns.get(pattern) {
274                                Some(re) => Ok(re),
275                                None => match Regex::new(pattern) {
276                                    Ok(re) => {
277                                        patterns.insert(pattern.to_string(), re);
278                                        Ok(patterns.get(pattern).unwrap())
279                                    }
280                                    Err(err) => {
281                                        Err(DataFusionError::External(Box::new(err)))
282                                    }
283                                },
284                            };
285
286                            Some(re.map(|re| re.replace(string, replacement.as_str())))
287                                .transpose()
288                        }
289                        _ => Ok(None),
290                    }
291                });
292
293            match datatype {
294                DataType::Utf8 | DataType::LargeUtf8 => {
295                    let result =
296                        result_iter.collect::<Result<GenericStringArray<T>>>()?;
297                    Ok(Arc::new(result) as ArrayRef)
298                }
299                DataType::Utf8View => {
300                    let result = result_iter.collect::<Result<StringViewArray>>()?;
301                    Ok(Arc::new(result) as ArrayRef)
302                }
303                other => {
304                    exec_err!(
305                        "Unsupported data type {other:?} for function regex_replace"
306                    )
307                }
308            }
309        }
310        Some(flags_array) => {
311            let flags_array_iter = ArrayIter::new(flags_array);
312
313            let result_iter = string_array_iter
314                .zip(pattern_array_iter)
315                .zip(replacement_array_iter)
316                .zip(flags_array_iter)
317                .map(|(((string, pattern), replacement), flags)| {
318                    match (string, pattern, replacement, flags) {
319                        (Some(string), Some(pattern), Some(replacement), Some(flags)) => {
320                            let replacement = regex_replace_posix_groups(replacement);
321
322                            // format flags into rust pattern
323                            let (pattern, replace_all) = if flags == "g" {
324                                (pattern.to_string(), true)
325                            } else if flags.contains('g') {
326                                (
327                                    format!(
328                                        "(?{}){}",
329                                        flags.to_string().replace('g', ""),
330                                        pattern
331                                    ),
332                                    true,
333                                )
334                            } else {
335                                (format!("(?{flags}){pattern}"), false)
336                            };
337
338                            // if patterns hashmap already has regexp then use else create and return
339                            let re = match patterns.get(&pattern) {
340                                Some(re) => Ok(re),
341                                None => match Regex::new(pattern.as_str()) {
342                                    Ok(re) => {
343                                        patterns.insert(pattern.clone(), re);
344                                        Ok(patterns.get(&pattern).unwrap())
345                                    }
346                                    Err(err) => {
347                                        Err(DataFusionError::External(Box::new(err)))
348                                    }
349                                },
350                            };
351
352                            Some(re.map(|re| {
353                                if replace_all {
354                                    re.replace_all(string, replacement.as_str())
355                                } else {
356                                    re.replace(string, replacement.as_str())
357                                }
358                            }))
359                            .transpose()
360                        }
361                        _ => Ok(None),
362                    }
363                });
364
365            match datatype {
366                DataType::Utf8 | DataType::LargeUtf8 => {
367                    let result =
368                        result_iter.collect::<Result<GenericStringArray<T>>>()?;
369                    Ok(Arc::new(result) as ArrayRef)
370                }
371                DataType::Utf8View => {
372                    let result = result_iter.collect::<Result<StringViewArray>>()?;
373                    Ok(Arc::new(result) as ArrayRef)
374                }
375                other => {
376                    exec_err!(
377                        "Unsupported data type {other:?} for function regex_replace"
378                    )
379                }
380            }
381        }
382    }
383}
384
/// Reads the value at index 0 of a string array argument as a `&str`.
///
/// Note: if the array is empty or its first value is null, the *enclosing
/// function* returns early with an all-null array of `$ARRAY_SIZE` rows whose
/// data type is that of the inspected argument.
///
/// `$NAME` is accepted for call-site readability but is not used by the expansion.
macro_rules! fetch_string_arg {
    ($ARG:expr, $NAME:expr, $ARRAY_SIZE:expr) => {{
        let arg = $ARG;
        let arg_type = arg.data_type();

        // Mimicking the existing behavior of regexp_replace: if any of the
        // scalar arguments is actually null, the result is an all-null array.
        // This also serves as the early abort when the input array is empty.
        if arg.len() == 0 || arg.is_null(0) {
            return Ok(new_null_array(arg_type, $ARRAY_SIZE));
        }

        match arg_type {
            DataType::Utf8 => as_string_array(arg)?.value(0),
            DataType::LargeUtf8 => as_large_string_array(arg)?.value(0),
            DataType::Utf8View => as_string_view_array(arg)?.value(0),
            _ => unreachable!("Invalid data type for regexp_replace: {}", arg_type),
        }
    }};
}
419/// Special cased regex_replace implementation for the scenario where
420/// the pattern, replacement and flags are static (arrays that are derived
421/// from scalars). This means we can skip regex caching system and basically
422/// hold a single Regex object for the replace operation. This also speeds
423/// up the pre-processing time of the replacement string, since it only
424/// needs to processed once.
425fn _regexp_replace_static_pattern_replace<T: OffsetSizeTrait>(
426    args: &[ArrayRef],
427) -> Result<ArrayRef> {
428    let array_size = args[0].len();
429    let pattern = fetch_string_arg!(&args[1], "pattern", array_size);
430    let replacement = fetch_string_arg!(&args[2], "replacement", array_size);
431    let flags = match args.len() {
432        3 => None,
433        4 => Some(fetch_string_arg!(&args[3], "flags", array_size)),
434        other => {
435            return exec_err!(
436                "regexp_replace was called with {other} arguments. It requires at least 3 and at most 4."
437            );
438        }
439    };
440
441    // Embed the flag (if it exists) into the pattern. Limit will determine
442    // whether this is a global match (as in replace all) or just a single
443    // replace operation.
444    let (pattern, limit) = match flags {
445        Some("g") => (pattern.to_string(), 0),
446        Some(flags) => (
447            format!("(?{}){}", flags.to_string().replace('g', ""), pattern),
448            !flags.contains('g') as usize,
449        ),
450        None => (pattern.to_string(), 1),
451    };
452
453    let re =
454        Regex::new(&pattern).map_err(|err| DataFusionError::External(Box::new(err)))?;
455
456    // Replaces the posix groups in the replacement string
457    // with rust ones.
458    let replacement = regex_replace_posix_groups(replacement);
459
460    let string_array_type = args[0].data_type();
461    match string_array_type {
462        DataType::Utf8 | DataType::LargeUtf8 => {
463            let string_array = as_generic_string_array::<T>(&args[0])?;
464
465            // We are going to create the underlying string buffer from its parts
466            // to be able to re-use the existing null buffer for sparse arrays.
467            let mut vals = BufferBuilder::<u8>::new({
468                let offsets = string_array.value_offsets();
469                (offsets[string_array.len()] - offsets[0])
470                    .to_usize()
471                    .unwrap()
472            });
473            let mut new_offsets = BufferBuilder::<T>::new(string_array.len() + 1);
474            new_offsets.append(T::zero());
475
476            string_array.iter().for_each(|val| {
477                if let Some(val) = val {
478                    let result = re.replacen(val, limit, replacement.as_str());
479                    vals.append_slice(result.as_bytes());
480                }
481                new_offsets.append(T::from_usize(vals.len()).unwrap());
482            });
483
484            let data = ArrayDataBuilder::new(GenericStringArray::<T>::DATA_TYPE)
485                .len(string_array.len())
486                .nulls(string_array.nulls().cloned())
487                .buffers(vec![new_offsets.finish(), vals.finish()])
488                .build()?;
489            let result_array = GenericStringArray::<T>::from(data);
490            Ok(Arc::new(result_array) as ArrayRef)
491        }
492        DataType::Utf8View => {
493            let string_view_array = as_string_view_array(&args[0])?;
494
495            let mut builder = StringViewBuilder::with_capacity(string_view_array.len());
496
497            for val in string_view_array.iter() {
498                if let Some(val) = val {
499                    let result = re.replacen(val, limit, replacement.as_str());
500                    builder.append_value(result);
501                } else {
502                    builder.append_null();
503                }
504            }
505
506            let result = builder.finish();
507            Ok(Arc::new(result) as ArrayRef)
508        }
509        _ => unreachable!(
510            "Invalid data type for regexp_replace: {}",
511            string_array_type
512        ),
513    }
514}
515
516/// Determine which implementation of the regexp_replace to use based
517/// on the given set of arguments.
518fn specialize_regexp_replace<T: OffsetSizeTrait>(
519    args: &[ColumnarValue],
520) -> Result<ArrayRef> {
521    // This will serve as a dispatch table where we can
522    // leverage it in order to determine whether the scalarity
523    // of the given set of arguments fits a better specialized
524    // function.
525    let (is_source_scalar, is_pattern_scalar, is_replacement_scalar, is_flags_scalar) = (
526        matches!(args[0], ColumnarValue::Scalar(_)),
527        matches!(args[1], ColumnarValue::Scalar(_)),
528        matches!(args[2], ColumnarValue::Scalar(_)),
529        // The forth argument (flags) is optional; so in the event that
530        // it is not available, we'll claim that it is scalar.
531        matches!(args.get(3), Some(ColumnarValue::Scalar(_)) | None),
532    );
533    let len = args
534        .iter()
535        .fold(Option::<usize>::None, |acc, arg| match arg {
536            ColumnarValue::Scalar(_) => acc,
537            ColumnarValue::Array(a) => Some(a.len()),
538        });
539    let inferred_length = len.unwrap_or(1);
540    match (
541        is_source_scalar,
542        is_pattern_scalar,
543        is_replacement_scalar,
544        is_flags_scalar,
545    ) {
546        // This represents a very hot path for the case where the there is
547        // a single pattern that is being matched against and a single replacement.
548        // This is extremely important to specialize on since it removes the overhead
549        // of DF's in-house regex pattern cache (since there will be at most a single
550        // pattern) and the pre-processing of the same replacement pattern at each
551        // query.
552        //
553        // The flags needs to be a scalar as well since each pattern is actually
554        // constructed with the flags embedded into the pattern itself. This means
555        // even if the pattern itself is scalar, if the flags are an array then
556        // we will create many regexes and it is best to use the implementation
557        // that caches it. If there are no flags, we can simply ignore it here,
558        // and let the specialized function handle it.
559        (_, true, true, true) => {
560            let hints = [
561                Hint::Pad,
562                Hint::AcceptsSingular,
563                Hint::AcceptsSingular,
564                Hint::AcceptsSingular,
565            ];
566            let args = args
567                .iter()
568                .zip(hints.iter().chain(std::iter::repeat(&Hint::Pad)))
569                .map(|(arg, hint)| {
570                    // Decide on the length to expand this scalar to depending
571                    // on the given hints.
572                    let expansion_len = match hint {
573                        Hint::AcceptsSingular => 1,
574                        Hint::Pad => inferred_length,
575                    };
576                    arg.to_array(expansion_len)
577                })
578                .collect::<Result<Vec<_>>>()?;
579            _regexp_replace_static_pattern_replace::<T>(&args)
580        }
581
582        // If there are no specialized implementations, we'll fall back to the
583        // generic implementation.
584        (_, _, _, _) => {
585            let args = args
586                .iter()
587                .map(|arg| arg.to_array(inferred_length))
588                .collect::<Result<Vec<_>>>()?;
589
590            match (
591                args[0].data_type(),
592                args[1].data_type(),
593                args[2].data_type(),
594                args.get(3).map(|a| a.data_type()),
595            ) {
596                (
597                    DataType::Utf8,
598                    DataType::Utf8,
599                    DataType::Utf8,
600                    Some(DataType::Utf8) | None,
601                ) => {
602                    let string_array = args[0].as_string::<i32>();
603                    let pattern_array = args[1].as_string::<i32>();
604                    let replacement_array = args[2].as_string::<i32>();
605                    let flags_array = args.get(3).map(|a| a.as_string::<i32>());
606                    regexp_replace::<i32, _>(
607                        string_array,
608                        pattern_array,
609                        replacement_array,
610                        flags_array,
611                    )
612                }
613                (
614                    DataType::Utf8View,
615                    DataType::Utf8View,
616                    DataType::Utf8View,
617                    Some(DataType::Utf8View) | None,
618                ) => {
619                    let string_array = args[0].as_string_view();
620                    let pattern_array = args[1].as_string_view();
621                    let replacement_array = args[2].as_string_view();
622                    let flags_array = args.get(3).map(|a| a.as_string_view());
623                    regexp_replace::<i32, _>(
624                        string_array,
625                        pattern_array,
626                        replacement_array,
627                        flags_array,
628                    )
629                }
630                (
631                    DataType::LargeUtf8,
632                    DataType::LargeUtf8,
633                    DataType::LargeUtf8,
634                    Some(DataType::LargeUtf8) | None,
635                ) => {
636                    let string_array = args[0].as_string::<i64>();
637                    let pattern_array = args[1].as_string::<i64>();
638                    let replacement_array = args[2].as_string::<i64>();
639                    let flags_array = args.get(3).map(|a| a.as_string::<i64>());
640                    regexp_replace::<i64, _>(
641                        string_array,
642                        pattern_array,
643                        replacement_array,
644                        flags_array,
645                    )
646                }
647                other => {
648                    exec_err!(
649                        "Unsupported data type {other:?} for function regex_replace"
650                    )
651                }
652            }
653        }
654    }
655}
656#[cfg(test)]
657mod tests {
658    use arrow::array::*;
659
660    use super::*;
661
662    macro_rules! static_pattern_regexp_replace {
663        ($name:ident, $T:ty, $O:ty) => {
664            #[test]
665            fn $name() {
666                let values = vec!["abc", "acd", "abcd1234567890123", "123456789012abc"];
667                let patterns = vec!["b"; 4];
668                let replacement = vec!["foo"; 4];
669                let expected =
670                    vec!["afooc", "acd", "afoocd1234567890123", "123456789012afooc"];
671
672                let values = <$T>::from(values);
673                let patterns = <$T>::from(patterns);
674                let replacements = <$T>::from(replacement);
675                let expected = <$T>::from(expected);
676
677                let re = _regexp_replace_static_pattern_replace::<$O>(&[
678                    Arc::new(values),
679                    Arc::new(patterns),
680                    Arc::new(replacements),
681                ])
682                .unwrap();
683
684                assert_eq!(re.as_ref(), &expected);
685            }
686        };
687    }
688
689    static_pattern_regexp_replace!(string_array, StringArray, i32);
690    static_pattern_regexp_replace!(string_view_array, StringViewArray, i32);
691    static_pattern_regexp_replace!(large_string_array, LargeStringArray, i64);
692
693    macro_rules! static_pattern_regexp_replace_with_flags {
694        ($name:ident, $T:ty, $O: ty) => {
695            #[test]
696            fn $name() {
697                let values = vec![
698                    "abc",
699                    "aBc",
700                    "acd",
701                    "abcd1234567890123",
702                    "aBcd1234567890123",
703                    "123456789012abc",
704                    "123456789012aBc",
705                ];
706                let expected = vec![
707                    "afooc",
708                    "afooc",
709                    "acd",
710                    "afoocd1234567890123",
711                    "afoocd1234567890123",
712                    "123456789012afooc",
713                    "123456789012afooc",
714                ];
715
716                let values = <$T>::from(values);
717                let patterns = StringArray::from(vec!["b"; 7]);
718                let replacements = StringArray::from(vec!["foo"; 7]);
719                let flags = StringArray::from(vec!["i"; 5]);
720                let expected = <$T>::from(expected);
721
722                let re = _regexp_replace_static_pattern_replace::<$O>(&[
723                    Arc::new(values),
724                    Arc::new(patterns),
725                    Arc::new(replacements),
726                    Arc::new(flags),
727                ])
728                .unwrap();
729
730                assert_eq!(re.as_ref(), &expected);
731            }
732        };
733    }
734
735    static_pattern_regexp_replace_with_flags!(string_array_with_flags, StringArray, i32);
736    static_pattern_regexp_replace_with_flags!(
737        string_view_array_with_flags,
738        StringViewArray,
739        i32
740    );
741    static_pattern_regexp_replace_with_flags!(
742        large_string_array_with_flags,
743        LargeStringArray,
744        i64
745    );
746
747    #[test]
748    fn test_static_pattern_regexp_replace_early_abort() {
749        let values = StringArray::from(vec!["abc"; 5]);
750        let patterns = StringArray::from(vec![None::<&str>; 5]);
751        let replacements = StringArray::from(vec!["foo"; 5]);
752        let expected = StringArray::from(vec![None::<&str>; 5]);
753
754        let re = _regexp_replace_static_pattern_replace::<i32>(&[
755            Arc::new(values),
756            Arc::new(patterns),
757            Arc::new(replacements),
758        ])
759        .unwrap();
760
761        assert_eq!(re.as_ref(), &expected);
762    }
763
764    #[test]
765    fn test_static_pattern_regexp_replace_early_abort_when_empty() {
766        let values = StringArray::from(Vec::<Option<&str>>::new());
767        let patterns = StringArray::from(Vec::<Option<&str>>::new());
768        let replacements = StringArray::from(Vec::<Option<&str>>::new());
769        let expected = StringArray::from(Vec::<Option<&str>>::new());
770
771        let re = _regexp_replace_static_pattern_replace::<i32>(&[
772            Arc::new(values),
773            Arc::new(patterns),
774            Arc::new(replacements),
775        ])
776        .unwrap();
777
778        assert_eq!(re.as_ref(), &expected);
779    }
780
781    #[test]
782    fn test_static_pattern_regexp_replace_early_abort_flags() {
783        let values = StringArray::from(vec!["abc"; 5]);
784        let patterns = StringArray::from(vec!["a"; 5]);
785        let replacements = StringArray::from(vec!["foo"; 5]);
786        let flags = StringArray::from(vec![None::<&str>; 5]);
787        let expected = StringArray::from(vec![None::<&str>; 5]);
788
789        let re = _regexp_replace_static_pattern_replace::<i32>(&[
790            Arc::new(values),
791            Arc::new(patterns),
792            Arc::new(replacements),
793            Arc::new(flags),
794        ])
795        .unwrap();
796
797        assert_eq!(re.as_ref(), &expected);
798    }
799
800    #[test]
801    fn test_static_pattern_regexp_replace_pattern_error() {
802        let values = StringArray::from(vec!["abc"; 5]);
803        // Deliberately using an invalid pattern to see how the single pattern
804        // error is propagated on regexp_replace.
805        let patterns = StringArray::from(vec!["["; 5]);
806        let replacements = StringArray::from(vec!["foo"; 5]);
807
808        let re = _regexp_replace_static_pattern_replace::<i32>(&[
809            Arc::new(values),
810            Arc::new(patterns),
811            Arc::new(replacements),
812        ]);
813        let pattern_err = re.expect_err("broken pattern should have failed");
814        assert_eq!(
815            pattern_err.strip_backtrace(),
816            "External error: regex parse error:\n    [\n    ^\nerror: unclosed character class"
817        );
818    }
819
820    #[test]
821    fn test_static_pattern_regexp_replace_with_null_buffers() {
822        let values = StringArray::from(vec![
823            Some("a"),
824            None,
825            Some("b"),
826            None,
827            Some("a"),
828            None,
829            None,
830            Some("c"),
831        ]);
832        let patterns = StringArray::from(vec!["a"; 1]);
833        let replacements = StringArray::from(vec!["foo"; 1]);
834        let expected = StringArray::from(vec![
835            Some("foo"),
836            None,
837            Some("b"),
838            None,
839            Some("foo"),
840            None,
841            None,
842            Some("c"),
843        ]);
844
845        let re = _regexp_replace_static_pattern_replace::<i32>(&[
846            Arc::new(values),
847            Arc::new(patterns),
848            Arc::new(replacements),
849        ])
850        .unwrap();
851
852        assert_eq!(re.as_ref(), &expected);
853        assert_eq!(re.null_count(), 4);
854    }
855
856    #[test]
857    fn test_static_pattern_regexp_replace_with_sliced_null_buffer() {
858        let values = StringArray::from(vec![
859            Some("a"),
860            None,
861            Some("b"),
862            None,
863            Some("a"),
864            None,
865            None,
866            Some("c"),
867        ]);
868        let values = values.slice(2, 5);
869        let patterns = StringArray::from(vec!["a"; 1]);
870        let replacements = StringArray::from(vec!["foo"; 1]);
871        let expected = StringArray::from(vec![Some("b"), None, Some("foo"), None, None]);
872
873        let re = _regexp_replace_static_pattern_replace::<i32>(&[
874            Arc::new(values),
875            Arc::new(patterns),
876            Arc::new(replacements),
877        ])
878        .unwrap();
879        assert_eq!(re.as_ref(), &expected);
880        assert_eq!(re.null_count(), 3);
881    }
882}