1use arrow::array::ArrayDataBuilder;
20use arrow::array::BufferBuilder;
21use arrow::array::GenericStringArray;
22use arrow::array::StringViewBuilder;
23use arrow::array::{Array, ArrayRef, OffsetSizeTrait};
24use arrow::array::{ArrayAccessor, StringViewArray};
25use arrow::array::{ArrayIter, AsArray, new_null_array};
26use arrow::datatypes::DataType;
27use datafusion_common::ScalarValue;
28use datafusion_common::cast::{
29 as_large_string_array, as_string_array, as_string_view_array,
30};
31use datafusion_common::exec_err;
32use datafusion_common::plan_err;
33use datafusion_common::{
34 DataFusionError, Result, cast::as_generic_string_array, internal_err,
35};
36use datafusion_expr::ColumnarValue;
37use datafusion_expr::TypeSignature;
38use datafusion_expr::function::Hint;
39use datafusion_expr::{Documentation, ScalarUDFImpl, Signature, Volatility};
40use datafusion_macros::user_doc;
41use regex::Regex;
42use std::any::Any;
43use std::collections::HashMap;
44use std::sync::{Arc, LazyLock};
45
/// Scalar UDF type behind the SQL function `regexp_replace`.
///
/// The struct only stores the [`Signature`] describing the accepted argument
/// lists; the replacement logic itself lives in the free functions of this
/// module. The `user_doc` attribute below carries the user-facing
/// description, SQL examples and per-argument documentation.
#[user_doc(
    doc_section(label = "Regular Expression Functions"),
    description = "Replaces substrings in a string that match a [regular expression](https://docs.rs/regex/latest/regex/#syntax).",
    syntax_example = "regexp_replace(str, regexp, replacement[, flags])",
    sql_example = r#"```sql
> select regexp_replace('foobarbaz', 'b(..)', 'X\\1Y', 'g');
+------------------------------------------------------------------------+
| regexp_replace(Utf8("foobarbaz"),Utf8("b(..)"),Utf8("X\1Y"),Utf8("g")) |
+------------------------------------------------------------------------+
| fooXarYXazY |
+------------------------------------------------------------------------+
SELECT regexp_replace('aBc', '(b|d)', 'Ab\\1a', 'i');
+-------------------------------------------------------------------+
| regexp_replace(Utf8("aBc"),Utf8("(b|d)"),Utf8("Ab\1a"),Utf8("i")) |
+-------------------------------------------------------------------+
| aAbBac |
+-------------------------------------------------------------------+
```
Additional examples can be found [here](https://github.com/apache/datafusion/blob/main/datafusion-examples/examples/builtin_functions/regexp.rs)
"#,
    standard_argument(name = "str", prefix = "String"),
    argument(
        name = "regexp",
        description = "Regular expression to match against.
 Can be a constant, column, or function."
    ),
    argument(
        name = "replacement",
        description = "Replacement string expression to operate on. Can be a constant, column, or function, and any combination of operators."
    ),
    argument(
        name = "flags",
        description = r#"Optional regular expression flags that control the behavior of the regular expression. The following flags are supported:
- **g**: (global) Search globally and don't return after the first match
- **i**: case-insensitive: letters match both upper and lower case
- **m**: multi-line mode: ^ and $ match begin/end of line
- **s**: allow . to match \n
- **R**: enables CRLF mode: when multi-line mode is enabled, \r\n is used
- **U**: swap the meaning of x* and x*?"#
    )
)]
#[derive(Debug, PartialEq, Eq, Hash)]
pub struct RegexpReplaceFunc {
    // Accepted argument lists: three or four string arguments of one type.
    signature: Signature,
}
91impl Default for RegexpReplaceFunc {
92 fn default() -> Self {
93 Self::new()
94 }
95}
96
97impl RegexpReplaceFunc {
98 pub fn new() -> Self {
99 use DataType::*;
100 use TypeSignature::*;
101 Self {
102 signature: Signature::one_of(
103 vec![
104 Uniform(3, vec![Utf8View, LargeUtf8, Utf8]),
105 Uniform(4, vec![Utf8View, LargeUtf8, Utf8]),
106 ],
107 Volatility::Immutable,
108 ),
109 }
110 }
111}
112
113impl ScalarUDFImpl for RegexpReplaceFunc {
114 fn as_any(&self) -> &dyn Any {
115 self
116 }
117
118 fn name(&self) -> &str {
119 "regexp_replace"
120 }
121
122 fn signature(&self) -> &Signature {
123 &self.signature
124 }
125
126 fn return_type(&self, arg_types: &[DataType]) -> Result<DataType> {
127 use DataType::*;
128 Ok(match &arg_types[0] {
129 LargeUtf8 | LargeBinary => LargeUtf8,
130 Utf8 | Binary => Utf8,
131 Utf8View | BinaryView => Utf8View,
132 Null => Null,
133 Dictionary(_, t) => match **t {
134 LargeUtf8 | LargeBinary => LargeUtf8,
135 Utf8 | Binary => Utf8,
136 Null => Null,
137 _ => {
138 return plan_err!(
139 "the regexp_replace can only accept strings but got {:?}",
140 **t
141 );
142 }
143 },
144 other => {
145 return plan_err!(
146 "The regexp_replace function can only accept strings. Got {other}"
147 );
148 }
149 })
150 }
151
152 fn invoke_with_args(
153 &self,
154 args: datafusion_expr::ScalarFunctionArgs,
155 ) -> Result<ColumnarValue> {
156 let args = &args.args;
157
158 let len = args
159 .iter()
160 .fold(Option::<usize>::None, |acc, arg| match arg {
161 ColumnarValue::Scalar(_) => acc,
162 ColumnarValue::Array(a) => Some(a.len()),
163 });
164
165 let is_scalar = len.is_none();
166 let result = regexp_replace_func(args);
167 if is_scalar {
168 let result = result.and_then(|arr| ScalarValue::try_from_array(&arr, 0));
170 result.map(ColumnarValue::Scalar)
171 } else {
172 result.map(ColumnarValue::Array)
173 }
174 }
175
176 fn documentation(&self) -> Option<&Documentation> {
177 self.doc()
178 }
179}
180
181fn regexp_replace_func(args: &[ColumnarValue]) -> Result<ArrayRef> {
182 match args[0].data_type() {
183 DataType::Utf8 => specialize_regexp_replace::<i32>(args),
184 DataType::LargeUtf8 => specialize_regexp_replace::<i64>(args),
185 DataType::Utf8View => specialize_regexp_replace::<i32>(args),
186 other => {
187 internal_err!("Unsupported data type {other:?} for function regexp_replace")
188 }
189 }
190}
191
192fn regex_replace_posix_groups(replacement: &str) -> String {
201 static CAPTURE_GROUPS_RE_LOCK: LazyLock<Regex> =
202 LazyLock::new(|| Regex::new(r"\\{1,2}(\d+)").unwrap());
203 CAPTURE_GROUPS_RE_LOCK
204 .replace_all(replacement, "$${$1}")
205 .into_owned()
206}
207
208pub fn regexp_replace<'a, T: OffsetSizeTrait, U>(
249 string_array: U,
250 pattern_array: U,
251 replacement_array: U,
252 flags_array: Option<U>,
253) -> Result<ArrayRef>
254where
255 U: ArrayAccessor<Item = &'a str>,
256{
257 let mut patterns: HashMap<String, Regex> = HashMap::new();
262
263 let datatype = string_array.data_type().to_owned();
264
265 let string_array_iter = ArrayIter::new(string_array);
266 let pattern_array_iter = ArrayIter::new(pattern_array);
267 let replacement_array_iter = ArrayIter::new(replacement_array);
268
269 match flags_array {
270 None => {
271 let result_iter = string_array_iter
272 .zip(pattern_array_iter)
273 .zip(replacement_array_iter)
274 .map(|((string, pattern), replacement)| {
275 match (string, pattern, replacement) {
276 (Some(string), Some(pattern), Some(replacement)) => {
277 let replacement = regex_replace_posix_groups(replacement);
278 let re = match patterns.get(pattern) {
280 Some(re) => Ok(re),
281 None => match Regex::new(pattern) {
282 Ok(re) => {
283 patterns.insert(pattern.to_string(), re);
284 Ok(patterns.get(pattern).unwrap())
285 }
286 Err(err) => {
287 Err(DataFusionError::External(Box::new(err)))
288 }
289 },
290 };
291
292 Some(re.map(|re| re.replace(string, replacement.as_str())))
293 .transpose()
294 }
295 _ => Ok(None),
296 }
297 });
298
299 match datatype {
300 DataType::Utf8 | DataType::LargeUtf8 => {
301 let result =
302 result_iter.collect::<Result<GenericStringArray<T>>>()?;
303 Ok(Arc::new(result) as ArrayRef)
304 }
305 DataType::Utf8View => {
306 let result = result_iter.collect::<Result<StringViewArray>>()?;
307 Ok(Arc::new(result) as ArrayRef)
308 }
309 other => {
310 exec_err!(
311 "Unsupported data type {other:?} for function regex_replace"
312 )
313 }
314 }
315 }
316 Some(flags_array) => {
317 let flags_array_iter = ArrayIter::new(flags_array);
318
319 let result_iter = string_array_iter
320 .zip(pattern_array_iter)
321 .zip(replacement_array_iter)
322 .zip(flags_array_iter)
323 .map(|(((string, pattern), replacement), flags)| {
324 match (string, pattern, replacement, flags) {
325 (Some(string), Some(pattern), Some(replacement), Some(flags)) => {
326 let replacement = regex_replace_posix_groups(replacement);
327
328 let (pattern, replace_all) = if flags == "g" {
330 (pattern.to_string(), true)
331 } else if flags.contains('g') {
332 (
333 format!(
334 "(?{}){}",
335 flags.to_string().replace('g', ""),
336 pattern
337 ),
338 true,
339 )
340 } else {
341 (format!("(?{flags}){pattern}"), false)
342 };
343
344 let re = match patterns.get(&pattern) {
346 Some(re) => Ok(re),
347 None => match Regex::new(pattern.as_str()) {
348 Ok(re) => {
349 patterns.insert(pattern.clone(), re);
350 Ok(patterns.get(&pattern).unwrap())
351 }
352 Err(err) => {
353 Err(DataFusionError::External(Box::new(err)))
354 }
355 },
356 };
357
358 Some(re.map(|re| {
359 if replace_all {
360 re.replace_all(string, replacement.as_str())
361 } else {
362 re.replace(string, replacement.as_str())
363 }
364 }))
365 .transpose()
366 }
367 _ => Ok(None),
368 }
369 });
370
371 match datatype {
372 DataType::Utf8 | DataType::LargeUtf8 => {
373 let result =
374 result_iter.collect::<Result<GenericStringArray<T>>>()?;
375 Ok(Arc::new(result) as ArrayRef)
376 }
377 DataType::Utf8View => {
378 let result = result_iter.collect::<Result<StringViewArray>>()?;
379 Ok(Arc::new(result) as ArrayRef)
380 }
381 other => {
382 exec_err!(
383 "Unsupported data type {other:?} for function regex_replace"
384 )
385 }
386 }
387 }
388 }
389}
390
/// Reads the string stored at index 0 of the array `$ARG`.
///
/// If the array is empty or its first element is null, the macro makes the
/// *enclosing function* return early with an all-null array of length
/// `$ARRAY_SIZE`. That null array has the data type of `$ARG`. Supported
/// array types are `Utf8`, `LargeUtf8` and `Utf8View`; any other type panics
/// via `unreachable!`.
///
/// `$NAME` is accepted but not used by the expansion.
macro_rules! fetch_string_arg {
    ($ARG:expr, $NAME:expr, $ARRAY_SIZE:expr) => {{
        let string_array_type = ($ARG).data_type();
        match string_array_type {
            // Checked first, for every data type: nothing to read, so the
            // whole result is null.
            dt if $ARG.len() == 0 || $ARG.is_null(0) => {
                return Ok(new_null_array(dt, $ARRAY_SIZE));
            }
            DataType::Utf8 => {
                let array = as_string_array($ARG)?;
                array.value(0)
            }
            DataType::LargeUtf8 => {
                let array = as_large_string_array($ARG)?;
                array.value(0)
            }
            DataType::Utf8View => {
                let array = as_string_view_array($ARG)?;
                array.value(0)
            }
            _ => unreachable!(
                "Invalid data type for regexp_replace: {}",
                string_array_type
            ),
        }
    }};
}
425fn _regexp_replace_static_pattern_replace<T: OffsetSizeTrait>(
432 args: &[ArrayRef],
433) -> Result<ArrayRef> {
434 let array_size = args[0].len();
435 let pattern = fetch_string_arg!(&args[1], "pattern", array_size);
436 let replacement = fetch_string_arg!(&args[2], "replacement", array_size);
437 let flags = match args.len() {
438 3 => None,
439 4 => Some(fetch_string_arg!(&args[3], "flags", array_size)),
440 other => {
441 return exec_err!(
442 "regexp_replace was called with {other} arguments. It requires at least 3 and at most 4."
443 );
444 }
445 };
446
447 let (pattern, limit) = match flags {
451 Some("g") => (pattern.to_string(), 0),
452 Some(flags) => (
453 format!("(?{}){}", flags.to_string().replace('g', ""), pattern),
454 !flags.contains('g') as usize,
455 ),
456 None => (pattern.to_string(), 1),
457 };
458
459 let re =
460 Regex::new(&pattern).map_err(|err| DataFusionError::External(Box::new(err)))?;
461
462 let replacement = regex_replace_posix_groups(replacement);
465
466 let string_array_type = args[0].data_type();
467 match string_array_type {
468 DataType::Utf8 | DataType::LargeUtf8 => {
469 let string_array = as_generic_string_array::<T>(&args[0])?;
470
471 let mut vals = BufferBuilder::<u8>::new({
474 let offsets = string_array.value_offsets();
475 (offsets[string_array.len()] - offsets[0])
476 .to_usize()
477 .unwrap()
478 });
479 let mut new_offsets = BufferBuilder::<T>::new(string_array.len() + 1);
480 new_offsets.append(T::zero());
481
482 string_array.iter().for_each(|val| {
483 if let Some(val) = val {
484 let result = re.replacen(val, limit, replacement.as_str());
485 vals.append_slice(result.as_bytes());
486 }
487 new_offsets.append(T::from_usize(vals.len()).unwrap());
488 });
489
490 let data = ArrayDataBuilder::new(GenericStringArray::<T>::DATA_TYPE)
491 .len(string_array.len())
492 .nulls(string_array.nulls().cloned())
493 .buffers(vec![new_offsets.finish(), vals.finish()])
494 .build()?;
495 let result_array = GenericStringArray::<T>::from(data);
496 Ok(Arc::new(result_array) as ArrayRef)
497 }
498 DataType::Utf8View => {
499 let string_view_array = as_string_view_array(&args[0])?;
500
501 let mut builder = StringViewBuilder::with_capacity(string_view_array.len());
502
503 for val in string_view_array.iter() {
504 if let Some(val) = val {
505 let result = re.replacen(val, limit, replacement.as_str());
506 builder.append_value(result);
507 } else {
508 builder.append_null();
509 }
510 }
511
512 let result = builder.finish();
513 Ok(Arc::new(result) as ArrayRef)
514 }
515 _ => unreachable!(
516 "Invalid data type for regexp_replace: {}",
517 string_array_type
518 ),
519 }
520}
521
522fn specialize_regexp_replace<T: OffsetSizeTrait>(
525 args: &[ColumnarValue],
526) -> Result<ArrayRef> {
527 let (is_source_scalar, is_pattern_scalar, is_replacement_scalar, is_flags_scalar) = (
532 matches!(args[0], ColumnarValue::Scalar(_)),
533 matches!(args[1], ColumnarValue::Scalar(_)),
534 matches!(args[2], ColumnarValue::Scalar(_)),
535 matches!(args.get(3), Some(ColumnarValue::Scalar(_)) | None),
538 );
539 let len = args
540 .iter()
541 .fold(Option::<usize>::None, |acc, arg| match arg {
542 ColumnarValue::Scalar(_) => acc,
543 ColumnarValue::Array(a) => Some(a.len()),
544 });
545 let inferred_length = len.unwrap_or(1);
546 match (
547 is_source_scalar,
548 is_pattern_scalar,
549 is_replacement_scalar,
550 is_flags_scalar,
551 ) {
552 (_, true, true, true) => {
566 let hints = [
567 Hint::Pad,
568 Hint::AcceptsSingular,
569 Hint::AcceptsSingular,
570 Hint::AcceptsSingular,
571 ];
572 let args = args
573 .iter()
574 .zip(hints.iter().chain(std::iter::repeat(&Hint::Pad)))
575 .map(|(arg, hint)| {
576 let expansion_len = match hint {
579 Hint::AcceptsSingular => 1,
580 Hint::Pad => inferred_length,
581 };
582 arg.to_array(expansion_len)
583 })
584 .collect::<Result<Vec<_>>>()?;
585 _regexp_replace_static_pattern_replace::<T>(&args)
586 }
587
588 (_, _, _, _) => {
591 let args = args
592 .iter()
593 .map(|arg| arg.to_array(inferred_length))
594 .collect::<Result<Vec<_>>>()?;
595
596 match (
597 args[0].data_type(),
598 args[1].data_type(),
599 args[2].data_type(),
600 args.get(3).map(|a| a.data_type()),
601 ) {
602 (
603 DataType::Utf8,
604 DataType::Utf8,
605 DataType::Utf8,
606 Some(DataType::Utf8) | None,
607 ) => {
608 let string_array = args[0].as_string::<i32>();
609 let pattern_array = args[1].as_string::<i32>();
610 let replacement_array = args[2].as_string::<i32>();
611 let flags_array = args.get(3).map(|a| a.as_string::<i32>());
612 regexp_replace::<i32, _>(
613 string_array,
614 pattern_array,
615 replacement_array,
616 flags_array,
617 )
618 }
619 (
620 DataType::Utf8View,
621 DataType::Utf8View,
622 DataType::Utf8View,
623 Some(DataType::Utf8View) | None,
624 ) => {
625 let string_array = args[0].as_string_view();
626 let pattern_array = args[1].as_string_view();
627 let replacement_array = args[2].as_string_view();
628 let flags_array = args.get(3).map(|a| a.as_string_view());
629 regexp_replace::<i32, _>(
630 string_array,
631 pattern_array,
632 replacement_array,
633 flags_array,
634 )
635 }
636 (
637 DataType::LargeUtf8,
638 DataType::LargeUtf8,
639 DataType::LargeUtf8,
640 Some(DataType::LargeUtf8) | None,
641 ) => {
642 let string_array = args[0].as_string::<i64>();
643 let pattern_array = args[1].as_string::<i64>();
644 let replacement_array = args[2].as_string::<i64>();
645 let flags_array = args.get(3).map(|a| a.as_string::<i64>());
646 regexp_replace::<i64, _>(
647 string_array,
648 pattern_array,
649 replacement_array,
650 flags_array,
651 )
652 }
653 other => {
654 exec_err!(
655 "Unsupported data type {other:?} for function regex_replace"
656 )
657 }
658 }
659 }
660 }
661}
// Unit tests for the POSIX group rewriting helper and for the static-pattern
// replacement path.
#[cfg(test)]
mod tests {
    use arrow::array::*;

    use super::*;

    #[test]
    fn test_regex_replace_posix_groups() {
        // A single backslash followed by digits becomes `${N}`.
        assert_eq!(regex_replace_posix_groups(r"\1"), "${1}");
        assert_eq!(regex_replace_posix_groups(r"\12"), "${12}");
        assert_eq!(regex_replace_posix_groups(r"X\1Y"), "X${1}Y");
        assert_eq!(regex_replace_posix_groups(r"\1\2"), "${1}${2}");

        // A doubled backslash followed by digits is treated the same way.
        assert_eq!(regex_replace_posix_groups(r"\\1"), "${1}");
        assert_eq!(regex_replace_posix_groups(r"X\\1Y"), "X${1}Y");
        assert_eq!(regex_replace_posix_groups(r"\\1\\2"), "${1}${2}");

        // With three or four backslashes only the last one or two are
        // consumed; the leading ones stay in the output.
        assert_eq!(regex_replace_posix_groups(r"\\\1"), r"\${1}");
        assert_eq!(regex_replace_posix_groups(r"\\\\1"), r"\\${1}");
        assert_eq!(regex_replace_posix_groups(r"\\\1\\\\2"), r"\${1}\\${2}");

        // Backslashes not followed by a digit are left alone.
        assert_eq!(regex_replace_posix_groups(r"\"), r"\");
        assert_eq!(regex_replace_posix_groups(r"foo\bar"), r"foo\bar");

        // Backslash-letter sequences are not group references.
        assert_eq!(regex_replace_posix_groups(r"\n"), r"\n");
        assert_eq!(regex_replace_posix_groups(r"\t"), r"\t");

        // Group 0 is rewritten like any other group number.
        assert_eq!(regex_replace_posix_groups(r"\0"), "${0}");
        assert_eq!(
            regex_replace_posix_groups(r"prefix\0suffix"),
            "prefix${0}suffix"
        );
    }

    // Generates a test of the static-pattern path without flags for array
    // type `$T` and offset type `$O`.
    macro_rules! static_pattern_regexp_replace {
        ($name:ident, $T:ty, $O:ty) => {
            #[test]
            fn $name() {
                let values = vec!["abc", "acd", "abcd1234567890123", "123456789012abc"];
                let patterns = vec!["b"; 4];
                let replacement = vec!["foo"; 4];
                let expected =
                    vec!["afooc", "acd", "afoocd1234567890123", "123456789012afooc"];

                let values = <$T>::from(values);
                let patterns = <$T>::from(patterns);
                let replacements = <$T>::from(replacement);
                let expected = <$T>::from(expected);

                let re = _regexp_replace_static_pattern_replace::<$O>(&[
                    Arc::new(values),
                    Arc::new(patterns),
                    Arc::new(replacements),
                ])
                .unwrap();

                assert_eq!(re.as_ref(), &expected);
            }
        };
    }

    static_pattern_regexp_replace!(string_array, StringArray, i32);
    static_pattern_regexp_replace!(string_view_array, StringViewArray, i32);
    static_pattern_regexp_replace!(large_string_array, LargeStringArray, i64);

    // Generates a test of the static-pattern path with the case-insensitive
    // flag `i` for array type `$T` and offset type `$O`.
    macro_rules! static_pattern_regexp_replace_with_flags {
        ($name:ident, $T:ty, $O: ty) => {
            #[test]
            fn $name() {
                let values = vec![
                    "abc",
                    "aBc",
                    "acd",
                    "abcd1234567890123",
                    "aBcd1234567890123",
                    "123456789012abc",
                    "123456789012aBc",
                ];
                let expected = vec![
                    "afooc",
                    "afooc",
                    "acd",
                    "afoocd1234567890123",
                    "afoocd1234567890123",
                    "123456789012afooc",
                    "123456789012afooc",
                ];

                let values = <$T>::from(values);
                let patterns = StringArray::from(vec!["b"; 7]);
                let replacements = StringArray::from(vec!["foo"; 7]);
                // Shorter than `values`: the static path reads only element 0
                // of the pattern, replacement and flags arrays.
                let flags = StringArray::from(vec!["i"; 5]);
                let expected = <$T>::from(expected);

                let re = _regexp_replace_static_pattern_replace::<$O>(&[
                    Arc::new(values),
                    Arc::new(patterns),
                    Arc::new(replacements),
                    Arc::new(flags),
                ])
                .unwrap();

                assert_eq!(re.as_ref(), &expected);
            }
        };
    }

    static_pattern_regexp_replace_with_flags!(string_array_with_flags, StringArray, i32);
    static_pattern_regexp_replace_with_flags!(
        string_view_array_with_flags,
        StringViewArray,
        i32
    );
    static_pattern_regexp_replace_with_flags!(
        large_string_array_with_flags,
        LargeStringArray,
        i64
    );

    // A null pattern short-circuits to an all-null result.
    #[test]
    fn test_static_pattern_regexp_replace_early_abort() {
        let values = StringArray::from(vec!["abc"; 5]);
        let patterns = StringArray::from(vec![None::<&str>; 5]);
        let replacements = StringArray::from(vec!["foo"; 5]);
        let expected = StringArray::from(vec![None::<&str>; 5]);

        let re = _regexp_replace_static_pattern_replace::<i32>(&[
            Arc::new(values),
            Arc::new(patterns),
            Arc::new(replacements),
        ])
        .unwrap();

        assert_eq!(re.as_ref(), &expected);
    }

    // Empty inputs produce an empty result instead of failing.
    #[test]
    fn test_static_pattern_regexp_replace_early_abort_when_empty() {
        let values = StringArray::from(Vec::<Option<&str>>::new());
        let patterns = StringArray::from(Vec::<Option<&str>>::new());
        let replacements = StringArray::from(Vec::<Option<&str>>::new());
        let expected = StringArray::from(Vec::<Option<&str>>::new());

        let re = _regexp_replace_static_pattern_replace::<i32>(&[
            Arc::new(values),
            Arc::new(patterns),
            Arc::new(replacements),
        ])
        .unwrap();

        assert_eq!(re.as_ref(), &expected);
    }

    // Null flags short-circuit to an all-null result.
    #[test]
    fn test_static_pattern_regexp_replace_early_abort_flags() {
        let values = StringArray::from(vec!["abc"; 5]);
        let patterns = StringArray::from(vec!["a"; 5]);
        let replacements = StringArray::from(vec!["foo"; 5]);
        let flags = StringArray::from(vec![None::<&str>; 5]);
        let expected = StringArray::from(vec![None::<&str>; 5]);

        let re = _regexp_replace_static_pattern_replace::<i32>(&[
            Arc::new(values),
            Arc::new(patterns),
            Arc::new(replacements),
            Arc::new(flags),
        ])
        .unwrap();

        assert_eq!(re.as_ref(), &expected);
    }

    // A pattern that does not compile is reported as an external error.
    #[test]
    fn test_static_pattern_regexp_replace_pattern_error() {
        let values = StringArray::from(vec!["abc"; 5]);
        // Unclosed character class: not a valid regular expression.
        let patterns = StringArray::from(vec!["["; 5]);
        let replacements = StringArray::from(vec!["foo"; 5]);

        let re = _regexp_replace_static_pattern_replace::<i32>(&[
            Arc::new(values),
            Arc::new(patterns),
            Arc::new(replacements),
        ]);
        let pattern_err = re.expect_err("broken pattern should have failed");
        assert_eq!(
            pattern_err.strip_backtrace(),
            "External error: regex parse error:\n [\n ^\nerror: unclosed character class"
        );
    }

    // Null positions of the input are carried over to the output.
    #[test]
    fn test_static_pattern_regexp_replace_with_null_buffers() {
        let values = StringArray::from(vec![
            Some("a"),
            None,
            Some("b"),
            None,
            Some("a"),
            None,
            None,
            Some("c"),
        ]);
        let patterns = StringArray::from(vec!["a"; 1]);
        let replacements = StringArray::from(vec!["foo"; 1]);
        let expected = StringArray::from(vec![
            Some("foo"),
            None,
            Some("b"),
            None,
            Some("foo"),
            None,
            None,
            Some("c"),
        ]);

        let re = _regexp_replace_static_pattern_replace::<i32>(&[
            Arc::new(values),
            Arc::new(patterns),
            Arc::new(replacements),
        ])
        .unwrap();

        assert_eq!(re.as_ref(), &expected);
        assert_eq!(re.null_count(), 4);
    }

    // Same as above, but the input is a slice of a larger array.
    #[test]
    fn test_static_pattern_regexp_replace_with_sliced_null_buffer() {
        let values = StringArray::from(vec![
            Some("a"),
            None,
            Some("b"),
            None,
            Some("a"),
            None,
            None,
            Some("c"),
        ]);
        // Keep elements 2..7 of the original array.
        let values = values.slice(2, 5);
        let patterns = StringArray::from(vec!["a"; 1]);
        let replacements = StringArray::from(vec!["foo"; 1]);
        let expected = StringArray::from(vec![Some("b"), None, Some("foo"), None, None]);

        let re = _regexp_replace_static_pattern_replace::<i32>(&[
            Arc::new(values),
            Arc::new(patterns),
            Arc::new(replacements),
        ])
        .unwrap();
        assert_eq!(re.as_ref(), &expected);
        assert_eq!(re.null_count(), 3);
    }
}