use std::collections::HashMap;
use regex::Regex;
use super::utils::{combine_validities, unary_utf8_boolean};
use crate::array::{BooleanArray, Offset, Utf8Array};
use crate::datatypes::DataType;
use crate::error::{ArrowError, Result};
use crate::{array::*, bitmap::Bitmap};
/// Matches each string in `values` against the regular expression found at the
/// same position in `regex`, returning the outcomes as a [`BooleanArray`].
///
/// The validity of the result is whatever `combine_validities` produces from
/// the validities of the two inputs. Slots where either the value or the
/// pattern is null carry `false` in the values bitmap, and the pattern of such
/// a slot is never compiled.
///
/// Each distinct pattern is compiled once and reused for later rows.
///
/// # Errors
/// Returns [`ArrowError::InvalidArgumentError`] when
/// * `values` and `regex` have different lengths, or
/// * a pattern that must be evaluated is not a valid regular expression.
pub fn regex_match<O: Offset>(values: &Utf8Array<O>, regex: &Utf8Array<O>) -> Result<BooleanArray> {
    use std::collections::hash_map::Entry;

    if values.len() != regex.len() {
        return Err(ArrowError::InvalidArgumentError(
            "Cannot perform comparison operation on arrays of different length".to_string(),
        ));
    }

    // Cache of compiled patterns, keyed by the pattern text borrowed from `regex`.
    let mut compiled = HashMap::new();
    let validity = combine_validities(values.validity(), regex.validity());

    // The explicit closure return type pins the error type to `ArrowError`;
    // without it the error parameter of `try_from_trusted_len_iter` is ambiguous.
    let iterator = values
        .iter()
        .zip(regex.iter())
        .map(|(haystack, pattern)| -> Result<bool> {
            let (haystack, pattern) = match (haystack, pattern) {
                (Some(haystack), Some(pattern)) => (haystack, pattern),
                // Null on either side: the bit is masked by `validity`, store `false`.
                _ => return Ok(false),
            };

            // A single hash lookup per row; compilation only happens on a cache miss.
            let matcher = match compiled.entry(pattern) {
                Entry::Occupied(slot) => slot.into_mut(),
                Entry::Vacant(slot) => {
                    let re = Regex::new(pattern).map_err(|e| {
                        ArrowError::InvalidArgumentError(format!(
                            "Unable to compile regex: {}",
                            e
                        ))
                    })?;
                    slot.insert(re)
                }
            };

            Ok(matcher.is_match(haystack))
        });

    // Stops at the first row that reports an error and propagates it.
    let new_values = Bitmap::try_from_trusted_len_iter(iterator)?;

    Ok(BooleanArray::from_data(
        DataType::Boolean,
        new_values,
        validity,
    ))
}
/// Matches every string in `values` against the single regular expression
/// `regex`, returning the outcomes as a [`BooleanArray`].
///
/// The pattern is compiled once up front; the per-element evaluation is
/// delegated to `unary_utf8_boolean`, which also decides how the validity of
/// the result is derived from `values`.
///
/// # Errors
/// Returns [`ArrowError::InvalidArgumentError`] when `regex` is not a valid
/// regular expression.
pub fn regex_match_scalar<O: Offset>(values: &Utf8Array<O>, regex: &str) -> Result<BooleanArray> {
    let compiled = match Regex::new(regex) {
        Ok(compiled) => compiled,
        Err(e) => {
            return Err(ArrowError::InvalidArgumentError(format!(
                "Unable to compile regex: {}",
                e
            )));
        }
    };

    let matches = unary_utf8_boolean(values, |haystack| compiled.is_match(haystack));
    Ok(matches)
}