1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
#[cfg(feature = "extract_jsonpath")]
mod json_path;
use crate::chunked_array::kernels::strings::string_lengths;
use crate::prelude::*;
use arrow::compute::substring::substring;
use regex::Regex;
impl Utf8Chunked {
    /// Compute the length of the string values.
    ///
    /// Runs the `string_lengths` kernel through `apply_kernel_cast` and
    /// returns the result as a `UInt32Chunked`.
    pub fn str_lengths(&self) -> UInt32Chunked {
        self.apply_kernel_cast(string_lengths)
    }

    /// Test every string value against the regex pattern `pat`.
    ///
    /// The returned `BooleanChunked` carries the same name as `self`.
    /// Null values stay null in the output.
    ///
    /// # Errors
    ///
    /// Returns an error when `pat` cannot be compiled into a regex.
    pub fn contains(&self, pat: &str) -> Result<BooleanChunked> {
        let regex = Regex::new(pat)?;
        let mut matches: BooleanChunked = match self.null_count() {
            // Without nulls the values can be visited without `Option` wrapping.
            0 => self
                .into_no_null_iter()
                .map(|s| regex.is_match(s))
                .collect(),
            // Otherwise only the present values are matched; `None` is kept.
            _ => self
                .into_iter()
                .map(|opt_s| opt_s.map(|s| regex.is_match(s)))
                .collect(),
        };
        matches.rename(self.name());
        Ok(matches)
    }

    /// Replace the first match of the regex pattern `pat` in each string
    /// value with `val` (via `Regex::replace`).
    ///
    /// # Errors
    ///
    /// Returns an error when `pat` cannot be compiled into a regex.
    pub fn replace(&self, pat: &str, val: &str) -> Result<Utf8Chunked> {
        let regex = Regex::new(pat)?;
        Ok(self.apply(|s| regex.replace(s, val)))
    }

    /// Replace every match of the regex pattern `pat` in each string value
    /// with `val` (via `Regex::replace_all`).
    ///
    /// # Errors
    ///
    /// Returns an error when `pat` cannot be compiled into a regex.
    pub fn replace_all(&self, pat: &str, val: &str) -> Result<Utf8Chunked> {
        let regex = Regex::new(pat)?;
        Ok(self.apply(|s| regex.replace_all(s, val)))
    }

    /// Map the string values to their lowercase equivalent
    /// (using `str::to_lowercase`).
    pub fn to_lowercase(&self) -> Utf8Chunked {
        self.apply(|s| s.to_lowercase().into())
    }

    /// Map the string values to their uppercase equivalent
    /// (using `str::to_uppercase`).
    pub fn to_uppercase(&self) -> Utf8Chunked {
        self.apply(|s| s.to_uppercase().into())
    }

    /// Concatenate with the values of `other`.
    ///
    /// Delegates to the `+` operator implementation for `&Utf8Chunked`.
    pub fn concat(&self, other: &Utf8Chunked) -> Self {
        self + other
    }

    /// Slice the string values.
    ///
    /// `start` and `length` are forwarded unchanged to arrow's `substring`
    /// kernel, which is run on every chunk; the sliced chunks are assembled
    /// into a new array that keeps the name of `self`.
    ///
    /// # Errors
    ///
    /// Returns an error when the `substring` kernel fails on any chunk.
    pub fn str_slice(&self, start: i64, length: Option<u64>) -> Result<Self> {
        let sliced = self
            .downcast_iter()
            .map(|arr| substring(arr, start, &length).map(Into::into))
            .collect::<arrow::error::Result<_>>()?;
        Ok(Self::new_from_chunks(self.name(), sliced))
    }
}