use polars_arrow::array::ValueSize;
#[cfg(feature = "dtype-struct")]
use polars_arrow::export::arrow::array::{MutableArray, MutableUtf8Array};
use super::function_expr::StringFunction;
use super::*;
/// Namespace of string-specific expression builders.
///
/// Wraps the [`Expr`] that the string operations are applied to; every method
/// consumes the namespace and returns a new [`Expr`].
pub struct StringNameSpace(pub(crate) Expr);
impl StringNameSpace {
pub fn contains_literal<S: AsRef<str>>(self, pat: S) -> Expr {
let pat = pat.as_ref().into();
self.0
.map_private(StringFunction::Contains { pat, literal: true }.into())
}
pub fn contains<S: AsRef<str>>(self, pat: S) -> Expr {
let pat = pat.as_ref().into();
self.0.map_private(
StringFunction::Contains {
pat,
literal: false,
}
.into(),
)
}
pub fn ends_with<S: AsRef<str>>(self, sub: S) -> Expr {
let sub = sub.as_ref().into();
self.0.map_private(StringFunction::EndsWith(sub).into())
}
pub fn starts_with<S: AsRef<str>>(self, sub: S) -> Expr {
let sub = sub.as_ref().into();
self.0.map_private(StringFunction::StartsWith(sub).into())
}
/// Build an expression that applies [`StringFunction::Extract`] with an owned
/// copy of `pat` and the given `group_index`.
///
/// NOTE(review): presumably `pat` is a regex and `group_index` selects the
/// capture group to return — confirm in the function implementation.
pub fn extract(self, pat: &str, group_index: usize) -> Expr {
    let function = StringFunction::Extract {
        pat: pat.to_owned(),
        group_index,
    };
    self.0.map_private(function.into())
}
/// Build an expression that applies [`StringFunction::Zfill`], forwarding
/// `alignment` unchanged.
///
/// Only available with the `string_justify` feature.
#[cfg(feature = "string_justify")]
#[cfg_attr(docsrs, doc(cfg(feature = "string_justify")))]
pub fn zfill(self, alignment: usize) -> Expr {
    let function = StringFunction::Zfill(alignment);
    self.0.map_private(function.into())
}
/// Build an expression that applies [`StringFunction::LJust`] with the given
/// `width` and `fillchar`.
///
/// Only available with the `string_justify` feature.
#[cfg(feature = "string_justify")]
#[cfg_attr(docsrs, doc(cfg(feature = "string_justify")))]
pub fn ljust(self, width: usize, fillchar: char) -> Expr {
    let function = StringFunction::LJust { width, fillchar };
    self.0.map_private(function.into())
}
/// Build an expression that applies [`StringFunction::RJust`] with the given
/// `width` and `fillchar`.
///
/// Only available with the `string_justify` feature.
#[cfg(feature = "string_justify")]
#[cfg_attr(docsrs, doc(cfg(feature = "string_justify")))]
pub fn rjust(self, width: usize, fillchar: char) -> Expr {
    let function = StringFunction::RJust { width, fillchar };
    self.0.map_private(function.into())
}
pub fn extract_all(self, pat: Expr) -> Expr {
self.0
.map_many_private(StringFunction::ExtractAll.into(), &[pat], false)
}
/// Build an expression that applies [`StringFunction::CountMatch`] with an owned
/// copy of `pat`.
pub fn count_match(self, pat: &str) -> Expr {
    let function = StringFunction::CountMatch(pat.to_owned());
    self.0.map_private(function.into())
}
/// Build an expression that applies [`StringFunction::Strptime`] configured by
/// `options`.
///
/// Only available with the `temporal` feature.
#[cfg(feature = "temporal")]
pub fn strptime(self, options: StrpTimeOptions) -> Expr {
    let function = StringFunction::Strptime(options);
    self.0.map_private(function.into())
}
/// Build an [`Expr::Function`] node that applies
/// [`StringFunction::ConcatVertical`] with an owned copy of `delimiter` to the
/// wrapped expression.
///
/// The node is configured with `ApplyOptions::ApplyGroups`, no input wildcard
/// expansion and `auto_explode: true`; all other options keep their defaults.
///
/// Only available with the `concat_str` feature.
#[cfg(feature = "concat_str")]
pub fn concat(self, delimiter: &str) -> Expr {
    let options = FunctionOptions {
        collect_groups: ApplyOptions::ApplyGroups,
        input_wildcard_expansion: false,
        auto_explode: true,
        ..Default::default()
    };
    let function = StringFunction::ConcatVertical(delimiter.to_owned()).into();
    Expr::Function {
        input: vec![self.0],
        function,
        options,
    }
}
/// Split every string value on the substring `by`, producing a `List(Utf8)`
/// column.
///
/// The separator is not part of the resulting pieces. A null input value yields
/// a null list entry. The evaluation fails if the input series is not Utf8.
pub fn split(self, by: &str) -> Expr {
    let separator = by.to_owned();
    let split_to_list = move |s: Series| {
        let ca = s.utf8()?;
        let mut builder = ListUtf8ChunkedBuilder::new(s.name(), s.len(), ca.get_values_size());
        for opt_value in ca.into_iter() {
            if let Some(value) = opt_value {
                builder.append_values_iter(value.split(&separator));
            } else {
                builder.append_null();
            }
        }
        Ok(builder.finish().into_series())
    };
    let output_type = GetOutput::from_type(DataType::List(Box::new(DataType::Utf8)));
    self.0.map(split_to_list, output_type).with_fmt("str.split")
}
/// Split every string value on the substring `by`, keeping the separator at the
/// end of each piece, and produce a `List(Utf8)` column.
///
/// A null input value yields a null list entry. The evaluation fails if the
/// input series is not Utf8.
pub fn split_inclusive(self, by: &str) -> Expr {
    let separator = by.to_owned();
    let split_to_list = move |s: Series| {
        let ca = s.utf8()?;
        let mut builder = ListUtf8ChunkedBuilder::new(s.name(), s.len(), ca.get_values_size());
        for opt_value in ca.into_iter() {
            if let Some(value) = opt_value {
                builder.append_values_iter(value.split_inclusive(&separator));
            } else {
                builder.append_null();
            }
        }
        Ok(builder.finish().into_series())
    };
    let output_type = GetOutput::from_type(DataType::List(Box::new(DataType::Utf8)));
    self.0
        .map(split_to_list, output_type)
        .with_fmt("str.split_inclusive")
}
/// Split every string value on the substring `by` into exactly `n + 1` pieces
/// and return them as a struct column with Utf8 fields `field_0` ..= `field_n`.
///
/// * Values that split into fewer than `n + 1` pieces get nulls in the
///   remaining fields.
/// * Pieces beyond the first `n + 1` are dropped.
/// * A null input value yields null in every field.
///
/// The separator is not part of the pieces. Only available with the
/// `dtype-struct` feature.
///
/// # Panics
///
/// At evaluation time, if converting a finished field array into a `Series`
/// fails.
#[cfg(feature = "dtype-struct")]
pub fn split_exact(self, by: &str, n: usize) -> Expr {
    let separator = by.to_owned();
    let split_to_struct = move |s: Series| {
        let ca = s.utf8()?;
        // One growable string column per output field.
        let mut columns = (0..n + 1)
            .map(|_| MutableUtf8Array::<i64>::with_capacity(ca.len()))
            .collect::<Vec<_>>();
        for opt_value in ca.into_iter() {
            match opt_value {
                Some(value) => {
                    let mut parts = value.split(&separator);
                    // Fill the columns in order; once the pieces run out the
                    // remaining columns receive null for this row.
                    for column in columns.iter_mut() {
                        match parts.next() {
                            Some(part) => column.push(Some(part)),
                            None => column.push_null(),
                        }
                    }
                }
                None => columns.iter_mut().for_each(|column| column.push_null()),
            }
        }
        let mut fields = Vec::with_capacity(columns.len());
        for (idx, mut column) in columns.into_iter().enumerate() {
            let name = format!("field_{idx}");
            fields.push(Series::try_from((name.as_str(), column.as_box())).unwrap());
        }
        Ok(StructChunked::new(ca.name(), &fields)?.into_series())
    };
    let output_fields = (0..n + 1)
        .map(|i| Field::from_owned(format!("field_{i}"), DataType::Utf8))
        .collect();
    self.0
        .map(
            split_to_struct,
            GetOutput::from_type(DataType::Struct(output_fields)),
        )
        .with_fmt("str.split_exact")
}
/// Split every string value on the substring `by` into exactly `n + 1` pieces,
/// keeping the separator at the end of each piece, and return them as a struct
/// column with Utf8 fields `field_0` ..= `field_n`.
///
/// * Values that split into fewer than `n + 1` pieces get nulls in the
///   remaining fields.
/// * Pieces beyond the first `n + 1` are dropped.
/// * A null input value yields null in every field.
///
/// Only available with the `dtype-struct` feature.
///
/// # Panics
///
/// At evaluation time, if converting a finished field array into a `Series`
/// fails.
#[cfg(feature = "dtype-struct")]
pub fn split_exact_inclusive(self, by: &str, n: usize) -> Expr {
    let by = by.to_string();
    let function = move |s: Series| {
        let ca = s.utf8()?;
        // One growable string column per output field.
        let mut arrs = (0..n + 1)
            .map(|_| MutableUtf8Array::<i64>::with_capacity(ca.len()))
            .collect::<Vec<_>>();
        ca.into_iter().for_each(|opt_s| match opt_s {
            None => {
                for arr in &mut arrs {
                    arr.push_null()
                }
            }
            Some(s) => {
                let mut arr_iter = arrs.iter_mut();
                let split_iter = s.split_inclusive(&by);
                // Zipping by `&mut` leaves the untouched columns in `arr_iter`
                // so they can be padded with null below.
                (split_iter)
                    .zip(&mut arr_iter)
                    .for_each(|(splitted, arr)| arr.push(Some(splitted)));
                for arr in arr_iter {
                    arr.push_null()
                }
            }
        });
        let fields = arrs
            .into_iter()
            .enumerate()
            .map(|(i, mut arr)| {
                Series::try_from((format!("field_{i}").as_str(), arr.as_box())).unwrap()
            })
            .collect::<Vec<_>>();
        Ok(StructChunked::new(ca.name(), &fields)?.into_series())
    };
    self.0
        .map(
            function,
            GetOutput::from_type(DataType::Struct(
                (0..n + 1)
                    .map(|i| Field::from_owned(format!("field_{i}"), DataType::Utf8))
                    .collect(),
            )),
        )
        // Use this method's own name so the expression is distinguishable from
        // `split_exact` when a plan is formatted.
        .with_fmt("str.split_exact_inclusive")
}
/// Split every string value on the substring `by` into at most `n` pieces and
/// return them as a struct column with Utf8 fields `field_0` .. `field_{n-1}`.
///
/// The last piece holds the unsplit remainder of the string. Values that split
/// into fewer than `n` pieces get nulls in the remaining fields, and a null
/// input value yields null in every field.
///
/// Only available with the `dtype-struct` feature.
///
/// # Panics
///
/// At evaluation time, if converting a finished field array into a `Series`
/// fails.
#[cfg(feature = "dtype-struct")]
pub fn splitn(self, by: &str, n: usize) -> Expr {
    let separator = by.to_owned();
    let split_to_struct = move |s: Series| {
        let ca = s.utf8()?;
        // One growable string column per output field.
        let mut columns = (0..n)
            .map(|_| MutableUtf8Array::<i64>::with_capacity(ca.len()))
            .collect::<Vec<_>>();
        for opt_value in ca.into_iter() {
            match opt_value {
                Some(value) => {
                    let mut parts = value.splitn(n, &separator);
                    // Fill the columns in order; once the pieces run out the
                    // remaining columns receive null for this row.
                    for column in columns.iter_mut() {
                        match parts.next() {
                            Some(part) => column.push(Some(part)),
                            None => column.push_null(),
                        }
                    }
                }
                None => columns.iter_mut().for_each(|column| column.push_null()),
            }
        }
        let mut fields = Vec::with_capacity(columns.len());
        for (idx, mut column) in columns.into_iter().enumerate() {
            let name = format!("field_{idx}");
            fields.push(Series::try_from((name.as_str(), column.as_box())).unwrap());
        }
        Ok(StructChunked::new(ca.name(), &fields)?.into_series())
    };
    let output_fields = (0..n)
        .map(|i| Field::from_owned(format!("field_{i}"), DataType::Utf8))
        .collect();
    self.0
        .map(
            split_to_struct,
            GetOutput::from_type(DataType::Struct(output_fields)),
        )
        .with_fmt("str.splitn")
}
/// Build an expression that applies [`StringFunction::Replace`] with
/// `all: false`, passing `pat` and `value` as additional input expressions.
///
/// `literal` is forwarded unchanged to the function. Only available with the
/// `regex` feature.
#[cfg(feature = "regex")]
pub fn replace(self, pat: Expr, value: Expr, literal: bool) -> Expr {
    let function = FunctionExpr::StringExpr(StringFunction::Replace {
        all: false,
        literal,
    });
    let arguments = [pat, value];
    self.0.map_many_private(function, &arguments, true)
}
/// Build an expression that applies [`StringFunction::Replace`] with
/// `all: true`, passing `pat` and `value` as additional input expressions.
///
/// `literal` is forwarded unchanged to the function. Only available with the
/// `regex` feature.
#[cfg(feature = "regex")]
pub fn replace_all(self, pat: Expr, value: Expr, literal: bool) -> Expr {
    let function = FunctionExpr::StringExpr(StringFunction::Replace { all: true, literal });
    let arguments = [pat, value];
    self.0.map_many_private(function, &arguments, true)
}
/// Build an expression that applies [`StringFunction::Strip`], forwarding the
/// optional `matches` character unchanged.
pub fn strip(self, matches: Option<char>) -> Expr {
    let function = FunctionExpr::StringExpr(StringFunction::Strip(matches));
    self.0.map_private(function)
}
/// Build an expression that applies [`StringFunction::LStrip`], forwarding the
/// optional `matches` character unchanged.
pub fn lstrip(self, matches: Option<char>) -> Expr {
    let function = FunctionExpr::StringExpr(StringFunction::LStrip(matches));
    self.0.map_private(function)
}
/// Build an expression that applies [`StringFunction::RStrip`], forwarding the
/// optional `matches` character unchanged.
pub fn rstrip(self, matches: Option<char>) -> Expr {
    let function = FunctionExpr::StringExpr(StringFunction::RStrip(matches));
    self.0.map_private(function)
}
/// Build an expression that applies [`StringFunction::Lowercase`] to the wrapped
/// expression.
pub fn to_lowercase(self) -> Expr {
    let function = FunctionExpr::StringExpr(StringFunction::Lowercase);
    self.0.map_private(function)
}
/// Build an expression that applies [`StringFunction::Uppercase`] to the wrapped
/// expression.
pub fn to_uppercase(self) -> Expr {
    let function = FunctionExpr::StringExpr(StringFunction::Uppercase);
    self.0.map_private(function)
}
}