use std::borrow::Cow;
use arrow::array::builder::StaticArrayBuilder;
use arrow::array::{Array, Utf8ViewArrayBuilder};
use arrow::datatypes::ArrowDataType;
use polars_core::prelude::{Column, DataType, IntoColumn, StringChunked};
use polars_core::scalar::Scalar;
use polars_error::{PolarsResult, polars_ensure};
use polars_utils::pl_str::PlSmallStr;
/// Returns the wrapped string slice, or the literal text `null` when no value is present.
#[inline(always)]
fn opt_str_to_string(s: Option<&str>) -> &str {
    match s {
        Some(value) => value,
        None => "null",
    }
}
pub fn str_format(cs: &mut [Column], format: &str, insertions: &[usize]) -> PolarsResult<Column> {
assert_eq!(cs.len(), insertions.len());
assert!(!cs.is_empty());
let output_name = cs[0].name().clone();
let mut output_length = 1;
for c in cs.iter() {
if c.len() != 1 {
polars_ensure!(
output_length == 1 || output_length == c.len(),
length_mismatch = "format",
output_length,
c.len()
);
output_length = c.len();
}
}
let mut validity = None;
let mut num_scalar_inputs = 0;
for c in cs.iter_mut() {
if let Some(c_validity) = c.rechunk_validity() {
if c.null_count() == c.len() {
return Ok(Column::full_null(
output_name,
output_length,
&DataType::String,
));
}
match &mut validity {
v @ None => *v = Some(c_validity),
Some(v) => *v = arrow::bitmap::and(v, &c_validity),
}
}
*c = c.cast(&DataType::String)?;
num_scalar_inputs += usize::from(c.len() == 1);
}
let mut format = Cow::Borrowed(format);
let mut insertions = Cow::Borrowed(insertions);
if num_scalar_inputs > 0 {
let mut filled_format = String::new();
filled_format.push_str(&format[..*insertions.first().unwrap()]);
insertions = Cow::Owned(
cs.iter()
.enumerate()
.filter_map(|(i, c)| {
let v = if c.len() == 1 {
filled_format.push_str(opt_str_to_string(c.str().unwrap().get(0)));
None
} else {
Some(filled_format.len())
};
let s = if i == cs.len() - 1 {
&format[insertions[i]..]
} else {
&format[insertions[i]..insertions[i + 1]]
};
filled_format.push_str(s);
v
})
.collect(),
);
format = filled_format.into();
}
let format = format.as_ref();
let insertions = insertions.as_ref();
if num_scalar_inputs == cs.len() {
let sc = Scalar::from(PlSmallStr::from_str(format));
return Ok(Column::new_scalar(output_name, sc, output_length));
}
let mut builder = Utf8ViewArrayBuilder::new(ArrowDataType::Utf8View);
builder.reserve(output_length);
let mut arrays = cs
.iter()
.filter(|c| c.len() != 1)
.map(|c| {
let ca = c.str().unwrap();
let mut iter = ca.downcast_iter();
let arr = iter.next().unwrap();
(iter, arr, 0)
})
.collect::<Vec<_>>();
let mut s = String::new();
for i in 0..output_length {
if validity
.as_ref()
.is_some_and(|v| !unsafe { v.get_bit_unchecked(i) })
{
unsafe { builder.push_inline_view_ignore_validity(Default::default()) };
for (iter, arr, elem_idx) in arrays.iter_mut() {
*elem_idx += 1;
if i + 1 != output_length && *elem_idx == arr.len() {
*arr = iter.next().unwrap();
*elem_idx = 0;
}
}
continue;
}
s.clear();
s.push_str(&format[..insertions[0]]);
for (j, (iter, arr, elem_idx)) in arrays.iter_mut().enumerate() {
s.push_str(opt_str_to_string(arr.get(*elem_idx)));
let start = insertions[j];
let end = insertions.get(j + 1).copied().unwrap_or(format.len());
s.push_str(&format[start..end]);
*elem_idx += 1;
if i + 1 != output_length && *elem_idx == arr.len() {
*arr = iter.next().unwrap();
*elem_idx = 0;
}
}
builder.push_value_ignore_validity(&s);
}
let array = builder.freeze().with_validity(validity).to_boxed();
Ok(unsafe { StringChunked::from_chunks(output_name, vec![array]) }.into_column())
}