use reifydb_core::value::column::{Column, columns::Columns, data::ColumnData};
use reifydb_type::{
util::bitvec::BitVec,
value::{container::utf8::Utf8Container, r#type::Type},
};
use crate::function::{Function, FunctionCapability, FunctionContext, FunctionInfo, error::FunctionError};
/// Function object whose metadata is registered under the name `text::substring`.
pub struct TextSubstring {
    /// Function metadata; handed out by reference from `Function::info`.
    info: FunctionInfo,
}
impl Default for TextSubstring {
fn default() -> Self {
Self::new()
}
}
impl TextSubstring {
    /// Creates the function with its metadata name set to `text::substring`.
    pub fn new() -> Self {
        let info = FunctionInfo::new("text::substring");
        Self { info }
    }
}
impl Function for TextSubstring {
fn info(&self) -> &FunctionInfo {
&self.info
}
fn capabilities(&self) -> &[FunctionCapability] {
&[FunctionCapability::Scalar]
}
fn return_type(&self, _input_types: &[Type]) -> Type {
Type::Utf8
}
fn execute(&self, ctx: &FunctionContext, args: &Columns) -> Result<Columns, FunctionError> {
if args.len() != 3 {
return Err(FunctionError::ArityMismatch {
function: ctx.fragment.clone(),
expected: 3,
actual: args.len(),
});
}
let text_col = &args[0];
let start_col = &args[1];
let length_col = &args[2];
let (text_data, text_bv) = text_col.data().unwrap_option();
let (start_data, start_bv) = start_col.data().unwrap_option();
let (length_data, length_bv) = length_col.data().unwrap_option();
let row_count = text_data.len();
match (text_data, start_data, length_data) {
(
ColumnData::Utf8 {
container: text_container,
max_bytes,
},
ColumnData::Int4(start_container),
ColumnData::Int4(length_container),
) => {
let mut result_data = Vec::with_capacity(text_container.data().len());
for i in 0..row_count {
if text_container.is_defined(i)
&& start_container.is_defined(i) && length_container.is_defined(i)
{
let original_str = &text_container[i];
let start_pos = start_container.get(i).copied().unwrap_or(0);
let length = length_container.get(i).copied().unwrap_or(0);
let chars: Vec<char> = original_str.chars().collect();
let chars_len = chars.len();
let start_idx = if start_pos < 0 {
chars_len.saturating_sub((-start_pos) as usize)
} else {
start_pos as usize
};
let length_usize = if length < 0 {
0
} else {
length as usize
};
let substring = if start_idx >= chars_len {
String::new()
} else {
let end_idx = (start_idx + length_usize).min(chars_len);
chars[start_idx..end_idx].iter().collect()
};
result_data.push(substring);
} else {
result_data.push(String::new());
}
}
let result_col_data = ColumnData::Utf8 {
container: Utf8Container::new(result_data),
max_bytes: *max_bytes,
};
let mut combined_bv: Option<BitVec> = None;
for bv in [text_bv, start_bv, length_bv].into_iter().flatten() {
combined_bv = Some(match combined_bv {
Some(existing) => existing.and(bv),
None => bv.clone(),
});
}
let final_data = match combined_bv {
Some(bv) => ColumnData::Option {
inner: Box::new(result_col_data),
bitvec: bv,
},
None => result_col_data,
};
Ok(Columns::new(vec![Column::new(ctx.fragment.clone(), final_data)]))
}
(
ColumnData::Utf8 {
container: text_container,
max_bytes,
},
start_d,
length_d,
) => {
let mut result_data = Vec::with_capacity(text_container.data().len());
for i in 0..row_count {
if text_container.is_defined(i) {
let original_str = &text_container[i];
let start_pos = match start_d {
ColumnData::Int1(container) => {
container.get(i).map(|&v| v as i32).unwrap_or(0)
}
ColumnData::Int2(container) => {
container.get(i).map(|&v| v as i32).unwrap_or(0)
}
ColumnData::Int4(container) => {
container.get(i).copied().unwrap_or(0)
}
ColumnData::Int8(container) => {
container.get(i).map(|&v| v as i32).unwrap_or(0)
}
_ => 0,
};
let length = match length_d {
ColumnData::Int1(container) => {
container.get(i).map(|&v| v as i32).unwrap_or(0)
}
ColumnData::Int2(container) => {
container.get(i).map(|&v| v as i32).unwrap_or(0)
}
ColumnData::Int4(container) => {
container.get(i).copied().unwrap_or(0)
}
ColumnData::Int8(container) => {
container.get(i).map(|&v| v as i32).unwrap_or(0)
}
_ => 0,
};
let chars: Vec<char> = original_str.chars().collect();
let chars_len = chars.len();
let start_idx = if start_pos < 0 {
chars_len.saturating_sub((-start_pos) as usize)
} else {
start_pos as usize
};
let length_usize = if length < 0 {
0
} else {
length as usize
};
let substring = if start_idx >= chars_len {
String::new()
} else {
let end_idx = (start_idx + length_usize).min(chars_len);
chars[start_idx..end_idx].iter().collect()
};
result_data.push(substring);
} else {
result_data.push(String::new());
}
}
let result_col_data = ColumnData::Utf8 {
container: Utf8Container::new(result_data),
max_bytes: *max_bytes,
};
let mut combined_bv: Option<BitVec> = None;
for bv in [text_bv, start_bv, length_bv].into_iter().flatten() {
combined_bv = Some(match combined_bv {
Some(existing) => existing.and(bv),
None => bv.clone(),
});
}
let final_data = match combined_bv {
Some(bv) => ColumnData::Option {
inner: Box::new(result_col_data),
bitvec: bv,
},
None => result_col_data,
};
Ok(Columns::new(vec![Column::new(ctx.fragment.clone(), final_data)]))
}
(other, _, _) => Err(FunctionError::InvalidArgumentType {
function: ctx.fragment.clone(),
argument_index: 0,
expected: vec![Type::Utf8],
actual: other.get_type(),
}),
}
}
}