use arrow_array::builder::GenericStringBuilder;
use arrow_array::{Array, GenericStringArray, OffsetSizeTrait};
use arrow_data::ArrayData;
use arrow_schema::ArrowError;
use std::marker::PhantomData;
use crate::reader::tape::{Tape, TapeElement};
use crate::reader::ArrayDecoder;
/// Text appended for a `TapeElement::True` when primitives are coerced to strings.
const TRUE: &str = "true";
/// Text appended for a `TapeElement::False` when primitives are coerced to strings.
const FALSE: &str = "false";
/// An [`ArrayDecoder`] that turns tape elements into a string array whose
/// offsets use the type `O`.
pub struct StringArrayDecoder<O: OffsetSizeTrait> {
/// When `true`, boolean and number tape elements are accepted and written
/// as their string form; when `false` they cause decoding to fail.
coerce_primitive: bool,
/// Binds the decoder to the offset type `O` without storing a value of it.
phantom: PhantomData<O>,
}
impl<O: OffsetSizeTrait> StringArrayDecoder<O> {
pub fn new(coerce_primitive: bool) -> Self {
Self {
coerce_primitive,
phantom: Default::default(),
}
}
}
impl<O: OffsetSizeTrait> ArrayDecoder for StringArrayDecoder<O> {
    /// Decodes the tape elements at `pos` into string array data.
    ///
    /// Works in two passes over `pos`:
    ///
    /// 1. Validate every element and add up the number of value bytes needed.
    /// 2. Append each value (or null) to a builder pre-sized from pass 1.
    ///
    /// # Errors
    ///
    /// * Returns the error produced by `tape.error(_, "string")` for the first
    ///   element that is neither a string nor null, nor (when
    ///   `coerce_primitive` is set) a boolean or number.
    /// * Returns `ArrowError::JsonError` if the total byte length cannot be
    ///   represented in the offset type `O`.
    fn decode(&mut self, tape: &Tape<'_>, pos: &[u32]) -> Result<ArrayData, ArrowError> {
        let coerce = self.coerce_primitive;

        // Pass 1: reject unsupported elements and total up the value bytes.
        let mut total_bytes = 0;
        for &p in pos {
            total_bytes += match tape.get(p) {
                TapeElement::Null => 0,
                TapeElement::String(idx) => tape.get_string(idx).len(),
                TapeElement::True if coerce => TRUE.len(),
                TapeElement::False if coerce => FALSE.len(),
                TapeElement::Number(idx) if coerce => tape.get_string(idx).len(),
                _ => return Err(tape.error(p, "string")),
            };
        }

        // The final offset equals the total byte count, so it must fit in `O`.
        O::from_usize(total_bytes).ok_or_else(|| {
            ArrowError::JsonError(format!(
                "offset overflow decoding {}",
                GenericStringArray::<O>::DATA_TYPE
            ))
        })?;

        // Pass 2: every element was validated above, so only the accepted
        // variants can be seen here.
        let mut out = GenericStringBuilder::<O>::with_capacity(pos.len(), total_bytes);
        for &p in pos {
            let value: Option<&str> = match tape.get(p) {
                TapeElement::Null => None,
                TapeElement::String(idx) => Some(tape.get_string(idx)),
                TapeElement::True if coerce => Some(TRUE),
                TapeElement::False if coerce => Some(FALSE),
                TapeElement::Number(idx) if coerce => Some(tape.get_string(idx)),
                _ => unreachable!(),
            };
            match value {
                Some(text) => out.append_value(text),
                None => out.append_null(),
            }
        }

        Ok(out.finish().into_data())
    }
}