use std::sync::Arc;
use fsst::{Decompressor, Symbol, MAX_CODE};
use serde::{Deserialize, Serialize};
use vortex::stats::{ArrayStatisticsCompute, StatsSet};
use vortex::validity::{ArrayValidity, LogicalValidity};
use vortex::variants::{ArrayVariants, BinaryArrayTrait, Utf8ArrayTrait};
use vortex::visitor::AcceptArrayVisitor;
use vortex::{impl_encoding, Array, ArrayDType, ArrayDef, ArrayTrait, IntoCanonical};
use vortex_dtype::{DType, Nullability, PType};
use vortex_error::{vortex_bail, VortexResult};
// Declares the FSST encoding through vortex's `impl_encoding!` macro, passing the
// string id "vortex.fsst" and the numeric code 24.
// NOTE(review): presumably this macro generates the `FSSTArray` type (and its
// metadata plumbing) used throughout this file — confirm against the macro definition.
impl_encoding!("vortex.fsst", 24u16, FSST);
/// Dtype used to fetch the symbols child array (child 0): non-nullable `u64`.
static SYMBOLS_DTYPE: DType = DType::Primitive(PType::U64, Nullability::NonNullable);
/// Serializable metadata stored alongside an FSST array.
///
/// It records what is needed to re-fetch the two child arrays: the length of the
/// symbols child and the dtype of the codes child.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct FSSTMetadata {
// Number of entries in the symbols child array (child 0).
symbols_len: usize,
// Dtype of the codes child array (child 1); `try_new` only accepts `DType::Binary` here.
codes_dtype: DType,
}
impl FSSTArray {
    /// Builds an FSST array from a symbol table and an array of compressed codes.
    ///
    /// * `dtype` - logical dtype reported by the resulting array.
    /// * `symbols` - the symbol table, a non-nullable `u64` array with at most 255 entries.
    /// * `codes` - a `DType::Binary` array; its length becomes the length of the FSST array.
    ///
    /// # Errors
    ///
    /// Returns an `InvalidArgument` error when `symbols` is not a non-nullable `u64`
    /// array, when `symbols` holds more than 255 entries, or when `codes` is not of
    /// `DType::Binary` type. Errors from `try_from_parts` are propagated.
    pub fn try_new(dtype: DType, symbols: Array, codes: Array) -> VortexResult<Self> {
        // Upper bound promised by the error message below.
        const MAX_SYMBOLS: usize = 255;

        // Validate against the same static that `symbols()` uses to fetch the child,
        // so the constructor and the accessor can never disagree.
        if symbols.dtype() != &SYMBOLS_DTYPE {
            vortex_bail!(InvalidArgument: "symbols array must be of type u64");
        }

        // Enforce the documented 255-symbol limit explicitly instead of relying only
        // on the value of `MAX_CODE`, so the check always agrees with its message.
        // NOTE(review): `Decompressor::new` is believed to reject larger tables with a
        // panic — confirm against the fsst crate.
        let max_symbols = MAX_SYMBOLS.min(MAX_CODE as usize);
        if symbols.len() > max_symbols {
            vortex_bail!(InvalidArgument: "symbols array must have length <= 255");
        }

        if !matches!(codes.dtype(), DType::Binary(_)) {
            vortex_bail!(InvalidArgument: "codes array must be DType::Binary type");
        }

        // Capture everything derived from the children before they are moved
        // into the shared child slice.
        let symbols_len = symbols.len();
        let len = codes.len();
        let codes_dtype = codes.dtype().clone();
        let children = Arc::new([symbols, codes]);

        Self::try_from_parts(
            dtype,
            len,
            FSSTMetadata {
                symbols_len,
                codes_dtype,
            },
            children,
            StatsSet::new(),
        )
    }

    /// Returns the symbols child array (child 0).
    ///
    /// # Panics
    ///
    /// Panics if the underlying array has no such child.
    pub fn symbols(&self) -> Array {
        self.array()
            .child(0, &SYMBOLS_DTYPE, self.metadata().symbols_len)
            .expect("FSSTArray must have a symbols child array")
    }

    /// Returns the codes child array (child 1), which has the same length as `self`.
    ///
    /// # Panics
    ///
    /// Panics if the underlying array has no such child.
    pub fn codes(&self) -> Array {
        self.array()
            .child(1, &self.metadata().codes_dtype, self.len())
            .expect("FSSTArray must have a codes child array")
    }

    /// Builds an FSST [`Decompressor`] from this array's symbol table.
    ///
    /// # Errors
    ///
    /// Propagates any failure to canonicalize the symbols child into a primitive array.
    pub(crate) fn decompressor(&self) -> VortexResult<Decompressor> {
        let symbols_array = self.symbols().into_canonical()?.into_primitive()?;
        let symbols = symbols_array.maybe_null_slice::<u64>();
        // SAFETY: this reinterprets a `&[u64]` as a `&[Symbol]` of the same length,
        // which is only sound if `Symbol` has the same size and alignment as `u64`
        // and every bit pattern is a valid `Symbol`.
        // NOTE(review): confirm this layout guarantee against the fsst crate.
        let symbols = unsafe { std::mem::transmute::<&[u64], &[Symbol]>(symbols) };
        Ok(Decompressor::new(symbols))
    }
}
impl AcceptArrayVisitor for FSSTArray {
    /// Walks the two children of this array, in child order: `symbols`, then `codes`.
    ///
    /// # Errors
    ///
    /// Returns the first error reported by the visitor.
    fn accept(&self, visitor: &mut dyn vortex::visitor::ArrayVisitor) -> VortexResult<()> {
        // Previously a `todo!()`, which panicked on any traversal of an FSST array.
        visitor.visit_child("symbols", &self.symbols())?;
        visitor.visit_child("codes", &self.codes())
    }
}
// Empty impl: FSSTArray defines no statistics logic of its own and relies entirely
// on whatever default methods `ArrayStatisticsCompute` provides.
impl ArrayStatisticsCompute for FSSTArray {}
impl ArrayValidity for FSSTArray {
    /// Reports whether the element at `index` is valid, as answered by the codes child.
    fn is_valid(&self, index: usize) -> bool {
        let codes = self.codes();
        codes.with_dyn(|inner| inner.is_valid(index))
    }

    /// Returns the logical validity of the codes child, which doubles as the
    /// validity of this array.
    fn logical_validity(&self) -> LogicalValidity {
        let codes = self.codes();
        codes.with_dyn(|inner| inner.logical_validity())
    }
}
impl ArrayVariants for FSSTArray {
    /// Exposes this array through the binary-array view; always `Some`.
    fn as_binary_array(&self) -> Option<&dyn BinaryArrayTrait> {
        let binary_view: &dyn BinaryArrayTrait = self;
        Some(binary_view)
    }

    /// Exposes this array through the UTF-8-array view; always `Some`.
    fn as_utf8_array(&self) -> Option<&dyn Utf8ArrayTrait> {
        let utf8_view: &dyn Utf8ArrayTrait = self;
        Some(utf8_view)
    }
}
// Empty impls: FSSTArray opts into the UTF-8, binary and base array traits without
// overriding anything, so only the traits' default methods (if any) apply.
impl Utf8ArrayTrait for FSSTArray {}
impl BinaryArrayTrait for FSSTArray {}
impl ArrayTrait for FSSTArray {}