pub struct LiquidByteViewArray<B: FsstBacking> { /* private fields */ }Expand description
An array that stores strings using the FSST format with compact offsets:
- Dictionary keys with 2-byte keys stored in memory
- Compact offsets with variable-size residuals (1, 2, or 4 bytes) stored in memory
- Per-value prefix keys (7-byte prefix + len) stored in memory
- FSST buffer can be stored in memory or on disk
§Initialization
The recommended way to create a LiquidByteViewArray is using the from_*_array constructors
which build a compact (offset + prefix key) representation directly from Arrow inputs.
let liquid_array = LiquidByteViewArray::from_string_array(&input, compressor);
Data access flow:
- Use dictionary key to index into compact offsets buffer
- Reconstruct actual offset from linear regression (predicted + residual)
- Use prefix keys for quick comparisons to avoid decompression when possible
- Decompress bytes from FSST buffer to get the full value when needed
Implementations§
Source§impl LiquidByteViewArray<FsstArray>
impl LiquidByteViewArray<FsstArray>
Sourcepub fn compare_with(&self, needle: &[u8], op: &ByteViewOperator) -> BooleanArray
pub fn compare_with(&self, needle: &[u8], op: &ByteViewOperator) -> BooleanArray
Compare with prefix optimization and fallback to Arrow operations
Source§impl<B: FsstBacking> LiquidByteViewArray<B>
impl<B: FsstBacking> LiquidByteViewArray<B>
Sourcepub fn prefix_compare_counts(
&self,
needle: &[u8],
op: &Comparison,
) -> (usize, usize, usize)
pub fn prefix_compare_counts( &self, needle: &[u8], op: &Comparison, ) -> (usize, usize, usize)
Return (selected_rows, ambiguous_rows, unique_rows) based on prefix-only comparison.
Source§impl<B: FsstBacking> LiquidByteViewArray<B>
impl<B: FsstBacking> LiquidByteViewArray<B>
Sourcepub fn from_string_view_array(
array: &StringViewArray,
compressor: Arc<Compressor>,
) -> LiquidByteViewArray<FsstArray>
pub fn from_string_view_array( array: &StringViewArray, compressor: Arc<Compressor>, ) -> LiquidByteViewArray<FsstArray>
Create a LiquidByteViewArray from an Arrow StringViewArray
Sourcepub fn from_binary_view_array(
array: &BinaryViewArray,
compressor: Arc<Compressor>,
) -> LiquidByteViewArray<FsstArray>
pub fn from_binary_view_array( array: &BinaryViewArray, compressor: Arc<Compressor>, ) -> LiquidByteViewArray<FsstArray>
Create a LiquidByteViewArray from an Arrow BinaryViewArray
Sourcepub fn from_string_array(
array: &StringArray,
compressor: Arc<Compressor>,
) -> LiquidByteViewArray<FsstArray>
pub fn from_string_array( array: &StringArray, compressor: Arc<Compressor>, ) -> LiquidByteViewArray<FsstArray>
Create a LiquidByteViewArray from an Arrow StringArray
Sourcepub fn from_binary_array(
array: &BinaryArray,
compressor: Arc<Compressor>,
) -> LiquidByteViewArray<FsstArray>
pub fn from_binary_array( array: &BinaryArray, compressor: Arc<Compressor>, ) -> LiquidByteViewArray<FsstArray>
Create a LiquidByteViewArray from an Arrow BinaryArray
Sourcepub fn train_from_string_view(
array: &StringViewArray,
) -> (Arc<Compressor>, LiquidByteViewArray<FsstArray>)
pub fn train_from_string_view( array: &StringViewArray, ) -> (Arc<Compressor>, LiquidByteViewArray<FsstArray>)
Train a compressor from an Arrow StringViewArray
Sourcepub fn train_from_binary_view(
array: &BinaryViewArray,
) -> (Arc<Compressor>, LiquidByteViewArray<FsstArray>)
pub fn train_from_binary_view( array: &BinaryViewArray, ) -> (Arc<Compressor>, LiquidByteViewArray<FsstArray>)
Train a compressor from an Arrow BinaryViewArray
Sourcepub fn train_from_arrow<T: ByteArrayType>(
array: &GenericByteArray<T>,
) -> (Arc<Compressor>, LiquidByteViewArray<FsstArray>)
pub fn train_from_arrow<T: ByteArrayType>( array: &GenericByteArray<T>, ) -> (Arc<Compressor>, LiquidByteViewArray<FsstArray>)
Train a compressor from an Arrow ByteArray.
Sourcepub unsafe fn from_unique_dict_array(
array: &DictionaryArray<UInt16Type>,
compressor: Arc<Compressor>,
) -> LiquidByteViewArray<FsstArray>
pub unsafe fn from_unique_dict_array( array: &DictionaryArray<UInt16Type>, compressor: Arc<Compressor>, ) -> LiquidByteViewArray<FsstArray>
Only used when the dictionary is read from a trusted parquet reader, which reads a trusted parquet file, written by a trusted writer.
§Safety
The caller must ensure that the values in the dictionary are unique.
Sourcepub fn train_from_arrow_dict(
array: &DictionaryArray<UInt16Type>,
) -> (Arc<Compressor>, LiquidByteViewArray<FsstArray>)
pub fn train_from_arrow_dict( array: &DictionaryArray<UInt16Type>, ) -> (Arc<Compressor>, LiquidByteViewArray<FsstArray>)
Train a compressor from an Arrow DictionaryArray.
Sourcepub fn train_compressor<'a, T: ArrayAccessor<Item = &'a str>>(
array: ArrayIter<T>,
) -> Arc<Compressor>
pub fn train_compressor<'a, T: ArrayAccessor<Item = &'a str>>( array: ArrayIter<T>, ) -> Arc<Compressor>
Train a compressor from an iterator of strings
Sourcepub fn train_compressor_bytes<'a, T: ArrayAccessor<Item = &'a [u8]>>(
array: ArrayIter<T>,
) -> Arc<Compressor>
pub fn train_compressor_bytes<'a, T: ArrayAccessor<Item = &'a [u8]>>( array: ArrayIter<T>, ) -> Arc<Compressor>
Train a compressor from an iterator of byte arrays
Source§impl LiquidByteViewArray<FsstArray>
impl LiquidByteViewArray<FsstArray>
Sourcepub fn from_bytes(
bytes: Bytes,
compressor: Arc<Compressor>,
) -> LiquidByteViewArray<FsstArray>
pub fn from_bytes( bytes: Bytes, compressor: Arc<Compressor>, ) -> LiquidByteViewArray<FsstArray>
Deserialize a LiquidByteViewArray from bytes.
Source§impl<B: FsstBacking> LiquidByteViewArray<B>
impl<B: FsstBacking> LiquidByteViewArray<B>
Sourcepub fn nulls(&self) -> Option<&NullBuffer>
pub fn nulls(&self) -> Option<&NullBuffer>
Get the nulls buffer
Sourcepub fn get_detailed_memory_usage(&self) -> ByteViewArrayMemoryUsage
pub fn get_detailed_memory_usage(&self) -> ByteViewArrayMemoryUsage
Get detailed memory usage of the byte view array
Source§impl LiquidByteViewArray<FsstArray>
impl LiquidByteViewArray<FsstArray>
Sourcepub fn to_dict_arrow(&self) -> DictionaryArray<UInt16Type>
pub fn to_dict_arrow(&self) -> DictionaryArray<UInt16Type>
Convert to Arrow DictionaryArray
Sourcepub fn to_arrow_array(&self) -> ArrayRef
pub fn to_arrow_array(&self) -> ArrayRef
Convert to Arrow array with original type
Sourcepub fn is_fsst_buffer_on_disk(&self) -> bool
pub fn is_fsst_buffer_on_disk(&self) -> bool
Check if the FSST buffer is currently stored on disk
Source§impl LiquidByteViewArray<DiskBuffer>
impl LiquidByteViewArray<DiskBuffer>
Sourcepub fn is_fsst_buffer_on_disk(&self) -> bool
pub fn is_fsst_buffer_on_disk(&self) -> bool
Check if the FSST buffer is currently stored on disk
Sourcepub async fn to_dict_arrow(&self) -> DictionaryArray<UInt16Type>
pub async fn to_dict_arrow(&self) -> DictionaryArray<UInt16Type>
Convert to Arrow DictionaryArray
Sourcepub async fn to_arrow_array(&self) -> ArrayRef
pub async fn to_arrow_array(&self) -> ArrayRef
Convert to Arrow array with original type
Trait Implementations§
Source§impl<B: Clone + FsstBacking> Clone for LiquidByteViewArray<B>
impl<B: Clone + FsstBacking> Clone for LiquidByteViewArray<B>
Source§fn clone(&self) -> LiquidByteViewArray<B>
fn clone(&self) -> LiquidByteViewArray<B>
1.0.0 · Source§fn clone_from(&mut self, source: &Self)
fn clone_from(&mut self, source: &Self)
Performs copy-assignment from source. Read more
Source§impl<B: FsstBacking> Debug for LiquidByteViewArray<B>
impl<B: FsstBacking> Debug for LiquidByteViewArray<B>
Source§impl LiquidArray for LiquidByteViewArray<FsstArray>
impl LiquidArray for LiquidByteViewArray<FsstArray>
Source§fn get_array_memory_size(&self) -> usize
fn get_array_memory_size(&self) -> usize
Source§fn to_arrow_array(&self) -> ArrayRef
fn to_arrow_array(&self) -> ArrayRef
Source§fn to_best_arrow_array(&self) -> ArrayRef
fn to_best_arrow_array(&self) -> ArrayRef
Source§fn try_eval_predicate(
&self,
expr: &Arc<dyn PhysicalExpr>,
filter: &BooleanBuffer,
) -> Option<BooleanArray>
fn try_eval_predicate( &self, expr: &Arc<dyn PhysicalExpr>, filter: &BooleanBuffer, ) -> Option<BooleanArray>
Returns None if the predicate is not supported. Read more
Source§fn original_arrow_data_type(&self) -> DataType
fn original_arrow_data_type(&self) -> DataType
Source§fn data_type(&self) -> LiquidDataType
fn data_type(&self) -> LiquidDataType
Source§fn squeeze(
&self,
io: Arc<dyn SqueezeIoHandler>,
squeeze_hint: Option<&CacheExpression>,
) -> Option<(LiquidSqueezedArrayRef, Bytes)>
fn squeeze( &self, io: Arc<dyn SqueezeIoHandler>, squeeze_hint: Option<&CacheExpression>, ) -> Option<(LiquidSqueezedArrayRef, Bytes)>
Squeezes the Liquid array into a LiquidHybridArrayRef and a bytes::Bytes.
Return None if the Liquid array cannot be squeezed. Read more
Source§fn filter(&self, selection: &BooleanBuffer) -> ArrayRef
fn filter(&self, selection: &BooleanBuffer) -> ArrayRef
Source§impl LiquidSqueezedArray for LiquidByteViewArray<DiskBuffer>
impl LiquidSqueezedArray for LiquidByteViewArray<DiskBuffer>
Source§fn get_array_memory_size(&self) -> usize
fn get_array_memory_size(&self) -> usize
Get the memory size of the Liquid array.
Source§fn to_arrow_array<'life0, 'async_trait>(
&'life0 self,
) -> Pin<Box<dyn Future<Output = ArrayRef> + Send + 'async_trait>>where
Self: 'async_trait,
'life0: 'async_trait,
fn to_arrow_array<'life0, 'async_trait>(
&'life0 self,
) -> Pin<Box<dyn Future<Output = ArrayRef> + Send + 'async_trait>>where
Self: 'async_trait,
'life0: 'async_trait,
Convert the Liquid array to an Arrow array.
Source§fn data_type(&self) -> LiquidDataType
fn data_type(&self) -> LiquidDataType
Get the logical data type of the Liquid array.
Source§fn filter<'life0, 'life1, 'async_trait>(
&'life0 self,
selection: &'life1 BooleanBuffer,
) -> Pin<Box<dyn Future<Output = ArrayRef> + Send + 'async_trait>>where
Self: 'async_trait,
'life0: 'async_trait,
'life1: 'async_trait,
fn filter<'life0, 'life1, 'async_trait>(
&'life0 self,
selection: &'life1 BooleanBuffer,
) -> Pin<Box<dyn Future<Output = ArrayRef> + Send + 'async_trait>>where
Self: 'async_trait,
'life0: 'async_trait,
'life1: 'async_trait,
Filter the Liquid array with a boolean array and return an arrow array.
Source§fn try_eval_predicate<'life0, 'life1, 'life2, 'async_trait>(
&'life0 self,
expr: &'life1 Arc<dyn PhysicalExpr>,
filter: &'life2 BooleanBuffer,
) -> Pin<Box<dyn Future<Output = Option<BooleanArray>> + Send + 'async_trait>>where
Self: 'async_trait,
'life0: 'async_trait,
'life1: 'async_trait,
'life2: 'async_trait,
fn try_eval_predicate<'life0, 'life1, 'life2, 'async_trait>(
&'life0 self,
expr: &'life1 Arc<dyn PhysicalExpr>,
filter: &'life2 BooleanBuffer,
) -> Pin<Box<dyn Future<Output = Option<BooleanArray>> + Send + 'async_trait>>where
Self: 'async_trait,
'life0: 'async_trait,
'life1: 'async_trait,
'life2: 'async_trait,
Try to evaluate a predicate on the Liquid array with a filter.
Returns None if the predicate is not supported.
Note that the filter is a boolean buffer, not a boolean array, i.e., filter can’t be nullable. The returned boolean mask is nullable if the original array is nullable.
Source§fn original_arrow_data_type(&self) -> DataType
fn original_arrow_data_type(&self) -> DataType
Source§fn to_best_arrow_array<'life0, 'async_trait>(
&'life0 self,
) -> Pin<Box<dyn Future<Output = ArrayRef> + Send + 'async_trait>>where
Self: 'async_trait,
'life0: 'async_trait,
fn to_best_arrow_array<'life0, 'async_trait>(
&'life0 self,
) -> Pin<Box<dyn Future<Output = ArrayRef> + Send + 'async_trait>>where
Self: 'async_trait,
'life0: 'async_trait,
Source§fn disk_backing(&self) -> SqueezedBacking
fn disk_backing(&self) -> SqueezedBacking
Auto Trait Implementations§
impl<B> Freeze for LiquidByteViewArray<B>where
B: Freeze,
impl<B> RefUnwindSafe for LiquidByteViewArray<B>where
B: RefUnwindSafe,
impl<B> Send for LiquidByteViewArray<B>where
B: Send,
impl<B> Sync for LiquidByteViewArray<B>where
B: Sync,
impl<B> Unpin for LiquidByteViewArray<B>where
B: Unpin,
impl<B> UnsafeUnpin for LiquidByteViewArray<B>where
B: UnsafeUnpin,
impl<B> UnwindSafe for LiquidByteViewArray<B>where
B: UnwindSafe,
Blanket Implementations§
Source§impl<T> BorrowMut<T> for T where
T: ?Sized,
impl<T> BorrowMut<T> for T where
T: ?Sized,
Source§fn borrow_mut(&mut self) -> &mut T
fn borrow_mut(&mut self) -> &mut T
Source§impl<T> CloneToUninit for T where
T: Clone,
impl<T> CloneToUninit for T where
T: Clone,
Source§impl<T> Instrument for T
impl<T> Instrument for T
Source§fn instrument(self, span: Span) -> Instrumented<Self>
fn instrument(self, span: Span) -> Instrumented<Self>
Source§fn in_current_span(self) -> Instrumented<Self>
fn in_current_span(self) -> Instrumented<Self>
Source§impl<T> IntoEither for T
impl<T> IntoEither for T
Source§fn into_either(self, into_left: bool) -> Either<Self, Self>
fn into_either(self, into_left: bool) -> Either<Self, Self>
Converts self into a Left variant of Either<Self, Self>
if into_left is true.
Converts self into a Right variant of Either<Self, Self>
otherwise. Read more
Source§fn into_either_with<F>(self, into_left: F) -> Either<Self, Self>
fn into_either_with<F>(self, into_left: F) -> Either<Self, Self>
Converts self into a Left variant of Either<Self, Self>
if into_left(&self) returns true.
Converts self into a Right variant of Either<Self, Self>
otherwise. Read more
Source§impl<T> IntoRequest<T> for T
impl<T> IntoRequest<T> for T
Source§fn into_request(self) -> Request<T>
fn into_request(self) -> Request<T>
Wrap the input message T in a tonic::Request