use super::*;
use arrow::array::{
Array, ArrayRef, BooleanArray, DictionaryArray, StringArray, UInt16Array, cast::AsArray,
types::UInt16Type,
};
use arrow::buffer::{BooleanBuffer, NullBuffer, ScalarBuffer};
use arrow_schema::DataType;
use rand::{RngExt as _, SeedableRng};
use std::sync::Arc;
use crate::cache::transcode_liquid_inner_with_hint;
use crate::cache::{CacheExpression, LiquidCompressorStates, TestSqueezeIo};
use crate::liquid_array::byte_view_array::operator::{
ByteViewOperator, Comparison, Equality, SubString,
};
use crate::liquid_array::raw::fsst_buffer::{DiskBuffer, FsstArray, PrefixKey};
use crate::liquid_array::{LiquidArray, LiquidDataType, LiquidSqueezedArray};
/// Sanity-checks the two ingredients of a dictionary view: a `PrefixKey` assembled from raw
/// parts, and a plain `UInt16Array` holding dictionary keys.
#[test]
fn test_dictionary_view_structure() {
    // A prefix key must hand back exactly the bytes and length byte it was built from.
    let key = PrefixKey::from_parts([1, 2, 3, 4, 5, 6, 7], 7);
    assert_eq!(key.prefix7(), &[1, 2, 3, 4, 5, 6, 7]);
    assert_eq!(key.len_byte(), 7);

    // Dictionary keys are readable back position by position.
    let wanted = [42u16, 100, 255];
    let dictionary_keys = UInt16Array::from(wanted.to_vec());
    for (position, value) in wanted.iter().enumerate() {
        assert_eq!(dictionary_keys.value(position), *value);
    }
}
/// An in-memory array built from a `StringArray` must report `Utf8` as its original Arrow type.
#[test]
fn test_original_arrow_data_type_returns_utf8() {
    let strings = StringArray::from(vec!["foo", "bar"]);
    let trained = LiquidByteViewArray::<FsstArray>::train_compressor(strings.iter());
    let liquid = LiquidByteViewArray::<FsstArray>::from_string_array(&strings, trained);

    assert_eq!(liquid.original_arrow_data_type(), DataType::Utf8);
}
/// After squeezing with a `PredicateColumn` hint, the resulting `DiskBuffer`-backed array must
/// still report `Utf8` as its original Arrow type.
#[test]
fn test_hybrid_original_arrow_data_type_returns_utf8() {
    let strings = StringArray::from(vec!["foo", "bar"]);
    let trained = LiquidByteViewArray::<FsstArray>::train_compressor(strings.iter());
    let in_memory = LiquidByteViewArray::<FsstArray>::from_string_array(&strings, trained);

    let io = Arc::new(TestSqueezeIo::default());
    let hint = CacheExpression::PredicateColumn;
    let (squeezed, _) = in_memory
        .squeeze(io, Some(&hint))
        .expect("squeeze should succeed");

    let on_disk = squeezed
        .as_any()
        .downcast_ref::<LiquidByteViewArray<DiskBuffer>>()
        .expect("should downcast to disk array");
    assert_eq!(on_disk.original_arrow_data_type(), DataType::Utf8);
}
/// Squeezing with a substring-search hint must leave string fingerprints on the disk-backed array.
#[test]
fn test_squeeze_builds_string_fingerprints() {
    let strings = StringArray::from(vec!["alpha", "beta", "alphabet"]);
    let trained = LiquidByteViewArray::<FsstArray>::train_compressor(strings.iter());
    let in_memory = LiquidByteViewArray::<FsstArray>::from_string_array(&strings, trained);

    let io = Arc::new(TestSqueezeIo::default());
    let hint = CacheExpression::substring_search();
    let (squeezed, _) = in_memory
        .squeeze(io, Some(&hint))
        .expect("squeeze should succeed");

    let on_disk = squeezed
        .as_any()
        .downcast_ref::<LiquidByteViewArray<DiskBuffer>>()
        .expect("should downcast to disk array");
    assert!(on_disk.string_fingerprints.is_some());
}
/// Serializing a fingerprinted array with `to_bytes` and reading it back with `from_bytes`
/// must keep the fingerprints, and they must compare equal to the originals.
#[test]
fn test_ipc_roundtrip_preserves_string_fingerprints() {
    let source: ArrayRef = Arc::new(StringArray::from(vec!["alpha", "beta", "alphabet"]));
    let compressor_states = LiquidCompressorStates::new();
    let hint = CacheExpression::substring_search();

    // Transcode with the substring hint so that fingerprints get built.
    let transcoded = transcode_liquid_inner_with_hint(&source, &compressor_states, Some(&hint))
        .expect("transcode should succeed");
    let byte_view = transcoded
        .as_any()
        .downcast_ref::<LiquidByteViewArray<FsstArray>>()
        .expect("expected byte view array");
    assert!(byte_view.string_fingerprints.is_some());

    // Round-trip through the byte representation, reusing the original compressor.
    let serialized = byte_view.to_bytes();
    let decoded = LiquidByteViewArray::<FsstArray>::from_bytes(
        serialized.into(),
        byte_view.fsst_buffer.compressor_arc(),
    );

    assert!(decoded.string_fingerprints.is_some());
    assert_eq!(
        byte_view.string_fingerprints.as_ref().unwrap().as_ref(),
        decoded.string_fingerprints.as_ref().unwrap().as_ref()
    );
}
/// Checks the I/O cost of substring search on a squeezed array: the test expects zero reads
/// for a needle that matches nothing (`zzz`) and exactly one read once a matching needle
/// (`alp`) is searched for.
#[tokio::test]
async fn test_string_fingerprint_skips_disk_read_for_impossible_substring() {
    let strings = StringArray::from(vec!["alpha", "ALP", "beta", "gamma"]);
    let trained = LiquidByteViewArray::<FsstArray>::train_compressor(strings.iter());
    let in_memory = LiquidByteViewArray::<FsstArray>::from_string_array(&strings, trained);

    // Squeeze to the test I/O backend and hand it the bytes it should serve on reads.
    let io = Arc::new(TestSqueezeIo::default());
    let hint = CacheExpression::substring_search();
    let (squeezed, squeezed_bytes) = in_memory
        .squeeze(io.clone(), Some(&hint))
        .expect("squeeze should succeed");
    io.set_bytes(squeezed_bytes);

    let on_disk = squeezed
        .as_any()
        .downcast_ref::<LiquidByteViewArray<DiskBuffer>>()
        .expect("should downcast to disk array");
    let fingerprints = on_disk
        .string_fingerprints
        .as_ref()
        .expect("fingerprints should be present");

    // No value can contain "zzz": all-false answer and no read recorded.
    let no_match = on_disk
        .compare_like_substring(b"zzz", SubString::Contains, fingerprints)
        .await;
    assert_eq!(no_match, BooleanArray::from(vec![false, false, false, false]));
    assert_eq!(io.reads(), 0);

    // "alp" matches only the lowercase "alpha"; one read is recorded.
    let some_match = on_disk
        .compare_like_substring(b"alp", SubString::Contains, fingerprints)
        .await;
    assert_eq!(some_match, BooleanArray::from(vec![true, false, false, false]));
    assert_eq!(io.reads(), 1);
}
/// A sliced dictionary array containing null keys must decode to the same Arrow array before
/// and after a `to_bytes` / `from_bytes` round trip.
#[test]
fn test_ipc_roundtrip_sliced_dictionary_nulls() {
    let dictionary_values: ArrayRef = Arc::new(StringArray::from(vec!["a", "b", "c", "d"]));
    let dictionary_keys = UInt16Array::from(vec![
        Some(0u16),
        None,
        Some(2),
        Some(1),
        None,
        Some(3),
        Some(0),
        Some(2),
        Some(1),
    ]);
    // Slice so that the array starts on a null key and has a non-zero offset.
    let sliced = DictionaryArray::<UInt16Type>::new(dictionary_keys, dictionary_values).slice(1, 7);

    let trained = LiquidByteViewArray::<FsstArray>::train_compressor(
        sliced.values().as_string::<i32>().iter(),
    );
    // SAFETY: the dictionary values "a".."d" are pairwise distinct, which is presumably the
    // invariant `from_unique_dict_array` requires — confirm against its `# Safety` docs.
    let liquid = unsafe {
        LiquidByteViewArray::<FsstArray>::from_unique_dict_array(&sliced, trained.clone())
    };

    let arrow_before = liquid.to_arrow_array();
    let serialized = liquid.to_bytes();
    let arrow_after =
        LiquidByteViewArray::<FsstArray>::from_bytes(serialized.into(), trained).to_arrow_array();

    assert_eq!(arrow_before.as_ref(), arrow_after.as_ref());
}
/// With no common prefix among the inputs, the shared prefix is empty and each prefix key is
/// expected to hold the value's leading bytes padded with NULs to seven bytes.
#[test]
fn test_prefix_extraction() {
    let strings = StringArray::from(vec!["hello", "world", "test"]);
    let trained = LiquidByteViewArray::<FsstArray>::train_compressor(strings.iter());
    let liquid = LiquidByteViewArray::<FsstArray>::from_string_array(&strings, trained);

    assert_eq!(liquid.shared_prefix, Vec::<u8>::new());

    let keys = &liquid.prefix_keys;
    assert_eq!(keys[0].prefix7(), b"hello\0\0");
    assert_eq!(keys[1].prefix7(), b"world\0\0");
    assert_eq!(keys[2].prefix7(), b"test\0\0\0");
}
/// Inputs sharing the prefix `hello_`: verifies the extracted shared prefix, the per-value
/// prefix keys (taken after the shared prefix), the Arrow round trip, and equality lookups.
#[test]
fn test_shared_prefix_functionality() {
    let strings = StringArray::from(vec![
        "hello_world",
        "hello_rust",
        "hello_test",
        "hello_code",
    ]);
    let trained = LiquidByteViewArray::<FsstArray>::train_compressor(strings.iter());
    let liquid = LiquidByteViewArray::<FsstArray>::from_string_array(&strings, trained);

    // Layout: shared prefix plus NUL-padded remainders.
    assert_eq!(liquid.shared_prefix, b"hello_");
    let keys = &liquid.prefix_keys;
    assert_eq!(keys[0].prefix7(), b"world\0\0");
    assert_eq!(keys[1].prefix7(), b"rust\0\0\0");
    assert_eq!(keys[2].prefix7(), b"test\0\0\0");
    assert_eq!(keys[3].prefix7(), b"code\0\0\0");

    // Decoding restores the original strings.
    let decoded = liquid.to_arrow_array();
    assert_eq!(&strings, decoded.as_string::<i32>());

    // Exact match on one element.
    assert_eq!(
        liquid.compare_equals(b"hello_rust"),
        BooleanArray::from(vec![false, true, false, false])
    );
    // Needle that does not start with the shared prefix.
    assert_eq!(
        liquid.compare_equals(b"goodbye_world"),
        BooleanArray::from(vec![false, false, false, false])
    );
    // Needle equal to the shared prefix itself matches no value.
    assert_eq!(
        liquid.compare_equals(b"hello_"),
        BooleanArray::from(vec![false, false, false, false])
    );
}
/// The shortest input (`abc`) is itself the shared prefix, so its prefix key is all zeros.
/// Also checks equality and ordering comparisons against needles shorter and longer than
/// the shared prefix.
#[test]
fn test_shared_prefix_with_short_strings() {
    let strings = StringArray::from(vec!["abc", "abcde", "abcdef", "abcdefg"]);
    let trained = LiquidByteViewArray::<FsstArray>::train_compressor(strings.iter());
    let liquid = LiquidByteViewArray::<FsstArray>::from_string_array(&strings, trained);

    assert_eq!(liquid.shared_prefix, b"abc");
    let keys = &liquid.prefix_keys;
    assert_eq!(keys[0].prefix7(), &[0u8; 7]);
    assert_eq!(keys[1].prefix7(), b"de\0\0\0\0\0");
    assert_eq!(keys[2].prefix7(), b"def\0\0\0\0");
    assert_eq!(keys[3].prefix7(), b"defg\0\0\0");

    let decoded = liquid.to_arrow_array();
    assert_eq!(&strings, decoded.as_string::<i32>());

    assert_eq!(
        liquid.compare_equals(b"abc"),
        BooleanArray::from(vec![true, false, false, false])
    );
    assert_eq!(
        liquid.compare_equals(b"abcde"),
        BooleanArray::from(vec![false, true, false, false])
    );

    // Ordering comparisons routed through the generic operator entry point.
    let ordered = |needle: &[u8], op: Comparison| {
        liquid.compare_with(needle, &ByteViewOperator::Comparison(op))
    };
    assert_eq!(
        ordered(b"ab", Comparison::Gt),
        BooleanArray::from(vec![true, true, true, true])
    );
    assert_eq!(
        ordered(b"abcd", Comparison::Lt),
        BooleanArray::from(vec![true, false, false, false])
    );
}
/// One of the inputs (`data`) equals the shared prefix in full. Verifies the layout, the
/// Arrow round trip, and a series of equality and ordering comparisons around that prefix.
#[test]
fn test_shared_prefix_contains_complete_strings() {
    let strings = StringArray::from(vec!["data", "database", "data_entry", "data_", "datatype"]);
    let trained = LiquidByteViewArray::<FsstArray>::train_compressor(strings.iter());
    let liquid = LiquidByteViewArray::<FsstArray>::from_string_array(&strings, trained);

    assert_eq!(liquid.shared_prefix, b"data");
    let keys = &liquid.prefix_keys;
    assert_eq!(keys[0].prefix7(), &[0u8; 7]);
    assert_eq!(keys[1].prefix7(), b"base\0\0\0");
    assert_eq!(keys[2].prefix7(), b"_entry\0");
    assert_eq!(keys[3].prefix7(), b"_\0\0\0\0\0\0");
    assert_eq!(keys[4].prefix7(), b"type\0\0\0");

    let decoded = liquid.to_arrow_array();
    assert_eq!(&strings, decoded.as_string::<i32>());

    assert_eq!(
        liquid.compare_equals(b"data"),
        BooleanArray::from(vec![true, false, false, false, false])
    );

    let ordered = |needle: &[u8], op: Comparison| {
        liquid.compare_with(needle, &ByteViewOperator::Comparison(op))
    };
    // Needle shorter than the shared prefix: every value is greater.
    assert_eq!(
        ordered(b"dat", Comparison::Gt),
        BooleanArray::from(vec![true, true, true, true, true])
    );
    // Needle longer than the shared prefix.
    assert_eq!(
        ordered(b"datab", Comparison::Lt),
        BooleanArray::from(vec![true, false, true, true, false])
    );
    assert_eq!(
        ordered(b"da", Comparison::Gt),
        BooleanArray::from(vec![true, true, true, true, true])
    );
    // Needle equal to the shared prefix: `>=` holds for all, `>` for all but "data" itself.
    assert_eq!(
        ordered(b"data", Comparison::GtEq),
        BooleanArray::from(vec![true, true, true, true, true])
    );
    assert_eq!(
        ordered(b"data", Comparison::Gt),
        BooleanArray::from(vec![false, true, true, true, true])
    );
}
/// Comparing a value of a little over 2 MiB against itself with `<=` must complete without
/// panicking and report `true`.
#[test]
fn test_compare_with_large_value_no_panic() {
    let mut huge = String::from("aaaaaaa");
    huge.push_str(&"b".repeat(2 * 1024 * 1024 + 128));

    let strings = StringArray::from(vec![huge.as_str()]);
    let trained = LiquidByteViewArray::<FsstArray>::train_compressor(strings.iter());
    let liquid = LiquidByteViewArray::<FsstArray>::from_string_array(&strings, trained);

    let operator = ByteViewOperator::Comparison(Comparison::LtEq);
    let outcome = liquid.compare_with(huge.as_bytes(), &operator);

    assert_eq!(outcome.len(), 1);
    assert!(outcome.value(0));
}
/// Focused regression check: `>= "data"` must hold for every value when `data` is both an
/// element and the shared prefix.
#[test]
fn test_shared_prefix_corner_case() {
    let strings = StringArray::from(vec!["data", "database", "data_entry", "data_", "datatype"]);
    let trained = LiquidByteViewArray::<FsstArray>::train_compressor(strings.iter());
    let liquid = LiquidByteViewArray::<FsstArray>::from_string_array(&strings, trained);

    let operator = ByteViewOperator::Comparison(Comparison::GtEq);
    assert_eq!(
        liquid.compare_with(b"data", &operator),
        BooleanArray::from(vec![true, true, true, true, true])
    );
}
/// Three shared-prefix edge cases, each followed by an Arrow round-trip check:
/// all-identical values, one value equal to the prefix of the others, and an empty string
/// that forces an empty shared prefix.
#[test]
fn test_shared_prefix_edge_cases() {
    // Case 1: identical values — the whole string is the shared prefix, every key is zeroed.
    {
        let strings = StringArray::from(vec!["identical", "identical", "identical"]);
        let trained = LiquidByteViewArray::<FsstArray>::train_compressor(strings.iter());
        let liquid = LiquidByteViewArray::<FsstArray>::from_string_array(&strings, trained);

        assert_eq!(liquid.shared_prefix, b"identical");
        for key in liquid.prefix_keys.iter() {
            assert_eq!(key.prefix7(), &[0u8; 7]);
        }
        let decoded = liquid.to_arrow_array();
        assert_eq!(&strings, decoded.as_string::<i32>());
    }

    // Case 2: the first value is exactly the shared prefix of the other two.
    {
        let strings = StringArray::from(vec!["hello", "hello_world", "hello_test"]);
        let trained = LiquidByteViewArray::<FsstArray>::train_compressor(strings.iter());
        let liquid = LiquidByteViewArray::<FsstArray>::from_string_array(&strings, trained);

        assert_eq!(liquid.shared_prefix, b"hello");
        assert_eq!(liquid.prefix_keys[0].prefix7(), &[0u8; 7]);
        assert_eq!(liquid.prefix_keys[1].prefix7(), b"_world\0");
        assert_eq!(liquid.prefix_keys[2].prefix7(), b"_test\0\0");
        let decoded = liquid.to_arrow_array();
        assert_eq!(&strings, decoded.as_string::<i32>());
    }

    // Case 3: an empty string leaves nothing to share; long values are cut to 7 key bytes.
    {
        let strings = StringArray::from(vec!["", "hello", "hello_world"]);
        let trained = LiquidByteViewArray::<FsstArray>::train_compressor(strings.iter());
        let liquid = LiquidByteViewArray::<FsstArray>::from_string_array(&strings, trained);

        assert_eq!(liquid.shared_prefix, Vec::<u8>::new());
        assert_eq!(liquid.prefix_keys[0].prefix7(), &[0u8; 7]);
        assert_eq!(liquid.prefix_keys[1].prefix7(), b"hello\0\0");
        assert_eq!(liquid.prefix_keys[2].prefix7(), b"hello_w");
        let decoded = liquid.to_arrow_array();
        assert_eq!(&strings, decoded.as_string::<i32>());
    }
}
/// Three non-null inputs should yield three dictionary keys, four offsets (one more than the
/// value count), no null buffer, and a retrievable first compressed slice.
#[test]
fn test_memory_layout() {
    let strings = StringArray::from(vec!["hello", "world", "test"]);
    let trained = LiquidByteViewArray::<FsstArray>::train_compressor(strings.iter());
    let liquid = LiquidByteViewArray::<FsstArray>::from_string_array(&strings, trained);

    assert_eq!(liquid.dictionary_keys.len(), 3);
    assert_eq!(liquid.fsst_buffer.offsets_len(), 4);
    assert!(liquid.nulls().is_none());

    // Only checks that fetching the first slice does not panic; the bytes are not inspected.
    let _first_slice = liquid.fsst_buffer.get_compressed_slice(0);
}
/// Asserts that filtering the liquid encoding of `input` with `filter` produces the same
/// array as Arrow's own `filter` kernel applied to `input`.
///
/// `filter` is used as a selection mask without a null buffer; callers are expected to pass
/// a mask with the same length as `input`.
fn check_filter_result(input: &StringArray, filter: BooleanBuffer) {
    let compressor = LiquidByteViewArray::<FsstArray>::train_compressor(input.iter());
    let liquid_array = LiquidByteViewArray::<FsstArray>::from_string_array(input, compressor);
    let output = liquid_array.filter(&filter);

    // Reference result from Arrow. `filter` is owned and no longer needed afterwards, so it
    // is moved into the selection array instead of cloned, and `input` (already a reference)
    // is passed through without an extra borrow.
    let selection = BooleanArray::new(filter, None);
    let arrow_filtered = arrow::compute::filter(input, &selection).unwrap();
    let expected = arrow_filtered.as_string::<i32>().clone();

    assert_eq!(output.as_ref(), &expected);
}
/// Runs 100 pseudo-random selection masks (fixed seed 42) over an input containing nulls and
/// repeated values, checking each filtered result via `check_filter_result`.
#[test]
fn test_filter_functionality() {
    let strings = StringArray::from(vec![
        Some("hello"),
        Some("test"),
        None,
        Some("test"),
        None,
        Some("test"),
        Some("rust"),
    ]);

    // Fixed seed keeps the masks reproducible from run to run.
    let mut rng = rand::rngs::StdRng::seed_from_u64(42);
    for _ in 0..100 {
        let mask = (0..strings.len())
            .map(|_| rng.random::<bool>())
            .collect::<BooleanBuffer>();
        check_filter_result(&strings, mask);
    }
}
/// Five inputs with only two distinct values: one dictionary key per row, but just two
/// entries in the dictionary's values.
#[test]
fn test_memory_efficiency() {
    let strings = StringArray::from(vec!["hello", "world", "hello", "world", "hello"]);
    let trained = LiquidByteViewArray::<FsstArray>::train_compressor(strings.iter());
    let liquid = LiquidByteViewArray::<FsstArray>::from_string_array(&strings, trained);

    assert_eq!(liquid.dictionary_keys.len(), 5);

    let as_dictionary = liquid.to_dict_arrow();
    assert_eq!(as_dictionary.values().len(), 2);
}
/// `to_best_arrow_array` is expected to return a `UInt16`-keyed dictionary array; with three
/// distinct inputs it has three rows and three dictionary values.
#[test]
fn test_to_best_arrow_array() {
    let strings = StringArray::from(vec!["hello", "world", "test"]);
    let trained = LiquidByteViewArray::<FsstArray>::train_compressor(strings.iter());
    let liquid = LiquidByteViewArray::<FsstArray>::from_string_array(&strings, trained);

    let best = liquid.to_best_arrow_array();
    let as_dictionary = best.as_dictionary::<UInt16Type>();

    assert_eq!(as_dictionary.len(), 3);
    assert_eq!(as_dictionary.values().len(), 3);
}
/// The liquid data type tag of a byte-view array must be `LiquidDataType::ByteViewArray`.
#[test]
fn test_data_type() {
    let strings = StringArray::from(vec!["hello", "world"]);
    let trained = LiquidByteViewArray::<FsstArray>::train_compressor(strings.iter());
    let liquid = LiquidByteViewArray::<FsstArray>::from_string_array(&strings, trained);

    assert!(matches!(liquid.data_type(), LiquidDataType::ByteViewArray));
}
/// Ordering comparisons where the needle differs from every value within the leading bytes;
/// judging by the test name these are meant to be decidable from the prefix keys alone.
#[test]
fn test_compare_with_prefix_optimization_fast_path() {
    let strings = StringArray::from(vec![
        "apple123",
        "banana456",
        "cherry789",
        "apple999",
        "zebra000",
    ]);
    let trained = LiquidByteViewArray::<FsstArray>::train_compressor(strings.iter());
    let liquid = LiquidByteViewArray::<FsstArray>::from_string_array(&strings, trained);
    let compare = |needle: &[u8], op: Comparison| liquid.compare_with_inner(needle, &op);

    assert_eq!(
        compare(b"car", Comparison::Lt),
        BooleanArray::from(vec![true, true, false, true, false])
    );
    assert_eq!(
        compare(b"dog", Comparison::Gt),
        BooleanArray::from(vec![false, false, false, false, true])
    );
    assert_eq!(
        compare(b"apple", Comparison::GtEq),
        BooleanArray::from(vec![true, true, true, true, true])
    );
}
/// Ordering comparisons where several values share a long leading run with the needle;
/// judging by the test name these are meant to require looking past the prefix keys.
#[test]
fn test_compare_with_prefix_optimization_decompression_path() {
    let strings = StringArray::from(vec![
        "prefix_aaa",
        "prefix_bbb",
        "prefix_ccc",
        "prefix_abc",
        "different",
    ]);
    let trained = LiquidByteViewArray::<FsstArray>::train_compressor(strings.iter());
    let liquid = LiquidByteViewArray::<FsstArray>::from_string_array(&strings, trained);
    let compare = |needle: &[u8], op: Comparison| liquid.compare_with_inner(needle, &op);

    assert_eq!(
        compare(b"prefix_b", Comparison::Lt),
        BooleanArray::from(vec![true, false, false, true, true])
    );
    assert_eq!(
        compare(b"prefix_bbb", Comparison::LtEq),
        BooleanArray::from(vec![true, true, false, true, true])
    );
    assert_eq!(
        compare(b"prefix_abc", Comparison::Gt),
        BooleanArray::from(vec![false, true, true, false, false])
    );
}
/// Ordering comparisons over an input with an empty string, a null, and values of varying
/// length. The null slot must stay null in every result.
#[test]
fn test_compare_with_prefix_optimization_edge_cases_and_nulls() {
    let strings = StringArray::from(vec![
        Some(""),
        None,
        Some("a"),
        Some("abcdef"),
        Some("abcdefghij"),
        Some("abcdeg"),
    ]);
    let trained = LiquidByteViewArray::<FsstArray>::train_compressor(strings.iter());
    let liquid = LiquidByteViewArray::<FsstArray>::from_string_array(&strings, trained);
    let compare = |needle: &[u8], op: Comparison| liquid.compare_with_inner(needle, &op);

    // Nothing sorts before the empty needle.
    assert_eq!(
        compare(b"", Comparison::Lt),
        BooleanArray::from(vec![
            Some(false),
            None,
            Some(false),
            Some(false),
            Some(false),
            Some(false),
        ])
    );
    // Strictly greater than "abcdef": only the longer and the lexically larger value.
    assert_eq!(
        compare(b"abcdef", Comparison::Gt),
        BooleanArray::from(vec![
            Some(false),
            None,
            Some(false),
            Some(false),
            Some(true),
            Some(true),
        ])
    );
    // Every non-null value sorts at or before "b".
    assert_eq!(
        compare(b"b", Comparison::LtEq),
        BooleanArray::from(vec![
            Some(true),
            None,
            Some(true),
            Some(true),
            Some(true),
            Some(true),
        ])
    );
    // Only the exact match satisfies `>= "abcdeg"`.
    assert_eq!(
        compare(b"abcdeg", Comparison::GtEq),
        BooleanArray::from(vec![
            Some(false),
            None,
            Some(false),
            Some(false),
            Some(false),
            Some(true),
        ])
    );
}
/// Comparison against a needle equal to the shortest value (`x`), next to a value that
/// merely extends it (`x1`).
#[test]
fn test_compare_with_prefix_empty_suffix() {
    let strings = StringArray::from(vec!["x", "x1"]);
    let trained = LiquidByteViewArray::<FsstArray>::train_compressor(strings.iter());
    let liquid = LiquidByteViewArray::<FsstArray>::from_string_array(&strings, trained);
    let compare = |needle: &[u8], op: Comparison| liquid.compare_with_inner(needle, &op);

    assert_eq!(
        compare(b"x", Comparison::LtEq),
        BooleanArray::from(vec![true, false])
    );
    assert_eq!(
        compare(b"x", Comparison::Gt),
        BooleanArray::from(vec![false, true])
    );
}
/// Ordering comparisons over multi-byte UTF-8 values. Needles are passed as raw bytes, and
/// the expected results follow byte-wise ordering of the UTF-8 encodings.
#[test]
fn test_compare_with_prefix_optimization_utf8_and_binary() {
    let strings = StringArray::from(vec!["café", "naïve", "résumé", "hello", "世界"]);
    let trained = LiquidByteViewArray::<FsstArray>::train_compressor(strings.iter());
    let liquid = LiquidByteViewArray::<FsstArray>::from_string_array(&strings, trained);
    let compare = |needle: &str, op: Comparison| liquid.compare_with_inner(needle.as_bytes(), &op);

    assert_eq!(
        compare("naïve", Comparison::Lt),
        BooleanArray::from(vec![true, false, false, true, false])
    );
    assert_eq!(
        compare("café", Comparison::Gt),
        BooleanArray::from(vec![false, true, true, true, true])
    );
    assert_eq!(
        compare("世界", Comparison::LtEq),
        BooleanArray::from(vec![true, true, true, true, true])
    );

    // Same needle under both inclusive operators; only "résumé" itself satisfies both.
    let at_least = compare("résumé", Comparison::GtEq);
    let at_most = compare("résumé", Comparison::LtEq);
    assert_eq!(
        at_least,
        BooleanArray::from(vec![false, false, true, false, true])
    );
    assert_eq!(
        at_most,
        BooleanArray::from(vec![true, true, true, true, false])
    );
}
/// Helper: encodes `input` as an in-memory `FsstArray`-backed array and asserts that
/// `compare_equals(needle)` returns `expected`.
fn test_compare_equals(input: StringArray, needle: &[u8], expected: BooleanArray) {
    let liquid = LiquidByteViewArray::<FsstArray>::from_string_array(
        &input,
        LiquidByteViewArray::<FsstArray>::train_compressor(input.iter()),
    );
    assert_eq!(liquid.compare_equals(needle), expected);
}
/// Equality lookups over values sharing the prefix `apple`, including a null and duplicates.
///
/// NOTE(review): despite the `_on_disk` name, this goes through the `test_compare_equals`
/// helper, which builds an in-memory `FsstArray`-backed array.
#[test]
fn test_compare_equals_on_disk() {
    let input = StringArray::from(vec![
        Some("apple_orange"),
        None,
        Some("apple_orange_long_string"),
        Some("apple_b"),
        Some("apple_oo_long_string"),
        Some("apple_b"),
        Some("apple"),
    ]);

    // (needle, expected result per row); the null row stays null in every case.
    let cases: [(&[u8], Vec<Option<bool>>); 4] = [
        (
            &b"apple"[..],
            vec![
                Some(false),
                None,
                Some(false),
                Some(false),
                Some(false),
                Some(false),
                Some(true),
            ],
        ),
        (
            &b""[..],
            vec![
                Some(false),
                None,
                Some(false),
                Some(false),
                Some(false),
                Some(false),
                Some(false),
            ],
        ),
        (
            &b"apple_b"[..],
            vec![
                Some(false),
                None,
                Some(false),
                Some(true),
                Some(false),
                Some(true),
                Some(false),
            ],
        ),
        (
            &b"apple_oo_long_string"[..],
            vec![
                Some(false),
                None,
                Some(false),
                Some(false),
                Some(true),
                Some(false),
                Some(false),
            ],
        ),
    ];

    for (needle, expected) in cases {
        test_compare_equals(input.clone(), needle, BooleanArray::from(expected));
    }
}
/// Equality on 260-byte values, i.e. longer than a single length byte can express (the test
/// name suggests such lengths are recorded as 255). The full long needle must match only its
/// own row; a 207-byte needle with the same leading bytes must match nothing.
#[test]
fn test_compare_equals_long_string_len_byte_255() {
    let common = "prefix_";
    let suffix_len = 260 - common.len();
    let padded_with = |fill: &str| format!("{}{}", common, fill.repeat(suffix_len));
    let long_a = padded_with("a");
    let long_b = padded_with("b");

    let strings = StringArray::from(vec![long_a.as_str(), long_b.as_str(), "z"]);
    let trained = LiquidByteViewArray::<FsstArray>::train_compressor(strings.iter());
    let liquid = LiquidByteViewArray::<FsstArray>::from_string_array(&strings, trained);

    assert_eq!(
        liquid.compare_equals(long_a.as_bytes()),
        BooleanArray::from(vec![true, false, false])
    );

    // Same leading bytes as `long_a`, but shorter overall.
    let shorter = format!("{}{}", common, "a".repeat(200));
    assert_eq!(
        liquid.compare_equals(shorter.as_bytes()),
        BooleanArray::from(vec![false, false, false])
    );
}
/// `!=` must keep null rows null rather than turning them into `true` or `false`.
#[test]
fn test_compare_not_equals_preserves_nulls() {
    let strings = StringArray::from(vec![Some("alpha"), None, Some("beta"), Some("alpha")]);
    let trained = LiquidByteViewArray::<FsstArray>::train_compressor(strings.iter());
    let liquid = LiquidByteViewArray::<FsstArray>::from_string_array(&strings, trained);

    let operator = ByteViewOperator::Equality(Equality::NotEq);
    assert_eq!(
        liquid.compare_with(b"alpha", &operator),
        BooleanArray::from(vec![Some(false), None, Some(true), Some(false)])
    );
}
/// A null slot may carry an arbitrary raw key (here `u16::MAX`, far outside the two-entry
/// dictionary). Equality must report that row as null without acting on the raw key.
#[test]
fn test_compare_equals_ignores_raw_key_value_in_null_slot() {
    let dictionary_values: ArrayRef = Arc::new(StringArray::from(vec!["alpha", "beta"]));

    // Build the keys from raw buffers so the masked-out slot keeps its out-of-range value.
    let validity = NullBuffer::from(BooleanBuffer::from(vec![true, false, true]));
    let raw_keys = ScalarBuffer::from(vec![0u16, u16::MAX, 1u16]);
    let dictionary_keys = UInt16Array::new(raw_keys, Some(validity));
    let dictionary = DictionaryArray::<UInt16Type>::new(dictionary_keys, dictionary_values);

    let trained = LiquidByteViewArray::<FsstArray>::train_compressor(
        dictionary.values().as_string::<i32>().iter(),
    );
    // SAFETY: the dictionary values "alpha" and "beta" are distinct, which is presumably the
    // invariant `from_unique_dict_array` requires — confirm against its `# Safety` docs.
    let liquid =
        unsafe { LiquidByteViewArray::<FsstArray>::from_unique_dict_array(&dictionary, trained) };

    assert_eq!(
        liquid.compare_equals(b"alpha"),
        BooleanArray::from(vec![Some(true), None, Some(false)])
    );
}
/// A needle that is a strict prefix of the shared prefix (`hell` vs `hello_`) sorts before
/// every value, so both `<` and `<=` must be false for all rows.
#[test]
fn test_compare_with_shared_prefix_shorter_needle_lt() {
    let strings = StringArray::from(vec!["hello_world", "hello_rust"]);
    let trained = LiquidByteViewArray::<FsstArray>::train_compressor(strings.iter());
    let liquid = LiquidByteViewArray::<FsstArray>::from_string_array(&strings, trained);
    let ordered = |needle: &[u8], op: Comparison| {
        liquid.compare_with(needle, &ByteViewOperator::Comparison(op))
    };

    assert_eq!(
        ordered(b"hell", Comparison::Lt),
        BooleanArray::from(vec![false, false])
    );
    assert_eq!(
        ordered(b"hell", Comparison::LtEq),
        BooleanArray::from(vec![false, false])
    );
}
/// Exercises the `SubString::Contains` / `SubString::NotContains` operators through
/// `compare_with` on an in-memory array, with the needle `Al%`. The two results must be
/// complementary on non-null rows, and the null row must stay null.
#[test]
fn test_compare_with_like_fallback() {
    let strings = StringArray::from(vec![
        Some("Alpha"),
        Some("alphabet"),
        Some("beta"),
        None,
        Some("ALPHA"),
    ]);
    let trained = LiquidByteViewArray::<FsstArray>::train_compressor(strings.iter());
    let liquid = LiquidByteViewArray::<FsstArray>::from_string_array(&strings, trained);
    let substring =
        |needle: &[u8], op: SubString| liquid.compare_with(needle, &ByteViewOperator::SubString(op));

    assert_eq!(
        substring(b"Al%", SubString::Contains),
        BooleanArray::from(vec![
            Some(true),
            Some(false),
            Some(false),
            None,
            Some(false),
        ])
    );
    assert_eq!(
        substring(b"Al%", SubString::NotContains),
        BooleanArray::from(vec![Some(false), Some(true), Some(true), None, Some(true)])
    );
}
/// Equality on a squeezed (`DiskBuffer`) array holding two 260-byte values that share the
/// prefix `prefix_`. The test expects the correct row to match and exactly one read to be
/// recorded by the test I/O backend.
#[tokio::test]
async fn test_compare_equals_on_disk_long_prefix() {
    let common = "prefix_";
    let suffix_len = 260 - common.len();
    let padded_with = |fill: &str| format!("{}{}", common, fill.repeat(suffix_len));
    let long_a = padded_with("a");
    let long_b = padded_with("b");

    let strings = StringArray::from(vec![long_a.as_str(), long_b.as_str(), "z"]);
    let trained = LiquidByteViewArray::<FsstArray>::train_compressor(strings.iter());
    let in_memory = LiquidByteViewArray::<FsstArray>::from_string_array(&strings, trained);

    // Squeeze to the test I/O backend and hand it the bytes it should serve on reads.
    let io = Arc::new(TestSqueezeIo::default());
    let hint = CacheExpression::PredicateColumn;
    let (squeezed, squeezed_bytes) = in_memory
        .squeeze(io.clone(), Some(&hint))
        .expect("squeeze should succeed");
    io.set_bytes(squeezed_bytes);

    let on_disk = squeezed
        .as_any()
        .downcast_ref::<LiquidByteViewArray<DiskBuffer>>()
        .expect("should downcast to disk array");

    let matches = on_disk.compare_equals(long_b.as_bytes()).await;
    assert_eq!(matches, BooleanArray::from(vec![false, true, false]));
    assert_eq!(io.reads(), 1);
}
/// Generates `count` strings from a `StdRng` seeded with `seed`.
///
/// Each string falls into one of four equally likely size buckets:
/// random lowercase letters of length 1..=3, 50..=200 or 1000..=5000, or a run of `x`
/// of length 10000..=50000.
fn generate_mixed_size_strings(count: usize, seed: u64) -> Vec<String> {
    let mut rng = rand::rngs::StdRng::seed_from_u64(seed);
    (0..count)
        .map(|_| match rng.random_range(0..4) {
            // Buckets 0..=2: draw the length first, then one random letter per position.
            bucket @ 0..=2 => {
                let len = match bucket {
                    0 => rng.random_range(1..=3),
                    1 => rng.random_range(50..=200),
                    _ => rng.random_range(1000..=5000),
                };
                (0..len)
                    .map(|_| rng.random_range(b'a'..=b'z') as char)
                    .collect()
            }
            // Remaining bucket: a very long, highly repetitive string.
            _ => "x".repeat(rng.random_range(10000..=50000)),
        })
        .collect()
}
/// Generates `count` strings with a skewed choice among `base_strings`, using a `StdRng`
/// seeded with `seed`.
///
/// Entry 0 is picked about 50% of the time, entry 1 about 25%, entry 2 about 12%, and the
/// rest is spread uniformly over entries 3 and up. The chosen base is then emitted as-is,
/// with a `_<number>` suffix, with a `/<number>` suffix, or with a `prefix_` prefix.
///
/// NOTE(review): indexes entries 0..=2 directly and samples `3..base_strings.len()`, so
/// callers presumably need at least four base strings — confirm before reusing elsewhere.
fn generate_zipf_strings(count: usize, base_strings: &[&str], seed: u64) -> Vec<String> {
    let mut rng = rand::rngs::StdRng::seed_from_u64(seed);
    let mut generated = Vec::with_capacity(count);

    for _ in 0..count {
        // Map a uniform draw in 0..100 onto the skewed index distribution.
        let base_idx = match rng.random_range(0..100) {
            0..=49 => 0,
            50..=74 => 1,
            75..=86 => 2,
            _ => rng.random_range(3..base_strings.len()),
        };
        let base = base_strings[base_idx];

        // Pick one of four equally likely decorations of the base string.
        let decorated = match rng.random_range(0..4) {
            0 => base.to_string(),
            1 => format!("{}_{}", base, rng.random_range(1000..9999)),
            2 => format!("{}/{}", base, rng.random_range(100..999)),
            _ => format!("prefix_{}", base),
        };
        generated.push(decorated);
    }

    generated
}
/// Round-trips 16384 strings of widely varying sizes (seed 42) through the liquid encoding
/// and checks that decoding restores the input exactly.
#[test]
fn test_mixed_size_offset_views() {
    // The generated vector is not needed after building the array, so it is moved rather
    // than cloned.
    let strings = generate_mixed_size_strings(16384, 42);
    let input = StringArray::from(strings);

    let compressor = LiquidByteViewArray::<FsstArray>::train_compressor(input.iter());
    let liquid_array = LiquidByteViewArray::<FsstArray>::from_string_array(&input, compressor);

    let output = liquid_array.to_arrow_array();
    assert_eq!(&input, output.as_string::<i32>());
}
/// Round-trips 16384 short, skew-distributed strings (seed 123) through the liquid encoding,
/// then checks that the FSST buffer reports an offset width of at most two bytes.
#[test]
fn test_zipf_offset_views() {
    let base_patterns = &[
        "error", "warning", "info", "debug", "user", "admin", "guest", "GET", "POST", "PUT",
        "DELETE", "success", "failure", "pending", "/api/v1", "/api/v2", "/health",
    ];
    // The generated vector is not needed after building the array, so it is moved rather
    // than cloned.
    let strings = generate_zipf_strings(16384, base_patterns, 123);
    let input = StringArray::from(strings);

    let compressor = LiquidByteViewArray::<FsstArray>::train_compressor(input.iter());
    let liquid_array = LiquidByteViewArray::<FsstArray>::from_string_array(&input, compressor);

    let output = liquid_array.to_arrow_array();
    assert_eq!(&input, output.as_string::<i32>());

    let offset_bytes = liquid_array.fsst_buffer.offset_bytes();
    assert!(
        offset_bytes <= 2,
        "Zipf patterns with short strings should use 1 or 2 byte compact offsets, got {} bytes",
        offset_bytes
    );
}
/// Interleaves one-character strings with strings of several kilobytes (16384 values in
/// total), checks the Arrow round trip, and verifies that the FSST offsets never decrease.
#[test]
fn test_offset_stress() {
    // The pattern cycles every 8 rows so tiny and large values alternate.
    let strings: Vec<String> = (0..16384)
        .map(|i| match i % 8 {
            0 => "a".to_string(),
            1 => "x".repeat(1000 + (i % 100)),
            2 => "b".to_string(),
            3 => "y".repeat(5000 + (i % 1000)),
            4 => "c".to_string(),
            5 => "medium".repeat(50 + (i % 20)),
            6 => "huge".repeat(2000 + (i % 500)),
            _ => format!("string_{}", i),
        })
        .collect();

    let input = StringArray::from(strings);
    let trained = LiquidByteViewArray::<FsstArray>::train_compressor(input.iter());
    let liquid = LiquidByteViewArray::<FsstArray>::from_string_array(&input, trained);

    let decoded = liquid.to_arrow_array();
    assert_eq!(&input, decoded.as_string::<i32>());

    let offsets = liquid.fsst_buffer.offsets();
    for next in 1..offsets.len() {
        assert!(
            offsets[next] >= offsets[next - 1],
            "Offsets should be monotonic"
        );
    }
}