use crate::error::{Result, ZiporaError};
use std::mem;
use std::str;
#[repr(C)] pub struct FixedLenStrVec<const N: usize> {
string_arena: Vec<u8>,
indices: Vec<u32>,
len: usize,
stats: MemoryStats,
}
#[derive(Debug, Default)]
struct MemoryStats {
total_capacity_bytes: usize,
strings_stored: usize,
memory_saved_vs_vec_string: usize,
}
#[derive(Debug, Clone)]
pub struct MemoryInfo {
pub arena_bytes: usize,
pub indices_bytes: usize,
pub metadata_bytes: usize,
pub total_bytes: usize,
pub vec_string_equivalent_bytes: usize,
pub memory_ratio: f64,
pub strings_count: usize,
}
impl<const N: usize> FixedLenStrVec<N> {
pub fn new() -> Self {
Self {
string_arena: Vec::new(),
indices: Vec::new(),
len: 0,
stats: MemoryStats::default(),
}
}
pub fn with_capacity(capacity: usize) -> Self {
let estimated_avg_len = (N * 4) / 5; let estimated_total_bytes = capacity * estimated_avg_len;
let mut vec = Self::new();
vec.string_arena.reserve_exact(estimated_total_bytes);
vec.indices.reserve_exact(capacity);
vec.stats.total_capacity_bytes = estimated_total_bytes + capacity * mem::size_of::<u32>();
vec
}
pub fn push(&mut self, s: &str) -> Result<()> {
let s_bytes = s.as_bytes();
if s_bytes.len() > N {
return Err(ZiporaError::invalid_data(format!(
"String length {} exceeds fixed length {}",
s_bytes.len(),
N
)));
}
if s_bytes.len() > 255 {
return Err(ZiporaError::invalid_data(
"String length cannot exceed 255 bytes for bit-packed storage".to_string(),
));
}
if self.string_arena.len() + s_bytes.len() >= (1 << 24) {
return Err(ZiporaError::invalid_data(
"String arena size limit exceeded (16MB)".to_string(),
));
}
let offset = self.string_arena.len();
let length = s_bytes.len();
self.string_arena.extend_from_slice(s_bytes);
let packed_index = (offset as u32) | ((length as u32) << 24);
self.indices.push(packed_index);
self.len += 1;
self.update_stats();
Ok(())
}
pub fn get(&self, index: usize) -> Option<&str> {
if index >= self.len {
return None;
}
let packed = self.indices[index];
let offset = (packed & 0x00FFFFFF) as usize; let length = (packed >> 24) as usize;
let end_offset = offset + length;
if end_offset <= self.string_arena.len() {
let slice = &self.string_arena[offset..end_offset];
str::from_utf8(slice).ok()
} else {
None
}
}
pub fn get_bytes(&self, index: usize) -> Option<&[u8]> {
if index >= self.len {
return None;
}
let packed = self.indices[index];
let offset = (packed & 0x00FFFFFF) as usize; let length = (packed >> 24) as usize;
let end_offset = offset + length;
if end_offset <= self.string_arena.len() {
Some(&self.string_arena[offset..end_offset])
} else {
None
}
}
#[inline]
pub fn len(&self) -> usize {
self.len
}
#[inline]
pub fn is_empty(&self) -> bool {
self.len == 0
}
pub fn memory_savings_vs_vec_string(&self) -> (usize, usize, f64) {
let memory_info = self.memory_info();
(
memory_info.vec_string_equivalent_bytes,
memory_info.total_bytes,
memory_info.memory_ratio,
)
}
pub fn stats(&self) -> (usize, usize, f64) {
self.memory_savings_vs_vec_string()
}
#[cfg(feature = "simd")]
pub fn find_exact(&self, needle: &str) -> Option<usize> {
if needle.len() > N || needle.len() > 255 {
return None;
}
if needle.len() >= 16 {
self.find_exact_simd(needle)
} else {
self.find_exact_fallback(needle)
}
}
#[cfg(not(feature = "simd"))]
pub fn find_exact(&self, needle: &str) -> Option<usize> {
self.find_exact_fallback(needle)
}
#[cfg(feature = "simd")]
pub fn count_prefix(&self, prefix: &str) -> usize {
if prefix.len() > N || prefix.len() > 255 {
return 0;
}
if prefix.len() >= 8 {
self.count_prefix_simd(prefix)
} else {
self.count_prefix_fallback(prefix)
}
}
#[cfg(not(feature = "simd"))]
pub fn count_prefix(&self, prefix: &str) -> usize {
self.count_prefix_fallback(prefix)
}
fn update_stats(&mut self) {
self.stats.strings_stored = self.len;
let (vec_string_size, our_size, _) = self.memory_savings_vs_vec_string();
self.stats.memory_saved_vs_vec_string = vec_string_size.saturating_sub(our_size);
}
fn find_exact_fallback(&self, needle: &str) -> Option<usize> {
let needle_bytes = needle.as_bytes();
let needle_len = needle_bytes.len();
for i in 0..self.len {
let packed = self.indices[i];
let length = (packed >> 24) as usize;
if length == needle_len {
if let Some(s) = self.get(i) {
if s == needle {
return Some(i);
}
}
}
}
None
}
fn count_prefix_fallback(&self, prefix: &str) -> usize {
let prefix_bytes = prefix.as_bytes();
let prefix_len = prefix_bytes.len();
let mut count = 0;
for i in 0..self.len {
let packed = self.indices[i];
let length = (packed >> 24) as usize;
if length >= prefix_len {
if let Some(s) = self.get(i) {
if s.starts_with(prefix) {
count += 1;
}
}
}
}
count
}
#[cfg(feature = "simd")]
fn find_exact_simd(&self, needle: &str) -> Option<usize> {
self.find_exact_fallback(needle)
}
#[cfg(feature = "simd")]
fn count_prefix_simd(&self, prefix: &str) -> usize {
self.count_prefix_fallback(prefix)
}
pub fn memory_info(&self) -> MemoryInfo {
let vec_string_memory = if self.len > 0 {
let string_metadata_size = mem::size_of::<String>() * self.len;
let string_content_size = self.string_arena.len();
let heap_overhead = self.len * 8;
let vec_overhead = mem::size_of::<Vec<String>>();
string_metadata_size + string_content_size + heap_overhead + vec_overhead
} else {
mem::size_of::<Vec<String>>()
};
let arena_bytes = self.string_arena.len();
let indices_bytes = self.indices.len() * mem::size_of::<u32>();
let metadata_bytes = mem::size_of::<Self>();
let our_total_bytes = arena_bytes + indices_bytes + metadata_bytes;
let total_used_bytes = our_total_bytes;
let memory_ratio = if vec_string_memory > 0 {
total_used_bytes as f64 / vec_string_memory as f64
} else {
1.0
};
MemoryInfo {
arena_bytes,
indices_bytes,
metadata_bytes,
total_bytes: total_used_bytes,
vec_string_equivalent_bytes: vec_string_memory,
memory_ratio,
strings_count: self.len,
}
}
}
impl<const N: usize> Default for FixedLenStrVec<N> {
fn default() -> Self {
Self::new()
}
}
pub type FixedStr4Vec = FixedLenStrVec<4>;
pub type FixedStr8Vec = FixedLenStrVec<8>;
pub type FixedStr16Vec = FixedLenStrVec<16>;
pub type FixedStr32Vec = FixedLenStrVec<32>;
pub type FixedStr64Vec = FixedLenStrVec<64>;
#[cfg(test)]
mod tests {
use super::*;
#[test]
fn test_basic_operations() {
let mut vec: FixedStr8Vec = FixedLenStrVec::new();
assert_eq!(vec.len(), 0);
assert!(vec.is_empty());
vec.push("hello").unwrap();
assert_eq!(vec.len(), 1);
assert_eq!(vec.get(0), Some("hello"));
assert_eq!(vec.get(1), None);
}
#[test]
fn test_fixed_length_constraint() {
let mut vec: FixedStr4Vec = FixedLenStrVec::new();
vec.push("hi").unwrap();
vec.push("test").unwrap();
assert!(vec.push("toolong").is_err());
}
#[test]
fn test_padding_and_retrieval() {
let mut vec: FixedStr8Vec = FixedLenStrVec::new();
vec.push("a").unwrap(); vec.push("hello").unwrap(); vec.push("maxleng").unwrap();
assert_eq!(vec.get(0), Some("a"));
assert_eq!(vec.get(1), Some("hello"));
assert_eq!(vec.get(2), Some("maxleng"));
}
#[test]
fn test_memory_savings() {
let mut vec: FixedStr16Vec = FixedLenStrVec::with_capacity(1000);
for i in 0..1000 {
let s = format!("str_{:010}", i); vec.push(&s).unwrap();
}
let memory_info = vec.memory_info();
println!("=== FixedStr16Vec Memory Savings Test ===");
println!("Strings: {}", memory_info.strings_count);
println!("Arena bytes: {}", memory_info.arena_bytes);
println!("Indices bytes: {}", memory_info.indices_bytes);
println!("Total bytes: {}", memory_info.total_bytes);
println!(
"Vec<String> equivalent: {}",
memory_info.vec_string_equivalent_bytes
);
println!("Memory ratio: {:.3}x", memory_info.memory_ratio);
println!(
"Memory savings: {:.1}%",
(1.0 - memory_info.memory_ratio) * 100.0
);
assert!(
memory_info.memory_ratio < 0.65,
"Memory ratio {:.3} should be < 0.65 (>35% savings)",
memory_info.memory_ratio
);
assert!(memory_info.total_bytes < memory_info.vec_string_equivalent_bytes);
}
#[test]
fn test_find_exact() {
let mut vec: FixedStr8Vec = FixedLenStrVec::new();
vec.push("apple").unwrap();
vec.push("banana").unwrap();
vec.push("cherry").unwrap();
vec.push("apple").unwrap();
assert_eq!(vec.find_exact("banana"), Some(1));
assert_eq!(vec.find_exact("apple"), Some(0)); assert_eq!(vec.find_exact("grape"), None);
assert_eq!(vec.find_exact("toolongstring"), None);
}
#[test]
fn test_count_prefix() {
let mut vec: FixedStr8Vec = FixedLenStrVec::new();
vec.push("apple").unwrap();
vec.push("apricot").unwrap();
vec.push("banana").unwrap();
vec.push("app").unwrap();
vec.push("apply").unwrap();
assert_eq!(vec.count_prefix("ap"), 4);
assert_eq!(vec.count_prefix("app"), 3);
assert_eq!(vec.count_prefix("apple"), 1);
assert_eq!(vec.count_prefix("ban"), 1);
assert_eq!(vec.count_prefix("z"), 0);
}
}