use std::convert::TryFrom;

use crate::{AionError, Result};
/// Accumulates strings into one contiguous byte buffer.
///
/// Every string passed to [`StringTableBuilder::add`] is stored as its UTF-8
/// bytes followed by a single `0` byte. Entries are addressed by the
/// `(offset, length)` pair that `add` returns.
#[derive(Debug, Clone, Default)]
pub struct StringTableBuilder {
    /// Concatenated entries, each one followed by a `0` byte.
    data: Vec<u8>,
}

impl StringTableBuilder {
    /// Creates a builder whose buffer is empty.
    #[must_use]
    #[allow(clippy::missing_const_for_fn)]
    pub fn new() -> Self {
        Self::default()
    }

    /// Creates an empty builder whose buffer is pre-allocated for at least
    /// `capacity` bytes.
    #[must_use]
    pub fn with_capacity(capacity: usize) -> Self {
        let data = Vec::with_capacity(capacity);
        Self { data }
    }

    /// Appends `s` plus a trailing `0` byte and returns the entry's handle.
    ///
    /// The handle is `(offset, length)`: the byte position at which `s`
    /// starts in the buffer and the byte length of `s` (terminator excluded).
    ///
    /// The length is produced with an `as u32` cast, so it wraps for strings
    /// longer than `u32::MAX` bytes.
    #[allow(clippy::cast_possible_truncation)]
    pub fn add(&mut self, s: &str) -> (u64, u32) {
        let payload = s.as_bytes();
        // Capture the handle before the buffer grows.
        let handle = (self.data.len() as u64, payload.len() as u32);
        self.data.extend_from_slice(payload);
        self.data.push(0);
        handle
    }

    /// Number of bytes currently in the buffer, terminators included.
    #[must_use]
    pub fn len(&self) -> usize {
        self.data.len()
    }

    /// Returns `true` when nothing has been written to the buffer.
    #[must_use]
    pub fn is_empty(&self) -> bool {
        self.data.is_empty()
    }

    /// Consumes the builder and hands back the accumulated bytes.
    #[must_use]
    pub fn build(self) -> Vec<u8> {
        let Self { data } = self;
        data
    }

    /// Removes every entry from the buffer.
    pub fn clear(&mut self) {
        self.data.clear();
    }
}
#[derive(Debug, Clone, Copy)]
pub struct StringTable<'a> {
data: &'a [u8],
}
impl<'a> StringTable<'a> {
pub fn new(data: &'a [u8]) -> Result<Self> {
std::str::from_utf8(data).map_err(|e| AionError::InvalidUtf8 {
reason: format!("String table contains invalid UTF-8: {e}"),
})?;
Ok(Self { data })
}
#[allow(clippy::cast_possible_truncation)] pub fn get(&self, offset: u64, length: u32) -> Result<&'a str> {
let offset = offset as usize;
let length = length as usize;
let end = offset
.checked_add(length)
.ok_or_else(|| AionError::InvalidFormat {
reason: format!("String table access overflow: offset={offset}, length={length}"),
})?;
if end > self.data.len() {
return Err(AionError::InvalidFormat {
reason: format!(
"String table access out of bounds: offset={offset}, length={length}, table_size={}",
self.data.len()
),
});
}
let string_bytes = self
.data
.get(offset..end)
.ok_or_else(|| AionError::InvalidFormat {
reason: format!("Failed to extract string at offset {offset}"),
})?;
if end < self.data.len() {
if let Some(&byte) = self.data.get(end) {
if byte != 0 {
return Err(AionError::InvalidFormat {
reason: format!("String at offset {offset} is not null-terminated"),
});
}
}
}
std::str::from_utf8(string_bytes).map_err(|e| AionError::InvalidUtf8 {
reason: format!("String at offset {offset} contains invalid UTF-8: {e}"),
})
}
#[must_use]
pub const fn len(&self) -> usize {
self.data.len()
}
#[must_use]
pub const fn is_empty(&self) -> bool {
self.data.is_empty()
}
#[must_use]
pub const fn as_bytes(&self) -> &'a [u8] {
self.data
}
}
#[cfg(test)]
#[allow(clippy::unwrap_used)] mod tests {
use super::*;
mod builder {
    use super::*;

    /// A freshly constructed builder holds no bytes.
    #[test]
    fn should_create_empty_builder() {
        let fresh = StringTableBuilder::new();
        assert!(fresh.is_empty());
        assert_eq!(fresh.len(), 0);
    }

    /// One string yields handle `(0, len)` and is stored with a terminator.
    #[test]
    fn should_add_single_string() {
        let mut table = StringTableBuilder::new();
        assert_eq!(table.add("Hello"), (0, 5));
        // Five payload bytes plus the terminator.
        assert_eq!(table.len(), 6);
        assert_eq!(table.build(), b"Hello\0");
    }

    /// Each new entry starts right after the previous entry's terminator.
    #[test]
    fn should_add_multiple_strings() {
        let mut table = StringTableBuilder::new();
        let handles = [table.add("First"), table.add("Second"), table.add("Third")];
        assert_eq!(handles, [(0, 5), (6, 6), (13, 5)]);
        assert_eq!(table.build(), b"First\0Second\0Third\0");
    }

    /// An empty string still occupies one byte: its terminator.
    #[test]
    fn should_handle_empty_strings() {
        let mut table = StringTableBuilder::new();
        assert_eq!(table.add(""), (0, 0));
        assert_eq!(table.len(), 1);
        assert_eq!(table.build(), b"\0");
    }

    /// Multi-byte UTF-8 text is stored byte-for-byte.
    #[test]
    fn should_handle_utf8_strings() {
        let inputs = ["Hello 世界", "Γειά σου κόσμε", "🎉🎊"];
        let mut table = StringTableBuilder::new();
        for text in &inputs {
            table.add(text);
        }
        assert_eq!(
            table.build(),
            "Hello 世界\0Γειά σου κόσμε\0🎉🎊\0".as_bytes()
        );
    }

    /// Control characters and quotes pass through unchanged.
    #[test]
    fn should_handle_special_characters() {
        let inputs = ["Line1\nLine2", "Tab\there", "Quote\"Test"];
        let mut table = StringTableBuilder::new();
        for text in &inputs {
            table.add(text);
        }
        assert_eq!(table.build(), b"Line1\nLine2\0Tab\there\0Quote\"Test\0");
    }

    /// Pre-allocating capacity does not add any content.
    #[test]
    fn should_create_with_capacity() {
        let reserved = StringTableBuilder::with_capacity(1024);
        assert!(reserved.is_empty());
        assert_eq!(reserved.len(), 0);
    }

    /// `clear` returns the builder to the empty state.
    #[test]
    fn should_clear_builder() {
        let mut table = StringTableBuilder::new();
        table.add("Test");
        assert_eq!(table.len(), 5);

        table.clear();
        assert!(table.is_empty());
        assert_eq!(table.len(), 0);
    }

    /// Offsets advance by `len + 1` (payload plus terminator) per entry.
    #[test]
    fn should_track_offsets_correctly() {
        let messages = [
            "Genesis version",
            "Added fraud detection",
            "Updated compliance rules",
        ];
        let mut table = StringTableBuilder::new();
        let mut next_offset = 0u64;
        for message in &messages {
            let (offset, length) = table.add(message);
            assert_eq!(offset, next_offset);
            assert_eq!(length as usize, message.len());
            next_offset += message.len() as u64 + 1;
        }
    }
}
mod parser {
    use super::*;

    /// An empty byte slice is a valid, empty table.
    #[test]
    fn should_parse_empty_table() {
        let empty = StringTable::new(b"").unwrap();
        assert!(empty.is_empty());
        assert_eq!(empty.len(), 0);
    }

    /// A single terminated entry is readable through its handle.
    #[test]
    fn should_parse_single_string() {
        let table = StringTable::new(b"Hello\0").unwrap();
        assert_eq!(table.get(0, 5).unwrap(), "Hello");
    }

    /// Several entries are each readable through their own handle.
    #[test]
    fn should_parse_multiple_strings() {
        let table = StringTable::new(b"First\0Second\0Third\0").unwrap();
        let cases: [(u64, u32, &str); 3] = [(0, 5, "First"), (6, 6, "Second"), (13, 5, "Third")];
        for &(offset, length, expected) in &cases {
            assert_eq!(table.get(offset, length).unwrap(), expected);
        }
    }

    /// A zero-length entry resolves to the empty string.
    #[test]
    fn should_handle_empty_string() {
        let table = StringTable::new(b"\0Test\0").unwrap();
        let cases: [(u64, u32, &str); 2] = [(0, 0, ""), (1, 4, "Test")];
        for &(offset, length, expected) in &cases {
            assert_eq!(table.get(offset, length).unwrap(), expected);
        }
    }

    /// Handles are byte-based, so multi-byte text uses `str::len` for lengths.
    #[test]
    #[allow(clippy::cast_possible_truncation)]
    fn should_parse_utf8_strings() {
        let (first, second) = ("Hello 世界", "🎉");
        let raw = [first, "\0", second, "\0"].concat();
        let table = StringTable::new(raw.as_bytes()).unwrap();

        let first_len = first.len() as u32;
        let second_len = second.len() as u32;
        // The second entry starts right after the first entry's terminator.
        let second_offset = u64::from(first_len) + 1;

        assert_eq!(table.get(0, first_len).unwrap(), first);
        assert_eq!(table.get(second_offset, second_len).unwrap(), second);
    }

    /// Construction fails when the buffer is not valid UTF-8.
    #[test]
    fn should_reject_invalid_utf8() {
        assert!(StringTable::new(b"Hello\0\xFF\xFE\0").is_err());
    }

    /// Handles pointing outside the table are rejected.
    #[test]
    fn should_reject_out_of_bounds_access() {
        let table = StringTable::new(b"Test\0").unwrap();
        // First an offset past the end, then a length that overruns the table.
        assert!(table.get(100, 5).is_err());
        assert!(table.get(0, 100).is_err());
    }

    /// A handle whose range is not followed by a `0` byte is rejected.
    #[test]
    fn should_verify_null_terminator() {
        let table = StringTable::new(b"Hello\0World\0").unwrap();
        assert!(table.get(0, 5).is_ok());
        // Byte 10 is `d`, not a terminator.
        assert!(table.get(0, 10).is_err());
    }

    /// `as_bytes` exposes exactly the bytes the table was built from.
    #[test]
    fn should_get_as_bytes() {
        let raw: &[u8] = b"Test\0";
        assert_eq!(StringTable::new(raw).unwrap().as_bytes(), raw);
    }
}
mod roundtrip {
use super::*;
#[test]
fn should_roundtrip_single_string() {
let mut builder = StringTableBuilder::new();
let (offset, length) = builder.add("Test string");
let bytes = builder.build();
let table = StringTable::new(&bytes).unwrap();
let recovered = table.get(offset, length).unwrap();
assert_eq!(recovered, "Test string");
}
#[test]
fn should_roundtrip_multiple_strings() {
let mut builder = StringTableBuilder::new();
let strings = vec![
"Genesis version",
"Added fraud detection",
"Updated compliance rules",
"Fixed security vulnerability",
];
let mut entries = Vec::new();
for s in &strings {
entries.push(builder.add(s));
}
let bytes = builder.build();
let table = StringTable::new(&bytes).unwrap();
for ((offset, length), expected) in entries.iter().zip(&strings) {
let recovered = table.get(*offset, *length).unwrap();
assert_eq!(recovered, *expected);
}
}
#[test]
fn should_roundtrip_utf8() {
let mut builder = StringTableBuilder::new();
let strings = vec!["Hello 世界", "Γειά σου κόσμε", "مرحبا بالعالم", "🎉🎊🎈"];
let mut entries = Vec::new();
for s in &strings {
entries.push(builder.add(s));
}
let bytes = builder.build();
let table = StringTable::new(&bytes).unwrap();
for ((offset, length), expected) in entries.iter().zip(&strings) {
let recovered = table.get(*offset, *length).unwrap();
assert_eq!(recovered, *expected);
}
}
#[test]
fn should_roundtrip_empty_string() {
let mut builder = StringTableBuilder::new();
let (offset, length) = builder.add("");
let bytes = builder.build();
let table = StringTable::new(&bytes).unwrap();
let recovered = table.get(offset, length).unwrap();
assert_eq!(recovered, "");
}
}
// Property-style tests driven by the `hegel` crate: inputs are drawn from
// `hegel` generators instead of being written out by hand.
mod properties {
use super::*;
use hegel::generators as gs;
// Every string added to a builder must be recoverable, unchanged, from the
// built bytes through the handle that `add` returned for it.
#[hegel::test]
fn prop_add_get_roundtrip(tc: hegel::TestCase) {
// Generated list of strings; the generator is bounded with `min_size(1)` /
// `max_size(16)` on the list and `max_size(64)` on each text value.
let strings = tc.draw(gs::vecs(gs::text().max_size(64)).min_size(1).max_size(16));
let mut builder = StringTableBuilder::new();
// One `(offset, length)` handle per input, in input order.
let handles: Vec<(u64, u32)> = strings.iter().map(|s| builder.add(s)).collect();
let bytes = builder.build();
// NOTE(review): an error here calls `std::process::abort()` rather than
// panicking, which ends the whole test process — confirm this is intended.
let table = StringTable::new(&bytes).unwrap_or_else(|_| std::process::abort());
for (original, (offset, length)) in strings.iter().zip(handles.iter()) {
let recovered = table
.get(*offset, *length)
.unwrap_or_else(|_| std::process::abort());
assert_eq!(recovered, original.as_str());
}
}
// `add` always writes at least the terminator byte, so the builder's length
// must grow on every call, including for an empty string.
#[hegel::test]
fn prop_builder_len_strictly_increases_on_add(tc: hegel::TestCase) {
let strings = tc.draw(gs::vecs(gs::text().max_size(64)).min_size(1).max_size(16));
let mut builder = StringTableBuilder::new();
// Length observed before the next `add`.
let mut prev = builder.len();
for s in &strings {
builder.add(s);
let now = builder.len();
assert!(now > prev);
prev = now;
}
}
}
}