use core::marker::PhantomData;
use crate::allocator::*;
use crate::{Error, Offset, Result, StringId, StringIndex, StringTable};
pub type StringPoolBuilder<O = u32, I = u16, const NULL_PADDED: bool = false, A = Global> =
StringTableBuilder<O, I, NULL_PADDED, A>;
pub struct StringTableBuilder<
O = u32,
I = u16,
const NULL_PADDED: bool = false,
A: Allocator + Clone = Global,
> where
O: Offset,
I: StringIndex,
{
bytes: Vec<u8, A>,
offsets: Vec<O, A>,
_id: PhantomData<I>,
}
impl StringTableBuilder<u32, u16, false, Global> {
#[inline]
pub fn new() -> Self {
Self::new_in(Global)
}
#[inline]
pub fn with_capacity(strings: usize, bytes: usize) -> Self {
Self::with_capacity_in(strings, bytes, Global)
}
}
impl Default for StringTableBuilder<u32, u16, false, Global> {
#[inline]
fn default() -> Self {
Self::new()
}
}
impl StringTableBuilder<u32, u16, true, Global> {
#[inline]
pub fn new_null_padded() -> Self {
Self::new_in(Global)
}
#[inline]
pub fn with_capacity_null_padded(strings: usize, bytes: usize) -> Self {
Self::with_capacity_in(strings, bytes, Global)
}
}
impl<O: Offset, I: StringIndex, const NULL_PADDED: bool, A: Allocator + Clone>
StringTableBuilder<O, I, NULL_PADDED, A>
{
pub fn new_in(allocator: A) -> Self {
let mut offsets = Vec::with_capacity_in(1, allocator.clone());
offsets.push(zero_offset::<O>());
Self {
bytes: Vec::new_in(allocator),
offsets,
_id: PhantomData,
}
}
pub fn with_capacity_in(strings: usize, bytes: usize, allocator: A) -> Self {
let mut offsets = Vec::with_capacity_in(strings.saturating_add(1), allocator.clone());
offsets.push(zero_offset::<O>());
Self {
bytes: Vec::with_capacity_in(bytes, allocator),
offsets,
_id: PhantomData,
}
}
#[inline]
pub fn len(&self) -> usize {
self.offsets.len().saturating_sub(1)
}
#[inline]
pub fn is_empty(&self) -> bool {
self.len() == 0
}
#[inline]
pub fn bytes_len(&self) -> usize {
self.bytes.len()
}
pub fn try_push(&mut self, value: &str) -> Result<StringId<I>> {
let id = self.len();
let id_value = I::try_from_usize(id).ok_or(Error::TooManyStrings {
strings: id.saturating_add(1),
id_type: I::TYPE_NAME,
})?;
let start = self.bytes.len();
let end = start
.checked_add(value.len())
.ok_or(Error::TooManyBytesForOffsetType {
bytes: start,
offset_type: O::TYPE_NAME,
})?;
let end = if NULL_PADDED {
end.checked_add(1).ok_or(Error::TooManyBytesForOffsetType {
bytes: start,
offset_type: O::TYPE_NAME,
})?
} else {
end
};
let end_offset = O::try_from_usize(end).ok_or(Error::TooManyBytesForOffsetType {
bytes: end,
offset_type: O::TYPE_NAME,
})?;
self.bytes.extend_from_slice(value.as_bytes());
if NULL_PADDED {
self.bytes.push(0);
}
self.offsets.push(end_offset);
Ok(StringId::new(id_value))
}
#[inline]
pub fn build(self) -> StringTable<O, I, NULL_PADDED, A> {
let table = StringTable::from_parts_unchecked(
self.bytes.into_boxed_slice(),
self.offsets.into_boxed_slice(),
);
#[cfg(any(debug_assertions, test))]
debug_assert!(table.validate().is_ok());
table
}
}
#[inline]
fn zero_offset<O: Offset>() -> O {
unsafe { O::try_from_usize(0).unwrap_unchecked() }
}
#[cfg(test)]
mod tests {
use proptest::prelude::*;
use crate::allocator::Global;
use crate::{Error, StringId, StringTableBuilder};
#[test]
fn empty_table() {
let table = StringTableBuilder::new().build();
assert_eq!(table.len(), 0);
assert!(table.is_empty());
assert_eq!(table.as_bytes(), b"");
assert_eq!(table.offsets(), &[0u32]);
}
#[test]
fn single_string() {
let mut builder = StringTableBuilder::new();
let id = builder.try_push("hello").unwrap();
let table = builder.build();
assert_eq!(id, StringId::new(0));
assert_eq!(table.len(), 1);
assert_eq!(table.get(id), Some("hello"));
assert_eq!(table.offsets(), &[0u32, 5u32]);
}
#[test]
fn null_padded_single_string() {
let mut builder = StringTableBuilder::new_null_padded();
let id = builder.try_push("hello").unwrap();
let table = builder.build();
assert_eq!(table.get(id), Some("hello"));
assert_eq!(table.as_bytes(), b"hello\0");
assert_eq!(table.offsets(), &[0u32, 6u32]);
assert_eq!(table.byte_range(id), Some(0..5));
}
#[test]
fn null_padded_empty_string() {
let mut builder = StringTableBuilder::new_null_padded();
let id = builder.try_push("").unwrap();
let table = builder.build();
assert_eq!(table.get(id), Some(""));
assert_eq!(table.as_bytes(), b"\0");
assert_eq!(table.offsets(), &[0u32, 1u32]);
assert_eq!(table.byte_range(id), Some(0..0));
}
#[test]
fn multiple_with_empty_string() {
let mut builder = StringTableBuilder::new();
let a = builder.try_push("a").unwrap();
let b = builder.try_push("").unwrap();
let c = builder.try_push("ccc").unwrap();
let table = builder.build();
assert_eq!(table.get(a), Some("a"));
assert_eq!(table.get(b), Some(""));
assert_eq!(table.get(c), Some("ccc"));
assert_eq!(table.offsets(), &[0u32, 1u32, 1u32, 4u32]);
}
#[test]
fn unicode_strings() {
let mut builder = StringTableBuilder::new();
let a = builder.try_push("猫").unwrap();
let b = builder.try_push("дом").unwrap();
let c = builder.try_push("music/曲").unwrap();
let table = builder.build();
assert_eq!(table.get(a), Some("猫"));
assert_eq!(table.get(b), Some("дом"));
assert_eq!(table.get(c), Some("music/曲"));
}
#[test]
fn iter_matches_insert_order() {
let mut builder = StringTableBuilder::new();
builder.try_push("z").unwrap();
builder.try_push("a").unwrap();
builder.try_push("m").unwrap();
let table = builder.build();
let got: alloc::vec::Vec<&str> = table.iter().collect();
assert_eq!(got, alloc::vec!["z", "a", "m"]);
}
#[test]
fn supports_custom_allocator() {
let mut builder = StringTableBuilder::<u32>::new_in(Global);
let id = builder.try_push("hello").unwrap();
let table = builder.build();
assert_eq!(table.get(id), Some("hello"));
}
#[test]
fn supports_small_offset_type() {
let mut builder = StringTableBuilder::<u8>::new_in(Global);
let id = builder.try_push("abc").unwrap();
let table = builder.build();
assert_eq!(table.get(id), Some("abc"));
assert_eq!(table.offsets(), &[0u8, 3u8]);
}
#[test]
fn supports_small_id_type() {
let mut builder = StringTableBuilder::<u32, u8>::new_in(Global);
let id = builder.try_push("abc").unwrap();
let table = builder.build();
assert_eq!(id, StringId::<u8>::new(0));
assert_eq!(table.get(id), Some("abc"));
}
#[test]
fn small_offset_type_reports_overflow() {
let mut builder = StringTableBuilder::<u8>::new_in(Global);
let long = "a".repeat(300);
let result = builder.try_push(&long);
assert!(matches!(
result,
Err(Error::TooManyBytesForOffsetType {
offset_type: "u8",
..
})
));
}
#[test]
fn small_id_type_reports_overflow() {
let mut builder = StringTableBuilder::<u32, u8>::new_in(Global);
for _ in 0..=u8::MAX {
builder.try_push("a").unwrap();
}
let result = builder.try_push("overflow");
assert!(matches!(
result,
Err(Error::TooManyStrings { id_type: "u8", .. })
));
}
proptest! {
#[test]
fn roundtrip_vec_of_strings(values in proptest::collection::vec(".*", 0..256)) {
let mut builder = StringTableBuilder::new();
let mut ids = alloc::vec::Vec::with_capacity(values.len());
for value in &values {
ids.push(builder.try_push(value).unwrap());
}
let table = builder.build();
prop_assert_eq!(table.len(), values.len());
prop_assert_eq!(table.offsets().len(), values.len() + 1);
for (index, value) in values.iter().enumerate() {
let id = ids[index];
prop_assert_eq!(table.get(id), Some(value.as_str()));
prop_assert_eq!(
table.byte_range(id).map(|r| &table.as_bytes()[r]),
Some(value.as_bytes())
);
}
}
}
}