Skip to main content

lite_strtab/
builder.rs

1//! Builder for creating an immutable [`crate::StringTable`].
2//!
3//! The builder stores data in growable vectors while constructing.
4//! [`StringTableBuilder::build`] converts those vectors to boxed slices,
5//! making the final table immutable and compact.
6
7use core::marker::PhantomData;
8
9use crate::allocator::*;
10use crate::{Error, Offset, Result, StringId, StringIndex, StringTable};
11
12/// Alias for [`StringTableBuilder`].
13pub type StringPoolBuilder<O = u32, I = u16, const NULL_PADDED: bool = false, A = Global> =
14    StringTableBuilder<O, I, NULL_PADDED, A>;
15
16/// Incremental builder for [`crate::StringTable`].
17///
18/// Each call to [`Self::try_push`] appends string bytes to a single byte buffer and
19/// appends one offset.
20///
21/// Generic parameters control capacity and metadata size:
22/// - `O` is the byte-offset type (see [`Offset`]). It bounds total UTF-8 bytes and costs
23///   `size_of::<O>()` per string inside the [`crate::StringTable`].
24/// - `I` is the string-ID type (see [`StringIndex`]) used by [`crate::StringId`].
25///   It limits string count and costs `size_of::<I>()` per stored ID field
26///   (table index) in your own structs.
27///
28/// The common choice is `O = u32, I = u16`: meaning `4 GiB` of UTF-8 data
29/// and `64Ki` entries per table.
30///
31/// This is 4 bytes per string offset in the table and 2 bytes
32/// per StringID (index into table) inside your structs.
33/// For comparison: `Box<str>` == 16 bytes, `String` == 24 bytes.
34///
35/// By default, inserted strings are not NUL-terminated.
36/// Set `NULL_PADDED = true` to store strings with a trailing NUL byte.
37pub struct StringTableBuilder<
38    O = u32,
39    I = u16,
40    const NULL_PADDED: bool = false,
41    A: Allocator + Clone = Global,
42> where
43    O: Offset,
44    I: StringIndex,
45{
46    bytes: Vec<u8, A>,
47    offsets: Vec<O, A>,
48    _id: PhantomData<I>,
49}
50
51impl StringTableBuilder<u32, u16, false, Global> {
52    /// Creates an empty builder using the global allocator.
53    #[inline]
54    pub fn new() -> Self {
55        Self::new_in(Global)
56    }
57
58    /// Creates a builder with reserved capacities using the global allocator.
59    ///
60    /// `strings` is the expected number of strings, `bytes` is the expected
61    /// total number of UTF-8 bytes.
62    #[inline]
63    pub fn with_capacity(strings: usize, bytes: usize) -> Self {
64        Self::with_capacity_in(strings, bytes, Global)
65    }
66}
67
68impl Default for StringTableBuilder<u32, u16, false, Global> {
69    #[inline]
70    fn default() -> Self {
71        Self::new()
72    }
73}
74
75impl StringTableBuilder<u32, u16, true, Global> {
76    /// Creates an empty builder with null-padded mode using the global allocator.
77    #[inline]
78    pub fn new_null_padded() -> Self {
79        Self::new_in(Global)
80    }
81
82    /// Creates a null-padded builder with reserved capacities using the global allocator.
83    ///
84    /// `strings` is the expected number of strings, `bytes` is the expected
85    /// total number of UTF-8 bytes.
86    #[inline]
87    pub fn with_capacity_null_padded(strings: usize, bytes: usize) -> Self {
88        Self::with_capacity_in(strings, bytes, Global)
89    }
90}
91
92impl<O: Offset, I: StringIndex, const NULL_PADDED: bool, A: Allocator + Clone>
93    StringTableBuilder<O, I, NULL_PADDED, A>
94{
95    /// Creates an empty builder with a custom allocator.
96    pub fn new_in(allocator: A) -> Self {
97        let mut offsets = Vec::with_capacity_in(1, allocator.clone());
98        offsets.push(zero_offset::<O>());
99
100        Self {
101            bytes: Vec::new_in(allocator),
102            offsets,
103            _id: PhantomData,
104        }
105    }
106
107    /// Creates a builder with reserved capacities and a custom allocator.
108    ///
109    /// `strings` is the expected number of strings, `bytes` is the expected
110    /// total number of UTF-8 bytes.
111    pub fn with_capacity_in(strings: usize, bytes: usize, allocator: A) -> Self {
112        let mut offsets = Vec::with_capacity_in(strings.saturating_add(1), allocator.clone());
113        offsets.push(zero_offset::<O>());
114
115        Self {
116            bytes: Vec::with_capacity_in(bytes, allocator),
117            offsets,
118            _id: PhantomData,
119        }
120    }
121
122    /// Number of strings currently pushed.
123    #[inline]
124    pub fn len(&self) -> usize {
125        self.offsets.len().saturating_sub(1)
126    }
127
128    /// Returns `true` when the builder has no strings.
129    #[inline]
130    pub fn is_empty(&self) -> bool {
131        self.len() == 0
132    }
133
134    /// Current total byte length of pushed string data.
135    #[inline]
136    pub fn bytes_len(&self) -> usize {
137        self.bytes.len()
138    }
139
140    /// Appends a string and returns its [`StringId`].
141    ///
142    /// Returns an error when total string count exceeds the configured ID
143    /// type, or when the byte length cannot be represented by the configured
144    /// offset type.
145    pub fn try_push(&mut self, value: &str) -> Result<StringId<I>> {
146        let id = self.len();
147        let id_value = I::try_from_usize(id).ok_or(Error::TooManyStrings {
148            strings: id.saturating_add(1),
149            id_type: I::TYPE_NAME,
150        })?;
151
152        let start = self.bytes.len();
153        let end = start
154            .checked_add(value.len())
155            .ok_or(Error::TooManyBytesForOffsetType {
156                bytes: start,
157                offset_type: O::TYPE_NAME,
158            })?;
159        // Branch resolved at compile time; no runtime cost.
160        let end = if NULL_PADDED {
161            end.checked_add(1).ok_or(Error::TooManyBytesForOffsetType {
162                bytes: start,
163                offset_type: O::TYPE_NAME,
164            })?
165        } else {
166            end
167        };
168
169        let end_offset = O::try_from_usize(end).ok_or(Error::TooManyBytesForOffsetType {
170            bytes: end,
171            offset_type: O::TYPE_NAME,
172        })?;
173
174        self.bytes.extend_from_slice(value.as_bytes());
175        if NULL_PADDED {
176            self.bytes.push(0);
177        }
178        self.offsets.push(end_offset);
179        Ok(StringId::new(id_value))
180    }
181
182    /// Finalizes into an immutable [`crate::StringTable`].
183    ///
184    /// This does not copy string bytes. Internal vectors are converted into
185    /// boxed slices so the resulting table is immutable and compact.
186    #[inline]
187    pub fn build(self) -> StringTable<O, I, NULL_PADDED, A> {
188        let table = StringTable::from_parts_unchecked(
189            self.bytes.into_boxed_slice(),
190            self.offsets.into_boxed_slice(),
191        );
192        #[cfg(any(debug_assertions, test))]
193        debug_assert!(table.validate().is_ok());
194        table
195    }
196}
197
198#[inline]
199fn zero_offset<O: Offset>() -> O {
200    // SAFETY: All built-in integer implementations accept zero.
201    unsafe { O::try_from_usize(0).unwrap_unchecked() }
202}
203
#[cfg(test)]
mod tests {
    use proptest::prelude::*;

    use crate::allocator::Global;
    use crate::{Error, StringId, StringTableBuilder};

    /// A builder with no pushes yields a table holding only the leading zero offset.
    #[test]
    fn empty_table() {
        let built = StringTableBuilder::new().build();

        assert_eq!(built.len(), 0);
        assert!(built.is_empty());
        assert_eq!(built.as_bytes(), b"");
        assert_eq!(built.offsets(), &[0u32]);
    }

    /// One pushed string gets ID 0 and a single end offset.
    #[test]
    fn single_string() {
        let mut pool = StringTableBuilder::new();
        let hello = pool.try_push("hello").unwrap();
        let built = pool.build();

        assert_eq!(hello, StringId::new(0));
        assert_eq!(built.len(), 1);
        assert_eq!(built.get(hello), Some("hello"));
        assert_eq!(built.offsets(), &[0u32, 5]);
    }

    /// NUL-padded mode stores the terminator but does not expose it through lookups.
    #[test]
    fn null_padded_single_string() {
        let mut pool = StringTableBuilder::new_null_padded();
        let hello = pool.try_push("hello").unwrap();
        let built = pool.build();

        assert_eq!(built.get(hello), Some("hello"));
        assert_eq!(built.as_bytes(), b"hello\0");
        assert_eq!(built.offsets(), &[0u32, 6]);
        assert_eq!(built.byte_range(hello), Some(0..5));
    }

    /// An empty string in NUL-padded mode occupies exactly the terminator byte.
    #[test]
    fn null_padded_empty_string() {
        let mut pool = StringTableBuilder::new_null_padded();
        let empty = pool.try_push("").unwrap();
        let built = pool.build();

        assert_eq!(built.get(empty), Some(""));
        assert_eq!(built.as_bytes(), b"\0");
        assert_eq!(built.offsets(), &[0u32, 1]);
        assert_eq!(built.byte_range(empty), Some(0..0));
    }

    /// An empty string between non-empty ones repeats the previous end offset.
    #[test]
    fn multiple_with_empty_string() {
        let mut pool = StringTableBuilder::new();
        let first = pool.try_push("a").unwrap();
        let second = pool.try_push("").unwrap();
        let third = pool.try_push("ccc").unwrap();
        let built = pool.build();

        assert_eq!(built.get(first), Some("a"));
        assert_eq!(built.get(second), Some(""));
        assert_eq!(built.get(third), Some("ccc"));
        assert_eq!(built.offsets(), &[0u32, 1, 1, 4]);
    }

    /// Multi-byte UTF-8 strings round-trip unchanged.
    #[test]
    fn unicode_strings() {
        let mut pool = StringTableBuilder::new();
        let cat = pool.try_push("猫").unwrap();
        let house = pool.try_push("дом").unwrap();
        let path = pool.try_push("music/曲").unwrap();
        let built = pool.build();

        assert_eq!(built.get(cat), Some("猫"));
        assert_eq!(built.get(house), Some("дом"));
        assert_eq!(built.get(path), Some("music/曲"));
    }

    /// Iteration yields strings in the order they were pushed.
    #[test]
    fn iter_matches_insert_order() {
        let mut pool = StringTableBuilder::new();
        pool.try_push("z").unwrap();
        pool.try_push("a").unwrap();
        pool.try_push("m").unwrap();

        let built = pool.build();
        let seen: alloc::vec::Vec<&str> = built.iter().collect();
        assert_eq!(seen, alloc::vec!["z", "a", "m"]);
    }

    /// The allocator-taking constructor produces a working builder.
    #[test]
    fn supports_custom_allocator() {
        let mut pool = StringTableBuilder::<u32>::new_in(Global);
        let hello = pool.try_push("hello").unwrap();
        let built = pool.build();

        assert_eq!(built.get(hello), Some("hello"));
    }

    /// A `u8` offset type works while the data fits.
    #[test]
    fn supports_small_offset_type() {
        let mut pool = StringTableBuilder::<u8>::new_in(Global);
        let abc = pool.try_push("abc").unwrap();
        let built = pool.build();

        assert_eq!(built.get(abc), Some("abc"));
        assert_eq!(built.offsets(), &[0u8, 3]);
    }

    /// A `u8` ID type works while the string count fits.
    #[test]
    fn supports_small_id_type() {
        let mut pool = StringTableBuilder::<u32, u8>::new_in(Global);
        let abc = pool.try_push("abc").unwrap();
        let built = pool.build();

        assert_eq!(abc, StringId::<u8>::new(0));
        assert_eq!(built.get(abc), Some("abc"));
    }

    /// Pushing more bytes than a `u8` offset can address is rejected.
    #[test]
    fn small_offset_type_reports_overflow() {
        let mut pool = StringTableBuilder::<u8>::new_in(Global);
        let oversized = "a".repeat(300);
        let outcome = pool.try_push(&oversized);

        assert!(matches!(
            outcome,
            Err(Error::TooManyBytesForOffsetType {
                offset_type: "u8",
                ..
            })
        ));
    }

    /// Pushing one string more than a `u8` ID can index is rejected.
    #[test]
    fn small_id_type_reports_overflow() {
        let mut pool = StringTableBuilder::<u32, u8>::new_in(Global);
        (0..=u8::MAX).for_each(|_| {
            pool.try_push("a").unwrap();
        });

        let outcome = pool.try_push("overflow");
        assert!(matches!(
            outcome,
            Err(Error::TooManyStrings { id_type: "u8", .. })
        ));
    }

    proptest! {
        // Arbitrary string lists survive a push/build/lookup round trip.
        #[test]
        fn roundtrip_vec_of_strings(values in proptest::collection::vec(".*", 0..256)) {
            let mut builder = StringTableBuilder::new();
            let ids: alloc::vec::Vec<_> = values
                .iter()
                .map(|value| builder.try_push(value).unwrap())
                .collect();

            let table = builder.build();

            prop_assert_eq!(table.len(), values.len());
            prop_assert_eq!(table.offsets().len(), values.len() + 1);

            for (id, expected) in ids.iter().copied().zip(values.iter()) {
                prop_assert_eq!(table.get(id), Some(expected.as_str()));

                let stored = table.byte_range(id).map(|range| &table.as_bytes()[range]);
                prop_assert_eq!(stored, Some(expected.as_bytes()));
            }
        }
    }
}