1use core::iter::FusedIterator;
13use core::marker::PhantomData;
14use core::ops::Range;
15use core::str;
16
17use crate::allocator::*;
18#[cfg(any(debug_assertions, test))]
19use crate::error::{ValidationError, ValidationResult};
20use crate::{Offset, StringId, StringIndex};
21
22pub type StringPool<O = u32, I = u16, const NULL_PADDED: bool = false, A = Global> =
24 StringTable<O, I, NULL_PADDED, A>;
25
26pub type StringPoolIter<'a, O = u32, const NULL_PADDED: bool = false> =
28 StringTableIter<'a, O, NULL_PADDED>;
29
30pub struct StringTable<
73 O = u32,
74 I = u16,
75 const NULL_PADDED: bool = false,
76 A: Allocator + Clone = Global,
77> where
78 O: Offset,
79 I: StringIndex,
80{
81 bytes: Box<[u8], A>,
82 offsets: Box<[O], A>,
83 _id: PhantomData<I>,
84}
85
86impl StringTable<u32, u16, false, Global> {
87 #[inline]
89 pub fn empty() -> Self {
90 Self::empty_in(Global)
91 }
92}
93
94impl<O: Offset, I: StringIndex, const NULL_PADDED: bool, A: Allocator + Clone>
95 StringTable<O, I, NULL_PADDED, A>
96{
97 pub fn empty_in(allocator: A) -> Self {
99 let bytes = Vec::new_in(allocator.clone()).into_boxed_slice();
100 let mut offsets = Vec::with_capacity_in(1, allocator);
101 offsets.push(zero_offset::<O>());
102
103 Self {
104 bytes,
105 offsets: offsets.into_boxed_slice(),
106 _id: PhantomData,
107 }
108 }
109
110 #[inline]
111 pub(crate) fn from_parts_unchecked(bytes: Box<[u8], A>, offsets: Box<[O], A>) -> Self {
112 Self {
113 bytes,
114 offsets,
115 _id: PhantomData,
116 }
117 }
118
119 #[inline]
121 pub fn len(&self) -> usize {
122 self.offsets.len().saturating_sub(1)
123 }
124
125 #[inline]
127 pub fn is_empty(&self) -> bool {
128 self.len() == 0
129 }
130
131 #[inline]
133 pub fn get(&self, id: StringId<I>) -> Option<&str> {
134 let index = id.into_usize();
135 if index < self.len() {
139 let start = unsafe { self.offsets.get_unchecked(index) }.to_usize();
141 let end = unsafe { self.offsets.get_unchecked(index + 1) }.to_usize();
142 let logical_end = end.saturating_sub(usize::from(NULL_PADDED));
144 debug_assert!(logical_end >= start);
145
146 let bytes = unsafe { self.bytes.get_unchecked(start..logical_end) };
148 Some(unsafe { str::from_utf8_unchecked(bytes) })
150 } else {
151 None
152 }
153 }
154
155 #[inline]
161 pub unsafe fn get_unchecked(&self, id: StringId<I>) -> &str {
162 let index = id.into_usize();
163 let start = unsafe { self.offsets.get_unchecked(index) }.to_usize();
164 let end = unsafe { self.offsets.get_unchecked(index + 1) }.to_usize();
165 let logical_end = end.saturating_sub(usize::from(NULL_PADDED));
167 debug_assert!(logical_end >= start);
168 let bytes = unsafe { self.bytes.get_unchecked(start..logical_end) };
169
170 unsafe { str::from_utf8_unchecked(bytes) }
172 }
173
174 #[inline]
176 pub fn iter(&self) -> StringTableIter<'_, O, NULL_PADDED> {
177 let offsets = &self.offsets;
178 let strings = offsets.len().saturating_sub(1);
179 let cur_offset = offsets.as_ptr();
180
181 StringTableIter {
182 bytes: &self.bytes,
183 cur_offset,
184 max_offset: unsafe { cur_offset.add(strings) },
187 remaining: strings,
188 _offsets: PhantomData,
189 }
190 }
191
192 #[inline]
194 pub fn as_bytes(&self) -> &[u8] {
195 &self.bytes
196 }
197
198 #[inline]
200 pub fn contains(&self, value: &str) -> bool {
201 self.iter().any(|item| item == value)
202 }
203
204 #[inline]
206 pub fn offsets(&self) -> &[O] {
207 &self.offsets
208 }
209
210 #[inline]
212 pub fn byte_range(&self, id: StringId<I>) -> Option<Range<usize>> {
213 let index = id.into_usize();
214 if index < self.len() {
218 let start = unsafe { self.offsets.get_unchecked(index) }.to_usize();
220 let end = unsafe { self.offsets.get_unchecked(index + 1) }.to_usize();
221 let logical_end = end.saturating_sub(usize::from(NULL_PADDED));
223 debug_assert!(logical_end >= start);
224
225 Some(start..logical_end)
226 } else {
227 None
228 }
229 }
230
231 #[cfg(any(debug_assertions, test))]
232 pub(crate) fn validate(&self) -> ValidationResult<()> {
233 let bytes_len = self.bytes.len();
234 if O::try_from_usize(bytes_len).is_none() {
235 return Err(ValidationError::TooManyBytesForOffsetType {
236 bytes: bytes_len,
237 offset_type: O::TYPE_NAME,
238 });
239 }
240
241 let strings = self.len();
242 if strings > 0 && I::try_from_usize(strings - 1).is_none() {
243 return Err(ValidationError::TooManyStrings {
244 strings,
245 id_type: I::TYPE_NAME,
246 });
247 }
248
249 let offsets = &self.offsets;
250 if offsets.is_empty() {
251 return Err(ValidationError::MissingSentinelOffset);
252 }
253
254 let last_index = offsets.len() - 1;
255 let found_last = offsets[last_index].to_usize();
256 if found_last != bytes_len {
257 return Err(ValidationError::LastOffsetMismatch {
258 found: found_last,
259 expected: bytes_len,
260 });
261 }
262
263 let mut previous = 0usize;
264 for (index, &offset) in offsets.iter().enumerate() {
265 let current = offset.to_usize();
266
267 if current > bytes_len {
268 return Err(ValidationError::OffsetOutOfBounds {
269 index,
270 offset: current,
271 bytes_len,
272 });
273 }
274
275 if index == 0 {
276 previous = current;
277 continue;
278 }
279
280 if current < previous {
281 return Err(ValidationError::OffsetsNotMonotonic {
282 index,
283 previous,
284 current,
285 });
286 }
287
288 if NULL_PADDED {
289 if current == previous {
290 return Err(ValidationError::NullPaddedStringMissingTerminatorByte {
291 index: index - 1,
292 });
293 }
294
295 let terminator_index = current - 1;
296 if self.bytes[terminator_index] != 0 {
297 return Err(ValidationError::NullPaddedStringMissingTrailingNul {
298 index: index - 1,
299 });
300 }
301
302 if str::from_utf8(&self.bytes[previous..terminator_index]).is_err() {
303 return Err(ValidationError::InvalidUtf8 { index: index - 1 });
304 }
305 } else if str::from_utf8(&self.bytes[previous..current]).is_err() {
306 return Err(ValidationError::InvalidUtf8 { index: index - 1 });
307 }
308
309 previous = current;
310 }
311
312 Ok(())
313 }
314}
315
316pub struct StringTableIter<'a, O: Offset = u32, const NULL_PADDED: bool = false> {
318 bytes: &'a [u8],
319 cur_offset: *const O,
320 max_offset: *const O,
321 remaining: usize,
322 _offsets: PhantomData<&'a [O]>,
323}
324
325impl<'a, O: Offset, const NULL_PADDED: bool> Iterator for StringTableIter<'a, O, NULL_PADDED> {
326 type Item = &'a str;
327
328 #[inline]
329 fn next(&mut self) -> Option<Self::Item> {
330 if self.cur_offset != self.max_offset {
331 let start = unsafe { (*self.cur_offset).to_usize() };
334 self.cur_offset = unsafe { self.cur_offset.add(1) };
335 let end = unsafe { (*self.cur_offset).to_usize() };
336 self.remaining -= 1;
337
338 let logical_end = end.saturating_sub(usize::from(NULL_PADDED));
340 debug_assert!(logical_end >= start);
341
342 let bytes = unsafe { self.bytes.get_unchecked(start..logical_end) };
344 Some(unsafe { str::from_utf8_unchecked(bytes) })
345 } else {
346 None
347 }
348 }
349
350 #[inline]
351 fn size_hint(&self) -> (usize, Option<usize>) {
352 let remaining = self.len();
353 (remaining, Some(remaining))
354 }
355}
356
357impl<O: Offset, const NULL_PADDED: bool> ExactSizeIterator for StringTableIter<'_, O, NULL_PADDED> {
358 #[inline]
359 fn len(&self) -> usize {
360 self.remaining
361 }
362}
363
364impl<O: Offset, const NULL_PADDED: bool> FusedIterator for StringTableIter<'_, O, NULL_PADDED> {}
365
366#[inline]
367fn zero_offset<O: Offset>() -> O {
368 unsafe { O::try_from_usize(0).unwrap_unchecked() }
370}
371
372#[cfg(test)]
373mod tests {
374 use crate::allocator::{Global, Vec};
375 use crate::error::{ValidationError, ValidationResult};
376 use crate::{Offset, StringId, StringIndex, StringTable};
377
378 fn validate_parts<O: Offset, I: StringIndex, const NULL_PADDED: bool>(
379 bytes: Vec<u8, Global>,
380 offsets: Vec<O, Global>,
381 ) -> ValidationResult<()> {
382 let table = StringTable::<O, I, NULL_PADDED, Global>::from_parts_unchecked(
383 bytes.into_boxed_slice(),
384 offsets.into_boxed_slice(),
385 );
386 table.validate()
387 }
388
389 #[test]
390 fn validate_rejects_missing_sentinel() {
391 let mut bytes = Vec::new_in(Global);
392 bytes.extend_from_slice(b"hello");
393
394 let mut offsets = Vec::new_in(Global);
395 offsets.push(0u32);
396
397 let result = validate_parts::<u32, u32, false>(bytes, offsets);
398 assert!(matches!(
399 result,
400 Err(ValidationError::LastOffsetMismatch { .. })
401 ));
402 }
403
404 #[test]
405 fn validate_rejects_non_monotonic_offsets() {
406 let mut bytes = Vec::new_in(Global);
407 bytes.extend_from_slice(b"abcd");
408
409 let mut offsets = Vec::new_in(Global);
410 offsets.push(0u32);
411 offsets.push(3u32);
412 offsets.push(2u32);
413 offsets.push(4u32);
414
415 let result = validate_parts::<u32, u32, false>(bytes, offsets);
416 assert!(matches!(
417 result,
418 Err(ValidationError::OffsetsNotMonotonic { .. })
419 ));
420 }
421
422 #[test]
423 fn validate_rejects_invalid_utf8() {
424 let mut bytes = Vec::new_in(Global);
425 bytes.push(0xFF);
426
427 let mut offsets = Vec::new_in(Global);
428 offsets.push(0u32);
429 offsets.push(1u32);
430
431 let result = validate_parts::<u32, u32, false>(bytes, offsets);
432 assert!(matches!(result, Err(ValidationError::InvalidUtf8 { .. })));
433 }
434
435 #[test]
436 fn validate_null_padded_accepts_trailing_nul() {
437 let mut bytes = Vec::new_in(Global);
438 bytes.extend_from_slice(b"hello\0");
439
440 let mut offsets = Vec::new_in(Global);
441 offsets.push(0u32);
442 offsets.push(6u32);
443
444 let result = validate_parts::<u32, u32, true>(bytes, offsets);
445 assert!(result.is_ok());
446 }
447
448 #[test]
449 fn validate_null_padded_rejects_missing_trailing_nul() {
450 let mut bytes = Vec::new_in(Global);
451 bytes.extend_from_slice(b"hello");
452
453 let mut offsets = Vec::new_in(Global);
454 offsets.push(0u32);
455 offsets.push(5u32);
456
457 let result = validate_parts::<u32, u32, true>(bytes, offsets);
458 assert!(matches!(
459 result,
460 Err(ValidationError::NullPaddedStringMissingTrailingNul { .. })
461 ));
462 }
463
464 #[test]
465 fn validate_rejects_offset_type_overflow() {
466 let mut bytes = Vec::new_in(Global);
467 bytes.extend_from_slice(b"abc");
468
469 let mut offsets = Vec::new_in(Global);
470 offsets.push(0u8);
471 offsets.push(3u8);
472
473 let result = validate_parts::<u8, u32, false>(bytes, offsets);
474 assert!(result.is_ok());
475
476 let mut too_big = Vec::new_in(Global);
477 too_big.extend_from_slice(&[0u8; 300]);
478 let mut offsets = Vec::new_in(Global);
479 offsets.push(0u8);
480 offsets.push(u8::MAX);
481
482 let result = validate_parts::<u8, u32, false>(too_big, offsets);
483 assert!(matches!(
484 result,
485 Err(ValidationError::TooManyBytesForOffsetType { .. })
486 ));
487 }
488
489 #[test]
490 fn validate_rejects_id_type_overflow() {
491 let bytes = Vec::new_in(Global).into_boxed_slice();
492 let mut offsets = Vec::new_in(Global);
493 for _ in 0..258 {
494 offsets.push(0u32);
495 }
496
497 let table = StringTable::<u32, u8>::from_parts_unchecked(bytes, offsets.into_boxed_slice());
498 let result = table.validate();
499 assert!(matches!(
500 result,
501 Err(ValidationError::TooManyStrings {
502 strings: 257,
503 id_type: "u8"
504 })
505 ));
506 }
507
508 #[test]
509 fn get_returns_none_for_invalid_id() {
510 let table = StringTable::empty();
511 assert_eq!(table.get(StringId::new(0)), None);
512 }
513}