aion_context/string_table.rs
1// SPDX-License-Identifier: MIT OR Apache-2.0
2//! String table for AION v2 file format
3//!
4//! This module implements the null-terminated UTF-8 string table as specified
5//! in RFC-0002 Section 5.5. The string table is used to store variable-length
6//! text data such as commit messages, audit details, and metadata.
7//!
8//! # Format
9//!
10//! The string table is a concatenation of null-terminated UTF-8 strings with
11//! no padding between entries:
12//!
13//! ```text
14//! "Genesis version\0Added fraud detection\0Updated rules\0"
15//! ```
16//!
17//! # Rules (RFC-0002)
18//!
19//! 1. All strings are UTF-8 encoded
20//! 2. Each string terminated with single null byte (0x00)
21//! 3. No padding between strings
22//! 4. Offsets point to first character (not null terminator)
23//! 5. Lengths do NOT include null terminator
24//!
25//! # Building String Tables
26//!
27//! Use [`StringTableBuilder`] to construct string tables during serialization:
28//!
29//! ```
30//! use aion_context::string_table::StringTableBuilder;
31//!
32//! let mut builder = StringTableBuilder::new();
33//!
34//! // Add strings and get their (offset, length)
35//! let (offset1, len1) = builder.add("Genesis version");
36//! let (offset2, len2) = builder.add("Added fraud detection");
37//!
38//! // Build final byte array
39//! let bytes = builder.build();
40//!
41//! assert_eq!(offset1, 0);
42//! assert_eq!(len1, 15);
43//! assert_eq!(offset2, 16); // "Genesis version\0" = 16 bytes
44//! assert_eq!(len2, 21);
45//! ```
46//!
47//! # Parsing String Tables
48//!
49//! Use [`StringTable`] for zero-copy parsing during deserialization:
50//!
51//! ```
52//! use aion_context::string_table::StringTable;
53//!
54//! let data = b"Genesis version\0Added fraud detection\0";
55//! let table = StringTable::new(data).unwrap();
56//!
57//! // Extract strings by offset/length
58//! let s1 = table.get(0, 15).unwrap();
59//! assert_eq!(s1, "Genesis version");
60//!
61//! let s2 = table.get(16, 21).unwrap();
62//! assert_eq!(s2, "Added fraud detection");
63//! ```
64//!
65//! # UTF-8 Validation
66//!
67//! All strings are validated as UTF-8:
//! - During construction (the builder only accepts `&str`, which is valid UTF-8 by type)
69//! - During parsing (when table is created)
70//! - During extraction (when strings are retrieved)
71//!
72//! Invalid UTF-8 sequences return [`AionError::InvalidUtf8`].
73
74use crate::{AionError, Result};
75
/// String table builder for constructing string tables during serialization
///
/// The builder owns a growing byte buffer. Every string passed to
/// [`StringTableBuilder::add`] is appended to that buffer followed by a single
/// `0x00` terminator, with no padding between entries, and the caller receives
/// the `(offset, length)` pair that locates the string inside the final table.
///
/// # Examples
///
/// ```
/// use aion_context::string_table::StringTableBuilder;
///
/// let mut builder = StringTableBuilder::new();
///
/// let (offset, length) = builder.add("Hello, world!");
/// assert_eq!(offset, 0);
/// assert_eq!(length, 13);
///
/// let bytes = builder.build();
/// assert_eq!(bytes, b"Hello, world!\0");
/// ```
#[derive(Debug, Clone, Default)]
pub struct StringTableBuilder {
    /// Accumulated string data (each entry followed by its null terminator)
    data: Vec<u8>,
}

impl StringTableBuilder {
    /// Create a new empty string table builder
    ///
    /// This is a `const fn` (`Vec::new` is itself `const`), so a builder can
    /// also be created in constant contexts. The backing buffer starts empty.
    ///
    /// # Examples
    ///
    /// ```
    /// use aion_context::string_table::StringTableBuilder;
    ///
    /// let builder = StringTableBuilder::new();
    /// assert_eq!(builder.len(), 0);
    /// assert!(builder.is_empty());
    /// ```
    #[must_use]
    pub const fn new() -> Self {
        Self { data: Vec::new() }
    }

    /// Create a builder with pre-allocated capacity
    ///
    /// `capacity` is the number of bytes (strings plus terminators) the
    /// backing buffer is sized for up front; the builder still starts empty.
    ///
    /// # Examples
    ///
    /// ```
    /// use aion_context::string_table::StringTableBuilder;
    ///
    /// let builder = StringTableBuilder::with_capacity(1024);
    /// assert_eq!(builder.len(), 0);
    /// ```
    #[must_use]
    pub fn with_capacity(capacity: usize) -> Self {
        Self {
            data: Vec::with_capacity(capacity),
        }
    }

    /// Add a string to the table
    ///
    /// Returns `(offset, length)` where:
    /// - `offset` is the byte offset of the string's first character
    /// - `length` is the string length in bytes (excluding null terminator)
    ///
    /// The string is automatically null-terminated and appended to the table.
    ///
    /// # Length limit
    ///
    /// The reported length is a `u32`. A string longer than `u32::MAX` bytes
    /// is still appended in full, but the returned length keeps only the low
    /// 32 bits of the real length. Callers that may see inputs of that size
    /// must reject them before calling this method.
    ///
    /// # Examples
    ///
    /// ```
    /// use aion_context::string_table::StringTableBuilder;
    ///
    /// let mut builder = StringTableBuilder::new();
    ///
    /// let (offset1, len1) = builder.add("First");
    /// assert_eq!(offset1, 0);
    /// assert_eq!(len1, 5);
    ///
    /// let (offset2, len2) = builder.add("Second");
    /// assert_eq!(offset2, 6); // "First\0" = 6 bytes
    /// assert_eq!(len2, 6);
    /// ```
    // The `usize -> u32` cast truncates only above `u32::MAX` bytes; see "Length limit".
    #[allow(clippy::cast_possible_truncation)]
    pub fn add(&mut self, s: &str) -> (u64, u32) {
        let offset = self.data.len() as u64;
        let length = s.len() as u32;

        // Grow once for the payload and its terminator instead of possibly
        // reallocating twice (once per append below).
        self.data.reserve(s.len().saturating_add(1));

        // Append string bytes
        self.data.extend_from_slice(s.as_bytes());

        // Append null terminator
        self.data.push(0);

        (offset, length)
    }

    /// Get the current total size in bytes
    ///
    /// This includes all strings and their null terminators.
    ///
    /// # Examples
    ///
    /// ```
    /// use aion_context::string_table::StringTableBuilder;
    ///
    /// let mut builder = StringTableBuilder::new();
    /// assert_eq!(builder.len(), 0);
    ///
    /// builder.add("Hello");
    /// assert_eq!(builder.len(), 6); // "Hello\0"
    ///
    /// builder.add("World");
    /// assert_eq!(builder.len(), 12); // "Hello\0World\0"
    /// ```
    #[must_use]
    pub fn len(&self) -> usize {
        self.data.len()
    }

    /// Check if the table is empty
    ///
    /// The table is empty only while nothing has been added: even an empty
    /// string contributes its terminator byte.
    ///
    /// # Examples
    ///
    /// ```
    /// use aion_context::string_table::StringTableBuilder;
    ///
    /// let mut builder = StringTableBuilder::new();
    /// assert!(builder.is_empty());
    ///
    /// builder.add("Test");
    /// assert!(!builder.is_empty());
    /// ```
    #[must_use]
    pub fn is_empty(&self) -> bool {
        self.data.is_empty()
    }

    /// Build the final string table as a byte vector
    ///
    /// Consumes the builder and returns the complete string table with all
    /// null terminators.
    ///
    /// # Examples
    ///
    /// ```
    /// use aion_context::string_table::StringTableBuilder;
    ///
    /// let mut builder = StringTableBuilder::new();
    /// builder.add("Alpha");
    /// builder.add("Beta");
    ///
    /// let bytes = builder.build();
    /// assert_eq!(bytes, b"Alpha\0Beta\0");
    /// ```
    #[must_use]
    pub fn build(self) -> Vec<u8> {
        self.data
    }

    /// Clear all strings from the builder
    ///
    /// Offsets returned by earlier [`StringTableBuilder::add`] calls no longer
    /// refer to anything after this; the next added string starts at offset 0.
    ///
    /// # Examples
    ///
    /// ```
    /// use aion_context::string_table::StringTableBuilder;
    ///
    /// let mut builder = StringTableBuilder::new();
    /// builder.add("Test");
    /// assert!(!builder.is_empty());
    ///
    /// builder.clear();
    /// assert!(builder.is_empty());
    /// ```
    pub fn clear(&mut self) {
        self.data.clear();
    }
}
253
254/// String table for zero-copy parsing of string data
255///
256/// This struct wraps a byte slice containing null-terminated UTF-8 strings.
257/// Strings can be extracted by offset and length without copying.
258///
259/// # Examples
260///
261/// ```
262/// use aion_context::string_table::StringTable;
263///
264/// let data = b"Genesis\0Version 2\0";
265/// let table = StringTable::new(data).unwrap();
266///
267/// let s1 = table.get(0, 7).unwrap();
268/// assert_eq!(s1, "Genesis");
269///
270/// let s2 = table.get(8, 9).unwrap();
271/// assert_eq!(s2, "Version 2");
272/// ```
273#[derive(Debug, Clone, Copy)]
274pub struct StringTable<'a> {
275 /// Raw byte data containing null-terminated strings
276 data: &'a [u8],
277}
278
279impl<'a> StringTable<'a> {
280 /// Create a new string table from byte data
281 ///
282 /// # Errors
283 ///
284 /// Returns an error if:
285 /// - The data contains invalid UTF-8 sequences
286 /// - The data is not properly null-terminated
287 ///
288 /// # Examples
289 ///
290 /// ```
291 /// use aion_context::string_table::StringTable;
292 ///
293 /// let data = b"Hello\0World\0";
294 /// let table = StringTable::new(data).unwrap();
295 /// ```
296 pub fn new(data: &'a [u8]) -> Result<Self> {
297 // Validate that data contains valid UTF-8
298 // We do this by attempting to convert to str
299 std::str::from_utf8(data).map_err(|e| AionError::InvalidUtf8 {
300 reason: format!("String table contains invalid UTF-8: {e}"),
301 })?;
302
303 Ok(Self { data })
304 }
305
306 /// Get a string by offset and length
307 ///
308 /// # Arguments
309 ///
310 /// * `offset` - Byte offset to the first character of the string
311 /// * `length` - Length of the string in bytes (excluding null terminator)
312 ///
313 /// # Errors
314 ///
315 /// Returns an error if:
316 /// - Offset + length exceeds table bounds
317 /// - The extracted bytes are not valid UTF-8
318 /// - The string is not properly null-terminated
319 ///
320 /// # Examples
321 ///
322 /// ```
323 /// use aion_context::string_table::StringTable;
324 ///
325 /// let data = b"First\0Second\0Third\0";
326 /// let table = StringTable::new(data).unwrap();
327 ///
328 /// assert_eq!(table.get(0, 5).unwrap(), "First");
329 /// assert_eq!(table.get(6, 6).unwrap(), "Second");
330 /// assert_eq!(table.get(13, 5).unwrap(), "Third");
331 /// ```
332 #[allow(clippy::cast_possible_truncation)] // u64 to usize for indexing
333 pub fn get(&self, offset: u64, length: u32) -> Result<&'a str> {
334 let offset = offset as usize;
335 let length = length as usize;
336
337 // Check bounds
338 let end = offset
339 .checked_add(length)
340 .ok_or_else(|| AionError::InvalidFormat {
341 reason: format!("String table access overflow: offset={offset}, length={length}"),
342 })?;
343
344 if end > self.data.len() {
345 return Err(AionError::InvalidFormat {
346 reason: format!(
347 "String table access out of bounds: offset={offset}, length={length}, table_size={}",
348 self.data.len()
349 ),
350 });
351 }
352
353 // Extract string bytes (excluding null terminator)
354 let string_bytes = self
355 .data
356 .get(offset..end)
357 .ok_or_else(|| AionError::InvalidFormat {
358 reason: format!("Failed to extract string at offset {offset}"),
359 })?;
360
361 // Verify null terminator is present
362 if end < self.data.len() {
363 if let Some(&byte) = self.data.get(end) {
364 if byte != 0 {
365 return Err(AionError::InvalidFormat {
366 reason: format!("String at offset {offset} is not null-terminated"),
367 });
368 }
369 }
370 }
371
372 // Convert to UTF-8 string
373 std::str::from_utf8(string_bytes).map_err(|e| AionError::InvalidUtf8 {
374 reason: format!("String at offset {offset} contains invalid UTF-8: {e}"),
375 })
376 }
377
378 /// Get total size of the string table in bytes
379 ///
380 /// # Examples
381 ///
382 /// ```
383 /// use aion_context::string_table::StringTable;
384 ///
385 /// let data = b"Alpha\0Beta\0";
386 /// let table = StringTable::new(data).unwrap();
387 /// assert_eq!(table.len(), 11);
388 /// ```
389 #[must_use]
390 pub const fn len(&self) -> usize {
391 self.data.len()
392 }
393
394 /// Check if the string table is empty
395 ///
396 /// # Examples
397 ///
398 /// ```
399 /// use aion_context::string_table::StringTable;
400 ///
401 /// let empty = StringTable::new(b"").unwrap();
402 /// assert!(empty.is_empty());
403 ///
404 /// let non_empty = StringTable::new(b"Test\0").unwrap();
405 /// assert!(!non_empty.is_empty());
406 /// ```
407 #[must_use]
408 pub const fn is_empty(&self) -> bool {
409 self.data.is_empty()
410 }
411
412 /// Get the raw byte data
413 ///
414 /// # Examples
415 ///
416 /// ```
417 /// use aion_context::string_table::StringTable;
418 ///
419 /// let data = b"Hello\0";
420 /// let table = StringTable::new(data).unwrap();
421 /// assert_eq!(table.as_bytes(), b"Hello\0");
422 /// ```
423 #[must_use]
424 pub const fn as_bytes(&self) -> &'a [u8] {
425 self.data
426 }
427}
428
#[cfg(test)]
#[allow(clippy::unwrap_used)] // Allow unwrap in test code
mod tests {
    use super::*;

    /// Tests for `StringTableBuilder`: offsets, lengths and the produced bytes.
    mod builder {
        use super::*;

        #[test]
        fn should_create_empty_builder() {
            let fresh = StringTableBuilder::new();
            assert!(fresh.is_empty());
            assert_eq!(fresh.len(), 0);
        }

        #[test]
        fn should_add_single_string() {
            let mut table = StringTableBuilder::new();
            let handle = table.add("Hello");

            assert_eq!(handle, (0, 5));
            assert_eq!(table.len(), 6); // "Hello\0"
            assert_eq!(table.build(), b"Hello\0");
        }

        #[test]
        fn should_add_multiple_strings() {
            // (input, expected offset, expected length)
            let cases: [(&str, u64, u32); 3] =
                [("First", 0, 5), ("Second", 6, 6), ("Third", 13, 5)];

            let mut table = StringTableBuilder::new();
            for &(text, want_offset, want_len) in &cases {
                let (got_offset, got_len) = table.add(text);
                assert_eq!(got_offset, want_offset);
                assert_eq!(got_len, want_len);
            }

            assert_eq!(table.build(), b"First\0Second\0Third\0");
        }

        #[test]
        fn should_handle_empty_strings() {
            let mut table = StringTableBuilder::new();
            let handle = table.add("");

            assert_eq!(handle, (0, 0));
            assert_eq!(table.len(), 1); // Just null terminator
            assert_eq!(table.build(), b"\0");
        }

        #[test]
        fn should_handle_utf8_strings() {
            let parts = ["Hello 世界", "Γειά σου κόσμε", "🎉🎊"];

            let mut table = StringTableBuilder::new();
            for part in &parts {
                table.add(part);
            }

            let want = "Hello 世界\0Γειά σου κόσμε\0🎉🎊\0";
            assert_eq!(table.build(), want.as_bytes());
        }

        #[test]
        fn should_handle_special_characters() {
            let parts = ["Line1\nLine2", "Tab\there", "Quote\"Test"];

            let mut table = StringTableBuilder::new();
            for part in &parts {
                table.add(part);
            }

            assert_eq!(table.build(), b"Line1\nLine2\0Tab\there\0Quote\"Test\0");
        }

        #[test]
        fn should_create_with_capacity() {
            let sized = StringTableBuilder::with_capacity(1024);
            assert!(sized.is_empty());
            assert_eq!(sized.len(), 0);
        }

        #[test]
        fn should_clear_builder() {
            let mut table = StringTableBuilder::new();
            table.add("Test");
            assert_eq!(table.len(), 5);

            table.clear();

            assert!(table.is_empty());
            assert_eq!(table.len(), 0);
        }

        #[test]
        fn should_track_offsets_correctly() {
            let messages = [
                "Genesis version",
                "Added fraud detection",
                "Updated compliance rules",
            ];

            let mut table = StringTableBuilder::new();
            let mut cursor = 0u64;
            for message in &messages {
                let (got_offset, got_len) = table.add(message);
                assert_eq!(got_offset, cursor);
                assert_eq!(got_len as usize, message.len());
                // Advance past the string and its null terminator.
                cursor += message.len() as u64 + 1;
            }
        }
    }

    /// Tests for `StringTable`: extraction, validation and bounds handling.
    mod parser {
        use super::*;

        #[test]
        fn should_parse_empty_table() {
            let view = StringTable::new(b"").unwrap();
            assert!(view.is_empty());
            assert_eq!(view.len(), 0);
        }

        #[test]
        fn should_parse_single_string() {
            let view = StringTable::new(b"Hello\0").unwrap();
            assert_eq!(view.get(0, 5).unwrap(), "Hello");
        }

        #[test]
        fn should_parse_multiple_strings() {
            let view = StringTable::new(b"First\0Second\0Third\0").unwrap();

            // (offset, length, expected text)
            let cases: [(u64, u32, &str); 3] =
                [(0, 5, "First"), (6, 6, "Second"), (13, 5, "Third")];
            for &(offset, length, want) in &cases {
                assert_eq!(view.get(offset, length).unwrap(), want);
            }
        }

        #[test]
        fn should_handle_empty_string() {
            let view = StringTable::new(b"\0Test\0").unwrap();

            let leading_empty = view.get(0, 0).unwrap();
            assert_eq!(leading_empty, "");

            let following = view.get(1, 4).unwrap();
            assert_eq!(following, "Test");
        }

        #[test]
        fn should_parse_utf8_strings() {
            let first = "Hello 世界";
            let second = "🎉";
            let encoded = format!("{first}\0{second}\0");
            let view = StringTable::new(encoded.as_bytes()).unwrap();

            #[allow(clippy::cast_possible_truncation)]
            let first_len = first.len() as u32;
            #[allow(clippy::cast_possible_truncation)]
            let second_len = second.len() as u32;
            // The second string starts right after the first one's terminator.
            let second_offset = u64::from(first_len + 1);

            assert_eq!(view.get(0, first_len).unwrap(), first);
            assert_eq!(view.get(second_offset, second_len).unwrap(), second);
        }

        #[test]
        fn should_reject_invalid_utf8() {
            // 0xFF 0xFE can never appear in well-formed UTF-8.
            let outcome = StringTable::new(b"Hello\0\xFF\xFE\0");
            assert!(outcome.is_err());
        }

        #[test]
        fn should_reject_out_of_bounds_access() {
            let view = StringTable::new(b"Test\0").unwrap();

            // Offset beyond bounds
            assert!(view.get(100, 5).is_err());

            // Length exceeds bounds
            assert!(view.get(0, 100).is_err());
        }

        #[test]
        fn should_verify_null_terminator() {
            let view = StringTable::new(b"Hello\0World\0").unwrap();

            // Valid: properly null-terminated
            let terminated = view.get(0, 5);
            assert!(terminated.is_ok());

            // Invalid: wrong length (would miss null terminator)
            let misaligned = view.get(0, 10);
            assert!(misaligned.is_err());
        }

        #[test]
        fn should_get_as_bytes() {
            let view = StringTable::new(b"Test\0").unwrap();
            assert_eq!(view.as_bytes(), b"Test\0");
        }
    }

    /// Tests that strings written by the builder are read back unchanged.
    mod roundtrip {
        use super::*;

        #[test]
        fn should_roundtrip_single_string() {
            let mut writer = StringTableBuilder::new();
            let (offset, length) = writer.add("Test string");
            let encoded = writer.build();

            let reader = StringTable::new(&encoded).unwrap();
            assert_eq!(reader.get(offset, length).unwrap(), "Test string");
        }

        #[test]
        fn should_roundtrip_multiple_strings() {
            let messages = [
                "Genesis version",
                "Added fraud detection",
                "Updated compliance rules",
                "Fixed security vulnerability",
            ];

            let mut writer = StringTableBuilder::new();
            let handles: Vec<(u64, u32)> = messages.iter().map(|m| writer.add(m)).collect();
            let encoded = writer.build();

            let reader = StringTable::new(&encoded).unwrap();
            for (&(offset, length), want) in handles.iter().zip(&messages) {
                assert_eq!(reader.get(offset, length).unwrap(), *want);
            }
        }

        #[test]
        fn should_roundtrip_utf8() {
            let messages = ["Hello 世界", "Γειά σου κόσμε", "مرحبا بالعالم", "🎉🎊🎈"];

            let mut writer = StringTableBuilder::new();
            let handles: Vec<(u64, u32)> = messages.iter().map(|m| writer.add(m)).collect();
            let encoded = writer.build();

            let reader = StringTable::new(&encoded).unwrap();
            for (&(offset, length), want) in handles.iter().zip(&messages) {
                assert_eq!(reader.get(offset, length).unwrap(), *want);
            }
        }

        #[test]
        fn should_roundtrip_empty_string() {
            let mut writer = StringTableBuilder::new();
            let (offset, length) = writer.add("");
            let encoded = writer.build();

            let reader = StringTable::new(&encoded).unwrap();
            assert_eq!(reader.get(offset, length).unwrap(), "");
        }
    }

    /// Property-based tests over generated string lists.
    mod properties {
        use super::*;
        use hegel::generators as gs;

        #[hegel::test]
        fn prop_add_get_roundtrip(tc: hegel::TestCase) {
            let inputs = tc.draw(gs::vecs(gs::text().max_size(64)).min_size(1).max_size(16));

            let mut writer = StringTableBuilder::new();
            let mut handles: Vec<(u64, u32)> = Vec::new();
            for text in &inputs {
                handles.push(writer.add(text));
            }
            let encoded = writer.build();

            let reader = match StringTable::new(&encoded) {
                Ok(reader) => reader,
                Err(_) => std::process::abort(),
            };
            for (want, &(offset, length)) in inputs.iter().zip(handles.iter()) {
                let got = match reader.get(offset, length) {
                    Ok(got) => got,
                    Err(_) => std::process::abort(),
                };
                assert_eq!(got, want.as_str());
            }
        }

        #[hegel::test]
        fn prop_builder_len_strictly_increases_on_add(tc: hegel::TestCase) {
            let inputs = tc.draw(gs::vecs(gs::text().max_size(64)).min_size(1).max_size(16));

            let mut writer = StringTableBuilder::new();
            let mut last_len = writer.len();
            for text in &inputs {
                writer.add(text);
                // Every add contributes at least the terminator byte.
                let grown = writer.len();
                assert!(grown > last_len);
                last_len = grown;
            }
        }
    }
}