Skip to main content

aion_context/
string_table.rs

1// SPDX-License-Identifier: MIT OR Apache-2.0
2//! String table for AION v2 file format
3//!
4//! This module implements the null-terminated UTF-8 string table as specified
5//! in RFC-0002 Section 5.5. The string table is used to store variable-length
6//! text data such as commit messages, audit details, and metadata.
7//!
8//! # Format
9//!
10//! The string table is a concatenation of null-terminated UTF-8 strings with
11//! no padding between entries:
12//!
13//! ```text
14//! "Genesis version\0Added fraud detection\0Updated rules\0"
15//! ```
16//!
17//! # Rules (RFC-0002)
18//!
19//! 1. All strings are UTF-8 encoded
20//! 2. Each string terminated with single null byte (0x00)
21//! 3. No padding between strings
22//! 4. Offsets point to first character (not null terminator)
23//! 5. Lengths do NOT include null terminator
24//!
25//! # Building String Tables
26//!
27//! Use [`StringTableBuilder`] to construct string tables during serialization:
28//!
29//! ```
30//! use aion_context::string_table::StringTableBuilder;
31//!
32//! let mut builder = StringTableBuilder::new();
33//!
34//! // Add strings and get their (offset, length)
35//! let (offset1, len1) = builder.add("Genesis version");
36//! let (offset2, len2) = builder.add("Added fraud detection");
37//!
38//! // Build final byte array
39//! let bytes = builder.build();
40//!
41//! assert_eq!(offset1, 0);
42//! assert_eq!(len1, 15);
43//! assert_eq!(offset2, 16); // "Genesis version\0" = 16 bytes
44//! assert_eq!(len2, 21);
45//! ```
46//!
47//! # Parsing String Tables
48//!
49//! Use [`StringTable`] for zero-copy parsing during deserialization:
50//!
51//! ```
52//! use aion_context::string_table::StringTable;
53//!
54//! let data = b"Genesis version\0Added fraud detection\0";
55//! let table = StringTable::new(data).unwrap();
56//!
57//! // Extract strings by offset/length
58//! let s1 = table.get(0, 15).unwrap();
59//! assert_eq!(s1, "Genesis version");
60//!
61//! let s2 = table.get(16, 21).unwrap();
62//! assert_eq!(s2, "Added fraud detection");
63//! ```
64//!
65//! # UTF-8 Validation
66//!
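//! All strings are guaranteed to be UTF-8:
//! - During construction: the builder only accepts `&str`, which is always valid UTF-8
//! - During parsing: the whole table is validated when a [`StringTable`] is created
//! - During extraction: each retrieved slice is validated again, so an offset or
//!   length that splits a multi-byte character is rejected
//!
//! Invalid UTF-8 sequences are reported as [`AionError::InvalidUtf8`].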
73
74use crate::{AionError, Result};
75
/// String table builder for constructing string tables during serialization
///
/// The builder appends every string to a single growing byte buffer, follows
/// it with exactly one null terminator, and reports the `(offset, length)`
/// pair under which the string can later be located. No padding is inserted
/// between entries.
///
/// # Examples
///
/// ```
/// use aion_context::string_table::StringTableBuilder;
///
/// let mut builder = StringTableBuilder::new();
///
/// let (offset, length) = builder.add("Hello, world!");
/// assert_eq!(offset, 0);
/// assert_eq!(length, 13);
///
/// let bytes = builder.build();
/// assert_eq!(bytes, b"Hello, world!\0");
/// ```
#[derive(Debug, Clone, Default)]
pub struct StringTableBuilder {
    /// Accumulated string data; every entry is followed by one `0x00` byte
    data: Vec<u8>,
}

impl StringTableBuilder {
    /// Create a new empty string table builder
    ///
    /// No memory is allocated until the first string is added.
    ///
    /// # Examples
    ///
    /// ```
    /// use aion_context::string_table::StringTableBuilder;
    ///
    /// let builder = StringTableBuilder::new();
    /// assert_eq!(builder.len(), 0);
    /// assert!(builder.is_empty());
    /// ```
    #[must_use]
    pub const fn new() -> Self {
        Self { data: Vec::new() }
    }

    /// Create a builder with pre-allocated capacity
    ///
    /// `capacity` is the number of bytes (strings plus terminators) the
    /// builder can hold before its buffer has to grow.
    ///
    /// # Examples
    ///
    /// ```
    /// use aion_context::string_table::StringTableBuilder;
    ///
    /// let builder = StringTableBuilder::with_capacity(1024);
    /// assert_eq!(builder.len(), 0);
    /// ```
    #[must_use]
    pub fn with_capacity(capacity: usize) -> Self {
        Self {
            data: Vec::with_capacity(capacity),
        }
    }

    /// Add a string to the table
    ///
    /// Returns `(offset, length)` where:
    /// - `offset` is the byte offset of the string's first character
    /// - `length` is the string length in bytes (excluding null terminator)
    ///
    /// The string is automatically null-terminated and appended to the table.
    ///
    /// # Panics
    ///
    /// Panics if `s` is longer than `u32::MAX` bytes. Such a string cannot be
    /// described by the `u32` length field, and silently truncating the length
    /// would produce a corrupt table. The builder is left untouched in that
    /// case.
    ///
    /// # Examples
    ///
    /// ```
    /// use aion_context::string_table::StringTableBuilder;
    ///
    /// let mut builder = StringTableBuilder::new();
    ///
    /// let (offset1, len1) = builder.add("First");
    /// assert_eq!(offset1, 0);
    /// assert_eq!(len1, 5);
    ///
    /// let (offset2, len2) = builder.add("Second");
    /// assert_eq!(offset2, 6); // "First\0" = 6 bytes
    /// assert_eq!(len2, 6);
    /// ```
    #[allow(clippy::cast_possible_truncation)] // Guarded by the length assertion below
    pub fn add(&mut self, s: &str) -> (u64, u32) {
        // Enforce the u32 cap *before* mutating anything so a rejected string
        // never leaves a half-written entry behind.
        assert!(
            s.len() as u64 <= u64::from(u32::MAX),
            "string of {} bytes exceeds the u32 length field of the string table",
            s.len()
        );

        // The new entry starts where the buffer currently ends.
        let offset = self.data.len() as u64;
        let length = s.len() as u32;

        // Append string bytes followed by the single null terminator.
        self.data.extend_from_slice(s.as_bytes());
        self.data.push(0);

        (offset, length)
    }

    /// Get the current total size in bytes
    ///
    /// This includes all strings and their null terminators.
    ///
    /// # Examples
    ///
    /// ```
    /// use aion_context::string_table::StringTableBuilder;
    ///
    /// let mut builder = StringTableBuilder::new();
    /// assert_eq!(builder.len(), 0);
    ///
    /// builder.add("Hello");
    /// assert_eq!(builder.len(), 6); // "Hello\0"
    ///
    /// builder.add("World");
    /// assert_eq!(builder.len(), 12); // "Hello\0World\0"
    /// ```
    #[must_use]
    pub fn len(&self) -> usize {
        self.data.len()
    }

    /// Check if the table is empty
    ///
    /// A builder is empty only while no string has been added; even an empty
    /// string contributes its terminator byte.
    ///
    /// # Examples
    ///
    /// ```
    /// use aion_context::string_table::StringTableBuilder;
    ///
    /// let mut builder = StringTableBuilder::new();
    /// assert!(builder.is_empty());
    ///
    /// builder.add("Test");
    /// assert!(!builder.is_empty());
    /// ```
    #[must_use]
    pub fn is_empty(&self) -> bool {
        self.data.is_empty()
    }

    /// Build the final string table as a byte vector
    ///
    /// Consumes the builder and returns the complete string table with all
    /// null terminators.
    ///
    /// # Examples
    ///
    /// ```
    /// use aion_context::string_table::StringTableBuilder;
    ///
    /// let mut builder = StringTableBuilder::new();
    /// builder.add("Alpha");
    /// builder.add("Beta");
    ///
    /// let bytes = builder.build();
    /// assert_eq!(bytes, b"Alpha\0Beta\0");
    /// ```
    #[must_use]
    pub fn build(self) -> Vec<u8> {
        self.data
    }

    /// Clear all strings from the builder
    ///
    /// Offsets handed out before the call no longer refer to anything; the
    /// next added string starts at offset 0 again.
    ///
    /// # Examples
    ///
    /// ```
    /// use aion_context::string_table::StringTableBuilder;
    ///
    /// let mut builder = StringTableBuilder::new();
    /// builder.add("Test");
    /// assert!(!builder.is_empty());
    ///
    /// builder.clear();
    /// assert!(builder.is_empty());
    /// ```
    pub fn clear(&mut self) {
        self.data.clear();
    }
}
253
254/// String table for zero-copy parsing of string data
255///
256/// This struct wraps a byte slice containing null-terminated UTF-8 strings.
257/// Strings can be extracted by offset and length without copying.
258///
259/// # Examples
260///
261/// ```
262/// use aion_context::string_table::StringTable;
263///
264/// let data = b"Genesis\0Version 2\0";
265/// let table = StringTable::new(data).unwrap();
266///
267/// let s1 = table.get(0, 7).unwrap();
268/// assert_eq!(s1, "Genesis");
269///
270/// let s2 = table.get(8, 9).unwrap();
271/// assert_eq!(s2, "Version 2");
272/// ```
273#[derive(Debug, Clone, Copy)]
274pub struct StringTable<'a> {
275    /// Raw byte data containing null-terminated strings
276    data: &'a [u8],
277}
278
279impl<'a> StringTable<'a> {
280    /// Create a new string table from byte data
281    ///
282    /// # Errors
283    ///
284    /// Returns an error if:
285    /// - The data contains invalid UTF-8 sequences
286    /// - The data is not properly null-terminated
287    ///
288    /// # Examples
289    ///
290    /// ```
291    /// use aion_context::string_table::StringTable;
292    ///
293    /// let data = b"Hello\0World\0";
294    /// let table = StringTable::new(data).unwrap();
295    /// ```
296    pub fn new(data: &'a [u8]) -> Result<Self> {
297        // Validate that data contains valid UTF-8
298        // We do this by attempting to convert to str
299        std::str::from_utf8(data).map_err(|e| AionError::InvalidUtf8 {
300            reason: format!("String table contains invalid UTF-8: {e}"),
301        })?;
302
303        Ok(Self { data })
304    }
305
306    /// Get a string by offset and length
307    ///
308    /// # Arguments
309    ///
310    /// * `offset` - Byte offset to the first character of the string
311    /// * `length` - Length of the string in bytes (excluding null terminator)
312    ///
313    /// # Errors
314    ///
315    /// Returns an error if:
316    /// - Offset + length exceeds table bounds
317    /// - The extracted bytes are not valid UTF-8
318    /// - The string is not properly null-terminated
319    ///
320    /// # Examples
321    ///
322    /// ```
323    /// use aion_context::string_table::StringTable;
324    ///
325    /// let data = b"First\0Second\0Third\0";
326    /// let table = StringTable::new(data).unwrap();
327    ///
328    /// assert_eq!(table.get(0, 5).unwrap(), "First");
329    /// assert_eq!(table.get(6, 6).unwrap(), "Second");
330    /// assert_eq!(table.get(13, 5).unwrap(), "Third");
331    /// ```
332    #[allow(clippy::cast_possible_truncation)] // u64 to usize for indexing
333    pub fn get(&self, offset: u64, length: u32) -> Result<&'a str> {
334        let offset = offset as usize;
335        let length = length as usize;
336
337        // Check bounds
338        let end = offset
339            .checked_add(length)
340            .ok_or_else(|| AionError::InvalidFormat {
341                reason: format!("String table access overflow: offset={offset}, length={length}"),
342            })?;
343
344        if end > self.data.len() {
345            return Err(AionError::InvalidFormat {
346                reason: format!(
347                    "String table access out of bounds: offset={offset}, length={length}, table_size={}",
348                    self.data.len()
349                ),
350            });
351        }
352
353        // Extract string bytes (excluding null terminator)
354        let string_bytes = self
355            .data
356            .get(offset..end)
357            .ok_or_else(|| AionError::InvalidFormat {
358                reason: format!("Failed to extract string at offset {offset}"),
359            })?;
360
361        // Verify null terminator is present
362        if end < self.data.len() {
363            if let Some(&byte) = self.data.get(end) {
364                if byte != 0 {
365                    return Err(AionError::InvalidFormat {
366                        reason: format!("String at offset {offset} is not null-terminated"),
367                    });
368                }
369            }
370        }
371
372        // Convert to UTF-8 string
373        std::str::from_utf8(string_bytes).map_err(|e| AionError::InvalidUtf8 {
374            reason: format!("String at offset {offset} contains invalid UTF-8: {e}"),
375        })
376    }
377
378    /// Get total size of the string table in bytes
379    ///
380    /// # Examples
381    ///
382    /// ```
383    /// use aion_context::string_table::StringTable;
384    ///
385    /// let data = b"Alpha\0Beta\0";
386    /// let table = StringTable::new(data).unwrap();
387    /// assert_eq!(table.len(), 11);
388    /// ```
389    #[must_use]
390    pub const fn len(&self) -> usize {
391        self.data.len()
392    }
393
394    /// Check if the string table is empty
395    ///
396    /// # Examples
397    ///
398    /// ```
399    /// use aion_context::string_table::StringTable;
400    ///
401    /// let empty = StringTable::new(b"").unwrap();
402    /// assert!(empty.is_empty());
403    ///
404    /// let non_empty = StringTable::new(b"Test\0").unwrap();
405    /// assert!(!non_empty.is_empty());
406    /// ```
407    #[must_use]
408    pub const fn is_empty(&self) -> bool {
409        self.data.is_empty()
410    }
411
412    /// Get the raw byte data
413    ///
414    /// # Examples
415    ///
416    /// ```
417    /// use aion_context::string_table::StringTable;
418    ///
419    /// let data = b"Hello\0";
420    /// let table = StringTable::new(data).unwrap();
421    /// assert_eq!(table.as_bytes(), b"Hello\0");
422    /// ```
423    #[must_use]
424    pub const fn as_bytes(&self) -> &'a [u8] {
425        self.data
426    }
427}
428
429#[cfg(test)]
430#[allow(clippy::unwrap_used)] // Allow unwrap in test code
431mod tests {
432    use super::*;
433
434    mod builder {
435        use super::*;
436
437        #[test]
438        fn should_create_empty_builder() {
439            let builder = StringTableBuilder::new();
440            assert_eq!(builder.len(), 0);
441            assert!(builder.is_empty());
442        }
443
444        #[test]
445        fn should_add_single_string() {
446            let mut builder = StringTableBuilder::new();
447            let (offset, length) = builder.add("Hello");
448
449            assert_eq!(offset, 0);
450            assert_eq!(length, 5);
451            assert_eq!(builder.len(), 6); // "Hello\0"
452
453            let bytes = builder.build();
454            assert_eq!(bytes, b"Hello\0");
455        }
456
457        #[test]
458        fn should_add_multiple_strings() {
459            let mut builder = StringTableBuilder::new();
460
461            let (offset1, len1) = builder.add("First");
462            assert_eq!(offset1, 0);
463            assert_eq!(len1, 5);
464
465            let (offset2, len2) = builder.add("Second");
466            assert_eq!(offset2, 6);
467            assert_eq!(len2, 6);
468
469            let (offset3, len3) = builder.add("Third");
470            assert_eq!(offset3, 13);
471            assert_eq!(len3, 5);
472
473            let bytes = builder.build();
474            assert_eq!(bytes, b"First\0Second\0Third\0");
475        }
476
477        #[test]
478        fn should_handle_empty_strings() {
479            let mut builder = StringTableBuilder::new();
480            let (offset, length) = builder.add("");
481
482            assert_eq!(offset, 0);
483            assert_eq!(length, 0);
484            assert_eq!(builder.len(), 1); // Just null terminator
485
486            let bytes = builder.build();
487            assert_eq!(bytes, b"\0");
488        }
489
490        #[test]
491        fn should_handle_utf8_strings() {
492            let mut builder = StringTableBuilder::new();
493
494            builder.add("Hello 世界");
495            builder.add("Γειά σου κόσμε");
496            builder.add("🎉🎊");
497
498            let bytes = builder.build();
499            let expected = "Hello 世界\0Γειά σου κόσμε\0🎉🎊\0";
500            assert_eq!(bytes, expected.as_bytes());
501        }
502
503        #[test]
504        fn should_handle_special_characters() {
505            let mut builder = StringTableBuilder::new();
506            builder.add("Line1\nLine2");
507            builder.add("Tab\there");
508            builder.add("Quote\"Test");
509
510            let bytes = builder.build();
511            assert_eq!(bytes, b"Line1\nLine2\0Tab\there\0Quote\"Test\0");
512        }
513
514        #[test]
515        fn should_create_with_capacity() {
516            let builder = StringTableBuilder::with_capacity(1024);
517            assert_eq!(builder.len(), 0);
518            assert!(builder.is_empty());
519        }
520
521        #[test]
522        fn should_clear_builder() {
523            let mut builder = StringTableBuilder::new();
524            builder.add("Test");
525            assert_eq!(builder.len(), 5);
526
527            builder.clear();
528            assert_eq!(builder.len(), 0);
529            assert!(builder.is_empty());
530        }
531
532        #[test]
533        fn should_track_offsets_correctly() {
534            let mut builder = StringTableBuilder::new();
535
536            let strings = vec![
537                "Genesis version",
538                "Added fraud detection",
539                "Updated compliance rules",
540            ];
541
542            let mut expected_offset = 0u64;
543            for s in &strings {
544                let (offset, length) = builder.add(s);
545                assert_eq!(offset, expected_offset);
546                assert_eq!(length as usize, s.len());
547                expected_offset += s.len() as u64 + 1; // +1 for null terminator
548            }
549        }
550    }
551
552    mod parser {
553        use super::*;
554
555        #[test]
556        fn should_parse_empty_table() {
557            let table = StringTable::new(b"").unwrap();
558            assert_eq!(table.len(), 0);
559            assert!(table.is_empty());
560        }
561
562        #[test]
563        fn should_parse_single_string() {
564            let data = b"Hello\0";
565            let table = StringTable::new(data).unwrap();
566
567            let s = table.get(0, 5).unwrap();
568            assert_eq!(s, "Hello");
569        }
570
571        #[test]
572        fn should_parse_multiple_strings() {
573            let data = b"First\0Second\0Third\0";
574            let table = StringTable::new(data).unwrap();
575
576            assert_eq!(table.get(0, 5).unwrap(), "First");
577            assert_eq!(table.get(6, 6).unwrap(), "Second");
578            assert_eq!(table.get(13, 5).unwrap(), "Third");
579        }
580
581        #[test]
582        fn should_handle_empty_string() {
583            let data = b"\0Test\0";
584            let table = StringTable::new(data).unwrap();
585
586            assert_eq!(table.get(0, 0).unwrap(), "");
587            assert_eq!(table.get(1, 4).unwrap(), "Test");
588        }
589
590        #[test]
591        fn should_parse_utf8_strings() {
592            let s1 = "Hello 世界";
593            let s2 = "🎉";
594            let data = format!("{s1}\0{s2}\0");
595            let table = StringTable::new(data.as_bytes()).unwrap();
596
597            #[allow(clippy::cast_possible_truncation)]
598            let len1 = s1.len() as u32;
599            #[allow(clippy::cast_possible_truncation)]
600            let len2 = s2.len() as u32;
601            let offset2 = u64::from(len1 + 1); // +1 for null terminator
602
603            assert_eq!(table.get(0, len1).unwrap(), s1);
604            assert_eq!(table.get(offset2, len2).unwrap(), s2);
605        }
606
607        #[test]
608        fn should_reject_invalid_utf8() {
609            let data = b"Hello\0\xFF\xFE\0"; // Invalid UTF-8
610            let result = StringTable::new(data);
611            assert!(result.is_err());
612        }
613
614        #[test]
615        fn should_reject_out_of_bounds_access() {
616            let data = b"Test\0";
617            let table = StringTable::new(data).unwrap();
618
619            // Offset beyond bounds
620            let result = table.get(100, 5);
621            assert!(result.is_err());
622
623            // Length exceeds bounds
624            let result = table.get(0, 100);
625            assert!(result.is_err());
626        }
627
628        #[test]
629        fn should_verify_null_terminator() {
630            let data = b"Hello\0World\0";
631            let table = StringTable::new(data).unwrap();
632
633            // Valid: properly null-terminated
634            assert!(table.get(0, 5).is_ok());
635
636            // Invalid: wrong length (would miss null terminator)
637            let result = table.get(0, 10);
638            assert!(result.is_err());
639        }
640
641        #[test]
642        fn should_get_as_bytes() {
643            let data = b"Test\0";
644            let table = StringTable::new(data).unwrap();
645            assert_eq!(table.as_bytes(), b"Test\0");
646        }
647    }
648
649    mod roundtrip {
650        use super::*;
651
652        #[test]
653        fn should_roundtrip_single_string() {
654            let mut builder = StringTableBuilder::new();
655            let (offset, length) = builder.add("Test string");
656
657            let bytes = builder.build();
658            let table = StringTable::new(&bytes).unwrap();
659
660            let recovered = table.get(offset, length).unwrap();
661            assert_eq!(recovered, "Test string");
662        }
663
664        #[test]
665        fn should_roundtrip_multiple_strings() {
666            let mut builder = StringTableBuilder::new();
667
668            let strings = vec![
669                "Genesis version",
670                "Added fraud detection",
671                "Updated compliance rules",
672                "Fixed security vulnerability",
673            ];
674
675            let mut entries = Vec::new();
676            for s in &strings {
677                entries.push(builder.add(s));
678            }
679
680            let bytes = builder.build();
681            let table = StringTable::new(&bytes).unwrap();
682
683            for ((offset, length), expected) in entries.iter().zip(&strings) {
684                let recovered = table.get(*offset, *length).unwrap();
685                assert_eq!(recovered, *expected);
686            }
687        }
688
689        #[test]
690        fn should_roundtrip_utf8() {
691            let mut builder = StringTableBuilder::new();
692
693            let strings = vec!["Hello 世界", "Γειά σου κόσμε", "مرحبا بالعالم", "🎉🎊🎈"];
694
695            let mut entries = Vec::new();
696            for s in &strings {
697                entries.push(builder.add(s));
698            }
699
700            let bytes = builder.build();
701            let table = StringTable::new(&bytes).unwrap();
702
703            for ((offset, length), expected) in entries.iter().zip(&strings) {
704                let recovered = table.get(*offset, *length).unwrap();
705                assert_eq!(recovered, *expected);
706            }
707        }
708
709        #[test]
710        fn should_roundtrip_empty_string() {
711            let mut builder = StringTableBuilder::new();
712            let (offset, length) = builder.add("");
713
714            let bytes = builder.build();
715            let table = StringTable::new(&bytes).unwrap();
716
717            let recovered = table.get(offset, length).unwrap();
718            assert_eq!(recovered, "");
719        }
720    }
721
722    mod properties {
723        use super::*;
724        use hegel::generators as gs;
725
726        #[hegel::test]
727        fn prop_add_get_roundtrip(tc: hegel::TestCase) {
728            let strings = tc.draw(gs::vecs(gs::text().max_size(64)).min_size(1).max_size(16));
729            let mut builder = StringTableBuilder::new();
730            let handles: Vec<(u64, u32)> = strings.iter().map(|s| builder.add(s)).collect();
731            let bytes = builder.build();
732            let table = StringTable::new(&bytes).unwrap_or_else(|_| std::process::abort());
733            for (original, (offset, length)) in strings.iter().zip(handles.iter()) {
734                let recovered = table
735                    .get(*offset, *length)
736                    .unwrap_or_else(|_| std::process::abort());
737                assert_eq!(recovered, original.as_str());
738            }
739        }
740
741        #[hegel::test]
742        fn prop_builder_len_strictly_increases_on_add(tc: hegel::TestCase) {
743            let strings = tc.draw(gs::vecs(gs::text().max_size(64)).min_size(1).max_size(16));
744            let mut builder = StringTableBuilder::new();
745            let mut prev = builder.len();
746            for s in &strings {
747                builder.add(s);
748                let now = builder.len();
749                assert!(now > prev);
750                prev = now;
751            }
752        }
753    }
754}