Skip to main content

xsd_schema/document/
strings.rs

1use bumpalo::Bump;
2
/// Maximum length, in bytes, of a string that is copied into the arena.
///
/// The comparison is inclusive: a string whose length is at most this value is
/// arena-allocated, while anything longer gets its own heap allocation.
const SHORT_THRESHOLD: usize = 64;
5
/// Backing storage for one pooled string.
///
/// `Short` borrows text that lives for `'a`; `Long` owns a boxed copy of its
/// text. Either way the contents are read through [`StringValue::as_str`].
enum StringValue<'a> {
    Short(&'a str),
    Long(Box<str>),
}

impl<'a> StringValue<'a> {
    /// Borrows the text held by this value, whichever variant stores it.
    fn as_str(&self) -> &str {
        match *self {
            // `&str` is `Copy`, so the borrowed slice is simply handed back.
            Self::Short(text) => text,
            // Borrow the box in place; it derefs to the owned `str`.
            Self::Long(ref text) => text,
        }
    }
}
20
21/// Arena-allocated string pool with heap fallback for long strings.
22///
23/// Index 0 is a sentinel representing the empty string (no entry is stored for it).
24/// All stored strings receive 1-based indices.
25pub struct StringStore<'a> {
26    arena: &'a Bump,
27    values: Vec<StringValue<'a>>,
28}
29
30impl<'a> StringStore<'a> {
31    /// Creates a new empty string store backed by the given arena.
32    pub fn new(arena: &'a Bump) -> Self {
33        Self {
34            arena,
35            values: Vec::new(),
36        }
37    }
38
39    /// Stores a string and returns its 1-based index.
40    ///
41    /// Strings up to [`SHORT_THRESHOLD`] bytes are copied into the arena;
42    /// longer strings are heap-allocated.
43    pub fn store(&mut self, s: &str) -> u32 {
44        let val = if s.len() <= SHORT_THRESHOLD {
45            let copied = self.arena.alloc_str(s);
46            StringValue::Short(copied)
47        } else {
48            StringValue::Long(s.into())
49        };
50        self.values.push(val);
51        self.values.len() as u32 // 1-based
52    }
53
54    /// Returns the string at the given index.
55    ///
56    /// Index 0 returns `""` (empty-string sentinel).
57    /// Other indices are 1-based into the internal storage.
58    ///
59    /// # Panics
60    ///
61    /// Panics if `idx` is out of range (greater than the number of stored strings).
62    pub fn get(&self, idx: u32) -> &str {
63        if idx == 0 {
64            return "";
65        }
66        self.values[(idx - 1) as usize].as_str()
67    }
68}
69
#[cfg(test)]
mod tests {
    use super::*;

    /// A short string gets index 1 and reads back unchanged.
    #[test]
    fn store_and_retrieve_short_string() {
        let bump = Bump::new();
        let mut pool = StringStore::new(&bump);

        let handle = pool.store("hello");

        assert_eq!(handle, 1);
        assert_eq!(pool.get(handle), "hello");
    }

    /// A string well past the threshold also gets index 1 and reads back unchanged.
    #[test]
    fn store_and_retrieve_long_string() {
        let bump = Bump::new();
        let mut pool = StringStore::new(&bump);
        let payload = "x".repeat(100);

        let handle = pool.store(&payload);

        assert_eq!(handle, 1);
        assert_eq!(pool.get(handle), payload);
    }

    /// Index 0 yields the empty string even when nothing has been stored.
    #[test]
    fn index_zero_returns_empty() {
        let bump = Bump::new();
        let pool = StringStore::new(&bump);

        assert_eq!(pool.get(0), "");
    }

    /// Consecutive stores hand out 1, 2, 3 and each index maps to its own string.
    #[test]
    fn sequential_one_based_indices() {
        let bump = Bump::new();
        let mut pool = StringStore::new(&bump);
        let inputs = ["a", "bb", "ccc"];

        // Store everything first, then verify, so later pushes cannot mask
        // a problem with earlier entries.
        let handles: Vec<u32> = inputs.iter().map(|text| pool.store(text)).collect();

        assert_eq!(handles, [1, 2, 3]);
        for (idx, expected) in (1u32..).zip(inputs.iter()) {
            assert_eq!(pool.get(idx), *expected);
        }
    }

    /// Storing `""` explicitly still creates a real entry with a non-zero index.
    #[test]
    fn store_empty_string() {
        let bump = Bump::new();
        let mut pool = StringStore::new(&bump);

        let handle = pool.store("");

        assert_eq!(handle, 1);
        assert_eq!(pool.get(handle), "");
    }

    /// Strings on either side of `SHORT_THRESHOLD` both round-trip.
    #[test]
    fn boundary_short_long() {
        let bump = Bump::new();
        let mut pool = StringStore::new(&bump);

        // Exactly at threshold — should be Short
        let exact = "a".repeat(SHORT_THRESHOLD);
        let exact_handle = pool.store(&exact);
        assert_eq!(pool.get(exact_handle), exact);

        // One byte over — should be Long
        let over = "a".repeat(SHORT_THRESHOLD + 1);
        let over_handle = pool.store(&over);
        assert_eq!(pool.get(over_handle), over);
    }
}
137}