seq_core/seqstring.rs
1//! SeqString - Arena or Globally Allocated String
2//!
3//! Strings in Seq can be allocated from two sources:
4//! 1. Thread-local arena (fast, bulk-freed on strand exit)
5//! 2. Global allocator (persists across arena resets)
6//!
7//! This allows fast temporary string creation during strand execution
8//! while maintaining safety for channel communication (clone to global).
9
10use crate::arena;
11use std::fmt;
12
/// String that tracks its allocation source
///
/// A `SeqString` is a raw (pointer, length) view of UTF-8 bytes plus two
/// bookkeeping fields that tell `Drop` whether the bytes must be freed.
///
/// # Safety Invariants
/// - `ptr` + `len` must describe valid UTF-8 that stays readable for as long
///   as the value is used.
/// - `global == false`: `ptr` points into the thread-local arena. `Drop` does
///   nothing; `capacity` is unused and set to 0.
/// - `global == true` and `capacity > 0`: `ptr` is a heap buffer taken from a
///   `String`. `capacity` must equal that `String`'s capacity, because `Drop`
///   rebuilds the `String` from (`ptr`, `len`, `capacity`) to free it.
/// - `global == true` and `capacity == 0`: treated as interned/static data
///   (Issue #166). `Drop` does not free it. A global string built from an
///   unallocated empty `String` also lands in this state, which is harmless
///   because there is nothing to free.
pub struct SeqString {
    ptr: *const u8,
    len: usize,
    capacity: usize, // Only meaningful for global strings
    global: bool,
}
26
27// Implement PartialEq manually to compare string content, not pointers
28impl PartialEq for SeqString {
29 fn eq(&self, other: &Self) -> bool {
30 self.as_str() == other.as_str()
31 }
32}
33
34impl Eq for SeqString {}
35
36// Safety: SeqString is Send because:
37// - Global strings are truly independent (owned heap allocation)
38// - Arena strings are cloned to global on channel send (see Clone impl)
39// - We never send arena pointers across threads unsafely
40unsafe impl Send for SeqString {}
41
42// Safety: SeqString is Sync because:
43// - The string content is immutable after construction
44// - ptr/len are only read, never modified after construction
45// - Global strings (Arc<String>) are already Sync
46// - Arena strings point to memory that won't be deallocated while in use
47unsafe impl Sync for SeqString {}
48
49impl SeqString {
50 /// Get string slice
51 ///
52 /// # Safety
53 /// ptr + len must point to valid UTF-8. This is guaranteed by constructors.
54 pub fn as_str(&self) -> &str {
55 unsafe { std::str::from_utf8_unchecked(std::slice::from_raw_parts(self.ptr, self.len)) }
56 }
57
58 /// Check if this string is globally allocated
59 pub fn is_global(&self) -> bool {
60 self.global
61 }
62
63 /// Get length in bytes
64 pub fn len(&self) -> usize {
65 self.len
66 }
67
68 /// Check if empty
69 pub fn is_empty(&self) -> bool {
70 self.len == 0
71 }
72
73 /// Check if this is an interned/static string (Issue #166)
74 ///
75 /// Interned strings have capacity=0 and point to static data.
76 /// They are never freed and can be compared by pointer for O(1) equality.
77 pub fn is_interned(&self) -> bool {
78 self.capacity == 0 && self.global
79 }
80
81 /// Get raw pointer to string data
82 ///
83 /// Used for O(1) pointer comparison of interned symbols.
84 pub fn as_ptr(&self) -> *const u8 {
85 self.ptr
86 }
87
88 /// Reconstruct SeqString from raw parts
89 ///
90 /// # Safety
91 /// The parts must be a valid allocation matching the ptr/len/capacity/global
92 /// invariants documented on `SeqString`.
93 pub unsafe fn from_raw_parts(
94 ptr: *const u8,
95 len: usize,
96 capacity: usize,
97 global: bool,
98 ) -> Self {
99 SeqString {
100 ptr,
101 len,
102 capacity,
103 global,
104 }
105 }
106}
107
108impl Clone for SeqString {
109 /// Clone always allocates from global allocator for Send safety
110 ///
111 /// This ensures that when a String is sent through a channel,
112 /// the receiving strand gets an independent copy that doesn't
113 /// depend on the sender's arena.
114 fn clone(&self) -> Self {
115 let s = self.as_str().to_string();
116 global_string(s)
117 }
118}
119
120impl Drop for SeqString {
121 fn drop(&mut self) {
122 // Drop only if BOTH conditions are true:
123 // - global=true: Arena strings have global=false and are bulk-freed on strand exit
124 // - capacity > 0: Interned symbols (Issue #166) have capacity=0 and point to
125 // static data that must NOT be deallocated
126 if self.global && self.capacity > 0 {
127 // Reconstruct String and drop it
128 // Safety: We created this from String in global_string() and stored
129 // the original ptr, len, and capacity. This ensures correct deallocation.
130 unsafe {
131 let _s = String::from_raw_parts(
132 self.ptr as *mut u8,
133 self.len,
134 self.capacity, // Use original capacity for correct deallocation
135 );
136 // _s is dropped here, freeing the memory with correct size
137 }
138 }
139 // Arena strings don't need explicit drop - arena reset frees them
140 // Static/interned strings (capacity=0) point to static data - no drop needed
141 }
142}
143
144impl fmt::Debug for SeqString {
145 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
146 write!(f, "SeqString({:?}, global={})", self.as_str(), self.global)
147 }
148}
149
150impl fmt::Display for SeqString {
151 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
152 write!(f, "{}", self.as_str())
153 }
154}
155
156/// Create arena-allocated string (fast path for temporaries)
157///
158/// # Performance
159/// ~5ns vs ~100ns for global allocator (20x faster)
160///
161/// # Lifetime
162/// Valid until arena_reset() is called (typically when strand exits)
163pub fn arena_string(s: &str) -> SeqString {
164 arena::with_arena(|arena| {
165 let arena_str = arena.alloc_str(s);
166 SeqString {
167 ptr: arena_str.as_ptr(),
168 len: arena_str.len(),
169 capacity: 0, // Not used for arena strings
170 global: false,
171 }
172 })
173}
174
175/// Create globally-allocated string (persists across arena resets)
176///
177/// # Usage
178/// For strings that need to outlive the current strand, or be sent through channels.
179///
180/// # Performance
181/// Same as regular String allocation
182pub fn global_string(s: String) -> SeqString {
183 let len = s.len();
184 let capacity = s.capacity();
185 let ptr = s.as_ptr();
186 std::mem::forget(s); // Transfer ownership, don't drop
187
188 SeqString {
189 ptr,
190 len,
191 capacity, // Store original capacity for correct deallocation
192 global: true,
193 }
194}
195
196/// Convert &str to SeqString using arena allocation
197impl From<&str> for SeqString {
198 fn from(s: &str) -> Self {
199 arena_string(s)
200 }
201}
202
203/// Convert String to SeqString using global allocation
204impl From<String> for SeqString {
205 fn from(s: String) -> Self {
206 global_string(s)
207 }
208}
209
#[cfg(test)]
mod tests {
    use super::*;

    #[test]
    fn test_arena_string() {
        let s = arena_string("Hello, arena!");
        assert_eq!(s.as_str(), "Hello, arena!");
        assert_eq!(s.len(), 13);
        assert!(!s.is_global());
    }

    #[test]
    fn test_global_string() {
        let s = global_string("Hello, global!".to_string());
        assert_eq!(s.as_str(), "Hello, global!");
        assert_eq!(s.len(), 14);
        assert!(s.is_global());
    }

    #[test]
    fn test_clone_creates_global() {
        // Clone an arena string
        let s1 = arena_string("test");
        let s2 = s1.clone();

        assert_eq!(s1.as_str(), s2.as_str());
        assert!(!s1.is_global());
        assert!(s2.is_global()); // Clone is always global!
    }

    #[test]
    fn test_clone_global() {
        let s1 = global_string("test".to_string());
        let s2 = s1.clone();

        assert_eq!(s1.as_str(), s2.as_str());
        assert!(s1.is_global());
        assert!(s2.is_global());
    }

    #[test]
    fn test_drop_global() {
        // Create and drop a global string
        {
            let s = global_string("Will be dropped".to_string());
            assert_eq!(s.as_str(), "Will be dropped");
        }
        // If we get here without crashing, drop worked
    }

    #[test]
    fn test_drop_arena() {
        // Create and drop an arena string
        {
            let s = arena_string("Will be dropped (no-op)");
            assert_eq!(s.as_str(), "Will be dropped (no-op)");
        }
        // Arena strings don't need explicit drop
    }

    #[test]
    fn test_equality() {
        let s1 = arena_string("test");
        let s2 = arena_string("test");
        let s3 = global_string("test".to_string());
        let s4 = arena_string("different");

        assert_eq!(s1, s2); // Same content, both arena
        assert_eq!(s1, s3); // Same content, different allocation
        assert_ne!(s1, s4); // Different content
    }

    #[test]
    fn test_from_str() {
        let s: SeqString = "test".into();
        assert_eq!(s.as_str(), "test");
        assert!(!s.is_global()); // from &str uses arena
    }

    #[test]
    fn test_from_string() {
        let s: SeqString = "test".to_string().into();
        assert_eq!(s.as_str(), "test");
        assert!(s.is_global()); // from String uses global
    }

    #[test]
    fn test_debug_format() {
        let s = arena_string("debug");
        let debug_str = format!("{:?}", s);
        assert!(debug_str.contains("debug"));
        assert!(debug_str.contains("global=false"));
    }

    #[test]
    fn test_display_format() {
        let s = global_string("display".to_string());
        let display_str = format!("{}", s);
        assert_eq!(display_str, "display");
    }

    #[test]
    fn test_empty_string() {
        let s = arena_string("");
        assert_eq!(s.len(), 0);
        assert!(s.is_empty());
        assert_eq!(s.as_str(), "");
    }

    #[test]
    fn test_unicode() {
        // Multi-byte content: CJK characters (3 bytes each) and an emoji (4 bytes).
        let s = arena_string("Hello, 世界! 🦀");
        assert_eq!(s.as_str(), "Hello, 世界! 🦀");
        assert!(s.len() > 10); // UTF-8 bytes, not chars
    }

    #[test]
    fn test_global_string_preserves_capacity() {
        // PR #11 Critical fix: Verify capacity is preserved for correct deallocation
        let mut s = String::with_capacity(100);
        s.push_str("hi");

        assert_eq!(s.len(), 2);
        assert_eq!(s.capacity(), 100);

        let seq_str = global_string(s);

        // Verify the SeqString captured the original capacity
        assert_eq!(seq_str.len(), 2);
        assert_eq!(seq_str.capacity, 100); // Critical: Must be 100, not 2!
        assert_eq!(seq_str.as_str(), "hi");
        assert!(seq_str.is_global());

        // Drop it - if capacity was wrong, this would cause heap corruption
        drop(seq_str);

        // If we get here without crash/UB, the fix worked
    }

    #[test]
    fn test_arena_string_capacity_zero() {
        // Arena strings don't use capacity field
        let s = arena_string("test");
        assert_eq!(s.capacity, 0); // Arena strings have capacity=0
        assert!(!s.is_global());
    }
}
357}