seq_runtime/seqstring.rs
1//! SeqString - Arena or Globally Allocated String
2//!
3//! Strings in Seq can be allocated from two sources:
4//! 1. Thread-local arena (fast, bulk-freed on strand exit)
5//! 2. Global allocator (persists across arena resets)
6//!
7//! This allows fast temporary string creation during strand execution
8//! while maintaining safety for channel communication (clone to global).
9
10use crate::arena;
11use std::fmt;
12
/// String that tracks its allocation source
///
/// The `global` flag and `capacity` field together decide who is responsible
/// for the bytes behind `ptr`:
/// - global=false: bytes live in the thread-local arena; `Drop` does nothing
/// - global=true, capacity>0: bytes are a heap buffer taken from a `String`;
///   `Drop` rebuilds that `String` and frees it
/// - global=true, capacity=0: interned/static data (see `is_interned`);
///   `Drop` does nothing
///
/// # Safety Invariants
/// - If global=true: ptr points to global-allocated String, must be dropped
///   (unless capacity=0, which marks interned/static data that is never freed)
/// - If global=false: ptr points to thread-local arena, no drop needed
/// - ptr + len must form a valid UTF-8 string
/// - For global strings: capacity must match the original String's capacity
pub struct SeqString {
    ptr: *const u8,
    len: usize,
    capacity: usize, // Only meaningful for global strings; 0 there means interned/static
    global: bool,
}
26
27// Implement PartialEq manually to compare string content, not pointers
28impl PartialEq for SeqString {
29 fn eq(&self, other: &Self) -> bool {
30 self.as_str() == other.as_str()
31 }
32}
33
// Marker impl: equality delegates to `str` comparison (see `PartialEq` above),
// which is a full equivalence relation, so `Eq` holds.
impl Eq for SeqString {}
35
// SAFETY: SeqString is Send because:
// - Global strings are truly independent (owned heap allocation)
// - `Clone` always produces a global copy (see Clone impl), so a value that is
//   cloned before being handed to another strand does not reference the
//   sender's arena
// - NOTE(review): nothing in this type prevents an arena-backed value
//   (global=false) from being moved to another thread directly; soundness
//   relies on senders cloning first — confirm at the channel send sites
unsafe impl Send for SeqString {}
41
// SAFETY: SeqString is Sync because:
// - The string content is immutable after construction
// - ptr/len are only read, never modified after construction
// - Global strings are a plain heap buffer taken over from a `String` (see
//   `global_string`); they are not reference counted, and shared reads of
//   immutable bytes need no synchronization
// - NOTE(review): arena strings are assumed to point to memory that is not
//   reset while a shared reference is still in use — this depends on the
//   arena's reset timing, which is not visible here; confirm in `crate::arena`
unsafe impl Sync for SeqString {}
48
49impl SeqString {
50 /// Get string slice
51 ///
52 /// # Safety
53 /// ptr + len must point to valid UTF-8. This is guaranteed by constructors.
54 pub fn as_str(&self) -> &str {
55 unsafe { std::str::from_utf8_unchecked(std::slice::from_raw_parts(self.ptr, self.len)) }
56 }
57
58 /// Check if this string is globally allocated
59 #[allow(dead_code)]
60 pub fn is_global(&self) -> bool {
61 self.global
62 }
63
64 /// Get length in bytes
65 pub fn len(&self) -> usize {
66 self.len
67 }
68
69 /// Check if empty
70 #[allow(dead_code)]
71 pub fn is_empty(&self) -> bool {
72 self.len == 0
73 }
74
75 /// Check if this is an interned/static string (Issue #166)
76 ///
77 /// Interned strings have capacity=0 and point to static data.
78 /// They are never freed and can be compared by pointer for O(1) equality.
79 pub fn is_interned(&self) -> bool {
80 self.capacity == 0 && self.global
81 }
82
83 /// Get raw pointer to string data
84 ///
85 /// Used for O(1) pointer comparison of interned symbols.
86 pub fn as_ptr(&self) -> *const u8 {
87 self.ptr
88 }
89
90 /// Consume self and return raw parts for storage in StackValue
91 ///
92 /// Returns (ptr, len, capacity, global)
93 ///
94 /// # Safety
95 /// The caller must either reconstruct using `from_raw_parts` or
96 /// properly handle drop (for global strings only).
97 pub fn into_raw_parts(self) -> (*const u8, usize, usize, bool) {
98 let parts = (self.ptr, self.len, self.capacity, self.global);
99 std::mem::forget(self); // Don't run Drop
100 parts
101 }
102
103 /// Reconstruct SeqString from raw parts
104 ///
105 /// # Safety
106 /// The parts must have come from `into_raw_parts` on a valid SeqString,
107 /// or be a new valid allocation matching the ptr/len/capacity/global invariants.
108 pub unsafe fn from_raw_parts(
109 ptr: *const u8,
110 len: usize,
111 capacity: usize,
112 global: bool,
113 ) -> Self {
114 SeqString {
115 ptr,
116 len,
117 capacity,
118 global,
119 }
120 }
121}
122
123impl Clone for SeqString {
124 /// Clone always allocates from global allocator for Send safety
125 ///
126 /// This ensures that when a String is sent through a channel,
127 /// the receiving strand gets an independent copy that doesn't
128 /// depend on the sender's arena.
129 fn clone(&self) -> Self {
130 let s = self.as_str().to_string();
131 global_string(s)
132 }
133}
134
135impl Drop for SeqString {
136 fn drop(&mut self) {
137 // Drop only if BOTH conditions are true:
138 // - global=true: Arena strings have global=false and are bulk-freed on strand exit
139 // - capacity > 0: Interned symbols (Issue #166) have capacity=0 and point to
140 // static data that must NOT be deallocated
141 if self.global && self.capacity > 0 {
142 // Reconstruct String and drop it
143 // Safety: We created this from String in global_string() and stored
144 // the original ptr, len, and capacity. This ensures correct deallocation.
145 unsafe {
146 let _s = String::from_raw_parts(
147 self.ptr as *mut u8,
148 self.len,
149 self.capacity, // Use original capacity for correct deallocation
150 );
151 // _s is dropped here, freeing the memory with correct size
152 }
153 }
154 // Arena strings don't need explicit drop - arena reset frees them
155 // Static/interned strings (capacity=0) point to static data - no drop needed
156 }
157}
158
159impl fmt::Debug for SeqString {
160 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
161 write!(f, "SeqString({:?}, global={})", self.as_str(), self.global)
162 }
163}
164
165impl fmt::Display for SeqString {
166 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
167 write!(f, "{}", self.as_str())
168 }
169}
170
171/// Create arena-allocated string (fast path for temporaries)
172///
173/// # Performance
174/// ~5ns vs ~100ns for global allocator (20x faster)
175///
176/// # Lifetime
177/// Valid until arena_reset() is called (typically when strand exits)
178pub fn arena_string(s: &str) -> SeqString {
179 arena::with_arena(|arena| {
180 let arena_str = arena.alloc_str(s);
181 SeqString {
182 ptr: arena_str.as_ptr(),
183 len: arena_str.len(),
184 capacity: 0, // Not used for arena strings
185 global: false,
186 }
187 })
188}
189
190/// Create globally-allocated string (persists across arena resets)
191///
192/// # Usage
193/// For strings that need to outlive the current strand, or be sent through channels.
194///
195/// # Performance
196/// Same as regular String allocation
197pub fn global_string(s: String) -> SeqString {
198 let len = s.len();
199 let capacity = s.capacity();
200 let ptr = s.as_ptr();
201 std::mem::forget(s); // Transfer ownership, don't drop
202
203 SeqString {
204 ptr,
205 len,
206 capacity, // Store original capacity for correct deallocation
207 global: true,
208 }
209}
210
211/// Convert &str to SeqString using arena allocation
212impl From<&str> for SeqString {
213 fn from(s: &str) -> Self {
214 arena_string(s)
215 }
216}
217
218/// Convert String to SeqString using global allocation
219impl From<String> for SeqString {
220 fn from(s: String) -> Self {
221 global_string(s)
222 }
223}
224
#[cfg(test)]
mod tests {
    //! Unit tests for SeqString: construction from both allocation sources,
    //! clone/drop behavior, equality, formatting, and raw-parts handling.

    use super::*;

    #[test]
    fn test_arena_string() {
        let s = arena_string("Hello, arena!");
        assert_eq!(s.as_str(), "Hello, arena!");
        assert_eq!(s.len(), 13);
        assert!(!s.is_global());
    }

    #[test]
    fn test_global_string() {
        let s = global_string("Hello, global!".to_string());
        assert_eq!(s.as_str(), "Hello, global!");
        assert_eq!(s.len(), 14);
        assert!(s.is_global());
    }

    #[test]
    fn test_clone_creates_global() {
        // Clone an arena string
        let s1 = arena_string("test");
        let s2 = s1.clone();

        assert_eq!(s1.as_str(), s2.as_str());
        assert!(!s1.is_global());
        assert!(s2.is_global()); // Clone is always global!
    }

    #[test]
    fn test_clone_global() {
        let s1 = global_string("test".to_string());
        let s2 = s1.clone();

        assert_eq!(s1.as_str(), s2.as_str());
        assert!(s1.is_global());
        assert!(s2.is_global());
    }

    #[test]
    fn test_drop_global() {
        // Create and drop a global string
        {
            let s = global_string("Will be dropped".to_string());
            assert_eq!(s.as_str(), "Will be dropped");
        }
        // If we get here without crashing, drop worked
    }

    #[test]
    fn test_drop_arena() {
        // Create and drop an arena string
        {
            let s = arena_string("Will be dropped (no-op)");
            assert_eq!(s.as_str(), "Will be dropped (no-op)");
        }
        // Arena strings don't need explicit drop
    }

    #[test]
    fn test_equality() {
        let s1 = arena_string("test");
        let s2 = arena_string("test");
        let s3 = global_string("test".to_string());
        let s4 = arena_string("different");

        assert_eq!(s1, s2); // Same content, both arena
        assert_eq!(s1, s3); // Same content, different allocation
        assert_ne!(s1, s4); // Different content
    }

    #[test]
    fn test_from_str() {
        let s: SeqString = "test".into();
        assert_eq!(s.as_str(), "test");
        assert!(!s.is_global()); // from &str uses arena
    }

    #[test]
    fn test_from_string() {
        let s: SeqString = "test".to_string().into();
        assert_eq!(s.as_str(), "test");
        assert!(s.is_global()); // from String uses global
    }

    #[test]
    fn test_debug_format() {
        let s = arena_string("debug");
        let debug_str = format!("{:?}", s);
        assert!(debug_str.contains("debug"));
        assert!(debug_str.contains("global=false"));
    }

    #[test]
    fn test_display_format() {
        let s = global_string("display".to_string());
        let display_str = format!("{}", s);
        assert_eq!(display_str, "display");
    }

    #[test]
    fn test_empty_string() {
        let s = arena_string("");
        assert_eq!(s.len(), 0);
        assert!(s.is_empty());
        assert_eq!(s.as_str(), "");
    }

    #[test]
    fn test_unicode() {
        // Multi-byte characters: two 3-byte CJK characters and one 4-byte emoji
        let s = arena_string("Hello, 世界! 🦀");
        assert_eq!(s.as_str(), "Hello, 世界! 🦀");
        assert!(s.len() > 10); // UTF-8 bytes, not chars
    }

    #[test]
    fn test_global_string_preserves_capacity() {
        // PR #11 Critical fix: Verify capacity is preserved for correct deallocation
        let mut s = String::with_capacity(100);
        s.push_str("hi");

        // `with_capacity` guarantees *at least* the requested capacity, so
        // compare against what the String actually reports.
        let original_capacity = s.capacity();
        assert_eq!(s.len(), 2);
        assert!(original_capacity >= 100);

        let seq = global_string(s);

        // Verify the SeqString captured the original capacity
        assert_eq!(seq.len(), 2);
        assert_eq!(seq.capacity, original_capacity); // Critical: must not be the length (2)!
        assert_eq!(seq.as_str(), "hi");
        assert!(seq.is_global());

        // Drop seq - if capacity was wrong, this would cause heap corruption
        drop(seq);

        // If we get here without crash/UB, the fix worked
    }

    #[test]
    fn test_arena_string_capacity_zero() {
        // Arena strings don't use capacity field
        let s = arena_string("test");
        assert_eq!(s.capacity, 0); // Arena strings have capacity=0
        assert!(!s.is_global());
    }

    #[test]
    fn test_raw_parts_round_trip() {
        // into_raw_parts suppresses Drop; from_raw_parts must restore an
        // equivalent value that frees the buffer when it goes out of scope.
        let original = global_string("round trip".to_string());
        let (ptr, len, capacity, global) = original.into_raw_parts();
        assert_eq!(len, 10);
        assert!(global);

        let restored = unsafe { SeqString::from_raw_parts(ptr, len, capacity, global) };
        assert_eq!(restored.as_str(), "round trip");
        assert_eq!(restored.as_ptr(), ptr);
        assert!(restored.is_global());
    }

    #[test]
    fn test_interned_static_string() {
        // Interned strings: global=true, capacity=0, pointing at static data
        static SYMBOL: &str = "symbol";
        let s = unsafe { SeqString::from_raw_parts(SYMBOL.as_ptr(), SYMBOL.len(), 0, true) };
        assert!(s.is_interned());
        assert_eq!(s.as_ptr(), SYMBOL.as_ptr());
        assert_eq!(s.as_str(), "symbol");

        // Heap-backed and arena-backed strings are not interned
        assert!(!global_string("heap".to_string()).is_interned());
        assert!(!arena_string("arena").is_interned());

        // Dropping an interned string must leave the static data untouched
        drop(s);
        assert_eq!(SYMBOL, "symbol");
    }
}