seq_runtime/
seqstring.rs

1//! SeqString - Arena or Globally Allocated String
2//!
3//! Strings in Seq can be allocated from two sources:
4//! 1. Thread-local arena (fast, bulk-freed on strand exit)
5//! 2. Global allocator (persists across arena resets)
6//!
7//! This allows fast temporary string creation during strand execution
8//! while maintaining safety for channel communication (clone to global).
9
10use crate::arena;
11use std::fmt;
12
/// String that tracks its allocation source
///
/// A `SeqString` is a raw `(ptr, len)` view of UTF-8 bytes plus two
/// bookkeeping fields (`capacity`, `global`) that `Drop` consults to decide
/// whether the bytes have to be freed.
///
/// # Safety Invariants
/// - If global=true and capacity > 0: ptr points to global-allocated String,
///   must be dropped (`Drop` rebuilds the `String` from ptr/len/capacity)
/// - If global=true and capacity == 0: the value is reported as interned by
///   `is_interned` and `Drop` never frees it
/// - If global=false: ptr points to thread-local arena, no drop needed
/// - ptr + len must form a valid UTF-8 string
/// - For global strings: capacity must match the original String's capacity
pub struct SeqString {
    /// Address of the first byte of the string data
    ptr: *const u8,
    /// Length of the string data in bytes
    len: usize,
    capacity: usize, // Only meaningful for global strings
    /// `true` for globally allocated (or interned) data, `false` for arena data
    global: bool,
}
26
27// Implement PartialEq manually to compare string content, not pointers
28impl PartialEq for SeqString {
29    fn eq(&self, other: &Self) -> bool {
30        self.as_str() == other.as_str()
31    }
32}
33
34impl Eq for SeqString {}
35
// Safety: SeqString is Send because:
// - Global strings are truly independent (owned heap allocation)
// - Arena strings are cloned to global on channel send (see Clone impl)
// - We never send arena pointers across threads unsafely
//
// NOTE(review): the type itself does not stop an arena-backed (global=false)
// value from being moved to another thread; the last two points are
// conventions the sending code must follow. Confirm that every cross-thread
// path clones before sending.
unsafe impl Send for SeqString {}
41
// Safety: SeqString is Sync because:
// - The string content is immutable after construction
// - ptr/len are only read, never modified after construction
// - Global strings are a uniquely owned heap buffer (taken over from a
//   String in global_string()); shared references only ever read from it
// - Arena strings point to memory that won't be deallocated while in use
//   (NOTE(review): this relies on the arena outliving every shared
//   reference; confirm against the arena's reset rules)
unsafe impl Sync for SeqString {}
48
49impl SeqString {
50    /// Get string slice
51    ///
52    /// # Safety
53    /// ptr + len must point to valid UTF-8. This is guaranteed by constructors.
54    pub fn as_str(&self) -> &str {
55        unsafe { std::str::from_utf8_unchecked(std::slice::from_raw_parts(self.ptr, self.len)) }
56    }
57
58    /// Check if this string is globally allocated
59    #[allow(dead_code)]
60    pub fn is_global(&self) -> bool {
61        self.global
62    }
63
64    /// Get length in bytes
65    pub fn len(&self) -> usize {
66        self.len
67    }
68
69    /// Check if empty
70    #[allow(dead_code)]
71    pub fn is_empty(&self) -> bool {
72        self.len == 0
73    }
74
75    /// Check if this is an interned/static string (Issue #166)
76    ///
77    /// Interned strings have capacity=0 and point to static data.
78    /// They are never freed and can be compared by pointer for O(1) equality.
79    pub fn is_interned(&self) -> bool {
80        self.capacity == 0 && self.global
81    }
82
83    /// Get raw pointer to string data
84    ///
85    /// Used for O(1) pointer comparison of interned symbols.
86    pub fn as_ptr(&self) -> *const u8 {
87        self.ptr
88    }
89
90    /// Consume self and return raw parts for storage in StackValue
91    ///
92    /// Returns (ptr, len, capacity, global)
93    ///
94    /// # Safety
95    /// The caller must either reconstruct using `from_raw_parts` or
96    /// properly handle drop (for global strings only).
97    pub fn into_raw_parts(self) -> (*const u8, usize, usize, bool) {
98        let parts = (self.ptr, self.len, self.capacity, self.global);
99        std::mem::forget(self); // Don't run Drop
100        parts
101    }
102
103    /// Reconstruct SeqString from raw parts
104    ///
105    /// # Safety
106    /// The parts must have come from `into_raw_parts` on a valid SeqString,
107    /// or be a new valid allocation matching the ptr/len/capacity/global invariants.
108    pub unsafe fn from_raw_parts(
109        ptr: *const u8,
110        len: usize,
111        capacity: usize,
112        global: bool,
113    ) -> Self {
114        SeqString {
115            ptr,
116            len,
117            capacity,
118            global,
119        }
120    }
121}
122
123impl Clone for SeqString {
124    /// Clone always allocates from global allocator for Send safety
125    ///
126    /// This ensures that when a String is sent through a channel,
127    /// the receiving strand gets an independent copy that doesn't
128    /// depend on the sender's arena.
129    fn clone(&self) -> Self {
130        let s = self.as_str().to_string();
131        global_string(s)
132    }
133}
134
135impl Drop for SeqString {
136    fn drop(&mut self) {
137        // Drop only if BOTH conditions are true:
138        // - global=true: Arena strings have global=false and are bulk-freed on strand exit
139        // - capacity > 0: Interned symbols (Issue #166) have capacity=0 and point to
140        //   static data that must NOT be deallocated
141        if self.global && self.capacity > 0 {
142            // Reconstruct String and drop it
143            // Safety: We created this from String in global_string() and stored
144            // the original ptr, len, and capacity. This ensures correct deallocation.
145            unsafe {
146                let _s = String::from_raw_parts(
147                    self.ptr as *mut u8,
148                    self.len,
149                    self.capacity, // Use original capacity for correct deallocation
150                );
151                // _s is dropped here, freeing the memory with correct size
152            }
153        }
154        // Arena strings don't need explicit drop - arena reset frees them
155        // Static/interned strings (capacity=0) point to static data - no drop needed
156    }
157}
158
159impl fmt::Debug for SeqString {
160    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
161        write!(f, "SeqString({:?}, global={})", self.as_str(), self.global)
162    }
163}
164
165impl fmt::Display for SeqString {
166    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
167        write!(f, "{}", self.as_str())
168    }
169}
170
171/// Create arena-allocated string (fast path for temporaries)
172///
173/// # Performance
174/// ~5ns vs ~100ns for global allocator (20x faster)
175///
176/// # Lifetime
177/// Valid until arena_reset() is called (typically when strand exits)
178pub fn arena_string(s: &str) -> SeqString {
179    arena::with_arena(|arena| {
180        let arena_str = arena.alloc_str(s);
181        SeqString {
182            ptr: arena_str.as_ptr(),
183            len: arena_str.len(),
184            capacity: 0, // Not used for arena strings
185            global: false,
186        }
187    })
188}
189
190/// Create globally-allocated string (persists across arena resets)
191///
192/// # Usage
193/// For strings that need to outlive the current strand, or be sent through channels.
194///
195/// # Performance
196/// Same as regular String allocation
197pub fn global_string(s: String) -> SeqString {
198    let len = s.len();
199    let capacity = s.capacity();
200    let ptr = s.as_ptr();
201    std::mem::forget(s); // Transfer ownership, don't drop
202
203    SeqString {
204        ptr,
205        len,
206        capacity, // Store original capacity for correct deallocation
207        global: true,
208    }
209}
210
211/// Convert &str to SeqString using arena allocation
212impl From<&str> for SeqString {
213    fn from(s: &str) -> Self {
214        arena_string(s)
215    }
216}
217
218/// Convert String to SeqString using global allocation
219impl From<String> for SeqString {
220    fn from(s: String) -> Self {
221        global_string(s)
222    }
223}
224
#[cfg(test)]
mod tests {
    //! Unit tests for `SeqString`: construction from both allocation sources,
    //! cloning, dropping, equality, formatting and the raw-parts round trip.

    use super::*;

    #[test]
    fn test_arena_string() {
        let s = arena_string("Hello, arena!");
        assert_eq!(s.as_str(), "Hello, arena!");
        assert_eq!(s.len(), 13);
        assert!(!s.is_global());
    }

    #[test]
    fn test_global_string() {
        let s = global_string("Hello, global!".to_string());
        assert_eq!(s.as_str(), "Hello, global!");
        assert_eq!(s.len(), 14);
        assert!(s.is_global());
    }

    #[test]
    fn test_clone_creates_global() {
        // Clone an arena string
        let s1 = arena_string("test");
        let s2 = s1.clone();

        assert_eq!(s1.as_str(), s2.as_str());
        assert!(!s1.is_global());
        assert!(s2.is_global()); // Clone is always global!
    }

    #[test]
    fn test_clone_global() {
        let s1 = global_string("test".to_string());
        let s2 = s1.clone();

        assert_eq!(s1.as_str(), s2.as_str());
        assert!(s1.is_global());
        assert!(s2.is_global());
    }

    #[test]
    fn test_drop_global() {
        // Create and drop a global string
        {
            let s = global_string("Will be dropped".to_string());
            assert_eq!(s.as_str(), "Will be dropped");
        }
        // If we get here without crashing, drop worked
    }

    #[test]
    fn test_drop_arena() {
        // Create and drop an arena string
        {
            let s = arena_string("Will be dropped (no-op)");
            assert_eq!(s.as_str(), "Will be dropped (no-op)");
        }
        // Arena strings don't need explicit drop
    }

    #[test]
    fn test_equality() {
        let s1 = arena_string("test");
        let s2 = arena_string("test");
        let s3 = global_string("test".to_string());
        let s4 = arena_string("different");

        assert_eq!(s1, s2); // Same content, both arena
        assert_eq!(s1, s3); // Same content, different allocation
        assert_ne!(s1, s4); // Different content
    }

    #[test]
    fn test_from_str() {
        let s: SeqString = "test".into();
        assert_eq!(s.as_str(), "test");
        assert!(!s.is_global()); // from &str uses arena
    }

    #[test]
    fn test_from_string() {
        let s: SeqString = "test".to_string().into();
        assert_eq!(s.as_str(), "test");
        assert!(s.is_global()); // from String uses global
    }

    #[test]
    fn test_debug_format() {
        let s = arena_string("debug");
        let debug_str = format!("{:?}", s);
        assert!(debug_str.contains("debug"));
        assert!(debug_str.contains("global=false"));
    }

    #[test]
    fn test_display_format() {
        let s = global_string("display".to_string());
        let display_str = format!("{}", s);
        assert_eq!(display_str, "display");
    }

    #[test]
    fn test_empty_string() {
        let s = arena_string("");
        assert_eq!(s.len(), 0);
        assert!(s.is_empty());
        assert_eq!(s.as_str(), "");
    }

    #[test]
    fn test_unicode() {
        // Mixes 1-byte (ASCII), 3-byte (CJK) and 4-byte (emoji) code points.
        let s = arena_string("Hello, 世界! 🦀");
        assert_eq!(s.as_str(), "Hello, 世界! 🦀");
        assert!(s.len() > 10); // UTF-8 bytes, not chars
        assert_eq!(s.len(), 19); // 9 ASCII + 2 * 3 (CJK) + 4 (emoji)
        assert_eq!(s.as_str().chars().count(), 12);
    }

    #[test]
    fn test_global_string_preserves_capacity() {
        // PR #11 Critical fix: Verify capacity is preserved for correct deallocation
        let mut s = String::with_capacity(100);
        s.push_str("hi");

        assert_eq!(s.len(), 2);
        assert_eq!(s.capacity(), 100);

        let cem = global_string(s);

        // Verify the SeqString captured the original capacity
        assert_eq!(cem.len(), 2);
        assert_eq!(cem.capacity, 100); // Critical: Must be 100, not 2!
        assert_eq!(cem.as_str(), "hi");
        assert!(cem.is_global());

        // Drop cem - if capacity was wrong, this would cause heap corruption
        drop(cem);

        // If we get here without crash/UB, the fix worked
    }

    #[test]
    fn test_arena_string_capacity_zero() {
        // Arena strings don't use capacity field
        let s = arena_string("test");
        assert_eq!(s.capacity, 0); // Arena strings have capacity=0
        assert!(!s.is_global());
    }

    #[test]
    fn test_raw_parts_round_trip() {
        let original = global_string("round trip".to_string());
        let (ptr, len, capacity, global) = original.into_raw_parts();
        assert_eq!(len, 10);
        assert!(global);

        // SAFETY: the parts come straight from into_raw_parts above.
        let restored = unsafe { SeqString::from_raw_parts(ptr, len, capacity, global) };
        assert_eq!(restored.as_str(), "round trip");
        assert_eq!(restored.as_ptr(), ptr);
        assert!(restored.is_global());
        // Dropping `restored` frees the buffer exactly once.
    }

    #[test]
    fn test_static_string_is_interned() {
        static SYMBOL: &str = "symbol";

        // SAFETY: points at static UTF-8 data; capacity=0 tells Drop not to free it.
        let s = unsafe { SeqString::from_raw_parts(SYMBOL.as_ptr(), SYMBOL.len(), 0, true) };
        assert!(s.is_interned());
        assert!(s.is_global());
        assert_eq!(s.as_str(), "symbol");
        assert_eq!(s.as_ptr(), SYMBOL.as_ptr());

        // Arena and heap-backed strings are not interned.
        assert!(!arena_string("symbol").is_interned());
        assert!(!global_string("symbol".to_string()).is_interned());
    }
}