sochdb_core/lockfree_interner.rs

// SPDX-License-Identifier: AGPL-3.0-or-later
// SochDB - LLM-Optimized Embedded Database
// Copyright (C) 2026 Sushanth Reddy Vanagala (https://github.com/sushanthpy)
//
// This program is free software: you can redistribute it and/or modify
// it under the terms of the GNU Affero General Public License as published by
// the Free Software Foundation, either version 3 of the License, or
// (at your option) any later version.
//
// This program is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU Affero General Public License for more details.
//
// You should have received a copy of the GNU Affero General Public License
// along with this program. If not, see <https://www.gnu.org/licenses/>.

//! Lock-Free String Interner with Chunked Append-Only Storage
//!
//! This module implements a high-performance string interner that:
//! - Uses chunked append-only storage (never moves existing strings)
//! - Provides lock-free reads via pre-allocated fixed buffers
//! - Uses sharded hash maps to reduce write contention
//! - Supports zero-cost string comparison via Symbol handles
//!
//! # Concurrency Guarantees - PLEASE READ CAREFULLY
//!
//! Despite the module name, this interner is **NOT fully lock-free**.
//! The "lock-free" in the name refers specifically to the storage read path.
//!
//! ## Threading Guarantees Table
//!
//! | Operation | Guarantee | Notes |
//! |-----------|-----------|-------|
//! | `resolve()` | Low-contention | Brief read lock on the symbol table, then lock-free chunk traversal |
//! | `get_or_intern()` (hit) | Low-contention | Hash lookup under a sharded read lock |
//! | `get_or_intern()` (miss) | Blocking | Requires a write lock on one shard |
//! | `len()` | Wait-free | Atomic counter |
//!
//! ## What IS Provided
//!
//! - **Thread-safety**: All operations are safe from multiple threads
//! - **Lock-free storage reads**: chunk traversal uses only atomic pointers
//! - **Low-contention lookups**: 256-way sharding reduces lock contention
//! - **Deadlock-freedom**: Single lock per shard, no nested locking
//!
//! ## What is NOT Provided
//!
//! - **Lock-free writes**: Interning requires write locks on shards
//! - **Wait-free interning**: Writers may block under contention
//!
//! ## Progress Taxonomy
//!
//! ```text
//! wait-free ⊂ lock-free ⊂ obstruction-free   (the non-blocking classes)
//!     ↑           ↑
//!   len()    storage reads
//! ```
//!
//! This implementation provides:
//! - **Lock-free storage reads**: chunk traversal always makes progress without locks
//! - **Blocking writes**: May wait for lock acquisition
//!
//! ## Memory Layout
//!
//! Each chunk is a fixed-size, zero-initialized buffer behind a raw pointer
//! (allocated once as a `Box<[u8]>` and never resized). This eliminates the
//! data race that would occur with `Vec<u8>` (whose metadata like `len` and
//! `capacity` are not thread-safe to modify concurrently).
//!
//! ```text
//! Chunk 0 (1MB fixed)       Chunk 1 (1MB fixed)       Chunk 2 (1MB fixed)
//! ┌─────────────────┐       ┌─────────────────┐       ┌─────────────────┐
//! │ "hello"         │       │ "different"     │       │ "more strings"  │
//! │ "world"         │       │ "strings"       │       │ ...             │
//! │ "foo"           │       │ "here"          │       │                 │
//! │ [unused, zeroed]│       │ [unused, zeroed]│       │ [unused, zeroed]│
//! └─────────────────┘       └─────────────────┘       └─────────────────┘
//!           ↑                        ↑                        ↑
//!           │                        │                        │
//!     AtomicPtr chain for lock-free traversal
//! ```
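//!
//! # Example
//!
//! A minimal usage sketch of the public API below (again assuming the
//! `sochdb_core::lockfree_interner` path):
//!
//! ```ignore
//! use sochdb_core::lockfree_interner::{LockFreeInterner, Symbol};
//!
//! let interner = LockFreeInterner::new();
//! let a = interner.get_or_intern("hello");
//! let b = interner.get_or_intern("hello");
//! assert_eq!(a, b);                                  // same string, same Symbol
//! assert_eq!(interner.resolve(a), Some("hello"));    // Symbol -> &str
//! assert_eq!(interner.resolve(Symbol::NULL), None);  // NULL never resolves
//! ```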

use parking_lot::RwLock as ParkingLotRwLock;
use std::collections::HashMap;
use std::hash::{Hash, Hasher};
use std::sync::RwLock;
use std::sync::atomic::{AtomicPtr, AtomicU32, AtomicUsize, Ordering, fence};

/// Number of shards for the hash map (power of 2 for fast modulo)
const SHARD_COUNT: usize = 256;

/// Default chunk size (1MB)
const DEFAULT_CHUNK_SIZE: usize = 1024 * 1024;

/// Symbol handle - cheap to copy and compare
#[derive(Clone, Copy, PartialEq, Eq, Hash, Debug)]
pub struct Symbol(pub u32);

impl Symbol {
    /// Invalid/null symbol
    pub const NULL: Symbol = Symbol(u32::MAX);

    /// Get the raw index
    pub fn index(self) -> u32 {
        self.0
    }
}

/// Reference to a string location in a chunk
#[derive(Clone, Copy)]
struct StringRef {
    /// Chunk index
    chunk_idx: u32,
    /// Offset within chunk
    offset: u32,
    /// Length of string
    length: u32,
}

/// A single chunk of string storage using pre-allocated fixed buffer
///
/// ## Safety Rationale
///
/// This design eliminates the data race present in the Vec-based approach:
/// - `capacity` is immutable after construction
/// - `data` is a fixed-size buffer that never resizes
/// - `write_pos` atomically reserves disjoint byte ranges
/// - Each thread writes only to its reserved range
///
/// The only shared mutable state is the byte range reservation via `write_pos`,
/// which is handled atomically with `fetch_add`, as sketched below.
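///
/// For intuition, a possible interleaving of two appending threads
/// (illustrative values, not from a real run):
///
/// ```text
/// write_pos: 0 ──A: fetch_add(5)──▶ 5 ──B: fetch_add(3)──▶ 8
/// thread A writes bytes [0..5)      thread B writes bytes [5..8)
/// (the reserved ranges are disjoint, so the writes never race)
/// ```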
struct Chunk {
    /// Fixed-size pre-allocated buffer (never resized).
    /// A raw pointer into a zero-initialized `Box<[u8]>`; threads write only
    /// to byte ranges they have atomically reserved via `write_pos`.
    data: *mut u8,
    /// Buffer capacity (immutable after creation)
    capacity: usize,
    /// Current write position (atomically updated)
    write_pos: AtomicUsize,
    /// Next chunk in chain
    next: AtomicPtr<Chunk>,
}

impl Chunk {
    fn new(capacity: usize) -> Box<Self> {
        // Allocate the buffer as a Vec<u8> with zeros
        let buffer: Vec<u8> = vec![0u8; capacity];
        let ptr = Box::into_raw(buffer.into_boxed_slice()) as *mut u8;

        Box::new(Self {
            data: ptr,
            capacity,
            write_pos: AtomicUsize::new(0),
            next: AtomicPtr::new(std::ptr::null_mut()),
        })
    }

    /// Try to append a string, returns offset if successful
    ///
    /// ## Safety
    ///
    /// This is safe because:
    /// 1. `fetch_add` atomically reserves a disjoint byte range
    /// 2. Each thread writes only to its reserved range
    /// 3. The buffer is pre-allocated and never resized (no Vec metadata race)
    /// 4. Release fence ensures bytes are visible before returning
    fn try_append(&self, s: &str) -> Option<u32> {
        let bytes = s.as_bytes();
        let len = bytes.len();

        if len == 0 {
            // Empty strings can share offset 0 with length 0
            return Some(0);
        }

        // Atomically reserve space
        let offset = self.write_pos.fetch_add(len, Ordering::Relaxed);

        // Check if we have room
        if offset + len > self.capacity {
            // Deliberately do NOT roll back the reservation. A `fetch_sub`
            // here could move `write_pos` backwards past a range another
            // thread has since reserved and written, letting a later
            // reservation overlap it. Leaving the tail unused wastes at most
            // the end of this chunk; it is already full, and the caller will
            // allocate a new one.
            return None;
        }

        // SAFETY: We have exclusive access to [offset..offset+len] via atomic reservation
        // The buffer was pre-allocated with `capacity` bytes
        // No other thread will write to this range
        unsafe {
            std::ptr::copy_nonoverlapping(bytes.as_ptr(), self.data.add(offset), len);
        }

        // Release fence ensures bytes are visible to other threads
        // before we return the StringRef (which gets published to the hash map)
        fence(Ordering::Release);

        Some(offset as u32)
    }

    /// Read a string at offset (lock-free)
    ///
    /// ## Safety
    ///
    /// Caller must ensure:
    /// 1. The offset and length were returned by a prior `try_append` on this chunk
    /// 2. The StringRef was obtained through properly synchronized publication
    ///    (the Acquire fence below pairs with the Release fence in `try_append`)
    fn read(&self, offset: u32, len: u32) -> &str {
        // Acquire fence ensures we see the bytes written by try_append
        fence(Ordering::Acquire);

        // SAFETY:
        // - offset and len came from a successful try_append
        // - The buffer is stable (never moved or reallocated)
        // - Acquire fence synchronized with Release fence in try_append
        unsafe {
            let bytes = std::slice::from_raw_parts(self.data.add(offset as usize), len as usize);
            // SAFETY: we only store valid UTF-8 from str::as_bytes()
            std::str::from_utf8_unchecked(bytes)
        }
    }
}

impl Drop for Chunk {
    fn drop(&mut self) {
        // Reconstruct and drop the boxed slice
        unsafe {
            let slice = std::slice::from_raw_parts_mut(self.data, self.capacity);
            drop(Box::from_raw(slice as *mut [u8]));
        }
    }
}

/// Chunked append-only storage for strings
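///
/// A sketch of the internal round-trip contract these methods uphold
/// (illustrative only; `append` and `read` are private to this module):
///
/// ```ignore
/// let storage = ChunkedStorage::new(1024);
/// let r = storage.append("hello");             // copies bytes into a chunk
/// assert_eq!(storage.read(r), Some("hello"));  // reads them back lock-free
/// ```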
pub struct ChunkedStorage {
    /// Head of chunk chain
    head: AtomicPtr<Chunk>,
    /// Current chunk for new allocations
    current: AtomicPtr<Chunk>,
    /// Default chunk size (individual chunks may be larger for oversized strings)
    chunk_size: usize,
    /// Number of chunks
    chunk_count: AtomicU32,
}

impl ChunkedStorage {
    fn new(chunk_size: usize) -> Self {
        let initial = Box::into_raw(Chunk::new(chunk_size));
        Self {
            head: AtomicPtr::new(initial),
            current: AtomicPtr::new(initial),
            chunk_size,
            chunk_count: AtomicU32::new(1),
        }
    }

    /// Append a string, returns reference
    fn append(&self, s: &str) -> StringRef {
        loop {
            let current = self.current.load(Ordering::Acquire);
            let chunk = unsafe { &*current };

            // Try to append to current chunk
            if let Some(offset) = chunk.try_append(s) {
                let chunk_idx = self.get_chunk_index(current);
                return StringRef {
                    chunk_idx,
                    offset,
                    length: s.len() as u32,
                };
            }

            // Need a new chunk. Pass the string length so a string longer
            // than the default chunk size gets a chunk big enough to hold
            // it (otherwise this loop could never terminate).
            self.grow_chunk(current, s.len());
        }
    }

    /// Grow by adding a new chunk of at least `min_size` bytes
    fn grow_chunk(&self, expected_current: *mut Chunk, min_size: usize) {
        let new_chunk = Box::into_raw(Chunk::new(self.chunk_size.max(min_size)));

        // Link new chunk to current
        let current = unsafe { &*expected_current };

        // Try to set next pointer
        if current
            .next
            .compare_exchange(
                std::ptr::null_mut(),
                new_chunk,
                Ordering::AcqRel,
                Ordering::Relaxed,
            )
            .is_ok()
        {
            // We successfully linked the new chunk
            self.current.store(new_chunk, Ordering::Release);
            self.chunk_count.fetch_add(1, Ordering::Relaxed);
        } else {
            // Another thread already grew - free our chunk
            unsafe {
                drop(Box::from_raw(new_chunk));
            }
            // Update current to the actual new chunk. This store may briefly
            // point `current` at an already-full chunk if a third thread has
            // grown the chain again, but the next failed append simply
            // retries growth, so the chain self-heals.
            let actual_next = current.next.load(Ordering::Acquire);
            if !actual_next.is_null() {
                self.current.store(actual_next, Ordering::Release);
            }
        }
    }

    /// Get chunk by index (lock-free traversal)
    fn get_chunk(&self, idx: u32) -> Option<&Chunk> {
        let mut current = self.head.load(Ordering::Acquire);
        let mut i = 0u32;

        while !current.is_null() {
            if i == idx {
                return Some(unsafe { &*current });
            }
            current = unsafe { (*current).next.load(Ordering::Acquire) };
            i += 1;
        }

        None
    }

    /// Get index of a chunk pointer
    fn get_chunk_index(&self, ptr: *mut Chunk) -> u32 {
        let mut current = self.head.load(Ordering::Acquire);
        let mut i = 0u32;

        while !current.is_null() {
            if current == ptr {
                return i;
            }
            current = unsafe { (*current).next.load(Ordering::Acquire) };
            i += 1;
        }

        0 // Fallback (shouldn't happen)
    }

    /// Read a string by reference (lock-free)
    fn read(&self, string_ref: StringRef) -> Option<&str> {
        let chunk = self.get_chunk(string_ref.chunk_idx)?;
        Some(chunk.read(string_ref.offset, string_ref.length))
    }
}

impl Drop for ChunkedStorage {
    fn drop(&mut self) {
        let mut current = self.head.load(Ordering::Relaxed);
        while !current.is_null() {
            let chunk = unsafe { Box::from_raw(current) };
            current = chunk.next.load(Ordering::Relaxed);
        }
    }
}

/// Hasher for string lookup
fn hash_string(s: &str) -> u64 {
    let mut hasher = std::collections::hash_map::DefaultHasher::new();
    s.hash(&mut hasher);
    hasher.finish()
}

/// String interner with lock-free chunk storage (see the module docs for the
/// exact per-operation guarantees)
pub struct LockFreeInterner {
    /// Sharded hash maps: string hash -> candidate entries. A Vec per hash
    /// lets two distinct strings that collide on the 64-bit hash coexist;
    /// lookups compare the stored bytes before returning a Symbol.
    shards: [ParkingLotRwLock<HashMap<u64, Vec<(Symbol, StringRef)>>>; SHARD_COUNT],
    /// Chunked string storage
    storage: ChunkedStorage,
    /// Next symbol ID
    next_symbol: AtomicU32,
    /// Reverse mapping: Symbol -> StringRef (for resolve)
    symbols: RwLock<Vec<StringRef>>,
    /// Statistics
    stats: InternerStats,
}

/// Interner statistics
#[derive(Default)]
pub struct InternerStats {
    pub interned_count: AtomicU32,
    pub lookup_hits: AtomicU32,
    pub lookup_misses: AtomicU32,
    pub resolve_count: AtomicU32,
}

impl Default for LockFreeInterner {
    fn default() -> Self {
        Self::new()
    }
}

impl LockFreeInterner {
    /// Create a new interner
    pub fn new() -> Self {
        Self::with_chunk_size(DEFAULT_CHUNK_SIZE)
    }

    /// Create with custom chunk size
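    ///
    /// A sketch (mirroring the chunk-growth test below): a small chunk size
    /// forces the storage to grow extra chunks early.
    ///
    /// ```ignore
    /// let interner = LockFreeInterner::with_chunk_size(1024);
    /// for i in 0..1000 {
    ///     interner.get_or_intern(&format!("string_{}", i));
    /// }
    /// // the storage now spans several 1KB chunks
    /// ```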
    pub fn with_chunk_size(chunk_size: usize) -> Self {
        Self {
            shards: std::array::from_fn(|_| ParkingLotRwLock::new(HashMap::new())),
            storage: ChunkedStorage::new(chunk_size),
            next_symbol: AtomicU32::new(0),
            symbols: RwLock::new(Vec::new()),
            stats: InternerStats::default(),
        }
    }

    /// Get shard index for a hash
    #[inline]
    fn shard_index(&self, hash: u64) -> usize {
        (hash as usize) % SHARD_COUNT
    }

    /// Get or intern a string
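    ///
    /// Interning is idempotent: callers passing equal strings get the same
    /// `Symbol` back (a sketch, mirroring the basic test below):
    ///
    /// ```ignore
    /// let interner = LockFreeInterner::new();
    /// let a = interner.get_or_intern("hello");
    /// let b = interner.get_or_intern("hello");
    /// assert_eq!(a, b);
    /// ```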
    pub fn get_or_intern(&self, s: &str) -> Symbol {
        let hash = hash_string(s);
        let shard_idx = self.shard_index(hash);

        // Fast path: read-only lookup. Compare the stored bytes so two
        // distinct strings that share a 64-bit hash are never conflated.
        {
            let shard = self.shards[shard_idx].read();
            if let Some(entries) = shard.get(&hash) {
                for &(symbol, string_ref) in entries {
                    if self.storage.read(string_ref) == Some(s) {
                        self.stats.lookup_hits.fetch_add(1, Ordering::Relaxed);
                        return symbol;
                    }
                }
            }
        }

        self.stats.lookup_misses.fetch_add(1, Ordering::Relaxed);

        // Slow path: need to intern
        let mut shard = self.shards[shard_idx].write();

        // Double-check after acquiring write lock
        if let Some(entries) = shard.get(&hash) {
            for &(symbol, string_ref) in entries {
                if self.storage.read(string_ref) == Some(s) {
                    return symbol;
                }
            }
        }

        // Allocate symbol and store string
        let symbol = Symbol(self.next_symbol.fetch_add(1, Ordering::SeqCst));
        let string_ref = self.storage.append(s);

        shard.entry(hash).or_default().push((symbol, string_ref));

        // Update reverse mapping
        {
            let mut symbols = self.symbols.write().unwrap();
            // Ensure vector is large enough
            while symbols.len() <= symbol.0 as usize {
                symbols.push(StringRef {
                    chunk_idx: 0,
                    offset: 0,
                    length: 0,
                });
            }
            symbols[symbol.0 as usize] = string_ref;
        }

        self.stats.interned_count.fetch_add(1, Ordering::Relaxed);
        symbol
    }

    /// Resolve a symbol to its string (brief read lock on the symbol table,
    /// then a lock-free storage read)
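    ///
    /// A sketch of the contract (`Symbol::NULL` never resolves):
    ///
    /// ```ignore
    /// let interner = LockFreeInterner::new();
    /// let sym = interner.get_or_intern("hello");
    /// assert_eq!(interner.resolve(sym), Some("hello"));
    /// assert_eq!(interner.resolve(Symbol::NULL), None);
    /// ```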
    pub fn resolve(&self, symbol: Symbol) -> Option<&str> {
        if symbol == Symbol::NULL {
            return None;
        }

        self.stats.resolve_count.fetch_add(1, Ordering::Relaxed);

        // Read from symbols vector (only takes the read lock briefly)
        let string_ref = {
            let symbols = self.symbols.read().unwrap();
            symbols.get(symbol.0 as usize).copied()
        }?;

        // Storage read is completely lock-free
        self.storage.read(string_ref)
    }

    /// Get the number of interned strings
    pub fn len(&self) -> usize {
        self.next_symbol.load(Ordering::Relaxed) as usize
    }

    /// Check if empty
    pub fn is_empty(&self) -> bool {
        self.len() == 0
    }

    /// Get statistics
    pub fn stats(&self) -> &InternerStats {
        &self.stats
    }

    /// Approximate memory used by string storage (counts every chunk at the
    /// default chunk size; oversized chunks for very long strings count low)
    pub fn storage_bytes(&self) -> usize {
        self.storage.chunk_count.load(Ordering::Relaxed) as usize * self.storage.chunk_size
    }
}

// SAFETY: LockFreeInterner is safe to share across threads because:
// 1. All hash map access is protected by ParkingLotRwLock (sharded)
// 2. ChunkedStorage uses atomic operations for all shared mutable state
// 3. Chunk buffers are pre-allocated fixed-size (no Vec metadata races)
// 4. Memory ordering is correct (Release on write, Acquire on read)
unsafe impl Send for LockFreeInterner {}
unsafe impl Sync for LockFreeInterner {}

// SAFETY: ChunkedStorage is safe to share across threads because:
// 1. write_pos uses atomic fetch_add for disjoint range reservation
// 2. Chunk buffers are fixed-size pre-allocations (immutable capacity)
// 3. Chunk chain traversal uses AtomicPtr with proper ordering
// 4. Each writer has exclusive access to its reserved byte range
unsafe impl Send for ChunkedStorage {}
unsafe impl Sync for ChunkedStorage {}

// SAFETY: Chunk contains a raw pointer but is designed for concurrent access:
// 1. data pointer is stable (never moved after allocation)
// 2. capacity is immutable after construction
// 3. write_pos is atomic
// 4. Each thread writes only to atomically-reserved disjoint ranges
unsafe impl Send for Chunk {}
unsafe impl Sync for Chunk {}

#[cfg(test)]
mod tests {
    use super::*;
    use std::sync::Arc;
    use std::thread;

    #[test]
    fn test_basic_intern() {
        let interner = LockFreeInterner::new();

        let s1 = interner.get_or_intern("hello");
        let s2 = interner.get_or_intern("world");
        let s3 = interner.get_or_intern("hello");

        // Same string should give same symbol
        assert_eq!(s1, s3);
        // Different strings should give different symbols
        assert_ne!(s1, s2);

        assert_eq!(interner.len(), 2);
    }

    #[test]
    fn test_resolve() {
        let interner = LockFreeInterner::new();

        let s1 = interner.get_or_intern("hello");
        let s2 = interner.get_or_intern("world");

        assert_eq!(interner.resolve(s1), Some("hello"));
        assert_eq!(interner.resolve(s2), Some("world"));
        assert_eq!(interner.resolve(Symbol::NULL), None);
    }

    #[test]
    fn test_concurrent_intern() {
        let interner = Arc::new(LockFreeInterner::new());
        let mut handles = vec![];

        // 8 threads each intern the same 100 strings
        for _ in 0..8 {
            let interner = interner.clone();
            handles.push(thread::spawn(move || {
                let mut symbols = Vec::new();
                for i in 0..100 {
                    let s = format!("string_{}", i);
                    symbols.push(interner.get_or_intern(&s));
                }
                symbols
            }));
        }

        let results: Vec<_> = handles.into_iter().map(|h| h.join().unwrap()).collect();

        // All threads should get the same symbols for the same strings
        for i in 0..100 {
            let first = results[0][i];
            for result in &results[1..] {
                assert_eq!(result[i], first, "Symbol mismatch for string_{}", i);
            }
        }

        // Should have exactly 100 unique strings
        assert_eq!(interner.len(), 100);
    }

    #[test]
    fn test_concurrent_resolve() {
        let interner = Arc::new(LockFreeInterner::new());

        // Intern some strings first
        let symbols: Vec<_> = (0..100)
            .map(|i| interner.get_or_intern(&format!("string_{}", i)))
            .collect();

        let mut handles = vec![];

        // 8 threads each resolve all strings many times
        for _ in 0..8 {
            let interner = interner.clone();
            let symbols = symbols.clone();
            handles.push(thread::spawn(move || {
                for _ in 0..100 {
                    for (i, &symbol) in symbols.iter().enumerate() {
                        let resolved = interner.resolve(symbol);
                        assert_eq!(resolved, Some(format!("string_{}", i)).as_deref());
                    }
                }
            }));
        }

        for handle in handles {
            handle.join().unwrap();
        }

        // Should have many resolve operations
        assert!(interner.stats().resolve_count.load(Ordering::Relaxed) > 0);
    }

    #[test]
    fn test_chunk_growth() {
        // Small chunk size to force growth
        let interner = LockFreeInterner::with_chunk_size(1024);

        // Intern enough strings to fill multiple chunks
        for i in 0..1000 {
            let s = format!("this_is_a_longer_string_number_{:05}", i);
            interner.get_or_intern(&s);
        }

        // Should have grown to multiple chunks
        assert!(interner.storage.chunk_count.load(Ordering::Relaxed) > 1);

        // All strings should still be resolvable
        for i in 0..1000 {
            let s = format!("this_is_a_longer_string_number_{:05}", i);
            let symbol = interner.get_or_intern(&s);
            assert_eq!(interner.resolve(symbol), Some(s.as_str()));
        }
    }

    #[test]
    fn test_symbol_comparison() {
        let interner = LockFreeInterner::new();

        let s1 = interner.get_or_intern("hello");
        let s2 = interner.get_or_intern("world");
        let s3 = interner.get_or_intern("hello");

        // Symbol comparison is O(1) - just comparing u32
        assert_eq!(s1, s3);
        assert_ne!(s1, s2);

        // Can use as hash map key
        let mut map = HashMap::new();
        map.insert(s1, "value1");
        map.insert(s2, "value2");

        assert_eq!(map.get(&s3), Some(&"value1"));
    }

    #[test]
    fn test_stats() {
        let interner = LockFreeInterner::new();

        // Intern some strings
        interner.get_or_intern("hello");
        interner.get_or_intern("world");
        interner.get_or_intern("hello"); // Hit

        let stats = interner.stats();
        assert_eq!(stats.interned_count.load(Ordering::Relaxed), 2);
        assert!(stats.lookup_hits.load(Ordering::Relaxed) >= 1);
        assert!(stats.lookup_misses.load(Ordering::Relaxed) >= 2);
    }

    #[test]
    fn test_concurrent_intern_and_resolve() {
        let interner = Arc::new(LockFreeInterner::new());
        let mut handles = vec![];

        // Writers: intern new strings
        for writer_id in 0..4 {
            let interner = interner.clone();
            handles.push(thread::spawn(move || {
                for i in 0..100 {
                    let s = format!("writer_{}_string_{}", writer_id, i);
                    interner.get_or_intern(&s);
                }
            }));
        }

        // Readers: intern a small shared set and immediately resolve it
        for reader_id in 0..4 {
            let interner = interner.clone();
            handles.push(thread::spawn(move || {
                for i in 0..1000 {
                    // Intern and immediately resolve
                    let s = format!("common_string_{}", (i + reader_id) % 10);
                    let symbol = interner.get_or_intern(&s);
                    let resolved = interner.resolve(symbol);
                    assert!(resolved.is_some());
                }
            }));
        }

        for handle in handles {
            handle.join().unwrap();
        }
    }

    #[test]
    fn test_empty_string() {
        let interner = LockFreeInterner::new();

        let s = interner.get_or_intern("");
        assert_eq!(interner.resolve(s), Some(""));
    }

    #[test]
    fn test_long_strings() {
        let interner = LockFreeInterner::new();

        let long_string: String = (0..10000).map(|_| 'x').collect();
        let symbol = interner.get_or_intern(&long_string);
        assert_eq!(interner.resolve(symbol), Some(long_string.as_str()));
    }

    #[test]
    fn test_unicode_strings() {
        let interner = LockFreeInterner::new();

        let s1 = interner.get_or_intern("hello 世界 🌍");
        let s2 = interner.get_or_intern("émojis: 🎉🎊🎁");

        assert_eq!(interner.resolve(s1), Some("hello 世界 🌍"));
        assert_eq!(interner.resolve(s2), Some("émojis: 🎉🎊🎁"));
    }

    #[test]
    fn test_shard_distribution() {
        let interner = LockFreeInterner::new();

        // Intern many strings
        for i in 0..10000 {
            interner.get_or_intern(&format!("key_{}", i));
        }

        // Check that strings are distributed across shards
        let mut non_empty = 0;
        for shard in &interner.shards {
            if !shard.read().is_empty() {
                non_empty += 1;
            }
        }

        // With 10000 strings across 256 shards, should use many shards
        assert!(
            non_empty > 200,
            "Expected better shard distribution: {} non-empty",
            non_empty
        );
    }
}
774}