sochdb_core/lockfree_interner.rs
// Copyright 2025 Sushanth (https://github.com/sushanthpy)
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

//! Lock-Free String Interner with Chunked Append-Only Storage
//!
//! This module implements a high-performance string interner that:
//! - Uses chunked append-only storage (never moves existing strings)
//! - Provides lock-free reads of stored bytes via pre-allocated fixed buffers
//! - Uses sharded hash maps to reduce write contention
//! - Supports zero-cost string comparison via `Symbol` handles
//!
//! # Concurrency Guarantees - PLEASE READ CAREFULLY
//!
//! Despite the module name, this interner is **NOT fully lock-free**.
//! The "lock-free" in the name refers specifically to the chunk-storage read path.
//!
//! ## Threading Guarantees Table
//!
//! | Operation | Guarantee | Notes |
//! |-----------|-----------|-------|
//! | `resolve()` | Low-contention | Brief read lock on the symbol table, then lock-free chunk traversal |
//! | `get_or_intern()` (hit) | Low-contention | Hash lookup under a sharded read lock |
//! | `get_or_intern()` (miss) | Blocking | Requires write locks on one shard and the symbol table |
//! | `len()` | Wait-free | Atomic counter |
//!
//! ## What IS Provided
//!
//! - **Thread-safety**: All operations are safe from multiple threads
//! - **Lock-free storage reads**: chunk traversal and byte reads use atomic pointers, no locks
//! - **Low-contention lookups**: 256-way sharding reduces lock contention
//! - **Deadlock-freedom**: locks are always taken in a fixed order (shard lock, then symbol table)
//!
//! ## What is NOT Provided
//!
//! - **Lock-free writes**: Interning requires write locks on shards
//! - **Wait-free interning**: Writers may block under contention
//! - **Wait-free resolution**: `resolve()` briefly holds a read lock on the symbol table
//!
//! ## Progress Taxonomy
//!
//! ```text
//! wait-free ⊂ lock-free ⊂ obstruction-free ⊂ blocking
//!     ↑                                         ↑
//!   len()                         get_or_intern(), resolve()
//! ```
//!
//! This implementation provides:
//! - **Wait-free counters**: `len()` always completes in a bounded number of steps
//! - **Short-lock reads**: `resolve()` holds a read lock only long enough to copy a `StringRef`
//! - **Blocking writes**: `get_or_intern()` may wait for lock acquisition
//!
//! ## Memory Layout
//!
//! Each chunk is a fixed-size, zero-initialized buffer allocated once up front and
//! never resized. This eliminates the data race that would occur with a growable
//! `Vec<u8>`, whose metadata (`len` and `capacity`) is not thread-safe to modify
//! concurrently.
//!
//! ```text
//! Chunk 0 (1MB fixed)    Chunk 1 (1MB fixed)    Chunk 2 (1MB fixed)
//! ┌─────────────────┐    ┌─────────────────┐    ┌─────────────────┐
//! │ "hello"         │    │ "different"     │    │ "more strings"  │
//! │ "world"         │    │ "strings"       │    │ ...             │
//! │ "foo"           │    │ "here"          │    │                 │
//! │ [unused]        │    │ [unused]        │    │ [unused]        │
//! └─────────────────┘    └─────────────────┘    └─────────────────┘
//!          ↑                      ↑                      ↑
//!          └──────────────────────┴──────────────────────┘
//!              AtomicPtr chain for lock-free traversal
//! ```
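//!
//! # Example (sketch)
//!
//! A minimal usage sketch. The `use` path below is an assumption about how this
//! module is exposed by the crate; adjust it to the actual re-export.
//!
//! ```ignore
//! use sochdb_core::lockfree_interner::{LockFreeInterner, Symbol};
//!
//! let interner = LockFreeInterner::new();
//!
//! // Interning the same string twice yields the same Symbol.
//! let a = interner.get_or_intern("hello");
//! let b = interner.get_or_intern("hello");
//! assert_eq!(a, b);
//!
//! // Symbols resolve back to the original string; NULL resolves to None.
//! assert_eq!(interner.resolve(a), Some("hello"));
//! assert_eq!(interner.resolve(Symbol::NULL), None);
//! ```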

use parking_lot::RwLock as ParkingLotRwLock;
use std::collections::HashMap;
use std::hash::{Hash, Hasher};
use std::sync::RwLock;
use std::sync::atomic::{AtomicPtr, AtomicU32, AtomicUsize, Ordering, fence};

/// Number of shards for the hash map (power of 2 for fast modulo)
const SHARD_COUNT: usize = 256;

/// Default chunk size (1MB)
const DEFAULT_CHUNK_SIZE: usize = 1024 * 1024;

/// Symbol handle - cheap to copy and compare
#[derive(Clone, Copy, PartialEq, Eq, Hash, Debug)]
pub struct Symbol(pub u32);

impl Symbol {
    /// Invalid/null symbol
    pub const NULL: Symbol = Symbol(u32::MAX);

    /// Get the raw index
    pub fn index(self) -> u32 {
        self.0
    }
}

/// Reference to a string location in a chunk
#[derive(Clone, Copy)]
struct StringRef {
    /// Chunk index
    chunk_idx: u32,
    /// Offset within chunk
    offset: u32,
    /// Length of string
    length: u32,
}

/// A single chunk of string storage using a pre-allocated fixed buffer
///
/// ## Safety Rationale
///
/// This design eliminates the data race present in a Vec-based approach:
/// - `capacity` is immutable after construction
/// - `data` is a fixed-size buffer that never resizes
/// - `write_pos` atomically reserves disjoint byte ranges
/// - Each thread writes only to its reserved range
///
/// The only shared mutable state is the byte range reservation via `write_pos`,
/// which is handled atomically with `fetch_add`.
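///
/// As an isolated illustration of the reservation step, here is a standalone
/// sketch (hypothetical `BUF_SIZE`/`WRITE_POS`, not this struct's actual fields):
///
/// ```ignore
/// use std::sync::atomic::{AtomicUsize, Ordering};
///
/// const BUF_SIZE: usize = 1024;
/// static WRITE_POS: AtomicUsize = AtomicUsize::new(0);
///
/// // Reserve `len` bytes; a successful range is owned exclusively by the caller.
/// fn reserve(len: usize) -> Option<(usize, usize)> {
///     let start = WRITE_POS.fetch_add(len, Ordering::Relaxed);
///     if start + len > BUF_SIZE {
///         // Undo the failed reservation; this range was never handed to anyone else.
///         WRITE_POS.fetch_sub(len, Ordering::Relaxed);
///         return None;
///     }
///     Some((start, start + len))
/// }
/// ```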
struct Chunk {
    /// Fixed-size pre-allocated buffer (never resized).
    /// Raw pointer into a zero-initialized `Box<[u8]>`; threads write only to
    /// atomically reserved, disjoint byte ranges.
    data: *mut u8,
    /// Buffer capacity (immutable after creation)
    capacity: usize,
    /// Current write position (atomically updated)
    write_pos: AtomicUsize,
    /// Next chunk in chain
    next: AtomicPtr<Chunk>,
}

impl Chunk {
    fn new(capacity: usize) -> Box<Self> {
        // Allocate the buffer as a zero-filled Vec<u8>, then leak it as a raw
        // pointer; Drop reconstructs the boxed slice to free it.
        let buffer: Vec<u8> = vec![0u8; capacity];
        let ptr = Box::into_raw(buffer.into_boxed_slice()) as *mut u8;

        Box::new(Self {
            data: ptr,
            capacity,
            write_pos: AtomicUsize::new(0),
            next: AtomicPtr::new(std::ptr::null_mut()),
        })
    }

    /// Try to append a string, returns offset if successful
    ///
    /// ## Safety
    ///
    /// This is safe because:
    /// 1. `fetch_add` atomically reserves a disjoint byte range
    /// 2. Each thread writes only to its reserved range
    /// 3. The buffer is pre-allocated and never resized (no Vec metadata race)
    /// 4. Release fence ensures bytes are visible before returning
    fn try_append(&self, s: &str) -> Option<u32> {
        let bytes = s.as_bytes();
        let len = bytes.len();

        if len == 0 {
            // Empty strings can share offset 0 with length 0
            return Some(0);
        }

        // Atomically reserve space
        let offset = self.write_pos.fetch_add(len, Ordering::Relaxed);

        // Check if we have room
        if offset + len > self.capacity {
            // Roll back the reservation. The reserved range was never handed to
            // another thread, so leaving it unused (or reclaiming it here) is safe.
            self.write_pos.fetch_sub(len, Ordering::Relaxed);
            return None;
        }

        // SAFETY: We have exclusive access to [offset..offset+len] via atomic reservation.
        // The buffer was pre-allocated with `capacity` bytes.
        // No other thread will write to this range.
        unsafe {
            std::ptr::copy_nonoverlapping(bytes.as_ptr(), self.data.add(offset), len);
        }

        // Release fence ensures bytes are visible to other threads
        // before we return the StringRef (which gets published to the hash map)
        fence(Ordering::Release);

        Some(offset as u32)
    }

    /// Read a string at offset (lock-free)
    ///
    /// ## Safety
    ///
    /// Caller must ensure:
    /// 1. The offset and length were returned by a prior `try_append`
    /// 2. An Acquire fence was issued after observing the StringRef
    fn read(&self, offset: u32, len: u32) -> &str {
        // Acquire fence ensures we see the bytes written by try_append
        fence(Ordering::Acquire);

        // SAFETY:
        // - offset and len came from a successful try_append
        // - The buffer is stable (never moved or reallocated)
        // - Acquire fence synchronized with Release fence in try_append
        unsafe {
            let bytes = std::slice::from_raw_parts(self.data.add(offset as usize), len as usize);
            // SAFETY: we only store valid UTF-8 from str::as_bytes()
            std::str::from_utf8_unchecked(bytes)
        }
    }
}

impl Drop for Chunk {
    fn drop(&mut self) {
        // Reconstruct and drop the boxed slice
        unsafe {
            let slice = std::slice::from_raw_parts_mut(self.data, self.capacity);
            drop(Box::from_raw(slice as *mut [u8]));
        }
    }
}

/// Chunked append-only storage for strings
pub struct ChunkedStorage {
    /// Head of chunk chain
    head: AtomicPtr<Chunk>,
    /// Current chunk for new allocations
    current: AtomicPtr<Chunk>,
    /// Chunk size
    chunk_size: usize,
    /// Number of chunks
    chunk_count: AtomicU32,
}

impl ChunkedStorage {
    fn new(chunk_size: usize) -> Self {
        let initial = Box::into_raw(Chunk::new(chunk_size));
        Self {
            head: AtomicPtr::new(initial),
            current: AtomicPtr::new(initial),
            chunk_size,
            chunk_count: AtomicU32::new(1),
        }
    }

    /// Append a string, returns reference
    fn append(&self, s: &str) -> StringRef {
        loop {
            let current = self.current.load(Ordering::Acquire);
            let chunk = unsafe { &*current };

            // Try to append to current chunk
            if let Some(offset) = chunk.try_append(s) {
                let chunk_idx = self.get_chunk_index(current);
                return StringRef {
                    chunk_idx,
                    offset,
                    length: s.len() as u32,
                };
            }

            // Need a new chunk. Pass the required size so that strings longer
            // than the default chunk size get a chunk big enough to hold them
            // (otherwise this loop would never terminate for such strings).
            self.grow_chunk(current, s.len());
        }
    }

    /// Grow by adding a new chunk (sized to at least `required` bytes)
    fn grow_chunk(&self, expected_current: *mut Chunk, required: usize) {
        let new_chunk = Box::into_raw(Chunk::new(self.chunk_size.max(required)));

        // Link the new chunk after the current one
        let current = unsafe { &*expected_current };

        // Try to set next pointer
        if current
            .next
            .compare_exchange(
                std::ptr::null_mut(),
                new_chunk,
                Ordering::AcqRel,
                Ordering::Relaxed,
            )
            .is_ok()
        {
            // We successfully linked the new chunk
            self.current.store(new_chunk, Ordering::Release);
            self.chunk_count.fetch_add(1, Ordering::Relaxed);
        } else {
            // Another thread already grew - free our chunk
            unsafe {
                drop(Box::from_raw(new_chunk));
            }
            // Update current to the actual new chunk
            let actual_next = current.next.load(Ordering::Acquire);
            if !actual_next.is_null() {
                self.current.store(actual_next, Ordering::Release);
            }
        }
    }

    /// Get chunk by index (lock-free traversal)
    fn get_chunk(&self, idx: u32) -> Option<&Chunk> {
        let mut current = self.head.load(Ordering::Acquire);
        let mut i = 0u32;

        while !current.is_null() {
            if i == idx {
                return Some(unsafe { &*current });
            }
            current = unsafe { (*current).next.load(Ordering::Acquire) };
            i += 1;
        }

        None
    }

    /// Get index of a chunk pointer
    fn get_chunk_index(&self, ptr: *mut Chunk) -> u32 {
        let mut current = self.head.load(Ordering::Acquire);
        let mut i = 0u32;

        while !current.is_null() {
            if current == ptr {
                return i;
            }
            current = unsafe { (*current).next.load(Ordering::Acquire) };
            i += 1;
        }

        0 // Fallback (shouldn't happen)
    }

    /// Read a string by reference (lock-free)
    fn read(&self, string_ref: StringRef) -> Option<&str> {
        let chunk = self.get_chunk(string_ref.chunk_idx)?;
        Some(chunk.read(string_ref.offset, string_ref.length))
    }
}

impl Drop for ChunkedStorage {
    fn drop(&mut self) {
        let mut current = self.head.load(Ordering::Relaxed);
        while !current.is_null() {
            let chunk = unsafe { Box::from_raw(current) };
            current = chunk.next.load(Ordering::Relaxed);
        }
    }
}

/// Hasher for string lookup
fn hash_string(s: &str) -> u64 {
    let mut hasher = std::collections::hash_map::DefaultHasher::new();
    s.hash(&mut hasher);
    hasher.finish()
}

/// Sharded string interner with lock-free chunk storage (see the module docs
/// for the exact concurrency guarantees)
pub struct LockFreeInterner {
    /// Sharded hash maps: 64-bit string hash -> (Symbol, StringRef).
    /// Note: entries are keyed by the hash alone, so a DefaultHasher collision
    /// would alias two distinct strings to the same Symbol.
    shards: [ParkingLotRwLock<HashMap<u64, (Symbol, StringRef)>>; SHARD_COUNT],
    /// Chunked string storage
    storage: ChunkedStorage,
    /// Next symbol ID
    next_symbol: AtomicU32,
    /// Reverse mapping: Symbol -> StringRef (for resolve)
    symbols: RwLock<Vec<StringRef>>,
    /// Statistics
    stats: InternerStats,
}

/// Interner statistics
#[derive(Default)]
pub struct InternerStats {
    pub interned_count: AtomicU32,
    pub lookup_hits: AtomicU32,
    pub lookup_misses: AtomicU32,
    pub resolve_count: AtomicU32,
}

impl Default for LockFreeInterner {
    fn default() -> Self {
        Self::new()
    }
}

impl LockFreeInterner {
    /// Create a new interner
    pub fn new() -> Self {
        Self::with_chunk_size(DEFAULT_CHUNK_SIZE)
    }

    /// Create with custom chunk size
    pub fn with_chunk_size(chunk_size: usize) -> Self {
        Self {
            shards: std::array::from_fn(|_| ParkingLotRwLock::new(HashMap::new())),
            storage: ChunkedStorage::new(chunk_size),
            next_symbol: AtomicU32::new(0),
            symbols: RwLock::new(Vec::new()),
            stats: InternerStats::default(),
        }
    }

    /// Get shard index for a hash
    #[inline]
    fn shard_index(&self, hash: u64) -> usize {
        (hash as usize) % SHARD_COUNT
    }

    /// Get or intern a string
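    ///
    /// Returns the existing `Symbol` if `s` was interned before; otherwise the
    /// string is stored and a fresh `Symbol` is allocated. A minimal sketch
    /// (assuming an `interner` of this type is in scope):
    ///
    /// ```ignore
    /// let a = interner.get_or_intern("query");
    /// let b = interner.get_or_intern("query");
    /// assert_eq!(a, b); // same string, same Symbol
    /// ```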
    pub fn get_or_intern(&self, s: &str) -> Symbol {
        let hash = hash_string(s);
        let shard_idx = self.shard_index(hash);

        // Fast path: read-only lookup
        {
            let shard = self.shards[shard_idx].read();
            if let Some(&(symbol, _)) = shard.get(&hash) {
                self.stats.lookup_hits.fetch_add(1, Ordering::Relaxed);
                return symbol;
            }
        }

        self.stats.lookup_misses.fetch_add(1, Ordering::Relaxed);

        // Slow path: need to intern
        let mut shard = self.shards[shard_idx].write();

        // Double-check after acquiring write lock
        if let Some(&(symbol, _)) = shard.get(&hash) {
            return symbol;
        }

        // Allocate symbol and store string
        let symbol = Symbol(self.next_symbol.fetch_add(1, Ordering::SeqCst));
        let string_ref = self.storage.append(s);

        shard.insert(hash, (symbol, string_ref));

        // Update reverse mapping (lock order: shard lock, then symbol table)
        {
            let mut symbols = self.symbols.write().unwrap();
            // Ensure vector is large enough
            while symbols.len() <= symbol.0 as usize {
                symbols.push(StringRef {
                    chunk_idx: 0,
                    offset: 0,
                    length: 0,
                });
            }
            symbols[symbol.0 as usize] = string_ref;
        }

        self.stats.interned_count.fetch_add(1, Ordering::Relaxed);
        symbol
    }

    /// Resolve a symbol to its string (brief read lock on the symbol table,
    /// then a lock-free chunk read)
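    ///
    /// A minimal sketch (assuming an `interner` of this type is in scope):
    ///
    /// ```ignore
    /// let sym = interner.get_or_intern("hello");
    /// assert_eq!(interner.resolve(sym), Some("hello"));
    /// assert_eq!(interner.resolve(Symbol::NULL), None);
    /// ```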
    pub fn resolve(&self, symbol: Symbol) -> Option<&str> {
        if symbol == Symbol::NULL {
            return None;
        }

        self.stats.resolve_count.fetch_add(1, Ordering::Relaxed);

        // Read from the symbols vector (holds the read lock only long enough
        // to copy a StringRef)
        let string_ref = {
            let symbols = self.symbols.read().unwrap();
            symbols.get(symbol.0 as usize).copied()
        }?;

        // Storage read is completely lock-free
        self.storage.read(string_ref)
    }

    /// Get the number of interned strings
    pub fn len(&self) -> usize {
        self.next_symbol.load(Ordering::Relaxed) as usize
    }

    /// Check if empty
    pub fn is_empty(&self) -> bool {
        self.len() == 0
    }

    /// Get statistics
    pub fn stats(&self) -> &InternerStats {
        &self.stats
    }

    /// Get the memory reserved for string storage, approximated as
    /// chunk count × default chunk size (oversized chunks are undercounted)
    pub fn storage_bytes(&self) -> usize {
        self.storage.chunk_count.load(Ordering::Relaxed) as usize * self.storage.chunk_size
    }
}

// SAFETY: LockFreeInterner is safe to share across threads because:
// 1. All hash map access is protected by ParkingLotRwLock (sharded)
// 2. ChunkedStorage uses atomic operations for all shared mutable state
// 3. Chunk buffers are pre-allocated fixed-size (no Vec metadata races)
// 4. Memory ordering is correct (Release on write, Acquire on read)
unsafe impl Send for LockFreeInterner {}
unsafe impl Sync for LockFreeInterner {}

// SAFETY: ChunkedStorage is safe to share across threads because:
// 1. write_pos uses atomic fetch_add for disjoint range reservation
// 2. Chunk buffers are fixed-size pre-allocations (immutable capacity)
// 3. Chunk chain traversal uses AtomicPtr with proper ordering
// 4. Each writer has exclusive access to its reserved byte range
unsafe impl Send for ChunkedStorage {}
unsafe impl Sync for ChunkedStorage {}

// SAFETY: Chunk contains a raw pointer but is designed for concurrent access:
// 1. data pointer is stable (never moved after allocation)
// 2. capacity is immutable after construction
// 3. write_pos is atomic
// 4. Each thread writes only to atomically-reserved disjoint ranges
unsafe impl Send for Chunk {}
unsafe impl Sync for Chunk {}
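
// A small compile-time sketch backing the claims above: if any of these types
// did not satisfy `Send + Sync`, the calls below would fail to compile. The
// helper names are local illustrations, not part of the public API.
#[allow(dead_code)]
fn assert_send_sync<T: Send + Sync>() {}
#[allow(dead_code)]
fn check_auto_traits() {
    assert_send_sync::<Symbol>();
    assert_send_sync::<Chunk>();
    assert_send_sync::<ChunkedStorage>();
    assert_send_sync::<LockFreeInterner>();
}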

#[cfg(test)]
mod tests {
    use super::*;
    use std::sync::Arc;
    use std::thread;

    #[test]
    fn test_basic_intern() {
        let interner = LockFreeInterner::new();

        let s1 = interner.get_or_intern("hello");
        let s2 = interner.get_or_intern("world");
        let s3 = interner.get_or_intern("hello");

        // Same string should give same symbol
        assert_eq!(s1, s3);
        // Different strings should give different symbols
        assert_ne!(s1, s2);

        assert_eq!(interner.len(), 2);
    }

    #[test]
    fn test_resolve() {
        let interner = LockFreeInterner::new();

        let s1 = interner.get_or_intern("hello");
        let s2 = interner.get_or_intern("world");

        assert_eq!(interner.resolve(s1), Some("hello"));
        assert_eq!(interner.resolve(s2), Some("world"));
        assert_eq!(interner.resolve(Symbol::NULL), None);
    }

    #[test]
    fn test_concurrent_intern() {
        let interner = Arc::new(LockFreeInterner::new());
        let mut handles = vec![];

        // 8 threads each intern the same 100 strings
        for _ in 0..8 {
            let interner = interner.clone();
            handles.push(thread::spawn(move || {
                let mut symbols = Vec::new();
                for i in 0..100 {
                    let s = format!("string_{}", i);
                    symbols.push(interner.get_or_intern(&s));
                }
                symbols
            }));
        }

        let results: Vec<_> = handles.into_iter().map(|h| h.join().unwrap()).collect();

        // All threads should get the same symbols for the same strings
        for i in 0..100 {
            let first = results[0][i];
            for result in &results[1..] {
                assert_eq!(result[i], first, "Symbol mismatch for string_{}", i);
            }
        }

        // Should have exactly 100 unique strings
        assert_eq!(interner.len(), 100);
    }

    #[test]
    fn test_concurrent_resolve() {
        let interner = Arc::new(LockFreeInterner::new());

        // Intern some strings first
        let symbols: Vec<_> = (0..100)
            .map(|i| interner.get_or_intern(&format!("string_{}", i)))
            .collect();

        let mut handles = vec![];

        // 8 threads each resolve all strings many times
        for _ in 0..8 {
            let interner = interner.clone();
            let symbols = symbols.clone();
            handles.push(thread::spawn(move || {
                for _ in 0..100 {
                    for (i, &symbol) in symbols.iter().enumerate() {
                        let resolved = interner.resolve(symbol);
                        assert_eq!(resolved, Some(format!("string_{}", i)).as_deref());
                    }
                }
            }));
        }

        for handle in handles {
            handle.join().unwrap();
        }

        // Should have many resolve operations
        assert!(interner.stats().resolve_count.load(Ordering::Relaxed) > 0);
    }

    #[test]
    fn test_chunk_growth() {
        // Small chunk size to force growth
        let interner = LockFreeInterner::with_chunk_size(1024);

        // Intern enough strings to fill multiple chunks
        for i in 0..1000 {
            let s = format!("this_is_a_longer_string_number_{:05}", i);
            interner.get_or_intern(&s);
        }

        // Should have grown to multiple chunks
        assert!(interner.storage.chunk_count.load(Ordering::Relaxed) > 1);

        // All strings should still be resolvable
        for i in 0..1000 {
            let s = format!("this_is_a_longer_string_number_{:05}", i);
            let symbol = interner.get_or_intern(&s);
            assert_eq!(interner.resolve(symbol), Some(s.as_str()));
        }
    }

    #[test]
    fn test_symbol_comparison() {
        let interner = LockFreeInterner::new();

        let s1 = interner.get_or_intern("hello");
        let s2 = interner.get_or_intern("world");
        let s3 = interner.get_or_intern("hello");

        // Symbol comparison is O(1) - just comparing u32
        assert_eq!(s1, s3);
        assert_ne!(s1, s2);

        // Can use as hash map key
        let mut map = HashMap::new();
        map.insert(s1, "value1");
        map.insert(s2, "value2");

        assert_eq!(map.get(&s3), Some(&"value1"));
    }

    #[test]
    fn test_stats() {
        let interner = LockFreeInterner::new();

        // Intern some strings
        interner.get_or_intern("hello");
        interner.get_or_intern("world");
        interner.get_or_intern("hello"); // Hit

        let stats = interner.stats();
        assert_eq!(stats.interned_count.load(Ordering::Relaxed), 2);
        assert!(stats.lookup_hits.load(Ordering::Relaxed) >= 1);
        assert!(stats.lookup_misses.load(Ordering::Relaxed) >= 2);
    }

    #[test]
    fn test_concurrent_intern_and_resolve() {
        let interner = Arc::new(LockFreeInterner::new());
        let mut handles = vec![];

        // Writers: intern new strings
        for writer_id in 0..4 {
            let interner = interner.clone();
            handles.push(thread::spawn(move || {
                for i in 0..100 {
                    let s = format!("writer_{}_string_{}", writer_id, i);
                    interner.get_or_intern(&s);
                }
            }));
        }

        // Readers: continuously resolve existing strings
        for reader_id in 0..4 {
            let interner = interner.clone();
            handles.push(thread::spawn(move || {
                for i in 0..1000 {
                    // Intern and immediately resolve
                    let s = format!("common_string_{}", (i + reader_id) % 10);
                    let symbol = interner.get_or_intern(&s);
                    let resolved = interner.resolve(symbol);
                    assert!(resolved.is_some());
                }
            }));
        }

        for handle in handles {
            handle.join().unwrap();
        }
    }

    #[test]
    fn test_empty_string() {
        let interner = LockFreeInterner::new();

        let s = interner.get_or_intern("");
        assert_eq!(interner.resolve(s), Some(""));
    }

    #[test]
    fn test_long_strings() {
        let interner = LockFreeInterner::new();

        let long_string: String = (0..10000).map(|_| 'x').collect();
        let symbol = interner.get_or_intern(&long_string);
        assert_eq!(interner.resolve(symbol), Some(long_string.as_str()));
    }

    #[test]
    fn test_unicode_strings() {
        let interner = LockFreeInterner::new();

        let s1 = interner.get_or_intern("hello 世界 🌍");
        let s2 = interner.get_or_intern("émojis: 🎉🎊🎁");

        assert_eq!(interner.resolve(s1), Some("hello 世界 🌍"));
        assert_eq!(interner.resolve(s2), Some("émojis: 🎉🎊🎁"));
    }

    #[test]
    fn test_shard_distribution() {
        let interner = LockFreeInterner::new();

        // Intern many strings
        for i in 0..10000 {
            interner.get_or_intern(&format!("key_{}", i));
        }

        // Check that strings are distributed across shards
        let mut non_empty = 0;
        for shard in &interner.shards {
            if !shard.read().is_empty() {
                non_empty += 1;
            }
        }

        // With 10000 strings across 256 shards, should use many shards
        assert!(
            non_empty > 200,
            "Expected better shard distribution: {} non-empty",
            non_empty
        );
    }
}
771}