interned_string/
lib.rs

1use std::{fmt::Debug, ops::Deref};
2use storage::{IStringKey, ThreadLocalReader, SHARED_STORAGE, THREAD_LOCAL_READER};
3
4mod storage;
5
6/// An immutable and interned string.
7/// 
8/// Reading an `IString`'s contents is very fast, lock-free and wait-free.
9/// It can be shared and read from any number of threads.
10/// It scales linearly with the number of reading threads.
11/// 
12/// `IString` provides `Hash` and `Eq` implementations that run in O(1),
13/// perfect for an high performance `HashMap<IString, _>`
14/// 
15/// The tradeoff is that creating a new `IString` is comparatively slower :
16/// - Creating a new `IString` with a string that is already interned is fast and lock-free.
17/// - Creating a new `IString` with a string that isn't already interned is slower.
18///   It acquires a global lock and waits for all readers to finish reading.
19#[derive(Eq, PartialEq, Ord, Hash)]
20pub struct IString {
21    pub(crate) key: IStringKey
22}
23
24// Indispensable traits impl : From, Drop, Deref
25
26impl From<String> for IString {
27    /// Intern the given `String` by consuming it. Its allocation is reused.
28    /// 
29    /// This operation runs in O(N) where N is the `string.len()`.
30    /// If the string was already interned, this operation is lock-free.
31    /// Otherwise, a global lock is acquired.
32    /// 
33    /// # Example
34    /// 
35    /// ```
36    /// use interned_string::IString;
37    /// 
38    /// let my_istring = IString::from("hello".to_string());
39    /// ```
40    #[inline]
41    fn from(string: String) -> Self {
42        Self {
43            // could block
44            key: SHARED_STORAGE.insert_or_retain(string)
45        }
46    }
47}
48
49impl From<&str> for IString {
50    /// Intern the given `&str` by cloning its contents.
51    /// 
52    /// This operation runs in O(N) where N is the `string.len()`.
53    /// If the string was already interned, this operation is lock-free.
54    /// Otherwise, a global lock is acquired.
55    /// 
56    /// # Example
57    /// 
58    /// ```
59    /// use interned_string::IString;
60    /// 
61    /// let my_istring = IString::from("hello");
62    /// ```
63    #[inline]
64    fn from(string: &str) -> Self {
65        Self {
66            // could block
67            key: SHARED_STORAGE.insert_or_retain(String::from(string))
68        }
69    }
70}
71
72impl Drop for IString {
73    #[inline]
74    fn drop(&mut self) {
75        THREAD_LOCAL_READER.with(|tl_reader| {
76            tl_reader.release(self);
77        });
78    }
79}
80
81impl Deref for IString {
82    type Target = str;
83    
84    /// Returns a reference to the string's contents.
85    /// 
86    /// This operation runs in O(1) and is lock-free.
87    /// 
88    /// # Example
89    /// ```
90    /// use interned_string::Intern;
91    /// 
92    /// fn foo(string: &str) {
93    ///     println!("{string}")
94    /// }
95    /// 
96    /// let my_istring = "hello".intern();
97    /// // implicit call to Deref::deref
98    /// foo(&my_istring);
99    /// ```
100    #[inline]
101    fn deref(&self) -> &Self::Target {
102        THREAD_LOCAL_READER.with(|reader: &ThreadLocalReader| {
103            reader.read(self)
104        })
105    }
106}
107
108impl AsRef<str> for IString {
109    /// Returns a reference to the string's contents.
110    /// 
111    /// This operation runs in O(1) and is lock-free.
112    /// 
113    /// # Example
114    /// ```
115    /// use interned_string::Intern;
116    /// 
117    /// let my_istring = "Hello, World!".intern();
118    /// let (hello, world) = my_istring.as_ref().split_at(5);
119    /// ```
120    #[inline]
121    fn as_ref(&self) -> &str {
122        THREAD_LOCAL_READER.with(|tl_reader: &ThreadLocalReader| {
123            tl_reader.read(self)
124        })
125    }
126}
127
128// Common traits impl that can't be derived : Clone, PartialOrd, Debug, Display, Default
129
130impl Clone for IString {
131    /// Returns a copy of the `IString`.
132    /// 
133    /// This operation runs in O(1) and is lock-free.
134    #[inline]
135    fn clone(&self) -> Self {
136        THREAD_LOCAL_READER.with(|reader: &ThreadLocalReader| {
137            reader.retain(self.key)
138        });
139
140        Self { key: self.key }
141    }
142}
143
144impl PartialOrd for IString {
145    #[inline]
146    fn lt(&self, other: &Self) -> bool {
147        self.deref().lt(other.deref())
148    }
149
150    #[inline]
151    fn le(&self, other: &Self) -> bool {
152        self.deref().le(other.deref())
153    }
154
155    #[inline]
156    fn gt(&self, other: &Self) -> bool {
157        self.deref().gt(other.deref())
158    }
159
160    #[inline]
161    fn ge(&self, other: &Self) -> bool {
162        self.deref().ge(other.deref())
163    }
164    
165    #[inline]
166    fn partial_cmp(&self, other: &Self) -> Option<std::cmp::Ordering> {
167        self.deref().partial_cmp(other.deref())
168    }
169}
170
171impl Debug for IString {
172    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
173        f.debug_tuple("IString")
174         .field(&self.deref())
175         .finish()
176    }
177}
178
179impl std::fmt::Display for IString {
180    #[inline]
181    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
182        f.write_str(self)
183    }
184}
185
186impl Default for IString {
187    /// Creates an empty `IString`.
188    #[inline]
189    fn default() -> Self {
190        Self::from(String::default())
191    }
192}
193
194// Convenience trait Intern
195
196pub trait Intern {
197    fn intern(self) -> IString where Self: Sized;
198}
199
200impl Intern for String {
201    /// Intern the given `String` by consuming it. Its allocation is reused.
202    /// 
203    /// This operation runs in O(N) where N is the `string.len()`.
204    /// If the string was already interned, this operation is lock-free.
205    /// Otherwise, a global lock is acquired.
206    /// 
207    /// # Example
208    /// 
209    /// ```
210    /// use interned_string::Intern;
211    /// 
212    /// let my_istring = "hello".to_string().intern();
213    /// ```
214    #[inline]
215    fn intern(self) -> IString {
216        IString::from(self)
217    }
218}
219
220impl Intern for &str {
221    /// Intern the given `&str` by cloning its contents.
222    /// 
223    /// This operation runs in O(N) where N is the `string.len()`.
224    /// If the string was already interned, this operation is lock-free.
225    /// Otherwise, a global lock is acquired.
226    /// 
227    /// # Example
228    /// 
229    /// ```
230    /// use interned_string::Intern;
231    /// 
232    /// let my_istring = "hello".intern();
233    /// ```
234    #[inline]
235    fn intern(self) -> IString {
236        IString::from(self)
237    }
238}
239
240// Garbage collection
241
242impl IString {
243    /// Immediately frees all the interned strings that are no longer used.
244    /// 
245    /// Call this function when you wish to immediately reduce memory usage,
246    /// at the cost of some CPU time. 
247    /// This will acquire a global lock and wait for all readers to finish reading.
248    /// It's recommended to only call this function when your program has nothing else to do.
249    /// 
250    /// Using this function is optional. Memory is always eventually freed.
251    pub fn collect_garbage_now() {
252        SHARED_STORAGE.writer.lock().unwrap().collect_garbage();
253    }
254}
255
256#[cfg(feature = "serde")]
257mod feature_serde {
258    use serde::{de::Visitor, Deserialize, Serialize};
259    use crate::IString;
260
261    impl Serialize for IString {
262        fn serialize<S: serde::Serializer>(&self, serializer: S) -> Result<S::Ok, S::Error> {
263            serializer.serialize_str(std::ops::Deref::deref(&self))
264        }
265    }
266    
267    impl<'de> Deserialize<'de> for IString {
268        fn deserialize<D: serde::Deserializer<'de>>(deserializer: D) -> Result<Self, D::Error> {
269            deserializer.deserialize_string(IStringVisitor)
270        }
271    }
272    
273    struct IStringVisitor;
274    
275    impl<'de> Visitor<'de> for IStringVisitor {
276        type Value = IString;
277    
278        fn expecting(&self, formatter: &mut std::fmt::Formatter) -> std::fmt::Result {
279            formatter.write_str("a string")
280        }
281    
282        fn visit_string<E: serde::de::Error>(self, string: String) -> Result<Self::Value, E> {
283            // does not need to allocate a new string
284            Ok(IString::from(string))
285        }
286    
287        fn visit_str<E: serde::de::Error>(self, slice: &str) -> Result<Self::Value, E> {
288            // less performant, will allocate
289            Ok(IString::from(slice))
290        }
291    }
292}
293
294// tests
295
296#[cfg(test)]
297mod tests {
298    use std::{ops::Deref, sync::Mutex};
299    use radix_trie::TrieCommon;
300
301    use super::*;
302    use crate::storage::SHARED_STORAGE;
303
304    #[test]
305    fn it_creates_and_removes_1_string() {
306        with_exclusive_use_of_shared_storage(|| {
307            let my_istring1 = "hello".intern();
308            assert!(my_istring1.deref() == "hello");
309
310            assert_string_count_in_storage(1);
311            assert_string_is_stored_with_key("hello", my_istring1.key);
312
313            drop(my_istring1);
314
315            assert_string_count_in_storage(1);
316            assert_string_is_still_stored("hello");
317
318            let my_istring2 = "another".to_string().intern();
319            assert!(my_istring2.deref() == "another");
320
321            assert_string_count_in_storage(1);
322            assert_string_is_stored_with_key("another", my_istring2.key);
323            assert_string_is_not_stored("hello")
324        });
325    }
326
327    #[test]
328    fn it_creates_and_removes_1_shared_string() {
329        with_exclusive_use_of_shared_storage(|| {
330            let my_istring1 = IString::from("hello");
331            let my_istring2 = IString::from("hello");
332            assert!(my_istring1.deref() == "hello");
333            assert!(my_istring2.deref() == "hello");
334            assert!(my_istring1.key == my_istring2.key);
335
336            assert_string_count_in_storage(1);
337            assert_string_is_stored_with_key("hello", my_istring1.key);
338
339            drop(my_istring1);
340
341            assert_string_count_in_storage(1);
342            assert_string_is_stored_with_key("hello", my_istring2.key);
343
344            drop(my_istring2);
345
346            assert_string_count_in_storage(1);
347            assert_string_is_still_stored("hello");
348        });
349    }
350
351    #[test]
352    fn it_creates_and_removes_3_strings() {
353        with_exclusive_use_of_shared_storage(|| {
354            let my_istring1 = IString::from("hello");
355            let my_istring2 = IString::from("world");
356            let my_istring3 = IString::from("howdy");
357            assert!(my_istring1.deref() == "hello");
358            assert!(my_istring2.deref() == "world");
359            assert!(my_istring3.deref() == "howdy");
360            assert!(my_istring1.key != my_istring2.key);
361            assert!(my_istring2.key != my_istring3.key);
362
363            assert_string_count_in_storage(3);
364            assert_string_is_stored_with_key("hello", my_istring1.key);
365            assert_string_is_stored_with_key("world", my_istring2.key);
366            assert_string_is_stored_with_key("howdy", my_istring3.key);
367            assert_string_is_not_stored("hola");
368
369            drop(my_istring1);
370            drop(my_istring2);
371
372            assert_string_count_in_storage(3);
373            assert_string_is_still_stored("hello");
374            assert_string_is_still_stored("world");
375            assert_string_is_stored_with_key("howdy", my_istring3.key);
376            assert_string_is_not_stored("hola");
377
378            // it should reuse the storage
379            let my_istring1bis = IString::from("hello");
380            assert!(my_istring1bis.deref() == "hello");
381
382            // and not clean up the storage of "world" yet
383            assert_string_count_in_storage(3);
384            assert_string_is_stored_with_key("hello", my_istring1bis.key);
385            assert_string_is_stored_with_key("howdy", my_istring3.key);
386            assert_string_is_still_stored("world");
387
388            let my_istring4 = IString::from("another");
389            assert!(my_istring4.deref() == "another");
390
391            // creating a new string should cause the storage of unused strings to be cleaned up
392            assert_string_is_stored_with_key("hello", my_istring1bis.key);
393            assert_string_is_stored_with_key("howdy", my_istring3.key);
394            assert_string_is_stored_with_key("another", my_istring4.key);
395            assert_string_is_not_stored("world");
396            assert_string_count_in_storage(3);
397        });
398    }
399
400    #[test]
401    fn it_is_send() {
402        fn assert_send<T: Send>() {}
403        assert_send::<IString>();
404    }
405
406    #[test]
407    fn it_is_sync() {
408        fn assert_sync<T: Sync>() {}
409        assert_sync::<IString>();
410    }
411
412    #[cfg(feature = "serde")]
413    #[test]
414    fn it_serializes() {
415        with_exclusive_use_of_shared_storage(|| {
416            use serde::Serialize;
417
418            #[derive(Serialize)]
419            struct ExampleDTO {
420                favorite_dish: IString
421            }
422
423            let dto = ExampleDTO { favorite_dish: "pasta".intern() };
424
425            assert_eq!(serde_json::to_string(&dto).unwrap(), "{\"favorite_dish\":\"pasta\"}");
426        });
427    }
428
429    #[cfg(feature = "serde")]
430    #[test]
431    fn it_deserializes() {
432        with_exclusive_use_of_shared_storage(|| {
433            use serde::Deserialize;
434
435            #[derive(Deserialize, PartialEq, Debug)]
436            struct ExampleDTO {
437                favorite_dish: IString
438            }
439
440            let input = "{\"favorite_dish\":\"pasta\"}";
441
442            let dto: Result<ExampleDTO, _> = serde_json::from_str(input);
443
444            assert_eq!(dto.unwrap(), ExampleDTO { favorite_dish: "pasta".into() });
445        });
446    }
447
448    fn assert_string_count_in_storage(count: usize) {
449        let guard = SHARED_STORAGE.read_handle.lock().unwrap();
450        let read_handle = guard.enter().unwrap();
451        assert_eq!(read_handle.map.len(), count);
452        assert_eq!(read_handle.trie.len(), count);
453    }
454
455    fn assert_string_is_still_stored(string: &str) {
456        let guard = SHARED_STORAGE.read_handle.lock().unwrap();
457        let read_handle = guard.enter().unwrap();
458        let key = read_handle.trie.get(&string.into());
459        if let Some(key) = key {
460            assert!(read_handle.map.get(&key).unwrap().inner.deref() == string);
461        } else {
462            assert!(false, "the string is not in the trie");
463        }
464    }
465
466    fn assert_string_is_stored_with_key(string: &str, key: u32) {
467        let guard = SHARED_STORAGE.read_handle.lock().unwrap();
468        let read_handle = guard.enter().unwrap();
469        assert!(read_handle.map.get(&key).unwrap().inner.deref() == string);
470        assert_eq!(read_handle.trie.get(&string.into()), Some(&key));
471    }
472
473    fn assert_string_is_not_stored(string: &str) {
474        let guard = SHARED_STORAGE.read_handle.lock().unwrap();
475        let read_handle = guard.enter().unwrap();
476        assert_eq!(read_handle.trie.get(&string.into()), None);
477    }
478
479    static SHARED_STORAGE_MUTEX: Mutex<()> = Mutex::new(());
480
481    fn with_exclusive_use_of_shared_storage(closure: fn()) {
482        let guard = SHARED_STORAGE_MUTEX.lock().expect("test lock is not poisoned");
483        closure();
484
485        // reset the writer for the next test
486        let mut writer = SHARED_STORAGE.writer.lock().unwrap();
487        writer.drain_channel_ops();
488        writer.write_handle.append(storage::StringStorageOp::DropUnusedStrings);
489        writer.write_handle.publish();
490        drop(writer);
491        drop(guard);
492    }
493}