// generic_static_cache — crate root (lib.rs)
#![cfg_attr(docsrs, feature(doc_cfg))]
// The inline-asm storage labels below are intentionally named
// (`global_static_…`), so silence the lint.
#![allow(named_asm_labels)]
// `no_std` only on the architectures where the inline-asm path exists;
// the hashmap fallback used on all other targets requires `std`.
#![cfg_attr(
    any(
        target_arch = "x86_64",
        target_arch = "aarch64",
        target_arch = "arm",
        target_arch = "x86"
    ),
    no_std
)]
// TODO: Properly test data for same type from different compilation units
// TODO: More platforms
// TODO: Benches

//! Quoting the [Rust Reference](https://doc.rust-lang.org/reference/items/static-items.html#statics--generics):
//!
//! > A static item defined in a generic scope (for example in a blanket or default implementation)
//! > will result in exactly one static item being defined, as if the static definition was pulled
//! > out of the current scope into the module. There will not be one item per monomorphization.
//!
//! One way to work around this is to use a `HashMap<TypeId,Data>`. This is a simple & usually the best solution.
//! If lookup performance is important, you can skip hashing the `TypeId` for minor gains as it
//! [already contains](https://github.com/rust-lang/rust/blob/eeff92ad32c2627876112ccfe812e19d38494087/library/core/src/any.rs#L645)
//! a good-quality hash.
//!
//! This crate aims to further fully remove the lookup by allocating the storage using inline
//! assembly.
//!
//! # ⚠ Caveats ⚠
//!
//! THIS PLACE IS NOT A PLACE OF HONOR.  
//! NO HIGHLY ESTEEMED DEED IS COMMEMORATED HERE.  
//! NOTHING VALUED IS HERE.  
//! WHAT IS HERE WAS DANGEROUS AND REPULSIVE TO US.  
//! THE DANGER IS IN A PARTICULAR LOCATION.  
//! THE DANGER IS STILL PRESENT, IN YOUR TIME, AS IT WAS IN OURS.  
//!
//! Different compilation units may access different instances of the data
//! (at least without `share-generics`).
//!
//! Supported targets are **x86-64**, **aarch64**, **arm** and **x86**;
//! on other targets, this crate falls back to a hashmap.
//!
//! The linker may not use a smaller align for the `data` section than that of
//! the types used with `global`.
//!
//! This crate isn't as well-tested as it should be.
//!
//! # Usage
//! The `generic_static` macro defines a static inside of a function,
//! with every instantiation of the function having its own instance of the static:
//!
//! ```rust
//! # use core::sync::atomic::{AtomicI32, Ordering};
//! # use generic_static_cache::generic_static;
//! fn get_and_inc<T>() -> i32 {
//!     generic_static!{
//!         static COUNTER: &AtomicI32 = &AtomicI32::new(1);
//!     }
//!     COUNTER.fetch_add(1, Ordering::Relaxed)
//! }
//! assert_eq!(get_and_inc::<bool>(), 1);
//! assert_eq!(get_and_inc::<bool>(), 2);
//! assert_eq!(get_and_inc::<String>(), 1);
//! assert_eq!(get_and_inc::<bool>(), 3);
//! ```
//!
//! Underlying this is the `global::<T>()` function,
//! which allocates a shared global static for each type it's used with.
extern crate alloc;

// Re-exported so `generic_static!` expansions can name them via `$crate::…`
// without requiring the caller to depend on `alloc`/`bytemuck` directly.
pub use alloc::boxed::Box;
pub use bytemuck;

77/// Access a global instance of `T`, zero-initialized at program start.
78///
79/// For types that cannot be zero-initialized, use [`non_zeroable_global`].
80///
81/// # Example
82/// ```
83/// # use core::sync::atomic::{AtomicI32, Ordering::Relaxed};
84/// # use generic_static_cache::bytemuck;
85/// struct MyType(AtomicI32);
86/// unsafe impl bytemuck::Zeroable for MyType {}
87///
88/// assert_eq!(generic_static_cache::global::<MyType>().0.load(Relaxed), 0);
89/// generic_static_cache::global::<MyType>().0.store(1, Relaxed);
90/// assert_eq!(generic_static_cache::global::<MyType>().0.load(Relaxed), 1);
91/// ```
92pub fn global<T: bytemuck::Zeroable + Sync + 'static>() -> &'static T {
93    assert!(core::mem::align_of::<T>() <= 4 * 1024);
94    #[cfg(any(
95        target_arch = "x86_64",
96        target_arch = "aarch64",
97        target_arch = "arm",
98        target_arch = "x86"
99    ))]
100    {
101        unsafe {
102            // Tested both with position-independent code and with -C relocation-model=static
103
104            // Reserve space in writable memory
105            core::arch::asm!(
106                ".pushsection .data",
107                ".balign {align}",
108                "global_static_{id}:",
109                ".skip {size}",
110                ".popsection",
111                id = sym global::<T>,
112                align = const core::mem::align_of::<T>(),
113                size = const core::mem::size_of::<T>(),
114                options(nomem)
115            );
116            let addr: usize;
117            #[cfg(target_arch = "x86_64")]
118            {
119                core::arch::asm!(
120                    "lea {addr}, [rip+global_static_{id}]",
121                    addr = out(reg) addr,
122                    id = sym global::<T>,
123                    options(pure, nomem)
124                );
125            }
126            #[cfg(target_arch = "aarch64")]
127            {
128                core::arch::asm!(
129                    "adrp {addr}, global_static_{id}",
130                    "add {addr}, {addr}, :lo12:global_static_{id}",
131                    addr = out(reg) addr,
132                    id = sym global::<T>,
133                    options(pure, nomem)
134                );
135            }
136            #[cfg(target_arch = "arm")]
137            {
138                core::arch::asm!(
139                    "ldr {addr}, =global_static_{id}",
140                    addr = out(reg) addr,
141                    id = sym global::<T>,
142                    options(pure, nomem)
143                );
144            }
145            #[cfg(target_arch = "x86")]
146            {
147                core::arch::asm!(
148                    // AT&T syntax as intel syntax seems to throw an error
149                    // when using two symbols in the memory operand
150                    "call 1f",
151                    "1: pop {addr}",
152                    "lea global_static_{id}-1b({addr}), {addr}",
153                    addr = out(reg) addr,
154                    id = sym global::<T>,
155                    options(pure, nomem, att_syntax)
156                );
157            }
158            &*(addr as *const _)
159        }
160    }
161    #[cfg(not(any(
162        target_arch = "x86_64",
163        target_arch = "aarch64",
164        target_arch = "arm",
165        target_arch = "x86"
166    )))]
167    {
168        pub(crate) struct SyncWrapper(*const u8);
169        unsafe impl Sync for SyncWrapper {}
170        unsafe impl Send for SyncWrapper {}
171        use std::any::TypeId;
172        static MAP: std::sync::RwLock<TypeIdMap<SyncWrapper>> =
173            std::sync::RwLock::new(TypeIdMap::with_hasher(NoOpTypeIdBuildHasher));
174        {
175            let guard = MAP.read().unwrap();
176            if let Some(value) = guard.get(&TypeId::of::<T>()) {
177                return unsafe { &*(value.0 as *const T) };
178            }
179        }
180        let mut guard = MAP.write().unwrap();
181        let value = guard
182            .entry(TypeId::of::<T>())
183            .or_insert(SyncWrapper(
184                Box::into_raw(Box::new(<T as bytemuck::Zeroable>::zeroed())) as *const u8,
185            ));
186        unsafe { &*(value.0 as *const T) }
187    }
188}
189
190/// Access a generic global that can contain non-zeroable types.
191/// This is done via an indirection to the heap, so use
192/// [`global`] if this is not needed.
193///
194/// This doesn't interfere with data accessed with [`global`].
195///
196/// # Example
197/// ```
198/// # use generic_static_cache::non_zeroable_global;
199/// #[derive(PartialEq, Debug)]
200/// struct MyType;
201///
202/// assert_eq!(non_zeroable_global::get::<MyType>(), None);
203/// let _ = non_zeroable_global::init(MyType);
204/// assert_eq!(non_zeroable_global::get::<MyType>(), Some(&MyType));
205// TODO: Add fallback
206#[cfg(any(
207    target_arch = "x86_64",
208    target_arch = "aarch64",
209    target_arch = "arm",
210    target_arch = "x86"
211))]
212pub mod non_zeroable_global {
213    use super::*;
214
215    // TODO: Remove one layer of indirection on unsupported platforms
216
217    /// Wrapper to prevent interfering with the user's `direct` calls
218    struct Heap<T>(core::sync::atomic::AtomicPtr<T>);
219    unsafe impl<T> bytemuck::Zeroable for Heap<T> {}
220    unsafe impl<T: Sync> Sync for Heap<T> {}
221
222    #[derive(Debug)]
223    pub struct AlreadyInitialized;
224
225    /// Initialize the global initiable storage of type `Type`.
226    ///
227    /// If called multiple times, only the first call will succeed.
228    ///
229    /// [`global`]: crate::global
230    pub fn init<T: Sync + 'static>(data: T) -> Result<(), AlreadyInitialized> {
231        use core::sync::atomic::Ordering;
232        let boxed = Box::into_raw(Box::new(data));
233        match global::<Heap<T>>().0.compare_exchange(
234            core::ptr::null_mut(),
235            boxed,
236            Ordering::SeqCst,
237            Ordering::SeqCst,
238        ) {
239            Ok(_) => Ok(()),
240            Err(_) => {
241                unsafe {
242                    drop(Box::from_raw(boxed));
243                }
244                Err(AlreadyInitialized)
245            }
246        }
247    }
248
249    /// Access the global initiable storage of type `Type`.
250    ///
251    /// [`global`]: crate::global
252    pub fn get<T: Sync + 'static>() -> Option<&'static T> {
253        use core::sync::atomic::Ordering;
254        let data = global::<Heap<T>>().0.load(Ordering::SeqCst);
255        if data.is_null() {
256            None
257        } else {
258            Some(unsafe { &*data })
259        }
260    }
261
262    /// Initialize & access the global initiable storage of type `Type`.
263    ///
264    /// If this is called multiple times simultaneously,
265    /// the `cons` argument of multiple invocations may be called,
266    /// but only one result will be used.
267    ///
268    /// [`global`]: crate::global
269    pub fn get_or_init<T: Sync + 'static>(cons: impl Fn() -> T) -> &'static T {
270        use core::sync::atomic::Ordering;
271        let data = global::<Heap<T>>().0.load(Ordering::SeqCst);
272        if data.is_null() {
273            let _ = init::<_>(cons());
274            get::<_>().unwrap()
275        } else {
276            unsafe { &*data }
277        }
278    }
279}
280
/// Declare a static variable that is not shared across different monomorphizations
/// of the containing functions. Its type must be a shared reference to a [`Sync`] type.
///
/// If this is executed for the first time in multiple threads simultaneously,
/// the initializing expression may get executed multiple times,
/// but all threads observe the same winning value.
///
/// # Example
/// ```
/// # use std::sync::Mutex;
/// # use generic_static_cache::generic_static;
/// fn generic_function<T>() {
///     generic_static!{
///         static NAME: &Mutex<String> = &Mutex::new("Ferris".to_string());
///     }
/// # /*
///     …
/// # */
/// }
/// ```
#[cfg(any(
    target_arch = "x86_64",
    target_arch = "aarch64",
    target_arch = "arm",
    target_arch = "x86"
))]
#[macro_export]
macro_rules! generic_static {
    {static $ident:ident $(: &$type:ty)? = &$init:expr;} => {
        #[allow(non_snake_case)]
        let $ident $(: &'static $type)? = {
            let init = ||$init;
            fn assert_sync<T: Sync>(_: &impl FnOnce() -> T) {}
            assert_sync(&init);

            // Use empty closure to create a new type to use as a unique key,
            // use reference to initializer to infer type of static data
            fn make<Key: 'static, Value: Sync + 'static>(_: Key, _: &impl FnOnce()->Value)
            -> &'static ::core::sync::atomic::AtomicPtr<Value> {
                struct Holder<T, D> {
                    _marker: core::marker::PhantomData<T>,
                    value: core::sync::atomic::AtomicPtr<D>
                }
                unsafe impl<T, D> $crate::bytemuck::Zeroable for Holder<T,D>{}
                unsafe impl<T, D> Sync for Holder<T,D>{}
                &$crate::global::<Holder<Key, Value>>().value
            }
            let ptr = make(||(), &init);

            let data = ptr.load(::core::sync::atomic::Ordering::SeqCst);
            if data.is_null() {
                // Need to call initializer
                // This can be called multiple times if executed for the first time
                // in multiple threads simultaneously!
                let boxed = $crate::Box::into_raw($crate::Box::new(init()));
                match ptr.compare_exchange(
                    ::core::ptr::null_mut(),
                    boxed,
                    ::core::sync::atomic::Ordering::SeqCst,
                    ::core::sync::atomic::Ordering::SeqCst,
                ) {
                    // We won the race: our allocation is the shared value.
                    Ok(_) => unsafe { &*boxed },
                    // Another thread initialized first. Free our allocation
                    // and return the winner's pointer. (Previously `&*boxed`
                    // was returned even after freeing it — a use-after-free.)
                    Err(winner) => {
                        unsafe {
                            drop($crate::Box::from_raw(boxed));
                        }
                        unsafe { &*winner }
                    }
                }
            } else {
                unsafe { &*data }
            }
        };
    };
}

/// Declare a static variable that is not shared across different monomorphizations
/// of the containing functions. Its type must be a shared reference to a [`Sync`] type.
///
/// If this is executed for the first time in multiple threads simultaneously,
/// the initializing expression may get executed multiple times.
///
/// # Example
/// ```
/// # use std::sync::Mutex;
/// # use generic_static_cache::generic_static;
/// fn generic_function<T>() {
///     generic_static!{
///         static NAME: &Mutex<String> = &Mutex::new("Ferris".to_string());
///     }
/// # /*
///     …
/// # */
/// }
/// ```
#[cfg(not(any(
    target_arch = "x86_64",
    target_arch = "aarch64",
    target_arch = "arm",
    target_arch = "x86"
)))]
#[macro_export]
macro_rules! generic_static {
    // TODO: Use a RwLock<*mut u8> to remove the restrictions on type inference
    // and usage of outer types.
    {static $ident:ident $(: &$type:ty)? = &$init:expr;} => {
        #[allow(non_snake_case)]
        let $ident $(: &'static $type)? = 'block: {
            struct SyncWrapper(*const u8);
            unsafe impl Sync for SyncWrapper {}
            unsafe impl Send for SyncWrapper {}
            fn id<T: 'static>(_: T) -> core::any::TypeId {
                core::any::TypeId::of::<T>()
            }
            // Each closure expression defines a closure type [expr.closure.intro].
            // As closures can depend on generic types from a surrounding items,
            // closures in different instantiations of outer generics must have different types
            // and therefore must be different closure expressions.
            // The types defined by closure expressions are unique [expr.closure.unique-type].
            let id = id(||());
            static MAP: ::std::sync::RwLock<$crate::TypeIdMap<SyncWrapper>> =
                ::std::sync::RwLock::new($crate::TypeIdMap::with_hasher(
                    $crate::NoOpTypeIdBuildHasher,
                ));
            // Fast path: shared lock only.
            {
                let guard = MAP.read().unwrap();
                if let Some(value) = guard.get(&id) {
                    break 'block unsafe { &*(value.0 as *const _) };
                }
            }
            // Slow path. `or_insert_with` (NOT `or_insert`): the eager form
            // would evaluate `$init` and leak its allocation whenever another
            // thread inserted between our read-unlock and write-lock.
            // `$crate::Box` (not bare `Box`) keeps the expansion hygienic,
            // matching the inline-asm variant of this macro.
            let mut guard = MAP.write().unwrap();
            let value = guard.entry(id).or_insert_with(|| {
                SyncWrapper($crate::Box::into_raw($crate::Box::new($init)) as *const u8)
            });
            unsafe { &*(value.0 as *const _) }
        };

    };
}

// Expose the fallback map types at the crate root so `generic_static!`
// expansions can reach them via `$crate::…` on unsupported targets.
#[cfg(not(any(
    target_arch = "x86_64",
    target_arch = "aarch64",
    target_arch = "arm",
    target_arch = "x86"
)))]
pub use fallback::*;

// Hashmap-based storage, compiled only where no inline-asm path exists.
#[cfg(not(any(
    target_arch = "x86_64",
    target_arch = "aarch64",
    target_arch = "arm",
    target_arch = "x86"
)))]
mod fallback {
    use core::any::TypeId;
    use core::hash::{BuildHasher, Hasher};
    use std::collections::HashMap;

    /// Fast type map as a fallback.
    ///
    /// `TypeId` already contains a good-quality hash, so the map's hasher
    /// simply passes that value through instead of re-hashing it.
    pub type TypeIdMap<T> = HashMap<TypeId, T, NoOpTypeIdBuildHasher>;

    #[doc(hidden)]
    #[derive(Default)]
    pub struct NoOpTypeIdBuildHasher;

    impl BuildHasher for NoOpTypeIdBuildHasher {
        type Hasher = NoOpTypeIdHasher;

        fn build_hasher(&self) -> Self::Hasher {
            // Equivalent to `NoOpTypeIdHasher(0)` via the derived `Default`.
            NoOpTypeIdHasher::default()
        }
    }

    #[doc(hidden)]
    #[derive(Default)]
    pub struct NoOpTypeIdHasher(u64);

    impl Hasher for NoOpTypeIdHasher {
        // `TypeId` hashes itself through `write_u64`; any byte-level write
        // would mean that assumption broke, so fail loudly.
        fn write(&mut self, _bytes: &[u8]) {
            unimplemented!()
        }

        fn write_u64(&mut self, value: u64) {
            self.0 = value
        }

        fn finish(&self) -> u64 {
            self.0
        }
    }
}

#[cfg(test)]
mod test {
    #[test]
    fn test_global() {
        use crate::global;
        use core::sync::atomic::{AtomicI32, AtomicI64, Ordering};

        // Distinct types must resolve to distinct storage.
        let slot_i32 = global::<AtomicI32>();
        let slot_i64 = global::<AtomicI64>();
        assert_eq!(slot_i32.load(Ordering::Relaxed), 0);
        slot_i32.store(69, Ordering::Relaxed);
        assert_eq!(slot_i32.load(Ordering::Relaxed), 69);
        // Writing the `AtomicI32` slot must not bleed into the others.
        assert_eq!(slot_i64.load(Ordering::Relaxed), 0);
        assert_eq!(*global::<i64>(), 0);

        core::hint::black_box(global::<AtomicI64>());
    }

    #[test]
    fn test_macro() {
        use core::sync::atomic::{AtomicI32, Ordering};

        #[allow(clippy::extra_unused_type_parameters)]
        fn get_and_inc<T: 'static>() -> i32 {
            generic_static!(
                static BLUB: &AtomicI32 = &AtomicI32::new(1);
            );
            let current = BLUB.load(Ordering::Relaxed);
            BLUB.fetch_add(1, Ordering::Relaxed);
            current
        }
        // Each monomorphization counts independently.
        assert_eq!(get_and_inc::<bool>(), 1);
        assert_eq!(get_and_inc::<bool>(), 2);
        assert_eq!(get_and_inc::<i32>(), 1);
        assert_eq!(get_and_inc::<bool>(), 3);

        // Two invocations in the same scope get independent storage.
        generic_static!(
            static FOO_1: &AtomicI32 = &AtomicI32::new(0);
        );
        generic_static!(
            static FOO_2: &AtomicI32 = &AtomicI32::new(69);
        );
        assert_eq!(FOO_1.load(Ordering::Relaxed), 0);
        assert_eq!(FOO_2.load(Ordering::Relaxed), 69);
        FOO_1.store(1, Ordering::Relaxed);
        FOO_2.store(2, Ordering::Relaxed);
        assert_eq!(FOO_1.load(Ordering::Relaxed), 1);
        assert_eq!(FOO_2.load(Ordering::Relaxed), 2);
    }

    #[cfg(any(
        target_arch = "x86_64",
        target_arch = "aarch64",
        target_arch = "arm",
        target_arch = "x86"
    ))]
    #[test]
    fn test_macro_type_inference() {
        // The element type is inferred from the initializer.
        generic_static! {
            static _FOO = &();
        }
    }
}