// generic_static_cache — crate root (lib.rs)

#![cfg_attr(docsrs, feature(doc_cfg))]
#![allow(named_asm_labels)]
#![cfg_attr(not(feature = "std"), no_std)]

//! # generic_static_cache
//!
//! Quoting the [Rust Reference](https://doc.rust-lang.org/reference/items/static-items.html#statics--generics):
//!
//! > A static item defined in a generic scope (for example in a blanket or default implementation)
//! > will result in exactly one static item being defined, as if the static definition was pulled
//! > out of the current scope into the module. There will not be one item per monomorphization.
//!
//! One way to work around this is to use a `HashMap<TypeId,Data>`. This is a simple & usually the best solution.
//! If lookup performance is important, you can skip hashing the `TypeId` for minor gains as it
//! [already contains](https://github.com/rust-lang/rust/blob/eeff92ad32c2627876112ccfe812e19d38494087/library/core/src/any.rs#L645)
//! a good-quality hash.
//!
//! This crate aims to further speed up the lookup by allocating the storage using inline
//! assembly: accessing a generic static provided by this crate is a single address
//! computation, whereas a hashmap lookup takes more than 10× as long.
//!
//! ## ⚠ Caveats ⚠
//!
//! This crate isn't as well-tested as it should be.
//!
//! Supported targets are **x86-64**, **aarch64**, **arm** and **x86**;
//! on other targets, this crate falls back to a hashmap.
//!
//! ## no_std
//! On supported platforms, `global` is always available.
//!
//! With the `alloc` feature, `generic_static!` and `non_zeroable_global` become available.
//!
//! With the `std` feature, everything also becomes available on unsupported platforms.

pub use bytemuck;
38/// Access a zero-initialized global static instance of `T`.
39///
40/// For types that cannot be zero-initialized, use [`non_zeroable_global`].
41///
42/// # Example
43/// ```
44/// # use core::sync::atomic::{AtomicI32, Ordering::Relaxed};
45/// # use generic_static_cache::bytemuck;
46/// struct MyType(AtomicI32);
47/// unsafe impl bytemuck::Zeroable for MyType {}
48///
49/// assert_eq!(generic_static_cache::global::<MyType>().0.load(Relaxed), 0);
50/// generic_static_cache::global::<MyType>().0.store(1, Relaxed);
51/// assert_eq!(generic_static_cache::global::<MyType>().0.load(Relaxed), 1);
52/// assert_eq!(*generic_static_cache::global::<i32>(), 0);
53/// ```
54#[cfg(any(
55    feature = "std",
56    target_arch = "x86_64",
57    target_arch = "aarch64",
58    target_arch = "arm",
59    target_arch = "x86"
60))]
61#[inline(always)]
62pub fn global<T: bytemuck::Zeroable + Sync + 'static>() -> &'static T {
63    global_impl::<false, T>()
64}
66/// Like [`global`], but the symbol identifying the storage will not be exported.
67///
68/// This means different binaries linked together may not share the same instance
69/// of this global variable, but the binary may be smaller (if many such globals
70/// are exposed).
71///
72/// Globals defined with `global` and `local_global` are separate from each other.
73#[cfg(any(
74    feature = "std",
75    target_arch = "x86_64",
76    target_arch = "aarch64",
77    target_arch = "arm",
78    target_arch = "x86"
79))]
80#[inline(always)]
81pub fn local_global<T: bytemuck::Zeroable + Sync + 'static>() -> &'static T {
82    global_impl::<true, T>()
83}
85#[cfg(any(
86    feature = "std",
87    target_arch = "x86_64",
88    target_arch = "aarch64",
89    target_arch = "arm",
90    target_arch = "x86"
91))]
92#[inline(always)]
93fn global_impl<const LOCAL: bool, T: bytemuck::Zeroable + Sync + 'static>() -> &'static T {
94    assert!(core::mem::align_of::<T>() <= 4 * 1024);
95    #[cfg(any(
96        target_arch = "x86_64",
97        target_arch = "aarch64",
98        target_arch = "arm",
99        target_arch = "x86"
100    ))]
101    {
102        unsafe {
103            // Tested both with position-independent code and with -C relocation-model=static
104
105            // Reserve space in zeroed-initialized writable memory (bss).
106            // rustc may duplicate the content of this function,
107            // so using .ifndef to deduplicate them within the binary.
108            core::arch::asm!(
109                ".ifnotdef global_{local}_{id}",
110                ".if {local}",
111                ".local global_{local}_{id}",
112                ".endif",
113                ".comm global_{local}_{id}, {size}, {align}",
114                ".endif",
115                local = const if LOCAL {1} else {0},
116                id = sym global::<T>,
117                size = const core::mem::size_of::<T>(),
118                align = const core::mem::align_of::<T>(),
119                options(nomem)
120            );
121            // Now load the symbol's address
122            let addr: usize;
123            #[cfg(target_arch = "x86_64")]
124            {
125                core::arch::asm!(
126                    "lea {addr}, [rip+global_{local}_{id}]",
127                    addr = out(reg) addr,
128                    local = const if LOCAL {1} else {0},
129                    id = sym global::<T>,
130                    options(pure, nomem)
131                );
132            }
133            #[cfg(target_arch = "aarch64")]
134            {
135                core::arch::asm!(
136                    "adrp {addr}, global_{local}_{id}",
137                    "add {addr}, {addr}, :lo12:global_{local}_{id}",
138                    addr = out(reg) addr,
139                    local = const if LOCAL {1} else {0},
140                    id = sym global::<T>,
141                    options(pure, nomem)
142                );
143            }
144            #[cfg(target_arch = "arm")]
145            {
146                core::arch::asm!(
147                    "ldr {addr}, =global_{local}_{id}",
148                    addr = out(reg) addr,
149                    local = const if LOCAL {1} else {0},
150                    id = sym global::<T>,
151                    options(pure, nomem)
152                );
153            }
154            #[cfg(target_arch = "x86")]
155            {
156                core::arch::asm!(
157                    // AT&T syntax allows using multiple symbols in the memory operand
158                    "call 2f",
159                    "2: pop {addr}",
160                    "lea global_{local}_{id}-2b({addr}), {addr}",
161                    addr = out(reg) addr,
162                    local = const if LOCAL {1} else {0},
163                    id = sym global::<T>,
164                    options(pure, nomem, att_syntax)
165                );
166            }
167            &*(addr as *const _)
168        }
169    }
170    #[cfg(not(any(
171        target_arch = "x86_64",
172        target_arch = "aarch64",
173        target_arch = "arm",
174        target_arch = "x86"
175    )))]
176    {
177        pub(crate) struct SyncWrapper(*const u8);
178        unsafe impl Sync for SyncWrapper {}
179        unsafe impl Send for SyncWrapper {}
180        use core::any::TypeId;
181        static MAP: std::sync::RwLock<TypeIdMap<SyncWrapper>> =
182            std::sync::RwLock::new(TypeIdMap::with_hasher(NoOpTypeIdBuildHasher));
183        {
184            let guard = MAP.read().unwrap();
185            if let Some(value) = guard.get(&TypeId::of::<T>()) {
186                return unsafe { &*(value.0 as *const T) };
187            }
188        }
189        let mut guard = MAP.write().unwrap();
190        let value =
191            guard
192                .entry(TypeId::of::<T>())
193                .or_insert(SyncWrapper(
194                    alloc::boxed::Box::into_raw(alloc::boxed::Box::new(
195                        <T as bytemuck::Zeroable>::zeroed(),
196                    )) as *const u8,
197                ));
198        unsafe { &*(value.0 as *const T) }
199    }
200}
/// Access a generic global that can contain non-zeroable types.
///
/// This doesn't interfere with data accessed with [`global`].
///
/// # Example
/// ```
/// # use generic_static_cache::non_zeroable_global;
/// #[derive(PartialEq, Debug)]
/// struct MyType;
///
/// assert_eq!(non_zeroable_global::get::<MyType>(), None);
/// let _ = non_zeroable_global::init(MyType);
/// assert_eq!(non_zeroable_global::get::<MyType>(), Some(&MyType));
/// ```
#[cfg(all(
    feature = "alloc",
    any(
        feature = "std",
        target_arch = "x86_64",
        target_arch = "aarch64",
        target_arch = "arm",
        target_arch = "x86"
    )
))]
pub mod non_zeroable_global {
    extern crate alloc;

    use super::*;

    // TODO: Remove one layer of indirection on unsupported platforms

    /// Wrapper to prevent interfering with the user's `direct` calls
    struct Heap<T>(core::sync::atomic::AtomicPtr<T>);
    // SAFETY: the all-zero bit pattern is a valid `AtomicPtr` (a null pointer).
    unsafe impl<T> bytemuck::Zeroable for Heap<T> {}
    // SAFETY: the wrapper only hands out `&T`, so sharing it across threads is
    // sound exactly when `T: Sync`.
    unsafe impl<T: Sync> Sync for Heap<T> {}

    /// Error returned by [`init`] when storage for the type was already initialized.
    #[derive(Debug)]
    pub struct AlreadyInitialized;

    impl core::fmt::Display for AlreadyInitialized {
        fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result {
            f.write_str("global storage was already initialized")
        }
    }

    #[cfg(feature = "std")]
    impl std::error::Error for AlreadyInitialized {}

    /// Initialize the global initializable storage of type `Type`.
    ///
    /// If called multiple times, only the first call will succeed.
    ///
    /// # Errors
    /// Returns [`AlreadyInitialized`] (and drops `data`) if storage for `T`
    /// was already initialized.
    ///
    /// [`global`]: crate::global
    pub fn init<T: Sync + 'static>(data: T) -> Result<(), AlreadyInitialized> {
        use core::sync::atomic::Ordering;
        let boxed = alloc::boxed::Box::into_raw(alloc::boxed::Box::new(data));
        // Publish our allocation only if the slot is still null.
        match global::<Heap<T>>().0.compare_exchange(
            core::ptr::null_mut(),
            boxed,
            Ordering::SeqCst,
            Ordering::SeqCst,
        ) {
            Ok(_) => Ok(()),
            Err(_) => {
                // Lost the race (or already initialized): reclaim our allocation.
                unsafe {
                    drop(alloc::boxed::Box::from_raw(boxed));
                }
                Err(AlreadyInitialized)
            }
        }
    }

    /// Access the global initializable storage of type `Type`.
    ///
    /// Returns `None` if [`init`] has not succeeded yet.
    ///
    /// [`global`]: crate::global
    pub fn get<T: Sync + 'static>() -> Option<&'static T> {
        use core::sync::atomic::Ordering;
        let data = global::<Heap<T>>().0.load(Ordering::SeqCst);
        if data.is_null() {
            None
        } else {
            // SAFETY: a non-null pointer in the slot was produced by
            // `Box::into_raw` in `init` and is never freed afterwards.
            Some(unsafe { &*data })
        }
    }

    /// Initialize & access the global initializable storage of type `Type`.
    ///
    /// If this is called multiple times simultaneously,
    /// the `cons` argument of multiple invocations may be called,
    /// but only one result will be used.
    ///
    /// [`global`]: crate::global
    pub fn get_or_init<T: Sync + 'static>(cons: impl Fn() -> T) -> &'static T {
        use core::sync::atomic::Ordering;
        let data = global::<Heap<T>>().0.load(Ordering::SeqCst);
        if data.is_null() {
            // A failed `init` just means another thread won; `get` then succeeds.
            let _ = init::<_>(cons());
            get::<_>().unwrap()
        } else {
            // SAFETY: see `get`.
            unsafe { &*data }
        }
    }
}
/// Declare a static variable that is not shared across different monomorphizations
/// of the containing functions.
///
/// Its type must be a shared reference to a [`Sync`]`+'static` type,
/// and the initializer expression must start with a `&`.
/// Outer type variables may be used and the type hint is optional.
///
/// The initializing expression doesn't need to be `const`.
/// If this is executed for the first time in multiple threads simultaneously,
/// the initializing expression may get executed multiple times.
///
/// # Example
/// ```
/// # use core::sync::atomic::{AtomicU32, Ordering::Relaxed};
/// # use generic_static_cache::generic_static;
/// fn numeric_type_id<T>() -> u32 {
///     static NEXT: AtomicU32 = AtomicU32::new(0);
///     generic_static!{
///         static ID: &u32 = &NEXT.fetch_add(1, Relaxed);
///     }
///     *ID
/// }
/// assert_eq!(numeric_type_id::<bool>(), 0);
/// assert_eq!(numeric_type_id::<String>(), 1);
/// assert_eq!(numeric_type_id::<i32>(), 2);
/// assert_eq!(numeric_type_id::<bool>(), 0);
/// ```
#[cfg(all(
    feature = "alloc",
    any(
        feature = "std",
        target_arch = "x86_64",
        target_arch = "aarch64",
        target_arch = "arm",
        target_arch = "x86"
    )
))]
#[macro_export]
macro_rules! generic_static {
    {static $ident:ident $(: &$type:ty)? = &$init:expr;} => {
        #[allow(non_snake_case)]
        let $ident $(: &'static $type)? = {
            #[cfg(any(
                target_arch = "x86_64",
                target_arch = "aarch64",
                target_arch = "arm",
                target_arch = "x86"
            ))] {
                extern crate alloc;

                let init = ||$init;
                fn assert_sync_static<T: Sync + 'static>(_: &impl FnOnce() -> T) {}
                assert_sync_static(&init);

                // Use empty closure to create a new type to use as a unique key,
                // use reference to initializer to infer type of static data
                fn make<Key: 'static, Value: Sync + 'static>(_: Key, _: &impl FnOnce()->Value)
                -> &'static ::core::sync::atomic::AtomicPtr<Value> {
                    struct Holder<T, D> {
                        _marker: core::marker::PhantomData<T>,
                        value: core::sync::atomic::AtomicPtr<D>
                    }
                    unsafe impl<T, D> $crate::bytemuck::Zeroable for Holder<T,D>{}
                    unsafe impl<T, D> Sync for Holder<T,D>{}
                    &$crate::global::<Holder<Key, Value>>().value
                }
                let ptr = make(||(), &init);

                let data = ptr.load(::core::sync::atomic::Ordering::SeqCst);
                if data.is_null() {
                    // Need to call initializer
                    // This can be called multiple times if executed for the first time
                    // in multiple threads simultaneously!
                    let boxed = alloc::boxed::Box::into_raw(alloc::boxed::Box::new(init()));
                    match ptr.compare_exchange(
                        ::core::ptr::null_mut(),
                        boxed,
                        ::core::sync::atomic::Ordering::SeqCst,
                        ::core::sync::atomic::Ordering::SeqCst,
                    ) {
                        // We won the race: our allocation becomes the static.
                        Ok(_) => unsafe { &*boxed },
                        // Another thread won. Free our allocation and return the
                        // winner's pointer (returned in the `Err` payload).
                        // Dereferencing `boxed` after freeing it here would be a
                        // use-after-free.
                        Err(winner) => {
                            unsafe {
                                drop(alloc::boxed::Box::from_raw(boxed));
                            }
                            unsafe { &*winner }
                        }
                    }
                } else {
                    unsafe { &*data }
                }
            }
            #[cfg(not(any(
                target_arch = "x86_64",
                target_arch = "aarch64",
                target_arch = "arm",
                target_arch = "x86"
            )))] {
                #[cfg(not(feature = "std"))]
                compile_error!("Unsupported platform, enable feature \"std\" to enable fallback");

                // `alloc` is not declared at the crate root; needed for `Box` below.
                extern crate alloc;

                struct SyncWrapper(*const u8);
                unsafe impl Sync for SyncWrapper {}
                unsafe impl Send for SyncWrapper {}
                fn id<T: 'static>(_: T) -> core::any::TypeId {
                    core::any::TypeId::of::<T>()
                }
                // Each closure expression defines a closure type [expr.closure.intro].
                // As closures can depend on generic types from a surrounding items,
                // closures in different instantiations of outer generics must have different types
                // and therefore must be different closure expressions.
                // The types defined by closure expressions are unique [expr.closure.unique-type].
                let id = id(||());
                static MAP: ::std::sync::RwLock<$crate::TypeIdMap<SyncWrapper>> =
                    ::std::sync::RwLock::new($crate::TypeIdMap::with_hasher(
                        $crate::NoOpTypeIdBuildHasher,
                    ));
                // The label is required for the early `break 'block` on the fast path.
                'block: {
                    {
                        let guard = MAP.read().unwrap();
                        if let Some(value) = guard.get(&id) {
                            break 'block unsafe { &*(value.0 as *const _) };
                        }
                    }
                    let mut guard = MAP.write().unwrap();
                    // `or_insert_with` so the initializer only runs (and only
                    // allocates) when the entry is genuinely vacant.
                    let value = guard.entry(id).or_insert_with(|| {
                        SyncWrapper(
                            alloc::boxed::Box::into_raw(alloc::boxed::Box::new($init)) as *const u8,
                        )
                    });
                    unsafe { &*(value.0 as *const _) }
                }
            }
        };
    };
}

#[cfg(feature = "std")]
pub use with_std::*;

#[cfg(feature = "std")]
mod with_std {
    use core::any::TypeId;
    use core::hash::{BuildHasher, Hasher};
    use std::collections::HashMap;

    /// Fast map keyed by `TypeId`.
    ///
    /// Skips real hashing: a `TypeId` already carries a good hash, which
    /// [`NoOpTypeIdHasher`] simply passes through.
    pub type TypeIdMap<T> = HashMap<TypeId, T, NoOpTypeIdBuildHasher>;

    /// Hasher for [`TypeIdMap`].
    #[derive(Default)]
    pub struct NoOpTypeIdBuildHasher;

    impl BuildHasher for NoOpTypeIdBuildHasher {
        type Hasher = NoOpTypeIdHasher;

        fn build_hasher(&self) -> Self::Hasher {
            NoOpTypeIdHasher(0)
        }
    }

    #[doc(hidden)]
    #[derive(Default)]
    pub struct NoOpTypeIdHasher(u64);

    impl Hasher for NoOpTypeIdHasher {
        fn finish(&self) -> u64 {
            self.0
        }

        fn write(&mut self, bytes: &[u8]) {
            // Slow, bad quality fallback to not break applications in case the
            // std implementation stops feeding the TypeId hash through write_u64.
            let mut acc = self.0;
            for &byte in bytes {
                acc = acc.rotate_left(8).wrapping_add(u64::from(byte));
            }
            self.0 = acc;
        }

        fn write_u64(&mut self, i: u64) {
            // The TypeId's precomputed hash arrives here; keep it verbatim.
            self.0 = i
        }
    }
}
#[cfg(test)]
mod test {
    use std::{
        any::TypeId,
        hash::{Hash, Hasher},
    };

    // Distinct type parameters must get distinct, zero-initialized storage,
    // and writes through one must not be visible through another.
    #[test]
    fn test_local_global() {
        use crate::local_global;
        use core::sync::atomic::{AtomicI32, AtomicI64, Ordering};

        let a = local_global::<AtomicI32>();
        let b = local_global::<AtomicI64>();
        assert_eq!(a.load(Ordering::Relaxed), 0);
        a.store(69, Ordering::Relaxed);
        assert_eq!(a.load(Ordering::Relaxed), 69);
        // `b` must be unaffected by the write through `a`.
        assert_eq!(b.load(Ordering::Relaxed), 0);
        // `i64` is a different type from `AtomicI64`, so separate storage again.
        assert_eq!(*local_global::<i64>(), 0);

        core::hint::black_box(local_global::<AtomicI64>());
    }

    // Each monomorphization of `get_and_inc` keeps its own counter; two
    // top-level `generic_static!` invocations in the same scope are also
    // independent of each other.
    #[test]
    fn test_macro() {
        use core::sync::atomic::{AtomicI32, Ordering};
        #[allow(clippy::extra_unused_type_parameters)]
        fn get_and_inc<T: 'static>() -> i32 {
            generic_static!(
                static BLUB: &AtomicI32 = &AtomicI32::new(1);
            );
            let value = BLUB.load(Ordering::Relaxed);
            BLUB.fetch_add(1, Ordering::Relaxed);
            value
        }
        assert_eq!(get_and_inc::<bool>(), 1);
        assert_eq!(get_and_inc::<bool>(), 2);
        assert_eq!(get_and_inc::<i32>(), 1);
        assert_eq!(get_and_inc::<bool>(), 3);

        generic_static!(
            static FOO_1: &AtomicI32 = &AtomicI32::new(0);
        );
        generic_static!(
            static FOO_2: &AtomicI32 = &AtomicI32::new(69);
        );
        assert_eq!(FOO_1.load(Ordering::Relaxed), 0);
        assert_eq!(FOO_2.load(Ordering::Relaxed), 69);
        FOO_1.store(1, Ordering::Relaxed);
        FOO_2.store(2, Ordering::Relaxed);
        assert_eq!(FOO_1.load(Ordering::Relaxed), 1);
        assert_eq!(FOO_2.load(Ordering::Relaxed), 2);
    }

    // Compile-time check: the macro's optional type hint may be omitted and the
    // static's type inferred from an outer generic parameter.
    #[test]
    fn test_macro_types() {
        fn generic<T: Sync + 'static>(t: T) {
            generic_static! {
                static _FOO = &t;
            }
        }
        generic(0);
        generic(true);
    }

    // Verifies the assumption NoOpTypeIdHasher relies on: hashing a TypeId only
    // calls `write_u64` (and `finish`) — `write` would panic here.
    #[test]
    fn type_id_hash() {
        TypeId::of::<()>().hash(&mut {
            struct H;
            impl Hasher for H {
                fn finish(&self) -> u64 {
                    0
                }

                fn write(&mut self, _: &[u8]) {
                    unimplemented!()
                }

                fn write_u64(&mut self, _: u64) {}
            }
            H
        });
    }
}