//! `tearor` provides [`TearCell`], a (barely) thread-safe, lock-free cell type
//! that gives tearing access to any type which is [`Pod`].
//!
//! "Tearing" access means that a single logical read or write is performed as
//! multiple smaller, separate operations -- for example, writing to a
//! `&mut u32` with four one-byte writes, or vice versa.
//!
//! `TearCell` uses the same idea, but with atomics. If your `T` is too large to
//! fit inside an atomic, then `TearCell` will split it over a few operations.
//!
//! Needless to say, this means calls to `TearCell::load`, `TearCell::store`,
//! etc. are *not* atomic (nor do they provide *any* guarantees about
//! ordering). However, every individual operation the `TearCell` performs *is*
//! atomic (with the weakest ordering we can get our hands on), which is enough
//! to avoid data races.
//!
//! It's essentially a tool for turning data races into data corruption. If the
//! lack of synchronization would cause a data race (e.g. with `UnsafeCell`), then
//! `TearCell` is very likely to corrupt your data.
//!
//! However, if this does not matter to you for one reason or another (examples:
//! your synchronization is performed externally, you want to perform an
//! optimistic read, all threads are writing the same value, or you miss the fun
//! you had debugging data corruption issues in C++), then `TearCell` might be
//! what you want.
//!
//! # This library might not be for you if...
//!
//! - You aren't sure if tearing is acceptable for your use case, or aren't sure
//!   you understand what it means. You should just use a `Mutex<T>` or
//!   `RwLock<T>` then. These are much harder to misuse, and can't corrupt your
//!   data.
//!
//! - You don't require `Sync` -- e.g. `Cell<T>` would be acceptable for your
//!   use case. In this case, use `Cell<T>`.
//!
//! - Your `T` is not `Send`. In this case, `TearCell<T>` will be neither `Send`
//!   nor `Sync`, which makes it pretty useless.
//!
//! - Your `T` is not plain-old-data ([`Pod`]) -- e.g. it has padding bytes,
//!   isn't `Copy`, contains references, or has bit patterns which would cause
//!   undefined behavior, etc. (Examples: `#[repr(Rust)]` types, `bool`s,
//!   enums, `char`, references...).
//!
//! - Your `T` is POD but fits inside one of the atomic types in
//!   `core::sync::atomic`. `TearCell` will be strictly worse in every way than
//!   just performing `Relaxed` loads/stores on that type.
//!
//! - You need to compile for a target which doesn't provide `AtomicU8` through
//!   `AtomicU64`. Currently this is unsupported, but I'd accept a PR adding
//!   the `#[cfg]`s needed.
//!
//! - Your `T` is fairly large but only has 1-byte alignment, or has an odd
//!   size (for example, `[u8; 127]`). For these, performance will be
//!   suboptimal, though this may be improved in the future.
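//!
//! # Example
//!
//! A minimal sketch of sharing a `TearCell` across threads. (`std` is used
//! here only for the doctest; the crate itself is `no_std`.)
//!
//! ```
//! use tearor::TearCell;
//!
//! // A value too large to fit in any single atomic.
//! static CELL: TearCell<[u32; 4]> = TearCell::new([0; 4]);
//!
//! std::thread::scope(|s| {
//!     // One thread stores while the other loads. The load may observe a mix
//!     // of old (zero) and new chunks -- torn data, but never UB.
//!     let writer = s.spawn(|| CELL.store([1, 2, 3, 4]));
//!     let reader = s.spawn(|| CELL.load());
//!     writer.join().unwrap();
//!     let _snapshot: [u32; 4] = reader.join().unwrap();
//! });
//! ```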

#![no_std]

#[cfg(test)]
extern crate alloc;

// Re-export relevant types from bytemuck
pub use bytemuck::{Pod, Zeroable};

use core::cell::UnsafeCell;
use core::mem::{align_of, size_of};
use core::sync::atomic::*;

/// `TearCell` is a minimally thread-safe cell type for anything that can meet the
/// requirements of `Pod`.
///
/// By "minimally thread-safe", I mean that it's completely possible for
/// `TearCell` to corrupt whatever you store in it, but that *strictly speaking*
/// all reads and writes are atomic, and thus you have no undefined behavior.
/// That said, writes and reads from the `TearCell` itself are *not* atomic.
///
/// It's intended for cases where `T` does not fit into an atomic type, you
/// don't want to use a `Mutex`, and you happen to know that *in practice* data
/// races aren't a concern (for example, synchronization is provided externally
/// in a way you can't express to the compiler, or you just don't really care).
///
/// ## Implementation details
///
/// **Note: These are not part of TearCell's stability guarantee.**
///
/// `TearCell<T>` wraps an `UnsafeCell<T>` and implements reads and writes by
/// interpreting it as a `&[AtomicU$N]`, where `$N` is the largest supported
/// integer size that meets `T`'s size and alignment requirements.
///
/// For loads, we zero-initialize a target `T`, interpret it as a
/// `&mut [u$N]`, and fill it by performing relaxed loads from our atomics.
///
/// For stores, we interpret the provided value as a `&[u$N]` and perform a
/// relaxed store into the corresponding atomic for each chunk we read from it.
#[repr(transparent)]
pub struct TearCell<T>(UnsafeCell<T>);

unsafe impl<T: Pod + Send> Sync for TearCell<T> {}
unsafe impl<T: Pod + Send> Send for TearCell<T> {}

impl<T> TearCell<T> {
    /// Create a new `TearCell` wrapping `v`.
    ///
    /// This should probably only be done if `T: Pod`, as the cell is almost
    /// entirely useless without it. However, adding a `where T: Pod` bound
    /// would currently mean `TearCell::new` could no longer be a `const fn`,
    /// which would make it drastically less useful.
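    ///
    /// Since `new` is a `const fn`, a `TearCell` can live in a `static`. A
    /// minimal sketch:
    ///
    /// ```
    /// use tearor::TearCell;
    ///
    /// static SHARED: TearCell<[u8; 16]> = TearCell::new([0u8; 16]);
    /// assert_eq!(SHARED.load(), [0u8; 16]);
    /// ```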
    #[inline]
    pub const fn new(v: T) -> Self {
        Self(UnsafeCell::new(v))
    }

    /// Unwraps the stored value.
    #[inline]
    pub fn into_inner(self) -> T {
        self.0.into_inner()
    }
}

#[inline]
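/// Returns `true` if `T` can be read/written as a whole number of `U` atomics:
/// `U` must be no more aligned than `T`, no larger than `T`, and `U`'s size
/// must evenly divide `T`'s size.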
fn tearcell_can_use_atom<T, U: Atom>() -> bool {
    (align_of::<T>() >= align_of::<U>())
        && (size_of::<T>() >= size_of::<U>())
        && (size_of::<T>() % size_of::<U>()) == 0
}

impl<T: Pod> TearCell<T> {
    /// Store `value` into the cell. No guarantees of ordering or atomicity are
    /// provided for this write, only that it will not cause undefined
    /// behavior.
    ///
    /// See also `store_ref` if `T` is large enough that you'd rather avoid
    /// copying it on the stack.
    #[inline]
    pub fn store(&self, value: T) {
        self.store_ref(&value)
    }

    /// Read the value out of this cell. No guarantees of ordering or atomicity
    /// are provided for this read, only that it will not cause undefined
    /// behavior.
    ///
    /// Specifically, the `T` returned may not be any value that was ever
    /// stored; see the crate-level docs for more info.
    #[inline]
    pub fn load(&self) -> T {
        if size_of::<T>() == 0 {
            T::zeroed()
        } else if tearcell_can_use_atom::<T, AtomicU64>() {
            self.do_load::<AtomicU64>()
        } else if tearcell_can_use_atom::<T, AtomicU32>() {
            self.do_load::<AtomicU32>()
        } else if tearcell_can_use_atom::<T, AtomicU16>() {
            self.do_load::<AtomicU16>()
        } else if tearcell_can_use_atom::<T, AtomicU8>() {
            self.do_load::<AtomicU8>()
        } else {
            unreachable!();
        }
    }

    /// Equivalent to `store`, but takes the value by reference. No guarantees
    /// of ordering or atomicity are provided for this write, only that it will
    /// not cause undefined behavior.
    #[inline]
    pub fn store_ref(&self, value: &T) {
        if size_of::<T>() == 0 {
            return;
        } else if tearcell_can_use_atom::<T, AtomicU64>() {
            self.do_store::<AtomicU64>(value)
        } else if tearcell_can_use_atom::<T, AtomicU32>() {
            self.do_store::<AtomicU32>(value)
        } else if tearcell_can_use_atom::<T, AtomicU16>() {
            self.do_store::<AtomicU16>(value)
        } else if tearcell_can_use_atom::<T, AtomicU8>() {
            self.do_store::<AtomicU8>(value)
        } else {
            unreachable!();
        }
    }

    #[inline]
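    /// View the cell's storage as a slice of atomics. Assumes `A`'s size
    /// evenly divides `T`'s size and that `T` is at least as aligned as `A`
    /// (checked by `tearcell_can_use_atom` before we get here).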
    fn atom_slice<A: Atom>(&self) -> &[A] {
        let size = size_of::<T>() / size_of::<A>();
        assert!(size != 0);
        assert!(size * size_of::<A>() == size_of::<T>());
        unsafe { core::slice::from_raw_parts(self.0.get() as *const A, size) }
    }

    #[inline]
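    /// Tearing load: relaxed-load each atomic chunk of the cell into a
    /// zero-initialized `T`.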
    fn do_load<A: Atom>(&self) -> T {
        let mut result = T::zeroed();
        let src: &[A] = self.atom_slice();
        let dst: &mut [A::Prim] =
            bytemuck::try_cast_slice_mut(core::slice::from_mut(&mut result)).unwrap();
        assert_eq!(src.len(), dst.len());
        for (db, sb) in dst.iter_mut().zip(src.iter()) {
            *db = sb.get();
        }
        result
    }

    #[inline]
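    /// Tearing store: relaxed-store each chunk of `v` into the cell's atomics.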
    fn do_store<A: Atom>(&self, v: &T) {
        let src: &[A::Prim] = bytemuck::try_cast_slice(core::slice::from_ref(v)).unwrap();
        let dst: &[A] = self.atom_slice();
        assert_eq!(src.len(), dst.len());
        for (d, s) in dst.iter().zip(src.iter()) {
            d.set(*s);
        }
    }
}

/// Used to implement the underlying tearing loads/stores.
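///
/// Safety: implementors must have the same size as `Prim`, and it must be
/// sound to view suitably-aligned memory holding `Prim` values as `&[Self]`
/// (this is what `atom_slice` relies on).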
unsafe trait Atom: Sync + Send + 'static + Sized {
    type Prim: Pod;
    fn get(&self) -> Self::Prim;
    fn set(&self, p: Self::Prim);
}
macro_rules! def_atom {
    ($Atom:ty, $prim:ty) => {
        unsafe impl Atom for $Atom {
            type Prim = $prim;
            #[inline]
            fn get(&self) -> Self::Prim {
                self.load(Ordering::Relaxed)
            }
            #[inline]
            fn set(&self, v: Self::Prim) {
                self.store(v, Ordering::Relaxed);
            }
        }
    };
}
def_atom!(AtomicU64, u64);
// def_atom!(AtomicUsize, usize);
def_atom!(AtomicU32, u32);
def_atom!(AtomicU16, u16);
def_atom!(AtomicU8, u8);

// Don't bother with u16/u32/etc -- we're only bothering with u8 because we
// can't statically prevent it.

#[cfg(test)]
mod test {
    use super::*;
    use core::convert::*;
    #[test]
    fn test_basic() {
        let v: TearCell<[u8; 0]> = TearCell::new([]);
        assert_eq!(v.load(), []);
        v.store([]);
        v.store_ref(&[]);

        let v: TearCell<[usize; 0]> = TearCell::new([]);
        assert_eq!(v.load(), []);
        v.store([]);
        v.store_ref(&[]);

        let v: TearCell<[u8; 1]> = TearCell::new([0u8; 1]);
        assert_eq!(v.load(), [0]);
        v.store([1]);
        assert_eq!(v.load(), [1]);
        v.store_ref(&[2]);
        assert_eq!(v.load(), [2]);
    }
    macro_rules! test_arr {
        ($($n:expr),+) => {$(
            do_test_arr::<u8, [u8; $n]>();
            do_test_arr::<u16, [u16; $n]>();
            do_test_arr::<u32, [u32; $n]>();
            do_test_arr::<u64, [u64; $n]>();
            do_test_arr::<usize, [usize; $n]>();
            do_test_arr::<u128, [u128; $n]>();
        )+};
    }

    #[test]
    fn test_many() {
        test_arr![2, 3, 4, 5, 6, 7, 8, 9, 10];
        // Larger sizes slow compilation down a lot:
        //  11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25,
        //     26, 27, 28, 29, 30, 31, 32
    }

    #[test]
    fn test_overaligned() {
        #[derive(Copy, Clone, PartialEq, Default, Debug)]
        #[repr(C, align(16))]
        struct Overaligned([u8; 16]);

        unsafe impl bytemuck::Zeroable for Overaligned {}
        unsafe impl bytemuck::Pod for Overaligned {}

        impl AsRef<[u8]> for Overaligned {
            fn as_ref(&self) -> &[u8] {
                &self.0
            }
        }

        do_test::<u8, Overaligned>(16, move |s: &[u8]| Overaligned(s.try_into().unwrap()));
    }

    fn do_test_arr<T, Arr>()
    where
        T: Copy + From<u8> + Default + core::ops::Not<Output = T> + PartialEq + core::fmt::Debug,
        Arr: Pod + AsRef<[T]> + PartialEq + core::fmt::Debug,
        for<'a> Arr: TryFrom<&'a [T]>,
    {
        do_test(
            core::mem::size_of::<Arr>() / core::mem::size_of::<T>(),
            move |v: &[T]| -> Arr { v.try_into().ok().unwrap() },
        )
    }
    fn do_test<T, Arr>(size: usize, make: fn(&[T]) -> Arr)
    where
        T: Copy + From<u8> + Default + core::ops::Not<Output = T> + PartialEq + core::fmt::Debug,
        Arr: Pod + AsRef<[T]> + PartialEq + core::fmt::Debug,
    {
        let mut v0 = alloc::vec![Default::default(); size];
        let a0: Arr = make(&v0);
        let tc0: TearCell<Arr> = TearCell::new(a0);
        assert_eq!(tc0.load(), a0);
        for i in 0..size {
            v0[i] = ((i + 1) as u8).into();
        }

        let a0: Arr = make(&v0);
        let tc1 = TearCell::new(a0);
        assert_eq!(&v0[..], tc1.load().as_ref());
        tc0.store(a0);
        assert_eq!(&v0[..], tc0.load().as_ref());
        v0.reverse();

        let ar0: Arr = make(&v0);
        tc0.store_ref(&ar0);
        assert_eq!(&v0[..], tc0.load().as_ref());

        for i in 0..size {
            v0[i] = !v0[i]
        }
        let a0: Arr = make(&v0);
        tc0.store(a0);
        assert_eq!(&v0[..], tc0.load().as_ref());

        tc1.store_ref(&a0);
        assert_eq!(&v0[..], tc1.load().as_ref());
    }
}