atomic_memcpy/lib.rs
1// SPDX-License-Identifier: Apache-2.0 OR MIT
2
3/*!
4<!-- Note: Document from sync-markdown-to-rustdoc:start through sync-markdown-to-rustdoc:end
5 is synchronized from README.md. Any changes to that range are not preserved. -->
6<!-- tidy:sync-markdown-to-rustdoc:start -->
7
8Byte-wise atomic memcpy.
9
10This is an attempt to implement equivalent of C++ ["P1478: Byte-wise atomic memcpy"][p1478] in Rust.
11
12This is expected to allow algorithms such as Seqlock and Chase-Lev deque to be implemented without UB of data races.
13See [P1478][p1478] for more.
14
15## Status
16
17- If the alignment of the type being copied is the same as the pointer width, `atomic_load` is possible to produce an assembly roughly equivalent to the case of using volatile read + atomic fence on many platforms. (e.g., [aarch64](https://github.com/taiki-e/atomic-memcpy/blob/ce18c00c073e682a6d439ef1fa9ac75f3d6148a0/tests/asm-test/asm/aarch64.asm#L495-L525), [riscv64](https://github.com/taiki-e/atomic-memcpy/blob/ce18c00c073e682a6d439ef1fa9ac75f3d6148a0/tests/asm-test/asm/riscv64gc.asm#L607-L645). See [`tests/asm-test/asm`][asm-test] directory for more).
18- If the alignment of the type being copied is smaller than the pointer width, there will be some performance degradation. However, it is implemented in such a way that it does not cause extreme performance degradation at least on x86_64. (See [the implementation comments of `atomic_load`][implementation] for more.) It is possible that there is still room for improvement, especially on non-x86_64 platforms.
19- Optimization for the case where the alignment of the type being copied is larger than the pointer width has not yet been fully investigated. It is possible that there is still room for improvement.
20- If the type being copied contains pointers it is not compatible with strict provenance because the copy does ptr-to-int transmutes.
21- If the type being copied contains uninitialized bytes (e.g., padding) [it is undefined behavior because the copy goes through integers][undefined-behavior]. This problem will probably not be resolved until something like `AtomicMaybeUninit` is supported.
22
23## Related Projects
24
25- [portable-atomic]: Portable atomic types including support for 128-bit atomics, atomic float, etc. Using byte-wise atomic memcpy to implement Seqlock, which is used in the fallback implementation.
26- [atomic-maybe-uninit]: Atomic operations on potentially uninitialized integers.
27- [asmtest]: A library for tracking generated assemblies.
28
29[asm-test]: https://github.com/taiki-e/atomic-memcpy/tree/HEAD/tests/asm-test/asm
30[asmtest]: https://github.com/taiki-e/asmtest
31[atomic-maybe-uninit]: https://github.com/taiki-e/atomic-maybe-uninit
32[implementation]: https://github.com/taiki-e/atomic-memcpy/blob/v0.2.0/src/lib.rs#L367-L427
33[p1478]: https://www.open-std.org/jtc1/sc22/wg21/docs/papers/2022/p1478r8.html
34[portable-atomic]: https://github.com/taiki-e/portable-atomic
35[undefined-behavior]: https://doc.rust-lang.org/reference/behavior-considered-undefined.html
36
37<!-- tidy:sync-markdown-to-rustdoc:end -->
38*/
39
40#![no_std]
41#![doc(test(
42 no_crate_inject,
43 attr(allow(
44 dead_code,
45 unused_variables,
46 clippy::undocumented_unsafe_blocks,
47 clippy::unused_trait_names,
48 ))
49))]
50#![cfg_attr(test, warn(unsafe_op_in_unsafe_fn))] // unsafe_op_in_unsafe_fn requires Rust 1.52
51#![cfg_attr(not(test), allow(unused_unsafe))]
52#![warn(
53 // Lints that may help when writing public library.
54 missing_debug_implementations,
55 missing_docs,
56 clippy::alloc_instead_of_core,
57 clippy::exhaustive_enums,
58 clippy::exhaustive_structs,
59 clippy::impl_trait_in_params,
60 clippy::std_instead_of_alloc,
61 clippy::std_instead_of_core,
62 clippy::missing_inline_in_public_items,
63)]
64#![allow(clippy::doc_link_with_quotes, clippy::inline_always)]
65
66// This crate should work on targets with power-of-two pointer widths,
67// but it is not clear how it will work on targets without them.
68// There are currently no 128-bit or higher builtin targets.
69// Note that Rust (and C99) pointers must be at least 16-bit (i.e., 8-bit targets are impossible): https://github.com/rust-lang/rust/pull/49305
70#[cfg(not(any(
71 target_pointer_width = "16",
72 target_pointer_width = "32",
73 target_pointer_width = "64",
74)))]
75compile_error!(
76 "atomic-memcpy currently only supports targets with {16,32,64}-bit pointer width; \
77 if you need support for others, \
78 please submit an issue at <https://github.com/taiki-e/atomic-memcpy>"
79);
80
81#[cfg(not(target_os = "none"))]
82use core::sync::atomic;
83use core::sync::atomic::Ordering;
84
85#[cfg(target_os = "none")]
86use portable_atomic as atomic;
87
88/// Byte-wise atomic load.
89///
90/// # Safety
91///
92/// Behavior is undefined if any of the following conditions are violated:
93///
/// - `src` must be [valid] for reads.
95/// - `src` must be properly aligned.
96/// - `src` must go through [`UnsafeCell::get`](core::cell::UnsafeCell::get).
97/// - `T` must not contain uninitialized bytes.
98/// - There are no concurrent non-atomic write operations.
99/// - There are no concurrent atomic write operations of different
100/// granularity. The granularity of atomic operations is an implementation
101/// detail, so the concurrent write operation that can always
102/// safely be used is only [`atomic_store`].
103///
104/// Like [`ptr::read`](core::ptr::read), `atomic_load` creates a bitwise copy of `T`, regardless of
105/// whether `T` is [`Copy`]. If `T` is not [`Copy`], using both the returned
106/// value and the value at `*src` can [violate memory safety][read-ownership].
107///
108/// Note that even if `T` has size `0`, the pointer must be non-null.
109///
110/// ## Returned value
111///
112/// This function returns [`MaybeUninit<T>`](core::mem::MaybeUninit) instead of `T`.
113///
114/// - All bits in the returned value are guaranteed to be copied from `src`.
115/// - There is *no* guarantee that all bits in the return have been copied at
116/// the same time, so if `src` is updated by a concurrent write operation,
117/// it is up to the caller to make sure that the returned value is valid as `T`.
118///
119/// [read-ownership]: core::ptr::read#ownership-of-the-returned-value
120/// [valid]: core::ptr#safety
121///
122/// # Panics
123///
124/// Panics if `order` is [`Release`](Ordering::Release) or [`AcqRel`](Ordering::AcqRel).
125///
126/// # Examples
127///
128/// ```
129/// use std::{cell::UnsafeCell, sync::atomic::Ordering};
130///
131/// let v = UnsafeCell::new([0_u8; 64]);
132/// let result = unsafe { atomic_memcpy::atomic_load(v.get(), Ordering::Acquire) };
133/// // SAFETY: there was no concurrent write operations during load.
134/// assert_eq!(unsafe { result.assume_init() }, [0; 64]);
135/// ```
136#[cfg_attr(feature = "inline-always", inline(always))]
137#[cfg_attr(not(feature = "inline-always"), inline)]
138pub unsafe fn atomic_load<T>(src: *const T, order: Ordering) -> core::mem::MaybeUninit<T> {
139 assert_load_ordering(order);
140 // SAFETY: the caller must uphold the safety contract for `atomic_load`.
141 let val = unsafe { imp::atomic_load(src) };
142 match order {
143 Ordering::Relaxed => { /* no-op */ }
144 _ => atomic::fence(order),
145 }
146 val
147}
148
149/// Byte-wise atomic store.
150///
151/// # Safety
152///
153/// Behavior is undefined if any of the following conditions are violated:
154///
155/// - `dst` must be [valid] for writes.
156/// - `dst` must be properly aligned.
157/// - `dst` must go through [`UnsafeCell::get`](core::cell::UnsafeCell::get).
158/// - `T` must not contain uninitialized bytes.
159/// - There are no concurrent non-atomic operations.
160/// - There are no concurrent atomic operations of different
161/// granularity. The granularity of atomic operations is an implementation
162/// detail, so the concurrent operation that can always
163/// safely be used is only [`atomic_load`].
164///
165/// If there are concurrent write operations, the resulting value at `*dst` may
166/// contain a mixture of bytes written by this thread and bytes written by
167/// another thread. If `T` is not valid for all bit patterns, using the value at
168/// `*dst` can violate memory safety.
169///
170/// Note that even if `T` has size `0`, the pointer must be non-null.
171///
172/// [valid]: core::ptr#safety
173///
174/// # Panics
175///
176/// Panics if `order` is [`Acquire`](Ordering::Acquire) or [`AcqRel`](Ordering::AcqRel).
177///
178/// # Examples
179///
180/// ```
181/// use std::{cell::UnsafeCell, sync::atomic::Ordering};
182///
183/// let v = UnsafeCell::new([0_u8; 64]);
184/// unsafe {
185/// atomic_memcpy::atomic_store(v.get(), [1; 64], Ordering::Release);
186/// }
187/// let result = unsafe { atomic_memcpy::atomic_load(v.get(), Ordering::Acquire) };
188/// // SAFETY: there was no concurrent write operations during load.
189/// assert_eq!(unsafe { result.assume_init() }, [1; 64]);
190/// ```
191#[cfg_attr(feature = "inline-always", inline(always))]
192#[cfg_attr(not(feature = "inline-always"), inline)]
193pub unsafe fn atomic_store<T>(dst: *mut T, val: T, order: Ordering) {
194 assert_store_ordering(order);
195 match order {
196 Ordering::Relaxed => { /* no-op */ }
197 _ => atomic::fence(order),
198 }
199 // SAFETY: the caller must uphold the safety contract for `atomic_store`.
200 unsafe {
201 imp::atomic_store(dst, val);
202 }
203}
204
205// https://github.com/rust-lang/rust/blob/1.84.0/library/core/src/sync/atomic.rs#L3338
#[cfg_attr(feature = "inline-always", inline(always))]
#[cfg_attr(not(feature = "inline-always"), inline)]
fn assert_load_ordering(order: Ordering) {
    // Mirrors core::sync::atomic's own validation: loads may not use
    // release semantics.
    match order {
        Ordering::Release => panic!("there is no such thing as a release load"),
        Ordering::AcqRel => panic!("there is no such thing as an acquire-release load"),
        Ordering::Acquire | Ordering::Relaxed | Ordering::SeqCst => {}
        _ => unreachable!(),
    }
}
216// https://github.com/rust-lang/rust/blob/1.84.0/library/core/src/sync/atomic.rs#L3323
#[cfg_attr(feature = "inline-always", inline(always))]
#[cfg_attr(not(feature = "inline-always"), inline)]
fn assert_store_ordering(order: Ordering) {
    // Mirrors core::sync::atomic's own validation: stores may not use
    // acquire semantics.
    match order {
        Ordering::Acquire => panic!("there is no such thing as an acquire store"),
        Ordering::AcqRel => panic!("there is no such thing as an acquire-release store"),
        Ordering::Release | Ordering::Relaxed | Ordering::SeqCst => {}
        _ => unreachable!(),
    }
}
227
228mod imp {
229 use core::{
230 mem::{self, ManuallyDrop, MaybeUninit},
231 ops::Range,
232 };
233
234 #[cfg(not(target_pointer_width = "16"))]
235 use crate::atomic::AtomicU32;
236 use crate::atomic::{AtomicU16, AtomicUsize, Ordering};
237
238 // Boundary to make the fields of LoadState private.
239 //
240 // Note that this is not a complete safe/unsafe boundary[1], since it is still
241 // possible to pass an invalid pointer to the constructor.
242 //
243 // [1]: https://www.ralfj.de/blog/2016/01/09/the-scope-of-unsafe.html
    mod load {
        use core::mem;

        use crate::atomic::{AtomicU8, AtomicUsize, Ordering};

        // Invariant: `src` and `result` will never change.
        // Invariant: Only the `advance` method can advance offset and counter.
        pub(super) struct LoadState {
            /// Source pointer, viewed byte-wise.
            src: *const u8,
            // Note: This is a pointer from MaybeUninit.
            result: *mut u8,
            /// Counter to track remaining bytes in `T`.
            remaining: usize,
            /// Byte offset applied to both `src` and `result`.
            offset: usize,
        }

        impl LoadState {
            /// Creates a state that copies `size_of::<T>()` bytes from `src`
            /// into `result`.
            #[cfg_attr(feature = "inline-always", inline(always))]
            #[cfg_attr(not(feature = "inline-always"), inline)]
            pub(super) fn new<T>(result: *mut T, src: *const T) -> Self {
                Self {
                    src: src as *const u8,
                    result: result as *mut u8,
                    remaining: mem::size_of::<T>(),
                    offset: 0,
                }
            }

            /// Advances pointers by `size` **bytes**.
            ///
            /// # Safety
            ///
            /// - The remaining bytes must be greater than or equal to `size`.
            /// - The range of `self.result..self.result.add(size)` must be filled.
            #[cfg_attr(feature = "inline-always", inline(always))]
            #[cfg_attr(not(feature = "inline-always"), inline)]
            unsafe fn advance(&mut self, size: usize) {
                debug_assert!(self.remaining >= size);
                self.remaining -= size;
                self.offset += size;
            }

            /// Returns the number of bytes that still need to be copied.
            #[cfg_attr(feature = "inline-always", inline(always))]
            #[cfg_attr(not(feature = "inline-always"), inline)]
            pub(super) fn remaining(&self) -> usize {
                self.remaining
            }

            /// Returns the current source position as a `&T`.
            ///
            /// # Safety
            ///
            /// The current source position must be valid for reads as `T` and
            /// properly aligned for `T`.
            #[cfg_attr(feature = "inline-always", inline(always))]
            #[cfg_attr(not(feature = "inline-always"), inline)]
            unsafe fn src<T>(&self) -> &T {
                // SAFETY: the caller must uphold the safety contract.
                unsafe { &*(self.src.add(self.offset) as *const T) }
            }

            /// Returns the current write position in the result buffer as `*mut T`.
            ///
            /// # Safety
            ///
            /// The current offset must be in bounds of the result buffer.
            #[cfg_attr(feature = "inline-always", inline(always))]
            #[cfg_attr(not(feature = "inline-always"), inline)]
            unsafe fn result<T>(&self) -> *mut T {
                // SAFETY: the caller must uphold the safety contract.
                unsafe { self.result.add(self.offset) as *mut T }
            }

            /// Loads `count` bytes one byte at a time.
            #[cfg_attr(feature = "inline-always", inline(always))]
            #[cfg_attr(not(feature = "inline-always"), inline)]
            pub(super) fn atomic_load_u8(&mut self, count: usize) {
                // This condition is also checked by the caller, so the compiler
                // will remove this assertion by optimization.
                assert!(self.remaining() >= count);
                for _ in 0..count {
                    // SAFETY:
                    // - we've checked that the remaining bytes is greater than or equal to `count`
                    // Therefore, due to `LoadState`'s invariant:
                    // - `src` is valid to atomic read of `count` of u8.
                    // - `result` is valid to write of `count` of u8.
                    unsafe {
                        let val = self.src::<AtomicU8>().load(Ordering::Relaxed);
                        self.result::<u8>().write(val);
                        // SAFETY: we've filled 1 byte.
                        self.advance(1);
                    }
                }
            }

            /// Loads as many whole `usize` chunks as remain.
            ///
            /// Note: The remaining bytes smaller than usize are ignored.
            ///
            /// # Safety
            ///
            /// - `self.src` must be properly aligned for `usize`.
            ///
            /// There is no alignment requirement for `self.result`.
            #[cfg_attr(feature = "inline-always", inline(always))]
            #[cfg_attr(not(feature = "inline-always"), inline)]
            pub(super) unsafe fn atomic_load_usize_to_end(&mut self) {
                while self.remaining() >= mem::size_of::<usize>() {
                    // SAFETY:
                    // - the caller must guarantee that `src` is properly aligned for `usize`.
                    // - we've checked that the remaining bytes is greater than
                    //   or equal to `size_of::<usize>()`.
                    // Therefore, due to `LoadState`'s invariant:
                    // - `src` is valid to atomic read of `usize`.
                    // - `result` is valid to *unaligned* write of `usize`.
                    unsafe {
                        let val = self.src::<AtomicUsize>().load(Ordering::Relaxed);
                        self.result::<usize>().write_unaligned(val);
                        // SAFETY: we've filled `size_of::<usize>()` bytes.
                        self.advance(mem::size_of::<usize>());
                    }
                }
            }
        }
    }
355
356 /// Byte-wise atomic load.
357 ///
358 /// # Safety
359 ///
360 /// See the documentation of [crate root's `atomic_load`](crate::atomic_load) for safety requirements.
361 /**
362 # Implementation
363
364 It is implemented based on the assumption that atomic operations at a
365 granularity greater than bytes is not a problem, as stated by [p1478].
366
367 > Note that on standard hardware, it should be OK to actually perform the
368 > copy at larger than byte granularity. Copying multiple bytes as part of
369 > one operation is indistinguishable from running them so quickly that the
370 > intermediate state is not observed. In fact, we expect that existing
371 > assembly memcpy implementations will suffice when suffixed with the required fence.
372
373 And it turns out that the granularity of the atomic operations is very important for performance.
374
375 - Loading/storing all bytes in bytes is very slow at least on x86/x86_64.
376 - The pointer width atomic operation is the fastest at least on x86/x86_64.
377 - Atomic operations with a granularity larger than the pointer width are slow
378 at least on x86/x86_64 (cmpxchg8b/cmpxchg16b).
379
    Note the following additional safety requirements.
381
382 - The granularity of the atomic operations in load and store must be the same.
383 - When performing an atomic operation as a type with alignment greater than 1,
384 the pointer must be properly aligned.
385
386 The caller of `atomic_load` guarantees that the `src` is properly aligned.
387 So, we can avoid calling `align_offset` or read at a granularity greater
388 than u8 in some cases.
389
390 The following is what this implementation is currently `atomic_load` using
391 (Note: `atomic_store` also uses exactly the same way to determine the
392 granularity of atomic operations):
393
394 Branch | Granularity of atomic operations | Conditions
395 ------ | -------------------------------- | ----------
396 1 | u8 ..., usize ..., u8 ... | `size_of::<T>() >= size_of::<usize>() * 4`, `align_of::<T>() < align_of::<AtomicUsize>()`
397 2 | usize ... | `align_of::<T>() >= align_of::<AtomicUsize>()`
398 3 | u32 ... | `align_of::<T>() >= align_of::<AtomicU32>()`, 64-bit or higher
399 4 | u16 ... | `align_of::<T>() >= align_of::<AtomicU16>()`, 32-bit or higher
400 5 | u8 ... |
401
402 - Branch 1: If the alignment of `T` is less than usize, but `T` can be read
403 as at least a few numbers of usize, compute the align offset and read it
404 like `(&[AtomicU8], &[AtomicUsize], &[AtomicU8])`.
405 - Branch 2: If the alignment of `T` is greater than or equal to usize, we
406 can read it as a chunk of usize from the first byte.
407 - Branch 3, 4: If the alignment of `T` is greater than 1, we can read it as
408 a chunk of smaller integers (u32 or u16). This is basically the same
409 strategy as Branch 2.
410 - Branch 5: Otherwise, we read it per byte.
411
412 Note that only Branch 1 requires to compute align offset dynamically.
413 Note that which branch is chosen is evaluated at compile time.
414
415 - The fastest is Branch 2, which can read all bytes as a chunk of usize.
416 - If the size of `T` is not too small, Branch 1 is the next fastest to Branch 2.
417 - If the size of `T` is small, Branch 3/4/5 can be faster than Branch 1.
418
419 Whether to choose Branch 1 or Branch 3/4/5 when `T` is small is currently
420 based on a rough heuristic based on simple benchmarks on x86_64.
421
422 [p1478]: https://www.open-std.org/jtc1/sc22/wg21/docs/papers/2022/p1478r8.html
423 */
    #[cfg_attr(feature = "inline-always", inline(always))]
    #[cfg_attr(not(feature = "inline-always"), inline)]
    pub(crate) unsafe fn atomic_load<T>(src: *const T) -> MaybeUninit<T> {
        // Safety requirements guaranteed by the caller:
        // - `src` is valid for atomic reads.
        // - `src` is properly aligned for `T`.
        // - `src` goes through `UnsafeCell::get`.
        // - `T` does not contain uninitialized bytes.
        // - there are no concurrent non-atomic write operations.
        // - there are no concurrent atomic write operations of different granularity.
        // Note that the safety of the code in this function relies on these guarantees,
        // whether or not they are explicitly mentioned in each safety comment.
        debug_assert!(!src.is_null());
        debug_assert!(src as usize % mem::align_of::<T>() == 0);

        let mut result = MaybeUninit::<T>::uninit();

        // Zero-sized types: nothing to copy.
        if mem::size_of::<T>() == 0 {
            return result;
        }

        // Branch 1: If the alignment of `T` is less than usize, but `T` is large
        // enough (at least `size_of::<usize>() * 4` bytes), compute the align
        // offset and read it like `(&[AtomicU8], &[AtomicUsize], &[AtomicU8])`.
        if mem::align_of::<T>() < mem::align_of::<AtomicUsize>()
            && mem::size_of::<T>() >= mem::size_of::<usize>() * 4
        {
            let mut state = load::LoadState::new(result.as_mut_ptr(), src);
            let offset = (src as *const u8).align_offset(mem::align_of::<AtomicUsize>());
            // Note: align_offset may return usize::MAX: https://github.com/rust-lang/rust/issues/62420
            if state.remaining() >= offset {
                // Load `offset` bytes per byte to align `state.src`.
                state.atomic_load_u8(offset);
                debug_assert!(state.remaining() >= mem::size_of::<usize>());
                // SAFETY:
                // - align_offset succeeds and the `offset` bytes have been
                //   filled, so now `state.src` is definitely aligned.
                // - we've checked that the remaining bytes is greater than
                //   or equal to `size_of::<usize>()`.
                //
                // In this branch, the pointer to `state.result` is usually
                // not properly aligned, so we use `atomic_load_usize_to_end`,
                // which has no requirement for alignment of `state.result`.
                unsafe { state.atomic_load_usize_to_end() }
                // Load remaining bytes per byte.
                state.atomic_load_u8(state.remaining());
                debug_assert_eq!(state.remaining(), 0);
                return result;
            }
            // align_offset returned usize::MAX (or an offset larger than `T`);
            // fall through to the later branches.
        }

        // Branch 2: If the alignment of `T` is greater than or equal to usize,
        // we can read it as a chunk of usize from the first byte.
        if mem::align_of::<T>() >= mem::align_of::<AtomicUsize>() {
            let src = src as *const AtomicUsize;
            let dst = result.as_mut_ptr() as *mut usize;
            for i in range(0..mem::size_of::<T>() / mem::size_of::<usize>()) {
                // SAFETY:
                // - the caller must guarantee that `src` is properly aligned for `T`.
                // - `T` has an alignment greater than or equal to usize.
                // - the remaining bytes is greater than or equal to `size_of::<usize>()`.
                unsafe {
                    let val: usize = (*src.add(i)).load(Ordering::Relaxed);
                    dst.add(i).write(val);
                }
            }
            return result;
        }

        #[cfg(not(target_pointer_width = "16"))]
        {
            // Branch 3: If the alignment of `T` is greater than or equal to u32,
            // we can read it as a chunk of u32 from the first byte.
            if mem::size_of::<usize>() > 4 && mem::align_of::<T>() >= mem::align_of::<AtomicU32>() {
                let src = src as *const AtomicU32;
                let dst = result.as_mut_ptr() as *mut u32;
                for i in range(0..mem::size_of::<T>() / mem::size_of::<u32>()) {
                    // SAFETY:
                    // - the caller must guarantee that `src` is properly aligned for `T`.
                    // - `T` has an alignment greater than or equal to u32.
                    // - the remaining bytes is greater than or equal to `size_of::<u32>()`.
                    unsafe {
                        let val: u32 = (*src.add(i)).load(Ordering::Relaxed);
                        dst.add(i).write(val);
                    }
                }
                return result;
            }
        }

        // Branch 4: If the alignment of `T` is greater than or equal to u16,
        // we can read it as a chunk of u16 from the first byte.
        if mem::size_of::<usize>() > 2 && mem::align_of::<T>() >= mem::align_of::<AtomicU16>() {
            let src = src as *const AtomicU16;
            let dst = result.as_mut_ptr() as *mut u16;
            for i in range(0..mem::size_of::<T>() / mem::size_of::<u16>()) {
                // SAFETY:
                // - the caller must guarantee that `src` is properly aligned for `T`.
                // - `T` has an alignment greater than or equal to u16.
                // - the remaining bytes is greater than or equal to `size_of::<u16>()`.
                unsafe {
                    let val: u16 = (*src.add(i)).load(Ordering::Relaxed);
                    dst.add(i).write(val);
                }
            }
            return result;
        }

        // Branch 5: Otherwise, we read it per byte.
        let mut state = load::LoadState::new(result.as_mut_ptr(), src);
        state.atomic_load_u8(state.remaining());
        debug_assert_eq!(state.remaining(), 0);
        result
    }
538
539 // Boundary to make the fields of StoreState private.
540 //
541 // Note that this is not a complete safe/unsafe boundary, since it is still
542 // possible to pass an invalid pointer to the constructor.
    mod store {
        use core::mem;

        use crate::atomic::{AtomicU8, AtomicUsize, Ordering};

        // Invariant: `src` and `dst` will never change.
        // Invariant: Only the `advance` method can advance offset and counter.
        pub(super) struct StoreState {
            /// Source pointer (the value being stored), viewed byte-wise.
            src: *const u8,
            /// Destination pointer; stored as `*const` because the atomic
            /// writes go through shared references to atomic types.
            dst: *const u8,
            /// Number of remaining bytes in `T`.
            remaining: usize,
            /// Byte offset applied to both `src` and `dst`.
            offset: usize,
        }

        impl StoreState {
            /// Creates a state that copies `size_of::<T>()` bytes from `src`
            /// to `dst`.
            #[cfg_attr(feature = "inline-always", inline(always))]
            #[cfg_attr(not(feature = "inline-always"), inline)]
            pub(super) fn new<T>(dst: *mut T, src: *const T) -> Self {
                Self {
                    src: src as *const u8,
                    dst: dst as *mut u8 as *const u8,
                    remaining: mem::size_of::<T>(),
                    offset: 0,
                }
            }

            /// Advances pointers by `size` **bytes**.
            ///
            /// # Safety
            ///
            /// - The remaining bytes must be greater than or equal to `size`.
            /// - The range of `self.dst..self.dst.add(size)` must be filled.
            #[cfg_attr(feature = "inline-always", inline(always))]
            #[cfg_attr(not(feature = "inline-always"), inline)]
            unsafe fn advance(&mut self, size: usize) {
                debug_assert!(self.remaining >= size);
                self.remaining -= size;
                self.offset += size;
            }

            /// Returns the number of bytes that still need to be copied.
            #[cfg_attr(feature = "inline-always", inline(always))]
            #[cfg_attr(not(feature = "inline-always"), inline)]
            pub(super) fn remaining(&self) -> usize {
                self.remaining
            }

            /// Returns the current source position as `*const T`.
            ///
            /// # Safety
            ///
            /// The current offset must be in bounds of the source value.
            #[cfg_attr(feature = "inline-always", inline(always))]
            #[cfg_attr(not(feature = "inline-always"), inline)]
            unsafe fn src<T>(&self) -> *const T {
                // SAFETY: the caller must uphold the safety contract.
                unsafe { self.src.add(self.offset) as *const T }
            }

            /// Returns the current destination position as a `&T`.
            ///
            /// # Safety
            ///
            /// The current destination position must be valid for writes as `T`
            /// and properly aligned for `T`.
            #[cfg_attr(feature = "inline-always", inline(always))]
            #[cfg_attr(not(feature = "inline-always"), inline)]
            unsafe fn dst<T>(&self) -> &T {
                // SAFETY: the caller must uphold the safety contract.
                unsafe { &*(self.dst.add(self.offset) as *const T) }
            }

            /// Stores `count` bytes one byte at a time.
            #[cfg_attr(feature = "inline-always", inline(always))]
            #[cfg_attr(not(feature = "inline-always"), inline)]
            pub(super) fn atomic_store_u8(&mut self, count: usize) {
                // This condition is also checked by the caller, so the compiler
                // will remove this assertion by optimization.
                assert!(self.remaining() >= count);
                for _ in 0..count {
                    // SAFETY:
                    // - we've checked that the remaining bytes is greater than or equal to `count`
                    // Therefore, due to `StoreState`'s invariant:
                    // - `src` is valid to read of `count` of u8.
                    // - `dst` is valid to atomic write of `count` of u8.
                    unsafe {
                        let val = self.src::<u8>().read();
                        self.dst::<AtomicU8>().store(val, Ordering::Relaxed);
                        // SAFETY: we've filled 1 byte.
                        self.advance(1);
                    }
                }
            }

            /// Stores as many whole `usize` chunks as remain.
            ///
            /// Note: The remaining bytes smaller than usize are ignored.
            ///
            /// # Safety
            ///
            /// - `self.dst` must be properly aligned for `usize`.
            ///
            /// There is no alignment requirement for `self.src`.
            #[cfg_attr(feature = "inline-always", inline(always))]
            #[cfg_attr(not(feature = "inline-always"), inline)]
            pub(super) unsafe fn atomic_store_usize_to_end(&mut self) {
                while self.remaining() >= mem::size_of::<usize>() {
                    // SAFETY:
                    // - the caller must guarantee that `dst` is properly aligned for `usize`.
                    // - we've checked that the remaining bytes is greater than
                    //   or equal to `size_of::<usize>()`.
                    // Therefore, due to `StoreState`'s invariant:
                    // - `src` is valid to *unaligned* read of `usize`.
                    // - `dst` is valid to atomic write of `usize`.
                    unsafe {
                        let val = self.src::<usize>().read_unaligned();
                        self.dst::<AtomicUsize>().store(val, Ordering::Relaxed);
                        // SAFETY: we've filled `size_of::<usize>()` bytes.
                        self.advance(mem::size_of::<usize>());
                    }
                }
            }
        }
    }
653
654 /// Byte-wise atomic store.
655 ///
656 /// See the [`atomic_load`] function for the detailed implementation comment.
657 ///
658 /// # Safety
659 ///
660 /// See the documentation of [crate root's `atomic_store`](crate::atomic_store) for safety requirements.
    #[cfg_attr(feature = "inline-always", inline(always))]
    #[cfg_attr(not(feature = "inline-always"), inline)]
    pub(crate) unsafe fn atomic_store<T>(dst: *mut T, val: T) {
        // Safety requirements guaranteed by the caller:
        // - `dst` is valid for atomic writes.
        // - `dst` is properly aligned for `T`.
        // - `dst` goes through `UnsafeCell::get`.
        // - `T` does not contain uninitialized bytes.
        // - there are no concurrent non-atomic operations.
        // - there are no concurrent atomic operations of different granularity.
        // - if there are concurrent atomic write operations, `T` is valid for all bit patterns.
        // Note that the safety of the code in this function relies on these guarantees,
        // whether or not they are explicitly mentioned in each safety comment.
        debug_assert!(!dst.is_null());
        debug_assert!(dst as usize % mem::align_of::<T>() == 0);

        // In atomic_store, a panic *after* the first store operation is unsound
        // because dst may become an invalid bit pattern.
        //
        // Our code is written very carefully so as not to cause panic, but we
        // will use additional guards just in case.
        //
        // Note:
        // - If the compiler can understand at compile time that panic will
        //   never occur, this guard will be removed (as with no-panic).
        // - atomic_load does not modify the data, so it does not have this requirement.
        // - If an invalid ordering is passed, it will panic *before* the
        //   first store operation, so is fine.
        let guard = PanicGuard;

        let val = ManuallyDrop::new(val); // Do not drop `val`.

        // Zero-sized types: nothing to copy.
        if mem::size_of::<T>() == 0 {
            mem::forget(guard);
            return;
        }

        // Branch 1: If the alignment of `T` is less than usize, but `T` is large
        // enough (at least `size_of::<usize>() * 4` bytes), compute the align
        // offset and write it like `(&[AtomicU8], &[AtomicUsize], &[AtomicU8])`.
        if mem::align_of::<T>() < mem::align_of::<AtomicUsize>()
            && mem::size_of::<T>() >= mem::size_of::<usize>() * 4
        {
            let mut state = store::StoreState::new(dst, &*val);
            let offset = (dst as *mut u8).align_offset(mem::align_of::<AtomicUsize>());
            // Note: align_offset may return usize::MAX: https://github.com/rust-lang/rust/issues/62420
            if state.remaining() >= offset {
                // Store `offset` bytes per byte to align `state.dst`.
                state.atomic_store_u8(offset);
                debug_assert!(state.remaining() >= mem::size_of::<usize>());
                // SAFETY:
                // - align_offset succeeds and the `offset` bytes have been
                //   filled, so now `state.dst` is definitely aligned.
                // - we've checked that the remaining bytes is greater than
                //   or equal to `size_of::<usize>()`.
                //
                // In this branch, the pointer to `state.src` is usually
                // not properly aligned, so we use `atomic_store_usize_to_end`,
                // which has no requirement for alignment of `state.src`.
                unsafe {
                    state.atomic_store_usize_to_end();
                }
                // Store remaining bytes per byte.
                state.atomic_store_u8(state.remaining());
                debug_assert_eq!(state.remaining(), 0);
                mem::forget(guard);
                return;
            }
            // align_offset returned usize::MAX (or an offset larger than `T`);
            // fall through to the later branches.
        }

        // Branch 2: If the alignment of `T` is greater than or equal to usize,
        // we can write it as a chunk of usize from the first byte.
        if mem::align_of::<T>() >= mem::align_of::<AtomicUsize>() {
            let src = &*val as *const T as *const usize;
            let dst = dst as *const AtomicUsize;
            for i in range(0..mem::size_of::<T>() / mem::size_of::<usize>()) {
                // SAFETY:
                // - the caller must guarantee that `dst` is properly aligned for `T`.
                // - `T` has an alignment greater than or equal to usize.
                // - the remaining bytes is greater than or equal to `size_of::<usize>()`.
                unsafe {
                    let val: usize = src.add(i).read();
                    (*dst.add(i)).store(val, Ordering::Relaxed);
                }
            }
            mem::forget(guard);
            return;
        }

        #[cfg(not(target_pointer_width = "16"))]
        {
            // Branch 3: If the alignment of `T` is greater than or equal to u32,
            // we can write it as a chunk of u32 from the first byte.
            if mem::size_of::<usize>() > 4 && mem::align_of::<T>() >= mem::align_of::<AtomicU32>() {
                let src = &*val as *const T as *const u32;
                let dst = dst as *const AtomicU32;
                for i in range(0..mem::size_of::<T>() / mem::size_of::<u32>()) {
                    // SAFETY:
                    // - the caller must guarantee that `dst` is properly aligned for `T`.
                    // - `T` has an alignment greater than or equal to u32.
                    // - the remaining bytes is greater than or equal to `size_of::<u32>()`.
                    unsafe {
                        let val: u32 = src.add(i).read();
                        (*dst.add(i)).store(val, Ordering::Relaxed);
                    }
                }
                mem::forget(guard);
                return;
            }
        }

        // Branch 4: If the alignment of `T` is greater than or equal to u16,
        // we can write it as a chunk of u16 from the first byte.
        if mem::size_of::<usize>() > 2 && mem::align_of::<T>() >= mem::align_of::<AtomicU16>() {
            let src = &*val as *const T as *const u16;
            let dst = dst as *const AtomicU16;
            for i in range(0..mem::size_of::<T>() / mem::size_of::<u16>()) {
                // SAFETY:
                // - the caller must guarantee that `dst` is properly aligned for `T`.
                // - `T` has an alignment greater than or equal to u16.
                // - the remaining bytes is greater than or equal to `size_of::<u16>()`.
                unsafe {
                    let val: u16 = src.add(i).read();
                    (*dst.add(i)).store(val, Ordering::Relaxed);
                }
            }
            mem::forget(guard);
            return;
        }

        // Branch 5: Otherwise, we write it per byte.
        let mut state = store::StoreState::new(dst, &*val);
        state.atomic_store_u8(state.remaining());
        debug_assert_eq!(state.remaining(), 0);
        mem::forget(guard);
    }
797
798 // This allows read_volatile and atomic_load to be lowered to exactly the
799 // same assembly on little endian platforms such as aarch64, riscv64.
800 #[cfg_attr(feature = "inline-always", inline(always))]
801 #[cfg_attr(not(feature = "inline-always"), inline)]
802 #[cfg(target_endian = "little")]
803 fn range<T>(r: Range<T>) -> core::iter::Rev<Range<T>>
804 where
805 Range<T>: DoubleEndedIterator,
806 {
807 r.rev()
808 }
809 #[cfg_attr(feature = "inline-always", inline(always))]
810 #[cfg_attr(not(feature = "inline-always"), inline)]
811 #[cfg(target_endian = "big")]
812 fn range<T>(r: Range<T>) -> Range<T>
813 where
814 Range<T>: DoubleEndedIterator,
815 {
816 r
817 }
818
    // Guard created by `atomic_store` before its first store and disarmed with
    // `mem::forget` on every successful exit path, so `drop` runs only if a
    // panic occurs mid-store (which could leave `*dst` as an invalid bit
    // pattern). Panicking inside the unwind turns it into an abort.
    struct PanicGuard;

    impl Drop for PanicGuard {
        fn drop(&mut self) {
            // This crate supports no-std environment, so we cannot use std::process::abort.
            // Instead, it uses the nature of double panics being converted to an abort.
            panic!("abort");
        }
    }
828}