1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
//! This module provides a function that forces an artificial data dependency
//! between two loads. Basically, the code:
//!
//! ```text
//! val = some_atomic.load(DepOrd);
//! val2_ref = &val2;
//! val2_ref ^= val;
//! val2_ref ^= val; // val2_ref now is equal to &val2, but data depends on val
//! loaded_val2 = *val2_ref; // Is ordered-after val as if by consume ordering
//! ```
//! is executed. This can be far faster than fences on arm and
//! power architectures, since the ordering is a result of data dependencies in
//! the pipeline and not full-on fences. This still isn't free, since you must
//! wait for the previous load to finish but it's better than a fence
//!
//!
//! # Example:
//! ```
//! use std::sync::atomic::{AtomicUsize, Ordering};
//! use std::sync::{Arc, Barrier};
//! use std::thread;
//! use atomic_utilities::artificial_dep::{DepOrd, dependently};
//! let num_run = 1000000;
//! let atomic_val1 = Arc::new(AtomicUsize::new(0));
//! let atomic_val2 = Arc::new(AtomicUsize::new(0));
//! let start_bar = Arc::new(Barrier::new(2));
//! let atomic_valt1 = atomic_val1.clone();
//! let atomic_valt2 = atomic_val2.clone();
//! let start_bart = start_bar.clone();
//! let to_join = thread::spawn(move || {
//! start_bart.wait();
//! for i in 0..num_run {
//! atomic_valt2.store(i, Ordering::Relaxed);
//! atomic_valt1.store(i, Ordering::Release);
//! }
//! });
//!
//! start_bar.wait();
//! for _ in 0..num_run {
//! let val1_ld = atomic_val1.load(DepOrd);
//! let val2_ld = dependently(val1_ld, &atomic_val2,
//! |dep_ref| dep_ref.load(Ordering::Relaxed));
//! assert!(val2_ld >= val1_ld); // Can fail if val2_ld is ordered_before val1_ld
//! }
//! ```
/* Once this can be tested on a power machine it's good to go
#[cfg(all(any(target_arch = "powerpc", target_arch = "powerpc64"),
use_asm))]
mod artificial_dep_inner {
use std::sync::atomic::Ordering;
pub const DEPORD: Ordering = Ordering::Relaxed;
#[inline(always)]
pub fn false_dep<T>(myref: &T, val: usize) -> &T {
asm!("xor $1 $0 $0
xor $1 $0 $0"
: "+r" (myref)
: "r" (val));
myref
}
}*/
use Ordering;
/// The ordering that must be used for any load which has fake dependent operations.
///
/// NOTE(review): deliberately named like an `Ordering` variant (so call sites read
/// `atomic.load(DepOrd)`) rather than `DEP_ORD`; renaming would break the public API
/// shown in the module example. `DEPORD` is presumably supplied by a cfg-selected
/// architecture module elsewhere in this file (cf. the commented-out powerpc module
/// above, which defines `DEPORD` as `Ordering::Relaxed`) — TODO confirm the
/// fallback path is visible in the full file.
pub const DepOrd: Ordering = DEPORD;
/// Ensures that loads from the value myref are ordered after the load of val.
/// Val can be anything convertible to a usize, or any usize calculated from the
/// base load.