1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198
// Copyright 2021 UCLouvain // // Licensed under the Apache License, Version 2.0, <LICENSE-APACHE or // http://apache.org/licenses/LICENSE-2.0> or the MIT License <LICENSE-MIT or // http://opensource.org/licenses/MIT>, at your option. This file may not be // copied, modified, or distributed except according to those terms. // // Unless required by applicable law or agreed to in writing, software // distributed under the License is distributed on an "AS IS" BASIS, // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. // See the License for the specific language governing permissions and // limitations under the License. #![deny(missing_docs)] //! Hytra //! A beast that eats your data from many threads. //! //! The main type in this library is [`TrAcc`], which allows you to accumulate data in a single //! variable from multiple threads extremely fast. A specialized version is [`TrAdder`], that //! contains an sum. //! //! Hytra has been inspired by Java's //! [`LongAccumulator`](https://docs.oracle.com/javase/8/docs/api/java/util/concurrent/atomic/LongAccumulator.html), //! [`DoubleAccumulator`](https://docs.oracle.com/javase/8/docs/api/java/util/concurrent/atomic/DoubleAccumulator.html), //! [`LongAdder`](https://docs.oracle.com/javase/8/docs/api/java/util/concurrent/atomic/LongAdder.html) //! and //! [`DoubleAdder`](https://docs.oracle.com/javase/8/docs/api/java/util/concurrent/atomic/DoubleAdder.html). //! //! [`TrAcc`]: struct.TrAcc.html //! [`TrAdder`]: struct.TrAdder.html use atomic::Atomic; use crossbeam_utils::CachePadded; use num_traits::Zero; use std::ops::Deref; use std::sync::atomic::Ordering; use thread_local::ThreadLocal; /// This is workaround for the fact that the Fn trait is not stable. /// We could have `TrAcc<T, F: Fn(T, T) -> T>`. However, since the `Fn` trait is not stable, this /// would not allow to have `TrAcc` for an accumulator other than a closure (which makes the type /// un-namable) or a function pointer (which means dynamic dispatch). /// The `FnAcc` is a custom trait that we use as a purpose-specific variant of `Fn(T, T) -> T`. pub trait FnAcc<T> { /// Call the function. fn call(&self, arg1: T, arg2: T) -> T; } impl<T, U: Fn(T, T) -> T> FnAcc<T> for U { fn call(&self, arg1: T, arg2: T) -> T { self(arg1, arg2) } } /// The threaded accumulator allows to accumulate data in a single state from multiple threads /// without contention, which allows performance to scale well with the number of /// thread/processors. /// /// The accumulation function must be associative an commutative, and the `identity` element must be /// the neutral element w.r.t. the accumulation function. /// /// The accumulated state can be any `Copy + Send` state, and the implementation uses atomic /// instructions if supported by the architecture for the size of `T` (in which case this /// datastructure is lock-free), and mutexes otherwise. /// /// This optimizes for accumulation speed, at the expense of increased memory usage (the state is /// replicated once for each thread) and cost of reading the accumulated state (which has to walk /// over the states of each thread). /// /// ```rust /// use hytra::TrAcc; /// let acc: TrAcc<i64, _> = TrAcc::new(|a, b| a*b, 1); /// let acc_ref = &acc; /// crossbeam_utils::thread::scope(|s| { /// for j in 1..=2 { /// s.spawn(move |_| { /// for i in 1..=3 { /// acc_ref.acc(i*j); /// } /// }); /// } /// }) /// .unwrap(); /// assert_eq!(acc.get(), (1*2*3)*((2*1)*(2*2)*(2*3))); /// ``` #[derive(Debug)] pub struct TrAcc<T: Copy + Send, F: FnAcc<T>> { state: ThreadLocal<CachePadded<Atomic<T>>>, acc_fn: F, identity: T, } impl<T: Copy + Send, F: Sync + FnAcc<T>> TrAcc<T, F> { /// Create a a `TrAcc`. pub fn new(acc_fn: F, identity: T) -> Self { Self { state: ThreadLocal::new(), acc_fn, identity, } } /// Accumulate `x`. If `state` is the current state, the new state is `fn_acc(state, x)`. /// /// This function has `Release` semantic w.r.t. the accumulator. pub fn acc(&self, x: T) { // Since writes to the thread-local are uncontented, we can have a relaxed load. let local_acc: &Atomic<T> = self .state .get_or(|| CachePadded::new(Atomic::new(self.identity))) .deref(); let res = self.acc_fn.call(local_acc.load(Ordering::Relaxed), x); // We use a release for the store such that it synchronizes with the acquire of the get // function. local_acc.store(res, Ordering::Release); } /// Return the current state. /// /// This function has `Acquire` semantic w.r.t. the accumulator. pub fn get(&self) -> T { // The Acquire ordering synchronizes with the acc function. return self .state .iter() .map(|x| x.load(Ordering::Acquire)) .fold(self.identity, |a, b| self.acc_fn.call(a, b)); } } #[derive(Debug)] struct Adder<T>(std::marker::PhantomData<fn() -> T>); impl<T: std::ops::Add<T, Output = T>> FnAcc<T> for Adder<T> { fn call(&self, arg1: T, arg2: T) -> T { <T as std::ops::Add>::add(arg1, arg2) } } /// The threaded add allows to increment and decrement an integer from multiple threads without /// contention, which allows performance to scale well with the number of /// thread/processors. `TrAdder` can wrap any primitive integer type. /// /// **Overflow behavior.** /// Overflow may occur if the sum of the increments in any subset of the threads overflows, even if /// the total leads to no overflow. Overflow semantic is the same as for primitive types (panic or /// wrapping). /// /// See [`TrAcc`] for a discussion of performance characteristics. /// /// ```rust /// use hytra::TrAdder; /// let adder: TrAdder<i64> = TrAdder::new(); /// crossbeam_utils::thread::scope(|s| { /// for _ in 0..10 { /// s.spawn(|_| { /// for _ in 0..10 { /// adder.inc(1); /// } /// }); /// } /// }) /// .unwrap(); /// assert_eq!(adder.get(), 100); /// ``` /// /// [`TrAcc`]: struct.ThreadLocal.html #[derive(Debug)] pub struct TrAdder<T: Copy + Zero + std::ops::Add<T, Output = T> + Send>(TrAcc<T, Adder<T>>); impl<T: Copy + Zero + std::ops::Add<T, Output = T> + Send> TrAdder<T> { /// Create a new `TrAdder` initialized to 0. pub fn new() -> Self { Self(TrAcc::new(Adder(Default::default()), T::zero())) } /// Increment the `TrAdder`. pub fn inc(&self, x: T) { self.0.acc(x); } /// Return the value of the `TrAdder`. pub fn get(&self) -> T { self.0.get() } } impl<T: Copy + Zero + std::ops::Add<T, Output = T> + Send> Default for TrAdder<T> { fn default() -> Self { Self::new() } } #[test] fn test_adder_single_thread() { let adder: TrAdder<i64> = TrAdder::new(); for i in 0..10i64 { assert_eq!(adder.get(), i); adder.inc(1); } }