1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442
//! An efficient implementation of the concurrent (lockless) PLRU cache replacement policy.
//!
//! # Cache replacement
//!
//! Cache replacement policies are used to determine which cache lines (buffers) should be
//! replaced because they're no longer "hot" (likely to be used in the near future). A number of
//! different approaches exist.
//!
//! LRU is the name of the cache replacement policies which will replace the cache line which was
//! used the longest time ago.
//!
//! PLRU is the name of the class of cache replacement policies deriving from LRU, but instead of
//! using an exact measure they use an approximate measure of the recently used lines. This allows
//! for much more efficient cache management (LRU cache management is notoriously slow) at the
//! cost of possibly worse cache policies. PLRU caches are used in many major CPUs to maintain CPU
//! cache lines.
//!
//! We use a specific version of PLRU: BPLRU or bit PLRU. Tree PLRU is an alternative, but it is
//! not easy to make concurrent.
//!
//! # Usage
//!
//! ## Runtime defined number of cache lines
//!
//! This can be achieved through `plru::create()` unless the `no_std` feature is enabled.
//!
//! ## How fixed-size arrays are handled
//!
//! Our implementation is generic over fixed-size arrays (or heap allocated vectors) without
//! relying on any external library. We abstract over `AsRef<[AtomicU64]>` in order to allow a
//! wide variety of different forms of arrays.
//!
//! For convenience, we define a bunch of type aliases for various sizes of caches.
//!
//! # Implementation details
//!
//! This implementation of BPLRU makes use of atomic integers to store the bit flags, and thus
//! make up a concurrent and entirely lockless cache manager with excellent performance.
//!
//! The cache lines are organized into 64-bit integers storing the bit flags ("hot" and "cold").
//! Each of these integers is called a "bulk". Whenever a cache line is touched (used), its bit
//! flag is set.
//!
//! Finding the cache line to replace is simply done by finding an unset bit in one of the bulks.
//! If all cache lines in the bulk are hot, they're flipped so that they're all cold. This process
//! is called cache inflation.
//!
//! In order to avoid double cache replacement, we use a counter which is used to maximize the
//! time between the same bulk being used to find a cache line replacement.
//!
//! The algorithms are described in detail in the documentation of each method.
//!
//! # In short...
//!
//! In short, the algorithm works by cache lines holding an entry until their slot (bulk) is
//! filled, which will (upon cache replacement) lead to every cache line in that bulk being
//! assumed to be inactive until they reclaim their place.
//!
//! An analogy is parking lots. We only care about parking lot space if it is filled (if it isn't,
//! we can just place our car, and we're good). When it's filled, we need to make sure that the
//! owner of every car has been there in the last N hours (or: since the parking lot got filled)
//! to avoid owners occupying places forever. When new cars come in and a car has been there for
//! enough time, it has to be forcibly removed to open up new space.
//!
//! Now, think of each parking lot as a bulk. And each car as a cache line.
//!
//! # Code quality
//!
//! ✔ Well-tested
//!
//! ✔ Well-documented
//!
//! ✔ Examples
//!
//! ✔ Conforms to Rust's style guidelines
//!
//! ✔ Idiomatic Rust
//!
//! ✔ Production-quality

// Cargo sets the *singular* `feature = "..."` cfg key; the original plural `features = "no_std"`
// could never match, so the `no_std` attribute silently never fired.
#![cfg_attr(feature = "no_std", no_std)]
#![feature(integer_atomics)]
#![warn(missing_docs)]

extern crate core;

use core::fmt;
use core::sync::atomic::{self, AtomicU64, AtomicU8};

/// The atomic ordering we use throughout the code.
const ORDERING: atomic::Ordering = atomic::Ordering::Relaxed;

/// A micro cache (64 lines).
pub type MicroCache = Cache<[AtomicU64; 1]>;
/// A small cache (128 lines).
pub type SmallCache = Cache<[AtomicU64; 2]>;
/// A medium size cache (256 lines).
pub type MediumCache = Cache<[AtomicU64; 4]>; /// A big cache (512 lines). pub type BigCache = Cache<[AtomicU64; 8]>; /// A huge cache (2048 lines). pub type HugeCache = Cache<[AtomicU64; 32]>; /// A dynamic cache. pub type DynamicCache = Cache<Box<[AtomicU64]>>; /// Create a heap allocated cache of a fixed (but dynamic) number of cache lines. /// /// Ideally, 64 should divide `len` to optimize space efficiency. #[cfg(not(features = "no_std"))] pub fn create(lines: usize) -> DynamicCache { // Unfortunately, `AtomicU64` doesn't implement `Clone`, so we cannot use the `vec![]` macro. // We need to manually construct our vector. // We divide it by 64 to get the target length of the vector, but instead of rounding down, we // ensure that we have the right length by using rounding up division through this simple trick. let len = (lines + 63) / 64; // Allocate a `len` capacity vector. let mut vec = Vec::with_capacity(len); // Repeat `len` times and push the value. for _ in 0..len { vec.push(AtomicU64::default()); } Cache::new(vec.into_boxed_slice()) } /// A pseudo-LRU cache tracker. /// /// This manages a set of cache lines (enumerated by `usize`) in an efficient manner, such that you /// can touch cache lines (mark usage) and efficiently find a cache line which can be replaced /// (unlikely to be used in the near future). #[derive(Default)] pub struct Cache<B> { /// The LRU bit flags representing the "recently used" lines. /// /// The bits are broken down into 64-bit "bulks", which allows us to handle them efficiently /// atomically. bulks: B, /// A counter which is incremented on every new `replace`. /// /// This is used to reduce the chance of choosing the same replacement cache line twice. counter: AtomicU8, } impl<B: AsRef<[AtomicU64]>> Cache<B> { /// Create a new cache based on some array. /// /// Generally, this should not be used unless you're building abstractions. You should likely /// use `Cache::<SomeType>::default()` or `plru::create()` instead. 
/// /// # Example /// /// ```rust /// use plru::Cache; /// /// Cache::new([Default::default(), Default::default()]); /// ``` pub fn new(bulks: B) -> Cache<B> { Cache { bulks: bulks, counter: AtomicU8::default(), } } /// Touch the n'th cache line. /// /// Whenever you modify/use the cache line, you should call `touch` in order to mark that it /// was recently used. /// /// Each cache line has a bitflag defining if it was recently used. This will set said flag. /// /// # Example /// /// ```rust /// let cache = plru::SmallCache::default(); /// /// cache.touch(10); /// assert!(cache.is_hot(10)); /// ``` pub fn touch(&self, n: usize) { // We OR our mask together with the bulk in order to set the bit in question. self.bulks.as_ref()[n / 64].fetch_or(1 << (n % 64), ORDERING); } /// Trash the n'th cache line. /// /// Trashing is generally only used if you _know_ that this line is not going to be used later /// on. /// /// Trashing will merely mark this line as cold, and hence be queued for replacement until it /// is used again. As such, it is not a terrible performance loss if you trash a line which is /// used later, and decision can be made heuristically ("this is likely not going to be used /// later again"). /// /// # Example /// /// ```rust /// let cache = plru::SmallCache::default(); /// /// cache.touch(10); /// assert!(cache.is_hot(10)); /// /// cache.trash(10); /// assert!(!cache.is_hot(10)); /// ``` pub fn trash(&self, n: usize) { // We use a mask and atomic AND in order to set the bit to zero. self.bulks.as_ref()[n / 64].fetch_and(!(1 << (n % 64)), ORDERING); } /// Find the approximately least recently used cache line to replace. /// /// A particular bulk is selected based on a counter incremented on every `replace` call. The /// first unset bit of this bulk determines the cold cache line we will return. If all the /// flags in the 64-bit bulk are set, the whole bulk will be reset to zero in order to inflate /// the cache. 
    ///
    /// This is approximately the least-recently-used cache line.
    ///
    /// Note that it will not set the found bit. If you use the line right after requesting it, you
    /// still need to call `touch`. In fact, it will always return a cold line.
    ///
    /// You cannot rely on uniqueness of the output. It might return the same result twice,
    /// although it is unlikely.
    ///
    /// # Example
    ///
    /// ```rust
    /// let cache = plru::MediumCache::default();
    /// cache.touch(10);
    /// cache.touch(20);
    /// cache.touch(1);
    ///
    /// assert_ne!(cache.replace(), 1);
    /// assert_ne!(cache.replace(), 20);
    /// assert_ne!(cache.replace(), 10);
    /// ```
    pub fn replace(&self) -> usize {
        // In order to avoid returning the same result, we use a counter which wraps. Incrementing
        // this counter and using it to index the bulks maximizes the time spent in between the
        // allocations of the replacement line.
        let counter = self.counter.fetch_add(1, ORDERING) as usize % self.bulks.as_ref().len();

        // Load the bulk. If it turns out that every bit is set, this bulk will be inflated by
        // zeroing the bulk, so that all cache lines in this bulk are marked cold. (The CAS only
        // succeeds — i.e. only writes 0 — when the old value was all ones; either way it returns
        // the old value.)
        let bulk = self.bulks.as_ref()[counter].compare_and_swap(!0, 0, ORDERING);

        // Find the first *unset* (zero) bit in the bulk — "ffz". The modulo 64 here is a neat
        // hack, which allows us to eliminate a branch. In particular, the ffz will only return 64
        // if all bits are set, which is only the case if the old bulk value was inflated. By doing
        // modulo 64, we map 64 to 0, which is not set if the bulk was inflated (all lines in the
        // bulk are marked "cold").
        let ffz = (!bulk).trailing_zeros() % 64;

        // Calculate the place of the line: 64 lines per bulk, plus the offset within the bulk.
        counter * 64 + ffz as usize
    }

    /// Find the number of cache lines in this cache.
    ///
    /// This is always equal to the length of `B` multiplied by 64.
/// /// # Example /// /// ```rust /// assert_eq!(plru::create(10).len(), 64); /// assert_eq!(plru::create(64).len(), 64); /// assert_eq!(plru::create(65).len(), 128); /// ``` pub fn len(&self) -> usize { self.bulks.as_ref().len() * 64 } /// Is the n'th cache line hot? /// /// This returns a boolean defining if it has recently be used or not. `true` means that the /// cache line is registered as "hot" and `false` that it is registered as "cold". /// /// # Example /// /// ```rust /// let cache = plru::MicroCache::default(); /// cache.touch(2); /// /// assert!(cache.is_hot(2)); /// assert!(!cache.is_hot(3)); /// ``` pub fn is_hot(&self, n: usize) -> bool { // Load the bulk and mask it to find out if the bit is set. self.bulks.as_ref()[n / 64].load(ORDERING) & (1 << (n % 64)) != 0 } } impl<B: AsRef<[AtomicU64]>> fmt::Debug for Cache<B> { fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { for i in self.bulks.as_ref() { write!(f, "{:b},", i.load(ORDERING))?; } Ok(()) } } #[cfg(test)] mod tests { #[test] #[cfg(not(features = "no_std"))] fn create() { assert_eq!(super::create(10).len(), 64); assert_eq!(super::create(64).len(), 64); assert_eq!(super::create(63).len(), 64); assert_eq!(super::create(1).len(), 64); assert_eq!(super::create(65).len(), 128); assert_eq!(super::create(128).len(), 128); assert_eq!(super::create(129).len(), 192); assert_eq!(super::create(192).len(), 192); assert_eq!(super::create(256).len(), 256); } #[test] fn default() { let cache = super::BigCache::default(); for i in 0..512 { assert!(!cache.is_hot(i)); } } #[test] fn inflation() { let cache = super::SmallCache::default(); // Touch all the cache lines in a bulk. cache.touch(0); for i in 1..64 { assert!(cache.is_hot(i - 1)); cache.touch(i); } // Find a replacement line. assert_eq!(cache.replace(), 0); // Since every line was hot before, inflation should have occured. Check that every line is // cold. 
for i in 0..64 { assert!(!cache.is_hot(i)); } } #[test] fn simple_replace() { let cache = super::SmallCache::default(); assert_eq!(cache.replace(), 0); assert_eq!(cache.replace(), 64); assert_eq!(cache.replace(), 0); assert_eq!(cache.replace(), 64); cache.touch(0); cache.touch(64); assert_eq!(cache.replace(), 1); assert_eq!(cache.replace(), 65); cache.touch(1); cache.touch(65); assert_eq!(cache.replace(), 2); assert_eq!(cache.replace(), 66); } #[test] fn replace_and_touch() { let cache = super::SmallCache::default(); for _ in 0..128 { let r = cache.replace(); assert!(!cache.is_hot(r)); cache.touch(r); assert!(cache.is_hot(r)); } } #[test] fn replace_touch_trash() { let cache = super::SmallCache::default(); for _ in 0..1000 { let r = cache.replace(); cache.touch(r); assert!(cache.is_hot(r)); cache.trash(r); assert!(!cache.is_hot(r)); } } #[test] fn replace_cold() { let cache = super::SmallCache::default(); for i in 0..1000 { let r = cache.replace(); assert!(!cache.is_hot(r)); if i % 2 == 0 { cache.touch(r); } } } #[test] fn trash_cold() { let cache = super::SmallCache::default(); for i in 0..128 { cache.trash(i); assert!(!cache.is_hot(i)); } } #[test] fn cache_sizes() { let a = super::MicroCache::default(); let b = super::SmallCache::default(); let c = super::MediumCache::default(); let d = super::BigCache::default(); let e = super::HugeCache::default(); assert!(a.len() < b.len()); assert!(b.len() < c.len()); assert!(c.len() < d.len()); assert!(d.len() < e.len()); } #[test] #[should_panic] fn line_oob() { let cache = super::SmallCache::default(); cache.touch(128); } }