// Example: locality/locality.rs
/*! Observe the L1 data cache hit rate under two different access patterns.

This example measures L1 data cache hit rate and prefetch counts while
accessing a 40MB array linearly, and then randomly.

One surprising finding is that, even though the loop accessing the
array is extremely simple, a `dev` build performs seven times as many
reads as a `release` build. Furthermore, in a `dev` build, the L1 data
cache still manages to achieve an 85% hit rate even under the random
access pattern, which should be completely uncacheable.

This suggests that the machine code for a `dev` build generates a lot
of extraneous memory traffic, but the cache is able to cover for most
of it. This would seem to render `dev` builds unsuitable for assessing
cache behavior.

    $ cargo run --quiet --example locality
    linear: hits / reads: 68791796 / 70043629  98.21%, prefetched  1250129
    random: hits / reads: 60639316 / 70642773  85.84%, prefetched       84

    $ cargo run --quiet --example locality --release
    linear: hits / reads:  8750124 / 10000528  87.50%, prefetched  1249700
    random: hits / reads:    11500 / 10009804   0.11%, prefetched      149

*/
26
27fn main() {
28    use std::hint::black_box;
29
30    const SIZE: usize = 10_000_000;
31
32    // Build a vector that `walk` will traverse from start to end.
33    let mut vec: Vec<usize> = (1..SIZE).chain(Some(0)).collect();
34    measure("linear", || {
35        black_box(walk(&vec));
36    });
37
38    let mut random = XorShift128Plus::from_seed(1729, 42);
39    random.nth(100); // Propagate the 1-bits in the state a bit.
40
41    // Build a vector that `walk` will access randomly.
42    //
43    // This turns `vec` into a single chain that visits every element.
44    // Proof left to the reader.
45    vec.clear();
46    vec.extend(0..SIZE);
47    let mut rest = &mut vec[..];
48    while let Some((first, next @ &mut [_, ..])) = rest.split_first_mut() {
49        let swap_with = random.next().unwrap() as usize % next.len();
50        std::mem::swap(first, &mut next[swap_with]);
51        rest = next;
52    }
53
54    measure("random", || {
55        black_box(walk(&vec));
56    });
57}
58
/// Chase indices through `indices`, counting the hops.
///
/// Each element is interpreted as the position of the next element to
/// visit: begin at `indices[0]` and keep hopping until the chain leads
/// back to position `0`.
///
/// The traversal order is determined entirely by the slice's contents,
/// not by any control flow here, so the caller can dictate an
/// arbitrary access pattern by filling in `indices` appropriately.
///
/// Returns the number of steps taken to arrive back at index 0.
fn walk(indices: &[usize]) -> usize {
    // The first hop always happens; keep following the chain until it
    // points back at the starting slot.
    let mut pos = indices[0];
    let mut steps = 1;
    while pos != 0 {
        pos = indices[pos];
        steps += 1;
    }
    steps
}
83
/// Run `task` once and report its L1 data cache behavior, prefixed
/// with `label`.
///
/// Counts L1D read accesses, L1D read misses, and L1D prefetch
/// accesses in a single perf event group, so all three counters are
/// enabled, disabled, and read together.
fn measure(label: &str, task: impl FnOnce()) {
    use perf_event::events::{Cache, CacheOp, CacheResult, WhichCache};
    use perf_event::{Builder, Group};

    // One group keeps the three counters scheduled as a unit.
    let mut group = Group::new().expect("creating group is ok");
    // All reads of the L1 data cache, hit or miss.
    let read_counter = Builder::new()
        .group(&mut group)
        .kind(Cache {
            which: WhichCache::L1D,
            operation: CacheOp::READ,
            result: CacheResult::ACCESS,
        })
        .build()
        .expect("building read_counter is ok");
    // Reads of the L1 data cache that missed.
    let read_miss_counter = Builder::new()
        .group(&mut group)
        .kind(Cache {
            which: WhichCache::L1D,
            operation: CacheOp::READ,
            result: CacheResult::MISS,
        })
        .build()
        .expect("building read_miss_counter is ok");
    // Prefetch accesses to the L1 data cache.
    let prefetch_counter = Builder::new()
        .group(&mut group)
        .kind(Cache {
            which: WhichCache::L1D,
            operation: CacheOp::PREFETCH,
            result: CacheResult::ACCESS,
        })
        .build()
        .expect("building prefetch_counter is ok");

    // Count only while `task` is running.
    group.enable().expect("enabling group is ok");
    task();
    group.disable().expect("disabling group is ok");

    let counts = group.read().expect("reading group is ok");
    let reads = counts[&read_counter];
    let read_misses = counts[&read_miss_counter];
    // Hits aren't counted directly; derive them. Assumes misses never
    // exceed accesses (otherwise this u64 subtraction would wrap).
    let read_hits = reads - read_misses;
    let prefetches = counts[&prefetch_counter];

    println!(
        "{label}: hits / reads: {read_hits:8} / {reads:8} {:6.2}%, \
         prefetched {prefetches:8}",
        (read_hits as f64 / reads as f64) * 100.0,
    );

    // If the group wasn't running for the whole time it was enabled
    // (e.g. the kernel multiplexed it with other events), the counts
    // cover only part of the work — flag that so the numbers above
    // aren't taken at face value.
    if counts.time_enabled() != counts.time_running() {
        println!(
            "time enabled: {}  time running: {}",
            counts.time_enabled(),
            counts.time_running(),
        );
    }
}
141
/// The [XorShift128+] pseudorandom number generator.
///
/// Implements [`Iterator`], so pseudorandom `u64` items can be drawn
/// with `next` and the other iterator adapters.
///
/// [XorShift128+]: https://en.wikipedia.org/wiki/Xorshift#xorshift+
struct XorShift128Plus {
    // Generator state; must never be all zeros (enforced by
    // `from_seed`), since the all-zero state maps to itself.
    state: [u64; 2],
}

impl XorShift128Plus {
    /// Build a generator from two seed words.
    ///
    /// At least one seed must be nonzero: the all-zero state is a
    /// fixed point of the recurrence and would yield only zeros.
    fn from_seed(seed1: u64, seed2: u64) -> Self {
        assert!(seed1 != 0 || seed2 != 0);
        Self { state: [seed1, seed2] }
    }
}

impl Iterator for XorShift128Plus {
    type Item = u64;

    /// Advance the xorshift128+ recurrence once. Always `Some`.
    fn next(&mut self) -> Option<Self::Item> {
        let [old0, old1] = self.state;
        // Scramble the older word with the standard 23/18/5 shifts,
        // folding in the newer word.
        let mut x = old0;
        x ^= x << 23;
        x ^= x >> 18;
        x ^= old1 ^ (old1 >> 5);
        self.state = [old1, x];
        Some(x.wrapping_add(old1))
    }
}
174}