// padlock-core/src/analysis/impact.rs
//
// Estimates the concrete memory and cache-line impact of struct layout
// inefficiencies at different instance-count scales.
//
// All estimates are approximations intended to give engineers a concrete sense
// of magnitude, not cycle-accurate benchmarks.
/// Estimated impact of a struct layout inefficiency at various scales.
///
/// Produced by `estimate_impact`. Every "extra" figure is measured against
/// the optimal field order computed by `reorder::reorder_savings`.
#[derive(Debug, Clone, PartialEq)]
pub struct ImpactEstimate {
    /// Bytes saved per instance by applying the optimal field ordering.
    pub savings_per_instance: usize,
    /// Cache-line size (in bytes) this estimate was computed with.
    pub cache_line_size: usize,
    /// Cache lines occupied per instance by the *current* layout.
    pub current_cache_lines: usize,
    /// Cache lines occupied per instance by the *optimal* layout.
    pub optimal_cache_lines: usize,
    /// Wasted bytes across 1 000 instances (`savings × 1 000`).
    pub extra_bytes_1k: usize,
    /// Wasted bytes across 1 000 000 instances (`savings × 1 000 000`).
    pub extra_bytes_1m: usize,
    /// Approximate extra cache lines touched when scanning 1 000 instances sequentially.
    pub extra_cache_lines_1k: usize,
    /// Approximate extra cache lines touched when scanning 1 000 000 instances sequentially.
    pub extra_cache_lines_1m: usize,
}

impl ImpactEstimate {
    /// `true` when the optimal layout needs strictly fewer cache lines per
    /// instance than the current one — i.e. reordering would cut cache-line
    /// crossings on random access.
    pub fn reduces_cache_line_crossings(&self) -> bool {
        self.optimal_cache_lines < self.current_cache_lines
    }
}
41
42/// Compute the impact estimate for a struct layout inefficiency.
43///
44/// # Parameters
45/// - `savings` — bytes saved per instance by reordering (from `reorder::reorder_savings`)
46/// - `current_size` — current total struct size in bytes
47/// - `optimal_size` — total size after optimal reordering
48/// - `cache_line` — cache-line size in bytes (typically 64; use `ArchConfig.cache_line_size`)
49///
50/// # Example
51/// ```
52/// use padlock_core::analysis::impact::estimate_impact;
53///
54/// // 24-byte struct that can shrink to 16 bytes by reordering
55/// let est = estimate_impact(8, 24, 16, 64);
56/// assert_eq!(est.savings_per_instance, 8);
57/// assert_eq!(est.extra_bytes_1m, 8_000_000);
58/// assert_eq!(est.extra_cache_lines_1m, 125_000);
59/// assert!(!est.reduces_cache_line_crossings()); // both fit in one cache line
60/// ```
61pub fn estimate_impact(
62 savings: usize,
63 current_size: usize,
64 optimal_size: usize,
65 cache_line: usize,
66) -> ImpactEstimate {
67 let cl = cache_line.max(1);
68 let current_cache_lines = current_size.div_ceil(cl);
69 let optimal_cache_lines = optimal_size.div_ceil(cl);
70
71 ImpactEstimate {
72 savings_per_instance: savings,
73 cache_line_size: cl,
74 current_cache_lines,
75 optimal_cache_lines,
76 extra_bytes_1k: savings * 1_000,
77 extra_bytes_1m: savings * 1_000_000,
78 // Ceiling division: conservative estimate (rounds up)
79 extra_cache_lines_1k: (savings * 1_000).div_ceil(cl),
80 extra_cache_lines_1m: (savings * 1_000_000).div_ceil(cl),
81 }
82}

// ── tests ─────────────────────────────────────────────────────────────────────

#[cfg(test)]
mod tests {
    use super::*;

    #[test]
    fn connection_layout_impact() {
        // A 24-byte struct trimmed to 16 bytes; both layouts fit one 64-byte line.
        let impact = estimate_impact(8, 24, 16, 64);

        assert_eq!(impact.savings_per_instance, 8);
        assert_eq!(impact.cache_line_size, 64);
        assert_eq!(
            (impact.current_cache_lines, impact.optimal_cache_lines),
            (1, 1)
        );
        assert!(!impact.reduces_cache_line_crossings());
        assert_eq!((impact.extra_bytes_1k, impact.extra_bytes_1m), (8_000, 8_000_000));
        assert_eq!(
            (impact.extra_cache_lines_1k, impact.extra_cache_lines_1m),
            (125, 125_000)
        );
    }

    #[test]
    fn large_struct_reduces_cache_line_crossings() {
        // Current layout straddles two 64-byte lines; the optimal one fits in one.
        let impact = estimate_impact(64, 128, 64, 64);

        assert_eq!(impact.current_cache_lines, 2);
        assert_eq!(impact.optimal_cache_lines, 1);
        assert!(impact.reduces_cache_line_crossings());
        assert_eq!(impact.extra_bytes_1m, 64_000_000);
        assert_eq!(impact.extra_cache_lines_1m, 1_000_000);
    }

    #[test]
    fn zero_savings_produces_zero_impact() {
        let impact = estimate_impact(0, 16, 16, 64);

        assert_eq!(impact.savings_per_instance, 0);
        // Every "extra" figure must collapse to zero when nothing is saved.
        for extra in [
            impact.extra_bytes_1k,
            impact.extra_bytes_1m,
            impact.extra_cache_lines_1k,
            impact.extra_cache_lines_1m,
        ] {
            assert_eq!(extra, 0);
        }
        assert!(!impact.reduces_cache_line_crossings());
    }

    #[test]
    fn apple_silicon_128_byte_cache_line() {
        // Apple M-series parts use a 128-byte cache line.
        let impact = estimate_impact(8, 24, 16, 128);

        assert_eq!(impact.cache_line_size, 128);
        assert_eq!(impact.current_cache_lines, 1);
        assert_eq!(impact.optimal_cache_lines, 1);
        // Sequential scan: 8 B × 1 M instances / 128 B per line = 62 500 lines.
        assert_eq!(impact.extra_cache_lines_1m, 62_500);
    }

    #[test]
    fn struct_spanning_boundary_in_current_but_not_optimal() {
        // 72 bytes needs ceil(72/64) = 2 lines; the 64-byte optimum needs 1.
        let impact = estimate_impact(8, 72, 64, 64);

        assert_eq!(impact.current_cache_lines, 2);
        assert_eq!(impact.optimal_cache_lines, 1);
        assert!(impact.reduces_cache_line_crossings());
    }

    #[test]
    fn small_savings_cache_lines_round_up() {
        // 1 B × 1 000 instances = 1 000 B; ceil(1000/64) = 16 lines.
        let impact = estimate_impact(1, 8, 7, 64);
        assert_eq!(impact.extra_cache_lines_1k, 16);
    }
}
153}