warp_types/research/register_pressure.rs
1//! Register Pressure Tracking
2//!
3//! **STATUS: Research** — Exploratory prototype, not promoted to main API.
4//!
5//! Research question: "Can we track register pressure linearly?"
6//!
7//! # Background
8//!
9//! GPU register allocation is critical for performance:
10//! - Each thread has limited registers (64-255 depending on GPU)
11//! - More registers per thread = fewer concurrent threads (occupancy)
12//! - Spilling to local memory is very slow
13//!
14//! Can the type system track register usage to prevent spills?
15//!
16//! # Key Insight
17//!
18//! Linear types can track resources that must be used exactly once.
19//! Registers are such resources:
20//! - Allocated when variable is created
21//! - Freed when variable goes out of scope
22//! - Total usage must not exceed limit
23//!
24//! # Challenges
25//!
26//! 1. Register count depends on variable types (f32 = 1, f64 = 2, etc.)
27//! 2. Compiler may use more registers than source suggests (temporaries)
28//! 3. Control flow affects live ranges (different paths = different pressure)
29//!
30//! # Approach
31//!
32//! Track APPROXIMATE pressure using type-level natural numbers.
33//! This won't be exact but can catch obvious overuse.
34
35use std::marker::PhantomData;
36
37// ============================================================================
38// TYPE-LEVEL NATURAL NUMBERS (Peano)
39// ============================================================================
40
41/// Zero registers
42pub struct Z;
43
44/// Successor: S<N> = N + 1
45pub struct S<N>(PhantomData<N>);
46
47// Type aliases for convenience
48pub type N0 = Z;
49pub type N1 = S<N0>;
50pub type N2 = S<N1>;
51pub type N3 = S<N2>;
52pub type N4 = S<N3>;
53pub type N5 = S<N4>;
54pub type N8 = S<S<S<N5>>>; // 5 + 3 = 8
55pub type N16 = S<S<S<S<S<S<S<S<N8>>>>>>>>; // 8 + 8 = 16
56pub type N32 = S<S<S<S<S<S<S<S<S<S<S<S<S<S<S<S<N16>>>>>>>>>>>>>>>>; // 16 + 16 = 32
57
58// ============================================================================
59// REGISTER BUDGET
60// ============================================================================
61
62/// A register budget with N registers remaining
63pub struct Budget<N> {
64 _remaining: PhantomData<N>,
65}
66
67impl<N> Budget<N> {
68 pub fn new() -> Self {
69 Budget {
70 _remaining: PhantomData,
71 }
72 }
73}
74
75/// Trait for types that can be subtracted
76pub trait Sub<Rhs> {
77 type Output;
78}
79
80// N - 0 = N
81impl<N> Sub<Z> for N {
82 type Output = N;
83}
84
85// S<N> - S<M> = N - M (for N >= M)
86impl<N, M> Sub<S<M>> for S<N>
87where
88 N: Sub<M>,
89{
90 type Output = <N as Sub<M>>::Output;
91}
92
93// ============================================================================
94// REGISTER-TRACKED VALUES
95// ============================================================================
96
97/// A value that uses R registers
98pub struct Reg<T: Copy, R> {
99 value: T,
100 _registers: PhantomData<R>,
101}
102
103impl<T: Copy, R> Reg<T, R> {
104 /// Create a register-tracked value
105 pub fn new(value: T) -> Self {
106 Reg {
107 value,
108 _registers: PhantomData,
109 }
110 }
111
112 pub fn get(&self) -> T {
113 self.value
114 }
115}
116
117// Size of common types in registers
118pub trait RegisterSize {
119 type Size;
120}
121
122impl RegisterSize for i32 {
123 type Size = N1;
124}
125impl RegisterSize for u32 {
126 type Size = N1;
127}
128impl RegisterSize for f32 {
129 type Size = N1;
130}
131impl RegisterSize for i64 {
132 type Size = N2;
133}
134impl RegisterSize for u64 {
135 type Size = N2;
136}
137impl RegisterSize for f64 {
138 type Size = N2;
139}
140impl RegisterSize for bool {
141 type Size = N1;
142} // Predicate register
143
144// Arrays use N * element_size registers
145impl<T: RegisterSize, const N: usize> RegisterSize for [T; N]
146where
147// This would need type-level multiplication which is complex
148// Simplified: treat arrays as using many registers
149{
150 type Size = N32; // Conservative estimate
151}
152
153// ============================================================================
154// ALLOCATION AND DEALLOCATION
155// ============================================================================
156
157/// Allocate registers for a value
158pub fn alloc<T, R, B>(_budget: Budget<B>, value: T) -> (Budget<<B as Sub<R>>::Output>, Reg<T, R>)
159where
160 T: Copy + RegisterSize<Size = R>,
161 B: Sub<R>,
162{
163 (Budget::new(), Reg::new(value))
164}
165
166/// Frees one register slot (simplified — a full implementation would track R
167/// and add it back to B, but this prototype only increments by one).
168pub fn free<T: Copy, R, B>(_budget: Budget<B>, _reg: Reg<T, R>) -> Budget<S<B>>
169where
170 // This is simplified - real implementation would add R to B
171{
172 Budget::new()
173}
174
175// ============================================================================
176// SIMPLIFIED RUNTIME TRACKING
177// ============================================================================
178
179/// Runtime register tracker (simpler, doesn't use type-level numbers)
180pub mod runtime {
181 /// Track register usage at runtime
182 #[derive(Debug, Clone)]
183 pub struct RegisterTracker {
184 used: usize,
185 limit: usize,
186 peak: usize,
187 }
188
189 impl RegisterTracker {
190 pub fn new(limit: usize) -> Self {
191 RegisterTracker {
192 used: 0,
193 limit,
194 peak: 0,
195 }
196 }
197
198 /// Allocate registers, returns false if would exceed limit
199 pub fn alloc(&mut self, count: usize) -> bool {
200 if self.used + count > self.limit {
201 return false;
202 }
203 self.used += count;
204 self.peak = self.peak.max(self.used);
205 true
206 }
207
208 /// Free registers
209 pub fn free(&mut self, count: usize) {
210 self.used = self.used.saturating_sub(count);
211 }
212
213 pub fn used(&self) -> usize {
214 self.used
215 }
216 pub fn remaining(&self) -> usize {
217 self.limit - self.used
218 }
219 pub fn peak(&self) -> usize {
220 self.peak
221 }
222 }
223
224 /// A value tracked by a register tracker
225 pub struct TrackedValue<T> {
226 value: T,
227 reg_count: usize,
228 }
229
230 impl<T> TrackedValue<T> {
231 pub fn new(tracker: &mut RegisterTracker, value: T, reg_count: usize) -> Option<Self> {
232 if tracker.alloc(reg_count) {
233 Some(TrackedValue { value, reg_count })
234 } else {
235 None
236 }
237 }
238
239 pub fn get(&self) -> &T {
240 &self.value
241 }
242
243 pub fn free(self, tracker: &mut RegisterTracker) -> T {
244 tracker.free(self.reg_count);
245 self.value
246 }
247 }
248
249 #[cfg(test)]
250 mod tests {
251 use super::*;
252
253 #[test]
254 fn test_basic_tracking() {
255 let mut tracker = RegisterTracker::new(64);
256
257 assert_eq!(tracker.used(), 0);
258 assert_eq!(tracker.remaining(), 64);
259
260 // Allocate 10 registers
261 assert!(tracker.alloc(10));
262 assert_eq!(tracker.used(), 10);
263
264 // Allocate 50 more
265 assert!(tracker.alloc(50));
266 assert_eq!(tracker.used(), 60);
267
268 // Try to allocate 10 more (would exceed limit)
269 assert!(!tracker.alloc(10));
270 assert_eq!(tracker.used(), 60);
271
272 // Free some
273 tracker.free(20);
274 assert_eq!(tracker.used(), 40);
275
276 // Peak should be 60
277 assert_eq!(tracker.peak(), 60);
278 }
279
280 #[test]
281 fn test_tracked_values() {
282 let mut tracker = RegisterTracker::new(10);
283
284 // Allocate a value using 4 registers
285 let v1 = TrackedValue::new(&mut tracker, 42i32, 4).unwrap();
286 assert_eq!(tracker.used(), 4);
287
288 // Allocate another using 4
289 let v2 = TrackedValue::new(&mut tracker, 3.14f32, 4).unwrap();
290 assert_eq!(tracker.used(), 8);
291
292 // Can't allocate 4 more (8 + 4 > 10)
293 assert!(TrackedValue::new(&mut tracker, 0u64, 4).is_none());
294
295 // Free v1
296 let val1 = v1.free(&mut tracker);
297 assert_eq!(val1, 42);
298 assert_eq!(tracker.used(), 4);
299
300 // Now we can allocate
301 let _v3 = TrackedValue::new(&mut tracker, 0u64, 4).unwrap();
302 assert_eq!(tracker.used(), 8);
303
304 let _ = v2; // Use v2 to avoid warning
305 }
306 }
307}
308
309// ============================================================================
310// SUMMARY
311// ============================================================================
312
313/// Summary: Can we track register pressure linearly?
314///
315/// ## Answer: Partially, with limitations
316///
317/// ### What Works
318///
319/// 1. **Runtime tracking**: Track allocation/deallocation, enforce limits
320/// - Simple to implement
321/// - Catches actual overuse
322/// - Runtime overhead
323///
324/// 2. **Type-level approximation**: Use Peano numbers to track pressure
325/// - Zero runtime overhead
326/// - Catches some issues at compile time
327/// - Complex type signatures
328///
329/// ### Limitations
330///
331/// 1. **Compiler temporaries**: Compiler may use more registers than source
332/// 2. **Spilling decisions**: Compiler decides when to spill, not programmer
333/// 3. **Live ranges**: Optimal allocation requires whole-function analysis
334/// 4. **Branching**: Different branches may have different pressure
335///
336/// ### Practical Approach
337///
338/// 1. Use runtime tracking during development to profile pressure
339/// 2. Use type-level hints for hot paths (e.g., `#[max_registers(32)]`)
340/// 3. Trust compiler for final allocation, but guide it with annotations
341///
342/// ### Relation to Linear Types
343///
344/// Register tracking IS linear resource management:
345/// - Allocate: Use resource
346/// - Free: Return resource
347/// - Budget: Finite pool
348///
349/// But registers are FUNGIBLE (any register works for any value),
350/// unlike affine resources which have specific identities.
351///
352/// ### Verdict
353///
354/// Type-level register tracking is POSSIBLE but IMPRACTICAL for full precision.
355/// Runtime tracking + compiler hints is more realistic.
356/// Linear types help conceptually but exact tracking needs compiler support.
357pub const _SUMMARY: () = ();
358
359#[cfg(test)]
360mod type_level_tests {
361 use super::*;
362
363 #[test]
364 fn test_type_level_allocation() {
365 // Start with 5 register budget
366 let budget: Budget<N5> = Budget::new();
367
368 // Allocate an i32 (1 register)
369 let (budget, reg1): (Budget<N4>, Reg<i32, N1>) = alloc(budget, 42i32);
370
371 // Allocate another i32
372 let (budget, reg2): (Budget<N3>, Reg<i32, N1>) = alloc(budget, 10i32);
373
374 // Use the values
375 assert_eq!(reg1.get() + reg2.get(), 52);
376
377 // Budget is now N3 (started with N5, used N2)
378 let _: Budget<N3> = budget;
379 }
380}