Skip to main content

warp_types/research/
register_pressure.rs

1//! Register Pressure Tracking
2//!
3//! **STATUS: Research** — Exploratory prototype, not promoted to main API.
4//!
5//! Research question: "Can we track register pressure linearly?"
6//!
7//! # Background
8//!
9//! GPU register allocation is critical for performance:
10//! - Each thread has limited registers (64-255 depending on GPU)
11//! - More registers per thread = fewer concurrent threads (occupancy)
12//! - Spilling to local memory is very slow
13//!
14//! Can the type system track register usage to prevent spills?
15//!
16//! # Key Insight
17//!
18//! Linear types can track resources that must be used exactly once.
19//! Registers are such resources:
20//! - Allocated when variable is created
21//! - Freed when variable goes out of scope
22//! - Total usage must not exceed limit
23//!
24//! # Challenges
25//!
26//! 1. Register count depends on variable types (f32 = 1, f64 = 2, etc.)
27//! 2. Compiler may use more registers than source suggests (temporaries)
28//! 3. Control flow affects live ranges (different paths = different pressure)
29//!
30//! # Approach
31//!
32//! Track APPROXIMATE pressure using type-level natural numbers.
33//! This won't be exact but can catch obvious overuse.
34
35use std::marker::PhantomData;
36
37// ============================================================================
38// TYPE-LEVEL NATURAL NUMBERS (Peano)
39// ============================================================================
40
41/// Zero registers
42pub struct Z;
43
44/// Successor: S<N> = N + 1
45pub struct S<N>(PhantomData<N>);
46
47// Type aliases for convenience
48pub type N0 = Z;
49pub type N1 = S<N0>;
50pub type N2 = S<N1>;
51pub type N3 = S<N2>;
52pub type N4 = S<N3>;
53pub type N5 = S<N4>;
54pub type N8 = S<S<S<N5>>>; // 5 + 3 = 8
55pub type N16 = S<S<S<S<S<S<S<S<N8>>>>>>>>; // 8 + 8 = 16
56pub type N32 = S<S<S<S<S<S<S<S<S<S<S<S<S<S<S<S<N16>>>>>>>>>>>>>>>>; // 16 + 16 = 32
57
58// ============================================================================
59// REGISTER BUDGET
60// ============================================================================
61
62/// A register budget with N registers remaining
63pub struct Budget<N> {
64    _remaining: PhantomData<N>,
65}
66
67impl<N> Budget<N> {
68    pub fn new() -> Self {
69        Budget {
70            _remaining: PhantomData,
71        }
72    }
73}
74
75/// Trait for types that can be subtracted
76pub trait Sub<Rhs> {
77    type Output;
78}
79
80// N - 0 = N
81impl<N> Sub<Z> for N {
82    type Output = N;
83}
84
85// S<N> - S<M> = N - M (for N >= M)
86impl<N, M> Sub<S<M>> for S<N>
87where
88    N: Sub<M>,
89{
90    type Output = <N as Sub<M>>::Output;
91}
92
93// ============================================================================
94// REGISTER-TRACKED VALUES
95// ============================================================================
96
97/// A value that uses R registers
98pub struct Reg<T: Copy, R> {
99    value: T,
100    _registers: PhantomData<R>,
101}
102
103impl<T: Copy, R> Reg<T, R> {
104    /// Create a register-tracked value
105    pub fn new(value: T) -> Self {
106        Reg {
107            value,
108            _registers: PhantomData,
109        }
110    }
111
112    pub fn get(&self) -> T {
113        self.value
114    }
115}
116
117// Size of common types in registers
118pub trait RegisterSize {
119    type Size;
120}
121
122impl RegisterSize for i32 {
123    type Size = N1;
124}
125impl RegisterSize for u32 {
126    type Size = N1;
127}
128impl RegisterSize for f32 {
129    type Size = N1;
130}
131impl RegisterSize for i64 {
132    type Size = N2;
133}
134impl RegisterSize for u64 {
135    type Size = N2;
136}
137impl RegisterSize for f64 {
138    type Size = N2;
139}
140impl RegisterSize for bool {
141    type Size = N1;
142} // Predicate register
143
144// Arrays use N * element_size registers
145impl<T: RegisterSize, const N: usize> RegisterSize for [T; N]
146where
147// This would need type-level multiplication which is complex
148// Simplified: treat arrays as using many registers
149{
150    type Size = N32; // Conservative estimate
151}
152
153// ============================================================================
154// ALLOCATION AND DEALLOCATION
155// ============================================================================
156
157/// Allocate registers for a value
158pub fn alloc<T, R, B>(_budget: Budget<B>, value: T) -> (Budget<<B as Sub<R>>::Output>, Reg<T, R>)
159where
160    T: Copy + RegisterSize<Size = R>,
161    B: Sub<R>,
162{
163    (Budget::new(), Reg::new(value))
164}
165
166/// Frees one register slot (simplified — a full implementation would track R
167/// and add it back to B, but this prototype only increments by one).
168pub fn free<T: Copy, R, B>(_budget: Budget<B>, _reg: Reg<T, R>) -> Budget<S<B>>
169where
170    // This is simplified - real implementation would add R to B
171{
172    Budget::new()
173}
174
175// ============================================================================
176// SIMPLIFIED RUNTIME TRACKING
177// ============================================================================
178
179/// Runtime register tracker (simpler, doesn't use type-level numbers)
180pub mod runtime {
181    /// Track register usage at runtime
182    #[derive(Debug, Clone)]
183    pub struct RegisterTracker {
184        used: usize,
185        limit: usize,
186        peak: usize,
187    }
188
189    impl RegisterTracker {
190        pub fn new(limit: usize) -> Self {
191            RegisterTracker {
192                used: 0,
193                limit,
194                peak: 0,
195            }
196        }
197
198        /// Allocate registers, returns false if would exceed limit
199        pub fn alloc(&mut self, count: usize) -> bool {
200            if self.used + count > self.limit {
201                return false;
202            }
203            self.used += count;
204            self.peak = self.peak.max(self.used);
205            true
206        }
207
208        /// Free registers
209        pub fn free(&mut self, count: usize) {
210            self.used = self.used.saturating_sub(count);
211        }
212
213        pub fn used(&self) -> usize {
214            self.used
215        }
216        pub fn remaining(&self) -> usize {
217            self.limit - self.used
218        }
219        pub fn peak(&self) -> usize {
220            self.peak
221        }
222    }
223
224    /// A value tracked by a register tracker
225    pub struct TrackedValue<T> {
226        value: T,
227        reg_count: usize,
228    }
229
230    impl<T> TrackedValue<T> {
231        pub fn new(tracker: &mut RegisterTracker, value: T, reg_count: usize) -> Option<Self> {
232            if tracker.alloc(reg_count) {
233                Some(TrackedValue { value, reg_count })
234            } else {
235                None
236            }
237        }
238
239        pub fn get(&self) -> &T {
240            &self.value
241        }
242
243        pub fn free(self, tracker: &mut RegisterTracker) -> T {
244            tracker.free(self.reg_count);
245            self.value
246        }
247    }
248
249    #[cfg(test)]
250    mod tests {
251        use super::*;
252
253        #[test]
254        fn test_basic_tracking() {
255            let mut tracker = RegisterTracker::new(64);
256
257            assert_eq!(tracker.used(), 0);
258            assert_eq!(tracker.remaining(), 64);
259
260            // Allocate 10 registers
261            assert!(tracker.alloc(10));
262            assert_eq!(tracker.used(), 10);
263
264            // Allocate 50 more
265            assert!(tracker.alloc(50));
266            assert_eq!(tracker.used(), 60);
267
268            // Try to allocate 10 more (would exceed limit)
269            assert!(!tracker.alloc(10));
270            assert_eq!(tracker.used(), 60);
271
272            // Free some
273            tracker.free(20);
274            assert_eq!(tracker.used(), 40);
275
276            // Peak should be 60
277            assert_eq!(tracker.peak(), 60);
278        }
279
280        #[test]
281        fn test_tracked_values() {
282            let mut tracker = RegisterTracker::new(10);
283
284            // Allocate a value using 4 registers
285            let v1 = TrackedValue::new(&mut tracker, 42i32, 4).unwrap();
286            assert_eq!(tracker.used(), 4);
287
288            // Allocate another using 4
289            let v2 = TrackedValue::new(&mut tracker, 3.14f32, 4).unwrap();
290            assert_eq!(tracker.used(), 8);
291
292            // Can't allocate 4 more (8 + 4 > 10)
293            assert!(TrackedValue::new(&mut tracker, 0u64, 4).is_none());
294
295            // Free v1
296            let val1 = v1.free(&mut tracker);
297            assert_eq!(val1, 42);
298            assert_eq!(tracker.used(), 4);
299
300            // Now we can allocate
301            let _v3 = TrackedValue::new(&mut tracker, 0u64, 4).unwrap();
302            assert_eq!(tracker.used(), 8);
303
304            let _ = v2; // Use v2 to avoid warning
305        }
306    }
307}
308
309// ============================================================================
310// SUMMARY
311// ============================================================================
312
313/// Summary: Can we track register pressure linearly?
314///
315/// ## Answer: Partially, with limitations
316///
317/// ### What Works
318///
319/// 1. **Runtime tracking**: Track allocation/deallocation, enforce limits
320///    - Simple to implement
321///    - Catches actual overuse
322///    - Runtime overhead
323///
324/// 2. **Type-level approximation**: Use Peano numbers to track pressure
325///    - Zero runtime overhead
326///    - Catches some issues at compile time
327///    - Complex type signatures
328///
329/// ### Limitations
330///
331/// 1. **Compiler temporaries**: Compiler may use more registers than source
332/// 2. **Spilling decisions**: Compiler decides when to spill, not programmer
333/// 3. **Live ranges**: Optimal allocation requires whole-function analysis
334/// 4. **Branching**: Different branches may have different pressure
335///
336/// ### Practical Approach
337///
338/// 1. Use runtime tracking during development to profile pressure
339/// 2. Use type-level hints for hot paths (e.g., `#[max_registers(32)]`)
340/// 3. Trust compiler for final allocation, but guide it with annotations
341///
342/// ### Relation to Linear Types
343///
344/// Register tracking IS linear resource management:
345/// - Allocate: Use resource
346/// - Free: Return resource
347/// - Budget: Finite pool
348///
349/// But registers are FUNGIBLE (any register works for any value),
350/// unlike affine resources which have specific identities.
351///
352/// ### Verdict
353///
354/// Type-level register tracking is POSSIBLE but IMPRACTICAL for full precision.
355/// Runtime tracking + compiler hints is more realistic.
356/// Linear types help conceptually but exact tracking needs compiler support.
357pub const _SUMMARY: () = ();
358
359#[cfg(test)]
360mod type_level_tests {
361    use super::*;
362
363    #[test]
364    fn test_type_level_allocation() {
365        // Start with 5 register budget
366        let budget: Budget<N5> = Budget::new();
367
368        // Allocate an i32 (1 register)
369        let (budget, reg1): (Budget<N4>, Reg<i32, N1>) = alloc(budget, 42i32);
370
371        // Allocate another i32
372        let (budget, reg2): (Budget<N3>, Reg<i32, N1>) = alloc(budget, 10i32);
373
374        // Use the values
375        assert_eq!(reg1.get() + reg2.get(), 52);
376
377        // Budget is now N3 (started with N5, used N2)
378        let _: Budget<N3> = budget;
379    }
380}