Skip to main content

trueno/brick/
budget.rs

1//! Token and byte performance budgets for ComputeBrick operations.
2
/// Latency and throughput target for token-oriented work.
///
/// Stating compute cost per token keeps budgets comparable with the
/// metrics used for LLM inference.
#[derive(Clone, Copy, Debug)]
pub struct TokenBudget {
    /// Maximum time allowed per token, in microseconds.
    pub us_per_token: f64,
    /// Target rate, in tokens per second.
    pub tokens_per_sec: f64,
    /// Number of tokens per batch, used for amortization.
    pub batch_size: usize,
}
14
15/// Performance budget for byte-oriented operations (compression, I/O).
16/// Use this for trueno-zram, disk I/O, network throughput, etc.
17///
18/// PMAT-452: Serializable for hardware.toml export.
19#[derive(Debug, Clone, Copy, serde::Serialize, serde::Deserialize)]
20pub struct ByteBudget {
21    /// Latency budget per page (microseconds)
22    pub us_per_page: f64,
23    /// Throughput target (GB/s)
24    pub gb_per_sec: f64,
25    /// Page size in bytes (default 4096)
26    pub page_size: usize,
27}
28
29impl Default for ByteBudget {
30    fn default() -> Self {
31        // Default: 25 GB/s (trueno-zram ZSTD target)
32        Self::from_throughput(25.0)
33    }
34}
35
36impl ByteBudget {
37    /// Create budget from throughput target (GB/s).
38    /// 25 GB/s = 0.16µs per 4KB page
39    pub fn from_throughput(gb_per_sec: f64) -> Self {
40        debug_assert!(
41            gb_per_sec > 0.0 && gb_per_sec.is_finite(),
42            "CB-BUDGET: throughput must be positive and finite, got {}",
43            gb_per_sec
44        );
45        let bytes_per_sec = gb_per_sec * 1e9;
46        let pages_per_sec = bytes_per_sec / 4096.0;
47        Self { us_per_page: 1_000_000.0 / pages_per_sec, gb_per_sec, page_size: 4096 }
48    }
49
50    /// Create budget from latency target (µs per page).
51    pub fn from_latency(us_per_page: f64) -> Self {
52        let pages_per_sec = 1_000_000.0 / us_per_page;
53        let bytes_per_sec = pages_per_sec * 4096.0;
54        Self { us_per_page, gb_per_sec: bytes_per_sec / 1e9, page_size: 4096 }
55    }
56
57    /// Set custom page size (e.g., 64KB for huge pages).
58    #[must_use]
59    pub fn with_page_size(mut self, page_size: usize) -> Self {
60        // Recalculate us_per_page based on new page size
61        let bytes_per_sec = self.gb_per_sec * 1e9;
62        let pages_per_sec = bytes_per_sec / page_size as f64;
63        self.us_per_page = 1_000_000.0 / pages_per_sec;
64        self.page_size = page_size;
65        self
66    }
67
68    /// Convert to TokenBudget (1 token = 1 page).
69    /// Useful for integrating byte workloads with token-centric monitoring.
70    pub fn to_token_budget(&self) -> TokenBudget {
71        TokenBudget {
72            us_per_token: self.us_per_page,
73            tokens_per_sec: 1_000_000.0 / self.us_per_page,
74            batch_size: 1,
75        }
76    }
77
78    /// Check if actual performance meets budget.
79    pub fn is_met(&self, actual_us_per_page: f64) -> bool {
80        actual_us_per_page <= self.us_per_page
81    }
82
83    /// Calculate budget utilization.
84    pub fn utilization(&self, actual_us_per_page: f64) -> f64 {
85        actual_us_per_page / self.us_per_page
86    }
87
88    /// Calculate actual throughput from latency.
89    pub fn throughput_from_latency(us_per_page: f64, page_size: usize) -> f64 {
90        let pages_per_sec = 1_000_000.0 / us_per_page;
91        pages_per_sec * page_size as f64 / 1e9
92    }
93}
94
95impl Default for TokenBudget {
96    fn default() -> Self {
97        // Default: 50µs/token = 20,000 tokens/sec
98        Self::from_latency(50.0)
99    }
100}
101
102impl TokenBudget {
103    /// Create budget from latency target.
104    /// 50µs/token = 20,000 tokens/sec
105    pub fn from_latency(us_per_token: f64) -> Self {
106        Self { us_per_token, tokens_per_sec: 1_000_000.0 / us_per_token, batch_size: 1 }
107    }
108
109    /// Create budget from throughput target.
110    /// 20,000 tokens/sec = 50µs/token
111    pub fn from_throughput(tokens_per_sec: f64) -> Self {
112        Self { us_per_token: 1_000_000.0 / tokens_per_sec, tokens_per_sec, batch_size: 1 }
113    }
114
115    /// Set batch size for amortization.
116    #[must_use]
117    pub fn with_batch_size(mut self, batch_size: usize) -> Self {
118        self.batch_size = batch_size.max(1);
119        self
120    }
121
122    /// Check if actual performance meets budget.
123    pub fn is_met(&self, actual_us_per_token: f64) -> bool {
124        actual_us_per_token <= self.us_per_token
125    }
126
127    /// Calculate budget utilization (0.0 = unused, 1.0 = exactly at budget, >1.0 = over budget).
128    pub fn utilization(&self, actual_us_per_token: f64) -> f64 {
129        actual_us_per_token / self.us_per_token
130    }
131}
132
/// Output of a ComputeBrick execution, bundled with its measured token metrics.
#[derive(Clone, Debug)]
pub struct TokenResult<T> {
    /// The value produced by the computation.
    pub output: T,
    /// How many tokens were processed.
    pub tokens_processed: usize,
    /// Measured latency, in microseconds per token.
    pub us_per_token: f64,
    /// Measured throughput, in tokens per second.
    pub tokens_per_sec: f64,
    /// Whether the budget was met.
    pub budget_met: bool,
    /// Budget utilization, where 1.0 means exactly at budget and larger values are over budget.
    pub budget_utilization: f64,
}

impl<T> TokenResult<T> {
    /// Transforms `output` with `f`, carrying every metric field over unchanged.
    pub fn map<U, F: FnOnce(T) -> U>(self, f: F) -> TokenResult<U> {
        // Take the result apart once so each metric moves across by name.
        let TokenResult {
            output,
            tokens_processed,
            us_per_token,
            tokens_per_sec,
            budget_met,
            budget_utilization,
        } = self;

        TokenResult {
            output: f(output),
            tokens_processed,
            us_per_token,
            tokens_per_sec,
            budget_met,
            budget_utilization,
        }
    }
}