// trueno/tuner/features/mod.rs
#![allow(missing_docs)]
2mod builder;
7mod extractor;
8
9pub use builder::TunerFeaturesBuilder;
10pub use extractor::{FeatureExtractor, RunConfig};
11
12use serde::{Deserialize, Serialize};
13
14use super::error::TunerError;
15use super::types::BottleneckClass;
16
/// Flat feature vector describing one tuning run: model shape, runtime
/// configuration, GPU capabilities, and derived roofline-style quantities.
///
/// `validate()` requires every value in the flattened vector to be finite
/// and within [0, 1] (±1e-3), so all scalar fields are expected to be
/// pre-normalized/pre-scaled by the extractor — TODO confirm in extractor.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct TunerFeatures {
    /// Model parameter count. NOTE(review): the `_b` suffix suggests raw
    /// billions, but `validate()` bounds it to [0, 1] — presumably scaled.
    pub model_params_b: f32,
    /// Hidden dimension, normalized.
    pub hidden_dim_norm: f32,
    /// Number of transformer layers, normalized.
    pub num_layers_norm: f32,
    /// Number of attention heads, normalized.
    pub num_heads_norm: f32,
    /// Per-head dimension, normalized.
    pub head_dim_norm: f32,
    /// Vocabulary size, log-scaled (presumably then normalized — confirm).
    pub vocab_size_log: f32,
    /// Batch size, normalized.
    pub batch_size_norm: f32,
    /// Sequence length, log-scaled (presumably then normalized — confirm).
    pub seq_len_log: f32,
    /// CUDA-graphs flag encoded as a float (presumably 0.0/1.0 — confirm).
    pub cuda_graphs: f32,
    /// KV-cache ratio; defaults to 1.0 (see `Default` impl).
    pub kv_cache_ratio: f32,
    /// Prefill-vs-decode flag encoded as a float (presumably 0.0/1.0).
    pub is_prefill: f32,

    /// One-hot quantization type; `validate()` accepts all-zero or sum ≈ 1.
    pub quant_type_onehot: [f32; 8],

    /// One-hot kernel type; `validate()` accepts all-zero or sum ≈ 1.
    pub kernel_type_onehot: [f32; 16],

    /// GPU memory bandwidth, normalized.
    pub gpu_mem_bw_norm: f32,
    /// GPU compute throughput, normalized.
    pub gpu_compute_norm: f32,
    /// GPU SM count, normalized.
    pub gpu_sm_norm: f32,
    /// GPU L2 cache size, normalized.
    pub gpu_l2_cache_norm: f32,
    /// Zero-copy flag encoded as a float (presumably 0.0/1.0 — confirm).
    pub is_zero_copy: f32,

    /// Derived arithmetic intensity (FLOPs per byte, presumably normalized).
    pub arithmetic_intensity: f32,
    /// Derived theoretical efficiency estimate.
    pub theoretical_efficiency: f32,

    /// Measured throughput label (tokens/s?); omitted from serialization
    /// when absent and excluded from the flattened vector.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub measured_tps: Option<f32>,
    /// Best-kernel label; omitted when absent, excluded from the vector.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub best_kernel_id: Option<u8>,
    /// Bottleneck classification label; omitted when absent, excluded
    /// from the vector.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub bottleneck_class: Option<BottleneckClass>,
}
94
95impl Default for TunerFeatures {
96 fn default() -> Self {
97 Self {
98 model_params_b: 0.0,
99 hidden_dim_norm: 0.0,
100 num_layers_norm: 0.0,
101 num_heads_norm: 0.0,
102 head_dim_norm: 0.0,
103 vocab_size_log: 0.0,
104 batch_size_norm: 0.0,
105 seq_len_log: 0.0,
106 cuda_graphs: 0.0,
107 kv_cache_ratio: 1.0,
108 is_prefill: 0.0,
109 quant_type_onehot: [0.0; 8],
110 kernel_type_onehot: [0.0; 16],
111 gpu_mem_bw_norm: 0.0,
112 gpu_compute_norm: 0.0,
113 gpu_sm_norm: 0.0,
114 gpu_l2_cache_norm: 0.0,
115 is_zero_copy: 0.0,
116 arithmetic_intensity: 0.0,
117 theoretical_efficiency: 0.0,
118 measured_tps: None,
119 best_kernel_id: None,
120 bottleneck_class: None,
121 }
122 }
123}
124
125impl TunerFeatures {
126 pub const DIM: usize = 11 + 8 + 16 + 5 + 2; pub fn builder() -> TunerFeaturesBuilder {
132 TunerFeaturesBuilder::default()
133 }
134
135 pub fn to_array(&self) -> [f32; Self::DIM] {
138 let mut a = [0.0f32; Self::DIM];
139 let mut i = 0;
140
141 a[i] = self.model_params_b;
143 i += 1;
144 a[i] = self.hidden_dim_norm;
145 i += 1;
146 a[i] = self.num_layers_norm;
147 i += 1;
148 a[i] = self.num_heads_norm;
149 i += 1;
150 a[i] = self.head_dim_norm;
151 i += 1;
152 a[i] = self.vocab_size_log;
153 i += 1;
154 a[i] = self.batch_size_norm;
155 i += 1;
156 a[i] = self.seq_len_log;
157 i += 1;
158 a[i] = self.cuda_graphs;
159 i += 1;
160 a[i] = self.kv_cache_ratio;
161 i += 1;
162 a[i] = self.is_prefill;
163 i += 1;
164
165 a[i..i + 8].copy_from_slice(&self.quant_type_onehot);
167 i += 8;
168 a[i..i + 16].copy_from_slice(&self.kernel_type_onehot);
169 i += 16;
170
171 a[i] = self.gpu_mem_bw_norm;
173 i += 1;
174 a[i] = self.gpu_compute_norm;
175 i += 1;
176 a[i] = self.gpu_sm_norm;
177 i += 1;
178 a[i] = self.gpu_l2_cache_norm;
179 i += 1;
180 a[i] = self.is_zero_copy;
181 i += 1;
182
183 a[i] = self.arithmetic_intensity;
185 i += 1;
186 a[i] = self.theoretical_efficiency;
187
188 a
189 }
190
191 pub fn to_vector(&self) -> Vec<f32> {
193 self.to_array().to_vec()
194 }
195
196 pub fn validate(&self) -> Result<(), TunerError> {
198 let v = self.to_array();
199
200 if v.iter().any(|x| x.is_nan()) {
202 return Err(TunerError::InvalidFeature("NaN value in features".into()));
203 }
204
205 if v.iter().any(|x| x.is_infinite()) {
207 return Err(TunerError::InvalidFeature("Infinite value in features".into()));
208 }
209
210 if v.iter().any(|x| *x < -0.001 || *x > 1.001) {
212 return Err(TunerError::InvalidFeature("Feature value outside [0, 1]".into()));
213 }
214
215 let quant_sum: f32 = self.quant_type_onehot.iter().sum();
217 if (quant_sum - 1.0).abs() > 0.001 && quant_sum > 0.001 {
218 return Err(TunerError::InvalidFeature("Quant one-hot does not sum to 1".into()));
219 }
220
221 let kernel_sum: f32 = self.kernel_type_onehot.iter().sum();
222 if (kernel_sum - 1.0).abs() > 0.001 && kernel_sum > 0.001 {
223 return Err(TunerError::InvalidFeature("Kernel one-hot does not sum to 1".into()));
224 }
225
226 Ok(())
227 }
228}