//! oxigdal_gpu_advanced/gpu_ml/mod.rs
mod compute;
4mod neural;
5#[cfg(test)]
6#[allow(clippy::panic)]
7mod tests;
8
9use crate::error::{GpuAdvancedError, Result};
10use oxigdal_gpu::GpuContext;
11use std::sync::Arc;
12use wgpu::util::DeviceExt;
13
/// A GPU-resident buffer paired with the byte size of its contents.
#[derive(Debug)]
pub struct GpuBuffer {
    // The underlying wgpu buffer holding the data on the device.
    buffer: wgpu::Buffer,
    // Size of the uploaded contents in bytes.
    size: u64,
}
22
23impl GpuBuffer {
24 pub fn buffer(&self) -> &wgpu::Buffer {
26 &self.buffer
27 }
28
29 pub fn size(&self) -> u64 {
31 self.size
32 }
33}
34
/// The kinds of layers a `GpuModel` can contain.
///
/// Only `Dense`, `Activation`, `Flatten`, and `Dropout` are currently
/// executed by batched inference; the others return `NotImplemented`.
#[derive(Debug, Clone)]
pub enum LayerType {
    /// Fully-connected (dense) layer.
    Dense {
        // Number of input features per sample.
        input_features: usize,
        // Number of output features per sample.
        output_features: usize,
    },
    /// 2D convolution (not yet supported in batched inference).
    Conv2d {
        input_channels: usize,
        output_channels: usize,
        // Side length of the (presumably square) kernel — TODO confirm.
        kernel_size: usize,
    },
    /// Batch normalization (not yet supported in batched inference).
    BatchNorm {
        num_features: usize,
        // Small constant added for numerical stability.
        epsilon: f32,
    },
    /// 2D pooling (not yet supported in batched inference).
    Pool2d {
        pool_type: PoolType,
        pool_size: usize,
        stride: usize,
    },
    /// Element-wise activation function.
    Activation {
        activation: ActivationType,
    },
    /// Flattens input to 1D; a no-op on already-flat data at inference time.
    Flatten,
    /// Dropout; skipped entirely during inference (training-only behavior).
    Dropout {
        // Drop probability; unused at inference time (hence the underscore).
        _rate: f32,
    },
}
83
/// One layer of a `GpuModel`: its type plus any GPU-resident parameters
/// and (optionally pre-built) compute pipeline state.
pub struct GpuLayer {
    // What kind of layer this is, with its hyperparameters.
    pub(crate) layer_type: LayerType,
    // Weight buffer, when the layer type has weights (e.g. Dense).
    pub(crate) weights: Option<GpuBuffer>,
    // Bias buffer, when the layer type has a bias (e.g. Dense).
    pub(crate) bias: Option<GpuBuffer>,
    // Additional parameter buffers (presumably batch-norm stats etc. — TODO confirm).
    pub(crate) extra_params: Vec<GpuBuffer>,
    // Compute pipeline for this layer, if one has been created.
    pub(crate) pipeline: Option<wgpu::ComputePipeline>,
    // Bind group layout matching `pipeline`, if created.
    pub(crate) bind_group_layout: Option<wgpu::BindGroupLayout>,
}
99
100impl GpuLayer {
101 pub fn layer_type(&self) -> &LayerType {
103 &self.layer_type
104 }
105
106 pub fn has_weights(&self) -> bool {
108 self.weights.is_some()
109 }
110
111 pub fn weights(&self) -> Option<&GpuBuffer> {
113 self.weights.as_ref()
114 }
115
116 pub fn bias(&self) -> Option<&GpuBuffer> {
118 self.bias.as_ref()
119 }
120
121 pub fn extra_params(&self) -> &[GpuBuffer] {
126 &self.extra_params
127 }
128
129 pub fn pipeline(&self) -> Option<&wgpu::ComputePipeline> {
134 self.pipeline.as_ref()
135 }
136
137 pub fn bind_group_layout(&self) -> Option<&wgpu::BindGroupLayout> {
142 self.bind_group_layout.as_ref()
143 }
144}
145
/// A neural-network model whose parameters live on the GPU.
pub struct GpuModel {
    // Layers in execution order.
    layers: Vec<GpuLayer>,
    // GPU context used to allocate this model's buffers.
    context: Arc<GpuContext>,
    // Human-readable model name.
    name: String,
    // Expected per-sample input shape (empty until set).
    input_shape: Vec<usize>,
    // Expected per-sample output shape (empty until set).
    output_shape: Vec<usize>,
}
159
160impl GpuModel {
161 pub fn new(context: Arc<GpuContext>, name: impl Into<String>) -> Self {
163 Self {
164 layers: Vec::new(),
165 context,
166 name: name.into(),
167 input_shape: Vec::new(),
168 output_shape: Vec::new(),
169 }
170 }
171
172 pub fn with_input_shape(mut self, shape: Vec<usize>) -> Self {
174 self.input_shape = shape;
175 self
176 }
177
178 pub fn with_output_shape(mut self, shape: Vec<usize>) -> Self {
180 self.output_shape = shape;
181 self
182 }
183
184 pub fn add_dense_layer(
186 &mut self,
187 input_features: usize,
188 output_features: usize,
189 weights: &[f32],
190 bias: &[f32],
191 ) -> Result<()> {
192 let expected_weights = input_features * output_features;
194 if weights.len() != expected_weights {
195 return Err(GpuAdvancedError::invalid_parameter(format!(
196 "Dense layer weight size mismatch: expected {}, got {}",
197 expected_weights,
198 weights.len()
199 )));
200 }
201 if bias.len() != output_features {
202 return Err(GpuAdvancedError::invalid_parameter(format!(
203 "Dense layer bias size mismatch: expected {}, got {}",
204 output_features,
205 bias.len()
206 )));
207 }
208
209 let weights_buffer =
211 self.context
212 .device()
213 .create_buffer_init(&wgpu::util::BufferInitDescriptor {
214 label: Some("Dense Weights Buffer"),
215 contents: bytemuck::cast_slice(weights),
216 usage: wgpu::BufferUsages::STORAGE,
217 });
218
219 let bias_buffer =
220 self.context
221 .device()
222 .create_buffer_init(&wgpu::util::BufferInitDescriptor {
223 label: Some("Dense Bias Buffer"),
224 contents: bytemuck::cast_slice(bias),
225 usage: wgpu::BufferUsages::STORAGE,
226 });
227
228 let layer = GpuLayer {
229 layer_type: LayerType::Dense {
230 input_features,
231 output_features,
232 },
233 weights: Some(GpuBuffer {
234 buffer: weights_buffer,
235 size: std::mem::size_of_val(weights) as u64,
236 }),
237 bias: Some(GpuBuffer {
238 buffer: bias_buffer,
239 size: std::mem::size_of_val(bias) as u64,
240 }),
241 extra_params: Vec::new(),
242 pipeline: None,
243 bind_group_layout: None,
244 };
245
246 self.layers.push(layer);
247 Ok(())
248 }
249
250 pub fn add_activation_layer(&mut self, activation: ActivationType) {
252 let layer = GpuLayer {
253 layer_type: LayerType::Activation { activation },
254 weights: None,
255 bias: None,
256 extra_params: Vec::new(),
257 pipeline: None,
258 bind_group_layout: None,
259 };
260 self.layers.push(layer);
261 }
262
263 pub fn add_flatten_layer(&mut self) {
265 let layer = GpuLayer {
266 layer_type: LayerType::Flatten,
267 weights: None,
268 bias: None,
269 extra_params: Vec::new(),
270 pipeline: None,
271 bind_group_layout: None,
272 };
273 self.layers.push(layer);
274 }
275
276 pub fn num_layers(&self) -> usize {
278 self.layers.len()
279 }
280
281 pub fn name(&self) -> &str {
283 &self.name
284 }
285
286 pub fn input_shape(&self) -> &[usize] {
288 &self.input_shape
289 }
290
291 pub fn output_shape(&self) -> &[usize] {
293 &self.output_shape
294 }
295
296 pub fn context(&self) -> &Arc<GpuContext> {
298 &self.context
299 }
300
301 pub fn layers(&self) -> &[GpuLayer] {
303 &self.layers
304 }
305}
306
/// Batched ML inference engine that runs a `GpuModel` on the GPU.
pub struct GpuMlInference {
    // GPU context used for dispatches.
    context: Arc<GpuContext>,
    // Maximum number of samples processed per GPU batch.
    batch_size: usize,
    // Whether mixed-precision execution is requested.
    mixed_precision: bool,
    // The currently loaded model, if any.
    model: Option<GpuModel>,
}
318
319impl GpuMlInference {
320 pub fn new(context: Arc<GpuContext>, batch_size: usize) -> Self {
322 Self {
323 context,
324 batch_size,
325 mixed_precision: false,
326 model: None,
327 }
328 }
329
330 pub fn with_mixed_precision(mut self, enabled: bool) -> Self {
332 self.mixed_precision = enabled;
333 self
334 }
335
336 pub fn load_model(&mut self, model: GpuModel) {
338 self.model = Some(model);
339 }
340
341 pub fn create_feedforward_model(
343 &mut self,
344 name: &str,
345 layer_sizes: &[usize],
346 weights: &[Vec<f32>],
347 biases: &[Vec<f32>],
348 activations: &[ActivationType],
349 ) -> Result<()> {
350 if layer_sizes.len() < 2 {
351 return Err(GpuAdvancedError::invalid_parameter(
352 "Model must have at least input and output layer",
353 ));
354 }
355
356 let num_layers = layer_sizes.len() - 1;
357 if weights.len() != num_layers || biases.len() != num_layers {
358 return Err(GpuAdvancedError::invalid_parameter(
359 "Number of weight/bias arrays must match number of layers",
360 ));
361 }
362
363 let mut model = GpuModel::new(Arc::clone(&self.context), name)
364 .with_input_shape(vec![layer_sizes[0]])
365 .with_output_shape(vec![layer_sizes[layer_sizes.len() - 1]]);
366
367 for i in 0..num_layers {
368 model.add_dense_layer(layer_sizes[i], layer_sizes[i + 1], &weights[i], &biases[i])?;
369
370 if i < activations.len() {
372 model.add_activation_layer(activations[i]);
373 }
374 }
375
376 self.model = Some(model);
377 Ok(())
378 }
379
380 pub async fn infer_batch(&self, inputs: &[Vec<f32>]) -> Result<Vec<Vec<f32>>> {
382 if inputs.is_empty() {
383 return Ok(Vec::new());
384 }
385
386 let mut results = Vec::with_capacity(inputs.len());
387
388 for chunk in inputs.chunks(self.batch_size) {
390 let batch_results = self.process_batch(chunk).await?;
391 results.extend(batch_results);
392 }
393
394 Ok(results)
395 }
396
397 async fn process_batch(&self, batch: &[Vec<f32>]) -> Result<Vec<Vec<f32>>> {
399 let model = self.model.as_ref().ok_or_else(|| {
401 GpuAdvancedError::MlInferenceError("No model loaded for inference".to_string())
402 })?;
403
404 if batch.is_empty() {
405 return Ok(Vec::new());
406 }
407
408 let input_size = model.input_shape().iter().product::<usize>();
410 for (idx, input) in batch.iter().enumerate() {
411 if input.len() != input_size {
412 return Err(GpuAdvancedError::invalid_parameter(format!(
413 "Input {} has wrong size: expected {}, got {}",
414 idx,
415 input_size,
416 input.len()
417 )));
418 }
419 }
420
421 let batch_size = batch.len();
422
423 let mut flat_input: Vec<f32> = Vec::with_capacity(batch_size * input_size);
425 for input in batch {
426 flat_input.extend_from_slice(input);
427 }
428
429 let mut current_data = flat_input;
431 let mut current_feature_size = input_size;
432
433 for layer in model.layers() {
434 match layer.layer_type() {
435 LayerType::Dense {
436 input_features,
437 output_features,
438 } => {
439 current_data = self
441 .execute_dense_layer(
442 ¤t_data,
443 layer,
444 batch_size,
445 *input_features,
446 *output_features,
447 )
448 .await?;
449 current_feature_size = *output_features;
450 }
451 LayerType::Activation { activation } => {
452 current_data = self.activation(¤t_data, *activation).await?;
454 }
455 LayerType::Flatten => {
456 continue;
458 }
459 LayerType::Dropout { .. } => {
460 continue;
462 }
463 _ => {
464 return Err(GpuAdvancedError::NotImplemented(format!(
466 "Layer type {:?} not yet supported in batched inference",
467 layer.layer_type()
468 )));
469 }
470 }
471 }
472
473 let output_size = current_feature_size;
475 let mut results = Vec::with_capacity(batch_size);
476 for i in 0..batch_size {
477 let start = i * output_size;
478 let end = start + output_size;
479 results.push(current_data[start..end].to_vec());
480 }
481
482 Ok(results)
483 }
484
485 pub async fn dynamic_batch(&self, inputs: Vec<Vec<f32>>) -> Result<Vec<Vec<f32>>> {
487 let mut size_groups: std::collections::HashMap<usize, Vec<Vec<f32>>> =
489 std::collections::HashMap::new();
490
491 for input in inputs {
492 size_groups.entry(input.len()).or_default().push(input);
493 }
494
495 let mut all_results = Vec::new();
496
497 for (_size, group) in size_groups {
499 let results = self.infer_batch(&group).await?;
500 all_results.extend(results);
501 }
502
503 Ok(all_results)
504 }
505
506 pub fn model(&self) -> Option<&GpuModel> {
508 self.model.as_ref()
509 }
510
511 pub fn has_model(&self) -> bool {
513 self.model.is_some()
514 }
515
516 pub fn batch_size(&self) -> usize {
518 self.batch_size
519 }
520
521 pub fn is_mixed_precision(&self) -> bool {
523 self.mixed_precision
524 }
525}
526
/// Supported element-wise activation functions.
#[derive(Debug, Clone, Copy)]
pub enum ActivationType {
    /// Rectified linear unit: max(0, x).
    ReLU,
    /// Logistic sigmoid: 1 / (1 + e^-x).
    Sigmoid,
    /// Hyperbolic tangent.
    Tanh,
    /// Leaky ReLU; the payload is the negative-side slope.
    LeakyReLU(f32),
}
539
/// Pooling strategies for `LayerType::Pool2d`.
#[derive(Debug, Clone, Copy)]
pub enum PoolType {
    /// Take the maximum value in each window.
    Max,
    /// Take the mean value in each window.
    Average,
}
548
/// Aggregate timing/throughput statistics for inference runs.
#[derive(Debug, Clone, Default)]
pub struct InferenceStats {
    // Total number of individual samples inferred.
    pub total_inferences: u64,
    // Total number of GPU batches dispatched.
    pub total_batches: u64,
    // Mean samples per batch.
    pub avg_batch_size: f64,
    // Cumulative wall time in microseconds.
    pub total_time_us: u64,
    // Mean time per sample in microseconds.
    pub avg_time_per_sample_us: f64,
}
563
impl InferenceStats {
    /// Print a human-readable summary of the statistics to stdout.
    /// Total time is reported in milliseconds (integer division of the
    /// microsecond total), per-sample time in microseconds.
    pub fn print(&self) {
        println!("\nML Inference Statistics:");
        println!(" Total inferences: {}", self.total_inferences);
        println!(" Total batches: {}", self.total_batches);
        println!(" Average batch size: {:.1}", self.avg_batch_size);
        println!(" Total time: {} ms", self.total_time_us / 1000);
        println!(
            " Avg time per sample: {:.2} us",
            self.avg_time_per_sample_us
        );
    }
}