Skip to main content

oxigdal_gpu_advanced/gpu_ml/
mod.rs

1//! GPU-accelerated ML inference for geospatial data.
2
3mod compute;
4mod neural;
5#[cfg(test)]
6#[allow(clippy::panic)]
7mod tests;
8
9use crate::error::{GpuAdvancedError, Result};
10use oxigdal_gpu::GpuContext;
11use std::sync::Arc;
12use wgpu::util::DeviceExt;
13
/// GPU buffer handle for model data
///
/// Pairs a raw `wgpu::Buffer` with its logical size in bytes, since the
/// byte length is needed by code in this module after creation.
#[derive(Debug)]
pub struct GpuBuffer {
    /// The wgpu buffer holding the data on the device
    buffer: wgpu::Buffer,
    /// Size of the uploaded contents in bytes
    size: u64,
}
22
23impl GpuBuffer {
24    /// Get the underlying wgpu buffer
25    pub fn buffer(&self) -> &wgpu::Buffer {
26        &self.buffer
27    }
28
29    /// Get buffer size in bytes
30    pub fn size(&self) -> u64 {
31        self.size
32    }
33}
34
/// Model layer type
///
/// Describes a layer's shape parameters only; the actual weights live in
/// the owning [`GpuLayer`]'s buffers.
#[derive(Debug, Clone)]
pub enum LayerType {
    /// Dense (fully connected) layer
    Dense {
        /// Input features
        input_features: usize,
        /// Output features
        output_features: usize,
    },
    /// 2D Convolution layer
    Conv2d {
        /// Input channels
        input_channels: usize,
        /// Output channels
        output_channels: usize,
        /// Kernel size (presumably square kernels — TODO confirm against shaders)
        kernel_size: usize,
    },
    /// Batch normalization layer
    BatchNorm {
        /// Number of features
        num_features: usize,
        /// Epsilon for numerical stability
        epsilon: f32,
    },
    /// Pooling layer
    Pool2d {
        /// Pool type (max or average)
        pool_type: PoolType,
        /// Pool size
        pool_size: usize,
        /// Stride
        stride: usize,
    },
    /// Activation layer
    Activation {
        /// Activation type
        activation: ActivationType,
    },
    /// Flatten layer
    Flatten,
    /// Dropout layer (inference mode - no-op; see `process_batch`)
    Dropout {
        /// Dropout rate (unused during inference)
        _rate: f32,
    },
}
83
/// GPU-resident model layer with weights
///
/// Fields are `pub(crate)` so other modules in this crate (e.g. the
/// `compute`/`neural` submodules) can access the buffers directly.
pub struct GpuLayer {
    /// Layer type and its shape parameters
    pub(crate) layer_type: LayerType,
    /// Weight buffer (if applicable; `None` for weightless layers)
    pub(crate) weights: Option<GpuBuffer>,
    /// Bias buffer (if applicable)
    pub(crate) bias: Option<GpuBuffer>,
    /// Additional parameters (e.g., batch norm mean/var)
    pub(crate) extra_params: Vec<GpuBuffer>,
    /// Cached pipeline for this layer, to avoid recreation per inference
    pub(crate) pipeline: Option<wgpu::ComputePipeline>,
    /// Bind group layout associated with the cached pipeline
    pub(crate) bind_group_layout: Option<wgpu::BindGroupLayout>,
}
99
100impl GpuLayer {
101    /// Get the layer type
102    pub fn layer_type(&self) -> &LayerType {
103        &self.layer_type
104    }
105
106    /// Check if this layer has trainable weights
107    pub fn has_weights(&self) -> bool {
108        self.weights.is_some()
109    }
110
111    /// Get the weights buffer if present
112    pub fn weights(&self) -> Option<&GpuBuffer> {
113        self.weights.as_ref()
114    }
115
116    /// Get the bias buffer if present
117    pub fn bias(&self) -> Option<&GpuBuffer> {
118        self.bias.as_ref()
119    }
120
121    /// Get extra parameters (e.g., batch norm mean/var/gamma/beta)
122    ///
123    /// These are additional GPU buffers needed by certain layer types
124    /// like BatchNorm that require more than just weights and bias.
125    pub fn extra_params(&self) -> &[GpuBuffer] {
126        &self.extra_params
127    }
128
129    /// Get the cached compute pipeline if present
130    ///
131    /// Pipelines can be cached to avoid recreation on each inference call.
132    /// This is used for performance optimization in repeated inference.
133    pub fn pipeline(&self) -> Option<&wgpu::ComputePipeline> {
134        self.pipeline.as_ref()
135    }
136
137    /// Get the cached bind group layout if present
138    ///
139    /// Bind group layouts define the structure of resources bound to shaders.
140    /// Caching these avoids recreation overhead during inference.
141    pub fn bind_group_layout(&self) -> Option<&wgpu::BindGroupLayout> {
142        self.bind_group_layout.as_ref()
143    }
144}
145
/// GPU-resident model for inference
///
/// Built incrementally via `add_*_layer` methods; layers are executed in
/// insertion order by the inference engine.
pub struct GpuModel {
    /// Model layers, in execution order
    layers: Vec<GpuLayer>,
    /// GPU context used to create buffers for layer parameters
    context: Arc<GpuContext>,
    /// Model name
    name: String,
    /// Input shape (batch dimension excluded)
    input_shape: Vec<usize>,
    /// Output shape (batch dimension excluded)
    output_shape: Vec<usize>,
}
159
160impl GpuModel {
161    /// Create a new empty GPU model
162    pub fn new(context: Arc<GpuContext>, name: impl Into<String>) -> Self {
163        Self {
164            layers: Vec::new(),
165            context,
166            name: name.into(),
167            input_shape: Vec::new(),
168            output_shape: Vec::new(),
169        }
170    }
171
172    /// Set input shape
173    pub fn with_input_shape(mut self, shape: Vec<usize>) -> Self {
174        self.input_shape = shape;
175        self
176    }
177
178    /// Set output shape
179    pub fn with_output_shape(mut self, shape: Vec<usize>) -> Self {
180        self.output_shape = shape;
181        self
182    }
183
184    /// Add a dense layer with weights loaded to GPU
185    pub fn add_dense_layer(
186        &mut self,
187        input_features: usize,
188        output_features: usize,
189        weights: &[f32],
190        bias: &[f32],
191    ) -> Result<()> {
192        // Validate dimensions
193        let expected_weights = input_features * output_features;
194        if weights.len() != expected_weights {
195            return Err(GpuAdvancedError::invalid_parameter(format!(
196                "Dense layer weight size mismatch: expected {}, got {}",
197                expected_weights,
198                weights.len()
199            )));
200        }
201        if bias.len() != output_features {
202            return Err(GpuAdvancedError::invalid_parameter(format!(
203                "Dense layer bias size mismatch: expected {}, got {}",
204                output_features,
205                bias.len()
206            )));
207        }
208
209        // Create GPU buffers for weights and bias
210        let weights_buffer =
211            self.context
212                .device()
213                .create_buffer_init(&wgpu::util::BufferInitDescriptor {
214                    label: Some("Dense Weights Buffer"),
215                    contents: bytemuck::cast_slice(weights),
216                    usage: wgpu::BufferUsages::STORAGE,
217                });
218
219        let bias_buffer =
220            self.context
221                .device()
222                .create_buffer_init(&wgpu::util::BufferInitDescriptor {
223                    label: Some("Dense Bias Buffer"),
224                    contents: bytemuck::cast_slice(bias),
225                    usage: wgpu::BufferUsages::STORAGE,
226                });
227
228        let layer = GpuLayer {
229            layer_type: LayerType::Dense {
230                input_features,
231                output_features,
232            },
233            weights: Some(GpuBuffer {
234                buffer: weights_buffer,
235                size: std::mem::size_of_val(weights) as u64,
236            }),
237            bias: Some(GpuBuffer {
238                buffer: bias_buffer,
239                size: std::mem::size_of_val(bias) as u64,
240            }),
241            extra_params: Vec::new(),
242            pipeline: None,
243            bind_group_layout: None,
244        };
245
246        self.layers.push(layer);
247        Ok(())
248    }
249
250    /// Add an activation layer
251    pub fn add_activation_layer(&mut self, activation: ActivationType) {
252        let layer = GpuLayer {
253            layer_type: LayerType::Activation { activation },
254            weights: None,
255            bias: None,
256            extra_params: Vec::new(),
257            pipeline: None,
258            bind_group_layout: None,
259        };
260        self.layers.push(layer);
261    }
262
263    /// Add a flatten layer
264    pub fn add_flatten_layer(&mut self) {
265        let layer = GpuLayer {
266            layer_type: LayerType::Flatten,
267            weights: None,
268            bias: None,
269            extra_params: Vec::new(),
270            pipeline: None,
271            bind_group_layout: None,
272        };
273        self.layers.push(layer);
274    }
275
276    /// Get number of layers
277    pub fn num_layers(&self) -> usize {
278        self.layers.len()
279    }
280
281    /// Get model name
282    pub fn name(&self) -> &str {
283        &self.name
284    }
285
286    /// Get input shape
287    pub fn input_shape(&self) -> &[usize] {
288        &self.input_shape
289    }
290
291    /// Get output shape
292    pub fn output_shape(&self) -> &[usize] {
293        &self.output_shape
294    }
295
296    /// Get GPU context
297    pub fn context(&self) -> &Arc<GpuContext> {
298        &self.context
299    }
300
301    /// Get layers
302    pub fn layers(&self) -> &[GpuLayer] {
303        &self.layers
304    }
305}
306
/// GPU ML inference engine
///
/// Holds an optional loaded [`GpuModel`] and runs batched forward passes.
pub struct GpuMlInference {
    /// GPU context
    context: Arc<GpuContext>,
    /// Maximum number of samples processed per batch
    batch_size: usize,
    /// Use mixed precision (FP16/FP32)
    mixed_precision: bool,
    /// Loaded model (optional; inference fails until one is loaded)
    model: Option<GpuModel>,
}
318
319impl GpuMlInference {
320    /// Create a new GPU ML inference engine
321    pub fn new(context: Arc<GpuContext>, batch_size: usize) -> Self {
322        Self {
323            context,
324            batch_size,
325            mixed_precision: false,
326            model: None,
327        }
328    }
329
330    /// Enable mixed precision inference
331    pub fn with_mixed_precision(mut self, enabled: bool) -> Self {
332        self.mixed_precision = enabled;
333        self
334    }
335
336    /// Load a model for inference
337    pub fn load_model(&mut self, model: GpuModel) {
338        self.model = Some(model);
339    }
340
341    /// Create and load a simple feedforward model
342    pub fn create_feedforward_model(
343        &mut self,
344        name: &str,
345        layer_sizes: &[usize],
346        weights: &[Vec<f32>],
347        biases: &[Vec<f32>],
348        activations: &[ActivationType],
349    ) -> Result<()> {
350        if layer_sizes.len() < 2 {
351            return Err(GpuAdvancedError::invalid_parameter(
352                "Model must have at least input and output layer",
353            ));
354        }
355
356        let num_layers = layer_sizes.len() - 1;
357        if weights.len() != num_layers || biases.len() != num_layers {
358            return Err(GpuAdvancedError::invalid_parameter(
359                "Number of weight/bias arrays must match number of layers",
360            ));
361        }
362
363        let mut model = GpuModel::new(Arc::clone(&self.context), name)
364            .with_input_shape(vec![layer_sizes[0]])
365            .with_output_shape(vec![layer_sizes[layer_sizes.len() - 1]]);
366
367        for i in 0..num_layers {
368            model.add_dense_layer(layer_sizes[i], layer_sizes[i + 1], &weights[i], &biases[i])?;
369
370            // Add activation if provided
371            if i < activations.len() {
372                model.add_activation_layer(activations[i]);
373            }
374        }
375
376        self.model = Some(model);
377        Ok(())
378    }
379
380    /// Run batch inference
381    pub async fn infer_batch(&self, inputs: &[Vec<f32>]) -> Result<Vec<Vec<f32>>> {
382        if inputs.is_empty() {
383            return Ok(Vec::new());
384        }
385
386        let mut results = Vec::with_capacity(inputs.len());
387
388        // Process in batches
389        for chunk in inputs.chunks(self.batch_size) {
390            let batch_results = self.process_batch(chunk).await?;
391            results.extend(batch_results);
392        }
393
394        Ok(results)
395    }
396
397    /// Process a single batch through the loaded model
398    async fn process_batch(&self, batch: &[Vec<f32>]) -> Result<Vec<Vec<f32>>> {
399        // Check if model is loaded
400        let model = self.model.as_ref().ok_or_else(|| {
401            GpuAdvancedError::MlInferenceError("No model loaded for inference".to_string())
402        })?;
403
404        if batch.is_empty() {
405            return Ok(Vec::new());
406        }
407
408        // Validate input dimensions
409        let input_size = model.input_shape().iter().product::<usize>();
410        for (idx, input) in batch.iter().enumerate() {
411            if input.len() != input_size {
412                return Err(GpuAdvancedError::invalid_parameter(format!(
413                    "Input {} has wrong size: expected {}, got {}",
414                    idx,
415                    input_size,
416                    input.len()
417                )));
418            }
419        }
420
421        let batch_size = batch.len();
422
423        // Flatten batch into contiguous buffer for GPU upload
424        let mut flat_input: Vec<f32> = Vec::with_capacity(batch_size * input_size);
425        for input in batch {
426            flat_input.extend_from_slice(input);
427        }
428
429        // Process through each layer
430        let mut current_data = flat_input;
431        let mut current_feature_size = input_size;
432
433        for layer in model.layers() {
434            match layer.layer_type() {
435                LayerType::Dense {
436                    input_features,
437                    output_features,
438                } => {
439                    // Execute dense layer on GPU
440                    current_data = self
441                        .execute_dense_layer(
442                            &current_data,
443                            layer,
444                            batch_size,
445                            *input_features,
446                            *output_features,
447                        )
448                        .await?;
449                    current_feature_size = *output_features;
450                }
451                LayerType::Activation { activation } => {
452                    // Execute activation on GPU
453                    current_data = self.activation(&current_data, *activation).await?;
454                }
455                LayerType::Flatten => {
456                    // Flatten is a no-op when data is already flattened
457                    continue;
458                }
459                LayerType::Dropout { .. } => {
460                    // Dropout is a no-op during inference
461                    continue;
462                }
463                _ => {
464                    // Other layer types would be handled here
465                    return Err(GpuAdvancedError::NotImplemented(format!(
466                        "Layer type {:?} not yet supported in batched inference",
467                        layer.layer_type()
468                    )));
469                }
470            }
471        }
472
473        // Split results back into individual outputs
474        let output_size = current_feature_size;
475        let mut results = Vec::with_capacity(batch_size);
476        for i in 0..batch_size {
477            let start = i * output_size;
478            let end = start + output_size;
479            results.push(current_data[start..end].to_vec());
480        }
481
482        Ok(results)
483    }
484
485    /// Dynamic batching for variable-sized inputs
486    pub async fn dynamic_batch(&self, inputs: Vec<Vec<f32>>) -> Result<Vec<Vec<f32>>> {
487        // Group inputs by size
488        let mut size_groups: std::collections::HashMap<usize, Vec<Vec<f32>>> =
489            std::collections::HashMap::new();
490
491        for input in inputs {
492            size_groups.entry(input.len()).or_default().push(input);
493        }
494
495        let mut all_results = Vec::new();
496
497        // Process each size group
498        for (_size, group) in size_groups {
499            let results = self.infer_batch(&group).await?;
500            all_results.extend(results);
501        }
502
503        Ok(all_results)
504    }
505
506    /// Get the loaded model (if any)
507    pub fn model(&self) -> Option<&GpuModel> {
508        self.model.as_ref()
509    }
510
511    /// Check if a model is loaded
512    pub fn has_model(&self) -> bool {
513        self.model.is_some()
514    }
515
516    /// Get batch size
517    pub fn batch_size(&self) -> usize {
518        self.batch_size
519    }
520
521    /// Check if mixed precision is enabled
522    pub fn is_mixed_precision(&self) -> bool {
523        self.mixed_precision
524    }
525}
526
/// Activation function types
///
/// `Copy` so values can be passed around and stored in layer descriptors
/// without cloning; the actual math runs in GPU shaders elsewhere.
#[derive(Debug, Clone, Copy)]
pub enum ActivationType {
    /// ReLU activation
    ReLU,
    /// Sigmoid activation
    Sigmoid,
    /// Tanh activation
    Tanh,
    /// Leaky ReLU with alpha parameter
    LeakyReLU(f32),
}
539
/// Pooling types used by [`LayerType::Pool2d`]
#[derive(Debug, Clone, Copy)]
pub enum PoolType {
    /// Max pooling
    Max,
    /// Average pooling
    Average,
}
548
/// Inference statistics
#[derive(Debug, Clone, Default)]
pub struct InferenceStats {
    /// Total inferences
    pub total_inferences: u64,
    /// Total batches
    pub total_batches: u64,
    /// Average batch size
    pub avg_batch_size: f64,
    /// Total inference time (microseconds)
    pub total_time_us: u64,
    /// Average inference time per sample (microseconds)
    pub avg_time_per_sample_us: f64,
}

impl InferenceStats {
    /// Print a human-readable summary of the statistics to stdout.
    pub fn print(&self) {
        // Destructure once so the format strings stay short and uniform.
        let Self {
            total_inferences,
            total_batches,
            avg_batch_size,
            total_time_us,
            avg_time_per_sample_us,
        } = self;
        println!("\nML Inference Statistics:");
        println!("  Total inferences: {}", total_inferences);
        println!("  Total batches: {}", total_batches);
        println!("  Average batch size: {:.1}", avg_batch_size);
        println!("  Total time: {} ms", total_time_us / 1000);
        println!("  Avg time per sample: {:.2} us", avg_time_per_sample_us);
    }
}