mod compute;
mod neural;
#[cfg(test)]
#[allow(clippy::panic)]
mod tests;
use crate::error::{GpuAdvancedError, Result};
use oxigdal_gpu::GpuContext;
use std::sync::Arc;
use wgpu::util::DeviceExt;
/// A `wgpu` buffer bundled with a recorded size.
///
/// In this file the size is filled in with the byte length of the host slice
/// the buffer was initialised from (see `GpuModel::add_dense_layer`).
#[derive(Debug)]
pub struct GpuBuffer {
    /// Handle to the underlying GPU buffer.
    buffer: wgpu::Buffer,
    /// Recorded size of the buffer contents, in bytes.
    size: u64,
}
impl GpuBuffer {
    /// Borrows the wrapped `wgpu` buffer.
    pub fn buffer(&self) -> &wgpu::Buffer {
        let Self { buffer, .. } = self;
        buffer
    }

    /// Returns the size recorded for this buffer when it was created.
    pub fn size(&self) -> u64 {
        let Self { size, .. } = self;
        *size
    }
}
/// Kind of a model layer together with its static configuration.
///
/// `GpuMlInference::process_batch` currently executes only `Dense` and
/// `Activation`, skips `Flatten` and `Dropout`, and reports every other
/// variant as not implemented.
#[derive(Debug, Clone)]
pub enum LayerType {
    /// Dense layer described by its input and output feature counts.
    Dense {
        /// Number of values each sample carries into the layer.
        input_features: usize,
        /// Number of values each sample carries out of the layer.
        output_features: usize,
    },
    /// 2D convolution layer description.
    Conv2d {
        /// Number of input channels.
        input_channels: usize,
        /// Number of output channels.
        output_channels: usize,
        /// Kernel size (presumably the side length of a square kernel — confirm).
        kernel_size: usize,
    },
    /// Batch normalisation layer description.
    BatchNorm {
        /// Number of features being normalised.
        num_features: usize,
        /// Epsilon parameter (presumably the numerical-stability term — confirm).
        epsilon: f32,
    },
    /// 2D pooling layer description.
    Pool2d {
        /// Reduction applied inside each pooling window.
        pool_type: PoolType,
        /// Size of the pooling window.
        pool_size: usize,
        /// Step between consecutive pooling windows.
        stride: usize,
    },
    /// Activation function applied to the running data.
    Activation {
        /// Which activation function to apply.
        activation: ActivationType,
    },
    /// Flatten marker; batched inference treats it as a no-op.
    Flatten,
    /// Dropout marker; batched inference skips it.
    Dropout {
        /// Dropout rate. Not read anywhere in this file.
        _rate: f32,
    },
}
/// One layer of a [`GpuModel`]: its kind plus any GPU resources attached to it.
pub struct GpuLayer {
    /// Kind and static configuration of the layer.
    pub(crate) layer_type: LayerType,
    /// Weight buffer, if the layer has one (`None` for activation/flatten layers).
    pub(crate) weights: Option<GpuBuffer>,
    /// Bias buffer, if the layer has one.
    pub(crate) bias: Option<GpuBuffer>,
    /// Additional parameter buffers; the constructors in this file leave it empty.
    pub(crate) extra_params: Vec<GpuBuffer>,
    /// Compute pipeline for the layer; the constructors in this file set it to `None`.
    pub(crate) pipeline: Option<wgpu::ComputePipeline>,
    /// Bind group layout for the layer; the constructors in this file set it to `None`.
    pub(crate) bind_group_layout: Option<wgpu::BindGroupLayout>,
}
impl GpuLayer {
    /// Borrows the layer's kind and configuration.
    pub fn layer_type(&self) -> &LayerType {
        let Self { layer_type, .. } = self;
        layer_type
    }

    /// Reports whether a weight buffer is attached to this layer.
    pub fn has_weights(&self) -> bool {
        self.weights().is_some()
    }

    /// Borrows the weight buffer, if one is attached.
    pub fn weights(&self) -> Option<&GpuBuffer> {
        Option::as_ref(&self.weights)
    }

    /// Borrows the bias buffer, if one is attached.
    pub fn bias(&self) -> Option<&GpuBuffer> {
        Option::as_ref(&self.bias)
    }

    /// Borrows the additional parameter buffers as a slice.
    pub fn extra_params(&self) -> &[GpuBuffer] {
        self.extra_params.as_slice()
    }

    /// Borrows the compute pipeline, if one has been set.
    pub fn pipeline(&self) -> Option<&wgpu::ComputePipeline> {
        Option::as_ref(&self.pipeline)
    }

    /// Borrows the bind group layout, if one has been set.
    pub fn bind_group_layout(&self) -> Option<&wgpu::BindGroupLayout> {
        Option::as_ref(&self.bind_group_layout)
    }
}
/// An ordered list of layers bound to a GPU context.
pub struct GpuModel {
    /// Layers in the order they were added.
    layers: Vec<GpuLayer>,
    /// GPU context whose device is used to create the layer buffers.
    context: Arc<GpuContext>,
    /// Model name supplied at construction.
    name: String,
    /// Per-sample input shape; `GpuMlInference::process_batch` uses the product
    /// of its dimensions as the expected input length.
    input_shape: Vec<usize>,
    /// Declared per-sample output shape.
    output_shape: Vec<usize>,
}
impl GpuModel {
pub fn new(context: Arc<GpuContext>, name: impl Into<String>) -> Self {
Self {
layers: Vec::new(),
context,
name: name.into(),
input_shape: Vec::new(),
output_shape: Vec::new(),
}
}
pub fn with_input_shape(mut self, shape: Vec<usize>) -> Self {
self.input_shape = shape;
self
}
pub fn with_output_shape(mut self, shape: Vec<usize>) -> Self {
self.output_shape = shape;
self
}
pub fn add_dense_layer(
&mut self,
input_features: usize,
output_features: usize,
weights: &[f32],
bias: &[f32],
) -> Result<()> {
let expected_weights = input_features * output_features;
if weights.len() != expected_weights {
return Err(GpuAdvancedError::invalid_parameter(format!(
"Dense layer weight size mismatch: expected {}, got {}",
expected_weights,
weights.len()
)));
}
if bias.len() != output_features {
return Err(GpuAdvancedError::invalid_parameter(format!(
"Dense layer bias size mismatch: expected {}, got {}",
output_features,
bias.len()
)));
}
let weights_buffer =
self.context
.device()
.create_buffer_init(&wgpu::util::BufferInitDescriptor {
label: Some("Dense Weights Buffer"),
contents: bytemuck::cast_slice(weights),
usage: wgpu::BufferUsages::STORAGE,
});
let bias_buffer =
self.context
.device()
.create_buffer_init(&wgpu::util::BufferInitDescriptor {
label: Some("Dense Bias Buffer"),
contents: bytemuck::cast_slice(bias),
usage: wgpu::BufferUsages::STORAGE,
});
let layer = GpuLayer {
layer_type: LayerType::Dense {
input_features,
output_features,
},
weights: Some(GpuBuffer {
buffer: weights_buffer,
size: std::mem::size_of_val(weights) as u64,
}),
bias: Some(GpuBuffer {
buffer: bias_buffer,
size: std::mem::size_of_val(bias) as u64,
}),
extra_params: Vec::new(),
pipeline: None,
bind_group_layout: None,
};
self.layers.push(layer);
Ok(())
}
pub fn add_activation_layer(&mut self, activation: ActivationType) {
let layer = GpuLayer {
layer_type: LayerType::Activation { activation },
weights: None,
bias: None,
extra_params: Vec::new(),
pipeline: None,
bind_group_layout: None,
};
self.layers.push(layer);
}
pub fn add_flatten_layer(&mut self) {
let layer = GpuLayer {
layer_type: LayerType::Flatten,
weights: None,
bias: None,
extra_params: Vec::new(),
pipeline: None,
bind_group_layout: None,
};
self.layers.push(layer);
}
pub fn num_layers(&self) -> usize {
self.layers.len()
}
pub fn name(&self) -> &str {
&self.name
}
pub fn input_shape(&self) -> &[usize] {
&self.input_shape
}
pub fn output_shape(&self) -> &[usize] {
&self.output_shape
}
pub fn context(&self) -> &Arc<GpuContext> {
&self.context
}
pub fn layers(&self) -> &[GpuLayer] {
&self.layers
}
}
/// Runs batched inference for an optionally loaded [`GpuModel`].
pub struct GpuMlInference {
    /// GPU context; cloned into models built by `create_feedforward_model`.
    context: Arc<GpuContext>,
    /// Maximum number of samples `infer_batch` hands to one processing pass.
    batch_size: usize,
    /// Mixed-precision flag; stored and exposed, not otherwise read in this file.
    mixed_precision: bool,
    /// Currently loaded model, if any.
    model: Option<GpuModel>,
}
impl GpuMlInference {
    /// Creates an inference engine with no model loaded and mixed precision off.
    pub fn new(context: Arc<GpuContext>, batch_size: usize) -> Self {
        Self {
            context,
            batch_size,
            mixed_precision: false,
            model: None,
        }
    }

    /// Builder-style setter for the mixed-precision flag.
    pub fn with_mixed_precision(mut self, enabled: bool) -> Self {
        self.mixed_precision = enabled;
        self
    }

    /// Replaces the currently loaded model (if any) with `model`.
    pub fn load_model(&mut self, model: GpuModel) {
        self.model = Some(model);
    }

    /// Builds a feed-forward model from per-layer parameters and loads it.
    ///
    /// `layer_sizes` lists the feature count at every stage, so it describes
    /// `layer_sizes.len() - 1` dense layers. Dense layer `i` maps
    /// `layer_sizes[i]` features to `layer_sizes[i + 1]` features using
    /// `weights[i]` and `biases[i]`, and is followed by `activations[i]` when
    /// that entry exists. Surplus activation entries are ignored.
    ///
    /// # Errors
    ///
    /// Returns an invalid-parameter error when fewer than two layer sizes are
    /// given, when the number of weight or bias arrays differs from the number
    /// of dense layers, or when a dense layer rejects its parameters. The
    /// previously loaded model is left untouched on error.
    pub fn create_feedforward_model(
        &mut self,
        name: &str,
        layer_sizes: &[usize],
        weights: &[Vec<f32>],
        biases: &[Vec<f32>],
        activations: &[ActivationType],
    ) -> Result<()> {
        if layer_sizes.len() < 2 {
            return Err(GpuAdvancedError::invalid_parameter(
                "Model must have at least input and output layer",
            ));
        }
        let num_layers = layer_sizes.len() - 1;
        if weights.len() != num_layers || biases.len() != num_layers {
            return Err(GpuAdvancedError::invalid_parameter(
                "Number of weight/bias arrays must match number of layers",
            ));
        }

        let mut model = GpuModel::new(Arc::clone(&self.context), name)
            .with_input_shape(vec![layer_sizes[0]])
            .with_output_shape(vec![layer_sizes[layer_sizes.len() - 1]]);

        // Each window of two consecutive sizes is one dense layer's (in, out) pair.
        let layer_params = layer_sizes.windows(2).zip(weights).zip(biases);
        for (i, ((dims, layer_weights), layer_bias)) in layer_params.enumerate() {
            model.add_dense_layer(dims[0], dims[1], layer_weights, layer_bias)?;
            if let Some(&activation) = activations.get(i) {
                model.add_activation_layer(activation);
            }
        }

        self.model = Some(model);
        Ok(())
    }

    /// Runs inference over `inputs`, processing at most `batch_size` samples
    /// per pass, and returns one output vector per input in the same order.
    ///
    /// An empty `inputs` slice yields an empty result without touching the model.
    ///
    /// # Errors
    ///
    /// Returns an invalid-parameter error when the configured batch size is
    /// zero, and propagates any error from processing a batch.
    pub async fn infer_batch(&self, inputs: &[Vec<f32>]) -> Result<Vec<Vec<f32>>> {
        if inputs.is_empty() {
            return Ok(Vec::new());
        }
        // `slice::chunks` panics on a chunk size of zero; report it instead.
        if self.batch_size == 0 {
            return Err(GpuAdvancedError::invalid_parameter(
                "Batch size must be greater than zero",
            ));
        }

        let mut results = Vec::with_capacity(inputs.len());
        for chunk in inputs.chunks(self.batch_size) {
            let batch_results = self.process_batch(chunk).await?;
            results.extend(batch_results);
        }
        Ok(results)
    }

    /// Pushes one batch through every layer of the loaded model.
    ///
    /// The samples are concatenated into one flat vector, transformed layer by
    /// layer, and finally split back into one output vector per sample.
    ///
    /// # Errors
    ///
    /// * `MlInferenceError` when no model is loaded, when a dense layer's
    ///   input feature count does not match the data reaching it, or when the
    ///   layers produced fewer values than the batch requires.
    /// * An invalid-parameter error when a sample's length differs from the
    ///   product of the model's input shape.
    /// * `NotImplemented` for layer kinds without batched support.
    /// * Any error returned by the dense or activation execution.
    async fn process_batch(&self, batch: &[Vec<f32>]) -> Result<Vec<Vec<f32>>> {
        let model = self.model.as_ref().ok_or_else(|| {
            GpuAdvancedError::MlInferenceError("No model loaded for inference".to_string())
        })?;
        if batch.is_empty() {
            return Ok(Vec::new());
        }

        let input_size = model.input_shape().iter().product::<usize>();
        let mismatch = batch
            .iter()
            .enumerate()
            .find(|(_, input)| input.len() != input_size);
        if let Some((idx, input)) = mismatch {
            return Err(GpuAdvancedError::invalid_parameter(format!(
                "Input {} has wrong size: expected {}, got {}",
                idx,
                input_size,
                input.len()
            )));
        }

        let batch_size = batch.len();
        let mut current_data: Vec<f32> = batch.concat();
        let mut current_feature_size = input_size;

        for layer in model.layers() {
            match layer.layer_type() {
                LayerType::Dense {
                    input_features,
                    output_features,
                } => {
                    // The flat data is handed over as `batch_size` rows of
                    // `input_features` values; reject a model whose layers do
                    // not chain instead of running on inconsistent data.
                    if *input_features != current_feature_size {
                        return Err(GpuAdvancedError::MlInferenceError(format!(
                            "Dense layer expects {} input features but received {}",
                            input_features, current_feature_size
                        )));
                    }
                    current_data = self
                        .execute_dense_layer(
                            &current_data,
                            layer,
                            batch_size,
                            *input_features,
                            *output_features,
                        )
                        .await?;
                    current_feature_size = *output_features;
                }
                LayerType::Activation { activation } => {
                    current_data = self.activation(&current_data, *activation).await?;
                }
                // The data is already flat, and dropout is skipped at inference.
                LayerType::Flatten | LayerType::Dropout { .. } => {}
                // Listed explicitly so a newly added variant fails to compile
                // here instead of silently falling into this arm.
                LayerType::Conv2d { .. }
                | LayerType::BatchNorm { .. }
                | LayerType::Pool2d { .. } => {
                    return Err(GpuAdvancedError::NotImplemented(format!(
                        "Layer type {:?} not yet supported in batched inference",
                        layer.layer_type()
                    )));
                }
            }
        }

        let output_size = current_feature_size;
        // Guard the slicing below: a short result would otherwise panic.
        let required_len = batch_size * output_size;
        if current_data.len() < required_len {
            return Err(GpuAdvancedError::MlInferenceError(format!(
                "Layer output too short: expected at least {} values, got {}",
                required_len,
                current_data.len()
            )));
        }

        let results = (0..batch_size)
            .map(|i| {
                let start = i * output_size;
                current_data[start..start + output_size].to_vec()
            })
            .collect();
        Ok(results)
    }

    /// Groups `inputs` by length, runs each group through [`Self::infer_batch`],
    /// and returns the outputs in the same order as the inputs.
    ///
    /// Groups are processed in ascending order of input length so the first
    /// error reported is deterministic. Every group is still checked against
    /// the loaded model's input size, so inputs of any other length produce an
    /// error.
    ///
    /// # Errors
    ///
    /// Propagates the first error returned by `infer_batch`.
    pub async fn dynamic_batch(&self, inputs: Vec<Vec<f32>>) -> Result<Vec<Vec<f32>>> {
        let total = inputs.len();

        // Remember each input's original position so results can be put back
        // in caller order after per-size grouping.
        let mut size_groups: std::collections::BTreeMap<usize, (Vec<usize>, Vec<Vec<f32>>)> =
            std::collections::BTreeMap::new();
        for (position, input) in inputs.into_iter().enumerate() {
            let (positions, group) = size_groups.entry(input.len()).or_default();
            positions.push(position);
            group.push(input);
        }

        let mut ordered: Vec<Vec<f32>> = vec![Vec::new(); total];
        for (_size, (positions, group)) in size_groups {
            let results = self.infer_batch(&group).await?;
            for (position, result) in positions.into_iter().zip(results) {
                ordered[position] = result;
            }
        }
        Ok(ordered)
    }

    /// Currently loaded model, if any.
    pub fn model(&self) -> Option<&GpuModel> {
        self.model.as_ref()
    }

    /// Whether a model is currently loaded.
    pub fn has_model(&self) -> bool {
        self.model.is_some()
    }

    /// Configured maximum number of samples per processing pass.
    pub fn batch_size(&self) -> usize {
        self.batch_size
    }

    /// Whether the mixed-precision flag is set.
    pub fn is_mixed_precision(&self) -> bool {
        self.mixed_precision
    }
}
/// Activation function selector used by `LayerType::Activation`.
#[derive(Debug, Clone, Copy)]
pub enum ActivationType {
    /// ReLU activation.
    ReLU,
    /// Sigmoid activation.
    Sigmoid,
    /// Tanh activation.
    Tanh,
    /// Leaky ReLU carrying one `f32` parameter
    /// (presumably the negative-side slope — confirm against the kernel).
    LeakyReLU(f32),
}
/// Reduction selector used by `LayerType::Pool2d`.
#[derive(Debug, Clone, Copy)]
pub enum PoolType {
    /// Max pooling.
    Max,
    /// Average pooling.
    Average,
}
/// Counters and timings describing inference activity.
///
/// Nothing in this file fills these fields in; `Default` yields all zeros.
#[derive(Debug, Clone, Default)]
pub struct InferenceStats {
    /// Total number of inferences.
    pub total_inferences: u64,
    /// Total number of batches.
    pub total_batches: u64,
    /// Average batch size.
    pub avg_batch_size: f64,
    /// Total time in microseconds (`print` reports it in milliseconds).
    pub total_time_us: u64,
    /// Average time per sample in microseconds.
    pub avg_time_per_sample_us: f64,
}
impl InferenceStats {
    /// Writes a human-readable summary of the statistics to standard output.
    ///
    /// The total time is printed in whole milliseconds (integer division of
    /// the microsecond total by 1000).
    pub fn print(&self) {
        let Self {
            total_inferences,
            total_batches,
            avg_batch_size,
            total_time_us,
            avg_time_per_sample_us,
        } = self;
        let total_time_ms = *total_time_us / 1000;

        println!("\nML Inference Statistics:");
        println!(" Total inferences: {}", total_inferences);
        println!(" Total batches: {}", total_batches);
        println!(" Average batch size: {:.1}", avg_batch_size);
        println!(" Total time: {} ms", total_time_ms);
        println!(" Avg time per sample: {:.2} us", avg_time_per_sample_us);
    }
}