numrs/backend/cpu/conv.rs

use crate::array::Array;
use anyhow::Result;

/// Naive Conv1D implementation used as the CPU fallback.
///
/// Expects `input` of shape `[batch, in_channels, length]` and `weight` of
/// shape `[out_channels, in_channels, kernel_size]`.
pub fn conv1d_naive(
    input: &Array,
    weight: &Array,
    bias: Option<&Array>,
    stride: usize,
    padding: usize,
) -> Result<Array> {
    // 1. Shape extraction. Full validation is assumed to happen in the ops
    //    layer; the debug assert below is a cheap extra safety check.
    let batch_size = input.shape[0];
    let in_channels = input.shape[1];
    let input_len = input.shape[2];

    let out_channels = weight.shape[0];
    let kernel_size = weight.shape[2];
    debug_assert_eq!(weight.shape[1], in_channels);

    // 2. Output shape
    // out_len = (in_len + 2*padding - kernel_size) / stride + 1
    // (saturating_sub guards the degenerate case kernel_size > in_len + 2*padding)
    let out_len = ((input_len + 2 * padding).saturating_sub(kernel_size)) / stride + 1;
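    // Worked example (hypothetical numbers): input_len = 10, padding = 1,
    // kernel_size = 3, stride = 2  =>  out_len = (10 + 2 - 3) / 2 + 1 = 5.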
    let output_shape = vec![batch_size, out_channels, out_len];

    // 3. Execution (naive nested loops)
    let mut output_data = vec![0.0; batch_size * out_channels * out_len];

    let input_data = &input.data;
    let weight_data = &weight.data;

    // Borrow the bias data once, outside the hot loops
    let bias_data = bias.map(|b| &b.data);

    for b in 0..batch_size {
        for oc in 0..out_channels {
            let bias_val = bias_data.map_or(0.0, |bd| bd[oc]);

            for ol in 0..out_len {
                let mut sum = bias_val;

                // Input window start index (in the "virtual" padded array):
                // virtual_idx = ol * stride, real_idx = virtual_idx - padding
                let input_start_idx = (ol * stride) as isize - padding as isize;
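                // e.g. (hypothetical numbers) stride = 1, padding = 2, ol = 0
                // gives input_start_idx = -2: the first two taps land in the
                // zero padding and are skipped below.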

                for ic in 0..in_channels {
                    for k in 0..kernel_size {
                        let current_input_idx = input_start_idx + k as isize;

                        // Only accumulate for positions inside the real input;
                        // padded positions contribute zero and are skipped.
                        if current_input_idx >= 0 && current_input_idx < input_len as isize {
                            let val_in = input_data[
                                b * (in_channels * input_len) +
                                ic * input_len +
                                current_input_idx as usize
                            ];
                            let val_w = weight_data[
                                oc * (in_channels * kernel_size) +
                                ic * kernel_size +
                                k
                            ];
                            sum += val_in * val_w;
                        }
                    }
                }

                output_data[
                    b * (out_channels * out_len) +
                    oc * out_len +
                    ol
                ] = sum;
            }
        }
    }

    Ok(Array::new(output_shape, output_data))
}
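
// A minimal usage sketch for `conv1d_naive` above. Hedged: this assumes
// `Array::new(shape, data)` takes a flat, row-major data buffer and exposes
// public `shape`/`data` fields, as the indexing above implies.
//
//     let input = Array::new(vec![1, 1, 4], vec![1.0, 2.0, 3.0, 4.0]);
//     let weight = Array::new(vec![1, 1, 2], vec![1.0, 1.0]);
//     // stride 1, no padding => out_len = (4 - 2) / 1 + 1 = 3
//     let out = conv1d_naive(&input, &weight, None, 1, 0)?;
//     assert_eq!(out.shape, vec![1, 1, 3]);
//     assert_eq!(out.data, vec![3.0, 5.0, 7.0]);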

/// Naive Conv1D backward implementation.
/// Returns `(grad_input, grad_weight, grad_bias)`. Note that `grad_bias` is
/// always computed here; callers whose layer has no bias should discard it.
pub fn conv1d_backward_naive(
    grad_output: &Array,
    input: &Array,
    weight: &Array,
    stride: usize,
    padding: usize,
) -> Result<(Array, Array, Option<Array>)> {
    let batch_size = input.shape[0];
    let in_channels = input.shape[1];
    let input_len = input.shape[2];

    let out_channels = weight.shape[0];
    let kernel_size = weight.shape[2];
    let out_len = grad_output.shape[2];

    // Gradient accumulators, zero-initialized
    let mut grad_input_data = vec![0.0; batch_size * in_channels * input_len];
    let mut grad_weight_data = vec![0.0; out_channels * in_channels * kernel_size];
    let mut grad_bias_data = vec![0.0; out_channels];

    let grad_out_data = &grad_output.data;
    let input_data = &input.data;
    let weight_data = &weight.data;

    // Loop over batch, output channels, and output spatial positions
    for b in 0..batch_size {
        for oc in 0..out_channels {
            for ol in 0..out_len {
                let grad_val = grad_out_data[
                    b * (out_channels * out_len) + oc * out_len + ol
                ];

                // Accumulate grad_bias: dL/dBias is the sum of grad_output
                // over batch and spatial positions for each output channel
                grad_bias_data[oc] += grad_val;

                // Mapped input window start (may be negative due to padding)
                let input_start_idx = (ol * stride) as isize - padding as isize;

                for ic in 0..in_channels {
                    for k in 0..kernel_size {
                        let current_input_idx = input_start_idx + k as isize;

                        // Only positions inside the real (unpadded) input contribute
                        if current_input_idx >= 0 && current_input_idx < input_len as isize {
                            let idx_in = b * (in_channels * input_len) + ic * input_len + current_input_idx as usize;
                            let idx_w = oc * (in_channels * kernel_size) + ic * kernel_size + k;

                            // dL/dInput += grad_output * weight
                            grad_input_data[idx_in] += grad_val * weight_data[idx_w];

                            // dL/dWeight += grad_output * input
                            grad_weight_data[idx_w] += grad_val * input_data[idx_in];
                        }
                    }
                }
            }
        }
    }

    let grad_input = Array::new(input.shape.clone(), grad_input_data);
    let grad_weight = Array::new(weight.shape.clone(), grad_weight_data);
    let grad_bias = Some(Array::new(vec![out_channels], grad_bias_data));

    Ok((grad_input, grad_weight, grad_bias))
}
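
// A hedged sanity-check sketch for the backward pass: a hand-computed example
// under the same `Array` assumptions as above (flat, row-major `data`).
#[cfg(test)]
mod tests {
    use super::*;

    #[test]
    fn backward_matches_hand_computation() {
        // Forward setup: input [1, 1, 3], kernel [1, 1, 2], stride 1, no padding.
        let input = Array::new(vec![1, 1, 3], vec![1.0, 2.0, 3.0]);
        let weight = Array::new(vec![1, 1, 2], vec![10.0, 20.0]);
        // out_len = (3 - 2) / 1 + 1 = 2; use an upstream gradient of ones.
        let grad_out = Array::new(vec![1, 1, 2], vec![1.0, 1.0]);

        let (gi, gw, gb) =
            conv1d_backward_naive(&grad_out, &input, &weight, 1, 0).unwrap();

        // grad_input[i] sums the weight taps of every window covering i.
        assert_eq!(gi.data, vec![10.0, 30.0, 20.0]);
        // grad_weight[k] sums the input values tap k touched across windows.
        assert_eq!(gw.data, vec![3.0, 5.0]);
        // grad_bias is the per-channel sum of grad_output.
        assert_eq!(gb.unwrap().data, vec![2.0]);
    }
}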