shape_runtime/intrinsics/
convolution.rs1use crate::marshal::register_typed_fn_3_full;
23use crate::module_exports::{ModuleExports, ModuleParam};
24use crate::typed_module_exports::{ConcreteReturn, ConcreteType, TypedReturn};
25use std::sync::Arc;
26use wide::f64x4;
27
28pub fn create_convolution_intrinsics_module() -> ModuleExports {
33 let mut module = ModuleExports::new("std::core::intrinsics::convolution");
34 module.description = "1D stencil/convolution intrinsics (SIMD-accelerated)".to_string();
35
36 register_typed_fn_3_full::<_, Arc<Vec<f64>>, Arc<Vec<f64>>, Arc<String>>(
37 &mut module,
38 "__intrinsic_stencil",
39 "1D convolution of a Vec<number> series against a kernel; modes 'valid' / 'same' / 'full'",
40 [
41 ModuleParam {
42 name: "series".to_string(),
43 type_name: "Array<number>".to_string(),
44 required: true,
45 description: "Input series".to_string(),
46 ..Default::default()
47 },
48 ModuleParam {
49 name: "kernel".to_string(),
50 type_name: "Array<number>".to_string(),
51 required: true,
52 description: "Convolution kernel".to_string(),
53 ..Default::default()
54 },
55 ModuleParam {
56 name: "mode".to_string(),
57 type_name: "string".to_string(),
58 required: false,
59 description: "Boundary mode: 'valid', 'same' (default), or 'full'".to_string(),
60 default_snippet: Some("\"same\"".to_string()),
61 ..Default::default()
62 },
63 ],
64 ConcreteType::ArrayNumber,
65 |series, kernel, mode, _ctx| {
66 let kernel_slice = kernel.as_slice();
67 if kernel_slice.is_empty() {
68 return Err("Kernel cannot be empty".to_string());
69 }
70 let result = convolve_1d(series.as_slice(), kernel_slice, mode.as_str())?;
71 Ok(TypedReturn::Concrete(ConcreteReturn::ArrayF64(result)))
72 },
73 );
74
75 module
76}
77
78fn convolve_1d(data: &[f64], kernel: &[f64], mode: &str) -> Result<Vec<f64>, String> {
83 if data.is_empty() {
84 return Ok(vec![]);
85 }
86
87 let kernel_rev: Vec<f64> = kernel.iter().rev().copied().collect();
88
89 match mode {
90 "valid" => Ok(convolve_valid(data, &kernel_rev)),
91 "same" => Ok(convolve_same(data, &kernel_rev)),
92 "full" => Ok(convolve_full(data, &kernel_rev)),
93 _ => Err(format!(
94 "Unknown convolution mode: {}. Use 'valid', 'same', or 'full'",
95 mode
96 )),
97 }
98}
99
100fn convolve_valid(data: &[f64], kernel: &[f64]) -> Vec<f64> {
101 let n = data.len();
102 let k = kernel.len();
103
104 if k > n {
105 return vec![];
106 }
107
108 let out_len = n - k + 1;
109 let mut result = vec![0.0; out_len];
110
111 const SIMD_THRESHOLD: usize = 16;
112
113 if k >= 4 && out_len >= SIMD_THRESHOLD {
114 let k_chunks = k / 4;
115
116 for i in 0..out_len {
117 let mut sum = f64x4::splat(0.0);
118 for j in 0..k_chunks {
119 let idx = j * 4;
120 let d = f64x4::from(&data[i + idx..i + idx + 4]);
121 let kv = f64x4::from(&kernel[idx..idx + 4]);
122 sum += d * kv;
123 }
124 let arr = sum.to_array();
125 let mut total = arr[0] + arr[1] + arr[2] + arr[3];
126 for j in (k_chunks * 4)..k {
127 total += data[i + j] * kernel[j];
128 }
129 result[i] = total;
130 }
131 } else {
132 for i in 0..out_len {
133 let mut sum = 0.0;
134 for j in 0..k {
135 sum += data[i + j] * kernel[j];
136 }
137 result[i] = sum;
138 }
139 }
140
141 result
142}
143
144fn convolve_same(data: &[f64], kernel: &[f64]) -> Vec<f64> {
145 let n = data.len();
146 let k = kernel.len();
147
148 if n == 0 {
149 return vec![];
150 }
151
152 let mut result = vec![0.0; n];
153 let half = k / 2;
154
155 for i in 0..n {
156 let mut sum = 0.0;
157 for j in 0..k {
158 let data_idx = i as isize + j as isize - half as isize;
159 if data_idx >= 0 && (data_idx as usize) < n {
160 sum += data[data_idx as usize] * kernel[j];
161 }
162 }
163 result[i] = sum;
164 }
165
166 result
167}
168
169fn convolve_full(data: &[f64], kernel: &[f64]) -> Vec<f64> {
170 let n = data.len();
171 let k = kernel.len();
172
173 if n == 0 || k == 0 {
174 return vec![];
175 }
176
177 let out_len = n + k - 1;
178 let mut result = vec![0.0; out_len];
179
180 for i in 0..out_len {
181 let mut sum = 0.0;
182 for j in 0..k {
183 let data_idx = i as isize - j as isize;
184 if data_idx >= 0 && (data_idx as usize) < n {
185 sum += data[data_idx as usize] * kernel[j];
186 }
187 }
188 result[i] = sum;
189 }
190
191 result
192}
193
194#[cfg(test)]
195mod tests {
196 use super::*;
197
198 #[test]
199 fn test_stencil_smoothing() {
200 let data = vec![1.0, 2.0, 3.0, 4.0, 5.0];
201 let kernel = vec![0.25, 0.5, 0.25];
202 let kernel_rev: Vec<f64> = kernel.iter().rev().copied().collect();
203 let result = convolve_same(&data, &kernel_rev);
204 assert_eq!(result.len(), 5);
205 assert!((result[2] - 3.0).abs() < 0.01);
206 }
207
208 #[test]
209 fn test_stencil_valid() {
210 let data = vec![1.0, 2.0, 3.0, 4.0, 5.0];
211 let kernel = vec![1.0, 0.0, -1.0];
212 let kernel_rev: Vec<f64> = kernel.iter().rev().copied().collect();
213 let result = convolve_valid(&data, &kernel_rev);
214 assert_eq!(result.len(), 3);
215 }
216
217 #[test]
218 fn test_stencil_heat_diffusion() {
219 let data = vec![1.0, 1.0, 1.0, 1.0, 1.0];
220 let kernel = vec![0.25, 0.5, 0.25];
221 let kernel_rev: Vec<f64> = kernel.iter().rev().copied().collect();
222 let result = convolve_same(&data, &kernel_rev);
223 assert!((result[2] - 1.0).abs() < 0.01);
224 }
225}