trueno/backends/gpu/device/
eigen.rs1#[cfg(any(feature = "gpu", feature = "gpu-wasm"))]
7use super::super::runtime;
8use super::super::shaders;
9use super::GpuDevice;
10
11impl GpuDevice {
12 #[cfg(all(feature = "gpu", not(target_arch = "wasm32")))]
18 pub fn symmetric_eigen(
19 &self,
20 matrix: &[f32],
21 n: usize,
22 ) -> Result<(Vec<f32>, Vec<f32>), String> {
23 runtime::block_on(async { self.symmetric_eigen_async(matrix, n).await })
24 }
25
26 pub async fn symmetric_eigen_async(
31 &self,
32 matrix: &[f32],
33 n: usize,
34 ) -> Result<(Vec<f32>, Vec<f32>), String> {
35 if matrix.len() != n * n {
36 return Err(format!(
37 "Matrix size mismatch: expected {} elements for {}x{} matrix, got {}",
38 n * n,
39 n,
40 n,
41 matrix.len()
42 ));
43 }
44
45 if n == 0 {
46 return Ok((Vec::new(), Vec::new()));
47 }
48
49 if n < 64 {
51 return self.symmetric_eigen_cpu(matrix, n);
52 }
53
54 let rotation_shader = self.device.create_shader_module(wgpu::ShaderModuleDescriptor {
56 label: Some("Jacobi Rotation Shader"),
57 source: wgpu::ShaderSource::Wgsl(shaders::JACOBI_ROTATION_SHADER.into()),
58 });
59
60 let matrix_size = (n * n * std::mem::size_of::<f32>()) as u64;
62
63 let matrix_buffer = self.device.create_buffer(&wgpu::BufferDescriptor {
64 label: Some("Matrix Buffer"),
65 size: matrix_size,
66 usage: wgpu::BufferUsages::STORAGE
67 | wgpu::BufferUsages::COPY_DST
68 | wgpu::BufferUsages::COPY_SRC,
69 mapped_at_creation: false,
70 });
71
72 let eigenvectors_buffer = self.device.create_buffer(&wgpu::BufferDescriptor {
73 label: Some("Eigenvectors Buffer"),
74 size: matrix_size,
75 usage: wgpu::BufferUsages::STORAGE
76 | wgpu::BufferUsages::COPY_DST
77 | wgpu::BufferUsages::COPY_SRC,
78 mapped_at_creation: false,
79 });
80
81 #[repr(C)]
83 #[derive(Copy, Clone, bytemuck::Pod, bytemuck::Zeroable)]
84 struct JacobiParams {
85 n: u32,
86 p: u32,
87 q: u32,
88 c: f32,
89 s: f32,
90 _padding: [u32; 3],
91 }
92
93 let params_buffer = self.device.create_buffer(&wgpu::BufferDescriptor {
94 label: Some("Jacobi Params"),
95 size: std::mem::size_of::<JacobiParams>() as u64,
96 usage: wgpu::BufferUsages::UNIFORM | wgpu::BufferUsages::COPY_DST,
97 mapped_at_creation: false,
98 });
99
100 let mut eigenvectors = vec![0.0f32; n * n];
102 for i in 0..n {
103 eigenvectors[i * n + i] = 1.0;
104 }
105
106 self.queue.write_buffer(&matrix_buffer, 0, bytemuck::cast_slice(matrix));
108 self.queue.write_buffer(&eigenvectors_buffer, 0, bytemuck::cast_slice(&eigenvectors));
109
110 let bind_group_layout =
112 self.device.create_bind_group_layout(&wgpu::BindGroupLayoutDescriptor {
113 label: Some("Jacobi Bind Group Layout"),
114 entries: &[
115 wgpu::BindGroupLayoutEntry {
116 binding: 0,
117 visibility: wgpu::ShaderStages::COMPUTE,
118 ty: wgpu::BindingType::Buffer {
119 ty: wgpu::BufferBindingType::Storage { read_only: false },
120 has_dynamic_offset: false,
121 min_binding_size: None,
122 },
123 count: None,
124 },
125 wgpu::BindGroupLayoutEntry {
126 binding: 1,
127 visibility: wgpu::ShaderStages::COMPUTE,
128 ty: wgpu::BindingType::Buffer {
129 ty: wgpu::BufferBindingType::Storage { read_only: false },
130 has_dynamic_offset: false,
131 min_binding_size: None,
132 },
133 count: None,
134 },
135 wgpu::BindGroupLayoutEntry {
136 binding: 2,
137 visibility: wgpu::ShaderStages::COMPUTE,
138 ty: wgpu::BindingType::Buffer {
139 ty: wgpu::BufferBindingType::Uniform,
140 has_dynamic_offset: false,
141 min_binding_size: None,
142 },
143 count: None,
144 },
145 ],
146 });
147
148 let bind_group = self.device.create_bind_group(&wgpu::BindGroupDescriptor {
150 label: Some("Jacobi Bind Group"),
151 layout: &bind_group_layout,
152 entries: &[
153 wgpu::BindGroupEntry { binding: 0, resource: matrix_buffer.as_entire_binding() },
154 wgpu::BindGroupEntry {
155 binding: 1,
156 resource: eigenvectors_buffer.as_entire_binding(),
157 },
158 wgpu::BindGroupEntry { binding: 2, resource: params_buffer.as_entire_binding() },
159 ],
160 });
161
162 let pipeline_layout = self.device.create_pipeline_layout(&wgpu::PipelineLayoutDescriptor {
164 label: Some("Jacobi Pipeline Layout"),
165 bind_group_layouts: &[&bind_group_layout],
166 push_constant_ranges: &[],
167 });
168
169 let rotation_pipeline =
170 self.device.create_compute_pipeline(&wgpu::ComputePipelineDescriptor {
171 label: Some("Jacobi Rotation Pipeline"),
172 layout: Some(&pipeline_layout),
173 module: &rotation_shader,
174 entry_point: Some("main"),
175 compilation_options: wgpu::PipelineCompilationOptions::default(),
176 cache: None,
177 });
178
179 let max_sweeps = 50;
181 let tolerance = 1e-7 * (matrix.iter().map(|x| x * x).sum::<f32>().sqrt()).max(1.0);
182
183 let mut a = matrix.to_vec();
185
186 for _sweep in 0..max_sweeps {
187 let mut converged = true;
188
189 for i in 0..n {
191 for j in (i + 1)..n {
192 let aij = a[i * n + j];
193
194 if aij.abs() < tolerance {
195 continue;
196 }
197
198 converged = false;
199
200 let aii = a[i * n + i];
202 let ajj = a[j * n + j];
203
204 let tau = (ajj - aii) / (2.0 * aij);
205 let t = if tau >= 0.0 {
206 1.0 / (tau + (1.0 + tau * tau).sqrt())
207 } else {
208 -1.0 / (-tau + (1.0 + tau * tau).sqrt())
209 };
210
211 let c = 1.0 / (1.0 + t * t).sqrt();
212 let s = t * c;
213
214 let params = JacobiParams {
216 n: n as u32,
217 p: i as u32,
218 q: j as u32,
219 c,
220 s,
221 _padding: [0; 3],
222 };
223
224 self.queue.write_buffer(¶ms_buffer, 0, bytemuck::bytes_of(¶ms));
225
226 let mut encoder =
228 self.device.create_command_encoder(&wgpu::CommandEncoderDescriptor {
229 label: Some("Jacobi Rotation Encoder"),
230 });
231
232 {
233 let mut pass = encoder.begin_compute_pass(&wgpu::ComputePassDescriptor {
234 label: Some("Jacobi Rotation Pass"),
235 timestamp_writes: None,
236 });
237 pass.set_pipeline(&rotation_pipeline);
238 pass.set_bind_group(0, &bind_group, &[]);
239 pass.dispatch_workgroups((n as u32).div_ceil(256), 1, 1);
240 }
241
242 self.queue.submit(Some(encoder.finish()));
243
244 a[i * n + i] = aii - t * aij;
246 a[j * n + j] = ajj + t * aij;
247 a[i * n + j] = 0.0;
248 a[j * n + i] = 0.0;
249
250 for k in 0..n {
252 if k != i && k != j {
253 let aki = a[k * n + i];
254 let akj = a[k * n + j];
255 a[k * n + i] = c * aki - s * akj;
256 a[i * n + k] = a[k * n + i];
257 a[k * n + j] = s * aki + c * akj;
258 a[j * n + k] = a[k * n + j];
259 }
260 }
261 }
262 }
263
264 if converged {
265 break;
266 }
267 }
268
269 let staging_matrix = self.device.create_buffer(&wgpu::BufferDescriptor {
271 label: Some("Staging Matrix"),
272 size: matrix_size,
273 usage: wgpu::BufferUsages::MAP_READ | wgpu::BufferUsages::COPY_DST,
274 mapped_at_creation: false,
275 });
276
277 let staging_eigenvectors = self.device.create_buffer(&wgpu::BufferDescriptor {
278 label: Some("Staging Eigenvectors"),
279 size: matrix_size,
280 usage: wgpu::BufferUsages::MAP_READ | wgpu::BufferUsages::COPY_DST,
281 mapped_at_creation: false,
282 });
283
284 let mut encoder = self.device.create_command_encoder(&wgpu::CommandEncoderDescriptor {
285 label: Some("Copy Encoder"),
286 });
287
288 encoder.copy_buffer_to_buffer(&matrix_buffer, 0, &staging_matrix, 0, matrix_size);
289 encoder.copy_buffer_to_buffer(
290 &eigenvectors_buffer,
291 0,
292 &staging_eigenvectors,
293 0,
294 matrix_size,
295 );
296
297 self.queue.submit(Some(encoder.finish()));
298
299 let eigenvector_slice = staging_eigenvectors.slice(..);
301 let (sender, receiver) = futures_intrusive::channel::shared::oneshot_channel();
302 eigenvector_slice.map_async(wgpu::MapMode::Read, move |result| {
303 sender.send(result).expect("oneshot channel receiver dropped");
304 });
305
306 self.device.poll(wgpu::PollType::Wait { submission_index: None, timeout: None }).ok();
307
308 receiver
309 .receive()
310 .await
311 .ok_or("Failed to receive mapping result")?
312 .map_err(|e| format!("Buffer mapping failed: {:?}", e))?;
313
314 let mut result_eigenvectors = vec![0.0f32; n * n];
315 {
316 let data = eigenvector_slice.get_mapped_range();
317 let output_data: &[f32] = bytemuck::cast_slice(&data);
318 result_eigenvectors.copy_from_slice(output_data);
319 }
320 staging_eigenvectors.unmap();
321
322 let eigenvalues: Vec<f32> = (0..n).map(|i| a[i * n + i]).collect();
324
325 Ok((eigenvalues, result_eigenvectors))
326 }
327
328 pub(super) fn symmetric_eigen_cpu(
330 &self,
331 matrix: &[f32],
332 n: usize,
333 ) -> Result<(Vec<f32>, Vec<f32>), String> {
334 let max_sweeps = 50;
335 let tolerance = 1e-7 * (matrix.iter().map(|x| x * x).sum::<f32>().sqrt()).max(1.0);
336
337 let mut a = matrix.to_vec();
338 let mut v = vec![0.0f32; n * n];
339 for i in 0..n {
340 v[i * n + i] = 1.0;
341 }
342
343 for _sweep in 0..max_sweeps {
344 let mut converged = true;
345
346 for i in 0..n {
347 for j in (i + 1)..n {
348 let aij = a[i * n + j];
349
350 if aij.abs() < tolerance {
351 continue;
352 }
353
354 converged = false;
355
356 let aii = a[i * n + i];
357 let ajj = a[j * n + j];
358
359 let tau = (ajj - aii) / (2.0 * aij);
360 let t = if tau >= 0.0 {
361 1.0 / (tau + (1.0 + tau * tau).sqrt())
362 } else {
363 -1.0 / (-tau + (1.0 + tau * tau).sqrt())
364 };
365
366 let c = 1.0 / (1.0 + t * t).sqrt();
367 let s = t * c;
368
369 a[i * n + i] = aii - t * aij;
371 a[j * n + j] = ajj + t * aij;
372 a[i * n + j] = 0.0;
373 a[j * n + i] = 0.0;
374
375 for k in 0..n {
377 if k != i && k != j {
378 let aki = a[k * n + i];
379 let akj = a[k * n + j];
380 a[k * n + i] = c * aki - s * akj;
381 a[i * n + k] = a[k * n + i];
382 a[k * n + j] = s * aki + c * akj;
383 a[j * n + k] = a[k * n + j];
384 }
385 }
386
387 for k in 0..n {
389 let vki = v[k * n + i];
390 let vkj = v[k * n + j];
391 v[k * n + i] = c * vki - s * vkj;
392 v[k * n + j] = s * vki + c * vkj;
393 }
394 }
395 }
396
397 if converged {
398 break;
399 }
400 }
401
402 let eigenvalues: Vec<f32> = (0..n).map(|i| a[i * n + i]).collect();
403 Ok((eigenvalues, v))
404 }
405}