1use std::sync::mpsc;
22use std::time::Instant;
23
24use bytemuck::{Pod, Zeroable};
25use serde::Serialize;
26use wgpu::util::DeviceExt;
27
28use crate::error::{Error, Result};
29
30pub const FAST_PATH_LAMBDA: f32 = 0.5;
34
35pub const FAST_PATH_K: f32 = 2.0;
37
38pub struct FastPathCpuOutput {
42 pub trust: Vec<f32>,
44 pub residual_history_out: Vec<f32>,
46 pub drift_history_out: Vec<f32>,
48}
49
50pub fn run_fast_path_cpu(
58 current: &[[f32; 3]],
59 history: &[[f32; 3]],
60 residual_history_in: &[f32],
61 drift_history_in: &[f32],
62 width: usize,
63 height: usize,
64 lambda: f32,
65 k: f32,
66 local_aggregation: bool,
67) -> FastPathCpuOutput {
68 let pixel_count = width * height;
69 assert_eq!(current.len(), pixel_count);
70 assert_eq!(history.len(), pixel_count);
71 assert_eq!(residual_history_in.len(), pixel_count);
72 assert_eq!(drift_history_in.len(), pixel_count);
73
74 let mut trust = vec![0.0f32; pixel_count];
77 let mut residual_history_out = vec![0.0f32; pixel_count];
78 let mut drift_history_out = vec![0.0f32; pixel_count];
79 let mut u_field = vec![0.0f32; pixel_count];
80
81 for i in 0..pixel_count {
84 let c = current[i];
85 let h = history[i];
86 let r_t = ((c[0] - h[0]).abs() + (c[1] - h[1]).abs() + (c[2] - h[2]).abs()) / 3.0;
87 let d_t = r_t - residual_history_in[i];
88 let s_t = d_t - drift_history_in[i];
89 let u_t = d_t.abs() + lambda * s_t.abs();
90 u_field[i] = u_t;
91 residual_history_out[i] = r_t;
92 drift_history_out[i] = d_t;
93 }
94
95 if local_aggregation {
96 for y in 0..height {
99 for x in 0..width {
100 let mut u_sum = 0.0f32;
101 for oy in -1i32..=1 {
102 for ox in -1i32..=1 {
103 let nx = (x as i32 + ox).clamp(0, width as i32 - 1) as usize;
104 let ny = (y as i32 + oy).clamp(0, height as i32 - 1) as usize;
105 u_sum += u_field[ny * width + nx];
106 }
107 }
108 let u_prime = u_sum / 9.0;
109 trust[y * width + x] = (1.0 - k * u_prime).clamp(0.0, 1.0);
110 }
111 }
112 } else {
113 for i in 0..pixel_count {
114 trust[i] = (1.0 - k * u_field[i]).clamp(0.0, 1.0);
115 }
116 }
117
118 FastPathCpuOutput {
119 trust,
120 residual_history_out,
121 drift_history_out,
122 }
123}
124
125const FAST_PATH_SHADER: &str = r#"
128struct Params {
129 // [width, height, has_local_agg (0=off, 1=on), _padding]
130 size: vec4<u32>,
131 // [lambda, k, _, _]
132 coefficients: vec4<f32>,
133}
134
135@group(0) @binding(0) var<storage, read> current_color: array<vec4<f32>>;
136@group(0) @binding(1) var<storage, read> reprojected_history: array<vec4<f32>>;
137@group(0) @binding(2) var<storage, read> residual_history_in: array<f32>;
138@group(0) @binding(3) var<storage, read> drift_history_in: array<f32>;
139@group(0) @binding(4) var<uniform> params: Params;
140@group(0) @binding(5) var<storage, read_write> trust_out: array<f32>;
141@group(0) @binding(6) var<storage, read_write> residual_history_out: array<f32>;
142@group(0) @binding(7) var<storage, read_write> drift_history_out: array<f32>;
143
144fn l1_residual(a: vec3<f32>, b: vec3<f32>) -> f32 {
145 return (abs(a.x - b.x) + abs(a.y - b.y) + abs(a.z - b.z)) / 3.0;
146}
147
148/// Compute the proxy scalar u for an arbitrary (clamped) pixel coordinate.
149/// Used by the optional 3×3 local aggregation path.
150fn proxy_u_at(xi: i32, yi: i32) -> f32 {
151 let w = i32(params.size.x);
152 let h = i32(params.size.y);
153 let xc = clamp(xi, 0, w - 1);
154 let yc = clamp(yi, 0, h - 1);
155 let j = u32(yc) * params.size.x + u32(xc);
156 let cur = current_color[j].xyz;
157 let hist = reprojected_history[j].xyz;
158 let r_prev = residual_history_in[j];
159 let d_prev = drift_history_in[j];
160 let r_j = l1_residual(cur, hist);
161 let d_j = r_j - r_prev;
162 let s_j = d_j - d_prev;
163 return abs(d_j) + params.coefficients.x * abs(s_j);
164}
165
166/// Single-pass fast-path proxy kernel.
167///
168/// Computes r_t, d_t, s_t, u_t per pixel.
169/// Writes trust (with optional 3×3 local mean of u), r_t, and d_t.
170@compute @workgroup_size(1, 1, 1)
171fn main(@builtin(global_invocation_id) gid: vec3<u32>) {
172 let w = params.size.x;
173 let h = params.size.y;
174 if (gid.x >= w || gid.y >= h) { return; }
175
176 let i = gid.y * w + gid.x;
177 let lambda = params.coefficients.x;
178 let k = params.coefficients.y;
179
180 let cur = current_color[i].xyz;
181 let hist = reprojected_history[i].xyz;
182 let r_prev = residual_history_in[i];
183 let d_prev = drift_history_in[i];
184
185 let r_t = l1_residual(cur, hist);
186 let d_t = r_t - r_prev;
187 let s_t = d_t - d_prev;
188
189 var trust: f32;
190 if (params.size.z == 1u) {
191 // Optional 3×3 local mean of u before trust.
192 var u_sum = 0.0;
193 for (var oy: i32 = -1; oy <= 1; oy = oy + 1) {
194 for (var ox: i32 = -1; ox <= 1; ox = ox + 1) {
195 u_sum = u_sum + proxy_u_at(i32(gid.x) + ox, i32(gid.y) + oy);
196 }
197 }
198 trust = clamp(1.0 - k * (u_sum / 9.0), 0.0, 1.0);
199 } else {
200 let u_t = abs(d_t) + lambda * abs(s_t);
201 trust = clamp(1.0 - k * u_t, 0.0, 1.0);
202 }
203
204 trust_out[i] = trust;
205 residual_history_out[i] = r_t;
206 drift_history_out[i] = d_t;
207}
208"#;
209
210#[repr(C)]
213#[derive(Clone, Copy, Pod, Zeroable)]
214struct FpGpuParams {
215 size: [u32; 4],
216 coefficients: [f32; 4],
217}
218
219#[repr(C)]
220#[derive(Clone, Copy, Pod, Zeroable)]
221struct FpColor4 {
222 value: [f32; 4],
223}
224
225#[derive(Clone, Debug, Serialize)]
229pub struct FastPathGpuOutput {
230 pub trust: Vec<f32>,
231 pub residual_history_out: Vec<f32>,
232 pub drift_history_out: Vec<f32>,
233 pub total_ms: f64,
235 pub dispatch_ms: f64,
237 pub adapter_name: String,
238 pub backend: String,
239}
240
241pub fn try_run_fast_path_gpu(
245 current: &[[f32; 3]],
246 history: &[[f32; 3]],
247 residual_history_in: &[f32],
248 drift_history_in: &[f32],
249 width: usize,
250 height: usize,
251 lambda: f32,
252 k: f32,
253 local_aggregation: bool,
254) -> Result<Option<FastPathGpuOutput>> {
255 pollster::block_on(try_run_fast_path_gpu_async(
256 current,
257 history,
258 residual_history_in,
259 drift_history_in,
260 width,
261 height,
262 lambda,
263 k,
264 local_aggregation,
265 ))
266}
267
268async fn try_run_fast_path_gpu_async(
269 current: &[[f32; 3]],
270 history: &[[f32; 3]],
271 residual_history_in: &[f32],
272 drift_history_in: &[f32],
273 width: usize,
274 height: usize,
275 lambda: f32,
276 k: f32,
277 local_aggregation: bool,
278) -> Result<Option<FastPathGpuOutput>> {
279 let instance = wgpu::Instance::default();
280 let adapter = match instance
281 .request_adapter(&wgpu::RequestAdapterOptions {
282 power_preference: wgpu::PowerPreference::HighPerformance,
283 compatible_surface: None,
284 force_fallback_adapter: false,
285 })
286 .await
287 {
288 Some(a) => a,
289 None => return Ok(None),
290 };
291 let adapter_info = adapter.get_info();
292 let adapter_limits = adapter.limits();
293 let (device, queue) = adapter
294 .request_device(
295 &wgpu::DeviceDescriptor {
296 label: Some("dsfb-fast-path"),
297 required_features: wgpu::Features::empty(),
298 required_limits: wgpu::Limits {
299 max_storage_buffer_binding_size: adapter_limits
300 .max_storage_buffer_binding_size,
301 max_buffer_size: adapter_limits.max_buffer_size,
302 ..wgpu::Limits::default()
303 },
304 },
305 None,
306 )
307 .await
308 .map_err(|e| Error::Message(format!("wgpu device request failed: {e}")))?;
309
310 let shader = device.create_shader_module(wgpu::ShaderModuleDescriptor {
311 label: Some("dsfb-fast-path-wgsl"),
312 source: wgpu::ShaderSource::Wgsl(FAST_PATH_SHADER.into()),
313 });
314 let bind_group_layout = make_bind_group_layout(&device);
315 let pipeline = make_pipeline(&device, &shader, &bind_group_layout);
316
317 let pixel_count = width * height;
318 let current_packed = pack_colors(current);
319 let history_packed = pack_colors(history);
320 let params_val = FpGpuParams {
321 size: [width as u32, height as u32, u32::from(local_aggregation), 0],
322 coefficients: [lambda, k, 0.0, 0.0],
323 };
324
325 let out_bytes = (pixel_count * std::mem::size_of::<f32>()) as u64;
326 let (
327 cur_buf,
328 hist_buf,
329 res_in_buf,
330 drift_in_buf,
331 params_buf,
332 trust_buf,
333 res_out_buf,
334 drift_out_buf,
335 ) = upload_gpu_buffers(
336 &device,
337 ¤t_packed,
338 &history_packed,
339 residual_history_in,
340 drift_history_in,
341 ¶ms_val,
342 pixel_count,
343 );
344 let trust_staging = make_staging(&device, out_bytes, "fp-trust-stg");
345 let res_staging = make_staging(&device, out_bytes, "fp-res-stg");
346 let drift_staging = make_staging(&device, out_bytes, "fp-drift-stg");
347
348 let bind_group = make_bind_group(
349 &device,
350 &bind_group_layout,
351 &cur_buf,
352 &hist_buf,
353 &res_in_buf,
354 &drift_in_buf,
355 ¶ms_buf,
356 &trust_buf,
357 &res_out_buf,
358 &drift_out_buf,
359 );
360
361 let total_start = Instant::now();
362 let dispatch_start = Instant::now();
363 let mut encoder =
364 device.create_command_encoder(&wgpu::CommandEncoderDescriptor { label: Some("fp-enc") });
365 {
366 let mut pass = encoder.begin_compute_pass(&wgpu::ComputePassDescriptor {
367 label: Some("fp-pass"),
368 timestamp_writes: None,
369 });
370 pass.set_pipeline(&pipeline);
371 pass.set_bind_group(0, &bind_group, &[]);
372 pass.dispatch_workgroups(width as u32, height as u32, 1);
373 }
374 encoder.copy_buffer_to_buffer(&trust_buf, 0, &trust_staging, 0, out_bytes);
375 encoder.copy_buffer_to_buffer(&res_out_buf, 0, &res_staging, 0, out_bytes);
376 encoder.copy_buffer_to_buffer(&drift_out_buf, 0, &drift_staging, 0, out_bytes);
377 queue.submit(Some(encoder.finish()));
378 device.poll(wgpu::Maintain::Wait);
379 let dispatch_ms = dispatch_start.elapsed().as_secs_f64() * 1000.0;
380
381 let trust = read_f32_buf(&device, &trust_staging, pixel_count)?;
382 let residual_out = read_f32_buf(&device, &res_staging, pixel_count)?;
383 let drift_out = read_f32_buf(&device, &drift_staging, pixel_count)?;
384 let total_ms = total_start.elapsed().as_secs_f64() * 1000.0;
385
386 Ok(Some(FastPathGpuOutput {
387 trust,
388 residual_history_out: residual_out,
389 drift_history_out: drift_out,
390 total_ms,
391 dispatch_ms,
392 adapter_name: adapter_info.name,
393 backend: format!("{:?}", adapter_info.backend),
394 }))
395}
396
397#[derive(Clone, Debug, Serialize)]
401pub struct FastPathTimingEntry {
402 pub resolution_label: String,
403 pub width: usize,
404 pub height: usize,
405 pub pixel_count: usize,
406 pub warmup_runs: usize,
407 pub measured_runs: usize,
408 pub local_aggregation: bool,
409 pub mean_dispatch_ms: f64,
411 pub mean_total_ms: f64,
413 pub min_dispatch_ms: f64,
414 pub max_dispatch_ms: f64,
415 pub adapter_name: Option<String>,
416 pub backend: Option<String>,
417 pub actual_gpu_timing: bool,
418 pub notes: Vec<String>,
419}
420
421#[derive(Clone, Debug, Serialize)]
423pub struct FastPathTimingStudy {
424 pub measurement_kind: String,
425 pub actual_gpu_timing: bool,
426 pub lambda: f32,
427 pub k: f32,
428 pub entries: Vec<FastPathTimingEntry>,
429 pub notes: Vec<String>,
430}
431
432pub fn run_fast_path_timing_study() -> Result<FastPathTimingStudy> {
440 let resolutions: &[(&str, usize, usize)] = if cfg!(debug_assertions) {
441 &[("854x480_debug", 854, 480), ("1280x720_debug", 1280, 720)]
443 } else {
444 &[("1920x1080", 1920, 1080), ("3840x2160", 3840, 2160)]
445 };
446
447 let warmup_runs = 3usize;
448 let measured_runs = 10usize;
449
450 let gpu = pollster::block_on(acquire_device());
452 let (device, queue, adapter_name, backend) = match gpu {
453 Some(g) => g,
454 None => {
455 return Ok(FastPathTimingStudy {
456 measurement_kind: "gpu_unavailable_no_measurement".to_string(),
457 actual_gpu_timing: false,
458 lambda: FAST_PATH_LAMBDA,
459 k: FAST_PATH_K,
460 entries: vec![],
461 notes: vec![
462 "No wgpu adapter was available. GPU timing could not be measured.".to_string(),
463 ],
464 });
465 }
466 };
467
468 let shader = device.create_shader_module(wgpu::ShaderModuleDescriptor {
470 label: Some("fp-timing-shader"),
471 source: wgpu::ShaderSource::Wgsl(FAST_PATH_SHADER.into()),
472 });
473 let bgl = make_bind_group_layout(&device);
474 let pipeline = make_pipeline(&device, &shader, &bgl);
475
476 let mut entries = Vec::new();
477
478 for &(label, width, height) in resolutions {
479 let pixel_count = width * height;
480 let current_rgb: Vec<[f32; 3]> = vec![[0.5f32, 0.5, 0.5]; pixel_count];
482 let history_rgb: Vec<[f32; 3]> = vec![[0.48f32, 0.48, 0.48]; pixel_count];
483 let res_in = vec![0.0f32; pixel_count];
484 let drift_in = vec![0.0f32; pixel_count];
485
486 let current_packed = pack_colors(¤t_rgb);
487 let history_packed = pack_colors(&history_rgb);
488 let params_val = FpGpuParams {
489 size: [width as u32, height as u32, 0, 0],
490 coefficients: [FAST_PATH_LAMBDA, FAST_PATH_K, 0.0, 0.0],
491 };
492 let out_bytes = (pixel_count * std::mem::size_of::<f32>()) as u64;
493
494 let (
495 cur_buf,
496 hist_buf,
497 res_in_buf,
498 drift_in_buf,
499 params_buf,
500 trust_buf,
501 res_out_buf,
502 drift_out_buf,
503 ) = upload_gpu_buffers(
504 &device,
505 ¤t_packed,
506 &history_packed,
507 &res_in,
508 &drift_in,
509 ¶ms_val,
510 pixel_count,
511 );
512 let trust_staging = make_staging(&device, out_bytes, "fp-timing-stg");
513 let bind_group = make_bind_group(
514 &device,
515 &bgl,
516 &cur_buf,
517 &hist_buf,
518 &res_in_buf,
519 &drift_in_buf,
520 ¶ms_buf,
521 &trust_buf,
522 &res_out_buf,
523 &drift_out_buf,
524 );
525
526 for _ in 0..warmup_runs {
528 let mut enc = device
529 .create_command_encoder(&wgpu::CommandEncoderDescriptor { label: None });
530 {
531 let mut pass = enc.begin_compute_pass(&wgpu::ComputePassDescriptor {
532 label: None,
533 timestamp_writes: None,
534 });
535 pass.set_pipeline(&pipeline);
536 pass.set_bind_group(0, &bind_group, &[]);
537 pass.dispatch_workgroups(width as u32, height as u32, 1);
538 }
539 queue.submit(Some(enc.finish()));
540 device.poll(wgpu::Maintain::Wait);
541 }
542
543 let mut dispatch_times = Vec::with_capacity(measured_runs);
545 let mut total_times = Vec::with_capacity(measured_runs);
546 for _ in 0..measured_runs {
547 let t0 = Instant::now();
548 let d0 = Instant::now();
549 let mut enc = device
550 .create_command_encoder(&wgpu::CommandEncoderDescriptor { label: None });
551 {
552 let mut pass = enc.begin_compute_pass(&wgpu::ComputePassDescriptor {
553 label: None,
554 timestamp_writes: None,
555 });
556 pass.set_pipeline(&pipeline);
557 pass.set_bind_group(0, &bind_group, &[]);
558 pass.dispatch_workgroups(width as u32, height as u32, 1);
559 }
560 enc.copy_buffer_to_buffer(&trust_buf, 0, &trust_staging, 0, out_bytes);
562 queue.submit(Some(enc.finish()));
563 device.poll(wgpu::Maintain::Wait);
564 dispatch_times.push(d0.elapsed().as_secs_f64() * 1000.0);
565 total_times.push(t0.elapsed().as_secs_f64() * 1000.0);
566 }
567
568 let n = measured_runs as f64;
569 let mean_dispatch = dispatch_times.iter().sum::<f64>() / n;
570 let mean_total = total_times.iter().sum::<f64>() / n;
571 let min_d = dispatch_times.iter().cloned().fold(f64::INFINITY, f64::min);
572 let max_d = dispatch_times.iter().cloned().fold(f64::NEG_INFINITY, f64::max);
573
574 entries.push(FastPathTimingEntry {
575 resolution_label: label.to_string(),
576 width,
577 height,
578 pixel_count,
579 warmup_runs,
580 measured_runs,
581 local_aggregation: false,
582 mean_dispatch_ms: mean_dispatch,
583 mean_total_ms: mean_total,
584 min_dispatch_ms: min_d,
585 max_dispatch_ms: max_d,
586 adapter_name: Some(adapter_name.clone()),
587 backend: Some(backend.clone()),
588 actual_gpu_timing: true,
589 notes: vec![
590 "Timing uses wgpu dispatch + Maintain::Wait (CPU-side wall clock).".to_string(),
591 "Inputs are synthetic uniform-colour buffers; no real capture data required.".to_string(),
592 "This reflects the reduced proxy only, not the full DSFB supervisory system.".to_string(),
593 ],
594 });
595 }
596
597 Ok(FastPathTimingStudy {
598 measurement_kind: "gpu_fast_path_proxy_cpu_wall_clock".to_string(),
599 actual_gpu_timing: true,
600 lambda: FAST_PATH_LAMBDA,
601 k: FAST_PATH_K,
602 entries,
603 notes: vec![
604 "These timings reflect the minimal inline deployment proxy.".to_string(),
605 "They must not be interpreted as the cost of the full DSFB supervisory system.".to_string(),
606 ],
607 })
608}
609
610async fn acquire_device() -> Option<(wgpu::Device, wgpu::Queue, String, String)> {
613 let instance = wgpu::Instance::default();
614 let adapter = instance
615 .request_adapter(&wgpu::RequestAdapterOptions {
616 power_preference: wgpu::PowerPreference::HighPerformance,
617 compatible_surface: None,
618 force_fallback_adapter: false,
619 })
620 .await?;
621 let info = adapter.get_info();
622 let adapter_limits = adapter.limits();
623 let (device, queue) = adapter
624 .request_device(
625 &wgpu::DeviceDescriptor {
626 label: Some("dsfb-fast-path-timing"),
627 required_features: wgpu::Features::empty(),
628 required_limits: wgpu::Limits {
629 max_storage_buffer_binding_size: adapter_limits
630 .max_storage_buffer_binding_size,
631 max_buffer_size: adapter_limits.max_buffer_size,
632 ..wgpu::Limits::default()
633 },
634 },
635 None,
636 )
637 .await
638 .ok()?;
639 Some((device, queue, info.name, format!("{:?}", info.backend)))
640}
641
642fn make_bind_group_layout(device: &wgpu::Device) -> wgpu::BindGroupLayout {
643 device.create_bind_group_layout(&wgpu::BindGroupLayoutDescriptor {
644 label: Some("fp-bgl"),
645 entries: &[
646 storage_entry(0, true),
647 storage_entry(1, true),
648 storage_entry(2, true),
649 storage_entry(3, true),
650 uniform_entry(4),
651 storage_entry(5, false),
652 storage_entry(6, false),
653 storage_entry(7, false),
654 ],
655 })
656}
657
658fn make_pipeline(
659 device: &wgpu::Device,
660 shader: &wgpu::ShaderModule,
661 bgl: &wgpu::BindGroupLayout,
662) -> wgpu::ComputePipeline {
663 let layout = device.create_pipeline_layout(&wgpu::PipelineLayoutDescriptor {
664 label: None,
665 bind_group_layouts: &[bgl],
666 push_constant_ranges: &[],
667 });
668 device.create_compute_pipeline(&wgpu::ComputePipelineDescriptor {
669 label: Some("dsfb-fast-path-pipeline"),
670 layout: Some(&layout),
671 module: shader,
672 entry_point: "main",
673 })
674}
675
676#[allow(clippy::too_many_arguments)]
677fn upload_gpu_buffers(
678 device: &wgpu::Device,
679 current_packed: &[FpColor4],
680 history_packed: &[FpColor4],
681 residual_in: &[f32],
682 drift_in: &[f32],
683 params: &FpGpuParams,
684 pixel_count: usize,
685) -> (
686 wgpu::Buffer,
687 wgpu::Buffer,
688 wgpu::Buffer,
689 wgpu::Buffer,
690 wgpu::Buffer,
691 wgpu::Buffer,
692 wgpu::Buffer,
693 wgpu::Buffer,
694) {
695 let out_size = (pixel_count * std::mem::size_of::<f32>()) as u64;
696 let cur = device.create_buffer_init(&wgpu::util::BufferInitDescriptor {
697 label: Some("fp-cur"),
698 contents: bytemuck::cast_slice(current_packed),
699 usage: wgpu::BufferUsages::STORAGE,
700 });
701 let hist = device.create_buffer_init(&wgpu::util::BufferInitDescriptor {
702 label: Some("fp-hist"),
703 contents: bytemuck::cast_slice(history_packed),
704 usage: wgpu::BufferUsages::STORAGE,
705 });
706 let res_in = device.create_buffer_init(&wgpu::util::BufferInitDescriptor {
707 label: Some("fp-res-in"),
708 contents: bytemuck::cast_slice(residual_in),
709 usage: wgpu::BufferUsages::STORAGE,
710 });
711 let drift_in_buf = device.create_buffer_init(&wgpu::util::BufferInitDescriptor {
712 label: Some("fp-drift-in"),
713 contents: bytemuck::cast_slice(drift_in),
714 usage: wgpu::BufferUsages::STORAGE,
715 });
716 let params_buf = device.create_buffer_init(&wgpu::util::BufferInitDescriptor {
717 label: Some("fp-params"),
718 contents: bytemuck::bytes_of(params),
719 usage: wgpu::BufferUsages::UNIFORM,
720 });
721 let trust_out = device.create_buffer(&wgpu::BufferDescriptor {
722 label: Some("fp-trust"),
723 size: out_size,
724 usage: wgpu::BufferUsages::STORAGE | wgpu::BufferUsages::COPY_SRC,
725 mapped_at_creation: false,
726 });
727 let res_out = device.create_buffer(&wgpu::BufferDescriptor {
728 label: Some("fp-res-out"),
729 size: out_size,
730 usage: wgpu::BufferUsages::STORAGE | wgpu::BufferUsages::COPY_SRC,
731 mapped_at_creation: false,
732 });
733 let drift_out = device.create_buffer(&wgpu::BufferDescriptor {
734 label: Some("fp-drift-out"),
735 size: out_size,
736 usage: wgpu::BufferUsages::STORAGE | wgpu::BufferUsages::COPY_SRC,
737 mapped_at_creation: false,
738 });
739 (cur, hist, res_in, drift_in_buf, params_buf, trust_out, res_out, drift_out)
740}
741
742#[allow(clippy::too_many_arguments)]
743fn make_bind_group(
744 device: &wgpu::Device,
745 bgl: &wgpu::BindGroupLayout,
746 cur: &wgpu::Buffer,
747 hist: &wgpu::Buffer,
748 res_in: &wgpu::Buffer,
749 drift_in: &wgpu::Buffer,
750 params: &wgpu::Buffer,
751 trust_out: &wgpu::Buffer,
752 res_out: &wgpu::Buffer,
753 drift_out: &wgpu::Buffer,
754) -> wgpu::BindGroup {
755 device.create_bind_group(&wgpu::BindGroupDescriptor {
756 label: None,
757 layout: bgl,
758 entries: &[
759 buf_entry(0, cur),
760 buf_entry(1, hist),
761 buf_entry(2, res_in),
762 buf_entry(3, drift_in),
763 buf_entry(4, params),
764 buf_entry(5, trust_out),
765 buf_entry(6, res_out),
766 buf_entry(7, drift_out),
767 ],
768 })
769}
770
771fn make_staging(device: &wgpu::Device, size: u64, label: &str) -> wgpu::Buffer {
772 device.create_buffer(&wgpu::BufferDescriptor {
773 label: Some(label),
774 size,
775 usage: wgpu::BufferUsages::COPY_DST | wgpu::BufferUsages::MAP_READ,
776 mapped_at_creation: false,
777 })
778}
779
780fn read_f32_buf(device: &wgpu::Device, buf: &wgpu::Buffer, count: usize) -> Result<Vec<f32>> {
781 let slice = buf.slice(..);
782 let (tx, rx) = mpsc::channel();
783 slice.map_async(wgpu::MapMode::Read, move |r| {
784 let _ = tx.send(r);
785 });
786 device.poll(wgpu::Maintain::Wait);
787 rx.recv()
788 .map_err(|_| Error::Message("GPU map_async channel closed".to_string()))?
789 .map_err(|e| Error::Message(format!("GPU map_async failed: {e}")))?;
790 let mapped = slice.get_mapped_range();
791 let values: Vec<f32> = bytemuck::cast_slice(&mapped).to_vec();
792 drop(mapped);
793 buf.unmap();
794 if values.len() != count {
795 return Err(Error::Message(format!(
796 "GPU readback: expected {count} f32 values, got {}",
797 values.len()
798 )));
799 }
800 Ok(values)
801}
802
803fn pack_colors(pixels: &[[f32; 3]]) -> Vec<FpColor4> {
804 pixels
805 .iter()
806 .map(|&[r, g, b]| FpColor4 { value: [r, g, b, 1.0] })
807 .collect()
808}
809
810fn storage_entry(binding: u32, read_only: bool) -> wgpu::BindGroupLayoutEntry {
811 wgpu::BindGroupLayoutEntry {
812 binding,
813 visibility: wgpu::ShaderStages::COMPUTE,
814 ty: wgpu::BindingType::Buffer {
815 ty: wgpu::BufferBindingType::Storage { read_only },
816 has_dynamic_offset: false,
817 min_binding_size: None,
818 },
819 count: None,
820 }
821}
822
823fn uniform_entry(binding: u32) -> wgpu::BindGroupLayoutEntry {
824 wgpu::BindGroupLayoutEntry {
825 binding,
826 visibility: wgpu::ShaderStages::COMPUTE,
827 ty: wgpu::BindingType::Buffer {
828 ty: wgpu::BufferBindingType::Uniform,
829 has_dynamic_offset: false,
830 min_binding_size: None,
831 },
832 count: None,
833 }
834}
835
836fn buf_entry(binding: u32, buffer: &wgpu::Buffer) -> wgpu::BindGroupEntry<'_> {
837 wgpu::BindGroupEntry {
838 binding,
839 resource: buffer.as_entire_binding(),
840 }
841}
842
843pub fn render_trust_strip_svg(trust: &[f32], width: usize, height: usize) -> String {
849 let sample_width = width.min(256);
850 let step = width / sample_width.max(1);
851 let bar_h = 40usize;
852 let total_w = sample_width * 2;
853 let total_h = bar_h + 20;
854 let mut svg = format!(
855 r#"<svg xmlns="http://www.w3.org/2000/svg" width="{total_w}" height="{total_h}">"#
856 );
857 let mid_y = height / 2;
859 for col in 0..sample_width {
860 let src_x = (col * step).min(width.saturating_sub(1));
861 let i = mid_y * width + src_x;
862 let t = trust.get(i).copied().unwrap_or(1.0).clamp(0.0, 1.0);
863 let g = (t * 220.0) as u8;
864 let r = ((1.0 - t) * 220.0) as u8;
865 svg.push_str(&format!(
866 r#"<rect x="{}" y="0" width="2" height="{bar_h}" fill="rgb({r},{g},80)"/>"#,
867 col * 2
868 ));
869 }
870 svg.push_str(&format!(
871 "<text x=\"4\" y=\"{}\" font-size=\"10\" fill=\"#444\">trust (mid-row, {}x{})</text>",
872 bar_h + 14,
873 width,
874 height
875 ));
876 svg.push_str("</svg>");
877 svg
878}