Skip to main content

gpu_million_points/
gpu_million_points.rs

1//! Demo headless del HAL GPU directo — Fase 6 del SDD
2//! `02_ruway/llimphi/SDD.md` §"GPU directo wgpu".
3//!
4//! A diferencia de `spike_gpu_directo` (que compara vello vs un pipeline
5//! mock para tomar la decisión arquitectónica), este ejemplo usa
6//! directamente la API pública `GpuPipelines` + `GpuBatch` sobre N
7//! puntos (rects 1.2×1.2 px) sintéticos. Su rol es:
8//!
9//! - Documentar el uso mínimo: 8 líneas de código + uso de Color.
10//! - Ejercitar el HAL sin ninguna app (sin winit, sin runtime Elm).
11//! - Servir de benchmark de referencia post-implementación: tiempo
12//!   total CPU+GPU para 100K / 500K / 1M / 5M rects.
13//!
14//! Corre con: `cargo run -p llimphi-raster --example gpu_million_points --release`.
15
16use std::io::Write;
17use std::time::Instant;
18
19use llimphi_hal::{wgpu, Hal};
20use llimphi_raster::peniko::Color;
21use llimphi_raster::{GpuBatch, GpuPipelines};
22
/// Width of the offscreen render target, in pixels.
const W: u32 = 1024;
/// Height of the offscreen render target, in pixels.
const H: u32 = 1024;
/// Texture format shared by the render target and the pipelines built for it.
const FMT: wgpu::TextureFormat = wgpu::TextureFormat::Rgba8Unorm;
/// Frames rendered per size before timing samples start being recorded.
const WARMUP: usize = 5;
/// Frames per size whose wall-clock time is recorded.
const MEASURED: usize = 15;
/// Rect counts to benchmark, one table row each.
const SIZES: &[u32] = &[100_000, 500_000, 1_000_000, 5_000_000];
29
30fn main() {
31    let hal = pollster::block_on(Hal::new(None)).expect("hal");
32    let pipelines = GpuPipelines::new(&hal.device, FMT);
33
34    let (_tex, view) = make_target(&hal.device);
35
36    println!();
37    println!("gpu_million_points — GpuBatch + 3 pipelines · target {W}×{H} Rgba8Unorm");
38    println!("warmup {WARMUP}, measured {MEASURED}");
39    println!("  {:>10} | {:>14} | {:>14}", "N", "ms / frame", "Mprim/s");
40    println!("  {:->10} + {:->14} + {:->14}", "", "", "");
41
42    for &n in SIZES {
43        let ms = bench(&hal, &pipelines, &view, n);
44        let throughput = (n as f64 / 1_000_000.0) / (ms / 1000.0);
45        println!("  {:>10} | {:>14.3} | {:>14.2}", n, ms, throughput);
46        let _ = std::io::stdout().flush();
47    }
48    println!();
49    println!("(en llvmpipe estos números son CPU-bound — ver Fase 0 del SDD)");
50    println!();
51}
52
53fn make_target(device: &wgpu::Device) -> (wgpu::Texture, wgpu::TextureView) {
54    let tex = device.create_texture(&wgpu::TextureDescriptor {
55        label: Some("gpu_million_points-target"),
56        size: wgpu::Extent3d {
57            width: W,
58            height: H,
59            depth_or_array_layers: 1,
60        },
61        mip_level_count: 1,
62        sample_count: 1,
63        dimension: wgpu::TextureDimension::D2,
64        format: FMT,
65        usage: wgpu::TextureUsages::RENDER_ATTACHMENT,
66        view_formats: &[],
67    });
68    let view = tex.create_view(&wgpu::TextureViewDescriptor::default());
69    (tex, view)
70}
71
72fn bench(hal: &Hal, pipelines: &GpuPipelines, view: &wgpu::TextureView, n: u32) -> f64 {
73    let mut samples: Vec<f64> = Vec::with_capacity(MEASURED);
74    for frame in 0..(WARMUP + MEASURED) {
75        let t0 = Instant::now();
76        let mut batch = GpuBatch::new(pipelines);
77        let mut state: u32 = 0x1234_5678;
78        for _ in 0..n {
79            state = state.wrapping_mul(1_664_525).wrapping_add(1_013_904_223);
80            let x = (state % W) as f32;
81            state = state.wrapping_mul(1_664_525).wrapping_add(1_013_904_223);
82            let y = (state % H) as f32;
83            state = state.wrapping_mul(1_664_525).wrapping_add(1_013_904_223);
84            let r = ((state >>  0) & 0xFF) as f32 / 255.0;
85            let g = ((state >>  8) & 0xFF) as f32 / 255.0;
86            let b = ((state >> 16) & 0xFF) as f32 / 255.0;
87            batch.add_rect(x, y, 1.2, 1.2, Color::new([r, g, b, 1.0]));
88        }
89        let mut encoder = hal.device.create_command_encoder(
90            &wgpu::CommandEncoderDescriptor {
91                label: Some("gpu_million_points-enc"),
92            },
93        );
94        batch.flush(
95            &hal.device,
96            &hal.queue,
97            &mut encoder,
98            view,
99            (W as f32, H as f32),
100            wgpu::LoadOp::Clear(wgpu::Color::BLACK),
101        );
102        hal.queue.submit(std::iter::once(encoder.finish()));
103        hal.device.poll(wgpu::PollType::wait_indefinitely());
104        let dt = t0.elapsed().as_secs_f64() * 1000.0;
105        if frame >= WARMUP {
106            samples.push(dt);
107        }
108    }
109    samples.sort_by(|a, b| a.partial_cmp(b).unwrap());
110    samples[samples.len() / 2]
111}