1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
//! # `oxiui-compute-wgpu`
//!
//! Pure-Rust wgpu GPU-compute abstraction for the COOLJAPAN ecosystem.
//!
//! This crate consolidates the repeated `Instance → Adapter → Device → Queue`
//! initialisation boilerplate that `oxiaero-cfd`, `oxiaero-mcdc`, and similar
//! crates each duplicated for pure GPU compute workloads (sparse linear
//! solvers, Lattice-Boltzmann, Monte-Carlo simulations, …).
//!
//! ## Quick start
//!
//! ```rust
//! use oxiui_compute_wgpu::{bytemuck, compute_pipeline, read_back, storage_buffer_init, wgpu, ComputeContext};
//!
//! let Some(ctx) = ComputeContext::try_new() else {
//!     return; // no GPU — skip gracefully
//! };
//!
//! let input: Vec<f32> = vec![1.0, 2.0, 3.0, 4.0];
//! let buffer = storage_buffer_init(&ctx.device, "values", bytemuck::cast_slice(&input));
//!
//! const SHADER: &str = r#"
//!     @group(0) @binding(0) var<storage, read_write> data: array<f32>;
//!     @compute @workgroup_size(64)
//!     fn main(@builtin(global_invocation_id) gid: vec3<u32>) {
//!         if gid.x < arrayLength(&data) {
//!             data[gid.x] = data[gid.x] * 2.0;
//!         }
//!     }
//! "#;
//! let pipeline = compute_pipeline(&ctx.device, SHADER, "main");
//!
//! let bind_group = ctx.device.create_bind_group(&wgpu::BindGroupDescriptor {
//!     label: Some("values-bind"),
//!     layout: &pipeline.get_bind_group_layout(0),
//!     entries: &[wgpu::BindGroupEntry {
//!         binding: 0,
//!         resource: buffer.as_entire_binding(),
//!     }],
//! });
//!
//! let mut encoder = ctx.device.create_command_encoder(&wgpu::CommandEncoderDescriptor { label: None });
//! {
//!     let mut pass = encoder.begin_compute_pass(&wgpu::ComputePassDescriptor {
//!         label: None,
//!         timestamp_writes: None,
//!     });
//!     pass.set_pipeline(&pipeline);
//!     pass.set_bind_group(0, &bind_group, &[]);
//!     pass.dispatch_workgroups((input.len() as u32 + 63) / 64, 1, 1);
//! }
//! ctx.queue.submit(std::iter::once(encoder.finish()));
//!
//! let output: Vec<f32> = read_back(&ctx.device, &ctx.queue, &buffer, input.len());
//! assert_eq!(output, vec![2.0, 4.0, 6.0, 8.0]);
//! ```
//!
//! ## Module structure
//!
//! | Module | Contents |
//! |--------|----------|
//! | [`context`] | [`ComputeContext`] — headless `Device` + `Queue` init |
//! | [`buffer`] | Storage / uniform / staging buffer helpers + [`read_back`] |
//! | [`pipeline`] | [`compute_pipeline`] builder |
//! | [`error`] | [`ComputeError`] type |
//! | [`wgsl`] | WGSL preprocessor, validation, and built-in compute kernels |
//!
//! ## Dependency re-exports
//!
//! `oxiui-compute-wgpu` re-exports [`wgpu`], [`bytemuck`], and [`pollster`] so
//! that consumers need only a single dependency declaration in their
//! `Cargo.toml`.
// ── Flat re-exports ────────────────────────────────────────────────────────────
// From buffer
pub use ;
// From context
pub use ;
// From error
pub use ComputeError;
// From pipeline
pub use ;
// From wgsl
pub use ;
// ── Underlying crate re-exports ───────────────────────────────────────────────
/// Re-export of [`wgpu`] so consumers need only declare `oxiui-compute-wgpu`.
pub use wgpu;
/// Re-export of [`bytemuck`] for `Pod`/`Zeroable` derives and casting helpers.
pub use bytemuck;
/// Re-export of [`pollster`] for blocking on async wgpu operations.
pub use pollster;
// ── Top-level tests ───────────────────────────────────────────────────────────