oxiui_compute_wgpu/context.rs
1//! Headless GPU compute context: `Instance` → `Adapter` → `Device` + `Queue`.
2//!
//! [`ComputeContext`] performs the full no-window, no-surface initialisation
//! chain required for pure GPU compute workloads (sparse solvers, LBM, MC/DC,
//! …). Four constructors plus a fluent [`ContextBuilder`] are provided:
6//!
7//! * [`ComputeContext::try_new`] — returns `Option<Self>`; `None` means no GPU
8//! adapter is available (graceful CI skip, never panics).
9//! * [`ComputeContext::new`] — returns `Result<Self, ComputeError>`; exposes the
10//! underlying failure reason through [`ComputeError`].
11//! * [`ComputeContext::new_async`] — async variant; awaits adapter and device
12//! requests directly without a `pollster::block_on` wrapper.
13//! * [`ComputeContext::builder`] — returns a [`ContextBuilder`] for fluent
14//! configuration of limits, features, and power preference.
15//! * [`ComputeContext::from_device`] — wraps an externally owned
16//! `wgpu::Device` + `wgpu::Queue` (e.g. from `oxiui-render-wgpu`) so the
17//! compute layer can share the render backend's device.
18//!
19//! ## Multi-queue support
20//!
//! [`ContextBuilder::with_multi_queue`] records the intent to use a dedicated
//! transfer queue on adapters that expose separate transfer and compute queue
//! families. wgpu currently hands out a single queue per device, so for now
//! the context always falls back to the one shared queue and
//! `transfer_queue()` returns `None`.
25//!
26//! Both sync constructors use `PowerPreference::HighPerformance` and
27//! `wgpu::Limits::default()` (not `downlevel_defaults()`, which caps the
28//! compute feature set).
29
30use crate::error::ComputeError;
31
32// ── ComputeContext ─────────────────────────────────────────────────────────────
33
34/// An initialised headless GPU compute context.
35///
36/// Owns the logical [`wgpu::Device`], the primary compute [`wgpu::Queue`],
37/// an optional dedicated transfer queue (when the adapter exposes more than one
38/// queue family), and the [`wgpu::AdapterInfo`] snapshot captured at
39/// construction time. No window handle, surface, or swap-chain is involved.
40pub struct ComputeContext {
41 /// The logical GPU device.
42 pub device: wgpu::Device,
43 /// The primary command submission queue (compute and, when no separate
44 /// transfer queue is available, also used for DMA transfers).
45 pub queue: wgpu::Queue,
46 /// A dedicated transfer queue, present only when the adapter exposes
47 /// separate queue families **and** `ContextBuilder::with_multi_queue` was
48 /// called. `None` when the adapter provides a single shared queue family.
49 transfer_queue: Option<wgpu::Queue>,
50 /// Adapter metadata snapshot (vendor, backend, driver, …).
51 adapter_info: wgpu::AdapterInfo,
52}
53
54impl ComputeContext {
55 /// Return a reference to the adapter metadata captured at construction time.
56 ///
57 /// The returned [`wgpu::AdapterInfo`] contains fields such as `name`,
58 /// `vendor`, `device`, `backend`, `driver`, and `driver_info`.
59 ///
60 /// ```rust,no_run
61 /// use oxiui_compute_wgpu::ComputeContext;
62 ///
63 /// if let Some(ctx) = ComputeContext::try_new() {
64 /// let info = ctx.adapter_info();
65 /// println!("GPU backend: {:?}", info.backend);
66 /// }
67 /// ```
68 pub fn adapter_info(&self) -> &wgpu::AdapterInfo {
69 &self.adapter_info
70 }
71
72 /// Return the dedicated transfer queue when one was obtained.
73 ///
74 /// Returns `Some` only when [`ContextBuilder::with_multi_queue`] was
75 /// called **and** the underlying adapter exposed a separate transfer queue
76 /// family. Callers should fall back to `self.queue` when this is `None`.
77 ///
78 /// ```rust,no_run
79 /// use oxiui_compute_wgpu::ComputeContext;
80 ///
81 /// if let Some(ctx) = ComputeContext::try_new() {
82 /// if let Some(tq) = ctx.transfer_queue() {
83 /// // Use the dedicated DMA queue for uploads.
84 /// let _ = tq;
85 /// }
86 /// }
87 /// ```
88 pub fn transfer_queue(&self) -> Option<&wgpu::Queue> {
89 self.transfer_queue.as_ref()
90 }
91
92 /// Return a [`ContextBuilder`] for fluent configuration of limits,
93 /// features, and power preference.
94 ///
95 /// ```rust,no_run
96 /// use oxiui_compute_wgpu::ComputeContext;
97 ///
98 /// let ctx = ComputeContext::builder()
99 /// .with_power_preference(wgpu::PowerPreference::LowPower)
100 /// .build();
101 /// ```
102 pub fn builder() -> ContextBuilder {
103 ContextBuilder::default()
104 }
105
106 /// Create a context with high-performance power preference and default limits.
107 ///
108 /// # Errors
109 ///
110 /// * [`ComputeError::NoAdapter`] — no suitable GPU adapter was found.
111 /// * [`ComputeError::DeviceRequest`] — the device/queue request failed.
112 ///
113 /// ```rust,no_run
114 /// use oxiui_compute_wgpu::{ComputeContext, ComputeError};
115 ///
116 /// match ComputeContext::new() {
117 /// Ok(ctx) => { let _ = ctx; }
118 /// Err(ComputeError::NoAdapter) => { /* skip */ }
119 /// Err(e) => panic!("unexpected: {e}"),
120 /// }
121 /// ```
122 #[cfg_attr(feature = "tracing", tracing::instrument(level = "debug"))]
123 pub fn new() -> Result<Self, ComputeError> {
124 ContextBuilder::default().build()
125 }
126
127 /// Try to create a `ComputeContext`, returning `None` when no suitable GPU
128 /// adapter is available on this host.
129 ///
130 /// This constructor never panics. Call sites that want a graceful skip on
131 /// headless CI environments (VMs, containers without GPU pass-through) should
132 /// use this variant:
133 ///
134 /// ```rust,no_run
135 /// use oxiui_compute_wgpu::ComputeContext;
136 ///
137 /// if let Some(ctx) = ComputeContext::try_new() {
138 /// // GPU is available — run the compute workload
139 /// let _ = ctx;
140 /// } else {
141 /// // No GPU — skip gracefully
142 /// }
143 /// ```
144 pub fn try_new() -> Option<Self> {
145 Self::new().ok()
146 }
147
148 /// Async variant of [`new`][Self::new] — awaits adapter and device requests
149 /// directly without a `pollster::block_on` wrapper.
150 ///
151 /// Suitable for use inside an async runtime (Tokio, async-std, etc.).
152 ///
153 /// # Errors
154 ///
155 /// Same as [`new`][Self::new].
156 ///
157 /// ```rust,no_run
158 /// use oxiui_compute_wgpu::ComputeContext;
159 ///
160 /// # async fn run() -> Result<(), oxiui_compute_wgpu::ComputeError> {
161 /// let ctx = ComputeContext::new_async().await?;
162 /// # Ok(())
163 /// # }
164 /// ```
165 pub async fn new_async() -> Result<Self, ComputeError> {
166 ContextBuilder::default().build_async().await
167 }
168
169 /// Wrap externally owned `wgpu::Device` and `wgpu::Queue` in a
170 /// `ComputeContext` so that the compute layer can share a device/queue pair
171 /// that was already created by another backend (e.g. `oxiui-render-wgpu`).
172 ///
173 /// A synthetic [`wgpu::AdapterInfo`] is constructed from the optional
174 /// `adapter_info` argument; pass `None` to use a placeholder.
175 ///
176 /// # Example
177 ///
178 /// ```rust,no_run
179 /// use oxiui_compute_wgpu::ComputeContext;
180 ///
181 /// // Suppose `device` and `queue` come from an external renderer.
182 /// # fn external() -> (wgpu::Device, wgpu::Queue) { unimplemented!() }
183 /// let (device, queue) = external();
184 /// let ctx = ComputeContext::from_device(device, queue, None);
185 /// ```
186 pub fn from_device(
187 device: wgpu::Device,
188 queue: wgpu::Queue,
189 adapter_info: Option<wgpu::AdapterInfo>,
190 ) -> Self {
191 let adapter_info = adapter_info.unwrap_or_else(|| wgpu::AdapterInfo {
192 name: "external".into(),
193 vendor: 0,
194 device: 0,
195 device_type: wgpu::DeviceType::Other,
196 device_pci_bus_id: String::new(),
197 driver: String::new(),
198 driver_info: String::new(),
199 backend: wgpu::Backend::Noop,
200 subgroup_min_size: 0,
201 subgroup_max_size: 0,
202 transient_saves_memory: false,
203 });
204 ComputeContext {
205 device,
206 queue,
207 transfer_queue: None,
208 adapter_info,
209 }
210 }
211
212 // ── Convenience delegates to ContextBuilder ─────────────────────────────
213
214 /// Create a [`crate::dispatch::Dispatcher`] that borrows this context.
215 ///
216 /// The `Dispatcher` provides high-level, zero-boilerplate GPU compute
217 /// operations (`map_f32`, `zip_map_f32`, `reduce_sum_f32`, `sph_density`,
218 /// `sort_f32`, …).
219 ///
220 /// ```rust,no_run
221 /// use oxiui_compute_wgpu::ComputeContext;
222 ///
223 /// if let Some(ctx) = ComputeContext::try_new() {
224 /// let d = ctx.dispatcher();
225 /// let out = d.map_f32(&[1.0, 2.0, 3.0], "x * 2.0");
226 /// assert_eq!(out, vec![2.0, 4.0, 6.0]);
227 /// }
228 /// ```
229 pub fn dispatcher(&self) -> crate::dispatch::Dispatcher<'_> {
230 crate::dispatch::Dispatcher::new(self)
231 }
232
233 /// Start building a context with custom memory limits.
234 ///
235 /// Equivalent to `ComputeContext::builder().with_limits(limits)`.
236 pub fn with_limits(limits: wgpu::Limits) -> ContextBuilder {
237 ContextBuilder::default().with_limits(limits)
238 }
239
240 /// Start building a context with specific GPU features enabled.
241 ///
242 /// Equivalent to `ComputeContext::builder().with_features(features)`.
243 pub fn with_features(features: wgpu::Features) -> ContextBuilder {
244 ContextBuilder::default().with_features(features)
245 }
246
247 /// Start building a context with a specific power preference.
248 ///
249 /// Equivalent to `ComputeContext::builder().with_power_preference(pref)`.
250 pub fn with_power_preference(pref: wgpu::PowerPreference) -> ContextBuilder {
251 ContextBuilder::default().with_power_preference(pref)
252 }
253
254 /// Return a [`crate::hot_reload::ShaderWatcher`] that watches WGSL source
255 /// files on disk and signals when recompilation is needed.
256 ///
257 /// Only available when the `hot-reload` Cargo feature is enabled.
258 ///
259 /// ```rust,no_run
260 /// # #[cfg(feature = "hot-reload")]
261 /// # {
262 /// use oxiui_compute_wgpu::ComputeContext;
263 ///
264 /// if let Some(ctx) = ComputeContext::try_new() {
265 /// let _watcher = ctx.watcher();
266 /// // Add paths to watch, then call watcher.drain_changed() each frame.
267 /// }
268 /// # }
269 /// ```
270 #[cfg(feature = "hot-reload")]
271 pub fn watcher(&self) -> crate::hot_reload::ShaderWatcher {
272 crate::hot_reload::ShaderWatcher::new()
273 }
274}
275
276// ── ContextBuilder ─────────────────────────────────────────────────────────────
277
278/// Fluent builder for [`ComputeContext`].
279///
280/// Compose limits, features, and power preference in one chain, then call
281/// [`build`][ContextBuilder::build] (sync) or [`build_async`][ContextBuilder::build_async]
282/// (async) to finalise.
283///
284/// ```rust,no_run
285/// use oxiui_compute_wgpu::{ComputeContext, ComputeError};
286///
287/// let result = ComputeContext::builder()
288/// .with_power_preference(wgpu::PowerPreference::HighPerformance)
289/// .with_limits(wgpu::Limits::default())
290/// .build();
291/// ```
292#[derive(Debug, Default)]
293pub struct ContextBuilder {
294 power_preference: wgpu::PowerPreference,
295 required_features: wgpu::Features,
296 required_limits: Option<wgpu::Limits>,
297 /// When `true`, request a dedicated transfer queue family (if supported).
298 multi_queue: bool,
299}
300
301impl ContextBuilder {
302 /// Set the GPU power preference.
303 ///
304 /// Defaults to [`wgpu::PowerPreference::HighPerformance`] when not
305 /// called.
306 pub fn with_power_preference(mut self, pref: wgpu::PowerPreference) -> Self {
307 self.power_preference = pref;
308 self
309 }
310
311 /// Request optional GPU features (e.g. `TIMESTAMP_QUERY`, `SHADER_F16`).
312 ///
313 /// If the adapter does not support the requested features, [`ContextBuilder::build`] will
314 /// return [`ComputeError::DeviceRequest`] with a descriptive message before
315 /// attempting `request_device`.
316 pub fn with_features(mut self, features: wgpu::Features) -> Self {
317 self.required_features = features;
318 self
319 }
320
321 /// Override the default device limits.
322 ///
323 /// Use [`wgpu::Limits::downlevel_defaults()`] for maximum compatibility or
324 /// supply custom limits for high-throughput compute workloads.
325 pub fn with_limits(mut self, limits: wgpu::Limits) -> Self {
326 self.required_limits = Some(limits);
327 self
328 }
329
330 /// Request separate transfer and compute queues on adapters that advertise
331 /// more than one queue family.
332 ///
333 /// When the adapter exposes only a single queue family the built context
334 /// falls back gracefully: `transfer_queue()` returns `None` and `queue`
335 /// is used for all operations.
336 ///
337 /// Note: wgpu currently exposes at most one queue per device to the Rust
338 /// API, so this option records the intent and the context exposes
339 /// `transfer_queue()` as `None` until wgpu adds explicit multi-queue
340 /// support. The flag is preserved for forward compatibility.
341 pub fn with_multi_queue(mut self) -> Self {
342 self.multi_queue = true;
343 self
344 }
345
346 /// Blocking variant: run the full adapter + device init on the current thread.
347 ///
348 /// # Errors
349 ///
350 /// * [`ComputeError::NoAdapter`] — no GPU adapter matched the options.
351 /// * [`ComputeError::DeviceRequest`] — features or device request failed.
352 pub fn build(self) -> Result<ComputeContext, ComputeError> {
353 pollster::block_on(self.build_async())
354 }
355
356 /// Async variant: await adapter and device requests inside the caller's runtime.
357 ///
358 /// # Errors
359 ///
360 /// * [`ComputeError::NoAdapter`] — no GPU adapter matched the options.
361 /// * [`ComputeError::DeviceRequest`] — features or device request failed.
362 pub async fn build_async(self) -> Result<ComputeContext, ComputeError> {
363 let instance = wgpu::Instance::default();
364
365 let adapter = instance
366 .request_adapter(&wgpu::RequestAdapterOptions {
367 power_preference: self.power_preference,
368 force_fallback_adapter: false,
369 // No surface — pure compute, no swap-chain required.
370 compatible_surface: None,
371 })
372 .await
373 .map_err(|_| ComputeError::NoAdapter)?;
374
375 // Pre-check requested features before attempting device acquisition so
376 // callers get a clear error instead of a cryptic RequestDeviceError.
377 if !self.required_features.is_empty()
378 && !adapter.features().contains(self.required_features)
379 {
380 return Err(ComputeError::DeviceRequest(format!(
381 "adapter does not support requested features: {:?}",
382 self.required_features
383 )));
384 }
385
386 // Capture adapter metadata before consuming the adapter.
387 let adapter_info = adapter.get_info();
388
389 let limits = self.required_limits.unwrap_or_default();
390
391 let (device, queue) = adapter
392 .request_device(&wgpu::DeviceDescriptor {
393 label: Some("oxiui-compute-wgpu"),
394 required_features: self.required_features,
395 required_limits: limits,
396 ..Default::default()
397 })
398 .await
399 .map_err(|e| ComputeError::DeviceRequest(e.to_string()))?;
400
401 // Multi-queue: wgpu currently exposes one queue per device. The intent
402 // is recorded and `transfer_queue` is set to `None` until wgpu adds
403 // explicit multi-queue support. Callers check `transfer_queue()` and
404 // fall back to `queue` automatically.
405 let transfer_queue: Option<wgpu::Queue> = if self.multi_queue {
406 // Future: when wgpu supports `request_device` returning multiple
407 // queues, acquire a second queue here. For now, advertise that
408 // the context was built with multi-queue intent but that the
409 // adapter does not expose a second queue.
410 None
411 } else {
412 None
413 };
414
415 Ok(ComputeContext {
416 device,
417 queue,
418 transfer_queue,
419 adapter_info,
420 })
421 }
422}
423
424// ── Tests ─────────────────────────────────────────────────────────────────────
425
#[cfg(test)]
mod tests {
    use super::*;

    // ── constructor smoke tests ──────────────────────────────────────────────

    /// `try_new()` must return rather than panic, with or without a GPU.
    #[test]
    fn try_new_does_not_panic() {
        drop(ComputeContext::try_new());
    }

    /// `new()` either succeeds or reports `NoAdapter`; anything else fails
    /// the test.
    #[test]
    fn new_returns_result() {
        if let Err(err) = ComputeContext::new() {
            match err {
                // No GPU on this host (CI, headless VM) — acceptable skip.
                ComputeError::NoAdapter => {}
                ComputeError::DeviceRequest(msg) => {
                    panic!("unexpected DeviceRequest error: {msg}")
                }
                other => {
                    panic!("unexpected error: {other}")
                }
            }
        }
    }

    /// `try_new()` and `new()` must agree: both succeed or both fail.
    #[test]
    fn try_new_consistent_with_new() {
        let from_new = ComputeContext::new();
        let from_try = ComputeContext::try_new();
        match (from_new, from_try.is_some()) {
            (Ok(_), true) | (Err(_), false) => { /* consistent */ }
            (Ok(_), false) => panic!("new() succeeded but try_new() returned None"),
            (Err(e), true) => panic!("new() failed but try_new() returned Some: {e}"),
        }
    }

    // ── builder tests ────────────────────────────────────────────────────────

    /// Chaining every setter and calling `build()` must not panic. The build
    /// outcome depends on the host GPU, so it is not asserted.
    #[test]
    fn builder_chain_defaults() {
        let builder = ContextBuilder::default()
            .with_power_preference(wgpu::PowerPreference::HighPerformance)
            .with_limits(wgpu::Limits::default())
            .with_features(wgpu::Features::empty());
        let _outcome = builder.build();
    }

    /// `with_multi_queue()` followed by `build()` must not panic; the build
    /// outcome is not asserted.
    #[test]
    fn builder_with_multi_queue_does_not_panic() {
        let _outcome = ContextBuilder::default().with_multi_queue().build();
    }

    /// GPU-gated: the adapter info exposes a backend whose debug rendering is
    /// non-empty.
    #[test]
    fn context_has_adapter_info() {
        oxiui_core::require_gpu!(ctx, ComputeContext::try_new());
        let backend_str = format!("{:?}", ctx.adapter_info().backend);
        assert!(!backend_str.is_empty(), "backend string must not be empty");
    }

    /// GPU-gated: a builder configured for `LowPower` builds successfully.
    #[test]
    fn builder_with_low_power() {
        oxiui_core::require_gpu!(
            ctx,
            ComputeContext::with_power_preference(wgpu::PowerPreference::LowPower)
                .build()
                .ok()
        );
        let _ctx = ctx;
    }

    /// GPU-gated: `new_async()` driven by `pollster::block_on` yields a
    /// context.
    #[test]
    fn new_async_via_pollster() {
        oxiui_core::require_gpu!(ctx, pollster::block_on(ComputeContext::new_async()).ok());
        let _ctx = ctx;
    }

    /// Requesting every feature must end in a clean result, never a panic.
    ///
    /// `wgpu::Features::all()` is unlikely to be fully supported by a single
    /// adapter, so a `DeviceRequest` error is the expected outcome; success
    /// (hardware supports everything) and `NoAdapter` (no GPU) are accepted
    /// too.
    #[test]
    fn with_unsupported_features_returns_error() {
        if let Err(err) = ComputeContext::with_features(wgpu::Features::all()).build() {
            match err {
                ComputeError::NoAdapter | ComputeError::DeviceRequest(_) => {}
                other => panic!("unexpected error variant: {other}"),
            }
        }
    }

    // ── transfer-queue tests ─────────────────────────────────────────────────

    /// GPU-gated: a default context has no dedicated transfer queue.
    #[test]
    fn transfer_queue_none_without_multi_queue() {
        oxiui_core::require_gpu!(ctx, ComputeContext::try_new());
        assert!(
            ctx.transfer_queue().is_none(),
            "transfer_queue must be None when multi-queue was not requested"
        );
    }

    /// GPU-gated: a multi-queue build succeeds and still reports no transfer
    /// queue, because wgpu exposes one queue per device.
    #[test]
    fn multi_queue_context_builds() {
        oxiui_core::require_gpu!(
            ctx,
            ComputeContext::builder().with_multi_queue().build().ok()
        );
        assert!(ctx.transfer_queue().is_none());
    }

    // ── from_device tests ────────────────────────────────────────────────────

    /// GPU-gated: `from_device` wraps an existing device/queue pair and keeps
    /// the adapter info it was given.
    ///
    /// The pair is taken from a freshly built context; no GPU work is
    /// submitted.
    #[test]
    fn from_device_via_real_gpu() {
        oxiui_core::require_gpu!(ctx, ComputeContext::try_new());
        // Take the first context apart and hand its pieces to `from_device`.
        let ComputeContext {
            device,
            queue,
            adapter_info: info,
            ..
        } = ctx;
        let rebuilt = ComputeContext::from_device(device, queue, Some(info.clone()));
        assert_eq!(rebuilt.adapter_info().name, info.name);
        assert!(rebuilt.transfer_queue().is_none());
    }
}
573}