Skip to main content

oxiui_compute_wgpu/
context.rs

//! Headless GPU compute context: `Instance` → `Adapter` → `Device` + `Queue`.
//!
//! [`ComputeContext`] performs the full no-window, no-surface initialisation
//! chain required for pure GPU compute workloads (sparse solvers, LBM, MC/DC,
//! …).  Four constructors plus a fluent [`ContextBuilder`] are provided:
//!
//! * [`ComputeContext::try_new`] — returns `Option<Self>`; `None` means no GPU
//!   adapter is available (graceful CI skip, never panics).
//! * [`ComputeContext::new`] — returns `Result<Self, ComputeError>`; exposes the
//!   underlying failure reason through [`ComputeError`].
//! * [`ComputeContext::new_async`] — async variant; awaits adapter and device
//!   requests directly without a `pollster::block_on` wrapper.
//! * [`ComputeContext::builder`] — returns a [`ContextBuilder`] for fluent
//!   configuration of limits, features, and power preference.
//! * [`ComputeContext::from_device`] — wraps an externally owned
//!   `wgpu::Device` + `wgpu::Queue` (e.g. from `oxiui-render-wgpu`) so the
//!   compute layer can share the render backend's device.
//!
//! ## Multi-queue support
//!
//! On adapters that expose separate transfer and compute queue families,
//! [`ContextBuilder::with_multi_queue`] requests a dedicated transfer queue.
//! When no second queue family is available the context falls back to a single
//! shared queue (`transfer_queue()` returns `None`).
//!
//! Both sync constructors use `PowerPreference::HighPerformance` and
//! `wgpu::Limits::default()` (not `downlevel_defaults()`, which caps the
//! compute feature set).
29
30use crate::error::ComputeError;
31
32// ── ComputeContext ─────────────────────────────────────────────────────────────
33
34/// An initialised headless GPU compute context.
35///
36/// Owns the logical [`wgpu::Device`], the primary compute [`wgpu::Queue`],
37/// an optional dedicated transfer queue (when the adapter exposes more than one
38/// queue family), and the [`wgpu::AdapterInfo`] snapshot captured at
39/// construction time.  No window handle, surface, or swap-chain is involved.
40pub struct ComputeContext {
41    /// The logical GPU device.
42    pub device: wgpu::Device,
43    /// The primary command submission queue (compute and, when no separate
44    /// transfer queue is available, also used for DMA transfers).
45    pub queue: wgpu::Queue,
46    /// A dedicated transfer queue, present only when the adapter exposes
47    /// separate queue families **and** `ContextBuilder::with_multi_queue` was
48    /// called.  `None` when the adapter provides a single shared queue family.
49    transfer_queue: Option<wgpu::Queue>,
50    /// Adapter metadata snapshot (vendor, backend, driver, …).
51    adapter_info: wgpu::AdapterInfo,
52}
53
54impl ComputeContext {
55    /// Return a reference to the adapter metadata captured at construction time.
56    ///
57    /// The returned [`wgpu::AdapterInfo`] contains fields such as `name`,
58    /// `vendor`, `device`, `backend`, `driver`, and `driver_info`.
59    ///
60    /// ```rust,no_run
61    /// use oxiui_compute_wgpu::ComputeContext;
62    ///
63    /// if let Some(ctx) = ComputeContext::try_new() {
64    ///     let info = ctx.adapter_info();
65    ///     println!("GPU backend: {:?}", info.backend);
66    /// }
67    /// ```
68    pub fn adapter_info(&self) -> &wgpu::AdapterInfo {
69        &self.adapter_info
70    }
71
72    /// Return the dedicated transfer queue when one was obtained.
73    ///
74    /// Returns `Some` only when [`ContextBuilder::with_multi_queue`] was
75    /// called **and** the underlying adapter exposed a separate transfer queue
76    /// family.  Callers should fall back to `self.queue` when this is `None`.
77    ///
78    /// ```rust,no_run
79    /// use oxiui_compute_wgpu::ComputeContext;
80    ///
81    /// if let Some(ctx) = ComputeContext::try_new() {
82    ///     if let Some(tq) = ctx.transfer_queue() {
83    ///         // Use the dedicated DMA queue for uploads.
84    ///         let _ = tq;
85    ///     }
86    /// }
87    /// ```
88    pub fn transfer_queue(&self) -> Option<&wgpu::Queue> {
89        self.transfer_queue.as_ref()
90    }
91
92    /// Return a [`ContextBuilder`] for fluent configuration of limits,
93    /// features, and power preference.
94    ///
95    /// ```rust,no_run
96    /// use oxiui_compute_wgpu::ComputeContext;
97    ///
98    /// let ctx = ComputeContext::builder()
99    ///     .with_power_preference(wgpu::PowerPreference::LowPower)
100    ///     .build();
101    /// ```
102    pub fn builder() -> ContextBuilder {
103        ContextBuilder::default()
104    }
105
106    /// Create a context with high-performance power preference and default limits.
107    ///
108    /// # Errors
109    ///
110    /// * [`ComputeError::NoAdapter`] — no suitable GPU adapter was found.
111    /// * [`ComputeError::DeviceRequest`] — the device/queue request failed.
112    ///
113    /// ```rust,no_run
114    /// use oxiui_compute_wgpu::{ComputeContext, ComputeError};
115    ///
116    /// match ComputeContext::new() {
117    ///     Ok(ctx)                      => { let _ = ctx; }
118    ///     Err(ComputeError::NoAdapter) => { /* skip */ }
119    ///     Err(e)                       => panic!("unexpected: {e}"),
120    /// }
121    /// ```
122    #[cfg_attr(feature = "tracing", tracing::instrument(level = "debug"))]
123    pub fn new() -> Result<Self, ComputeError> {
124        ContextBuilder::default().build()
125    }
126
127    /// Try to create a `ComputeContext`, returning `None` when no suitable GPU
128    /// adapter is available on this host.
129    ///
130    /// This constructor never panics.  Call sites that want a graceful skip on
131    /// headless CI environments (VMs, containers without GPU pass-through) should
132    /// use this variant:
133    ///
134    /// ```rust,no_run
135    /// use oxiui_compute_wgpu::ComputeContext;
136    ///
137    /// if let Some(ctx) = ComputeContext::try_new() {
138    ///     // GPU is available — run the compute workload
139    ///     let _ = ctx;
140    /// } else {
141    ///     // No GPU — skip gracefully
142    /// }
143    /// ```
144    pub fn try_new() -> Option<Self> {
145        Self::new().ok()
146    }
147
148    /// Async variant of [`new`][Self::new] — awaits adapter and device requests
149    /// directly without a `pollster::block_on` wrapper.
150    ///
151    /// Suitable for use inside an async runtime (Tokio, async-std, etc.).
152    ///
153    /// # Errors
154    ///
155    /// Same as [`new`][Self::new].
156    ///
157    /// ```rust,no_run
158    /// use oxiui_compute_wgpu::ComputeContext;
159    ///
160    /// # async fn run() -> Result<(), oxiui_compute_wgpu::ComputeError> {
161    /// let ctx = ComputeContext::new_async().await?;
162    /// # Ok(())
163    /// # }
164    /// ```
165    pub async fn new_async() -> Result<Self, ComputeError> {
166        ContextBuilder::default().build_async().await
167    }
168
169    /// Wrap externally owned `wgpu::Device` and `wgpu::Queue` in a
170    /// `ComputeContext` so that the compute layer can share a device/queue pair
171    /// that was already created by another backend (e.g. `oxiui-render-wgpu`).
172    ///
173    /// A synthetic [`wgpu::AdapterInfo`] is constructed from the optional
174    /// `adapter_info` argument; pass `None` to use a placeholder.
175    ///
176    /// # Example
177    ///
178    /// ```rust,no_run
179    /// use oxiui_compute_wgpu::ComputeContext;
180    ///
181    /// // Suppose `device` and `queue` come from an external renderer.
182    /// # fn external() -> (wgpu::Device, wgpu::Queue) { unimplemented!() }
183    /// let (device, queue) = external();
184    /// let ctx = ComputeContext::from_device(device, queue, None);
185    /// ```
186    pub fn from_device(
187        device: wgpu::Device,
188        queue: wgpu::Queue,
189        adapter_info: Option<wgpu::AdapterInfo>,
190    ) -> Self {
191        let adapter_info = adapter_info.unwrap_or_else(|| wgpu::AdapterInfo {
192            name: "external".into(),
193            vendor: 0,
194            device: 0,
195            device_type: wgpu::DeviceType::Other,
196            device_pci_bus_id: String::new(),
197            driver: String::new(),
198            driver_info: String::new(),
199            backend: wgpu::Backend::Noop,
200            subgroup_min_size: 0,
201            subgroup_max_size: 0,
202            transient_saves_memory: false,
203        });
204        ComputeContext {
205            device,
206            queue,
207            transfer_queue: None,
208            adapter_info,
209        }
210    }
211
212    // ── Convenience delegates to ContextBuilder ─────────────────────────────
213
214    /// Create a [`crate::dispatch::Dispatcher`] that borrows this context.
215    ///
216    /// The `Dispatcher` provides high-level, zero-boilerplate GPU compute
217    /// operations (`map_f32`, `zip_map_f32`, `reduce_sum_f32`, `sph_density`,
218    /// `sort_f32`, …).
219    ///
220    /// ```rust,no_run
221    /// use oxiui_compute_wgpu::ComputeContext;
222    ///
223    /// if let Some(ctx) = ComputeContext::try_new() {
224    ///     let d = ctx.dispatcher();
225    ///     let out = d.map_f32(&[1.0, 2.0, 3.0], "x * 2.0");
226    ///     assert_eq!(out, vec![2.0, 4.0, 6.0]);
227    /// }
228    /// ```
229    pub fn dispatcher(&self) -> crate::dispatch::Dispatcher<'_> {
230        crate::dispatch::Dispatcher::new(self)
231    }
232
233    /// Start building a context with custom memory limits.
234    ///
235    /// Equivalent to `ComputeContext::builder().with_limits(limits)`.
236    pub fn with_limits(limits: wgpu::Limits) -> ContextBuilder {
237        ContextBuilder::default().with_limits(limits)
238    }
239
240    /// Start building a context with specific GPU features enabled.
241    ///
242    /// Equivalent to `ComputeContext::builder().with_features(features)`.
243    pub fn with_features(features: wgpu::Features) -> ContextBuilder {
244        ContextBuilder::default().with_features(features)
245    }
246
247    /// Start building a context with a specific power preference.
248    ///
249    /// Equivalent to `ComputeContext::builder().with_power_preference(pref)`.
250    pub fn with_power_preference(pref: wgpu::PowerPreference) -> ContextBuilder {
251        ContextBuilder::default().with_power_preference(pref)
252    }
253
254    /// Return a [`crate::hot_reload::ShaderWatcher`] that watches WGSL source
255    /// files on disk and signals when recompilation is needed.
256    ///
257    /// Only available when the `hot-reload` Cargo feature is enabled.
258    ///
259    /// ```rust,no_run
260    /// # #[cfg(feature = "hot-reload")]
261    /// # {
262    /// use oxiui_compute_wgpu::ComputeContext;
263    ///
264    /// if let Some(ctx) = ComputeContext::try_new() {
265    ///     let _watcher = ctx.watcher();
266    ///     // Add paths to watch, then call watcher.drain_changed() each frame.
267    /// }
268    /// # }
269    /// ```
270    #[cfg(feature = "hot-reload")]
271    pub fn watcher(&self) -> crate::hot_reload::ShaderWatcher {
272        crate::hot_reload::ShaderWatcher::new()
273    }
274}
275
276// ── ContextBuilder ─────────────────────────────────────────────────────────────
277
278/// Fluent builder for [`ComputeContext`].
279///
280/// Compose limits, features, and power preference in one chain, then call
281/// [`build`][ContextBuilder::build] (sync) or [`build_async`][ContextBuilder::build_async]
282/// (async) to finalise.
283///
284/// ```rust,no_run
285/// use oxiui_compute_wgpu::{ComputeContext, ComputeError};
286///
287/// let result = ComputeContext::builder()
288///     .with_power_preference(wgpu::PowerPreference::HighPerformance)
289///     .with_limits(wgpu::Limits::default())
290///     .build();
291/// ```
292#[derive(Debug, Default)]
293pub struct ContextBuilder {
294    power_preference: wgpu::PowerPreference,
295    required_features: wgpu::Features,
296    required_limits: Option<wgpu::Limits>,
297    /// When `true`, request a dedicated transfer queue family (if supported).
298    multi_queue: bool,
299}
300
301impl ContextBuilder {
302    /// Set the GPU power preference.
303    ///
304    /// Defaults to [`wgpu::PowerPreference::HighPerformance`] when not
305    /// called.
306    pub fn with_power_preference(mut self, pref: wgpu::PowerPreference) -> Self {
307        self.power_preference = pref;
308        self
309    }
310
311    /// Request optional GPU features (e.g. `TIMESTAMP_QUERY`, `SHADER_F16`).
312    ///
313    /// If the adapter does not support the requested features, [`ContextBuilder::build`] will
314    /// return [`ComputeError::DeviceRequest`] with a descriptive message before
315    /// attempting `request_device`.
316    pub fn with_features(mut self, features: wgpu::Features) -> Self {
317        self.required_features = features;
318        self
319    }
320
321    /// Override the default device limits.
322    ///
323    /// Use [`wgpu::Limits::downlevel_defaults()`] for maximum compatibility or
324    /// supply custom limits for high-throughput compute workloads.
325    pub fn with_limits(mut self, limits: wgpu::Limits) -> Self {
326        self.required_limits = Some(limits);
327        self
328    }
329
330    /// Request separate transfer and compute queues on adapters that advertise
331    /// more than one queue family.
332    ///
333    /// When the adapter exposes only a single queue family the built context
334    /// falls back gracefully: `transfer_queue()` returns `None` and `queue`
335    /// is used for all operations.
336    ///
337    /// Note: wgpu currently exposes at most one queue per device to the Rust
338    /// API, so this option records the intent and the context exposes
339    /// `transfer_queue()` as `None` until wgpu adds explicit multi-queue
340    /// support.  The flag is preserved for forward compatibility.
341    pub fn with_multi_queue(mut self) -> Self {
342        self.multi_queue = true;
343        self
344    }
345
346    /// Blocking variant: run the full adapter + device init on the current thread.
347    ///
348    /// # Errors
349    ///
350    /// * [`ComputeError::NoAdapter`] — no GPU adapter matched the options.
351    /// * [`ComputeError::DeviceRequest`] — features or device request failed.
352    pub fn build(self) -> Result<ComputeContext, ComputeError> {
353        pollster::block_on(self.build_async())
354    }
355
356    /// Async variant: await adapter and device requests inside the caller's runtime.
357    ///
358    /// # Errors
359    ///
360    /// * [`ComputeError::NoAdapter`] — no GPU adapter matched the options.
361    /// * [`ComputeError::DeviceRequest`] — features or device request failed.
362    pub async fn build_async(self) -> Result<ComputeContext, ComputeError> {
363        let instance = wgpu::Instance::default();
364
365        let adapter = instance
366            .request_adapter(&wgpu::RequestAdapterOptions {
367                power_preference: self.power_preference,
368                force_fallback_adapter: false,
369                // No surface — pure compute, no swap-chain required.
370                compatible_surface: None,
371            })
372            .await
373            .map_err(|_| ComputeError::NoAdapter)?;
374
375        // Pre-check requested features before attempting device acquisition so
376        // callers get a clear error instead of a cryptic RequestDeviceError.
377        if !self.required_features.is_empty()
378            && !adapter.features().contains(self.required_features)
379        {
380            return Err(ComputeError::DeviceRequest(format!(
381                "adapter does not support requested features: {:?}",
382                self.required_features
383            )));
384        }
385
386        // Capture adapter metadata before consuming the adapter.
387        let adapter_info = adapter.get_info();
388
389        let limits = self.required_limits.unwrap_or_default();
390
391        let (device, queue) = adapter
392            .request_device(&wgpu::DeviceDescriptor {
393                label: Some("oxiui-compute-wgpu"),
394                required_features: self.required_features,
395                required_limits: limits,
396                ..Default::default()
397            })
398            .await
399            .map_err(|e| ComputeError::DeviceRequest(e.to_string()))?;
400
401        // Multi-queue: wgpu currently exposes one queue per device.  The intent
402        // is recorded and `transfer_queue` is set to `None` until wgpu adds
403        // explicit multi-queue support.  Callers check `transfer_queue()` and
404        // fall back to `queue` automatically.
405        let transfer_queue: Option<wgpu::Queue> = if self.multi_queue {
406            // Future: when wgpu supports `request_device` returning multiple
407            // queues, acquire a second queue here.  For now, advertise that
408            // the context was built with multi-queue intent but that the
409            // adapter does not expose a second queue.
410            None
411        } else {
412            None
413        };
414
415        Ok(ComputeContext {
416            device,
417            queue,
418            transfer_queue,
419            adapter_info,
420        })
421    }
422}
423
424// ── Tests ─────────────────────────────────────────────────────────────────────
425
#[cfg(test)]
mod tests {
    use super::*;

    // ── existing tests (preserved) ───────────────────────────────────────────

    /// `try_new` must complete without panicking whether or not a GPU exists.
    #[test]
    fn try_new_does_not_panic() {
        let _ = ComputeContext::try_new();
    }

    /// `new` may succeed or report `NoAdapter`; any other error fails the test.
    #[test]
    fn new_returns_result() {
        let failure = match ComputeContext::new() {
            // GPU available, or no GPU on this host (CI, headless VM) — both fine.
            Ok(_) | Err(ComputeError::NoAdapter) => return,
            Err(other) => other,
        };
        match failure {
            ComputeError::DeviceRequest(msg) => {
                panic!("unexpected DeviceRequest error: {msg}")
            }
            other => {
                panic!("unexpected error: {other}")
            }
        }
    }

    /// `try_new` and `new` must agree: both succeed or both fail.
    #[test]
    fn try_new_consistent_with_new() {
        let strict = ComputeContext::new();
        let lenient = ComputeContext::try_new();
        match (&strict, lenient.is_some()) {
            (Ok(_), true) | (Err(_), false) => { /* consistent */ }
            (Ok(_), false) => panic!("new() succeeded but try_new() returned None"),
            (Err(e), true) => panic!("new() failed but try_new() returned Some: {e}"),
        }
    }

    // ── new tests (S1) ───────────────────────────────────────────────────────

    /// The builder chain can be assembled and built on any host; the build
    /// outcome itself depends on GPU availability and is not asserted.
    #[test]
    fn builder_chain_defaults() {
        let builder = ContextBuilder::default()
            .with_power_preference(wgpu::PowerPreference::HighPerformance)
            .with_limits(wgpu::Limits::default())
            .with_features(wgpu::Features::empty());
        // Success and NoAdapter are both valid outcomes here.
        let _outcome = builder.build();
    }

    /// Building with `with_multi_queue()` must not panic on any host.
    #[test]
    fn builder_with_multi_queue_does_not_panic() {
        // Either Ok or an error — neither may panic.
        let _outcome = ContextBuilder::default().with_multi_queue().build();
    }

    /// GPU-gated: the backend recorded in `adapter_info()` has a non-empty
    /// debug rendering.
    #[test]
    fn context_has_adapter_info() {
        oxiui_core::require_gpu!(ctx, ComputeContext::try_new());
        let rendered = format!("{:?}", ctx.adapter_info().backend);
        assert!(!rendered.is_empty(), "backend string must not be empty");
    }

    /// GPU-gated: a builder configured for `LowPower` builds successfully.
    #[test]
    fn builder_with_low_power() {
        oxiui_core::require_gpu!(
            ctx,
            ComputeContext::with_power_preference(wgpu::PowerPreference::LowPower)
                .build()
                .ok()
        );
        let _ = ctx;
    }

    /// GPU-gated: `new_async()` driven by `pollster::block_on` yields a context.
    #[test]
    fn new_async_via_pollster() {
        oxiui_core::require_gpu!(ctx, pollster::block_on(ComputeContext::new_async()).ok());
        let _ = ctx;
    }

    /// Requesting every feature must end in a clean result, never a panic.
    ///
    /// `wgpu::Features::all()` is almost certainly not fully supported by any
    /// single adapter, so a `DeviceRequest` error is the expected outcome;
    /// `NoAdapter` (no GPU) and `Ok` (hardware supports everything) are also
    /// accepted.
    #[test]
    fn with_unsupported_features_returns_error() {
        let outcome = ComputeContext::with_features(wgpu::Features::all()).build();
        match outcome {
            Ok(_) | Err(ComputeError::NoAdapter | ComputeError::DeviceRequest(_)) => {
                /* acceptable: full support, no GPU, or the expected clean error */
            }
            Err(other) => panic!("unexpected error variant: {other}"),
        }
    }

    /// GPU-gated: a context built without multi-queue has no transfer queue.
    #[test]
    fn transfer_queue_none_without_multi_queue() {
        oxiui_core::require_gpu!(ctx, ComputeContext::try_new());
        let dedicated = ctx.transfer_queue();
        assert!(
            dedicated.is_none(),
            "transfer_queue must be None when multi-queue was not requested"
        );
    }

    /// GPU-gated: a multi-queue build succeeds and falls back to no transfer
    /// queue, because wgpu exposes one queue per device.
    #[test]
    fn multi_queue_context_builds() {
        oxiui_core::require_gpu!(
            ctx,
            ComputeContext::builder().with_multi_queue().build().ok()
        );
        assert!(ctx.transfer_queue().is_none());
    }

    /// GPU-gated: `from_device` re-wraps a device/queue pair taken from a real
    /// context and keeps the adapter info it was given.
    #[test]
    fn from_device_via_real_gpu() {
        oxiui_core::require_gpu!(ctx, ComputeContext::try_new());
        // Snapshot the adapter info before the context is taken apart.
        let expected = ctx.adapter_info().clone();
        // Move the original device and queue into a second context.
        let rewrapped = ComputeContext::from_device(ctx.device, ctx.queue, Some(expected.clone()));
        assert_eq!(rewrapped.adapter_info().name, expected.name);
        assert!(rewrapped.transfer_queue().is_none());
    }
}