tessera_ui/renderer/compute/
pipeline.rs

1//! GPU compute pipeline system for Tessera UI framework.
2//!
3//! This module provides the infrastructure for GPU compute operations in Tessera,
4//! enabling advanced visual effects and post-processing operations that would be
5//! inefficient or impossible to achieve with traditional CPU-based approaches.
6//!
7//! # Architecture Overview
8//!
9//! The compute pipeline system is designed to work seamlessly with the rendering
10//! pipeline, using a ping-pong buffer approach for efficient multi-pass operations.
11//! Each compute pipeline processes a specific type of compute command and operates
12//! on texture data using GPU compute shaders.
13//!
14//! ## Key Components
15//!
16//! - [`ComputablePipeline<C>`]: The main trait for implementing custom compute pipelines
17//! - [`ComputePipelineRegistry`]: Manages and dispatches commands to registered compute pipelines
18//! - [`ComputeResourceManager`]: Manages GPU buffers and resources for compute operations
19//!
20//! # Design Philosophy
21//!
22//! The compute pipeline system embraces WGPU's compute shader capabilities to enable:
23//!
24//! - **Advanced Post-Processing**: Blur, contrast adjustment, color grading, and other image effects
25//! - **Parallel Processing**: Leverage GPU parallelism for computationally intensive operations
26//! - **Real-Time Effects**: Achieve complex visual effects at interactive frame rates
27//! - **Memory Efficiency**: Use GPU memory directly without CPU roundtrips
28//!
29//! # Ping-Pong Rendering
30//!
31//! The system uses a ping-pong approach where:
32//!
33//! 1. **Input Texture**: Contains the result from previous rendering or compute pass
34//! 2. **Output Texture**: Receives the processed result from the current compute operation
35//! 3. **Format Convention**: All textures use `wgpu::TextureFormat::Rgba8Unorm` for compatibility
36//!
37//! This approach enables efficient chaining of multiple compute operations without
38//! intermediate CPU involvement.
39//!
40//! # Implementation Guide
41//!
42//! ## Creating a Custom Compute Pipeline
43//!
44//! To create a custom compute pipeline:
45//!
46//! 1. Define your compute command struct implementing [`ComputeCommand`]
47//! 2. Create a pipeline struct implementing [`ComputablePipeline<YourCommand>`]
48//! 3. Write a compute shader in WGSL
49//! 4. Register the pipeline with [`ComputePipelineRegistry::register`]
50//!
51//! ## Example: Simple Brightness Adjustment Pipeline
52//!
53//! ```rust,ignore
54//! use tessera_ui::{ComputeCommand, ComputablePipeline, compute::resource::ComputeResourceManager};
55//! use wgpu;
56//!
57//! // 1. Define the compute command
58//! #[derive(Debug)]
59//! struct BrightnessCommand {
60//!     brightness: f32,
61//! }
62//!
63//! impl ComputeCommand for BrightnessCommand {}
64//!
65//! // 2. Implement the pipeline
66//! struct BrightnessPipeline {
67//!     compute_pipeline: wgpu::ComputePipeline,
68//!     bind_group_layout: wgpu::BindGroupLayout,
69//! }
70//!
71//! impl BrightnessPipeline {
72//!     fn new(device: &wgpu::Device) -> Self {
73//!         // Create compute shader and pipeline
74//!         let shader = device.create_shader_module(wgpu::ShaderModuleDescriptor {
75//!             label: Some("Brightness Shader"),
76//!             source: wgpu::ShaderSource::Wgsl(include_str!("brightness.wgsl").into()),
77//!         });
78//!         
79//!         // ... setup bind group layout and pipeline ...
80//!         # unimplemented!()
81//!     }
82//! }
83//!
//! // `ComputeBatchItem` is defined alongside `ComputablePipeline` in this module.
//! impl ComputablePipeline<BrightnessCommand> for BrightnessPipeline {
//!     fn dispatch(
//!         &mut self,
//!         device: &wgpu::Device,
//!         queue: &wgpu::Queue,
//!         config: &wgpu::SurfaceConfiguration,
//!         compute_pass: &mut wgpu::ComputePass<'_>,
//!         items: &[ComputeBatchItem<'_, BrightnessCommand>],
//!         resource_manager: &mut ComputeResourceManager,
//!         input_view: &wgpu::TextureView,
//!         output_view: &wgpu::TextureView,
//!     ) {
//!         // A batch may hold several commands of the same type; handle each in turn.
//!         for item in items {
//!             // Create uniforms buffer with brightness value
//!             let uniforms = [item.command.brightness];
//!             let uniform_buffer = device.create_buffer_init(&wgpu::util::BufferInitDescriptor {
//!                 label: Some("Brightness Uniforms"),
//!                 contents: bytemuck::cast_slice(&uniforms),
//!                 usage: wgpu::BufferUsages::UNIFORM,
//!             });
//!
//!             // Create bind group with input/output textures and uniforms
//!             let bind_group = device.create_bind_group(&wgpu::BindGroupDescriptor {
//!                 layout: &self.bind_group_layout,
//!                 entries: &[
//!                     wgpu::BindGroupEntry { binding: 0, resource: uniform_buffer.as_entire_binding() },
//!                     wgpu::BindGroupEntry { binding: 1, resource: wgpu::BindingResource::TextureView(input_view) },
//!                     wgpu::BindGroupEntry { binding: 2, resource: wgpu::BindingResource::TextureView(output_view) },
//!                 ],
//!                 label: Some("brightness_bind_group"),
//!             });
//!
//!             // Dispatch compute shader
//!             compute_pass.set_pipeline(&self.compute_pipeline);
//!             compute_pass.set_bind_group(0, &bind_group, &[]);
//!             compute_pass.dispatch_workgroups(
//!                 (config.width + 7) / 8,
//!                 (config.height + 7) / 8,
//!                 1
//!             );
//!         }
//!     }
//! }
125//!
126//! // 3. Register the pipeline
127//! let mut registry = ComputePipelineRegistry::new();
128//! let brightness_pipeline = BrightnessPipeline::new(&device);
129//! registry.register(brightness_pipeline);
130//! ```
131//!
132//! ## Example WGSL Compute Shader
133//!
134//! ```wgsl
135//! @group(0) @binding(0) var<uniform> brightness: f32;
136//! @group(0) @binding(1) var input_texture: texture_2d<f32>;
137//! @group(0) @binding(2) var output_texture: texture_storage_2d<rgba8unorm, write>;
138//!
139//! @compute @workgroup_size(8, 8)
140//! fn main(@builtin(global_invocation_id) global_id: vec3<u32>) {
141//!     let coords = vec2<i32>(global_id.xy);
142//!     let input_color = textureLoad(input_texture, coords, 0);
143//!     let output_color = vec4<f32>(input_color.rgb * brightness, input_color.a);
144//!     textureStore(output_texture, coords, output_color);
145//! }
146//! ```
147//!
148//! # Integration with Basic Components
149//!
150//! The `tessera_basic_components` crate provides several compute pipeline implementations:
151//!
152//! - **BlurPipeline**: Gaussian blur effects for backgrounds and UI elements
153//! - **MeanPipeline**: Average color calculation for adaptive UI themes
154//! - **ContrastPipeline**: Contrast and saturation adjustments
155//!
156//! These pipelines demonstrate real-world usage patterns and can serve as references
157//! for implementing custom compute operations.
158//!
159//! # Performance Considerations
160//!
161//! - **Workgroup Size**: Choose workgroup sizes that align with GPU architecture (typically 8x8 or 16x16)
162//! - **Memory Access**: Optimize memory access patterns in shaders for better cache utilization
163//! - **Resource Reuse**: Use the [`ComputeResourceManager`] to reuse buffers across frames
164//! - **Batch Operations**: Combine multiple similar operations when possible
165//!
166//! # Texture Format Requirements
167//!
168//! Due to WGPU limitations, compute shaders require specific texture formats:
169//!
170//! - **Input Textures**: Can be any readable format, typically from render passes
171//! - **Output Textures**: Must use `wgpu::TextureFormat::Rgba8Unorm` for storage binding
172//! - **sRGB Limitation**: sRGB formats cannot be used as storage textures
173//!
174//! The framework automatically handles format conversions when necessary.
175
176use std::{any::TypeId, collections::HashMap};
177
178use crate::{
179    PxPosition, PxRect, PxSize, compute::resource::ComputeResourceManager, renderer::command::AsAny,
180};
181
182use super::command::ComputeCommand;
183
184/// Type-erased metadata describing a compute command within a batch.
185pub struct ErasedComputeBatchItem<'a> {
186    pub command: &'a dyn ComputeCommand,
187    pub size: PxSize,
188    pub position: PxPosition,
189    pub target_area: PxRect,
190}
191
192/// Strongly typed metadata describing a compute command within a batch.
193pub struct ComputeBatchItem<'a, C: ComputeCommand> {
194    pub command: &'a C,
195    pub size: PxSize,
196    pub position: PxPosition,
197    pub target_area: PxRect,
198}
199
200/// Core trait for implementing GPU compute pipelines.
201///
202/// This trait defines the interface for compute pipelines that process specific types
203/// of compute commands using GPU compute shaders. Each pipeline is responsible for
204/// setting up compute resources, managing shader dispatch, and processing texture data.
205///
206/// # Type Parameters
207///
208/// * `C` - The specific [`ComputeCommand`] type this pipeline can handle
209///
210/// # Design Principles
211///
212/// - **Single Responsibility**: Each pipeline handles one specific type of compute operation
213/// - **Stateless Operation**: Pipelines should not maintain state between dispatch calls
214/// - **Resource Efficiency**: Reuse GPU resources when possible through the resource manager
215/// - **Thread Safety**: All implementations must be `Send + Sync` for parallel execution
216///
217/// # Integration with Rendering
218///
219/// Compute pipelines operate within the broader rendering pipeline, typically:
220///
221/// 1. **After Rendering**: Process the rendered scene for post-effects
222/// 2. **Between Passes**: Transform data between different rendering stages
223/// 3. **Before Rendering**: Prepare data or textures for subsequent render operations
224///
225/// # Example Implementation Pattern
226///
227/// ```rust,ignore
228/// impl ComputablePipeline<MyCommand> for MyPipeline {
229///     fn dispatch(&mut self, device, queue, config, compute_pass, items,
230///                 resource_manager, input_view, output_view) {
231///         for item in items {
232///             // 1. Create or retrieve uniform buffer
233///             let uniforms = create_uniforms_from_command(item.command);
234///             let uniform_buffer = device.create_buffer_init(...);
235///
236///             // 2. Create bind group with textures and uniforms
237///             let bind_group = device.create_bind_group(...);
238///
239///             // 3. Set pipeline and dispatch
240///             compute_pass.set_pipeline(&self.compute_pipeline);
241///             compute_pass.set_bind_group(0, &bind_group, &[]);
242///             compute_pass.dispatch_workgroups(workgroup_x, workgroup_y, 1);
243///         }
244///     }
245/// }
246/// ```
247pub trait ComputablePipeline<C: ComputeCommand>: Send + Sync + 'static {
248    /// Dispatches the compute command within an active compute pass.
249    ///
250    /// This method receives one or more compute commands of the same type. Implementations
251    /// may choose to process the batch collectively (e.g., by packing data into a single
252    /// dispatch) or sequentially iterate over the items. It should set up the necessary GPU
253    /// resources, bind them to the compute pipeline, and dispatch the appropriate number of
254    /// workgroups to process the input texture.
255    ///
256    /// # Parameters
257    ///
258    /// * `device` - The WGPU device for creating GPU resources
259    /// * `queue` - The WGPU queue for submitting commands and updating buffers
260    /// * `config` - Current surface configuration containing dimensions and format info
261    /// * `compute_pass` - The active compute pass to record commands into
262    /// * `items` - Slice of compute commands with associated metadata describing their target areas
263    /// * `resource_manager` - Manager for reusing GPU buffers across operations
264    /// * `input_view` - View of the input texture (result from previous pass)
265    /// * `output_view` - View of the output texture (target for this operation)
266    ///
267    /// # Texture Format Requirements
268    ///
269    /// Due to WGPU limitations, storage textures have specific format requirements:
270    ///
271    /// - **Input Texture**: Can be any readable format, typically from render passes
272    /// - **Output Texture**: Must use `wgpu::TextureFormat::Rgba8Unorm` format
273    /// - **sRGB Limitation**: sRGB formats cannot be used as storage textures
274    ///
275    /// The framework ensures that `output_view` always uses a compatible format
276    /// for storage binding operations.
277    ///
278    /// # Workgroup Dispatch Guidelines
279    ///
280    /// When dispatching workgroups, consider:
281    ///
282    /// - **Workgroup Size**: Match your shader's `@workgroup_size` declaration
283    /// - **Coverage**: Ensure all pixels are processed by calculating appropriate dispatch dimensions
284    /// - **Alignment**: Round up dispatch dimensions to cover the entire texture
285    ///
286    /// Common dispatch pattern:
287    /// ```rust,ignore
288    /// let workgroup_size = 8; // Match shader @workgroup_size(8, 8)
289    /// let dispatch_x = (config.width + workgroup_size - 1) / workgroup_size;
290    /// let dispatch_y = (config.height + workgroup_size - 1) / workgroup_size;
291    /// compute_pass.dispatch_workgroups(dispatch_x, dispatch_y, 1);
292    /// ```
293    ///
294    /// # Resource Management
295    ///
296    /// Use the `resource_manager` to:
297    /// - Store persistent buffers that can be reused across frames
298    /// - Avoid recreating expensive GPU resources
299    /// - Manage buffer lifetimes efficiently
300    ///
301    /// # Error Handling
302    ///
303    /// This method should handle errors gracefully:
304    /// - Validate command parameters before use
305    /// - Ensure texture dimensions are compatible
306    /// - Handle resource creation failures appropriately
307    fn dispatch(
308        &mut self,
309        device: &wgpu::Device,
310        queue: &wgpu::Queue,
311        config: &wgpu::SurfaceConfiguration,
312        compute_pass: &mut wgpu::ComputePass<'_>,
313        items: &[ComputeBatchItem<'_, C>],
314        resource_manager: &mut ComputeResourceManager,
315        input_view: &wgpu::TextureView,
316        output_view: &wgpu::TextureView,
317    );
318}
319
320/// Internal trait for type erasure of computable pipelines.
321///
322/// This trait enables dynamic dispatch of compute commands to their corresponding pipelines
323/// without knowing the specific command type at compile time. It's used internally by
324/// the [`ComputePipelineRegistry`] and should not be implemented directly by users.
325///
326/// The type erasure is achieved through the [`AsAny`] trait, which allows downcasting
327/// from `&dyn ComputeCommand` to concrete command types.
328///
329/// # Implementation Note
330///
331/// This trait is automatically implemented for any type that implements
332/// [`ComputablePipeline<C>`] through the [`ComputablePipelineImpl`] wrapper.
333pub(crate) trait ErasedComputablePipeline: Send + Sync {
334    /// Dispatches a type-erased compute command.
335    fn dispatch_erased(
336        &mut self,
337        device: &wgpu::Device,
338        queue: &wgpu::Queue,
339        config: &wgpu::SurfaceConfiguration,
340        compute_pass: &mut wgpu::ComputePass<'_>,
341        items: &[ErasedComputeBatchItem<'_>],
342        resource_manager: &mut ComputeResourceManager,
343        input_view: &wgpu::TextureView,
344        output_view: &wgpu::TextureView,
345    );
346}
347
348/// A wrapper to implement `ErasedComputablePipeline` for any `ComputablePipeline`.
349struct ComputablePipelineImpl<C: ComputeCommand, P: ComputablePipeline<C>> {
350    pipeline: P,
351    _command: std::marker::PhantomData<C>,
352}
353
354impl<C: ComputeCommand + 'static, P: ComputablePipeline<C>> ErasedComputablePipeline
355    for ComputablePipelineImpl<C, P>
356{
357    fn dispatch_erased(
358        &mut self,
359        device: &wgpu::Device,
360        queue: &wgpu::Queue,
361        config: &wgpu::SurfaceConfiguration,
362        compute_pass: &mut wgpu::ComputePass<'_>,
363        items: &[ErasedComputeBatchItem<'_>],
364        resource_manager: &mut ComputeResourceManager,
365        input_view: &wgpu::TextureView,
366        output_view: &wgpu::TextureView,
367    ) {
368        if items.is_empty() {
369            return;
370        }
371
372        let mut typed_items: Vec<ComputeBatchItem<'_, C>> = Vec::with_capacity(items.len());
373        for item in items {
374            let command = AsAny::as_any(item.command)
375                .downcast_ref::<C>()
376                .expect("Compute batch contained command of unexpected type");
377            typed_items.push(ComputeBatchItem {
378                command,
379                size: item.size,
380                position: item.position,
381                target_area: item.target_area,
382            });
383        }
384
385        self.pipeline.dispatch(
386            device,
387            queue,
388            config,
389            compute_pass,
390            &typed_items,
391            resource_manager,
392            input_view,
393            output_view,
394        );
395    }
396}
397
398/// Registry for managing and dispatching compute pipelines.
399///
400/// The `ComputePipelineRegistry` serves as the central hub for all compute pipelines
401/// in the Tessera framework. It maintains a collection of registered pipelines and
402/// handles the dispatch of compute commands to their appropriate pipelines.
403///
404/// # Architecture
405///
406/// The registry uses type erasure to store pipelines of different types in a single
407/// collection. When a compute command needs to be processed, the registry attempts
408/// to dispatch it to all registered pipelines until one handles it successfully.
409///
410/// # Usage Pattern
411///
412/// 1. Create a new registry
413/// 2. Register all required compute pipelines during application initialization
414/// 3. The renderer uses the registry to dispatch commands during frame rendering
415///
416/// # Example
417///
418/// ```rust,ignore
419/// use tessera_ui::renderer::compute::ComputePipelineRegistry;
420///
421/// // Create registry and register pipelines
422/// let mut registry = ComputePipelineRegistry::new();
423/// registry.register(blur_pipeline);
424/// registry.register(contrast_pipeline);
425/// registry.register(brightness_pipeline);
426///
427/// // Registry is now ready for use by the renderer
428/// ```
429///
430/// # Performance Considerations
431///
432/// - Pipeline lookup is O(1) on average due to HashMap implementation.
433///
434/// # Thread Safety
435///
436/// The registry and all registered pipelines must be `Send + Sync` to support
437/// parallel execution in the rendering system.
438#[derive(Default)]
439pub struct ComputePipelineRegistry {
440    pipelines: HashMap<TypeId, Box<dyn ErasedComputablePipeline>>,
441}
442
443impl ComputePipelineRegistry {
444    /// Creates a new empty compute pipeline registry.
445    ///
446    /// # Example
447    ///
448    /// ```
449    /// use tessera_ui::renderer::compute::ComputePipelineRegistry;
450    ///
451    /// let registry = ComputePipelineRegistry::new();
452    /// ```
453    pub fn new() -> Self {
454        Self::default()
455    }
456
457    /// Registers a new compute pipeline for a specific command type.
458    ///
459    /// This method takes ownership of the pipeline and wraps it in a type-erased
460    /// container that can be stored alongside other pipelines of different types.
461    ///
462    /// # Type Parameters
463    ///
464    /// * `C` - The [`ComputeCommand`] type this pipeline handles
465    ///
466    /// # Parameters
467    ///
468    /// * `pipeline` - The pipeline instance to register
469    ///
470    /// # Example
471    ///
472    /// ```rust,ignore
473    /// use tessera_ui::renderer::compute::ComputePipelineRegistry;
474    ///
475    /// let mut registry = ComputePipelineRegistry::new();
476    ///
477    /// // Register custom compute pipelines
478    /// let blur_pipeline = BlurPipeline::new(&device);
479    /// registry.register(blur_pipeline);
480    ///
481    /// let contrast_pipeline = ContrastPipeline::new(&device);
482    /// registry.register(contrast_pipeline);
483    ///
484    /// // Register multiple pipelines for different effects
485    /// registry.register(BrightnessAdjustmentPipeline::new(&device));
486    /// registry.register(ColorGradingPipeline::new(&device));
487    /// ```
488    ///
489    /// # Thread Safety
490    ///
491    /// The pipeline must implement `Send + Sync` to be compatible with Tessera's
492    /// parallel rendering architecture.
493    pub fn register<C: ComputeCommand + 'static>(
494        &mut self,
495        pipeline: impl ComputablePipeline<C> + 'static,
496    ) {
497        let erased_pipeline = Box::new(ComputablePipelineImpl {
498            pipeline,
499            _command: std::marker::PhantomData,
500        });
501        self.pipelines.insert(TypeId::of::<C>(), erased_pipeline);
502    }
503
504    /// Dispatches one or more commands to their corresponding registered pipeline.
505    pub(crate) fn dispatch_erased(
506        &mut self,
507        device: &wgpu::Device,
508        queue: &wgpu::Queue,
509        config: &wgpu::SurfaceConfiguration,
510        compute_pass: &mut wgpu::ComputePass<'_>,
511        items: &[ErasedComputeBatchItem<'_>],
512        resource_manager: &mut ComputeResourceManager,
513        input_view: &wgpu::TextureView,
514        output_view: &wgpu::TextureView,
515    ) {
516        if items.is_empty() {
517            return;
518        }
519
520        let command_type_id = AsAny::as_any(items[0].command).type_id();
521        if let Some(pipeline) = self.pipelines.get_mut(&command_type_id) {
522            pipeline.dispatch_erased(
523                device,
524                queue,
525                config,
526                compute_pass,
527                items,
528                resource_manager,
529                input_view,
530                output_view,
531            );
532        } else {
533            panic!(
534                "No pipeline found for command {:?}",
535                std::any::type_name_of_val(items[0].command)
536            );
537        }
538    }
539}