1use std::{collections::HashMap, num::NonZeroUsize};
2
3use encase::{ShaderType, UniformBuffer, internal::WriteInto};
4use lru::LruCache;
5use smallvec::SmallVec;
6use tessera_ui::{
7 compute::pipeline::{ComputablePipeline, ComputeContext},
8 wgpu,
9};
10
11use super::command::{DualBlurCommand, downscale_factor_for_radius};
12
/// Maximum number of kernel taps (center tap included) a single blur pass can carry; it sizes
/// the fixed `weights` / `offsets` arrays of `WeightCacheEntry`.
const MAX_SAMPLES: usize = 16;
/// Number of distinct quantized radii kept in the LRU weight cache of `BlurPipeline`.
const WEIGHT_CACHE_CAPACITY: usize = 64;
/// Factor a radius is multiplied by before being rounded into a cache key, i.e. radii are
/// bucketed in steps of 1/100.
const WEIGHT_QUANTIZATION: f32 = 100.0;
16
17fn compute_optimized_blur_params(radius: f32) -> WeightCacheEntry {
26 if radius <= 0.0 {
27 let mut weights = [0.0f32; MAX_SAMPLES];
28 weights[0] = 1.0;
29 return WeightCacheEntry {
30 weights,
31 offsets: [0.0f32; MAX_SAMPLES],
32 sample_count: 1,
33 };
34 }
35
36 let sigma = (radius / 3.0).max(0.1);
38 let two_sigma_sq = 2.0 * sigma * sigma;
39
40 let int_radius = radius.ceil() as i32;
42
43 let mut raw_weights = SmallVec::<[f32; 64]>::with_capacity((int_radius + 1) as usize);
45 raw_weights.resize((int_radius + 1) as usize, 0.0);
46 for i in 0..=int_radius {
47 let x = i as f32;
48 raw_weights[i as usize] = (-x * x / two_sigma_sq).exp();
49 }
50
51 let mut weights = SmallVec::<[f32; MAX_SAMPLES]>::with_capacity(MAX_SAMPLES);
53 let mut offsets = SmallVec::<[f32; MAX_SAMPLES]>::with_capacity(MAX_SAMPLES);
54
55 weights.push(raw_weights[0]);
57 offsets.push(0.0);
58
59 let mut i = 1;
62 while i <= int_radius && weights.len() < MAX_SAMPLES {
63 let w1 = raw_weights[i as usize];
64 let w2 = if i < int_radius {
65 raw_weights[(i + 1) as usize]
66 } else {
67 0.0
68 };
69
70 let combined_weight = w1 + w2;
71 if combined_weight > 1e-6 {
72 let offset = if w2 > 1e-6 {
74 (i as f32 * w1 + (i + 1) as f32 * w2) / combined_weight
75 } else {
76 i as f32
77 };
78
79 weights.push(combined_weight);
80 offsets.push(offset);
81
82 i += 2;
84 } else {
85 i += 1;
86 }
87 }
88
89 let total_weight: f32 = weights[0] + 2.0 * weights[1..].iter().sum::<f32>();
92 for w in &mut weights {
93 *w /= total_weight;
94 }
95
96 let sample_count = weights.len() as u32;
98
99 let mut weights_array = [0.0f32; MAX_SAMPLES];
100 let mut offsets_array = [0.0f32; MAX_SAMPLES];
101 for idx in 0..weights.len() {
102 weights_array[idx] = weights[idx];
103 offsets_array[idx] = offsets[idx];
104 }
105
106 WeightCacheEntry {
107 weights: weights_array,
108 offsets: offsets_array,
109 sample_count,
110 }
111}
112
/// Precomputed kernel for one blur radius, as produced by `compute_optimized_blur_params` and
/// stored in the weight cache.
#[derive(Clone)]
struct WeightCacheEntry {
    /// Normalized tap weights; index 0 is the center tap. Slots at and beyond `sample_count`
    /// are zero.
    weights: [f32; MAX_SAMPLES],
    /// Distance of each tap from the center, in texels; index 0 is `0.0`. Slots at and beyond
    /// `sample_count` are zero.
    offsets: [f32; MAX_SAMPLES],
    /// Number of leading entries of `weights` / `offsets` that are in use (at least 1).
    sample_count: u32,
}
119
/// Per-pass parameters of the directional blur stage, uploaded as the uniform buffer at
/// binding 0 of the blur bind group.
///
/// NOTE(review): field order and types define the uniform layout and presumably mirror a struct
/// in `blur.wgsl` — confirm before changing them.
#[derive(ShaderType)]
struct BlurUniforms {
    /// Pass radius divided by the downscale factor (radius in low-resolution texels).
    radius: f32,
    /// First component of the pass direction.
    direction_x: f32,
    /// Second component of the pass direction.
    direction_y: f32,
    /// Left edge of the processed area; `dispatch` always writes 0.
    area_x: u32,
    /// Top edge of the processed area; `dispatch` always writes 0.
    area_y: u32,
    /// Width of the processed area; `dispatch` writes the downsampled width.
    area_width: u32,
    /// Height of the processed area; `dispatch` writes the downsampled height.
    area_height: u32,
    /// Number of valid taps in the accompanying `WeightsAndOffsets` table.
    sample_count: u32,
}
131
132#[derive(ShaderType)]
133struct WeightsAndOffsets {
134 weights: [glam::Vec4; 16],
135 offsets: [glam::Vec4; 16],
136}
137
/// Parameters of the downsample stage, uploaded as the uniform buffer at binding 0 of the
/// downsample bind group.
///
/// NOTE(review): field order and types define the uniform layout and presumably mirror a struct
/// in `downsample.wgsl` — confirm before changing them.
#[derive(ShaderType)]
struct DownsampleUniforms {
    /// X origin of the target area (taken from the item's `target_area`).
    area_x: u32,
    /// Y origin of the target area.
    area_y: u32,
    /// Width of the target area at full resolution.
    area_width: u32,
    /// Height of the target area at full resolution.
    area_height: u32,
    /// Downscale factor (at least 1) between the target area and the intermediate textures.
    scale: u32,
}
146
/// Parameters of the upsample stage, uploaded as the uniform buffer at binding 0 of the
/// upsample bind group.
///
/// NOTE(review): field order and types define the uniform layout and presumably mirror a struct
/// in `upsample.wgsl` — confirm before changing them.
#[derive(ShaderType)]
struct UpsampleUniforms {
    /// X origin of the target area (taken from the item's `target_area`).
    area_x: u32,
    /// Y origin of the target area.
    area_y: u32,
    /// Width of the target area at full resolution.
    area_width: u32,
    /// Height of the target area at full resolution.
    area_height: u32,
    /// Downscale factor (at least 1) that was used when producing the blurred texture.
    scale: u32,
}
155
/// Compute pipeline that blurs a target area in three stages: downsample, one or more
/// directional blur passes on the low-resolution copy, and upsample into the output.
pub struct BlurPipeline {
    /// Compute pipeline built from `downsample.wgsl`.
    downsample_pipeline: wgpu::ComputePipeline,
    /// Compute pipeline built from `blur.wgsl`.
    blur_pipeline: wgpu::ComputePipeline,
    /// Compute pipeline built from `upsample.wgsl`.
    upsample_pipeline: wgpu::ComputePipeline,
    /// Layout of bind group 0 of the downsample stage (uniforms, input, output, sampler).
    downsample_bind_group_layout: wgpu::BindGroupLayout,
    /// Layout of bind group 0 of the blur stage (uniforms, input, output, sampler, kernel table).
    blur_bind_group_layout: wgpu::BindGroupLayout,
    /// Layout of bind group 0 of the upsample stage (uniforms, input, output, sampler).
    upsample_bind_group_layout: wgpu::BindGroupLayout,
    /// Linear, clamp-to-edge sampler; despite its name it is bound in all three stages.
    downsample_sampler: wgpu::Sampler,
    /// Idle intermediate textures, bucketed by `(width, height)`, reused across dispatches.
    texture_pool: HashMap<(u32, u32), Vec<wgpu::Texture>>,
    /// LRU cache of kernels keyed by the quantized radius.
    weight_cache: LruCache<u32, WeightCacheEntry>,
}
167
168impl BlurPipeline {
169 pub fn new(device: &wgpu::Device, pipeline_cache: Option<&wgpu::PipelineCache>) -> Self {
170 let downsample_shader = device.create_shader_module(wgpu::ShaderModuleDescriptor {
171 label: Some("Blur Downsample Shader"),
172 source: wgpu::ShaderSource::Wgsl(include_str!("downsample.wgsl").into()),
173 });
174 let blur_shader = device.create_shader_module(wgpu::ShaderModuleDescriptor {
175 label: Some("Blur Shader"),
176 source: wgpu::ShaderSource::Wgsl(include_str!("blur.wgsl").into()),
177 });
178 let upsample_shader = device.create_shader_module(wgpu::ShaderModuleDescriptor {
179 label: Some("Blur Upsample Shader"),
180 source: wgpu::ShaderSource::Wgsl(include_str!("upsample.wgsl").into()),
181 });
182
183 let downsample_sampler = device.create_sampler(&wgpu::SamplerDescriptor {
184 label: Some("Blur Downsample Sampler"),
185 address_mode_u: wgpu::AddressMode::ClampToEdge,
186 address_mode_v: wgpu::AddressMode::ClampToEdge,
187 address_mode_w: wgpu::AddressMode::ClampToEdge,
188 mag_filter: wgpu::FilterMode::Linear,
189 min_filter: wgpu::FilterMode::Linear,
190 mipmap_filter: wgpu::FilterMode::Linear,
191 ..Default::default()
192 });
193
194 let downsample_bind_group_layout =
195 device.create_bind_group_layout(&wgpu::BindGroupLayoutDescriptor {
196 entries: &[
197 wgpu::BindGroupLayoutEntry {
199 binding: 0,
200 visibility: wgpu::ShaderStages::COMPUTE,
201 ty: wgpu::BindingType::Buffer {
202 ty: wgpu::BufferBindingType::Uniform,
203 has_dynamic_offset: false,
204 min_binding_size: None,
205 },
206 count: None,
207 },
208 wgpu::BindGroupLayoutEntry {
210 binding: 1,
211 visibility: wgpu::ShaderStages::COMPUTE,
212 ty: wgpu::BindingType::Texture {
213 sample_type: wgpu::TextureSampleType::Float { filterable: true },
214 view_dimension: wgpu::TextureViewDimension::D2,
215 multisampled: false,
216 },
217 count: None,
218 },
219 wgpu::BindGroupLayoutEntry {
221 binding: 2,
222 visibility: wgpu::ShaderStages::COMPUTE,
223 ty: wgpu::BindingType::StorageTexture {
224 access: wgpu::StorageTextureAccess::WriteOnly,
225 format: wgpu::TextureFormat::Rgba8Unorm,
226 view_dimension: wgpu::TextureViewDimension::D2,
227 },
228 count: None,
229 },
230 wgpu::BindGroupLayoutEntry {
232 binding: 3,
233 visibility: wgpu::ShaderStages::COMPUTE,
234 ty: wgpu::BindingType::Sampler(wgpu::SamplerBindingType::Filtering),
235 count: None,
236 },
237 ],
238 label: Some("blur_downsample_bind_group_layout"),
239 });
240
241 let blur_bind_group_layout =
242 device.create_bind_group_layout(&wgpu::BindGroupLayoutDescriptor {
243 entries: &[
244 wgpu::BindGroupLayoutEntry {
246 binding: 0,
247 visibility: wgpu::ShaderStages::COMPUTE,
248 ty: wgpu::BindingType::Buffer {
249 ty: wgpu::BufferBindingType::Uniform,
250 has_dynamic_offset: false,
251 min_binding_size: None,
252 },
253 count: None,
254 },
255 wgpu::BindGroupLayoutEntry {
257 binding: 1,
258 visibility: wgpu::ShaderStages::COMPUTE,
259 ty: wgpu::BindingType::Texture {
260 sample_type: wgpu::TextureSampleType::Float { filterable: true },
261 view_dimension: wgpu::TextureViewDimension::D2,
262 multisampled: false,
263 },
264 count: None,
265 },
266 wgpu::BindGroupLayoutEntry {
268 binding: 2,
269 visibility: wgpu::ShaderStages::COMPUTE,
270 ty: wgpu::BindingType::StorageTexture {
271 access: wgpu::StorageTextureAccess::WriteOnly,
272 format: wgpu::TextureFormat::Rgba8Unorm,
273 view_dimension: wgpu::TextureViewDimension::D2,
274 },
275 count: None,
276 },
277 wgpu::BindGroupLayoutEntry {
279 binding: 3,
280 visibility: wgpu::ShaderStages::COMPUTE,
281 ty: wgpu::BindingType::Sampler(wgpu::SamplerBindingType::Filtering),
282 count: None,
283 },
284 wgpu::BindGroupLayoutEntry {
286 binding: 4,
287 visibility: wgpu::ShaderStages::COMPUTE,
288 ty: wgpu::BindingType::Buffer {
289 ty: wgpu::BufferBindingType::Uniform,
290 has_dynamic_offset: false,
291 min_binding_size: None,
292 },
293 count: None,
294 },
295 ],
296 label: Some("blur_pass_bind_group_layout"),
297 });
298
299 let upsample_bind_group_layout =
300 device.create_bind_group_layout(&wgpu::BindGroupLayoutDescriptor {
301 entries: &[
302 wgpu::BindGroupLayoutEntry {
304 binding: 0,
305 visibility: wgpu::ShaderStages::COMPUTE,
306 ty: wgpu::BindingType::Buffer {
307 ty: wgpu::BufferBindingType::Uniform,
308 has_dynamic_offset: false,
309 min_binding_size: None,
310 },
311 count: None,
312 },
313 wgpu::BindGroupLayoutEntry {
315 binding: 1,
316 visibility: wgpu::ShaderStages::COMPUTE,
317 ty: wgpu::BindingType::Texture {
318 sample_type: wgpu::TextureSampleType::Float { filterable: true },
319 view_dimension: wgpu::TextureViewDimension::D2,
320 multisampled: false,
321 },
322 count: None,
323 },
324 wgpu::BindGroupLayoutEntry {
326 binding: 2,
327 visibility: wgpu::ShaderStages::COMPUTE,
328 ty: wgpu::BindingType::StorageTexture {
329 access: wgpu::StorageTextureAccess::WriteOnly,
330 format: wgpu::TextureFormat::Rgba8Unorm,
331 view_dimension: wgpu::TextureViewDimension::D2,
332 },
333 count: None,
334 },
335 wgpu::BindGroupLayoutEntry {
337 binding: 3,
338 visibility: wgpu::ShaderStages::COMPUTE,
339 ty: wgpu::BindingType::Sampler(wgpu::SamplerBindingType::Filtering),
340 count: None,
341 },
342 ],
343 label: Some("blur_upsample_bind_group_layout"),
344 });
345
346 let downsample_pipeline_layout =
347 device.create_pipeline_layout(&wgpu::PipelineLayoutDescriptor {
348 label: Some("Blur Downsample Pipeline Layout"),
349 bind_group_layouts: &[&downsample_bind_group_layout],
350 push_constant_ranges: &[],
351 });
352 let blur_pipeline_layout = device.create_pipeline_layout(&wgpu::PipelineLayoutDescriptor {
353 label: Some("Blur Pipeline Layout"),
354 bind_group_layouts: &[&blur_bind_group_layout],
355 push_constant_ranges: &[],
356 });
357 let upsample_pipeline_layout =
358 device.create_pipeline_layout(&wgpu::PipelineLayoutDescriptor {
359 label: Some("Blur Upsample Pipeline Layout"),
360 bind_group_layouts: &[&upsample_bind_group_layout],
361 push_constant_ranges: &[],
362 });
363
364 let downsample_pipeline =
365 device.create_compute_pipeline(&wgpu::ComputePipelineDescriptor {
366 label: Some("Blur Downsample Pipeline"),
367 layout: Some(&downsample_pipeline_layout),
368 module: &downsample_shader,
369 entry_point: Some("main"),
370 compilation_options: Default::default(),
371 cache: pipeline_cache,
372 });
373 let blur_pipeline = device.create_compute_pipeline(&wgpu::ComputePipelineDescriptor {
374 label: Some("Blur Pipeline"),
375 layout: Some(&blur_pipeline_layout),
376 module: &blur_shader,
377 entry_point: Some("main"),
378 compilation_options: Default::default(),
379 cache: pipeline_cache,
380 });
381 let upsample_pipeline = device.create_compute_pipeline(&wgpu::ComputePipelineDescriptor {
382 label: Some("Blur Upsample Pipeline"),
383 layout: Some(&upsample_pipeline_layout),
384 module: &upsample_shader,
385 entry_point: Some("main"),
386 compilation_options: Default::default(),
387 cache: pipeline_cache,
388 });
389
390 Self {
391 downsample_pipeline,
392 blur_pipeline,
393 upsample_pipeline,
394 downsample_bind_group_layout,
395 blur_bind_group_layout,
396 upsample_bind_group_layout,
397 downsample_sampler,
398 texture_pool: HashMap::new(),
399 weight_cache: LruCache::new(NonZeroUsize::new(WEIGHT_CACHE_CAPACITY).unwrap()),
400 }
401 }
402
403 fn texture_key(width: u32, height: u32) -> (u32, u32) {
404 (width.max(1), height.max(1))
405 }
406
407 fn acquire_texture(&mut self, device: &wgpu::Device, width: u32, height: u32) -> wgpu::Texture {
408 let key = Self::texture_key(width, height);
409 if let Some(bucket) = self.texture_pool.get_mut(&key)
410 && let Some(texture) = bucket.pop()
411 {
412 return texture;
413 }
414
415 device.create_texture(&wgpu::TextureDescriptor {
416 label: Some("Blur Intermediate Texture"),
417 size: wgpu::Extent3d {
418 width: key.0,
419 height: key.1,
420 depth_or_array_layers: 1,
421 },
422 mip_level_count: 1,
423 sample_count: 1,
424 dimension: wgpu::TextureDimension::D2,
425 format: wgpu::TextureFormat::Rgba8Unorm,
426 usage: wgpu::TextureUsages::TEXTURE_BINDING | wgpu::TextureUsages::STORAGE_BINDING,
427 view_formats: &[],
428 })
429 }
430
431 fn release_texture(&mut self, texture: wgpu::Texture, width: u32, height: u32) {
432 let key = Self::texture_key(width, height);
433 self.texture_pool.entry(key).or_default().push(texture);
434 }
435
436 fn quantize_radius(radius: f32) -> u32 {
437 ((radius * WEIGHT_QUANTIZATION).round().max(0.0)) as u32
438 }
439
440 fn weights_for_radius(&mut self, radius: f32) -> WeightCacheEntry {
441 let key = Self::quantize_radius(radius);
442 if let Some(entry) = self.weight_cache.get(&key) {
443 return entry.clone();
444 }
445
446 let computed = compute_optimized_blur_params(radius);
447 self.weight_cache.put(key, computed.clone());
448 computed
449 }
450
451 fn create_uniform_buffer<T: ShaderType + WriteInto>(
452 device: &wgpu::Device,
453 queue: &wgpu::Queue,
454 label: &str,
455 data: &T,
456 ) -> wgpu::Buffer {
457 let mut buffer = UniformBuffer::new(Vec::new());
458 buffer.write(data).unwrap();
459 let bytes = buffer.into_inner();
460 let uniform_buffer = device.create_buffer(&wgpu::BufferDescriptor {
461 label: Some(label),
462 size: bytes.len() as u64,
463 usage: wgpu::BufferUsages::UNIFORM | wgpu::BufferUsages::COPY_DST,
464 mapped_at_creation: false,
465 });
466 queue.write_buffer(&uniform_buffer, 0, &bytes);
467 uniform_buffer
468 }
469}
470
471impl ComputablePipeline<DualBlurCommand> for BlurPipeline {
472 fn dispatch(&mut self, context: &mut ComputeContext<DualBlurCommand>) {
474 for item in context.items {
475 let target_area = item.target_area;
476 let area_x = target_area.x.0 as u32;
477 let area_y = target_area.y.0 as u32;
478 let area_width = target_area.width.0 as u32;
479 let area_height = target_area.height.0 as u32;
480
481 if area_width == 0 || area_height == 0 {
482 continue;
483 }
484
485 let max_radius = item
486 .command
487 .passes
488 .iter()
489 .map(|pass| pass.radius)
490 .fold(0.0f32, f32::max);
491 let scale = downscale_factor_for_radius(max_radius).max(1);
492 let down_width = area_width.div_ceil(scale);
493 let down_height = area_height.div_ceil(scale);
494
495 if down_width == 0 || down_height == 0 {
496 continue;
497 }
498
499 let downsample_texture = self.acquire_texture(context.device, down_width, down_height);
500 let downsample_view =
501 downsample_texture.create_view(&wgpu::TextureViewDescriptor::default());
502
503 let blur_texture = self.acquire_texture(context.device, down_width, down_height);
504 let blur_view = blur_texture.create_view(&wgpu::TextureViewDescriptor::default());
505
506 let downsample_uniforms = DownsampleUniforms {
508 area_x,
509 area_y,
510 area_width,
511 area_height,
512 scale,
513 };
514 let downsample_uniform_buffer = Self::create_uniform_buffer(
515 context.device,
516 context.queue,
517 "Blur Downsample Uniform Buffer",
518 &downsample_uniforms,
519 );
520 let downsample_bind_group =
521 context
522 .device
523 .create_bind_group(&wgpu::BindGroupDescriptor {
524 layout: &self.downsample_bind_group_layout,
525 entries: &[
526 wgpu::BindGroupEntry {
527 binding: 0,
528 resource: downsample_uniform_buffer.as_entire_binding(),
529 },
530 wgpu::BindGroupEntry {
531 binding: 1,
532 resource: wgpu::BindingResource::TextureView(context.input_view),
533 },
534 wgpu::BindGroupEntry {
535 binding: 2,
536 resource: wgpu::BindingResource::TextureView(&downsample_view),
537 },
538 wgpu::BindGroupEntry {
539 binding: 3,
540 resource: wgpu::BindingResource::Sampler(&self.downsample_sampler),
541 },
542 ],
543 label: Some("blur_downsample_bind_group"),
544 });
545 context.compute_pass.set_pipeline(&self.downsample_pipeline);
546 context
547 .compute_pass
548 .set_bind_group(0, &downsample_bind_group, &[]);
549 let downsample_workgroups_x = down_width.div_ceil(8);
550 let downsample_workgroups_y = down_height.div_ceil(8);
551 if downsample_workgroups_x == 0 || downsample_workgroups_y == 0 {
552 self.release_texture(downsample_texture, down_width, down_height);
553 self.release_texture(blur_texture, down_width, down_height);
554 continue;
555 }
556 context.compute_pass.dispatch_workgroups(
557 downsample_workgroups_x,
558 downsample_workgroups_y,
559 1,
560 );
561
562 let mut read_view = downsample_view.clone();
564 let mut write_view = blur_view.clone();
565 for pass in &item.command.passes {
566 let effective_radius = (pass.radius / scale as f32).max(0.0);
567
568 let weight_entry = self.weights_for_radius(effective_radius);
570
571 let blur_uniforms = BlurUniforms {
572 radius: effective_radius,
573 direction_x: pass.direction.0,
574 direction_y: pass.direction.1,
575 area_x: 0,
576 area_y: 0,
577 area_width: down_width,
578 area_height: down_height,
579 sample_count: weight_entry.sample_count,
580 };
581 let blur_uniform_buffer = Self::create_uniform_buffer(
582 context.device,
583 context.queue,
584 "Blur Pass Uniform Buffer",
585 &blur_uniforms,
586 );
587
588 let weights_and_offsets = WeightsAndOffsets {
590 weights: std::array::from_fn(|i| {
591 glam::Vec4::new(weight_entry.weights[i], 0.0, 0.0, 0.0)
592 }),
593 offsets: std::array::from_fn(|i| {
594 glam::Vec4::new(weight_entry.offsets[i], 0.0, 0.0, 0.0)
595 }),
596 };
597 let weights_buffer = Self::create_uniform_buffer(
598 context.device,
599 context.queue,
600 "Blur Weights and Offsets Buffer",
601 &weights_and_offsets,
602 );
603
604 let blur_bind_group =
605 context
606 .device
607 .create_bind_group(&wgpu::BindGroupDescriptor {
608 layout: &self.blur_bind_group_layout,
609 entries: &[
610 wgpu::BindGroupEntry {
611 binding: 0,
612 resource: blur_uniform_buffer.as_entire_binding(),
613 },
614 wgpu::BindGroupEntry {
615 binding: 1,
616 resource: wgpu::BindingResource::TextureView(&read_view),
617 },
618 wgpu::BindGroupEntry {
619 binding: 2,
620 resource: wgpu::BindingResource::TextureView(&write_view),
621 },
622 wgpu::BindGroupEntry {
623 binding: 3,
624 resource: wgpu::BindingResource::Sampler(
625 &self.downsample_sampler,
626 ),
627 },
628 wgpu::BindGroupEntry {
629 binding: 4,
630 resource: weights_buffer.as_entire_binding(),
631 },
632 ],
633 label: Some("blur_directional_bind_group"),
634 });
635 context.compute_pass.set_pipeline(&self.blur_pipeline);
636 context
637 .compute_pass
638 .set_bind_group(0, &blur_bind_group, &[]);
639 context.compute_pass.dispatch_workgroups(
640 downsample_workgroups_x,
641 downsample_workgroups_y,
642 1,
643 );
644
645 std::mem::swap(&mut read_view, &mut write_view);
646 }
647
648 let upsample_uniforms = UpsampleUniforms {
650 area_x,
651 area_y,
652 area_width,
653 area_height,
654 scale,
655 };
656 let upsample_uniform_buffer = Self::create_uniform_buffer(
657 context.device,
658 context.queue,
659 "Blur Upsample Uniform Buffer",
660 &upsample_uniforms,
661 );
662 let upsample_bind_group =
663 context
664 .device
665 .create_bind_group(&wgpu::BindGroupDescriptor {
666 layout: &self.upsample_bind_group_layout,
667 entries: &[
668 wgpu::BindGroupEntry {
669 binding: 0,
670 resource: upsample_uniform_buffer.as_entire_binding(),
671 },
672 wgpu::BindGroupEntry {
673 binding: 1,
674 resource: wgpu::BindingResource::TextureView(&read_view),
675 },
676 wgpu::BindGroupEntry {
677 binding: 2,
678 resource: wgpu::BindingResource::TextureView(context.output_view),
679 },
680 wgpu::BindGroupEntry {
681 binding: 3,
682 resource: wgpu::BindingResource::Sampler(&self.downsample_sampler),
683 },
684 ],
685 label: Some("blur_upsample_bind_group"),
686 });
687 context.compute_pass.set_pipeline(&self.upsample_pipeline);
688 context
689 .compute_pass
690 .set_bind_group(0, &upsample_bind_group, &[]);
691 let upsample_workgroups_x = area_width.div_ceil(8);
692 let upsample_workgroups_y = area_height.div_ceil(8);
693 if upsample_workgroups_x == 0 || upsample_workgroups_y == 0 {
694 self.release_texture(downsample_texture, down_width, down_height);
695 self.release_texture(blur_texture, down_width, down_height);
696 continue;
697 }
698 context.compute_pass.dispatch_workgroups(
699 upsample_workgroups_x,
700 upsample_workgroups_y,
701 1,
702 );
703
704 self.release_texture(downsample_texture, down_width, down_height);
705 self.release_texture(blur_texture, down_width, down_height);
706 }
707 }
708}