Skip to main content

ff_render/nodes/
upload.rs

1use super::RenderNodeCpu;
2
/// Chroma sub-sampling layouts accepted by [`YuvUploadNode`].
///
/// All variants are planar: Y, Cb and Cr are stored in three separate buffers.
#[derive(Clone, Copy, Debug, Default, PartialEq, Eq)]
pub enum YuvFormat {
    /// Planar 4:2:0: luma at full resolution, Cb/Cr halved horizontally and
    /// vertically. This is the default format.
    #[default]
    Yuv420p,
    /// Planar 4:2:2: luma at full resolution, Cb/Cr halved horizontally only.
    Yuv422p,
    /// Planar 4:4:4: luma and both chroma planes at full resolution.
    Yuv444p,
}
14
15// ── Pipeline cache ────────────────────────────────────────────────────────────
16
/// GPU objects that [`YuvUploadNode`] builds once and then reuses for every frame.
#[cfg(feature = "wgpu")]
struct YuvPipeline {
    /// Render pipeline running the `yuv_upload.wgsl` vertex and fragment stages.
    render_pipeline: wgpu::RenderPipeline,
    /// Layout with three plane textures (bindings 0–2) and one uniform buffer (binding 3).
    bind_group_layout: wgpu::BindGroupLayout,
    /// `R8Unorm` texture that receives the Y (luma) plane.
    y_tex: wgpu::Texture,
    /// `R8Unorm` texture that receives the Cb plane.
    cb_tex: wgpu::Texture,
    /// `R8Unorm` texture that receives the Cr plane.
    cr_tex: wgpu::Texture,
    /// 16-byte uniform buffer holding the chroma divisors followed by padding.
    uniform_buf: wgpu::Buffer,
}
26
27// ── YuvUploadNode ─────────────────────────────────────────────────────────────
28
29/// Upload raw YUV plane buffers to the GPU and convert to RGBA in a fragment
30/// shader, bypassing CPU-side `sws_scale`.
31///
32/// The node has `input_count() = 0`; it sources all pixel data from the plane
33/// buffers set via [`YuvUploadNode::set_planes`]. Call `set_planes` once per
34/// frame before the graph processes it.
35pub struct YuvUploadNode {
36    /// Pixel sub-sampling format.
37    pub format: YuvFormat,
38    /// Frame width in pixels.
39    pub width: u32,
40    /// Frame height in pixels.
41    pub height: u32,
42    y_plane: Vec<u8>,
43    cb_plane: Vec<u8>,
44    cr_plane: Vec<u8>,
45    #[cfg(feature = "wgpu")]
46    pipeline: std::sync::OnceLock<YuvPipeline>,
47}
48
49impl YuvUploadNode {
50    /// Create a new node. Plane buffers are initialised to neutral values (Y = 0, Cb = Cr = 128).
51    #[must_use]
52    pub fn new(format: YuvFormat, width: u32, height: u32) -> Self {
53        let (cw, ch) = chroma_dims(format, width, height);
54        Self {
55            format,
56            width,
57            height,
58            y_plane: vec![0u8; (width * height) as usize],
59            cb_plane: vec![128u8; (cw * ch) as usize],
60            cr_plane: vec![128u8; (cw * ch) as usize],
61            #[cfg(feature = "wgpu")]
62            pipeline: std::sync::OnceLock::new(),
63        }
64    }
65
66    /// Replace the stored plane buffers.
67    ///
68    /// Expected sizes for `width × height` at `format`:
69    /// - `y`:       `width × height` bytes
70    /// - `cb`, `cr`: `chroma_w × chroma_h` bytes (sub-sampled per [`YuvFormat`])
71    pub fn set_planes(&mut self, y: Vec<u8>, cb: Vec<u8>, cr: Vec<u8>) {
72        self.y_plane = y;
73        self.cb_plane = cb;
74        self.cr_plane = cr;
75    }
76}
77
78impl Default for YuvUploadNode {
79    fn default() -> Self {
80        Self::new(YuvFormat::Yuv420p, 0, 0)
81    }
82}
83
84/// Returns `(chroma_width, chroma_height)` for a given format and luma dimensions.
85pub(crate) fn chroma_dims(format: YuvFormat, w: u32, h: u32) -> (u32, u32) {
86    match format {
87        YuvFormat::Yuv420p => (w.div_ceil(2), h.div_ceil(2)),
88        YuvFormat::Yuv422p => (w.div_ceil(2), h),
89        YuvFormat::Yuv444p => (w, h),
90    }
91}
92
93fn chroma_divs(format: YuvFormat) -> (u32, u32) {
94    match format {
95        YuvFormat::Yuv420p => (2, 2),
96        YuvFormat::Yuv422p => (2, 1),
97        YuvFormat::Yuv444p => (1, 1),
98    }
99}
100
101// ── CPU path ──────────────────────────────────────────────────────────────────
102
103impl RenderNodeCpu for YuvUploadNode {
104    #[allow(
105        clippy::cast_possible_truncation,
106        clippy::cast_sign_loss,
107        clippy::many_single_char_names
108    )]
109    fn process_cpu(&self, rgba: &mut [u8], w: u32, h: u32) {
110        if self.y_plane.is_empty() || self.width == 0 || self.height == 0 {
111            return;
112        }
113        let (cw, _) = chroma_dims(self.format, self.width, self.height);
114        let (x_div, y_div) = chroma_divs(self.format);
115        let rows = h.min(self.height) as usize;
116        let cols = w.min(self.width) as usize;
117        for row in 0..rows {
118            for col in 0..cols {
119                let y_val = f32::from(self.y_plane[row * self.width as usize + col]) / 255.0;
120                let cx = col / x_div as usize;
121                let cy = row / y_div as usize;
122                let ci = cy * cw as usize + cx;
123                let cb = f32::from(self.cb_plane[ci]) / 255.0 - 0.5;
124                let cr = f32::from(self.cr_plane[ci]) / 255.0 - 0.5;
125                // BT.601 full-range YCbCr → linear RGB.
126                let r = (y_val + 1.402 * cr).clamp(0.0, 1.0);
127                let g = (y_val - 0.344 * cb - 0.714 * cr).clamp(0.0, 1.0);
128                let b = (y_val + 1.772 * cb).clamp(0.0, 1.0);
129                let idx = (row * w as usize + col) * 4;
130                rgba[idx] = (r * 255.0 + 0.5) as u8;
131                rgba[idx + 1] = (g * 255.0 + 0.5) as u8;
132                rgba[idx + 2] = (b * 255.0 + 0.5) as u8;
133                rgba[idx + 3] = 255;
134            }
135        }
136    }
137}
138
139// ── GPU path ──────────────────────────────────────────────────────────────────
140
#[cfg(feature = "wgpu")]
impl YuvUploadNode {
    /// Returns the node's GPU resources, building them on the first call.
    ///
    /// The first call compiles `yuv_upload.wgsl`, creates the bind group
    /// layout and render pipeline (targeting `Rgba8Unorm`), allocates one
    /// `R8Unorm` texture per plane and a 16-byte uniform buffer, and stores
    /// everything in `self.pipeline`. Later calls return the stored value.
    ///
    /// NOTE(review): texture sizes are taken from `self.format`, `self.width`
    /// and `self.height` at the time of the first call, and nothing in this
    /// file resets the cache — changing those public fields afterwards would
    /// leave the textures at their original size.
    #[allow(clippy::too_many_lines, clippy::similar_names)]
    fn get_or_create_pipeline(&self, ctx: &crate::context::RenderContext) -> &YuvPipeline {
        self.pipeline.get_or_init(|| {
            let device = &ctx.device;
            let (chroma_w, chroma_h) = chroma_dims(self.format, self.width, self.height);

            // Layout slot for one plane: a non-filterable float 2D texture
            // visible to the fragment stage.
            let plane_slot = |binding: u32| wgpu::BindGroupLayoutEntry {
                binding,
                visibility: wgpu::ShaderStages::FRAGMENT,
                ty: wgpu::BindingType::Texture {
                    sample_type: wgpu::TextureSampleType::Float { filterable: false },
                    view_dimension: wgpu::TextureViewDimension::D2,
                    multisampled: false,
                },
                count: None,
            };

            // Single-mip, single-sample R8Unorm texture that can be written
            // from the CPU and bound for reading in the shader.
            let plane_texture = |label: &'static str, width: u32, height: u32| {
                device.create_texture(&wgpu::TextureDescriptor {
                    label: Some(label),
                    size: wgpu::Extent3d {
                        width,
                        height,
                        depth_or_array_layers: 1,
                    },
                    mip_level_count: 1,
                    sample_count: 1,
                    dimension: wgpu::TextureDimension::D2,
                    format: wgpu::TextureFormat::R8Unorm,
                    usage: wgpu::TextureUsages::COPY_DST | wgpu::TextureUsages::TEXTURE_BINDING,
                    view_formats: &[],
                })
            };

            let module = device.create_shader_module(wgpu::ShaderModuleDescriptor {
                label: Some("YuvUpload shader"),
                source: wgpu::ShaderSource::Wgsl(include_str!("../shaders/yuv_upload.wgsl").into()),
            });

            let bind_group_layout =
                device.create_bind_group_layout(&wgpu::BindGroupLayoutDescriptor {
                    label: Some("YuvUpload BGL"),
                    entries: &[
                        plane_slot(0),
                        plane_slot(1),
                        plane_slot(2),
                        wgpu::BindGroupLayoutEntry {
                            binding: 3,
                            visibility: wgpu::ShaderStages::FRAGMENT,
                            ty: wgpu::BindingType::Buffer {
                                ty: wgpu::BufferBindingType::Uniform,
                                has_dynamic_offset: false,
                                min_binding_size: None,
                            },
                            count: None,
                        },
                    ],
                });

            let layout = device.create_pipeline_layout(&wgpu::PipelineLayoutDescriptor {
                label: Some("YuvUpload layout"),
                bind_group_layouts: &[Some(&bind_group_layout)],
                immediate_size: 0,
            });

            let render_pipeline = device.create_render_pipeline(&wgpu::RenderPipelineDescriptor {
                label: Some("YuvUpload pipeline"),
                layout: Some(&layout),
                vertex: wgpu::VertexState {
                    module: &module,
                    entry_point: Some("vs_main"),
                    buffers: &[],
                    compilation_options: wgpu::PipelineCompilationOptions::default(),
                },
                fragment: Some(wgpu::FragmentState {
                    module: &module,
                    entry_point: Some("fs_main"),
                    targets: &[Some(wgpu::ColorTargetState {
                        format: wgpu::TextureFormat::Rgba8Unorm,
                        blend: None,
                        write_mask: wgpu::ColorWrites::ALL,
                    })],
                    compilation_options: wgpu::PipelineCompilationOptions::default(),
                }),
                primitive: wgpu::PrimitiveState::default(),
                depth_stencil: None,
                multisample: wgpu::MultisampleState::default(),
                multiview_mask: None,
                cache: None,
            });

            // Luma at full resolution, both chroma planes at the sub-sampled size.
            let y_tex = plane_texture("YuvUpload Y", self.width, self.height);
            let cb_tex = plane_texture("YuvUpload Cb", chroma_w, chroma_h);
            let cr_tex = plane_texture("YuvUpload Cr", chroma_w, chroma_h);

            // Uniform buffer: [chroma_x_div, chroma_y_div, pad, pad] = 16 bytes.
            let uniform_buf = device.create_buffer(&wgpu::BufferDescriptor {
                label: Some("YuvUpload uniforms"),
                size: 16,
                usage: wgpu::BufferUsages::UNIFORM | wgpu::BufferUsages::COPY_DST,
                mapped_at_creation: false,
            });

            YuvPipeline {
                render_pipeline,
                bind_group_layout,
                y_tex,
                cb_tex,
                cr_tex,
                uniform_buf,
            }
        })
    }
}
299
#[cfg(feature = "wgpu")]
impl super::RenderNode for YuvUploadNode {
    /// This node is a source: it consumes no upstream textures.
    fn input_count(&self) -> usize {
        0
    }

    /// Upload the stored planes and render the converted RGBA frame into
    /// `outputs[0]`.
    ///
    /// Steps: write Y/Cb/Cr into their textures, write the chroma divisors to
    /// the uniform buffer, then run one render pass that clears the output to
    /// transparent and draws 6 vertices with the YUV → RGBA pipeline. The
    /// command buffer is submitted before returning.
    ///
    /// The call logs a warning and does nothing when the node holds no frame,
    /// when a plane buffer is smaller than the frame dimensions require, or
    /// when `outputs` is empty.
    #[allow(clippy::too_many_lines, clippy::similar_names)]
    fn process(
        &self,
        _inputs: &[&wgpu::Texture],
        outputs: &[&wgpu::Texture],
        ctx: &crate::context::RenderContext,
    ) {
        if self.width == 0 || self.height == 0 || self.y_plane.is_empty() {
            log::warn!("YuvUploadNode::process called with empty frame data");
            return;
        }
        let Some(output) = outputs.first() else {
            log::warn!("YuvUploadNode::process called with no outputs");
            return;
        };

        let (cw, ch) = chroma_dims(self.format, self.width, self.height);
        let (x_div, y_div) = chroma_divs(self.format);

        // `set_planes` stores whatever it is given, so check the lengths here:
        // handing `write_texture` fewer bytes than the copy extent needs is a
        // wgpu validation error rather than a recoverable condition.
        let luma_len = self.width as usize * self.height as usize;
        let chroma_len = cw as usize * ch as usize;
        if self.y_plane.len() < luma_len
            || self.cb_plane.len() < chroma_len
            || self.cr_plane.len() < chroma_len
        {
            log::warn!(
                "YuvUploadNode::process called with plane buffers smaller than the frame dimensions"
            );
            return;
        }

        let pd = self.get_or_create_pipeline(ctx);

        // Copies one tightly-packed 8-bit plane (row pitch == width) into the
        // whole of `texture`.
        let upload_plane = |texture: &wgpu::Texture, data: &[u8], width: u32, height: u32| {
            ctx.queue.write_texture(
                wgpu::TexelCopyTextureInfo {
                    texture,
                    mip_level: 0,
                    origin: wgpu::Origin3d::ZERO,
                    aspect: wgpu::TextureAspect::All,
                },
                data,
                wgpu::TexelCopyBufferLayout {
                    offset: 0,
                    bytes_per_row: Some(width),
                    rows_per_image: None,
                },
                wgpu::Extent3d {
                    width,
                    height,
                    depth_or_array_layers: 1,
                },
            );
        };
        upload_plane(&pd.y_tex, self.y_plane.as_slice(), self.width, self.height);
        upload_plane(&pd.cb_tex, self.cb_plane.as_slice(), cw, ch);
        upload_plane(&pd.cr_tex, self.cr_plane.as_slice(), cw, ch);

        // Write chroma sub-sampling divisors (plus two padding words) to the
        // uniform buffer.
        ctx.queue
            .write_buffer(&pd.uniform_buf, 0, &pack_u32(&[x_div, y_div, 0, 0]));

        let y_view = pd
            .y_tex
            .create_view(&wgpu::TextureViewDescriptor::default());
        let cb_view = pd
            .cb_tex
            .create_view(&wgpu::TextureViewDescriptor::default());
        let cr_view = pd
            .cr_tex
            .create_view(&wgpu::TextureViewDescriptor::default());
        let out_view = output.create_view(&wgpu::TextureViewDescriptor::default());

        // Bindings mirror the layout built in `get_or_create_pipeline`:
        // 0 = Y, 1 = Cb, 2 = Cr, 3 = uniforms.
        let bind_group = ctx.device.create_bind_group(&wgpu::BindGroupDescriptor {
            label: Some("YuvUpload BG"),
            layout: &pd.bind_group_layout,
            entries: &[
                wgpu::BindGroupEntry {
                    binding: 0,
                    resource: wgpu::BindingResource::TextureView(&y_view),
                },
                wgpu::BindGroupEntry {
                    binding: 1,
                    resource: wgpu::BindingResource::TextureView(&cb_view),
                },
                wgpu::BindGroupEntry {
                    binding: 2,
                    resource: wgpu::BindingResource::TextureView(&cr_view),
                },
                wgpu::BindGroupEntry {
                    binding: 3,
                    resource: pd.uniform_buf.as_entire_binding(),
                },
            ],
        });

        let mut encoder = ctx
            .device
            .create_command_encoder(&wgpu::CommandEncoderDescriptor {
                label: Some("YuvUpload pass"),
            });
        {
            // Scoped so the pass is dropped (ended) before `encoder.finish()`.
            let mut pass = encoder.begin_render_pass(&wgpu::RenderPassDescriptor {
                label: Some("YuvUpload pass"),
                color_attachments: &[Some(wgpu::RenderPassColorAttachment {
                    view: &out_view,
                    resolve_target: None,
                    depth_slice: None,
                    ops: wgpu::Operations {
                        load: wgpu::LoadOp::Clear(wgpu::Color::TRANSPARENT),
                        store: wgpu::StoreOp::Store,
                    },
                })],
                depth_stencil_attachment: None,
                timestamp_writes: None,
                occlusion_query_set: None,
                multiview_mask: None,
            });
            pass.set_pipeline(&pd.render_pipeline);
            pass.set_bind_group(0, &bind_group, &[]);
            pass.draw(0..6, 0..1);
        }
        ctx.queue.submit(std::iter::once(encoder.finish()));
    }
}
456
457// ── Tests ─────────────────────────────────────────────────────────────────────
458
#[cfg(test)]
mod tests {
    use super::*;

    /// Builds a `w × h` node from the given planes and returns the RGBA
    /// produced by the CPU path for a same-sized output image.
    fn render(
        format: YuvFormat,
        w: u32,
        h: u32,
        y: Vec<u8>,
        cb: Vec<u8>,
        cr: Vec<u8>,
    ) -> Vec<u8> {
        let mut node = YuvUploadNode::new(format, w, h);
        node.set_planes(y, cb, cr);
        let mut rgba = vec![0u8; (w * h * 4) as usize];
        node.process_cpu(&mut rgba, w, h);
        rgba
    }

    #[test]
    fn yuv_format_default_should_be_yuv420p() {
        let format = YuvFormat::default();
        assert_eq!(format, YuvFormat::Yuv420p);
    }

    #[test]
    fn chroma_dims_420p_should_halve_both_dimensions() {
        let even = chroma_dims(YuvFormat::Yuv420p, 4, 4);
        assert_eq!(even, (2, 2));
        // Odd sizes round up rather than down.
        let odd = chroma_dims(YuvFormat::Yuv420p, 3, 3);
        assert_eq!(odd, (2, 2));
    }

    #[test]
    fn chroma_dims_422p_should_halve_width_only() {
        let even = chroma_dims(YuvFormat::Yuv422p, 4, 4);
        let odd = chroma_dims(YuvFormat::Yuv422p, 3, 5);
        assert_eq!(even, (2, 4));
        assert_eq!(odd, (2, 5));
    }

    #[test]
    fn chroma_dims_444p_should_be_full_resolution() {
        let dims = chroma_dims(YuvFormat::Yuv444p, 4, 6);
        assert_eq!(dims, (4, 6));
    }

    #[test]
    fn yuv_upload_node_cpu_black_frame_should_produce_black() {
        // Y = 0 with neutral chroma.
        let rgba = render(
            YuvFormat::Yuv420p,
            2,
            2,
            vec![0u8; 4],
            vec![128u8; 1],
            vec![128u8; 1],
        );
        for pixel in rgba.chunks_exact(4) {
            assert!(pixel[0] <= 1, "R should be ~0 for Y=0; got {}", pixel[0]);
            assert!(pixel[1] <= 1, "G should be ~0 for Y=0; got {}", pixel[1]);
            assert!(pixel[2] <= 1, "B should be ~0 for Y=0; got {}", pixel[2]);
            assert_eq!(pixel[3], 255, "alpha must be opaque");
        }
    }

    #[test]
    fn yuv_upload_node_cpu_white_frame_should_produce_white() {
        // Y = 255 with neutral chroma.
        let rgba = render(
            YuvFormat::Yuv420p,
            2,
            2,
            vec![255u8; 4],
            vec![128u8; 1],
            vec![128u8; 1],
        );
        for pixel in rgba.chunks_exact(4) {
            assert!(
                pixel[0] >= 254,
                "R should be ~255 for Y=255, neutral chroma; got {}",
                pixel[0]
            );
            assert!(
                pixel[1] >= 254,
                "G should be ~255 for Y=255, neutral chroma; got {}",
                pixel[1]
            );
            assert!(
                pixel[2] >= 254,
                "B should be ~255 for Y=255, neutral chroma; got {}",
                pixel[2]
            );
        }
    }

    #[test]
    fn yuv_upload_node_cpu_neutral_chroma_should_produce_grey() {
        // Mid-level luma (128) with Cb = Cr = 128 should land near mid grey
        // on every channel.
        let rgba = render(
            YuvFormat::Yuv420p,
            2,
            2,
            vec![128u8; 4],
            vec![128u8; 1],
            vec![128u8; 1],
        );
        for pixel in rgba.chunks_exact(4) {
            let r = i32::from(pixel[0]);
            let g = i32::from(pixel[1]);
            let b = i32::from(pixel[2]);
            assert!(
                (r - 128).abs() <= 2,
                "R should be ~128 for neutral YUV; got {r}"
            );
            assert!(
                (g - 128).abs() <= 2,
                "G should be ~128 for neutral YUV; got {g}"
            );
            assert!(
                (b - 128).abs() <= 2,
                "B should be ~128 for neutral YUV; got {b}"
            );
        }
    }

    #[test]
    fn yuv_upload_node_cpu_422p_should_use_half_width_chroma() {
        // A 4×2 frame in 4:2:2 has 4×2 luma and 2×2 chroma planes.
        let rgba = render(
            YuvFormat::Yuv422p,
            4,
            2,
            vec![128u8; 8],
            vec![128u8; 4],
            vec![128u8; 4],
        );
        for pixel in rgba.chunks_exact(4) {
            let r = i32::from(pixel[0]);
            assert!(
                (r - 128).abs() <= 2,
                "422p neutral: R should be ~128; got {r}"
            );
        }
    }

    #[test]
    fn yuv_upload_node_set_planes_should_update_stored_data() {
        let mut node = YuvUploadNode::new(YuvFormat::Yuv444p, 1, 1);
        let mut rgba = vec![0u8; 4];

        // Freshly constructed planes (Y = 0, Cb = Cr = 128) render near-black;
        // not exactly black because 128/255 is slightly above 0.5.
        node.process_cpu(&mut rgba, 1, 1);
        assert!(
            rgba[0] <= 2,
            "default Y=0 must produce near-black; got {}",
            rgba[0]
        );

        // Swapping in Y = 200 must be reflected by the next render.
        node.set_planes(vec![200], vec![128], vec![128]);
        node.process_cpu(&mut rgba, 1, 1);
        assert!(
            rgba[0] > 150,
            "Y=200 must produce bright output; got {}",
            rgba[0]
        );
    }

    #[test]
    fn yuv_upload_node_variant_and_error_types_should_compile() {
        // Compile-time smoke test: every variant and both constructors exist.
        let _ = YuvFormat::Yuv420p;
        let _ = YuvFormat::Yuv422p;
        let _ = YuvFormat::Yuv444p;
        let _ = YuvUploadNode::new(YuvFormat::Yuv420p, 320, 240);
        let _ = YuvUploadNode::default();
    }
}
609
610// ── helpers ───────────────────────────────────────────────────────────────────
611
/// Serialises `values` into one byte vector, each word in little-endian order.
#[cfg(feature = "wgpu")]
fn pack_u32(values: &[u32]) -> Vec<u8> {
    let mut bytes = Vec::with_capacity(values.len() * 4);
    for word in values {
        bytes.extend_from_slice(&word.to_le_bytes());
    }
    bytes
}