dvd_render/
render.rs

1use ab_glyph::Font;
2use std::collections::HashMap;
3use core::num::NonZeroU8;
4use crate::sequence::GridSequence;
5use crate::atlas::populate_atlas;
6
#[inline]
/// Pixel dimensions of the rendered image: the `W`×`H` character grid
/// scaled by the per-glyph cell size in pixels.
fn compute_output_size<const W: usize, const H: usize>(font_width: u32, font_height: u32) -> (u32, u32) {
	(W as u32 * font_width, H as u32 * font_height)
}
13
/// GPU renderer that turns a `W`×`H` character-grid sequence into RGBA
/// frames via a compute shader sampling a pre-rasterised glyph atlas.
pub struct WgpuRenderer<const W: usize, const H: usize> {
	// Remaining frames to render; drained by the `Iterator` impl.
	sequence: GridSequence<W, H>,
	// Maps a character to its slot index in the glyph atlas buffer.
	lut: HashMap<char, u32>,
	device: wgpu::Device,
	queue: wgpu::Queue,
	// Storage buffer holding the current frame's per-cell atlas indices.
	idx_grid: wgpu::Buffer,
	// Storage buffer the compute shader writes RGBA pixels into.
	output_img: wgpu::Buffer,
	pipeline: wgpu::ComputePipeline,
	bind_group: wgpu::BindGroup,
	// Output image size in pixels (grid size × glyph cell size).
	output_width: u32,
	output_height: u32
}
26
impl<const W: usize, const H: usize> WgpuRenderer<W, H> {
	/// Builds a renderer for `sequence`: rasterises the needed glyphs into
	/// an atlas with `font`, acquires a wgpu device, uploads the atlas and
	/// the grid/image dimension uniforms, and compiles the `sample_atlas`
	/// compute pipeline from `shader.wgsl`.
	///
	/// # Panics
	/// Panics if no adapter or device satisfying the requested features and
	/// limits is available (`unwrap` on `request_adapter`/`request_device`).
	pub async fn new<F: Font>(font: F, sequence: GridSequence<W, H>) -> Self {
		let populated_atlas = populate_atlas(font, &sequence);

		// Output image is the character grid scaled by the glyph cell size.
		let (output_width, output_height) = compute_output_size::<W, H>(
			populated_atlas.font_width,
			populated_atlas.font_height
		);

		let instance = wgpu::Instance::new(&wgpu::InstanceDescriptor {
			backends: wgpu::Backends::all(),
			flags: wgpu::InstanceFlags::VALIDATION,
			backend_options: wgpu::BackendOptions::default()
		});

		let adapter = instance.request_adapter(&wgpu::RequestAdapterOptions::default()).await.unwrap();
		// I expect that the size of the atlas should be bounded by the size of the output
		// (4 bytes per RGBA pixel). NOTE(review): if the atlas buffer ever
		// exceeds the output image in bytes, its creation below would exceed
		// these limits — confirm the atlas invariant in `populate_atlas`.
		let max_buf_size = output_width * output_height * 4;

		let (device, queue) = adapter.request_device(&wgpu::DeviceDescriptor {
			required_features: wgpu::Features::SHADER_INT64,
			required_limits: wgpu::Limits {
				max_buffer_size: max_buf_size as u64,
				max_storage_buffer_binding_size: max_buf_size,
				..wgpu::Limits::default()
			},
			memory_hints: wgpu::MemoryHints::Performance,
			label: Some("device"),
			trace: wgpu::Trace::Off
		}).await.unwrap();

		// Per-cell atlas indices, 4 bytes each; written anew for every frame.
		let idx_grid = device.create_buffer(&wgpu::BufferDescriptor {
			label: Some("idx_grid"),
			size: (H * W * 4) as u64,
			usage: wgpu::BufferUsages::COPY_DST | wgpu::BufferUsages::STORAGE,
			mapped_at_creation: false
		});

		let atlas = device.create_buffer(&wgpu::BufferDescriptor {
			label: Some("atlas"),
			size: populated_atlas.buffer.len() as u64,
			usage: wgpu::BufferUsages::COPY_DST | wgpu::BufferUsages::STORAGE,
			mapped_at_creation: false
		});

		// TODO: investigate efficiency of `write_buffer`
		queue.write_buffer(&atlas, 0, &populated_atlas.buffer);

		// Shader output: output_width * output_height RGBA pixels.
		let output_img = device.create_buffer(&wgpu::BufferDescriptor {
			label: Some("output_buf"),
			size: output_width as u64 * output_height as u64 * 4,
			usage: wgpu::BufferUsages::STORAGE | wgpu::BufferUsages::COPY_SRC,
			mapped_at_creation: false
		});

		// Four single-u32 uniforms: grid dims (in cells) and image dims (in pixels).
		let grid_width_uniform = device.create_buffer(&wgpu::BufferDescriptor {
			label: Some("grid_width_uniform"),
			size: 4,
			usage: wgpu::BufferUsages::UNIFORM | wgpu::BufferUsages::COPY_DST,
			mapped_at_creation: false
		});

		let grid_height_uniform = device.create_buffer(&wgpu::BufferDescriptor {
			label: Some("grid_height_uniform"),
			size: 4,
			usage: wgpu::BufferUsages::UNIFORM | wgpu::BufferUsages::COPY_DST,
			mapped_at_creation: false
		});

		queue.write_buffer(&grid_width_uniform, 0, &(W as u32).to_ne_bytes());
		queue.write_buffer(&grid_height_uniform, 0, &(H as u32).to_ne_bytes());

		let img_width_uniform = device.create_buffer(&wgpu::BufferDescriptor {
			label: Some("img_width_uniform"),
			size: 4,
			usage: wgpu::BufferUsages::UNIFORM | wgpu::BufferUsages::COPY_DST,
			mapped_at_creation: false
		});

		let img_height_uniform = device.create_buffer(&wgpu::BufferDescriptor {
			label: Some("img_height_uniform"),
			size: 4,
			usage: wgpu::BufferUsages::UNIFORM | wgpu::BufferUsages::COPY_DST,
			mapped_at_creation: false
		});

		queue.write_buffer(&img_width_uniform, 0, &output_width.to_ne_bytes());
		queue.write_buffer(&img_height_uniform, 0, &output_height.to_ne_bytes());

		let shader = device.create_shader_module(wgpu::include_wgsl!("shader.wgsl"));

		// Layout mirrors the bind group below: bindings 0-2 are storage
		// buffers, 3-6 are the four dimension uniforms.
		let bind_group_layout = device.create_bind_group_layout(&wgpu::BindGroupLayoutDescriptor {
			label: Some("bind_group_layout"),
			entries: &[
				// idx_grid
				wgpu::BindGroupLayoutEntry {
					binding: 0,
					visibility: wgpu::ShaderStages::COMPUTE,
					ty: wgpu::BindingType::Buffer {
						ty: wgpu::BufferBindingType::Storage {
							read_only: true
						},
						has_dynamic_offset: false,
						min_binding_size: None
					},
					count: None
				},
				// atlas
				wgpu::BindGroupLayoutEntry {
					binding: 1,
					visibility: wgpu::ShaderStages::COMPUTE,
					ty: wgpu::BindingType::Buffer {
						ty: wgpu::BufferBindingType::Storage {
							read_only: true
						},
						has_dynamic_offset: false,
						min_binding_size: None
					},
					count: None
				},
				// output_img — the only writable binding
				wgpu::BindGroupLayoutEntry {
					binding: 2,
					visibility: wgpu::ShaderStages::COMPUTE,
					ty: wgpu::BindingType::Buffer {
						ty: wgpu::BufferBindingType::Storage {
							read_only: false
						},
						has_dynamic_offset: false,
						min_binding_size: None
					},
					count: None
				},
				// grid_width
				wgpu::BindGroupLayoutEntry {
					binding: 3,
					visibility: wgpu::ShaderStages::COMPUTE,
					ty: wgpu::BindingType::Buffer {
						ty: wgpu::BufferBindingType::Uniform,
						has_dynamic_offset: false,
						min_binding_size: None
					},
					count: None
				},
				// grid_height
				wgpu::BindGroupLayoutEntry {
					binding: 4,
					visibility: wgpu::ShaderStages::COMPUTE,
					ty: wgpu::BindingType::Buffer {
						ty: wgpu::BufferBindingType::Uniform,
						has_dynamic_offset: false,
						min_binding_size: None
					},
					count: None
				},
				// img_width (was mislabeled "grid_width"; binding 5 is bound
				// to img_width_uniform below)
				wgpu::BindGroupLayoutEntry {
					binding: 5,
					visibility: wgpu::ShaderStages::COMPUTE,
					ty: wgpu::BindingType::Buffer {
						ty: wgpu::BufferBindingType::Uniform,
						has_dynamic_offset: false,
						min_binding_size: None
					},
					count: None
				},
				// img_height (was mislabeled "grid_height"; binding 6 is bound
				// to img_height_uniform below)
				wgpu::BindGroupLayoutEntry {
					binding: 6,
					visibility: wgpu::ShaderStages::COMPUTE,
					ty: wgpu::BindingType::Buffer {
						ty: wgpu::BufferBindingType::Uniform,
						has_dynamic_offset: false,
						min_binding_size: None
					},
					count: None
				}
			]
		});

		let pipeline_layout = device.create_pipeline_layout(&wgpu::PipelineLayoutDescriptor {
			label: Some("pipeline_layout"),
			bind_group_layouts: &[&bind_group_layout],
			push_constant_ranges: &[]
		});

		let pipeline = device.create_compute_pipeline(&wgpu::ComputePipelineDescriptor {
			label: Some("pipeline"),
			layout: Some(&pipeline_layout),
			module: &shader,
			entry_point: Some("sample_atlas"),
			compilation_options: wgpu::PipelineCompilationOptions::default(),
			cache: None
		});

		let bind_group = device.create_bind_group(&wgpu::BindGroupDescriptor {
			label: Some("bind_group"),
			layout: &bind_group_layout,
			entries: &[
				wgpu::BindGroupEntry {
					binding: 0,
					resource: idx_grid.as_entire_binding()
				},
				wgpu::BindGroupEntry {
					binding: 1,
					resource: atlas.as_entire_binding()
				},
				wgpu::BindGroupEntry {
					binding: 2,
					resource: output_img.as_entire_binding()
				},
				wgpu::BindGroupEntry {
					binding: 3,
					resource: grid_width_uniform.as_entire_binding()
				},
				wgpu::BindGroupEntry {
					binding: 4,
					resource: grid_height_uniform.as_entire_binding()
				},
				wgpu::BindGroupEntry {
					binding: 5,
					resource: img_width_uniform.as_entire_binding()
				},
				wgpu::BindGroupEntry {
					binding: 6,
					resource: img_height_uniform.as_entire_binding()
				}
			]
		});

		Self {
			sequence,
			lut: populated_atlas.lut,
			device,
			queue,
			idx_grid,
			output_img,
			pipeline,
			bind_group,
			output_width,
			output_height
		}
	}
}
271
/// A single CPU-side frame read back from the GPU.
pub struct RenderedFrame {
	// The rasterised RGBA image.
	pub img: image::RgbaImage,
	// Presumably the number of video frames this image is displayed for
	// before advancing — name-based; confirm against `GridSequence`.
	pub frame_hold: NonZeroU8
}
276
277impl RenderedFrame {
278	fn deserialize(width: u32, height: u32, data: Vec<u8>, frame_hold: NonZeroU8) -> Self {
279		Self {
280			img: image::RgbaImage::from_raw(width, height, data).unwrap(),
281			frame_hold
282		}
283	}
284}
285
#[inline]
/// Ceiling integer division: `dividend / divisor`, rounded up.
///
/// Fix: the original parameter names were swapped — the first argument
/// (named `divisor`) was actually the value being divided. Renamed and
/// replaced the manual remainder check with `u32::div_ceil`, which computes
/// the same result.
fn int_div_round_up(dividend: u32, divisor: u32) -> u32 {
	dividend.div_ceil(divisor)
}
293
impl<const W: usize, const H: usize> Iterator for WgpuRenderer<W, H> {
	type Item = RenderedFrame;

	/// Renders the next frame of the sequence on the GPU and synchronously
	/// reads the RGBA pixels back; returns `None` once the sequence is
	/// exhausted.
	fn next(&mut self) -> Option<Self::Item> {
		let frame = self.sequence.pop()?;

		let mut encoder = self.device.create_command_encoder(&wgpu::CommandEncoderDescriptor {
			label: Some("encoder")
		});

		let frame_hold = frame.frame_hold;
		// Upload this frame's per-cell atlas indices (chars mapped through the LUT).
		self.queue.write_buffer(&self.idx_grid, 0, &frame.serialize(&self.lut));

		let mut compute_pass = encoder.begin_compute_pass(&wgpu::ComputePassDescriptor {
			label: Some("sample_compute_pass"),
			timestamp_writes: None
		});
		compute_pass.set_pipeline(&self.pipeline);
		compute_pass.set_bind_group(0, &self.bind_group, &[]);
		// Enough 16×16 workgroups to cover every output pixel; assumes the
		// shader declares @workgroup_size(16, 16) — TODO confirm in shader.wgsl.
		compute_pass.dispatch_workgroups(
			int_div_round_up(self.output_width, 16),
			int_div_round_up(self.output_height, 16),
			1
		);
		// End the pass so the encoder can record the copy below.
		drop(compute_pass);

		let pixels = self.output_width * self.output_height;
		// Staging buffer for CPU readback (4 bytes per RGBA pixel).
		// NOTE(review): allocated fresh every frame; could be created once in
		// `new` and reused.
		let map_buf = self.device.create_buffer(&wgpu::BufferDescriptor {
			label: Some("map_buf"),
			size: pixels as u64 * 4,
			usage: wgpu::BufferUsages::COPY_DST | wgpu::BufferUsages::MAP_READ,
			mapped_at_creation: false
		});

		encoder.copy_buffer_to_buffer(
			&self.output_img, 0,
			&map_buf, 0,
			pixels as u64 * 4
		);

		self.queue.submit(std::iter::once(encoder.finish()));

		// Block until the GPU finishes and the staging buffer is mapped.
		map_buf.map_async(wgpu::MapMode::Read, .., |r| r.unwrap());
		self.device.poll(wgpu::PollType::Wait).unwrap();

		let serialized_data = map_buf.get_mapped_range(..).to_vec();
		Some(RenderedFrame::deserialize(
			self.output_width,
			self.output_height,
			serialized_data,
			frame_hold
		))
	}
}
348
/// Sealed-trait machinery: because `Sealed` lives in a private module,
/// no type outside this crate can implement `VideoSrc`.
mod private {
	use super::WgpuRenderer;

	pub trait Sealed {}

	impl<const W: usize, const H: usize> Sealed for WgpuRenderer<W, H> {}
}
356
/// A source of rendered frames with fixed output dimensions and framerate.
///
/// Sealed: only types in this crate (currently `WgpuRenderer`) may
/// implement it.
pub trait VideoSrc: Iterator<Item = RenderedFrame> + Send + 'static + private::Sealed {
	/// Frames per second of the produced video.
	fn framerate(&self) -> NonZeroU8;
	/// Output image width in pixels.
	fn width(&self) -> u32;
	/// Output image height in pixels.
	fn height(&self) -> u32;
}
362
impl<const W: usize, const H: usize> VideoSrc for WgpuRenderer<W, H> {
	#[inline]
	fn framerate(&self) -> NonZeroU8 {
		// Framerate is dictated by the sequence being rendered.
		self.sequence.framerate
	}

	#[inline]
	fn width(&self) -> u32 {
		self.output_width
	}

	#[inline]
	fn height(&self) -> u32 {
		self.output_height
	}
}