screen_13/graph/resolver.rs

use {
    super::{
        Area, Attachment, Binding, Bindings, ExecutionPipeline, Node, NodeIndex, Pass, RenderGraph,
        node::SwapchainImageNode,
        pass_ref::{Subresource, SubresourceAccess},
    },
    crate::{
        driver::{
            AttachmentInfo, AttachmentRef, CommandBuffer, CommandBufferInfo, Descriptor,
            DescriptorInfo, DescriptorPool, DescriptorPoolInfo, DescriptorSet, DriverError,
            FramebufferAttachmentImageInfo, FramebufferInfo, RenderPass, RenderPassInfo,
            SubpassDependency, SubpassInfo,
            accel_struct::AccelerationStructure,
            buffer::Buffer,
            format_aspect_mask,
            graphic::{DepthStencilMode, GraphicPipeline},
            image::{Image, ImageAccess},
            image_access_layout, initial_image_layout_access, is_read_access, is_write_access,
            pipeline_stage_access_flags,
            swapchain::SwapchainImage,
        },
        pool::{Lease, Pool},
    },
    ash::vk,
    log::{
        Level::{Debug, Trace},
        debug, log_enabled, trace,
    },
    std::{
        cell::RefCell,
        collections::{BTreeMap, HashMap, VecDeque},
        iter::repeat_n,
        ops::Range,
    },
    vk_sync::{AccessType, BufferBarrier, GlobalBarrier, ImageBarrier, cmd::pipeline_barrier},
};

#[cfg(not(debug_assertions))]
use std::hint::unreachable_unchecked;

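/// Caches which node bindings each pass accesses so dependency searches do not
/// repeatedly walk the recorded executions.
///
/// Layout (see `update`): `accesses` is a pass-major boolean matrix flattened
/// into one `Vec`, indexed as `pass_idx * binding_count + node_idx`. `reads`
/// shares the same stride, but for each pass only the first
/// `read_count[pass_idx]` entries of its row are meaningful.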
#[derive(Default)]
struct AccessCache {
    accesses: Vec<bool>,
    binding_count: usize,
    read_count: Vec<usize>,
    reads: Vec<usize>,
}

impl AccessCache {
    /// Finds the unique indexes of the node bindings which a given pass reads. Results are
    /// returned in the reverse of the order in which the dependencies must be resolved.
    ///
    /// Here, "dependent" means that the pass reads from the node.
    #[profiling::function]
    fn dependent_nodes(&self, pass_idx: usize) -> impl ExactSizeIterator<Item = usize> + '_ {
        let pass_start = pass_idx * self.binding_count;
        let pass_end = pass_start + self.read_count[pass_idx];
        self.reads[pass_start..pass_end].iter().copied()
    }

    /// Finds the unique indexes of the passes which write to a given node, without inspecting
    /// passes at or beyond `end_pass_idx`. Results are returned in the reverse of the order in
    /// which the dependencies must be resolved.
    ///
    /// Here, "dependent" means that the pass writes to the node.
    #[profiling::function]
    fn dependent_passes(
        &self,
        node_idx: usize,
        end_pass_idx: usize,
    ) -> impl Iterator<Item = usize> + '_ {
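        // Walk the `node_idx` column of the pass-major access matrix: starting
        // at that node's cell for pass 0 and stepping one full row at a time
        // visits the node's access flag for every pass before `end_pass_idx`.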
        self.accesses[node_idx..end_pass_idx * self.binding_count]
            .iter()
            .step_by(self.binding_count)
            .enumerate()
            .rev()
            .filter_map(|(pass_idx, write)| write.then_some(pass_idx))
    }

    /// Returns the unique indexes of the passes which are dependent on the given pass.
    #[profiling::function]
    fn interdependent_passes(
        &self,
        pass_idx: usize,
        end_pass_idx: usize,
    ) -> impl Iterator<Item = usize> + '_ {
        self.dependent_nodes(pass_idx)
            .flat_map(move |node_idx| self.dependent_passes(node_idx, end_pass_idx))
    }

    fn update(&mut self, graph: &RenderGraph, end_pass_idx: usize) {
        self.binding_count = graph.bindings.len();

        let cache_len = self.binding_count * end_pass_idx;

        self.accesses.truncate(cache_len);
        self.accesses.fill(false);
        self.accesses.resize(cache_len, false);

        self.read_count.clear();

        self.reads.truncate(cache_len);
        self.reads.fill(usize::MAX);
        self.reads.resize(cache_len, usize::MAX);

        thread_local! {
            static NODES: RefCell<Vec<bool>> = Default::default();
        }

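        // `nodes[node_idx]` is true while the node has not yet been recorded as
        // a read of the current pass; it dedupes repeated accesses of one node
        // within a single pass.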
        NODES.with_borrow_mut(|nodes| {
            nodes.truncate(self.binding_count);
            nodes.fill(true);
            nodes.resize(self.binding_count, true);

            for (pass_idx, pass) in graph.passes[0..end_pass_idx].iter().enumerate() {
                let pass_start = pass_idx * self.binding_count;
                let mut read_count = 0;

                for (&node_idx, accesses) in pass.execs.iter().flat_map(|exec| exec.accesses.iter())
                {
                    self.accesses[pass_start + node_idx] = true;

                    if nodes[node_idx] && is_read_access(accesses.first().unwrap().access) {
                        self.reads[pass_start + read_count] = node_idx;
                        nodes[node_idx] = false;
                        read_count += 1;
                    }
                }

                if pass_idx + 1 < end_pass_idx {
                    nodes.fill(true);
                }

                self.read_count.push(read_count);
            }
        });
    }
}

struct ImageSubresourceRangeDebug(vk::ImageSubresourceRange);

impl std::fmt::Debug for ImageSubresourceRangeDebug {
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        self.0.aspect_mask.fmt(f)?;

        f.write_str(" array: ")?;

        let array_layers = self.0.base_array_layer..self.0.base_array_layer + self.0.layer_count;
        array_layers.fmt(f)?;

        f.write_str(" mip: ")?;

        let mip_levels = self.0.base_mip_level..self.0.base_mip_level + self.0.level_count;
        mip_levels.fmt(f)
    }
}

#[derive(Debug)]
struct PhysicalPass {
    descriptor_pool: Option<Lease<DescriptorPool>>,
    exec_descriptor_sets: HashMap<usize, Vec<DescriptorSet>>,
    render_pass: Option<Lease<RenderPass>>,
}

impl Drop for PhysicalPass {
    fn drop(&mut self) {
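        // Drop in explicit order: the descriptor sets are freed before the pool
        // lease they were allocated from is returned (field order alone would
        // drop the pool first).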
        self.exec_descriptor_sets.clear();
        self.descriptor_pool = None;
    }
}

/// A structure which can read and execute render graphs. This pattern was derived from:
///
/// <http://themaister.net/blog/2017/08/15/render-graphs-and-vulkan-a-deep-dive/>
/// <https://github.com/EmbarkStudios/kajiya>
#[derive(Debug)]
pub struct Resolver {
    pub(super) graph: RenderGraph,
    physical_passes: Vec<PhysicalPass>,
}

impl Resolver {
    pub(super) fn new(graph: RenderGraph) -> Self {
        let physical_passes = Vec::with_capacity(graph.passes.len());

        Self {
            graph,
            physical_passes,
        }
    }

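    /// Decides whether two recorded passes can share one Vulkan render pass as
    /// consecutive subpasses. Both must be graphic and rasterize compatibly;
    /// merging is then only worthwhile when it keeps attachment data on tile,
    /// i.e. when the passes share a color or depth/stencil attachment or the
    /// second pass consumes subpass inputs.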
    #[profiling::function]
    fn allow_merge_passes(lhs: &Pass, rhs: &Pass) -> bool {
        fn first_graphic_pipeline(pass: &Pass) -> Option<&GraphicPipeline> {
            pass.execs
                .first()
                .and_then(|exec| exec.pipeline.as_ref().map(ExecutionPipeline::as_graphic))
                .flatten()
        }

        fn is_multiview(view_mask: u32) -> bool {
            view_mask != 0
        }

        let lhs_pipeline = first_graphic_pipeline(lhs);
        if lhs_pipeline.is_none() {
            trace!("  {} is not graphic", lhs.name);

            return false;
        }

        let rhs_pipeline = first_graphic_pipeline(rhs);
        if rhs_pipeline.is_none() {
            trace!("  {} is not graphic", rhs.name);

            return false;
        }

        let lhs_pipeline = unsafe { lhs_pipeline.unwrap_unchecked() };
        let rhs_pipeline = unsafe { rhs_pipeline.unwrap_unchecked() };

        // Must use the same general rasterization modes
        if lhs_pipeline.info.blend != rhs_pipeline.info.blend
            || lhs_pipeline.info.cull_mode != rhs_pipeline.info.cull_mode
            || lhs_pipeline.info.front_face != rhs_pipeline.info.front_face
            || lhs_pipeline.info.polygon_mode != rhs_pipeline.info.polygon_mode
            || lhs_pipeline.info.samples != rhs_pipeline.info.samples
        {
            trace!("  different rasterization modes");

            return false;
        }

        let rhs = rhs.execs.first();

        // PassRef makes sure this never happens
        debug_assert!(rhs.is_some());

        let rhs = unsafe { rhs.unwrap_unchecked() };

        let mut common_color_attachment = false;
        let mut common_depth_attachment = false;

        // Now we need to know what the subpasses (we may have prior merges) wrote
        for lhs in lhs.execs.iter().rev() {
            // Multiview subpasses cannot be combined with non-multiview subpasses
            if is_multiview(lhs.view_mask) != is_multiview(rhs.view_mask) {
                trace!("  incompatible multiview");

                return false;
            }

            // Compare individual color attachments for compatibility
            for (attachment_idx, lhs_attachment) in lhs
                .color_attachments
                .iter()
                .chain(lhs.color_loads.iter())
                .chain(lhs.color_stores.iter())
                .chain(
                    lhs.color_clears
                        .iter()
                        .map(|(attachment_idx, (attachment, _))| (attachment_idx, attachment)),
                )
                .chain(
                    lhs.color_resolves
                        .iter()
                        .map(|(attachment_idx, (attachment, _))| (attachment_idx, attachment)),
                )
            {
                let rhs_attachment = rhs
                    .color_attachments
                    .get(attachment_idx)
                    .or_else(|| rhs.color_loads.get(attachment_idx))
                    .or_else(|| rhs.color_stores.get(attachment_idx))
                    .or_else(|| {
                        rhs.color_clears
                            .get(attachment_idx)
                            .map(|(attachment, _)| attachment)
                    })
                    .or_else(|| {
                        rhs.color_resolves
                            .get(attachment_idx)
                            .map(|(attachment, _)| attachment)
                    });

                if !Attachment::are_compatible(Some(*lhs_attachment), rhs_attachment.copied()) {
                    trace!("  incompatible color attachments");

                    return false;
                }

                common_color_attachment = true;
            }

            // Compare depth/stencil attachments for compatibility
            let lhs_depth_stencil = lhs
                .depth_stencil_attachment
                .or(lhs.depth_stencil_load)
                .or(lhs.depth_stencil_store)
                .or_else(|| lhs.depth_stencil_resolve.map(|(attachment, ..)| attachment))
                .or_else(|| lhs.depth_stencil_clear.map(|(attachment, _)| attachment));

            let rhs_depth_stencil = rhs
                .depth_stencil_attachment
                .or(rhs.depth_stencil_load)
                .or(rhs.depth_stencil_store)
                .or_else(|| rhs.depth_stencil_resolve.map(|(attachment, ..)| attachment))
                .or_else(|| rhs.depth_stencil_clear.map(|(attachment, _)| attachment));

            if !Attachment::are_compatible(lhs_depth_stencil, rhs_depth_stencil) {
                trace!("  incompatible depth/stencil attachments");

                return false;
            }

            common_depth_attachment |= lhs_depth_stencil.is_some() && rhs_depth_stencil.is_some();
        }

        // Keep color and depth on tile
        if common_color_attachment || common_depth_attachment {
            trace!("  merging due to common image");

            return true;
        }

        // Keep input on tile
        if !rhs_pipeline.input_attachments.is_empty() {
            trace!("  merging due to subpass input");

            return true;
        }

        trace!("  not merging");

        // No reason to merge, so don't.
        false
    }

    // See https://vulkan.lunarg.com/doc/view/1.3.204.1/linux/1.3-extensions/vkspec.html#attachment-type-imagelayout
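    // Summary of the mapping below: an attachment that is both written and read
    // as subpass input must be GENERAL; otherwise writable attachments get the
    // *_ATTACHMENT_OPTIMAL layout for their aspect and read-only depth/stencil
    // gets *_READ_ONLY_OPTIMAL.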
    fn attachment_layout(
        aspect_mask: vk::ImageAspectFlags,
        is_random_access: bool,
        is_input: bool,
    ) -> vk::ImageLayout {
        if aspect_mask.contains(vk::ImageAspectFlags::COLOR) {
            if is_input {
                vk::ImageLayout::GENERAL
            } else {
                vk::ImageLayout::COLOR_ATTACHMENT_OPTIMAL
            }
        } else if aspect_mask.contains(vk::ImageAspectFlags::DEPTH | vk::ImageAspectFlags::STENCIL)
        {
            if is_random_access {
                if is_input {
                    vk::ImageLayout::GENERAL
                } else {
                    vk::ImageLayout::DEPTH_STENCIL_ATTACHMENT_OPTIMAL
                }
            } else {
                vk::ImageLayout::DEPTH_STENCIL_READ_ONLY_OPTIMAL
            }
        } else if aspect_mask.contains(vk::ImageAspectFlags::DEPTH) {
            if is_random_access {
                if is_input {
                    vk::ImageLayout::GENERAL
                } else {
                    vk::ImageLayout::DEPTH_ATTACHMENT_OPTIMAL
                }
            } else {
                vk::ImageLayout::DEPTH_READ_ONLY_OPTIMAL
            }
        } else if aspect_mask.contains(vk::ImageAspectFlags::STENCIL) {
            if is_random_access {
                if is_input {
                    vk::ImageLayout::GENERAL
                } else {
                    vk::ImageLayout::STENCIL_ATTACHMENT_OPTIMAL
                }
            } else {
                vk::ImageLayout::STENCIL_READ_ONLY_OPTIMAL
            }
        } else {
            vk::ImageLayout::UNDEFINED
        }
    }

    #[profiling::function]
    fn begin_render_pass(
        cmd_buf: &CommandBuffer,
        bindings: &[Binding],
        pass: &Pass,
        physical_pass: &mut PhysicalPass,
        render_area: Area,
    ) -> Result<(), DriverError> {
        trace!("  begin render pass");

        let render_pass = physical_pass.render_pass.as_mut().unwrap();
        let attachment_count = render_pass.info.attachments.len();

        let mut attachments = Vec::with_capacity(attachment_count);
        attachments.resize(
            attachment_count,
            FramebufferAttachmentImageInfo {
                flags: vk::ImageCreateFlags::empty(),
                usage: vk::ImageUsageFlags::empty(),
                width: 0,
                height: 0,
                layer_count: 0,
                view_formats: vec![],
            },
        );

        thread_local! {
            static CLEARS_VIEWS: RefCell<(Vec<vk::ClearValue>, Vec<vk::ImageView>)> = Default::default();
        }

        CLEARS_VIEWS.with_borrow_mut(|(clear_values, image_views)| {
            clear_values.resize_with(attachment_count, vk::ClearValue::default);
            image_views.resize(attachment_count, vk::ImageView::null());

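            // `view_formats` is kept sorted, so a failed binary search both
            // tells us this attachment has not been described yet and yields
            // the insertion index for its format.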
            for exec in &pass.execs {
                for (attachment_idx, (attachment, clear_value)) in &exec.color_clears {
                    let attachment_image = &mut attachments[*attachment_idx as usize];
                    if let Err(idx) = attachment_image
                        .view_formats
                        .binary_search(&attachment.format)
                    {
                        clear_values[*attachment_idx as usize] = vk::ClearValue {
                            color: vk::ClearColorValue {
                                float32: clear_value.0,
                            },
                        };

                        let image = bindings[attachment.target].as_driver_image().unwrap();

                        attachment_image.flags = image.info.flags;
                        attachment_image.usage = image.info.usage;
                        attachment_image.width = image.info.width >> attachment.base_mip_level;
                        attachment_image.height = image.info.height >> attachment.base_mip_level;
                        attachment_image.layer_count = attachment.array_layer_count;
                        attachment_image.view_formats.insert(idx, attachment.format);

                        image_views[*attachment_idx as usize] =
                            Image::view(image, attachment.image_view_info(image.info))?;
                    }
                }

                for (attachment_idx, attachment) in exec
                    .color_attachments
                    .iter()
                    .chain(&exec.color_loads)
                    .chain(&exec.color_stores)
                    .chain(exec.color_resolves.iter().map(
                        |(dst_attachment_idx, (attachment, _))| (dst_attachment_idx, attachment),
                    ))
                {
                    let attachment_image = &mut attachments[*attachment_idx as usize];
                    if let Err(idx) = attachment_image
                        .view_formats
                        .binary_search(&attachment.format)
                    {
                        let image = bindings[attachment.target].as_driver_image().unwrap();

                        attachment_image.flags = image.info.flags;
                        attachment_image.usage = image.info.usage;
                        attachment_image.width = image.info.width >> attachment.base_mip_level;
                        attachment_image.height = image.info.height >> attachment.base_mip_level;
                        attachment_image.layer_count = attachment.array_layer_count;
                        attachment_image.view_formats.insert(idx, attachment.format);

                        image_views[*attachment_idx as usize] =
                            Image::view(image, attachment.image_view_info(image.info))?;
                    }
                }

                if let Some((attachment, clear_value)) = &exec.depth_stencil_clear {
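                    // Depth/stencil follows the color attachments; when a
                    // depth/stencil resolve target exists it occupies the final
                    // slot, so the attachment itself sits one slot earlier.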
                    let attachment_idx =
                        attachments.len() - 1 - exec.depth_stencil_resolve.is_some() as usize;
                    let attachment_image = &mut attachments[attachment_idx];
                    if let Err(idx) = attachment_image
                        .view_formats
                        .binary_search(&attachment.format)
                    {
                        clear_values[attachment_idx] = vk::ClearValue {
                            depth_stencil: *clear_value,
                        };

                        let image = bindings[attachment.target].as_driver_image().unwrap();

                        attachment_image.flags = image.info.flags;
                        attachment_image.usage = image.info.usage;
                        attachment_image.width = image.info.width >> attachment.base_mip_level;
                        attachment_image.height = image.info.height >> attachment.base_mip_level;
                        attachment_image.layer_count = attachment.array_layer_count;
                        attachment_image.view_formats.insert(idx, attachment.format);

                        image_views[attachment_idx] =
                            Image::view(image, attachment.image_view_info(image.info))?;
                    }
                }

                if let Some(attachment) = exec
                    .depth_stencil_attachment
                    .or(exec.depth_stencil_load)
                    .or(exec.depth_stencil_store)
                {
                    let attachment_idx =
                        attachments.len() - 1 - exec.depth_stencil_resolve.is_some() as usize;
                    let attachment_image = &mut attachments[attachment_idx];
                    if let Err(idx) = attachment_image
                        .view_formats
                        .binary_search(&attachment.format)
                    {
                        let image = bindings[attachment.target].as_driver_image().unwrap();

                        attachment_image.flags = image.info.flags;
                        attachment_image.usage = image.info.usage;
                        attachment_image.width = image.info.width >> attachment.base_mip_level;
                        attachment_image.height = image.info.height >> attachment.base_mip_level;
                        attachment_image.layer_count = attachment.array_layer_count;
                        attachment_image.view_formats.insert(idx, attachment.format);

                        image_views[attachment_idx] =
                            Image::view(image, attachment.image_view_info(image.info))?;
                    }
                }

                if let Some(attachment) = exec
                    .depth_stencil_resolve
                    .map(|(attachment, ..)| attachment)
                {
                    let attachment_idx = attachments.len() - 1;
                    let attachment_image = &mut attachments[attachment_idx];
                    if let Err(idx) = attachment_image
                        .view_formats
                        .binary_search(&attachment.format)
                    {
                        let image = bindings[attachment.target].as_driver_image().unwrap();

                        attachment_image.flags = image.info.flags;
                        attachment_image.usage = image.info.usage;
                        attachment_image.width = image.info.width >> attachment.base_mip_level;
                        attachment_image.height = image.info.height >> attachment.base_mip_level;
                        attachment_image.layer_count = attachment.array_layer_count;
                        attachment_image.view_formats.insert(idx, attachment.format);

                        image_views[attachment_idx] =
                            Image::view(image, attachment.image_view_info(image.info))?;
                    }
                }
            }

            let framebuffer =
                RenderPass::framebuffer(render_pass, FramebufferInfo { attachments })?;

            unsafe {
                cmd_buf.device.cmd_begin_render_pass(
                    **cmd_buf,
                    &vk::RenderPassBeginInfo::default()
                        .render_pass(***render_pass)
                        .framebuffer(framebuffer)
                        .render_area(vk::Rect2D {
                            offset: vk::Offset2D {
                                x: render_area.x,
                                y: render_area.y,
                            },
                            extent: vk::Extent2D {
                                width: render_area.width,
                                height: render_area.height,
                            },
                        })
                        .clear_values(clear_values)
                        .push_next(
                            &mut vk::RenderPassAttachmentBeginInfoKHR::default()
                                .attachments(image_views),
                        ),
                    vk::SubpassContents::INLINE,
                );
            }

            Ok(())
        })
    }

    #[profiling::function]
    fn bind_descriptor_sets(
        cmd_buf: &CommandBuffer,
        pipeline: &ExecutionPipeline,
        physical_pass: &PhysicalPass,
        exec_idx: usize,
    ) {
        if let Some(exec_descriptor_sets) = physical_pass.exec_descriptor_sets.get(&exec_idx) {
            thread_local! {
                static DESCRIPTOR_SETS: RefCell<Vec<vk::DescriptorSet>> = Default::default();
            }

            if exec_descriptor_sets.is_empty() {
                return;
            }

            DESCRIPTOR_SETS.with_borrow_mut(|descriptor_sets| {
                descriptor_sets.clear();
                descriptor_sets.extend(
                    exec_descriptor_sets
                        .iter()
                        .map(|descriptor_set| **descriptor_set),
                );

                trace!("    bind descriptor sets {:?}", descriptor_sets);

                unsafe {
                    cmd_buf.device.cmd_bind_descriptor_sets(
                        **cmd_buf,
                        pipeline.bind_point(),
                        pipeline.layout(),
                        0,
                        descriptor_sets,
                        &[],
                    );
                }
            });
        }
    }

    #[profiling::function]
    fn bind_pipeline(
        cmd_buf: &mut CommandBuffer,
        physical_pass: &mut PhysicalPass,
        exec_idx: usize,
        pipeline: &mut ExecutionPipeline,
        depth_stencil: Option<DepthStencilMode>,
    ) -> Result<(), DriverError> {
        if log_enabled!(Trace) {
            let (ty, name, vk_pipeline) = match pipeline {
                ExecutionPipeline::Compute(pipeline) => {
                    ("compute", pipeline.name.as_ref(), ***pipeline)
                }
                ExecutionPipeline::Graphic(pipeline) => {
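                    // Graphic pipelines are compiled lazily against the render
                    // pass (see `RenderPass::graphic_pipeline` below), so there
                    // is no VkPipeline handle to log yet.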
                    ("graphic", pipeline.name.as_ref(), vk::Pipeline::null())
                }
                ExecutionPipeline::RayTrace(pipeline) => {
                    ("ray trace", pipeline.name.as_ref(), ***pipeline)
                }
            };
            if let Some(name) = name {
                trace!("    bind {} pipeline {} ({:?})", ty, name, vk_pipeline);
            } else {
                trace!("    bind {} pipeline {:?}", ty, vk_pipeline);
            }
        }

        // We store a shared reference to this pipeline inside the command buffer!
        let pipeline_bind_point = pipeline.bind_point();
        let pipeline = match pipeline {
            ExecutionPipeline::Compute(pipeline) => ***pipeline,
            ExecutionPipeline::Graphic(pipeline) => RenderPass::graphic_pipeline(
                physical_pass.render_pass.as_mut().unwrap(),
                pipeline,
                depth_stencil,
                exec_idx as _,
            )?,
            ExecutionPipeline::RayTrace(pipeline) => ***pipeline,
        };

        unsafe {
            cmd_buf
                .device
                .cmd_bind_pipeline(**cmd_buf, pipeline_bind_point, pipeline);
        }

        Ok(())
    }

    fn end_render_pass(&mut self, cmd_buf: &CommandBuffer) {
        trace!("  end render pass");

        unsafe {
            cmd_buf.device.cmd_end_render_pass(**cmd_buf);
        }
    }

    /// Returns `true` when all recorded passes have been submitted to a driver command buffer.
    ///
    /// A fully-resolved graph contains no additional work and may be discarded, although doing so
    /// will stall the GPU while the fences are waited on. It is preferable to wait a few frames so
    /// that the fences will have already been signalled.
    pub fn is_resolved(&self) -> bool {
        self.graph.passes.is_empty()
    }

    #[allow(clippy::type_complexity)]
    #[profiling::function]
    fn lease_descriptor_pool<P>(
        pool: &mut P,
        pass: &Pass,
    ) -> Result<Option<Lease<DescriptorPool>>, DriverError>
    where
        P: Pool<DescriptorPoolInfo, DescriptorPool>,
    {
        let max_set_idx = pass
            .execs
            .iter()
            .flat_map(|exec| exec.bindings.keys())
            .map(|descriptor| descriptor.set())
            .max()
            .unwrap_or_default();
        let max_sets = pass.execs.len() as u32 * (max_set_idx + 1);
        let mut info = DescriptorPoolInfo {
            max_sets,
            ..Default::default()
        };

        // Find the total count of descriptors per type (there may be multiple pipelines!)
        for pool_sizes in pass.descriptor_pools_sizes() {
            for pool_size in pool_sizes.values() {
                for (&descriptor_ty, &descriptor_count) in pool_size {
                    debug_assert_ne!(descriptor_count, 0);

                    match descriptor_ty {
                        vk::DescriptorType::ACCELERATION_STRUCTURE_KHR => {
                            info.acceleration_structure_count += descriptor_count;
                        }
                        vk::DescriptorType::COMBINED_IMAGE_SAMPLER => {
                            info.combined_image_sampler_count += descriptor_count;
                        }
                        vk::DescriptorType::INPUT_ATTACHMENT => {
                            info.input_attachment_count += descriptor_count;
                        }
                        vk::DescriptorType::SAMPLED_IMAGE => {
                            info.sampled_image_count += descriptor_count;
                        }
                        vk::DescriptorType::SAMPLER => {
                            info.sampler_count += descriptor_count;
                        }
                        vk::DescriptorType::STORAGE_BUFFER => {
                            info.storage_buffer_count += descriptor_count;
                        }
                        vk::DescriptorType::STORAGE_BUFFER_DYNAMIC => {
                            info.storage_buffer_dynamic_count += descriptor_count;
                        }
                        vk::DescriptorType::STORAGE_IMAGE => {
                            info.storage_image_count += descriptor_count;
                        }
                        vk::DescriptorType::STORAGE_TEXEL_BUFFER => {
                            info.storage_texel_buffer_count += descriptor_count;
                        }
                        vk::DescriptorType::UNIFORM_BUFFER => {
                            info.uniform_buffer_count += descriptor_count;
                        }
                        vk::DescriptorType::UNIFORM_BUFFER_DYNAMIC => {
                            info.uniform_buffer_dynamic_count += descriptor_count;
                        }
                        vk::DescriptorType::UNIFORM_TEXEL_BUFFER => {
                            info.uniform_texel_buffer_count += descriptor_count;
                        }
                        _ => unimplemented!("{descriptor_ty:?}"),
                    };
                }
            }
        }

        // It's possible to execute a command-only pipeline
        if info.is_empty() {
            return Ok(None);
        }

        // Trivially round up the descriptor counts to increase cache coherence
        const ATOM: u32 = 1 << 5;
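        // With ATOM = 32, demands of 1 and 30 both round up to 32, so passes
        // with similar descriptor shapes map to identical pool infos and are
        // more likely to reuse a cached lease.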
        info.acceleration_structure_count =
            info.acceleration_structure_count.next_multiple_of(ATOM);
        info.combined_image_sampler_count =
            info.combined_image_sampler_count.next_multiple_of(ATOM);
        info.input_attachment_count = info.input_attachment_count.next_multiple_of(ATOM);
        info.sampled_image_count = info.sampled_image_count.next_multiple_of(ATOM);
        info.sampler_count = info.sampler_count.next_multiple_of(ATOM);
        info.storage_buffer_count = info.storage_buffer_count.next_multiple_of(ATOM);
        info.storage_buffer_dynamic_count =
            info.storage_buffer_dynamic_count.next_multiple_of(ATOM);
        info.storage_image_count = info.storage_image_count.next_multiple_of(ATOM);
        info.storage_texel_buffer_count = info.storage_texel_buffer_count.next_multiple_of(ATOM);
        info.uniform_buffer_count = info.uniform_buffer_count.next_multiple_of(ATOM);
        info.uniform_buffer_dynamic_count =
            info.uniform_buffer_dynamic_count.next_multiple_of(ATOM);
        info.uniform_texel_buffer_count = info.uniform_texel_buffer_count.next_multiple_of(ATOM);

        // Note that every set is sized large enough to hold any other set;
        // TODO: size these more efficiently

        // debug!("{:#?}", info);

        Ok(Some(pool.lease(info)?))
    }

    #[profiling::function]
    fn lease_render_pass<P>(
        &self,
        pool: &mut P,
        pass_idx: usize,
    ) -> Result<Lease<RenderPass>, DriverError>
    where
        P: Pool<RenderPassInfo, RenderPass>,
    {
        let pass = &self.graph.passes[pass_idx];
        let (mut color_attachment_count, mut depth_stencil_attachment_count) = (0, 0);
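        // Color attachments may be referenced sparsely, so the count is the
        // highest referenced index plus one across every execution; the
        // depth/stencil count adds at most two more slots (the attachment and
        // its resolve target).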
        for exec in &pass.execs {
            color_attachment_count = color_attachment_count
                .max(
                    exec.color_attachments
                        .keys()
                        .max()
                        .map(|attachment_idx| attachment_idx + 1)
                        .unwrap_or_default() as usize,
                )
                .max(
                    exec.color_clears
                        .keys()
                        .max()
                        .map(|attachment_idx| attachment_idx + 1)
                        .unwrap_or_default() as usize,
                )
                .max(
                    exec.color_loads
                        .keys()
                        .max()
                        .map(|attachment_idx| attachment_idx + 1)
                        .unwrap_or_default() as usize,
                )
                .max(
                    exec.color_resolves
                        .keys()
                        .max()
                        .map(|attachment_idx| attachment_idx + 1)
                        .unwrap_or_default() as usize,
                )
                .max(
                    exec.color_stores
                        .keys()
                        .max()
                        .map(|attachment_idx| attachment_idx + 1)
                        .unwrap_or_default() as usize,
                );
            let has_depth_stencil_attachment = exec.depth_stencil_attachment.is_some()
                || exec.depth_stencil_clear.is_some()
                || exec.depth_stencil_load.is_some()
                || exec.depth_stencil_store.is_some();
            let has_depth_stencil_resolve = exec.depth_stencil_resolve.is_some();

            depth_stencil_attachment_count = depth_stencil_attachment_count
                .max(has_depth_stencil_attachment as usize + has_depth_stencil_resolve as usize);
        }

        let attachment_count = color_attachment_count + depth_stencil_attachment_count;
        let mut attachments = Vec::with_capacity(attachment_count);
        attachments.resize_with(attachment_count, AttachmentInfo::default);

        let mut subpasses = Vec::<SubpassInfo>::with_capacity(pass.execs.len());

        {
            let mut color_set = vec![false; attachment_count];
            let mut depth_stencil_set = false;

            // Add load op attachments using the first executions
            for exec in &pass.execs {
                // Cleared color attachments
                for (attachment_idx, (cleared_attachment, _)) in &exec.color_clears {
                    let color_set = &mut color_set[*attachment_idx as usize];
                    if *color_set {
                        continue;
                    }

                    let attachment = &mut attachments[*attachment_idx as usize];
                    attachment.fmt = cleared_attachment.format;
                    attachment.sample_count = cleared_attachment.sample_count;
                    attachment.load_op = vk::AttachmentLoadOp::CLEAR;
                    attachment.initial_layout = vk::ImageLayout::COLOR_ATTACHMENT_OPTIMAL;
                    *color_set = true;
                }

                // Loaded color attachments
                for (attachment_idx, loaded_attachment) in &exec.color_loads {
                    let color_set = &mut color_set[*attachment_idx as usize];
                    if *color_set {
                        continue;
                    }

                    let attachment = &mut attachments[*attachment_idx as usize];
                    attachment.fmt = loaded_attachment.format;
                    attachment.sample_count = loaded_attachment.sample_count;
                    attachment.load_op = vk::AttachmentLoadOp::LOAD;
                    attachment.initial_layout = vk::ImageLayout::COLOR_ATTACHMENT_OPTIMAL;
                    *color_set = true;
                }

                // Cleared depth/stencil attachment
                if !depth_stencil_set {
                    if let Some((cleared_attachment, _)) = exec.depth_stencil_clear {
                        let attachment = &mut attachments[color_attachment_count];
                        attachment.fmt = cleared_attachment.format;
                        attachment.sample_count = cleared_attachment.sample_count;
                        attachment.initial_layout = if cleared_attachment
                            .aspect_mask
                            .contains(vk::ImageAspectFlags::DEPTH | vk::ImageAspectFlags::STENCIL)
                        {
                            attachment.load_op = vk::AttachmentLoadOp::CLEAR;
                            attachment.stencil_load_op = vk::AttachmentLoadOp::CLEAR;

                            vk::ImageLayout::DEPTH_STENCIL_ATTACHMENT_OPTIMAL
                        } else if cleared_attachment
                            .aspect_mask
                            .contains(vk::ImageAspectFlags::DEPTH)
                        {
                            attachment.load_op = vk::AttachmentLoadOp::CLEAR;

                            vk::ImageLayout::DEPTH_ATTACHMENT_OPTIMAL
                        } else {
                            attachment.stencil_load_op = vk::AttachmentLoadOp::CLEAR;

                            vk::ImageLayout::STENCIL_ATTACHMENT_OPTIMAL
                        };
                        depth_stencil_set = true;
                    } else if let Some(loaded_attachment) = exec.depth_stencil_load {
                        // Loaded depth/stencil attachment
                        let attachment = &mut attachments[color_attachment_count];
                        attachment.fmt = loaded_attachment.format;
                        attachment.sample_count = loaded_attachment.sample_count;
                        attachment.initial_layout = if loaded_attachment
                            .aspect_mask
                            .contains(vk::ImageAspectFlags::DEPTH | vk::ImageAspectFlags::STENCIL)
                        {
                            attachment.load_op = vk::AttachmentLoadOp::LOAD;
                            attachment.stencil_load_op = vk::AttachmentLoadOp::LOAD;

                            vk::ImageLayout::DEPTH_STENCIL_READ_ONLY_OPTIMAL
                        } else if loaded_attachment
                            .aspect_mask
                            .contains(vk::ImageAspectFlags::DEPTH)
                        {
                            attachment.load_op = vk::AttachmentLoadOp::LOAD;

                            vk::ImageLayout::DEPTH_READ_ONLY_OPTIMAL
                        } else {
                            attachment.stencil_load_op = vk::AttachmentLoadOp::LOAD;

                            vk::ImageLayout::STENCIL_READ_ONLY_OPTIMAL
                        };
                        depth_stencil_set = true;
                    } else if exec.depth_stencil_clear.is_some()
                        || exec.depth_stencil_store.is_some()
                    {
                        depth_stencil_set = true;
                    }
                }
            }
        }

        {
            let mut color_set = vec![false; attachment_count];
            let mut depth_stencil_set = false;
            let mut depth_stencil_resolve_set = false;

            // Add store op attachments using the last executions
            for exec in pass.execs.iter().rev() {
                // Resolved color attachments
                for (attachment_idx, (resolved_attachment, _)) in &exec.color_resolves {
                    let color_set = &mut color_set[*attachment_idx as usize];
                    if *color_set {
                        continue;
                    }

                    let attachment = &mut attachments[*attachment_idx as usize];
                    attachment.fmt = resolved_attachment.format;
                    attachment.sample_count = resolved_attachment.sample_count;
                    attachment.final_layout = vk::ImageLayout::COLOR_ATTACHMENT_OPTIMAL;
                    *color_set = true;
                }

                // Stored color attachments
                for (attachment_idx, stored_attachment) in &exec.color_stores {
                    let color_set = &mut color_set[*attachment_idx as usize];
                    if *color_set {
                        continue;
                    }

                    let attachment = &mut attachments[*attachment_idx as usize];
                    attachment.fmt = stored_attachment.format;
                    attachment.sample_count = stored_attachment.sample_count;
                    attachment.store_op = vk::AttachmentStoreOp::STORE;
                    attachment.final_layout = vk::ImageLayout::COLOR_ATTACHMENT_OPTIMAL;
                    *color_set = true;
                }

                // Stored depth/stencil attachment
                if !depth_stencil_set && let Some(stored_attachment) = exec.depth_stencil_store {
                    let attachment = &mut attachments[color_attachment_count];
                    attachment.fmt = stored_attachment.format;
                    attachment.sample_count = stored_attachment.sample_count;
                    attachment.final_layout = if stored_attachment
                        .aspect_mask
                        .contains(vk::ImageAspectFlags::DEPTH | vk::ImageAspectFlags::STENCIL)
                    {
                        attachment.store_op = vk::AttachmentStoreOp::STORE;
                        attachment.stencil_store_op = vk::AttachmentStoreOp::STORE;

                        vk::ImageLayout::DEPTH_STENCIL_ATTACHMENT_OPTIMAL
                    } else if stored_attachment
                        .aspect_mask
                        .contains(vk::ImageAspectFlags::DEPTH)
                    {
                        attachment.store_op = vk::AttachmentStoreOp::STORE;

                        vk::ImageLayout::DEPTH_ATTACHMENT_OPTIMAL
                    } else {
                        attachment.stencil_store_op = vk::AttachmentStoreOp::STORE;

                        vk::ImageLayout::STENCIL_ATTACHMENT_OPTIMAL
                    };
                    depth_stencil_set = true;
                }

                // Resolved depth/stencil attachment
                if !depth_stencil_resolve_set
                    && let Some((resolved_attachment, ..)) = exec.depth_stencil_resolve
                {
                    let attachment = attachments.last_mut().unwrap();
                    attachment.fmt = resolved_attachment.format;
                    attachment.sample_count = resolved_attachment.sample_count;
                    attachment.final_layout = if resolved_attachment
                        .aspect_mask
                        .contains(vk::ImageAspectFlags::DEPTH | vk::ImageAspectFlags::STENCIL)
                    {
                        vk::ImageLayout::DEPTH_STENCIL_ATTACHMENT_OPTIMAL
                    } else if resolved_attachment
                        .aspect_mask
                        .contains(vk::ImageAspectFlags::DEPTH)
                    {
                        vk::ImageLayout::DEPTH_ATTACHMENT_OPTIMAL
                    } else {
                        vk::ImageLayout::STENCIL_ATTACHMENT_OPTIMAL
                    };
                    depth_stencil_resolve_set = true;
                }
            }
        }

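        // An attachment that is never loaded has undefined contents on entry,
        // and one that is never stored has no contents anyone may read on exit;
        // mirroring the known layout in each case avoids a needless transition.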
        for attachment in &mut attachments {
            if attachment.load_op == vk::AttachmentLoadOp::DONT_CARE {
                attachment.initial_layout = attachment.final_layout;
            } else if attachment.store_op == vk::AttachmentStoreOp::DONT_CARE
                && attachment.stencil_store_op == vk::AttachmentStoreOp::DONT_CARE
            {
                attachment.final_layout = attachment.initial_layout;
            }
        }

        // Add subpasses
        for (exec_idx, exec) in pass.execs.iter().enumerate() {
            let pipeline = exec
                .pipeline
                .as_ref()
                .map(|pipeline| pipeline.unwrap_graphic())
                .unwrap();
            let mut subpass_info = SubpassInfo::with_capacity(attachment_count);

            // Add input attachments
            for attachment_idx in pipeline.input_attachments.iter() {
                debug_assert!(
                    !exec.color_clears.contains_key(attachment_idx),
                    "cannot clear color attachment index {attachment_idx} because it uses subpass input",
                );

                let exec_attachment = exec
                    .color_attachments
                    .get(attachment_idx)
                    .or_else(|| exec.color_loads.get(attachment_idx))
                    .or_else(|| exec.color_stores.get(attachment_idx))
                    .expect("subpass input attachment index not attached, loaded, or stored");
                let is_random_access = exec.color_stores.contains_key(attachment_idx);
                subpass_info.input_attachments.push(AttachmentRef {
                    attachment: *attachment_idx,
                    aspect_mask: exec_attachment.aspect_mask,
                    layout: Self::attachment_layout(
                        exec_attachment.aspect_mask,
                        is_random_access,
                        true,
                    ),
                });

                // Preserve this attachment through the intermediate subpasses
                // between its producer and this consumer. (PassRef guarantees
                // that any subpass input is produced by an earlier subpass of
                // this same pass.)
                for prev_exec_idx in (0..exec_idx - 1).rev() {
                    let prev_exec = &pass.execs[prev_exec_idx];
                    if prev_exec.color_stores.contains_key(attachment_idx) {
                        break;
                    }

                    let prev_subpass = &mut subpasses[prev_exec_idx];
                    prev_subpass.preserve_attachments.push(*attachment_idx);
                }
            }

            // Set color attachments to defaults
            for attachment_idx in 0..color_attachment_count as u32 {
                let is_input = subpass_info
                    .input_attachments
                    .iter()
                    .any(|input| input.attachment == attachment_idx);
                subpass_info.color_attachments.push(AttachmentRef {
                    attachment: vk::ATTACHMENT_UNUSED,
                    aspect_mask: vk::ImageAspectFlags::COLOR,
                    layout: Self::attachment_layout(vk::ImageAspectFlags::COLOR, true, is_input),
                });
            }

            for attachment_idx in exec
                .color_attachments
                .keys()
                .chain(exec.color_clears.keys())
                .chain(exec.color_loads.keys())
                .chain(exec.color_stores.keys())
            {
                subpass_info.color_attachments[*attachment_idx as usize].attachment =
                    *attachment_idx;
            }

            // Set depth/stencil attachment
            if let Some(depth_stencil) = exec
                .depth_stencil_attachment
                .or(exec.depth_stencil_load)
                .or(exec.depth_stencil_store)
                .or_else(|| exec.depth_stencil_clear.map(|(attachment, _)| attachment))
            {
                let is_random_access = exec.depth_stencil_clear.is_some()
                    || exec.depth_stencil_load.is_some()
                    || exec.depth_stencil_store.is_some();
                subpass_info.depth_stencil_attachment = Some(AttachmentRef {
                    attachment: color_attachment_count as u32,
                    aspect_mask: depth_stencil.aspect_mask,
                    layout: Self::attachment_layout(
                        depth_stencil.aspect_mask,
                        is_random_access,
                        false,
                    ),
                });
            }

            // Set color resolves to defaults
            subpass_info.color_resolve_attachments.extend(repeat_n(
                AttachmentRef {
                    attachment: vk::ATTACHMENT_UNUSED,
                    aspect_mask: vk::ImageAspectFlags::empty(),
                    layout: vk::ImageLayout::UNDEFINED,
                },
                color_attachment_count,
            ));

            // Set any used color resolve attachments now
            for (dst_attachment_idx, (resolved_attachment, src_attachment_idx)) in
                &exec.color_resolves
            {
                let is_input = subpass_info
                    .input_attachments
                    .iter()
                    .any(|input| input.attachment == *dst_attachment_idx);
                subpass_info.color_resolve_attachments[*src_attachment_idx as usize] =
                    AttachmentRef {
                        attachment: *dst_attachment_idx,
                        aspect_mask: resolved_attachment.aspect_mask,
                        layout: Self::attachment_layout(
                            resolved_attachment.aspect_mask,
                            true,
                            is_input,
                        ),
                    };
            }

            if let Some((
                resolved_attachment,
                dst_attachment_idx,
                depth_resolve_mode,
                stencil_resolve_mode,
            )) = exec.depth_stencil_resolve
            {
                subpass_info.depth_stencil_resolve_attachment = Some((
                    AttachmentRef {
                        attachment: dst_attachment_idx + 1,
                        aspect_mask: resolved_attachment.aspect_mask,
                        layout: Self::attachment_layout(
                            resolved_attachment.aspect_mask,
                            true,
                            false,
                        ),
                    },
                    depth_resolve_mode,
                    stencil_resolve_mode,
                ))
            }

            subpass_info.view_mask = exec.view_mask;
            subpass_info.correlated_view_mask = exec.correlated_view_mask;

            subpasses.push(subpass_info);
        }

        // Add dependencies
        let dependencies =
            {
                let mut dependencies = BTreeMap::new();
                for (exec_idx, exec) in pass.execs.iter().enumerate() {
                    // Check accesses
                    'accesses: for (node_idx, accesses) in exec.accesses.iter() {
                        let (mut curr_stages, mut curr_access) =
                            pipeline_stage_access_flags(accesses.first().unwrap().access);
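                        // Dependencies between two subpasses may only name
                        // stages supported by graphics pipelines, so
                        // ALL_COMMANDS is narrowed to ALL_GRAPHICS here (and
                        // below for the source side).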
                        if curr_stages.contains(vk::PipelineStageFlags::ALL_COMMANDS) {
                            curr_stages |= vk::PipelineStageFlags::ALL_GRAPHICS;
                            curr_stages &= !vk::PipelineStageFlags::ALL_COMMANDS;
                        }

                        // First, look through earlier executions of this pass (in reverse order)
                        for (prev_exec_idx, prev_exec) in
                            pass.execs[0..exec_idx].iter().enumerate().rev()
                        {
                            if let Some(accesses) = prev_exec.accesses.get(node_idx) {
                                for &SubresourceAccess { access, .. } in accesses {
                                    // Is this previous execution access dependent on anything the current
                                    // execution access is dependent upon?
                                    let (mut prev_stages, prev_access) =
                                        pipeline_stage_access_flags(access);
                                    if prev_stages.contains(vk::PipelineStageFlags::ALL_COMMANDS) {
                                        prev_stages |= vk::PipelineStageFlags::ALL_GRAPHICS;
                                        prev_stages &= !vk::PipelineStageFlags::ALL_COMMANDS;
                                    }

                                    let common_stages = curr_stages & prev_stages;
                                    if common_stages.is_empty() {
                                        // No common dependencies
                                        continue;
                                    }

                                    let dep = dependencies
                                        .entry((prev_exec_idx, exec_idx))
                                        .or_insert_with(|| {
                                            SubpassDependency::new(
                                                prev_exec_idx as _,
                                                exec_idx as _,
                                            )
                                        });

                                    // Wait for ...
                                    dep.src_stage_mask |= common_stages;
                                    dep.src_access_mask |= prev_access;

                                    // ... before we:
                                    dep.dst_stage_mask |= curr_stages;
                                    dep.dst_access_mask |= curr_access;

                                    // Do the source and destination stage masks both include
                                    // framebuffer-space stages?
                                    if (prev_stages | curr_stages).intersects(
                                        vk::PipelineStageFlags::FRAGMENT_SHADER
                                            | vk::PipelineStageFlags::EARLY_FRAGMENT_TESTS
                                            | vk::PipelineStageFlags::LATE_FRAGMENT_TESTS
                                            | vk::PipelineStageFlags::COLOR_ATTACHMENT_OUTPUT,
                                    ) {
1264                                        dep.dependency_flags |= vk::DependencyFlags::BY_REGION;
1265                                    }
1266
1267                                    curr_stages &= !common_stages;
1268                                    curr_access &= !prev_access;
1269
1270                                    // Have we found all dependencies for this stage? If so no need to
1271                                    // check external passes
1272                                    if curr_stages.is_empty() {
1273                                        continue 'accesses;
1274                                    }
1275                                }
1276                            }
1277                        }
1278
                        // Second, look through previous passes of the entire render graph
                        for prev_subpass in self.graph.passes[0..pass_idx]
                            .iter()
                            .rev()
                            .flat_map(|pass| pass.execs.iter().rev())
                        {
                            if let Some(accesses) = prev_subpass.accesses.get(node_idx) {
                                for &SubresourceAccess { access, .. } in accesses {
                                    // Does this previous subpass access share pipeline stages
                                    // with the current subpass access?
                                    let (prev_stages, prev_access) =
                                        pipeline_stage_access_flags(access);
                                    let common_stages = curr_stages & prev_stages;
                                    if common_stages.is_empty() {
                                        // No common dependencies
                                        continue;
                                    }

                                    let dep = dependencies
                                        .entry((vk::SUBPASS_EXTERNAL as _, exec_idx))
                                        .or_insert_with(|| {
                                            SubpassDependency::new(
                                                vk::SUBPASS_EXTERNAL as _,
                                                exec_idx as _,
                                            )
                                        });

                                    // Wait for ...
                                    dep.src_stage_mask |= common_stages;
                                    dep.src_access_mask |= prev_access;

                                    // ... before we:
                                    dep.dst_stage_mask |=
                                        curr_stages.min(vk::PipelineStageFlags::ALL_GRAPHICS);
                                    dep.dst_access_mask |= curr_access;

                                    // If the source and destination stage masks both include
                                    // framebuffer-space stages then we need the BY_REGION flag
                                    if (prev_stages | curr_stages).intersects(
                                        vk::PipelineStageFlags::FRAGMENT_SHADER
                                            | vk::PipelineStageFlags::EARLY_FRAGMENT_TESTS
                                            | vk::PipelineStageFlags::LATE_FRAGMENT_TESTS
                                            | vk::PipelineStageFlags::COLOR_ATTACHMENT_OUTPUT,
                                    ) {
                                        dep.dependency_flags |= vk::DependencyFlags::BY_REGION;
                                    }

                                    curr_stages &= !common_stages;
                                    curr_access &= !prev_access;

                                    // If we found all dependencies for this stage there is no need to check
                                    // external passes
                                    if curr_stages.is_empty() {
                                        continue 'accesses;
                                    }
                                }
                            }
                        }

                        // Fall back to external dependencies
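                        //
                        // vk::SUBPASS_EXTERNAL refers to work recorded outside this render pass
                        // instance, so any stages left unmatched above must synchronize against
                        // whatever ran before the render pass began.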
                        if !curr_stages.is_empty() {
                            let dep = dependencies
                                .entry((vk::SUBPASS_EXTERNAL as _, exec_idx))
                                .or_insert_with(|| {
                                    SubpassDependency::new(vk::SUBPASS_EXTERNAL as _, exec_idx as _)
                                });

                            // Wait for ...
                            dep.src_stage_mask |= curr_stages;
                            dep.src_access_mask |= curr_access;

                            // ... before we:
                            dep.dst_stage_mask |= vk::PipelineStageFlags::TOP_OF_PIPE;
                            dep.dst_access_mask =
                                vk::AccessFlags::MEMORY_READ | vk::AccessFlags::MEMORY_WRITE;
                        }
                    }

                    // Look for attachments of this exec being read or written in other execs of the
                    // same pass
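                    //
                    // For example, if another exec cleared or stored a color attachment that this
                    // exec loads, the earlier write must complete before our read below.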
                    for (other_idx, other) in pass.execs[0..exec_idx].iter().enumerate() {
                        // Look for color attachments we're reading
                        for attachment_idx in exec.color_loads.keys() {
                            // Look for writes in the other exec
                            if other.color_clears.contains_key(attachment_idx)
                                || other.color_stores.contains_key(attachment_idx)
                                || other.color_resolves.contains_key(attachment_idx)
                            {
                                let dep = dependencies.entry((other_idx, exec_idx)).or_insert_with(
                                    || SubpassDependency::new(other_idx as _, exec_idx as _),
                                );

                                // Wait for ...
                                dep.src_stage_mask |=
                                    vk::PipelineStageFlags::COLOR_ATTACHMENT_OUTPUT;
                                dep.src_access_mask |= vk::AccessFlags::COLOR_ATTACHMENT_WRITE;

                                // ... before we:
                                dep.dst_stage_mask |= vk::PipelineStageFlags::EARLY_FRAGMENT_TESTS;
                                dep.dst_access_mask |= vk::AccessFlags::COLOR_ATTACHMENT_READ;
                            }

                            // Look for reads in the other exec
                            if other.color_loads.contains_key(attachment_idx) {
                                let dep = dependencies.entry((other_idx, exec_idx)).or_insert_with(
                                    || SubpassDependency::new(other_idx as _, exec_idx as _),
                                );

                                // Wait for ...
                                dep.src_stage_mask |= vk::PipelineStageFlags::LATE_FRAGMENT_TESTS;
                                dep.src_access_mask |= vk::AccessFlags::COLOR_ATTACHMENT_READ;

                                // ... before we:
                                dep.dst_stage_mask |= vk::PipelineStageFlags::FRAGMENT_SHADER;
                                dep.dst_access_mask |= vk::AccessFlags::COLOR_ATTACHMENT_READ;
                            }
                        }

                        // Look for a depth/stencil attachment read
                        if exec.depth_stencil_load.is_some() {
                            // Look for writes in the other exec
                            if other.depth_stencil_clear.is_some()
                                || other.depth_stencil_store.is_some()
                                || other.depth_stencil_resolve.is_some()
                            {
                                let dep = dependencies.entry((other_idx, exec_idx)).or_insert_with(
                                    || SubpassDependency::new(other_idx as _, exec_idx as _),
                                );

                                // Wait for ...
                                dep.src_stage_mask |= vk::PipelineStageFlags::LATE_FRAGMENT_TESTS;
                                dep.src_access_mask |=
                                    vk::AccessFlags::DEPTH_STENCIL_ATTACHMENT_WRITE;

                                // ... before we:
                                dep.dst_stage_mask |= vk::PipelineStageFlags::EARLY_FRAGMENT_TESTS;
                                dep.dst_access_mask |=
                                    vk::AccessFlags::DEPTH_STENCIL_ATTACHMENT_READ;
                            }

                            // TODO: Do we need to depend on a READ..READ between subpasses?
                            // Look for reads in the other exec
                            if other.depth_stencil_load.is_some() {
                                let dep = dependencies.entry((other_idx, exec_idx)).or_insert_with(
                                    || SubpassDependency::new(other_idx as _, exec_idx as _),
                                );

                                // Wait for ...
                                dep.src_stage_mask |= vk::PipelineStageFlags::LATE_FRAGMENT_TESTS;
                                dep.src_access_mask |=
                                    vk::AccessFlags::DEPTH_STENCIL_ATTACHMENT_READ;

                                // ... before we:
                                dep.dst_stage_mask |= vk::PipelineStageFlags::FRAGMENT_SHADER;
                                dep.dst_access_mask |=
                                    vk::AccessFlags::DEPTH_STENCIL_ATTACHMENT_READ;
                            }
                        }

                        // Look for color attachments we're writing
                        for (attachment_idx, aspect_mask) in
                            exec.color_clears
                                .iter()
                                .map(|(attachment_idx, (attachment, _))| {
                                    (*attachment_idx, attachment.aspect_mask)
                                })
                                .chain(exec.color_resolves.iter().map(
                                    |(dst_attachment_idx, (resolved_attachment, _))| {
                                        (*dst_attachment_idx, resolved_attachment.aspect_mask)
                                    },
                                ))
                                .chain(exec.color_stores.iter().map(
                                    |(attachment_idx, attachment)| {
                                        (*attachment_idx, attachment.aspect_mask)
                                    },
                                ))
                        {
                            let stage = match aspect_mask {
                                mask if mask.contains(vk::ImageAspectFlags::COLOR) => {
                                    vk::PipelineStageFlags::COLOR_ATTACHMENT_OUTPUT
                                }
                                mask if mask.intersects(
                                    vk::ImageAspectFlags::DEPTH | vk::ImageAspectFlags::STENCIL,
                                ) =>
                                {
                                    vk::PipelineStageFlags::LATE_FRAGMENT_TESTS
                                }
                                _ => vk::PipelineStageFlags::ALL_GRAPHICS,
                            };

                            // Look for writes in the other exec
                            if other.color_clears.contains_key(&attachment_idx)
                                || other.color_stores.contains_key(&attachment_idx)
                                || other.color_resolves.contains_key(&attachment_idx)
                            {
                                let access = match aspect_mask {
                                    mask if mask.contains(vk::ImageAspectFlags::COLOR) => {
                                        vk::AccessFlags::COLOR_ATTACHMENT_WRITE
                                    }
                                    mask if mask.intersects(
                                        vk::ImageAspectFlags::DEPTH | vk::ImageAspectFlags::STENCIL,
                                    ) =>
                                    {
                                        vk::AccessFlags::DEPTH_STENCIL_ATTACHMENT_WRITE
                                    }
                                    _ => {
                                        vk::AccessFlags::MEMORY_READ | vk::AccessFlags::MEMORY_WRITE
                                    }
                                };

                                let dep = dependencies.entry((other_idx, exec_idx)).or_insert_with(
                                    || SubpassDependency::new(other_idx as _, exec_idx as _),
                                );

                                // Wait for ...
                                dep.src_stage_mask |= stage;
                                dep.src_access_mask |= access;

                                // ... before we:
                                dep.dst_stage_mask |= stage;
                                dep.dst_access_mask |= access;
                            }

                            // Look for reads in the other exec
                            if other.color_loads.contains_key(&attachment_idx) {
                                let (src_access, dst_access) = match aspect_mask {
                                    mask if mask.contains(vk::ImageAspectFlags::COLOR) => (
                                        vk::AccessFlags::COLOR_ATTACHMENT_READ,
                                        vk::AccessFlags::COLOR_ATTACHMENT_WRITE,
                                    ),
                                    mask if mask.intersects(
                                        vk::ImageAspectFlags::DEPTH | vk::ImageAspectFlags::STENCIL,
                                    ) =>
                                    {
                                        (
                                            vk::AccessFlags::DEPTH_STENCIL_ATTACHMENT_READ,
                                            vk::AccessFlags::DEPTH_STENCIL_ATTACHMENT_WRITE,
                                        )
                                    }
                                    _ => (
                                        vk::AccessFlags::MEMORY_READ
                                            | vk::AccessFlags::MEMORY_WRITE,
                                        vk::AccessFlags::MEMORY_READ
                                            | vk::AccessFlags::MEMORY_WRITE,
                                    ),
                                };

                                let dep = dependencies.entry((other_idx, exec_idx)).or_insert_with(
                                    || SubpassDependency::new(other_idx as _, exec_idx as _),
                                );

                                // Wait for ...
                                dep.src_stage_mask |= vk::PipelineStageFlags::EARLY_FRAGMENT_TESTS;
                                dep.src_access_mask |= src_access;

                                // ... before we:
                                dep.dst_stage_mask |= stage;
                                dep.dst_access_mask |= dst_access;
                            }
                        }

                        // Look for a depth/stencil attachment write
                        if let Some(aspect_mask) = exec
                            .depth_stencil_clear
                            .map(|(attachment, _)| attachment.aspect_mask)
                            .or_else(|| {
                                exec.depth_stencil_store
                                    .map(|attachment| attachment.aspect_mask)
                            })
                            .or_else(|| {
                                exec.depth_stencil_resolve
                                    .map(|(attachment, ..)| attachment.aspect_mask)
                            })
                        {
                            let stage = match aspect_mask {
                                mask if mask.contains(vk::ImageAspectFlags::COLOR) => {
                                    vk::PipelineStageFlags::COLOR_ATTACHMENT_OUTPUT
                                }
                                mask if mask.intersects(
                                    vk::ImageAspectFlags::DEPTH | vk::ImageAspectFlags::STENCIL,
                                ) =>
                                {
                                    vk::PipelineStageFlags::LATE_FRAGMENT_TESTS
                                }
                                _ => vk::PipelineStageFlags::ALL_GRAPHICS,
                            };

                            // Look for writes in the other exec
                            if other.depth_stencil_clear.is_some()
                                || other.depth_stencil_store.is_some()
                                || other.depth_stencil_resolve.is_some()
                            {
                                let access = match aspect_mask {
                                    mask if mask.contains(vk::ImageAspectFlags::COLOR) => {
                                        vk::AccessFlags::COLOR_ATTACHMENT_WRITE
                                    }
                                    mask if mask.intersects(
                                        vk::ImageAspectFlags::DEPTH | vk::ImageAspectFlags::STENCIL,
                                    ) =>
                                    {
                                        vk::AccessFlags::DEPTH_STENCIL_ATTACHMENT_WRITE
                                    }
                                    _ => {
                                        vk::AccessFlags::MEMORY_READ | vk::AccessFlags::MEMORY_WRITE
                                    }
                                };

                                let dep = dependencies.entry((other_idx, exec_idx)).or_insert_with(
                                    || SubpassDependency::new(other_idx as _, exec_idx as _),
                                );

                                // Wait for ...
                                dep.src_stage_mask |= stage;
                                dep.src_access_mask |= access;

                                // ... before we:
                                dep.dst_stage_mask |= stage;
                                dep.dst_access_mask |= access;
                            }

                            // Look for reads in the other exec
                            if other.depth_stencil_load.is_some() {
                                let (src_access, dst_access) = match aspect_mask {
                                    mask if mask.contains(vk::ImageAspectFlags::COLOR) => (
                                        vk::AccessFlags::COLOR_ATTACHMENT_READ,
                                        vk::AccessFlags::COLOR_ATTACHMENT_WRITE,
                                    ),
                                    mask if mask.intersects(
                                        vk::ImageAspectFlags::DEPTH | vk::ImageAspectFlags::STENCIL,
                                    ) =>
                                    {
                                        (
                                            vk::AccessFlags::DEPTH_STENCIL_ATTACHMENT_READ,
                                            vk::AccessFlags::DEPTH_STENCIL_ATTACHMENT_WRITE,
                                        )
                                    }
                                    _ => (
                                        vk::AccessFlags::MEMORY_READ
                                            | vk::AccessFlags::MEMORY_WRITE,
                                        vk::AccessFlags::MEMORY_READ
                                            | vk::AccessFlags::MEMORY_WRITE,
                                    ),
                                };

                                let dep = dependencies.entry((other_idx, exec_idx)).or_insert_with(
                                    || SubpassDependency::new(other_idx as _, exec_idx as _),
                                );

                                // Wait for ...
                                dep.src_stage_mask |= vk::PipelineStageFlags::EARLY_FRAGMENT_TESTS;
                                dep.src_access_mask |= src_access;

                                // ... before we:
                                dep.dst_stage_mask |= stage;
                                dep.dst_access_mask |= dst_access;
                            }
                        }
                    }
                }

                dependencies.into_values().collect::<Vec<_>>()
            };

        pool.lease(RenderPassInfo {
            attachments,
            dependencies,
            subpasses,
        })
    }

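    /// Leases the descriptor pool, per-execution descriptor sets and render pass required by
    /// each scheduled pass, storing them as physical passes.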
    #[profiling::function]
    fn lease_scheduled_resources<P>(
        &mut self,
        pool: &mut P,
        schedule: &[usize],
    ) -> Result<(), DriverError>
    where
        P: Pool<DescriptorPoolInfo, DescriptorPool> + Pool<RenderPassInfo, RenderPass>,
    {
        for pass_idx in schedule.iter().copied() {
            // At the time this function runs, the pass will already have been optimized into a
            // larger pass made of anything that could be merged into it - so we only care about
            // one pass at a time here
            let pass = &mut self.graph.passes[pass_idx];

            trace!("leasing [{pass_idx}: {}]", pass.name);

            let descriptor_pool = Self::lease_descriptor_pool(pool, pass)?;
            let mut exec_descriptor_sets = HashMap::with_capacity(
                descriptor_pool
                    .as_ref()
                    .map(|descriptor_pool| descriptor_pool.info.max_sets as usize)
                    .unwrap_or_default(),
            );
            if let Some(descriptor_pool) = descriptor_pool.as_ref() {
                for (exec_idx, pipeline) in
                    pass.execs
                        .iter()
                        .enumerate()
                        .filter_map(|(exec_idx, exec)| {
                            exec.pipeline.as_ref().map(|pipeline| (exec_idx, pipeline))
                        })
                {
                    let layouts = pipeline.descriptor_info().layouts.values();
                    let mut descriptor_sets = Vec::with_capacity(layouts.len());
                    for descriptor_set_layout in layouts {
                        descriptor_sets.push(DescriptorPool::allocate_descriptor_set(
                            descriptor_pool,
                            descriptor_set_layout,
                        )?);
                    }
                    exec_descriptor_sets.insert(exec_idx, descriptor_sets);
                }
            }

            // Note that as a side effect of merging compatible passes, all input passes should
            // have been globbed onto their preceding passes by now. This allows subpasses to use
            // input attachments without any extra work; if we are provided a pass that starts
            // with an input attachment we just blow up, because we can't provide it.
            debug_assert!(!pass.execs.is_empty());
            debug_assert!(
                pass.execs[0].pipeline.is_none()
                    || !pass.execs[0].pipeline.as_ref().unwrap().is_graphic()
                    || pass.execs[0]
                        .pipeline
                        .as_ref()
                        .unwrap()
                        .unwrap_graphic()
                        .descriptor_info
                        .pool_sizes
                        .values()
                        .filter_map(|pool| pool.get(&vk::DescriptorType::INPUT_ATTACHMENT))
                        .next()
                        .is_none()
            );

            // Also, the render pass may be None if the pass contains no graphic operations.
            let render_pass = if pass.execs[0]
                .pipeline
                .as_ref()
                .map(|pipeline| pipeline.is_graphic())
                .unwrap_or_default()
            {
                Some(self.lease_render_pass(pool, pass_idx)?)
            } else {
                None
            };

            self.physical_passes.push(PhysicalPass {
                descriptor_pool,
                exec_descriptor_sets,
                render_pass,
            });
        }

        Ok(())
    }

    // Merges graphic passes which have compatible attachments - note that scheduled pass order
    // is final during this function and so we may only merge contiguous groups of passes
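    // (For example, merging passes "A", "B" and "C" produces one pass named "A + B + C" whose
    // executions are recorded as consecutive subpasses.)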
    #[profiling::function]
    fn merge_scheduled_passes(&mut self, schedule: &mut Vec<usize>) {
        thread_local! {
            static PASSES: RefCell<Vec<Option<Pass>>> = Default::default();
        }

        PASSES.with_borrow_mut(|passes| {
            debug_assert!(passes.is_empty());

            passes.extend(self.graph.passes.drain(..).map(Some));

            let mut idx = 0;

            while idx < schedule.len() {
                let mut pass = passes[schedule[idx]].take().unwrap();

                // Find candidates
                let start = idx + 1;
                let mut end = start;
                while end < schedule.len() {
                    let other = passes[schedule[end]].as_ref().unwrap();

                    debug!(
                        "attempting to merge [{idx}: {}] with [{end}: {}]",
                        pass.name, other.name
                    );

                    if Self::allow_merge_passes(&pass, other) {
                        end += 1;
                    } else {
                        break;
                    }
                }

                if log_enabled!(Trace) && start != end {
                    trace!("merging {} passes into [{idx}: {}]", end - start, pass.name);
                }

                // Grow the merged pass once, not per merge
                {
                    let mut name_additional = 0;
                    let mut execs_additional = 0;
                    for idx in start..end {
                        let other = passes[schedule[idx]].as_ref().unwrap();
                        name_additional += other.name.len() + 3;
                        execs_additional += other.execs.len();
                    }

                    pass.name.reserve(name_additional);
                    pass.execs.reserve(execs_additional);
                }

                for idx in start..end {
                    let mut other = passes[schedule[idx]].take().unwrap();
                    pass.name.push_str(" + ");
                    pass.name.push_str(other.name.as_str());
                    pass.execs.append(&mut other.execs);
                }

                self.graph.passes.push(pass);
                idx += 1 + end - start;
            }

            // Reschedule passes
            schedule.truncate(self.graph.passes.len());

            for (idx, pass_idx) in schedule.iter_mut().enumerate() {
                *pass_idx = idx;
            }

            // Add the remaining passes back into the graph for later
            for pass in passes.drain(..).flatten() {
                self.graph.passes.push(pass);
            }
        });
    }

    fn next_subpass(cmd_buf: &CommandBuffer) {
        trace!("next_subpass");

        unsafe {
            cmd_buf
                .device
                .cmd_next_subpass(**cmd_buf, vk::SubpassContents::INLINE);
        }
    }

    /// Returns the stages that process the given node.
    ///
    /// Note that this value must be retrieved before resolving a node as there will be no
    /// data left to inspect afterwards!
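    ///
    /// For example, a node accessed only by executions without a pipeline (transfer work)
    /// reports `vk::PipelineStageFlags::TRANSFER`.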
    #[profiling::function]
    pub fn node_pipeline_stages(&self, node: impl Node) -> vk::PipelineStageFlags {
        let node_idx = node.index();
        let mut res = Default::default();

        'pass: for pass in self.graph.passes.iter() {
            for exec in pass.execs.iter() {
                if exec.accesses.contains_key(&node_idx) {
                    res |= pass
                        .execs
                        .iter()
                        .filter_map(|exec| exec.pipeline.as_ref())
                        .map(|pipeline| pipeline.stage())
                        .reduce(|j, k| j | k)
                        .unwrap_or(vk::PipelineStageFlags::TRANSFER);

                    // The execution pipelines of a pass are always the same type
                    continue 'pass;
                }
            }
        }

        debug_assert_ne!(
            res,
            Default::default(),
            "The given node was not accessed in this graph"
        );

        res
    }

    #[profiling::function]
    fn record_execution_barriers<'a>(
        cmd_buf: &CommandBuffer,
        bindings: &mut [Binding],
        accesses: impl Iterator<Item = (&'a NodeIndex, &'a Vec<SubresourceAccess>)>,
    ) {
        use std::slice::from_ref;

        // We store barrier scratch space in TLS to save an allocation; contents are POD
        thread_local! {
            static TLS: RefCell<Tls> = Default::default();
        }

        struct Barrier<T> {
            next_access: AccessType,
            prev_access: AccessType,
            resource: T,
        }

        struct BufferResource {
            buffer: vk::Buffer,
            offset: usize,
            size: usize,
        }

        struct ImageResource {
            image: vk::Image,
            range: vk::ImageSubresourceRange,
        }

        #[derive(Default)]
        struct Tls {
            buffers: Vec<Barrier<BufferResource>>,
            images: Vec<Barrier<ImageResource>>,
            next_accesses: Vec<AccessType>,
            prev_accesses: Vec<AccessType>,
        }

        TLS.with_borrow_mut(|tls| {
            // Initialize TLS from a previous call
            tls.buffers.clear();
            tls.images.clear();
            tls.next_accesses.clear();
            tls.prev_accesses.clear();

            // Map remaining accesses into vk_sync barriers (some accesses may have been removed by the
            // render pass leasing function)
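            //
            // Roughly: acceleration structure accesses are folded into one GlobalBarrier,
            // buffer accesses become BufferBarriers over their byte ranges, and image accesses
            // become ImageBarriers which may also transition the image layout.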

            for (node_idx, accesses) in accesses {
                let binding = &bindings[*node_idx];

                match binding {
                    Binding::AccelerationStructure(..)
                    | Binding::AccelerationStructureLease(..) => {
                        let Some(accel_struct) = binding.as_driver_acceleration_structure() else {
                            #[cfg(debug_assertions)]
                            unreachable!();

                            #[cfg(not(debug_assertions))]
                            unsafe {
                                unreachable_unchecked()
                            }
                        };

                        let prev_access = AccelerationStructure::access(
                            accel_struct,
                            accesses.last().unwrap().access,
                        );

                        tls.next_accesses.extend(
                            accesses
                                .iter()
                                .map(|&SubresourceAccess { access, .. }| access),
                        );
                        tls.prev_accesses.push(prev_access);
                    }
                    Binding::Buffer(..) | Binding::BufferLease(..) => {
                        let Some(buffer) = binding.as_driver_buffer() else {
                            #[cfg(debug_assertions)]
                            unreachable!();

                            #[cfg(not(debug_assertions))]
                            unsafe {
                                unreachable_unchecked()
                            }
                        };

                        for &SubresourceAccess {
                            access,
                            subresource,
                        } in accesses
                        {
                            let Subresource::Buffer(range) = subresource else {
                                unreachable!()
                            };

                            for (prev_access, range) in Buffer::access(buffer, access, range) {
                                tls.buffers.push(Barrier {
                                    next_access: access,
                                    prev_access,
                                    resource: BufferResource {
                                        buffer: **buffer,
                                        offset: range.start as _,
                                        size: (range.end - range.start) as _,
                                    },
                                });
                            }
                        }
                    }
                    Binding::Image(..) | Binding::ImageLease(..) | Binding::SwapchainImage(..) => {
                        let Some(image) = binding.as_driver_image() else {
                            #[cfg(debug_assertions)]
                            unreachable!();

                            #[cfg(not(debug_assertions))]
                            unsafe {
                                unreachable_unchecked()
                            }
                        };

                        for &SubresourceAccess {
                            access,
                            subresource,
                        } in accesses
                        {
                            let Subresource::Image(range) = subresource else {
                                unreachable!()
                            };

                            for (prev_access, range) in Image::access(image, access, range) {
                                tls.images.push(Barrier {
                                    next_access: access,
                                    prev_access,
                                    resource: ImageResource {
                                        image: **image,
                                        range,
                                    },
                                })
                            }
                        }
                    }
                }
            }

            let global_barrier = if !tls.next_accesses.is_empty() {
                // No resource attached - we use a global barrier for these
                trace!(
                    "    global {:?}->{:?}",
                    tls.prev_accesses, tls.next_accesses
                );

                Some(GlobalBarrier {
                    next_accesses: tls.next_accesses.as_slice(),
                    previous_accesses: tls.prev_accesses.as_slice(),
                })
            } else {
                None
            };
            let buffer_barriers = tls.buffers.iter().map(
                |Barrier {
                     next_access,
                     prev_access,
                     resource,
                 }| {
                    let BufferResource {
                        buffer,
                        offset,
                        size,
                    } = *resource;

                    trace!(
                        "    buffer {:?} {:?} {:?}->{:?}",
                        buffer,
                        offset..offset + size,
                        prev_access,
                        next_access,
                    );

                    BufferBarrier {
                        next_accesses: from_ref(next_access),
                        previous_accesses: from_ref(prev_access),
                        src_queue_family_index: vk::QUEUE_FAMILY_IGNORED,
                        dst_queue_family_index: vk::QUEUE_FAMILY_IGNORED,
                        buffer,
                        offset,
                        size,
                    }
                },
            );
            let image_barriers = tls.images.iter().map(
                |Barrier {
                     next_access,
                     prev_access,
                     resource,
                 }| {
                    let ImageResource { image, range } = *resource;

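                    // Compact Debug impl: prints the aspect mask, then the array layer range,
                    // then the mip level range of the barrier's subresource.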
                    struct ImageSubresourceRangeDebug(vk::ImageSubresourceRange);

                    impl std::fmt::Debug for ImageSubresourceRangeDebug {
                        fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
                            self.0.aspect_mask.fmt(f)?;

                            f.write_str(" array: ")?;

                            let array_layers = self.0.base_array_layer
                                ..self.0.base_array_layer + self.0.layer_count;
                            array_layers.fmt(f)?;

                            f.write_str(" mip: ")?;

                            let mip_levels =
                                self.0.base_mip_level..self.0.base_mip_level + self.0.level_count;
                            mip_levels.fmt(f)
                        }
                    }

                    trace!(
                        "    image {:?} {:?} {:?}->{:?}",
                        image,
                        ImageSubresourceRangeDebug(range),
                        prev_access,
                        next_access,
                    );

                    ImageBarrier {
                        next_accesses: from_ref(next_access),
                        next_layout: image_access_layout(*next_access),
                        previous_accesses: from_ref(prev_access),
                        previous_layout: image_access_layout(*prev_access),
                        discard_contents: *prev_access == AccessType::Nothing
                            || is_write_access(*next_access),
                        src_queue_family_index: vk::QUEUE_FAMILY_IGNORED,
                        dst_queue_family_index: vk::QUEUE_FAMILY_IGNORED,
                        image,
                        range,
                    }
                },
            );

            pipeline_barrier(
                &cmd_buf.device,
                **cmd_buf,
                global_barrier,
                &buffer_barriers.collect::<Box<[_]>>(),
                &image_barriers.collect::<Box<[_]>>(),
            );
        });
    }

    #[profiling::function]
    fn record_image_layout_transitions(
        cmd_buf: &CommandBuffer,
        bindings: &mut [Binding],
        pass: &mut Pass,
    ) {
        use std::slice::from_ref;

        // We store barrier scratch space in TLS to save an allocation; contents are POD
        thread_local! {
            static TLS: RefCell<Tls> = Default::default();
        }

        struct ImageResourceBarrier {
            image: vk::Image,
            next_access: AccessType,
            prev_access: AccessType,
            range: vk::ImageSubresourceRange,
        }

        #[derive(Default)]
        struct Tls {
            images: Vec<ImageResourceBarrier>,
            initial_layouts: HashMap<usize, ImageAccess<bool>>,
        }

        TLS.with_borrow_mut(|tls| {
            tls.images.clear();
            tls.initial_layouts.clear();

            for (node_idx, accesses) in pass
                .execs
                .iter_mut()
                .flat_map(|exec| exec.accesses.iter())
                .map(|(node_idx, accesses)| (*node_idx, accesses))
            {
                debug_assert!(bindings.get(node_idx).is_some());

                let binding = unsafe {
                    // PassRef enforces this using assert_bound_graph_node
                    bindings.get_unchecked(node_idx)
                };

                match binding {
                    Binding::AccelerationStructure(..)
                    | Binding::AccelerationStructureLease(..) => {
                        let Some(accel_struct) = binding.as_driver_acceleration_structure() else {
                            #[cfg(debug_assertions)]
                            unreachable!();

                            #[cfg(not(debug_assertions))]
                            unsafe {
                                unreachable_unchecked()
                            }
                        };

                        AccelerationStructure::access(accel_struct, AccessType::Nothing);
                    }
                    Binding::Buffer(..) | Binding::BufferLease(..) => {
                        let Some(buffer) = binding.as_driver_buffer() else {
                            #[cfg(debug_assertions)]
                            unreachable!();

                            #[cfg(not(debug_assertions))]
                            unsafe {
                                unreachable_unchecked()
                            }
                        };

                        for subresource_access in accesses {
                            let &SubresourceAccess {
                                subresource: Subresource::Buffer(access_range),
                                ..
                            } = subresource_access
                            else {
                                #[cfg(debug_assertions)]
                                unreachable!();

                                #[cfg(not(debug_assertions))]
                                unsafe {
                                    // This cannot be reached because PassRef enforces the subrange is
                                    // of type N::Subresource where N is the buffer node type
                                    unreachable_unchecked()
                                }
                            };

                            for _ in Buffer::access(buffer, AccessType::Nothing, access_range) {}
                        }
                    }
                    Binding::Image(..) | Binding::ImageLease(..) | Binding::SwapchainImage(..) => {
                        let Some(image) = binding.as_driver_image() else {
                            #[cfg(debug_assertions)]
                            unreachable!();

                            #[cfg(not(debug_assertions))]
                            unsafe {
                                unreachable_unchecked()
                            }
                        };

                        let initial_layout = tls
                            .initial_layouts
                            .entry(node_idx)
                            .or_insert_with(|| ImageAccess::new(image.info, true));

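                        // `initial_layouts` records, per node, which subresource ranges have
                        // already been transitioned; only the first access to a range emits an
                        // initial layout barrier below.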
                        for subresource_access in accesses {
                            let &SubresourceAccess {
                                access,
                                subresource: Subresource::Image(access_range),
                            } = subresource_access
                            else {
                                #[cfg(debug_assertions)]
                                unreachable!();

                                #[cfg(not(debug_assertions))]
                                unsafe {
                                    // This cannot be reached because PassRef enforces the subrange is
                                    // of type N::Subresource where N is the image node type
                                    unreachable_unchecked()
                                }
                            };

                            for (initial_layout, layout_range) in
                                initial_layout.access(false, access_range)
                            {
                                for (prev_access, range) in
                                    Image::access(image, access, layout_range)
                                {
                                    if initial_layout {
                                        tls.images.push(ImageResourceBarrier {
                                            image: **image,
                                            next_access: initial_image_layout_access(access),
                                            prev_access,
                                            range,
                                        });
                                    }
                                }
                            }
                        }
                    }
                }
            }

            let image_barriers = tls.images.iter().map(
                |ImageResourceBarrier {
                     image,
                     next_access,
                     prev_access,
                     range,
                 }| {
                    trace!(
                        "    image {:?} {:?} {:?}->{:?}",
                        image,
                        ImageSubresourceRangeDebug(*range),
                        prev_access,
                        next_access,
                    );

                    // Color Attachment Read/Write (blending) will prevent discarding contents.
                    // Note that we must check "not-read" because some reads write!
                    let discard_contents =
                        *prev_access == AccessType::Nothing || !is_read_access(*next_access);

                    ImageBarrier {
                        next_accesses: from_ref(next_access),
                        next_layout: image_access_layout(*next_access),
                        previous_accesses: from_ref(prev_access),
                        previous_layout: image_access_layout(*prev_access),
                        discard_contents,
                        src_queue_family_index: vk::QUEUE_FAMILY_IGNORED,
                        dst_queue_family_index: vk::QUEUE_FAMILY_IGNORED,
                        image: *image,
                        range: *range,
                    }
                },
            );

            pipeline_barrier(
                &cmd_buf.device,
                **cmd_buf,
                None,
                &[],
                &image_barriers.collect::<Box<_>>(),
            );
        });
    }

    /// Records any pending render graph passes that are required by the given node, but does not
    /// record any passes that actually contain the given node.
    ///
    /// As a side effect, the graph is optimized for the given node. Future calls may further
    /// optimize the graph, but only on top of the existing optimizations. This only matters if
    /// you are pulling multiple images out of the graph - in that case, pull the "most important"
    /// image first.
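    ///
    /// A minimal usage sketch (the pool, command buffer and node variables are assumed):
    ///
    /// ```ignore
    /// // Record only what `node` depends upon; passes that access `node` itself
    /// // are left unrecorded:
    /// resolver.record_node_dependencies(&mut pool, &mut cmd_buf, node)?;
    /// ```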
    #[profiling::function]
    pub fn record_node_dependencies<P>(
        &mut self,
        pool: &mut P,
        cmd_buf: &mut CommandBuffer,
        node: impl Node,
    ) -> Result<(), DriverError>
    where
        P: Pool<DescriptorPoolInfo, DescriptorPool> + Pool<RenderPassInfo, RenderPass>,
    {
        let node_idx = node.index();

        debug_assert!(self.graph.bindings.get(node_idx).is_some());

        // We record up to but not including the first pass which accesses the target node
        if let Some(end_pass_idx) = self.graph.first_node_access_pass_index(node) {
            self.record_node_passes(pool, cmd_buf, node_idx, end_pass_idx)?;
        }

        Ok(())
    }

    /// Records any pending render graph passes that the given node requires.
    #[profiling::function]
    pub fn record_node<P>(
        &mut self,
        pool: &mut P,
        cmd_buf: &mut CommandBuffer,
        node: impl Node,
    ) -> Result<(), DriverError>
    where
        P: Pool<DescriptorPoolInfo, DescriptorPool> + Pool<RenderPassInfo, RenderPass>,
    {
        let node_idx = node.index();

        debug_assert!(self.graph.bindings.get(node_idx).is_some());

        if self.graph.passes.is_empty() {
            return Ok(());
        }

        let end_pass_idx = self.graph.passes.len();
        self.record_node_passes(pool, cmd_buf, node_idx, end_pass_idx)
    }

    #[profiling::function]
    fn record_node_passes<P>(
        &mut self,
        pool: &mut P,
        cmd_buf: &mut CommandBuffer,
        node_idx: usize,
        end_pass_idx: usize,
    ) -> Result<(), DriverError>
    where
        P: Pool<DescriptorPoolInfo, DescriptorPool> + Pool<RenderPassInfo, RenderPass>,
    {
        thread_local! {
            static SCHEDULE: RefCell<Schedule> = Default::default();
        }

        SCHEDULE.with_borrow_mut(|schedule| {
            schedule.access_cache.update(&self.graph, end_pass_idx);
            schedule.passes.clear();

            self.schedule_node_passes(node_idx, end_pass_idx, schedule);
            self.record_scheduled_passes(pool, cmd_buf, schedule, end_pass_idx)
        })
    }

    #[profiling::function]
    fn record_scheduled_passes<P>(
        &mut self,
        pool: &mut P,
        cmd_buf: &mut CommandBuffer,
        schedule: &mut Schedule,
        end_pass_idx: usize,
    ) -> Result<(), DriverError>
    where
        P: Pool<DescriptorPoolInfo, DescriptorPool> + Pool<RenderPassInfo, RenderPass>,
    {
        if schedule.passes.is_empty() {
            return Ok(());
        }

        // Print some handy details or hit a breakpoint if you set the flag
        #[cfg(debug_assertions)]
        if log_enabled!(Debug) && self.graph.debug {
            debug!("resolving the following graph:\n\n{:#?}\n\n", self.graph);
        }

        debug_assert!(
            schedule.passes.windows(2).all(|w| w[0] <= w[1]),
            "Unsorted schedule"
        );

        // Optimize the schedule and lease the resources it requires
2410        Self::reorder_scheduled_passes(schedule, end_pass_idx);
2411        self.merge_scheduled_passes(&mut schedule.passes);
2412        self.lease_scheduled_resources(pool, &schedule.passes)?;
2413
2414        for pass_idx in schedule.passes.iter().copied() {
2415            let pass = &mut self.graph.passes[pass_idx];
2416
2417            profiling::scope!("Pass", &pass.name);
2418
2419            let physical_pass = &mut self.physical_passes[pass_idx];
2420            let is_graphic = physical_pass.render_pass.is_some();
2421
2422            trace!("recording pass [{}: {}]", pass_idx, pass.name);
2423
2424            if !physical_pass.exec_descriptor_sets.is_empty() {
2425                Self::write_descriptor_sets(cmd_buf, &self.graph.bindings, pass, physical_pass)?;
2426            }
2427
2428            let render_area = if is_graphic {
2429                Self::record_image_layout_transitions(cmd_buf, &mut self.graph.bindings, pass);
2430
2431                let render_area = Self::render_area(&self.graph.bindings, pass);
2432
2433                Self::begin_render_pass(
2434                    cmd_buf,
2435                    &self.graph.bindings,
2436                    pass,
2437                    physical_pass,
2438                    render_area,
2439                )?;
2440
2441                Some(render_area)
2442            } else {
2443                None
2444            };
2445
            for exec_idx in 0..pass.execs.len() {
                let render_area = is_graphic.then(|| {
                    pass.execs[exec_idx]
                        .render_area
                        .unwrap_or(render_area.unwrap())
                });

                let exec = &mut pass.execs[exec_idx];

                if is_graphic && exec_idx > 0 {
                    Self::next_subpass(cmd_buf);
                }

                if let Some(pipeline) = exec.pipeline.as_mut() {
                    Self::bind_pipeline(
                        cmd_buf,
                        physical_pass,
                        exec_idx,
                        pipeline,
                        exec.depth_stencil,
                    )?;

                    if is_graphic {
                        let render_area = render_area.unwrap();

                        // In this case we set the viewport and scissor for the user
                        Self::set_viewport(
                            cmd_buf,
                            render_area.x as _,
                            render_area.y as _,
                            render_area.width as _,
                            render_area.height as _,
                            exec.depth_stencil
                                .map(|depth_stencil| {
                                    let min = depth_stencil.min.0;
                                    let max = depth_stencil.max.0;
                                    min..max
                                })
                                .unwrap_or(0.0..1.0),
                        );
                        Self::set_scissor(
                            cmd_buf,
                            render_area.x,
                            render_area.y,
                            render_area.width,
                            render_area.height,
                        );
                    }

                    Self::bind_descriptor_sets(cmd_buf, pipeline, physical_pass, exec_idx);
                }

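                // Non-graphic work is not synchronized by a render pass, so explicit pipeline
                // barriers are recorded for each declared access instead.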
                if !is_graphic {
                    Self::record_execution_barriers(
                        cmd_buf,
                        &mut self.graph.bindings,
                        exec.accesses.iter(),
                    );
                }

                trace!("    > exec[{exec_idx}]");

                {
                    profiling::scope!("Execute callback");

                    let exec_func = exec.func.take().unwrap().0;
                    exec_func(
                        &cmd_buf.device,
                        **cmd_buf,
                        Bindings::new(&self.graph.bindings, exec),
                    );
                }
            }

            if is_graphic {
                self.end_render_pass(cmd_buf);
            }
        }

        thread_local! {
            static PASSES: RefCell<Vec<Pass>> = Default::default();
        }

        PASSES.with_borrow_mut(|passes| {
            debug_assert!(passes.is_empty());

            // We have to keep the bindings and pipelines alive until the GPU is done
            schedule.passes.sort_unstable();
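            // Pop scheduled indexes from highest to lowest so that earlier indexes into
            // graph.passes remain valid while passes are removed from the end.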
            while let Some(schedule_idx) = schedule.passes.pop() {
                debug_assert!(!self.graph.passes.is_empty());

                while let Some(pass) = self.graph.passes.pop() {
                    let pass_idx = self.graph.passes.len();

                    if pass_idx == schedule_idx {
                        // This was a scheduled pass - store it!
                        CommandBuffer::push_fenced_drop(
                            cmd_buf,
                            (pass, self.physical_passes.pop().unwrap()),
                        );
                        break;
                    } else {
                        debug_assert!(pass_idx > schedule_idx);

                        passes.push(pass);
                    }
                }
            }

            debug_assert!(self.physical_passes.is_empty());

            // Put the other passes back for future resolves
            self.graph.passes.extend(passes.drain(..).rev());
        });

        log::trace!("Recorded passes");

        Ok(())
    }

    /// Records any pending render graph passes that have not been previously scheduled.
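    ///
    /// A minimal usage sketch, assuming a `resolver`, a resource `pool`, and a leased `cmd_buf`
    /// already exist in the calling code (those names are illustrative, not part of this API):
    ///
    /// ```ignore
    /// resolver.record_unscheduled_passes(&mut pool, &mut cmd_buf)?;
    /// ```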
    #[profiling::function]
    pub fn record_unscheduled_passes<P>(
        &mut self,
        pool: &mut P,
        cmd_buf: &mut CommandBuffer,
    ) -> Result<(), DriverError>
    where
        P: Pool<DescriptorPoolInfo, DescriptorPool> + Pool<RenderPassInfo, RenderPass>,
    {
        if self.graph.passes.is_empty() {
            return Ok(());
        }

        thread_local! {
            static SCHEDULE: RefCell<Schedule> = Default::default();
        }

        SCHEDULE.with_borrow_mut(|schedule| {
            schedule
                .access_cache
                .update(&self.graph, self.graph.passes.len());
            schedule.passes.clear();
            schedule.passes.extend(0..self.graph.passes.len());

            self.record_scheduled_passes(pool, cmd_buf, schedule, self.graph.passes.len())
        })
    }

    #[profiling::function]
    fn render_area(bindings: &[Binding], pass: &Pass) -> Area {
        // set_render_area was not specified so we're going to guess using the minimum common
        // attachment extents
        let first_exec = pass.execs.first().unwrap();

        // We must be able to find the render area because render passes require at least one
        // image to be attached
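        // Each attachment's extent is shifted right by its base mip level because image
        // dimensions halve at every successive mip.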
        let (mut width, mut height) = (u32::MAX, u32::MAX);
        for (attachment_width, attachment_height) in first_exec
            .color_clears
            .values()
            .copied()
            .map(|(attachment, _)| attachment)
            .chain(first_exec.color_loads.values().copied())
            .chain(first_exec.color_stores.values().copied())
            .chain(
                first_exec
                    .depth_stencil_clear
                    .map(|(attachment, _)| attachment),
            )
            .chain(first_exec.depth_stencil_load)
            .chain(first_exec.depth_stencil_store)
            .map(|attachment| {
                let info = bindings[attachment.target].as_driver_image().unwrap().info;

                (
                    info.width >> attachment.base_mip_level,
                    info.height >> attachment.base_mip_level,
                )
            })
        {
            width = width.min(attachment_width);
            height = height.min(attachment_height);
        }

        Area {
            height,
            width,
            x: 0,
            y: 0,
        }
    }

    #[profiling::function]
    fn reorder_scheduled_passes(schedule: &mut Schedule, end_pass_idx: usize) {
        // Reordering is only worthwhile with three or more passes
        if schedule.passes.len() < 3 {
            return;
        }

        let mut scheduled = 0;

        thread_local! {
            static UNSCHEDULED: RefCell<Vec<bool>> = Default::default();
        }

        UNSCHEDULED.with_borrow_mut(|unscheduled| {
            unscheduled.truncate(end_pass_idx);
            unscheduled.fill(true);
            unscheduled.resize(end_pass_idx, true);
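            // The truncate/fill/resize sequence resets the scratch flags while reusing the
            // existing allocation; every pass starts out marked unscheduled.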

            // Re-order passes by maximizing the distance between dependent nodes
            while scheduled < schedule.passes.len() {
                let mut best_idx = scheduled;
                let pass_idx = schedule.passes[best_idx];
                let mut best_overlap_factor = schedule
                    .access_cache
                    .interdependent_passes(pass_idx, end_pass_idx)
                    .count();

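                // overlap_factor counts how many of a candidate's dependency passes have already
                // been scheduled; any candidate with an unscheduled dependency is skipped, and
                // the candidate with the highest factor is scheduled next.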
                for (idx, pass_idx) in schedule.passes[best_idx + 1..schedule.passes.len()]
                    .iter()
                    .enumerate()
                {
                    let mut overlap_factor = 0;

                    for other_pass_idx in schedule
                        .access_cache
                        .interdependent_passes(*pass_idx, end_pass_idx)
                    {
                        if unscheduled[other_pass_idx] {
                            // This pass can't be the candidate because it depends on unfinished work
                            break;
                        }

                        overlap_factor += 1;
                    }

                    if overlap_factor > best_overlap_factor {
                        best_idx += idx + 1;
                        best_overlap_factor = overlap_factor;
                    }
                }

                unscheduled[schedule.passes[best_idx]] = false;
                schedule.passes.swap(scheduled, best_idx);
                scheduled += 1;
            }
        });
    }

    /// Fills `schedule` with the indexes of the passes that are required to be executed, in
    /// order, for the given node.
    #[profiling::function]
    fn schedule_node_passes(&self, node_idx: usize, end_pass_idx: usize, schedule: &mut Schedule) {
        type UnscheduledUnresolvedUnchecked = (Vec<bool>, Vec<bool>, VecDeque<(usize, usize)>);
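        // The tuple holds per-pass "unscheduled" flags, per-node "unresolved" flags, and a queue
        // of (node, pass) pairs whose dependencies have not been checked yet.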

        thread_local! {
            static UNSCHEDULED_UNRESOLVED_UNCHECKED: RefCell<UnscheduledUnresolvedUnchecked> = Default::default();
        }

        UNSCHEDULED_UNRESOLVED_UNCHECKED.with_borrow_mut(|(unscheduled, unresolved, unchecked)| {
            unscheduled.truncate(end_pass_idx);
            unscheduled.fill(true);
            unscheduled.resize(end_pass_idx, true);

            unresolved.truncate(self.graph.bindings.len());
            unresolved.fill(true);
            unresolved.resize(self.graph.bindings.len(), true);

            debug_assert!(unchecked.is_empty());

            trace!("scheduling node {node_idx}");

            unresolved[node_idx] = false;

            // Schedule the first set of passes for the node we're trying to resolve
            for pass_idx in schedule
                .access_cache
                .dependent_passes(node_idx, end_pass_idx)
            {
                trace!(
                    "  pass [{pass_idx}: {}] is dependent",
                    self.graph.passes[pass_idx].name
                );

                debug_assert!(unscheduled[pass_idx]);

                unscheduled[pass_idx] = false;
                schedule.passes.push(pass_idx);

                for node_idx in schedule.access_cache.dependent_nodes(pass_idx) {
                    trace!("    node {node_idx} is dependent");

                    let unresolved = &mut unresolved[node_idx];
                    if *unresolved {
                        *unresolved = false;
                        unchecked.push_back((node_idx, pass_idx));
                    }
                }
            }

            trace!("secondary passes below");

            // Now schedule all nodes that are required, walking the dependency tree to find them
            while let Some((node_idx, pass_idx)) = unchecked.pop_front() {
                trace!("  node {node_idx} is dependent");

                for pass_idx in schedule
                    .access_cache
                    .dependent_passes(node_idx, pass_idx + 1)
                {
                    let unscheduled = &mut unscheduled[pass_idx];
                    if *unscheduled {
                        *unscheduled = false;
                        schedule.passes.push(pass_idx);

                        trace!(
                            "  pass [{pass_idx}: {}] is dependent",
                            self.graph.passes[pass_idx].name
                        );

                        for node_idx in schedule.access_cache.dependent_nodes(pass_idx) {
                            trace!("    node {node_idx} is dependent");

                            let unresolved = &mut unresolved[node_idx];
                            if *unresolved {
                                *unresolved = false;
                                unchecked.push_back((node_idx, pass_idx));
                            }
                        }
                    }
                }
            }

            schedule.passes.sort_unstable();

            if log_enabled!(Debug) {
                if !schedule.passes.is_empty() {
                    // These are the indexes of the passes this thread is about to resolve
                    debug!(
                        "schedule: {}",
                        schedule
                            .passes
                            .iter()
                            .copied()
                            .map(|idx| format!("[{}: {}]", idx, self.graph.passes[idx].name))
                            .collect::<Vec<_>>()
                            .join(", ")
                    );
                }

                if log_enabled!(Trace) {
                    let unscheduled = (0..end_pass_idx)
                        .filter(|&pass_idx| unscheduled[pass_idx])
                        .collect::<Box<_>>();

                    if !unscheduled.is_empty() {
                        // These passes are within the range of passes we thought we had to do
                        // right now, but it turns out that nothing in "schedule" relies on them
                        trace!(
                            "delaying: {}",
                            unscheduled
                                .iter()
                                .copied()
                                .map(|idx| format!("[{}: {}]", idx, self.graph.passes[idx].name))
                                .collect::<Vec<_>>()
                                .join(", ")
                        );
                    }

                    if end_pass_idx < self.graph.passes.len() {
                        // These passes exist on the graph but are not being considered right
                        // now because we've been told to stop work at the "end_pass_idx" point
                        trace!(
                            "ignoring: {}",
                            self.graph.passes[end_pass_idx..]
                                .iter()
                                .enumerate()
                                .map(|(idx, pass)| format!(
                                    "[{}: {}]",
                                    idx + end_pass_idx,
                                    pass.name
                                ))
                                .collect::<Vec<_>>()
                                .join(", ")
                        );
                    }
                }
            }
        });
    }

    fn set_scissor(cmd_buf: &CommandBuffer, x: i32, y: i32, width: u32, height: u32) {
        use std::slice::from_ref;

        unsafe {
            cmd_buf.device.cmd_set_scissor(
                **cmd_buf,
                0,
                from_ref(&vk::Rect2D {
                    extent: vk::Extent2D { width, height },
                    offset: vk::Offset2D { x, y },
                }),
            );
        }
    }

    fn set_viewport(
        cmd_buf: &CommandBuffer,
        x: f32,
        y: f32,
        width: f32,
        height: f32,
        depth: Range<f32>,
    ) {
        use std::slice::from_ref;

        unsafe {
            cmd_buf.device.cmd_set_viewport(
                **cmd_buf,
                0,
                from_ref(&vk::Viewport {
                    x,
                    y,
                    width,
                    height,
                    min_depth: depth.start,
                    max_depth: depth.end,
                }),
            );
        }
    }

    /// Submits the remaining commands stored in this instance.
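    ///
    /// A minimal usage sketch, assuming a `resolver` and `pool` exist in the calling code and
    /// that queue family `0`, queue `0` is valid for the device (illustrative values):
    ///
    /// ```ignore
    /// let cmd_buf = resolver.submit(&mut pool, 0, 0)?;
    /// ```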
    #[profiling::function]
    pub fn submit<P>(
        mut self,
        pool: &mut P,
        queue_family_index: usize,
        queue_index: usize,
    ) -> Result<Lease<CommandBuffer>, DriverError>
    where
        P: Pool<CommandBufferInfo, CommandBuffer>
            + Pool<DescriptorPoolInfo, DescriptorPool>
            + Pool<RenderPassInfo, RenderPass>,
    {
        use std::slice::from_ref;

        trace!("submit");

        let mut cmd_buf = pool.lease(CommandBufferInfo::new(queue_family_index as _))?;

        debug_assert!(
            queue_family_index < cmd_buf.device.physical_device.queue_families.len(),
            "Queue family index must be within the range of the available queues created by the device."
        );
        debug_assert!(
            queue_index
                < cmd_buf.device.physical_device.queue_families[queue_family_index].queue_count
                    as usize,
            "Queue index must be within the range of the available queues created by the device."
        );

        CommandBuffer::wait_until_executed(&mut cmd_buf)?;

        unsafe {
            cmd_buf
                .device
                .begin_command_buffer(
                    **cmd_buf,
                    &vk::CommandBufferBeginInfo::default()
                        .flags(vk::CommandBufferUsageFlags::ONE_TIME_SUBMIT),
                )
                .map_err(|_| DriverError::OutOfMemory)?;
        }

        self.record_unscheduled_passes(pool, &mut cmd_buf)?;

        unsafe {
            cmd_buf
                .device
                .end_command_buffer(**cmd_buf)
                .map_err(|_| DriverError::OutOfMemory)?;
            cmd_buf
                .device
                .reset_fences(from_ref(&cmd_buf.fence))
                .map_err(|_| DriverError::OutOfMemory)?;
            cmd_buf
                .device
                .queue_submit(
                    cmd_buf.device.queues[queue_family_index][queue_index],
                    from_ref(&vk::SubmitInfo::default().command_buffers(from_ref(&cmd_buf))),
                    cmd_buf.fence,
                )
                .map_err(|_| DriverError::OutOfMemory)?;
        }

        cmd_buf.waiting = true;

        // This graph contains references to buffers, images, and other resources which must be kept
        // alive until this graph execution completes on the GPU. Once those references are dropped
        // they will return to the pool for other things to use. The drop will happen the next time
        // someone tries to lease a command buffer and we notice this one has returned and the fence
        // has been signalled.
        CommandBuffer::push_fenced_drop(&mut cmd_buf, self);

        Ok(cmd_buf)
    }

    pub(crate) fn swapchain_image(&mut self, node: SwapchainImageNode) -> &SwapchainImage {
        let Some(swapchain_image) = self.graph.bindings[node.idx].as_swapchain_image() else {
            panic!("invalid swapchain image node");
        };

        swapchain_image
    }

    #[profiling::function]
    fn write_descriptor_sets(
        cmd_buf: &CommandBuffer,
        bindings: &[Binding],
        pass: &Pass,
        physical_pass: &PhysicalPass,
    ) -> Result<(), DriverError> {
        struct IndexWrite<'a> {
            idx: usize,
            write: vk::WriteDescriptorSet<'a>,
        }

        #[derive(Default)]
        struct Tls<'a> {
            accel_struct_infos: Vec<vk::WriteDescriptorSetAccelerationStructureKHR<'a>>,
            accel_struct_writes: Vec<IndexWrite<'a>>,
            buffer_infos: Vec<vk::DescriptorBufferInfo>,
            buffer_writes: Vec<IndexWrite<'a>>,
            descriptors: Vec<vk::WriteDescriptorSet<'a>>,
            image_infos: Vec<vk::DescriptorImageInfo>,
            image_writes: Vec<IndexWrite<'a>>,
        }

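        // Writes are staged with indexes into the info vecs rather than raw pointers because the
        // vecs may reallocate while they are filled; the pointers are patched in afterwards (see
        // the NOTE near the end of this function).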
        let mut tls = Tls::default();

        for (exec_idx, exec, pipeline) in pass
            .execs
            .iter()
            .enumerate()
            .filter_map(|(exec_idx, exec)| {
                exec.pipeline
                    .as_ref()
                    .map(|pipeline| (exec_idx, exec, pipeline))
            })
            .filter(|(.., pipeline)| !pipeline.descriptor_info().layouts.is_empty())
        {
            let descriptor_sets = &physical_pass.exec_descriptor_sets[&exec_idx];

            // Write the manually bound resources (access, read, and write functions)
            for (descriptor, (node_idx, view_info)) in exec.bindings.iter() {
                let (descriptor_set_idx, dst_binding, binding_offset) = descriptor.into_tuple();
                let (descriptor_info, _) = pipeline
                    .descriptor_bindings()
                    .get(&Descriptor { set: descriptor_set_idx, binding: dst_binding })
                    .unwrap_or_else(|| panic!("descriptor {descriptor_set_idx}.{dst_binding}[{binding_offset}] specified in recorded execution of pass \"{}\" was not discovered through shader reflection", &pass.name));
                let descriptor_type = descriptor_info.descriptor_type();
                let bound_node = &bindings[*node_idx];
                if let Some(image) = bound_node.as_driver_image() {
                    let view_info = view_info.as_ref().unwrap();
                    let mut image_view_info = *view_info.as_image().unwrap();

                    // Handle default views which did not specify a particular aspect
                    if image_view_info.aspect_mask.is_empty() {
                        image_view_info.aspect_mask = format_aspect_mask(image.info.fmt);
                    }

                    let image_view = Image::view(image, image_view_info)?;
                    let image_layout = match descriptor_type {
                        vk::DescriptorType::COMBINED_IMAGE_SAMPLER
                        | vk::DescriptorType::SAMPLED_IMAGE => {
                            if image_view_info.aspect_mask.contains(
                                vk::ImageAspectFlags::DEPTH | vk::ImageAspectFlags::STENCIL,
                            ) {
                                vk::ImageLayout::DEPTH_STENCIL_READ_ONLY_OPTIMAL
                            } else if image_view_info
                                .aspect_mask
                                .contains(vk::ImageAspectFlags::DEPTH)
                            {
                                vk::ImageLayout::DEPTH_READ_ONLY_OPTIMAL
                            } else if image_view_info
                                .aspect_mask
                                .contains(vk::ImageAspectFlags::STENCIL)
                            {
                                vk::ImageLayout::STENCIL_READ_ONLY_OPTIMAL
                            } else {
                                vk::ImageLayout::SHADER_READ_ONLY_OPTIMAL
                            }
                        }
                        vk::DescriptorType::STORAGE_IMAGE => vk::ImageLayout::GENERAL,
                        _ => unimplemented!("{descriptor_type:?}"),
                    };

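                    // Consecutive array elements (binding_offset > 0) are folded into the
                    // previous write by bumping its descriptor_count.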
                    if binding_offset == 0 {
                        tls.image_writes.push(IndexWrite {
                            idx: tls.image_infos.len(),
                            write: vk::WriteDescriptorSet {
                                dst_set: *descriptor_sets[descriptor_set_idx as usize],
                                dst_binding,
                                descriptor_type,
                                descriptor_count: 1,
                                ..Default::default()
                            },
                        });
                    } else {
                        tls.image_writes.last_mut().unwrap().write.descriptor_count += 1;
                    }

                    tls.image_infos.push(
                        vk::DescriptorImageInfo::default()
                            .image_layout(image_layout)
                            .image_view(image_view),
                    );
                } else if let Some(buffer) = bound_node.as_driver_buffer() {
                    let view_info = view_info.as_ref().unwrap();
                    let buffer_view_info = view_info.as_buffer().unwrap();

                    if binding_offset == 0 {
                        tls.buffer_writes.push(IndexWrite {
                            idx: tls.buffer_infos.len(),
                            write: vk::WriteDescriptorSet {
                                dst_set: *descriptor_sets[descriptor_set_idx as usize],
                                dst_binding,
                                descriptor_type,
                                descriptor_count: 1,
                                ..Default::default()
                            },
                        });
                    } else {
                        tls.buffer_writes.last_mut().unwrap().write.descriptor_count += 1;
                    }

                    tls.buffer_infos.push(
                        vk::DescriptorBufferInfo::default()
                            .buffer(**buffer)
                            .offset(buffer_view_info.start)
                            .range(buffer_view_info.end - buffer_view_info.start),
                    );
                } else if let Some(accel_struct) = bound_node.as_driver_acceleration_structure() {
                    if binding_offset == 0 {
                        tls.accel_struct_writes.push(IndexWrite {
                            idx: tls.accel_struct_infos.len(),
                            write: vk::WriteDescriptorSet::default()
                                .dst_set(*descriptor_sets[descriptor_set_idx as usize])
                                .dst_binding(dst_binding)
                                .descriptor_type(descriptor_type)
                                .descriptor_count(1),
                        });
                    } else {
                        tls.accel_struct_writes
                            .last_mut()
                            .unwrap()
                            .write
                            .descriptor_count += 1;
                    }

                    tls.accel_struct_infos.push(
                        vk::WriteDescriptorSetAccelerationStructureKHR::default()
                            .acceleration_structures(std::slice::from_ref(accel_struct)),
                    );
                } else {
                    unimplemented!();
                }
            }

            if let ExecutionPipeline::Graphic(pipeline) = pipeline {
                // Write graphic render pass input attachments (they're automatic)
                if exec_idx > 0 {
                    for (
                        &Descriptor {
                            set: descriptor_set_idx,
                            binding: dst_binding,
                        },
                        (descriptor_info, _),
                    ) in &pipeline.descriptor_bindings
                    {
                        if let DescriptorInfo::InputAttachment(_, attachment_idx) = *descriptor_info
                        {
                            let is_random_access = exec.color_stores.contains_key(&attachment_idx)
                                || exec.color_resolves.contains_key(&attachment_idx);
                            let (attachment, write_exec) = pass.execs[0..exec_idx]
                                .iter()
                                .rev()
                                .find_map(|exec| {
                                    exec.color_stores
                                        .get(&attachment_idx)
                                        .copied()
                                        .map(|attachment| (attachment, exec))
                                        .or_else(|| {
                                            exec.color_resolves.get(&attachment_idx).map(
                                                |(resolved_attachment, _)| {
                                                    (*resolved_attachment, exec)
                                                },
                                            )
                                        })
                                })
                                .expect("input attachment not written");
                            let late = &write_exec.accesses[&attachment.target].last().unwrap();
                            let image_range = late.subresource.as_image().unwrap();
                            let image_binding = &bindings[attachment.target];
                            let image = image_binding.as_driver_image().unwrap();
                            let image_view_info = attachment
                                .image_view_info(image.info)
                                .to_builder()
                                .array_layer_count(image_range.layer_count)
                                .base_array_layer(image_range.base_array_layer)
                                .base_mip_level(image_range.base_mip_level)
                                .mip_level_count(image_range.level_count)
                                .build();
                            let image_view = Image::view(image, image_view_info)?;

                            tls.image_writes.push(IndexWrite {
                                idx: tls.image_infos.len(),
                                write: vk::WriteDescriptorSet {
                                    dst_set: *descriptor_sets[descriptor_set_idx as usize],
                                    dst_binding,
                                    descriptor_type: vk::DescriptorType::INPUT_ATTACHMENT,
                                    descriptor_count: 1,
                                    ..Default::default()
                                },
                            });

                            tls.image_infos.push(vk::DescriptorImageInfo {
                                image_layout: Self::attachment_layout(
                                    attachment.aspect_mask,
                                    is_random_access,
                                    true,
                                ),
                                image_view,
                                sampler: vk::Sampler::null(),
                            });
                        }
                    }
                }
            }
        }

        // NOTE: We assign the below pointers after the above insertions so they remain stable!

        tls.descriptors
            .extend(tls.accel_struct_writes.drain(..).map(
                |IndexWrite { idx, mut write }| unsafe {
                    write.p_next = tls.accel_struct_infos.as_ptr().add(idx) as *const _;
                    write
                },
            ));
        tls.descriptors.extend(tls.buffer_writes.drain(..).map(
            |IndexWrite { idx, mut write }| unsafe {
                write.p_buffer_info = tls.buffer_infos.as_ptr().add(idx);
                write
            },
        ));
        tls.descriptors.extend(tls.image_writes.drain(..).map(
            |IndexWrite { idx, mut write }| unsafe {
                write.p_image_info = tls.image_infos.as_ptr().add(idx);
                write
            },
        ));

        if !tls.descriptors.is_empty() {
            trace!(
                "  writing {} descriptors ({} buffers, {} images)",
                tls.descriptors.len(),
                tls.buffer_infos.len(),
                tls.image_infos.len()
            );

            unsafe {
                cmd_buf
                    .device
                    .update_descriptor_sets(tls.descriptors.as_slice(), &[]);
            }
        }

        Ok(())
    }
}

#[derive(Default)]
struct Schedule {
    access_cache: AccessCache,
    passes: Vec<usize>,
}