meshopt_rs/vertex/fetch.rs

//! Vertex fetch analysis and optimization

use crate::INVALID_INDEX;

use super::Position;
6
/// Vertex fetch cache statistics produced by [analyze_vertex_fetch].
#[derive(Clone, Copy, Debug, Default)]
pub struct VertexFetchStatistics {
    /// Total number of bytes transferred from memory under the cache model
    pub bytes_fetched: u32,
    /// Fetched bytes / vertex buffer size
    ///
    /// Best case is 1.0 (each byte is fetched once)
    pub overfetch: f32,
}

/// Returns cache hit statistics using a simplified direct mapped model.
///
/// Results may not match actual GPU performance.
pub fn analyze_vertex_fetch(indices: &[u32], vertex_count: usize, vertex_size: usize) -> VertexFetchStatistics {
    assert!(indices.len() % 3 == 0);
    assert!(vertex_size > 0 && vertex_size <= 256);

    let mut result = VertexFetchStatistics::default();

    let mut vertex_visited = vec![false; vertex_count];

    const CACHE_LINE: usize = 64;
    const CACHE_SIZE: usize = 128 * 1024;
    const CACHE_LINES: usize = CACHE_SIZE / CACHE_LINE;

    // simple direct mapped cache; on typical mesh data this is close to 4-way cache,
    // and this model is a gross approximation anyway
    let mut cache = [0usize; CACHE_LINES];

    for &index in indices {
        let index = index as usize;

        assert!(index < vertex_count);

        vertex_visited[index] = true;

        // byte range occupied by this vertex, expressed as half-open cache line tag range
        let start_address = index * vertex_size;
        let end_address = start_address + vertex_size;

        let start_tag = start_address / CACHE_LINE;
        let end_tag = (end_address + CACHE_LINE - 1) / CACHE_LINE;

        // vertex_size > 0 guarantees at least one line is touched
        assert!(start_tag < end_tag);

        for tag in start_tag..end_tag {
            let line = tag % CACHE_LINES;

            // we store +1 since cache is filled with 0 by default; a mismatch is a miss
            // that fetches a whole cache line
            result.bytes_fetched += (cache[line] != tag + 1) as u32 * CACHE_LINE as u32;
            cache[line] = tag + 1;
        }
    }

    let unique_vertex_count = vertex_visited.iter().filter(|&&visited| visited).count();

    // avoid division by zero when the index buffer references no vertices
    result.overfetch = if unique_vertex_count == 0 {
        0.0
    } else {
        result.bytes_fetched as f32 / (unique_vertex_count * vertex_size) as f32
    };

    result
}
67
68/// Generates vertex remap to reduce the amount of GPU memory fetches during vertex processing.
69///
70/// Returns the number of unique vertices, which is the same as input vertex count unless some vertices are unused.
71/// The resulting remap table should be used to reorder vertex/index buffers using [remap_vertex_buffer](crate::index::generator::remap_vertex_buffer)/[remap_index_buffer](crate::index::generator::remap_index_buffer).
72///
73/// # Arguments
74///
75/// * `destination`: must contain the exact space for the resulting remap table (`vertex_count` elements)
76pub fn optimize_vertex_fetch_remap(destination: &mut [u32], indices: &[u32]) -> usize {
77    assert!(indices.len() % 3 == 0);
78
79    destination[..].fill(INVALID_INDEX);
80
81    let mut next_vertex = 0;
82
83    for index in indices {
84        let index = *index as usize;
85
86        if destination[index] == INVALID_INDEX {
87            destination[index] = next_vertex as u32;
88            next_vertex += 1;
89        }
90    }
91
92    assert!(next_vertex <= destination.len());
93
94    next_vertex
95}
96
97/// Reorders vertices and changes indices to reduce the amount of GPU memory fetches during vertex processing.
98///
99/// Returns the number of unique vertices, which is the same as input vertex count unless some vertices are unused.
100/// This functions works for a single vertex stream; for multiple vertex streams, use [optimize_vertex_fetch_remap] + [remap_vertex_buffer](crate::index::generator::remap_vertex_buffer) for each stream.
101///
102/// # Arguments
103///
104/// * `destination`: must contain enough space for the resulting vertex buffer (`vertices.len()` elements)
105pub fn optimize_vertex_fetch<Vertex>(destination: &mut [Vertex], indices: &mut [u32], vertices: &[Vertex]) -> usize
106where
107    Vertex: Position + Copy,
108{
109    assert!(indices.len() % 3 == 0);
110
111    // build vertex remap table
112    let mut vertex_remap = vec![INVALID_INDEX; vertices.len()];
113
114    let mut next_vertex = 0;
115
116    for index in indices.iter_mut() {
117        let idx = *index as usize;
118
119        let remap = &mut vertex_remap[idx];
120
121        if *remap == INVALID_INDEX {
122            // vertex was not added to destination VB
123            // add vertex
124            destination[next_vertex] = vertices[idx];
125
126            *remap = next_vertex as u32;
127            next_vertex += 1;
128        }
129
130        // modify indices in place
131        *index = *remap;
132    }
133
134    assert!(next_vertex <= vertices.len());
135
136    next_vertex
137}