1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
use crate::INVALID_INDEX;
use super::Position;
/// Statistics produced by [`analyze_vertex_fetch`].
#[derive(Debug, Clone, Copy, Default, PartialEq)]
pub struct VertexFetchStatistics {
    /// Total number of bytes read from memory, counted at cache-line granularity.
    pub bytes_fetched: u32,
    /// Ratio of `bytes_fetched` to the minimum required (`unique vertices * vertex_size`).
    /// 1.0 is ideal; 0.0 for an empty index buffer.
    pub overfetch: f32,
}

/// Analyzes the memory traffic of vertex fetch for the given triangle index buffer,
/// assuming a GPU-like direct-mapped cache with 64-byte lines and 128 KiB capacity.
///
/// `indices` must contain triangle lists (length divisible by 3) with every index
/// below `vertex_count`; `vertex_size` is the stride in bytes, in `1..=256`.
///
/// # Panics
/// Panics if the index count is not a multiple of 3, `vertex_size` is out of range,
/// or any index is out of bounds.
pub fn analyze_vertex_fetch(indices: &[u32], vertex_count: usize, vertex_size: usize) -> VertexFetchStatistics {
    assert!(indices.len() % 3 == 0);
    assert!(vertex_size > 0 && vertex_size <= 256);

    let mut result = VertexFetchStatistics::default();

    let mut vertex_visited = vec![false; vertex_count];

    // Model of the vertex fetch hardware: a direct-mapped cache.
    const CACHE_LINE: usize = 64;
    const CACHE_SIZE: usize = 128 * 1024;
    // Each entry stores `tag + 1` so that 0 means "empty line".
    let mut cache = [0usize; CACHE_SIZE / CACHE_LINE];

    for index in indices {
        let index = *index as usize;
        assert!(index < vertex_count);

        vertex_visited[index] = true;

        // Byte range occupied by this vertex.
        let start_address = index * vertex_size;
        let end_address = start_address + vertex_size;

        // Cache-line tags touched by the range; `end_tag` is exclusive, rounded up.
        let start_tag = start_address / CACHE_LINE;
        let end_tag = (end_address + CACHE_LINE - 1) / CACHE_LINE;

        // vertex_size > 0 guarantees at least one line is touched.
        assert!(start_tag < end_tag);

        for tag in start_tag..end_tag {
            let line = tag % (CACHE_SIZE / CACHE_LINE);

            // A mismatched tag is a cache miss: the whole line is fetched.
            result.bytes_fetched += (cache[line] != tag + 1) as u32 * CACHE_LINE as u32;
            cache[line] = tag + 1;
        }
    }

    let unique_vertex_count: usize = vertex_visited.iter().map(|v| *v as usize).sum();

    // Guard against 0/0 for empty index buffers.
    result.overfetch = if unique_vertex_count == 0 {
        0.0
    } else {
        result.bytes_fetched as f32 / (unique_vertex_count * vertex_size) as f32
    };

    result
}
/// Builds a vertex remap table that reorders vertices in the order they are first
/// referenced by the triangle index buffer, improving vertex fetch locality.
///
/// `destination` must have one slot per vertex; slots for vertices never referenced
/// by `indices` are left as `INVALID_INDEX`. Returns the number of unique vertices.
///
/// # Panics
/// Panics if the index count is not a multiple of 3 or an index exceeds
/// `destination.len()`.
pub fn optimize_vertex_fetch_remap(destination: &mut [u32], indices: &[u32]) -> usize {
    assert!(indices.len() % 3 == 0);

    // Mark every vertex as not yet assigned an output position.
    for slot in destination.iter_mut() {
        *slot = INVALID_INDEX;
    }

    // Assign new positions in first-seen order over the index stream.
    let mut vertex_count = 0usize;
    for &index in indices {
        let slot = &mut destination[index as usize];

        if *slot == INVALID_INDEX {
            *slot = vertex_count as u32;
            vertex_count += 1;
        }
    }

    assert!(vertex_count <= destination.len());

    vertex_count
}
/// Reorders `vertices` into `destination` in the order they are first referenced by
/// `indices`, rewriting `indices` in place to point at the new locations. This
/// improves vertex fetch locality for post-transform-cache-optimized index buffers.
///
/// Returns the number of unique vertices written to `destination`.
///
/// # Panics
/// Panics if the index count is not a multiple of 3, an index exceeds
/// `vertices.len()`, or `destination` is too small to hold the unique vertices.
pub fn optimize_vertex_fetch<Vertex>(destination: &mut [Vertex], indices: &mut [u32], vertices: &[Vertex]) -> usize
where
    Vertex: Position + Copy,
{
    assert!(indices.len() % 3 == 0);

    // INVALID_INDEX marks vertices that have not been given an output slot yet.
    let mut remap_table = vec![INVALID_INDEX; vertices.len()];

    let mut output_count = 0usize;
    for index in indices.iter_mut() {
        let original = *index as usize;

        if remap_table[original] == INVALID_INDEX {
            // First reference: append the vertex to the output and record its slot.
            destination[output_count] = vertices[original];
            remap_table[original] = output_count as u32;
            output_count += 1;
        }

        // Redirect the index to the vertex's new location.
        *index = remap_table[original];
    }

    assert!(output_count <= vertices.len());

    output_count
}