1use crate::quantize::quantize_unorm;
4use crate::util::zero_inverse;
5use crate::vertex::{calc_pos_extents, Position};
6use crate::Vector3;
7
8const VIEWPORT: usize = 256;
9
10#[derive(Default)]
11pub struct OverdrawStatistics {
12 pub pixels_covered: u32,
13 pub pixels_shaded: u32,
14 pub overdraw: f32,
16}
17
18struct OverdrawBuffer {
19 z: [[[f32; 2]; VIEWPORT]; VIEWPORT],
20 overdraw: [[[u32; 2]; VIEWPORT]; VIEWPORT],
21}
22
23impl Default for OverdrawBuffer {
24 fn default() -> Self {
25 Self {
26 z: [[[0.0; 2]; VIEWPORT]; VIEWPORT],
27 overdraw: [[[0; 2]; VIEWPORT]; VIEWPORT],
28 }
29 }
30}
31
32fn compute_depth_gradients(v1: Vector3, v2: Vector3, v3: Vector3) -> (f32, f32, f32) {
33 let det = (v2.x - v1.x) * (v3.y - v1.y) - (v2.y - v1.y) * (v3.x - v1.x);
39 let invdet = zero_inverse(det);
40
41 let dzdx = (v2.z - v1.z) * (v3.y - v1.y) - (v2.y - v1.y) * (v3.z - v1.z) * invdet;
42 let dzdy = (v2.x - v1.x) * (v3.z - v1.z) - (v2.z - v1.z) * (v3.x - v1.x) * invdet;
43
44 (det, dzdx, dzdy)
45}
46
47fn rasterize(buffer: &mut OverdrawBuffer, mut v1: Vector3, mut v2: Vector3, mut v3: Vector3) {
49 let (det, mut dzx, mut dzy) = compute_depth_gradients(v1, v2, v3);
51 let sign = det > 0.0;
52
53 if sign {
55 std::mem::swap(&mut v2, &mut v3);
57
58 v1.z = VIEWPORT as f32 - v1.z;
60 dzx = -dzx;
61 dzy = -dzy;
62 }
63
64 let x1 = (16.0 * v1.x + 0.5) as i32;
66 let x2 = (16.0 * v2.x + 0.5) as i32;
67 let x3 = (16.0 * v3.x + 0.5) as i32;
68
69 let y1 = (16.0 * v1.y + 0.5) as i32;
70 let y2 = (16.0 * v2.y + 0.5) as i32;
71 let y3 = (16.0 * v3.y + 0.5) as i32;
72
73 let minx = ((x1.min(x2.min(x3)) + 7) >> 4).max(0);
78 let maxx = ((x1.max(x2.max(x3)) + 7) >> 4).min(VIEWPORT as i32);
79 let miny = ((y1.min(y2.min(y3)) + 7) >> 4).max(0);
80 let maxy = ((y1.max(y2.max(y3)) + 7) >> 4).min(VIEWPORT as i32);
81
82 let dx12 = x1 - x2;
84 let dx23 = x2 - x3;
85 let dx31 = x3 - x1;
86
87 let dy12 = y1 - y2;
88 let dy23 = y2 - y3;
89 let dy31 = y3 - y1;
90
91 let tl1 = (dy12 < 0 || (dy12 == 0 && dx12 > 0)) as i32;
93 let tl2 = (dy23 < 0 || (dy23 == 0 && dx23 > 0)) as i32;
94 let tl3 = (dy31 < 0 || (dy31 == 0 && dx31 > 0)) as i32;
95
96 let fx = (minx << 4) + 8;
99 let fy = (miny << 4) + 8;
100 let mut cy1 = dx12 * (fy - y1) - dy12 * (fx - x1) + tl1 - 1;
101 let mut cy2 = dx23 * (fy - y2) - dy23 * (fx - x2) + tl2 - 1;
102 let mut cy3 = dx31 * (fy - y3) - dy31 * (fx - x3) + tl3 - 1;
103 let mut zy = v1.z + (dzx * (fx - x1) as f32 + dzy * (fy - y1) as f32) * (1.0 / 16.0);
104
105 for y in miny..maxy {
106 let y = y as usize;
107
108 let mut cx1 = cy1;
109 let mut cx2 = cy2;
110 let mut cx3 = cy3;
111 let mut zx = zy;
112
113 for x in minx..maxx {
114 let x = x as usize;
115 let sign = sign as usize;
116
117 if cx1 | cx2 | cx3 >= 0 {
119 if zx >= buffer.z[y][x][sign] {
120 buffer.z[y][x][sign] = zx;
121 buffer.overdraw[y][x][sign] += 1;
122 }
123 }
124
125 cx1 -= ((dy12 as u32) << 4) as i32;
128 cx2 -= ((dy23 as u32) << 4) as i32;
129 cx3 -= ((dy31 as u32) << 4) as i32;
130 zx += dzx;
131 }
132
133 cy1 += ((dx12 as u32) << 4) as i32;
136 cy2 += ((dx23 as u32) << 4) as i32;
137 cy3 += ((dx31 as u32) << 4) as i32;
138 zy += dzy;
139 }
140}
141
142pub fn analyze_overdraw<Vertex>(indices: &[u32], vertices: &[Vertex]) -> OverdrawStatistics
146where
147 Vertex: Position,
148{
149 assert!(indices.len() % 3 == 0);
150
151 let mut result = OverdrawStatistics::default();
152
153 let (minv, extent) = calc_pos_extents(vertices);
154
155 let scale = VIEWPORT as f32 / extent;
156
157 let mut triangles = vec![0.0; indices.len() * 3];
158
159 for (i, index) in indices.iter().enumerate() {
160 let index = *index as usize;
161
162 let v = vertices[index].pos();
163
164 triangles[i * 3 + 0] = (v[0] - minv[0]) * scale;
165 triangles[i * 3 + 1] = (v[1] - minv[1]) * scale;
166 triangles[i * 3 + 2] = (v[2] - minv[2]) * scale;
167 }
168
169 let mut buffer = Box::default();
170
171 for axis in 0..3 {
172 *buffer = OverdrawBuffer::default();
173
174 for vn in triangles.chunks_exact(9) {
175 let vn0 = &vn[0..3];
176 let vn1 = &vn[3..6];
177 let vn2 = &vn[6..9];
178
179 match axis {
180 0 => rasterize(
181 &mut buffer,
182 Vector3::new(vn0[2], vn0[1], vn0[0]),
183 Vector3::new(vn1[2], vn1[1], vn1[0]),
184 Vector3::new(vn2[2], vn2[1], vn2[0]),
185 ),
186 1 => rasterize(
187 &mut buffer,
188 Vector3::new(vn0[0], vn0[2], vn0[1]),
189 Vector3::new(vn1[0], vn1[2], vn1[1]),
190 Vector3::new(vn2[0], vn2[2], vn2[1]),
191 ),
192 2 => rasterize(
193 &mut buffer,
194 Vector3::new(vn0[1], vn0[0], vn0[2]),
195 Vector3::new(vn1[1], vn1[0], vn1[2]),
196 Vector3::new(vn2[1], vn2[0], vn2[2]),
197 ),
198 _ => unreachable!(),
199 }
200 }
201
202 for y in 0..VIEWPORT {
203 for x in 0..VIEWPORT {
204 for s in 0..2 {
205 let overdraw = buffer.overdraw[y][x][s];
206
207 result.pixels_covered += (overdraw > 0) as u32;
208 result.pixels_shaded += overdraw;
209 }
210 }
211 }
212 }
213
214 result.overdraw = if result.pixels_covered > 0 {
215 result.pixels_shaded as f32 / result.pixels_covered as f32
216 } else {
217 0.0
218 };
219
220 result
221}
222
223fn calculate_sort_data<Vertex>(sort_data: &mut [f32], indices: &[u32], vertices: &[Vertex], clusters: &[u32])
224where
225 Vertex: Position,
226{
227 let mut mesh_centroid = [0.0; 3];
228
229 for index in indices {
230 let p = vertices[*index as usize].pos();
231
232 mesh_centroid[0] += p[0];
233 mesh_centroid[1] += p[1];
234 mesh_centroid[2] += p[2];
235 }
236
237 mesh_centroid[0] /= indices.len() as f32;
238 mesh_centroid[1] /= indices.len() as f32;
239 mesh_centroid[2] /= indices.len() as f32;
240
241 for (cluster_idx, cluster_begin) in clusters.iter().enumerate() {
242 let cluster_end = if let Some(begin) = clusters.get(cluster_idx + 1) {
243 (*begin) as usize * 3
244 } else {
245 indices.len()
246 };
247 let cluster = (*cluster_begin as usize) * 3..cluster_end;
248 assert!(!cluster.is_empty());
249
250 let mut cluster_area = 0.0;
251 let mut cluster_centroid = [0.0; 3];
252 let mut cluster_normal = [0.0f32; 3];
253
254 for i in indices[cluster].chunks_exact(3) {
255 let p0 = vertices[i[0] as usize].pos();
256 let p1 = vertices[i[1] as usize].pos();
257 let p2 = vertices[i[2] as usize].pos();
258
259 let p10 = [p1[0] - p0[0], p1[1] - p0[1], p1[2] - p0[2]];
260 let p20 = [p2[0] - p0[0], p2[1] - p0[1], p2[2] - p0[2]];
261
262 let normalx = p10[1] * p20[2] - p10[2] * p20[1];
263 let normaly = p10[2] * p20[0] - p10[0] * p20[2];
264 let normalz = p10[0] * p20[1] - p10[1] * p20[0];
265
266 let area = (normalx * normalx + normaly * normaly + normalz * normalz).sqrt();
267
268 cluster_centroid[0] += (p0[0] + p1[0] + p2[0]) * (area / 3.0);
269 cluster_centroid[1] += (p0[1] + p1[1] + p2[1]) * (area / 3.0);
270 cluster_centroid[2] += (p0[2] + p1[2] + p2[2]) * (area / 3.0);
271 cluster_normal[0] += normalx;
272 cluster_normal[1] += normaly;
273 cluster_normal[2] += normalz;
274 cluster_area += area;
275 }
276
277 let inv_cluster_area = zero_inverse(cluster_area);
278
279 cluster_centroid[0] *= inv_cluster_area;
280 cluster_centroid[1] *= inv_cluster_area;
281 cluster_centroid[2] *= inv_cluster_area;
282
283 let cluster_normal_length = (cluster_normal[0] * cluster_normal[0]
284 + cluster_normal[1] * cluster_normal[1]
285 + cluster_normal[2] * cluster_normal[2])
286 .sqrt();
287 let inv_cluster_normal_length = zero_inverse(cluster_normal_length);
288
289 cluster_normal[0] *= inv_cluster_normal_length;
290 cluster_normal[1] *= inv_cluster_normal_length;
291 cluster_normal[2] *= inv_cluster_normal_length;
292
293 let centroid_vector = [
294 cluster_centroid[0] - mesh_centroid[0],
295 cluster_centroid[1] - mesh_centroid[1],
296 cluster_centroid[2] - mesh_centroid[2],
297 ];
298
299 sort_data[cluster_idx] = centroid_vector[0] * cluster_normal[0]
300 + centroid_vector[1] * cluster_normal[1]
301 + centroid_vector[2] * cluster_normal[2];
302 }
303}
304
305fn calculate_sort_order_radix(sort_order: &mut [u32], sort_data: &[f32], sort_keys: &mut [u16]) {
306 let mut sort_data_max: f32 = 0.001;
308
309 for data in sort_data {
310 sort_data_max = sort_data_max.max(data.abs());
311 }
312
313 const SORT_BITS: i32 = 11;
314
315 for (data, key) in sort_data.iter().zip(sort_keys.iter_mut()) {
316 let sort_key = 0.5 - 0.5 * (data / sort_data_max);
318
319 *key = (quantize_unorm(sort_key, SORT_BITS) & ((1 << SORT_BITS) - 1)) as u16;
320 }
321
322 let mut histogram = [0; 1 << SORT_BITS];
324
325 for key in sort_keys.iter() {
326 histogram[*key as usize] += 1;
327 }
328
329 let mut histogram_sum = 0;
331
332 for i in 0..histogram.len() {
333 let count = histogram[i];
334 histogram[i] = histogram_sum;
335 histogram_sum += count;
336 }
337
338 assert_eq!(histogram_sum, sort_keys.len());
339
340 for i in 0..sort_keys.len() {
342 let idx = &mut histogram[sort_keys[i] as usize];
343 sort_order[*idx as usize] = i as u32;
344 *idx += 1;
345 }
346}
347
348fn update_cache(a: u32, b: u32, c: u32, cache_size: u32, cache_timestamps: &mut [u32], timestamp: &mut u32) -> u32 {
349 let mut cache_misses = 0;
350
351 if *timestamp - cache_timestamps[a as usize] > cache_size {
353 cache_timestamps[a as usize] = *timestamp;
354 *timestamp += 1;
355 cache_misses += 1;
356 }
357
358 if *timestamp - cache_timestamps[b as usize] > cache_size {
359 cache_timestamps[b as usize] = *timestamp;
360 *timestamp += 1;
361 cache_misses += 1;
362 }
363
364 if *timestamp - cache_timestamps[c as usize] > cache_size {
365 cache_timestamps[c as usize] = *timestamp;
366 *timestamp += 1;
367 cache_misses += 1;
368 }
369
370 cache_misses
371}
372
373fn generate_hard_boundaries(
374 destination: &mut [u32],
375 indices: &[u32],
376 cache_size: u32,
377 cache_timestamps: &mut [u32],
378) -> usize {
379 let mut timestamp = cache_size as u32 + 1;
380
381 let face_count = indices.len() / 3;
382
383 let mut result = 0;
384
385 for i in 0..face_count {
386 let m = update_cache(
387 indices[i * 3 + 0],
388 indices[i * 3 + 1],
389 indices[i * 3 + 2],
390 cache_size,
391 cache_timestamps,
392 &mut timestamp,
393 );
394
395 if i == 0 || m == 3 {
400 destination[result] = i as u32;
401 result += 1;
402 }
403 }
404
405 assert!(result <= indices.len() / 3);
406
407 result
408}
409
410fn generate_soft_boundaries(
411 destination: &mut [u32],
412 indices: &[u32],
413 clusters: &[u32],
414 cache_size: u32,
415 threshold: f32,
416 cache_timestamps: &mut [u32],
417) -> usize {
418 let mut timestamp = 0;
419
420 let mut result = 0;
421
422 for (cluster_idx, start) in clusters.iter().enumerate() {
423 let end = if let Some(begin) = clusters.get(cluster_idx + 1) {
424 *begin as usize
425 } else {
426 indices.len() / 3
427 };
428 let cluster = (*start as usize)..end;
429 assert!(!cluster.is_empty());
430
431 timestamp += cache_size + 1;
433
434 let mut cluster_misses = 0;
436
437 for i in cluster.clone() {
438 let m = update_cache(
439 indices[i * 3 + 0],
440 indices[i * 3 + 1],
441 indices[i * 3 + 2],
442 cache_size,
443 cache_timestamps,
444 &mut timestamp,
445 );
446
447 cluster_misses += m;
448 }
449
450 let cluster_threshold = threshold * (cluster_misses as f32 / (end - *start as usize) as f32);
451
452 destination[result] = *start;
454 result += 1;
455
456 timestamp += cache_size + 1;
458
459 let mut running_misses = 0;
460 let mut running_faces = 0;
461
462 for i in cluster {
463 let m = update_cache(
464 indices[i * 3 + 0],
465 indices[i * 3 + 1],
466 indices[i * 3 + 2],
467 cache_size,
468 cache_timestamps,
469 &mut timestamp,
470 );
471
472 running_misses += m;
473 running_faces += 1;
474
475 if running_misses as f32 / running_faces as f32 <= cluster_threshold {
476 destination[result] = i as u32 + 1;
480 result += 1;
481
482 timestamp += cache_size + 1;
484
485 running_misses = 0;
486 running_faces = 0;
487 }
488 }
489
490 if destination[result - 1] != *start {
497 result -= 1;
498 }
499 }
500
501 assert!(result >= clusters.len());
502 assert!(result <= indices.len() / 3);
503
504 result
505}
506
507pub fn optimize_overdraw<Vertex>(destination: &mut [u32], indices: &[u32], vertices: &[Vertex], threshold: f32)
517where
518 Vertex: Position,
519{
520 assert_eq!(indices.len() % 3, 0);
521
522 if indices.is_empty() || vertices.is_empty() {
524 return;
525 }
526
527 const CACHE_SIZE: u32 = 16;
528
529 let mut cache_timestamps = vec![0u32; vertices.len()];
530
531 let mut hard_clusters = vec![0u32; indices.len() / 3];
533 let hard_cluster_count = generate_hard_boundaries(&mut hard_clusters, indices, CACHE_SIZE, &mut cache_timestamps);
534
535 cache_timestamps.fill(0);
537 let mut soft_clusters = vec![0u32; indices.len() / 3 + 1];
538 let soft_cluster_count = generate_soft_boundaries(
539 &mut soft_clusters,
540 indices,
541 &hard_clusters[0..hard_cluster_count],
542 CACHE_SIZE,
543 threshold,
544 &mut cache_timestamps,
545 );
546
547 let clusters = &soft_clusters[0..soft_cluster_count];
548
549 let mut sort_data = vec![0.0; clusters.len()];
551 calculate_sort_data(&mut sort_data, indices, vertices, clusters);
552
553 let mut sort_keys = vec![0u16; clusters.len()];
555 let mut sort_order = vec![0u32; clusters.len()];
556 calculate_sort_order_radix(&mut sort_order, &mut sort_data, &mut sort_keys);
557
558 let mut offset = 0;
560
561 for cluster in &sort_order {
562 let cluster = *cluster as usize;
563
564 let cluster_begin = (clusters[cluster] * 3) as usize;
565 let cluster_end = if let Some(begin) = clusters.get(cluster + 1) {
566 (begin * 3) as usize
567 } else {
568 indices.len()
569 };
570 assert!(cluster_begin < cluster_end);
571
572 let cluster_size = cluster_end - cluster_begin;
573
574 destination[offset..offset + cluster_size].copy_from_slice(&indices[cluster_begin..cluster_end]);
575
576 offset += cluster_size;
577 }
578
579 assert_eq!(offset, indices.len());
580}
581
582#[cfg(test)]
583mod test {
584 use super::*;
585
586 struct DummyVertex;
587
588 impl Position for DummyVertex {
589 fn pos(&self) -> [f32; 3] {
590 [0.0; 3]
591 }
592 }
593
594 #[test]
595 fn test_empty() {
596 optimize_overdraw(&mut [], &[], &[DummyVertex], 1.0);
597 }
598}