//! The GPU cache: on-demand allocation of GPU data blocks that shaders
//! fetch from a float texture. Entries are addressed through handles,
//! invalidated by epoch, evicted per document when unused, and the whole
//! texture is reclaimed when utilization stays low for long enough.
use api::{DebugFlags, DocumentId, PremultipliedColorF};
#[cfg(test)]
use api::IdNamespace;
use api::units::*;
use euclid::{HomogeneousVector, Box2D};
use crate::internal_types::{FastHashMap, FastHashSet};
use crate::profiler::{self, TransactionProfile};
use crate::render_backend::{FrameStamp, FrameId};
use crate::prim_store::VECS_PER_SEGMENT;
use crate::renderer::MAX_VERTEX_TEXTURE_WIDTH;
use crate::util::VecHelper;
use std::{u16, u32};
use std::num::NonZeroU32;
use std::ops::Add;
use std::time::{Duration, Instant};
/// Initial height of the GPU cache texture, in rows.
pub const GPU_CACHE_INITIAL_HEIGHT: i32 = 20;
/// Number of rows to add each time the cache texture fills up.
const NEW_ROWS_PER_RESIZE: i32 = 10;
/// The number of frames an entry may go unused before it is evicted.
const FRAMES_BEFORE_EVICTION: usize = 10;
/// Utilization below which the memory-reclaim timer starts running.
const RECLAIM_THRESHOLD: f32 = 0.2;
/// How long (in seconds) utilization must stay below the threshold
/// before the cache is dropped and rebuilt.
const RECLAIM_DELAY_S: u64 = 5;
/// A generation counter. A handle is valid only while the epoch stored
/// in it matches the current epoch of the block it points at.
#[derive(Debug, Copy, Clone, Eq, MallocSizeOf, PartialEq)]
#[cfg_attr(feature = "capture", derive(Serialize))]
#[cfg_attr(feature = "replay", derive(Deserialize))]
struct Epoch(u32);
impl Epoch {
fn next(&mut self) {
*self = Epoch(self.0.wrapping_add(1));
}
}
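// Illustrative test, added here rather than taken from the original
// suite: epochs wrap on overflow, so handle validity is checked by
// equality against the block's epoch, never by ordering.
#[test]
fn test_epoch_wraps_on_overflow() {
    let mut epoch = Epoch(u32::MAX);
    epoch.next();
    assert_eq!(epoch, Epoch(0));
}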
/// Where the data for a resource currently lives in the cache texture,
/// together with the epoch at which it was written.
#[derive(Debug, Copy, Clone, MallocSizeOf)]
#[cfg_attr(feature = "capture", derive(Serialize))]
#[cfg_attr(feature = "replay", derive(Deserialize))]
struct CacheLocation {
block_index: BlockIndex,
epoch: Epoch,
}
/// A single data block in the GPU cache: one RGBAF32 texel (16 bytes).
#[derive(Copy, Clone, Debug, MallocSizeOf)]
#[cfg_attr(feature = "capture", derive(Serialize))]
#[cfg_attr(feature = "replay", derive(Deserialize))]
pub struct GpuBlockData {
data: [f32; 4],
}
impl GpuBlockData {
pub const EMPTY: Self = GpuBlockData { data: [0.0; 4] };
}
impl From<PremultipliedColorF> for GpuBlockData {
fn from(c: PremultipliedColorF) -> Self {
GpuBlockData {
data: [c.r, c.g, c.b, c.a],
}
}
}
impl From<[f32; 4]> for GpuBlockData {
fn from(data: [f32; 4]) -> Self {
GpuBlockData { data }
}
}
impl<P> From<Box2D<f32, P>> for GpuBlockData {
fn from(r: Box2D<f32, P>) -> Self {
GpuBlockData {
data: [
r.min.x,
r.min.y,
r.max.x,
r.max.y,
],
}
}
}
impl<P> From<HomogeneousVector<f32, P>> for GpuBlockData {
fn from(v: HomogeneousVector<f32, P>) -> Self {
GpuBlockData {
data: [
v.x,
v.y,
v.z,
v.w,
],
}
}
}
impl From<TexelRect> for GpuBlockData {
fn from(tr: TexelRect) -> Self {
GpuBlockData {
data: [tr.uv0.x, tr.uv0.y, tr.uv1.x, tr.uv1.y],
}
}
}
/// A handle to a resource in the GPU cache. Starts out unallocated; a
/// location is assigned the first time data is pushed for the handle.
#[derive(Debug, Copy, Clone, MallocSizeOf)]
#[cfg_attr(feature = "capture", derive(Serialize))]
#[cfg_attr(feature = "replay", derive(Deserialize))]
pub struct GpuCacheHandle {
location: Option<CacheLocation>,
}
impl GpuCacheHandle {
pub fn new() -> Self {
GpuCacheHandle { location: None }
}
    /// Returns the linearized texture address for this handle, suitable
    /// for packing into instance data. Panics if the handle has not been
    /// requested and built this frame.
    pub fn as_int(self, gpu_cache: &GpuCache) -> i32 {
gpu_cache.get_address(&self).as_int()
}
}
/// A texel coordinate in the GPU cache texture. These are uploaded with
/// primitive instances so the vertex shader can fetch resource data.
#[derive(Copy, Debug, Clone, MallocSizeOf, Eq, PartialEq)]
#[cfg_attr(feature = "capture", derive(Serialize))]
#[cfg_attr(feature = "replay", derive(Deserialize))]
pub struct GpuCacheAddress {
pub u: u16,
pub v: u16,
}
impl GpuCacheAddress {
fn new(u: usize, v: usize) -> Self {
GpuCacheAddress {
u: u as u16,
v: v as u16,
}
}
pub const INVALID: GpuCacheAddress = GpuCacheAddress {
u: u16::MAX,
v: u16::MAX,
};
    /// Linearizes the address as v * MAX_VERTEX_TEXTURE_WIDTH + u.
    pub fn as_int(self) -> i32 {
self.v as i32 * MAX_VERTEX_TEXTURE_WIDTH as i32 + self.u as i32
}
}
impl Add<usize> for GpuCacheAddress {
type Output = GpuCacheAddress;
fn add(self, other: usize) -> GpuCacheAddress {
GpuCacheAddress {
u: self.u + other as u16,
v: self.v,
}
}
}
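// Illustrative test (not from the original suite): an address linearizes
// as v * MAX_VERTEX_TEXTURE_WIDTH + u, and adding an offset walks along
// the row without changing v.
#[test]
fn test_address_arithmetic() {
    let address = GpuCacheAddress::new(3, 2);
    assert_eq!(address.as_int(), 2 * MAX_VERTEX_TEXTURE_WIDTH as i32 + 3);
    let shifted = address + 4;
    assert_eq!(shifted.u, 7);
    assert_eq!(shifted.v, 2);
}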
/// One allocation slot in the cache texture. A block is linked into
/// either a per-size free list or a per-document occupied list.
#[derive(Debug, MallocSizeOf)]
#[cfg_attr(feature = "capture", derive(Serialize))]
#[cfg_attr(feature = "replay", derive(Deserialize))]
struct Block {
address: GpuCacheAddress,
epoch: Epoch,
next: Option<BlockIndex>,
last_access_time: FrameId,
}
impl Block {
fn new(
address: GpuCacheAddress,
next: Option<BlockIndex>,
frame_id: FrameId,
epoch: Epoch,
) -> Self {
Block {
address,
next,
last_access_time: frame_id,
epoch,
}
}
fn advance_epoch(&mut self, max_epoch: &mut Epoch) {
self.epoch.next();
if max_epoch.0 < self.epoch.0 {
max_epoch.0 = self.epoch.0;
}
}
    /// A dummy block stored at index 0 of the block array, so that
    /// BlockIndex can use the NonZeroU32 niche.
    pub const INVALID: Block = Block {
address: GpuCacheAddress { u: 0, v: 0 },
epoch: Epoch(0),
next: None,
last_access_time: FrameId::INVALID,
};
}
/// Index of a block within Texture::blocks. Index zero is reserved for
/// the Block::INVALID sentinel, enabling the NonZeroU32 niche.
#[derive(Debug, Copy, Clone, MallocSizeOf)]
#[cfg_attr(feature = "capture", derive(Serialize))]
#[cfg_attr(feature = "replay", derive(Deserialize))]
struct BlockIndex(NonZeroU32);
impl BlockIndex {
fn new(idx: usize) -> Self {
debug_assert!(idx <= u32::MAX as usize);
BlockIndex(NonZeroU32::new(idx as u32).expect("Index zero forbidden"))
}
fn get(&self) -> usize {
self.0.get() as usize
}
}
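// Illustrative test (not from the original suite): the NonZeroU32 niche
// makes Option<BlockIndex> the same size as BlockIndex itself, which is
// why the block array reserves slot 0 for Block::INVALID.
#[test]
fn test_block_index_niche() {
    use std::mem;
    assert_eq!(mem::size_of::<Option<BlockIndex>>(), mem::size_of::<BlockIndex>());
}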
#[cfg_attr(feature = "capture", derive(Serialize))]
#[cfg_attr(feature = "replay", derive(Deserialize))]
#[derive(MallocSizeOf)]
struct Row {
block_count_per_item: usize,
}
impl Row {
fn new(block_count_per_item: usize) -> Self {
Row {
block_count_per_item,
}
}
}
#[cfg_attr(feature = "capture", derive(Serialize))]
#[cfg_attr(feature = "replay", derive(Deserialize))]
#[derive(MallocSizeOf)]
pub enum GpuCacheUpdate {
Copy {
block_index: usize,
block_count: usize,
address: GpuCacheAddress,
},
}
/// Commands sent to the debug display to visualize cache allocations
/// and frees.
#[derive(MallocSizeOf)]
pub enum GpuCacheDebugCmd {
Alloc(GpuCacheDebugChunk),
Free(GpuCacheAddress),
}
#[derive(Clone, MallocSizeOf)]
pub struct GpuCacheDebugChunk {
pub address: GpuCacheAddress,
pub size: usize,
}
/// The set of GPU cache changes for one frame, consumed by the renderer.
#[must_use]
#[cfg_attr(feature = "capture", derive(Serialize))]
#[cfg_attr(feature = "replay", derive(Deserialize))]
#[derive(MallocSizeOf)]
pub struct GpuCacheUpdateList {
    /// The frame this update list was built for.
    pub frame_id: FrameId,
    /// Whether the texture should be cleared before updates are applied.
    pub clear: bool,
    /// The current height of the texture. The render thread should
    /// resize the texture if required.
    pub height: i32,
    /// List of copy operations to apply.
    pub updates: Vec<GpuCacheUpdate>,
    /// A flat list of blocks pending upload, referenced by `updates`.
    pub blocks: Vec<GpuBlockData>,
    /// Commands for the debug display; only populated when the
    /// GPU_CACHE_DBG flag is set.
    #[cfg_attr(feature = "serde", serde(skip))]
    pub debug_commands: Vec<GpuCacheDebugCmd>,
}
#[cfg_attr(feature = "capture", derive(Serialize))]
#[cfg_attr(feature = "replay", derive(Deserialize))]
#[derive(MallocSizeOf)]
struct FreeBlockLists {
free_list_1: Option<BlockIndex>,
free_list_2: Option<BlockIndex>,
free_list_4: Option<BlockIndex>,
free_list_8: Option<BlockIndex>,
free_list_16: Option<BlockIndex>,
free_list_32: Option<BlockIndex>,
free_list_64: Option<BlockIndex>,
free_list_128: Option<BlockIndex>,
free_list_256: Option<BlockIndex>,
free_list_341: Option<BlockIndex>,
free_list_512: Option<BlockIndex>,
free_list_1024: Option<BlockIndex>,
}
impl FreeBlockLists {
fn new() -> Self {
FreeBlockLists {
free_list_1: None,
free_list_2: None,
free_list_4: None,
free_list_8: None,
free_list_16: None,
free_list_32: None,
free_list_64: None,
free_list_128: None,
free_list_256: None,
free_list_341: None,
free_list_512: None,
free_list_1024: None,
}
}
    /// Rounds `block_count` up to the nearest bucket size and returns
    /// that size together with the matching free-list head.
    fn get_actual_block_count_and_free_list(
        &mut self,
        block_count: usize,
    ) -> (usize, &mut Option<BlockIndex>) {
        debug_assert_eq!(MAX_VERTEX_TEXTURE_WIDTH, 1024, "Need to update bucketing");
match block_count {
0 => panic!("Can't allocate zero sized blocks!"),
1 => (1, &mut self.free_list_1),
2 => (2, &mut self.free_list_2),
3..=4 => (4, &mut self.free_list_4),
5..=8 => (8, &mut self.free_list_8),
9..=16 => (16, &mut self.free_list_16),
17..=32 => (32, &mut self.free_list_32),
33..=64 => (64, &mut self.free_list_64),
65..=128 => (128, &mut self.free_list_128),
129..=256 => (256, &mut self.free_list_256),
257..=341 => (341, &mut self.free_list_341),
342..=512 => (512, &mut self.free_list_512),
513..=1024 => (1024, &mut self.free_list_1024),
_ => panic!("Can't allocate > MAX_VERTEX_TEXTURE_WIDTH per resource!"),
}
}
}
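// Illustrative test (not from the original suite): requested sizes round
// up to the nearest bucket, e.g. 3 blocks occupy a 4-block slot and 300
// blocks land in the 341 bucket.
#[test]
fn test_free_list_bucketing() {
    let mut free_lists = FreeBlockLists::new();
    assert_eq!(free_lists.get_actual_block_count_and_free_list(3).0, 4);
    assert_eq!(free_lists.get_actual_block_count_and_free_list(300).0, 341);
    assert_eq!(free_lists.get_actual_block_count_and_free_list(1024).0, 1024);
}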
#[cfg_attr(feature = "capture", derive(Serialize))]
#[cfg_attr(feature = "replay", derive(Deserialize))]
#[derive(MallocSizeOf)]
struct Texture {
height: i32,
blocks: Vec<Block>,
rows: Vec<Row>,
base_epoch: Epoch,
max_epoch: Epoch,
free_lists: FreeBlockLists,
occupied_list_heads: FastHashMap<DocumentId, BlockIndex>,
pending_blocks: Vec<GpuBlockData>,
updates: Vec<GpuCacheUpdate>,
allocated_block_count: usize,
#[cfg_attr(feature = "serde", serde(skip))]
reached_reclaim_threshold: Option<Instant>,
#[cfg_attr(feature = "serde", serde(skip))]
debug_commands: Vec<GpuCacheDebugCmd>,
debug_flags: DebugFlags,
}
impl Texture {
fn new(base_epoch: Epoch, debug_flags: DebugFlags) -> Self {
let blocks = vec![Block::INVALID];
Texture {
height: GPU_CACHE_INITIAL_HEIGHT,
blocks,
rows: Vec::new(),
base_epoch,
max_epoch: base_epoch,
free_lists: FreeBlockLists::new(),
pending_blocks: Vec::new(),
updates: Vec::new(),
occupied_list_heads: FastHashMap::default(),
allocated_block_count: 0,
reached_reclaim_threshold: None,
debug_commands: Vec::new(),
debug_flags,
}
}
    /// Pushes a new resource into the texture: takes a block from the
    /// matching free list (carving a fresh row into fixed-size items if
    /// the list is empty) and moves it to the head of the document's
    /// occupied list.
    fn push_data(
        &mut self,
        pending_block_index: Option<usize>,
        block_count: usize,
        frame_stamp: FrameStamp
    ) -> CacheLocation {
debug_assert!(frame_stamp.is_valid());
let (alloc_size, free_list) = self.free_lists
.get_actual_block_count_and_free_list(block_count);
        if free_list.is_none() {
            // No free blocks of this size: grow the texture if needed
            // and split a new row into equally sized free items.
            if self.rows.len() as i32 == self.height {
                self.height += NEW_ROWS_PER_RESIZE;
            }
            let items_per_row = MAX_VERTEX_TEXTURE_WIDTH / alloc_size;
            let row_index = self.rows.len();
            self.rows.push(Row::new(alloc_size));
let mut prev_block_index = None;
for i in 0 .. items_per_row {
let address = GpuCacheAddress::new(i * alloc_size, row_index);
let block_index = BlockIndex::new(self.blocks.len());
let block = Block::new(address, prev_block_index, frame_stamp.frame_id(), self.base_epoch);
self.blocks.push(block);
prev_block_index = Some(block_index);
}
*free_list = prev_block_index;
}
        // Pop a block off the free list and move it to the head of this
        // document's occupied list.
        let free_block_index = free_list.take().unwrap();
        let block = &mut self.blocks[free_block_index.get()];
        *free_list = block.next;
        block.next = self.occupied_list_heads.get(&frame_stamp.document_id()).cloned();
        block.last_access_time = frame_stamp.frame_id();
        self.occupied_list_heads.insert(frame_stamp.document_id(), free_block_index);
self.allocated_block_count += alloc_size;
        // If staging data was supplied, schedule a copy into the texture.
        if let Some(pending_block_index) = pending_block_index {
self.updates.push(GpuCacheUpdate::Copy {
block_index: pending_block_index,
block_count,
address: block.address,
});
}
if self.debug_flags.contains(DebugFlags::GPU_CACHE_DBG) {
self.debug_commands.push(GpuCacheDebugCmd::Alloc(GpuCacheDebugChunk {
address: block.address,
size: block_count,
}));
}
CacheLocation {
block_index: free_block_index,
epoch: block.epoch,
}
}
    /// Evicts blocks in this document's occupied list that have gone
    /// unused for FRAMES_BEFORE_EVICTION frames, returning them to the
    /// appropriate free lists.
    fn evict_old_blocks(&mut self, frame_stamp: FrameStamp) {
        debug_assert!(frame_stamp.is_valid());
        let mut current_block = self.occupied_list_heads.get(&frame_stamp.document_id()).copied();
let mut prev_block: Option<BlockIndex> = None;
while let Some(index) = current_block {
let (next_block, should_unlink) = {
let block = &mut self.blocks[index.get()];
let next_block = block.next;
let mut should_unlink = false;
                // A block unused for FRAMES_BEFORE_EVICTION frames goes
                // back to its size class's free list.
                if block.last_access_time + FRAMES_BEFORE_EVICTION < frame_stamp.frame_id() {
                    should_unlink = true;
let row = &mut self.rows[block.address.v as usize];
let (_, free_list) = self.free_lists
.get_actual_block_count_and_free_list(row.block_count_per_item);
block.advance_epoch(&mut self.max_epoch);
block.next = *free_list;
*free_list = Some(index);
self.allocated_block_count -= row.block_count_per_item;
if self.debug_flags.contains(DebugFlags::GPU_CACHE_DBG) {
let cmd = GpuCacheDebugCmd::Free(block.address);
self.debug_commands.push(cmd);
}
                }
                (next_block, should_unlink)
            };
            // Repair the occupied list around the block we just freed.
            if should_unlink {
match prev_block {
Some(prev_block) => {
self.blocks[prev_block.get()].next = next_block;
}
None => {
match next_block {
Some(next_block) => {
self.occupied_list_heads.insert(frame_stamp.document_id(), next_block);
}
None => {
self.occupied_list_heads.remove(&frame_stamp.document_id());
}
}
}
}
} else {
prev_block = current_block;
}
current_block = next_block;
}
}
    /// Returns the fraction of texture blocks currently allocated.
    fn utilization(&self) -> f32 {
let total_blocks = self.rows.len() * MAX_VERTEX_TEXTURE_WIDTH;
debug_assert!(total_blocks > 0);
let ratio = self.allocated_block_count as f32 / total_blocks as f32;
debug_assert!(0.0 <= ratio && ratio <= 1.0, "Bad ratio: {}", ratio);
ratio
}
}
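// Illustrative test (not from the original suite); it assumes, as the
// test-only constructor further below does, that FrameStamp::first plus
// advance yields a valid stamp and that each advance bumps the frame id.
// Pushing data allocates a block, and a block untouched for more than
// FRAMES_BEFORE_EVICTION frames is returned to the free list.
#[test]
fn test_texture_evicts_unused_blocks() {
    let document_id = DocumentId::new(IdNamespace(1), 1);
    let mut now = FrameStamp::first(document_id);
    now.advance();
    let mut texture = Texture::new(Epoch(0), DebugFlags::empty());
    texture.pending_blocks.push(GpuBlockData::EMPTY);
    let _location = texture.push_data(Some(0), 1, now);
    assert_eq!(texture.allocated_block_count, 1);
    // Let the block go stale, then evict.
    for _ in 0 .. FRAMES_BEFORE_EVICTION + 2 {
        now.advance();
    }
    texture.evict_old_blocks(now);
    assert_eq!(texture.allocated_block_count, 0);
}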
/// A pending write to the GPU cache. Blocks are accumulated with `push`,
/// and the data is committed to the texture (and the handle given its
/// location) when the request is dropped.
#[must_use]
pub struct GpuDataRequest<'a> {
    #[allow(dead_code)]
    handle: &'a mut GpuCacheHandle,
    frame_stamp: FrameStamp,
    start_index: usize,
    max_block_count: usize,
    texture: &'a mut Texture,
}
impl<'a> GpuDataRequest<'a> {
    /// Appends a single block to this request's staging data.
    pub fn push<B>(&mut self, block: B)
where
B: Into<GpuBlockData>,
{
self.texture.pending_blocks.push(block.into());
}
    /// Writes the blocks for a single segment: the local rect followed
    /// by one block of extra data (VECS_PER_SEGMENT blocks in total).
    pub fn write_segment(
        &mut self,
        local_rect: LayoutRect,
        extra_data: [f32; 4],
    ) {
        // Reference the constant that defines this layout so the
        // dependency stays visible at the write site.
        let _ = VECS_PER_SEGMENT;
self.push(local_rect);
self.push(extra_data);
}
    /// Returns the number of blocks pushed so far for this request.
    pub fn current_used_block_num(&self) -> usize {
self.texture.pending_blocks.len() - self.start_index
}
}
impl<'a> Drop for GpuDataRequest<'a> {
    fn drop(&mut self) {
        // Commit the accumulated blocks to the texture and store the
        // resulting location in the caller's handle.
        let block_count = self.current_used_block_num();
debug_assert!(block_count <= self.max_block_count);
let location = self.texture
.push_data(Some(self.start_index), block_count, self.frame_stamp);
self.handle.location = Some(location);
}
}
#[cfg_attr(feature = "capture", derive(Serialize))]
#[cfg_attr(feature = "replay", derive(Deserialize))]
#[derive(MallocSizeOf)]
pub struct GpuCache {
now: FrameStamp,
texture: Texture,
saved_block_count: usize,
debug_flags: DebugFlags,
pending_clear: bool,
prepared_for_frames: bool,
requires_frame_build: bool,
document_frames_to_build: FastHashSet<DocumentId>,
}
impl GpuCache {
pub fn new() -> Self {
let debug_flags = DebugFlags::empty();
GpuCache {
now: FrameStamp::INVALID,
texture: Texture::new(Epoch(0), debug_flags),
saved_block_count: 0,
debug_flags,
pending_clear: false,
prepared_for_frames: false,
requires_frame_build: false,
document_frames_to_build: FastHashSet::default(),
}
}
    /// Creates a GpuCache and prepares it with a valid frame, ready for
    /// use in tests.
    #[cfg(test)]
    pub fn new_for_testing() -> Self {
let mut cache = Self::new();
let mut now = FrameStamp::first(DocumentId::new(IdNamespace(1), 1));
now.advance();
cache.prepared_for_frames = true;
cache.begin_frame(now);
cache
}
    /// Drops everything in the GPU cache. The renderer is told to reset
    /// its copy via the `clear` flag on the next update list, and all
    /// documents must subsequently rebuild their frames.
    pub fn clear(&mut self) {
assert!(self.texture.updates.is_empty(), "Clearing with pending updates");
let mut next_base_epoch = self.texture.max_epoch;
next_base_epoch.next();
self.texture = Texture::new(next_base_epoch, self.debug_flags);
self.saved_block_count = 0;
self.pending_clear = true;
self.requires_frame_build = true;
}
    /// Returns true if a frame build is required before drawing (set
    /// after the cache has been cleared).
    pub fn requires_frame_build(&self) -> bool {
self.requires_frame_build
}
    /// Called by the render backend before building any frames in an
    /// update. Reclaims the texture memory if utilization has been low
    /// for long enough.
    pub fn prepare_for_frames(&mut self) {
self.prepared_for_frames = true;
if self.should_reclaim_memory() {
self.clear();
debug_assert!(self.document_frames_to_build.is_empty());
for &document_id in self.texture.occupied_list_heads.keys() {
self.document_frames_to_build.insert(document_id);
}
}
}
    /// Called by the render backend once all frames in the current
    /// update have been built.
    pub fn bookkeep_after_frames(&mut self) {
assert!(self.document_frames_to_build.is_empty());
assert!(self.prepared_for_frames);
self.requires_frame_build = false;
self.prepared_for_frames = false;
}
    /// Begins a new frame, evicting any of this document's blocks that
    /// have gone unused for too long.
    pub fn begin_frame(&mut self, stamp: FrameStamp) {
debug_assert!(self.texture.pending_blocks.is_empty());
assert!(self.prepared_for_frames);
profile_scope!("begin_frame");
self.now = stamp;
self.texture.evict_old_blocks(self.now);
self.saved_block_count = 0;
}
    /// Invalidates a (possibly existing) block in the cache, so the next
    /// request for this handle will rebuild its data.
    pub fn invalidate(&mut self, handle: &GpuCacheHandle) {
if let Some(ref location) = handle.location {
if let Some(block) = self.texture.blocks.get_mut(location.block_index.get()) {
if block.epoch == location.epoch {
block.advance_epoch(&mut self.texture.max_epoch);
}
}
}
}
    /// Requests a resource from the cache. Returns None if the handle
    /// still points at valid data (refreshing its access time so it is
    /// not evicted); otherwise the caller receives a GpuDataRequest that
    /// must be filled in before it is dropped.
    pub fn request<'a>(&'a mut self, handle: &'a mut GpuCacheHandle) -> Option<GpuDataRequest<'a>> {
        let mut max_block_count = MAX_VERTEX_TEXTURE_WIDTH;
        // Check if the allocation for this handle is still valid.
        if let Some(ref location) = handle.location {
if let Some(block) = self.texture.blocks.get_mut(location.block_index.get()) {
if block.epoch == location.epoch {
max_block_count = self.texture.rows[block.address.v as usize].block_count_per_item;
if block.last_access_time != self.now.frame_id() {
block.last_access_time = self.now.frame_id();
self.saved_block_count += max_block_count;
}
return None;
}
}
}
debug_assert!(self.now.is_valid());
Some(GpuDataRequest {
handle,
frame_stamp: self.now,
start_index: self.texture.pending_blocks.len(),
texture: &mut self.texture,
max_block_count,
})
}
    /// Pushes a slice of blocks in one go, returning a handle that is
    /// valid for the current frame only.
    pub fn push_per_frame_blocks(&mut self, blocks: &[GpuBlockData]) -> GpuCacheHandle {
let start_index = self.texture.pending_blocks.len();
self.texture.pending_blocks.extend_from_slice(blocks);
let location = self.texture
.push_data(Some(start_index), blocks.len(), self.now);
GpuCacheHandle {
location: Some(location),
}
}
    /// Reserves space for blocks whose contents will be supplied later;
    /// the handle is valid for the current frame only.
    pub fn push_deferred_per_frame_blocks(&mut self, block_count: usize) -> GpuCacheHandle {
let location = self.texture.push_data(None, block_count, self.now);
GpuCacheHandle {
location: Some(location),
}
}
    /// Ends the frame: records profiler counters, updates the reclaim
    /// timer, and returns the stamp the update list will carry.
    pub fn end_frame(
        &mut self,
        profile: &mut TransactionProfile,
    ) -> FrameStamp {
profile_scope!("end_frame");
profile.set(profiler::GPU_CACHE_ROWS_TOTAL, self.texture.rows.len());
profile.set(profiler::GPU_CACHE_BLOCKS_TOTAL, self.texture.allocated_block_count);
profile.set(profiler::GPU_CACHE_BLOCKS_SAVED, self.saved_block_count);
let reached_threshold =
self.texture.rows.len() > (GPU_CACHE_INITIAL_HEIGHT as usize) &&
self.texture.utilization() < RECLAIM_THRESHOLD;
if reached_threshold {
self.texture.reached_reclaim_threshold.get_or_insert_with(Instant::now);
} else {
self.texture.reached_reclaim_threshold = None;
}
self.document_frames_to_build.remove(&self.now.document_id());
self.now
}
    /// Returns true if utilization has stayed below RECLAIM_THRESHOLD
    /// for at least RECLAIM_DELAY_S seconds.
    pub fn should_reclaim_memory(&self) -> bool {
self.texture.reached_reclaim_threshold
.map_or(false, |t| t.elapsed() > Duration::from_secs(RECLAIM_DELAY_S))
}
    /// Extracts the pending updates and staging blocks for the renderer
    /// to upload.
    pub fn extract_updates(&mut self) -> GpuCacheUpdateList {
let clear = self.pending_clear;
self.pending_clear = false;
GpuCacheUpdateList {
frame_id: self.now.frame_id(),
clear,
height: self.texture.height,
debug_commands: self.texture.debug_commands.take_and_preallocate(),
updates: self.texture.updates.take_and_preallocate(),
blocks: self.texture.pending_blocks.take_and_preallocate(),
}
}
    /// Sets the current debug flags for the system.
    pub fn set_debug_flags(&mut self, flags: DebugFlags) {
self.debug_flags = flags;
self.texture.debug_flags = flags;
}
    /// Returns the texture address for a handle. The handle must have
    /// been requested and built this frame; an unallocated handle panics
    /// here.
    pub fn get_address(&self, id: &GpuCacheHandle) -> GpuCacheAddress {
let location = id.location.expect("handle not requested or allocated!");
let block = &self.texture.blocks[location.block_index.get()];
debug_assert_eq!(block.epoch, location.epoch);
debug_assert_eq!(block.last_access_time, self.now.frame_id());
block.address
}
}
#[test]
#[cfg(target_pointer_width = "64")]
fn test_struct_sizes() {
    use std::mem;
    // Block is stored once per allocation slot; catch size regressions.
    assert_eq!(mem::size_of::<Block>(), 24, "Block size changed");
}
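// Illustrative test (not from the original suite): exercises the
// request / push / resolve round trip via the test-only constructor.
#[test]
fn test_request_round_trip() {
    let mut cache = GpuCache::new_for_testing();
    let mut handle = GpuCacheHandle::new();
    {
        // A fresh handle misses, so we receive a request to fill in.
        let mut request = cache.request(&mut handle).expect("expected a cache miss");
        request.push([1.0, 2.0, 3.0, 4.0]);
        // Dropping the request commits the block and fills the handle.
    }
    // Requesting again in the same frame is a hit and returns None.
    assert!(cache.request(&mut handle).is_none());
    // The handle now resolves to a real texel address.
    assert_ne!(cache.get_address(&handle), GpuCacheAddress::INVALID);
}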