azul_webrender/renderer/upload.rs

/* This Source Code Form is subject to the terms of the Mozilla Public
 * License, v. 2.0. If a copy of the MPL was not distributed with this
 * file, You can obtain one at http://mozilla.org/MPL/2.0/. */

//! This module contains the convoluted logic that goes into uploading content into
//! the texture cache's textures.
//!
//! We need to support various combinations of code paths depending on the quirks of
//! each hardware/driver configuration:
//! - direct upload,
//! - staged upload via a pixel buffer object,
//! - staged upload via a direct upload to a staging texture where PBOs aren't supported,
//! - copy from the staging to destination textures, either via blits or batched draw calls.
//!
//! Conceptually a lot of this logic should probably be in the device module, but some code
//! here relies on submitting draw calls via the renderer.
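//!
//! As a rough sketch (the authoritative logic lives in `upload_to_texture_cache`
//! below), a single update flows through these paths as follows:
//!
//! ```text
//! if batched uploads are enabled and the rect fits in a staging texture:
//!     pack the update into a shared staging buffer (CPU buffer or PBO)
//!     upload the staging buffers to temporary textures
//!     copy from the temporary textures to the cache (blits, or draw calls
//!     on drivers where many small blits are slow)
//! else:
//!     upload directly (the uploader may still stage through a PBO internally)
//! ```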

use std::mem;
use std::collections::VecDeque;
use euclid::Transform3D;
use time::precise_time_ns;
use malloc_size_of::MallocSizeOfOps;
use api::units::*;
use api::{ExternalImageSource, PremultipliedColorF, ImageBufferKind, ImageRendering, ImageFormat};
use crate::renderer::{
    Renderer, VertexArrayKind, RendererStats, TextureSampler, TEXTURE_CACHE_DBG_CLEAR_COLOR
};
use crate::internal_types::{
    FastHashMap, TextureUpdateSource, Swizzle, TextureCacheUpdate,
    CacheTextureId, RenderTargetInfo,
};
use crate::device::{
    Device, UploadMethod, Texture, DrawTarget, UploadStagingBuffer, TextureFlags, TextureUploader,
    TextureFilter,
};
use crate::gpu_types::{ZBufferId, CompositeInstance, CompositorTransform};
use crate::batch::BatchTextures;
use crate::texture_pack::{GuillotineAllocator, FreeRectSlice};
use crate::composite::{CompositeFeatures, CompositeSurfaceFormat};
use crate::profiler;
use crate::render_api::MemoryReport;

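/// Size of the temporary textures used by the batched upload path. Updates
/// larger than this on either axis take the direct (non-batched) upload path.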
pub const BATCH_UPLOAD_TEXTURE_SIZE: DeviceIntSize = DeviceIntSize::new(512, 512);

/// Upload a number of items to texture cache textures.
///
/// This is the main entry point of the texture cache upload code.
/// See also the module documentation for more information.
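///
/// Illustrative (non-compiling) sketch of a call site; the update list is
/// assumed to have been collected elsewhere in the renderer:
///
/// ```ignore
/// // `collect_pending_updates` is a hypothetical source of pending
/// // texture-cache updates, named here only for illustration.
/// let pending_updates: FastHashMap<CacheTextureId, Vec<TextureCacheUpdate>> =
///     collect_pending_updates();
/// upload_to_texture_cache(&mut renderer, pending_updates);
/// ```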
pub fn upload_to_texture_cache(
    renderer: &mut Renderer,
    update_list: FastHashMap<CacheTextureId, Vec<TextureCacheUpdate>>,
) {

    let mut stats = UploadStats {
        num_draw_calls: 0,
        upload_time: 0,
        cpu_buffer_alloc_time: 0,
        texture_alloc_time: 0,
        cpu_copy_time: 0,
        gpu_copy_commands_time: 0,
        bytes_uploaded: 0,
    };

    let upload_total_start = precise_time_ns();

    let mut batch_upload_textures = Vec::new();

    // A list of copies that must be performed from the temporary textures to the texture cache.
    let mut batch_upload_copies = Vec::new();

    // For each texture format, this stores a list of staging buffers
    // and a texture allocator for packing the buffers.
    let mut batch_upload_buffers = FastHashMap::default();

    // For best performance we use a single TextureUploader for all uploads.
    // This allows us to fill PBOs more efficiently and therefore allocate fewer PBOs.
    let mut uploader = renderer.device.upload_texture(
        &mut renderer.texture_upload_pbo_pool,
    );

    let num_updates = update_list.len();

    for (texture_id, updates) in update_list {
        let texture = &renderer.texture_resolver.texture_cache_map[&texture_id].texture;
        for update in updates {
            let TextureCacheUpdate { rect, stride, offset, format_override, source } = update;

            let dummy_data;
            let data = match source {
                TextureUpdateSource::Bytes { ref data } => {
                    &data[offset as usize ..]
                }
                TextureUpdateSource::External { id, channel_index } => {
                    let handler = renderer.external_image_handler
                        .as_mut()
                        .expect("Found external image, but no handler set!");
                    // The filter is only relevant for NativeTexture external images.
                    match handler.lock(id, channel_index, ImageRendering::Auto).source {
                        ExternalImageSource::RawData(data) => {
                            &data[offset as usize ..]
                        }
                        ExternalImageSource::Invalid => {
                            // Create a local buffer to fill the PBO.
                            let bpp = texture.get_format().bytes_per_pixel();
                            let width = stride.unwrap_or(rect.width() * bpp);
                            let total_size = width * rect.height();
                            // WR doesn't support the RGBAF32 format in the
                            // texture cache, so we use the u8 type here.
                            dummy_data = vec![0xFFu8; total_size as usize];
                            &dummy_data
                        }
                        ExternalImageSource::NativeTexture(eid) => {
                            panic!("Unexpected external texture {:?} for the texture cache update of {:?}", eid, id);
                        }
                    }
                }
                TextureUpdateSource::DebugClear => {
                    let draw_target = DrawTarget::from_texture(
                        texture,
                        false,
                    );
                    renderer.device.bind_draw_target(draw_target);
                    renderer.device.clear_target(
                        Some(TEXTURE_CACHE_DBG_CLEAR_COLOR),
                        None,
                        Some(draw_target.to_framebuffer_rect(update.rect.to_i32()))
                    );

                    continue;
                }
            };

            let use_batch_upload = renderer.device.use_batched_texture_uploads() &&
                texture.flags().contains(TextureFlags::IS_SHARED_TEXTURE_CACHE) &&
                rect.width() <= BATCH_UPLOAD_TEXTURE_SIZE.width &&
                rect.height() <= BATCH_UPLOAD_TEXTURE_SIZE.height;

            if use_batch_upload {
                copy_into_staging_buffer(
                    &mut renderer.device,
                    &mut uploader,
                    &mut renderer.staging_texture_pool,
                    rect,
                    stride,
                    data,
                    texture_id,
                    texture,
                    &mut batch_upload_buffers,
                    &mut batch_upload_textures,
                    &mut batch_upload_copies,
                    &mut stats,
                );
            } else {
                let upload_start_time = precise_time_ns();

                stats.bytes_uploaded += uploader.upload(
                    &mut renderer.device,
                    texture,
                    rect,
                    stride,
                    format_override,
                    data.as_ptr(),
                    data.len()
                );

                stats.upload_time += precise_time_ns() - upload_start_time;
            }

            if let TextureUpdateSource::External { id, channel_index } = source {
                let handler = renderer.external_image_handler
                    .as_mut()
                    .expect("Found external image, but no handler set!");
                handler.unlock(id, channel_index);
            }
        }
    }

    let upload_start_time = precise_time_ns();
    // Upload batched texture updates to their temporary textures.
    for batch_buffer in batch_upload_buffers.into_iter().flat_map(|(_, (_, buffers))| buffers) {
        let texture = &batch_upload_textures[batch_buffer.texture_index];
        match batch_buffer.staging_buffer {
            StagingBufferKind::Pbo(pbo) => {
                stats.bytes_uploaded += uploader.upload_staged(
                    &mut renderer.device,
                    texture,
                    DeviceIntRect::from_size(texture.get_dimensions()),
                    None,
                    pbo,
                );
            }
            StagingBufferKind::CpuBuffer { bytes, .. } => {
                let bpp = texture.get_format().bytes_per_pixel();
                stats.bytes_uploaded += uploader.upload(
                    &mut renderer.device,
                    texture,
                    batch_buffer.upload_rect,
                    Some(BATCH_UPLOAD_TEXTURE_SIZE.width * bpp),
                    None,
                    bytes.as_ptr(),
                    bytes.len()
                );
                renderer.staging_texture_pool.return_temporary_buffer(bytes);
            }
        }
    }
    stats.upload_time += precise_time_ns() - upload_start_time;

    // Flush all uploads, batched or otherwise.
    let flush_start_time = precise_time_ns();
    uploader.flush(&mut renderer.device);
    stats.upload_time += precise_time_ns() - flush_start_time;

    if !batch_upload_copies.is_empty() {
        // Copy updates that were batch uploaded to their correct destination in the texture cache.
        // Sort them by destination and source to minimize framebuffer binding changes.
        batch_upload_copies.sort_unstable_by_key(|b| (b.dest_texture_id.0, b.src_texture_index));

        let gpu_copy_start = precise_time_ns();

        if renderer.device.use_draw_calls_for_texture_copy() {
            // Some drivers have a very high CPU overhead when submitting hundreds of small
            // blit commands (low-end Intel drivers on Windows, for example, can take 100+ ms
            // to submit a few hundred blits). In this case we do the copy with batched draw calls.
            copy_from_staging_to_cache_using_draw_calls(
                renderer,
                &mut stats,
                &batch_upload_textures,
                batch_upload_copies,
            );
        } else {
            copy_from_staging_to_cache(
                renderer,
                &batch_upload_textures,
                batch_upload_copies,
            );
        }

        stats.gpu_copy_commands_time += precise_time_ns() - gpu_copy_start;
    }

    for texture in batch_upload_textures.drain(..) {
        renderer.staging_texture_pool.return_texture(texture);
    }

    // Update the profile counters. We use add instead of set because
    // this function can be called several times per frame.
    // We don't update the counters when their value is zero, so that
    // the profiler can treat them as events and we can get notified
    // when they happen.

    let upload_total = precise_time_ns() - upload_total_start;
    renderer.profile.add(
        profiler::TOTAL_UPLOAD_TIME,
        profiler::ns_to_ms(upload_total)
    );

    if num_updates > 0 {
        renderer.profile.add(profiler::TEXTURE_UPLOADS, num_updates);
    }

    if stats.bytes_uploaded > 0 {
        renderer.profile.add(
            profiler::TEXTURE_UPLOADS_MEM,
            profiler::bytes_to_mb(stats.bytes_uploaded)
        );
    }

    if stats.cpu_copy_time > 0 {
        renderer.profile.add(
            profiler::UPLOAD_CPU_COPY_TIME,
            profiler::ns_to_ms(stats.cpu_copy_time)
        );
    }
    if stats.upload_time > 0 {
        renderer.profile.add(
            profiler::UPLOAD_TIME,
            profiler::ns_to_ms(stats.upload_time)
        );
    }
    if stats.texture_alloc_time > 0 {
        renderer.profile.add(
            profiler::STAGING_TEXTURE_ALLOCATION_TIME,
            profiler::ns_to_ms(stats.texture_alloc_time)
        );
    }
    if stats.cpu_buffer_alloc_time > 0 {
        renderer.profile.add(
            profiler::CPU_TEXTURE_ALLOCATION_TIME,
            profiler::ns_to_ms(stats.cpu_buffer_alloc_time)
        );
    }
    if stats.num_draw_calls > 0 {
        renderer.profile.add(
            profiler::UPLOAD_NUM_COPY_BATCHES,
            stats.num_draw_calls
        );
    }

    if stats.gpu_copy_commands_time > 0 {
        renderer.profile.add(
            profiler::UPLOAD_GPU_COPY_TIME,
            profiler::ns_to_ms(stats.gpu_copy_commands_time)
        );
    }
}

/// Copy an item into a batched upload staging buffer.
fn copy_into_staging_buffer<'a>(
    device: &mut Device,
    uploader: &mut TextureUploader<'a>,
    staging_texture_pool: &mut UploadTexturePool,
    update_rect: DeviceIntRect,
    update_stride: Option<i32>,
    data: &[u8],
    dest_texture_id: CacheTextureId,
    texture: &Texture,
    batch_upload_buffers: &mut FastHashMap<ImageFormat, (GuillotineAllocator, Vec<BatchUploadBuffer<'a>>)>,
    batch_upload_textures: &mut Vec<Texture>,
    batch_upload_copies: &mut Vec<BatchUploadCopy>,
    stats: &mut UploadStats
) {
    let (allocator, buffers) = batch_upload_buffers.entry(texture.get_format())
        .or_insert_with(|| (GuillotineAllocator::new(None), Vec::new()));

    // Allocate a region within the staging buffer for this update. If there is
    // no room in an existing buffer then allocate another texture and buffer.
    let (slice, origin) = match allocator.allocate(&update_rect.size()) {
        Some((slice, origin)) => (slice, origin),
        None => {
            let new_slice = FreeRectSlice(buffers.len() as u32);
            allocator.extend(new_slice, BATCH_UPLOAD_TEXTURE_SIZE, update_rect.size());

            let texture_alloc_time_start = precise_time_ns();
            let staging_texture = staging_texture_pool.get_texture(device, texture.get_format());
            stats.texture_alloc_time += precise_time_ns() - texture_alloc_time_start;

            let texture_index = batch_upload_textures.len();
            batch_upload_textures.push(staging_texture);

            let cpu_buffer_alloc_start_time = precise_time_ns();
            let staging_buffer = match device.upload_method() {
                UploadMethod::Immediate => StagingBufferKind::CpuBuffer {
                    bytes: staging_texture_pool.get_temporary_buffer(),
                },
                UploadMethod::PixelBuffer(_) => {
                    let pbo = uploader.stage(
                        device,
                        texture.get_format(),
                        BATCH_UPLOAD_TEXTURE_SIZE,
                    ).unwrap();

                    StagingBufferKind::Pbo(pbo)
                }
            };
            stats.cpu_buffer_alloc_time += precise_time_ns() - cpu_buffer_alloc_start_time;

            buffers.push(BatchUploadBuffer {
                staging_buffer,
                texture_index,
                upload_rect: DeviceIntRect::zero()
            });

            (new_slice, DeviceIntPoint::zero())
        }
    };
    let buffer = &mut buffers[slice.0 as usize];
    let allocated_rect = DeviceIntRect::from_origin_and_size(origin, update_rect.size());
    buffer.upload_rect = buffer.upload_rect.union(&allocated_rect);

    batch_upload_copies.push(BatchUploadCopy {
        src_texture_index: buffer.texture_index,
        src_offset: allocated_rect.min,
        dest_texture_id,
        dest_offset: update_rect.min,
        size: update_rect.size(),
    });

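    // Safety: `src_size` is asserted to fit within `data` before the raw source
    // slice is constructed, and reinterpreting initialized `u8` bytes as
    // `MaybeUninit<u8>` is always valid.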
    unsafe {
        let memcpy_start_time = precise_time_ns();
        let bpp = texture.get_format().bytes_per_pixel() as usize;
        let width_bytes = update_rect.width() as usize * bpp;
        let src_stride = update_stride.map_or(width_bytes, |stride| {
            assert!(stride >= 0);
            stride as usize
        });
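        // Worked example: a 100x20 RGBA8 update with a 512-byte source stride
        // reads 19 full strides plus one final unpadded row of 400 bytes, so
        // src_size = 19 * 512 + 400 = 10128 bytes.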
        let src_size = (update_rect.height() as usize - 1) * src_stride + width_bytes;
        assert!(src_size <= data.len());

        let src: &[mem::MaybeUninit<u8>] = std::slice::from_raw_parts(data.as_ptr() as *const _, src_size);
        let (dst_stride, dst) = match &mut buffer.staging_buffer {
            StagingBufferKind::Pbo(buffer) => (
                buffer.get_stride(),
                buffer.get_mapping(),
            ),
            StagingBufferKind::CpuBuffer { bytes } => (
                BATCH_UPLOAD_TEXTURE_SIZE.width as usize * bpp,
                &mut bytes[..],
            )
        };

        // Copy the data line by line into the buffer so that we do not
        // overwrite any other region of the buffer.
        for y in 0..allocated_rect.height() as usize {
            let src_start = y * src_stride;
            let src_end = src_start + width_bytes;
            let dst_start = (allocated_rect.min.y as usize + y) * dst_stride +
                allocated_rect.min.x as usize * bpp;
            let dst_end = dst_start + width_bytes;

            dst[dst_start..dst_end].copy_from_slice(&src[src_start..src_end])
        }

        stats.cpu_copy_time += precise_time_ns() - memcpy_start_time;
    }
}

/// Copy from the staging PBOs or textures to texture cache textures using blit commands.
///
/// Using blits instead of draw calls is supposedly more efficient but some drivers have
/// a very high per-command overhead so in some configurations we end up using
/// copy_from_staging_to_cache_using_draw_calls instead.
fn copy_from_staging_to_cache(
    renderer: &mut Renderer,
    batch_upload_textures: &[Texture],
    batch_upload_copies: Vec<BatchUploadCopy>,
) {
    for copy in batch_upload_copies {
        let dest_texture = &renderer.texture_resolver.texture_cache_map[&copy.dest_texture_id].texture;

        renderer.device.copy_texture_sub_region(
            &batch_upload_textures[copy.src_texture_index],
            copy.src_offset.x as _,
            copy.src_offset.y as _,
            dest_texture,
            copy.dest_offset.x as _,
            copy.dest_offset.y as _,
            copy.size.width as _,
            copy.size.height as _,
        );
    }
}

/// Generate and submit composite shader batches to copy from
/// the staging textures to the destination cache textures.
///
/// If this shows up in GPU time profiles we could replace it with
/// a simpler shader (composite.glsl is already quite simple).
fn copy_from_staging_to_cache_using_draw_calls(
    renderer: &mut Renderer,
    stats: &mut UploadStats,
    batch_upload_textures: &[Texture],
    batch_upload_copies: Vec<BatchUploadCopy>,
) {
    let mut dummy_stats = RendererStats {
        total_draw_calls: 0,
        alpha_target_count: 0,
        color_target_count: 0,
        texture_upload_mb: 0.0,
        resource_upload_time: 0.0,
        gpu_cache_upload_time: 0.0,
        gecko_display_list_time: 0.0,
        wr_display_list_time: 0.0,
        scene_build_time: 0.0,
        frame_build_time: 0.0,
        full_display_list: false,
        full_paint: false,
    };

    let mut copy_instances = Vec::new();
    let mut prev_src = None;
    let mut prev_dst = None;

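    // Group consecutive copies that share both the same staging (source) texture
    // and the same destination cache texture, so that each group can be submitted
    // as a single instanced draw call.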
    for copy in batch_upload_copies {

        let src_changed = prev_src != Some(copy.src_texture_index);
        let dst_changed = prev_dst != Some(copy.dest_texture_id);

        if (src_changed || dst_changed) && !copy_instances.is_empty() {

            renderer.draw_instanced_batch(
                &copy_instances,
                VertexArrayKind::Composite,
                // We bind the staging texture manually because it isn't known
                // to the texture resolver.
                &BatchTextures::empty(),
                &mut dummy_stats,
            );

            stats.num_draw_calls += 1;
            copy_instances.clear();
        }

        if dst_changed {
            let dest_texture = &renderer.texture_resolver.texture_cache_map[&copy.dest_texture_id].texture;
            let target_size = dest_texture.get_dimensions();

            let draw_target = DrawTarget::from_texture(
                dest_texture,
                false,
            );
            renderer.device.bind_draw_target(draw_target);

            let projection = Transform3D::ortho(
                0.0,
                target_size.width as f32,
                0.0,
                target_size.height as f32,
                renderer.device.ortho_near_plane(),
                renderer.device.ortho_far_plane(),
            );

            renderer.shaders
                .borrow_mut()
                .get_composite_shader(
                    CompositeSurfaceFormat::Rgba,
                    ImageBufferKind::Texture2D,
                    CompositeFeatures::empty(),
                ).bind(
                    &mut renderer.device,
                    &projection,
                    None,
                    &mut renderer.renderer_errors
                );

            prev_dst = Some(copy.dest_texture_id);
        }

        if src_changed {
            renderer.device.bind_texture(
                TextureSampler::Color0,
                &batch_upload_textures[copy.src_texture_index],
                Swizzle::default(),
            );

            prev_src = Some(copy.src_texture_index);
        }

        let dest_rect = DeviceRect::from_origin_and_size(
            copy.dest_offset.to_f32(),
            copy.size.to_f32(),
        );

        let src_rect = TexelRect::new(
            copy.src_offset.x as f32,
            copy.src_offset.y as f32,
            (copy.src_offset.x + copy.size.width) as f32,
            (copy.src_offset.y + copy.size.height) as f32,
        );

        copy_instances.push(CompositeInstance::new_rgb(
            dest_rect.cast_unit(),
            dest_rect,
            PremultipliedColorF::WHITE,
            ZBufferId(0),
            src_rect,
            CompositorTransform::identity(),
        ));
    }

    if !copy_instances.is_empty() {
        renderer.draw_instanced_batch(
            &copy_instances,
            VertexArrayKind::Composite,
            // We bind the staging texture manually because it isn't known
            // to the texture resolver.
            &BatchTextures::empty(),
            &mut dummy_stats,
        );

        stats.num_draw_calls += 1;
    }
}

/// A very basic pool to avoid reallocating staging textures as well as staging
/// CPU-side buffers.
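///
/// Expected per-frame usage, as a sketch inferred from this pool's API:
///
/// ```ignore
/// pool.begin_frame();
/// let tex = pool.get_texture(&mut device, ImageFormat::RGBA8);
/// // ... upload into `tex` and copy out of it ...
/// pool.return_texture(tex);
/// pool.end_frame(&mut device); // may free textures that have sat unused
/// ```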
pub struct UploadTexturePool {
    /// The textures in the pool associated with a last used frame index.
    ///
    /// The outer array corresponds to each of the three supported texture formats.
    textures: [VecDeque<(Texture, u64)>; 3],
    /// Frame at which to deallocate some textures if there are too many in the pool,
    /// for each format.
    delay_texture_deallocation: [u64; 3],
    current_frame: u64,

    /// Temporary buffers that are used when using staging uploads + glTexImage2D.
    ///
    /// Temporary buffers aren't used asynchronously so they can be reused every frame.
    /// To keep things simple we always allocate enough memory for formats with four bytes
    /// per pixel (more than we need for alpha-only textures but it works just as well).
    temporary_buffers: Vec<Vec<mem::MaybeUninit<u8>>>,
    used_temporary_buffers: usize,
    delay_buffer_deallocation: u64,
}

impl UploadTexturePool {
    pub fn new() -> Self {
        UploadTexturePool {
            textures: [VecDeque::new(), VecDeque::new(), VecDeque::new()],
            delay_texture_deallocation: [0; 3],
            current_frame: 0,
            temporary_buffers: Vec::new(),
            used_temporary_buffers: 0,
            delay_buffer_deallocation: 0,
        }
    }

    fn format_index(&self, format: ImageFormat) -> usize {
        match format {
            ImageFormat::RGBA8 => 0,
            ImageFormat::BGRA8 => 1,
            ImageFormat::R8 => 2,
            _ => { panic!("unexpected format"); }
        }
    }

    pub fn begin_frame(&mut self) {
        self.current_frame += 1;
    }

    /// Create or reuse a staging texture.
    ///
    /// See also return_texture.
    pub fn get_texture(&mut self, device: &mut Device, format: ImageFormat) -> Texture {

        // First try to reuse a texture from the pool.
        // "available" here means hasn't been used for 2 frames to avoid stalls.
        // No need to scan the vector. Newer textures are always pushed at the back
        // of the vector so we know the first element is the least recently used.
        let format_idx = self.format_index(format);
        let can_reuse = self.textures[format_idx].front()
            .map(|tex| self.current_frame - tex.1 > 2)
            .unwrap_or(false);

        if can_reuse {
            return self.textures[format_idx].pop_front().unwrap().0;
        }

        // If we couldn't find an available texture, create a new one.

        device.create_texture(
            ImageBufferKind::Texture2D,
            format,
            BATCH_UPLOAD_TEXTURE_SIZE.width,
            BATCH_UPLOAD_TEXTURE_SIZE.height,
            TextureFilter::Nearest,
            // Currently we need render target support as we always use glBlitFramebuffer
            // to copy the texture data. Instead, we should use glCopyImageSubData on some
            // platforms, and avoid creating the FBOs in that case.
            Some(RenderTargetInfo { has_depth: false }),
        )
    }

    /// Hand the staging texture back to the pool after being done with uploads.
    ///
    /// The texture must have been obtained from this pool via get_texture.
    pub fn return_texture(&mut self, texture: Texture) {
        let format_idx = self.format_index(texture.get_format());
        self.textures[format_idx].push_back((texture, self.current_frame));
    }

    /// Create or reuse a temporary CPU buffer.
    ///
    /// These buffers are used in the batched upload path when PBOs are not supported.
    /// Content is first written to the temporary buffer and uploaded via a single
    /// glTexSubImage2D call.
    pub fn get_temporary_buffer(&mut self) -> Vec<mem::MaybeUninit<u8>> {
        self.used_temporary_buffers += 1;
        self.temporary_buffers.pop().unwrap_or_else(|| {
            vec![mem::MaybeUninit::new(0); BATCH_UPLOAD_TEXTURE_SIZE.area() as usize * 4]
        })
    }

    /// Return memory that was obtained from this pool via get_temporary_buffer.
    pub fn return_temporary_buffer(&mut self, buffer: Vec<mem::MaybeUninit<u8>>) {
        assert_eq!(buffer.len(), BATCH_UPLOAD_TEXTURE_SIZE.area() as usize * 4);
        self.temporary_buffers.push(buffer);
    }

    /// Deallocate this pool's CPU and GPU memory.
    pub fn delete_textures(&mut self, device: &mut Device) {
        for format in &mut self.textures {
            while let Some(texture) = format.pop_back() {
                device.delete_texture(texture.0);
            }
        }
        self.temporary_buffers.clear();
    }

    /// Deallocate some textures if there are too many for a long time.
    pub fn end_frame(&mut self, device: &mut Device) {
        for format_idx in 0..self.textures.len() {
            // Count the number of reusable staging textures. If it stays high
            // for a large number of frames, truncate it back to 8-ish over
            // multiple frames.
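            // For example, if 12 textures stay reusable, the 120-frame delay
            // eventually elapses and up to 4 textures are destroyed per frame
            // until the reusable count drops below 8, which pushes the delay
            // forward again.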

            let mut num_reusable_textures = 0;
            for texture in &self.textures[format_idx] {
                if self.current_frame - texture.1 > 2 {
                    num_reusable_textures += 1;
                }
            }

            if num_reusable_textures < 8 {
                // Don't deallocate textures for another 120 frames.
                self.delay_texture_deallocation[format_idx] = self.current_frame + 120;
            }

            // Deallocate up to 4 staging textures every frame.
            let to_remove = if self.current_frame > self.delay_texture_deallocation[format_idx] {
                num_reusable_textures.min(4)
            } else {
                0
            };

            for _ in 0..to_remove {
                let texture = self.textures[format_idx].pop_front().unwrap().0;
                device.delete_texture(texture);
            }
        }

        // Similar logic for temporary CPU buffers.
        let unused_buffers = self.temporary_buffers.len() - self.used_temporary_buffers;
        if unused_buffers < 8 {
            self.delay_buffer_deallocation = self.current_frame + 120;
        }
        let to_remove = if self.current_frame > self.delay_buffer_deallocation {
            unused_buffers.min(4)
        } else {
            0
        };
        for _ in 0..to_remove {
            // Unlike textures it doesn't matter whether we pop from the front or back
            // of the vector.
            self.temporary_buffers.pop();
        }
        self.used_temporary_buffers = 0;
    }

    pub fn report_memory_to(&self, report: &mut MemoryReport, size_op_funs: &MallocSizeOfOps) {
        for buf in &self.temporary_buffers {
            report.upload_staging_memory += unsafe { (size_op_funs.size_of_op)(buf.as_ptr() as *const _) };
        }

        for format in &self.textures {
            for texture in format {
                report.upload_staging_textures += texture.0.size_in_bytes();
            }
        }
    }
}

struct UploadStats {
    num_draw_calls: u32,
    upload_time: u64,
    cpu_buffer_alloc_time: u64,
    texture_alloc_time: u64,
    cpu_copy_time: u64,
    gpu_copy_commands_time: u64,
    bytes_uploaded: usize,
}

#[derive(Debug)]
enum StagingBufferKind<'a> {
    Pbo(UploadStagingBuffer<'a>),
    CpuBuffer { bytes: Vec<mem::MaybeUninit<u8>> }
}

#[derive(Debug)]
struct BatchUploadBuffer<'a> {
    staging_buffer: StagingBufferKind<'a>,
    texture_index: usize,
    // A rectangle containing all items going into this staging texture, so
    // that we can avoid uploading the entire area if we are using glTexSubImage2d.
    upload_rect: DeviceIntRect,
}

/// On some devices performing many small texture uploads is slow, so instead we batch
/// updates into a small number of uploads to temporary textures, then copy from those
/// textures to the correct place in the texture cache.
///
/// This struct describes a single copy from a temporary staging texture to its
/// final location in the texture cache.
#[derive(Debug)]
struct BatchUploadCopy {
    // Index within batch_upload_textures.
    src_texture_index: usize,
    src_offset: DeviceIntPoint,
    dest_texture_id: CacheTextureId,
    dest_offset: DeviceIntPoint,
    size: DeviceIntSize,
}