
/* This Source Code Form is subject to the terms of the Mozilla Public
 * License, v. 2.0. If a copy of the MPL was not distributed with this
 * file, You can obtain one at http://mozilla.org/MPL/2.0/. */

//! This module contains the convoluted logic that goes into uploading content into
//! the texture cache's textures.
//!
//! We need to support various combinations of code paths depending on the quirks of
//! each hardware/driver configuration:
//! - direct upload,
//! - staged upload via a pixel buffer object,
//! - staged upload via a direct upload to a staging texture where PBOs aren't supported,
//! - copy from the staging to destination textures, either via blits or batched draw calls.
//!
//! Conceptually a lot of this logic should probably be in the device module, but some code
//! here relies on submitting draw calls via the renderer.
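//!
//! At a high level, `upload_to_texture_cache` below runs in a few phases: each update is
//! either uploaded directly or copied into a staging buffer/texture, the staged batches are
//! then uploaded to temporary textures, the uploader is flushed, and finally the staged data
//! is copied into the texture cache via blits or batched draw calls.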


use std::mem;
use std::collections::VecDeque;
use std::sync::Arc;
use std::time::Duration;
use euclid::{Transform3D, point2};
use malloc_size_of::MallocSizeOfOps;
use api::units::*;
use api::{ExternalImageSource, ImageBufferKind, ImageFormat};
use crate::renderer::{
    Renderer, VertexArrayKind, RendererStats, TextureSampler, TEXTURE_CACHE_DBG_CLEAR_COLOR
};
use crate::internal_types::{
    FastHashMap, TextureUpdateSource, Swizzle, TextureCacheUpdate,
    CacheTextureId, RenderTargetInfo,
};
use crate::device::{
    Device, UploadMethod, Texture, DrawTarget, UploadStagingBuffer, TextureFlags, TextureUploader,
    TextureFilter,
};
use crate::gpu_types::CopyInstance;
use crate::batch::BatchTextures;
use crate::texture_pack::{GuillotineAllocator, FreeRectSlice};
use crate::profiler;
use crate::render_api::MemoryReport;

pub const BATCH_UPLOAD_TEXTURE_SIZE: DeviceIntSize = DeviceIntSize::new(512, 512);
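// The number of texture formats the batched upload path supports: RGBA8, BGRA8, R8 and R16
// (see UploadTexturePool::format_index below).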
const BATCH_UPLOAD_FORMAT_COUNT: usize = 4;

/// Upload a number of items to texture cache textures.
///
/// This is the main entry point of the texture cache upload code.
/// See also the module documentation for more information.
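///
/// A minimal calling sketch (hypothetical caller code, not the renderer's actual call site):
///
/// ```ignore
/// // Group the pending updates by their destination cache texture, then submit them all at
/// // once so a single TextureUploader can batch PBO usage. `texture_id` and `update` stand
/// // in for values produced elsewhere in the renderer.
/// let mut update_list: FastHashMap<CacheTextureId, Vec<TextureCacheUpdate>> = FastHashMap::default();
/// update_list.entry(texture_id).or_default().push(update);
/// upload_to_texture_cache(renderer, update_list);
/// ```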
pub fn upload_to_texture_cache(
    renderer: &mut Renderer,
    update_list: FastHashMap<CacheTextureId, Vec<TextureCacheUpdate>>,
) {

    let mut stats = UploadStats {
        num_draw_calls: 0,
        upload_time: 0,
        cpu_buffer_alloc_time: 0,
        texture_alloc_time: 0,
        cpu_copy_time: 0,
        gpu_copy_commands_time: 0,
        bytes_uploaded: 0,
        items_uploaded: 0,
    };

    let upload_total_start = zeitstempel::now();

    // A list of temporary textures that batches of updates are uploaded to.
    let mut batch_upload_textures = Vec::new();

    // A list of copies that must be performed from the temporary textures to the texture cache.
    let mut batch_upload_copies = Vec::new();

    // For each texture format, this stores a list of staging buffers
    // and a texture allocator for packing the buffers.
    let mut batch_upload_buffers = FastHashMap::default();

    // For best performance we use a single TextureUploader for all uploads.
    // This allows us to fill PBOs more efficiently and therefore allocate fewer PBOs.
    let mut uploader = renderer.device.upload_texture(
        &mut renderer.texture_upload_pbo_pool,
    );

    let num_updates = update_list.len();

    for (texture_id, updates) in update_list {
        let texture = &renderer.texture_resolver.texture_cache_map[&texture_id].texture;
        for update in updates {
            let TextureCacheUpdate { rect, stride, offset, format_override, source } = update;
            let mut arc_data = None;
            let dummy_data;
            let data = match source {
                TextureUpdateSource::Bytes { ref data } => {
                    arc_data = Some(data.clone());
                    &data[offset as usize ..]
                }
                TextureUpdateSource::External { id, channel_index } => {
                    let handler = renderer.external_image_handler
                        .as_mut()
                        .expect("Found external image, but no handler set!");
                    // The filter is only relevant for NativeTexture external images.
                    match handler.lock(id, channel_index, false).source {
                        ExternalImageSource::RawData(data) => {
                            &data[offset as usize ..]
                        }
                        ExternalImageSource::Invalid => {
                            // Create a local buffer to fill the pbo.
                            let bpp = texture.get_format().bytes_per_pixel();
                            let width = stride.unwrap_or(rect.width() * bpp);
                            let total_size = width * rect.height();
                            // WR doesn't support the RGBAF32 format in the texture cache, so
                            // we use the u8 type here.
                            dummy_data = vec![0xFFu8; total_size as usize];
                            &dummy_data
                        }
                        ExternalImageSource::NativeTexture(eid) => {
                            panic!("Unexpected external texture {:?} for the texture cache update of {:?}", eid, id);
                        }
                    }
                }
                TextureUpdateSource::DebugClear => {
                    let draw_target = DrawTarget::from_texture(
                        texture,
                        false,
                    );
                    renderer.device.bind_draw_target(draw_target);
                    renderer.device.clear_target(
                        Some(TEXTURE_CACHE_DBG_CLEAR_COLOR),
                        None,
                        Some(draw_target.to_framebuffer_rect(update.rect.to_i32()))
                    );

                    continue;
                }
            };

            stats.items_uploaded += 1;

            let use_batch_upload = renderer.device.use_batched_texture_uploads() &&
                texture.flags().contains(TextureFlags::IS_SHARED_TEXTURE_CACHE) &&
                rect.width() <= BATCH_UPLOAD_TEXTURE_SIZE.width &&
                rect.height() <= BATCH_UPLOAD_TEXTURE_SIZE.height &&
                rect.area() < renderer.device.batched_upload_threshold();

            if use_batch_upload
                && arc_data.is_some()
                && matches!(renderer.device.upload_method(), &UploadMethod::Immediate)
                && rect.area() > BATCH_UPLOAD_TEXTURE_SIZE.area() / 2 {
                skip_staging_buffer(
                    &mut renderer.device,
                    &mut renderer.staging_texture_pool,
                    rect,
                    stride,
                    arc_data.unwrap(),
                    texture_id,
                    texture,
                    &mut batch_upload_buffers,
                    &mut batch_upload_textures,
                    &mut batch_upload_copies,
                    &mut stats,
                );
            } else if use_batch_upload {
                copy_into_staging_buffer(
                    &mut renderer.device,
                    &mut uploader,
                    &mut renderer.staging_texture_pool,
                    rect,
                    stride,
                    data,
                    texture_id,
                    texture,
                    &mut batch_upload_buffers,
                    &mut batch_upload_textures,
                    &mut batch_upload_copies,
                    &mut stats,
                );
            } else {
                let upload_start_time = zeitstempel::now();

                stats.bytes_uploaded += uploader.upload(
                    &mut renderer.device,
                    texture,
                    rect,
                    stride,
                    format_override,
                    data.as_ptr(),
                    data.len()
                );

                stats.upload_time += zeitstempel::now() - upload_start_time;
            }

            if let TextureUpdateSource::External { id, channel_index } = source {
                let handler = renderer.external_image_handler
                    .as_mut()
                    .expect("Found external image, but no handler set!");
                handler.unlock(id, channel_index);
            }
        }
    }

    let upload_start_time = zeitstempel::now();
    // Upload batched texture updates to their temporary textures.
    for batch_buffer in batch_upload_buffers.into_iter().map(|(_, (_, buffers))| buffers).flatten() {
        let texture = &batch_upload_textures[batch_buffer.texture_index];
        match batch_buffer.staging_buffer {
            StagingBufferKind::Pbo(pbo) => {
                stats.bytes_uploaded += uploader.upload_staged(
                    &mut renderer.device,
                    texture,
                    DeviceIntRect::from_size(texture.get_dimensions()),
                    None,
                    pbo,
                );
            }
            StagingBufferKind::CpuBuffer { bytes, .. } => {
                let bpp = texture.get_format().bytes_per_pixel();
                stats.bytes_uploaded += uploader.upload(
                    &mut renderer.device,
                    texture,
                    batch_buffer.upload_rect,
                    Some(BATCH_UPLOAD_TEXTURE_SIZE.width * bpp),
                    None,
                    bytes.as_ptr(),
                    bytes.len()
                );
                renderer.staging_texture_pool.return_temporary_buffer(bytes);
            }
            StagingBufferKind::Image { bytes, stride } => {
                stats.bytes_uploaded += uploader.upload(
                    &mut renderer.device,
                    texture,
                    batch_buffer.upload_rect,
                    stride,
                    None,
                    bytes.as_ptr(),
                    bytes.len()
                );
            }
        }
    }
    stats.upload_time += zeitstempel::now() - upload_start_time;


    // Flush all uploads, batched or otherwise.
    let flush_start_time = zeitstempel::now();
    uploader.flush(&mut renderer.device);
    stats.upload_time += zeitstempel::now() - flush_start_time;

    if !batch_upload_copies.is_empty() {
        // Copy updates that were batch uploaded to their correct destination in the texture cache.
        // Sort them by destination and source to minimize framebuffer binding changes.
        batch_upload_copies.sort_unstable_by_key(|b| (b.dest_texture_id.0, b.src_texture_index));

        let gpu_copy_start = zeitstempel::now();

        if renderer.device.use_draw_calls_for_texture_copy() {
            // Some drivers have a very high CPU overhead when submitting hundreds of small blit
            // commands (low-end Intel drivers on Windows, for example, can take 100+ ms submitting
            // a few hundred blits). In this case we do the copy with batched draw calls.
            copy_from_staging_to_cache_using_draw_calls(
                renderer,
                &mut stats,
                &batch_upload_textures,
                batch_upload_copies,
            );
        } else {
            copy_from_staging_to_cache(
                renderer,
                &batch_upload_textures,
                batch_upload_copies,
            );
        }

        stats.gpu_copy_commands_time += zeitstempel::now() - gpu_copy_start;
    }

    for texture in batch_upload_textures.drain(..) {
        renderer.staging_texture_pool.return_texture(texture);
    }

    // Update the profile counters. We use add instead of set because
    // this function can be called several times per frame.
    // We don't update the counters when their value is zero, so that
    // the profiler can treat them as events and we can get notified
    // when they happen.

    let upload_total = zeitstempel::now() - upload_total_start;
    renderer.profile.add(
        profiler::TOTAL_UPLOAD_TIME,
        profiler::ns_to_ms(upload_total)
    );

    if num_updates > 0 {
        renderer.profile.add(profiler::TEXTURE_UPLOADS, num_updates);
    }

    if stats.bytes_uploaded > 0 {
        renderer.profile.add(
            profiler::TEXTURE_UPLOADS_MEM,
            profiler::bytes_to_mb(stats.bytes_uploaded)
        );
    }

    if stats.cpu_copy_time > 0 {
        renderer.profile.add(
            profiler::UPLOAD_CPU_COPY_TIME,
            profiler::ns_to_ms(stats.cpu_copy_time)
        );
    }
    if stats.upload_time > 0 {
        renderer.profile.add(
            profiler::UPLOAD_TIME,
            profiler::ns_to_ms(stats.upload_time)
        );
    }
    if stats.texture_alloc_time > 0 {
        renderer.profile.add(
            profiler::STAGING_TEXTURE_ALLOCATION_TIME,
            profiler::ns_to_ms(stats.texture_alloc_time)
        );
    }
    if stats.cpu_buffer_alloc_time > 0 {
        renderer.profile.add(
            profiler::CPU_TEXTURE_ALLOCATION_TIME,
            profiler::ns_to_ms(stats.cpu_buffer_alloc_time)
        );
    }
    if stats.num_draw_calls > 0 {
        renderer.profile.add(
            profiler::UPLOAD_NUM_COPY_BATCHES,
            stats.num_draw_calls
        );
    }

    if stats.gpu_copy_commands_time > 0 {
        renderer.profile.add(
            profiler::UPLOAD_GPU_COPY_TIME,
            profiler::ns_to_ms(stats.gpu_copy_commands_time)
        );
    }

    let add_markers = profiler::thread_is_being_profiled();
    if add_markers && stats.bytes_uploaded > 0 {
        let details = format!("{} bytes uploaded, {} items", stats.bytes_uploaded, stats.items_uploaded);
        profiler::add_text_marker(&"Texture uploads", &details, Duration::from_nanos(upload_total));
    }
}

/// Copy an item into a batched upload staging buffer.
fn copy_into_staging_buffer<'a>(
    device: &mut Device,
    uploader: &mut TextureUploader<'a>,
    staging_texture_pool: &mut UploadTexturePool,
    update_rect: DeviceIntRect,
    update_stride: Option<i32>,
    data: &[u8],
    dest_texture_id: CacheTextureId,
    texture: &Texture,
    batch_upload_buffers: &mut FastHashMap<ImageFormat, (GuillotineAllocator, Vec<BatchUploadBuffer<'a>>)>,
    batch_upload_textures: &mut Vec<Texture>,
    batch_upload_copies: &mut Vec<BatchUploadCopy>,
    stats: &mut UploadStats
) {
    let (allocator, buffers) = batch_upload_buffers.entry(texture.get_format())
        .or_insert_with(|| (GuillotineAllocator::new(None), Vec::new()));

    // Allocate a region within the staging buffer for this update. If there is
    // no room in an existing buffer then allocate another texture and buffer.
    let (slice, origin) = match allocator.allocate(&update_rect.size()) {
        Some((slice, origin)) => (slice, origin),
        None => {
            let new_slice = FreeRectSlice(buffers.len() as u32);
            allocator.extend(new_slice, BATCH_UPLOAD_TEXTURE_SIZE, update_rect.size());

            let texture_alloc_time_start = zeitstempel::now();
            let staging_texture = staging_texture_pool.get_texture(device, texture.get_format());
            stats.texture_alloc_time = zeitstempel::now() - texture_alloc_time_start;

            let texture_index = batch_upload_textures.len();
            batch_upload_textures.push(staging_texture);

            let cpu_buffer_alloc_start_time = zeitstempel::now();
            let staging_buffer = match device.upload_method() {
                UploadMethod::Immediate => StagingBufferKind::CpuBuffer {
                    bytes: staging_texture_pool.get_temporary_buffer(),
                },
                UploadMethod::PixelBuffer(_) => {
                    let pbo = uploader.stage(
                        device,
                        texture.get_format(),
                        BATCH_UPLOAD_TEXTURE_SIZE,
                    ).unwrap();

                    StagingBufferKind::Pbo(pbo)
                }
            };
            stats.cpu_buffer_alloc_time += zeitstempel::now() - cpu_buffer_alloc_start_time;

            buffers.push(BatchUploadBuffer {
                staging_buffer,
                texture_index,
                upload_rect: DeviceIntRect::zero()
            });

            (new_slice, DeviceIntPoint::zero())
        }
    };
    let buffer = &mut buffers[slice.0 as usize];
    let allocated_rect = DeviceIntRect::from_origin_and_size(origin, update_rect.size());
    buffer.upload_rect = buffer.upload_rect.union(&allocated_rect);

    batch_upload_copies.push(BatchUploadCopy {
        src_texture_index: buffer.texture_index,
        src_offset: allocated_rect.min,
        dest_texture_id,
        dest_offset: update_rect.min,
        size: update_rect.size(),
    });

    unsafe {
        let memcpy_start_time = zeitstempel::now();
        let bpp = texture.get_format().bytes_per_pixel() as usize;
        let width_bytes = update_rect.width() as usize * bpp;
        let src_stride = update_stride.map_or(width_bytes, |stride| {
            assert!(stride >= 0);
            stride as usize
        });
        let src_size = (update_rect.height() as usize - 1) * src_stride + width_bytes;
        assert!(src_size <= data.len());

        let src: &[mem::MaybeUninit<u8>] = std::slice::from_raw_parts(data.as_ptr() as *const _, src_size);
        let (dst_stride, dst) = match &mut buffer.staging_buffer {
            StagingBufferKind::Pbo(buffer) => (
                buffer.get_stride(),
                buffer.get_mapping(),
            ),
            StagingBufferKind::CpuBuffer { bytes } => (
                BATCH_UPLOAD_TEXTURE_SIZE.width as usize * bpp,
                &mut bytes[..],
            ),
            StagingBufferKind::Image { .. } => unreachable!(),
        };

        // Copy the data line-by-line into the buffer so that we do not overwrite
        // any other region of the buffer.
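        // For example (illustrative numbers only): a 32x16 RGBA8 update with no explicit
        // stride has width_bytes = 32 * 4 = 128 and src_stride = 128, while a CPU staging
        // buffer's dst_stride is BATCH_UPLOAD_TEXTURE_SIZE.width * 4 = 2048, so each of the
        // 16 rows copies 128 bytes into a 2048-byte destination row at the allocated offset.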
        for y in 0..allocated_rect.height() as usize {
            let src_start = y * src_stride;
            let src_end = src_start + width_bytes;
            let dst_start = (allocated_rect.min.y as usize + y as usize) * dst_stride +
                allocated_rect.min.x as usize * bpp;
            let dst_end = dst_start + width_bytes;

            dst[dst_start..dst_end].copy_from_slice(&src[src_start..src_end])
        }

        stats.cpu_copy_time += zeitstempel::now() - memcpy_start_time;
    }
}

/// Take this code path instead of copying into a staging CPU buffer when the image
/// we would copy is large enough that it's unlikely anything else would fit in the
/// buffer; in that case we might as well upload directly from the source image's pixels.
fn skip_staging_buffer<'a>(
    device: &mut Device,
    staging_texture_pool: &mut UploadTexturePool,
    update_rect: DeviceIntRect,
    stride: Option<i32>,
    data: Arc<Vec<u8>>,
    dest_texture_id: CacheTextureId,
    texture: &Texture,
    batch_upload_buffers: &mut FastHashMap<ImageFormat, (GuillotineAllocator, Vec<BatchUploadBuffer<'a>>)>,
    batch_upload_textures: &mut Vec<Texture>,
    batch_upload_copies: &mut Vec<BatchUploadCopy>,
    stats: &mut UploadStats
) {
    let (_, buffers) = batch_upload_buffers.entry(texture.get_format())
        .or_insert_with(|| (GuillotineAllocator::new(None), Vec::new()));

    let texture_alloc_time_start = zeitstempel::now();
    let staging_texture = staging_texture_pool.get_texture(device, texture.get_format());
    stats.texture_alloc_time = zeitstempel::now() - texture_alloc_time_start;

    let texture_index = batch_upload_textures.len();
    batch_upload_textures.push(staging_texture);

    buffers.push(BatchUploadBuffer {
        staging_buffer: StagingBufferKind::Image { bytes: data, stride },
        texture_index,
        upload_rect: DeviceIntRect::from_size(update_rect.size())
    });

    batch_upload_copies.push(BatchUploadCopy {
        src_texture_index: texture_index,
        src_offset: point2(0, 0),
        dest_texture_id,
        dest_offset: update_rect.min,
        size: update_rect.size(),
    });
}


/// Copy from the staging PBOs or textures to texture cache textures using blit commands.
///
/// Using blits instead of draw calls is supposedly more efficient, but some drivers have
/// a very high per-command overhead, so in some configurations we end up using
/// copy_from_staging_to_cache_using_draw_calls instead.
fn copy_from_staging_to_cache(
    renderer: &mut Renderer,
    batch_upload_textures: &[Texture],
    batch_upload_copies: Vec<BatchUploadCopy>,
) {
    for copy in batch_upload_copies {
        let dest_texture = &renderer.texture_resolver.texture_cache_map[&copy.dest_texture_id].texture;

        renderer.device.copy_texture_sub_region(
            &batch_upload_textures[copy.src_texture_index],
            copy.src_offset.x as _,
            copy.src_offset.y as _,
            dest_texture,
            copy.dest_offset.x as _,
            copy.dest_offset.y as _,
            copy.size.width as _,
            copy.size.height as _,
        );
    }
}

/// Generate and submit composite shader batches to copy from
/// the staging textures to the destination cache textures.
///
/// If this shows up in GPU time profiles we could replace it with
/// a simpler shader (composite.glsl is already quite simple).
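///
/// Copy instances are accumulated and flushed as a single draw call whenever the source
/// staging texture or the destination cache texture changes, which is why the copies are
/// sorted by destination and source before this function is called.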
fn copy_from_staging_to_cache_using_draw_calls(
    renderer: &mut Renderer,
    stats: &mut UploadStats,
    batch_upload_textures: &[Texture],
    batch_upload_copies: Vec<BatchUploadCopy>,
) {
    let mut copy_instances = Vec::new();
    let mut prev_src = None;
    let mut prev_dst = None;
    let mut dst_texture_size = DeviceSize::new(0.0, 0.0);

    for copy in batch_upload_copies {

        let src_changed = prev_src != Some(copy.src_texture_index);
        let dst_changed = prev_dst != Some(copy.dest_texture_id);

        if (src_changed || dst_changed) && !copy_instances.is_empty() {
            renderer.draw_instanced_batch(
                &copy_instances,
                VertexArrayKind::Copy,
                // We bind the staging texture manually because it isn't known
                // to the texture resolver.
                &BatchTextures::empty(),
                &mut RendererStats::default(),
            );

            stats.num_draw_calls += 1;
            copy_instances.clear();
        }

        if dst_changed {
            let dest_texture = &renderer.texture_resolver.texture_cache_map[&copy.dest_texture_id].texture;
            dst_texture_size = dest_texture.get_dimensions().to_f32();

            let draw_target = DrawTarget::from_texture(dest_texture, false);
            renderer.device.bind_draw_target(draw_target);

            renderer.shaders
                .borrow_mut()
                .ps_copy()
                .bind(
                    &mut renderer.device,
                    &Transform3D::identity(),
                    None,
                    &mut renderer.renderer_errors,
                    &mut renderer.profile,
                );

            prev_dst = Some(copy.dest_texture_id);
        }

        if src_changed {
            renderer.device.bind_texture(
                TextureSampler::Color0,
                &batch_upload_textures[copy.src_texture_index],
                Swizzle::default(),
            );

            prev_src = Some(copy.src_texture_index)
        }

        let src_rect = DeviceRect::from_origin_and_size(
            copy.src_offset.to_f32(),
            copy.size.to_f32(),
        );

        let dst_rect = DeviceRect::from_origin_and_size(
            copy.dest_offset.to_f32(),
            copy.size.to_f32(),
        );

        copy_instances.push(CopyInstance {
            src_rect,
            dst_rect,
            dst_texture_size,
        });
    }

    if !copy_instances.is_empty() {
        renderer.draw_instanced_batch(
            &copy_instances,
            VertexArrayKind::Copy,
            &BatchTextures::empty(),
            &mut RendererStats::default(),
        );

        stats.num_draw_calls += 1;
    }
}

/// A very basic pool to avoid reallocating staging textures as well as staging
/// CPU-side buffers.
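///
/// A sketch of the intended per-frame lifecycle (assuming the caller drives the pool the
/// way the renderer does; exact call sites may differ):
///
/// ```ignore
/// pool.begin_frame();
/// let staging = pool.get_texture(device, ImageFormat::RGBA8);
/// // ... upload into `staging` and copy from it into the texture cache ...
/// pool.return_texture(staging);
/// pool.end_frame(device);
/// ```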
pub struct UploadTexturePool {
    /// The textures in the pool associated with a last used frame index.
    ///
    /// The outer array corresponds to each of the four supported texture formats.
    textures: [VecDeque<(Texture, u64)>; BATCH_UPLOAD_FORMAT_COUNT],
    // Frame at which to deallocate some textures if there are too many in the pool,
    // for each format.
    delay_texture_deallocation: [u64; BATCH_UPLOAD_FORMAT_COUNT],
    current_frame: u64,

    /// Temporary buffers that are used when using staging uploads + glTexImage2D.
    ///
    /// Temporary buffers aren't used asynchronously so they can be reused every frame.
    /// To keep things simple we always allocate enough memory for formats with four bytes
    /// per pixel (more than we need for alpha-only textures but it works just as well).
    temporary_buffers: Vec<Vec<mem::MaybeUninit<u8>>>,
    min_temporary_buffers: usize,
    delay_buffer_deallocation: u64,
}

impl UploadTexturePool {
    pub fn new() -> Self {
        UploadTexturePool {
            textures: [VecDeque::new(), VecDeque::new(), VecDeque::new(), VecDeque::new()],
            delay_texture_deallocation: [0; BATCH_UPLOAD_FORMAT_COUNT],
            current_frame: 0,
            temporary_buffers: Vec::new(),
            min_temporary_buffers: 0,
            delay_buffer_deallocation: 0,
        }
    }

    fn format_index(&self, format: ImageFormat) -> usize {
        match format {
            ImageFormat::RGBA8 => 0,
            ImageFormat::BGRA8 => 1,
            ImageFormat::R8 => 2,
            ImageFormat::R16 => 3,
            _ => { panic!("unexpected format {:?}", format); }
        }
    }

    pub fn begin_frame(&mut self) {
        self.current_frame += 1;
        self.min_temporary_buffers = self.temporary_buffers.len();
    }

    /// Create or reuse a staging texture.
    ///
    /// See also return_texture.
    pub fn get_texture(&mut self, device: &mut Device, format: ImageFormat) -> Texture {

        // First try to reuse a texture from the pool.
        // "available" here means hasn't been used for 2 frames to avoid stalls.
        // No need to scan the vector. Newer textures are always pushed at the back
        // of the vector so we know the first element is the least recently used.
        let format_idx = self.format_index(format);
        let can_reuse = self.textures[format_idx].get(0)
            .map(|tex| self.current_frame - tex.1 > 2)
            .unwrap_or(false);

        if can_reuse {
            return self.textures[format_idx].pop_front().unwrap().0;
        }

        // If we couldn't find an available texture, create a new one.

        device.create_texture(
            ImageBufferKind::Texture2D,
            format,
            BATCH_UPLOAD_TEXTURE_SIZE.width,
            BATCH_UPLOAD_TEXTURE_SIZE.height,
            TextureFilter::Nearest,
            // Currently we need render target support as we always use glBlitFramebuffer
            // to copy the texture data. Instead, we should use glCopyImageSubData on some
            // platforms, and avoid creating the FBOs in that case.
            Some(RenderTargetInfo { has_depth: false }),
        )
    }

    /// Hand the staging texture back to the pool after being done with uploads.
    ///
    /// The texture must have been obtained from this pool via get_texture.
    pub fn return_texture(&mut self, texture: Texture) {
        let format_idx = self.format_index(texture.get_format());
        self.textures[format_idx].push_back((texture, self.current_frame));
    }

    /// Create or reuse a temporary CPU buffer.
    ///
    /// These buffers are used in the batched upload path when PBOs are not supported.
    /// Content is first written to the temporary buffer and uploaded via a single
    /// glTexSubImage2D call.
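    ///
    /// Each buffer is BATCH_UPLOAD_TEXTURE_SIZE.area() * 4 bytes, i.e. 512 * 512 * 4 bytes
    /// (1 MiB), regardless of the destination format.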
    pub fn get_temporary_buffer(&mut self) -> Vec<mem::MaybeUninit<u8>> {
        let buffer = self.temporary_buffers.pop().unwrap_or_else(|| {
            vec![mem::MaybeUninit::new(0); BATCH_UPLOAD_TEXTURE_SIZE.area() as usize * 4]
        });
        self.min_temporary_buffers = self.min_temporary_buffers.min(self.temporary_buffers.len());
        buffer
    }

    /// Return memory that was obtained from this pool via get_temporary_buffer.
    pub fn return_temporary_buffer(&mut self, buffer: Vec<mem::MaybeUninit<u8>>) {
        assert_eq!(buffer.len(), BATCH_UPLOAD_TEXTURE_SIZE.area() as usize * 4);
        self.temporary_buffers.push(buffer);
    }

    /// Deallocate this pool's CPU and GPU memory.
    pub fn delete_textures(&mut self, device: &mut Device) {
        for format in &mut self.textures {
            while let Some(texture) = format.pop_back() {
                device.delete_texture(texture.0)
            }
        }
        self.temporary_buffers.clear();
    }

    /// Deallocate some textures if there are too many for a long time.
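    ///
    /// For example (illustrative): if a format keeps 8 or more reusable staging textures for
    /// more than 120 consecutive frames, up to 4 of them are deleted per frame until fewer
    /// than 8 reusable textures remain, which resets the 120-frame delay.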
    pub fn end_frame(&mut self, device: &mut Device) {
        for format_idx in 0..self.textures.len() {
            // Count the number of reusable staging textures.
            // If it stays high for a large number of frames, truncate it back to 8-ish
            // over multiple frames.

            let mut num_reusable_textures = 0;
            for texture in &self.textures[format_idx] {
                if self.current_frame - texture.1 > 2 {
                    num_reusable_textures += 1;
                }
            }

            if num_reusable_textures < 8 {
                // Don't deallocate textures for another 120 frames.
                self.delay_texture_deallocation[format_idx] = self.current_frame + 120;
            }

            // Deallocate up to 4 staging textures every frame.
            let to_remove = if self.current_frame > self.delay_texture_deallocation[format_idx] {
                num_reusable_textures.min(4)
            } else {
                0
            };

            for _ in 0..to_remove {
                let texture = self.textures[format_idx].pop_front().unwrap().0;
                device.delete_texture(texture);
            }
        }

        // Similar logic for temporary CPU buffers. Our calls to get and return
        // temporary buffers should have been balanced for this frame, but
        // get_temporary_buffer will allocate a new buffer if the vec is empty. Since we
        // carry these buffers from frame to frame, we keep track of the smallest
        // length of the temporary_buffers vec that we encountered this frame. Those
        // buffers were not touched and we deallocate some if there are a lot of them.
        let unused_buffers = self.min_temporary_buffers;
        if unused_buffers < 8 {
            self.delay_buffer_deallocation = self.current_frame + 120;
        }
        let to_remove = if self.current_frame > self.delay_buffer_deallocation {
            unused_buffers.min(4)
        } else {
            0
        };
        for _ in 0..to_remove {
            // Unlike textures it doesn't matter whether we pop from the front or back
            // of the vector.
            self.temporary_buffers.pop();
        }
    }

    pub fn report_memory_to(&self, report: &mut MemoryReport, size_op_funs: &MallocSizeOfOps) {
        for buf in &self.temporary_buffers {
            report.upload_staging_memory += unsafe { (size_op_funs.size_of_op)(buf.as_ptr() as *const _) };
        }

        for format in &self.textures {
            for texture in format {
                report.upload_staging_textures += texture.0.size_in_bytes();
            }
        }
    }
}

struct UploadStats {
    num_draw_calls: u32,
    upload_time: u64,
    cpu_buffer_alloc_time: u64,
    texture_alloc_time: u64,
    cpu_copy_time: u64,
    gpu_copy_commands_time: u64,
    bytes_uploaded: usize,
    items_uploaded: usize,
}

#[derive(Debug)]
enum StagingBufferKind<'a> {
    Pbo(UploadStagingBuffer<'a>),
    CpuBuffer { bytes: Vec<mem::MaybeUninit<u8>> },
    Image { bytes: Arc<Vec<u8>>, stride: Option<i32> },
}
#[derive(Debug)]
struct BatchUploadBuffer<'a> {
    staging_buffer: StagingBufferKind<'a>,
    texture_index: usize,
    // A rectangle containing all items going into this staging texture, so
    // that we can avoid uploading the entire area if we are using glTexSubImage2D.
    upload_rect: DeviceIntRect,
}

// On some devices performing many small texture uploads is slow, so instead we batch
// updates into a small number of uploads to temporary textures, then copy from those
// textures to the correct place in the texture cache.
// This represents a single copy from a staging texture to its destination in the texture cache.
#[derive(Debug)]
struct BatchUploadCopy {
    // Index within batch_upload_textures
    src_texture_index: usize,
    src_offset: DeviceIntPoint,
    dest_texture_id: CacheTextureId,
    dest_offset: DeviceIntPoint,
    size: DeviceIntSize,
}