webrender/renderer/upload.rs

/* This Source Code Form is subject to the terms of the Mozilla Public
 * License, v. 2.0. If a copy of the MPL was not distributed with this
 * file, You can obtain one at http://mozilla.org/MPL/2.0/. */

//! This module contains the convoluted logic that goes into uploading content into
//! the texture cache's textures.
//!
//! We need to support various combinations of code paths depending on the quirks of
//! each hardware/driver configuration:
//! - direct upload,
//! - staged upload via a pixel buffer object,
//! - staged upload via a direct upload to a staging texture where PBOs aren't supported,
//! - copy from the staging to destination textures, either via blits or batched draw calls.
//!
//! Conceptually a lot of this logic should probably be in the device module, but some code
//! here relies on submitting draw calls via the renderer.
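//!
//! A rough sketch of how a single update is routed (a simplification of the real
//! checks in `upload_to_texture_cache`, which also depend on the update's size and
//! the device's capabilities; the condition name below is illustrative, not real code):
//!
//! ```ignore
//! if device.use_batched_texture_uploads() && update_is_small_shared_cache_item {
//!     // Stage the pixels into a 512x512 staging texture (via a PBO or a CPU
//!     // buffer), then copy into the cache texture with a blit or a draw call.
//! } else {
//!     // Upload directly to the destination texture.
//! }
//! ```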


use api::precise_time_ns;
use std::mem;
use std::collections::VecDeque;
use std::sync::Arc;
use std::time::Duration;
use euclid::{Transform3D, point2};
use malloc_size_of::MallocSizeOfOps;
use api::units::*;
use api::{ExternalImageSource, ImageBufferKind, ImageFormat};
use crate::renderer::{
    Renderer, VertexArrayKind, RendererStats, TextureSampler, TEXTURE_CACHE_DBG_CLEAR_COLOR
};
use crate::internal_types::{
    FastHashMap, TextureUpdateSource, Swizzle, TextureCacheUpdate,
    CacheTextureId, RenderTargetInfo,
};
use crate::device::{
    Device, UploadMethod, Texture, DrawTarget, UploadStagingBuffer, TextureFlags, TextureUploader,
    TextureFilter,
};
use crate::gpu_types::CopyInstance;
use crate::batch::BatchTextures;
use crate::texture_pack::{GuillotineAllocator, FreeRectSlice};
use crate::profiler;
use crate::render_api::MemoryReport;

pub const BATCH_UPLOAD_TEXTURE_SIZE: DeviceIntSize = DeviceIntSize::new(512, 512);
const BATCH_UPLOAD_FORMAT_COUNT: usize = 4;

/// Upload a number of items to texture cache textures.
///
/// This is the main entry point of the texture cache upload code.
/// See also the module documentation for more information.
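///
/// A minimal sketch of the expected call pattern (the renderer is assumed to have
/// already grouped its pending `TextureCacheUpdate`s by destination texture; the
/// names `updates_by_texture` and `gather_pending_updates` are illustrative):
///
/// ```ignore
/// let updates_by_texture: FastHashMap<CacheTextureId, Vec<TextureCacheUpdate>> =
///     gather_pending_updates();
/// upload_to_texture_cache(renderer, updates_by_texture);
/// ```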
pub fn upload_to_texture_cache(
    renderer: &mut Renderer,
    update_list: FastHashMap<CacheTextureId, Vec<TextureCacheUpdate>>,
) {

    let mut stats = UploadStats {
        num_draw_calls: 0,
        upload_time: 0,
        cpu_buffer_alloc_time: 0,
        texture_alloc_time: 0,
        cpu_copy_time: 0,
        gpu_copy_commands_time: 0,
        bytes_uploaded: 0,
        items_uploaded: 0,
    };

    let upload_total_start = precise_time_ns();

    let mut batch_upload_textures = Vec::new();

    // A list of copies that must be performed from the temporary textures to the texture cache.
    let mut batch_upload_copies = Vec::new();

    // For each texture format, this stores a list of staging buffers
    // and a texture allocator for packing the buffers.
    let mut batch_upload_buffers = FastHashMap::default();

    // For best performance we use a single TextureUploader for all uploads.
    // This allows us to fill PBOs more efficiently and therefore allocate fewer PBOs.
    let mut uploader = renderer.device.upload_texture(
        &mut renderer.texture_upload_pbo_pool,
    );

    let num_updates = update_list.len();

    for (texture_id, updates) in update_list {
        let texture = &renderer.texture_resolver.texture_cache_map[&texture_id].texture;
        for update in updates {
            let TextureCacheUpdate { rect, stride, offset, format_override, source } = update;
            let mut arc_data = None;
            let dummy_data;
            let data = match source {
                TextureUpdateSource::Bytes { ref data } => {
                    arc_data = Some(data.clone());
                    &data[offset as usize ..]
                }
                TextureUpdateSource::External { id, channel_index } => {
                    let handler = renderer.external_image_handler
                        .as_mut()
                        .expect("Found external image, but no handler set!");
                    // The filter is only relevant for NativeTexture external images.
                    match handler.lock(id, channel_index).source {
                        ExternalImageSource::RawData(data) => {
                            &data[offset as usize ..]
                        }
                        ExternalImageSource::Invalid => {
                            // Create a local buffer to fill the pbo.
                            let bpp = texture.get_format().bytes_per_pixel();
                            let width = stride.unwrap_or(rect.width() * bpp);
                            let total_size = width * rect.height();
                            // WR doesn't support the RGBAF32 format in the texture cache,
                            // so we use the u8 type here.
                            dummy_data = vec![0xFFu8; total_size as usize];
                            &dummy_data
                        }
                        ExternalImageSource::NativeTexture(eid) => {
                            panic!("Unexpected external texture {:?} for the texture cache update of {:?}", eid, id);
                        }
                    }
                }
                TextureUpdateSource::DebugClear => {
                    let draw_target = DrawTarget::from_texture(
                        texture,
                        false,
                    );
                    renderer.device.bind_draw_target(draw_target);
                    renderer.device.clear_target(
                        Some(TEXTURE_CACHE_DBG_CLEAR_COLOR),
                        None,
                        Some(draw_target.to_framebuffer_rect(update.rect.to_i32()))
                    );

                    continue;
                }
            };

            stats.items_uploaded += 1;

            let use_batch_upload = renderer.device.use_batched_texture_uploads() &&
                texture.flags().contains(TextureFlags::IS_SHARED_TEXTURE_CACHE) &&
                rect.width() <= BATCH_UPLOAD_TEXTURE_SIZE.width &&
                rect.height() <= BATCH_UPLOAD_TEXTURE_SIZE.height &&
                rect.area() < renderer.device.batched_upload_threshold();

            if use_batch_upload
                && arc_data.is_some()
                && matches!(renderer.device.upload_method(), &UploadMethod::Immediate)
                && rect.area() > BATCH_UPLOAD_TEXTURE_SIZE.area() / 2 {
                skip_staging_buffer(
                    &mut renderer.device,
                    &mut renderer.staging_texture_pool,
                    rect,
                    stride,
                    arc_data.unwrap(),
                    texture_id,
                    texture,
                    &mut batch_upload_buffers,
                    &mut batch_upload_textures,
                    &mut batch_upload_copies,
                    &mut stats,
                );
            } else if use_batch_upload {
                copy_into_staging_buffer(
                    &mut renderer.device,
                    &mut uploader,
                    &mut renderer.staging_texture_pool,
                    rect,
                    stride,
                    data,
                    texture_id,
                    texture,
                    &mut batch_upload_buffers,
                    &mut batch_upload_textures,
                    &mut batch_upload_copies,
                    &mut stats,
                );
            } else {
                let upload_start_time = precise_time_ns();

                stats.bytes_uploaded += uploader.upload(
                    &mut renderer.device,
                    texture,
                    rect,
                    stride,
                    format_override,
                    data.as_ptr(),
                    data.len()
                );

                stats.upload_time += precise_time_ns() - upload_start_time;
            }

            if let TextureUpdateSource::External { id, channel_index } = source {
                let handler = renderer.external_image_handler
                    .as_mut()
                    .expect("Found external image, but no handler set!");
                handler.unlock(id, channel_index);
            }
        }
    }

    let upload_start_time = precise_time_ns();
    // Upload batched texture updates to their temporary textures.
    for batch_buffer in batch_upload_buffers.into_iter().map(|(_, (_, buffers))| buffers).flatten() {
        let texture = &batch_upload_textures[batch_buffer.texture_index];
        match batch_buffer.staging_buffer {
            StagingBufferKind::Pbo(pbo) => {
                stats.bytes_uploaded += uploader.upload_staged(
                    &mut renderer.device,
                    texture,
                    DeviceIntRect::from_size(texture.get_dimensions()),
                    None,
                    pbo,
                );
            }
            StagingBufferKind::CpuBuffer { bytes, .. } => {
                let bpp = texture.get_format().bytes_per_pixel();
                stats.bytes_uploaded += uploader.upload(
                    &mut renderer.device,
                    texture,
                    batch_buffer.upload_rect,
                    Some(BATCH_UPLOAD_TEXTURE_SIZE.width * bpp),
                    None,
                    bytes.as_ptr(),
                    bytes.len()
                );
                renderer.staging_texture_pool.return_temporary_buffer(bytes);
            }
            StagingBufferKind::Image { bytes, stride } => {
                stats.bytes_uploaded += uploader.upload(
                    &mut renderer.device,
                    texture,
                    batch_buffer.upload_rect,
                    stride,
                    None,
                    bytes.as_ptr(),
                    bytes.len()
                );
            }
        }
    }
    stats.upload_time += precise_time_ns() - upload_start_time;


    // Flush all uploads, batched or otherwise.
    let flush_start_time = precise_time_ns();
    uploader.flush(&mut renderer.device);
    stats.upload_time += precise_time_ns() - flush_start_time;

    if !batch_upload_copies.is_empty() {
        // Copy updates that were batch uploaded to their correct destination in the texture cache.
        // Sort them by destination and source to minimize framebuffer binding changes.
        batch_upload_copies.sort_unstable_by_key(|b| (b.dest_texture_id.0, b.src_texture_index));

        let gpu_copy_start = precise_time_ns();

        if renderer.device.use_draw_calls_for_texture_copy() {
            // Some drivers have a very high CPU overhead when submitting hundreds of small blit
            // commands (low-end Intel drivers on Windows, for example, can take 100+ ms submitting a
            // few hundred blits). In this case we do the copy with batched draw calls.
            copy_from_staging_to_cache_using_draw_calls(
                renderer,
                &mut stats,
                &batch_upload_textures,
                batch_upload_copies,
            );
        } else {
            copy_from_staging_to_cache(
                renderer,
                &batch_upload_textures,
                batch_upload_copies,
            );
        }

        stats.gpu_copy_commands_time += precise_time_ns() - gpu_copy_start;
    }

    for texture in batch_upload_textures.drain(..) {
        renderer.staging_texture_pool.return_texture(texture);
    }

    // Update the profile counters. We use add instead of set because
    // this function can be called several times per frame.
    // We don't update the counters when their value is zero, so that
    // the profiler can treat them as events and we can get notified
    // when they happen.

    let upload_total = precise_time_ns() - upload_total_start;
    renderer.profile.add(
        profiler::TOTAL_UPLOAD_TIME,
        profiler::ns_to_ms(upload_total)
    );

    if num_updates > 0 {
        renderer.profile.add(profiler::TEXTURE_UPLOADS, num_updates);
    }

    if stats.bytes_uploaded > 0 {
        renderer.profile.add(
            profiler::TEXTURE_UPLOADS_MEM,
            profiler::bytes_to_mb(stats.bytes_uploaded)
        );
    }

    if stats.cpu_copy_time > 0 {
        renderer.profile.add(
            profiler::UPLOAD_CPU_COPY_TIME,
            profiler::ns_to_ms(stats.cpu_copy_time)
        );
    }
    if stats.upload_time > 0 {
        renderer.profile.add(
            profiler::UPLOAD_TIME,
            profiler::ns_to_ms(stats.upload_time)
        );
    }
    if stats.texture_alloc_time > 0 {
        renderer.profile.add(
            profiler::STAGING_TEXTURE_ALLOCATION_TIME,
            profiler::ns_to_ms(stats.texture_alloc_time)
        );
    }
    if stats.cpu_buffer_alloc_time > 0 {
        renderer.profile.add(
            profiler::CPU_TEXTURE_ALLOCATION_TIME,
            profiler::ns_to_ms(stats.cpu_buffer_alloc_time)
        );
    }
    if stats.num_draw_calls > 0 {
        renderer.profile.add(
            profiler::UPLOAD_NUM_COPY_BATCHES,
            stats.num_draw_calls
        );
    }

    if stats.gpu_copy_commands_time > 0 {
        renderer.profile.add(
            profiler::UPLOAD_GPU_COPY_TIME,
            profiler::ns_to_ms(stats.gpu_copy_commands_time)
        );
    }

    let add_markers = profiler::thread_is_being_profiled();
    if add_markers && stats.bytes_uploaded > 0 {
        let details = format!("{} bytes uploaded, {} items", stats.bytes_uploaded, stats.items_uploaded);
        profiler::add_text_marker(&"Texture uploads", &details, Duration::from_nanos(upload_total));
    }
}

/// Copy an item into a batched upload staging buffer.
fn copy_into_staging_buffer<'a>(
    device: &mut Device,
    uploader: &mut TextureUploader<'a>,
    staging_texture_pool: &mut UploadTexturePool,
    update_rect: DeviceIntRect,
    update_stride: Option<i32>,
    data: &[u8],
    dest_texture_id: CacheTextureId,
    texture: &Texture,
    batch_upload_buffers: &mut FastHashMap<ImageFormat, (GuillotineAllocator, Vec<BatchUploadBuffer<'a>>)>,
    batch_upload_textures: &mut Vec<Texture>,
    batch_upload_copies: &mut Vec<BatchUploadCopy>,
    stats: &mut UploadStats
) {
    let (allocator, buffers) = batch_upload_buffers.entry(texture.get_format())
        .or_insert_with(|| (GuillotineAllocator::new(None), Vec::new()));

    // Allocate a region within the staging buffer for this update. If there is
    // no room in an existing buffer then allocate another texture and buffer.
    let (slice, origin) = match allocator.allocate(&update_rect.size()) {
        Some((slice, origin)) => (slice, origin),
        None => {
            let new_slice = FreeRectSlice(buffers.len() as u32);
            allocator.extend(new_slice, BATCH_UPLOAD_TEXTURE_SIZE, update_rect.size());

            let texture_alloc_time_start = precise_time_ns();
            let staging_texture = staging_texture_pool.get_texture(device, texture.get_format());
            stats.texture_alloc_time += precise_time_ns() - texture_alloc_time_start;

            let texture_index = batch_upload_textures.len();
            batch_upload_textures.push(staging_texture);

            let cpu_buffer_alloc_start_time = precise_time_ns();
            let staging_buffer = match device.upload_method() {
                UploadMethod::Immediate => StagingBufferKind::CpuBuffer {
                    bytes: staging_texture_pool.get_temporary_buffer(),
                },
                UploadMethod::PixelBuffer(_) => {
                    let pbo = uploader.stage(
                        device,
                        texture.get_format(),
                        BATCH_UPLOAD_TEXTURE_SIZE,
                    ).unwrap();

                    StagingBufferKind::Pbo(pbo)
                }
            };
            stats.cpu_buffer_alloc_time += precise_time_ns() - cpu_buffer_alloc_start_time;

            buffers.push(BatchUploadBuffer {
                staging_buffer,
                texture_index,
                upload_rect: DeviceIntRect::zero()
            });

            (new_slice, DeviceIntPoint::zero())
        }
    };
    let buffer = &mut buffers[slice.0 as usize];
    let allocated_rect = DeviceIntRect::from_origin_and_size(origin, update_rect.size());
    buffer.upload_rect = buffer.upload_rect.union(&allocated_rect);

    batch_upload_copies.push(BatchUploadCopy {
        src_texture_index: buffer.texture_index,
        src_offset: allocated_rect.min,
        dest_texture_id,
        dest_offset: update_rect.min,
        size: update_rect.size(),
    });

    unsafe {
        let memcpy_start_time = precise_time_ns();
        let bpp = texture.get_format().bytes_per_pixel() as usize;
        let width_bytes = update_rect.width() as usize * bpp;
        let src_stride = update_stride.map_or(width_bytes, |stride| {
            assert!(stride >= 0);
            stride as usize
        });
        let src_size = (update_rect.height() as usize - 1) * src_stride + width_bytes;
        assert!(src_size <= data.len());

        let src: &[mem::MaybeUninit<u8>] = std::slice::from_raw_parts(data.as_ptr() as *const _, src_size);
        let (dst_stride, dst) = match &mut buffer.staging_buffer {
            StagingBufferKind::Pbo(buffer) => (
                buffer.get_stride(),
                buffer.get_mapping(),
            ),
            StagingBufferKind::CpuBuffer { bytes } => (
                BATCH_UPLOAD_TEXTURE_SIZE.width as usize * bpp,
                &mut bytes[..],
            ),
            StagingBufferKind::Image { .. } => unreachable!(),
        };

        // Copy the data line by line into the buffer so that we do not overwrite
        // any other region of the buffer.
        for y in 0..allocated_rect.height() as usize {
            let src_start = y * src_stride;
            let src_end = src_start + width_bytes;
            let dst_start = (allocated_rect.min.y as usize + y) * dst_stride +
                allocated_rect.min.x as usize * bpp;
            let dst_end = dst_start + width_bytes;

            dst[dst_start..dst_end].copy_from_slice(&src[src_start..src_end])
        }

        stats.cpu_copy_time += precise_time_ns() - memcpy_start_time;
    }
}

/// Take this code path instead of copying into a staging CPU buffer when the image
/// we would copy is large enough that it's unlikely anything else would fit in the
/// buffer, so we might as well copy directly from the source image's pixels.
fn skip_staging_buffer<'a>(
    device: &mut Device,
    staging_texture_pool: &mut UploadTexturePool,
    update_rect: DeviceIntRect,
    stride: Option<i32>,
    data: Arc<Vec<u8>>,
    dest_texture_id: CacheTextureId,
    texture: &Texture,
    batch_upload_buffers: &mut FastHashMap<ImageFormat, (GuillotineAllocator, Vec<BatchUploadBuffer<'a>>)>,
    batch_upload_textures: &mut Vec<Texture>,
    batch_upload_copies: &mut Vec<BatchUploadCopy>,
    stats: &mut UploadStats
) {
    let (_, buffers) = batch_upload_buffers.entry(texture.get_format())
        .or_insert_with(|| (GuillotineAllocator::new(None), Vec::new()));

    let texture_alloc_time_start = precise_time_ns();
    let staging_texture = staging_texture_pool.get_texture(device, texture.get_format());
    stats.texture_alloc_time += precise_time_ns() - texture_alloc_time_start;

    let texture_index = batch_upload_textures.len();
    batch_upload_textures.push(staging_texture);

    buffers.push(BatchUploadBuffer {
        staging_buffer: StagingBufferKind::Image { bytes: data, stride },
        texture_index,
        upload_rect: DeviceIntRect::from_size(update_rect.size())
    });

    batch_upload_copies.push(BatchUploadCopy {
        src_texture_index: texture_index,
        src_offset: point2(0, 0),
        dest_texture_id,
        dest_offset: update_rect.min,
        size: update_rect.size(),
    });
}


/// Copy from the staging PBOs or textures to texture cache textures using blit commands.
///
/// Using blits instead of draw calls is supposedly more efficient, but some drivers have
/// a very high per-command overhead, so in some configurations we end up using
/// copy_from_staging_to_cache_using_draw_calls instead.
fn copy_from_staging_to_cache(
    renderer: &mut Renderer,
    batch_upload_textures: &[Texture],
    batch_upload_copies: Vec<BatchUploadCopy>,
) {
    for copy in batch_upload_copies {
        let dest_texture = &renderer.texture_resolver.texture_cache_map[&copy.dest_texture_id].texture;

        renderer.device.copy_texture_sub_region(
            &batch_upload_textures[copy.src_texture_index],
            copy.src_offset.x as _,
            copy.src_offset.y as _,
            dest_texture,
            copy.dest_offset.x as _,
            copy.dest_offset.y as _,
            copy.size.width as _,
            copy.size.height as _,
        );
    }
}

/// Generate and submit composite shader batches to copy from
/// the staging textures to the destination cache textures.
///
/// If this shows up in GPU time profiles we could replace it with
/// a simpler shader (composite.glsl is already quite simple).
fn copy_from_staging_to_cache_using_draw_calls(
    renderer: &mut Renderer,
    stats: &mut UploadStats,
    batch_upload_textures: &[Texture],
    batch_upload_copies: Vec<BatchUploadCopy>,
) {
    let mut copy_instances = Vec::new();
    let mut prev_src = None;
    let mut prev_dst = None;
    let mut dst_texture_size = DeviceSize::new(0.0, 0.0);

    for copy in batch_upload_copies {

        let src_changed = prev_src != Some(copy.src_texture_index);
        let dst_changed = prev_dst != Some(copy.dest_texture_id);

        if (src_changed || dst_changed) && !copy_instances.is_empty() {
            renderer.draw_instanced_batch(
                &copy_instances,
                VertexArrayKind::Copy,
                // We bind the staging texture manually because it isn't known
                // to the texture resolver.
                &BatchTextures::empty(),
                &mut RendererStats::default(),
            );

            stats.num_draw_calls += 1;
            copy_instances.clear();
        }

        if dst_changed {
            let dest_texture = &renderer.texture_resolver.texture_cache_map[&copy.dest_texture_id].texture;
            dst_texture_size = dest_texture.get_dimensions().to_f32();

            let draw_target = DrawTarget::from_texture(dest_texture, false);
            renderer.device.bind_draw_target(draw_target);

            renderer.shaders
                .borrow_mut()
                .ps_copy()
                .bind(
                    &mut renderer.device,
                    &Transform3D::identity(),
                    None,
                    &mut renderer.renderer_errors,
                    &mut renderer.profile,
                );

            prev_dst = Some(copy.dest_texture_id);
        }

        if src_changed {
            renderer.device.bind_texture(
                TextureSampler::Color0,
                &batch_upload_textures[copy.src_texture_index],
                Swizzle::default(),
            );

            prev_src = Some(copy.src_texture_index);
        }

        let src_rect = DeviceRect::from_origin_and_size(
            copy.src_offset.to_f32(),
            copy.size.to_f32(),
        );

        let dst_rect = DeviceRect::from_origin_and_size(
            copy.dest_offset.to_f32(),
            copy.size.to_f32(),
        );

        copy_instances.push(CopyInstance {
            src_rect,
            dst_rect,
            dst_texture_size,
        });
    }

    if !copy_instances.is_empty() {
        renderer.draw_instanced_batch(
            &copy_instances,
            VertexArrayKind::Copy,
            &BatchTextures::empty(),
            &mut RendererStats::default(),
        );

        stats.num_draw_calls += 1;
    }
}

/// A very basic pool to avoid reallocating staging textures as well as staging
/// CPU side buffers.
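///
/// A sketch of the intended per-frame usage (not the exact renderer code; the
/// upload and copy steps in the middle are elided):
///
/// ```ignore
/// pool.begin_frame();
/// let staging = pool.get_texture(device, ImageFormat::RGBA8);
/// // ... upload pixels into `staging`, then copy them into the cache texture ...
/// pool.return_texture(staging);
/// pool.end_frame(device);
/// ```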
pub struct UploadTexturePool {
    /// The textures in the pool associated with a last used frame index.
    ///
    /// The outer array corresponds to each of the supported texture formats.
    textures: [VecDeque<(Texture, u64)>; BATCH_UPLOAD_FORMAT_COUNT],
    // Frame at which to deallocate some textures if there are too many in the pool,
    // for each format.
    delay_texture_deallocation: [u64; BATCH_UPLOAD_FORMAT_COUNT],
    current_frame: u64,

    /// Temporary buffers that are used when using staging uploads + glTexImage2D.
    ///
    /// Temporary buffers aren't used asynchronously so they can be reused every frame.
    /// To keep things simple we always allocate enough memory for formats with four bytes
    /// per pixel (more than we need for alpha-only textures but it works just as well).
    temporary_buffers: Vec<Vec<mem::MaybeUninit<u8>>>,
    min_temporary_buffers: usize,
    delay_buffer_deallocation: u64,
}

impl UploadTexturePool {
    pub fn new() -> Self {
        UploadTexturePool {
            textures: [VecDeque::new(), VecDeque::new(), VecDeque::new(), VecDeque::new()],
            delay_texture_deallocation: [0; BATCH_UPLOAD_FORMAT_COUNT],
            current_frame: 0,
            temporary_buffers: Vec::new(),
            min_temporary_buffers: 0,
            delay_buffer_deallocation: 0,
        }
    }

    fn format_index(&self, format: ImageFormat) -> usize {
        match format {
            ImageFormat::RGBA8 => 0,
            ImageFormat::BGRA8 => 1,
            ImageFormat::R8 => 2,
            ImageFormat::R16 => 3,
            _ => { panic!("unexpected format {:?}", format); }
        }
    }

    pub fn begin_frame(&mut self) {
        self.current_frame += 1;
        self.min_temporary_buffers = self.temporary_buffers.len();
    }

    /// Create or reuse a staging texture.
    ///
    /// See also return_texture.
    pub fn get_texture(&mut self, device: &mut Device, format: ImageFormat) -> Texture {

        // First try to reuse a texture from the pool.
        // "available" here means it hasn't been used for more than 2 frames, to avoid stalls.
        // No need to scan the vector. Newer textures are always pushed at the back
        // of the vector so we know the first element is the least recently used.
        let format_idx = self.format_index(format);
        let can_reuse = self.textures[format_idx].get(0)
            .map(|tex| self.current_frame - tex.1 > 2)
            .unwrap_or(false);

        if can_reuse {
            return self.textures[format_idx].pop_front().unwrap().0;
        }

        // If we couldn't find an available texture, create a new one.

        device.create_texture(
            ImageBufferKind::Texture2D,
            format,
            BATCH_UPLOAD_TEXTURE_SIZE.width,
            BATCH_UPLOAD_TEXTURE_SIZE.height,
            TextureFilter::Nearest,
            // Currently we need render target support as we always use glBlitFramebuffer
            // to copy the texture data. Instead, we should use glCopyImageSubData on some
            // platforms, and avoid creating the FBOs in that case.
            Some(RenderTargetInfo { has_depth: false }),
        )
    }

    /// Hand the staging texture back to the pool after being done with uploads.
    ///
    /// The texture must have been obtained from this pool via get_texture.
    pub fn return_texture(&mut self, texture: Texture) {
        let format_idx = self.format_index(texture.get_format());
        self.textures[format_idx].push_back((texture, self.current_frame));
    }

    /// Create or reuse a temporary CPU buffer.
    ///
    /// These buffers are used in the batched upload path when PBOs are not supported.
    /// Content is first written to the temporary buffer and uploaded via a single
    /// glTexSubImage2D call.
    pub fn get_temporary_buffer(&mut self) -> Vec<mem::MaybeUninit<u8>> {
        let buffer = self.temporary_buffers.pop().unwrap_or_else(|| {
            vec![mem::MaybeUninit::new(0); BATCH_UPLOAD_TEXTURE_SIZE.area() as usize * 4]
        });
        self.min_temporary_buffers = self.min_temporary_buffers.min(self.temporary_buffers.len());
        buffer
    }

    /// Return memory that was obtained from this pool via get_temporary_buffer.
    pub fn return_temporary_buffer(&mut self, buffer: Vec<mem::MaybeUninit<u8>>) {
        assert_eq!(buffer.len(), BATCH_UPLOAD_TEXTURE_SIZE.area() as usize * 4);
        self.temporary_buffers.push(buffer);
    }

    /// Deallocate this pool's CPU and GPU memory.
    pub fn delete_textures(&mut self, device: &mut Device) {
        for format in &mut self.textures {
            while let Some(texture) = format.pop_back() {
                device.delete_texture(texture.0)
            }
        }
        self.temporary_buffers.clear();
    }

    /// Deallocate some textures if there are too many for a long time.
    pub fn end_frame(&mut self, device: &mut Device) {
        for format_idx in 0..self.textures.len() {
            // Count the number of reusable staging textures.
            // If it stays high for a large number of frames, truncate it back to 8-ish
            // over multiple frames.

            let mut num_reusable_textures = 0;
            for texture in &self.textures[format_idx] {
                if self.current_frame - texture.1 > 2 {
                    num_reusable_textures += 1;
                }
            }

            if num_reusable_textures < 8 {
                // Don't deallocate textures for another 120 frames.
                self.delay_texture_deallocation[format_idx] = self.current_frame + 120;
            }

            // Deallocate up to 4 staging textures every frame.
            let to_remove = if self.current_frame > self.delay_texture_deallocation[format_idx] {
                num_reusable_textures.min(4)
            } else {
                0
            };

            for _ in 0..to_remove {
                let texture = self.textures[format_idx].pop_front().unwrap().0;
                device.delete_texture(texture);
            }
        }

        // Similar logic for temporary CPU buffers. Our calls to get and return
        // temporary buffers should have been balanced for this frame, but the call
        // get_temporary_buffer will allocate a buffer if the vec is empty. Since we
        // carry these buffers from frame to frame, we keep track of the smallest
        // length of the temporary_buffers vec that we encountered this frame. Those
        // buffers were not touched and we deallocate some if there are a lot of them.
        let unused_buffers = self.min_temporary_buffers;
        if unused_buffers < 8 {
            self.delay_buffer_deallocation = self.current_frame + 120;
        }
        let to_remove = if self.current_frame > self.delay_buffer_deallocation {
            unused_buffers.min(4)
        } else {
            0
        };
        for _ in 0..to_remove {
            // Unlike textures it doesn't matter whether we pop from the front or back
            // of the vector.
            self.temporary_buffers.pop();
        }
    }

    pub fn report_memory_to(&self, report: &mut MemoryReport, size_op_funs: &MallocSizeOfOps) {
        for buf in &self.temporary_buffers {
            report.upload_staging_memory += unsafe { (size_op_funs.size_of_op)(buf.as_ptr() as *const _) };
        }

        for format in &self.textures {
            for texture in format {
                report.upload_staging_textures += texture.0.size_in_bytes();
            }
        }
    }
}

struct UploadStats {
    num_draw_calls: u32,
    upload_time: u64,
    cpu_buffer_alloc_time: u64,
    texture_alloc_time: u64,
    cpu_copy_time: u64,
    gpu_copy_commands_time: u64,
    bytes_uploaded: usize,
    items_uploaded: usize,
}

#[derive(Debug)]
enum StagingBufferKind<'a> {
    Pbo(UploadStagingBuffer<'a>),
    CpuBuffer { bytes: Vec<mem::MaybeUninit<u8>> },
    Image { bytes: Arc<Vec<u8>>, stride: Option<i32> },
}

#[derive(Debug)]
struct BatchUploadBuffer<'a> {
    staging_buffer: StagingBufferKind<'a>,
    texture_index: usize,
    // A rectangle containing all items going into this staging texture, so
    // that we can avoid uploading the entire area if we are using glTexSubImage2D.
    upload_rect: DeviceIntRect,
}

// On some devices performing many small texture uploads is slow, so instead we batch
// updates into a small number of uploads to temporary textures, then copy from those
// textures to the correct place in the texture cache.
// Each entry describes one such copy from a staging texture into the texture cache.
#[derive(Debug)]
struct BatchUploadCopy {
    // Index within batch_upload_textures
    src_texture_index: usize,
    src_offset: DeviceIntPoint,
    dest_texture_id: CacheTextureId,
    dest_offset: DeviceIntPoint,
    size: DeviceIntSize,
}