Skip to main content

webrender/renderer/
upload.rs

1/* This Source Code Form is subject to the terms of the Mozilla Public
2 * License, v. 2.0. If a copy of the MPL was not distributed with this
3 * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
4
//! This module contains the convoluted logic that goes into uploading content into
//! the texture cache's textures.
//!
//! We need to support various combinations of code paths depending on the quirks of
//! each hardware/driver configuration:
//! - direct upload,
//! - staged upload via a pixel buffer object,
//! - staged upload via a direct upload to a staging texture where PBOs aren't supported,
//! - copy from the staging to destination textures, either via blits or batched draw calls.
//!
//! Conceptually a lot of this logic should probably be in the device module, but some code
//! here relies on submitting draw calls via the renderer.
17
18
19use std::mem;
20use std::collections::VecDeque;
21use std::sync::Arc;
22use std::time::Duration;
23use euclid::{Transform3D, point2};
24use malloc_size_of::MallocSizeOfOps;
25use api::units::*;
26use api::{ExternalImageSource, ImageBufferKind, ImageFormat};
27use crate::renderer::{
28    Renderer, VertexArrayKind, RendererStats, TextureSampler, TEXTURE_CACHE_DBG_CLEAR_COLOR
29};
30use crate::internal_types::{
31    FastHashMap, TextureUpdateSource, Swizzle, TextureCacheUpdate,
32    CacheTextureId, RenderTargetInfo,
33};
34use crate::device::{
35    Device, UploadMethod, Texture, DrawTarget, UploadStagingBuffer, TextureFlags, TextureUploader,
36    TextureFilter,
37};
38use crate::gpu_types::CopyInstance;
39use crate::batch::BatchTextures;
40use crate::texture_pack::{GuillotineAllocator, FreeRectSlice};
41use crate::profiler;
42use crate::render_api::MemoryReport;
43
44pub const BATCH_UPLOAD_TEXTURE_SIZE: DeviceIntSize = DeviceIntSize::new(512, 512);
45const BATCH_UPLOAD_FORMAT_COUNT: usize = 4;
46
47/// Upload a number of items to texture cache textures.
48///
49/// This is the main entry point of the texture cache upload code.
50/// See also the module documentation for more information.
51pub fn upload_to_texture_cache(
52    renderer: &mut Renderer,
53    update_list: FastHashMap<CacheTextureId, Vec<TextureCacheUpdate>>,
54) {
55    if update_list.is_empty() {
56        return;
57    }
58
59    let mut stats = UploadStats {
60        num_draw_calls: 0,
61        upload_time: 0,
62        cpu_buffer_alloc_time: 0,
63        texture_alloc_time: 0,
64        cpu_copy_time: 0,
65        gpu_copy_commands_time: 0,
66        bytes_uploaded: 0,
67        items_uploaded: 0,
68    };
69
70    let upload_total_start = zeitstempel::now();
71
72    let mut batch_upload_textures = Vec::new();
73
74    // A list of copies that must be performed from the temporary textures to the texture cache.
75    let mut batch_upload_copies = Vec::new();
76
77    // For each texture format, this stores a list of staging buffers
78    // and a texture allocator for packing the buffers.
79    let mut batch_upload_buffers = FastHashMap::default();
80
81    // For best performance we use a single TextureUploader for all uploads.
82    // This allows us to fill PBOs more efficiently and therefore allocate fewer PBOs.
83    let mut uploader = renderer.device.upload_texture(
84        &mut renderer.texture_upload_pbo_pool,
85    );
86
87    let num_updates = update_list.len();
88
89    for (texture_id, updates) in update_list {
90        let texture = &renderer.texture_resolver.texture_cache_map[&texture_id].texture;
91        for update in updates {
92            let TextureCacheUpdate { rect, stride, offset, format_override, source } = update;
93            let mut arc_data = None;
94            let dummy_data;
95            let data = match source {
96                TextureUpdateSource::Bytes { ref data } => {
97                    arc_data = Some(data.clone());
98                    &data[offset as usize ..]
99                }
100                TextureUpdateSource::External { id, channel_index } => {
101                    let handler = renderer.external_image_handler
102                        .as_mut()
103                        .expect("Found external image, but no handler set!");
104                    // The filter is only relevant for NativeTexture external images.
105                    match handler.lock(id, channel_index, false).source {
106                        ExternalImageSource::RawData(data) => {
107                            &data[offset as usize ..]
108                        }
109                        ExternalImageSource::Invalid => {
110                            // Create a local buffer to fill the pbo.
111                            let bpp = texture.get_format().bytes_per_pixel();
112                            let width = stride.unwrap_or(rect.width() * bpp);
113                            let total_size = width * rect.height();
114                            // WR haven't support RGBAF32 format in texture_cache, so
115                            // we use u8 type here.
116                            dummy_data = vec![0xFFu8; total_size as usize];
117                            &dummy_data
118                        }
119                        ExternalImageSource::NativeTexture(eid) => {
120                            panic!("Unexpected external texture {:?} for the texture cache update of {:?}", eid, id);
121                        }
122                    }
123                }
124                TextureUpdateSource::DebugClear => {
125                    let draw_target = DrawTarget::from_texture(
126                        texture,
127                        false,
128                    );
129                    renderer.device.bind_draw_target(draw_target);
130                    renderer.device.clear_target(
131                        Some(TEXTURE_CACHE_DBG_CLEAR_COLOR),
132                        None,
133                        Some(draw_target.to_framebuffer_rect(update.rect.to_i32()))
134                    );
135
136                    continue;
137                }
138            };
139
140            stats.items_uploaded += 1;
141
142            let use_batch_upload = renderer.device.use_batched_texture_uploads() &&
143                texture.flags().contains(TextureFlags::IS_SHARED_TEXTURE_CACHE) &&
144                rect.width() <= BATCH_UPLOAD_TEXTURE_SIZE.width &&
145                rect.height() <= BATCH_UPLOAD_TEXTURE_SIZE.height &&
146                rect.area() < renderer.device.batched_upload_threshold();
147
148            if use_batch_upload
149                && arc_data.is_some()
150                && matches!(renderer.device.upload_method(), &UploadMethod::Immediate)
151                && rect.area() > BATCH_UPLOAD_TEXTURE_SIZE.area() / 2 {
152                skip_staging_buffer(
153                    &mut renderer.device,
154                    &mut renderer.staging_texture_pool,
155                    rect,
156                    stride,
157                    arc_data.unwrap(),
158                    texture_id,
159                    texture,
160                    &mut batch_upload_buffers,
161                    &mut batch_upload_textures,
162                    &mut batch_upload_copies,
163                    &mut stats,
164                );
165            } else if use_batch_upload {
166                copy_into_staging_buffer(
167                    &mut renderer.device,
168                    &mut uploader,
169                    &mut renderer.staging_texture_pool,
170                    rect,
171                    stride,
172                    data,
173                    texture_id,
174                    texture,
175                    &mut batch_upload_buffers,
176                    &mut batch_upload_textures,
177                    &mut batch_upload_copies,
178                    &mut stats,
179                );
180            } else {
181                let upload_start_time = zeitstempel::now();
182
183                stats.bytes_uploaded += uploader.upload(
184                    &mut renderer.device,
185                    texture,
186                    rect,
187                    stride,
188                    format_override,
189                    data.as_ptr(),
190                    data.len()
191                );
192
193                stats.upload_time += zeitstempel::now() - upload_start_time;
194            }
195
196            if let TextureUpdateSource::External { id, channel_index } = source {
197                let handler = renderer.external_image_handler
198                    .as_mut()
199                    .expect("Found external image, but no handler set!");
200                handler.unlock(id, channel_index);
201            }
202        }
203    }
204
205    let upload_start_time = zeitstempel::now();
206    // Upload batched texture updates to their temporary textures.
207    for batch_buffer in batch_upload_buffers.into_iter().map(|(_, (_, buffers))| buffers).flatten() {
208        let texture = &batch_upload_textures[batch_buffer.texture_index];
209        match batch_buffer.staging_buffer {
210            StagingBufferKind::Pbo(pbo) => {
211                stats.bytes_uploaded += uploader.upload_staged(
212                    &mut renderer.device,
213                    texture,
214                    DeviceIntRect::from_size(texture.get_dimensions()),
215                    None,
216                    pbo,
217                );
218            }
219            StagingBufferKind::CpuBuffer { bytes, .. } => {
220                let bpp = texture.get_format().bytes_per_pixel();
221                stats.bytes_uploaded += uploader.upload(
222                    &mut renderer.device,
223                    texture,
224                    batch_buffer.upload_rect,
225                    Some(BATCH_UPLOAD_TEXTURE_SIZE.width * bpp),
226                    None,
227                    bytes.as_ptr(),
228                    bytes.len()
229                );
230                renderer.staging_texture_pool.return_temporary_buffer(bytes);
231            }
232            StagingBufferKind::Image { bytes, stride } => {
233                stats.bytes_uploaded += uploader.upload(
234                    &mut renderer.device,
235                    texture,
236                    batch_buffer.upload_rect,
237                    stride,
238                    None,
239                    bytes.as_ptr(),
240                    bytes.len()
241                );
242            }
243        }
244    }
245    stats.upload_time += zeitstempel::now() - upload_start_time;
246
247
248    // Flush all uploads, batched or otherwise.
249    let flush_start_time = zeitstempel::now();
250    uploader.flush(&mut renderer.device);
251    stats.upload_time += zeitstempel::now() - flush_start_time;
252
253    if !batch_upload_copies.is_empty() {
254        // Copy updates that were batch uploaded to their correct destination in the texture cache.
255        // Sort them by destination and source to minimize framebuffer binding changes.
256        batch_upload_copies.sort_unstable_by_key(|b| (b.dest_texture_id.0, b.src_texture_index));
257
258        let gpu_copy_start = zeitstempel::now();
259
260        if renderer.device.use_draw_calls_for_texture_copy() {
261            // Some drivers have a very high CPU overhead when submitting hundreds of small blit
262            // commands (low end intel drivers on Windows for example can take take 100+ ms submitting a
263            // few hundred blits). In this case we do the copy with batched draw calls.
264            copy_from_staging_to_cache_using_draw_calls(
265                renderer,
266                &mut stats,
267                &batch_upload_textures,
268                batch_upload_copies,
269            );
270        } else {
271            copy_from_staging_to_cache(
272                renderer,
273                &batch_upload_textures,
274                batch_upload_copies,
275            );
276        }
277
278        stats.gpu_copy_commands_time += zeitstempel::now() - gpu_copy_start;
279    }
280
281    for texture in batch_upload_textures.drain(..) {
282        renderer.staging_texture_pool.return_texture(texture);
283    }
284
285    // Update the profile counters. We use add instead of set because
286    // this function can be called several times per frame.
287    // We don't update the counters when their value is zero, so that
288    // the profiler can treat them as events and we can get notified
289    // when they happen.
290
291    let upload_total = zeitstempel::now() - upload_total_start;
292    renderer.profile.add(
293        profiler::TOTAL_UPLOAD_TIME,
294        profiler::ns_to_ms(upload_total)
295    );
296
297    if num_updates > 0 {
298        renderer.profile.add(profiler::TEXTURE_UPLOADS, num_updates);
299    }
300
301    if stats.bytes_uploaded > 0 {
302        renderer.profile.add(
303            profiler::TEXTURE_UPLOADS_MEM,
304            profiler::bytes_to_mb(stats.bytes_uploaded)
305        );
306    }
307
308    if stats.cpu_copy_time > 0 {
309        renderer.profile.add(
310            profiler::UPLOAD_CPU_COPY_TIME,
311            profiler::ns_to_ms(stats.cpu_copy_time)
312        );
313    }
314    if stats.upload_time > 0 {
315        renderer.profile.add(
316            profiler::UPLOAD_TIME,
317            profiler::ns_to_ms(stats.upload_time)
318        );
319    }
320    if stats.texture_alloc_time > 0 {
321        renderer.profile.add(
322            profiler::STAGING_TEXTURE_ALLOCATION_TIME,
323            profiler::ns_to_ms(stats.texture_alloc_time)
324        );
325    }
326    if stats.cpu_buffer_alloc_time > 0 {
327        renderer.profile.add(
328            profiler::CPU_TEXTURE_ALLOCATION_TIME,
329            profiler::ns_to_ms(stats.cpu_buffer_alloc_time)
330        );
331    }
332    if stats.num_draw_calls > 0{
333        renderer.profile.add(
334            profiler::UPLOAD_NUM_COPY_BATCHES,
335            stats.num_draw_calls
336        );
337    }
338
339    if stats.gpu_copy_commands_time > 0 {
340        renderer.profile.add(
341            profiler::UPLOAD_GPU_COPY_TIME,
342            profiler::ns_to_ms(stats.gpu_copy_commands_time)
343        );
344    }
345
346    let add_markers = profiler::thread_is_being_profiled();
347    if add_markers && stats.bytes_uploaded > 0 {
348    	let details = format!("{} bytes uploaded, {} items", stats.bytes_uploaded, stats.items_uploaded);
349    	profiler::add_text_marker(&"Texture uploads", &details, Duration::from_nanos(upload_total));
350    }
351}
352
353/// Copy an item into a batched upload staging buffer.
354fn copy_into_staging_buffer<'a>(
355    device: &mut Device,
356    uploader: &mut TextureUploader< 'a>,
357    staging_texture_pool: &mut UploadTexturePool,
358    update_rect: DeviceIntRect,
359    update_stride: Option<i32>,
360    data: &[u8],
361    dest_texture_id: CacheTextureId,
362    texture: &Texture,
363    batch_upload_buffers: &mut FastHashMap<ImageFormat, (GuillotineAllocator, Vec<BatchUploadBuffer<'a>>)>,
364    batch_upload_textures: &mut Vec<Texture>,
365    batch_upload_copies: &mut Vec<BatchUploadCopy>,
366    stats: &mut UploadStats
367) {
368    let (allocator, buffers) = batch_upload_buffers.entry(texture.get_format())
369        .or_insert_with(|| (GuillotineAllocator::new(None), Vec::new()));
370
371    // Allocate a region within the staging buffer for this update. If there is
372    // no room in an existing buffer then allocate another texture and buffer.
373    let (slice, origin) = match allocator.allocate(&update_rect.size()) {
374        Some((slice, origin)) => (slice, origin),
375        None => {
376            let new_slice = FreeRectSlice(buffers.len() as u32);
377            allocator.extend(new_slice, BATCH_UPLOAD_TEXTURE_SIZE, update_rect.size());
378
379            let texture_alloc_time_start = zeitstempel::now();
380            let staging_texture = staging_texture_pool.get_texture(device, texture.get_format());
381            stats.texture_alloc_time = zeitstempel::now() - texture_alloc_time_start;
382
383            let texture_index = batch_upload_textures.len();
384            batch_upload_textures.push(staging_texture);
385
386            let cpu_buffer_alloc_start_time = zeitstempel::now();
387            let staging_buffer = match device.upload_method() {
388                UploadMethod::Immediate => StagingBufferKind::CpuBuffer {
389                    bytes: staging_texture_pool.get_temporary_buffer(),
390                },
391                UploadMethod::PixelBuffer(_) => {
392                    match uploader.stage(
393                        device,
394                        texture.get_format(),
395                        BATCH_UPLOAD_TEXTURE_SIZE,
396                    ) {
397                        Ok(pbo) => StagingBufferKind::Pbo(pbo),
398                        Err(_) => StagingBufferKind::CpuBuffer {
399                            bytes: staging_texture_pool.get_temporary_buffer(),
400                        },
401                    }
402                }
403            };
404            stats.cpu_buffer_alloc_time += zeitstempel::now() - cpu_buffer_alloc_start_time;
405
406            buffers.push(BatchUploadBuffer {
407                staging_buffer,
408                texture_index,
409                upload_rect: DeviceIntRect::zero()
410            });
411
412            (new_slice, DeviceIntPoint::zero())
413        }
414    };
415    let buffer = &mut buffers[slice.0 as usize];
416    let allocated_rect = DeviceIntRect::from_origin_and_size(origin, update_rect.size());
417    buffer.upload_rect = buffer.upload_rect.union(&allocated_rect);
418
419    batch_upload_copies.push(BatchUploadCopy {
420        src_texture_index: buffer.texture_index,
421        src_offset: allocated_rect.min,
422        dest_texture_id,
423        dest_offset: update_rect.min,
424        size: update_rect.size(),
425    });
426
427    unsafe {
428        let memcpy_start_time = zeitstempel::now();
429        let bpp = texture.get_format().bytes_per_pixel() as usize;
430        let width_bytes = update_rect.width() as usize * bpp;
431        let src_stride = update_stride.map_or(width_bytes, |stride| {
432            assert!(stride >= 0);
433            stride as usize
434        });
435        let src_size = (update_rect.height() as usize - 1) * src_stride + width_bytes;
436        assert!(src_size <= data.len());
437
438        let src: &[mem::MaybeUninit<u8>] = std::slice::from_raw_parts(data.as_ptr() as *const _, src_size);
439        let (dst_stride, dst) = match &mut buffer.staging_buffer {
440            StagingBufferKind::Pbo(buffer) => (
441                buffer.get_stride(),
442                buffer.get_mapping(),
443            ),
444            StagingBufferKind::CpuBuffer { bytes } => (
445                BATCH_UPLOAD_TEXTURE_SIZE.width as usize * bpp,
446                &mut bytes[..],
447            ),
448            StagingBufferKind::Image { .. } => unreachable!(),
449        };
450
451        // copy the data line-by-line in to the buffer so that we do not overwrite
452        // any other region of the buffer.
453        for y in 0..allocated_rect.height() as usize {
454            let src_start = y * src_stride;
455            let src_end = src_start + width_bytes;
456            let dst_start = (allocated_rect.min.y as usize + y as usize) * dst_stride +
457                allocated_rect.min.x as usize * bpp;
458            let dst_end = dst_start + width_bytes;
459
460            dst[dst_start..dst_end].copy_from_slice(&src[src_start..src_end])
461        }
462
463        stats.cpu_copy_time += zeitstempel::now() - memcpy_start_time;
464    }
465}
466
467/// Take this code path instead of copying into a staging CPU buffer when the image
468/// we would copy is large enough that it's unlikely anything else would fit in the
469/// buffer, therefore we might as well copy directly from the source image's pixels.
470fn skip_staging_buffer<'a>(
471    device: &mut Device,
472    staging_texture_pool: &mut UploadTexturePool,
473    update_rect: DeviceIntRect,
474    stride: Option<i32>,
475    data: Arc<Vec<u8>>,
476    dest_texture_id: CacheTextureId,
477    texture: &Texture,
478    batch_upload_buffers: &mut FastHashMap<ImageFormat, (GuillotineAllocator, Vec<BatchUploadBuffer<'a>>)>,
479    batch_upload_textures: &mut Vec<Texture>,
480    batch_upload_copies: &mut Vec<BatchUploadCopy>,
481    stats: &mut UploadStats
482) {
483    let (_, buffers) = batch_upload_buffers.entry(texture.get_format())
484        .or_insert_with(|| (GuillotineAllocator::new(None), Vec::new()));
485
486    let texture_alloc_time_start = zeitstempel::now();
487    let staging_texture = staging_texture_pool.get_texture(device, texture.get_format());
488    stats.texture_alloc_time = zeitstempel::now() - texture_alloc_time_start;
489
490    let texture_index = batch_upload_textures.len();
491    batch_upload_textures.push(staging_texture);
492
493    buffers.push(BatchUploadBuffer {
494        staging_buffer: StagingBufferKind::Image { bytes: data, stride },
495        texture_index,
496        upload_rect: DeviceIntRect::from_size(update_rect.size())
497    });
498
499    batch_upload_copies.push(BatchUploadCopy {
500        src_texture_index: texture_index,
501        src_offset: point2(0, 0),
502        dest_texture_id,
503        dest_offset: update_rect.min,
504        size: update_rect.size(),
505    });
506}
507
508
509/// Copy from the staging PBOs or textures to texture cache textures using blit commands.
510///
511/// Using blits instead of draw calls is supposedly more efficient but some drivers have
512/// a very high per-command overhead so in some configurations we end up using
513/// copy_from_staging_to_cache_using_draw_calls instead.
514fn copy_from_staging_to_cache(
515    renderer: &mut Renderer,
516    batch_upload_textures: &[Texture],
517    batch_upload_copies: Vec<BatchUploadCopy>,
518) {
519    for copy in batch_upload_copies {
520        let dest_texture = &renderer.texture_resolver.texture_cache_map[&copy.dest_texture_id].texture;
521
522        renderer.device.copy_texture_sub_region(
523            &batch_upload_textures[copy.src_texture_index],
524            copy.src_offset.x as _,
525            copy.src_offset.y as _,
526            dest_texture,
527            copy.dest_offset.x as _,
528            copy.dest_offset.y as _,
529            copy.size.width as _,
530            copy.size.height as _,
531        );
532    }
533}
534
535/// Generate and submit composite shader batches to copy from
536/// the staging textures to the destination cache textures.
537///
538/// If this shows up in GPU time ptofiles we could replace it with
539/// a simpler shader (composite.glsl is already quite simple).
540fn copy_from_staging_to_cache_using_draw_calls(
541    renderer: &mut Renderer,
542    stats: &mut UploadStats,
543    batch_upload_textures: &[Texture],
544    batch_upload_copies: Vec<BatchUploadCopy>,
545) {
546    let mut copy_instances = Vec::new();
547    let mut prev_src = None;
548    let mut prev_dst = None;
549    let mut dst_texture_size = DeviceSize::new(0.0, 0.0);
550
551    for copy in batch_upload_copies {
552
553        let src_changed = prev_src != Some(copy.src_texture_index);
554        let dst_changed = prev_dst != Some(copy.dest_texture_id);
555
556        if (src_changed || dst_changed) && !copy_instances.is_empty() {
557            renderer.draw_instanced_batch(
558                &copy_instances,
559                VertexArrayKind::Copy,
560                // We bind the staging texture manually because it isn't known
561                // to the texture resolver.
562                &BatchTextures::empty(),
563                &mut RendererStats::default(),
564            );
565
566            stats.num_draw_calls += 1;
567            copy_instances.clear();
568        }
569
570        if dst_changed {
571            let dest_texture = &renderer.texture_resolver.texture_cache_map[&copy.dest_texture_id].texture;
572            dst_texture_size = dest_texture.get_dimensions().to_f32();
573
574            let draw_target = DrawTarget::from_texture(dest_texture, false);
575            renderer.device.bind_draw_target(draw_target);
576
577            renderer.shaders
578                .borrow_mut()
579                .ps_copy()
580                .bind(
581                    &mut renderer.device,
582                    &Transform3D::identity(),
583                    None,
584                    &mut renderer.renderer_errors,
585                    &mut renderer.profile,
586                    &mut renderer.command_log,
587                );
588
589            prev_dst = Some(copy.dest_texture_id);
590        }
591
592        if src_changed {
593            renderer.device.bind_texture(
594                TextureSampler::Color0,
595                &batch_upload_textures[copy.src_texture_index],
596                Swizzle::default(),
597            );
598
599            prev_src = Some(copy.src_texture_index)
600        }
601
602        let src_rect = DeviceRect::from_origin_and_size(
603            copy.src_offset.to_f32(),
604            copy.size.to_f32(),
605        );
606
607        let dst_rect = DeviceRect::from_origin_and_size(
608            copy.dest_offset.to_f32(),
609            copy.size.to_f32(),
610        );
611
612        copy_instances.push(CopyInstance {
613            src_rect,
614            dst_rect,
615            dst_texture_size,
616        });
617    }
618
619    if !copy_instances.is_empty() {
620        renderer.draw_instanced_batch(
621            &copy_instances,
622            VertexArrayKind::Copy,
623            &BatchTextures::empty(),
624            &mut RendererStats::default(),
625        );
626
627        stats.num_draw_calls += 1;
628    }
629}
630
631/// A very basic pool to avoid reallocating staging textures as well as staging
632/// CPU side buffers.
633pub struct UploadTexturePool {
634    /// The textures in the pool associated with a last used frame index.
635    ///
636    /// The outer array corresponds to each of teh three supported texture formats.
637    textures: [VecDeque<(Texture, u64)>; BATCH_UPLOAD_FORMAT_COUNT],
638    // Frame at which to deallocate some textures if there are too many in the pool,
639    // for each format.
640    delay_texture_deallocation: [u64; BATCH_UPLOAD_FORMAT_COUNT],
641    current_frame: u64,
642
643    /// Temporary buffers that are used when using staging uploads + glTexImage2D.
644    ///
645    /// Temporary buffers aren't used asynchronously so they can be reused every frame.
646    /// To keep things simple we always allocate enough memory for formats with four bytes
647    /// per pixel (more than we need for alpha-only textures but it works just as well).
648    temporary_buffers: Vec<Vec<mem::MaybeUninit<u8>>>,
649    min_temporary_buffers: usize,
650    delay_buffer_deallocation: u64,
651}
652
653impl UploadTexturePool {
654    pub fn new() -> Self {
655        UploadTexturePool {
656            textures: [VecDeque::new(), VecDeque::new(), VecDeque::new(), VecDeque::new()],
657            delay_texture_deallocation: [0; BATCH_UPLOAD_FORMAT_COUNT],
658            current_frame: 0,
659            temporary_buffers: Vec::new(),
660            min_temporary_buffers: 0,
661            delay_buffer_deallocation: 0,
662        }
663    }
664
665    fn format_index(&self, format: ImageFormat) -> usize {
666        match format {
667            ImageFormat::RGBA8 => 0,
668            ImageFormat::BGRA8 => 1,
669            ImageFormat::R8 => 2,
670            ImageFormat::R16 => 3,
671            _ => { panic!("unexpected format {:?}", format); }
672        }
673    }
674
675    pub fn begin_frame(&mut self) {
676        self.current_frame += 1;
677        self.min_temporary_buffers = self.temporary_buffers.len();
678    }
679
680    /// Create or reuse a staging texture.
681    ///
682    /// See also return_texture.
683    pub fn get_texture(&mut self, device: &mut Device, format: ImageFormat) -> Texture {
684
685        // First try to reuse a texture from the pool.
686        // "available" here means hasn't been used for 2 frames to avoid stalls.
687        // No need to scan the vector. Newer textures are always pushed at the back
688        // of the vector so we know the first element is the least recently used.
689        let format_idx = self.format_index(format);
690        let can_reuse = self.textures[format_idx].get(0)
691            .map(|tex| self.current_frame - tex.1 > 2)
692            .unwrap_or(false);
693
694        if can_reuse {
695            return self.textures[format_idx].pop_front().unwrap().0;
696        }
697
698        // If we couldn't find an available texture, create a new one.
699
700        device.create_texture(
701            ImageBufferKind::Texture2D,
702            format,
703            BATCH_UPLOAD_TEXTURE_SIZE.width,
704            BATCH_UPLOAD_TEXTURE_SIZE.height,
705            TextureFilter::Nearest,
706            // Currently we need render target support as we always use glBlitFramebuffer
707            // to copy the texture data. Instead, we should use glCopyImageSubData on some
708            // platforms, and avoid creating the FBOs in that case.
709            Some(RenderTargetInfo { has_depth: false }),
710        )
711    }
712
713    /// Hand the staging texture back to the pool after being done with uploads.
714    ///
715    /// The texture must have been obtained from this pool via get_texture.
716    pub fn return_texture(&mut self, texture: Texture) {
717        let format_idx = self.format_index(texture.get_format());
718        self.textures[format_idx].push_back((texture, self.current_frame));
719    }
720
721    /// Create or reuse a temporary CPU buffer.
722    ///
723    /// These buffers are used in the batched upload path when PBOs are not supported.
724    /// Content is first written to the temporary buffer and uploaded via a single
725    /// glTexSubImage2D call.
726    pub fn get_temporary_buffer(&mut self) -> Vec<mem::MaybeUninit<u8>> {
727        let buffer = self.temporary_buffers.pop().unwrap_or_else(|| {
728            vec![mem::MaybeUninit::new(0); BATCH_UPLOAD_TEXTURE_SIZE.area() as usize * 4]
729        });
730        self.min_temporary_buffers = self.min_temporary_buffers.min(self.temporary_buffers.len());
731        buffer
732    }
733
734    /// Return memory that was obtained from this pool via get_temporary_buffer.
735    pub fn return_temporary_buffer(&mut self, buffer: Vec<mem::MaybeUninit<u8>>) {
736        assert_eq!(buffer.len(), BATCH_UPLOAD_TEXTURE_SIZE.area() as usize * 4);
737        self.temporary_buffers.push(buffer);
738    }
739
740    /// Deallocate this pool's CPU and GPU memory.
741    pub fn delete_textures(&mut self, device: &mut Device) {
742        for format in &mut self.textures {
743            while let Some(texture) = format.pop_back() {
744                device.delete_texture(texture.0)
745            }
746        }
747        self.temporary_buffers.clear();
748    }
749
750    /// Deallocate some textures if there are too many for a long time.
751    pub fn end_frame(&mut self, device: &mut Device) {
752        for format_idx in 0..self.textures.len() {
753            // Count the number of reusable staging textures.
754            // if it stays high for a large number of frames, truncate it back to 8-ish
755            // over multiple frames.
756
757            let mut num_reusable_textures = 0;
758            for texture in &self.textures[format_idx] {
759                if self.current_frame - texture.1 > 2 {
760                    num_reusable_textures += 1;
761                }
762            }
763
764            if num_reusable_textures < 8 {
765                // Don't deallocate textures for another 120 frames.
766                self.delay_texture_deallocation[format_idx] = self.current_frame + 120;
767            }
768
769            // Deallocate up to 4 staging textures every frame.
770            let to_remove = if self.current_frame > self.delay_texture_deallocation[format_idx] {
771                num_reusable_textures.min(4)
772            } else {
773                0
774            };
775
776            for _ in 0..to_remove {
777                let texture = self.textures[format_idx].pop_front().unwrap().0;
778                device.delete_texture(texture);
779            }
780        }
781
782        // Similar logic for temporary CPU buffers. Our calls to get and return
783        // temporary buffers should have been balanced for this frame, but the call
784        // get_temporary_buffer will allocate a buffer if the vec is empty. Since we
785        // carry these buffers from frame to frame, we keep track of the smallest
786        // length of the temporary_buffers vec that we encountered this frame. Those
787        // buffers were not touched and we deallocate some if there are a lot of them.
788        let unused_buffers = self.min_temporary_buffers;
789        if unused_buffers < 8 {
790            self.delay_buffer_deallocation = self.current_frame + 120;
791        }
792        let to_remove = if self.current_frame > self.delay_buffer_deallocation  {
793            unused_buffers.min(4)
794        } else {
795            0
796        };
797        for _ in 0..to_remove {
798            // Unlike textures it doesn't matter whether we pop from the front or back
799            // of the vector.
800            self.temporary_buffers.pop();
801        }
802    }
803
804    pub fn report_memory_to(&self, report: &mut MemoryReport, size_op_funs: &MallocSizeOfOps) {
805        for buf in &self.temporary_buffers {
806            report.upload_staging_memory += unsafe { (size_op_funs.size_of_op)(buf.as_ptr() as *const _) };
807        }
808
809        for format in &self.textures {
810            for texture in format {
811                report.upload_staging_textures += texture.0.size_in_bytes();
812            }
813        }
814    }
815}
816
/// Statistics gathered while processing one list of texture cache updates.
///
/// All the `*_time` fields are durations in nanoseconds.
struct UploadStats {
    /// Number of draw calls submitted to copy from staging to cache textures.
    num_draw_calls: u32,
    /// Time spent submitting and flushing texture uploads.
    upload_time: u64,
    /// Time spent obtaining staging buffers (CPU buffers or PBOs).
    cpu_buffer_alloc_time: u64,
    /// Time spent obtaining staging textures.
    texture_alloc_time: u64,
    /// Time spent copying pixels into staging buffers.
    cpu_copy_time: u64,
    /// Time spent submitting the copies from staging to cache textures.
    gpu_copy_commands_time: u64,
    /// Number of bytes reported as uploaded by the texture uploader.
    bytes_uploaded: usize,
    /// Number of updates that resulted in an upload.
    items_uploaded: usize,
}
827
828#[derive(Debug)]
829enum StagingBufferKind<'a> {
830    Pbo(UploadStagingBuffer<'a>),
831    CpuBuffer { bytes: Vec<mem::MaybeUninit<u8>> },
832    Image { bytes: Arc<Vec<u8>>, stride: Option<i32> },
833}
834#[derive(Debug)]
835struct BatchUploadBuffer<'a> {
836    staging_buffer: StagingBufferKind<'a>,
837    texture_index: usize,
838    // A rectangle containing all items going into this staging texture, so
839    // that we can avoid uploading the entire area if we are using glTexSubImage2d.
840    upload_rect: DeviceIntRect,
841}
842
843// On some devices performing many small texture uploads is slow, so instead we batch
844// updates in to a small number of uploads to temporary textures, then copy from those
845// textures to the correct place in the texture cache.
846// A list of temporary textures that batches of updates are uploaded to.
847#[derive(Debug)]
848struct BatchUploadCopy {
849    // Index within batch_upload_textures
850    src_texture_index: usize,
851    src_offset: DeviceIntPoint,
852    dest_texture_id: CacheTextureId,
853    dest_offset: DeviceIntPoint,
854    size: DeviceIntSize,
855}