webrender/renderer/gpu_cache.rs

/* This Source Code Form is subject to the terms of the Mozilla Public
 * License, v. 2.0. If a copy of the MPL was not distributed with this
 * file, You can obtain one at http://mozilla.org/MPL/2.0/. */

use std::{cmp, mem};
use api::units::*;
use malloc_size_of::MallocSizeOfOps;
use crate::{
    device::{CustomVAO, Device, DrawTarget, Program, ReadTarget, Texture, TextureFilter, UploadPBOPool, VBO},
    gpu_cache::{GpuBlockData, GpuCacheUpdate, GpuCacheUpdateList},
    internal_types::{FrameId, RenderTargetInfo, Swizzle},
    prim_store::DeferredResolve,
    profiler,
    render_api::MemoryReport,
};

/// Enabling this toggle forces the GPU cache scattered texture to be
/// resized every frame, which lets GPU debuggers verify that the resize
/// is performed correctly.
const GPU_CACHE_RESIZE_TEST: bool = false;

/// Tracks the state of each row in the GPU cache texture.
struct CacheRow {
    /// Mirrored block data on CPU for this row. We store a copy of
    /// the data on the CPU side to improve upload batching.
    cpu_blocks: Box<[GpuBlockData; super::MAX_VERTEX_TEXTURE_WIDTH]>,
    /// The first offset in this row that is dirty.
    min_dirty: u16,
    /// One past the last dirty offset in this row (an exclusive bound).
    max_dirty: u16,
}

impl CacheRow {
    fn new() -> Self {
        CacheRow {
            cpu_blocks: Box::new([GpuBlockData::EMPTY; super::MAX_VERTEX_TEXTURE_WIDTH]),
            min_dirty: super::MAX_VERTEX_TEXTURE_WIDTH as _,
            max_dirty: 0,
        }
    }

    fn is_dirty(&self) -> bool {
        return self.min_dirty < self.max_dirty;
    }

    fn clear_dirty(&mut self) {
        self.min_dirty = super::MAX_VERTEX_TEXTURE_WIDTH as _;
        self.max_dirty = 0;
    }

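    /// Marks `block_count` blocks starting at `block_offset` as dirty, widening
    /// the row's dirty range as needed.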
    fn add_dirty(&mut self, block_offset: usize, block_count: usize) {
        self.min_dirty = self.min_dirty.min(block_offset as _);
        self.max_dirty = self.max_dirty.max((block_offset + block_count) as _);
    }

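    /// Returns the dirty slice of this row, i.e. the blocks in `min_dirty .. max_dirty`.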
    fn dirty_blocks(&self) -> &[GpuBlockData] {
        return &self.cpu_blocks[self.min_dirty as usize .. self.max_dirty as usize];
    }
}

/// The bus over which CPU and GPU versions of the GPU cache
/// get synchronized.
enum GpuCacheBus {
    /// PBO-based updates, which currently operate at row granularity and
    /// are therefore subject to fragmentation issues.
    PixelBuffer {
        /// Per-row data.
        rows: Vec<CacheRow>,
    },
    /// Shader-based scattering updates. Currently rendered as a set
    /// of points into the GPU texture, each carrying a `GpuBlockData`.
    Scatter {
        /// Special program to run the scattered update.
        program: Program,
        /// VAO containing the source vertex buffers.
        vao: CustomVAO,
        /// VBO for positional data, supplied as normalized `u16`.
        buf_position: VBO<[u16; 2]>,
        /// VBO for gpu block data.
        buf_value: VBO<GpuBlockData>,
        /// Currently stored block count.
        count: usize,
    },
}

/// The device-specific representation of the cache texture in gpu_cache.rs
pub struct GpuCacheTexture {
    texture: Option<Texture>,
    bus: GpuCacheBus,
}

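// A rough sketch of the per-frame flow, driven by `Renderer::update_gpu_cache`
// below: `prepare_for_updates` sizes the texture (and the scatter VBOs),
// `update` is called once per pending `GpuCacheUpdateList`, and `flush`
// pushes the accumulated changes to the GPU texture.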
impl GpuCacheTexture {
    /// Ensures that we have an appropriately-sized texture.
    fn ensure_texture(&mut self, device: &mut Device, height: i32) {
        // If we already have a texture that works, we're done.
        if self.texture.as_ref().map_or(false, |t| t.get_dimensions().height >= height) {
            if GPU_CACHE_RESIZE_TEST {
                // Special debug mode - resize the texture even though it's fine.
            } else {
                return;
            }
        }

        // Take the old texture, if any.
        let blit_source = self.texture.take();

        // Create the new texture.
        assert!(height >= 2, "Height is too small for ANGLE");
        let new_size = DeviceIntSize::new(super::MAX_VERTEX_TEXTURE_WIDTH as _, height);
        // GpuCacheBus::Scatter always requires the texture to be a render target. For
        // GpuCacheBus::PixelBuffer, we only create the texture with a render target if
        // RGBAF32 render targets are actually supported, and only if glCopyImageSubData
        // is not. glCopyImageSubData does not require a render target to copy the texture
        // data, and if neither RGBAF32 render targets nor glCopyImageSubData is supported,
        // we simply re-upload the entire contents rather than copying upon resize.
        let supports_copy_image_sub_data = device.get_capabilities().supports_copy_image_sub_data;
        let supports_color_buffer_float = device.get_capabilities().supports_color_buffer_float;
        let rt_info = if matches!(self.bus, GpuCacheBus::PixelBuffer { .. })
            && (supports_copy_image_sub_data || !supports_color_buffer_float)
        {
            None
        } else {
            Some(RenderTargetInfo { has_depth: false })
        };
        let mut texture = device.create_texture(
            api::ImageBufferKind::Texture2D,
            api::ImageFormat::RGBAF32,
            new_size.width,
            new_size.height,
            TextureFilter::Nearest,
            rt_info,
        );

        // Copy the contents of the previous texture, if applicable.
        if let Some(blit_source) = blit_source {
            if !supports_copy_image_sub_data && !supports_color_buffer_float {
                // Cannot copy texture, so must re-upload everything.
                match self.bus {
                    GpuCacheBus::PixelBuffer { ref mut rows } => {
                        for row in rows {
                            row.add_dirty(0, super::MAX_VERTEX_TEXTURE_WIDTH);
                        }
                    }
                    GpuCacheBus::Scatter { .. } => {
                        panic!("Texture must be copyable to use scatter GPU cache bus method");
                    }
                }
            } else {
                device.copy_entire_texture(&mut texture, &blit_source);
            }
            device.delete_texture(blit_source);
        }

        self.texture = Some(texture);
    }

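    /// Creates the cache texture wrapper. With `use_scatter`, the scatter-shader
    /// bus is set up (program, VAO and VBOs); otherwise the PBO row-based bus is
    /// used. The texture itself is allocated lazily by `ensure_texture`.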
    pub fn new(device: &mut Device, use_scatter: bool) -> Result<Self, super::RendererError> {
        use super::desc::GPU_CACHE_UPDATE;

        let bus = if use_scatter {
            assert!(
                device.get_capabilities().supports_color_buffer_float,
                "GpuCache scatter method requires EXT_color_buffer_float",
            );
            let program = device.create_program_linked(
                "gpu_cache_update",
                &[],
                &GPU_CACHE_UPDATE,
            )?;
            let buf_position = device.create_vbo();
            let buf_value = device.create_vbo();
            // Note: the vertex attributes have to be supplied in the same order
            // as for program creation, but each assigned to a different stream.
            let vao = device.create_custom_vao(&[
                buf_position.stream_with(&GPU_CACHE_UPDATE.vertex_attributes[0..1]),
                buf_value   .stream_with(&GPU_CACHE_UPDATE.vertex_attributes[1..2]),
            ]);
            GpuCacheBus::Scatter {
                program,
                vao,
                buf_position,
                buf_value,
                count: 0,
            }
        } else {
            GpuCacheBus::PixelBuffer {
                rows: Vec::new(),
            }
        };

        Ok(GpuCacheTexture {
            texture: None,
            bus,
        })
    }

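    /// Releases the texture and any scatter-bus GPU resources owned by this cache.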
    pub fn deinit(mut self, device: &mut Device) {
        if let Some(t) = self.texture.take() {
            device.delete_texture(t);
        }
        if let GpuCacheBus::Scatter { program, vao, buf_position, buf_value, .. } = self.bus {
            device.delete_program(program);
            device.delete_custom_vao(vao);
            device.delete_vbo(buf_position);
            device.delete_vbo(buf_value);
        }
    }

    pub fn get_height(&self) -> i32 {
        self.texture.as_ref().map_or(0, |t| t.get_dimensions().height)
    }

    #[cfg(feature = "capture")]
    pub fn get_texture(&self) -> &Texture {
        self.texture.as_ref().unwrap()
    }

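    /// Grows the texture to at least `max_height` rows and, on the scatter bus,
    /// makes sure the scratch VBOs can hold `total_block_count` blocks.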
    fn prepare_for_updates(
        &mut self,
        device: &mut Device,
        total_block_count: usize,
        max_height: i32,
    ) {
        self.ensure_texture(device, max_height);
        match self.bus {
            GpuCacheBus::PixelBuffer { .. } => {},
            GpuCacheBus::Scatter {
                ref mut buf_position,
                ref mut buf_value,
                ref mut count,
                ..
            } => {
                *count = 0;
                if total_block_count > buf_value.allocated_count() {
                    device.allocate_vbo(buf_position, total_block_count, super::ONE_TIME_USAGE_HINT);
                    device.allocate_vbo(buf_value,    total_block_count, super::ONE_TIME_USAGE_HINT);
                }
            }
        }
    }

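    /// Marks every row of the CPU mirror as dirty so the whole cache is
    /// re-uploaded on the next flush. The scatter bus cannot be invalidated.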
    pub fn invalidate(&mut self) {
        match self.bus {
            GpuCacheBus::PixelBuffer { ref mut rows, .. } => {
                info!("Invalidating GPU caches");
                for row in rows {
                    row.add_dirty(0, super::MAX_VERTEX_TEXTURE_WIDTH);
                }
            }
            GpuCacheBus::Scatter { .. } => {
                warn!("Unable to invalidate scattered GPU cache");
            }
        }
    }

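    /// Applies one update list: on the pixel-buffer bus the blocks are copied
    /// into the CPU row mirror and the touched ranges marked dirty; on the
    /// scatter bus the blocks and their target positions are appended to the VBOs.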
    fn update(&mut self, device: &mut Device, updates: &GpuCacheUpdateList) {
        match self.bus {
            GpuCacheBus::PixelBuffer { ref mut rows, .. } => {
                for update in &updates.updates {
                    match *update {
                        GpuCacheUpdate::Copy {
                            block_index,
                            block_count,
                            address,
                        } => {
                            let row = address.v as usize;

                            // Ensure that the CPU-side shadow copy of the GPU cache data has enough
                            // rows to apply this patch.
                            while rows.len() <= row {
                                // Add a new row.
                                rows.push(CacheRow::new());
                            }

                            // Copy the blocks from the patch array into the shadow CPU copy.
                            let block_offset = address.u as usize;
                            let data = &mut rows[row].cpu_blocks;
                            for i in 0 .. block_count {
                                data[block_offset + i] = updates.blocks[block_index + i];
                            }

                            // This row is dirty (needs to be updated in GPU texture).
                            rows[row].add_dirty(block_offset, block_count);
                        }
                    }
                }
            }
            GpuCacheBus::Scatter {
                ref buf_position,
                ref buf_value,
                ref mut count,
                ..
            } => {
                // TODO: re-use this heap allocation
                // Unused positions will be left as 0xFFFF, which translates to
                // (1.0, 1.0) in the vertex output position and gets culled out.
                let mut position_data = vec![[!0u16; 2]; updates.blocks.len()];
                let size = self.texture.as_ref().unwrap().get_dimensions().to_usize();

                for update in &updates.updates {
                    match *update {
                        GpuCacheUpdate::Copy {
                            block_index,
                            block_count,
                            address,
                        } => {
                            // Convert the absolute texel position into normalized coordinates:
                            // ((2 * t + 1) << 15) / size is (t + 0.5) / size in 0.16 fixed point,
                            // i.e. the texel center.
                            let y = ((2*address.v as usize + 1) << 15) / size.height;
                            for i in 0 .. block_count {
                                let x = ((2*address.u as usize + 2*i + 1) << 15) / size.width;
                                position_data[block_index + i] = [x as _, y as _];
                            }
                        }
                    }
                }

                device.fill_vbo(buf_value, &updates.blocks, *count);
                device.fill_vbo(buf_position, &position_data, *count);
                *count += position_data.len();
            }
        }
    }

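    /// Pushes the staged updates to the GPU texture: the pixel-buffer bus uploads
    /// each dirty row span via a PBO and returns how many rows were touched; the
    /// scatter bus draws the accumulated points into the texture and returns 0.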
    fn flush(&mut self, device: &mut Device, pbo_pool: &mut UploadPBOPool) -> usize {
        let texture = self.texture.as_ref().unwrap();
        match self.bus {
            GpuCacheBus::PixelBuffer { ref mut rows } => {
                let rows_dirty = rows
                    .iter()
                    .filter(|row| row.is_dirty())
                    .count();
                if rows_dirty == 0 {
                    return 0
                }

                let mut uploader = device.upload_texture(pbo_pool);

                for (row_index, row) in rows.iter_mut().enumerate() {
                    if !row.is_dirty() {
                        continue;
                    }

                    let blocks = row.dirty_blocks();
                    let rect = DeviceIntRect::from_origin_and_size(
                        DeviceIntPoint::new(row.min_dirty as i32, row_index as i32),
                        DeviceIntSize::new(blocks.len() as i32, 1),
                    );

                    uploader.upload(device, texture, rect, None, None, blocks.as_ptr(), blocks.len());

                    row.clear_dirty();
                }

                uploader.flush(device);

                rows_dirty
            }
            GpuCacheBus::Scatter { ref program, ref vao, count, .. } => {
                device.disable_depth();
                device.set_blend(false);
                device.bind_program(program);
                device.bind_custom_vao(vao);
                device.bind_draw_target(
                    DrawTarget::from_texture(
                        texture,
                        false,
                    ),
                );
                device.draw_nonindexed_points(0, count as _);
                0
            }
        }
    }

    #[cfg(feature = "replay")]
    pub fn remove_texture(&mut self, device: &mut Device) {
        if let Some(t) = self.texture.take() {
            device.delete_texture(t);
        }
    }

    #[cfg(feature = "replay")]
    pub fn load_from_data(&mut self, texture: Texture, data: Vec<u8>) {
        assert!(self.texture.is_none());
        match self.bus {
            GpuCacheBus::PixelBuffer { ref mut rows, .. } => {
                let dim = texture.get_dimensions();
                let blocks = unsafe {
                    std::slice::from_raw_parts(
                        data.as_ptr() as *const GpuBlockData,
                        data.len() / mem::size_of::<GpuBlockData>(),
                    )
                };
                // Fill up the CPU cache from the contents we just loaded.
                rows.clear();
                rows.extend((0 .. dim.height).map(|_| CacheRow::new()));
                let chunks = blocks.chunks(super::MAX_VERTEX_TEXTURE_WIDTH);
                debug_assert_eq!(chunks.len(), rows.len());
                for (row, chunk) in rows.iter_mut().zip(chunks) {
                    row.cpu_blocks.copy_from_slice(chunk);
                }
            }
            GpuCacheBus::Scatter { .. } => {}
        }
        self.texture = Some(texture);
    }

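    /// Adds the size of the CPU row mirror and the GPU texture to a memory report.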
    pub fn report_memory_to(&self, report: &mut MemoryReport, size_op_funs: &MallocSizeOfOps) {
        if let GpuCacheBus::PixelBuffer { ref rows, .. } = self.bus {
            for row in rows.iter() {
                report.gpu_cache_cpu_mirror += unsafe { (size_op_funs.size_of_op)(row.cpu_blocks.as_ptr() as *const _) };
            }
        }

        // GPU cache GPU memory.
        report.gpu_cache_textures +=
            self.texture.as_ref().map_or(0, |t| t.size_in_bytes());
    }

    pub fn gpu_size_in_bytes(&self) -> usize {
        match &self.texture {
            Some(tex) => tex.size_in_bytes(),
            None => 0,
        }
    }
}

impl super::Renderer {
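    /// Drains the pending GPU cache update lists, applies them to the cache
    /// texture, flushes the result to the GPU and records profiler counters.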
    pub fn update_gpu_cache(&mut self) {
        let _gm = self.gpu_profiler.start_marker("gpu cache update");

        // For an artificial stress test of GPU cache resizing,
        // always pass an extra update list with at least one block in it.
        let gpu_cache_height = self.gpu_cache_texture.get_height();
        if gpu_cache_height != 0 && GPU_CACHE_RESIZE_TEST {
            self.pending_gpu_cache_updates.push(GpuCacheUpdateList {
                frame_id: FrameId::INVALID,
                clear: false,
                height: gpu_cache_height,
                blocks: vec![[1f32; 4].into()],
                updates: Vec::new(),
                debug_commands: Vec::new(),
            });
        }

        let (updated_blocks, max_requested_height) = self
            .pending_gpu_cache_updates
            .iter()
            .fold((0, gpu_cache_height), |(count, height), list| {
                (count + list.blocks.len(), cmp::max(height, list.height))
            });

        if max_requested_height > self.get_max_texture_size() && !self.gpu_cache_overflow {
            self.gpu_cache_overflow = true;
            self.renderer_errors.push(super::RendererError::MaxTextureSize);
        }

        // Note: if we decide to switch to the scatter-style GPU cache update
        // permanently, this code could be made nicer with a `BufferUploader`-style
        // helper, similar to how the `TextureUploader` API is used.
        self.gpu_cache_texture.prepare_for_updates(
            &mut self.device,
            updated_blocks,
            max_requested_height,
        );

        for update_list in self.pending_gpu_cache_updates.drain(..) {
            assert!(update_list.height <= max_requested_height);
            if update_list.frame_id > self.gpu_cache_frame_id {
                self.gpu_cache_frame_id = update_list.frame_id;
            }
            self.gpu_cache_texture
                .update(&mut self.device, &update_list);
        }

        self.profile.start_time(profiler::GPU_CACHE_UPLOAD_TIME);
        let updated_rows = self.gpu_cache_texture.flush(
            &mut self.device,
            &mut self.texture_upload_pbo_pool
        );
        self.gpu_cache_upload_time += self.profile.end_time(profiler::GPU_CACHE_UPLOAD_TIME);

        self.profile.set(profiler::GPU_CACHE_ROWS_UPDATED, updated_rows);
        self.profile.set(profiler::GPU_CACHE_BLOCKS_UPDATED, updated_blocks);
    }

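    /// Recreates the cache texture if a clear is pending, folds the deferred
    /// resolves into the pending updates, applies them, and binds the cache
    /// texture to its sampler for the frame.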
    pub fn prepare_gpu_cache(
        &mut self,
        deferred_resolves: &[DeferredResolve],
    ) -> Result<(), super::RendererError> {
        self.profile.start_time(profiler::GPU_CACHE_PREPARE_TIME);

        if self.pending_gpu_cache_clear {
            let use_scatter =
                matches!(self.gpu_cache_texture.bus, GpuCacheBus::Scatter { .. });
            let new_cache = match GpuCacheTexture::new(&mut self.device, use_scatter) {
                Ok(cache) => cache,
                Err(err) => {
                    self.profile.end_time(profiler::GPU_CACHE_PREPARE_TIME);
                    return Err(err);
                }
            };
            let old_cache = mem::replace(&mut self.gpu_cache_texture, new_cache);
            old_cache.deinit(&mut self.device);
            self.pending_gpu_cache_clear = false;
        }

        let deferred_update_list = self.update_deferred_resolves(deferred_resolves);
        self.pending_gpu_cache_updates.extend(deferred_update_list);

        self.update_gpu_cache();

        // Note: the texture might have changed during the `update`,
        // so we need to bind it here.
        self.device.bind_texture(
            super::TextureSampler::GpuCache,
            self.gpu_cache_texture.texture.as_ref().unwrap(),
            Swizzle::default(),
        );

        self.profile.end_time(profiler::GPU_CACHE_PREPARE_TIME);

        Ok(())
    }

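    /// Reads the whole GPU cache texture back as RGBAF32 texels (16 bytes per
    /// texel), returning the texture dimensions and the raw bytes.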
    pub fn read_gpu_cache(&mut self) -> (DeviceIntSize, Vec<u8>) {
        let texture = self.gpu_cache_texture.texture.as_ref().unwrap();
        let size = device_size_as_framebuffer_size(texture.get_dimensions());
        let mut texels = vec![0; (size.width * size.height * 16) as usize];
        self.device.begin_frame();
        self.device.bind_read_target(ReadTarget::from_texture(texture));
        self.device.read_pixels_into(
            size.into(),
            api::ImageFormat::RGBAF32,
            &mut texels,
        );
        self.device.reset_read_target();
        self.device.end_frame();
        (texture.get_dimensions(), texels)
    }
}