/* This Source Code Form is subject to the terms of the Mozilla Public
 * License, v. 2.0. If a copy of the MPL was not distributed with this
 * file, You can obtain one at http://mozilla.org/MPL/2.0/. */

//! Overview of the GPU cache.
//!
//! The main goal of the GPU cache is to allow on-demand
//! allocation and construction of GPU resources for the
//! vertex shaders to consume.
//!
//! Every item that wants to be stored in the GPU cache
//! should create a GpuCacheHandle that is used to refer
//! to a cached GPU resource. Creating a handle is a
//! cheap operation that does *not* allocate room in the
//! cache.
//!
//! On any frame when that data is required, the caller
//! must request that handle via `request`. If the
//! data is not in the cache, the caller builds and
//! pushes the data via the returned request object.
//!
//! After `end_frame` has occurred, callers can
//! use the `get_address` API to get the allocated
//! address in the GPU cache of a given resource slot
//! for this frame.
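//!
//! A minimal usage sketch of that flow (hypothetical caller code,
//! assuming a `gpu_cache: &mut GpuCache` is in scope):
//!
//! ```ignore
//! let mut handle = GpuCacheHandle::new();
//! if let Some(mut request) = gpu_cache.request(&mut handle) {
//!     // Only runs on a cache miss: build and push the blocks.
//!     request.push([1.0, 0.0, 0.0, 1.0]);
//! }
//! // Later in the frame, the slot's address can be resolved:
//! let address = gpu_cache.get_address(&handle);
//! ```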

use api::{DebugFlags, DocumentId, PremultipliedColorF};
#[cfg(test)]
use api::IdNamespace;
use api::units::*;
use euclid::{HomogeneousVector, Box2D};
use crate::internal_types::{FastHashMap, FastHashSet, FrameStamp, FrameId};
use crate::profiler::{self, TransactionProfile};
use crate::prim_store::VECS_PER_SEGMENT;
use crate::renderer::MAX_VERTEX_TEXTURE_WIDTH;
use crate::util::VecHelper;
use std::{u16, u32};
use std::num::NonZeroU32;
use std::ops::Add;
use std::time::{Duration, Instant};


/// At the time of this writing, Firefox uses about 15 GPU cache rows on
/// startup, and then gradually works its way up to the mid-30s with normal
/// browsing.
pub const GPU_CACHE_INITIAL_HEIGHT: i32 = 20;
const NEW_ROWS_PER_RESIZE: i32 = 10;

/// The number of frames an entry can go unused before being evicted.
const FRAMES_BEFORE_EVICTION: u64 = 10;

/// The ratio of utilized blocks to total blocks for which we start the clock
/// on reclaiming memory.
const RECLAIM_THRESHOLD: f32 = 0.2;

/// The amount of time utilization must be below the above threshold before we
/// blow away the cache and rebuild it.
const RECLAIM_DELAY_S: u64 = 5;

#[derive(Debug, Copy, Clone, Eq, MallocSizeOf, PartialEq)]
#[cfg_attr(feature = "capture", derive(Serialize))]
#[cfg_attr(feature = "replay", derive(Deserialize))]
struct Epoch(u32);

impl Epoch {
    fn next(&mut self) {
        *self = Epoch(self.0.wrapping_add(1));
    }
}

#[derive(Debug, Copy, Clone, MallocSizeOf)]
#[cfg_attr(feature = "capture", derive(Serialize))]
#[cfg_attr(feature = "replay", derive(Deserialize))]
struct CacheLocation {
    block_index: BlockIndex,
    epoch: Epoch,
}

/// A single texel in RGBAF32 texture - 16 bytes.
#[derive(Copy, Clone, Debug, MallocSizeOf)]
#[cfg_attr(feature = "capture", derive(Serialize))]
#[cfg_attr(feature = "replay", derive(Deserialize))]
pub struct GpuBlockData {
    data: [f32; 4],
}

impl GpuBlockData {
    pub const EMPTY: Self = GpuBlockData { data: [0.0; 4] };
}

/// Conversion helpers for GpuBlockData
impl From<PremultipliedColorF> for GpuBlockData {
    fn from(c: PremultipliedColorF) -> Self {
        GpuBlockData {
            data: [c.r, c.g, c.b, c.a],
        }
    }
}

impl From<[f32; 4]> for GpuBlockData {
    fn from(data: [f32; 4]) -> Self {
        GpuBlockData { data }
    }
}

impl<P> From<Box2D<f32, P>> for GpuBlockData {
    fn from(r: Box2D<f32, P>) -> Self {
        GpuBlockData {
            data: [
                r.min.x,
                r.min.y,
                r.max.x,
                r.max.y,
            ],
        }
    }
}

impl<P> From<HomogeneousVector<f32, P>> for GpuBlockData {
    fn from(v: HomogeneousVector<f32, P>) -> Self {
        GpuBlockData {
            data: [
                v.x,
                v.y,
                v.z,
                v.w,
            ],
        }
    }
}

impl From<TexelRect> for GpuBlockData {
    fn from(tr: TexelRect) -> Self {
        GpuBlockData {
            data: [tr.uv0.x, tr.uv0.y, tr.uv1.x, tr.uv1.y],
        }
    }
}


// A handle to a GPU resource.
#[derive(Debug, Copy, Clone, MallocSizeOf)]
#[cfg_attr(feature = "capture", derive(Serialize))]
#[cfg_attr(feature = "replay", derive(Deserialize))]
pub struct GpuCacheHandle {
    location: Option<CacheLocation>,
}

impl GpuCacheHandle {
    pub fn new() -> Self {
        GpuCacheHandle { location: None }
    }

    pub fn as_int(self, gpu_cache: &GpuCache) -> i32 {
        gpu_cache.get_address(&self).as_int()
    }
}

// A unique address in the GPU cache. These are uploaded
// as part of the primitive instances, to allow the vertex
// shader to fetch the specific data.
#[repr(C)]
#[derive(Copy, Debug, Clone, MallocSizeOf, Eq, PartialEq)]
#[cfg_attr(feature = "capture", derive(Serialize))]
#[cfg_attr(feature = "replay", derive(Deserialize))]
pub struct GpuCacheAddress {
    pub u: u16,
    pub v: u16,
}

impl GpuCacheAddress {
    fn new(u: usize, v: usize) -> Self {
        GpuCacheAddress {
            u: u as u16,
            v: v as u16,
        }
    }

    pub const INVALID: GpuCacheAddress = GpuCacheAddress {
        u: u16::MAX,
        v: u16::MAX,
    };

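    /// Worked example: with MAX_VERTEX_TEXTURE_WIDTH = 1024 (asserted
    /// elsewhere in this module), the address (u: 3, v: 2) encodes to
    /// 2 * 1024 + 3 = 2051.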
    pub fn as_int(self) -> i32 {
        // TODO(gw): Temporarily encode GPU Cache addresses as a single int.
        //           In the future, we can change the PrimitiveInstanceData struct
        //           to use 2x u16 for the vertex attribute instead of an i32.
        self.v as i32 * MAX_VERTEX_TEXTURE_WIDTH as i32 + self.u as i32
    }
}

impl Add<usize> for GpuCacheAddress {
    type Output = GpuCacheAddress;

    fn add(self, other: usize) -> GpuCacheAddress {
        GpuCacheAddress {
            u: self.u + other as u16,
            v: self.v,
        }
    }
}

// An entry in a free-list of blocks in the GPU cache.
#[derive(Debug, MallocSizeOf)]
#[cfg_attr(feature = "capture", derive(Serialize))]
#[cfg_attr(feature = "replay", derive(Deserialize))]
struct Block {
    // The location in the cache of this block.
    address: GpuCacheAddress,
    // The current epoch (generation) of this block.
    epoch: Epoch,
    // Index of the next block in the list this block
    // belongs to (either a free-list or the
    // occupied list).
    next: Option<BlockIndex>,
    // The last frame this block was referenced.
    last_access_time: FrameId,
}

impl Block {
    fn new(
        address: GpuCacheAddress,
        next: Option<BlockIndex>,
        frame_id: FrameId,
        epoch: Epoch,
    ) -> Self {
        Block {
            address,
            next,
            last_access_time: frame_id,
            epoch,
        }
    }

    fn advance_epoch(&mut self, max_epoch: &mut Epoch) {
        self.epoch.next();
        if max_epoch.0 < self.epoch.0 {
            max_epoch.0 = self.epoch.0;
        }
    }

    /// An invalid dummy block, used to pad out index zero (see BlockIndex).
    pub const INVALID: Block = Block {
        address: GpuCacheAddress { u: 0, v: 0 },
        epoch: Epoch(0),
        next: None,
        last_access_time: FrameId::INVALID,
    };
}

/// Represents the index of a Block in the block array. We only create such
/// structs for blocks that represent the start of a chunk.
///
/// Because we use Option<BlockIndex> in a lot of places, we use a NonZeroU32
/// here and avoid ever using the index zero.
#[derive(Debug, Copy, Clone, MallocSizeOf)]
#[cfg_attr(feature = "capture", derive(Serialize))]
#[cfg_attr(feature = "replay", derive(Deserialize))]
struct BlockIndex(NonZeroU32);

impl BlockIndex {
    fn new(idx: usize) -> Self {
        debug_assert!(idx <= u32::MAX as usize);
        BlockIndex(NonZeroU32::new(idx as u32).expect("Index zero forbidden"))
    }

    fn get(&self) -> usize {
        self.0.get() as usize
    }
}
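
// Avoiding index zero lets the compiler use the NonZeroU32 niche, so
// Option<BlockIndex> should take no more space than BlockIndex itself.
// An illustrative sanity check of that claim (an added sketch, relying
// on rustc's niche optimization propagating through the newtype):
#[test]
fn test_block_index_niche() {
    use std::mem;
    assert_eq!(mem::size_of::<Option<BlockIndex>>(), mem::size_of::<BlockIndex>());
}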

// A row in the cache texture.
#[cfg_attr(feature = "capture", derive(Serialize))]
#[cfg_attr(feature = "replay", derive(Deserialize))]
#[derive(MallocSizeOf)]
struct Row {
    // The fixed size of blocks that this row supports.
    // Each row becomes a slab allocator for a fixed block size.
    // This means no dealing with fragmentation within a cache
    // row as items are allocated and freed.
    block_count_per_item: usize,
}

impl Row {
    fn new(block_count_per_item: usize) -> Self {
        Row {
            block_count_per_item,
        }
    }
}

// A list of update operations that can be applied to the cache
// this frame. The list of updates is created by the render backend
// during frame construction. It's passed to the render thread
// where GL commands can be applied.
#[cfg_attr(feature = "capture", derive(Serialize))]
#[cfg_attr(feature = "replay", derive(Deserialize))]
#[derive(MallocSizeOf)]
pub enum GpuCacheUpdate {
    Copy {
        block_index: usize,
        block_count: usize,
        address: GpuCacheAddress,
    },
}

/// Command to inform the debug display in the renderer when chunks are allocated
/// or freed.
#[derive(MallocSizeOf)]
pub enum GpuCacheDebugCmd {
    /// Describes an allocated chunk.
    Alloc(GpuCacheDebugChunk),
    /// Describes a freed chunk.
    Free(GpuCacheAddress),
}

#[derive(Clone, MallocSizeOf)]
pub struct GpuCacheDebugChunk {
    pub address: GpuCacheAddress,
    pub size: usize,
}

#[must_use]
#[cfg_attr(feature = "capture", derive(Serialize))]
#[cfg_attr(feature = "replay", derive(Deserialize))]
#[derive(MallocSizeOf)]
pub struct GpuCacheUpdateList {
    /// The frame this update list was generated from.
    pub frame_id: FrameId,
    /// Whether the texture should be cleared before updates
    /// are applied.
    pub clear: bool,
    /// The current height of the texture. The render thread
    /// should resize the texture if required.
    pub height: i32,
    /// List of updates to apply.
    pub updates: Vec<GpuCacheUpdate>,
    /// A flat list of GPU blocks that are pending upload
    /// to GPU memory.
    pub blocks: Vec<GpuBlockData>,
    /// Debug commands describing allocated and freed chunks,
    /// for the GPU cache debug display.
    #[cfg_attr(feature = "serde", serde(skip))]
    pub debug_commands: Vec<GpuCacheDebugCmd>,
}
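
// A minimal sketch of how the render thread might consume this list
// (hypothetical code; `upload_rect` is an assumed device helper, not
// part of this module):
//
//   for update in &list.updates {
//       match *update {
//           GpuCacheUpdate::Copy { block_index, block_count, address } => {
//               let blocks = &list.blocks[block_index .. block_index + block_count];
//               upload_rect(address, blocks); // copy into the cache texture
//           }
//       }
//   }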

// Holds the free lists of fixed size blocks. Mostly
// just serves to work around the borrow checker.
#[cfg_attr(feature = "capture", derive(Serialize))]
#[cfg_attr(feature = "replay", derive(Deserialize))]
#[derive(MallocSizeOf)]
struct FreeBlockLists {
    free_list_1: Option<BlockIndex>,
    free_list_2: Option<BlockIndex>,
    free_list_4: Option<BlockIndex>,
    free_list_8: Option<BlockIndex>,
    free_list_16: Option<BlockIndex>,
    free_list_32: Option<BlockIndex>,
    free_list_64: Option<BlockIndex>,
    free_list_128: Option<BlockIndex>,
    free_list_256: Option<BlockIndex>,
    free_list_341: Option<BlockIndex>,
    free_list_512: Option<BlockIndex>,
    free_list_1024: Option<BlockIndex>,
}

impl FreeBlockLists {
    fn new() -> Self {
        FreeBlockLists {
            free_list_1: None,
            free_list_2: None,
            free_list_4: None,
            free_list_8: None,
            free_list_16: None,
            free_list_32: None,
            free_list_64: None,
            free_list_128: None,
            free_list_256: None,
            free_list_341: None,
            free_list_512: None,
            free_list_1024: None,
        }
    }

    fn get_actual_block_count_and_free_list(
        &mut self,
        block_count: usize,
    ) -> (usize, &mut Option<BlockIndex>) {
        // Find the appropriate free list to use based on the block size.
        //
        // Note that we cheat a bit with the 341 bucket, since it's not quite
        // a divisor of 1024, because purecss-francine allocates many 260-block
        // chunks, and there's no reason we shouldn't pack these three to a row.
        // This means the allocation statistics will under-report by one block
        // for each row using 341-block buckets, which is fine.
        debug_assert_eq!(MAX_VERTEX_TEXTURE_WIDTH, 1024, "Need to update bucketing");
        match block_count {
            0 => panic!("Can't allocate zero sized blocks!"),
            1 => (1, &mut self.free_list_1),
            2 => (2, &mut self.free_list_2),
            3..=4 => (4, &mut self.free_list_4),
            5..=8 => (8, &mut self.free_list_8),
            9..=16 => (16, &mut self.free_list_16),
            17..=32 => (32, &mut self.free_list_32),
            33..=64 => (64, &mut self.free_list_64),
            65..=128 => (128, &mut self.free_list_128),
            129..=256 => (256, &mut self.free_list_256),
            257..=341 => (341, &mut self.free_list_341),
            342..=512 => (512, &mut self.free_list_512),
            513..=1024 => (1024, &mut self.free_list_1024),
            _ => panic!("Can't allocate > MAX_VERTEX_TEXTURE_WIDTH per resource!"),
        }
    }
}
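
// An illustrative test of the bucketing above (an added sketch, not part
// of the original test suite): requested block counts round up to the
// nearest bucket size, including the special 341 bucket.
#[test]
fn test_free_list_bucket_rounding() {
    let mut lists = FreeBlockLists::new();
    assert_eq!(lists.get_actual_block_count_and_free_list(5).0, 8);
    assert_eq!(lists.get_actual_block_count_and_free_list(260).0, 341);
    assert_eq!(lists.get_actual_block_count_and_free_list(513).0, 1024);
}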

// CPU-side representation of the GPU resource cache texture.
#[cfg_attr(feature = "capture", derive(Serialize))]
#[cfg_attr(feature = "replay", derive(Deserialize))]
#[derive(MallocSizeOf)]
struct Texture {
    // Current texture height
    height: i32,
    // All blocks that have been created for this texture
    blocks: Vec<Block>,
    // Metadata about each allocated row.
    rows: Vec<Row>,
    // The base Epoch for this texture.
    base_epoch: Epoch,
    // The maximum epoch reached. We track this along with the above so
    // that we can rebuild the Texture and avoid collisions with handles
    // allocated for the old texture.
    max_epoch: Epoch,
    // Free lists of available blocks for each supported
    // block size in the texture. These are intrusive
    // linked lists.
    free_lists: FreeBlockLists,
    // Linked list of currently occupied blocks. This
    // makes it faster to iterate blocks looking for
    // candidates to be evicted from the cache.
    occupied_list_heads: FastHashMap<DocumentId, BlockIndex>,
    // Pending blocks that have been written this frame
    // and will need to be sent to the GPU.
    pending_blocks: Vec<GpuBlockData>,
    // Pending update commands.
    updates: Vec<GpuCacheUpdate>,
    // Profile stats
    allocated_block_count: usize,
    // The stamp at which we first reached our threshold for reclaiming `GpuCache`
    // memory, or `None` if the threshold hasn't been reached.
    #[cfg_attr(feature = "serde", serde(skip))]
    reached_reclaim_threshold: Option<Instant>,
    // List of debug commands to be sent to the renderer when the GPU cache
    // debug display is enabled.
    #[cfg_attr(feature = "serde", serde(skip))]
    debug_commands: Vec<GpuCacheDebugCmd>,
    // The current debug flags for the system.
    debug_flags: DebugFlags,
}

impl Texture {
    fn new(base_epoch: Epoch, debug_flags: DebugFlags) -> Self {
        // Pre-fill the block array with one invalid block so that we never use
        // 0 for a BlockIndex. This lets us use NonZeroU32 for BlockIndex, which
        // saves memory.
        let blocks = vec![Block::INVALID];

        Texture {
            height: GPU_CACHE_INITIAL_HEIGHT,
            blocks,
            rows: Vec::new(),
            base_epoch,
            max_epoch: base_epoch,
            free_lists: FreeBlockLists::new(),
            pending_blocks: Vec::new(),
            updates: Vec::new(),
            occupied_list_heads: FastHashMap::default(),
            allocated_block_count: 0,
            reached_reclaim_threshold: None,
            debug_commands: Vec::new(),
            debug_flags,
        }
    }

    // Push new data into the cache. The `pending_block_index` field represents
    // where the data was pushed into the texture `pending_blocks` array.
    // Return the allocated address for this data.
    fn push_data(
        &mut self,
        pending_block_index: Option<usize>,
        block_count: usize,
        frame_stamp: FrameStamp
    ) -> CacheLocation {
        debug_assert!(frame_stamp.is_valid());
        // Find the appropriate free list to use based on the block size.
        let (alloc_size, free_list) = self.free_lists
            .get_actual_block_count_and_free_list(block_count);

        // See if we need a new row (if free-list has nothing available)
        if free_list.is_none() {
            if self.rows.len() as i32 == self.height {
                self.height += NEW_ROWS_PER_RESIZE;
            }

            // Create a new row.
            let items_per_row = MAX_VERTEX_TEXTURE_WIDTH / alloc_size;
            let row_index = self.rows.len();
            self.rows.push(Row::new(alloc_size));

            // Create a `Block` for each possible allocation address
            // in this row, and link it in to the free-list for this
            // block size.
            let mut prev_block_index = None;
            for i in 0 .. items_per_row {
                let address = GpuCacheAddress::new(i * alloc_size, row_index);
                let block_index = BlockIndex::new(self.blocks.len());
                let block = Block::new(address, prev_block_index, frame_stamp.frame_id(), self.base_epoch);
                self.blocks.push(block);
                prev_block_index = Some(block_index);
            }

            *free_list = prev_block_index;
        }

        // Given the code above, it's now guaranteed that there is a block
        // available in the appropriate free-list. Pull a block from the
        // head of the list.
        let free_block_index = free_list.take().unwrap();
        let block = &mut self.blocks[free_block_index.get()];
        *free_list = block.next;

        // Add the block to the occupied linked list.
        block.next = self.occupied_list_heads.get(&frame_stamp.document_id()).cloned();
        block.last_access_time = frame_stamp.frame_id();
        self.occupied_list_heads.insert(frame_stamp.document_id(), free_block_index);
        self.allocated_block_count += alloc_size;

        if let Some(pending_block_index) = pending_block_index {
            // Add this update to the pending list of blocks that need
            // to be updated on the GPU.
            self.updates.push(GpuCacheUpdate::Copy {
                block_index: pending_block_index,
                block_count,
                address: block.address,
            });
        }

        // If we're using the debug display, communicate the allocation to the
        // renderer thread. Note that we do this regardless of whether or not
        // pending_block_index is None (if it is, the renderer thread will fill
        // in the data via a deferred resolve, but the block is still considered
        // allocated).
        if self.debug_flags.contains(DebugFlags::GPU_CACHE_DBG) {
            self.debug_commands.push(GpuCacheDebugCmd::Alloc(GpuCacheDebugChunk {
                address: block.address,
                size: block_count,
            }));
        }

        CacheLocation {
            block_index: free_block_index,
            epoch: block.epoch,
        }
    }

    // Run through the list of occupied cache blocks and evict
    // any old blocks that haven't been referenced for a while.
    fn evict_old_blocks(&mut self, frame_stamp: FrameStamp) {
        debug_assert!(frame_stamp.is_valid());
        // Prune any old items from the list to make room.
        // Traverse the occupied linked list and see
        // which items have not been used for a long time.
        let mut current_block = self.occupied_list_heads.get(&frame_stamp.document_id()).map(|x| *x);
        let mut prev_block: Option<BlockIndex> = None;

        while let Some(index) = current_block {
            let (next_block, should_unlink) = {
                let block = &mut self.blocks[index.get()];

                let next_block = block.next;
                let mut should_unlink = false;

                // If this resource has not been used in the last
                // few frames, free it from the texture and mark
                // as empty.
                if block.last_access_time + FRAMES_BEFORE_EVICTION < frame_stamp.frame_id() {
                    should_unlink = true;

                    // Get the row metadata from the address.
                    let row = &mut self.rows[block.address.v as usize];

                    // Use the row metadata to determine which free-list
                    // this block belongs to.
                    let (_, free_list) = self.free_lists
                        .get_actual_block_count_and_free_list(row.block_count_per_item);

                    block.advance_epoch(&mut self.max_epoch);
                    block.next = *free_list;
                    *free_list = Some(index);

                    self.allocated_block_count -= row.block_count_per_item;

                    if self.debug_flags.contains(DebugFlags::GPU_CACHE_DBG) {
                        let cmd = GpuCacheDebugCmd::Free(block.address);
                        self.debug_commands.push(cmd);
                    }
                };

                (next_block, should_unlink)
            };

            // If the block was released, we will need to remove it
            // from the occupied linked list.
            if should_unlink {
                match prev_block {
                    Some(prev_block) => {
                        self.blocks[prev_block.get()].next = next_block;
                    }
                    None => {
                        match next_block {
                            Some(next_block) => {
                                self.occupied_list_heads.insert(frame_stamp.document_id(), next_block);
                            }
                            None => {
                                self.occupied_list_heads.remove(&frame_stamp.document_id());
                            }
                        }
                    }
                }
            } else {
                prev_block = current_block;
            }

            current_block = next_block;
        }
    }

    /// Returns the ratio of utilized blocks.
    fn utilization(&self) -> f32 {
        let total_blocks = self.rows.len() * MAX_VERTEX_TEXTURE_WIDTH;
        debug_assert!(total_blocks > 0);
        let ratio = self.allocated_block_count as f32 / total_blocks as f32;
        debug_assert!(0.0 <= ratio && ratio <= 1.0, "Bad ratio: {}", ratio);
        ratio
    }
}


/// A wrapper object for GPU data requests;
/// it works as a container that can only grow.
#[must_use]
pub struct GpuDataRequest<'a> {
    //TODO: remove this, see
    // https://bugzilla.mozilla.org/show_bug.cgi?id=1690546
    #[allow(dead_code)]
    handle: &'a mut GpuCacheHandle,
    frame_stamp: FrameStamp,
    start_index: usize,
    max_block_count: usize,
    texture: &'a mut Texture,
}

impl<'a> GpuDataRequest<'a> {
    pub fn push<B>(&mut self, block: B)
    where
        B: Into<GpuBlockData>,
    {
        self.texture.pending_blocks.push(block.into());
    }

    // Write the GPU cache data for an individual segment.
    pub fn write_segment(
        &mut self,
        local_rect: LayoutRect,
        extra_data: [f32; 4],
    ) {
        let _ = VECS_PER_SEGMENT;
        self.push(local_rect);
        self.push(extra_data);
    }

    pub fn current_used_block_num(&self) -> usize {
        self.texture.pending_blocks.len() - self.start_index
    }
}

impl<'a> Drop for GpuDataRequest<'a> {
    fn drop(&mut self) {
        // Push the data to the texture pending updates list.
        let block_count = self.current_used_block_num();
        debug_assert!(block_count <= self.max_block_count);

        let location = self.texture
            .push_data(Some(self.start_index), block_count, self.frame_stamp);
        self.handle.location = Some(location);
    }
}


/// The main LRU cache interface.
#[cfg_attr(feature = "capture", derive(Serialize))]
#[cfg_attr(feature = "replay", derive(Deserialize))]
#[derive(MallocSizeOf)]
pub struct GpuCache {
    /// The current frame stamp.
    now: FrameStamp,
    /// CPU-side texture allocator.
    texture: Texture,
    /// Number of blocks requested this frame that don't
    /// need to be re-uploaded.
    saved_block_count: usize,
    /// The current debug flags for the system.
    debug_flags: DebugFlags,
    /// Whether there is a pending clear to send with the
    /// next update.
    pending_clear: bool,
    /// Indicates that prepare_for_frames has been called for this group of frames.
    /// Used for sanity checks.
    prepared_for_frames: bool,
    /// This indicates that we performed a cleanup operation which requires all
    /// documents to build a frame.
    requires_frame_build: bool,
    /// The set of documents which still need to build a frame in this update.
    /// Used for sanity checks.
    document_frames_to_build: FastHashSet<DocumentId>,
}

impl GpuCache {
    pub fn new() -> Self {
        let debug_flags = DebugFlags::empty();
        GpuCache {
            now: FrameStamp::INVALID,
            texture: Texture::new(Epoch(0), debug_flags),
            saved_block_count: 0,
            debug_flags,
            pending_clear: false,
            prepared_for_frames: false,
            requires_frame_build: false,
            document_frames_to_build: FastHashSet::default(),
        }
    }

    /// Creates a GpuCache and sets it up with a valid `FrameStamp`, which
    /// is useful for avoiding panics when instantiating the `GpuCache`
    /// directly from unit test code.
    #[cfg(test)]
    pub fn new_for_testing() -> Self {
        let mut cache = Self::new();
        let mut now = FrameStamp::first(DocumentId::new(IdNamespace(1), 1));
        now.advance();
        cache.prepared_for_frames = true;
        cache.begin_frame(now);
        cache
    }

    /// Drops everything in the GPU cache. Must not be called once gpu cache entries
    /// for the next frame have already been requested.
    pub fn clear(&mut self) {
        assert!(self.texture.updates.is_empty(), "Clearing with pending updates");
        let mut next_base_epoch = self.texture.max_epoch;
        next_base_epoch.next();
        self.texture = Texture::new(next_base_epoch, self.debug_flags);
        self.saved_block_count = 0;
        self.pending_clear = true;
        self.requires_frame_build = true;
    }

    pub fn requires_frame_build(&self) -> bool {
        self.requires_frame_build
    }

    pub fn prepare_for_frames(&mut self) {
        self.prepared_for_frames = true;
        if self.should_reclaim_memory() {
            self.clear();
            debug_assert!(self.document_frames_to_build.is_empty());
            for &document_id in self.texture.occupied_list_heads.keys() {
                self.document_frames_to_build.insert(document_id);
            }
        }
    }

    pub fn bookkeep_after_frames(&mut self) {
        assert!(self.document_frames_to_build.is_empty());
        assert!(self.prepared_for_frames);
        self.requires_frame_build = false;
        self.prepared_for_frames = false;
    }

    /// Begin a new frame.
    pub fn begin_frame(&mut self, stamp: FrameStamp) {
        debug_assert!(self.texture.pending_blocks.is_empty());
        assert!(self.prepared_for_frames);
        profile_scope!("begin_frame");
        self.now = stamp;
        self.texture.evict_old_blocks(self.now);
        self.saved_block_count = 0;
    }

    // Invalidate a (possibly) existing block in the cache.
    // This means the next call to request() for this location
    // will rebuild the data and upload it to the GPU.
    pub fn invalidate(&mut self, handle: &GpuCacheHandle) {
        if let Some(ref location) = handle.location {
            // don't invalidate blocks that are already re-assigned
            if let Some(block) = self.texture.blocks.get_mut(location.block_index.get()) {
                if block.epoch == location.epoch {
                    block.advance_epoch(&mut self.texture.max_epoch);
                }
            }
        }
    }

    /// Request a resource be added to the cache. If the resource
    /// is already in the cache, `None` will be returned.
    pub fn request<'a>(&'a mut self, handle: &'a mut GpuCacheHandle) -> Option<GpuDataRequest<'a>> {
        let mut max_block_count = MAX_VERTEX_TEXTURE_WIDTH;
        // Check if the allocation for this handle is still valid.
        if let Some(ref location) = handle.location {
            if let Some(block) = self.texture.blocks.get_mut(location.block_index.get()) {
                if block.epoch == location.epoch {
                    max_block_count = self.texture.rows[block.address.v as usize].block_count_per_item;
                    if block.last_access_time != self.now.frame_id() {
                        // Mark last access time to avoid evicting this block.
                        block.last_access_time = self.now.frame_id();
                        self.saved_block_count += max_block_count;
                    }
                    return None;
                }
            }
        }

        debug_assert!(self.now.is_valid());
        Some(GpuDataRequest {
            handle,
            frame_stamp: self.now,
            start_index: self.texture.pending_blocks.len(),
            texture: &mut self.texture,
            max_block_count,
        })
    }

    // Push an array of data blocks to be uploaded to the GPU
    // unconditionally for this frame. The cache handle will
    // assert if the caller tries to retrieve the address
    // of this handle on a subsequent frame. This is typically
    // used for uploading data that changes every frame, and
    // therefore makes no sense to try to cache.
    pub fn push_per_frame_blocks(&mut self, blocks: &[GpuBlockData]) -> GpuCacheHandle {
        let start_index = self.texture.pending_blocks.len();
        self.texture.pending_blocks.extend_from_slice(blocks);
        let location = self.texture
            .push_data(Some(start_index), blocks.len(), self.now);
        GpuCacheHandle {
            location: Some(location),
        }
    }

    // Reserve space in the cache for per-frame blocks that
    // will be resolved by the render thread via the
    // external image callback.
    pub fn push_deferred_per_frame_blocks(&mut self, block_count: usize) -> GpuCacheHandle {
        let location = self.texture.push_data(None, block_count, self.now);
        GpuCacheHandle {
            location: Some(location),
        }
    }

    /// End the frame. Returns the current frame stamp; the pending
    /// update list is pulled separately via `extract_updates`.
    pub fn end_frame(
        &mut self,
        profile: &mut TransactionProfile,
    ) -> FrameStamp {
        profile_scope!("end_frame");
        profile.set(profiler::GPU_CACHE_ROWS_TOTAL, self.texture.rows.len());
        profile.set(profiler::GPU_CACHE_BLOCKS_TOTAL, self.texture.allocated_block_count);
        profile.set(profiler::GPU_CACHE_BLOCKS_SAVED, self.saved_block_count);

        let reached_threshold =
            self.texture.rows.len() > (GPU_CACHE_INITIAL_HEIGHT as usize) &&
            self.texture.utilization() < RECLAIM_THRESHOLD;
        if reached_threshold {
            self.texture.reached_reclaim_threshold.get_or_insert_with(Instant::now);
        } else {
            self.texture.reached_reclaim_threshold = None;
        }

        self.document_frames_to_build.remove(&self.now.document_id());
        self.now
    }

    /// Returns true if utilization has been low enough for long enough that we
    /// should blow the cache away and rebuild it.
    pub fn should_reclaim_memory(&self) -> bool {
        self.texture.reached_reclaim_threshold
            .map_or(false, |t| t.elapsed() > Duration::from_secs(RECLAIM_DELAY_S))
    }

    /// Extract the pending updates from the cache.
    pub fn extract_updates(&mut self) -> GpuCacheUpdateList {
        let clear = self.pending_clear;
        self.pending_clear = false;
        GpuCacheUpdateList {
            frame_id: self.now.frame_id(),
            clear,
            height: self.texture.height,
            debug_commands: self.texture.debug_commands.take_and_preallocate(),
            updates: self.texture.updates.take_and_preallocate(),
            blocks: self.texture.pending_blocks.take_and_preallocate(),
        }
    }

    /// Sets the current debug flags for the system.
    pub fn set_debug_flags(&mut self, flags: DebugFlags) {
        self.debug_flags = flags;
        self.texture.debug_flags = flags;
    }

    /// Get the actual GPU address in the texture for a given slot ID.
    /// It's assumed at this point that the given slot has been requested
    /// and built for this frame. Attempting to get the address for a
    /// freed or pending slot will panic!
    pub fn get_address(&self, id: &GpuCacheHandle) -> GpuCacheAddress {
        let location = id.location.expect("handle not requested or allocated!");
        let block = &self.texture.blocks[location.block_index.get()];
        debug_assert_eq!(block.epoch, location.epoch);
        debug_assert_eq!(block.last_access_time, self.now.frame_id());
        block.address
    }
}

#[test]
#[cfg(target_pointer_width = "64")]
fn test_struct_sizes() {
    use std::mem;
    // We can end up with a lot of blocks stored in the global vec, and keeping
    // them small helps reduce memory overhead.
    assert_eq!(mem::size_of::<Block>(), 24, "Block size changed");
}