use {
    core::fmt::{self, Debug},
    gpu_alloc_types::{MemoryPropertyFlags, MemoryType},
};

bitflags::bitflags! {
    /// Memory usage flags.
    /// The set bits define the intended usage of the requested memory.
    #[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))]
    #[derive(Clone, Copy, Debug, Eq, Hash, Ord, PartialEq, PartialOrd)]
    pub struct UsageFlags: u8 {
        /// Hints the allocator to find memory with faster device access.
        /// If no flags are specified then `FAST_DEVICE_ACCESS` is implied.
        const FAST_DEVICE_ACCESS = 0x01;

        /// Memory will be accessed from the host.
        /// This flag guarantees that host memory operations will be available.
        /// Without it, the implementation is encouraged to use non-host-accessible memory.
        const HOST_ACCESS = 0x02;

        /// Hints the allocator that memory will be used for data downloading.
        /// The allocator will strongly prefer host-cached memory.
        /// Implies the `HOST_ACCESS` flag.
        const DOWNLOAD = 0x04;

        /// Hints the allocator that memory will be used for data uploading.
        /// If the `DOWNLOAD` flag is not set, the allocator will assume that
        /// the host will access the memory in a write-only manner and may
        /// pick non-host-cached memory.
        /// Implies the `HOST_ACCESS` flag.
        const UPLOAD = 0x08;

        /// Hints the allocator that memory will be used for a short duration,
        /// allowing it to use a faster algorithm with less memory overhead.
        /// If the user holds the returned memory block for too long, the
        /// effective memory overhead increases instead.
        /// The best use case is a staging buffer for a single batch of operations.
        const TRANSIENT = 0x10;

        /// Requests memory that can be addressed with a `u64`.
        /// Allows fetching the device address of resources bound to that memory.
        const DEVICE_ADDRESS = 0x20;
    }
}
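
// Illustrative sketch of combining the flags above. The flag choice models
// a host-written staging buffer; it is an example, not guidance baked into
// this crate.
#[cfg(test)]
mod usage_flags_tests {
    use super::*;

    #[test]
    fn staging_buffer_flags() {
        // A staging buffer written once by the host and consumed by the
        // device in a single batch of operations.
        let staging = UsageFlags::UPLOAD | UsageFlags::TRANSIENT;
        assert!(staging.contains(UsageFlags::UPLOAD));

        // `UPLOAD` requires host access, so only host-visible memory is
        // compatible (see `compatible` below).
        assert!(compatible(staging, MemoryPropertyFlags::HOST_VISIBLE));
        assert!(!compatible(staging, MemoryPropertyFlags::DEVICE_LOCAL));
    }
}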

/// Memory type indices compatible with one usage combination,
/// sorted by priority, together with a bitmask over those indices.
#[derive(Clone, Copy, Debug)]
struct MemoryForOneUsage {
    /// Bitmask with one bit set per compatible memory type index.
    mask: u32,
    /// Compatible memory type indices, highest priority first.
    types: [u32; 32],
    /// Number of meaningful entries in `types`.
    types_count: u32,
}

/// Lookup table from every possible `UsageFlags` combination
/// (64 values of the six flag bits) to the memory types that support it.
pub(crate) struct MemoryForUsage {
    usages: [MemoryForOneUsage; 64],
}

impl Debug for MemoryForUsage {
    fn fmt(&self, fmt: &mut fmt::Formatter<'_>) -> fmt::Result {
        fmt.debug_struct("MemoryForUsage")
            .field("usages", &&self.usages[..])
            .finish()
    }
}

impl MemoryForUsage {
    /// Precomputes the compatible memory types for every usage combination.
    /// Panics if more than 32 memory types are provided.
    pub fn new(memory_types: &[MemoryType]) -> Self {
        assert!(
            memory_types.len() <= 32,
            "Only up to 32 memory types supported"
        );

        let mut mfu = MemoryForUsage {
            usages: [MemoryForOneUsage {
                mask: 0,
                types: [0; 32],
                types_count: 0,
            }; 64],
        };

        for usage in 0..64 {
            mfu.usages[usage as usize] =
                one_usage(UsageFlags::from_bits_truncate(usage), memory_types);
        }

        mfu
    }

    /// Returns a mask with a bit set for each memory type index that
    /// supports the usage.
    pub fn mask(&self, usage: UsageFlags) -> u32 {
        self.usages[usage.bits() as usize].mask
    }

    /// Returns a slice of memory type indices that support the usage.
    /// Earlier memory types have priority over later ones.
    pub fn types(&self, usage: UsageFlags) -> &[u32] {
        let usage = &self.usages[usage.bits() as usize];
        &usage.types[..usage.types_count as usize]
    }
}

/// Collects the memory types compatible with `usage`, sorts them by priority
/// (best first), and builds the index bitmask.
fn one_usage(usage: UsageFlags, memory_types: &[MemoryType]) -> MemoryForOneUsage {
    let mut types = [0; 32];
    let mut types_count = 0;

    for (index, mt) in memory_types.iter().enumerate() {
        if compatible(usage, mt.props) {
            types[types_count as usize] = index as u32;
            types_count += 1;
        }
    }

    types[..types_count as usize]
        .sort_unstable_by_key(|&index| reverse_priority(usage, memory_types[index as usize].props));

    let mask = types[..types_count as usize]
        .iter()
        .fold(0u32, |mask, index| mask | 1u32 << index);

    MemoryForOneUsage {
        mask,
        types,
        types_count,
    }
}
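
// A minimal sketch of the precomputation above, assuming the
// `MemoryType { props, heap }` layout from `gpu_alloc_types`. The two
// memory types are hypothetical, loosely modeled after a discrete GPU.
#[cfg(test)]
mod memory_for_usage_tests {
    use super::*;

    #[test]
    fn device_local_first_for_default_usage() {
        let memory_types = [
            MemoryType {
                props: MemoryPropertyFlags::HOST_VISIBLE | MemoryPropertyFlags::HOST_COHERENT,
                heap: 0,
            },
            MemoryType {
                props: MemoryPropertyFlags::DEVICE_LOCAL,
                heap: 1,
            },
        ];

        let mfu = MemoryForUsage::new(&memory_types);

        // Empty usage implies `FAST_DEVICE_ACCESS`, so the device-local
        // type (index 1) is tried first.
        assert_eq!(mfu.types(UsageFlags::empty()), &[1u32, 0]);
        assert_eq!(mfu.mask(UsageFlags::empty()), 0b11);

        // `DOWNLOAD` requires host access, so only the host-visible type
        // remains.
        assert_eq!(mfu.types(UsageFlags::DOWNLOAD), &[0u32]);
        assert_eq!(mfu.mask(UsageFlags::DOWNLOAD), 0b01);
    }
}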

/// Returns `true` if memory with the given property flags can serve the usage.
fn compatible(usage: UsageFlags, flags: MemoryPropertyFlags) -> bool {
    type Flags = MemoryPropertyFlags;
    if flags.contains(Flags::LAZILY_ALLOCATED) || flags.contains(Flags::PROTECTED) {
        // Unsupported
        false
    } else if usage.intersects(UsageFlags::HOST_ACCESS | UsageFlags::UPLOAD | UsageFlags::DOWNLOAD)
    {
        // Requires HOST_VISIBLE
        flags.contains(Flags::HOST_VISIBLE)
    } else {
        true
    }
}
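
// A small check of the filtering rules above; the property combinations are
// illustrative.
#[cfg(test)]
mod compatible_tests {
    use super::*;

    #[test]
    fn unsupported_and_host_access_rules() {
        // Lazily allocated and protected memory is never selected,
        // regardless of the requested usage.
        assert!(!compatible(
            UsageFlags::empty(),
            MemoryPropertyFlags::DEVICE_LOCAL | MemoryPropertyFlags::LAZILY_ALLOCATED,
        ));
        assert!(!compatible(UsageFlags::empty(), MemoryPropertyFlags::PROTECTED));

        // Host access requires `HOST_VISIBLE`.
        assert!(!compatible(UsageFlags::HOST_ACCESS, MemoryPropertyFlags::DEVICE_LOCAL));
        assert!(compatible(UsageFlags::HOST_ACCESS, MemoryPropertyFlags::HOST_VISIBLE));
    }
}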

/// Returns the reversed priority of memory with the specified flags for the
/// specified usage: a lesser value means higher priority.
fn reverse_priority(usage: UsageFlags, flags: MemoryPropertyFlags) -> u32 {
    type Flags = MemoryPropertyFlags;

    // Strongly prefer device-local memory when the `FAST_DEVICE_ACCESS` usage
    // is specified or the usage is empty.
    let device_local: bool = flags.contains(Flags::DEVICE_LOCAL)
        ^ (usage.is_empty() || usage.contains(UsageFlags::FAST_DEVICE_ACCESS));

    assert!(
        flags.contains(Flags::HOST_VISIBLE)
            || !usage
                .intersects(UsageFlags::HOST_ACCESS | UsageFlags::UPLOAD | UsageFlags::DOWNLOAD)
    );

    // Prefer non-host-visible memory when host access is not required.
    let host_visible: bool = flags.contains(Flags::HOST_VISIBLE)
        ^ usage.intersects(UsageFlags::HOST_ACCESS | UsageFlags::UPLOAD | UsageFlags::DOWNLOAD);

    // Prefer cached memory for downloads,
    // or non-cached memory if downloads are not expected.
    let host_cached: bool =
        flags.contains(Flags::HOST_CACHED) ^ usage.contains(UsageFlags::DOWNLOAD);

    // Prefer coherent memory for both uploads and downloads,
    // and non-coherent memory if neither flag is set.
    let host_coherent: bool = flags.contains(Flags::HOST_COHERENT)
        ^ (usage.intersects(UsageFlags::UPLOAD | UsageFlags::DOWNLOAD));

    // Each boolean is false when the flags match the preference,
    // so lower sums rank higher.
    device_local as u32 * 8
        + host_visible as u32 * 4
        + host_cached as u32 * 2
        + host_coherent as u32
}