wgpu_hal/vulkan/mod.rs
/*!
# Vulkan API internals.

## Stack memory

Ash expects slices, which we don't generally have available.
We cope with this requirement through a combination of the following:
 - temporarily allocating `Vec` on the heap, where overhead is permitted
 - growing temporary local storage

## Framebuffers and Render passes

Render passes are cached on the device and kept forever.

Framebuffers are also cached on the device, but they are removed when
any of the image views they reference is destroyed.
If Vulkan supports image-less framebuffers,
then the actual views are excluded from the framebuffer key.

## Fences

If timeline semaphores are available, they are used 1:1 with wgpu-hal fences.
Otherwise, we manage a pool of `VkFence` objects behind each `hal::Fence`.

!*/

mod adapter;
mod command;
mod conv;
mod device;
mod drm;
mod instance;
mod sampler;
mod semaphore_list;

pub use adapter::PhysicalDeviceFeatures;

use alloc::{boxed::Box, ffi::CString, sync::Arc, vec::Vec};
use core::{borrow::Borrow, ffi::CStr, fmt, marker::PhantomData, mem, num::NonZeroU32};

use arrayvec::ArrayVec;
use ash::{ext, khr, vk};
use bytemuck::{Pod, Zeroable};
use hashbrown::HashSet;
use parking_lot::{Mutex, RwLock};

use naga::FastHashMap;
use wgt::InternalCounter;

use semaphore_list::SemaphoreList;

const MILLIS_TO_NANOS: u64 = 1_000_000;
/// Maximum number of attachments in a render pass: one resolve attachment for
/// each color attachment, plus one depth/stencil attachment.
const MAX_TOTAL_ATTACHMENTS: usize = crate::MAX_COLOR_ATTACHMENTS * 2 + 1;

#[derive(Clone, Debug)]
pub struct Api;

impl crate::Api for Api {
    type Instance = Instance;
    type Surface = Surface;
    type Adapter = Adapter;
    type Device = Device;

    type Queue = Queue;
    type CommandEncoder = CommandEncoder;
    type CommandBuffer = CommandBuffer;

    type Buffer = Buffer;
    type Texture = Texture;
    type SurfaceTexture = SurfaceTexture;
    type TextureView = TextureView;
    type Sampler = Sampler;
    type QuerySet = QuerySet;
    type Fence = Fence;
    type AccelerationStructure = AccelerationStructure;
    type PipelineCache = PipelineCache;

    type BindGroupLayout = BindGroupLayout;
    type BindGroup = BindGroup;
    type PipelineLayout = PipelineLayout;
    type ShaderModule = ShaderModule;
    type RenderPipeline = RenderPipeline;
    type ComputePipeline = ComputePipeline;
}

crate::impl_dyn_resource!(
    Adapter,
    AccelerationStructure,
    BindGroup,
    BindGroupLayout,
    Buffer,
    CommandBuffer,
    CommandEncoder,
    ComputePipeline,
    Device,
    Fence,
    Instance,
    PipelineCache,
    PipelineLayout,
    QuerySet,
    Queue,
    RenderPipeline,
    Sampler,
    ShaderModule,
    Surface,
    SurfaceTexture,
    Texture,
    TextureView
);

struct DebugUtils {
    extension: ext::debug_utils::Instance,
    messenger: vk::DebugUtilsMessengerEXT,

    /// Owning pointer to the debug messenger callback user data.
    ///
    /// `InstanceShared::drop` destroys the debug messenger before
    /// dropping this, so the callback should never receive a dangling
    /// user data pointer.
    #[allow(dead_code)]
    callback_data: Box<DebugUtilsMessengerUserData>,
}

pub struct DebugUtilsCreateInfo {
    severity: vk::DebugUtilsMessageSeverityFlagsEXT,
    message_type: vk::DebugUtilsMessageTypeFlagsEXT,
    callback_data: Box<DebugUtilsMessengerUserData>,
}

130#[derive(Debug)]
131/// The properties related to the validation layer needed for the
132/// DebugUtilsMessenger for their workarounds
133struct ValidationLayerProperties {
134 /// Validation layer description, from `vk::LayerProperties`.
135 layer_description: CString,
136
137 /// Validation layer specification version, from `vk::LayerProperties`.
138 layer_spec_version: u32,
139}

/// User data needed by `instance::debug_utils_messenger_callback`.
///
/// When we create the [`vk::DebugUtilsMessengerEXT`], the `pUserData`
/// pointer refers to one of these values.
#[derive(Debug)]
pub struct DebugUtilsMessengerUserData {
    /// The properties related to the validation layer, if present
    validation_layer_properties: Option<ValidationLayerProperties>,

    /// If the OBS layer is present. OBS never increments the version of their layer,
    /// so there's no reason to have the version.
    has_obs_layer: bool,
}

pub struct InstanceShared {
    raw: ash::Instance,
    extensions: Vec<&'static CStr>,
    drop_guard: Option<crate::DropGuard>,
    flags: wgt::InstanceFlags,
    memory_budget_thresholds: wgt::MemoryBudgetThresholds,
    debug_utils: Option<DebugUtils>,
    get_physical_device_properties: Option<khr::get_physical_device_properties2::Instance>,
    entry: ash::Entry,
    has_nv_optimus: bool,
    android_sdk_version: u32,
    /// The instance API version.
    ///
    /// That is, the version of Vulkan supported for instance-level functionality.
    ///
    /// It is associated with a `VkInstance` and its children,
    /// except for a `VkPhysicalDevice` and its children.
    instance_api_version: u32,
}

pub struct Instance {
    shared: Arc<InstanceShared>,
}

/// Semaphore used to acquire a swapchain image.
#[derive(Debug)]
struct SwapchainAcquireSemaphore {
    /// A semaphore that is signaled when this image is safe for us to modify.
    ///
    /// When [`vkAcquireNextImageKHR`] returns the index of the next swapchain
    /// image that we should use, that image may actually still be in use by the
    /// presentation engine, and is not yet safe to modify. However, that
    /// function does accept a semaphore that it will signal when the image is
    /// indeed safe to begin messing with.
    ///
    /// This semaphore is:
    ///
    /// - waited for by the first queue submission to operate on this image
    ///   since it was acquired, and
    ///
    /// - signaled by [`vkAcquireNextImageKHR`] when the acquired image is ready
    ///   for us to use.
    ///
    /// [`vkAcquireNextImageKHR`]: https://registry.khronos.org/vulkan/specs/1.3-extensions/html/vkspec.html#vkAcquireNextImageKHR
    acquire: vk::Semaphore,

    /// True if the next command submission operating on this image should wait
    /// for [`acquire`].
    ///
    /// We must wait for `acquire` before drawing to this swapchain image, but
    /// because `wgpu-hal` queue submissions are always strongly ordered, only
    /// the first submission that works with a swapchain image actually needs to
    /// wait. We set this flag when this image is acquired, and clear it the
    /// first time it's passed to [`Queue::submit`] as a surface texture.
    ///
    /// Additionally, semaphores can only be waited on once, so we need to ensure
    /// that we only actually pass this semaphore to the first submission that
    /// uses that image.
    ///
    /// [`acquire`]: SwapchainAcquireSemaphore::acquire
    /// [`Queue::submit`]: crate::Queue::submit
    should_wait_for_acquire: bool,

    /// The fence value of the last command submission that wrote to this image.
    ///
    /// The next time we try to acquire this image, we'll block until
    /// this submission finishes, proving that [`acquire`] is ready to
    /// pass to `vkAcquireNextImageKHR` again.
    ///
    /// [`acquire`]: SwapchainAcquireSemaphore::acquire
    previously_used_submission_index: crate::FenceValue,
}

impl SwapchainAcquireSemaphore {
    fn new(device: &DeviceShared, index: usize) -> Result<Self, crate::DeviceError> {
        Ok(Self {
            acquire: device
                .new_binary_semaphore(&format!("SwapchainImageSemaphore: Index {index} acquire"))?,
            should_wait_for_acquire: true,
            previously_used_submission_index: 0,
        })
    }

    /// Sets the fence value which the next acquire will wait for. This prevents
    /// the semaphore from being used while the previous submission is still in flight.
    fn set_used_fence_value(&mut self, value: crate::FenceValue) {
        self.previously_used_submission_index = value;
    }

    /// Return the semaphore that commands drawing to this image should wait for, if any.
    ///
    /// This only returns `Some` once per acquisition; see
    /// [`SwapchainAcquireSemaphore::should_wait_for_acquire`] for details.
    fn get_acquire_wait_semaphore(&mut self) -> Option<vk::Semaphore> {
        if self.should_wait_for_acquire {
            self.should_wait_for_acquire = false;
            Some(self.acquire)
        } else {
            None
        }
    }

    /// Indicates the cpu-side usage of this semaphore has finished for the frame,
    /// so reset internal state to be ready for the next frame.
    fn end_semaphore_usage(&mut self) {
        // Reset the acquire semaphore, so that the next time we acquire this
        // image, we can wait for it again.
        self.should_wait_for_acquire = true;
    }

    unsafe fn destroy(&self, device: &ash::Device) {
        unsafe {
            device.destroy_semaphore(self.acquire, None);
        }
    }
}

#[derive(Debug)]
struct SwapchainPresentSemaphores {
    /// A pool of semaphores for ordering presentation after drawing.
    ///
    /// The first [`present_index`] semaphores in this vector are:
    ///
    /// - all waited on by the call to [`vkQueuePresentKHR`] that presents this
    ///   image, and
    ///
    /// - each signaled by some [`vkQueueSubmit`] queue submission that draws to
    ///   this image, when the submission finishes execution.
    ///
    /// This vector accumulates one semaphore per submission that writes to this
    /// image. This is awkward, but hard to avoid: [`vkQueuePresentKHR`]
    /// requires a semaphore to order it with respect to drawing commands, and
    /// we can't attach new completion semaphores to a command submission after
    /// it's been submitted. This means that, at submission time, we must create
    /// the semaphore we might need if the caller's next action is to enqueue a
    /// presentation of this image.
    ///
    /// An alternative strategy would be for presentation to enqueue an empty
    /// submit, ordered relative to other submits in the usual way, and
    /// signaling a single presentation semaphore. But we suspect that submits
    /// are usually expensive enough, and semaphores usually cheap enough, that
    /// performance-sensitive users will avoid making many submits, so that the
    /// cost of accumulated semaphores will usually be less than the cost of an
    /// additional submit.
    ///
    /// Only the first [`present_index`] semaphores in the vector are actually
    /// going to be signalled by submitted commands, and need to be waited for
    /// by the next present call. Any semaphores beyond that index were created
    /// for prior presents and are simply being retained for recycling.
    ///
    /// [`present_index`]: SwapchainPresentSemaphores::present_index
    /// [`vkQueuePresentKHR`]: https://registry.khronos.org/vulkan/specs/1.3-extensions/html/vkspec.html#vkQueuePresentKHR
    /// [`vkQueueSubmit`]: https://registry.khronos.org/vulkan/specs/1.3-extensions/html/vkspec.html#vkQueueSubmit
    present: Vec<vk::Semaphore>,

    /// The number of semaphores in [`present`] to be signalled for this submission.
    ///
    /// [`present`]: SwapchainPresentSemaphores::present
    present_index: usize,

    /// Which image this semaphore set is used for.
    frame_index: usize,
}

impl SwapchainPresentSemaphores {
    pub fn new(frame_index: usize) -> Self {
        Self {
            present: Vec::new(),
            present_index: 0,
            frame_index,
        }
    }

    /// Return the semaphore that the next submission that writes to this image should
    /// signal when it's done.
    ///
    /// See [`SwapchainPresentSemaphores::present`] for details.
    fn get_submit_signal_semaphore(
        &mut self,
        device: &DeviceShared,
    ) -> Result<vk::Semaphore, crate::DeviceError> {
        // Try to recycle a semaphore we created for a previous presentation.
        let sem = match self.present.get(self.present_index) {
            Some(sem) => *sem,
            None => {
                let sem = device.new_binary_semaphore(&format!(
                    "SwapchainImageSemaphore: Image {} present semaphore {}",
                    self.frame_index, self.present_index
                ))?;
                self.present.push(sem);
                sem
            }
        };

        self.present_index += 1;

        Ok(sem)
    }

    /// Indicates the cpu-side usage of this semaphore has finished for the frame,
    /// so reset internal state to be ready for the next frame.
    fn end_semaphore_usage(&mut self) {
        // Reset the index to 0, so that the next time we get a semaphore, we
        // start from the beginning of the list.
        self.present_index = 0;
    }

    /// Return the semaphores that a presentation of this image should wait on.
    ///
    /// Return the semaphores that the call to [`vkQueuePresentKHR`] that
    /// ends this image's acquisition should wait for. See
    /// [`SwapchainPresentSemaphores::present`] for details.
    ///
    /// [`vkQueuePresentKHR`]: https://registry.khronos.org/vulkan/specs/1.3-extensions/html/vkspec.html#vkQueuePresentKHR
    fn get_present_wait_semaphores(&mut self) -> Vec<vk::Semaphore> {
        self.present[0..self.present_index].to_vec()
    }

    unsafe fn destroy(&self, device: &ash::Device) {
        unsafe {
            for sem in &self.present {
                device.destroy_semaphore(*sem, None);
            }
        }
    }
}

struct Swapchain {
    raw: vk::SwapchainKHR,
    functor: khr::swapchain::Device,
    device: Arc<DeviceShared>,
    images: Vec<vk::Image>,
    config: crate::SurfaceConfiguration,

    /// Semaphores used between image acquisition and the first submission
    /// that uses that image. This is indexed using [`next_acquire_index`].
    ///
    /// Because we need to provide this to [`vkAcquireNextImageKHR`], we haven't
    /// received the swapchain image index for the frame yet, so we cannot use
    /// that to index it.
    ///
    /// Before we pass this to [`vkAcquireNextImageKHR`], we ensure that we wait on
    /// the submission indicated by [`previously_used_submission_index`]. This ensures
    /// the semaphore is no longer in use before we use it.
    ///
    /// [`next_acquire_index`]: Swapchain::next_acquire_index
    /// [`vkAcquireNextImageKHR`]: https://registry.khronos.org/vulkan/specs/1.3-extensions/html/vkspec.html#vkAcquireNextImageKHR
    /// [`previously_used_submission_index`]: SwapchainAcquireSemaphore::previously_used_submission_index
    acquire_semaphores: Vec<Arc<Mutex<SwapchainAcquireSemaphore>>>,
    /// The index of the next acquire semaphore to use.
    ///
    /// This is incremented each time we acquire a new image, and wraps around
    /// to 0 when it reaches the end of [`acquire_semaphores`].
    ///
    /// [`acquire_semaphores`]: Swapchain::acquire_semaphores
    next_acquire_index: usize,

    /// Semaphore sets used between all submissions that write to an image and
    /// the presentation of that image.
    ///
    /// This is indexed by the swapchain image index returned by
    /// [`vkAcquireNextImageKHR`].
    ///
    /// We know it is safe to use these semaphores because we use them
    /// _after_ waiting on the acquire semaphore. Because the acquire semaphore
    /// has been signaled, the previous presentation using that image
    /// is known-finished, so this semaphore is no longer in use.
    ///
    /// [`vkAcquireNextImageKHR`]: https://registry.khronos.org/vulkan/specs/1.3-extensions/html/vkspec.html#vkAcquireNextImageKHR
    present_semaphores: Vec<Arc<Mutex<SwapchainPresentSemaphores>>>,

    /// The present timing information which will be set in the next call to [`present()`](crate::Queue::present()).
    ///
    /// # Safety
    ///
    /// This must only be set if [`wgt::Features::VULKAN_GOOGLE_DISPLAY_TIMING`] is enabled, and
    /// so the VK_GOOGLE_display_timing extension is present.
    next_present_time: Option<vk::PresentTimeGOOGLE>,
}

impl Swapchain {
    /// Mark the current frame finished, advancing to the next acquire semaphore.
    fn advance_acquire_semaphore(&mut self) {
        let semaphore_count = self.acquire_semaphores.len();
        self.next_acquire_index = (self.next_acquire_index + 1) % semaphore_count;
    }

    /// Get the next acquire semaphore that should be used with this swapchain.
    fn get_acquire_semaphore(&self) -> Arc<Mutex<SwapchainAcquireSemaphore>> {
        self.acquire_semaphores[self.next_acquire_index].clone()
    }

    /// Get the set of present semaphores that should be used with the given image index.
    fn get_present_semaphores(&self, index: u32) -> Arc<Mutex<SwapchainPresentSemaphores>> {
        self.present_semaphores[index as usize].clone()
    }
}

pub struct Surface {
    raw: vk::SurfaceKHR,
    functor: khr::surface::Instance,
    instance: Arc<InstanceShared>,
    swapchain: RwLock<Option<Swapchain>>,
}

impl Surface {
    /// Get the raw Vulkan swapchain associated with this surface.
    ///
    /// Returns [`None`] if the surface is not configured.
    pub fn raw_swapchain(&self) -> Option<vk::SwapchainKHR> {
        let read = self.swapchain.read();
        read.as_ref().map(|it| it.raw)
    }

    /// Set the present timing information which will be used for the next [presentation](crate::Queue::present()) of this surface,
    /// using [VK_GOOGLE_display_timing].
    ///
    /// This can be used to give an id to presentations, for future use of [`vk::PastPresentationTimingGOOGLE`].
    /// Note that `wgpu-hal` does *not* provide a way to use that API - you should manually access this through [`ash`].
    ///
    /// This can also be used to add a "not before" timestamp to the presentation.
    ///
    /// The exact semantics of the fields are also documented in the [specification](https://registry.khronos.org/vulkan/specs/1.3-extensions/man/html/VkPresentTimeGOOGLE.html) for the extension.
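    ///
    /// A hypothetical usage sketch (the field values here are illustrative only):
    ///
    /// ```skip
    /// surface.set_next_present_time(vk::PresentTimeGOOGLE {
    ///     // An id to recognize this presentation in `vk::PastPresentationTimingGOOGLE`.
    ///     present_id: 1,
    ///     // Zero means "no constraint on the earliest presentation time".
    ///     desired_present_time: 0,
    /// });
    /// ```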
    ///
    /// # Panics
    ///
    /// - If the surface hasn't been configured.
    /// - If the device doesn't [support present timing](wgt::Features::VULKAN_GOOGLE_DISPLAY_TIMING).
    ///
    /// [VK_GOOGLE_display_timing]: https://registry.khronos.org/vulkan/specs/1.3-extensions/man/html/VK_GOOGLE_display_timing.html
    #[track_caller]
    pub fn set_next_present_time(&self, present_timing: vk::PresentTimeGOOGLE) {
        let mut swapchain = self.swapchain.write();
        let swapchain = swapchain
            .as_mut()
            .expect("Surface should have been configured");
        let features = wgt::Features::VULKAN_GOOGLE_DISPLAY_TIMING;
        if swapchain.device.features.contains(features) {
            swapchain.next_present_time = Some(present_timing);
        } else {
            // Ideally we'd use something like `device.required_features` here,
            // but that's in `wgpu-core`, which depends on us.
            panic!(
                concat!(
                    "Tried to set display timing properties ",
                    "without the corresponding feature ({:?}) enabled."
                ),
                features
            );
        }
    }
}

#[derive(Debug)]
pub struct SurfaceTexture {
    index: u32,
    texture: Texture,
    acquire_semaphores: Arc<Mutex<SwapchainAcquireSemaphore>>,
    present_semaphores: Arc<Mutex<SwapchainPresentSemaphores>>,
}

impl crate::DynSurfaceTexture for SurfaceTexture {}

impl Borrow<Texture> for SurfaceTexture {
    fn borrow(&self) -> &Texture {
        &self.texture
    }
}

impl Borrow<dyn crate::DynTexture> for SurfaceTexture {
    fn borrow(&self) -> &dyn crate::DynTexture {
        &self.texture
    }
}

pub struct Adapter {
    raw: vk::PhysicalDevice,
    instance: Arc<InstanceShared>,
    //queue_families: Vec<vk::QueueFamilyProperties>,
    known_memory_flags: vk::MemoryPropertyFlags,
    phd_capabilities: adapter::PhysicalDeviceProperties,
    phd_features: PhysicalDeviceFeatures,
    downlevel_flags: wgt::DownlevelFlags,
    private_caps: PrivateCapabilities,
    workarounds: Workarounds,
}

543// TODO there's no reason why this can't be unified--the function pointers should all be the same--it's not clear how to do this with `ash`.
544enum ExtensionFn<T> {
545 /// The loaded function pointer struct for an extension.
546 Extension(T),
547 /// The extension was promoted to a core version of Vulkan and the functions on `ash`'s `DeviceV1_x` traits should be used.
548 Promoted,
549}
550
551struct DeviceExtensionFunctions {
552 debug_utils: Option<ext::debug_utils::Device>,
553 draw_indirect_count: Option<khr::draw_indirect_count::Device>,
554 timeline_semaphore: Option<ExtensionFn<khr::timeline_semaphore::Device>>,
555 ray_tracing: Option<RayTracingDeviceExtensionFunctions>,
556 mesh_shading: Option<ext::mesh_shader::Device>,
557}
558
559struct RayTracingDeviceExtensionFunctions {
560 acceleration_structure: khr::acceleration_structure::Device,
561 buffer_device_address: khr::buffer_device_address::Device,
562}

/// Set of internal capabilities, which don't show up in the exposed
/// device geometry, but affect the code paths taken internally.
#[derive(Clone, Debug)]
struct PrivateCapabilities {
    image_view_usage: bool,
    timeline_semaphores: bool,
    texture_d24: bool,
    texture_d24_s8: bool,
    texture_s8: bool,
    /// Ability to present contents to any screen. Only needed to work around broken platform configurations.
    can_present: bool,
    non_coherent_map_mask: wgt::BufferAddress,

    /// True if this adapter advertises the [`robustBufferAccess`][vrba] feature.
    ///
    /// Note that Vulkan's `robustBufferAccess` is not sufficient to implement
    /// `wgpu_hal`'s guarantee that shaders will not access buffer contents via
    /// a given bindgroup binding outside that binding's [accessible
    /// region][ar]. Enabling `robustBufferAccess` does ensure that
    /// out-of-bounds reads and writes are not undefined behavior (that's good),
    /// but still permits out-of-bounds reads to return data from anywhere
    /// within the buffer, not just the accessible region.
    ///
    /// [ar]: ../struct.BufferBinding.html#accessible-region
    /// [vrba]: https://registry.khronos.org/vulkan/specs/1.3-extensions/html/vkspec.html#features-robustBufferAccess
    robust_buffer_access: bool,

    robust_image_access: bool,

    /// True if this adapter supports the [`VK_EXT_robustness2`] extension's
    /// [`robustBufferAccess2`] feature.
    ///
    /// This is sufficient to implement `wgpu_hal`'s [required bounds-checking][ar] of
    /// shader accesses to buffer contents. If this feature is not available,
    /// this backend must have Naga inject bounds checks in the generated
    /// SPIR-V.
    ///
    /// [`VK_EXT_robustness2`]: https://registry.khronos.org/vulkan/specs/1.3-extensions/man/html/VK_EXT_robustness2.html
    /// [`robustBufferAccess2`]: https://registry.khronos.org/vulkan/specs/1.3-extensions/man/html/VkPhysicalDeviceRobustness2FeaturesEXT.html#features-robustBufferAccess2
    /// [ar]: ../struct.BufferBinding.html#accessible-region
    robust_buffer_access2: bool,

    robust_image_access2: bool,
    zero_initialize_workgroup_memory: bool,
    image_format_list: bool,
    maximum_samplers: u32,

    /// True if this adapter supports the [`VK_KHR_shader_integer_dot_product`] extension
    /// (promoted to Vulkan 1.3).
    ///
    /// This is used to generate optimized code for WGSL's `dot4{I, U}8Packed`.
    ///
    /// [`VK_KHR_shader_integer_dot_product`]: https://registry.khronos.org/vulkan/specs/1.3-extensions/man/html/VK_KHR_shader_integer_dot_product.html
    shader_integer_dot_product: bool,

    /// True if this adapter supports 8-bit integers provided by the
    /// [`VK_KHR_shader_float16_int8`] extension (promoted to Vulkan 1.2).
    ///
    /// Allows shaders to declare the "Int8" capability. Note, however, that this
    /// feature alone allows the use of 8-bit integers "only in the `Private`,
    /// `Workgroup` (for non-Block variables), and `Function` storage classes"
    /// ([see spec]). To use 8-bit integers in the interface storage classes (e.g.,
    /// `StorageBuffer`), you also need to enable the corresponding feature in
    /// `VkPhysicalDevice8BitStorageFeatures` and declare the corresponding SPIR-V
    /// capability (e.g., `StorageBuffer8BitAccess`).
    ///
    /// [`VK_KHR_shader_float16_int8`]: https://registry.khronos.org/vulkan/specs/latest/man/html/VK_KHR_shader_float16_int8.html
    /// [see spec]: https://registry.khronos.org/vulkan/specs/latest/man/html/VkPhysicalDeviceShaderFloat16Int8Features.html#extension-features-shaderInt8
    shader_int8: bool,
}

bitflags::bitflags!(
    /// Workaround flags.
    #[derive(Debug, Copy, Clone, PartialEq, Eq, Hash)]
    pub struct Workarounds: u32 {
        /// Only generate SPIR-V for one entry point at a time.
        const SEPARATE_ENTRY_POINTS = 0x1;
        /// Qualcomm OOMs when there are zero color attachments but a non-null pointer
        /// to a subpass resolve attachment array. This nulls out that pointer in that case.
        const EMPTY_RESOLVE_ATTACHMENT_LISTS = 0x2;
        /// If the following code returns false, then nvidia will end up filling the wrong range.
        ///
        /// ```skip
        /// fn nvidia_succeeds() -> bool {
        ///     # let (copy_length, start_offset) = (0, 0);
        ///     if copy_length >= 4096 {
        ///         if start_offset % 16 != 0 {
        ///             if copy_length == 4096 {
        ///                 return true;
        ///             }
        ///             if copy_length % 16 == 0 {
        ///                 return false;
        ///             }
        ///         }
        ///     }
        ///     true
        /// }
        /// ```
        ///
        /// As such, we need to make sure all calls to vkCmdFillBuffer are aligned to 16 bytes
        /// if they cover a range of 4096 bytes or more.
        const FORCE_FILL_BUFFER_WITH_SIZE_GREATER_4096_ALIGNED_OFFSET_16 = 0x4;
    }
);

#[derive(Clone, Debug, Eq, Hash, PartialEq)]
struct AttachmentKey {
    format: vk::Format,
    layout: vk::ImageLayout,
    ops: crate::AttachmentOps,
}

impl AttachmentKey {
    /// Returns an attachment key for a compatible attachment.
    fn compatible(format: vk::Format, layout: vk::ImageLayout) -> Self {
        Self {
            format,
            layout,
            ops: crate::AttachmentOps::all(),
        }
    }
}

#[derive(Clone, Eq, Hash, PartialEq)]
struct ColorAttachmentKey {
    base: AttachmentKey,
    resolve: Option<AttachmentKey>,
}

#[derive(Clone, Eq, Hash, PartialEq)]
struct DepthStencilAttachmentKey {
    base: AttachmentKey,
    stencil_ops: crate::AttachmentOps,
}

#[derive(Clone, Eq, Default, Hash, PartialEq)]
struct RenderPassKey {
    colors: ArrayVec<Option<ColorAttachmentKey>, { crate::MAX_COLOR_ATTACHMENTS }>,
    depth_stencil: Option<DepthStencilAttachmentKey>,
    sample_count: u32,
    multiview: Option<NonZeroU32>,
}

struct DeviceShared {
    raw: ash::Device,
    family_index: u32,
    queue_index: u32,
    raw_queue: vk::Queue,
    drop_guard: Option<crate::DropGuard>,
    instance: Arc<InstanceShared>,
    physical_device: vk::PhysicalDevice,
    enabled_extensions: Vec<&'static CStr>,
    extension_fns: DeviceExtensionFunctions,
    vendor_id: u32,
    pipeline_cache_validation_key: [u8; 16],
    timestamp_period: f32,
    private_caps: PrivateCapabilities,
    workarounds: Workarounds,
    features: wgt::Features,
    render_passes: Mutex<FastHashMap<RenderPassKey, vk::RenderPass>>,
    sampler_cache: Mutex<sampler::SamplerCache>,
    memory_allocations_counter: InternalCounter,

    /// Because cached framebuffers are not deleted until the device is
    /// destroyed, and the Vulkan implementation may reuse handles, we need
    /// some way to differentiate between an old handle and a new handle.
    /// This factory gives each texture a dedicated identity value.
    texture_identity_factory: ResourceIdentityFactory<vk::Image>,
    /// As above, for texture views.
    texture_view_identity_factory: ResourceIdentityFactory<vk::ImageView>,
}

impl Drop for DeviceShared {
    fn drop(&mut self) {
        for &raw in self.render_passes.lock().values() {
            unsafe { self.raw.destroy_render_pass(raw, None) };
        }
        if self.drop_guard.is_none() {
            unsafe { self.raw.destroy_device(None) };
        }
    }
}

pub struct Device {
    shared: Arc<DeviceShared>,
    mem_allocator: Mutex<gpu_alloc::GpuAllocator<vk::DeviceMemory>>,
    desc_allocator:
        Mutex<gpu_descriptor::DescriptorAllocator<vk::DescriptorPool, vk::DescriptorSet>>,
    valid_ash_memory_types: u32,
    naga_options: naga::back::spv::Options<'static>,
    #[cfg(feature = "renderdoc")]
    render_doc: crate::auxil::renderdoc::RenderDoc,
    counters: Arc<wgt::HalCounters>,
}

impl Drop for Device {
    fn drop(&mut self) {
        unsafe { self.mem_allocator.lock().cleanup(&*self.shared) };
        unsafe { self.desc_allocator.lock().cleanup(&*self.shared) };
    }
}

/// Semaphores for forcing queue submissions to run in order.
///
/// The [`wgpu_hal::Queue`] trait promises that if two calls to [`submit`] are
/// ordered, then the first submission will finish on the GPU before the second
/// submission begins. To get this behavior on Vulkan we need to pass semaphores
/// to [`vkQueueSubmit`] for the commands to wait on before beginning execution,
/// and to signal when their execution is done.
///
/// Normally this can be done with a single semaphore, waited on and then
/// signalled for each submission. At any given time there's exactly one
/// submission that would signal the semaphore, and exactly one waiting on it,
/// as Vulkan requires.
///
/// However, as of Oct 2021, bug [#5508] in the Mesa ANV drivers caused them to
/// hang if we use a single semaphore. The workaround is to alternate between
/// two semaphores. The bug has been fixed in Mesa, but we should probably keep
/// the workaround until, say, Oct 2026.
///
/// [`wgpu_hal::Queue`]: crate::Queue
/// [`submit`]: crate::Queue::submit
/// [`vkQueueSubmit`]: https://registry.khronos.org/vulkan/specs/1.3-extensions/html/vkspec.html#vkQueueSubmit
/// [#5508]: https://gitlab.freedesktop.org/mesa/mesa/-/issues/5508
788#[derive(Clone)]
789struct RelaySemaphores {
790 /// The semaphore the next submission should wait on before beginning
791 /// execution on the GPU. This is `None` for the first submission, which
792 /// should not wait on anything at all.
793 wait: Option<vk::Semaphore>,
794
795 /// The semaphore the next submission should signal when it has finished
796 /// execution on the GPU.
797 signal: vk::Semaphore,
798}
799
800impl RelaySemaphores {
801 fn new(device: &DeviceShared) -> Result<Self, crate::DeviceError> {
802 Ok(Self {
803 wait: None,
804 signal: device.new_binary_semaphore("RelaySemaphores: 1")?,
805 })
806 }
807
808 /// Advances the semaphores, returning the semaphores that should be used for a submission.
809 fn advance(&mut self, device: &DeviceShared) -> Result<Self, crate::DeviceError> {
810 let old = self.clone();
811
812 // Build the state for the next submission.
813 match self.wait {
814 None => {
815 // The `old` values describe the first submission to this queue.
816 // The second submission should wait on `old.signal`, and then
817 // signal a new semaphore which we'll create now.
818 self.wait = Some(old.signal);
819 self.signal = device.new_binary_semaphore("RelaySemaphores: 2")?;
820 }
821 Some(ref mut wait) => {
822 // What this submission signals, the next should wait.
823 mem::swap(wait, &mut self.signal);
824 }
825 };
826
827 Ok(old)
828 }
829
830 /// Destroys the semaphores.
831 unsafe fn destroy(&self, device: &ash::Device) {
832 unsafe {
833 if let Some(wait) = self.wait {
834 device.destroy_semaphore(wait, None);
835 }
836 device.destroy_semaphore(self.signal, None);
837 }
838 }
839}

pub struct Queue {
    raw: vk::Queue,
    swapchain_fn: khr::swapchain::Device,
    device: Arc<DeviceShared>,
    family_index: u32,
    relay_semaphores: Mutex<RelaySemaphores>,
    signal_semaphores: Mutex<SemaphoreList>,
}

impl Queue {
    pub fn as_raw(&self) -> vk::Queue {
        self.raw
    }
}

impl Drop for Queue {
    fn drop(&mut self) {
        unsafe { self.relay_semaphores.lock().destroy(&self.device.raw) };
    }
}
861#[derive(Debug)]
862enum BufferMemoryBacking {
863 Managed(gpu_alloc::MemoryBlock<vk::DeviceMemory>),
864 VulkanMemory {
865 memory: vk::DeviceMemory,
866 offset: u64,
867 size: u64,
868 },
869}
870impl BufferMemoryBacking {
871 fn memory(&self) -> &vk::DeviceMemory {
872 match self {
873 Self::Managed(m) => m.memory(),
874 Self::VulkanMemory { memory, .. } => memory,
875 }
876 }
877 fn offset(&self) -> u64 {
878 match self {
879 Self::Managed(m) => m.offset(),
880 Self::VulkanMemory { offset, .. } => *offset,
881 }
882 }
883 fn size(&self) -> u64 {
884 match self {
885 Self::Managed(m) => m.size(),
886 Self::VulkanMemory { size, .. } => *size,
887 }
888 }
889}
890#[derive(Debug)]
891pub struct Buffer {
892 raw: vk::Buffer,
893 block: Option<Mutex<BufferMemoryBacking>>,
894}
895impl Buffer {
    /// # Safety
    ///
    /// - `vk_buffer`'s memory must be managed by the caller
    /// - Externally imported buffers can't be mapped by `wgpu`
    pub unsafe fn from_raw(vk_buffer: vk::Buffer) -> Self {
        Self {
            raw: vk_buffer,
            block: None,
        }
    }

    /// # Safety
    ///
    /// - We will use this buffer and the buffer's backing memory range as if we
    ///   have exclusive ownership over it, until the wgpu resource is dropped
    ///   and the wgpu-hal object is cleaned up
    /// - Externally imported buffers can't be mapped by `wgpu`
    /// - `offset` and `size` must be valid with the allocation of `memory`
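    ///
    /// A hypothetical usage sketch (`vk_buffer`, `memory`, and `size` come from
    /// the caller's own Vulkan allocation code):
    ///
    /// ```skip
    /// // SAFETY: `memory` stays alive for the buffer's whole lifetime, and
    /// // `offset`/`size` describe the sub-range bound to `vk_buffer`.
    /// let buffer = unsafe { Buffer::from_raw_managed(vk_buffer, memory, 0, size) };
    /// ```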
    pub unsafe fn from_raw_managed(
        vk_buffer: vk::Buffer,
        memory: vk::DeviceMemory,
        offset: u64,
        size: u64,
    ) -> Self {
        Self {
            raw: vk_buffer,
            block: Some(Mutex::new(BufferMemoryBacking::VulkanMemory {
                memory,
                offset,
                size,
            })),
        }
    }
}

impl crate::DynBuffer for Buffer {}

#[derive(Debug)]
pub struct AccelerationStructure {
    raw: vk::AccelerationStructureKHR,
    buffer: vk::Buffer,
    block: Mutex<gpu_alloc::MemoryBlock<vk::DeviceMemory>>,
    compacted_size_query: Option<vk::QueryPool>,
}

impl crate::DynAccelerationStructure for AccelerationStructure {}

#[derive(Debug)]
pub struct Texture {
    raw: vk::Image,
    drop_guard: Option<crate::DropGuard>,
    external_memory: Option<vk::DeviceMemory>,
    block: Option<gpu_alloc::MemoryBlock<vk::DeviceMemory>>,
    format: wgt::TextureFormat,
    copy_size: crate::CopyExtent,
    identity: ResourceIdentity<vk::Image>,
}

impl crate::DynTexture for Texture {}

impl Texture {
    /// # Safety
    ///
    /// - The image handle must not be manually destroyed
    pub unsafe fn raw_handle(&self) -> vk::Image {
        self.raw
    }
}

#[derive(Debug)]
pub struct TextureView {
    raw_texture: vk::Image,
    raw: vk::ImageView,
    layers: NonZeroU32,
    format: wgt::TextureFormat,
    raw_format: vk::Format,
    base_mip_level: u32,
    dimension: wgt::TextureViewDimension,
    texture_identity: ResourceIdentity<vk::Image>,
    view_identity: ResourceIdentity<vk::ImageView>,
}

impl crate::DynTextureView for TextureView {}

impl TextureView {
    /// # Safety
    ///
    /// - The image view handle must not be manually destroyed
    pub unsafe fn raw_handle(&self) -> vk::ImageView {
        self.raw
    }

    /// Returns the raw texture view, along with its identity.
    fn identified_raw_view(&self) -> IdentifiedTextureView {
        IdentifiedTextureView {
            raw: self.raw,
            identity: self.view_identity,
        }
    }
}

#[derive(Debug)]
pub struct Sampler {
    raw: vk::Sampler,
    create_info: vk::SamplerCreateInfo<'static>,
}

impl crate::DynSampler for Sampler {}

#[derive(Debug)]
pub struct BindGroupLayout {
    raw: vk::DescriptorSetLayout,
    desc_count: gpu_descriptor::DescriptorTotalCount,
    types: Box<[(vk::DescriptorType, u32)]>,
    /// Map of binding index to size.
    binding_arrays: Vec<(u32, NonZeroU32)>,
}

impl crate::DynBindGroupLayout for BindGroupLayout {}

#[derive(Debug)]
pub struct PipelineLayout {
    raw: vk::PipelineLayout,
    binding_arrays: naga::back::spv::BindingMap,
}

impl crate::DynPipelineLayout for PipelineLayout {}

#[derive(Debug)]
pub struct BindGroup {
    set: gpu_descriptor::DescriptorSet<vk::DescriptorSet>,
}

impl crate::DynBindGroup for BindGroup {}

/// Miscellaneous allocation recycling pool for `CommandAllocator`.
#[derive(Default)]
struct Temp {
    marker: Vec<u8>,
    buffer_barriers: Vec<vk::BufferMemoryBarrier<'static>>,
    image_barriers: Vec<vk::ImageMemoryBarrier<'static>>,
}

impl Temp {
    fn clear(&mut self) {
        self.marker.clear();
        self.buffer_barriers.clear();
        self.image_barriers.clear();
    }

    fn make_c_str(&mut self, name: &str) -> &CStr {
        self.marker.clear();
        self.marker.extend_from_slice(name.as_bytes());
        self.marker.push(0);
        // SAFETY: we just pushed the terminating NUL; `name` is expected to
        // contain no interior NUL bytes.
        unsafe { CStr::from_bytes_with_nul_unchecked(&self.marker) }
    }
}

/// Generates unique IDs for each resource of type `T`.
///
/// Because Vulkan handles are not permanently unique, this
/// provides a way to generate unique IDs for each resource.
struct ResourceIdentityFactory<T> {
    #[cfg(not(target_has_atomic = "64"))]
    next_id: Mutex<u64>,
    #[cfg(target_has_atomic = "64")]
    next_id: core::sync::atomic::AtomicU64,
    _phantom: PhantomData<T>,
}

impl<T> ResourceIdentityFactory<T> {
    fn new() -> Self {
        Self {
            #[cfg(not(target_has_atomic = "64"))]
            next_id: Mutex::new(0),
            #[cfg(target_has_atomic = "64")]
            next_id: core::sync::atomic::AtomicU64::new(0),
            _phantom: PhantomData,
        }
    }

    /// Returns a new unique ID for a resource of type `T`.
    fn next(&self) -> ResourceIdentity<T> {
        #[cfg(not(target_has_atomic = "64"))]
        {
            let mut next_id = self.next_id.lock();
            let id = *next_id;
            *next_id += 1;
            ResourceIdentity {
                id,
                _phantom: PhantomData,
            }
        }

        #[cfg(target_has_atomic = "64")]
        ResourceIdentity {
            id: self
                .next_id
                .fetch_add(1, core::sync::atomic::Ordering::Relaxed),
            _phantom: PhantomData,
        }
    }
}

/// A unique identifier for a resource of type `T`.
///
/// This is used as a hashable key for resources, which
/// is permanently unique through the lifetime of the program.
#[derive(Debug, Copy, Clone, Eq, Hash, PartialEq)]
struct ResourceIdentity<T> {
    id: u64,
    _phantom: PhantomData<T>,
}

#[derive(Clone, Eq, Hash, PartialEq)]
struct FramebufferKey {
    raw_pass: vk::RenderPass,
    /// Because this is used as a key in a hash map, we need to include the identity
    /// so that this hashes differently, even if the ImageView handles are the same
    /// between different views.
    attachment_identities: ArrayVec<ResourceIdentity<vk::ImageView>, { MAX_TOTAL_ATTACHMENTS }>,
    /// While this is redundant for calculating the hash, we need access to an array
    /// of all the raw ImageViews when we are creating the actual framebuffer,
    /// so we store this here.
    attachment_views: ArrayVec<vk::ImageView, { MAX_TOTAL_ATTACHMENTS }>,
    extent: wgt::Extent3d,
}

impl FramebufferKey {
    fn push_view(&mut self, view: IdentifiedTextureView) {
        self.attachment_identities.push(view.identity);
        self.attachment_views.push(view.raw);
    }
}

/// A texture view paired with its identity.
#[derive(Copy, Clone)]
struct IdentifiedTextureView {
    raw: vk::ImageView,
    identity: ResourceIdentity<vk::ImageView>,
}

#[derive(Clone, Eq, Hash, PartialEq)]
struct TempTextureViewKey {
    texture: vk::Image,
    /// As this is used in a hash map, we need to
    /// include the identity so that this hashes differently,
    /// even if the Image handles are the same between different images.
    texture_identity: ResourceIdentity<vk::Image>,
    format: vk::Format,
    mip_level: u32,
    depth_slice: u32,
}

pub struct CommandEncoder {
    raw: vk::CommandPool,
    device: Arc<DeviceShared>,

    /// The current command buffer, if `self` is in the ["recording"]
    /// state.
    ///
    /// ["recording"]: crate::CommandEncoder
    ///
    /// If non-`null`, the buffer is in the Vulkan "recording" state.
    active: vk::CommandBuffer,

    /// What kind of pass we are currently within: compute or render.
    bind_point: vk::PipelineBindPoint,

    /// Allocation recycling pool for this encoder.
    temp: Temp,

    /// A pool of available command buffers.
    ///
    /// These are all in the Vulkan "initial" state.
    free: Vec<vk::CommandBuffer>,

    /// A pool of discarded command buffers.
    ///
    /// These could be in any Vulkan state except "pending".
    discarded: Vec<vk::CommandBuffer>,

    /// If this is true, the active renderpass enabled a debug span,
    /// and needs to be disabled on renderpass close.
    rpass_debug_marker_active: bool,

    /// If set, the end of the next render/compute pass will write a timestamp at
    /// the given pool & location.
    end_of_pass_timer_query: Option<(vk::QueryPool, u32)>,

    framebuffers: FastHashMap<FramebufferKey, vk::Framebuffer>,
    temp_texture_views: FastHashMap<TempTextureViewKey, IdentifiedTextureView>,

    counters: Arc<wgt::HalCounters>,
}

impl Drop for CommandEncoder {
    fn drop(&mut self) {
        // SAFETY:
        //
        // VUID-vkDestroyCommandPool-commandPool-00041: wgpu_hal requires that a
        // `CommandBuffer` must live until its execution is complete, and that a
        // `CommandBuffer` must not outlive the `CommandEncoder` that built it.
        // Thus, we know that none of our `CommandBuffers` are in the "pending"
        // state.
        //
        // The other VUIDs are pretty obvious.
        unsafe {
            // `vkDestroyCommandPool` also frees any command buffers allocated
            // from that pool, so there's no need to explicitly call
            // `vkFreeCommandBuffers` on `cmd_encoder`'s `free` and `discarded`
            // fields.
            self.device.raw.destroy_command_pool(self.raw, None);
        }

        for (_, fb) in self.framebuffers.drain() {
            unsafe { self.device.raw.destroy_framebuffer(fb, None) };
        }

        for (_, view) in self.temp_texture_views.drain() {
            unsafe { self.device.raw.destroy_image_view(view.raw, None) };
        }

        self.counters.command_encoders.sub(1);
    }
}

impl CommandEncoder {
    /// # Safety
    ///
    /// - The command buffer handle must not be manually destroyed
    pub unsafe fn raw_handle(&self) -> vk::CommandBuffer {
        self.active
    }
}

impl fmt::Debug for CommandEncoder {
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        f.debug_struct("CommandEncoder")
            .field("raw", &self.raw)
            .finish()
    }
}

#[derive(Debug)]
pub struct CommandBuffer {
    raw: vk::CommandBuffer,
}

impl crate::DynCommandBuffer for CommandBuffer {}

#[derive(Debug)]
#[allow(clippy::large_enum_variant)]
pub enum ShaderModule {
    Raw(vk::ShaderModule),
    Intermediate {
        naga_shader: crate::NagaShader,
        runtime_checks: wgt::ShaderRuntimeChecks,
    },
}

impl crate::DynShaderModule for ShaderModule {}

#[derive(Debug)]
pub struct RenderPipeline {
    raw: vk::Pipeline,
}

impl crate::DynRenderPipeline for RenderPipeline {}

#[derive(Debug)]
pub struct ComputePipeline {
    raw: vk::Pipeline,
}

impl crate::DynComputePipeline for ComputePipeline {}

#[derive(Debug)]
pub struct PipelineCache {
    raw: vk::PipelineCache,
}

impl crate::DynPipelineCache for PipelineCache {}

#[derive(Debug)]
pub struct QuerySet {
    raw: vk::QueryPool,
}

impl crate::DynQuerySet for QuerySet {}

1283/// The [`Api::Fence`] type for [`vulkan::Api`].
1284///
1285/// This is an `enum` because there are two possible implementations of
1286/// `wgpu-hal` fences on Vulkan: Vulkan fences, which work on any version of
1287/// Vulkan, and Vulkan timeline semaphores, which are easier and cheaper but
1288/// require non-1.0 features.
1289///
1290/// [`Device::create_fence`] returns a [`TimelineSemaphore`] if
1291/// [`VK_KHR_timeline_semaphore`] is available and enabled, and a [`FencePool`]
1292/// otherwise.
1293///
1294/// [`Api::Fence`]: crate::Api::Fence
1295/// [`vulkan::Api`]: Api
1296/// [`Device::create_fence`]: crate::Device::create_fence
1297/// [`TimelineSemaphore`]: Fence::TimelineSemaphore
1298/// [`VK_KHR_timeline_semaphore`]: https://registry.khronos.org/vulkan/specs/1.3-extensions/html/vkspec.html#VK_KHR_timeline_semaphore
1299/// [`FencePool`]: Fence::FencePool
1300#[derive(Debug)]
1301pub enum Fence {
1302 /// A Vulkan [timeline semaphore].
1303 ///
1304 /// These are simpler to use than Vulkan fences, since timeline semaphores
1305 /// work exactly the way [`wpgu_hal::Api::Fence`] is specified to work.
1306 ///
1307 /// [timeline semaphore]: https://registry.khronos.org/vulkan/specs/1.3-extensions/html/vkspec.html#synchronization-semaphores
1308 /// [`wpgu_hal::Api::Fence`]: crate::Api::Fence
1309 TimelineSemaphore(vk::Semaphore),
1310
    /// A collection of Vulkan [fence]s, each associated with a [`FenceValue`].
    ///
    /// The effective [`FenceValue`] of this variant is the greater of
    /// `last_completed` and the maximum value associated with a signalled fence
    /// in `active`.
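    ///
    /// For example, if `last_completed` is 3 and `active` holds fences for
    /// values 4 and 5, of which only the first has signaled, the effective
    /// value is 4.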
    ///
    /// Fences are available in all versions of Vulkan, but since they only have
    /// two states, "signaled" and "unsignaled", we need to use a separate fence
    /// for each queue submission we might want to wait for, and remember which
    /// [`FenceValue`] each one represents.
    ///
    /// [fence]: https://registry.khronos.org/vulkan/specs/1.3-extensions/html/vkspec.html#synchronization-fences
    /// [`FenceValue`]: crate::FenceValue
    FencePool {
        last_completed: crate::FenceValue,
        /// The pending fence values have to be ascending.
        active: Vec<(crate::FenceValue, vk::Fence)>,
        free: Vec<vk::Fence>,
    },
}

impl crate::DynFence for Fence {}

impl Fence {
    /// Return the highest [`FenceValue`] among the signalled fences in `active`.
    ///
    /// As an optimization, assume that we already know that the fence has
    /// reached `last_completed`, and don't bother checking fences whose values
    /// are less than that: those fences remain in the `active` array only
    /// because we haven't called `maintain` yet to clean them up.
    ///
    /// [`FenceValue`]: crate::FenceValue
    fn check_active(
        device: &ash::Device,
        mut last_completed: crate::FenceValue,
        active: &[(crate::FenceValue, vk::Fence)],
    ) -> Result<crate::FenceValue, crate::DeviceError> {
        for &(value, raw) in active.iter() {
            unsafe {
                if value > last_completed
                    && device
                        .get_fence_status(raw)
                        .map_err(map_host_device_oom_and_lost_err)?
                {
                    last_completed = value;
                }
            }
        }
        Ok(last_completed)
    }

    /// Return the highest signalled [`FenceValue`] for `self`.
    ///
    /// [`FenceValue`]: crate::FenceValue
    fn get_latest(
        &self,
        device: &ash::Device,
        extension: Option<&ExtensionFn<khr::timeline_semaphore::Device>>,
    ) -> Result<crate::FenceValue, crate::DeviceError> {
        match *self {
            Self::TimelineSemaphore(raw) => unsafe {
                Ok(match *extension.unwrap() {
                    ExtensionFn::Extension(ref ext) => ext
                        .get_semaphore_counter_value(raw)
                        .map_err(map_host_device_oom_and_lost_err)?,
                    ExtensionFn::Promoted => device
                        .get_semaphore_counter_value(raw)
                        .map_err(map_host_device_oom_and_lost_err)?,
                })
            },
            Self::FencePool {
                last_completed,
                ref active,
                free: _,
            } => Self::check_active(device, last_completed, active),
        }
    }

    /// Trim the internal state of this [`Fence`].
    ///
    /// This function has no externally visible effect, but you should call it
    /// periodically to keep this fence's resource consumption under control.
    ///
    /// For fences using the [`FencePool`] implementation, this function
    /// recycles fences that have been signaled. If you don't call this,
    /// [`Queue::submit`] will just keep allocating a new Vulkan fence every
    /// time it's called.
    ///
    /// [`FencePool`]: Fence::FencePool
    /// [`Queue::submit`]: crate::Queue::submit
    fn maintain(&mut self, device: &ash::Device) -> Result<(), crate::DeviceError> {
        match *self {
            Self::TimelineSemaphore(_) => {}
            Self::FencePool {
                ref mut last_completed,
                ref mut active,
                ref mut free,
            } => {
                let latest = Self::check_active(device, *last_completed, active)?;
                let base_free = free.len();
                for &(value, raw) in active.iter() {
                    if value <= latest {
                        free.push(raw);
                    }
                }
                if free.len() != base_free {
                    active.retain(|&(value, _)| value > latest);
                    unsafe { device.reset_fences(&free[base_free..]) }
                        .map_err(map_device_oom_err)?
                }
                *last_completed = latest;
            }
        }
        Ok(())
    }
}

impl crate::Queue for Queue {
    type A = Api;

    unsafe fn submit(
        &self,
        command_buffers: &[&CommandBuffer],
        surface_textures: &[&SurfaceTexture],
        (signal_fence, signal_value): (&mut Fence, crate::FenceValue),
    ) -> Result<(), crate::DeviceError> {
        let mut fence_raw = vk::Fence::null();

        let mut wait_stage_masks = Vec::new();
        let mut wait_semaphores = Vec::new();
        let mut signal_semaphores = SemaphoreList::default();

        // Double check that the same swapchain image isn't being given to us multiple times,
        // as that will deadlock when we try to lock them all.
        debug_assert!(
            {
                let mut check = HashSet::with_capacity(surface_textures.len());
                // We compare the Arcs by pointer, as Eq isn't well defined for SurfaceSemaphores.
                for st in surface_textures {
                    check.insert(Arc::as_ptr(&st.acquire_semaphores) as usize);
                    check.insert(Arc::as_ptr(&st.present_semaphores) as usize);
                }
                check.len() == surface_textures.len() * 2
            },
            "More than one surface texture is being used from the same swapchain. This will cause a deadlock in release."
        );

        let locked_swapchain_semaphores = surface_textures
            .iter()
            .map(|st| {
                let acquire = st
                    .acquire_semaphores
                    .try_lock()
                    .expect("Failed to lock surface acquire semaphore");
                let present = st
                    .present_semaphores
                    .try_lock()
                    .expect("Failed to lock surface present semaphore");

                (acquire, present)
            })
            .collect::<Vec<_>>();

        for (mut acquire_semaphore, mut present_semaphores) in locked_swapchain_semaphores {
            acquire_semaphore.set_used_fence_value(signal_value);

            // If we're the first submission to operate on this image, wait on
            // its acquire semaphore, to make sure the presentation engine is
            // done with it.
            if let Some(sem) = acquire_semaphore.get_acquire_wait_semaphore() {
                wait_stage_masks.push(vk::PipelineStageFlags::TOP_OF_PIPE);
                wait_semaphores.push(sem);
            }

            // Get a semaphore to signal when we're done writing to this surface
            // image. Presentation of this image will wait for this.
            let signal_semaphore = present_semaphores.get_submit_signal_semaphore(&self.device)?;
            signal_semaphores.push_binary(signal_semaphore);
        }

        let mut guard = self.signal_semaphores.lock();
        if !guard.is_empty() {
            signal_semaphores.append(&mut guard);
        }

        // In order for submissions to be strictly ordered, we encode a dependency between each submission
        // using a pair of semaphores. This adds a wait if it is needed, and signals the next semaphore.
        let semaphore_state = self.relay_semaphores.lock().advance(&self.device)?;

        if let Some(sem) = semaphore_state.wait {
            wait_stage_masks.push(vk::PipelineStageFlags::TOP_OF_PIPE);
            wait_semaphores.push(sem);
        }

        signal_semaphores.push_binary(semaphore_state.signal);

        // We need to signal our wgpu::Fence if we have one; this adds it to the signal list.
        signal_fence.maintain(&self.device.raw)?;
        match *signal_fence {
            Fence::TimelineSemaphore(raw) => {
                signal_semaphores.push_timeline(raw, signal_value);
            }
            Fence::FencePool {
                ref mut active,
                ref mut free,
                ..
            } => {
                fence_raw = match free.pop() {
                    Some(raw) => raw,
                    None => unsafe {
                        self.device
                            .raw
                            .create_fence(&vk::FenceCreateInfo::default(), None)
                            .map_err(map_host_device_oom_err)?
                    },
                };
                active.push((signal_value, fence_raw));
            }
        }

        let vk_cmd_buffers = command_buffers
            .iter()
            .map(|cmd| cmd.raw)
            .collect::<Vec<_>>();

        let mut vk_info = vk::SubmitInfo::default().command_buffers(&vk_cmd_buffers);

        vk_info = vk_info
            .wait_semaphores(&wait_semaphores)
            .wait_dst_stage_mask(&wait_stage_masks);

        let mut vk_timeline_info = mem::MaybeUninit::uninit();
        vk_info = signal_semaphores.add_to_submit(vk_info, &mut vk_timeline_info);

        profiling::scope!("vkQueueSubmit");
        unsafe {
            self.device
                .raw
                .queue_submit(self.raw, &[vk_info], fence_raw)
                .map_err(map_host_device_oom_and_lost_err)?
        };
        Ok(())
    }

    unsafe fn present(
        &self,
        surface: &Surface,
        texture: SurfaceTexture,
    ) -> Result<(), crate::SurfaceError> {
        let mut swapchain = surface.swapchain.write();
        let ssc = swapchain.as_mut().unwrap();
        let mut acquire_semaphore = texture.acquire_semaphores.lock();
        let mut present_semaphores = texture.present_semaphores.lock();

        let wait_semaphores = present_semaphores.get_present_wait_semaphores();

        // Reset the acquire and present semaphores' internal state
        // to be ready for the next frame.
        //
        // We do this before the actual call to present to ensure that
        // even if this method errors and returns early, we have reset
        // the state for the next frame.
        acquire_semaphore.end_semaphore_usage();
        present_semaphores.end_semaphore_usage();

        drop(acquire_semaphore);
1577
1578 let swapchains = [ssc.raw];
1579 let image_indices = [texture.index];
1580 let vk_info = vk::PresentInfoKHR::default()
1581 .swapchains(&swapchains)
1582 .image_indices(&image_indices)
1583 .wait_semaphores(&wait_semaphores);
1584
1585 let mut display_timing;
1586 let present_times;
1587 let vk_info = if let Some(present_time) = ssc.next_present_time.take() {
1588 debug_assert!(
1589 ssc.device
1590 .features
1591 .contains(wgt::Features::VULKAN_GOOGLE_DISPLAY_TIMING),
1592 "`next_present_time` should only be set if `VULKAN_GOOGLE_DISPLAY_TIMING` is enabled"
1593 );
1594 present_times = [present_time];
1595 display_timing = vk::PresentTimesInfoGOOGLE::default().times(&present_times);
1596 // SAFETY: We know that VK_GOOGLE_display_timing is present because of the safety contract on `next_present_time`.
1597 vk_info.push_next(&mut display_timing)
1598 } else {
1599 vk_info
1600 };
1601
1602 let suboptimal = {
1603 profiling::scope!("vkQueuePresentKHR");
1604 unsafe { self.swapchain_fn.queue_present(self.raw, &vk_info) }.map_err(|error| {
1605 match error {
1606 vk::Result::ERROR_OUT_OF_DATE_KHR => crate::SurfaceError::Outdated,
1607 vk::Result::ERROR_SURFACE_LOST_KHR => crate::SurfaceError::Lost,
1608 // We don't use VK_EXT_full_screen_exclusive
1609 // VK_ERROR_FULL_SCREEN_EXCLUSIVE_MODE_LOST_EXT
1610 _ => map_host_device_oom_and_lost_err(error).into(),
1611 }
1612 })?
1613 };
1614 if suboptimal {
1615 // We treat `VK_SUBOPTIMAL_KHR` as `VK_SUCCESS` on Android.
1616 // On Android 10+, libvulkan's `vkQueuePresentKHR` implementation returns `VK_SUBOPTIMAL_KHR` if not doing pre-rotation
1617 // (i.e `VkSwapchainCreateInfoKHR::preTransform` not being equal to the current device orientation).
1618 // This is always the case when the device orientation is anything other than the identity one, as we unconditionally use `VK_SURFACE_TRANSFORM_IDENTITY_BIT_KHR`.
1619 #[cfg(not(target_os = "android"))]
1620 log::warn!("Suboptimal present of frame {}", texture.index);
1621 }
1622 Ok(())
1623 }

    unsafe fn get_timestamp_period(&self) -> f32 {
        self.device.timestamp_period
    }
}

impl Queue {
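    /// Returns the raw [`ash::Device`] backing this queue's device.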
    pub fn raw_device(&self) -> &ash::Device {
        &self.device.raw
    }

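    /// Registers an extra semaphore to be signaled by the next submission on
    /// this queue.
    ///
    /// If `semaphore_value` is `Some`, `semaphore` is treated as a timeline
    /// semaphore and is signaled with the given value; otherwise it is
    /// treated as a binary semaphore.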
    pub fn add_signal_semaphore(&self, semaphore: vk::Semaphore, semaphore_value: Option<u64>) {
        let mut guard = self.signal_semaphores.lock();
        if let Some(value) = semaphore_value {
            guard.push_timeline(semaphore, value);
        } else {
            guard.push_binary(semaphore);
        }
    }
}

/// Maps
///
/// - VK_ERROR_OUT_OF_HOST_MEMORY
/// - VK_ERROR_OUT_OF_DEVICE_MEMORY
fn map_host_device_oom_err(err: vk::Result) -> crate::DeviceError {
    match err {
        vk::Result::ERROR_OUT_OF_HOST_MEMORY | vk::Result::ERROR_OUT_OF_DEVICE_MEMORY => {
            get_oom_err(err)
        }
        e => get_unexpected_err(e),
    }
}

/// Maps
///
/// - VK_ERROR_OUT_OF_HOST_MEMORY
/// - VK_ERROR_OUT_OF_DEVICE_MEMORY
/// - VK_ERROR_DEVICE_LOST
fn map_host_device_oom_and_lost_err(err: vk::Result) -> crate::DeviceError {
    match err {
        vk::Result::ERROR_DEVICE_LOST => get_lost_err(),
        other => map_host_device_oom_err(other),
    }
}

/// Maps
///
/// - VK_ERROR_OUT_OF_HOST_MEMORY
/// - VK_ERROR_OUT_OF_DEVICE_MEMORY
/// - VK_ERROR_INVALID_OPAQUE_CAPTURE_ADDRESS_KHR
fn map_host_device_oom_and_ioca_err(err: vk::Result) -> crate::DeviceError {
    // We don't use VK_KHR_buffer_device_address
    // VK_ERROR_INVALID_OPAQUE_CAPTURE_ADDRESS_KHR
    map_host_device_oom_err(err)
}

/// Maps
///
/// - VK_ERROR_OUT_OF_HOST_MEMORY
fn map_host_oom_err(err: vk::Result) -> crate::DeviceError {
    match err {
        vk::Result::ERROR_OUT_OF_HOST_MEMORY => get_oom_err(err),
        e => get_unexpected_err(e),
    }
}

/// Maps
///
/// - VK_ERROR_OUT_OF_DEVICE_MEMORY
fn map_device_oom_err(err: vk::Result) -> crate::DeviceError {
    match err {
        vk::Result::ERROR_OUT_OF_DEVICE_MEMORY => get_oom_err(err),
        e => get_unexpected_err(e),
    }
}

/// Maps
///
/// - VK_ERROR_OUT_OF_HOST_MEMORY
/// - VK_ERROR_INVALID_OPAQUE_CAPTURE_ADDRESS_KHR
fn map_host_oom_and_ioca_err(err: vk::Result) -> crate::DeviceError {
    // We don't use VK_KHR_buffer_device_address
    // VK_ERROR_INVALID_OPAQUE_CAPTURE_ADDRESS_KHR
    map_host_oom_err(err)
}

/// Maps
///
/// - VK_ERROR_OUT_OF_HOST_MEMORY
/// - VK_ERROR_OUT_OF_DEVICE_MEMORY
/// - VK_PIPELINE_COMPILE_REQUIRED_EXT
/// - VK_ERROR_INVALID_SHADER_NV
fn map_pipeline_err(err: vk::Result) -> crate::DeviceError {
    // We don't use VK_EXT_pipeline_creation_cache_control
    // VK_PIPELINE_COMPILE_REQUIRED_EXT
    // We don't use VK_NV_glsl_shader
    // VK_ERROR_INVALID_SHADER_NV
    map_host_device_oom_err(err)
}

/// Returns [`crate::DeviceError::Unexpected`] or panics if the `internal_error_panic`
/// feature flag is enabled.
fn get_unexpected_err(_err: vk::Result) -> crate::DeviceError {
    #[cfg(feature = "internal_error_panic")]
    panic!("Unexpected Vulkan error: {_err:?}");

    #[allow(unreachable_code)]
    crate::DeviceError::Unexpected
}

/// Returns [`crate::DeviceError::OutOfMemory`].
fn get_oom_err(_err: vk::Result) -> crate::DeviceError {
    crate::DeviceError::OutOfMemory
}

/// Returns [`crate::DeviceError::Lost`] or panics if the `device_lost_panic`
/// feature flag is enabled.
fn get_lost_err() -> crate::DeviceError {
    #[cfg(feature = "device_lost_panic")]
    panic!("Device lost");

    #[allow(unreachable_code)]
    crate::DeviceError::Lost
}
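/// A top-level acceleration structure instance record, laid out to match
/// `VkAccelerationStructureInstanceKHR`: a 3x4 row-major transform, a 24-bit
/// instance custom index packed with an 8-bit visibility mask, a 24-bit
/// shader binding table record offset packed with 8-bit instance flags, and
/// the device address of the referenced acceleration structure.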
#[derive(Clone, Copy, Pod, Zeroable)]
#[repr(C)]
struct RawTlasInstance {
    transform: [f32; 12],
    custom_data_and_mask: u32,
    shader_binding_table_record_offset_and_flags: u32,
    acceleration_structure_reference: u64,
}

/// Arguments to the [`CreateDeviceCallback`].
pub struct CreateDeviceCallbackArgs<'arg, 'pnext, 'this>
where
    'this: 'pnext,
{
    /// The extensions to enable for the device. You must not remove anything from this list,
    /// but you may add to it.
    pub extensions: &'arg mut Vec<&'static CStr>,
    /// The physical device features to enable. You may enable features, but must not disable any.
    pub device_features: &'arg mut PhysicalDeviceFeatures,
    /// The queue create infos for the device. You may add or modify queue create infos as needed.
    pub queue_create_infos: &'arg mut Vec<vk::DeviceQueueCreateInfo<'pnext>>,
    /// The create info for the device. You may add or modify things in the pnext chain, but
    /// do not turn features off. Additionally, do not add things to the list of extensions
    /// or to the feature set, as all changes to those members will be overwritten.
    pub create_info: &'arg mut vk::DeviceCreateInfo<'pnext>,
    /// We need `'this` in the struct so we can declare that all lifetimes coming from
    /// captures in the closure outlive (and hence satisfy) `'pnext`. However, we
    /// don't actually use `'this` directly.
    _phantom: PhantomData<&'this ()>,
}

/// Callback to allow changing the Vulkan device creation parameters.
///
/// # Safety
///
/// - If you want to add extensions, add them to the `Vec<&'static CStr>`, not to the
///   create info, as the extension list in the create info will be overwritten.
/// - The callback must not remove features.
/// - The callback must not enable anything the underlying physical device does not support.
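///
/// # Example
///
/// A minimal sketch of constructing such a callback; the extension chosen
/// here is purely illustrative.
///
/// ```ignore
/// let callback: Box<CreateDeviceCallback> = Box::new(|args: CreateDeviceCallbackArgs| {
///     // Enable an extra device extension; entries in this `Vec` are what
///     // actually end up in `VkDeviceCreateInfo::ppEnabledExtensionNames`.
///     args.extensions.push(ash::khr::swapchain::NAME);
/// });
/// ```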
pub type CreateDeviceCallback<'this> =
    dyn for<'arg, 'pnext> FnOnce(CreateDeviceCallbackArgs<'arg, 'pnext, 'this>) + 'this;

/// Arguments to the [`CreateInstanceCallback`].
pub struct CreateInstanceCallbackArgs<'arg, 'pnext, 'this>
where
    'this: 'pnext,
{
    /// The extensions to enable for the instance. You must not remove anything from this list,
    /// but you may add to it.
    pub extensions: &'arg mut Vec<&'static CStr>,
    /// The create info for the instance. You may add or modify things in the pnext chain, but
    /// do not turn features off. Additionally, do not add things to the list of extensions,
    /// as all changes to that member will be overwritten.
    pub create_info: &'arg mut vk::InstanceCreateInfo<'pnext>,
    /// The Vulkan entry point.
    pub entry: &'arg ash::Entry,
    /// We need `'this` in the struct so we can declare that all lifetimes coming from
    /// captures in the closure outlive (and hence satisfy) `'pnext`. However, we
    /// don't actually use `'this` directly.
    _phantom: PhantomData<&'this ()>,
}

/// Callback to allow changing the Vulkan instance creation parameters.
///
/// # Safety
///
/// - If you want to add extensions, add them to the `Vec<&'static CStr>`, not to the
///   create info, as the extension list in the create info will be overwritten.
/// - The callback must not remove features.
/// - The callback must not enable anything the instance does not support.
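///
/// # Example
///
/// A minimal sketch of constructing such a callback; the extension chosen
/// here is purely illustrative.
///
/// ```ignore
/// let callback: Box<CreateInstanceCallback> = Box::new(|args: CreateInstanceCallbackArgs| {
///     // Enable an extra instance extension; entries in this `Vec` are what
///     // actually end up in `VkInstanceCreateInfo::ppEnabledExtensionNames`.
///     args.extensions.push(ash::ext::debug_utils::NAME);
/// });
/// ```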
pub type CreateInstanceCallback<'this> =
    dyn for<'arg, 'pnext> FnOnce(CreateInstanceCallbackArgs<'arg, 'pnext, 'this>) + 'this;