wgpu_hal/vulkan/
mod.rs

1/*!
2# Vulkan API internals.
3
4## Stack memory
5
6Ash expects slices, which we don't generally have available.
We cope with this requirement through a combination of the following approaches:
8  - temporarily allocating `Vec` on heap, where overhead is permitted
9  - growing temporary local storage
10
11## Framebuffers and Render passes
12
13Render passes are cached on the device and kept forever.
14
Framebuffers are also cached on the device, but they are removed when
any of the image views they refer to is removed.
17If Vulkan supports image-less framebuffers,
18then the actual views are excluded from the framebuffer key.
19
20## Fences
21
22If timeline semaphores are available, they are used 1:1 with wgpu-hal fences.
23Otherwise, we manage a pool of `VkFence` objects behind each `hal::Fence`.
24
25!*/
26
27mod adapter;
28mod command;
29mod conv;
30mod device;
31mod drm;
32mod instance;
33mod sampler;
34mod semaphore_list;
35
36pub use adapter::PhysicalDeviceFeatures;
37
38use alloc::{boxed::Box, ffi::CString, sync::Arc, vec::Vec};
39use core::{borrow::Borrow, ffi::CStr, fmt, marker::PhantomData, mem, num::NonZeroU32};
40
41use arrayvec::ArrayVec;
42use ash::{ext, khr, vk};
43use bytemuck::{Pod, Zeroable};
44use hashbrown::HashSet;
45use parking_lot::{Mutex, RwLock};
46
47use naga::FastHashMap;
48use wgt::InternalCounter;
49
50use semaphore_list::SemaphoreList;
51
/// Multiplier for converting milliseconds into the nanoseconds Vulkan timeouts expect.
const MILLIS_TO_NANOS: u64 = 1_000_000;
/// Upper bound on attachments in one render pass: a base and resolve attachment
/// per color slot, plus one depth/stencil attachment.
const MAX_TOTAL_ATTACHMENTS: usize = crate::MAX_COLOR_ATTACHMENTS * 2 + 1;
54
/// The Vulkan backend's marker type implementing [`crate::Api`].
#[derive(Clone, Debug)]
pub struct Api;
57
// Maps each HAL abstraction onto the Vulkan backend's concrete type.
impl crate::Api for Api {
    type Instance = Instance;
    type Surface = Surface;
    type Adapter = Adapter;
    type Device = Device;

    type Queue = Queue;
    type CommandEncoder = CommandEncoder;
    type CommandBuffer = CommandBuffer;

    type Buffer = Buffer;
    type Texture = Texture;
    type SurfaceTexture = SurfaceTexture;
    type TextureView = TextureView;
    type Sampler = Sampler;
    type QuerySet = QuerySet;
    type Fence = Fence;
    type AccelerationStructure = AccelerationStructure;
    type PipelineCache = PipelineCache;

    type BindGroupLayout = BindGroupLayout;
    type BindGroup = BindGroup;
    type PipelineLayout = PipelineLayout;
    type ShaderModule = ShaderModule;
    type RenderPipeline = RenderPipeline;
    type ComputePipeline = ComputePipeline;
}
85
// Implement the object-safe `Dyn*` resource traits for every resource type
// this backend exposes.
crate::impl_dyn_resource!(
    Adapter,
    AccelerationStructure,
    BindGroup,
    BindGroupLayout,
    Buffer,
    CommandBuffer,
    CommandEncoder,
    ComputePipeline,
    Device,
    Fence,
    Instance,
    PipelineCache,
    PipelineLayout,
    QuerySet,
    Queue,
    RenderPipeline,
    Sampler,
    ShaderModule,
    Surface,
    SurfaceTexture,
    Texture,
    TextureView
);
110
/// State for the `VK_EXT_debug_utils` messenger installed on the instance.
struct DebugUtils {
    /// Loaded instance-level entry points for `VK_EXT_debug_utils`.
    extension: ext::debug_utils::Instance,
    /// The registered debug messenger handle.
    messenger: vk::DebugUtilsMessengerEXT,

    /// Owning pointer to the debug messenger callback user data.
    ///
    /// `InstanceShared::drop` destroys the debug messenger before
    /// dropping this, so the callback should never receive a dangling
    /// user data pointer.
    #[allow(dead_code)]
    callback_data: Box<DebugUtilsMessengerUserData>,
}
123
/// Parameters used to create the debug-utils messenger.
pub struct DebugUtilsCreateInfo {
    /// Message severities the callback should be invoked for.
    severity: vk::DebugUtilsMessageSeverityFlagsEXT,
    /// Message types the callback should be invoked for.
    message_type: vk::DebugUtilsMessageTypeFlagsEXT,
    /// User data handed to the callback; see [`DebugUtilsMessengerUserData`].
    callback_data: Box<DebugUtilsMessengerUserData>,
}
129
/// The properties related to the validation layer needed for the
/// DebugUtilsMessenger for their workarounds.
///
/// Doc comment moved ahead of the `derive` attribute, per convention.
#[derive(Debug)]
struct ValidationLayerProperties {
    /// Validation layer description, from `vk::LayerProperties`.
    layer_description: CString,

    /// Validation layer specification version, from `vk::LayerProperties`.
    layer_spec_version: u32,
}
140
/// User data needed by `instance::debug_utils_messenger_callback`.
///
/// When we create the [`vk::DebugUtilsMessengerEXT`], the `pUserData`
/// pointer refers to one of these values.
#[derive(Debug)]
pub struct DebugUtilsMessengerUserData {
    /// The properties related to the validation layer, if it is present.
    validation_layer_properties: Option<ValidationLayerProperties>,

    /// Whether the OBS layer is present. OBS never increments the version of their layer,
    /// so there's no reason to have the version.
    has_obs_layer: bool,
}
154
/// Instance-level state shared between the [`Instance`], adapters, and devices.
pub struct InstanceShared {
    raw: ash::Instance,
    /// Names of the instance extensions that were enabled at creation.
    extensions: Vec<&'static CStr>,
    /// When `Some`, the raw instance handle is externally owned and must not
    /// be destroyed by us.
    drop_guard: Option<crate::DropGuard>,
    flags: wgt::InstanceFlags,
    memory_budget_thresholds: wgt::MemoryBudgetThresholds,
    /// Debug messenger state, if a debug-utils messenger was installed.
    debug_utils: Option<DebugUtils>,
    get_physical_device_properties: Option<khr::get_physical_device_properties2::Instance>,
    entry: ash::Entry,
    // NOTE(review): presumably set when the NV Optimus layer/driver is detected — confirm in `instance.rs`.
    has_nv_optimus: bool,
    android_sdk_version: u32,
    /// The instance API version.
    ///
    /// Which is the version of Vulkan supported for instance-level functionality.
    ///
    /// It is associated with a `VkInstance` and its children,
    /// except for a `VkPhysicalDevice` and its children.
    instance_api_version: u32,
}
174
/// The Vulkan HAL instance, a shared handle to [`InstanceShared`].
pub struct Instance {
    shared: Arc<InstanceShared>,
}
178
/// Semaphore used to acquire a swapchain image.
///
/// One of these exists per slot of [`Swapchain::acquire_semaphores`].
#[derive(Debug)]
struct SwapchainAcquireSemaphore {
    /// A semaphore that is signaled when this image is safe for us to modify.
    ///
    /// When [`vkAcquireNextImageKHR`] returns the index of the next swapchain
    /// image that we should use, that image may actually still be in use by the
    /// presentation engine, and is not yet safe to modify. However, that
    /// function does accept a semaphore that it will signal when the image is
    /// indeed safe to begin messing with.
    ///
    /// This semaphore is:
    ///
    /// - waited for by the first queue submission to operate on this image
    ///   since it was acquired, and
    ///
    /// - signaled by [`vkAcquireNextImageKHR`] when the acquired image is ready
    ///   for us to use.
    ///
    /// [`vkAcquireNextImageKHR`]: https://registry.khronos.org/vulkan/specs/1.3-extensions/html/vkspec.html#vkAcquireNextImageKHR
    acquire: vk::Semaphore,

    /// True if the next command submission operating on this image should wait
    /// for [`acquire`].
    ///
    /// We must wait for `acquire` before drawing to this swapchain image, but
    /// because `wgpu-hal` queue submissions are always strongly ordered, only
    /// the first submission that works with a swapchain image actually needs to
    /// wait. We set this flag when this image is acquired, and clear it the
    /// first time it's passed to [`Queue::submit`] as a surface texture.
    ///
    /// Additionally, semaphores can only be waited on once, so we need to ensure
    /// that we only actually pass this semaphore to the first submission that
    /// uses that image.
    ///
    /// [`acquire`]: SwapchainAcquireSemaphore::acquire
    /// [`Queue::submit`]: crate::Queue::submit
    should_wait_for_acquire: bool,

    /// The fence value of the last command submission that wrote to this image.
    ///
    /// The next time we try to acquire this image, we'll block until
    /// this submission finishes, proving that [`acquire`] is ready to
    /// pass to `vkAcquireNextImageKHR` again.
    ///
    /// [`acquire`]: SwapchainAcquireSemaphore::acquire
    previously_used_submission_index: crate::FenceValue,
}
227
228impl SwapchainAcquireSemaphore {
229    fn new(device: &DeviceShared, index: usize) -> Result<Self, crate::DeviceError> {
230        Ok(Self {
231            acquire: device
232                .new_binary_semaphore(&format!("SwapchainImageSemaphore: Index {index} acquire"))?,
233            should_wait_for_acquire: true,
234            previously_used_submission_index: 0,
235        })
236    }
237
238    /// Sets the fence value which the next acquire will wait for. This prevents
239    /// the semaphore from being used while the previous submission is still in flight.
240    fn set_used_fence_value(&mut self, value: crate::FenceValue) {
241        self.previously_used_submission_index = value;
242    }
243
244    /// Return the semaphore that commands drawing to this image should wait for, if any.
245    ///
246    /// This only returns `Some` once per acquisition; see
247    /// [`SwapchainAcquireSemaphore::should_wait_for_acquire`] for details.
248    fn get_acquire_wait_semaphore(&mut self) -> Option<vk::Semaphore> {
249        if self.should_wait_for_acquire {
250            self.should_wait_for_acquire = false;
251            Some(self.acquire)
252        } else {
253            None
254        }
255    }
256
257    /// Indicates the cpu-side usage of this semaphore has finished for the frame,
258    /// so reset internal state to be ready for the next frame.
259    fn end_semaphore_usage(&mut self) {
260        // Reset the acquire semaphore, so that the next time we acquire this
261        // image, we can wait for it again.
262        self.should_wait_for_acquire = true;
263    }
264
265    unsafe fn destroy(&self, device: &ash::Device) {
266        unsafe {
267            device.destroy_semaphore(self.acquire, None);
268        }
269    }
270}
271
#[derive(Debug)]
struct SwapchainPresentSemaphores {
    /// A pool of semaphores for ordering presentation after drawing.
    ///
    /// The first [`present_index`] semaphores in this vector are:
    ///
    /// - all waited on by the call to [`vkQueuePresentKHR`] that presents this
    ///   image, and
    ///
    /// - each signaled by some [`vkQueueSubmit`] queue submission that draws to
    ///   this image, when the submission finishes execution.
    ///
    /// This vector accumulates one semaphore per submission that writes to this
    /// image. This is awkward, but hard to avoid: [`vkQueuePresentKHR`]
    /// requires a semaphore to order it with respect to drawing commands, and
    /// we can't attach new completion semaphores to a command submission after
    /// it's been submitted. This means that, at submission time, we must create
    /// the semaphore we might need if the caller's next action is to enqueue a
    /// presentation of this image.
    ///
    /// An alternative strategy would be for presentation to enqueue an empty
    /// submit, ordered relative to other submits in the usual way, and
    /// signaling a single presentation semaphore. But we suspect that submits
    /// are usually expensive enough, and semaphores usually cheap enough, that
    /// performance-sensitive users will avoid making many submits, so that the
    /// cost of accumulated semaphores will usually be less than the cost of an
    /// additional submit.
    ///
    /// Only the first [`present_index`] semaphores in the vector are actually
    /// going to be signaled by submitted commands, and need to be waited for
    /// by the next present call. Any semaphores beyond that index were created
    /// for prior presents and are simply being retained for recycling.
    ///
    /// [`present_index`]: SwapchainPresentSemaphores::present_index
    /// [`vkQueuePresentKHR`]: https://registry.khronos.org/vulkan/specs/1.3-extensions/html/vkspec.html#vkQueuePresentKHR
    /// [`vkQueueSubmit`]: https://registry.khronos.org/vulkan/specs/1.3-extensions/html/vkspec.html#vkQueueSubmit
    present: Vec<vk::Semaphore>,

    /// The number of semaphores in [`present`] to be signaled for this submission.
    ///
    /// [`present`]: SwapchainPresentSemaphores::present
    present_index: usize,

    /// Which swapchain image this semaphore set is used for.
    frame_index: usize,
}
318
319impl SwapchainPresentSemaphores {
320    pub fn new(frame_index: usize) -> Self {
321        Self {
322            present: Vec::new(),
323            present_index: 0,
324            frame_index,
325        }
326    }
327
328    /// Return the semaphore that the next submission that writes to this image should
329    /// signal when it's done.
330    ///
331    /// See [`SwapchainPresentSemaphores::present`] for details.
332    fn get_submit_signal_semaphore(
333        &mut self,
334        device: &DeviceShared,
335    ) -> Result<vk::Semaphore, crate::DeviceError> {
336        // Try to recycle a semaphore we created for a previous presentation.
337        let sem = match self.present.get(self.present_index) {
338            Some(sem) => *sem,
339            None => {
340                let sem = device.new_binary_semaphore(&format!(
341                    "SwapchainImageSemaphore: Image {} present semaphore {}",
342                    self.frame_index, self.present_index
343                ))?;
344                self.present.push(sem);
345                sem
346            }
347        };
348
349        self.present_index += 1;
350
351        Ok(sem)
352    }
353
354    /// Indicates the cpu-side usage of this semaphore has finished for the frame,
355    /// so reset internal state to be ready for the next frame.
356    fn end_semaphore_usage(&mut self) {
357        // Reset the index to 0, so that the next time we get a semaphore, we
358        // start from the beginning of the list.
359        self.present_index = 0;
360    }
361
362    /// Return the semaphores that a presentation of this image should wait on.
363    ///
364    /// Return a slice of semaphores that the call to [`vkQueueSubmit`] that
365    /// ends this image's acquisition should wait for. See
366    /// [`SwapchainPresentSemaphores::present`] for details.
367    ///
368    /// Reset `self` to be ready for the next acquisition cycle.
369    ///
370    /// [`vkQueueSubmit`]: https://registry.khronos.org/vulkan/specs/1.3-extensions/html/vkspec.html#vkQueueSubmit
371    fn get_present_wait_semaphores(&mut self) -> Vec<vk::Semaphore> {
372        self.present[0..self.present_index].to_vec()
373    }
374
375    unsafe fn destroy(&self, device: &ash::Device) {
376        unsafe {
377            for sem in &self.present {
378                device.destroy_semaphore(*sem, None);
379            }
380        }
381    }
382}
383
struct Swapchain {
    raw: vk::SwapchainKHR,
    /// Loaded `VK_KHR_swapchain` device-level entry points.
    functor: khr::swapchain::Device,
    device: Arc<DeviceShared>,
    /// The images belonging to this swapchain.
    images: Vec<vk::Image>,
    /// The configuration this swapchain was created with.
    config: crate::SurfaceConfiguration,

    /// Semaphores used between image acquisition and the first submission
    /// that uses that image. This is indexed using [`next_acquire_index`].
    ///
    /// Because we need to provide this to [`vkAcquireNextImageKHR`], we haven't
    /// received the swapchain image index for the frame yet, so we cannot use
    /// that to index it.
    ///
    /// Before we pass this to [`vkAcquireNextImageKHR`], we ensure that we wait on
    /// the submission indicated by [`previously_used_submission_index`]. This ensures
    /// the semaphore is no longer in use before we use it.
    ///
    /// [`next_acquire_index`]: Swapchain::next_acquire_index
    /// [`vkAcquireNextImageKHR`]: https://registry.khronos.org/vulkan/specs/1.3-extensions/html/vkspec.html#vkAcquireNextImageKHR
    /// [`previously_used_submission_index`]: SwapchainAcquireSemaphore::previously_used_submission_index
    acquire_semaphores: Vec<Arc<Mutex<SwapchainAcquireSemaphore>>>,
    /// The index of the next acquire semaphore to use.
    ///
    /// This is incremented each time we acquire a new image, and wraps around
    /// to 0 when it reaches the end of [`acquire_semaphores`].
    ///
    /// [`acquire_semaphores`]: Swapchain::acquire_semaphores
    next_acquire_index: usize,

    /// Semaphore sets used between all submissions that write to an image and
    /// the presentation of that image.
    ///
    /// This is indexed by the swapchain image index returned by
    /// [`vkAcquireNextImageKHR`].
    ///
    /// We know it is safe to use these semaphores because we use them
    /// _after_ the acquire semaphore. Because the acquire semaphore
    /// has been signaled, the previous presentation using that image
    /// is known-finished, so this semaphore is no longer in use.
    ///
    /// [`vkAcquireNextImageKHR`]: https://registry.khronos.org/vulkan/specs/1.3-extensions/html/vkspec.html#vkAcquireNextImageKHR
    present_semaphores: Vec<Arc<Mutex<SwapchainPresentSemaphores>>>,

    /// The present timing information which will be set in the next call to [`present()`](crate::Queue::present()).
    ///
    /// # Safety
    ///
    /// This must only be set if [`wgt::Features::VULKAN_GOOGLE_DISPLAY_TIMING`] is enabled, and
    /// so the VK_GOOGLE_display_timing extension is present.
    next_present_time: Option<vk::PresentTimeGOOGLE>,
}
436
437impl Swapchain {
438    /// Mark the current frame finished, advancing to the next acquire semaphore.
439    fn advance_acquire_semaphore(&mut self) {
440        let semaphore_count = self.acquire_semaphores.len();
441        self.next_acquire_index = (self.next_acquire_index + 1) % semaphore_count;
442    }
443
444    /// Get the next acquire semaphore that should be used with this swapchain.
445    fn get_acquire_semaphore(&self) -> Arc<Mutex<SwapchainAcquireSemaphore>> {
446        self.acquire_semaphores[self.next_acquire_index].clone()
447    }
448
449    /// Get the set of present semaphores that should be used with the given image index.
450    fn get_present_semaphores(&self, index: u32) -> Arc<Mutex<SwapchainPresentSemaphores>> {
451        self.present_semaphores[index as usize].clone()
452    }
453}
454
/// A presentable Vulkan surface, together with its current swapchain.
pub struct Surface {
    raw: vk::SurfaceKHR,
    /// Loaded `VK_KHR_surface` instance-level entry points.
    functor: khr::surface::Instance,
    instance: Arc<InstanceShared>,
    /// The current swapchain; `None` until the surface is configured.
    swapchain: RwLock<Option<Swapchain>>,
}
461
462impl Surface {
463    /// Get the raw Vulkan swapchain associated with this surface.
464    ///
465    /// Returns [`None`] if the surface is not configured.
466    pub fn raw_swapchain(&self) -> Option<vk::SwapchainKHR> {
467        let read = self.swapchain.read();
468        read.as_ref().map(|it| it.raw)
469    }
470
471    /// Set the present timing information which will be used for the next [presentation](crate::Queue::present()) of this surface,
472    /// using [VK_GOOGLE_display_timing].
473    ///
474    /// This can be used to give an id to presentations, for future use of [`vk::PastPresentationTimingGOOGLE`].
475    /// Note that `wgpu-hal` does *not* provide a way to use that API - you should manually access this through [`ash`].
476    ///
477    /// This can also be used to add a "not before" timestamp to the presentation.
478    ///
479    /// The exact semantics of the fields are also documented in the [specification](https://registry.khronos.org/vulkan/specs/1.3-extensions/man/html/VkPresentTimeGOOGLE.html) for the extension.
480    ///
481    /// # Panics
482    ///
483    /// - If the surface hasn't been configured.
484    /// - If the device doesn't [support present timing](wgt::Features::VULKAN_GOOGLE_DISPLAY_TIMING).
485    ///
486    /// [VK_GOOGLE_display_timing]: https://registry.khronos.org/vulkan/specs/1.3-extensions/man/html/VK_GOOGLE_display_timing.html
487    #[track_caller]
488    pub fn set_next_present_time(&self, present_timing: vk::PresentTimeGOOGLE) {
489        let mut swapchain = self.swapchain.write();
490        let swapchain = swapchain
491            .as_mut()
492            .expect("Surface should have been configured");
493        let features = wgt::Features::VULKAN_GOOGLE_DISPLAY_TIMING;
494        if swapchain.device.features.contains(features) {
495            swapchain.next_present_time = Some(present_timing);
496        } else {
497            // Ideally we'd use something like `device.required_features` here, but that's in `wgpu-core`, which we are a dependency of
498            panic!(
499                concat!(
500                    "Tried to set display timing properties ",
501                    "without the corresponding feature ({:?}) enabled."
502                ),
503                features
504            );
505        }
506    }
507}
508
/// A swapchain image that has been acquired for drawing and presentation.
#[derive(Debug)]
pub struct SurfaceTexture {
    /// The swapchain image index this texture corresponds to.
    index: u32,
    /// The HAL texture wrapping the swapchain image.
    texture: Texture,
    /// Acquire-semaphore state for this image; see [`SwapchainAcquireSemaphore`].
    acquire_semaphores: Arc<Mutex<SwapchainAcquireSemaphore>>,
    /// Present-semaphore pool for this image; see [`SwapchainPresentSemaphores`].
    present_semaphores: Arc<Mutex<SwapchainPresentSemaphores>>,
}
516
impl crate::DynSurfaceTexture for SurfaceTexture {}

// Let a `SurfaceTexture` be used anywhere a `Texture` is expected.
impl Borrow<Texture> for SurfaceTexture {
    fn borrow(&self) -> &Texture {
        &self.texture
    }
}

// Same, for the type-erased texture trait object.
impl Borrow<dyn crate::DynTexture> for SurfaceTexture {
    fn borrow(&self) -> &dyn crate::DynTexture {
        &self.texture
    }
}
530
/// A Vulkan physical device together with the capability data gathered from it.
pub struct Adapter {
    raw: vk::PhysicalDevice,
    instance: Arc<InstanceShared>,
    //queue_families: Vec<vk::QueueFamilyProperties>,
    /// Memory property flags this backend knows how to handle.
    known_memory_flags: vk::MemoryPropertyFlags,
    phd_capabilities: adapter::PhysicalDeviceProperties,
    phd_features: PhysicalDeviceFeatures,
    downlevel_flags: wgt::DownlevelFlags,
    /// Internal capabilities that affect code paths; see [`PrivateCapabilities`].
    private_caps: PrivateCapabilities,
    /// Driver-specific workarounds to apply; see [`Workarounds`].
    workarounds: Workarounds,
}
542
// TODO: there's no reason why this can't be unified — the function pointers should
// all be the same — but it's not clear how to do this with `ash`.
/// A device extension's entry points, or a marker that the extension was promoted to core.
enum ExtensionFn<T> {
    /// The loaded function pointer struct for an extension.
    Extension(T),
    /// The extension was promoted to a core version of Vulkan and the functions on `ash`'s `DeviceV1_x` traits should be used.
    Promoted,
}
550
/// Entry points for optional device extensions, loaded at device creation.
/// Each field is `None` when the corresponding extension is unavailable or unused.
struct DeviceExtensionFunctions {
    debug_utils: Option<ext::debug_utils::Device>,
    draw_indirect_count: Option<khr::draw_indirect_count::Device>,
    timeline_semaphore: Option<ExtensionFn<khr::timeline_semaphore::Device>>,
    ray_tracing: Option<RayTracingDeviceExtensionFunctions>,
    mesh_shading: Option<ext::mesh_shader::Device>,
}
558
/// Entry points for the device extensions needed for ray tracing.
struct RayTracingDeviceExtensionFunctions {
    acceleration_structure: khr::acceleration_structure::Device,
    buffer_device_address: khr::buffer_device_address::Device,
}
563
/// Set of internal capabilities, which don't show up in the exposed
/// device geometry, but affect the code paths taken internally.
#[derive(Clone, Debug)]
struct PrivateCapabilities {
    // NOTE(review): presumably whether `VkImageViewUsageCreateInfo` is available — confirm in `adapter.rs`.
    image_view_usage: bool,
    /// Whether timeline semaphores are available; when true they back
    /// wgpu-hal fences 1:1 (see the module docs on fences).
    timeline_semaphores: bool,
    texture_d24: bool,
    texture_d24_s8: bool,
    texture_s8: bool,
    /// Ability to present contents to any screen. Only needed to work around broken platform configurations.
    can_present: bool,
    // NOTE(review): presumably an alignment mask derived from `nonCoherentAtomSize` — confirm in `adapter.rs`.
    non_coherent_map_mask: wgt::BufferAddress,

    /// True if this adapter advertises the [`robustBufferAccess`][vrba] feature.
    ///
    /// Note that Vulkan's `robustBufferAccess` is not sufficient to implement
    /// `wgpu_hal`'s guarantee that shaders will not access buffer contents via
    /// a given bindgroup binding outside that binding's [accessible
    /// region][ar]. Enabling `robustBufferAccess` does ensure that
    /// out-of-bounds reads and writes are not undefined behavior (that's good),
    /// but still permits out-of-bounds reads to return data from anywhere
    /// within the buffer, not just the accessible region.
    ///
    /// [ar]: ../struct.BufferBinding.html#accessible-region
    /// [vrba]: https://registry.khronos.org/vulkan/specs/1.3-extensions/html/vkspec.html#features-robustBufferAccess
    robust_buffer_access: bool,

    robust_image_access: bool,

    /// True if this adapter supports the [`VK_EXT_robustness2`] extension's
    /// [`robustBufferAccess2`] feature.
    ///
    /// This is sufficient to implement `wgpu_hal`'s [required bounds-checking][ar] of
    /// shader accesses to buffer contents. If this feature is not available,
    /// this backend must have Naga inject bounds checks in the generated
    /// SPIR-V.
    ///
    /// [`VK_EXT_robustness2`]: https://registry.khronos.org/vulkan/specs/1.3-extensions/man/html/VK_EXT_robustness2.html
    /// [`robustBufferAccess2`]: https://registry.khronos.org/vulkan/specs/1.3-extensions/man/html/VkPhysicalDeviceRobustness2FeaturesEXT.html#features-robustBufferAccess2
    /// [ar]: ../struct.BufferBinding.html#accessible-region
    robust_buffer_access2: bool,

    robust_image_access2: bool,
    zero_initialize_workgroup_memory: bool,
    image_format_list: bool,
    maximum_samplers: u32,

    /// True if this adapter supports the [`VK_KHR_shader_integer_dot_product`] extension
    /// (promoted to Vulkan 1.3).
    ///
    /// This is used to generate optimized code for WGSL's `dot4{I, U}8Packed`.
    ///
    /// [`VK_KHR_shader_integer_dot_product`]: https://registry.khronos.org/vulkan/specs/1.3-extensions/man/html/VK_KHR_shader_integer_dot_product.html
    shader_integer_dot_product: bool,

    /// True if this adapter supports 8-bit integers provided by the
    /// [`VK_KHR_shader_float16_int8`] extension (promoted to Vulkan 1.2).
    ///
    /// Allows shaders to declare the "Int8" capability. Note, however, that this
    /// feature alone allows the use of 8-bit integers "only in the `Private`,
    /// `Workgroup` (for non-Block variables), and `Function` storage classes"
    /// ([see spec]). To use 8-bit integers in the interface storage classes (e.g.,
    /// `StorageBuffer`), you also need to enable the corresponding feature in
    /// `VkPhysicalDevice8BitStorageFeatures` and declare the corresponding SPIR-V
    /// capability (e.g., `StorageBuffer8BitAccess`).
    ///
    /// [`VK_KHR_shader_float16_int8`]: https://registry.khronos.org/vulkan/specs/latest/man/html/VK_KHR_shader_float16_int8.html
    /// [see spec]: https://registry.khronos.org/vulkan/specs/latest/man/html/VkPhysicalDeviceShaderFloat16Int8Features.html#extension-features-shaderInt8
    shader_int8: bool,
}
634
bitflags::bitflags!(
    /// Driver-specific workaround flags, set per adapter.
    #[derive(Debug, Copy, Clone, PartialEq, Eq, Hash)]
    pub struct Workarounds: u32 {
        /// Only generate SPIR-V for one entry point at a time.
        const SEPARATE_ENTRY_POINTS = 0x1;
        /// Qualcomm OOMs when there are zero color attachments but a non-null pointer
        /// to a subpass resolve attachment array. This nulls out that pointer in that case.
        const EMPTY_RESOLVE_ATTACHMENT_LISTS = 0x2;
        /// If the following code returns false, then NVIDIA will end up filling the wrong range.
        ///
        /// ```skip
        /// fn nvidia_succeeds() -> bool {
        ///   # let (copy_length, start_offset) = (0, 0);
        ///     if copy_length >= 4096 {
        ///         if start_offset % 16 != 0 {
        ///             if copy_length == 4096 {
        ///                 return true;
        ///             }
        ///             if copy_length % 16 == 0 {
        ///                 return false;
        ///             }
        ///         }
        ///     }
        ///     true
        /// }
        /// ```
        ///
        /// As such, we need to make sure all calls to vkCmdFillBuffer are aligned to 16 bytes
        /// if they cover a range of 4096 bytes or more.
        const FORCE_FILL_BUFFER_WITH_SIZE_GREATER_4096_ALIGNED_OFFSET_16 = 0x4;
    }
);
668
/// Cache-key description of a single render-pass attachment.
#[derive(Clone, Debug, Eq, Hash, PartialEq)]
struct AttachmentKey {
    format: vk::Format,
    layout: vk::ImageLayout,
    /// Load/store operations for the attachment.
    ops: crate::AttachmentOps,
}
675
676impl AttachmentKey {
677    /// Returns an attachment key for a compatible attachment.
678    fn compatible(format: vk::Format, layout: vk::ImageLayout) -> Self {
679        Self {
680            format,
681            layout,
682            ops: crate::AttachmentOps::all(),
683        }
684    }
685}
686
/// Cache key for a color attachment and its optional resolve target.
#[derive(Clone, Eq, Hash, PartialEq)]
struct ColorAttachmentKey {
    base: AttachmentKey,
    resolve: Option<AttachmentKey>,
}
692
/// Cache key for a depth/stencil attachment.
#[derive(Clone, Eq, Hash, PartialEq)]
struct DepthStencilAttachmentKey {
    base: AttachmentKey,
    /// Load/store operations for the stencil aspect.
    stencil_ops: crate::AttachmentOps,
}
698
/// Key for the render passes cached in [`DeviceShared::render_passes`].
#[derive(Clone, Eq, Default, Hash, PartialEq)]
struct RenderPassKey {
    colors: ArrayVec<Option<ColorAttachmentKey>, { crate::MAX_COLOR_ATTACHMENTS }>,
    depth_stencil: Option<DepthStencilAttachmentKey>,
    sample_count: u32,
    multiview: Option<NonZeroU32>,
}
706
/// Device-level state shared by the device, queue, and their resources.
struct DeviceShared {
    raw: ash::Device,
    family_index: u32,
    queue_index: u32,
    raw_queue: vk::Queue,
    /// When `Some`, the raw device handle is externally owned and must not be
    /// destroyed on drop (see `Drop for DeviceShared`).
    drop_guard: Option<crate::DropGuard>,
    instance: Arc<InstanceShared>,
    physical_device: vk::PhysicalDevice,
    /// Names of the device extensions that were enabled at creation.
    enabled_extensions: Vec<&'static CStr>,
    extension_fns: DeviceExtensionFunctions,
    vendor_id: u32,
    pipeline_cache_validation_key: [u8; 16],
    timestamp_period: f32,
    private_caps: PrivateCapabilities,
    workarounds: Workarounds,
    features: wgt::Features,
    /// Render passes cached by attachment configuration; entries live until the
    /// device is destroyed (see the module docs).
    render_passes: Mutex<FastHashMap<RenderPassKey, vk::RenderPass>>,
    sampler_cache: Mutex<sampler::SamplerCache>,
    memory_allocations_counter: InternalCounter,

    /// Because we have cached framebuffers which are not deleted from until
    /// the device is destroyed, if the implementation of vulkan re-uses handles
    /// we need some way to differentiate between the old handle and the new handle.
    /// This factory allows us to have a dedicated identity value for each texture.
    texture_identity_factory: ResourceIdentityFactory<vk::Image>,
    /// As above, for texture views.
    texture_view_identity_factory: ResourceIdentityFactory<vk::ImageView>,
}
735
impl Drop for DeviceShared {
    fn drop(&mut self) {
        // Destroy every render pass still held in the cache.
        for &raw in self.render_passes.lock().values() {
            // SAFETY: nothing can still be using these cached render passes
            // once the shared device state itself is being dropped.
            unsafe { self.raw.destroy_render_pass(raw, None) };
        }
        // Only destroy the raw device if we own it; a `drop_guard` means the
        // handle was supplied externally.
        if self.drop_guard.is_none() {
            // SAFETY: callers uphold `vkDestroyDevice`'s requirement that all
            // child objects have been destroyed or are externally managed.
            unsafe { self.raw.destroy_device(None) };
        }
    }
}
746
/// The Vulkan HAL device: shared device state plus memory and descriptor allocators.
pub struct Device {
    shared: Arc<DeviceShared>,
    /// Suballocator for `VkDeviceMemory`.
    mem_allocator: Mutex<gpu_alloc::GpuAllocator<vk::DeviceMemory>>,
    /// Allocator for descriptor pools and sets.
    desc_allocator:
        Mutex<gpu_descriptor::DescriptorAllocator<vk::DescriptorPool, vk::DescriptorSet>>,
    // NOTE(review): presumably a bitmask of memory-type indices usable by gpu-alloc — confirm in `device.rs`.
    valid_ash_memory_types: u32,
    /// Options used when Naga translates shaders to SPIR-V.
    naga_options: naga::back::spv::Options<'static>,
    #[cfg(feature = "renderdoc")]
    render_doc: crate::auxil::renderdoc::RenderDoc,
    counters: Arc<wgt::HalCounters>,
}
758
impl Drop for Device {
    fn drop(&mut self) {
        // Release remaining allocator bookkeeping while the raw device (owned
        // by `DeviceShared`) is still alive.
        unsafe { self.mem_allocator.lock().cleanup(&*self.shared) };
        unsafe { self.desc_allocator.lock().cleanup(&*self.shared) };
    }
}
765
/// Semaphores for forcing queue submissions to run in order.
///
/// The [`wgpu_hal::Queue`] trait promises that if two calls to [`submit`] are
/// ordered, then the first submission will finish on the GPU before the second
/// submission begins. To get this behavior on Vulkan we need to pass semaphores
/// to [`vkQueueSubmit`] for the commands to wait on before beginning execution,
/// and to signal when their execution is done.
///
/// Normally this can be done with a single semaphore, waited on and then
/// signalled for each submission. At any given time there's exactly one
/// submission that would signal the semaphore, and exactly one waiting on it,
/// as Vulkan requires.
///
/// However, as of Oct 2021, bug [#5508] in the Mesa ANV drivers caused them to
/// hang if we use a single semaphore. The workaround is to alternate between
/// two semaphores. The bug has been fixed in Mesa, but we should probably keep
/// the workaround until, say, Oct 2026.
///
/// See [`RelaySemaphores::advance`] for the rotation logic.
///
/// [`wgpu_hal::Queue`]: crate::Queue
/// [`submit`]: crate::Queue::submit
/// [`vkQueueSubmit`]: https://registry.khronos.org/vulkan/specs/1.3-extensions/html/vkspec.html#vkQueueSubmit
/// [#5508]: https://gitlab.freedesktop.org/mesa/mesa/-/issues/5508
#[derive(Clone)]
struct RelaySemaphores {
    /// The semaphore the next submission should wait on before beginning
    /// execution on the GPU. This is `None` for the first submission, which
    /// should not wait on anything at all.
    wait: Option<vk::Semaphore>,

    /// The semaphore the next submission should signal when it has finished
    /// execution on the GPU.
    signal: vk::Semaphore,
}
799
800impl RelaySemaphores {
801    fn new(device: &DeviceShared) -> Result<Self, crate::DeviceError> {
802        Ok(Self {
803            wait: None,
804            signal: device.new_binary_semaphore("RelaySemaphores: 1")?,
805        })
806    }
807
808    /// Advances the semaphores, returning the semaphores that should be used for a submission.
809    fn advance(&mut self, device: &DeviceShared) -> Result<Self, crate::DeviceError> {
810        let old = self.clone();
811
812        // Build the state for the next submission.
813        match self.wait {
814            None => {
815                // The `old` values describe the first submission to this queue.
816                // The second submission should wait on `old.signal`, and then
817                // signal a new semaphore which we'll create now.
818                self.wait = Some(old.signal);
819                self.signal = device.new_binary_semaphore("RelaySemaphores: 2")?;
820            }
821            Some(ref mut wait) => {
822                // What this submission signals, the next should wait.
823                mem::swap(wait, &mut self.signal);
824            }
825        };
826
827        Ok(old)
828    }
829
830    /// Destroys the semaphores.
831    unsafe fn destroy(&self, device: &ash::Device) {
832        unsafe {
833            if let Some(wait) = self.wait {
834                device.destroy_semaphore(wait, None);
835            }
836            device.destroy_semaphore(self.signal, None);
837        }
838    }
839}
840
/// The [`crate::Api::Queue`] type for the Vulkan backend.
pub struct Queue {
    raw: vk::Queue,
    /// Device-level `VK_KHR_swapchain` entry points, used by `present`.
    swapchain_fn: khr::swapchain::Device,
    device: Arc<DeviceShared>,
    /// Index of the queue family that `raw` belongs to.
    family_index: u32,
    /// Semaphores forcing submissions to run in order; see [`RelaySemaphores`].
    relay_semaphores: Mutex<RelaySemaphores>,
    /// Extra semaphores registered via [`Queue::add_signal_semaphore`],
    /// appended to the signal list of the next `submit`.
    signal_semaphores: Mutex<SemaphoreList>,
}
849
impl Queue {
    /// Returns the raw Vulkan queue handle.
    pub fn as_raw(&self) -> vk::Queue {
        self.raw
    }
}
855
impl Drop for Queue {
    fn drop(&mut self) {
        // Only the relay semaphores are destroyed here; semaphores queued in
        // `signal_semaphores` were supplied by callers and are not ours to free.
        unsafe { self.relay_semaphores.lock().destroy(&self.device.raw) };
    }
}
/// How a [`Buffer`]'s device memory is owned.
#[derive(Debug)]
enum BufferMemoryBacking {
    /// A block allocated and owned by our `gpu_alloc` allocator.
    Managed(gpu_alloc::MemoryBlock<vk::DeviceMemory>),
    /// A caller-provided region of raw device memory; see
    /// [`Buffer::from_raw_managed`] for the ownership contract.
    VulkanMemory {
        memory: vk::DeviceMemory,
        offset: u64,
        size: u64,
    },
}
870impl BufferMemoryBacking {
871    fn memory(&self) -> &vk::DeviceMemory {
872        match self {
873            Self::Managed(m) => m.memory(),
874            Self::VulkanMemory { memory, .. } => memory,
875        }
876    }
877    fn offset(&self) -> u64 {
878        match self {
879            Self::Managed(m) => m.offset(),
880            Self::VulkanMemory { offset, .. } => *offset,
881        }
882    }
883    fn size(&self) -> u64 {
884        match self {
885            Self::Managed(m) => m.size(),
886            Self::VulkanMemory { size, .. } => *size,
887        }
888    }
889}
/// The [`crate::Api::Buffer`] type for the Vulkan backend.
#[derive(Debug)]
pub struct Buffer {
    raw: vk::Buffer,
    /// Backing memory. `None` for externally owned buffers created with
    /// [`Buffer::from_raw`], which cannot be mapped by `wgpu`.
    block: Option<Mutex<BufferMemoryBacking>>,
}
impl Buffer {
    /// Wraps an externally created buffer whose memory we do not own.
    ///
    /// # Safety
    ///
    /// - `vk_buffer`'s memory must be managed by the caller
    /// - Externally imported buffers can't be mapped by `wgpu`
    pub unsafe fn from_raw(vk_buffer: vk::Buffer) -> Self {
        Self {
            raw: vk_buffer,
            block: None,
        }
    }
    /// Wraps an externally created buffer together with its backing memory region.
    ///
    /// # Safety
    /// - We will use this buffer and the buffer's backing memory range as if we have exclusive ownership over it, until the wgpu resource is dropped and the wgpu-hal object is cleaned up
    /// - Externally imported buffers can't be mapped by `wgpu`
    /// - `offset` and `size` must be valid with the allocation of `memory`
    pub unsafe fn from_raw_managed(
        vk_buffer: vk::Buffer,
        memory: vk::DeviceMemory,
        offset: u64,
        size: u64,
    ) -> Self {
        Self {
            raw: vk_buffer,
            block: Some(Mutex::new(BufferMemoryBacking::VulkanMemory {
                memory,
                offset,
                size,
            })),
        }
    }
}

impl crate::DynBuffer for Buffer {}
928
/// The [`crate::Api::AccelerationStructure`] type for the Vulkan backend.
#[derive(Debug)]
pub struct AccelerationStructure {
    raw: vk::AccelerationStructureKHR,
    /// The buffer that stores the acceleration structure's data.
    buffer: vk::Buffer,
    /// Allocator block backing `buffer`.
    block: Mutex<gpu_alloc::MemoryBlock<vk::DeviceMemory>>,
    /// Query pool used to read back the compacted size, when compaction is requested.
    compacted_size_query: Option<vk::QueryPool>,
}

impl crate::DynAccelerationStructure for AccelerationStructure {}
938
/// The [`crate::Api::Texture`] type for the Vulkan backend.
#[derive(Debug)]
pub struct Texture {
    raw: vk::Image,
    // Presumably set when `raw` is owned externally (e.g. swapchain or
    // imported images) so we skip destroying it — TODO confirm in `device.rs`.
    drop_guard: Option<crate::DropGuard>,
    /// Dedicated imported device memory, if any.
    external_memory: Option<vk::DeviceMemory>,
    /// Allocator block backing the image; `None` when the memory is not
    /// owned by our allocator.
    block: Option<gpu_alloc::MemoryBlock<vk::DeviceMemory>>,
    format: wgt::TextureFormat,
    copy_size: crate::CopyExtent,
    /// Process-unique identity for `raw`; see [`ResourceIdentityFactory`].
    identity: ResourceIdentity<vk::Image>,
}

impl crate::DynTexture for Texture {}

impl Texture {
    /// Returns the raw `VkImage` handle.
    ///
    /// # Safety
    ///
    /// - The image handle must not be manually destroyed
    pub unsafe fn raw_handle(&self) -> vk::Image {
        self.raw
    }
}
960
/// The [`crate::Api::TextureView`] type for the Vulkan backend.
#[derive(Debug)]
pub struct TextureView {
    /// The image this view was created from.
    raw_texture: vk::Image,
    raw: vk::ImageView,
    layers: NonZeroU32,
    format: wgt::TextureFormat,
    raw_format: vk::Format,
    base_mip_level: u32,
    dimension: wgt::TextureViewDimension,
    /// Identity of the parent texture; see [`ResourceIdentityFactory`].
    texture_identity: ResourceIdentity<vk::Image>,
    /// Identity of this view; used in [`FramebufferKey`] hashing.
    view_identity: ResourceIdentity<vk::ImageView>,
}

impl crate::DynTextureView for TextureView {}

impl TextureView {
    /// Returns the raw `VkImageView` handle.
    ///
    /// # Safety
    ///
    /// - The image view handle must not be manually destroyed
    pub unsafe fn raw_handle(&self) -> vk::ImageView {
        self.raw
    }

    /// Returns the raw texture view, along with its identity.
    fn identified_raw_view(&self) -> IdentifiedTextureView {
        IdentifiedTextureView {
            raw: self.raw,
            identity: self.view_identity,
        }
    }
}
992
/// The [`crate::Api::Sampler`] type for the Vulkan backend.
#[derive(Debug)]
pub struct Sampler {
    raw: vk::Sampler,
    // NOTE(review): the creation parameters are retained alongside the handle,
    // presumably as a cache/equality key — see `mod sampler`; confirm there.
    create_info: vk::SamplerCreateInfo<'static>,
}

impl crate::DynSampler for Sampler {}
1000
/// The [`crate::Api::BindGroupLayout`] type for the Vulkan backend.
#[derive(Debug)]
pub struct BindGroupLayout {
    raw: vk::DescriptorSetLayout,
    /// Descriptor counts required to allocate a set with this layout.
    desc_count: gpu_descriptor::DescriptorTotalCount,
    /// Descriptor type and count for each entry of the layout.
    types: Box<[(vk::DescriptorType, u32)]>,
    /// Map of binding index to array size.
    binding_arrays: Vec<(u32, NonZeroU32)>,
}

impl crate::DynBindGroupLayout for BindGroupLayout {}
1011
/// The [`crate::Api::PipelineLayout`] type for the Vulkan backend.
#[derive(Debug)]
pub struct PipelineLayout {
    raw: vk::PipelineLayout,
    /// Binding remap information passed to the Naga SPIR-V backend.
    binding_arrays: naga::back::spv::BindingMap,
}

impl crate::DynPipelineLayout for PipelineLayout {}
1019
/// The [`crate::Api::BindGroup`] type for the Vulkan backend.
#[derive(Debug)]
pub struct BindGroup {
    /// The descriptor set, owned by the `gpu_descriptor` allocator.
    set: gpu_descriptor::DescriptorSet<vk::DescriptorSet>,
}

impl crate::DynBindGroup for BindGroup {}
1026
/// Miscellaneous allocation recycling pool for `CommandAllocator`.
#[derive(Default)]
struct Temp {
    /// Scratch buffer for building NUL-terminated debug-marker strings.
    marker: Vec<u8>,
    /// Reusable storage for buffer memory barriers.
    buffer_barriers: Vec<vk::BufferMemoryBarrier<'static>>,
    /// Reusable storage for image memory barriers.
    image_barriers: Vec<vk::ImageMemoryBarrier<'static>>,
}
1034
1035impl Temp {
1036    fn clear(&mut self) {
1037        self.marker.clear();
1038        self.buffer_barriers.clear();
1039        self.image_barriers.clear();
1040    }
1041
1042    fn make_c_str(&mut self, name: &str) -> &CStr {
1043        self.marker.clear();
1044        self.marker.extend_from_slice(name.as_bytes());
1045        self.marker.push(0);
1046        unsafe { CStr::from_bytes_with_nul_unchecked(&self.marker) }
1047    }
1048}
1049
/// Generates unique IDs for each resource of type `T`.
///
/// Because vk handles are not permanently unique, this
/// provides a way to generate unique IDs for each resource.
struct ResourceIdentityFactory<T> {
    // On targets without 64-bit atomics, fall back to a mutex-guarded counter.
    #[cfg(not(target_has_atomic = "64"))]
    next_id: Mutex<u64>,
    #[cfg(target_has_atomic = "64")]
    next_id: core::sync::atomic::AtomicU64,
    /// Ties the factory to `T` without storing one.
    _phantom: PhantomData<T>,
}
1061
impl<T> ResourceIdentityFactory<T> {
    /// Creates a factory whose first issued ID is 0.
    fn new() -> Self {
        Self {
            #[cfg(not(target_has_atomic = "64"))]
            next_id: Mutex::new(0),
            #[cfg(target_has_atomic = "64")]
            next_id: core::sync::atomic::AtomicU64::new(0),
            _phantom: PhantomData,
        }
    }

    /// Returns a new unique ID for a resource of type `T`.
    fn next(&self) -> ResourceIdentity<T> {
        #[cfg(not(target_has_atomic = "64"))]
        {
            let mut next_id = self.next_id.lock();
            let id = *next_id;
            *next_id += 1;
            ResourceIdentity {
                id,
                _phantom: PhantomData,
            }
        }

        // `Relaxed` suffices: each `fetch_add` returns a distinct value, and
        // we need uniqueness only — no ordering with other memory operations.
        #[cfg(target_has_atomic = "64")]
        ResourceIdentity {
            id: self
                .next_id
                .fetch_add(1, core::sync::atomic::Ordering::Relaxed),
            _phantom: PhantomData,
        }
    }
}
1095
/// A unique identifier for a resource of type `T`.
///
/// This is used as a hashable key for resources, which
/// is permanently unique through the lifetime of the program.
#[derive(Debug, Copy, Clone, Eq, Hash, PartialEq)]
struct ResourceIdentity<T> {
    /// Value issued by [`ResourceIdentityFactory::next`].
    id: u64,
    /// Ties the identity to `T` without storing one.
    _phantom: PhantomData<T>,
}
1105
/// Hash-map key identifying a cached framebuffer.
#[derive(Clone, Eq, Hash, PartialEq)]
struct FramebufferKey {
    raw_pass: vk::RenderPass,
    /// Because this is used as a key in a hash map, we need to include the identity
    /// so that this hashes differently, even if the ImageView handles are the same
    /// between different views.
    attachment_identities: ArrayVec<ResourceIdentity<vk::ImageView>, { MAX_TOTAL_ATTACHMENTS }>,
    /// While this is redundant for calculating the hash, we need access to an array
    /// of all the raw ImageViews when we are creating the actual framebuffer,
    /// so we store this here.
    attachment_views: ArrayVec<vk::ImageView, { MAX_TOTAL_ATTACHMENTS }>,
    extent: wgt::Extent3d,
}
1119
1120impl FramebufferKey {
1121    fn push_view(&mut self, view: IdentifiedTextureView) {
1122        self.attachment_identities.push(view.identity);
1123        self.attachment_views.push(view.raw);
1124    }
1125}
1126
/// A texture view paired with its identity.
#[derive(Copy, Clone)]
struct IdentifiedTextureView {
    raw: vk::ImageView,
    /// Process-unique identity of `raw`; see [`ResourceIdentity`].
    identity: ResourceIdentity<vk::ImageView>,
}
1133
/// Hash-map key identifying a cached temporary texture view.
#[derive(Clone, Eq, Hash, PartialEq)]
struct TempTextureViewKey {
    texture: vk::Image,
    /// As this is used in a hashmap, we need to
    /// include the identity so that this hashes differently,
    /// even if the Image handles are the same between different images.
    texture_identity: ResourceIdentity<vk::Image>,
    format: vk::Format,
    mip_level: u32,
    depth_slice: u32,
}
1145
/// The [`crate::Api::CommandEncoder`] type for the Vulkan backend.
pub struct CommandEncoder {
    /// The command pool all of this encoder's command buffers come from.
    raw: vk::CommandPool,
    device: Arc<DeviceShared>,

    /// The current command buffer, if `self` is in the ["recording"]
    /// state.
    ///
    /// ["recording"]: crate::CommandEncoder
    ///
    /// If non-`null`, the buffer is in the Vulkan "recording" state.
    active: vk::CommandBuffer,

    /// What kind of pass we are currently within: compute or render.
    bind_point: vk::PipelineBindPoint,

    /// Allocation recycling pool for this encoder.
    temp: Temp,

    /// A pool of available command buffers.
    ///
    /// These are all in the Vulkan "initial" state.
    free: Vec<vk::CommandBuffer>,

    /// A pool of discarded command buffers.
    ///
    /// These could be in any Vulkan state except "pending".
    discarded: Vec<vk::CommandBuffer>,

    /// If this is true, the active renderpass enabled a debug span,
    /// and needs to be disabled on renderpass close.
    rpass_debug_marker_active: bool,

    /// If set, the end of the next render/compute pass will write a timestamp at
    /// the given pool & location.
    end_of_pass_timer_query: Option<(vk::QueryPool, u32)>,

    /// Per-encoder framebuffer cache; entries are destroyed in `Drop`.
    framebuffers: FastHashMap<FramebufferKey, vk::Framebuffer>,
    /// Per-encoder cache of temporary image views; destroyed in `Drop`.
    temp_texture_views: FastHashMap<TempTextureViewKey, IdentifiedTextureView>,

    counters: Arc<wgt::HalCounters>,
}
1187
impl Drop for CommandEncoder {
    fn drop(&mut self) {
        // SAFETY:
        //
        // VUID-vkDestroyCommandPool-commandPool-00041: wgpu_hal requires that a
        // `CommandBuffer` must live until its execution is complete, and that a
        // `CommandBuffer` must not outlive the `CommandEncoder` that built it.
        // Thus, we know that none of our `CommandBuffers` are in the "pending"
        // state.
        //
        // The other VUIDs are pretty obvious.
        unsafe {
            // `vkDestroyCommandPool` also frees any command buffers allocated
            // from that pool, so there's no need to explicitly call
            // `vkFreeCommandBuffers` on `cmd_encoder`'s `free` and `discarded`
            // fields.
            self.device.raw.destroy_command_pool(self.raw, None);
        }

        // The framebuffer and temporary-view caches are owned by this
        // encoder, so their Vulkan objects are destroyed with it.
        for (_, fb) in self.framebuffers.drain() {
            unsafe { self.device.raw.destroy_framebuffer(fb, None) };
        }

        for (_, view) in self.temp_texture_views.drain() {
            unsafe { self.device.raw.destroy_image_view(view.raw, None) };
        }

        self.counters.command_encoders.sub(1);
    }
}
1218
impl CommandEncoder {
    /// Returns the currently active command buffer.
    ///
    /// # Safety
    ///
    /// - The command buffer handle must not be manually destroyed
    pub unsafe fn raw_handle(&self) -> vk::CommandBuffer {
        self.active
    }
}
1227
1228impl fmt::Debug for CommandEncoder {
1229    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
1230        f.debug_struct("CommandEncoder")
1231            .field("raw", &self.raw)
1232            .finish()
1233    }
1234}
1235
/// A raw Vulkan command buffer produced by a [`CommandEncoder`].
#[derive(Debug)]
pub struct CommandBuffer {
    raw: vk::CommandBuffer,
}

impl crate::DynCommandBuffer for CommandBuffer {}
1242
/// The [`crate::Api::ShaderModule`] type for the Vulkan backend.
#[derive(Debug)]
#[allow(clippy::large_enum_variant)]
pub enum ShaderModule {
    /// An already-built `VkShaderModule`.
    Raw(vk::ShaderModule),
    /// Naga IR not yet compiled to SPIR-V.
    Intermediate {
        naga_shader: crate::NagaShader,
        runtime_checks: wgt::ShaderRuntimeChecks,
    },
}

impl crate::DynShaderModule for ShaderModule {}
1254
/// The [`crate::Api::RenderPipeline`] type for the Vulkan backend.
#[derive(Debug)]
pub struct RenderPipeline {
    raw: vk::Pipeline,
}

impl crate::DynRenderPipeline for RenderPipeline {}

/// The [`crate::Api::ComputePipeline`] type for the Vulkan backend.
#[derive(Debug)]
pub struct ComputePipeline {
    raw: vk::Pipeline,
}

impl crate::DynComputePipeline for ComputePipeline {}

/// The [`crate::Api::PipelineCache`] type for the Vulkan backend.
#[derive(Debug)]
pub struct PipelineCache {
    raw: vk::PipelineCache,
}

impl crate::DynPipelineCache for PipelineCache {}

/// The [`crate::Api::QuerySet`] type for the Vulkan backend.
#[derive(Debug)]
pub struct QuerySet {
    raw: vk::QueryPool,
}

impl crate::DynQuerySet for QuerySet {}
1282
/// The [`Api::Fence`] type for [`vulkan::Api`].
///
/// This is an `enum` because there are two possible implementations of
/// `wgpu-hal` fences on Vulkan: Vulkan fences, which work on any version of
/// Vulkan, and Vulkan timeline semaphores, which are easier and cheaper but
/// require non-1.0 features.
///
/// [`Device::create_fence`] returns a [`TimelineSemaphore`] if
/// [`VK_KHR_timeline_semaphore`] is available and enabled, and a [`FencePool`]
/// otherwise.
///
/// [`Api::Fence`]: crate::Api::Fence
/// [`vulkan::Api`]: Api
/// [`Device::create_fence`]: crate::Device::create_fence
/// [`TimelineSemaphore`]: Fence::TimelineSemaphore
/// [`VK_KHR_timeline_semaphore`]: https://registry.khronos.org/vulkan/specs/1.3-extensions/html/vkspec.html#VK_KHR_timeline_semaphore
/// [`FencePool`]: Fence::FencePool
#[derive(Debug)]
pub enum Fence {
    /// A Vulkan [timeline semaphore].
    ///
    /// These are simpler to use than Vulkan fences, since timeline semaphores
    /// work exactly the way [`wgpu_hal::Api::Fence`] is specified to work.
    ///
    /// [timeline semaphore]: https://registry.khronos.org/vulkan/specs/1.3-extensions/html/vkspec.html#synchronization-semaphores
    /// [`wgpu_hal::Api::Fence`]: crate::Api::Fence
    TimelineSemaphore(vk::Semaphore),

    /// A collection of Vulkan [fence]s, each associated with a [`FenceValue`].
    ///
    /// The effective [`FenceValue`] of this variant is the greater of
    /// `last_completed` and the maximum value associated with a signalled fence
    /// in `active`.
    ///
    /// Fences are available in all versions of Vulkan, but since they only have
    /// two states, "signaled" and "unsignaled", we need to use a separate fence
    /// for each queue submission we might want to wait for, and remember which
    /// [`FenceValue`] each one represents.
    ///
    /// [fence]: https://registry.khronos.org/vulkan/specs/1.3-extensions/html/vkspec.html#synchronization-fences
    /// [`FenceValue`]: crate::FenceValue
    FencePool {
        last_completed: crate::FenceValue,
        /// The pending fence values have to be ascending.
        active: Vec<(crate::FenceValue, vk::Fence)>,
        free: Vec<vk::Fence>,
    },
}

impl crate::DynFence for Fence {}
1333
impl Fence {
    /// Return the highest [`FenceValue`] among the signalled fences in `active`.
    ///
    /// As an optimization, assume that we already know that the fence has
    /// reached `last_completed`, and don't bother checking fences whose values
    /// are less than that: those fences remain in the `active` array only
    /// because we haven't called `maintain` yet to clean them up.
    ///
    /// [`FenceValue`]: crate::FenceValue
    fn check_active(
        device: &ash::Device,
        mut last_completed: crate::FenceValue,
        active: &[(crate::FenceValue, vk::Fence)],
    ) -> Result<crate::FenceValue, crate::DeviceError> {
        for &(value, raw) in active.iter() {
            unsafe {
                if value > last_completed
                    && device
                        .get_fence_status(raw)
                        .map_err(map_host_device_oom_and_lost_err)?
                {
                    last_completed = value;
                }
            }
        }
        Ok(last_completed)
    }

    /// Return the highest signalled [`FenceValue`] for `self`.
    ///
    /// [`FenceValue`]: crate::FenceValue
    fn get_latest(
        &self,
        device: &ash::Device,
        extension: Option<&ExtensionFn<khr::timeline_semaphore::Device>>,
    ) -> Result<crate::FenceValue, crate::DeviceError> {
        match *self {
            // `extension` must be `Some` for `TimelineSemaphore` fences, since
            // they are only created when timeline semaphores are available.
            Self::TimelineSemaphore(raw) => unsafe {
                Ok(match *extension.unwrap() {
                    ExtensionFn::Extension(ref ext) => ext
                        .get_semaphore_counter_value(raw)
                        .map_err(map_host_device_oom_and_lost_err)?,
                    ExtensionFn::Promoted => device
                        .get_semaphore_counter_value(raw)
                        .map_err(map_host_device_oom_and_lost_err)?,
                })
            },
            Self::FencePool {
                last_completed,
                ref active,
                free: _,
            } => Self::check_active(device, last_completed, active),
        }
    }

    /// Trim the internal state of this [`Fence`].
    ///
    /// This function has no externally visible effect, but you should call it
    /// periodically to keep this fence's resource consumption under control.
    ///
    /// For fences using the [`FencePool`] implementation, this function
    /// recycles fences that have been signaled. If you don't call this,
    /// [`Queue::submit`] will just keep allocating a new Vulkan fence every
    /// time it's called.
    ///
    /// [`FencePool`]: Fence::FencePool
    /// [`Queue::submit`]: crate::Queue::submit
    fn maintain(&mut self, device: &ash::Device) -> Result<(), crate::DeviceError> {
        match *self {
            Self::TimelineSemaphore(_) => {}
            Self::FencePool {
                ref mut last_completed,
                ref mut active,
                ref mut free,
            } => {
                let latest = Self::check_active(device, *last_completed, active)?;
                // Remember where the newly recycled fences start in `free`,
                // so we can reset exactly those below.
                let base_free = free.len();
                for &(value, raw) in active.iter() {
                    if value <= latest {
                        free.push(raw);
                    }
                }
                if free.len() != base_free {
                    active.retain(|&(value, _)| value > latest);
                    // Return the recycled fences to the "unsignaled" state so
                    // they can be handed out by `submit` again.
                    unsafe { device.reset_fences(&free[base_free..]) }
                        .map_err(map_device_oom_err)?
                }
                *last_completed = latest;
            }
        }
        Ok(())
    }
}
1427
impl crate::Queue for Queue {
    type A = Api;

    unsafe fn submit(
        &self,
        command_buffers: &[&CommandBuffer],
        surface_textures: &[&SurfaceTexture],
        (signal_fence, signal_value): (&mut Fence, crate::FenceValue),
    ) -> Result<(), crate::DeviceError> {
        // The fence to pass to `vkQueueSubmit`; stays null unless
        // `signal_fence` is a `Fence::FencePool`.
        let mut fence_raw = vk::Fence::null();

        let mut wait_stage_masks = Vec::new();
        let mut wait_semaphores = Vec::new();
        let mut signal_semaphores = SemaphoreList::default();

        // Double check that the same swapchain image isn't being given to us multiple times,
        // as that will deadlock when we try to lock them all.
        debug_assert!(
            {
                let mut check = HashSet::with_capacity(surface_textures.len());
                // We compare the Arcs by pointer, as Eq isn't well defined for SurfaceSemaphores.
                for st in surface_textures {
                    check.insert(Arc::as_ptr(&st.acquire_semaphores) as usize);
                    check.insert(Arc::as_ptr(&st.present_semaphores) as usize);
                }
                check.len() == surface_textures.len() * 2
            },
            "More than one surface texture is being used from the same swapchain. This will cause a deadlock in release."
        );

        // Lock every surface texture's semaphores for the duration of this
        // submission. `try_lock` + `expect` because a held lock here means the
        // same swapchain image was submitted concurrently (see assert above).
        let locked_swapchain_semaphores = surface_textures
            .iter()
            .map(|st| {
                let acquire = st
                    .acquire_semaphores
                    .try_lock()
                    .expect("Failed to lock surface acquire semaphore");
                let present = st
                    .present_semaphores
                    .try_lock()
                    .expect("Failed to lock surface present semaphore");

                (acquire, present)
            })
            .collect::<Vec<_>>();

        for (mut acquire_semaphore, mut present_semaphores) in locked_swapchain_semaphores {
            // Record the fence value this submission will signal, so the
            // swapchain machinery can tell when the image is done being used.
            acquire_semaphore.set_used_fence_value(signal_value);

            // If we're the first submission to operate on this image, wait on
            // its acquire semaphore, to make sure the presentation engine is
            // done with it.
            if let Some(sem) = acquire_semaphore.get_acquire_wait_semaphore() {
                wait_stage_masks.push(vk::PipelineStageFlags::TOP_OF_PIPE);
                wait_semaphores.push(sem);
            }

            // Get a semaphore to signal when we're done writing to this surface
            // image. Presentation of this image will wait for this.
            let signal_semaphore = present_semaphores.get_submit_signal_semaphore(&self.device)?;
            signal_semaphores.push_binary(signal_semaphore);
        }

        // Fold in any extra semaphores registered via `add_signal_semaphore`.
        let mut guard = self.signal_semaphores.lock();
        if !guard.is_empty() {
            signal_semaphores.append(&mut guard);
        }

        // In order for submissions to be strictly ordered, we encode a dependency between each submission
        // using a pair of semaphores. This adds a wait if it is needed, and signals the next semaphore.
        let semaphore_state = self.relay_semaphores.lock().advance(&self.device)?;

        if let Some(sem) = semaphore_state.wait {
            wait_stage_masks.push(vk::PipelineStageFlags::TOP_OF_PIPE);
            wait_semaphores.push(sem);
        }

        signal_semaphores.push_binary(semaphore_state.signal);

        // We need to signal our wgpu::Fence if we have one, this adds it to the signal list.
        signal_fence.maintain(&self.device.raw)?;
        match *signal_fence {
            Fence::TimelineSemaphore(raw) => {
                signal_semaphores.push_timeline(raw, signal_value);
            }
            Fence::FencePool {
                ref mut active,
                ref mut free,
                ..
            } => {
                // Reuse a recycled fence if one is available; otherwise
                // create a fresh one.
                fence_raw = match free.pop() {
                    Some(raw) => raw,
                    None => unsafe {
                        self.device
                            .raw
                            .create_fence(&vk::FenceCreateInfo::default(), None)
                            .map_err(map_host_device_oom_err)?
                    },
                };
                active.push((signal_value, fence_raw));
            }
        }

        let vk_cmd_buffers = command_buffers
            .iter()
            .map(|cmd| cmd.raw)
            .collect::<Vec<_>>();

        let mut vk_info = vk::SubmitInfo::default().command_buffers(&vk_cmd_buffers);

        vk_info = vk_info
            .wait_semaphores(&wait_semaphores)
            .wait_dst_stage_mask(&wait_stage_masks);

        // Storage for the timeline-semaphore chain struct; it must outlive
        // `vk_info`, which may point into it.
        let mut vk_timeline_info = mem::MaybeUninit::uninit();
        vk_info = signal_semaphores.add_to_submit(vk_info, &mut vk_timeline_info);

        profiling::scope!("vkQueueSubmit");
        unsafe {
            self.device
                .raw
                .queue_submit(self.raw, &[vk_info], fence_raw)
                .map_err(map_host_device_oom_and_lost_err)?
        };
        Ok(())
    }

    unsafe fn present(
        &self,
        surface: &Surface,
        texture: SurfaceTexture,
    ) -> Result<(), crate::SurfaceError> {
        let mut swapchain = surface.swapchain.write();
        let ssc = swapchain.as_mut().unwrap();
        let mut acquire_semaphore = texture.acquire_semaphores.lock();
        let mut present_semaphores = texture.present_semaphores.lock();

        let wait_semaphores = present_semaphores.get_present_wait_semaphores();

        // Reset the acquire and present semaphores internal state
        // to be ready for the next frame.
        //
        // We do this before the actual call to present to ensure that
        // even if this method errors and early outs, we have reset
        // the state for next frame.
        acquire_semaphore.end_semaphore_usage();
        present_semaphores.end_semaphore_usage();

        drop(acquire_semaphore);

        let swapchains = [ssc.raw];
        let image_indices = [texture.index];
        let vk_info = vk::PresentInfoKHR::default()
            .swapchains(&swapchains)
            .image_indices(&image_indices)
            .wait_semaphores(&wait_semaphores);

        // Both locals must outlive `vk_info` below, since `push_next` chains
        // them into the present info by pointer.
        let mut display_timing;
        let present_times;
        let vk_info = if let Some(present_time) = ssc.next_present_time.take() {
            debug_assert!(
                ssc.device
                    .features
                    .contains(wgt::Features::VULKAN_GOOGLE_DISPLAY_TIMING),
                "`next_present_time` should only be set if `VULKAN_GOOGLE_DISPLAY_TIMING` is enabled"
            );
            present_times = [present_time];
            display_timing = vk::PresentTimesInfoGOOGLE::default().times(&present_times);
            // SAFETY: We know that VK_GOOGLE_display_timing is present because of the safety contract on `next_present_time`.
            vk_info.push_next(&mut display_timing)
        } else {
            vk_info
        };

        let suboptimal = {
            profiling::scope!("vkQueuePresentKHR");
            unsafe { self.swapchain_fn.queue_present(self.raw, &vk_info) }.map_err(|error| {
                match error {
                    vk::Result::ERROR_OUT_OF_DATE_KHR => crate::SurfaceError::Outdated,
                    vk::Result::ERROR_SURFACE_LOST_KHR => crate::SurfaceError::Lost,
                    // We don't use VK_EXT_full_screen_exclusive
                    // VK_ERROR_FULL_SCREEN_EXCLUSIVE_MODE_LOST_EXT
                    _ => map_host_device_oom_and_lost_err(error).into(),
                }
            })?
        };
        if suboptimal {
            // We treat `VK_SUBOPTIMAL_KHR` as `VK_SUCCESS` on Android.
            // On Android 10+, libvulkan's `vkQueuePresentKHR` implementation returns `VK_SUBOPTIMAL_KHR` if not doing pre-rotation
            // (i.e `VkSwapchainCreateInfoKHR::preTransform` not being equal to the current device orientation).
            // This is always the case when the device orientation is anything other than the identity one, as we unconditionally use `VK_SURFACE_TRANSFORM_IDENTITY_BIT_KHR`.
            #[cfg(not(target_os = "android"))]
            log::warn!("Suboptimal present of frame {}", texture.index);
        }
        Ok(())
    }

    unsafe fn get_timestamp_period(&self) -> f32 {
        // Nanoseconds per timestamp tick, as reported by the physical device.
        self.device.timestamp_period
    }
}
1629
1630impl Queue {
1631    pub fn raw_device(&self) -> &ash::Device {
1632        &self.device.raw
1633    }
1634
1635    pub fn add_signal_semaphore(&self, semaphore: vk::Semaphore, semaphore_value: Option<u64>) {
1636        let mut guard = self.signal_semaphores.lock();
1637        if let Some(value) = semaphore_value {
1638            guard.push_timeline(semaphore, value);
1639        } else {
1640            guard.push_binary(semaphore);
1641        }
1642    }
1643}
1644
1645/// Maps
1646///
1647/// - VK_ERROR_OUT_OF_HOST_MEMORY
1648/// - VK_ERROR_OUT_OF_DEVICE_MEMORY
1649fn map_host_device_oom_err(err: vk::Result) -> crate::DeviceError {
1650    match err {
1651        vk::Result::ERROR_OUT_OF_HOST_MEMORY | vk::Result::ERROR_OUT_OF_DEVICE_MEMORY => {
1652            get_oom_err(err)
1653        }
1654        e => get_unexpected_err(e),
1655    }
1656}
1657
1658/// Maps
1659///
1660/// - VK_ERROR_OUT_OF_HOST_MEMORY
1661/// - VK_ERROR_OUT_OF_DEVICE_MEMORY
1662/// - VK_ERROR_DEVICE_LOST
1663fn map_host_device_oom_and_lost_err(err: vk::Result) -> crate::DeviceError {
1664    match err {
1665        vk::Result::ERROR_DEVICE_LOST => get_lost_err(),
1666        other => map_host_device_oom_err(other),
1667    }
1668}
1669
1670/// Maps
1671///
1672/// - VK_ERROR_OUT_OF_HOST_MEMORY
1673/// - VK_ERROR_OUT_OF_DEVICE_MEMORY
1674/// - VK_ERROR_INVALID_OPAQUE_CAPTURE_ADDRESS_KHR
1675fn map_host_device_oom_and_ioca_err(err: vk::Result) -> crate::DeviceError {
1676    // We don't use VK_KHR_buffer_device_address
1677    // VK_ERROR_INVALID_OPAQUE_CAPTURE_ADDRESS_KHR
1678    map_host_device_oom_err(err)
1679}
1680
1681/// Maps
1682///
1683/// - VK_ERROR_OUT_OF_HOST_MEMORY
1684fn map_host_oom_err(err: vk::Result) -> crate::DeviceError {
1685    match err {
1686        vk::Result::ERROR_OUT_OF_HOST_MEMORY => get_oom_err(err),
1687        e => get_unexpected_err(e),
1688    }
1689}
1690
1691/// Maps
1692///
1693/// - VK_ERROR_OUT_OF_DEVICE_MEMORY
1694fn map_device_oom_err(err: vk::Result) -> crate::DeviceError {
1695    match err {
1696        vk::Result::ERROR_OUT_OF_DEVICE_MEMORY => get_oom_err(err),
1697        e => get_unexpected_err(e),
1698    }
1699}
1700
1701/// Maps
1702///
1703/// - VK_ERROR_OUT_OF_HOST_MEMORY
1704/// - VK_ERROR_INVALID_OPAQUE_CAPTURE_ADDRESS_KHR
1705fn map_host_oom_and_ioca_err(err: vk::Result) -> crate::DeviceError {
1706    // We don't use VK_KHR_buffer_device_address
1707    // VK_ERROR_INVALID_OPAQUE_CAPTURE_ADDRESS_KHR
1708    map_host_oom_err(err)
1709}
1710
1711/// Maps
1712///
1713/// - VK_ERROR_OUT_OF_HOST_MEMORY
1714/// - VK_ERROR_OUT_OF_DEVICE_MEMORY
1715/// - VK_PIPELINE_COMPILE_REQUIRED_EXT
1716/// - VK_ERROR_INVALID_SHADER_NV
1717fn map_pipeline_err(err: vk::Result) -> crate::DeviceError {
1718    // We don't use VK_EXT_pipeline_creation_cache_control
1719    // VK_PIPELINE_COMPILE_REQUIRED_EXT
1720    // We don't use VK_NV_glsl_shader
1721    // VK_ERROR_INVALID_SHADER_NV
1722    map_host_device_oom_err(err)
1723}
1724
/// Returns [`crate::DeviceError::Unexpected`] or panics if the `internal_error_panic`
/// feature flag is enabled.
///
/// Used as the fallback for `vk::Result` values a call site did not expect
/// the driver to ever return.
fn get_unexpected_err(_err: vk::Result) -> crate::DeviceError {
    // With the feature enabled this diverges, making the return below
    // unreachable; the underscore prefix avoids an unused-parameter warning
    // when the feature is disabled.
    #[cfg(feature = "internal_error_panic")]
    panic!("Unexpected Vulkan error: {_err:?}");

    #[allow(unreachable_code)]
    crate::DeviceError::Unexpected
}
1734
/// Returns [`crate::DeviceError::OutOfMemory`].
///
/// The error argument is ignored; it is accepted only so the OOM mappers can
/// pass the offending `vk::Result` through a uniform helper signature.
fn get_oom_err(_err: vk::Result) -> crate::DeviceError {
    crate::DeviceError::OutOfMemory
}
1739
/// Returns [`crate::DeviceError::Lost`] or panics if the `device_lost_panic`
/// feature flag is enabled.
fn get_lost_err() -> crate::DeviceError {
    // With the feature enabled this diverges, making the return below
    // unreachable.
    #[cfg(feature = "device_lost_panic")]
    panic!("Device lost");

    #[allow(unreachable_code)]
    crate::DeviceError::Lost
}
1749
/// CPU-side record for a top-level acceleration structure instance.
///
/// `repr(C)` plus `Pod`/`Zeroable` allow instances to be byte-copied directly
/// into GPU upload buffers. NOTE(review): the field order and the packed
/// `*_and_*` fields appear intended to mirror Vulkan's
/// `VkAccelerationStructureInstanceKHR` layout — confirm against the spec
/// before changing anything here.
#[derive(Clone, Copy, Pod, Zeroable)]
#[repr(C)]
struct RawTlasInstance {
    // 3x4 transform matrix, stored as 12 consecutive floats.
    transform: [f32; 12],
    // Packs the instance custom data with the visibility mask.
    custom_data_and_mask: u32,
    // Packs the SBT record offset with the instance flags.
    shader_binding_table_record_offset_and_flags: u32,
    // Device address / handle of the referenced acceleration structure.
    acceleration_structure_reference: u64,
}
1758
/// Arguments to the [`CreateDeviceCallback`].
pub struct CreateDeviceCallbackArgs<'arg, 'pnext, 'this>
where
    'this: 'pnext,
{
    /// The extensions to enable for the device. You must not remove anything from this list,
    /// but you may add to it.
    pub extensions: &'arg mut Vec<&'static CStr>,
    /// The physical device features to enable. You may enable features, but must not disable any.
    pub device_features: &'arg mut PhysicalDeviceFeatures,
    /// The queue create infos for the device. You may add or modify queue create infos as needed.
    pub queue_create_infos: &'arg mut Vec<vk::DeviceQueueCreateInfo<'pnext>>,
    /// The create info for the device. You may add or modify things in the pnext chain, but
    /// do not turn features off. Additionally, do not add things to the list of extensions,
    /// or to the feature set, as all changes to that member will be overwritten.
    pub create_info: &'arg mut vk::DeviceCreateInfo<'pnext>,
    /// We need to have `'this` in the struct, so we can declare that all lifetimes coming from
    /// captures in the closure will live longer (and hence satisfy) `'pnext`. However, we
    /// don't actually directly use `'this`.
    _phantom: PhantomData<&'this ()>,
}
1780
/// Callback to allow changing the vulkan device creation parameters.
///
/// # Safety:
/// - If you want to add extensions, add them to the `Vec<&'static CStr>`, not the create info,
///   as the create info value will be overwritten.
/// - Callback must not remove features.
/// - Callback must not enable anything that the instance does not support.
pub type CreateDeviceCallback<'this> =
    dyn for<'arg, 'pnext> FnOnce(CreateDeviceCallbackArgs<'arg, 'pnext, 'this>) + 'this;
1790
/// Arguments to the [`CreateInstanceCallback`].
pub struct CreateInstanceCallbackArgs<'arg, 'pnext, 'this>
where
    'this: 'pnext,
{
    /// The extensions to enable for the instance. You must not remove anything from this list,
    /// but you may add to it.
    pub extensions: &'arg mut Vec<&'static CStr>,
    /// The create info for the instance. You may add or modify things in the pnext chain, but
    /// do not turn features off. Additionally, do not add things to the list of extensions,
    /// as all changes to that member will be overwritten.
    pub create_info: &'arg mut vk::InstanceCreateInfo<'pnext>,
    /// Vulkan entry point.
    pub entry: &'arg ash::Entry,
    /// We need to have `'this` in the struct, so we can declare that all lifetimes coming from
    /// captures in the closure will live longer (and hence satisfy) `'pnext`. However, we
    /// don't actually directly use `'this`.
    _phantom: PhantomData<&'this ()>,
}
1810
/// Callback to allow changing the vulkan instance creation parameters.
///
/// # Safety:
/// - If you want to add extensions, add them to the `Vec<&'static CStr>`, not the create info,
///   as the create info value will be overwritten.
/// - Callback must not remove features.
/// - Callback must not enable anything that the instance does not support.
pub type CreateInstanceCallback<'this> =
    dyn for<'arg, 'pnext> FnOnce(CreateInstanceCallbackArgs<'arg, 'pnext, 'this>) + 'this;