Skip to main content

vello_common/
simd.rs

1// Copyright 2025 the Vello Authors
2// SPDX-License-Identifier: Apache-2.0 OR MIT
3
4//! A number of SIMD extension traits.
5
6use fearless_simd::*;
7
/// Extension trait that broadcasts the last lane of every group of four lanes
/// across that whole group.
///
/// This is used to spread the alpha value of a color over all lanes of that color.
pub trait Splat4thExt<S> {
    /// Replace each consecutive group of four lanes with four copies of the
    /// group's 4th lane.
    fn splat_4th(self) -> Self;
}
14
15impl<S: Simd> Splat4thExt<S> for f32x4<S> {
16    #[inline(always)]
17    fn splat_4th(self) -> Self {
18        // TODO: Explore whether it's just faster to manually access the 4th element and splat it.
19        let zip1 = self.zip_high(self);
20        zip1.zip_high(zip1)
21    }
22}
23
24impl<S: Simd> Splat4thExt<S> for f32x8<S> {
25    #[inline(always)]
26    fn splat_4th(self) -> Self {
27        let (mut p1, mut p2) = self.simd.split_f32x8(self);
28        p1 = p1.splat_4th();
29        p2 = p2.splat_4th();
30
31        self.simd.combine_f32x4(p1, p2)
32    }
33}
34
35impl<S: Simd> Splat4thExt<S> for f32x16<S> {
36    #[inline(always)]
37    fn splat_4th(self) -> Self {
38        let (mut p1, mut p2) = self.simd.split_f32x16(self);
39        p1 = p1.splat_4th();
40        p2 = p2.splat_4th();
41
42        self.simd.combine_f32x8(p1, p2)
43    }
44}
45
46impl<S: Simd> Splat4thExt<S> for u8x16<S> {
47    #[inline(always)]
48    fn splat_4th(self) -> Self {
49        // TODO: SIMDify
50        [
51            self[3], self[3], self[3], self[3], self[7], self[7], self[7], self[7], self[11],
52            self[11], self[11], self[11], self[15], self[15], self[15], self[15],
53        ]
54        .simd_into(self.simd)
55    }
56}
57
58impl<S: Simd> Splat4thExt<S> for u8x32<S> {
59    #[inline(always)]
60    fn splat_4th(self) -> Self {
61        let (mut p1, mut p2) = self.simd.split_u8x32(self);
62        p1 = p1.splat_4th();
63        p2 = p2.splat_4th();
64
65        self.simd.combine_u8x16(p1, p2)
66    }
67}
68
69/// Splat each single element in the vector to 4 lanes.
70#[inline(always)]
71pub fn element_wise_splat<S: Simd>(simd: S, input: f32x4<S>) -> f32x16<S> {
72    simd.combine_f32x8(
73        simd.combine_f32x4(f32x4::splat(simd, input[0]), f32x4::splat(simd, input[1])),
74        simd.combine_f32x4(f32x4::splat(simd, input[2]), f32x4::splat(simd, input[3])),
75    )
76}