vello_common/
simd.rs

1// Copyright 2025 the Vello Authors
2// SPDX-License-Identifier: Apache-2.0 OR MIT
3
4//! A number of SIMD extension traits.
5
6use fearless_simd::*;
7
/// Extension trait for broadcasting the last lane of every group of four lanes
/// across that whole group.
///
/// This is used to spread the alpha value of a color over all of that color's
/// lanes.
pub trait Splat4thExt<S> {
    /// Replace each group of four consecutive lanes with four copies of the
    /// group's 4th lane.
    fn splat_4th(self) -> Self;
}
14
15impl<S: Simd> Splat4thExt<S> for f32x4<S> {
16    #[inline(always)]
17    fn splat_4th(self) -> Self {
18        // TODO: Explore whether it's just faster to manually access the 4th element and splat it.
19        let zip1 = self.zip_high(self);
20        zip1.zip_high(zip1)
21    }
22}
23
24impl<S: Simd> Splat4thExt<S> for f32x8<S> {
25    #[inline(always)]
26    fn splat_4th(self) -> Self {
27        let (mut p1, mut p2) = self.simd.split_f32x8(self);
28        p1 = p1.splat_4th();
29        p2 = p2.splat_4th();
30
31        self.simd.combine_f32x4(p1, p2)
32    }
33}
34
35impl<S: Simd> Splat4thExt<S> for f32x16<S> {
36    #[inline(always)]
37    fn splat_4th(self) -> Self {
38        let (mut p1, mut p2) = self.simd.split_f32x16(self);
39        p1 = p1.splat_4th();
40        p2 = p2.splat_4th();
41
42        self.simd.combine_f32x8(p1, p2)
43    }
44}
45
46impl<S: Simd> Splat4thExt<S> for u8x16<S> {
47    #[inline(always)]
48    fn splat_4th(self) -> Self {
49        // TODO: SIMDify
50        Self {
51            val: [
52                self.val[3],
53                self.val[3],
54                self.val[3],
55                self.val[3],
56                self.val[7],
57                self.val[7],
58                self.val[7],
59                self.val[7],
60                self.val[11],
61                self.val[11],
62                self.val[11],
63                self.val[11],
64                self.val[15],
65                self.val[15],
66                self.val[15],
67                self.val[15],
68            ],
69            simd: self.simd,
70        }
71    }
72}
73
74impl<S: Simd> Splat4thExt<S> for u8x32<S> {
75    #[inline(always)]
76    fn splat_4th(self) -> Self {
77        let (mut p1, mut p2) = self.simd.split_u8x32(self);
78        p1 = p1.splat_4th();
79        p2 = p2.splat_4th();
80
81        self.simd.combine_u8x16(p1, p2)
82    }
83}
84
85/// Splat each single element in the vector to 4 lanes.
86#[inline(always)]
87pub fn element_wise_splat<S: Simd>(simd: S, input: f32x4<S>) -> f32x16<S> {
88    simd.combine_f32x8(
89        simd.combine_f32x4(
90            f32x4::splat(simd, input.val[0]),
91            f32x4::splat(simd, input.val[1]),
92        ),
93        simd.combine_f32x4(
94            f32x4::splat(simd, input.val[2]),
95            f32x4::splat(simd, input.val[3]),
96        ),
97    )
98}