Skip to main content

fearless_simd/
support.rs

1// Copyright 2025 the Fearless_SIMD Authors
2// SPDX-License-Identifier: Apache-2.0 OR MIT
3
/// Transparent-in-spirit newtype that forces a 16-byte (128-bit) alignment on the wrapped value.
///
/// The payload is stored as the single public field and keeps a C-compatible layout; only the
/// alignment requirement of the wrapper is raised.
#[repr(C, align(16))]
#[derive(Debug, Clone, Copy)]
#[expect(
    unnameable_types,
    reason = "This is used internally, but needs to be `pub` as it's used in a sealed interface"
)]
pub struct Aligned128<T>(pub T);
12
/// Transparent-in-spirit newtype that forces a 32-byte (256-bit) alignment on the wrapped value.
///
/// The payload is stored as the single public field and keeps a C-compatible layout; only the
/// alignment requirement of the wrapper is raised.
#[repr(C, align(32))]
#[derive(Debug, Clone, Copy)]
#[expect(
    unnameable_types,
    reason = "This is used internally, but needs to be `pub` as it's used in a sealed interface"
)]
pub struct Aligned256<T>(pub T);
21
/// Transparent-in-spirit newtype that forces a 64-byte (512-bit) alignment on the wrapped value.
///
/// The payload is stored as the single public field and keeps a C-compatible layout; only the
/// alignment requirement of the wrapper is raised.
#[repr(C, align(64))]
#[derive(Debug, Clone, Copy)]
#[expect(
    unnameable_types,
    reason = "This is used internally, but needs to be `pub` as it's used in a sealed interface"
)]
pub struct Aligned512<T>(pub T);
30
/// Shared `Debug` formatting routine for SIMD vector types.
///
/// Writes a struct-style representation named `type_name` with two fields: `val`, holding the
/// element slice `items`, and `simd`, holding `token`. Because the token is passed as a trait
/// object and the elements as a slice, the function is generic over the element type only; it is
/// marked `#[inline(never)]` so that a single copy per element type can be shared by callers.
#[inline(never)]
pub(crate) fn simd_debug_impl<Element: core::fmt::Debug>(
    f: &mut core::fmt::Formatter<'_>,
    type_name: &str,
    token: &dyn core::fmt::Debug,
    items: &[Element],
) -> core::fmt::Result {
    let mut builder = f.debug_struct(type_name);
    // The elements go first, followed by the SIMD token that produced the vector.
    builder.field("val", &items);
    builder.field("simd", token);
    builder.finish()
}
45
/// Picks the pair of adjacent blocks that feed an `alignr`/`vext`-style instruction when producing
/// one output block of a cross-block "slide" operation.
///
/// `a` and `b` are treated as one concatenated sequence `[a : b]` of `2 * N` blocks, each assumed
/// to be 16 bytes wide. For output block `out_idx` and a slide of `shift_bytes` bytes, the result
/// is `[lo, hi]`, where `lo` is the block containing the first source byte and `hi` is the block
/// immediately after it.
///
/// # Panics
///
/// Panics if either selected block index lies beyond the `2 * N` concatenated blocks.
#[inline(always)]
#[allow(clippy::allow_attributes, reason = "Only needed in some cfgs.")]
#[allow(dead_code, reason = "Only used in some cfgs.")]
pub(crate) fn cross_block_slide_blocks_at<const N: usize, Block: Copy>(
    a: &[Block; N],
    b: &[Block; N],
    out_idx: usize,
    shift_bytes: usize,
) -> [Block; 2] {
    const BLOCK_BYTES: usize = 16;

    // View `[a : b]` as one array: positions 0..N come from `a`, positions N..2N from `b`.
    let concat_block = |pos: usize| match pos.checked_sub(N) {
        Some(pos_in_b) => b[pos_in_b],
        None => a[pos],
    };

    // Index of the block that holds the first source byte of this output block.
    let first = (out_idx * BLOCK_BYTES + shift_bytes) / BLOCK_BYTES;
    [concat_block(first), concat_block(first + 1)]
}