1use crate::impl_macros::delegate;
7#[cfg(target_arch = "x86")]
8use core::arch::x86 as arch;
9#[cfg(target_arch = "x86_64")]
10use core::arch::x86_64 as arch;
11
12use arch::*;
13
/// Zero-sized token type named after the SSE2 instruction set.
///
/// The single field is private, so code outside this module cannot build a
/// value with a struct literal; values are produced by the constructor in the
/// inherent `impl` of this type. The token is `Copy`, so it can be passed
/// around by value freely.
#[derive(Debug, Copy, Clone)]
pub struct Sse2 {
    // Unit-typed private field: carries no data, only blocks outside construction.
    _private: (),
}
19
// The lint expectation below covers the whole `impl`: `clippy::missing_safety_doc`
// is expected to fire here, and the `reason` string points at the tracking issue.
#[expect(
    clippy::missing_safety_doc,
    reason = "TODO: https://github.com/linebender/fearless_simd/issues/40"
)]
impl Sse2 {
    /// Creates an `Sse2` token without performing any CPU feature check.
    ///
    /// # Safety
    ///
    /// Nothing in this function verifies that SSE2 is present. NOTE(review):
    /// the exact contract is not stated in this file; presumably the caller
    /// must guarantee that the running CPU supports SSE2 before using the
    /// token — confirm against the crate-level documentation.
    #[inline]
    pub unsafe fn new_unchecked() -> Self {
        Self { _private: () }
    }

    // Everything below is input to `delegate!` (imported from `crate::impl_macros`,
    // definition not visible in this file). NOTE(review): presumably each listed
    // signature becomes a method on `Sse2` that forwards to the identically named
    // item under `arch` — confirm against the macro definition. Only `//` comments
    // are used inside the invocation so the macro's input tokens stay unchanged.
    delegate! { arch:
        // `_mm_pause`, `_mm_clflush` and the two `*fence` entries take no vector operands.
        fn _mm_pause();
        // NOTE(review): declared as a safe `fn` although it takes a raw pointer; the
        // `allow` silences Clippy's `not_unsafe_ptr_arg_deref`. `core::arch` appears
        // to declare `_mm_clflush` as `unsafe fn` — confirm that a safe wrapper is intended.
        #[allow(clippy::not_unsafe_ptr_arg_deref)]
        fn _mm_clflush(p: *const u8);
        fn _mm_lfence();
        fn _mm_mfence();

        // Two-operand `__m128i` -> `__m128i` entries:
        // add/adds/avg/madd/max/min/mul*/sad/sub/subs name families.
        fn _mm_add_epi8(a: __m128i, b: __m128i) -> __m128i;
        fn _mm_add_epi16(a: __m128i, b: __m128i) -> __m128i;
        fn _mm_add_epi32(a: __m128i, b: __m128i) -> __m128i;
        fn _mm_add_epi64(a: __m128i, b: __m128i) -> __m128i;
        fn _mm_adds_epi8(a: __m128i, b: __m128i) -> __m128i;
        fn _mm_adds_epi16(a: __m128i, b: __m128i) -> __m128i;
        fn _mm_adds_epu8(a: __m128i, b: __m128i) -> __m128i;
        fn _mm_adds_epu16(a: __m128i, b: __m128i) -> __m128i;
        fn _mm_avg_epu8(a: __m128i, b: __m128i) -> __m128i;
        fn _mm_avg_epu16(a: __m128i, b: __m128i) -> __m128i;
        fn _mm_madd_epi16(a: __m128i, b: __m128i) -> __m128i;
        fn _mm_max_epi16(a: __m128i, b: __m128i) -> __m128i;
        fn _mm_max_epu8(a: __m128i, b: __m128i) -> __m128i;
        fn _mm_min_epi16(a: __m128i, b: __m128i) -> __m128i;
        fn _mm_min_epu8(a: __m128i, b: __m128i) -> __m128i;
        fn _mm_mulhi_epi16(a: __m128i, b: __m128i) -> __m128i;
        fn _mm_mulhi_epu16(a: __m128i, b: __m128i) -> __m128i;
        fn _mm_mullo_epi16(a: __m128i, b: __m128i) -> __m128i;
        fn _mm_mul_epu32(a: __m128i, b: __m128i) -> __m128i;
        fn _mm_sad_epu8(a: __m128i, b: __m128i) -> __m128i;
        fn _mm_sub_epi8(a: __m128i, b: __m128i) -> __m128i;
        fn _mm_sub_epi16(a: __m128i, b: __m128i) -> __m128i;
        fn _mm_sub_epi32(a: __m128i, b: __m128i) -> __m128i;
        fn _mm_sub_epi64(a: __m128i, b: __m128i) -> __m128i;
        fn _mm_subs_epi8(a: __m128i, b: __m128i) -> __m128i;
        fn _mm_subs_epi16(a: __m128i, b: __m128i) -> __m128i;
        fn _mm_subs_epu8(a: __m128i, b: __m128i) -> __m128i;
        fn _mm_subs_epu16(a: __m128i, b: __m128i) -> __m128i;

        // `sll*`/`sra*`/`srl*`/`bs*li` entries: each either carries a
        // `const IMM8: i32` generic parameter or takes a `count: __m128i` argument.
        fn _mm_slli_si128<const IMM8: i32>(a: __m128i) -> __m128i;
        fn _mm_bslli_si128<const IMM8: i32>(a: __m128i) -> __m128i;
        fn _mm_bsrli_si128<const IMM8: i32>(a: __m128i) -> __m128i;
        fn _mm_slli_epi16<const IMM8: i32>(a: __m128i) -> __m128i;
        fn _mm_sll_epi16(a: __m128i, count: __m128i) -> __m128i;
        fn _mm_slli_epi32<const IMM8: i32>(a: __m128i) -> __m128i;
        fn _mm_sll_epi32(a: __m128i, count: __m128i) -> __m128i;
        fn _mm_slli_epi64<const IMM8: i32>(a: __m128i) -> __m128i;
        fn _mm_sll_epi64(a: __m128i, count: __m128i) -> __m128i;
        fn _mm_srai_epi16<const IMM8: i32>(a: __m128i) -> __m128i;
        fn _mm_sra_epi16(a: __m128i, count: __m128i) -> __m128i;
        fn _mm_srai_epi32<const IMM8: i32>(a: __m128i) -> __m128i;
        fn _mm_sra_epi32(a: __m128i, count: __m128i) -> __m128i;
        fn _mm_srli_si128<const IMM8: i32>(a: __m128i) -> __m128i;
        fn _mm_srli_epi16<const IMM8: i32>(a: __m128i) -> __m128i;
        fn _mm_srl_epi16(a: __m128i, count: __m128i) -> __m128i;
        fn _mm_srli_epi32<const IMM8: i32>(a: __m128i) -> __m128i;
        fn _mm_srl_epi32(a: __m128i, count: __m128i) -> __m128i;
        fn _mm_srli_epi64<const IMM8: i32>(a: __m128i) -> __m128i;
        fn _mm_srl_epi64(a: __m128i, count: __m128i) -> __m128i;

        // `and`/`andnot`/`or`/`xor` on `__m128i`.
        fn _mm_and_si128(a: __m128i, b: __m128i) -> __m128i;
        fn _mm_andnot_si128(a: __m128i, b: __m128i) -> __m128i;
        fn _mm_or_si128(a: __m128i, b: __m128i) -> __m128i;
        fn _mm_xor_si128(a: __m128i, b: __m128i) -> __m128i;

        // `cmpeq`/`cmpgt`/`cmplt` on `__m128i`; each returns a `__m128i`.
        fn _mm_cmpeq_epi8(a: __m128i, b: __m128i) -> __m128i;
        fn _mm_cmpeq_epi16(a: __m128i, b: __m128i) -> __m128i;
        fn _mm_cmpeq_epi32(a: __m128i, b: __m128i) -> __m128i;
        fn _mm_cmpgt_epi8(a: __m128i, b: __m128i) -> __m128i;
        fn _mm_cmpgt_epi16(a: __m128i, b: __m128i) -> __m128i;
        fn _mm_cmpgt_epi32(a: __m128i, b: __m128i) -> __m128i;
        fn _mm_cmplt_epi8(a: __m128i, b: __m128i) -> __m128i;
        fn _mm_cmplt_epi16(a: __m128i, b: __m128i) -> __m128i;
        fn _mm_cmplt_epi32(a: __m128i, b: __m128i) -> __m128i;

        // `cvt*` entries whose signatures move between `__m128i`, `__m128d`,
        // `__m128` and `i32`.
        fn _mm_cvtepi32_pd(a: __m128i) -> __m128d;
        fn _mm_cvtsi32_sd(a: __m128d, b: i32) -> __m128d;
        fn _mm_cvtepi32_ps(a: __m128i) -> __m128;
        fn _mm_cvtps_epi32(a: __m128) -> __m128i;
        fn _mm_cvtsi32_si128(a: i32) -> __m128i;
        fn _mm_cvtsi128_si32(a: __m128i) -> i32;

        // `set`/`set1`/`setr`/`setzero` entries returning `__m128i` from scalar
        // arguments (or from no arguments).
        fn _mm_set_epi64x(e1: i64, e0: i64) -> __m128i;
        fn _mm_set_epi32(e3: i32, e2: i32, e1: i32, e0: i32) -> __m128i;
        fn _mm_set_epi16(
            e7: i16,
            e6: i16,
            e5: i16,
            e4: i16,
            e3: i16,
            e2: i16,
            e1: i16,
            e0: i16,
        ) -> __m128i;
        fn _mm_set_epi8(
            e15: i8,
            e14: i8,
            e13: i8,
            e12: i8,
            e11: i8,
            e10: i8,
            e9: i8,
            e8: i8,
            e7: i8,
            e6: i8,
            e5: i8,
            e4: i8,
            e3: i8,
            e2: i8,
            e1: i8,
            e0: i8,
        ) -> __m128i;
        fn _mm_set1_epi64x(a: i64) -> __m128i;
        fn _mm_set1_epi32(a: i32) -> __m128i;
        fn _mm_set1_epi16(a: i16) -> __m128i;
        fn _mm_set1_epi8(a: i8) -> __m128i;
        fn _mm_setr_epi32(e3: i32, e2: i32, e1: i32, e0: i32) -> __m128i;
        fn _mm_setr_epi16(
            e7: i16,
            e6: i16,
            e5: i16,
            e4: i16,
            e3: i16,
            e2: i16,
            e1: i16,
            e0: i16,
        ) -> __m128i;
        fn _mm_setr_epi8(
            e15: i8,
            e14: i8,
            e13: i8,
            e12: i8,
            e11: i8,
            e10: i8,
            e9: i8,
            e8: i8,
            e7: i8,
            e6: i8,
            e5: i8,
            e4: i8,
            e3: i8,
            e2: i8,
            e1: i8,
            e0: i8,
        ) -> __m128i;
        fn _mm_setzero_si128() -> __m128i;

        // Pointer-taking `__m128i`/`i32` load, store and stream entries;
        // every one of them is declared `unsafe`.
        unsafe fn _mm_loadl_epi64(mem_addr: *const __m128i) -> __m128i;
        unsafe fn _mm_load_si128(mem_addr: *const __m128i) -> __m128i;
        unsafe fn _mm_loadu_si128(mem_addr: *const __m128i) -> __m128i;
        unsafe fn _mm_maskmoveu_si128(a: __m128i, mask: __m128i, mem_addr: *mut i8);
        unsafe fn _mm_store_si128(mem_addr: *mut __m128i, a: __m128i);
        unsafe fn _mm_storeu_si128(mem_addr: *mut __m128i, a: __m128i);
        unsafe fn _mm_storel_epi64(mem_addr: *mut __m128i, a: __m128i);
        unsafe fn _mm_stream_si128(mem_addr: *mut __m128i, a: __m128i);
        unsafe fn _mm_stream_si32(mem_addr: *mut i32, a: i32);

        // `move`/`packs`/`packus`/`extract`/`insert`/`movemask`/`shuffle*`/`unpack*`
        // entries on `__m128i`.
        fn _mm_move_epi64(a: __m128i) -> __m128i;
        fn _mm_packs_epi16(a: __m128i, b: __m128i) -> __m128i;
        fn _mm_packs_epi32(a: __m128i, b: __m128i) -> __m128i;
        fn _mm_packus_epi16(a: __m128i, b: __m128i) -> __m128i;
        fn _mm_extract_epi16<const IMM8: i32>(a: __m128i) -> i32;
        fn _mm_insert_epi16<const IMM8: i32>(a: __m128i, i: i32) -> __m128i;
        fn _mm_movemask_epi8(a: __m128i) -> i32;
        fn _mm_shuffle_epi32<const IMM8: i32>(a: __m128i) -> __m128i;
        fn _mm_shufflehi_epi16<const IMM8: i32>(a: __m128i) -> __m128i;
        fn _mm_shufflelo_epi16<const IMM8: i32>(a: __m128i) -> __m128i;
        fn _mm_unpackhi_epi8(a: __m128i, b: __m128i) -> __m128i;
        fn _mm_unpackhi_epi16(a: __m128i, b: __m128i) -> __m128i;
        fn _mm_unpackhi_epi32(a: __m128i, b: __m128i) -> __m128i;
        fn _mm_unpackhi_epi64(a: __m128i, b: __m128i) -> __m128i;
        fn _mm_unpacklo_epi8(a: __m128i, b: __m128i) -> __m128i;
        fn _mm_unpacklo_epi16(a: __m128i, b: __m128i) -> __m128i;
        fn _mm_unpacklo_epi32(a: __m128i, b: __m128i) -> __m128i;
        fn _mm_unpacklo_epi64(a: __m128i, b: __m128i) -> __m128i;

        // `__m128d` entries in `_sd`/`_pd` pairs: add/div/max/min/mul/sqrt/sub.
        // Note the differing arity in the `sqrt` pair: `_mm_sqrt_sd` takes two
        // operands while `_mm_sqrt_pd` takes one.
        fn _mm_add_sd(a: __m128d, b: __m128d) -> __m128d;
        fn _mm_add_pd(a: __m128d, b: __m128d) -> __m128d;
        fn _mm_div_sd(a: __m128d, b: __m128d) -> __m128d;
        fn _mm_div_pd(a: __m128d, b: __m128d) -> __m128d;
        fn _mm_max_sd(a: __m128d, b: __m128d) -> __m128d;
        fn _mm_max_pd(a: __m128d, b: __m128d) -> __m128d;
        fn _mm_min_sd(a: __m128d, b: __m128d) -> __m128d;
        fn _mm_min_pd(a: __m128d, b: __m128d) -> __m128d;
        fn _mm_mul_sd(a: __m128d, b: __m128d) -> __m128d;
        fn _mm_mul_pd(a: __m128d, b: __m128d) -> __m128d;
        fn _mm_sqrt_sd(a: __m128d, b: __m128d) -> __m128d;
        fn _mm_sqrt_pd(a: __m128d) -> __m128d;
        fn _mm_sub_sd(a: __m128d, b: __m128d) -> __m128d;
        fn _mm_sub_pd(a: __m128d, b: __m128d) -> __m128d;

        // `and`/`andnot`/`or`/`xor` on `__m128d`.
        fn _mm_and_pd(a: __m128d, b: __m128d) -> __m128d;
        fn _mm_andnot_pd(a: __m128d, b: __m128d) -> __m128d;
        fn _mm_or_pd(a: __m128d, b: __m128d) -> __m128d;
        fn _mm_xor_pd(a: __m128d, b: __m128d) -> __m128d;

        // `cmp*_sd` entries; each returns a `__m128d`.
        fn _mm_cmpeq_sd(a: __m128d, b: __m128d) -> __m128d;
        fn _mm_cmplt_sd(a: __m128d, b: __m128d) -> __m128d;
        fn _mm_cmple_sd(a: __m128d, b: __m128d) -> __m128d;
        fn _mm_cmpgt_sd(a: __m128d, b: __m128d) -> __m128d;
        fn _mm_cmpge_sd(a: __m128d, b: __m128d) -> __m128d;
        fn _mm_cmpord_sd(a: __m128d, b: __m128d) -> __m128d;
        fn _mm_cmpunord_sd(a: __m128d, b: __m128d) -> __m128d;
        fn _mm_cmpneq_sd(a: __m128d, b: __m128d) -> __m128d;
        fn _mm_cmpnlt_sd(a: __m128d, b: __m128d) -> __m128d;
        fn _mm_cmpnle_sd(a: __m128d, b: __m128d) -> __m128d;
        fn _mm_cmpngt_sd(a: __m128d, b: __m128d) -> __m128d;
        fn _mm_cmpnge_sd(a: __m128d, b: __m128d) -> __m128d;

        // `cmp*_pd` entries; same set of predicates as the `_sd` list above.
        fn _mm_cmpeq_pd(a: __m128d, b: __m128d) -> __m128d;
        fn _mm_cmplt_pd(a: __m128d, b: __m128d) -> __m128d;
        fn _mm_cmple_pd(a: __m128d, b: __m128d) -> __m128d;
        fn _mm_cmpgt_pd(a: __m128d, b: __m128d) -> __m128d;
        fn _mm_cmpge_pd(a: __m128d, b: __m128d) -> __m128d;
        fn _mm_cmpord_pd(a: __m128d, b: __m128d) -> __m128d;
        fn _mm_cmpunord_pd(a: __m128d, b: __m128d) -> __m128d;
        fn _mm_cmpneq_pd(a: __m128d, b: __m128d) -> __m128d;
        fn _mm_cmpnlt_pd(a: __m128d, b: __m128d) -> __m128d;
        fn _mm_cmpnle_pd(a: __m128d, b: __m128d) -> __m128d;
        fn _mm_cmpngt_pd(a: __m128d, b: __m128d) -> __m128d;
        fn _mm_cmpnge_pd(a: __m128d, b: __m128d) -> __m128d;

        // `comi*`/`ucomi*` entries; unlike the `cmp*` lists these return `i32`.
        fn _mm_comieq_sd(a: __m128d, b: __m128d) -> i32;
        fn _mm_comilt_sd(a: __m128d, b: __m128d) -> i32;
        fn _mm_comile_sd(a: __m128d, b: __m128d) -> i32;
        fn _mm_comigt_sd(a: __m128d, b: __m128d) -> i32;
        fn _mm_comige_sd(a: __m128d, b: __m128d) -> i32;
        fn _mm_comineq_sd(a: __m128d, b: __m128d) -> i32;
        fn _mm_ucomieq_sd(a: __m128d, b: __m128d) -> i32;
        fn _mm_ucomilt_sd(a: __m128d, b: __m128d) -> i32;
        fn _mm_ucomile_sd(a: __m128d, b: __m128d) -> i32;
        fn _mm_ucomigt_sd(a: __m128d, b: __m128d) -> i32;
        fn _mm_ucomige_sd(a: __m128d, b: __m128d) -> i32;
        fn _mm_ucomineq_sd(a: __m128d, b: __m128d) -> i32;

        // `cvt*`/`cvtt*` entries involving `__m128d` and `__m128` operands.
        fn _mm_cvtpd_ps(a: __m128d) -> __m128;
        fn _mm_cvtps_pd(a: __m128) -> __m128d;
        fn _mm_cvtpd_epi32(a: __m128d) -> __m128i;
        fn _mm_cvtsd_si32(a: __m128d) -> i32;
        fn _mm_cvtsd_ss(a: __m128, b: __m128d) -> __m128;
        fn _mm_cvtsd_f64(a: __m128d) -> f64;
        fn _mm_cvtss_sd(a: __m128d, b: __m128) -> __m128d;
        fn _mm_cvttpd_epi32(a: __m128d) -> __m128i;
        fn _mm_cvttsd_si32(a: __m128d) -> i32;
        fn _mm_cvttps_epi32(a: __m128) -> __m128i;

        // `set*` entries returning `__m128d` from `f64` arguments (or none),
        // followed by `_mm_movemask_pd`, which returns `i32`.
        fn _mm_set_sd(a: f64) -> __m128d;
        fn _mm_set1_pd(a: f64) -> __m128d;
        fn _mm_set_pd1(a: f64) -> __m128d;
        fn _mm_set_pd(a: f64, b: f64) -> __m128d;
        fn _mm_setr_pd(a: f64, b: f64) -> __m128d;
        fn _mm_setzero_pd() -> __m128d;
        fn _mm_movemask_pd(a: __m128d) -> i32;

        // Pointer-taking `f64` load, store and stream entries; every one of
        // them is declared `unsafe`.
        unsafe fn _mm_load_pd(mem_addr: *const f64) -> __m128d;
        unsafe fn _mm_load_sd(mem_addr: *const f64) -> __m128d;
        unsafe fn _mm_loadh_pd(a: __m128d, mem_addr: *const f64) -> __m128d;
        unsafe fn _mm_loadl_pd(a: __m128d, mem_addr: *const f64) -> __m128d;
        unsafe fn _mm_stream_pd(mem_addr: *mut f64, a: __m128d);
        unsafe fn _mm_store_sd(mem_addr: *mut f64, a: __m128d);
        unsafe fn _mm_store_pd(mem_addr: *mut f64, a: __m128d);
        unsafe fn _mm_storeu_pd(mem_addr: *mut f64, a: __m128d);
        unsafe fn _mm_store1_pd(mem_addr: *mut f64, a: __m128d);
        unsafe fn _mm_store_pd1(mem_addr: *mut f64, a: __m128d);
        unsafe fn _mm_storer_pd(mem_addr: *mut f64, a: __m128d);
        unsafe fn _mm_storeh_pd(mem_addr: *mut f64, a: __m128d);
        unsafe fn _mm_storel_pd(mem_addr: *mut f64, a: __m128d);
        unsafe fn _mm_load1_pd(mem_addr: *const f64) -> __m128d;
        unsafe fn _mm_load_pd1(mem_addr: *const f64) -> __m128d;
        unsafe fn _mm_loadr_pd(mem_addr: *const f64) -> __m128d;
        unsafe fn _mm_loadu_pd(mem_addr: *const f64) -> __m128d;

        // Remaining `__m128d` entries: `shuffle` (with a `const MASK: i32`
        // generic), `move`, the `cast*` family between the three vector types,
        // `undefined*`, and `unpack*`.
        fn _mm_shuffle_pd<const MASK: i32>(a: __m128d, b: __m128d) -> __m128d;
        fn _mm_move_sd(a: __m128d, b: __m128d) -> __m128d;
        fn _mm_castpd_ps(a: __m128d) -> __m128;
        fn _mm_castpd_si128(a: __m128d) -> __m128i;
        fn _mm_castps_pd(a: __m128) -> __m128d;
        fn _mm_castps_si128(a: __m128) -> __m128i;
        fn _mm_castsi128_pd(a: __m128i) -> __m128d;
        fn _mm_castsi128_ps(a: __m128i) -> __m128;
        fn _mm_undefined_pd() -> __m128d;
        fn _mm_undefined_si128() -> __m128i;
        fn _mm_unpackhi_pd(a: __m128d, b: __m128d) -> __m128d;
        fn _mm_unpacklo_pd(a: __m128d, b: __m128d) -> __m128d;
    }
}