#![allow(dead_code, unused_imports)]
use crate::leading_zeros::leading_zeros_u16;
use core::mem;

#[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
mod x86;

#[cfg(target_arch = "aarch64")]
mod aarch64;

#[cfg(all(feature = "nightly", target_arch = "loongarch64"))]
mod loongarch64;

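// Dispatch note (descriptive comment added for clarity): `convert_fn!` picks an
// implementation in three tiers.
// 1. If the matching SIMD feature (x86 `f16c`, AArch64 `fp16`, LoongArch `lsx`) is
//    enabled at compile time via `target_feature`, the intrinsic path is used directly.
// 2. Otherwise, when the `std` feature is enabled, the CPU feature is detected at
//    runtime and the intrinsic path is taken only if the CPU supports it.
// 3. Otherwise the portable software fallback is used.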
macro_rules! convert_fn {
    (if x86_feature("f16c") { $f16c:expr }
    else if aarch64_feature("fp16") { $aarch64:expr }
    else if loongarch64_feature("lsx") { $loongarch64:expr }
    else { $fallback:expr }) => {
        cfg_if::cfg_if! {
            // Use intrinsics directly when the matching target feature is enabled at compile time.
            if #[cfg(all(
                any(target_arch = "x86", target_arch = "x86_64"),
                target_feature = "f16c"
            ))] {
                $f16c
            }
            else if #[cfg(all(
                target_arch = "aarch64",
                target_feature = "fp16"
            ))] {
                $aarch64
            }
            else if #[cfg(all(
                feature = "nightly",
                target_arch = "loongarch64",
                target_feature = "lsx"
            ))] {
                $loongarch64
            }

            // Otherwise, if `std` is available, detect the CPU feature at runtime.
            else if #[cfg(all(
                feature = "std",
                any(target_arch = "x86", target_arch = "x86_64")
            ))] {
                use std::arch::is_x86_feature_detected;
                if is_x86_feature_detected!("f16c") {
                    $f16c
                } else {
                    $fallback
                }
            }
            else if #[cfg(all(
                feature = "std",
                target_arch = "aarch64",
            ))] {
                use std::arch::is_aarch64_feature_detected;
                if is_aarch64_feature_detected!("fp16") {
                    $aarch64
                } else {
                    $fallback
                }
            }
            else if #[cfg(all(
                feature = "std",
                feature = "nightly",
                target_arch = "loongarch64",
            ))] {
                use std::arch::is_loongarch_feature_detected;
                if is_loongarch_feature_detected!("lsx") {
                    $loongarch64
                } else {
                    $fallback
                }
            }

            // Otherwise use the portable software fallback.
            else {
                $fallback
            }
        }
    };
}

#[inline]
pub(crate) fn f32_to_f16(f: f32) -> u16 {
    convert_fn! {
        if x86_feature("f16c") {
            unsafe { x86::f32_to_f16_x86_f16c(f) }
        } else if aarch64_feature("fp16") {
            unsafe { aarch64::f32_to_f16_fp16(f) }
        } else if loongarch64_feature("lsx") {
            unsafe { loongarch64::f32_to_f16_lsx(f) }
        } else {
            f32_to_f16_fallback(f)
        }
    }
}

#[inline]
pub(crate) fn f64_to_f16(f: f64) -> u16 {
    convert_fn! {
        if x86_feature("f16c") {
            unsafe { x86::f32_to_f16_x86_f16c(f as f32) }
        } else if aarch64_feature("fp16") {
            unsafe { aarch64::f64_to_f16_fp16(f) }
        } else if loongarch64_feature("lsx") {
            f64_to_f16_fallback(f)
        } else {
            f64_to_f16_fallback(f)
        }
    }
}

#[inline]
pub(crate) fn f16_to_f32(i: u16) -> f32 {
    convert_fn! {
        if x86_feature("f16c") {
            unsafe { x86::f16_to_f32_x86_f16c(i) }
        } else if aarch64_feature("fp16") {
            unsafe { aarch64::f16_to_f32_fp16(i) }
        } else if loongarch64_feature("lsx") {
            unsafe { loongarch64::f16_to_f32_lsx(i) }
        } else {
            f16_to_f32_fallback(i)
        }
    }
}

#[inline]
pub(crate) fn f16_to_f64(i: u16) -> f64 {
    convert_fn! {
        if x86_feature("f16c") {
            unsafe { x86::f16_to_f32_x86_f16c(i) as f64 }
        } else if aarch64_feature("fp16") {
            unsafe { aarch64::f16_to_f64_fp16(i) }
        } else if loongarch64_feature("lsx") {
            unsafe { loongarch64::f16_to_f32_lsx(i) as f64 }
        } else {
            f16_to_f64_fallback(i)
        }
    }
}

#[inline]
pub(crate) fn f32x4_to_f16x4(f: &[f32; 4]) -> [u16; 4] {
    convert_fn! {
        if x86_feature("f16c") {
            unsafe { x86::f32x4_to_f16x4_x86_f16c(f) }
        } else if aarch64_feature("fp16") {
            unsafe { aarch64::f32x4_to_f16x4_fp16(f) }
        } else if loongarch64_feature("lsx") {
            unsafe { loongarch64::f32x4_to_f16x4_lsx(f) }
        } else {
            f32x4_to_f16x4_fallback(f)
        }
    }
}

#[inline]
pub(crate) fn f16x4_to_f32x4(i: &[u16; 4]) -> [f32; 4] {
    convert_fn! {
        if x86_feature("f16c") {
            unsafe { x86::f16x4_to_f32x4_x86_f16c(i) }
        } else if aarch64_feature("fp16") {
            unsafe { aarch64::f16x4_to_f32x4_fp16(i) }
        } else if loongarch64_feature("lsx") {
            unsafe { loongarch64::f16x4_to_f32x4_lsx(i) }
        } else {
            f16x4_to_f32x4_fallback(i)
        }
    }
}

#[inline]
pub(crate) fn f64x4_to_f16x4(f: &[f64; 4]) -> [u16; 4] {
    convert_fn! {
        if x86_feature("f16c") {
            unsafe { x86::f64x4_to_f16x4_x86_f16c(f) }
        } else if aarch64_feature("fp16") {
            unsafe { aarch64::f64x4_to_f16x4_fp16(f) }
        } else if loongarch64_feature("lsx") {
            unsafe { loongarch64::f64x4_to_f16x4_lsx(f) }
        } else {
            f64x4_to_f16x4_fallback(f)
        }
    }
}

#[inline]
pub(crate) fn f16x4_to_f64x4(i: &[u16; 4]) -> [f64; 4] {
    convert_fn! {
        if x86_feature("f16c") {
            unsafe { x86::f16x4_to_f64x4_x86_f16c(i) }
        } else if aarch64_feature("fp16") {
            unsafe { aarch64::f16x4_to_f64x4_fp16(i) }
        } else if loongarch64_feature("lsx") {
            unsafe { loongarch64::f16x4_to_f64x4_lsx(i) }
        } else {
            f16x4_to_f64x4_fallback(i)
        }
    }
}

#[inline]
pub(crate) fn f32x8_to_f16x8(f: &[f32; 8]) -> [u16; 8] {
    convert_fn! {
        if x86_feature("f16c") {
            unsafe { x86::f32x8_to_f16x8_x86_f16c(f) }
        } else if aarch64_feature("fp16") {
            {
                let mut result = [0u16; 8];
                convert_chunked_slice_4(f.as_slice(), result.as_mut_slice(),
                    aarch64::f32x4_to_f16x4_fp16);
                result
            }
        } else if loongarch64_feature("lsx") {
            {
                let mut result = [0u16; 8];
                convert_chunked_slice_4(f.as_slice(), result.as_mut_slice(),
                    loongarch64::f32x4_to_f16x4_lsx);
                result
            }
        } else {
            f32x8_to_f16x8_fallback(f)
        }
    }
}

#[inline]
pub(crate) fn f16x8_to_f32x8(i: &[u16; 8]) -> [f32; 8] {
    convert_fn! {
        if x86_feature("f16c") {
            unsafe { x86::f16x8_to_f32x8_x86_f16c(i) }
        } else if aarch64_feature("fp16") {
            {
                let mut result = [0f32; 8];
                convert_chunked_slice_4(i.as_slice(), result.as_mut_slice(),
                    aarch64::f16x4_to_f32x4_fp16);
                result
            }
        } else if loongarch64_feature("lsx") {
            {
                let mut result = [0f32; 8];
                convert_chunked_slice_4(i.as_slice(), result.as_mut_slice(),
                    loongarch64::f16x4_to_f32x4_lsx);
                result
            }
        } else {
            f16x8_to_f32x8_fallback(i)
        }
    }
}

#[inline]
pub(crate) fn f64x8_to_f16x8(f: &[f64; 8]) -> [u16; 8] {
    convert_fn! {
        if x86_feature("f16c") {
            unsafe { x86::f64x8_to_f16x8_x86_f16c(f) }
        } else if aarch64_feature("fp16") {
            {
                let mut result = [0u16; 8];
                convert_chunked_slice_4(f.as_slice(), result.as_mut_slice(),
                    aarch64::f64x4_to_f16x4_fp16);
                result
            }
        } else if loongarch64_feature("lsx") {
            {
                let mut result = [0u16; 8];
                convert_chunked_slice_4(f.as_slice(), result.as_mut_slice(),
                    loongarch64::f64x4_to_f16x4_lsx);
                result
            }
        } else {
            f64x8_to_f16x8_fallback(f)
        }
    }
}

#[inline]
pub(crate) fn f16x8_to_f64x8(i: &[u16; 8]) -> [f64; 8] {
    convert_fn! {
        if x86_feature("f16c") {
            unsafe { x86::f16x8_to_f64x8_x86_f16c(i) }
        } else if aarch64_feature("fp16") {
            {
                let mut result = [0f64; 8];
                convert_chunked_slice_4(i.as_slice(), result.as_mut_slice(),
                    aarch64::f16x4_to_f64x4_fp16);
                result
            }
        } else if loongarch64_feature("lsx") {
            {
                let mut result = [0f64; 8];
                convert_chunked_slice_4(i.as_slice(), result.as_mut_slice(),
                    loongarch64::f16x4_to_f64x4_lsx);
                result
            }
        } else {
            f16x8_to_f64x8_fallback(i)
        }
    }
}

#[inline]
pub(crate) fn f32_to_f16_slice(src: &[f32], dst: &mut [u16]) {
    convert_fn! {
        if x86_feature("f16c") {
            convert_chunked_slice_8(src, dst, x86::f32x8_to_f16x8_x86_f16c,
                x86::f32x4_to_f16x4_x86_f16c)
        } else if aarch64_feature("fp16") {
            convert_chunked_slice_4(src, dst, aarch64::f32x4_to_f16x4_fp16)
        } else if loongarch64_feature("lsx") {
            convert_chunked_slice_4(src, dst, loongarch64::f32x4_to_f16x4_lsx)
        } else {
            slice_fallback(src, dst, f32_to_f16_fallback)
        }
    }
}

#[inline]
pub(crate) fn f16_to_f32_slice(src: &[u16], dst: &mut [f32]) {
    convert_fn! {
        if x86_feature("f16c") {
            convert_chunked_slice_8(src, dst, x86::f16x8_to_f32x8_x86_f16c,
                x86::f16x4_to_f32x4_x86_f16c)
        } else if aarch64_feature("fp16") {
            convert_chunked_slice_4(src, dst, aarch64::f16x4_to_f32x4_fp16)
        } else if loongarch64_feature("lsx") {
            convert_chunked_slice_4(src, dst, loongarch64::f16x4_to_f32x4_lsx)
        } else {
            slice_fallback(src, dst, f16_to_f32_fallback)
        }
    }
}

#[inline]
pub(crate) fn f64_to_f16_slice(src: &[f64], dst: &mut [u16]) {
    convert_fn! {
        if x86_feature("f16c") {
            convert_chunked_slice_8(src, dst, x86::f64x8_to_f16x8_x86_f16c,
                x86::f64x4_to_f16x4_x86_f16c)
        } else if aarch64_feature("fp16") {
            convert_chunked_slice_4(src, dst, aarch64::f64x4_to_f16x4_fp16)
        } else if loongarch64_feature("lsx") {
            convert_chunked_slice_4(src, dst, loongarch64::f64x4_to_f16x4_lsx)
        } else {
            slice_fallback(src, dst, f64_to_f16_fallback)
        }
    }
}

#[inline]
pub(crate) fn f16_to_f64_slice(src: &[u16], dst: &mut [f64]) {
    convert_fn! {
        if x86_feature("f16c") {
            convert_chunked_slice_8(src, dst, x86::f16x8_to_f64x8_x86_f16c,
                x86::f16x4_to_f64x4_x86_f16c)
        } else if aarch64_feature("fp16") {
            convert_chunked_slice_4(src, dst, aarch64::f16x4_to_f64x4_fp16)
        } else if loongarch64_feature("lsx") {
            convert_chunked_slice_4(src, dst, loongarch64::f16x4_to_f64x4_lsx)
        } else {
            slice_fallback(src, dst, f16_to_f64_fallback)
        }
    }
}

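// Dispatch note (descriptive comment added for clarity): `math_fn!` mirrors
// `convert_fn!`, but scalar f16 arithmetic only has a hardware path on AArch64 with
// `fp16`; every other configuration goes through the f32-based software fallback.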
macro_rules! math_fn {
    (if aarch64_feature("fp16") { $aarch64:expr }
    else { $fallback:expr }) => {
        cfg_if::cfg_if! {
            // Use intrinsics directly when the `fp16` feature is enabled at compile time.
            if #[cfg(all(
                target_arch = "aarch64",
                target_feature = "fp16"
            ))] {
                $aarch64
            }

            // Otherwise, if `std` is available, detect the CPU feature at runtime.
            else if #[cfg(all(
                feature = "std",
                target_arch = "aarch64",
                not(target_feature = "fp16")
            ))] {
                use std::arch::is_aarch64_feature_detected;
                if is_aarch64_feature_detected!("fp16") {
                    $aarch64
                } else {
                    $fallback
                }
            }

            // Otherwise use the portable software fallback.
            else {
                $fallback
            }
        }
    };
}

#[inline]
pub(crate) fn add_f16(a: u16, b: u16) -> u16 {
    math_fn! {
        if aarch64_feature("fp16") {
            unsafe { aarch64::add_f16_fp16(a, b) }
        } else {
            add_f16_fallback(a, b)
        }
    }
}

#[inline]
pub(crate) fn subtract_f16(a: u16, b: u16) -> u16 {
    math_fn! {
        if aarch64_feature("fp16") {
            unsafe { aarch64::subtract_f16_fp16(a, b) }
        } else {
            subtract_f16_fallback(a, b)
        }
    }
}

#[inline]
pub(crate) fn multiply_f16(a: u16, b: u16) -> u16 {
    math_fn! {
        if aarch64_feature("fp16") {
            unsafe { aarch64::multiply_f16_fp16(a, b) }
        } else {
            multiply_f16_fallback(a, b)
        }
    }
}

#[inline]
pub(crate) fn divide_f16(a: u16, b: u16) -> u16 {
    math_fn! {
        if aarch64_feature("fp16") {
            unsafe { aarch64::divide_f16_fp16(a, b) }
        } else {
            divide_f16_fallback(a, b)
        }
    }
}

#[inline]
pub(crate) fn remainder_f16(a: u16, b: u16) -> u16 {
    remainder_f16_fallback(a, b)
}

#[inline]
pub(crate) fn product_f16<I: Iterator<Item = u16>>(iter: I) -> u16 {
    math_fn! {
        if aarch64_feature("fp16") {
            // Fold from 1.0 (0x3C00 in binary16) so the hardware path matches the
            // fallback; folding from 0 would force every product to zero.
            iter.fold(0x3C00, |acc, x| unsafe { aarch64::multiply_f16_fp16(acc, x) })
        } else {
            product_f16_fallback(iter)
        }
    }
}

#[inline]
pub(crate) fn sum_f16<I: Iterator<Item = u16>>(iter: I) -> u16 {
    math_fn! {
        if aarch64_feature("fp16") {
            // Fold from +0.0.
            iter.fold(0, |acc, x| unsafe { aarch64::add_f16_fp16(acc, x) })
        } else {
            sum_f16_fallback(iter)
        }
    }
}

#[inline]
fn convert_chunked_slice_8<S: Copy + Default, D: Copy>(
    src: &[S],
    dst: &mut [D],
    fn8: unsafe fn(&[S; 8]) -> [D; 8],
    fn4: unsafe fn(&[S; 4]) -> [D; 4],
) {
    assert_eq!(src.len(), dst.len());

    // Convert as many full 8-wide chunks as possible.
    let src_chunks = src.chunks_exact(8);
    let mut dst_chunks = dst.chunks_exact_mut(8);
    let src_remainder = src_chunks.remainder();
    for (s, d) in src_chunks.zip(&mut dst_chunks) {
        let chunk: &[S; 8] = s.try_into().unwrap();
        d.copy_from_slice(unsafe { &fn8(chunk) });
    }

    // Convert any remainder through a zero-padded buffer, copying back only the
    // elements that were actually requested.
    if src_remainder.len() > 4 {
        let mut buf: [S; 8] = Default::default();
        buf[..src_remainder.len()].copy_from_slice(src_remainder);
        let vec = unsafe { fn8(&buf) };
        let dst_remainder = dst_chunks.into_remainder();
        dst_remainder.copy_from_slice(&vec[..dst_remainder.len()]);
    } else if !src_remainder.is_empty() {
        let mut buf: [S; 4] = Default::default();
        buf[..src_remainder.len()].copy_from_slice(src_remainder);
        let vec = unsafe { fn4(&buf) };
        let dst_remainder = dst_chunks.into_remainder();
        dst_remainder.copy_from_slice(&vec[..dst_remainder.len()]);
    }
}
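
// Example (added for clarity): a 13-element slice is handled as one full chunk of 8
// converted with `fn8`, then the 5-element remainder is zero-padded into an 8-wide
// buffer, converted with `fn8`, and only the first 5 results are copied back to `dst`.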

#[inline]
fn convert_chunked_slice_4<S: Copy + Default, D: Copy>(
    src: &[S],
    dst: &mut [D],
    f: unsafe fn(&[S; 4]) -> [D; 4],
) {
    assert_eq!(src.len(), dst.len());

    // Convert as many full 4-wide chunks as possible.
    let src_chunks = src.chunks_exact(4);
    let mut dst_chunks = dst.chunks_exact_mut(4);
    let src_remainder = src_chunks.remainder();
    for (s, d) in src_chunks.zip(&mut dst_chunks) {
        let chunk: &[S; 4] = s.try_into().unwrap();
        d.copy_from_slice(unsafe { &f(chunk) });
    }

    // Convert any remainder through a zero-padded 4-wide buffer.
    if !src_remainder.is_empty() {
        let mut buf: [S; 4] = Default::default();
        buf[..src_remainder.len()].copy_from_slice(src_remainder);
        let vec = unsafe { f(&buf) };
        let dst_remainder = dst_chunks.into_remainder();
        dst_remainder.copy_from_slice(&vec[..dst_remainder.len()]);
    }
}

#[inline]
pub(crate) const fn f32_to_f16_fallback(value: f32) -> u16 {
    // Reinterpret the f32 bits as a u32.
    let x: u32 = unsafe { mem::transmute::<f32, u32>(value) };

    // Extract the IEEE 754 components.
    let sign = x & 0x8000_0000u32;
    let exp = x & 0x7F80_0000u32;
    let man = x & 0x007F_FFFFu32;

    // All exponent bits set means infinity or NaN.
    if exp == 0x7F80_0000u32 {
        // Set the mantissa MSB for NaN so the payload is not silently turned into infinity.
        let nan_bit = if man == 0 { 0 } else { 0x0200u32 };
        return ((sign >> 16) | 0x7C00u32 | nan_bit | (man >> 13)) as u16;
    }

    // The number is normal; start assembling the half-precision value.
    let half_sign = sign >> 16;
    // Unbias the exponent, then rebias for half precision.
    let unbiased_exp = ((exp >> 23) as i32) - 127;
    let half_exp = unbiased_exp + 15;

    // Exponent overflow: return signed infinity.
    if half_exp >= 0x1F {
        return (half_sign | 0x7C00u32) as u16;
    }

    // Exponent underflow: produce a subnormal or signed zero.
    if half_exp <= 0 {
        if 14 - half_exp > 24 {
            // Too small even to round into a subnormal; full underflow to signed zero.
            return half_sign as u16;
        }
        // Include the hidden leading mantissa bit before shifting into place.
        let man = man | 0x0080_0000u32;
        let mut half_man = man >> (14 - half_exp);
        // Round to nearest, ties to even.
        let round_bit = 1 << (13 - half_exp);
        if (man & round_bit) != 0 && (man & (3 * round_bit - 1)) != 0 {
            half_man += 1;
        }
        // Subnormals have no exponent bits.
        return (half_sign | half_man) as u16;
    }

    // Rebias the exponent and truncate the mantissa.
    let half_exp = (half_exp as u32) << 10;
    let half_man = man >> 13;
    // Round to nearest, ties to even.
    let round_bit = 0x0000_1000u32;
    if (man & round_bit) != 0 && (man & (3 * round_bit - 1)) != 0 {
        ((half_sign | half_exp | half_man) + 1) as u16
    } else {
        (half_sign | half_exp | half_man) as u16
    }
}
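
// Worked example (added for clarity): 1.0f32 has bits 0x3F80_0000 (exponent field 127,
// zero mantissa). The unbiased exponent 0 rebiases to 15 for binary16, so the result
// is 15 << 10 = 0x3C00, which is 1.0 in binary16.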

#[inline]
pub(crate) const fn f64_to_f16_fallback(value: f64) -> u16 {
    // Reinterpret the f64 bits as a u64, then work on the upper 32 bits; the lower
    // 32 bits of the mantissa cannot be represented in half precision anyway.
    let val: u64 = unsafe { mem::transmute::<f64, u64>(value) };
    let x = (val >> 32) as u32;

    // Extract the IEEE 754 components.
    let sign = x & 0x8000_0000u32;
    let exp = x & 0x7FF0_0000u32;
    let man = x & 0x000F_FFFFu32;

    // All exponent bits set means infinity or NaN.
    if exp == 0x7FF0_0000u32 {
        // Check the discarded low bits too, so a NaN whose payload lives only there is
        // still reported as NaN rather than infinity.
        let nan_bit = if man == 0 && (val as u32 == 0) {
            0
        } else {
            0x0200u32
        };
        return ((sign >> 16) | 0x7C00u32 | nan_bit | (man >> 10)) as u16;
    }

    // The number is normal; start assembling the half-precision value.
    let half_sign = sign >> 16;
    // Unbias the exponent, then rebias for half precision.
    let unbiased_exp = ((exp >> 20) as i64) - 1023;
    let half_exp = unbiased_exp + 15;

    // Exponent overflow: return signed infinity.
    if half_exp >= 0x1F {
        return (half_sign | 0x7C00u32) as u16;
    }

    // Exponent underflow: produce a subnormal or signed zero.
    if half_exp <= 0 {
        if 10 - half_exp > 21 {
            // Too small even to round into a subnormal; full underflow to signed zero.
            return half_sign as u16;
        }
        // Include the hidden leading mantissa bit before shifting into place.
        let man = man | 0x0010_0000u32;
        let mut half_man = man >> (11 - half_exp);
        // Round to nearest, ties to even.
        let round_bit = 1 << (10 - half_exp);
        if (man & round_bit) != 0 && (man & (3 * round_bit - 1)) != 0 {
            half_man += 1;
        }
        // Subnormals have no exponent bits.
        return (half_sign | half_man) as u16;
    }

    // Rebias the exponent and truncate the mantissa.
    let half_exp = (half_exp as u32) << 10;
    let half_man = man >> 10;
    // Round to nearest, ties to even.
    let round_bit = 0x0000_0200u32;
    if (man & round_bit) != 0 && (man & (3 * round_bit - 1)) != 0 {
        ((half_sign | half_exp | half_man) + 1) as u16
    } else {
        (half_sign | half_exp | half_man) as u16
    }
}

#[inline]
pub(crate) const fn f16_to_f32_fallback(i: u16) -> f32 {
    // Check for signed zero.
    if i & 0x7FFFu16 == 0 {
        return unsafe { mem::transmute::<u32, f32>((i as u32) << 16) };
    }

    let half_sign = (i & 0x8000u16) as u32;
    let half_exp = (i & 0x7C00u16) as u32;
    let half_man = (i & 0x03FFu16) as u32;

    // All exponent bits set means infinity or NaN.
    if half_exp == 0x7C00u32 {
        // A zero mantissa is signed infinity.
        if half_man == 0 {
            return unsafe { mem::transmute::<u32, f32>((half_sign << 16) | 0x7F80_0000u32) };
        } else {
            // NaN: keep the mantissa payload and set the most significant mantissa bit.
            return unsafe {
                mem::transmute::<u32, f32>((half_sign << 16) | 0x7FC0_0000u32 | (half_man << 13))
            };
        }
    }

    // Calculate single-precision components with an adjusted exponent.
    let sign = half_sign << 16;
    // Unbias the exponent.
    let unbiased_exp = ((half_exp as i32) >> 10) - 15;

    // Subnormals are normalized by shifting the mantissa and adjusting the exponent.
    if half_exp == 0 {
        // Count leading zeros of the mantissa to find the shift needed for normalization.
        let e = leading_zeros_u16(half_man as u16) - 6;

        // Rebias and adjust the exponent, then drop the now-implicit leading bit.
        let exp = (127 - 15 - e) << 23;
        let man = (half_man << (14 + e)) & 0x7F_FF_FFu32;
        return unsafe { mem::transmute::<u32, f32>(sign | exp | man) };
    }

    // Rebias the exponent for a normal number.
    let exp = ((unbiased_exp + 127) as u32) << 23;
    let man = (half_man & 0x03FFu32) << 13;
    unsafe { mem::transmute::<u32, f32>(sign | exp | man) }
}
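
// Worked example (added for clarity): 0x3C00 has exponent field 15 and zero mantissa,
// so the unbiased exponent is 0; rebiasing to 127 gives bits 0x3F80_0000, i.e. 1.0f32.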

#[inline]
pub(crate) const fn f16_to_f64_fallback(i: u16) -> f64 {
    // Check for signed zero.
    if i & 0x7FFFu16 == 0 {
        return unsafe { mem::transmute::<u64, f64>((i as u64) << 48) };
    }

    let half_sign = (i & 0x8000u16) as u64;
    let half_exp = (i & 0x7C00u16) as u64;
    let half_man = (i & 0x03FFu16) as u64;

    // All exponent bits set means infinity or NaN.
    if half_exp == 0x7C00u64 {
        // A zero mantissa is signed infinity.
        if half_man == 0 {
            return unsafe {
                mem::transmute::<u64, f64>((half_sign << 48) | 0x7FF0_0000_0000_0000u64)
            };
        } else {
            // NaN: keep the mantissa payload and set the most significant mantissa bit.
            return unsafe {
                mem::transmute::<u64, f64>(
                    (half_sign << 48) | 0x7FF8_0000_0000_0000u64 | (half_man << 42),
                )
            };
        }
    }

    // Calculate double-precision components with an adjusted exponent.
    let sign = half_sign << 48;
    // Unbias the exponent.
    let unbiased_exp = ((half_exp as i64) >> 10) - 15;

    // Subnormals are normalized by shifting the mantissa and adjusting the exponent.
    if half_exp == 0 {
        // Count leading zeros of the mantissa to find the shift needed for normalization.
        let e = leading_zeros_u16(half_man as u16) - 6;

        // Rebias and adjust the exponent, then drop the now-implicit leading bit.
        let exp = ((1023 - 15 - e) as u64) << 52;
        let man = (half_man << (43 + e)) & 0xF_FFFF_FFFF_FFFFu64;
        return unsafe { mem::transmute::<u64, f64>(sign | exp | man) };
    }

    // Rebias the exponent for a normal number.
    let exp = ((unbiased_exp + 1023) as u64) << 52;
    let man = (half_man & 0x03FFu64) << 42;
    unsafe { mem::transmute::<u64, f64>(sign | exp | man) }
}

#[inline]
fn f16x4_to_f32x4_fallback(v: &[u16; 4]) -> [f32; 4] {
    [
        f16_to_f32_fallback(v[0]),
        f16_to_f32_fallback(v[1]),
        f16_to_f32_fallback(v[2]),
        f16_to_f32_fallback(v[3]),
    ]
}

#[inline]
fn f32x4_to_f16x4_fallback(v: &[f32; 4]) -> [u16; 4] {
    [
        f32_to_f16_fallback(v[0]),
        f32_to_f16_fallback(v[1]),
        f32_to_f16_fallback(v[2]),
        f32_to_f16_fallback(v[3]),
    ]
}

#[inline]
fn f16x4_to_f64x4_fallback(v: &[u16; 4]) -> [f64; 4] {
    [
        f16_to_f64_fallback(v[0]),
        f16_to_f64_fallback(v[1]),
        f16_to_f64_fallback(v[2]),
        f16_to_f64_fallback(v[3]),
    ]
}

#[inline]
fn f64x4_to_f16x4_fallback(v: &[f64; 4]) -> [u16; 4] {
    [
        f64_to_f16_fallback(v[0]),
        f64_to_f16_fallback(v[1]),
        f64_to_f16_fallback(v[2]),
        f64_to_f16_fallback(v[3]),
    ]
}

#[inline]
fn f16x8_to_f32x8_fallback(v: &[u16; 8]) -> [f32; 8] {
    [
        f16_to_f32_fallback(v[0]),
        f16_to_f32_fallback(v[1]),
        f16_to_f32_fallback(v[2]),
        f16_to_f32_fallback(v[3]),
        f16_to_f32_fallback(v[4]),
        f16_to_f32_fallback(v[5]),
        f16_to_f32_fallback(v[6]),
        f16_to_f32_fallback(v[7]),
    ]
}

#[inline]
fn f32x8_to_f16x8_fallback(v: &[f32; 8]) -> [u16; 8] {
    [
        f32_to_f16_fallback(v[0]),
        f32_to_f16_fallback(v[1]),
        f32_to_f16_fallback(v[2]),
        f32_to_f16_fallback(v[3]),
        f32_to_f16_fallback(v[4]),
        f32_to_f16_fallback(v[5]),
        f32_to_f16_fallback(v[6]),
        f32_to_f16_fallback(v[7]),
    ]
}

#[inline]
fn f16x8_to_f64x8_fallback(v: &[u16; 8]) -> [f64; 8] {
    [
        f16_to_f64_fallback(v[0]),
        f16_to_f64_fallback(v[1]),
        f16_to_f64_fallback(v[2]),
        f16_to_f64_fallback(v[3]),
        f16_to_f64_fallback(v[4]),
        f16_to_f64_fallback(v[5]),
        f16_to_f64_fallback(v[6]),
        f16_to_f64_fallback(v[7]),
    ]
}

#[inline]
fn f64x8_to_f16x8_fallback(v: &[f64; 8]) -> [u16; 8] {
    [
        f64_to_f16_fallback(v[0]),
        f64_to_f16_fallback(v[1]),
        f64_to_f16_fallback(v[2]),
        f64_to_f16_fallback(v[3]),
        f64_to_f16_fallback(v[4]),
        f64_to_f16_fallback(v[5]),
        f64_to_f16_fallback(v[6]),
        f64_to_f16_fallback(v[7]),
    ]
}

#[inline]
fn slice_fallback<S: Copy, D>(src: &[S], dst: &mut [D], f: fn(S) -> D) {
    assert_eq!(src.len(), dst.len());
    for (s, d) in src.iter().copied().zip(dst.iter_mut()) {
        *d = f(s);
    }
}

#[inline]
fn add_f16_fallback(a: u16, b: u16) -> u16 {
    f32_to_f16(f16_to_f32(a) + f16_to_f32(b))
}

#[inline]
fn subtract_f16_fallback(a: u16, b: u16) -> u16 {
    f32_to_f16(f16_to_f32(a) - f16_to_f32(b))
}

#[inline]
fn multiply_f16_fallback(a: u16, b: u16) -> u16 {
    f32_to_f16(f16_to_f32(a) * f16_to_f32(b))
}

#[inline]
fn divide_f16_fallback(a: u16, b: u16) -> u16 {
    f32_to_f16(f16_to_f32(a) / f16_to_f32(b))
}

#[inline]
fn remainder_f16_fallback(a: u16, b: u16) -> u16 {
    f32_to_f16(f16_to_f32(a) % f16_to_f32(b))
}

#[inline]
fn product_f16_fallback<I: Iterator<Item = u16>>(iter: I) -> u16 {
    f32_to_f16(iter.map(f16_to_f32).product())
}

#[inline]
fn sum_f16_fallback<I: Iterator<Item = u16>>(iter: I) -> u16 {
    f32_to_f16(iter.map(f16_to_f32).sum())
}
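
// Illustrative sanity checks added for clarity. This is a sketch, not the crate's
// existing test suite; the expected bit patterns follow directly from the IEEE 754
// binary16 encoding, and every value used is exactly representable in binary16.
#[cfg(test)]
mod tests {
    use super::*;

    #[test]
    fn fallback_scalar_conversions() {
        // 1.0 is 0x3C00 and 2.0 is 0x4000 in binary16.
        assert_eq!(f32_to_f16_fallback(1.0), 0x3C00);
        assert_eq!(f32_to_f16_fallback(2.0), 0x4000);
        assert_eq!(f16_to_f32_fallback(0x3C00), 1.0);
        assert_eq!(f16_to_f64_fallback(0xC000), -2.0);
        // Infinity and the largest finite binary16 value (65504.0 == 0x7BFF).
        assert_eq!(f32_to_f16_fallback(f32::INFINITY), 0x7C00);
        assert_eq!(f64_to_f16_fallback(65504.0), 0x7BFF);
    }

    #[test]
    fn slice_roundtrip_exact_values() {
        // Nine elements exercise the chunked remainder handling on the SIMD paths
        // as well as the element-wise fallback.
        let src = [0.0f32, 0.5, 1.0, -2.0, 4.0, 8.0, 16.0, 32.0, 64.0];
        let mut half = [0u16; 9];
        let mut back = [0f32; 9];
        f32_to_f16_slice(&src, &mut half);
        f16_to_f32_slice(&half, &mut back);
        assert_eq!(src, back);
    }
}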