// rav1e/transform/forward.rs

use crate::cpu_features::CpuFeatureLevel;
use crate::util::*;

use super::TxType;
// Select the forward-transform implementation at compile time:
// hand-written x86-64 assembly, aarch64 NEON assembly, or the portable
// Rust fallback defined in the `rust` module below.
cfg_if::cfg_if! {
  if #[cfg(nasm_x86_64)] {
    pub use crate::asm::x86::transform::forward::*;
  } else if #[cfg(asm_neon)] {
    pub use crate::asm::aarch64::transform::forward::*;
  } else {
    pub use self::rust::*;
  }
}
24
pub mod rust {
  use super::*;
  use std::mem::MaybeUninit;

  use crate::transform::forward_shared::*;
  use crate::transform::{av1_round_shift_array, valid_av1_transform, TxSize};
  use simd_helpers::cold_for_target_arch;

  // A 1-D transform stage: operates in place on one row or column of
  // i32 intermediates.
  type TxfmFunc = fn(&mut [i32]);

  // Expands to the shared 1-D transform kernels and the `get_func`
  // dispatcher used below (defined in `forward_shared`).
  impl_1d_tx!();

  // Scalar (non-SIMD) implementation of the element-wise operations the
  // shared 1-D transform kernels are written against.
  impl TxOperations for i32 {
    fn zero() -> Self {
      0
    }

    // Multiply by `mul`, then round-shift right by SHIFT.
    // `1 << SHIFT >> 1` is half the divisor, giving round-to-nearest.
    fn tx_mul<const SHIFT: i32>(self, mul: i32) -> Self {
      ((self * mul) + (1 << SHIFT >> 1)) >> SHIFT
    }

    // Halve with rounding toward zero: adds 1 first when negative so
    // e.g. -1 >> 1 yields 0 rather than -1.
    fn rshift1(self) -> Self {
      (self + i32::from(self < 0)) >> 1
    }

    fn add(self, b: Self) -> Self {
      self + b
    }

    fn sub(self, b: Self) -> Self {
      self - b
    }

    // Truncating average of the sum (butterfly add followed by >> 1).
    fn add_avg(self, b: Self) -> Self {
      (self + b) >> 1
    }

    // Truncating average of the difference (butterfly sub followed by >> 1).
    fn sub_avg(self, b: Self) -> Self {
      (self - b) >> 1
    }
  }

  /// Forward 2-D transform of a residual block (portable fallback).
  ///
  /// Reads a `tx_size.width()` x `tx_size.height()` block of i16 residuals
  /// from `input` at row stride `stride`, applies the column transform then
  /// the row transform selected by `tx_type`, and writes the coefficients
  /// into `output` in up-to-32x32 column-major tiles (see the store loop
  /// below). `bd` is the bit depth; `_cpu` is unused in this pure-Rust path.
  ///
  /// # Panics
  ///
  /// Panics if `(tx_size, tx_type)` is not a valid AV1 combination.
  #[cold_for_target_arch("x86_64")]
  pub fn forward_transform<T: Coefficient>(
    input: &[i16], output: &mut [MaybeUninit<T>], stride: usize,
    tx_size: TxSize, tx_type: TxType, bd: usize, _cpu: CpuFeatureLevel,
  ) {
    assert!(valid_av1_transform(tx_size, tx_type));

    let txfm_size_col = tx_size.width();
    let txfm_size_row = tx_size.height();

    // Intermediate buffer holding the column-transformed block, row-major.
    // Sized for the largest transform (64x64); only the active region is used.
    let mut buf = Aligned::<[MaybeUninit<i32>; 64 * 64]>::uninit_array();
    let buf = &mut buf.data[..txfm_size_col * txfm_size_row];

    // Per-transform configuration: 1-D kernel types, per-stage shifts,
    // and whether the input must be flipped vertically/horizontally.
    let cfg = Txfm2DFlipCfg::fwd(tx_type, tx_size, bd);

    let txfm_func_col = get_func(cfg.txfm_type_col);
    let txfm_func_row = get_func(cfg.txfm_type_row);

    // Column pass: transform each input column, writing results into `buf`.
    for c in 0..txfm_size_col {
      let mut col_coeffs = Aligned::<[MaybeUninit<i32>; 64]>::uninit_array();
      let col_coeffs = &mut col_coeffs.data[..txfm_size_row];
      if cfg.ud_flip {
        // Load the column bottom-to-top (vertical flip).
        for r in 0..txfm_size_row {
          col_coeffs[r]
            .write((input[(txfm_size_row - r - 1) * stride + c]).into());
        }
      } else {
        for r in 0..txfm_size_row {
          col_coeffs[r].write((input[r * stride + c]).into());
        }
      }
      // SAFETY: every element of `col_coeffs[..txfm_size_row]` was written
      // by one of the two loops above.
      let col_coeffs = unsafe { slice_assume_init_mut(col_coeffs) };

      av1_round_shift_array(col_coeffs, txfm_size_row, -cfg.shift[0]);
      txfm_func_col(col_coeffs);
      av1_round_shift_array(col_coeffs, txfm_size_row, -cfg.shift[1]);
      if cfg.lr_flip {
        // Scatter into `buf` with the columns mirrored (horizontal flip).
        for r in 0..txfm_size_row {
          buf[r * txfm_size_col + (txfm_size_col - c - 1)]
            .write(col_coeffs[r]);
        }
      } else {
        for r in 0..txfm_size_row {
          buf[r * txfm_size_col + c].write(col_coeffs[r]);
        }
      }
    }
    // SAFETY: the loop above wrote every cell of the active
    // `txfm_size_col * txfm_size_row` region, once per (row, column).
    let buf = unsafe { slice_assume_init_mut(buf) };

    // Row pass: transform each row of `buf` in place, then store the
    // coefficients into `output`.
    for (r, row_coeffs) in buf.chunks_exact_mut(txfm_size_col).enumerate() {
      txfm_func_row(row_coeffs);
      av1_round_shift_array(row_coeffs, txfm_size_col, -cfg.shift[2]);

      // Output is laid out as up-to-32x32 tiles, column-major within a
      // tile: rows 32..64 go after the first row of tiles, each 32-wide
      // column group `cg` gets its own tile, and within a tile a value
      // lands at `c * output_stride + (r % 32)`.
      let output_stride = txfm_size_row.min(32);

      // Skip past the first tile row when r >= 32.
      let output = &mut output
        [(r >= 32) as usize * output_stride * txfm_size_col.min(32)..];

      for cg in (0..txfm_size_col).step_by(32) {
        // Advance to this column group's tile.
        let output = &mut output[txfm_size_row * cg..];

        for c in 0..txfm_size_col.min(32) {
          output[c * output_stride + (r & 31)]
            .write(T::cast_from(row_coeffs[c + cg]));
        }
      }
    }
  }
}