// rav1e/transform/forward.rs

// Copyright (c) 2018-2022, The rav1e contributors. All rights reserved
//
// This source code is subject to the terms of the BSD 2 Clause License and
// the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
// was not distributed with this source code in the LICENSE file, you can
// obtain it at www.aomedia.org/license/software. If the Alliance for Open
// Media Patent License 1.0 was not distributed with this source code in the
// PATENTS file, you can obtain it at www.aomedia.org/license/patent.

10use crate::cpu_features::CpuFeatureLevel;
11use crate::util::*;
12
13use super::TxType;
14
15cfg_if::cfg_if! {
16  if #[cfg(nasm_x86_64)] {
17    pub use crate::asm::x86::transform::forward::*;
18  } else if #[cfg(asm_neon)] {
19    pub use crate::asm::aarch64::transform::forward::*;
20  } else {
21    pub use self::rust::*;
22  }
23}
24
25pub mod rust {
26  use super::*;
27  use std::mem::MaybeUninit;
28
29  use crate::transform::forward_shared::*;
30  use crate::transform::{av1_round_shift_array, valid_av1_transform, TxSize};
31  use simd_helpers::cold_for_target_arch;
32
33  type TxfmFunc = fn(&mut [i32]);
34
35  impl_1d_tx!();
36
37  impl TxOperations for i32 {
38    fn zero() -> Self {
39      0
40    }
41
42    fn tx_mul<const SHIFT: i32>(self, mul: i32) -> Self {
43      ((self * mul) + (1 << SHIFT >> 1)) >> SHIFT
44    }
45
46    fn rshift1(self) -> Self {
47      (self + i32::from(self < 0)) >> 1
48    }
49
50    fn add(self, b: Self) -> Self {
51      self + b
52    }
53
54    fn sub(self, b: Self) -> Self {
55      self - b
56    }
57
58    fn add_avg(self, b: Self) -> Self {
59      (self + b) >> 1
60    }
61
62    fn sub_avg(self, b: Self) -> Self {
63      (self - b) >> 1
64    }
65  }
66
67  /// # Panics
68  ///
69  /// - If called with an invalid combination of `tx_size` and `tx_type`
70  #[cold_for_target_arch("x86_64")]
71  pub fn forward_transform<T: Coefficient>(
72    input: &[i16], output: &mut [MaybeUninit<T>], stride: usize,
73    tx_size: TxSize, tx_type: TxType, bd: usize, _cpu: CpuFeatureLevel,
74  ) {
75    assert!(valid_av1_transform(tx_size, tx_type));
76
77    // Note when assigning txfm_size_col, we use the txfm_size from the
78    // row configuration and vice versa. This is intentionally done to
79    // accurately perform rectangular transforms. When the transform is
80    // rectangular, the number of columns will be the same as the
81    // txfm_size stored in the row cfg struct. It will make no difference
82    // for square transforms.
83    let txfm_size_col = tx_size.width();
84    let txfm_size_row = tx_size.height();
85
86    let mut buf = Aligned::<[MaybeUninit<i32>; 64 * 64]>::uninit_array();
87    let buf = &mut buf.data[..txfm_size_col * txfm_size_row];
88
89    let cfg = Txfm2DFlipCfg::fwd(tx_type, tx_size, bd);
90
91    let txfm_func_col = get_func(cfg.txfm_type_col);
92    let txfm_func_row = get_func(cfg.txfm_type_row);
93
94    // Columns
95    for c in 0..txfm_size_col {
96      let mut col_coeffs = Aligned::<[MaybeUninit<i32>; 64]>::uninit_array();
97      let col_coeffs = &mut col_coeffs.data[..txfm_size_row];
98      if cfg.ud_flip {
99        // flip upside down
100        for r in 0..txfm_size_row {
101          col_coeffs[r]
102            .write((input[(txfm_size_row - r - 1) * stride + c]).into());
103        }
104      } else {
105        for r in 0..txfm_size_row {
106          col_coeffs[r].write((input[r * stride + c]).into());
107        }
108      }
109      // SAFETY: The loops above have initialized all txfm_size_row elements
110      let col_coeffs = unsafe { slice_assume_init_mut(col_coeffs) };
111
112      av1_round_shift_array(col_coeffs, txfm_size_row, -cfg.shift[0]);
113      txfm_func_col(col_coeffs);
114      av1_round_shift_array(col_coeffs, txfm_size_row, -cfg.shift[1]);
115      if cfg.lr_flip {
116        for r in 0..txfm_size_row {
117          // flip from left to right
118          buf[r * txfm_size_col + (txfm_size_col - c - 1)]
119            .write(col_coeffs[r]);
120        }
121      } else {
122        for r in 0..txfm_size_row {
123          buf[r * txfm_size_col + c].write(col_coeffs[r]);
124        }
125      }
126    }
127    // SAFETY: The loops above have initialized the entire buf
128    let buf = unsafe { slice_assume_init_mut(buf) };
129
130    // Rows
131    for (r, row_coeffs) in buf.chunks_exact_mut(txfm_size_col).enumerate() {
132      txfm_func_row(row_coeffs);
133      av1_round_shift_array(row_coeffs, txfm_size_col, -cfg.shift[2]);
134
135      // Store output in at most 32x32 chunks so that the first 32x32
136      // coefficients are stored first. When we don't have 64 rows, there is no
137      // change in order. With 64 rows, the chunks are in this order
138      //  - First 32 rows and first 32 cols
139      //  - Last 32 rows and first 32 cols
140      //  - First 32 rows and last 32 cols
141      //  - Last 32 rows and last 32 cols
142
143      // Output is grouped into 32x32 chunks so a stride of at most 32 is
144      // used for each chunk.
145      let output_stride = txfm_size_row.min(32);
146
147      // Split the first 32 rows from the last 32 rows
148      let output = &mut output
149        [(r >= 32) as usize * output_stride * txfm_size_col.min(32)..];
150
151      for cg in (0..txfm_size_col).step_by(32) {
152        // Split the first 32 cols from the last 32 cols
153        let output = &mut output[txfm_size_row * cg..];
154
155        for c in 0..txfm_size_col.min(32) {
156          output[c * output_stride + (r & 31)]
157            .write(T::cast_from(row_coeffs[c + cg]));
158        }
159      }
160    }
161  }
162}