use super::TxSize;
use super::TxType;

use super::HTX_TAB;
use super::VTX_TAB;

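/// Per-stage scaling shifts used by the forward 2D transform; a `TxfmShifts`
/// holds one `TxfmShift` per input bit depth (8, 10 and 12 bit), selected
/// with `(bd - 8) / 2` in `Txfm2DFlipCfg::fwd` below.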
pub type TxfmShift = [i8; 3];
pub type TxfmShifts = [TxfmShift; 3];

const FWD_SHIFT_4X4: TxfmShifts = [[3, 0, 0], [2, 0, 1], [0, 0, 3]];
const FWD_SHIFT_8X8: TxfmShifts = [[4, -1, 0], [2, 0, 1], [0, 0, 3]];
const FWD_SHIFT_16X16: TxfmShifts = [[4, -1, 0], [2, 0, 1], [0, 0, 3]];
const FWD_SHIFT_32X32: TxfmShifts = [[4, -2, 0], [2, 0, 0], [0, 0, 2]];
const FWD_SHIFT_64X64: TxfmShifts = [[4, -1, -2], [2, 0, -1], [0, 0, 1]];
const FWD_SHIFT_4X8: TxfmShifts = [[4, -1, 0], [2, 0, 1], [0, 0, 3]];
const FWD_SHIFT_8X4: TxfmShifts = [[4, -1, 0], [2, 0, 1], [0, 0, 3]];
const FWD_SHIFT_8X16: TxfmShifts = [[4, -1, 0], [2, 0, 1], [0, 0, 3]];
const FWD_SHIFT_16X8: TxfmShifts = [[4, -1, 0], [2, 0, 1], [0, 0, 3]];
const FWD_SHIFT_16X32: TxfmShifts = [[4, -2, 0], [2, 0, 0], [0, 0, 2]];
const FWD_SHIFT_32X16: TxfmShifts = [[4, -2, 0], [2, 0, 0], [0, 0, 2]];
const FWD_SHIFT_32X64: TxfmShifts = [[4, -1, -2], [2, 0, -1], [0, 0, 1]];
const FWD_SHIFT_64X32: TxfmShifts = [[4, -1, -2], [2, 0, -1], [0, 0, 1]];
const FWD_SHIFT_4X16: TxfmShifts = [[4, -1, 0], [2, 0, 1], [0, 0, 3]];
const FWD_SHIFT_16X4: TxfmShifts = [[4, -1, 0], [2, 0, 1], [0, 0, 3]];
const FWD_SHIFT_8X32: TxfmShifts = [[4, -1, 0], [2, 0, 1], [0, 0, 3]];
const FWD_SHIFT_32X8: TxfmShifts = [[4, -1, 0], [2, 0, 1], [0, 0, 3]];
const FWD_SHIFT_16X64: TxfmShifts = [[4, -2, 0], [2, 0, 0], [0, 0, 2]];
const FWD_SHIFT_64X16: TxfmShifts = [[4, -2, 0], [2, 0, 0], [0, 0, 2]];

const FWD_SHIFT_4X4_WHT: TxfmShift = [0, 0, 2];

pub const FWD_TXFM_SHIFT_LS: [TxfmShifts; TxSize::TX_SIZES_ALL] = [
  FWD_SHIFT_4X4,
  FWD_SHIFT_8X8,
  FWD_SHIFT_16X16,
  FWD_SHIFT_32X32,
  FWD_SHIFT_64X64,
  FWD_SHIFT_4X8,
  FWD_SHIFT_8X4,
  FWD_SHIFT_8X16,
  FWD_SHIFT_16X8,
  FWD_SHIFT_16X32,
  FWD_SHIFT_32X16,
  FWD_SHIFT_32X64,
  FWD_SHIFT_64X32,
  FWD_SHIFT_4X16,
  FWD_SHIFT_16X4,
  FWD_SHIFT_8X32,
  FWD_SHIFT_32X8,
  FWD_SHIFT_16X64,
  FWD_SHIFT_64X16,
];

#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum TxfmType {
  DCT4,
  DCT8,
  DCT16,
  DCT32,
  DCT64,
  ADST4,
  ADST8,
  ADST16,
  Identity4,
  Identity8,
  Identity16,
  Identity32,
  WHT4,
}

impl TxfmType {
  const TX_TYPES_1D: usize = 5;
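  /// Lookup from 1D transform length (rows: 4, 8, 16, 32, 64) and 1D
  /// transform class (columns, indexed by the values in `VTX_TAB`/`HTX_TAB`;
  /// judging by the entries these are DCT, ADST, FLIPADST, identity and WHT)
  /// to the kernel that computes it. FLIPADST reuses the ADST kernel because
  /// flipping is applied separately in `get_flip_cfg`; `None` marks
  /// combinations that are never used.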
  const AV1_TXFM_TYPE_LS: [[Option<TxfmType>; Self::TX_TYPES_1D]; 5] = [
    [
      Some(TxfmType::DCT4),
      Some(TxfmType::ADST4),
      Some(TxfmType::ADST4),
      Some(TxfmType::Identity4),
      Some(TxfmType::WHT4),
    ],
    [
      Some(TxfmType::DCT8),
      Some(TxfmType::ADST8),
      Some(TxfmType::ADST8),
      Some(TxfmType::Identity8),
      None,
    ],
    [
      Some(TxfmType::DCT16),
      Some(TxfmType::ADST16),
      Some(TxfmType::ADST16),
      Some(TxfmType::Identity16),
      None,
    ],
    [Some(TxfmType::DCT32), None, None, Some(TxfmType::Identity32), None],
    [Some(TxfmType::DCT64), None, None, None, None],
  ];
}

#[derive(Debug, Clone, Copy)]
pub struct Txfm2DFlipCfg {
  pub tx_size: TxSize,
  pub ud_flip: bool,
  pub lr_flip: bool,
  pub shift: TxfmShift,
  pub txfm_type_col: TxfmType,
  pub txfm_type_row: TxfmType,
}

impl Txfm2DFlipCfg {
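  /// Builds the forward 2D transform configuration for a block: the 1D
  /// column/row kernels, the flip flags and the per-stage shifts.
  ///
  /// For example, `Txfm2DFlipCfg::fwd(TxType::DCT_DCT, TxSize::TX_8X8, 10)`
  /// selects `DCT8` for both passes, no flips, and the 10-bit shift set
  /// `[2, 0, 1]` from `FWD_SHIFT_8X8`.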
  pub fn fwd(tx_type: TxType, tx_size: TxSize, bd: usize) -> Self {
    let tx_type_1d_col = VTX_TAB[tx_type as usize];
    let tx_type_1d_row = HTX_TAB[tx_type as usize];
    let txw_idx = tx_size.width_index();
    let txh_idx = tx_size.height_index();
    let txfm_type_col =
      TxfmType::AV1_TXFM_TYPE_LS[txh_idx][tx_type_1d_col as usize].unwrap();
    let txfm_type_row =
      TxfmType::AV1_TXFM_TYPE_LS[txw_idx][tx_type_1d_row as usize].unwrap();
    let (ud_flip, lr_flip) = Self::get_flip_cfg(tx_type);
    let shift = if tx_type == TxType::WHT_WHT {
      FWD_SHIFT_4X4_WHT
    } else {
      FWD_TXFM_SHIFT_LS[tx_size as usize][(bd - 8) / 2]
    };

    Txfm2DFlipCfg {
      tx_size,
      ud_flip,
      lr_flip,
      shift,
      txfm_type_col,
      txfm_type_row,
    }
  }

  const fn get_flip_cfg(tx_type: TxType) -> (bool, bool) {
    use self::TxType::*;
    match tx_type {
      DCT_DCT | ADST_DCT | DCT_ADST | ADST_ADST | IDTX | V_DCT | H_DCT
      | V_ADST | H_ADST | WHT_WHT => (false, false),
      FLIPADST_DCT | FLIPADST_ADST | V_FLIPADST => (true, false),
      DCT_FLIPADST | ADST_FLIPADST | H_FLIPADST => (false, true),
      FLIPADST_FLIPADST => (true, true),
    }
  }
}

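/// Stores a variadic list of values into consecutive entries of `$arr`:
/// `store_coeffs!(out, a, b, c)` expands to `out[0] = a; out[1] = b;
/// out[2] = c;`.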
macro_rules! store_coeffs {
  ( $arr:expr, $( $x:expr ),* ) => {
    {
      let mut i: i32 = -1;
      $(
        i += 1;
        $arr[i as usize] = $x;
      )*
    }
  };
}

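// Generates the 1D transform kernels. `$m` is an attribute and the `$s`
// tokens are function qualifiers stamped onto every generated `fn`, so the
// same body can be expanded more than once (for instance with `unsafe` plus a
// `target_feature` attribute for a vectorized build); the zero-argument form
// expands the plain version.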
macro_rules! impl_1d_tx {
  () => {
    impl_1d_tx! {allow(unused_attributes), }
  };

  ($m:meta, $($s:ident),*) => {
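  // Element-wise arithmetic the 1D kernels are written against; keeping it
  // behind a trait lets the same transform code be instantiated for
  // different coefficient representations.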
  pub trait TxOperations: Copy {
    $($s)* fn zero() -> Self;

    $($s)* fn tx_mul<const SHIFT: i32>(self, mul: i32) -> Self;
    $($s)* fn rshift1(self) -> Self;
    $($s)* fn add(self, b: Self) -> Self;
    $($s)* fn sub(self, b: Self) -> Self;
    $($s)* fn add_avg(self, b: Self) -> Self;
    $($s)* fn sub_avg(self, b: Self) -> Self;

    $($s)* fn copy_fn(self) -> Self {
      self
    }
  }

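  // Maps a 1D transform type to the function that computes it in place.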
  #[inline]
  fn get_func(t: TxfmType) -> TxfmFunc {
    use self::TxfmType::*;
    match t {
      DCT4 => daala_fdct4,
      DCT8 => daala_fdct8,
      DCT16 => daala_fdct16,
      DCT32 => daala_fdct32,
      DCT64 => daala_fdct64,
      ADST4 => daala_fdst_vii_4,
      ADST8 => daala_fdst8,
      ADST16 => daala_fdst16,
      Identity4 => fidentity,
      Identity8 => fidentity,
      Identity16 => fidentity,
      Identity32 => fidentity,
      WHT4 => fwht4,
    }
  }

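// The Rotate* kernels below are the fixed-point plane rotations of the
// Daala-derived transform factorization: each multiply pairs a constant
// `m.N` with its own shift given by the const generic `SHIFTN` passed to
// `tx_mul`. The Pi4 variants are the 45-degree rotations, which only need
// two multiplies.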
trait RotateKernelPi4<T: TxOperations> {
  const ADD: $($s)* fn(T, T) -> T;
  const SUB: $($s)* fn(T, T) -> T;

  #[$m]
  $($s)* fn kernel<const SHIFT0: i32, const SHIFT1: i32>(p0: T, p1: T, m: (i32, i32)) -> (T, T) {
    let t = Self::ADD(p1, p0);
    let (a, out0) = (p0.tx_mul::<SHIFT0>(m.0), t.tx_mul::<SHIFT1>(m.1));
    let out1 = Self::SUB(a, out0);
    (out0, out1)
  }
}

struct RotatePi4Add;
struct RotatePi4AddAvg;
struct RotatePi4Sub;
struct RotatePi4SubAvg;

impl<T: TxOperations> RotateKernelPi4<T> for RotatePi4Add {
  const ADD: $($s)* fn(T, T) -> T = T::add;
  const SUB: $($s)* fn(T, T) -> T = T::sub;
}

impl<T: TxOperations> RotateKernelPi4<T> for RotatePi4AddAvg {
  const ADD: $($s)* fn(T, T) -> T = T::add_avg;
  const SUB: $($s)* fn(T, T) -> T = T::sub;
}

impl<T: TxOperations> RotateKernelPi4<T> for RotatePi4Sub {
  const ADD: $($s)* fn(T, T) -> T = T::sub;
  const SUB: $($s)* fn(T, T) -> T = T::add;
}

impl<T: TxOperations> RotateKernelPi4<T> for RotatePi4SubAvg {
  const ADD: $($s)* fn(T, T) -> T = T::sub_avg;
  const SUB: $($s)* fn(T, T) -> T = T::add;
}

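// Full three-multiply rotations. `half_kernel` takes its first operand as a
// (halved, full) pair, as produced by the butterfly helpers below, so it can
// be driven directly from an asymmetric stage; `kernel` is the symmetric
// case where both views are the same value.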
trait RotateKernel<T: TxOperations> {
  const ADD: $($s)* fn(T, T) -> T;
  const SUB: $($s)* fn(T, T) -> T;
  const SHIFT: $($s)* fn(T) -> T;

  #[$m]
  $($s)* fn half_kernel<const SHIFT0: i32, const SHIFT1: i32, const SHIFT2: i32>(
    p0: (T, T), p1: T, m: (i32, i32, i32),
  ) -> (T, T) {
    let t = Self::ADD(p1, p0.0);
    let (a, b, c) = (p0.1.tx_mul::<SHIFT0>(m.0), p1.tx_mul::<SHIFT1>(m.1), t.tx_mul::<SHIFT2>(m.2));
    let out0 = b.add(c);
    let shifted = Self::SHIFT(c);
    let out1 = Self::SUB(a, shifted);
    (out0, out1)
  }

  #[$m]
  $($s)* fn kernel<const SHIFT0: i32, const SHIFT1: i32, const SHIFT2: i32>(p0: T, p1: T, m: (i32, i32, i32)) -> (T, T) {
    Self::half_kernel::<SHIFT0, SHIFT1, SHIFT2>((p0, p0), p1, m)
  }
}

trait RotateKernelNeg<T: TxOperations> {
  const ADD: $($s)* fn(T, T) -> T;

  #[$m]
  $($s)* fn kernel<const SHIFT0: i32, const SHIFT1: i32, const SHIFT2: i32>(p0: T, p1: T, m: (i32, i32, i32)) -> (T, T) {
    let t = Self::ADD(p0, p1);
    let (a, b, c) = (p0.tx_mul::<SHIFT0>(m.0), p1.tx_mul::<SHIFT1>(m.1), t.tx_mul::<SHIFT2>(m.2));
    let out0 = b.sub(c);
    let out1 = c.sub(a);
    (out0, out1)
  }
}

struct RotateAdd;
struct RotateAddAvg;
struct RotateAddShift;
struct RotateSub;
struct RotateSubAvg;
struct RotateSubShift;
struct RotateNeg;
struct RotateNegAvg;

impl<T: TxOperations> RotateKernel<T> for RotateAdd {
  const ADD: $($s)* fn(T, T) -> T = T::add;
  const SUB: $($s)* fn(T, T) -> T = T::sub;
  const SHIFT: $($s)* fn(T) -> T = T::copy_fn;
}

impl<T: TxOperations> RotateKernel<T> for RotateAddAvg {
  const ADD: $($s)* fn(T, T) -> T = T::add_avg;
  const SUB: $($s)* fn(T, T) -> T = T::sub;
  const SHIFT: $($s)* fn(T) -> T = T::copy_fn;
}

impl<T: TxOperations> RotateKernel<T> for RotateAddShift {
  const ADD: $($s)* fn(T, T) -> T = T::add;
  const SUB: $($s)* fn(T, T) -> T = T::sub;
  const SHIFT: $($s)* fn(T) -> T = T::rshift1;
}

impl<T: TxOperations> RotateKernel<T> for RotateSub {
  const ADD: $($s)* fn(T, T) -> T = T::sub;
  const SUB: $($s)* fn(T, T) -> T = T::add;
  const SHIFT: $($s)* fn(T) -> T = T::copy_fn;
}

impl<T: TxOperations> RotateKernel<T> for RotateSubAvg {
  const ADD: $($s)* fn(T, T) -> T = T::sub_avg;
  const SUB: $($s)* fn(T, T) -> T = T::add;
  const SHIFT: $($s)* fn(T) -> T = T::copy_fn;
}

impl<T: TxOperations> RotateKernel<T> for RotateSubShift {
  const ADD: $($s)* fn(T, T) -> T = T::sub;
  const SUB: $($s)* fn(T, T) -> T = T::add;
  const SHIFT: $($s)* fn(T) -> T = T::rshift1;
}

impl<T: TxOperations> RotateKernelNeg<T> for RotateNeg {
  const ADD: $($s)* fn(T, T) -> T = T::sub;
}

impl<T: TxOperations> RotateKernelNeg<T> for RotateNegAvg {
  const ADD: $($s)* fn(T, T) -> T = T::sub_avg;
}

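// Butterfly helpers. Alongside the full sum or difference they also return
// its halved value (`rshift1`); the (half, full) pairs feed the `*_asym`
// variants and the `half_kernel` rotations above.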
#[inline]
#[$m]
$($s)* fn butterfly_add<T: TxOperations>(p0: T, p1: T) -> ((T, T), T) {
  let p0 = p0.add(p1);
  let p0h = p0.rshift1();
  let p1h = p1.sub(p0h);
  ((p0h, p0), p1h)
}

#[inline]
#[$m]
$($s)* fn butterfly_sub<T: TxOperations>(p0: T, p1: T) -> ((T, T), T) {
  let p0 = p0.sub(p1);
  let p0h = p0.rshift1();
  let p1h = p1.add(p0h);
  ((p0h, p0), p1h)
}

#[inline]
#[$m]
$($s)* fn butterfly_neg<T: TxOperations>(p0: T, p1: T) -> (T, (T, T)) {
  let p1 = p0.sub(p1);
  let p1h = p1.rshift1();
  let p0h = p0.sub(p1h);
  (p0h, (p1h, p1))
}

#[inline]
#[$m]
$($s)* fn butterfly_add_asym<T: TxOperations>(p0: (T, T), p1h: T) -> (T, T) {
  let p1 = p1h.add(p0.0);
  let p0 = p0.1.sub(p1);
  (p0, p1)
}

#[inline]
#[$m]
$($s)* fn butterfly_sub_asym<T: TxOperations>(p0: (T, T), p1h: T) -> (T, T) {
  let p1 = p1h.sub(p0.0);
  let p0 = p0.1.add(p1);
  (p0, p1)
}

#[inline]
#[$m]
$($s)* fn butterfly_neg_asym<T: TxOperations>(p0h: T, p1: (T, T)) -> (T, T) {
  let p0 = p0h.add(p1.0);
  let p1 = p0.sub(p1.1);
  (p0, p1)
}

#[$m]
$($s)* fn daala_fdct_ii_2_asym<T: TxOperations>(p0h: T, p1: (T, T)) -> (T, T) {
  butterfly_neg_asym(p0h, p1)
}

#[$m]
$($s)* fn daala_fdst_iv_2_asym<T: TxOperations>(p0: (T, T), p1h: T) -> (T, T) {
  RotateAdd::half_kernel::<9, 12, 13>(p0, p1h, (473, 3135, 4433))
}

#[$m]
$($s)* fn daala_fdct_ii_4<T: TxOperations>(
  q0: T, q1: T, q2: T, q3: T, output: &mut [T],
) {
  let (q0h, q3) = butterfly_neg(q0, q3);
  let (q1, q2h) = butterfly_add(q1, q2);

  let (q0, q1) = daala_fdct_ii_2_asym(q0h, q1);
  let (q3, q2) = daala_fdst_iv_2_asym(q3, q2h);

  store_coeffs!(output, q0, q1, q2, q3);
}

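// The daala_fdctN / daala_fdstN entry points below run the recursive kernel
// into a temporary buffer and then permute the results from the kernel's
// natural output order into the coefficient order expected by the caller.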
#[$m]
$($s)* fn daala_fdct4<T: TxOperations>(coeffs: &mut [T]) {
  assert!(coeffs.len() >= 4);
  let mut temp_out: [T; 4] = [T::zero(); 4];
  daala_fdct_ii_4(coeffs[0], coeffs[1], coeffs[2], coeffs[3], &mut temp_out);

  coeffs[0] = temp_out[0];
  coeffs[1] = temp_out[2];
  coeffs[2] = temp_out[1];
  coeffs[3] = temp_out[3];
}

#[$m]
$($s)* fn daala_fdst_vii_4<T: TxOperations>(coeffs: &mut [T]) {
  assert!(coeffs.len() >= 4);

  let q0 = coeffs[0];
  let q1 = coeffs[1];
  let q2 = coeffs[2];
  let q3 = coeffs[3];
  let t0 = q1.add(q3);
  let t1 = q1.add(q0.sub_avg(t0));
  let t2 = q0.sub(q1);
  let t3 = q2;
  let t4 = q0.add(q3);
  let t0 = t0.tx_mul::<14>(7021);
  let t1 = t1.tx_mul::<15>(37837);
  let t2 = t2.tx_mul::<15>(21513);
  let t3 = t3.tx_mul::<15>(37837);
  let t4 = t4.tx_mul::<11>(467);
  let t3h = t3.rshift1();
  let u4 = t4.add(t3h);
  coeffs[0] = t0.add(u4);
  coeffs[1] = t1;
  coeffs[2] = t0.add(t2.sub(t3h));
  coeffs[3] = t2.add(t3.sub(u4));
}

#[$m]
$($s)* fn daala_fdct_ii_2<T: TxOperations>(p0: T, p1: T) -> (T, T) {
  let (p1, p0) = RotatePi4SubAvg::kernel::<13, 13>(p1, p0, (11585, 11585));
  (p0, p1)
}

#[$m]
$($s)* fn daala_fdst_iv_2<T: TxOperations>(p0: T, p1: T) -> (T, T) {
  RotateAddAvg::kernel::<13, 14, 12>(p0, p1, (10703, 8867, 3135))
}

#[$m]
$($s)* fn daala_fdct_ii_4_asym<T: TxOperations>(
  q0h: T, q1: (T, T), q2h: T, q3: (T, T), output: &mut [T],
) {
  let (q0, q3) = butterfly_neg_asym(q0h, q3);
  let (q1, q2) = butterfly_sub_asym(q1, q2h);

  let (q0, q1) = daala_fdct_ii_2(q0, q1);
  let (q3, q2) = daala_fdst_iv_2(q3, q2);

  store_coeffs!(output, q0, q1, q2, q3);
}

#[$m]
$($s)* fn daala_fdst_iv_4_asym<T: TxOperations>(
  q0: (T, T), q1h: T, q2: (T, T), q3h: T, output: &mut [T],
) {
  let (q0, q3) = RotateAddShift::half_kernel::<14, 13, 15>(
    q0,
    q3h,
    (9633, 12873, 12785),
  );
  let (q2, q1) = RotateSubShift::half_kernel::<14, 15, 12>(
    q2,
    q1h,
    (11363, 18081, 4551),
  );

  let (q2, q3) = butterfly_sub_asym((q2.rshift1(), q2), q3);
  let (q0, q1) = butterfly_sub_asym((q0.rshift1(), q0), q1);

  let (q2, q1) = RotatePi4AddAvg::kernel::<13, 13>(q2, q1, (11585, 11585));

  store_coeffs!(output, q0, q1, q2, q3);
}

#[$m]
$($s)* fn daala_fdct_ii_8<T: TxOperations>(
  r0: T, r1: T, r2: T, r3: T, r4: T, r5: T, r6: T, r7: T, output: &mut [T],
) {
  let (r0h, r7) = butterfly_neg(r0, r7);
  let (r1, r6h) = butterfly_add(r1, r6);
  let (r2h, r5) = butterfly_neg(r2, r5);
  let (r3, r4h) = butterfly_add(r3, r4);

  daala_fdct_ii_4_asym(r0h, r1, r2h, r3, &mut output[0..4]);
  daala_fdst_iv_4_asym(r7, r6h, r5, r4h, &mut output[4..8]);
  output[4..8].reverse();
}

#[$m]
$($s)* fn daala_fdct8<T: TxOperations>(coeffs: &mut [T]) {
  assert!(coeffs.len() >= 8);
  let mut temp_out: [T; 8] = [T::zero(); 8];
  daala_fdct_ii_8(
    coeffs[0],
    coeffs[1],
    coeffs[2],
    coeffs[3],
    coeffs[4],
    coeffs[5],
    coeffs[6],
    coeffs[7],
    &mut temp_out,
  );

  coeffs[0] = temp_out[0];
  coeffs[1] = temp_out[4];
  coeffs[2] = temp_out[2];
  coeffs[3] = temp_out[6];
  coeffs[4] = temp_out[1];
  coeffs[5] = temp_out[5];
  coeffs[6] = temp_out[3];
  coeffs[7] = temp_out[7];
}

#[$m]
$($s)* fn daala_fdst_iv_8<T: TxOperations>(
  r0: T, r1: T, r2: T, r3: T, r4: T, r5: T, r6: T, r7: T, output: &mut [T],
) {
  let (r0, r7) =
    RotateAdd::kernel::<14, 14, 13>(r0, r7, (17911, 14699, 803));
  let (r6, r1) =
    RotateSub::kernel::<14, 15, 12>(r6, r1, (20435, 21845, 1189));
  let (r2, r5) =
    RotateAdd::kernel::<14, 13, 15>(r2, r5, (22173, 3363, 15447));
  let (r4, r3) =
    RotateSub::kernel::<14, 14, 13>(r4, r3, (23059, 2271, 5197));

  let (r0, r3h) = butterfly_add(r0, r3);
  let (r2, r1h) = butterfly_sub(r2, r1);
  let (r5, r6h) = butterfly_add(r5, r6);
  let (r7, r4h) = butterfly_sub(r7, r4);

  let (r7, r6) = butterfly_add_asym(r7, r6h);
  let (r5, r3) = butterfly_add_asym(r5, r3h);
  let (r2, r4) = butterfly_add_asym(r2, r4h);
  let (r0, r1) = butterfly_sub_asym(r0, r1h);

  let (r3, r4) =
    RotateSubAvg::kernel::<13, 14, 12>(r3, r4, (10703, 8867, 3135));
  let (r2, r5) =
    RotateNegAvg::kernel::<13, 14, 12>(r2, r5, (10703, 8867, 3135));
  let (r1, r6) = RotatePi4SubAvg::kernel::<13, 13>(r1, r6, (11585, 11585));

  store_coeffs!(output, r0, r1, r2, r3, r4, r5, r6, r7);
}

#[$m]
$($s)* fn daala_fdst8<T: TxOperations>(coeffs: &mut [T]) {
  assert!(coeffs.len() >= 8);
  let mut temp_out: [T; 8] = [T::zero(); 8];
  daala_fdst_iv_8(
    coeffs[0],
    coeffs[1],
    coeffs[2],
    coeffs[3],
    coeffs[4],
    coeffs[5],
    coeffs[6],
    coeffs[7],
    &mut temp_out,
  );

  coeffs[0] = temp_out[0];
  coeffs[1] = temp_out[4];
  coeffs[2] = temp_out[2];
  coeffs[3] = temp_out[6];
  coeffs[4] = temp_out[1];
  coeffs[5] = temp_out[5];
  coeffs[6] = temp_out[3];
  coeffs[7] = temp_out[7];
}

#[$m]
$($s)* fn daala_fdst_iv_4<T: TxOperations>(
  q0: T, q1: T, q2: T, q3: T, output: &mut [T],
) {
  let (q0, q3) =
    RotateAddShift::kernel::<14, 12, 11>(q0, q3, (13623, 4551, 565));
  let (q2, q1) =
    RotateSubShift::kernel::<14, 15, 11>(q2, q1, (16069, 12785, 1609));

  let (q2, q3) = butterfly_sub_asym((q2.rshift1(), q2), q3);
  let (q0, q1) = butterfly_sub_asym((q0.rshift1(), q0), q1);

  let (q2, q1) = RotatePi4AddAvg::kernel::<13, 13>(q2, q1, (11585, 11585));

  store_coeffs!(output, q0, q1, q2, q3);
}

#[$m]
$($s)* fn daala_fdct_ii_8_asym<T: TxOperations>(
  r0h: T, r1: (T, T), r2h: T, r3: (T, T), r4h: T, r5: (T, T), r6h: T,
  r7: (T, T), output: &mut [T],
) {
  let (r0, r7) = butterfly_neg_asym(r0h, r7);
  let (r1, r6) = butterfly_sub_asym(r1, r6h);
  let (r2, r5) = butterfly_neg_asym(r2h, r5);
  let (r3, r4) = butterfly_sub_asym(r3, r4h);

  daala_fdct_ii_4(r0, r1, r2, r3, &mut output[0..4]);
  daala_fdst_iv_4(r7, r6, r5, r4, &mut output[4..8]);
  output[4..8].reverse();
}

#[$m]
$($s)* fn daala_fdst_iv_8_asym<T: TxOperations>(
  r0: (T, T), r1h: T, r2: (T, T), r3h: T, r4: (T, T), r5h: T, r6: (T, T),
  r7h: T, output: &mut [T],
) {
  let (r0, r7) =
    RotateAdd::half_kernel::<14, 12, 14>(r0, r7h, (12665, 5197, 2271));
  let (r6, r1) =
    RotateSub::half_kernel::<14, 15, 13>(r6, r1h, (14449, 30893, 3363));
  let (r2, r5) =
    RotateAdd::half_kernel::<14, 11, 13>(r2, r5h, (15679, 1189, 5461));
  let (r4, r3) =
    RotateSub::half_kernel::<14, 12, 14>(r4, r3h, (16305, 803, 14699));

  let (r0, r3h) = butterfly_add(r0, r3);
  let (r2, r1h) = butterfly_sub(r2, r1);
  let (r5, r6h) = butterfly_add(r5, r6);
  let (r7, r4h) = butterfly_sub(r7, r4);

  let (r7, r6) = butterfly_add_asym(r7, r6h);
  let (r5, r3) = butterfly_add_asym(r5, r3h);
  let (r2, r4) = butterfly_add_asym(r2, r4h);
  let (r0, r1) = butterfly_sub_asym(r0, r1h);

  let (r3, r4) =
    RotateSubAvg::kernel::<9, 14, 12>(r3, r4, (669, 8867, 3135));
  let (r2, r5) =
    RotateNegAvg::kernel::<9, 14, 12>(r2, r5, (669, 8867, 3135));
  let (r1, r6) = RotatePi4SubAvg::kernel::<12, 13>(r1, r6, (5793, 11585));

  store_coeffs!(output, r0, r1, r2, r3, r4, r5, r6, r7);
}

#[$m]
$($s)* fn daala_fdct_ii_16<T: TxOperations>(
  s0: T, s1: T, s2: T, s3: T, s4: T, s5: T, s6: T, s7: T, s8: T, s9: T, sa: T,
  sb: T, sc: T, sd: T, se: T, sf: T, output: &mut [T],
) {
  let (s0h, sf) = butterfly_neg(s0, sf);
  let (s1, seh) = butterfly_add(s1, se);
  let (s2h, sd) = butterfly_neg(s2, sd);
  let (s3, sch) = butterfly_add(s3, sc);
  let (s4h, sb) = butterfly_neg(s4, sb);
  let (s5, sah) = butterfly_add(s5, sa);
  let (s6h, s9) = butterfly_neg(s6, s9);
  let (s7, s8h) = butterfly_add(s7, s8);

  daala_fdct_ii_8_asym(s0h, s1, s2h, s3, s4h, s5, s6h, s7, &mut output[0..8]);
  daala_fdst_iv_8_asym(sf, seh, sd, sch, sb, sah, s9, s8h, &mut output[8..16]);
  output[8..16].reverse();
}

#[$m]
$($s)* fn daala_fdct16<T: TxOperations>(coeffs: &mut [T]) {
  assert!(coeffs.len() >= 16);
  let mut temp_out: [T; 16] = [T::zero(); 16];
  daala_fdct_ii_16(
    coeffs[0],
    coeffs[1],
    coeffs[2],
    coeffs[3],
    coeffs[4],
    coeffs[5],
    coeffs[6],
    coeffs[7],
    coeffs[8],
    coeffs[9],
    coeffs[10],
    coeffs[11],
    coeffs[12],
    coeffs[13],
    coeffs[14],
    coeffs[15],
    &mut temp_out,
  );

  coeffs[0] = temp_out[0];
  coeffs[1] = temp_out[8];
  coeffs[2] = temp_out[4];
  coeffs[3] = temp_out[12];
  coeffs[4] = temp_out[2];
  coeffs[5] = temp_out[10];
  coeffs[6] = temp_out[6];
  coeffs[7] = temp_out[14];
  coeffs[8] = temp_out[1];
  coeffs[9] = temp_out[9];
  coeffs[10] = temp_out[5];
  coeffs[11] = temp_out[13];
  coeffs[12] = temp_out[3];
  coeffs[13] = temp_out[11];
  coeffs[14] = temp_out[7];
  coeffs[15] = temp_out[15];
}

#[$m]
$($s)* fn daala_fdst_iv_16<T: TxOperations>(
  s0: T, s1: T, s2: T, s3: T, s4: T, s5: T, s6: T, s7: T, s8: T, s9: T, sa: T,
  sb: T, sc: T, sd: T, se: T, sf: T, output: &mut [T],
) {
  let (s0, sf) =
    RotateAddShift::kernel::<15, 13, 14>(s0, sf, (24279, 11003, 1137));
  let (se, s1) =
    RotateSubShift::kernel::<11, 8, 11>(se, s1, (1645, 305, 425));
  let (s2, sd) =
    RotateAddShift::kernel::<14, 13, 13>(s2, sd, (14053, 8423, 2815));
  let (sc, s3) =
    RotateSubShift::kernel::<14, 13, 13>(sc, s3, (14811, 7005, 3903));
  let (s4, sb) =
    RotateAddShift::kernel::<15, 14, 14>(s4, sb, (30853, 11039, 9907));
  let (sa, s5) =
    RotateSubShift::kernel::<14, 13, 11>(sa, s5, (15893, 3981, 1489));
  let (s6, s9) =
    RotateAddShift::kernel::<15, 11, 14>(s6, s9, (32413, 601, 13803));
  let (s8, s7) =
    RotateSubShift::kernel::<15, 11, 11>(s8, s7, (32729, 201, 1945));

  let (s0, s7) = butterfly_sub_asym((s0.rshift1(), s0), s7);
  let (s8, sf) = butterfly_sub_asym((s8.rshift1(), s8), sf);
  let (s4, s3) = butterfly_add_asym((s4.rshift1(), s4), s3);
  let (sc, sb) = butterfly_add_asym((sc.rshift1(), sc), sb);
  let (s2, s5) = butterfly_sub_asym((s2.rshift1(), s2), s5);
  let (sa, sd) = butterfly_sub_asym((sa.rshift1(), sa), sd);
  let (s6, s1) = butterfly_add_asym((s6.rshift1(), s6), s1);
  let (se, s9) = butterfly_add_asym((se.rshift1(), se), s9);

  let ((_s8h, s8), s4h) = butterfly_add(s8, s4);
  let ((_s7h, s7), sbh) = butterfly_add(s7, sb);
  let ((_sah, sa), s6h) = butterfly_sub(sa, s6);
  let ((_s5h, s5), s9h) = butterfly_sub(s5, s9);
  let (s0, s3h) = butterfly_add(s0, s3);
  let (sd, seh) = butterfly_add(sd, se);
  let (s2, s1h) = butterfly_sub(s2, s1);
  let (sf, sch) = butterfly_sub(sf, sc);

  let (s8, s7) =
    RotateAddAvg::kernel::<8, 11, 15>(s8, s7, (301, 1609, 12785));
  let (s9, s6) =
    RotateAdd::kernel::<13, 15, 13>(s9h, s6h, (11363, 9041, 4551));
  let (s5, sa) =
    RotateNegAvg::kernel::<12, 15, 12>(s5, sa, (5681, 9041, 4551));
  let (s4, sb) =
    RotateNeg::kernel::<13, 14, 15>(s4h, sbh, (9633, 12873, 6393));

  let (s2, sc) = butterfly_add_asym(s2, sch);
  let (s0, s1) = butterfly_sub_asym(s0, s1h);
  let (sf, se) = butterfly_add_asym(sf, seh);
  let (sd, s3) = butterfly_add_asym(sd, s3h);
  let (s7, s6) = butterfly_add_asym((s7.rshift1(), s7), s6);
  let (s8, s9) = butterfly_sub_asym((s8.rshift1(), s8), s9);
  let (sa, sb) = butterfly_sub_asym((sa.rshift1(), sa), sb);
  let (s5, s4) = butterfly_add_asym((s5.rshift1(), s5), s4);

  let (sc, s3) =
    RotateAddAvg::kernel::<9, 14, 12>(sc, s3, (669, 8867, 3135));
  let (s2, sd) =
    RotateNegAvg::kernel::<9, 14, 12>(s2, sd, (669, 8867, 3135));
  let (sa, s5) = RotatePi4AddAvg::kernel::<12, 13>(sa, s5, (5793, 11585));
  let (s6, s9) = RotatePi4AddAvg::kernel::<12, 13>(s6, s9, (5793, 11585));
  let (se, s1) = RotatePi4AddAvg::kernel::<12, 13>(se, s1, (5793, 11585));

  store_coeffs!(
    output, s0, s1, s2, s3, s4, s5, s6, s7, s8, s9, sa, sb, sc, sd, se, sf
  );
}

#[$m]
$($s)* fn daala_fdst16<T: TxOperations>(coeffs: &mut [T]) {
  assert!(coeffs.len() >= 16);
  let mut temp_out: [T; 16] = [T::zero(); 16];
  daala_fdst_iv_16(
    coeffs[0],
    coeffs[1],
    coeffs[2],
    coeffs[3],
    coeffs[4],
    coeffs[5],
    coeffs[6],
    coeffs[7],
    coeffs[8],
    coeffs[9],
    coeffs[10],
    coeffs[11],
    coeffs[12],
    coeffs[13],
    coeffs[14],
    coeffs[15],
    &mut temp_out,
  );

  coeffs[0] = temp_out[0];
  coeffs[1] = temp_out[8];
  coeffs[2] = temp_out[4];
  coeffs[3] = temp_out[12];
  coeffs[4] = temp_out[2];
  coeffs[5] = temp_out[10];
  coeffs[6] = temp_out[6];
  coeffs[7] = temp_out[14];
  coeffs[8] = temp_out[1];
  coeffs[9] = temp_out[9];
  coeffs[10] = temp_out[5];
  coeffs[11] = temp_out[13];
  coeffs[12] = temp_out[3];
  coeffs[13] = temp_out[11];
  coeffs[14] = temp_out[7];
  coeffs[15] = temp_out[15];
}

#[$m]
$($s)* fn daala_fdct_ii_16_asym<T: TxOperations>(
  s0h: T, s1: (T, T), s2h: T, s3: (T, T), s4h: T, s5: (T, T), s6h: T,
  s7: (T, T), s8h: T, s9: (T, T), sah: T, sb: (T, T), sch: T, sd: (T, T),
  seh: T, sf: (T, T), output: &mut [T],
) {
  let (s0, sf) = butterfly_neg_asym(s0h, sf);
  let (s1, se) = butterfly_sub_asym(s1, seh);
  let (s2, sd) = butterfly_neg_asym(s2h, sd);
  let (s3, sc) = butterfly_sub_asym(s3, sch);
  let (s4, sb) = butterfly_neg_asym(s4h, sb);
  let (s5, sa) = butterfly_sub_asym(s5, sah);
  let (s6, s9) = butterfly_neg_asym(s6h, s9);
  let (s7, s8) = butterfly_sub_asym(s7, s8h);

  daala_fdct_ii_8(s0, s1, s2, s3, s4, s5, s6, s7, &mut output[0..8]);
  daala_fdst_iv_8(sf, se, sd, sc, sb, sa, s9, s8, &mut output[8..16]);
  output[8..16].reverse();
}

#[$m]
$($s)* fn daala_fdst_iv_16_asym<T: TxOperations>(
  s0: (T, T), s1h: T, s2: (T, T), s3h: T, s4: (T, T), s5h: T, s6: (T, T),
  s7h: T, s8: (T, T), s9h: T, sa: (T, T), sbh: T, sc: (T, T), sdh: T,
  se: (T, T), sfh: T, output: &mut [T],
) {
  let (s0, sf) =
    RotateAddShift::half_kernel::<11, 15, 11>(s0, sfh, (1073, 62241, 201));
  let (se, s1) = RotateSubShift::half_kernel::<15, 15, 11>(
    se,
    s1h,
    (18611, 55211, 601),
  );
  let (s2, sd) =
    RotateAddShift::half_kernel::<14, 10, 13>(s2, sdh, (9937, 1489, 3981));
  let (sc, s3) = RotateSubShift::half_kernel::<14, 15, 14>(
    sc,
    s3h,
    (10473, 39627, 11039),
  );
  let (s4, sb) =
    RotateAddShift::half_kernel::<12, 12, 13>(s4, sbh, (2727, 3903, 7005));
  let (sa, s5) =
    RotateSubShift::half_kernel::<13, 12, 13>(sa, s5h, (5619, 2815, 8423));
  let (s6, s9) =
    RotateAddShift::half_kernel::<12, 15, 8>(s6, s9h, (2865, 13599, 305));
  let (s8, s7) = RotateSubShift::half_kernel::<15, 13, 13>(
    s8,
    s7h,
    (23143, 1137, 11003),
  );

  let (s0, s7) = butterfly_sub_asym((s0.rshift1(), s0), s7);
  let (s8, sf) = butterfly_sub_asym((s8.rshift1(), s8), sf);
  let (s4, s3) = butterfly_add_asym((s4.rshift1(), s4), s3);
  let (sc, sb) = butterfly_add_asym((sc.rshift1(), sc), sb);
  let (s2, s5) = butterfly_sub_asym((s2.rshift1(), s2), s5);
  let (sa, sd) = butterfly_sub_asym((sa.rshift1(), sa), sd);
  let (s6, s1) = butterfly_add_asym((s6.rshift1(), s6), s1);
  let (se, s9) = butterfly_add_asym((se.rshift1(), se), s9);

  let ((_s8h, s8), s4h) = butterfly_add(s8, s4);
  let ((_s7h, s7), sbh) = butterfly_add(s7, sb);
  let ((_sah, sa), s6h) = butterfly_sub(sa, s6);
  let ((_s5h, s5), s9h) = butterfly_sub(s5, s9);
  let (s0, s3h) = butterfly_add(s0, s3);
  let (sd, seh) = butterfly_add(sd, se);
  let (s2, s1h) = butterfly_sub(s2, s1);
  let (sf, sch) = butterfly_sub(sf, sc);

  let (s8, s7) =
    RotateAdd::kernel::<13, 14, 15>(s8, s7, (9633, 12873, 6393));
  let (s9, s6) =
    RotateAdd::kernel::<14, 15, 13>(s9h, s6h, (22725, 9041, 4551));
  let (s5, sa) =
    RotateNeg::kernel::<13, 15, 13>(s5, sa, (11363, 9041, 4551));
  let (s4, sb) =
    RotateNeg::kernel::<13, 14, 15>(s4h, sbh, (9633, 12873, 6393));

  let (s2, sc) = butterfly_add_asym(s2, sch);
  let (s0, s1) = butterfly_sub_asym(s0, s1h);
  let (sf, se) = butterfly_add_asym(sf, seh);
  let (sd, s3) = butterfly_add_asym(sd, s3h);
  let (s7, s6) = butterfly_add_asym((s7.rshift1(), s7), s6);
  let (s8, s9) = butterfly_sub_asym((s8.rshift1(), s8), s9);
  let (sa, sb) = butterfly_sub_asym((sa.rshift1(), sa), sb);
  let (s5, s4) = butterfly_add_asym((s5.rshift1(), s5), s4);

  let (sc, s3) =
    RotateAdd::kernel::<13, 14, 13>(sc, s3, (10703, 8867, 3135));
  let (s2, sd) =
    RotateNeg::kernel::<13, 14, 13>(s2, sd, (10703, 8867, 3135));
  let (sa, s5) = RotatePi4Add::kernel::<13, 13>(sa, s5, (11585, 5793));
  let (s6, s9) = RotatePi4Add::kernel::<13, 13>(s6, s9, (11585, 5793));
  let (se, s1) = RotatePi4Add::kernel::<13, 13>(se, s1, (11585, 5793));

  store_coeffs!(
    output, s0, s1, s2, s3, s4, s5, s6, s7, s8, s9, sa, sb, sc, sd, se, sf
  );
}

#[$m]
$($s)* fn daala_fdct_ii_32<T: TxOperations>(
  t0: T, t1: T, t2: T, t3: T, t4: T, t5: T, t6: T, t7: T, t8: T, t9: T, ta: T,
  tb: T, tc: T, td: T, te: T, tf: T, tg: T, th: T, ti: T, tj: T, tk: T, tl: T,
  tm: T, tn: T, to: T, tp: T, tq: T, tr: T, ts: T, tt: T, tu: T, tv: T,
  output: &mut [T],
) {
  let (t0h, tv) = butterfly_neg(t0, tv);
  let (t1, tuh) = butterfly_add(t1, tu);
  let (t2h, tt) = butterfly_neg(t2, tt);
  let (t3, tsh) = butterfly_add(t3, ts);
  let (t4h, tr) = butterfly_neg(t4, tr);
  let (t5, tqh) = butterfly_add(t5, tq);
  let (t6h, tp) = butterfly_neg(t6, tp);
  let (t7, toh) = butterfly_add(t7, to);
  let (t8h, tn) = butterfly_neg(t8, tn);
  let (t9, tmh) = butterfly_add(t9, tm);
  let (tah, tl) = butterfly_neg(ta, tl);
  let (tb, tkh) = butterfly_add(tb, tk);
  let (tch, tj) = butterfly_neg(tc, tj);
  let (td, tih) = butterfly_add(td, ti);
  let (teh, th) = butterfly_neg(te, th);
  let (tf, tgh) = butterfly_add(tf, tg);

  daala_fdct_ii_16_asym(
    t0h,
    t1,
    t2h,
    t3,
    t4h,
    t5,
    t6h,
    t7,
    t8h,
    t9,
    tah,
    tb,
    tch,
    td,
    teh,
    tf,
    &mut output[0..16],
  );
  daala_fdst_iv_16_asym(
    tv,
    tuh,
    tt,
    tsh,
    tr,
    tqh,
    tp,
    toh,
    tn,
    tmh,
    tl,
    tkh,
    tj,
    tih,
    th,
    tgh,
    &mut output[16..32],
  );
  output[16..32].reverse();
}

#[$m]
$($s)* fn daala_fdct32<T: TxOperations>(coeffs: &mut [T]) {
  assert!(coeffs.len() >= 32);
  let mut temp_out: [T; 32] = [T::zero(); 32];
  daala_fdct_ii_32(
    coeffs[0],
    coeffs[1],
    coeffs[2],
    coeffs[3],
    coeffs[4],
    coeffs[5],
    coeffs[6],
    coeffs[7],
    coeffs[8],
    coeffs[9],
    coeffs[10],
    coeffs[11],
    coeffs[12],
    coeffs[13],
    coeffs[14],
    coeffs[15],
    coeffs[16],
    coeffs[17],
    coeffs[18],
    coeffs[19],
    coeffs[20],
    coeffs[21],
    coeffs[22],
    coeffs[23],
    coeffs[24],
    coeffs[25],
    coeffs[26],
    coeffs[27],
    coeffs[28],
    coeffs[29],
    coeffs[30],
    coeffs[31],
    &mut temp_out,
  );

  coeffs[0] = temp_out[0];
  coeffs[1] = temp_out[16];
  coeffs[2] = temp_out[8];
  coeffs[3] = temp_out[24];
  coeffs[4] = temp_out[4];
  coeffs[5] = temp_out[20];
  coeffs[6] = temp_out[12];
  coeffs[7] = temp_out[28];
  coeffs[8] = temp_out[2];
  coeffs[9] = temp_out[18];
  coeffs[10] = temp_out[10];
  coeffs[11] = temp_out[26];
  coeffs[12] = temp_out[6];
  coeffs[13] = temp_out[22];
  coeffs[14] = temp_out[14];
  coeffs[15] = temp_out[30];
  coeffs[16] = temp_out[1];
  coeffs[17] = temp_out[17];
  coeffs[18] = temp_out[9];
  coeffs[19] = temp_out[25];
  coeffs[20] = temp_out[5];
  coeffs[21] = temp_out[21];
  coeffs[22] = temp_out[13];
  coeffs[23] = temp_out[29];
  coeffs[24] = temp_out[3];
  coeffs[25] = temp_out[19];
  coeffs[26] = temp_out[11];
  coeffs[27] = temp_out[27];
  coeffs[28] = temp_out[7];
  coeffs[29] = temp_out[23];
  coeffs[30] = temp_out[15];
  coeffs[31] = temp_out[31];
}

#[$m]
$($s)* fn daala_fdct_ii_32_asym<T: TxOperations>(
  t0h: T, t1: (T, T), t2h: T, t3: (T, T), t4h: T, t5: (T, T), t6h: T,
  t7: (T, T), t8h: T, t9: (T, T), tah: T, tb: (T, T), tch: T, td: (T, T),
  teh: T, tf: (T, T), tgh: T, th: (T, T), tih: T, tj: (T, T), tkh: T,
  tl: (T, T), tmh: T, tn: (T, T), toh: T, tp: (T, T), tqh: T, tr: (T, T),
  tsh: T, tt: (T, T), tuh: T, tv: (T, T), output: &mut [T],
) {
  let (t0, tv) = butterfly_neg_asym(t0h, tv);
  let (t1, tu) = butterfly_sub_asym(t1, tuh);
  let (t2, tt) = butterfly_neg_asym(t2h, tt);
  let (t3, ts) = butterfly_sub_asym(t3, tsh);
  let (t4, tr) = butterfly_neg_asym(t4h, tr);
  let (t5, tq) = butterfly_sub_asym(t5, tqh);
  let (t6, tp) = butterfly_neg_asym(t6h, tp);
  let (t7, to) = butterfly_sub_asym(t7, toh);
  let (t8, tn) = butterfly_neg_asym(t8h, tn);
  let (t9, tm) = butterfly_sub_asym(t9, tmh);
  let (ta, tl) = butterfly_neg_asym(tah, tl);
  let (tb, tk) = butterfly_sub_asym(tb, tkh);
  let (tc, tj) = butterfly_neg_asym(tch, tj);
  let (td, ti) = butterfly_sub_asym(td, tih);
  let (te, th) = butterfly_neg_asym(teh, th);
  let (tf, tg) = butterfly_sub_asym(tf, tgh);

  daala_fdct_ii_16(
    t0,
    t1,
    t2,
    t3,
    t4,
    t5,
    t6,
    t7,
    t8,
    t9,
    ta,
    tb,
    tc,
    td,
    te,
    tf,
    &mut output[0..16],
  );
  daala_fdst_iv_16(
    tv,
    tu,
    tt,
    ts,
    tr,
    tq,
    tp,
    to,
    tn,
    tm,
    tl,
    tk,
    tj,
    ti,
    th,
    tg,
    &mut output[16..32],
  );
  output[16..32].reverse();
}

#[$m]
$($s)* fn daala_fdst_iv_32_asym<T: TxOperations>(
  t0: (T, T), t1h: T, t2: (T, T), t3h: T, t4: (T, T), t5h: T, t6: (T, T),
  t7h: T, t8: (T, T), t9h: T, ta: (T, T), tbh: T, tc: (T, T), tdh: T,
  te: (T, T), tfh: T, tg: (T, T), thh: T, ti: (T, T), tjh: T, tk: (T, T),
  tlh: T, tm: (T, T), tnh: T, to: (T, T), tph: T, tq: (T, T), trh: T,
  ts: (T, T), tth: T, tu: (T, T), tvh: T, output: &mut [T],
) {
  let (t0, tv) =
    RotateAdd::half_kernel::<13, 14, 15>(t0, tvh, (5933, 22595, 1137));
  let (tu, t1) =
    RotateSub::half_kernel::<13, 14, 15>(tu, t1h, (6203, 21403, 3409));
  let (t2, tt) =
    RotateAdd::half_kernel::<15, 8, 15>(t2, tth, (25833, 315, 5673));
  let (ts, t3) =
    RotateSub::half_kernel::<15, 12, 15>(ts, t3h, (26791, 4717, 7923));
  let (t4, tr) =
    RotateAdd::half_kernel::<13, 14, 15>(t4, trh, (6921, 17531, 10153));
  let (tq, t5) =
    RotateSub::half_kernel::<15, 15, 12>(tq, t5h, (28511, 32303, 1545));
  let (t6, tp) =
    RotateAdd::half_kernel::<15, 14, 12>(t6, tph, (29269, 14733, 1817));
  let (to, t7) =
    RotateSub::half_kernel::<15, 14, 14>(to, t7h, (29957, 13279, 8339));
  let (t8, tn) =
    RotateAdd::half_kernel::<13, 14, 15>(t8, tnh, (7643, 11793, 18779));
  let (tm, t9) =
    RotateSub::half_kernel::<14, 15, 15>(tm, t9h, (15557, 20557, 20835));
  let (ta, tl) =
    RotateAdd::half_kernel::<15, 15, 15>(ta, tlh, (31581, 17479, 22841));
  let (tk, tb) =
    RotateSub::half_kernel::<13, 15, 12>(tk, tbh, (7993, 14359, 3099));
  let (tc, tj) =
    RotateAdd::half_kernel::<14, 13, 15>(tc, tjh, (16143, 2801, 26683));
  let (ti, td) =
    RotateSub::half_kernel::<14, 14, 14>(ti, tdh, (16261, 4011, 14255));
  let (te, th) =
    RotateAdd::half_kernel::<15, 15, 15>(te, thh, (32679, 4821, 30269));
  let (tg, tf) =
    RotateSub::half_kernel::<14, 12, 14>(tg, tfh, (16379, 201, 15977));

  let (t0, tfh) = butterfly_add(t0, tf);
  let (tv, tgh) = butterfly_sub(tv, tg);
  let (th, tuh) = butterfly_add(th, tu);
  let (te, t1h) = butterfly_sub(te, t1);
  let (t2, tdh) = butterfly_add(t2, td);
  let (tt, tih) = butterfly_sub(tt, ti);
  let (tj, tsh) = butterfly_add(tj, ts);
  let (tc, t3h) = butterfly_sub(tc, t3);
  let (t4, tbh) = butterfly_add(t4, tb);
  let (tr, tkh) = butterfly_sub(tr, tk);
  let (tl, tqh) = butterfly_add(tl, tq);
  let (ta, t5h) = butterfly_sub(ta, t5);
  let (t6, t9h) = butterfly_add(t6, t9);
  let (tp, tmh) = butterfly_sub(tp, tm);
  let (tn, toh) = butterfly_add(tn, to);
  let (t8, t7h) = butterfly_sub(t8, t7);

  let (t0, t7) = butterfly_sub_asym(t0, t7h);
  let (tv, to) = butterfly_add_asym(tv, toh);
  let (tp, tu) = butterfly_sub_asym(tp, tuh);
  let (t6, t1) = butterfly_add_asym(t6, t1h);
  let (t2, t5) = butterfly_sub_asym(t2, t5h);
  let (tt, tq) = butterfly_add_asym(tt, tqh);
  let (tr, ts) = butterfly_sub_asym(tr, tsh);
  let (t4, t3) = butterfly_add_asym(t4, t3h);
  let (t8, tg) = butterfly_add_asym(t8, tgh);
  let (te, tm) = butterfly_sub_asym(te, tmh);
  let (tn, tf) = butterfly_add_asym(tn, tfh);
  let (th, t9) = butterfly_sub_asym(th, t9h);
  let (ta, ti) = butterfly_add_asym(ta, tih);
  let (tc, tk) = butterfly_sub_asym(tc, tkh);
  let (tl, td) = butterfly_add_asym(tl, tdh);
  let (tj, tb) = butterfly_sub_asym(tj, tbh);

  let (tf, tg) =
    RotateSub::kernel::<14, 14, 13>(tf, tg, (17911, 14699, 803));
  let (th, te) =
    RotateAdd::kernel::<13, 13, 12>(th, te, (10217, 5461, 1189));
  let (ti, td) =
    RotateAdd::kernel::<12, 13, 14>(ti, td, (5543, 3363, 7723));
  let (tc, tj) =
    RotateSub::kernel::<13, 14, 13>(tc, tj, (11529, 2271, 5197));
  let (tb, tk) =
    RotateNeg::kernel::<13, 14, 13>(tb, tk, (11529, 2271, 5197));
  let (ta, tl) =
    RotateNeg::kernel::<12, 13, 14>(ta, tl, (5543, 3363, 7723));
  let (t9, tm) =
    RotateNeg::kernel::<13, 13, 12>(t9, tm, (10217, 5461, 1189));
  let (t8, tn) =
    RotateNeg::kernel::<14, 14, 13>(t8, tn, (17911, 14699, 803));

  let (t3, t0h) = butterfly_sub(t3, t0);
  let (ts, tvh) = butterfly_add(ts, tv);
  let (tu, tth) = butterfly_sub(tu, tt);
  let (t1, t2h) = butterfly_add(t1, t2);
  let ((_toh, to), t4h) = butterfly_add(to, t4);
  let ((_tqh, tq), t6h) = butterfly_sub(tq, t6);
  let ((_t7h, t7), trh) = butterfly_add(t7, tr);
  let ((_t5h, t5), tph) = butterfly_sub(t5, tp);
  let (tb, t8h) = butterfly_sub(tb, t8);
  let (tk, tnh) = butterfly_add(tk, tn);
  let (tm, tlh) = butterfly_sub(tm, tl);
  let (t9, tah) = butterfly_add(t9, ta);
  let (tf, tch) = butterfly_sub(tf, tc);
  let (tg, tjh) = butterfly_add(tg, tj);
  let (ti, thh) = butterfly_sub(ti, th);
  let (td, teh) = butterfly_add(td, te);

  let (to, t7) = RotateAdd::kernel::<8, 11, 15>(to, t7, (301, 1609, 6393));
  let (tph, t6h) =
    RotateAdd::kernel::<13, 15, 13>(tph, t6h, (11363, 9041, 4551));
  let (t5, tq) =
    RotateNeg::kernel::<12, 15, 13>(t5, tq, (5681, 9041, 4551));
  let (t4h, trh) =
    RotateNeg::kernel::<13, 14, 15>(t4h, trh, (9633, 12873, 6393));

  let (t1, t0) = butterfly_add_asym(t1, t0h);
  let (tu, tv) = butterfly_sub_asym(tu, tvh);
  let (ts, t2) = butterfly_sub_asym(ts, t2h);
  let (t3, tt) = butterfly_sub_asym(t3, tth);
  let (t5, t4) = butterfly_add_asym((t5.rshift1(), t5), t4h);
  let (tq, tr) = butterfly_sub_asym((tq.rshift1(), tq), trh);
  let (t7, t6) = butterfly_add_asym((t7.rshift1(), t7), t6h);
  let (to, tp) = butterfly_sub_asym((to.rshift1(), to), tph);
  let (t9, t8) = butterfly_add_asym(t9, t8h);
  let (tm, tn) = butterfly_sub_asym(tm, tnh);
  let (tk, ta) = butterfly_sub_asym(tk, tah);
  let (tb, tl) = butterfly_sub_asym(tb, tlh);
  let (ti, tc) = butterfly_add_asym(ti, tch);
  let (td, tj) = butterfly_add_asym(td, tjh);
  let (tf, te) = butterfly_add_asym(tf, teh);
  let (tg, th) = butterfly_sub_asym(tg, thh);

  let (t2, tt) = RotateNeg::kernel::<9, 14, 13>(t2, tt, (669, 8867, 3135));
  let (ts, t3) = RotateAdd::kernel::<9, 14, 13>(ts, t3, (669, 8867, 3135));
  let (ta, tl) = RotateNeg::kernel::<9, 14, 13>(ta, tl, (669, 8867, 3135));
  let (tk, tb) = RotateAdd::kernel::<9, 14, 13>(tk, tb, (669, 8867, 3135));
  let (tc, tj) = RotateAdd::kernel::<9, 14, 13>(tc, tj, (669, 8867, 3135));
  let (ti, td) = RotateNeg::kernel::<9, 14, 13>(ti, td, (669, 8867, 3135));
  let (tu, t1) = RotatePi4Add::kernel::<12, 13>(tu, t1, (5793, 5793));
  let (tq, t5) = RotatePi4Add::kernel::<12, 13>(tq, t5, (5793, 5793));
  let (tp, t6) = RotatePi4Sub::kernel::<12, 13>(tp, t6, (5793, 5793));
  let (tm, t9) = RotatePi4Add::kernel::<12, 13>(tm, t9, (5793, 5793));
  let (te, th) = RotatePi4Add::kernel::<12, 13>(te, th, (5793, 5793));

  store_coeffs!(
    output, t0, t1, t2, t3, t4, t5, t6, t7, t8, t9, ta, tb, tc, td, te, tf,
    tg, th, ti, tj, tk, tl, tm, tn, to, tp, tq, tr, ts, tt, tu, tv
  );
}

#[allow(clippy::identity_op)]
#[$m]
$($s)* fn daala_fdct64<T: TxOperations>(coeffs: &mut [T]) {
  assert!(coeffs.len() >= 64);
  let mut asym: [(T, T); 32] = [(T::zero(), T::zero()); 32];
  let mut half: [T; 32] = [T::zero(); 32];
  {
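    // First +/- butterfly stage of the 64-point transform: pair input j with
    // its mirror 63 - j, collecting the halved terms in `half` and the
    // (half, full) pairs in `asym` for the two asymmetric 32-point
    // transforms below.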
    #[$m]
    #[inline]
    $($s)* fn butterfly_pair<T: TxOperations>(
      half: &mut [T; 32], asym: &mut [(T, T); 32], input: &[T], i: usize
    ) {
      let j = i * 2;
      let (ah, c) = butterfly_neg(input[j], input[63 - j]);
      let (b, dh) = butterfly_add(input[j + 1], input[63 - j - 1]);
      half[i] = ah;
      half[31 - i] = dh;
      asym[i] = b;
      asym[31 - i] = c;
    }
    butterfly_pair(&mut half, &mut asym, coeffs, 0);
    butterfly_pair(&mut half, &mut asym, coeffs, 1);
    butterfly_pair(&mut half, &mut asym, coeffs, 2);
    butterfly_pair(&mut half, &mut asym, coeffs, 3);
    butterfly_pair(&mut half, &mut asym, coeffs, 4);
    butterfly_pair(&mut half, &mut asym, coeffs, 5);
    butterfly_pair(&mut half, &mut asym, coeffs, 6);
    butterfly_pair(&mut half, &mut asym, coeffs, 7);
    butterfly_pair(&mut half, &mut asym, coeffs, 8);
    butterfly_pair(&mut half, &mut asym, coeffs, 9);
    butterfly_pair(&mut half, &mut asym, coeffs, 10);
    butterfly_pair(&mut half, &mut asym, coeffs, 11);
    butterfly_pair(&mut half, &mut asym, coeffs, 12);
    butterfly_pair(&mut half, &mut asym, coeffs, 13);
    butterfly_pair(&mut half, &mut asym, coeffs, 14);
    butterfly_pair(&mut half, &mut asym, coeffs, 15);
  }

  let mut temp_out: [T; 64] = [T::zero(); 64];
  daala_fdct_ii_32_asym(
    half[0],
    asym[0],
    half[1],
    asym[1],
    half[2],
    asym[2],
    half[3],
    asym[3],
    half[4],
    asym[4],
    half[5],
    asym[5],
    half[6],
    asym[6],
    half[7],
    asym[7],
    half[8],
    asym[8],
    half[9],
    asym[9],
    half[10],
    asym[10],
    half[11],
    asym[11],
    half[12],
    asym[12],
    half[13],
    asym[13],
    half[14],
    asym[14],
    half[15],
    asym[15],
    &mut temp_out[0..32],
  );
  daala_fdst_iv_32_asym(
    asym[31],
    half[31],
    asym[30],
    half[30],
    asym[29],
    half[29],
    asym[28],
    half[28],
    asym[27],
    half[27],
    asym[26],
    half[26],
    asym[25],
    half[25],
    asym[24],
    half[24],
    asym[23],
    half[23],
    asym[22],
    half[22],
    asym[21],
    half[21],
    asym[20],
    half[20],
    asym[19],
    half[19],
    asym[18],
    half[18],
    asym[17],
    half[17],
    asym[16],
    half[16],
    &mut temp_out[32..64],
  );
  temp_out[32..64].reverse();

  #[$m]
  #[inline]
  $($s)* fn reorder_4<T: TxOperations>(
    output: &mut [T], i: usize, tmp: [T; 64], j: usize
  ) {
    output[0 + i * 4] = tmp[0 + j];
    output[1 + i * 4] = tmp[32 + j];
    output[2 + i * 4] = tmp[16 + j];
    output[3 + i * 4] = tmp[48 + j];
  }
  reorder_4(coeffs, 0, temp_out, 0);
  reorder_4(coeffs, 1, temp_out, 8);
  reorder_4(coeffs, 2, temp_out, 4);
  reorder_4(coeffs, 3, temp_out, 12);
  reorder_4(coeffs, 4, temp_out, 2);
  reorder_4(coeffs, 5, temp_out, 10);
  reorder_4(coeffs, 6, temp_out, 6);
  reorder_4(coeffs, 7, temp_out, 14);

  reorder_4(coeffs, 8, temp_out, 1);
  reorder_4(coeffs, 9, temp_out, 9);
  reorder_4(coeffs, 10, temp_out, 5);
  reorder_4(coeffs, 11, temp_out, 13);
  reorder_4(coeffs, 12, temp_out, 3);
  reorder_4(coeffs, 13, temp_out, 11);
  reorder_4(coeffs, 14, temp_out, 7);
  reorder_4(coeffs, 15, temp_out, 15);
}

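// Identity transform: the coefficients are passed through unchanged.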
#[$m]
$($s)* fn fidentity<T: TxOperations>(_coeffs: &mut [T]) {}

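// 4-point Walsh-Hadamard transform, used for TxType::WHT_WHT together with
// the dedicated FWD_SHIFT_4X4_WHT shifts; it needs only adds, subtracts and
// one average, no multiplies.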
#[$m]
$($s)* fn fwht4<T: TxOperations>(coeffs: &mut [T]) {
  assert!(coeffs.len() >= 4);
  let x0 = coeffs[0];
  let x1 = coeffs[1];
  let x2 = coeffs[2];
  let x3 = coeffs[3];

  let s0 = x0.add(x1);
  let s1 = x3.sub(x2);
  let s2 = s0.sub_avg(s1);

  let q1 = s2.sub(x2);
  let q0 = s0.sub(q1);
  let q3 = s2.sub(x1);
  let q2 = s1.add(q3);

  store_coeffs!(coeffs, q0, q1, q2, q3);
}

  };
}