1use super::TxSize;
11use super::TxType;
12
13use super::HTX_TAB;
14use super::VTX_TAB;
15
/// Three per-stage shift amounts applied during a 2-D forward transform.
/// NOTE(review): stage mapping (column pass / intermediate / row pass) is
/// assumed from the daala/AV1 convention — confirm against the transform
/// drivers that consume `Txfm2DFlipCfg::shift`.
pub type TxfmShift = [i8; 3];
/// One `TxfmShift` per coding bit depth; selected with index `(bd - 8) / 2`
/// (0 => 8 bpc, 1 => 10 bpc, 2 => 12 bpc) in `Txfm2DFlipCfg::fwd`.
pub type TxfmShifts = [TxfmShift; 3];
18
// Forward-transform shift tables, one per transform size. Each table holds
// three rows — one per bit depth (8/10/12 bpc), selected via `(bd - 8) / 2`
// in `Txfm2DFlipCfg::fwd`. Sizes sharing the same total pixel count reuse
// identical shift rows.
const FWD_SHIFT_4X4: TxfmShifts = [[3, 0, 0], [2, 0, 1], [0, 0, 3]];
const FWD_SHIFT_8X8: TxfmShifts = [[4, -1, 0], [2, 0, 1], [0, 0, 3]];
const FWD_SHIFT_16X16: TxfmShifts = [[4, -1, 0], [2, 0, 1], [0, 0, 3]];
const FWD_SHIFT_32X32: TxfmShifts = [[4, -2, 0], [2, 0, 0], [0, 0, 2]];
const FWD_SHIFT_64X64: TxfmShifts = [[4, -1, -2], [2, 0, -1], [0, 0, 1]];
const FWD_SHIFT_4X8: TxfmShifts = [[4, -1, 0], [2, 0, 1], [0, 0, 3]];
const FWD_SHIFT_8X4: TxfmShifts = [[4, -1, 0], [2, 0, 1], [0, 0, 3]];
const FWD_SHIFT_8X16: TxfmShifts = [[4, -1, 0], [2, 0, 1], [0, 0, 3]];
const FWD_SHIFT_16X8: TxfmShifts = [[4, -1, 0], [2, 0, 1], [0, 0, 3]];
const FWD_SHIFT_16X32: TxfmShifts = [[4, -2, 0], [2, 0, 0], [0, 0, 2]];
const FWD_SHIFT_32X16: TxfmShifts = [[4, -2, 0], [2, 0, 0], [0, 0, 2]];
const FWD_SHIFT_32X64: TxfmShifts = [[4, -1, -2], [2, 0, -1], [0, 0, 1]];
const FWD_SHIFT_64X32: TxfmShifts = [[4, -1, -2], [2, 0, -1], [0, 0, 1]];
const FWD_SHIFT_4X16: TxfmShifts = [[4, -1, 0], [2, 0, 1], [0, 0, 3]];
const FWD_SHIFT_16X4: TxfmShifts = [[4, -1, 0], [2, 0, 1], [0, 0, 3]];
const FWD_SHIFT_8X32: TxfmShifts = [[4, -1, 0], [2, 0, 1], [0, 0, 3]];
const FWD_SHIFT_32X8: TxfmShifts = [[4, -1, 0], [2, 0, 1], [0, 0, 3]];
const FWD_SHIFT_16X64: TxfmShifts = [[4, -2, 0], [2, 0, 0], [0, 0, 2]];
const FWD_SHIFT_64X16: TxfmShifts = [[4, -2, 0], [2, 0, 0], [0, 0, 2]];
41
// Dedicated shift set for the 4x4 Walsh-Hadamard transform; used for every
// bit depth when `tx_type == TxType::WHT_WHT` (see `Txfm2DFlipCfg::fwd`).
const FWD_SHIFT_4X4_WHT: TxfmShift = [0, 0, 2];
43
/// Forward shift tables indexed by `TxSize as usize`.
/// NOTE(review): entry order is assumed to mirror the `TxSize` enum
/// declaration (square sizes first, then rectangular) — confirm against the
/// `TxSize` definition in the parent module.
pub const FWD_TXFM_SHIFT_LS: [TxfmShifts; TxSize::TX_SIZES_ALL] = [
  FWD_SHIFT_4X4,
  FWD_SHIFT_8X8,
  FWD_SHIFT_16X16,
  FWD_SHIFT_32X32,
  FWD_SHIFT_64X64,
  FWD_SHIFT_4X8,
  FWD_SHIFT_8X4,
  FWD_SHIFT_8X16,
  FWD_SHIFT_16X8,
  FWD_SHIFT_16X32,
  FWD_SHIFT_32X16,
  FWD_SHIFT_32X64,
  FWD_SHIFT_64X32,
  FWD_SHIFT_4X16,
  FWD_SHIFT_16X4,
  FWD_SHIFT_8X32,
  FWD_SHIFT_32X8,
  FWD_SHIFT_16X64,
  FWD_SHIFT_64X16,
];
65
/// Concrete 1-D transform kernels, combining a transform family
/// (DCT / ADST / Identity / Walsh-Hadamard) with a 1-D length.
/// Selected per column/row direction via `TxfmType::AV1_TXFM_TYPE_LS`.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum TxfmType {
  DCT4,
  DCT8,
  DCT16,
  DCT32,
  DCT64,
  ADST4,
  ADST8,
  ADST16,
  Identity4,
  Identity8,
  Identity16,
  Identity32,
  WHT4,
}
82
impl TxfmType {
  // Number of 1-D transform type columns in `AV1_TXFM_TYPE_LS`.
  const TX_TYPES_1D: usize = 5;
  // Kernel lookup: rows are the 1-D size index (0 => 4-pt .. 4 => 64-pt),
  // columns the 1-D transform type from VTX_TAB/HTX_TAB. `None` marks
  // unsupported combinations (no ADST beyond 16-pt, WHT only at 4-pt).
  // Columns 1 and 2 share the same ADST kernel: the flipped variant is
  // realized separately through `Txfm2DFlipCfg::get_flip_cfg`.
  const AV1_TXFM_TYPE_LS: [[Option<TxfmType>; Self::TX_TYPES_1D]; 5] = [
    [
      Some(TxfmType::DCT4),
      Some(TxfmType::ADST4),
      Some(TxfmType::ADST4),
      Some(TxfmType::Identity4),
      Some(TxfmType::WHT4),
    ],
    [
      Some(TxfmType::DCT8),
      Some(TxfmType::ADST8),
      Some(TxfmType::ADST8),
      Some(TxfmType::Identity8),
      None,
    ],
    [
      Some(TxfmType::DCT16),
      Some(TxfmType::ADST16),
      Some(TxfmType::ADST16),
      Some(TxfmType::Identity16),
      None,
    ],
    [Some(TxfmType::DCT32), None, None, Some(TxfmType::Identity32), None],
    [Some(TxfmType::DCT64), None, None, None, None],
  ];
}
111
/// Fully-resolved configuration for one 2-D forward transform call,
/// produced by [`Txfm2DFlipCfg::fwd`].
#[derive(Debug, Clone, Copy)]
pub struct Txfm2DFlipCfg {
  // Transform block size.
  pub tx_size: TxSize,
  // Whether the input is flipped vertically (FLIPADST on the column pass).
  pub ud_flip: bool,
  // Whether the input is flipped horizontally (FLIPADST on the row pass).
  pub lr_flip: bool,
  // Per-stage shifts for the selected size and bit depth.
  pub shift: TxfmShift,
  // 1-D kernel applied along columns (vertical direction).
  pub txfm_type_col: TxfmType,
  // 1-D kernel applied along rows (horizontal direction).
  pub txfm_type_row: TxfmType,
}
123
124impl Txfm2DFlipCfg {
125 pub fn fwd(tx_type: TxType, tx_size: TxSize, bd: usize) -> Self {
129 let tx_type_1d_col = VTX_TAB[tx_type as usize];
130 let tx_type_1d_row = HTX_TAB[tx_type as usize];
131 let txw_idx = tx_size.width_index();
132 let txh_idx = tx_size.height_index();
133 let txfm_type_col =
134 TxfmType::AV1_TXFM_TYPE_LS[txh_idx][tx_type_1d_col as usize].unwrap();
135 let txfm_type_row =
136 TxfmType::AV1_TXFM_TYPE_LS[txw_idx][tx_type_1d_row as usize].unwrap();
137 let (ud_flip, lr_flip) = Self::get_flip_cfg(tx_type);
138 let shift = if tx_type == TxType::WHT_WHT {
139 FWD_SHIFT_4X4_WHT
140 } else {
141 FWD_TXFM_SHIFT_LS[tx_size as usize][(bd - 8) / 2]
142 };
143
144 Txfm2DFlipCfg {
145 tx_size,
146 ud_flip,
147 lr_flip,
148 shift,
149 txfm_type_col,
150 txfm_type_row,
151 }
152 }
153
154 const fn get_flip_cfg(tx_type: TxType) -> (bool, bool) {
156 use self::TxType::*;
157 match tx_type {
158 DCT_DCT | ADST_DCT | DCT_ADST | ADST_ADST | IDTX | V_DCT | H_DCT
159 | V_ADST | H_ADST | WHT_WHT => (false, false),
160 FLIPADST_DCT | FLIPADST_ADST | V_FLIPADST => (true, false),
161 DCT_FLIPADST | ADST_FLIPADST | H_FLIPADST => (false, true),
162 FLIPADST_FLIPADST => (true, true),
163 }
164 }
165}
166
/// Stores a fixed sequence of expressions into consecutive elements of
/// `$arr`, starting at index 0, in the order the expressions are written.
///
/// Replaces the previous implementation's `i32` counter (started at `-1`
/// and cast with `as usize` on every store) with a plain `usize` counter
/// incremented after each assignment — same behavior, no signed arithmetic
/// or per-element cast.
macro_rules! store_coeffs {
  ( $arr:expr, $( $x:expr ),* ) => {
    {
      let mut i: usize = 0;
      $(
        $arr[i] = $x;
        i += 1;
      )*
      // Read the final counter value so the last `i += 1` is not flagged
      // as an unused assignment.
      let _ = i;
    }
  };
}
178
179macro_rules! impl_1d_tx {
180() => {
181 impl_1d_tx! {allow(unused_attributes), }
182};
183
184($m:meta, $($s:ident),*) => {
185 pub trait TxOperations: Copy {
186 $($s)* fn zero() -> Self;
187
188 $($s)* fn tx_mul<const SHIFT: i32>(self, mul: i32) -> Self;
189 $($s)* fn rshift1(self) -> Self;
190 $($s)* fn add(self, b: Self) -> Self;
191 $($s)* fn sub(self, b: Self) -> Self;
192 $($s)* fn add_avg(self, b: Self) -> Self;
193 $($s)* fn sub_avg(self, b: Self) -> Self;
194
195 $($s)* fn copy_fn(self) -> Self {
196 self
197 }
198 }
199
200 #[inline]
201 fn get_func(t: TxfmType) -> TxfmFunc {
202 use self::TxfmType::*;
203 match t {
204 DCT4 => daala_fdct4,
205 DCT8 => daala_fdct8,
206 DCT16 => daala_fdct16,
207 DCT32 => daala_fdct32,
208 DCT64 => daala_fdct64,
209 ADST4 => daala_fdst_vii_4,
210 ADST8 => daala_fdst8,
211 ADST16 => daala_fdst16,
212 Identity4 => fidentity,
213 Identity8 => fidentity,
214 Identity16 => fidentity,
215 Identity32 => fidentity,
216 WHT4 => fwht4,
217 }
218 }
219
220 trait RotateKernelPi4<T: TxOperations> {
221 const ADD: $($s)* fn(T, T) -> T;
222 const SUB: $($s)* fn(T, T) -> T;
223
224 #[$m]
225 $($s)* fn kernel<const SHIFT0: i32, const SHIFT1: i32>(p0: T, p1: T, m: (i32, i32)) -> (T, T) {
226 let t = Self::ADD(p1, p0);
227 let (a, out0) = (p0.tx_mul::<SHIFT0>(m.0), t.tx_mul::<SHIFT1>(m.1));
228 let out1 = Self::SUB(a, out0);
229 (out0, out1)
230 }
231}
232
233struct RotatePi4Add;
234struct RotatePi4AddAvg;
235struct RotatePi4Sub;
236struct RotatePi4SubAvg;
237
238impl<T: TxOperations> RotateKernelPi4<T> for RotatePi4Add {
239 const ADD: $($s)* fn(T, T) -> T = T::add;
240 const SUB: $($s)* fn(T, T) -> T = T::sub;
241}
242
243impl<T: TxOperations> RotateKernelPi4<T> for RotatePi4AddAvg {
244 const ADD: $($s)* fn(T, T) -> T = T::add_avg;
245 const SUB: $($s)* fn(T, T) -> T = T::sub;
246}
247
248impl<T: TxOperations> RotateKernelPi4<T> for RotatePi4Sub {
249 const ADD: $($s)* fn(T, T) -> T = T::sub;
250 const SUB: $($s)* fn(T, T) -> T = T::add;
251}
252
253impl<T: TxOperations> RotateKernelPi4<T> for RotatePi4SubAvg {
254 const ADD: $($s)* fn(T, T) -> T = T::sub_avg;
255 const SUB: $($s)* fn(T, T) -> T = T::add;
256}
257
258trait RotateKernel<T: TxOperations> {
259 const ADD: $($s)* fn(T, T) -> T;
260 const SUB: $($s)* fn(T, T) -> T;
261 const SHIFT: $($s)* fn(T) -> T;
262
263 #[$m]
264 $($s)* fn half_kernel<const SHIFT0: i32, const SHIFT1: i32, const SHIFT2: i32>(
265 p0: (T, T), p1: T, m: (i32, i32, i32),
266 ) -> (T, T) {
267 let t = Self::ADD(p1, p0.0);
268 let (a, b, c) = (p0.1.tx_mul::<SHIFT0>(m.0), p1.tx_mul::<SHIFT1>(m.1), t.tx_mul::<SHIFT2>(m.2));
269 let out0 = b.add(c);
270 let shifted = Self::SHIFT(c);
271 let out1 = Self::SUB(a, shifted);
272 (out0, out1)
273 }
274
275 #[$m]
276 $($s)* fn kernel<const SHIFT0: i32, const SHIFT1: i32, const SHIFT2: i32>(p0: T, p1: T, m: (i32, i32, i32)) -> (T, T) {
277 Self::half_kernel::<SHIFT0, SHIFT1, SHIFT2>((p0, p0), p1, m)
278 }
279}
280
281trait RotateKernelNeg<T: TxOperations> {
282 const ADD: $($s)* fn(T, T) -> T;
283
284 #[$m]
285 $($s)* fn kernel<const SHIFT0: i32, const SHIFT1: i32, const SHIFT2: i32>(p0: T, p1: T, m: (i32, i32, i32)) -> (T, T) {
286 let t = Self::ADD(p0, p1);
287 let (a, b, c) = (p0.tx_mul::<SHIFT0>(m.0), p1.tx_mul::<SHIFT1>(m.1), t.tx_mul::<SHIFT2>(m.2));
288 let out0 = b.sub(c);
289 let out1 = c.sub(a);
290 (out0, out1)
291 }
292}
293
294struct RotateAdd;
295struct RotateAddAvg;
296struct RotateAddShift;
297struct RotateSub;
298struct RotateSubAvg;
299struct RotateSubShift;
300struct RotateNeg;
301struct RotateNegAvg;
302
303impl<T: TxOperations> RotateKernel<T> for RotateAdd {
304 const ADD: $($s)* fn(T, T) -> T = T::add;
305 const SUB: $($s)* fn(T, T) -> T = T::sub;
306 const SHIFT: $($s)* fn(T) -> T = T::copy_fn;
307}
308
309impl<T: TxOperations> RotateKernel<T> for RotateAddAvg {
310 const ADD: $($s)* fn(T, T) -> T = T::add_avg;
311 const SUB: $($s)* fn(T, T) -> T = T::sub;
312 const SHIFT: $($s)* fn(T) -> T = T::copy_fn;
313}
314
315impl<T: TxOperations> RotateKernel<T> for RotateAddShift {
316 const ADD: $($s)* fn(T, T) -> T = T::add;
317 const SUB: $($s)* fn(T, T) -> T = T::sub;
318 const SHIFT: $($s)* fn(T) -> T = T::rshift1;
319}
320
321impl<T: TxOperations> RotateKernel<T> for RotateSub {
322 const ADD: $($s)* fn(T, T) -> T = T::sub;
323 const SUB: $($s)* fn(T, T) -> T = T::add;
324 const SHIFT: $($s)* fn(T) -> T = T::copy_fn;
325}
326
327impl<T: TxOperations> RotateKernel<T> for RotateSubAvg {
328 const ADD: $($s)* fn(T, T) -> T = T::sub_avg;
329 const SUB: $($s)* fn(T, T) -> T = T::add;
330 const SHIFT: $($s)* fn(T) -> T = T::copy_fn;
331}
332
333impl<T: TxOperations> RotateKernel<T> for RotateSubShift {
334 const ADD: $($s)* fn(T, T) -> T = T::sub;
335 const SUB: $($s)* fn(T, T) -> T = T::add;
336 const SHIFT: $($s)* fn(T) -> T = T::rshift1;
337}
338
339impl<T: TxOperations> RotateKernelNeg<T> for RotateNeg {
340 const ADD: $($s)* fn(T, T) -> T = T::sub;
341}
342
343impl<T: TxOperations> RotateKernelNeg<T> for RotateNegAvg {
344 const ADD: $($s)* fn(T, T) -> T = T::sub_avg;
345}
346
347#[inline]
348#[$m]
349$($s)* fn butterfly_add<T: TxOperations>(p0: T, p1: T) -> ((T, T), T) {
350 let p0 = p0.add(p1);
351 let p0h = p0.rshift1();
352 let p1h = p1.sub(p0h);
353 ((p0h, p0), p1h)
354}
355
356#[inline]
357#[$m]
358$($s)* fn butterfly_sub<T: TxOperations>(p0: T, p1: T) -> ((T, T), T) {
359 let p0 = p0.sub(p1);
360 let p0h = p0.rshift1();
361 let p1h = p1.add(p0h);
362 ((p0h, p0), p1h)
363}
364
365#[inline]
366#[$m]
367$($s)* fn butterfly_neg<T: TxOperations>(p0: T, p1: T) -> (T, (T, T)) {
368 let p1 = p0.sub(p1);
369 let p1h = p1.rshift1();
370 let p0h = p0.sub(p1h);
371 (p0h, (p1h, p1))
372}
373
374#[inline]
375#[$m]
376$($s)* fn butterfly_add_asym<T: TxOperations>(p0: (T, T), p1h: T) -> (T, T) {
377 let p1 = p1h.add(p0.0);
378 let p0 = p0.1.sub(p1);
379 (p0, p1)
380}
381
382#[inline]
383#[$m]
384$($s)* fn butterfly_sub_asym<T: TxOperations>(p0: (T, T), p1h: T) -> (T, T) {
385 let p1 = p1h.sub(p0.0);
386 let p0 = p0.1.add(p1);
387 (p0, p1)
388}
389
390#[inline]
391#[$m]
392$($s)* fn butterfly_neg_asym<T: TxOperations>(p0h: T, p1: (T, T)) -> (T, T) {
393 let p0 = p0h.add(p1.0);
394 let p1 = p0.sub(p1.1);
395 (p0, p1)
396}
397
398#[$m]
399$($s)* fn daala_fdct_ii_2_asym<T: TxOperations>(p0h: T, p1: (T, T)) -> (T, T) {
400 butterfly_neg_asym(p0h, p1)
401}
402
403#[$m]
404$($s)* fn daala_fdst_iv_2_asym<T: TxOperations>(p0: (T, T), p1h: T) -> (T, T) {
405 RotateAdd::half_kernel::<9, 12, 13>(p0, p1h, (473, 3135, 4433))
409}
410
411#[$m]
412$($s)* fn daala_fdct_ii_4<T: TxOperations>(
413 q0: T, q1: T, q2: T, q3: T, output: &mut [T],
414) {
415 let (q0h, q3) = butterfly_neg(q0, q3);
417 let (q1, q2h) = butterfly_add(q1, q2);
418
419 let (q0, q1) = daala_fdct_ii_2_asym(q0h, q1);
421 let (q3, q2) = daala_fdst_iv_2_asym(q3, q2h);
422
423 store_coeffs!(output, q0, q1, q2, q3);
424}
425
426#[$m]
427$($s)* fn daala_fdct4<T: TxOperations>(coeffs: &mut [T]) {
428 assert!(coeffs.len() >= 4);
429 let mut temp_out: [T; 4] = [T::zero(); 4];
430 daala_fdct_ii_4(coeffs[0], coeffs[1], coeffs[2], coeffs[3], &mut temp_out);
431
432 coeffs[0] = temp_out[0];
433 coeffs[1] = temp_out[2];
434 coeffs[2] = temp_out[1];
435 coeffs[3] = temp_out[3];
436}
437
438#[$m]
439$($s)* fn daala_fdst_vii_4<T: TxOperations>(coeffs: &mut [T]) {
440 assert!(coeffs.len() >= 4);
441
442 let q0 = coeffs[0];
443 let q1 = coeffs[1];
444 let q2 = coeffs[2];
445 let q3 = coeffs[3];
446 let t0 = q1.add(q3);
447 let t1 = q1.add(q0.sub_avg(t0));
449 let t2 = q0.sub(q1);
450 let t3 = q2;
451 let t4 = q0.add(q3);
452 let t0 = t0.tx_mul::<14>(7021);
454 let t1 = t1.tx_mul::<15>(37837);
456 let t2 = t2.tx_mul::<15>(21513);
458 let t3 = t3.tx_mul::<15>(37837);
460 let t4 = t4.tx_mul::<11>(467);
462 let t3h = t3.rshift1();
463 let u4 = t4.add(t3h);
464 coeffs[0] = t0.add(u4);
465 coeffs[1] = t1;
466 coeffs[2] = t0.add(t2.sub(t3h));
467 coeffs[3] = t2.add(t3.sub(u4));
468}
469
470#[$m]
471$($s)* fn daala_fdct_ii_2<T: TxOperations>(p0: T, p1: T) -> (T, T) {
472 let (p1, p0) = RotatePi4SubAvg::kernel::<13, 13>(p1, p0, (11585, 11585));
475 (p0, p1)
476}
477
478#[$m]
479$($s)* fn daala_fdst_iv_2<T: TxOperations>(p0: T, p1: T) -> (T, T) {
480 RotateAddAvg::kernel::<13, 14, 12>(p0, p1, (10703, 8867, 3135))
484}
485
486#[$m]
487$($s)* fn daala_fdct_ii_4_asym<T: TxOperations>(
488 q0h: T, q1: (T, T), q2h: T, q3: (T, T), output: &mut [T],
489) {
490 let (q0, q3) = butterfly_neg_asym(q0h, q3);
492 let (q1, q2) = butterfly_sub_asym(q1, q2h);
493
494 let (q0, q1) = daala_fdct_ii_2(q0, q1);
496 let (q3, q2) = daala_fdst_iv_2(q3, q2);
497
498 store_coeffs!(output, q0, q1, q2, q3);
499}
500
501#[$m]
502$($s)* fn daala_fdst_iv_4_asym<T: TxOperations>(
503 q0: (T, T), q1h: T, q2: (T, T), q3h: T, output: &mut [T],
504) {
505 let (q0, q3) = RotateAddShift::half_kernel::<14, 13, 15>(
510 q0,
511 q3h,
512 (9633, 12873, 12785),
513 );
514 let (q2, q1) = RotateSubShift::half_kernel::<14, 15, 12>(
518 q2,
519 q1h,
520 (11363, 18081, 4551),
521 );
522
523 let (q2, q3) = butterfly_sub_asym((q2.rshift1(), q2), q3);
525 let (q0, q1) = butterfly_sub_asym((q0.rshift1(), q0), q1);
526
527 let (q2, q1) = RotatePi4AddAvg::kernel::<13, 13>(q2, q1, (11585, 11585));
531
532 store_coeffs!(output, q0, q1, q2, q3);
533}
534
535#[$m]
536$($s)* fn daala_fdct_ii_8<T: TxOperations>(
537 r0: T, r1: T, r2: T, r3: T, r4: T, r5: T, r6: T, r7: T, output: &mut [T],
538) {
539 let (r0h, r7) = butterfly_neg(r0, r7);
541 let (r1, r6h) = butterfly_add(r1, r6);
542 let (r2h, r5) = butterfly_neg(r2, r5);
543 let (r3, r4h) = butterfly_add(r3, r4);
544
545 daala_fdct_ii_4_asym(r0h, r1, r2h, r3, &mut output[0..4]);
547 daala_fdst_iv_4_asym(r7, r6h, r5, r4h, &mut output[4..8]);
548 output[4..8].reverse();
549}
550
551#[$m]
552$($s)* fn daala_fdct8<T: TxOperations>(coeffs: &mut [T]) {
553 assert!(coeffs.len() >= 8);
554 let mut temp_out: [T; 8] = [T::zero(); 8];
555 daala_fdct_ii_8(
556 coeffs[0],
557 coeffs[1],
558 coeffs[2],
559 coeffs[3],
560 coeffs[4],
561 coeffs[5],
562 coeffs[6],
563 coeffs[7],
564 &mut temp_out,
565 );
566
567 coeffs[0] = temp_out[0];
568 coeffs[1] = temp_out[4];
569 coeffs[2] = temp_out[2];
570 coeffs[3] = temp_out[6];
571 coeffs[4] = temp_out[1];
572 coeffs[5] = temp_out[5];
573 coeffs[6] = temp_out[3];
574 coeffs[7] = temp_out[7];
575}
576
577#[$m]
578$($s)* fn daala_fdst_iv_8<T: TxOperations>(
579 r0: T, r1: T, r2: T, r3: T, r4: T, r5: T, r6: T, r7: T, output: &mut [T],
580) {
581 let (r0, r7) =
586 RotateAdd::kernel::<14, 14, 13>(r0, r7, (17911, 14699, 803));
587 let (r6, r1) =
591 RotateSub::kernel::<14, 15, 12>(r6, r1, (20435, 21845, 1189));
592 let (r2, r5) =
596 RotateAdd::kernel::<14, 13, 15>(r2, r5, (22173, 3363, 15447));
597 let (r4, r3) =
601 RotateSub::kernel::<14, 14, 13>(r4, r3, (23059, 2271, 5197));
602
603 let (r0, r3h) = butterfly_add(r0, r3);
605 let (r2, r1h) = butterfly_sub(r2, r1);
606 let (r5, r6h) = butterfly_add(r5, r6);
607 let (r7, r4h) = butterfly_sub(r7, r4);
608
609 let (r7, r6) = butterfly_add_asym(r7, r6h);
611 let (r5, r3) = butterfly_add_asym(r5, r3h);
612 let (r2, r4) = butterfly_add_asym(r2, r4h);
613 let (r0, r1) = butterfly_sub_asym(r0, r1h);
614
615 let (r3, r4) =
620 RotateSubAvg::kernel::<13, 14, 12>(r3, r4, (10703, 8867, 3135));
621 let (r2, r5) =
625 RotateNegAvg::kernel::<13, 14, 12>(r2, r5, (10703, 8867, 3135));
626 let (r1, r6) = RotatePi4SubAvg::kernel::<13, 13>(r1, r6, (11585, 11585));
629
630 store_coeffs!(output, r0, r1, r2, r3, r4, r5, r6, r7);
631}
632
633#[$m]
634$($s)* fn daala_fdst8<T: TxOperations>(coeffs: &mut [T]) {
635 assert!(coeffs.len() >= 8);
636 let mut temp_out: [T; 8] = [T::zero(); 8];
637 daala_fdst_iv_8(
638 coeffs[0],
639 coeffs[1],
640 coeffs[2],
641 coeffs[3],
642 coeffs[4],
643 coeffs[5],
644 coeffs[6],
645 coeffs[7],
646 &mut temp_out,
647 );
648
649 coeffs[0] = temp_out[0];
650 coeffs[1] = temp_out[4];
651 coeffs[2] = temp_out[2];
652 coeffs[3] = temp_out[6];
653 coeffs[4] = temp_out[1];
654 coeffs[5] = temp_out[5];
655 coeffs[6] = temp_out[3];
656 coeffs[7] = temp_out[7];
657}
658
659#[$m]
660$($s)* fn daala_fdst_iv_4<T: TxOperations>(
661 q0: T, q1: T, q2: T, q3: T, output: &mut [T],
662) {
663 let (q0, q3) =
668 RotateAddShift::kernel::<14, 12, 11>(q0, q3, (13623, 4551, 565));
669 let (q2, q1) =
673 RotateSubShift::kernel::<14, 15, 11>(q2, q1, (16069, 12785, 1609));
674
675 let (q2, q3) = butterfly_sub_asym((q2.rshift1(), q2), q3);
677 let (q0, q1) = butterfly_sub_asym((q0.rshift1(), q0), q1);
678
679 let (q2, q1) = RotatePi4AddAvg::kernel::<13, 13>(q2, q1, (11585, 11585));
683
684 store_coeffs!(output, q0, q1, q2, q3);
685}
686
687
688#[$m]
689$($s)* fn daala_fdct_ii_8_asym<T: TxOperations>(
690 r0h: T, r1: (T, T), r2h: T, r3: (T, T), r4h: T, r5: (T, T), r6h: T,
691 r7: (T, T), output: &mut [T],
692) {
693 let (r0, r7) = butterfly_neg_asym(r0h, r7);
695 let (r1, r6) = butterfly_sub_asym(r1, r6h);
696 let (r2, r5) = butterfly_neg_asym(r2h, r5);
697 let (r3, r4) = butterfly_sub_asym(r3, r4h);
698
699 daala_fdct_ii_4(r0, r1, r2, r3, &mut output[0..4]);
701 daala_fdst_iv_4(r7, r6, r5, r4, &mut output[4..8]);
702 output[4..8].reverse();
703}
704
705#[$m]
706$($s)* fn daala_fdst_iv_8_asym<T: TxOperations>(
707 r0: (T, T), r1h: T, r2: (T, T), r3h: T, r4: (T, T), r5h: T, r6: (T, T),
708 r7h: T, output: &mut [T],
709) {
710 let (r0, r7) =
715 RotateAdd::half_kernel::<14, 12, 14>(r0, r7h, (12665, 5197, 2271));
716 let (r6, r1) =
720 RotateSub::half_kernel::<14, 15, 13>(r6, r1h, (14449, 30893, 3363));
721 let (r2, r5) =
725 RotateAdd::half_kernel::<14, 11, 13>(r2, r5h, (15679, 1189, 5461));
726 let (r4, r3) =
730 RotateSub::half_kernel::<14, 12, 14>(r4, r3h, (16305, 803, 14699));
731
732 let (r0, r3h) = butterfly_add(r0, r3);
734 let (r2, r1h) = butterfly_sub(r2, r1);
735 let (r5, r6h) = butterfly_add(r5, r6);
736 let (r7, r4h) = butterfly_sub(r7, r4);
737
738 let (r7, r6) = butterfly_add_asym(r7, r6h);
740 let (r5, r3) = butterfly_add_asym(r5, r3h);
741 let (r2, r4) = butterfly_add_asym(r2, r4h);
742 let (r0, r1) = butterfly_sub_asym(r0, r1h);
743
744 let (r3, r4) =
749 RotateSubAvg::kernel::<9, 14, 12>(r3, r4, (669, 8867, 3135));
750 let (r2, r5) =
754 RotateNegAvg::kernel::<9, 14, 12>(r2, r5, (669, 8867, 3135));
755 let (r1, r6) = RotatePi4SubAvg::kernel::<12, 13>(r1, r6, (5793, 11585));
758
759 store_coeffs!(output, r0, r1, r2, r3, r4, r5, r6, r7);
760}
761
762#[$m]
763$($s)* fn daala_fdct_ii_16<T: TxOperations>(
764 s0: T, s1: T, s2: T, s3: T, s4: T, s5: T, s6: T, s7: T, s8: T, s9: T, sa: T,
765 sb: T, sc: T, sd: T, se: T, sf: T, output: &mut [T],
766) {
767 let (s0h, sf) = butterfly_neg(s0, sf);
769 let (s1, seh) = butterfly_add(s1, se);
770 let (s2h, sd) = butterfly_neg(s2, sd);
771 let (s3, sch) = butterfly_add(s3, sc);
772 let (s4h, sb) = butterfly_neg(s4, sb);
773 let (s5, sah) = butterfly_add(s5, sa);
774 let (s6h, s9) = butterfly_neg(s6, s9);
775 let (s7, s8h) = butterfly_add(s7, s8);
776
777 daala_fdct_ii_8_asym(s0h, s1, s2h, s3, s4h, s5, s6h, s7, &mut output[0..8]);
779 daala_fdst_iv_8_asym(sf, seh, sd, sch, sb, sah, s9, s8h, &mut output[8..16]);
780 output[8..16].reverse();
781}
782
783#[$m]
784$($s)* fn daala_fdct16<T: TxOperations>(coeffs: &mut [T]) {
785 assert!(coeffs.len() >= 16);
786 let mut temp_out: [T; 16] = [T::zero(); 16];
787 daala_fdct_ii_16(
788 coeffs[0],
789 coeffs[1],
790 coeffs[2],
791 coeffs[3],
792 coeffs[4],
793 coeffs[5],
794 coeffs[6],
795 coeffs[7],
796 coeffs[8],
797 coeffs[9],
798 coeffs[10],
799 coeffs[11],
800 coeffs[12],
801 coeffs[13],
802 coeffs[14],
803 coeffs[15],
804 &mut temp_out,
805 );
806
807 coeffs[0] = temp_out[0];
808 coeffs[1] = temp_out[8];
809 coeffs[2] = temp_out[4];
810 coeffs[3] = temp_out[12];
811 coeffs[4] = temp_out[2];
812 coeffs[5] = temp_out[10];
813 coeffs[6] = temp_out[6];
814 coeffs[7] = temp_out[14];
815 coeffs[8] = temp_out[1];
816 coeffs[9] = temp_out[9];
817 coeffs[10] = temp_out[5];
818 coeffs[11] = temp_out[13];
819 coeffs[12] = temp_out[3];
820 coeffs[13] = temp_out[11];
821 coeffs[14] = temp_out[7];
822 coeffs[15] = temp_out[15];
823}
824
825#[$m]
826$($s)* fn daala_fdst_iv_16<T: TxOperations>(
827 s0: T, s1: T, s2: T, s3: T, s4: T, s5: T, s6: T, s7: T, s8: T, s9: T, sa: T,
828 sb: T, sc: T, sd: T, se: T, sf: T, output: &mut [T],
829) {
830 let (s0, sf) =
835 RotateAddShift::kernel::<15, 13, 14>(s0, sf, (24279, 11003, 1137));
836 let (se, s1) =
840 RotateSubShift::kernel::<11, 8, 11>(se, s1, (1645, 305, 425));
841 let (s2, sd) =
845 RotateAddShift::kernel::<14, 13, 13>(s2, sd, (14053, 8423, 2815));
846 let (sc, s3) =
850 RotateSubShift::kernel::<14, 13, 13>(sc, s3, (14811, 7005, 3903));
851 let (s4, sb) =
855 RotateAddShift::kernel::<15, 14, 14>(s4, sb, (30853, 11039, 9907));
856 let (sa, s5) =
860 RotateSubShift::kernel::<14, 13, 11>(sa, s5, (15893, 3981, 1489));
861 let (s6, s9) =
865 RotateAddShift::kernel::<15, 11, 14>(s6, s9, (32413, 601, 13803));
866 let (s8, s7) =
870 RotateSubShift::kernel::<15, 11, 11>(s8, s7, (32729, 201, 1945));
871
872 let (s0, s7) = butterfly_sub_asym((s0.rshift1(), s0), s7);
874 let (s8, sf) = butterfly_sub_asym((s8.rshift1(), s8), sf);
875 let (s4, s3) = butterfly_add_asym((s4.rshift1(), s4), s3);
876 let (sc, sb) = butterfly_add_asym((sc.rshift1(), sc), sb);
877 let (s2, s5) = butterfly_sub_asym((s2.rshift1(), s2), s5);
878 let (sa, sd) = butterfly_sub_asym((sa.rshift1(), sa), sd);
879 let (s6, s1) = butterfly_add_asym((s6.rshift1(), s6), s1);
880 let (se, s9) = butterfly_add_asym((se.rshift1(), se), s9);
881
882 let ((_s8h, s8), s4h) = butterfly_add(s8, s4);
884 let ((_s7h, s7), sbh) = butterfly_add(s7, sb);
885 let ((_sah, sa), s6h) = butterfly_sub(sa, s6);
886 let ((_s5h, s5), s9h) = butterfly_sub(s5, s9);
887 let (s0, s3h) = butterfly_add(s0, s3);
888 let (sd, seh) = butterfly_add(sd, se);
889 let (s2, s1h) = butterfly_sub(s2, s1);
890 let (sf, sch) = butterfly_sub(sf, sc);
891
892 let (s8, s7) =
897 RotateAddAvg::kernel::<8, 11, 15>(s8, s7, (301, 1609, 12785));
898 let (s9, s6) =
902 RotateAdd::kernel::<13, 15, 13>(s9h, s6h, (11363, 9041, 4551));
903 let (s5, sa) =
907 RotateNegAvg::kernel::<12, 15, 12>(s5, sa, (5681, 9041, 4551));
908 let (s4, sb) =
912 RotateNeg::kernel::<13, 14, 15>(s4h, sbh, (9633, 12873, 6393));
913
914 let (s2, sc) = butterfly_add_asym(s2, sch);
916 let (s0, s1) = butterfly_sub_asym(s0, s1h);
917 let (sf, se) = butterfly_add_asym(sf, seh);
918 let (sd, s3) = butterfly_add_asym(sd, s3h);
919 let (s7, s6) = butterfly_add_asym((s7.rshift1(), s7), s6);
920 let (s8, s9) = butterfly_sub_asym((s8.rshift1(), s8), s9);
921 let (sa, sb) = butterfly_sub_asym((sa.rshift1(), sa), sb);
922 let (s5, s4) = butterfly_add_asym((s5.rshift1(), s5), s4);
923
924 let (sc, s3) =
929 RotateAddAvg::kernel::<9, 14, 12>(sc, s3, (669, 8867, 3135));
930 let (s2, sd) =
934 RotateNegAvg::kernel::<9, 14, 12>(s2, sd, (669, 8867, 3135));
935 let (sa, s5) = RotatePi4AddAvg::kernel::<12, 13>(sa, s5, (5793, 11585));
938 let (s6, s9) = RotatePi4AddAvg::kernel::<12, 13>(s6, s9, (5793, 11585));
941 let (se, s1) = RotatePi4AddAvg::kernel::<12, 13>(se, s1, (5793, 11585));
944
945 store_coeffs!(
946 output, s0, s1, s2, s3, s4, s5, s6, s7, s8, s9, sa, sb, sc, sd, se, sf
947 );
948}
949
950#[$m]
951$($s)* fn daala_fdst16<T: TxOperations>(coeffs: &mut [T]) {
952 assert!(coeffs.len() >= 16);
953 let mut temp_out: [T; 16] = [T::zero(); 16];
954 daala_fdst_iv_16(
955 coeffs[0],
956 coeffs[1],
957 coeffs[2],
958 coeffs[3],
959 coeffs[4],
960 coeffs[5],
961 coeffs[6],
962 coeffs[7],
963 coeffs[8],
964 coeffs[9],
965 coeffs[10],
966 coeffs[11],
967 coeffs[12],
968 coeffs[13],
969 coeffs[14],
970 coeffs[15],
971 &mut temp_out,
972 );
973
974 coeffs[0] = temp_out[0];
975 coeffs[1] = temp_out[8];
976 coeffs[2] = temp_out[4];
977 coeffs[3] = temp_out[12];
978 coeffs[4] = temp_out[2];
979 coeffs[5] = temp_out[10];
980 coeffs[6] = temp_out[6];
981 coeffs[7] = temp_out[14];
982 coeffs[8] = temp_out[1];
983 coeffs[9] = temp_out[9];
984 coeffs[10] = temp_out[5];
985 coeffs[11] = temp_out[13];
986 coeffs[12] = temp_out[3];
987 coeffs[13] = temp_out[11];
988 coeffs[14] = temp_out[7];
989 coeffs[15] = temp_out[15];
990}
991
992#[$m]
993$($s)* fn daala_fdct_ii_16_asym<T: TxOperations>(
994 s0h: T, s1: (T, T), s2h: T, s3: (T, T), s4h: T, s5: (T, T), s6h: T,
995 s7: (T, T), s8h: T, s9: (T, T), sah: T, sb: (T, T), sch: T, sd: (T, T),
996 seh: T, sf: (T, T), output: &mut [T],
997) {
998 let (s0, sf) = butterfly_neg_asym(s0h, sf);
1000 let (s1, se) = butterfly_sub_asym(s1, seh);
1001 let (s2, sd) = butterfly_neg_asym(s2h, sd);
1002 let (s3, sc) = butterfly_sub_asym(s3, sch);
1003 let (s4, sb) = butterfly_neg_asym(s4h, sb);
1004 let (s5, sa) = butterfly_sub_asym(s5, sah);
1005 let (s6, s9) = butterfly_neg_asym(s6h, s9);
1006 let (s7, s8) = butterfly_sub_asym(s7, s8h);
1007
1008 daala_fdct_ii_8(s0, s1, s2, s3, s4, s5, s6, s7, &mut output[0..8]);
1010 daala_fdst_iv_8(sf, se, sd, sc, sb, sa, s9, s8, &mut output[8..16]);
1011 output[8..16].reverse();
1012}
1013
1014#[$m]
1015$($s)* fn daala_fdst_iv_16_asym<T: TxOperations>(
1016 s0: (T, T), s1h: T, s2: (T, T), s3h: T, s4: (T, T), s5h: T, s6: (T, T),
1017 s7h: T, s8: (T, T), s9h: T, sa: (T, T), sbh: T, sc: (T, T), sdh: T,
1018 se: (T, T), sfh: T, output: &mut [T],
1019) {
1020 let (s0, sf) =
1025 RotateAddShift::half_kernel::<11, 15, 11>(s0, sfh, (1073, 62241, 201));
1026 let (se, s1) = RotateSubShift::half_kernel::<15, 15, 11>(
1030 se,
1031 s1h,
1032 (18611, 55211, 601),
1033 );
1034 let (s2, sd) =
1038 RotateAddShift::half_kernel::<14, 10, 13>(s2, sdh, (9937, 1489, 3981));
1039 let (sc, s3) = RotateSubShift::half_kernel::<14, 15, 14>(
1043 sc,
1044 s3h,
1045 (10473, 39627, 11039),
1046 );
1047 let (s4, sb) =
1051 RotateAddShift::half_kernel::<12, 12, 13>(s4, sbh, (2727, 3903, 7005));
1052 let (sa, s5) =
1056 RotateSubShift::half_kernel::<13, 12, 13>(sa, s5h, (5619, 2815, 8423));
1057 let (s6, s9) =
1061 RotateAddShift::half_kernel::<12, 15, 8>(s6, s9h, (2865, 13599, 305));
1062 let (s8, s7) = RotateSubShift::half_kernel::<15, 13, 13>(
1066 s8,
1067 s7h,
1068 (23143, 1137, 11003),
1069 );
1070
1071 let (s0, s7) = butterfly_sub_asym((s0.rshift1(), s0), s7);
1073 let (s8, sf) = butterfly_sub_asym((s8.rshift1(), s8), sf);
1074 let (s4, s3) = butterfly_add_asym((s4.rshift1(), s4), s3);
1075 let (sc, sb) = butterfly_add_asym((sc.rshift1(), sc), sb);
1076 let (s2, s5) = butterfly_sub_asym((s2.rshift1(), s2), s5);
1077 let (sa, sd) = butterfly_sub_asym((sa.rshift1(), sa), sd);
1078 let (s6, s1) = butterfly_add_asym((s6.rshift1(), s6), s1);
1079 let (se, s9) = butterfly_add_asym((se.rshift1(), se), s9);
1080
1081 let ((_s8h, s8), s4h) = butterfly_add(s8, s4);
1083 let ((_s7h, s7), sbh) = butterfly_add(s7, sb);
1084 let ((_sah, sa), s6h) = butterfly_sub(sa, s6);
1085 let ((_s5h, s5), s9h) = butterfly_sub(s5, s9);
1086 let (s0, s3h) = butterfly_add(s0, s3);
1087 let (sd, seh) = butterfly_add(sd, se);
1088 let (s2, s1h) = butterfly_sub(s2, s1);
1089 let (sf, sch) = butterfly_sub(sf, sc);
1090
1091 let (s8, s7) =
1096 RotateAdd::kernel::<13, 14, 15>(s8, s7, (9633, 12873, 6393));
1097 let (s9, s6) =
1101 RotateAdd::kernel::<14, 15, 13>(s9h, s6h, (22725, 9041, 4551));
1102 let (s5, sa) =
1106 RotateNeg::kernel::<13, 15, 13>(s5, sa, (11363, 9041, 4551));
1107 let (s4, sb) =
1111 RotateNeg::kernel::<13, 14, 15>(s4h, sbh, (9633, 12873, 6393));
1112
1113 let (s2, sc) = butterfly_add_asym(s2, sch);
1115 let (s0, s1) = butterfly_sub_asym(s0, s1h);
1116 let (sf, se) = butterfly_add_asym(sf, seh);
1117 let (sd, s3) = butterfly_add_asym(sd, s3h);
1118 let (s7, s6) = butterfly_add_asym((s7.rshift1(), s7), s6);
1119 let (s8, s9) = butterfly_sub_asym((s8.rshift1(), s8), s9);
1120 let (sa, sb) = butterfly_sub_asym((sa.rshift1(), sa), sb);
1121 let (s5, s4) = butterfly_add_asym((s5.rshift1(), s5), s4);
1122
1123 let (sc, s3) =
1128 RotateAdd::kernel::<13, 14, 13>(sc, s3, (10703, 8867, 3135));
1129 let (s2, sd) =
1133 RotateNeg::kernel::<13, 14, 13>(s2, sd, (10703, 8867, 3135));
1134 let (sa, s5) = RotatePi4Add::kernel::<13, 13>(sa, s5, (11585, 5793));
1137 let (s6, s9) = RotatePi4Add::kernel::<13, 13>(s6, s9, (11585, 5793));
1140 let (se, s1) = RotatePi4Add::kernel::<13, 13>(se, s1, (11585, 5793));
1143
1144 store_coeffs!(
1145 output, s0, s1, s2, s3, s4, s5, s6, s7, s8, s9, sa, sb, sc, sd, se, sf
1146 );
1147}
1148
1149#[$m]
1150$($s)* fn daala_fdct_ii_32<T: TxOperations>(
1151 t0: T, t1: T, t2: T, t3: T, t4: T, t5: T, t6: T, t7: T, t8: T, t9: T, ta: T,
1152 tb: T, tc: T, td: T, te: T, tf: T, tg: T, th: T, ti: T, tj: T, tk: T, tl: T,
1153 tm: T, tn: T, to: T, tp: T, tq: T, tr: T, ts: T, tt: T, tu: T, tv: T,
1154 output: &mut [T],
1155) {
1156 let (t0h, tv) = butterfly_neg(t0, tv);
1158 let (t1, tuh) = butterfly_add(t1, tu);
1159 let (t2h, tt) = butterfly_neg(t2, tt);
1160 let (t3, tsh) = butterfly_add(t3, ts);
1161 let (t4h, tr) = butterfly_neg(t4, tr);
1162 let (t5, tqh) = butterfly_add(t5, tq);
1163 let (t6h, tp) = butterfly_neg(t6, tp);
1164 let (t7, toh) = butterfly_add(t7, to);
1165 let (t8h, tn) = butterfly_neg(t8, tn);
1166 let (t9, tmh) = butterfly_add(t9, tm);
1167 let (tah, tl) = butterfly_neg(ta, tl);
1168 let (tb, tkh) = butterfly_add(tb, tk);
1169 let (tch, tj) = butterfly_neg(tc, tj);
1170 let (td, tih) = butterfly_add(td, ti);
1171 let (teh, th) = butterfly_neg(te, th);
1172 let (tf, tgh) = butterfly_add(tf, tg);
1173
1174 daala_fdct_ii_16_asym(
1176 t0h,
1177 t1,
1178 t2h,
1179 t3,
1180 t4h,
1181 t5,
1182 t6h,
1183 t7,
1184 t8h,
1185 t9,
1186 tah,
1187 tb,
1188 tch,
1189 td,
1190 teh,
1191 tf,
1192 &mut output[0..16],
1193 );
1194 daala_fdst_iv_16_asym(
1195 tv,
1196 tuh,
1197 tt,
1198 tsh,
1199 tr,
1200 tqh,
1201 tp,
1202 toh,
1203 tn,
1204 tmh,
1205 tl,
1206 tkh,
1207 tj,
1208 tih,
1209 th,
1210 tgh,
1211 &mut output[16..32],
1212 );
1213 output[16..32].reverse();
1214}
1215
1216#[$m]
1217$($s)* fn daala_fdct32<T: TxOperations>(coeffs: &mut [T]) {
1218 assert!(coeffs.len() >= 32);
1219 let mut temp_out: [T; 32] = [T::zero(); 32];
1220 daala_fdct_ii_32(
1221 coeffs[0],
1222 coeffs[1],
1223 coeffs[2],
1224 coeffs[3],
1225 coeffs[4],
1226 coeffs[5],
1227 coeffs[6],
1228 coeffs[7],
1229 coeffs[8],
1230 coeffs[9],
1231 coeffs[10],
1232 coeffs[11],
1233 coeffs[12],
1234 coeffs[13],
1235 coeffs[14],
1236 coeffs[15],
1237 coeffs[16],
1238 coeffs[17],
1239 coeffs[18],
1240 coeffs[19],
1241 coeffs[20],
1242 coeffs[21],
1243 coeffs[22],
1244 coeffs[23],
1245 coeffs[24],
1246 coeffs[25],
1247 coeffs[26],
1248 coeffs[27],
1249 coeffs[28],
1250 coeffs[29],
1251 coeffs[30],
1252 coeffs[31],
1253 &mut temp_out,
1254 );
1255
1256 coeffs[0] = temp_out[0];
1257 coeffs[1] = temp_out[16];
1258 coeffs[2] = temp_out[8];
1259 coeffs[3] = temp_out[24];
1260 coeffs[4] = temp_out[4];
1261 coeffs[5] = temp_out[20];
1262 coeffs[6] = temp_out[12];
1263 coeffs[7] = temp_out[28];
1264 coeffs[8] = temp_out[2];
1265 coeffs[9] = temp_out[18];
1266 coeffs[10] = temp_out[10];
1267 coeffs[11] = temp_out[26];
1268 coeffs[12] = temp_out[6];
1269 coeffs[13] = temp_out[22];
1270 coeffs[14] = temp_out[14];
1271 coeffs[15] = temp_out[30];
1272 coeffs[16] = temp_out[1];
1273 coeffs[17] = temp_out[17];
1274 coeffs[18] = temp_out[9];
1275 coeffs[19] = temp_out[25];
1276 coeffs[20] = temp_out[5];
1277 coeffs[21] = temp_out[21];
1278 coeffs[22] = temp_out[13];
1279 coeffs[23] = temp_out[29];
1280 coeffs[24] = temp_out[3];
1281 coeffs[25] = temp_out[19];
1282 coeffs[26] = temp_out[11];
1283 coeffs[27] = temp_out[27];
1284 coeffs[28] = temp_out[7];
1285 coeffs[29] = temp_out[23];
1286 coeffs[30] = temp_out[15];
1287 coeffs[31] = temp_out[31];
1288}
1289
#[$m]
// Length-32 forward DCT-II on "asymmetric" input produced by a previous
// butterfly stage: the `*h`-suffixed scalar arguments alternate with
// `(T, T)` pair arguments (presumably value/half pairs from the
// butterfly helpers — confirm against butterfly_add/butterfly_neg).
//
// Output layout: output[0..16] gets a 16-point DCT-II of one half of the
// butterflied values, output[16..32] gets a 16-point DST-IV of the other
// half, whose results come out in reverse order (hence the final
// `reverse()`).
$($s)* fn daala_fdct_ii_32_asym<T: TxOperations>(
  t0h: T, t1: (T, T), t2h: T, t3: (T, T), t4h: T, t5: (T, T), t6h: T,
  t7: (T, T), t8h: T, t9: (T, T), tah: T, tb: (T, T), tch: T, td: (T, T),
  teh: T, tf: (T, T), tgh: T, th: (T, T), tih: T, tj: (T, T), tkh: T,
  tl: (T, T), tmh: T, tn: (T, T), toh: T, tp: (T, T), tqh: T, tr: (T, T),
  tsh: T, tt: (T, T), tuh: T, tv: (T, T), output: &mut [T],
) {
  // Stage 1: asymmetric butterflies pairing element x with element
  // (31 - x) (t0 with tv, t1 with tu, ...). `neg` and `sub` variants
  // alternate with the argument shapes.
  let (t0, tv) = butterfly_neg_asym(t0h, tv);
  let (t1, tu) = butterfly_sub_asym(t1, tuh);
  let (t2, tt) = butterfly_neg_asym(t2h, tt);
  let (t3, ts) = butterfly_sub_asym(t3, tsh);
  let (t4, tr) = butterfly_neg_asym(t4h, tr);
  let (t5, tq) = butterfly_sub_asym(t5, tqh);
  let (t6, tp) = butterfly_neg_asym(t6h, tp);
  let (t7, to) = butterfly_sub_asym(t7, toh);
  let (t8, tn) = butterfly_neg_asym(t8h, tn);
  let (t9, tm) = butterfly_sub_asym(t9, tmh);
  let (ta, tl) = butterfly_neg_asym(tah, tl);
  let (tb, tk) = butterfly_sub_asym(tb, tkh);
  let (tc, tj) = butterfly_neg_asym(tch, tj);
  let (td, ti) = butterfly_sub_asym(td, tih);
  let (te, th) = butterfly_neg_asym(teh, th);
  let (tf, tg) = butterfly_sub_asym(tf, tgh);

  // Stage 2: recurse — a 16-point DCT-II on t0..tf, a 16-point DST-IV
  // on tv..tg (note the reversed argument order for the DST).
  daala_fdct_ii_16(
    t0,
    t1,
    t2,
    t3,
    t4,
    t5,
    t6,
    t7,
    t8,
    t9,
    ta,
    tb,
    tc,
    td,
    te,
    tf,
    &mut output[0..16],
  );
  daala_fdst_iv_16(
    tv,
    tu,
    tt,
    ts,
    tr,
    tq,
    tp,
    to,
    tn,
    tm,
    tl,
    tk,
    tj,
    ti,
    th,
    tg,
    &mut output[16..32],
  );
  // The DST half is produced in reverse order; flip it into place.
  output[16..32].reverse();
}
1357
#[$m]
// Length-32 forward DST-IV on asymmetric input: `(T, T)` pair arguments
// alternate with pre-scaled `*h` scalars produced by an earlier
// butterfly stage. The body is a fixed pipeline of pair rotations
// (`Rotate*::kernel` / `half_kernel`, each with its own shift exponents
// and integer constants) interleaved with butterfly stages. Statement
// order, the rebinding of names between stages, and the exact constants
// are all significant — read the stages strictly top to bottom.
// Writes the 32 results to `output` via `store_coeffs!`.
$($s)* fn daala_fdst_iv_32_asym<T: TxOperations>(
  t0: (T, T), t1h: T, t2: (T, T), t3h: T, t4: (T, T), t5h: T, t6: (T, T),
  t7h: T, t8: (T, T), t9h: T, ta: (T, T), tbh: T, tc: (T, T), tdh: T,
  te: (T, T), tfh: T, tg: (T, T), thh: T, ti: (T, T), tjh: T, tk: (T, T),
  tlh: T, tm: (T, T), tnh: T, to: (T, T), tph: T, tq: (T, T), trh: T,
  ts: (T, T), tth: T, tu: (T, T), tvh: T, output: &mut [T],
) {
  // Stage 1: half-kernel rotations combining each pair input with its
  // pre-scaled `*h` partner (t0 with tvh, tu with t1h, ...).
  let (t0, tv) =
    RotateAdd::half_kernel::<13, 14, 15>(t0, tvh, (5933, 22595, 1137));
  let (tu, t1) =
    RotateSub::half_kernel::<13, 14, 15>(tu, t1h, (6203, 21403, 3409));
  let (t2, tt) =
    RotateAdd::half_kernel::<15, 8, 15>(t2, tth, (25833, 315, 5673));
  let (ts, t3) =
    RotateSub::half_kernel::<15, 12, 15>(ts, t3h, (26791, 4717, 7923));
  let (t4, tr) =
    RotateAdd::half_kernel::<13, 14, 15>(t4, trh, (6921, 17531, 10153));
  let (tq, t5) =
    RotateSub::half_kernel::<15, 15, 12>(tq, t5h, (28511, 32303, 1545));
  let (t6, tp) =
    RotateAdd::half_kernel::<15, 14, 12>(t6, tph, (29269, 14733, 1817));
  let (to, t7) =
    RotateSub::half_kernel::<15, 14, 14>(to, t7h, (29957, 13279, 8339));
  let (t8, tn) =
    RotateAdd::half_kernel::<13, 14, 15>(t8, tnh, (7643, 11793, 18779));
  let (tm, t9) =
    RotateSub::half_kernel::<14, 15, 15>(tm, t9h, (15557, 20557, 20835));
  let (ta, tl) =
    RotateAdd::half_kernel::<15, 15, 15>(ta, tlh, (31581, 17479, 22841));
  let (tk, tb) =
    RotateSub::half_kernel::<13, 15, 12>(tk, tbh, (7993, 14359, 3099));
  let (tc, tj) =
    RotateAdd::half_kernel::<14, 13, 15>(tc, tjh, (16143, 2801, 26683));
  let (ti, td) =
    RotateSub::half_kernel::<14, 14, 14>(ti, tdh, (16261, 4011, 14255));
  let (te, th) =
    RotateAdd::half_kernel::<15, 15, 15>(te, thh, (32679, 4821, 30269));
  let (tg, tf) =
    RotateSub::half_kernel::<14, 12, 14>(tg, tfh, (16379, 201, 15977));

  // Stage 2: butterflies; each produces a full value plus a companion
  // bound under a fresh `*h` name for the next stage.
  let (t0, tfh) = butterfly_add(t0, tf);
  let (tv, tgh) = butterfly_sub(tv, tg);
  let (th, tuh) = butterfly_add(th, tu);
  let (te, t1h) = butterfly_sub(te, t1);
  let (t2, tdh) = butterfly_add(t2, td);
  let (tt, tih) = butterfly_sub(tt, ti);
  let (tj, tsh) = butterfly_add(tj, ts);
  let (tc, t3h) = butterfly_sub(tc, t3);
  let (t4, tbh) = butterfly_add(t4, tb);
  let (tr, tkh) = butterfly_sub(tr, tk);
  let (tl, tqh) = butterfly_add(tl, tq);
  let (ta, t5h) = butterfly_sub(ta, t5);
  let (t6, t9h) = butterfly_add(t6, t9);
  let (tp, tmh) = butterfly_sub(tp, tm);
  let (tn, toh) = butterfly_add(tn, to);
  let (t8, t7h) = butterfly_sub(t8, t7);

  // Stage 3: asymmetric butterflies consuming the stage-2 `*h` values.
  let (t0, t7) = butterfly_sub_asym(t0, t7h);
  let (tv, to) = butterfly_add_asym(tv, toh);
  let (tp, tu) = butterfly_sub_asym(tp, tuh);
  let (t6, t1) = butterfly_add_asym(t6, t1h);
  let (t2, t5) = butterfly_sub_asym(t2, t5h);
  let (tt, tq) = butterfly_add_asym(tt, tqh);
  let (tr, ts) = butterfly_sub_asym(tr, tsh);
  let (t4, t3) = butterfly_add_asym(t4, t3h);
  let (t8, tg) = butterfly_add_asym(t8, tgh);
  let (te, tm) = butterfly_sub_asym(te, tmh);
  let (tn, tf) = butterfly_add_asym(tn, tfh);
  let (th, t9) = butterfly_sub_asym(th, t9h);
  let (ta, ti) = butterfly_add_asym(ta, tih);
  let (tc, tk) = butterfly_sub_asym(tc, tkh);
  let (tl, td) = butterfly_add_asym(tl, tdh);
  let (tj, tb) = butterfly_sub_asym(tj, tbh);

  // Stage 4: full-kernel rotations on the middle values; note the two
  // mirrored groups sharing constants (Sub/Add vs. the Neg variants).
  let (tf, tg) =
    RotateSub::kernel::<14, 14, 13>(tf, tg, (17911, 14699, 803));
  let (th, te) =
    RotateAdd::kernel::<13, 13, 12>(th, te, (10217, 5461, 1189));
  let (ti, td) =
    RotateAdd::kernel::<12, 13, 14>(ti, td, (5543, 3363, 7723));
  let (tc, tj) =
    RotateSub::kernel::<13, 14, 13>(tc, tj, (11529, 2271, 5197));
  let (tb, tk) =
    RotateNeg::kernel::<13, 14, 13>(tb, tk, (11529, 2271, 5197));
  let (ta, tl) =
    RotateNeg::kernel::<12, 13, 14>(ta, tl, (5543, 3363, 7723));
  let (t9, tm) =
    RotateNeg::kernel::<13, 13, 12>(t9, tm, (10217, 5461, 1189));
  let (t8, tn) =
    RotateNeg::kernel::<14, 14, 13>(t8, tn, (17911, 14699, 803));

  // Stage 5: more butterflies. Four of them discard one half of the
  // result pair (the `_toh`/`_tqh`/`_t7h`/`_t5h` bindings) — those
  // halves are rebuilt with `rshift1()` in stage 7.
  let (t3, t0h) = butterfly_sub(t3, t0);
  let (ts, tvh) = butterfly_add(ts, tv);
  let (tu, tth) = butterfly_sub(tu, tt);
  let (t1, t2h) = butterfly_add(t1, t2);
  let ((_toh, to), t4h) = butterfly_add(to, t4);
  let ((_tqh, tq), t6h) = butterfly_sub(tq, t6);
  let ((_t7h, t7), trh) = butterfly_add(t7, tr);
  let ((_t5h, t5), tph) = butterfly_sub(t5, tp);
  let (tb, t8h) = butterfly_sub(tb, t8);
  let (tk, tnh) = butterfly_add(tk, tn);
  let (tm, tlh) = butterfly_sub(tm, tl);
  let (t9, tah) = butterfly_add(t9, ta);
  let (tf, tch) = butterfly_sub(tf, tc);
  let (tg, tjh) = butterfly_add(tg, tj);
  let (ti, thh) = butterfly_sub(ti, th);
  let (td, teh) = butterfly_add(td, te);

  // Stage 6: four rotations, two of which operate directly on the
  // half-values (`tph`/`t6h`, `t4h`/`trh`).
  let (to, t7) = RotateAdd::kernel::<8, 11, 15>(to, t7, (301, 1609, 6393));
  let (tph, t6h) =
    RotateAdd::kernel::<13, 15, 13>(tph, t6h, (11363, 9041, 4551));
  let (t5, tq) =
    RotateNeg::kernel::<12, 15, 13>(t5, tq, (5681, 9041, 4551));
  let (t4h, trh) =
    RotateNeg::kernel::<13, 14, 15>(t4h, trh, (9633, 12873, 6393));

  // Stage 7: asymmetric butterflies. The `(x.rshift1(), x)` tuples
  // reconstruct the (half, full) pairs for exactly the four values
  // whose halves were discarded in stage 5.
  let (t1, t0) = butterfly_add_asym(t1, t0h);
  let (tu, tv) = butterfly_sub_asym(tu, tvh);
  let (ts, t2) = butterfly_sub_asym(ts, t2h);
  let (t3, tt) = butterfly_sub_asym(t3, tth);
  let (t5, t4) = butterfly_add_asym((t5.rshift1(), t5), t4h);
  let (tq, tr) = butterfly_sub_asym((tq.rshift1(), tq), trh);
  let (t7, t6) = butterfly_add_asym((t7.rshift1(), t7), t6h);
  let (to, tp) = butterfly_sub_asym((to.rshift1(), to), tph);
  let (t9, t8) = butterfly_add_asym(t9, t8h);
  let (tm, tn) = butterfly_sub_asym(tm, tnh);
  let (tk, ta) = butterfly_sub_asym(tk, tah);
  let (tb, tl) = butterfly_sub_asym(tb, tlh);
  let (ti, tc) = butterfly_add_asym(ti, tch);
  let (td, tj) = butterfly_add_asym(td, tjh);
  let (tf, te) = butterfly_add_asym(tf, teh);
  let (tg, th) = butterfly_sub_asym(tg, thh);

  // Stage 8: final rotations. The first six share one constant set; the
  // Pi4 kernels use the same multiplier (5793) for both factors.
  let (t2, tt) = RotateNeg::kernel::<9, 14, 13>(t2, tt, (669, 8867, 3135));
  let (ts, t3) = RotateAdd::kernel::<9, 14, 13>(ts, t3, (669, 8867, 3135));
  let (ta, tl) = RotateNeg::kernel::<9, 14, 13>(ta, tl, (669, 8867, 3135));
  let (tk, tb) = RotateAdd::kernel::<9, 14, 13>(tk, tb, (669, 8867, 3135));
  let (tc, tj) = RotateAdd::kernel::<9, 14, 13>(tc, tj, (669, 8867, 3135));
  let (ti, td) = RotateNeg::kernel::<9, 14, 13>(ti, td, (669, 8867, 3135));
  let (tu, t1) = RotatePi4Add::kernel::<12, 13>(tu, t1, (5793, 5793));
  let (tq, t5) = RotatePi4Add::kernel::<12, 13>(tq, t5, (5793, 5793));
  let (tp, t6) = RotatePi4Sub::kernel::<12, 13>(tp, t6, (5793, 5793));
  let (tm, t9) = RotatePi4Add::kernel::<12, 13>(tm, t9, (5793, 5793));
  let (te, th) = RotatePi4Add::kernel::<12, 13>(te, th, (5793, 5793));

  store_coeffs!(
    output, t0, t1, t2, t3, t4, t5, t6, t7, t8, t9, ta, tb, tc, td, te, tf,
    tg, th, ti, tj, tk, tl, tm, tn, to, tp, tq, tr, ts, tt, tu, tv
  );
}
1629
1630#[allow(clippy::identity_op)]
1631#[$m]
1632$($s)* fn daala_fdct64<T: TxOperations>(coeffs: &mut [T]) {
1633 assert!(coeffs.len() >= 64);
1634 let mut asym: [(T, T); 32] = [(T::zero(), T::zero()); 32];
1636 let mut half: [T; 32] = [T::zero(); 32];
1637 {
1639 #[$m]
1640 #[inline]
1641 $($s)* fn butterfly_pair<T: TxOperations>(
1642 half: &mut [T; 32], asym: &mut [(T, T); 32], input: &[T], i: usize
1643 ) {
1644 let j = i * 2;
1645 let (ah, c) = butterfly_neg(input[j], input[63 - j]);
1646 let (b, dh) = butterfly_add(input[j + 1], input[63 - j - 1]);
1647 half[i] = ah;
1648 half[31 - i] = dh;
1649 asym[i] = b;
1650 asym[31 - i] = c;
1651 }
1652 butterfly_pair(&mut half, &mut asym, coeffs, 0);
1653 butterfly_pair(&mut half, &mut asym, coeffs, 1);
1654 butterfly_pair(&mut half, &mut asym, coeffs, 2);
1655 butterfly_pair(&mut half, &mut asym, coeffs, 3);
1656 butterfly_pair(&mut half, &mut asym, coeffs, 4);
1657 butterfly_pair(&mut half, &mut asym, coeffs, 5);
1658 butterfly_pair(&mut half, &mut asym, coeffs, 6);
1659 butterfly_pair(&mut half, &mut asym, coeffs, 7);
1660 butterfly_pair(&mut half, &mut asym, coeffs, 8);
1661 butterfly_pair(&mut half, &mut asym, coeffs, 9);
1662 butterfly_pair(&mut half, &mut asym, coeffs, 10);
1663 butterfly_pair(&mut half, &mut asym, coeffs, 11);
1664 butterfly_pair(&mut half, &mut asym, coeffs, 12);
1665 butterfly_pair(&mut half, &mut asym, coeffs, 13);
1666 butterfly_pair(&mut half, &mut asym, coeffs, 14);
1667 butterfly_pair(&mut half, &mut asym, coeffs, 15);
1668 }
1669
1670 let mut temp_out: [T; 64] = [T::zero(); 64];
1671 daala_fdct_ii_32_asym(
1673 half[0],
1674 asym[0],
1675 half[1],
1676 asym[1],
1677 half[2],
1678 asym[2],
1679 half[3],
1680 asym[3],
1681 half[4],
1682 asym[4],
1683 half[5],
1684 asym[5],
1685 half[6],
1686 asym[6],
1687 half[7],
1688 asym[7],
1689 half[8],
1690 asym[8],
1691 half[9],
1692 asym[9],
1693 half[10],
1694 asym[10],
1695 half[11],
1696 asym[11],
1697 half[12],
1698 asym[12],
1699 half[13],
1700 asym[13],
1701 half[14],
1702 asym[14],
1703 half[15],
1704 asym[15],
1705 &mut temp_out[0..32],
1706 );
1707 daala_fdst_iv_32_asym(
1708 asym[31],
1709 half[31],
1710 asym[30],
1711 half[30],
1712 asym[29],
1713 half[29],
1714 asym[28],
1715 half[28],
1716 asym[27],
1717 half[27],
1718 asym[26],
1719 half[26],
1720 asym[25],
1721 half[25],
1722 asym[24],
1723 half[24],
1724 asym[23],
1725 half[23],
1726 asym[22],
1727 half[22],
1728 asym[21],
1729 half[21],
1730 asym[20],
1731 half[20],
1732 asym[19],
1733 half[19],
1734 asym[18],
1735 half[18],
1736 asym[17],
1737 half[17],
1738 asym[16],
1739 half[16],
1740 &mut temp_out[32..64],
1741 );
1742 temp_out[32..64].reverse();
1743
1744 #[$m]
1746 #[inline]
1747 $($s)* fn reorder_4<T: TxOperations>(
1748 output: &mut [T], i: usize, tmp: [T; 64], j: usize
1749 ) {
1750 output[0 + i * 4] = tmp[0 + j];
1751 output[1 + i * 4] = tmp[32 + j];
1752 output[2 + i * 4] = tmp[16 + j];
1753 output[3 + i * 4] = tmp[48 + j];
1754 }
1755 reorder_4(coeffs, 0, temp_out, 0);
1756 reorder_4(coeffs, 1, temp_out, 8);
1757 reorder_4(coeffs, 2, temp_out, 4);
1758 reorder_4(coeffs, 3, temp_out, 12);
1759 reorder_4(coeffs, 4, temp_out, 2);
1760 reorder_4(coeffs, 5, temp_out, 10);
1761 reorder_4(coeffs, 6, temp_out, 6);
1762 reorder_4(coeffs, 7, temp_out, 14);
1763
1764 reorder_4(coeffs, 8, temp_out, 1);
1765 reorder_4(coeffs, 9, temp_out, 9);
1766 reorder_4(coeffs, 10, temp_out, 5);
1767 reorder_4(coeffs, 11, temp_out, 13);
1768 reorder_4(coeffs, 12, temp_out, 3);
1769 reorder_4(coeffs, 13, temp_out, 11);
1770 reorder_4(coeffs, 14, temp_out, 7);
1771 reorder_4(coeffs, 15, temp_out, 15);
1772}
1773
#[$m]
// No-op forward transform: the coefficients pass through unchanged.
// (Presumably selected by the transform-type tables for identity
// rows/columns — confirm against HTX_TAB/VTX_TAB usage.)
$($s)* fn fidentity<T: TxOperations>(_coeffs: &mut [T]) {}
1776
1777#[$m]
1778$($s)* fn fwht4<T: TxOperations>(coeffs: &mut [T]) {
1779 assert!(coeffs.len() >= 4);
1780 let x0 = coeffs[0];
1781 let x1 = coeffs[1];
1782 let x2 = coeffs[2];
1783 let x3 = coeffs[3];
1784
1785 let s0 = x0.add(x1);
1786 let s1 = x3.sub(x2);
1787 let s2 = s0.sub_avg(s1);
1788
1789 let q1 = s2.sub(x2);
1790 let q0 = s0.sub(q1);
1791 let q3 = s2.sub(x1);
1792 let q2 = s1.add(q3);
1793
1794 store_coeffs!(coeffs, q0, q1, q2, q3);
1795}
1796
1797}
1798
1799}