/// Bias added to the even half of the row pass before the final `>> 17`.
///
/// * `128 << 17` turns into `+128` once the sum is shifted right by 17.
/// * `1 << 16` is half of `1 << 17`, so that shift rounds instead of truncating.
/// * `512` is a further small offset; NOTE(review): its rationale is not visible
///   here (it equals the bias used by the column pass) — confirm before changing.
const SCALE_BITS: i32 = 512 + (1 << 16) + (128 << 17);
14
/// Adds two `i32` values, wrapping around on overflow instead of panicking.
#[inline(always)]
fn wa(a: i32, b: i32) -> i32 {
    i32::wrapping_add(a, b)
}
19
/// Subtracts `b` from `a`, wrapping around on overflow instead of panicking.
#[inline(always)]
fn ws(a: i32, b: i32) -> i32 {
    i32::wrapping_sub(a, b)
}
24
/// Multiplies two `i32` values, wrapping around on overflow instead of panicking.
#[inline(always)]
fn wm(a: i32, b: i32) -> i32 {
    i32::wrapping_mul(a, b)
}
29
/// Fast path for a block in which only the first coefficient (`in_vector[0]`)
/// contributes: every one of the 8×8 output samples receives the same value.
///
/// * `in_vector` – coefficient block; only element 0 is read and nothing is written.
/// * `out_vector` – destination; row `r` occupies `out_vector[r * stride..][..8]`.
///   A row whose range does not fit inside the slice is skipped rather than causing
///   a panic, which matches how the general kernel `idct_int` treats a short output.
/// * `stride` – distance, in elements, between the starts of consecutive rows.
#[inline]
pub fn idct_int_1x1(in_vector: &mut [i32; 64], out_vector: &mut [i16], stride: usize) {
    // `+ 4` rounds the division by 8 done by `>> 3`; `+ 1024` becomes `+ 128` after it.
    let level = in_vector[0].wrapping_add(4).wrapping_add(1024) >> 3;
    // Lossless cast: the value has just been limited to 0..=255.
    let coeff = level.clamp(0, 255) as i16;

    for row in 0..8_usize {
        // Saturating arithmetic keeps an absurdly large stride from overflowing;
        // such a range simply fails the bounds check below.
        let start = row.saturating_mul(stride);
        if let Some(dst) = out_vector.get_mut(start..start.saturating_add(8)) {
            dst.fill(coeff);
        }
    }
}
41
42#[allow(unused_assignments)]
43#[allow(
44 clippy::too_many_lines,
45 clippy::op_ref,
46 clippy::cast_possible_truncation
47)]
48pub fn idct_int(in_vector: &mut [i32; 64], out_vector: &mut [i16], stride: usize) {
49 let mut pos = 0;
50 let mut i = 0;
51
52 if &in_vector[1..] == &[0_i32; 63] {
53 return idct_int_1x1(in_vector, out_vector, stride);
54 }
55
56 for ptr in 0..8 {
58 let p2 = in_vector[ptr + 16];
59 let p3 = in_vector[ptr + 48];
60
61 let p1 = wm(wa(p2, p3), 2217);
62
63 let t2 = wa(p1, wm(p3, -7567));
64 let t3 = wa(p1, wm(p2, 3135));
65
66 let p2 = in_vector[ptr];
67 let p3 = in_vector[32 + ptr];
68
69 let t0 = fsh(wa(p2, p3));
70 let t1 = fsh(ws(p2, p3));
71
72 let x0 = wa(wa(t0, t3), 512);
73 let x3 = wa(ws(t0, t3), 512);
74 let x1 = wa(wa(t1, t2), 512);
75 let x2 = wa(ws(t1, t2), 512);
76
77 let mut t0 = in_vector[ptr + 56];
78 let mut t1 = in_vector[ptr + 40];
79 let mut t2 = in_vector[ptr + 24];
80 let mut t3 = in_vector[ptr + 8];
81
82 let p3 = wa(t0, t2);
83 let p4 = wa(t1, t3);
84 let p1 = wa(t0, t3);
85 let p2 = wa(t1, t2);
86 let p5 = wm(wa(p3, p4), 4816);
87
88 t0 = wm(t0, 1223);
89 t1 = wm(t1, 8410);
90 t2 = wm(t2, 12586);
91 t3 = wm(t3, 6149);
92
93 let p1 = wa(p5, wm(p1, -3685));
94 let p2 = wa(p5, wm(p2, -10497));
95 let p3 = wm(p3, -8034);
96 let p4 = wm(p4, -1597);
97
98 t3 = wa(t3, wa(p1, p4));
99 t2 = wa(t2, wa(p2, p3));
100 t1 = wa(t1, wa(p2, p4));
101 t0 = wa(t0, wa(p1, p3));
102
103 in_vector[ptr] = ws(wa(x0, t3), 0) >> 10;
104 in_vector[ptr + 8] = ws(wa(x1, t2), 0) >> 10;
105 in_vector[ptr + 16] = ws(wa(x2, t1), 0) >> 10;
106 in_vector[ptr + 24] = ws(wa(x3, t0), 0) >> 10;
107 in_vector[ptr + 32] = ws(ws(x3, t0), 0) >> 10;
108 in_vector[ptr + 40] = ws(ws(x2, t1), 0) >> 10;
109 in_vector[ptr + 48] = ws(ws(x1, t2), 0) >> 10;
110 in_vector[ptr + 56] = ws(ws(x0, t3), 0) >> 10;
111 }
112
113 while i < 64 {
115 let p2 = in_vector[i + 2];
116 let p3 = in_vector[i + 6];
117
118 let p1 = wm(wa(p2, p3), 2217);
119 let t2 = wa(p1, wm(p3, -7567));
120 let t3 = wa(p1, wm(p2, 3135));
121
122 let p2 = in_vector[i];
123 let p3 = in_vector[i + 4];
124
125 let t0 = fsh(wa(p2, p3));
126 let t1 = fsh(ws(p2, p3));
127
128 let x0 = wa(wa(t0, t3), SCALE_BITS);
129 let x3 = wa(ws(t0, t3), SCALE_BITS);
130 let x1 = wa(wa(t1, t2), SCALE_BITS);
131 let x2 = wa(ws(t1, t2), SCALE_BITS);
132
133 let mut t0 = in_vector[i + 7];
134 let mut t1 = in_vector[i + 5];
135 let mut t2 = in_vector[i + 3];
136 let mut t3 = in_vector[i + 1];
137
138 let p3 = wa(t0, t2);
139 let p4 = wa(t1, t3);
140 let p1 = wa(t0, t3);
141 let p2 = wa(t1, t2);
142 let p5 = wm(wa(p3, p4), f2f(1.175875602));
143
144 t0 = wm(t0, 1223);
145 t1 = wm(t1, 8410);
146 t2 = wm(t2, 12586);
147 t3 = wm(t3, 6149);
148
149 let p1 = wa(p5, wm(p1, -3685));
150 let p2 = wa(p5, wm(p2, -10497));
151 let p3 = wm(p3, -8034);
152 let p4 = wm(p4, -1597);
153
154 t3 = wa(t3, wa(p1, p4));
155 t2 = wa(t2, wa(p2, p3));
156 t1 = wa(t1, wa(p2, p4));
157 t0 = wa(t0, wa(p1, p3));
158
159 let mut tmp = [0; 8];
161
162 let out: &mut [i16; 8] = out_vector
163 .get_mut(pos..pos + 8)
164 .unwrap_or(&mut tmp)
165 .try_into()
166 .unwrap();
167
168 out[0] = clamp(wa(x0, t3) >> 17);
169 out[1] = clamp(wa(x1, t2) >> 17);
170 out[2] = clamp(wa(x2, t1) >> 17);
171 out[3] = clamp(wa(x3, t0) >> 17);
172 out[4] = clamp(ws(x3, t0) >> 17);
173 out[5] = clamp(ws(x2, t1) >> 17);
174 out[6] = clamp(ws(x1, t2) >> 17);
175 out[7] = clamp(ws(x0, t3) >> 17);
176
177 i += 8;
178 pos += stride;
179 }
180}
181
/// Converts a floating-point constant to fixed point with 12 fractional bits:
/// the value is multiplied by 4096, offset by 0.5 and then truncated toward zero
/// by the `as` cast.
#[inline]
#[allow(clippy::cast_possible_truncation)]
fn f2f(x: f32) -> i32 {
    let scaled = x * 4096.0 + 0.5;
    scaled as i32
}
188
/// Shifts `x` left by 12 bits, i.e. scales it by 4096 (high bits are discarded).
#[inline]
fn fsh(x: i32) -> i32 {
    const FRACTION_BITS: u32 = 12;
    x << FRACTION_BITS
}
194
/// Limits `a` to the range `0..=255` and returns it as an `i16`.
#[inline]
#[allow(clippy::cast_possible_truncation)]
fn clamp(a: i32) -> i16 {
    // Lossless cast: the value is already within 0..=255.
    let bounded = a.max(0).min(255);
    bounded as i16
}
201
202pub fn idct4x4(in_vector: &mut [i32; 64], out_vector: &mut [i16], stride: usize) {
204 let mut pos = 0;
205
206 for ptr in 0..4 {
208 let i0 = wa(fsh(in_vector[ptr]), 512);
209 let i2 = in_vector[ptr + 16];
210
211 let p1 = wm(i2, 2217);
212 let p3 = wm(i2, 5352);
213
214 let x0 = wa(i0, p3);
215 let x1 = wa(i0, p1);
216 let x2 = ws(i0, p1);
217 let x3 = ws(i0, p3);
218
219 let i4 = in_vector[ptr + 24];
221 let i3 = in_vector[ptr + 8];
222
223 let p5 = wm(wa(i4, i3), 4816);
224
225 let p1 = wa(p5, wm(i3, -3685));
226 let p2 = wa(p5, wm(i4, -10497));
227
228 let t3 = wa(p5, wm(i3, 867));
229 let t2 = wa(p5, wm(i4, -5945));
230
231 let t1 = wa(p2, wm(i3, -1597));
232 let t0 = wa(p1, wm(i4, -8034));
233
234 in_vector[ptr] = wa(x0, t3) >> 10;
235 in_vector[ptr + 8] = wa(x1, t2) >> 10;
236 in_vector[ptr + 16] = wa(x2, t1) >> 10;
237 in_vector[ptr + 24] = wa(x3, t0) >> 10;
238 in_vector[ptr + 32] = ws(x3, t0) >> 10;
239 in_vector[ptr + 40] = ws(x2, t1) >> 10;
240 in_vector[ptr + 48] = ws(x1, t2) >> 10;
241 in_vector[ptr + 56] = ws(x0, t3) >> 10;
242 }
243
244 for i in (0..8).map(|i| 8 * i) {
246 let i2 = in_vector[i + 2];
247 let i0 = in_vector[i];
248
249 let t0 = wa(fsh(i0), SCALE_BITS);
250 let t2 = wm(i2, 2217);
251 let t3 = wm(i2, 5352);
252
253 let x0 = wa(t0, t3);
254 let x3 = ws(t0, t3);
255 let x1 = wa(t0, t2);
256 let x2 = ws(t0, t2);
257
258 let i3 = in_vector[i + 3];
260 let i1 = in_vector[i + 1];
261
262 let p5 = wm(wa(i3, i1), f2f(1.175875602));
263
264 let p1 = wa(p5, wm(i1, -3685));
265 let p2 = wa(p5, wm(i3, -10497));
266
267 let t3 = wa(p5, wm(i1, 867));
268 let t2 = wa(p5, wm(i3, -5945));
269
270 let t1 = wa(p2, wm(i1, -1597));
271 let t0 = wa(p1, wm(i3, -8034));
272
273 let mut tmp = [0; 8];
275
276 let out: &mut [i16; 8] = out_vector
277 .get_mut(pos..pos + 8)
278 .unwrap_or(&mut tmp)
279 .try_into()
280 .unwrap();
281
282 out.copy_from_slice(&[
283 clamp(wa(x0, t3) >> 17),
284 clamp(wa(x1, t2) >> 17),
285 clamp(wa(x2, t1) >> 17),
286 clamp(wa(x3, t0) >> 17),
287 clamp(ws(x3, t0) >> 17),
288 clamp(ws(x2, t1) >> 17),
289 clamp(ws(x1, t2) >> 17),
290 clamp(ws(x0, t3) >> 17)
291 ]);
292
293 pos += stride;
294 }
295
296 in_vector[32..36].fill(0);
297 in_vector[40..44].fill(0);
298 in_vector[48..52].fill(0);
299 in_vector[56..60].fill(0);
300}