1#[cfg(target_arch = "wasm32")]
2use std::arch::wasm32::*;
3
/// One-dimensional 8-point inverse DCT pass, computed in place.
///
/// `data[k]` holds input row `k`; each of the eight 16-bit lanes of a row is
/// processed independently, so one call transforms eight columns at once.
/// All additions and subtractions saturate, and every fractional multiply is
/// an `i16x8_q15mulr_sat`, i.e. a rounding multiply by `constant / 2^15`.
/// No final scaling or shift is applied here.
#[cfg(target_arch = "wasm32")]
#[target_feature(enable = "simd128")]
fn idct8(data: &mut [v128; 8]) {
    // Q15 multipliers (value / 32768). A Q15 factor is always below 1 in
    // magnitude, so factors >= 1 are built as "x + frac * x", "2x + frac * x", ...
    const Q15_0_541: i16 = 17734;
    const Q15_0_848: i16 = 27779;
    const Q15_0_765: i16 = 25079;
    const Q15_0_176: i16 = 5763;
    const Q15_0_299: i16 = 9786;
    const Q15_0_053: i16 = 1741;
    const Q15_0_073: i16 = 2383;
    const Q15_0_501: i16 = 16427;
    const Q15_0_900: i16 = 29490;
    const Q15_0_563: i16 = 18446;
    const Q15_NEG_0_962: i16 = -31509;
    const Q15_NEG_0_390: i16 = -12785;

    // Every input row is read before any output row is written.
    let [in0, in1, in2, in3, in4, in5, in6, in7] = *data;

    // ---- Even part: rows 0, 2, 4, 6 ----
    let rot = i16x8_q15mulr_sat(i16x8_add_sat(in2, in6), i16x8_splat(Q15_0_541));
    // rot - in6 - 0.848 * in6  (~ rot - 1.848 * in6)
    let rot_minus6 = i16x8_sub_sat(rot, in6);
    let even_b = i16x8_sub_sat(rot_minus6, i16x8_q15mulr_sat(in6, i16x8_splat(Q15_0_848)));
    // rot + 0.765 * in2
    let even_a = i16x8_add_sat(rot, i16x8_q15mulr_sat(in2, i16x8_splat(Q15_0_765)));

    let sum04 = i16x8_add_sat(in0, in4);
    let diff04 = i16x8_sub_sat(in0, in4);

    let even0 = i16x8_add_sat(sum04, even_a);
    let even3 = i16x8_sub_sat(sum04, even_a);
    let even1 = i16x8_add_sat(diff04, even_b);
    let even2 = i16x8_sub_sat(diff04, even_b);

    // ---- Odd part: rows 1, 3, 5, 7 ----
    let sum73 = i16x8_add_sat(in7, in3);
    let sum51 = i16x8_add_sat(in5, in1);
    let sum71 = i16x8_add_sat(in7, in1);
    let sum53 = i16x8_add_sat(in5, in3);

    // (sum73 + sum51) * ~1.176
    let sum_all = i16x8_add_sat(sum73, sum51);
    let shared = i16x8_add_sat(
        sum_all,
        i16x8_q15mulr_sat(sum_all, i16x8_splat(Q15_0_176)),
    );

    // in7 * 0.299
    let scaled7 = i16x8_q15mulr_sat(in7, i16x8_splat(Q15_0_299));
    // in5 * ~2.053 = (in5 + in5) + 0.053 * in5
    let twice5 = i16x8_add_sat(in5, in5);
    let scaled5 = i16x8_add_sat(twice5, i16x8_q15mulr_sat(in5, i16x8_splat(Q15_0_053)));
    // in3 * ~3.073 = (in3 + (in3 + in3)) + 0.073 * in3
    let thrice3 = i16x8_add_sat(in3, i16x8_add_sat(in3, in3));
    let scaled3 = i16x8_add_sat(thrice3, i16x8_q15mulr_sat(in3, i16x8_splat(Q15_0_073)));
    // in1 * ~1.501 = in1 + 0.501 * in1
    let scaled1 = i16x8_add_sat(in1, i16x8_q15mulr_sat(in1, i16x8_splat(Q15_0_501)));

    // shared - 0.900 * sum71
    let mix71 = i16x8_sub_sat(shared, i16x8_q15mulr_sat(sum71, i16x8_splat(Q15_0_900)));
    // shared - ~2.563 * sum53 = ((shared - sum53) - sum53) - 0.563 * sum53
    let shared_minus_2x53 = i16x8_sub_sat(i16x8_sub_sat(shared, sum53), sum53);
    let mix53 = i16x8_sub_sat(
        shared_minus_2x53,
        i16x8_q15mulr_sat(sum53, i16x8_splat(Q15_0_563)),
    );
    // sum73 * ~-1.962 = (-0.962 * sum73) - sum73
    let mix73 = i16x8_sub_sat(
        i16x8_q15mulr_sat(sum73, i16x8_splat(Q15_NEG_0_962)),
        sum73,
    );
    // sum51 * -0.390
    let mix51 = i16x8_q15mulr_sat(sum51, i16x8_splat(Q15_NEG_0_390));

    let odd0 = i16x8_add_sat(i16x8_add_sat(mix71, mix51), scaled1);
    let odd1 = i16x8_add_sat(i16x8_add_sat(mix53, mix73), scaled3);
    let odd2 = i16x8_add_sat(i16x8_add_sat(mix53, mix51), scaled5);
    let odd3 = i16x8_add_sat(i16x8_add_sat(mix71, mix73), scaled7);

    // Final butterfly: output k is even + odd, output 7 - k is even - odd.
    *data = [
        i16x8_add_sat(even0, odd0),
        i16x8_add_sat(even1, odd1),
        i16x8_add_sat(even2, odd2),
        i16x8_add_sat(even3, odd3),
        i16x8_sub_sat(even3, odd3),
        i16x8_sub_sat(even2, odd2),
        i16x8_sub_sat(even1, odd1),
        i16x8_sub_sat(even0, odd0),
    ];
}
83
/// Transposes, in place, an 8x8 matrix of 16-bit values stored as eight
/// `v128` rows (lane `c` of `data[r]` becomes lane `r` of `data[c]`).
///
/// The transpose is done in three interleave stages of growing granularity:
/// 16-bit lanes of neighbouring rows, then 32-bit pairs, then 64-bit halves.
#[cfg(target_arch = "wasm32")]
#[target_feature(enable = "simd128")]
fn transpose8(data: &mut [v128; 8]) {
    let [r0, r1, r2, r3, r4, r5, r6, r7] = *data;

    // Stage 1: interleave 16-bit lanes of row pairs (0,1), (2,3), (4,5), (6,7).
    // `*_lo` covers columns 0..=3, `*_hi` covers columns 4..=7.
    let pair01_lo = i16x8_shuffle::<0, 8, 1, 9, 2, 10, 3, 11>(r0, r1);
    let pair23_lo = i16x8_shuffle::<0, 8, 1, 9, 2, 10, 3, 11>(r2, r3);
    let pair45_lo = i16x8_shuffle::<0, 8, 1, 9, 2, 10, 3, 11>(r4, r5);
    let pair67_lo = i16x8_shuffle::<0, 8, 1, 9, 2, 10, 3, 11>(r6, r7);
    let pair01_hi = i16x8_shuffle::<4, 12, 5, 13, 6, 14, 7, 15>(r0, r1);
    let pair23_hi = i16x8_shuffle::<4, 12, 5, 13, 6, 14, 7, 15>(r2, r3);
    let pair45_hi = i16x8_shuffle::<4, 12, 5, 13, 6, 14, 7, 15>(r4, r5);
    let pair67_hi = i16x8_shuffle::<4, 12, 5, 13, 6, 14, 7, 15>(r6, r7);

    // Stage 2: interleave 32-bit groups, giving four rows' worth of two columns
    // per vector. Suffix digits name the columns held.
    let top_cols01 = i32x4_shuffle::<0, 4, 1, 5>(pair01_lo, pair23_lo);
    let top_cols23 = i32x4_shuffle::<2, 6, 3, 7>(pair01_lo, pair23_lo);
    let bot_cols01 = i32x4_shuffle::<0, 4, 1, 5>(pair45_lo, pair67_lo);
    let bot_cols23 = i32x4_shuffle::<2, 6, 3, 7>(pair45_lo, pair67_lo);
    let top_cols45 = i32x4_shuffle::<0, 4, 1, 5>(pair01_hi, pair23_hi);
    let top_cols67 = i32x4_shuffle::<2, 6, 3, 7>(pair01_hi, pair23_hi);
    let bot_cols45 = i32x4_shuffle::<0, 4, 1, 5>(pair45_hi, pair67_hi);
    let bot_cols67 = i32x4_shuffle::<2, 6, 3, 7>(pair45_hi, pair67_hi);

    // Stage 3: join the 64-bit halves from rows 0..=3 ("top") and rows 4..=7
    // ("bot") to form each complete output row (= input column).
    *data = [
        i64x2_shuffle::<0, 2>(top_cols01, bot_cols01),
        i64x2_shuffle::<1, 3>(top_cols01, bot_cols01),
        i64x2_shuffle::<0, 2>(top_cols23, bot_cols23),
        i64x2_shuffle::<1, 3>(top_cols23, bot_cols23),
        i64x2_shuffle::<0, 2>(top_cols45, bot_cols45),
        i64x2_shuffle::<1, 3>(top_cols45, bot_cols45),
        i64x2_shuffle::<0, 2>(top_cols67, bot_cols67),
        i64x2_shuffle::<1, 3>(top_cols67, bot_cols67),
    ];
}
121
/// Dequantizes one 8x8 block of coefficients, runs a 2-D inverse DCT on it and
/// writes the resulting 8x8 block of `u8` samples into `output`.
///
/// * `coefficients` - the 64 coefficients, read as eight rows of eight.
/// * `quantization_table` - 64 multipliers, applied lane-wise with a 16-bit
///   multiply (`i16x8_mul`).
/// * `output_linestride` - distance, in bytes, between the starts of two
///   consecutive output rows.
/// * `output` - destination; row `i` is written to
///   `output[output_linestride * i .. output_linestride * i + 8]`.
///
/// # Panics
///
/// Panics if `output_linestride * 7 + 7` overflows `usize` or is not a valid
/// index into `output`.
#[cfg(target_arch = "wasm32")]
#[target_feature(enable = "simd128")]
pub fn dequantize_and_idct_block_8x8(
    coefficients: &[i16; 64],
    quantization_table: &[u16; 64],
    output_linestride: usize,
    output: &mut [u8],
) {
    // Left shift applied to the dequantized input.
    const SHIFT: u32 = 3;
    // Right shift applied after both IDCT passes: the input shift plus 3 more
    // bits (presumably the scaling accumulated by the two passes - TODO confirm).
    const OUTPUT_SHIFT: u32 = SHIFT + 3;
    // 128, expressed at the pre-shift scale, re-centres samples for `u8` storage.
    const OFFSET: i16 = 128 << OUTPUT_SHIFT;
    // Half of one output unit, so the right shift rounds instead of truncating.
    const ROUNDING_BIAS: i16 = (1 << OUTPUT_SHIFT) >> 1;

    // The highest byte written below is at `output_linestride * 7 + 7`; if that
    // index is in bounds, so is every other store.
    assert!(
        output.len()
            > output_linestride
                .checked_mul(7)
                .unwrap()
                .checked_add(7)
                .unwrap()
    );

    let coeff_base = coefficients.as_ptr();
    let quant_base = quantization_table.as_ptr();

    // Load each row, dequantize it and scale it up by `SHIFT` bits.
    let mut data = [i16x8_splat(0); 8];
    for (row, slot) in data.iter_mut().enumerate() {
        let first = row * 8;
        // SAFETY: `row < 8`, so each 16-byte load covers elements
        // `first .. first + 8`, which lie inside the 64-element arrays.
        // `v128_load` has no alignment requirement.
        let (coeffs, quants) = unsafe {
            (
                v128_load(coeff_base.wrapping_add(first) as *const _),
                v128_load(quant_base.wrapping_add(first) as *const _),
            )
        };
        *slot = i16x8_shl(i16x8_mul(coeffs, quants), SHIFT);
    }

    // 2-D transform: 1-D pass, transpose, 1-D pass, transpose back.
    idct8(&mut data);
    transpose8(&mut data);
    idct8(&mut data);
    transpose8(&mut data);

    let bias = i16x8_splat(OFFSET + ROUNDING_BIAS);
    let zero = i16x8_splat(0);
    let out_base = output.as_mut_ptr();

    for (row, &values) in data.iter().enumerate() {
        let shifted = i16x8_shr(i16x8_add_sat(values, bias), OUTPUT_SHIFT);
        // Saturate to 0..=255; the eight samples land in the low 64 bits.
        let packed = u8x16_narrow_i16x8(shifted, zero);

        // SAFETY: the assertion above guarantees that
        // `output_linestride * row + 7 < output.len()` for every `row < 8`,
        // so the 8-byte store stays inside `output`.
        unsafe {
            v128_store64_lane::<0>(
                packed,
                out_base.wrapping_add(output_linestride * row) as *mut _,
            );
        }
    }
}
188
/// Converts a line of planar Y / Cb / Cr samples to interleaved RGB, eight
/// pixels per iteration.
///
/// `output` receives three bytes (R, G, B) per pixel, so the number of pixels
/// requested is `output.len() / 3`. Only whole groups of eight pixels are
/// converted; the return value is the number of pixels actually written
/// (a multiple of 8). Bytes of `output` past that point are left untouched.
///
/// # Panics
///
/// Panics if `output.len()` is not a multiple of 3, or if any input slice
/// holds fewer than `output.len() / 3` samples.
#[cfg(target_arch = "wasm32")]
#[target_feature(enable = "simd128")]
pub fn color_convert_line_ycbcr(y_slice: &[u8], cb_slice: &[u8], cr_slice: &[u8], output: &mut [u8]) -> usize {
    // Number of fractional bits carried through the fixed-point arithmetic.
    const SHIFT: u32 = 6;
    // Q15 multipliers (value / 32768). The two factors above 1 (1.402 and 1.772)
    // are applied as "x + frac * x" because a Q15 factor must stay below 1.
    const Q15_0_402: i16 = 13173;
    const Q15_0_344: i16 = 11276;
    const Q15_0_714: i16 = 23401;
    const Q15_0_772: i16 = 25297;

    assert!(output.len() % 3 == 0);
    let num = output.len() / 3;
    assert!(num <= y_slice.len());
    assert!(num <= cb_slice.len());
    assert!(num <= cr_slice.len());

    let num_vecs = num / 8;

    // Loop-invariant vectors.
    let chroma_center = i16x8_splat(128 << SHIFT);
    let round_half = i16x8_splat((1 << SHIFT) >> 1);
    let zero = u8x16_splat(0);

    let y_base = y_slice.as_ptr();
    let cb_base = cb_slice.as_ptr();
    let cr_base = cr_slice.as_ptr();
    let out_base = output.as_mut_ptr();

    for group in 0..num_vecs {
        let src = group * 8;
        let dst = 24 * group;

        // SAFETY: `src + 8 <= num_vecs * 8 <= num`, and `num` does not exceed the
        // length of any input slice, so each 8-byte load is in bounds.
        let (y_raw, cb_raw, cr_raw) = unsafe {
            (
                v128_load64_zero(y_base.wrapping_add(src) as *const _),
                v128_load64_zero(cb_base.wrapping_add(src) as *const _),
                v128_load64_zero(cr_base.wrapping_add(src) as *const _),
            )
        };

        // Widen to 16 bits and move to the fixed-point scale.
        let y_fx = i16x8_shl(i16x8_extend_low_u8x16(y_raw), SHIFT);
        let cb_fx = i16x8_shl(i16x8_extend_low_u8x16(cb_raw), SHIFT);
        let cr_fx = i16x8_shl(i16x8_extend_low_u8x16(cr_raw), SHIFT);

        // Pre-add the rounding half to luma; centre chroma around zero.
        let luma = i16x8_add_sat(y_fx, round_half);
        let cb_c = i16x8_sub_sat(cb_fx, chroma_center);
        let cr_c = i16x8_sub_sat(cr_fx, chroma_center);

        let cr_times_1_402 = i16x8_add_sat(i16x8_q15mulr_sat(cr_c, i16x8_splat(Q15_0_402)), cr_c);
        let cb_times_0_344 = i16x8_q15mulr_sat(cb_c, i16x8_splat(Q15_0_344));
        let cr_times_0_714 = i16x8_q15mulr_sat(cr_c, i16x8_splat(Q15_0_714));
        let cb_times_1_772 = i16x8_add_sat(i16x8_q15mulr_sat(cb_c, i16x8_splat(Q15_0_772)), cb_c);

        let red = i16x8_add_sat(luma, cr_times_1_402);
        let green = i16x8_sub_sat(luma, i16x8_add_sat(cb_times_0_344, cr_times_0_714));
        let blue = i16x8_add_sat(luma, cb_times_1_772);

        // Drop the fractional bits and saturate to 0..=255; the eight samples of
        // each channel end up in bytes 0..=7 of the vector.
        let red8 = u8x16_narrow_i16x8(i16x8_shr(red, SHIFT), zero);
        let green8 = u8x16_narrow_i16x8(i16x8_shr(green, SHIFT), zero);
        let blue8 = u8x16_narrow_i16x8(i16x8_shr(blue, SHIFT), zero);

        // r0 g0 r1 g1 ... r7 g7
        let red_green = i8x16_shuffle::<0, 16, 1, 17, 2, 18, 3, 19, 4, 20, 5, 21, 6, 22, 7, 23>(
            red8, green8,
        );

        // First 16 output bytes: r0 g0 b0 ... r4 g4 b4 r5
        let first16 = i8x16_shuffle::<0, 1, 16, 2, 3, 17, 4, 5, 18, 6, 7, 19, 8, 9, 20, 10>(
            red_green, blue8,
        );
        // Last 8 output bytes (low half only): g5 b5 r6 g6 b6 r7 g7 b7
        let last8 = i8x16_shuffle::<11, 21, 12, 13, 22, 14, 15, 23, 0, 0, 0, 0, 0, 0, 0, 0>(
            red_green, blue8,
        );

        // SAFETY: the two stores cover bytes `dst .. dst + 24`, and
        // `dst + 24 <= 24 * num_vecs <= 3 * num == output.len()`.
        unsafe {
            v128_store(out_base.wrapping_add(dst) as *mut _, first16);
            v128_store64_lane::<0>(last8, out_base.wrapping_add(dst + 16) as *mut _);
        }
    }

    num_vecs * 8
}