zune_jpeg/
upsampler.rs

1/*
2 * Copyright (c) 2023.
3 *
4 * This software is free software;
5 *
6 * You can redistribute it or modify it under terms of the MIT, Apache License or Zlib license
7 */
8
9//! Up-sampling routines
10//!
//! The main upsampling method is bi-linear interpolation, also known as a
//! "triangle filter" or libjpeg-turbo's `fancy_upsampling`, which is a good
//! compromise between speed and visual quality.
14//!
15//! # The filter
16//! Each output pixel is made from `(3*A+B)/4` where A is the original
17//! pixel closer to the output and B is the one further.
18//!
19//! ```text
//! +---+---+
21//! | A | B |
22//! +---+---+
23//! +-+-+-+-+
24//! | |P| | |
25//! +-+-+-+-+
26//! ```
27//!
28//! # Horizontal Bi-linear filter
29//! ```text
30//! |---+-----------+---+
31//! |   |           |   |
32//! | A | |p1 | p2| | B |
33//! |   |           |   |
34//! |---+-----------+---+
35//!
36//! ```
37//! For a horizontal bi-linear it's trivial to implement,
38//!
39//! `A` becomes the input closest to the output.
40//!
41//! `B` varies depending on output.
42//!  - For odd positions, input is the `next` pixel after A
43//!  - For even positions, input is the `previous` value before A.
44//!
//! We iterate in a classic 1-D sliding window with a window of 3.
//! In this sliding-window approach, `A` is the 1st term and `B` is either the 0th or the 2nd term,
//! depending on the position we are writing (see the scalar code).
48//!
49//! For vector code see module sse for explanation.
50//!
51//! # Vertical bi-linear.
52//! Vertical up-sampling is a bit trickier.
53//!
54//! ```text
55//! +----+----+
56//! | A1 | A2 |
57//! +----+----+
58//! +----+----+
59//! | p1 | p2 |
60//! +----+-+--+
61//! +----+-+--+
62//! | p3 | p4 |
63//! +----+-+--+
64//! +----+----+
65//! | B1 | B2 |
66//! +----+----+
67//! ```
68//!
69//! For `p1`
70//! - `A1` is given a weight of `3` and `B1` is given a weight of 1.
71//!
72//! For `p3`
73//! - `B1` is given a weight of `3` and `A1` is given a weight of 1
74//!
75//! # Horizontal vertical downsampling/chroma quartering.
76//!
77//! Carry out a vertical filter in the first pass, then a horizontal filter in the second pass.
78#![allow(unreachable_code)]
79use zune_core::options::DecoderOptions;
80
81use crate::components::UpSampler;
82
83#[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
84#[cfg(feature = "x86")]
85mod avx2;
86#[cfg(target_arch = "aarch64")]
87#[cfg(feature = "neon")]
88mod neon;
89#[cfg(feature = "portable_simd")]
90mod portable_simd;
91mod scalar;
92
93// choose the best possible implementation for this platform
94#[allow(unused_variables)]
95pub fn choose_horizontal_samp_function(options: &DecoderOptions) -> UpSampler {
96    #[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
97    #[cfg(feature = "x86")]
98    if options.use_avx2() {
99        return |a: &[i16], b: &[i16], c: &[i16], d: &mut [i16], e: &mut [i16]| {
100            // SAFETY: `options.use_avx2()` only returns true if avx2 is supported.
101            unsafe { avx2::upsample_horizontal_avx2(a, b, c, d, e) }
102        };
103    }
104    #[cfg(target_arch = "aarch64")]
105    #[cfg(feature = "neon")]
106    if options.use_neon() {
107        return |a: &[i16], b: &[i16], c: &[i16], d: &mut [i16], e: &mut [i16]| {
108            // SAFETY: `options.use_neon()` only returns true if neon is supported.
109            unsafe { neon::upsample_horizontal_neon(a, b, c, d, e) }
110        };
111    }
112    #[cfg(feature = "portable_simd")]
113    return portable_simd::upsample_horizontal_simd;
114    return scalar::upsample_horizontal;
115}
116
117#[allow(unused_variables)]
118pub fn choose_hv_samp_function(options: &DecoderOptions) -> UpSampler {
119    #[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
120    #[cfg(feature = "x86")]
121    if options.use_avx2() {
122        return |a: &[i16], b: &[i16], c: &[i16], d: &mut [i16], e: &mut [i16]| {
123            // SAFETY: `options.use_avx2()` only returns true if avx2 is supported.
124            unsafe { avx2::upsample_hv_avx2(a, b, c, d, e) }
125        };
126    }
127    #[cfg(target_arch = "aarch64")]
128    #[cfg(feature = "neon")]
129    if options.use_neon() {
130        return |a: &[i16], b: &[i16], c: &[i16], d: &mut [i16], e: &mut [i16]| {
131            // SAFETY: `options.use_neon()` only returns true if neon is supported.
132            unsafe { neon::upsample_hv_neon(a, b, c, d, e) }
133        };
134    }
135    #[cfg(feature = "portable_simd")]
136    return portable_simd::upsample_hv_simd;
137    return scalar::upsample_hv;
138}
139
140#[allow(unused_variables)]
141pub fn choose_v_samp_function(options: &DecoderOptions) -> UpSampler {
142    #[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
143    #[cfg(feature = "x86")]
144    if options.use_avx2() {
145        return |a: &[i16], b: &[i16], c: &[i16], d: &mut [i16], e: &mut [i16]| {
146            // SAFETY: `options.use_avx2()` only returns true if avx2 is supported.
147            unsafe { avx2::upsample_vertical_avx2(a, b, c, d, e) }
148        };
149    }
150    #[cfg(target_arch = "aarch64")]
151    #[cfg(feature = "neon")]
152    if options.use_neon() {
153        return |a: &[i16], b: &[i16], c: &[i16], d: &mut [i16], e: &mut [i16]| {
154            // SAFETY: `options.use_neon()` only returns true if neon is supported.
155            unsafe { neon::upsample_vertical_neon(a, b, c, d, e) }
156        };
157    }
158    #[cfg(feature = "portable_simd")]
159    return portable_simd::upsample_vertical_simd;
160    return scalar::upsample_vertical;
161}
162
/// Up-sampler that does nothing.
///
/// Every argument is ignored: nothing is read from the input slices and
/// neither `_scratch_space` nor `_output` is written to.
pub fn upsample_no_op(
    _input: &[i16],
    _in_ref: &[i16],
    _in_near: &[i16],
    _scratch_space: &mut [i16],
    _output: &mut [i16],
) {
    // Intentionally empty.
}
173
174pub fn generic_sampler() -> UpSampler {
175    scalar::upsample_generic
176}
177
#[cfg(test)]
mod tests {
    //! Each accelerated up-sampler is run on the same input as the scalar
    //! implementation and must produce an identical output buffer.

    use super::*;

    /// Checks for the portable SIMD routines.
    #[cfg(feature = "portable_simd")]
    mod portable_simd_impl {
        use super::*;

        #[test]
        fn portable_simd_vertical() {
            _test_vertical(portable_simd::upsample_vertical_simd);
        }

        #[test]
        fn portable_simd_horizontal() {
            _test_horizontal(portable_simd::upsample_horizontal_simd);
        }

        #[test]
        fn portable_simd_hv() {
            _test_hv(portable_simd::upsample_hv_simd);
        }
    }

    /// Checks for the AVX2 routines; only built when the test binary itself
    /// is compiled with the `avx2` target feature.
    #[cfg(all(
        any(target_arch = "x86", target_arch = "x86_64"),
        feature = "x86",
        target_feature = "avx2"
    ))]
    mod avx2_impl {
        use super::*;

        #[test]
        fn avx2_vertical() {
            _test_vertical(
                |input: &[i16],
                 in_near: &[i16],
                 in_far: &[i16],
                 scratch: &mut [i16],
                 output: &mut [i16]| {
                    // SAFETY: Test guarded behind `target_feature`
                    unsafe { avx2::upsample_vertical_avx2(input, in_near, in_far, scratch, output) }
                },
            );
        }

        #[test]
        fn avx2_horizontal() {
            _test_horizontal(
                |input: &[i16],
                 in_near: &[i16],
                 in_far: &[i16],
                 scratch: &mut [i16],
                 output: &mut [i16]| {
                    // SAFETY: Test guarded behind `target_feature`
                    unsafe {
                        avx2::upsample_horizontal_avx2(input, in_near, in_far, scratch, output)
                    }
                },
            );
        }

        #[test]
        fn avx2_hv() {
            _test_hv(
                |input: &[i16],
                 in_near: &[i16],
                 in_far: &[i16],
                 scratch: &mut [i16],
                 output: &mut [i16]| {
                    // SAFETY: Test guarded behind `target_feature`
                    unsafe { avx2::upsample_hv_avx2(input, in_near, in_far, scratch, output) }
                },
            );
        }
    }

    /// Checks for the NEON routines; only built when the test binary itself
    /// is compiled with the `neon` target feature.
    #[cfg(all(target_arch = "aarch64", feature = "neon", target_feature = "neon"))]
    mod neon_impl {
        use super::*;

        #[test]
        fn neon_vertical() {
            _test_vertical(
                |input: &[i16],
                 in_near: &[i16],
                 in_far: &[i16],
                 scratch: &mut [i16],
                 output: &mut [i16]| {
                    // SAFETY: Test guarded behind `target_feature`
                    unsafe { neon::upsample_vertical_neon(input, in_near, in_far, scratch, output) }
                },
            );
        }

        #[test]
        fn neon_horizontal() {
            _test_horizontal(
                |input: &[i16],
                 in_near: &[i16],
                 in_far: &[i16],
                 scratch: &mut [i16],
                 output: &mut [i16]| {
                    // SAFETY: Test guarded behind `target_feature`
                    unsafe {
                        neon::upsample_horizontal_neon(input, in_near, in_far, scratch, output)
                    }
                },
            );
        }

        #[test]
        fn neon_hv() {
            _test_hv(
                |input: &[i16],
                 in_near: &[i16],
                 in_far: &[i16],
                 scratch: &mut [i16],
                 output: &mut [i16]| {
                    // SAFETY: Test guarded behind `target_feature`
                    unsafe { neon::upsample_hv_neon(input, in_near, in_far, scratch, output) }
                },
            );
        }
    }

    /// Build `len` samples where sample `i` equals `(i + offset) % 256`.
    fn ramp(len: usize, offset: usize) -> Vec<i16> {
        // Every value is below 256, so the cast to `i16` is lossless.
        (0..len).map(|index| ((index + offset) % 256) as i16).collect()
    }

    /// Compare `upsampler` against `scalar::upsample_vertical` on a 1024-sample row.
    fn _test_vertical(upsampler: UpSampler) {
        const WIDTH: usize = 1024;

        let input = ramp(WIDTH, 10);
        let in_near = ramp(WIDTH, 20);
        let in_far = ramp(WIDTH, 30);
        // The same scratch buffer is handed to both implementations.
        let mut scratch = vec![0i16; WIDTH];

        let mut expected = vec![0i16; WIDTH * 2];
        let mut actual = vec![0i16; WIDTH * 2];

        scalar::upsample_vertical(&input, &in_near, &in_far, &mut scratch, &mut expected);
        upsampler(&input, &in_near, &in_far, &mut scratch, &mut actual);

        assert_eq!(expected, actual);
    }

    /// Compare `upsampler` against `scalar::upsample_horizontal` for an even
    /// (1024) and an odd (33) row width.
    fn _test_horizontal(upsampler: UpSampler) {
        _test_horizontal_width(upsampler, 1024);
        _test_horizontal_width(upsampler, 33);
    }

    /// Compare `upsampler` against `scalar::upsample_horizontal` on a row of
    /// `width` samples; the two neighbour-row arguments are passed empty.
    fn _test_horizontal_width(upsampler: UpSampler, width: usize) {
        let input = ramp(width, 10);
        let mut scratch = vec![0i16; width];

        let mut expected = vec![0i16; width * 2];
        let mut actual = vec![0i16; width * 2];

        scalar::upsample_horizontal(&input, &[], &[], &mut scratch, &mut expected);
        upsampler(&input, &[], &[], &mut scratch, &mut actual);

        assert_eq!(expected, actual);
    }

    /// Compare `upsampler` against `scalar::upsample_hv` on a 512-sample row.
    fn _test_hv(upsampler: UpSampler) {
        const WIDTH: usize = 512;

        let input = ramp(WIDTH, 10);
        let in_near = ramp(WIDTH, 20);
        let in_far = ramp(WIDTH, 30);

        // Output len is width * 4 for HV (vertical * 2, then horizontal * 2 for each row);
        // scratch is width * 2. Each implementation gets its own scratch buffer.
        let mut scratch_expected = vec![0i16; WIDTH * 2];
        let mut scratch_actual = vec![0i16; WIDTH * 2];
        let mut expected = vec![0i16; WIDTH * 4];
        let mut actual = vec![0i16; WIDTH * 4];

        scalar::upsample_hv(&input, &in_near, &in_far, &mut scratch_expected, &mut expected);
        upsampler(&input, &in_near, &in_far, &mut scratch_actual, &mut actual);

        assert_eq!(expected, actual);
    }
}
zune_jpeg/upsampler.rs

zune_jpeg/
upsampler.rs