zune_jpeg/
idct.rs

1/*
2 * Copyright (c) 2023.
3 *
4 * This software is free software;
5 *
6 * You can redistribute it or modify it under terms of the MIT, Apache License or Zlib license
7 */
8
9//! Routines for IDCT
10//!
//! Essentially we provide 2 routines for IDCT: a scalar implementation and a not super optimized
//! AVX2 one. I'll talk about them here.
13//!
//! There are 2 reasons why we have the AVX one:
//! 1. Almost no one compiles with `-C target-feature=+avx2`, hence binaries probably won't take
//!    advantage of AVX2 (even if the CPU supports it).
//! 2. AVX employs a zero short circuit in a way the scalar code cannot.
//!     - AVX does this by checking for MCUs whose 63 AC coefficients are all zero; if true, it
//!       writes values directly, and if false, it goes the long way of calculating.
//!     - Although this can be trivially implemented in the scalar version, it generates code
//!       I'm not happy with (a scalar version basically loops, and that is too many branches for me).
//!       The AVX one does a better job by using bitwise ORs (`_mm256_or_si256`), which is orders
//!       of magnitude faster than anything I could come up with.
24//!
//! The AVX code also has some cool `transpose_u16` instructions which look complicated enough to
//! be cool (spoiler alert: I barely understand how they work, which is why I credited the owner).
27//!
28#![allow(
29    clippy::excessive_precision,
30    clippy::unreadable_literal,
31    clippy::module_name_repetitions,
32    unused_parens,
33    clippy::wildcard_imports
34)]
35
36use zune_core::log::debug;
37use zune_core::options::DecoderOptions;
38
39use crate::decoder::IDCTPtr;
40use crate::idct::scalar::idct_int;
41
42#[cfg(feature = "x86")]
43pub mod avx2;
44#[cfg(feature = "neon")]
45pub mod neon;
46
47pub mod scalar;
48
49/// Choose an appropriate IDCT function
50#[allow(unused_variables)]
51pub fn choose_idct_func(options: &DecoderOptions) -> IDCTPtr {
52    #[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
53    #[cfg(feature = "x86")]
54    {
55        if options.use_avx2() {
56            debug!("Using vector integer IDCT");
57            // use avx one
58            return crate::idct::avx2::idct_avx2;
59        }
60    }
61    #[cfg(target_arch = "aarch64")]
62    #[cfg(feature = "neon")]
63    {
64        if options.use_neon() {
65            debug!("Using vector integer IDCT");
66            return crate::idct::neon::idct_neon;
67        }
68    }
69    debug!("Using scalar integer IDCT");
70    // use generic one
71    return idct_int;
72}
73
#[cfg(test)]
// The cfg-gated early `return`s in `idct_fnc` make its trailing expression
// unreachable on targets where a SIMD branch is compiled in.
#[allow(unreachable_code)]
#[allow(dead_code)]
mod tests {
    use super::*;

    /// Feeds `coeff` to both the routine returned by `idct_fnc()` and to
    /// `idct_int`, using a stride of 8 and separate 64-element output buffers,
    /// then asserts that the two outputs are equal.
    ///
    /// Each routine receives its own copy of the coefficients because both take
    /// their input by mutable reference.
    // NOTE(review): the `i32` element type is assumed to match the coefficient
    // type expected by `IDCTPtr` / `idct_int` — confirm against their definitions.
    fn assert_matches_scalar(coeff: [i32; 64]) {
        let stride = 8;

        let mut vector_in = coeff;
        let mut scalar_in = coeff;
        let mut vector_out = [0; 64];
        let mut scalar_out = [0; 64];

        let vector_idct = idct_fnc();
        vector_idct(&mut vector_in, &mut vector_out, stride);
        idct_int(&mut scalar_in, &mut scalar_out, stride);

        assert_eq!(scalar_out, vector_out, "IDCT and scalar do not match");
    }

    /// Every coefficient set to 10.
    #[test]
    fn idct_test0() {
        assert_matches_scalar([10; 64]);
    }

    /// Every coefficient set to 14.
    #[test]
    fn do_idct_test1() {
        assert_matches_scalar([14; 64]);
    }

    /// Only the first (255) and last (-256) coefficients are non-zero.
    #[test]
    fn do_idct_test2() {
        let mut coeff = [0; 64];
        coeff[0] = 255;
        coeff[63] = -256;
        assert_matches_scalar(coeff);
    }

    /// All coefficients zero.
    #[test]
    fn do_idct_zeros() {
        assert_matches_scalar([0; 64]);
    }

    /// Returns the IDCT routine that the tests compare against `idct_int`.
    ///
    /// With the `neon` feature on aarch64 this is `idct_neon`; with the `x86`
    /// feature on x86/x86_64 it is `idct_avx2`; otherwise it is `idct_int`
    /// itself.
    ///
    /// NOTE(review): unlike `choose_idct_func`, no `DecoderOptions` query is
    /// made here, so the SIMD routine is picked purely at compile time —
    /// presumably the test host is expected to support it; confirm.
    fn idct_fnc() -> IDCTPtr {
        #[cfg(all(feature = "neon", target_arch = "aarch64"))]
        {
            return crate::idct::neon::idct_neon;
        }

        #[cfg(all(feature = "x86", any(target_arch = "x86", target_arch = "x86_64")))]
        {
            return crate::idct::avx2::idct_avx2;
        }

        idct_int
    }
}
zune_jpeg/idct.rs

zune_jpeg/
idct.rs