// av_scenechange/data/mc.rs

1#[cfg(asm_neon)]
2mod simd_neon;
3#[cfg(asm_x86_64)]
4mod simd_x86;
5
6use v_frame::{pixel::Pixel, plane::PlaneSlice};
7
8#[cfg(not(any(asm_x86_64, asm_neon)))]
9use self::rust::*;
10#[cfg(asm_neon)]
11use self::simd_neon::*;
12#[cfg(asm_x86_64)]
13use self::simd_x86::*;
14use crate::{cpu::CpuFeatureLevel, data::plane::PlaneRegionMut};
15
/// Interpolation filter kinds used to pick a bank of sub-pixel kernels.
///
/// The explicit discriminants are significant: `get_filter` casts a mode to
/// `usize` and uses it as the outer index into `SUBPEL_FILTERS`, so the values
/// of `REGULAR` through `BILINEAR` must stay aligned with the table layout.
#[derive(Copy, Clone, Debug, PartialEq, Eq, PartialOrd)]
// The variants are deliberately spelled in all caps, which this lint rejects.
#[allow(clippy::upper_case_acronyms)]
// Only `REGULAR` and `BILINEAR` are referenced by the code in this file.
#[allow(dead_code)]
pub enum FilterMode {
    /// Selects bank 0 of `SUBPEL_FILTERS`; the mode `get_filter` hard-codes.
    REGULAR = 0,
    /// Selects bank 1 of `SUBPEL_FILTERS`.
    SMOOTH = 1,
    /// Selects bank 2 of `SUBPEL_FILTERS`.
    SHARP = 2,
    /// Selects bank 3 of `SUBPEL_FILTERS` (two-tap linear weights).
    BILINEAR = 3,
    /// Never used as a table index by the code in this file.
    /// NOTE(review): presumably a signalling value rather than a concrete
    /// filter - confirm against callers.
    SWITCHABLE = 4,
}
26
/// Number of taps (coefficients) in every sub-pixel filter kernel.
pub const SUBPEL_FILTER_SIZE: usize = 8;

/// Sub-pixel interpolation kernels, indexed as `[bank][fraction][tap]`.
///
/// * `bank` 0..=3 line up with the `FilterMode` discriminants (`REGULAR`,
///   `SMOOTH`, `SHARP`, `BILINEAR`). Banks 4 and 5 are the alternatives that
///   `get_filter` selects (for `REGULAR` and for the other non-bilinear modes
///   respectively) when the filtered dimension is 4 samples or fewer; they
///   only have non-zero weights on the four central taps.
/// * `fraction` is the `frac` argument of `get_filter` (16 positions). Row 0
///   of every bank is the identity kernel.
/// * The coefficients of every kernel sum to 128, i.e. each filtering pass
///   carries a gain of 2^7 that the caller removes with `round_shift`.
const SUBPEL_FILTERS: [[[i32; SUBPEL_FILTER_SIZE]; 16]; 6] = [
    // Bank 0: FilterMode::REGULAR
    [
        [0, 0, 0, 128, 0, 0, 0, 0],
        [0, 2, -6, 126, 8, -2, 0, 0],
        [0, 2, -10, 122, 18, -4, 0, 0],
        [0, 2, -12, 116, 28, -8, 2, 0],
        [0, 2, -14, 110, 38, -10, 2, 0],
        [0, 2, -14, 102, 48, -12, 2, 0],
        [0, 2, -16, 94, 58, -12, 2, 0],
        [0, 2, -14, 84, 66, -12, 2, 0],
        [0, 2, -14, 76, 76, -14, 2, 0],
        [0, 2, -12, 66, 84, -14, 2, 0],
        [0, 2, -12, 58, 94, -16, 2, 0],
        [0, 2, -12, 48, 102, -14, 2, 0],
        [0, 2, -10, 38, 110, -14, 2, 0],
        [0, 2, -8, 28, 116, -12, 2, 0],
        [0, 0, -4, 18, 122, -10, 2, 0],
        [0, 0, -2, 8, 126, -6, 2, 0],
    ],
    // Bank 1: FilterMode::SMOOTH
    [
        [0, 0, 0, 128, 0, 0, 0, 0],
        [0, 2, 28, 62, 34, 2, 0, 0],
        [0, 0, 26, 62, 36, 4, 0, 0],
        [0, 0, 22, 62, 40, 4, 0, 0],
        [0, 0, 20, 60, 42, 6, 0, 0],
        [0, 0, 18, 58, 44, 8, 0, 0],
        [0, 0, 16, 56, 46, 10, 0, 0],
        [0, -2, 16, 54, 48, 12, 0, 0],
        [0, -2, 14, 52, 52, 14, -2, 0],
        [0, 0, 12, 48, 54, 16, -2, 0],
        [0, 0, 10, 46, 56, 16, 0, 0],
        [0, 0, 8, 44, 58, 18, 0, 0],
        [0, 0, 6, 42, 60, 20, 0, 0],
        [0, 0, 4, 40, 62, 22, 0, 0],
        [0, 0, 4, 36, 62, 26, 0, 0],
        [0, 0, 2, 34, 62, 28, 2, 0],
    ],
    // Bank 2: FilterMode::SHARP
    [
        [0, 0, 0, 128, 0, 0, 0, 0],
        [-2, 2, -6, 126, 8, -2, 2, 0],
        [-2, 6, -12, 124, 16, -6, 4, -2],
        [-2, 8, -18, 120, 26, -10, 6, -2],
        [-4, 10, -22, 116, 38, -14, 6, -2],
        [-4, 10, -22, 108, 48, -18, 8, -2],
        [-4, 10, -24, 100, 60, -20, 8, -2],
        [-4, 10, -24, 90, 70, -22, 10, -2],
        [-4, 12, -24, 80, 80, -24, 12, -4],
        [-2, 10, -22, 70, 90, -24, 10, -4],
        [-2, 8, -20, 60, 100, -24, 10, -4],
        [-2, 8, -18, 48, 108, -22, 10, -4],
        [-2, 6, -14, 38, 116, -22, 10, -4],
        [-2, 6, -10, 26, 120, -18, 8, -2],
        [-2, 4, -6, 16, 124, -12, 6, -2],
        [0, 2, -2, 8, 126, -6, 2, -2],
    ],
    // Bank 3: FilterMode::BILINEAR
    [
        [0, 0, 0, 128, 0, 0, 0, 0],
        [0, 0, 0, 120, 8, 0, 0, 0],
        [0, 0, 0, 112, 16, 0, 0, 0],
        [0, 0, 0, 104, 24, 0, 0, 0],
        [0, 0, 0, 96, 32, 0, 0, 0],
        [0, 0, 0, 88, 40, 0, 0, 0],
        [0, 0, 0, 80, 48, 0, 0, 0],
        [0, 0, 0, 72, 56, 0, 0, 0],
        [0, 0, 0, 64, 64, 0, 0, 0],
        [0, 0, 0, 56, 72, 0, 0, 0],
        [0, 0, 0, 48, 80, 0, 0, 0],
        [0, 0, 0, 40, 88, 0, 0, 0],
        [0, 0, 0, 32, 96, 0, 0, 0],
        [0, 0, 0, 24, 104, 0, 0, 0],
        [0, 0, 0, 16, 112, 0, 0, 0],
        [0, 0, 0, 8, 120, 0, 0, 0],
    ],
    // Bank 4: REGULAR variant used when the filtered dimension is <= 4
    [
        [0, 0, 0, 128, 0, 0, 0, 0],
        [0, 0, -4, 126, 8, -2, 0, 0],
        [0, 0, -8, 122, 18, -4, 0, 0],
        [0, 0, -10, 116, 28, -6, 0, 0],
        [0, 0, -12, 110, 38, -8, 0, 0],
        [0, 0, -12, 102, 48, -10, 0, 0],
        [0, 0, -14, 94, 58, -10, 0, 0],
        [0, 0, -12, 84, 66, -10, 0, 0],
        [0, 0, -12, 76, 76, -12, 0, 0],
        [0, 0, -10, 66, 84, -12, 0, 0],
        [0, 0, -10, 58, 94, -14, 0, 0],
        [0, 0, -10, 48, 102, -12, 0, 0],
        [0, 0, -8, 38, 110, -12, 0, 0],
        [0, 0, -6, 28, 116, -10, 0, 0],
        [0, 0, -4, 18, 122, -8, 0, 0],
        [0, 0, -2, 8, 126, -4, 0, 0],
    ],
    // Bank 5: non-regular variant used when the filtered dimension is <= 4
    [
        [0, 0, 0, 128, 0, 0, 0, 0],
        [0, 0, 30, 62, 34, 2, 0, 0],
        [0, 0, 26, 62, 36, 4, 0, 0],
        [0, 0, 22, 62, 40, 4, 0, 0],
        [0, 0, 20, 60, 42, 6, 0, 0],
        [0, 0, 18, 58, 44, 8, 0, 0],
        [0, 0, 16, 56, 46, 10, 0, 0],
        [0, 0, 14, 54, 48, 12, 0, 0],
        [0, 0, 12, 52, 52, 12, 0, 0],
        [0, 0, 12, 48, 54, 14, 0, 0],
        [0, 0, 10, 46, 56, 16, 0, 0],
        [0, 0, 8, 44, 58, 18, 0, 0],
        [0, 0, 6, 42, 60, 20, 0, 0],
        [0, 0, 4, 40, 62, 22, 0, 0],
        [0, 0, 4, 36, 62, 26, 0, 0],
        [0, 0, 2, 34, 62, 30, 0, 0],
    ],
];
139
140mod rust {
141    use num_traits::AsPrimitive;
142    use v_frame::{math::round_shift, pixel::Pixel, plane::PlaneSlice};
143
144    use crate::{
145        cpu::CpuFeatureLevel,
146        data::{
147            mc::{FilterMode, SUBPEL_FILTERS, SUBPEL_FILTER_SIZE},
148            plane::PlaneRegionMut,
149        },
150    };
151
152    #[cfg_attr(
153        all(asm_x86_64, any(target_feature = "ssse3", target_feature = "avx2")),
154        cold
155    )]
156    #[cfg_attr(asm_neon, cold)]
157    #[allow(clippy::too_many_arguments)]
158    pub fn put_8tap_internal<T: Pixel>(
159        dst: &mut PlaneRegionMut<'_, T>,
160        src: PlaneSlice<'_, T>,
161        width: usize,
162        height: usize,
163        col_frac: i32,
164        row_frac: i32,
165        bit_depth: usize,
166        _cpu: CpuFeatureLevel,
167    ) {
168        // The assembly only supports even heights and valid uncropped widths
169        assert_eq!(height & 1, 0);
170        assert!(width.is_power_of_two() && (2..=128).contains(&width));
171
172        let ref_stride = src.plane.cfg.stride;
173        let y_filter = get_filter(row_frac, height);
174        let x_filter = get_filter(col_frac, width);
175        let max_sample_val = (1 << bit_depth) - 1;
176        let intermediate_bits = 4 - if bit_depth == 12 { 2 } else { 0 };
177        match (col_frac, row_frac) {
178            (0, 0) => {
179                for r in 0..height {
180                    let src_slice = &src[r];
181                    let dst_slice = &mut dst[r];
182                    dst_slice[..width].copy_from_slice(&src_slice[..width]);
183                }
184            }
185            (0, _) => {
186                let offset_slice = src.go_up(3);
187                for r in 0..height {
188                    let src_slice = &offset_slice[r];
189                    let dst_slice = &mut dst[r];
190                    for c in 0..width {
191                        dst_slice[c] = T::cast_from(
192                            round_shift(
193                                // SAFETY: We pass this a raw pointer, but it's created from a
194                                // checked slice, so we are safe.
195                                unsafe {
196                                    run_filter(src_slice[c..].as_ptr(), ref_stride, y_filter)
197                                },
198                                7,
199                            )
200                            .clamp(0, max_sample_val),
201                        );
202                    }
203                }
204            }
205            (_, 0) => {
206                let offset_slice = src.go_left(3);
207                for r in 0..height {
208                    let src_slice = &offset_slice[r];
209                    let dst_slice = &mut dst[r];
210                    for c in 0..width {
211                        dst_slice[c] = T::cast_from(
212                            round_shift(
213                                round_shift(
214                                    // SAFETY: We pass this a raw pointer, but it's created from a
215                                    // checked slice, so we are safe.
216                                    unsafe { run_filter(src_slice[c..].as_ptr(), 1, x_filter) },
217                                    7 - intermediate_bits,
218                                ),
219                                intermediate_bits,
220                            )
221                            .clamp(0, max_sample_val),
222                        );
223                    }
224                }
225            }
226            (_, _) => {
227                let mut intermediate: [i16; 8 * (128 + 7)] = [0; 8 * (128 + 7)];
228
229                let offset_slice = src.go_left(3).go_up(3);
230                for cg in (0..width).step_by(8) {
231                    for r in 0..height + 7 {
232                        let src_slice = &offset_slice[r];
233                        for c in cg..(cg + 8).min(width) {
234                            intermediate[8 * r + (c - cg)] = round_shift(
235                                // SAFETY: We pass this a raw pointer, but it's created from a
236                                // checked slice, so we are safe.
237                                unsafe { run_filter(src_slice[c..].as_ptr(), 1, x_filter) },
238                                7 - intermediate_bits,
239                            ) as i16;
240                        }
241                    }
242
243                    for r in 0..height {
244                        let dst_slice = &mut dst[r];
245                        for c in cg..(cg + 8).min(width) {
246                            dst_slice[c] = T::cast_from(
247                                round_shift(
248                                    // SAFETY: We pass this a raw pointer, but it's created from a
249                                    // checked slice, so we are safe.
250                                    unsafe {
251                                        run_filter(
252                                            intermediate[8 * r + c - cg..].as_ptr(),
253                                            8,
254                                            y_filter,
255                                        )
256                                    },
257                                    7 + intermediate_bits,
258                                )
259                                .clamp(0, max_sample_val),
260                            );
261                        }
262                    }
263                }
264            }
265        }
266    }
267
268    fn get_filter(frac: i32, length: usize) -> [i32; SUBPEL_FILTER_SIZE] {
269        const MODE: FilterMode = FilterMode::REGULAR;
270
271        let filter_idx = if MODE == FilterMode::BILINEAR || length > 4 {
272            MODE as usize
273        } else {
274            (MODE as usize).min(1) + 4
275        };
276        SUBPEL_FILTERS[filter_idx][frac as usize]
277    }
278
279    unsafe fn run_filter<T: AsPrimitive<i32>>(
280        src: *const T,
281        stride: usize,
282        filter: [i32; 8],
283    ) -> i32 {
284        filter
285            .iter()
286            .enumerate()
287            .map(|(i, f)| {
288                let p = src.add(i * stride);
289                f * (*p).as_()
290            })
291            .sum::<i32>()
292    }
293}
294
295#[allow(clippy::too_many_arguments)]
296pub fn put_8tap<T: Pixel>(
297    dst: &mut PlaneRegionMut<'_, T>,
298    src: PlaneSlice<'_, T>,
299    width: usize,
300    height: usize,
301    col_frac: i32,
302    row_frac: i32,
303    bit_depth: usize,
304    cpu: CpuFeatureLevel,
305) {
306    put_8tap_internal(dst, src, width, height, col_frac, row_frac, bit_depth, cpu);
307}