Skip to main content

av_scenechange/data/
satd.rs

1#[cfg(asm_neon)]
2mod simd_neon;
3#[cfg(asm_x86_64)]
4mod simd_x86;
5#[cfg(test)]
6mod tests;
7
8#[cfg(not(any(asm_x86_64, asm_neon)))]
9use rust::*;
10#[cfg(asm_neon)]
11use simd_neon::*;
12#[cfg(asm_x86_64)]
13use simd_x86::*;
14use v_frame::pixel::Pixel;
15
16use super::{block::BlockSize, plane::PlaneRegion};
17use crate::cpu::CpuFeatureLevel;
18
19mod rust {
20    use v_frame::{
21        math::msb,
22        pixel::{CastFromPrimitive, Pixel},
23    };
24
25    use crate::{
26        cpu::CpuFeatureLevel,
27        data::{
28            hadamard::{hadamard4x4, hadamard8x8},
29            plane::{Area, PlaneRegion, Rect},
30            sad::get_sad,
31        },
32    };
33
34    /// Sum of absolute transformed differences over a block.
35    /// w and h can be at most 128, the size of the largest block.
36    /// Use the sum of 4x4 and 8x8 hadamard transforms for the transform, but
37    /// revert to sad on edges when these transforms do not fit into w and h.
38    /// 4x4 transforms instead of 8x8 transforms when width or height < 8.
39    #[cfg_attr(all(asm_x86_64, target_feature = "avx2"), cold)]
40    #[cfg_attr(asm_neon, cold)]
41    pub(super) fn get_satd_internal<T: Pixel>(
42        plane_org: &PlaneRegion<'_, T>,
43        plane_ref: &PlaneRegion<'_, T>,
44        w: usize,
45        h: usize,
46        bit_depth: usize,
47        cpu: CpuFeatureLevel,
48    ) -> u32 {
49        assert!(w <= 128 && h <= 128);
50        assert!(plane_org.rect().width >= w && plane_org.rect().height >= h);
51        assert!(plane_ref.rect().width >= w && plane_ref.rect().height >= h);
52
53        // Size of hadamard transform should be 4x4 or 8x8
54        // 4x* and *x4 use 4x4 and all other use 8x8
55        let size: usize = w.min(h).min(8);
56        let tx2d = if size == 4 { hadamard4x4 } else { hadamard8x8 };
57
58        let mut sum: u64 = 0;
59
60        // Loop over chunks the size of the chosen transform
61        for chunk_y in (0..h).step_by(size) {
62            let chunk_h = (h - chunk_y).min(size);
63            for chunk_x in (0..w).step_by(size) {
64                let chunk_w = (w - chunk_x).min(size);
65                let chunk_area = Area::Rect(Rect {
66                    x: chunk_x as isize,
67                    y: chunk_y as isize,
68                    width: chunk_w,
69                    height: chunk_h,
70                });
71                let chunk_org = plane_org.subregion(chunk_area);
72                let chunk_ref = plane_ref.subregion(chunk_area);
73
74                // Revert to sad on edge blocks (frame edges)
75                if chunk_w != size || chunk_h != size {
76                    sum += get_sad(&chunk_org, &chunk_ref, chunk_w, chunk_h, bit_depth, cpu) as u64;
77                    continue;
78                }
79
80                let buf: &mut [i32] = &mut [0; 8 * 8][..size * size];
81
82                // Move the difference of the transforms to a buffer
83                for (row_diff, (row_org, row_ref)) in buf
84                    .chunks_mut(size)
85                    .zip(chunk_org.rows_iter().zip(chunk_ref.rows_iter()))
86                {
87                    for (diff, (a, b)) in
88                        row_diff.iter_mut().zip(row_org.iter().zip(row_ref.iter()))
89                    {
90                        *diff = i32::cast_from(*a) - i32::cast_from(*b);
91                    }
92                }
93
94                // Perform the hadamard transform on the differences
95                // SAFETY: A sufficient number elements exist for the size of the transform.
96                unsafe {
97                    tx2d(buf);
98                }
99
100                // Sum the absolute values of the transformed differences
101                sum += buf.iter().map(|a| a.unsigned_abs() as u64).sum::<u64>();
102            }
103        }
104
105        // Normalize the results
106        let ln = msb(size as i32) as u64;
107        ((sum + (1 << ln >> 1)) >> ln) as u32
108    }
109}
110
111// BlockSize::BLOCK_SIZES.next_power_of_two()
112const DIST_FNS_LENGTH: usize = 32;
113
114const fn to_index(bsize: BlockSize) -> usize {
115    bsize as usize & (DIST_FNS_LENGTH - 1)
116}
117
118pub(crate) fn get_satd<T: Pixel>(
119    src: &PlaneRegion<'_, T>,
120    dst: &PlaneRegion<'_, T>,
121    w: usize,
122    h: usize,
123    bit_depth: usize,
124    cpu: CpuFeatureLevel,
125) -> u32 {
126    get_satd_internal(src, dst, w, h, bit_depth, cpu)
127}