av_scenechange/data/
satd.rs1#[cfg(asm_neon)]
2mod simd_neon;
3#[cfg(asm_x86_64)]
4mod simd_x86;
5#[cfg(test)]
6mod tests;
7
8#[cfg(not(any(asm_x86_64, asm_neon)))]
9use rust::*;
10#[cfg(asm_neon)]
11use simd_neon::*;
12#[cfg(asm_x86_64)]
13use simd_x86::*;
14use v_frame::pixel::Pixel;
15
16use super::{block::BlockSize, plane::PlaneRegion};
17use crate::cpu::CpuFeatureLevel;
18
19mod rust {
20 use v_frame::{
21 math::msb,
22 pixel::{CastFromPrimitive, Pixel},
23 };
24
25 use crate::{
26 cpu::CpuFeatureLevel,
27 data::{
28 hadamard::{hadamard4x4, hadamard8x8},
29 plane::{Area, PlaneRegion, Rect},
30 sad::get_sad,
31 },
32 };
33
34 #[cfg_attr(all(asm_x86_64, target_feature = "avx2"), cold)]
40 #[cfg_attr(asm_neon, cold)]
41 pub(super) fn get_satd_internal<T: Pixel>(
42 plane_org: &PlaneRegion<'_, T>,
43 plane_ref: &PlaneRegion<'_, T>,
44 w: usize,
45 h: usize,
46 bit_depth: usize,
47 cpu: CpuFeatureLevel,
48 ) -> u32 {
49 assert!(w <= 128 && h <= 128);
50 assert!(plane_org.rect().width >= w && plane_org.rect().height >= h);
51 assert!(plane_ref.rect().width >= w && plane_ref.rect().height >= h);
52
53 let size: usize = w.min(h).min(8);
56 let tx2d = if size == 4 { hadamard4x4 } else { hadamard8x8 };
57
58 let mut sum: u64 = 0;
59
60 for chunk_y in (0..h).step_by(size) {
62 let chunk_h = (h - chunk_y).min(size);
63 for chunk_x in (0..w).step_by(size) {
64 let chunk_w = (w - chunk_x).min(size);
65 let chunk_area = Area::Rect(Rect {
66 x: chunk_x as isize,
67 y: chunk_y as isize,
68 width: chunk_w,
69 height: chunk_h,
70 });
71 let chunk_org = plane_org.subregion(chunk_area);
72 let chunk_ref = plane_ref.subregion(chunk_area);
73
74 if chunk_w != size || chunk_h != size {
76 sum += get_sad(&chunk_org, &chunk_ref, chunk_w, chunk_h, bit_depth, cpu) as u64;
77 continue;
78 }
79
80 let buf: &mut [i32] = &mut [0; 8 * 8][..size * size];
81
82 for (row_diff, (row_org, row_ref)) in buf
84 .chunks_mut(size)
85 .zip(chunk_org.rows_iter().zip(chunk_ref.rows_iter()))
86 {
87 for (diff, (a, b)) in
88 row_diff.iter_mut().zip(row_org.iter().zip(row_ref.iter()))
89 {
90 *diff = i32::cast_from(*a) - i32::cast_from(*b);
91 }
92 }
93
94 unsafe {
97 tx2d(buf);
98 }
99
100 sum += buf.iter().map(|a| a.unsigned_abs() as u64).sum::<u64>();
102 }
103 }
104
105 let ln = msb(size as i32) as u64;
107 ((sum + (1 << ln >> 1)) >> ln) as u32
108 }
109}
110
111const DIST_FNS_LENGTH: usize = 32;
113
114const fn to_index(bsize: BlockSize) -> usize {
115 bsize as usize & (DIST_FNS_LENGTH - 1)
116}
117
118pub(crate) fn get_satd<T: Pixel>(
119 src: &PlaneRegion<'_, T>,
120 dst: &PlaneRegion<'_, T>,
121 w: usize,
122 h: usize,
123 bit_depth: usize,
124 cpu: CpuFeatureLevel,
125) -> u32 {
126 get_satd_internal(src, dst, w, h, bit_depth, cpu)
127}