1#[cfg(asm_neon)]
2mod simd_neon;
3#[cfg(asm_x86_64)]
4mod simd_x86;
5
6use v_frame::{pixel::Pixel, plane::PlaneSlice};
7
8#[cfg(not(any(asm_x86_64, asm_neon)))]
9use self::rust::*;
10#[cfg(asm_neon)]
11use self::simd_neon::*;
12#[cfg(asm_x86_64)]
13use self::simd_x86::*;
14use crate::{cpu::CpuFeatureLevel, data::plane::PlaneRegionMut};
15
/// Interpolation filter families.
///
/// The numeric value of a variant is what `get_filter` uses (via
/// `as usize`) when it picks a coefficient set out of `SUBPEL_FILTERS`, so
/// the discriminants below are part of the contract and must not be
/// reordered.
// `upper_case_acronyms`: the all-caps variant names are kept as-is for
// callers. `dead_code`: not every variant is constructed in this module.
#[allow(dead_code, clippy::upper_case_acronyms)]
#[derive(Clone, Copy, Debug, Eq, PartialEq, PartialOrd)]
pub enum FilterMode {
    /// Discriminant 0.
    REGULAR = 0,
    /// Discriminant 1.
    SMOOTH,
    /// Discriminant 2.
    SHARP,
    /// Discriminant 3.
    BILINEAR,
    /// Discriminant 4.
    SWITCHABLE,
}
26
/// Number of taps (coefficients) in one sub-pixel interpolation kernel; this
/// is the innermost dimension of `SUBPEL_FILTERS`.
pub const SUBPEL_FILTER_SIZE: usize = 8;
28
/// Sub-pixel interpolation kernels, indexed as
/// `SUBPEL_FILTERS[filter set][fractional position][tap]`.
///
/// * Sets 0..=3 are addressed directly by `FilterMode as usize`
///   (`REGULAR`, `SMOOTH`, `SHARP`, `BILINEAR`).
/// * Sets 4 and 5 are the ones `get_filter` switches to when the filtered
///   dimension is 4 or less: set 4 for `REGULAR`, set 5 for any other
///   non-bilinear mode. Their rows only have non-zero taps at indices 2..=5.
/// * There are 16 fractional positions per set; position 0 is always the
///   identity kernel (a single 128 at tap index 3).
///
/// Every row sums to 128 (`1 << 7`), which is why the filter code shifts the
/// accumulated result right by a total of 7 bits.
const SUBPEL_FILTERS: [[[i32; SUBPEL_FILTER_SIZE]; 16]; 6] = [
    // Set 0: `FilterMode::REGULAR`.
    [
        [0, 0, 0, 128, 0, 0, 0, 0],
        [0, 2, -6, 126, 8, -2, 0, 0],
        [0, 2, -10, 122, 18, -4, 0, 0],
        [0, 2, -12, 116, 28, -8, 2, 0],
        [0, 2, -14, 110, 38, -10, 2, 0],
        [0, 2, -14, 102, 48, -12, 2, 0],
        [0, 2, -16, 94, 58, -12, 2, 0],
        [0, 2, -14, 84, 66, -12, 2, 0],
        [0, 2, -14, 76, 76, -14, 2, 0],
        [0, 2, -12, 66, 84, -14, 2, 0],
        [0, 2, -12, 58, 94, -16, 2, 0],
        [0, 2, -12, 48, 102, -14, 2, 0],
        [0, 2, -10, 38, 110, -14, 2, 0],
        [0, 2, -8, 28, 116, -12, 2, 0],
        [0, 0, -4, 18, 122, -10, 2, 0],
        [0, 0, -2, 8, 126, -6, 2, 0],
    ],
    // Set 1: `FilterMode::SMOOTH`.
    [
        [0, 0, 0, 128, 0, 0, 0, 0],
        [0, 2, 28, 62, 34, 2, 0, 0],
        [0, 0, 26, 62, 36, 4, 0, 0],
        [0, 0, 22, 62, 40, 4, 0, 0],
        [0, 0, 20, 60, 42, 6, 0, 0],
        [0, 0, 18, 58, 44, 8, 0, 0],
        [0, 0, 16, 56, 46, 10, 0, 0],
        [0, -2, 16, 54, 48, 12, 0, 0],
        [0, -2, 14, 52, 52, 14, -2, 0],
        [0, 0, 12, 48, 54, 16, -2, 0],
        [0, 0, 10, 46, 56, 16, 0, 0],
        [0, 0, 8, 44, 58, 18, 0, 0],
        [0, 0, 6, 42, 60, 20, 0, 0],
        [0, 0, 4, 40, 62, 22, 0, 0],
        [0, 0, 4, 36, 62, 26, 0, 0],
        [0, 0, 2, 34, 62, 28, 2, 0],
    ],
    // Set 2: `FilterMode::SHARP` (the only set using all eight taps).
    [
        [0, 0, 0, 128, 0, 0, 0, 0],
        [-2, 2, -6, 126, 8, -2, 2, 0],
        [-2, 6, -12, 124, 16, -6, 4, -2],
        [-2, 8, -18, 120, 26, -10, 6, -2],
        [-4, 10, -22, 116, 38, -14, 6, -2],
        [-4, 10, -22, 108, 48, -18, 8, -2],
        [-4, 10, -24, 100, 60, -20, 8, -2],
        [-4, 10, -24, 90, 70, -22, 10, -2],
        [-4, 12, -24, 80, 80, -24, 12, -4],
        [-2, 10, -22, 70, 90, -24, 10, -4],
        [-2, 8, -20, 60, 100, -24, 10, -4],
        [-2, 8, -18, 48, 108, -22, 10, -4],
        [-2, 6, -14, 38, 116, -22, 10, -4],
        [-2, 6, -10, 26, 120, -18, 8, -2],
        [-2, 4, -6, 16, 124, -12, 6, -2],
        [0, 2, -2, 8, 126, -6, 2, -2],
    ],
    // Set 3: `FilterMode::BILINEAR` (two taps, linear weights in steps of 8).
    [
        [0, 0, 0, 128, 0, 0, 0, 0],
        [0, 0, 0, 120, 8, 0, 0, 0],
        [0, 0, 0, 112, 16, 0, 0, 0],
        [0, 0, 0, 104, 24, 0, 0, 0],
        [0, 0, 0, 96, 32, 0, 0, 0],
        [0, 0, 0, 88, 40, 0, 0, 0],
        [0, 0, 0, 80, 48, 0, 0, 0],
        [0, 0, 0, 72, 56, 0, 0, 0],
        [0, 0, 0, 64, 64, 0, 0, 0],
        [0, 0, 0, 56, 72, 0, 0, 0],
        [0, 0, 0, 48, 80, 0, 0, 0],
        [0, 0, 0, 40, 88, 0, 0, 0],
        [0, 0, 0, 32, 96, 0, 0, 0],
        [0, 0, 0, 24, 104, 0, 0, 0],
        [0, 0, 0, 16, 112, 0, 0, 0],
        [0, 0, 0, 8, 120, 0, 0, 0],
    ],
    // Set 4: picked by `get_filter` for `REGULAR` when the dimension is <= 4.
    [
        [0, 0, 0, 128, 0, 0, 0, 0],
        [0, 0, -4, 126, 8, -2, 0, 0],
        [0, 0, -8, 122, 18, -4, 0, 0],
        [0, 0, -10, 116, 28, -6, 0, 0],
        [0, 0, -12, 110, 38, -8, 0, 0],
        [0, 0, -12, 102, 48, -10, 0, 0],
        [0, 0, -14, 94, 58, -10, 0, 0],
        [0, 0, -12, 84, 66, -10, 0, 0],
        [0, 0, -12, 76, 76, -12, 0, 0],
        [0, 0, -10, 66, 84, -12, 0, 0],
        [0, 0, -10, 58, 94, -14, 0, 0],
        [0, 0, -10, 48, 102, -12, 0, 0],
        [0, 0, -8, 38, 110, -12, 0, 0],
        [0, 0, -6, 28, 116, -10, 0, 0],
        [0, 0, -4, 18, 122, -8, 0, 0],
        [0, 0, -2, 8, 126, -4, 0, 0],
    ],
    // Set 5: picked by `get_filter` for the other non-bilinear modes when the
    // dimension is <= 4.
    [
        [0, 0, 0, 128, 0, 0, 0, 0],
        [0, 0, 30, 62, 34, 2, 0, 0],
        [0, 0, 26, 62, 36, 4, 0, 0],
        [0, 0, 22, 62, 40, 4, 0, 0],
        [0, 0, 20, 60, 42, 6, 0, 0],
        [0, 0, 18, 58, 44, 8, 0, 0],
        [0, 0, 16, 56, 46, 10, 0, 0],
        [0, 0, 14, 54, 48, 12, 0, 0],
        [0, 0, 12, 52, 52, 12, 0, 0],
        [0, 0, 12, 48, 54, 14, 0, 0],
        [0, 0, 10, 46, 56, 16, 0, 0],
        [0, 0, 8, 44, 58, 18, 0, 0],
        [0, 0, 6, 42, 60, 20, 0, 0],
        [0, 0, 4, 40, 62, 22, 0, 0],
        [0, 0, 4, 36, 62, 26, 0, 0],
        [0, 0, 2, 34, 62, 30, 0, 0],
    ],
];
139
140mod rust {
141 use num_traits::AsPrimitive;
142 use v_frame::{math::round_shift, pixel::Pixel, plane::PlaneSlice};
143
144 use crate::{
145 cpu::CpuFeatureLevel,
146 data::{
147 mc::{FilterMode, SUBPEL_FILTERS, SUBPEL_FILTER_SIZE},
148 plane::PlaneRegionMut,
149 },
150 };
151
152 #[cfg_attr(
153 all(asm_x86_64, any(target_feature = "ssse3", target_feature = "avx2")),
154 cold
155 )]
156 #[cfg_attr(asm_neon, cold)]
157 #[allow(clippy::too_many_arguments)]
158 pub fn put_8tap_internal<T: Pixel>(
159 dst: &mut PlaneRegionMut<'_, T>,
160 src: PlaneSlice<'_, T>,
161 width: usize,
162 height: usize,
163 col_frac: i32,
164 row_frac: i32,
165 bit_depth: usize,
166 _cpu: CpuFeatureLevel,
167 ) {
168 assert_eq!(height & 1, 0);
170 assert!(width.is_power_of_two() && (2..=128).contains(&width));
171
172 let ref_stride = src.plane.cfg.stride;
173 let y_filter = get_filter(row_frac, height);
174 let x_filter = get_filter(col_frac, width);
175 let max_sample_val = (1 << bit_depth) - 1;
176 let intermediate_bits = 4 - if bit_depth == 12 { 2 } else { 0 };
177 match (col_frac, row_frac) {
178 (0, 0) => {
179 for r in 0..height {
180 let src_slice = &src[r];
181 let dst_slice = &mut dst[r];
182 dst_slice[..width].copy_from_slice(&src_slice[..width]);
183 }
184 }
185 (0, _) => {
186 let offset_slice = src.go_up(3);
187 for r in 0..height {
188 let src_slice = &offset_slice[r];
189 let dst_slice = &mut dst[r];
190 for c in 0..width {
191 dst_slice[c] = T::cast_from(
192 round_shift(
193 unsafe {
196 run_filter(src_slice[c..].as_ptr(), ref_stride, y_filter)
197 },
198 7,
199 )
200 .clamp(0, max_sample_val),
201 );
202 }
203 }
204 }
205 (_, 0) => {
206 let offset_slice = src.go_left(3);
207 for r in 0..height {
208 let src_slice = &offset_slice[r];
209 let dst_slice = &mut dst[r];
210 for c in 0..width {
211 dst_slice[c] = T::cast_from(
212 round_shift(
213 round_shift(
214 unsafe { run_filter(src_slice[c..].as_ptr(), 1, x_filter) },
217 7 - intermediate_bits,
218 ),
219 intermediate_bits,
220 )
221 .clamp(0, max_sample_val),
222 );
223 }
224 }
225 }
226 (_, _) => {
227 let mut intermediate: [i16; 8 * (128 + 7)] = [0; 8 * (128 + 7)];
228
229 let offset_slice = src.go_left(3).go_up(3);
230 for cg in (0..width).step_by(8) {
231 for r in 0..height + 7 {
232 let src_slice = &offset_slice[r];
233 for c in cg..(cg + 8).min(width) {
234 intermediate[8 * r + (c - cg)] = round_shift(
235 unsafe { run_filter(src_slice[c..].as_ptr(), 1, x_filter) },
238 7 - intermediate_bits,
239 ) as i16;
240 }
241 }
242
243 for r in 0..height {
244 let dst_slice = &mut dst[r];
245 for c in cg..(cg + 8).min(width) {
246 dst_slice[c] = T::cast_from(
247 round_shift(
248 unsafe {
251 run_filter(
252 intermediate[8 * r + c - cg..].as_ptr(),
253 8,
254 y_filter,
255 )
256 },
257 7 + intermediate_bits,
258 )
259 .clamp(0, max_sample_val),
260 );
261 }
262 }
263 }
264 }
265 }
266 }
267
268 fn get_filter(frac: i32, length: usize) -> [i32; SUBPEL_FILTER_SIZE] {
269 const MODE: FilterMode = FilterMode::REGULAR;
270
271 let filter_idx = if MODE == FilterMode::BILINEAR || length > 4 {
272 MODE as usize
273 } else {
274 (MODE as usize).min(1) + 4
275 };
276 SUBPEL_FILTERS[filter_idx][frac as usize]
277 }
278
279 unsafe fn run_filter<T: AsPrimitive<i32>>(
280 src: *const T,
281 stride: usize,
282 filter: [i32; 8],
283 ) -> i32 {
284 filter
285 .iter()
286 .enumerate()
287 .map(|(i, f)| {
288 let p = src.add(i * stride);
289 f * (*p).as_()
290 })
291 .sum::<i32>()
292 }
293}
294
/// Public entry point for sub-pixel block interpolation.
///
/// Forwards every argument unchanged to `put_8tap_internal`, which is
/// glob-imported from `simd_x86`, `simd_neon`, or the pure-Rust `rust` module
/// depending on the `asm_x86_64` / `asm_neon` cfg flags.
///
/// Parameters, as interpreted by the Rust fallback in this file:
/// * `dst` - destination region; the first `width` samples of the first
///   `height` rows are overwritten.
/// * `src` - source plane slice positioned at the block's top-left sample.
/// * `width`, `height` - block size; the fallback asserts that `height` is
///   even and `width` is a power of two in `2..=128`.
/// * `col_frac`, `row_frac` - horizontal / vertical fractional position used
///   to select a kernel from a 16-entry table; `0` disables filtering on that
///   axis.
/// * `bit_depth` - sample bit depth, used for clamping and rounding.
/// * `cpu` - CPU feature level handed to the implementation (unused by the
///   Rust fallback).
// The argument list mirrors `put_8tap_internal`, hence the lint exemption.
#[allow(clippy::too_many_arguments)]
pub fn put_8tap<T: Pixel>(
    dst: &mut PlaneRegionMut<'_, T>,
    src: PlaneSlice<'_, T>,
    width: usize,
    height: usize,
    col_frac: i32,
    row_frac: i32,
    bit_depth: usize,
    cpu: CpuFeatureLevel,
) {
    put_8tap_internal(dst, src, width, height, col_frac, row_frac, bit_depth, cpu);
}