1#[cfg(asm_x86_64)]
2mod simd_x86;
3
4use std::mem::{transmute, MaybeUninit};
5
6use aligned::{Aligned, A64};
7#[cfg(not(asm_x86_64))]
8use rust::*;
9#[cfg(asm_x86_64)]
10use simd_x86::*;
11use v_frame::{
12 frame::Frame,
13 pixel::Pixel,
14 plane::{Plane, PlaneOffset},
15};
16
17use super::importance::IMPORTANCE_BLOCK_SIZE;
18use crate::{
19 cpu::CpuFeatureLevel,
20 data::{
21 block::{BlockSize, TxSize, MAX_TX_SIZE},
22 plane::{Area, AsRegion, PlaneRegion, PlaneRegionMut, Rect},
23 prediction::PredictionVariant,
24 satd::get_satd,
25 slice_assume_init_mut,
26 superblock::MI_SIZE_LOG2,
27 tile::TileRect,
28 },
29};
30
/// Shift amount equal to [`MI_SIZE_LOG2`].
///
/// NOTE(review): judging by the name, this is presumably the left-shift that
/// converts a block-unit coordinate into a plane (pixel) coordinate — confirm
/// against the callers.
pub const BLOCK_TO_PLANE_SHIFT: usize = MI_SIZE_LOG2;
32
33mod rust {
34 use v_frame::pixel::Pixel;
35
36 use super::IntraEdge;
37 use crate::{
38 cpu::CpuFeatureLevel,
39 data::{block::TxSize, plane::PlaneRegionMut, prediction::PredictionVariant},
40 };
41
42 #[cfg_attr(
43 all(asm_x86_64, any(target_feature = "ssse3", target_feature = "avx2")),
44 cold
45 )]
46 pub(super) fn dispatch_predict_dc_intra<T: Pixel>(
47 variant: PredictionVariant,
48 dst: &mut PlaneRegionMut<'_, T>,
49 tx_size: TxSize,
50 bit_depth: usize,
51 edge_buf: &IntraEdge<T>,
52 _cpu: CpuFeatureLevel,
53 ) {
54 let width = tx_size.width();
55 let height = tx_size.height();
56
57 let (left, _top_left, above) = edge_buf.as_slices();
59
60 let above_slice = above;
61 let left_slice = &left[left.len().saturating_sub(height)..];
62
63 (match variant {
64 PredictionVariant::NONE => pred_dc_128,
65 PredictionVariant::LEFT => pred_dc_left,
66 PredictionVariant::TOP => pred_dc_top,
67 PredictionVariant::BOTH => pred_dc,
68 })(dst, above_slice, left_slice, width, height, bit_depth)
69 }
70
71 fn pred_dc<T: Pixel>(
72 output: &mut PlaneRegionMut<'_, T>,
73 above: &[T],
74 left: &[T],
75 width: usize,
76 height: usize,
77 _bit_depth: usize,
78 ) {
79 let edges = left[..height].iter().chain(above[..width].iter());
80 let len = (width + height) as u32;
81 let avg = (edges.fold(0u32, |acc, &v| {
82 let v: u32 = v.into();
83 v + acc
84 }) + (len >> 1))
85 / len;
86 let avg = T::cast_from(avg);
87
88 for line in output.rows_iter_mut().take(height) {
89 line[..width].fill(avg);
90 }
91 }
92
93 fn pred_dc_128<T: Pixel>(
94 output: &mut PlaneRegionMut<'_, T>,
95 _above: &[T],
96 _left: &[T],
97 width: usize,
98 height: usize,
99 bit_depth: usize,
100 ) {
101 let v = T::cast_from(128u32 << (bit_depth - 8));
102 for line in output.rows_iter_mut().take(height) {
103 line[..width].fill(v);
104 }
105 }
106
107 fn pred_dc_left<T: Pixel>(
108 output: &mut PlaneRegionMut<'_, T>,
109 _above: &[T],
110 left: &[T],
111 width: usize,
112 height: usize,
113 _bit_depth: usize,
114 ) {
115 let sum = left[..].iter().fold(0u32, |acc, &v| {
116 let v: u32 = v.into();
117 v + acc
118 });
119 let avg = T::cast_from((sum + (height >> 1) as u32) / height as u32);
120 for line in output.rows_iter_mut().take(height) {
121 line[..width].fill(avg);
122 }
123 }
124
125 fn pred_dc_top<T: Pixel>(
126 output: &mut PlaneRegionMut<'_, T>,
127 above: &[T],
128 _left: &[T],
129 width: usize,
130 height: usize,
131 _bit_depth: usize,
132 ) {
133 let sum = above[..width].iter().fold(0u32, |acc, &v| {
134 let v: u32 = v.into();
135 v + acc
136 });
137 let avg = T::cast_from((sum + (width >> 1) as u32) / width as u32);
138 for line in output.rows_iter_mut().take(height) {
139 line[..width].fill(avg);
140 }
141 }
142}
143
144pub(crate) fn estimate_intra_costs<T: Pixel>(
145 temp_plane: &mut Plane<T>,
146 frame: &Frame<T>,
147 bit_depth: usize,
148 cpu_feature_level: CpuFeatureLevel,
149) -> Box<[u32]> {
150 let plane = &frame.planes[0];
151 let plane_after_prediction = temp_plane;
152
153 let bsize = BlockSize::from_width_and_height(IMPORTANCE_BLOCK_SIZE, IMPORTANCE_BLOCK_SIZE);
154 let tx_size = bsize.tx_size();
155
156 let h_in_imp_b = plane.cfg.height / IMPORTANCE_BLOCK_SIZE;
157 let w_in_imp_b = plane.cfg.width / IMPORTANCE_BLOCK_SIZE;
158 let mut intra_costs = Vec::with_capacity(h_in_imp_b * w_in_imp_b);
159
160 for y in 0..h_in_imp_b {
161 for x in 0..w_in_imp_b {
162 let plane_org = plane.region(Area::Rect(Rect {
163 x: (x * IMPORTANCE_BLOCK_SIZE) as isize,
164 y: (y * IMPORTANCE_BLOCK_SIZE) as isize,
165 width: IMPORTANCE_BLOCK_SIZE,
166 height: IMPORTANCE_BLOCK_SIZE,
167 }));
168
169 let mut edge_buf = Aligned([MaybeUninit::uninit(); 4 * MAX_TX_SIZE + 1]);
172 let edge_buf = get_intra_edges(
173 &mut edge_buf,
174 &plane.as_region(),
175 PlaneOffset {
176 x: (x * IMPORTANCE_BLOCK_SIZE) as isize,
177 y: (y * IMPORTANCE_BLOCK_SIZE) as isize,
178 },
179 bit_depth,
180 );
181
182 let mut plane_after_prediction_region =
183 plane_after_prediction.region_mut(Area::Rect(Rect {
184 x: (x * IMPORTANCE_BLOCK_SIZE) as isize,
185 y: (y * IMPORTANCE_BLOCK_SIZE) as isize,
186 width: IMPORTANCE_BLOCK_SIZE,
187 height: IMPORTANCE_BLOCK_SIZE,
188 }));
189
190 predict_dc_intra(
191 TileRect {
192 x: x * IMPORTANCE_BLOCK_SIZE,
193 y: y * IMPORTANCE_BLOCK_SIZE,
194 width: IMPORTANCE_BLOCK_SIZE,
195 height: IMPORTANCE_BLOCK_SIZE,
196 },
197 &mut plane_after_prediction_region,
198 tx_size,
199 bit_depth,
200 &edge_buf,
201 cpu_feature_level,
202 );
203
204 let plane_after_prediction_region = plane_after_prediction.region(Area::Rect(Rect {
205 x: (x * IMPORTANCE_BLOCK_SIZE) as isize,
206 y: (y * IMPORTANCE_BLOCK_SIZE) as isize,
207 width: IMPORTANCE_BLOCK_SIZE,
208 height: IMPORTANCE_BLOCK_SIZE,
209 }));
210
211 let intra_cost = get_satd(
212 &plane_org,
213 &plane_after_prediction_region,
214 bsize.width(),
215 bsize.height(),
216 bit_depth,
217 cpu_feature_level,
218 );
219
220 intra_costs.push(intra_cost);
221 }
222 }
223
224 intra_costs.into_boxed_slice()
225}
226
227pub fn get_intra_edges<'a, T: Pixel>(
228 edge_buf: &'a mut IntraEdgeBuffer<T>,
229 dst: &PlaneRegion<'_, T>,
230 po: PlaneOffset,
231 bit_depth: usize,
232) -> IntraEdge<'a, T> {
233 let tx_size = TxSize::TX_8X8;
234 let mut init_left: usize = 0;
235 let mut init_above: usize = 0;
236
237 let base = 128u16 << (bit_depth - 8);
238
239 {
240 let (left, not_left) = edge_buf.split_at_mut(2 * MAX_TX_SIZE);
242 let (top_left, above) = not_left.split_at_mut(1);
243
244 let x = po.x as usize;
245 let y = po.y as usize;
246
247 let needs_left = x != 0;
248 let needs_top = y != 0;
249
250 let rect_w = dst
251 .rect()
252 .width
253 .min(dst.plane_cfg.width - dst.rect().x as usize);
254 let rect_h = dst
255 .rect()
256 .height
257 .min(dst.plane_cfg.height - dst.rect().y as usize);
258
259 if needs_left {
261 let txh = if y + tx_size.height() > rect_h {
262 rect_h - y
263 } else {
264 tx_size.height()
265 };
266 if x != 0 {
267 for i in 0..txh {
268 debug_assert!(y + i < rect_h);
269 left[2 * MAX_TX_SIZE - 1 - i].write(dst[y + i][x - 1]);
270 }
271 if txh < tx_size.height() {
272 let val = dst[y + txh - 1][x - 1];
273 for i in txh..tx_size.height() {
274 left[2 * MAX_TX_SIZE - 1 - i].write(val);
275 }
276 }
277 } else {
278 let val = if y != 0 {
279 dst[y - 1][0]
280 } else {
281 T::cast_from(base + 1)
282 };
283 for v in left[2 * MAX_TX_SIZE - tx_size.height()..].iter_mut() {
284 v.write(val);
285 }
286 }
287 init_left += tx_size.height();
288 }
289
290 if needs_top {
292 let txw = if x + tx_size.width() > rect_w {
293 rect_w - x
294 } else {
295 tx_size.width()
296 };
297 if y != 0 {
298 above[..txw].copy_from_slice(
299 unsafe { transmute::<&[T], &[MaybeUninit<T>]>(&dst[y - 1][x..x + txw]) },
301 );
302 if txw < tx_size.width() {
303 let val = dst[y - 1][x + txw - 1];
304 for v in &mut above[txw..tx_size.width()] {
305 v.write(val);
306 }
307 }
308 } else {
309 let val = if x != 0 {
310 dst[0][x - 1]
311 } else {
312 T::cast_from(base - 1)
313 };
314 for v in &mut above[..tx_size.width()] {
315 v.write(val);
316 }
317 }
318 init_above += tx_size.width();
319 }
320
321 top_left[0].write(T::cast_from(base));
322 }
323 IntraEdge::new(edge_buf, init_left, init_above)
324}
325
326pub fn predict_dc_intra<T: Pixel>(
327 tile_rect: TileRect,
328 dst: &mut PlaneRegionMut<'_, T>,
329 tx_size: TxSize,
330 bit_depth: usize,
331 edge_buf: &IntraEdge<T>,
332 cpu: CpuFeatureLevel,
333) {
334 let &Rect {
335 x: frame_x,
336 y: frame_y,
337 ..
338 } = dst.rect();
339 debug_assert!(frame_x >= 0 && frame_y >= 0);
340 let x = frame_x as usize - tile_rect.x;
342 let y = frame_y as usize - tile_rect.y;
343
344 let variant = PredictionVariant::new(x, y);
345
346 dispatch_predict_dc_intra::<T>(variant, dst, tx_size, bit_depth, edge_buf, cpu);
347}
348
349type IntraEdgeBuffer<T> = Aligned<A64, [MaybeUninit<T>; 4 * MAX_TX_SIZE + 1]>;
350
351pub struct IntraEdge<'a, T: Pixel>(&'a [T], &'a [T], &'a [T]);
352
353impl<'a, T: Pixel> IntraEdge<'a, T> {
354 fn new(edge_buf: &'a mut IntraEdgeBuffer<T>, init_left: usize, init_above: usize) -> Self {
355 let left = unsafe {
357 let begin_left = 2 * MAX_TX_SIZE - init_left;
358 let end_above = 2 * MAX_TX_SIZE + 1 + init_above;
359 slice_assume_init_mut(&mut edge_buf[begin_left..end_above])
360 };
361 let (left, top_left) = left.split_at(init_left);
362 let (top_left, above) = top_left.split_at(1);
363 Self(left, top_left, above)
364 }
365
366 pub const fn as_slices(&self) -> (&'a [T], &'a [T], &'a [T]) {
367 (self.0, self.1, self.2)
368 }
369
370 #[allow(dead_code)]
371 pub const fn top_left_ptr(&self) -> *const T {
372 self.1.as_ptr()
373 }
374}