Skip to main content

av_scenechange/analyze/
intra.rs

1#[cfg(asm_x86_64)]
2mod simd_x86;
3
4use std::mem::{transmute, MaybeUninit};
5
6use aligned::{Aligned, A64};
7#[cfg(not(asm_x86_64))]
8use rust::*;
9#[cfg(asm_x86_64)]
10use simd_x86::*;
11use v_frame::{
12    frame::Frame,
13    pixel::Pixel,
14    plane::{Plane, PlaneOffset},
15};
16
17use super::importance::IMPORTANCE_BLOCK_SIZE;
18use crate::{
19    cpu::CpuFeatureLevel,
20    data::{
21        block::{BlockSize, TxSize, MAX_TX_SIZE},
22        plane::{Area, AsRegion, PlaneRegion, PlaneRegionMut, Rect},
23        prediction::PredictionVariant,
24        satd::get_satd,
25        slice_assume_init_mut,
26        superblock::MI_SIZE_LOG2,
27        tile::TileRect,
28    },
29};
30
31pub const BLOCK_TO_PLANE_SHIFT: usize = MI_SIZE_LOG2;
32
33mod rust {
34    use v_frame::pixel::Pixel;
35
36    use super::IntraEdge;
37    use crate::{
38        cpu::CpuFeatureLevel,
39        data::{block::TxSize, plane::PlaneRegionMut, prediction::PredictionVariant},
40    };
41
42    #[cfg_attr(
43        all(asm_x86_64, any(target_feature = "ssse3", target_feature = "avx2")),
44        cold
45    )]
46    pub(super) fn dispatch_predict_dc_intra<T: Pixel>(
47        variant: PredictionVariant,
48        dst: &mut PlaneRegionMut<'_, T>,
49        tx_size: TxSize,
50        bit_depth: usize,
51        edge_buf: &IntraEdge<T>,
52        _cpu: CpuFeatureLevel,
53    ) {
54        let width = tx_size.width();
55        let height = tx_size.height();
56
57        // left pixels are ordered from bottom to top and right-aligned
58        let (left, _top_left, above) = edge_buf.as_slices();
59
60        let above_slice = above;
61        let left_slice = &left[left.len().saturating_sub(height)..];
62
63        (match variant {
64            PredictionVariant::NONE => pred_dc_128,
65            PredictionVariant::LEFT => pred_dc_left,
66            PredictionVariant::TOP => pred_dc_top,
67            PredictionVariant::BOTH => pred_dc,
68        })(dst, above_slice, left_slice, width, height, bit_depth)
69    }
70
71    fn pred_dc<T: Pixel>(
72        output: &mut PlaneRegionMut<'_, T>,
73        above: &[T],
74        left: &[T],
75        width: usize,
76        height: usize,
77        _bit_depth: usize,
78    ) {
79        let edges = left[..height].iter().chain(above[..width].iter());
80        let len = (width + height) as u32;
81        let avg = (edges.fold(0u32, |acc, &v| {
82            let v: u32 = v.into();
83            v + acc
84        }) + (len >> 1))
85            / len;
86        let avg = T::cast_from(avg);
87
88        for line in output.rows_iter_mut().take(height) {
89            line[..width].fill(avg);
90        }
91    }
92
93    fn pred_dc_128<T: Pixel>(
94        output: &mut PlaneRegionMut<'_, T>,
95        _above: &[T],
96        _left: &[T],
97        width: usize,
98        height: usize,
99        bit_depth: usize,
100    ) {
101        let v = T::cast_from(128u32 << (bit_depth - 8));
102        for line in output.rows_iter_mut().take(height) {
103            line[..width].fill(v);
104        }
105    }
106
107    fn pred_dc_left<T: Pixel>(
108        output: &mut PlaneRegionMut<'_, T>,
109        _above: &[T],
110        left: &[T],
111        width: usize,
112        height: usize,
113        _bit_depth: usize,
114    ) {
115        let sum = left[..].iter().fold(0u32, |acc, &v| {
116            let v: u32 = v.into();
117            v + acc
118        });
119        let avg = T::cast_from((sum + (height >> 1) as u32) / height as u32);
120        for line in output.rows_iter_mut().take(height) {
121            line[..width].fill(avg);
122        }
123    }
124
125    fn pred_dc_top<T: Pixel>(
126        output: &mut PlaneRegionMut<'_, T>,
127        above: &[T],
128        _left: &[T],
129        width: usize,
130        height: usize,
131        _bit_depth: usize,
132    ) {
133        let sum = above[..width].iter().fold(0u32, |acc, &v| {
134            let v: u32 = v.into();
135            v + acc
136        });
137        let avg = T::cast_from((sum + (width >> 1) as u32) / width as u32);
138        for line in output.rows_iter_mut().take(height) {
139            line[..width].fill(avg);
140        }
141    }
142}
143
144pub(crate) fn estimate_intra_costs<T: Pixel>(
145    temp_plane: &mut Plane<T>,
146    frame: &Frame<T>,
147    bit_depth: usize,
148    cpu_feature_level: CpuFeatureLevel,
149) -> Box<[u32]> {
150    let plane = &frame.planes[0];
151    let plane_after_prediction = temp_plane;
152
153    let bsize = BlockSize::from_width_and_height(IMPORTANCE_BLOCK_SIZE, IMPORTANCE_BLOCK_SIZE);
154    let tx_size = bsize.tx_size();
155
156    let h_in_imp_b = plane.cfg.height / IMPORTANCE_BLOCK_SIZE;
157    let w_in_imp_b = plane.cfg.width / IMPORTANCE_BLOCK_SIZE;
158    let mut intra_costs = Vec::with_capacity(h_in_imp_b * w_in_imp_b);
159
160    for y in 0..h_in_imp_b {
161        for x in 0..w_in_imp_b {
162            let plane_org = plane.region(Area::Rect(Rect {
163                x: (x * IMPORTANCE_BLOCK_SIZE) as isize,
164                y: (y * IMPORTANCE_BLOCK_SIZE) as isize,
165                width: IMPORTANCE_BLOCK_SIZE,
166                height: IMPORTANCE_BLOCK_SIZE,
167            }));
168
169            // For scene detection, we are only going to support DC_PRED
170            // for simplicity and speed purposes.
171            let mut edge_buf = Aligned([MaybeUninit::uninit(); 4 * MAX_TX_SIZE + 1]);
172            let edge_buf = get_intra_edges(
173                &mut edge_buf,
174                &plane.as_region(),
175                PlaneOffset {
176                    x: (x * IMPORTANCE_BLOCK_SIZE) as isize,
177                    y: (y * IMPORTANCE_BLOCK_SIZE) as isize,
178                },
179                bit_depth,
180            );
181
182            let mut plane_after_prediction_region =
183                plane_after_prediction.region_mut(Area::Rect(Rect {
184                    x: (x * IMPORTANCE_BLOCK_SIZE) as isize,
185                    y: (y * IMPORTANCE_BLOCK_SIZE) as isize,
186                    width: IMPORTANCE_BLOCK_SIZE,
187                    height: IMPORTANCE_BLOCK_SIZE,
188                }));
189
190            predict_dc_intra(
191                TileRect {
192                    x: x * IMPORTANCE_BLOCK_SIZE,
193                    y: y * IMPORTANCE_BLOCK_SIZE,
194                    width: IMPORTANCE_BLOCK_SIZE,
195                    height: IMPORTANCE_BLOCK_SIZE,
196                },
197                &mut plane_after_prediction_region,
198                tx_size,
199                bit_depth,
200                &edge_buf,
201                cpu_feature_level,
202            );
203
204            let plane_after_prediction_region = plane_after_prediction.region(Area::Rect(Rect {
205                x: (x * IMPORTANCE_BLOCK_SIZE) as isize,
206                y: (y * IMPORTANCE_BLOCK_SIZE) as isize,
207                width: IMPORTANCE_BLOCK_SIZE,
208                height: IMPORTANCE_BLOCK_SIZE,
209            }));
210
211            let intra_cost = get_satd(
212                &plane_org,
213                &plane_after_prediction_region,
214                bsize.width(),
215                bsize.height(),
216                bit_depth,
217                cpu_feature_level,
218            );
219
220            intra_costs.push(intra_cost);
221        }
222    }
223
224    intra_costs.into_boxed_slice()
225}
226
227pub fn get_intra_edges<'a, T: Pixel>(
228    edge_buf: &'a mut IntraEdgeBuffer<T>,
229    dst: &PlaneRegion<'_, T>,
230    po: PlaneOffset,
231    bit_depth: usize,
232) -> IntraEdge<'a, T> {
233    let tx_size = TxSize::TX_8X8;
234    let mut init_left: usize = 0;
235    let mut init_above: usize = 0;
236
237    let base = 128u16 << (bit_depth - 8);
238
239    {
240        // left pixels are ordered from bottom to top and right-aligned
241        let (left, not_left) = edge_buf.split_at_mut(2 * MAX_TX_SIZE);
242        let (top_left, above) = not_left.split_at_mut(1);
243
244        let x = po.x as usize;
245        let y = po.y as usize;
246
247        let needs_left = x != 0;
248        let needs_top = y != 0;
249
250        let rect_w = dst
251            .rect()
252            .width
253            .min(dst.plane_cfg.width - dst.rect().x as usize);
254        let rect_h = dst
255            .rect()
256            .height
257            .min(dst.plane_cfg.height - dst.rect().y as usize);
258
259        // Needs left
260        if needs_left {
261            let txh = if y + tx_size.height() > rect_h {
262                rect_h - y
263            } else {
264                tx_size.height()
265            };
266            if x != 0 {
267                for i in 0..txh {
268                    debug_assert!(y + i < rect_h);
269                    left[2 * MAX_TX_SIZE - 1 - i].write(dst[y + i][x - 1]);
270                }
271                if txh < tx_size.height() {
272                    let val = dst[y + txh - 1][x - 1];
273                    for i in txh..tx_size.height() {
274                        left[2 * MAX_TX_SIZE - 1 - i].write(val);
275                    }
276                }
277            } else {
278                let val = if y != 0 {
279                    dst[y - 1][0]
280                } else {
281                    T::cast_from(base + 1)
282                };
283                for v in left[2 * MAX_TX_SIZE - tx_size.height()..].iter_mut() {
284                    v.write(val);
285                }
286            }
287            init_left += tx_size.height();
288        }
289
290        // Needs top
291        if needs_top {
292            let txw = if x + tx_size.width() > rect_w {
293                rect_w - x
294            } else {
295                tx_size.width()
296            };
297            if y != 0 {
298                above[..txw].copy_from_slice(
299                    // SAFETY: &[T] and &[MaybeUninit<T>] have the same layout
300                    unsafe { transmute::<&[T], &[MaybeUninit<T>]>(&dst[y - 1][x..x + txw]) },
301                );
302                if txw < tx_size.width() {
303                    let val = dst[y - 1][x + txw - 1];
304                    for v in &mut above[txw..tx_size.width()] {
305                        v.write(val);
306                    }
307                }
308            } else {
309                let val = if x != 0 {
310                    dst[0][x - 1]
311                } else {
312                    T::cast_from(base - 1)
313                };
314                for v in &mut above[..tx_size.width()] {
315                    v.write(val);
316                }
317            }
318            init_above += tx_size.width();
319        }
320
321        top_left[0].write(T::cast_from(base));
322    }
323    IntraEdge::new(edge_buf, init_left, init_above)
324}
325
326pub fn predict_dc_intra<T: Pixel>(
327    tile_rect: TileRect,
328    dst: &mut PlaneRegionMut<'_, T>,
329    tx_size: TxSize,
330    bit_depth: usize,
331    edge_buf: &IntraEdge<T>,
332    cpu: CpuFeatureLevel,
333) {
334    let &Rect {
335        x: frame_x,
336        y: frame_y,
337        ..
338    } = dst.rect();
339    debug_assert!(frame_x >= 0 && frame_y >= 0);
340    // x and y are expressed relative to the tile
341    let x = frame_x as usize - tile_rect.x;
342    let y = frame_y as usize - tile_rect.y;
343
344    let variant = PredictionVariant::new(x, y);
345
346    dispatch_predict_dc_intra::<T>(variant, dst, tx_size, bit_depth, edge_buf, cpu);
347}
348
349type IntraEdgeBuffer<T> = Aligned<A64, [MaybeUninit<T>; 4 * MAX_TX_SIZE + 1]>;
350
351pub struct IntraEdge<'a, T: Pixel>(&'a [T], &'a [T], &'a [T]);
352
353impl<'a, T: Pixel> IntraEdge<'a, T> {
354    fn new(edge_buf: &'a mut IntraEdgeBuffer<T>, init_left: usize, init_above: usize) -> Self {
355        // SAFETY: Initialized in `get_intra_edges`.
356        let left = unsafe {
357            let begin_left = 2 * MAX_TX_SIZE - init_left;
358            let end_above = 2 * MAX_TX_SIZE + 1 + init_above;
359            slice_assume_init_mut(&mut edge_buf[begin_left..end_above])
360        };
361        let (left, top_left) = left.split_at(init_left);
362        let (top_left, above) = top_left.split_at(1);
363        Self(left, top_left, above)
364    }
365
366    pub const fn as_slices(&self) -> (&'a [T], &'a [T], &'a [T]) {
367        (self.0, self.1, self.2)
368    }
369
370    #[allow(dead_code)]
371    pub const fn top_left_ptr(&self) -> *const T {
372        self.1.as_ptr()
373    }
374}