rav1e/
predict.rs

1// Copyright (c) 2017-2022, The rav1e contributors. All rights reserved
2//
3// This source code is subject to the terms of the BSD 2 Clause License and
4// the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
5// was not distributed with this source code in the LICENSE file, you can
6// obtain it at www.aomedia.org/license/software. If the Alliance for Open
7// Media Patent License 1.0 was not distributed with this source code in the
8// PATENTS file, you can obtain it at www.aomedia.org/license/patent.
9
10#![allow(non_upper_case_globals)]
11#![allow(non_camel_case_types)]
12#![allow(dead_code)]
13
14use std::mem::MaybeUninit;
15
16cfg_if::cfg_if! {
17  if #[cfg(nasm_x86_64)] {
18    pub use crate::asm::x86::predict::*;
19  } else if #[cfg(asm_neon)] {
20    pub use crate::asm::aarch64::predict::*;
21  } else {
22    pub use self::rust::*;
23  }
24}
25
26use crate::context::{TileBlockOffset, MAX_SB_SIZE_LOG2, MAX_TX_SIZE};
27use crate::cpu_features::CpuFeatureLevel;
28use crate::encoder::FrameInvariants;
29use crate::frame::*;
30use crate::mc::*;
31use crate::partition::*;
32use crate::tiling::*;
33use crate::transform::*;
34use crate::util::*;
35use std::convert::TryInto;
36
/// Amount added to a directional mode's base angle for each unit of angle
/// delta; `PredictionMode::predict_intra` multiplies the delta by this value.
pub const ANGLE_STEP: i8 = 3;
38
// TODO: Review the order of this list.
// The order impacts compression efficiency.
/// Intra prediction modes, in the order given below. Every intra variant of
/// [`PredictionMode`] is listed except `UV_CFL_PRED`.
pub static RAV1E_INTRA_MODES: &[PredictionMode] = &[
  PredictionMode::DC_PRED,
  PredictionMode::H_PRED,
  PredictionMode::V_PRED,
  PredictionMode::SMOOTH_PRED,
  PredictionMode::SMOOTH_H_PRED,
  PredictionMode::SMOOTH_V_PRED,
  PredictionMode::PAETH_PRED,
  PredictionMode::D45_PRED,
  PredictionMode::D135_PRED,
  PredictionMode::D113_PRED,
  PredictionMode::D157_PRED,
  PredictionMode::D203_PRED,
  PredictionMode::D67_PRED,
];
56
/// Smallest set of single-reference inter modes: only `NEARESTMV`.
pub static RAV1E_INTER_MODES_MINIMAL: &[PredictionMode] =
  &[PredictionMode::NEARESTMV];
59
/// Compound (two-reference) inter modes. The `NEAR_NEW*` and `NEW_NEAR*`
/// variants of [`PredictionMode`] are not part of this list.
pub static RAV1E_INTER_COMPOUND_MODES: &[PredictionMode] = &[
  PredictionMode::GLOBAL_GLOBALMV,
  PredictionMode::NEAREST_NEARESTMV,
  PredictionMode::NEW_NEWMV,
  PredictionMode::NEAREST_NEWMV,
  PredictionMode::NEW_NEARESTMV,
  PredictionMode::NEAR_NEAR0MV,
  PredictionMode::NEAR_NEAR1MV,
  PredictionMode::NEAR_NEAR2MV,
];
70
// There are more modes than in the spec because every allowed
// drl index for NEAR modes is considered its own mode.
/// Intra and inter prediction modes.
///
/// The declaration order is load-bearing: the derived `PartialOrd` is used
/// by `is_intra` (everything before `NEARESTMV`), `is_compound` (everything
/// from `NEAREST_NEARESTMV` on) and `is_directional` (`V_PRED` through
/// `D67_PRED`), and `ref_mv_idx` subtracts discriminants of the consecutive
/// `NEAR*MV` / `NEAR_NEAR*MV` variants. Do not reorder variants without
/// revisiting those methods.
#[derive(Copy, Clone, Debug, PartialEq, PartialOrd, Eq, Ord, Default)]
pub enum PredictionMode {
  #[default]
  DC_PRED, // Average of above and left pixels
  V_PRED,      // Vertical
  H_PRED,      // Horizontal
  D45_PRED,    // Directional 45  degree
  D135_PRED,   // Directional 135 degree
  D113_PRED,   // Directional 113 degree
  D157_PRED,   // Directional 157 degree
  D203_PRED,   // Directional 203 degree
  D67_PRED,    // Directional 67  degree
  SMOOTH_PRED, // Combination of horizontal and vertical interpolation
  SMOOTH_V_PRED,
  SMOOTH_H_PRED,
  PAETH_PRED,
  UV_CFL_PRED,
  // Single-reference inter modes start here (see `is_intra`).
  NEARESTMV,
  NEAR0MV,
  NEAR1MV,
  NEAR2MV,
  GLOBALMV,
  NEWMV,
  // Compound ref compound modes
  NEAREST_NEARESTMV,
  NEAR_NEAR0MV,
  NEAR_NEAR1MV,
  NEAR_NEAR2MV,
  NEAREST_NEWMV,
  NEW_NEARESTMV,
  NEAR_NEW0MV,
  NEAR_NEW1MV,
  NEAR_NEW2MV,
  NEW_NEAR0MV,
  NEW_NEAR1MV,
  NEW_NEAR2MV,
  GLOBAL_GLOBALMV,
  NEW_NEWMV,
}
112
// This is a higher number than in the spec and cannot be used
// for bitstream writing purposes.
/// Number of variants declared in [`PredictionMode`].
pub const PREDICTION_MODES: usize = 34;
116
/// Classification of a block position by which of its two coordinates are
/// non-zero. The intra dispatcher uses it to pick between predictor flavours
/// (for example `pred_dc_128`, `pred_dc_left`, `pred_dc_top`, `pred_dc`).
#[derive(Copy, Clone, Debug)]
pub enum PredictionVariant {
  /// `x == 0` and `y == 0`.
  NONE,
  /// `x != 0` and `y == 0`.
  LEFT,
  /// `x == 0` and `y != 0`.
  TOP,
  /// `x != 0` and `y != 0`.
  BOTH,
}

impl PredictionVariant {
  /// Builds the variant for a position `(x, y)`; only whether each
  /// coordinate is zero matters.
  #[inline]
  const fn new(x: usize, y: usize) -> Self {
    let x_nonzero = x != 0;
    let y_nonzero = y != 0;
    if x_nonzero && y_nonzero {
      PredictionVariant::BOTH
    } else if x_nonzero {
      PredictionVariant::LEFT
    } else if y_nonzero {
      PredictionVariant::TOP
    } else {
      PredictionVariant::NONE
    }
  }
}
136
137pub const fn intra_mode_to_angle(mode: PredictionMode) -> isize {
138  match mode {
139    PredictionMode::V_PRED => 90,
140    PredictionMode::H_PRED => 180,
141    PredictionMode::D45_PRED => 45,
142    PredictionMode::D135_PRED => 135,
143    PredictionMode::D113_PRED => 113,
144    PredictionMode::D157_PRED => 157,
145    PredictionMode::D203_PRED => 203,
146    PredictionMode::D67_PRED => 67,
147    _ => 0,
148  }
149}
150
151impl PredictionMode {
152  #[inline]
153  pub fn is_compound(self) -> bool {
154    self >= PredictionMode::NEAREST_NEARESTMV
155  }
156  #[inline]
157  pub fn has_nearmv(self) -> bool {
158    self == PredictionMode::NEAR0MV
159      || self == PredictionMode::NEAR1MV
160      || self == PredictionMode::NEAR2MV
161      || self == PredictionMode::NEAR_NEAR0MV
162      || self == PredictionMode::NEAR_NEAR1MV
163      || self == PredictionMode::NEAR_NEAR2MV
164      || self == PredictionMode::NEAR_NEW0MV
165      || self == PredictionMode::NEAR_NEW1MV
166      || self == PredictionMode::NEAR_NEW2MV
167      || self == PredictionMode::NEW_NEAR0MV
168      || self == PredictionMode::NEW_NEAR1MV
169      || self == PredictionMode::NEW_NEAR2MV
170  }
171  #[inline]
172  pub fn has_newmv(self) -> bool {
173    self == PredictionMode::NEWMV
174      || self == PredictionMode::NEW_NEWMV
175      || self == PredictionMode::NEAREST_NEWMV
176      || self == PredictionMode::NEW_NEARESTMV
177      || self == PredictionMode::NEAR_NEW0MV
178      || self == PredictionMode::NEAR_NEW1MV
179      || self == PredictionMode::NEAR_NEW2MV
180      || self == PredictionMode::NEW_NEAR0MV
181      || self == PredictionMode::NEW_NEAR1MV
182      || self == PredictionMode::NEW_NEAR2MV
183  }
184  #[inline]
185  pub fn ref_mv_idx(self) -> usize {
186    if self == PredictionMode::NEAR0MV
187      || self == PredictionMode::NEAR1MV
188      || self == PredictionMode::NEAR2MV
189    {
190      self as usize - PredictionMode::NEAR0MV as usize + 1
191    } else if self == PredictionMode::NEAR_NEAR0MV
192      || self == PredictionMode::NEAR_NEAR1MV
193      || self == PredictionMode::NEAR_NEAR2MV
194    {
195      self as usize - PredictionMode::NEAR_NEAR0MV as usize + 1
196    } else {
197      1
198    }
199  }
200
201  /// # Panics
202  ///
203  /// - If called on an inter `PredictionMode`
204  pub fn predict_intra<T: Pixel>(
205    self, tile_rect: TileRect, dst: &mut PlaneRegionMut<'_, T>,
206    tx_size: TxSize, bit_depth: usize, ac: &[i16], intra_param: IntraParam,
207    ief_params: Option<IntraEdgeFilterParameters>, edge_buf: &IntraEdge<T>,
208    cpu: CpuFeatureLevel,
209  ) {
210    assert!(self.is_intra());
211    let &Rect { x: frame_x, y: frame_y, .. } = dst.rect();
212    debug_assert!(frame_x >= 0 && frame_y >= 0);
213    // x and y are expressed relative to the tile
214    let x = frame_x as usize - tile_rect.x;
215    let y = frame_y as usize - tile_rect.y;
216
217    let variant = PredictionVariant::new(x, y);
218
219    let alpha = match intra_param {
220      IntraParam::Alpha(val) => val,
221      _ => 0,
222    };
223    let angle_delta = match intra_param {
224      IntraParam::AngleDelta(val) => val,
225      _ => 0,
226    };
227
228    let mode = match self {
229      PredictionMode::PAETH_PRED => match variant {
230        PredictionVariant::NONE => PredictionMode::DC_PRED,
231        PredictionVariant::TOP => PredictionMode::V_PRED,
232        PredictionVariant::LEFT => PredictionMode::H_PRED,
233        PredictionVariant::BOTH => PredictionMode::PAETH_PRED,
234      },
235      PredictionMode::UV_CFL_PRED if alpha == 0 => PredictionMode::DC_PRED,
236      _ => self,
237    };
238
239    let angle = match mode {
240      PredictionMode::UV_CFL_PRED => alpha as isize,
241      _ => intra_mode_to_angle(mode) + (angle_delta * ANGLE_STEP) as isize,
242    };
243
244    dispatch_predict_intra::<T>(
245      mode, variant, dst, tx_size, bit_depth, ac, angle, ief_params, edge_buf,
246      cpu,
247    );
248  }
249
250  #[inline]
251  pub fn is_intra(self) -> bool {
252    self < PredictionMode::NEARESTMV
253  }
254
255  #[inline]
256  pub fn is_cfl(self) -> bool {
257    self == PredictionMode::UV_CFL_PRED
258  }
259
260  #[inline]
261  pub fn is_directional(self) -> bool {
262    self >= PredictionMode::V_PRED && self <= PredictionMode::D67_PRED
263  }
264
265  #[inline(always)]
266  pub const fn angle_delta_count(self) -> i8 {
267    match self {
268      PredictionMode::V_PRED
269      | PredictionMode::H_PRED
270      | PredictionMode::D45_PRED
271      | PredictionMode::D135_PRED
272      | PredictionMode::D113_PRED
273      | PredictionMode::D157_PRED
274      | PredictionMode::D203_PRED
275      | PredictionMode::D67_PRED => 7,
276      _ => 1,
277    }
278  }
279
280  // Used by inter prediction to extract the fractional component of a mv and
281  // obtain the correct PlaneSlice to operate on.
282  #[inline]
283  fn get_mv_params<T: Pixel>(
284    rec_plane: &Plane<T>, po: PlaneOffset, mv: MotionVector,
285  ) -> (i32, i32, PlaneSlice<T>) {
286    let &PlaneConfig { xdec, ydec, .. } = &rec_plane.cfg;
287    let row_offset = mv.row as i32 >> (3 + ydec);
288    let col_offset = mv.col as i32 >> (3 + xdec);
289    let row_frac = ((mv.row as i32) << (1 - ydec)) & 0xf;
290    let col_frac = ((mv.col as i32) << (1 - xdec)) & 0xf;
291    let qo = PlaneOffset {
292      x: po.x + col_offset as isize - 3,
293      y: po.y + row_offset as isize - 3,
294    };
295    (row_frac, col_frac, rec_plane.slice(qo).clamp().subslice(3, 3))
296  }
297
298  /// Inter prediction with a single reference (i.e. not compound mode)
299  ///
300  /// # Panics
301  ///
302  /// - If called on an intra `PredictionMode`
303  pub fn predict_inter_single<T: Pixel>(
304    self, fi: &FrameInvariants<T>, tile_rect: TileRect, p: usize,
305    po: PlaneOffset, dst: &mut PlaneRegionMut<'_, T>, width: usize,
306    height: usize, ref_frame: RefType, mv: MotionVector,
307  ) {
308    assert!(!self.is_intra());
309    let frame_po = tile_rect.to_frame_plane_offset(po);
310
311    let mode = fi.default_filter;
312
313    if let Some(ref rec) =
314      fi.rec_buffer.frames[fi.ref_frames[ref_frame.to_index()] as usize]
315    {
316      let (row_frac, col_frac, src) =
317        PredictionMode::get_mv_params(&rec.frame.planes[p], frame_po, mv);
318      put_8tap(
319        dst,
320        src,
321        width,
322        height,
323        col_frac,
324        row_frac,
325        mode,
326        mode,
327        fi.sequence.bit_depth,
328        fi.cpu_feature_level,
329      );
330    }
331  }
332
333  /// Inter prediction with two references.
334  ///
335  /// # Panics
336  ///
337  /// - If called on an intra `PredictionMode`
338  pub fn predict_inter_compound<T: Pixel>(
339    self, fi: &FrameInvariants<T>, tile_rect: TileRect, p: usize,
340    po: PlaneOffset, dst: &mut PlaneRegionMut<'_, T>, width: usize,
341    height: usize, ref_frames: [RefType; 2], mvs: [MotionVector; 2],
342    buffer: &mut InterCompoundBuffers,
343  ) {
344    assert!(!self.is_intra());
345    let frame_po = tile_rect.to_frame_plane_offset(po);
346
347    let mode = fi.default_filter;
348
349    for i in 0..2 {
350      if let Some(ref rec) =
351        fi.rec_buffer.frames[fi.ref_frames[ref_frames[i].to_index()] as usize]
352      {
353        let (row_frac, col_frac, src) = PredictionMode::get_mv_params(
354          &rec.frame.planes[p],
355          frame_po,
356          mvs[i],
357        );
358        prep_8tap(
359          buffer.get_buffer_mut(i),
360          src,
361          width,
362          height,
363          col_frac,
364          row_frac,
365          mode,
366          mode,
367          fi.sequence.bit_depth,
368          fi.cpu_feature_level,
369        );
370      }
371    }
372    mc_avg(
373      dst,
374      buffer.get_buffer(0),
375      buffer.get_buffer(1),
376      width,
377      height,
378      fi.sequence.bit_depth,
379      fi.cpu_feature_level,
380    );
381  }
382
383  /// Inter prediction that determines whether compound mode is being used based
384  /// on the second [`RefType`] in [`ref_frames`].
385  pub fn predict_inter<T: Pixel>(
386    self, fi: &FrameInvariants<T>, tile_rect: TileRect, p: usize,
387    po: PlaneOffset, dst: &mut PlaneRegionMut<'_, T>, width: usize,
388    height: usize, ref_frames: [RefType; 2], mvs: [MotionVector; 2],
389    compound_buffer: &mut InterCompoundBuffers,
390  ) {
391    let is_compound = ref_frames[1] != RefType::INTRA_FRAME
392      && ref_frames[1] != RefType::NONE_FRAME;
393
394    if !is_compound {
395      self.predict_inter_single(
396        fi,
397        tile_rect,
398        p,
399        po,
400        dst,
401        width,
402        height,
403        ref_frames[0],
404        mvs[0],
405      )
406    } else {
407      self.predict_inter_compound(
408        fi,
409        tile_rect,
410        p,
411        po,
412        dst,
413        width,
414        height,
415        ref_frames,
416        mvs,
417        compound_buffer,
418      );
419    }
420  }
421}
422
/// A pair of buffers holding the interpolation of two references. Use for
/// compound inter prediction.
#[derive(Debug)]
pub struct InterCompoundBuffers {
  // Both buffers live back to back in this one allocation; each occupies
  // `BUFFER_SIZE` elements.
  data: AlignedBoxedSlice<i16>,
}
429
430impl InterCompoundBuffers {
431  // Size of one of the two buffers used.
432  const BUFFER_SIZE: usize = 1 << (2 * MAX_SB_SIZE_LOG2);
433
434  /// Get the buffer for eith
435  #[inline]
436  fn get_buffer_mut(&mut self, i: usize) -> &mut [i16] {
437    match i {
438      0 => &mut self.data[0..Self::BUFFER_SIZE],
439      1 => &mut self.data[Self::BUFFER_SIZE..2 * Self::BUFFER_SIZE],
440      _ => panic!(),
441    }
442  }
443
444  #[inline]
445  fn get_buffer(&self, i: usize) -> &[i16] {
446    match i {
447      0 => &self.data[0..Self::BUFFER_SIZE],
448      1 => &self.data[Self::BUFFER_SIZE..2 * Self::BUFFER_SIZE],
449      _ => panic!(),
450    }
451  }
452}
453
454impl Default for InterCompoundBuffers {
455  fn default() -> Self {
456    Self { data: AlignedBoxedSlice::new(2 * Self::BUFFER_SIZE, 0) }
457  }
458}
459
/// Inter-intra mode identifiers.
///
/// NOTE(review): not referenced in the visible part of this file; presumably
/// kept to mirror the spec's enumeration. Confirm before relying on it.
#[derive(Copy, Clone, Debug, PartialEq, Eq, PartialOrd)]
pub enum InterIntraMode {
  II_DC_PRED,
  II_V_PRED,
  II_H_PRED,
  II_SMOOTH_PRED,
  // Declared last, so its implicit discriminant (4) equals the number of
  // variants above it; presumably a count sentinel rather than a real mode.
  INTERINTRA_MODES,
}
468
/// Compound prediction type identifiers.
///
/// NOTE(review): not referenced in the visible part of this file; the
/// compound path shown here only averages two predictions (`mc_avg`).
#[derive(Copy, Clone, Debug, PartialEq, Eq, PartialOrd)]
pub enum CompoundType {
  COMPOUND_AVERAGE,
  COMPOUND_WEDGE,
  COMPOUND_DIFFWTD,
  // Declared last, so its implicit discriminant (3) equals the number of
  // variants above it; presumably a count sentinel rather than a real type.
  COMPOUND_TYPES,
}
476
/// Motion mode identifiers.
///
/// NOTE(review): not referenced in the visible part of this file.
#[derive(Copy, Clone, Debug, PartialEq, Eq, PartialOrd)]
pub enum MotionMode {
  SIMPLE_TRANSLATION,
  OBMC_CAUSAL,   // 2-sided OBMC
  WARPED_CAUSAL, // 2-sided WARPED
  // Declared last, so its implicit discriminant (3) equals the number of
  // variants above it; presumably a count sentinel rather than a real mode.
  MOTION_MODES,
}
484
/// Palette size identifiers, from two to eight colors.
///
/// NOTE(review): not referenced in the visible part of this file.
#[derive(Copy, Clone, Debug, PartialEq, Eq, PartialOrd)]
pub enum PaletteSize {
  TWO_COLORS,
  THREE_COLORS,
  FOUR_COLORS,
  FIVE_COLORS,
  SIX_COLORS,
  SEVEN_COLORS,
  EIGHT_COLORS,
  // Declared last, so its implicit discriminant (7) equals the number of
  // variants above it; presumably a count sentinel rather than a real size.
  PALETTE_SIZES,
}
496
/// Palette color slot identifiers, one through eight.
///
/// NOTE(review): not referenced in the visible part of this file.
#[derive(Copy, Clone, Debug, PartialEq, Eq, PartialOrd)]
pub enum PaletteColor {
  PALETTE_COLOR_ONE,
  PALETTE_COLOR_TWO,
  PALETTE_COLOR_THREE,
  PALETTE_COLOR_FOUR,
  PALETTE_COLOR_FIVE,
  PALETTE_COLOR_SIX,
  PALETTE_COLOR_SEVEN,
  PALETTE_COLOR_EIGHT,
  // Declared last, so its implicit discriminant (8) equals the number of
  // variants above it; presumably a count sentinel rather than a real color.
  PALETTE_COLORS,
}
509
/// Filter-intra mode identifiers.
///
/// NOTE(review): not referenced in the visible part of this file.
#[derive(Copy, Clone, Debug, PartialEq, Eq, PartialOrd)]
pub enum FilterIntraMode {
  FILTER_DC_PRED,
  FILTER_V_PRED,
  FILTER_H_PRED,
  FILTER_D157_PRED,
  FILTER_PAETH_PRED,
  // Declared last, so its implicit discriminant (5) equals the number of
  // variants above it; presumably a count sentinel rather than a real mode.
  FILTER_INTRA_MODES,
}
519
/// Extra parameter accepted by `PredictionMode::predict_intra`.
#[derive(Copy, Clone, Debug)]
pub enum IntraParam {
  /// Angle delta; multiplied by `ANGLE_STEP` and added to the mode's base
  /// angle for every mode other than `UV_CFL_PRED`.
  AngleDelta(i8),
  /// CfL alpha. A value of 0 makes `UV_CFL_PRED` fall back to `DC_PRED`.
  Alpha(i16),
  /// No parameter: both the alpha and the angle delta are taken as 0.
  None,
}
526
/// A pair of angle deltas.
///
/// NOTE(review): presumably `y` is the luma delta and `uv` the chroma delta;
/// the visible part of this file does not use the struct, so confirm.
#[derive(Debug, Clone, Copy, Default)]
pub struct AngleDelta {
  pub y: i8,
  pub uv: i8,
}
532
/// Neighbour information consulted by `use_smooth_filter`.
#[derive(Copy, Clone, Default)]
pub struct IntraEdgeFilterParameters {
  /// Plane index; `new` treats 0 as the plane that uses `luma_mode` and
  /// every other value as a plane that uses `chroma_mode`.
  pub plane: usize,
  /// `reference_types` of the above block, if one was supplied to `new`.
  pub above_ref_frame_types: Option<[RefType; 2]>,
  /// `reference_types` of the left block, if one was supplied to `new`.
  pub left_ref_frame_types: Option<[RefType; 2]>,
  /// Prediction mode of the above block for `plane`, if known.
  pub above_mode: Option<PredictionMode>,
  /// Prediction mode of the left block for `plane`, if known.
  pub left_mode: Option<PredictionMode>,
}
541
542impl IntraEdgeFilterParameters {
543  pub fn new(
544    plane: usize, above_ctx: Option<CodedBlockInfo>,
545    left_ctx: Option<CodedBlockInfo>,
546  ) -> Self {
547    IntraEdgeFilterParameters {
548      plane,
549      above_mode: match above_ctx {
550        Some(bi) => match plane {
551          0 => bi.luma_mode,
552          _ => bi.chroma_mode,
553        }
554        .into(),
555        None => None,
556      },
557      left_mode: match left_ctx {
558        Some(bi) => match plane {
559          0 => bi.luma_mode,
560          _ => bi.chroma_mode,
561        }
562        .into(),
563        None => None,
564      },
565      above_ref_frame_types: above_ctx.map(|bi| bi.reference_types),
566      left_ref_frame_types: left_ctx.map(|bi| bi.reference_types),
567    }
568  }
569
570  /// # Panics
571  ///
572  /// - If the appropriate ref frame types are not set on `self`
573  pub fn use_smooth_filter(self) -> bool {
574    let above_smooth = match self.above_mode {
575      Some(PredictionMode::SMOOTH_PRED)
576      | Some(PredictionMode::SMOOTH_V_PRED)
577      | Some(PredictionMode::SMOOTH_H_PRED) => {
578        self.plane == 0
579          || self.above_ref_frame_types.unwrap()[0] == RefType::INTRA_FRAME
580      }
581      _ => false,
582    };
583
584    let left_smooth = match self.left_mode {
585      Some(PredictionMode::SMOOTH_PRED)
586      | Some(PredictionMode::SMOOTH_V_PRED)
587      | Some(PredictionMode::SMOOTH_H_PRED) => {
588        self.plane == 0
589          || self.left_ref_frame_types.unwrap()[0] == RefType::INTRA_FRAME
590      }
591      _ => false,
592    };
593
594    above_smooth || left_smooth
595  }
596}
597
// Weights are quadratic from '1' to '1 / block_size', scaled by 2^sm_weight_log2_scale.
// The smooth predictors shift by this amount (plus one more bit in
// `pred_smooth`, which blends two directions) after summing weighted pixels.
const sm_weight_log2_scale: u8 = 8;
600
// Smooth predictor weights
// The weights for block size `bs` start at index `bs`, so callers slice the
// table with `&sm_weight_arrays[bs..]` and index it by row or column.
#[rustfmt::skip]
static sm_weight_arrays: [u8; 2 * MAX_TX_SIZE] = [
    // Unused, because we always offset by bs, which is at least 2.
    0, 0,
    // bs = 2
    255, 128,
    // bs = 4
    255, 149, 85, 64,
    // bs = 8
    255, 197, 146, 105, 73, 50, 37, 32,
    // bs = 16
    255, 225, 196, 170, 145, 123, 102, 84, 68, 54, 43, 33, 26, 20, 17, 16,
    // bs = 32
    255, 240, 225, 210, 196, 182, 169, 157, 145, 133, 122, 111, 101, 92, 83, 74,
    66, 59, 52, 45, 39, 34, 29, 25, 21, 17, 14, 12, 10, 9, 8, 8,
    // bs = 64
    255, 248, 240, 233, 225, 218, 210, 203, 196, 189, 182, 176, 169, 163, 156,
    150, 144, 138, 133, 127, 121, 116, 111, 106, 101, 96, 91, 86, 82, 77, 73, 69,
    65, 61, 57, 54, 50, 47, 44, 41, 38, 35, 32, 29, 27, 25, 22, 20, 18, 16, 15,
    13, 12, 10, 9, 8, 7, 6, 6, 5, 5, 4, 4, 4,
];
623
/// Multiplies `alpha_q3` by `ac_pred_q3` and divides the product by 64,
/// rounding to the nearest integer with ties going away from zero.
#[inline(always)]
const fn get_scaled_luma_q0(alpha_q3: i16, ac_pred_q3: i16) -> i32 {
  // Two i16 factors cannot overflow an i32 product.
  let product_q6 = alpha_q3 as i32 * ac_pred_q3 as i32;
  // Round the magnitude, then put the sign back.
  let magnitude_q0 = (product_q6.abs() + 32) >> 6;
  magnitude_q0 * product_q6.signum()
}
634
635/// # Returns
636///
637/// Initialized luma AC coefficients
638///
639/// # Panics
640///
641/// - If the block size is invalid for subsampling
642///
643pub fn luma_ac<'ac, T: Pixel>(
644  ac: &'ac mut [MaybeUninit<i16>], ts: &mut TileStateMut<'_, T>,
645  tile_bo: TileBlockOffset, bsize: BlockSize, tx_size: TxSize,
646  fi: &FrameInvariants<T>,
647) -> &'ac mut [i16] {
648  use crate::context::MI_SIZE_LOG2;
649
650  let PlaneConfig { xdec, ydec, .. } = ts.input.planes[1].cfg;
651  let plane_bsize = bsize.subsampled_size(xdec, ydec).unwrap();
652
653  // ensure ac has the right length, so there aren't any uninitialized elements at the end
654  let ac = &mut ac[..plane_bsize.area()];
655
656  let bo = if bsize.is_sub8x8(xdec, ydec) {
657    let offset = bsize.sub8x8_offset(xdec, ydec);
658    tile_bo.with_offset(offset.0, offset.1)
659  } else {
660    tile_bo
661  };
662  let rec = &ts.rec.planes[0];
663  let luma = &rec.subregion(Area::BlockStartingAt { bo: bo.0 });
664  let frame_bo = ts.to_frame_block_offset(bo);
665
666  let frame_clipped_bw: usize =
667    ((fi.w_in_b - frame_bo.0.x) << MI_SIZE_LOG2).min(bsize.width());
668  let frame_clipped_bh: usize =
669    ((fi.h_in_b - frame_bo.0.y) << MI_SIZE_LOG2).min(bsize.height());
670
671  // Similar to 'MaxLumaW' and 'MaxLumaH' stated in https://aomediacodec.github.io/av1-spec/#transform-block-semantics
672  let max_luma_w = if bsize.width() > BlockSize::BLOCK_8X8.width() {
673    let txw_log2 = tx_size.width_log2();
674    ((frame_clipped_bw + (1 << txw_log2) - 1) >> txw_log2) << txw_log2
675  } else {
676    bsize.width()
677  };
678  let max_luma_h = if bsize.height() > BlockSize::BLOCK_8X8.height() {
679    let txh_log2 = tx_size.height_log2();
680    ((frame_clipped_bh + (1 << txh_log2) - 1) >> txh_log2) << txh_log2
681  } else {
682    bsize.height()
683  };
684
685  let w_pad = (bsize.width() - max_luma_w) >> (2 + xdec);
686  let h_pad = (bsize.height() - max_luma_h) >> (2 + ydec);
687  let cpu = fi.cpu_feature_level;
688
689  (match (xdec, ydec) {
690    (0, 0) => pred_cfl_ac::<T, 0, 0>,
691    (1, 0) => pred_cfl_ac::<T, 1, 0>,
692    (_, _) => pred_cfl_ac::<T, 1, 1>,
693  })(ac, luma, plane_bsize, w_pad, h_pad, cpu);
694
695  // SAFETY: it relies on individual pred_cfl_ac implementations to initialize the ac
696  unsafe { slice_assume_init_mut(ac) }
697}
698
699pub(crate) mod rust {
700  use super::*;
701  use crate::context::MAX_TX_SIZE;
702  use crate::cpu_features::CpuFeatureLevel;
703  use crate::tiling::PlaneRegionMut;
704  use crate::transform::TxSize;
705  use crate::util::round_shift;
706  use crate::Pixel;
707  use std::mem::{size_of, MaybeUninit};
708
709  #[inline(always)]
710  pub fn dispatch_predict_intra<T: Pixel>(
711    mode: PredictionMode, variant: PredictionVariant,
712    dst: &mut PlaneRegionMut<'_, T>, tx_size: TxSize, bit_depth: usize,
713    ac: &[i16], angle: isize, ief_params: Option<IntraEdgeFilterParameters>,
714    edge_buf: &IntraEdge<T>, _cpu: CpuFeatureLevel,
715  ) {
716    let width = tx_size.width();
717    let height = tx_size.height();
718
719    // left pixels are ordered from bottom to top and right-aligned
720    let (left, top_left, above) = edge_buf.as_slices();
721
722    let above_slice = above;
723    let left_slice = &left[left.len().saturating_sub(height)..];
724    let left_and_left_below_slice =
725      &left[left.len().saturating_sub(width + height)..];
726
727    match mode {
728      PredictionMode::DC_PRED => {
729        (match variant {
730          PredictionVariant::NONE => pred_dc_128,
731          PredictionVariant::LEFT => pred_dc_left,
732          PredictionVariant::TOP => pred_dc_top,
733          PredictionVariant::BOTH => pred_dc,
734        })(dst, above_slice, left_slice, width, height, bit_depth)
735      }
736      PredictionMode::V_PRED if angle == 90 => {
737        pred_v(dst, above_slice, width, height)
738      }
739      PredictionMode::H_PRED if angle == 180 => {
740        pred_h(dst, left_slice, width, height)
741      }
742      PredictionMode::H_PRED
743      | PredictionMode::V_PRED
744      | PredictionMode::D45_PRED
745      | PredictionMode::D135_PRED
746      | PredictionMode::D113_PRED
747      | PredictionMode::D157_PRED
748      | PredictionMode::D203_PRED
749      | PredictionMode::D67_PRED => pred_directional(
750        dst,
751        above_slice,
752        left_and_left_below_slice,
753        top_left,
754        angle as usize,
755        width,
756        height,
757        bit_depth,
758        ief_params,
759      ),
760      PredictionMode::SMOOTH_PRED => {
761        pred_smooth(dst, above_slice, left_slice, width, height)
762      }
763      PredictionMode::SMOOTH_V_PRED => {
764        pred_smooth_v(dst, above_slice, left_slice, width, height)
765      }
766      PredictionMode::SMOOTH_H_PRED => {
767        pred_smooth_h(dst, above_slice, left_slice, width, height)
768      }
769      PredictionMode::PAETH_PRED => {
770        pred_paeth(dst, above_slice, left_slice, top_left[0], width, height)
771      }
772      PredictionMode::UV_CFL_PRED => (match variant {
773        PredictionVariant::NONE => pred_cfl_128,
774        PredictionVariant::LEFT => pred_cfl_left,
775        PredictionVariant::TOP => pred_cfl_top,
776        PredictionVariant::BOTH => pred_cfl,
777      })(
778        dst,
779        ac,
780        angle as i16,
781        above_slice,
782        left_slice,
783        width,
784        height,
785        bit_depth,
786      ),
787      _ => unimplemented!(),
788    }
789  }
790
791  pub(crate) fn pred_dc<T: Pixel>(
792    output: &mut PlaneRegionMut<'_, T>, above: &[T], left: &[T], width: usize,
793    height: usize, _bit_depth: usize,
794  ) {
795    let edges = left[..height].iter().chain(above[..width].iter());
796    let len = (width + height) as u32;
797    let avg = (edges.fold(0u32, |acc, &v| {
798      let v: u32 = v.into();
799      v + acc
800    }) + (len >> 1))
801      / len;
802    let avg = T::cast_from(avg);
803
804    for line in output.rows_iter_mut().take(height) {
805      line[..width].fill(avg);
806    }
807  }
808
809  pub(crate) fn pred_dc_128<T: Pixel>(
810    output: &mut PlaneRegionMut<'_, T>, _above: &[T], _left: &[T],
811    width: usize, height: usize, bit_depth: usize,
812  ) {
813    let v = T::cast_from(128u32 << (bit_depth - 8));
814    for line in output.rows_iter_mut().take(height) {
815      line[..width].fill(v);
816    }
817  }
818
819  pub(crate) fn pred_dc_left<T: Pixel>(
820    output: &mut PlaneRegionMut<'_, T>, _above: &[T], left: &[T],
821    width: usize, height: usize, _bit_depth: usize,
822  ) {
823    let sum = left[..].iter().fold(0u32, |acc, &v| {
824      let v: u32 = v.into();
825      v + acc
826    });
827    let avg = T::cast_from((sum + (height >> 1) as u32) / height as u32);
828    for line in output.rows_iter_mut().take(height) {
829      line[..width].fill(avg);
830    }
831  }
832
833  pub(crate) fn pred_dc_top<T: Pixel>(
834    output: &mut PlaneRegionMut<'_, T>, above: &[T], _left: &[T],
835    width: usize, height: usize, _bit_depth: usize,
836  ) {
837    let sum = above[..width].iter().fold(0u32, |acc, &v| {
838      let v: u32 = v.into();
839      v + acc
840    });
841    let avg = T::cast_from((sum + (width >> 1) as u32) / width as u32);
842    for line in output.rows_iter_mut().take(height) {
843      line[..width].fill(avg);
844    }
845  }
846
847  pub(crate) fn pred_h<T: Pixel>(
848    output: &mut PlaneRegionMut<'_, T>, left: &[T], width: usize,
849    height: usize,
850  ) {
851    for (line, l) in output.rows_iter_mut().zip(left[..height].iter().rev()) {
852      line[..width].fill(*l);
853    }
854  }
855
856  pub(crate) fn pred_v<T: Pixel>(
857    output: &mut PlaneRegionMut<'_, T>, above: &[T], width: usize,
858    height: usize,
859  ) {
860    for line in output.rows_iter_mut().take(height) {
861      line[..width].copy_from_slice(&above[..width])
862    }
863  }
864
865  pub(crate) fn pred_paeth<T: Pixel>(
866    output: &mut PlaneRegionMut<'_, T>, above: &[T], left: &[T],
867    above_left: T, width: usize, height: usize,
868  ) {
869    for r in 0..height {
870      let row = &mut output[r];
871      for c in 0..width {
872        // Top-left pixel is fixed in libaom
873        let raw_top_left: i32 = above_left.into();
874        let raw_left: i32 = left[height - 1 - r].into();
875        let raw_top: i32 = above[c].into();
876
877        let p_base = raw_top + raw_left - raw_top_left;
878        let p_left = (p_base - raw_left).abs();
879        let p_top = (p_base - raw_top).abs();
880        let p_top_left = (p_base - raw_top_left).abs();
881
882        // Return nearest to base of left, top and top_left
883        if p_left <= p_top && p_left <= p_top_left {
884          row[c] = T::cast_from(raw_left);
885        } else if p_top <= p_top_left {
886          row[c] = T::cast_from(raw_top);
887        } else {
888          row[c] = T::cast_from(raw_top_left);
889        }
890      }
891    }
892  }
893
894  pub(crate) fn pred_smooth<T: Pixel>(
895    output: &mut PlaneRegionMut<'_, T>, above: &[T], left: &[T], width: usize,
896    height: usize,
897  ) {
898    let below_pred = left[0]; // estimated by bottom-left pixel
899    let right_pred = above[width - 1]; // estimated by top-right pixel
900    let sm_weights_w = &sm_weight_arrays[width..];
901    let sm_weights_h = &sm_weight_arrays[height..];
902
903    let log2_scale = 1 + sm_weight_log2_scale;
904    let scale = 1_u16 << sm_weight_log2_scale;
905
906    // Weights sanity checks
907    assert!((sm_weights_w[0] as u16) < scale);
908    assert!((sm_weights_h[0] as u16) < scale);
909    assert!((scale - sm_weights_w[width - 1] as u16) < scale);
910    assert!((scale - sm_weights_h[height - 1] as u16) < scale);
911    // ensures no overflow when calculating predictor
912    assert!(log2_scale as usize + size_of::<T>() < 31);
913
914    for r in 0..height {
915      let row = &mut output[r];
916      for c in 0..width {
917        let pixels = [above[c], below_pred, left[height - 1 - r], right_pred];
918
919        let weights = [
920          sm_weights_h[r] as u16,
921          scale - sm_weights_h[r] as u16,
922          sm_weights_w[c] as u16,
923          scale - sm_weights_w[c] as u16,
924        ];
925
926        assert!(
927          scale >= (sm_weights_h[r] as u16)
928            && scale >= (sm_weights_w[c] as u16)
929        );
930
931        // Sum up weighted pixels
932        let mut this_pred: u32 = weights
933          .iter()
934          .zip(pixels.iter())
935          .map(|(w, p)| {
936            let p: u32 = (*p).into();
937            (*w as u32) * p
938          })
939          .sum();
940        this_pred = (this_pred + (1 << (log2_scale - 1))) >> log2_scale;
941
942        row[c] = T::cast_from(this_pred);
943      }
944    }
945  }
946
947  pub(crate) fn pred_smooth_h<T: Pixel>(
948    output: &mut PlaneRegionMut<'_, T>, above: &[T], left: &[T], width: usize,
949    height: usize,
950  ) {
951    let right_pred = above[width - 1]; // estimated by top-right pixel
952    let sm_weights = &sm_weight_arrays[width..];
953
954    let log2_scale = sm_weight_log2_scale;
955    let scale = 1_u16 << sm_weight_log2_scale;
956
957    // Weights sanity checks
958    assert!((sm_weights[0] as u16) < scale);
959    assert!((scale - sm_weights[width - 1] as u16) < scale);
960    // ensures no overflow when calculating predictor
961    assert!(log2_scale as usize + size_of::<T>() < 31);
962
963    for r in 0..height {
964      let row = &mut output[r];
965      for c in 0..width {
966        let pixels = [left[height - 1 - r], right_pred];
967        let weights = [sm_weights[c] as u16, scale - sm_weights[c] as u16];
968
969        assert!(scale >= sm_weights[c] as u16);
970
971        let mut this_pred: u32 = weights
972          .iter()
973          .zip(pixels.iter())
974          .map(|(w, p)| {
975            let p: u32 = (*p).into();
976            (*w as u32) * p
977          })
978          .sum();
979        this_pred = (this_pred + (1 << (log2_scale - 1))) >> log2_scale;
980
981        row[c] = T::cast_from(this_pred);
982      }
983    }
984  }
985
986  pub(crate) fn pred_smooth_v<T: Pixel>(
987    output: &mut PlaneRegionMut<'_, T>, above: &[T], left: &[T], width: usize,
988    height: usize,
989  ) {
990    let below_pred = left[0]; // estimated by bottom-left pixel
991    let sm_weights = &sm_weight_arrays[height..];
992
993    let log2_scale = sm_weight_log2_scale;
994    let scale = 1_u16 << sm_weight_log2_scale;
995
996    // Weights sanity checks
997    assert!((sm_weights[0] as u16) < scale);
998    assert!((scale - sm_weights[height - 1] as u16) < scale);
999    // ensures no overflow when calculating predictor
1000    assert!(log2_scale as usize + size_of::<T>() < 31);
1001
1002    for r in 0..height {
1003      let row = &mut output[r];
1004      for c in 0..width {
1005        let pixels = [above[c], below_pred];
1006        let weights = [sm_weights[r] as u16, scale - sm_weights[r] as u16];
1007
1008        assert!(scale >= sm_weights[r] as u16);
1009
1010        let mut this_pred: u32 = weights
1011          .iter()
1012          .zip(pixels.iter())
1013          .map(|(w, p)| {
1014            let p: u32 = (*p).into();
1015            (*w as u32) * p
1016          })
1017          .sum();
1018        this_pred = (this_pred + (1 << (log2_scale - 1))) >> log2_scale;
1019
1020        row[c] = T::cast_from(this_pred);
1021      }
1022    }
1023  }
1024
1025  pub(crate) fn pred_cfl_ac<T: Pixel, const XDEC: usize, const YDEC: usize>(
1026    ac: &mut [MaybeUninit<i16>], luma: &PlaneRegion<'_, T>,
1027    plane_bsize: BlockSize, w_pad: usize, h_pad: usize, _cpu: CpuFeatureLevel,
1028  ) {
1029    let max_luma_w = (plane_bsize.width() - w_pad * 4) << XDEC;
1030    let max_luma_h = (plane_bsize.height() - h_pad * 4) << YDEC;
1031    let max_luma_x: usize = max_luma_w.max(8) - (1 << XDEC);
1032    let max_luma_y: usize = max_luma_h.max(8) - (1 << YDEC);
1033    let mut sum: i32 = 0;
1034
1035    let ac = &mut ac[..plane_bsize.area()];
1036
1037    for (sub_y, ac_rows) in
1038      ac.chunks_exact_mut(plane_bsize.width()).enumerate()
1039    {
1040      for (sub_x, ac_item) in ac_rows.iter_mut().enumerate() {
1041        // Refer to https://aomediacodec.github.io/av1-spec/#predict-chroma-from-luma-process
1042        let luma_y = sub_y << YDEC;
1043        let luma_x = sub_x << XDEC;
1044        let y = luma_y.min(max_luma_y);
1045        let x = luma_x.min(max_luma_x);
1046        let mut sample: i16 = i16::cast_from(luma[y][x]);
1047        if XDEC != 0 {
1048          sample += i16::cast_from(luma[y][x + 1]);
1049        }
1050        if YDEC != 0 {
1051          debug_assert!(XDEC != 0);
1052          sample += i16::cast_from(luma[y + 1][x])
1053            + i16::cast_from(luma[y + 1][x + 1]);
1054        }
1055        sample <<= 3 - XDEC - YDEC;
1056        ac_item.write(sample);
1057        sum += sample as i32;
1058      }
1059    }
1060    // SAFETY: the loop above has initialized all items
1061    let ac = unsafe { slice_assume_init_mut(ac) };
1062    let shift = plane_bsize.width_log2() + plane_bsize.height_log2();
1063    let average = ((sum + (1 << (shift - 1))) >> shift) as i16;
1064
1065    for val in ac {
1066      *val -= average;
1067    }
1068  }
1069
1070  pub(crate) fn pred_cfl_inner<T: Pixel>(
1071    output: &mut PlaneRegionMut<'_, T>, ac: &[i16], alpha: i16, width: usize,
1072    height: usize, bit_depth: usize,
1073  ) {
1074    if alpha == 0 {
1075      return;
1076    }
1077    debug_assert!(ac.len() >= width * height);
1078    assert!(output.plane_cfg.stride >= width);
1079    assert!(output.rows_iter().len() >= height);
1080
1081    let sample_max = (1 << bit_depth) - 1;
1082    let avg: i32 = output[0][0].into();
1083
1084    for (line, luma) in
1085      output.rows_iter_mut().zip(ac.chunks_exact(width)).take(height)
1086    {
1087      for (v, &l) in line[..width].iter_mut().zip(luma[..width].iter()) {
1088        *v = T::cast_from(
1089          (avg + get_scaled_luma_q0(alpha, l)).clamp(0, sample_max),
1090        );
1091      }
1092    }
1093  }
1094
1095  pub(crate) fn pred_cfl<T: Pixel>(
1096    output: &mut PlaneRegionMut<'_, T>, ac: &[i16], alpha: i16, above: &[T],
1097    left: &[T], width: usize, height: usize, bit_depth: usize,
1098  ) {
1099    pred_dc(output, above, left, width, height, bit_depth);
1100    pred_cfl_inner(output, ac, alpha, width, height, bit_depth);
1101  }
1102
1103  pub(crate) fn pred_cfl_128<T: Pixel>(
1104    output: &mut PlaneRegionMut<'_, T>, ac: &[i16], alpha: i16, above: &[T],
1105    left: &[T], width: usize, height: usize, bit_depth: usize,
1106  ) {
1107    pred_dc_128(output, above, left, width, height, bit_depth);
1108    pred_cfl_inner(output, ac, alpha, width, height, bit_depth);
1109  }
1110
1111  pub(crate) fn pred_cfl_left<T: Pixel>(
1112    output: &mut PlaneRegionMut<'_, T>, ac: &[i16], alpha: i16, above: &[T],
1113    left: &[T], width: usize, height: usize, bit_depth: usize,
1114  ) {
1115    pred_dc_left(output, above, left, width, height, bit_depth);
1116    pred_cfl_inner(output, ac, alpha, width, height, bit_depth);
1117  }
1118
1119  pub(crate) fn pred_cfl_top<T: Pixel>(
1120    output: &mut PlaneRegionMut<'_, T>, ac: &[i16], alpha: i16, above: &[T],
1121    left: &[T], width: usize, height: usize, bit_depth: usize,
1122  ) {
1123    pred_dc_top(output, above, left, width, height, bit_depth);
1124    pred_cfl_inner(output, ac, alpha, width, height, bit_depth);
1125  }
1126
1127  #[allow(clippy::collapsible_if)]
1128  #[allow(clippy::collapsible_else_if)]
1129  #[allow(clippy::needless_return)]
1130  pub(crate) const fn select_ief_strength(
1131    width: usize, height: usize, smooth_filter: bool, angle_delta: isize,
1132  ) -> u8 {
1133    let block_wh = width + height;
1134    let abs_delta = angle_delta.unsigned_abs();
1135
1136    if smooth_filter {
1137      if block_wh <= 8 {
1138        if abs_delta >= 64 {
1139          return 2;
1140        }
1141        if abs_delta >= 40 {
1142          return 1;
1143        }
1144      } else if block_wh <= 16 {
1145        if abs_delta >= 48 {
1146          return 2;
1147        }
1148        if abs_delta >= 20 {
1149          return 1;
1150        }
1151      } else if block_wh <= 24 {
1152        if abs_delta >= 4 {
1153          return 3;
1154        }
1155      } else {
1156        return 3;
1157      }
1158    } else {
1159      if block_wh <= 8 {
1160        if abs_delta >= 56 {
1161          return 1;
1162        }
1163      } else if block_wh <= 16 {
1164        if abs_delta >= 40 {
1165          return 1;
1166        }
1167      } else if block_wh <= 24 {
1168        if abs_delta >= 32 {
1169          return 3;
1170        }
1171        if abs_delta >= 16 {
1172          return 2;
1173        }
1174        if abs_delta >= 8 {
1175          return 1;
1176        }
1177      } else if block_wh <= 32 {
1178        if abs_delta >= 32 {
1179          return 3;
1180        }
1181        if abs_delta >= 4 {
1182          return 2;
1183        }
1184        return 1;
1185      } else {
1186        return 3;
1187      }
1188    }
1189
1190    return 0;
1191  }
1192
1193  pub(crate) const fn select_ief_upsample(
1194    width: usize, height: usize, smooth_filter: bool, angle_delta: isize,
1195  ) -> bool {
1196    let block_wh = width + height;
1197    let abs_delta = angle_delta.unsigned_abs();
1198
1199    if abs_delta == 0 || abs_delta >= 40 {
1200      false
1201    } else if smooth_filter {
1202      block_wh <= 8
1203    } else {
1204      block_wh <= 16
1205    }
1206  }
1207
1208  pub(crate) fn filter_edge<T: Pixel>(
1209    size: usize, strength: u8, edge: &mut [T],
1210  ) {
1211    const INTRA_EDGE_KERNEL: [[u32; 5]; 3] =
1212      [[0, 4, 8, 4, 0], [0, 5, 6, 5, 0], [2, 4, 4, 4, 2]];
1213
1214    if strength == 0 {
1215      return;
1216    }
1217
1218    // Copy the edge buffer to avoid predicting from
1219    // just-filtered samples.
1220    let mut edge_filtered = [MaybeUninit::<T>::uninit(); MAX_TX_SIZE * 4 + 1];
1221    let edge_filtered =
1222      init_slice_repeat_mut(&mut edge_filtered[..edge.len()], T::zero());
1223    edge_filtered.copy_from_slice(&edge[..edge.len()]);
1224
1225    for i in 1..size {
1226      let mut s = 0;
1227
1228      for j in 0..INTRA_EDGE_KERNEL[0].len() {
1229        let k = (i + j).saturating_sub(2).min(size - 1);
1230        s += INTRA_EDGE_KERNEL[(strength - 1) as usize][j]
1231          * edge[k].to_u32().unwrap();
1232      }
1233
1234      edge_filtered[i] = T::cast_from((s + 8) >> 4);
1235    }
1236    edge.copy_from_slice(edge_filtered);
1237  }
1238
1239  pub(crate) fn upsample_edge<T: Pixel>(
1240    size: usize, edge: &mut [T], bit_depth: usize,
1241  ) {
1242    // The input edge should be valid in the -1..size range,
1243    // where the -1 index is the top-left edge pixel. Since
1244    // negative indices are unsafe in Rust, the caller is
1245    // expected to globally offset it by 1, which makes the
1246    // input range 0..=size.
1247    let mut dup = [MaybeUninit::<T>::uninit(); MAX_TX_SIZE];
1248    let dup = init_slice_repeat_mut(&mut dup[..size + 3], T::zero());
1249    dup[0] = edge[0];
1250    dup[1..=size + 1].copy_from_slice(&edge[0..=size]);
1251    dup[size + 2] = edge[size];
1252
1253    // Past here the edge is being filtered, and its
1254    // effective range is shifted from -1..size to
1255    // -2..2*size-1. Again, because this is safe Rust,
1256    // we cannot use negative indices, and the actual range
1257    // will be 0..=2*size. The caller is expected to adjust
1258    // its indices on receipt of the filtered edge.
1259    edge[0] = dup[0];
1260
1261    for i in 0..size {
1262      let mut s = -dup[i].to_i32().unwrap()
1263        + (9 * dup[i + 1].to_i32().unwrap())
1264        + (9 * dup[i + 2].to_i32().unwrap())
1265        - dup[i + 3].to_i32().unwrap();
1266      s = ((s + 8) / 16).clamp(0, (1 << bit_depth) - 1);
1267
1268      edge[2 * i + 1] = T::cast_from(s);
1269      edge[2 * i + 2] = dup[i + 2];
1270    }
1271  }
1272
1273  pub(crate) const fn dr_intra_derivative(p_angle: usize) -> usize {
1274    match p_angle {
1275      3 => 1023,
1276      6 => 547,
1277      9 => 372,
1278      14 => 273,
1279      17 => 215,
1280      20 => 178,
1281      23 => 151,
1282      26 => 132,
1283      29 => 116,
1284      32 => 102,
1285      36 => 90,
1286      39 => 80,
1287      42 => 71,
1288      45 => 64,
1289      48 => 57,
1290      51 => 51,
1291      54 => 45,
1292      58 => 40,
1293      61 => 35,
1294      64 => 31,
1295      67 => 27,
1296      70 => 23,
1297      73 => 19,
1298      76 => 15,
1299      81 => 11,
1300      84 => 7,
1301      87 => 3,
1302      _ => 0,
1303    }
1304  }
1305
1306  pub(crate) fn pred_directional<T: Pixel>(
1307    output: &mut PlaneRegionMut<'_, T>, above: &[T], left: &[T],
1308    top_left: &[T], p_angle: usize, width: usize, height: usize,
1309    bit_depth: usize, ief_params: Option<IntraEdgeFilterParameters>,
1310  ) {
1311    let sample_max = (1 << bit_depth) - 1;
1312
1313    let max_x = output.plane_cfg.width as isize - 1;
1314    let max_y = output.plane_cfg.height as isize - 1;
1315
1316    let mut upsample_above = false;
1317    let mut upsample_left = false;
1318
1319    let mut above_edge: &[T] = above;
1320    let mut left_edge: &[T] = left;
1321    let top_left_edge: T = top_left[0];
1322
1323    let enable_edge_filter = ief_params.is_some();
1324
1325    // Initialize above and left edge buffers of the largest possible needed size if upsampled
1326    // The first value is the top left pixel, also mutable and indexed at -1 in the spec
1327    let mut above_filtered = [MaybeUninit::<T>::uninit(); MAX_TX_SIZE * 4 + 1];
1328    let above_filtered = init_slice_repeat_mut(
1329      &mut above_filtered[..=(width + height) * 2],
1330      T::zero(),
1331    );
1332    let mut left_filtered = [MaybeUninit::<T>::uninit(); MAX_TX_SIZE * 4 + 1];
1333    let left_filtered = init_slice_repeat_mut(
1334      &mut left_filtered[..=(width + height) * 2],
1335      T::zero(),
1336    );
1337
1338    if enable_edge_filter {
1339      let above_len = above.len().min(above_filtered.len() - 1);
1340      let left_len = left.len().min(left_filtered.len() - 1);
1341      above_filtered[1..=above_len].clone_from_slice(&above[..above_len]);
1342      for i in 1..=left_len {
1343        left_filtered[i] = left[left.len() - i];
1344      }
1345
1346      let smooth_filter = ief_params.unwrap().use_smooth_filter();
1347
1348      if p_angle != 90 && p_angle != 180 {
1349        above_filtered[0] = top_left_edge;
1350        left_filtered[0] = top_left_edge;
1351
1352        let num_px = (
1353          width.min((max_x - output.rect().x + 1).try_into().unwrap())
1354            + if p_angle < 90 { height } else { 0 }
1355            + 1, // above
1356          height.min((max_y - output.rect().y + 1).try_into().unwrap())
1357            + if p_angle > 180 { width } else { 0 }
1358            + 1, // left
1359        );
1360
1361        let filter_strength = select_ief_strength(
1362          width,
1363          height,
1364          smooth_filter,
1365          p_angle as isize - 90,
1366        );
1367        filter_edge(num_px.0, filter_strength, above_filtered);
1368        let filter_strength = select_ief_strength(
1369          width,
1370          height,
1371          smooth_filter,
1372          p_angle as isize - 180,
1373        );
1374        filter_edge(num_px.1, filter_strength, left_filtered);
1375      }
1376
1377      let num_px = (
1378        width + if p_angle < 90 { height } else { 0 }, // above
1379        height + if p_angle > 180 { width } else { 0 }, // left
1380      );
1381
1382      upsample_above = select_ief_upsample(
1383        width,
1384        height,
1385        smooth_filter,
1386        p_angle as isize - 90,
1387      );
1388      if upsample_above {
1389        upsample_edge(num_px.0, above_filtered, bit_depth);
1390      }
1391      upsample_left = select_ief_upsample(
1392        width,
1393        height,
1394        smooth_filter,
1395        p_angle as isize - 180,
1396      );
1397      if upsample_left {
1398        upsample_edge(num_px.1, left_filtered, bit_depth);
1399      }
1400
1401      left_filtered.reverse();
1402      above_edge = above_filtered;
1403      left_edge = left_filtered;
1404    }
1405
1406    let dx = if p_angle < 90 {
1407      dr_intra_derivative(p_angle)
1408    } else if p_angle > 90 && p_angle < 180 {
1409      dr_intra_derivative(180 - p_angle)
1410    } else {
1411      0 // undefined
1412    };
1413
1414    let dy = if p_angle > 90 && p_angle < 180 {
1415      dr_intra_derivative(p_angle - 90)
1416    } else if p_angle > 180 {
1417      dr_intra_derivative(270 - p_angle)
1418    } else {
1419      0 // undefined
1420    };
1421
1422    // edge buffer index offsets applied due to the fact
1423    // that we cannot safely use negative indices in Rust
1424    let upsample_above = upsample_above as usize;
1425    let upsample_left = upsample_left as usize;
1426    let offset_above = (enable_edge_filter as usize) << upsample_above;
1427    let offset_left = (enable_edge_filter as usize) << upsample_left;
1428
1429    if p_angle < 90 {
1430      for i in 0..height {
1431        let row = &mut output[i];
1432        for j in 0..width {
1433          let idx = (i + 1) * dx;
1434          let base = (idx >> (6 - upsample_above)) + (j << upsample_above);
1435          let shift = (((idx << upsample_above) >> 1) & 31) as i32;
1436          let max_base_x = (height + width - 1) << upsample_above;
1437          let v = (if base < max_base_x {
1438            let a: i32 = above_edge[base + offset_above].into();
1439            let b: i32 = above_edge[base + 1 + offset_above].into();
1440            round_shift(a * (32 - shift) + b * shift, 5)
1441          } else {
1442            let c: i32 = above_edge[max_base_x + offset_above].into();
1443            c
1444          })
1445          .clamp(0, sample_max);
1446          row[j] = T::cast_from(v);
1447        }
1448      }
1449    } else if p_angle > 90 && p_angle < 180 {
1450      for i in 0..height {
1451        let row = &mut output[i];
1452        for j in 0..width {
1453          let idx = (j << 6) as isize - ((i + 1) * dx) as isize;
1454          let base = idx >> (6 - upsample_above);
1455          if base >= -(1 << upsample_above) {
1456            let shift = (((idx << upsample_above) >> 1) & 31) as i32;
1457            let a: i32 = if !enable_edge_filter && base < 0 {
1458              top_left_edge
1459            } else {
1460              above_edge[(base + offset_above as isize) as usize]
1461            }
1462            .into();
1463            let b: i32 =
1464              above_edge[(base + 1 + offset_above as isize) as usize].into();
1465            let v = round_shift(a * (32 - shift) + b * shift, 5)
1466              .clamp(0, sample_max);
1467            row[j] = T::cast_from(v);
1468          } else {
1469            let idx = (i << 6) as isize - ((j + 1) * dy) as isize;
1470            let base = idx >> (6 - upsample_left);
1471            let shift = (((idx << upsample_left) >> 1) & 31) as i32;
1472            let l = left_edge.len() - 1;
1473            let a: i32 = if !enable_edge_filter && base < 0 {
1474              top_left_edge
1475            } else if (base + offset_left as isize) == -2 {
1476              left_edge[0]
1477            } else {
1478              left_edge[l - (base + offset_left as isize) as usize]
1479            }
1480            .into();
1481            let b: i32 = if (base + offset_left as isize) == -2 {
1482              left_edge[1]
1483            } else {
1484              left_edge[l - (base + offset_left as isize + 1) as usize]
1485            }
1486            .into();
1487            let v = round_shift(a * (32 - shift) + b * shift, 5)
1488              .clamp(0, sample_max);
1489            row[j] = T::cast_from(v);
1490          }
1491        }
1492      }
1493    } else if p_angle > 180 {
1494      for i in 0..height {
1495        let row = &mut output[i];
1496        for j in 0..width {
1497          let idx = (j + 1) * dy;
1498          let base = (idx >> (6 - upsample_left)) + (i << upsample_left);
1499          let shift = (((idx << upsample_left) >> 1) & 31) as i32;
1500          let l = left_edge.len() - 1;
1501          let a: i32 = left_edge[l.saturating_sub(base + offset_left)].into();
1502          let b: i32 =
1503            left_edge[l.saturating_sub(base + offset_left + 1)].into();
1504          let v =
1505            round_shift(a * (32 - shift) + b * shift, 5).clamp(0, sample_max);
1506          row[j] = T::cast_from(v);
1507        }
1508      }
1509    }
1510  }
1511}
1512
1513#[cfg(test)]
1514mod test {
1515  use super::*;
1516  use crate::predict::rust::*;
1517  use crate::util::Aligned;
1518  use num_traits::*;
1519
1520  #[test]
1521  fn pred_matches_u8() {
1522    let edge_buf =
1523      Aligned::from_fn(|i| (i + 32).saturating_sub(MAX_TX_SIZE * 2).as_());
1524    let (all_left, top_left, above) = IntraEdge::mock(&edge_buf).as_slices();
1525    let left = &all_left[all_left.len() - 4..];
1526
1527    let mut output = Plane::from_slice(&[0u8; 4 * 4], 4);
1528
1529    pred_dc(&mut output.as_region_mut(), above, left, 4, 4, 8);
1530    assert_eq!(&output.data[..], [32u8; 16]);
1531
1532    pred_dc_top(&mut output.as_region_mut(), above, left, 4, 4, 8);
1533    assert_eq!(&output.data[..], [35u8; 16]);
1534
1535    pred_dc_left(&mut output.as_region_mut(), above, left, 4, 4, 8);
1536    assert_eq!(&output.data[..], [30u8; 16]);
1537
1538    pred_dc_128(&mut output.as_region_mut(), above, left, 4, 4, 8);
1539    assert_eq!(&output.data[..], [128u8; 16]);
1540
1541    pred_v(&mut output.as_region_mut(), above, 4, 4);
1542    assert_eq!(
1543      &output.data[..],
1544      [33, 34, 35, 36, 33, 34, 35, 36, 33, 34, 35, 36, 33, 34, 35, 36]
1545    );
1546
1547    pred_h(&mut output.as_region_mut(), left, 4, 4);
1548    assert_eq!(
1549      &output.data[..],
1550      [31, 31, 31, 31, 30, 30, 30, 30, 29, 29, 29, 29, 28, 28, 28, 28]
1551    );
1552
1553    pred_paeth(&mut output.as_region_mut(), above, left, top_left[0], 4, 4);
1554    assert_eq!(
1555      &output.data[..],
1556      [32, 34, 35, 36, 30, 32, 32, 36, 29, 32, 32, 32, 28, 28, 32, 32]
1557    );
1558
1559    pred_smooth(&mut output.as_region_mut(), above, left, 4, 4);
1560    assert_eq!(
1561      &output.data[..],
1562      [32, 34, 35, 35, 30, 32, 33, 34, 29, 31, 32, 32, 29, 30, 32, 32]
1563    );
1564
1565    pred_smooth_h(&mut output.as_region_mut(), above, left, 4, 4);
1566    assert_eq!(
1567      &output.data[..],
1568      [31, 33, 34, 35, 30, 33, 34, 35, 29, 32, 34, 34, 28, 31, 33, 34]
1569    );
1570
1571    pred_smooth_v(&mut output.as_region_mut(), above, left, 4, 4);
1572    assert_eq!(
1573      &output.data[..],
1574      [33, 34, 35, 36, 31, 31, 32, 33, 30, 30, 30, 31, 29, 30, 30, 30]
1575    );
1576
1577    let left = &all_left[all_left.len() - 8..];
1578    let angles = [
1579      3, 6, 9, 14, 17, 20, 23, 26, 29, 32, 36, 39, 42, 45, 48, 51, 54, 58, 61,
1580      64, 67, 70, 73, 76, 81, 84, 87,
1581    ];
1582    let expected = [
1583      [40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40],
1584      [40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40],
1585      [39, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40],
1586      [37, 38, 39, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40],
1587      [36, 37, 38, 39, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40],
1588      [36, 37, 38, 39, 39, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40],
1589      [35, 36, 37, 38, 38, 39, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40],
1590      [35, 36, 37, 38, 37, 38, 39, 40, 39, 40, 40, 40, 40, 40, 40, 40],
1591      [35, 36, 37, 38, 37, 38, 39, 40, 38, 39, 40, 40, 40, 40, 40, 40],
1592      [35, 36, 37, 38, 36, 37, 38, 39, 38, 39, 40, 40, 39, 40, 40, 40],
1593      [34, 35, 36, 37, 36, 37, 38, 39, 37, 38, 39, 40, 39, 40, 40, 40],
1594      [34, 35, 36, 37, 36, 37, 38, 39, 37, 38, 39, 40, 38, 39, 40, 40],
1595      [34, 35, 36, 37, 35, 36, 37, 38, 36, 37, 38, 39, 37, 38, 39, 40],
1596      [34, 35, 36, 37, 35, 36, 37, 38, 36, 37, 38, 39, 37, 38, 39, 40],
1597      [34, 35, 36, 37, 35, 36, 37, 38, 36, 37, 38, 39, 37, 38, 39, 40],
1598      [34, 35, 36, 37, 35, 36, 37, 38, 35, 36, 37, 38, 36, 37, 38, 39],
1599      [34, 35, 36, 37, 34, 35, 36, 37, 35, 36, 37, 38, 36, 37, 38, 39],
1600      [34, 35, 36, 37, 34, 35, 36, 37, 35, 36, 37, 38, 36, 37, 38, 39],
1601      [34, 35, 36, 37, 34, 35, 36, 37, 35, 36, 37, 38, 35, 36, 37, 38],
1602      [33, 34, 35, 36, 34, 35, 36, 37, 34, 35, 36, 37, 35, 36, 37, 38],
1603      [33, 34, 35, 36, 34, 35, 36, 37, 34, 35, 36, 37, 35, 36, 37, 38],
1604      [33, 34, 35, 36, 34, 35, 36, 37, 34, 35, 36, 37, 34, 35, 36, 37],
1605      [33, 34, 35, 36, 34, 35, 36, 37, 34, 35, 36, 37, 34, 35, 36, 37],
1606      [33, 34, 35, 36, 33, 34, 35, 36, 34, 35, 36, 37, 34, 35, 36, 37],
1607      [33, 34, 35, 36, 33, 34, 35, 36, 34, 35, 36, 37, 34, 35, 36, 37],
1608      [33, 34, 35, 36, 33, 34, 35, 36, 33, 34, 35, 36, 33, 34, 35, 36],
1609      [33, 34, 35, 36, 33, 34, 35, 36, 33, 34, 35, 36, 33, 34, 35, 36],
1610    ];
1611    for (&angle, expected) in angles.iter().zip(expected.iter()) {
1612      pred_directional(
1613        &mut output.as_region_mut(),
1614        above,
1615        left,
1616        top_left,
1617        angle,
1618        4,
1619        4,
1620        8,
1621        None,
1622      );
1623      assert_eq!(&output.data[..], expected);
1624    }
1625  }
1626
1627  #[test]
1628  fn pred_max() {
1629    let max12bit = 4096 - 1;
1630    let above = [max12bit; 32];
1631    let left = [max12bit; 32];
1632
1633    let mut o = Plane::from_slice(&vec![0u16; 32 * 32], 32);
1634
1635    pred_dc(&mut o.as_region_mut(), &above[..4], &left[..4], 4, 4, 16);
1636
1637    for l in o.data.chunks(32).take(4) {
1638      for v in l[..4].iter() {
1639        assert_eq!(*v, max12bit);
1640      }
1641    }
1642
1643    pred_h(&mut o.as_region_mut(), &left[..4], 4, 4);
1644
1645    for l in o.data.chunks(32).take(4) {
1646      for v in l[..4].iter() {
1647        assert_eq!(*v, max12bit);
1648      }
1649    }
1650
1651    pred_v(&mut o.as_region_mut(), &above[..4], 4, 4);
1652
1653    for l in o.data.chunks(32).take(4) {
1654      for v in l[..4].iter() {
1655        assert_eq!(*v, max12bit);
1656      }
1657    }
1658
1659    let above_left = max12bit;
1660
1661    pred_paeth(
1662      &mut o.as_region_mut(),
1663      &above[..4],
1664      &left[..4],
1665      above_left,
1666      4,
1667      4,
1668    );
1669
1670    for l in o.data.chunks(32).take(4) {
1671      for v in l[..4].iter() {
1672        assert_eq!(*v, max12bit);
1673      }
1674    }
1675
1676    pred_smooth(&mut o.as_region_mut(), &above[..4], &left[..4], 4, 4);
1677
1678    for l in o.data.chunks(32).take(4) {
1679      for v in l[..4].iter() {
1680        assert_eq!(*v, max12bit);
1681      }
1682    }
1683
1684    pred_smooth_h(&mut o.as_region_mut(), &above[..4], &left[..4], 4, 4);
1685
1686    for l in o.data.chunks(32).take(4) {
1687      for v in l[..4].iter() {
1688        assert_eq!(*v, max12bit);
1689      }
1690    }
1691
1692    pred_smooth_v(&mut o.as_region_mut(), &above[..4], &left[..4], 4, 4);
1693
1694    for l in o.data.chunks(32).take(4) {
1695      for v in l[..4].iter() {
1696        assert_eq!(*v, max12bit);
1697      }
1698    }
1699  }
1700}