1use crate::api::InterConfig;
11use crate::context::{
12  BlockOffset, PlaneBlockOffset, SuperBlockOffset, TileBlockOffset,
13  TileSuperBlockOffset, MAX_SB_SIZE_LOG2, MIB_SIZE_LOG2, MI_SIZE,
14  MI_SIZE_LOG2, SB_SIZE,
15};
16use crate::dist::*;
17use crate::frame::*;
18use crate::mc::MotionVector;
19use crate::partition::*;
20use crate::predict::PredictionMode;
21use crate::tiling::*;
22use crate::util::ILog;
23use crate::util::{clamp, Pixel};
24use crate::FrameInvariants;
25
26use arrayvec::*;
27use std::ops::{Index, IndexMut};
28use std::sync::{Arc, RwLock, RwLockReadGuard, RwLockWriteGuard};
29
/// Motion estimation result stored for one block position.
#[derive(Debug, Copy, Clone, Default)]
pub struct MEStats {
  /// Motion vector chosen for this position.
  pub mv: MotionVector,
  /// SAD of the chosen vector, rescaled by the search routines in this file
  /// to a block area of `1 << (2 * MAX_SB_SIZE_LOG2)` pixels.
  pub normalized_sad: u32,
}
36
/// Grid of [`MEStats`] kept in a single flat allocation.
#[derive(Debug, Clone)]
pub struct FrameMEStats {
  /// `cols * rows` entries; row `r` occupies `r * cols..(r + 1) * cols`.
  stats: Box<[MEStats]>,
  /// Number of entries in each row.
  pub cols: usize,
  /// Number of rows.
  pub rows: usize,
}
43
/// Shared, lock-protected array of `REF_FRAMES` motion estimation grids.
pub type RefMEStats = Arc<RwLock<[FrameMEStats; REF_FRAMES]>>;
/// Read guard obtained from the `RwLock` inside [`RefMEStats`].
pub type ReadGuardMEStats<'a> =
  RwLockReadGuard<'a, [FrameMEStats; REF_FRAMES]>;
/// Write guard obtained from the `RwLock` inside [`RefMEStats`].
pub type WriteGuardMEStats<'a> =
  RwLockWriteGuard<'a, [FrameMEStats; REF_FRAMES]>;
52
53impl FrameMEStats {
54  #[inline]
55  pub fn rows_iter(&self) -> std::slice::ChunksExact<'_, MEStats> {
56    self.stats.chunks_exact(self.cols)
57  }
58
59  pub fn new(cols: usize, rows: usize) -> Self {
60    Self {
61      stats: vec![MEStats::default(); cols * rows].into_boxed_slice(),
63      cols,
64      rows,
65    }
66  }
67  pub fn new_arc_array(cols: usize, rows: usize) -> RefMEStats {
68    Arc::new(RwLock::new([
69      FrameMEStats::new(cols, rows),
70      FrameMEStats::new(cols, rows),
71      FrameMEStats::new(cols, rows),
72      FrameMEStats::new(cols, rows),
73      FrameMEStats::new(cols, rows),
74      FrameMEStats::new(cols, rows),
75      FrameMEStats::new(cols, rows),
76      FrameMEStats::new(cols, rows),
77    ]))
78  }
79}
80
81impl Index<usize> for FrameMEStats {
82  type Output = [MEStats];
83  #[inline]
84  fn index(&self, index: usize) -> &Self::Output {
85    &self.stats[index * self.cols..(index + 1) * self.cols]
86  }
87}
88
89impl IndexMut<usize> for FrameMEStats {
90  #[inline]
91  fn index_mut(&mut self, index: usize) -> &mut Self::Output {
92    &mut self.stats[index * self.cols..(index + 1) * self.cols]
93  }
94}
95
/// Outcome of a motion search: the selected vector and its measured cost.
#[derive(Debug, Copy, Clone)]
pub struct MotionSearchResult {
  /// Motion vector of the best candidate found so far.
  pub mv: MotionVector,
  /// Cost figures measured for `mv`.
  pub rd: MVCandidateRD,
}
104
105impl MotionSearchResult {
106  #[inline(always)]
112  pub fn empty() -> MotionSearchResult {
113    MotionSearchResult {
114      mv: MotionVector::default(),
115      rd: MVCandidateRD::empty(),
116    }
117  }
118
119  #[inline(always)]
121  const fn is_empty(&self) -> bool {
122    self.rd.cost == u64::MAX
123  }
124}
125
/// Cost figures measured for one motion vector candidate.
#[derive(Debug, Copy, Clone)]
pub struct MVCandidateRD {
  /// Overall cost; the searches in this file keep the candidate with the
  /// lowest value.
  pub cost: u64,
  /// SAD figure of the candidate; compared against early-exit thresholds and
  /// stored (after normalization) in [`MEStats`].
  pub sad: u32,
}
134
135impl MVCandidateRD {
136  #[inline(always)]
142  const fn empty() -> MVCandidateRD {
143    MVCandidateRD { sad: u32::MAX, cost: u64::MAX }
144  }
145}
146
/// Selects which neighbouring predictors `get_subset_predictors` samples.
#[derive(Debug, Copy, Clone, Eq, PartialEq)]
pub enum MVSamplingMode {
  /// Used for the first (largest block size) pass. Only the left and top
  /// neighbours are sampled, and a median predictor is produced only when
  /// three neighbour predictors were collected.
  INIT,
  /// Used for later passes. `right` / `bottom` additionally enable sampling
  /// of the block to the right / below, and the stats stored at the centre
  /// of the current block are used as the median predictor.
  CORNER { right: bool, bottom: bool },
}
152
153pub fn estimate_tile_motion<T: Pixel>(
154  fi: &FrameInvariants<T>, ts: &mut TileStateMut<'_, T>,
155  inter_cfg: &InterConfig,
156) {
157  let init_size = MIB_SIZE_LOG2;
158
159  let mut prev_ssdec: Option<u8> = None;
160  for mv_size_in_b_log2 in (2..=init_size).rev() {
161    let init = mv_size_in_b_log2 == init_size;
162
163    let ssdec = match init_size - mv_size_in_b_log2 {
165      0 => 2,
166      1 => 1,
167      _ => 0,
168    };
169
170    let new_subsampling =
171      if let Some(prev) = prev_ssdec { prev != ssdec } else { false };
172    prev_ssdec = Some(ssdec);
173
174    let lambda = (fi.me_lambda * 256.0 / (1 << (2 * ssdec)) as f64
176      * if ssdec == 0 { 0.5 } else { 0.125 }) as u32;
177
178    for sby in 0..ts.sb_height {
179      for sbx in 0..ts.sb_width {
180        let mut tested_frames_flags = 0;
181        for &ref_frame in inter_cfg.allowed_ref_frames() {
182          let frame_flag = 1 << fi.ref_frames[ref_frame.to_index()];
183          if tested_frames_flags & frame_flag == frame_flag {
184            continue;
185          }
186          tested_frames_flags |= frame_flag;
187
188          let tile_bo =
189            TileSuperBlockOffset(SuperBlockOffset { x: sbx, y: sby })
190              .block_offset(0, 0);
191
192          if new_subsampling {
193            refine_subsampled_sb_motion(
194              fi,
195              ts,
196              ref_frame,
197              mv_size_in_b_log2 + 1,
198              tile_bo,
199              ssdec,
200              lambda,
201            );
202          }
203
204          estimate_sb_motion(
205            fi,
206            ts,
207            ref_frame,
208            mv_size_in_b_log2,
209            tile_bo,
210            init,
211            ssdec,
212            lambda,
213          );
214        }
215      }
216    }
217  }
218}
219
220fn estimate_sb_motion<T: Pixel>(
221  fi: &FrameInvariants<T>, ts: &mut TileStateMut<'_, T>, ref_frame: RefType,
222  mv_size_in_b_log2: usize, tile_bo: TileBlockOffset, init: bool, ssdec: u8,
223  lambda: u32,
224) {
225  let pix_offset = tile_bo.to_luma_plane_offset();
226  let sb_h: usize = SB_SIZE.min(ts.height - pix_offset.y as usize);
227  let sb_w: usize = SB_SIZE.min(ts.width - pix_offset.x as usize);
228
229  let mv_size = MI_SIZE << mv_size_in_b_log2;
230
231  for y in (0..sb_h).step_by(mv_size) {
233    for x in (0..sb_w).step_by(mv_size) {
234      let corner: MVSamplingMode = if init {
235        MVSamplingMode::INIT
236      } else {
237        MVSamplingMode::CORNER {
240          right: x & mv_size == mv_size,
241          bottom: y & mv_size == mv_size,
242        }
243      };
244
245      let sub_bo = tile_bo
246        .with_offset(x as isize >> MI_SIZE_LOG2, y as isize >> MI_SIZE_LOG2);
247
248      let w = mv_size.min(sb_w - x + (1 << ssdec) - 1) >> ssdec;
251      let h = mv_size.min(sb_h - y + (1 << ssdec) - 1) >> ssdec;
252
253      if let Some(results) = estimate_motion(
257        fi,
258        ts,
259        w,
260        h,
261        sub_bo,
262        ref_frame,
263        None,
264        corner,
265        init,
266        ssdec,
267        Some(lambda),
268      ) {
269        let sad = (((results.rd.sad as u64) << (MAX_SB_SIZE_LOG2 * 2))
271          / (w * h) as u64) as u32;
272        save_me_stats(
273          ts,
274          mv_size_in_b_log2,
275          sub_bo,
276          ref_frame,
277          MEStats { mv: results.mv, normalized_sad: sad },
278        );
279      }
280    }
281  }
282}
283
284fn refine_subsampled_sb_motion<T: Pixel>(
285  fi: &FrameInvariants<T>, ts: &mut TileStateMut<'_, T>, ref_frame: RefType,
286  mv_size_in_b_log2: usize, tile_bo: TileBlockOffset, ssdec: u8, lambda: u32,
287) {
288  let pix_offset = tile_bo.to_luma_plane_offset();
289  let sb_h: usize = SB_SIZE.min(ts.height - pix_offset.y as usize);
290  let sb_w: usize = SB_SIZE.min(ts.width - pix_offset.x as usize);
291
292  let mv_size = MI_SIZE << mv_size_in_b_log2;
293
294  for y in (0..sb_h).step_by(mv_size) {
296    for x in (0..sb_w).step_by(mv_size) {
297      let sub_bo = tile_bo
298        .with_offset(x as isize >> MI_SIZE_LOG2, y as isize >> MI_SIZE_LOG2);
299
300      let w = mv_size.min(sb_w - x + (1 << ssdec) - 1) >> ssdec;
303      let h = mv_size.min(sb_h - y + (1 << ssdec) - 1) >> ssdec;
304
305      if let Some(results) = refine_subsampled_motion_estimate(
307        fi, ts, w, h, sub_bo, ref_frame, ssdec, lambda,
308      ) {
309        let sad = (((results.rd.sad as u64) << (MAX_SB_SIZE_LOG2 * 2))
311          / (w * h) as u64) as u32;
312        save_me_stats(
313          ts,
314          mv_size_in_b_log2,
315          sub_bo,
316          ref_frame,
317          MEStats { mv: results.mv, normalized_sad: sad },
318        );
319      }
320    }
321  }
322}
323
324fn save_me_stats<T: Pixel>(
325  ts: &mut TileStateMut<'_, T>, mv_size_in_b_log2: usize,
326  tile_bo: TileBlockOffset, ref_frame: RefType, stats: MEStats,
327) {
328  let size_in_b = 1 << mv_size_in_b_log2;
329  let tile_me_stats = &mut ts.me_stats[ref_frame.to_index()];
330  let tile_bo_x_end = (tile_bo.0.x + size_in_b).min(ts.mi_width);
331  let tile_bo_y_end = (tile_bo.0.y + size_in_b).min(ts.mi_height);
332  for mi_y in tile_bo.0.y..tile_bo_y_end {
333    for a in tile_me_stats[mi_y][tile_bo.0.x..tile_bo_x_end].iter_mut() {
334      *a = stats;
335    }
336  }
337}
338
339fn get_mv_range(
340  w_in_b: usize, h_in_b: usize, bo: PlaneBlockOffset, blk_w: usize,
341  blk_h: usize,
342) -> (isize, isize, isize, isize) {
343  let border_w = 128 + blk_w as isize * 8;
344  let border_h = 128 + blk_h as isize * 8;
345  let mvx_min = -(bo.0.x as isize) * (8 * MI_SIZE) as isize - border_w;
346  let mvx_max = ((w_in_b - bo.0.x) as isize - (blk_w / MI_SIZE) as isize)
347    * (8 * MI_SIZE) as isize
348    + border_w;
349  let mvy_min = -(bo.0.y as isize) * (8 * MI_SIZE) as isize - border_h;
350  let mvy_max = ((h_in_b - bo.0.y) as isize - (blk_h / MI_SIZE) as isize)
351    * (8 * MI_SIZE) as isize
352    + border_h;
353
354  use crate::context::{MV_LOW, MV_UPP};
356  (
357    mvx_min.max(MV_LOW as isize + 1),
358    mvx_max.min(MV_UPP as isize - 1),
359    mvy_min.max(MV_LOW as isize + 1),
360    mvy_max.min(MV_UPP as isize - 1),
361  )
362}
363
/// Candidate motion vectors gathered by `get_subset_predictors`.
struct MotionEstimationSubsets {
  /// Smallest SAD among the sampled predictors, rescaled from the normalized
  /// value to the pixel area of the current block.
  min_sad: u32,
  /// Centre or median predictor, when one could be determined.
  median: Option<MotionVector>,
  /// Predictors taken from neighbouring blocks of the current tile, followed
  /// by the zero vector.
  subset_b: ArrayVec<MotionVector, 5>,
  /// Predictors taken from the stats of a previously processed frame; empty
  /// when no such stats were supplied.
  subset_c: ArrayVec<MotionVector, 5>,
}
370
371impl MotionEstimationSubsets {
372  fn all_mvs(&self) -> ArrayVec<MotionVector, 11> {
373    let mut all = ArrayVec::new();
374    if let Some(median) = self.median {
375      all.push(median);
376    }
377
378    all.extend(self.subset_b.iter().copied());
379    all.extend(self.subset_c.iter().copied());
380
381    all
382  }
383}
384
/// Collects candidate motion vectors for a block from already stored stats.
///
/// `pix_w` / `pix_h` are the block dimensions in pixels at subsampling
/// `ssdec`. Every sampled vector is quantized to full-pel and clamped to the
/// given range before it is stored; on return all vectors are shifted right
/// by `ssdec`. `min_sad` of the result is the smallest normalized SAD seen
/// among the sampled stats, rescaled to the `pix_w * pix_h` area.
#[profiling::function]
fn get_subset_predictors(
  tile_bo: TileBlockOffset, tile_me_stats: &TileMEStats<'_>,
  frame_ref_opt: Option<ReadGuardMEStats<'_>>, ref_frame_id: usize,
  pix_w: usize, pix_h: usize, mvx_min: isize, mvx_max: isize, mvy_min: isize,
  mvy_max: isize, corner: MVSamplingMode, ssdec: u8,
) -> MotionEstimationSubsets {
  let mut min_sad: u32 = u32::MAX;
  let mut subset_b = ArrayVec::<MotionVector, 5>::new();
  let mut subset_c = ArrayVec::<MotionVector, 5>::new();

  // Block dimensions in block units at full resolution, rounded up.
  let w = ((pix_w << ssdec) + MI_SIZE - 1) >> MI_SIZE_LOG2;
  let h = ((pix_h << ssdec) + MI_SIZE - 1) >> MI_SIZE_LOG2;

  // Offset to the middle of the block, limited so that it stays inside the
  // tile stats grid.
  let clipped_half_w = (w >> 1).min(tile_me_stats.cols() - 1 - tile_bo.0.x);
  let clipped_half_h = (h >> 1).min(tile_me_stats.rows() - 1 - tile_bo.0.y);

  // Records the SAD of a sampled entry and returns its vector, quantized to
  // full-pel and clamped to the allowed range.
  let mut process_cand = |stats: MEStats| -> MotionVector {
    min_sad = min_sad.min(stats.normalized_sad);
    let mv = stats.mv.quantize_to_fullpel();
    MotionVector {
      col: clamp(mv.col as isize, mvx_min, mvx_max) as i16,
      row: clamp(mv.row as isize, mvy_min, mvy_max) as i16,
    }
  };

  // Left neighbour, sampled at the vertical middle of this block.
  if tile_bo.0.x > 0 {
    subset_b.push(process_cand(
      tile_me_stats[tile_bo.0.y + clipped_half_h][tile_bo.0.x - 1],
    ));
  }
  // Top neighbour, sampled at the horizontal middle of this block.
  if tile_bo.0.y > 0 {
    subset_b.push(process_cand(
      tile_me_stats[tile_bo.0.y - 1][tile_bo.0.x + clipped_half_w],
    ));
  }

  // Right neighbour, only when requested by `corner` and inside the grid.
  if let MVSamplingMode::CORNER { right: true, bottom: _ } = corner {
    if tile_bo.0.x + w < tile_me_stats.cols() {
      subset_b.push(process_cand(
        tile_me_stats[tile_bo.0.y + clipped_half_h][tile_bo.0.x + w],
      ));
    }
  }
  // Bottom neighbour, only when requested by `corner` and inside the grid.
  if let MVSamplingMode::CORNER { right: _, bottom: true } = corner {
    if tile_bo.0.y + h < tile_me_stats.rows() {
      subset_b.push(process_cand(
        tile_me_stats[tile_bo.0.y + h][tile_bo.0.x + clipped_half_w],
      ));
    }
  }

  let median = if corner != MVSamplingMode::INIT {
    // Use the stats stored at the centre of the current block.
    Some(process_cand(
      tile_me_stats[tile_bo.0.y + clipped_half_h]
        [tile_bo.0.x + clipped_half_w],
    ))
  } else if subset_b.len() != 3 {
    None
  } else {
    // Component-wise median of the three collected predictors.
    let mut rows: ArrayVec<i16, 3> = subset_b.iter().map(|&a| a.row).collect();
    let mut cols: ArrayVec<i16, 3> = subset_b.iter().map(|&a| a.col).collect();
    rows.as_mut_slice().sort_unstable();
    cols.as_mut_slice().sort_unstable();
    Some(MotionVector { row: rows[1], col: cols[1] })
  };

  // The zero vector is always a candidate.
  subset_b.push(MotionVector::default());

  // Predictors from the supplied frame-level stats, addressed with
  // frame-relative block coordinates: left, top, right, bottom, then centre.
  if let Some(frame_me_stats) = frame_ref_opt {
    let prev_frame = &frame_me_stats[ref_frame_id];

    let frame_bo = PlaneBlockOffset(BlockOffset {
      x: tile_me_stats.x() + tile_bo.0.x,
      y: tile_me_stats.y() + tile_bo.0.y,
    });
    let clipped_half_w = (w >> 1).min(prev_frame.cols - 1 - frame_bo.0.x);
    let clipped_half_h = (h >> 1).min(prev_frame.rows - 1 - frame_bo.0.y);

    if frame_bo.0.x > 0 {
      subset_c.push(process_cand(
        prev_frame[frame_bo.0.y + clipped_half_h][frame_bo.0.x - 1],
      ));
    }
    if frame_bo.0.y > 0 {
      subset_c.push(process_cand(
        prev_frame[frame_bo.0.y - 1][frame_bo.0.x + clipped_half_w],
      ));
    }
    if frame_bo.0.x + w < prev_frame.cols {
      subset_c.push(process_cand(
        prev_frame[frame_bo.0.y + clipped_half_h][frame_bo.0.x + w],
      ));
    }
    if frame_bo.0.y + h < prev_frame.rows {
      subset_c.push(process_cand(
        prev_frame[frame_bo.0.y + h][frame_bo.0.x + clipped_half_w],
      ));
    }

    subset_c.push(process_cand(
      prev_frame[frame_bo.0.y + clipped_half_h][frame_bo.0.x + clipped_half_w],
    ));
  }

  // Convert the normalized SAD back to the area of the current block.
  let min_sad = ((min_sad as u64 * (pix_w * pix_h) as u64)
    >> (MAX_SB_SIZE_LOG2 * 2)) as u32;

  // Scale all vectors down to the subsampled resolution.
  let dec_mv = |mv: MotionVector| MotionVector {
    col: mv.col >> ssdec,
    row: mv.row >> ssdec,
  };
  let median = median.map(dec_mv);
  for mv in subset_b.iter_mut() {
    *mv = dec_mv(*mv);
  }
  for mv in subset_c.iter_mut() {
    *mv = dec_mv(*mv);
  }

  MotionEstimationSubsets { min_sad, median, subset_b, subset_c }
}
535
536pub fn estimate_motion<T: Pixel>(
537  fi: &FrameInvariants<T>, ts: &TileStateMut<'_, T>, w: usize, h: usize,
538  tile_bo: TileBlockOffset, ref_frame: RefType,
539  pmv: Option<[MotionVector; 2]>, corner: MVSamplingMode,
540  extensive_search: bool, ssdec: u8, lambda: Option<u32>,
541) -> Option<MotionSearchResult> {
542  if let Some(ref rec) =
543    fi.rec_buffer.frames[fi.ref_frames[ref_frame.to_index()] as usize]
544  {
545    let frame_bo = ts.to_frame_block_offset(tile_bo);
546    let (mvx_min, mvx_max, mvy_min, mvy_max) =
547      get_mv_range(fi.w_in_b, fi.h_in_b, frame_bo, w << ssdec, h << ssdec);
548
549    let lambda = lambda.unwrap_or({
550      (fi.me_lambda * 256.0 * 0.5) as u32
552    });
553
554    let global_mv = [MotionVector { row: 0, col: 0 }; 2];
555
556    let po = frame_bo.to_luma_plane_offset();
557    let (mvx_min, mvx_max, mvy_min, mvy_max) =
558      (mvx_min >> ssdec, mvx_max >> ssdec, mvy_min >> ssdec, mvy_max >> ssdec);
559    let po = PlaneOffset { x: po.x >> ssdec, y: po.y >> ssdec };
560    let p_ref = match ssdec {
561      0 => &rec.frame.planes[0],
562      1 => &rec.input_hres,
563      2 => &rec.input_qres,
564      _ => unimplemented!(),
565    };
566
567    let org_region = &match ssdec {
568      0 => ts.input_tile.planes[0]
569        .subregion(Area::BlockStartingAt { bo: tile_bo.0 }),
570      1 => ts.input_hres.region(Area::StartingAt { x: po.x, y: po.y }),
571      2 => ts.input_qres.region(Area::StartingAt { x: po.x, y: po.y }),
572      _ => unimplemented!(),
573    };
574
575    let mut best: MotionSearchResult = full_pixel_me(
576      fi,
577      ts,
578      org_region,
579      p_ref,
580      tile_bo,
581      po,
582      lambda,
583      pmv.unwrap_or(global_mv),
584      w,
585      h,
586      mvx_min,
587      mvx_max,
588      mvy_min,
589      mvy_max,
590      ref_frame,
591      corner,
592      extensive_search,
593      ssdec,
594    );
595
596    if let Some(pmv) = pmv {
597      let use_satd: bool = fi.config.speed_settings.motion.use_satd_subpel;
598      if use_satd {
599        best.rd = get_fullpel_mv_rd(
600          fi,
601          po,
602          org_region,
603          p_ref,
604          fi.sequence.bit_depth,
605          pmv,
606          lambda,
607          use_satd,
608          mvx_min,
609          mvx_max,
610          mvy_min,
611          mvy_max,
612          w,
613          h,
614          best.mv,
615        );
616      }
617
618      sub_pixel_me(
619        fi, po, org_region, p_ref, lambda, pmv, mvx_min, mvx_max, mvy_min,
620        mvy_max, w, h, use_satd, &mut best, ref_frame,
621      );
622    }
623
624    best.mv = best.mv << ssdec;
626
627    Some(best)
628  } else {
629    None
630  }
631}
632
633fn refine_subsampled_motion_estimate<T: Pixel>(
635  fi: &FrameInvariants<T>, ts: &TileStateMut<'_, T>, w: usize, h: usize,
636  tile_bo: TileBlockOffset, ref_frame: RefType, ssdec: u8, lambda: u32,
637) -> Option<MotionSearchResult> {
638  if let Some(ref rec) =
639    fi.rec_buffer.frames[fi.ref_frames[ref_frame.to_index()] as usize]
640  {
641    let frame_bo = ts.to_frame_block_offset(tile_bo);
642    let (mvx_min, mvx_max, mvy_min, mvy_max) =
643      get_mv_range(fi.w_in_b, fi.h_in_b, frame_bo, w << ssdec, h << ssdec);
644
645    let pmv = [MotionVector { row: 0, col: 0 }; 2];
646
647    let po = frame_bo.to_luma_plane_offset();
648    let (mvx_min, mvx_max, mvy_min, mvy_max) =
649      (mvx_min >> ssdec, mvx_max >> ssdec, mvy_min >> ssdec, mvy_max >> ssdec);
650    let po = PlaneOffset { x: po.x >> ssdec, y: po.y >> ssdec };
651    let p_ref = match ssdec {
652      0 => &rec.frame.planes[0],
653      1 => &rec.input_hres,
654      2 => &rec.input_qres,
655      _ => unimplemented!(),
656    };
657
658    let org_region = &match ssdec {
659      0 => ts.input_tile.planes[0]
660        .subregion(Area::BlockStartingAt { bo: tile_bo.0 }),
661      1 => ts.input_hres.region(Area::StartingAt { x: po.x, y: po.y }),
662      2 => ts.input_qres.region(Area::StartingAt { x: po.x, y: po.y }),
663      _ => unimplemented!(),
664    };
665
666    let mv =
667      ts.me_stats[ref_frame.to_index()][tile_bo.0.y][tile_bo.0.x].mv >> ssdec;
668
669    let x_lo = po.x + (mv.col as isize / 8 - 1).max(mvx_min / 8);
676    let x_hi = po.x + (mv.col as isize / 8 + 2).min(mvx_max / 8);
677    let y_lo = po.y + (mv.row as isize / 8 - 1).max(mvy_min / 8);
678    let y_hi = po.y + (mv.row as isize / 8 + 2).min(mvy_max / 8);
679    let mut results = full_search(
680      fi, x_lo, x_hi, y_lo, y_hi, w, h, org_region, p_ref, po, 1, lambda, pmv,
681    );
682
683    results.mv = results.mv << ssdec;
685
686    Some(results)
687  } else {
688    None
689  }
690}
691
/// Full-pel motion search for one block.
///
/// Candidates come from `get_subset_predictors`. Without `extensive_search`
/// all candidates are evaluated together and the best one is refined with a
/// diamond search. With `extensive_search` the median, `subset_b` and
/// `subset_c` are tried in turn, followed by an uneven multi-hexagon search
/// and finally (if enabled in the speed settings) a full search; the
/// function returns as soon as the best SAD drops below a threshold derived
/// from the predictors' smallest SAD.
#[profiling::function]
fn full_pixel_me<T: Pixel>(
  fi: &FrameInvariants<T>, ts: &TileStateMut<'_, T>,
  org_region: &PlaneRegion<T>, p_ref: &Plane<T>, tile_bo: TileBlockOffset,
  po: PlaneOffset, lambda: u32, pmv: [MotionVector; 2], w: usize, h: usize,
  mvx_min: isize, mvx_max: isize, mvy_min: isize, mvy_max: isize,
  ref_frame: RefType, corner: MVSamplingMode, extensive_search: bool,
  ssdec: u8,
) -> MotionSearchResult {
  let ref_frame_id = ref_frame.to_index();
  let tile_me_stats = &ts.me_stats[ref_frame_id].as_const();
  // NOTE(review): the frame-level stats are always read from the frame in
  // reference slot `fi.ref_frames[0]`, regardless of `ref_frame`; only the
  // grid inside those stats is selected by `ref_frame_id`. Confirm that this
  // is intended.
  let frame_ref = fi.rec_buffer.frames[fi.ref_frames[0] as usize]
    .as_ref()
    .map(|frame_ref| frame_ref.frame_me_stats.read().expect("poisoned lock"));
  let subsets = get_subset_predictors(
    tile_bo,
    tile_me_stats,
    frame_ref,
    ref_frame_id,
    w,
    h,
    mvx_min,
    mvx_max,
    mvy_min,
    mvy_max,
    corner,
    ssdec,
  );

  // Picks the best of `predictors`, refines it with a diamond search and
  // keeps it in `best` when it is cheaper than what `best` already holds.
  let try_cands = |predictors: &[MotionVector],
                   best: &mut MotionSearchResult| {
    let mut results = get_best_predictor(
      fi,
      po,
      org_region,
      p_ref,
      predictors,
      fi.sequence.bit_depth,
      pmv,
      lambda,
      mvx_min,
      mvx_max,
      mvy_min,
      mvy_max,
      w,
      h,
    );
    fullpel_diamond_search(
      fi,
      po,
      org_region,
      p_ref,
      &mut results,
      fi.sequence.bit_depth,
      pmv,
      lambda,
      mvx_min,
      mvx_max,
      mvy_min,
      mvy_max,
      w,
      h,
    );

    if results.rd.cost < best.rd.cost {
      *best = results;
    }
  };

  let mut best: MotionSearchResult = MotionSearchResult::empty();
  if !extensive_search {
    try_cands(&subsets.all_mvs(), &mut best);
    best
  } else {
    // Early-exit threshold: 1.2 times the smallest predictor SAD plus one
    // unit per pixel, scaled by the bit depth above 8.
    let thresh = (subsets.min_sad as f32 * 1.2) as u32
      + (((w * h) as u32) << (fi.sequence.bit_depth - 8));

    if let Some(median) = subsets.median {
      try_cands(&[median], &mut best);

      if best.rd.sad < thresh {
        return best;
      }
    }

    try_cands(&subsets.subset_b, &mut best);

    if best.rd.sad < thresh {
      return best;
    }

    try_cands(&subsets.subset_c, &mut best);

    if best.rd.sad < thresh {
      return best;
    }

    // Wider pattern search around the best candidate so far, with a search
    // range of 24.
    uneven_multi_hex_search(
      fi,
      po,
      org_region,
      p_ref,
      &mut best,
      fi.sequence.bit_depth,
      pmv,
      lambda,
      mvx_min,
      mvx_max,
      mvy_min,
      mvy_max,
      w,
      h,
      24,
    );

    if !fi.config.speed_settings.motion.me_allow_full_search
      || best.rd.sad < thresh
    {
      return best;
    }

    {
      // Last resort: full search over a window scaled by
      // `fi.me_range_scale`, limited to the allowed vector range.
      let range_x = (192 * fi.me_range_scale as isize) >> ssdec;
      let range_y = (64 * fi.me_range_scale as isize) >> ssdec;
      let x_lo = po.x + (-range_x).max(mvx_min / 8);
      let x_hi = po.x + (range_x).min(mvx_max / 8);
      let y_lo = po.y + (-range_y).max(mvy_min / 8);
      let y_hi = po.y + (range_y).min(mvy_max / 8);

      let results = full_search(
        fi,
        x_lo,
        x_hi,
        y_lo,
        y_hi,
        w,
        h,
        org_region,
        p_ref,
        po,
        // Step size of the search; smaller on subsampled planes.
        4 >> ssdec,
        lambda,
        [MotionVector::default(); 2],
      );

      if results.rd.cost < best.rd.cost {
        results
      } else {
        best
      }
    }
  }
}
856
/// Refines `best` to sub-pel precision.
///
/// Thin wrapper that forwards all arguments to `subpel_diamond_search`,
/// supplying `fi.sequence.bit_depth` as the bit depth.
fn sub_pixel_me<T: Pixel>(
  fi: &FrameInvariants<T>, po: PlaneOffset, org_region: &PlaneRegion<T>,
  p_ref: &Plane<T>, lambda: u32, pmv: [MotionVector; 2], mvx_min: isize,
  mvx_max: isize, mvy_min: isize, mvy_max: isize, w: usize, h: usize,
  use_satd: bool, best: &mut MotionSearchResult, ref_frame: RefType,
) {
  subpel_diamond_search(
    fi,
    po,
    org_region,
    p_ref,
    fi.sequence.bit_depth,
    pmv,
    lambda,
    mvx_min,
    mvx_max,
    mvy_min,
    mvy_max,
    w,
    h,
    use_satd,
    best,
    ref_frame,
  );
}
882
883#[profiling::function]
884fn get_best_predictor<T: Pixel>(
885  fi: &FrameInvariants<T>, po: PlaneOffset, org_region: &PlaneRegion<T>,
886  p_ref: &Plane<T>, predictors: &[MotionVector], bit_depth: usize,
887  pmv: [MotionVector; 2], lambda: u32, mvx_min: isize, mvx_max: isize,
888  mvy_min: isize, mvy_max: isize, w: usize, h: usize,
889) -> MotionSearchResult {
890  let mut best: MotionSearchResult = MotionSearchResult::empty();
891
892  for &init_mv in predictors.iter() {
893    let rd = get_fullpel_mv_rd(
894      fi, po, org_region, p_ref, bit_depth, pmv, lambda, false, mvx_min,
895      mvx_max, mvy_min, mvy_max, w, h, init_mv,
896    );
897
898    if rd.cost < best.rd.cost {
899      best.mv = init_mv;
900      best.rd = rd;
901    }
902  }
903
904  best
905}
906
// Builds an array of `MotionVector` offsets from two parallel per-field
// lists (e.g. `col: [...]`, `row: [...]`). Every component is multiplied by
// 8 (`<< 3`), so the lists are written in whole-pixel units.
macro_rules! search_pattern {
    ($field_a:ident: [$($ll_a:expr),*], $field_b:ident: [$($ll_b:expr),*]) => {
      [ $(MotionVector { $field_a: $ll_a << 3, $field_b: $ll_b << 3 } ),*]
    };
}
915
// Same as `search_pattern!`, but the components are stored unscaled, i.e. in
// raw motion vector units.
macro_rules! search_pattern_subpel {
    ($field_a:ident: [$($ll_a:expr),*], $field_b:ident: [$($ll_b:expr),*]) => {
      [ $(MotionVector { $field_a: $ll_a, $field_b: $ll_b } ),*]
    };
}
922
/// Four-point diamond of radius one in raw motion vector units; scaled with
/// `<<` by `subpel_diamond_search`.
const DIAMOND_R1_PATTERN_SUBPEL: [MotionVector; 4] = search_pattern_subpel!(
  col: [  0,  1,  0, -1],
  row: [  1,  0, -1,  0]
);
/// Four-point diamond of radius one whole pixel; scaled with `<<` by
/// `fullpel_diamond_search`.
const DIAMOND_R1_PATTERN: [MotionVector; 4] = search_pattern!(
  col: [  0,  1,  0, -1],
  row: [  1,  0, -1,  0]
);
948
949#[profiling::function]
955fn fullpel_diamond_search<T: Pixel>(
956  fi: &FrameInvariants<T>, po: PlaneOffset, org_region: &PlaneRegion<T>,
957  p_ref: &Plane<T>, current: &mut MotionSearchResult, bit_depth: usize,
958  pmv: [MotionVector; 2], lambda: u32, mvx_min: isize, mvx_max: isize,
959  mvy_min: isize, mvy_max: isize, w: usize, h: usize,
960) {
961  let (mut diamond_radius_log2, diamond_radius_end_log2) = (1u8, 0u8);
963
964  loop {
965    let mut best_cand: MotionSearchResult = MotionSearchResult::empty();
967    for &offset in &DIAMOND_R1_PATTERN {
968      let cand_mv = current.mv + (offset << diamond_radius_log2);
969      let rd = get_fullpel_mv_rd(
970        fi, po, org_region, p_ref, bit_depth, pmv, lambda, false, mvx_min,
971        mvx_max, mvy_min, mvy_max, w, h, cand_mv,
972      );
973
974      if rd.cost < best_cand.rd.cost {
975        best_cand.mv = cand_mv;
976        best_cand.rd = rd;
977      }
978    }
979
980    if current.rd.cost <= best_cand.rd.cost {
983      if diamond_radius_log2 == diamond_radius_end_log2 {
984        break;
985      } else {
986        diamond_radius_log2 -= 1;
987      }
988    } else {
989      *current = best_cand;
990    }
991  }
992
993  assert!(!current.is_empty());
994}
995
/// Six hexagon offsets (whole pixels) used by `hexagon_search`. Consecutive
/// entries are adjacent on the hexagon, which `hexagon_search` relies on when
/// it revisits only the indices next to the last direction taken.
const HEXAGON_PATTERN: [MotionVector; 6] = search_pattern!(
  col: [  0,  2,  2,  0, -2, -2],
  row: [ -2, -1,  1,  2,  1, -1]
);
1026
/// The eight whole-pixel neighbours of a position; used by `hexagon_search`
/// for its final refinement step.
const SQUARE_REFINE_PATTERN: [MotionVector; 8] = search_pattern!(
  col: [ -1,  0,  1, -1,  1, -1,  0,  1],
  row: [  1,  1,  1,  0,  0, -1, -1, -1]
);
1039
1040#[profiling::function]
1055fn hexagon_search<T: Pixel>(
1056  fi: &FrameInvariants<T>, po: PlaneOffset, org_region: &PlaneRegion<T>,
1057  p_ref: &Plane<T>, current: &mut MotionSearchResult, bit_depth: usize,
1058  pmv: [MotionVector; 2], lambda: u32, mvx_min: isize, mvx_max: isize,
1059  mvy_min: isize, mvy_max: isize, w: usize, h: usize,
1060) {
1061  let mut best_cand_idx: usize = 0;
1068  let mut best_cand: MotionSearchResult = MotionSearchResult::empty();
1069
1070  for i in 0..6 {
1072    let cand_mv = current.mv + HEXAGON_PATTERN[i];
1073    let rd = get_fullpel_mv_rd(
1074      fi, po, org_region, p_ref, bit_depth, pmv, lambda, false, mvx_min,
1075      mvx_max, mvy_min, mvy_max, w, h, cand_mv,
1076    );
1077
1078    if rd.cost < best_cand.rd.cost {
1079      best_cand_idx = i;
1080      best_cand.mv = cand_mv;
1081      best_cand.rd = rd;
1082    }
1083  }
1084
1085  while best_cand.rd.cost < current.rd.cost {
1088    *current = best_cand;
1090    best_cand = MotionSearchResult::empty();
1091
1092    let center_cand_idx = best_cand_idx;
1095
1096    for idx_offset_mod6 in 5..=7 {
1102      let i = (center_cand_idx + idx_offset_mod6) % 6;
1103      let cand_mv = current.mv + HEXAGON_PATTERN[i];
1104
1105      let rd = get_fullpel_mv_rd(
1106        fi, po, org_region, p_ref, bit_depth, pmv, lambda, false, mvx_min,
1107        mvx_max, mvy_min, mvy_max, w, h, cand_mv,
1108      );
1109
1110      if rd.cost < best_cand.rd.cost {
1111        best_cand_idx = i;
1112        best_cand.mv = cand_mv;
1113        best_cand.rd = rd;
1114      }
1115    }
1116  }
1117
1118  let mut best_cand: MotionSearchResult = MotionSearchResult::empty();
1120  for &offset in &SQUARE_REFINE_PATTERN {
1121    let cand_mv = current.mv + offset;
1122    let rd = get_fullpel_mv_rd(
1123      fi, po, org_region, p_ref, bit_depth, pmv, lambda, false, mvx_min,
1124      mvx_max, mvy_min, mvy_max, w, h, cand_mv,
1125    );
1126
1127    if rd.cost < best_cand.rd.cost {
1128      best_cand.mv = cand_mv;
1129      best_cand.rd = rd;
1130    }
1131  }
1132  if best_cand.rd.cost < current.rd.cost {
1133    *current = best_cand;
1134  }
1135
1136  assert!(!current.is_empty());
1137}
1138
1139const UMH_PATTERN: [MotionVector; 16] = search_pattern!(
1154  col: [ -2, -1,  0,  1,  2,  3,  4,  3,  2,  1,  0, -1, -2,  3, -4, -3],
1155  row: [  4,  4,  4,  4,  4,  2,  0, -2, -4, -4, -4, -4, -4, -2,  0,  2]
1156);
1157
1158#[profiling::function]
1170fn uneven_multi_hex_search<T: Pixel>(
1171  fi: &FrameInvariants<T>, po: PlaneOffset, org_region: &PlaneRegion<T>,
1172  p_ref: &Plane<T>, current: &mut MotionSearchResult, bit_depth: usize,
1173  pmv: [MotionVector; 2], lambda: u32, mvx_min: isize, mvx_max: isize,
1174  mvy_min: isize, mvy_max: isize, w: usize, h: usize, me_range: i16,
1175) {
1176  assert!(!current.is_empty());
1177
1178  let center = current.mv;
1193
1194  for i in (1..=me_range).step_by(2) {
1196    const HORIZONTAL_LINE: [MotionVector; 2] = search_pattern!(
1197      col: [ 0, 0],
1198      row: [-1, 1]
1199    );
1200
1201    for &offset in &HORIZONTAL_LINE {
1202      let cand_mv = center + offset * i;
1203      let rd = get_fullpel_mv_rd(
1204        fi, po, org_region, p_ref, bit_depth, pmv, lambda, false, mvx_min,
1205        mvx_max, mvy_min, mvy_max, w, h, cand_mv,
1206      );
1207
1208      if rd.cost < current.rd.cost {
1209        current.mv = cand_mv;
1210        current.rd = rd;
1211      }
1212    }
1213  }
1214
1215  for i in (1..=me_range >> 1).step_by(2) {
1217    const VERTICAL_LINE: [MotionVector; 2] = search_pattern!(
1218      col: [-1, 1],
1219      row: [ 0, 0]
1220    );
1221
1222    for &offset in &VERTICAL_LINE {
1223      let cand_mv = center + offset * i;
1224      let rd = get_fullpel_mv_rd(
1225        fi, po, org_region, p_ref, bit_depth, pmv, lambda, false, mvx_min,
1226        mvx_max, mvy_min, mvy_max, w, h, cand_mv,
1227      );
1228
1229      if rd.cost < current.rd.cost {
1230        current.mv = cand_mv;
1231        current.rd = rd;
1232      }
1233    }
1234  }
1235
1236  let center = current.mv;
1238  for row in -2..=2 {
1239    for col in -2..=2 {
1240      if row == 0 && col == 0 {
1241        continue;
1242      }
1243      let cand_mv = center + MotionVector { row, col };
1244      let rd = get_fullpel_mv_rd(
1245        fi, po, org_region, p_ref, bit_depth, pmv, lambda, false, mvx_min,
1246        mvx_max, mvy_min, mvy_max, w, h, cand_mv,
1247      );
1248
1249      if rd.cost < current.rd.cost {
1250        current.mv = cand_mv;
1251        current.rd = rd;
1252      }
1253    }
1254  }
1255
1256  let center = current.mv;
1280
1281  let iterations = me_range >> 2;
1283  for i in 1..=iterations {
1284    for &offset in &UMH_PATTERN {
1285      let cand_mv = center + offset * i;
1286      let rd = get_fullpel_mv_rd(
1287        fi, po, org_region, p_ref, bit_depth, pmv, lambda, false, mvx_min,
1288        mvx_max, mvy_min, mvy_max, w, h, cand_mv,
1289      );
1290
1291      if rd.cost < current.rd.cost {
1292        current.mv = cand_mv;
1293        current.rd = rd;
1294      }
1295    }
1296  }
1297
1298  hexagon_search(
1300    fi, po, org_region, p_ref, current, bit_depth, pmv, lambda, mvx_min,
1301    mvx_max, mvy_min, mvy_max, w, h,
1302  );
1303}
1304
1305#[profiling::function]
1311fn subpel_diamond_search<T: Pixel>(
1312  fi: &FrameInvariants<T>, po: PlaneOffset, org_region: &PlaneRegion<T>,
1313  _p_ref: &Plane<T>, bit_depth: usize, pmv: [MotionVector; 2], lambda: u32,
1314  mvx_min: isize, mvx_max: isize, mvy_min: isize, mvy_max: isize, w: usize,
1315  h: usize, use_satd: bool, current: &mut MotionSearchResult,
1316  ref_frame: RefType,
1317) {
1318  use crate::util::Aligned;
1319
1320  let mc_w = w.next_power_of_two();
1322  let mc_h = (h + 1) & !1;
1323
1324  let cfg = PlaneConfig::new(mc_w, mc_h, 0, 0, 0, 0, std::mem::size_of::<T>());
1326  let mut buf: Aligned<[T; 128 * 128]> = unsafe { Aligned::uninitialized() };
1329  let mut tmp_region = PlaneRegionMut::from_slice(
1330    &mut buf.data,
1331    &cfg,
1332    Rect { x: 0, y: 0, width: cfg.width, height: cfg.height },
1333  );
1334
1335  let (mut diamond_radius_log2, diamond_radius_end_log2) =
1337    (2u8, u8::from(!fi.allow_high_precision_mv));
1338
1339  loop {
1340    let mut best_cand: MotionSearchResult = MotionSearchResult::empty();
1342    for &offset in &DIAMOND_R1_PATTERN_SUBPEL {
1343      let cand_mv = current.mv + (offset << diamond_radius_log2);
1344
1345      let rd = get_subpel_mv_rd(
1346        fi,
1347        po,
1348        org_region,
1349        bit_depth,
1350        pmv,
1351        lambda,
1352        use_satd,
1353        mvx_min,
1354        mvx_max,
1355        mvy_min,
1356        mvy_max,
1357        w,
1358        h,
1359        cand_mv,
1360        &mut tmp_region,
1361        ref_frame,
1362      );
1363
1364      if rd.cost < best_cand.rd.cost {
1365        best_cand.mv = cand_mv;
1366        best_cand.rd = rd;
1367      }
1368    }
1369
1370    if current.rd.cost <= best_cand.rd.cost {
1372      if diamond_radius_log2 == diamond_radius_end_log2 {
1373        break;
1374      } else {
1375        diamond_radius_log2 -= 1;
1376      }
1377    } else {
1378      *current = best_cand;
1379    }
1380  }
1381
1382  assert!(!current.is_empty());
1383}
1384
1385#[inline]
1386fn get_fullpel_mv_rd<T: Pixel>(
1387  fi: &FrameInvariants<T>, po: PlaneOffset, org_region: &PlaneRegion<T>,
1388  p_ref: &Plane<T>, bit_depth: usize, pmv: [MotionVector; 2], lambda: u32,
1389  use_satd: bool, mvx_min: isize, mvx_max: isize, mvy_min: isize,
1390  mvy_max: isize, w: usize, h: usize, cand_mv: MotionVector,
1391) -> MVCandidateRD {
1392  if (cand_mv.col as isize) < mvx_min
1393    || (cand_mv.col as isize) > mvx_max
1394    || (cand_mv.row as isize) < mvy_min
1395    || (cand_mv.row as isize) > mvy_max
1396  {
1397    return MVCandidateRD::empty();
1398  }
1399
1400  let plane_ref = p_ref.region(Area::StartingAt {
1402    x: po.x + (cand_mv.col / 8) as isize,
1403    y: po.y + (cand_mv.row / 8) as isize,
1404  });
1405  compute_mv_rd(
1406    fi, pmv, lambda, use_satd, bit_depth, w, h, cand_mv, org_region,
1407    &plane_ref,
1408  )
1409}
1410
1411fn get_subpel_mv_rd<T: Pixel>(
1412  fi: &FrameInvariants<T>, po: PlaneOffset, org_region: &PlaneRegion<T>,
1413  bit_depth: usize, pmv: [MotionVector; 2], lambda: u32, use_satd: bool,
1414  mvx_min: isize, mvx_max: isize, mvy_min: isize, mvy_max: isize, w: usize,
1415  h: usize, cand_mv: MotionVector, tmp_region: &mut PlaneRegionMut<T>,
1416  ref_frame: RefType,
1417) -> MVCandidateRD {
1418  if (cand_mv.col as isize) < mvx_min
1419    || (cand_mv.col as isize) > mvx_max
1420    || (cand_mv.row as isize) < mvy_min
1421    || (cand_mv.row as isize) > mvy_max
1422  {
1423    return MVCandidateRD::empty();
1424  }
1425
1426  let tmp_width = tmp_region.rect().width;
1427  let tmp_height = tmp_region.rect().height;
1428  let tile_rect =
1429    TileRect { x: 0, y: 0, width: tmp_width, height: tmp_height };
1430
1431  PredictionMode::NEWMV.predict_inter_single(
1432    fi, tile_rect, 0, po, tmp_region,
1433    tmp_width, tmp_height, ref_frame, cand_mv,
1435  );
1436  let plane_ref = tmp_region.as_const();
1437  compute_mv_rd(
1438    fi, pmv, lambda, use_satd, bit_depth, w, h, cand_mv, org_region,
1439    &plane_ref,
1440  )
1441}
1442
1443#[inline(always)]
1445fn compute_mv_rd<T: Pixel>(
1446  fi: &FrameInvariants<T>, pmv: [MotionVector; 2], lambda: u32,
1447  use_satd: bool, bit_depth: usize, w: usize, h: usize, cand_mv: MotionVector,
1448  plane_org: &PlaneRegion<'_, T>, plane_ref: &PlaneRegion<'_, T>,
1449) -> MVCandidateRD {
1450  let sad = if use_satd {
1451    get_satd(plane_org, plane_ref, w, h, bit_depth, fi.cpu_feature_level)
1452  } else {
1453    get_sad(plane_org, plane_ref, w, h, bit_depth, fi.cpu_feature_level)
1454  };
1455
1456  let rate1 = get_mv_rate(cand_mv, pmv[0], fi.allow_high_precision_mv);
1457  let rate2 = get_mv_rate(cand_mv, pmv[1], fi.allow_high_precision_mv);
1458  let rate = rate1.min(rate2 + 1);
1459
1460  MVCandidateRD { cost: 256 * sad as u64 + rate as u64 * lambda as u64, sad }
1461}
1462
1463#[profiling::function]
1464fn full_search<T: Pixel>(
1465  fi: &FrameInvariants<T>, x_lo: isize, x_hi: isize, y_lo: isize, y_hi: isize,
1466  w: usize, h: usize, org_region: &PlaneRegion<T>, p_ref: &Plane<T>,
1467  po: PlaneOffset, step: usize, lambda: u32, pmv: [MotionVector; 2],
1468) -> MotionSearchResult {
1469  let search_region = p_ref.region(Area::Rect {
1470    x: x_lo,
1471    y: y_lo,
1472    width: (x_hi - x_lo) as usize + w,
1473    height: (y_hi - y_lo) as usize + h,
1474  });
1475
1476  let mut best: MotionSearchResult = MotionSearchResult::empty();
1477
1478  for vert_window in search_region.vert_windows(h).step_by(step) {
1480    for ref_window in vert_window.horz_windows(w).step_by(step) {
1481      let &Rect { x, y, .. } = ref_window.rect();
1482
1483      let mv = MotionVector {
1484        row: 8 * (y as i16 - po.y as i16),
1485        col: 8 * (x as i16 - po.x as i16),
1486      };
1487
1488      let rd = compute_mv_rd(
1489        fi,
1490        pmv,
1491        lambda,
1492        false,
1493        fi.sequence.bit_depth,
1494        w,
1495        h,
1496        mv,
1497        org_region,
1498        &ref_window,
1499      );
1500
1501      if rd.cost < best.rd.cost {
1502        best.rd = rd;
1503        best.mv = mv;
1504      }
1505    }
1506  }
1507
1508  best
1509}
1510
1511#[inline(always)]
1512fn get_mv_rate(
1513  a: MotionVector, b: MotionVector, allow_high_precision_mv: bool,
1514) -> u32 {
1515  #[inline(always)]
1516  fn diff_to_rate(diff: i16, allow_high_precision_mv: bool) -> u32 {
1517    let d = if allow_high_precision_mv { diff } else { diff >> 1 };
1518    2 * ILog::ilog(d.abs()) as u32
1519  }
1520
1521  diff_to_rate(a.row - b.row, allow_high_precision_mv)
1522    + diff_to_rate(a.col - b.col, allow_high_precision_mv)
1523}