rav1e/
me.rs

1// Copyright (c) 2017-2022, The rav1e contributors. All rights reserved
2//
3// This source code is subject to the terms of the BSD 2 Clause License and
4// the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
5// was not distributed with this source code in the LICENSE file, you can
6// obtain it at www.aomedia.org/license/software. If the Alliance for Open
7// Media Patent License 1.0 was not distributed with this source code in the
8// PATENTS file, you can obtain it at www.aomedia.org/license/patent.
9
10use crate::api::InterConfig;
11use crate::context::{
12  BlockOffset, PlaneBlockOffset, SuperBlockOffset, TileBlockOffset,
13  TileSuperBlockOffset, MAX_SB_SIZE_LOG2, MIB_SIZE_LOG2, MI_SIZE,
14  MI_SIZE_LOG2, SB_SIZE,
15};
16use crate::dist::*;
17use crate::frame::*;
18use crate::mc::MotionVector;
19use crate::partition::*;
20use crate::predict::PredictionMode;
21use crate::tiling::*;
22use crate::util::ILog;
23use crate::util::{clamp, Pixel};
24use crate::FrameInvariants;
25
26use arrayvec::*;
27use std::ops::{Index, IndexMut};
28use std::sync::{Arc, RwLock, RwLockReadGuard, RwLockWriteGuard};
29
30#[derive(Debug, Copy, Clone, Default)]
31pub struct MEStats {
32  pub mv: MotionVector,
33  /// sad value, on the scale of a 128x128 block
34  pub normalized_sad: u32,
35}
36
37#[derive(Debug, Clone)]
38pub struct FrameMEStats {
39  stats: Box<[MEStats]>,
40  pub cols: usize,
41  pub rows: usize,
42}
43
44/// cbindgen:ignore
45pub type RefMEStats = Arc<RwLock<[FrameMEStats; REF_FRAMES]>>;
46/// cbindgen:ignore
47pub type ReadGuardMEStats<'a> =
48  RwLockReadGuard<'a, [FrameMEStats; REF_FRAMES]>;
49/// cbindgen:ignore
50pub type WriteGuardMEStats<'a> =
51  RwLockWriteGuard<'a, [FrameMEStats; REF_FRAMES]>;
52
53impl FrameMEStats {
54  #[inline]
55  pub fn rows_iter(&self) -> std::slice::ChunksExact<'_, MEStats> {
56    self.stats.chunks_exact(self.cols)
57  }
58
59  pub fn new(cols: usize, rows: usize) -> Self {
60    Self {
61      // dynamic allocation: once per frame
62      stats: vec![MEStats::default(); cols * rows].into_boxed_slice(),
63      cols,
64      rows,
65    }
66  }
67  pub fn new_arc_array(cols: usize, rows: usize) -> RefMEStats {
68    Arc::new(RwLock::new([
69      FrameMEStats::new(cols, rows),
70      FrameMEStats::new(cols, rows),
71      FrameMEStats::new(cols, rows),
72      FrameMEStats::new(cols, rows),
73      FrameMEStats::new(cols, rows),
74      FrameMEStats::new(cols, rows),
75      FrameMEStats::new(cols, rows),
76      FrameMEStats::new(cols, rows),
77    ]))
78  }
79}
80
81impl Index<usize> for FrameMEStats {
82  type Output = [MEStats];
83  #[inline]
84  fn index(&self, index: usize) -> &Self::Output {
85    &self.stats[index * self.cols..(index + 1) * self.cols]
86  }
87}
88
89impl IndexMut<usize> for FrameMEStats {
90  #[inline]
91  fn index_mut(&mut self, index: usize) -> &mut Self::Output {
92    &mut self.stats[index * self.cols..(index + 1) * self.cols]
93  }
94}
95
96/// Result of motion search.
97#[derive(Debug, Copy, Clone)]
98pub struct MotionSearchResult {
99  /// Motion vector chosen by the motion search.
100  pub mv: MotionVector,
101  /// Rate distortion data associated with `mv`.
102  pub rd: MVCandidateRD,
103}
104
105impl MotionSearchResult {
106  /// Creates an 'empty' value.
107  ///
108  /// To be considered empty, cost is set higher than any naturally occurring
109  /// cost value. The idea is that comparing to any valid rd output, the search
110  /// result will always be replaced.
111  #[inline(always)]
112  pub fn empty() -> MotionSearchResult {
113    MotionSearchResult {
114      mv: MotionVector::default(),
115      rd: MVCandidateRD::empty(),
116    }
117  }
118
119  /// Check if the value should be considered to be empty.
120  #[inline(always)]
121  const fn is_empty(&self) -> bool {
122    self.rd.cost == u64::MAX
123  }
124}
125
126/// Holds data from computing rate distortion of a motion vector.
127#[derive(Debug, Copy, Clone)]
128pub struct MVCandidateRD {
129  /// Rate distortion cost of the motion vector.
130  pub cost: u64,
131  /// Distortion metric value for the motion vector.
132  pub sad: u32,
133}
134
135impl MVCandidateRD {
136  /// Creates an 'empty' value.
137  ///
138  /// To be considered empty, cost is set higher than any naturally occurring
139  /// cost value. The idea is that comparing to any valid rd output, the search
140  /// result will always be replaced.
141  #[inline(always)]
142  const fn empty() -> MVCandidateRD {
143    MVCandidateRD { sad: u32::MAX, cost: u64::MAX }
144  }
145}
146
147#[derive(Debug, Copy, Clone, Eq, PartialEq)]
148pub enum MVSamplingMode {
149  INIT,
150  CORNER { right: bool, bottom: bool },
151}
152
153pub fn estimate_tile_motion<T: Pixel>(
154  fi: &FrameInvariants<T>, ts: &mut TileStateMut<'_, T>,
155  inter_cfg: &InterConfig,
156) {
157  let init_size = MIB_SIZE_LOG2;
158
159  let mut prev_ssdec: Option<u8> = None;
160  for mv_size_in_b_log2 in (2..=init_size).rev() {
161    let init = mv_size_in_b_log2 == init_size;
162
163    // Choose subsampling. Pass one is quarter res and pass two is at half res.
164    let ssdec = match init_size - mv_size_in_b_log2 {
165      0 => 2,
166      1 => 1,
167      _ => 0,
168    };
169
170    let new_subsampling =
171      if let Some(prev) = prev_ssdec { prev != ssdec } else { false };
172    prev_ssdec = Some(ssdec);
173
174    // 0.5 and 0.125 are a fudge factors
175    let lambda = (fi.me_lambda * 256.0 / (1 << (2 * ssdec)) as f64
176      * if ssdec == 0 { 0.5 } else { 0.125 }) as u32;
177
178    for sby in 0..ts.sb_height {
179      for sbx in 0..ts.sb_width {
180        let mut tested_frames_flags = 0;
181        for &ref_frame in inter_cfg.allowed_ref_frames() {
182          let frame_flag = 1 << fi.ref_frames[ref_frame.to_index()];
183          if tested_frames_flags & frame_flag == frame_flag {
184            continue;
185          }
186          tested_frames_flags |= frame_flag;
187
188          let tile_bo =
189            TileSuperBlockOffset(SuperBlockOffset { x: sbx, y: sby })
190              .block_offset(0, 0);
191
192          if new_subsampling {
193            refine_subsampled_sb_motion(
194              fi,
195              ts,
196              ref_frame,
197              mv_size_in_b_log2 + 1,
198              tile_bo,
199              ssdec,
200              lambda,
201            );
202          }
203
204          estimate_sb_motion(
205            fi,
206            ts,
207            ref_frame,
208            mv_size_in_b_log2,
209            tile_bo,
210            init,
211            ssdec,
212            lambda,
213          );
214        }
215      }
216    }
217  }
218}
219
220fn estimate_sb_motion<T: Pixel>(
221  fi: &FrameInvariants<T>, ts: &mut TileStateMut<'_, T>, ref_frame: RefType,
222  mv_size_in_b_log2: usize, tile_bo: TileBlockOffset, init: bool, ssdec: u8,
223  lambda: u32,
224) {
225  let pix_offset = tile_bo.to_luma_plane_offset();
226  let sb_h: usize = SB_SIZE.min(ts.height - pix_offset.y as usize);
227  let sb_w: usize = SB_SIZE.min(ts.width - pix_offset.x as usize);
228
229  let mv_size = MI_SIZE << mv_size_in_b_log2;
230
231  // Process in blocks, cropping at edges.
232  for y in (0..sb_h).step_by(mv_size) {
233    for x in (0..sb_w).step_by(mv_size) {
234      let corner: MVSamplingMode = if init {
235        MVSamplingMode::INIT
236      } else {
237        // Processing the block a size up produces data that can be used by
238        // the right and bottom corners.
239        MVSamplingMode::CORNER {
240          right: x & mv_size == mv_size,
241          bottom: y & mv_size == mv_size,
242        }
243      };
244
245      let sub_bo = tile_bo
246        .with_offset(x as isize >> MI_SIZE_LOG2, y as isize >> MI_SIZE_LOG2);
247
248      // Clamp to frame edge, rounding up in the case of subsampling.
249      // The rounding makes some assumptions about how subsampling is done.
250      let w = mv_size.min(sb_w - x + (1 << ssdec) - 1) >> ssdec;
251      let h = mv_size.min(sb_h - y + (1 << ssdec) - 1) >> ssdec;
252
253      // Run motion estimation.
254      // Note that the initial search (init) instructs the called function to
255      // perform a more extensive search.
256      if let Some(results) = estimate_motion(
257        fi,
258        ts,
259        w,
260        h,
261        sub_bo,
262        ref_frame,
263        None,
264        corner,
265        init,
266        ssdec,
267        Some(lambda),
268      ) {
269        // normalize sad to 128x128 block
270        let sad = (((results.rd.sad as u64) << (MAX_SB_SIZE_LOG2 * 2))
271          / (w * h) as u64) as u32;
272        save_me_stats(
273          ts,
274          mv_size_in_b_log2,
275          sub_bo,
276          ref_frame,
277          MEStats { mv: results.mv, normalized_sad: sad },
278        );
279      }
280    }
281  }
282}
283
284fn refine_subsampled_sb_motion<T: Pixel>(
285  fi: &FrameInvariants<T>, ts: &mut TileStateMut<'_, T>, ref_frame: RefType,
286  mv_size_in_b_log2: usize, tile_bo: TileBlockOffset, ssdec: u8, lambda: u32,
287) {
288  let pix_offset = tile_bo.to_luma_plane_offset();
289  let sb_h: usize = SB_SIZE.min(ts.height - pix_offset.y as usize);
290  let sb_w: usize = SB_SIZE.min(ts.width - pix_offset.x as usize);
291
292  let mv_size = MI_SIZE << mv_size_in_b_log2;
293
294  // Process in blocks, cropping at edges.
295  for y in (0..sb_h).step_by(mv_size) {
296    for x in (0..sb_w).step_by(mv_size) {
297      let sub_bo = tile_bo
298        .with_offset(x as isize >> MI_SIZE_LOG2, y as isize >> MI_SIZE_LOG2);
299
300      // Clamp to frame edge, rounding up in the case of subsampling.
301      // The rounding makes some assumptions about how subsampling is done.
302      let w = mv_size.min(sb_w - x + (1 << ssdec) - 1) >> ssdec;
303      let h = mv_size.min(sb_h - y + (1 << ssdec) - 1) >> ssdec;
304
305      // Refine the existing motion estimate
306      if let Some(results) = refine_subsampled_motion_estimate(
307        fi, ts, w, h, sub_bo, ref_frame, ssdec, lambda,
308      ) {
309        // normalize sad to 128x128 block
310        let sad = (((results.rd.sad as u64) << (MAX_SB_SIZE_LOG2 * 2))
311          / (w * h) as u64) as u32;
312        save_me_stats(
313          ts,
314          mv_size_in_b_log2,
315          sub_bo,
316          ref_frame,
317          MEStats { mv: results.mv, normalized_sad: sad },
318        );
319      }
320    }
321  }
322}
323
324fn save_me_stats<T: Pixel>(
325  ts: &mut TileStateMut<'_, T>, mv_size_in_b_log2: usize,
326  tile_bo: TileBlockOffset, ref_frame: RefType, stats: MEStats,
327) {
328  let size_in_b = 1 << mv_size_in_b_log2;
329  let tile_me_stats = &mut ts.me_stats[ref_frame.to_index()];
330  let tile_bo_x_end = (tile_bo.0.x + size_in_b).min(ts.mi_width);
331  let tile_bo_y_end = (tile_bo.0.y + size_in_b).min(ts.mi_height);
332  for mi_y in tile_bo.0.y..tile_bo_y_end {
333    for a in tile_me_stats[mi_y][tile_bo.0.x..tile_bo_x_end].iter_mut() {
334      *a = stats;
335    }
336  }
337}
338
339fn get_mv_range(
340  w_in_b: usize, h_in_b: usize, bo: PlaneBlockOffset, blk_w: usize,
341  blk_h: usize,
342) -> (isize, isize, isize, isize) {
343  let border_w = 128 + blk_w as isize * 8;
344  let border_h = 128 + blk_h as isize * 8;
345  let mvx_min = -(bo.0.x as isize) * (8 * MI_SIZE) as isize - border_w;
346  let mvx_max = ((w_in_b - bo.0.x) as isize - (blk_w / MI_SIZE) as isize)
347    * (8 * MI_SIZE) as isize
348    + border_w;
349  let mvy_min = -(bo.0.y as isize) * (8 * MI_SIZE) as isize - border_h;
350  let mvy_max = ((h_in_b - bo.0.y) as isize - (blk_h / MI_SIZE) as isize)
351    * (8 * MI_SIZE) as isize
352    + border_h;
353
354  // <https://aomediacodec.github.io/av1-spec/#assign-mv-semantics>
355  use crate::context::{MV_LOW, MV_UPP};
356  (
357    mvx_min.max(MV_LOW as isize + 1),
358    mvx_max.min(MV_UPP as isize - 1),
359    mvy_min.max(MV_LOW as isize + 1),
360    mvy_max.min(MV_UPP as isize - 1),
361  )
362}
363
364struct MotionEstimationSubsets {
365  min_sad: u32,
366  median: Option<MotionVector>,
367  subset_b: ArrayVec<MotionVector, 5>,
368  subset_c: ArrayVec<MotionVector, 5>,
369}
370
371impl MotionEstimationSubsets {
372  fn all_mvs(&self) -> ArrayVec<MotionVector, 11> {
373    let mut all = ArrayVec::new();
374    if let Some(median) = self.median {
375      all.push(median);
376    }
377
378    all.extend(self.subset_b.iter().copied());
379    all.extend(self.subset_c.iter().copied());
380
381    all
382  }
383}
384
385#[profiling::function]
386fn get_subset_predictors(
387  tile_bo: TileBlockOffset, tile_me_stats: &TileMEStats<'_>,
388  frame_ref_opt: Option<ReadGuardMEStats<'_>>, ref_frame_id: usize,
389  pix_w: usize, pix_h: usize, mvx_min: isize, mvx_max: isize, mvy_min: isize,
390  mvy_max: isize, corner: MVSamplingMode, ssdec: u8,
391) -> MotionEstimationSubsets {
392  let mut min_sad: u32 = u32::MAX;
393  let mut subset_b = ArrayVec::<MotionVector, 5>::new();
394  let mut subset_c = ArrayVec::<MotionVector, 5>::new();
395
396  // rounded up width in blocks
397  let w = ((pix_w << ssdec) + MI_SIZE - 1) >> MI_SIZE_LOG2;
398  let h = ((pix_h << ssdec) + MI_SIZE - 1) >> MI_SIZE_LOG2;
399
400  // Get predictors from the same frame.
401
402  let clipped_half_w = (w >> 1).min(tile_me_stats.cols() - 1 - tile_bo.0.x);
403  let clipped_half_h = (h >> 1).min(tile_me_stats.rows() - 1 - tile_bo.0.y);
404
405  let mut process_cand = |stats: MEStats| -> MotionVector {
406    min_sad = min_sad.min(stats.normalized_sad);
407    let mv = stats.mv.quantize_to_fullpel();
408    MotionVector {
409      col: clamp(mv.col as isize, mvx_min, mvx_max) as i16,
410      row: clamp(mv.row as isize, mvy_min, mvy_max) as i16,
411    }
412  };
413
414  // Sample the middle of all block edges bordering this one.
415  // Note: If motion vectors haven't been precomputed to a given blocksize, then
416  // the right and bottom edges will be duplicates of the center predictor when
417  // processing in raster order.
418
419  // left
420  if tile_bo.0.x > 0 {
421    subset_b.push(process_cand(
422      tile_me_stats[tile_bo.0.y + clipped_half_h][tile_bo.0.x - 1],
423    ));
424  }
425  // top
426  if tile_bo.0.y > 0 {
427    subset_b.push(process_cand(
428      tile_me_stats[tile_bo.0.y - 1][tile_bo.0.x + clipped_half_w],
429    ));
430  }
431
432  // Sampling far right and far bottom edges was tested, but had worse results
433  // without an extensive threshold test (with threshold being applied after
434  // checking median and the best of each subset).
435
436  // right
437  if let MVSamplingMode::CORNER { right: true, bottom: _ } = corner {
438    if tile_bo.0.x + w < tile_me_stats.cols() {
439      subset_b.push(process_cand(
440        tile_me_stats[tile_bo.0.y + clipped_half_h][tile_bo.0.x + w],
441      ));
442    }
443  }
444  // bottom
445  if let MVSamplingMode::CORNER { right: _, bottom: true } = corner {
446    if tile_bo.0.y + h < tile_me_stats.rows() {
447      subset_b.push(process_cand(
448        tile_me_stats[tile_bo.0.y + h][tile_bo.0.x + clipped_half_w],
449      ));
450    }
451  }
452
453  let median = if corner != MVSamplingMode::INIT {
454    // Sample the center of the current block.
455    Some(process_cand(
456      tile_me_stats[tile_bo.0.y + clipped_half_h]
457        [tile_bo.0.x + clipped_half_w],
458    ))
459  } else if subset_b.len() != 3 {
460    None
461  } else {
462    let mut rows: ArrayVec<i16, 3> = subset_b.iter().map(|&a| a.row).collect();
463    let mut cols: ArrayVec<i16, 3> = subset_b.iter().map(|&a| a.col).collect();
464    rows.as_mut_slice().sort_unstable();
465    cols.as_mut_slice().sort_unstable();
466    Some(MotionVector { row: rows[1], col: cols[1] })
467  };
468
469  // Zero motion vector, don't use add_cand since it skips zero vectors.
470  subset_b.push(MotionVector::default());
471
472  // EPZS subset C predictors.
473  // Sample the middle of bordering side of the left, right, top and bottom
474  // blocks of the previous frame.
475  // Sample the middle of this block in the previous frame.
476
477  if let Some(frame_me_stats) = frame_ref_opt {
478    let prev_frame = &frame_me_stats[ref_frame_id];
479
480    let frame_bo = PlaneBlockOffset(BlockOffset {
481      x: tile_me_stats.x() + tile_bo.0.x,
482      y: tile_me_stats.y() + tile_bo.0.y,
483    });
484    let clipped_half_w = (w >> 1).min(prev_frame.cols - 1 - frame_bo.0.x);
485    let clipped_half_h = (h >> 1).min(prev_frame.rows - 1 - frame_bo.0.y);
486
487    // left
488    if frame_bo.0.x > 0 {
489      subset_c.push(process_cand(
490        prev_frame[frame_bo.0.y + clipped_half_h][frame_bo.0.x - 1],
491      ));
492    }
493    // top
494    if frame_bo.0.y > 0 {
495      subset_c.push(process_cand(
496        prev_frame[frame_bo.0.y - 1][frame_bo.0.x + clipped_half_w],
497      ));
498    }
499    // right
500    if frame_bo.0.x + w < prev_frame.cols {
501      subset_c.push(process_cand(
502        prev_frame[frame_bo.0.y + clipped_half_h][frame_bo.0.x + w],
503      ));
504    }
505    // bottom
506    if frame_bo.0.y + h < prev_frame.rows {
507      subset_c.push(process_cand(
508        prev_frame[frame_bo.0.y + h][frame_bo.0.x + clipped_half_w],
509      ));
510    }
511
512    subset_c.push(process_cand(
513      prev_frame[frame_bo.0.y + clipped_half_h][frame_bo.0.x + clipped_half_w],
514    ));
515  }
516
517  // Undo normalization to 128x128 block size
518  let min_sad = ((min_sad as u64 * (pix_w * pix_h) as u64)
519    >> (MAX_SB_SIZE_LOG2 * 2)) as u32;
520
521  let dec_mv = |mv: MotionVector| MotionVector {
522    col: mv.col >> ssdec,
523    row: mv.row >> ssdec,
524  };
525  let median = median.map(dec_mv);
526  for mv in subset_b.iter_mut() {
527    *mv = dec_mv(*mv);
528  }
529  for mv in subset_c.iter_mut() {
530    *mv = dec_mv(*mv);
531  }
532
533  MotionEstimationSubsets { min_sad, median, subset_b, subset_c }
534}
535
536pub fn estimate_motion<T: Pixel>(
537  fi: &FrameInvariants<T>, ts: &TileStateMut<'_, T>, w: usize, h: usize,
538  tile_bo: TileBlockOffset, ref_frame: RefType,
539  pmv: Option<[MotionVector; 2]>, corner: MVSamplingMode,
540  extensive_search: bool, ssdec: u8, lambda: Option<u32>,
541) -> Option<MotionSearchResult> {
542  if let Some(ref rec) =
543    fi.rec_buffer.frames[fi.ref_frames[ref_frame.to_index()] as usize]
544  {
545    let frame_bo = ts.to_frame_block_offset(tile_bo);
546    let (mvx_min, mvx_max, mvy_min, mvy_max) =
547      get_mv_range(fi.w_in_b, fi.h_in_b, frame_bo, w << ssdec, h << ssdec);
548
549    let lambda = lambda.unwrap_or({
550      // 0.5 is a fudge factor
551      (fi.me_lambda * 256.0 * 0.5) as u32
552    });
553
554    let global_mv = [MotionVector { row: 0, col: 0 }; 2];
555
556    let po = frame_bo.to_luma_plane_offset();
557    let (mvx_min, mvx_max, mvy_min, mvy_max) =
558      (mvx_min >> ssdec, mvx_max >> ssdec, mvy_min >> ssdec, mvy_max >> ssdec);
559    let po = PlaneOffset { x: po.x >> ssdec, y: po.y >> ssdec };
560    let p_ref = match ssdec {
561      0 => &rec.frame.planes[0],
562      1 => &rec.input_hres,
563      2 => &rec.input_qres,
564      _ => unimplemented!(),
565    };
566
567    let org_region = &match ssdec {
568      0 => ts.input_tile.planes[0]
569        .subregion(Area::BlockStartingAt { bo: tile_bo.0 }),
570      1 => ts.input_hres.region(Area::StartingAt { x: po.x, y: po.y }),
571      2 => ts.input_qres.region(Area::StartingAt { x: po.x, y: po.y }),
572      _ => unimplemented!(),
573    };
574
575    let mut best: MotionSearchResult = full_pixel_me(
576      fi,
577      ts,
578      org_region,
579      p_ref,
580      tile_bo,
581      po,
582      lambda,
583      pmv.unwrap_or(global_mv),
584      w,
585      h,
586      mvx_min,
587      mvx_max,
588      mvy_min,
589      mvy_max,
590      ref_frame,
591      corner,
592      extensive_search,
593      ssdec,
594    );
595
596    if let Some(pmv) = pmv {
597      let use_satd: bool = fi.config.speed_settings.motion.use_satd_subpel;
598      if use_satd {
599        best.rd = get_fullpel_mv_rd(
600          fi,
601          po,
602          org_region,
603          p_ref,
604          fi.sequence.bit_depth,
605          pmv,
606          lambda,
607          use_satd,
608          mvx_min,
609          mvx_max,
610          mvy_min,
611          mvy_max,
612          w,
613          h,
614          best.mv,
615        );
616      }
617
618      sub_pixel_me(
619        fi, po, org_region, p_ref, lambda, pmv, mvx_min, mvx_max, mvy_min,
620        mvy_max, w, h, use_satd, &mut best, ref_frame,
621      );
622    }
623
624    // Scale motion vectors to full res size
625    best.mv = best.mv << ssdec;
626
627    Some(best)
628  } else {
629    None
630  }
631}
632
633/// Refine motion estimation that was computed one level of subsampling up.
634fn refine_subsampled_motion_estimate<T: Pixel>(
635  fi: &FrameInvariants<T>, ts: &TileStateMut<'_, T>, w: usize, h: usize,
636  tile_bo: TileBlockOffset, ref_frame: RefType, ssdec: u8, lambda: u32,
637) -> Option<MotionSearchResult> {
638  if let Some(ref rec) =
639    fi.rec_buffer.frames[fi.ref_frames[ref_frame.to_index()] as usize]
640  {
641    let frame_bo = ts.to_frame_block_offset(tile_bo);
642    let (mvx_min, mvx_max, mvy_min, mvy_max) =
643      get_mv_range(fi.w_in_b, fi.h_in_b, frame_bo, w << ssdec, h << ssdec);
644
645    let pmv = [MotionVector { row: 0, col: 0 }; 2];
646
647    let po = frame_bo.to_luma_plane_offset();
648    let (mvx_min, mvx_max, mvy_min, mvy_max) =
649      (mvx_min >> ssdec, mvx_max >> ssdec, mvy_min >> ssdec, mvy_max >> ssdec);
650    let po = PlaneOffset { x: po.x >> ssdec, y: po.y >> ssdec };
651    let p_ref = match ssdec {
652      0 => &rec.frame.planes[0],
653      1 => &rec.input_hres,
654      2 => &rec.input_qres,
655      _ => unimplemented!(),
656    };
657
658    let org_region = &match ssdec {
659      0 => ts.input_tile.planes[0]
660        .subregion(Area::BlockStartingAt { bo: tile_bo.0 }),
661      1 => ts.input_hres.region(Area::StartingAt { x: po.x, y: po.y }),
662      2 => ts.input_qres.region(Area::StartingAt { x: po.x, y: po.y }),
663      _ => unimplemented!(),
664    };
665
666    let mv =
667      ts.me_stats[ref_frame.to_index()][tile_bo.0.y][tile_bo.0.x].mv >> ssdec;
668
669    // Given a motion vector at 0 at higher subsampling:
670    // |  -1   |   0   |   1   |
671    // then the vectors at -1 to 2 should be tested at the current subsampling.
672    //      |-------------|
673    // | -2 -1 |  0  1 |  2  3 |
674    // This corresponds to a 4x4 full search.
675    let x_lo = po.x + (mv.col as isize / 8 - 1).max(mvx_min / 8);
676    let x_hi = po.x + (mv.col as isize / 8 + 2).min(mvx_max / 8);
677    let y_lo = po.y + (mv.row as isize / 8 - 1).max(mvy_min / 8);
678    let y_hi = po.y + (mv.row as isize / 8 + 2).min(mvy_max / 8);
679    let mut results = full_search(
680      fi, x_lo, x_hi, y_lo, y_hi, w, h, org_region, p_ref, po, 1, lambda, pmv,
681    );
682
683    // Scale motion vectors to full res size
684    results.mv = results.mv << ssdec;
685
686    Some(results)
687  } else {
688    None
689  }
690}
691
692#[profiling::function]
693fn full_pixel_me<T: Pixel>(
694  fi: &FrameInvariants<T>, ts: &TileStateMut<'_, T>,
695  org_region: &PlaneRegion<T>, p_ref: &Plane<T>, tile_bo: TileBlockOffset,
696  po: PlaneOffset, lambda: u32, pmv: [MotionVector; 2], w: usize, h: usize,
697  mvx_min: isize, mvx_max: isize, mvy_min: isize, mvy_max: isize,
698  ref_frame: RefType, corner: MVSamplingMode, extensive_search: bool,
699  ssdec: u8,
700) -> MotionSearchResult {
701  let ref_frame_id = ref_frame.to_index();
702  let tile_me_stats = &ts.me_stats[ref_frame_id].as_const();
703  let frame_ref = fi.rec_buffer.frames[fi.ref_frames[0] as usize]
704    .as_ref()
705    .map(|frame_ref| frame_ref.frame_me_stats.read().expect("poisoned lock"));
706  let subsets = get_subset_predictors(
707    tile_bo,
708    tile_me_stats,
709    frame_ref,
710    ref_frame_id,
711    w,
712    h,
713    mvx_min,
714    mvx_max,
715    mvy_min,
716    mvy_max,
717    corner,
718    ssdec,
719  );
720
721  let try_cands = |predictors: &[MotionVector],
722                   best: &mut MotionSearchResult| {
723    let mut results = get_best_predictor(
724      fi,
725      po,
726      org_region,
727      p_ref,
728      predictors,
729      fi.sequence.bit_depth,
730      pmv,
731      lambda,
732      mvx_min,
733      mvx_max,
734      mvy_min,
735      mvy_max,
736      w,
737      h,
738    );
739    fullpel_diamond_search(
740      fi,
741      po,
742      org_region,
743      p_ref,
744      &mut results,
745      fi.sequence.bit_depth,
746      pmv,
747      lambda,
748      mvx_min,
749      mvx_max,
750      mvy_min,
751      mvy_max,
752      w,
753      h,
754    );
755
756    if results.rd.cost < best.rd.cost {
757      *best = results;
758    }
759  };
760
761  let mut best: MotionSearchResult = MotionSearchResult::empty();
762  if !extensive_search {
763    try_cands(&subsets.all_mvs(), &mut best);
764    best
765  } else {
766    // Perform a more thorough search before resorting to full search.
767    // Search the median, the best mvs of neighboring blocks, and motion vectors
768    // from the previous frame. Stop once a candidate with a sad less than a
769    // threshold is found.
770
771    let thresh = (subsets.min_sad as f32 * 1.2) as u32
772      + (((w * h) as u32) << (fi.sequence.bit_depth - 8));
773
774    if let Some(median) = subsets.median {
775      try_cands(&[median], &mut best);
776
777      if best.rd.sad < thresh {
778        return best;
779      }
780    }
781
782    try_cands(&subsets.subset_b, &mut best);
783
784    if best.rd.sad < thresh {
785      return best;
786    }
787
788    try_cands(&subsets.subset_c, &mut best);
789
790    if best.rd.sad < thresh {
791      return best;
792    }
793
794    // Preform UMH search, either as the last possible search when full search
795    // is disabled, or as the last search before resorting to full search.
796    uneven_multi_hex_search(
797      fi,
798      po,
799      org_region,
800      p_ref,
801      &mut best,
802      fi.sequence.bit_depth,
803      pmv,
804      lambda,
805      mvx_min,
806      mvx_max,
807      mvy_min,
808      mvy_max,
809      w,
810      h,
811      // Use 24, since it is the largest range that x264 uses.
812      24,
813    );
814
815    if !fi.config.speed_settings.motion.me_allow_full_search
816      || best.rd.sad < thresh
817    {
818      return best;
819    }
820
821    {
822      let range_x = (192 * fi.me_range_scale as isize) >> ssdec;
823      let range_y = (64 * fi.me_range_scale as isize) >> ssdec;
824      let x_lo = po.x + (-range_x).max(mvx_min / 8);
825      let x_hi = po.x + (range_x).min(mvx_max / 8);
826      let y_lo = po.y + (-range_y).max(mvy_min / 8);
827      let y_hi = po.y + (range_y).min(mvy_max / 8);
828
829      let results = full_search(
830        fi,
831        x_lo,
832        x_hi,
833        y_lo,
834        y_hi,
835        w,
836        h,
837        org_region,
838        p_ref,
839        po,
840        // Full search is run at quarter resolution, except for short edges.
841        // When subsampling is lower than usual, the step size is raised so that
842        // the number of search locations stays the same.
843        4 >> ssdec,
844        lambda,
845        [MotionVector::default(); 2],
846      );
847
848      if results.rd.cost < best.rd.cost {
849        results
850      } else {
851        best
852      }
853    }
854  }
855}
856
857fn sub_pixel_me<T: Pixel>(
858  fi: &FrameInvariants<T>, po: PlaneOffset, org_region: &PlaneRegion<T>,
859  p_ref: &Plane<T>, lambda: u32, pmv: [MotionVector; 2], mvx_min: isize,
860  mvx_max: isize, mvy_min: isize, mvy_max: isize, w: usize, h: usize,
861  use_satd: bool, best: &mut MotionSearchResult, ref_frame: RefType,
862) {
863  subpel_diamond_search(
864    fi,
865    po,
866    org_region,
867    p_ref,
868    fi.sequence.bit_depth,
869    pmv,
870    lambda,
871    mvx_min,
872    mvx_max,
873    mvy_min,
874    mvy_max,
875    w,
876    h,
877    use_satd,
878    best,
879    ref_frame,
880  );
881}
882
883#[profiling::function]
884fn get_best_predictor<T: Pixel>(
885  fi: &FrameInvariants<T>, po: PlaneOffset, org_region: &PlaneRegion<T>,
886  p_ref: &Plane<T>, predictors: &[MotionVector], bit_depth: usize,
887  pmv: [MotionVector; 2], lambda: u32, mvx_min: isize, mvx_max: isize,
888  mvy_min: isize, mvy_max: isize, w: usize, h: usize,
889) -> MotionSearchResult {
890  let mut best: MotionSearchResult = MotionSearchResult::empty();
891
892  for &init_mv in predictors.iter() {
893    let rd = get_fullpel_mv_rd(
894      fi, po, org_region, p_ref, bit_depth, pmv, lambda, false, mvx_min,
895      mvx_max, mvy_min, mvy_max, w, h, init_mv,
896    );
897
898    if rd.cost < best.rd.cost {
899      best.mv = init_mv;
900      best.rd = rd;
901    }
902  }
903
904  best
905}
906
907/// Declares an array of motion vectors in structure of arrays syntax.
908/// Compared to [`search_pattern_subpel`], this version creates motion vectors
909/// in fullpel resolution (x8).
910macro_rules! search_pattern {
911    ($field_a:ident: [$($ll_a:expr),*], $field_b:ident: [$($ll_b:expr),*]) => {
912      [ $(MotionVector { $field_a: $ll_a << 3, $field_b: $ll_b << 3 } ),*]
913    };
914}
915
916/// Declares an array of motion vectors in structure of arrays syntax.
917macro_rules! search_pattern_subpel {
918    ($field_a:ident: [$($ll_a:expr),*], $field_b:ident: [$($ll_b:expr),*]) => {
919      [ $(MotionVector { $field_a: $ll_a, $field_b: $ll_b } ),*]
920    };
921}
922
923/// Diamond pattern of radius 1 as shown below. For fullpel search, use
924/// `DIAMOND_R1_PATTERN_FULLPEL` since it has been scaled for fullpel search.
925/// ```text
926///  X
927/// XoX
928///  X
929/// ```
930/// 'X's are motion candidates and the 'o' is the center.
931///
932const DIAMOND_R1_PATTERN_SUBPEL: [MotionVector; 4] = search_pattern_subpel!(
933  col: [  0,  1,  0, -1],
934  row: [  1,  0, -1,  0]
935);
936/// Diamond pattern of radius 1 as shown below. Unlike `DIAMOND_R1_PATTERN`, the
937/// vectors have been shifted fullpel scale.
938/// ```text
939///  X
940/// XoX
941///  X
942/// ```
943/// 'X's are motion candidates and the 'o' is the center.
944const DIAMOND_R1_PATTERN: [MotionVector; 4] = search_pattern!(
945  col: [  0,  1,  0, -1],
946  row: [  1,  0, -1,  0]
947);
948
949/// Run a full pixel diamond search. The search is run on multiple step sizes.
950///
951/// For each step size, candidate motion vectors are examined for improvement
952/// to the current search location. The search location is moved to the best
953/// candidate (if any). This is repeated until the search location stops moving.
954#[profiling::function]
955fn fullpel_diamond_search<T: Pixel>(
956  fi: &FrameInvariants<T>, po: PlaneOffset, org_region: &PlaneRegion<T>,
957  p_ref: &Plane<T>, current: &mut MotionSearchResult, bit_depth: usize,
958  pmv: [MotionVector; 2], lambda: u32, mvx_min: isize, mvx_max: isize,
959  mvy_min: isize, mvy_max: isize, w: usize, h: usize,
960) {
961  // Define the initial and the final scale (log2) of the diamond.
962  let (mut diamond_radius_log2, diamond_radius_end_log2) = (1u8, 0u8);
963
964  loop {
965    // Find the best candidate from the diamond pattern.
966    let mut best_cand: MotionSearchResult = MotionSearchResult::empty();
967    for &offset in &DIAMOND_R1_PATTERN {
968      let cand_mv = current.mv + (offset << diamond_radius_log2);
969      let rd = get_fullpel_mv_rd(
970        fi, po, org_region, p_ref, bit_depth, pmv, lambda, false, mvx_min,
971        mvx_max, mvy_min, mvy_max, w, h, cand_mv,
972      );
973
974      if rd.cost < best_cand.rd.cost {
975        best_cand.mv = cand_mv;
976        best_cand.rd = rd;
977      }
978    }
979
980    // Continue the search at this scale until the can't find a better candidate
981    // to use.
982    if current.rd.cost <= best_cand.rd.cost {
983      if diamond_radius_log2 == diamond_radius_end_log2 {
984        break;
985      } else {
986        diamond_radius_log2 -= 1;
987      }
988    } else {
989      *current = best_cand;
990    }
991  }
992
993  assert!(!current.is_empty());
994}
995
996/// A hexagon pattern around a center point. The pattern is ordered so that the
997/// offsets circle around the center. This is done to allow pruning locations
998/// covered by the last iteration.
999/// ```text
1000///   21012
1001/// 2  X X
1002/// 1
1003/// 0 X o X
1004/// 1
1005/// 2  X X
1006/// ```
1007/// 'X's are motion candidates and the 'o' is the center.
1008///
1009/// The illustration below shows the process of a hexagon search.
1010/// ```text
1011/// Step 1    Step 2
1012///  1 1       1 1 2
1013///
1014/// 1(0)1  => 1 0(1)2
1015///
1016///  1 1       1 1 2
1017/// ```
1018/// The search above has gone through the following steps.
1019/// 1. Search '1' elements for better candidates than the center '0'.
1020/// 2. Recenter around the best candidate ('(1)') and hexagon candidates that
1021/// don't overlap with the previous search step (labeled '2').
1022const HEXAGON_PATTERN: [MotionVector; 6] = search_pattern!(
1023  col: [  0,  2,  2,  0, -2, -2],
1024  row: [ -2, -1,  1,  2,  1, -1]
1025);
1026
1027/// A small square pattern around a center point.
1028/// ```text
1029///   101
1030/// 1 XXX
1031/// 0 XoX
1032/// 1 XXX
1033/// ```
1034/// 'X's are motion candidates and the 'o' is the center.
1035const SQUARE_REFINE_PATTERN: [MotionVector; 8] = search_pattern!(
1036  col: [ -1,  0,  1, -1,  1, -1,  0,  1],
1037  row: [  1,  1,  1,  0,  0, -1, -1, -1]
1038);
1039
1040/// Perform hexagon search and refine afterwards.
1041///
1042/// In the hexagon search stage, candidate motion vectors are examined for
1043/// improvement to the current search location. The search location is moved to
1044/// the best candidate (if any). This is repeated until the search location
1045/// stops moving.
1046///
1047/// Refinement uses a square pattern that fits between the hexagon candidates.
1048///
1049/// The hexagon pattern is defined by [`HEXAGON_PATTERN`] and the refinement
1050/// is defined by [`SQUARE_REFINE_PATTERN`].
1051///
1052/// `current` provides the initial search location and serves as
1053/// the output for the final search results.
1054#[profiling::function]
1055fn hexagon_search<T: Pixel>(
1056  fi: &FrameInvariants<T>, po: PlaneOffset, org_region: &PlaneRegion<T>,
1057  p_ref: &Plane<T>, current: &mut MotionSearchResult, bit_depth: usize,
1058  pmv: [MotionVector; 2], lambda: u32, mvx_min: isize, mvx_max: isize,
1059  mvy_min: isize, mvy_max: isize, w: usize, h: usize,
1060) {
1061  // The first iteration of hexagon search is implemented separate from
1062  // subsequent iterations, which overlap with previous iterations.
1063
1064  // Holds what candidate is used (if any). This is used to determine which
1065  // candidates have already been tested in a previous iteration and can be
1066  // skipped.
1067  let mut best_cand_idx: usize = 0;
1068  let mut best_cand: MotionSearchResult = MotionSearchResult::empty();
1069
1070  // First iteration of hexagon search. There are six candidates to consider.
1071  for i in 0..6 {
1072    let cand_mv = current.mv + HEXAGON_PATTERN[i];
1073    let rd = get_fullpel_mv_rd(
1074      fi, po, org_region, p_ref, bit_depth, pmv, lambda, false, mvx_min,
1075      mvx_max, mvy_min, mvy_max, w, h, cand_mv,
1076    );
1077
1078    if rd.cost < best_cand.rd.cost {
1079      best_cand_idx = i;
1080      best_cand.mv = cand_mv;
1081      best_cand.rd = rd;
1082    }
1083  }
1084
1085  // Run additional iterations of hexagon search until the search location
1086  // doesn't update.
1087  while best_cand.rd.cost < current.rd.cost {
1088    // Update the search location.
1089    *current = best_cand;
1090    best_cand = MotionSearchResult::empty();
1091
1092    // Save the index/direction taken in the previous iteration to the current
1093    // search location.
1094    let center_cand_idx = best_cand_idx;
1095
1096    // Look only at candidates that don't overlap with previous iterations. This
1097    // corresponds with the three offsets (2D) with the closest direction to
1098    // that traveled by the previous iteration. HEXAGON_PATTERN has clockwise
1099    // order, so the last direction -1, +0, and +1 (mod 6) give the indices for
1100    // these offsets.
1101    for idx_offset_mod6 in 5..=7 {
1102      let i = (center_cand_idx + idx_offset_mod6) % 6;
1103      let cand_mv = current.mv + HEXAGON_PATTERN[i];
1104
1105      let rd = get_fullpel_mv_rd(
1106        fi, po, org_region, p_ref, bit_depth, pmv, lambda, false, mvx_min,
1107        mvx_max, mvy_min, mvy_max, w, h, cand_mv,
1108      );
1109
1110      if rd.cost < best_cand.rd.cost {
1111        best_cand_idx = i;
1112        best_cand.mv = cand_mv;
1113        best_cand.rd = rd;
1114      }
1115    }
1116  }
1117
1118  // Refine the motion after completing hexagon search.
1119  let mut best_cand: MotionSearchResult = MotionSearchResult::empty();
1120  for &offset in &SQUARE_REFINE_PATTERN {
1121    let cand_mv = current.mv + offset;
1122    let rd = get_fullpel_mv_rd(
1123      fi, po, org_region, p_ref, bit_depth, pmv, lambda, false, mvx_min,
1124      mvx_max, mvy_min, mvy_max, w, h, cand_mv,
1125    );
1126
1127    if rd.cost < best_cand.rd.cost {
1128      best_cand.mv = cand_mv;
1129      best_cand.rd = rd;
1130    }
1131  }
1132  if best_cand.rd.cost < current.rd.cost {
1133    *current = best_cand;
1134  }
1135
1136  assert!(!current.is_empty());
1137}
1138
1139/// Uneven multi-hexagon search pattern around a center point. Used for locating
1140/// irregular movement.
1141/// ```text
1142///      X
1143///    X   X
1144///  X       X
1145///  X       X
1146///  X   o   X
1147///  X       X
1148///  X       X
1149///    X   X
1150///      X
1151/// ```
1152/// 'X's are motion candidates and the 'o' is the center.
1153const UMH_PATTERN: [MotionVector; 16] = search_pattern!(
1154  col: [ -2, -1,  0,  1,  2,  3,  4,  3,  2,  1,  0, -1, -2,  3, -4, -3],
1155  row: [  4,  4,  4,  4,  4,  2,  0, -2, -4, -4, -4, -4, -4, -2,  0,  2]
1156);
1157
1158/// Perform an uneven multi-hexagon search. There are 4 stages:
1159/// 1. Unsymmetrical-cross search: Search the horizontal and vertical directions
1160///   for the general direction of the motion.
1161/// 2. A 5x5 full search is done to refine the current candidate.
1162/// 3. Uneven multi-hexagon search. See [`UMH_PATTERN`].
1163/// 4. Refinement using standard hexagon search.
1164///
1165/// `current` provides the initial search location and serves as
1166/// the output for the final search results.
1167///
1168/// `me_range` parameter determines how far these stages can search.
1169#[profiling::function]
1170fn uneven_multi_hex_search<T: Pixel>(
1171  fi: &FrameInvariants<T>, po: PlaneOffset, org_region: &PlaneRegion<T>,
1172  p_ref: &Plane<T>, current: &mut MotionSearchResult, bit_depth: usize,
1173  pmv: [MotionVector; 2], lambda: u32, mvx_min: isize, mvx_max: isize,
1174  mvy_min: isize, mvy_max: isize, w: usize, h: usize, me_range: i16,
1175) {
1176  assert!(!current.is_empty());
1177
1178  // Search in a cross pattern to obtain a rough approximate of motion.
1179  // The cross is split into a horizontal and vertical component. Video content
1180  // tends to have more horizontal motion, so the horizontal part of the cross
1181  // is twice as large as the vertical half.
1182  //        X        -
1183  //                 | <- me_range/2
1184  //        X        |
1185  // X X X XoX X X X -
1186  //        X
1187  //
1188  //        X
1189  // |------|
1190  //     \
1191  //    me_range
1192  let center = current.mv;
1193
1194  // The larger, horizontal, part of the cross search.
1195  for i in (1..=me_range).step_by(2) {
1196    const HORIZONTAL_LINE: [MotionVector; 2] = search_pattern!(
1197      col: [ 0, 0],
1198      row: [-1, 1]
1199    );
1200
1201    for &offset in &HORIZONTAL_LINE {
1202      let cand_mv = center + offset * i;
1203      let rd = get_fullpel_mv_rd(
1204        fi, po, org_region, p_ref, bit_depth, pmv, lambda, false, mvx_min,
1205        mvx_max, mvy_min, mvy_max, w, h, cand_mv,
1206      );
1207
1208      if rd.cost < current.rd.cost {
1209        current.mv = cand_mv;
1210        current.rd = rd;
1211      }
1212    }
1213  }
1214
1215  // The smaller, vertical, part of the cross search
1216  for i in (1..=me_range >> 1).step_by(2) {
1217    const VERTICAL_LINE: [MotionVector; 2] = search_pattern!(
1218      col: [-1, 1],
1219      row: [ 0, 0]
1220    );
1221
1222    for &offset in &VERTICAL_LINE {
1223      let cand_mv = center + offset * i;
1224      let rd = get_fullpel_mv_rd(
1225        fi, po, org_region, p_ref, bit_depth, pmv, lambda, false, mvx_min,
1226        mvx_max, mvy_min, mvy_max, w, h, cand_mv,
1227      );
1228
1229      if rd.cost < current.rd.cost {
1230        current.mv = cand_mv;
1231        current.rd = rd;
1232      }
1233    }
1234  }
1235
1236  // 5x5 full search. Search a 5x5 square region around the current best mv.
1237  let center = current.mv;
1238  for row in -2..=2 {
1239    for col in -2..=2 {
1240      if row == 0 && col == 0 {
1241        continue;
1242      }
1243      let cand_mv = center + MotionVector { row, col };
1244      let rd = get_fullpel_mv_rd(
1245        fi, po, org_region, p_ref, bit_depth, pmv, lambda, false, mvx_min,
1246        mvx_max, mvy_min, mvy_max, w, h, cand_mv,
1247      );
1248
1249      if rd.cost < current.rd.cost {
1250        current.mv = cand_mv;
1251        current.rd = rd;
1252      }
1253    }
1254  }
1255
1256  // Run the hexagons in uneven multi-hexagon. The hexagonal pattern is tested
1257  // around the best vector at multiple scales.
1258  // Example of the UMH pattern run on a scale of 1 and 2:
1259  //         2         -
1260  //                   | <- me_range
1261  //     2       2     |
1262  //                   |
1263  // 2       1       2 |
1264  //       1   1       |
1265  // 2   1       1   2 |
1266  //     1       1     |
1267  // 2   1   o   1   2 |
1268  //     1       1     |
1269  // 2   1       1   2 |
1270  //       1   1       |
1271  // 2       1       2 |
1272  //                   |
1273  //     2       2     |
1274  //                   |
1275  //         2         -
1276  // |---------------|
1277  //        \
1278  //       me_range
1279  let center = current.mv;
1280
1281  // Divide by 4, the radius of the UMH's hexagon.
1282  let iterations = me_range >> 2;
1283  for i in 1..=iterations {
1284    for &offset in &UMH_PATTERN {
1285      let cand_mv = center + offset * i;
1286      let rd = get_fullpel_mv_rd(
1287        fi, po, org_region, p_ref, bit_depth, pmv, lambda, false, mvx_min,
1288        mvx_max, mvy_min, mvy_max, w, h, cand_mv,
1289      );
1290
1291      if rd.cost < current.rd.cost {
1292        current.mv = cand_mv;
1293        current.rd = rd;
1294      }
1295    }
1296  }
1297
1298  // Refine the search results using a 'normal' hexagon search.
1299  hexagon_search(
1300    fi, po, org_region, p_ref, current, bit_depth, pmv, lambda, mvx_min,
1301    mvx_max, mvy_min, mvy_max, w, h,
1302  );
1303}
1304
1305/// Run a subpixel diamond search. The search is run on multiple step sizes.
1306///
1307/// For each step size, candidate motion vectors are examined for improvement
1308/// to the current search location. The search location is moved to the best
1309/// candidate (if any). This is repeated until the search location stops moving.
1310#[profiling::function]
1311fn subpel_diamond_search<T: Pixel>(
1312  fi: &FrameInvariants<T>, po: PlaneOffset, org_region: &PlaneRegion<T>,
1313  _p_ref: &Plane<T>, bit_depth: usize, pmv: [MotionVector; 2], lambda: u32,
1314  mvx_min: isize, mvx_max: isize, mvy_min: isize, mvy_max: isize, w: usize,
1315  h: usize, use_satd: bool, current: &mut MotionSearchResult,
1316  ref_frame: RefType,
1317) {
1318  use crate::util::Aligned;
1319
1320  // Motion compensation assembly has special requirements for edges
1321  let mc_w = w.next_power_of_two();
1322  let mc_h = (h + 1) & !1;
1323
1324  // Metadata for subpel scratch pad.
1325  let cfg = PlaneConfig::new(mc_w, mc_h, 0, 0, 0, 0, std::mem::size_of::<T>());
1326  // Stack allocation for subpel scratch pad.
1327  // SAFETY: We write to the array below before reading from it.
1328  let mut buf: Aligned<[T; 128 * 128]> = unsafe { Aligned::uninitialized() };
1329  let mut tmp_region = PlaneRegionMut::from_slice(
1330    &mut buf.data,
1331    &cfg,
1332    Rect { x: 0, y: 0, width: cfg.width, height: cfg.height },
1333  );
1334
1335  // start at 1/2 pel and end at 1/4 or 1/8 pel
1336  let (mut diamond_radius_log2, diamond_radius_end_log2) =
1337    (2u8, u8::from(!fi.allow_high_precision_mv));
1338
1339  loop {
1340    // Find the best candidate from the diamond pattern.
1341    let mut best_cand: MotionSearchResult = MotionSearchResult::empty();
1342    for &offset in &DIAMOND_R1_PATTERN_SUBPEL {
1343      let cand_mv = current.mv + (offset << diamond_radius_log2);
1344
1345      let rd = get_subpel_mv_rd(
1346        fi,
1347        po,
1348        org_region,
1349        bit_depth,
1350        pmv,
1351        lambda,
1352        use_satd,
1353        mvx_min,
1354        mvx_max,
1355        mvy_min,
1356        mvy_max,
1357        w,
1358        h,
1359        cand_mv,
1360        &mut tmp_region,
1361        ref_frame,
1362      );
1363
1364      if rd.cost < best_cand.rd.cost {
1365        best_cand.mv = cand_mv;
1366        best_cand.rd = rd;
1367      }
1368    }
1369
1370    // Continue the search at this scale until a better candidate isn't found.
1371    if current.rd.cost <= best_cand.rd.cost {
1372      if diamond_radius_log2 == diamond_radius_end_log2 {
1373        break;
1374      } else {
1375        diamond_radius_log2 -= 1;
1376      }
1377    } else {
1378      *current = best_cand;
1379    }
1380  }
1381
1382  assert!(!current.is_empty());
1383}
1384
1385#[inline]
1386fn get_fullpel_mv_rd<T: Pixel>(
1387  fi: &FrameInvariants<T>, po: PlaneOffset, org_region: &PlaneRegion<T>,
1388  p_ref: &Plane<T>, bit_depth: usize, pmv: [MotionVector; 2], lambda: u32,
1389  use_satd: bool, mvx_min: isize, mvx_max: isize, mvy_min: isize,
1390  mvy_max: isize, w: usize, h: usize, cand_mv: MotionVector,
1391) -> MVCandidateRD {
1392  if (cand_mv.col as isize) < mvx_min
1393    || (cand_mv.col as isize) > mvx_max
1394    || (cand_mv.row as isize) < mvy_min
1395    || (cand_mv.row as isize) > mvy_max
1396  {
1397    return MVCandidateRD::empty();
1398  }
1399
1400  // Convert the motion vector into an full pixel offset.
1401  let plane_ref = p_ref.region(Area::StartingAt {
1402    x: po.x + (cand_mv.col / 8) as isize,
1403    y: po.y + (cand_mv.row / 8) as isize,
1404  });
1405  compute_mv_rd(
1406    fi, pmv, lambda, use_satd, bit_depth, w, h, cand_mv, org_region,
1407    &plane_ref,
1408  )
1409}
1410
1411fn get_subpel_mv_rd<T: Pixel>(
1412  fi: &FrameInvariants<T>, po: PlaneOffset, org_region: &PlaneRegion<T>,
1413  bit_depth: usize, pmv: [MotionVector; 2], lambda: u32, use_satd: bool,
1414  mvx_min: isize, mvx_max: isize, mvy_min: isize, mvy_max: isize, w: usize,
1415  h: usize, cand_mv: MotionVector, tmp_region: &mut PlaneRegionMut<T>,
1416  ref_frame: RefType,
1417) -> MVCandidateRD {
1418  if (cand_mv.col as isize) < mvx_min
1419    || (cand_mv.col as isize) > mvx_max
1420    || (cand_mv.row as isize) < mvy_min
1421    || (cand_mv.row as isize) > mvy_max
1422  {
1423    return MVCandidateRD::empty();
1424  }
1425
1426  let tmp_width = tmp_region.rect().width;
1427  let tmp_height = tmp_region.rect().height;
1428  let tile_rect =
1429    TileRect { x: 0, y: 0, width: tmp_width, height: tmp_height };
1430
1431  PredictionMode::NEWMV.predict_inter_single(
1432    fi, tile_rect, 0, po, tmp_region,
1433    // motion comp's w & h on edges can be different than distortion's
1434    tmp_width, tmp_height, ref_frame, cand_mv,
1435  );
1436  let plane_ref = tmp_region.as_const();
1437  compute_mv_rd(
1438    fi, pmv, lambda, use_satd, bit_depth, w, h, cand_mv, org_region,
1439    &plane_ref,
1440  )
1441}
1442
1443/// Compute the rate distortion stats for a motion vector.
1444#[inline(always)]
1445fn compute_mv_rd<T: Pixel>(
1446  fi: &FrameInvariants<T>, pmv: [MotionVector; 2], lambda: u32,
1447  use_satd: bool, bit_depth: usize, w: usize, h: usize, cand_mv: MotionVector,
1448  plane_org: &PlaneRegion<'_, T>, plane_ref: &PlaneRegion<'_, T>,
1449) -> MVCandidateRD {
1450  let sad = if use_satd {
1451    get_satd(plane_org, plane_ref, w, h, bit_depth, fi.cpu_feature_level)
1452  } else {
1453    get_sad(plane_org, plane_ref, w, h, bit_depth, fi.cpu_feature_level)
1454  };
1455
1456  let rate1 = get_mv_rate(cand_mv, pmv[0], fi.allow_high_precision_mv);
1457  let rate2 = get_mv_rate(cand_mv, pmv[1], fi.allow_high_precision_mv);
1458  let rate = rate1.min(rate2 + 1);
1459
1460  MVCandidateRD { cost: 256 * sad as u64 + rate as u64 * lambda as u64, sad }
1461}
1462
1463#[profiling::function]
1464fn full_search<T: Pixel>(
1465  fi: &FrameInvariants<T>, x_lo: isize, x_hi: isize, y_lo: isize, y_hi: isize,
1466  w: usize, h: usize, org_region: &PlaneRegion<T>, p_ref: &Plane<T>,
1467  po: PlaneOffset, step: usize, lambda: u32, pmv: [MotionVector; 2],
1468) -> MotionSearchResult {
1469  let search_region = p_ref.region(Area::Rect {
1470    x: x_lo,
1471    y: y_lo,
1472    width: (x_hi - x_lo) as usize + w,
1473    height: (y_hi - y_lo) as usize + h,
1474  });
1475
1476  let mut best: MotionSearchResult = MotionSearchResult::empty();
1477
1478  // Select rectangular regions within search region with vert+horz windows
1479  for vert_window in search_region.vert_windows(h).step_by(step) {
1480    for ref_window in vert_window.horz_windows(w).step_by(step) {
1481      let &Rect { x, y, .. } = ref_window.rect();
1482
1483      let mv = MotionVector {
1484        row: 8 * (y as i16 - po.y as i16),
1485        col: 8 * (x as i16 - po.x as i16),
1486      };
1487
1488      let rd = compute_mv_rd(
1489        fi,
1490        pmv,
1491        lambda,
1492        false,
1493        fi.sequence.bit_depth,
1494        w,
1495        h,
1496        mv,
1497        org_region,
1498        &ref_window,
1499      );
1500
1501      if rd.cost < best.rd.cost {
1502        best.rd = rd;
1503        best.mv = mv;
1504      }
1505    }
1506  }
1507
1508  best
1509}
1510
1511#[inline(always)]
1512fn get_mv_rate(
1513  a: MotionVector, b: MotionVector, allow_high_precision_mv: bool,
1514) -> u32 {
1515  #[inline(always)]
1516  fn diff_to_rate(diff: i16, allow_high_precision_mv: bool) -> u32 {
1517    let d = if allow_high_precision_mv { diff } else { diff >> 1 };
1518    2 * ILog::ilog(d.abs()) as u32
1519  }
1520
1521  diff_to_rate(a.row - b.row, allow_high_precision_mv)
1522    + diff_to_rate(a.col - b.col, allow_high_precision_mv)
1523}