1use crate::api::InterConfig;
11use crate::context::{
12 BlockOffset, PlaneBlockOffset, SuperBlockOffset, TileBlockOffset,
13 TileSuperBlockOffset, MAX_SB_SIZE_LOG2, MIB_SIZE_LOG2, MI_SIZE,
14 MI_SIZE_LOG2, SB_SIZE,
15};
16use crate::dist::*;
17use crate::frame::*;
18use crate::mc::MotionVector;
19use crate::partition::*;
20use crate::predict::PredictionMode;
21use crate::tiling::*;
22use crate::util::ILog;
23use crate::util::{clamp, Pixel};
24use crate::FrameInvariants;
25
26use arrayvec::*;
27use std::ops::{Index, IndexMut};
28use std::sync::{Arc, RwLock, RwLockReadGuard, RwLockWriteGuard};
29
/// Motion-estimation outcome stored for a single block position.
#[derive(Debug, Copy, Clone, Default)]
pub struct MEStats {
  /// Motion vector selected for the block.
  pub mv: MotionVector,
  /// Distortion rescaled by block area. The writers in this file scale the
  /// raw value by `1 << (2 * MAX_SB_SIZE_LOG2)` and divide by the number of
  /// pixels that were compared.
  pub normalized_sad: u32,
}
36
/// Two-dimensional grid of [`MEStats`] kept in one flat, row-major
/// allocation.
#[derive(Debug, Clone)]
pub struct FrameMEStats {
  /// Backing storage holding `cols * rows` entries, one row after another.
  stats: Box<[MEStats]>,
  /// Number of entries in each row.
  pub cols: usize,
  /// Number of rows.
  pub rows: usize,
}
43
/// Shared, lock-protected set of motion statistics: one [`FrameMEStats`]
/// grid per reference-frame slot.
pub type RefMEStats = Arc<RwLock<[FrameMEStats; REF_FRAMES]>>;
/// Read guard obtained from a [`RefMEStats`] lock.
pub type ReadGuardMEStats<'a> =
  RwLockReadGuard<'a, [FrameMEStats; REF_FRAMES]>;
/// Write guard obtained from a [`RefMEStats`] lock.
pub type WriteGuardMEStats<'a> =
  RwLockWriteGuard<'a, [FrameMEStats; REF_FRAMES]>;
52
53impl FrameMEStats {
54 #[inline]
55 pub fn rows_iter(&self) -> std::slice::ChunksExact<'_, MEStats> {
56 self.stats.chunks_exact(self.cols)
57 }
58
59 pub fn new(cols: usize, rows: usize) -> Self {
60 Self {
61 stats: vec![MEStats::default(); cols * rows].into_boxed_slice(),
63 cols,
64 rows,
65 }
66 }
67 pub fn new_arc_array(cols: usize, rows: usize) -> RefMEStats {
68 Arc::new(RwLock::new([
69 FrameMEStats::new(cols, rows),
70 FrameMEStats::new(cols, rows),
71 FrameMEStats::new(cols, rows),
72 FrameMEStats::new(cols, rows),
73 FrameMEStats::new(cols, rows),
74 FrameMEStats::new(cols, rows),
75 FrameMEStats::new(cols, rows),
76 FrameMEStats::new(cols, rows),
77 ]))
78 }
79}
80
81impl Index<usize> for FrameMEStats {
82 type Output = [MEStats];
83 #[inline]
84 fn index(&self, index: usize) -> &Self::Output {
85 &self.stats[index * self.cols..(index + 1) * self.cols]
86 }
87}
88
89impl IndexMut<usize> for FrameMEStats {
90 #[inline]
91 fn index_mut(&mut self, index: usize) -> &mut Self::Output {
92 &mut self.stats[index * self.cols..(index + 1) * self.cols]
93 }
94}
95
/// A motion vector paired with the rate-distortion figures it achieved.
#[derive(Debug, Copy, Clone)]
pub struct MotionSearchResult {
  /// Candidate motion vector.
  pub mv: MotionVector,
  /// Cost and distortion measured for `mv`.
  pub rd: MVCandidateRD,
}
104
105impl MotionSearchResult {
106 #[inline(always)]
112 pub fn empty() -> MotionSearchResult {
113 MotionSearchResult {
114 mv: MotionVector::default(),
115 rd: MVCandidateRD::empty(),
116 }
117 }
118
119 #[inline(always)]
121 const fn is_empty(&self) -> bool {
122 self.rd.cost == u64::MAX
123 }
124}
125
/// Rate-distortion figures for one motion vector candidate.
#[derive(Debug, Copy, Clone)]
pub struct MVCandidateRD {
  /// Combined cost used to rank candidates (distortion plus weighted rate).
  pub cost: u64,
  /// Distortion of the candidate. `compute_mv_rd` stores SATD here instead
  /// of SAD when it is asked to use SATD.
  pub sad: u32,
}

impl MVCandidateRD {
  /// Returns the worst possible figures (both fields at their maximum),
  /// used as the "no candidate yet" sentinel.
  #[inline(always)]
  const fn empty() -> MVCandidateRD {
    Self { cost: u64::MAX, sad: u32::MAX }
  }
}
146
/// Selects which neighbouring statistics the predictor gathering samples.
#[derive(Debug, Copy, Clone, Eq, PartialEq)]
pub enum MVSamplingMode {
  /// Used for the first (coarsest) pass; no right/bottom neighbours are
  /// sampled and the block centre is not used as the median predictor.
  INIT,
  /// Used for later passes; `right` / `bottom` enable sampling of the
  /// neighbour on that side, and the block centre supplies the median
  /// predictor.
  CORNER { right: bool, bottom: bool },
}
152
153pub fn estimate_tile_motion<T: Pixel>(
154 fi: &FrameInvariants<T>, ts: &mut TileStateMut<'_, T>,
155 inter_cfg: &InterConfig,
156) {
157 let init_size = MIB_SIZE_LOG2;
158
159 let mut prev_ssdec: Option<u8> = None;
160 for mv_size_in_b_log2 in (2..=init_size).rev() {
161 let init = mv_size_in_b_log2 == init_size;
162
163 let ssdec = match init_size - mv_size_in_b_log2 {
165 0 => 2,
166 1 => 1,
167 _ => 0,
168 };
169
170 let new_subsampling =
171 if let Some(prev) = prev_ssdec { prev != ssdec } else { false };
172 prev_ssdec = Some(ssdec);
173
174 let lambda = (fi.me_lambda * 256.0 / (1 << (2 * ssdec)) as f64
176 * if ssdec == 0 { 0.5 } else { 0.125 }) as u32;
177
178 for sby in 0..ts.sb_height {
179 for sbx in 0..ts.sb_width {
180 let mut tested_frames_flags = 0;
181 for &ref_frame in inter_cfg.allowed_ref_frames() {
182 let frame_flag = 1 << fi.ref_frames[ref_frame.to_index()];
183 if tested_frames_flags & frame_flag == frame_flag {
184 continue;
185 }
186 tested_frames_flags |= frame_flag;
187
188 let tile_bo =
189 TileSuperBlockOffset(SuperBlockOffset { x: sbx, y: sby })
190 .block_offset(0, 0);
191
192 if new_subsampling {
193 refine_subsampled_sb_motion(
194 fi,
195 ts,
196 ref_frame,
197 mv_size_in_b_log2 + 1,
198 tile_bo,
199 ssdec,
200 lambda,
201 );
202 }
203
204 estimate_sb_motion(
205 fi,
206 ts,
207 ref_frame,
208 mv_size_in_b_log2,
209 tile_bo,
210 init,
211 ssdec,
212 lambda,
213 );
214 }
215 }
216 }
217 }
218}
219
220fn estimate_sb_motion<T: Pixel>(
221 fi: &FrameInvariants<T>, ts: &mut TileStateMut<'_, T>, ref_frame: RefType,
222 mv_size_in_b_log2: usize, tile_bo: TileBlockOffset, init: bool, ssdec: u8,
223 lambda: u32,
224) {
225 let pix_offset = tile_bo.to_luma_plane_offset();
226 let sb_h: usize = SB_SIZE.min(ts.height - pix_offset.y as usize);
227 let sb_w: usize = SB_SIZE.min(ts.width - pix_offset.x as usize);
228
229 let mv_size = MI_SIZE << mv_size_in_b_log2;
230
231 for y in (0..sb_h).step_by(mv_size) {
233 for x in (0..sb_w).step_by(mv_size) {
234 let corner: MVSamplingMode = if init {
235 MVSamplingMode::INIT
236 } else {
237 MVSamplingMode::CORNER {
240 right: x & mv_size == mv_size,
241 bottom: y & mv_size == mv_size,
242 }
243 };
244
245 let sub_bo = tile_bo
246 .with_offset(x as isize >> MI_SIZE_LOG2, y as isize >> MI_SIZE_LOG2);
247
248 let w = mv_size.min(sb_w - x + (1 << ssdec) - 1) >> ssdec;
251 let h = mv_size.min(sb_h - y + (1 << ssdec) - 1) >> ssdec;
252
253 if let Some(results) = estimate_motion(
257 fi,
258 ts,
259 w,
260 h,
261 sub_bo,
262 ref_frame,
263 None,
264 corner,
265 init,
266 ssdec,
267 Some(lambda),
268 ) {
269 let sad = (((results.rd.sad as u64) << (MAX_SB_SIZE_LOG2 * 2))
271 / (w * h) as u64) as u32;
272 save_me_stats(
273 ts,
274 mv_size_in_b_log2,
275 sub_bo,
276 ref_frame,
277 MEStats { mv: results.mv, normalized_sad: sad },
278 );
279 }
280 }
281 }
282}
283
284fn refine_subsampled_sb_motion<T: Pixel>(
285 fi: &FrameInvariants<T>, ts: &mut TileStateMut<'_, T>, ref_frame: RefType,
286 mv_size_in_b_log2: usize, tile_bo: TileBlockOffset, ssdec: u8, lambda: u32,
287) {
288 let pix_offset = tile_bo.to_luma_plane_offset();
289 let sb_h: usize = SB_SIZE.min(ts.height - pix_offset.y as usize);
290 let sb_w: usize = SB_SIZE.min(ts.width - pix_offset.x as usize);
291
292 let mv_size = MI_SIZE << mv_size_in_b_log2;
293
294 for y in (0..sb_h).step_by(mv_size) {
296 for x in (0..sb_w).step_by(mv_size) {
297 let sub_bo = tile_bo
298 .with_offset(x as isize >> MI_SIZE_LOG2, y as isize >> MI_SIZE_LOG2);
299
300 let w = mv_size.min(sb_w - x + (1 << ssdec) - 1) >> ssdec;
303 let h = mv_size.min(sb_h - y + (1 << ssdec) - 1) >> ssdec;
304
305 if let Some(results) = refine_subsampled_motion_estimate(
307 fi, ts, w, h, sub_bo, ref_frame, ssdec, lambda,
308 ) {
309 let sad = (((results.rd.sad as u64) << (MAX_SB_SIZE_LOG2 * 2))
311 / (w * h) as u64) as u32;
312 save_me_stats(
313 ts,
314 mv_size_in_b_log2,
315 sub_bo,
316 ref_frame,
317 MEStats { mv: results.mv, normalized_sad: sad },
318 );
319 }
320 }
321 }
322}
323
324fn save_me_stats<T: Pixel>(
325 ts: &mut TileStateMut<'_, T>, mv_size_in_b_log2: usize,
326 tile_bo: TileBlockOffset, ref_frame: RefType, stats: MEStats,
327) {
328 let size_in_b = 1 << mv_size_in_b_log2;
329 let tile_me_stats = &mut ts.me_stats[ref_frame.to_index()];
330 let tile_bo_x_end = (tile_bo.0.x + size_in_b).min(ts.mi_width);
331 let tile_bo_y_end = (tile_bo.0.y + size_in_b).min(ts.mi_height);
332 for mi_y in tile_bo.0.y..tile_bo_y_end {
333 for a in tile_me_stats[mi_y][tile_bo.0.x..tile_bo_x_end].iter_mut() {
334 *a = stats;
335 }
336 }
337}
338
339fn get_mv_range(
340 w_in_b: usize, h_in_b: usize, bo: PlaneBlockOffset, blk_w: usize,
341 blk_h: usize,
342) -> (isize, isize, isize, isize) {
343 let border_w = 128 + blk_w as isize * 8;
344 let border_h = 128 + blk_h as isize * 8;
345 let mvx_min = -(bo.0.x as isize) * (8 * MI_SIZE) as isize - border_w;
346 let mvx_max = ((w_in_b - bo.0.x) as isize - (blk_w / MI_SIZE) as isize)
347 * (8 * MI_SIZE) as isize
348 + border_w;
349 let mvy_min = -(bo.0.y as isize) * (8 * MI_SIZE) as isize - border_h;
350 let mvy_max = ((h_in_b - bo.0.y) as isize - (blk_h / MI_SIZE) as isize)
351 * (8 * MI_SIZE) as isize
352 + border_h;
353
354 use crate::context::{MV_LOW, MV_UPP};
356 (
357 mvx_min.max(MV_LOW as isize + 1),
358 mvx_max.min(MV_UPP as isize - 1),
359 mvy_min.max(MV_LOW as isize + 1),
360 mvy_max.min(MV_UPP as isize - 1),
361 )
362}
363
/// Predictor candidates gathered for one block, grouped by origin.
struct MotionEstimationSubsets {
  /// Smallest distortion among the sampled statistics, rescaled to the
  /// size of the current block.
  min_sad: u32,
  /// Median (or centre) predictor, when one could be formed.
  median: Option<MotionVector>,
  /// Predictors sampled from the current tile's statistics, plus the zero
  /// vector.
  subset_b: ArrayVec<MotionVector, 5>,
  /// Predictors sampled from the statistics stored with a reference frame.
  subset_c: ArrayVec<MotionVector, 5>,
}
370
371impl MotionEstimationSubsets {
372 fn all_mvs(&self) -> ArrayVec<MotionVector, 11> {
373 let mut all = ArrayVec::new();
374 if let Some(median) = self.median {
375 all.push(median);
376 }
377
378 all.extend(self.subset_b.iter().copied());
379 all.extend(self.subset_c.iter().copied());
380
381 all
382 }
383}
384
385#[profiling::function]
386fn get_subset_predictors(
387 tile_bo: TileBlockOffset, tile_me_stats: &TileMEStats<'_>,
388 frame_ref_opt: Option<ReadGuardMEStats<'_>>, ref_frame_id: usize,
389 pix_w: usize, pix_h: usize, mvx_min: isize, mvx_max: isize, mvy_min: isize,
390 mvy_max: isize, corner: MVSamplingMode, ssdec: u8,
391) -> MotionEstimationSubsets {
392 let mut min_sad: u32 = u32::MAX;
393 let mut subset_b = ArrayVec::<MotionVector, 5>::new();
394 let mut subset_c = ArrayVec::<MotionVector, 5>::new();
395
396 let w = ((pix_w << ssdec) + MI_SIZE - 1) >> MI_SIZE_LOG2;
398 let h = ((pix_h << ssdec) + MI_SIZE - 1) >> MI_SIZE_LOG2;
399
400 let clipped_half_w = (w >> 1).min(tile_me_stats.cols() - 1 - tile_bo.0.x);
403 let clipped_half_h = (h >> 1).min(tile_me_stats.rows() - 1 - tile_bo.0.y);
404
405 let mut process_cand = |stats: MEStats| -> MotionVector {
406 min_sad = min_sad.min(stats.normalized_sad);
407 let mv = stats.mv.quantize_to_fullpel();
408 MotionVector {
409 col: clamp(mv.col as isize, mvx_min, mvx_max) as i16,
410 row: clamp(mv.row as isize, mvy_min, mvy_max) as i16,
411 }
412 };
413
414 if tile_bo.0.x > 0 {
421 subset_b.push(process_cand(
422 tile_me_stats[tile_bo.0.y + clipped_half_h][tile_bo.0.x - 1],
423 ));
424 }
425 if tile_bo.0.y > 0 {
427 subset_b.push(process_cand(
428 tile_me_stats[tile_bo.0.y - 1][tile_bo.0.x + clipped_half_w],
429 ));
430 }
431
432 if let MVSamplingMode::CORNER { right: true, bottom: _ } = corner {
438 if tile_bo.0.x + w < tile_me_stats.cols() {
439 subset_b.push(process_cand(
440 tile_me_stats[tile_bo.0.y + clipped_half_h][tile_bo.0.x + w],
441 ));
442 }
443 }
444 if let MVSamplingMode::CORNER { right: _, bottom: true } = corner {
446 if tile_bo.0.y + h < tile_me_stats.rows() {
447 subset_b.push(process_cand(
448 tile_me_stats[tile_bo.0.y + h][tile_bo.0.x + clipped_half_w],
449 ));
450 }
451 }
452
453 let median = if corner != MVSamplingMode::INIT {
454 Some(process_cand(
456 tile_me_stats[tile_bo.0.y + clipped_half_h]
457 [tile_bo.0.x + clipped_half_w],
458 ))
459 } else if subset_b.len() != 3 {
460 None
461 } else {
462 let mut rows: ArrayVec<i16, 3> = subset_b.iter().map(|&a| a.row).collect();
463 let mut cols: ArrayVec<i16, 3> = subset_b.iter().map(|&a| a.col).collect();
464 rows.as_mut_slice().sort_unstable();
465 cols.as_mut_slice().sort_unstable();
466 Some(MotionVector { row: rows[1], col: cols[1] })
467 };
468
469 subset_b.push(MotionVector::default());
471
472 if let Some(frame_me_stats) = frame_ref_opt {
478 let prev_frame = &frame_me_stats[ref_frame_id];
479
480 let frame_bo = PlaneBlockOffset(BlockOffset {
481 x: tile_me_stats.x() + tile_bo.0.x,
482 y: tile_me_stats.y() + tile_bo.0.y,
483 });
484 let clipped_half_w = (w >> 1).min(prev_frame.cols - 1 - frame_bo.0.x);
485 let clipped_half_h = (h >> 1).min(prev_frame.rows - 1 - frame_bo.0.y);
486
487 if frame_bo.0.x > 0 {
489 subset_c.push(process_cand(
490 prev_frame[frame_bo.0.y + clipped_half_h][frame_bo.0.x - 1],
491 ));
492 }
493 if frame_bo.0.y > 0 {
495 subset_c.push(process_cand(
496 prev_frame[frame_bo.0.y - 1][frame_bo.0.x + clipped_half_w],
497 ));
498 }
499 if frame_bo.0.x + w < prev_frame.cols {
501 subset_c.push(process_cand(
502 prev_frame[frame_bo.0.y + clipped_half_h][frame_bo.0.x + w],
503 ));
504 }
505 if frame_bo.0.y + h < prev_frame.rows {
507 subset_c.push(process_cand(
508 prev_frame[frame_bo.0.y + h][frame_bo.0.x + clipped_half_w],
509 ));
510 }
511
512 subset_c.push(process_cand(
513 prev_frame[frame_bo.0.y + clipped_half_h][frame_bo.0.x + clipped_half_w],
514 ));
515 }
516
517 let min_sad = ((min_sad as u64 * (pix_w * pix_h) as u64)
519 >> (MAX_SB_SIZE_LOG2 * 2)) as u32;
520
521 let dec_mv = |mv: MotionVector| MotionVector {
522 col: mv.col >> ssdec,
523 row: mv.row >> ssdec,
524 };
525 let median = median.map(dec_mv);
526 for mv in subset_b.iter_mut() {
527 *mv = dec_mv(*mv);
528 }
529 for mv in subset_c.iter_mut() {
530 *mv = dec_mv(*mv);
531 }
532
533 MotionEstimationSubsets { min_sad, median, subset_b, subset_c }
534}
535
536pub fn estimate_motion<T: Pixel>(
537 fi: &FrameInvariants<T>, ts: &TileStateMut<'_, T>, w: usize, h: usize,
538 tile_bo: TileBlockOffset, ref_frame: RefType,
539 pmv: Option<[MotionVector; 2]>, corner: MVSamplingMode,
540 extensive_search: bool, ssdec: u8, lambda: Option<u32>,
541) -> Option<MotionSearchResult> {
542 if let Some(ref rec) =
543 fi.rec_buffer.frames[fi.ref_frames[ref_frame.to_index()] as usize]
544 {
545 let frame_bo = ts.to_frame_block_offset(tile_bo);
546 let (mvx_min, mvx_max, mvy_min, mvy_max) =
547 get_mv_range(fi.w_in_b, fi.h_in_b, frame_bo, w << ssdec, h << ssdec);
548
549 let lambda = lambda.unwrap_or({
550 (fi.me_lambda * 256.0 * 0.5) as u32
552 });
553
554 let global_mv = [MotionVector { row: 0, col: 0 }; 2];
555
556 let po = frame_bo.to_luma_plane_offset();
557 let (mvx_min, mvx_max, mvy_min, mvy_max) =
558 (mvx_min >> ssdec, mvx_max >> ssdec, mvy_min >> ssdec, mvy_max >> ssdec);
559 let po = PlaneOffset { x: po.x >> ssdec, y: po.y >> ssdec };
560 let p_ref = match ssdec {
561 0 => &rec.frame.planes[0],
562 1 => &rec.input_hres,
563 2 => &rec.input_qres,
564 _ => unimplemented!(),
565 };
566
567 let org_region = &match ssdec {
568 0 => ts.input_tile.planes[0]
569 .subregion(Area::BlockStartingAt { bo: tile_bo.0 }),
570 1 => ts.input_hres.region(Area::StartingAt { x: po.x, y: po.y }),
571 2 => ts.input_qres.region(Area::StartingAt { x: po.x, y: po.y }),
572 _ => unimplemented!(),
573 };
574
575 let mut best: MotionSearchResult = full_pixel_me(
576 fi,
577 ts,
578 org_region,
579 p_ref,
580 tile_bo,
581 po,
582 lambda,
583 pmv.unwrap_or(global_mv),
584 w,
585 h,
586 mvx_min,
587 mvx_max,
588 mvy_min,
589 mvy_max,
590 ref_frame,
591 corner,
592 extensive_search,
593 ssdec,
594 );
595
596 if let Some(pmv) = pmv {
597 let use_satd: bool = fi.config.speed_settings.motion.use_satd_subpel;
598 if use_satd {
599 best.rd = get_fullpel_mv_rd(
600 fi,
601 po,
602 org_region,
603 p_ref,
604 fi.sequence.bit_depth,
605 pmv,
606 lambda,
607 use_satd,
608 mvx_min,
609 mvx_max,
610 mvy_min,
611 mvy_max,
612 w,
613 h,
614 best.mv,
615 );
616 }
617
618 sub_pixel_me(
619 fi, po, org_region, p_ref, lambda, pmv, mvx_min, mvx_max, mvy_min,
620 mvy_max, w, h, use_satd, &mut best, ref_frame,
621 );
622 }
623
624 best.mv = best.mv << ssdec;
626
627 Some(best)
628 } else {
629 None
630 }
631}
632
633fn refine_subsampled_motion_estimate<T: Pixel>(
635 fi: &FrameInvariants<T>, ts: &TileStateMut<'_, T>, w: usize, h: usize,
636 tile_bo: TileBlockOffset, ref_frame: RefType, ssdec: u8, lambda: u32,
637) -> Option<MotionSearchResult> {
638 if let Some(ref rec) =
639 fi.rec_buffer.frames[fi.ref_frames[ref_frame.to_index()] as usize]
640 {
641 let frame_bo = ts.to_frame_block_offset(tile_bo);
642 let (mvx_min, mvx_max, mvy_min, mvy_max) =
643 get_mv_range(fi.w_in_b, fi.h_in_b, frame_bo, w << ssdec, h << ssdec);
644
645 let pmv = [MotionVector { row: 0, col: 0 }; 2];
646
647 let po = frame_bo.to_luma_plane_offset();
648 let (mvx_min, mvx_max, mvy_min, mvy_max) =
649 (mvx_min >> ssdec, mvx_max >> ssdec, mvy_min >> ssdec, mvy_max >> ssdec);
650 let po = PlaneOffset { x: po.x >> ssdec, y: po.y >> ssdec };
651 let p_ref = match ssdec {
652 0 => &rec.frame.planes[0],
653 1 => &rec.input_hres,
654 2 => &rec.input_qres,
655 _ => unimplemented!(),
656 };
657
658 let org_region = &match ssdec {
659 0 => ts.input_tile.planes[0]
660 .subregion(Area::BlockStartingAt { bo: tile_bo.0 }),
661 1 => ts.input_hres.region(Area::StartingAt { x: po.x, y: po.y }),
662 2 => ts.input_qres.region(Area::StartingAt { x: po.x, y: po.y }),
663 _ => unimplemented!(),
664 };
665
666 let mv =
667 ts.me_stats[ref_frame.to_index()][tile_bo.0.y][tile_bo.0.x].mv >> ssdec;
668
669 let x_lo = po.x + (mv.col as isize / 8 - 1).max(mvx_min / 8);
676 let x_hi = po.x + (mv.col as isize / 8 + 2).min(mvx_max / 8);
677 let y_lo = po.y + (mv.row as isize / 8 - 1).max(mvy_min / 8);
678 let y_hi = po.y + (mv.row as isize / 8 + 2).min(mvy_max / 8);
679 let mut results = full_search(
680 fi, x_lo, x_hi, y_lo, y_hi, w, h, org_region, p_ref, po, 1, lambda, pmv,
681 );
682
683 results.mv = results.mv << ssdec;
685
686 Some(results)
687 } else {
688 None
689 }
690}
691
692#[profiling::function]
693fn full_pixel_me<T: Pixel>(
694 fi: &FrameInvariants<T>, ts: &TileStateMut<'_, T>,
695 org_region: &PlaneRegion<T>, p_ref: &Plane<T>, tile_bo: TileBlockOffset,
696 po: PlaneOffset, lambda: u32, pmv: [MotionVector; 2], w: usize, h: usize,
697 mvx_min: isize, mvx_max: isize, mvy_min: isize, mvy_max: isize,
698 ref_frame: RefType, corner: MVSamplingMode, extensive_search: bool,
699 ssdec: u8,
700) -> MotionSearchResult {
701 let ref_frame_id = ref_frame.to_index();
702 let tile_me_stats = &ts.me_stats[ref_frame_id].as_const();
703 let frame_ref = fi.rec_buffer.frames[fi.ref_frames[0] as usize]
704 .as_ref()
705 .map(|frame_ref| frame_ref.frame_me_stats.read().expect("poisoned lock"));
706 let subsets = get_subset_predictors(
707 tile_bo,
708 tile_me_stats,
709 frame_ref,
710 ref_frame_id,
711 w,
712 h,
713 mvx_min,
714 mvx_max,
715 mvy_min,
716 mvy_max,
717 corner,
718 ssdec,
719 );
720
721 let try_cands = |predictors: &[MotionVector],
722 best: &mut MotionSearchResult| {
723 let mut results = get_best_predictor(
724 fi,
725 po,
726 org_region,
727 p_ref,
728 predictors,
729 fi.sequence.bit_depth,
730 pmv,
731 lambda,
732 mvx_min,
733 mvx_max,
734 mvy_min,
735 mvy_max,
736 w,
737 h,
738 );
739 fullpel_diamond_search(
740 fi,
741 po,
742 org_region,
743 p_ref,
744 &mut results,
745 fi.sequence.bit_depth,
746 pmv,
747 lambda,
748 mvx_min,
749 mvx_max,
750 mvy_min,
751 mvy_max,
752 w,
753 h,
754 );
755
756 if results.rd.cost < best.rd.cost {
757 *best = results;
758 }
759 };
760
761 let mut best: MotionSearchResult = MotionSearchResult::empty();
762 if !extensive_search {
763 try_cands(&subsets.all_mvs(), &mut best);
764 best
765 } else {
766 let thresh = (subsets.min_sad as f32 * 1.2) as u32
772 + (((w * h) as u32) << (fi.sequence.bit_depth - 8));
773
774 if let Some(median) = subsets.median {
775 try_cands(&[median], &mut best);
776
777 if best.rd.sad < thresh {
778 return best;
779 }
780 }
781
782 try_cands(&subsets.subset_b, &mut best);
783
784 if best.rd.sad < thresh {
785 return best;
786 }
787
788 try_cands(&subsets.subset_c, &mut best);
789
790 if best.rd.sad < thresh {
791 return best;
792 }
793
794 uneven_multi_hex_search(
797 fi,
798 po,
799 org_region,
800 p_ref,
801 &mut best,
802 fi.sequence.bit_depth,
803 pmv,
804 lambda,
805 mvx_min,
806 mvx_max,
807 mvy_min,
808 mvy_max,
809 w,
810 h,
811 24,
813 );
814
815 if !fi.config.speed_settings.motion.me_allow_full_search
816 || best.rd.sad < thresh
817 {
818 return best;
819 }
820
821 {
822 let range_x = (192 * fi.me_range_scale as isize) >> ssdec;
823 let range_y = (64 * fi.me_range_scale as isize) >> ssdec;
824 let x_lo = po.x + (-range_x).max(mvx_min / 8);
825 let x_hi = po.x + (range_x).min(mvx_max / 8);
826 let y_lo = po.y + (-range_y).max(mvy_min / 8);
827 let y_hi = po.y + (range_y).min(mvy_max / 8);
828
829 let results = full_search(
830 fi,
831 x_lo,
832 x_hi,
833 y_lo,
834 y_hi,
835 w,
836 h,
837 org_region,
838 p_ref,
839 po,
840 4 >> ssdec,
844 lambda,
845 [MotionVector::default(); 2],
846 );
847
848 if results.rd.cost < best.rd.cost {
849 results
850 } else {
851 best
852 }
853 }
854 }
855}
856
/// Refines `best` to sub-pel precision.
///
/// This is a thin wrapper that forwards every argument to
/// `subpel_diamond_search`, supplying the bit depth from `fi.sequence`.
fn sub_pixel_me<T: Pixel>(
  fi: &FrameInvariants<T>, po: PlaneOffset, org_region: &PlaneRegion<T>,
  p_ref: &Plane<T>, lambda: u32, pmv: [MotionVector; 2], mvx_min: isize,
  mvx_max: isize, mvy_min: isize, mvy_max: isize, w: usize, h: usize,
  use_satd: bool, best: &mut MotionSearchResult, ref_frame: RefType,
) {
  subpel_diamond_search(
    fi,
    po,
    org_region,
    p_ref,
    fi.sequence.bit_depth,
    pmv,
    lambda,
    mvx_min,
    mvx_max,
    mvy_min,
    mvy_max,
    w,
    h,
    use_satd,
    best,
    ref_frame,
  );
}
882
883#[profiling::function]
884fn get_best_predictor<T: Pixel>(
885 fi: &FrameInvariants<T>, po: PlaneOffset, org_region: &PlaneRegion<T>,
886 p_ref: &Plane<T>, predictors: &[MotionVector], bit_depth: usize,
887 pmv: [MotionVector; 2], lambda: u32, mvx_min: isize, mvx_max: isize,
888 mvy_min: isize, mvy_max: isize, w: usize, h: usize,
889) -> MotionSearchResult {
890 let mut best: MotionSearchResult = MotionSearchResult::empty();
891
892 for &init_mv in predictors.iter() {
893 let rd = get_fullpel_mv_rd(
894 fi, po, org_region, p_ref, bit_depth, pmv, lambda, false, mvx_min,
895 mvx_max, mvy_min, mvy_max, w, h, init_mv,
896 );
897
898 if rd.cost < best.rd.cost {
899 best.mv = init_mv;
900 best.rd = rd;
901 }
902 }
903
904 best
905}
906
/// Builds an array of `MotionVector`s from two parallel lists of component
/// values given in whole pixels.
///
/// Each value is shifted left by 3 (multiplied by 8), which matches the
/// `/ 8` the full-pel code in this file applies when turning a vector into
/// a pixel offset.
macro_rules! search_pattern {
  ($field_a:ident: [$($ll_a:expr),*], $field_b:ident: [$($ll_b:expr),*]) => {
    [ $(MotionVector { $field_a: $ll_a << 3, $field_b: $ll_b << 3 } ),*]
  };
}
915
/// Builds an array of `MotionVector`s from two parallel lists of component
/// values, using the values unchanged (no scaling to whole pixels).
macro_rules! search_pattern_subpel {
  ($field_a:ident: [$($ll_a:expr),*], $field_b:ident: [$($ll_b:expr),*]) => {
    [ $(MotionVector { $field_a: $ll_a, $field_b: $ll_b } ),*]
  };
}
922
/// Four-point diamond (down, right, up, left) with unit-sized steps in raw
/// vector units; the sub-pel search scales it by shifting.
const DIAMOND_R1_PATTERN_SUBPEL: [MotionVector; 4] = search_pattern_subpel!(
  col: [  0,  1,  0, -1],
  row: [  1,  0, -1,  0]
);
/// Four-point diamond (down, right, up, left) with one-pixel steps; the
/// full-pel search scales it by shifting.
const DIAMOND_R1_PATTERN: [MotionVector; 4] = search_pattern!(
  col: [  0,  1,  0, -1],
  row: [  1,  0, -1,  0]
);
948
949#[profiling::function]
955fn fullpel_diamond_search<T: Pixel>(
956 fi: &FrameInvariants<T>, po: PlaneOffset, org_region: &PlaneRegion<T>,
957 p_ref: &Plane<T>, current: &mut MotionSearchResult, bit_depth: usize,
958 pmv: [MotionVector; 2], lambda: u32, mvx_min: isize, mvx_max: isize,
959 mvy_min: isize, mvy_max: isize, w: usize, h: usize,
960) {
961 let (mut diamond_radius_log2, diamond_radius_end_log2) = (1u8, 0u8);
963
964 loop {
965 let mut best_cand: MotionSearchResult = MotionSearchResult::empty();
967 for &offset in &DIAMOND_R1_PATTERN {
968 let cand_mv = current.mv + (offset << diamond_radius_log2);
969 let rd = get_fullpel_mv_rd(
970 fi, po, org_region, p_ref, bit_depth, pmv, lambda, false, mvx_min,
971 mvx_max, mvy_min, mvy_max, w, h, cand_mv,
972 );
973
974 if rd.cost < best_cand.rd.cost {
975 best_cand.mv = cand_mv;
976 best_cand.rd = rd;
977 }
978 }
979
980 if current.rd.cost <= best_cand.rd.cost {
983 if diamond_radius_log2 == diamond_radius_end_log2 {
984 break;
985 } else {
986 diamond_radius_log2 -= 1;
987 }
988 } else {
989 *current = best_cand;
990 }
991 }
992
993 assert!(!current.is_empty());
994}
995
/// Six hexagon vertices, in pixels, listed in rotational order so that
/// adjacent array entries are adjacent vertices.
const HEXAGON_PATTERN: [MotionVector; 6] = search_pattern!(
  col: [  0,  2,  2,  0, -2, -2],
  row: [ -2, -1,  1,  2,  1, -1]
);

/// The eight one-pixel neighbours of a position (a 3x3 square without its
/// centre), used for the final refinement step.
const SQUARE_REFINE_PATTERN: [MotionVector; 8] = search_pattern!(
  col: [ -1,  0,  1, -1,  1, -1,  0,  1],
  row: [  1,  1,  1,  0,  0, -1, -1, -1]
);
1039
1040#[profiling::function]
1055fn hexagon_search<T: Pixel>(
1056 fi: &FrameInvariants<T>, po: PlaneOffset, org_region: &PlaneRegion<T>,
1057 p_ref: &Plane<T>, current: &mut MotionSearchResult, bit_depth: usize,
1058 pmv: [MotionVector; 2], lambda: u32, mvx_min: isize, mvx_max: isize,
1059 mvy_min: isize, mvy_max: isize, w: usize, h: usize,
1060) {
1061 let mut best_cand_idx: usize = 0;
1068 let mut best_cand: MotionSearchResult = MotionSearchResult::empty();
1069
1070 for i in 0..6 {
1072 let cand_mv = current.mv + HEXAGON_PATTERN[i];
1073 let rd = get_fullpel_mv_rd(
1074 fi, po, org_region, p_ref, bit_depth, pmv, lambda, false, mvx_min,
1075 mvx_max, mvy_min, mvy_max, w, h, cand_mv,
1076 );
1077
1078 if rd.cost < best_cand.rd.cost {
1079 best_cand_idx = i;
1080 best_cand.mv = cand_mv;
1081 best_cand.rd = rd;
1082 }
1083 }
1084
1085 while best_cand.rd.cost < current.rd.cost {
1088 *current = best_cand;
1090 best_cand = MotionSearchResult::empty();
1091
1092 let center_cand_idx = best_cand_idx;
1095
1096 for idx_offset_mod6 in 5..=7 {
1102 let i = (center_cand_idx + idx_offset_mod6) % 6;
1103 let cand_mv = current.mv + HEXAGON_PATTERN[i];
1104
1105 let rd = get_fullpel_mv_rd(
1106 fi, po, org_region, p_ref, bit_depth, pmv, lambda, false, mvx_min,
1107 mvx_max, mvy_min, mvy_max, w, h, cand_mv,
1108 );
1109
1110 if rd.cost < best_cand.rd.cost {
1111 best_cand_idx = i;
1112 best_cand.mv = cand_mv;
1113 best_cand.rd = rd;
1114 }
1115 }
1116 }
1117
1118 let mut best_cand: MotionSearchResult = MotionSearchResult::empty();
1120 for &offset in &SQUARE_REFINE_PATTERN {
1121 let cand_mv = current.mv + offset;
1122 let rd = get_fullpel_mv_rd(
1123 fi, po, org_region, p_ref, bit_depth, pmv, lambda, false, mvx_min,
1124 mvx_max, mvy_min, mvy_max, w, h, cand_mv,
1125 );
1126
1127 if rd.cost < best_cand.rd.cost {
1128 best_cand.mv = cand_mv;
1129 best_cand.rd = rd;
1130 }
1131 }
1132 if best_cand.rd.cost < current.rd.cost {
1133 *current = best_cand;
1134 }
1135
1136 assert!(!current.is_empty());
1137}
1138
1139const UMH_PATTERN: [MotionVector; 16] = search_pattern!(
1154 col: [ -2, -1, 0, 1, 2, 3, 4, 3, 2, 1, 0, -1, -2, 3, -4, -3],
1155 row: [ 4, 4, 4, 4, 4, 2, 0, -2, -4, -4, -4, -4, -4, -2, 0, 2]
1156);
1157
1158#[profiling::function]
1170fn uneven_multi_hex_search<T: Pixel>(
1171 fi: &FrameInvariants<T>, po: PlaneOffset, org_region: &PlaneRegion<T>,
1172 p_ref: &Plane<T>, current: &mut MotionSearchResult, bit_depth: usize,
1173 pmv: [MotionVector; 2], lambda: u32, mvx_min: isize, mvx_max: isize,
1174 mvy_min: isize, mvy_max: isize, w: usize, h: usize, me_range: i16,
1175) {
1176 assert!(!current.is_empty());
1177
1178 let center = current.mv;
1193
1194 for i in (1..=me_range).step_by(2) {
1196 const HORIZONTAL_LINE: [MotionVector; 2] = search_pattern!(
1197 col: [ 0, 0],
1198 row: [-1, 1]
1199 );
1200
1201 for &offset in &HORIZONTAL_LINE {
1202 let cand_mv = center + offset * i;
1203 let rd = get_fullpel_mv_rd(
1204 fi, po, org_region, p_ref, bit_depth, pmv, lambda, false, mvx_min,
1205 mvx_max, mvy_min, mvy_max, w, h, cand_mv,
1206 );
1207
1208 if rd.cost < current.rd.cost {
1209 current.mv = cand_mv;
1210 current.rd = rd;
1211 }
1212 }
1213 }
1214
1215 for i in (1..=me_range >> 1).step_by(2) {
1217 const VERTICAL_LINE: [MotionVector; 2] = search_pattern!(
1218 col: [-1, 1],
1219 row: [ 0, 0]
1220 );
1221
1222 for &offset in &VERTICAL_LINE {
1223 let cand_mv = center + offset * i;
1224 let rd = get_fullpel_mv_rd(
1225 fi, po, org_region, p_ref, bit_depth, pmv, lambda, false, mvx_min,
1226 mvx_max, mvy_min, mvy_max, w, h, cand_mv,
1227 );
1228
1229 if rd.cost < current.rd.cost {
1230 current.mv = cand_mv;
1231 current.rd = rd;
1232 }
1233 }
1234 }
1235
1236 let center = current.mv;
1238 for row in -2..=2 {
1239 for col in -2..=2 {
1240 if row == 0 && col == 0 {
1241 continue;
1242 }
1243 let cand_mv = center + MotionVector { row, col };
1244 let rd = get_fullpel_mv_rd(
1245 fi, po, org_region, p_ref, bit_depth, pmv, lambda, false, mvx_min,
1246 mvx_max, mvy_min, mvy_max, w, h, cand_mv,
1247 );
1248
1249 if rd.cost < current.rd.cost {
1250 current.mv = cand_mv;
1251 current.rd = rd;
1252 }
1253 }
1254 }
1255
1256 let center = current.mv;
1280
1281 let iterations = me_range >> 2;
1283 for i in 1..=iterations {
1284 for &offset in &UMH_PATTERN {
1285 let cand_mv = center + offset * i;
1286 let rd = get_fullpel_mv_rd(
1287 fi, po, org_region, p_ref, bit_depth, pmv, lambda, false, mvx_min,
1288 mvx_max, mvy_min, mvy_max, w, h, cand_mv,
1289 );
1290
1291 if rd.cost < current.rd.cost {
1292 current.mv = cand_mv;
1293 current.rd = rd;
1294 }
1295 }
1296 }
1297
1298 hexagon_search(
1300 fi, po, org_region, p_ref, current, bit_depth, pmv, lambda, mvx_min,
1301 mvx_max, mvy_min, mvy_max, w, h,
1302 );
1303}
1304
1305#[profiling::function]
1311fn subpel_diamond_search<T: Pixel>(
1312 fi: &FrameInvariants<T>, po: PlaneOffset, org_region: &PlaneRegion<T>,
1313 _p_ref: &Plane<T>, bit_depth: usize, pmv: [MotionVector; 2], lambda: u32,
1314 mvx_min: isize, mvx_max: isize, mvy_min: isize, mvy_max: isize, w: usize,
1315 h: usize, use_satd: bool, current: &mut MotionSearchResult,
1316 ref_frame: RefType,
1317) {
1318 use crate::util::Aligned;
1319
1320 let mc_w = w.next_power_of_two();
1322 let mc_h = (h + 1) & !1;
1323
1324 let cfg = PlaneConfig::new(mc_w, mc_h, 0, 0, 0, 0, std::mem::size_of::<T>());
1326 let mut buf: Aligned<[T; 128 * 128]> = unsafe { Aligned::uninitialized() };
1329 let mut tmp_region = PlaneRegionMut::from_slice(
1330 &mut buf.data,
1331 &cfg,
1332 Rect { x: 0, y: 0, width: cfg.width, height: cfg.height },
1333 );
1334
1335 let (mut diamond_radius_log2, diamond_radius_end_log2) =
1337 (2u8, u8::from(!fi.allow_high_precision_mv));
1338
1339 loop {
1340 let mut best_cand: MotionSearchResult = MotionSearchResult::empty();
1342 for &offset in &DIAMOND_R1_PATTERN_SUBPEL {
1343 let cand_mv = current.mv + (offset << diamond_radius_log2);
1344
1345 let rd = get_subpel_mv_rd(
1346 fi,
1347 po,
1348 org_region,
1349 bit_depth,
1350 pmv,
1351 lambda,
1352 use_satd,
1353 mvx_min,
1354 mvx_max,
1355 mvy_min,
1356 mvy_max,
1357 w,
1358 h,
1359 cand_mv,
1360 &mut tmp_region,
1361 ref_frame,
1362 );
1363
1364 if rd.cost < best_cand.rd.cost {
1365 best_cand.mv = cand_mv;
1366 best_cand.rd = rd;
1367 }
1368 }
1369
1370 if current.rd.cost <= best_cand.rd.cost {
1372 if diamond_radius_log2 == diamond_radius_end_log2 {
1373 break;
1374 } else {
1375 diamond_radius_log2 -= 1;
1376 }
1377 } else {
1378 *current = best_cand;
1379 }
1380 }
1381
1382 assert!(!current.is_empty());
1383}
1384
1385#[inline]
1386fn get_fullpel_mv_rd<T: Pixel>(
1387 fi: &FrameInvariants<T>, po: PlaneOffset, org_region: &PlaneRegion<T>,
1388 p_ref: &Plane<T>, bit_depth: usize, pmv: [MotionVector; 2], lambda: u32,
1389 use_satd: bool, mvx_min: isize, mvx_max: isize, mvy_min: isize,
1390 mvy_max: isize, w: usize, h: usize, cand_mv: MotionVector,
1391) -> MVCandidateRD {
1392 if (cand_mv.col as isize) < mvx_min
1393 || (cand_mv.col as isize) > mvx_max
1394 || (cand_mv.row as isize) < mvy_min
1395 || (cand_mv.row as isize) > mvy_max
1396 {
1397 return MVCandidateRD::empty();
1398 }
1399
1400 let plane_ref = p_ref.region(Area::StartingAt {
1402 x: po.x + (cand_mv.col / 8) as isize,
1403 y: po.y + (cand_mv.row / 8) as isize,
1404 });
1405 compute_mv_rd(
1406 fi, pmv, lambda, use_satd, bit_depth, w, h, cand_mv, org_region,
1407 &plane_ref,
1408 )
1409}
1410
1411fn get_subpel_mv_rd<T: Pixel>(
1412 fi: &FrameInvariants<T>, po: PlaneOffset, org_region: &PlaneRegion<T>,
1413 bit_depth: usize, pmv: [MotionVector; 2], lambda: u32, use_satd: bool,
1414 mvx_min: isize, mvx_max: isize, mvy_min: isize, mvy_max: isize, w: usize,
1415 h: usize, cand_mv: MotionVector, tmp_region: &mut PlaneRegionMut<T>,
1416 ref_frame: RefType,
1417) -> MVCandidateRD {
1418 if (cand_mv.col as isize) < mvx_min
1419 || (cand_mv.col as isize) > mvx_max
1420 || (cand_mv.row as isize) < mvy_min
1421 || (cand_mv.row as isize) > mvy_max
1422 {
1423 return MVCandidateRD::empty();
1424 }
1425
1426 let tmp_width = tmp_region.rect().width;
1427 let tmp_height = tmp_region.rect().height;
1428 let tile_rect =
1429 TileRect { x: 0, y: 0, width: tmp_width, height: tmp_height };
1430
1431 PredictionMode::NEWMV.predict_inter_single(
1432 fi, tile_rect, 0, po, tmp_region,
1433 tmp_width, tmp_height, ref_frame, cand_mv,
1435 );
1436 let plane_ref = tmp_region.as_const();
1437 compute_mv_rd(
1438 fi, pmv, lambda, use_satd, bit_depth, w, h, cand_mv, org_region,
1439 &plane_ref,
1440 )
1441}
1442
1443#[inline(always)]
1445fn compute_mv_rd<T: Pixel>(
1446 fi: &FrameInvariants<T>, pmv: [MotionVector; 2], lambda: u32,
1447 use_satd: bool, bit_depth: usize, w: usize, h: usize, cand_mv: MotionVector,
1448 plane_org: &PlaneRegion<'_, T>, plane_ref: &PlaneRegion<'_, T>,
1449) -> MVCandidateRD {
1450 let sad = if use_satd {
1451 get_satd(plane_org, plane_ref, w, h, bit_depth, fi.cpu_feature_level)
1452 } else {
1453 get_sad(plane_org, plane_ref, w, h, bit_depth, fi.cpu_feature_level)
1454 };
1455
1456 let rate1 = get_mv_rate(cand_mv, pmv[0], fi.allow_high_precision_mv);
1457 let rate2 = get_mv_rate(cand_mv, pmv[1], fi.allow_high_precision_mv);
1458 let rate = rate1.min(rate2 + 1);
1459
1460 MVCandidateRD { cost: 256 * sad as u64 + rate as u64 * lambda as u64, sad }
1461}
1462
1463#[profiling::function]
1464fn full_search<T: Pixel>(
1465 fi: &FrameInvariants<T>, x_lo: isize, x_hi: isize, y_lo: isize, y_hi: isize,
1466 w: usize, h: usize, org_region: &PlaneRegion<T>, p_ref: &Plane<T>,
1467 po: PlaneOffset, step: usize, lambda: u32, pmv: [MotionVector; 2],
1468) -> MotionSearchResult {
1469 let search_region = p_ref.region(Area::Rect {
1470 x: x_lo,
1471 y: y_lo,
1472 width: (x_hi - x_lo) as usize + w,
1473 height: (y_hi - y_lo) as usize + h,
1474 });
1475
1476 let mut best: MotionSearchResult = MotionSearchResult::empty();
1477
1478 for vert_window in search_region.vert_windows(h).step_by(step) {
1480 for ref_window in vert_window.horz_windows(w).step_by(step) {
1481 let &Rect { x, y, .. } = ref_window.rect();
1482
1483 let mv = MotionVector {
1484 row: 8 * (y as i16 - po.y as i16),
1485 col: 8 * (x as i16 - po.x as i16),
1486 };
1487
1488 let rd = compute_mv_rd(
1489 fi,
1490 pmv,
1491 lambda,
1492 false,
1493 fi.sequence.bit_depth,
1494 w,
1495 h,
1496 mv,
1497 org_region,
1498 &ref_window,
1499 );
1500
1501 if rd.cost < best.rd.cost {
1502 best.rd = rd;
1503 best.mv = mv;
1504 }
1505 }
1506 }
1507
1508 best
1509}
1510
1511#[inline(always)]
1512fn get_mv_rate(
1513 a: MotionVector, b: MotionVector, allow_high_precision_mv: bool,
1514) -> u32 {
1515 #[inline(always)]
1516 fn diff_to_rate(diff: i16, allow_high_precision_mv: bool) -> u32 {
1517 let d = if allow_high_precision_mv { diff } else { diff >> 1 };
1518 2 * ILog::ilog(d.abs()) as u32
1519 }
1520
1521 diff_to_rate(a.row - b.row, allow_high_precision_mv)
1522 + diff_to_rate(a.col - b.col, allow_high_precision_mv)
1523}