1use std::sync::Arc;
2
3use aligned::{Aligned, A64};
4use arrayvec::ArrayVec;
5use num_rational::Rational32;
6use rayon::iter::{IntoParallelIterator, ParallelIterator};
7use v_frame::{
8 frame::Frame,
9 math::{clamp, ILog},
10 pixel::{ChromaSampling, Pixel},
11 plane::{Plane, PlaneConfig, PlaneOffset},
12};
13
14use super::importance::{
15 IMPORTANCE_BLOCK_SIZE,
16 IMP_BLOCK_MV_UNITS_PER_PIXEL,
17 IMP_BLOCK_SIZE_IN_MV_UNITS,
18};
19use crate::{
20 cpu::CpuFeatureLevel,
21 data::{
22 block::{BlockOffset, BlockSize, MIB_SIZE_LOG2},
23 frame::{FrameInvariants, FrameState, RefType, ALLOWED_REF_FRAMES},
24 motion::{
25 MEStats,
26 MVSamplingMode,
27 MotionEstimationSubsets,
28 MotionVector,
29 ReadGuardMEStats,
30 RefMEStats,
31 TileMEStats,
32 MV_LOW,
33 MV_UPP,
34 },
35 plane::{Area, AsRegion, PlaneBlockOffset, PlaneRegion, PlaneRegionMut, Rect},
36 prediction::PredictionMode,
37 sad::get_sad,
38 satd::get_satd,
39 superblock::{
40 SuperBlockOffset,
41 TileSuperBlockOffset,
42 MAX_SB_SIZE_LOG2,
43 MI_SIZE,
44 MI_SIZE_LOG2,
45 SB_SIZE,
46 },
47 tile::{TileBlockOffset, TileRect, TileStateMut, TilingInfo},
48 },
49};
50
51macro_rules! search_pattern_subpel {
53 ($field_a:ident: [$($ll_a:expr),*], $field_b:ident: [$($ll_b:expr),*]) => {
54 [ $(MotionVector { $field_a: $ll_a, $field_b: $ll_b } ),*]
55 };
56}
57
58macro_rules! search_pattern {
62 ($field_a:ident: [$($ll_a:expr),*], $field_b:ident: [$($ll_b:expr),*]) => {
63 [ $(MotionVector { $field_a: $ll_a << 3, $field_b: $ll_b << 3 } ),*]
64 };
65}
66
67const DIAMOND_R1_PATTERN_SUBPEL: [MotionVector; 4] = search_pattern_subpel!(
76 col: [ 0, 1, 0, -1],
77 row: [ 1, 0, -1, 0]
78);
79
80const DIAMOND_R1_PATTERN: [MotionVector; 4] = search_pattern!(
89 col: [ 0, 1, 0, -1],
90 row: [ 1, 0, -1, 0]
91);
92
93const UMH_PATTERN: [MotionVector; 16] = search_pattern!(
108 col: [ -2, -1, 0, 1, 2, 3, 4, 3, 2, 1, 0, -1, -2, 3, -4, -3],
109 row: [ 4, 4, 4, 4, 4, 2, 0, -2, -4, -4, -4, -4, -4, -2, 0, 2]
110);
111
112const HEXAGON_PATTERN: [MotionVector; 6] = search_pattern!(
139 col: [ 0, 2, 2, 0, -2, -2],
140 row: [ -2, -1, 1, 2, 1, -1]
141);
142
143const SQUARE_REFINE_PATTERN: [MotionVector; 8] = search_pattern!(
152 col: [ -1, 0, 1, -1, 1, -1, 0, 1],
153 row: [ 1, 1, 1, 0, 0, -1, -1, -1]
154);
155
156pub(crate) fn estimate_inter_costs<T: Pixel>(
157 frame: Arc<Frame<T>>,
158 ref_frame: Arc<Frame<T>>,
159 bit_depth: usize,
160 frame_rate: Rational32,
161 chroma_sampling: ChromaSampling,
162 buffer: RefMEStats,
163 cpu_feature_level: CpuFeatureLevel,
164) -> f64 {
165 let last_fi =
166 FrameInvariants::new_key_frame(frame.planes[0].cfg.width, frame.planes[0].cfg.height);
167 let fi = FrameInvariants::new_inter_frame(&last_fi, 1).unwrap();
168
169 let mut fs = FrameState::new_with_frame_and_me_stats_and_rec(Arc::clone(&frame), buffer);
171 let mut tiling = TilingInfo::from_target_tiles(
172 frame.planes[0].cfg.width,
173 frame.planes[0].cfg.height,
174 *frame_rate.numer() as f64 / *frame_rate.denom() as f64,
175 TilingInfo::tile_log2(1, 0).unwrap(),
176 TilingInfo::tile_log2(1, 0).unwrap(),
177 chroma_sampling == ChromaSampling::Cs422,
178 );
179 compute_motion_vectors(&fi, &mut fs, &mut tiling, bit_depth, cpu_feature_level);
180
181 let plane_org = &frame.planes[0];
183 let plane_ref = &ref_frame.planes[0];
184 let h_in_imp_b = plane_org.cfg.height / IMPORTANCE_BLOCK_SIZE;
185 let w_in_imp_b = plane_org.cfg.width / IMPORTANCE_BLOCK_SIZE;
186 let stats = &fs.frame_me_stats.read().expect("poisoned lock")[0];
187 let bsize = BlockSize::from_width_and_height(IMPORTANCE_BLOCK_SIZE, IMPORTANCE_BLOCK_SIZE);
188
189 let mut inter_costs = 0;
190 (0..h_in_imp_b).for_each(|y| {
191 (0..w_in_imp_b).for_each(|x| {
192 let mv = stats[y * 2][x * 2].mv;
193
194 let reference_x = x as i64 * IMP_BLOCK_SIZE_IN_MV_UNITS + mv.col as i64;
197 let reference_y = y as i64 * IMP_BLOCK_SIZE_IN_MV_UNITS + mv.row as i64;
198
199 let region_org = plane_org.region(Area::Rect(Rect {
200 x: (x * IMPORTANCE_BLOCK_SIZE) as isize,
201 y: (y * IMPORTANCE_BLOCK_SIZE) as isize,
202 width: IMPORTANCE_BLOCK_SIZE,
203 height: IMPORTANCE_BLOCK_SIZE,
204 }));
205
206 let region_ref = plane_ref.region(Area::Rect(Rect {
207 x: reference_x as isize / IMP_BLOCK_MV_UNITS_PER_PIXEL as isize,
208 y: reference_y as isize / IMP_BLOCK_MV_UNITS_PER_PIXEL as isize,
209 width: IMPORTANCE_BLOCK_SIZE,
210 height: IMPORTANCE_BLOCK_SIZE,
211 }));
212
213 inter_costs += get_satd(
214 ®ion_org,
215 ®ion_ref,
216 bsize.width(),
217 bsize.height(),
218 bit_depth,
219 cpu_feature_level,
220 ) as u64;
221 });
222 });
223 inter_costs as f64 / (w_in_imp_b * h_in_imp_b) as f64
224}
225
226fn compute_motion_vectors<T: Pixel>(
227 fi: &FrameInvariants<T>,
228 fs: &mut FrameState<T>,
229 tiling_info: &mut TilingInfo,
230 bit_depth: usize,
231 cpu_feature_level: CpuFeatureLevel,
232) {
233 tiling_info
234 .tile_iter_mut(fs)
235 .collect::<Vec<_>>()
236 .into_par_iter()
237 .for_each(|mut ctx| {
238 let ts = &mut ctx.ts;
239 estimate_tile_motion(fi, ts, bit_depth, cpu_feature_level);
240 });
241}
242
243fn estimate_tile_motion<T: Pixel>(
244 fi: &FrameInvariants<T>,
245 ts: &mut TileStateMut<'_, T>,
246 bit_depth: usize,
247 cpu_feature_level: CpuFeatureLevel,
248) {
249 let init_size = MIB_SIZE_LOG2;
250
251 let mut prev_ssdec: Option<u8> = None;
252 for mv_size_in_b_log2 in (2..=init_size).rev() {
253 let init = mv_size_in_b_log2 == init_size;
254
255 let ssdec = match init_size - mv_size_in_b_log2 {
257 0 => 2,
258 1 => 1,
259 _ => 0,
260 };
261
262 let new_subsampling = if let Some(prev) = prev_ssdec {
263 prev != ssdec
264 } else {
265 false
266 };
267 prev_ssdec = Some(ssdec);
268
269 let lambda = 0;
271
272 for sby in 0..ts.sb_height {
273 for sbx in 0..ts.sb_width {
274 let mut tested_frames_flags = 0;
275 for &ref_frame in ALLOWED_REF_FRAMES {
276 let frame_flag = 1 << fi.ref_frames[ref_frame.to_index()];
277 if tested_frames_flags & frame_flag == frame_flag {
278 continue;
279 }
280 tested_frames_flags |= frame_flag;
281
282 let tile_bo = TileSuperBlockOffset(SuperBlockOffset { x: sbx, y: sby })
283 .block_offset(0, 0);
284
285 if new_subsampling {
286 refine_subsampled_sb_motion(
287 fi,
288 ts,
289 ref_frame,
290 mv_size_in_b_log2 + 1,
291 tile_bo,
292 ssdec,
293 lambda,
294 bit_depth,
295 cpu_feature_level,
296 );
297 }
298
299 estimate_sb_motion(
300 fi,
301 ts,
302 ref_frame,
303 mv_size_in_b_log2,
304 tile_bo,
305 init,
306 ssdec,
307 lambda,
308 bit_depth,
309 cpu_feature_level,
310 );
311 }
312 }
313 }
314 }
315}
316
317#[allow(clippy::too_many_arguments)]
318fn refine_subsampled_sb_motion<T: Pixel>(
319 fi: &FrameInvariants<T>,
320 ts: &mut TileStateMut<'_, T>,
321 ref_frame: RefType,
322 mv_size_in_b_log2: usize,
323 tile_bo: TileBlockOffset,
324 ssdec: u8,
325 lambda: u32,
326 bit_depth: usize,
327 cpu_feature_level: CpuFeatureLevel,
328) {
329 let pix_offset = tile_bo.to_luma_plane_offset();
330 let sb_h: usize = SB_SIZE.min(ts.height - pix_offset.y as usize);
331 let sb_w: usize = SB_SIZE.min(ts.width - pix_offset.x as usize);
332
333 let mv_size = MI_SIZE << mv_size_in_b_log2;
334
335 for y in (0..sb_h).step_by(mv_size) {
337 for x in (0..sb_w).step_by(mv_size) {
338 let sub_bo =
339 tile_bo.with_offset(x as isize >> MI_SIZE_LOG2, y as isize >> MI_SIZE_LOG2);
340
341 let w = mv_size.min(sb_w - x + (1 << ssdec) - 1) >> ssdec;
344 let h = mv_size.min(sb_h - y + (1 << ssdec) - 1) >> ssdec;
345
346 if let Some(results) = refine_subsampled_motion_estimate(
348 fi,
349 ts,
350 w,
351 h,
352 sub_bo,
353 ref_frame,
354 ssdec,
355 lambda,
356 bit_depth,
357 cpu_feature_level,
358 ) {
359 let sad =
361 (((results.rd.sad as u64) << (MAX_SB_SIZE_LOG2 * 2)) / (w * h) as u64) as u32;
362 save_me_stats(ts, mv_size_in_b_log2, sub_bo, ref_frame, MEStats {
363 mv: results.mv,
364 normalized_sad: sad,
365 });
366 }
367 }
368 }
369}
370
371#[allow(clippy::too_many_arguments)]
373fn refine_subsampled_motion_estimate<T: Pixel>(
374 fi: &FrameInvariants<T>,
375 ts: &TileStateMut<'_, T>,
376 w: usize,
377 h: usize,
378 tile_bo: TileBlockOffset,
379 ref_frame: RefType,
380 ssdec: u8,
381 lambda: u32,
382 bit_depth: usize,
383 cpu_feature_level: CpuFeatureLevel,
384) -> Option<MotionSearchResult> {
385 if let Some(ref rec) = fi.rec_buffer.frames[fi.ref_frames[ref_frame.to_index()] as usize] {
386 let frame_bo = ts.to_frame_block_offset(tile_bo);
387 let (mvx_min, mvx_max, mvy_min, mvy_max) =
388 get_mv_range(fi.w_in_b, fi.h_in_b, frame_bo, w << ssdec, h << ssdec);
389
390 let pmv = [MotionVector { row: 0, col: 0 }; 2];
391
392 let po = frame_bo.to_luma_plane_offset();
393 let (mvx_min, mvx_max, mvy_min, mvy_max) = (
394 mvx_min >> ssdec,
395 mvx_max >> ssdec,
396 mvy_min >> ssdec,
397 mvy_max >> ssdec,
398 );
399 let po = PlaneOffset {
400 x: po.x >> ssdec,
401 y: po.y >> ssdec,
402 };
403 let p_ref = match ssdec {
404 0 => &rec.frame.planes[0],
405 1 => &rec.input_hres,
406 2 => &rec.input_qres,
407 _ => unimplemented!(),
408 };
409
410 let org_region = &match ssdec {
411 0 => ts.input_tile.planes[0].subregion(Area::BlockStartingAt { bo: tile_bo.0 }),
412 1 => ts.input_hres.region(Area::StartingAt { x: po.x, y: po.y }),
413 2 => ts.input_qres.region(Area::StartingAt { x: po.x, y: po.y }),
414 _ => unimplemented!(),
415 };
416
417 let mv = ts.me_stats[ref_frame.to_index()][tile_bo.0.y][tile_bo.0.x].mv >> ssdec;
418
419 let x_lo = po.x + (mv.col as isize / 8 - 1).max(mvx_min / 8);
426 let x_hi = po.x + (mv.col as isize / 8 + 2).min(mvx_max / 8);
427 let y_lo = po.y + (mv.row as isize / 8 - 1).max(mvy_min / 8);
428 let y_hi = po.y + (mv.row as isize / 8 + 2).min(mvy_max / 8);
429 let mut results = full_search(
430 x_lo,
431 x_hi,
432 y_lo,
433 y_hi,
434 w,
435 h,
436 org_region,
437 p_ref,
438 po,
439 1,
440 lambda,
441 pmv,
442 bit_depth,
443 cpu_feature_level,
444 );
445
446 results.mv = results.mv << ssdec;
448
449 Some(results)
450 } else {
451 None
452 }
453}
454
455fn get_mv_range(
456 w_in_b: usize,
457 h_in_b: usize,
458 bo: PlaneBlockOffset,
459 blk_w: usize,
460 blk_h: usize,
461) -> (isize, isize, isize, isize) {
462 let border_w = 128 + blk_w as isize * 8;
463 let border_h = 128 + blk_h as isize * 8;
464 let mvx_min = -(bo.0.x as isize) * (8 * MI_SIZE) as isize - border_w;
465 let mvx_max = ((w_in_b - bo.0.x) as isize - (blk_w / MI_SIZE) as isize)
466 * (8 * MI_SIZE) as isize
467 + border_w;
468 let mvy_min = -(bo.0.y as isize) * (8 * MI_SIZE) as isize - border_h;
469 let mvy_max = ((h_in_b - bo.0.y) as isize - (blk_h / MI_SIZE) as isize)
470 * (8 * MI_SIZE) as isize
471 + border_h;
472
473 (
475 mvx_min.max(MV_LOW as isize + 1),
476 mvx_max.min(MV_UPP as isize - 1),
477 mvy_min.max(MV_LOW as isize + 1),
478 mvy_max.min(MV_UPP as isize - 1),
479 )
480}
481
482#[allow(clippy::too_many_arguments)]
483fn full_search<T: Pixel>(
484 x_lo: isize,
485 x_hi: isize,
486 y_lo: isize,
487 y_hi: isize,
488 w: usize,
489 h: usize,
490 org_region: &PlaneRegion<T>,
491 p_ref: &Plane<T>,
492 po: PlaneOffset,
493 step: usize,
494 lambda: u32,
495 pmv: [MotionVector; 2],
496 bit_depth: usize,
497 cpu_feature_level: CpuFeatureLevel,
498) -> MotionSearchResult {
499 let search_region = p_ref.region(Area::Rect(Rect {
500 x: x_lo,
501 y: y_lo,
502 width: (x_hi - x_lo) as usize + w,
503 height: (y_hi - y_lo) as usize + h,
504 }));
505
506 let mut best: MotionSearchResult = MotionSearchResult::empty();
507
508 for vert_window in search_region.vert_windows(h).step_by(step) {
510 for ref_window in vert_window.horz_windows(w).step_by(step) {
511 let &Rect { x, y, .. } = ref_window.rect();
512
513 let mv = MotionVector {
514 row: 8 * (y as i16 - po.y as i16),
515 col: 8 * (x as i16 - po.x as i16),
516 };
517
518 let rd = compute_mv_rd(
519 pmv,
520 lambda,
521 false,
522 bit_depth,
523 w,
524 h,
525 mv,
526 org_region,
527 &ref_window,
528 cpu_feature_level,
529 );
530
531 if rd.cost < best.rd.cost {
532 best.rd = rd;
533 best.mv = mv;
534 }
535 }
536 }
537
538 best
539}
540
541#[allow(clippy::too_many_arguments)]
543fn compute_mv_rd<T: Pixel>(
544 pmv: [MotionVector; 2],
545 lambda: u32,
546 use_satd: bool,
547 bit_depth: usize,
548 w: usize,
549 h: usize,
550 cand_mv: MotionVector,
551 plane_org: &PlaneRegion<'_, T>,
552 plane_ref: &PlaneRegion<'_, T>,
553 cpu_feature_level: CpuFeatureLevel,
554) -> MVCandidateRD {
555 let sad = if use_satd {
556 get_satd(plane_org, plane_ref, w, h, bit_depth, cpu_feature_level)
557 } else {
558 get_sad(plane_org, plane_ref, w, h, bit_depth, cpu_feature_level)
559 };
560
561 let rate1 = get_mv_rate(cand_mv, pmv[0]);
562 let rate2 = get_mv_rate(cand_mv, pmv[1]);
563 let rate = rate1.min(rate2 + 1);
564
565 MVCandidateRD {
566 cost: 256 * sad as u64 + rate as u64 * lambda as u64,
567 sad,
568 }
569}
570
571fn diff_to_rate(diff: i16) -> u32 {
572 let d = diff >> 1;
573 2 * ILog::ilog(d.abs()) as u32
574}
575
576fn get_mv_rate(a: MotionVector, b: MotionVector) -> u32 {
577 diff_to_rate(a.row - b.row) + diff_to_rate(a.col - b.col)
578}
579
580#[derive(Debug, Copy, Clone)]
582pub struct MotionSearchResult {
583 pub mv: MotionVector,
585 pub rd: MVCandidateRD,
587}
588
589impl MotionSearchResult {
590 pub fn empty() -> MotionSearchResult {
596 MotionSearchResult {
597 mv: MotionVector::default(),
598 rd: MVCandidateRD::empty(),
599 }
600 }
601
602 const fn is_empty(&self) -> bool {
604 self.rd.cost == u64::MAX
605 }
606}
607
608#[derive(Debug, Copy, Clone)]
610pub struct MVCandidateRD {
611 pub cost: u64,
613 pub sad: u32,
615}
616
617impl MVCandidateRD {
618 const fn empty() -> MVCandidateRD {
624 MVCandidateRD {
625 sad: u32::MAX,
626 cost: u64::MAX,
627 }
628 }
629}
630
631fn save_me_stats<T: Pixel>(
632 ts: &mut TileStateMut<'_, T>,
633 mv_size_in_b_log2: usize,
634 tile_bo: TileBlockOffset,
635 ref_frame: RefType,
636 stats: MEStats,
637) {
638 let size_in_b = 1 << mv_size_in_b_log2;
639 let tile_me_stats = &mut ts.me_stats[ref_frame.to_index()];
640 let tile_bo_x_end = (tile_bo.0.x + size_in_b).min(ts.mi_width);
641 let tile_bo_y_end = (tile_bo.0.y + size_in_b).min(ts.mi_height);
642 for mi_y in tile_bo.0.y..tile_bo_y_end {
643 for a in tile_me_stats[mi_y][tile_bo.0.x..tile_bo_x_end].iter_mut() {
644 *a = stats;
645 }
646 }
647}
648
649#[allow(clippy::too_many_arguments)]
650fn estimate_sb_motion<T: Pixel>(
651 fi: &FrameInvariants<T>,
652 ts: &mut TileStateMut<'_, T>,
653 ref_frame: RefType,
654 mv_size_in_b_log2: usize,
655 tile_bo: TileBlockOffset,
656 init: bool,
657 ssdec: u8,
658 lambda: u32,
659 bit_depth: usize,
660 cpu_feature_level: CpuFeatureLevel,
661) {
662 let pix_offset = tile_bo.to_luma_plane_offset();
663 let sb_h: usize = SB_SIZE.min(ts.height - pix_offset.y as usize);
664 let sb_w: usize = SB_SIZE.min(ts.width - pix_offset.x as usize);
665
666 let mv_size = MI_SIZE << mv_size_in_b_log2;
667
668 for y in (0..sb_h).step_by(mv_size) {
670 for x in (0..sb_w).step_by(mv_size) {
671 let corner: MVSamplingMode = if init {
672 MVSamplingMode::INIT
673 } else {
674 MVSamplingMode::CORNER {
677 right: x & mv_size == mv_size,
678 bottom: y & mv_size == mv_size,
679 }
680 };
681
682 let sub_bo =
683 tile_bo.with_offset(x as isize >> MI_SIZE_LOG2, y as isize >> MI_SIZE_LOG2);
684
685 let w = mv_size.min(sb_w - x + (1 << ssdec) - 1) >> ssdec;
688 let h = mv_size.min(sb_h - y + (1 << ssdec) - 1) >> ssdec;
689
690 if let Some(results) = estimate_motion(
694 fi,
695 ts,
696 w,
697 h,
698 sub_bo,
699 ref_frame,
700 None,
701 corner,
702 init,
703 ssdec,
704 Some(lambda),
705 bit_depth,
706 cpu_feature_level,
707 ) {
708 let sad =
710 (((results.rd.sad as u64) << (MAX_SB_SIZE_LOG2 * 2)) / (w * h) as u64) as u32;
711 save_me_stats(ts, mv_size_in_b_log2, sub_bo, ref_frame, MEStats {
712 mv: results.mv,
713 normalized_sad: sad,
714 });
715 }
716 }
717 }
718}
719
720#[allow(clippy::too_many_arguments)]
721fn estimate_motion<T: Pixel>(
722 fi: &FrameInvariants<T>,
723 ts: &TileStateMut<'_, T>,
724 w: usize,
725 h: usize,
726 tile_bo: TileBlockOffset,
727 ref_frame: RefType,
728 pmv: Option<[MotionVector; 2]>,
729 corner: MVSamplingMode,
730 extensive_search: bool,
731 ssdec: u8,
732 lambda: Option<u32>,
733 bit_depth: usize,
734 cpu_feature_level: CpuFeatureLevel,
735) -> Option<MotionSearchResult> {
736 if let Some(ref rec) = fi.rec_buffer.frames[fi.ref_frames[ref_frame.to_index()] as usize] {
737 let frame_bo = ts.to_frame_block_offset(tile_bo);
738 let (mvx_min, mvx_max, mvy_min, mvy_max) =
739 get_mv_range(fi.w_in_b, fi.h_in_b, frame_bo, w << ssdec, h << ssdec);
740
741 let lambda = lambda.unwrap_or(0);
742
743 let global_mv = [MotionVector { row: 0, col: 0 }; 2];
744
745 let po = frame_bo.to_luma_plane_offset();
746 let (mvx_min, mvx_max, mvy_min, mvy_max) = (
747 mvx_min >> ssdec,
748 mvx_max >> ssdec,
749 mvy_min >> ssdec,
750 mvy_max >> ssdec,
751 );
752 let po = PlaneOffset {
753 x: po.x >> ssdec,
754 y: po.y >> ssdec,
755 };
756 let p_ref = match ssdec {
757 0 => &rec.frame.planes[0],
758 1 => &rec.input_hres,
759 2 => &rec.input_qres,
760 _ => unimplemented!(),
761 };
762
763 let org_region = &match ssdec {
764 0 => ts.input_tile.planes[0].subregion(Area::BlockStartingAt { bo: tile_bo.0 }),
765 1 => ts.input_hres.region(Area::StartingAt { x: po.x, y: po.y }),
766 2 => ts.input_qres.region(Area::StartingAt { x: po.x, y: po.y }),
767 _ => unimplemented!(),
768 };
769
770 let mut best: MotionSearchResult = full_pixel_me(
771 fi,
772 ts,
773 org_region,
774 p_ref,
775 tile_bo,
776 po,
777 lambda,
778 pmv.unwrap_or(global_mv),
779 w,
780 h,
781 mvx_min,
782 mvx_max,
783 mvy_min,
784 mvy_max,
785 ref_frame,
786 corner,
787 extensive_search,
788 ssdec,
789 bit_depth,
790 cpu_feature_level,
791 );
792
793 if let Some(pmv) = pmv {
794 best.rd = get_fullpel_mv_rd(
795 po,
796 org_region,
797 p_ref,
798 bit_depth,
799 pmv,
800 lambda,
801 true,
802 mvx_min,
803 mvx_max,
804 mvy_min,
805 mvy_max,
806 w,
807 h,
808 best.mv,
809 cpu_feature_level,
810 );
811
812 sub_pixel_me(
813 fi,
814 po,
815 org_region,
816 p_ref,
817 lambda,
818 pmv,
819 mvx_min,
820 mvx_max,
821 mvy_min,
822 mvy_max,
823 w,
824 h,
825 true,
826 &mut best,
827 ref_frame,
828 bit_depth,
829 cpu_feature_level,
830 );
831 }
832
833 best.mv = best.mv << ssdec;
835
836 Some(best)
837 } else {
838 None
839 }
840}
841
842#[allow(clippy::too_many_arguments)]
843fn full_pixel_me<T: Pixel>(
844 fi: &FrameInvariants<T>,
845 ts: &TileStateMut<'_, T>,
846 org_region: &PlaneRegion<T>,
847 p_ref: &Plane<T>,
848 tile_bo: TileBlockOffset,
849 po: PlaneOffset,
850 lambda: u32,
851 pmv: [MotionVector; 2],
852 w: usize,
853 h: usize,
854 mvx_min: isize,
855 mvx_max: isize,
856 mvy_min: isize,
857 mvy_max: isize,
858 ref_frame: RefType,
859 corner: MVSamplingMode,
860 extensive_search: bool,
861 ssdec: u8,
862 bit_depth: usize,
863 cpu_feature_level: CpuFeatureLevel,
864) -> MotionSearchResult {
865 let ref_frame_id = ref_frame.to_index();
866 let tile_me_stats = &ts.me_stats[ref_frame_id].as_const();
867 let frame_ref = fi.rec_buffer.frames[fi.ref_frames[0] as usize]
868 .as_ref()
869 .map(|frame_ref| frame_ref.frame_me_stats.read().expect("poisoned lock"));
870 let subsets = get_subset_predictors(
871 tile_bo,
872 tile_me_stats,
873 frame_ref,
874 ref_frame_id,
875 w,
876 h,
877 mvx_min,
878 mvx_max,
879 mvy_min,
880 mvy_max,
881 corner,
882 ssdec,
883 );
884
885 let try_cands = |predictors: &[MotionVector], best: &mut MotionSearchResult| {
886 let mut results = get_best_predictor(
887 po,
888 org_region,
889 p_ref,
890 predictors,
891 bit_depth,
892 pmv,
893 lambda,
894 mvx_min,
895 mvx_max,
896 mvy_min,
897 mvy_max,
898 w,
899 h,
900 cpu_feature_level,
901 );
902 fullpel_diamond_search(
903 po,
904 org_region,
905 p_ref,
906 &mut results,
907 bit_depth,
908 pmv,
909 lambda,
910 mvx_min,
911 mvx_max,
912 mvy_min,
913 mvy_max,
914 w,
915 h,
916 cpu_feature_level,
917 );
918
919 if results.rd.cost < best.rd.cost {
920 *best = results;
921 }
922 };
923
924 let mut best: MotionSearchResult = MotionSearchResult::empty();
925 if !extensive_search {
926 try_cands(&subsets.all_mvs(), &mut best);
927 best
928 } else {
929 let thresh = (subsets.min_sad as f32 * 1.2) as u32 + (((w * h) as u32) << (bit_depth - 8));
935
936 if let Some(median) = subsets.median {
937 try_cands(&[median], &mut best);
938
939 if best.rd.sad < thresh {
940 return best;
941 }
942 }
943
944 try_cands(&subsets.subset_b, &mut best);
945
946 if best.rd.sad < thresh {
947 return best;
948 }
949
950 try_cands(&subsets.subset_c, &mut best);
951
952 if best.rd.sad < thresh {
953 return best;
954 }
955
956 uneven_multi_hex_search(
960 po,
961 org_region,
962 p_ref,
963 &mut best,
964 bit_depth,
965 pmv,
966 lambda,
967 mvx_min,
968 mvx_max,
969 mvy_min,
970 mvy_max,
971 w,
972 h,
973 24,
974 cpu_feature_level,
975 );
976
977 best
978 }
979}
980
981#[allow(clippy::too_many_arguments)]
982fn sub_pixel_me<T: Pixel>(
983 fi: &FrameInvariants<T>,
984 po: PlaneOffset,
985 org_region: &PlaneRegion<T>,
986 p_ref: &Plane<T>,
987 lambda: u32,
988 pmv: [MotionVector; 2],
989 mvx_min: isize,
990 mvx_max: isize,
991 mvy_min: isize,
992 mvy_max: isize,
993 w: usize,
994 h: usize,
995 use_satd: bool,
996 best: &mut MotionSearchResult,
997 ref_frame: RefType,
998 bit_depth: usize,
999 cpu_feature_level: CpuFeatureLevel,
1000) {
1001 subpel_diamond_search(
1002 fi,
1003 po,
1004 org_region,
1005 p_ref,
1006 bit_depth,
1007 pmv,
1008 lambda,
1009 mvx_min,
1010 mvx_max,
1011 mvy_min,
1012 mvy_max,
1013 w,
1014 h,
1015 use_satd,
1016 best,
1017 ref_frame,
1018 cpu_feature_level,
1019 );
1020}
1021
1022#[allow(clippy::too_many_arguments)]
1028fn subpel_diamond_search<T: Pixel>(
1029 fi: &FrameInvariants<T>,
1030 po: PlaneOffset,
1031 org_region: &PlaneRegion<T>,
1032 _p_ref: &Plane<T>,
1033 bit_depth: usize,
1034 pmv: [MotionVector; 2],
1035 lambda: u32,
1036 mvx_min: isize,
1037 mvx_max: isize,
1038 mvy_min: isize,
1039 mvy_max: isize,
1040 w: usize,
1041 h: usize,
1042 use_satd: bool,
1043 current: &mut MotionSearchResult,
1044 ref_frame: RefType,
1045 cpu_feature_level: CpuFeatureLevel,
1046) {
1047 let mc_w = w.next_power_of_two();
1049 let mc_h = (h + 1) & !1;
1050
1051 let cfg = PlaneConfig::new(mc_w, mc_h, 0, 0, 0, 0, std::mem::size_of::<T>());
1053 let mut buf: Aligned<A64, [T; 128 * 128]> = Aligned([T::cast_from(0); 128 * 128]);
1056 let mut tmp_region = PlaneRegionMut::from_slice(buf.as_mut(), &cfg, Rect {
1057 x: 0,
1058 y: 0,
1059 width: cfg.width,
1060 height: cfg.height,
1061 });
1062
1063 let (mut diamond_radius_log2, diamond_radius_end_log2) = (2u8, 1u8);
1065
1066 loop {
1067 let mut best_cand: MotionSearchResult = MotionSearchResult::empty();
1069 for &offset in &DIAMOND_R1_PATTERN_SUBPEL {
1070 let cand_mv = current.mv + (offset << diamond_radius_log2);
1071
1072 let rd = get_subpel_mv_rd(
1073 fi,
1074 po,
1075 org_region,
1076 bit_depth,
1077 pmv,
1078 lambda,
1079 use_satd,
1080 mvx_min,
1081 mvx_max,
1082 mvy_min,
1083 mvy_max,
1084 w,
1085 h,
1086 cand_mv,
1087 &mut tmp_region,
1088 ref_frame,
1089 cpu_feature_level,
1090 );
1091
1092 if rd.cost < best_cand.rd.cost {
1093 best_cand.mv = cand_mv;
1094 best_cand.rd = rd;
1095 }
1096 }
1097
1098 if current.rd.cost <= best_cand.rd.cost {
1100 if diamond_radius_log2 == diamond_radius_end_log2 {
1101 break;
1102 } else {
1103 diamond_radius_log2 -= 1;
1104 }
1105 } else {
1106 *current = best_cand;
1107 }
1108 }
1109
1110 assert!(!current.is_empty());
1111}
1112
1113#[allow(clippy::too_many_arguments)]
1114fn get_subpel_mv_rd<T: Pixel>(
1115 fi: &FrameInvariants<T>,
1116 po: PlaneOffset,
1117 org_region: &PlaneRegion<T>,
1118 bit_depth: usize,
1119 pmv: [MotionVector; 2],
1120 lambda: u32,
1121 use_satd: bool,
1122 mvx_min: isize,
1123 mvx_max: isize,
1124 mvy_min: isize,
1125 mvy_max: isize,
1126 w: usize,
1127 h: usize,
1128 cand_mv: MotionVector,
1129 tmp_region: &mut PlaneRegionMut<T>,
1130 ref_frame: RefType,
1131 cpu_feature_level: CpuFeatureLevel,
1132) -> MVCandidateRD {
1133 if (cand_mv.col as isize) < mvx_min
1134 || (cand_mv.col as isize) > mvx_max
1135 || (cand_mv.row as isize) < mvy_min
1136 || (cand_mv.row as isize) > mvy_max
1137 {
1138 return MVCandidateRD::empty();
1139 }
1140
1141 let tmp_width = tmp_region.rect().width;
1142 let tmp_height = tmp_region.rect().height;
1143 let tile_rect = TileRect {
1144 x: 0,
1145 y: 0,
1146 width: tmp_width,
1147 height: tmp_height,
1148 };
1149
1150 PredictionMode::NEWMV.predict_inter_single(
1151 fi,
1152 tile_rect,
1153 0,
1154 po,
1155 tmp_region,
1156 tmp_width,
1158 tmp_height,
1159 ref_frame,
1160 cand_mv,
1161 bit_depth,
1162 cpu_feature_level,
1163 );
1164 let plane_ref = tmp_region.as_const();
1165 compute_mv_rd(
1166 pmv,
1167 lambda,
1168 use_satd,
1169 bit_depth,
1170 w,
1171 h,
1172 cand_mv,
1173 org_region,
1174 &plane_ref,
1175 cpu_feature_level,
1176 )
1177}
1178
1179#[allow(clippy::too_many_arguments)]
1191fn uneven_multi_hex_search<T: Pixel>(
1192 po: PlaneOffset,
1193 org_region: &PlaneRegion<T>,
1194 p_ref: &Plane<T>,
1195 current: &mut MotionSearchResult,
1196 bit_depth: usize,
1197 pmv: [MotionVector; 2],
1198 lambda: u32,
1199 mvx_min: isize,
1200 mvx_max: isize,
1201 mvy_min: isize,
1202 mvy_max: isize,
1203 w: usize,
1204 h: usize,
1205 me_range: i16,
1206 cpu_feature_level: CpuFeatureLevel,
1207) {
1208 assert!(!current.is_empty());
1209
1210 let center = current.mv;
1225
1226 for i in (1..=me_range).step_by(2) {
1228 const HORIZONTAL_LINE: [MotionVector; 2] = search_pattern!(
1229 col: [ 0, 0],
1230 row: [-1, 1]
1231 );
1232
1233 for &offset in &HORIZONTAL_LINE {
1234 let cand_mv = center + offset * i;
1235 let rd = get_fullpel_mv_rd(
1236 po,
1237 org_region,
1238 p_ref,
1239 bit_depth,
1240 pmv,
1241 lambda,
1242 false,
1243 mvx_min,
1244 mvx_max,
1245 mvy_min,
1246 mvy_max,
1247 w,
1248 h,
1249 cand_mv,
1250 cpu_feature_level,
1251 );
1252
1253 if rd.cost < current.rd.cost {
1254 current.mv = cand_mv;
1255 current.rd = rd;
1256 }
1257 }
1258 }
1259
1260 for i in (1..=me_range >> 1).step_by(2) {
1262 const VERTICAL_LINE: [MotionVector; 2] = search_pattern!(
1263 col: [-1, 1],
1264 row: [ 0, 0]
1265 );
1266
1267 for &offset in &VERTICAL_LINE {
1268 let cand_mv = center + offset * i;
1269 let rd = get_fullpel_mv_rd(
1270 po,
1271 org_region,
1272 p_ref,
1273 bit_depth,
1274 pmv,
1275 lambda,
1276 false,
1277 mvx_min,
1278 mvx_max,
1279 mvy_min,
1280 mvy_max,
1281 w,
1282 h,
1283 cand_mv,
1284 cpu_feature_level,
1285 );
1286
1287 if rd.cost < current.rd.cost {
1288 current.mv = cand_mv;
1289 current.rd = rd;
1290 }
1291 }
1292 }
1293
1294 let center = current.mv;
1296 for row in -2..=2 {
1297 for col in -2..=2 {
1298 if row == 0 && col == 0 {
1299 continue;
1300 }
1301 let cand_mv = center + MotionVector { row, col };
1302 let rd = get_fullpel_mv_rd(
1303 po,
1304 org_region,
1305 p_ref,
1306 bit_depth,
1307 pmv,
1308 lambda,
1309 false,
1310 mvx_min,
1311 mvx_max,
1312 mvy_min,
1313 mvy_max,
1314 w,
1315 h,
1316 cand_mv,
1317 cpu_feature_level,
1318 );
1319
1320 if rd.cost < current.rd.cost {
1321 current.mv = cand_mv;
1322 current.rd = rd;
1323 }
1324 }
1325 }
1326
1327 let center = current.mv;
1351
1352 let iterations = me_range >> 2;
1354 for i in 1..=iterations {
1355 for &offset in &UMH_PATTERN {
1356 let cand_mv = center + offset * i;
1357 let rd = get_fullpel_mv_rd(
1358 po,
1359 org_region,
1360 p_ref,
1361 bit_depth,
1362 pmv,
1363 lambda,
1364 false,
1365 mvx_min,
1366 mvx_max,
1367 mvy_min,
1368 mvy_max,
1369 w,
1370 h,
1371 cand_mv,
1372 cpu_feature_level,
1373 );
1374
1375 if rd.cost < current.rd.cost {
1376 current.mv = cand_mv;
1377 current.rd = rd;
1378 }
1379 }
1380 }
1381
1382 hexagon_search(
1384 po,
1385 org_region,
1386 p_ref,
1387 current,
1388 bit_depth,
1389 pmv,
1390 lambda,
1391 mvx_min,
1392 mvx_max,
1393 mvy_min,
1394 mvy_max,
1395 w,
1396 h,
1397 cpu_feature_level,
1398 );
1399}
1400
1401#[allow(clippy::too_many_arguments)]
1402fn get_subset_predictors(
1403 tile_bo: TileBlockOffset,
1404 tile_me_stats: &TileMEStats<'_>,
1405 frame_ref_opt: Option<ReadGuardMEStats<'_>>,
1406 ref_frame_id: usize,
1407 pix_w: usize,
1408 pix_h: usize,
1409 mvx_min: isize,
1410 mvx_max: isize,
1411 mvy_min: isize,
1412 mvy_max: isize,
1413 corner: MVSamplingMode,
1414 ssdec: u8,
1415) -> MotionEstimationSubsets {
1416 let mut min_sad: u32 = u32::MAX;
1417 let mut subset_b = ArrayVec::<MotionVector, 5>::new();
1418 let mut subset_c = ArrayVec::<MotionVector, 5>::new();
1419
1420 let w = ((pix_w << ssdec) + MI_SIZE - 1) >> MI_SIZE_LOG2;
1422 let h = ((pix_h << ssdec) + MI_SIZE - 1) >> MI_SIZE_LOG2;
1423
1424 let clipped_half_w = (w >> 1).min(tile_me_stats.cols() - 1 - tile_bo.0.x);
1427 let clipped_half_h = (h >> 1).min(tile_me_stats.rows() - 1 - tile_bo.0.y);
1428
1429 let mut process_cand = |stats: MEStats| -> MotionVector {
1430 min_sad = min_sad.min(stats.normalized_sad);
1431 let mv = stats.mv.quantize_to_fullpel();
1432 MotionVector {
1433 col: clamp(mv.col as isize, mvx_min, mvx_max) as i16,
1434 row: clamp(mv.row as isize, mvy_min, mvy_max) as i16,
1435 }
1436 };
1437
1438 if tile_bo.0.x > 0 {
1445 subset_b.push(process_cand(
1446 tile_me_stats[tile_bo.0.y + clipped_half_h][tile_bo.0.x - 1],
1447 ));
1448 }
1449 if tile_bo.0.y > 0 {
1451 subset_b.push(process_cand(
1452 tile_me_stats[tile_bo.0.y - 1][tile_bo.0.x + clipped_half_w],
1453 ));
1454 }
1455
1456 if let MVSamplingMode::CORNER {
1462 right: true,
1463 bottom: _,
1464 } = corner
1465 {
1466 if tile_bo.0.x + w < tile_me_stats.cols() {
1467 subset_b.push(process_cand(
1468 tile_me_stats[tile_bo.0.y + clipped_half_h][tile_bo.0.x + w],
1469 ));
1470 }
1471 }
1472 if let MVSamplingMode::CORNER {
1474 right: _,
1475 bottom: true,
1476 } = corner
1477 {
1478 if tile_bo.0.y + h < tile_me_stats.rows() {
1479 subset_b.push(process_cand(
1480 tile_me_stats[tile_bo.0.y + h][tile_bo.0.x + clipped_half_w],
1481 ));
1482 }
1483 }
1484
1485 let median = if corner != MVSamplingMode::INIT {
1486 Some(process_cand(
1488 tile_me_stats[tile_bo.0.y + clipped_half_h][tile_bo.0.x + clipped_half_w],
1489 ))
1490 } else if subset_b.len() != 3 {
1491 None
1492 } else {
1493 let mut rows: ArrayVec<i16, 3> = subset_b.iter().map(|&a| a.row).collect();
1494 let mut cols: ArrayVec<i16, 3> = subset_b.iter().map(|&a| a.col).collect();
1495 rows.as_mut_slice().sort_unstable();
1496 cols.as_mut_slice().sort_unstable();
1497 Some(MotionVector {
1498 row: rows[1],
1499 col: cols[1],
1500 })
1501 };
1502
1503 subset_b.push(MotionVector::default());
1505
1506 if let Some(frame_me_stats) = frame_ref_opt {
1512 let prev_frame = &frame_me_stats[ref_frame_id];
1513
1514 let frame_bo = PlaneBlockOffset(BlockOffset {
1515 x: tile_me_stats.x() + tile_bo.0.x,
1516 y: tile_me_stats.y() + tile_bo.0.y,
1517 });
1518 let clipped_half_w = (w >> 1).min(prev_frame.cols - 1 - frame_bo.0.x);
1519 let clipped_half_h = (h >> 1).min(prev_frame.rows - 1 - frame_bo.0.y);
1520
1521 if frame_bo.0.x > 0 {
1523 subset_c.push(process_cand(
1524 prev_frame[frame_bo.0.y + clipped_half_h][frame_bo.0.x - 1],
1525 ));
1526 }
1527 if frame_bo.0.y > 0 {
1529 subset_c.push(process_cand(
1530 prev_frame[frame_bo.0.y - 1][frame_bo.0.x + clipped_half_w],
1531 ));
1532 }
1533 if frame_bo.0.x + w < prev_frame.cols {
1535 subset_c.push(process_cand(
1536 prev_frame[frame_bo.0.y + clipped_half_h][frame_bo.0.x + w],
1537 ));
1538 }
1539 if frame_bo.0.y + h < prev_frame.rows {
1541 subset_c.push(process_cand(
1542 prev_frame[frame_bo.0.y + h][frame_bo.0.x + clipped_half_w],
1543 ));
1544 }
1545
1546 subset_c.push(process_cand(
1547 prev_frame[frame_bo.0.y + clipped_half_h][frame_bo.0.x + clipped_half_w],
1548 ));
1549 }
1550
1551 let min_sad = ((min_sad as u64 * (pix_w * pix_h) as u64) >> (MAX_SB_SIZE_LOG2 * 2)) as u32;
1553
1554 let dec_mv = |mv: MotionVector| MotionVector {
1555 col: mv.col >> ssdec,
1556 row: mv.row >> ssdec,
1557 };
1558 let median = median.map(dec_mv);
1559 for mv in subset_b.iter_mut() {
1560 *mv = dec_mv(*mv);
1561 }
1562 for mv in subset_c.iter_mut() {
1563 *mv = dec_mv(*mv);
1564 }
1565
1566 MotionEstimationSubsets {
1567 min_sad,
1568 median,
1569 subset_b,
1570 subset_c,
1571 }
1572}
1573
1574#[allow(clippy::too_many_arguments)]
1575fn get_best_predictor<T: Pixel>(
1576 po: PlaneOffset,
1577 org_region: &PlaneRegion<T>,
1578 p_ref: &Plane<T>,
1579 predictors: &[MotionVector],
1580 bit_depth: usize,
1581 pmv: [MotionVector; 2],
1582 lambda: u32,
1583 mvx_min: isize,
1584 mvx_max: isize,
1585 mvy_min: isize,
1586 mvy_max: isize,
1587 w: usize,
1588 h: usize,
1589 cpu_feature_level: CpuFeatureLevel,
1590) -> MotionSearchResult {
1591 let mut best: MotionSearchResult = MotionSearchResult::empty();
1592
1593 for &init_mv in predictors.iter() {
1594 let rd = get_fullpel_mv_rd(
1595 po,
1596 org_region,
1597 p_ref,
1598 bit_depth,
1599 pmv,
1600 lambda,
1601 false,
1602 mvx_min,
1603 mvx_max,
1604 mvy_min,
1605 mvy_max,
1606 w,
1607 h,
1608 init_mv,
1609 cpu_feature_level,
1610 );
1611
1612 if rd.cost < best.rd.cost {
1613 best.mv = init_mv;
1614 best.rd = rd;
1615 }
1616 }
1617
1618 best
1619}
1620
1621#[allow(clippy::too_many_arguments)]
1622fn get_fullpel_mv_rd<T: Pixel>(
1623 po: PlaneOffset,
1624 org_region: &PlaneRegion<T>,
1625 p_ref: &Plane<T>,
1626 bit_depth: usize,
1627 pmv: [MotionVector; 2],
1628 lambda: u32,
1629 use_satd: bool,
1630 mvx_min: isize,
1631 mvx_max: isize,
1632 mvy_min: isize,
1633 mvy_max: isize,
1634 w: usize,
1635 h: usize,
1636 cand_mv: MotionVector,
1637 cpu_feature_level: CpuFeatureLevel,
1638) -> MVCandidateRD {
1639 if (cand_mv.col as isize) < mvx_min
1640 || (cand_mv.col as isize) > mvx_max
1641 || (cand_mv.row as isize) < mvy_min
1642 || (cand_mv.row as isize) > mvy_max
1643 {
1644 return MVCandidateRD::empty();
1645 }
1646
1647 let plane_ref = p_ref.region(Area::StartingAt {
1649 x: po.x + (cand_mv.col / 8) as isize,
1650 y: po.y + (cand_mv.row / 8) as isize,
1651 });
1652 compute_mv_rd(
1653 pmv,
1654 lambda,
1655 use_satd,
1656 bit_depth,
1657 w,
1658 h,
1659 cand_mv,
1660 org_region,
1661 &plane_ref,
1662 cpu_feature_level,
1663 )
1664}
1665
1666#[allow(clippy::too_many_arguments)]
1681fn hexagon_search<T: Pixel>(
1682 po: PlaneOffset,
1683 org_region: &PlaneRegion<T>,
1684 p_ref: &Plane<T>,
1685 current: &mut MotionSearchResult,
1686 bit_depth: usize,
1687 pmv: [MotionVector; 2],
1688 lambda: u32,
1689 mvx_min: isize,
1690 mvx_max: isize,
1691 mvy_min: isize,
1692 mvy_max: isize,
1693 w: usize,
1694 h: usize,
1695 cpu_feature_level: CpuFeatureLevel,
1696) {
1697 let mut best_cand_idx: usize = 0;
1704 let mut best_cand: MotionSearchResult = MotionSearchResult::empty();
1705
1706 for (i, &pattern_mv) in HEXAGON_PATTERN.iter().enumerate() {
1708 let cand_mv = current.mv + pattern_mv;
1709 let rd = get_fullpel_mv_rd(
1710 po,
1711 org_region,
1712 p_ref,
1713 bit_depth,
1714 pmv,
1715 lambda,
1716 false,
1717 mvx_min,
1718 mvx_max,
1719 mvy_min,
1720 mvy_max,
1721 w,
1722 h,
1723 cand_mv,
1724 cpu_feature_level,
1725 );
1726
1727 if rd.cost < best_cand.rd.cost {
1728 best_cand_idx = i;
1729 best_cand.mv = cand_mv;
1730 best_cand.rd = rd;
1731 }
1732 }
1733
1734 while best_cand.rd.cost < current.rd.cost {
1737 *current = best_cand;
1739 best_cand = MotionSearchResult::empty();
1740
1741 let center_cand_idx = best_cand_idx;
1744
1745 for idx_offset_mod6 in 5..=7 {
1751 let i = (center_cand_idx + idx_offset_mod6) % 6;
1752 let cand_mv = current.mv + HEXAGON_PATTERN[i];
1753
1754 let rd = get_fullpel_mv_rd(
1755 po,
1756 org_region,
1757 p_ref,
1758 bit_depth,
1759 pmv,
1760 lambda,
1761 false,
1762 mvx_min,
1763 mvx_max,
1764 mvy_min,
1765 mvy_max,
1766 w,
1767 h,
1768 cand_mv,
1769 cpu_feature_level,
1770 );
1771
1772 if rd.cost < best_cand.rd.cost {
1773 best_cand_idx = i;
1774 best_cand.mv = cand_mv;
1775 best_cand.rd = rd;
1776 }
1777 }
1778 }
1779
1780 let mut best_cand: MotionSearchResult = MotionSearchResult::empty();
1782 for &offset in &SQUARE_REFINE_PATTERN {
1783 let cand_mv = current.mv + offset;
1784 let rd = get_fullpel_mv_rd(
1785 po,
1786 org_region,
1787 p_ref,
1788 bit_depth,
1789 pmv,
1790 lambda,
1791 false,
1792 mvx_min,
1793 mvx_max,
1794 mvy_min,
1795 mvy_max,
1796 w,
1797 h,
1798 cand_mv,
1799 cpu_feature_level,
1800 );
1801
1802 if rd.cost < best_cand.rd.cost {
1803 best_cand.mv = cand_mv;
1804 best_cand.rd = rd;
1805 }
1806 }
1807 if best_cand.rd.cost < current.rd.cost {
1808 *current = best_cand;
1809 }
1810
1811 assert!(!current.is_empty());
1812}
1813
1814#[allow(clippy::too_many_arguments)]
1820fn fullpel_diamond_search<T: Pixel>(
1821 po: PlaneOffset,
1822 org_region: &PlaneRegion<T>,
1823 p_ref: &Plane<T>,
1824 current: &mut MotionSearchResult,
1825 bit_depth: usize,
1826 pmv: [MotionVector; 2],
1827 lambda: u32,
1828 mvx_min: isize,
1829 mvx_max: isize,
1830 mvy_min: isize,
1831 mvy_max: isize,
1832 w: usize,
1833 h: usize,
1834 cpu_feature_level: CpuFeatureLevel,
1835) {
1836 let (mut diamond_radius_log2, diamond_radius_end_log2) = (1u8, 0u8);
1838
1839 loop {
1840 let mut best_cand: MotionSearchResult = MotionSearchResult::empty();
1842 for &offset in &DIAMOND_R1_PATTERN {
1843 let cand_mv = current.mv + (offset << diamond_radius_log2);
1844 let rd = get_fullpel_mv_rd(
1845 po,
1846 org_region,
1847 p_ref,
1848 bit_depth,
1849 pmv,
1850 lambda,
1851 false,
1852 mvx_min,
1853 mvx_max,
1854 mvy_min,
1855 mvy_max,
1856 w,
1857 h,
1858 cand_mv,
1859 cpu_feature_level,
1860 );
1861
1862 if rd.cost < best_cand.rd.cost {
1863 best_cand.mv = cand_mv;
1864 best_cand.rd = rd;
1865 }
1866 }
1867
1868 if current.rd.cost <= best_cand.rd.cost {
1871 if diamond_radius_log2 == diamond_radius_end_log2 {
1872 break;
1873 } else {
1874 diamond_radius_log2 -= 1;
1875 }
1876 } else {
1877 *current = best_cand;
1878 }
1879 }
1880
1881 assert!(!current.is_empty());
1882}