rav1e/
encoder.rs

1// Copyright (c) 2018-2023, The rav1e contributors. All rights reserved
2//
3// This source code is subject to the terms of the BSD 2 Clause License and
4// the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
5// was not distributed with this source code in the LICENSE file, you can
6// obtain it at www.aomedia.org/license/software. If the Alliance for Open
7// Media Patent License 1.0 was not distributed with this source code in the
8// PATENTS file, you can obtain it at www.aomedia.org/license/patent.
9
10use crate::activity::*;
11use crate::api::config::GrainTableSegment;
12use crate::api::*;
13use crate::cdef::*;
14use crate::context::*;
15use crate::deblock::*;
16use crate::ec::*;
17use crate::frame::*;
18use crate::header::*;
19use crate::lrf::*;
20use crate::mc::{FilterMode, MotionVector};
21use crate::me::*;
22use crate::partition::PartitionType::*;
23use crate::partition::RefType::*;
24use crate::partition::*;
25use crate::predict::{
26  luma_ac, AngleDelta, IntraEdgeFilterParameters, IntraParam, PredictionMode,
27};
28use crate::quantize::*;
29use crate::rate::{
30  QuantizerParameters, FRAME_SUBTYPE_I, FRAME_SUBTYPE_P, QSCALE,
31};
32use crate::rdo::*;
33use crate::segmentation::*;
34use crate::serialize::{Deserialize, Serialize};
35use crate::stats::EncoderStats;
36use crate::tiling::*;
37use crate::transform::*;
38use crate::util::*;
39use crate::wasm_bindgen::*;
40
41use arg_enum_proc_macro::ArgEnum;
42use arrayvec::*;
43use bitstream_io::{BigEndian, BitWrite, BitWriter};
44use rayon::iter::*;
45
46use std::collections::VecDeque;
47use std::io::Write;
48use std::mem::MaybeUninit;
49use std::sync::Arc;
50use std::{fmt, io, mem};
51
/// Selects how the CDEF parameters for a frame are chosen.
///
/// NOTE(review): the meaning of each variant is inferred from its name only;
/// in this part of the file just `PickFromQ` is used (as the initial value in
/// `FrameInvariants::new`).
#[allow(dead_code)]
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum CDEFSearchMethod {
  /// Presumably derives the strengths from the quantizer — confirm.
  PickFromQ,
  /// Presumably a reduced-effort search — confirm.
  FastSearch,
  /// Presumably an exhaustive search — confirm.
  FullSearch,
}
59
60#[inline(always)]
61fn poly2(q: f32, a: f32, b: f32, c: f32, max: i32) -> i32 {
62  clamp((q * q).mul_add(a, q.mul_add(b, c)).round() as i32, 0, max)
63}
64
/// Raw bytes of a temporal delimiter.
///
/// NOTE(review): presumably a complete AV1 temporal-delimiter OBU (header
/// byte `0x12` followed by a zero payload size) — confirm against the spec.
pub static TEMPORAL_DELIMITER: [u8; 2] = [0x12, 0x00];

/// Upper bound on the number of temporal layers.
const MAX_NUM_TEMPORAL_LAYERS: usize = 8;
/// Upper bound on the number of spatial layers.
const MAX_NUM_SPATIAL_LAYERS: usize = 4;
/// Upper bound on the number of operating points: one per
/// (temporal layer, spatial layer) combination. Sizes the per-operating-point
/// arrays in `Sequence`.
const MAX_NUM_OPERATING_POINTS: usize =
  MAX_NUM_TEMPORAL_LAYERS * MAX_NUM_SPATIAL_LAYERS;

/// Size of blocks for the importance computation, in pixels.
pub const IMPORTANCE_BLOCK_SIZE: usize =
  1 << (IMPORTANCE_BLOCK_TO_BLOCK_SHIFT + BLOCK_TO_PLANE_SHIFT);
75
/// Data kept for a frame that is stored in a reference slot.
///
/// NOTE(review): field descriptions below are inferred from names and from
/// the matching fields of `FrameState`; confirm against the code that fills
/// this struct.
#[derive(Debug, Clone)]
pub struct ReferenceFrame<T: Pixel> {
  /// Order hint of the frame; compared through
  /// `Sequence::get_relative_dist` when evaluating skip mode.
  pub order_hint: u32,
  pub width: u32,
  pub height: u32,
  pub render_width: u32,
  pub render_height: u32,
  /// Frame pixels (presumably the reconstruction — confirm).
  pub frame: Arc<Frame<T>>,
  /// Presumably the half-resolution luma plane of the input — confirm.
  pub input_hres: Arc<Plane<T>>,
  /// Presumably the quarter-resolution luma plane of the input — confirm.
  pub input_qres: Arc<Plane<T>>,
  pub cdfs: CDFContext,
  pub frame_me_stats: RefMEStats,
  pub output_frameno: u64,
  pub segmentation: SegmentationState,
}
91
/// The `REF_FRAMES` reference slots together with one `DeblockState` per
/// slot.
#[derive(Debug, Clone, Default)]
pub struct ReferenceFramesSet<T: Pixel> {
  /// Reference slots; `None` marks a slot that holds no frame.
  pub frames: [Option<Arc<ReferenceFrame<T>>>; REF_FRAMES],
  /// Deblocking state, indexed like `frames`.
  pub deblock: [DeblockState; REF_FRAMES],
}
97
98impl<T: Pixel> ReferenceFramesSet<T> {
99  pub fn new() -> Self {
100    Self { frames: Default::default(), deblock: Default::default() }
101  }
102}
103
/// Metric the encoder is tuned for. `Psychovisual` is the default.
#[wasm_bindgen]
#[derive(
  ArgEnum, Copy, Clone, Debug, PartialEq, Eq, Serialize, Deserialize, Default,
)]
#[repr(C)]
pub enum Tune {
  /// Tune for PSNR. `FrameInvariants::new` only allows transform-domain
  /// distortion with this setting.
  Psnr,
  /// Tune for perceived quality (the default).
  #[default]
  Psychovisual,
}
114
/// Value stored in `Sequence::frame_id_length` by `Sequence::new`.
const FRAME_ID_LENGTH: u32 = 15;
/// Value stored in `Sequence::delta_frame_id_length` by `Sequence::new`.
const DELTA_FRAME_ID_LENGTH: u32 = 14;
117
/// OBU Sequence header of AV1
#[derive(Copy, Clone, Debug)]
pub struct Sequence {
  /// Profile index. `Sequence::new` picks 2 for 12-bit or 4:2:2 input, 1 for
  /// 4:4:4 input and 0 otherwise.
  pub profile: u8,
  /// Number of bits needed to represent the configured frame width.
  pub num_bits_width: u32,
  /// Number of bits needed to represent the configured frame height.
  pub num_bits_height: u32,
  pub bit_depth: usize,
  pub chroma_sampling: ChromaSampling,
  pub chroma_sample_position: ChromaSamplePosition,
  pub pixel_range: PixelRange,
  pub color_description: Option<ColorDescription>,
  pub mastering_display: Option<MasteringDisplay>,
  pub content_light: Option<ContentLight>,
  pub max_frame_width: u32,
  pub max_frame_height: u32,
  pub frame_id_numbers_present_flag: bool,
  pub frame_id_length: u32,
  pub delta_frame_id_length: u32,
  pub use_128x128_superblock: bool,
  pub order_hint_bits_minus_1: u32,
  /// 0 - force off
  /// 1 - force on
  /// 2 - adaptive
  pub force_screen_content_tools: u32,
  /// 0 - Not to force. MV can be in 1/4 or 1/8
  /// 1 - force to integer
  /// 2 - adaptive
  pub force_integer_mv: u32,
  /// Video is a single frame still picture
  pub still_picture: bool,
  /// Use reduced header for still picture
  pub reduced_still_picture_hdr: bool,
  /// enables/disables filter_intra
  pub enable_filter_intra: bool,
  /// enables/disables corner/edge filtering and upsampling
  pub enable_intra_edge_filter: bool,
  /// enables/disables interintra_compound
  pub enable_interintra_compound: bool,
  /// enables/disables masked compound
  pub enable_masked_compound: bool,
  /// 0 - disable dual interpolation filter
  /// 1 - enable vert/horiz filter selection
  pub enable_dual_filter: bool,
  /// 0 - disable order hint, and related tools
  /// jnt_comp, ref_frame_mvs, frame_sign_bias
  /// if 0, enable_jnt_comp and
  /// enable_ref_frame_mvs must be set to 0.
  pub enable_order_hint: bool,
  /// 0 - disable joint compound modes
  /// 1 - enable it
  pub enable_jnt_comp: bool,
  /// 0 - disable ref frame mvs
  /// 1 - enable it
  pub enable_ref_frame_mvs: bool,
  /// 0 - disable warped motion for sequence
  /// 1 - enable it for the sequence
  pub enable_warped_motion: bool,
  /// 0 - Disable superres for the sequence, and disable
  ///     transmitting per-frame superres enabled flag.
  /// 1 - Enable superres for the sequence, and also
  ///     enable per-frame flag to denote if superres is
  ///     enabled for that frame.
  pub enable_superres: bool,
  /// To turn on/off CDEF
  pub enable_cdef: bool,
  /// To turn on/off loop restoration
  pub enable_restoration: bool,
  /// To turn on/off larger-than-superblock loop restoration units
  pub enable_large_lru: bool,
  /// allow encoder to delay loop filter RDO/coding until after frame reconstruction is complete
  pub enable_delayed_loopfilter_rdo: bool,
  pub operating_points_cnt_minus_1: usize,
  pub operating_point_idc: [u16; MAX_NUM_OPERATING_POINTS],
  pub display_model_info_present_flag: bool,
  pub decoder_model_info_present_flag: bool,
  /// Level index per operating point; `Sequence::new` uses 31 when the
  /// configuration does not specify one.
  pub level_idx: [u8; MAX_NUM_OPERATING_POINTS],
  /// seq_tier in the spec. One bit: 0 or 1.
  pub tier: [usize; MAX_NUM_OPERATING_POINTS],
  /// True when the configuration carries a non-empty film grain table.
  pub film_grain_params_present: bool,
  pub timing_info_present: bool,
  /// Tile layout chosen by `Sequence::new`.
  pub tiling: TilingInfo,
  pub time_base: Rational,
}
201
202impl Sequence {
203  /// # Panics
204  ///
205  /// Panics if the resulting tile sizes would be too large.
206  pub fn new(config: &EncoderConfig) -> Sequence {
207    let width_bits = 32 - (config.width as u32).leading_zeros();
208    let height_bits = 32 - (config.height as u32).leading_zeros();
209    assert!(width_bits <= 16);
210    assert!(height_bits <= 16);
211
212    let profile = if config.bit_depth == 12
213      || config.chroma_sampling == ChromaSampling::Cs422
214    {
215      2
216    } else {
217      u8::from(config.chroma_sampling == ChromaSampling::Cs444)
218    };
219
220    let operating_point_idc: [u16; MAX_NUM_OPERATING_POINTS] =
221      [0; MAX_NUM_OPERATING_POINTS];
222    let level_idx: [u8; MAX_NUM_OPERATING_POINTS] =
223      if let Some(level_idx) = config.level_idx {
224        [level_idx; MAX_NUM_OPERATING_POINTS]
225      } else {
226        [31; MAX_NUM_OPERATING_POINTS]
227      };
228    let tier: [usize; MAX_NUM_OPERATING_POINTS] =
229      [0; MAX_NUM_OPERATING_POINTS];
230
231    // Restoration filters are not useful for very small frame sizes,
232    // so disable them in that case.
233    let enable_restoration_filters = config.width >= 32 && config.height >= 32;
234    let use_128x128_superblock = false;
235
236    let frame_rate = config.frame_rate();
237    let sb_size_log2 = Self::sb_size_log2(use_128x128_superblock);
238
239    let mut tiling = TilingInfo::from_target_tiles(
240      sb_size_log2,
241      config.width,
242      config.height,
243      frame_rate,
244      TilingInfo::tile_log2(1, config.tile_cols).unwrap(),
245      TilingInfo::tile_log2(1, config.tile_rows).unwrap(),
246      config.chroma_sampling == ChromaSampling::Cs422,
247    );
248
249    if config.tiles > 0 {
250      let mut tile_rows_log2 = 0;
251      let mut tile_cols_log2 = 0;
252      while (tile_rows_log2 < tiling.max_tile_rows_log2)
253        || (tile_cols_log2 < tiling.max_tile_cols_log2)
254      {
255        tiling = TilingInfo::from_target_tiles(
256          sb_size_log2,
257          config.width,
258          config.height,
259          frame_rate,
260          tile_cols_log2,
261          tile_rows_log2,
262          config.chroma_sampling == ChromaSampling::Cs422,
263        );
264
265        if tiling.rows * tiling.cols >= config.tiles {
266          break;
267        };
268
269        if ((tiling.tile_height_sb >= tiling.tile_width_sb)
270          && (tiling.tile_rows_log2 < tiling.max_tile_rows_log2))
271          || (tile_cols_log2 >= tiling.max_tile_cols_log2)
272        {
273          tile_rows_log2 += 1;
274        } else {
275          tile_cols_log2 += 1;
276        }
277      }
278    }
279
280    Sequence {
281      tiling,
282      profile,
283      num_bits_width: width_bits,
284      num_bits_height: height_bits,
285      bit_depth: config.bit_depth,
286      chroma_sampling: config.chroma_sampling,
287      chroma_sample_position: config.chroma_sample_position,
288      pixel_range: config.pixel_range,
289      color_description: config.color_description,
290      mastering_display: config.mastering_display,
291      content_light: config.content_light,
292      max_frame_width: config.width as u32,
293      max_frame_height: config.height as u32,
294      frame_id_numbers_present_flag: false,
295      frame_id_length: FRAME_ID_LENGTH,
296      delta_frame_id_length: DELTA_FRAME_ID_LENGTH,
297      use_128x128_superblock,
298      order_hint_bits_minus_1: 5,
299      force_screen_content_tools: if config.still_picture { 2 } else { 0 },
300      force_integer_mv: 2,
301      still_picture: config.still_picture,
302      reduced_still_picture_hdr: config.still_picture,
303      enable_filter_intra: false,
304      enable_intra_edge_filter: true,
305      enable_interintra_compound: false,
306      enable_masked_compound: false,
307      enable_dual_filter: false,
308      enable_order_hint: !config.still_picture,
309      enable_jnt_comp: false,
310      enable_ref_frame_mvs: false,
311      enable_warped_motion: false,
312      enable_superres: false,
313      enable_cdef: config.speed_settings.cdef && enable_restoration_filters,
314      enable_restoration: config.speed_settings.lrf
315        && enable_restoration_filters,
316      enable_large_lru: true,
317      enable_delayed_loopfilter_rdo: true,
318      operating_points_cnt_minus_1: 0,
319      operating_point_idc,
320      display_model_info_present_flag: false,
321      decoder_model_info_present_flag: false,
322      level_idx,
323      tier,
324      film_grain_params_present: config
325        .film_grain_params
326        .as_ref()
327        .map(|entries| !entries.is_empty())
328        .unwrap_or(false),
329      timing_info_present: config.enable_timing_info,
330      time_base: config.time_base,
331    }
332  }
333
334  pub const fn get_relative_dist(&self, a: u32, b: u32) -> i32 {
335    let diff = a as i32 - b as i32;
336    let m = 1 << self.order_hint_bits_minus_1;
337    (diff & (m - 1)) - (diff & m)
338  }
339
340  pub fn get_skip_mode_allowed<T: Pixel>(
341    &self, fi: &FrameInvariants<T>, inter_cfg: &InterConfig,
342    reference_select: bool,
343  ) -> bool {
344    if fi.intra_only || !reference_select || !self.enable_order_hint {
345      return false;
346    }
347
348    let mut forward_idx: isize = -1;
349    let mut backward_idx: isize = -1;
350    let mut forward_hint = 0;
351    let mut backward_hint = 0;
352
353    for i in inter_cfg.allowed_ref_frames().iter().map(|rf| rf.to_index()) {
354      if let Some(ref rec) = fi.rec_buffer.frames[fi.ref_frames[i] as usize] {
355        let ref_hint = rec.order_hint;
356
357        if self.get_relative_dist(ref_hint, fi.order_hint) < 0 {
358          if forward_idx < 0
359            || self.get_relative_dist(ref_hint, forward_hint) > 0
360          {
361            forward_idx = i as isize;
362            forward_hint = ref_hint;
363          }
364        } else if self.get_relative_dist(ref_hint, fi.order_hint) > 0
365          && (backward_idx < 0
366            || self.get_relative_dist(ref_hint, backward_hint) > 0)
367        {
368          backward_idx = i as isize;
369          backward_hint = ref_hint;
370        }
371      }
372    }
373
374    if forward_idx < 0 {
375      false
376    } else if backward_idx >= 0 {
377      // set skip_mode_frame
378      true
379    } else {
380      let mut second_forward_idx: isize = -1;
381      let mut second_forward_hint = 0;
382
383      for i in inter_cfg.allowed_ref_frames().iter().map(|rf| rf.to_index()) {
384        if let Some(ref rec) = fi.rec_buffer.frames[fi.ref_frames[i] as usize]
385        {
386          let ref_hint = rec.order_hint;
387
388          if self.get_relative_dist(ref_hint, forward_hint) < 0
389            && (second_forward_idx < 0
390              || self.get_relative_dist(ref_hint, second_forward_hint) > 0)
391          {
392            second_forward_idx = i as isize;
393            second_forward_hint = ref_hint;
394          }
395        }
396      }
397
398      // TODO: Set skip_mode_frame, when second_forward_idx is not less than 0.
399      second_forward_idx >= 0
400    }
401  }
402
403  #[inline(always)]
404  const fn sb_size_log2(use_128x128_superblock: bool) -> usize {
405    6 + (use_128x128_superblock as usize)
406  }
407}
408
/// Mutable per-frame encoding state: the input and reconstructed frames
/// plus the coding contexts and filter states that go with them.
#[derive(Debug, Clone)]
pub struct FrameState<T: Pixel> {
  /// Log2 of the superblock size, copied from the frame invariants.
  pub sb_size_log2: usize,
  pub input: Arc<Frame<T>>,
  pub input_hres: Arc<Plane<T>>, // half-resolution version of input luma
  pub input_qres: Arc<Plane<T>>, // quarter-resolution version of input luma
  /// Reconstructed frame.
  pub rec: Arc<Frame<T>>,
  pub cdfs: CDFContext,
  pub context_update_tile_id: usize, // tile id used for the CDFContext
  pub max_tile_size_bytes: u32,
  pub deblock: DeblockState,
  pub segmentation: SegmentationState,
  pub restoration: RestorationState,
  // Because we only reference these within a tile context,
  // these are stored per-tile for easier access.
  pub frame_me_stats: RefMEStats,
  pub enc_stats: EncoderStats,
}
427
428impl<T: Pixel> FrameState<T> {
429  pub fn new(fi: &FrameInvariants<T>) -> Self {
430    // TODO(negge): Use fi.cfg.chroma_sampling when we store VideoDetails in FrameInvariants
431    FrameState::new_with_frame(
432      fi,
433      Arc::new(Frame::new(fi.width, fi.height, fi.sequence.chroma_sampling)),
434    )
435  }
436
437  /// Similar to [`FrameState::new_with_frame`], but takes an `me_stats`
438  /// and `rec` to enable reusing the same underlying allocations to create
439  /// a `FrameState`
440  ///
441  /// This function primarily exists for [`estimate_inter_costs`], and so
442  /// it does not create hres or qres versions of `frame` as downscaling is
443  /// somewhat expensive and are not needed for [`estimate_inter_costs`].
444  pub fn new_with_frame_and_me_stats_and_rec(
445    fi: &FrameInvariants<T>, frame: Arc<Frame<T>>, me_stats: RefMEStats,
446    rec: Arc<Frame<T>>,
447  ) -> Self {
448    let rs = RestorationState::new(fi, &frame);
449
450    let hres = Plane::new(0, 0, 0, 0, 0, 0);
451    let qres = Plane::new(0, 0, 0, 0, 0, 0);
452
453    Self {
454      sb_size_log2: fi.sb_size_log2(),
455      input: frame,
456      input_hres: Arc::new(hres),
457      input_qres: Arc::new(qres),
458      rec,
459      cdfs: CDFContext::new(0),
460      context_update_tile_id: 0,
461      max_tile_size_bytes: 0,
462      deblock: Default::default(),
463      segmentation: Default::default(),
464      restoration: rs,
465      frame_me_stats: me_stats,
466      enc_stats: Default::default(),
467    }
468  }
469
470  pub fn new_with_frame(
471    fi: &FrameInvariants<T>, frame: Arc<Frame<T>>,
472  ) -> Self {
473    let rs = RestorationState::new(fi, &frame);
474    let luma_width = frame.planes[0].cfg.width;
475    let luma_height = frame.planes[0].cfg.height;
476
477    let hres = frame.planes[0].downsampled(fi.width, fi.height);
478    let qres = hres.downsampled(fi.width, fi.height);
479
480    Self {
481      sb_size_log2: fi.sb_size_log2(),
482      input: frame,
483      input_hres: Arc::new(hres),
484      input_qres: Arc::new(qres),
485      rec: Arc::new(Frame::new(
486        luma_width,
487        luma_height,
488        fi.sequence.chroma_sampling,
489      )),
490      cdfs: CDFContext::new(0),
491      context_update_tile_id: 0,
492      max_tile_size_bytes: 0,
493      deblock: Default::default(),
494      segmentation: Default::default(),
495      restoration: rs,
496      frame_me_stats: FrameMEStats::new_arc_array(fi.w_in_b, fi.h_in_b),
497      enc_stats: Default::default(),
498    }
499  }
500
  /// Runs `f` on a `TileStateMut` built over this frame state and returns
  /// whatever `f` returns.
  ///
  /// The tile state starts at superblock offset (0, 0) and is given the
  /// width and height of the reconstructed luma plane.
  ///
  /// # Panics
  ///
  /// Panics with "poisoned lock" if taking the write lock on
  /// `frame_me_stats` fails.
  pub fn apply_tile_state_mut<F, R>(&mut self, f: F) -> R
  where
    F: FnOnce(&mut TileStateMut<'_, T>) -> R,
  {
    let PlaneConfig { width, height, .. } = self.rec.planes[0].cfg;
    let sbo_0 = PlaneSuperBlockOffset(SuperBlockOffset { x: 0, y: 0 });
    // The handle is cloned before locking; presumably so the lock guard does
    // not borrow `self`, which is passed mutably below — confirm.
    let frame_me_stats = self.frame_me_stats.clone();
    let frame_me_stats = &mut *frame_me_stats.write().expect("poisoned lock");
    let ts = &mut TileStateMut::new(
      self,
      sbo_0,
      self.sb_size_log2,
      width,
      height,
      frame_me_stats,
    );

    f(ts)
  }
520}
521
/// Deblocking filter parameters.
#[derive(Copy, Clone, Debug)]
pub struct DeblockState {
  pub levels: [u8; MAX_PLANES + 1], // Y vertical edges, Y horizontal, U, V
  pub sharpness: u8,
  pub deltas_enabled: bool,
  pub delta_updates_enabled: bool,
  /// One delta per reference frame slot.
  pub ref_deltas: [i8; REF_FRAMES],
  pub mode_deltas: [i8; 2],
  pub block_deltas_enabled: bool,
  pub block_delta_shift: u8,
  pub block_delta_multi: bool,
}
534
535impl Default for DeblockState {
536  fn default() -> Self {
537    DeblockState {
538      levels: [8, 8, 4, 4],
539      sharpness: 0,
540      deltas_enabled: false, // requires delta_q_enabled
541      delta_updates_enabled: false,
542      ref_deltas: [1, 0, 0, 0, 0, -1, -1, -1],
543      mode_deltas: [0, 0],
544      block_deltas_enabled: false,
545      block_delta_shift: 0,
546      block_delta_multi: false,
547    }
548  }
549}
550
/// Segmentation parameters together with the per-segment thresholds derived
/// from them.
#[derive(Copy, Clone, Debug, Default)]
pub struct SegmentationState {
  pub enabled: bool,
  pub update_data: bool,
  pub update_map: bool,
  pub preskip: bool,
  pub last_active_segid: u8,
  /// Per-segment feature flags, indexed `[segment][feature]`.
  pub features: [[bool; SegLvl::SEG_LVL_MAX as usize]; 8],
  /// Per-segment feature values, indexed `[segment][feature]`.
  pub data: [[i16; SegLvl::SEG_LVL_MAX as usize]; 8],
  /// Thresholds between adjacent segments, filled in by `update_threshold`;
  /// entry `i` is computed from segments `i` and `i + 1`.
  pub threshold: [DistortionScale; 7],
  pub min_segment: u8,
  pub max_segment: u8,
}
564
565impl SegmentationState {
566  #[profiling::function]
567  pub fn update_threshold(&mut self, base_q_idx: u8, bd: usize) {
568    let base_ac_q = ac_q(base_q_idx, 0, bd).get() as u64;
569    let real_ac_q = ArrayVec::<_, MAX_SEGMENTS>::from_iter(
570      self.data[..=self.max_segment as usize].iter().map(|data| {
571        ac_q(base_q_idx, data[SegLvl::SEG_LVL_ALT_Q as usize] as i8, bd).get()
572          as u64
573      }),
574    );
575    self.threshold.fill(DistortionScale(0));
576    for ((q1, q2), threshold) in
577      real_ac_q.iter().skip(1).zip(&real_ac_q).zip(&mut self.threshold)
578    {
579      *threshold = DistortionScale::new(base_ac_q.pow(2), q1 * q2);
580    }
581  }
582
583  #[cfg(feature = "dump_lookahead_data")]
584  pub fn dump_threshold(
585    &self, data_location: std::path::PathBuf, input_frameno: u64,
586  ) {
587    use byteorder::{NativeEndian, WriteBytesExt};
588    let file_name = format!("{:010}-thresholds", input_frameno);
589    let max_segment = self.max_segment;
590    // dynamic allocation: debugging only
591    let mut buf = vec![];
592    buf.write_u64::<NativeEndian>(max_segment as u64).unwrap();
593    for &v in &self.threshold[..max_segment as usize] {
594      buf.write_u32::<NativeEndian>(v.0).unwrap();
595    }
596    ::std::fs::write(data_location.join(file_name).with_extension("bin"), buf)
597      .unwrap();
598  }
599}
600
/// Frame Invariants are invariant inside a frame
///
/// Initial values for every field are set up in `FrameInvariants::new`.
#[allow(dead_code)]
#[derive(Debug, Clone)]
pub struct FrameInvariants<T: Pixel> {
  pub sequence: Arc<Sequence>,
  pub config: Arc<EncoderConfig>,
  /// Frame width, taken from the configuration.
  pub width: usize,
  /// Frame height, taken from the configuration.
  pub height: usize,
  pub render_width: u32,
  pub render_height: u32,
  /// True when the frame size differs from the sequence's maximum frame
  /// size.
  pub frame_size_override_flag: bool,
  /// True when the render size differs from the frame size.
  pub render_and_frame_size_different: bool,
  pub sb_width: usize,
  pub sb_height: usize,
  /// Frame width in blocks (MiCols).
  pub w_in_b: usize,
  /// Frame height in blocks (MiRows).
  pub h_in_b: usize,
  pub input_frameno: u64,
  pub order_hint: u32,
  pub show_frame: bool,
  pub showable_frame: bool,
  pub error_resilient: bool,
  pub intra_only: bool,
  pub allow_high_precision_mv: bool,
  pub frame_type: FrameType,
  pub frame_to_show_map_idx: u32,
  pub use_reduced_tx_set: bool,
  pub reference_mode: ReferenceMode,
  pub use_prev_frame_mvs: bool,
  pub partition_range: PartitionRange,
  pub globalmv_transformation_type: [GlobalMVMode; INTER_REFS_PER_FRAME],
  pub num_tg: usize,
  pub large_scale_tile: bool,
  pub disable_cdf_update: bool,
  pub allow_screen_content_tools: u32,
  pub force_integer_mv: u32,
  pub primary_ref_frame: u32,
  pub refresh_frame_flags: u32, // a bitmask that specifies which
  // reference frame slots will be updated with the current frame
  // after it is decoded.
  pub allow_intrabc: bool,
  pub use_ref_frame_mvs: bool,
  pub is_filter_switchable: bool,
  pub is_motion_mode_switchable: bool,
  pub disable_frame_end_update_cdf: bool,
  pub allow_warped_motion: bool,
  pub cdef_search_method: CDEFSearchMethod,
  pub cdef_damping: u8,
  pub cdef_bits: u8,
  pub cdef_y_strengths: [u8; 8],
  pub cdef_uv_strengths: [u8; 8],
  pub delta_q_present: bool,
  /// For each inter reference, the index of its slot in `rec_buffer`.
  pub ref_frames: [u8; INTER_REFS_PER_FRAME],
  pub ref_frame_sign_bias: [bool; INTER_REFS_PER_FRAME],
  /// Reference frame slots available to this frame.
  pub rec_buffer: ReferenceFramesSet<T>,
  pub base_q_idx: u8,
  pub dc_delta_q: [i8; 3],
  pub ac_delta_q: [i8; 3],
  pub lambda: f64,
  pub me_lambda: f64,
  pub dist_scale: [DistortionScale; 3],
  pub me_range_scale: u8,
  pub use_tx_domain_distortion: bool,
  pub use_tx_domain_rate: bool,
  pub idx_in_group_output: u64,
  pub pyramid_level: u64,
  pub enable_early_exit: bool,
  pub tx_mode_select: bool,
  pub enable_inter_txfm_split: bool,
  pub default_filter: FilterMode,
  pub enable_segmentation: bool,
  pub t35_metadata: Box<[T35]>,
  /// Target CPU feature level.
  pub cpu_feature_level: crate::cpu_features::CpuFeatureLevel,

  // These will be set if this is a coded (non-SEF) frame.
  // We do not need them for SEFs.
  pub coded_frame_data: Option<CodedFrameData<T>>,
}
679
/// These frame invariants are only used on coded frames, i.e. non-SEFs.
/// They are stored separately to avoid useless allocations
/// when we do not need them.
///
/// Currently this consists only of lookahead data.
/// This may change in the future.
#[derive(Debug, Clone)]
pub struct CodedFrameData<T: Pixel> {
  /// The lookahead version of `rec_buffer`, used for storing and propagating
  /// the original reference frames (rather than reconstructed ones). The
  /// lookahead uses both `rec_buffer` and `lookahead_rec_buffer`, where
  /// `rec_buffer` contains the current frame's reference frames and
  /// `lookahead_rec_buffer` contains the next frame's reference frames.
  pub lookahead_rec_buffer: ReferenceFramesSet<T>,
  /// Frame width in importance blocks.
  pub w_in_imp_b: usize,
  /// Frame height in importance blocks.
  pub h_in_imp_b: usize,
  /// Intra prediction cost estimations for each importance block.
  pub lookahead_intra_costs: Box<[u32]>,
  /// Future importance values for each importance block. That is, a value
  /// indicating how much future frames depend on the block (for example, via
  /// inter-prediction).
  pub block_importances: Box<[f32]>,
  /// Pre-computed distortion_scale.
  pub distortion_scales: Box<[DistortionScale]>,
  /// Pre-computed activity_scale.
  pub activity_scales: Box<[DistortionScale]>,
  /// Activity mask for the frame; starts out as the type's default value.
  pub activity_mask: ActivityMask,
  /// Combined metric of activity and distortion
  pub spatiotemporal_scores: Box<[DistortionScale]>,
}
712
713impl<T: Pixel> CodedFrameData<T> {
714  pub fn new(fi: &FrameInvariants<T>) -> CodedFrameData<T> {
715    // Width and height are padded to 8×8 block size.
716    let w_in_imp_b = fi.w_in_b / 2;
717    let h_in_imp_b = fi.h_in_b / 2;
718
719    CodedFrameData {
720      lookahead_rec_buffer: ReferenceFramesSet::new(),
721      w_in_imp_b,
722      h_in_imp_b,
723      // This is never used before it is assigned
724      lookahead_intra_costs: Box::new([]),
725      // dynamic allocation: once per frame
726      block_importances: vec![0.; w_in_imp_b * h_in_imp_b].into_boxed_slice(),
727      distortion_scales: vec![
728        DistortionScale::default();
729        w_in_imp_b * h_in_imp_b
730      ]
731      .into_boxed_slice(),
732      activity_scales: vec![
733        DistortionScale::default();
734        w_in_imp_b * h_in_imp_b
735      ]
736      .into_boxed_slice(),
737      activity_mask: Default::default(),
738      spatiotemporal_scores: Default::default(),
739    }
740  }
741
742  // Assumes that we have already computed activity scales and distortion scales
743  // Returns -0.5 log2(mean(scale))
744  #[profiling::function]
745  pub fn compute_spatiotemporal_scores(&mut self) -> i64 {
746    let mut scores = self
747      .distortion_scales
748      .iter()
749      .zip(self.activity_scales.iter())
750      .map(|(&d, &a)| d * a)
751      .collect::<Box<_>>();
752
753    let inv_mean = DistortionScale::inv_mean(&scores);
754
755    for score in scores.iter_mut() {
756      *score *= inv_mean;
757    }
758
759    for scale in self.distortion_scales.iter_mut() {
760      *scale *= inv_mean;
761    }
762
763    self.spatiotemporal_scores = scores;
764
765    inv_mean.blog64() >> 1
766  }
767
768  // Assumes that we have already computed distortion_scales
769  // Returns -0.5 log2(mean(scale))
770  #[profiling::function]
771  pub fn compute_temporal_scores(&mut self) -> i64 {
772    let inv_mean = DistortionScale::inv_mean(&self.distortion_scales);
773    for scale in self.distortion_scales.iter_mut() {
774      *scale *= inv_mean;
775    }
776    self.spatiotemporal_scores = self.distortion_scales.clone();
777    inv_mean.blog64() >> 1
778  }
779
780  #[cfg(feature = "dump_lookahead_data")]
781  pub fn dump_scales(
782    &self, data_location: std::path::PathBuf, scales: Scales,
783    input_frameno: u64,
784  ) {
785    use byteorder::{NativeEndian, WriteBytesExt};
786    let file_name = format!(
787      "{:010}-{}",
788      input_frameno,
789      match scales {
790        Scales::ActivityScales => "activity_scales",
791        Scales::DistortionScales => "distortion_scales",
792        Scales::SpatiotemporalScales => "spatiotemporal_scales",
793      }
794    );
795    // dynamic allocation: debugging only
796    let mut buf = vec![];
797    buf.write_u64::<NativeEndian>(self.w_in_imp_b as u64).unwrap();
798    buf.write_u64::<NativeEndian>(self.h_in_imp_b as u64).unwrap();
799    for &v in match scales {
800      Scales::ActivityScales => &self.activity_scales[..],
801      Scales::DistortionScales => &self.distortion_scales[..],
802      Scales::SpatiotemporalScales => &self.spatiotemporal_scores[..],
803    } {
804      buf.write_u32::<NativeEndian>(v.0).unwrap();
805    }
806    ::std::fs::write(data_location.join(file_name).with_extension("bin"), buf)
807      .unwrap();
808  }
809}
810
/// Selects which set of per-importance-block scales
/// `CodedFrameData::dump_scales` writes out.
#[cfg(feature = "dump_lookahead_data")]
pub enum Scales {
  /// Dump `activity_scales`.
  ActivityScales,
  /// Dump `distortion_scales`.
  DistortionScales,
  /// Dump `spatiotemporal_scores`.
  SpatiotemporalScales,
}
817
/// Derives the level within the pyramid for a frame at coding-order position
/// `pos`.
///
/// Only the `pyramid_depth` least significant bits of `pos` matter. With a
/// pyramid of depth 2, for example:
///
/// | low bits | level |
/// |----------|-------|
/// | `00`     | 0     |
/// | `01`     | 2     |
/// | `10`     | 1     |
/// | `11`     | 2     |
pub(crate) const fn pos_to_lvl(pos: u64, pyramid_depth: u64) -> u64 {
  // Setting bit `pyramid_depth` caps the trailing-zero count at the depth,
  // so the subtraction below cannot underflow.
  let capped = pos | (1 << pyramid_depth);
  let trailing = capped.trailing_zeros() as u64;
  pyramid_depth - trailing
}
828
829impl<T: Pixel> FrameInvariants<T> {
  // The clippy allowances cover the CDEF strength tables below, whose
  // entries are deliberately spelled out as `a * 4 + b`.
  #[allow(clippy::erasing_op, clippy::identity_op)]
  /// Creates the frame invariants for an intra-only key frame with default
  /// settings derived from `config` and `sequence`.
  ///
  /// `coded_frame_data` is left as `None`.
  ///
  /// # Panics
  ///
  /// - If the size of `T` does not match the sequence's bit depth
  pub fn new(config: Arc<EncoderConfig>, sequence: Arc<Sequence>) -> Self {
    // The pixel type must be wide enough to hold the sequence's bit depth.
    assert!(
      sequence.bit_depth <= mem::size_of::<T>() * 8,
      "bit depth cannot fit into u8"
    );

    let (width, height) = (config.width, config.height);
    let frame_size_override_flag = width as u32 != sequence.max_frame_width
      || height as u32 != sequence.max_frame_height;

    let (render_width, render_height) = config.render_size();
    let render_and_frame_size_different =
      render_width != width || render_height != height;

    let use_reduced_tx_set = config.speed_settings.transform.reduced_tx_set;
    // Transform-domain distortion is only used when tuning for PSNR.
    let use_tx_domain_distortion = config.tune == Tune::Psnr
      && config.speed_settings.transform.tx_domain_distortion;
    let use_tx_domain_rate = config.speed_settings.transform.tx_domain_rate;

    let w_in_b = 2 * config.width.align_power_of_two_and_shift(3); // MiCols, ((width+7)/8)<<3 >> MI_SIZE_LOG2
    let h_in_b = 2 * config.height.align_power_of_two_and_shift(3); // MiRows, ((height+7)/8)<<3 >> MI_SIZE_LOG2

    Self {
      width,
      height,
      render_width: render_width as u32,
      render_height: render_height as u32,
      frame_size_override_flag,
      render_and_frame_size_different,
      sb_width: width.align_power_of_two_and_shift(6),
      sb_height: height.align_power_of_two_and_shift(6),
      w_in_b,
      h_in_b,
      input_frameno: 0,
      order_hint: 0,
      show_frame: true,
      showable_frame: !sequence.reduced_still_picture_hdr,
      error_resilient: false,
      intra_only: true,
      allow_high_precision_mv: false,
      frame_type: FrameType::KEY,
      frame_to_show_map_idx: 0,
      use_reduced_tx_set,
      reference_mode: ReferenceMode::SINGLE,
      use_prev_frame_mvs: false,
      partition_range: config.speed_settings.partition.partition_range,
      globalmv_transformation_type: [GlobalMVMode::IDENTITY;
        INTER_REFS_PER_FRAME],
      num_tg: 1,
      large_scale_tile: false,
      disable_cdf_update: false,
      allow_screen_content_tools: sequence.force_screen_content_tools,
      force_integer_mv: 1,
      primary_ref_frame: PRIMARY_REF_NONE,
      refresh_frame_flags: ALL_REF_FRAMES_MASK,
      allow_intrabc: false,
      use_ref_frame_mvs: false,
      is_filter_switchable: false,
      is_motion_mode_switchable: false, // 0: only the SIMPLE motion mode will be used.
      disable_frame_end_update_cdf: sequence.reduced_still_picture_hdr,
      allow_warped_motion: false,
      cdef_search_method: CDEFSearchMethod::PickFromQ,
      cdef_damping: 3,
      cdef_bits: 0,
      cdef_y_strengths: [
        0 * 4 + 0,
        1 * 4 + 0,
        2 * 4 + 1,
        3 * 4 + 1,
        5 * 4 + 2,
        7 * 4 + 3,
        10 * 4 + 3,
        13 * 4 + 3,
      ],
      cdef_uv_strengths: [
        0 * 4 + 0,
        1 * 4 + 0,
        2 * 4 + 1,
        3 * 4 + 1,
        5 * 4 + 2,
        7 * 4 + 3,
        10 * 4 + 3,
        13 * 4 + 3,
      ],
      delta_q_present: false,
      ref_frames: [0; INTER_REFS_PER_FRAME],
      ref_frame_sign_bias: [false; INTER_REFS_PER_FRAME],
      rec_buffer: ReferenceFramesSet::new(),
      base_q_idx: config.quantizer as u8,
      dc_delta_q: [0; 3],
      ac_delta_q: [0; 3],
      lambda: 0.0,
      dist_scale: Default::default(),
      me_lambda: 0.0,
      me_range_scale: 1,
      use_tx_domain_distortion,
      use_tx_domain_rate,
      idx_in_group_output: 0,
      pyramid_level: 0,
      enable_early_exit: true,
      tx_mode_select: false,
      default_filter: FilterMode::REGULAR,
      cpu_feature_level: Default::default(),
      enable_segmentation: config.speed_settings.segmentation
        != SegmentationLevel::Disabled,
      enable_inter_txfm_split: config
        .speed_settings
        .transform
        .enable_inter_tx_split,
      t35_metadata: Box::new([]),
      sequence,
      config,
      coded_frame_data: None,
    }
  }
949
950  pub fn new_key_frame(
951    config: Arc<EncoderConfig>, sequence: Arc<Sequence>,
952    gop_input_frameno_start: u64, t35_metadata: Box<[T35]>,
953  ) -> Self {
954    let tx_mode_select = config.speed_settings.transform.rdo_tx_decision;
955    let mut fi = Self::new(config, sequence);
956    fi.input_frameno = gop_input_frameno_start;
957    fi.tx_mode_select = tx_mode_select;
958    fi.coded_frame_data = Some(CodedFrameData::new(&fi));
959    fi.t35_metadata = t35_metadata;
960    fi
961  }
962
  /// Derives the invariants of an inter (or switch) frame from those of the
  /// previously coded frame.
  ///
  /// Returns the created `FrameInvariants`, or `None` if this should be
  /// a placeholder frame, i.e. when the input frame number computed by
  /// `inter_cfg` is not below `next_keyframe_input_frameno`.
  ///
  /// - `previous_coded_fi`: invariants that are thin-cloned as the starting
  ///   point; its `coded_frame_data` is cloned as well unless the new frame
  ///   is a show-existing frame.
  /// - `inter_cfg`: supplies the group layout queries (input frame number,
  ///   index in group, order hint, pyramid level, slot index, show flags).
  /// - `gop_input_frameno_start`, `output_frameno_in_gop`: position inputs
  ///   forwarded to the `inter_cfg` queries.
  /// - `error_resilient`: requested error-resilient flag; it is forced to
  ///   `true` for switch frames.
  /// - `t35_metadata`: kept only if the resulting frame has `show_frame` set.
  pub(crate) fn new_inter_frame(
    previous_coded_fi: &Self, inter_cfg: &InterConfig,
    gop_input_frameno_start: u64, output_frameno_in_gop: u64,
    next_keyframe_input_frameno: u64, error_resilient: bool,
    t35_metadata: Box<[T35]>,
  ) -> Option<Self> {
    let input_frameno = inter_cfg
      .get_input_frameno(output_frameno_in_gop, gop_input_frameno_start);
    if input_frameno >= next_keyframe_input_frameno {
      // This is an invalid frame. We set it as a placeholder in the FI list.
      return None;
    }

    // We have this special thin clone method to avoid cloning the
    // quite large lookahead data for SEFs, when it is not needed.
    let mut fi = previous_coded_fi.clone_without_coded_data();
    fi.intra_only = false;
    fi.force_integer_mv = 0; // note: should be 1 if fi.intra_only is true
    fi.idx_in_group_output =
      inter_cfg.get_idx_in_group_output(output_frameno_in_gop);
    fi.tx_mode_select = fi.enable_inter_txfm_split;

    // Only frames that are actually coded carry `coded_frame_data`; a
    // show-existing frame keeps the `None` left by the thin clone above.
    let show_existing_frame =
      inter_cfg.get_show_existing_frame(fi.idx_in_group_output);
    if !show_existing_frame {
      fi.coded_frame_data = previous_coded_fi.coded_frame_data.clone();
    }

    fi.order_hint =
      inter_cfg.get_order_hint(output_frameno_in_gop, fi.idx_in_group_output);

    fi.pyramid_level = inter_cfg.get_level(fi.idx_in_group_output);

    // A switch frame is emitted at pyramid level 0 whenever switch frames
    // are enabled and the output frame number is a multiple of the interval.
    fi.frame_type = if (inter_cfg.switch_frame_interval > 0)
      && (output_frameno_in_gop % inter_cfg.switch_frame_interval == 0)
      && (fi.pyramid_level == 0)
    {
      FrameType::SWITCH
    } else {
      FrameType::INTER
    };
    // Switch frames are always error resilient, whatever the caller asked.
    fi.error_resilient =
      if fi.frame_type == FrameType::SWITCH { true } else { error_resilient };

    fi.frame_size_override_flag = if fi.frame_type == FrameType::SWITCH {
      true
    } else if fi.sequence.reduced_still_picture_hdr {
      false
    } else if fi.frame_type == FrameType::INTER
      && !fi.error_resilient
      && fi.render_and_frame_size_different
    {
      // force frame_size_with_refs() code path if render size != frame size
      true
    } else {
      fi.width as u32 != fi.sequence.max_frame_width
        || fi.height as u32 != fi.sequence.max_frame_height
    };

    // this is the slot that the current frame is going to be saved into
    let slot_idx = inter_cfg.get_slot_idx(fi.pyramid_level, fi.order_hint);
    fi.show_frame = inter_cfg.get_show_frame(fi.idx_in_group_output);
    // T.35 metadata is attached only to frames that are shown.
    fi.t35_metadata = if fi.show_frame { t35_metadata } else { Box::new([]) };
    fi.frame_to_show_map_idx = slot_idx;
    // Switch frames refresh every slot, show-existing frames refresh none,
    // and any other frame refreshes only its own slot.
    fi.refresh_frame_flags = if fi.frame_type == FrameType::SWITCH {
      ALL_REF_FRAMES_MASK
    } else if fi.is_show_existing_frame() {
      0
    } else {
      1 << slot_idx
    };

    let second_ref_frame =
      if fi.idx_in_group_output == 0 { LAST2_FRAME } else { ALTREF_FRAME };
    let ref_in_previous_group = LAST3_FRAME;

    // reuse probability estimates from previous frames only in top level frames
    fi.primary_ref_frame = if fi.error_resilient || (fi.pyramid_level > 2) {
      PRIMARY_REF_NONE
    } else {
      (ref_in_previous_group.to_index()) as u32
    };

    if fi.pyramid_level == 0 {
      // level 0 has no forward references
      // default to last P frame
      fi.ref_frames = [
        // calculations done relative to the slot_idx for this frame.
        // the last four frames can be found by subtracting from the current slot_idx
        // add 4 to prevent underflow
        // TODO: maybe use order_hint here like in get_slot_idx?
        // this is the previous P frame
        (slot_idx + 4 - 1) as u8 % 4
          ; INTER_REFS_PER_FRAME];
      if inter_cfg.multiref {
        // use the second-previous p frame as a second reference frame
        fi.ref_frames[second_ref_frame.to_index()] =
          (slot_idx + 4 - 2) as u8 % 4;
      }
    } else {
      debug_assert!(inter_cfg.multiref);

      // fill in defaults
      // default to backwards reference in lower level
      fi.ref_frames = [{
        // Order hint of the frame `group_input_len >> pyramid_level` before
        // this one; its slot depends on the level computed by `pos_to_lvl`.
        let oh = fi.order_hint
          - (inter_cfg.group_input_len as u32 >> fi.pyramid_level);
        let lvl1 = pos_to_lvl(oh as u64, inter_cfg.pyramid_depth);
        if lvl1 == 0 {
          ((oh >> inter_cfg.pyramid_depth) % 4) as u8
        } else {
          3 + lvl1 as u8
        }
      }; INTER_REFS_PER_FRAME];
      // use forward reference in lower level as a second reference frame
      fi.ref_frames[second_ref_frame.to_index()] = {
        // Same slot computation as above, for the frame the same distance
        // after this one.
        let oh = fi.order_hint
          + (inter_cfg.group_input_len as u32 >> fi.pyramid_level);
        let lvl2 = pos_to_lvl(oh as u64, inter_cfg.pyramid_depth);
        if lvl2 == 0 {
          ((oh >> inter_cfg.pyramid_depth) % 4) as u8
        } else {
          3 + lvl2 as u8
        }
      };
      // use a reference to the previous frame in the same level
      // (horizontally) as a third reference
      fi.ref_frames[ref_in_previous_group.to_index()] = slot_idx as u8;
    }

    // Must run after `ref_frames` and `order_hint` are final, since the sign
    // bias is computed from both.
    fi.set_ref_frame_sign_bias();

    fi.reference_mode = if inter_cfg.multiref && fi.idx_in_group_output != 0 {
      ReferenceMode::SELECT
    } else {
      ReferenceMode::SINGLE
    };
    fi.input_frameno = input_frameno;
    fi.me_range_scale = (inter_cfg.group_input_len >> fi.pyramid_level) as u8;

    if fi.show_frame || fi.showable_frame {
      // `frame_timestamp()` reads `fi.input_frameno`, which was updated just
      // above.
      let cur_frame_time = fi.frame_timestamp();
      // Increment the film grain seed for the next frame
      // (`Arc::make_mut` clones the config first if it is still shared).
      if let Some(params) =
        Arc::make_mut(&mut fi.config).get_film_grain_mut_at(cur_frame_time)
      {
        params.random_seed = params.random_seed.wrapping_add(3248);
        // A seed that wraps to zero is replaced by the default seed.
        if params.random_seed == 0 {
          params.random_seed = DEFAULT_GRAIN_SEED;
        }
      }
    }

    Some(fi)
  }
1120
1121  pub fn is_show_existing_frame(&self) -> bool {
1122    self.coded_frame_data.is_none()
1123  }
1124
1125  pub fn clone_without_coded_data(&self) -> Self {
1126    Self {
1127      coded_frame_data: None,
1128
1129      sequence: self.sequence.clone(),
1130      config: self.config.clone(),
1131      width: self.width,
1132      height: self.height,
1133      render_width: self.render_width,
1134      render_height: self.render_height,
1135      frame_size_override_flag: self.frame_size_override_flag,
1136      render_and_frame_size_different: self.render_and_frame_size_different,
1137      sb_width: self.sb_width,
1138      sb_height: self.sb_height,
1139      w_in_b: self.w_in_b,
1140      h_in_b: self.h_in_b,
1141      input_frameno: self.input_frameno,
1142      order_hint: self.order_hint,
1143      show_frame: self.show_frame,
1144      showable_frame: self.showable_frame,
1145      error_resilient: self.error_resilient,
1146      intra_only: self.intra_only,
1147      allow_high_precision_mv: self.allow_high_precision_mv,
1148      frame_type: self.frame_type,
1149      frame_to_show_map_idx: self.frame_to_show_map_idx,
1150      use_reduced_tx_set: self.use_reduced_tx_set,
1151      reference_mode: self.reference_mode,
1152      use_prev_frame_mvs: self.use_prev_frame_mvs,
1153      partition_range: self.partition_range,
1154      globalmv_transformation_type: self.globalmv_transformation_type,
1155      num_tg: self.num_tg,
1156      large_scale_tile: self.large_scale_tile,
1157      disable_cdf_update: self.disable_cdf_update,
1158      allow_screen_content_tools: self.allow_screen_content_tools,
1159      force_integer_mv: self.force_integer_mv,
1160      primary_ref_frame: self.primary_ref_frame,
1161      refresh_frame_flags: self.refresh_frame_flags,
1162      allow_intrabc: self.allow_intrabc,
1163      use_ref_frame_mvs: self.use_ref_frame_mvs,
1164      is_filter_switchable: self.is_filter_switchable,
1165      is_motion_mode_switchable: self.is_motion_mode_switchable,
1166      disable_frame_end_update_cdf: self.disable_frame_end_update_cdf,
1167      allow_warped_motion: self.allow_warped_motion,
1168      cdef_search_method: self.cdef_search_method,
1169      cdef_damping: self.cdef_damping,
1170      cdef_bits: self.cdef_bits,
1171      cdef_y_strengths: self.cdef_y_strengths,
1172      cdef_uv_strengths: self.cdef_uv_strengths,
1173      delta_q_present: self.delta_q_present,
1174      ref_frames: self.ref_frames,
1175      ref_frame_sign_bias: self.ref_frame_sign_bias,
1176      rec_buffer: self.rec_buffer.clone(),
1177      base_q_idx: self.base_q_idx,
1178      dc_delta_q: self.dc_delta_q,
1179      ac_delta_q: self.ac_delta_q,
1180      lambda: self.lambda,
1181      me_lambda: self.me_lambda,
1182      dist_scale: self.dist_scale,
1183      me_range_scale: self.me_range_scale,
1184      use_tx_domain_distortion: self.use_tx_domain_distortion,
1185      use_tx_domain_rate: self.use_tx_domain_rate,
1186      idx_in_group_output: self.idx_in_group_output,
1187      pyramid_level: self.pyramid_level,
1188      enable_early_exit: self.enable_early_exit,
1189      tx_mode_select: self.tx_mode_select,
1190      enable_inter_txfm_split: self.enable_inter_txfm_split,
1191      default_filter: self.default_filter,
1192      enable_segmentation: self.enable_segmentation,
1193      t35_metadata: self.t35_metadata.clone(),
1194      cpu_feature_level: self.cpu_feature_level,
1195    }
1196  }
1197
1198  pub fn set_ref_frame_sign_bias(&mut self) {
1199    for i in 0..INTER_REFS_PER_FRAME {
1200      self.ref_frame_sign_bias[i] = if !self.sequence.enable_order_hint {
1201        false
1202      } else if let Some(ref rec) =
1203        self.rec_buffer.frames[self.ref_frames[i] as usize]
1204      {
1205        let hint = rec.order_hint;
1206        self.sequence.get_relative_dist(hint, self.order_hint) > 0
1207      } else {
1208        false
1209      };
1210    }
1211  }
1212
1213  pub fn get_frame_subtype(&self) -> usize {
1214    if self.frame_type == FrameType::KEY {
1215      FRAME_SUBTYPE_I
1216    } else {
1217      FRAME_SUBTYPE_P + (self.pyramid_level as usize)
1218    }
1219  }
1220
1221  fn pick_strength_from_q(&mut self, qps: &QuantizerParameters) {
1222    self.cdef_damping = 3 + (self.base_q_idx >> 6);
1223    let q = bexp64(qps.log_target_q + q57(QSCALE)) as f32;
1224    /* These coefficients were trained on libaom. */
1225    let (y_f1, y_f2, uv_f1, uv_f2) = if !self.intra_only {
1226      (
1227        poly2(q, -0.0000023593946_f32, 0.0068615186_f32, 0.02709886_f32, 15),
1228        poly2(q, -0.00000057629734_f32, 0.0013993345_f32, 0.03831067_f32, 3),
1229        poly2(q, -0.0000007095069_f32, 0.0034628846_f32, 0.00887099_f32, 15),
1230        poly2(q, 0.00000023874085_f32, 0.00028223585_f32, 0.05576307_f32, 3),
1231      )
1232    } else {
1233      (
1234        poly2(q, 0.0000033731974_f32, 0.008070594_f32, 0.0187634_f32, 15),
1235        poly2(q, 0.0000029167343_f32, 0.0027798624_f32, 0.0079405_f32, 3),
1236        poly2(q, -0.0000130790995_f32, 0.012892405_f32, -0.00748388_f32, 15),
1237        poly2(q, 0.0000032651783_f32, 0.00035520183_f32, 0.00228092_f32, 3),
1238      )
1239    };
1240    self.cdef_y_strengths[0] = (y_f1 * CDEF_SEC_STRENGTHS as i32 + y_f2) as u8;
1241    self.cdef_uv_strengths[0] =
1242      (uv_f1 * CDEF_SEC_STRENGTHS as i32 + uv_f2) as u8;
1243  }
1244
1245  pub fn set_quantizers(&mut self, qps: &QuantizerParameters) {
1246    self.base_q_idx = qps.ac_qi[0];
1247    let base_q_idx = self.base_q_idx as i32;
1248    for pi in 0..3 {
1249      self.dc_delta_q[pi] = (qps.dc_qi[pi] as i32 - base_q_idx) as i8;
1250      self.ac_delta_q[pi] = (qps.ac_qi[pi] as i32 - base_q_idx) as i8;
1251    }
1252    self.lambda =
1253      qps.lambda * ((1 << (2 * (self.sequence.bit_depth - 8))) as f64);
1254    self.me_lambda = self.lambda.sqrt();
1255    self.dist_scale = qps.dist_scale.map(DistortionScale::from);
1256
1257    match self.cdef_search_method {
1258      CDEFSearchMethod::PickFromQ => {
1259        self.pick_strength_from_q(qps);
1260      }
1261      // TODO: implement FastSearch and FullSearch
1262      _ => unreachable!(),
1263    }
1264  }
1265
1266  #[inline(always)]
1267  pub fn sb_size_log2(&self) -> usize {
1268    self.sequence.tiling.sb_size_log2
1269  }
1270
1271  pub fn film_grain_params(&self) -> Option<&GrainTableSegment> {
1272    if !(self.show_frame || self.showable_frame) {
1273      return None;
1274    }
1275    let cur_frame_time = self.frame_timestamp();
1276    self.config.get_film_grain_at(cur_frame_time)
1277  }
1278
1279  pub fn frame_timestamp(&self) -> u64 {
1280    // I don't know why this is the base unit for a timestamp but it is. 1/10000000 of a second.
1281    const TIMESTAMP_BASE_UNIT: u64 = 10_000_000;
1282
1283    self.input_frameno * TIMESTAMP_BASE_UNIT * self.sequence.time_base.num
1284      / self.sequence.time_base.den
1285  }
1286}
1287
1288impl<T: Pixel> fmt::Display for FrameInvariants<T> {
1289  fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
1290    write!(f, "Input Frame {} - {}", self.input_frameno, self.frame_type)
1291  }
1292}
1293
1294/// # Errors
1295///
1296/// - If the frame packet cannot be written to
1297pub fn write_temporal_delimiter(packet: &mut dyn io::Write) -> io::Result<()> {
1298  packet.write_all(&TEMPORAL_DELIMITER)?;
1299  Ok(())
1300}
1301
1302fn write_key_frame_obus<T: Pixel>(
1303  packet: &mut dyn io::Write, fi: &FrameInvariants<T>, obu_extension: u32,
1304) -> io::Result<()> {
1305  let mut buf1 = Vec::new();
1306  let mut buf2 = Vec::new();
1307  {
1308    let mut bw2 = BitWriter::endian(&mut buf2, BigEndian);
1309    bw2.write_sequence_header_obu(fi)?;
1310    bw2.write_bit(true)?; // trailing bit
1311    bw2.byte_align()?;
1312  }
1313
1314  {
1315    let mut bw1 = BitWriter::endian(&mut buf1, BigEndian);
1316    bw1.write_obu_header(ObuType::OBU_SEQUENCE_HEADER, obu_extension)?;
1317  }
1318  packet.write_all(&buf1).unwrap();
1319  buf1.clear();
1320
1321  {
1322    let mut bw1 = BitWriter::endian(&mut buf1, BigEndian);
1323    bw1.write_uleb128(buf2.len() as u64)?;
1324  }
1325
1326  packet.write_all(&buf1).unwrap();
1327  buf1.clear();
1328
1329  packet.write_all(&buf2).unwrap();
1330  buf2.clear();
1331
1332  if fi.sequence.content_light.is_some() {
1333    let mut bw1 = BitWriter::endian(&mut buf1, BigEndian);
1334    bw1.write_sequence_metadata_obu(
1335      ObuMetaType::OBU_META_HDR_CLL,
1336      &fi.sequence,
1337    )?;
1338    packet.write_all(&buf1).unwrap();
1339    buf1.clear();
1340  }
1341
1342  if fi.sequence.mastering_display.is_some() {
1343    let mut bw1 = BitWriter::endian(&mut buf1, BigEndian);
1344    bw1.write_sequence_metadata_obu(
1345      ObuMetaType::OBU_META_HDR_MDCV,
1346      &fi.sequence,
1347    )?;
1348    packet.write_all(&buf1).unwrap();
1349    buf1.clear();
1350  }
1351
1352  Ok(())
1353}
1354
1355/// Write into `dst` the difference between the blocks at `src1` and `src2`
1356fn diff<T: Pixel>(
1357  dst: &mut [MaybeUninit<i16>], src1: &PlaneRegion<'_, T>,
1358  src2: &PlaneRegion<'_, T>,
1359) {
1360  debug_assert!(dst.len() % src1.rect().width == 0);
1361  debug_assert_eq!(src1.rows_iter().count(), src1.rect().height);
1362
1363  let width = src1.rect().width;
1364  let height = src1.rect().height;
1365
1366  if width == 0
1367    || width != src2.rect().width
1368    || height == 0
1369    || src1.rows_iter().len() != src2.rows_iter().len()
1370  {
1371    debug_assert!(false);
1372    return;
1373  }
1374
1375  for ((l, s1), s2) in
1376    dst.chunks_exact_mut(width).zip(src1.rows_iter()).zip(src2.rows_iter())
1377  {
1378    for ((r, v1), v2) in l.iter_mut().zip(s1).zip(s2) {
1379      r.write(i16::cast_from(*v1) - i16::cast_from(*v2));
1380    }
1381  }
1382}
1383
1384fn get_qidx<T: Pixel>(
1385  fi: &FrameInvariants<T>, ts: &TileStateMut<'_, T>, cw: &ContextWriter,
1386  tile_bo: TileBlockOffset,
1387) -> u8 {
1388  let mut qidx = fi.base_q_idx;
1389  let sidx = cw.bc.blocks[tile_bo].segmentation_idx as usize;
1390  if ts.segmentation.features[sidx][SegLvl::SEG_LVL_ALT_Q as usize] {
1391    let delta = ts.segmentation.data[sidx][SegLvl::SEG_LVL_ALT_Q as usize];
1392    qidx = clamp((qidx as i16) + delta, 0, 255) as u8;
1393  }
1394  qidx
1395}
1396
/// For a transform block,
/// predict, transform, quantize, write coefficients to a bitstream,
/// dequantize, inverse-transform.
///
/// # Returns
///
/// A `(has_coeff, tx_dist)` pair:
///
/// - `has_coeff` is the value returned by `write_coeffs_lv_map` when the
///   coefficients are written (`need_recon_pixel` is set or
///   `rdo_type.needs_coeff_rate()` holds), and `true` when that step is
///   bypassed.
/// - `tx_dist` is the scaled transform-domain distortion when
///   `rdo_type.needs_tx_dist()` holds and the block has a visible area, and
///   zero otherwise.
///
/// `(false, ScaledDistortion::zero())` is returned early when `tx_bo` lies
/// outside the tile's block dimensions, or when `skip` is set (in the latter
/// case after any intra prediction has been written to the reconstruction).
///
/// # Panics
///
/// - If the block size is invalid for subsampling
/// - If a tx type other than DCT is used for 64x64 blocks
pub fn encode_tx_block<T: Pixel, W: Writer>(
  fi: &FrameInvariants<T>,
  ts: &mut TileStateMut<'_, T>,
  cw: &mut ContextWriter,
  w: &mut W,
  p: usize,
  // Offset in the luma plane of the partition enclosing this block.
  tile_partition_bo: TileBlockOffset,
  // tx block position within a partition, unit: tx block number
  bx: usize,
  by: usize,
  // Offset in the luma plane where this tx block is colocated. Note that for
  // a chroma block, this offset might be outside of the current partition.
  // For example in 4:2:0, four 4x4 luma partitions share one 4x4 chroma block,
  // this block is part of the last 4x4 partition, but its `tx_bo` offset
  // matches the offset of the first 4x4 partition.
  tx_bo: TileBlockOffset,
  mode: PredictionMode,
  tx_size: TxSize,
  tx_type: TxType,
  bsize: BlockSize,
  po: PlaneOffset,
  skip: bool,
  qidx: u8,
  ac: &[i16],
  pred_intra_param: IntraParam,
  rdo_type: RDOType,
  need_recon_pixel: bool,
) -> (bool, ScaledDistortion) {
  let PlaneConfig { xdec, ydec, .. } = ts.input.planes[p].cfg;
  let tile_rect = ts.tile_rect().decimated(xdec, ydec);
  // Region of plane `p` covered by this transform block.
  let area = Area::BlockRect {
    bo: tx_bo.0,
    width: tx_size.width(),
    height: tx_size.height(),
  };

  // Nothing to do for a block that starts outside the tile.
  if tx_bo.0.x >= ts.mi_width || tx_bo.0.y >= ts.mi_height {
    return (false, ScaledDistortion::zero());
  }

  debug_assert!(tx_bo.0.x < ts.mi_width);
  debug_assert!(tx_bo.0.y < ts.mi_height);

  // Transforms larger than 32x32 are only expected with DCT_DCT.
  debug_assert!(
    tx_size.sqr() <= TxSize::TX_32X32 || tx_type == TxType::DCT_DCT
  );

  let plane_bsize = bsize.subsampled_size(xdec, ydec).unwrap();

  debug_assert!(p != 0 || !mode.is_intra() || tx_size.block_size() == plane_bsize || need_recon_pixel,
    "mode.is_intra()={:#?}, plane={:#?}, tx_size.block_size()={:#?}, plane_bsize={:#?}, need_recon_pixel={:#?}",
    mode.is_intra(), p, tx_size.block_size(), plane_bsize, need_recon_pixel);

  // Intra edge filter parameters are only gathered for directional modes
  // when the sequence enables the edge filter.
  let ief_params = if mode.is_directional()
    && fi.sequence.enable_intra_edge_filter
  {
    let (plane_xdec, plane_ydec) = if p == 0 { (0, 0) } else { (xdec, ydec) };
    let above_block_info =
      ts.above_block_info(tile_partition_bo, plane_xdec, plane_ydec);
    let left_block_info =
      ts.left_block_info(tile_partition_bo, plane_xdec, plane_ydec);
    Some(IntraEdgeFilterParameters::new(p, above_block_info, left_block_info))
  } else {
    None
  };

  let frame_bo = ts.to_frame_block_offset(tx_bo);
  let rec = &mut ts.rec.planes[p];

  // Intra prediction is performed here, per transform block, and written
  // straight into the reconstruction plane. No inter prediction happens in
  // this function.
  if mode.is_intra() {
    let bit_depth = fi.sequence.bit_depth;
    let mut edge_buf = Aligned::uninit_array();
    let edge_buf = get_intra_edges(
      &mut edge_buf,
      &rec.as_const(),
      tile_partition_bo,
      bx,
      by,
      bsize,
      po,
      tx_size,
      bit_depth,
      Some(mode),
      fi.sequence.enable_intra_edge_filter,
      pred_intra_param,
    );

    mode.predict_intra(
      tile_rect,
      &mut rec.subregion_mut(area),
      tx_size,
      bit_depth,
      ac,
      pred_intra_param,
      ief_params,
      &edge_buf,
      fi.cpu_feature_level,
    );
  }

  // A skipped block has no residual: the prediction is the reconstruction.
  if skip {
    return (false, ScaledDistortion::zero());
  }

  // `residual` and `coeffs` are sized for the largest (64x64) transform,
  // while the quantized/dequantized coefficient buffers only need the coded
  // area, which is at most 32x32 entries.
  let coded_tx_area = av1_get_coded_tx_size(tx_size).area();
  let mut residual = Aligned::<[MaybeUninit<i16>; 64 * 64]>::uninit_array();
  let mut coeffs = Aligned::<[MaybeUninit<T::Coeff>; 64 * 64]>::uninit_array();
  let mut qcoeffs =
    Aligned::<[MaybeUninit<T::Coeff>; 32 * 32]>::uninit_array();
  let mut rcoeffs =
    Aligned::<[MaybeUninit<T::Coeff>; 32 * 32]>::uninit_array();
  let residual = &mut residual.data[..tx_size.area()];
  let coeffs = &mut coeffs.data[..tx_size.area()];
  // Unlike the other buffers, `qcoeffs` is zero-initialized up front.
  let qcoeffs = init_slice_repeat_mut(
    &mut qcoeffs.data[..coded_tx_area],
    T::Coeff::cast_from(0),
  );
  let rcoeffs = &mut rcoeffs.data[..coded_tx_area];

  // Portion of the transform block that lies inside the (decimated) frame.
  let (visible_tx_w, visible_tx_h) = clip_visible_bsize(
    (fi.width + xdec) >> xdec,
    (fi.height + ydec) >> ydec,
    tx_size.block_size(),
    (frame_bo.0.x << MI_SIZE_LOG2) >> xdec,
    (frame_bo.0.y << MI_SIZE_LOG2) >> ydec,
  );

  // Residual = input - prediction; a block with no visible pixels gets an
  // all-zero residual instead.
  if visible_tx_w != 0 && visible_tx_h != 0 {
    diff(
      residual,
      &ts.input_tile.planes[p].subregion(area),
      &rec.subregion(area),
    );
  } else {
    residual.fill(MaybeUninit::new(0));
  }
  // SAFETY: `diff()` inits `tx_size.area()` elements when it matches size of `subregion(area)`
  let residual = unsafe { slice_assume_init_mut(residual) };

  forward_transform(
    residual,
    coeffs,
    tx_size.width(),
    tx_size,
    tx_type,
    fi.sequence.bit_depth,
    fi.cpu_feature_level,
  );
  // SAFETY: forward_transform initialized coeffs
  let coeffs = unsafe { slice_assume_init_mut(coeffs) };

  let eob = ts.qc.quantize(coeffs, qcoeffs, tx_size, tx_type);

  // Coefficients are written only when the reconstruction or the coefficient
  // rate is needed; otherwise the block is reported as having coefficients.
  let has_coeff = if need_recon_pixel || rdo_type.needs_coeff_rate() {
    debug_assert!((((fi.w_in_b - frame_bo.0.x) << MI_SIZE_LOG2) >> xdec) >= 4);
    debug_assert!((((fi.h_in_b - frame_bo.0.y) << MI_SIZE_LOG2) >> ydec) >= 4);
    // Transform dimensions clipped to the frame extent given by
    // `w_in_b`/`h_in_b`.
    let frame_clipped_txw: usize =
      (((fi.w_in_b - frame_bo.0.x) << MI_SIZE_LOG2) >> xdec)
        .min(tx_size.width());
    let frame_clipped_txh: usize =
      (((fi.h_in_b - frame_bo.0.y) << MI_SIZE_LOG2) >> ydec)
        .min(tx_size.height());

    cw.write_coeffs_lv_map(
      w,
      p,
      tx_bo,
      qcoeffs,
      eob,
      mode,
      tx_size,
      tx_type,
      plane_bsize,
      xdec,
      ydec,
      fi.use_reduced_tx_set,
      frame_clipped_txw,
      frame_clipped_txh,
    )
  } else {
    true
  };

  // Reconstruct
  dequantize(
    qidx,
    qcoeffs,
    eob,
    rcoeffs,
    tx_size,
    fi.sequence.bit_depth,
    fi.dc_delta_q[p],
    fi.ac_delta_q[p],
    fi.cpu_feature_level,
  );
  // SAFETY: dequantize initialized rcoeffs
  let rcoeffs = unsafe { slice_assume_init_mut(rcoeffs) };

  // The inverse transform is skipped when there is nothing to add
  // (`eob == 0`), and also when distortion is measured in the transform
  // domain and the caller does not need reconstructed pixels.
  if eob == 0 {
    // All zero coefficients is a no-op
  } else if !fi.use_tx_domain_distortion || need_recon_pixel {
    inverse_transform_add(
      rcoeffs,
      &mut rec.subregion_mut(area),
      eob,
      tx_size,
      tx_type,
      fi.sequence.bit_depth,
      fi.cpu_feature_level,
    );
  }

  let tx_dist =
    if rdo_type.needs_tx_dist() && visible_tx_w != 0 && visible_tx_h != 0 {
      // Store tx-domain distortion of this block
      // rcoeffs above 32 rows/cols aren't held in the array, because they are
      // always 0. The first 32x32 is stored first in coeffs so we can iterate
      // over coeffs and rcoeffs for the first 32 rows/cols. For the
      // coefficients above 32 rows/cols, we iterate over the rest of coeffs
      // with the assumption that rcoeff coefficients are zero.
      let mut raw_tx_dist = coeffs
        .iter()
        .zip(rcoeffs.iter())
        .map(|(&a, &b)| {
          let c = i32::cast_from(a) - i32::cast_from(b);
          (c * c) as u64
        })
        .sum::<u64>()
        + coeffs[rcoeffs.len()..]
          .iter()
          .map(|&a| {
            let c = i32::cast_from(a);
            (c * c) as u64
          })
          .sum::<u64>();

      // Scale the squared error by a shift that depends on the transform
      // size's log scale, rounding to nearest.
      let tx_dist_scale_bits = 2 * (3 - get_log_tx_scale(tx_size));
      let tx_dist_scale_rounding_offset = 1 << (tx_dist_scale_bits - 1);

      raw_tx_dist =
        (raw_tx_dist + tx_dist_scale_rounding_offset) >> tx_dist_scale_bits;

      if rdo_type == RDOType::TxDistEstRate {
        // look up rate and distortion in table
        let estimated_rate =
          estimate_rate(fi.base_q_idx, tx_size, raw_tx_dist);
        w.add_bits_frac(estimated_rate as u32);
      }

      // Weight the raw distortion by the block's distortion scale and the
      // per-plane scale.
      let bias = distortion_scale(fi, ts.to_frame_block_offset(tx_bo), bsize);
      RawDistortion::new(raw_tx_dist) * bias * fi.dist_scale[p]
    } else {
      ScaledDistortion::zero()
    };

  (has_coeff, tx_dist)
}
1663
1664/// # Panics
1665///
1666/// - If the block size is invalid for subsampling
1667#[profiling::function]
1668pub fn motion_compensate<T: Pixel>(
1669  fi: &FrameInvariants<T>, ts: &mut TileStateMut<'_, T>,
1670  cw: &mut ContextWriter, luma_mode: PredictionMode, ref_frames: [RefType; 2],
1671  mvs: [MotionVector; 2], bsize: BlockSize, tile_bo: TileBlockOffset,
1672  luma_only: bool,
1673) {
1674  debug_assert!(!luma_mode.is_intra());
1675
1676  let PlaneConfig { xdec: u_xdec, ydec: u_ydec, .. } = ts.input.planes[1].cfg;
1677
1678  // Inter mode prediction can take place once for a whole partition,
1679  // instead of each tx-block.
1680  let num_planes = 1
1681    + if !luma_only
1682      && has_chroma(
1683        tile_bo,
1684        bsize,
1685        u_xdec,
1686        u_ydec,
1687        fi.sequence.chroma_sampling,
1688      ) {
1689      2
1690    } else {
1691      0
1692    };
1693
1694  let luma_tile_rect = ts.tile_rect();
1695  let compound_buffer = &mut ts.inter_compound_buffers;
1696  for p in 0..num_planes {
1697    let plane_bsize = if p == 0 {
1698      bsize
1699    } else {
1700      bsize.subsampled_size(u_xdec, u_ydec).unwrap()
1701    };
1702
1703    let rec = &mut ts.rec.planes[p];
1704    let po = tile_bo.plane_offset(rec.plane_cfg);
1705    let &PlaneConfig { xdec, ydec, .. } = rec.plane_cfg;
1706    let tile_rect = luma_tile_rect.decimated(xdec, ydec);
1707
1708    let area = Area::BlockStartingAt { bo: tile_bo.0 };
1709    if p > 0 && bsize < BlockSize::BLOCK_8X8 {
1710      let mut some_use_intra = false;
1711      if bsize == BlockSize::BLOCK_4X4 || bsize == BlockSize::BLOCK_4X8 {
1712        some_use_intra |=
1713          cw.bc.blocks[tile_bo.with_offset(-1, 0)].mode.is_intra();
1714      };
1715      if !some_use_intra && bsize == BlockSize::BLOCK_4X4
1716        || bsize == BlockSize::BLOCK_8X4
1717      {
1718        some_use_intra |=
1719          cw.bc.blocks[tile_bo.with_offset(0, -1)].mode.is_intra();
1720      };
1721      if !some_use_intra && bsize == BlockSize::BLOCK_4X4 {
1722        some_use_intra |=
1723          cw.bc.blocks[tile_bo.with_offset(-1, -1)].mode.is_intra();
1724      };
1725
1726      if some_use_intra {
1727        luma_mode.predict_inter(
1728          fi,
1729          tile_rect,
1730          p,
1731          po,
1732          &mut rec.subregion_mut(area),
1733          plane_bsize.width(),
1734          plane_bsize.height(),
1735          ref_frames,
1736          mvs,
1737          compound_buffer,
1738        );
1739      } else {
1740        assert!(u_xdec == 1 && u_ydec == 1);
1741        // TODO: these are absolutely only valid for 4:2:0
1742        if bsize == BlockSize::BLOCK_4X4 {
1743          let mv0 = cw.bc.blocks[tile_bo.with_offset(-1, -1)].mv;
1744          let rf0 = cw.bc.blocks[tile_bo.with_offset(-1, -1)].ref_frames;
1745          let mv1 = cw.bc.blocks[tile_bo.with_offset(0, -1)].mv;
1746          let rf1 = cw.bc.blocks[tile_bo.with_offset(0, -1)].ref_frames;
1747          let po1 = PlaneOffset { x: po.x + 2, y: po.y };
1748          let area1 = Area::StartingAt { x: po1.x, y: po1.y };
1749          let mv2 = cw.bc.blocks[tile_bo.with_offset(-1, 0)].mv;
1750          let rf2 = cw.bc.blocks[tile_bo.with_offset(-1, 0)].ref_frames;
1751          let po2 = PlaneOffset { x: po.x, y: po.y + 2 };
1752          let area2 = Area::StartingAt { x: po2.x, y: po2.y };
1753          let po3 = PlaneOffset { x: po.x + 2, y: po.y + 2 };
1754          let area3 = Area::StartingAt { x: po3.x, y: po3.y };
1755          luma_mode.predict_inter(
1756            fi,
1757            tile_rect,
1758            p,
1759            po,
1760            &mut rec.subregion_mut(area),
1761            2,
1762            2,
1763            rf0,
1764            mv0,
1765            compound_buffer,
1766          );
1767          luma_mode.predict_inter(
1768            fi,
1769            tile_rect,
1770            p,
1771            po1,
1772            &mut rec.subregion_mut(area1),
1773            2,
1774            2,
1775            rf1,
1776            mv1,
1777            compound_buffer,
1778          );
1779          luma_mode.predict_inter(
1780            fi,
1781            tile_rect,
1782            p,
1783            po2,
1784            &mut rec.subregion_mut(area2),
1785            2,
1786            2,
1787            rf2,
1788            mv2,
1789            compound_buffer,
1790          );
1791          luma_mode.predict_inter(
1792            fi,
1793            tile_rect,
1794            p,
1795            po3,
1796            &mut rec.subregion_mut(area3),
1797            2,
1798            2,
1799            ref_frames,
1800            mvs,
1801            compound_buffer,
1802          );
1803        }
1804        if bsize == BlockSize::BLOCK_8X4 {
1805          let mv1 = cw.bc.blocks[tile_bo.with_offset(0, -1)].mv;
1806          let rf1 = cw.bc.blocks[tile_bo.with_offset(0, -1)].ref_frames;
1807          luma_mode.predict_inter(
1808            fi,
1809            tile_rect,
1810            p,
1811            po,
1812            &mut rec.subregion_mut(area),
1813            4,
1814            2,
1815            rf1,
1816            mv1,
1817            compound_buffer,
1818          );
1819          let po3 = PlaneOffset { x: po.x, y: po.y + 2 };
1820          let area3 = Area::StartingAt { x: po3.x, y: po3.y };
1821          luma_mode.predict_inter(
1822            fi,
1823            tile_rect,
1824            p,
1825            po3,
1826            &mut rec.subregion_mut(area3),
1827            4,
1828            2,
1829            ref_frames,
1830            mvs,
1831            compound_buffer,
1832          );
1833        }
1834        if bsize == BlockSize::BLOCK_4X8 {
1835          let mv2 = cw.bc.blocks[tile_bo.with_offset(-1, 0)].mv;
1836          let rf2 = cw.bc.blocks[tile_bo.with_offset(-1, 0)].ref_frames;
1837          luma_mode.predict_inter(
1838            fi,
1839            tile_rect,
1840            p,
1841            po,
1842            &mut rec.subregion_mut(area),
1843            2,
1844            4,
1845            rf2,
1846            mv2,
1847            compound_buffer,
1848          );
1849          let po3 = PlaneOffset { x: po.x + 2, y: po.y };
1850          let area3 = Area::StartingAt { x: po3.x, y: po3.y };
1851          luma_mode.predict_inter(
1852            fi,
1853            tile_rect,
1854            p,
1855            po3,
1856            &mut rec.subregion_mut(area3),
1857            2,
1858            4,
1859            ref_frames,
1860            mvs,
1861            compound_buffer,
1862          );
1863        }
1864      }
1865    } else {
1866      luma_mode.predict_inter(
1867        fi,
1868        tile_rect,
1869        p,
1870        po,
1871        &mut rec.subregion_mut(area),
1872        plane_bsize.width(),
1873        plane_bsize.height(),
1874        ref_frames,
1875        mvs,
1876        compound_buffer,
1877      );
1878    }
1879  }
1880}
1881
1882pub fn save_block_motion<T: Pixel>(
1883  ts: &mut TileStateMut<'_, T>, bsize: BlockSize, tile_bo: TileBlockOffset,
1884  ref_frame: usize, mv: MotionVector,
1885) {
1886  let tile_me_stats = &mut ts.me_stats[ref_frame];
1887  let tile_bo_x_end = (tile_bo.0.x + bsize.width_mi()).min(ts.mi_width);
1888  let tile_bo_y_end = (tile_bo.0.y + bsize.height_mi()).min(ts.mi_height);
1889  for mi_y in tile_bo.0.y..tile_bo_y_end {
1890    for mi_x in tile_bo.0.x..tile_bo_x_end {
1891      tile_me_stats[mi_y][mi_x].mv = mv;
1892    }
1893  }
1894}
1895
1896#[profiling::function]
1897pub fn encode_block_pre_cdef<T: Pixel, W: Writer>(
1898  seq: &Sequence, ts: &TileStateMut<'_, T>, cw: &mut ContextWriter, w: &mut W,
1899  bsize: BlockSize, tile_bo: TileBlockOffset, skip: bool,
1900) -> bool {
1901  cw.bc.blocks.set_skip(tile_bo, bsize, skip);
1902  if ts.segmentation.enabled
1903    && ts.segmentation.update_map
1904    && ts.segmentation.preskip
1905  {
1906    cw.write_segmentation(
1907      w,
1908      tile_bo,
1909      bsize,
1910      false,
1911      ts.segmentation.last_active_segid,
1912    );
1913  }
1914  cw.write_skip(w, tile_bo, skip);
1915  if ts.segmentation.enabled
1916    && ts.segmentation.update_map
1917    && !ts.segmentation.preskip
1918  {
1919    cw.write_segmentation(
1920      w,
1921      tile_bo,
1922      bsize,
1923      skip,
1924      ts.segmentation.last_active_segid,
1925    );
1926  }
1927  if !skip && seq.enable_cdef {
1928    cw.bc.cdef_coded = true;
1929  }
1930  cw.bc.cdef_coded
1931}
1932
/// Writes a block's mode information to `w` and then codes its residual.
///
/// The block's size, prediction mode, transform size, reference frames and
/// motion vectors are first recorded in `cw.bc.blocks`. After the mode
/// syntax has been written, inter blocks are motion-compensated and coded
/// through [`write_tx_tree`], while intra blocks are coded through
/// [`write_tx_blocks`].
///
/// Returns the `(has_coeff, distortion)` pair produced by whichever of those
/// two functions was called.
///
/// # Panics
///
/// - If chroma and luma do not match for inter modes
/// - If an invalid motion vector is found
/// - If `luma_mode` is `NEW_NEWMV` and `mv_stack` holds fewer than two
///   candidates
/// - If `chroma_mode` is a CfL mode but `bsize` does not allow CfL
#[profiling::function]
pub fn encode_block_post_cdef<T: Pixel, W: Writer>(
  fi: &FrameInvariants<T>, ts: &mut TileStateMut<'_, T>,
  cw: &mut ContextWriter, w: &mut W, luma_mode: PredictionMode,
  chroma_mode: PredictionMode, angle_delta: AngleDelta,
  ref_frames: [RefType; 2], mvs: [MotionVector; 2], bsize: BlockSize,
  tile_bo: TileBlockOffset, skip: bool, cfl: CFLParams, tx_size: TxSize,
  tx_type: TxType, mode_context: usize, mv_stack: &[CandidateMV],
  rdo_type: RDOType, need_recon_pixel: bool,
  enc_stats: Option<&mut EncoderStats>,
) -> (bool, ScaledDistortion) {
  // 4:0:0 content is treated as a single plane; anything else as three.
  let planes =
    if fi.sequence.chroma_sampling == ChromaSampling::Cs400 { 1 } else { 3 };
  let is_inter = !luma_mode.is_intra();
  if is_inter {
    assert!(luma_mode == chroma_mode);
  };
  let sb_size = if fi.sequence.use_128x128_superblock {
    BlockSize::BLOCK_128X128
  } else {
    BlockSize::BLOCK_64X64
  };
  // Chroma decimation is read from plane 1 of the input frame.
  let PlaneConfig { xdec, ydec, .. } = ts.input.planes[1].cfg;
  if skip {
    cw.bc.reset_skip_context(
      tile_bo,
      bsize,
      xdec,
      ydec,
      fi.sequence.chroma_sampling,
    );
  }
  // Record this block's parameters in the block context before any of the
  // mode syntax below is written.
  cw.bc.blocks.set_block_size(tile_bo, bsize);
  cw.bc.blocks.set_mode(tile_bo, bsize, luma_mode);
  cw.bc.blocks.set_tx_size(tile_bo, bsize, tx_size);
  cw.bc.blocks.set_ref_frames(tile_bo, bsize, ref_frames);
  cw.bc.blocks.set_motion_vectors(tile_bo, bsize, mvs);

  //write_q_deltas();
  // Deblock deltas are only written while `code_deltas` is set, and not for
  // a skipped block that is as large as the superblock. The flag is cleared
  // afterwards whether or not anything was written.
  if cw.bc.code_deltas
    && ts.deblock.block_deltas_enabled
    && (bsize < sb_size || !skip)
  {
    cw.write_block_deblock_deltas(
      w,
      tile_bo,
      ts.deblock.block_delta_multi,
      planes,
    );
  }
  cw.bc.code_deltas = false;

  if fi.frame_type.has_inter() {
    // Frame types that may contain inter blocks signal intra/inter per block.
    cw.write_is_inter(w, tile_bo, is_inter);
    if is_inter {
      cw.fill_neighbours_ref_counts(tile_bo);
      cw.write_ref_frames(w, fi, tile_bo);

      if luma_mode.is_compound() {
        cw.write_compound_mode(w, luma_mode, mode_context);
      } else {
        cw.write_inter_mode(w, luma_mode, mode_context);
      }

      // NEWMV / NEW_NEWMV always use candidate 0 of the MV stack here.
      let ref_mv_idx = 0;
      let num_mv_found = mv_stack.len();

      if luma_mode == PredictionMode::NEWMV
        || luma_mode == PredictionMode::NEW_NEWMV
      {
        if luma_mode == PredictionMode::NEW_NEWMV {
          assert!(num_mv_found >= 2);
        }
        // With `ref_mv_idx == 0`, `drl_mode` is always false, so at most one
        // DRL flag is written before the loop breaks.
        for idx in 0..2 {
          if num_mv_found > idx + 1 {
            let drl_mode = ref_mv_idx > idx;
            let ctx: usize = (mv_stack[idx].weight < REF_CAT_LEVEL) as usize
              + (mv_stack[idx + 1].weight < REF_CAT_LEVEL) as usize;
            cw.write_drl_mode(w, drl_mode, ctx);
            if !drl_mode {
              break;
            }
          }
        }
      }

      // Reference MVs passed to `write_mv` alongside the actual MVs; default
      // motion vectors are used when the candidate stack is empty.
      let ref_mvs = if num_mv_found > 0 {
        [mv_stack[ref_mv_idx].this_mv, mv_stack[ref_mv_idx].comp_mv]
      } else {
        [MotionVector::default(); 2]
      };

      let mv_precision = if fi.force_integer_mv != 0 {
        MvSubpelPrecision::MV_SUBPEL_NONE
      } else if fi.allow_high_precision_mv {
        MvSubpelPrecision::MV_SUBPEL_HIGH_PRECISION
      } else {
        MvSubpelPrecision::MV_SUBPEL_LOW_PRECISION
      };

      // The first MV is written for modes whose first component is "new" ...
      if luma_mode == PredictionMode::NEWMV
        || luma_mode == PredictionMode::NEW_NEWMV
        || luma_mode == PredictionMode::NEW_NEARESTMV
      {
        cw.write_mv(w, mvs[0], ref_mvs[0], mv_precision);
      }
      // ... and the second MV for modes whose second component is "new".
      if luma_mode == PredictionMode::NEW_NEWMV
        || luma_mode == PredictionMode::NEAREST_NEWMV
      {
        cw.write_mv(w, mvs[1], ref_mvs[1], mv_precision);
      }

      if luma_mode.has_nearmv() {
        // For NEAR modes the candidate index comes from the mode itself
        // (this shadows the `ref_mv_idx` of 0 used above).
        let ref_mv_idx = luma_mode.ref_mv_idx();
        if luma_mode != PredictionMode::NEAR0MV {
          assert!(num_mv_found > ref_mv_idx);
        }

        // Write DRL flags until one of them is false.
        for idx in 1..3 {
          if num_mv_found > idx + 1 {
            let drl_mode = ref_mv_idx > idx;
            let ctx: usize = (mv_stack[idx].weight < REF_CAT_LEVEL) as usize
              + (mv_stack[idx + 1].weight < REF_CAT_LEVEL) as usize;

            cw.write_drl_mode(w, drl_mode, ctx);
            if !drl_mode {
              break;
            }
          }
        }
        // Sanity check: the MV being coded must be the selected candidate,
        // or zero when the stack has at most one entry.
        if mv_stack.len() > 1 {
          assert!(mv_stack[ref_mv_idx].this_mv.row == mvs[0].row);
          assert!(mv_stack[ref_mv_idx].this_mv.col == mvs[0].col);
        } else {
          assert!(0 == mvs[0].row);
          assert!(0 == mvs[0].col);
        }
      } else if luma_mode == PredictionMode::NEARESTMV {
        // Sanity check: NEARESTMV must use the first candidate, or a zero MV
        // when there are no candidates.
        if mv_stack.is_empty() {
          assert_eq!(mvs[0].row, 0);
          assert_eq!(mvs[0].col, 0);
        } else {
          assert_eq!(mvs[0].row, mv_stack[0].this_mv.row);
          assert_eq!(mvs[0].col, mv_stack[0].this_mv.col);
        }
      }
    } else {
      cw.write_intra_mode(w, bsize, luma_mode);
    }
  } else {
    // Frame types without inter blocks use the key-frame intra mode writer.
    cw.write_intra_mode_kf(w, tile_bo, luma_mode);
  }

  // Remaining intra-only syntax: angle deltas, chroma mode, CfL alphas and
  // the palette / filter-intra flags.
  if !is_inter {
    if luma_mode.is_directional() && bsize >= BlockSize::BLOCK_8X8 {
      cw.write_angle_delta(w, angle_delta.y, luma_mode);
    }
    if has_chroma(tile_bo, bsize, xdec, ydec, fi.sequence.chroma_sampling) {
      cw.write_intra_uv_mode(w, chroma_mode, luma_mode, bsize);
      if chroma_mode.is_cfl() {
        assert!(bsize.cfl_allowed());
        cw.write_cfl_alphas(w, cfl);
      }
      if chroma_mode.is_directional() && bsize >= BlockSize::BLOCK_8X8 {
        cw.write_angle_delta(w, angle_delta.uv, chroma_mode);
      }
    }

    // Palette mode is never selected here: only a `false` flag is written
    // for the block sizes covered by this condition.
    if fi.allow_screen_content_tools > 0
      && bsize >= BlockSize::BLOCK_8X8
      && bsize.width() <= 64
      && bsize.height() <= 64
    {
      cw.write_use_palette_mode(
        w,
        false,
        bsize,
        tile_bo,
        luma_mode,
        chroma_mode,
        xdec,
        ydec,
        fi.sequence.chroma_sampling,
      );
    }

    if fi.sequence.enable_filter_intra
      && luma_mode == PredictionMode::DC_PRED
      && bsize.width() <= 32
      && bsize.height() <= 32
    {
      cw.write_use_filter_intra(w, false, bsize); // turn off FILTER_INTRA
    }
  }

  // write tx_size here
  // The transform size is only signaled when the frame selects it per block.
  if fi.tx_mode_select {
    if bsize > BlockSize::BLOCK_4X4 && (!is_inter || !skip) {
      if !is_inter {
        cw.write_tx_size_intra(w, tile_bo, bsize, tx_size);
        cw.bc.update_tx_size_context(tile_bo, bsize, tx_size, false);
      } else {
        // write var_tx_size
        // if here, bsize > BLOCK_4X4 && is_inter && !skip && !Lossless
        debug_assert!(fi.tx_mode_select);
        debug_assert!(bsize > BlockSize::BLOCK_4X4);
        debug_assert!(is_inter);
        debug_assert!(!skip);
        let max_tx_size = max_txsize_rect_lookup[bsize as usize];
        debug_assert!(max_tx_size.block_size() <= BlockSize::BLOCK_64X64);

        //TODO: "&& tx_size.block_size() < bsize" will be replaced with tx-split info for a partition
        //  once it is available.
        let txfm_split =
          fi.enable_inter_txfm_split && tx_size.block_size() < bsize;

        // TODO: Revise write_tx_size_inter() for txfm_split = true
        cw.write_tx_size_inter(
          w,
          tile_bo,
          bsize,
          max_tx_size,
          txfm_split,
          0,
          0,
          0,
        );
      }
    } else {
      // Nothing is written for 4x4 blocks or skipped inter blocks; only the
      // transform-size context is updated.
      debug_assert!(bsize == BlockSize::BLOCK_4X4 || (is_inter && skip));
      cw.bc.update_tx_size_context(tile_bo, bsize, tx_size, is_inter && skip);
    }
  }

  // Optional statistics: every counter is incremented by `tx_size.area()`.
  if let Some(enc_stats) = enc_stats {
    let pixels = tx_size.area();
    enc_stats.block_size_counts[bsize as usize] += pixels;
    enc_stats.tx_type_counts[tx_type as usize] += pixels;
    enc_stats.luma_pred_mode_counts[luma_mode as usize] += pixels;
    enc_stats.chroma_pred_mode_counts[chroma_mode as usize] += pixels;
    if skip {
      enc_stats.skip_block_count += pixels;
    }
  }

  // When the intra edge filter is enabled, store the modes and reference
  // types for every in-bounds position covered by this block.
  if fi.sequence.enable_intra_edge_filter {
    for y in 0..bsize.height_mi() {
      if tile_bo.0.y + y >= ts.mi_height {
        continue;
      }
      for x in 0..bsize.width_mi() {
        if tile_bo.0.x + x >= ts.mi_width {
          continue;
        }
        let bi = &mut ts.coded_block_info[tile_bo.0.y + y][tile_bo.0.x + x];
        bi.luma_mode = luma_mode;
        bi.chroma_mode = chroma_mode;
        bi.reference_types = ref_frames;
      }
    }
  }

  // Code the residual. Inter blocks are motion-compensated first; both
  // paths are called with `luma_only` set to false.
  if is_inter {
    motion_compensate(
      fi, ts, cw, luma_mode, ref_frames, mvs, bsize, tile_bo, false,
    );
    write_tx_tree(
      fi,
      ts,
      cw,
      w,
      luma_mode,
      angle_delta.y,
      tile_bo,
      bsize,
      tx_size,
      tx_type,
      skip,
      false,
      rdo_type,
      need_recon_pixel,
    )
  } else {
    write_tx_blocks(
      fi,
      ts,
      cw,
      w,
      luma_mode,
      chroma_mode,
      angle_delta,
      tile_bo,
      bsize,
      tx_size,
      tx_type,
      skip,
      cfl,
      false,
      rdo_type,
      need_recon_pixel,
    )
  }
}
2240
2241/// # Panics
2242///
2243/// - If attempting to encode a lossless block (not yet supported)
2244pub fn write_tx_blocks<T: Pixel, W: Writer>(
2245  fi: &FrameInvariants<T>, ts: &mut TileStateMut<'_, T>,
2246  cw: &mut ContextWriter, w: &mut W, luma_mode: PredictionMode,
2247  chroma_mode: PredictionMode, angle_delta: AngleDelta,
2248  tile_bo: TileBlockOffset, bsize: BlockSize, tx_size: TxSize,
2249  tx_type: TxType, skip: bool, cfl: CFLParams, luma_only: bool,
2250  rdo_type: RDOType, need_recon_pixel: bool,
2251) -> (bool, ScaledDistortion) {
2252  let bw = bsize.width_mi() / tx_size.width_mi();
2253  let bh = bsize.height_mi() / tx_size.height_mi();
2254  let qidx = get_qidx(fi, ts, cw, tile_bo);
2255
2256  // TODO: Lossless is not yet supported.
2257  if !skip {
2258    assert_ne!(qidx, 0);
2259  }
2260
2261  let PlaneConfig { xdec, ydec, .. } = ts.input.planes[1].cfg;
2262  let mut ac = Aligned::<[MaybeUninit<i16>; 32 * 32]>::uninit_array();
2263  let mut partition_has_coeff: bool = false;
2264  let mut tx_dist = ScaledDistortion::zero();
2265  let do_chroma =
2266    has_chroma(tile_bo, bsize, xdec, ydec, fi.sequence.chroma_sampling);
2267
2268  ts.qc.update(
2269    qidx,
2270    tx_size,
2271    luma_mode.is_intra(),
2272    fi.sequence.bit_depth,
2273    fi.dc_delta_q[0],
2274    0,
2275  );
2276
2277  for by in 0..bh {
2278    for bx in 0..bw {
2279      let tx_bo = TileBlockOffset(BlockOffset {
2280        x: tile_bo.0.x + bx * tx_size.width_mi(),
2281        y: tile_bo.0.y + by * tx_size.height_mi(),
2282      });
2283      if tx_bo.0.x >= ts.mi_width || tx_bo.0.y >= ts.mi_height {
2284        continue;
2285      }
2286      let po = tx_bo.plane_offset(&ts.input.planes[0].cfg);
2287      let (has_coeff, dist) = encode_tx_block(
2288        fi,
2289        ts,
2290        cw,
2291        w,
2292        0,
2293        tile_bo,
2294        bx,
2295        by,
2296        tx_bo,
2297        luma_mode,
2298        tx_size,
2299        tx_type,
2300        bsize,
2301        po,
2302        skip,
2303        qidx,
2304        &[],
2305        IntraParam::AngleDelta(angle_delta.y),
2306        rdo_type,
2307        need_recon_pixel,
2308      );
2309      partition_has_coeff |= has_coeff;
2310      tx_dist += dist;
2311    }
2312  }
2313
2314  if !do_chroma
2315    || luma_only
2316    || fi.sequence.chroma_sampling == ChromaSampling::Cs400
2317  {
2318    return (partition_has_coeff, tx_dist);
2319  };
2320  debug_assert!(has_chroma(
2321    tile_bo,
2322    bsize,
2323    xdec,
2324    ydec,
2325    fi.sequence.chroma_sampling
2326  ));
2327
2328  let uv_tx_size = bsize.largest_chroma_tx_size(xdec, ydec);
2329
2330  let mut bw_uv = (bw * tx_size.width_mi()) >> xdec;
2331  let mut bh_uv = (bh * tx_size.height_mi()) >> ydec;
2332
2333  if bw_uv == 0 || bh_uv == 0 {
2334    bw_uv = 1;
2335    bh_uv = 1;
2336  }
2337
2338  bw_uv /= uv_tx_size.width_mi();
2339  bh_uv /= uv_tx_size.height_mi();
2340
2341  let ac_data = if chroma_mode.is_cfl() {
2342    luma_ac(&mut ac.data, ts, tile_bo, bsize, tx_size, fi)
2343  } else {
2344    [].as_slice()
2345  };
2346
2347  let uv_tx_type = if uv_tx_size.width() >= 32 || uv_tx_size.height() >= 32 {
2348    TxType::DCT_DCT
2349  } else {
2350    uv_intra_mode_to_tx_type_context(chroma_mode)
2351  };
2352
2353  for p in 1..3 {
2354    ts.qc.update(
2355      qidx,
2356      uv_tx_size,
2357      true,
2358      fi.sequence.bit_depth,
2359      fi.dc_delta_q[p],
2360      fi.ac_delta_q[p],
2361    );
2362    let alpha = cfl.alpha(p - 1);
2363    for by in 0..bh_uv {
2364      for bx in 0..bw_uv {
2365        let tx_bo = TileBlockOffset(BlockOffset {
2366          x: tile_bo.0.x + ((bx * uv_tx_size.width_mi()) << xdec)
2367            - ((bw * tx_size.width_mi() == 1) as usize) * xdec,
2368          y: tile_bo.0.y + ((by * uv_tx_size.height_mi()) << ydec)
2369            - ((bh * tx_size.height_mi() == 1) as usize) * ydec,
2370        });
2371
2372        let mut po = tile_bo.plane_offset(&ts.input.planes[p].cfg);
2373        po.x += (bx * uv_tx_size.width()) as isize;
2374        po.y += (by * uv_tx_size.height()) as isize;
2375        let (has_coeff, dist) = encode_tx_block(
2376          fi,
2377          ts,
2378          cw,
2379          w,
2380          p,
2381          tile_bo,
2382          bx,
2383          by,
2384          tx_bo,
2385          chroma_mode,
2386          uv_tx_size,
2387          uv_tx_type,
2388          bsize,
2389          po,
2390          skip,
2391          qidx,
2392          ac_data,
2393          if chroma_mode.is_cfl() {
2394            IntraParam::Alpha(alpha)
2395          } else {
2396            IntraParam::AngleDelta(angle_delta.uv)
2397          },
2398          rdo_type,
2399          need_recon_pixel,
2400        );
2401        partition_has_coeff |= has_coeff;
2402        tx_dist += dist;
2403      }
2404    }
2405  }
2406
2407  (partition_has_coeff, tx_dist)
2408}
2409
2410pub fn write_tx_tree<T: Pixel, W: Writer>(
2411  fi: &FrameInvariants<T>, ts: &mut TileStateMut<'_, T>,
2412  cw: &mut ContextWriter, w: &mut W, luma_mode: PredictionMode,
2413  angle_delta_y: i8, tile_bo: TileBlockOffset, bsize: BlockSize,
2414  tx_size: TxSize, tx_type: TxType, skip: bool, luma_only: bool,
2415  rdo_type: RDOType, need_recon_pixel: bool,
2416) -> (bool, ScaledDistortion) {
2417  if skip {
2418    return (false, ScaledDistortion::zero());
2419  }
2420  let bw = bsize.width_mi() / tx_size.width_mi();
2421  let bh = bsize.height_mi() / tx_size.height_mi();
2422  let qidx = get_qidx(fi, ts, cw, tile_bo);
2423
2424  let PlaneConfig { xdec, ydec, .. } = ts.input.planes[1].cfg;
2425  let ac = &[0i16; 0];
2426  let mut partition_has_coeff: bool = false;
2427  let mut tx_dist = ScaledDistortion::zero();
2428
2429  ts.qc.update(
2430    qidx,
2431    tx_size,
2432    luma_mode.is_intra(),
2433    fi.sequence.bit_depth,
2434    fi.dc_delta_q[0],
2435    0,
2436  );
2437
2438  // TODO: If tx-parition more than only 1-level, this code does not work.
2439  // It should recursively traverse the tx block that are split recursivelty by calling write_tx_tree(),
2440  // as defined in https://aomediacodec.github.io/av1-spec/#transform-tree-syntax
2441  for by in 0..bh {
2442    for bx in 0..bw {
2443      let tx_bo = TileBlockOffset(BlockOffset {
2444        x: tile_bo.0.x + bx * tx_size.width_mi(),
2445        y: tile_bo.0.y + by * tx_size.height_mi(),
2446      });
2447      if tx_bo.0.x >= ts.mi_width || tx_bo.0.y >= ts.mi_height {
2448        continue;
2449      }
2450
2451      let po = tx_bo.plane_offset(&ts.input.planes[0].cfg);
2452      let (has_coeff, dist) = encode_tx_block(
2453        fi,
2454        ts,
2455        cw,
2456        w,
2457        0,
2458        tile_bo,
2459        0,
2460        0,
2461        tx_bo,
2462        luma_mode,
2463        tx_size,
2464        tx_type,
2465        bsize,
2466        po,
2467        skip,
2468        qidx,
2469        ac,
2470        IntraParam::AngleDelta(angle_delta_y),
2471        rdo_type,
2472        need_recon_pixel,
2473      );
2474      partition_has_coeff |= has_coeff;
2475      tx_dist += dist;
2476    }
2477  }
2478
2479  if !has_chroma(tile_bo, bsize, xdec, ydec, fi.sequence.chroma_sampling)
2480    || luma_only
2481    || fi.sequence.chroma_sampling == ChromaSampling::Cs400
2482  {
2483    return (partition_has_coeff, tx_dist);
2484  };
2485  debug_assert!(has_chroma(
2486    tile_bo,
2487    bsize,
2488    xdec,
2489    ydec,
2490    fi.sequence.chroma_sampling
2491  ));
2492
2493  let max_tx_size = max_txsize_rect_lookup[bsize as usize];
2494  debug_assert!(max_tx_size.block_size() <= BlockSize::BLOCK_64X64);
2495  let uv_tx_size = bsize.largest_chroma_tx_size(xdec, ydec);
2496
2497  let mut bw_uv = max_tx_size.width_mi() >> xdec;
2498  let mut bh_uv = max_tx_size.height_mi() >> ydec;
2499
2500  if bw_uv == 0 || bh_uv == 0 {
2501    bw_uv = 1;
2502    bh_uv = 1;
2503  }
2504
2505  bw_uv /= uv_tx_size.width_mi();
2506  bh_uv /= uv_tx_size.height_mi();
2507
2508  let uv_tx_type = if partition_has_coeff {
2509    tx_type.uv_inter(uv_tx_size)
2510  } else {
2511    TxType::DCT_DCT
2512  };
2513
2514  for p in 1..3 {
2515    ts.qc.update(
2516      qidx,
2517      uv_tx_size,
2518      false,
2519      fi.sequence.bit_depth,
2520      fi.dc_delta_q[p],
2521      fi.ac_delta_q[p],
2522    );
2523
2524    for by in 0..bh_uv {
2525      for bx in 0..bw_uv {
2526        let tx_bo = TileBlockOffset(BlockOffset {
2527          x: tile_bo.0.x + ((bx * uv_tx_size.width_mi()) << xdec)
2528            - (max_tx_size.width_mi() == 1) as usize * xdec,
2529          y: tile_bo.0.y + ((by * uv_tx_size.height_mi()) << ydec)
2530            - (max_tx_size.height_mi() == 1) as usize * ydec,
2531        });
2532
2533        let mut po = tile_bo.plane_offset(&ts.input.planes[p].cfg);
2534        po.x += (bx * uv_tx_size.width()) as isize;
2535        po.y += (by * uv_tx_size.height()) as isize;
2536        let (has_coeff, dist) = encode_tx_block(
2537          fi,
2538          ts,
2539          cw,
2540          w,
2541          p,
2542          tile_bo,
2543          bx,
2544          by,
2545          tx_bo,
2546          luma_mode,
2547          uv_tx_size,
2548          uv_tx_type,
2549          bsize,
2550          po,
2551          skip,
2552          qidx,
2553          ac,
2554          IntraParam::AngleDelta(angle_delta_y),
2555          rdo_type,
2556          need_recon_pixel,
2557        );
2558        partition_has_coeff |= has_coeff;
2559        tx_dist += dist;
2560      }
2561    }
2562  }
2563
2564  (partition_has_coeff, tx_dist)
2565}
2566
2567#[profiling::function]
2568pub fn encode_block_with_modes<T: Pixel, W: Writer>(
2569  fi: &FrameInvariants<T>, ts: &mut TileStateMut<'_, T>,
2570  cw: &mut ContextWriter, w_pre_cdef: &mut W, w_post_cdef: &mut W,
2571  bsize: BlockSize, tile_bo: TileBlockOffset,
2572  mode_decision: &PartitionParameters, rdo_type: RDOType,
2573  enc_stats: Option<&mut EncoderStats>,
2574) {
2575  let (mode_luma, mode_chroma) =
2576    (mode_decision.pred_mode_luma, mode_decision.pred_mode_chroma);
2577  let cfl = mode_decision.pred_cfl_params;
2578  let ref_frames = mode_decision.ref_frames;
2579  let mvs = mode_decision.mvs;
2580  let mut skip = mode_decision.skip;
2581  let mut cdef_coded = cw.bc.cdef_coded;
2582
2583  // Set correct segmentation ID before encoding and before
2584  // rdo_tx_size_type().
2585  cw.bc.blocks.set_segmentation_idx(tile_bo, bsize, mode_decision.sidx);
2586
2587  let mut mv_stack = ArrayVec::<CandidateMV, 9>::new();
2588  let is_compound = ref_frames[1] != NONE_FRAME;
2589  let mode_context =
2590    cw.find_mvrefs(tile_bo, ref_frames, &mut mv_stack, bsize, fi, is_compound);
2591
2592  let (tx_size, tx_type) = if !mode_decision.skip && !mode_decision.has_coeff {
2593    skip = true;
2594    rdo_tx_size_type(
2595      fi, ts, cw, bsize, tile_bo, mode_luma, ref_frames, mvs, skip,
2596    )
2597  } else {
2598    (mode_decision.tx_size, mode_decision.tx_type)
2599  };
2600
2601  cdef_coded = encode_block_pre_cdef(
2602    &fi.sequence,
2603    ts,
2604    cw,
2605    if cdef_coded { w_post_cdef } else { w_pre_cdef },
2606    bsize,
2607    tile_bo,
2608    skip,
2609  );
2610  encode_block_post_cdef(
2611    fi,
2612    ts,
2613    cw,
2614    if cdef_coded { w_post_cdef } else { w_pre_cdef },
2615    mode_luma,
2616    mode_chroma,
2617    mode_decision.angle_delta,
2618    ref_frames,
2619    mvs,
2620    bsize,
2621    tile_bo,
2622    skip,
2623    cfl,
2624    tx_size,
2625    tx_type,
2626    mode_context,
2627    &mv_stack,
2628    rdo_type,
2629    true,
2630    enc_stats,
2631  );
2632}
2633
2634#[profiling::function]
2635fn encode_partition_bottomup<T: Pixel, W: Writer>(
2636  fi: &FrameInvariants<T>, ts: &mut TileStateMut<'_, T>,
2637  cw: &mut ContextWriter, w_pre_cdef: &mut W, w_post_cdef: &mut W,
2638  bsize: BlockSize, tile_bo: TileBlockOffset, ref_rd_cost: f64,
2639  inter_cfg: &InterConfig, enc_stats: &mut EncoderStats,
2640) -> PartitionGroupParameters {
2641  let rdo_type = RDOType::PixelDistRealRate;
2642  let mut rd_cost = std::f64::MAX;
2643  let mut best_rd = std::f64::MAX;
2644  let mut rdo_output = PartitionGroupParameters {
2645    rd_cost,
2646    part_type: PartitionType::PARTITION_INVALID,
2647    part_modes: ArrayVec::new(),
2648  };
2649
2650  if tile_bo.0.x >= ts.mi_width || tile_bo.0.y >= ts.mi_height {
2651    return rdo_output;
2652  }
2653
2654  let is_square = bsize.is_sqr();
2655  let hbs = bsize.width_mi() / 2;
2656  let has_cols = tile_bo.0.x + hbs < ts.mi_width;
2657  let has_rows = tile_bo.0.y + hbs < ts.mi_height;
2658  let is_straddle_x = tile_bo.0.x + bsize.width_mi() > ts.mi_width;
2659  let is_straddle_y = tile_bo.0.y + bsize.height_mi() > ts.mi_height;
2660
2661  // TODO: Update for 128x128 superblocks
2662  assert!(fi.partition_range.max <= BlockSize::BLOCK_64X64);
2663
2664  let must_split =
2665    is_square && (bsize > fi.partition_range.max || !has_cols || !has_rows);
2666
2667  let can_split = // FIXME: sub-8x8 inter blocks not supported for non-4:2:0 sampling
2668    if fi.frame_type.has_inter() &&
2669      fi.sequence.chroma_sampling != ChromaSampling::Cs420 &&
2670      bsize <= BlockSize::BLOCK_8X8 {
2671      false
2672    } else {
2673      (bsize > fi.partition_range.min && is_square) || must_split
2674    };
2675
2676  assert!(bsize >= BlockSize::BLOCK_8X8 || !can_split);
2677
2678  let mut best_partition = PartitionType::PARTITION_INVALID;
2679
2680  let cw_checkpoint = cw.checkpoint(&tile_bo, fi.sequence.chroma_sampling);
2681  let w_pre_checkpoint = w_pre_cdef.checkpoint();
2682  let w_post_checkpoint = w_post_cdef.checkpoint();
2683
2684  // Code the whole block
2685  if !must_split {
2686    let cost = if bsize >= BlockSize::BLOCK_8X8 && is_square {
2687      let w: &mut W = if cw.bc.cdef_coded { w_post_cdef } else { w_pre_cdef };
2688      let tell = w.tell_frac();
2689      cw.write_partition(w, tile_bo, PartitionType::PARTITION_NONE, bsize);
2690      compute_rd_cost(fi, w.tell_frac() - tell, ScaledDistortion::zero())
2691    } else {
2692      0.0
2693    };
2694
2695    let mode_decision =
2696      rdo_mode_decision(fi, ts, cw, bsize, tile_bo, inter_cfg);
2697
2698    if !mode_decision.pred_mode_luma.is_intra() {
2699      // Fill the saved motion structure
2700      save_block_motion(
2701        ts,
2702        mode_decision.bsize,
2703        mode_decision.bo,
2704        mode_decision.ref_frames[0].to_index(),
2705        mode_decision.mvs[0],
2706      );
2707    }
2708
2709    rd_cost = mode_decision.rd_cost + cost;
2710
2711    best_partition = PartitionType::PARTITION_NONE;
2712    best_rd = rd_cost;
2713    rdo_output.part_modes.push(mode_decision.clone());
2714
2715    if !can_split {
2716      encode_block_with_modes(
2717        fi,
2718        ts,
2719        cw,
2720        w_pre_cdef,
2721        w_post_cdef,
2722        bsize,
2723        tile_bo,
2724        &mode_decision,
2725        rdo_type,
2726        Some(enc_stats),
2727      );
2728    }
2729  } // if !must_split
2730
2731  let mut early_exit = false;
2732
2733  // Test all partition types other than PARTITION_NONE by comparing their RD costs
2734  if can_split {
2735    debug_assert!(is_square);
2736
2737    let mut partition_types = ArrayVec::<PartitionType, 3>::new();
2738    if bsize
2739      <= fi.config.speed_settings.partition.non_square_partition_max_threshold
2740      || is_straddle_x
2741      || is_straddle_y
2742    {
2743      if has_cols {
2744        partition_types.push(PartitionType::PARTITION_HORZ);
2745      }
2746      if !(fi.sequence.chroma_sampling == ChromaSampling::Cs422) && has_rows {
2747        partition_types.push(PartitionType::PARTITION_VERT);
2748      }
2749    }
2750    partition_types.push(PartitionType::PARTITION_SPLIT);
2751
2752    for partition in partition_types {
2753      // (!has_rows || !has_cols) --> must_split
2754      debug_assert!((has_rows && has_cols) || must_split);
2755      // (!has_rows && has_cols) --> partition != PartitionType::PARTITION_VERT
2756      debug_assert!(
2757        has_rows || !has_cols || (partition != PartitionType::PARTITION_VERT)
2758      );
2759      // (has_rows && !has_cols) --> partition != PartitionType::PARTITION_HORZ
2760      debug_assert!(
2761        !has_rows || has_cols || (partition != PartitionType::PARTITION_HORZ)
2762      );
2763      // (!has_rows && !has_cols) --> partition == PartitionType::PARTITION_SPLIT
2764      debug_assert!(
2765        has_rows || has_cols || (partition == PartitionType::PARTITION_SPLIT)
2766      );
2767
2768      cw.rollback(&cw_checkpoint);
2769      w_pre_cdef.rollback(&w_pre_checkpoint);
2770      w_post_cdef.rollback(&w_post_checkpoint);
2771
2772      let subsize = bsize.subsize(partition).unwrap();
2773      let hbsw = subsize.width_mi(); // Half the block size width in blocks
2774      let hbsh = subsize.height_mi(); // Half the block size height in blocks
2775      let mut child_modes = ArrayVec::<PartitionParameters, 4>::new();
2776      rd_cost = 0.0;
2777
2778      if bsize >= BlockSize::BLOCK_8X8 {
2779        let w: &mut W =
2780          if cw.bc.cdef_coded { w_post_cdef } else { w_pre_cdef };
2781        let tell = w.tell_frac();
2782        cw.write_partition(w, tile_bo, partition, bsize);
2783        rd_cost =
2784          compute_rd_cost(fi, w.tell_frac() - tell, ScaledDistortion::zero());
2785      }
2786
2787      let four_partitions = [
2788        tile_bo,
2789        TileBlockOffset(BlockOffset { x: tile_bo.0.x + hbsw, y: tile_bo.0.y }),
2790        TileBlockOffset(BlockOffset { x: tile_bo.0.x, y: tile_bo.0.y + hbsh }),
2791        TileBlockOffset(BlockOffset {
2792          x: tile_bo.0.x + hbsw,
2793          y: tile_bo.0.y + hbsh,
2794        }),
2795      ];
2796      let partitions = get_sub_partitions(&four_partitions, partition);
2797
2798      early_exit = false;
2799      // If either of horz or vert partition types is being tested,
2800      // two partitioned rectangles, defined in 'partitions', of the current block
2801      // is passed to encode_partition_bottomup()
2802      for offset in partitions {
2803        if offset.0.x >= ts.mi_width || offset.0.y >= ts.mi_height {
2804          continue;
2805        }
2806        let child_rdo_output = encode_partition_bottomup(
2807          fi,
2808          ts,
2809          cw,
2810          w_pre_cdef,
2811          w_post_cdef,
2812          subsize,
2813          offset,
2814          best_rd,
2815          inter_cfg,
2816          enc_stats,
2817        );
2818        let cost = child_rdo_output.rd_cost;
2819        assert!(cost >= 0.0);
2820
2821        if cost != std::f64::MAX {
2822          rd_cost += cost;
2823          if !must_split
2824            && fi.enable_early_exit
2825            && (rd_cost >= best_rd || rd_cost >= ref_rd_cost)
2826          {
2827            assert!(cost != std::f64::MAX);
2828            early_exit = true;
2829            break;
2830          } else if partition != PartitionType::PARTITION_SPLIT {
2831            child_modes.push(child_rdo_output.part_modes[0].clone());
2832          }
2833        }
2834      }
2835
2836      if !early_exit && rd_cost < best_rd {
2837        best_rd = rd_cost;
2838        best_partition = partition;
2839        if partition != PartitionType::PARTITION_SPLIT {
2840          assert!(!child_modes.is_empty());
2841          rdo_output.part_modes = child_modes;
2842        }
2843      }
2844    }
2845
2846    debug_assert!(
2847      early_exit || best_partition != PartitionType::PARTITION_INVALID
2848    );
2849
2850    // If the best partition is not PARTITION_SPLIT, recode it
2851    if best_partition != PartitionType::PARTITION_SPLIT {
2852      assert!(!rdo_output.part_modes.is_empty());
2853      cw.rollback(&cw_checkpoint);
2854      w_pre_cdef.rollback(&w_pre_checkpoint);
2855      w_post_cdef.rollback(&w_post_checkpoint);
2856
2857      assert!(best_partition != PartitionType::PARTITION_NONE || !must_split);
2858      let subsize = bsize.subsize(best_partition).unwrap();
2859
2860      if bsize >= BlockSize::BLOCK_8X8 {
2861        let w: &mut W =
2862          if cw.bc.cdef_coded { w_post_cdef } else { w_pre_cdef };
2863        cw.write_partition(w, tile_bo, best_partition, bsize);
2864      }
2865      for mode in rdo_output.part_modes.clone() {
2866        assert!(subsize == mode.bsize);
2867
2868        if !mode.pred_mode_luma.is_intra() {
2869          save_block_motion(
2870            ts,
2871            mode.bsize,
2872            mode.bo,
2873            mode.ref_frames[0].to_index(),
2874            mode.mvs[0],
2875          );
2876        }
2877
2878        // FIXME: redundant block re-encode
2879        encode_block_with_modes(
2880          fi,
2881          ts,
2882          cw,
2883          w_pre_cdef,
2884          w_post_cdef,
2885          mode.bsize,
2886          mode.bo,
2887          &mode,
2888          rdo_type,
2889          Some(enc_stats),
2890        );
2891      }
2892    }
2893  } // if can_split {
2894
2895  assert!(best_partition != PartitionType::PARTITION_INVALID);
2896
2897  if is_square
2898    && bsize >= BlockSize::BLOCK_8X8
2899    && (bsize == BlockSize::BLOCK_8X8
2900      || best_partition != PartitionType::PARTITION_SPLIT)
2901  {
2902    cw.bc.update_partition_context(
2903      tile_bo,
2904      bsize.subsize(best_partition).unwrap(),
2905      bsize,
2906    );
2907  }
2908
2909  rdo_output.rd_cost = best_rd;
2910  rdo_output.part_type = best_partition;
2911
2912  if best_partition != PartitionType::PARTITION_NONE {
2913    rdo_output.part_modes.clear();
2914  }
2915  rdo_output
2916}
2917
/// Encodes the block of size `bsize` located at `tile_bo`, choosing the
/// partitioning top-down and recursing into the sub-blocks whenever the
/// block ends up being split.
///
/// The partition type is selected as follows:
/// - `PARTITION_SPLIT` when the block must be split, i.e. it is square and
///   either larger than `fi.partition_range.max` or its midpoint falls
///   outside the tile horizontally or vertically;
/// - otherwise, when splitting is allowed, whatever `rdo_partition_decision()`
///   picks between `PARTITION_SPLIT` and `PARTITION_NONE`;
/// - otherwise `PARTITION_NONE`.
///
/// Symbols are written to `w_post_cdef` once `cw.bc.cdef_coded` is set and to
/// `w_pre_cdef` before that.
///
/// `block_output` optionally carries a decision already made for this block
/// (partition type, RD cost and per-block modes). When it is `None`, the
/// function starts from an invalid partition type, an `f64::MAX` cost and no
/// modes.
///
/// Returns immediately, without writing anything, when `tile_bo` lies outside
/// the tile described by `ts`.
fn encode_partition_topdown<T: Pixel, W: Writer>(
  fi: &FrameInvariants<T>, ts: &mut TileStateMut<'_, T>,
  cw: &mut ContextWriter, w_pre_cdef: &mut W, w_post_cdef: &mut W,
  bsize: BlockSize, tile_bo: TileBlockOffset,
  block_output: &Option<PartitionGroupParameters>, inter_cfg: &InterConfig,
  enc_stats: &mut EncoderStats,
) {
  // Blocks that start outside the tile are not coded at all.
  if tile_bo.0.x >= ts.mi_width || tile_bo.0.y >= ts.mi_height {
    return;
  }
  let is_square = bsize.is_sqr();
  let rdo_type = RDOType::PixelDistRealRate;
  // Half of the block width; also used for the vertical check below.
  let hbs = bsize.width_mi() / 2;
  // Whether the right / bottom half of the block still starts inside the tile.
  let has_cols = tile_bo.0.x + hbs < ts.mi_width;
  let has_rows = tile_bo.0.y + hbs < ts.mi_height;

  // TODO: Update for 128x128 superblocks
  debug_assert!(fi.partition_range.max <= BlockSize::BLOCK_64X64);

  // A square block is forced to split when it exceeds the largest allowed
  // partition size or when it straddles the tile edge.
  let must_split =
    is_square && (bsize > fi.partition_range.max || !has_cols || !has_rows);

  let can_split = // FIXME: sub-8x8 inter blocks not supported for non-4:2:0 sampling
    if fi.frame_type.has_inter() &&
      fi.sequence.chroma_sampling != ChromaSampling::Cs420 &&
      bsize <= BlockSize::BLOCK_8X8 {
      false
    } else {
      (bsize > fi.partition_range.min && is_square) || must_split
    };

  // Start from the caller-provided decision, or from an "unknown" one.
  let mut rdo_output =
    block_output.clone().unwrap_or_else(|| PartitionGroupParameters {
      part_type: PartitionType::PARTITION_INVALID,
      rd_cost: std::f64::MAX,
      part_modes: ArrayVec::new(),
    });

  let partition = if must_split {
    PartitionType::PARTITION_SPLIT
  } else if can_split {
    debug_assert!(bsize.is_sqr());

    // Blocks of sizes within the supported range are subjected to a partitioning decision
    rdo_output = rdo_partition_decision(
      fi,
      ts,
      cw,
      w_pre_cdef,
      w_post_cdef,
      bsize,
      tile_bo,
      &rdo_output,
      &[PartitionType::PARTITION_SPLIT, PartitionType::PARTITION_NONE],
      rdo_type,
      inter_cfg,
    );
    rdo_output.part_type
  } else {
    // Blocks of sizes below the supported range are encoded directly
    PartitionType::PARTITION_NONE
  };

  debug_assert!(partition != PartitionType::PARTITION_INVALID);

  let subsize = bsize.subsize(partition).unwrap();

  // The partition symbol is only written for square blocks of at least 8x8.
  if bsize >= BlockSize::BLOCK_8X8 && is_square {
    let w: &mut W = if cw.bc.cdef_coded { w_post_cdef } else { w_pre_cdef };
    cw.write_partition(w, tile_bo, partition, bsize);
  }

  match partition {
    PartitionType::PARTITION_NONE => {
      // Declared up front so that `part_decision` can borrow it when the
      // mode decision has to be computed here.
      let rdo_decision;
      let part_decision =
        if let Some(part_mode) = rdo_output.part_modes.first() {
          // The optimal prediction mode is known from a previous iteration
          part_mode
        } else {
          // Make a prediction mode decision for blocks encoded with no rdo_partition_decision call (e.g. edges)
          rdo_decision =
            rdo_mode_decision(fi, ts, cw, bsize, tile_bo, inter_cfg);
          &rdo_decision
        };

      let mut mode_luma = part_decision.pred_mode_luma;
      let mut mode_chroma = part_decision.pred_mode_chroma;

      let cfl = part_decision.pred_cfl_params;
      let skip = part_decision.skip;
      let ref_frames = part_decision.ref_frames;
      let mvs = part_decision.mvs;
      let mut cdef_coded = cw.bc.cdef_coded;

      // Set correct segmentation ID before encoding and before
      // rdo_tx_size_type().
      cw.bc.blocks.set_segmentation_idx(tile_bo, bsize, part_decision.sidx);

      // NOTE: Cannot avoid calling rdo_tx_size_type() here again,
      // because, with top-down partition RDO, the neighboring contexts
      // of current partition can change, i.e. neighboring partitions can split down more.
      let (tx_size, tx_type) = rdo_tx_size_type(
        fi, ts, cw, bsize, tile_bo, mode_luma, ref_frames, mvs, skip,
      );

      // Rebuild the MV candidate stack for the current contexts.
      let mut mv_stack = ArrayVec::<CandidateMV, 9>::new();
      let is_compound = ref_frames[1] != NONE_FRAME;
      let mode_context = cw.find_mvrefs(
        tile_bo,
        ref_frames,
        &mut mv_stack,
        bsize,
        fi,
        is_compound,
      );

      // Re-derive the inter prediction mode from where the chosen motion
      // vectors sit in the candidate stack that was just built.
      // TODO: proper remap when is_compound is true
      if !mode_luma.is_intra() {
        if is_compound && mode_luma != PredictionMode::GLOBAL_GLOBALMV {
          // Entries 0 and 1 are indexed unconditionally here; entries 2 and 3
          // are only inspected after a length check.
          let match0 = mv_stack[0].this_mv.row == mvs[0].row
            && mv_stack[0].this_mv.col == mvs[0].col;
          let match1 = mv_stack[0].comp_mv.row == mvs[1].row
            && mv_stack[0].comp_mv.col == mvs[1].col;

          let match2 = mv_stack[1].this_mv.row == mvs[0].row
            && mv_stack[1].this_mv.col == mvs[0].col;
          let match3 = mv_stack[1].comp_mv.row == mvs[1].row
            && mv_stack[1].comp_mv.col == mvs[1].col;

          let match4 = mv_stack.len() > 2 && mv_stack[2].this_mv == mvs[0];
          let match5 = mv_stack.len() > 2 && mv_stack[2].comp_mv == mvs[1];

          let match6 = mv_stack.len() > 3 && mv_stack[3].this_mv == mvs[0];
          let match7 = mv_stack.len() > 3 && mv_stack[3].comp_mv == mvs[1];

          // Full matches against a stack entry are preferred, then partial
          // matches against entry 0, then a pair of new motion vectors.
          mode_luma = if match0 && match1 {
            PredictionMode::NEAREST_NEARESTMV
          } else if match2 && match3 {
            PredictionMode::NEAR_NEAR0MV
          } else if match4 && match5 {
            PredictionMode::NEAR_NEAR1MV
          } else if match6 && match7 {
            PredictionMode::NEAR_NEAR2MV
          } else if match0 {
            PredictionMode::NEAREST_NEWMV
          } else if match1 {
            PredictionMode::NEW_NEARESTMV
          } else {
            PredictionMode::NEW_NEWMV
          };

          // Two all-zero motion vectors are coded as GLOBAL_GLOBALMV unless
          // they already matched the first stack entry.
          if mode_luma != PredictionMode::NEAREST_NEARESTMV
            && mvs[0].row == 0
            && mvs[0].col == 0
            && mvs[1].row == 0
            && mvs[1].col == 0
          {
            mode_luma = PredictionMode::GLOBAL_GLOBALMV;
          }
          mode_chroma = mode_luma;
        } else if !is_compound && mode_luma != PredictionMode::GLOBALMV {
          mode_luma = PredictionMode::NEWMV;
          // The loop has no `break`: if several of the first four candidates
          // equal the chosen MV, the last matching one determines the mode.
          for (c, m) in mv_stack.iter().take(4).zip(
            [
              PredictionMode::NEARESTMV,
              PredictionMode::NEAR0MV,
              PredictionMode::NEAR1MV,
              PredictionMode::NEAR2MV,
            ]
            .iter(),
          ) {
            if c.this_mv.row == mvs[0].row && c.this_mv.col == mvs[0].col {
              mode_luma = *m;
            }
          }
          // A zero MV that matched no candidate is remapped depending on how
          // many candidates the stack holds.
          if mode_luma == PredictionMode::NEWMV
            && mvs[0].row == 0
            && mvs[0].col == 0
          {
            mode_luma = if mv_stack.is_empty() {
              PredictionMode::NEARESTMV
            } else if mv_stack.len() == 1 {
              PredictionMode::NEAR0MV
            } else {
              PredictionMode::GLOBALMV
            };
          }
          mode_chroma = mode_luma;
        }

        // Record the first reference's motion vector for this block.
        save_block_motion(
          ts,
          part_decision.bsize,
          part_decision.bo,
          part_decision.ref_frames[0].to_index(),
          part_decision.mvs[0],
        );
      }

      // FIXME: every final block that has gone through the RDO decision process is encoded twice
      // The returned flag selects the writer used for the remaining symbols.
      cdef_coded = encode_block_pre_cdef(
        &fi.sequence,
        ts,
        cw,
        if cdef_coded { w_post_cdef } else { w_pre_cdef },
        bsize,
        tile_bo,
        skip,
      );
      encode_block_post_cdef(
        fi,
        ts,
        cw,
        if cdef_coded { w_post_cdef } else { w_pre_cdef },
        mode_luma,
        mode_chroma,
        part_decision.angle_delta,
        ref_frames,
        mvs,
        bsize,
        tile_bo,
        skip,
        cfl,
        tx_size,
        tx_type,
        mode_context,
        &mv_stack,
        RDOType::PixelDistRealRate,
        true,
        Some(enc_stats),
      );
    }
    PARTITION_SPLIT | PARTITION_HORZ | PARTITION_VERT => {
      if !rdo_output.part_modes.is_empty() {
        debug_assert!(can_split && !must_split);

        // The optimal prediction modes for each split block is known from an rdo_partition_decision() call
        for mode in rdo_output.part_modes {
          // Each block is subjected to a new splitting decision
          encode_partition_topdown(
            fi,
            ts,
            cw,
            w_pre_cdef,
            w_post_cdef,
            subsize,
            mode.bo,
            &Some(PartitionGroupParameters {
              rd_cost: mode.rd_cost,
              part_type: PartitionType::PARTITION_NONE,
              part_modes: [mode][..].try_into().unwrap(),
            }),
            inter_cfg,
            enc_stats,
          );
        }
      } else {
        // No per-block modes are available: recurse into every sub-block
        // without a precomputed decision.
        debug_assert!(must_split);
        let hbsw = subsize.width_mi(); // Half the block size width in blocks
        let hbsh = subsize.height_mi(); // Half the block size height in blocks
        let four_partitions = [
          tile_bo,
          TileBlockOffset(BlockOffset {
            x: tile_bo.0.x + hbsw,
            y: tile_bo.0.y,
          }),
          TileBlockOffset(BlockOffset {
            x: tile_bo.0.x,
            y: tile_bo.0.y + hbsh,
          }),
          TileBlockOffset(BlockOffset {
            x: tile_bo.0.x + hbsw,
            y: tile_bo.0.y + hbsh,
          }),
        ];
        let partitions = get_sub_partitions(&four_partitions, partition);

        partitions.iter().for_each(|&offset| {
          encode_partition_topdown(
            fi,
            ts,
            cw,
            w_pre_cdef,
            w_post_cdef,
            subsize,
            offset,
            &None,
            inter_cfg,
            enc_stats,
          );
        });
      }
    }
    _ => unreachable!(),
  }

  // Update the partition context for square blocks of at least 8x8, except
  // for blocks larger than 8x8 that were split.
  if is_square
    && bsize >= BlockSize::BLOCK_8X8
    && (bsize == BlockSize::BLOCK_8X8
      || partition != PartitionType::PARTITION_SPLIT)
  {
    cw.bc.update_partition_context(tile_bo, subsize, bsize);
  }
}
3223
3224fn get_initial_cdfcontext<T: Pixel>(fi: &FrameInvariants<T>) -> CDFContext {
3225  let cdf = if fi.primary_ref_frame == PRIMARY_REF_NONE {
3226    None
3227  } else {
3228    let ref_frame_idx = fi.ref_frames[fi.primary_ref_frame as usize] as usize;
3229    let ref_frame = fi.rec_buffer.frames[ref_frame_idx].as_ref();
3230    ref_frame.map(|rec| rec.cdfs)
3231  };
3232
3233  // return the retrieved instance if any, a new one otherwise
3234  cdf.unwrap_or_else(|| CDFContext::new(fi.base_q_idx))
3235}
3236
3237#[profiling::function]
3238fn encode_tile_group<T: Pixel>(
3239  fi: &FrameInvariants<T>, fs: &mut FrameState<T>, inter_cfg: &InterConfig,
3240) -> Vec<u8> {
3241  let planes =
3242    if fi.sequence.chroma_sampling == ChromaSampling::Cs400 { 1 } else { 3 };
3243  let mut blocks = FrameBlocks::new(fi.w_in_b, fi.h_in_b);
3244  let ti = &fi.sequence.tiling;
3245
3246  let initial_cdf = get_initial_cdfcontext(fi);
3247  // dynamic allocation: once per frame
3248  let mut cdfs = vec![initial_cdf; ti.tile_count()];
3249
3250  let (raw_tiles, stats): (Vec<_>, Vec<_>) = ti
3251    .tile_iter_mut(fs, &mut blocks)
3252    .zip(cdfs.iter_mut())
3253    .collect::<Vec<_>>()
3254    .into_par_iter()
3255    .map(|(mut ctx, cdf)| {
3256      encode_tile(fi, &mut ctx.ts, cdf, &mut ctx.tb, inter_cfg)
3257    })
3258    .unzip();
3259
3260  for tile_stats in stats {
3261    fs.enc_stats += &tile_stats;
3262  }
3263
3264  /* Frame deblocking operates over a single large tile wrapping the
3265   * frame rather than the frame itself so that deblocking is
3266   * available inside RDO when needed */
3267  /* TODO: Don't apply if lossless */
3268  let levels = fs.apply_tile_state_mut(|ts| {
3269    let rec = &mut ts.rec;
3270    deblock_filter_optimize(
3271      fi,
3272      &rec.as_const(),
3273      &ts.input.as_tile(),
3274      &blocks.as_tile_blocks(),
3275      fi.width,
3276      fi.height,
3277    )
3278  });
3279  fs.deblock.levels = levels;
3280
3281  if fs.deblock.levels[0] != 0 || fs.deblock.levels[1] != 0 {
3282    fs.apply_tile_state_mut(|ts| {
3283      let rec = &mut ts.rec;
3284      deblock_filter_frame(
3285        ts.deblock,
3286        rec,
3287        &blocks.as_tile_blocks(),
3288        fi.width,
3289        fi.height,
3290        fi.sequence.bit_depth,
3291        planes,
3292      );
3293    });
3294  }
3295
3296  if fi.sequence.enable_restoration {
3297    // Until the loop filters are better pipelined, we'll need to keep
3298    // around a copy of both the deblocked and cdeffed frame.
3299    let deblocked_frame = (*fs.rec).clone();
3300
3301    /* TODO: Don't apply if lossless */
3302    if fi.sequence.enable_cdef {
3303      fs.apply_tile_state_mut(|ts| {
3304        let rec = &mut ts.rec;
3305        cdef_filter_tile(fi, &deblocked_frame, &blocks.as_tile_blocks(), rec);
3306      });
3307    }
3308    /* TODO: Don't apply if lossless */
3309    fs.restoration.lrf_filter_frame(
3310      Arc::get_mut(&mut fs.rec).unwrap(),
3311      &deblocked_frame,
3312      fi,
3313    );
3314  } else {
3315    /* TODO: Don't apply if lossless */
3316    if fi.sequence.enable_cdef {
3317      let deblocked_frame = (*fs.rec).clone();
3318      fs.apply_tile_state_mut(|ts| {
3319        let rec = &mut ts.rec;
3320        cdef_filter_tile(fi, &deblocked_frame, &blocks.as_tile_blocks(), rec);
3321      });
3322    }
3323  }
3324
3325  let (idx_max, max_len) = raw_tiles
3326    .iter()
3327    .map(Vec::len)
3328    .enumerate()
3329    .max_by_key(|&(_, len)| len)
3330    .unwrap();
3331
3332  if !fi.disable_frame_end_update_cdf {
3333    // use the biggest tile (in bytes) for CDF update
3334    fs.context_update_tile_id = idx_max;
3335    fs.cdfs = cdfs[idx_max];
3336    fs.cdfs.reset_counts();
3337  }
3338
3339  let max_tile_size_bytes = ((ILog::ilog(max_len) + 7) / 8) as u32;
3340  debug_assert!(max_tile_size_bytes > 0 && max_tile_size_bytes <= 4);
3341  fs.max_tile_size_bytes = max_tile_size_bytes;
3342
3343  build_raw_tile_group(ti, &raw_tiles, max_tile_size_bytes)
3344}
3345
3346fn build_raw_tile_group(
3347  ti: &TilingInfo, raw_tiles: &[Vec<u8>], max_tile_size_bytes: u32,
3348) -> Vec<u8> {
3349  // <https://aomediacodec.github.io/av1-spec/#general-tile-group-obu-syntax>
3350  let mut raw = Vec::new();
3351  let mut bw = BitWriter::endian(&mut raw, BigEndian);
3352  if ti.cols * ti.rows > 1 {
3353    // tile_start_and_end_present_flag
3354    bw.write_bit(false).unwrap();
3355  }
3356  bw.byte_align().unwrap();
3357  for (i, raw_tile) in raw_tiles.iter().enumerate() {
3358    let last = raw_tiles.len() - 1;
3359    if i != last {
3360      let tile_size_minus_1 = raw_tile.len() - 1;
3361      bw.write_le(max_tile_size_bytes, tile_size_minus_1 as u64).unwrap();
3362    }
3363    bw.write_bytes(raw_tile).unwrap();
3364  }
3365  raw
3366}
3367
/// A superblock whose recorded symbols wait in a queue until the loop filter
/// decisions for it can be made and written (see `check_lf_queue`).
pub struct SBSQueueEntry {
  /// Tile-relative offset of the superblock.
  pub sbo: TileSuperBlockOffset,
  /// Per plane, the index of the loop restoration unit this superblock
  /// belongs to, or -1 if the superblock is to be ignored in LRU decisions.
  pub lru_index: [i32; MAX_PLANES],
  /// Value of the block context's `cdef_coded` flag after the superblock was
  /// encoded; when set, a CDEF index is written between the two recordings.
  pub cdef_coded: bool,
  /// Recorded symbols that are replayed before the CDEF index.
  pub w_pre_cdef: WriterBase<WriterRecorder>,
  /// Recorded symbols that are replayed after the CDEF index.
  pub w_post_cdef: WriterBase<WriterRecorder>,
}
3375
3376#[profiling::function]
3377fn check_lf_queue<T: Pixel>(
3378  fi: &FrameInvariants<T>, ts: &mut TileStateMut<'_, T>,
3379  cw: &mut ContextWriter, w: &mut WriterBase<WriterEncoder>,
3380  sbs_q: &mut VecDeque<SBSQueueEntry>, last_lru_ready: &mut [i32; 3],
3381  last_lru_rdoed: &mut [i32; 3], last_lru_coded: &mut [i32; 3],
3382  deblock_p: bool,
3383) {
3384  let mut check_queue = true;
3385  let planes = if fi.sequence.chroma_sampling == ChromaSampling::Cs400 {
3386    1
3387  } else {
3388    MAX_PLANES
3389  };
3390
3391  // Walk queue from the head, see if anything is ready for RDO and flush
3392  while check_queue {
3393    if let Some(qe) = sbs_q.front_mut() {
3394      for pli in 0..planes {
3395        if qe.lru_index[pli] > last_lru_ready[pli] {
3396          check_queue = false;
3397          break;
3398        }
3399      }
3400      if check_queue {
3401        // yes, this entry is ready
3402        if qe.cdef_coded || fi.sequence.enable_restoration {
3403          // only RDO once for a given LRU.
3404
3405          // One quirk worth noting: LRUs in different planes
3406          // may be different sizes; eg, one chroma LRU may
3407          // cover four luma LRUs. However, we won't get here
3408          // until all are ready for RDO because the smaller
3409          // ones all fit inside the biggest, and the biggest
3410          // doesn't trigger until everything is done.
3411
3412          // RDO happens on all LRUs within the confines of the
3413          // biggest, all together.  If any of this SB's planes'
3414          // LRUs are RDOed, in actuality they all are.
3415
3416          // SBs tagged with a lru index of -1 are ignored in
3417          // LRU coding/rdoing decisions (but still need to rdo
3418          // for cdef).
3419          let mut already_rdoed = false;
3420          for pli in 0..planes {
3421            if qe.lru_index[pli] != -1
3422              && qe.lru_index[pli] <= last_lru_rdoed[pli]
3423            {
3424              already_rdoed = true;
3425              break;
3426            }
3427          }
3428          if !already_rdoed {
3429            rdo_loop_decision(qe.sbo, fi, ts, cw, w, deblock_p);
3430            for pli in 0..planes {
3431              if qe.lru_index[pli] != -1
3432                && last_lru_rdoed[pli] < qe.lru_index[pli]
3433              {
3434                last_lru_rdoed[pli] = qe.lru_index[pli];
3435              }
3436            }
3437          }
3438        }
3439        // write LRF information
3440        if !fi.allow_intrabc && fi.sequence.enable_restoration {
3441          // TODO: also disallow if lossless
3442          for pli in 0..planes {
3443            if qe.lru_index[pli] != -1
3444              && last_lru_coded[pli] < qe.lru_index[pli]
3445            {
3446              last_lru_coded[pli] = qe.lru_index[pli];
3447              cw.write_lrf(w, &mut ts.restoration, qe.sbo, pli);
3448            }
3449          }
3450        }
3451        // Now that loop restoration is coded, we can replay the initial block bits
3452        qe.w_pre_cdef.replay(w);
3453        // Now code CDEF into the middle of the block
3454        if qe.cdef_coded {
3455          let cdef_index = cw.bc.blocks.get_cdef(qe.sbo);
3456          cw.write_cdef(w, cdef_index, fi.cdef_bits);
3457          // Code queued symbols that come after the CDEF index
3458          qe.w_post_cdef.replay(w);
3459        }
3460        sbs_q.pop_front();
3461      }
3462    } else {
3463      check_queue = false;
3464    }
3465  }
3466}
3467
3468#[profiling::function]
3469fn encode_tile<'a, T: Pixel>(
3470  fi: &FrameInvariants<T>, ts: &'a mut TileStateMut<'_, T>,
3471  fc: &'a mut CDFContext, blocks: &'a mut TileBlocksMut<'a>,
3472  inter_cfg: &InterConfig,
3473) -> (Vec<u8>, EncoderStats) {
3474  let mut enc_stats = EncoderStats::default();
3475  let mut w = WriterEncoder::new();
3476  let planes =
3477    if fi.sequence.chroma_sampling == ChromaSampling::Cs400 { 1 } else { 3 };
3478
3479  let bc = BlockContext::new(blocks);
3480  let mut cw = ContextWriter::new(fc, bc);
3481  let mut sbs_q: VecDeque<SBSQueueEntry> = VecDeque::new();
3482  let mut last_lru_ready = [-1; 3];
3483  let mut last_lru_rdoed = [-1; 3];
3484  let mut last_lru_coded = [-1; 3];
3485
3486  // main loop
3487  for sby in 0..ts.sb_height {
3488    cw.bc.reset_left_contexts(planes);
3489
3490    for sbx in 0..ts.sb_width {
3491      cw.fc_log.clear();
3492
3493      let tile_sbo = TileSuperBlockOffset(SuperBlockOffset { x: sbx, y: sby });
3494      let mut sbs_qe = SBSQueueEntry {
3495        sbo: tile_sbo,
3496        lru_index: [-1; MAX_PLANES],
3497        cdef_coded: false,
3498        w_pre_cdef: WriterRecorder::new(),
3499        w_post_cdef: WriterRecorder::new(),
3500      };
3501
3502      let tile_bo = tile_sbo.block_offset(0, 0);
3503      cw.bc.cdef_coded = false;
3504      cw.bc.code_deltas = fi.delta_q_present;
3505
3506      let is_straddle_sbx =
3507        tile_bo.0.x + BlockSize::BLOCK_64X64.width_mi() > ts.mi_width;
3508      let is_straddle_sby =
3509        tile_bo.0.y + BlockSize::BLOCK_64X64.height_mi() > ts.mi_height;
3510
3511      // Encode SuperBlock
3512      if fi.config.speed_settings.partition.encode_bottomup
3513        || is_straddle_sbx
3514        || is_straddle_sby
3515      {
3516        encode_partition_bottomup(
3517          fi,
3518          ts,
3519          &mut cw,
3520          &mut sbs_qe.w_pre_cdef,
3521          &mut sbs_qe.w_post_cdef,
3522          BlockSize::BLOCK_64X64,
3523          tile_bo,
3524          std::f64::MAX,
3525          inter_cfg,
3526          &mut enc_stats,
3527        );
3528      } else {
3529        encode_partition_topdown(
3530          fi,
3531          ts,
3532          &mut cw,
3533          &mut sbs_qe.w_pre_cdef,
3534          &mut sbs_qe.w_post_cdef,
3535          BlockSize::BLOCK_64X64,
3536          tile_bo,
3537          &None,
3538          inter_cfg,
3539          &mut enc_stats,
3540        );
3541      }
3542
3543      {
3544        let mut check_queue = false;
3545        // queue our superblock for when the LRU is complete
3546        sbs_qe.cdef_coded = cw.bc.cdef_coded;
3547        for pli in 0..planes {
3548          if let Some((lru_x, lru_y)) =
3549            ts.restoration.planes[pli].restoration_unit_index(tile_sbo, false)
3550          {
3551            let lru_index = ts.restoration.planes[pli]
3552              .restoration_unit_countable(lru_x, lru_y)
3553              as i32;
3554            sbs_qe.lru_index[pli] = lru_index;
3555            if ts.restoration.planes[pli]
3556              .restoration_unit_last_sb_for_rdo(fi, ts.sbo, tile_sbo)
3557            {
3558              last_lru_ready[pli] = lru_index;
3559              check_queue = true;
3560            }
3561          } else {
3562            // we're likely in an area stretched into a new tile
3563            // tag this SB to be ignored in LRU decisions
3564            sbs_qe.lru_index[pli] = -1;
3565            check_queue = true;
3566          }
3567        }
3568        sbs_q.push_back(sbs_qe);
3569
3570        if check_queue && !fi.sequence.enable_delayed_loopfilter_rdo {
3571          check_lf_queue(
3572            fi,
3573            ts,
3574            &mut cw,
3575            &mut w,
3576            &mut sbs_q,
3577            &mut last_lru_ready,
3578            &mut last_lru_rdoed,
3579            &mut last_lru_coded,
3580            true,
3581          );
3582        }
3583      }
3584    }
3585  }
3586
3587  if fi.sequence.enable_delayed_loopfilter_rdo {
3588    // Solve deblocking for just this tile
3589    /* TODO: Don't apply if lossless */
3590    let deblock_levels = deblock_filter_optimize(
3591      fi,
3592      &ts.rec.as_const(),
3593      &ts.input_tile,
3594      &cw.bc.blocks.as_const(),
3595      fi.width,
3596      fi.height,
3597    );
3598
3599    if deblock_levels[0] != 0 || deblock_levels[1] != 0 {
3600      // copy reconstruction to a temp frame to restore it later
3601      let rec_copy = if planes == 3 {
3602        vec![
3603          ts.rec.planes[0].scratch_copy(),
3604          ts.rec.planes[1].scratch_copy(),
3605          ts.rec.planes[2].scratch_copy(),
3606        ]
3607      } else {
3608        vec![ts.rec.planes[0].scratch_copy()]
3609      };
3610
3611      // copy ts.deblock because we need to set some of our own values here
3612      let mut deblock_copy = *ts.deblock;
3613      deblock_copy.levels = deblock_levels;
3614
3615      // temporarily deblock the reference
3616      deblock_filter_frame(
3617        &deblock_copy,
3618        &mut ts.rec,
3619        &cw.bc.blocks.as_const(),
3620        fi.width,
3621        fi.height,
3622        fi.sequence.bit_depth,
3623        planes,
3624      );
3625
3626      // rdo lf and write
3627      check_lf_queue(
3628        fi,
3629        ts,
3630        &mut cw,
3631        &mut w,
3632        &mut sbs_q,
3633        &mut last_lru_ready,
3634        &mut last_lru_rdoed,
3635        &mut last_lru_coded,
3636        false,
3637      );
3638
3639      // copy original reference back in
3640      for pli in 0..planes {
3641        let dst = &mut ts.rec.planes[pli];
3642        let src = &rec_copy[pli];
3643        for (dst_row, src_row) in dst.rows_iter_mut().zip(src.rows_iter()) {
3644          for (out, input) in dst_row.iter_mut().zip(src_row) {
3645            *out = *input;
3646          }
3647        }
3648      }
3649    } else {
3650      // rdo lf and write
3651      check_lf_queue(
3652        fi,
3653        ts,
3654        &mut cw,
3655        &mut w,
3656        &mut sbs_q,
3657        &mut last_lru_ready,
3658        &mut last_lru_rdoed,
3659        &mut last_lru_coded,
3660        false,
3661      );
3662    }
3663  }
3664
3665  assert!(
3666    sbs_q.is_empty(),
3667    "Superblock queue not empty in tile at offset {}:{}",
3668    ts.sbo.0.x,
3669    ts.sbo.0.y
3670  );
3671  (w.done(), enc_stats)
3672}
3673
3674#[allow(unused)]
3675fn write_tile_group_header(tile_start_and_end_present_flag: bool) -> Vec<u8> {
3676  let mut buf = Vec::new();
3677  {
3678    let mut bw = BitWriter::endian(&mut buf, BigEndian);
3679    bw.write_bit(tile_start_and_end_present_flag).unwrap();
3680    bw.byte_align().unwrap();
3681  }
3682  buf
3683}
3684
3685/// Write a packet containing only the placeholder that tells the decoder
3686/// to present the already decoded frame present at `frame_to_show_map_idx`
3687///
3688/// See `av1-spec` Section 6.8.2 and 7.18.
3689///
3690/// # Panics
3691///
3692/// - If the frame packets cannot be written
3693#[profiling::function]
3694pub fn encode_show_existing_frame<T: Pixel>(
3695  fi: &FrameInvariants<T>, fs: &mut FrameState<T>, inter_cfg: &InterConfig,
3696) -> Vec<u8> {
3697  debug_assert!(fi.is_show_existing_frame());
3698  let obu_extension = 0;
3699
3700  let mut packet = Vec::new();
3701
3702  if fi.frame_type == FrameType::KEY {
3703    write_key_frame_obus(&mut packet, fi, obu_extension).unwrap();
3704  }
3705
3706  for t35 in fi.t35_metadata.iter() {
3707    let mut t35_buf = Vec::new();
3708    let mut t35_bw = BitWriter::endian(&mut t35_buf, BigEndian);
3709    t35_bw.write_t35_metadata_obu(t35).unwrap();
3710    packet.write_all(&t35_buf).unwrap();
3711    t35_buf.clear();
3712  }
3713
3714  let mut buf1 = Vec::new();
3715  let mut buf2 = Vec::new();
3716  {
3717    let mut bw2 = BitWriter::endian(&mut buf2, BigEndian);
3718    bw2.write_frame_header_obu(fi, fs, inter_cfg).unwrap();
3719  }
3720
3721  {
3722    let mut bw1 = BitWriter::endian(&mut buf1, BigEndian);
3723    bw1.write_obu_header(ObuType::OBU_FRAME_HEADER, obu_extension).unwrap();
3724  }
3725  packet.write_all(&buf1).unwrap();
3726  buf1.clear();
3727
3728  {
3729    let mut bw1 = BitWriter::endian(&mut buf1, BigEndian);
3730    bw1.write_uleb128(buf2.len() as u64).unwrap();
3731  }
3732  packet.write_all(&buf1).unwrap();
3733  buf1.clear();
3734
3735  packet.write_all(&buf2).unwrap();
3736  buf2.clear();
3737
3738  let map_idx = fi.frame_to_show_map_idx as usize;
3739  if let Some(ref rec) = fi.rec_buffer.frames[map_idx] {
3740    let fs_rec = Arc::get_mut(&mut fs.rec).unwrap();
3741    let planes =
3742      if fi.sequence.chroma_sampling == ChromaSampling::Cs400 { 1 } else { 3 };
3743    for p in 0..planes {
3744      fs_rec.planes[p].data.copy_from_slice(&rec.frame.planes[p].data);
3745    }
3746  }
3747  packet
3748}
3749
3750fn get_initial_segmentation<T: Pixel>(
3751  fi: &FrameInvariants<T>,
3752) -> SegmentationState {
3753  let segmentation = if fi.primary_ref_frame == PRIMARY_REF_NONE {
3754    None
3755  } else {
3756    let ref_frame_idx = fi.ref_frames[fi.primary_ref_frame as usize] as usize;
3757    let ref_frame = fi.rec_buffer.frames[ref_frame_idx].as_ref();
3758    ref_frame.map(|rec| rec.segmentation)
3759  };
3760
3761  // return the retrieved instance if any, a new one otherwise
3762  segmentation.unwrap_or_default()
3763}
3764
3765/// # Panics
3766///
3767/// - If the frame packets cannot be written
3768#[profiling::function]
3769pub fn encode_frame<T: Pixel>(
3770  fi: &FrameInvariants<T>, fs: &mut FrameState<T>, inter_cfg: &InterConfig,
3771) -> Vec<u8> {
3772  debug_assert!(!fi.is_show_existing_frame());
3773  let obu_extension = 0;
3774
3775  let mut packet = Vec::new();
3776
3777  if fi.enable_segmentation {
3778    fs.segmentation = get_initial_segmentation(fi);
3779    segmentation_optimize(fi, fs);
3780  }
3781  let tile_group = encode_tile_group(fi, fs, inter_cfg);
3782
3783  if fi.frame_type == FrameType::KEY {
3784    write_key_frame_obus(&mut packet, fi, obu_extension).unwrap();
3785  }
3786
3787  for t35 in fi.t35_metadata.iter() {
3788    let mut t35_buf = Vec::new();
3789    let mut t35_bw = BitWriter::endian(&mut t35_buf, BigEndian);
3790    t35_bw.write_t35_metadata_obu(t35).unwrap();
3791    packet.write_all(&t35_buf).unwrap();
3792    t35_buf.clear();
3793  }
3794
3795  let mut buf1 = Vec::new();
3796  let mut buf2 = Vec::new();
3797  {
3798    let mut bw2 = BitWriter::endian(&mut buf2, BigEndian);
3799    bw2.write_frame_header_obu(fi, fs, inter_cfg).unwrap();
3800  }
3801
3802  {
3803    let mut bw1 = BitWriter::endian(&mut buf1, BigEndian);
3804    bw1.write_obu_header(ObuType::OBU_FRAME, obu_extension).unwrap();
3805  }
3806  packet.write_all(&buf1).unwrap();
3807  buf1.clear();
3808
3809  {
3810    let mut bw1 = BitWriter::endian(&mut buf1, BigEndian);
3811    bw1.write_uleb128((buf2.len() + tile_group.len()) as u64).unwrap();
3812  }
3813  packet.write_all(&buf1).unwrap();
3814  buf1.clear();
3815
3816  packet.write_all(&buf2).unwrap();
3817  buf2.clear();
3818
3819  packet.write_all(&tile_group).unwrap();
3820  packet
3821}
3822
3823pub fn update_rec_buffer<T: Pixel>(
3824  output_frameno: u64, fi: &mut FrameInvariants<T>, fs: &FrameState<T>,
3825) {
3826  let rfs = Arc::new(ReferenceFrame {
3827    order_hint: fi.order_hint,
3828    width: fi.width as u32,
3829    height: fi.height as u32,
3830    render_width: fi.render_width,
3831    render_height: fi.render_height,
3832    frame: fs.rec.clone(),
3833    input_hres: fs.input_hres.clone(),
3834    input_qres: fs.input_qres.clone(),
3835    cdfs: fs.cdfs,
3836    frame_me_stats: fs.frame_me_stats.clone(),
3837    output_frameno,
3838    segmentation: fs.segmentation,
3839  });
3840  for i in 0..REF_FRAMES {
3841    if (fi.refresh_frame_flags & (1 << i)) != 0 {
3842      fi.rec_buffer.frames[i] = Some(Arc::clone(&rfs));
3843      fi.rec_buffer.deblock[i] = fs.deblock;
3844    }
3845  }
3846}
3847
#[cfg(test)]
mod test {
  use super::*;

  /// `PARTITION_SPLIT` must stay the final entry of `RAV1E_PARTITION_TYPES`.
  #[test]
  fn check_partition_types_order() {
    assert_eq!(
      RAV1E_PARTITION_TYPES.last(),
      Some(&PartitionType::PARTITION_SPLIT)
    );
  }
}