rav1e/api/config/
encoder.rs

1// Copyright (c) 2020-2022, The rav1e contributors. All rights reserved
2//
3// This source code is subject to the terms of the BSD 2 Clause License and
4// the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
5// was not distributed with this source code in the LICENSE file, you can
6// obtain it at www.aomedia.org/license/software. If the Alliance for Open
7// Media Patent License 1.0 was not distributed with this source code in the
8// PATENTS file, you can obtain it at www.aomedia.org/license/patent.
9
10use itertools::*;
11
12use crate::api::color::*;
13use crate::api::config::GrainTableSegment;
14use crate::api::{Rational, SpeedSettings};
15use crate::encoder::Tune;
16use crate::serialize::{Deserialize, Serialize};
17
18use std::fmt;
19
// Largest accepted value for `rdo_lookahead_frames`.
// We add 1 to rdo_lookahead_frames in a bunch of places, so the limit is
// kept one below `usize::MAX` (presumably so that `+ 1` cannot overflow).
pub(crate) const MAX_RDO_LOOKAHEAD_FRAMES: usize = usize::MAX - 1;
// Largest accepted value for `max_key_frame_interval`; it is also the value
// `EncoderConfig::set_key_frame_interval()` stores when asked for an
// "infinite" interval.
// Due to the math in RCState::new() regarding the reservoir frame delay.
pub(crate) const MAX_MAX_KEY_FRAME_INTERVAL: u64 = i32::MAX as u64 / 3;
24
/// Encoder settings which impact the produced bitstream.
///
/// A configuration is normally obtained from [`Default`] or from
/// [`with_speed_preset()`], after which the public fields can be adjusted
/// individually.
///
/// [`with_speed_preset()`]: #method.with_speed_preset
#[derive(Clone, Debug, Serialize, Deserialize)]
pub struct EncoderConfig {
  // output size
  /// Width of the frames in pixels.
  pub width: usize,
  /// Height of the frames in pixels.
  pub height: usize,
  /// Sample aspect ratio (for anamorphic video).
  ///
  /// Used by [`render_size()`] to derive the render dimensions.
  ///
  /// [`render_size()`]: #method.render_size
  pub sample_aspect_ratio: Rational,
  /// Video time base.
  ///
  /// [`frame_rate()`] is computed as the reciprocal of this value.
  ///
  /// [`frame_rate()`]: #method.frame_rate
  pub time_base: Rational,

  // data format and ancillary color information
  /// Bit depth.
  pub bit_depth: usize,
  /// Chroma subsampling.
  pub chroma_sampling: ChromaSampling,
  /// Chroma sample position.
  pub chroma_sample_position: ChromaSamplePosition,
  /// Pixel value range.
  pub pixel_range: PixelRange,
  /// Content color description (primaries, transfer characteristics, matrix).
  ///
  /// [`is_hdr()`] inspects the transfer characteristics stored here.
  ///
  /// [`is_hdr()`]: #method.is_hdr
  pub color_description: Option<ColorDescription>,
  /// HDR mastering display parameters.
  pub mastering_display: Option<MasteringDisplay>,
  /// HDR content light parameters.
  pub content_light: Option<ContentLight>,

  /// AV1 level index to target (0-31).
  /// If None, allow the encoder to decide.
  /// Currently, rav1e is unable to guarantee that the output bitstream
  /// meets the rate limitations of the specified level.
  pub level_idx: Option<u8>,

  /// Enable signaling timing info in the bitstream.
  pub enable_timing_info: bool,

  /// Still picture mode flag.
  pub still_picture: bool,

  /// Flag to force all frames to be error resilient.
  pub error_resilient: bool,

  /// Interval between switch frames (0 to disable).
  pub switch_frame_interval: u64,

  // encoder configuration
  /// The *minimum* interval between two keyframes.
  pub min_key_frame_interval: u64,
  /// The *maximum* interval between two keyframes.
  ///
  /// [`set_key_frame_interval()`] translates an input of 0 into the largest
  /// supported interval before storing it here.
  ///
  /// [`set_key_frame_interval()`]: #method.set_key_frame_interval
  pub max_key_frame_interval: u64,
  /// The number of temporal units over which to distribute the reservoir
  /// usage.
  pub reservoir_frame_delay: Option<i32>,
  /// Flag to enable low latency mode.
  ///
  /// In this mode the frame reordering is disabled.
  pub low_latency: bool,
  /// The base quantizer to use.
  pub quantizer: usize,
  /// The minimum allowed base quantizer to use in bitrate mode.
  pub min_quantizer: u8,
  /// The target bitrate for the bitrate mode.
  pub bitrate: i32,
  /// Metric to tune the quality for.
  pub tune: Tune,
  /// Parameters for grain synthesis.
  ///
  /// Each segment is looked up by timestamp and applies to the half-open
  /// range from its `start_time` (inclusive) to its `end_time` (exclusive).
  pub film_grain_params: Option<Vec<GrainTableSegment>>,
  /// Number of tiles horizontally. Must be a power of two.
  ///
  /// Overridden by [`tiles`], if present.
  ///
  /// [`tiles`]: #structfield.tiles
  pub tile_cols: usize,
  /// Number of tiles vertically. Must be a power of two.
  ///
  /// Overridden by [`tiles`], if present.
  ///
  /// [`tiles`]: #structfield.tiles
  pub tile_rows: usize,
  /// Total number of tiles desired.
  ///
  /// Encoder will try to optimally split to reach this number of tiles,
  /// rounded up. Overrides [`tile_cols`] and [`tile_rows`].
  ///
  /// [`tile_cols`]: #structfield.tile_cols
  /// [`tile_rows`]: #structfield.tile_rows
  pub tiles: usize,

  /// Settings which affect the encoding speed vs. quality trade-off.
  pub speed_settings: SpeedSettings,
}
118
119/// Default preset for `EncoderConfig`: it is a balance between quality and
120/// speed. See [`with_speed_preset()`].
121///
122/// [`with_speed_preset()`]: struct.EncoderConfig.html#method.with_speed_preset
123impl Default for EncoderConfig {
124  fn default() -> Self {
125    const DEFAULT_SPEED: u8 = 6;
126    Self::with_speed_preset(DEFAULT_SPEED)
127  }
128}
129
130impl EncoderConfig {
131  /// This is a preset which provides default settings according to a speed
132  /// value in the specific range 0–10. Each speed value corresponds to a
133  /// different preset. See [`from_preset()`]. If the input value is greater
134  /// than 10, it will result in the same settings as 10.
135  ///
136  /// [`from_preset()`]: struct.SpeedSettings.html#method.from_preset
137  pub fn with_speed_preset(speed: u8) -> Self {
138    EncoderConfig {
139      width: 640,
140      height: 480,
141      sample_aspect_ratio: Rational { num: 1, den: 1 },
142      time_base: Rational { num: 1, den: 30 },
143
144      bit_depth: 8,
145      chroma_sampling: ChromaSampling::Cs420,
146      chroma_sample_position: ChromaSamplePosition::Unknown,
147      pixel_range: Default::default(),
148      color_description: None,
149      mastering_display: None,
150      content_light: None,
151
152      level_idx: None,
153
154      enable_timing_info: false,
155
156      still_picture: false,
157
158      error_resilient: false,
159      switch_frame_interval: 0,
160
161      min_key_frame_interval: 12,
162      max_key_frame_interval: 240,
163      min_quantizer: 0,
164      reservoir_frame_delay: None,
165      low_latency: false,
166      quantizer: 100,
167      bitrate: 0,
168      tune: Tune::default(),
169      film_grain_params: None,
170      tile_cols: 0,
171      tile_rows: 0,
172      tiles: 0,
173      speed_settings: SpeedSettings::from_preset(speed),
174    }
175  }
176
177  /// Sets the minimum and maximum keyframe interval, handling special cases as needed.
178  pub fn set_key_frame_interval(
179    &mut self, min_interval: u64, max_interval: u64,
180  ) {
181    self.min_key_frame_interval = min_interval;
182
183    // Map an input value of 0 to an infinite interval
184    self.max_key_frame_interval = if max_interval == 0 {
185      MAX_MAX_KEY_FRAME_INTERVAL
186    } else {
187      max_interval
188    };
189  }
190
191  /// Returns the video frame rate computed from [`time_base`].
192  ///
193  /// [`time_base`]: #structfield.time_base
194  pub fn frame_rate(&self) -> f64 {
195    Rational::from_reciprocal(self.time_base).as_f64()
196  }
197
198  /// Computes the render width and height of the stream based
199  /// on [`width`], [`height`], and [`sample_aspect_ratio`].
200  ///
201  /// [`width`]: #structfield.width
202  /// [`height`]: #structfield.height
203  /// [`sample_aspect_ratio`]: #structfield.sample_aspect_ratio
204  pub fn render_size(&self) -> (usize, usize) {
205    let sar = self.sample_aspect_ratio.as_f64();
206
207    if sar > 1.0 {
208      ((self.width as f64 * sar).round() as usize, self.height)
209    } else {
210      (self.width, (self.height as f64 / sar).round() as usize)
211    }
212  }
213
214  /// Is temporal RDO enabled ?
215  #[inline]
216  pub const fn temporal_rdo(&self) -> bool {
217    // Note: This function is called frequently, unlike most other functions here.
218
219    // `compute_distortion_scale` computes a scaling factor for the distortion
220    // of an 8x8 block (4x4 blocks simply use the scaling of the enclosing 8x8
221    // block). As long as distortion is always computed on <= 8x8 blocks, this
222    // has the property that the scaled distortion of a 2Nx2N block is always
223    // equal to the sum of the scaled distortions of the NxN sub-blocks it's
224    // made of, this is a necessary property to be able to do RDO between
225    // multiple partition sizes properly. Unfortunately, when tx domain
226    // distortion is used, distortion is only known at the tx block level which
227    // might be bigger than 8x8. So temporal RDO is always disabled in that case.
228    !self.speed_settings.transform.tx_domain_distortion
229  }
230
231  /// Describes whether the output is targeted as HDR
232  pub fn is_hdr(&self) -> bool {
233    self
234      .color_description
235      .map(|colors| {
236        colors.transfer_characteristics == TransferCharacteristics::SMPTE2084
237      })
238      .unwrap_or(false)
239  }
240
241  pub(crate) fn get_film_grain_at(
242    &self, timestamp: u64,
243  ) -> Option<&GrainTableSegment> {
244    self.film_grain_params.as_ref().and_then(|entries| {
245      entries.iter().find(|entry| {
246        timestamp >= entry.start_time && timestamp < entry.end_time
247      })
248    })
249  }
250
251  pub(crate) fn get_film_grain_mut_at(
252    &mut self, timestamp: u64,
253  ) -> Option<&mut GrainTableSegment> {
254    self.film_grain_params.as_mut().and_then(|entries| {
255      entries.iter_mut().find(|entry| {
256        timestamp >= entry.start_time && timestamp < entry.end_time
257      })
258    })
259  }
260}
261
262impl fmt::Display for EncoderConfig {
263  fn fmt(&self, f: &mut fmt::Formatter) -> Result<(), fmt::Error> {
264    let pairs = [
265      ("keyint_min", self.min_key_frame_interval.to_string()),
266      ("keyint_max", self.max_key_frame_interval.to_string()),
267      ("quantizer", self.quantizer.to_string()),
268      ("bitrate", self.bitrate.to_string()),
269      ("min_quantizer", self.min_quantizer.to_string()),
270      ("low_latency", self.low_latency.to_string()),
271      ("tune", self.tune.to_string()),
272      (
273        "rdo_lookahead_frames",
274        self.speed_settings.rdo_lookahead_frames.to_string(),
275      ),
276      (
277        "multiref",
278        (!self.low_latency || self.speed_settings.multiref).to_string(),
279      ),
280      ("fast_deblock", self.speed_settings.fast_deblock.to_string()),
281      (
282        "scene_detection_mode",
283        self.speed_settings.scene_detection_mode.to_string(),
284      ),
285      ("cdef", self.speed_settings.cdef.to_string()),
286      ("lrf", self.speed_settings.lrf.to_string()),
287      ("enable_timing_info", self.enable_timing_info.to_string()),
288      (
289        "min_block_size",
290        self.speed_settings.partition.partition_range.min.to_string(),
291      ),
292      (
293        "max_block_size",
294        self.speed_settings.partition.partition_range.max.to_string(),
295      ),
296      (
297        "encode_bottomup",
298        self.speed_settings.partition.encode_bottomup.to_string(),
299      ),
300      (
301        "non_square_partition_max_threshold",
302        self
303          .speed_settings
304          .partition
305          .non_square_partition_max_threshold
306          .to_string(),
307      ),
308      (
309        "reduced_tx_set",
310        self.speed_settings.transform.reduced_tx_set.to_string(),
311      ),
312      (
313        "tx_domain_distortion",
314        self.speed_settings.transform.tx_domain_distortion.to_string(),
315      ),
316      (
317        "tx_domain_rate",
318        self.speed_settings.transform.tx_domain_rate.to_string(),
319      ),
320      (
321        "rdo_tx_decision",
322        self.speed_settings.transform.rdo_tx_decision.to_string(),
323      ),
324      (
325        "prediction_modes",
326        self.speed_settings.prediction.prediction_modes.to_string(),
327      ),
328      (
329        "fine_directional_intra",
330        self.speed_settings.prediction.fine_directional_intra.to_string(),
331      ),
332      (
333        "include_near_mvs",
334        self.speed_settings.motion.include_near_mvs.to_string(),
335      ),
336      (
337        "use_satd_subpel",
338        self.speed_settings.motion.use_satd_subpel.to_string(),
339      ),
340    ];
341    write!(
342      f,
343      "{}",
344      pairs.iter().map(|pair| format!("{}={}", pair.0, pair.1)).join(" ")
345    )
346  }
347}