Skip to main content

rav1e/
rate.rs

1// Copyright (c) 2019-2022, The rav1e contributors. All rights reserved
2//
3// This source code is subject to the terms of the BSD 2 Clause License and
4// the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
5// was not distributed with this source code in the LICENSE file, you can
6// obtain it at www.aomedia.org/license/software. If the Alliance for Open
7// Media Patent License 1.0 was not distributed with this source code in the
8// PATENTS file, you can obtain it at www.aomedia.org/license/patent.
9
10use std::cmp;
11
12use crate::api::color::ChromaSampling;
13use crate::api::ContextInner;
14use crate::encoder::TEMPORAL_DELIMITER;
15use crate::quantize::{ac_q, dc_q, select_ac_qi, select_dc_qi};
16use crate::util::{
17  bexp64, bexp_q24, blog64, clamp, q24_to_q57, q57, q57_to_q24, Pixel,
18};
19
// The number of frame sub-types for which we track distinct parameters.
// This does not include FRAME_SUBTYPE_SEF, because we don't need to do any
//  parameter tracking for Show Existing Frame frames.
pub const FRAME_NSUBTYPES: usize = 4;

// Indices into the per-subtype model arrays (log_scale, exp, scalefilter).
pub const FRAME_SUBTYPE_I: usize = 0;
pub const FRAME_SUBTYPE_P: usize = 1;
#[allow(unused)]
pub const FRAME_SUBTYPE_B0: usize = 2;
#[allow(unused)]
pub const FRAME_SUBTYPE_B1: usize = 3;
// Show Existing Frame: equal to FRAME_NSUBTYPES, so it indexes only the extra
//  slot of the (FRAME_NSUBTYPES + 1)-sized counting arrays, never the model
//  arrays.
pub const FRAME_SUBTYPE_SEF: usize = 4;
32
// Values of RCState::twopass_state (the two-pass mode state machine).
const PASS_SINGLE: i32 = 0;
const PASS_1: i32 = 1;
const PASS_2: i32 = 2;
const PASS_2_PLUS_1: i32 = 3;

// Magic value at the start of the 2-pass stats file
//  (reads as the bytes "VA2P" when stored little-endian).
const TWOPASS_MAGIC: i32 = 0x50324156;
// Version number for the 2-pass stats file
const TWOPASS_VERSION: i32 = 1;
// 4 byte magic + 4 byte version + 4 byte TU count + 4 byte SEF frame count
//  + FRAME_NSUBTYPES*(4 byte frame count + 1 byte exp + 8 byte scale_sum)
pub(crate) const TWOPASS_HEADER_SZ: usize = 16 + FRAME_NSUBTYPES * (4 + 1 + 8);
// 4 byte frame type (show_frame and fti jointly coded) + 4 byte log_scale_q24
const TWOPASS_PACKET_SZ: usize = 8;

// Nominal bit cost charged for a Show Existing Frame.
// NOTE(review): used by rate accounting outside this chunk — confirm usage.
const SEF_BITS: i64 = 24;

// The scale of AV1 quantizer tables (relative to the pixel domain), i.e., Q3.
pub(crate) const QSCALE: i32 = 3;

// We clamp the actual I and B frame delays to a minimum of 10 to work
//  within the range of values where later incrementing the delay works as
//  designed.
// 10 is not an exact choice, but rather a good working trade-off.
const INTER_DELAY_TARGET_MIN: i32 = 10;
58
// The base quantizer for a frame is adjusted based on the frame type using the
//  formula (log_qp*mqp + dqp), where log_qp is the base-2 logarithm of the
//  "linear" quantizer (the actual factor by which coefficients are divided).
// Because log_qp has an implicit offset built in based on the scale of the
//  coefficients (which depends on the pixel bit depth and the transform
//  scale), we normalize the quantizer to the equivalent for 8-bit pixels with
//  orthonormal transforms for the purposes of rate modeling.
// Per-subtype multiplicative factors in Q12; currently all unity (1.0).
const MQP_Q12: &[i32; FRAME_NSUBTYPES] = &[
  // TODO: Use a const function once f64 operations in const functions are
  //  stable.
  (1.0 * (1 << 12) as f64) as i32,
  (1.0 * (1 << 12) as f64) as i32,
  (1.0 * (1 << 12) as f64) as i32,
  (1.0 * (1 << 12) as f64) as i32,
];

// Per-subtype additive offsets in Q57: negative for I frames (lower
//  quantizer), zero for P, positive and doubled-positive for the two B levels.
// The ratio 33_810_170.0 / 86_043_287.0 was derived by approximating the median
// of a change of 15 quantizer steps in the quantizer tables.
const DQP_Q57: &[i64; FRAME_NSUBTYPES] = &[
  (-(33_810_170.0 / 86_043_287.0) * (1i64 << 57) as f64) as i64,
  (0.0 * (1i64 << 57) as f64) as i64,
  ((33_810_170.0 / 86_043_287.0) * (1i64 << 57) as f64) as i64,
  (2.0 * (33_810_170.0 / 86_043_287.0) * (1i64 << 57) as f64) as i64,
];
83
// For 8-bit-depth inter frames, log_q_y is derived from log_target_q with a
//  linear model:
//  log_q_y = log_target_q + (log_target_q >> 32) * Q_MODEL_MUL + Q_MODEL_ADD
// Both tables are indexed by ChromaSampling cast to usize.
// Derivation of the linear models:
//  https://github.com/xiph/rav1e/blob/d02bdbd3b0b7b2cb9fc301031cc6a4e67a567a5c/doc/quantizer-weight-analysis.ipynb
#[rustfmt::skip]
const Q_MODEL_ADD: [i64; 4] = [
  // 4:2:0
  -0x24_4FE7_ECB3_DD90,
  // 4:2:2
  -0x37_41DA_38AD_0924,
  // 4:4:4
  -0x70_83BD_A626_311C,
  // 4:0:0
  0,
];
// Multiplicative terms of the same linear model, in the same order.
#[rustfmt::skip]
const Q_MODEL_MUL: [i64; 4] = [
  // 4:2:0
  0x8A0_50DD,
  // 4:2:2
  0x887_7666,
  // 4:4:4
  0x8D4_A712,
  // 4:0:0
  0,
];
111
#[rustfmt::skip]
const ROUGH_TAN_LOOKUP: &[u16; 18] = &[
     0,   358,   722,  1098,  1491,  1910,
  2365,  2868,  3437,  4096,  4881,  5850,
  7094,  8784, 11254, 15286, 23230, 46817
];

// A digital approximation of a 2nd-order low-pass Bessel follower.
// We use this for rate control because it has fast reaction time, but is
//  critically damped.
pub struct IIRBessel2 {
  // Feedback coefficients, Q24.
  c: [i32; 2],
  // Input gain, Q24.
  g: i32,
  // The two most recent inputs.
  x: [i32; 2],
  // The two most recent outputs.
  y: [i32; 2],
}

// Piecewise-linear interpolation into ROUGH_TAN_LOOKUP, used to pre-warp the
//  filter cutoff when deriving coefficients.
// alpha is Q24 in the range [0,0.5).
// The return value is 5.12.
fn warp_alpha(alpha: i32) -> i32 {
  // The table spans [0, 0.5) in steps of 1/36; clamp i so i + 1 stays in
  //  bounds.
  let i = ((alpha * 36) >> 24).min(16);
  let t0 = ROUGH_TAN_LOOKUP[i as usize];
  let t1 = ROUGH_TAN_LOOKUP[i as usize + 1];
  // d is the Q24 fractional position between table entries i and i + 1.
  let d = alpha * 36 - (i << 24);
  // Interpolate t0 + (t1 - t0)*d, widening to i64 BEFORE the shift.
  // Widening first is essential: every adjacent table delta exceeds 255
  //  (the smallest is 358), so computing (t1 - t0) << 8 in u16 silently
  //  discards the high bits (Rust shifts truncate value bits without
  //  panicking) and corrupts the interpolation slope whenever d != 0.
  ((((t0 as i64) << 32) + (((t1 as i64) - (t0 as i64)) << 8) * (d as i64))
    >> 32) as i32
}
138
139// Compute Bessel filter coefficients with the specified delay.
140// Return: Filter parameters (c[0], c[1], g).
141fn iir_bessel2_get_parameters(delay: i32) -> (i32, i32, i32) {
142  // This borrows some code from an unreleased version of Postfish.
143  // See the recipe at http://unicorn.us.com/alex/2polefilters.html for details
144  //  on deriving the filter coefficients.
145  // alpha is Q24
146  let alpha = (1 << 24) / delay;
147  // warp is 7.12 (5.12? the max value is 70386 in Q12).
148  let warp = warp_alpha(alpha).max(1) as i64;
149  // k1 is 9.12 (6.12?)
150  let k1 = 3 * warp;
151  // k2 is 16.24 (11.24?)
152  let k2 = k1 * warp;
153  // d is 16.15 (10.15?)
154  let d = ((((1 << 12) + k1) << 12) + k2 + 256) >> 9;
155  // a is 0.32, since d is larger than both 1.0 and k2
156  let a = (k2 << 23) / d;
157  // ik2 is 25.24
158  let ik2 = (1i64 << 48) / k2;
159  // b1 is Q56; in practice, the integer ranges between -2 and 2.
160  let b1 = 2 * a * (ik2 - (1i64 << 24));
161  // b2 is Q56; in practice, the integer ranges between -2 and 2.
162  let b2 = (1i64 << 56) - ((4 * a) << 24) - b1;
163  // All of the filter parameters are Q24.
164  (
165    ((b1 + (1i64 << 31)) >> 32) as i32,
166    ((b2 + (1i64 << 31)) >> 32) as i32,
167    ((a + 128) >> 8) as i32,
168  )
169}
170
171impl IIRBessel2 {
172  pub fn new(delay: i32, value: i32) -> IIRBessel2 {
173    let (c0, c1, g) = iir_bessel2_get_parameters(delay);
174    IIRBessel2 { c: [c0, c1], g, x: [value, value], y: [value, value] }
175  }
176
177  // Re-initialize Bessel filter coefficients with the specified delay.
178  // This does not alter the x/y state, but changes the reaction time of the
179  //  filter.
180  // Altering the time constant of a reactive filter without altering internal
181  //  state is something that has to be done carefully, but our design operates
182  //  at high enough delays and with small enough time constant changes to make
183  //  it safe.
184  pub fn reinit(&mut self, delay: i32) {
185    let (c0, c1, g) = iir_bessel2_get_parameters(delay);
186    self.c[0] = c0;
187    self.c[1] = c1;
188    self.g = g;
189  }
190
191  pub fn update(&mut self, x: i32) -> i32 {
192    let c0 = self.c[0] as i64;
193    let c1 = self.c[1] as i64;
194    let g = self.g as i64;
195    let x0 = self.x[0] as i64;
196    let x1 = self.x[1] as i64;
197    let y0 = self.y[0] as i64;
198    let y1 = self.y[1] as i64;
199    let ya =
200      ((((x as i64) + x0 * 2 + x1) * g + y0 * c0 + y1 * c1 + (1i64 << 23))
201        >> 24) as i32;
202    self.x[1] = self.x[0];
203    self.x[0] = x;
204    self.y[1] = self.y[0];
205    self.y[0] = ya;
206    ya
207  }
208}
209
// Per-frame statistics recorded in pass 1 and consumed in pass 2.
#[derive(Copy, Clone)]
struct RCFrameMetrics {
  // The log base 2 of the scale factor for this frame in Q24 format.
  log_scale_q24: i32,
  // The frame type from pass 1
  fti: usize,
  // Whether or not the frame was hidden in pass 1
  show_frame: bool,
  // TODO: The input frame number corresponding to this frame in the input.
  // input_frameno: u32
  // TODO vfr: PTS
}

impl RCFrameMetrics {
  // A zeroed placeholder, used before any real pass-1 data is parsed.
  const fn new() -> RCFrameMetrics {
    RCFrameMetrics { show_frame: false, fti: 0, log_scale_q24: 0 }
  }
}
228
/// Rate control pass summary
///
/// It contains encoding information related to the whole previous
/// encoding pass.
#[derive(Debug, Default, Clone)]
pub struct RCSummary {
  // The number of temporal units encoded in the pass.
  pub(crate) ntus: i32,
  // Frame counts per subtype; the extra slot counts Show Existing Frames.
  nframes: [i32; FRAME_NSUBTYPES + 1],
  // The rate-model exponent recorded for each frame subtype.
  exp: [u8; FRAME_NSUBTYPES],
  // The sum of the scale factors for each frame subtype (always
  //  non-negative; validated during parsing).
  scale_sum: [i64; FRAME_NSUBTYPES],
  // The total frame count (sum of all entries of nframes).
  pub(crate) total: i32,
}
241
// Backing storage to deserialize Summary and Per-Frame pass data
//
// Can store up to a full header size since it is the largest of the two
// packet kinds.
pub(crate) struct RCDeserialize {
  // The current byte position in the frame metrics buffer.
  pass2_buffer_pos: usize,
  // In pass 2, this represents the number of bytes that are available in the
  //  input buffer.
  pass2_buffer_fill: usize,
  // Buffer for current frame metrics in pass 2.
  // Sized to TWOPASS_HEADER_SZ, the larger of the two packet kinds.
  pass2_buffer: [u8; TWOPASS_HEADER_SZ],
}
255
256impl Default for RCDeserialize {
257  fn default() -> Self {
258    RCDeserialize {
259      pass2_buffer: [0; TWOPASS_HEADER_SZ],
260      pass2_buffer_pos: 0,
261      pass2_buffer_fill: 0,
262    }
263  }
264}
265
266impl RCDeserialize {
267  // Fill the backing storage by reading enough bytes from the
268  // buf slice until goal bytes are available for parsing.
269  //
270  // goal must be at most TWOPASS_HEADER_SZ.
271  pub(crate) fn buffer_fill(
272    &mut self, buf: &[u8], consumed: usize, goal: usize,
273  ) -> usize {
274    let mut consumed = consumed;
275    while self.pass2_buffer_fill < goal && consumed < buf.len() {
276      self.pass2_buffer[self.pass2_buffer_fill] = buf[consumed];
277      self.pass2_buffer_fill += 1;
278      consumed += 1;
279    }
280    consumed
281  }
282
283  // Read the next n bytes as i64.
284  // n must be within 1 and 8
285  fn unbuffer_val(&mut self, n: usize) -> i64 {
286    let mut bytes = n;
287    let mut ret = 0;
288    let mut shift = 0;
289    while bytes > 0 {
290      bytes -= 1;
291      ret |= (self.pass2_buffer[self.pass2_buffer_pos] as i64) << shift;
292      self.pass2_buffer_pos += 1;
293      shift += 8;
294    }
295    ret
296  }
297
298  // Read metrics for the next frame.
299  fn parse_metrics(&mut self) -> Result<RCFrameMetrics, String> {
300    debug_assert!(self.pass2_buffer_fill >= TWOPASS_PACKET_SZ);
301    let ft_val = self.unbuffer_val(4);
302    let show_frame = (ft_val >> 31) != 0;
303    let fti = (ft_val & 0x7FFFFFFF) as usize;
304    // Make sure the frame type is valid.
305    if fti > FRAME_NSUBTYPES {
306      return Err("Invalid frame type".to_string());
307    }
308    let log_scale_q24 = self.unbuffer_val(4) as i32;
309    Ok(RCFrameMetrics { log_scale_q24, fti, show_frame })
310  }
311
312  // Read the summary header data.
313  pub(crate) fn parse_summary(&mut self) -> Result<RCSummary, String> {
314    // check the magic value and version number.
315    if self.unbuffer_val(4) != TWOPASS_MAGIC as i64 {
316      return Err("Magic value mismatch".to_string());
317    }
318    if self.unbuffer_val(4) != TWOPASS_VERSION as i64 {
319      return Err("Version number mismatch".to_string());
320    }
321    let mut s =
322      RCSummary { ntus: self.unbuffer_val(4) as i32, ..Default::default() };
323
324    // Make sure the file claims to have at least one TU.
325    // Otherwise we probably got the placeholder data from an aborted
326    //  pass 1.
327    if s.ntus < 1 {
328      return Err("No TUs found in first pass summary".to_string());
329    }
330    let mut total: i32 = 0;
331    for nframes in s.nframes.iter_mut() {
332      let n = self.unbuffer_val(4) as i32;
333      if n < 0 {
334        return Err("Got negative frame count".to_string());
335      }
336      total = total
337        .checked_add(n)
338        .ok_or_else(|| "Frame count too large".to_string())?;
339
340      *nframes = n;
341    }
342
343    // We can't have more TUs than frames.
344    if s.ntus > total {
345      return Err("More TUs than frames".to_string());
346    }
347
348    s.total = total;
349
350    for exp in s.exp.iter_mut() {
351      *exp = self.unbuffer_val(1) as u8;
352    }
353
354    for scale_sum in s.scale_sum.iter_mut() {
355      *scale_sum = self.unbuffer_val(8);
356      if *scale_sum < 0 {
357        return Err("Got negative scale sum".to_string());
358      }
359    }
360    Ok(s)
361  }
362}
363
// The complete rate-control state, covering single-pass operation and both
//  passes of two-pass operation.
pub struct RCState {
  // The target bit-rate in bits per second.
  target_bitrate: i32,
  // The number of TUs over which to distribute the reservoir usage.
  // We use TUs because in our leaky bucket model, we only add bits to the
  //  reservoir on TU boundaries.
  reservoir_frame_delay: i32,
  // Whether or not the reservoir_frame_delay was explicitly specified by the
  //  user, or is the default value.
  reservoir_frame_delay_is_set: bool,
  // The maximum quantizer index to allow (for the luma AC coefficients, other
  //  quantizers will still be adjusted to match).
  maybe_ac_qi_max: Option<u8>,
  // The minimum quantizer index to allow (for the luma AC coefficients).
  ac_qi_min: u8,
  // Will we drop frames to meet bitrate requirements?
  drop_frames: bool,
  // Do we respect the maximum reservoir fullness?
  cap_overflow: bool,
  // Can the reservoir go negative?
  cap_underflow: bool,
  // The log of the first-pass base quantizer.
  pass1_log_base_q: i64,
  // Two-pass mode state.
  // PASS_SINGLE => 1-pass encoding.
  // PASS_1 => 1st pass of 2-pass encoding.
  // PASS_2 => 2nd pass of 2-pass encoding.
  // PASS_2_PLUS_1 => 2nd pass of 2-pass encoding, but also emitting pass 1
  //  data again.
  twopass_state: i32,
  // The log of the number of pixels in a frame in Q57 format.
  log_npixels: i64,
  // The target average bits per Temporal Unit (input frame).
  bits_per_tu: i64,
  // The current bit reservoir fullness (bits available to be used).
  reservoir_fullness: i64,
  // The target buffer fullness.
  // This is where we'd like to be by the last keyframe that appears in the
  //  next reservoir_frame_delay frames.
  reservoir_target: i64,
  // The maximum buffer fullness (total size of the buffer).
  reservoir_max: i64,
  // The log of estimated scale factor for the rate model in Q57 format.
  //
  // TODO: Convert to Q23 or figure out a better way to avoid overflow
  // once 2-pass mode is introduced, if required.
  log_scale: [i64; FRAME_NSUBTYPES],
  // The exponent used in the rate model in Q6 format.
  exp: [u8; FRAME_NSUBTYPES],
  // The log of an estimated scale factor used to obtain the real framerate,
  //  for VFR sources or, e.g., 12 fps content doubled to 24 fps, etc.
  // TODO vfr: log_vfr_scale: i64,
  // Second-order lowpass filters to track scale and VFR.
  scalefilter: [IIRBessel2; FRAME_NSUBTYPES],
  // TODO vfr: vfrfilter: IIRBessel2,
  // The number of frames of each type we have seen, for filter adaptation
  //  purposes.
  // These are only 32 bits to guarantee that we can sum the scales over the
  //  whole file without overflow in a 64-bit int.
  // That limits us to 2.268 years at 60 fps (minus 33% with re-ordering).
  nframes: [i32; FRAME_NSUBTYPES + 1],
  inter_delay: [i32; FRAME_NSUBTYPES - 1],
  inter_delay_target: i32,
  // The total accumulated estimation bias.
  rate_bias: i64,
  // The number of (non-Show Existing Frame) frames that have been encoded.
  nencoded_frames: i64,
  // The number of Show Existing Frames that have been emitted.
  nsef_frames: i64,
  // Buffer for current frame metrics in pass 1.
  pass1_buffer: [u8; TWOPASS_HEADER_SZ],
  // Whether or not the user has retrieved the pass 1 data for the last frame.
  // For PASS_1 or PASS_2_PLUS_1 encoding, this is set to false after each
  //  frame is encoded, and must be set to true by calling twopass_out() before
  //  the next frame can be encoded.
  pub pass1_data_retrieved: bool,
  // Marks whether or not the user has retrieved the summary data at the end of
  //  the encode.
  pass1_summary_retrieved: bool,
  // Whether or not the user has provided enough data to encode in the second
  //  pass.
  // For PASS_2 or PASS_2_PLUS_1 encoding, this is set to false after each
  //  frame, and must be set to true by calling twopass_in() before the next
  //  frame can be encoded.
  pass2_data_ready: bool,
  // TODO: Add a way to force the next frame to be a keyframe in 2-pass mode.
  // Right now we are relying on keyframe detection to detect the same
  //  keyframes.
  // The metrics for the previous frame.
  prev_metrics: RCFrameMetrics,
  // The metrics for the current frame.
  cur_metrics: RCFrameMetrics,
  // The buffered metrics for future frames.
  frame_metrics: Vec<RCFrameMetrics>,
  // The total number of frames still in use in the circular metric buffer.
  nframe_metrics: usize,
  // The index of the current frame in the circular metric buffer.
  frame_metrics_head: usize,
  // Data deserialization
  des: RCDeserialize,
  // The TU count encoded so far.
  ntus: i32,
  // The TU count for the whole file.
  ntus_total: i32,
  // The remaining TU count.
  ntus_left: i32,
  // The frame count of each frame subtype in the whole file.
  nframes_total: [i32; FRAME_NSUBTYPES + 1],
  // The sum of those counts.
  nframes_total_total: i32,
  // The number of frames of each subtype yet to be processed.
  nframes_left: [i32; FRAME_NSUBTYPES + 1],
  // The sum of the scale values for each frame subtype.
  scale_sum: [i64; FRAME_NSUBTYPES],
  // The number of TUs represented by the current scale sums.
  scale_window_ntus: i32,
  // The frame count of each frame subtype in the current scale window.
  scale_window_nframes: [i32; FRAME_NSUBTYPES + 1],
  // The sum of the scale values for each frame subtype in the current window.
  scale_window_sum: [i64; FRAME_NSUBTYPES],
}
485
// TODO: Separate qi values for each color plane.
pub struct QuantizerParameters {
  // The full-precision, unmodulated log quantizer upon which our modulated
  //  quantizer indices are based.
  // This is only used to limit sudden quality changes from frame to frame, and
  //  as such is not adjusted when we encounter buffer overrun or underrun.
  pub log_base_q: i64,
  // The full-precision log quantizer modulated by the current frame type upon
  //  which our quantizer indices are based (including any adjustments to
  //  prevent buffer overrun or underrun).
  // This is used when estimating the scale parameter once we know the actual
  //  bit usage of a frame.
  pub log_target_q: i64,
  // DC quantizer indices for the Y, U and V planes (0 for the chroma entries
  //  of monochrome input).
  pub dc_qi: [u8; 3],
  // AC quantizer indices for the Y, U and V planes (0 for the chroma entries
  //  of monochrome input).
  pub ac_qi: [u8; 3],
  // Rate-distortion multiplier derived from the target quantizer.
  pub lambda: f64,
  // Per-plane distortion scale factors, derived from the gap between each
  //  plane's quantizer and the target quantizer.
  pub dist_scale: [f64; 3],
}
504
505const Q57_SQUARE_EXP_SCALE: f64 =
506  (2.0 * ::std::f64::consts::LN_2) / ((1i64 << 57) as f64);
507
508// Daala style log-offset for chroma quantizers
509// TODO: Optimal offsets for more configurations than just BT.709
510fn chroma_offset(
511  log_target_q: i64, chroma_sampling: ChromaSampling,
512) -> (i64, i64) {
513  let x = log_target_q.max(0);
514  // Gradient optimized for CIEDE2000+PSNR on subset3
515  let y = match chroma_sampling {
516    ChromaSampling::Cs400 => 0,
517    ChromaSampling::Cs420 => (x >> 2) + (x >> 6), // 0.266
518    ChromaSampling::Cs422 => (x >> 3) + (x >> 4) - (x >> 7), // 0.180
519    ChromaSampling::Cs444 => (x >> 4) + (x >> 5) + (x >> 8), // 0.098
520  };
521  // blog64(7) - blog64(4); blog64(5) - blog64(4)
522  (0x19D_5D9F_D501_0B37 - y, 0xA4_D3C2_5E68_DC58 - y)
523}
524
525impl QuantizerParameters {
526  fn new_from_log_q(
527    log_base_q: i64, log_target_q: i64, bit_depth: usize,
528    chroma_sampling: ChromaSampling, is_intra: bool,
529    log_isqrt_mean_scale: i64,
530  ) -> QuantizerParameters {
531    let scale = log_isqrt_mean_scale + q57(QSCALE + bit_depth as i32 - 8);
532
533    let mut log_q_y = log_target_q;
534    if !is_intra && bit_depth == 8 {
535      log_q_y = log_target_q
536        + (log_target_q >> 32) * Q_MODEL_MUL[chroma_sampling as usize]
537        + Q_MODEL_ADD[chroma_sampling as usize];
538    }
539
540    let quantizer = bexp64(log_q_y + scale);
541    let (offset_u, offset_v) =
542      chroma_offset(log_q_y + log_isqrt_mean_scale, chroma_sampling);
543    let mono = chroma_sampling == ChromaSampling::Cs400;
544    let log_q_u = log_q_y + offset_u;
545    let log_q_v = log_q_y + offset_v;
546    let quantizer_u = bexp64(log_q_u + scale);
547    let quantizer_v = bexp64(log_q_v + scale);
548    let lambda = (::std::f64::consts::LN_2 / 6.0)
549      * (((log_target_q + log_isqrt_mean_scale) as f64)
550        * Q57_SQUARE_EXP_SCALE)
551        .exp();
552
553    let scale = |q| bexp64((log_target_q - q) * 2 + q57(16)) as f64 / 65536.;
554    let dist_scale = [scale(log_q_y), scale(log_q_u), scale(log_q_v)];
555
556    let base_q_idx = select_ac_qi(quantizer, bit_depth).max(1);
557
558    // delta_q only gets 6 bits + a sign bit, so it can differ by 63 at most.
559    let min_qi = base_q_idx.saturating_sub(63).max(1);
560    let max_qi = base_q_idx.saturating_add(63);
561    let clamp_qi = |qi: u8| qi.clamp(min_qi, max_qi);
562
563    QuantizerParameters {
564      log_base_q,
565      log_target_q,
566      // TODO: Allow lossless mode; i.e. qi == 0.
567      dc_qi: [
568        clamp_qi(select_dc_qi(quantizer, bit_depth)),
569        if mono { 0 } else { clamp_qi(select_dc_qi(quantizer_u, bit_depth)) },
570        if mono { 0 } else { clamp_qi(select_dc_qi(quantizer_v, bit_depth)) },
571      ],
572      ac_qi: [
573        base_q_idx,
574        if mono { 0 } else { clamp_qi(select_ac_qi(quantizer_u, bit_depth)) },
575        if mono { 0 } else { clamp_qi(select_ac_qi(quantizer_v, bit_depth)) },
576      ],
577      lambda,
578      dist_scale,
579    }
580  }
581}
582
583impl RCState {
  // Creates a new rate-control state.
  // A non-positive `target_bitrate` disables rate control (the caller then
  //  derives quantizers directly from the configured quality).
  pub fn new(
    frame_width: i32, frame_height: i32, framerate_num: i64,
    framerate_den: i64, target_bitrate: i32, maybe_ac_qi_max: Option<u8>,
    ac_qi_min: u8, max_key_frame_interval: i32,
    maybe_reservoir_frame_delay: Option<i32>,
  ) -> RCState {
    // The default buffer size is set equal to 1.5x the keyframe interval, or 240
    //  frames; whichever is smaller, with a minimum of 12.
    // For user set values, we enforce a minimum of 12.
    // The interval is short enough to allow reaction, but long enough to allow
    //  looking into the next GOP (avoiding the case where the last frames
    //  before an I-frame get starved), in most cases.
    // The 12 frame minimum gives us some chance to distribute bit estimation
    //  errors in the worst case.
    let reservoir_frame_delay = maybe_reservoir_frame_delay
      .unwrap_or_else(|| ((max_key_frame_interval * 3) >> 1).min(240))
      .max(12);
    // TODO: What are the limits on these?
    let npixels = (frame_width as i64) * (frame_height as i64);
    // Insane framerates or frame sizes mean insane bitrates.
    // Let's not get carried away.
    // We also subtract 16 bits from each temporal unit to account for the
    //  temporal delimiter, whose bits are not included in the frame sizes
    //  reported to update_state().
    // TODO: Support constraints imposed by levels.
    let bits_per_tu = clamp(
      (target_bitrate as i64) * framerate_den / framerate_num,
      40,
      0x4000_0000_0000,
    ) - (TEMPORAL_DELIMITER.len() * 8) as i64;
    let reservoir_max = bits_per_tu * (reservoir_frame_delay as i64);
    // Start with a buffer fullness and fullness target of 50%.
    let reservoir_target = (reservoir_max + 1) >> 1;
    // Pick exponents and initial scales for quantizer selection.
    // ibpp is the inverse bits-per-pixel (pixels per bit) of a TU.
    let ibpp = npixels / bits_per_tu;
    // These have been derived by encoding many clips at every quantizer
    // and running a piecewise-linear regression in binary log space.
    let (i_exp, i_log_scale) = if ibpp < 1 {
      (48u8, blog64(36) - q57(QSCALE))
    } else if ibpp < 4 {
      (61u8, blog64(55) - q57(QSCALE))
    } else {
      (77u8, blog64(129) - q57(QSCALE))
    };
    let (p_exp, p_log_scale) = if ibpp < 2 {
      (69u8, blog64(32) - q57(QSCALE))
    } else if ibpp < 139 {
      (104u8, blog64(84) - q57(QSCALE))
    } else {
      (83u8, blog64(19) - q57(QSCALE))
    };
    let (b0_exp, b0_log_scale) = if ibpp < 2 {
      (84u8, blog64(30) - q57(QSCALE))
    } else if ibpp < 92 {
      (120u8, blog64(68) - q57(QSCALE))
    } else {
      (68u8, blog64(4) - q57(QSCALE))
    };
    let (b1_exp, b1_log_scale) = if ibpp < 2 {
      (87u8, blog64(27) - q57(QSCALE))
    } else if ibpp < 126 {
      (139u8, blog64(84) - q57(QSCALE))
    } else {
      (61u8, blog64(1) - q57(QSCALE))
    };

    // TODO: Add support for "golden" P frames.
    RCState {
      target_bitrate,
      reservoir_frame_delay,
      reservoir_frame_delay_is_set: maybe_reservoir_frame_delay.is_some(),
      maybe_ac_qi_max,
      ac_qi_min,
      drop_frames: false,
      cap_overflow: true,
      cap_underflow: false,
      pass1_log_base_q: 0,
      twopass_state: PASS_SINGLE,
      log_npixels: blog64(npixels),
      bits_per_tu,
      reservoir_fullness: reservoir_target,
      reservoir_target,
      reservoir_max,
      log_scale: [i_log_scale, p_log_scale, b0_log_scale, b1_log_scale],
      exp: [i_exp, p_exp, b0_exp, b1_exp],
      scalefilter: [
        IIRBessel2::new(4, q57_to_q24(i_log_scale)),
        IIRBessel2::new(INTER_DELAY_TARGET_MIN, q57_to_q24(p_log_scale)),
        IIRBessel2::new(INTER_DELAY_TARGET_MIN, q57_to_q24(b0_log_scale)),
        IIRBessel2::new(INTER_DELAY_TARGET_MIN, q57_to_q24(b1_log_scale)),
      ],
      // TODO VFR
      nframes: [0; FRAME_NSUBTYPES + 1],
      inter_delay: [INTER_DELAY_TARGET_MIN; FRAME_NSUBTYPES - 1],
      inter_delay_target: reservoir_frame_delay >> 1,
      rate_bias: 0,
      nencoded_frames: 0,
      nsef_frames: 0,
      pass1_buffer: [0; TWOPASS_HEADER_SZ],
      pass1_data_retrieved: true,
      pass1_summary_retrieved: false,
      pass2_data_ready: false,
      prev_metrics: RCFrameMetrics::new(),
      cur_metrics: RCFrameMetrics::new(),
      frame_metrics: Vec::new(),
      nframe_metrics: 0,
      frame_metrics_head: 0,
      ntus: 0,
      ntus_total: 0,
      ntus_left: 0,
      nframes_total: [0; FRAME_NSUBTYPES + 1],
      nframes_total_total: 0,
      nframes_left: [0; FRAME_NSUBTYPES + 1],
      scale_sum: [0; FRAME_NSUBTYPES],
      scale_window_ntus: 0,
      scale_window_nframes: [0; FRAME_NSUBTYPES + 1],
      scale_window_sum: [0; FRAME_NSUBTYPES],
      des: RCDeserialize::default(),
    }
  }
704
705  pub(crate) fn select_first_pass_qi(
706    &self, bit_depth: usize, fti: usize, chroma_sampling: ChromaSampling,
707  ) -> QuantizerParameters {
708    // Adjust the quantizer for the frame type, result is Q57:
709    let log_q = ((self.pass1_log_base_q + (1i64 << 11)) >> 12)
710      * (MQP_Q12[fti] as i64)
711      + DQP_Q57[fti];
712    QuantizerParameters::new_from_log_q(
713      self.pass1_log_base_q,
714      log_q,
715      bit_depth,
716      chroma_sampling,
717      fti == 0,
718      0,
719    )
720  }
721
722  // TODO: Separate quantizers for Cb and Cr.
723  #[profiling::function]
724  pub(crate) fn select_qi<T: Pixel>(
725    &self, ctx: &ContextInner<T>, output_frameno: u64, fti: usize,
726    maybe_prev_log_base_q: Option<i64>, log_isqrt_mean_scale: i64,
727  ) -> QuantizerParameters {
728    // Is rate control active?
729    if self.target_bitrate <= 0 {
730      // Rate control is not active.
731      // Derive quantizer directly from frame type.
732      let bit_depth = ctx.config.bit_depth;
733      let chroma_sampling = ctx.config.chroma_sampling;
734      let (log_base_q, log_q) =
735        Self::calc_flat_quantizer(ctx.config.quantizer as u8, bit_depth, fti);
736      QuantizerParameters::new_from_log_q(
737        log_base_q,
738        log_q,
739        bit_depth,
740        chroma_sampling,
741        fti == 0,
742        log_isqrt_mean_scale,
743      )
744    } else {
745      let mut nframes: [i32; FRAME_NSUBTYPES + 1] = [0; FRAME_NSUBTYPES + 1];
746      let mut log_scale: [i64; FRAME_NSUBTYPES] = self.log_scale;
747      let mut reservoir_tus = self.reservoir_frame_delay.min(self.ntus_left);
748      let mut reservoir_frames = 0;
749      let mut log_cur_scale = (self.scalefilter[fti].y[0] as i64) << 33;
750      match self.twopass_state {
751        // First pass of 2-pass mode: use a fixed base quantizer.
752        PASS_1 => {
753          return self.select_first_pass_qi(
754            ctx.config.bit_depth,
755            fti,
756            ctx.config.chroma_sampling,
757          );
758        }
759        // Second pass of 2-pass mode: we know exactly how much of each frame
760        //  type there is in the current buffer window, and have estimates for
761        //  the scales.
762        PASS_2 | PASS_2_PLUS_1 => {
763          let mut scale_window_sum: [i64; FRAME_NSUBTYPES] =
764            self.scale_window_sum;
765          let mut scale_window_nframes: [i32; FRAME_NSUBTYPES + 1] =
766            self.scale_window_nframes;
767          // Intentionally exclude Show Existing Frame frames from this.
768          for ftj in 0..FRAME_NSUBTYPES {
769            reservoir_frames += scale_window_nframes[ftj];
770          }
771          // If we're approaching the end of the file, add some slack to keep
772          //  us from slamming into a rail.
773          // Our rate accuracy goes down, but it keeps the result sensible.
774          // We position the target where the first forced keyframe beyond the
775          //  end of the file would be (for consistency with 1-pass mode).
776          // TODO: let mut buf_pad = self.reservoir_frame_delay.min(...);
777          // if buf_delay < buf_pad {
778          //   buf_pad -= buf_delay;
779          // }
780          // else ...
781          // Otherwise, search for the last keyframe in the buffer window and
782          //  target that.
783          // Currently we only do this when using a finite buffer.
784          // We could save the position of the last keyframe in the stream in
785          //  the summary data and do it with a whole-file buffer as well, but
786          //  it isn't likely to make a difference.
787          if !self.frame_metrics.is_empty() {
788            let mut fm_tail = self.frame_metrics_head + self.nframe_metrics;
789            if fm_tail >= self.frame_metrics.len() {
790              fm_tail -= self.frame_metrics.len();
791            }
792            let mut fmi = fm_tail;
793            loop {
794              if fmi == 0 {
795                fmi += self.frame_metrics.len();
796              }
797              fmi -= 1;
798              // Stop before we remove the first frame.
799              if fmi == self.frame_metrics_head {
800                break;
801              }
802              // If we find a keyframe, remove it and everything past it.
803              if self.frame_metrics[fmi].fti == FRAME_SUBTYPE_I {
804                while fmi != fm_tail {
805                  let m = &self.frame_metrics[fmi];
806                  let ftj = m.fti;
807                  scale_window_nframes[ftj] -= 1;
808                  if ftj < FRAME_NSUBTYPES {
809                    scale_window_sum[ftj] -= bexp_q24(m.log_scale_q24);
810                    reservoir_frames -= 1;
811                  }
812                  if m.show_frame {
813                    reservoir_tus -= 1;
814                  }
815                  fmi += 1;
816                  if fmi >= self.frame_metrics.len() {
817                    fmi = 0;
818                  }
819                }
820                // And stop scanning backwards.
821                break;
822              }
823            }
824          }
825          nframes = scale_window_nframes;
826          // If we're not using the same frame type as in pass 1 (because
827          //  someone changed some encoding parameters), remove that scale
828          //  estimate.
829          // We'll add a replacement for the correct frame type below.
830          if self.cur_metrics.fti != fti {
831            scale_window_nframes[self.cur_metrics.fti] -= 1;
832            if self.cur_metrics.fti != FRAME_SUBTYPE_SEF {
833              scale_window_sum[self.cur_metrics.fti] -=
834                bexp_q24(self.cur_metrics.log_scale_q24);
835            }
836          } else {
837            log_cur_scale = (self.cur_metrics.log_scale_q24 as i64) << 33;
838          }
839          // If we're approaching the end of the file, add some slack to keep
840          //  us from slamming into a rail.
841          // Our rate accuracy goes down, but it keeps the result sensible.
842          // We position the target where the first forced keyframe beyond the
843          //  end of the file would be (for consistency with 1-pass mode).
844          if reservoir_tus >= self.ntus_left
845            && self.ntus_total as u64
846              > ctx.gop_input_frameno_start[&output_frameno]
847          {
848            let nfinal_gop_tus = self.ntus_total
849              - (ctx.gop_input_frameno_start[&output_frameno] as i32);
850            if ctx.config.max_key_frame_interval as i32 > nfinal_gop_tus {
851              let reservoir_pad = (ctx.config.max_key_frame_interval as i32
852                - nfinal_gop_tus)
853                .min(self.reservoir_frame_delay - reservoir_tus);
854              let (guessed_reservoir_frames, guessed_reservoir_tus) = ctx
855                .guess_frame_subtypes(
856                  &mut nframes,
857                  reservoir_tus + reservoir_pad,
858                );
859              reservoir_frames = guessed_reservoir_frames;
860              reservoir_tus = guessed_reservoir_tus;
861            }
862          }
863          // Blend in the low-pass filtered scale according to how many
864          //  frames of each type we need to add compared to the actual sums in
865          //  our window.
866          for ftj in 0..FRAME_NSUBTYPES {
867            let scale = scale_window_sum[ftj]
868              + bexp_q24(self.scalefilter[ftj].y[0])
869                * (nframes[ftj] - scale_window_nframes[ftj]) as i64;
870            log_scale[ftj] = if nframes[ftj] > 0 {
871              blog64(scale) - blog64(nframes[ftj] as i64) - q57(24)
872            } else {
873              -self.log_npixels
874            };
875          }
876        }
877        // Single pass.
878        _ => {
879          // Figure out how to re-distribute bits so that we hit our fullness
880          //  target before the last keyframe in our current buffer window
881          //  (after the current frame), or the end of the buffer window,
882          //  whichever comes first.
883          // Count the various types and classes of frames.
884          let (guessed_reservoir_frames, guessed_reservoir_tus) =
885            ctx.guess_frame_subtypes(&mut nframes, self.reservoir_frame_delay);
886          reservoir_frames = guessed_reservoir_frames;
887          reservoir_tus = guessed_reservoir_tus;
888          // TODO: Scale for VFR.
889        }
890      }
891      // If we've been missing our target, add a penalty term.
892      let rate_bias = (self.rate_bias / (self.nencoded_frames + 100))
893        * (reservoir_frames as i64);
894      // rate_total is the total bits available over the next
895      //  reservoir_tus TUs.
896      let rate_total = self.reservoir_fullness - self.reservoir_target
897        + rate_bias
898        + (reservoir_tus as i64) * self.bits_per_tu;
899      // Find a target quantizer that meets our rate target for the
900      //  specific mix of frame types we'll have over the next
901      //  reservoir_frame frames.
902      // We model the rate<->quantizer relationship as
903      //  rate = scale*(quantizer**-exp)
904      // In this case, we have our desired rate, an exponent selected in
905      //  setup, and a scale that's been measured over our frame history,
906      //  so we're solving for the quantizer.
907      // Exponentiation with arbitrary exponents is expensive, so we work
908      //  in the binary log domain (binary exp and log aren't too bad):
909      //  rate = exp2(log2(scale) - log2(quantizer)*exp)
      // There's no easy closed form solution, so we bisection search for it.
911      let bit_depth = ctx.config.bit_depth;
912      let chroma_sampling = ctx.config.chroma_sampling;
913      // TODO: Proper handling of lossless.
914      let mut log_qlo = blog64(ac_q(self.ac_qi_min, 0, bit_depth).get() as i64)
915        - q57(QSCALE + bit_depth as i32 - 8);
916      // The AC quantizer tables map to values larger than the DC quantizer
917      //  tables, so we use that as the upper bound to make sure we can use
918      //  the full table if needed.
919      let mut log_qhi = blog64(
920        ac_q(self.maybe_ac_qi_max.unwrap_or(255), 0, bit_depth).get() as i64,
921      ) - q57(QSCALE + bit_depth as i32 - 8);
922      let mut log_base_q = (log_qlo + log_qhi) >> 1;
923      while log_qlo < log_qhi {
924        // Count bits contributed by each frame type using the model.
925        let mut bits = 0i64;
926        for ftj in 0..FRAME_NSUBTYPES {
927          // Modulate base quantizer by frame type.
928          let log_q = ((log_base_q + (1i64 << 11)) >> 12)
929            * (MQP_Q12[ftj] as i64)
930            + DQP_Q57[ftj];
931          // All the fields here are Q57 except for the exponent, which is
932          //  Q6.
933          bits += (nframes[ftj] as i64)
934            * bexp64(
935              log_scale[ftj] + self.log_npixels
936                - ((log_q + 32) >> 6) * (self.exp[ftj] as i64),
937            );
938        }
939        // The number of bits for Show Existing Frame frames is constant.
940        bits += (nframes[FRAME_SUBTYPE_SEF] as i64) * SEF_BITS;
941        let diff = bits - rate_total;
942        if diff > 0 {
943          log_qlo = log_base_q + 1;
944        } else if diff < 0 {
945          log_qhi = log_base_q - 1;
946        } else {
947          break;
948        }
949        log_base_q = (log_qlo + log_qhi) >> 1;
950      }
951      // If this was not one of the initial frames, limit the change in
952      //  base quantizer to within [0.8*Q, 1.2*Q] where Q is the previous
953      //  frame's base quantizer.
954      if let Some(prev_log_base_q) = maybe_prev_log_base_q {
955        log_base_q = clamp(
956          log_base_q,
957          prev_log_base_q - 0xA4_D3C2_5E68_DC58,
958          prev_log_base_q + 0xA4_D3C2_5E68_DC58,
959        );
960      }
961      // Modulate base quantizer by frame type.
962      let mut log_q = ((log_base_q + (1i64 << 11)) >> 12)
963        * (MQP_Q12[fti] as i64)
964        + DQP_Q57[fti];
965      // The above allocation looks only at the total rate we'll accumulate
966      //  in the next reservoir_frame_delay frames.
967      // However, we could overflow the bit reservoir on the very next
968      //  frame.
969      // Check for that here if we're not using a soft target.
970      if self.cap_overflow {
971        // Allow 3% of the buffer for prediction error.
972        // This should be plenty, and we don't mind if we go a bit over.
973        // We only want to keep these bits from being completely wasted.
974        let margin = (self.reservoir_max + 31) >> 5;
975        // We want to use at least this many bits next frame.
976        let soft_limit = self.reservoir_fullness + self.bits_per_tu
977          - (self.reservoir_max - margin);
978        if soft_limit > 0 {
979          let log_soft_limit = blog64(soft_limit);
980          // If we're predicting we won't use that many bits...
981          // TODO: When using frame re-ordering, we should include the rate
982          //  for all of the frames in the current TU.
983          // When there is more than one frame, there will be no direct
984          //  solution for the required adjustment, however.
985          let log_scale_pixels = log_cur_scale + self.log_npixels;
986          let exp = self.exp[fti] as i64;
987          let mut log_q_exp = ((log_q + 32) >> 6) * exp;
988          if log_scale_pixels - log_q_exp < log_soft_limit {
989            // Scale the adjustment based on how far into the margin we are.
990            log_q_exp += ((log_scale_pixels - log_soft_limit - log_q_exp)
991              >> 32)
992              * ((margin.min(soft_limit) << 32) / margin);
993            log_q = ((log_q_exp + (exp >> 1)) / exp) << 6;
994          }
995        }
996      }
997      // We just checked we don't overflow the reservoir next frame, now
998      //  check we don't underflow and bust the budget (when not using a
999      //  soft target).
1000      if self.maybe_ac_qi_max.is_none() {
1001        // Compute the maximum number of bits we can use in the next frame.
1002        // Allow 50% of the rate for a single frame for prediction error.
1003        // This may not be enough for keyframes or sudden changes in
1004        //  complexity.
1005        let log_hard_limit =
1006          blog64(self.reservoir_fullness + (self.bits_per_tu >> 1));
1007        // If we're predicting we'll use more than this...
1008        // TODO: When using frame re-ordering, we should include the rate
1009        //  for all of the frames in the current TU.
1010        // When there is more than one frame, there will be no direct
1011        //  solution for the required adjustment, however.
1012        let log_scale_pixels = log_cur_scale + self.log_npixels;
1013        let exp = self.exp[fti] as i64;
1014        let mut log_q_exp = ((log_q + 32) >> 6) * exp;
1015        if log_scale_pixels - log_q_exp > log_hard_limit {
1016          // Force the target to hit our limit exactly.
1017          log_q_exp = log_scale_pixels - log_hard_limit;
1018          log_q = ((log_q_exp + (exp >> 1)) / exp) << 6;
1019          // If that target is unreasonable, oh well; we'll have to drop.
1020        }
1021      }
1022
1023      if let Some(qi_max) = self.maybe_ac_qi_max {
1024        let (max_log_base_q, max_log_q) =
1025          Self::calc_flat_quantizer(qi_max, ctx.config.bit_depth, fti);
1026        log_base_q = cmp::min(log_base_q, max_log_base_q);
1027        log_q = cmp::min(log_q, max_log_q);
1028      }
1029      if self.ac_qi_min > 0 {
1030        let (min_log_base_q, min_log_q) =
1031          Self::calc_flat_quantizer(self.ac_qi_min, ctx.config.bit_depth, fti);
1032        log_base_q = cmp::max(log_base_q, min_log_base_q);
1033        log_q = cmp::max(log_q, min_log_q);
1034      }
1035      QuantizerParameters::new_from_log_q(
1036        log_base_q,
1037        log_q,
1038        bit_depth,
1039        chroma_sampling,
1040        fti == 0,
1041        log_isqrt_mean_scale,
1042      )
1043    }
1044  }
1045
1046  // Computes a quantizer directly from the frame type and base quantizer index,
1047  // without consideration for rate control.
1048  fn calc_flat_quantizer(
1049    base_qi: u8, bit_depth: usize, fti: usize,
1050  ) -> (i64, i64) {
1051    // TODO: Rename "quantizer" something that indicates it is a quantizer
1052    //  index, and move it somewhere more sensible (or choose a better way to
1053    //  parameterize a "quality" configuration parameter).
1054
1055    // We use the AC quantizer as the source quantizer since its quantizer
1056    //  tables have unique entries, while the DC tables do not.
1057    let ac_quantizer = ac_q(base_qi, 0, bit_depth).get() as i64;
1058    // Pick the nearest DC entry since an exact match may be unavailable.
1059    let dc_qi = select_dc_qi(ac_quantizer, bit_depth);
1060    let dc_quantizer = dc_q(dc_qi, 0, bit_depth).get() as i64;
1061    // Get the log quantizers as Q57.
1062    let log_ac_q = blog64(ac_quantizer) - q57(QSCALE + bit_depth as i32 - 8);
1063    let log_dc_q = blog64(dc_quantizer) - q57(QSCALE + bit_depth as i32 - 8);
1064    // Target the midpoint of the chosen entries.
1065    let log_base_q = (log_ac_q + log_dc_q + 1) >> 1;
1066    // Adjust the quantizer for the frame type, result is Q57:
1067    let log_q = ((log_base_q + (1i64 << 11)) >> 12) * (MQP_Q12[fti] as i64)
1068      + DQP_Q57[fti];
1069    (log_base_q, log_q)
1070  }
1071
1072  #[profiling::function]
1073  pub fn update_state(
1074    &mut self, bits: i64, fti: usize, show_frame: bool, log_target_q: i64,
1075    trial: bool, droppable: bool,
1076  ) -> bool {
1077    if trial {
1078      assert!(self.needs_trial_encode(fti));
1079      assert!(bits > 0);
1080    }
1081    let mut dropped = false;
1082    // Update rate control only if rate control is active.
1083    if self.target_bitrate > 0 {
1084      let mut estimated_bits = 0;
1085      let mut bits = bits;
1086      let mut droppable = droppable;
1087      let mut log_scale = q57(-64);
1088      // Drop frames is also disabled for now in the case of infinite-buffer
1089      //  two-pass mode.
1090      if !self.drop_frames
1091        || fti == FRAME_SUBTYPE_SEF
1092        || (self.twopass_state == PASS_2
1093          || self.twopass_state == PASS_2_PLUS_1)
1094          && !self.frame_metrics.is_empty()
1095      {
1096        droppable = false;
1097      }
1098      if fti == FRAME_SUBTYPE_SEF {
1099        debug_assert!(bits == SEF_BITS);
1100        debug_assert!(show_frame);
1101        // Please don't make trial encodes of a SEF.
1102        debug_assert!(!trial);
1103        estimated_bits = SEF_BITS;
1104        self.nsef_frames += 1;
1105      } else {
1106        let log_q_exp = ((log_target_q + 32) >> 6) * (self.exp[fti] as i64);
1107        let prev_log_scale = self.log_scale[fti];
1108        if bits <= 0 {
1109          // We didn't code any blocks in this frame.
1110          bits = 0;
1111          dropped = true;
1112        // TODO: Adjust VFR rate based on drop count.
1113        } else {
1114          // Compute the estimated scale factor for this frame type.
1115          let log_bits = blog64(bits);
1116          log_scale = (log_bits - self.log_npixels + log_q_exp).min(q57(16));
1117          estimated_bits =
1118            bexp64(prev_log_scale + self.log_npixels - log_q_exp);
1119          if !trial {
1120            self.nencoded_frames += 1;
1121          }
1122        }
1123      }
1124      let log_scale_q24 = q57_to_q24(log_scale);
1125      // Special two-pass processing.
1126      if self.twopass_state == PASS_2 || self.twopass_state == PASS_2_PLUS_1 {
1127        // Pass 2 mode:
1128        if !trial {
1129          // Move the current metrics back one frame.
1130          self.prev_metrics = self.cur_metrics;
1131          // Back out the last frame's statistics from the sliding window.
1132          let ftj = self.prev_metrics.fti;
1133          self.nframes_left[ftj] -= 1;
1134          self.scale_window_nframes[ftj] -= 1;
1135          if ftj < FRAME_NSUBTYPES {
1136            self.scale_window_sum[ftj] -=
1137              bexp_q24(self.prev_metrics.log_scale_q24);
1138          }
1139          if self.prev_metrics.show_frame {
1140            self.ntus_left -= 1;
1141            self.scale_window_ntus -= 1;
1142          }
1143          // Free the corresponding entry in the circular buffer.
1144          if !self.frame_metrics.is_empty() {
1145            self.nframe_metrics -= 1;
1146            self.frame_metrics_head += 1;
1147            if self.frame_metrics_head >= self.frame_metrics.len() {
1148              self.frame_metrics_head = 0;
1149            }
1150          }
1151          // Mark us ready for the next 2-pass packet.
1152          self.pass2_data_ready = false;
1153          // Update state, so the user doesn't have to keep calling
1154          //  twopass_in() after they've fed in all the data when we're using
1155          //  a finite buffer.
1156          self.twopass_in(None).unwrap_or(0);
1157        }
1158      }
1159      if self.twopass_state == PASS_1 || self.twopass_state == PASS_2_PLUS_1 {
1160        // Pass 1 mode: save the metrics for this frame.
1161        self.prev_metrics.log_scale_q24 = log_scale_q24;
1162        self.prev_metrics.fti = fti;
1163        self.prev_metrics.show_frame = show_frame;
1164        self.pass1_data_retrieved = false;
1165      }
1166      // Common to all passes:
1167      if fti != FRAME_SUBTYPE_SEF && bits > 0 {
1168        // If this is the first example of the given frame type we've seen,
1169        //  we immediately replace the default scale factor guess with the
1170        //  estimate we just computed using the first frame.
1171        if trial || self.nframes[fti] <= 0 {
1172          let f = &mut self.scalefilter[fti];
1173          let x = log_scale_q24;
1174          f.x[0] = x;
1175          f.x[1] = x;
1176          f.y[0] = x;
1177          f.y[1] = x;
1178          self.log_scale[fti] = log_scale;
1179        // TODO: Duplicate regular P frame state for first golden P frame.
1180        } else {
1181          // Lengthen the time constant for the inter filters as we collect
1182          //  more frame statistics, until we reach our target.
1183          if fti > 0
1184            && self.inter_delay[fti - 1] < self.inter_delay_target
1185            && self.nframes[fti] >= self.inter_delay[fti - 1]
1186          {
1187            self.inter_delay[fti - 1] += 1;
1188            self.scalefilter[fti].reinit(self.inter_delay[fti - 1]);
1189          }
1190          // Update the low-pass scale filter for this frame type regardless
1191          //  of whether or not we will ultimately drop this frame.
1192          self.log_scale[fti] =
1193            q24_to_q57(self.scalefilter[fti].update(log_scale_q24));
1194        }
1195        // If this frame busts our budget, it must be dropped.
1196        if droppable && self.reservoir_fullness + self.bits_per_tu < bits {
1197          // TODO: Adjust VFR rate based on drop count.
1198          bits = 0;
1199          dropped = true;
1200        } else {
1201          // TODO: Update a low-pass filter to estimate the "real" frame rate
1202          //  taking timestamps and drops into account.
1203          // This is only done if the frame is coded, as it needs the final
1204          //  count of dropped frames.
1205        }
1206      }
1207      if !trial {
1208        // Increment the frame count for filter adaptation purposes.
1209        if !trial && self.nframes[fti] < i32::MAX {
1210          self.nframes[fti] += 1;
1211        }
1212        self.reservoir_fullness -= bits;
1213        if show_frame {
1214          self.reservoir_fullness += self.bits_per_tu;
1215          // TODO: Properly account for temporal delimiter bits.
1216        }
1217        // If we're too quick filling the buffer and overflow is capped, that
1218        //  rate is lost forever.
1219        if self.cap_overflow {
1220          self.reservoir_fullness =
1221            self.reservoir_fullness.min(self.reservoir_max);
1222        }
1223        // If we're too quick draining the buffer and underflow is capped,
1224        //  don't try to make up that rate later.
1225        if self.cap_underflow {
1226          self.reservoir_fullness = self.reservoir_fullness.max(0);
1227        }
1228        // Adjust the bias for the real bits we've used.
1229        self.rate_bias += estimated_bits - bits;
1230      }
1231    }
1232    dropped
1233  }
1234
1235  pub const fn needs_trial_encode(&self, fti: usize) -> bool {
1236    self.target_bitrate > 0 && self.nframes[fti] == 0
1237  }
1238
1239  pub(crate) const fn ready(&self) -> bool {
1240    match self.twopass_state {
1241      PASS_SINGLE => true,
1242      PASS_1 => self.pass1_data_retrieved,
1243      PASS_2 => self.pass2_data_ready,
1244      _ => self.pass1_data_retrieved && self.pass2_data_ready,
1245    }
1246  }
1247
1248  fn buffer_val(&mut self, val: i64, bytes: usize, cur_pos: usize) -> usize {
1249    let mut val = val;
1250    let mut bytes = bytes;
1251    let mut cur_pos = cur_pos;
1252    while bytes > 0 {
1253      bytes -= 1;
1254      self.pass1_buffer[cur_pos] = val as u8;
1255      cur_pos += 1;
1256      val >>= 8;
1257    }
1258    cur_pos
1259  }
1260
1261  pub(crate) fn select_pass1_log_base_q<T: Pixel>(
1262    &self, ctx: &ContextInner<T>, output_frameno: u64,
1263  ) -> i64 {
1264    assert_eq!(self.twopass_state, PASS_SINGLE);
1265    self.select_qi(ctx, output_frameno, FRAME_SUBTYPE_I, None, 0).log_base_q
1266  }
1267
1268  // Initialize the first pass and emit a placeholder summary
1269  pub(crate) fn init_first_pass(
1270    &mut self, maybe_pass1_log_base_q: Option<i64>,
1271  ) {
1272    if let Some(pass1_log_base_q) = maybe_pass1_log_base_q {
1273      assert_eq!(self.twopass_state, PASS_SINGLE);
1274      // Pick first-pass qi for scale calculations.
1275      self.pass1_log_base_q = pass1_log_base_q;
1276    } else {
1277      debug_assert!(self.twopass_state == PASS_2);
1278    }
1279    self.twopass_state += PASS_1;
1280  }
1281
1282  // Prepare a placeholder summary
1283  fn emit_placeholder_summary(&mut self) -> &[u8] {
1284    // Fill in dummy summary values.
1285    let mut cur_pos = 0;
1286    cur_pos = self.buffer_val(TWOPASS_MAGIC as i64, 4, cur_pos);
1287    cur_pos = self.buffer_val(TWOPASS_VERSION as i64, 4, cur_pos);
1288    cur_pos = self.buffer_val(0, TWOPASS_HEADER_SZ - 8, cur_pos);
1289    debug_assert!(cur_pos == TWOPASS_HEADER_SZ);
1290    self.pass1_data_retrieved = true;
1291    &self.pass1_buffer[..cur_pos]
1292  }
1293
1294  // Frame-specific pass data
1295  pub(crate) fn emit_frame_data(&mut self) -> Option<&[u8]> {
1296    let mut cur_pos = 0;
1297    let fti = self.prev_metrics.fti;
1298    if fti < FRAME_NSUBTYPES {
1299      self.scale_sum[fti] += bexp_q24(self.prev_metrics.log_scale_q24);
1300    }
1301    if self.prev_metrics.show_frame {
1302      self.ntus += 1;
1303    }
1304    // If we have encoded too many frames, prevent us from reaching the
1305    //  ready state required to encode more.
1306    if self.nencoded_frames + self.nsef_frames >= i32::MAX as i64 {
1307      None?
1308    }
1309    cur_pos = self.buffer_val(
1310      ((self.prev_metrics.show_frame as i64) << 31)
1311        | self.prev_metrics.fti as i64,
1312      4,
1313      cur_pos,
1314    );
1315    cur_pos =
1316      self.buffer_val(self.prev_metrics.log_scale_q24 as i64, 4, cur_pos);
1317    debug_assert!(cur_pos == TWOPASS_PACKET_SZ);
1318    self.pass1_data_retrieved = true;
1319    Some(&self.pass1_buffer[..cur_pos])
1320  }
1321
1322  // Summary of the whole encoding process.
1323  pub(crate) fn emit_summary(&mut self) -> &[u8] {
1324    let mut cur_pos = 0;
1325    cur_pos = self.buffer_val(TWOPASS_MAGIC as i64, 4, cur_pos);
1326    cur_pos = self.buffer_val(TWOPASS_VERSION as i64, 4, cur_pos);
1327    cur_pos = self.buffer_val(self.ntus as i64, 4, cur_pos);
1328    for fti in 0..=FRAME_NSUBTYPES {
1329      cur_pos = self.buffer_val(self.nframes[fti] as i64, 4, cur_pos);
1330    }
1331    for fti in 0..FRAME_NSUBTYPES {
1332      cur_pos = self.buffer_val(self.exp[fti] as i64, 1, cur_pos);
1333    }
1334    for fti in 0..FRAME_NSUBTYPES {
1335      cur_pos = self.buffer_val(self.scale_sum[fti], 8, cur_pos);
1336    }
1337    debug_assert!(cur_pos == TWOPASS_HEADER_SZ);
1338    self.pass1_summary_retrieved = true;
1339    &self.pass1_buffer[..cur_pos]
1340  }
1341
1342  // Emit either summary or frame-specific data depending on the previous call
1343  pub(crate) fn twopass_out(
1344    &mut self, done_processing: bool,
1345  ) -> Option<&[u8]> {
1346    if !self.pass1_data_retrieved {
1347      if self.twopass_state != PASS_1 && self.twopass_state != PASS_2_PLUS_1 {
1348        Some(self.emit_placeholder_summary())
1349      } else {
1350        self.emit_frame_data()
1351      }
1352    } else if done_processing && !self.pass1_summary_retrieved {
1353      Some(self.emit_summary())
1354    } else {
1355      // The data for this frame has already been retrieved.
1356      None
1357    }
1358  }
1359
1360  // Initialize the rate control for second pass encoding
1361  pub(crate) fn init_second_pass(&mut self) {
1362    if self.twopass_state == PASS_SINGLE || self.twopass_state == PASS_1 {
1363      // Initialize the second pass.
1364      self.twopass_state += PASS_2;
1365      // If the user requested a finite buffer, reserve the space required for
1366      //  it.
1367      if self.reservoir_frame_delay_is_set {
1368        debug_assert!(self.reservoir_frame_delay > 0);
1369        // reservoir_frame_delay counts in TUs, but RCFrameMetrics are stored
1370        //  per frame (including Show Existing Frame frames).
1371        // When re-ordering, we will have more frames than TUs.
1372        // How many more?
1373        // That depends on the re-ordering scheme used.
1374        // Doubling the number of TUs and adding a fixed latency equal to the
1375        //  maximum number of reference frames we can store should be
1376        //  sufficient for any reasonable scheme, and keeps this code from
1377        //  depending too closely on the details of the scheme currently used
1378        //  by rav1e.
1379        let nmetrics = (self.reservoir_frame_delay as usize) * 2 + 8;
1380        self.frame_metrics.reserve_exact(nmetrics);
1381        self.frame_metrics.resize(nmetrics, RCFrameMetrics::new());
1382      }
1383    }
1384  }
1385
1386  pub(crate) fn setup_second_pass(&mut self, s: &RCSummary) {
1387    self.ntus_total = s.ntus;
1388    self.ntus_left = s.ntus;
1389    self.nframes_total = s.nframes;
1390    self.nframes_left = s.nframes;
1391    self.nframes_total_total = s.nframes.iter().sum();
1392    if self.frame_metrics.is_empty() {
1393      self.reservoir_frame_delay = s.ntus;
1394      self.scale_window_nframes = self.nframes_total;
1395      self.scale_window_sum = s.scale_sum;
1396      self.reservoir_max =
1397        self.bits_per_tu * (self.reservoir_frame_delay as i64);
1398      self.reservoir_target = (self.reservoir_max + 1) >> 1;
1399      self.reservoir_fullness = self.reservoir_target;
1400    } else {
1401      self.reservoir_frame_delay = self.reservoir_frame_delay.min(s.ntus);
1402    }
1403    self.exp = s.exp;
1404  }
1405
1406  // Parse the rate control summary
1407  //
1408  // It returns the amount of data consumed in the process or
1409  // an empty error on parsing failure.
1410  fn twopass_parse_summary(&mut self, buf: &[u8]) -> Result<usize, String> {
1411    let consumed = self.des.buffer_fill(buf, 0, TWOPASS_HEADER_SZ);
1412    if self.des.pass2_buffer_fill >= TWOPASS_HEADER_SZ {
1413      self.des.pass2_buffer_pos = 0;
1414
1415      let s = self.des.parse_summary()?;
1416
1417      self.setup_second_pass(&s);
1418
1419      // Got a valid header.
1420      // Set up pass 2.
1421      // Clear the header data from the buffer to make room for the
1422      //  packet data.
1423      self.des.pass2_buffer_fill = 0;
1424    }
1425
1426    Ok(consumed)
1427  }
1428
1429  // Return the size of the first buffer twopass_in expects
1430  //
1431  // It is the summary size (constant) + the number of frame data packets
1432  // (variable depending on the configuration) it needs to starts encoding.
1433  pub(crate) fn twopass_first_packet_size(&self) -> usize {
1434    let frames_needed = if !self.frame_metrics.is_empty() {
1435      // If we're not using whole-file buffering, we need at least one
1436      //  frame per buffer slot.
1437      self.reservoir_frame_delay as usize
1438    } else {
1439      // Otherwise we need just one.
1440      1
1441    };
1442
1443    TWOPASS_HEADER_SZ + frames_needed * TWOPASS_PACKET_SZ
1444  }
1445
1446  // Return the number of frame data packets to be parsed before
1447  // the encoding process can continue.
1448  pub(crate) fn twopass_in_frames_needed(&self) -> i32 {
1449    if self.target_bitrate <= 0 {
1450      return 0;
1451    }
1452    if self.frame_metrics.is_empty() {
1453      return i32::from(!self.pass2_data_ready);
1454    }
1455    let mut cur_scale_window_nframes = 0;
1456    let mut cur_nframes_left = 0;
1457    for fti in 0..=FRAME_NSUBTYPES {
1458      cur_scale_window_nframes += self.scale_window_nframes[fti];
1459      cur_nframes_left += self.nframes_left[fti];
1460    }
1461
1462    (self.reservoir_frame_delay - self.scale_window_ntus)
1463      .clamp(0, cur_nframes_left - cur_scale_window_nframes)
1464  }
1465
1466  pub(crate) fn parse_frame_data_packet(
1467    &mut self, buf: &[u8],
1468  ) -> Result<(), String> {
1469    if buf.len() != TWOPASS_PACKET_SZ {
1470      return Err("Incorrect buffer size".to_string());
1471    }
1472
1473    self.des.buffer_fill(buf, 0, TWOPASS_PACKET_SZ);
1474    self.des.pass2_buffer_pos = 0;
1475    let m = self.des.parse_metrics()?;
1476    self.des.pass2_buffer_fill = 0;
1477
1478    if self.frame_metrics.is_empty() {
1479      // We're using a whole-file buffer.
1480      self.cur_metrics = m;
1481      self.pass2_data_ready = true;
1482    } else {
1483      // Safety check
1484      let frames_needed = self.twopass_in_frames_needed();
1485
1486      if frames_needed > 0 {
1487        if self.nframe_metrics >= self.frame_metrics.len() {
1488          return Err(
1489            "Read too many frames without finding enough TUs".to_string(),
1490          );
1491        }
1492
1493        let mut fmi = self.frame_metrics_head + self.nframe_metrics;
1494        if fmi >= self.frame_metrics.len() {
1495          fmi -= self.frame_metrics.len();
1496        }
1497        self.nframe_metrics += 1;
1498        self.frame_metrics[fmi] = m;
1499        // And accumulate the statistics over the window.
1500        self.scale_window_nframes[m.fti] += 1;
1501        if m.fti < FRAME_NSUBTYPES {
1502          self.scale_window_sum[m.fti] += bexp_q24(m.log_scale_q24);
1503        }
1504        if m.show_frame {
1505          self.scale_window_ntus += 1;
1506        }
1507        if frames_needed == 1 {
1508          self.pass2_data_ready = true;
1509          self.cur_metrics = self.frame_metrics[self.frame_metrics_head];
1510        }
1511      } else {
1512        return Err("No frames needed".to_string());
1513      }
1514    }
1515
1516    Ok(())
1517  }
1518
  // Parse the rate control per-frame data
  //
  // If no buffer is passed return the amount of data it expects
  // to consume next.
  //
  // If a properly sized buffer is passed it returns the amount of data
  // consumed in the process or an empty error on parsing failure.
  //
  // Two modes, selected by `frame_metrics`:
  //  - empty: whole-file buffering; exactly one packet (the metrics for the
  //    next frame) is read directly into `cur_metrics`.
  //  - non-empty: finite buffering; packets are accumulated into the
  //    `frame_metrics` circular buffer until enough frames are available to
  //    cover the rate-control reservoir window.
  fn twopass_parse_frame_data(
    &mut self, maybe_buf: Option<&[u8]>, mut consumed: usize,
  ) -> Result<usize, String> {
    {
      if self.frame_metrics.is_empty() {
        // We're using a whole-file buffer.
        if let Some(buf) = maybe_buf {
          // Top up the deserializer until a full packet is available.
          consumed = self.des.buffer_fill(buf, consumed, TWOPASS_PACKET_SZ);
          if self.des.pass2_buffer_fill >= TWOPASS_PACKET_SZ {
            self.des.pass2_buffer_pos = 0;
            // Read metrics for the next frame.
            self.cur_metrics = self.des.parse_metrics()?;
            // Clear the buffer for the next frame.
            self.des.pass2_buffer_fill = 0;
            self.pass2_data_ready = true;
          }
        } else {
          // No buffer: report how many more bytes complete the next packet.
          return Ok(TWOPASS_PACKET_SZ - self.des.pass2_buffer_fill);
        }
      } else {
        // We're using a finite buffer.
        // Totals across all frame sub-types (including SEF, hence `..=`)
        //  used to bound how many frames can still arrive.
        let mut cur_scale_window_nframes = 0;
        let mut cur_nframes_left = 0;

        for fti in 0..=FRAME_NSUBTYPES {
          cur_scale_window_nframes += self.scale_window_nframes[fti];
          cur_nframes_left += self.nframes_left[fti];
        }

        let mut frames_needed = self.twopass_in_frames_needed();
        while frames_needed > 0 {
          if let Some(buf) = maybe_buf {
            consumed = self.des.buffer_fill(buf, consumed, TWOPASS_PACKET_SZ);
            if self.des.pass2_buffer_fill >= TWOPASS_PACKET_SZ {
              self.des.pass2_buffer_pos = 0;
              // Read the metrics for the next frame.
              let m = self.des.parse_metrics()?;
              // Add them to the circular buffer.
              if self.nframe_metrics >= self.frame_metrics.len() {
                return Err(
                  "Read too many frames without finding enough TUs"
                    .to_string(),
                );
              }
              // Wrap the write index around the circular buffer.
              let mut fmi = self.frame_metrics_head + self.nframe_metrics;
              if fmi >= self.frame_metrics.len() {
                fmi -= self.frame_metrics.len();
              }
              self.nframe_metrics += 1;
              self.frame_metrics[fmi] = m;
              // And accumulate the statistics over the window.
              self.scale_window_nframes[m.fti] += 1;
              cur_scale_window_nframes += 1;
              // SEF frames (fti == FRAME_NSUBTYPES) are counted but carry no
              //  scale contribution.
              if m.fti < FRAME_NSUBTYPES {
                self.scale_window_sum[m.fti] += bexp_q24(m.log_scale_q24);
              }
              // Only shown frames advance the TU (temporal unit) count.
              if m.show_frame {
                self.scale_window_ntus += 1;
              }
              // Recompute how many more frames we need: enough TUs to fill
              //  the reservoir delay, but never more frames than remain.
              frames_needed = (self.reservoir_frame_delay
                - self.scale_window_ntus)
                .clamp(0, cur_nframes_left - cur_scale_window_nframes);
              // Clear the buffer for the next frame.
              self.des.pass2_buffer_fill = 0;
            } else {
              // Go back for more data.
              break;
            }
          } else {
            // No buffer: report the bytes needed for all outstanding packets,
            //  net of what is already buffered.
            return Ok(
              TWOPASS_PACKET_SZ * (frames_needed as usize)
                - self.des.pass2_buffer_fill,
            );
          }
        }
        // If we've got all the frames we need, fill in the current metrics.
        // We're ready to go.
        if frames_needed <= 0 {
          self.cur_metrics = self.frame_metrics[self.frame_metrics_head];
          // Mark us ready for the next frame.
          self.pass2_data_ready = true;
        }
      }
    }

    Ok(consumed)
  }
1613
1614  // If called without a buffer it will return the size of the next
1615  // buffer it expects.
1616  //
1617  // If called with a buffer it will consume it fully.
1618  // It returns Ok(0) if the buffer had been parsed or Err(())
1619  // if the buffer hadn't been enough or other errors happened.
1620  pub(crate) fn twopass_in(
1621    &mut self, maybe_buf: Option<&[u8]>,
1622  ) -> Result<usize, String> {
1623    let mut consumed = 0;
1624    self.init_second_pass();
1625    // If we haven't got a valid summary header yet, try to parse one.
1626    if self.nframes_total[FRAME_SUBTYPE_I] == 0 {
1627      self.pass2_data_ready = false;
1628      if let Some(buf) = maybe_buf {
1629        consumed = self.twopass_parse_summary(buf)?
1630      } else {
1631        return Ok(self.twopass_first_packet_size());
1632      }
1633    }
1634    if self.nframes_total[FRAME_SUBTYPE_I] > 0 {
1635      if self.nencoded_frames + self.nsef_frames
1636        >= self.nframes_total_total as i64
1637      {
1638        // We don't want any more data after the last frame, and we don't want
1639        //  to allow any more frames to be encoded.
1640        self.pass2_data_ready = false;
1641      } else if !self.pass2_data_ready {
1642        return self.twopass_parse_frame_data(maybe_buf, consumed);
1643      }
1644    }
1645    Ok(consumed)
1646  }
1647}