rav1e/scenechange/
mod.rs

1// Copyright (c) 2018-2022, The rav1e contributors. All rights reserved
2//
3// This source code is subject to the terms of the BSD 2 Clause License and
4// the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
5// was not distributed with this source code in the LICENSE file, you can
6// obtain it at www.aomedia.org/license/software. If the Alliance for Open
7// Media Patent License 1.0 was not distributed with this source code in the
8// PATENTS file, you can obtain it at www.aomedia.org/license/patent.
9
10mod fast;
11mod standard;
12
13use crate::api::{EncoderConfig, SceneDetectionSpeed};
14use crate::cpu_features::CpuFeatureLevel;
15use crate::encoder::Sequence;
16use crate::frame::*;
17use crate::me::RefMEStats;
18use crate::util::Pixel;
19use std::collections::BTreeMap;
20use std::num::NonZeroUsize;
21use std::sync::Arc;
22use std::{cmp, u64};
23
24use self::fast::{detect_scale_factor, FAST_THRESHOLD};
25
26/// Experiments have determined this to be an optimal threshold
27const IMP_BLOCK_DIFF_THRESHOLD: f64 = 7.0;
28
29/// Fast integer division where divisor is a nonzero power of 2
30#[inline(always)]
31pub(crate) fn fast_idiv(n: usize, d: NonZeroUsize) -> usize {
32  debug_assert!(d.is_power_of_two());
33
34  n >> d.trailing_zeros()
35}
36
37struct ScaleFunction<T: Pixel> {
38  downscale_in_place:
39    fn(/* &self: */ &Plane<T>, /* in_plane: */ &mut Plane<T>),
40  downscale: fn(/* &self: */ &Plane<T>) -> Plane<T>,
41  factor: NonZeroUsize,
42}
43
44impl<T: Pixel> ScaleFunction<T> {
45  fn from_scale<const SCALE: usize>() -> Self {
46    assert!(
47      SCALE.is_power_of_two(),
48      "Scaling factor needs to be a nonzero power of two"
49    );
50
51    Self {
52      downscale: Plane::downscale::<SCALE>,
53      downscale_in_place: Plane::downscale_in_place::<SCALE>,
54      factor: NonZeroUsize::new(SCALE).unwrap(),
55    }
56  }
57}
58/// Runs keyframe detection on frames from the lookahead queue.
59pub struct SceneChangeDetector<T: Pixel> {
60  /// Minimum average difference between YUV deltas that will trigger a scene change.
61  threshold: f64,
62  /// Fast scene cut detection mode, uses simple SAD instead of encoder cost estimates.
63  speed_mode: SceneDetectionSpeed,
64  /// Downscaling function for fast scene detection
65  scale_func: Option<ScaleFunction<T>>,
66  /// Frame buffer for scaled frames
67  downscaled_frame_buffer: Option<[Plane<T>; 2]>,
68  /// Buffer for FrameMEStats for cost scenecut
69  frame_me_stats_buffer: Option<RefMEStats>,
70  /// Deque offset for current
71  lookahead_offset: usize,
72  /// Start deque offset based on lookahead
73  deque_offset: usize,
74  /// Scenechange results for adaptive threshold
75  score_deque: Vec<ScenecutResult>,
76  /// Number of pixels in scaled frame for fast mode
77  pixels: usize,
78  /// The bit depth of the video.
79  bit_depth: usize,
80  /// The CPU feature level to be used.
81  cpu_feature_level: CpuFeatureLevel,
82  encoder_config: EncoderConfig,
83  sequence: Arc<Sequence>,
84  /// Calculated intra costs for each input frame.
85  /// These are cached for reuse later in rav1e.
86  pub(crate) intra_costs: BTreeMap<u64, Box<[u32]>>,
87  /// Temporary buffer used by estimate_intra_costs.
88  pub(crate) temp_plane: Option<Plane<T>>,
89}
90
91impl<T: Pixel> SceneChangeDetector<T> {
92  pub fn new(
93    encoder_config: EncoderConfig, cpu_feature_level: CpuFeatureLevel,
94    lookahead_distance: usize, sequence: Arc<Sequence>,
95  ) -> Self {
96    let bit_depth = encoder_config.bit_depth;
97    let speed_mode = if encoder_config.low_latency {
98      SceneDetectionSpeed::Fast
99    } else {
100      encoder_config.speed_settings.scene_detection_mode
101    };
102
103    // Downscaling function for fast scene detection
104    let scale_func = detect_scale_factor(&sequence, speed_mode);
105
106    // Set lookahead offset to 5 if normal lookahead available
107    let lookahead_offset = if lookahead_distance >= 5 { 5 } else { 0 };
108    let deque_offset = lookahead_offset;
109
110    let score_deque = Vec::with_capacity(5 + lookahead_distance);
111
112    // Downscaling factor for fast scenedetect (is currently always a power of 2)
113    let factor =
114      scale_func.as_ref().map_or(NonZeroUsize::new(1).unwrap(), |x| x.factor);
115
116    let pixels = if speed_mode == SceneDetectionSpeed::Fast {
117      fast_idiv(sequence.max_frame_height as usize, factor)
118        * fast_idiv(sequence.max_frame_width as usize, factor)
119    } else {
120      1
121    };
122
123    let threshold = FAST_THRESHOLD * (bit_depth as f64) / 8.0;
124
125    Self {
126      threshold,
127      speed_mode,
128      scale_func,
129      downscaled_frame_buffer: None,
130      frame_me_stats_buffer: None,
131      lookahead_offset,
132      deque_offset,
133      score_deque,
134      pixels,
135      bit_depth,
136      cpu_feature_level,
137      encoder_config,
138      sequence,
139      intra_costs: BTreeMap::new(),
140      temp_plane: None,
141    }
142  }
143
144  /// Runs keyframe detection on the next frame in the lookahead queue.
145  ///
146  /// This function requires that a subset of input frames
147  /// is passed to it in order, and that `keyframes` is only
148  /// updated from this method. `input_frameno` should correspond
149  /// to the second frame in `frame_set`.
150  ///
151  /// This will gracefully handle the first frame in the video as well.
152  #[profiling::function]
153  pub fn analyze_next_frame(
154    &mut self, frame_set: &[&Arc<Frame<T>>], input_frameno: u64,
155    previous_keyframe: u64,
156  ) -> bool {
157    // Use score deque for adaptive threshold for scene cut
158    // Declare score_deque offset based on lookahead  for scene change scores
159
160    // Find the distance to the previous keyframe.
161    let distance = input_frameno - previous_keyframe;
162
163    if frame_set.len() <= self.lookahead_offset {
164      // Don't insert keyframes in the last few frames of the video
165      // This is basically a scene flash and a waste of bits
166      return false;
167    }
168
169    if self.encoder_config.speed_settings.scene_detection_mode
170      == SceneDetectionSpeed::None
171    {
172      if let Some(true) = self.handle_min_max_intervals(distance) {
173        return true;
174      };
175      return false;
176    }
177
178    // Initialization of score deque
179    // based on frame set length
180    if self.deque_offset > 0
181      && frame_set.len() > self.deque_offset + 1
182      && self.score_deque.is_empty()
183    {
184      self.initialize_score_deque(frame_set, input_frameno, self.deque_offset);
185    } else if self.score_deque.is_empty() {
186      self.initialize_score_deque(
187        frame_set,
188        input_frameno,
189        frame_set.len() - 1,
190      );
191
192      self.deque_offset = frame_set.len() - 2;
193    }
194    // Running single frame comparison and adding it to deque
195    // Decrease deque offset if there is no new frames
196    if frame_set.len() > self.deque_offset + 1 {
197      self.run_comparison(
198        frame_set[self.deque_offset].clone(),
199        frame_set[self.deque_offset + 1].clone(),
200        input_frameno + self.deque_offset as u64,
201      );
202    } else {
203      self.deque_offset -= 1;
204    }
205
206    // Adaptive scenecut check
207    let (scenecut, score) = self.adaptive_scenecut();
208    let scenecut = self.handle_min_max_intervals(distance).unwrap_or(scenecut);
209    debug!(
210      "[SC-Detect] Frame {}: Raw={:5.1}  ImpBl={:5.1}  Bwd={:5.1}  Fwd={:5.1}  Th={:.1}  {}",
211      input_frameno,
212      score.inter_cost,
213      score.imp_block_cost,
214      score.backward_adjusted_cost,
215      score.forward_adjusted_cost,
216      score.threshold,
217      if scenecut { "Scenecut" } else { "No cut" }
218    );
219
220    // Keep score deque of 5 backward frames
221    // and forward frames of length of lookahead offset
222    if self.score_deque.len() > 5 + self.lookahead_offset {
223      self.score_deque.pop();
224    }
225
226    scenecut
227  }
228
229  fn handle_min_max_intervals(&mut self, distance: u64) -> Option<bool> {
230    // Handle minimum and maximum keyframe intervals.
231    if distance < self.encoder_config.min_key_frame_interval {
232      return Some(false);
233    }
234    if distance >= self.encoder_config.max_key_frame_interval {
235      return Some(true);
236    }
237    None
238  }
239
240  // Initially fill score deque with frame scores
241  fn initialize_score_deque(
242    &mut self, frame_set: &[&Arc<Frame<T>>], input_frameno: u64,
243    init_len: usize,
244  ) {
245    for x in 0..init_len {
246      self.run_comparison(
247        frame_set[x].clone(),
248        frame_set[x + 1].clone(),
249        input_frameno + x as u64,
250      );
251    }
252  }
253
254  /// Runs scene change comparison beetween 2 given frames
255  /// Insert result to start of score deque
256  fn run_comparison(
257    &mut self, frame1: Arc<Frame<T>>, frame2: Arc<Frame<T>>,
258    input_frameno: u64,
259  ) {
260    let mut result = if self.speed_mode == SceneDetectionSpeed::Fast {
261      self.fast_scenecut(frame1, frame2)
262    } else {
263      self.cost_scenecut(frame1, frame2, input_frameno)
264    };
265
266    // Subtract the highest metric value of surrounding frames from the current one
267    // It makes the peaks in the metric more distinct
268    if self.speed_mode != SceneDetectionSpeed::Fast && self.deque_offset > 0 {
269      if input_frameno == 1 {
270        // Accounts for the second frame not having a score to adjust against.
271        // It should always be 0 because the first frame of the video is always a keyframe.
272        result.backward_adjusted_cost = 0.0;
273      } else {
274        let mut adjusted_cost = f64::MAX;
275        for other_cost in
276          self.score_deque.iter().take(self.deque_offset).map(|i| i.inter_cost)
277        {
278          let this_cost = result.inter_cost - other_cost;
279          if this_cost < adjusted_cost {
280            adjusted_cost = this_cost;
281          }
282          if adjusted_cost < 0.0 {
283            adjusted_cost = 0.0;
284            break;
285          }
286        }
287        result.backward_adjusted_cost = adjusted_cost;
288      }
289      if !self.score_deque.is_empty() {
290        for i in 0..(cmp::min(self.deque_offset, self.score_deque.len())) {
291          let adjusted_cost =
292            self.score_deque[i].inter_cost - result.inter_cost;
293          if i == 0
294            || adjusted_cost < self.score_deque[i].forward_adjusted_cost
295          {
296            self.score_deque[i].forward_adjusted_cost = adjusted_cost;
297          }
298          if self.score_deque[i].forward_adjusted_cost < 0.0 {
299            self.score_deque[i].forward_adjusted_cost = 0.0;
300          }
301        }
302      }
303    }
304    self.score_deque.insert(0, result);
305  }
306
307  /// Compares current scene score to adapted threshold based on previous scores
308  /// Value of current frame is offset by lookahead, if lookahead >=5
309  /// Returns true if current scene score is higher than adapted threshold
310  fn adaptive_scenecut(&mut self) -> (bool, ScenecutResult) {
311    let score = self.score_deque[self.deque_offset];
312
313    // We use the importance block algorithm's cost metrics as a secondary algorithm
314    // because, although it struggles in certain scenarios such as
315    // finding the end of a pan, it is very good at detecting hard scenecuts
316    // or detecting if a pan exists.
317    // Because of this, we only consider a frame for a scenechange if
318    // the importance block algorithm is over the threshold either on this frame (hard scenecut)
319    // or within the past few frames (pan). This helps filter out a few false positives
320    // produced by the cost-based algorithm.
321    let imp_block_threshold =
322      IMP_BLOCK_DIFF_THRESHOLD * (self.bit_depth as f64) / 8.0;
323    if !&self.score_deque[self.deque_offset..]
324      .iter()
325      .any(|result| result.imp_block_cost >= imp_block_threshold)
326    {
327      return (false, score);
328    }
329
330    let cost = score.forward_adjusted_cost;
331    if cost >= score.threshold {
332      let back_deque = &self.score_deque[self.deque_offset + 1..];
333      let forward_deque = &self.score_deque[..self.deque_offset];
334      let back_over_tr_count = back_deque
335        .iter()
336        .filter(|result| result.backward_adjusted_cost >= result.threshold)
337        .count();
338      let forward_over_tr_count = forward_deque
339        .iter()
340        .filter(|result| result.forward_adjusted_cost >= result.threshold)
341        .count();
342
343      // Check for scenecut after the flashes
344      // No frames over threshold forward
345      // and some frames over threshold backward
346      let back_count_req = if self.speed_mode == SceneDetectionSpeed::Fast {
347        // Fast scenecut is more sensitive to false flash detection,
348        // so we want more "evidence" of there being a flash before creating a keyframe.
349        2
350      } else {
351        1
352      };
353      if forward_over_tr_count == 0 && back_over_tr_count >= back_count_req {
354        return (true, score);
355      }
356
357      // Check for scenecut before flash
358      // If distance longer than max flash length
359      if back_over_tr_count == 0
360        && forward_over_tr_count == 1
361        && forward_deque[0].forward_adjusted_cost >= forward_deque[0].threshold
362      {
363        return (true, score);
364      }
365
366      if back_over_tr_count != 0 || forward_over_tr_count != 0 {
367        return (false, score);
368      }
369    }
370
371    (cost >= score.threshold, score)
372  }
373}
374
375#[derive(Debug, Clone, Copy)]
376struct ScenecutResult {
377  inter_cost: f64,
378  imp_block_cost: f64,
379  backward_adjusted_cost: f64,
380  forward_adjusted_cost: f64,
381  threshold: f64,
382}