Skip to main content

av_scenechange/analyze/
mod.rs

1use std::{cmp, collections::BTreeMap, num::NonZeroUsize, sync::Arc};
2
3use log::debug;
4use num_rational::Rational32;
5use v_frame::{
6    frame::Frame,
7    pixel::{ChromaSampling, Pixel},
8    plane::Plane,
9};
10
11use self::fast::{detect_scale_factor, FAST_THRESHOLD};
12use crate::{data::motion::RefMEStats, CpuFeatureLevel, SceneDetectionSpeed};
13
14mod fast;
15mod importance;
16mod inter;
17mod intra;
18mod standard;
19
20/// Experiments have determined this to be an optimal threshold
21const IMP_BLOCK_DIFF_THRESHOLD: f64 = 7.0;
22
23/// Fast integer division where divisor is a nonzero power of 2
24pub(crate) fn fast_idiv(n: usize, d: NonZeroUsize) -> usize {
25    debug_assert!(d.is_power_of_two());
26
27    n >> d.trailing_zeros()
28}
29
30struct ScaleFunction<T: Pixel> {
31    downscale_in_place: fn(/* &self: */ &Plane<T>, /* in_plane: */ &mut Plane<T>),
32    downscale: fn(/* &self: */ &Plane<T>) -> Plane<T>,
33    factor: NonZeroUsize,
34}
35
36impl<T: Pixel> ScaleFunction<T> {
37    fn from_scale<const SCALE: usize>() -> Self {
38        assert!(
39            SCALE.is_power_of_two(),
40            "Scaling factor needs to be a nonzero power of two"
41        );
42
43        Self {
44            downscale: Plane::downscale::<SCALE>,
45            downscale_in_place: Plane::downscale_in_place::<SCALE>,
46            factor: NonZeroUsize::new(SCALE).unwrap(),
47        }
48    }
49}
50/// Runs keyframe detection on frames from the lookahead queue.
51///
52/// This struct is intended for advanced users who need the ability to analyze
53/// a small subset of frames at a time, for example in a streaming fashion.
54/// Most users will prefer to use `new_detector` and `detect_scene_changes`
55/// at the top level of this crate.
56pub struct SceneChangeDetector<T: Pixel> {
57    // User configuration options
58    /// Scenecut detection mode
59    scene_detection_mode: SceneDetectionSpeed,
60    /// Deque offset for current
61    lookahead_offset: usize,
62    /// Minimum number of frames between two scenecuts
63    min_key_frame_interval: usize,
64    /// Maximum number of frames between two scenecuts
65    max_key_frame_interval: usize,
66    /// The CPU feature level to be used.
67    cpu_feature_level: CpuFeatureLevel,
68
69    // Internal configuration options
70    /// Minimum average difference between YUV deltas that will trigger a scene
71    /// change.
72    threshold: f64,
73    /// Width and height of the unscaled frame
74    resolution: (usize, usize),
75    /// The bit depth of the video.
76    bit_depth: usize,
77    /// The frame rate of the video.
78    frame_rate: Rational32,
79    /// The chroma subsampling of the video.
80    chroma_sampling: ChromaSampling,
81    /// Number of pixels in scaled frame for fast mode
82    scaled_pixels: usize,
83    /// Downscaling function for fast scene detection
84    scale_func: Option<ScaleFunction<T>>,
85
86    // Internal data structures
87    /// Start deque offset based on lookahead
88    deque_offset: usize,
89    /// Frame buffer for scaled frames
90    downscaled_frame_buffer: Option<[Plane<T>; 2]>,
91    /// Scenechange results for adaptive threshold
92    score_deque: Vec<ScenecutResult>,
93    /// Temporary buffer used by `estimate_intra_costs`.
94    /// We store it on the struct so we only need to allocate it once.
95    temp_plane: Option<Plane<T>>,
96    /// Buffer for `FrameMEStats` for cost scenecut
97    frame_me_stats_buffer: Option<RefMEStats>,
98
99    /// Calculated intra costs for each input frame.
100    /// These can be cached for reuse by advanced API users.
101    /// Caching will occur if this is not `None`.
102    pub intra_costs: Option<BTreeMap<usize, Box<[u32]>>>,
103}
104
105impl<T: Pixel> SceneChangeDetector<T> {
106    /// Creates a new instance of the `SceneChangeDetector`.
107    #[allow(clippy::too_many_arguments)]
108    #[allow(clippy::missing_panics_doc)]
109    #[inline]
110    pub fn new(
111        resolution: (usize, usize),
112        bit_depth: usize,
113        frame_rate: Rational32,
114        chroma_sampling: ChromaSampling,
115        lookahead_distance: usize,
116        scene_detection_mode: SceneDetectionSpeed,
117        min_key_frame_interval: usize,
118        max_key_frame_interval: usize,
119        cpu_feature_level: CpuFeatureLevel,
120    ) -> Self {
121        // Downscaling function for fast scene detection
122        let scale_func = detect_scale_factor(resolution, scene_detection_mode);
123
124        // Set lookahead offset to 5 if normal lookahead available
125        let lookahead_offset = if lookahead_distance >= 5 { 5 } else { 0 };
126        let deque_offset = lookahead_offset;
127
128        let score_deque = Vec::with_capacity(5 + lookahead_distance);
129
130        // Downscaling factor for fast scenedetect (is currently always a power of 2)
131        let factor = scale_func.as_ref().map_or(
132            NonZeroUsize::new(1).expect("constant should not panic"),
133            |x| x.factor,
134        );
135
136        let pixels = if scene_detection_mode == SceneDetectionSpeed::Fast {
137            fast_idiv(resolution.1, factor) * fast_idiv(resolution.0, factor)
138        } else {
139            1
140        };
141
142        let threshold = FAST_THRESHOLD * (bit_depth as f64) / 8.0;
143
144        Self {
145            threshold,
146            scene_detection_mode,
147            scale_func,
148            lookahead_offset,
149            deque_offset,
150            score_deque,
151            scaled_pixels: pixels,
152            bit_depth,
153            frame_rate,
154            chroma_sampling,
155            min_key_frame_interval,
156            max_key_frame_interval,
157            cpu_feature_level,
158            downscaled_frame_buffer: None,
159            resolution,
160            temp_plane: None,
161            frame_me_stats_buffer: None,
162            intra_costs: None,
163        }
164    }
165
166    /// Enables caching of intra costs. For advanced API users.
167    #[inline]
168    pub fn enable_cache(&mut self) {
169        if self.intra_costs.is_none() {
170            self.intra_costs = Some(BTreeMap::new());
171        }
172    }
173
174    /// Runs keyframe detection on the next frame in the lookahead queue.
175    ///
176    /// This function requires that a subset of input frames
177    /// is passed to it in order, and that `keyframes` is only
178    /// updated from this method. `input_frameno` should correspond
179    /// to the second frame in `frame_set`.
180    ///
181    /// This will gracefully handle the first frame in the video as well.
182    #[inline]
183    pub fn analyze_next_frame(
184        &mut self,
185        frame_set: &[&Arc<Frame<T>>],
186        input_frameno: usize,
187        previous_keyframe: usize,
188    ) -> bool {
189        // Use score deque for adaptive threshold for scene cut
190        // Declare score_deque offset based on lookahead  for scene change scores
191
192        // Find the distance to the previous keyframe.
193        let distance = input_frameno - previous_keyframe;
194
195        if frame_set.len() <= self.lookahead_offset {
196            // Don't insert keyframes in the last few frames of the video
197            // This is basically a scene flash and a waste of bits
198            return false;
199        }
200
201        if self.scene_detection_mode == SceneDetectionSpeed::None {
202            if let Some(true) = self.handle_min_max_intervals(distance) {
203                return true;
204            };
205            return false;
206        }
207
208        // Initialization of score deque
209        // based on frame set length
210        if self.deque_offset > 0
211            && frame_set.len() > self.deque_offset + 1
212            && self.score_deque.is_empty()
213        {
214            self.initialize_score_deque(frame_set, input_frameno, self.deque_offset);
215        } else if self.score_deque.is_empty() {
216            self.initialize_score_deque(frame_set, input_frameno, frame_set.len() - 1);
217
218            self.deque_offset = frame_set.len() - 2;
219        }
220        // Running single frame comparison and adding it to deque
221        // Decrease deque offset if there is no new frames
222        if frame_set.len() > self.deque_offset + 1 {
223            self.run_comparison(
224                frame_set[self.deque_offset].clone(),
225                frame_set[self.deque_offset + 1].clone(),
226                input_frameno + self.deque_offset,
227            );
228        } else {
229            self.deque_offset -= 1;
230        }
231
232        // Adaptive scenecut check
233        let (scenecut, score) = self.adaptive_scenecut();
234        let scenecut = self.handle_min_max_intervals(distance).unwrap_or(scenecut);
235        debug!(
236            "[SC-Detect] Frame {}: Raw={:5.1}  ImpBl={:5.1}  Bwd={:5.1}  Fwd={:5.1}  Th={:.1}  {}",
237            input_frameno,
238            score.inter_cost,
239            score.imp_block_cost,
240            score.backward_adjusted_cost,
241            score.forward_adjusted_cost,
242            score.threshold,
243            if scenecut { "Scenecut" } else { "No cut" }
244        );
245
246        // Keep score deque of 5 backward frames
247        // and forward frames of length of lookahead offset
248        if self.score_deque.len() > 5 + self.lookahead_offset {
249            self.score_deque.pop();
250        }
251
252        scenecut
253    }
254
255    fn handle_min_max_intervals(&mut self, distance: usize) -> Option<bool> {
256        // Handle minimum and maximum keyframe intervals.
257        if distance < self.min_key_frame_interval {
258            return Some(false);
259        }
260        if distance >= self.max_key_frame_interval {
261            return Some(true);
262        }
263        None
264    }
265
266    // Initially fill score deque with frame scores
267    fn initialize_score_deque(
268        &mut self,
269        frame_set: &[&Arc<Frame<T>>],
270        input_frameno: usize,
271        init_len: usize,
272    ) {
273        for x in 0..init_len {
274            self.run_comparison(
275                frame_set[x].clone(),
276                frame_set[x + 1].clone(),
277                input_frameno + x,
278            );
279        }
280    }
281
282    /// Runs scene change comparison between 2 given frames
283    /// Insert result to start of score deque
284    fn run_comparison(
285        &mut self,
286        frame1: Arc<Frame<T>>,
287        frame2: Arc<Frame<T>>,
288        input_frameno: usize,
289    ) {
290        let mut result = match self.scene_detection_mode {
291            SceneDetectionSpeed::Fast => self.fast_scenecut(frame1, frame2),
292            SceneDetectionSpeed::Standard => self.cost_scenecut(frame1, frame2, input_frameno),
293            _ => unreachable!(),
294        };
295
296        // Subtract the highest metric value of surrounding frames from the current one.
297        // It makes the peaks in the metric more distinct.
298        if self.scene_detection_mode == SceneDetectionSpeed::Standard && self.deque_offset > 0 {
299            if input_frameno == 1 {
300                // Accounts for the second frame not having a score to adjust against.
301                // It should always be 0 because the first frame of the video is always a
302                // keyframe.
303                result.backward_adjusted_cost = 0.0;
304            } else {
305                let mut adjusted_cost = f64::MAX;
306                for other_cost in self
307                    .score_deque
308                    .iter()
309                    .take(self.deque_offset)
310                    .map(|i| i.inter_cost)
311                {
312                    let this_cost = result.inter_cost - other_cost;
313                    if this_cost < adjusted_cost {
314                        adjusted_cost = this_cost;
315                    }
316                    if adjusted_cost < 0.0 {
317                        adjusted_cost = 0.0;
318                        break;
319                    }
320                }
321                result.backward_adjusted_cost = adjusted_cost;
322            }
323            if !self.score_deque.is_empty() {
324                for i in 0..cmp::min(self.deque_offset, self.score_deque.len()) {
325                    let adjusted_cost = self.score_deque[i].inter_cost - result.inter_cost;
326                    if i == 0 || adjusted_cost < self.score_deque[i].forward_adjusted_cost {
327                        self.score_deque[i].forward_adjusted_cost = adjusted_cost;
328                    }
329                    if self.score_deque[i].forward_adjusted_cost < 0.0 {
330                        self.score_deque[i].forward_adjusted_cost = 0.0;
331                    }
332                }
333            }
334        }
335        self.score_deque.insert(0, result);
336    }
337
338    /// Compares current scene score to adapted threshold based on previous
339    /// scores
340    ///
341    /// Value of current frame is offset by lookahead, if lookahead >=5
342    ///
343    /// Returns true if current scene score is higher than adapted threshold
344    fn adaptive_scenecut(&mut self) -> (bool, ScenecutResult) {
345        let score = self.score_deque[self.deque_offset];
346
347        // We use the importance block algorithm's cost metrics as a secondary algorithm
348        // because, although it struggles in certain scenarios such as
349        // finding the end of a pan, it is very good at detecting hard scenecuts
350        // or detecting if a pan exists.
351        //
352        // Because of this, we only consider a frame for a scenechange if
353        // the importance block algorithm is over the threshold either on this frame
354        // (hard scenecut) or within the past few frames (pan). This helps
355        // filter out a few false positives produced by the cost-based
356        // algorithm.
357        let imp_block_threshold = IMP_BLOCK_DIFF_THRESHOLD * (self.bit_depth as f64) / 8.0;
358        if !&self.score_deque[self.deque_offset..]
359            .iter()
360            .any(|result| result.imp_block_cost >= imp_block_threshold)
361        {
362            return (false, score);
363        }
364
365        let cost = score.forward_adjusted_cost;
366        if cost >= score.threshold {
367            let back_deque = &self.score_deque[self.deque_offset + 1..];
368            let forward_deque = &self.score_deque[..self.deque_offset];
369            let back_over_tr_count = back_deque
370                .iter()
371                .filter(|result| result.backward_adjusted_cost >= result.threshold)
372                .count();
373            let forward_over_tr_count = forward_deque
374                .iter()
375                .filter(|result| result.forward_adjusted_cost >= result.threshold)
376                .count();
377
378            // Check for scenecut after the flashes
379            // No frames over threshold forward
380            // and some frames over threshold backward
381            let back_count_req = if self.scene_detection_mode == SceneDetectionSpeed::Fast {
382                // Fast scenecut is more sensitive to false flash detection,
383                // so we want more "evidence" of there being a flash before creating a keyframe.
384                2
385            } else {
386                1
387            };
388            if forward_over_tr_count == 0 && back_over_tr_count >= back_count_req {
389                return (true, score);
390            }
391
392            // Check for scenecut before flash
393            // If distance longer than max flash length
394            if back_over_tr_count == 0
395                && forward_over_tr_count == 1
396                && forward_deque[0].forward_adjusted_cost >= forward_deque[0].threshold
397            {
398                return (true, score);
399            }
400
401            if back_over_tr_count != 0 || forward_over_tr_count != 0 {
402                return (false, score);
403            }
404        }
405
406        (cost >= score.threshold, score)
407    }
408}
409
410#[derive(Debug, Clone, Copy)]
411struct ScenecutResult {
412    inter_cost: f64,
413    imp_block_cost: f64,
414    backward_adjusted_cost: f64,
415    forward_adjusted_cost: f64,
416    threshold: f64,
417}